diff --git "a/checkpoint-7532/trainer_state.json" "b/checkpoint-7532/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-7532/trainer_state.json" @@ -0,0 +1,52758 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 7532, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0002655689815429558, + "grad_norm": 1.8881195832990014, + "learning_rate": 0.0, + "loss": 1.1502833366394043, + "step": 1 + }, + { + "epoch": 0.0005311379630859116, + "grad_norm": 1.77718785062999, + "learning_rate": 5.3050397877984086e-08, + "loss": 1.1698756217956543, + "step": 2 + }, + { + "epoch": 0.0007967069446288673, + "grad_norm": 1.6766718507101437, + "learning_rate": 1.0610079575596817e-07, + "loss": 1.1060130596160889, + "step": 3 + }, + { + "epoch": 0.0010622759261718232, + "grad_norm": 1.876053682165919, + "learning_rate": 1.5915119363395226e-07, + "loss": 1.1075276136398315, + "step": 4 + }, + { + "epoch": 0.001327844907714779, + "grad_norm": 1.88228417845019, + "learning_rate": 2.1220159151193635e-07, + "loss": 1.2153511047363281, + "step": 5 + }, + { + "epoch": 0.0015934138892577346, + "grad_norm": 1.9273368394845023, + "learning_rate": 2.6525198938992043e-07, + "loss": 1.1400426626205444, + "step": 6 + }, + { + "epoch": 0.0018589828708006906, + "grad_norm": 1.904814034912833, + "learning_rate": 3.183023872679045e-07, + "loss": 1.2070660591125488, + "step": 7 + }, + { + "epoch": 0.0021245518523436463, + "grad_norm": 1.7346381008587795, + "learning_rate": 3.713527851458886e-07, + "loss": 1.1614588499069214, + "step": 8 + }, + { + "epoch": 0.002390120833886602, + "grad_norm": 1.817032704311048, + "learning_rate": 4.244031830238727e-07, + "loss": 1.1739476919174194, + "step": 9 + }, + { + "epoch": 0.002655689815429558, + "grad_norm": 1.8291974144657501, + "learning_rate": 4.774535809018568e-07, + "loss": 1.1559171676635742, + "step": 10 + }, + { + "epoch": 0.0029212587969725135, + "grad_norm": 2.0039010539208744, + "learning_rate": 5.305039787798409e-07, + "loss": 1.2086225748062134, + "step": 11 + }, + { + "epoch": 0.0031868277785154693, + "grad_norm": 1.876026657216244, + "learning_rate": 5.83554376657825e-07, + "loss": 1.227709174156189, + "step": 12 + }, + { + "epoch": 0.003452396760058425, + "grad_norm": 2.0245192813139825, + "learning_rate": 6.36604774535809e-07, + "loss": 1.255577564239502, + "step": 13 + }, + { + "epoch": 0.003717965741601381, + "grad_norm": 1.8641260357218605, + "learning_rate": 6.896551724137931e-07, + "loss": 1.1953760385513306, + "step": 14 + }, + { + "epoch": 0.0039835347231443365, + "grad_norm": 1.9079733249323254, + "learning_rate": 7.427055702917772e-07, + "loss": 1.1325336694717407, + "step": 15 + }, + { + "epoch": 0.004249103704687293, + "grad_norm": 1.8230190567516942, + "learning_rate": 7.957559681697613e-07, + "loss": 1.232974648475647, + "step": 16 + }, + { + "epoch": 0.004514672686230248, + "grad_norm": 1.8532380418447003, + "learning_rate": 8.488063660477454e-07, + "loss": 1.1527395248413086, + "step": 17 + }, + { + "epoch": 0.004780241667773204, + "grad_norm": 1.986294801704247, + "learning_rate": 9.018567639257295e-07, + "loss": 1.151026964187622, + "step": 18 + }, + { + "epoch": 0.00504581064931616, + "grad_norm": 1.8048967405226255, + "learning_rate": 9.549071618037136e-07, + "loss": 1.155288815498352, + "step": 19 + }, + { + "epoch": 0.005311379630859116, + "grad_norm": 2.1631450267380767, + "learning_rate": 1.0079575596816979e-06, + "loss": 1.183434009552002, + "step": 20 + }, + { + "epoch": 0.005576948612402072, + "grad_norm": 1.88758019498484, + "learning_rate": 1.0610079575596817e-06, + "loss": 1.161030650138855, + "step": 21 + }, + { + "epoch": 0.005842517593945027, + "grad_norm": 1.9605989446426395, + "learning_rate": 1.1140583554376658e-06, + "loss": 1.123382806777954, + "step": 22 + }, + { + "epoch": 0.006108086575487983, + "grad_norm": 2.2042020560619306, + "learning_rate": 1.16710875331565e-06, + "loss": 1.238707423210144, + "step": 23 + }, + { + "epoch": 0.0063736555570309385, + "grad_norm": 2.289866056000848, + "learning_rate": 1.220159151193634e-06, + "loss": 1.2058464288711548, + "step": 24 + }, + { + "epoch": 0.006639224538573895, + "grad_norm": 2.724214643619529, + "learning_rate": 1.273209549071618e-06, + "loss": 1.2351092100143433, + "step": 25 + }, + { + "epoch": 0.00690479352011685, + "grad_norm": 2.5088520951326028, + "learning_rate": 1.3262599469496024e-06, + "loss": 1.1739860773086548, + "step": 26 + }, + { + "epoch": 0.007170362501659806, + "grad_norm": 2.3243798435890155, + "learning_rate": 1.3793103448275862e-06, + "loss": 1.1407617330551147, + "step": 27 + }, + { + "epoch": 0.007435931483202762, + "grad_norm": 2.533007430657115, + "learning_rate": 1.4323607427055705e-06, + "loss": 1.1844531297683716, + "step": 28 + }, + { + "epoch": 0.007701500464745718, + "grad_norm": 2.4702075978733804, + "learning_rate": 1.4854111405835544e-06, + "loss": 1.1293678283691406, + "step": 29 + }, + { + "epoch": 0.007967069446288673, + "grad_norm": 3.0873404038783963, + "learning_rate": 1.5384615384615387e-06, + "loss": 1.1310899257659912, + "step": 30 + }, + { + "epoch": 0.00823263842783163, + "grad_norm": 2.7098364862500013, + "learning_rate": 1.5915119363395226e-06, + "loss": 1.1015795469284058, + "step": 31 + }, + { + "epoch": 0.008498207409374585, + "grad_norm": 2.8074949689582476, + "learning_rate": 1.6445623342175069e-06, + "loss": 1.0756056308746338, + "step": 32 + }, + { + "epoch": 0.00876377639091754, + "grad_norm": 3.1563034348975676, + "learning_rate": 1.6976127320954908e-06, + "loss": 1.1496126651763916, + "step": 33 + }, + { + "epoch": 0.009029345372460496, + "grad_norm": 2.842390896608423, + "learning_rate": 1.750663129973475e-06, + "loss": 1.203465461730957, + "step": 34 + }, + { + "epoch": 0.009294914354003453, + "grad_norm": 2.6747271223349753, + "learning_rate": 1.803713527851459e-06, + "loss": 1.0613923072814941, + "step": 35 + }, + { + "epoch": 0.009560483335546408, + "grad_norm": 2.146709655536541, + "learning_rate": 1.8567639257294432e-06, + "loss": 1.06027090549469, + "step": 36 + }, + { + "epoch": 0.009826052317089363, + "grad_norm": 1.9942495143394863, + "learning_rate": 1.909814323607427e-06, + "loss": 1.0508522987365723, + "step": 37 + }, + { + "epoch": 0.01009162129863232, + "grad_norm": 2.1704927298148107, + "learning_rate": 1.9628647214854114e-06, + "loss": 1.0353929996490479, + "step": 38 + }, + { + "epoch": 0.010357190280175276, + "grad_norm": 1.8252380884349957, + "learning_rate": 2.0159151193633957e-06, + "loss": 0.9974027276039124, + "step": 39 + }, + { + "epoch": 0.010622759261718231, + "grad_norm": 1.7188806752497834, + "learning_rate": 2.0689655172413796e-06, + "loss": 1.0849467515945435, + "step": 40 + }, + { + "epoch": 0.010888328243261186, + "grad_norm": 1.3692667089198218, + "learning_rate": 2.1220159151193635e-06, + "loss": 1.005434274673462, + "step": 41 + }, + { + "epoch": 0.011153897224804143, + "grad_norm": 1.3465343019370317, + "learning_rate": 2.1750663129973478e-06, + "loss": 1.052631139755249, + "step": 42 + }, + { + "epoch": 0.011419466206347099, + "grad_norm": 1.352421126005469, + "learning_rate": 2.2281167108753316e-06, + "loss": 0.9470957517623901, + "step": 43 + }, + { + "epoch": 0.011685035187890054, + "grad_norm": 1.2219308328594767, + "learning_rate": 2.281167108753316e-06, + "loss": 0.9865130186080933, + "step": 44 + }, + { + "epoch": 0.01195060416943301, + "grad_norm": 1.19161259271228, + "learning_rate": 2.3342175066313e-06, + "loss": 0.9405577778816223, + "step": 45 + }, + { + "epoch": 0.012216173150975966, + "grad_norm": 1.1603073869733838, + "learning_rate": 2.387267904509284e-06, + "loss": 0.9418795108795166, + "step": 46 + }, + { + "epoch": 0.012481742132518922, + "grad_norm": 1.1897328813812988, + "learning_rate": 2.440318302387268e-06, + "loss": 0.9841142892837524, + "step": 47 + }, + { + "epoch": 0.012747311114061877, + "grad_norm": 1.159720101499262, + "learning_rate": 2.4933687002652523e-06, + "loss": 0.9412609338760376, + "step": 48 + }, + { + "epoch": 0.013012880095604834, + "grad_norm": 1.1421347262548374, + "learning_rate": 2.546419098143236e-06, + "loss": 0.9239889979362488, + "step": 49 + }, + { + "epoch": 0.01327844907714779, + "grad_norm": 1.144363453746544, + "learning_rate": 2.59946949602122e-06, + "loss": 0.9212941527366638, + "step": 50 + }, + { + "epoch": 0.013544018058690745, + "grad_norm": 0.9916816911141796, + "learning_rate": 2.6525198938992047e-06, + "loss": 0.8863773345947266, + "step": 51 + }, + { + "epoch": 0.0138095870402337, + "grad_norm": 0.9890613082667745, + "learning_rate": 2.7055702917771886e-06, + "loss": 0.8990404009819031, + "step": 52 + }, + { + "epoch": 0.014075156021776657, + "grad_norm": 1.1123466462737277, + "learning_rate": 2.7586206896551725e-06, + "loss": 0.9257171154022217, + "step": 53 + }, + { + "epoch": 0.014340725003319612, + "grad_norm": 0.8689931750055545, + "learning_rate": 2.8116710875331564e-06, + "loss": 0.8239601254463196, + "step": 54 + }, + { + "epoch": 0.014606293984862568, + "grad_norm": 0.9936229603029793, + "learning_rate": 2.864721485411141e-06, + "loss": 0.8656830787658691, + "step": 55 + }, + { + "epoch": 0.014871862966405525, + "grad_norm": 1.0202371081091262, + "learning_rate": 2.917771883289125e-06, + "loss": 0.9470342397689819, + "step": 56 + }, + { + "epoch": 0.01513743194794848, + "grad_norm": 0.9663900963956384, + "learning_rate": 2.970822281167109e-06, + "loss": 0.8699859976768494, + "step": 57 + }, + { + "epoch": 0.015403000929491435, + "grad_norm": 0.940263545207204, + "learning_rate": 3.0238726790450927e-06, + "loss": 0.8668704628944397, + "step": 58 + }, + { + "epoch": 0.01566856991103439, + "grad_norm": 0.9865381848251076, + "learning_rate": 3.0769230769230774e-06, + "loss": 0.841624915599823, + "step": 59 + }, + { + "epoch": 0.015934138892577346, + "grad_norm": 0.8909972421095332, + "learning_rate": 3.1299734748010613e-06, + "loss": 0.8412661552429199, + "step": 60 + }, + { + "epoch": 0.0161997078741203, + "grad_norm": 0.8771283277278942, + "learning_rate": 3.183023872679045e-06, + "loss": 0.818957507610321, + "step": 61 + }, + { + "epoch": 0.01646527685566326, + "grad_norm": 0.9190140482494583, + "learning_rate": 3.23607427055703e-06, + "loss": 0.8030763268470764, + "step": 62 + }, + { + "epoch": 0.016730845837206215, + "grad_norm": 0.8839367067386452, + "learning_rate": 3.2891246684350138e-06, + "loss": 0.7869359850883484, + "step": 63 + }, + { + "epoch": 0.01699641481874917, + "grad_norm": 0.8058255896640879, + "learning_rate": 3.3421750663129977e-06, + "loss": 0.7912170886993408, + "step": 64 + }, + { + "epoch": 0.017261983800292126, + "grad_norm": 0.8538938403853334, + "learning_rate": 3.3952254641909815e-06, + "loss": 0.7736695408821106, + "step": 65 + }, + { + "epoch": 0.01752755278183508, + "grad_norm": 0.8652625375848492, + "learning_rate": 3.448275862068966e-06, + "loss": 0.768275260925293, + "step": 66 + }, + { + "epoch": 0.017793121763378036, + "grad_norm": 0.8691478661970735, + "learning_rate": 3.50132625994695e-06, + "loss": 0.7210639119148254, + "step": 67 + }, + { + "epoch": 0.01805869074492099, + "grad_norm": 0.8378031795839386, + "learning_rate": 3.554376657824934e-06, + "loss": 0.7488028407096863, + "step": 68 + }, + { + "epoch": 0.01832425972646395, + "grad_norm": 0.8943989597273122, + "learning_rate": 3.607427055702918e-06, + "loss": 0.7329621911048889, + "step": 69 + }, + { + "epoch": 0.018589828708006906, + "grad_norm": 0.92104620358882, + "learning_rate": 3.660477453580902e-06, + "loss": 0.7270619869232178, + "step": 70 + }, + { + "epoch": 0.01885539768954986, + "grad_norm": 0.9782498013554233, + "learning_rate": 3.7135278514588865e-06, + "loss": 0.7271254658699036, + "step": 71 + }, + { + "epoch": 0.019120966671092816, + "grad_norm": 0.9115603845811348, + "learning_rate": 3.7665782493368703e-06, + "loss": 0.787033200263977, + "step": 72 + }, + { + "epoch": 0.01938653565263577, + "grad_norm": 0.8604692726067453, + "learning_rate": 3.819628647214854e-06, + "loss": 0.7049479484558105, + "step": 73 + }, + { + "epoch": 0.019652104634178727, + "grad_norm": 0.8610577281688413, + "learning_rate": 3.8726790450928385e-06, + "loss": 0.7146892547607422, + "step": 74 + }, + { + "epoch": 0.019917673615721682, + "grad_norm": 0.7602187567662452, + "learning_rate": 3.925729442970823e-06, + "loss": 0.7212516069412231, + "step": 75 + }, + { + "epoch": 0.02018324259726464, + "grad_norm": 0.6842508042039768, + "learning_rate": 3.978779840848806e-06, + "loss": 0.6612375378608704, + "step": 76 + }, + { + "epoch": 0.020448811578807596, + "grad_norm": 0.7781006919053841, + "learning_rate": 4.031830238726791e-06, + "loss": 0.7038244605064392, + "step": 77 + }, + { + "epoch": 0.02071438056035055, + "grad_norm": 0.7186592057129139, + "learning_rate": 4.084880636604775e-06, + "loss": 0.7081903219223022, + "step": 78 + }, + { + "epoch": 0.020979949541893507, + "grad_norm": 0.7655954113403886, + "learning_rate": 4.137931034482759e-06, + "loss": 0.7079841494560242, + "step": 79 + }, + { + "epoch": 0.021245518523436462, + "grad_norm": 0.7149787673446053, + "learning_rate": 4.190981432360743e-06, + "loss": 0.7090641260147095, + "step": 80 + }, + { + "epoch": 0.021511087504979418, + "grad_norm": 0.6657837070384769, + "learning_rate": 4.244031830238727e-06, + "loss": 0.6632575988769531, + "step": 81 + }, + { + "epoch": 0.021776656486522373, + "grad_norm": 0.6666401713606211, + "learning_rate": 4.297082228116711e-06, + "loss": 0.7231097221374512, + "step": 82 + }, + { + "epoch": 0.02204222546806533, + "grad_norm": 0.6804476609839887, + "learning_rate": 4.3501326259946955e-06, + "loss": 0.6696034669876099, + "step": 83 + }, + { + "epoch": 0.022307794449608287, + "grad_norm": 0.7073638927991296, + "learning_rate": 4.403183023872679e-06, + "loss": 0.7550696134567261, + "step": 84 + }, + { + "epoch": 0.022573363431151242, + "grad_norm": 0.7064770122504733, + "learning_rate": 4.456233421750663e-06, + "loss": 0.671328067779541, + "step": 85 + }, + { + "epoch": 0.022838932412694198, + "grad_norm": 0.6506139330803743, + "learning_rate": 4.5092838196286476e-06, + "loss": 0.6864410638809204, + "step": 86 + }, + { + "epoch": 0.023104501394237153, + "grad_norm": 0.6642837777732639, + "learning_rate": 4.562334217506632e-06, + "loss": 0.6870769262313843, + "step": 87 + }, + { + "epoch": 0.023370070375780108, + "grad_norm": 0.6947506894199804, + "learning_rate": 4.615384615384616e-06, + "loss": 0.6539690494537354, + "step": 88 + }, + { + "epoch": 0.023635639357323063, + "grad_norm": 0.6446743321890098, + "learning_rate": 4.6684350132626e-06, + "loss": 0.6946991086006165, + "step": 89 + }, + { + "epoch": 0.02390120833886602, + "grad_norm": 0.6384512383480915, + "learning_rate": 4.721485411140584e-06, + "loss": 0.6177583932876587, + "step": 90 + }, + { + "epoch": 0.024166777320408978, + "grad_norm": 0.7150510018442997, + "learning_rate": 4.774535809018568e-06, + "loss": 0.6890037059783936, + "step": 91 + }, + { + "epoch": 0.024432346301951933, + "grad_norm": 0.6592991709316253, + "learning_rate": 4.8275862068965525e-06, + "loss": 0.6563063263893127, + "step": 92 + }, + { + "epoch": 0.024697915283494888, + "grad_norm": 0.6897740926797078, + "learning_rate": 4.880636604774536e-06, + "loss": 0.714318573474884, + "step": 93 + }, + { + "epoch": 0.024963484265037843, + "grad_norm": 0.6433596226177777, + "learning_rate": 4.93368700265252e-06, + "loss": 0.6720882654190063, + "step": 94 + }, + { + "epoch": 0.0252290532465808, + "grad_norm": 0.5910528348002435, + "learning_rate": 4.9867374005305045e-06, + "loss": 0.602899968624115, + "step": 95 + }, + { + "epoch": 0.025494622228123754, + "grad_norm": 0.6635651676723159, + "learning_rate": 5.039787798408489e-06, + "loss": 0.6628841161727905, + "step": 96 + }, + { + "epoch": 0.02576019120966671, + "grad_norm": 0.6070065577903714, + "learning_rate": 5.092838196286472e-06, + "loss": 0.6486932635307312, + "step": 97 + }, + { + "epoch": 0.026025760191209668, + "grad_norm": 0.6484848126679549, + "learning_rate": 5.145888594164457e-06, + "loss": 0.6719033122062683, + "step": 98 + }, + { + "epoch": 0.026291329172752623, + "grad_norm": 0.6856934201881044, + "learning_rate": 5.19893899204244e-06, + "loss": 0.6818530559539795, + "step": 99 + }, + { + "epoch": 0.02655689815429558, + "grad_norm": 0.6204811558305167, + "learning_rate": 5.251989389920424e-06, + "loss": 0.6306912899017334, + "step": 100 + }, + { + "epoch": 0.026822467135838534, + "grad_norm": 0.7820574736690976, + "learning_rate": 5.3050397877984095e-06, + "loss": 0.5952945351600647, + "step": 101 + }, + { + "epoch": 0.02708803611738149, + "grad_norm": 0.6546243503849497, + "learning_rate": 5.358090185676394e-06, + "loss": 0.6566107273101807, + "step": 102 + }, + { + "epoch": 0.027353605098924445, + "grad_norm": 0.707921645301647, + "learning_rate": 5.411140583554377e-06, + "loss": 0.6981694102287292, + "step": 103 + }, + { + "epoch": 0.0276191740804674, + "grad_norm": 0.6375441067969543, + "learning_rate": 5.4641909814323615e-06, + "loss": 0.6231328248977661, + "step": 104 + }, + { + "epoch": 0.02788474306201036, + "grad_norm": 0.6964560869475424, + "learning_rate": 5.517241379310345e-06, + "loss": 0.6414977312088013, + "step": 105 + }, + { + "epoch": 0.028150312043553314, + "grad_norm": 0.6835502446580011, + "learning_rate": 5.570291777188329e-06, + "loss": 0.6335234642028809, + "step": 106 + }, + { + "epoch": 0.02841588102509627, + "grad_norm": 0.6248033284508979, + "learning_rate": 5.623342175066313e-06, + "loss": 0.6040852665901184, + "step": 107 + }, + { + "epoch": 0.028681450006639225, + "grad_norm": 0.6645474785171195, + "learning_rate": 5.676392572944297e-06, + "loss": 0.6011114716529846, + "step": 108 + }, + { + "epoch": 0.02894701898818218, + "grad_norm": 0.655106623405533, + "learning_rate": 5.729442970822282e-06, + "loss": 0.6042627096176147, + "step": 109 + }, + { + "epoch": 0.029212587969725135, + "grad_norm": 0.720208539355598, + "learning_rate": 5.782493368700266e-06, + "loss": 0.6183412671089172, + "step": 110 + }, + { + "epoch": 0.02947815695126809, + "grad_norm": 0.6666287454908232, + "learning_rate": 5.83554376657825e-06, + "loss": 0.6150818467140198, + "step": 111 + }, + { + "epoch": 0.02974372593281105, + "grad_norm": 0.6840692324124527, + "learning_rate": 5.888594164456234e-06, + "loss": 0.6202039122581482, + "step": 112 + }, + { + "epoch": 0.030009294914354005, + "grad_norm": 0.6626407253242022, + "learning_rate": 5.941644562334218e-06, + "loss": 0.6334809064865112, + "step": 113 + }, + { + "epoch": 0.03027486389589696, + "grad_norm": 0.6319419097399773, + "learning_rate": 5.994694960212202e-06, + "loss": 0.5728089809417725, + "step": 114 + }, + { + "epoch": 0.030540432877439915, + "grad_norm": 0.6988175213443283, + "learning_rate": 6.0477453580901854e-06, + "loss": 0.6884603500366211, + "step": 115 + }, + { + "epoch": 0.03080600185898287, + "grad_norm": 0.6618120552387852, + "learning_rate": 6.1007957559681706e-06, + "loss": 0.5619829893112183, + "step": 116 + }, + { + "epoch": 0.031071570840525826, + "grad_norm": 0.6756012639437595, + "learning_rate": 6.153846153846155e-06, + "loss": 0.6224710941314697, + "step": 117 + }, + { + "epoch": 0.03133713982206878, + "grad_norm": 0.7208355833756769, + "learning_rate": 6.206896551724138e-06, + "loss": 0.6119496822357178, + "step": 118 + }, + { + "epoch": 0.03160270880361174, + "grad_norm": 0.6917782946677038, + "learning_rate": 6.259946949602123e-06, + "loss": 0.6190857887268066, + "step": 119 + }, + { + "epoch": 0.03186827778515469, + "grad_norm": 0.6704531181022263, + "learning_rate": 6.312997347480107e-06, + "loss": 0.6460769176483154, + "step": 120 + }, + { + "epoch": 0.03213384676669765, + "grad_norm": 0.7493511248909543, + "learning_rate": 6.36604774535809e-06, + "loss": 0.6148796677589417, + "step": 121 + }, + { + "epoch": 0.0323994157482406, + "grad_norm": 0.6359613412994526, + "learning_rate": 6.419098143236075e-06, + "loss": 0.558960497379303, + "step": 122 + }, + { + "epoch": 0.03266498472978356, + "grad_norm": 0.6785691051694177, + "learning_rate": 6.47214854111406e-06, + "loss": 0.5844984650611877, + "step": 123 + }, + { + "epoch": 0.03293055371132652, + "grad_norm": 0.6692815537253501, + "learning_rate": 6.525198938992043e-06, + "loss": 0.5343623161315918, + "step": 124 + }, + { + "epoch": 0.03319612269286947, + "grad_norm": 0.6705726789318588, + "learning_rate": 6.5782493368700276e-06, + "loss": 0.5834348797798157, + "step": 125 + }, + { + "epoch": 0.03346169167441243, + "grad_norm": 0.7626576562771024, + "learning_rate": 6.631299734748011e-06, + "loss": 0.5997360944747925, + "step": 126 + }, + { + "epoch": 0.03372726065595538, + "grad_norm": 0.7117893752859364, + "learning_rate": 6.684350132625995e-06, + "loss": 0.5991666316986084, + "step": 127 + }, + { + "epoch": 0.03399282963749834, + "grad_norm": 0.7060406683837459, + "learning_rate": 6.737400530503979e-06, + "loss": 0.581120491027832, + "step": 128 + }, + { + "epoch": 0.03425839861904129, + "grad_norm": 0.6869761252397286, + "learning_rate": 6.790450928381963e-06, + "loss": 0.6219569444656372, + "step": 129 + }, + { + "epoch": 0.03452396760058425, + "grad_norm": 0.6916173566260286, + "learning_rate": 6.843501326259947e-06, + "loss": 0.5950608253479004, + "step": 130 + }, + { + "epoch": 0.03478953658212721, + "grad_norm": 0.6136480902733893, + "learning_rate": 6.896551724137932e-06, + "loss": 0.5762747526168823, + "step": 131 + }, + { + "epoch": 0.03505510556367016, + "grad_norm": 0.670368708945713, + "learning_rate": 6.949602122015916e-06, + "loss": 0.6003131866455078, + "step": 132 + }, + { + "epoch": 0.03532067454521312, + "grad_norm": 0.6439028776339482, + "learning_rate": 7.0026525198939e-06, + "loss": 0.5866605043411255, + "step": 133 + }, + { + "epoch": 0.03558624352675607, + "grad_norm": 0.8324202287699098, + "learning_rate": 7.055702917771884e-06, + "loss": 0.6668443083763123, + "step": 134 + }, + { + "epoch": 0.03585181250829903, + "grad_norm": 0.7064456856515898, + "learning_rate": 7.108753315649868e-06, + "loss": 0.5738306045532227, + "step": 135 + }, + { + "epoch": 0.03611738148984198, + "grad_norm": 0.6941604370641007, + "learning_rate": 7.1618037135278515e-06, + "loss": 0.5774663686752319, + "step": 136 + }, + { + "epoch": 0.03638295047138494, + "grad_norm": 0.7648336305672251, + "learning_rate": 7.214854111405836e-06, + "loss": 0.5721150636672974, + "step": 137 + }, + { + "epoch": 0.0366485194529279, + "grad_norm": 0.7394576462203543, + "learning_rate": 7.267904509283821e-06, + "loss": 0.6350122690200806, + "step": 138 + }, + { + "epoch": 0.03691408843447085, + "grad_norm": 0.6540602529440619, + "learning_rate": 7.320954907161804e-06, + "loss": 0.5435039401054382, + "step": 139 + }, + { + "epoch": 0.03717965741601381, + "grad_norm": 0.6965351191908165, + "learning_rate": 7.374005305039789e-06, + "loss": 0.5869162678718567, + "step": 140 + }, + { + "epoch": 0.03744522639755676, + "grad_norm": 0.6664228073022063, + "learning_rate": 7.427055702917773e-06, + "loss": 0.5645807981491089, + "step": 141 + }, + { + "epoch": 0.03771079537909972, + "grad_norm": 0.6503771775205762, + "learning_rate": 7.480106100795756e-06, + "loss": 0.5502692461013794, + "step": 142 + }, + { + "epoch": 0.037976364360642674, + "grad_norm": 0.6223645459397411, + "learning_rate": 7.533156498673741e-06, + "loss": 0.5602732300758362, + "step": 143 + }, + { + "epoch": 0.03824193334218563, + "grad_norm": 0.8638951879324807, + "learning_rate": 7.586206896551724e-06, + "loss": 0.6011391282081604, + "step": 144 + }, + { + "epoch": 0.03850750232372859, + "grad_norm": 0.6930636234613441, + "learning_rate": 7.639257294429708e-06, + "loss": 0.5482327938079834, + "step": 145 + }, + { + "epoch": 0.03877307130527154, + "grad_norm": 0.6693652199128735, + "learning_rate": 7.692307692307694e-06, + "loss": 0.5926344394683838, + "step": 146 + }, + { + "epoch": 0.0390386402868145, + "grad_norm": 0.8434991800954339, + "learning_rate": 7.745358090185677e-06, + "loss": 0.6558316946029663, + "step": 147 + }, + { + "epoch": 0.039304209268357454, + "grad_norm": 0.6845819362079449, + "learning_rate": 7.79840848806366e-06, + "loss": 0.572425365447998, + "step": 148 + }, + { + "epoch": 0.03956977824990041, + "grad_norm": 0.696296152543372, + "learning_rate": 7.851458885941646e-06, + "loss": 0.5684784650802612, + "step": 149 + }, + { + "epoch": 0.039835347231443365, + "grad_norm": 0.6779490529346879, + "learning_rate": 7.904509283819629e-06, + "loss": 0.5843643546104431, + "step": 150 + }, + { + "epoch": 0.04010091621298632, + "grad_norm": 0.6894842979231472, + "learning_rate": 7.957559681697613e-06, + "loss": 0.5471494793891907, + "step": 151 + }, + { + "epoch": 0.04036648519452928, + "grad_norm": 0.7583250211136208, + "learning_rate": 8.010610079575598e-06, + "loss": 0.595018744468689, + "step": 152 + }, + { + "epoch": 0.040632054176072234, + "grad_norm": 0.6904128122756304, + "learning_rate": 8.063660477453583e-06, + "loss": 0.5431865453720093, + "step": 153 + }, + { + "epoch": 0.04089762315761519, + "grad_norm": 0.7943246581886504, + "learning_rate": 8.116710875331566e-06, + "loss": 0.5622385740280151, + "step": 154 + }, + { + "epoch": 0.041163192139158145, + "grad_norm": 0.7792002007338675, + "learning_rate": 8.16976127320955e-06, + "loss": 0.5795880556106567, + "step": 155 + }, + { + "epoch": 0.0414287611207011, + "grad_norm": 0.7432143976693507, + "learning_rate": 8.222811671087533e-06, + "loss": 0.5854965448379517, + "step": 156 + }, + { + "epoch": 0.041694330102244055, + "grad_norm": 0.8104825185442435, + "learning_rate": 8.275862068965518e-06, + "loss": 0.5374501943588257, + "step": 157 + }, + { + "epoch": 0.041959899083787014, + "grad_norm": 0.7598674115735401, + "learning_rate": 8.328912466843502e-06, + "loss": 0.5779006481170654, + "step": 158 + }, + { + "epoch": 0.04222546806532997, + "grad_norm": 0.7033741631796787, + "learning_rate": 8.381962864721485e-06, + "loss": 0.550236701965332, + "step": 159 + }, + { + "epoch": 0.042491037046872925, + "grad_norm": 0.7285453499901458, + "learning_rate": 8.43501326259947e-06, + "loss": 0.557443380355835, + "step": 160 + }, + { + "epoch": 0.04275660602841588, + "grad_norm": 0.7050753960524794, + "learning_rate": 8.488063660477454e-06, + "loss": 0.5875238180160522, + "step": 161 + }, + { + "epoch": 0.043022175009958835, + "grad_norm": 0.7215582793376403, + "learning_rate": 8.541114058355439e-06, + "loss": 0.510900616645813, + "step": 162 + }, + { + "epoch": 0.043287743991501794, + "grad_norm": 0.7559114001900116, + "learning_rate": 8.594164456233422e-06, + "loss": 0.5465859174728394, + "step": 163 + }, + { + "epoch": 0.043553312973044746, + "grad_norm": 0.7494489908601825, + "learning_rate": 8.647214854111406e-06, + "loss": 0.5508615970611572, + "step": 164 + }, + { + "epoch": 0.043818881954587705, + "grad_norm": 0.7714387963397975, + "learning_rate": 8.700265251989391e-06, + "loss": 0.5437714457511902, + "step": 165 + }, + { + "epoch": 0.04408445093613066, + "grad_norm": 0.7480600693956645, + "learning_rate": 8.753315649867374e-06, + "loss": 0.542698323726654, + "step": 166 + }, + { + "epoch": 0.044350019917673615, + "grad_norm": 0.7339141407878966, + "learning_rate": 8.806366047745358e-06, + "loss": 0.5169371962547302, + "step": 167 + }, + { + "epoch": 0.044615588899216574, + "grad_norm": 0.725595419270195, + "learning_rate": 8.859416445623343e-06, + "loss": 0.5436176061630249, + "step": 168 + }, + { + "epoch": 0.044881157880759526, + "grad_norm": 0.8205411933516983, + "learning_rate": 8.912466843501327e-06, + "loss": 0.568030834197998, + "step": 169 + }, + { + "epoch": 0.045146726862302484, + "grad_norm": 0.7544356200090666, + "learning_rate": 8.965517241379312e-06, + "loss": 0.5218889713287354, + "step": 170 + }, + { + "epoch": 0.045412295843845436, + "grad_norm": 0.7860957525035722, + "learning_rate": 9.018567639257295e-06, + "loss": 0.5275779962539673, + "step": 171 + }, + { + "epoch": 0.045677864825388395, + "grad_norm": 0.6938225497373272, + "learning_rate": 9.071618037135279e-06, + "loss": 0.5263184905052185, + "step": 172 + }, + { + "epoch": 0.045943433806931354, + "grad_norm": 0.7549069812662602, + "learning_rate": 9.124668435013264e-06, + "loss": 0.563044548034668, + "step": 173 + }, + { + "epoch": 0.046209002788474306, + "grad_norm": 0.9364041083837341, + "learning_rate": 9.177718832891247e-06, + "loss": 0.5896912217140198, + "step": 174 + }, + { + "epoch": 0.046474571770017264, + "grad_norm": 0.7219752548557496, + "learning_rate": 9.230769230769232e-06, + "loss": 0.5163949131965637, + "step": 175 + }, + { + "epoch": 0.046740140751560216, + "grad_norm": 0.8391633255974319, + "learning_rate": 9.283819628647216e-06, + "loss": 0.6203320026397705, + "step": 176 + }, + { + "epoch": 0.047005709733103175, + "grad_norm": 0.9119997852547688, + "learning_rate": 9.3368700265252e-06, + "loss": 0.5528024435043335, + "step": 177 + }, + { + "epoch": 0.04727127871464613, + "grad_norm": 0.8828541610102935, + "learning_rate": 9.389920424403184e-06, + "loss": 0.5657555460929871, + "step": 178 + }, + { + "epoch": 0.047536847696189086, + "grad_norm": 0.7671789386737649, + "learning_rate": 9.442970822281168e-06, + "loss": 0.5301925539970398, + "step": 179 + }, + { + "epoch": 0.04780241667773204, + "grad_norm": 0.8675940797859782, + "learning_rate": 9.496021220159151e-06, + "loss": 0.5388369560241699, + "step": 180 + }, + { + "epoch": 0.048067985659274996, + "grad_norm": 0.7966332028310692, + "learning_rate": 9.549071618037136e-06, + "loss": 0.5549717545509338, + "step": 181 + }, + { + "epoch": 0.048333554640817955, + "grad_norm": 0.8814678011939608, + "learning_rate": 9.60212201591512e-06, + "loss": 0.5959764719009399, + "step": 182 + }, + { + "epoch": 0.04859912362236091, + "grad_norm": 0.7841222204736121, + "learning_rate": 9.655172413793105e-06, + "loss": 0.5461844205856323, + "step": 183 + }, + { + "epoch": 0.048864692603903866, + "grad_norm": 0.7620084886447284, + "learning_rate": 9.708222811671088e-06, + "loss": 0.5428494811058044, + "step": 184 + }, + { + "epoch": 0.04913026158544682, + "grad_norm": 0.7918991595575344, + "learning_rate": 9.761273209549072e-06, + "loss": 0.552198052406311, + "step": 185 + }, + { + "epoch": 0.049395830566989776, + "grad_norm": 0.6896394660507362, + "learning_rate": 9.814323607427057e-06, + "loss": 0.49992549419403076, + "step": 186 + }, + { + "epoch": 0.04966139954853273, + "grad_norm": 0.7875507527713166, + "learning_rate": 9.86737400530504e-06, + "loss": 0.557820200920105, + "step": 187 + }, + { + "epoch": 0.04992696853007569, + "grad_norm": 0.8883719893129148, + "learning_rate": 9.920424403183024e-06, + "loss": 0.5238749384880066, + "step": 188 + }, + { + "epoch": 0.050192537511618646, + "grad_norm": 0.988465476825029, + "learning_rate": 9.973474801061009e-06, + "loss": 0.5346978902816772, + "step": 189 + }, + { + "epoch": 0.0504581064931616, + "grad_norm": 0.8024883433630577, + "learning_rate": 1.0026525198938993e-05, + "loss": 0.5256577730178833, + "step": 190 + }, + { + "epoch": 0.050723675474704556, + "grad_norm": 0.8026852335394901, + "learning_rate": 1.0079575596816978e-05, + "loss": 0.5235393047332764, + "step": 191 + }, + { + "epoch": 0.05098924445624751, + "grad_norm": 0.6835673591276205, + "learning_rate": 1.013262599469496e-05, + "loss": 0.4984837472438812, + "step": 192 + }, + { + "epoch": 0.05125481343779047, + "grad_norm": 0.7829913352817355, + "learning_rate": 1.0185676392572945e-05, + "loss": 0.5209602117538452, + "step": 193 + }, + { + "epoch": 0.05152038241933342, + "grad_norm": 0.8334733472253096, + "learning_rate": 1.023872679045093e-05, + "loss": 0.5468267202377319, + "step": 194 + }, + { + "epoch": 0.05178595140087638, + "grad_norm": 0.8107908645155819, + "learning_rate": 1.0291777188328913e-05, + "loss": 0.5531667470932007, + "step": 195 + }, + { + "epoch": 0.052051520382419336, + "grad_norm": 0.8437904919697584, + "learning_rate": 1.0344827586206898e-05, + "loss": 0.5741526484489441, + "step": 196 + }, + { + "epoch": 0.05231708936396229, + "grad_norm": 0.6830882515315945, + "learning_rate": 1.039787798408488e-05, + "loss": 0.46132561564445496, + "step": 197 + }, + { + "epoch": 0.05258265834550525, + "grad_norm": 0.8402230890409916, + "learning_rate": 1.0450928381962865e-05, + "loss": 0.5074198842048645, + "step": 198 + }, + { + "epoch": 0.0528482273270482, + "grad_norm": 0.7476727742688456, + "learning_rate": 1.0503978779840849e-05, + "loss": 0.5193089842796326, + "step": 199 + }, + { + "epoch": 0.05311379630859116, + "grad_norm": 0.7814745235248249, + "learning_rate": 1.0557029177718834e-05, + "loss": 0.5209243297576904, + "step": 200 + }, + { + "epoch": 0.05337936529013411, + "grad_norm": 0.8844918483638834, + "learning_rate": 1.0610079575596819e-05, + "loss": 0.5607191920280457, + "step": 201 + }, + { + "epoch": 0.05364493427167707, + "grad_norm": 0.7926104097207243, + "learning_rate": 1.0663129973474802e-05, + "loss": 0.5482805371284485, + "step": 202 + }, + { + "epoch": 0.05391050325322003, + "grad_norm": 0.8109463956858287, + "learning_rate": 1.0716180371352788e-05, + "loss": 0.5579961538314819, + "step": 203 + }, + { + "epoch": 0.05417607223476298, + "grad_norm": 0.8246893162942163, + "learning_rate": 1.076923076923077e-05, + "loss": 0.5119072794914246, + "step": 204 + }, + { + "epoch": 0.05444164121630594, + "grad_norm": 0.8293246958439139, + "learning_rate": 1.0822281167108754e-05, + "loss": 0.5129292607307434, + "step": 205 + }, + { + "epoch": 0.05470721019784889, + "grad_norm": 0.6895550242199711, + "learning_rate": 1.0875331564986738e-05, + "loss": 0.500032901763916, + "step": 206 + }, + { + "epoch": 0.05497277917939185, + "grad_norm": 0.8385731092525408, + "learning_rate": 1.0928381962864723e-05, + "loss": 0.5264571309089661, + "step": 207 + }, + { + "epoch": 0.0552383481609348, + "grad_norm": 0.7915802802090326, + "learning_rate": 1.0981432360742708e-05, + "loss": 0.5569590330123901, + "step": 208 + }, + { + "epoch": 0.05550391714247776, + "grad_norm": 0.8546725938844908, + "learning_rate": 1.103448275862069e-05, + "loss": 0.5429908037185669, + "step": 209 + }, + { + "epoch": 0.05576948612402072, + "grad_norm": 0.8175642333393268, + "learning_rate": 1.1087533156498675e-05, + "loss": 0.5073692202568054, + "step": 210 + }, + { + "epoch": 0.05603505510556367, + "grad_norm": 0.9551222157670755, + "learning_rate": 1.1140583554376659e-05, + "loss": 0.5613659620285034, + "step": 211 + }, + { + "epoch": 0.05630062408710663, + "grad_norm": 1.8348970874488084, + "learning_rate": 1.1193633952254644e-05, + "loss": 0.5197691917419434, + "step": 212 + }, + { + "epoch": 0.05656619306864958, + "grad_norm": 0.9173115658326468, + "learning_rate": 1.1246684350132625e-05, + "loss": 0.5410990715026855, + "step": 213 + }, + { + "epoch": 0.05683176205019254, + "grad_norm": 0.8562107533946397, + "learning_rate": 1.129973474801061e-05, + "loss": 0.5852477550506592, + "step": 214 + }, + { + "epoch": 0.05709733103173549, + "grad_norm": 0.8483195878163089, + "learning_rate": 1.1352785145888594e-05, + "loss": 0.5312488079071045, + "step": 215 + }, + { + "epoch": 0.05736290001327845, + "grad_norm": 0.8817111257753456, + "learning_rate": 1.140583554376658e-05, + "loss": 0.5075235366821289, + "step": 216 + }, + { + "epoch": 0.05762846899482141, + "grad_norm": 0.8014885700994473, + "learning_rate": 1.1458885941644564e-05, + "loss": 0.5213298797607422, + "step": 217 + }, + { + "epoch": 0.05789403797636436, + "grad_norm": 0.8852582070340804, + "learning_rate": 1.1511936339522548e-05, + "loss": 0.5564183592796326, + "step": 218 + }, + { + "epoch": 0.05815960695790732, + "grad_norm": 1.0148412469588788, + "learning_rate": 1.1564986737400531e-05, + "loss": 0.5328387022018433, + "step": 219 + }, + { + "epoch": 0.05842517593945027, + "grad_norm": 0.7824132338865165, + "learning_rate": 1.1618037135278515e-05, + "loss": 0.5010273456573486, + "step": 220 + }, + { + "epoch": 0.05869074492099323, + "grad_norm": 0.8493817546068081, + "learning_rate": 1.16710875331565e-05, + "loss": 0.5473708510398865, + "step": 221 + }, + { + "epoch": 0.05895631390253618, + "grad_norm": 1.1554913959885298, + "learning_rate": 1.1724137931034483e-05, + "loss": 0.5359818339347839, + "step": 222 + }, + { + "epoch": 0.05922188288407914, + "grad_norm": 0.9663065987200732, + "learning_rate": 1.1777188328912468e-05, + "loss": 0.5274665951728821, + "step": 223 + }, + { + "epoch": 0.0594874518656221, + "grad_norm": 0.8158672021913522, + "learning_rate": 1.1830238726790454e-05, + "loss": 0.5463781952857971, + "step": 224 + }, + { + "epoch": 0.05975302084716505, + "grad_norm": 0.7817235200046289, + "learning_rate": 1.1883289124668435e-05, + "loss": 0.553212583065033, + "step": 225 + }, + { + "epoch": 0.06001858982870801, + "grad_norm": 0.8540074681170072, + "learning_rate": 1.193633952254642e-05, + "loss": 0.47144171595573425, + "step": 226 + }, + { + "epoch": 0.06028415881025096, + "grad_norm": 0.9191106803002166, + "learning_rate": 1.1989389920424404e-05, + "loss": 0.506844162940979, + "step": 227 + }, + { + "epoch": 0.06054972779179392, + "grad_norm": 0.794192267301098, + "learning_rate": 1.2042440318302389e-05, + "loss": 0.4965322017669678, + "step": 228 + }, + { + "epoch": 0.06081529677333687, + "grad_norm": 0.8421546110465796, + "learning_rate": 1.2095490716180371e-05, + "loss": 0.4815751612186432, + "step": 229 + }, + { + "epoch": 0.06108086575487983, + "grad_norm": 0.8107361719185122, + "learning_rate": 1.2148541114058356e-05, + "loss": 0.5245312452316284, + "step": 230 + }, + { + "epoch": 0.06134643473642279, + "grad_norm": 0.8749447967552209, + "learning_rate": 1.2201591511936341e-05, + "loss": 0.5215133428573608, + "step": 231 + }, + { + "epoch": 0.06161200371796574, + "grad_norm": 0.8315635530714504, + "learning_rate": 1.2254641909814325e-05, + "loss": 0.5039419531822205, + "step": 232 + }, + { + "epoch": 0.0618775726995087, + "grad_norm": 1.0583546039713638, + "learning_rate": 1.230769230769231e-05, + "loss": 0.5562925338745117, + "step": 233 + }, + { + "epoch": 0.06214314168105165, + "grad_norm": 1.069780059811152, + "learning_rate": 1.2360742705570291e-05, + "loss": 0.5372984409332275, + "step": 234 + }, + { + "epoch": 0.06240871066259461, + "grad_norm": 0.8766841361731121, + "learning_rate": 1.2413793103448277e-05, + "loss": 0.44987717270851135, + "step": 235 + }, + { + "epoch": 0.06267427964413756, + "grad_norm": 0.9229136432445015, + "learning_rate": 1.246684350132626e-05, + "loss": 0.537068247795105, + "step": 236 + }, + { + "epoch": 0.06293984862568051, + "grad_norm": 0.9828329951785308, + "learning_rate": 1.2519893899204245e-05, + "loss": 0.504779577255249, + "step": 237 + }, + { + "epoch": 0.06320541760722348, + "grad_norm": 1.0061858451025696, + "learning_rate": 1.257294429708223e-05, + "loss": 0.5524113774299622, + "step": 238 + }, + { + "epoch": 0.06347098658876643, + "grad_norm": 0.9888885225244529, + "learning_rate": 1.2625994694960214e-05, + "loss": 0.5089439153671265, + "step": 239 + }, + { + "epoch": 0.06373655557030938, + "grad_norm": 0.8394940482178029, + "learning_rate": 1.2679045092838197e-05, + "loss": 0.4501679837703705, + "step": 240 + }, + { + "epoch": 0.06400212455185235, + "grad_norm": 0.8117693384854435, + "learning_rate": 1.273209549071618e-05, + "loss": 0.5360216498374939, + "step": 241 + }, + { + "epoch": 0.0642676935333953, + "grad_norm": 0.876954304053235, + "learning_rate": 1.2785145888594166e-05, + "loss": 0.5595712661743164, + "step": 242 + }, + { + "epoch": 0.06453326251493825, + "grad_norm": 1.080992038181853, + "learning_rate": 1.283819628647215e-05, + "loss": 0.5010904669761658, + "step": 243 + }, + { + "epoch": 0.0647988314964812, + "grad_norm": 1.0446842005075034, + "learning_rate": 1.2891246684350134e-05, + "loss": 0.5053697228431702, + "step": 244 + }, + { + "epoch": 0.06506440047802417, + "grad_norm": 0.803002193385922, + "learning_rate": 1.294429708222812e-05, + "loss": 0.5045514106750488, + "step": 245 + }, + { + "epoch": 0.06532996945956712, + "grad_norm": 0.7912163744531999, + "learning_rate": 1.2997347480106101e-05, + "loss": 0.5546073913574219, + "step": 246 + }, + { + "epoch": 0.06559553844111007, + "grad_norm": 0.9572908035308383, + "learning_rate": 1.3050397877984087e-05, + "loss": 0.47276046872138977, + "step": 247 + }, + { + "epoch": 0.06586110742265304, + "grad_norm": 0.8233476091470914, + "learning_rate": 1.310344827586207e-05, + "loss": 0.4757889211177826, + "step": 248 + }, + { + "epoch": 0.06612667640419599, + "grad_norm": 0.8415305337388579, + "learning_rate": 1.3156498673740055e-05, + "loss": 0.5078848600387573, + "step": 249 + }, + { + "epoch": 0.06639224538573894, + "grad_norm": 0.8437984625649567, + "learning_rate": 1.3209549071618037e-05, + "loss": 0.4890335202217102, + "step": 250 + }, + { + "epoch": 0.0666578143672819, + "grad_norm": 0.8299999132068526, + "learning_rate": 1.3262599469496022e-05, + "loss": 0.5406580567359924, + "step": 251 + }, + { + "epoch": 0.06692338334882486, + "grad_norm": 0.9307594142144101, + "learning_rate": 1.3315649867374005e-05, + "loss": 0.5236875414848328, + "step": 252 + }, + { + "epoch": 0.06718895233036781, + "grad_norm": 1.0602580439454288, + "learning_rate": 1.336870026525199e-05, + "loss": 0.4991317391395569, + "step": 253 + }, + { + "epoch": 0.06745452131191076, + "grad_norm": 0.8277603880683132, + "learning_rate": 1.3421750663129976e-05, + "loss": 0.4234679639339447, + "step": 254 + }, + { + "epoch": 0.06772009029345373, + "grad_norm": 0.9984839302922622, + "learning_rate": 1.3474801061007958e-05, + "loss": 0.49749234318733215, + "step": 255 + }, + { + "epoch": 0.06798565927499668, + "grad_norm": 0.9543855303701088, + "learning_rate": 1.3527851458885943e-05, + "loss": 0.5049105286598206, + "step": 256 + }, + { + "epoch": 0.06825122825653963, + "grad_norm": 0.8443711840757044, + "learning_rate": 1.3580901856763926e-05, + "loss": 0.5355304479598999, + "step": 257 + }, + { + "epoch": 0.06851679723808259, + "grad_norm": 0.9255144140027944, + "learning_rate": 1.3633952254641911e-05, + "loss": 0.46302929520606995, + "step": 258 + }, + { + "epoch": 0.06878236621962555, + "grad_norm": 0.953877794861965, + "learning_rate": 1.3687002652519895e-05, + "loss": 0.5054173469543457, + "step": 259 + }, + { + "epoch": 0.0690479352011685, + "grad_norm": 0.8214682466537866, + "learning_rate": 1.374005305039788e-05, + "loss": 0.5018566846847534, + "step": 260 + }, + { + "epoch": 0.06931350418271146, + "grad_norm": 0.878430758752321, + "learning_rate": 1.3793103448275863e-05, + "loss": 0.4938735365867615, + "step": 261 + }, + { + "epoch": 0.06957907316425442, + "grad_norm": 0.8343439459008911, + "learning_rate": 1.3846153846153847e-05, + "loss": 0.4605029225349426, + "step": 262 + }, + { + "epoch": 0.06984464214579737, + "grad_norm": 0.8260329604526515, + "learning_rate": 1.3899204244031832e-05, + "loss": 0.5056782960891724, + "step": 263 + }, + { + "epoch": 0.07011021112734032, + "grad_norm": 0.860551370737139, + "learning_rate": 1.3952254641909815e-05, + "loss": 0.5017784833908081, + "step": 264 + }, + { + "epoch": 0.07037578010888328, + "grad_norm": 0.8353804409772935, + "learning_rate": 1.40053050397878e-05, + "loss": 0.5132012367248535, + "step": 265 + }, + { + "epoch": 0.07064134909042624, + "grad_norm": 0.8151795113028358, + "learning_rate": 1.4058355437665782e-05, + "loss": 0.531212329864502, + "step": 266 + }, + { + "epoch": 0.0709069180719692, + "grad_norm": 0.8086605566204427, + "learning_rate": 1.4111405835543767e-05, + "loss": 0.4900968074798584, + "step": 267 + }, + { + "epoch": 0.07117248705351215, + "grad_norm": 0.8735731145360269, + "learning_rate": 1.4164456233421753e-05, + "loss": 0.45277124643325806, + "step": 268 + }, + { + "epoch": 0.07143805603505511, + "grad_norm": 0.8760293380808535, + "learning_rate": 1.4217506631299736e-05, + "loss": 0.48026078939437866, + "step": 269 + }, + { + "epoch": 0.07170362501659806, + "grad_norm": 0.9019281227597356, + "learning_rate": 1.4270557029177721e-05, + "loss": 0.5111234784126282, + "step": 270 + }, + { + "epoch": 0.07196919399814102, + "grad_norm": 0.9120608197487232, + "learning_rate": 1.4323607427055703e-05, + "loss": 0.5448082685470581, + "step": 271 + }, + { + "epoch": 0.07223476297968397, + "grad_norm": 0.9400729117423203, + "learning_rate": 1.4376657824933688e-05, + "loss": 0.5242921113967896, + "step": 272 + }, + { + "epoch": 0.07250033196122693, + "grad_norm": 0.9404952891335322, + "learning_rate": 1.4429708222811672e-05, + "loss": 0.5194095373153687, + "step": 273 + }, + { + "epoch": 0.07276590094276988, + "grad_norm": 0.8893776382848525, + "learning_rate": 1.4482758620689657e-05, + "loss": 0.4620330333709717, + "step": 274 + }, + { + "epoch": 0.07303146992431284, + "grad_norm": 0.886983687866706, + "learning_rate": 1.4535809018567642e-05, + "loss": 0.4654063582420349, + "step": 275 + }, + { + "epoch": 0.0732970389058558, + "grad_norm": 0.7984003718276244, + "learning_rate": 1.4588859416445624e-05, + "loss": 0.4637746810913086, + "step": 276 + }, + { + "epoch": 0.07356260788739875, + "grad_norm": 0.8288882522584324, + "learning_rate": 1.4641909814323609e-05, + "loss": 0.47949421405792236, + "step": 277 + }, + { + "epoch": 0.0738281768689417, + "grad_norm": 1.0041804846004008, + "learning_rate": 1.4694960212201592e-05, + "loss": 0.49565935134887695, + "step": 278 + }, + { + "epoch": 0.07409374585048466, + "grad_norm": 0.9214786055945364, + "learning_rate": 1.4748010610079577e-05, + "loss": 0.5057941675186157, + "step": 279 + }, + { + "epoch": 0.07435931483202762, + "grad_norm": 0.9073397896109812, + "learning_rate": 1.480106100795756e-05, + "loss": 0.5495956540107727, + "step": 280 + }, + { + "epoch": 0.07462488381357057, + "grad_norm": 0.8743353741776648, + "learning_rate": 1.4854111405835546e-05, + "loss": 0.4502897560596466, + "step": 281 + }, + { + "epoch": 0.07489045279511353, + "grad_norm": 0.8694785116368758, + "learning_rate": 1.490716180371353e-05, + "loss": 0.4799070954322815, + "step": 282 + }, + { + "epoch": 0.07515602177665649, + "grad_norm": 0.886176954457428, + "learning_rate": 1.4960212201591513e-05, + "loss": 0.45640307664871216, + "step": 283 + }, + { + "epoch": 0.07542159075819944, + "grad_norm": 0.8937725285994821, + "learning_rate": 1.5013262599469498e-05, + "loss": 0.47862207889556885, + "step": 284 + }, + { + "epoch": 0.0756871597397424, + "grad_norm": 0.8717898339198907, + "learning_rate": 1.5066312997347481e-05, + "loss": 0.48195987939834595, + "step": 285 + }, + { + "epoch": 0.07595272872128535, + "grad_norm": 0.9124586645482137, + "learning_rate": 1.5119363395225467e-05, + "loss": 0.518566370010376, + "step": 286 + }, + { + "epoch": 0.07621829770282831, + "grad_norm": 0.9766882853479317, + "learning_rate": 1.5172413793103448e-05, + "loss": 0.5034162402153015, + "step": 287 + }, + { + "epoch": 0.07648386668437127, + "grad_norm": 0.8995114639723897, + "learning_rate": 1.5225464190981433e-05, + "loss": 0.497822642326355, + "step": 288 + }, + { + "epoch": 0.07674943566591422, + "grad_norm": 0.8484786603983125, + "learning_rate": 1.5278514588859417e-05, + "loss": 0.510530412197113, + "step": 289 + }, + { + "epoch": 0.07701500464745718, + "grad_norm": 0.9406440408252492, + "learning_rate": 1.53315649867374e-05, + "loss": 0.5163881778717041, + "step": 290 + }, + { + "epoch": 0.07728057362900013, + "grad_norm": 0.9825958938719339, + "learning_rate": 1.5384615384615387e-05, + "loss": 0.5161621570587158, + "step": 291 + }, + { + "epoch": 0.07754614261054309, + "grad_norm": 0.8680267479326179, + "learning_rate": 1.543766578249337e-05, + "loss": 0.5260482430458069, + "step": 292 + }, + { + "epoch": 0.07781171159208604, + "grad_norm": 0.8791995274446183, + "learning_rate": 1.5490716180371354e-05, + "loss": 0.4946279227733612, + "step": 293 + }, + { + "epoch": 0.078077280573629, + "grad_norm": 0.9734620967906259, + "learning_rate": 1.5543766578249338e-05, + "loss": 0.5030514001846313, + "step": 294 + }, + { + "epoch": 0.07834284955517196, + "grad_norm": 0.899295097408943, + "learning_rate": 1.559681697612732e-05, + "loss": 0.48864102363586426, + "step": 295 + }, + { + "epoch": 0.07860841853671491, + "grad_norm": 0.8710376092284174, + "learning_rate": 1.5649867374005304e-05, + "loss": 0.48310425877571106, + "step": 296 + }, + { + "epoch": 0.07887398751825787, + "grad_norm": 1.0094258392730318, + "learning_rate": 1.570291777188329e-05, + "loss": 0.4451446533203125, + "step": 297 + }, + { + "epoch": 0.07913955649980083, + "grad_norm": 0.9863170561942101, + "learning_rate": 1.5755968169761275e-05, + "loss": 0.4884604811668396, + "step": 298 + }, + { + "epoch": 0.07940512548134378, + "grad_norm": 0.8355693003184833, + "learning_rate": 1.5809018567639258e-05, + "loss": 0.5047659873962402, + "step": 299 + }, + { + "epoch": 0.07967069446288673, + "grad_norm": 0.8879040718748079, + "learning_rate": 1.586206896551724e-05, + "loss": 0.49124205112457275, + "step": 300 + }, + { + "epoch": 0.0799362634444297, + "grad_norm": 0.9411885452551192, + "learning_rate": 1.5915119363395225e-05, + "loss": 0.5113086700439453, + "step": 301 + }, + { + "epoch": 0.08020183242597265, + "grad_norm": 0.9345380756850689, + "learning_rate": 1.5968169761273212e-05, + "loss": 0.5298338532447815, + "step": 302 + }, + { + "epoch": 0.0804674014075156, + "grad_norm": 0.9050429706274331, + "learning_rate": 1.6021220159151195e-05, + "loss": 0.4673181176185608, + "step": 303 + }, + { + "epoch": 0.08073297038905856, + "grad_norm": 0.8972864762330055, + "learning_rate": 1.607427055702918e-05, + "loss": 0.45361828804016113, + "step": 304 + }, + { + "epoch": 0.08099853937060152, + "grad_norm": 0.8848533583648175, + "learning_rate": 1.6127320954907166e-05, + "loss": 0.5144034624099731, + "step": 305 + }, + { + "epoch": 0.08126410835214447, + "grad_norm": 0.9263690972931414, + "learning_rate": 1.6180371352785146e-05, + "loss": 0.5027451515197754, + "step": 306 + }, + { + "epoch": 0.08152967733368742, + "grad_norm": 0.8575377500476566, + "learning_rate": 1.6233421750663133e-05, + "loss": 0.4987551271915436, + "step": 307 + }, + { + "epoch": 0.08179524631523039, + "grad_norm": 1.0121964253373468, + "learning_rate": 1.6286472148541116e-05, + "loss": 0.5433062314987183, + "step": 308 + }, + { + "epoch": 0.08206081529677334, + "grad_norm": 0.8973695218716041, + "learning_rate": 1.63395225464191e-05, + "loss": 0.49603772163391113, + "step": 309 + }, + { + "epoch": 0.08232638427831629, + "grad_norm": 0.9033181815462389, + "learning_rate": 1.6392572944297083e-05, + "loss": 0.47990959882736206, + "step": 310 + }, + { + "epoch": 0.08259195325985925, + "grad_norm": 0.9843185449650845, + "learning_rate": 1.6445623342175066e-05, + "loss": 0.5196831226348877, + "step": 311 + }, + { + "epoch": 0.0828575222414022, + "grad_norm": 0.8589822510995361, + "learning_rate": 1.6498673740053053e-05, + "loss": 0.4664091467857361, + "step": 312 + }, + { + "epoch": 0.08312309122294516, + "grad_norm": 0.9077443936761218, + "learning_rate": 1.6551724137931037e-05, + "loss": 0.4405553936958313, + "step": 313 + }, + { + "epoch": 0.08338866020448811, + "grad_norm": 0.8561334135462362, + "learning_rate": 1.660477453580902e-05, + "loss": 0.46172815561294556, + "step": 314 + }, + { + "epoch": 0.08365422918603108, + "grad_norm": 0.8835708894071636, + "learning_rate": 1.6657824933687004e-05, + "loss": 0.5004327297210693, + "step": 315 + }, + { + "epoch": 0.08391979816757403, + "grad_norm": 0.8452618593185571, + "learning_rate": 1.6710875331564987e-05, + "loss": 0.4727814197540283, + "step": 316 + }, + { + "epoch": 0.08418536714911698, + "grad_norm": 0.7631381381409372, + "learning_rate": 1.676392572944297e-05, + "loss": 0.43602120876312256, + "step": 317 + }, + { + "epoch": 0.08445093613065995, + "grad_norm": 0.9092168864142193, + "learning_rate": 1.6816976127320957e-05, + "loss": 0.5110410451889038, + "step": 318 + }, + { + "epoch": 0.0847165051122029, + "grad_norm": 0.9902301773407237, + "learning_rate": 1.687002652519894e-05, + "loss": 0.4798283278942108, + "step": 319 + }, + { + "epoch": 0.08498207409374585, + "grad_norm": 0.8572923551208312, + "learning_rate": 1.6923076923076924e-05, + "loss": 0.45690029859542847, + "step": 320 + }, + { + "epoch": 0.0852476430752888, + "grad_norm": 0.8864718165003516, + "learning_rate": 1.6976127320954908e-05, + "loss": 0.4770117998123169, + "step": 321 + }, + { + "epoch": 0.08551321205683177, + "grad_norm": 0.888032985544436, + "learning_rate": 1.702917771883289e-05, + "loss": 0.512240469455719, + "step": 322 + }, + { + "epoch": 0.08577878103837472, + "grad_norm": 0.8665270088700595, + "learning_rate": 1.7082228116710878e-05, + "loss": 0.4696195423603058, + "step": 323 + }, + { + "epoch": 0.08604435001991767, + "grad_norm": 0.8876364903970222, + "learning_rate": 1.713527851458886e-05, + "loss": 0.4779578149318695, + "step": 324 + }, + { + "epoch": 0.08630991900146064, + "grad_norm": 0.9604080935445363, + "learning_rate": 1.7188328912466845e-05, + "loss": 0.48670440912246704, + "step": 325 + }, + { + "epoch": 0.08657548798300359, + "grad_norm": 0.9813156772782552, + "learning_rate": 1.7241379310344828e-05, + "loss": 0.5285798907279968, + "step": 326 + }, + { + "epoch": 0.08684105696454654, + "grad_norm": 0.9264252564283505, + "learning_rate": 1.7294429708222812e-05, + "loss": 0.46095865964889526, + "step": 327 + }, + { + "epoch": 0.08710662594608949, + "grad_norm": 0.8953179311501671, + "learning_rate": 1.73474801061008e-05, + "loss": 0.44342565536499023, + "step": 328 + }, + { + "epoch": 0.08737219492763246, + "grad_norm": 0.9640917124230414, + "learning_rate": 1.7400530503978782e-05, + "loss": 0.48974257707595825, + "step": 329 + }, + { + "epoch": 0.08763776390917541, + "grad_norm": 1.3568266957703046, + "learning_rate": 1.7453580901856765e-05, + "loss": 0.4763977527618408, + "step": 330 + }, + { + "epoch": 0.08790333289071836, + "grad_norm": 1.0231360729141987, + "learning_rate": 1.750663129973475e-05, + "loss": 0.5390856266021729, + "step": 331 + }, + { + "epoch": 0.08816890187226133, + "grad_norm": 0.9254788253309115, + "learning_rate": 1.7559681697612732e-05, + "loss": 0.4833192825317383, + "step": 332 + }, + { + "epoch": 0.08843447085380428, + "grad_norm": 0.9106057248503829, + "learning_rate": 1.7612732095490716e-05, + "loss": 0.47842955589294434, + "step": 333 + }, + { + "epoch": 0.08870003983534723, + "grad_norm": 0.8653538374375338, + "learning_rate": 1.7665782493368703e-05, + "loss": 0.4543060064315796, + "step": 334 + }, + { + "epoch": 0.08896560881689018, + "grad_norm": 0.9024795887264612, + "learning_rate": 1.7718832891246686e-05, + "loss": 0.4492039978504181, + "step": 335 + }, + { + "epoch": 0.08923117779843315, + "grad_norm": 0.9660730803540603, + "learning_rate": 1.777188328912467e-05, + "loss": 0.4930066466331482, + "step": 336 + }, + { + "epoch": 0.0894967467799761, + "grad_norm": 0.9494811659806174, + "learning_rate": 1.7824933687002653e-05, + "loss": 0.46343356370925903, + "step": 337 + }, + { + "epoch": 0.08976231576151905, + "grad_norm": 0.98824099461907, + "learning_rate": 1.7877984084880636e-05, + "loss": 0.5118839740753174, + "step": 338 + }, + { + "epoch": 0.09002788474306202, + "grad_norm": 0.9759312233085756, + "learning_rate": 1.7931034482758623e-05, + "loss": 0.4659194350242615, + "step": 339 + }, + { + "epoch": 0.09029345372460497, + "grad_norm": 0.868792760549277, + "learning_rate": 1.7984084880636607e-05, + "loss": 0.45929303765296936, + "step": 340 + }, + { + "epoch": 0.09055902270614792, + "grad_norm": 0.9774857416777888, + "learning_rate": 1.803713527851459e-05, + "loss": 0.5072556734085083, + "step": 341 + }, + { + "epoch": 0.09082459168769087, + "grad_norm": 0.8722377179138728, + "learning_rate": 1.8090185676392577e-05, + "loss": 0.42370402812957764, + "step": 342 + }, + { + "epoch": 0.09109016066923384, + "grad_norm": 0.9404121189660462, + "learning_rate": 1.8143236074270557e-05, + "loss": 0.5017818212509155, + "step": 343 + }, + { + "epoch": 0.09135572965077679, + "grad_norm": 1.0279846493738434, + "learning_rate": 1.8196286472148544e-05, + "loss": 0.4746384620666504, + "step": 344 + }, + { + "epoch": 0.09162129863231974, + "grad_norm": 1.0016746569872437, + "learning_rate": 1.8249336870026527e-05, + "loss": 0.49020540714263916, + "step": 345 + }, + { + "epoch": 0.09188686761386271, + "grad_norm": 0.8521475505102624, + "learning_rate": 1.830238726790451e-05, + "loss": 0.4569393992424011, + "step": 346 + }, + { + "epoch": 0.09215243659540566, + "grad_norm": 0.9587089968564823, + "learning_rate": 1.8355437665782494e-05, + "loss": 0.46831727027893066, + "step": 347 + }, + { + "epoch": 0.09241800557694861, + "grad_norm": 0.909230845841239, + "learning_rate": 1.8408488063660478e-05, + "loss": 0.4795265197753906, + "step": 348 + }, + { + "epoch": 0.09268357455849156, + "grad_norm": 0.9641043081337674, + "learning_rate": 1.8461538461538465e-05, + "loss": 0.5122503042221069, + "step": 349 + }, + { + "epoch": 0.09294914354003453, + "grad_norm": 0.8617611974669258, + "learning_rate": 1.8514588859416448e-05, + "loss": 0.4190404713153839, + "step": 350 + }, + { + "epoch": 0.09321471252157748, + "grad_norm": 0.9061006884991066, + "learning_rate": 1.856763925729443e-05, + "loss": 0.47778886556625366, + "step": 351 + }, + { + "epoch": 0.09348028150312043, + "grad_norm": 0.9208451846579827, + "learning_rate": 1.8620689655172415e-05, + "loss": 0.45851507782936096, + "step": 352 + }, + { + "epoch": 0.09374585048466338, + "grad_norm": 1.0050481975496854, + "learning_rate": 1.86737400530504e-05, + "loss": 0.4888782501220703, + "step": 353 + }, + { + "epoch": 0.09401141946620635, + "grad_norm": 0.9454138173982718, + "learning_rate": 1.8726790450928382e-05, + "loss": 0.5032983422279358, + "step": 354 + }, + { + "epoch": 0.0942769884477493, + "grad_norm": 0.9130362696106749, + "learning_rate": 1.877984084880637e-05, + "loss": 0.4754604697227478, + "step": 355 + }, + { + "epoch": 0.09454255742929225, + "grad_norm": 0.9970889038933597, + "learning_rate": 1.8832891246684352e-05, + "loss": 0.488397479057312, + "step": 356 + }, + { + "epoch": 0.09480812641083522, + "grad_norm": 1.222649143916529, + "learning_rate": 1.8885941644562336e-05, + "loss": 0.4775403141975403, + "step": 357 + }, + { + "epoch": 0.09507369539237817, + "grad_norm": 0.9872263151320333, + "learning_rate": 1.893899204244032e-05, + "loss": 0.47063153982162476, + "step": 358 + }, + { + "epoch": 0.09533926437392112, + "grad_norm": 1.0222144168199743, + "learning_rate": 1.8992042440318303e-05, + "loss": 0.4856908321380615, + "step": 359 + }, + { + "epoch": 0.09560483335546408, + "grad_norm": 0.9195037496858368, + "learning_rate": 1.904509283819629e-05, + "loss": 0.440033495426178, + "step": 360 + }, + { + "epoch": 0.09587040233700704, + "grad_norm": 0.9961899484684762, + "learning_rate": 1.9098143236074273e-05, + "loss": 0.4825770854949951, + "step": 361 + }, + { + "epoch": 0.09613597131854999, + "grad_norm": 0.9443841189655576, + "learning_rate": 1.9151193633952256e-05, + "loss": 0.48192232847213745, + "step": 362 + }, + { + "epoch": 0.09640154030009294, + "grad_norm": 0.9065595450317342, + "learning_rate": 1.920424403183024e-05, + "loss": 0.4689444899559021, + "step": 363 + }, + { + "epoch": 0.09666710928163591, + "grad_norm": 0.9970961253516039, + "learning_rate": 1.9257294429708223e-05, + "loss": 0.47120895981788635, + "step": 364 + }, + { + "epoch": 0.09693267826317886, + "grad_norm": 1.0106028234477955, + "learning_rate": 1.931034482758621e-05, + "loss": 0.4968941807746887, + "step": 365 + }, + { + "epoch": 0.09719824724472181, + "grad_norm": 1.115125675989656, + "learning_rate": 1.9363395225464193e-05, + "loss": 0.46982288360595703, + "step": 366 + }, + { + "epoch": 0.09746381622626477, + "grad_norm": 0.9408972278578609, + "learning_rate": 1.9416445623342177e-05, + "loss": 0.4541531205177307, + "step": 367 + }, + { + "epoch": 0.09772938520780773, + "grad_norm": 0.9760564476186651, + "learning_rate": 1.946949602122016e-05, + "loss": 0.45576703548431396, + "step": 368 + }, + { + "epoch": 0.09799495418935068, + "grad_norm": 0.9893999168346334, + "learning_rate": 1.9522546419098144e-05, + "loss": 0.48060357570648193, + "step": 369 + }, + { + "epoch": 0.09826052317089363, + "grad_norm": 0.9675810264832774, + "learning_rate": 1.9575596816976127e-05, + "loss": 0.47536781430244446, + "step": 370 + }, + { + "epoch": 0.0985260921524366, + "grad_norm": 0.9516181191759193, + "learning_rate": 1.9628647214854114e-05, + "loss": 0.46463894844055176, + "step": 371 + }, + { + "epoch": 0.09879166113397955, + "grad_norm": 1.0082712913027811, + "learning_rate": 1.9681697612732098e-05, + "loss": 0.49570178985595703, + "step": 372 + }, + { + "epoch": 0.0990572301155225, + "grad_norm": 1.0327922438955468, + "learning_rate": 1.973474801061008e-05, + "loss": 0.4764043390750885, + "step": 373 + }, + { + "epoch": 0.09932279909706546, + "grad_norm": 0.9227866290107449, + "learning_rate": 1.9787798408488064e-05, + "loss": 0.43582671880722046, + "step": 374 + }, + { + "epoch": 0.09958836807860842, + "grad_norm": 0.9360238854832598, + "learning_rate": 1.9840848806366048e-05, + "loss": 0.46077725291252136, + "step": 375 + }, + { + "epoch": 0.09985393706015137, + "grad_norm": 0.9607682273492437, + "learning_rate": 1.9893899204244035e-05, + "loss": 0.4794929027557373, + "step": 376 + }, + { + "epoch": 0.10011950604169433, + "grad_norm": 0.9619848398175739, + "learning_rate": 1.9946949602122018e-05, + "loss": 0.43174588680267334, + "step": 377 + }, + { + "epoch": 0.10038507502323729, + "grad_norm": 0.90095462919728, + "learning_rate": 2e-05, + "loss": 0.44885915517807007, + "step": 378 + }, + { + "epoch": 0.10065064400478024, + "grad_norm": 1.0789787198205218, + "learning_rate": 1.9999999036058974e-05, + "loss": 0.520150899887085, + "step": 379 + }, + { + "epoch": 0.1009162129863232, + "grad_norm": 0.9699182604374589, + "learning_rate": 1.9999996144236068e-05, + "loss": 0.5139277577400208, + "step": 380 + }, + { + "epoch": 0.10118178196786615, + "grad_norm": 1.0077278580199993, + "learning_rate": 1.999999132453184e-05, + "loss": 0.48935171961784363, + "step": 381 + }, + { + "epoch": 0.10144735094940911, + "grad_norm": 0.9095465340361383, + "learning_rate": 1.999998457694723e-05, + "loss": 0.4805561304092407, + "step": 382 + }, + { + "epoch": 0.10171291993095206, + "grad_norm": 0.9209321398292457, + "learning_rate": 1.9999975901483532e-05, + "loss": 0.4340912997722626, + "step": 383 + }, + { + "epoch": 0.10197848891249502, + "grad_norm": 1.0414639039942946, + "learning_rate": 1.999996529814242e-05, + "loss": 0.48282474279403687, + "step": 384 + }, + { + "epoch": 0.10224405789403798, + "grad_norm": 0.9753320144694753, + "learning_rate": 1.999995276692593e-05, + "loss": 0.4653206169605255, + "step": 385 + }, + { + "epoch": 0.10250962687558093, + "grad_norm": 0.919281113033857, + "learning_rate": 1.999993830783649e-05, + "loss": 0.48501014709472656, + "step": 386 + }, + { + "epoch": 0.10277519585712389, + "grad_norm": 1.0711296444042975, + "learning_rate": 1.9999921920876882e-05, + "loss": 0.48260143399238586, + "step": 387 + }, + { + "epoch": 0.10304076483866684, + "grad_norm": 0.9590085896328235, + "learning_rate": 1.9999903606050267e-05, + "loss": 0.44557270407676697, + "step": 388 + }, + { + "epoch": 0.1033063338202098, + "grad_norm": 1.111282066618818, + "learning_rate": 1.9999883363360175e-05, + "loss": 0.4843652546405792, + "step": 389 + }, + { + "epoch": 0.10357190280175275, + "grad_norm": 0.9708048507544866, + "learning_rate": 1.9999861192810508e-05, + "loss": 0.4536727964878082, + "step": 390 + }, + { + "epoch": 0.1038374717832957, + "grad_norm": 1.0216212958759847, + "learning_rate": 1.9999837094405538e-05, + "loss": 0.49557366967201233, + "step": 391 + }, + { + "epoch": 0.10410304076483867, + "grad_norm": 1.0254795167373827, + "learning_rate": 1.9999811068149917e-05, + "loss": 0.45077240467071533, + "step": 392 + }, + { + "epoch": 0.10436860974638162, + "grad_norm": 0.9857255709196505, + "learning_rate": 1.9999783114048658e-05, + "loss": 0.4554041624069214, + "step": 393 + }, + { + "epoch": 0.10463417872792458, + "grad_norm": 0.8770920920154472, + "learning_rate": 1.999975323210715e-05, + "loss": 0.43526744842529297, + "step": 394 + }, + { + "epoch": 0.10489974770946753, + "grad_norm": 0.9824982196768539, + "learning_rate": 1.9999721422331154e-05, + "loss": 0.4097936749458313, + "step": 395 + }, + { + "epoch": 0.1051653166910105, + "grad_norm": 1.013432449022695, + "learning_rate": 1.9999687684726803e-05, + "loss": 0.4740130305290222, + "step": 396 + }, + { + "epoch": 0.10543088567255345, + "grad_norm": 0.9786752992542405, + "learning_rate": 1.9999652019300604e-05, + "loss": 0.43374374508857727, + "step": 397 + }, + { + "epoch": 0.1056964546540964, + "grad_norm": 0.9323415402935509, + "learning_rate": 1.999961442605943e-05, + "loss": 0.4423784911632538, + "step": 398 + }, + { + "epoch": 0.10596202363563936, + "grad_norm": 1.0497518439124596, + "learning_rate": 1.999957490501053e-05, + "loss": 0.4660544693470001, + "step": 399 + }, + { + "epoch": 0.10622759261718231, + "grad_norm": 1.11742327964835, + "learning_rate": 1.999953345616152e-05, + "loss": 0.4579896628856659, + "step": 400 + }, + { + "epoch": 0.10649316159872527, + "grad_norm": 1.0653029752390735, + "learning_rate": 1.9999490079520395e-05, + "loss": 0.4634096920490265, + "step": 401 + }, + { + "epoch": 0.10675873058026822, + "grad_norm": 0.9969566988589958, + "learning_rate": 1.9999444775095517e-05, + "loss": 0.45374077558517456, + "step": 402 + }, + { + "epoch": 0.10702429956181118, + "grad_norm": 1.1298291912896017, + "learning_rate": 1.9999397542895615e-05, + "loss": 0.49752670526504517, + "step": 403 + }, + { + "epoch": 0.10728986854335414, + "grad_norm": 1.049244919494092, + "learning_rate": 1.99993483829298e-05, + "loss": 0.4539335370063782, + "step": 404 + }, + { + "epoch": 0.10755543752489709, + "grad_norm": 1.0017841795942442, + "learning_rate": 1.999929729520755e-05, + "loss": 0.4665772616863251, + "step": 405 + }, + { + "epoch": 0.10782100650644005, + "grad_norm": 1.023688686658119, + "learning_rate": 1.9999244279738713e-05, + "loss": 0.4850832223892212, + "step": 406 + }, + { + "epoch": 0.108086575487983, + "grad_norm": 0.9960763191436038, + "learning_rate": 1.9999189336533508e-05, + "loss": 0.43974876403808594, + "step": 407 + }, + { + "epoch": 0.10835214446952596, + "grad_norm": 1.0378626233602128, + "learning_rate": 1.9999132465602526e-05, + "loss": 0.46823856234550476, + "step": 408 + }, + { + "epoch": 0.10861771345106891, + "grad_norm": 1.0461372802003532, + "learning_rate": 1.9999073666956734e-05, + "loss": 0.49704545736312866, + "step": 409 + }, + { + "epoch": 0.10888328243261187, + "grad_norm": 1.03380477635781, + "learning_rate": 1.999901294060747e-05, + "loss": 0.3863454759120941, + "step": 410 + }, + { + "epoch": 0.10914885141415483, + "grad_norm": 1.1280569204620268, + "learning_rate": 1.9998950286566438e-05, + "loss": 0.4903780221939087, + "step": 411 + }, + { + "epoch": 0.10941442039569778, + "grad_norm": 0.9546134462956446, + "learning_rate": 1.9998885704845716e-05, + "loss": 0.4312375485897064, + "step": 412 + }, + { + "epoch": 0.10967998937724074, + "grad_norm": 0.9382591225300354, + "learning_rate": 1.9998819195457756e-05, + "loss": 0.4350954294204712, + "step": 413 + }, + { + "epoch": 0.1099455583587837, + "grad_norm": 0.9201016144754837, + "learning_rate": 1.999875075841538e-05, + "loss": 0.4364873766899109, + "step": 414 + }, + { + "epoch": 0.11021112734032665, + "grad_norm": 0.9578414566062486, + "learning_rate": 1.999868039373178e-05, + "loss": 0.42079728841781616, + "step": 415 + }, + { + "epoch": 0.1104766963218696, + "grad_norm": 1.0011321946551845, + "learning_rate": 1.9998608101420527e-05, + "loss": 0.4396737515926361, + "step": 416 + }, + { + "epoch": 0.11074226530341257, + "grad_norm": 0.9922478693245596, + "learning_rate": 1.9998533881495552e-05, + "loss": 0.44765806198120117, + "step": 417 + }, + { + "epoch": 0.11100783428495552, + "grad_norm": 1.0219437952159112, + "learning_rate": 1.999845773397117e-05, + "loss": 0.46199291944503784, + "step": 418 + }, + { + "epoch": 0.11127340326649847, + "grad_norm": 0.9510961467421052, + "learning_rate": 1.9998379658862058e-05, + "loss": 0.44561129808425903, + "step": 419 + }, + { + "epoch": 0.11153897224804143, + "grad_norm": 1.0559368690309399, + "learning_rate": 1.9998299656183263e-05, + "loss": 0.46025681495666504, + "step": 420 + }, + { + "epoch": 0.11180454122958439, + "grad_norm": 0.9881679042322009, + "learning_rate": 1.999821772595022e-05, + "loss": 0.4408613443374634, + "step": 421 + }, + { + "epoch": 0.11207011021112734, + "grad_norm": 0.9620122842513851, + "learning_rate": 1.999813386817871e-05, + "loss": 0.4846842586994171, + "step": 422 + }, + { + "epoch": 0.11233567919267029, + "grad_norm": 0.9697081207450757, + "learning_rate": 1.999804808288491e-05, + "loss": 0.44503283500671387, + "step": 423 + }, + { + "epoch": 0.11260124817421326, + "grad_norm": 0.9687765160951803, + "learning_rate": 1.9997960370085355e-05, + "loss": 0.4090060293674469, + "step": 424 + }, + { + "epoch": 0.11286681715575621, + "grad_norm": 0.9575575943579401, + "learning_rate": 1.999787072979696e-05, + "loss": 0.43246471881866455, + "step": 425 + }, + { + "epoch": 0.11313238613729916, + "grad_norm": 1.001604978030575, + "learning_rate": 1.9997779162036996e-05, + "loss": 0.46283262968063354, + "step": 426 + }, + { + "epoch": 0.11339795511884213, + "grad_norm": 0.9108113962903395, + "learning_rate": 1.999768566682313e-05, + "loss": 0.3866165578365326, + "step": 427 + }, + { + "epoch": 0.11366352410038508, + "grad_norm": 0.9595506331685858, + "learning_rate": 1.9997590244173374e-05, + "loss": 0.4501144289970398, + "step": 428 + }, + { + "epoch": 0.11392909308192803, + "grad_norm": 0.9153639565172541, + "learning_rate": 1.9997492894106127e-05, + "loss": 0.43005290627479553, + "step": 429 + }, + { + "epoch": 0.11419466206347098, + "grad_norm": 0.9635360081712412, + "learning_rate": 1.9997393616640165e-05, + "loss": 0.4427964985370636, + "step": 430 + }, + { + "epoch": 0.11446023104501395, + "grad_norm": 1.0560533392763956, + "learning_rate": 1.999729241179462e-05, + "loss": 0.4690951108932495, + "step": 431 + }, + { + "epoch": 0.1147258000265569, + "grad_norm": 0.9559285214931015, + "learning_rate": 1.9997189279589003e-05, + "loss": 0.456949919462204, + "step": 432 + }, + { + "epoch": 0.11499136900809985, + "grad_norm": 0.9851459681291062, + "learning_rate": 1.99970842200432e-05, + "loss": 0.456052303314209, + "step": 433 + }, + { + "epoch": 0.11525693798964282, + "grad_norm": 0.9609923633405658, + "learning_rate": 1.9996977233177466e-05, + "loss": 0.43220120668411255, + "step": 434 + }, + { + "epoch": 0.11552250697118577, + "grad_norm": 0.9022181145862976, + "learning_rate": 1.9996868319012422e-05, + "loss": 0.4237494170665741, + "step": 435 + }, + { + "epoch": 0.11578807595272872, + "grad_norm": 1.1387519975876466, + "learning_rate": 1.9996757477569072e-05, + "loss": 0.4713878631591797, + "step": 436 + }, + { + "epoch": 0.11605364493427167, + "grad_norm": 1.026114633188765, + "learning_rate": 1.9996644708868776e-05, + "loss": 0.4561111330986023, + "step": 437 + }, + { + "epoch": 0.11631921391581464, + "grad_norm": 1.0425252904592188, + "learning_rate": 1.9996530012933285e-05, + "loss": 0.468253493309021, + "step": 438 + }, + { + "epoch": 0.11658478289735759, + "grad_norm": 0.9323050726416767, + "learning_rate": 1.9996413389784704e-05, + "loss": 0.4815019369125366, + "step": 439 + }, + { + "epoch": 0.11685035187890054, + "grad_norm": 0.9369313249225236, + "learning_rate": 1.9996294839445518e-05, + "loss": 0.4235987663269043, + "step": 440 + }, + { + "epoch": 0.1171159208604435, + "grad_norm": 0.9217309559918773, + "learning_rate": 1.999617436193858e-05, + "loss": 0.40562817454338074, + "step": 441 + }, + { + "epoch": 0.11738148984198646, + "grad_norm": 1.1384168500780398, + "learning_rate": 1.999605195728712e-05, + "loss": 0.424539715051651, + "step": 442 + }, + { + "epoch": 0.11764705882352941, + "grad_norm": 0.9616123874834243, + "learning_rate": 1.9995927625514736e-05, + "loss": 0.43677473068237305, + "step": 443 + }, + { + "epoch": 0.11791262780507236, + "grad_norm": 0.9761533315060044, + "learning_rate": 1.9995801366645396e-05, + "loss": 0.47325971722602844, + "step": 444 + }, + { + "epoch": 0.11817819678661533, + "grad_norm": 0.9447069768738408, + "learning_rate": 1.9995673180703443e-05, + "loss": 0.4206562638282776, + "step": 445 + }, + { + "epoch": 0.11844376576815828, + "grad_norm": 0.9743544240614231, + "learning_rate": 1.999554306771359e-05, + "loss": 0.4492834210395813, + "step": 446 + }, + { + "epoch": 0.11870933474970123, + "grad_norm": 1.0629000505790311, + "learning_rate": 1.9995411027700917e-05, + "loss": 0.4445284605026245, + "step": 447 + }, + { + "epoch": 0.1189749037312442, + "grad_norm": 0.9911650776890225, + "learning_rate": 1.9995277060690885e-05, + "loss": 0.4038352370262146, + "step": 448 + }, + { + "epoch": 0.11924047271278715, + "grad_norm": 0.9418518804089067, + "learning_rate": 1.9995141166709318e-05, + "loss": 0.4261324405670166, + "step": 449 + }, + { + "epoch": 0.1195060416943301, + "grad_norm": 1.067611227425969, + "learning_rate": 1.9995003345782416e-05, + "loss": 0.44187062978744507, + "step": 450 + }, + { + "epoch": 0.11977161067587305, + "grad_norm": 0.9191915914869351, + "learning_rate": 1.9994863597936752e-05, + "loss": 0.44672587513923645, + "step": 451 + }, + { + "epoch": 0.12003717965741602, + "grad_norm": 0.9882052007755191, + "learning_rate": 1.999472192319926e-05, + "loss": 0.44322314858436584, + "step": 452 + }, + { + "epoch": 0.12030274863895897, + "grad_norm": 0.9882289435866314, + "learning_rate": 1.9994578321597258e-05, + "loss": 0.4396611154079437, + "step": 453 + }, + { + "epoch": 0.12056831762050192, + "grad_norm": 0.9831868773412876, + "learning_rate": 1.9994432793158433e-05, + "loss": 0.4487733542919159, + "step": 454 + }, + { + "epoch": 0.12083388660204489, + "grad_norm": 0.9360753951175719, + "learning_rate": 1.999428533791084e-05, + "loss": 0.3969653248786926, + "step": 455 + }, + { + "epoch": 0.12109945558358784, + "grad_norm": 0.9662346637828156, + "learning_rate": 1.9994135955882906e-05, + "loss": 0.39312344789505005, + "step": 456 + }, + { + "epoch": 0.12136502456513079, + "grad_norm": 0.9019524086641805, + "learning_rate": 1.9993984647103425e-05, + "loss": 0.3979804217815399, + "step": 457 + }, + { + "epoch": 0.12163059354667374, + "grad_norm": 1.0970468981958466, + "learning_rate": 1.9993831411601573e-05, + "loss": 0.4430229365825653, + "step": 458 + }, + { + "epoch": 0.12189616252821671, + "grad_norm": 0.994492352252997, + "learning_rate": 1.9993676249406895e-05, + "loss": 0.4511718451976776, + "step": 459 + }, + { + "epoch": 0.12216173150975966, + "grad_norm": 1.091979336298699, + "learning_rate": 1.9993519160549298e-05, + "loss": 0.4686455726623535, + "step": 460 + }, + { + "epoch": 0.12242730049130261, + "grad_norm": 1.0158374042593608, + "learning_rate": 1.9993360145059073e-05, + "loss": 0.4501730501651764, + "step": 461 + }, + { + "epoch": 0.12269286947284558, + "grad_norm": 0.8530053413909426, + "learning_rate": 1.999319920296687e-05, + "loss": 0.40718767046928406, + "step": 462 + }, + { + "epoch": 0.12295843845438853, + "grad_norm": 1.1181007301257784, + "learning_rate": 1.9993036334303716e-05, + "loss": 0.47313761711120605, + "step": 463 + }, + { + "epoch": 0.12322400743593148, + "grad_norm": 0.9710975932515886, + "learning_rate": 1.9992871539101018e-05, + "loss": 0.47417378425598145, + "step": 464 + }, + { + "epoch": 0.12348957641747443, + "grad_norm": 0.9297582414898758, + "learning_rate": 1.999270481739054e-05, + "loss": 0.44206154346466064, + "step": 465 + }, + { + "epoch": 0.1237551453990174, + "grad_norm": 0.8745553533375581, + "learning_rate": 1.9992536169204427e-05, + "loss": 0.3800848722457886, + "step": 466 + }, + { + "epoch": 0.12402071438056035, + "grad_norm": 0.9337162704530373, + "learning_rate": 1.9992365594575194e-05, + "loss": 0.40339407324790955, + "step": 467 + }, + { + "epoch": 0.1242862833621033, + "grad_norm": 0.945328490567385, + "learning_rate": 1.999219309353572e-05, + "loss": 0.45280492305755615, + "step": 468 + }, + { + "epoch": 0.12455185234364627, + "grad_norm": 1.0911195899085697, + "learning_rate": 1.9992018666119266e-05, + "loss": 0.4600910544395447, + "step": 469 + }, + { + "epoch": 0.12481742132518922, + "grad_norm": 0.9649890056306747, + "learning_rate": 1.9991842312359458e-05, + "loss": 0.4475003480911255, + "step": 470 + }, + { + "epoch": 0.12508299030673217, + "grad_norm": 1.0493048741226816, + "learning_rate": 1.9991664032290297e-05, + "loss": 0.45377033948898315, + "step": 471 + }, + { + "epoch": 0.12534855928827512, + "grad_norm": 0.9964208438270044, + "learning_rate": 1.9991483825946147e-05, + "loss": 0.4397522509098053, + "step": 472 + }, + { + "epoch": 0.12561412826981808, + "grad_norm": 0.9309535511597795, + "learning_rate": 1.9991301693361756e-05, + "loss": 0.4258221387863159, + "step": 473 + }, + { + "epoch": 0.12587969725136103, + "grad_norm": 0.9120842027423138, + "learning_rate": 1.9991117634572234e-05, + "loss": 0.40272068977355957, + "step": 474 + }, + { + "epoch": 0.126145266232904, + "grad_norm": 0.8761120829975514, + "learning_rate": 1.9990931649613067e-05, + "loss": 0.3721206784248352, + "step": 475 + }, + { + "epoch": 0.12641083521444696, + "grad_norm": 0.9997105907953329, + "learning_rate": 1.9990743738520115e-05, + "loss": 0.4530203938484192, + "step": 476 + }, + { + "epoch": 0.1266764041959899, + "grad_norm": 0.999446109489731, + "learning_rate": 1.999055390132959e-05, + "loss": 0.4281614422798157, + "step": 477 + }, + { + "epoch": 0.12694197317753286, + "grad_norm": 1.3617327829527315, + "learning_rate": 1.999036213807811e-05, + "loss": 0.41965895891189575, + "step": 478 + }, + { + "epoch": 0.12720754215907581, + "grad_norm": 0.9525189428273744, + "learning_rate": 1.9990168448802633e-05, + "loss": 0.40055203437805176, + "step": 479 + }, + { + "epoch": 0.12747311114061877, + "grad_norm": 1.0868137290392272, + "learning_rate": 1.99899728335405e-05, + "loss": 0.4266522526741028, + "step": 480 + }, + { + "epoch": 0.12773868012216172, + "grad_norm": 1.028316280940819, + "learning_rate": 1.9989775292329425e-05, + "loss": 0.42291250824928284, + "step": 481 + }, + { + "epoch": 0.1280042491037047, + "grad_norm": 1.0319881226067493, + "learning_rate": 1.9989575825207494e-05, + "loss": 0.41346436738967896, + "step": 482 + }, + { + "epoch": 0.12826981808524765, + "grad_norm": 1.0162482863207583, + "learning_rate": 1.998937443221316e-05, + "loss": 0.4092825651168823, + "step": 483 + }, + { + "epoch": 0.1285353870667906, + "grad_norm": 0.9789070022917183, + "learning_rate": 1.998917111338525e-05, + "loss": 0.39763280749320984, + "step": 484 + }, + { + "epoch": 0.12880095604833355, + "grad_norm": 1.1639998102533433, + "learning_rate": 1.9988965868762956e-05, + "loss": 0.45523273944854736, + "step": 485 + }, + { + "epoch": 0.1290665250298765, + "grad_norm": 0.9737102573843942, + "learning_rate": 1.9988758698385854e-05, + "loss": 0.40181300044059753, + "step": 486 + }, + { + "epoch": 0.12933209401141946, + "grad_norm": 1.0269411713354706, + "learning_rate": 1.9988549602293884e-05, + "loss": 0.42487743496894836, + "step": 487 + }, + { + "epoch": 0.1295976629929624, + "grad_norm": 0.9805378587174307, + "learning_rate": 1.998833858052735e-05, + "loss": 0.41672298312187195, + "step": 488 + }, + { + "epoch": 0.1298632319745054, + "grad_norm": 0.9804335652831319, + "learning_rate": 1.998812563312694e-05, + "loss": 0.36750108003616333, + "step": 489 + }, + { + "epoch": 0.13012880095604834, + "grad_norm": 1.0991024476796578, + "learning_rate": 1.9987910760133712e-05, + "loss": 0.49290573596954346, + "step": 490 + }, + { + "epoch": 0.1303943699375913, + "grad_norm": 0.9956647709409898, + "learning_rate": 1.9987693961589084e-05, + "loss": 0.460039347410202, + "step": 491 + }, + { + "epoch": 0.13065993891913424, + "grad_norm": 1.269757897267166, + "learning_rate": 1.998747523753485e-05, + "loss": 0.4471668303012848, + "step": 492 + }, + { + "epoch": 0.1309255079006772, + "grad_norm": 0.9411513149719377, + "learning_rate": 1.9987254588013184e-05, + "loss": 0.395844966173172, + "step": 493 + }, + { + "epoch": 0.13119107688222015, + "grad_norm": 0.9546844808839872, + "learning_rate": 1.9987032013066623e-05, + "loss": 0.4465745985507965, + "step": 494 + }, + { + "epoch": 0.1314566458637631, + "grad_norm": 1.0929917252775374, + "learning_rate": 1.9986807512738075e-05, + "loss": 0.43123912811279297, + "step": 495 + }, + { + "epoch": 0.13172221484530608, + "grad_norm": 0.9741124155963404, + "learning_rate": 1.9986581087070824e-05, + "loss": 0.40066564083099365, + "step": 496 + }, + { + "epoch": 0.13198778382684903, + "grad_norm": 0.9421948045046618, + "learning_rate": 1.9986352736108515e-05, + "loss": 0.38514643907546997, + "step": 497 + }, + { + "epoch": 0.13225335280839198, + "grad_norm": 0.9713567699891517, + "learning_rate": 1.9986122459895182e-05, + "loss": 0.37397241592407227, + "step": 498 + }, + { + "epoch": 0.13251892178993493, + "grad_norm": 0.9697777712481016, + "learning_rate": 1.9985890258475215e-05, + "loss": 0.44865745306015015, + "step": 499 + }, + { + "epoch": 0.1327844907714779, + "grad_norm": 1.000823551239605, + "learning_rate": 1.9985656131893374e-05, + "loss": 0.4161406457424164, + "step": 500 + }, + { + "epoch": 0.13305005975302084, + "grad_norm": 1.049045844462056, + "learning_rate": 1.9985420080194804e-05, + "loss": 0.41364359855651855, + "step": 501 + }, + { + "epoch": 0.1333156287345638, + "grad_norm": 0.9766347522178017, + "learning_rate": 1.9985182103425007e-05, + "loss": 0.38466009497642517, + "step": 502 + }, + { + "epoch": 0.13358119771610677, + "grad_norm": 0.9820108788569575, + "learning_rate": 1.9984942201629868e-05, + "loss": 0.4189472794532776, + "step": 503 + }, + { + "epoch": 0.13384676669764972, + "grad_norm": 1.0124943582595707, + "learning_rate": 1.998470037485563e-05, + "loss": 0.4088754653930664, + "step": 504 + }, + { + "epoch": 0.13411233567919267, + "grad_norm": 0.9404621165531668, + "learning_rate": 1.9984456623148923e-05, + "loss": 0.4197084307670593, + "step": 505 + }, + { + "epoch": 0.13437790466073563, + "grad_norm": 1.022677047132229, + "learning_rate": 1.998421094655673e-05, + "loss": 0.4318644404411316, + "step": 506 + }, + { + "epoch": 0.13464347364227858, + "grad_norm": 0.9443470782499029, + "learning_rate": 1.9983963345126423e-05, + "loss": 0.38180238008499146, + "step": 507 + }, + { + "epoch": 0.13490904262382153, + "grad_norm": 0.9655473739081939, + "learning_rate": 1.9983713818905733e-05, + "loss": 0.38704103231430054, + "step": 508 + }, + { + "epoch": 0.13517461160536448, + "grad_norm": 1.050357567916831, + "learning_rate": 1.998346236794276e-05, + "loss": 0.4206693768501282, + "step": 509 + }, + { + "epoch": 0.13544018058690746, + "grad_norm": 1.1108901361228778, + "learning_rate": 1.9983208992285993e-05, + "loss": 0.42818987369537354, + "step": 510 + }, + { + "epoch": 0.1357057495684504, + "grad_norm": 1.0771548955106338, + "learning_rate": 1.9982953691984274e-05, + "loss": 0.44592660665512085, + "step": 511 + }, + { + "epoch": 0.13597131854999336, + "grad_norm": 1.006125968429414, + "learning_rate": 1.9982696467086815e-05, + "loss": 0.4272580146789551, + "step": 512 + }, + { + "epoch": 0.13623688753153632, + "grad_norm": 1.084212872761102, + "learning_rate": 1.9982437317643218e-05, + "loss": 0.4416295289993286, + "step": 513 + }, + { + "epoch": 0.13650245651307927, + "grad_norm": 1.1040865905907058, + "learning_rate": 1.998217624370343e-05, + "loss": 0.45108669996261597, + "step": 514 + }, + { + "epoch": 0.13676802549462222, + "grad_norm": 0.9866796372680723, + "learning_rate": 1.9981913245317802e-05, + "loss": 0.40311864018440247, + "step": 515 + }, + { + "epoch": 0.13703359447616517, + "grad_norm": 1.041531014011416, + "learning_rate": 1.9981648322537017e-05, + "loss": 0.4388020932674408, + "step": 516 + }, + { + "epoch": 0.13729916345770815, + "grad_norm": 1.069295153220874, + "learning_rate": 1.9981381475412162e-05, + "loss": 0.42741361260414124, + "step": 517 + }, + { + "epoch": 0.1375647324392511, + "grad_norm": 0.8562984414004653, + "learning_rate": 1.9981112703994677e-05, + "loss": 0.3766555190086365, + "step": 518 + }, + { + "epoch": 0.13783030142079405, + "grad_norm": 0.9297024970383198, + "learning_rate": 1.998084200833638e-05, + "loss": 0.38618308305740356, + "step": 519 + }, + { + "epoch": 0.138095870402337, + "grad_norm": 1.0033450202172107, + "learning_rate": 1.9980569388489457e-05, + "loss": 0.4553264379501343, + "step": 520 + }, + { + "epoch": 0.13836143938387996, + "grad_norm": 1.024202819723292, + "learning_rate": 1.9980294844506468e-05, + "loss": 0.44632673263549805, + "step": 521 + }, + { + "epoch": 0.1386270083654229, + "grad_norm": 1.0907023510727254, + "learning_rate": 1.998001837644033e-05, + "loss": 0.4285067617893219, + "step": 522 + }, + { + "epoch": 0.13889257734696586, + "grad_norm": 0.9721672428790065, + "learning_rate": 1.9979739984344365e-05, + "loss": 0.39360538125038147, + "step": 523 + }, + { + "epoch": 0.13915814632850884, + "grad_norm": 0.9475835393492287, + "learning_rate": 1.9979459668272226e-05, + "loss": 0.4007593095302582, + "step": 524 + }, + { + "epoch": 0.1394237153100518, + "grad_norm": 1.028990364637073, + "learning_rate": 1.9979177428277955e-05, + "loss": 0.40176767110824585, + "step": 525 + }, + { + "epoch": 0.13968928429159475, + "grad_norm": 1.0167293750004343, + "learning_rate": 1.9978893264415978e-05, + "loss": 0.4190528392791748, + "step": 526 + }, + { + "epoch": 0.1399548532731377, + "grad_norm": 0.9871913820335487, + "learning_rate": 1.9978607176741063e-05, + "loss": 0.4139288067817688, + "step": 527 + }, + { + "epoch": 0.14022042225468065, + "grad_norm": 0.8610694360554231, + "learning_rate": 1.9978319165308373e-05, + "loss": 0.3666151463985443, + "step": 528 + }, + { + "epoch": 0.1404859912362236, + "grad_norm": 1.016794526359022, + "learning_rate": 1.997802923017343e-05, + "loss": 0.44621142745018005, + "step": 529 + }, + { + "epoch": 0.14075156021776655, + "grad_norm": 0.9742602007181285, + "learning_rate": 1.9977737371392134e-05, + "loss": 0.4162977635860443, + "step": 530 + }, + { + "epoch": 0.14101712919930953, + "grad_norm": 1.0386051117102446, + "learning_rate": 1.997744358902075e-05, + "loss": 0.438882052898407, + "step": 531 + }, + { + "epoch": 0.14128269818085248, + "grad_norm": 0.9131334625730753, + "learning_rate": 1.997714788311591e-05, + "loss": 0.43381333351135254, + "step": 532 + }, + { + "epoch": 0.14154826716239544, + "grad_norm": 1.0341262373297713, + "learning_rate": 1.9976850253734633e-05, + "loss": 0.41925039887428284, + "step": 533 + }, + { + "epoch": 0.1418138361439384, + "grad_norm": 1.0366031704059997, + "learning_rate": 1.997655070093429e-05, + "loss": 0.40469998121261597, + "step": 534 + }, + { + "epoch": 0.14207940512548134, + "grad_norm": 1.069653848503876, + "learning_rate": 1.9976249224772638e-05, + "loss": 0.4252749979496002, + "step": 535 + }, + { + "epoch": 0.1423449741070243, + "grad_norm": 0.9131599330211423, + "learning_rate": 1.9975945825307788e-05, + "loss": 0.42437341809272766, + "step": 536 + }, + { + "epoch": 0.14261054308856724, + "grad_norm": 0.9295944144104017, + "learning_rate": 1.9975640502598243e-05, + "loss": 0.3435184955596924, + "step": 537 + }, + { + "epoch": 0.14287611207011022, + "grad_norm": 1.135805935036872, + "learning_rate": 1.9975333256702864e-05, + "loss": 0.4677535593509674, + "step": 538 + }, + { + "epoch": 0.14314168105165317, + "grad_norm": 0.9857610455714647, + "learning_rate": 1.9975024087680873e-05, + "loss": 0.3860551118850708, + "step": 539 + }, + { + "epoch": 0.14340725003319613, + "grad_norm": 1.0260051612127887, + "learning_rate": 1.9974712995591887e-05, + "loss": 0.4067271649837494, + "step": 540 + }, + { + "epoch": 0.14367281901473908, + "grad_norm": 1.0673102525592195, + "learning_rate": 1.9974399980495877e-05, + "loss": 0.42236536741256714, + "step": 541 + }, + { + "epoch": 0.14393838799628203, + "grad_norm": 0.9825710114440017, + "learning_rate": 1.9974085042453188e-05, + "loss": 0.45230624079704285, + "step": 542 + }, + { + "epoch": 0.14420395697782498, + "grad_norm": 1.0223761508252163, + "learning_rate": 1.997376818152453e-05, + "loss": 0.428194522857666, + "step": 543 + }, + { + "epoch": 0.14446952595936793, + "grad_norm": 1.0337438279048081, + "learning_rate": 1.9973449397771004e-05, + "loss": 0.40774789452552795, + "step": 544 + }, + { + "epoch": 0.1447350949409109, + "grad_norm": 0.9168779980285519, + "learning_rate": 1.9973128691254054e-05, + "loss": 0.4086815118789673, + "step": 545 + }, + { + "epoch": 0.14500066392245387, + "grad_norm": 0.9934439062572693, + "learning_rate": 1.997280606203552e-05, + "loss": 0.4045162796974182, + "step": 546 + }, + { + "epoch": 0.14526623290399682, + "grad_norm": 1.0110955437735047, + "learning_rate": 1.9972481510177594e-05, + "loss": 0.40463268756866455, + "step": 547 + }, + { + "epoch": 0.14553180188553977, + "grad_norm": 1.0029896014566093, + "learning_rate": 1.9972155035742847e-05, + "loss": 0.46733587980270386, + "step": 548 + }, + { + "epoch": 0.14579737086708272, + "grad_norm": 0.9683751197048177, + "learning_rate": 1.997182663879422e-05, + "loss": 0.45210930705070496, + "step": 549 + }, + { + "epoch": 0.14606293984862567, + "grad_norm": 0.9559484778346481, + "learning_rate": 1.9971496319395022e-05, + "loss": 0.39798587560653687, + "step": 550 + }, + { + "epoch": 0.14632850883016862, + "grad_norm": 1.0582410708312875, + "learning_rate": 1.9971164077608937e-05, + "loss": 0.4166080057621002, + "step": 551 + }, + { + "epoch": 0.1465940778117116, + "grad_norm": 0.99705391441119, + "learning_rate": 1.9970829913500017e-05, + "loss": 0.3995435833930969, + "step": 552 + }, + { + "epoch": 0.14685964679325456, + "grad_norm": 0.9693599664680953, + "learning_rate": 1.9970493827132686e-05, + "loss": 0.39335039258003235, + "step": 553 + }, + { + "epoch": 0.1471252157747975, + "grad_norm": 1.0653128556742777, + "learning_rate": 1.9970155818571733e-05, + "loss": 0.3923008441925049, + "step": 554 + }, + { + "epoch": 0.14739078475634046, + "grad_norm": 1.1000528384874784, + "learning_rate": 1.996981588788233e-05, + "loss": 0.42148759961128235, + "step": 555 + }, + { + "epoch": 0.1476563537378834, + "grad_norm": 0.9532704289154984, + "learning_rate": 1.9969474035130005e-05, + "loss": 0.36099517345428467, + "step": 556 + }, + { + "epoch": 0.14792192271942636, + "grad_norm": 0.9498609858415961, + "learning_rate": 1.9969130260380663e-05, + "loss": 0.39650559425354004, + "step": 557 + }, + { + "epoch": 0.14818749170096931, + "grad_norm": 0.9667452630427784, + "learning_rate": 1.9968784563700586e-05, + "loss": 0.36410078406333923, + "step": 558 + }, + { + "epoch": 0.1484530606825123, + "grad_norm": 1.002419821858965, + "learning_rate": 1.996843694515641e-05, + "loss": 0.41312888264656067, + "step": 559 + }, + { + "epoch": 0.14871862966405525, + "grad_norm": 1.1088153047335336, + "learning_rate": 1.9968087404815162e-05, + "loss": 0.3895263373851776, + "step": 560 + }, + { + "epoch": 0.1489841986455982, + "grad_norm": 1.2422388501205763, + "learning_rate": 1.9967735942744226e-05, + "loss": 0.4400597810745239, + "step": 561 + }, + { + "epoch": 0.14924976762714115, + "grad_norm": 1.1300700300497077, + "learning_rate": 1.9967382559011356e-05, + "loss": 0.36712852120399475, + "step": 562 + }, + { + "epoch": 0.1495153366086841, + "grad_norm": 1.0425502358891738, + "learning_rate": 1.9967027253684685e-05, + "loss": 0.4043564200401306, + "step": 563 + }, + { + "epoch": 0.14978090559022705, + "grad_norm": 1.101160625764444, + "learning_rate": 1.9966670026832707e-05, + "loss": 0.45233044028282166, + "step": 564 + }, + { + "epoch": 0.15004647457177, + "grad_norm": 1.3277254520379258, + "learning_rate": 1.9966310878524297e-05, + "loss": 0.441600501537323, + "step": 565 + }, + { + "epoch": 0.15031204355331299, + "grad_norm": 1.0833095900878238, + "learning_rate": 1.9965949808828687e-05, + "loss": 0.4268038868904114, + "step": 566 + }, + { + "epoch": 0.15057761253485594, + "grad_norm": 1.1492448156590855, + "learning_rate": 1.9965586817815494e-05, + "loss": 0.41927874088287354, + "step": 567 + }, + { + "epoch": 0.1508431815163989, + "grad_norm": 1.026170307581087, + "learning_rate": 1.9965221905554695e-05, + "loss": 0.41488781571388245, + "step": 568 + }, + { + "epoch": 0.15110875049794184, + "grad_norm": 0.9559142330236491, + "learning_rate": 1.9964855072116642e-05, + "loss": 0.3624749779701233, + "step": 569 + }, + { + "epoch": 0.1513743194794848, + "grad_norm": 1.254830306735622, + "learning_rate": 1.996448631757206e-05, + "loss": 0.45119866728782654, + "step": 570 + }, + { + "epoch": 0.15163988846102774, + "grad_norm": 1.095837461898702, + "learning_rate": 1.996411564199203e-05, + "loss": 0.41389739513397217, + "step": 571 + }, + { + "epoch": 0.1519054574425707, + "grad_norm": 0.9684460814064966, + "learning_rate": 1.996374304544802e-05, + "loss": 0.3640916347503662, + "step": 572 + }, + { + "epoch": 0.15217102642411368, + "grad_norm": 1.0711015344753547, + "learning_rate": 1.9963368528011867e-05, + "loss": 0.45648565888404846, + "step": 573 + }, + { + "epoch": 0.15243659540565663, + "grad_norm": 0.9722794055909949, + "learning_rate": 1.9962992089755765e-05, + "loss": 0.4335980713367462, + "step": 574 + }, + { + "epoch": 0.15270216438719958, + "grad_norm": 1.158400874054287, + "learning_rate": 1.996261373075229e-05, + "loss": 0.3908158540725708, + "step": 575 + }, + { + "epoch": 0.15296773336874253, + "grad_norm": 0.9311953954584888, + "learning_rate": 1.996223345107439e-05, + "loss": 0.36533305048942566, + "step": 576 + }, + { + "epoch": 0.15323330235028548, + "grad_norm": 0.9771467412652409, + "learning_rate": 1.9961851250795372e-05, + "loss": 0.407212495803833, + "step": 577 + }, + { + "epoch": 0.15349887133182843, + "grad_norm": 0.9988499065644934, + "learning_rate": 1.996146712998892e-05, + "loss": 0.4266315698623657, + "step": 578 + }, + { + "epoch": 0.1537644403133714, + "grad_norm": 0.9843108485081927, + "learning_rate": 1.9961081088729092e-05, + "loss": 0.3806581199169159, + "step": 579 + }, + { + "epoch": 0.15403000929491437, + "grad_norm": 0.9497423806639163, + "learning_rate": 1.9960693127090312e-05, + "loss": 0.40962716937065125, + "step": 580 + }, + { + "epoch": 0.15429557827645732, + "grad_norm": 0.94680923059909, + "learning_rate": 1.996030324514737e-05, + "loss": 0.4195394515991211, + "step": 581 + }, + { + "epoch": 0.15456114725800027, + "grad_norm": 1.0211843119224446, + "learning_rate": 1.995991144297543e-05, + "loss": 0.4366803765296936, + "step": 582 + }, + { + "epoch": 0.15482671623954322, + "grad_norm": 1.1779341722116263, + "learning_rate": 1.995951772065004e-05, + "loss": 0.44951680302619934, + "step": 583 + }, + { + "epoch": 0.15509228522108617, + "grad_norm": 1.1165714790353467, + "learning_rate": 1.9959122078247088e-05, + "loss": 0.42920851707458496, + "step": 584 + }, + { + "epoch": 0.15535785420262913, + "grad_norm": 1.3260467831670406, + "learning_rate": 1.9958724515842856e-05, + "loss": 0.3805098533630371, + "step": 585 + }, + { + "epoch": 0.15562342318417208, + "grad_norm": 1.1544212798945541, + "learning_rate": 1.995832503351399e-05, + "loss": 0.439333438873291, + "step": 586 + }, + { + "epoch": 0.15588899216571506, + "grad_norm": 0.9414235863159184, + "learning_rate": 1.9957923631337505e-05, + "loss": 0.38338547945022583, + "step": 587 + }, + { + "epoch": 0.156154561147258, + "grad_norm": 0.9711288321476074, + "learning_rate": 1.9957520309390786e-05, + "loss": 0.40603697299957275, + "step": 588 + }, + { + "epoch": 0.15642013012880096, + "grad_norm": 0.9468286962292546, + "learning_rate": 1.9957115067751594e-05, + "loss": 0.42816999554634094, + "step": 589 + }, + { + "epoch": 0.1566856991103439, + "grad_norm": 0.979497417166178, + "learning_rate": 1.9956707906498046e-05, + "loss": 0.42367884516716003, + "step": 590 + }, + { + "epoch": 0.15695126809188686, + "grad_norm": 1.1158588594509518, + "learning_rate": 1.995629882570864e-05, + "loss": 0.4349297881126404, + "step": 591 + }, + { + "epoch": 0.15721683707342982, + "grad_norm": 0.9762108745852242, + "learning_rate": 1.995588782546225e-05, + "loss": 0.37990960478782654, + "step": 592 + }, + { + "epoch": 0.15748240605497277, + "grad_norm": 0.9495653219493333, + "learning_rate": 1.9955474905838102e-05, + "loss": 0.4085468649864197, + "step": 593 + }, + { + "epoch": 0.15774797503651575, + "grad_norm": 0.9419429879365407, + "learning_rate": 1.995506006691581e-05, + "loss": 0.41362464427948, + "step": 594 + }, + { + "epoch": 0.1580135440180587, + "grad_norm": 1.002559702640921, + "learning_rate": 1.9954643308775342e-05, + "loss": 0.3830018937587738, + "step": 595 + }, + { + "epoch": 0.15827911299960165, + "grad_norm": 1.1505182326275074, + "learning_rate": 1.995422463149705e-05, + "loss": 0.48350822925567627, + "step": 596 + }, + { + "epoch": 0.1585446819811446, + "grad_norm": 0.9889824166630486, + "learning_rate": 1.995380403516165e-05, + "loss": 0.4215185344219208, + "step": 597 + }, + { + "epoch": 0.15881025096268755, + "grad_norm": 1.06826056700577, + "learning_rate": 1.9953381519850224e-05, + "loss": 0.42061948776245117, + "step": 598 + }, + { + "epoch": 0.1590758199442305, + "grad_norm": 1.032451381790901, + "learning_rate": 1.995295708564423e-05, + "loss": 0.38956254720687866, + "step": 599 + }, + { + "epoch": 0.15934138892577346, + "grad_norm": 1.0492553607775368, + "learning_rate": 1.9952530732625492e-05, + "loss": 0.3864685893058777, + "step": 600 + }, + { + "epoch": 0.15960695790731644, + "grad_norm": 0.9770856461072062, + "learning_rate": 1.9952102460876214e-05, + "loss": 0.395724356174469, + "step": 601 + }, + { + "epoch": 0.1598725268888594, + "grad_norm": 1.04245602393598, + "learning_rate": 1.995167227047895e-05, + "loss": 0.4220300316810608, + "step": 602 + }, + { + "epoch": 0.16013809587040234, + "grad_norm": 1.1406615370546667, + "learning_rate": 1.9951240161516643e-05, + "loss": 0.4129142165184021, + "step": 603 + }, + { + "epoch": 0.1604036648519453, + "grad_norm": 0.983753356740355, + "learning_rate": 1.9950806134072595e-05, + "loss": 0.3951375484466553, + "step": 604 + }, + { + "epoch": 0.16066923383348825, + "grad_norm": 1.0214548083454909, + "learning_rate": 1.9950370188230486e-05, + "loss": 0.4117582142353058, + "step": 605 + }, + { + "epoch": 0.1609348028150312, + "grad_norm": 1.0340746201961049, + "learning_rate": 1.994993232407436e-05, + "loss": 0.3920668363571167, + "step": 606 + }, + { + "epoch": 0.16120037179657415, + "grad_norm": 0.9768399206450091, + "learning_rate": 1.9949492541688626e-05, + "loss": 0.3756999373435974, + "step": 607 + }, + { + "epoch": 0.16146594077811713, + "grad_norm": 1.0034054922110034, + "learning_rate": 1.9949050841158078e-05, + "loss": 0.41009610891342163, + "step": 608 + }, + { + "epoch": 0.16173150975966008, + "grad_norm": 0.9847346075479474, + "learning_rate": 1.994860722256786e-05, + "loss": 0.3986571729183197, + "step": 609 + }, + { + "epoch": 0.16199707874120303, + "grad_norm": 0.9978440495541314, + "learning_rate": 1.994816168600351e-05, + "loss": 0.3903341591358185, + "step": 610 + }, + { + "epoch": 0.16226264772274598, + "grad_norm": 0.9992231775305654, + "learning_rate": 1.994771423155091e-05, + "loss": 0.39725261926651, + "step": 611 + }, + { + "epoch": 0.16252821670428894, + "grad_norm": 0.9446936558476315, + "learning_rate": 1.994726485929633e-05, + "loss": 0.39461129903793335, + "step": 612 + }, + { + "epoch": 0.1627937856858319, + "grad_norm": 1.0162077284831286, + "learning_rate": 1.99468135693264e-05, + "loss": 0.41346144676208496, + "step": 613 + }, + { + "epoch": 0.16305935466737484, + "grad_norm": 1.0305116850266922, + "learning_rate": 1.9946360361728127e-05, + "loss": 0.41148197650909424, + "step": 614 + }, + { + "epoch": 0.16332492364891782, + "grad_norm": 0.9678436330540818, + "learning_rate": 1.9945905236588884e-05, + "loss": 0.38204139471054077, + "step": 615 + }, + { + "epoch": 0.16359049263046077, + "grad_norm": 0.9830320911733957, + "learning_rate": 1.9945448193996412e-05, + "loss": 0.41496896743774414, + "step": 616 + }, + { + "epoch": 0.16385606161200372, + "grad_norm": 0.9327494941136337, + "learning_rate": 1.994498923403882e-05, + "loss": 0.38998982310295105, + "step": 617 + }, + { + "epoch": 0.16412163059354667, + "grad_norm": 1.0310759290486786, + "learning_rate": 1.99445283568046e-05, + "loss": 0.39018991589546204, + "step": 618 + }, + { + "epoch": 0.16438719957508963, + "grad_norm": 1.1133251353738367, + "learning_rate": 1.9944065562382594e-05, + "loss": 0.41579991579055786, + "step": 619 + }, + { + "epoch": 0.16465276855663258, + "grad_norm": 1.1413714641323347, + "learning_rate": 1.9943600850862027e-05, + "loss": 0.426283061504364, + "step": 620 + }, + { + "epoch": 0.16491833753817553, + "grad_norm": 1.0537239280428552, + "learning_rate": 1.9943134222332493e-05, + "loss": 0.418672651052475, + "step": 621 + }, + { + "epoch": 0.1651839065197185, + "grad_norm": 1.0177048245128393, + "learning_rate": 1.9942665676883946e-05, + "loss": 0.4014776349067688, + "step": 622 + }, + { + "epoch": 0.16544947550126146, + "grad_norm": 0.9703989792649265, + "learning_rate": 1.994219521460672e-05, + "loss": 0.3714776933193207, + "step": 623 + }, + { + "epoch": 0.1657150444828044, + "grad_norm": 1.005321267739283, + "learning_rate": 1.9941722835591514e-05, + "loss": 0.39415785670280457, + "step": 624 + }, + { + "epoch": 0.16598061346434737, + "grad_norm": 1.739817458909074, + "learning_rate": 1.9941248539929395e-05, + "loss": 0.3706223964691162, + "step": 625 + }, + { + "epoch": 0.16624618244589032, + "grad_norm": 0.9887487099192142, + "learning_rate": 1.9940772327711807e-05, + "loss": 0.4167429208755493, + "step": 626 + }, + { + "epoch": 0.16651175142743327, + "grad_norm": 1.0502993213264278, + "learning_rate": 1.9940294199030553e-05, + "loss": 0.38234227895736694, + "step": 627 + }, + { + "epoch": 0.16677732040897622, + "grad_norm": 0.9929957655695576, + "learning_rate": 1.9939814153977813e-05, + "loss": 0.4139519929885864, + "step": 628 + }, + { + "epoch": 0.1670428893905192, + "grad_norm": 1.0428716869119874, + "learning_rate": 1.9939332192646136e-05, + "loss": 0.44490402936935425, + "step": 629 + }, + { + "epoch": 0.16730845837206215, + "grad_norm": 0.9723220719956404, + "learning_rate": 1.993884831512843e-05, + "loss": 0.3870658278465271, + "step": 630 + }, + { + "epoch": 0.1675740273536051, + "grad_norm": 0.9337218443909966, + "learning_rate": 1.993836252151799e-05, + "loss": 0.3308948278427124, + "step": 631 + }, + { + "epoch": 0.16783959633514806, + "grad_norm": 1.1119638169858157, + "learning_rate": 1.993787481190847e-05, + "loss": 0.3727487623691559, + "step": 632 + }, + { + "epoch": 0.168105165316691, + "grad_norm": 1.0025380900585623, + "learning_rate": 1.9937385186393888e-05, + "loss": 0.4277465343475342, + "step": 633 + }, + { + "epoch": 0.16837073429823396, + "grad_norm": 1.2120120873899203, + "learning_rate": 1.9936893645068647e-05, + "loss": 0.4276485741138458, + "step": 634 + }, + { + "epoch": 0.1686363032797769, + "grad_norm": 1.000070161461063, + "learning_rate": 1.9936400188027502e-05, + "loss": 0.374578058719635, + "step": 635 + }, + { + "epoch": 0.1689018722613199, + "grad_norm": 1.113556890943216, + "learning_rate": 1.993590481536559e-05, + "loss": 0.4583400785923004, + "step": 636 + }, + { + "epoch": 0.16916744124286284, + "grad_norm": 0.9731147624235688, + "learning_rate": 1.9935407527178417e-05, + "loss": 0.3734489679336548, + "step": 637 + }, + { + "epoch": 0.1694330102244058, + "grad_norm": 1.0110441212525507, + "learning_rate": 1.9934908323561846e-05, + "loss": 0.39524513483047485, + "step": 638 + }, + { + "epoch": 0.16969857920594875, + "grad_norm": 1.0264447655460065, + "learning_rate": 1.9934407204612124e-05, + "loss": 0.42300352454185486, + "step": 639 + }, + { + "epoch": 0.1699641481874917, + "grad_norm": 0.9950374891978715, + "learning_rate": 1.9933904170425858e-05, + "loss": 0.4152276813983917, + "step": 640 + }, + { + "epoch": 0.17022971716903465, + "grad_norm": 1.230783330329369, + "learning_rate": 1.9933399221100026e-05, + "loss": 0.43046653270721436, + "step": 641 + }, + { + "epoch": 0.1704952861505776, + "grad_norm": 1.0095783418631343, + "learning_rate": 1.993289235673198e-05, + "loss": 0.4134339392185211, + "step": 642 + }, + { + "epoch": 0.17076085513212058, + "grad_norm": 1.0051407398693462, + "learning_rate": 1.9932383577419432e-05, + "loss": 0.44028693437576294, + "step": 643 + }, + { + "epoch": 0.17102642411366353, + "grad_norm": 1.0208746920457954, + "learning_rate": 1.9931872883260473e-05, + "loss": 0.3790222704410553, + "step": 644 + }, + { + "epoch": 0.17129199309520649, + "grad_norm": 1.041462978505965, + "learning_rate": 1.9931360274353556e-05, + "loss": 0.3683086633682251, + "step": 645 + }, + { + "epoch": 0.17155756207674944, + "grad_norm": 1.0400069352454702, + "learning_rate": 1.993084575079751e-05, + "loss": 0.3630594313144684, + "step": 646 + }, + { + "epoch": 0.1718231310582924, + "grad_norm": 1.0694046561659416, + "learning_rate": 1.993032931269153e-05, + "loss": 0.4398641884326935, + "step": 647 + }, + { + "epoch": 0.17208870003983534, + "grad_norm": 1.107156801944608, + "learning_rate": 1.992981096013517e-05, + "loss": 0.42222845554351807, + "step": 648 + }, + { + "epoch": 0.1723542690213783, + "grad_norm": 1.043160064840446, + "learning_rate": 1.992929069322837e-05, + "loss": 0.38966643810272217, + "step": 649 + }, + { + "epoch": 0.17261983800292127, + "grad_norm": 1.0607803195691352, + "learning_rate": 1.992876851207143e-05, + "loss": 0.4394804835319519, + "step": 650 + }, + { + "epoch": 0.17288540698446422, + "grad_norm": 0.9714467718451273, + "learning_rate": 1.9928244416765022e-05, + "loss": 0.3475287854671478, + "step": 651 + }, + { + "epoch": 0.17315097596600718, + "grad_norm": 0.9848879046616053, + "learning_rate": 1.992771840741018e-05, + "loss": 0.40047168731689453, + "step": 652 + }, + { + "epoch": 0.17341654494755013, + "grad_norm": 1.0744593937096147, + "learning_rate": 1.9927190484108315e-05, + "loss": 0.4028981328010559, + "step": 653 + }, + { + "epoch": 0.17368211392909308, + "grad_norm": 1.010491020672817, + "learning_rate": 1.9926660646961208e-05, + "loss": 0.3891482949256897, + "step": 654 + }, + { + "epoch": 0.17394768291063603, + "grad_norm": 1.1163232689680433, + "learning_rate": 1.9926128896071e-05, + "loss": 0.4570680856704712, + "step": 655 + }, + { + "epoch": 0.17421325189217898, + "grad_norm": 0.9509061944047602, + "learning_rate": 1.992559523154021e-05, + "loss": 0.392758309841156, + "step": 656 + }, + { + "epoch": 0.17447882087372196, + "grad_norm": 0.9648168194829144, + "learning_rate": 1.992505965347172e-05, + "loss": 0.39552047848701477, + "step": 657 + }, + { + "epoch": 0.17474438985526491, + "grad_norm": 1.045434666464082, + "learning_rate": 1.992452216196879e-05, + "loss": 0.4412619173526764, + "step": 658 + }, + { + "epoch": 0.17500995883680787, + "grad_norm": 1.033655605856329, + "learning_rate": 1.9923982757135028e-05, + "loss": 0.4075942635536194, + "step": 659 + }, + { + "epoch": 0.17527552781835082, + "grad_norm": 1.0660210414475448, + "learning_rate": 1.9923441439074434e-05, + "loss": 0.44615018367767334, + "step": 660 + }, + { + "epoch": 0.17554109679989377, + "grad_norm": 0.9504988883268379, + "learning_rate": 1.992289820789137e-05, + "loss": 0.3957441449165344, + "step": 661 + }, + { + "epoch": 0.17580666578143672, + "grad_norm": 0.9513339400965243, + "learning_rate": 1.992235306369056e-05, + "loss": 0.4014820158481598, + "step": 662 + }, + { + "epoch": 0.17607223476297967, + "grad_norm": 0.9988043316582222, + "learning_rate": 1.9921806006577102e-05, + "loss": 0.39478158950805664, + "step": 663 + }, + { + "epoch": 0.17633780374452265, + "grad_norm": 1.0278124558587338, + "learning_rate": 1.9921257036656463e-05, + "loss": 0.45742082595825195, + "step": 664 + }, + { + "epoch": 0.1766033727260656, + "grad_norm": 0.9674516471555401, + "learning_rate": 1.9920706154034477e-05, + "loss": 0.36519041657447815, + "step": 665 + }, + { + "epoch": 0.17686894170760856, + "grad_norm": 1.0086354363577679, + "learning_rate": 1.992015335881735e-05, + "loss": 0.40599358081817627, + "step": 666 + }, + { + "epoch": 0.1771345106891515, + "grad_norm": 0.958585892866014, + "learning_rate": 1.991959865111165e-05, + "loss": 0.4064781367778778, + "step": 667 + }, + { + "epoch": 0.17740007967069446, + "grad_norm": 0.9430583774727941, + "learning_rate": 1.991904203102432e-05, + "loss": 0.4076484143733978, + "step": 668 + }, + { + "epoch": 0.1776656486522374, + "grad_norm": 1.1044553051326549, + "learning_rate": 1.9918483498662678e-05, + "loss": 0.42157143354415894, + "step": 669 + }, + { + "epoch": 0.17793121763378036, + "grad_norm": 1.005923050768092, + "learning_rate": 1.9917923054134388e-05, + "loss": 0.3814900517463684, + "step": 670 + }, + { + "epoch": 0.17819678661532334, + "grad_norm": 1.0156953904207233, + "learning_rate": 1.9917360697547506e-05, + "loss": 0.4211175739765167, + "step": 671 + }, + { + "epoch": 0.1784623555968663, + "grad_norm": 1.0530805044024834, + "learning_rate": 1.991679642901045e-05, + "loss": 0.3975893259048462, + "step": 672 + }, + { + "epoch": 0.17872792457840925, + "grad_norm": 0.9633270935214763, + "learning_rate": 1.9916230248631993e-05, + "loss": 0.36090826988220215, + "step": 673 + }, + { + "epoch": 0.1789934935599522, + "grad_norm": 0.9408638333666679, + "learning_rate": 1.99156621565213e-05, + "loss": 0.36511334776878357, + "step": 674 + }, + { + "epoch": 0.17925906254149515, + "grad_norm": 1.0839117569759185, + "learning_rate": 1.9915092152787888e-05, + "loss": 0.4131924510002136, + "step": 675 + }, + { + "epoch": 0.1795246315230381, + "grad_norm": 1.1407281463751517, + "learning_rate": 1.9914520237541644e-05, + "loss": 0.4283728301525116, + "step": 676 + }, + { + "epoch": 0.17979020050458105, + "grad_norm": 0.9751873028047018, + "learning_rate": 1.991394641089283e-05, + "loss": 0.3855544924736023, + "step": 677 + }, + { + "epoch": 0.18005576948612403, + "grad_norm": 1.3517309919327671, + "learning_rate": 1.9913370672952074e-05, + "loss": 0.41288501024246216, + "step": 678 + }, + { + "epoch": 0.180321338467667, + "grad_norm": 1.1127679640996702, + "learning_rate": 1.9912793023830365e-05, + "loss": 0.3824073076248169, + "step": 679 + }, + { + "epoch": 0.18058690744920994, + "grad_norm": 1.0055812841256684, + "learning_rate": 1.9912213463639077e-05, + "loss": 0.39005106687545776, + "step": 680 + }, + { + "epoch": 0.1808524764307529, + "grad_norm": 1.0115332151563563, + "learning_rate": 1.9911631992489933e-05, + "loss": 0.3521374464035034, + "step": 681 + }, + { + "epoch": 0.18111804541229584, + "grad_norm": 0.983790464571211, + "learning_rate": 1.9911048610495037e-05, + "loss": 0.337347149848938, + "step": 682 + }, + { + "epoch": 0.1813836143938388, + "grad_norm": 1.1534370397304132, + "learning_rate": 1.9910463317766864e-05, + "loss": 0.4349983334541321, + "step": 683 + }, + { + "epoch": 0.18164918337538175, + "grad_norm": 1.059114838428009, + "learning_rate": 1.9909876114418242e-05, + "loss": 0.3783540427684784, + "step": 684 + }, + { + "epoch": 0.18191475235692472, + "grad_norm": 1.0050293498117582, + "learning_rate": 1.9909287000562383e-05, + "loss": 0.4065130054950714, + "step": 685 + }, + { + "epoch": 0.18218032133846768, + "grad_norm": 1.0122618604087057, + "learning_rate": 1.990869597631286e-05, + "loss": 0.3876315653324127, + "step": 686 + }, + { + "epoch": 0.18244589032001063, + "grad_norm": 0.9622962910168786, + "learning_rate": 1.9908103041783615e-05, + "loss": 0.3716024160385132, + "step": 687 + }, + { + "epoch": 0.18271145930155358, + "grad_norm": 1.086778230300176, + "learning_rate": 1.990750819708896e-05, + "loss": 0.4096733331680298, + "step": 688 + }, + { + "epoch": 0.18297702828309653, + "grad_norm": 1.131269280292305, + "learning_rate": 1.9906911442343567e-05, + "loss": 0.41432395577430725, + "step": 689 + }, + { + "epoch": 0.18324259726463948, + "grad_norm": 1.1182736792418642, + "learning_rate": 1.9906312777662493e-05, + "loss": 0.3934200406074524, + "step": 690 + }, + { + "epoch": 0.18350816624618244, + "grad_norm": 1.0493015785833109, + "learning_rate": 1.9905712203161148e-05, + "loss": 0.4246784746646881, + "step": 691 + }, + { + "epoch": 0.18377373522772542, + "grad_norm": 1.1362836227785695, + "learning_rate": 1.9905109718955323e-05, + "loss": 0.40027567744255066, + "step": 692 + }, + { + "epoch": 0.18403930420926837, + "grad_norm": 1.056262242708622, + "learning_rate": 1.990450532516116e-05, + "loss": 0.4162583351135254, + "step": 693 + }, + { + "epoch": 0.18430487319081132, + "grad_norm": 1.05760814074371, + "learning_rate": 1.990389902189518e-05, + "loss": 0.4133074879646301, + "step": 694 + }, + { + "epoch": 0.18457044217235427, + "grad_norm": 1.0438921885629904, + "learning_rate": 1.9903290809274277e-05, + "loss": 0.333192378282547, + "step": 695 + }, + { + "epoch": 0.18483601115389722, + "grad_norm": 0.9814281867123515, + "learning_rate": 1.9902680687415704e-05, + "loss": 0.39349496364593506, + "step": 696 + }, + { + "epoch": 0.18510158013544017, + "grad_norm": 1.0366332083029342, + "learning_rate": 1.9902068656437086e-05, + "loss": 0.39678412675857544, + "step": 697 + }, + { + "epoch": 0.18536714911698313, + "grad_norm": 1.0003960978434148, + "learning_rate": 1.9901454716456415e-05, + "loss": 0.3553932011127472, + "step": 698 + }, + { + "epoch": 0.18563271809852608, + "grad_norm": 1.0876315802223169, + "learning_rate": 1.990083886759205e-05, + "loss": 0.4264630079269409, + "step": 699 + }, + { + "epoch": 0.18589828708006906, + "grad_norm": 1.0135520655053032, + "learning_rate": 1.9900221109962726e-05, + "loss": 0.3883950412273407, + "step": 700 + }, + { + "epoch": 0.186163856061612, + "grad_norm": 1.0408639715408188, + "learning_rate": 1.989960144368753e-05, + "loss": 0.38465407490730286, + "step": 701 + }, + { + "epoch": 0.18642942504315496, + "grad_norm": 2.2198594223984065, + "learning_rate": 1.9898979868885933e-05, + "loss": 0.39897871017456055, + "step": 702 + }, + { + "epoch": 0.1866949940246979, + "grad_norm": 1.120873004114704, + "learning_rate": 1.9898356385677762e-05, + "loss": 0.4386023283004761, + "step": 703 + }, + { + "epoch": 0.18696056300624087, + "grad_norm": 1.0254606123190075, + "learning_rate": 1.989773099418322e-05, + "loss": 0.42621874809265137, + "step": 704 + }, + { + "epoch": 0.18722613198778382, + "grad_norm": 1.0153284696458207, + "learning_rate": 1.9897103694522877e-05, + "loss": 0.3811546266078949, + "step": 705 + }, + { + "epoch": 0.18749170096932677, + "grad_norm": 1.0634877610237485, + "learning_rate": 1.989647448681767e-05, + "loss": 0.4018982946872711, + "step": 706 + }, + { + "epoch": 0.18775726995086975, + "grad_norm": 1.0316038713106725, + "learning_rate": 1.9895843371188897e-05, + "loss": 0.3920126259326935, + "step": 707 + }, + { + "epoch": 0.1880228389324127, + "grad_norm": 0.9767495366810068, + "learning_rate": 1.9895210347758233e-05, + "loss": 0.3598487973213196, + "step": 708 + }, + { + "epoch": 0.18828840791395565, + "grad_norm": 1.0286682270198635, + "learning_rate": 1.9894575416647717e-05, + "loss": 0.4204316735267639, + "step": 709 + }, + { + "epoch": 0.1885539768954986, + "grad_norm": 0.9653709480495668, + "learning_rate": 1.9893938577979755e-05, + "loss": 0.33814263343811035, + "step": 710 + }, + { + "epoch": 0.18881954587704156, + "grad_norm": 0.9588770367914977, + "learning_rate": 1.9893299831877124e-05, + "loss": 0.3788227140903473, + "step": 711 + }, + { + "epoch": 0.1890851148585845, + "grad_norm": 0.9974371582936609, + "learning_rate": 1.989265917846297e-05, + "loss": 0.38141176104545593, + "step": 712 + }, + { + "epoch": 0.18935068384012746, + "grad_norm": 1.0051109402301954, + "learning_rate": 1.9892016617860793e-05, + "loss": 0.3757280707359314, + "step": 713 + }, + { + "epoch": 0.18961625282167044, + "grad_norm": 0.9863956856856875, + "learning_rate": 1.989137215019448e-05, + "loss": 0.37819087505340576, + "step": 714 + }, + { + "epoch": 0.1898818218032134, + "grad_norm": 1.1797000402703188, + "learning_rate": 1.9890725775588277e-05, + "loss": 0.46046000719070435, + "step": 715 + }, + { + "epoch": 0.19014739078475634, + "grad_norm": 0.9967163493181064, + "learning_rate": 1.9890077494166792e-05, + "loss": 0.33967363834381104, + "step": 716 + }, + { + "epoch": 0.1904129597662993, + "grad_norm": 0.9620841339155507, + "learning_rate": 1.988942730605501e-05, + "loss": 0.36672675609588623, + "step": 717 + }, + { + "epoch": 0.19067852874784225, + "grad_norm": 1.0666183498740949, + "learning_rate": 1.9888775211378278e-05, + "loss": 0.38705015182495117, + "step": 718 + }, + { + "epoch": 0.1909440977293852, + "grad_norm": 1.0696051052523068, + "learning_rate": 1.9888121210262313e-05, + "loss": 0.35257095098495483, + "step": 719 + }, + { + "epoch": 0.19120966671092815, + "grad_norm": 1.0337108803934987, + "learning_rate": 1.9887465302833194e-05, + "loss": 0.3803965449333191, + "step": 720 + }, + { + "epoch": 0.19147523569247113, + "grad_norm": 1.0097965015220993, + "learning_rate": 1.988680748921738e-05, + "loss": 0.38166487216949463, + "step": 721 + }, + { + "epoch": 0.19174080467401408, + "grad_norm": 0.971159209120872, + "learning_rate": 1.988614776954169e-05, + "loss": 0.4017483592033386, + "step": 722 + }, + { + "epoch": 0.19200637365555703, + "grad_norm": 1.0651840937747212, + "learning_rate": 1.98854861439333e-05, + "loss": 0.4343035817146301, + "step": 723 + }, + { + "epoch": 0.19227194263709999, + "grad_norm": 1.0527178531986199, + "learning_rate": 1.9884822612519773e-05, + "loss": 0.4017031192779541, + "step": 724 + }, + { + "epoch": 0.19253751161864294, + "grad_norm": 0.9558335625340557, + "learning_rate": 1.988415717542903e-05, + "loss": 0.32294636964797974, + "step": 725 + }, + { + "epoch": 0.1928030806001859, + "grad_norm": 1.018550638071552, + "learning_rate": 1.988348983278935e-05, + "loss": 0.34661561250686646, + "step": 726 + }, + { + "epoch": 0.19306864958172884, + "grad_norm": 1.1264464061553692, + "learning_rate": 1.98828205847294e-05, + "loss": 0.3588724434375763, + "step": 727 + }, + { + "epoch": 0.19333421856327182, + "grad_norm": 1.151476031768393, + "learning_rate": 1.9882149431378194e-05, + "loss": 0.45439180731773376, + "step": 728 + }, + { + "epoch": 0.19359978754481477, + "grad_norm": 1.092854672146059, + "learning_rate": 1.988147637286513e-05, + "loss": 0.3916742205619812, + "step": 729 + }, + { + "epoch": 0.19386535652635772, + "grad_norm": 1.1073017625666908, + "learning_rate": 1.988080140931996e-05, + "loss": 0.3838115334510803, + "step": 730 + }, + { + "epoch": 0.19413092550790068, + "grad_norm": 1.0305888563782257, + "learning_rate": 1.9880124540872813e-05, + "loss": 0.3803096413612366, + "step": 731 + }, + { + "epoch": 0.19439649448944363, + "grad_norm": 1.0697488639709387, + "learning_rate": 1.987944576765418e-05, + "loss": 0.4180675446987152, + "step": 732 + }, + { + "epoch": 0.19466206347098658, + "grad_norm": 0.968492149308095, + "learning_rate": 1.987876508979492e-05, + "loss": 0.34485924243927, + "step": 733 + }, + { + "epoch": 0.19492763245252953, + "grad_norm": 1.0301319893667387, + "learning_rate": 1.987808250742626e-05, + "loss": 0.3696223795413971, + "step": 734 + }, + { + "epoch": 0.1951932014340725, + "grad_norm": 1.0070871597151176, + "learning_rate": 1.9877398020679796e-05, + "loss": 0.39920324087142944, + "step": 735 + }, + { + "epoch": 0.19545877041561546, + "grad_norm": 0.9772548764362861, + "learning_rate": 1.987671162968748e-05, + "loss": 0.33534419536590576, + "step": 736 + }, + { + "epoch": 0.19572433939715841, + "grad_norm": 0.955184588375953, + "learning_rate": 1.9876023334581657e-05, + "loss": 0.3698185682296753, + "step": 737 + }, + { + "epoch": 0.19598990837870137, + "grad_norm": 1.0108475553340988, + "learning_rate": 1.9875333135495e-05, + "loss": 0.37388375401496887, + "step": 738 + }, + { + "epoch": 0.19625547736024432, + "grad_norm": 0.9685434293396273, + "learning_rate": 1.9874641032560594e-05, + "loss": 0.3285469114780426, + "step": 739 + }, + { + "epoch": 0.19652104634178727, + "grad_norm": 1.01794140535256, + "learning_rate": 1.9873947025911854e-05, + "loss": 0.3539549708366394, + "step": 740 + }, + { + "epoch": 0.19678661532333022, + "grad_norm": 1.0943847325994938, + "learning_rate": 1.9873251115682577e-05, + "loss": 0.4707021117210388, + "step": 741 + }, + { + "epoch": 0.1970521843048732, + "grad_norm": 0.9783865509799976, + "learning_rate": 1.987255330200693e-05, + "loss": 0.3871781826019287, + "step": 742 + }, + { + "epoch": 0.19731775328641615, + "grad_norm": 1.0462206197157178, + "learning_rate": 1.9871853585019446e-05, + "loss": 0.3890243172645569, + "step": 743 + }, + { + "epoch": 0.1975833222679591, + "grad_norm": 0.9914096392216383, + "learning_rate": 1.9871151964855013e-05, + "loss": 0.34914374351501465, + "step": 744 + }, + { + "epoch": 0.19784889124950206, + "grad_norm": 1.0157439665946277, + "learning_rate": 1.9870448441648905e-05, + "loss": 0.41009777784347534, + "step": 745 + }, + { + "epoch": 0.198114460231045, + "grad_norm": 1.0725931773033663, + "learning_rate": 1.9869743015536747e-05, + "loss": 0.39449363946914673, + "step": 746 + }, + { + "epoch": 0.19838002921258796, + "grad_norm": 1.081644116196219, + "learning_rate": 1.9869035686654538e-05, + "loss": 0.3530065417289734, + "step": 747 + }, + { + "epoch": 0.1986455981941309, + "grad_norm": 1.1338420898560146, + "learning_rate": 1.986832645513864e-05, + "loss": 0.4255196154117584, + "step": 748 + }, + { + "epoch": 0.1989111671756739, + "grad_norm": 1.0625457917520444, + "learning_rate": 1.9867615321125796e-05, + "loss": 0.3921143114566803, + "step": 749 + }, + { + "epoch": 0.19917673615721684, + "grad_norm": 1.1076371778966394, + "learning_rate": 1.986690228475309e-05, + "loss": 0.4157381057739258, + "step": 750 + }, + { + "epoch": 0.1994423051387598, + "grad_norm": 0.9887260401437288, + "learning_rate": 1.986618734615799e-05, + "loss": 0.3922047019004822, + "step": 751 + }, + { + "epoch": 0.19970787412030275, + "grad_norm": 1.2477225666156357, + "learning_rate": 1.9865470505478335e-05, + "loss": 0.4378710985183716, + "step": 752 + }, + { + "epoch": 0.1999734431018457, + "grad_norm": 0.9960415180367619, + "learning_rate": 1.986475176285232e-05, + "loss": 0.3636753261089325, + "step": 753 + }, + { + "epoch": 0.20023901208338865, + "grad_norm": 1.0691751577172293, + "learning_rate": 1.986403111841851e-05, + "loss": 0.3509834408760071, + "step": 754 + }, + { + "epoch": 0.2005045810649316, + "grad_norm": 0.9490438891131449, + "learning_rate": 1.986330857231583e-05, + "loss": 0.3539624512195587, + "step": 755 + }, + { + "epoch": 0.20077015004647458, + "grad_norm": 1.002849163142055, + "learning_rate": 1.9862584124683587e-05, + "loss": 0.417904257774353, + "step": 756 + }, + { + "epoch": 0.20103571902801753, + "grad_norm": 0.9438738740406134, + "learning_rate": 1.9861857775661442e-05, + "loss": 0.3602277636528015, + "step": 757 + }, + { + "epoch": 0.2013012880095605, + "grad_norm": 1.0703002408877305, + "learning_rate": 1.986112952538943e-05, + "loss": 0.41064661741256714, + "step": 758 + }, + { + "epoch": 0.20156685699110344, + "grad_norm": 0.9789269746167363, + "learning_rate": 1.9860399374007944e-05, + "loss": 0.36313754320144653, + "step": 759 + }, + { + "epoch": 0.2018324259726464, + "grad_norm": 1.0711706181502203, + "learning_rate": 1.9859667321657755e-05, + "loss": 0.39497628808021545, + "step": 760 + }, + { + "epoch": 0.20209799495418934, + "grad_norm": 1.0173001682725575, + "learning_rate": 1.9858933368479987e-05, + "loss": 0.405613511800766, + "step": 761 + }, + { + "epoch": 0.2023635639357323, + "grad_norm": 0.9881458101524105, + "learning_rate": 1.9858197514616142e-05, + "loss": 0.39093440771102905, + "step": 762 + }, + { + "epoch": 0.20262913291727527, + "grad_norm": 1.0330584509521943, + "learning_rate": 1.9857459760208084e-05, + "loss": 0.39908382296562195, + "step": 763 + }, + { + "epoch": 0.20289470189881822, + "grad_norm": 0.9416263868211369, + "learning_rate": 1.9856720105398038e-05, + "loss": 0.36787620186805725, + "step": 764 + }, + { + "epoch": 0.20316027088036118, + "grad_norm": 1.0128388377672763, + "learning_rate": 1.985597855032861e-05, + "loss": 0.390550822019577, + "step": 765 + }, + { + "epoch": 0.20342583986190413, + "grad_norm": 1.115759431869763, + "learning_rate": 1.9855235095142754e-05, + "loss": 0.4191611409187317, + "step": 766 + }, + { + "epoch": 0.20369140884344708, + "grad_norm": 1.1288935622655036, + "learning_rate": 1.985448973998381e-05, + "loss": 0.4060766100883484, + "step": 767 + }, + { + "epoch": 0.20395697782499003, + "grad_norm": 1.055264696895727, + "learning_rate": 1.985374248499546e-05, + "loss": 0.3906163275241852, + "step": 768 + }, + { + "epoch": 0.20422254680653298, + "grad_norm": 1.0101644212894914, + "learning_rate": 1.9852993330321774e-05, + "loss": 0.3926839828491211, + "step": 769 + }, + { + "epoch": 0.20448811578807596, + "grad_norm": 1.0474151984911524, + "learning_rate": 1.9852242276107182e-05, + "loss": 0.37276068329811096, + "step": 770 + }, + { + "epoch": 0.20475368476961892, + "grad_norm": 0.9531396793135881, + "learning_rate": 1.9851489322496476e-05, + "loss": 0.3765360414981842, + "step": 771 + }, + { + "epoch": 0.20501925375116187, + "grad_norm": 1.0017274873228423, + "learning_rate": 1.9850734469634815e-05, + "loss": 0.35091257095336914, + "step": 772 + }, + { + "epoch": 0.20528482273270482, + "grad_norm": 1.1164065944268338, + "learning_rate": 1.9849977717667725e-05, + "loss": 0.4259791076183319, + "step": 773 + }, + { + "epoch": 0.20555039171424777, + "grad_norm": 0.9939508272565134, + "learning_rate": 1.9849219066741102e-05, + "loss": 0.3563114404678345, + "step": 774 + }, + { + "epoch": 0.20581596069579072, + "grad_norm": 1.0814350606971046, + "learning_rate": 1.9848458517001203e-05, + "loss": 0.4148223102092743, + "step": 775 + }, + { + "epoch": 0.20608152967733367, + "grad_norm": 1.0296405515766518, + "learning_rate": 1.9847696068594655e-05, + "loss": 0.3817785382270813, + "step": 776 + }, + { + "epoch": 0.20634709865887665, + "grad_norm": 1.115875170640065, + "learning_rate": 1.984693172166845e-05, + "loss": 0.41741886734962463, + "step": 777 + }, + { + "epoch": 0.2066126676404196, + "grad_norm": 1.0479957521256793, + "learning_rate": 1.9846165476369938e-05, + "loss": 0.34800025820732117, + "step": 778 + }, + { + "epoch": 0.20687823662196256, + "grad_norm": 1.0122784392492805, + "learning_rate": 1.9845397332846848e-05, + "loss": 0.38093405961990356, + "step": 779 + }, + { + "epoch": 0.2071438056035055, + "grad_norm": 1.0953515150858002, + "learning_rate": 1.9844627291247268e-05, + "loss": 0.40733009576797485, + "step": 780 + }, + { + "epoch": 0.20740937458504846, + "grad_norm": 1.1011295166986532, + "learning_rate": 1.9843855351719655e-05, + "loss": 0.3829066753387451, + "step": 781 + }, + { + "epoch": 0.2076749435665914, + "grad_norm": 1.0316161170996605, + "learning_rate": 1.9843081514412827e-05, + "loss": 0.3574868440628052, + "step": 782 + }, + { + "epoch": 0.20794051254813437, + "grad_norm": 1.071531696766489, + "learning_rate": 1.984230577947597e-05, + "loss": 0.3675144612789154, + "step": 783 + }, + { + "epoch": 0.20820608152967734, + "grad_norm": 0.9982781618225591, + "learning_rate": 1.9841528147058638e-05, + "loss": 0.36120525002479553, + "step": 784 + }, + { + "epoch": 0.2084716505112203, + "grad_norm": 1.0016427535647234, + "learning_rate": 1.984074861731075e-05, + "loss": 0.3651392459869385, + "step": 785 + }, + { + "epoch": 0.20873721949276325, + "grad_norm": 1.1254815799645344, + "learning_rate": 1.983996719038259e-05, + "loss": 0.4204651117324829, + "step": 786 + }, + { + "epoch": 0.2090027884743062, + "grad_norm": 1.0600310007301286, + "learning_rate": 1.9839183866424806e-05, + "loss": 0.4452149271965027, + "step": 787 + }, + { + "epoch": 0.20926835745584915, + "grad_norm": 1.000047138771705, + "learning_rate": 1.9838398645588418e-05, + "loss": 0.3931270241737366, + "step": 788 + }, + { + "epoch": 0.2095339264373921, + "grad_norm": 1.0009892054118905, + "learning_rate": 1.98376115280248e-05, + "loss": 0.3680538535118103, + "step": 789 + }, + { + "epoch": 0.20979949541893506, + "grad_norm": 0.9848864128393906, + "learning_rate": 1.9836822513885704e-05, + "loss": 0.3766820728778839, + "step": 790 + }, + { + "epoch": 0.21006506440047804, + "grad_norm": 1.0494510099931045, + "learning_rate": 1.9836031603323245e-05, + "loss": 0.3602439761161804, + "step": 791 + }, + { + "epoch": 0.210330633382021, + "grad_norm": 0.9790632198207762, + "learning_rate": 1.98352387964899e-05, + "loss": 0.38925549387931824, + "step": 792 + }, + { + "epoch": 0.21059620236356394, + "grad_norm": 1.0121548586068807, + "learning_rate": 1.9834444093538504e-05, + "loss": 0.3569640517234802, + "step": 793 + }, + { + "epoch": 0.2108617713451069, + "grad_norm": 1.0171085592107372, + "learning_rate": 1.9833647494622275e-05, + "loss": 0.3543340265750885, + "step": 794 + }, + { + "epoch": 0.21112734032664984, + "grad_norm": 1.0426744340585967, + "learning_rate": 1.983284899989479e-05, + "loss": 0.37313222885131836, + "step": 795 + }, + { + "epoch": 0.2113929093081928, + "grad_norm": 1.0940501026222131, + "learning_rate": 1.983204860950998e-05, + "loss": 0.3874257802963257, + "step": 796 + }, + { + "epoch": 0.21165847828973575, + "grad_norm": 1.005805069630653, + "learning_rate": 1.983124632362216e-05, + "loss": 0.3815164864063263, + "step": 797 + }, + { + "epoch": 0.21192404727127873, + "grad_norm": 1.0879143214156584, + "learning_rate": 1.9830442142386e-05, + "loss": 0.39476731419563293, + "step": 798 + }, + { + "epoch": 0.21218961625282168, + "grad_norm": 1.0888281701524323, + "learning_rate": 1.9829636065956527e-05, + "loss": 0.399338036775589, + "step": 799 + }, + { + "epoch": 0.21245518523436463, + "grad_norm": 1.0679987938098825, + "learning_rate": 1.9828828094489157e-05, + "loss": 0.3940344452857971, + "step": 800 + }, + { + "epoch": 0.21272075421590758, + "grad_norm": 1.0124680733329086, + "learning_rate": 1.9828018228139647e-05, + "loss": 0.35597044229507446, + "step": 801 + }, + { + "epoch": 0.21298632319745053, + "grad_norm": 1.197291261672491, + "learning_rate": 1.9827206467064133e-05, + "loss": 0.4309435784816742, + "step": 802 + }, + { + "epoch": 0.21325189217899349, + "grad_norm": 1.0158009285134544, + "learning_rate": 1.9826392811419113e-05, + "loss": 0.37327438592910767, + "step": 803 + }, + { + "epoch": 0.21351746116053644, + "grad_norm": 0.9944187944281718, + "learning_rate": 1.9825577261361454e-05, + "loss": 0.35214242339134216, + "step": 804 + }, + { + "epoch": 0.21378303014207942, + "grad_norm": 1.1575422458756877, + "learning_rate": 1.982475981704838e-05, + "loss": 0.41114968061447144, + "step": 805 + }, + { + "epoch": 0.21404859912362237, + "grad_norm": 0.9719994027948292, + "learning_rate": 1.9823940478637486e-05, + "loss": 0.3632299304008484, + "step": 806 + }, + { + "epoch": 0.21431416810516532, + "grad_norm": 1.1699036102992622, + "learning_rate": 1.9823119246286727e-05, + "loss": 0.39640772342681885, + "step": 807 + }, + { + "epoch": 0.21457973708670827, + "grad_norm": 1.002397111320771, + "learning_rate": 1.9822296120154433e-05, + "loss": 0.39356929063796997, + "step": 808 + }, + { + "epoch": 0.21484530606825122, + "grad_norm": 1.061754718166072, + "learning_rate": 1.9821471100399294e-05, + "loss": 0.3710761070251465, + "step": 809 + }, + { + "epoch": 0.21511087504979418, + "grad_norm": 0.9713246248834058, + "learning_rate": 1.9820644187180354e-05, + "loss": 0.35515087842941284, + "step": 810 + }, + { + "epoch": 0.21537644403133713, + "grad_norm": 1.0166244205196049, + "learning_rate": 1.981981538065704e-05, + "loss": 0.3803205192089081, + "step": 811 + }, + { + "epoch": 0.2156420130128801, + "grad_norm": 1.0421456761704733, + "learning_rate": 1.9818984680989134e-05, + "loss": 0.40275394916534424, + "step": 812 + }, + { + "epoch": 0.21590758199442306, + "grad_norm": 1.0872785008811605, + "learning_rate": 1.9818152088336786e-05, + "loss": 0.3711051344871521, + "step": 813 + }, + { + "epoch": 0.216173150975966, + "grad_norm": 1.0872190904032264, + "learning_rate": 1.9817317602860512e-05, + "loss": 0.4198985695838928, + "step": 814 + }, + { + "epoch": 0.21643871995750896, + "grad_norm": 0.9931448766878032, + "learning_rate": 1.9816481224721185e-05, + "loss": 0.38333773612976074, + "step": 815 + }, + { + "epoch": 0.21670428893905191, + "grad_norm": 1.1679000778390602, + "learning_rate": 1.9815642954080055e-05, + "loss": 0.3959774971008301, + "step": 816 + }, + { + "epoch": 0.21696985792059487, + "grad_norm": 1.1013876458182361, + "learning_rate": 1.9814802791098728e-05, + "loss": 0.3475337326526642, + "step": 817 + }, + { + "epoch": 0.21723542690213782, + "grad_norm": 1.06867842878894, + "learning_rate": 1.981396073593918e-05, + "loss": 0.369370698928833, + "step": 818 + }, + { + "epoch": 0.2175009958836808, + "grad_norm": 1.085763343280496, + "learning_rate": 1.9813116788763744e-05, + "loss": 0.3515776991844177, + "step": 819 + }, + { + "epoch": 0.21776656486522375, + "grad_norm": 1.0780206278908893, + "learning_rate": 1.9812270949735124e-05, + "loss": 0.3637402355670929, + "step": 820 + }, + { + "epoch": 0.2180321338467667, + "grad_norm": 1.0342672695189807, + "learning_rate": 1.9811423219016395e-05, + "loss": 0.3930947780609131, + "step": 821 + }, + { + "epoch": 0.21829770282830965, + "grad_norm": 1.102521832922822, + "learning_rate": 1.981057359677098e-05, + "loss": 0.40081048011779785, + "step": 822 + }, + { + "epoch": 0.2185632718098526, + "grad_norm": 1.0386373096164698, + "learning_rate": 1.9809722083162682e-05, + "loss": 0.3831724226474762, + "step": 823 + }, + { + "epoch": 0.21882884079139556, + "grad_norm": 1.0516274934858763, + "learning_rate": 1.9808868678355662e-05, + "loss": 0.3919270932674408, + "step": 824 + }, + { + "epoch": 0.2190944097729385, + "grad_norm": 1.0623138704484363, + "learning_rate": 1.9808013382514448e-05, + "loss": 0.41782522201538086, + "step": 825 + }, + { + "epoch": 0.2193599787544815, + "grad_norm": 1.0570337251212087, + "learning_rate": 1.9807156195803926e-05, + "loss": 0.3751329779624939, + "step": 826 + }, + { + "epoch": 0.21962554773602444, + "grad_norm": 1.0009279652164118, + "learning_rate": 1.9806297118389353e-05, + "loss": 0.36451685428619385, + "step": 827 + }, + { + "epoch": 0.2198911167175674, + "grad_norm": 1.1911804759546862, + "learning_rate": 1.9805436150436352e-05, + "loss": 0.3924056887626648, + "step": 828 + }, + { + "epoch": 0.22015668569911034, + "grad_norm": 0.9887238598202497, + "learning_rate": 1.9804573292110906e-05, + "loss": 0.34744757413864136, + "step": 829 + }, + { + "epoch": 0.2204222546806533, + "grad_norm": 1.1506637434477502, + "learning_rate": 1.980370854357936e-05, + "loss": 0.4162982702255249, + "step": 830 + }, + { + "epoch": 0.22068782366219625, + "grad_norm": 1.103994708633239, + "learning_rate": 1.9802841905008434e-05, + "loss": 0.36572596430778503, + "step": 831 + }, + { + "epoch": 0.2209533926437392, + "grad_norm": 1.0028116020560682, + "learning_rate": 1.98019733765652e-05, + "loss": 0.3535170555114746, + "step": 832 + }, + { + "epoch": 0.22121896162528218, + "grad_norm": 1.061392974987333, + "learning_rate": 1.9801102958417107e-05, + "loss": 0.3906480073928833, + "step": 833 + }, + { + "epoch": 0.22148453060682513, + "grad_norm": 1.0646039703833918, + "learning_rate": 1.980023065073195e-05, + "loss": 0.34185755252838135, + "step": 834 + }, + { + "epoch": 0.22175009958836808, + "grad_norm": 1.1983506875652454, + "learning_rate": 1.9799356453677913e-05, + "loss": 0.4216359853744507, + "step": 835 + }, + { + "epoch": 0.22201566856991103, + "grad_norm": 1.038756499639493, + "learning_rate": 1.979848036742352e-05, + "loss": 0.365469366312027, + "step": 836 + }, + { + "epoch": 0.222281237551454, + "grad_norm": 1.0128951338762324, + "learning_rate": 1.9797602392137678e-05, + "loss": 0.3570204973220825, + "step": 837 + }, + { + "epoch": 0.22254680653299694, + "grad_norm": 1.0221196075964396, + "learning_rate": 1.9796722527989646e-05, + "loss": 0.3929975926876068, + "step": 838 + }, + { + "epoch": 0.2228123755145399, + "grad_norm": 1.1512146064832047, + "learning_rate": 1.979584077514905e-05, + "loss": 0.39064258337020874, + "step": 839 + }, + { + "epoch": 0.22307794449608287, + "grad_norm": 1.0559333522375243, + "learning_rate": 1.9794957133785884e-05, + "loss": 0.3626471757888794, + "step": 840 + }, + { + "epoch": 0.22334351347762582, + "grad_norm": 1.0867316997584564, + "learning_rate": 1.9794071604070506e-05, + "loss": 0.4337238371372223, + "step": 841 + }, + { + "epoch": 0.22360908245916877, + "grad_norm": 0.9358033183445809, + "learning_rate": 1.9793184186173632e-05, + "loss": 0.3361967206001282, + "step": 842 + }, + { + "epoch": 0.22387465144071172, + "grad_norm": 0.961043072021178, + "learning_rate": 1.9792294880266346e-05, + "loss": 0.3429332971572876, + "step": 843 + }, + { + "epoch": 0.22414022042225468, + "grad_norm": 1.012773989217256, + "learning_rate": 1.97914036865201e-05, + "loss": 0.39196616411209106, + "step": 844 + }, + { + "epoch": 0.22440578940379763, + "grad_norm": 1.1250916546708978, + "learning_rate": 1.9790510605106697e-05, + "loss": 0.3763045072555542, + "step": 845 + }, + { + "epoch": 0.22467135838534058, + "grad_norm": 1.1139610172600873, + "learning_rate": 1.978961563619832e-05, + "loss": 0.41614070534706116, + "step": 846 + }, + { + "epoch": 0.22493692736688356, + "grad_norm": 1.065347693165354, + "learning_rate": 1.9788718779967506e-05, + "loss": 0.3834165334701538, + "step": 847 + }, + { + "epoch": 0.2252024963484265, + "grad_norm": 0.9834992911039661, + "learning_rate": 1.978782003658716e-05, + "loss": 0.3552364110946655, + "step": 848 + }, + { + "epoch": 0.22546806532996946, + "grad_norm": 1.0365749744504318, + "learning_rate": 1.9786919406230544e-05, + "loss": 0.3857925534248352, + "step": 849 + }, + { + "epoch": 0.22573363431151242, + "grad_norm": 1.0779836727772776, + "learning_rate": 1.9786016889071294e-05, + "loss": 0.3501393795013428, + "step": 850 + }, + { + "epoch": 0.22599920329305537, + "grad_norm": 1.1363104904390704, + "learning_rate": 1.9785112485283404e-05, + "loss": 0.36280643939971924, + "step": 851 + }, + { + "epoch": 0.22626477227459832, + "grad_norm": 1.1791591930929934, + "learning_rate": 1.978420619504123e-05, + "loss": 0.3713894486427307, + "step": 852 + }, + { + "epoch": 0.22653034125614127, + "grad_norm": 1.0682718312185442, + "learning_rate": 1.97832980185195e-05, + "loss": 0.3668733537197113, + "step": 853 + }, + { + "epoch": 0.22679591023768425, + "grad_norm": 1.06232834606136, + "learning_rate": 1.978238795589329e-05, + "loss": 0.4054701626300812, + "step": 854 + }, + { + "epoch": 0.2270614792192272, + "grad_norm": 1.1024819375758403, + "learning_rate": 1.9781476007338058e-05, + "loss": 0.3824681043624878, + "step": 855 + }, + { + "epoch": 0.22732704820077015, + "grad_norm": 1.0604830101195206, + "learning_rate": 1.978056217302961e-05, + "loss": 0.4009544253349304, + "step": 856 + }, + { + "epoch": 0.2275926171823131, + "grad_norm": 1.0150812264671392, + "learning_rate": 1.9779646453144133e-05, + "loss": 0.34773316979408264, + "step": 857 + }, + { + "epoch": 0.22785818616385606, + "grad_norm": 1.0737509474924387, + "learning_rate": 1.977872884785815e-05, + "loss": 0.4067278206348419, + "step": 858 + }, + { + "epoch": 0.228123755145399, + "grad_norm": 1.0566398666110703, + "learning_rate": 1.9777809357348584e-05, + "loss": 0.3843458890914917, + "step": 859 + }, + { + "epoch": 0.22838932412694196, + "grad_norm": 1.083451143522079, + "learning_rate": 1.977688798179269e-05, + "loss": 0.4261704683303833, + "step": 860 + }, + { + "epoch": 0.22865489310848494, + "grad_norm": 1.0145015740681522, + "learning_rate": 1.9775964721368098e-05, + "loss": 0.39109086990356445, + "step": 861 + }, + { + "epoch": 0.2289204620900279, + "grad_norm": 1.1472642326588585, + "learning_rate": 1.9775039576252807e-05, + "loss": 0.39436954259872437, + "step": 862 + }, + { + "epoch": 0.22918603107157084, + "grad_norm": 0.9770870267905873, + "learning_rate": 1.9774112546625168e-05, + "loss": 0.3787967562675476, + "step": 863 + }, + { + "epoch": 0.2294516000531138, + "grad_norm": 1.5071435779935147, + "learning_rate": 1.9773183632663907e-05, + "loss": 0.3729320466518402, + "step": 864 + }, + { + "epoch": 0.22971716903465675, + "grad_norm": 1.0048578103437809, + "learning_rate": 1.9772252834548108e-05, + "loss": 0.3817081153392792, + "step": 865 + }, + { + "epoch": 0.2299827380161997, + "grad_norm": 0.9709592169890221, + "learning_rate": 1.9771320152457212e-05, + "loss": 0.3362218737602234, + "step": 866 + }, + { + "epoch": 0.23024830699774265, + "grad_norm": 1.0194192402395448, + "learning_rate": 1.9770385586571033e-05, + "loss": 0.37274059653282166, + "step": 867 + }, + { + "epoch": 0.23051387597928563, + "grad_norm": 1.058710969457703, + "learning_rate": 1.9769449137069746e-05, + "loss": 0.3832330107688904, + "step": 868 + }, + { + "epoch": 0.23077944496082858, + "grad_norm": 0.9857605594513371, + "learning_rate": 1.9768510804133886e-05, + "loss": 0.37420010566711426, + "step": 869 + }, + { + "epoch": 0.23104501394237154, + "grad_norm": 1.0333482020677847, + "learning_rate": 1.976757058794435e-05, + "loss": 0.35314565896987915, + "step": 870 + }, + { + "epoch": 0.2313105829239145, + "grad_norm": 1.0404097802666386, + "learning_rate": 1.97666284886824e-05, + "loss": 0.34667372703552246, + "step": 871 + }, + { + "epoch": 0.23157615190545744, + "grad_norm": 1.1826768759617956, + "learning_rate": 1.976568450652967e-05, + "loss": 0.3465980589389801, + "step": 872 + }, + { + "epoch": 0.2318417208870004, + "grad_norm": 1.6479387485919323, + "learning_rate": 1.9764738641668137e-05, + "loss": 0.40539389848709106, + "step": 873 + }, + { + "epoch": 0.23210728986854334, + "grad_norm": 1.090454596374008, + "learning_rate": 1.976379089428016e-05, + "loss": 0.35154545307159424, + "step": 874 + }, + { + "epoch": 0.23237285885008632, + "grad_norm": 1.1033163387519414, + "learning_rate": 1.9762841264548453e-05, + "loss": 0.39748087525367737, + "step": 875 + }, + { + "epoch": 0.23263842783162927, + "grad_norm": 1.0600221119400453, + "learning_rate": 1.976188975265609e-05, + "loss": 0.41628387570381165, + "step": 876 + }, + { + "epoch": 0.23290399681317223, + "grad_norm": 1.0805125037340586, + "learning_rate": 1.976093635878652e-05, + "loss": 0.4076233208179474, + "step": 877 + }, + { + "epoch": 0.23316956579471518, + "grad_norm": 0.9221839355888705, + "learning_rate": 1.9759981083123533e-05, + "loss": 0.3262259364128113, + "step": 878 + }, + { + "epoch": 0.23343513477625813, + "grad_norm": 1.1690018828805817, + "learning_rate": 1.9759023925851302e-05, + "loss": 0.36561673879623413, + "step": 879 + }, + { + "epoch": 0.23370070375780108, + "grad_norm": 1.083829918240926, + "learning_rate": 1.9758064887154358e-05, + "loss": 0.36661773920059204, + "step": 880 + }, + { + "epoch": 0.23396627273934403, + "grad_norm": 1.0655263771494812, + "learning_rate": 1.9757103967217587e-05, + "loss": 0.34671685099601746, + "step": 881 + }, + { + "epoch": 0.234231841720887, + "grad_norm": 1.0056372913167473, + "learning_rate": 1.9756141166226246e-05, + "loss": 0.3486331105232239, + "step": 882 + }, + { + "epoch": 0.23449741070242996, + "grad_norm": 1.1177836982205323, + "learning_rate": 1.9755176484365953e-05, + "loss": 0.3883505165576935, + "step": 883 + }, + { + "epoch": 0.23476297968397292, + "grad_norm": 1.0548520245203914, + "learning_rate": 1.9754209921822683e-05, + "loss": 0.3832106590270996, + "step": 884 + }, + { + "epoch": 0.23502854866551587, + "grad_norm": 1.078830112662993, + "learning_rate": 1.975324147878278e-05, + "loss": 0.37876033782958984, + "step": 885 + }, + { + "epoch": 0.23529411764705882, + "grad_norm": 1.0689289829128008, + "learning_rate": 1.975227115543295e-05, + "loss": 0.38931846618652344, + "step": 886 + }, + { + "epoch": 0.23555968662860177, + "grad_norm": 0.956721500767322, + "learning_rate": 1.9751298951960258e-05, + "loss": 0.3581021726131439, + "step": 887 + }, + { + "epoch": 0.23582525561014472, + "grad_norm": 1.0206944172292924, + "learning_rate": 1.9750324868552133e-05, + "loss": 0.35196465253829956, + "step": 888 + }, + { + "epoch": 0.2360908245916877, + "grad_norm": 0.9996206423870837, + "learning_rate": 1.974934890539637e-05, + "loss": 0.3635658025741577, + "step": 889 + }, + { + "epoch": 0.23635639357323066, + "grad_norm": 0.9523927655707425, + "learning_rate": 1.9748371062681122e-05, + "loss": 0.345594197511673, + "step": 890 + }, + { + "epoch": 0.2366219625547736, + "grad_norm": 1.0443032231121456, + "learning_rate": 1.97473913405949e-05, + "loss": 0.357181191444397, + "step": 891 + }, + { + "epoch": 0.23688753153631656, + "grad_norm": 1.0008000126392016, + "learning_rate": 1.974640973932659e-05, + "loss": 0.3264622986316681, + "step": 892 + }, + { + "epoch": 0.2371531005178595, + "grad_norm": 0.9731630083329554, + "learning_rate": 1.9745426259065434e-05, + "loss": 0.37950894236564636, + "step": 893 + }, + { + "epoch": 0.23741866949940246, + "grad_norm": 1.1493289415276364, + "learning_rate": 1.9744440900001027e-05, + "loss": 0.37400782108306885, + "step": 894 + }, + { + "epoch": 0.23768423848094541, + "grad_norm": 1.0325785235739895, + "learning_rate": 1.974345366232334e-05, + "loss": 0.3455463945865631, + "step": 895 + }, + { + "epoch": 0.2379498074624884, + "grad_norm": 1.1059511993758653, + "learning_rate": 1.9742464546222702e-05, + "loss": 0.3605351150035858, + "step": 896 + }, + { + "epoch": 0.23821537644403135, + "grad_norm": 0.9763906212855142, + "learning_rate": 1.97414735518898e-05, + "loss": 0.3839051127433777, + "step": 897 + }, + { + "epoch": 0.2384809454255743, + "grad_norm": 1.0304758127284366, + "learning_rate": 1.974048067951569e-05, + "loss": 0.34562867879867554, + "step": 898 + }, + { + "epoch": 0.23874651440711725, + "grad_norm": 1.1332867443652592, + "learning_rate": 1.9739485929291778e-05, + "loss": 0.3986506760120392, + "step": 899 + }, + { + "epoch": 0.2390120833886602, + "grad_norm": 1.1598961775072092, + "learning_rate": 1.9738489301409848e-05, + "loss": 0.3955162465572357, + "step": 900 + }, + { + "epoch": 0.23927765237020315, + "grad_norm": 1.080226447361195, + "learning_rate": 1.9737490796062036e-05, + "loss": 0.370066374540329, + "step": 901 + }, + { + "epoch": 0.2395432213517461, + "grad_norm": 1.0637004733407822, + "learning_rate": 1.973649041344084e-05, + "loss": 0.3777826726436615, + "step": 902 + }, + { + "epoch": 0.23980879033328908, + "grad_norm": 1.1358293788080334, + "learning_rate": 1.9735488153739128e-05, + "loss": 0.327572226524353, + "step": 903 + }, + { + "epoch": 0.24007435931483204, + "grad_norm": 1.071729158749965, + "learning_rate": 1.973448401715011e-05, + "loss": 0.3921743929386139, + "step": 904 + }, + { + "epoch": 0.240339928296375, + "grad_norm": 1.0635179670685195, + "learning_rate": 1.973347800386739e-05, + "loss": 0.3683379888534546, + "step": 905 + }, + { + "epoch": 0.24060549727791794, + "grad_norm": 1.023832589054702, + "learning_rate": 1.9732470114084905e-05, + "loss": 0.390872597694397, + "step": 906 + }, + { + "epoch": 0.2408710662594609, + "grad_norm": 1.0814023137489452, + "learning_rate": 1.9731460347996964e-05, + "loss": 0.3772459626197815, + "step": 907 + }, + { + "epoch": 0.24113663524100384, + "grad_norm": 1.0280982913686894, + "learning_rate": 1.973044870579824e-05, + "loss": 0.37990954518318176, + "step": 908 + }, + { + "epoch": 0.2414022042225468, + "grad_norm": 1.0035238419205756, + "learning_rate": 1.972943518768377e-05, + "loss": 0.3380817770957947, + "step": 909 + }, + { + "epoch": 0.24166777320408978, + "grad_norm": 0.9879847056007396, + "learning_rate": 1.9728419793848935e-05, + "loss": 0.3348115384578705, + "step": 910 + }, + { + "epoch": 0.24193334218563273, + "grad_norm": 1.0561235323428824, + "learning_rate": 1.9727402524489505e-05, + "loss": 0.36936551332473755, + "step": 911 + }, + { + "epoch": 0.24219891116717568, + "grad_norm": 1.0744513063457712, + "learning_rate": 1.9726383379801593e-05, + "loss": 0.3871539235115051, + "step": 912 + }, + { + "epoch": 0.24246448014871863, + "grad_norm": 1.0904556770971818, + "learning_rate": 1.9725362359981676e-05, + "loss": 0.37087059020996094, + "step": 913 + }, + { + "epoch": 0.24273004913026158, + "grad_norm": 0.9802916629421812, + "learning_rate": 1.9724339465226595e-05, + "loss": 0.35582688450813293, + "step": 914 + }, + { + "epoch": 0.24299561811180453, + "grad_norm": 1.0947021466091125, + "learning_rate": 1.9723314695733557e-05, + "loss": 0.38500669598579407, + "step": 915 + }, + { + "epoch": 0.2432611870933475, + "grad_norm": 0.9834121517145057, + "learning_rate": 1.9722288051700116e-05, + "loss": 0.32470762729644775, + "step": 916 + }, + { + "epoch": 0.24352675607489047, + "grad_norm": 1.0805011919993295, + "learning_rate": 1.9721259533324207e-05, + "loss": 0.3822774589061737, + "step": 917 + }, + { + "epoch": 0.24379232505643342, + "grad_norm": 0.9937398719966192, + "learning_rate": 1.972022914080411e-05, + "loss": 0.38374873995780945, + "step": 918 + }, + { + "epoch": 0.24405789403797637, + "grad_norm": 1.0550770033370775, + "learning_rate": 1.9719196874338472e-05, + "loss": 0.3419352173805237, + "step": 919 + }, + { + "epoch": 0.24432346301951932, + "grad_norm": 1.0164630853495407, + "learning_rate": 1.9718162734126308e-05, + "loss": 0.3294275403022766, + "step": 920 + }, + { + "epoch": 0.24458903200106227, + "grad_norm": 1.0668295499881337, + "learning_rate": 1.9717126720366982e-05, + "loss": 0.3585365414619446, + "step": 921 + }, + { + "epoch": 0.24485460098260522, + "grad_norm": 1.0609325079201495, + "learning_rate": 1.9716088833260225e-05, + "loss": 0.38130316138267517, + "step": 922 + }, + { + "epoch": 0.24512016996414818, + "grad_norm": 1.0577067392982809, + "learning_rate": 1.9715049073006133e-05, + "loss": 0.3745136260986328, + "step": 923 + }, + { + "epoch": 0.24538573894569116, + "grad_norm": 1.0457228779122651, + "learning_rate": 1.971400743980516e-05, + "loss": 0.3771660327911377, + "step": 924 + }, + { + "epoch": 0.2456513079272341, + "grad_norm": 1.0133861698501567, + "learning_rate": 1.971296393385812e-05, + "loss": 0.29661691188812256, + "step": 925 + }, + { + "epoch": 0.24591687690877706, + "grad_norm": 0.9516714902458889, + "learning_rate": 1.9711918555366184e-05, + "loss": 0.33783960342407227, + "step": 926 + }, + { + "epoch": 0.24618244589032, + "grad_norm": 1.2469460687001952, + "learning_rate": 1.971087130453089e-05, + "loss": 0.42983683943748474, + "step": 927 + }, + { + "epoch": 0.24644801487186296, + "grad_norm": 0.9725914261438413, + "learning_rate": 1.9709822181554142e-05, + "loss": 0.32242363691329956, + "step": 928 + }, + { + "epoch": 0.24671358385340592, + "grad_norm": 1.0989308968162201, + "learning_rate": 1.970877118663819e-05, + "loss": 0.3576955795288086, + "step": 929 + }, + { + "epoch": 0.24697915283494887, + "grad_norm": 1.116595385391156, + "learning_rate": 1.9707718319985663e-05, + "loss": 0.4185359477996826, + "step": 930 + }, + { + "epoch": 0.24724472181649185, + "grad_norm": 1.1178442474909813, + "learning_rate": 1.970666358179953e-05, + "loss": 0.35377705097198486, + "step": 931 + }, + { + "epoch": 0.2475102907980348, + "grad_norm": 1.1350743092525455, + "learning_rate": 1.9705606972283143e-05, + "loss": 0.3860151171684265, + "step": 932 + }, + { + "epoch": 0.24777585977957775, + "grad_norm": 1.1915035264404457, + "learning_rate": 1.9704548491640195e-05, + "loss": 0.39463168382644653, + "step": 933 + }, + { + "epoch": 0.2480414287611207, + "grad_norm": 1.0462444044755623, + "learning_rate": 1.9703488140074752e-05, + "loss": 0.3670084774494171, + "step": 934 + }, + { + "epoch": 0.24830699774266365, + "grad_norm": 1.2914788702644175, + "learning_rate": 1.9702425917791242e-05, + "loss": 0.388730525970459, + "step": 935 + }, + { + "epoch": 0.2485725667242066, + "grad_norm": 1.128517931307855, + "learning_rate": 1.970136182499444e-05, + "loss": 0.38767656683921814, + "step": 936 + }, + { + "epoch": 0.24883813570574956, + "grad_norm": 1.0771582387425684, + "learning_rate": 1.9700295861889497e-05, + "loss": 0.35394930839538574, + "step": 937 + }, + { + "epoch": 0.24910370468729254, + "grad_norm": 1.0639329095738126, + "learning_rate": 1.9699228028681917e-05, + "loss": 0.3360324501991272, + "step": 938 + }, + { + "epoch": 0.2493692736688355, + "grad_norm": 1.116621384383513, + "learning_rate": 1.9698158325577563e-05, + "loss": 0.390169233083725, + "step": 939 + }, + { + "epoch": 0.24963484265037844, + "grad_norm": 1.108635788765439, + "learning_rate": 1.9697086752782666e-05, + "loss": 0.3921571671962738, + "step": 940 + }, + { + "epoch": 0.2499004116319214, + "grad_norm": 1.0665933445619122, + "learning_rate": 1.9696013310503808e-05, + "loss": 0.3795739710330963, + "step": 941 + }, + { + "epoch": 0.25016598061346434, + "grad_norm": 1.2202319167117164, + "learning_rate": 1.9694937998947935e-05, + "loss": 0.3891025185585022, + "step": 942 + }, + { + "epoch": 0.2504315495950073, + "grad_norm": 0.9751921056908068, + "learning_rate": 1.9693860818322357e-05, + "loss": 0.3548225164413452, + "step": 943 + }, + { + "epoch": 0.25069711857655025, + "grad_norm": 1.0555900207888067, + "learning_rate": 1.9692781768834747e-05, + "loss": 0.3696819543838501, + "step": 944 + }, + { + "epoch": 0.2509626875580932, + "grad_norm": 1.1322184210541604, + "learning_rate": 1.9691700850693126e-05, + "loss": 0.3906037211418152, + "step": 945 + }, + { + "epoch": 0.25122825653963615, + "grad_norm": 1.072434154806742, + "learning_rate": 1.9690618064105883e-05, + "loss": 0.38181206583976746, + "step": 946 + }, + { + "epoch": 0.2514938255211791, + "grad_norm": 1.0644124497842522, + "learning_rate": 1.9689533409281765e-05, + "loss": 0.36904582381248474, + "step": 947 + }, + { + "epoch": 0.25175939450272206, + "grad_norm": 1.097105891991116, + "learning_rate": 1.9688446886429885e-05, + "loss": 0.3635823130607605, + "step": 948 + }, + { + "epoch": 0.25202496348426506, + "grad_norm": 0.9954310874837226, + "learning_rate": 1.9687358495759713e-05, + "loss": 0.3527260422706604, + "step": 949 + }, + { + "epoch": 0.252290532465808, + "grad_norm": 1.1902017812011518, + "learning_rate": 1.968626823748107e-05, + "loss": 0.3781110346317291, + "step": 950 + }, + { + "epoch": 0.25255610144735097, + "grad_norm": 1.0346217070487125, + "learning_rate": 1.968517611180415e-05, + "loss": 0.3931560814380646, + "step": 951 + }, + { + "epoch": 0.2528216704288939, + "grad_norm": 1.0783245371828571, + "learning_rate": 1.9684082118939503e-05, + "loss": 0.39111074805259705, + "step": 952 + }, + { + "epoch": 0.25308723941043687, + "grad_norm": 1.2090013193363973, + "learning_rate": 1.9682986259098037e-05, + "loss": 0.385967880487442, + "step": 953 + }, + { + "epoch": 0.2533528083919798, + "grad_norm": 1.0103878099057118, + "learning_rate": 1.9681888532491022e-05, + "loss": 0.34006553888320923, + "step": 954 + }, + { + "epoch": 0.2536183773735228, + "grad_norm": 1.0077784550534965, + "learning_rate": 1.9680788939330086e-05, + "loss": 0.36069998145103455, + "step": 955 + }, + { + "epoch": 0.2538839463550657, + "grad_norm": 1.090649670414093, + "learning_rate": 1.9679687479827212e-05, + "loss": 0.3354898691177368, + "step": 956 + }, + { + "epoch": 0.2541495153366087, + "grad_norm": 1.0691933766101984, + "learning_rate": 1.9678584154194756e-05, + "loss": 0.35667335987091064, + "step": 957 + }, + { + "epoch": 0.25441508431815163, + "grad_norm": 1.2652121820599898, + "learning_rate": 1.9677478962645422e-05, + "loss": 0.4003029465675354, + "step": 958 + }, + { + "epoch": 0.2546806532996946, + "grad_norm": 1.0313200756086844, + "learning_rate": 1.9676371905392278e-05, + "loss": 0.34397056698799133, + "step": 959 + }, + { + "epoch": 0.25494622228123753, + "grad_norm": 1.0544706314753822, + "learning_rate": 1.9675262982648757e-05, + "loss": 0.35319578647613525, + "step": 960 + }, + { + "epoch": 0.2552117912627805, + "grad_norm": 1.0179000224070893, + "learning_rate": 1.967415219462864e-05, + "loss": 0.34840327501296997, + "step": 961 + }, + { + "epoch": 0.25547736024432344, + "grad_norm": 0.9360325612494472, + "learning_rate": 1.9673039541546076e-05, + "loss": 0.3298989534378052, + "step": 962 + }, + { + "epoch": 0.25574292922586644, + "grad_norm": 1.0904225305922717, + "learning_rate": 1.9671925023615572e-05, + "loss": 0.38438719511032104, + "step": 963 + }, + { + "epoch": 0.2560084982074094, + "grad_norm": 1.128608711014793, + "learning_rate": 1.9670808641051994e-05, + "loss": 0.3834493160247803, + "step": 964 + }, + { + "epoch": 0.25627406718895235, + "grad_norm": 1.0456501331264114, + "learning_rate": 1.9669690394070564e-05, + "loss": 0.3713288903236389, + "step": 965 + }, + { + "epoch": 0.2565396361704953, + "grad_norm": 1.0864184401996346, + "learning_rate": 1.966857028288687e-05, + "loss": 0.37564241886138916, + "step": 966 + }, + { + "epoch": 0.25680520515203825, + "grad_norm": 1.0329676619050974, + "learning_rate": 1.9667448307716857e-05, + "loss": 0.30162689089775085, + "step": 967 + }, + { + "epoch": 0.2570707741335812, + "grad_norm": 1.0948768995323135, + "learning_rate": 1.9666324468776826e-05, + "loss": 0.35969680547714233, + "step": 968 + }, + { + "epoch": 0.25733634311512416, + "grad_norm": 1.206651724690857, + "learning_rate": 1.9665198766283444e-05, + "loss": 0.40947285294532776, + "step": 969 + }, + { + "epoch": 0.2576019120966671, + "grad_norm": 1.0651964473806064, + "learning_rate": 1.9664071200453726e-05, + "loss": 0.35868343710899353, + "step": 970 + }, + { + "epoch": 0.25786748107821006, + "grad_norm": 1.1330033214419297, + "learning_rate": 1.966294177150506e-05, + "loss": 0.3569234311580658, + "step": 971 + }, + { + "epoch": 0.258133050059753, + "grad_norm": 1.1641224987322216, + "learning_rate": 1.9661810479655184e-05, + "loss": 0.3381764888763428, + "step": 972 + }, + { + "epoch": 0.25839861904129596, + "grad_norm": 1.535927577191984, + "learning_rate": 1.9660677325122196e-05, + "loss": 0.39847785234451294, + "step": 973 + }, + { + "epoch": 0.2586641880228389, + "grad_norm": 0.9608622914302752, + "learning_rate": 1.965954230812456e-05, + "loss": 0.33162468671798706, + "step": 974 + }, + { + "epoch": 0.25892975700438187, + "grad_norm": 1.0421688584245348, + "learning_rate": 1.9658405428881087e-05, + "loss": 0.3627605438232422, + "step": 975 + }, + { + "epoch": 0.2591953259859248, + "grad_norm": 1.0501672081861986, + "learning_rate": 1.9657266687610965e-05, + "loss": 0.3253796100616455, + "step": 976 + }, + { + "epoch": 0.2594608949674678, + "grad_norm": 1.0198628618780734, + "learning_rate": 1.9656126084533716e-05, + "loss": 0.3341265916824341, + "step": 977 + }, + { + "epoch": 0.2597264639490108, + "grad_norm": 1.0202967346949672, + "learning_rate": 1.9654983619869242e-05, + "loss": 0.3714970052242279, + "step": 978 + }, + { + "epoch": 0.25999203293055373, + "grad_norm": 1.0333982958482495, + "learning_rate": 1.9653839293837798e-05, + "loss": 0.3360912501811981, + "step": 979 + }, + { + "epoch": 0.2602576019120967, + "grad_norm": 1.0322459892827835, + "learning_rate": 1.9652693106659995e-05, + "loss": 0.3780854642391205, + "step": 980 + }, + { + "epoch": 0.26052317089363963, + "grad_norm": 1.1062219940451128, + "learning_rate": 1.9651545058556803e-05, + "loss": 0.33595478534698486, + "step": 981 + }, + { + "epoch": 0.2607887398751826, + "grad_norm": 1.111464982167328, + "learning_rate": 1.965039514974955e-05, + "loss": 0.3608357012271881, + "step": 982 + }, + { + "epoch": 0.26105430885672554, + "grad_norm": 1.0024532391943957, + "learning_rate": 1.964924338045993e-05, + "loss": 0.3807666599750519, + "step": 983 + }, + { + "epoch": 0.2613198778382685, + "grad_norm": 1.0213030373156555, + "learning_rate": 1.964808975090999e-05, + "loss": 0.3551647663116455, + "step": 984 + }, + { + "epoch": 0.26158544681981144, + "grad_norm": 1.0761922389740786, + "learning_rate": 1.9646934261322135e-05, + "loss": 0.3771904706954956, + "step": 985 + }, + { + "epoch": 0.2618510158013544, + "grad_norm": 1.1925998045571422, + "learning_rate": 1.964577691191913e-05, + "loss": 0.41103222966194153, + "step": 986 + }, + { + "epoch": 0.26211658478289734, + "grad_norm": 1.0270282722515527, + "learning_rate": 1.9644617702924093e-05, + "loss": 0.34439292550086975, + "step": 987 + }, + { + "epoch": 0.2623821537644403, + "grad_norm": 1.1578988390038234, + "learning_rate": 1.9643456634560515e-05, + "loss": 0.41214391589164734, + "step": 988 + }, + { + "epoch": 0.26264772274598325, + "grad_norm": 0.9879567855265076, + "learning_rate": 1.9642293707052232e-05, + "loss": 0.3186502754688263, + "step": 989 + }, + { + "epoch": 0.2629132917275262, + "grad_norm": 1.039224300824638, + "learning_rate": 1.9641128920623438e-05, + "loss": 0.3534559905529022, + "step": 990 + }, + { + "epoch": 0.2631788607090692, + "grad_norm": 1.0867820667103292, + "learning_rate": 1.96399622754987e-05, + "loss": 0.35217320919036865, + "step": 991 + }, + { + "epoch": 0.26344442969061216, + "grad_norm": 0.954421559413849, + "learning_rate": 1.9638793771902924e-05, + "loss": 0.31661587953567505, + "step": 992 + }, + { + "epoch": 0.2637099986721551, + "grad_norm": 0.9881195075112362, + "learning_rate": 1.9637623410061392e-05, + "loss": 0.32468482851982117, + "step": 993 + }, + { + "epoch": 0.26397556765369806, + "grad_norm": 1.0355017939200293, + "learning_rate": 1.9636451190199727e-05, + "loss": 0.346771776676178, + "step": 994 + }, + { + "epoch": 0.264241136635241, + "grad_norm": 1.0997948902450267, + "learning_rate": 1.9635277112543928e-05, + "loss": 0.36409270763397217, + "step": 995 + }, + { + "epoch": 0.26450670561678397, + "grad_norm": 1.2132528670947562, + "learning_rate": 1.963410117732034e-05, + "loss": 0.404967725276947, + "step": 996 + }, + { + "epoch": 0.2647722745983269, + "grad_norm": 1.1962964423617835, + "learning_rate": 1.9632923384755666e-05, + "loss": 0.39506661891937256, + "step": 997 + }, + { + "epoch": 0.26503784357986987, + "grad_norm": 1.1967751692769375, + "learning_rate": 1.9631743735076972e-05, + "loss": 0.3833203911781311, + "step": 998 + }, + { + "epoch": 0.2653034125614128, + "grad_norm": 1.083140773107417, + "learning_rate": 1.9630562228511682e-05, + "loss": 0.34522518515586853, + "step": 999 + }, + { + "epoch": 0.2655689815429558, + "grad_norm": 1.1367328076589556, + "learning_rate": 1.962937886528758e-05, + "loss": 0.3818400800228119, + "step": 1000 + }, + { + "epoch": 0.2658345505244987, + "grad_norm": 1.2496699132911573, + "learning_rate": 1.9628193645632796e-05, + "loss": 0.40827828645706177, + "step": 1001 + }, + { + "epoch": 0.2661001195060417, + "grad_norm": 1.0406728708542907, + "learning_rate": 1.962700656977583e-05, + "loss": 0.3448852002620697, + "step": 1002 + }, + { + "epoch": 0.26636568848758463, + "grad_norm": 1.1035895986897222, + "learning_rate": 1.9625817637945542e-05, + "loss": 0.36560773849487305, + "step": 1003 + }, + { + "epoch": 0.2666312574691276, + "grad_norm": 1.1637977684704512, + "learning_rate": 1.962462685037114e-05, + "loss": 0.38305893540382385, + "step": 1004 + }, + { + "epoch": 0.2668968264506706, + "grad_norm": 1.0320363555261158, + "learning_rate": 1.962343420728219e-05, + "loss": 0.3562568426132202, + "step": 1005 + }, + { + "epoch": 0.26716239543221354, + "grad_norm": 1.18312934129538, + "learning_rate": 1.9622239708908626e-05, + "loss": 0.37458860874176025, + "step": 1006 + }, + { + "epoch": 0.2674279644137565, + "grad_norm": 1.058042672523148, + "learning_rate": 1.9621043355480726e-05, + "loss": 0.35852503776550293, + "step": 1007 + }, + { + "epoch": 0.26769353339529944, + "grad_norm": 1.0975239398171568, + "learning_rate": 1.961984514722914e-05, + "loss": 0.4056578278541565, + "step": 1008 + }, + { + "epoch": 0.2679591023768424, + "grad_norm": 1.1773057151207822, + "learning_rate": 1.9618645084384863e-05, + "loss": 0.4531296491622925, + "step": 1009 + }, + { + "epoch": 0.26822467135838535, + "grad_norm": 0.9095840908563808, + "learning_rate": 1.9617443167179256e-05, + "loss": 0.3356376886367798, + "step": 1010 + }, + { + "epoch": 0.2684902403399283, + "grad_norm": 1.09880831555839, + "learning_rate": 1.9616239395844033e-05, + "loss": 0.38045161962509155, + "step": 1011 + }, + { + "epoch": 0.26875580932147125, + "grad_norm": 1.028451509847456, + "learning_rate": 1.9615033770611268e-05, + "loss": 0.3549511730670929, + "step": 1012 + }, + { + "epoch": 0.2690213783030142, + "grad_norm": 1.0546213631772847, + "learning_rate": 1.9613826291713393e-05, + "loss": 0.33363252878189087, + "step": 1013 + }, + { + "epoch": 0.26928694728455715, + "grad_norm": 0.9539256345754278, + "learning_rate": 1.961261695938319e-05, + "loss": 0.3443339467048645, + "step": 1014 + }, + { + "epoch": 0.2695525162661001, + "grad_norm": 0.9897755385014708, + "learning_rate": 1.9611405773853807e-05, + "loss": 0.3258364796638489, + "step": 1015 + }, + { + "epoch": 0.26981808524764306, + "grad_norm": 1.0357196980681809, + "learning_rate": 1.961019273535875e-05, + "loss": 0.357122540473938, + "step": 1016 + }, + { + "epoch": 0.270083654229186, + "grad_norm": 0.9668495504097999, + "learning_rate": 1.9608977844131875e-05, + "loss": 0.32092082500457764, + "step": 1017 + }, + { + "epoch": 0.27034922321072896, + "grad_norm": 1.0067299219043435, + "learning_rate": 1.96077611004074e-05, + "loss": 0.36354511976242065, + "step": 1018 + }, + { + "epoch": 0.27061479219227197, + "grad_norm": 1.0982243281899924, + "learning_rate": 1.9606542504419895e-05, + "loss": 0.37128758430480957, + "step": 1019 + }, + { + "epoch": 0.2708803611738149, + "grad_norm": 1.1112959838703056, + "learning_rate": 1.9605322056404294e-05, + "loss": 0.3732859790325165, + "step": 1020 + }, + { + "epoch": 0.2711459301553579, + "grad_norm": 1.0058814849372155, + "learning_rate": 1.9604099756595885e-05, + "loss": 0.32642674446105957, + "step": 1021 + }, + { + "epoch": 0.2714114991369008, + "grad_norm": 1.10371255398192, + "learning_rate": 1.9602875605230313e-05, + "loss": 0.376791775226593, + "step": 1022 + }, + { + "epoch": 0.2716770681184438, + "grad_norm": 1.0603007725295257, + "learning_rate": 1.960164960254358e-05, + "loss": 0.34514784812927246, + "step": 1023 + }, + { + "epoch": 0.27194263709998673, + "grad_norm": 1.225533197470795, + "learning_rate": 1.9600421748772044e-05, + "loss": 0.3752189576625824, + "step": 1024 + }, + { + "epoch": 0.2722082060815297, + "grad_norm": 1.0783483670765837, + "learning_rate": 1.959919204415242e-05, + "loss": 0.33100831508636475, + "step": 1025 + }, + { + "epoch": 0.27247377506307263, + "grad_norm": 1.1910668751599112, + "learning_rate": 1.9597960488921785e-05, + "loss": 0.42713654041290283, + "step": 1026 + }, + { + "epoch": 0.2727393440446156, + "grad_norm": 1.110777223027095, + "learning_rate": 1.9596727083317565e-05, + "loss": 0.3746519684791565, + "step": 1027 + }, + { + "epoch": 0.27300491302615854, + "grad_norm": 1.1133725792972708, + "learning_rate": 1.9595491827577543e-05, + "loss": 0.39962098002433777, + "step": 1028 + }, + { + "epoch": 0.2732704820077015, + "grad_norm": 1.0544310192284179, + "learning_rate": 1.9594254721939866e-05, + "loss": 0.35112401843070984, + "step": 1029 + }, + { + "epoch": 0.27353605098924444, + "grad_norm": 1.0749153592990304, + "learning_rate": 1.9593015766643037e-05, + "loss": 0.3648139238357544, + "step": 1030 + }, + { + "epoch": 0.2738016199707874, + "grad_norm": 1.0268996180520502, + "learning_rate": 1.9591774961925902e-05, + "loss": 0.31544098258018494, + "step": 1031 + }, + { + "epoch": 0.27406718895233034, + "grad_norm": 1.1260952074052377, + "learning_rate": 1.959053230802768e-05, + "loss": 0.3593738079071045, + "step": 1032 + }, + { + "epoch": 0.27433275793387335, + "grad_norm": 1.1009303195981317, + "learning_rate": 1.958928780518794e-05, + "loss": 0.39784368872642517, + "step": 1033 + }, + { + "epoch": 0.2745983269154163, + "grad_norm": 1.1304731324804922, + "learning_rate": 1.9588041453646606e-05, + "loss": 0.3869936168193817, + "step": 1034 + }, + { + "epoch": 0.27486389589695925, + "grad_norm": 0.9803124730292929, + "learning_rate": 1.958679325364396e-05, + "loss": 0.31108593940734863, + "step": 1035 + }, + { + "epoch": 0.2751294648785022, + "grad_norm": 1.098791994520666, + "learning_rate": 1.958554320542064e-05, + "loss": 0.3917708098888397, + "step": 1036 + }, + { + "epoch": 0.27539503386004516, + "grad_norm": 0.9969159455112034, + "learning_rate": 1.958429130921764e-05, + "loss": 0.36782944202423096, + "step": 1037 + }, + { + "epoch": 0.2756606028415881, + "grad_norm": 0.9381100088398062, + "learning_rate": 1.9583037565276314e-05, + "loss": 0.36196422576904297, + "step": 1038 + }, + { + "epoch": 0.27592617182313106, + "grad_norm": 1.0783473143219733, + "learning_rate": 1.9581781973838368e-05, + "loss": 0.32208555936813354, + "step": 1039 + }, + { + "epoch": 0.276191740804674, + "grad_norm": 0.9653316626874986, + "learning_rate": 1.958052453514586e-05, + "loss": 0.33451759815216064, + "step": 1040 + }, + { + "epoch": 0.27645730978621696, + "grad_norm": 1.0328342572912144, + "learning_rate": 1.9579265249441216e-05, + "loss": 0.3228047788143158, + "step": 1041 + }, + { + "epoch": 0.2767228787677599, + "grad_norm": 1.0944658380016739, + "learning_rate": 1.957800411696721e-05, + "loss": 0.36992791295051575, + "step": 1042 + }, + { + "epoch": 0.27698844774930287, + "grad_norm": 0.9799580951396849, + "learning_rate": 1.9576741137966967e-05, + "loss": 0.3072342276573181, + "step": 1043 + }, + { + "epoch": 0.2772540167308458, + "grad_norm": 1.0637046756594408, + "learning_rate": 1.9575476312683985e-05, + "loss": 0.3372080326080322, + "step": 1044 + }, + { + "epoch": 0.27751958571238877, + "grad_norm": 1.0509701364189301, + "learning_rate": 1.95742096413621e-05, + "loss": 0.34725332260131836, + "step": 1045 + }, + { + "epoch": 0.2777851546939317, + "grad_norm": 1.1053591471100805, + "learning_rate": 1.9572941124245516e-05, + "loss": 0.36714982986450195, + "step": 1046 + }, + { + "epoch": 0.27805072367547473, + "grad_norm": 1.208127444221669, + "learning_rate": 1.957167076157878e-05, + "loss": 0.4163498282432556, + "step": 1047 + }, + { + "epoch": 0.2783162926570177, + "grad_norm": 1.1861975128714084, + "learning_rate": 1.9570398553606815e-05, + "loss": 0.40059348940849304, + "step": 1048 + }, + { + "epoch": 0.27858186163856063, + "grad_norm": 1.085993120538819, + "learning_rate": 1.956912450057488e-05, + "loss": 0.3622320294380188, + "step": 1049 + }, + { + "epoch": 0.2788474306201036, + "grad_norm": 1.1326017870689584, + "learning_rate": 1.9567848602728595e-05, + "loss": 0.35159534215927124, + "step": 1050 + }, + { + "epoch": 0.27911299960164654, + "grad_norm": 0.9516936878211085, + "learning_rate": 1.9566570860313944e-05, + "loss": 0.3093762993812561, + "step": 1051 + }, + { + "epoch": 0.2793785685831895, + "grad_norm": 1.040326152894859, + "learning_rate": 1.9565291273577255e-05, + "loss": 0.341474324464798, + "step": 1052 + }, + { + "epoch": 0.27964413756473244, + "grad_norm": 1.0885626452470811, + "learning_rate": 1.9564009842765225e-05, + "loss": 0.35376566648483276, + "step": 1053 + }, + { + "epoch": 0.2799097065462754, + "grad_norm": 1.09154548256864, + "learning_rate": 1.9562726568124892e-05, + "loss": 0.3487662374973297, + "step": 1054 + }, + { + "epoch": 0.28017527552781835, + "grad_norm": 1.014222924008021, + "learning_rate": 1.956144144990366e-05, + "loss": 0.3610745370388031, + "step": 1055 + }, + { + "epoch": 0.2804408445093613, + "grad_norm": 0.9789890869027496, + "learning_rate": 1.9560154488349284e-05, + "loss": 0.33230137825012207, + "step": 1056 + }, + { + "epoch": 0.28070641349090425, + "grad_norm": 1.0104241821081763, + "learning_rate": 1.9558865683709875e-05, + "loss": 0.310351699590683, + "step": 1057 + }, + { + "epoch": 0.2809719824724472, + "grad_norm": 1.1188708821966176, + "learning_rate": 1.9557575036233897e-05, + "loss": 0.39930224418640137, + "step": 1058 + }, + { + "epoch": 0.28123755145399015, + "grad_norm": 1.0498907782820184, + "learning_rate": 1.955628254617017e-05, + "loss": 0.3345295488834381, + "step": 1059 + }, + { + "epoch": 0.2815031204355331, + "grad_norm": 1.1059864789744056, + "learning_rate": 1.9554988213767875e-05, + "loss": 0.37963107228279114, + "step": 1060 + }, + { + "epoch": 0.2817686894170761, + "grad_norm": 1.0825219178132603, + "learning_rate": 1.9553692039276545e-05, + "loss": 0.3923654854297638, + "step": 1061 + }, + { + "epoch": 0.28203425839861906, + "grad_norm": 1.0736283126776336, + "learning_rate": 1.9552394022946068e-05, + "loss": 0.363646924495697, + "step": 1062 + }, + { + "epoch": 0.282299827380162, + "grad_norm": 1.1051684289136041, + "learning_rate": 1.9551094165026677e-05, + "loss": 0.35486382246017456, + "step": 1063 + }, + { + "epoch": 0.28256539636170497, + "grad_norm": 1.0845117937449689, + "learning_rate": 1.954979246576898e-05, + "loss": 0.35215455293655396, + "step": 1064 + }, + { + "epoch": 0.2828309653432479, + "grad_norm": 1.1587243435425785, + "learning_rate": 1.9548488925423924e-05, + "loss": 0.3936809003353119, + "step": 1065 + }, + { + "epoch": 0.28309653432479087, + "grad_norm": 1.0399965264634783, + "learning_rate": 1.9547183544242817e-05, + "loss": 0.36852866411209106, + "step": 1066 + }, + { + "epoch": 0.2833621033063338, + "grad_norm": 1.0679817467710029, + "learning_rate": 1.954587632247732e-05, + "loss": 0.3552001714706421, + "step": 1067 + }, + { + "epoch": 0.2836276722878768, + "grad_norm": 1.1330169189394568, + "learning_rate": 1.9544567260379455e-05, + "loss": 0.3684498965740204, + "step": 1068 + }, + { + "epoch": 0.2838932412694197, + "grad_norm": 0.9857931835351914, + "learning_rate": 1.9543256358201586e-05, + "loss": 0.3367026448249817, + "step": 1069 + }, + { + "epoch": 0.2841588102509627, + "grad_norm": 1.0677692738667734, + "learning_rate": 1.9541943616196443e-05, + "loss": 0.3702335059642792, + "step": 1070 + }, + { + "epoch": 0.28442437923250563, + "grad_norm": 1.1114119189633371, + "learning_rate": 1.9540629034617108e-05, + "loss": 0.3430984318256378, + "step": 1071 + }, + { + "epoch": 0.2846899482140486, + "grad_norm": 1.1406170357402363, + "learning_rate": 1.953931261371702e-05, + "loss": 0.36514735221862793, + "step": 1072 + }, + { + "epoch": 0.28495551719559153, + "grad_norm": 1.0428104806049732, + "learning_rate": 1.9537994353749963e-05, + "loss": 0.3524945080280304, + "step": 1073 + }, + { + "epoch": 0.2852210861771345, + "grad_norm": 1.0283973360981475, + "learning_rate": 1.9536674254970088e-05, + "loss": 0.32405683398246765, + "step": 1074 + }, + { + "epoch": 0.2854866551586775, + "grad_norm": 1.0649875575316718, + "learning_rate": 1.9535352317631888e-05, + "loss": 0.30863165855407715, + "step": 1075 + }, + { + "epoch": 0.28575222414022045, + "grad_norm": 1.0647565002745494, + "learning_rate": 1.953402854199022e-05, + "loss": 0.34343889355659485, + "step": 1076 + }, + { + "epoch": 0.2860177931217634, + "grad_norm": 1.2339349330872973, + "learning_rate": 1.9532702928300292e-05, + "loss": 0.3639434576034546, + "step": 1077 + }, + { + "epoch": 0.28628336210330635, + "grad_norm": 1.0888261251069975, + "learning_rate": 1.9531375476817667e-05, + "loss": 0.3380300998687744, + "step": 1078 + }, + { + "epoch": 0.2865489310848493, + "grad_norm": 1.1078839119175599, + "learning_rate": 1.9530046187798267e-05, + "loss": 0.3323265016078949, + "step": 1079 + }, + { + "epoch": 0.28681450006639225, + "grad_norm": 1.0529271541493659, + "learning_rate": 1.9528715061498355e-05, + "loss": 0.3439220190048218, + "step": 1080 + }, + { + "epoch": 0.2870800690479352, + "grad_norm": 1.088357435010649, + "learning_rate": 1.952738209817456e-05, + "loss": 0.36376965045928955, + "step": 1081 + }, + { + "epoch": 0.28734563802947816, + "grad_norm": 1.0188116446188513, + "learning_rate": 1.952604729808386e-05, + "loss": 0.3281211853027344, + "step": 1082 + }, + { + "epoch": 0.2876112070110211, + "grad_norm": 1.0999135645201878, + "learning_rate": 1.9524710661483594e-05, + "loss": 0.3538089990615845, + "step": 1083 + }, + { + "epoch": 0.28787677599256406, + "grad_norm": 1.1475903462769852, + "learning_rate": 1.9523372188631442e-05, + "loss": 0.3982803225517273, + "step": 1084 + }, + { + "epoch": 0.288142344974107, + "grad_norm": 1.11408923860859, + "learning_rate": 1.9522031879785453e-05, + "loss": 0.3958810567855835, + "step": 1085 + }, + { + "epoch": 0.28840791395564996, + "grad_norm": 1.191451776763126, + "learning_rate": 1.9520689735204016e-05, + "loss": 0.40133988857269287, + "step": 1086 + }, + { + "epoch": 0.2886734829371929, + "grad_norm": 1.048862195613205, + "learning_rate": 1.9519345755145886e-05, + "loss": 0.32411646842956543, + "step": 1087 + }, + { + "epoch": 0.28893905191873587, + "grad_norm": 1.210003646730205, + "learning_rate": 1.9517999939870166e-05, + "loss": 0.38678207993507385, + "step": 1088 + }, + { + "epoch": 0.2892046209002789, + "grad_norm": 1.0663258874668164, + "learning_rate": 1.951665228963631e-05, + "loss": 0.36829686164855957, + "step": 1089 + }, + { + "epoch": 0.2894701898818218, + "grad_norm": 0.9884592653808488, + "learning_rate": 1.9515302804704134e-05, + "loss": 0.38631704449653625, + "step": 1090 + }, + { + "epoch": 0.2897357588633648, + "grad_norm": 1.1934503112083867, + "learning_rate": 1.9513951485333798e-05, + "loss": 0.39288902282714844, + "step": 1091 + }, + { + "epoch": 0.29000132784490773, + "grad_norm": 1.0804742457342014, + "learning_rate": 1.9512598331785822e-05, + "loss": 0.3655658960342407, + "step": 1092 + }, + { + "epoch": 0.2902668968264507, + "grad_norm": 0.9929300268939649, + "learning_rate": 1.9511243344321076e-05, + "loss": 0.3263852596282959, + "step": 1093 + }, + { + "epoch": 0.29053246580799363, + "grad_norm": 1.1166275426043832, + "learning_rate": 1.9509886523200792e-05, + "loss": 0.37939125299453735, + "step": 1094 + }, + { + "epoch": 0.2907980347895366, + "grad_norm": 1.074761796186792, + "learning_rate": 1.9508527868686543e-05, + "loss": 0.34218865633010864, + "step": 1095 + }, + { + "epoch": 0.29106360377107954, + "grad_norm": 1.036633851483027, + "learning_rate": 1.9507167381040263e-05, + "loss": 0.368261456489563, + "step": 1096 + }, + { + "epoch": 0.2913291727526225, + "grad_norm": 1.083724731335207, + "learning_rate": 1.950580506052424e-05, + "loss": 0.36133286356925964, + "step": 1097 + }, + { + "epoch": 0.29159474173416544, + "grad_norm": 1.0542758401630365, + "learning_rate": 1.9504440907401113e-05, + "loss": 0.3667418658733368, + "step": 1098 + }, + { + "epoch": 0.2918603107157084, + "grad_norm": 0.9961595646698646, + "learning_rate": 1.950307492193387e-05, + "loss": 0.34444570541381836, + "step": 1099 + }, + { + "epoch": 0.29212587969725134, + "grad_norm": 1.1203470867439278, + "learning_rate": 1.9501707104385863e-05, + "loss": 0.41261589527130127, + "step": 1100 + }, + { + "epoch": 0.2923914486787943, + "grad_norm": 1.0847270622391922, + "learning_rate": 1.9500337455020788e-05, + "loss": 0.3762981593608856, + "step": 1101 + }, + { + "epoch": 0.29265701766033725, + "grad_norm": 1.108635996430537, + "learning_rate": 1.9498965974102697e-05, + "loss": 0.3527417480945587, + "step": 1102 + }, + { + "epoch": 0.29292258664188026, + "grad_norm": 1.1555485155020386, + "learning_rate": 1.9497592661895996e-05, + "loss": 0.34812286496162415, + "step": 1103 + }, + { + "epoch": 0.2931881556234232, + "grad_norm": 0.9844968948580171, + "learning_rate": 1.9496217518665444e-05, + "loss": 0.33663398027420044, + "step": 1104 + }, + { + "epoch": 0.29345372460496616, + "grad_norm": 0.997090208380272, + "learning_rate": 1.9494840544676156e-05, + "loss": 0.3632991313934326, + "step": 1105 + }, + { + "epoch": 0.2937192935865091, + "grad_norm": 1.3515018592791732, + "learning_rate": 1.9493461740193587e-05, + "loss": 0.37389490008354187, + "step": 1106 + }, + { + "epoch": 0.29398486256805206, + "grad_norm": 1.204356467911551, + "learning_rate": 1.949208110548356e-05, + "loss": 0.3634020686149597, + "step": 1107 + }, + { + "epoch": 0.294250431549595, + "grad_norm": 1.0778805299295515, + "learning_rate": 1.9490698640812247e-05, + "loss": 0.36032742261886597, + "step": 1108 + }, + { + "epoch": 0.29451600053113797, + "grad_norm": 1.1504972318858309, + "learning_rate": 1.9489314346446164e-05, + "loss": 0.3385765552520752, + "step": 1109 + }, + { + "epoch": 0.2947815695126809, + "grad_norm": 1.0946200184976398, + "learning_rate": 1.9487928222652195e-05, + "loss": 0.3751915991306305, + "step": 1110 + }, + { + "epoch": 0.29504713849422387, + "grad_norm": 1.0903856446796527, + "learning_rate": 1.9486540269697564e-05, + "loss": 0.36069825291633606, + "step": 1111 + }, + { + "epoch": 0.2953127074757668, + "grad_norm": 1.009573568422265, + "learning_rate": 1.948515048784985e-05, + "loss": 0.32703787088394165, + "step": 1112 + }, + { + "epoch": 0.2955782764573098, + "grad_norm": 0.9196963642088989, + "learning_rate": 1.948375887737699e-05, + "loss": 0.312494158744812, + "step": 1113 + }, + { + "epoch": 0.2958438454388527, + "grad_norm": 0.9880564768480579, + "learning_rate": 1.9482365438547272e-05, + "loss": 0.30626165866851807, + "step": 1114 + }, + { + "epoch": 0.2961094144203957, + "grad_norm": 1.07827456569524, + "learning_rate": 1.948097017162933e-05, + "loss": 0.3625817894935608, + "step": 1115 + }, + { + "epoch": 0.29637498340193863, + "grad_norm": 1.1789711489550672, + "learning_rate": 1.9479573076892152e-05, + "loss": 0.38403773307800293, + "step": 1116 + }, + { + "epoch": 0.2966405523834816, + "grad_norm": 1.0638061154391991, + "learning_rate": 1.9478174154605093e-05, + "loss": 0.3645164966583252, + "step": 1117 + }, + { + "epoch": 0.2969061213650246, + "grad_norm": 1.0428170431433939, + "learning_rate": 1.9476773405037836e-05, + "loss": 0.3714389503002167, + "step": 1118 + }, + { + "epoch": 0.29717169034656754, + "grad_norm": 1.1488169814057956, + "learning_rate": 1.9475370828460436e-05, + "loss": 0.39809900522232056, + "step": 1119 + }, + { + "epoch": 0.2974372593281105, + "grad_norm": 1.0702503358715294, + "learning_rate": 1.9473966425143292e-05, + "loss": 0.3698490262031555, + "step": 1120 + }, + { + "epoch": 0.29770282830965344, + "grad_norm": 1.0166542138266799, + "learning_rate": 1.947256019535716e-05, + "loss": 0.3072658181190491, + "step": 1121 + }, + { + "epoch": 0.2979683972911964, + "grad_norm": 1.0479599499698302, + "learning_rate": 1.947115213937314e-05, + "loss": 0.3294365406036377, + "step": 1122 + }, + { + "epoch": 0.29823396627273935, + "grad_norm": 1.007749929257712, + "learning_rate": 1.9469742257462684e-05, + "loss": 0.34933674335479736, + "step": 1123 + }, + { + "epoch": 0.2984995352542823, + "grad_norm": 1.133473784296847, + "learning_rate": 1.946833054989761e-05, + "loss": 0.34586772322654724, + "step": 1124 + }, + { + "epoch": 0.29876510423582525, + "grad_norm": 1.0225090189343862, + "learning_rate": 1.9466917016950076e-05, + "loss": 0.33158159255981445, + "step": 1125 + }, + { + "epoch": 0.2990306732173682, + "grad_norm": 1.0162208348084125, + "learning_rate": 1.946550165889259e-05, + "loss": 0.32665887475013733, + "step": 1126 + }, + { + "epoch": 0.29929624219891116, + "grad_norm": 1.1065475895733048, + "learning_rate": 1.946408447599802e-05, + "loss": 0.3333032429218292, + "step": 1127 + }, + { + "epoch": 0.2995618111804541, + "grad_norm": 1.0958997421479173, + "learning_rate": 1.9462665468539582e-05, + "loss": 0.3747228980064392, + "step": 1128 + }, + { + "epoch": 0.29982738016199706, + "grad_norm": 0.9447906277138843, + "learning_rate": 1.9461244636790845e-05, + "loss": 0.34040436148643494, + "step": 1129 + }, + { + "epoch": 0.30009294914354, + "grad_norm": 1.0062775259583612, + "learning_rate": 1.9459821981025723e-05, + "loss": 0.3279584050178528, + "step": 1130 + }, + { + "epoch": 0.30035851812508296, + "grad_norm": 1.136819731097147, + "learning_rate": 1.9458397501518496e-05, + "loss": 0.33507707715034485, + "step": 1131 + }, + { + "epoch": 0.30062408710662597, + "grad_norm": 0.9978141677663763, + "learning_rate": 1.945697119854378e-05, + "loss": 0.3511529862880707, + "step": 1132 + }, + { + "epoch": 0.3008896560881689, + "grad_norm": 1.1038696900269844, + "learning_rate": 1.945554307237655e-05, + "loss": 0.33260345458984375, + "step": 1133 + }, + { + "epoch": 0.3011552250697119, + "grad_norm": 1.1267244347055163, + "learning_rate": 1.9454113123292133e-05, + "loss": 0.37698423862457275, + "step": 1134 + }, + { + "epoch": 0.3014207940512548, + "grad_norm": 1.0482054605062838, + "learning_rate": 1.945268135156621e-05, + "loss": 0.34843316674232483, + "step": 1135 + }, + { + "epoch": 0.3016863630327978, + "grad_norm": 1.1518938911568848, + "learning_rate": 1.9451247757474805e-05, + "loss": 0.38723987340927124, + "step": 1136 + }, + { + "epoch": 0.30195193201434073, + "grad_norm": 1.0597410032778982, + "learning_rate": 1.9449812341294302e-05, + "loss": 0.3836795389652252, + "step": 1137 + }, + { + "epoch": 0.3022175009958837, + "grad_norm": 0.9828275773453091, + "learning_rate": 1.9448375103301424e-05, + "loss": 0.3362433612346649, + "step": 1138 + }, + { + "epoch": 0.30248306997742663, + "grad_norm": 1.0750556057741842, + "learning_rate": 1.9446936043773264e-05, + "loss": 0.3615792393684387, + "step": 1139 + }, + { + "epoch": 0.3027486389589696, + "grad_norm": 1.0233339727957385, + "learning_rate": 1.944549516298725e-05, + "loss": 0.33693915605545044, + "step": 1140 + }, + { + "epoch": 0.30301420794051254, + "grad_norm": 1.0074205515838075, + "learning_rate": 1.9444052461221167e-05, + "loss": 0.32611170411109924, + "step": 1141 + }, + { + "epoch": 0.3032797769220555, + "grad_norm": 1.0257687736898828, + "learning_rate": 1.9442607938753153e-05, + "loss": 0.3504132032394409, + "step": 1142 + }, + { + "epoch": 0.30354534590359844, + "grad_norm": 1.081217851264946, + "learning_rate": 1.944116159586169e-05, + "loss": 0.3598168194293976, + "step": 1143 + }, + { + "epoch": 0.3038109148851414, + "grad_norm": 1.025673115447757, + "learning_rate": 1.9439713432825625e-05, + "loss": 0.33447909355163574, + "step": 1144 + }, + { + "epoch": 0.30407648386668434, + "grad_norm": 0.9795127759513904, + "learning_rate": 1.943826344992414e-05, + "loss": 0.34026333689689636, + "step": 1145 + }, + { + "epoch": 0.30434205284822735, + "grad_norm": 1.070042442644686, + "learning_rate": 1.9436811647436772e-05, + "loss": 0.323203980922699, + "step": 1146 + }, + { + "epoch": 0.3046076218297703, + "grad_norm": 1.0588861737680213, + "learning_rate": 1.943535802564342e-05, + "loss": 0.332398921251297, + "step": 1147 + }, + { + "epoch": 0.30487319081131325, + "grad_norm": 1.175168490214782, + "learning_rate": 1.9433902584824316e-05, + "loss": 0.3882995545864105, + "step": 1148 + }, + { + "epoch": 0.3051387597928562, + "grad_norm": 1.093435738226519, + "learning_rate": 1.943244532526006e-05, + "loss": 0.35262739658355713, + "step": 1149 + }, + { + "epoch": 0.30540432877439916, + "grad_norm": 1.1043029209432185, + "learning_rate": 1.9430986247231586e-05, + "loss": 0.39694511890411377, + "step": 1150 + }, + { + "epoch": 0.3056698977559421, + "grad_norm": 1.1276348856512544, + "learning_rate": 1.9429525351020197e-05, + "loss": 0.3692580759525299, + "step": 1151 + }, + { + "epoch": 0.30593546673748506, + "grad_norm": 1.1284903074468042, + "learning_rate": 1.9428062636907526e-05, + "loss": 0.3685402572154999, + "step": 1152 + }, + { + "epoch": 0.306201035719028, + "grad_norm": 1.1120189967723886, + "learning_rate": 1.9426598105175575e-05, + "loss": 0.37557253241539, + "step": 1153 + }, + { + "epoch": 0.30646660470057097, + "grad_norm": 0.9544414078231065, + "learning_rate": 1.9425131756106687e-05, + "loss": 0.3323203921318054, + "step": 1154 + }, + { + "epoch": 0.3067321736821139, + "grad_norm": 1.085159318227953, + "learning_rate": 1.9423663589983554e-05, + "loss": 0.37262290716171265, + "step": 1155 + }, + { + "epoch": 0.30699774266365687, + "grad_norm": 1.138203326668225, + "learning_rate": 1.9422193607089224e-05, + "loss": 0.36621618270874023, + "step": 1156 + }, + { + "epoch": 0.3072633116451998, + "grad_norm": 1.0326975743253168, + "learning_rate": 1.942072180770709e-05, + "loss": 0.3844982385635376, + "step": 1157 + }, + { + "epoch": 0.3075288806267428, + "grad_norm": 0.9983252957319158, + "learning_rate": 1.94192481921209e-05, + "loss": 0.3229531943798065, + "step": 1158 + }, + { + "epoch": 0.3077944496082857, + "grad_norm": 1.0805327657153956, + "learning_rate": 1.9417772760614745e-05, + "loss": 0.34862661361694336, + "step": 1159 + }, + { + "epoch": 0.30806001858982873, + "grad_norm": 1.0329581193958253, + "learning_rate": 1.941629551347308e-05, + "loss": 0.35496509075164795, + "step": 1160 + }, + { + "epoch": 0.3083255875713717, + "grad_norm": 1.051163133463375, + "learning_rate": 1.9414816450980686e-05, + "loss": 0.3695065975189209, + "step": 1161 + }, + { + "epoch": 0.30859115655291464, + "grad_norm": 1.0254769076684076, + "learning_rate": 1.9413335573422723e-05, + "loss": 0.3472525179386139, + "step": 1162 + }, + { + "epoch": 0.3088567255344576, + "grad_norm": 1.008969123299064, + "learning_rate": 1.9411852881084683e-05, + "loss": 0.3447483479976654, + "step": 1163 + }, + { + "epoch": 0.30912229451600054, + "grad_norm": 0.9333424416365893, + "learning_rate": 1.941036837425241e-05, + "loss": 0.31047824025154114, + "step": 1164 + }, + { + "epoch": 0.3093878634975435, + "grad_norm": 1.0570471012152007, + "learning_rate": 1.9408882053212094e-05, + "loss": 0.34502410888671875, + "step": 1165 + }, + { + "epoch": 0.30965343247908644, + "grad_norm": 1.1849442151759089, + "learning_rate": 1.940739391825029e-05, + "loss": 0.3663109540939331, + "step": 1166 + }, + { + "epoch": 0.3099190014606294, + "grad_norm": 1.1136723468346887, + "learning_rate": 1.9405903969653887e-05, + "loss": 0.3635792136192322, + "step": 1167 + }, + { + "epoch": 0.31018457044217235, + "grad_norm": 1.0769441486287206, + "learning_rate": 1.940441220771013e-05, + "loss": 0.359528124332428, + "step": 1168 + }, + { + "epoch": 0.3104501394237153, + "grad_norm": 1.043185528474707, + "learning_rate": 1.9402918632706618e-05, + "loss": 0.32566630840301514, + "step": 1169 + }, + { + "epoch": 0.31071570840525825, + "grad_norm": 1.0286897614370414, + "learning_rate": 1.940142324493129e-05, + "loss": 0.34758460521698, + "step": 1170 + }, + { + "epoch": 0.3109812773868012, + "grad_norm": 1.0148570847451444, + "learning_rate": 1.9399926044672438e-05, + "loss": 0.3484055995941162, + "step": 1171 + }, + { + "epoch": 0.31124684636834415, + "grad_norm": 1.1806099587394492, + "learning_rate": 1.93984270322187e-05, + "loss": 0.41958773136138916, + "step": 1172 + }, + { + "epoch": 0.3115124153498871, + "grad_norm": 1.085314216258339, + "learning_rate": 1.9396926207859085e-05, + "loss": 0.3578398525714874, + "step": 1173 + }, + { + "epoch": 0.3117779843314301, + "grad_norm": 1.0721505496116728, + "learning_rate": 1.9395423571882917e-05, + "loss": 0.38140422105789185, + "step": 1174 + }, + { + "epoch": 0.31204355331297307, + "grad_norm": 1.1224661464468277, + "learning_rate": 1.9393919124579898e-05, + "loss": 0.3782861828804016, + "step": 1175 + }, + { + "epoch": 0.312309122294516, + "grad_norm": 1.0482874367837718, + "learning_rate": 1.939241286624006e-05, + "loss": 0.3211040496826172, + "step": 1176 + }, + { + "epoch": 0.31257469127605897, + "grad_norm": 0.9909015391020882, + "learning_rate": 1.9390904797153795e-05, + "loss": 0.3090783953666687, + "step": 1177 + }, + { + "epoch": 0.3128402602576019, + "grad_norm": 1.0203166402095418, + "learning_rate": 1.938939491761184e-05, + "loss": 0.3542889654636383, + "step": 1178 + }, + { + "epoch": 0.3131058292391449, + "grad_norm": 1.016567110972503, + "learning_rate": 1.9387883227905285e-05, + "loss": 0.369164377450943, + "step": 1179 + }, + { + "epoch": 0.3133713982206878, + "grad_norm": 1.1492868354113897, + "learning_rate": 1.9386369728325562e-05, + "loss": 0.35200801491737366, + "step": 1180 + }, + { + "epoch": 0.3136369672022308, + "grad_norm": 1.1332626811675575, + "learning_rate": 1.9384854419164454e-05, + "loss": 0.3696276843547821, + "step": 1181 + }, + { + "epoch": 0.31390253618377373, + "grad_norm": 0.9856387823657043, + "learning_rate": 1.9383337300714104e-05, + "loss": 0.3403652012348175, + "step": 1182 + }, + { + "epoch": 0.3141681051653167, + "grad_norm": 0.9608300998441986, + "learning_rate": 1.9381818373266987e-05, + "loss": 0.3307063579559326, + "step": 1183 + }, + { + "epoch": 0.31443367414685963, + "grad_norm": 1.002604353314113, + "learning_rate": 1.9380297637115933e-05, + "loss": 0.3223465085029602, + "step": 1184 + }, + { + "epoch": 0.3146992431284026, + "grad_norm": 1.1668926481270334, + "learning_rate": 1.9378775092554124e-05, + "loss": 0.4013838768005371, + "step": 1185 + }, + { + "epoch": 0.31496481210994554, + "grad_norm": 1.2376602965184098, + "learning_rate": 1.9377250739875095e-05, + "loss": 0.3596574664115906, + "step": 1186 + }, + { + "epoch": 0.3152303810914885, + "grad_norm": 1.0683740579575798, + "learning_rate": 1.937572457937271e-05, + "loss": 0.41639968752861023, + "step": 1187 + }, + { + "epoch": 0.3154959500730315, + "grad_norm": 0.950341293536979, + "learning_rate": 1.9374196611341212e-05, + "loss": 0.3001318573951721, + "step": 1188 + }, + { + "epoch": 0.31576151905457445, + "grad_norm": 1.0390515723802394, + "learning_rate": 1.937266683607516e-05, + "loss": 0.33238667249679565, + "step": 1189 + }, + { + "epoch": 0.3160270880361174, + "grad_norm": 1.0559788990716998, + "learning_rate": 1.9371135253869483e-05, + "loss": 0.33638086915016174, + "step": 1190 + }, + { + "epoch": 0.31629265701766035, + "grad_norm": 1.0736881782093415, + "learning_rate": 1.9369601865019452e-05, + "loss": 0.34445878863334656, + "step": 1191 + }, + { + "epoch": 0.3165582259992033, + "grad_norm": 1.116672373820781, + "learning_rate": 1.9368066669820684e-05, + "loss": 0.33554553985595703, + "step": 1192 + }, + { + "epoch": 0.31682379498074625, + "grad_norm": 1.2940820576034424, + "learning_rate": 1.936652966856915e-05, + "loss": 0.3668493628501892, + "step": 1193 + }, + { + "epoch": 0.3170893639622892, + "grad_norm": 1.1460266164336763, + "learning_rate": 1.9364990861561163e-05, + "loss": 0.3813396990299225, + "step": 1194 + }, + { + "epoch": 0.31735493294383216, + "grad_norm": 1.048871056336621, + "learning_rate": 1.936345024909339e-05, + "loss": 0.33625900745391846, + "step": 1195 + }, + { + "epoch": 0.3176205019253751, + "grad_norm": 1.0238786804477913, + "learning_rate": 1.9361907831462836e-05, + "loss": 0.31131428480148315, + "step": 1196 + }, + { + "epoch": 0.31788607090691806, + "grad_norm": 0.9751456398999766, + "learning_rate": 1.936036360896687e-05, + "loss": 0.32571589946746826, + "step": 1197 + }, + { + "epoch": 0.318151639888461, + "grad_norm": 1.1296061558872548, + "learning_rate": 1.9358817581903193e-05, + "loss": 0.36207717657089233, + "step": 1198 + }, + { + "epoch": 0.31841720887000396, + "grad_norm": 1.062344543153862, + "learning_rate": 1.9357269750569864e-05, + "loss": 0.3743855059146881, + "step": 1199 + }, + { + "epoch": 0.3186827778515469, + "grad_norm": 1.1254060799620074, + "learning_rate": 1.9355720115265283e-05, + "loss": 0.3862137794494629, + "step": 1200 + }, + { + "epoch": 0.31894834683308987, + "grad_norm": 1.1135871061204583, + "learning_rate": 1.935416867628821e-05, + "loss": 0.33353424072265625, + "step": 1201 + }, + { + "epoch": 0.3192139158146329, + "grad_norm": 9.759113022509682, + "learning_rate": 1.9352615433937733e-05, + "loss": 0.3277953267097473, + "step": 1202 + }, + { + "epoch": 0.3194794847961758, + "grad_norm": 1.104737565124737, + "learning_rate": 1.9351060388513304e-05, + "loss": 0.38247692584991455, + "step": 1203 + }, + { + "epoch": 0.3197450537777188, + "grad_norm": 1.0645482624060865, + "learning_rate": 1.9349503540314724e-05, + "loss": 0.3330709934234619, + "step": 1204 + }, + { + "epoch": 0.32001062275926173, + "grad_norm": 1.1382102351287038, + "learning_rate": 1.9347944889642125e-05, + "loss": 0.3809449076652527, + "step": 1205 + }, + { + "epoch": 0.3202761917408047, + "grad_norm": 0.9591245399492223, + "learning_rate": 1.9346384436796e-05, + "loss": 0.33623188734054565, + "step": 1206 + }, + { + "epoch": 0.32054176072234764, + "grad_norm": 1.0414583731283242, + "learning_rate": 1.9344822182077184e-05, + "loss": 0.35465264320373535, + "step": 1207 + }, + { + "epoch": 0.3208073297038906, + "grad_norm": 1.0419539507532576, + "learning_rate": 1.9343258125786866e-05, + "loss": 0.3532233238220215, + "step": 1208 + }, + { + "epoch": 0.32107289868543354, + "grad_norm": 0.972348986123494, + "learning_rate": 1.9341692268226572e-05, + "loss": 0.3498903512954712, + "step": 1209 + }, + { + "epoch": 0.3213384676669765, + "grad_norm": 1.057700016356479, + "learning_rate": 1.9340124609698185e-05, + "loss": 0.36124879121780396, + "step": 1210 + }, + { + "epoch": 0.32160403664851944, + "grad_norm": 1.1891126233384992, + "learning_rate": 1.933855515050393e-05, + "loss": 0.38535434007644653, + "step": 1211 + }, + { + "epoch": 0.3218696056300624, + "grad_norm": 1.1201736183139164, + "learning_rate": 1.9336983890946383e-05, + "loss": 0.39999911189079285, + "step": 1212 + }, + { + "epoch": 0.32213517461160535, + "grad_norm": 1.1396977359685507, + "learning_rate": 1.9335410831328457e-05, + "loss": 0.3519791066646576, + "step": 1213 + }, + { + "epoch": 0.3224007435931483, + "grad_norm": 1.1624196201646915, + "learning_rate": 1.9333835971953424e-05, + "loss": 0.35882368683815, + "step": 1214 + }, + { + "epoch": 0.32266631257469125, + "grad_norm": 1.2089532713833613, + "learning_rate": 1.93322593131249e-05, + "loss": 0.36132001876831055, + "step": 1215 + }, + { + "epoch": 0.32293188155623426, + "grad_norm": 1.0741169297687752, + "learning_rate": 1.9330680855146845e-05, + "loss": 0.36840832233428955, + "step": 1216 + }, + { + "epoch": 0.3231974505377772, + "grad_norm": 1.1553079333487188, + "learning_rate": 1.9329100598323563e-05, + "loss": 0.3755963444709778, + "step": 1217 + }, + { + "epoch": 0.32346301951932016, + "grad_norm": 1.1792888887437214, + "learning_rate": 1.9327518542959717e-05, + "loss": 0.400601863861084, + "step": 1218 + }, + { + "epoch": 0.3237285885008631, + "grad_norm": 1.0342294479515497, + "learning_rate": 1.93259346893603e-05, + "loss": 0.3100128769874573, + "step": 1219 + }, + { + "epoch": 0.32399415748240606, + "grad_norm": 1.0633052239431813, + "learning_rate": 1.9324349037830665e-05, + "loss": 0.3439880609512329, + "step": 1220 + }, + { + "epoch": 0.324259726463949, + "grad_norm": 1.1634088151631976, + "learning_rate": 1.9322761588676505e-05, + "loss": 0.3612631559371948, + "step": 1221 + }, + { + "epoch": 0.32452529544549197, + "grad_norm": 1.1292400605185824, + "learning_rate": 1.9321172342203863e-05, + "loss": 0.38202327489852905, + "step": 1222 + }, + { + "epoch": 0.3247908644270349, + "grad_norm": 1.0253004653890312, + "learning_rate": 1.9319581298719127e-05, + "loss": 0.3405265808105469, + "step": 1223 + }, + { + "epoch": 0.32505643340857787, + "grad_norm": 1.1499639639111883, + "learning_rate": 1.931798845852903e-05, + "loss": 0.4110907018184662, + "step": 1224 + }, + { + "epoch": 0.3253220023901208, + "grad_norm": 1.2758168253168263, + "learning_rate": 1.9316393821940654e-05, + "loss": 0.3007548451423645, + "step": 1225 + }, + { + "epoch": 0.3255875713716638, + "grad_norm": 2.5438383009304673, + "learning_rate": 1.9314797389261426e-05, + "loss": 0.32769858837127686, + "step": 1226 + }, + { + "epoch": 0.3258531403532067, + "grad_norm": 1.0370704182885782, + "learning_rate": 1.931319916079912e-05, + "loss": 0.3619830310344696, + "step": 1227 + }, + { + "epoch": 0.3261187093347497, + "grad_norm": 1.2983573666738066, + "learning_rate": 1.9311599136861853e-05, + "loss": 0.3470210134983063, + "step": 1228 + }, + { + "epoch": 0.32638427831629263, + "grad_norm": 1.145435126731274, + "learning_rate": 1.9309997317758093e-05, + "loss": 0.3471665382385254, + "step": 1229 + }, + { + "epoch": 0.32664984729783564, + "grad_norm": 1.0757592201920594, + "learning_rate": 1.930839370379665e-05, + "loss": 0.3717760443687439, + "step": 1230 + }, + { + "epoch": 0.3269154162793786, + "grad_norm": 1.1173068015382108, + "learning_rate": 1.9306788295286687e-05, + "loss": 0.37279975414276123, + "step": 1231 + }, + { + "epoch": 0.32718098526092154, + "grad_norm": 1.1523781527891401, + "learning_rate": 1.93051810925377e-05, + "loss": 0.3884522020816803, + "step": 1232 + }, + { + "epoch": 0.3274465542424645, + "grad_norm": 1.1200431222189422, + "learning_rate": 1.9303572095859545e-05, + "loss": 0.4277604818344116, + "step": 1233 + }, + { + "epoch": 0.32771212322400745, + "grad_norm": 1.1197023145386935, + "learning_rate": 1.9301961305562415e-05, + "loss": 0.2888818681240082, + "step": 1234 + }, + { + "epoch": 0.3279776922055504, + "grad_norm": 1.0271311895282893, + "learning_rate": 1.9300348721956854e-05, + "loss": 0.3134511709213257, + "step": 1235 + }, + { + "epoch": 0.32824326118709335, + "grad_norm": 1.0800984792046815, + "learning_rate": 1.9298734345353745e-05, + "loss": 0.38525280356407166, + "step": 1236 + }, + { + "epoch": 0.3285088301686363, + "grad_norm": 1.134011749036063, + "learning_rate": 1.9297118176064324e-05, + "loss": 0.3692918121814728, + "step": 1237 + }, + { + "epoch": 0.32877439915017925, + "grad_norm": 1.0348260315377988, + "learning_rate": 1.9295500214400165e-05, + "loss": 0.3443421721458435, + "step": 1238 + }, + { + "epoch": 0.3290399681317222, + "grad_norm": 1.0129455663017488, + "learning_rate": 1.9293880460673197e-05, + "loss": 0.3228621184825897, + "step": 1239 + }, + { + "epoch": 0.32930553711326516, + "grad_norm": 1.0116024279908165, + "learning_rate": 1.9292258915195688e-05, + "loss": 0.330943763256073, + "step": 1240 + }, + { + "epoch": 0.3295711060948081, + "grad_norm": 1.1814587344422625, + "learning_rate": 1.929063557828025e-05, + "loss": 0.356637567281723, + "step": 1241 + }, + { + "epoch": 0.32983667507635106, + "grad_norm": 0.9888159780201056, + "learning_rate": 1.9289010450239843e-05, + "loss": 0.3481113910675049, + "step": 1242 + }, + { + "epoch": 0.330102244057894, + "grad_norm": 1.1876931030431213, + "learning_rate": 1.928738353138778e-05, + "loss": 0.36579906940460205, + "step": 1243 + }, + { + "epoch": 0.330367813039437, + "grad_norm": 1.0281454378567854, + "learning_rate": 1.9285754822037705e-05, + "loss": 0.33025234937667847, + "step": 1244 + }, + { + "epoch": 0.33063338202097997, + "grad_norm": 1.0936673160473642, + "learning_rate": 1.9284124322503613e-05, + "loss": 0.34848469495773315, + "step": 1245 + }, + { + "epoch": 0.3308989510025229, + "grad_norm": 1.1232405017277023, + "learning_rate": 1.928249203309985e-05, + "loss": 0.3523876368999481, + "step": 1246 + }, + { + "epoch": 0.3311645199840659, + "grad_norm": 1.140153458583263, + "learning_rate": 1.92808579541411e-05, + "loss": 0.3695565462112427, + "step": 1247 + }, + { + "epoch": 0.3314300889656088, + "grad_norm": 1.0267337296320096, + "learning_rate": 1.9279222085942396e-05, + "loss": 0.3557945191860199, + "step": 1248 + }, + { + "epoch": 0.3316956579471518, + "grad_norm": 1.0261133198060035, + "learning_rate": 1.9277584428819113e-05, + "loss": 0.3015502989292145, + "step": 1249 + }, + { + "epoch": 0.33196122692869473, + "grad_norm": 0.9384869314897972, + "learning_rate": 1.9275944983086964e-05, + "loss": 0.31333664059638977, + "step": 1250 + }, + { + "epoch": 0.3322267959102377, + "grad_norm": 1.103154580638619, + "learning_rate": 1.9274303749062028e-05, + "loss": 0.36595287919044495, + "step": 1251 + }, + { + "epoch": 0.33249236489178063, + "grad_norm": 1.0573816777840739, + "learning_rate": 1.9272660727060705e-05, + "loss": 0.3400266170501709, + "step": 1252 + }, + { + "epoch": 0.3327579338733236, + "grad_norm": 1.0994664368429343, + "learning_rate": 1.927101591739976e-05, + "loss": 0.3642529547214508, + "step": 1253 + }, + { + "epoch": 0.33302350285486654, + "grad_norm": 1.08059410662081, + "learning_rate": 1.926936932039628e-05, + "loss": 0.3418777287006378, + "step": 1254 + }, + { + "epoch": 0.3332890718364095, + "grad_norm": 1.0881678177934593, + "learning_rate": 1.9267720936367723e-05, + "loss": 0.33382388949394226, + "step": 1255 + }, + { + "epoch": 0.33355464081795244, + "grad_norm": 1.1227567600503816, + "learning_rate": 1.926607076563187e-05, + "loss": 0.36257779598236084, + "step": 1256 + }, + { + "epoch": 0.3338202097994954, + "grad_norm": 1.5546101865012443, + "learning_rate": 1.926441880850686e-05, + "loss": 0.3018002510070801, + "step": 1257 + }, + { + "epoch": 0.3340857787810384, + "grad_norm": 1.0263747105982135, + "learning_rate": 1.9262765065311165e-05, + "loss": 0.3373662233352661, + "step": 1258 + }, + { + "epoch": 0.33435134776258135, + "grad_norm": 1.0001644182280367, + "learning_rate": 1.9261109536363613e-05, + "loss": 0.3555397391319275, + "step": 1259 + }, + { + "epoch": 0.3346169167441243, + "grad_norm": 1.1519069907937776, + "learning_rate": 1.925945222198336e-05, + "loss": 0.3004256784915924, + "step": 1260 + }, + { + "epoch": 0.33488248572566726, + "grad_norm": 2.328412351070072, + "learning_rate": 1.925779312248993e-05, + "loss": 0.33299940824508667, + "step": 1261 + }, + { + "epoch": 0.3351480547072102, + "grad_norm": 1.0617967738999583, + "learning_rate": 1.9256132238203166e-05, + "loss": 0.3715725541114807, + "step": 1262 + }, + { + "epoch": 0.33541362368875316, + "grad_norm": 1.0140049717249513, + "learning_rate": 1.9254469569443274e-05, + "loss": 0.35133951902389526, + "step": 1263 + }, + { + "epoch": 0.3356791926702961, + "grad_norm": 0.9980129680534503, + "learning_rate": 1.92528051165308e-05, + "loss": 0.3328818380832672, + "step": 1264 + }, + { + "epoch": 0.33594476165183906, + "grad_norm": 1.0764552464682182, + "learning_rate": 1.925113887978662e-05, + "loss": 0.3665468692779541, + "step": 1265 + }, + { + "epoch": 0.336210330633382, + "grad_norm": 1.0446302802374996, + "learning_rate": 1.9249470859531976e-05, + "loss": 0.3489571511745453, + "step": 1266 + }, + { + "epoch": 0.33647589961492497, + "grad_norm": 1.0629721705272823, + "learning_rate": 1.9247801056088433e-05, + "loss": 0.30038982629776, + "step": 1267 + }, + { + "epoch": 0.3367414685964679, + "grad_norm": 1.1798569183028156, + "learning_rate": 1.9246129469777918e-05, + "loss": 0.4163355827331543, + "step": 1268 + }, + { + "epoch": 0.33700703757801087, + "grad_norm": 1.0428552063046848, + "learning_rate": 1.924445610092269e-05, + "loss": 0.33687612414360046, + "step": 1269 + }, + { + "epoch": 0.3372726065595538, + "grad_norm": 1.0466869124167506, + "learning_rate": 1.924278094984535e-05, + "loss": 0.3448297679424286, + "step": 1270 + }, + { + "epoch": 0.3375381755410968, + "grad_norm": 1.0979384797680924, + "learning_rate": 1.9241104016868853e-05, + "loss": 0.35257208347320557, + "step": 1271 + }, + { + "epoch": 0.3378037445226398, + "grad_norm": 1.0794393535441016, + "learning_rate": 1.9239425302316487e-05, + "loss": 0.34880566596984863, + "step": 1272 + }, + { + "epoch": 0.33806931350418273, + "grad_norm": 1.1081978913885613, + "learning_rate": 1.9237744806511895e-05, + "loss": 0.33643782138824463, + "step": 1273 + }, + { + "epoch": 0.3383348824857257, + "grad_norm": 1.0185962864877929, + "learning_rate": 1.9236062529779057e-05, + "loss": 0.32345050573349, + "step": 1274 + }, + { + "epoch": 0.33860045146726864, + "grad_norm": 1.0547576972102612, + "learning_rate": 1.9234378472442286e-05, + "loss": 0.33983978629112244, + "step": 1275 + }, + { + "epoch": 0.3388660204488116, + "grad_norm": 1.0305326470674594, + "learning_rate": 1.923269263482626e-05, + "loss": 0.32825571298599243, + "step": 1276 + }, + { + "epoch": 0.33913158943035454, + "grad_norm": 1.0836151603415423, + "learning_rate": 1.923100501725598e-05, + "loss": 0.3434044122695923, + "step": 1277 + }, + { + "epoch": 0.3393971584118975, + "grad_norm": 1.1293248576076373, + "learning_rate": 1.9229315620056805e-05, + "loss": 0.3463204503059387, + "step": 1278 + }, + { + "epoch": 0.33966272739344044, + "grad_norm": 1.0476463818396518, + "learning_rate": 1.9227624443554425e-05, + "loss": 0.3608240485191345, + "step": 1279 + }, + { + "epoch": 0.3399282963749834, + "grad_norm": 1.111712780266586, + "learning_rate": 1.9225931488074882e-05, + "loss": 0.36131763458251953, + "step": 1280 + }, + { + "epoch": 0.34019386535652635, + "grad_norm": 0.9948222919660873, + "learning_rate": 1.922423675394456e-05, + "loss": 0.3270101547241211, + "step": 1281 + }, + { + "epoch": 0.3404594343380693, + "grad_norm": 1.1047356141038558, + "learning_rate": 1.922254024149018e-05, + "loss": 0.3551778495311737, + "step": 1282 + }, + { + "epoch": 0.34072500331961225, + "grad_norm": 1.1057498393465535, + "learning_rate": 1.9220841951038815e-05, + "loss": 0.3686622381210327, + "step": 1283 + }, + { + "epoch": 0.3409905723011552, + "grad_norm": 1.0810198379819234, + "learning_rate": 1.921914188291787e-05, + "loss": 0.35161536931991577, + "step": 1284 + }, + { + "epoch": 0.34125614128269816, + "grad_norm": 1.1489267376414198, + "learning_rate": 1.92174400374551e-05, + "loss": 0.3549870550632477, + "step": 1285 + }, + { + "epoch": 0.34152171026424116, + "grad_norm": 1.0904860537070935, + "learning_rate": 1.9215736414978593e-05, + "loss": 0.36780738830566406, + "step": 1286 + }, + { + "epoch": 0.3417872792457841, + "grad_norm": 1.132171748367688, + "learning_rate": 1.9214031015816803e-05, + "loss": 0.36060047149658203, + "step": 1287 + }, + { + "epoch": 0.34205284822732707, + "grad_norm": 1.0753334155968608, + "learning_rate": 1.9212323840298502e-05, + "loss": 0.32578715682029724, + "step": 1288 + }, + { + "epoch": 0.34231841720887, + "grad_norm": 1.0380534929488934, + "learning_rate": 1.9210614888752813e-05, + "loss": 0.3505493402481079, + "step": 1289 + }, + { + "epoch": 0.34258398619041297, + "grad_norm": 1.0227959332298084, + "learning_rate": 1.9208904161509203e-05, + "loss": 0.32681795954704285, + "step": 1290 + }, + { + "epoch": 0.3428495551719559, + "grad_norm": 1.0227973616384467, + "learning_rate": 1.9207191658897473e-05, + "loss": 0.34808459877967834, + "step": 1291 + }, + { + "epoch": 0.3431151241534989, + "grad_norm": 1.0810974703490968, + "learning_rate": 1.920547738124779e-05, + "loss": 0.3588678240776062, + "step": 1292 + }, + { + "epoch": 0.3433806931350418, + "grad_norm": 1.2030053357742059, + "learning_rate": 1.9203761328890626e-05, + "loss": 0.3528832495212555, + "step": 1293 + }, + { + "epoch": 0.3436462621165848, + "grad_norm": 1.35729757891191, + "learning_rate": 1.9202043502156833e-05, + "loss": 0.33549001812934875, + "step": 1294 + }, + { + "epoch": 0.34391183109812773, + "grad_norm": 1.0986147605525078, + "learning_rate": 1.920032390137758e-05, + "loss": 0.3466021418571472, + "step": 1295 + }, + { + "epoch": 0.3441774000796707, + "grad_norm": 1.0492164389172054, + "learning_rate": 1.9198602526884388e-05, + "loss": 0.35646146535873413, + "step": 1296 + }, + { + "epoch": 0.34444296906121363, + "grad_norm": 1.0348991752364494, + "learning_rate": 1.9196879379009112e-05, + "loss": 0.3442128300666809, + "step": 1297 + }, + { + "epoch": 0.3447085380427566, + "grad_norm": 1.083291442034964, + "learning_rate": 1.9195154458083962e-05, + "loss": 0.3854391872882843, + "step": 1298 + }, + { + "epoch": 0.34497410702429954, + "grad_norm": 1.202325074766952, + "learning_rate": 1.9193427764441477e-05, + "loss": 0.376137375831604, + "step": 1299 + }, + { + "epoch": 0.34523967600584254, + "grad_norm": 1.1591691335477168, + "learning_rate": 1.9191699298414547e-05, + "loss": 0.3115769028663635, + "step": 1300 + }, + { + "epoch": 0.3455052449873855, + "grad_norm": 1.125127529667975, + "learning_rate": 1.9189969060336396e-05, + "loss": 0.32553282380104065, + "step": 1301 + }, + { + "epoch": 0.34577081396892845, + "grad_norm": 1.2442677252107, + "learning_rate": 1.9188237050540597e-05, + "loss": 0.39529356360435486, + "step": 1302 + }, + { + "epoch": 0.3460363829504714, + "grad_norm": 1.016155926476122, + "learning_rate": 1.9186503269361063e-05, + "loss": 0.3027458190917969, + "step": 1303 + }, + { + "epoch": 0.34630195193201435, + "grad_norm": 1.2178145504108082, + "learning_rate": 1.918476771713204e-05, + "loss": 0.39317795634269714, + "step": 1304 + }, + { + "epoch": 0.3465675209135573, + "grad_norm": 1.1358253756284789, + "learning_rate": 1.918303039418813e-05, + "loss": 0.3730325698852539, + "step": 1305 + }, + { + "epoch": 0.34683308989510025, + "grad_norm": 1.0835224567793253, + "learning_rate": 1.918129130086426e-05, + "loss": 0.34862780570983887, + "step": 1306 + }, + { + "epoch": 0.3470986588766432, + "grad_norm": 1.106131252801308, + "learning_rate": 1.9179550437495707e-05, + "loss": 0.32139018177986145, + "step": 1307 + }, + { + "epoch": 0.34736422785818616, + "grad_norm": 1.118754726003564, + "learning_rate": 1.91778078044181e-05, + "loss": 0.37246090173721313, + "step": 1308 + }, + { + "epoch": 0.3476297968397291, + "grad_norm": 1.035507147337034, + "learning_rate": 1.9176063401967386e-05, + "loss": 0.30985957384109497, + "step": 1309 + }, + { + "epoch": 0.34789536582127206, + "grad_norm": 1.1303664709170593, + "learning_rate": 1.917431723047987e-05, + "loss": 0.3713758587837219, + "step": 1310 + }, + { + "epoch": 0.348160934802815, + "grad_norm": 1.076206973404712, + "learning_rate": 1.9172569290292193e-05, + "loss": 0.3465833067893982, + "step": 1311 + }, + { + "epoch": 0.34842650378435797, + "grad_norm": 1.1789932919731194, + "learning_rate": 1.917081958174134e-05, + "loss": 0.34807220101356506, + "step": 1312 + }, + { + "epoch": 0.3486920727659009, + "grad_norm": 1.0178456651378849, + "learning_rate": 1.9169068105164627e-05, + "loss": 0.3369640111923218, + "step": 1313 + }, + { + "epoch": 0.3489576417474439, + "grad_norm": 1.1714339652663717, + "learning_rate": 1.9167314860899724e-05, + "loss": 0.3521544337272644, + "step": 1314 + }, + { + "epoch": 0.3492232107289869, + "grad_norm": 0.9756562815370131, + "learning_rate": 1.9165559849284635e-05, + "loss": 0.3256300687789917, + "step": 1315 + }, + { + "epoch": 0.34948877971052983, + "grad_norm": 1.1173269078403432, + "learning_rate": 1.9163803070657706e-05, + "loss": 0.32401931285858154, + "step": 1316 + }, + { + "epoch": 0.3497543486920728, + "grad_norm": 1.104564951170044, + "learning_rate": 1.916204452535762e-05, + "loss": 0.372749924659729, + "step": 1317 + }, + { + "epoch": 0.35001991767361573, + "grad_norm": 1.053240444697934, + "learning_rate": 1.9160284213723407e-05, + "loss": 0.35853224992752075, + "step": 1318 + }, + { + "epoch": 0.3502854866551587, + "grad_norm": 1.048325144857422, + "learning_rate": 1.9158522136094433e-05, + "loss": 0.32850801944732666, + "step": 1319 + }, + { + "epoch": 0.35055105563670164, + "grad_norm": 1.1274703494911789, + "learning_rate": 1.9156758292810404e-05, + "loss": 0.3548474907875061, + "step": 1320 + }, + { + "epoch": 0.3508166246182446, + "grad_norm": 1.10371779317482, + "learning_rate": 1.9154992684211372e-05, + "loss": 0.38709041476249695, + "step": 1321 + }, + { + "epoch": 0.35108219359978754, + "grad_norm": 1.1369910570736041, + "learning_rate": 1.9153225310637726e-05, + "loss": 0.40369266271591187, + "step": 1322 + }, + { + "epoch": 0.3513477625813305, + "grad_norm": 1.179710362637603, + "learning_rate": 1.9151456172430186e-05, + "loss": 0.3570155203342438, + "step": 1323 + }, + { + "epoch": 0.35161333156287344, + "grad_norm": 1.0315056954444073, + "learning_rate": 1.9149685269929833e-05, + "loss": 0.34426411986351013, + "step": 1324 + }, + { + "epoch": 0.3518789005444164, + "grad_norm": 1.0980268876500368, + "learning_rate": 1.9147912603478066e-05, + "loss": 0.35666006803512573, + "step": 1325 + }, + { + "epoch": 0.35214446952595935, + "grad_norm": 1.0320732816254274, + "learning_rate": 1.9146138173416643e-05, + "loss": 0.36225512623786926, + "step": 1326 + }, + { + "epoch": 0.3524100385075023, + "grad_norm": 1.0499655117353668, + "learning_rate": 1.9144361980087643e-05, + "loss": 0.3312349319458008, + "step": 1327 + }, + { + "epoch": 0.3526756074890453, + "grad_norm": 1.0828461821707789, + "learning_rate": 1.9142584023833506e-05, + "loss": 0.3590523302555084, + "step": 1328 + }, + { + "epoch": 0.35294117647058826, + "grad_norm": 1.2432343198034153, + "learning_rate": 1.9140804304996997e-05, + "loss": 0.341480016708374, + "step": 1329 + }, + { + "epoch": 0.3532067454521312, + "grad_norm": 1.0165353851066345, + "learning_rate": 1.913902282392122e-05, + "loss": 0.37246501445770264, + "step": 1330 + }, + { + "epoch": 0.35347231443367416, + "grad_norm": 1.0959834963108057, + "learning_rate": 1.913723958094963e-05, + "loss": 0.33834031224250793, + "step": 1331 + }, + { + "epoch": 0.3537378834152171, + "grad_norm": 1.0066884605687934, + "learning_rate": 1.913545457642601e-05, + "loss": 0.29285067319869995, + "step": 1332 + }, + { + "epoch": 0.35400345239676007, + "grad_norm": 1.0768479974972798, + "learning_rate": 1.913366781069449e-05, + "loss": 0.2903720736503601, + "step": 1333 + }, + { + "epoch": 0.354269021378303, + "grad_norm": 1.1311334028851072, + "learning_rate": 1.913187928409954e-05, + "loss": 0.36428314447402954, + "step": 1334 + }, + { + "epoch": 0.35453459035984597, + "grad_norm": 1.0473346547130091, + "learning_rate": 1.9130088996985967e-05, + "loss": 0.3379477560520172, + "step": 1335 + }, + { + "epoch": 0.3548001593413889, + "grad_norm": 1.0963924260325884, + "learning_rate": 1.912829694969891e-05, + "loss": 0.35286659002304077, + "step": 1336 + }, + { + "epoch": 0.3550657283229319, + "grad_norm": 1.1930831242867357, + "learning_rate": 1.9126503142583864e-05, + "loss": 0.3670174479484558, + "step": 1337 + }, + { + "epoch": 0.3553312973044748, + "grad_norm": 1.1294601866875984, + "learning_rate": 1.9124707575986642e-05, + "loss": 0.3422902226448059, + "step": 1338 + }, + { + "epoch": 0.3555968662860178, + "grad_norm": 0.9984746022499613, + "learning_rate": 1.912291025025342e-05, + "loss": 0.29778385162353516, + "step": 1339 + }, + { + "epoch": 0.35586243526756073, + "grad_norm": 1.1907673127670892, + "learning_rate": 1.91211111657307e-05, + "loss": 0.36249661445617676, + "step": 1340 + }, + { + "epoch": 0.3561280042491037, + "grad_norm": 1.1054946723600563, + "learning_rate": 1.9119310322765315e-05, + "loss": 0.340925395488739, + "step": 1341 + }, + { + "epoch": 0.3563935732306467, + "grad_norm": 1.1964466720866056, + "learning_rate": 1.9117507721704455e-05, + "loss": 0.35674089193344116, + "step": 1342 + }, + { + "epoch": 0.35665914221218964, + "grad_norm": 1.1077144979302902, + "learning_rate": 1.9115703362895636e-05, + "loss": 0.3602067828178406, + "step": 1343 + }, + { + "epoch": 0.3569247111937326, + "grad_norm": 1.1669501112510636, + "learning_rate": 1.9113897246686716e-05, + "loss": 0.35211697220802307, + "step": 1344 + }, + { + "epoch": 0.35719028017527554, + "grad_norm": 1.1098565168791754, + "learning_rate": 1.91120893734259e-05, + "loss": 0.3706115484237671, + "step": 1345 + }, + { + "epoch": 0.3574558491568185, + "grad_norm": 0.955637908965499, + "learning_rate": 1.9110279743461717e-05, + "loss": 0.3365110754966736, + "step": 1346 + }, + { + "epoch": 0.35772141813836145, + "grad_norm": 1.2071736385011052, + "learning_rate": 1.9108468357143047e-05, + "loss": 0.40012121200561523, + "step": 1347 + }, + { + "epoch": 0.3579869871199044, + "grad_norm": 1.1409634140225444, + "learning_rate": 1.91066552148191e-05, + "loss": 0.4003351926803589, + "step": 1348 + }, + { + "epoch": 0.35825255610144735, + "grad_norm": 1.0613274196364288, + "learning_rate": 1.910484031683943e-05, + "loss": 0.3574616014957428, + "step": 1349 + }, + { + "epoch": 0.3585181250829903, + "grad_norm": 1.0904662824068834, + "learning_rate": 1.910302366355393e-05, + "loss": 0.3345073461532593, + "step": 1350 + }, + { + "epoch": 0.35878369406453325, + "grad_norm": 1.0532412802136695, + "learning_rate": 1.910120525531283e-05, + "loss": 0.3467676341533661, + "step": 1351 + }, + { + "epoch": 0.3590492630460762, + "grad_norm": 1.0529131768701299, + "learning_rate": 1.9099385092466695e-05, + "loss": 0.32433655858039856, + "step": 1352 + }, + { + "epoch": 0.35931483202761916, + "grad_norm": 1.0442908892383016, + "learning_rate": 1.909756317536643e-05, + "loss": 0.3366447985172272, + "step": 1353 + }, + { + "epoch": 0.3595804010091621, + "grad_norm": 1.0770054348386777, + "learning_rate": 1.909573950436328e-05, + "loss": 0.310118168592453, + "step": 1354 + }, + { + "epoch": 0.35984596999070506, + "grad_norm": 1.4782002462322321, + "learning_rate": 1.909391407980883e-05, + "loss": 0.3503451943397522, + "step": 1355 + }, + { + "epoch": 0.36011153897224807, + "grad_norm": 1.0889726916887852, + "learning_rate": 1.9092086902054996e-05, + "loss": 0.3375343978404999, + "step": 1356 + }, + { + "epoch": 0.360377107953791, + "grad_norm": 0.9368081121032712, + "learning_rate": 1.909025797145404e-05, + "loss": 0.3056451082229614, + "step": 1357 + }, + { + "epoch": 0.360642676935334, + "grad_norm": 0.9554491579006472, + "learning_rate": 1.9088427288358556e-05, + "loss": 0.3063391447067261, + "step": 1358 + }, + { + "epoch": 0.3609082459168769, + "grad_norm": 0.9358824747825566, + "learning_rate": 1.908659485312148e-05, + "loss": 0.3055405616760254, + "step": 1359 + }, + { + "epoch": 0.3611738148984199, + "grad_norm": 1.1828231629690173, + "learning_rate": 1.908476066609608e-05, + "loss": 0.38323235511779785, + "step": 1360 + }, + { + "epoch": 0.36143938387996283, + "grad_norm": 1.0971994038941366, + "learning_rate": 1.908292472763597e-05, + "loss": 0.33526092767715454, + "step": 1361 + }, + { + "epoch": 0.3617049528615058, + "grad_norm": 1.0449346093027478, + "learning_rate": 1.9081087038095094e-05, + "loss": 0.34485238790512085, + "step": 1362 + }, + { + "epoch": 0.36197052184304873, + "grad_norm": 1.0943982229718532, + "learning_rate": 1.907924759782774e-05, + "loss": 0.2963239252567291, + "step": 1363 + }, + { + "epoch": 0.3622360908245917, + "grad_norm": 1.2033822452903298, + "learning_rate": 1.9077406407188532e-05, + "loss": 0.3536864221096039, + "step": 1364 + }, + { + "epoch": 0.36250165980613464, + "grad_norm": 1.1739216512613182, + "learning_rate": 1.907556346653242e-05, + "loss": 0.3724798858165741, + "step": 1365 + }, + { + "epoch": 0.3627672287876776, + "grad_norm": 1.2035474175290464, + "learning_rate": 1.9073718776214717e-05, + "loss": 0.36241161823272705, + "step": 1366 + }, + { + "epoch": 0.36303279776922054, + "grad_norm": 1.2262905723198394, + "learning_rate": 1.9071872336591042e-05, + "loss": 0.3484225273132324, + "step": 1367 + }, + { + "epoch": 0.3632983667507635, + "grad_norm": 1.11285184075262, + "learning_rate": 1.9070024148017375e-05, + "loss": 0.33606311678886414, + "step": 1368 + }, + { + "epoch": 0.36356393573230644, + "grad_norm": 1.076908267109863, + "learning_rate": 1.906817421085002e-05, + "loss": 0.3263503909111023, + "step": 1369 + }, + { + "epoch": 0.36382950471384945, + "grad_norm": 1.126388175466026, + "learning_rate": 1.906632252544563e-05, + "loss": 0.33454492688179016, + "step": 1370 + }, + { + "epoch": 0.3640950736953924, + "grad_norm": 1.1264022314316273, + "learning_rate": 1.9064469092161185e-05, + "loss": 0.34858438372612, + "step": 1371 + }, + { + "epoch": 0.36436064267693535, + "grad_norm": 1.0527021112264499, + "learning_rate": 1.9062613911354005e-05, + "loss": 0.3466234505176544, + "step": 1372 + }, + { + "epoch": 0.3646262116584783, + "grad_norm": 1.0325760706581486, + "learning_rate": 1.9060756983381743e-05, + "loss": 0.33574312925338745, + "step": 1373 + }, + { + "epoch": 0.36489178064002126, + "grad_norm": 1.0321788657369535, + "learning_rate": 1.90588983086024e-05, + "loss": 0.3012363016605377, + "step": 1374 + }, + { + "epoch": 0.3651573496215642, + "grad_norm": 1.0033389586223882, + "learning_rate": 1.90570378873743e-05, + "loss": 0.3050191402435303, + "step": 1375 + }, + { + "epoch": 0.36542291860310716, + "grad_norm": 1.0078763869776561, + "learning_rate": 1.905517572005611e-05, + "loss": 0.35090070962905884, + "step": 1376 + }, + { + "epoch": 0.3656884875846501, + "grad_norm": 1.011051809727729, + "learning_rate": 1.9053311807006845e-05, + "loss": 0.3276262581348419, + "step": 1377 + }, + { + "epoch": 0.36595405656619306, + "grad_norm": 1.300904148134606, + "learning_rate": 1.9051446148585833e-05, + "loss": 0.3303500711917877, + "step": 1378 + }, + { + "epoch": 0.366219625547736, + "grad_norm": 1.113413634877815, + "learning_rate": 1.9049578745152754e-05, + "loss": 0.3748486042022705, + "step": 1379 + }, + { + "epoch": 0.36648519452927897, + "grad_norm": 0.8707302355459249, + "learning_rate": 1.9047709597067628e-05, + "loss": 0.30339744687080383, + "step": 1380 + }, + { + "epoch": 0.3667507635108219, + "grad_norm": 1.0245709544347914, + "learning_rate": 1.9045838704690796e-05, + "loss": 0.31811147928237915, + "step": 1381 + }, + { + "epoch": 0.36701633249236487, + "grad_norm": 1.1759156162745943, + "learning_rate": 1.9043966068382945e-05, + "loss": 0.3541119694709778, + "step": 1382 + }, + { + "epoch": 0.3672819014739078, + "grad_norm": 1.0874467494483675, + "learning_rate": 1.9042091688505104e-05, + "loss": 0.36639657616615295, + "step": 1383 + }, + { + "epoch": 0.36754747045545083, + "grad_norm": 1.0242460437241268, + "learning_rate": 1.9040215565418628e-05, + "loss": 0.35859787464141846, + "step": 1384 + }, + { + "epoch": 0.3678130394369938, + "grad_norm": 1.017105790679022, + "learning_rate": 1.9038337699485207e-05, + "loss": 0.3210521340370178, + "step": 1385 + }, + { + "epoch": 0.36807860841853673, + "grad_norm": 1.0362268895966902, + "learning_rate": 1.9036458091066875e-05, + "loss": 0.3207433819770813, + "step": 1386 + }, + { + "epoch": 0.3683441774000797, + "grad_norm": 0.9948382455278952, + "learning_rate": 1.9034576740526e-05, + "loss": 0.3475082218647003, + "step": 1387 + }, + { + "epoch": 0.36860974638162264, + "grad_norm": 1.167057707852143, + "learning_rate": 1.903269364822528e-05, + "loss": 0.33252987265586853, + "step": 1388 + }, + { + "epoch": 0.3688753153631656, + "grad_norm": 1.0281516525035093, + "learning_rate": 1.903080881452776e-05, + "loss": 0.32200103998184204, + "step": 1389 + }, + { + "epoch": 0.36914088434470854, + "grad_norm": 1.0752934055327636, + "learning_rate": 1.9028922239796803e-05, + "loss": 0.34780022501945496, + "step": 1390 + }, + { + "epoch": 0.3694064533262515, + "grad_norm": 1.1028643639363398, + "learning_rate": 1.902703392439613e-05, + "loss": 0.35411912202835083, + "step": 1391 + }, + { + "epoch": 0.36967202230779445, + "grad_norm": 1.6627965093255739, + "learning_rate": 1.9025143868689773e-05, + "loss": 0.35232803225517273, + "step": 1392 + }, + { + "epoch": 0.3699375912893374, + "grad_norm": 1.168292115519334, + "learning_rate": 1.9023252073042128e-05, + "loss": 0.38561391830444336, + "step": 1393 + }, + { + "epoch": 0.37020316027088035, + "grad_norm": 0.9982322437598163, + "learning_rate": 1.9021358537817897e-05, + "loss": 0.3184170126914978, + "step": 1394 + }, + { + "epoch": 0.3704687292524233, + "grad_norm": 1.0557333187102689, + "learning_rate": 1.9019463263382142e-05, + "loss": 0.32455068826675415, + "step": 1395 + }, + { + "epoch": 0.37073429823396625, + "grad_norm": 1.0862364532602506, + "learning_rate": 1.901756625010024e-05, + "loss": 0.32998934388160706, + "step": 1396 + }, + { + "epoch": 0.3709998672155092, + "grad_norm": 1.1350071137219766, + "learning_rate": 1.901566749833792e-05, + "loss": 0.3361780643463135, + "step": 1397 + }, + { + "epoch": 0.37126543619705216, + "grad_norm": 1.1483051699341575, + "learning_rate": 1.9013767008461236e-05, + "loss": 0.3618711829185486, + "step": 1398 + }, + { + "epoch": 0.37153100517859516, + "grad_norm": 1.1250978483748488, + "learning_rate": 1.901186478083658e-05, + "loss": 0.3904131054878235, + "step": 1399 + }, + { + "epoch": 0.3717965741601381, + "grad_norm": 1.0885741580509858, + "learning_rate": 1.9009960815830676e-05, + "loss": 0.35742759704589844, + "step": 1400 + }, + { + "epoch": 0.37206214314168107, + "grad_norm": 1.073570835222054, + "learning_rate": 1.9008055113810595e-05, + "loss": 0.32880812883377075, + "step": 1401 + }, + { + "epoch": 0.372327712123224, + "grad_norm": 1.0645240727318732, + "learning_rate": 1.9006147675143724e-05, + "loss": 0.3379839360713959, + "step": 1402 + }, + { + "epoch": 0.37259328110476697, + "grad_norm": 1.1363528922504198, + "learning_rate": 1.90042385001978e-05, + "loss": 0.3635789453983307, + "step": 1403 + }, + { + "epoch": 0.3728588500863099, + "grad_norm": 1.1103620354136925, + "learning_rate": 1.900232758934089e-05, + "loss": 0.3462461233139038, + "step": 1404 + }, + { + "epoch": 0.3731244190678529, + "grad_norm": 1.1087128591527484, + "learning_rate": 1.900041494294139e-05, + "loss": 0.34578579664230347, + "step": 1405 + }, + { + "epoch": 0.3733899880493958, + "grad_norm": 1.1067984269435176, + "learning_rate": 1.899850056136804e-05, + "loss": 0.36266931891441345, + "step": 1406 + }, + { + "epoch": 0.3736555570309388, + "grad_norm": 1.089685836132972, + "learning_rate": 1.899658444498991e-05, + "loss": 0.34019365906715393, + "step": 1407 + }, + { + "epoch": 0.37392112601248173, + "grad_norm": 1.0009475991478056, + "learning_rate": 1.8994666594176404e-05, + "loss": 0.3057953119277954, + "step": 1408 + }, + { + "epoch": 0.3741866949940247, + "grad_norm": 1.1008245937613312, + "learning_rate": 1.8992747009297265e-05, + "loss": 0.3663131892681122, + "step": 1409 + }, + { + "epoch": 0.37445226397556763, + "grad_norm": 1.0696938984110862, + "learning_rate": 1.8990825690722557e-05, + "loss": 0.3402065634727478, + "step": 1410 + }, + { + "epoch": 0.3747178329571106, + "grad_norm": 1.017664192724319, + "learning_rate": 1.8988902638822693e-05, + "loss": 0.3437868654727936, + "step": 1411 + }, + { + "epoch": 0.37498340193865354, + "grad_norm": 1.2246388577961873, + "learning_rate": 1.8986977853968416e-05, + "loss": 0.40972524881362915, + "step": 1412 + }, + { + "epoch": 0.37524897092019655, + "grad_norm": 1.0293557658064552, + "learning_rate": 1.89850513365308e-05, + "loss": 0.3237977921962738, + "step": 1413 + }, + { + "epoch": 0.3755145399017395, + "grad_norm": 0.9581631299919097, + "learning_rate": 1.8983123086881254e-05, + "loss": 0.3146173357963562, + "step": 1414 + }, + { + "epoch": 0.37578010888328245, + "grad_norm": 0.9942979474502576, + "learning_rate": 1.8981193105391524e-05, + "loss": 0.33485543727874756, + "step": 1415 + }, + { + "epoch": 0.3760456778648254, + "grad_norm": 1.0963696340494955, + "learning_rate": 1.8979261392433685e-05, + "loss": 0.36379897594451904, + "step": 1416 + }, + { + "epoch": 0.37631124684636835, + "grad_norm": 0.902828061805848, + "learning_rate": 1.8977327948380154e-05, + "loss": 0.2737882137298584, + "step": 1417 + }, + { + "epoch": 0.3765768158279113, + "grad_norm": 1.1168765744666191, + "learning_rate": 1.897539277360367e-05, + "loss": 0.3554575443267822, + "step": 1418 + }, + { + "epoch": 0.37684238480945426, + "grad_norm": 1.0021058464909711, + "learning_rate": 1.897345586847731e-05, + "loss": 0.3297621011734009, + "step": 1419 + }, + { + "epoch": 0.3771079537909972, + "grad_norm": 1.1638469907551372, + "learning_rate": 1.8971517233374497e-05, + "loss": 0.32272985577583313, + "step": 1420 + }, + { + "epoch": 0.37737352277254016, + "grad_norm": 1.0280583772355378, + "learning_rate": 1.8969576868668967e-05, + "loss": 0.32175642251968384, + "step": 1421 + }, + { + "epoch": 0.3776390917540831, + "grad_norm": 1.1136468557030246, + "learning_rate": 1.8967634774734807e-05, + "loss": 0.35973137617111206, + "step": 1422 + }, + { + "epoch": 0.37790466073562606, + "grad_norm": 1.1892680335343753, + "learning_rate": 1.8965690951946424e-05, + "loss": 0.3385169506072998, + "step": 1423 + }, + { + "epoch": 0.378170229717169, + "grad_norm": 1.1245023779822048, + "learning_rate": 1.8963745400678564e-05, + "loss": 0.3683067560195923, + "step": 1424 + }, + { + "epoch": 0.37843579869871197, + "grad_norm": 1.1630069521478075, + "learning_rate": 1.896179812130631e-05, + "loss": 0.3711622357368469, + "step": 1425 + }, + { + "epoch": 0.3787013676802549, + "grad_norm": 1.015020556732164, + "learning_rate": 1.895984911420507e-05, + "loss": 0.30416572093963623, + "step": 1426 + }, + { + "epoch": 0.3789669366617979, + "grad_norm": 1.079958708031102, + "learning_rate": 1.8957898379750598e-05, + "loss": 0.3439522385597229, + "step": 1427 + }, + { + "epoch": 0.3792325056433409, + "grad_norm": 1.1382084488728177, + "learning_rate": 1.895594591831896e-05, + "loss": 0.3663806617259979, + "step": 1428 + }, + { + "epoch": 0.37949807462488383, + "grad_norm": 1.0501527452156108, + "learning_rate": 1.895399173028658e-05, + "loss": 0.32132354378700256, + "step": 1429 + }, + { + "epoch": 0.3797636436064268, + "grad_norm": 0.9916462964383544, + "learning_rate": 1.8952035816030196e-05, + "loss": 0.3040635585784912, + "step": 1430 + }, + { + "epoch": 0.38002921258796973, + "grad_norm": 1.1155299107557486, + "learning_rate": 1.8950078175926886e-05, + "loss": 0.3548869788646698, + "step": 1431 + }, + { + "epoch": 0.3802947815695127, + "grad_norm": 1.1280933582225339, + "learning_rate": 1.894811881035406e-05, + "loss": 0.3114319443702698, + "step": 1432 + }, + { + "epoch": 0.38056035055105564, + "grad_norm": 1.151174980739505, + "learning_rate": 1.894615771968946e-05, + "loss": 0.3589673936367035, + "step": 1433 + }, + { + "epoch": 0.3808259195325986, + "grad_norm": 1.1074661491088642, + "learning_rate": 1.894419490431116e-05, + "loss": 0.3073863983154297, + "step": 1434 + }, + { + "epoch": 0.38109148851414154, + "grad_norm": 1.0689323921068359, + "learning_rate": 1.8942230364597572e-05, + "loss": 0.32474076747894287, + "step": 1435 + }, + { + "epoch": 0.3813570574956845, + "grad_norm": 2.6127931856999314, + "learning_rate": 1.8940264100927432e-05, + "loss": 0.3363546133041382, + "step": 1436 + }, + { + "epoch": 0.38162262647722744, + "grad_norm": 0.9995665434586938, + "learning_rate": 1.8938296113679814e-05, + "loss": 0.33679312467575073, + "step": 1437 + }, + { + "epoch": 0.3818881954587704, + "grad_norm": 1.0113319573344832, + "learning_rate": 1.8936326403234125e-05, + "loss": 0.33171382546424866, + "step": 1438 + }, + { + "epoch": 0.38215376444031335, + "grad_norm": 1.0880785150495547, + "learning_rate": 1.8934354969970097e-05, + "loss": 0.3717402219772339, + "step": 1439 + }, + { + "epoch": 0.3824193334218563, + "grad_norm": 1.1102375952968466, + "learning_rate": 1.8932381814267802e-05, + "loss": 0.335337370634079, + "step": 1440 + }, + { + "epoch": 0.3826849024033993, + "grad_norm": 1.010201255539417, + "learning_rate": 1.893040693650764e-05, + "loss": 0.32745444774627686, + "step": 1441 + }, + { + "epoch": 0.38295047138494226, + "grad_norm": 1.045820108792802, + "learning_rate": 1.892843033707035e-05, + "loss": 0.34863507747650146, + "step": 1442 + }, + { + "epoch": 0.3832160403664852, + "grad_norm": 1.0344465763282014, + "learning_rate": 1.8926452016336987e-05, + "loss": 0.3428313732147217, + "step": 1443 + }, + { + "epoch": 0.38348160934802816, + "grad_norm": 0.9882681324904586, + "learning_rate": 1.8924471974688956e-05, + "loss": 0.3223801851272583, + "step": 1444 + }, + { + "epoch": 0.3837471783295711, + "grad_norm": 1.2003387152989082, + "learning_rate": 1.8922490212507983e-05, + "loss": 0.33248746395111084, + "step": 1445 + }, + { + "epoch": 0.38401274731111407, + "grad_norm": 1.0404747226700646, + "learning_rate": 1.8920506730176125e-05, + "loss": 0.3472076654434204, + "step": 1446 + }, + { + "epoch": 0.384278316292657, + "grad_norm": 1.229166058737197, + "learning_rate": 1.891852152807578e-05, + "loss": 0.4385136365890503, + "step": 1447 + }, + { + "epoch": 0.38454388527419997, + "grad_norm": 1.0444838405880497, + "learning_rate": 1.8916534606589666e-05, + "loss": 0.36871540546417236, + "step": 1448 + }, + { + "epoch": 0.3848094542557429, + "grad_norm": 1.0803859921763799, + "learning_rate": 1.8914545966100843e-05, + "loss": 0.3136710524559021, + "step": 1449 + }, + { + "epoch": 0.3850750232372859, + "grad_norm": 1.0902031451870209, + "learning_rate": 1.891255560699269e-05, + "loss": 0.3236457109451294, + "step": 1450 + }, + { + "epoch": 0.3853405922188288, + "grad_norm": 0.9936714818929803, + "learning_rate": 1.8910563529648933e-05, + "loss": 0.3176822066307068, + "step": 1451 + }, + { + "epoch": 0.3856061612003718, + "grad_norm": 1.0635659473367998, + "learning_rate": 1.890856973445362e-05, + "loss": 0.3531719744205475, + "step": 1452 + }, + { + "epoch": 0.38587173018191473, + "grad_norm": 0.9470574553293423, + "learning_rate": 1.8906574221791127e-05, + "loss": 0.2911416292190552, + "step": 1453 + }, + { + "epoch": 0.3861372991634577, + "grad_norm": 1.0992858203425024, + "learning_rate": 1.890457699204617e-05, + "loss": 0.3522392511367798, + "step": 1454 + }, + { + "epoch": 0.3864028681450007, + "grad_norm": 1.1706910837372075, + "learning_rate": 1.8902578045603787e-05, + "loss": 0.3724471628665924, + "step": 1455 + }, + { + "epoch": 0.38666843712654364, + "grad_norm": 1.1807687078274312, + "learning_rate": 1.890057738284935e-05, + "loss": 0.2935449481010437, + "step": 1456 + }, + { + "epoch": 0.3869340061080866, + "grad_norm": 1.1181603604376231, + "learning_rate": 1.8898575004168568e-05, + "loss": 0.3413137197494507, + "step": 1457 + }, + { + "epoch": 0.38719957508962954, + "grad_norm": 1.1002740783107277, + "learning_rate": 1.8896570909947477e-05, + "loss": 0.32282277941703796, + "step": 1458 + }, + { + "epoch": 0.3874651440711725, + "grad_norm": 1.0071931608273124, + "learning_rate": 1.8894565100572435e-05, + "loss": 0.3285476565361023, + "step": 1459 + }, + { + "epoch": 0.38773071305271545, + "grad_norm": 1.010871057653593, + "learning_rate": 1.8892557576430147e-05, + "loss": 0.29517480731010437, + "step": 1460 + }, + { + "epoch": 0.3879962820342584, + "grad_norm": 0.9710184588467288, + "learning_rate": 1.8890548337907636e-05, + "loss": 0.2913149297237396, + "step": 1461 + }, + { + "epoch": 0.38826185101580135, + "grad_norm": 1.096024980027641, + "learning_rate": 1.8888537385392258e-05, + "loss": 0.32154160737991333, + "step": 1462 + }, + { + "epoch": 0.3885274199973443, + "grad_norm": 1.157775550745099, + "learning_rate": 1.88865247192717e-05, + "loss": 0.30677905678749084, + "step": 1463 + }, + { + "epoch": 0.38879298897888726, + "grad_norm": 1.1509749466488566, + "learning_rate": 1.888451033993399e-05, + "loss": 0.37568169832229614, + "step": 1464 + }, + { + "epoch": 0.3890585579604302, + "grad_norm": 1.0554287268781006, + "learning_rate": 1.8882494247767465e-05, + "loss": 0.34972083568573, + "step": 1465 + }, + { + "epoch": 0.38932412694197316, + "grad_norm": 1.1253148629548142, + "learning_rate": 1.888047644316081e-05, + "loss": 0.3198736906051636, + "step": 1466 + }, + { + "epoch": 0.3895896959235161, + "grad_norm": 1.0268445477998984, + "learning_rate": 1.887845692650303e-05, + "loss": 0.3405846953392029, + "step": 1467 + }, + { + "epoch": 0.38985526490505906, + "grad_norm": 1.1800981831391237, + "learning_rate": 1.8876435698183465e-05, + "loss": 0.3600257337093353, + "step": 1468 + }, + { + "epoch": 0.39012083388660207, + "grad_norm": 1.042232512137109, + "learning_rate": 1.887441275859179e-05, + "loss": 0.32415103912353516, + "step": 1469 + }, + { + "epoch": 0.390386402868145, + "grad_norm": 1.1736259107415346, + "learning_rate": 1.8872388108117995e-05, + "loss": 0.3450891673564911, + "step": 1470 + }, + { + "epoch": 0.390651971849688, + "grad_norm": 1.0534871304087963, + "learning_rate": 1.8870361747152416e-05, + "loss": 0.3210057318210602, + "step": 1471 + }, + { + "epoch": 0.3909175408312309, + "grad_norm": 1.1749127166764717, + "learning_rate": 1.8868333676085707e-05, + "loss": 0.3615706264972687, + "step": 1472 + }, + { + "epoch": 0.3911831098127739, + "grad_norm": 1.0750237065987462, + "learning_rate": 1.8866303895308856e-05, + "loss": 0.34149813652038574, + "step": 1473 + }, + { + "epoch": 0.39144867879431683, + "grad_norm": 0.91786674858188, + "learning_rate": 1.8864272405213188e-05, + "loss": 0.2795295715332031, + "step": 1474 + }, + { + "epoch": 0.3917142477758598, + "grad_norm": 1.1110559595870293, + "learning_rate": 1.8862239206190337e-05, + "loss": 0.3459053933620453, + "step": 1475 + }, + { + "epoch": 0.39197981675740273, + "grad_norm": 1.1048084354602663, + "learning_rate": 1.8860204298632294e-05, + "loss": 0.3531072735786438, + "step": 1476 + }, + { + "epoch": 0.3922453857389457, + "grad_norm": 1.128095083544478, + "learning_rate": 1.8858167682931357e-05, + "loss": 0.3788977265357971, + "step": 1477 + }, + { + "epoch": 0.39251095472048864, + "grad_norm": 1.3263027090109385, + "learning_rate": 1.8856129359480163e-05, + "loss": 0.3210671544075012, + "step": 1478 + }, + { + "epoch": 0.3927765237020316, + "grad_norm": 1.0773816671223826, + "learning_rate": 1.8854089328671673e-05, + "loss": 0.3442102074623108, + "step": 1479 + }, + { + "epoch": 0.39304209268357454, + "grad_norm": 1.0501956367137624, + "learning_rate": 1.885204759089919e-05, + "loss": 0.29128211736679077, + "step": 1480 + }, + { + "epoch": 0.3933076616651175, + "grad_norm": 1.1403330671915806, + "learning_rate": 1.885000414655633e-05, + "loss": 0.3601154088973999, + "step": 1481 + }, + { + "epoch": 0.39357323064666044, + "grad_norm": 1.032058056545269, + "learning_rate": 1.8847958996037042e-05, + "loss": 0.3173052668571472, + "step": 1482 + }, + { + "epoch": 0.39383879962820345, + "grad_norm": 1.0840123249628424, + "learning_rate": 1.8845912139735616e-05, + "loss": 0.32759106159210205, + "step": 1483 + }, + { + "epoch": 0.3941043686097464, + "grad_norm": 1.0868479290241493, + "learning_rate": 1.8843863578046657e-05, + "loss": 0.3213586211204529, + "step": 1484 + }, + { + "epoch": 0.39436993759128935, + "grad_norm": 1.0263834848721582, + "learning_rate": 1.8841813311365105e-05, + "loss": 0.342970073223114, + "step": 1485 + }, + { + "epoch": 0.3946355065728323, + "grad_norm": 1.1467746465148738, + "learning_rate": 1.883976134008622e-05, + "loss": 0.3852401375770569, + "step": 1486 + }, + { + "epoch": 0.39490107555437526, + "grad_norm": 1.0974253808771965, + "learning_rate": 1.883770766460561e-05, + "loss": 0.2965390682220459, + "step": 1487 + }, + { + "epoch": 0.3951666445359182, + "grad_norm": 1.1655078685340161, + "learning_rate": 1.883565228531919e-05, + "loss": 0.3899655044078827, + "step": 1488 + }, + { + "epoch": 0.39543221351746116, + "grad_norm": 1.1086105484757183, + "learning_rate": 1.8833595202623222e-05, + "loss": 0.339199423789978, + "step": 1489 + }, + { + "epoch": 0.3956977824990041, + "grad_norm": 1.049526058190211, + "learning_rate": 1.8831536416914278e-05, + "loss": 0.3121682405471802, + "step": 1490 + }, + { + "epoch": 0.39596335148054707, + "grad_norm": 1.073417591294797, + "learning_rate": 1.8829475928589272e-05, + "loss": 0.31947991251945496, + "step": 1491 + }, + { + "epoch": 0.39622892046209, + "grad_norm": 1.1660176936819076, + "learning_rate": 1.882741373804544e-05, + "loss": 0.3569333553314209, + "step": 1492 + }, + { + "epoch": 0.39649448944363297, + "grad_norm": 1.1521030930761056, + "learning_rate": 1.882534984568035e-05, + "loss": 0.3739020526409149, + "step": 1493 + }, + { + "epoch": 0.3967600584251759, + "grad_norm": 1.0930221251915908, + "learning_rate": 1.882328425189189e-05, + "loss": 0.34350353479385376, + "step": 1494 + }, + { + "epoch": 0.3970256274067189, + "grad_norm": 1.0780622136577362, + "learning_rate": 1.882121695707829e-05, + "loss": 0.3103981614112854, + "step": 1495 + }, + { + "epoch": 0.3972911963882618, + "grad_norm": 1.066229649085828, + "learning_rate": 1.8819147961638104e-05, + "loss": 0.33847716450691223, + "step": 1496 + }, + { + "epoch": 0.39755676536980483, + "grad_norm": 0.943119049120047, + "learning_rate": 1.8817077265970196e-05, + "loss": 0.3080996870994568, + "step": 1497 + }, + { + "epoch": 0.3978223343513478, + "grad_norm": 0.9758181744675688, + "learning_rate": 1.8815004870473777e-05, + "loss": 0.3247831463813782, + "step": 1498 + }, + { + "epoch": 0.39808790333289074, + "grad_norm": 0.9965389459031595, + "learning_rate": 1.8812930775548387e-05, + "loss": 0.2919698655605316, + "step": 1499 + }, + { + "epoch": 0.3983534723144337, + "grad_norm": 1.1815639690812958, + "learning_rate": 1.8810854981593883e-05, + "loss": 0.3627319931983948, + "step": 1500 + }, + { + "epoch": 0.39861904129597664, + "grad_norm": 1.0245222516327634, + "learning_rate": 1.880877748901045e-05, + "loss": 0.3619319796562195, + "step": 1501 + }, + { + "epoch": 0.3988846102775196, + "grad_norm": 1.0294076265521692, + "learning_rate": 1.8806698298198608e-05, + "loss": 0.3393789827823639, + "step": 1502 + }, + { + "epoch": 0.39915017925906254, + "grad_norm": 1.1375999694611314, + "learning_rate": 1.88046174095592e-05, + "loss": 0.3736116886138916, + "step": 1503 + }, + { + "epoch": 0.3994157482406055, + "grad_norm": 0.9615847393601772, + "learning_rate": 1.8802534823493395e-05, + "loss": 0.32829388976097107, + "step": 1504 + }, + { + "epoch": 0.39968131722214845, + "grad_norm": 1.004520084683698, + "learning_rate": 1.8800450540402694e-05, + "loss": 0.340041846036911, + "step": 1505 + }, + { + "epoch": 0.3999468862036914, + "grad_norm": 1.6423190284198783, + "learning_rate": 1.8798364560688917e-05, + "loss": 0.2830736041069031, + "step": 1506 + }, + { + "epoch": 0.40021245518523435, + "grad_norm": 1.126838308447994, + "learning_rate": 1.8796276884754224e-05, + "loss": 0.33011579513549805, + "step": 1507 + }, + { + "epoch": 0.4004780241667773, + "grad_norm": 1.0024833819275993, + "learning_rate": 1.8794187513001088e-05, + "loss": 0.2893834114074707, + "step": 1508 + }, + { + "epoch": 0.40074359314832025, + "grad_norm": 1.0682148927963429, + "learning_rate": 1.8792096445832317e-05, + "loss": 0.3590015172958374, + "step": 1509 + }, + { + "epoch": 0.4010091621298632, + "grad_norm": 1.1883404603513603, + "learning_rate": 1.8790003683651045e-05, + "loss": 0.3968508541584015, + "step": 1510 + }, + { + "epoch": 0.4012747311114062, + "grad_norm": 1.1506641785596874, + "learning_rate": 1.878790922686073e-05, + "loss": 0.324398934841156, + "step": 1511 + }, + { + "epoch": 0.40154030009294917, + "grad_norm": 1.0455658872732225, + "learning_rate": 1.8785813075865164e-05, + "loss": 0.35111895203590393, + "step": 1512 + }, + { + "epoch": 0.4018058690744921, + "grad_norm": 1.055231257150353, + "learning_rate": 1.8783715231068452e-05, + "loss": 0.28124356269836426, + "step": 1513 + }, + { + "epoch": 0.40207143805603507, + "grad_norm": 1.0070468428923411, + "learning_rate": 1.878161569287504e-05, + "loss": 0.28962311148643494, + "step": 1514 + }, + { + "epoch": 0.402337007037578, + "grad_norm": 1.0934983041480315, + "learning_rate": 1.877951446168969e-05, + "loss": 0.3646606206893921, + "step": 1515 + }, + { + "epoch": 0.402602576019121, + "grad_norm": 1.1065863254454682, + "learning_rate": 1.8777411537917497e-05, + "loss": 0.2815355360507965, + "step": 1516 + }, + { + "epoch": 0.4028681450006639, + "grad_norm": 1.1372178900816394, + "learning_rate": 1.877530692196388e-05, + "loss": 0.33208370208740234, + "step": 1517 + }, + { + "epoch": 0.4031337139822069, + "grad_norm": 1.0968319662456871, + "learning_rate": 1.8773200614234587e-05, + "loss": 0.33741289377212524, + "step": 1518 + }, + { + "epoch": 0.40339928296374983, + "grad_norm": 1.1178822197952292, + "learning_rate": 1.877109261513568e-05, + "loss": 0.31304073333740234, + "step": 1519 + }, + { + "epoch": 0.4036648519452928, + "grad_norm": 1.264796618244999, + "learning_rate": 1.8768982925073566e-05, + "loss": 0.32556387782096863, + "step": 1520 + }, + { + "epoch": 0.40393042092683573, + "grad_norm": 1.1057344226732335, + "learning_rate": 1.8766871544454963e-05, + "loss": 0.3584224581718445, + "step": 1521 + }, + { + "epoch": 0.4041959899083787, + "grad_norm": 1.0109621512685618, + "learning_rate": 1.8764758473686918e-05, + "loss": 0.2864416837692261, + "step": 1522 + }, + { + "epoch": 0.40446155888992164, + "grad_norm": 1.0390539229722413, + "learning_rate": 1.8762643713176815e-05, + "loss": 0.28925320506095886, + "step": 1523 + }, + { + "epoch": 0.4047271278714646, + "grad_norm": 1.022628245189221, + "learning_rate": 1.876052726333235e-05, + "loss": 0.30940550565719604, + "step": 1524 + }, + { + "epoch": 0.4049926968530076, + "grad_norm": 1.1648500528958037, + "learning_rate": 1.875840912456155e-05, + "loss": 0.3463154733181, + "step": 1525 + }, + { + "epoch": 0.40525826583455055, + "grad_norm": 1.1823420506345301, + "learning_rate": 1.8756289297272764e-05, + "loss": 0.3349658250808716, + "step": 1526 + }, + { + "epoch": 0.4055238348160935, + "grad_norm": 1.0511817500052025, + "learning_rate": 1.8754167781874674e-05, + "loss": 0.32588714361190796, + "step": 1527 + }, + { + "epoch": 0.40578940379763645, + "grad_norm": 1.0750045197041278, + "learning_rate": 1.875204457877628e-05, + "loss": 0.33787310123443604, + "step": 1528 + }, + { + "epoch": 0.4060549727791794, + "grad_norm": 1.0444881434472735, + "learning_rate": 1.8749919688386912e-05, + "loss": 0.3223261833190918, + "step": 1529 + }, + { + "epoch": 0.40632054176072235, + "grad_norm": 1.2251483540500576, + "learning_rate": 1.8747793111116226e-05, + "loss": 0.38505882024765015, + "step": 1530 + }, + { + "epoch": 0.4065861107422653, + "grad_norm": 1.077913563059366, + "learning_rate": 1.8745664847374197e-05, + "loss": 0.33071833848953247, + "step": 1531 + }, + { + "epoch": 0.40685167972380826, + "grad_norm": 1.2405893427169952, + "learning_rate": 1.874353489757113e-05, + "loss": 0.36603987216949463, + "step": 1532 + }, + { + "epoch": 0.4071172487053512, + "grad_norm": 0.9982674001932202, + "learning_rate": 1.874140326211766e-05, + "loss": 0.3103085160255432, + "step": 1533 + }, + { + "epoch": 0.40738281768689416, + "grad_norm": 1.1470515997968143, + "learning_rate": 1.873926994142473e-05, + "loss": 0.3471127152442932, + "step": 1534 + }, + { + "epoch": 0.4076483866684371, + "grad_norm": 1.0759117431352352, + "learning_rate": 1.873713493590363e-05, + "loss": 0.33152899146080017, + "step": 1535 + }, + { + "epoch": 0.40791395564998006, + "grad_norm": 1.0887192073538825, + "learning_rate": 1.8734998245965958e-05, + "loss": 0.340177059173584, + "step": 1536 + }, + { + "epoch": 0.408179524631523, + "grad_norm": 1.175803638176176, + "learning_rate": 1.8732859872023644e-05, + "loss": 0.3331618010997772, + "step": 1537 + }, + { + "epoch": 0.40844509361306597, + "grad_norm": 1.0971311272588662, + "learning_rate": 1.8730719814488937e-05, + "loss": 0.3911997675895691, + "step": 1538 + }, + { + "epoch": 0.408710662594609, + "grad_norm": 1.0986179012488992, + "learning_rate": 1.8728578073774427e-05, + "loss": 0.3699817955493927, + "step": 1539 + }, + { + "epoch": 0.4089762315761519, + "grad_norm": 1.086312859301249, + "learning_rate": 1.8726434650293e-05, + "loss": 0.31567275524139404, + "step": 1540 + }, + { + "epoch": 0.4092418005576949, + "grad_norm": 1.1099279461258769, + "learning_rate": 1.8724289544457897e-05, + "loss": 0.3387305438518524, + "step": 1541 + }, + { + "epoch": 0.40950736953923783, + "grad_norm": 1.6366665349052443, + "learning_rate": 1.8722142756682663e-05, + "loss": 0.3460234999656677, + "step": 1542 + }, + { + "epoch": 0.4097729385207808, + "grad_norm": 1.1109783591024025, + "learning_rate": 1.8719994287381173e-05, + "loss": 0.35653382539749146, + "step": 1543 + }, + { + "epoch": 0.41003850750232373, + "grad_norm": 1.1054235252004945, + "learning_rate": 1.8717844136967626e-05, + "loss": 0.3828277885913849, + "step": 1544 + }, + { + "epoch": 0.4103040764838667, + "grad_norm": 1.0929819002464054, + "learning_rate": 1.871569230585655e-05, + "loss": 0.35883858799934387, + "step": 1545 + }, + { + "epoch": 0.41056964546540964, + "grad_norm": 0.988264800308937, + "learning_rate": 1.8713538794462783e-05, + "loss": 0.27414464950561523, + "step": 1546 + }, + { + "epoch": 0.4108352144469526, + "grad_norm": 1.0216234157414708, + "learning_rate": 1.871138360320151e-05, + "loss": 0.2924337387084961, + "step": 1547 + }, + { + "epoch": 0.41110078342849554, + "grad_norm": 1.1264719097344291, + "learning_rate": 1.8709226732488216e-05, + "loss": 0.34270918369293213, + "step": 1548 + }, + { + "epoch": 0.4113663524100385, + "grad_norm": 1.056133674601812, + "learning_rate": 1.870706818273872e-05, + "loss": 0.33866482973098755, + "step": 1549 + }, + { + "epoch": 0.41163192139158145, + "grad_norm": 1.0578429496037574, + "learning_rate": 1.8704907954369176e-05, + "loss": 0.3350633382797241, + "step": 1550 + }, + { + "epoch": 0.4118974903731244, + "grad_norm": 1.0981882806330738, + "learning_rate": 1.870274604779604e-05, + "loss": 0.32763785123825073, + "step": 1551 + }, + { + "epoch": 0.41216305935466735, + "grad_norm": 1.1235534336905566, + "learning_rate": 1.8700582463436102e-05, + "loss": 0.3130378723144531, + "step": 1552 + }, + { + "epoch": 0.41242862833621036, + "grad_norm": 1.1311593123986747, + "learning_rate": 1.8698417201706484e-05, + "loss": 0.34318777918815613, + "step": 1553 + }, + { + "epoch": 0.4126941973177533, + "grad_norm": 1.038517953287962, + "learning_rate": 1.8696250263024617e-05, + "loss": 0.3250104784965515, + "step": 1554 + }, + { + "epoch": 0.41295976629929626, + "grad_norm": 1.1047081419569766, + "learning_rate": 1.869408164780826e-05, + "loss": 0.3409217298030853, + "step": 1555 + }, + { + "epoch": 0.4132253352808392, + "grad_norm": 0.9892429720688775, + "learning_rate": 1.86919113564755e-05, + "loss": 0.2885017395019531, + "step": 1556 + }, + { + "epoch": 0.41349090426238216, + "grad_norm": 0.9861078966083267, + "learning_rate": 1.8689739389444744e-05, + "loss": 0.31912562251091003, + "step": 1557 + }, + { + "epoch": 0.4137564732439251, + "grad_norm": 1.0037060940033242, + "learning_rate": 1.8687565747134716e-05, + "loss": 0.29874011874198914, + "step": 1558 + }, + { + "epoch": 0.41402204222546807, + "grad_norm": 1.0308167425812278, + "learning_rate": 1.8685390429964473e-05, + "loss": 0.3132701516151428, + "step": 1559 + }, + { + "epoch": 0.414287611207011, + "grad_norm": 1.0029824533275895, + "learning_rate": 1.868321343835339e-05, + "loss": 0.31158843636512756, + "step": 1560 + }, + { + "epoch": 0.41455318018855397, + "grad_norm": 0.959841401113078, + "learning_rate": 1.8681034772721167e-05, + "loss": 0.30490344762802124, + "step": 1561 + }, + { + "epoch": 0.4148187491700969, + "grad_norm": 1.1053356359227535, + "learning_rate": 1.867885443348782e-05, + "loss": 0.3150998055934906, + "step": 1562 + }, + { + "epoch": 0.4150843181516399, + "grad_norm": 1.0578010897773087, + "learning_rate": 1.86766724210737e-05, + "loss": 0.3391645550727844, + "step": 1563 + }, + { + "epoch": 0.4153498871331828, + "grad_norm": 1.1317933031731224, + "learning_rate": 1.8674488735899466e-05, + "loss": 0.35013002157211304, + "step": 1564 + }, + { + "epoch": 0.4156154561147258, + "grad_norm": 1.1514144052665038, + "learning_rate": 1.867230337838611e-05, + "loss": 0.3455789387226105, + "step": 1565 + }, + { + "epoch": 0.41588102509626873, + "grad_norm": 1.0985743755307058, + "learning_rate": 1.8670116348954945e-05, + "loss": 0.3179319500923157, + "step": 1566 + }, + { + "epoch": 0.41614659407781174, + "grad_norm": 1.046997092909125, + "learning_rate": 1.8667927648027596e-05, + "loss": 0.3628920018672943, + "step": 1567 + }, + { + "epoch": 0.4164121630593547, + "grad_norm": 1.1175553372657145, + "learning_rate": 1.8665737276026033e-05, + "loss": 0.33599400520324707, + "step": 1568 + }, + { + "epoch": 0.41667773204089764, + "grad_norm": 1.0741100001694928, + "learning_rate": 1.8663545233372524e-05, + "loss": 0.31519144773483276, + "step": 1569 + }, + { + "epoch": 0.4169433010224406, + "grad_norm": 1.0564388001425704, + "learning_rate": 1.8661351520489667e-05, + "loss": 0.3326237201690674, + "step": 1570 + }, + { + "epoch": 0.41720887000398355, + "grad_norm": 1.0506499046982631, + "learning_rate": 1.865915613780039e-05, + "loss": 0.35254499316215515, + "step": 1571 + }, + { + "epoch": 0.4174744389855265, + "grad_norm": 1.134962500533026, + "learning_rate": 1.8656959085727936e-05, + "loss": 0.36689436435699463, + "step": 1572 + }, + { + "epoch": 0.41774000796706945, + "grad_norm": 1.104702895545828, + "learning_rate": 1.8654760364695873e-05, + "loss": 0.3113600015640259, + "step": 1573 + }, + { + "epoch": 0.4180055769486124, + "grad_norm": 1.0072243279377031, + "learning_rate": 1.865255997512808e-05, + "loss": 0.3336432874202728, + "step": 1574 + }, + { + "epoch": 0.41827114593015535, + "grad_norm": 1.1762721663897004, + "learning_rate": 1.8650357917448774e-05, + "loss": 0.3657492995262146, + "step": 1575 + }, + { + "epoch": 0.4185367149116983, + "grad_norm": 1.1286123264778107, + "learning_rate": 1.864815419208248e-05, + "loss": 0.3087846338748932, + "step": 1576 + }, + { + "epoch": 0.41880228389324126, + "grad_norm": 1.059893684126419, + "learning_rate": 1.8645948799454058e-05, + "loss": 0.31422343850135803, + "step": 1577 + }, + { + "epoch": 0.4190678528747842, + "grad_norm": 1.0232345658393134, + "learning_rate": 1.8643741739988672e-05, + "loss": 0.3172760009765625, + "step": 1578 + }, + { + "epoch": 0.41933342185632716, + "grad_norm": 1.131569038679809, + "learning_rate": 1.8641533014111824e-05, + "loss": 0.36819136142730713, + "step": 1579 + }, + { + "epoch": 0.4195989908378701, + "grad_norm": 1.0215370560204735, + "learning_rate": 1.863932262224933e-05, + "loss": 0.29081088304519653, + "step": 1580 + }, + { + "epoch": 0.4198645598194131, + "grad_norm": 1.0406040134422527, + "learning_rate": 1.8637110564827325e-05, + "loss": 0.3209632635116577, + "step": 1581 + }, + { + "epoch": 0.42013012880095607, + "grad_norm": 1.9161132832998955, + "learning_rate": 1.863489684227227e-05, + "loss": 0.3357914686203003, + "step": 1582 + }, + { + "epoch": 0.420395697782499, + "grad_norm": 1.0469990353974015, + "learning_rate": 1.8632681455010937e-05, + "loss": 0.285677969455719, + "step": 1583 + }, + { + "epoch": 0.420661266764042, + "grad_norm": 1.1491447855439996, + "learning_rate": 1.8630464403470435e-05, + "loss": 0.377876341342926, + "step": 1584 + }, + { + "epoch": 0.4209268357455849, + "grad_norm": 1.0642007656116979, + "learning_rate": 1.8628245688078187e-05, + "loss": 0.3141768276691437, + "step": 1585 + }, + { + "epoch": 0.4211924047271279, + "grad_norm": 1.078787810404599, + "learning_rate": 1.8626025309261927e-05, + "loss": 0.34249693155288696, + "step": 1586 + }, + { + "epoch": 0.42145797370867083, + "grad_norm": 1.1583509747022063, + "learning_rate": 1.8623803267449722e-05, + "loss": 0.32564717531204224, + "step": 1587 + }, + { + "epoch": 0.4217235426902138, + "grad_norm": 1.0623179841052965, + "learning_rate": 1.8621579563069957e-05, + "loss": 0.3425004184246063, + "step": 1588 + }, + { + "epoch": 0.42198911167175673, + "grad_norm": 1.05392590229203, + "learning_rate": 1.8619354196551333e-05, + "loss": 0.3676222562789917, + "step": 1589 + }, + { + "epoch": 0.4222546806532997, + "grad_norm": 0.9612536546184688, + "learning_rate": 1.8617127168322877e-05, + "loss": 0.28915971517562866, + "step": 1590 + }, + { + "epoch": 0.42252024963484264, + "grad_norm": 1.1293248025877465, + "learning_rate": 1.8614898478813933e-05, + "loss": 0.3387221097946167, + "step": 1591 + }, + { + "epoch": 0.4227858186163856, + "grad_norm": 1.0804518757125117, + "learning_rate": 1.8612668128454164e-05, + "loss": 0.33886784315109253, + "step": 1592 + }, + { + "epoch": 0.42305138759792854, + "grad_norm": 1.0780507904890781, + "learning_rate": 1.8610436117673557e-05, + "loss": 0.3364121913909912, + "step": 1593 + }, + { + "epoch": 0.4233169565794715, + "grad_norm": 1.0590527240631433, + "learning_rate": 1.8608202446902418e-05, + "loss": 0.3661370873451233, + "step": 1594 + }, + { + "epoch": 0.4235825255610145, + "grad_norm": 1.254416564930449, + "learning_rate": 1.8605967116571372e-05, + "loss": 0.2980557680130005, + "step": 1595 + }, + { + "epoch": 0.42384809454255745, + "grad_norm": 1.180518248335952, + "learning_rate": 1.8603730127111363e-05, + "loss": 0.36112043261528015, + "step": 1596 + }, + { + "epoch": 0.4241136635241004, + "grad_norm": 0.9967676484164163, + "learning_rate": 1.860149147895366e-05, + "loss": 0.30641958117485046, + "step": 1597 + }, + { + "epoch": 0.42437923250564336, + "grad_norm": 1.06006138769355, + "learning_rate": 1.8599251172529836e-05, + "loss": 0.3312561511993408, + "step": 1598 + }, + { + "epoch": 0.4246448014871863, + "grad_norm": 1.070580032885208, + "learning_rate": 1.859700920827181e-05, + "loss": 0.3757131099700928, + "step": 1599 + }, + { + "epoch": 0.42491037046872926, + "grad_norm": 1.0514692584176801, + "learning_rate": 1.8594765586611805e-05, + "loss": 0.3225080370903015, + "step": 1600 + }, + { + "epoch": 0.4251759394502722, + "grad_norm": 1.0857454483782787, + "learning_rate": 1.859252030798236e-05, + "loss": 0.35943928360939026, + "step": 1601 + }, + { + "epoch": 0.42544150843181516, + "grad_norm": 0.9907794348406631, + "learning_rate": 1.859027337281633e-05, + "loss": 0.29319390654563904, + "step": 1602 + }, + { + "epoch": 0.4257070774133581, + "grad_norm": 1.1441852776057728, + "learning_rate": 1.8588024781546914e-05, + "loss": 0.32320237159729004, + "step": 1603 + }, + { + "epoch": 0.42597264639490107, + "grad_norm": 1.1070076098385897, + "learning_rate": 1.8585774534607606e-05, + "loss": 0.3381520211696625, + "step": 1604 + }, + { + "epoch": 0.426238215376444, + "grad_norm": 0.9826840529093485, + "learning_rate": 1.858352263243223e-05, + "loss": 0.30010825395584106, + "step": 1605 + }, + { + "epoch": 0.42650378435798697, + "grad_norm": 0.9805553200940528, + "learning_rate": 1.8581269075454918e-05, + "loss": 0.26282748579978943, + "step": 1606 + }, + { + "epoch": 0.4267693533395299, + "grad_norm": 1.0395702570014627, + "learning_rate": 1.857901386411014e-05, + "loss": 0.33613401651382446, + "step": 1607 + }, + { + "epoch": 0.4270349223210729, + "grad_norm": 1.1625768546626036, + "learning_rate": 1.8576756998832667e-05, + "loss": 0.34522315859794617, + "step": 1608 + }, + { + "epoch": 0.4273004913026159, + "grad_norm": 1.0776480516530333, + "learning_rate": 1.8574498480057598e-05, + "loss": 0.3253153860569, + "step": 1609 + }, + { + "epoch": 0.42756606028415883, + "grad_norm": 1.177683979502923, + "learning_rate": 1.8572238308220347e-05, + "loss": 0.32180655002593994, + "step": 1610 + }, + { + "epoch": 0.4278316292657018, + "grad_norm": 1.2444289754345055, + "learning_rate": 1.856997648375665e-05, + "loss": 0.3274008333683014, + "step": 1611 + }, + { + "epoch": 0.42809719824724474, + "grad_norm": 1.006782047196068, + "learning_rate": 1.8567713007102565e-05, + "loss": 0.3196510374546051, + "step": 1612 + }, + { + "epoch": 0.4283627672287877, + "grad_norm": 1.0069133029708661, + "learning_rate": 1.8565447878694455e-05, + "loss": 0.2759617567062378, + "step": 1613 + }, + { + "epoch": 0.42862833621033064, + "grad_norm": 1.1572573238869637, + "learning_rate": 1.8563181098969017e-05, + "loss": 0.35069289803504944, + "step": 1614 + }, + { + "epoch": 0.4288939051918736, + "grad_norm": 1.1400434606874466, + "learning_rate": 1.8560912668363253e-05, + "loss": 0.3388484716415405, + "step": 1615 + }, + { + "epoch": 0.42915947417341654, + "grad_norm": 1.0338736294243014, + "learning_rate": 1.8558642587314496e-05, + "loss": 0.34116029739379883, + "step": 1616 + }, + { + "epoch": 0.4294250431549595, + "grad_norm": 1.0487376701262667, + "learning_rate": 1.8556370856260387e-05, + "loss": 0.30212706327438354, + "step": 1617 + }, + { + "epoch": 0.42969061213650245, + "grad_norm": 1.0633174136084793, + "learning_rate": 1.855409747563889e-05, + "loss": 0.32250338792800903, + "step": 1618 + }, + { + "epoch": 0.4299561811180454, + "grad_norm": 1.132237618998821, + "learning_rate": 1.8551822445888285e-05, + "loss": 0.35972943902015686, + "step": 1619 + }, + { + "epoch": 0.43022175009958835, + "grad_norm": 0.9921112897877987, + "learning_rate": 1.8549545767447174e-05, + "loss": 0.3112533390522003, + "step": 1620 + }, + { + "epoch": 0.4304873190811313, + "grad_norm": 1.0331176116114555, + "learning_rate": 1.854726744075447e-05, + "loss": 0.3044458031654358, + "step": 1621 + }, + { + "epoch": 0.43075288806267426, + "grad_norm": 1.0421498129424722, + "learning_rate": 1.8544987466249412e-05, + "loss": 0.3261772096157074, + "step": 1622 + }, + { + "epoch": 0.43101845704421726, + "grad_norm": 1.3249821498842442, + "learning_rate": 1.8542705844371544e-05, + "loss": 0.3485907018184662, + "step": 1623 + }, + { + "epoch": 0.4312840260257602, + "grad_norm": 2.6643478315387576, + "learning_rate": 1.8540422575560747e-05, + "loss": 0.3016113340854645, + "step": 1624 + }, + { + "epoch": 0.43154959500730317, + "grad_norm": 1.021133157663628, + "learning_rate": 1.8538137660257198e-05, + "loss": 0.35383081436157227, + "step": 1625 + }, + { + "epoch": 0.4318151639888461, + "grad_norm": 1.170997891522692, + "learning_rate": 1.8535851098901406e-05, + "loss": 0.32015109062194824, + "step": 1626 + }, + { + "epoch": 0.43208073297038907, + "grad_norm": 1.1526156179794622, + "learning_rate": 1.8533562891934195e-05, + "loss": 0.3801743984222412, + "step": 1627 + }, + { + "epoch": 0.432346301951932, + "grad_norm": 1.0686097183664227, + "learning_rate": 1.85312730397967e-05, + "loss": 0.33140939474105835, + "step": 1628 + }, + { + "epoch": 0.432611870933475, + "grad_norm": 1.232101025230023, + "learning_rate": 1.8528981542930382e-05, + "loss": 0.4052904546260834, + "step": 1629 + }, + { + "epoch": 0.4328774399150179, + "grad_norm": 1.0850305465298753, + "learning_rate": 1.8526688401777014e-05, + "loss": 0.3661607801914215, + "step": 1630 + }, + { + "epoch": 0.4331430088965609, + "grad_norm": 1.0520968780833948, + "learning_rate": 1.852439361677868e-05, + "loss": 0.33260756731033325, + "step": 1631 + }, + { + "epoch": 0.43340857787810383, + "grad_norm": 1.0137607762513057, + "learning_rate": 1.85220971883778e-05, + "loss": 0.30222776532173157, + "step": 1632 + }, + { + "epoch": 0.4336741468596468, + "grad_norm": 1.1138822281677037, + "learning_rate": 1.8519799117017086e-05, + "loss": 0.3444751799106598, + "step": 1633 + }, + { + "epoch": 0.43393971584118973, + "grad_norm": 1.0896517914007275, + "learning_rate": 1.8517499403139586e-05, + "loss": 0.33887404203414917, + "step": 1634 + }, + { + "epoch": 0.4342052848227327, + "grad_norm": 0.9260010903737679, + "learning_rate": 1.8515198047188652e-05, + "loss": 0.287893146276474, + "step": 1635 + }, + { + "epoch": 0.43447085380427564, + "grad_norm": 1.0080783350179279, + "learning_rate": 1.8512895049607965e-05, + "loss": 0.32236215472221375, + "step": 1636 + }, + { + "epoch": 0.43473642278581864, + "grad_norm": 1.0861808896793093, + "learning_rate": 1.8510590410841515e-05, + "loss": 0.30670079588890076, + "step": 1637 + }, + { + "epoch": 0.4350019917673616, + "grad_norm": 1.045996826542631, + "learning_rate": 1.8508284131333604e-05, + "loss": 0.34104713797569275, + "step": 1638 + }, + { + "epoch": 0.43526756074890455, + "grad_norm": 1.13616869746559, + "learning_rate": 1.8505976211528857e-05, + "loss": 0.3402378559112549, + "step": 1639 + }, + { + "epoch": 0.4355331297304475, + "grad_norm": 1.1414650328718847, + "learning_rate": 1.8503666651872217e-05, + "loss": 0.35236096382141113, + "step": 1640 + }, + { + "epoch": 0.43579869871199045, + "grad_norm": 1.1137846416322885, + "learning_rate": 1.850135545280894e-05, + "loss": 0.3385634422302246, + "step": 1641 + }, + { + "epoch": 0.4360642676935334, + "grad_norm": 1.0049349552180111, + "learning_rate": 1.849904261478459e-05, + "loss": 0.32222414016723633, + "step": 1642 + }, + { + "epoch": 0.43632983667507635, + "grad_norm": 1.1246487142505726, + "learning_rate": 1.8496728138245062e-05, + "loss": 0.3251120448112488, + "step": 1643 + }, + { + "epoch": 0.4365954056566193, + "grad_norm": 1.3230672810485753, + "learning_rate": 1.8494412023636563e-05, + "loss": 0.3199063837528229, + "step": 1644 + }, + { + "epoch": 0.43686097463816226, + "grad_norm": 1.031106173264746, + "learning_rate": 1.8492094271405605e-05, + "loss": 0.3470883071422577, + "step": 1645 + }, + { + "epoch": 0.4371265436197052, + "grad_norm": 1.1420067933967792, + "learning_rate": 1.848977488199903e-05, + "loss": 0.319596529006958, + "step": 1646 + }, + { + "epoch": 0.43739211260124816, + "grad_norm": 1.172387725238046, + "learning_rate": 1.848745385586398e-05, + "loss": 0.3445591628551483, + "step": 1647 + }, + { + "epoch": 0.4376576815827911, + "grad_norm": 1.0622512502557289, + "learning_rate": 1.848513119344793e-05, + "loss": 0.35861149430274963, + "step": 1648 + }, + { + "epoch": 0.43792325056433407, + "grad_norm": 1.3423176489021205, + "learning_rate": 1.8482806895198658e-05, + "loss": 0.36727622151374817, + "step": 1649 + }, + { + "epoch": 0.438188819545877, + "grad_norm": 1.0985203266462633, + "learning_rate": 1.848048096156426e-05, + "loss": 0.3505704402923584, + "step": 1650 + }, + { + "epoch": 0.43845438852742, + "grad_norm": 1.050005044594017, + "learning_rate": 1.8478153392993154e-05, + "loss": 0.3508742153644562, + "step": 1651 + }, + { + "epoch": 0.438719957508963, + "grad_norm": 1.0688095584032915, + "learning_rate": 1.8475824189934063e-05, + "loss": 0.32757264375686646, + "step": 1652 + }, + { + "epoch": 0.43898552649050593, + "grad_norm": 1.0768843323365103, + "learning_rate": 1.8473493352836032e-05, + "loss": 0.3117530643939972, + "step": 1653 + }, + { + "epoch": 0.4392510954720489, + "grad_norm": 1.1751248406507369, + "learning_rate": 1.8471160882148417e-05, + "loss": 0.3506043553352356, + "step": 1654 + }, + { + "epoch": 0.43951666445359183, + "grad_norm": 1.1247697965204402, + "learning_rate": 1.8468826778320892e-05, + "loss": 0.33997148275375366, + "step": 1655 + }, + { + "epoch": 0.4397822334351348, + "grad_norm": 1.007133328419329, + "learning_rate": 1.8466491041803446e-05, + "loss": 0.30060335993766785, + "step": 1656 + }, + { + "epoch": 0.44004780241667774, + "grad_norm": 0.9546594059496064, + "learning_rate": 1.846415367304638e-05, + "loss": 0.3057805597782135, + "step": 1657 + }, + { + "epoch": 0.4403133713982207, + "grad_norm": 1.006954520739026, + "learning_rate": 1.846181467250031e-05, + "loss": 0.30772098898887634, + "step": 1658 + }, + { + "epoch": 0.44057894037976364, + "grad_norm": 1.043209753174748, + "learning_rate": 1.845947404061617e-05, + "loss": 0.3183813989162445, + "step": 1659 + }, + { + "epoch": 0.4408445093613066, + "grad_norm": 1.0413807475941115, + "learning_rate": 1.8457131777845204e-05, + "loss": 0.2986184358596802, + "step": 1660 + }, + { + "epoch": 0.44111007834284954, + "grad_norm": 1.0330249735438937, + "learning_rate": 1.8454787884638973e-05, + "loss": 0.33342432975769043, + "step": 1661 + }, + { + "epoch": 0.4413756473243925, + "grad_norm": 1.6337494282252796, + "learning_rate": 1.8452442361449353e-05, + "loss": 0.33435192704200745, + "step": 1662 + }, + { + "epoch": 0.44164121630593545, + "grad_norm": 1.1084487395338765, + "learning_rate": 1.8450095208728537e-05, + "loss": 0.31596100330352783, + "step": 1663 + }, + { + "epoch": 0.4419067852874784, + "grad_norm": 1.0372033094770008, + "learning_rate": 1.8447746426929022e-05, + "loss": 0.29850512742996216, + "step": 1664 + }, + { + "epoch": 0.4421723542690214, + "grad_norm": 1.1891933812209383, + "learning_rate": 1.8445396016503628e-05, + "loss": 0.34898555278778076, + "step": 1665 + }, + { + "epoch": 0.44243792325056436, + "grad_norm": 1.0486597661615855, + "learning_rate": 1.8443043977905484e-05, + "loss": 0.283272385597229, + "step": 1666 + }, + { + "epoch": 0.4427034922321073, + "grad_norm": 1.041766578180328, + "learning_rate": 1.844069031158804e-05, + "loss": 0.32765433192253113, + "step": 1667 + }, + { + "epoch": 0.44296906121365026, + "grad_norm": 1.1465241668847563, + "learning_rate": 1.8438335018005052e-05, + "loss": 0.347957044839859, + "step": 1668 + }, + { + "epoch": 0.4432346301951932, + "grad_norm": 1.1330493919292772, + "learning_rate": 1.8435978097610594e-05, + "loss": 0.36188018321990967, + "step": 1669 + }, + { + "epoch": 0.44350019917673617, + "grad_norm": 1.1541714860130494, + "learning_rate": 1.843361955085905e-05, + "loss": 0.35944315791130066, + "step": 1670 + }, + { + "epoch": 0.4437657681582791, + "grad_norm": 1.0564596521414393, + "learning_rate": 1.8431259378205122e-05, + "loss": 0.33441367745399475, + "step": 1671 + }, + { + "epoch": 0.44403133713982207, + "grad_norm": 1.1043363461383413, + "learning_rate": 1.8428897580103827e-05, + "loss": 0.3157849907875061, + "step": 1672 + }, + { + "epoch": 0.444296906121365, + "grad_norm": 1.0760645254646117, + "learning_rate": 1.8426534157010486e-05, + "loss": 0.33416497707366943, + "step": 1673 + }, + { + "epoch": 0.444562475102908, + "grad_norm": 1.1629646905519946, + "learning_rate": 1.842416910938074e-05, + "loss": 0.3611617684364319, + "step": 1674 + }, + { + "epoch": 0.4448280440844509, + "grad_norm": 1.079831089952362, + "learning_rate": 1.8421802437670546e-05, + "loss": 0.3030395805835724, + "step": 1675 + }, + { + "epoch": 0.4450936130659939, + "grad_norm": 0.9867988845558019, + "learning_rate": 1.8419434142336167e-05, + "loss": 0.30281510949134827, + "step": 1676 + }, + { + "epoch": 0.44535918204753683, + "grad_norm": 1.2041533085675928, + "learning_rate": 1.8417064223834184e-05, + "loss": 0.3489738404750824, + "step": 1677 + }, + { + "epoch": 0.4456247510290798, + "grad_norm": 1.0320394434428715, + "learning_rate": 1.8414692682621487e-05, + "loss": 0.30453425645828247, + "step": 1678 + }, + { + "epoch": 0.44589032001062273, + "grad_norm": 0.9586890082829097, + "learning_rate": 1.841231951915528e-05, + "loss": 0.28717339038848877, + "step": 1679 + }, + { + "epoch": 0.44615588899216574, + "grad_norm": 1.0685350052372018, + "learning_rate": 1.840994473389309e-05, + "loss": 0.3227912187576294, + "step": 1680 + }, + { + "epoch": 0.4464214579737087, + "grad_norm": 1.0774879432227336, + "learning_rate": 1.8407568327292737e-05, + "loss": 0.3575928807258606, + "step": 1681 + }, + { + "epoch": 0.44668702695525164, + "grad_norm": 1.0240612597420884, + "learning_rate": 1.840519029981237e-05, + "loss": 0.35601454973220825, + "step": 1682 + }, + { + "epoch": 0.4469525959367946, + "grad_norm": 1.1829639598617365, + "learning_rate": 1.8402810651910444e-05, + "loss": 0.34867429733276367, + "step": 1683 + }, + { + "epoch": 0.44721816491833755, + "grad_norm": 1.0185115495756123, + "learning_rate": 1.8400429384045724e-05, + "loss": 0.3333359360694885, + "step": 1684 + }, + { + "epoch": 0.4474837338998805, + "grad_norm": 1.1658514468774803, + "learning_rate": 1.8398046496677296e-05, + "loss": 0.3269057273864746, + "step": 1685 + }, + { + "epoch": 0.44774930288142345, + "grad_norm": 1.0186865264151983, + "learning_rate": 1.839566199026455e-05, + "loss": 0.3507213890552521, + "step": 1686 + }, + { + "epoch": 0.4480148718629664, + "grad_norm": 1.0962029873559684, + "learning_rate": 1.8393275865267185e-05, + "loss": 0.32935822010040283, + "step": 1687 + }, + { + "epoch": 0.44828044084450935, + "grad_norm": 1.168811125319112, + "learning_rate": 1.8390888122145225e-05, + "loss": 0.3780096769332886, + "step": 1688 + }, + { + "epoch": 0.4485460098260523, + "grad_norm": 1.08432540630583, + "learning_rate": 1.8388498761358997e-05, + "loss": 0.3412250578403473, + "step": 1689 + }, + { + "epoch": 0.44881157880759526, + "grad_norm": 1.0725143861051711, + "learning_rate": 1.838610778336914e-05, + "loss": 0.33751022815704346, + "step": 1690 + }, + { + "epoch": 0.4490771477891382, + "grad_norm": 1.113628501747759, + "learning_rate": 1.8383715188636608e-05, + "loss": 0.35736170411109924, + "step": 1691 + }, + { + "epoch": 0.44934271677068116, + "grad_norm": 1.0608679340591776, + "learning_rate": 1.8381320977622664e-05, + "loss": 0.3133913278579712, + "step": 1692 + }, + { + "epoch": 0.4496082857522241, + "grad_norm": 1.0696112323301112, + "learning_rate": 1.8378925150788886e-05, + "loss": 0.2890821099281311, + "step": 1693 + }, + { + "epoch": 0.4498738547337671, + "grad_norm": 1.0759892831738864, + "learning_rate": 1.8376527708597155e-05, + "loss": 0.34016966819763184, + "step": 1694 + }, + { + "epoch": 0.45013942371531007, + "grad_norm": 1.0933611032669988, + "learning_rate": 1.8374128651509676e-05, + "loss": 0.3502900302410126, + "step": 1695 + }, + { + "epoch": 0.450404992696853, + "grad_norm": 1.1956521483077693, + "learning_rate": 1.8371727979988957e-05, + "loss": 0.31828251481056213, + "step": 1696 + }, + { + "epoch": 0.450670561678396, + "grad_norm": 1.1739995891800665, + "learning_rate": 1.836932569449782e-05, + "loss": 0.33322471380233765, + "step": 1697 + }, + { + "epoch": 0.4509361306599389, + "grad_norm": 0.977715581129718, + "learning_rate": 1.8366921795499394e-05, + "loss": 0.28489458560943604, + "step": 1698 + }, + { + "epoch": 0.4512016996414819, + "grad_norm": 1.0351592490047028, + "learning_rate": 1.8364516283457127e-05, + "loss": 0.3125787079334259, + "step": 1699 + }, + { + "epoch": 0.45146726862302483, + "grad_norm": 1.6801930060854708, + "learning_rate": 1.8362109158834767e-05, + "loss": 0.3352596163749695, + "step": 1700 + }, + { + "epoch": 0.4517328376045678, + "grad_norm": 1.0152758212914303, + "learning_rate": 1.8359700422096385e-05, + "loss": 0.2986747622489929, + "step": 1701 + }, + { + "epoch": 0.45199840658611073, + "grad_norm": 1.0704573865215896, + "learning_rate": 1.8357290073706355e-05, + "loss": 0.3276829123497009, + "step": 1702 + }, + { + "epoch": 0.4522639755676537, + "grad_norm": 1.05119725558451, + "learning_rate": 1.8354878114129368e-05, + "loss": 0.3183029890060425, + "step": 1703 + }, + { + "epoch": 0.45252954454919664, + "grad_norm": 1.0595099003295023, + "learning_rate": 1.835246454383041e-05, + "loss": 0.32149460911750793, + "step": 1704 + }, + { + "epoch": 0.4527951135307396, + "grad_norm": 1.0365725372264356, + "learning_rate": 1.8350049363274802e-05, + "loss": 0.2963859438896179, + "step": 1705 + }, + { + "epoch": 0.45306068251228254, + "grad_norm": 1.132218144997021, + "learning_rate": 1.8347632572928154e-05, + "loss": 0.35251080989837646, + "step": 1706 + }, + { + "epoch": 0.4533262514938255, + "grad_norm": 1.1840188868504486, + "learning_rate": 1.8345214173256395e-05, + "loss": 0.3585474491119385, + "step": 1707 + }, + { + "epoch": 0.4535918204753685, + "grad_norm": 1.1792148584627284, + "learning_rate": 1.834279416472577e-05, + "loss": 0.32339078187942505, + "step": 1708 + }, + { + "epoch": 0.45385738945691145, + "grad_norm": 1.030916532610971, + "learning_rate": 1.8340372547802822e-05, + "loss": 0.3473295569419861, + "step": 1709 + }, + { + "epoch": 0.4541229584384544, + "grad_norm": 1.149162033618886, + "learning_rate": 1.833794932295441e-05, + "loss": 0.35146117210388184, + "step": 1710 + }, + { + "epoch": 0.45438852741999736, + "grad_norm": 1.080751163824508, + "learning_rate": 1.833552449064771e-05, + "loss": 0.29697534441947937, + "step": 1711 + }, + { + "epoch": 0.4546540964015403, + "grad_norm": 1.0590764839143914, + "learning_rate": 1.8333098051350197e-05, + "loss": 0.30980685353279114, + "step": 1712 + }, + { + "epoch": 0.45491966538308326, + "grad_norm": 1.2023264217964575, + "learning_rate": 1.8330670005529657e-05, + "loss": 0.3271983861923218, + "step": 1713 + }, + { + "epoch": 0.4551852343646262, + "grad_norm": 1.061456665590969, + "learning_rate": 1.8328240353654193e-05, + "loss": 0.3421804904937744, + "step": 1714 + }, + { + "epoch": 0.45545080334616916, + "grad_norm": 0.988281834877126, + "learning_rate": 1.8325809096192207e-05, + "loss": 0.2949771285057068, + "step": 1715 + }, + { + "epoch": 0.4557163723277121, + "grad_norm": 1.1467541005281106, + "learning_rate": 1.832337623361242e-05, + "loss": 0.35578668117523193, + "step": 1716 + }, + { + "epoch": 0.45598194130925507, + "grad_norm": 1.099618839558401, + "learning_rate": 1.832094176638387e-05, + "loss": 0.3714647889137268, + "step": 1717 + }, + { + "epoch": 0.456247510290798, + "grad_norm": 1.116087725713372, + "learning_rate": 1.8318505694975877e-05, + "loss": 0.36253875494003296, + "step": 1718 + }, + { + "epoch": 0.45651307927234097, + "grad_norm": 1.0310426822464949, + "learning_rate": 1.8316068019858093e-05, + "loss": 0.3148016035556793, + "step": 1719 + }, + { + "epoch": 0.4567786482538839, + "grad_norm": 1.0869949789046671, + "learning_rate": 1.8313628741500476e-05, + "loss": 0.3420512080192566, + "step": 1720 + }, + { + "epoch": 0.4570442172354269, + "grad_norm": 1.0955610437646774, + "learning_rate": 1.831118786037329e-05, + "loss": 0.2941698431968689, + "step": 1721 + }, + { + "epoch": 0.4573097862169699, + "grad_norm": 0.9987507632564111, + "learning_rate": 1.83087453769471e-05, + "loss": 0.3033481240272522, + "step": 1722 + }, + { + "epoch": 0.45757535519851283, + "grad_norm": 1.0508818993675257, + "learning_rate": 1.8306301291692798e-05, + "loss": 0.3405943810939789, + "step": 1723 + }, + { + "epoch": 0.4578409241800558, + "grad_norm": 1.0291343903638976, + "learning_rate": 1.8303855605081567e-05, + "loss": 0.32217931747436523, + "step": 1724 + }, + { + "epoch": 0.45810649316159874, + "grad_norm": 1.1797464113481113, + "learning_rate": 1.8301408317584913e-05, + "loss": 0.3627573847770691, + "step": 1725 + }, + { + "epoch": 0.4583720621431417, + "grad_norm": 1.1425882725361838, + "learning_rate": 1.829895942967464e-05, + "loss": 0.3512224853038788, + "step": 1726 + }, + { + "epoch": 0.45863763112468464, + "grad_norm": 1.1358093316461328, + "learning_rate": 1.8296508941822868e-05, + "loss": 0.35433265566825867, + "step": 1727 + }, + { + "epoch": 0.4589032001062276, + "grad_norm": 1.1217406683513973, + "learning_rate": 1.829405685450202e-05, + "loss": 0.33105185627937317, + "step": 1728 + }, + { + "epoch": 0.45916876908777055, + "grad_norm": 1.0087946676492725, + "learning_rate": 1.829160316818483e-05, + "loss": 0.31765925884246826, + "step": 1729 + }, + { + "epoch": 0.4594343380693135, + "grad_norm": 1.0268902541251206, + "learning_rate": 1.8289147883344338e-05, + "loss": 0.3276101350784302, + "step": 1730 + }, + { + "epoch": 0.45969990705085645, + "grad_norm": 2.1185922480389676, + "learning_rate": 1.8286691000453895e-05, + "loss": 0.2921130061149597, + "step": 1731 + }, + { + "epoch": 0.4599654760323994, + "grad_norm": 0.9680106013727008, + "learning_rate": 1.828423251998716e-05, + "loss": 0.3025062382221222, + "step": 1732 + }, + { + "epoch": 0.46023104501394235, + "grad_norm": 1.0299077884479195, + "learning_rate": 1.82817724424181e-05, + "loss": 0.3128702640533447, + "step": 1733 + }, + { + "epoch": 0.4604966139954853, + "grad_norm": 0.9957682350134235, + "learning_rate": 1.8279310768220987e-05, + "loss": 0.31156033277511597, + "step": 1734 + }, + { + "epoch": 0.46076218297702826, + "grad_norm": 1.0327514294429654, + "learning_rate": 1.82768474978704e-05, + "loss": 0.30409976840019226, + "step": 1735 + }, + { + "epoch": 0.46102775195857126, + "grad_norm": 1.0533664417585449, + "learning_rate": 1.827438263184124e-05, + "loss": 0.305557519197464, + "step": 1736 + }, + { + "epoch": 0.4612933209401142, + "grad_norm": 1.1216722893854725, + "learning_rate": 1.827191617060869e-05, + "loss": 0.36079999804496765, + "step": 1737 + }, + { + "epoch": 0.46155888992165717, + "grad_norm": 1.0546022345807051, + "learning_rate": 1.8269448114648264e-05, + "loss": 0.3341830372810364, + "step": 1738 + }, + { + "epoch": 0.4618244589032001, + "grad_norm": 1.0085785444907966, + "learning_rate": 1.8266978464435764e-05, + "loss": 0.3222450017929077, + "step": 1739 + }, + { + "epoch": 0.46209002788474307, + "grad_norm": 1.112818872130856, + "learning_rate": 1.826450722044732e-05, + "loss": 0.34665441513061523, + "step": 1740 + }, + { + "epoch": 0.462355596866286, + "grad_norm": 1.1112300040840664, + "learning_rate": 1.8262034383159357e-05, + "loss": 0.31024169921875, + "step": 1741 + }, + { + "epoch": 0.462621165847829, + "grad_norm": 1.2322752248386413, + "learning_rate": 1.8259559953048606e-05, + "loss": 0.2950369119644165, + "step": 1742 + }, + { + "epoch": 0.4628867348293719, + "grad_norm": 1.109045795536776, + "learning_rate": 1.8257083930592102e-05, + "loss": 0.3378523886203766, + "step": 1743 + }, + { + "epoch": 0.4631523038109149, + "grad_norm": 0.9899845397184047, + "learning_rate": 1.8254606316267204e-05, + "loss": 0.2930060923099518, + "step": 1744 + }, + { + "epoch": 0.46341787279245783, + "grad_norm": 1.079619676645024, + "learning_rate": 1.8252127110551564e-05, + "loss": 0.3236517012119293, + "step": 1745 + }, + { + "epoch": 0.4636834417740008, + "grad_norm": 0.9852877201201444, + "learning_rate": 1.824964631392314e-05, + "loss": 0.3010406196117401, + "step": 1746 + }, + { + "epoch": 0.46394901075554373, + "grad_norm": 1.0095585954453505, + "learning_rate": 1.8247163926860204e-05, + "loss": 0.3269607424736023, + "step": 1747 + }, + { + "epoch": 0.4642145797370867, + "grad_norm": 1.0474961373680607, + "learning_rate": 1.8244679949841328e-05, + "loss": 0.3437904715538025, + "step": 1748 + }, + { + "epoch": 0.46448014871862964, + "grad_norm": 1.1512723462780612, + "learning_rate": 1.8242194383345394e-05, + "loss": 0.37820738554000854, + "step": 1749 + }, + { + "epoch": 0.46474571770017264, + "grad_norm": 1.0989334641357904, + "learning_rate": 1.8239707227851592e-05, + "loss": 0.3365899920463562, + "step": 1750 + }, + { + "epoch": 0.4650112866817156, + "grad_norm": 0.9943228703349263, + "learning_rate": 1.8237218483839414e-05, + "loss": 0.30418774485588074, + "step": 1751 + }, + { + "epoch": 0.46527685566325855, + "grad_norm": 0.9379554406122236, + "learning_rate": 1.823472815178866e-05, + "loss": 0.2923222780227661, + "step": 1752 + }, + { + "epoch": 0.4655424246448015, + "grad_norm": 1.1096787188742467, + "learning_rate": 1.823223623217944e-05, + "loss": 0.3358995020389557, + "step": 1753 + }, + { + "epoch": 0.46580799362634445, + "grad_norm": 1.0997620749237405, + "learning_rate": 1.822974272549216e-05, + "loss": 0.3413343131542206, + "step": 1754 + }, + { + "epoch": 0.4660735626078874, + "grad_norm": 1.0873990469892099, + "learning_rate": 1.822724763220755e-05, + "loss": 0.33553364872932434, + "step": 1755 + }, + { + "epoch": 0.46633913158943036, + "grad_norm": 1.0957210856960815, + "learning_rate": 1.8224750952806626e-05, + "loss": 0.35896626114845276, + "step": 1756 + }, + { + "epoch": 0.4666047005709733, + "grad_norm": 1.1032076691430248, + "learning_rate": 1.8222252687770718e-05, + "loss": 0.35345566272735596, + "step": 1757 + }, + { + "epoch": 0.46687026955251626, + "grad_norm": 1.0034635235769087, + "learning_rate": 1.8219752837581466e-05, + "loss": 0.3146013617515564, + "step": 1758 + }, + { + "epoch": 0.4671358385340592, + "grad_norm": 1.0191336075935247, + "learning_rate": 1.8217251402720807e-05, + "loss": 0.33270642161369324, + "step": 1759 + }, + { + "epoch": 0.46740140751560216, + "grad_norm": 1.030475428136688, + "learning_rate": 1.821474838367099e-05, + "loss": 0.3172033727169037, + "step": 1760 + }, + { + "epoch": 0.4676669764971451, + "grad_norm": 1.6535016363051902, + "learning_rate": 1.8212243780914578e-05, + "loss": 0.3277033567428589, + "step": 1761 + }, + { + "epoch": 0.46793254547868807, + "grad_norm": 1.1570228647748637, + "learning_rate": 1.820973759493441e-05, + "loss": 0.3523799777030945, + "step": 1762 + }, + { + "epoch": 0.468198114460231, + "grad_norm": 1.0907259849913267, + "learning_rate": 1.8207229826213664e-05, + "loss": 0.32437676191329956, + "step": 1763 + }, + { + "epoch": 0.468463683441774, + "grad_norm": 1.1347618214788342, + "learning_rate": 1.82047204752358e-05, + "loss": 0.34185051918029785, + "step": 1764 + }, + { + "epoch": 0.468729252423317, + "grad_norm": 1.0561382700570243, + "learning_rate": 1.8202209542484594e-05, + "loss": 0.32034197449684143, + "step": 1765 + }, + { + "epoch": 0.46899482140485993, + "grad_norm": 1.097207173265362, + "learning_rate": 1.8199697028444125e-05, + "loss": 0.30969515442848206, + "step": 1766 + }, + { + "epoch": 0.4692603903864029, + "grad_norm": 0.9320632629292236, + "learning_rate": 1.8197182933598776e-05, + "loss": 0.24751389026641846, + "step": 1767 + }, + { + "epoch": 0.46952595936794583, + "grad_norm": 1.2001835130139573, + "learning_rate": 1.8194667258433235e-05, + "loss": 0.3859948217868805, + "step": 1768 + }, + { + "epoch": 0.4697915283494888, + "grad_norm": 1.0989779617923678, + "learning_rate": 1.819215000343249e-05, + "loss": 0.29364967346191406, + "step": 1769 + }, + { + "epoch": 0.47005709733103174, + "grad_norm": 1.1161641657952082, + "learning_rate": 1.8189631169081845e-05, + "loss": 0.3560323715209961, + "step": 1770 + }, + { + "epoch": 0.4703226663125747, + "grad_norm": 1.6505675097600017, + "learning_rate": 1.8187110755866898e-05, + "loss": 0.3458098769187927, + "step": 1771 + }, + { + "epoch": 0.47058823529411764, + "grad_norm": 1.0148526914708587, + "learning_rate": 1.8184588764273555e-05, + "loss": 0.32131001353263855, + "step": 1772 + }, + { + "epoch": 0.4708538042756606, + "grad_norm": 1.0453234866463608, + "learning_rate": 1.8182065194788024e-05, + "loss": 0.3011054992675781, + "step": 1773 + }, + { + "epoch": 0.47111937325720354, + "grad_norm": 1.1076832582073854, + "learning_rate": 1.8179540047896827e-05, + "loss": 0.3314674496650696, + "step": 1774 + }, + { + "epoch": 0.4713849422387465, + "grad_norm": 1.0853788387965118, + "learning_rate": 1.8177013324086774e-05, + "loss": 0.3437536060810089, + "step": 1775 + }, + { + "epoch": 0.47165051122028945, + "grad_norm": 1.166112048160084, + "learning_rate": 1.8174485023844993e-05, + "loss": 0.36137935519218445, + "step": 1776 + }, + { + "epoch": 0.4719160802018324, + "grad_norm": 1.0726359370167762, + "learning_rate": 1.8171955147658905e-05, + "loss": 0.34018874168395996, + "step": 1777 + }, + { + "epoch": 0.4721816491833754, + "grad_norm": 1.0596665602066746, + "learning_rate": 1.8169423696016245e-05, + "loss": 0.33298587799072266, + "step": 1778 + }, + { + "epoch": 0.47244721816491836, + "grad_norm": 1.1107712039752602, + "learning_rate": 1.816689066940505e-05, + "loss": 0.3649418354034424, + "step": 1779 + }, + { + "epoch": 0.4727127871464613, + "grad_norm": 1.0148859742506888, + "learning_rate": 1.8164356068313646e-05, + "loss": 0.32419171929359436, + "step": 1780 + }, + { + "epoch": 0.47297835612800426, + "grad_norm": 1.047167823612948, + "learning_rate": 1.8161819893230688e-05, + "loss": 0.288555383682251, + "step": 1781 + }, + { + "epoch": 0.4732439251095472, + "grad_norm": 1.005455205363293, + "learning_rate": 1.815928214464511e-05, + "loss": 0.3231011629104614, + "step": 1782 + }, + { + "epoch": 0.47350949409109017, + "grad_norm": 1.0470674131364166, + "learning_rate": 1.815674282304617e-05, + "loss": 0.29310134053230286, + "step": 1783 + }, + { + "epoch": 0.4737750630726331, + "grad_norm": 1.0390137248114197, + "learning_rate": 1.815420192892341e-05, + "loss": 0.32683852314949036, + "step": 1784 + }, + { + "epoch": 0.47404063205417607, + "grad_norm": 1.0353379429668699, + "learning_rate": 1.8151659462766685e-05, + "loss": 0.3200969099998474, + "step": 1785 + }, + { + "epoch": 0.474306201035719, + "grad_norm": 1.051359679014311, + "learning_rate": 1.814911542506616e-05, + "loss": 0.3091360032558441, + "step": 1786 + }, + { + "epoch": 0.474571770017262, + "grad_norm": 1.1630088603070372, + "learning_rate": 1.814656981631229e-05, + "loss": 0.3679049611091614, + "step": 1787 + }, + { + "epoch": 0.4748373389988049, + "grad_norm": 1.1065634125772459, + "learning_rate": 1.814402263699584e-05, + "loss": 0.290119469165802, + "step": 1788 + }, + { + "epoch": 0.4751029079803479, + "grad_norm": 1.0987492456650414, + "learning_rate": 1.8141473887607874e-05, + "loss": 0.31878861784935, + "step": 1789 + }, + { + "epoch": 0.47536847696189083, + "grad_norm": 1.1254389921885528, + "learning_rate": 1.8138923568639763e-05, + "loss": 0.35820287466049194, + "step": 1790 + }, + { + "epoch": 0.4756340459434338, + "grad_norm": 1.0046454439717083, + "learning_rate": 1.8136371680583176e-05, + "loss": 0.2924647629261017, + "step": 1791 + }, + { + "epoch": 0.4758996149249768, + "grad_norm": 1.2202907606610718, + "learning_rate": 1.8133818223930092e-05, + "loss": 0.3799927234649658, + "step": 1792 + }, + { + "epoch": 0.47616518390651974, + "grad_norm": 1.1097316301591598, + "learning_rate": 1.8131263199172783e-05, + "loss": 0.3505420386791229, + "step": 1793 + }, + { + "epoch": 0.4764307528880627, + "grad_norm": 1.1021438648339534, + "learning_rate": 1.8128706606803823e-05, + "loss": 0.3291688859462738, + "step": 1794 + }, + { + "epoch": 0.47669632186960564, + "grad_norm": 1.0814065231113215, + "learning_rate": 1.8126148447316104e-05, + "loss": 0.34079697728157043, + "step": 1795 + }, + { + "epoch": 0.4769618908511486, + "grad_norm": 1.2185578909639558, + "learning_rate": 1.8123588721202802e-05, + "loss": 0.2898064851760864, + "step": 1796 + }, + { + "epoch": 0.47722745983269155, + "grad_norm": 1.0448194415877836, + "learning_rate": 1.8121027428957402e-05, + "loss": 0.32089224457740784, + "step": 1797 + }, + { + "epoch": 0.4774930288142345, + "grad_norm": 1.903396083379018, + "learning_rate": 1.8118464571073697e-05, + "loss": 0.3402039408683777, + "step": 1798 + }, + { + "epoch": 0.47775859779577745, + "grad_norm": 1.1693256768707747, + "learning_rate": 1.8115900148045767e-05, + "loss": 0.29904159903526306, + "step": 1799 + }, + { + "epoch": 0.4780241667773204, + "grad_norm": 1.0688058843932313, + "learning_rate": 1.8113334160368007e-05, + "loss": 0.34074240922927856, + "step": 1800 + }, + { + "epoch": 0.47828973575886335, + "grad_norm": 1.0404364284009804, + "learning_rate": 1.811076660853511e-05, + "loss": 0.28566253185272217, + "step": 1801 + }, + { + "epoch": 0.4785553047404063, + "grad_norm": 1.0267154270839738, + "learning_rate": 1.8108197493042065e-05, + "loss": 0.34523358941078186, + "step": 1802 + }, + { + "epoch": 0.47882087372194926, + "grad_norm": 1.0082361251695107, + "learning_rate": 1.8105626814384173e-05, + "loss": 0.3261171281337738, + "step": 1803 + }, + { + "epoch": 0.4790864427034922, + "grad_norm": 1.0353580811121572, + "learning_rate": 1.8103054573057027e-05, + "loss": 0.2915942966938019, + "step": 1804 + }, + { + "epoch": 0.47935201168503516, + "grad_norm": 1.117140176261941, + "learning_rate": 1.810048076955653e-05, + "loss": 0.2999255657196045, + "step": 1805 + }, + { + "epoch": 0.47961758066657817, + "grad_norm": 1.0967176640726466, + "learning_rate": 1.8097905404378874e-05, + "loss": 0.3294594883918762, + "step": 1806 + }, + { + "epoch": 0.4798831496481211, + "grad_norm": 1.025641731681811, + "learning_rate": 1.8095328478020563e-05, + "loss": 0.30720093846321106, + "step": 1807 + }, + { + "epoch": 0.4801487186296641, + "grad_norm": 1.0583824100775536, + "learning_rate": 1.8092749990978395e-05, + "loss": 0.31076985597610474, + "step": 1808 + }, + { + "epoch": 0.480414287611207, + "grad_norm": 1.0650372083327142, + "learning_rate": 1.8090169943749477e-05, + "loss": 0.3182013928890228, + "step": 1809 + }, + { + "epoch": 0.48067985659275, + "grad_norm": 1.1560421045272382, + "learning_rate": 1.8087588336831206e-05, + "loss": 0.325716108083725, + "step": 1810 + }, + { + "epoch": 0.48094542557429293, + "grad_norm": 1.034822212222003, + "learning_rate": 1.8085005170721287e-05, + "loss": 0.3148769736289978, + "step": 1811 + }, + { + "epoch": 0.4812109945558359, + "grad_norm": 0.9998987744353804, + "learning_rate": 1.8082420445917727e-05, + "loss": 0.30645644664764404, + "step": 1812 + }, + { + "epoch": 0.48147656353737883, + "grad_norm": 0.9765412034449941, + "learning_rate": 1.807983416291883e-05, + "loss": 0.2978900969028473, + "step": 1813 + }, + { + "epoch": 0.4817421325189218, + "grad_norm": 1.1281577444413164, + "learning_rate": 1.8077246322223194e-05, + "loss": 0.34340181946754456, + "step": 1814 + }, + { + "epoch": 0.48200770150046474, + "grad_norm": 1.0940690010095575, + "learning_rate": 1.8074656924329733e-05, + "loss": 0.3272106349468231, + "step": 1815 + }, + { + "epoch": 0.4822732704820077, + "grad_norm": 1.0823130111098402, + "learning_rate": 1.807206596973765e-05, + "loss": 0.31061962246894836, + "step": 1816 + }, + { + "epoch": 0.48253883946355064, + "grad_norm": 1.1134329507970786, + "learning_rate": 1.8069473458946445e-05, + "loss": 0.28947243094444275, + "step": 1817 + }, + { + "epoch": 0.4828044084450936, + "grad_norm": 1.066867737773279, + "learning_rate": 1.8066879392455932e-05, + "loss": 0.35057532787323, + "step": 1818 + }, + { + "epoch": 0.48306997742663654, + "grad_norm": 1.5202577425125505, + "learning_rate": 1.8064283770766212e-05, + "loss": 0.31032001972198486, + "step": 1819 + }, + { + "epoch": 0.48333554640817955, + "grad_norm": 1.1166414917810035, + "learning_rate": 1.8061686594377685e-05, + "loss": 0.3802293539047241, + "step": 1820 + }, + { + "epoch": 0.4836011153897225, + "grad_norm": 1.122052528401037, + "learning_rate": 1.8059087863791066e-05, + "loss": 0.3306402564048767, + "step": 1821 + }, + { + "epoch": 0.48386668437126545, + "grad_norm": 1.051177925612534, + "learning_rate": 1.8056487579507352e-05, + "loss": 0.32170724868774414, + "step": 1822 + }, + { + "epoch": 0.4841322533528084, + "grad_norm": 1.0182895505748566, + "learning_rate": 1.8053885742027854e-05, + "loss": 0.35058924555778503, + "step": 1823 + }, + { + "epoch": 0.48439782233435136, + "grad_norm": 1.079491665486815, + "learning_rate": 1.8051282351854168e-05, + "loss": 0.3796595335006714, + "step": 1824 + }, + { + "epoch": 0.4846633913158943, + "grad_norm": 1.0882057457557335, + "learning_rate": 1.8048677409488205e-05, + "loss": 0.28997284173965454, + "step": 1825 + }, + { + "epoch": 0.48492896029743726, + "grad_norm": 1.7307038017833063, + "learning_rate": 1.804607091543216e-05, + "loss": 0.35110151767730713, + "step": 1826 + }, + { + "epoch": 0.4851945292789802, + "grad_norm": 1.1036882170711018, + "learning_rate": 1.8043462870188535e-05, + "loss": 0.3194088637828827, + "step": 1827 + }, + { + "epoch": 0.48546009826052317, + "grad_norm": 1.0664676604065728, + "learning_rate": 1.8040853274260137e-05, + "loss": 0.28777945041656494, + "step": 1828 + }, + { + "epoch": 0.4857256672420661, + "grad_norm": 1.0702584286398438, + "learning_rate": 1.803824212815006e-05, + "loss": 0.3642069697380066, + "step": 1829 + }, + { + "epoch": 0.48599123622360907, + "grad_norm": 1.0626897024145745, + "learning_rate": 1.80356294323617e-05, + "loss": 0.32396575808525085, + "step": 1830 + }, + { + "epoch": 0.486256805205152, + "grad_norm": 1.205959051296984, + "learning_rate": 1.8033015187398758e-05, + "loss": 0.36421436071395874, + "step": 1831 + }, + { + "epoch": 0.486522374186695, + "grad_norm": 1.0011906322370974, + "learning_rate": 1.8030399393765227e-05, + "loss": 0.3170832395553589, + "step": 1832 + }, + { + "epoch": 0.4867879431682379, + "grad_norm": 0.9739220394650455, + "learning_rate": 1.8027782051965408e-05, + "loss": 0.3003416359424591, + "step": 1833 + }, + { + "epoch": 0.48705351214978093, + "grad_norm": 1.0701369618567955, + "learning_rate": 1.802516316250388e-05, + "loss": 0.30362898111343384, + "step": 1834 + }, + { + "epoch": 0.4873190811313239, + "grad_norm": 1.0466563888798912, + "learning_rate": 1.802254272588555e-05, + "loss": 0.32721444964408875, + "step": 1835 + }, + { + "epoch": 0.48758465011286684, + "grad_norm": 1.345049864677536, + "learning_rate": 1.8019920742615596e-05, + "loss": 0.317483514547348, + "step": 1836 + }, + { + "epoch": 0.4878502190944098, + "grad_norm": 1.0589953518283157, + "learning_rate": 1.801729721319951e-05, + "loss": 0.2928479015827179, + "step": 1837 + }, + { + "epoch": 0.48811578807595274, + "grad_norm": 1.1098495840377043, + "learning_rate": 1.8014672138143073e-05, + "loss": 0.3425772190093994, + "step": 1838 + }, + { + "epoch": 0.4883813570574957, + "grad_norm": 1.0286414092040284, + "learning_rate": 1.801204551795238e-05, + "loss": 0.334087997674942, + "step": 1839 + }, + { + "epoch": 0.48864692603903864, + "grad_norm": 1.0797374159140127, + "learning_rate": 1.80094173531338e-05, + "loss": 0.3186641335487366, + "step": 1840 + }, + { + "epoch": 0.4889124950205816, + "grad_norm": 1.0361897985848911, + "learning_rate": 1.800678764419401e-05, + "loss": 0.3153733015060425, + "step": 1841 + }, + { + "epoch": 0.48917806400212455, + "grad_norm": 1.070217807683518, + "learning_rate": 1.8004156391640004e-05, + "loss": 0.3323214054107666, + "step": 1842 + }, + { + "epoch": 0.4894436329836675, + "grad_norm": 0.9455521865874897, + "learning_rate": 1.8001523595979043e-05, + "loss": 0.2856762409210205, + "step": 1843 + }, + { + "epoch": 0.48970920196521045, + "grad_norm": 1.0256135363684138, + "learning_rate": 1.79988892577187e-05, + "loss": 0.32493725419044495, + "step": 1844 + }, + { + "epoch": 0.4899747709467534, + "grad_norm": 1.1082860888483268, + "learning_rate": 1.7996253377366846e-05, + "loss": 0.350448876619339, + "step": 1845 + }, + { + "epoch": 0.49024033992829635, + "grad_norm": 1.096249407467401, + "learning_rate": 1.7993615955431648e-05, + "loss": 0.32246965169906616, + "step": 1846 + }, + { + "epoch": 0.4905059089098393, + "grad_norm": 0.9715072313794847, + "learning_rate": 1.799097699242157e-05, + "loss": 0.302636057138443, + "step": 1847 + }, + { + "epoch": 0.4907714778913823, + "grad_norm": 1.1573319310132777, + "learning_rate": 1.7988336488845374e-05, + "loss": 0.34280693531036377, + "step": 1848 + }, + { + "epoch": 0.49103704687292526, + "grad_norm": 1.1205814585182334, + "learning_rate": 1.7985694445212118e-05, + "loss": 0.3650673031806946, + "step": 1849 + }, + { + "epoch": 0.4913026158544682, + "grad_norm": 1.1348057531260405, + "learning_rate": 1.798305086203115e-05, + "loss": 0.33800822496414185, + "step": 1850 + }, + { + "epoch": 0.49156818483601117, + "grad_norm": 1.0428655272942455, + "learning_rate": 1.7980405739812134e-05, + "loss": 0.31522083282470703, + "step": 1851 + }, + { + "epoch": 0.4918337538175541, + "grad_norm": 1.177464907100392, + "learning_rate": 1.7977759079065003e-05, + "loss": 0.3374335765838623, + "step": 1852 + }, + { + "epoch": 0.49209932279909707, + "grad_norm": 1.060278247692231, + "learning_rate": 1.7975110880300018e-05, + "loss": 0.33803191781044006, + "step": 1853 + }, + { + "epoch": 0.49236489178064, + "grad_norm": 1.0982376140773644, + "learning_rate": 1.797246114402771e-05, + "loss": 0.37764933705329895, + "step": 1854 + }, + { + "epoch": 0.492630460762183, + "grad_norm": 0.9654297547716862, + "learning_rate": 1.796980987075892e-05, + "loss": 0.3075840473175049, + "step": 1855 + }, + { + "epoch": 0.4928960297437259, + "grad_norm": 0.9768928030686648, + "learning_rate": 1.7967157061004782e-05, + "loss": 0.306305855512619, + "step": 1856 + }, + { + "epoch": 0.4931615987252689, + "grad_norm": 1.0225684543938522, + "learning_rate": 1.796450271527673e-05, + "loss": 0.3474302291870117, + "step": 1857 + }, + { + "epoch": 0.49342716770681183, + "grad_norm": 1.0243106870487633, + "learning_rate": 1.7961846834086483e-05, + "loss": 0.31059685349464417, + "step": 1858 + }, + { + "epoch": 0.4936927366883548, + "grad_norm": 1.0236396527349367, + "learning_rate": 1.795918941794607e-05, + "loss": 0.346218079328537, + "step": 1859 + }, + { + "epoch": 0.49395830566989773, + "grad_norm": 0.9969229384493907, + "learning_rate": 1.7956530467367805e-05, + "loss": 0.28371214866638184, + "step": 1860 + }, + { + "epoch": 0.4942238746514407, + "grad_norm": 0.8979156608776232, + "learning_rate": 1.7953869982864306e-05, + "loss": 0.27775150537490845, + "step": 1861 + }, + { + "epoch": 0.4944894436329837, + "grad_norm": 1.279703247293047, + "learning_rate": 1.795120796494848e-05, + "loss": 0.328782856464386, + "step": 1862 + }, + { + "epoch": 0.49475501261452665, + "grad_norm": 1.0950381369417217, + "learning_rate": 1.7948544414133534e-05, + "loss": 0.33220064640045166, + "step": 1863 + }, + { + "epoch": 0.4950205815960696, + "grad_norm": 1.0528449584388764, + "learning_rate": 1.794587933093297e-05, + "loss": 0.32681554555892944, + "step": 1864 + }, + { + "epoch": 0.49528615057761255, + "grad_norm": 1.1023465974826758, + "learning_rate": 1.7943212715860586e-05, + "loss": 0.32202866673469543, + "step": 1865 + }, + { + "epoch": 0.4955517195591555, + "grad_norm": 2.266456857585339, + "learning_rate": 1.7940544569430468e-05, + "loss": 0.3051350712776184, + "step": 1866 + }, + { + "epoch": 0.49581728854069845, + "grad_norm": 1.1617568134775966, + "learning_rate": 1.793787489215701e-05, + "loss": 0.3924705386161804, + "step": 1867 + }, + { + "epoch": 0.4960828575222414, + "grad_norm": 1.018817969430421, + "learning_rate": 1.793520368455489e-05, + "loss": 0.30267882347106934, + "step": 1868 + }, + { + "epoch": 0.49634842650378436, + "grad_norm": 1.0585020042998596, + "learning_rate": 1.793253094713909e-05, + "loss": 0.3150729238986969, + "step": 1869 + }, + { + "epoch": 0.4966139954853273, + "grad_norm": 1.314679145900761, + "learning_rate": 1.7929856680424872e-05, + "loss": 0.33814147114753723, + "step": 1870 + }, + { + "epoch": 0.49687956446687026, + "grad_norm": 1.010460021909887, + "learning_rate": 1.7927180884927814e-05, + "loss": 0.31929856538772583, + "step": 1871 + }, + { + "epoch": 0.4971451334484132, + "grad_norm": 1.1376790681693039, + "learning_rate": 1.7924503561163775e-05, + "loss": 0.3797461688518524, + "step": 1872 + }, + { + "epoch": 0.49741070242995616, + "grad_norm": 1.057594588942085, + "learning_rate": 1.792182470964891e-05, + "loss": 0.3056377172470093, + "step": 1873 + }, + { + "epoch": 0.4976762714114991, + "grad_norm": 1.1254473942016883, + "learning_rate": 1.7919144330899668e-05, + "loss": 0.3526398539543152, + "step": 1874 + }, + { + "epoch": 0.49794184039304207, + "grad_norm": 1.0289140670533532, + "learning_rate": 1.79164624254328e-05, + "loss": 0.3183595538139343, + "step": 1875 + }, + { + "epoch": 0.4982074093745851, + "grad_norm": 1.1908370019011798, + "learning_rate": 1.791377899376534e-05, + "loss": 0.3604113459587097, + "step": 1876 + }, + { + "epoch": 0.498472978356128, + "grad_norm": 1.1651856770093412, + "learning_rate": 1.7911094036414623e-05, + "loss": 0.3219848573207855, + "step": 1877 + }, + { + "epoch": 0.498738547337671, + "grad_norm": 1.0586801467718077, + "learning_rate": 1.7908407553898282e-05, + "loss": 0.28773394227027893, + "step": 1878 + }, + { + "epoch": 0.49900411631921393, + "grad_norm": 1.0649509880321448, + "learning_rate": 1.7905719546734233e-05, + "loss": 0.31453996896743774, + "step": 1879 + }, + { + "epoch": 0.4992696853007569, + "grad_norm": 0.9878415524405192, + "learning_rate": 1.7903030015440696e-05, + "loss": 0.2947153151035309, + "step": 1880 + }, + { + "epoch": 0.49953525428229983, + "grad_norm": 1.0652111521233423, + "learning_rate": 1.7900338960536178e-05, + "loss": 0.313723087310791, + "step": 1881 + }, + { + "epoch": 0.4998008232638428, + "grad_norm": 1.0853994840945123, + "learning_rate": 1.7897646382539485e-05, + "loss": 0.3385108709335327, + "step": 1882 + }, + { + "epoch": 0.5000663922453857, + "grad_norm": 1.0993457819479324, + "learning_rate": 1.7894952281969712e-05, + "loss": 0.31417039036750793, + "step": 1883 + }, + { + "epoch": 0.5003319612269287, + "grad_norm": 1.1452192213941934, + "learning_rate": 1.7892256659346253e-05, + "loss": 0.3555717468261719, + "step": 1884 + }, + { + "epoch": 0.5005975302084716, + "grad_norm": 1.1989261836629121, + "learning_rate": 1.7889559515188793e-05, + "loss": 0.3724518120288849, + "step": 1885 + }, + { + "epoch": 0.5008630991900146, + "grad_norm": 1.0516015708006068, + "learning_rate": 1.7886860850017306e-05, + "loss": 0.32646167278289795, + "step": 1886 + }, + { + "epoch": 0.5011286681715575, + "grad_norm": 1.079300223054909, + "learning_rate": 1.7884160664352062e-05, + "loss": 0.31072959303855896, + "step": 1887 + }, + { + "epoch": 0.5013942371531005, + "grad_norm": 0.9518526173941219, + "learning_rate": 1.7881458958713628e-05, + "loss": 0.26987242698669434, + "step": 1888 + }, + { + "epoch": 0.5016598061346434, + "grad_norm": 0.9908294117764815, + "learning_rate": 1.787875573362286e-05, + "loss": 0.30105817317962646, + "step": 1889 + }, + { + "epoch": 0.5019253751161864, + "grad_norm": 1.0444226583374554, + "learning_rate": 1.7876050989600908e-05, + "loss": 0.31277188658714294, + "step": 1890 + }, + { + "epoch": 0.5021909440977294, + "grad_norm": 1.0192470233304842, + "learning_rate": 1.7873344727169214e-05, + "loss": 0.31068161129951477, + "step": 1891 + }, + { + "epoch": 0.5024565130792723, + "grad_norm": 1.0797105219167356, + "learning_rate": 1.7870636946849512e-05, + "loss": 0.3491121530532837, + "step": 1892 + }, + { + "epoch": 0.5027220820608153, + "grad_norm": 1.0753654491775293, + "learning_rate": 1.7867927649163838e-05, + "loss": 0.3223581612110138, + "step": 1893 + }, + { + "epoch": 0.5029876510423582, + "grad_norm": 1.1295999155195493, + "learning_rate": 1.7865216834634506e-05, + "loss": 0.345224529504776, + "step": 1894 + }, + { + "epoch": 0.5032532200239012, + "grad_norm": 1.1419032071310418, + "learning_rate": 1.7862504503784123e-05, + "loss": 0.3408205211162567, + "step": 1895 + }, + { + "epoch": 0.5035187890054441, + "grad_norm": 0.9713066472066385, + "learning_rate": 1.7859790657135608e-05, + "loss": 0.2680068016052246, + "step": 1896 + }, + { + "epoch": 0.5037843579869872, + "grad_norm": 0.9186813995364894, + "learning_rate": 1.7857075295212148e-05, + "loss": 0.29733535647392273, + "step": 1897 + }, + { + "epoch": 0.5040499269685301, + "grad_norm": 1.1196248802118025, + "learning_rate": 1.785435841853724e-05, + "loss": 0.34820133447647095, + "step": 1898 + }, + { + "epoch": 0.5043154959500731, + "grad_norm": 1.134445876132798, + "learning_rate": 1.785164002763466e-05, + "loss": 0.3306594491004944, + "step": 1899 + }, + { + "epoch": 0.504581064931616, + "grad_norm": 1.0579272410020724, + "learning_rate": 1.7848920123028482e-05, + "loss": 0.3166846036911011, + "step": 1900 + }, + { + "epoch": 0.504846633913159, + "grad_norm": 1.2213509498849395, + "learning_rate": 1.784619870524308e-05, + "loss": 0.3406408727169037, + "step": 1901 + }, + { + "epoch": 0.5051122028947019, + "grad_norm": 1.0410168562106317, + "learning_rate": 1.78434757748031e-05, + "loss": 0.36358171701431274, + "step": 1902 + }, + { + "epoch": 0.5053777718762449, + "grad_norm": 1.0510382236040618, + "learning_rate": 1.7840751332233498e-05, + "loss": 0.34045761823654175, + "step": 1903 + }, + { + "epoch": 0.5056433408577878, + "grad_norm": 1.0566120463915532, + "learning_rate": 1.783802537805951e-05, + "loss": 0.3442475199699402, + "step": 1904 + }, + { + "epoch": 0.5059089098393308, + "grad_norm": 1.1632822330113848, + "learning_rate": 1.7835297912806675e-05, + "loss": 0.3488585650920868, + "step": 1905 + }, + { + "epoch": 0.5061744788208737, + "grad_norm": 1.098650773563784, + "learning_rate": 1.7832568937000808e-05, + "loss": 0.3340107500553131, + "step": 1906 + }, + { + "epoch": 0.5064400478024167, + "grad_norm": 1.0195614065654457, + "learning_rate": 1.7829838451168027e-05, + "loss": 0.3206177353858948, + "step": 1907 + }, + { + "epoch": 0.5067056167839596, + "grad_norm": 1.0219563874782234, + "learning_rate": 1.782710645583473e-05, + "loss": 0.2851010262966156, + "step": 1908 + }, + { + "epoch": 0.5069711857655026, + "grad_norm": 1.0249326570563306, + "learning_rate": 1.782437295152763e-05, + "loss": 0.31850844621658325, + "step": 1909 + }, + { + "epoch": 0.5072367547470455, + "grad_norm": 1.0890541355083159, + "learning_rate": 1.7821637938773704e-05, + "loss": 0.3343108892440796, + "step": 1910 + }, + { + "epoch": 0.5075023237285885, + "grad_norm": 1.1131994842325255, + "learning_rate": 1.781890141810023e-05, + "loss": 0.3423745930194855, + "step": 1911 + }, + { + "epoch": 0.5077678927101315, + "grad_norm": 1.057536319451762, + "learning_rate": 1.7816163390034775e-05, + "loss": 0.30980780720710754, + "step": 1912 + }, + { + "epoch": 0.5080334616916744, + "grad_norm": 1.0099692843485935, + "learning_rate": 1.7813423855105203e-05, + "loss": 0.31217479705810547, + "step": 1913 + }, + { + "epoch": 0.5082990306732174, + "grad_norm": 1.0721675523916532, + "learning_rate": 1.7810682813839664e-05, + "loss": 0.34741947054862976, + "step": 1914 + }, + { + "epoch": 0.5085645996547603, + "grad_norm": 1.1098427332228447, + "learning_rate": 1.7807940266766595e-05, + "loss": 0.32275527715682983, + "step": 1915 + }, + { + "epoch": 0.5088301686363033, + "grad_norm": 1.1130434711054393, + "learning_rate": 1.7805196214414728e-05, + "loss": 0.32760411500930786, + "step": 1916 + }, + { + "epoch": 0.5090957376178462, + "grad_norm": 1.1445787919507704, + "learning_rate": 1.7802450657313086e-05, + "loss": 0.3877720832824707, + "step": 1917 + }, + { + "epoch": 0.5093613065993892, + "grad_norm": 1.1135916509560913, + "learning_rate": 1.779970359599098e-05, + "loss": 0.33458876609802246, + "step": 1918 + }, + { + "epoch": 0.5096268755809321, + "grad_norm": 0.9826034605244246, + "learning_rate": 1.7796955030978007e-05, + "loss": 0.30603206157684326, + "step": 1919 + }, + { + "epoch": 0.5098924445624751, + "grad_norm": 0.9902684589377142, + "learning_rate": 1.7794204962804063e-05, + "loss": 0.2920286953449249, + "step": 1920 + }, + { + "epoch": 0.510158013544018, + "grad_norm": 1.1034173597508874, + "learning_rate": 1.7791453391999325e-05, + "loss": 0.32407981157302856, + "step": 1921 + }, + { + "epoch": 0.510423582525561, + "grad_norm": 1.3200648964540613, + "learning_rate": 1.7788700319094263e-05, + "loss": 0.30423563718795776, + "step": 1922 + }, + { + "epoch": 0.5106891515071039, + "grad_norm": 1.1213502448496324, + "learning_rate": 1.7785945744619642e-05, + "loss": 0.34691399335861206, + "step": 1923 + }, + { + "epoch": 0.5109547204886469, + "grad_norm": 1.0498801582672959, + "learning_rate": 1.7783189669106503e-05, + "loss": 0.3217603266239166, + "step": 1924 + }, + { + "epoch": 0.5112202894701899, + "grad_norm": 1.1943957961346587, + "learning_rate": 1.7780432093086198e-05, + "loss": 0.365132212638855, + "step": 1925 + }, + { + "epoch": 0.5114858584517329, + "grad_norm": 0.9783494867108459, + "learning_rate": 1.7777673017090344e-05, + "loss": 0.29662930965423584, + "step": 1926 + }, + { + "epoch": 0.5117514274332758, + "grad_norm": 1.0707541061431447, + "learning_rate": 1.7774912441650857e-05, + "loss": 0.3324819803237915, + "step": 1927 + }, + { + "epoch": 0.5120169964148188, + "grad_norm": 1.0040789031204058, + "learning_rate": 1.7772150367299953e-05, + "loss": 0.29331067204475403, + "step": 1928 + }, + { + "epoch": 0.5122825653963617, + "grad_norm": 1.064062495235822, + "learning_rate": 1.7769386794570117e-05, + "loss": 0.3158259987831116, + "step": 1929 + }, + { + "epoch": 0.5125481343779047, + "grad_norm": 1.020159871349018, + "learning_rate": 1.7766621723994145e-05, + "loss": 0.2824791967868805, + "step": 1930 + }, + { + "epoch": 0.5128137033594476, + "grad_norm": 1.0493215169042918, + "learning_rate": 1.7763855156105097e-05, + "loss": 0.2690732777118683, + "step": 1931 + }, + { + "epoch": 0.5130792723409906, + "grad_norm": 1.043157004637876, + "learning_rate": 1.7761087091436346e-05, + "loss": 0.31360942125320435, + "step": 1932 + }, + { + "epoch": 0.5133448413225336, + "grad_norm": 0.9858891902519169, + "learning_rate": 1.7758317530521535e-05, + "loss": 0.28334349393844604, + "step": 1933 + }, + { + "epoch": 0.5136104103040765, + "grad_norm": 1.1739380172138798, + "learning_rate": 1.7755546473894604e-05, + "loss": 0.3857404589653015, + "step": 1934 + }, + { + "epoch": 0.5138759792856195, + "grad_norm": 1.0280582546011092, + "learning_rate": 1.7752773922089784e-05, + "loss": 0.2852492332458496, + "step": 1935 + }, + { + "epoch": 0.5141415482671624, + "grad_norm": 1.003050995152578, + "learning_rate": 1.7749999875641585e-05, + "loss": 0.2959831953048706, + "step": 1936 + }, + { + "epoch": 0.5144071172487054, + "grad_norm": 1.100974201889633, + "learning_rate": 1.7747224335084815e-05, + "loss": 0.3129635453224182, + "step": 1937 + }, + { + "epoch": 0.5146726862302483, + "grad_norm": 1.0336946735940622, + "learning_rate": 1.774444730095456e-05, + "loss": 0.31391531229019165, + "step": 1938 + }, + { + "epoch": 0.5149382552117913, + "grad_norm": 1.0155253897885985, + "learning_rate": 1.7741668773786202e-05, + "loss": 0.30274757742881775, + "step": 1939 + }, + { + "epoch": 0.5152038241933342, + "grad_norm": 1.026561688701391, + "learning_rate": 1.7738888754115413e-05, + "loss": 0.29162222146987915, + "step": 1940 + }, + { + "epoch": 0.5154693931748772, + "grad_norm": 1.045931473256506, + "learning_rate": 1.7736107242478143e-05, + "loss": 0.30358970165252686, + "step": 1941 + }, + { + "epoch": 0.5157349621564201, + "grad_norm": 1.11915386227621, + "learning_rate": 1.7733324239410634e-05, + "loss": 0.32268065214157104, + "step": 1942 + }, + { + "epoch": 0.5160005311379631, + "grad_norm": 1.0626040245012975, + "learning_rate": 1.7730539745449417e-05, + "loss": 0.31925222277641296, + "step": 1943 + }, + { + "epoch": 0.516266100119506, + "grad_norm": 1.1170224886553113, + "learning_rate": 1.7727753761131312e-05, + "loss": 0.32883748412132263, + "step": 1944 + }, + { + "epoch": 0.516531669101049, + "grad_norm": 1.101510406621582, + "learning_rate": 1.7724966286993425e-05, + "loss": 0.3212829530239105, + "step": 1945 + }, + { + "epoch": 0.5167972380825919, + "grad_norm": 1.1477333753851342, + "learning_rate": 1.772217732357314e-05, + "loss": 0.32909759879112244, + "step": 1946 + }, + { + "epoch": 0.5170628070641349, + "grad_norm": 33.3722959000957, + "learning_rate": 1.7719386871408147e-05, + "loss": 0.3451213538646698, + "step": 1947 + }, + { + "epoch": 0.5173283760456778, + "grad_norm": 1.0792459943819739, + "learning_rate": 1.7716594931036402e-05, + "loss": 0.318422794342041, + "step": 1948 + }, + { + "epoch": 0.5175939450272208, + "grad_norm": 1.1243494025490273, + "learning_rate": 1.7713801502996166e-05, + "loss": 0.3165292739868164, + "step": 1949 + }, + { + "epoch": 0.5178595140087637, + "grad_norm": 1.1353818628503742, + "learning_rate": 1.7711006587825975e-05, + "loss": 0.3116700351238251, + "step": 1950 + }, + { + "epoch": 0.5181250829903067, + "grad_norm": 1.2005138291757869, + "learning_rate": 1.7708210186064656e-05, + "loss": 0.32102686166763306, + "step": 1951 + }, + { + "epoch": 0.5183906519718496, + "grad_norm": 1.079523368082095, + "learning_rate": 1.7705412298251323e-05, + "loss": 0.33025500178337097, + "step": 1952 + }, + { + "epoch": 0.5186562209533926, + "grad_norm": 1.2087703844513067, + "learning_rate": 1.7702612924925377e-05, + "loss": 0.36113062500953674, + "step": 1953 + }, + { + "epoch": 0.5189217899349357, + "grad_norm": 1.1242566727618883, + "learning_rate": 1.7699812066626503e-05, + "loss": 0.3092479109764099, + "step": 1954 + }, + { + "epoch": 0.5191873589164786, + "grad_norm": 1.117146005158035, + "learning_rate": 1.769700972389467e-05, + "loss": 0.3389117419719696, + "step": 1955 + }, + { + "epoch": 0.5194529278980216, + "grad_norm": 1.1525168535902064, + "learning_rate": 1.7694205897270147e-05, + "loss": 0.3225803077220917, + "step": 1956 + }, + { + "epoch": 0.5197184968795645, + "grad_norm": 1.0237361691251219, + "learning_rate": 1.7691400587293467e-05, + "loss": 0.3226786255836487, + "step": 1957 + }, + { + "epoch": 0.5199840658611075, + "grad_norm": 1.0060672564491426, + "learning_rate": 1.7688593794505466e-05, + "loss": 0.27708399295806885, + "step": 1958 + }, + { + "epoch": 0.5202496348426504, + "grad_norm": 1.0763214880079806, + "learning_rate": 1.768578551944726e-05, + "loss": 0.36100950837135315, + "step": 1959 + }, + { + "epoch": 0.5205152038241934, + "grad_norm": 1.043549985204807, + "learning_rate": 1.768297576266025e-05, + "loss": 0.3138211965560913, + "step": 1960 + }, + { + "epoch": 0.5207807728057363, + "grad_norm": 1.0618046264640966, + "learning_rate": 1.7680164524686128e-05, + "loss": 0.33959656953811646, + "step": 1961 + }, + { + "epoch": 0.5210463417872793, + "grad_norm": 0.9826913420332539, + "learning_rate": 1.7677351806066863e-05, + "loss": 0.3093605637550354, + "step": 1962 + }, + { + "epoch": 0.5213119107688222, + "grad_norm": 1.13307401094871, + "learning_rate": 1.7674537607344717e-05, + "loss": 0.3098641633987427, + "step": 1963 + }, + { + "epoch": 0.5215774797503652, + "grad_norm": 1.0810255128706003, + "learning_rate": 1.767172192906223e-05, + "loss": 0.35172683000564575, + "step": 1964 + }, + { + "epoch": 0.5218430487319081, + "grad_norm": 1.0729896509671073, + "learning_rate": 1.7668904771762242e-05, + "loss": 0.3535798192024231, + "step": 1965 + }, + { + "epoch": 0.5221086177134511, + "grad_norm": 1.2521081937006913, + "learning_rate": 1.766608613598785e-05, + "loss": 0.36183854937553406, + "step": 1966 + }, + { + "epoch": 0.522374186694994, + "grad_norm": 1.0735439944400962, + "learning_rate": 1.7663266022282473e-05, + "loss": 0.35995131731033325, + "step": 1967 + }, + { + "epoch": 0.522639755676537, + "grad_norm": 1.117054454049305, + "learning_rate": 1.766044443118978e-05, + "loss": 0.38672733306884766, + "step": 1968 + }, + { + "epoch": 0.5229053246580799, + "grad_norm": 1.0862044019422723, + "learning_rate": 1.765762136325375e-05, + "loss": 0.3389524221420288, + "step": 1969 + }, + { + "epoch": 0.5231708936396229, + "grad_norm": 0.9847521483407152, + "learning_rate": 1.7654796819018635e-05, + "loss": 0.3325779139995575, + "step": 1970 + }, + { + "epoch": 0.5234364626211658, + "grad_norm": 1.014607581135561, + "learning_rate": 1.7651970799028976e-05, + "loss": 0.328407347202301, + "step": 1971 + }, + { + "epoch": 0.5237020316027088, + "grad_norm": 0.9793310107257689, + "learning_rate": 1.764914330382959e-05, + "loss": 0.3050537705421448, + "step": 1972 + }, + { + "epoch": 0.5239676005842517, + "grad_norm": 1.1408686145630131, + "learning_rate": 1.7646314333965588e-05, + "loss": 0.35500285029411316, + "step": 1973 + }, + { + "epoch": 0.5242331695657947, + "grad_norm": 1.1035893819341516, + "learning_rate": 1.7643483889982364e-05, + "loss": 0.30319780111312866, + "step": 1974 + }, + { + "epoch": 0.5244987385473376, + "grad_norm": 1.0161223434375823, + "learning_rate": 1.7640651972425592e-05, + "loss": 0.315757691860199, + "step": 1975 + }, + { + "epoch": 0.5247643075288806, + "grad_norm": 1.0278713767432786, + "learning_rate": 1.7637818581841234e-05, + "loss": 0.28562331199645996, + "step": 1976 + }, + { + "epoch": 0.5250298765104235, + "grad_norm": 1.017204404946826, + "learning_rate": 1.763498371877553e-05, + "loss": 0.29798296093940735, + "step": 1977 + }, + { + "epoch": 0.5252954454919665, + "grad_norm": 1.1245986087835715, + "learning_rate": 1.763214738377501e-05, + "loss": 0.2923639416694641, + "step": 1978 + }, + { + "epoch": 0.5255610144735094, + "grad_norm": 1.0282257211254215, + "learning_rate": 1.7629309577386492e-05, + "loss": 0.2858009934425354, + "step": 1979 + }, + { + "epoch": 0.5258265834550524, + "grad_norm": 1.1185725636940211, + "learning_rate": 1.7626470300157064e-05, + "loss": 0.3615952134132385, + "step": 1980 + }, + { + "epoch": 0.5260921524365954, + "grad_norm": 1.1357118701340632, + "learning_rate": 1.762362955263411e-05, + "loss": 0.36142098903656006, + "step": 1981 + }, + { + "epoch": 0.5263577214181384, + "grad_norm": 1.1305105783283786, + "learning_rate": 1.762078733536529e-05, + "loss": 0.3335961699485779, + "step": 1982 + }, + { + "epoch": 0.5266232903996814, + "grad_norm": 1.2367655641806865, + "learning_rate": 1.761794364889855e-05, + "loss": 0.34549272060394287, + "step": 1983 + }, + { + "epoch": 0.5268888593812243, + "grad_norm": 1.1166612317693478, + "learning_rate": 1.761509849378212e-05, + "loss": 0.3177812993526459, + "step": 1984 + }, + { + "epoch": 0.5271544283627673, + "grad_norm": 1.1485560676920734, + "learning_rate": 1.7612251870564515e-05, + "loss": 0.33191388845443726, + "step": 1985 + }, + { + "epoch": 0.5274199973443102, + "grad_norm": 1.0807821541967428, + "learning_rate": 1.7609403779794523e-05, + "loss": 0.30732038617134094, + "step": 1986 + }, + { + "epoch": 0.5276855663258532, + "grad_norm": 1.1038043700347457, + "learning_rate": 1.7606554222021226e-05, + "loss": 0.33012068271636963, + "step": 1987 + }, + { + "epoch": 0.5279511353073961, + "grad_norm": 1.2233212729045404, + "learning_rate": 1.760370319779399e-05, + "loss": 0.3396066427230835, + "step": 1988 + }, + { + "epoch": 0.5282167042889391, + "grad_norm": 1.0755028443639627, + "learning_rate": 1.7600850707662454e-05, + "loss": 0.29053401947021484, + "step": 1989 + }, + { + "epoch": 0.528482273270482, + "grad_norm": 1.0859289781343007, + "learning_rate": 1.7597996752176545e-05, + "loss": 0.32927206158638, + "step": 1990 + }, + { + "epoch": 0.528747842252025, + "grad_norm": 1.0494460781018915, + "learning_rate": 1.759514133188647e-05, + "loss": 0.309224933385849, + "step": 1991 + }, + { + "epoch": 0.5290134112335679, + "grad_norm": 1.0870307368096292, + "learning_rate": 1.7592284447342725e-05, + "loss": 0.31973862648010254, + "step": 1992 + }, + { + "epoch": 0.5292789802151109, + "grad_norm": 1.0491029702582455, + "learning_rate": 1.758942609909608e-05, + "loss": 0.3331080377101898, + "step": 1993 + }, + { + "epoch": 0.5295445491966538, + "grad_norm": 1.0710245753206995, + "learning_rate": 1.7586566287697592e-05, + "loss": 0.32755160331726074, + "step": 1994 + }, + { + "epoch": 0.5298101181781968, + "grad_norm": 1.0377451052992368, + "learning_rate": 1.7583705013698602e-05, + "loss": 0.31942498683929443, + "step": 1995 + }, + { + "epoch": 0.5300756871597397, + "grad_norm": 1.1665695354682926, + "learning_rate": 1.7580842277650723e-05, + "loss": 0.3199199438095093, + "step": 1996 + }, + { + "epoch": 0.5303412561412827, + "grad_norm": 0.9680761404148592, + "learning_rate": 1.7577978080105864e-05, + "loss": 0.28153708577156067, + "step": 1997 + }, + { + "epoch": 0.5306068251228256, + "grad_norm": 1.0336529884327843, + "learning_rate": 1.7575112421616203e-05, + "loss": 0.3050921559333801, + "step": 1998 + }, + { + "epoch": 0.5308723941043686, + "grad_norm": 1.0836881519572394, + "learning_rate": 1.7572245302734208e-05, + "loss": 0.3242149353027344, + "step": 1999 + }, + { + "epoch": 0.5311379630859115, + "grad_norm": 0.9889139549595165, + "learning_rate": 1.7569376724012622e-05, + "loss": 0.29947227239608765, + "step": 2000 + }, + { + "epoch": 0.5314035320674545, + "grad_norm": 1.132976441688301, + "learning_rate": 1.756650668600448e-05, + "loss": 0.3229755163192749, + "step": 2001 + }, + { + "epoch": 0.5316691010489975, + "grad_norm": 1.0802391073518836, + "learning_rate": 1.7563635189263086e-05, + "loss": 0.3544544577598572, + "step": 2002 + }, + { + "epoch": 0.5319346700305404, + "grad_norm": 1.0996284853033707, + "learning_rate": 1.756076223434203e-05, + "loss": 0.32807621359825134, + "step": 2003 + }, + { + "epoch": 0.5322002390120834, + "grad_norm": 0.9920629294688551, + "learning_rate": 1.7557887821795192e-05, + "loss": 0.3057190477848053, + "step": 2004 + }, + { + "epoch": 0.5324658079936263, + "grad_norm": 1.0234244423063892, + "learning_rate": 1.7555011952176716e-05, + "loss": 0.29419198632240295, + "step": 2005 + }, + { + "epoch": 0.5327313769751693, + "grad_norm": 0.9799120327217228, + "learning_rate": 1.755213462604104e-05, + "loss": 0.3232089877128601, + "step": 2006 + }, + { + "epoch": 0.5329969459567122, + "grad_norm": 1.0186576745896931, + "learning_rate": 1.7549255843942875e-05, + "loss": 0.29784274101257324, + "step": 2007 + }, + { + "epoch": 0.5332625149382552, + "grad_norm": 1.0470325382276877, + "learning_rate": 1.7546375606437216e-05, + "loss": 0.31421899795532227, + "step": 2008 + }, + { + "epoch": 0.5335280839197981, + "grad_norm": 1.0641694414781755, + "learning_rate": 1.7543493914079345e-05, + "loss": 0.30681121349334717, + "step": 2009 + }, + { + "epoch": 0.5337936529013412, + "grad_norm": 1.0092085906510277, + "learning_rate": 1.7540610767424813e-05, + "loss": 0.3114027976989746, + "step": 2010 + }, + { + "epoch": 0.5340592218828841, + "grad_norm": 1.0064230726553411, + "learning_rate": 1.753772616702946e-05, + "loss": 0.3030378520488739, + "step": 2011 + }, + { + "epoch": 0.5343247908644271, + "grad_norm": 1.1096181297712675, + "learning_rate": 1.75348401134494e-05, + "loss": 0.30272024869918823, + "step": 2012 + }, + { + "epoch": 0.53459035984597, + "grad_norm": 1.049795668852804, + "learning_rate": 1.7531952607241033e-05, + "loss": 0.35117241740226746, + "step": 2013 + }, + { + "epoch": 0.534855928827513, + "grad_norm": 1.2552056089457548, + "learning_rate": 1.7529063648961035e-05, + "loss": 0.297889769077301, + "step": 2014 + }, + { + "epoch": 0.5351214978090559, + "grad_norm": 1.1238332501182418, + "learning_rate": 1.752617323916636e-05, + "loss": 0.32858210802078247, + "step": 2015 + }, + { + "epoch": 0.5353870667905989, + "grad_norm": 1.117582559290418, + "learning_rate": 1.7523281378414246e-05, + "loss": 0.3095484673976898, + "step": 2016 + }, + { + "epoch": 0.5356526357721418, + "grad_norm": 1.1072331793921826, + "learning_rate": 1.752038806726222e-05, + "loss": 0.34490731358528137, + "step": 2017 + }, + { + "epoch": 0.5359182047536848, + "grad_norm": 1.1427367564985542, + "learning_rate": 1.751749330626806e-05, + "loss": 0.35144859552383423, + "step": 2018 + }, + { + "epoch": 0.5361837737352277, + "grad_norm": 1.0337528414474293, + "learning_rate": 1.751459709598985e-05, + "loss": 0.26337549090385437, + "step": 2019 + }, + { + "epoch": 0.5364493427167707, + "grad_norm": 1.0719958558069054, + "learning_rate": 1.7511699436985952e-05, + "loss": 0.3235297203063965, + "step": 2020 + }, + { + "epoch": 0.5367149116983136, + "grad_norm": 1.1655117185465573, + "learning_rate": 1.7508800329814993e-05, + "loss": 0.35195302963256836, + "step": 2021 + }, + { + "epoch": 0.5369804806798566, + "grad_norm": 1.0547432431007058, + "learning_rate": 1.7505899775035887e-05, + "loss": 0.3226467967033386, + "step": 2022 + }, + { + "epoch": 0.5372460496613995, + "grad_norm": 1.0406958245289468, + "learning_rate": 1.750299777320783e-05, + "loss": 0.30616605281829834, + "step": 2023 + }, + { + "epoch": 0.5375116186429425, + "grad_norm": 1.074902411593199, + "learning_rate": 1.7500094324890294e-05, + "loss": 0.3007400333881378, + "step": 2024 + }, + { + "epoch": 0.5377771876244855, + "grad_norm": 1.1883491645763606, + "learning_rate": 1.7497189430643025e-05, + "loss": 0.35409432649612427, + "step": 2025 + }, + { + "epoch": 0.5380427566060284, + "grad_norm": 1.6951314154408594, + "learning_rate": 1.7494283091026053e-05, + "loss": 0.33718281984329224, + "step": 2026 + }, + { + "epoch": 0.5383083255875714, + "grad_norm": 1.0940933435725269, + "learning_rate": 1.749137530659969e-05, + "loss": 0.3589650094509125, + "step": 2027 + }, + { + "epoch": 0.5385738945691143, + "grad_norm": 1.1114345705753812, + "learning_rate": 1.7488466077924525e-05, + "loss": 0.35314273834228516, + "step": 2028 + }, + { + "epoch": 0.5388394635506573, + "grad_norm": 1.017869922891923, + "learning_rate": 1.7485555405561412e-05, + "loss": 0.28393587470054626, + "step": 2029 + }, + { + "epoch": 0.5391050325322002, + "grad_norm": 1.0276825009259218, + "learning_rate": 1.7482643290071503e-05, + "loss": 0.3262496292591095, + "step": 2030 + }, + { + "epoch": 0.5393706015137432, + "grad_norm": 1.122887144479208, + "learning_rate": 1.7479729732016218e-05, + "loss": 0.3549670875072479, + "step": 2031 + }, + { + "epoch": 0.5396361704952861, + "grad_norm": 1.0211791251004596, + "learning_rate": 1.7476814731957253e-05, + "loss": 0.30668947100639343, + "step": 2032 + }, + { + "epoch": 0.5399017394768291, + "grad_norm": 0.9278865240006526, + "learning_rate": 1.747389829045659e-05, + "loss": 0.2942228317260742, + "step": 2033 + }, + { + "epoch": 0.540167308458372, + "grad_norm": 1.023956047651912, + "learning_rate": 1.7470980408076484e-05, + "loss": 0.3166583478450775, + "step": 2034 + }, + { + "epoch": 0.540432877439915, + "grad_norm": 1.1503051826481139, + "learning_rate": 1.7468061085379467e-05, + "loss": 0.35149675607681274, + "step": 2035 + }, + { + "epoch": 0.5406984464214579, + "grad_norm": 1.1081467050264138, + "learning_rate": 1.7465140322928353e-05, + "loss": 0.32645004987716675, + "step": 2036 + }, + { + "epoch": 0.5409640154030009, + "grad_norm": 1.1656339653416823, + "learning_rate": 1.7462218121286224e-05, + "loss": 0.3078027367591858, + "step": 2037 + }, + { + "epoch": 0.5412295843845439, + "grad_norm": 1.0310810248927436, + "learning_rate": 1.7459294481016452e-05, + "loss": 0.28726300597190857, + "step": 2038 + }, + { + "epoch": 0.5414951533660869, + "grad_norm": 1.028103971871598, + "learning_rate": 1.7456369402682675e-05, + "loss": 0.29330572485923767, + "step": 2039 + }, + { + "epoch": 0.5417607223476298, + "grad_norm": 1.176742297493161, + "learning_rate": 1.7453442886848818e-05, + "loss": 0.3151019215583801, + "step": 2040 + }, + { + "epoch": 0.5420262913291728, + "grad_norm": 1.0830810759861134, + "learning_rate": 1.745051493407908e-05, + "loss": 0.3267561197280884, + "step": 2041 + }, + { + "epoch": 0.5422918603107157, + "grad_norm": 1.0462822233377385, + "learning_rate": 1.7447585544937933e-05, + "loss": 0.2834410071372986, + "step": 2042 + }, + { + "epoch": 0.5425574292922587, + "grad_norm": 0.9922210453154783, + "learning_rate": 1.7444654719990128e-05, + "loss": 0.29896080493927, + "step": 2043 + }, + { + "epoch": 0.5428229982738016, + "grad_norm": 1.0716195406510356, + "learning_rate": 1.7441722459800695e-05, + "loss": 0.3084600865840912, + "step": 2044 + }, + { + "epoch": 0.5430885672553446, + "grad_norm": 1.100381998832612, + "learning_rate": 1.743878876493494e-05, + "loss": 0.3178163170814514, + "step": 2045 + }, + { + "epoch": 0.5433541362368876, + "grad_norm": 1.1512124937535644, + "learning_rate": 1.743585363595844e-05, + "loss": 0.32886385917663574, + "step": 2046 + }, + { + "epoch": 0.5436197052184305, + "grad_norm": 1.0499932799675828, + "learning_rate": 1.743291707343706e-05, + "loss": 0.31810784339904785, + "step": 2047 + }, + { + "epoch": 0.5438852741999735, + "grad_norm": 0.994229574171737, + "learning_rate": 1.7429979077936928e-05, + "loss": 0.3003198504447937, + "step": 2048 + }, + { + "epoch": 0.5441508431815164, + "grad_norm": 1.1622503660754158, + "learning_rate": 1.7427039650024462e-05, + "loss": 0.33889323472976685, + "step": 2049 + }, + { + "epoch": 0.5444164121630594, + "grad_norm": 1.062972427778211, + "learning_rate": 1.7424098790266343e-05, + "loss": 0.3238763213157654, + "step": 2050 + }, + { + "epoch": 0.5446819811446023, + "grad_norm": 1.3651581380225686, + "learning_rate": 1.742115649922954e-05, + "loss": 0.34304776787757874, + "step": 2051 + }, + { + "epoch": 0.5449475501261453, + "grad_norm": 1.1192647204238841, + "learning_rate": 1.741821277748128e-05, + "loss": 0.31528347730636597, + "step": 2052 + }, + { + "epoch": 0.5452131191076882, + "grad_norm": 1.0728286121769783, + "learning_rate": 1.7415267625589094e-05, + "loss": 0.2992726266384125, + "step": 2053 + }, + { + "epoch": 0.5454786880892312, + "grad_norm": 1.0217638219637288, + "learning_rate": 1.741232104412076e-05, + "loss": 0.31706419587135315, + "step": 2054 + }, + { + "epoch": 0.5457442570707741, + "grad_norm": 1.8373163603702176, + "learning_rate": 1.7409373033644355e-05, + "loss": 0.2887676954269409, + "step": 2055 + }, + { + "epoch": 0.5460098260523171, + "grad_norm": 1.1434290988558236, + "learning_rate": 1.740642359472821e-05, + "loss": 0.3410964906215668, + "step": 2056 + }, + { + "epoch": 0.54627539503386, + "grad_norm": 1.0501323660770627, + "learning_rate": 1.740347272794095e-05, + "loss": 0.3711693286895752, + "step": 2057 + }, + { + "epoch": 0.546540964015403, + "grad_norm": 1.10922453334831, + "learning_rate": 1.7400520433851457e-05, + "loss": 0.3512499928474426, + "step": 2058 + }, + { + "epoch": 0.5468065329969459, + "grad_norm": 1.0790222544341648, + "learning_rate": 1.739756671302891e-05, + "loss": 0.3136678636074066, + "step": 2059 + }, + { + "epoch": 0.5470721019784889, + "grad_norm": 1.0417668658369865, + "learning_rate": 1.7394611566042748e-05, + "loss": 0.2983730435371399, + "step": 2060 + }, + { + "epoch": 0.5473376709600318, + "grad_norm": 1.1233530419836393, + "learning_rate": 1.7391654993462686e-05, + "loss": 0.36603933572769165, + "step": 2061 + }, + { + "epoch": 0.5476032399415748, + "grad_norm": 1.1758952832381078, + "learning_rate": 1.7388696995858717e-05, + "loss": 0.3651789128780365, + "step": 2062 + }, + { + "epoch": 0.5478688089231177, + "grad_norm": 1.2065493864331982, + "learning_rate": 1.7385737573801108e-05, + "loss": 0.30580615997314453, + "step": 2063 + }, + { + "epoch": 0.5481343779046607, + "grad_norm": 0.981372496476623, + "learning_rate": 1.7382776727860406e-05, + "loss": 0.2630755305290222, + "step": 2064 + }, + { + "epoch": 0.5483999468862036, + "grad_norm": 1.0020540486713174, + "learning_rate": 1.7379814458607416e-05, + "loss": 0.2947537899017334, + "step": 2065 + }, + { + "epoch": 0.5486655158677467, + "grad_norm": 1.034048631807644, + "learning_rate": 1.737685076661324e-05, + "loss": 0.3119455873966217, + "step": 2066 + }, + { + "epoch": 0.5489310848492897, + "grad_norm": 1.052273536899897, + "learning_rate": 1.7373885652449237e-05, + "loss": 0.3162347972393036, + "step": 2067 + }, + { + "epoch": 0.5491966538308326, + "grad_norm": 1.2320011234530202, + "learning_rate": 1.7370919116687047e-05, + "loss": 0.34120452404022217, + "step": 2068 + }, + { + "epoch": 0.5494622228123756, + "grad_norm": 1.095244169583748, + "learning_rate": 1.7367951159898583e-05, + "loss": 0.3126780092716217, + "step": 2069 + }, + { + "epoch": 0.5497277917939185, + "grad_norm": 0.9591128480333501, + "learning_rate": 1.7364981782656033e-05, + "loss": 0.2833349406719208, + "step": 2070 + }, + { + "epoch": 0.5499933607754615, + "grad_norm": 1.0921809927618633, + "learning_rate": 1.7362010985531855e-05, + "loss": 0.31617453694343567, + "step": 2071 + }, + { + "epoch": 0.5502589297570044, + "grad_norm": 1.0809700153666713, + "learning_rate": 1.735903876909879e-05, + "loss": 0.31372442841529846, + "step": 2072 + }, + { + "epoch": 0.5505244987385474, + "grad_norm": 1.1616077591637106, + "learning_rate": 1.735606513392984e-05, + "loss": 0.3500489592552185, + "step": 2073 + }, + { + "epoch": 0.5507900677200903, + "grad_norm": 1.0373404262028456, + "learning_rate": 1.735309008059829e-05, + "loss": 0.3219031095504761, + "step": 2074 + }, + { + "epoch": 0.5510556367016333, + "grad_norm": 1.0701365395287485, + "learning_rate": 1.7350113609677694e-05, + "loss": 0.32419610023498535, + "step": 2075 + }, + { + "epoch": 0.5513212056831762, + "grad_norm": 1.1054492395059694, + "learning_rate": 1.7347135721741874e-05, + "loss": 0.34804612398147583, + "step": 2076 + }, + { + "epoch": 0.5515867746647192, + "grad_norm": 1.09814942010155, + "learning_rate": 1.7344156417364946e-05, + "loss": 0.33105939626693726, + "step": 2077 + }, + { + "epoch": 0.5518523436462621, + "grad_norm": 1.0139790776190714, + "learning_rate": 1.7341175697121273e-05, + "loss": 0.3426011800765991, + "step": 2078 + }, + { + "epoch": 0.5521179126278051, + "grad_norm": 1.1120942872149455, + "learning_rate": 1.7338193561585507e-05, + "loss": 0.33207643032073975, + "step": 2079 + }, + { + "epoch": 0.552383481609348, + "grad_norm": 0.9807946500665143, + "learning_rate": 1.7335210011332573e-05, + "loss": 0.31849467754364014, + "step": 2080 + }, + { + "epoch": 0.552649050590891, + "grad_norm": 1.081622565959563, + "learning_rate": 1.7332225046937655e-05, + "loss": 0.3549337685108185, + "step": 2081 + }, + { + "epoch": 0.5529146195724339, + "grad_norm": 0.9652343930669623, + "learning_rate": 1.7329238668976224e-05, + "loss": 0.2850857377052307, + "step": 2082 + }, + { + "epoch": 0.5531801885539769, + "grad_norm": 1.1370461672740964, + "learning_rate": 1.732625087802402e-05, + "loss": 0.3277609348297119, + "step": 2083 + }, + { + "epoch": 0.5534457575355198, + "grad_norm": 1.0712095451099939, + "learning_rate": 1.732326167465705e-05, + "loss": 0.2951444983482361, + "step": 2084 + }, + { + "epoch": 0.5537113265170628, + "grad_norm": 1.0893938459197319, + "learning_rate": 1.7320271059451597e-05, + "loss": 0.36634138226509094, + "step": 2085 + }, + { + "epoch": 0.5539768954986057, + "grad_norm": 1.060256238160636, + "learning_rate": 1.7317279032984222e-05, + "loss": 0.3407907783985138, + "step": 2086 + }, + { + "epoch": 0.5542424644801487, + "grad_norm": 1.0563310141876696, + "learning_rate": 1.7314285595831747e-05, + "loss": 0.34038978815078735, + "step": 2087 + }, + { + "epoch": 0.5545080334616916, + "grad_norm": 1.0558109709205228, + "learning_rate": 1.7311290748571273e-05, + "loss": 0.337898313999176, + "step": 2088 + }, + { + "epoch": 0.5547736024432346, + "grad_norm": 1.1543867929059073, + "learning_rate": 1.7308294491780175e-05, + "loss": 0.3250765800476074, + "step": 2089 + }, + { + "epoch": 0.5550391714247775, + "grad_norm": 1.101568217376945, + "learning_rate": 1.730529682603609e-05, + "loss": 0.31562721729278564, + "step": 2090 + }, + { + "epoch": 0.5553047404063205, + "grad_norm": 1.2678079753749867, + "learning_rate": 1.730229775191693e-05, + "loss": 0.32757896184921265, + "step": 2091 + }, + { + "epoch": 0.5555703093878634, + "grad_norm": 1.1010819086774664, + "learning_rate": 1.7299297270000894e-05, + "loss": 0.35861605405807495, + "step": 2092 + }, + { + "epoch": 0.5558358783694064, + "grad_norm": 1.0999873688088635, + "learning_rate": 1.7296295380866425e-05, + "loss": 0.3383220434188843, + "step": 2093 + }, + { + "epoch": 0.5561014473509495, + "grad_norm": 1.1431134206724336, + "learning_rate": 1.7293292085092263e-05, + "loss": 0.30144187808036804, + "step": 2094 + }, + { + "epoch": 0.5563670163324924, + "grad_norm": 1.0354659821546437, + "learning_rate": 1.72902873832574e-05, + "loss": 0.2626546323299408, + "step": 2095 + }, + { + "epoch": 0.5566325853140354, + "grad_norm": 1.0939710377386638, + "learning_rate": 1.7287281275941112e-05, + "loss": 0.3289363980293274, + "step": 2096 + }, + { + "epoch": 0.5568981542955783, + "grad_norm": 0.9797533003070389, + "learning_rate": 1.7284273763722943e-05, + "loss": 0.26631784439086914, + "step": 2097 + }, + { + "epoch": 0.5571637232771213, + "grad_norm": 1.0035421194069876, + "learning_rate": 1.7281264847182697e-05, + "loss": 0.3051939606666565, + "step": 2098 + }, + { + "epoch": 0.5574292922586642, + "grad_norm": 1.0515034870910809, + "learning_rate": 1.7278254526900468e-05, + "loss": 0.34456121921539307, + "step": 2099 + }, + { + "epoch": 0.5576948612402072, + "grad_norm": 1.2038994359149542, + "learning_rate": 1.72752428034566e-05, + "loss": 0.2747807502746582, + "step": 2100 + }, + { + "epoch": 0.5579604302217501, + "grad_norm": 2.186270123050143, + "learning_rate": 1.7272229677431723e-05, + "loss": 0.31111812591552734, + "step": 2101 + }, + { + "epoch": 0.5582259992032931, + "grad_norm": 1.0150701360001215, + "learning_rate": 1.7269215149406737e-05, + "loss": 0.29648226499557495, + "step": 2102 + }, + { + "epoch": 0.558491568184836, + "grad_norm": 0.9846402594569152, + "learning_rate": 1.72661992199628e-05, + "loss": 0.28303876519203186, + "step": 2103 + }, + { + "epoch": 0.558757137166379, + "grad_norm": 1.1069492435421613, + "learning_rate": 1.726318188968135e-05, + "loss": 0.30540165305137634, + "step": 2104 + }, + { + "epoch": 0.5590227061479219, + "grad_norm": 1.2177152582591586, + "learning_rate": 1.726016315914409e-05, + "loss": 0.31810393929481506, + "step": 2105 + }, + { + "epoch": 0.5592882751294649, + "grad_norm": 1.134577587954556, + "learning_rate": 1.7257143028933004e-05, + "loss": 0.33605068922042847, + "step": 2106 + }, + { + "epoch": 0.5595538441110078, + "grad_norm": 1.089019585879268, + "learning_rate": 1.725412149963033e-05, + "loss": 0.3340590298175812, + "step": 2107 + }, + { + "epoch": 0.5598194130925508, + "grad_norm": 0.9872121137775324, + "learning_rate": 1.7251098571818586e-05, + "loss": 0.29560500383377075, + "step": 2108 + }, + { + "epoch": 0.5600849820740937, + "grad_norm": 1.0964006197085026, + "learning_rate": 1.7248074246080555e-05, + "loss": 0.30100107192993164, + "step": 2109 + }, + { + "epoch": 0.5603505510556367, + "grad_norm": 1.1506338140671328, + "learning_rate": 1.7245048522999294e-05, + "loss": 0.35551172494888306, + "step": 2110 + }, + { + "epoch": 0.5606161200371796, + "grad_norm": 1.0513397818607815, + "learning_rate": 1.724202140315812e-05, + "loss": 0.3182663023471832, + "step": 2111 + }, + { + "epoch": 0.5608816890187226, + "grad_norm": 1.092960095111009, + "learning_rate": 1.723899288714064e-05, + "loss": 0.3160201609134674, + "step": 2112 + }, + { + "epoch": 0.5611472580002655, + "grad_norm": 1.0656744789709975, + "learning_rate": 1.72359629755307e-05, + "loss": 0.3126063942909241, + "step": 2113 + }, + { + "epoch": 0.5614128269818085, + "grad_norm": 1.0376603045942787, + "learning_rate": 1.723293166891244e-05, + "loss": 0.3222552239894867, + "step": 2114 + }, + { + "epoch": 0.5616783959633515, + "grad_norm": 1.1154320347150413, + "learning_rate": 1.722989896787026e-05, + "loss": 0.33601805567741394, + "step": 2115 + }, + { + "epoch": 0.5619439649448944, + "grad_norm": 1.0241046952841495, + "learning_rate": 1.722686487298883e-05, + "loss": 0.28679755330085754, + "step": 2116 + }, + { + "epoch": 0.5622095339264374, + "grad_norm": 0.9498185678215705, + "learning_rate": 1.722382938485308e-05, + "loss": 0.2895340323448181, + "step": 2117 + }, + { + "epoch": 0.5624751029079803, + "grad_norm": 1.3753225282493697, + "learning_rate": 1.7220792504048227e-05, + "loss": 0.310183048248291, + "step": 2118 + }, + { + "epoch": 0.5627406718895233, + "grad_norm": 0.9776305745351022, + "learning_rate": 1.7217754231159737e-05, + "loss": 0.2768586277961731, + "step": 2119 + }, + { + "epoch": 0.5630062408710662, + "grad_norm": 0.9838874956474448, + "learning_rate": 1.7214714566773358e-05, + "loss": 0.2785574793815613, + "step": 2120 + }, + { + "epoch": 0.5632718098526092, + "grad_norm": 1.1815363465765012, + "learning_rate": 1.72116735114751e-05, + "loss": 0.30544358491897583, + "step": 2121 + }, + { + "epoch": 0.5635373788341522, + "grad_norm": 1.0704755380783626, + "learning_rate": 1.7208631065851243e-05, + "loss": 0.31662559509277344, + "step": 2122 + }, + { + "epoch": 0.5638029478156952, + "grad_norm": 0.9893085866675072, + "learning_rate": 1.7205587230488335e-05, + "loss": 0.31466105580329895, + "step": 2123 + }, + { + "epoch": 0.5640685167972381, + "grad_norm": 1.1520731756820097, + "learning_rate": 1.720254200597319e-05, + "loss": 0.3471367359161377, + "step": 2124 + }, + { + "epoch": 0.5643340857787811, + "grad_norm": 1.056530578075146, + "learning_rate": 1.7199495392892892e-05, + "loss": 0.3325269818305969, + "step": 2125 + }, + { + "epoch": 0.564599654760324, + "grad_norm": 1.1040662937900534, + "learning_rate": 1.7196447391834797e-05, + "loss": 0.32423460483551025, + "step": 2126 + }, + { + "epoch": 0.564865223741867, + "grad_norm": 1.0403895710374138, + "learning_rate": 1.7193398003386514e-05, + "loss": 0.3083527088165283, + "step": 2127 + }, + { + "epoch": 0.5651307927234099, + "grad_norm": 1.1794029606730059, + "learning_rate": 1.7190347228135933e-05, + "loss": 0.3418716490268707, + "step": 2128 + }, + { + "epoch": 0.5653963617049529, + "grad_norm": 1.0509473075306943, + "learning_rate": 1.7187295066671214e-05, + "loss": 0.33037957549095154, + "step": 2129 + }, + { + "epoch": 0.5656619306864958, + "grad_norm": 1.229094630243538, + "learning_rate": 1.7184241519580767e-05, + "loss": 0.3383673131465912, + "step": 2130 + }, + { + "epoch": 0.5659274996680388, + "grad_norm": 0.9364933789266218, + "learning_rate": 1.718118658745329e-05, + "loss": 0.27756133675575256, + "step": 2131 + }, + { + "epoch": 0.5661930686495817, + "grad_norm": 1.1307081535546069, + "learning_rate": 1.717813027087773e-05, + "loss": 0.2987852692604065, + "step": 2132 + }, + { + "epoch": 0.5664586376311247, + "grad_norm": 1.0924971268375117, + "learning_rate": 1.717507257044331e-05, + "loss": 0.30016621947288513, + "step": 2133 + }, + { + "epoch": 0.5667242066126676, + "grad_norm": 1.0923612277165435, + "learning_rate": 1.7172013486739528e-05, + "loss": 0.31592345237731934, + "step": 2134 + }, + { + "epoch": 0.5669897755942106, + "grad_norm": 1.0932899901018698, + "learning_rate": 1.716895302035613e-05, + "loss": 0.3500048816204071, + "step": 2135 + }, + { + "epoch": 0.5672553445757536, + "grad_norm": 1.0529476139624208, + "learning_rate": 1.7165891171883134e-05, + "loss": 0.32069307565689087, + "step": 2136 + }, + { + "epoch": 0.5675209135572965, + "grad_norm": 1.10329279559138, + "learning_rate": 1.7162827941910837e-05, + "loss": 0.3100130558013916, + "step": 2137 + }, + { + "epoch": 0.5677864825388395, + "grad_norm": 1.080836142172887, + "learning_rate": 1.715976333102979e-05, + "loss": 0.3205985128879547, + "step": 2138 + }, + { + "epoch": 0.5680520515203824, + "grad_norm": 1.0861679281182697, + "learning_rate": 1.715669733983081e-05, + "loss": 0.3243224024772644, + "step": 2139 + }, + { + "epoch": 0.5683176205019254, + "grad_norm": 1.0818895017967487, + "learning_rate": 1.7153629968904997e-05, + "loss": 0.3278832733631134, + "step": 2140 + }, + { + "epoch": 0.5685831894834683, + "grad_norm": 0.9949896264020713, + "learning_rate": 1.7150561218843693e-05, + "loss": 0.29137033224105835, + "step": 2141 + }, + { + "epoch": 0.5688487584650113, + "grad_norm": 1.0470808838345107, + "learning_rate": 1.7147491090238516e-05, + "loss": 0.3065168857574463, + "step": 2142 + }, + { + "epoch": 0.5691143274465542, + "grad_norm": 1.0368441449557109, + "learning_rate": 1.7144419583681354e-05, + "loss": 0.3367912173271179, + "step": 2143 + }, + { + "epoch": 0.5693798964280972, + "grad_norm": 1.086220090850542, + "learning_rate": 1.7141346699764357e-05, + "loss": 0.32278239727020264, + "step": 2144 + }, + { + "epoch": 0.5696454654096401, + "grad_norm": 1.080765529331453, + "learning_rate": 1.713827243907994e-05, + "loss": 0.2887166440486908, + "step": 2145 + }, + { + "epoch": 0.5699110343911831, + "grad_norm": 1.1353258061614586, + "learning_rate": 1.713519680222079e-05, + "loss": 0.33214619755744934, + "step": 2146 + }, + { + "epoch": 0.570176603372726, + "grad_norm": 1.1145274058321384, + "learning_rate": 1.7132119789779846e-05, + "loss": 0.2865470051765442, + "step": 2147 + }, + { + "epoch": 0.570442172354269, + "grad_norm": 1.1145678631141913, + "learning_rate": 1.7129041402350317e-05, + "loss": 0.32746967673301697, + "step": 2148 + }, + { + "epoch": 0.5707077413358119, + "grad_norm": 1.0454330804264187, + "learning_rate": 1.712596164052569e-05, + "loss": 0.3029513359069824, + "step": 2149 + }, + { + "epoch": 0.570973310317355, + "grad_norm": 0.9779058393705973, + "learning_rate": 1.7122880504899698e-05, + "loss": 0.3052698075771332, + "step": 2150 + }, + { + "epoch": 0.5712388792988979, + "grad_norm": 1.055591157713499, + "learning_rate": 1.7119797996066355e-05, + "loss": 0.29221272468566895, + "step": 2151 + }, + { + "epoch": 0.5715044482804409, + "grad_norm": 1.0014263274293047, + "learning_rate": 1.711671411461993e-05, + "loss": 0.3165368139743805, + "step": 2152 + }, + { + "epoch": 0.5717700172619838, + "grad_norm": 1.0763149059705845, + "learning_rate": 1.7113628861154953e-05, + "loss": 0.30877187848091125, + "step": 2153 + }, + { + "epoch": 0.5720355862435268, + "grad_norm": 1.0826550246568385, + "learning_rate": 1.711054223626623e-05, + "loss": 0.2985781729221344, + "step": 2154 + }, + { + "epoch": 0.5723011552250697, + "grad_norm": 1.1063225967671673, + "learning_rate": 1.7107454240548825e-05, + "loss": 0.3449699878692627, + "step": 2155 + }, + { + "epoch": 0.5725667242066127, + "grad_norm": 1.0430022801820942, + "learning_rate": 1.7104364874598066e-05, + "loss": 0.3219606578350067, + "step": 2156 + }, + { + "epoch": 0.5728322931881557, + "grad_norm": 1.0017795464639185, + "learning_rate": 1.710127413900955e-05, + "loss": 0.3059350550174713, + "step": 2157 + }, + { + "epoch": 0.5730978621696986, + "grad_norm": 1.0027463566346577, + "learning_rate": 1.7098182034379132e-05, + "loss": 0.29461371898651123, + "step": 2158 + }, + { + "epoch": 0.5733634311512416, + "grad_norm": 1.0159484116581767, + "learning_rate": 1.709508856130293e-05, + "loss": 0.2998795509338379, + "step": 2159 + }, + { + "epoch": 0.5736290001327845, + "grad_norm": 1.0092216110834475, + "learning_rate": 1.7091993720377336e-05, + "loss": 0.28214582800865173, + "step": 2160 + }, + { + "epoch": 0.5738945691143275, + "grad_norm": 1.2106483053766084, + "learning_rate": 1.708889751219899e-05, + "loss": 0.3036864697933197, + "step": 2161 + }, + { + "epoch": 0.5741601380958704, + "grad_norm": 1.1139097359759478, + "learning_rate": 1.7085799937364815e-05, + "loss": 0.34146320819854736, + "step": 2162 + }, + { + "epoch": 0.5744257070774134, + "grad_norm": 1.0631963944232283, + "learning_rate": 1.708270099647198e-05, + "loss": 0.33996909856796265, + "step": 2163 + }, + { + "epoch": 0.5746912760589563, + "grad_norm": 1.0779467399705778, + "learning_rate": 1.7079600690117924e-05, + "loss": 0.3308744728565216, + "step": 2164 + }, + { + "epoch": 0.5749568450404993, + "grad_norm": 1.0447240453690412, + "learning_rate": 1.707649901890035e-05, + "loss": 0.2945587933063507, + "step": 2165 + }, + { + "epoch": 0.5752224140220422, + "grad_norm": 1.0321317558144223, + "learning_rate": 1.7073395983417227e-05, + "loss": 0.30348697304725647, + "step": 2166 + }, + { + "epoch": 0.5754879830035852, + "grad_norm": 1.025806147580304, + "learning_rate": 1.707029158426678e-05, + "loss": 0.28789055347442627, + "step": 2167 + }, + { + "epoch": 0.5757535519851281, + "grad_norm": 1.168965754707192, + "learning_rate": 1.7067185822047502e-05, + "loss": 0.3026643693447113, + "step": 2168 + }, + { + "epoch": 0.5760191209666711, + "grad_norm": 1.1108861255752682, + "learning_rate": 1.7064078697358147e-05, + "loss": 0.34021061658859253, + "step": 2169 + }, + { + "epoch": 0.576284689948214, + "grad_norm": 1.1062563353075296, + "learning_rate": 1.7060970210797735e-05, + "loss": 0.32793867588043213, + "step": 2170 + }, + { + "epoch": 0.576550258929757, + "grad_norm": 1.1692826638365306, + "learning_rate": 1.705786036296554e-05, + "loss": 0.36144691705703735, + "step": 2171 + }, + { + "epoch": 0.5768158279112999, + "grad_norm": 1.1177501875227254, + "learning_rate": 1.7054749154461105e-05, + "loss": 0.3630291223526001, + "step": 2172 + }, + { + "epoch": 0.5770813968928429, + "grad_norm": 1.144365708172633, + "learning_rate": 1.705163658588424e-05, + "loss": 0.34964969754219055, + "step": 2173 + }, + { + "epoch": 0.5773469658743858, + "grad_norm": 1.0298961015626151, + "learning_rate": 1.7048522657835004e-05, + "loss": 0.2877815067768097, + "step": 2174 + }, + { + "epoch": 0.5776125348559288, + "grad_norm": 1.1148926749607628, + "learning_rate": 1.7045407370913732e-05, + "loss": 0.3185664713382721, + "step": 2175 + }, + { + "epoch": 0.5778781038374717, + "grad_norm": 1.0393243287048395, + "learning_rate": 1.704229072572101e-05, + "loss": 0.3035257160663605, + "step": 2176 + }, + { + "epoch": 0.5781436728190147, + "grad_norm": 1.048139429574759, + "learning_rate": 1.7039172722857695e-05, + "loss": 0.325702965259552, + "step": 2177 + }, + { + "epoch": 0.5784092418005577, + "grad_norm": 1.1046410504333486, + "learning_rate": 1.7036053362924896e-05, + "loss": 0.32837462425231934, + "step": 2178 + }, + { + "epoch": 0.5786748107821007, + "grad_norm": 1.066094854816524, + "learning_rate": 1.703293264652399e-05, + "loss": 0.3430028259754181, + "step": 2179 + }, + { + "epoch": 0.5789403797636437, + "grad_norm": 1.1007701198247044, + "learning_rate": 1.702981057425662e-05, + "loss": 0.32792964577674866, + "step": 2180 + }, + { + "epoch": 0.5792059487451866, + "grad_norm": 0.9964902607677808, + "learning_rate": 1.7026687146724675e-05, + "loss": 0.3037140965461731, + "step": 2181 + }, + { + "epoch": 0.5794715177267296, + "grad_norm": 0.9962684392556416, + "learning_rate": 1.7023562364530322e-05, + "loss": 0.33083540201187134, + "step": 2182 + }, + { + "epoch": 0.5797370867082725, + "grad_norm": 0.9979777099745417, + "learning_rate": 1.702043622827598e-05, + "loss": 0.3108663260936737, + "step": 2183 + }, + { + "epoch": 0.5800026556898155, + "grad_norm": 0.9618495492417584, + "learning_rate": 1.7017308738564336e-05, + "loss": 0.2939792573451996, + "step": 2184 + }, + { + "epoch": 0.5802682246713584, + "grad_norm": 1.1315656989934186, + "learning_rate": 1.7014179895998322e-05, + "loss": 0.3686106503009796, + "step": 2185 + }, + { + "epoch": 0.5805337936529014, + "grad_norm": 1.0524191997810952, + "learning_rate": 1.7011049701181152e-05, + "loss": 0.3497159779071808, + "step": 2186 + }, + { + "epoch": 0.5807993626344443, + "grad_norm": 1.0989364128809138, + "learning_rate": 1.7007918154716286e-05, + "loss": 0.31730401515960693, + "step": 2187 + }, + { + "epoch": 0.5810649316159873, + "grad_norm": 1.0000330799865447, + "learning_rate": 1.7004785257207456e-05, + "loss": 0.3064701557159424, + "step": 2188 + }, + { + "epoch": 0.5813305005975302, + "grad_norm": 1.1111458283716926, + "learning_rate": 1.7001651009258635e-05, + "loss": 0.37174129486083984, + "step": 2189 + }, + { + "epoch": 0.5815960695790732, + "grad_norm": 1.068050904458805, + "learning_rate": 1.699851541147408e-05, + "loss": 0.3548140823841095, + "step": 2190 + }, + { + "epoch": 0.5818616385606161, + "grad_norm": 1.2340650081251097, + "learning_rate": 1.6995378464458292e-05, + "loss": 0.3486049473285675, + "step": 2191 + }, + { + "epoch": 0.5821272075421591, + "grad_norm": 1.996025853729682, + "learning_rate": 1.6992240168816037e-05, + "loss": 0.3083210587501526, + "step": 2192 + }, + { + "epoch": 0.582392776523702, + "grad_norm": 1.0284637251594817, + "learning_rate": 1.6989100525152346e-05, + "loss": 0.3006829619407654, + "step": 2193 + }, + { + "epoch": 0.582658345505245, + "grad_norm": 1.103386023825705, + "learning_rate": 1.6985959534072502e-05, + "loss": 0.32856425642967224, + "step": 2194 + }, + { + "epoch": 0.5829239144867879, + "grad_norm": 1.1293873964177752, + "learning_rate": 1.6982817196182052e-05, + "loss": 0.3382526934146881, + "step": 2195 + }, + { + "epoch": 0.5831894834683309, + "grad_norm": 1.0326113865244562, + "learning_rate": 1.69796735120868e-05, + "loss": 0.3311583399772644, + "step": 2196 + }, + { + "epoch": 0.5834550524498738, + "grad_norm": 1.0267321140886136, + "learning_rate": 1.6976528482392815e-05, + "loss": 0.312778115272522, + "step": 2197 + }, + { + "epoch": 0.5837206214314168, + "grad_norm": 1.0148067463802801, + "learning_rate": 1.697338210770642e-05, + "loss": 0.2996736466884613, + "step": 2198 + }, + { + "epoch": 0.5839861904129597, + "grad_norm": 1.1885772355333009, + "learning_rate": 1.6970234388634192e-05, + "loss": 0.344571590423584, + "step": 2199 + }, + { + "epoch": 0.5842517593945027, + "grad_norm": 0.9183671512098872, + "learning_rate": 1.6967085325782984e-05, + "loss": 0.25299468636512756, + "step": 2200 + }, + { + "epoch": 0.5845173283760456, + "grad_norm": 1.042142544774348, + "learning_rate": 1.6963934919759896e-05, + "loss": 0.3080691695213318, + "step": 2201 + }, + { + "epoch": 0.5847828973575886, + "grad_norm": 1.0216299822000434, + "learning_rate": 1.6960783171172286e-05, + "loss": 0.27491697669029236, + "step": 2202 + }, + { + "epoch": 0.5850484663391315, + "grad_norm": 1.1629234714983534, + "learning_rate": 1.6957630080627772e-05, + "loss": 0.3422500193119049, + "step": 2203 + }, + { + "epoch": 0.5853140353206745, + "grad_norm": 1.0832524871656921, + "learning_rate": 1.695447564873424e-05, + "loss": 0.27703234553337097, + "step": 2204 + }, + { + "epoch": 0.5855796043022174, + "grad_norm": 1.0275000328668338, + "learning_rate": 1.6951319876099825e-05, + "loss": 0.3088543117046356, + "step": 2205 + }, + { + "epoch": 0.5858451732837605, + "grad_norm": 1.0671359142705343, + "learning_rate": 1.694816276333292e-05, + "loss": 0.29875609278678894, + "step": 2206 + }, + { + "epoch": 0.5861107422653035, + "grad_norm": 1.0185982306074886, + "learning_rate": 1.6945004311042176e-05, + "loss": 0.30804386734962463, + "step": 2207 + }, + { + "epoch": 0.5863763112468464, + "grad_norm": 1.081134235929082, + "learning_rate": 1.694184451983651e-05, + "loss": 0.3324572741985321, + "step": 2208 + }, + { + "epoch": 0.5866418802283894, + "grad_norm": 1.0822730402391103, + "learning_rate": 1.6938683390325096e-05, + "loss": 0.30302488803863525, + "step": 2209 + }, + { + "epoch": 0.5869074492099323, + "grad_norm": 1.1499037543983048, + "learning_rate": 1.6935520923117355e-05, + "loss": 0.3264358341693878, + "step": 2210 + }, + { + "epoch": 0.5871730181914753, + "grad_norm": 1.1305858167915457, + "learning_rate": 1.693235711882298e-05, + "loss": 0.3172164261341095, + "step": 2211 + }, + { + "epoch": 0.5874385871730182, + "grad_norm": 0.9910314790510931, + "learning_rate": 1.6929191978051908e-05, + "loss": 0.300851047039032, + "step": 2212 + }, + { + "epoch": 0.5877041561545612, + "grad_norm": 1.1122516205102002, + "learning_rate": 1.6926025501414352e-05, + "loss": 0.2887764871120453, + "step": 2213 + }, + { + "epoch": 0.5879697251361041, + "grad_norm": 1.0991421920944897, + "learning_rate": 1.692285768952076e-05, + "loss": 0.3246796727180481, + "step": 2214 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 1.1069795382063548, + "learning_rate": 1.6919688542981852e-05, + "loss": 0.30595412850379944, + "step": 2215 + }, + { + "epoch": 0.58850086309919, + "grad_norm": 1.068918741300791, + "learning_rate": 1.6916518062408604e-05, + "loss": 0.2885501980781555, + "step": 2216 + }, + { + "epoch": 0.588766432080733, + "grad_norm": 1.066918066226772, + "learning_rate": 1.6913346248412245e-05, + "loss": 0.34449082612991333, + "step": 2217 + }, + { + "epoch": 0.5890320010622759, + "grad_norm": 1.0585511422631098, + "learning_rate": 1.6910173101604267e-05, + "loss": 0.29410409927368164, + "step": 2218 + }, + { + "epoch": 0.5892975700438189, + "grad_norm": 1.1710793080996782, + "learning_rate": 1.690699862259641e-05, + "loss": 0.3250378370285034, + "step": 2219 + }, + { + "epoch": 0.5895631390253618, + "grad_norm": 1.3327292763951073, + "learning_rate": 1.690382281200068e-05, + "loss": 0.34420648217201233, + "step": 2220 + }, + { + "epoch": 0.5898287080069048, + "grad_norm": 1.1196949637967406, + "learning_rate": 1.6900645670429338e-05, + "loss": 0.33951860666275024, + "step": 2221 + }, + { + "epoch": 0.5900942769884477, + "grad_norm": 1.064177847952839, + "learning_rate": 1.6897467198494892e-05, + "loss": 0.35045644640922546, + "step": 2222 + }, + { + "epoch": 0.5903598459699907, + "grad_norm": 1.0378256375427404, + "learning_rate": 1.689428739681012e-05, + "loss": 0.3262789845466614, + "step": 2223 + }, + { + "epoch": 0.5906254149515336, + "grad_norm": 1.0662878016953237, + "learning_rate": 1.689110626598805e-05, + "loss": 0.2959234118461609, + "step": 2224 + }, + { + "epoch": 0.5908909839330766, + "grad_norm": 1.040953230887288, + "learning_rate": 1.6887923806641965e-05, + "loss": 0.3185187876224518, + "step": 2225 + }, + { + "epoch": 0.5911565529146195, + "grad_norm": 0.9754385668000993, + "learning_rate": 1.6884740019385403e-05, + "loss": 0.2861860692501068, + "step": 2226 + }, + { + "epoch": 0.5914221218961625, + "grad_norm": 1.0067160421449919, + "learning_rate": 1.6881554904832163e-05, + "loss": 0.28718897700309753, + "step": 2227 + }, + { + "epoch": 0.5916876908777055, + "grad_norm": 1.0412433017248806, + "learning_rate": 1.68783684635963e-05, + "loss": 0.2919235825538635, + "step": 2228 + }, + { + "epoch": 0.5919532598592484, + "grad_norm": 0.9981457951279066, + "learning_rate": 1.687518069629212e-05, + "loss": 0.29265689849853516, + "step": 2229 + }, + { + "epoch": 0.5922188288407914, + "grad_norm": 1.105624159979672, + "learning_rate": 1.6871991603534183e-05, + "loss": 0.3257937431335449, + "step": 2230 + }, + { + "epoch": 0.5924843978223343, + "grad_norm": 0.9776528734928177, + "learning_rate": 1.6868801185937318e-05, + "loss": 0.30709922313690186, + "step": 2231 + }, + { + "epoch": 0.5927499668038773, + "grad_norm": 1.0470693079191735, + "learning_rate": 1.6865609444116594e-05, + "loss": 0.34016695618629456, + "step": 2232 + }, + { + "epoch": 0.5930155357854202, + "grad_norm": 3.119158292180646, + "learning_rate": 1.686241637868734e-05, + "loss": 0.27988332509994507, + "step": 2233 + }, + { + "epoch": 0.5932811047669632, + "grad_norm": 1.0478488923431404, + "learning_rate": 1.685922199026514e-05, + "loss": 0.33241748809814453, + "step": 2234 + }, + { + "epoch": 0.5935466737485062, + "grad_norm": 1.131470783603603, + "learning_rate": 1.685602627946584e-05, + "loss": 0.29636645317077637, + "step": 2235 + }, + { + "epoch": 0.5938122427300492, + "grad_norm": 1.0270882549188534, + "learning_rate": 1.6852829246905532e-05, + "loss": 0.32173705101013184, + "step": 2236 + }, + { + "epoch": 0.5940778117115921, + "grad_norm": 1.0825392737706068, + "learning_rate": 1.6849630893200567e-05, + "loss": 0.318726122379303, + "step": 2237 + }, + { + "epoch": 0.5943433806931351, + "grad_norm": 1.0382165285294276, + "learning_rate": 1.684643121896755e-05, + "loss": 0.3085494339466095, + "step": 2238 + }, + { + "epoch": 0.594608949674678, + "grad_norm": 1.0527313536489507, + "learning_rate": 1.684323022482334e-05, + "loss": 0.3402160406112671, + "step": 2239 + }, + { + "epoch": 0.594874518656221, + "grad_norm": 1.0380085019224927, + "learning_rate": 1.684002791138505e-05, + "loss": 0.28099578619003296, + "step": 2240 + }, + { + "epoch": 0.5951400876377639, + "grad_norm": 1.0821564922133853, + "learning_rate": 1.6836824279270053e-05, + "loss": 0.3049670159816742, + "step": 2241 + }, + { + "epoch": 0.5954056566193069, + "grad_norm": 1.0644252940512267, + "learning_rate": 1.6833619329095966e-05, + "loss": 0.2999834716320038, + "step": 2242 + }, + { + "epoch": 0.5956712256008498, + "grad_norm": 1.0828247808996563, + "learning_rate": 1.6830413061480663e-05, + "loss": 0.2976648509502411, + "step": 2243 + }, + { + "epoch": 0.5959367945823928, + "grad_norm": 0.9516700397999099, + "learning_rate": 1.6827205477042282e-05, + "loss": 0.2937200963497162, + "step": 2244 + }, + { + "epoch": 0.5962023635639357, + "grad_norm": 0.9800041770842799, + "learning_rate": 1.6823996576399208e-05, + "loss": 0.27944231033325195, + "step": 2245 + }, + { + "epoch": 0.5964679325454787, + "grad_norm": 1.2497901059935828, + "learning_rate": 1.6820786360170073e-05, + "loss": 0.37821248173713684, + "step": 2246 + }, + { + "epoch": 0.5967335015270216, + "grad_norm": 1.0764913922139379, + "learning_rate": 1.681757482897377e-05, + "loss": 0.31929296255111694, + "step": 2247 + }, + { + "epoch": 0.5969990705085646, + "grad_norm": 1.0997353700477965, + "learning_rate": 1.6814361983429446e-05, + "loss": 0.29905542731285095, + "step": 2248 + }, + { + "epoch": 0.5972646394901076, + "grad_norm": 1.1012066663218303, + "learning_rate": 1.6811147824156503e-05, + "loss": 0.31056714057922363, + "step": 2249 + }, + { + "epoch": 0.5975302084716505, + "grad_norm": 1.0740873036211436, + "learning_rate": 1.6807932351774585e-05, + "loss": 0.3311445415019989, + "step": 2250 + }, + { + "epoch": 0.5977957774531935, + "grad_norm": 0.9539008733822649, + "learning_rate": 1.6804715566903603e-05, + "loss": 0.28413334488868713, + "step": 2251 + }, + { + "epoch": 0.5980613464347364, + "grad_norm": 1.068533794622215, + "learning_rate": 1.6801497470163717e-05, + "loss": 0.27681154012680054, + "step": 2252 + }, + { + "epoch": 0.5983269154162794, + "grad_norm": 1.0654200190327086, + "learning_rate": 1.679827806217533e-05, + "loss": 0.290216863155365, + "step": 2253 + }, + { + "epoch": 0.5985924843978223, + "grad_norm": 1.1041469834048565, + "learning_rate": 1.6795057343559115e-05, + "loss": 0.31263259053230286, + "step": 2254 + }, + { + "epoch": 0.5988580533793653, + "grad_norm": 1.126601485756597, + "learning_rate": 1.6791835314935984e-05, + "loss": 0.31527474522590637, + "step": 2255 + }, + { + "epoch": 0.5991236223609082, + "grad_norm": 1.078203294441185, + "learning_rate": 1.6788611976927104e-05, + "loss": 0.308803915977478, + "step": 2256 + }, + { + "epoch": 0.5993891913424512, + "grad_norm": 1.0503773076355036, + "learning_rate": 1.6785387330153898e-05, + "loss": 0.3038686215877533, + "step": 2257 + }, + { + "epoch": 0.5996547603239941, + "grad_norm": 1.0216209005739547, + "learning_rate": 1.6782161375238045e-05, + "loss": 0.32485973834991455, + "step": 2258 + }, + { + "epoch": 0.5999203293055371, + "grad_norm": 1.182450532742011, + "learning_rate": 1.6778934112801467e-05, + "loss": 0.32350587844848633, + "step": 2259 + }, + { + "epoch": 0.60018589828708, + "grad_norm": 1.0888151703509321, + "learning_rate": 1.6775705543466337e-05, + "loss": 0.31593745946884155, + "step": 2260 + }, + { + "epoch": 0.600451467268623, + "grad_norm": 1.0882766479814592, + "learning_rate": 1.6772475667855098e-05, + "loss": 0.3266843855381012, + "step": 2261 + }, + { + "epoch": 0.6007170362501659, + "grad_norm": 1.1815872316974045, + "learning_rate": 1.676924448659042e-05, + "loss": 0.3334394693374634, + "step": 2262 + }, + { + "epoch": 0.600982605231709, + "grad_norm": 1.1019346354795203, + "learning_rate": 1.676601200029524e-05, + "loss": 0.29688704013824463, + "step": 2263 + }, + { + "epoch": 0.6012481742132519, + "grad_norm": 1.0675092497220116, + "learning_rate": 1.6762778209592744e-05, + "loss": 0.3163599967956543, + "step": 2264 + }, + { + "epoch": 0.6015137431947949, + "grad_norm": 3.310146638883422, + "learning_rate": 1.675954311510637e-05, + "loss": 0.3001909554004669, + "step": 2265 + }, + { + "epoch": 0.6017793121763378, + "grad_norm": 1.052342150287052, + "learning_rate": 1.6756306717459804e-05, + "loss": 0.306442528963089, + "step": 2266 + }, + { + "epoch": 0.6020448811578808, + "grad_norm": 1.0462245388504205, + "learning_rate": 1.6753069017276988e-05, + "loss": 0.32714736461639404, + "step": 2267 + }, + { + "epoch": 0.6023104501394237, + "grad_norm": 1.1462408299032063, + "learning_rate": 1.6749830015182106e-05, + "loss": 0.3276352286338806, + "step": 2268 + }, + { + "epoch": 0.6025760191209667, + "grad_norm": 1.196238497855594, + "learning_rate": 1.6746589711799607e-05, + "loss": 0.3151017427444458, + "step": 2269 + }, + { + "epoch": 0.6028415881025097, + "grad_norm": 1.0342963680315473, + "learning_rate": 1.674334810775418e-05, + "loss": 0.30252715945243835, + "step": 2270 + }, + { + "epoch": 0.6031071570840526, + "grad_norm": 1.013150034994447, + "learning_rate": 1.674010520367077e-05, + "loss": 0.28994205594062805, + "step": 2271 + }, + { + "epoch": 0.6033727260655956, + "grad_norm": 1.060884408167446, + "learning_rate": 1.6736861000174566e-05, + "loss": 0.31821542978286743, + "step": 2272 + }, + { + "epoch": 0.6036382950471385, + "grad_norm": 1.0745731746159097, + "learning_rate": 1.6733615497891018e-05, + "loss": 0.33488404750823975, + "step": 2273 + }, + { + "epoch": 0.6039038640286815, + "grad_norm": 1.1687722013665731, + "learning_rate": 1.6730368697445815e-05, + "loss": 0.32545825839042664, + "step": 2274 + }, + { + "epoch": 0.6041694330102244, + "grad_norm": 1.0959659967153625, + "learning_rate": 1.6727120599464904e-05, + "loss": 0.3229105770587921, + "step": 2275 + }, + { + "epoch": 0.6044350019917674, + "grad_norm": 1.0190980223229251, + "learning_rate": 1.672387120457448e-05, + "loss": 0.29090648889541626, + "step": 2276 + }, + { + "epoch": 0.6047005709733103, + "grad_norm": 1.0135966931724694, + "learning_rate": 1.6720620513400993e-05, + "loss": 0.3102695345878601, + "step": 2277 + }, + { + "epoch": 0.6049661399548533, + "grad_norm": 0.9853472262099896, + "learning_rate": 1.6717368526571133e-05, + "loss": 0.3104533851146698, + "step": 2278 + }, + { + "epoch": 0.6052317089363962, + "grad_norm": 1.0624907138843722, + "learning_rate": 1.671411524471184e-05, + "loss": 0.3340798616409302, + "step": 2279 + }, + { + "epoch": 0.6054972779179392, + "grad_norm": 0.9362556276145145, + "learning_rate": 1.6710860668450318e-05, + "loss": 0.2807982563972473, + "step": 2280 + }, + { + "epoch": 0.6057628468994821, + "grad_norm": 1.0604829312359818, + "learning_rate": 1.6707604798414005e-05, + "loss": 0.28892064094543457, + "step": 2281 + }, + { + "epoch": 0.6060284158810251, + "grad_norm": 1.1005771261022437, + "learning_rate": 1.6704347635230594e-05, + "loss": 0.29660698771476746, + "step": 2282 + }, + { + "epoch": 0.606293984862568, + "grad_norm": 1.0826898129560842, + "learning_rate": 1.6701089179528032e-05, + "loss": 0.32079893350601196, + "step": 2283 + }, + { + "epoch": 0.606559553844111, + "grad_norm": 1.0711524337358722, + "learning_rate": 1.6697829431934508e-05, + "loss": 0.3464012145996094, + "step": 2284 + }, + { + "epoch": 0.6068251228256539, + "grad_norm": 1.113831391037599, + "learning_rate": 1.669456839307846e-05, + "loss": 0.3378494381904602, + "step": 2285 + }, + { + "epoch": 0.6070906918071969, + "grad_norm": 1.1314381443012484, + "learning_rate": 1.6691306063588583e-05, + "loss": 0.2856704294681549, + "step": 2286 + }, + { + "epoch": 0.6073562607887398, + "grad_norm": 1.117095467957477, + "learning_rate": 1.6688042444093816e-05, + "loss": 0.317970871925354, + "step": 2287 + }, + { + "epoch": 0.6076218297702828, + "grad_norm": 0.9765740214705895, + "learning_rate": 1.6684777535223338e-05, + "loss": 0.3067381978034973, + "step": 2288 + }, + { + "epoch": 0.6078873987518257, + "grad_norm": 0.9795122588790717, + "learning_rate": 1.6681511337606594e-05, + "loss": 0.28682243824005127, + "step": 2289 + }, + { + "epoch": 0.6081529677333687, + "grad_norm": 1.0967806384391572, + "learning_rate": 1.667824385187327e-05, + "loss": 0.30516478419303894, + "step": 2290 + }, + { + "epoch": 0.6084185367149118, + "grad_norm": 1.2090889717256932, + "learning_rate": 1.6674975078653284e-05, + "loss": 0.3114034831523895, + "step": 2291 + }, + { + "epoch": 0.6086841056964547, + "grad_norm": 1.045779035897072, + "learning_rate": 1.6671705018576837e-05, + "loss": 0.3119916617870331, + "step": 2292 + }, + { + "epoch": 0.6089496746779977, + "grad_norm": 1.0110290976394836, + "learning_rate": 1.666843367227434e-05, + "loss": 0.2695278823375702, + "step": 2293 + }, + { + "epoch": 0.6092152436595406, + "grad_norm": 1.1042693591067085, + "learning_rate": 1.6665161040376483e-05, + "loss": 0.32162508368492126, + "step": 2294 + }, + { + "epoch": 0.6094808126410836, + "grad_norm": 1.1533266295102853, + "learning_rate": 1.6661887123514183e-05, + "loss": 0.3115222752094269, + "step": 2295 + }, + { + "epoch": 0.6097463816226265, + "grad_norm": 1.1903173397636237, + "learning_rate": 1.6658611922318618e-05, + "loss": 0.3239362835884094, + "step": 2296 + }, + { + "epoch": 0.6100119506041695, + "grad_norm": 1.0224008240467277, + "learning_rate": 1.66553354374212e-05, + "loss": 0.29716256260871887, + "step": 2297 + }, + { + "epoch": 0.6102775195857124, + "grad_norm": 1.1579823586849616, + "learning_rate": 1.6652057669453606e-05, + "loss": 0.3337557911872864, + "step": 2298 + }, + { + "epoch": 0.6105430885672554, + "grad_norm": 1.0726602627394455, + "learning_rate": 1.6648778619047747e-05, + "loss": 0.30258649587631226, + "step": 2299 + }, + { + "epoch": 0.6108086575487983, + "grad_norm": 1.0836532202857172, + "learning_rate": 1.6645498286835784e-05, + "loss": 0.3151426315307617, + "step": 2300 + }, + { + "epoch": 0.6110742265303413, + "grad_norm": 0.9639622977001232, + "learning_rate": 1.664221667345013e-05, + "loss": 0.274954617023468, + "step": 2301 + }, + { + "epoch": 0.6113397955118842, + "grad_norm": 1.0454921478368049, + "learning_rate": 1.6638933779523437e-05, + "loss": 0.3055363893508911, + "step": 2302 + }, + { + "epoch": 0.6116053644934272, + "grad_norm": 1.0132221767482874, + "learning_rate": 1.663564960568861e-05, + "loss": 0.30296921730041504, + "step": 2303 + }, + { + "epoch": 0.6118709334749701, + "grad_norm": 1.0766188111034134, + "learning_rate": 1.66323641525788e-05, + "loss": 0.3118343651294708, + "step": 2304 + }, + { + "epoch": 0.6121365024565131, + "grad_norm": 1.164685781665666, + "learning_rate": 1.6629077420827405e-05, + "loss": 0.3277447819709778, + "step": 2305 + }, + { + "epoch": 0.612402071438056, + "grad_norm": 1.11996036014055, + "learning_rate": 1.6625789411068063e-05, + "loss": 0.307643860578537, + "step": 2306 + }, + { + "epoch": 0.612667640419599, + "grad_norm": 1.0752891079202938, + "learning_rate": 1.6622500123934665e-05, + "loss": 0.3043777346611023, + "step": 2307 + }, + { + "epoch": 0.6129332094011419, + "grad_norm": 1.1229566611504027, + "learning_rate": 1.6619209560061352e-05, + "loss": 0.28634852170944214, + "step": 2308 + }, + { + "epoch": 0.6131987783826849, + "grad_norm": 1.1746890844036781, + "learning_rate": 1.6615917720082503e-05, + "loss": 0.33200016617774963, + "step": 2309 + }, + { + "epoch": 0.6134643473642278, + "grad_norm": 1.0620493011215435, + "learning_rate": 1.661262460463274e-05, + "loss": 0.26568055152893066, + "step": 2310 + }, + { + "epoch": 0.6137299163457708, + "grad_norm": 1.0408157138123326, + "learning_rate": 1.6609330214346945e-05, + "loss": 0.2772855758666992, + "step": 2311 + }, + { + "epoch": 0.6139954853273137, + "grad_norm": 1.2060076126932109, + "learning_rate": 1.6606034549860236e-05, + "loss": 0.3330409824848175, + "step": 2312 + }, + { + "epoch": 0.6142610543088567, + "grad_norm": 1.0235644562455184, + "learning_rate": 1.6602737611807975e-05, + "loss": 0.27702978253364563, + "step": 2313 + }, + { + "epoch": 0.6145266232903996, + "grad_norm": 1.1266755606893777, + "learning_rate": 1.6599439400825775e-05, + "loss": 0.29985183477401733, + "step": 2314 + }, + { + "epoch": 0.6147921922719426, + "grad_norm": 1.0266522277907775, + "learning_rate": 1.659613991754949e-05, + "loss": 0.2666100859642029, + "step": 2315 + }, + { + "epoch": 0.6150577612534855, + "grad_norm": 1.0676553477298287, + "learning_rate": 1.6592839162615223e-05, + "loss": 0.2968613803386688, + "step": 2316 + }, + { + "epoch": 0.6153233302350285, + "grad_norm": 1.26155090118547, + "learning_rate": 1.6589537136659326e-05, + "loss": 0.2693714499473572, + "step": 2317 + }, + { + "epoch": 0.6155888992165715, + "grad_norm": 1.1411779960646509, + "learning_rate": 1.658623384031838e-05, + "loss": 0.3192713260650635, + "step": 2318 + }, + { + "epoch": 0.6158544681981145, + "grad_norm": 1.099028639770974, + "learning_rate": 1.658292927422923e-05, + "loss": 0.2958469092845917, + "step": 2319 + }, + { + "epoch": 0.6161200371796575, + "grad_norm": 1.0613129939040433, + "learning_rate": 1.657962343902895e-05, + "loss": 0.28580743074417114, + "step": 2320 + }, + { + "epoch": 0.6163856061612004, + "grad_norm": 1.2105545865052383, + "learning_rate": 1.6576316335354875e-05, + "loss": 0.34325680136680603, + "step": 2321 + }, + { + "epoch": 0.6166511751427434, + "grad_norm": 1.076014963599046, + "learning_rate": 1.657300796384457e-05, + "loss": 0.3220894932746887, + "step": 2322 + }, + { + "epoch": 0.6169167441242863, + "grad_norm": 1.003861259990267, + "learning_rate": 1.656969832513585e-05, + "loss": 0.2934642434120178, + "step": 2323 + }, + { + "epoch": 0.6171823131058293, + "grad_norm": 1.0182182491222724, + "learning_rate": 1.656638741986677e-05, + "loss": 0.3066999912261963, + "step": 2324 + }, + { + "epoch": 0.6174478820873722, + "grad_norm": 1.0780285957414313, + "learning_rate": 1.6563075248675645e-05, + "loss": 0.2947896122932434, + "step": 2325 + }, + { + "epoch": 0.6177134510689152, + "grad_norm": 1.1567241875430703, + "learning_rate": 1.6559761812201018e-05, + "loss": 0.33616161346435547, + "step": 2326 + }, + { + "epoch": 0.6179790200504581, + "grad_norm": 1.0754490235924812, + "learning_rate": 1.6556447111081678e-05, + "loss": 0.29555875062942505, + "step": 2327 + }, + { + "epoch": 0.6182445890320011, + "grad_norm": 1.0070791342344025, + "learning_rate": 1.655313114595666e-05, + "loss": 0.276498019695282, + "step": 2328 + }, + { + "epoch": 0.618510158013544, + "grad_norm": 1.0894248364537533, + "learning_rate": 1.6549813917465242e-05, + "loss": 0.3081165552139282, + "step": 2329 + }, + { + "epoch": 0.618775726995087, + "grad_norm": 1.2153046006588315, + "learning_rate": 1.654649542624695e-05, + "loss": 0.3610053062438965, + "step": 2330 + }, + { + "epoch": 0.6190412959766299, + "grad_norm": 1.0676492266011808, + "learning_rate": 1.654317567294155e-05, + "loss": 0.2775106430053711, + "step": 2331 + }, + { + "epoch": 0.6193068649581729, + "grad_norm": 4.371469554540211, + "learning_rate": 1.653985465818905e-05, + "loss": 0.2915893793106079, + "step": 2332 + }, + { + "epoch": 0.6195724339397158, + "grad_norm": 1.0032536414224313, + "learning_rate": 1.6536532382629696e-05, + "loss": 0.30868977308273315, + "step": 2333 + }, + { + "epoch": 0.6198380029212588, + "grad_norm": 1.1011191125099704, + "learning_rate": 1.6533208846903996e-05, + "loss": 0.3083038330078125, + "step": 2334 + }, + { + "epoch": 0.6201035719028017, + "grad_norm": 0.9895882037041855, + "learning_rate": 1.652988405165268e-05, + "loss": 0.25192466378211975, + "step": 2335 + }, + { + "epoch": 0.6203691408843447, + "grad_norm": 1.1020677364796136, + "learning_rate": 1.6526557997516737e-05, + "loss": 0.32154130935668945, + "step": 2336 + }, + { + "epoch": 0.6206347098658876, + "grad_norm": 1.1174587266065723, + "learning_rate": 1.6523230685137382e-05, + "loss": 0.2860945165157318, + "step": 2337 + }, + { + "epoch": 0.6209002788474306, + "grad_norm": 1.1647384960602913, + "learning_rate": 1.6519902115156084e-05, + "loss": 0.3279789984226227, + "step": 2338 + }, + { + "epoch": 0.6211658478289735, + "grad_norm": 1.062678685453679, + "learning_rate": 1.6516572288214555e-05, + "loss": 0.3082200884819031, + "step": 2339 + }, + { + "epoch": 0.6214314168105165, + "grad_norm": 1.1253285275737313, + "learning_rate": 1.6513241204954745e-05, + "loss": 0.29032304883003235, + "step": 2340 + }, + { + "epoch": 0.6216969857920595, + "grad_norm": 1.004918906125766, + "learning_rate": 1.6509908866018843e-05, + "loss": 0.3096848130226135, + "step": 2341 + }, + { + "epoch": 0.6219625547736024, + "grad_norm": 1.021047856460921, + "learning_rate": 1.6506575272049294e-05, + "loss": 0.309989333152771, + "step": 2342 + }, + { + "epoch": 0.6222281237551454, + "grad_norm": 1.119097166323709, + "learning_rate": 1.6503240423688768e-05, + "loss": 0.311350554227829, + "step": 2343 + }, + { + "epoch": 0.6224936927366883, + "grad_norm": 1.0659510240862446, + "learning_rate": 1.6499904321580187e-05, + "loss": 0.3313952386379242, + "step": 2344 + }, + { + "epoch": 0.6227592617182313, + "grad_norm": 1.0702797293760455, + "learning_rate": 1.649656696636671e-05, + "loss": 0.2984781265258789, + "step": 2345 + }, + { + "epoch": 0.6230248306997742, + "grad_norm": 1.0312282361562104, + "learning_rate": 1.6493228358691748e-05, + "loss": 0.3058238625526428, + "step": 2346 + }, + { + "epoch": 0.6232903996813173, + "grad_norm": 1.0462474005488736, + "learning_rate": 1.6489888499198935e-05, + "loss": 0.33439138531684875, + "step": 2347 + }, + { + "epoch": 0.6235559686628602, + "grad_norm": 1.0386002000588619, + "learning_rate": 1.6486547388532157e-05, + "loss": 0.2883133292198181, + "step": 2348 + }, + { + "epoch": 0.6238215376444032, + "grad_norm": 0.9997410916606129, + "learning_rate": 1.648320502733555e-05, + "loss": 0.30258435010910034, + "step": 2349 + }, + { + "epoch": 0.6240871066259461, + "grad_norm": 1.0226158069339855, + "learning_rate": 1.6479861416253476e-05, + "loss": 0.316353440284729, + "step": 2350 + }, + { + "epoch": 0.6243526756074891, + "grad_norm": 1.0638089423798769, + "learning_rate": 1.647651655593054e-05, + "loss": 0.3230556547641754, + "step": 2351 + }, + { + "epoch": 0.624618244589032, + "grad_norm": 1.2043111611037318, + "learning_rate": 1.6473170447011593e-05, + "loss": 0.3327128291130066, + "step": 2352 + }, + { + "epoch": 0.624883813570575, + "grad_norm": 1.081123131766037, + "learning_rate": 1.6469823090141733e-05, + "loss": 0.3152993619441986, + "step": 2353 + }, + { + "epoch": 0.6251493825521179, + "grad_norm": 1.0655193061859811, + "learning_rate": 1.6466474485966286e-05, + "loss": 0.26792511343955994, + "step": 2354 + }, + { + "epoch": 0.6254149515336609, + "grad_norm": 1.121022507517606, + "learning_rate": 1.6463124635130824e-05, + "loss": 0.31665652990341187, + "step": 2355 + }, + { + "epoch": 0.6256805205152038, + "grad_norm": 1.0108098757868682, + "learning_rate": 1.645977353828115e-05, + "loss": 0.29573655128479004, + "step": 2356 + }, + { + "epoch": 0.6259460894967468, + "grad_norm": 1.0973823257435635, + "learning_rate": 1.6456421196063334e-05, + "loss": 0.3210436999797821, + "step": 2357 + }, + { + "epoch": 0.6262116584782897, + "grad_norm": 1.2424369194288305, + "learning_rate": 1.6453067609123656e-05, + "loss": 0.2837316691875458, + "step": 2358 + }, + { + "epoch": 0.6264772274598327, + "grad_norm": 1.0217734190114693, + "learning_rate": 1.6449712778108645e-05, + "loss": 0.2885812520980835, + "step": 2359 + }, + { + "epoch": 0.6267427964413756, + "grad_norm": 1.1369177274860889, + "learning_rate": 1.6446356703665078e-05, + "loss": 0.34908249974250793, + "step": 2360 + }, + { + "epoch": 0.6270083654229186, + "grad_norm": 0.9942151080492051, + "learning_rate": 1.6442999386439967e-05, + "loss": 0.30398470163345337, + "step": 2361 + }, + { + "epoch": 0.6272739344044616, + "grad_norm": 0.9838105681310805, + "learning_rate": 1.6439640827080565e-05, + "loss": 0.2780487537384033, + "step": 2362 + }, + { + "epoch": 0.6275395033860045, + "grad_norm": 0.956534505955689, + "learning_rate": 1.6436281026234357e-05, + "loss": 0.2575770616531372, + "step": 2363 + }, + { + "epoch": 0.6278050723675475, + "grad_norm": 0.9675911826739493, + "learning_rate": 1.6432919984549077e-05, + "loss": 0.2888547480106354, + "step": 2364 + }, + { + "epoch": 0.6280706413490904, + "grad_norm": 1.2303845977564731, + "learning_rate": 1.6429557702672694e-05, + "loss": 0.3259009122848511, + "step": 2365 + }, + { + "epoch": 0.6283362103306334, + "grad_norm": 1.3923197622537806, + "learning_rate": 1.6426194181253415e-05, + "loss": 0.2899959683418274, + "step": 2366 + }, + { + "epoch": 0.6286017793121763, + "grad_norm": 1.058685915432802, + "learning_rate": 1.6422829420939688e-05, + "loss": 0.28471851348876953, + "step": 2367 + }, + { + "epoch": 0.6288673482937193, + "grad_norm": 1.0822140266216713, + "learning_rate": 1.64194634223802e-05, + "loss": 0.2958947420120239, + "step": 2368 + }, + { + "epoch": 0.6291329172752622, + "grad_norm": 1.1251439755337522, + "learning_rate": 1.6416096186223872e-05, + "loss": 0.3089750111103058, + "step": 2369 + }, + { + "epoch": 0.6293984862568052, + "grad_norm": 1.0517657351777636, + "learning_rate": 1.641272771311987e-05, + "loss": 0.31597089767456055, + "step": 2370 + }, + { + "epoch": 0.6296640552383481, + "grad_norm": 1.237586073778816, + "learning_rate": 1.6409358003717598e-05, + "loss": 0.2968488931655884, + "step": 2371 + }, + { + "epoch": 0.6299296242198911, + "grad_norm": 1.0062603647307793, + "learning_rate": 1.6405987058666694e-05, + "loss": 0.27532660961151123, + "step": 2372 + }, + { + "epoch": 0.630195193201434, + "grad_norm": 1.0061271713511417, + "learning_rate": 1.6402614878617037e-05, + "loss": 0.2800731956958771, + "step": 2373 + }, + { + "epoch": 0.630460762182977, + "grad_norm": 1.0867786948587836, + "learning_rate": 1.6399241464218744e-05, + "loss": 0.31728652119636536, + "step": 2374 + }, + { + "epoch": 0.63072633116452, + "grad_norm": 1.0634834793994077, + "learning_rate": 1.6395866816122167e-05, + "loss": 0.2776367664337158, + "step": 2375 + }, + { + "epoch": 0.630991900146063, + "grad_norm": 1.2696308030410766, + "learning_rate": 1.63924909349779e-05, + "loss": 0.3308418095111847, + "step": 2376 + }, + { + "epoch": 0.6312574691276059, + "grad_norm": 1.027144235831433, + "learning_rate": 1.6389113821436775e-05, + "loss": 0.31589487195014954, + "step": 2377 + }, + { + "epoch": 0.6315230381091489, + "grad_norm": 0.9983142729953255, + "learning_rate": 1.6385735476149855e-05, + "loss": 0.27181899547576904, + "step": 2378 + }, + { + "epoch": 0.6317886070906918, + "grad_norm": 1.0656862561919935, + "learning_rate": 1.638235589976845e-05, + "loss": 0.2603747546672821, + "step": 2379 + }, + { + "epoch": 0.6320541760722348, + "grad_norm": 1.0543823342651422, + "learning_rate": 1.63789750929441e-05, + "loss": 0.29050707817077637, + "step": 2380 + }, + { + "epoch": 0.6323197450537777, + "grad_norm": 1.0310549396867945, + "learning_rate": 1.6375593056328586e-05, + "loss": 0.2979413866996765, + "step": 2381 + }, + { + "epoch": 0.6325853140353207, + "grad_norm": 1.0460005843129836, + "learning_rate": 1.6372209790573926e-05, + "loss": 0.30875420570373535, + "step": 2382 + }, + { + "epoch": 0.6328508830168637, + "grad_norm": 0.9698416111844145, + "learning_rate": 1.6368825296332366e-05, + "loss": 0.2755935788154602, + "step": 2383 + }, + { + "epoch": 0.6331164519984066, + "grad_norm": 1.1336778567410772, + "learning_rate": 1.6365439574256406e-05, + "loss": 0.3459136486053467, + "step": 2384 + }, + { + "epoch": 0.6333820209799496, + "grad_norm": 1.116018329054477, + "learning_rate": 1.6362052624998767e-05, + "loss": 0.29043829441070557, + "step": 2385 + }, + { + "epoch": 0.6336475899614925, + "grad_norm": 1.123039696178655, + "learning_rate": 1.635866444921242e-05, + "loss": 0.321551114320755, + "step": 2386 + }, + { + "epoch": 0.6339131589430355, + "grad_norm": 1.0451682936950502, + "learning_rate": 1.6355275047550553e-05, + "loss": 0.28478139638900757, + "step": 2387 + }, + { + "epoch": 0.6341787279245784, + "grad_norm": 1.060617338056141, + "learning_rate": 1.6351884420666616e-05, + "loss": 0.30913087725639343, + "step": 2388 + }, + { + "epoch": 0.6344442969061214, + "grad_norm": 1.0996519301974148, + "learning_rate": 1.6348492569214275e-05, + "loss": 0.328342467546463, + "step": 2389 + }, + { + "epoch": 0.6347098658876643, + "grad_norm": 1.0657562962668374, + "learning_rate": 1.634509949384744e-05, + "loss": 0.3291119933128357, + "step": 2390 + }, + { + "epoch": 0.6349754348692073, + "grad_norm": 1.0805286951038287, + "learning_rate": 1.6341705195220257e-05, + "loss": 0.3542378544807434, + "step": 2391 + }, + { + "epoch": 0.6352410038507502, + "grad_norm": 1.1387422668526126, + "learning_rate": 1.63383096739871e-05, + "loss": 0.3167935609817505, + "step": 2392 + }, + { + "epoch": 0.6355065728322932, + "grad_norm": 0.9614211236141011, + "learning_rate": 1.63349129308026e-05, + "loss": 0.27623263001441956, + "step": 2393 + }, + { + "epoch": 0.6357721418138361, + "grad_norm": 1.1351525352268206, + "learning_rate": 1.6331514966321596e-05, + "loss": 0.3615761399269104, + "step": 2394 + }, + { + "epoch": 0.6360377107953791, + "grad_norm": 1.1430561223010627, + "learning_rate": 1.632811578119918e-05, + "loss": 0.3503292500972748, + "step": 2395 + }, + { + "epoch": 0.636303279776922, + "grad_norm": 1.0400637290516392, + "learning_rate": 1.6324715376090673e-05, + "loss": 0.2994767129421234, + "step": 2396 + }, + { + "epoch": 0.636568848758465, + "grad_norm": 1.2836743734514182, + "learning_rate": 1.6321313751651638e-05, + "loss": 0.29903143644332886, + "step": 2397 + }, + { + "epoch": 0.6368344177400079, + "grad_norm": 1.0273086079776361, + "learning_rate": 1.6317910908537865e-05, + "loss": 0.310536652803421, + "step": 2398 + }, + { + "epoch": 0.6370999867215509, + "grad_norm": 1.2820707601171073, + "learning_rate": 1.6314506847405382e-05, + "loss": 0.32584354281425476, + "step": 2399 + }, + { + "epoch": 0.6373655557030938, + "grad_norm": 1.186095937719991, + "learning_rate": 1.6311101568910448e-05, + "loss": 0.3536352217197418, + "step": 2400 + }, + { + "epoch": 0.6376311246846368, + "grad_norm": 1.0361661707144088, + "learning_rate": 1.6307695073709565e-05, + "loss": 0.3198434114456177, + "step": 2401 + }, + { + "epoch": 0.6378966936661797, + "grad_norm": 0.8809138916670839, + "learning_rate": 1.6304287362459462e-05, + "loss": 0.264182448387146, + "step": 2402 + }, + { + "epoch": 0.6381622626477228, + "grad_norm": 1.0526335869529386, + "learning_rate": 1.6300878435817115e-05, + "loss": 0.31182044744491577, + "step": 2403 + }, + { + "epoch": 0.6384278316292658, + "grad_norm": 1.0495886453587215, + "learning_rate": 1.6297468294439708e-05, + "loss": 0.28221404552459717, + "step": 2404 + }, + { + "epoch": 0.6386934006108087, + "grad_norm": 1.0211141314743026, + "learning_rate": 1.6294056938984693e-05, + "loss": 0.27788785099983215, + "step": 2405 + }, + { + "epoch": 0.6389589695923517, + "grad_norm": 1.068610455564362, + "learning_rate": 1.6290644370109728e-05, + "loss": 0.3300796151161194, + "step": 2406 + }, + { + "epoch": 0.6392245385738946, + "grad_norm": 1.0949996094795582, + "learning_rate": 1.628723058847272e-05, + "loss": 0.32170963287353516, + "step": 2407 + }, + { + "epoch": 0.6394901075554376, + "grad_norm": 1.1320309851276869, + "learning_rate": 1.628381559473181e-05, + "loss": 0.3243589997291565, + "step": 2408 + }, + { + "epoch": 0.6397556765369805, + "grad_norm": 1.4458945786524546, + "learning_rate": 1.6280399389545358e-05, + "loss": 0.311046838760376, + "step": 2409 + }, + { + "epoch": 0.6400212455185235, + "grad_norm": 1.0237689913585555, + "learning_rate": 1.6276981973571973e-05, + "loss": 0.2642543911933899, + "step": 2410 + }, + { + "epoch": 0.6402868145000664, + "grad_norm": 1.1424399755044237, + "learning_rate": 1.62735633474705e-05, + "loss": 0.3593730926513672, + "step": 2411 + }, + { + "epoch": 0.6405523834816094, + "grad_norm": 1.1145611429504636, + "learning_rate": 1.62701435119e-05, + "loss": 0.3147425353527069, + "step": 2412 + }, + { + "epoch": 0.6408179524631523, + "grad_norm": 1.1400749315540035, + "learning_rate": 1.6266722467519783e-05, + "loss": 0.32639142870903015, + "step": 2413 + }, + { + "epoch": 0.6410835214446953, + "grad_norm": 1.1011849489387644, + "learning_rate": 1.626330021498938e-05, + "loss": 0.32113659381866455, + "step": 2414 + }, + { + "epoch": 0.6413490904262382, + "grad_norm": 1.0371621680767618, + "learning_rate": 1.6259876754968568e-05, + "loss": 0.3188290297985077, + "step": 2415 + }, + { + "epoch": 0.6416146594077812, + "grad_norm": 1.076893351246201, + "learning_rate": 1.625645208811734e-05, + "loss": 0.3145543932914734, + "step": 2416 + }, + { + "epoch": 0.6418802283893241, + "grad_norm": 1.1368093372185335, + "learning_rate": 1.6253026215095943e-05, + "loss": 0.30433323979377747, + "step": 2417 + }, + { + "epoch": 0.6421457973708671, + "grad_norm": 1.1042321396184265, + "learning_rate": 1.6249599136564837e-05, + "loss": 0.30946728587150574, + "step": 2418 + }, + { + "epoch": 0.64241136635241, + "grad_norm": 0.991248414026241, + "learning_rate": 1.6246170853184726e-05, + "loss": 0.26245906949043274, + "step": 2419 + }, + { + "epoch": 0.642676935333953, + "grad_norm": 1.1213671588278835, + "learning_rate": 1.624274136561654e-05, + "loss": 0.31468862295150757, + "step": 2420 + }, + { + "epoch": 0.6429425043154959, + "grad_norm": 1.0200744973975597, + "learning_rate": 1.6239310674521443e-05, + "loss": 0.28946155309677124, + "step": 2421 + }, + { + "epoch": 0.6432080732970389, + "grad_norm": 1.1088143851501708, + "learning_rate": 1.6235878780560835e-05, + "loss": 0.26272106170654297, + "step": 2422 + }, + { + "epoch": 0.6434736422785818, + "grad_norm": 1.1185700160494145, + "learning_rate": 1.6232445684396347e-05, + "loss": 0.3094574213027954, + "step": 2423 + }, + { + "epoch": 0.6437392112601248, + "grad_norm": 0.9377280048944331, + "learning_rate": 1.6229011386689832e-05, + "loss": 0.2503833770751953, + "step": 2424 + }, + { + "epoch": 0.6440047802416677, + "grad_norm": 0.9657663244207705, + "learning_rate": 1.6225575888103387e-05, + "loss": 0.2655009627342224, + "step": 2425 + }, + { + "epoch": 0.6442703492232107, + "grad_norm": 1.123117061290067, + "learning_rate": 1.6222139189299336e-05, + "loss": 0.2819611728191376, + "step": 2426 + }, + { + "epoch": 0.6445359182047536, + "grad_norm": 1.0859641118248262, + "learning_rate": 1.6218701290940232e-05, + "loss": 0.2956068217754364, + "step": 2427 + }, + { + "epoch": 0.6448014871862966, + "grad_norm": 1.2445728810553593, + "learning_rate": 1.6215262193688862e-05, + "loss": 0.3330997824668884, + "step": 2428 + }, + { + "epoch": 0.6450670561678395, + "grad_norm": 1.0073602881165937, + "learning_rate": 1.6211821898208242e-05, + "loss": 0.25897055864334106, + "step": 2429 + }, + { + "epoch": 0.6453326251493825, + "grad_norm": 1.1228221759016932, + "learning_rate": 1.6208380405161623e-05, + "loss": 0.3119947016239166, + "step": 2430 + }, + { + "epoch": 0.6455981941309256, + "grad_norm": 1.143631742936843, + "learning_rate": 1.6204937715212482e-05, + "loss": 0.30833956599235535, + "step": 2431 + }, + { + "epoch": 0.6458637631124685, + "grad_norm": 1.1584271404994573, + "learning_rate": 1.620149382902453e-05, + "loss": 0.2935214638710022, + "step": 2432 + }, + { + "epoch": 0.6461293320940115, + "grad_norm": 1.6063755788258844, + "learning_rate": 1.619804874726171e-05, + "loss": 0.24297356605529785, + "step": 2433 + }, + { + "epoch": 0.6463949010755544, + "grad_norm": 1.14218339304969, + "learning_rate": 1.6194602470588186e-05, + "loss": 0.319774866104126, + "step": 2434 + }, + { + "epoch": 0.6466604700570974, + "grad_norm": 1.1751618225153557, + "learning_rate": 1.6191154999668368e-05, + "loss": 0.29197463393211365, + "step": 2435 + }, + { + "epoch": 0.6469260390386403, + "grad_norm": 1.1008916130088804, + "learning_rate": 1.6187706335166882e-05, + "loss": 0.2939727306365967, + "step": 2436 + }, + { + "epoch": 0.6471916080201833, + "grad_norm": 1.0935449463761302, + "learning_rate": 1.6184256477748595e-05, + "loss": 0.2941162586212158, + "step": 2437 + }, + { + "epoch": 0.6474571770017262, + "grad_norm": 1.1336931987797143, + "learning_rate": 1.6180805428078593e-05, + "loss": 0.2823144197463989, + "step": 2438 + }, + { + "epoch": 0.6477227459832692, + "grad_norm": 1.0912252779984561, + "learning_rate": 1.61773531868222e-05, + "loss": 0.30048274993896484, + "step": 2439 + }, + { + "epoch": 0.6479883149648121, + "grad_norm": 1.183044095349839, + "learning_rate": 1.617389975464497e-05, + "loss": 0.30927354097366333, + "step": 2440 + }, + { + "epoch": 0.6482538839463551, + "grad_norm": 1.166570736507726, + "learning_rate": 1.6170445132212678e-05, + "loss": 0.34835004806518555, + "step": 2441 + }, + { + "epoch": 0.648519452927898, + "grad_norm": 1.0325781129961564, + "learning_rate": 1.616698932019134e-05, + "loss": 0.2890225648880005, + "step": 2442 + }, + { + "epoch": 0.648785021909441, + "grad_norm": 1.1182329319338478, + "learning_rate": 1.6163532319247195e-05, + "loss": 0.31410521268844604, + "step": 2443 + }, + { + "epoch": 0.6490505908909839, + "grad_norm": 0.9213656240638256, + "learning_rate": 1.616007413004671e-05, + "loss": 0.267375111579895, + "step": 2444 + }, + { + "epoch": 0.6493161598725269, + "grad_norm": 1.1587177777274813, + "learning_rate": 1.6156614753256583e-05, + "loss": 0.3300023376941681, + "step": 2445 + }, + { + "epoch": 0.6495817288540698, + "grad_norm": 1.0295072511714587, + "learning_rate": 1.615315418954374e-05, + "loss": 0.2822847366333008, + "step": 2446 + }, + { + "epoch": 0.6498472978356128, + "grad_norm": 1.1626615137060834, + "learning_rate": 1.6149692439575348e-05, + "loss": 0.3093401789665222, + "step": 2447 + }, + { + "epoch": 0.6501128668171557, + "grad_norm": 1.0475923101386018, + "learning_rate": 1.6146229504018777e-05, + "loss": 0.2892506718635559, + "step": 2448 + }, + { + "epoch": 0.6503784357986987, + "grad_norm": 0.9972012319936079, + "learning_rate": 1.6142765383541643e-05, + "loss": 0.2805558741092682, + "step": 2449 + }, + { + "epoch": 0.6506440047802416, + "grad_norm": 1.0535842654025462, + "learning_rate": 1.6139300078811794e-05, + "loss": 0.29852935671806335, + "step": 2450 + }, + { + "epoch": 0.6509095737617846, + "grad_norm": 1.193949473615032, + "learning_rate": 1.6135833590497295e-05, + "loss": 0.3567991256713867, + "step": 2451 + }, + { + "epoch": 0.6511751427433276, + "grad_norm": 1.1265709697559396, + "learning_rate": 1.6132365919266442e-05, + "loss": 0.29564782977104187, + "step": 2452 + }, + { + "epoch": 0.6514407117248705, + "grad_norm": 1.011180050217134, + "learning_rate": 1.612889706578777e-05, + "loss": 0.30027297139167786, + "step": 2453 + }, + { + "epoch": 0.6517062807064135, + "grad_norm": 1.0908136110597069, + "learning_rate": 1.6125427030730027e-05, + "loss": 0.3318096697330475, + "step": 2454 + }, + { + "epoch": 0.6519718496879564, + "grad_norm": 1.0728958387824694, + "learning_rate": 1.612195581476219e-05, + "loss": 0.30962997674942017, + "step": 2455 + }, + { + "epoch": 0.6522374186694994, + "grad_norm": 1.2969539714019946, + "learning_rate": 1.6118483418553476e-05, + "loss": 0.3152836859226227, + "step": 2456 + }, + { + "epoch": 0.6525029876510423, + "grad_norm": 1.0160215490589632, + "learning_rate": 1.6115009842773322e-05, + "loss": 0.26117920875549316, + "step": 2457 + }, + { + "epoch": 0.6527685566325853, + "grad_norm": 0.9780826840488046, + "learning_rate": 1.6111535088091388e-05, + "loss": 0.2705717384815216, + "step": 2458 + }, + { + "epoch": 0.6530341256141283, + "grad_norm": 1.112935626593024, + "learning_rate": 1.6108059155177568e-05, + "loss": 0.3281205892562866, + "step": 2459 + }, + { + "epoch": 0.6532996945956713, + "grad_norm": 1.0805050021999307, + "learning_rate": 1.6104582044701983e-05, + "loss": 0.3300125002861023, + "step": 2460 + }, + { + "epoch": 0.6535652635772142, + "grad_norm": 1.0596352955938992, + "learning_rate": 1.6101103757334973e-05, + "loss": 0.29286977648735046, + "step": 2461 + }, + { + "epoch": 0.6538308325587572, + "grad_norm": 1.114611766363321, + "learning_rate": 1.6097624293747115e-05, + "loss": 0.2920498847961426, + "step": 2462 + }, + { + "epoch": 0.6540964015403001, + "grad_norm": 1.0455118881549736, + "learning_rate": 1.609414365460921e-05, + "loss": 0.31018689274787903, + "step": 2463 + }, + { + "epoch": 0.6543619705218431, + "grad_norm": 1.0028130278859915, + "learning_rate": 1.609066184059228e-05, + "loss": 0.26806512475013733, + "step": 2464 + }, + { + "epoch": 0.654627539503386, + "grad_norm": 1.0385768164913443, + "learning_rate": 1.608717885236758e-05, + "loss": 0.29770639538764954, + "step": 2465 + }, + { + "epoch": 0.654893108484929, + "grad_norm": 1.0811683391440958, + "learning_rate": 1.6083694690606592e-05, + "loss": 0.36161965131759644, + "step": 2466 + }, + { + "epoch": 0.6551586774664719, + "grad_norm": 1.1455214370068598, + "learning_rate": 1.6080209355981016e-05, + "loss": 0.36114081740379333, + "step": 2467 + }, + { + "epoch": 0.6554242464480149, + "grad_norm": 0.9911085328884063, + "learning_rate": 1.6076722849162786e-05, + "loss": 0.28924882411956787, + "step": 2468 + }, + { + "epoch": 0.6556898154295578, + "grad_norm": 1.1198872767040324, + "learning_rate": 1.6073235170824058e-05, + "loss": 0.3088049292564392, + "step": 2469 + }, + { + "epoch": 0.6559553844111008, + "grad_norm": 1.062389027957873, + "learning_rate": 1.6069746321637216e-05, + "loss": 0.2684907615184784, + "step": 2470 + }, + { + "epoch": 0.6562209533926437, + "grad_norm": 0.9850175058697045, + "learning_rate": 1.6066256302274873e-05, + "loss": 0.2674641013145447, + "step": 2471 + }, + { + "epoch": 0.6564865223741867, + "grad_norm": 1.0658104164235327, + "learning_rate": 1.6062765113409854e-05, + "loss": 0.2865106165409088, + "step": 2472 + }, + { + "epoch": 0.6567520913557297, + "grad_norm": 1.1117203943537428, + "learning_rate": 1.605927275571523e-05, + "loss": 0.33163607120513916, + "step": 2473 + }, + { + "epoch": 0.6570176603372726, + "grad_norm": 1.1177244627769223, + "learning_rate": 1.6055779229864276e-05, + "loss": 0.32725927233695984, + "step": 2474 + }, + { + "epoch": 0.6572832293188156, + "grad_norm": 1.171322314473831, + "learning_rate": 1.605228453653051e-05, + "loss": 0.31537747383117676, + "step": 2475 + }, + { + "epoch": 0.6575487983003585, + "grad_norm": 1.0855461390356589, + "learning_rate": 1.604878867638767e-05, + "loss": 0.29331761598587036, + "step": 2476 + }, + { + "epoch": 0.6578143672819015, + "grad_norm": 1.0342424424241736, + "learning_rate": 1.6045291650109706e-05, + "loss": 0.315193772315979, + "step": 2477 + }, + { + "epoch": 0.6580799362634444, + "grad_norm": 1.2286540067411784, + "learning_rate": 1.6041793458370812e-05, + "loss": 0.3595796227455139, + "step": 2478 + }, + { + "epoch": 0.6583455052449874, + "grad_norm": 1.0251892797499218, + "learning_rate": 1.6038294101845394e-05, + "loss": 0.3069949150085449, + "step": 2479 + }, + { + "epoch": 0.6586110742265303, + "grad_norm": 1.1576253586981062, + "learning_rate": 1.603479358120809e-05, + "loss": 0.3154812455177307, + "step": 2480 + }, + { + "epoch": 0.6588766432080733, + "grad_norm": 1.1008921076459075, + "learning_rate": 1.6031291897133756e-05, + "loss": 0.3005039691925049, + "step": 2481 + }, + { + "epoch": 0.6591422121896162, + "grad_norm": 1.1463594149599334, + "learning_rate": 1.6027789050297476e-05, + "loss": 0.2885095775127411, + "step": 2482 + }, + { + "epoch": 0.6594077811711592, + "grad_norm": 1.002066881102099, + "learning_rate": 1.602428504137456e-05, + "loss": 0.291950523853302, + "step": 2483 + }, + { + "epoch": 0.6596733501527021, + "grad_norm": 1.0919380790727968, + "learning_rate": 1.6020779871040538e-05, + "loss": 0.31630760431289673, + "step": 2484 + }, + { + "epoch": 0.6599389191342451, + "grad_norm": 1.0827567425634856, + "learning_rate": 1.6017273539971167e-05, + "loss": 0.29767507314682007, + "step": 2485 + }, + { + "epoch": 0.660204488115788, + "grad_norm": 1.036820980968177, + "learning_rate": 1.601376604884242e-05, + "loss": 0.2882775664329529, + "step": 2486 + }, + { + "epoch": 0.6604700570973311, + "grad_norm": 1.0885135950320362, + "learning_rate": 1.601025739833051e-05, + "loss": 0.325736403465271, + "step": 2487 + }, + { + "epoch": 0.660735626078874, + "grad_norm": 1.048580856774253, + "learning_rate": 1.6006747589111854e-05, + "loss": 0.3007255792617798, + "step": 2488 + }, + { + "epoch": 0.661001195060417, + "grad_norm": 1.146836506523448, + "learning_rate": 1.6003236621863107e-05, + "loss": 0.33199968934059143, + "step": 2489 + }, + { + "epoch": 0.6612667640419599, + "grad_norm": 1.1430196866694278, + "learning_rate": 1.5999724497261138e-05, + "loss": 0.3784569799900055, + "step": 2490 + }, + { + "epoch": 0.6615323330235029, + "grad_norm": 1.0506667031587968, + "learning_rate": 1.5996211215983052e-05, + "loss": 0.28146931529045105, + "step": 2491 + }, + { + "epoch": 0.6617979020050458, + "grad_norm": 1.0621415260673002, + "learning_rate": 1.599269677870616e-05, + "loss": 0.32187730073928833, + "step": 2492 + }, + { + "epoch": 0.6620634709865888, + "grad_norm": 1.0631524880676668, + "learning_rate": 1.5989181186108003e-05, + "loss": 0.3021823465824127, + "step": 2493 + }, + { + "epoch": 0.6623290399681317, + "grad_norm": 1.0248198480240434, + "learning_rate": 1.5985664438866354e-05, + "loss": 0.3309648334980011, + "step": 2494 + }, + { + "epoch": 0.6625946089496747, + "grad_norm": 1.0183038789118495, + "learning_rate": 1.598214653765919e-05, + "loss": 0.2939694821834564, + "step": 2495 + }, + { + "epoch": 0.6628601779312177, + "grad_norm": 1.0091208408649601, + "learning_rate": 1.597862748316473e-05, + "loss": 0.31219810247421265, + "step": 2496 + }, + { + "epoch": 0.6631257469127606, + "grad_norm": 1.3669850946739606, + "learning_rate": 1.5975107276061405e-05, + "loss": 0.29435622692108154, + "step": 2497 + }, + { + "epoch": 0.6633913158943036, + "grad_norm": 1.0359724885535866, + "learning_rate": 1.5971585917027864e-05, + "loss": 0.27167004346847534, + "step": 2498 + }, + { + "epoch": 0.6636568848758465, + "grad_norm": 1.121619558624798, + "learning_rate": 1.5968063406742988e-05, + "loss": 0.3360658884048462, + "step": 2499 + }, + { + "epoch": 0.6639224538573895, + "grad_norm": 1.0767207810238415, + "learning_rate": 1.596453974588587e-05, + "loss": 0.2994089424610138, + "step": 2500 + }, + { + "epoch": 0.6641880228389324, + "grad_norm": 1.0997593865705806, + "learning_rate": 1.596101493513584e-05, + "loss": 0.32302889227867126, + "step": 2501 + }, + { + "epoch": 0.6644535918204754, + "grad_norm": 1.1249891187970829, + "learning_rate": 1.595748897517243e-05, + "loss": 0.3122987747192383, + "step": 2502 + }, + { + "epoch": 0.6647191608020183, + "grad_norm": 1.014108779554691, + "learning_rate": 1.5953961866675408e-05, + "loss": 0.2746438980102539, + "step": 2503 + }, + { + "epoch": 0.6649847297835613, + "grad_norm": 1.0758059481680302, + "learning_rate": 1.5950433610324758e-05, + "loss": 0.3043097257614136, + "step": 2504 + }, + { + "epoch": 0.6652502987651042, + "grad_norm": 1.2204942135197403, + "learning_rate": 1.594690420680069e-05, + "loss": 0.3208698332309723, + "step": 2505 + }, + { + "epoch": 0.6655158677466472, + "grad_norm": 1.1502218188727449, + "learning_rate": 1.5943373656783628e-05, + "loss": 0.317341148853302, + "step": 2506 + }, + { + "epoch": 0.6657814367281901, + "grad_norm": 1.1223078751349502, + "learning_rate": 1.5939841960954218e-05, + "loss": 0.3250347673892975, + "step": 2507 + }, + { + "epoch": 0.6660470057097331, + "grad_norm": 1.066903715567463, + "learning_rate": 1.5936309119993333e-05, + "loss": 0.32255828380584717, + "step": 2508 + }, + { + "epoch": 0.666312574691276, + "grad_norm": 1.0591506680476068, + "learning_rate": 1.593277513458206e-05, + "loss": 0.3247614800930023, + "step": 2509 + }, + { + "epoch": 0.666578143672819, + "grad_norm": 1.087253896768941, + "learning_rate": 1.5929240005401715e-05, + "loss": 0.34171730279922485, + "step": 2510 + }, + { + "epoch": 0.6668437126543619, + "grad_norm": 1.092874100004657, + "learning_rate": 1.5925703733133823e-05, + "loss": 0.30671584606170654, + "step": 2511 + }, + { + "epoch": 0.6671092816359049, + "grad_norm": 1.1250075389065, + "learning_rate": 1.5922166318460138e-05, + "loss": 0.3387908339500427, + "step": 2512 + }, + { + "epoch": 0.6673748506174478, + "grad_norm": 1.0272141820522305, + "learning_rate": 1.5918627762062635e-05, + "loss": 0.2772873044013977, + "step": 2513 + }, + { + "epoch": 0.6676404195989908, + "grad_norm": 1.0802689739154336, + "learning_rate": 1.59150880646235e-05, + "loss": 0.31555238366127014, + "step": 2514 + }, + { + "epoch": 0.6679059885805337, + "grad_norm": 0.9930963010924009, + "learning_rate": 1.5911547226825154e-05, + "loss": 0.2821594476699829, + "step": 2515 + }, + { + "epoch": 0.6681715575620768, + "grad_norm": 1.098936156337469, + "learning_rate": 1.5908005249350217e-05, + "loss": 0.3176054358482361, + "step": 2516 + }, + { + "epoch": 0.6684371265436198, + "grad_norm": 1.083365844116071, + "learning_rate": 1.590446213288155e-05, + "loss": 0.28484907746315, + "step": 2517 + }, + { + "epoch": 0.6687026955251627, + "grad_norm": 1.0028500327966023, + "learning_rate": 1.590091787810222e-05, + "loss": 0.25227850675582886, + "step": 2518 + }, + { + "epoch": 0.6689682645067057, + "grad_norm": 0.993931866088294, + "learning_rate": 1.5897372485695514e-05, + "loss": 0.276819109916687, + "step": 2519 + }, + { + "epoch": 0.6692338334882486, + "grad_norm": 1.1883846939575156, + "learning_rate": 1.589382595634495e-05, + "loss": 0.27944183349609375, + "step": 2520 + }, + { + "epoch": 0.6694994024697916, + "grad_norm": 1.0217591474349375, + "learning_rate": 1.589027829073425e-05, + "loss": 0.295337975025177, + "step": 2521 + }, + { + "epoch": 0.6697649714513345, + "grad_norm": 1.0940479681497102, + "learning_rate": 1.5886729489547365e-05, + "loss": 0.31168580055236816, + "step": 2522 + }, + { + "epoch": 0.6700305404328775, + "grad_norm": 1.0847233646991081, + "learning_rate": 1.5883179553468465e-05, + "loss": 0.34520941972732544, + "step": 2523 + }, + { + "epoch": 0.6702961094144204, + "grad_norm": 1.0941539012056998, + "learning_rate": 1.587962848318193e-05, + "loss": 0.3121863901615143, + "step": 2524 + }, + { + "epoch": 0.6705616783959634, + "grad_norm": 1.2414605611463847, + "learning_rate": 1.587607627937237e-05, + "loss": 0.3450377583503723, + "step": 2525 + }, + { + "epoch": 0.6708272473775063, + "grad_norm": 1.0575484463097053, + "learning_rate": 1.58725229427246e-05, + "loss": 0.33431196212768555, + "step": 2526 + }, + { + "epoch": 0.6710928163590493, + "grad_norm": 2.8101197900274433, + "learning_rate": 1.5868968473923675e-05, + "loss": 0.2753226161003113, + "step": 2527 + }, + { + "epoch": 0.6713583853405922, + "grad_norm": 1.1171540013343635, + "learning_rate": 1.586541287365484e-05, + "loss": 0.31394219398498535, + "step": 2528 + }, + { + "epoch": 0.6716239543221352, + "grad_norm": 1.0940027543433968, + "learning_rate": 1.586185614260358e-05, + "loss": 0.352859765291214, + "step": 2529 + }, + { + "epoch": 0.6718895233036781, + "grad_norm": 1.158790754412002, + "learning_rate": 1.5858298281455592e-05, + "loss": 0.3182204067707062, + "step": 2530 + }, + { + "epoch": 0.6721550922852211, + "grad_norm": 1.0901686159979078, + "learning_rate": 1.5854739290896785e-05, + "loss": 0.3107008934020996, + "step": 2531 + }, + { + "epoch": 0.672420661266764, + "grad_norm": 1.0367853416177613, + "learning_rate": 1.5851179171613294e-05, + "loss": 0.2737328112125397, + "step": 2532 + }, + { + "epoch": 0.672686230248307, + "grad_norm": 1.070700914663809, + "learning_rate": 1.5847617924291466e-05, + "loss": 0.2744509279727936, + "step": 2533 + }, + { + "epoch": 0.6729517992298499, + "grad_norm": 1.0763385778363233, + "learning_rate": 1.584405554961787e-05, + "loss": 0.3149082660675049, + "step": 2534 + }, + { + "epoch": 0.6732173682113929, + "grad_norm": 1.1199335422347676, + "learning_rate": 1.584049204827929e-05, + "loss": 0.32643741369247437, + "step": 2535 + }, + { + "epoch": 0.6734829371929358, + "grad_norm": 1.1153920819002263, + "learning_rate": 1.583692742096272e-05, + "loss": 0.31901559233665466, + "step": 2536 + }, + { + "epoch": 0.6737485061744788, + "grad_norm": 1.037012713250851, + "learning_rate": 1.583336166835539e-05, + "loss": 0.3020802140235901, + "step": 2537 + }, + { + "epoch": 0.6740140751560217, + "grad_norm": 0.9884255382698084, + "learning_rate": 1.5829794791144723e-05, + "loss": 0.29683804512023926, + "step": 2538 + }, + { + "epoch": 0.6742796441375647, + "grad_norm": 1.0549080502640127, + "learning_rate": 1.582622679001838e-05, + "loss": 0.2898966073989868, + "step": 2539 + }, + { + "epoch": 0.6745452131191076, + "grad_norm": 1.0628349250468347, + "learning_rate": 1.582265766566422e-05, + "loss": 0.2665000855922699, + "step": 2540 + }, + { + "epoch": 0.6748107821006506, + "grad_norm": 1.1059852721256176, + "learning_rate": 1.581908741877034e-05, + "loss": 0.2987207770347595, + "step": 2541 + }, + { + "epoch": 0.6750763510821935, + "grad_norm": 1.1051901132495052, + "learning_rate": 1.5815516050025032e-05, + "loss": 0.32591086626052856, + "step": 2542 + }, + { + "epoch": 0.6753419200637365, + "grad_norm": 0.9752097662975195, + "learning_rate": 1.581194356011682e-05, + "loss": 0.28181299567222595, + "step": 2543 + }, + { + "epoch": 0.6756074890452796, + "grad_norm": 1.0983389872703522, + "learning_rate": 1.5808369949734433e-05, + "loss": 0.3256041407585144, + "step": 2544 + }, + { + "epoch": 0.6758730580268225, + "grad_norm": 1.1228012917357884, + "learning_rate": 1.5804795219566825e-05, + "loss": 0.3079703152179718, + "step": 2545 + }, + { + "epoch": 0.6761386270083655, + "grad_norm": 1.1504916593616519, + "learning_rate": 1.580121937030316e-05, + "loss": 0.3364162743091583, + "step": 2546 + }, + { + "epoch": 0.6764041959899084, + "grad_norm": 1.046870504650359, + "learning_rate": 1.5797642402632816e-05, + "loss": 0.2774898111820221, + "step": 2547 + }, + { + "epoch": 0.6766697649714514, + "grad_norm": 1.1108782100380157, + "learning_rate": 1.5794064317245396e-05, + "loss": 0.33260244131088257, + "step": 2548 + }, + { + "epoch": 0.6769353339529943, + "grad_norm": 1.16229568793775, + "learning_rate": 1.5790485114830708e-05, + "loss": 0.3327571153640747, + "step": 2549 + }, + { + "epoch": 0.6772009029345373, + "grad_norm": 1.1256526679188055, + "learning_rate": 1.5786904796078783e-05, + "loss": 0.28527912497520447, + "step": 2550 + }, + { + "epoch": 0.6774664719160802, + "grad_norm": 1.1757868172389025, + "learning_rate": 1.5783323361679865e-05, + "loss": 0.3100908100605011, + "step": 2551 + }, + { + "epoch": 0.6777320408976232, + "grad_norm": 1.1187226402475792, + "learning_rate": 1.577974081232441e-05, + "loss": 0.3434574007987976, + "step": 2552 + }, + { + "epoch": 0.6779976098791661, + "grad_norm": 1.0691671390255433, + "learning_rate": 1.5776157148703094e-05, + "loss": 0.3151341676712036, + "step": 2553 + }, + { + "epoch": 0.6782631788607091, + "grad_norm": 1.1432839314923735, + "learning_rate": 1.5772572371506803e-05, + "loss": 0.33334124088287354, + "step": 2554 + }, + { + "epoch": 0.678528747842252, + "grad_norm": 0.9718187941404679, + "learning_rate": 1.576898648142664e-05, + "loss": 0.26933547854423523, + "step": 2555 + }, + { + "epoch": 0.678794316823795, + "grad_norm": 1.0146251280063243, + "learning_rate": 1.576539947915392e-05, + "loss": 0.3087029755115509, + "step": 2556 + }, + { + "epoch": 0.6790598858053379, + "grad_norm": 2.0746649121309244, + "learning_rate": 1.576181136538018e-05, + "loss": 0.32620540261268616, + "step": 2557 + }, + { + "epoch": 0.6793254547868809, + "grad_norm": 1.0462752825892652, + "learning_rate": 1.575822214079716e-05, + "loss": 0.29112139344215393, + "step": 2558 + }, + { + "epoch": 0.6795910237684238, + "grad_norm": 1.108770761520566, + "learning_rate": 1.5754631806096822e-05, + "loss": 0.3394843339920044, + "step": 2559 + }, + { + "epoch": 0.6798565927499668, + "grad_norm": 1.0789431162979184, + "learning_rate": 1.5751040361971342e-05, + "loss": 0.32754629850387573, + "step": 2560 + }, + { + "epoch": 0.6801221617315097, + "grad_norm": 1.055729440740922, + "learning_rate": 1.574744780911311e-05, + "loss": 0.2829592823982239, + "step": 2561 + }, + { + "epoch": 0.6803877307130527, + "grad_norm": 3.1916720491195423, + "learning_rate": 1.5743854148214724e-05, + "loss": 0.2718046307563782, + "step": 2562 + }, + { + "epoch": 0.6806532996945956, + "grad_norm": 1.0355755791413483, + "learning_rate": 1.5740259379969002e-05, + "loss": 0.29244256019592285, + "step": 2563 + }, + { + "epoch": 0.6809188686761386, + "grad_norm": 1.0678189150114252, + "learning_rate": 1.5736663505068972e-05, + "loss": 0.2925388514995575, + "step": 2564 + }, + { + "epoch": 0.6811844376576816, + "grad_norm": 1.109826571766002, + "learning_rate": 1.5733066524207875e-05, + "loss": 0.26742440462112427, + "step": 2565 + }, + { + "epoch": 0.6814500066392245, + "grad_norm": 1.0365586719986022, + "learning_rate": 1.5729468438079167e-05, + "loss": 0.33688807487487793, + "step": 2566 + }, + { + "epoch": 0.6817155756207675, + "grad_norm": 1.0939355325909954, + "learning_rate": 1.5725869247376514e-05, + "loss": 0.2953096330165863, + "step": 2567 + }, + { + "epoch": 0.6819811446023104, + "grad_norm": 1.081510188555139, + "learning_rate": 1.5722268952793806e-05, + "loss": 0.321500301361084, + "step": 2568 + }, + { + "epoch": 0.6822467135838534, + "grad_norm": 1.1427798210793014, + "learning_rate": 1.5718667555025127e-05, + "loss": 0.29148590564727783, + "step": 2569 + }, + { + "epoch": 0.6825122825653963, + "grad_norm": 1.0849106130015975, + "learning_rate": 1.5715065054764792e-05, + "loss": 0.26887139678001404, + "step": 2570 + }, + { + "epoch": 0.6827778515469393, + "grad_norm": 0.9118900514894542, + "learning_rate": 1.5711461452707316e-05, + "loss": 0.2698139250278473, + "step": 2571 + }, + { + "epoch": 0.6830434205284823, + "grad_norm": 0.9420578172190551, + "learning_rate": 1.5707856749547433e-05, + "loss": 0.264956533908844, + "step": 2572 + }, + { + "epoch": 0.6833089895100253, + "grad_norm": 1.0786584040903482, + "learning_rate": 1.5704250945980085e-05, + "loss": 0.32535314559936523, + "step": 2573 + }, + { + "epoch": 0.6835745584915682, + "grad_norm": 1.1132312438200667, + "learning_rate": 1.5700644042700432e-05, + "loss": 0.30529654026031494, + "step": 2574 + }, + { + "epoch": 0.6838401274731112, + "grad_norm": 0.9518994724553314, + "learning_rate": 1.569703604040384e-05, + "loss": 0.27253150939941406, + "step": 2575 + }, + { + "epoch": 0.6841056964546541, + "grad_norm": 1.0559070796873817, + "learning_rate": 1.5693426939785886e-05, + "loss": 0.27451053261756897, + "step": 2576 + }, + { + "epoch": 0.6843712654361971, + "grad_norm": 1.1393124405849042, + "learning_rate": 1.5689816741542374e-05, + "loss": 0.33280283212661743, + "step": 2577 + }, + { + "epoch": 0.68463683441774, + "grad_norm": 1.1306113061745138, + "learning_rate": 1.5686205446369293e-05, + "loss": 0.2911887764930725, + "step": 2578 + }, + { + "epoch": 0.684902403399283, + "grad_norm": 1.0940465986734231, + "learning_rate": 1.5682593054962866e-05, + "loss": 0.2950279116630554, + "step": 2579 + }, + { + "epoch": 0.6851679723808259, + "grad_norm": 1.0911163136563768, + "learning_rate": 1.5678979568019518e-05, + "loss": 0.3267458975315094, + "step": 2580 + }, + { + "epoch": 0.6854335413623689, + "grad_norm": 1.2739312763430675, + "learning_rate": 1.5675364986235887e-05, + "loss": 0.3209132254123688, + "step": 2581 + }, + { + "epoch": 0.6856991103439118, + "grad_norm": 1.1101887519376679, + "learning_rate": 1.5671749310308818e-05, + "loss": 0.3186662197113037, + "step": 2582 + }, + { + "epoch": 0.6859646793254548, + "grad_norm": 0.9652854961372175, + "learning_rate": 1.566813254093538e-05, + "loss": 0.24875827133655548, + "step": 2583 + }, + { + "epoch": 0.6862302483069977, + "grad_norm": 1.0684425959326884, + "learning_rate": 1.5664514678812835e-05, + "loss": 0.26657983660697937, + "step": 2584 + }, + { + "epoch": 0.6864958172885407, + "grad_norm": 1.0670123202559558, + "learning_rate": 1.5660895724638666e-05, + "loss": 0.2889682650566101, + "step": 2585 + }, + { + "epoch": 0.6867613862700837, + "grad_norm": 1.2310590689373582, + "learning_rate": 1.5657275679110564e-05, + "loss": 0.32035061717033386, + "step": 2586 + }, + { + "epoch": 0.6870269552516266, + "grad_norm": 0.9946580402808185, + "learning_rate": 1.5653654542926435e-05, + "loss": 0.2844264507293701, + "step": 2587 + }, + { + "epoch": 0.6872925242331696, + "grad_norm": 1.0738818938413612, + "learning_rate": 1.5650032316784388e-05, + "loss": 0.27645713090896606, + "step": 2588 + }, + { + "epoch": 0.6875580932147125, + "grad_norm": 1.0078062598096618, + "learning_rate": 1.5646409001382745e-05, + "loss": 0.29902809858322144, + "step": 2589 + }, + { + "epoch": 0.6878236621962555, + "grad_norm": 1.0662439819494403, + "learning_rate": 1.564278459742004e-05, + "loss": 0.28179824352264404, + "step": 2590 + }, + { + "epoch": 0.6880892311777984, + "grad_norm": 0.9959782320912598, + "learning_rate": 1.563915910559502e-05, + "loss": 0.30527305603027344, + "step": 2591 + }, + { + "epoch": 0.6883548001593414, + "grad_norm": 0.9640464455731136, + "learning_rate": 1.5635532526606625e-05, + "loss": 0.29411792755126953, + "step": 2592 + }, + { + "epoch": 0.6886203691408843, + "grad_norm": 1.0659796212639145, + "learning_rate": 1.563190486115403e-05, + "loss": 0.32294154167175293, + "step": 2593 + }, + { + "epoch": 0.6888859381224273, + "grad_norm": 1.0983041505312465, + "learning_rate": 1.5628276109936594e-05, + "loss": 0.31873172521591187, + "step": 2594 + }, + { + "epoch": 0.6891515071039702, + "grad_norm": 1.2163401358885952, + "learning_rate": 1.5624646273653908e-05, + "loss": 0.37790048122406006, + "step": 2595 + }, + { + "epoch": 0.6894170760855132, + "grad_norm": 1.0271206309222516, + "learning_rate": 1.5621015353005754e-05, + "loss": 0.27596205472946167, + "step": 2596 + }, + { + "epoch": 0.6896826450670561, + "grad_norm": 1.2915034278595348, + "learning_rate": 1.5617383348692135e-05, + "loss": 0.30952686071395874, + "step": 2597 + }, + { + "epoch": 0.6899482140485991, + "grad_norm": 1.089414433310086, + "learning_rate": 1.5613750261413256e-05, + "loss": 0.2933235764503479, + "step": 2598 + }, + { + "epoch": 0.690213783030142, + "grad_norm": 1.1151043496896997, + "learning_rate": 1.5610116091869538e-05, + "loss": 0.2961776554584503, + "step": 2599 + }, + { + "epoch": 0.6904793520116851, + "grad_norm": 1.0596230408388436, + "learning_rate": 1.56064808407616e-05, + "loss": 0.2843313217163086, + "step": 2600 + }, + { + "epoch": 0.690744920993228, + "grad_norm": 1.0545406618996236, + "learning_rate": 1.560284450879028e-05, + "loss": 0.29366564750671387, + "step": 2601 + }, + { + "epoch": 0.691010489974771, + "grad_norm": 1.028254286030692, + "learning_rate": 1.5599207096656614e-05, + "loss": 0.32668614387512207, + "step": 2602 + }, + { + "epoch": 0.6912760589563139, + "grad_norm": 1.1962201821774399, + "learning_rate": 1.5595568605061858e-05, + "loss": 0.344367653131485, + "step": 2603 + }, + { + "epoch": 0.6915416279378569, + "grad_norm": 1.2250839657368426, + "learning_rate": 1.5591929034707468e-05, + "loss": 0.2875809371471405, + "step": 2604 + }, + { + "epoch": 0.6918071969193998, + "grad_norm": 0.9717157700868733, + "learning_rate": 1.5588288386295113e-05, + "loss": 0.2688799202442169, + "step": 2605 + }, + { + "epoch": 0.6920727659009428, + "grad_norm": 1.2520016236289049, + "learning_rate": 1.558464666052667e-05, + "loss": 0.28575828671455383, + "step": 2606 + }, + { + "epoch": 0.6923383348824858, + "grad_norm": 1.0741907315089707, + "learning_rate": 1.5581003858104203e-05, + "loss": 0.2800632119178772, + "step": 2607 + }, + { + "epoch": 0.6926039038640287, + "grad_norm": 1.096176752690496, + "learning_rate": 1.5577359979730022e-05, + "loss": 0.3066416382789612, + "step": 2608 + }, + { + "epoch": 0.6928694728455717, + "grad_norm": 1.0146792499875503, + "learning_rate": 1.5573715026106617e-05, + "loss": 0.3164110779762268, + "step": 2609 + }, + { + "epoch": 0.6931350418271146, + "grad_norm": 1.0292100354922897, + "learning_rate": 1.5570068997936686e-05, + "loss": 0.2908422350883484, + "step": 2610 + }, + { + "epoch": 0.6934006108086576, + "grad_norm": 0.9996966110923509, + "learning_rate": 1.5566421895923148e-05, + "loss": 0.29055240750312805, + "step": 2611 + }, + { + "epoch": 0.6936661797902005, + "grad_norm": 1.1296077877181152, + "learning_rate": 1.556277372076912e-05, + "loss": 0.3247227370738983, + "step": 2612 + }, + { + "epoch": 0.6939317487717435, + "grad_norm": 1.0869397458201258, + "learning_rate": 1.555912447317792e-05, + "loss": 0.29944315552711487, + "step": 2613 + }, + { + "epoch": 0.6941973177532864, + "grad_norm": 1.140637727836958, + "learning_rate": 1.5555474153853092e-05, + "loss": 0.2984931170940399, + "step": 2614 + }, + { + "epoch": 0.6944628867348294, + "grad_norm": 1.0644561032518303, + "learning_rate": 1.5551822763498364e-05, + "loss": 0.301285982131958, + "step": 2615 + }, + { + "epoch": 0.6947284557163723, + "grad_norm": 1.0271314049069311, + "learning_rate": 1.5548170302817683e-05, + "loss": 0.2862967252731323, + "step": 2616 + }, + { + "epoch": 0.6949940246979153, + "grad_norm": 1.0216494335731472, + "learning_rate": 1.5544516772515207e-05, + "loss": 0.3071482181549072, + "step": 2617 + }, + { + "epoch": 0.6952595936794582, + "grad_norm": 1.153798162838472, + "learning_rate": 1.5540862173295285e-05, + "loss": 0.33668914437294006, + "step": 2618 + }, + { + "epoch": 0.6955251626610012, + "grad_norm": 1.0451730984690786, + "learning_rate": 1.5537206505862486e-05, + "loss": 0.32204627990722656, + "step": 2619 + }, + { + "epoch": 0.6957907316425441, + "grad_norm": 1.083101648134336, + "learning_rate": 1.5533549770921576e-05, + "loss": 0.30210041999816895, + "step": 2620 + }, + { + "epoch": 0.6960563006240871, + "grad_norm": 1.1518417167078652, + "learning_rate": 1.5529891969177535e-05, + "loss": 0.3116886019706726, + "step": 2621 + }, + { + "epoch": 0.69632186960563, + "grad_norm": 1.1473344970327815, + "learning_rate": 1.5526233101335543e-05, + "loss": 0.3460058867931366, + "step": 2622 + }, + { + "epoch": 0.696587438587173, + "grad_norm": 1.0477810576486106, + "learning_rate": 1.552257316810098e-05, + "loss": 0.30080512166023254, + "step": 2623 + }, + { + "epoch": 0.6968530075687159, + "grad_norm": 1.1107090823955428, + "learning_rate": 1.5518912170179447e-05, + "loss": 0.3381347954273224, + "step": 2624 + }, + { + "epoch": 0.6971185765502589, + "grad_norm": 1.0737064011248665, + "learning_rate": 1.5515250108276733e-05, + "loss": 0.30345672369003296, + "step": 2625 + }, + { + "epoch": 0.6973841455318018, + "grad_norm": 1.1809134250993814, + "learning_rate": 1.5511586983098847e-05, + "loss": 0.3002641797065735, + "step": 2626 + }, + { + "epoch": 0.6976497145133448, + "grad_norm": 0.9975793486319376, + "learning_rate": 1.5507922795351992e-05, + "loss": 0.2848126292228699, + "step": 2627 + }, + { + "epoch": 0.6979152834948879, + "grad_norm": 1.1203755244922207, + "learning_rate": 1.5504257545742585e-05, + "loss": 0.32360371947288513, + "step": 2628 + }, + { + "epoch": 0.6981808524764308, + "grad_norm": 1.0674295201271842, + "learning_rate": 1.5500591234977237e-05, + "loss": 0.2970595955848694, + "step": 2629 + }, + { + "epoch": 0.6984464214579738, + "grad_norm": 1.1343972682519483, + "learning_rate": 1.5496923863762773e-05, + "loss": 0.35431474447250366, + "step": 2630 + }, + { + "epoch": 0.6987119904395167, + "grad_norm": 1.027377246814574, + "learning_rate": 1.549325543280622e-05, + "loss": 0.30133551359176636, + "step": 2631 + }, + { + "epoch": 0.6989775594210597, + "grad_norm": 1.066148832325447, + "learning_rate": 1.5489585942814807e-05, + "loss": 0.3013160824775696, + "step": 2632 + }, + { + "epoch": 0.6992431284026026, + "grad_norm": 1.1981871164483473, + "learning_rate": 1.5485915394495967e-05, + "loss": 0.3291313052177429, + "step": 2633 + }, + { + "epoch": 0.6995086973841456, + "grad_norm": 1.3083774012082008, + "learning_rate": 1.5482243788557336e-05, + "loss": 0.32308053970336914, + "step": 2634 + }, + { + "epoch": 0.6997742663656885, + "grad_norm": 1.0802428984314951, + "learning_rate": 1.5478571125706762e-05, + "loss": 0.321450412273407, + "step": 2635 + }, + { + "epoch": 0.7000398353472315, + "grad_norm": 1.1144035500723286, + "learning_rate": 1.547489740665229e-05, + "loss": 0.30871254205703735, + "step": 2636 + }, + { + "epoch": 0.7003054043287744, + "grad_norm": 1.1599776854022048, + "learning_rate": 1.5471222632102168e-05, + "loss": 0.29414835572242737, + "step": 2637 + }, + { + "epoch": 0.7005709733103174, + "grad_norm": 1.019484878273918, + "learning_rate": 1.546754680276485e-05, + "loss": 0.2841604948043823, + "step": 2638 + }, + { + "epoch": 0.7008365422918603, + "grad_norm": 1.039625714192533, + "learning_rate": 1.546386991934899e-05, + "loss": 0.2895316183567047, + "step": 2639 + }, + { + "epoch": 0.7011021112734033, + "grad_norm": 1.0418724746200432, + "learning_rate": 1.546019198256345e-05, + "loss": 0.310278058052063, + "step": 2640 + }, + { + "epoch": 0.7013676802549462, + "grad_norm": 1.1737622034955963, + "learning_rate": 1.5456512993117297e-05, + "loss": 0.3000732660293579, + "step": 2641 + }, + { + "epoch": 0.7016332492364892, + "grad_norm": 1.034060473081883, + "learning_rate": 1.545283295171979e-05, + "loss": 0.2650133967399597, + "step": 2642 + }, + { + "epoch": 0.7018988182180321, + "grad_norm": 1.1833814596994714, + "learning_rate": 1.5449151859080395e-05, + "loss": 0.3414345681667328, + "step": 2643 + }, + { + "epoch": 0.7021643871995751, + "grad_norm": 0.9407765615747015, + "learning_rate": 1.5445469715908793e-05, + "loss": 0.26955321431159973, + "step": 2644 + }, + { + "epoch": 0.702429956181118, + "grad_norm": 1.0775826100815478, + "learning_rate": 1.5441786522914855e-05, + "loss": 0.3028743863105774, + "step": 2645 + }, + { + "epoch": 0.702695525162661, + "grad_norm": 1.1630883359211883, + "learning_rate": 1.5438102280808653e-05, + "loss": 0.28710106015205383, + "step": 2646 + }, + { + "epoch": 0.7029610941442039, + "grad_norm": 1.0828201415955274, + "learning_rate": 1.543441699030047e-05, + "loss": 0.33343076705932617, + "step": 2647 + }, + { + "epoch": 0.7032266631257469, + "grad_norm": 2.8774903725783445, + "learning_rate": 1.543073065210078e-05, + "loss": 0.27760642766952515, + "step": 2648 + }, + { + "epoch": 0.7034922321072898, + "grad_norm": 1.0939125975780095, + "learning_rate": 1.5427043266920276e-05, + "loss": 0.2844334840774536, + "step": 2649 + }, + { + "epoch": 0.7037578010888328, + "grad_norm": 1.0671776711844796, + "learning_rate": 1.542335483546983e-05, + "loss": 0.28979432582855225, + "step": 2650 + }, + { + "epoch": 0.7040233700703757, + "grad_norm": 1.1018820862649594, + "learning_rate": 1.5419665358460537e-05, + "loss": 0.313267320394516, + "step": 2651 + }, + { + "epoch": 0.7042889390519187, + "grad_norm": 1.122792570050495, + "learning_rate": 1.5415974836603676e-05, + "loss": 0.26702141761779785, + "step": 2652 + }, + { + "epoch": 0.7045545080334616, + "grad_norm": 1.084104909381419, + "learning_rate": 1.5412283270610752e-05, + "loss": 0.3256012499332428, + "step": 2653 + }, + { + "epoch": 0.7048200770150046, + "grad_norm": 1.1096374178765924, + "learning_rate": 1.540859066119344e-05, + "loss": 0.3035642206668854, + "step": 2654 + }, + { + "epoch": 0.7050856459965475, + "grad_norm": 1.1410920430169775, + "learning_rate": 1.5404897009063636e-05, + "loss": 0.32206645607948303, + "step": 2655 + }, + { + "epoch": 0.7053512149780906, + "grad_norm": 0.9596610334229038, + "learning_rate": 1.5401202314933436e-05, + "loss": 0.3023940920829773, + "step": 2656 + }, + { + "epoch": 0.7056167839596336, + "grad_norm": 0.9678878502259071, + "learning_rate": 1.539750657951513e-05, + "loss": 0.2839987277984619, + "step": 2657 + }, + { + "epoch": 0.7058823529411765, + "grad_norm": 0.9744312269236198, + "learning_rate": 1.5393809803521213e-05, + "loss": 0.2488149106502533, + "step": 2658 + }, + { + "epoch": 0.7061479219227195, + "grad_norm": 1.0311988168007409, + "learning_rate": 1.539011198766438e-05, + "loss": 0.27156201004981995, + "step": 2659 + }, + { + "epoch": 0.7064134909042624, + "grad_norm": 1.0925039664890526, + "learning_rate": 1.5386413132657528e-05, + "loss": 0.3038437068462372, + "step": 2660 + }, + { + "epoch": 0.7066790598858054, + "grad_norm": 0.9713190505037098, + "learning_rate": 1.5382713239213746e-05, + "loss": 0.27626922726631165, + "step": 2661 + }, + { + "epoch": 0.7069446288673483, + "grad_norm": 1.9675808121081846, + "learning_rate": 1.537901230804634e-05, + "loss": 0.27338162064552307, + "step": 2662 + }, + { + "epoch": 0.7072101978488913, + "grad_norm": 0.9540020890839573, + "learning_rate": 1.5375310339868798e-05, + "loss": 0.2635098099708557, + "step": 2663 + }, + { + "epoch": 0.7074757668304342, + "grad_norm": 1.1274430903932144, + "learning_rate": 1.537160733539482e-05, + "loss": 0.3245551288127899, + "step": 2664 + }, + { + "epoch": 0.7077413358119772, + "grad_norm": 1.1100804783644485, + "learning_rate": 1.53679032953383e-05, + "loss": 0.3226238787174225, + "step": 2665 + }, + { + "epoch": 0.7080069047935201, + "grad_norm": 1.0972084780717322, + "learning_rate": 1.536419822041333e-05, + "loss": 0.31588318943977356, + "step": 2666 + }, + { + "epoch": 0.7082724737750631, + "grad_norm": 1.031778059845932, + "learning_rate": 1.536049211133421e-05, + "loss": 0.2494429647922516, + "step": 2667 + }, + { + "epoch": 0.708538042756606, + "grad_norm": 1.1110915785079796, + "learning_rate": 1.5356784968815436e-05, + "loss": 0.30966901779174805, + "step": 2668 + }, + { + "epoch": 0.708803611738149, + "grad_norm": 1.1803956993815392, + "learning_rate": 1.5353076793571692e-05, + "loss": 0.29383328557014465, + "step": 2669 + }, + { + "epoch": 0.7090691807196919, + "grad_norm": 1.086625008831518, + "learning_rate": 1.5349367586317875e-05, + "loss": 0.30337825417518616, + "step": 2670 + }, + { + "epoch": 0.7093347497012349, + "grad_norm": 1.0049086741144315, + "learning_rate": 1.5345657347769082e-05, + "loss": 0.28128665685653687, + "step": 2671 + }, + { + "epoch": 0.7096003186827778, + "grad_norm": 1.1819105498956106, + "learning_rate": 1.5341946078640594e-05, + "loss": 0.35167062282562256, + "step": 2672 + }, + { + "epoch": 0.7098658876643208, + "grad_norm": 1.0441531577784944, + "learning_rate": 1.533823377964791e-05, + "loss": 0.30409517884254456, + "step": 2673 + }, + { + "epoch": 0.7101314566458637, + "grad_norm": 1.013441954819978, + "learning_rate": 1.5334520451506706e-05, + "loss": 0.2667735815048218, + "step": 2674 + }, + { + "epoch": 0.7103970256274067, + "grad_norm": 1.130854753100919, + "learning_rate": 1.5330806094932876e-05, + "loss": 0.290219247341156, + "step": 2675 + }, + { + "epoch": 0.7106625946089496, + "grad_norm": 1.120803532670259, + "learning_rate": 1.5327090710642503e-05, + "loss": 0.33118927478790283, + "step": 2676 + }, + { + "epoch": 0.7109281635904926, + "grad_norm": 1.2896959817209073, + "learning_rate": 1.5323374299351867e-05, + "loss": 0.34287041425704956, + "step": 2677 + }, + { + "epoch": 0.7111937325720356, + "grad_norm": 1.0183367847991263, + "learning_rate": 1.531965686177745e-05, + "loss": 0.27093711495399475, + "step": 2678 + }, + { + "epoch": 0.7114593015535785, + "grad_norm": 1.0913550671130643, + "learning_rate": 1.531593839863593e-05, + "loss": 0.2987911105155945, + "step": 2679 + }, + { + "epoch": 0.7117248705351215, + "grad_norm": 1.0145664449432468, + "learning_rate": 1.5312218910644185e-05, + "loss": 0.2914583086967468, + "step": 2680 + }, + { + "epoch": 0.7119904395166644, + "grad_norm": 1.0712171950199525, + "learning_rate": 1.530849839851928e-05, + "loss": 0.34159964323043823, + "step": 2681 + }, + { + "epoch": 0.7122560084982074, + "grad_norm": 1.0132523095253043, + "learning_rate": 1.5304776862978496e-05, + "loss": 0.28327372670173645, + "step": 2682 + }, + { + "epoch": 0.7125215774797503, + "grad_norm": 1.0473430655235008, + "learning_rate": 1.5301054304739292e-05, + "loss": 0.2902851104736328, + "step": 2683 + }, + { + "epoch": 0.7127871464612934, + "grad_norm": 1.106440530120003, + "learning_rate": 1.5297330724519344e-05, + "loss": 0.3192726969718933, + "step": 2684 + }, + { + "epoch": 0.7130527154428363, + "grad_norm": 1.0682705697817987, + "learning_rate": 1.5293606123036508e-05, + "loss": 0.30242764949798584, + "step": 2685 + }, + { + "epoch": 0.7133182844243793, + "grad_norm": 1.0059439200202651, + "learning_rate": 1.528988050100884e-05, + "loss": 0.2718653082847595, + "step": 2686 + }, + { + "epoch": 0.7135838534059222, + "grad_norm": 1.019566462631627, + "learning_rate": 1.52861538591546e-05, + "loss": 0.3014821708202362, + "step": 2687 + }, + { + "epoch": 0.7138494223874652, + "grad_norm": 1.1473508187880241, + "learning_rate": 1.528242619819224e-05, + "loss": 0.3378177881240845, + "step": 2688 + }, + { + "epoch": 0.7141149913690081, + "grad_norm": 1.0632179838195628, + "learning_rate": 1.5278697518840415e-05, + "loss": 0.29286471009254456, + "step": 2689 + }, + { + "epoch": 0.7143805603505511, + "grad_norm": 1.1140242619678895, + "learning_rate": 1.527496782181796e-05, + "loss": 0.3371768593788147, + "step": 2690 + }, + { + "epoch": 0.714646129332094, + "grad_norm": 1.0421377750374783, + "learning_rate": 1.5271237107843925e-05, + "loss": 0.30571556091308594, + "step": 2691 + }, + { + "epoch": 0.714911698313637, + "grad_norm": 1.0650624138184501, + "learning_rate": 1.526750537763754e-05, + "loss": 0.33064618706703186, + "step": 2692 + }, + { + "epoch": 0.7151772672951799, + "grad_norm": 1.0787164498543842, + "learning_rate": 1.5263772631918242e-05, + "loss": 0.3369274139404297, + "step": 2693 + }, + { + "epoch": 0.7154428362767229, + "grad_norm": 1.079249778019668, + "learning_rate": 1.5260038871405663e-05, + "loss": 0.2422705739736557, + "step": 2694 + }, + { + "epoch": 0.7157084052582658, + "grad_norm": 1.3990281605221084, + "learning_rate": 1.5256304096819628e-05, + "loss": 0.35786008834838867, + "step": 2695 + }, + { + "epoch": 0.7159739742398088, + "grad_norm": 1.0368618301698236, + "learning_rate": 1.5252568308880155e-05, + "loss": 0.2853243052959442, + "step": 2696 + }, + { + "epoch": 0.7162395432213517, + "grad_norm": 1.1300838792843926, + "learning_rate": 1.5248831508307459e-05, + "loss": 0.2903040051460266, + "step": 2697 + }, + { + "epoch": 0.7165051122028947, + "grad_norm": 1.0779989148221412, + "learning_rate": 1.5245093695821954e-05, + "loss": 0.3375359773635864, + "step": 2698 + }, + { + "epoch": 0.7167706811844377, + "grad_norm": 0.9828776196369989, + "learning_rate": 1.5241354872144242e-05, + "loss": 0.27855974435806274, + "step": 2699 + }, + { + "epoch": 0.7170362501659806, + "grad_norm": 1.0672391327565405, + "learning_rate": 1.5237615037995129e-05, + "loss": 0.32226768136024475, + "step": 2700 + }, + { + "epoch": 0.7173018191475236, + "grad_norm": 1.1089458515112456, + "learning_rate": 1.5233874194095606e-05, + "loss": 0.32856303453445435, + "step": 2701 + }, + { + "epoch": 0.7175673881290665, + "grad_norm": 1.15556869357308, + "learning_rate": 1.5230132341166868e-05, + "loss": 0.31619006395339966, + "step": 2702 + }, + { + "epoch": 0.7178329571106095, + "grad_norm": 1.09474796019269, + "learning_rate": 1.5226389479930296e-05, + "loss": 0.29736411571502686, + "step": 2703 + }, + { + "epoch": 0.7180985260921524, + "grad_norm": 1.0969127487202406, + "learning_rate": 1.5222645611107477e-05, + "loss": 0.2767728865146637, + "step": 2704 + }, + { + "epoch": 0.7183640950736954, + "grad_norm": 1.054074095850648, + "learning_rate": 1.5218900735420174e-05, + "loss": 0.30994221568107605, + "step": 2705 + }, + { + "epoch": 0.7186296640552383, + "grad_norm": 1.0931807335310835, + "learning_rate": 1.5215154853590362e-05, + "loss": 0.3419484496116638, + "step": 2706 + }, + { + "epoch": 0.7188952330367813, + "grad_norm": 1.0503021732812985, + "learning_rate": 1.5211407966340203e-05, + "loss": 0.3063664436340332, + "step": 2707 + }, + { + "epoch": 0.7191608020183242, + "grad_norm": 1.0345938706194526, + "learning_rate": 1.520766007439205e-05, + "loss": 0.2856604754924774, + "step": 2708 + }, + { + "epoch": 0.7194263709998672, + "grad_norm": 0.9757823992785323, + "learning_rate": 1.5203911178468453e-05, + "loss": 0.23257851600646973, + "step": 2709 + }, + { + "epoch": 0.7196919399814101, + "grad_norm": 1.0292145399058534, + "learning_rate": 1.5200161279292154e-05, + "loss": 0.31451839208602905, + "step": 2710 + }, + { + "epoch": 0.7199575089629531, + "grad_norm": 1.1017577588578753, + "learning_rate": 1.5196410377586095e-05, + "loss": 0.30298277735710144, + "step": 2711 + }, + { + "epoch": 0.7202230779444961, + "grad_norm": 1.0759590578514124, + "learning_rate": 1.5192658474073398e-05, + "loss": 0.28654640913009644, + "step": 2712 + }, + { + "epoch": 0.7204886469260391, + "grad_norm": 1.1189221983197806, + "learning_rate": 1.5188905569477391e-05, + "loss": 0.3148455023765564, + "step": 2713 + }, + { + "epoch": 0.720754215907582, + "grad_norm": 1.079970608729249, + "learning_rate": 1.5185151664521585e-05, + "loss": 0.3004840612411499, + "step": 2714 + }, + { + "epoch": 0.721019784889125, + "grad_norm": 1.206470642332625, + "learning_rate": 1.518139675992969e-05, + "loss": 0.3378010392189026, + "step": 2715 + }, + { + "epoch": 0.721285353870668, + "grad_norm": 1.0802971688897103, + "learning_rate": 1.517764085642561e-05, + "loss": 0.3084215223789215, + "step": 2716 + }, + { + "epoch": 0.7215509228522109, + "grad_norm": 1.1196175790564493, + "learning_rate": 1.517388395473344e-05, + "loss": 0.3434324264526367, + "step": 2717 + }, + { + "epoch": 0.7218164918337538, + "grad_norm": 1.2084125695848371, + "learning_rate": 1.517012605557746e-05, + "loss": 0.2862265706062317, + "step": 2718 + }, + { + "epoch": 0.7220820608152968, + "grad_norm": 0.9574562560549519, + "learning_rate": 1.5166367159682156e-05, + "loss": 0.2760370671749115, + "step": 2719 + }, + { + "epoch": 0.7223476297968398, + "grad_norm": 1.0623260792686084, + "learning_rate": 1.5162607267772194e-05, + "loss": 0.26659202575683594, + "step": 2720 + }, + { + "epoch": 0.7226131987783827, + "grad_norm": 1.069380288412464, + "learning_rate": 1.5158846380572439e-05, + "loss": 0.31900978088378906, + "step": 2721 + }, + { + "epoch": 0.7228787677599257, + "grad_norm": 0.9775730121294547, + "learning_rate": 1.5155084498807941e-05, + "loss": 0.2983658015727997, + "step": 2722 + }, + { + "epoch": 0.7231443367414686, + "grad_norm": 1.0202126383266699, + "learning_rate": 1.5151321623203953e-05, + "loss": 0.3086162805557251, + "step": 2723 + }, + { + "epoch": 0.7234099057230116, + "grad_norm": 1.2685875339489936, + "learning_rate": 1.5147557754485908e-05, + "loss": 0.3233461380004883, + "step": 2724 + }, + { + "epoch": 0.7236754747045545, + "grad_norm": 1.1386667332230644, + "learning_rate": 1.5143792893379441e-05, + "loss": 0.2979195713996887, + "step": 2725 + }, + { + "epoch": 0.7239410436860975, + "grad_norm": 0.9598628443474388, + "learning_rate": 1.5140027040610367e-05, + "loss": 0.27854713797569275, + "step": 2726 + }, + { + "epoch": 0.7242066126676404, + "grad_norm": 1.0735596908703036, + "learning_rate": 1.5136260196904704e-05, + "loss": 0.293560266494751, + "step": 2727 + }, + { + "epoch": 0.7244721816491834, + "grad_norm": 1.1273149809893865, + "learning_rate": 1.513249236298865e-05, + "loss": 0.3033742308616638, + "step": 2728 + }, + { + "epoch": 0.7247377506307263, + "grad_norm": 1.1425183002588892, + "learning_rate": 1.51287235395886e-05, + "loss": 0.27958324551582336, + "step": 2729 + }, + { + "epoch": 0.7250033196122693, + "grad_norm": 1.022839475112705, + "learning_rate": 1.512495372743114e-05, + "loss": 0.3063122034072876, + "step": 2730 + }, + { + "epoch": 0.7252688885938122, + "grad_norm": 1.0524007495354166, + "learning_rate": 1.5121182927243043e-05, + "loss": 0.29126864671707153, + "step": 2731 + }, + { + "epoch": 0.7255344575753552, + "grad_norm": 1.0517432179455284, + "learning_rate": 1.5117411139751279e-05, + "loss": 0.27507084608078003, + "step": 2732 + }, + { + "epoch": 0.7258000265568981, + "grad_norm": 1.1167955582078537, + "learning_rate": 1.5113638365682996e-05, + "loss": 0.3432404398918152, + "step": 2733 + }, + { + "epoch": 0.7260655955384411, + "grad_norm": 1.0687371329401973, + "learning_rate": 1.5109864605765552e-05, + "loss": 0.27633196115493774, + "step": 2734 + }, + { + "epoch": 0.726331164519984, + "grad_norm": 1.0811244514830984, + "learning_rate": 1.5106089860726474e-05, + "loss": 0.274509072303772, + "step": 2735 + }, + { + "epoch": 0.726596733501527, + "grad_norm": 0.97012581020674, + "learning_rate": 1.5102314131293494e-05, + "loss": 0.26650723814964294, + "step": 2736 + }, + { + "epoch": 0.7268623024830699, + "grad_norm": 0.9681782432226156, + "learning_rate": 1.5098537418194524e-05, + "loss": 0.24476298689842224, + "step": 2737 + }, + { + "epoch": 0.7271278714646129, + "grad_norm": 1.1154772400244737, + "learning_rate": 1.5094759722157671e-05, + "loss": 0.3337150812149048, + "step": 2738 + }, + { + "epoch": 0.7273934404461558, + "grad_norm": 1.0187825093211873, + "learning_rate": 1.509098104391123e-05, + "loss": 0.3147660195827484, + "step": 2739 + }, + { + "epoch": 0.7276590094276989, + "grad_norm": 0.969229068573487, + "learning_rate": 1.5087201384183687e-05, + "loss": 0.2613281309604645, + "step": 2740 + }, + { + "epoch": 0.7279245784092419, + "grad_norm": 1.0641712204852296, + "learning_rate": 1.5083420743703717e-05, + "loss": 0.2773926854133606, + "step": 2741 + }, + { + "epoch": 0.7281901473907848, + "grad_norm": 1.0826759541494775, + "learning_rate": 1.5079639123200179e-05, + "loss": 0.30515575408935547, + "step": 2742 + }, + { + "epoch": 0.7284557163723278, + "grad_norm": 1.0619554532285063, + "learning_rate": 1.5075856523402128e-05, + "loss": 0.3174355626106262, + "step": 2743 + }, + { + "epoch": 0.7287212853538707, + "grad_norm": 0.9676487172589012, + "learning_rate": 1.5072072945038802e-05, + "loss": 0.25163760781288147, + "step": 2744 + }, + { + "epoch": 0.7289868543354137, + "grad_norm": 1.009992458232401, + "learning_rate": 1.5068288388839634e-05, + "loss": 0.28822118043899536, + "step": 2745 + }, + { + "epoch": 0.7292524233169566, + "grad_norm": 1.1623698216562623, + "learning_rate": 1.5064502855534237e-05, + "loss": 0.3129134476184845, + "step": 2746 + }, + { + "epoch": 0.7295179922984996, + "grad_norm": 1.0993962878508883, + "learning_rate": 1.5060716345852423e-05, + "loss": 0.332313597202301, + "step": 2747 + }, + { + "epoch": 0.7297835612800425, + "grad_norm": 1.1989932540466257, + "learning_rate": 1.5056928860524181e-05, + "loss": 0.3425176739692688, + "step": 2748 + }, + { + "epoch": 0.7300491302615855, + "grad_norm": 1.006044605592889, + "learning_rate": 1.5053140400279693e-05, + "loss": 0.2737991511821747, + "step": 2749 + }, + { + "epoch": 0.7303146992431284, + "grad_norm": 0.963162900300573, + "learning_rate": 1.5049350965849337e-05, + "loss": 0.27506589889526367, + "step": 2750 + }, + { + "epoch": 0.7305802682246714, + "grad_norm": 0.9901021314780329, + "learning_rate": 1.5045560557963663e-05, + "loss": 0.25581830739974976, + "step": 2751 + }, + { + "epoch": 0.7308458372062143, + "grad_norm": 1.0977147554610498, + "learning_rate": 1.5041769177353423e-05, + "loss": 0.31746333837509155, + "step": 2752 + }, + { + "epoch": 0.7311114061877573, + "grad_norm": 1.142455577048558, + "learning_rate": 1.5037976824749545e-05, + "loss": 0.3119337260723114, + "step": 2753 + }, + { + "epoch": 0.7313769751693002, + "grad_norm": 1.0824713857839723, + "learning_rate": 1.5034183500883153e-05, + "loss": 0.3330266773700714, + "step": 2754 + }, + { + "epoch": 0.7316425441508432, + "grad_norm": 1.1870819737785345, + "learning_rate": 1.5030389206485554e-05, + "loss": 0.2794867753982544, + "step": 2755 + }, + { + "epoch": 0.7319081131323861, + "grad_norm": 1.0826714009199063, + "learning_rate": 1.5026593942288248e-05, + "loss": 0.33273079991340637, + "step": 2756 + }, + { + "epoch": 0.7321736821139291, + "grad_norm": 1.1000195904608074, + "learning_rate": 1.502279770902291e-05, + "loss": 0.30673256516456604, + "step": 2757 + }, + { + "epoch": 0.732439251095472, + "grad_norm": 1.1311236734843304, + "learning_rate": 1.5019000507421412e-05, + "loss": 0.3126910924911499, + "step": 2758 + }, + { + "epoch": 0.732704820077015, + "grad_norm": 1.1665747930638253, + "learning_rate": 1.5015202338215811e-05, + "loss": 0.35423290729522705, + "step": 2759 + }, + { + "epoch": 0.7329703890585579, + "grad_norm": 1.0691634248957984, + "learning_rate": 1.5011403202138346e-05, + "loss": 0.31541377305984497, + "step": 2760 + }, + { + "epoch": 0.7332359580401009, + "grad_norm": 3.4446251175420257, + "learning_rate": 1.5007603099921451e-05, + "loss": 0.31460440158843994, + "step": 2761 + }, + { + "epoch": 0.7335015270216438, + "grad_norm": 1.0828016056563536, + "learning_rate": 1.5003802032297735e-05, + "loss": 0.2786293923854828, + "step": 2762 + }, + { + "epoch": 0.7337670960031868, + "grad_norm": 1.1025311021139896, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.27977997064590454, + "step": 2763 + }, + { + "epoch": 0.7340326649847297, + "grad_norm": 1.1136339551828278, + "learning_rate": 1.4996197003761237e-05, + "loss": 0.2933383584022522, + "step": 2764 + }, + { + "epoch": 0.7342982339662727, + "grad_norm": 1.0743056930311463, + "learning_rate": 1.4992393044314617e-05, + "loss": 0.30623573064804077, + "step": 2765 + }, + { + "epoch": 0.7345638029478156, + "grad_norm": 1.112681662128017, + "learning_rate": 1.4988588122393497e-05, + "loss": 0.28665077686309814, + "step": 2766 + }, + { + "epoch": 0.7348293719293586, + "grad_norm": 1.0268941907147413, + "learning_rate": 1.4984782238731422e-05, + "loss": 0.3245697021484375, + "step": 2767 + }, + { + "epoch": 0.7350949409109017, + "grad_norm": 1.118864717612721, + "learning_rate": 1.4980975394062122e-05, + "loss": 0.29477447271347046, + "step": 2768 + }, + { + "epoch": 0.7353605098924446, + "grad_norm": 1.009879072463833, + "learning_rate": 1.4977167589119508e-05, + "loss": 0.29174134135246277, + "step": 2769 + }, + { + "epoch": 0.7356260788739876, + "grad_norm": 1.010733766191454, + "learning_rate": 1.4973358824637687e-05, + "loss": 0.29473474621772766, + "step": 2770 + }, + { + "epoch": 0.7358916478555305, + "grad_norm": 1.3454647120520804, + "learning_rate": 1.4969549101350938e-05, + "loss": 0.3095156252384186, + "step": 2771 + }, + { + "epoch": 0.7361572168370735, + "grad_norm": 1.0578448721867733, + "learning_rate": 1.4965738419993733e-05, + "loss": 0.26295265555381775, + "step": 2772 + }, + { + "epoch": 0.7364227858186164, + "grad_norm": 1.0590497560307077, + "learning_rate": 1.4961926781300723e-05, + "loss": 0.2989509701728821, + "step": 2773 + }, + { + "epoch": 0.7366883548001594, + "grad_norm": 1.0783454816561941, + "learning_rate": 1.4958114186006756e-05, + "loss": 0.31087079644203186, + "step": 2774 + }, + { + "epoch": 0.7369539237817023, + "grad_norm": 1.0953647378016445, + "learning_rate": 1.4954300634846845e-05, + "loss": 0.3063197433948517, + "step": 2775 + }, + { + "epoch": 0.7372194927632453, + "grad_norm": 1.0858506486148067, + "learning_rate": 1.4950486128556208e-05, + "loss": 0.3149424195289612, + "step": 2776 + }, + { + "epoch": 0.7374850617447882, + "grad_norm": 1.0199984929310564, + "learning_rate": 1.4946670667870224e-05, + "loss": 0.2724878191947937, + "step": 2777 + }, + { + "epoch": 0.7377506307263312, + "grad_norm": 1.0033150283887489, + "learning_rate": 1.4942854253524479e-05, + "loss": 0.2556690275669098, + "step": 2778 + }, + { + "epoch": 0.7380161997078741, + "grad_norm": 1.0594159401263619, + "learning_rate": 1.4939036886254727e-05, + "loss": 0.2704542875289917, + "step": 2779 + }, + { + "epoch": 0.7382817686894171, + "grad_norm": 1.052456117640013, + "learning_rate": 1.4935218566796918e-05, + "loss": 0.26762163639068604, + "step": 2780 + }, + { + "epoch": 0.73854733767096, + "grad_norm": 1.1328164222449624, + "learning_rate": 1.4931399295887172e-05, + "loss": 0.3376831114292145, + "step": 2781 + }, + { + "epoch": 0.738812906652503, + "grad_norm": 1.0695003562166123, + "learning_rate": 1.4927579074261803e-05, + "loss": 0.2980082631111145, + "step": 2782 + }, + { + "epoch": 0.7390784756340459, + "grad_norm": 1.0340858480290613, + "learning_rate": 1.4923757902657306e-05, + "loss": 0.27693796157836914, + "step": 2783 + }, + { + "epoch": 0.7393440446155889, + "grad_norm": 1.0204290883803, + "learning_rate": 1.4919935781810353e-05, + "loss": 0.3109282851219177, + "step": 2784 + }, + { + "epoch": 0.7396096135971318, + "grad_norm": 1.12631585013599, + "learning_rate": 1.4916112712457807e-05, + "loss": 0.3123949468135834, + "step": 2785 + }, + { + "epoch": 0.7398751825786748, + "grad_norm": 1.143039341014623, + "learning_rate": 1.4912288695336709e-05, + "loss": 0.3232062757015228, + "step": 2786 + }, + { + "epoch": 0.7401407515602177, + "grad_norm": 1.0315778016896975, + "learning_rate": 1.4908463731184287e-05, + "loss": 0.2685563862323761, + "step": 2787 + }, + { + "epoch": 0.7404063205417607, + "grad_norm": 1.076569860938466, + "learning_rate": 1.4904637820737945e-05, + "loss": 0.25752881169319153, + "step": 2788 + }, + { + "epoch": 0.7406718895233037, + "grad_norm": 1.2236263687690485, + "learning_rate": 1.4900810964735279e-05, + "loss": 0.2887497544288635, + "step": 2789 + }, + { + "epoch": 0.7409374585048466, + "grad_norm": 1.126755867019387, + "learning_rate": 1.489698316391406e-05, + "loss": 0.28804779052734375, + "step": 2790 + }, + { + "epoch": 0.7412030274863896, + "grad_norm": 1.0931262335064922, + "learning_rate": 1.489315441901224e-05, + "loss": 0.2684408724308014, + "step": 2791 + }, + { + "epoch": 0.7414685964679325, + "grad_norm": 1.0509233991385625, + "learning_rate": 1.4889324730767959e-05, + "loss": 0.31945526599884033, + "step": 2792 + }, + { + "epoch": 0.7417341654494755, + "grad_norm": 1.3391113530092205, + "learning_rate": 1.488549409991953e-05, + "loss": 0.34446024894714355, + "step": 2793 + }, + { + "epoch": 0.7419997344310184, + "grad_norm": 1.094751814978447, + "learning_rate": 1.488166252720546e-05, + "loss": 0.28849151730537415, + "step": 2794 + }, + { + "epoch": 0.7422653034125614, + "grad_norm": 1.0431424597135226, + "learning_rate": 1.4877830013364429e-05, + "loss": 0.2793633043766022, + "step": 2795 + }, + { + "epoch": 0.7425308723941043, + "grad_norm": 1.1811188011136542, + "learning_rate": 1.4873996559135298e-05, + "loss": 0.3211687505245209, + "step": 2796 + }, + { + "epoch": 0.7427964413756474, + "grad_norm": 1.004634818722801, + "learning_rate": 1.4870162165257114e-05, + "loss": 0.26225876808166504, + "step": 2797 + }, + { + "epoch": 0.7430620103571903, + "grad_norm": 1.7885293848946355, + "learning_rate": 1.4866326832469105e-05, + "loss": 0.3100029528141022, + "step": 2798 + }, + { + "epoch": 0.7433275793387333, + "grad_norm": 1.0428487423040855, + "learning_rate": 1.4862490561510675e-05, + "loss": 0.29399827122688293, + "step": 2799 + }, + { + "epoch": 0.7435931483202762, + "grad_norm": 0.9886298200418341, + "learning_rate": 1.4858653353121412e-05, + "loss": 0.27357399463653564, + "step": 2800 + }, + { + "epoch": 0.7438587173018192, + "grad_norm": 1.1101962385134683, + "learning_rate": 1.4854815208041087e-05, + "loss": 0.34575730562210083, + "step": 2801 + }, + { + "epoch": 0.7441242862833621, + "grad_norm": 1.0351474931606812, + "learning_rate": 1.4850976127009644e-05, + "loss": 0.28487247228622437, + "step": 2802 + }, + { + "epoch": 0.7443898552649051, + "grad_norm": 1.0283492066128257, + "learning_rate": 1.484713611076722e-05, + "loss": 0.264443576335907, + "step": 2803 + }, + { + "epoch": 0.744655424246448, + "grad_norm": 1.085429543255666, + "learning_rate": 1.4843295160054116e-05, + "loss": 0.32750973105430603, + "step": 2804 + }, + { + "epoch": 0.744920993227991, + "grad_norm": 1.0136013055294886, + "learning_rate": 1.4839453275610827e-05, + "loss": 0.24080191552639008, + "step": 2805 + }, + { + "epoch": 0.7451865622095339, + "grad_norm": 1.1486643921382949, + "learning_rate": 1.4835610458178025e-05, + "loss": 0.31667011976242065, + "step": 2806 + }, + { + "epoch": 0.7454521311910769, + "grad_norm": 1.0103490185384167, + "learning_rate": 1.4831766708496553e-05, + "loss": 0.2754175066947937, + "step": 2807 + }, + { + "epoch": 0.7457177001726198, + "grad_norm": 1.0607394107689443, + "learning_rate": 1.482792202730745e-05, + "loss": 0.2890132963657379, + "step": 2808 + }, + { + "epoch": 0.7459832691541628, + "grad_norm": 1.049970305589495, + "learning_rate": 1.4824076415351918e-05, + "loss": 0.3402877748012543, + "step": 2809 + }, + { + "epoch": 0.7462488381357057, + "grad_norm": 1.0879104018503691, + "learning_rate": 1.4820229873371347e-05, + "loss": 0.3167210519313812, + "step": 2810 + }, + { + "epoch": 0.7465144071172487, + "grad_norm": 0.9983910427341833, + "learning_rate": 1.4816382402107308e-05, + "loss": 0.2653643786907196, + "step": 2811 + }, + { + "epoch": 0.7467799760987917, + "grad_norm": 1.2191167585139304, + "learning_rate": 1.4812534002301547e-05, + "loss": 0.3202674984931946, + "step": 2812 + }, + { + "epoch": 0.7470455450803346, + "grad_norm": 1.0461975743299208, + "learning_rate": 1.4808684674695985e-05, + "loss": 0.2942724823951721, + "step": 2813 + }, + { + "epoch": 0.7473111140618776, + "grad_norm": 1.0581736193326858, + "learning_rate": 1.480483442003273e-05, + "loss": 0.28640663623809814, + "step": 2814 + }, + { + "epoch": 0.7475766830434205, + "grad_norm": 0.9932743335315769, + "learning_rate": 1.4800983239054071e-05, + "loss": 0.26214420795440674, + "step": 2815 + }, + { + "epoch": 0.7478422520249635, + "grad_norm": 1.0324489729554576, + "learning_rate": 1.4797131132502464e-05, + "loss": 0.3288992643356323, + "step": 2816 + }, + { + "epoch": 0.7481078210065064, + "grad_norm": 0.9775792939666473, + "learning_rate": 1.4793278101120551e-05, + "loss": 0.2622208297252655, + "step": 2817 + }, + { + "epoch": 0.7483733899880494, + "grad_norm": 1.0856486279870832, + "learning_rate": 1.4789424145651152e-05, + "loss": 0.3223533034324646, + "step": 2818 + }, + { + "epoch": 0.7486389589695923, + "grad_norm": 0.9640735701611682, + "learning_rate": 1.4785569266837264e-05, + "loss": 0.25849875807762146, + "step": 2819 + }, + { + "epoch": 0.7489045279511353, + "grad_norm": 1.20204465384733, + "learning_rate": 1.478171346542206e-05, + "loss": 0.3477833569049835, + "step": 2820 + }, + { + "epoch": 0.7491700969326782, + "grad_norm": 1.0577809669167442, + "learning_rate": 1.4777856742148897e-05, + "loss": 0.2799205780029297, + "step": 2821 + }, + { + "epoch": 0.7494356659142212, + "grad_norm": 1.624939710599736, + "learning_rate": 1.4773999097761304e-05, + "loss": 0.2591988444328308, + "step": 2822 + }, + { + "epoch": 0.7497012348957641, + "grad_norm": 1.2869478314125868, + "learning_rate": 1.477014053300299e-05, + "loss": 0.30161747336387634, + "step": 2823 + }, + { + "epoch": 0.7499668038773071, + "grad_norm": 1.0738509532979332, + "learning_rate": 1.4766281048617837e-05, + "loss": 0.28202176094055176, + "step": 2824 + }, + { + "epoch": 0.7502323728588501, + "grad_norm": 1.0042946509670743, + "learning_rate": 1.4762420645349912e-05, + "loss": 0.26074907183647156, + "step": 2825 + }, + { + "epoch": 0.7504979418403931, + "grad_norm": 1.1385436298617553, + "learning_rate": 1.4758559323943455e-05, + "loss": 0.2822819948196411, + "step": 2826 + }, + { + "epoch": 0.750763510821936, + "grad_norm": 1.1069166183989807, + "learning_rate": 1.4754697085142879e-05, + "loss": 0.2704991102218628, + "step": 2827 + }, + { + "epoch": 0.751029079803479, + "grad_norm": 1.1005590878466516, + "learning_rate": 1.4750833929692785e-05, + "loss": 0.2627401053905487, + "step": 2828 + }, + { + "epoch": 0.751294648785022, + "grad_norm": 1.0886740028659867, + "learning_rate": 1.474696985833794e-05, + "loss": 0.2898240089416504, + "step": 2829 + }, + { + "epoch": 0.7515602177665649, + "grad_norm": 1.0291450176805186, + "learning_rate": 1.4743104871823291e-05, + "loss": 0.30080029368400574, + "step": 2830 + }, + { + "epoch": 0.7518257867481078, + "grad_norm": 1.0953597523125502, + "learning_rate": 1.473923897089396e-05, + "loss": 0.2950359284877777, + "step": 2831 + }, + { + "epoch": 0.7520913557296508, + "grad_norm": 1.1129882579718784, + "learning_rate": 1.4735372156295253e-05, + "loss": 0.31936827301979065, + "step": 2832 + }, + { + "epoch": 0.7523569247111938, + "grad_norm": 1.1117484749822675, + "learning_rate": 1.4731504428772642e-05, + "loss": 0.2771468460559845, + "step": 2833 + }, + { + "epoch": 0.7526224936927367, + "grad_norm": 1.1332551367729735, + "learning_rate": 1.4727635789071779e-05, + "loss": 0.3135997951030731, + "step": 2834 + }, + { + "epoch": 0.7528880626742797, + "grad_norm": 1.1215560189558773, + "learning_rate": 1.4723766237938495e-05, + "loss": 0.29874372482299805, + "step": 2835 + }, + { + "epoch": 0.7531536316558226, + "grad_norm": 1.0292177835845961, + "learning_rate": 1.4719895776118789e-05, + "loss": 0.249681293964386, + "step": 2836 + }, + { + "epoch": 0.7534192006373656, + "grad_norm": 1.0567186687732057, + "learning_rate": 1.4716024404358847e-05, + "loss": 0.28544771671295166, + "step": 2837 + }, + { + "epoch": 0.7536847696189085, + "grad_norm": 1.1290911495331684, + "learning_rate": 1.4712152123405018e-05, + "loss": 0.32532355189323425, + "step": 2838 + }, + { + "epoch": 0.7539503386004515, + "grad_norm": 1.1212187873017119, + "learning_rate": 1.4708278934003835e-05, + "loss": 0.31663140654563904, + "step": 2839 + }, + { + "epoch": 0.7542159075819944, + "grad_norm": 1.123142254862964, + "learning_rate": 1.4704404836902005e-05, + "loss": 0.30552318692207336, + "step": 2840 + }, + { + "epoch": 0.7544814765635374, + "grad_norm": 1.1574657252500693, + "learning_rate": 1.47005298328464e-05, + "loss": 0.3019601106643677, + "step": 2841 + }, + { + "epoch": 0.7547470455450803, + "grad_norm": 1.0814580547673966, + "learning_rate": 1.4696653922584084e-05, + "loss": 0.321606308221817, + "step": 2842 + }, + { + "epoch": 0.7550126145266233, + "grad_norm": 1.138590953455986, + "learning_rate": 1.4692777106862281e-05, + "loss": 0.2709462642669678, + "step": 2843 + }, + { + "epoch": 0.7552781835081662, + "grad_norm": 1.1366302949330385, + "learning_rate": 1.46888993864284e-05, + "loss": 0.2882609963417053, + "step": 2844 + }, + { + "epoch": 0.7555437524897092, + "grad_norm": 0.9948609987035232, + "learning_rate": 1.4685020762030019e-05, + "loss": 0.25843000411987305, + "step": 2845 + }, + { + "epoch": 0.7558093214712521, + "grad_norm": 1.1002004205654323, + "learning_rate": 1.4681141234414889e-05, + "loss": 0.30962038040161133, + "step": 2846 + }, + { + "epoch": 0.7560748904527951, + "grad_norm": 1.2025960097123465, + "learning_rate": 1.4677260804330938e-05, + "loss": 0.304874062538147, + "step": 2847 + }, + { + "epoch": 0.756340459434338, + "grad_norm": 1.2287867091921092, + "learning_rate": 1.4673379472526268e-05, + "loss": 0.3425619602203369, + "step": 2848 + }, + { + "epoch": 0.756606028415881, + "grad_norm": 1.0701256182117689, + "learning_rate": 1.4669497239749153e-05, + "loss": 0.3002302050590515, + "step": 2849 + }, + { + "epoch": 0.7568715973974239, + "grad_norm": 1.1005370830207322, + "learning_rate": 1.4665614106748038e-05, + "loss": 0.31008803844451904, + "step": 2850 + }, + { + "epoch": 0.7571371663789669, + "grad_norm": 1.0175712407141912, + "learning_rate": 1.4661730074271551e-05, + "loss": 0.27829408645629883, + "step": 2851 + }, + { + "epoch": 0.7574027353605098, + "grad_norm": 1.0501959661073665, + "learning_rate": 1.4657845143068488e-05, + "loss": 0.25915467739105225, + "step": 2852 + }, + { + "epoch": 0.7576683043420529, + "grad_norm": 1.0719536636155031, + "learning_rate": 1.4653959313887813e-05, + "loss": 0.2843416929244995, + "step": 2853 + }, + { + "epoch": 0.7579338733235959, + "grad_norm": 1.0489373710223147, + "learning_rate": 1.465007258747867e-05, + "loss": 0.2851647138595581, + "step": 2854 + }, + { + "epoch": 0.7581994423051388, + "grad_norm": 1.085754694338766, + "learning_rate": 1.4646184964590378e-05, + "loss": 0.266017884016037, + "step": 2855 + }, + { + "epoch": 0.7584650112866818, + "grad_norm": 1.0789098348141843, + "learning_rate": 1.4642296445972421e-05, + "loss": 0.30142179131507874, + "step": 2856 + }, + { + "epoch": 0.7587305802682247, + "grad_norm": 0.9904299934324251, + "learning_rate": 1.463840703237446e-05, + "loss": 0.2878327965736389, + "step": 2857 + }, + { + "epoch": 0.7589961492497677, + "grad_norm": 1.114310168260114, + "learning_rate": 1.4634516724546326e-05, + "loss": 0.2919169068336487, + "step": 2858 + }, + { + "epoch": 0.7592617182313106, + "grad_norm": 0.9954308342175644, + "learning_rate": 1.4630625523238027e-05, + "loss": 0.2530924081802368, + "step": 2859 + }, + { + "epoch": 0.7595272872128536, + "grad_norm": 1.0858688189416337, + "learning_rate": 1.462673342919974e-05, + "loss": 0.3009106516838074, + "step": 2860 + }, + { + "epoch": 0.7597928561943965, + "grad_norm": 1.1572533440881312, + "learning_rate": 1.4622840443181817e-05, + "loss": 0.3114222288131714, + "step": 2861 + }, + { + "epoch": 0.7600584251759395, + "grad_norm": 1.2224434370177688, + "learning_rate": 1.4618946565934775e-05, + "loss": 0.344540536403656, + "step": 2862 + }, + { + "epoch": 0.7603239941574824, + "grad_norm": 1.0685722656113568, + "learning_rate": 1.4615051798209312e-05, + "loss": 0.263607919216156, + "step": 2863 + }, + { + "epoch": 0.7605895631390254, + "grad_norm": 1.018611353798299, + "learning_rate": 1.4611156140756293e-05, + "loss": 0.2685706317424774, + "step": 2864 + }, + { + "epoch": 0.7608551321205683, + "grad_norm": 1.1431197890714058, + "learning_rate": 1.4607259594326752e-05, + "loss": 0.32342326641082764, + "step": 2865 + }, + { + "epoch": 0.7611207011021113, + "grad_norm": 1.182050624874759, + "learning_rate": 1.4603362159671902e-05, + "loss": 0.3088849186897278, + "step": 2866 + }, + { + "epoch": 0.7613862700836542, + "grad_norm": 1.0482348167122462, + "learning_rate": 1.4599463837543114e-05, + "loss": 0.26718589663505554, + "step": 2867 + }, + { + "epoch": 0.7616518390651972, + "grad_norm": 1.0051992534296357, + "learning_rate": 1.4595564628691944e-05, + "loss": 0.29511263966560364, + "step": 2868 + }, + { + "epoch": 0.7619174080467401, + "grad_norm": 1.0974088254649037, + "learning_rate": 1.4591664533870118e-05, + "loss": 0.2940484285354614, + "step": 2869 + }, + { + "epoch": 0.7621829770282831, + "grad_norm": 1.1564456059915547, + "learning_rate": 1.4587763553829521e-05, + "loss": 0.28167295455932617, + "step": 2870 + }, + { + "epoch": 0.762448546009826, + "grad_norm": 1.0590804851451585, + "learning_rate": 1.4583861689322219e-05, + "loss": 0.3362962007522583, + "step": 2871 + }, + { + "epoch": 0.762714114991369, + "grad_norm": 1.1206777555300773, + "learning_rate": 1.4579958941100445e-05, + "loss": 0.3003339171409607, + "step": 2872 + }, + { + "epoch": 0.7629796839729119, + "grad_norm": 1.0572512051509857, + "learning_rate": 1.4576055309916602e-05, + "loss": 0.3191443979740143, + "step": 2873 + }, + { + "epoch": 0.7632452529544549, + "grad_norm": 1.0684782615871369, + "learning_rate": 1.4572150796523265e-05, + "loss": 0.30804574489593506, + "step": 2874 + }, + { + "epoch": 0.7635108219359978, + "grad_norm": 1.0214046475154577, + "learning_rate": 1.4568245401673178e-05, + "loss": 0.32462549209594727, + "step": 2875 + }, + { + "epoch": 0.7637763909175408, + "grad_norm": 1.1357318078490404, + "learning_rate": 1.4564339126119254e-05, + "loss": 0.27751386165618896, + "step": 2876 + }, + { + "epoch": 0.7640419598990837, + "grad_norm": 1.0701221152994065, + "learning_rate": 1.4560431970614578e-05, + "loss": 0.27194011211395264, + "step": 2877 + }, + { + "epoch": 0.7643075288806267, + "grad_norm": 1.134082938487784, + "learning_rate": 1.4556523935912406e-05, + "loss": 0.28701072931289673, + "step": 2878 + }, + { + "epoch": 0.7645730978621696, + "grad_norm": 1.0814539768930527, + "learning_rate": 1.4552615022766156e-05, + "loss": 0.3278783857822418, + "step": 2879 + }, + { + "epoch": 0.7648386668437126, + "grad_norm": 1.096499511679905, + "learning_rate": 1.4548705231929426e-05, + "loss": 0.3292006254196167, + "step": 2880 + }, + { + "epoch": 0.7651042358252557, + "grad_norm": 1.30563906707581, + "learning_rate": 1.4544794564155971e-05, + "loss": 0.33038759231567383, + "step": 2881 + }, + { + "epoch": 0.7653698048067986, + "grad_norm": 1.0799053745016685, + "learning_rate": 1.4540883020199725e-05, + "loss": 0.29183000326156616, + "step": 2882 + }, + { + "epoch": 0.7656353737883416, + "grad_norm": 1.049945067498866, + "learning_rate": 1.4536970600814789e-05, + "loss": 0.28066399693489075, + "step": 2883 + }, + { + "epoch": 0.7659009427698845, + "grad_norm": 1.0673215015420034, + "learning_rate": 1.4533057306755427e-05, + "loss": 0.2832046151161194, + "step": 2884 + }, + { + "epoch": 0.7661665117514275, + "grad_norm": 1.0799218487874103, + "learning_rate": 1.4529143138776078e-05, + "loss": 0.3006540834903717, + "step": 2885 + }, + { + "epoch": 0.7664320807329704, + "grad_norm": 0.965945374746046, + "learning_rate": 1.4525228097631351e-05, + "loss": 0.2793240547180176, + "step": 2886 + }, + { + "epoch": 0.7666976497145134, + "grad_norm": 1.0791298696355873, + "learning_rate": 1.452131218407602e-05, + "loss": 0.2895192503929138, + "step": 2887 + }, + { + "epoch": 0.7669632186960563, + "grad_norm": 1.1085071656285739, + "learning_rate": 1.4517395398865022e-05, + "loss": 0.27707618474960327, + "step": 2888 + }, + { + "epoch": 0.7672287876775993, + "grad_norm": 0.9801959170871006, + "learning_rate": 1.4513477742753465e-05, + "loss": 0.29167065024375916, + "step": 2889 + }, + { + "epoch": 0.7674943566591422, + "grad_norm": 0.9760628575291594, + "learning_rate": 1.4509559216496631e-05, + "loss": 0.2670987844467163, + "step": 2890 + }, + { + "epoch": 0.7677599256406852, + "grad_norm": 1.0541213606202946, + "learning_rate": 1.4505639820849968e-05, + "loss": 0.3025206923484802, + "step": 2891 + }, + { + "epoch": 0.7680254946222281, + "grad_norm": 1.0721054101606857, + "learning_rate": 1.4501719556569087e-05, + "loss": 0.3104705512523651, + "step": 2892 + }, + { + "epoch": 0.7682910636037711, + "grad_norm": 1.1715745485021363, + "learning_rate": 1.4497798424409766e-05, + "loss": 0.2972267270088196, + "step": 2893 + }, + { + "epoch": 0.768556632585314, + "grad_norm": 1.3084992927105763, + "learning_rate": 1.4493876425127957e-05, + "loss": 0.34956347942352295, + "step": 2894 + }, + { + "epoch": 0.768822201566857, + "grad_norm": 1.0910589486872886, + "learning_rate": 1.4489953559479775e-05, + "loss": 0.3122873902320862, + "step": 2895 + }, + { + "epoch": 0.7690877705483999, + "grad_norm": 1.0070263080445798, + "learning_rate": 1.4486029828221497e-05, + "loss": 0.29645755887031555, + "step": 2896 + }, + { + "epoch": 0.7693533395299429, + "grad_norm": 1.1312479199974272, + "learning_rate": 1.448210523210958e-05, + "loss": 0.33357223868370056, + "step": 2897 + }, + { + "epoch": 0.7696189085114858, + "grad_norm": 1.0807209302083978, + "learning_rate": 1.4478179771900634e-05, + "loss": 0.2780191898345947, + "step": 2898 + }, + { + "epoch": 0.7698844774930288, + "grad_norm": 1.098992372480737, + "learning_rate": 1.447425344835144e-05, + "loss": 0.31503236293792725, + "step": 2899 + }, + { + "epoch": 0.7701500464745717, + "grad_norm": 1.0152023365250116, + "learning_rate": 1.4470326262218955e-05, + "loss": 0.2843332290649414, + "step": 2900 + }, + { + "epoch": 0.7704156154561147, + "grad_norm": 1.1041753681410225, + "learning_rate": 1.4466398214260286e-05, + "loss": 0.305475652217865, + "step": 2901 + }, + { + "epoch": 0.7706811844376577, + "grad_norm": 1.0159008972115877, + "learning_rate": 1.446246930523272e-05, + "loss": 0.28418007493019104, + "step": 2902 + }, + { + "epoch": 0.7709467534192006, + "grad_norm": 2.0289726917266027, + "learning_rate": 1.44585395358937e-05, + "loss": 0.28237032890319824, + "step": 2903 + }, + { + "epoch": 0.7712123224007436, + "grad_norm": 1.1334683720848762, + "learning_rate": 1.4454608907000843e-05, + "loss": 0.33727777004241943, + "step": 2904 + }, + { + "epoch": 0.7714778913822865, + "grad_norm": 1.1393257541232447, + "learning_rate": 1.4450677419311925e-05, + "loss": 0.2977198660373688, + "step": 2905 + }, + { + "epoch": 0.7717434603638295, + "grad_norm": 1.0793508547506123, + "learning_rate": 1.4446745073584891e-05, + "loss": 0.3095981776714325, + "step": 2906 + }, + { + "epoch": 0.7720090293453724, + "grad_norm": 1.138471500425881, + "learning_rate": 1.4442811870577851e-05, + "loss": 0.29808440804481506, + "step": 2907 + }, + { + "epoch": 0.7722745983269154, + "grad_norm": 1.2668271633221484, + "learning_rate": 1.4438877811049079e-05, + "loss": 0.32444530725479126, + "step": 2908 + }, + { + "epoch": 0.7725401673084584, + "grad_norm": 1.0229226464155372, + "learning_rate": 1.443494289575702e-05, + "loss": 0.24782602488994598, + "step": 2909 + }, + { + "epoch": 0.7728057362900014, + "grad_norm": 1.079755307057506, + "learning_rate": 1.4431007125460274e-05, + "loss": 0.31289762258529663, + "step": 2910 + }, + { + "epoch": 0.7730713052715443, + "grad_norm": 1.0928540626872372, + "learning_rate": 1.4427070500917615e-05, + "loss": 0.31444042921066284, + "step": 2911 + }, + { + "epoch": 0.7733368742530873, + "grad_norm": 1.1235251868548595, + "learning_rate": 1.4423133022887973e-05, + "loss": 0.31347882747650146, + "step": 2912 + }, + { + "epoch": 0.7736024432346302, + "grad_norm": 1.1449169077961199, + "learning_rate": 1.4419194692130453e-05, + "loss": 0.3025411367416382, + "step": 2913 + }, + { + "epoch": 0.7738680122161732, + "grad_norm": 0.9734590933720824, + "learning_rate": 1.4415255509404316e-05, + "loss": 0.2954581081867218, + "step": 2914 + }, + { + "epoch": 0.7741335811977161, + "grad_norm": 1.051295802747811, + "learning_rate": 1.4411315475468988e-05, + "loss": 0.2675531506538391, + "step": 2915 + }, + { + "epoch": 0.7743991501792591, + "grad_norm": 1.0207923958770302, + "learning_rate": 1.4407374591084064e-05, + "loss": 0.29307854175567627, + "step": 2916 + }, + { + "epoch": 0.774664719160802, + "grad_norm": 0.9134258889524259, + "learning_rate": 1.4403432857009295e-05, + "loss": 0.2805953025817871, + "step": 2917 + }, + { + "epoch": 0.774930288142345, + "grad_norm": 1.1114518211112974, + "learning_rate": 1.439949027400461e-05, + "loss": 0.30805838108062744, + "step": 2918 + }, + { + "epoch": 0.7751958571238879, + "grad_norm": 1.063187320260136, + "learning_rate": 1.4395546842830085e-05, + "loss": 0.31501835584640503, + "step": 2919 + }, + { + "epoch": 0.7754614261054309, + "grad_norm": 1.025310766436644, + "learning_rate": 1.4391602564245975e-05, + "loss": 0.2719186246395111, + "step": 2920 + }, + { + "epoch": 0.7757269950869738, + "grad_norm": 1.0474571998069828, + "learning_rate": 1.4387657439012677e-05, + "loss": 0.29554325342178345, + "step": 2921 + }, + { + "epoch": 0.7759925640685168, + "grad_norm": 1.0103166752174864, + "learning_rate": 1.4383711467890776e-05, + "loss": 0.2993816137313843, + "step": 2922 + }, + { + "epoch": 0.7762581330500598, + "grad_norm": 1.087143911717871, + "learning_rate": 1.4379764651641004e-05, + "loss": 0.3412264883518219, + "step": 2923 + }, + { + "epoch": 0.7765237020316027, + "grad_norm": 1.3163055539647115, + "learning_rate": 1.4375816991024263e-05, + "loss": 0.3137913942337036, + "step": 2924 + }, + { + "epoch": 0.7767892710131457, + "grad_norm": 1.0026858390591848, + "learning_rate": 1.4371868486801611e-05, + "loss": 0.2710151672363281, + "step": 2925 + }, + { + "epoch": 0.7770548399946886, + "grad_norm": 1.060508746597415, + "learning_rate": 1.4367919139734279e-05, + "loss": 0.28521692752838135, + "step": 2926 + }, + { + "epoch": 0.7773204089762316, + "grad_norm": 0.9938687291505847, + "learning_rate": 1.4363968950583651e-05, + "loss": 0.2889919579029083, + "step": 2927 + }, + { + "epoch": 0.7775859779577745, + "grad_norm": 1.0641534591195945, + "learning_rate": 1.436001792011128e-05, + "loss": 0.31562381982803345, + "step": 2928 + }, + { + "epoch": 0.7778515469393175, + "grad_norm": 0.980719397790632, + "learning_rate": 1.4356066049078871e-05, + "loss": 0.2747528553009033, + "step": 2929 + }, + { + "epoch": 0.7781171159208604, + "grad_norm": 1.0890864939874727, + "learning_rate": 1.4352113338248303e-05, + "loss": 0.2918938398361206, + "step": 2930 + }, + { + "epoch": 0.7783826849024034, + "grad_norm": 1.1375978489291394, + "learning_rate": 1.4348159788381615e-05, + "loss": 0.3348507285118103, + "step": 2931 + }, + { + "epoch": 0.7786482538839463, + "grad_norm": 1.049930284325584, + "learning_rate": 1.4344205400241e-05, + "loss": 0.27206242084503174, + "step": 2932 + }, + { + "epoch": 0.7789138228654893, + "grad_norm": 1.0635705360778813, + "learning_rate": 1.434025017458882e-05, + "loss": 0.28496092557907104, + "step": 2933 + }, + { + "epoch": 0.7791793918470322, + "grad_norm": 1.1207237235097192, + "learning_rate": 1.4336294112187595e-05, + "loss": 0.3080131411552429, + "step": 2934 + }, + { + "epoch": 0.7794449608285752, + "grad_norm": 1.1562549835000784, + "learning_rate": 1.4332337213800008e-05, + "loss": 0.3116779029369354, + "step": 2935 + }, + { + "epoch": 0.7797105298101181, + "grad_norm": 1.0230593279992428, + "learning_rate": 1.43283794801889e-05, + "loss": 0.26526543498039246, + "step": 2936 + }, + { + "epoch": 0.7799760987916612, + "grad_norm": 1.0768548459396885, + "learning_rate": 1.4324420912117274e-05, + "loss": 0.2829325497150421, + "step": 2937 + }, + { + "epoch": 0.7802416677732041, + "grad_norm": 1.197165846783245, + "learning_rate": 1.43204615103483e-05, + "loss": 0.34146445989608765, + "step": 2938 + }, + { + "epoch": 0.7805072367547471, + "grad_norm": 1.1418950254878286, + "learning_rate": 1.43165012756453e-05, + "loss": 0.316609650850296, + "step": 2939 + }, + { + "epoch": 0.78077280573629, + "grad_norm": 1.119861281862994, + "learning_rate": 1.4312540208771766e-05, + "loss": 0.3215107321739197, + "step": 2940 + }, + { + "epoch": 0.781038374717833, + "grad_norm": 1.0591732101512668, + "learning_rate": 1.4308578310491342e-05, + "loss": 0.2834000587463379, + "step": 2941 + }, + { + "epoch": 0.781303943699376, + "grad_norm": 1.1186376453102755, + "learning_rate": 1.430461558156783e-05, + "loss": 0.30184993147850037, + "step": 2942 + }, + { + "epoch": 0.7815695126809189, + "grad_norm": 1.1319557052801907, + "learning_rate": 1.4300652022765207e-05, + "loss": 0.3299996256828308, + "step": 2943 + }, + { + "epoch": 0.7818350816624619, + "grad_norm": 1.1269288601015153, + "learning_rate": 1.4296687634847592e-05, + "loss": 0.27565228939056396, + "step": 2944 + }, + { + "epoch": 0.7821006506440048, + "grad_norm": 1.1019395409868211, + "learning_rate": 1.4292722418579278e-05, + "loss": 0.30347493290901184, + "step": 2945 + }, + { + "epoch": 0.7823662196255478, + "grad_norm": 1.125677517693181, + "learning_rate": 1.4288756374724709e-05, + "loss": 0.31469428539276123, + "step": 2946 + }, + { + "epoch": 0.7826317886070907, + "grad_norm": 1.0500101449680372, + "learning_rate": 1.4284789504048493e-05, + "loss": 0.27361029386520386, + "step": 2947 + }, + { + "epoch": 0.7828973575886337, + "grad_norm": 1.057442611584268, + "learning_rate": 1.428082180731539e-05, + "loss": 0.29180705547332764, + "step": 2948 + }, + { + "epoch": 0.7831629265701766, + "grad_norm": 1.0218659697209738, + "learning_rate": 1.4276853285290334e-05, + "loss": 0.281120628118515, + "step": 2949 + }, + { + "epoch": 0.7834284955517196, + "grad_norm": 1.0029783457826962, + "learning_rate": 1.4272883938738406e-05, + "loss": 0.26144471764564514, + "step": 2950 + }, + { + "epoch": 0.7836940645332625, + "grad_norm": 1.0904458839940374, + "learning_rate": 1.4268913768424848e-05, + "loss": 0.3118991255760193, + "step": 2951 + }, + { + "epoch": 0.7839596335148055, + "grad_norm": 1.0581869365443632, + "learning_rate": 1.4264942775115065e-05, + "loss": 0.29352328181266785, + "step": 2952 + }, + { + "epoch": 0.7842252024963484, + "grad_norm": 1.025234952757571, + "learning_rate": 1.426097095957461e-05, + "loss": 0.2687748968601227, + "step": 2953 + }, + { + "epoch": 0.7844907714778914, + "grad_norm": 1.0817782920006436, + "learning_rate": 1.4256998322569212e-05, + "loss": 0.3106890916824341, + "step": 2954 + }, + { + "epoch": 0.7847563404594343, + "grad_norm": 1.0039841255701216, + "learning_rate": 1.4253024864864742e-05, + "loss": 0.2522161304950714, + "step": 2955 + }, + { + "epoch": 0.7850219094409773, + "grad_norm": 1.031799618380073, + "learning_rate": 1.424905058722724e-05, + "loss": 0.2994377613067627, + "step": 2956 + }, + { + "epoch": 0.7852874784225202, + "grad_norm": 1.295564211303899, + "learning_rate": 1.4245075490422893e-05, + "loss": 0.3753565549850464, + "step": 2957 + }, + { + "epoch": 0.7855530474040632, + "grad_norm": 1.2386689798654595, + "learning_rate": 1.424109957521806e-05, + "loss": 0.29544737935066223, + "step": 2958 + }, + { + "epoch": 0.7858186163856061, + "grad_norm": 1.0381164701705432, + "learning_rate": 1.423712284237925e-05, + "loss": 0.307847797870636, + "step": 2959 + }, + { + "epoch": 0.7860841853671491, + "grad_norm": 1.1107576873332587, + "learning_rate": 1.4233145292673127e-05, + "loss": 0.31758183240890503, + "step": 2960 + }, + { + "epoch": 0.786349754348692, + "grad_norm": 1.0358601319268448, + "learning_rate": 1.4229166926866517e-05, + "loss": 0.307254433631897, + "step": 2961 + }, + { + "epoch": 0.786615323330235, + "grad_norm": 1.2228062733167704, + "learning_rate": 1.42251877457264e-05, + "loss": 0.3513748049736023, + "step": 2962 + }, + { + "epoch": 0.7868808923117779, + "grad_norm": 1.1359729522705007, + "learning_rate": 1.422120775001992e-05, + "loss": 0.3025718629360199, + "step": 2963 + }, + { + "epoch": 0.7871464612933209, + "grad_norm": 1.076503168390535, + "learning_rate": 1.4217226940514367e-05, + "loss": 0.2922811508178711, + "step": 2964 + }, + { + "epoch": 0.787412030274864, + "grad_norm": 1.07297262661661, + "learning_rate": 1.42132453179772e-05, + "loss": 0.29599297046661377, + "step": 2965 + }, + { + "epoch": 0.7876775992564069, + "grad_norm": 0.992121967255531, + "learning_rate": 1.4209262883176025e-05, + "loss": 0.28336548805236816, + "step": 2966 + }, + { + "epoch": 0.7879431682379499, + "grad_norm": 1.0655541697156172, + "learning_rate": 1.4205279636878613e-05, + "loss": 0.3100801110267639, + "step": 2967 + }, + { + "epoch": 0.7882087372194928, + "grad_norm": 1.165527486411767, + "learning_rate": 1.4201295579852881e-05, + "loss": 0.33067989349365234, + "step": 2968 + }, + { + "epoch": 0.7884743062010358, + "grad_norm": 1.1896877635723886, + "learning_rate": 1.4197310712866909e-05, + "loss": 0.282347172498703, + "step": 2969 + }, + { + "epoch": 0.7887398751825787, + "grad_norm": 1.0769183433483809, + "learning_rate": 1.419332503668894e-05, + "loss": 0.30585426092147827, + "step": 2970 + }, + { + "epoch": 0.7890054441641217, + "grad_norm": 1.0616062054836604, + "learning_rate": 1.4189338552087351e-05, + "loss": 0.3011561632156372, + "step": 2971 + }, + { + "epoch": 0.7892710131456646, + "grad_norm": 0.9722574451184507, + "learning_rate": 1.4185351259830705e-05, + "loss": 0.2700524926185608, + "step": 2972 + }, + { + "epoch": 0.7895365821272076, + "grad_norm": 1.0849811262666431, + "learning_rate": 1.4181363160687693e-05, + "loss": 0.2963382303714752, + "step": 2973 + }, + { + "epoch": 0.7898021511087505, + "grad_norm": 1.0388990841328773, + "learning_rate": 1.4177374255427183e-05, + "loss": 0.27132824063301086, + "step": 2974 + }, + { + "epoch": 0.7900677200902935, + "grad_norm": 0.9602477794817199, + "learning_rate": 1.417338454481818e-05, + "loss": 0.2539706826210022, + "step": 2975 + }, + { + "epoch": 0.7903332890718364, + "grad_norm": 1.0972216427869486, + "learning_rate": 1.416939402962986e-05, + "loss": 0.28465601801872253, + "step": 2976 + }, + { + "epoch": 0.7905988580533794, + "grad_norm": 1.1885027397372414, + "learning_rate": 1.4165402710631544e-05, + "loss": 0.3020748198032379, + "step": 2977 + }, + { + "epoch": 0.7908644270349223, + "grad_norm": 1.0709231597298363, + "learning_rate": 1.416141058859271e-05, + "loss": 0.3157690465450287, + "step": 2978 + }, + { + "epoch": 0.7911299960164653, + "grad_norm": 1.0874979641604023, + "learning_rate": 1.4157417664282994e-05, + "loss": 0.2720191776752472, + "step": 2979 + }, + { + "epoch": 0.7913955649980082, + "grad_norm": 1.0670143355557837, + "learning_rate": 1.4153423938472185e-05, + "loss": 0.2931746542453766, + "step": 2980 + }, + { + "epoch": 0.7916611339795512, + "grad_norm": 1.0836941185599118, + "learning_rate": 1.4149429411930226e-05, + "loss": 0.2683875560760498, + "step": 2981 + }, + { + "epoch": 0.7919267029610941, + "grad_norm": 1.0454189872619364, + "learning_rate": 1.4145434085427216e-05, + "loss": 0.2559819519519806, + "step": 2982 + }, + { + "epoch": 0.7921922719426371, + "grad_norm": 1.1028368657772893, + "learning_rate": 1.4141437959733404e-05, + "loss": 0.2845582365989685, + "step": 2983 + }, + { + "epoch": 0.79245784092418, + "grad_norm": 1.05827279827959, + "learning_rate": 1.4137441035619197e-05, + "loss": 0.26766544580459595, + "step": 2984 + }, + { + "epoch": 0.792723409905723, + "grad_norm": 1.2459472391823172, + "learning_rate": 1.4133443313855155e-05, + "loss": 0.32089024782180786, + "step": 2985 + }, + { + "epoch": 0.7929889788872659, + "grad_norm": 1.053106908199776, + "learning_rate": 1.4129444795211993e-05, + "loss": 0.2756182551383972, + "step": 2986 + }, + { + "epoch": 0.7932545478688089, + "grad_norm": 1.231241306668284, + "learning_rate": 1.4125445480460573e-05, + "loss": 0.29487302899360657, + "step": 2987 + }, + { + "epoch": 0.7935201168503518, + "grad_norm": 1.1738297230948855, + "learning_rate": 1.4121445370371922e-05, + "loss": 0.3362561762332916, + "step": 2988 + }, + { + "epoch": 0.7937856858318948, + "grad_norm": 1.1591988507026376, + "learning_rate": 1.4117444465717209e-05, + "loss": 0.2986692488193512, + "step": 2989 + }, + { + "epoch": 0.7940512548134377, + "grad_norm": 1.0341012671875776, + "learning_rate": 1.4113442767267766e-05, + "loss": 0.2725266218185425, + "step": 2990 + }, + { + "epoch": 0.7943168237949807, + "grad_norm": 1.1125466640148414, + "learning_rate": 1.4109440275795071e-05, + "loss": 0.29827257990837097, + "step": 2991 + }, + { + "epoch": 0.7945823927765236, + "grad_norm": 1.0512885973195232, + "learning_rate": 1.410543699207076e-05, + "loss": 0.2506203055381775, + "step": 2992 + }, + { + "epoch": 0.7948479617580667, + "grad_norm": 0.9867416114744889, + "learning_rate": 1.410143291686661e-05, + "loss": 0.2675034701824188, + "step": 2993 + }, + { + "epoch": 0.7951135307396097, + "grad_norm": 1.1763547306282318, + "learning_rate": 1.4097428050954571e-05, + "loss": 0.34528690576553345, + "step": 2994 + }, + { + "epoch": 0.7953790997211526, + "grad_norm": 1.1374135219725177, + "learning_rate": 1.4093422395106726e-05, + "loss": 0.27551063895225525, + "step": 2995 + }, + { + "epoch": 0.7956446687026956, + "grad_norm": 1.1195982376159075, + "learning_rate": 1.408941595009532e-05, + "loss": 0.3176268935203552, + "step": 2996 + }, + { + "epoch": 0.7959102376842385, + "grad_norm": 1.1804373403956752, + "learning_rate": 1.408540871669275e-05, + "loss": 0.30056723952293396, + "step": 2997 + }, + { + "epoch": 0.7961758066657815, + "grad_norm": 1.124570387942151, + "learning_rate": 1.4081400695671562e-05, + "loss": 0.32109886407852173, + "step": 2998 + }, + { + "epoch": 0.7964413756473244, + "grad_norm": 1.1262740571855958, + "learning_rate": 1.4077391887804457e-05, + "loss": 0.33622005581855774, + "step": 2999 + }, + { + "epoch": 0.7967069446288674, + "grad_norm": 1.1195153536613822, + "learning_rate": 1.4073382293864283e-05, + "loss": 0.3054961860179901, + "step": 3000 + }, + { + "epoch": 0.7969725136104103, + "grad_norm": 1.1210721039096916, + "learning_rate": 1.4069371914624044e-05, + "loss": 0.3022462725639343, + "step": 3001 + }, + { + "epoch": 0.7972380825919533, + "grad_norm": 1.0116555063320039, + "learning_rate": 1.4065360750856891e-05, + "loss": 0.2500512897968292, + "step": 3002 + }, + { + "epoch": 0.7975036515734962, + "grad_norm": 1.233947002119444, + "learning_rate": 1.4061348803336135e-05, + "loss": 0.2960171699523926, + "step": 3003 + }, + { + "epoch": 0.7977692205550392, + "grad_norm": 3.53476121579318, + "learning_rate": 1.4057336072835228e-05, + "loss": 0.2941724359989166, + "step": 3004 + }, + { + "epoch": 0.7980347895365821, + "grad_norm": 1.0143157952003843, + "learning_rate": 1.4053322560127779e-05, + "loss": 0.2827858328819275, + "step": 3005 + }, + { + "epoch": 0.7983003585181251, + "grad_norm": 1.34417890867956, + "learning_rate": 1.4049308265987544e-05, + "loss": 0.32525116205215454, + "step": 3006 + }, + { + "epoch": 0.798565927499668, + "grad_norm": 1.1622605286979444, + "learning_rate": 1.4045293191188431e-05, + "loss": 0.26509979367256165, + "step": 3007 + }, + { + "epoch": 0.798831496481211, + "grad_norm": 1.1649049829769997, + "learning_rate": 1.4041277336504503e-05, + "loss": 0.3462742567062378, + "step": 3008 + }, + { + "epoch": 0.7990970654627539, + "grad_norm": 1.118975693723979, + "learning_rate": 1.4037260702709967e-05, + "loss": 0.2971092164516449, + "step": 3009 + }, + { + "epoch": 0.7993626344442969, + "grad_norm": 1.0541078602131526, + "learning_rate": 1.4033243290579182e-05, + "loss": 0.32359808683395386, + "step": 3010 + }, + { + "epoch": 0.7996282034258398, + "grad_norm": 0.9819968107477214, + "learning_rate": 1.4029225100886657e-05, + "loss": 0.2949031591415405, + "step": 3011 + }, + { + "epoch": 0.7998937724073828, + "grad_norm": 0.9639154080405838, + "learning_rate": 1.4025206134407051e-05, + "loss": 0.29888901114463806, + "step": 3012 + }, + { + "epoch": 0.8001593413889257, + "grad_norm": 1.0921369087209054, + "learning_rate": 1.4021186391915181e-05, + "loss": 0.2999705672264099, + "step": 3013 + }, + { + "epoch": 0.8004249103704687, + "grad_norm": 1.027092536189555, + "learning_rate": 1.4017165874185996e-05, + "loss": 0.2725638449192047, + "step": 3014 + }, + { + "epoch": 0.8006904793520117, + "grad_norm": 1.6251260873819724, + "learning_rate": 1.4013144581994609e-05, + "loss": 0.2809314727783203, + "step": 3015 + }, + { + "epoch": 0.8009560483335546, + "grad_norm": 1.194026798460289, + "learning_rate": 1.400912251611628e-05, + "loss": 0.30335327982902527, + "step": 3016 + }, + { + "epoch": 0.8012216173150976, + "grad_norm": 1.0526756572542106, + "learning_rate": 1.400509967732641e-05, + "loss": 0.27780598402023315, + "step": 3017 + }, + { + "epoch": 0.8014871862966405, + "grad_norm": 1.0036615790617616, + "learning_rate": 1.400107606640056e-05, + "loss": 0.2865309715270996, + "step": 3018 + }, + { + "epoch": 0.8017527552781835, + "grad_norm": 1.067182271229665, + "learning_rate": 1.3997051684114431e-05, + "loss": 0.2691546082496643, + "step": 3019 + }, + { + "epoch": 0.8020183242597264, + "grad_norm": 1.0174199108878024, + "learning_rate": 1.3993026531243876e-05, + "loss": 0.30289226770401, + "step": 3020 + }, + { + "epoch": 0.8022838932412695, + "grad_norm": 1.1180967643802684, + "learning_rate": 1.3989000608564905e-05, + "loss": 0.2767682671546936, + "step": 3021 + }, + { + "epoch": 0.8025494622228124, + "grad_norm": 1.1982508587685934, + "learning_rate": 1.3984973916853657e-05, + "loss": 0.3423742353916168, + "step": 3022 + }, + { + "epoch": 0.8028150312043554, + "grad_norm": 1.1718790013716964, + "learning_rate": 1.3980946456886439e-05, + "loss": 0.3000536561012268, + "step": 3023 + }, + { + "epoch": 0.8030806001858983, + "grad_norm": 1.1431161282459077, + "learning_rate": 1.3976918229439698e-05, + "loss": 0.3071063756942749, + "step": 3024 + }, + { + "epoch": 0.8033461691674413, + "grad_norm": 1.6885640285561154, + "learning_rate": 1.397288923529002e-05, + "loss": 0.31261157989501953, + "step": 3025 + }, + { + "epoch": 0.8036117381489842, + "grad_norm": 1.0076153318556622, + "learning_rate": 1.3968859475214156e-05, + "loss": 0.2658939063549042, + "step": 3026 + }, + { + "epoch": 0.8038773071305272, + "grad_norm": 1.0309089161631302, + "learning_rate": 1.3964828949988993e-05, + "loss": 0.2772905230522156, + "step": 3027 + }, + { + "epoch": 0.8041428761120701, + "grad_norm": 1.1271894525974708, + "learning_rate": 1.396079766039157e-05, + "loss": 0.2903479337692261, + "step": 3028 + }, + { + "epoch": 0.8044084450936131, + "grad_norm": 1.2165332424367126, + "learning_rate": 1.3956765607199069e-05, + "loss": 0.35709524154663086, + "step": 3029 + }, + { + "epoch": 0.804674014075156, + "grad_norm": 1.0863328323430816, + "learning_rate": 1.3952732791188828e-05, + "loss": 0.2929389774799347, + "step": 3030 + }, + { + "epoch": 0.804939583056699, + "grad_norm": 0.999480167032172, + "learning_rate": 1.3948699213138321e-05, + "loss": 0.2609884440898895, + "step": 3031 + }, + { + "epoch": 0.805205152038242, + "grad_norm": 1.0946442757602284, + "learning_rate": 1.394466487382518e-05, + "loss": 0.3026544749736786, + "step": 3032 + }, + { + "epoch": 0.8054707210197849, + "grad_norm": 1.0415601836945267, + "learning_rate": 1.394062977402717e-05, + "loss": 0.28281137347221375, + "step": 3033 + }, + { + "epoch": 0.8057362900013278, + "grad_norm": 0.9908513124522437, + "learning_rate": 1.3936593914522214e-05, + "loss": 0.26189178228378296, + "step": 3034 + }, + { + "epoch": 0.8060018589828708, + "grad_norm": 1.0541854732158313, + "learning_rate": 1.3932557296088383e-05, + "loss": 0.27987509965896606, + "step": 3035 + }, + { + "epoch": 0.8062674279644138, + "grad_norm": 0.9961129101435677, + "learning_rate": 1.3928519919503884e-05, + "loss": 0.2857724130153656, + "step": 3036 + }, + { + "epoch": 0.8065329969459567, + "grad_norm": 0.9752377302684325, + "learning_rate": 1.3924481785547076e-05, + "loss": 0.28102418780326843, + "step": 3037 + }, + { + "epoch": 0.8067985659274997, + "grad_norm": 1.06882045524996, + "learning_rate": 1.3920442894996464e-05, + "loss": 0.30250412225723267, + "step": 3038 + }, + { + "epoch": 0.8070641349090426, + "grad_norm": 0.9854538363943691, + "learning_rate": 1.3916403248630703e-05, + "loss": 0.28951483964920044, + "step": 3039 + }, + { + "epoch": 0.8073297038905856, + "grad_norm": 0.990016753911339, + "learning_rate": 1.3912362847228585e-05, + "loss": 0.28455328941345215, + "step": 3040 + }, + { + "epoch": 0.8075952728721285, + "grad_norm": 1.0887176497400486, + "learning_rate": 1.3908321691569048e-05, + "loss": 0.29541105031967163, + "step": 3041 + }, + { + "epoch": 0.8078608418536715, + "grad_norm": 1.162648796815669, + "learning_rate": 1.3904279782431187e-05, + "loss": 0.3057629466056824, + "step": 3042 + }, + { + "epoch": 0.8081264108352144, + "grad_norm": 1.0909846424659564, + "learning_rate": 1.3900237120594226e-05, + "loss": 0.3204082250595093, + "step": 3043 + }, + { + "epoch": 0.8083919798167574, + "grad_norm": 0.9793203113476959, + "learning_rate": 1.3896193706837551e-05, + "loss": 0.28629523515701294, + "step": 3044 + }, + { + "epoch": 0.8086575487983003, + "grad_norm": 1.1874958252714642, + "learning_rate": 1.389214954194068e-05, + "loss": 0.298164427280426, + "step": 3045 + }, + { + "epoch": 0.8089231177798433, + "grad_norm": 1.005892758898695, + "learning_rate": 1.3888104626683282e-05, + "loss": 0.27309298515319824, + "step": 3046 + }, + { + "epoch": 0.8091886867613862, + "grad_norm": 0.9950263488620656, + "learning_rate": 1.3884058961845166e-05, + "loss": 0.25635263323783875, + "step": 3047 + }, + { + "epoch": 0.8094542557429292, + "grad_norm": 1.002808171969614, + "learning_rate": 1.3880012548206292e-05, + "loss": 0.29926127195358276, + "step": 3048 + }, + { + "epoch": 0.8097198247244722, + "grad_norm": 0.9867331912864394, + "learning_rate": 1.387596538654676e-05, + "loss": 0.26633137464523315, + "step": 3049 + }, + { + "epoch": 0.8099853937060152, + "grad_norm": 1.0757993931692869, + "learning_rate": 1.387191747764681e-05, + "loss": 0.28725534677505493, + "step": 3050 + }, + { + "epoch": 0.8102509626875581, + "grad_norm": 1.4955713597704303, + "learning_rate": 1.3867868822286838e-05, + "loss": 0.3015314042568207, + "step": 3051 + }, + { + "epoch": 0.8105165316691011, + "grad_norm": 1.048643971484194, + "learning_rate": 1.3863819421247375e-05, + "loss": 0.3054691553115845, + "step": 3052 + }, + { + "epoch": 0.810782100650644, + "grad_norm": 1.1596568650600225, + "learning_rate": 1.3859769275309097e-05, + "loss": 0.26315444707870483, + "step": 3053 + }, + { + "epoch": 0.811047669632187, + "grad_norm": 1.024319547072995, + "learning_rate": 1.3855718385252824e-05, + "loss": 0.2973077595233917, + "step": 3054 + }, + { + "epoch": 0.81131323861373, + "grad_norm": 1.1845129171721744, + "learning_rate": 1.385166675185952e-05, + "loss": 0.32824432849884033, + "step": 3055 + }, + { + "epoch": 0.8115788075952729, + "grad_norm": 1.2351976774044444, + "learning_rate": 1.3847614375910292e-05, + "loss": 0.3127811849117279, + "step": 3056 + }, + { + "epoch": 0.8118443765768159, + "grad_norm": 1.0840317870226388, + "learning_rate": 1.384356125818639e-05, + "loss": 0.2631932497024536, + "step": 3057 + }, + { + "epoch": 0.8121099455583588, + "grad_norm": 1.0251225163823416, + "learning_rate": 1.3839507399469213e-05, + "loss": 0.2856106162071228, + "step": 3058 + }, + { + "epoch": 0.8123755145399018, + "grad_norm": 1.2604810760435325, + "learning_rate": 1.3835452800540288e-05, + "loss": 0.28986629843711853, + "step": 3059 + }, + { + "epoch": 0.8126410835214447, + "grad_norm": 1.0804422287227695, + "learning_rate": 1.3831397462181298e-05, + "loss": 0.28411972522735596, + "step": 3060 + }, + { + "epoch": 0.8129066525029877, + "grad_norm": 1.117697190248139, + "learning_rate": 1.3827341385174063e-05, + "loss": 0.3234354853630066, + "step": 3061 + }, + { + "epoch": 0.8131722214845306, + "grad_norm": 0.9917598533716923, + "learning_rate": 1.3823284570300551e-05, + "loss": 0.24779736995697021, + "step": 3062 + }, + { + "epoch": 0.8134377904660736, + "grad_norm": 1.1743500466494587, + "learning_rate": 1.3819227018342865e-05, + "loss": 0.3306904137134552, + "step": 3063 + }, + { + "epoch": 0.8137033594476165, + "grad_norm": 1.1120224667451313, + "learning_rate": 1.3815168730083254e-05, + "loss": 0.31705451011657715, + "step": 3064 + }, + { + "epoch": 0.8139689284291595, + "grad_norm": 1.1351768868234977, + "learning_rate": 1.3811109706304105e-05, + "loss": 0.29830047488212585, + "step": 3065 + }, + { + "epoch": 0.8142344974107024, + "grad_norm": 1.1496885073051233, + "learning_rate": 1.3807049947787954e-05, + "loss": 0.30605942010879517, + "step": 3066 + }, + { + "epoch": 0.8145000663922454, + "grad_norm": 1.0745429008877887, + "learning_rate": 1.3802989455317475e-05, + "loss": 0.3139193058013916, + "step": 3067 + }, + { + "epoch": 0.8147656353737883, + "grad_norm": 1.0541430221228831, + "learning_rate": 1.3798928229675478e-05, + "loss": 0.3175879716873169, + "step": 3068 + }, + { + "epoch": 0.8150312043553313, + "grad_norm": 1.0450888698469754, + "learning_rate": 1.3794866271644922e-05, + "loss": 0.26391106843948364, + "step": 3069 + }, + { + "epoch": 0.8152967733368742, + "grad_norm": 0.945534402365018, + "learning_rate": 1.3790803582008906e-05, + "loss": 0.24128863215446472, + "step": 3070 + }, + { + "epoch": 0.8155623423184172, + "grad_norm": 1.1627322372772537, + "learning_rate": 1.378674016155067e-05, + "loss": 0.3249368965625763, + "step": 3071 + }, + { + "epoch": 0.8158279112999601, + "grad_norm": 1.0060562228451158, + "learning_rate": 1.3782676011053592e-05, + "loss": 0.2871986925601959, + "step": 3072 + }, + { + "epoch": 0.8160934802815031, + "grad_norm": 1.1624248444882197, + "learning_rate": 1.377861113130119e-05, + "loss": 0.29047372937202454, + "step": 3073 + }, + { + "epoch": 0.816359049263046, + "grad_norm": 1.0925698386610025, + "learning_rate": 1.3774545523077122e-05, + "loss": 0.3055281341075897, + "step": 3074 + }, + { + "epoch": 0.816624618244589, + "grad_norm": 0.9197098274775629, + "learning_rate": 1.37704791871652e-05, + "loss": 0.2565494179725647, + "step": 3075 + }, + { + "epoch": 0.8168901872261319, + "grad_norm": 1.0377185359248249, + "learning_rate": 1.3766412124349358e-05, + "loss": 0.3016049861907959, + "step": 3076 + }, + { + "epoch": 0.8171557562076749, + "grad_norm": 1.0790995041055653, + "learning_rate": 1.3762344335413677e-05, + "loss": 0.3021200895309448, + "step": 3077 + }, + { + "epoch": 0.817421325189218, + "grad_norm": 1.0643017770253544, + "learning_rate": 1.3758275821142382e-05, + "loss": 0.3024774193763733, + "step": 3078 + }, + { + "epoch": 0.8176868941707609, + "grad_norm": 1.0591328005001268, + "learning_rate": 1.3754206582319836e-05, + "loss": 0.33114269375801086, + "step": 3079 + }, + { + "epoch": 0.8179524631523039, + "grad_norm": 1.0815809107319383, + "learning_rate": 1.3750136619730534e-05, + "loss": 0.27339494228363037, + "step": 3080 + }, + { + "epoch": 0.8182180321338468, + "grad_norm": 1.170674128986789, + "learning_rate": 1.3746065934159123e-05, + "loss": 0.2827128767967224, + "step": 3081 + }, + { + "epoch": 0.8184836011153898, + "grad_norm": 1.1064880736532463, + "learning_rate": 1.3741994526390379e-05, + "loss": 0.2972746193408966, + "step": 3082 + }, + { + "epoch": 0.8187491700969327, + "grad_norm": 1.143548636761381, + "learning_rate": 1.3737922397209222e-05, + "loss": 0.29932117462158203, + "step": 3083 + }, + { + "epoch": 0.8190147390784757, + "grad_norm": 1.0415876434255473, + "learning_rate": 1.3733849547400713e-05, + "loss": 0.28307998180389404, + "step": 3084 + }, + { + "epoch": 0.8192803080600186, + "grad_norm": 1.1070561443231863, + "learning_rate": 1.3729775977750048e-05, + "loss": 0.2885883152484894, + "step": 3085 + }, + { + "epoch": 0.8195458770415616, + "grad_norm": 1.1106477390667713, + "learning_rate": 1.3725701689042564e-05, + "loss": 0.28837913274765015, + "step": 3086 + }, + { + "epoch": 0.8198114460231045, + "grad_norm": 1.0553526039271008, + "learning_rate": 1.3721626682063733e-05, + "loss": 0.2775058150291443, + "step": 3087 + }, + { + "epoch": 0.8200770150046475, + "grad_norm": 1.153176622627066, + "learning_rate": 1.3717550957599172e-05, + "loss": 0.2813493609428406, + "step": 3088 + }, + { + "epoch": 0.8203425839861904, + "grad_norm": 1.1477738573738745, + "learning_rate": 1.371347451643463e-05, + "loss": 0.2677592933177948, + "step": 3089 + }, + { + "epoch": 0.8206081529677334, + "grad_norm": 1.184705398593534, + "learning_rate": 1.3709397359355998e-05, + "loss": 0.3104957938194275, + "step": 3090 + }, + { + "epoch": 0.8208737219492763, + "grad_norm": 1.1714327280441006, + "learning_rate": 1.3705319487149303e-05, + "loss": 0.29315799474716187, + "step": 3091 + }, + { + "epoch": 0.8211392909308193, + "grad_norm": 1.1179168081295616, + "learning_rate": 1.370124090060071e-05, + "loss": 0.3044348657131195, + "step": 3092 + }, + { + "epoch": 0.8214048599123622, + "grad_norm": 1.1122209585212142, + "learning_rate": 1.3697161600496525e-05, + "loss": 0.2918691635131836, + "step": 3093 + }, + { + "epoch": 0.8216704288939052, + "grad_norm": 1.0702091422822353, + "learning_rate": 1.3693081587623187e-05, + "loss": 0.2887750267982483, + "step": 3094 + }, + { + "epoch": 0.8219359978754481, + "grad_norm": 1.1155429990394359, + "learning_rate": 1.3689000862767274e-05, + "loss": 0.3055661916732788, + "step": 3095 + }, + { + "epoch": 0.8222015668569911, + "grad_norm": 1.0251756704247361, + "learning_rate": 1.3684919426715504e-05, + "loss": 0.271525114774704, + "step": 3096 + }, + { + "epoch": 0.822467135838534, + "grad_norm": 1.1269584199088303, + "learning_rate": 1.3680837280254726e-05, + "loss": 0.3220426142215729, + "step": 3097 + }, + { + "epoch": 0.822732704820077, + "grad_norm": 1.0149552227204566, + "learning_rate": 1.3676754424171935e-05, + "loss": 0.29091203212738037, + "step": 3098 + }, + { + "epoch": 0.8229982738016199, + "grad_norm": 1.051328362150218, + "learning_rate": 1.3672670859254252e-05, + "loss": 0.2928692102432251, + "step": 3099 + }, + { + "epoch": 0.8232638427831629, + "grad_norm": 1.0366528987524315, + "learning_rate": 1.3668586586288942e-05, + "loss": 0.28635919094085693, + "step": 3100 + }, + { + "epoch": 0.8235294117647058, + "grad_norm": 1.0374876833794577, + "learning_rate": 1.3664501606063402e-05, + "loss": 0.2912571430206299, + "step": 3101 + }, + { + "epoch": 0.8237949807462488, + "grad_norm": 1.051516198651511, + "learning_rate": 1.3660415919365178e-05, + "loss": 0.2783615291118622, + "step": 3102 + }, + { + "epoch": 0.8240605497277917, + "grad_norm": 1.088921494432588, + "learning_rate": 1.365632952698193e-05, + "loss": 0.3064395785331726, + "step": 3103 + }, + { + "epoch": 0.8243261187093347, + "grad_norm": 1.023130230207284, + "learning_rate": 1.3652242429701477e-05, + "loss": 0.2528907358646393, + "step": 3104 + }, + { + "epoch": 0.8245916876908777, + "grad_norm": 1.0503421945431453, + "learning_rate": 1.3648154628311754e-05, + "loss": 0.2648676633834839, + "step": 3105 + }, + { + "epoch": 0.8248572566724207, + "grad_norm": 1.2732480631249905, + "learning_rate": 1.3644066123600846e-05, + "loss": 0.33425620198249817, + "step": 3106 + }, + { + "epoch": 0.8251228256539637, + "grad_norm": 1.0925062122156084, + "learning_rate": 1.3639976916356965e-05, + "loss": 0.3108072280883789, + "step": 3107 + }, + { + "epoch": 0.8253883946355066, + "grad_norm": 1.0815679409684162, + "learning_rate": 1.3635887007368467e-05, + "loss": 0.2860543131828308, + "step": 3108 + }, + { + "epoch": 0.8256539636170496, + "grad_norm": 1.0711932859903586, + "learning_rate": 1.3631796397423833e-05, + "loss": 0.25440749526023865, + "step": 3109 + }, + { + "epoch": 0.8259195325985925, + "grad_norm": 1.1006663978120534, + "learning_rate": 1.3627705087311687e-05, + "loss": 0.2676115334033966, + "step": 3110 + }, + { + "epoch": 0.8261851015801355, + "grad_norm": 1.1597529133358384, + "learning_rate": 1.3623613077820788e-05, + "loss": 0.28977078199386597, + "step": 3111 + }, + { + "epoch": 0.8264506705616784, + "grad_norm": 1.1046761011596355, + "learning_rate": 1.361952036974002e-05, + "loss": 0.30161401629447937, + "step": 3112 + }, + { + "epoch": 0.8267162395432214, + "grad_norm": 1.135120464396266, + "learning_rate": 1.3615426963858416e-05, + "loss": 0.28676310181617737, + "step": 3113 + }, + { + "epoch": 0.8269818085247643, + "grad_norm": 1.100109147839879, + "learning_rate": 1.361133286096513e-05, + "loss": 0.2957243323326111, + "step": 3114 + }, + { + "epoch": 0.8272473775063073, + "grad_norm": 1.0691905028493969, + "learning_rate": 1.3607238061849461e-05, + "loss": 0.3036375343799591, + "step": 3115 + }, + { + "epoch": 0.8275129464878502, + "grad_norm": 1.1142331461612014, + "learning_rate": 1.360314256730084e-05, + "loss": 0.31175294518470764, + "step": 3116 + }, + { + "epoch": 0.8277785154693932, + "grad_norm": 1.0665802680669934, + "learning_rate": 1.3599046378108825e-05, + "loss": 0.30212485790252686, + "step": 3117 + }, + { + "epoch": 0.8280440844509361, + "grad_norm": 1.1992776426845386, + "learning_rate": 1.3594949495063117e-05, + "loss": 0.3290692865848541, + "step": 3118 + }, + { + "epoch": 0.8283096534324791, + "grad_norm": 1.007005509411099, + "learning_rate": 1.3590851918953542e-05, + "loss": 0.25952839851379395, + "step": 3119 + }, + { + "epoch": 0.828575222414022, + "grad_norm": 1.0949064818424232, + "learning_rate": 1.3586753650570069e-05, + "loss": 0.27737247943878174, + "step": 3120 + }, + { + "epoch": 0.828840791395565, + "grad_norm": 1.0156990629875267, + "learning_rate": 1.3582654690702795e-05, + "loss": 0.29415374994277954, + "step": 3121 + }, + { + "epoch": 0.8291063603771079, + "grad_norm": 1.066804105313739, + "learning_rate": 1.3578555040141948e-05, + "loss": 0.29197627305984497, + "step": 3122 + }, + { + "epoch": 0.8293719293586509, + "grad_norm": 1.1089730397237387, + "learning_rate": 1.3574454699677893e-05, + "loss": 0.30318522453308105, + "step": 3123 + }, + { + "epoch": 0.8296374983401938, + "grad_norm": 1.0916871079120407, + "learning_rate": 1.357035367010113e-05, + "loss": 0.3184241056442261, + "step": 3124 + }, + { + "epoch": 0.8299030673217368, + "grad_norm": 1.3286365770942894, + "learning_rate": 1.3566251952202288e-05, + "loss": 0.30330199003219604, + "step": 3125 + }, + { + "epoch": 0.8301686363032797, + "grad_norm": 1.1117453782986153, + "learning_rate": 1.356214954677213e-05, + "loss": 0.25366994738578796, + "step": 3126 + }, + { + "epoch": 0.8304342052848227, + "grad_norm": 1.109752753436135, + "learning_rate": 1.3558046454601552e-05, + "loss": 0.3213343918323517, + "step": 3127 + }, + { + "epoch": 0.8306997742663657, + "grad_norm": 1.0918389418395038, + "learning_rate": 1.355394267648158e-05, + "loss": 0.3012468218803406, + "step": 3128 + }, + { + "epoch": 0.8309653432479086, + "grad_norm": 1.1319633441718049, + "learning_rate": 1.3549838213203374e-05, + "loss": 0.3272971510887146, + "step": 3129 + }, + { + "epoch": 0.8312309122294516, + "grad_norm": 1.0778057413430624, + "learning_rate": 1.354573306555823e-05, + "loss": 0.30032482743263245, + "step": 3130 + }, + { + "epoch": 0.8314964812109945, + "grad_norm": 1.0778331818873157, + "learning_rate": 1.3541627234337567e-05, + "loss": 0.2820669412612915, + "step": 3131 + }, + { + "epoch": 0.8317620501925375, + "grad_norm": 1.0187129279356677, + "learning_rate": 1.3537520720332943e-05, + "loss": 0.2638673782348633, + "step": 3132 + }, + { + "epoch": 0.8320276191740804, + "grad_norm": 1.0843507637886551, + "learning_rate": 1.3533413524336043e-05, + "loss": 0.2766842246055603, + "step": 3133 + }, + { + "epoch": 0.8322931881556235, + "grad_norm": 1.2660530642163288, + "learning_rate": 1.3529305647138689e-05, + "loss": 0.330536425113678, + "step": 3134 + }, + { + "epoch": 0.8325587571371664, + "grad_norm": 1.0925834195413107, + "learning_rate": 1.3525197089532833e-05, + "loss": 0.30375364422798157, + "step": 3135 + }, + { + "epoch": 0.8328243261187094, + "grad_norm": 1.1657669106128519, + "learning_rate": 1.3521087852310555e-05, + "loss": 0.3092171549797058, + "step": 3136 + }, + { + "epoch": 0.8330898951002523, + "grad_norm": 1.1686338102407274, + "learning_rate": 1.3516977936264062e-05, + "loss": 0.28651195764541626, + "step": 3137 + }, + { + "epoch": 0.8333554640817953, + "grad_norm": 1.0845327487717817, + "learning_rate": 1.3512867342185705e-05, + "loss": 0.2882133722305298, + "step": 3138 + }, + { + "epoch": 0.8336210330633382, + "grad_norm": 1.1325019700739036, + "learning_rate": 1.3508756070867955e-05, + "loss": 0.30633628368377686, + "step": 3139 + }, + { + "epoch": 0.8338866020448812, + "grad_norm": 1.090943303162736, + "learning_rate": 1.3504644123103415e-05, + "loss": 0.2819565236568451, + "step": 3140 + }, + { + "epoch": 0.8341521710264241, + "grad_norm": 1.0804420637943886, + "learning_rate": 1.3500531499684819e-05, + "loss": 0.29544374346733093, + "step": 3141 + }, + { + "epoch": 0.8344177400079671, + "grad_norm": 1.10400689114043, + "learning_rate": 1.3496418201405037e-05, + "loss": 0.29383376240730286, + "step": 3142 + }, + { + "epoch": 0.83468330898951, + "grad_norm": 0.9862964562028984, + "learning_rate": 1.3492304229057062e-05, + "loss": 0.24945983290672302, + "step": 3143 + }, + { + "epoch": 0.834948877971053, + "grad_norm": 1.2055608503616826, + "learning_rate": 1.3488189583434023e-05, + "loss": 0.338919997215271, + "step": 3144 + }, + { + "epoch": 0.835214446952596, + "grad_norm": 1.071166648249549, + "learning_rate": 1.348407426532917e-05, + "loss": 0.29555821418762207, + "step": 3145 + }, + { + "epoch": 0.8354800159341389, + "grad_norm": 1.0650010322896095, + "learning_rate": 1.3479958275535887e-05, + "loss": 0.31038299202919006, + "step": 3146 + }, + { + "epoch": 0.8357455849156818, + "grad_norm": 1.021351909092412, + "learning_rate": 1.347584161484769e-05, + "loss": 0.2595089077949524, + "step": 3147 + }, + { + "epoch": 0.8360111538972248, + "grad_norm": 1.1885926674667484, + "learning_rate": 1.3471724284058227e-05, + "loss": 0.3287338614463806, + "step": 3148 + }, + { + "epoch": 0.8362767228787678, + "grad_norm": 1.1997618392346763, + "learning_rate": 1.3467606283961268e-05, + "loss": 0.3109680414199829, + "step": 3149 + }, + { + "epoch": 0.8365422918603107, + "grad_norm": 1.0762954067078139, + "learning_rate": 1.346348761535071e-05, + "loss": 0.2584227919578552, + "step": 3150 + }, + { + "epoch": 0.8368078608418537, + "grad_norm": 1.137771769139511, + "learning_rate": 1.345936827902059e-05, + "loss": 0.3038554787635803, + "step": 3151 + }, + { + "epoch": 0.8370734298233966, + "grad_norm": 1.029659281383911, + "learning_rate": 1.3455248275765067e-05, + "loss": 0.28267812728881836, + "step": 3152 + }, + { + "epoch": 0.8373389988049396, + "grad_norm": 1.163661242492436, + "learning_rate": 1.3451127606378425e-05, + "loss": 0.3328094184398651, + "step": 3153 + }, + { + "epoch": 0.8376045677864825, + "grad_norm": 1.084045978606854, + "learning_rate": 1.3447006271655082e-05, + "loss": 0.3235865533351898, + "step": 3154 + }, + { + "epoch": 0.8378701367680255, + "grad_norm": 1.037100355990568, + "learning_rate": 1.3442884272389583e-05, + "loss": 0.25394493341445923, + "step": 3155 + }, + { + "epoch": 0.8381357057495684, + "grad_norm": 1.1250984496593863, + "learning_rate": 1.3438761609376604e-05, + "loss": 0.29841768741607666, + "step": 3156 + }, + { + "epoch": 0.8384012747311114, + "grad_norm": 1.1999100818775306, + "learning_rate": 1.3434638283410942e-05, + "loss": 0.3161924183368683, + "step": 3157 + }, + { + "epoch": 0.8386668437126543, + "grad_norm": 0.9017579941601053, + "learning_rate": 1.3430514295287526e-05, + "loss": 0.22781039774417877, + "step": 3158 + }, + { + "epoch": 0.8389324126941973, + "grad_norm": 1.0534948555265085, + "learning_rate": 1.3426389645801415e-05, + "loss": 0.2947984039783478, + "step": 3159 + }, + { + "epoch": 0.8391979816757402, + "grad_norm": 1.0286789238265646, + "learning_rate": 1.342226433574779e-05, + "loss": 0.2827467918395996, + "step": 3160 + }, + { + "epoch": 0.8394635506572832, + "grad_norm": 1.0453932660244052, + "learning_rate": 1.3418138365921962e-05, + "loss": 0.3149232268333435, + "step": 3161 + }, + { + "epoch": 0.8397291196388262, + "grad_norm": 1.2487567497076437, + "learning_rate": 1.3414011737119373e-05, + "loss": 0.33154603838920593, + "step": 3162 + }, + { + "epoch": 0.8399946886203692, + "grad_norm": 1.074983718750332, + "learning_rate": 1.3409884450135581e-05, + "loss": 0.28532034158706665, + "step": 3163 + }, + { + "epoch": 0.8402602576019121, + "grad_norm": 1.0695327636228384, + "learning_rate": 1.3405756505766286e-05, + "loss": 0.2539500892162323, + "step": 3164 + }, + { + "epoch": 0.8405258265834551, + "grad_norm": 1.0653532722719707, + "learning_rate": 1.3401627904807302e-05, + "loss": 0.3023888170719147, + "step": 3165 + }, + { + "epoch": 0.840791395564998, + "grad_norm": 1.0811844194203637, + "learning_rate": 1.3397498648054579e-05, + "loss": 0.3088506758213043, + "step": 3166 + }, + { + "epoch": 0.841056964546541, + "grad_norm": 1.2249048833028835, + "learning_rate": 1.3393368736304184e-05, + "loss": 0.3223467469215393, + "step": 3167 + }, + { + "epoch": 0.841322533528084, + "grad_norm": 1.0772937869709083, + "learning_rate": 1.3389238170352318e-05, + "loss": 0.2541419565677643, + "step": 3168 + }, + { + "epoch": 0.8415881025096269, + "grad_norm": 1.0463826735598363, + "learning_rate": 1.3385106950995308e-05, + "loss": 0.2915497422218323, + "step": 3169 + }, + { + "epoch": 0.8418536714911699, + "grad_norm": 1.1726858597591174, + "learning_rate": 1.3380975079029598e-05, + "loss": 0.2907465994358063, + "step": 3170 + }, + { + "epoch": 0.8421192404727128, + "grad_norm": 1.0581221380369799, + "learning_rate": 1.337684255525177e-05, + "loss": 0.2587417960166931, + "step": 3171 + }, + { + "epoch": 0.8423848094542558, + "grad_norm": 1.1080472137531636, + "learning_rate": 1.3372709380458522e-05, + "loss": 0.2932469844818115, + "step": 3172 + }, + { + "epoch": 0.8426503784357987, + "grad_norm": 1.2359417241278925, + "learning_rate": 1.3368575555446681e-05, + "loss": 0.31451860070228577, + "step": 3173 + }, + { + "epoch": 0.8429159474173417, + "grad_norm": 1.067745190297883, + "learning_rate": 1.3364441081013205e-05, + "loss": 0.24513742327690125, + "step": 3174 + }, + { + "epoch": 0.8431815163988846, + "grad_norm": 1.0795526820997523, + "learning_rate": 1.3360305957955166e-05, + "loss": 0.29781201481819153, + "step": 3175 + }, + { + "epoch": 0.8434470853804276, + "grad_norm": 1.3176130252584213, + "learning_rate": 1.3356170187069775e-05, + "loss": 0.30925726890563965, + "step": 3176 + }, + { + "epoch": 0.8437126543619705, + "grad_norm": 1.1110632932678028, + "learning_rate": 1.3352033769154347e-05, + "loss": 0.2822851538658142, + "step": 3177 + }, + { + "epoch": 0.8439782233435135, + "grad_norm": 1.0033731418220575, + "learning_rate": 1.3347896705006344e-05, + "loss": 0.2511071264743805, + "step": 3178 + }, + { + "epoch": 0.8442437923250564, + "grad_norm": 1.1921629041957855, + "learning_rate": 1.3343758995423344e-05, + "loss": 0.3002505302429199, + "step": 3179 + }, + { + "epoch": 0.8445093613065994, + "grad_norm": 0.9942107511416755, + "learning_rate": 1.3339620641203043e-05, + "loss": 0.285504549741745, + "step": 3180 + }, + { + "epoch": 0.8447749302881423, + "grad_norm": 1.1880306222164103, + "learning_rate": 1.3335481643143271e-05, + "loss": 0.31988856196403503, + "step": 3181 + }, + { + "epoch": 0.8450404992696853, + "grad_norm": 1.0905691447057935, + "learning_rate": 1.3331342002041973e-05, + "loss": 0.29330819845199585, + "step": 3182 + }, + { + "epoch": 0.8453060682512282, + "grad_norm": 1.049547579497453, + "learning_rate": 1.3327201718697232e-05, + "loss": 0.28694427013397217, + "step": 3183 + }, + { + "epoch": 0.8455716372327712, + "grad_norm": 1.0561569710297949, + "learning_rate": 1.3323060793907239e-05, + "loss": 0.24912211298942566, + "step": 3184 + }, + { + "epoch": 0.8458372062143141, + "grad_norm": 1.1346018526864223, + "learning_rate": 1.3318919228470315e-05, + "loss": 0.28117647767066956, + "step": 3185 + }, + { + "epoch": 0.8461027751958571, + "grad_norm": 1.2524387900920857, + "learning_rate": 1.3314777023184907e-05, + "loss": 0.3176446557044983, + "step": 3186 + }, + { + "epoch": 0.8463683441774, + "grad_norm": 1.0728463380702977, + "learning_rate": 1.3310634178849583e-05, + "loss": 0.31205689907073975, + "step": 3187 + }, + { + "epoch": 0.846633913158943, + "grad_norm": 1.1500545538779043, + "learning_rate": 1.3306490696263034e-05, + "loss": 0.29942232370376587, + "step": 3188 + }, + { + "epoch": 0.8468994821404859, + "grad_norm": 1.161750107962421, + "learning_rate": 1.3302346576224077e-05, + "loss": 0.3149508833885193, + "step": 3189 + }, + { + "epoch": 0.847165051122029, + "grad_norm": 1.0924626607758976, + "learning_rate": 1.3298201819531646e-05, + "loss": 0.2930619418621063, + "step": 3190 + }, + { + "epoch": 0.847430620103572, + "grad_norm": 1.0958680594537196, + "learning_rate": 1.3294056426984804e-05, + "loss": 0.3089582920074463, + "step": 3191 + }, + { + "epoch": 0.8476961890851149, + "grad_norm": 1.2175163313381927, + "learning_rate": 1.3289910399382733e-05, + "loss": 0.3120991587638855, + "step": 3192 + }, + { + "epoch": 0.8479617580666579, + "grad_norm": 1.0535688994558223, + "learning_rate": 1.3285763737524738e-05, + "loss": 0.2728833258152008, + "step": 3193 + }, + { + "epoch": 0.8482273270482008, + "grad_norm": 1.0457465617551238, + "learning_rate": 1.3281616442210246e-05, + "loss": 0.2833358347415924, + "step": 3194 + }, + { + "epoch": 0.8484928960297438, + "grad_norm": 1.0714039101779447, + "learning_rate": 1.3277468514238803e-05, + "loss": 0.26218950748443604, + "step": 3195 + }, + { + "epoch": 0.8487584650112867, + "grad_norm": 1.0938436245702892, + "learning_rate": 1.3273319954410088e-05, + "loss": 0.3120720386505127, + "step": 3196 + }, + { + "epoch": 0.8490240339928297, + "grad_norm": 1.0412833763909957, + "learning_rate": 1.3269170763523892e-05, + "loss": 0.2748696208000183, + "step": 3197 + }, + { + "epoch": 0.8492896029743726, + "grad_norm": 1.0148051769031237, + "learning_rate": 1.326502094238013e-05, + "loss": 0.2892690598964691, + "step": 3198 + }, + { + "epoch": 0.8495551719559156, + "grad_norm": 1.068648430192615, + "learning_rate": 1.3260870491778835e-05, + "loss": 0.26583510637283325, + "step": 3199 + }, + { + "epoch": 0.8498207409374585, + "grad_norm": 1.105620955007001, + "learning_rate": 1.325671941252017e-05, + "loss": 0.31602388620376587, + "step": 3200 + }, + { + "epoch": 0.8500863099190015, + "grad_norm": 1.068517421778971, + "learning_rate": 1.3252567705404409e-05, + "loss": 0.2980017364025116, + "step": 3201 + }, + { + "epoch": 0.8503518789005444, + "grad_norm": 1.0740685936810315, + "learning_rate": 1.3248415371231957e-05, + "loss": 0.27081727981567383, + "step": 3202 + }, + { + "epoch": 0.8506174478820874, + "grad_norm": 1.2590520587844396, + "learning_rate": 1.3244262410803333e-05, + "loss": 0.28895002603530884, + "step": 3203 + }, + { + "epoch": 0.8508830168636303, + "grad_norm": 1.1373552047630993, + "learning_rate": 1.3240108824919176e-05, + "loss": 0.30804315209388733, + "step": 3204 + }, + { + "epoch": 0.8511485858451733, + "grad_norm": 1.1074447190812993, + "learning_rate": 1.3235954614380253e-05, + "loss": 0.28173667192459106, + "step": 3205 + }, + { + "epoch": 0.8514141548267162, + "grad_norm": 1.097058715769224, + "learning_rate": 1.3231799779987445e-05, + "loss": 0.3113047778606415, + "step": 3206 + }, + { + "epoch": 0.8516797238082592, + "grad_norm": 1.0285862677327642, + "learning_rate": 1.3227644322541754e-05, + "loss": 0.247248113155365, + "step": 3207 + }, + { + "epoch": 0.8519452927898021, + "grad_norm": 1.1032823581833329, + "learning_rate": 1.3223488242844309e-05, + "loss": 0.27078187465667725, + "step": 3208 + }, + { + "epoch": 0.8522108617713451, + "grad_norm": 1.0635139884249352, + "learning_rate": 1.321933154169634e-05, + "loss": 0.2749357223510742, + "step": 3209 + }, + { + "epoch": 0.852476430752888, + "grad_norm": 1.0129100217319345, + "learning_rate": 1.3215174219899224e-05, + "loss": 0.25382956862449646, + "step": 3210 + }, + { + "epoch": 0.852741999734431, + "grad_norm": 1.0528151094235563, + "learning_rate": 1.3211016278254436e-05, + "loss": 0.3237685263156891, + "step": 3211 + }, + { + "epoch": 0.8530075687159739, + "grad_norm": 1.273911241149791, + "learning_rate": 1.3206857717563581e-05, + "loss": 0.2899032235145569, + "step": 3212 + }, + { + "epoch": 0.8532731376975169, + "grad_norm": 1.040323856520164, + "learning_rate": 1.3202698538628376e-05, + "loss": 0.25997933745384216, + "step": 3213 + }, + { + "epoch": 0.8535387066790598, + "grad_norm": 1.121125084608177, + "learning_rate": 1.3198538742250668e-05, + "loss": 0.3228183090686798, + "step": 3214 + }, + { + "epoch": 0.8538042756606028, + "grad_norm": 1.1002230220524851, + "learning_rate": 1.3194378329232413e-05, + "loss": 0.31993368268013, + "step": 3215 + }, + { + "epoch": 0.8540698446421457, + "grad_norm": 1.157115702913611, + "learning_rate": 1.3190217300375694e-05, + "loss": 0.29520007967948914, + "step": 3216 + }, + { + "epoch": 0.8543354136236887, + "grad_norm": 1.0898926058638614, + "learning_rate": 1.3186055656482702e-05, + "loss": 0.31073522567749023, + "step": 3217 + }, + { + "epoch": 0.8546009826052318, + "grad_norm": 1.1465583376043518, + "learning_rate": 1.3181893398355752e-05, + "loss": 0.34354183077812195, + "step": 3218 + }, + { + "epoch": 0.8548665515867747, + "grad_norm": 1.179928846812524, + "learning_rate": 1.3177730526797286e-05, + "loss": 0.27676698565483093, + "step": 3219 + }, + { + "epoch": 0.8551321205683177, + "grad_norm": 1.0792983255501365, + "learning_rate": 1.3173567042609852e-05, + "loss": 0.27313530445098877, + "step": 3220 + }, + { + "epoch": 0.8553976895498606, + "grad_norm": 0.9249374113484707, + "learning_rate": 1.3169402946596119e-05, + "loss": 0.2517555058002472, + "step": 3221 + }, + { + "epoch": 0.8556632585314036, + "grad_norm": 1.0684778793194236, + "learning_rate": 1.3165238239558878e-05, + "loss": 0.29700207710266113, + "step": 3222 + }, + { + "epoch": 0.8559288275129465, + "grad_norm": 1.1262235464302217, + "learning_rate": 1.3161072922301037e-05, + "loss": 0.3182620704174042, + "step": 3223 + }, + { + "epoch": 0.8561943964944895, + "grad_norm": 1.123570804553303, + "learning_rate": 1.3156906995625615e-05, + "loss": 0.3112961947917938, + "step": 3224 + }, + { + "epoch": 0.8564599654760324, + "grad_norm": 1.1746597736734636, + "learning_rate": 1.3152740460335757e-05, + "loss": 0.3080563545227051, + "step": 3225 + }, + { + "epoch": 0.8567255344575754, + "grad_norm": 1.1646363575237453, + "learning_rate": 1.3148573317234726e-05, + "loss": 0.31197935342788696, + "step": 3226 + }, + { + "epoch": 0.8569911034391183, + "grad_norm": 1.0455051980244612, + "learning_rate": 1.3144405567125886e-05, + "loss": 0.27377086877822876, + "step": 3227 + }, + { + "epoch": 0.8572566724206613, + "grad_norm": 1.050528412475655, + "learning_rate": 1.3140237210812741e-05, + "loss": 0.25303182005882263, + "step": 3228 + }, + { + "epoch": 0.8575222414022042, + "grad_norm": 1.0664458431943622, + "learning_rate": 1.3136068249098899e-05, + "loss": 0.27949726581573486, + "step": 3229 + }, + { + "epoch": 0.8577878103837472, + "grad_norm": 1.0907347405782384, + "learning_rate": 1.3131898682788082e-05, + "loss": 0.278359055519104, + "step": 3230 + }, + { + "epoch": 0.8580533793652901, + "grad_norm": 1.081462335761227, + "learning_rate": 1.312772851268414e-05, + "loss": 0.28507643938064575, + "step": 3231 + }, + { + "epoch": 0.8583189483468331, + "grad_norm": 1.0256133822907842, + "learning_rate": 1.3123557739591026e-05, + "loss": 0.2689790427684784, + "step": 3232 + }, + { + "epoch": 0.858584517328376, + "grad_norm": 1.1569049456144243, + "learning_rate": 1.3119386364312821e-05, + "loss": 0.31956973671913147, + "step": 3233 + }, + { + "epoch": 0.858850086309919, + "grad_norm": 1.0914807974802394, + "learning_rate": 1.3115214387653711e-05, + "loss": 0.2837323546409607, + "step": 3234 + }, + { + "epoch": 0.8591156552914619, + "grad_norm": 1.0015578039784754, + "learning_rate": 1.3111041810418011e-05, + "loss": 0.2756272554397583, + "step": 3235 + }, + { + "epoch": 0.8593812242730049, + "grad_norm": 1.0283979772106548, + "learning_rate": 1.3106868633410139e-05, + "loss": 0.2664923369884491, + "step": 3236 + }, + { + "epoch": 0.8596467932545478, + "grad_norm": 1.2217960050611696, + "learning_rate": 1.3102694857434637e-05, + "loss": 0.2842246890068054, + "step": 3237 + }, + { + "epoch": 0.8599123622360908, + "grad_norm": 1.0632739499737671, + "learning_rate": 1.3098520483296159e-05, + "loss": 0.3066467344760895, + "step": 3238 + }, + { + "epoch": 0.8601779312176338, + "grad_norm": 1.148754786147734, + "learning_rate": 1.3094345511799478e-05, + "loss": 0.3042510151863098, + "step": 3239 + }, + { + "epoch": 0.8604435001991767, + "grad_norm": 0.9995895975923785, + "learning_rate": 1.3090169943749475e-05, + "loss": 0.2753696143627167, + "step": 3240 + }, + { + "epoch": 0.8607090691807197, + "grad_norm": 1.0325788591675433, + "learning_rate": 1.3085993779951154e-05, + "loss": 0.2561766803264618, + "step": 3241 + }, + { + "epoch": 0.8609746381622626, + "grad_norm": 1.2136300404308455, + "learning_rate": 1.3081817021209626e-05, + "loss": 0.297982782125473, + "step": 3242 + }, + { + "epoch": 0.8612402071438056, + "grad_norm": 1.0615498924909679, + "learning_rate": 1.3077639668330124e-05, + "loss": 0.2961920499801636, + "step": 3243 + }, + { + "epoch": 0.8615057761253485, + "grad_norm": 1.1445145037694135, + "learning_rate": 1.3073461722117991e-05, + "loss": 0.2868857979774475, + "step": 3244 + }, + { + "epoch": 0.8617713451068915, + "grad_norm": 0.9475657969770804, + "learning_rate": 1.3069283183378683e-05, + "loss": 0.22930951416492462, + "step": 3245 + }, + { + "epoch": 0.8620369140884345, + "grad_norm": 1.1416904771862697, + "learning_rate": 1.306510405291778e-05, + "loss": 0.29737964272499084, + "step": 3246 + }, + { + "epoch": 0.8623024830699775, + "grad_norm": 1.0401904023883137, + "learning_rate": 1.3060924331540964e-05, + "loss": 0.2764522433280945, + "step": 3247 + }, + { + "epoch": 0.8625680520515204, + "grad_norm": 0.9863739655208709, + "learning_rate": 1.3056744020054039e-05, + "loss": 0.27608832716941833, + "step": 3248 + }, + { + "epoch": 0.8628336210330634, + "grad_norm": 1.0115944755696356, + "learning_rate": 1.3052563119262915e-05, + "loss": 0.25667035579681396, + "step": 3249 + }, + { + "epoch": 0.8630991900146063, + "grad_norm": 1.1289498412687866, + "learning_rate": 1.3048381629973622e-05, + "loss": 0.3015863597393036, + "step": 3250 + }, + { + "epoch": 0.8633647589961493, + "grad_norm": 1.123802742380982, + "learning_rate": 1.3044199552992307e-05, + "loss": 0.2798422873020172, + "step": 3251 + }, + { + "epoch": 0.8636303279776922, + "grad_norm": 1.1385670465264601, + "learning_rate": 1.304001688912522e-05, + "loss": 0.2856596112251282, + "step": 3252 + }, + { + "epoch": 0.8638958969592352, + "grad_norm": 1.2094473565150297, + "learning_rate": 1.303583363917873e-05, + "loss": 0.30247554183006287, + "step": 3253 + }, + { + "epoch": 0.8641614659407781, + "grad_norm": 1.1517937069448307, + "learning_rate": 1.303164980395932e-05, + "loss": 0.26817965507507324, + "step": 3254 + }, + { + "epoch": 0.8644270349223211, + "grad_norm": 1.197653632931973, + "learning_rate": 1.3027465384273579e-05, + "loss": 0.26919034123420715, + "step": 3255 + }, + { + "epoch": 0.864692603903864, + "grad_norm": 1.1206851183742237, + "learning_rate": 1.3023280380928223e-05, + "loss": 0.29495447874069214, + "step": 3256 + }, + { + "epoch": 0.864958172885407, + "grad_norm": 1.0428738517831404, + "learning_rate": 1.3019094794730063e-05, + "loss": 0.26766717433929443, + "step": 3257 + }, + { + "epoch": 0.86522374186695, + "grad_norm": 0.9998039586765358, + "learning_rate": 1.3014908626486032e-05, + "loss": 0.2573341131210327, + "step": 3258 + }, + { + "epoch": 0.8654893108484929, + "grad_norm": 1.226366277313196, + "learning_rate": 1.3010721877003177e-05, + "loss": 0.32776498794555664, + "step": 3259 + }, + { + "epoch": 0.8657548798300359, + "grad_norm": 1.1631189448763641, + "learning_rate": 1.3006534547088651e-05, + "loss": 0.3107950687408447, + "step": 3260 + }, + { + "epoch": 0.8660204488115788, + "grad_norm": 1.0476224109192296, + "learning_rate": 1.3002346637549726e-05, + "loss": 0.26143360137939453, + "step": 3261 + }, + { + "epoch": 0.8662860177931218, + "grad_norm": 1.035123297672666, + "learning_rate": 1.2998158149193773e-05, + "loss": 0.25666722655296326, + "step": 3262 + }, + { + "epoch": 0.8665515867746647, + "grad_norm": 1.1492097701405037, + "learning_rate": 1.2993969082828296e-05, + "loss": 0.2982695698738098, + "step": 3263 + }, + { + "epoch": 0.8668171557562077, + "grad_norm": 1.0937256102841277, + "learning_rate": 1.2989779439260888e-05, + "loss": 0.30144304037094116, + "step": 3264 + }, + { + "epoch": 0.8670827247377506, + "grad_norm": 1.0563159913050848, + "learning_rate": 1.2985589219299264e-05, + "loss": 0.30421534180641174, + "step": 3265 + }, + { + "epoch": 0.8673482937192936, + "grad_norm": 1.0698350081311019, + "learning_rate": 1.298139842375125e-05, + "loss": 0.23653842508792877, + "step": 3266 + }, + { + "epoch": 0.8676138627008365, + "grad_norm": 1.2059661362441823, + "learning_rate": 1.2977207053424781e-05, + "loss": 0.284118115901947, + "step": 3267 + }, + { + "epoch": 0.8678794316823795, + "grad_norm": 1.0387152548948486, + "learning_rate": 1.2973015109127907e-05, + "loss": 0.30857348442077637, + "step": 3268 + }, + { + "epoch": 0.8681450006639224, + "grad_norm": 1.0987728632322369, + "learning_rate": 1.2968822591668784e-05, + "loss": 0.2826589047908783, + "step": 3269 + }, + { + "epoch": 0.8684105696454654, + "grad_norm": 1.109218087764862, + "learning_rate": 1.2964629501855678e-05, + "loss": 0.27634552121162415, + "step": 3270 + }, + { + "epoch": 0.8686761386270083, + "grad_norm": 1.0217259699141916, + "learning_rate": 1.296043584049697e-05, + "loss": 0.25823545455932617, + "step": 3271 + }, + { + "epoch": 0.8689417076085513, + "grad_norm": 1.148249635090711, + "learning_rate": 1.2956241608401145e-05, + "loss": 0.28939294815063477, + "step": 3272 + }, + { + "epoch": 0.8692072765900942, + "grad_norm": 1.0622455952024017, + "learning_rate": 1.2952046806376806e-05, + "loss": 0.3042459785938263, + "step": 3273 + }, + { + "epoch": 0.8694728455716373, + "grad_norm": 1.042505415392428, + "learning_rate": 1.2947851435232658e-05, + "loss": 0.2834415137767792, + "step": 3274 + }, + { + "epoch": 0.8697384145531802, + "grad_norm": 1.144903021800522, + "learning_rate": 1.2943655495777518e-05, + "loss": 0.28226330876350403, + "step": 3275 + }, + { + "epoch": 0.8700039835347232, + "grad_norm": 1.023547316743189, + "learning_rate": 1.2939458988820317e-05, + "loss": 0.2796105742454529, + "step": 3276 + }, + { + "epoch": 0.8702695525162661, + "grad_norm": 0.9903193313068561, + "learning_rate": 1.2935261915170091e-05, + "loss": 0.24790553748607635, + "step": 3277 + }, + { + "epoch": 0.8705351214978091, + "grad_norm": 1.0279177898991045, + "learning_rate": 1.2931064275635987e-05, + "loss": 0.25101587176322937, + "step": 3278 + }, + { + "epoch": 0.870800690479352, + "grad_norm": 1.1728597267839225, + "learning_rate": 1.2926866071027257e-05, + "loss": 0.3060816526412964, + "step": 3279 + }, + { + "epoch": 0.871066259460895, + "grad_norm": 1.1510511467115991, + "learning_rate": 1.2922667302153268e-05, + "loss": 0.3137212097644806, + "step": 3280 + }, + { + "epoch": 0.871331828442438, + "grad_norm": 0.9977159840643061, + "learning_rate": 1.2918467969823497e-05, + "loss": 0.2391548752784729, + "step": 3281 + }, + { + "epoch": 0.8715973974239809, + "grad_norm": 1.2003880700717509, + "learning_rate": 1.2914268074847516e-05, + "loss": 0.3219330608844757, + "step": 3282 + }, + { + "epoch": 0.8718629664055239, + "grad_norm": 1.126134187698585, + "learning_rate": 1.2910067618035025e-05, + "loss": 0.2934436798095703, + "step": 3283 + }, + { + "epoch": 0.8721285353870668, + "grad_norm": 1.2016016844780073, + "learning_rate": 1.2905866600195815e-05, + "loss": 0.2919486165046692, + "step": 3284 + }, + { + "epoch": 0.8723941043686098, + "grad_norm": 1.1895929482131946, + "learning_rate": 1.2901665022139796e-05, + "loss": 0.2840641438961029, + "step": 3285 + }, + { + "epoch": 0.8726596733501527, + "grad_norm": 1.0215741253911979, + "learning_rate": 1.2897462884676983e-05, + "loss": 0.24151530861854553, + "step": 3286 + }, + { + "epoch": 0.8729252423316957, + "grad_norm": 1.0040194757671277, + "learning_rate": 1.28932601886175e-05, + "loss": 0.24515505135059357, + "step": 3287 + }, + { + "epoch": 0.8731908113132386, + "grad_norm": 1.2173512735867882, + "learning_rate": 1.2889056934771577e-05, + "loss": 0.2561264634132385, + "step": 3288 + }, + { + "epoch": 0.8734563802947816, + "grad_norm": 1.1645401251165897, + "learning_rate": 1.2884853123949547e-05, + "loss": 0.2798641622066498, + "step": 3289 + }, + { + "epoch": 0.8737219492763245, + "grad_norm": 1.2693161910394721, + "learning_rate": 1.288064875696186e-05, + "loss": 0.35207298398017883, + "step": 3290 + }, + { + "epoch": 0.8739875182578675, + "grad_norm": 1.0184365377421387, + "learning_rate": 1.2876443834619066e-05, + "loss": 0.2778821289539337, + "step": 3291 + }, + { + "epoch": 0.8742530872394104, + "grad_norm": 1.044209880952949, + "learning_rate": 1.2872238357731825e-05, + "loss": 0.2691737413406372, + "step": 3292 + }, + { + "epoch": 0.8745186562209534, + "grad_norm": 1.1392637940929287, + "learning_rate": 1.2868032327110904e-05, + "loss": 0.25476595759391785, + "step": 3293 + }, + { + "epoch": 0.8747842252024963, + "grad_norm": 1.012064080488804, + "learning_rate": 1.2863825743567174e-05, + "loss": 0.258474737405777, + "step": 3294 + }, + { + "epoch": 0.8750497941840393, + "grad_norm": 1.17733236715245, + "learning_rate": 1.285961860791162e-05, + "loss": 0.32421568036079407, + "step": 3295 + }, + { + "epoch": 0.8753153631655822, + "grad_norm": 1.0747747984737868, + "learning_rate": 1.2855410920955323e-05, + "loss": 0.3090333342552185, + "step": 3296 + }, + { + "epoch": 0.8755809321471252, + "grad_norm": 1.1729934635240566, + "learning_rate": 1.2851202683509476e-05, + "loss": 0.26548707485198975, + "step": 3297 + }, + { + "epoch": 0.8758465011286681, + "grad_norm": 2.497627852681845, + "learning_rate": 1.2846993896385378e-05, + "loss": 0.3002355098724365, + "step": 3298 + }, + { + "epoch": 0.8761120701102111, + "grad_norm": 1.1706582997439863, + "learning_rate": 1.2842784560394433e-05, + "loss": 0.2924933135509491, + "step": 3299 + }, + { + "epoch": 0.876377639091754, + "grad_norm": 1.1544391256229967, + "learning_rate": 1.2838574676348155e-05, + "loss": 0.2886514663696289, + "step": 3300 + }, + { + "epoch": 0.876643208073297, + "grad_norm": 1.1131138367993383, + "learning_rate": 1.2834364245058155e-05, + "loss": 0.29821154475212097, + "step": 3301 + }, + { + "epoch": 0.87690877705484, + "grad_norm": 1.0278540671542709, + "learning_rate": 1.2830153267336159e-05, + "loss": 0.2656530737876892, + "step": 3302 + }, + { + "epoch": 0.877174346036383, + "grad_norm": 1.2018449655833119, + "learning_rate": 1.282594174399399e-05, + "loss": 0.3437826633453369, + "step": 3303 + }, + { + "epoch": 0.877439915017926, + "grad_norm": 1.0564301800372577, + "learning_rate": 1.2821729675843581e-05, + "loss": 0.29773175716400146, + "step": 3304 + }, + { + "epoch": 0.8777054839994689, + "grad_norm": 1.0707167209814024, + "learning_rate": 1.2817517063696973e-05, + "loss": 0.29772818088531494, + "step": 3305 + }, + { + "epoch": 0.8779710529810119, + "grad_norm": 1.1530012432828134, + "learning_rate": 1.2813303908366303e-05, + "loss": 0.3266611099243164, + "step": 3306 + }, + { + "epoch": 0.8782366219625548, + "grad_norm": 1.0044541774243023, + "learning_rate": 1.2809090210663818e-05, + "loss": 0.26599690318107605, + "step": 3307 + }, + { + "epoch": 0.8785021909440978, + "grad_norm": 1.0142651525790767, + "learning_rate": 1.2804875971401872e-05, + "loss": 0.27988117933273315, + "step": 3308 + }, + { + "epoch": 0.8787677599256407, + "grad_norm": 1.0221522532224918, + "learning_rate": 1.2800661191392916e-05, + "loss": 0.2630334496498108, + "step": 3309 + }, + { + "epoch": 0.8790333289071837, + "grad_norm": 1.022950247187023, + "learning_rate": 1.2796445871449517e-05, + "loss": 0.2628091871738434, + "step": 3310 + }, + { + "epoch": 0.8792988978887266, + "grad_norm": 1.1994310454875075, + "learning_rate": 1.2792230012384333e-05, + "loss": 0.3443898558616638, + "step": 3311 + }, + { + "epoch": 0.8795644668702696, + "grad_norm": 1.0673533832636588, + "learning_rate": 1.2788013615010136e-05, + "loss": 0.2966022491455078, + "step": 3312 + }, + { + "epoch": 0.8798300358518125, + "grad_norm": 1.1030087744198647, + "learning_rate": 1.2783796680139793e-05, + "loss": 0.2995494604110718, + "step": 3313 + }, + { + "epoch": 0.8800956048333555, + "grad_norm": 1.0504434000468303, + "learning_rate": 1.2779579208586283e-05, + "loss": 0.2652590870857239, + "step": 3314 + }, + { + "epoch": 0.8803611738148984, + "grad_norm": 1.1388460976467547, + "learning_rate": 1.2775361201162684e-05, + "loss": 0.3145690858364105, + "step": 3315 + }, + { + "epoch": 0.8806267427964414, + "grad_norm": 1.040210802651612, + "learning_rate": 1.2771142658682175e-05, + "loss": 0.25744086503982544, + "step": 3316 + }, + { + "epoch": 0.8808923117779843, + "grad_norm": 1.1618029117732733, + "learning_rate": 1.2766923581958046e-05, + "loss": 0.3129793405532837, + "step": 3317 + }, + { + "epoch": 0.8811578807595273, + "grad_norm": 1.166975234876197, + "learning_rate": 1.2762703971803684e-05, + "loss": 0.233384907245636, + "step": 3318 + }, + { + "epoch": 0.8814234497410702, + "grad_norm": 0.9242808009438505, + "learning_rate": 1.2758483829032579e-05, + "loss": 0.2422962635755539, + "step": 3319 + }, + { + "epoch": 0.8816890187226132, + "grad_norm": 1.0844595421589949, + "learning_rate": 1.2754263154458328e-05, + "loss": 0.2801973819732666, + "step": 3320 + }, + { + "epoch": 0.8819545877041561, + "grad_norm": 1.294346594070355, + "learning_rate": 1.2750041948894621e-05, + "loss": 0.30659937858581543, + "step": 3321 + }, + { + "epoch": 0.8822201566856991, + "grad_norm": 1.0921019252616484, + "learning_rate": 1.274582021315526e-05, + "loss": 0.28527066111564636, + "step": 3322 + }, + { + "epoch": 0.882485725667242, + "grad_norm": 1.0598264473011552, + "learning_rate": 1.2741597948054146e-05, + "loss": 0.23065675795078278, + "step": 3323 + }, + { + "epoch": 0.882751294648785, + "grad_norm": 1.0918730747592962, + "learning_rate": 1.2737375154405283e-05, + "loss": 0.2727832794189453, + "step": 3324 + }, + { + "epoch": 0.8830168636303279, + "grad_norm": 1.0789259788038712, + "learning_rate": 1.273315183302277e-05, + "loss": 0.26809507608413696, + "step": 3325 + }, + { + "epoch": 0.8832824326118709, + "grad_norm": 1.1647625824499415, + "learning_rate": 1.2728927984720823e-05, + "loss": 0.3250407576560974, + "step": 3326 + }, + { + "epoch": 0.8835480015934138, + "grad_norm": 1.0915300736309757, + "learning_rate": 1.2724703610313742e-05, + "loss": 0.2651330232620239, + "step": 3327 + }, + { + "epoch": 0.8838135705749568, + "grad_norm": 1.206298710080754, + "learning_rate": 1.2720478710615944e-05, + "loss": 0.27337920665740967, + "step": 3328 + }, + { + "epoch": 0.8840791395564997, + "grad_norm": 1.0282478968996285, + "learning_rate": 1.2716253286441935e-05, + "loss": 0.2664092183113098, + "step": 3329 + }, + { + "epoch": 0.8843447085380428, + "grad_norm": 1.1354570950284573, + "learning_rate": 1.2712027338606323e-05, + "loss": 0.27927765250205994, + "step": 3330 + }, + { + "epoch": 0.8846102775195858, + "grad_norm": 1.1204979208217445, + "learning_rate": 1.270780086792383e-05, + "loss": 0.27241113781929016, + "step": 3331 + }, + { + "epoch": 0.8848758465011287, + "grad_norm": 1.0795162414965664, + "learning_rate": 1.2703573875209264e-05, + "loss": 0.28279373049736023, + "step": 3332 + }, + { + "epoch": 0.8851414154826717, + "grad_norm": 1.1634487658284207, + "learning_rate": 1.2699346361277538e-05, + "loss": 0.3011108934879303, + "step": 3333 + }, + { + "epoch": 0.8854069844642146, + "grad_norm": 2.772716513531517, + "learning_rate": 1.2695118326943671e-05, + "loss": 0.3071288764476776, + "step": 3334 + }, + { + "epoch": 0.8856725534457576, + "grad_norm": 1.0969950934626527, + "learning_rate": 1.2690889773022778e-05, + "loss": 0.2688761353492737, + "step": 3335 + }, + { + "epoch": 0.8859381224273005, + "grad_norm": 1.1363327585955358, + "learning_rate": 1.2686660700330074e-05, + "loss": 0.2788669466972351, + "step": 3336 + }, + { + "epoch": 0.8862036914088435, + "grad_norm": 1.0884694079711634, + "learning_rate": 1.268243110968087e-05, + "loss": 0.2801516652107239, + "step": 3337 + }, + { + "epoch": 0.8864692603903864, + "grad_norm": 1.0414904749451368, + "learning_rate": 1.2678201001890587e-05, + "loss": 0.2876908779144287, + "step": 3338 + }, + { + "epoch": 0.8867348293719294, + "grad_norm": 1.1731879069090343, + "learning_rate": 1.2673970377774733e-05, + "loss": 0.27709734439849854, + "step": 3339 + }, + { + "epoch": 0.8870003983534723, + "grad_norm": 1.2053408848372587, + "learning_rate": 1.266973923814893e-05, + "loss": 0.3191622793674469, + "step": 3340 + }, + { + "epoch": 0.8872659673350153, + "grad_norm": 1.098682297791164, + "learning_rate": 1.2665507583828889e-05, + "loss": 0.2873385548591614, + "step": 3341 + }, + { + "epoch": 0.8875315363165582, + "grad_norm": 1.1730973936717166, + "learning_rate": 1.2661275415630421e-05, + "loss": 0.2922922372817993, + "step": 3342 + }, + { + "epoch": 0.8877971052981012, + "grad_norm": 1.1127017834272521, + "learning_rate": 1.2657042734369443e-05, + "loss": 0.305694043636322, + "step": 3343 + }, + { + "epoch": 0.8880626742796441, + "grad_norm": 1.120364019457983, + "learning_rate": 1.2652809540861958e-05, + "loss": 0.29108062386512756, + "step": 3344 + }, + { + "epoch": 0.8883282432611871, + "grad_norm": 1.076655765525218, + "learning_rate": 1.2648575835924084e-05, + "loss": 0.24170495569705963, + "step": 3345 + }, + { + "epoch": 0.88859381224273, + "grad_norm": 1.4853370236272063, + "learning_rate": 1.2644341620372025e-05, + "loss": 0.2987719476222992, + "step": 3346 + }, + { + "epoch": 0.888859381224273, + "grad_norm": 0.9743774864126274, + "learning_rate": 1.2640106895022088e-05, + "loss": 0.21037599444389343, + "step": 3347 + }, + { + "epoch": 0.889124950205816, + "grad_norm": 1.034527053965976, + "learning_rate": 1.2635871660690677e-05, + "loss": 0.25263655185699463, + "step": 3348 + }, + { + "epoch": 0.8893905191873589, + "grad_norm": 1.2196740502064325, + "learning_rate": 1.2631635918194301e-05, + "loss": 0.30169543623924255, + "step": 3349 + }, + { + "epoch": 0.8896560881689018, + "grad_norm": 1.0624381650731511, + "learning_rate": 1.2627399668349554e-05, + "loss": 0.26982420682907104, + "step": 3350 + }, + { + "epoch": 0.8899216571504448, + "grad_norm": 1.1785068724165282, + "learning_rate": 1.262316291197314e-05, + "loss": 0.3281899690628052, + "step": 3351 + }, + { + "epoch": 0.8901872261319878, + "grad_norm": 1.1157278400935415, + "learning_rate": 1.2618925649881852e-05, + "loss": 0.30140435695648193, + "step": 3352 + }, + { + "epoch": 0.8904527951135307, + "grad_norm": 0.9928732296573972, + "learning_rate": 1.261468788289259e-05, + "loss": 0.22343885898590088, + "step": 3353 + }, + { + "epoch": 0.8907183640950737, + "grad_norm": 1.0410264886026745, + "learning_rate": 1.261044961182234e-05, + "loss": 0.2889901399612427, + "step": 3354 + }, + { + "epoch": 0.8909839330766166, + "grad_norm": 1.0933214790144683, + "learning_rate": 1.260621083748819e-05, + "loss": 0.27896153926849365, + "step": 3355 + }, + { + "epoch": 0.8912495020581596, + "grad_norm": 1.077111437166839, + "learning_rate": 1.2601971560707328e-05, + "loss": 0.29390811920166016, + "step": 3356 + }, + { + "epoch": 0.8915150710397025, + "grad_norm": 1.0468332572471015, + "learning_rate": 1.2597731782297036e-05, + "loss": 0.2872384190559387, + "step": 3357 + }, + { + "epoch": 0.8917806400212455, + "grad_norm": 1.3094137802442116, + "learning_rate": 1.2593491503074698e-05, + "loss": 0.29753726720809937, + "step": 3358 + }, + { + "epoch": 0.8920462090027885, + "grad_norm": 1.1441306843080605, + "learning_rate": 1.2589250723857782e-05, + "loss": 0.31631946563720703, + "step": 3359 + }, + { + "epoch": 0.8923117779843315, + "grad_norm": 1.1374138683367387, + "learning_rate": 1.2585009445463867e-05, + "loss": 0.2932048738002777, + "step": 3360 + }, + { + "epoch": 0.8925773469658744, + "grad_norm": 1.0483655110874528, + "learning_rate": 1.2580767668710614e-05, + "loss": 0.2902034521102905, + "step": 3361 + }, + { + "epoch": 0.8928429159474174, + "grad_norm": 1.0712531988705474, + "learning_rate": 1.2576525394415795e-05, + "loss": 0.2596299648284912, + "step": 3362 + }, + { + "epoch": 0.8931084849289603, + "grad_norm": 1.1916540375753872, + "learning_rate": 1.2572282623397268e-05, + "loss": 0.29102641344070435, + "step": 3363 + }, + { + "epoch": 0.8933740539105033, + "grad_norm": 1.236954620143465, + "learning_rate": 1.2568039356472985e-05, + "loss": 0.2970406711101532, + "step": 3364 + }, + { + "epoch": 0.8936396228920462, + "grad_norm": 1.1384210267422126, + "learning_rate": 1.2563795594461003e-05, + "loss": 0.2916618585586548, + "step": 3365 + }, + { + "epoch": 0.8939051918735892, + "grad_norm": 1.1769911575713834, + "learning_rate": 1.2559551338179468e-05, + "loss": 0.3217374086380005, + "step": 3366 + }, + { + "epoch": 0.8941707608551321, + "grad_norm": 1.1228623922561494, + "learning_rate": 1.255530658844662e-05, + "loss": 0.3000059425830841, + "step": 3367 + }, + { + "epoch": 0.8944363298366751, + "grad_norm": 1.2170346898517979, + "learning_rate": 1.2551061346080804e-05, + "loss": 0.2848728895187378, + "step": 3368 + }, + { + "epoch": 0.894701898818218, + "grad_norm": 1.3197542136745113, + "learning_rate": 1.2546815611900442e-05, + "loss": 0.3328903317451477, + "step": 3369 + }, + { + "epoch": 0.894967467799761, + "grad_norm": 1.0838958961687528, + "learning_rate": 1.2542569386724069e-05, + "loss": 0.2920045256614685, + "step": 3370 + }, + { + "epoch": 0.895233036781304, + "grad_norm": 1.0679716869166582, + "learning_rate": 1.2538322671370305e-05, + "loss": 0.30370092391967773, + "step": 3371 + }, + { + "epoch": 0.8954986057628469, + "grad_norm": 1.069215534600395, + "learning_rate": 1.2534075466657866e-05, + "loss": 0.24454624950885773, + "step": 3372 + }, + { + "epoch": 0.8957641747443899, + "grad_norm": 1.172481734803523, + "learning_rate": 1.2529827773405566e-05, + "loss": 0.30908581614494324, + "step": 3373 + }, + { + "epoch": 0.8960297437259328, + "grad_norm": 1.1095939186212227, + "learning_rate": 1.2525579592432304e-05, + "loss": 0.2792360782623291, + "step": 3374 + }, + { + "epoch": 0.8962953127074758, + "grad_norm": 1.0658472517819026, + "learning_rate": 1.2521330924557087e-05, + "loss": 0.285555362701416, + "step": 3375 + }, + { + "epoch": 0.8965608816890187, + "grad_norm": 1.1649386203925687, + "learning_rate": 1.2517081770599002e-05, + "loss": 0.3159451484680176, + "step": 3376 + }, + { + "epoch": 0.8968264506705617, + "grad_norm": 1.2867424735092035, + "learning_rate": 1.2512832131377237e-05, + "loss": 0.35929200053215027, + "step": 3377 + }, + { + "epoch": 0.8970920196521046, + "grad_norm": 1.0781651079446009, + "learning_rate": 1.2508582007711074e-05, + "loss": 0.28624874353408813, + "step": 3378 + }, + { + "epoch": 0.8973575886336476, + "grad_norm": 1.0156684050998903, + "learning_rate": 1.2504331400419884e-05, + "loss": 0.27670109272003174, + "step": 3379 + }, + { + "epoch": 0.8976231576151905, + "grad_norm": 1.0786636895703534, + "learning_rate": 1.2500080310323139e-05, + "loss": 0.2894589304924011, + "step": 3380 + }, + { + "epoch": 0.8978887265967335, + "grad_norm": 1.1385795160382524, + "learning_rate": 1.2495828738240396e-05, + "loss": 0.31378716230392456, + "step": 3381 + }, + { + "epoch": 0.8981542955782764, + "grad_norm": 1.3149597134232174, + "learning_rate": 1.2491576684991306e-05, + "loss": 0.33676713705062866, + "step": 3382 + }, + { + "epoch": 0.8984198645598194, + "grad_norm": 0.9814689350619926, + "learning_rate": 1.2487324151395618e-05, + "loss": 0.2875351011753082, + "step": 3383 + }, + { + "epoch": 0.8986854335413623, + "grad_norm": 1.1646557221945626, + "learning_rate": 1.2483071138273168e-05, + "loss": 0.29729989171028137, + "step": 3384 + }, + { + "epoch": 0.8989510025229053, + "grad_norm": 1.0864970585536224, + "learning_rate": 1.2478817646443888e-05, + "loss": 0.3227398991584778, + "step": 3385 + }, + { + "epoch": 0.8992165715044482, + "grad_norm": 1.1586445900518523, + "learning_rate": 1.2474563676727803e-05, + "loss": 0.2664690315723419, + "step": 3386 + }, + { + "epoch": 0.8994821404859913, + "grad_norm": 1.1748792923054732, + "learning_rate": 1.2470309229945021e-05, + "loss": 0.29543352127075195, + "step": 3387 + }, + { + "epoch": 0.8997477094675342, + "grad_norm": 0.9899792334789409, + "learning_rate": 1.2466054306915756e-05, + "loss": 0.26658856868743896, + "step": 3388 + }, + { + "epoch": 0.9000132784490772, + "grad_norm": 1.123207894421506, + "learning_rate": 1.2461798908460305e-05, + "loss": 0.2899627387523651, + "step": 3389 + }, + { + "epoch": 0.9002788474306201, + "grad_norm": 1.1137567335053833, + "learning_rate": 1.245754303539906e-05, + "loss": 0.2708336114883423, + "step": 3390 + }, + { + "epoch": 0.9005444164121631, + "grad_norm": 1.1459655330577214, + "learning_rate": 1.2453286688552502e-05, + "loss": 0.28124746680259705, + "step": 3391 + }, + { + "epoch": 0.900809985393706, + "grad_norm": 1.0470005335558448, + "learning_rate": 1.2449029868741202e-05, + "loss": 0.2599399983882904, + "step": 3392 + }, + { + "epoch": 0.901075554375249, + "grad_norm": 0.9576026734877732, + "learning_rate": 1.2444772576785828e-05, + "loss": 0.25035667419433594, + "step": 3393 + }, + { + "epoch": 0.901341123356792, + "grad_norm": 1.1148471766082222, + "learning_rate": 1.2440514813507136e-05, + "loss": 0.2772521376609802, + "step": 3394 + }, + { + "epoch": 0.9016066923383349, + "grad_norm": 1.103787889433512, + "learning_rate": 1.2436256579725969e-05, + "loss": 0.3282839357852936, + "step": 3395 + }, + { + "epoch": 0.9018722613198779, + "grad_norm": 1.080988888326222, + "learning_rate": 1.2431997876263269e-05, + "loss": 0.2507914900779724, + "step": 3396 + }, + { + "epoch": 0.9021378303014208, + "grad_norm": 1.1123927965933749, + "learning_rate": 1.2427738703940055e-05, + "loss": 0.2620914876461029, + "step": 3397 + }, + { + "epoch": 0.9024033992829638, + "grad_norm": 1.0713438905056172, + "learning_rate": 1.2423479063577458e-05, + "loss": 0.26561641693115234, + "step": 3398 + }, + { + "epoch": 0.9026689682645067, + "grad_norm": 1.151582271756571, + "learning_rate": 1.2419218955996677e-05, + "loss": 0.2998678386211395, + "step": 3399 + }, + { + "epoch": 0.9029345372460497, + "grad_norm": 1.0484454707225395, + "learning_rate": 1.2414958382019017e-05, + "loss": 0.2368398755788803, + "step": 3400 + }, + { + "epoch": 0.9032001062275926, + "grad_norm": 1.0429929570241405, + "learning_rate": 1.241069734246586e-05, + "loss": 0.2623558044433594, + "step": 3401 + }, + { + "epoch": 0.9034656752091356, + "grad_norm": 1.0283944167565489, + "learning_rate": 1.2406435838158686e-05, + "loss": 0.2693074941635132, + "step": 3402 + }, + { + "epoch": 0.9037312441906785, + "grad_norm": 1.1211950634171715, + "learning_rate": 1.2402173869919063e-05, + "loss": 0.2933652698993683, + "step": 3403 + }, + { + "epoch": 0.9039968131722215, + "grad_norm": 1.0858313001207585, + "learning_rate": 1.2397911438568651e-05, + "loss": 0.28515487909317017, + "step": 3404 + }, + { + "epoch": 0.9042623821537644, + "grad_norm": 1.1243916508543286, + "learning_rate": 1.2393648544929193e-05, + "loss": 0.282942533493042, + "step": 3405 + }, + { + "epoch": 0.9045279511353074, + "grad_norm": 1.112018853789466, + "learning_rate": 1.2389385189822526e-05, + "loss": 0.28300392627716064, + "step": 3406 + }, + { + "epoch": 0.9047935201168503, + "grad_norm": 1.0490322847853841, + "learning_rate": 1.2385121374070577e-05, + "loss": 0.25697019696235657, + "step": 3407 + }, + { + "epoch": 0.9050590890983933, + "grad_norm": 1.15038978087342, + "learning_rate": 1.2380857098495355e-05, + "loss": 0.31156057119369507, + "step": 3408 + }, + { + "epoch": 0.9053246580799362, + "grad_norm": 1.1544066045654053, + "learning_rate": 1.2376592363918967e-05, + "loss": 0.2943422794342041, + "step": 3409 + }, + { + "epoch": 0.9055902270614792, + "grad_norm": 0.9968457114080438, + "learning_rate": 1.2372327171163596e-05, + "loss": 0.2792074680328369, + "step": 3410 + }, + { + "epoch": 0.9058557960430221, + "grad_norm": 1.0328662447203703, + "learning_rate": 1.2368061521051526e-05, + "loss": 0.2547443211078644, + "step": 3411 + }, + { + "epoch": 0.9061213650245651, + "grad_norm": 1.068901181257851, + "learning_rate": 1.2363795414405125e-05, + "loss": 0.25637373328208923, + "step": 3412 + }, + { + "epoch": 0.906386934006108, + "grad_norm": 1.1660475318941728, + "learning_rate": 1.2359528852046844e-05, + "loss": 0.3269123435020447, + "step": 3413 + }, + { + "epoch": 0.906652502987651, + "grad_norm": 1.0197427295072394, + "learning_rate": 1.2355261834799232e-05, + "loss": 0.28538423776626587, + "step": 3414 + }, + { + "epoch": 0.906918071969194, + "grad_norm": 1.1343354993973966, + "learning_rate": 1.2350994363484915e-05, + "loss": 0.2961096167564392, + "step": 3415 + }, + { + "epoch": 0.907183640950737, + "grad_norm": 1.0930595123597455, + "learning_rate": 1.2346726438926613e-05, + "loss": 0.3134537935256958, + "step": 3416 + }, + { + "epoch": 0.90744920993228, + "grad_norm": 1.018679268761631, + "learning_rate": 1.2342458061947129e-05, + "loss": 0.2614031434059143, + "step": 3417 + }, + { + "epoch": 0.9077147789138229, + "grad_norm": 1.0403373381004117, + "learning_rate": 1.2338189233369357e-05, + "loss": 0.27166056632995605, + "step": 3418 + }, + { + "epoch": 0.9079803478953659, + "grad_norm": 1.0735839504787106, + "learning_rate": 1.2333919954016277e-05, + "loss": 0.26053497195243835, + "step": 3419 + }, + { + "epoch": 0.9082459168769088, + "grad_norm": 1.1112591016079632, + "learning_rate": 1.2329650224710956e-05, + "loss": 0.3109636902809143, + "step": 3420 + }, + { + "epoch": 0.9085114858584518, + "grad_norm": 1.081828404421451, + "learning_rate": 1.232538004627655e-05, + "loss": 0.2576507329940796, + "step": 3421 + }, + { + "epoch": 0.9087770548399947, + "grad_norm": 1.0981308884589311, + "learning_rate": 1.2321109419536292e-05, + "loss": 0.2525216341018677, + "step": 3422 + }, + { + "epoch": 0.9090426238215377, + "grad_norm": 1.0732531844020532, + "learning_rate": 1.2316838345313517e-05, + "loss": 0.2483336180448532, + "step": 3423 + }, + { + "epoch": 0.9093081928030806, + "grad_norm": 1.1592146270526706, + "learning_rate": 1.2312566824431631e-05, + "loss": 0.26372796297073364, + "step": 3424 + }, + { + "epoch": 0.9095737617846236, + "grad_norm": 1.1537675520237485, + "learning_rate": 1.2308294857714138e-05, + "loss": 0.2933644950389862, + "step": 3425 + }, + { + "epoch": 0.9098393307661665, + "grad_norm": 1.0330883162146767, + "learning_rate": 1.2304022445984618e-05, + "loss": 0.2543371915817261, + "step": 3426 + }, + { + "epoch": 0.9101048997477095, + "grad_norm": 1.1689002717846686, + "learning_rate": 1.2299749590066745e-05, + "loss": 0.29246431589126587, + "step": 3427 + }, + { + "epoch": 0.9103704687292524, + "grad_norm": 1.0141798843769114, + "learning_rate": 1.2295476290784273e-05, + "loss": 0.2475431263446808, + "step": 3428 + }, + { + "epoch": 0.9106360377107954, + "grad_norm": 1.1845034794986053, + "learning_rate": 1.2291202548961042e-05, + "loss": 0.3312363624572754, + "step": 3429 + }, + { + "epoch": 0.9109016066923383, + "grad_norm": 1.0459618447051044, + "learning_rate": 1.2286928365420987e-05, + "loss": 0.25192639231681824, + "step": 3430 + }, + { + "epoch": 0.9111671756738813, + "grad_norm": 1.2038671566275931, + "learning_rate": 1.2282653740988114e-05, + "loss": 0.23189345002174377, + "step": 3431 + }, + { + "epoch": 0.9114327446554242, + "grad_norm": 1.17767221221897, + "learning_rate": 1.2278378676486522e-05, + "loss": 0.2888398766517639, + "step": 3432 + }, + { + "epoch": 0.9116983136369672, + "grad_norm": 1.1295595703903276, + "learning_rate": 1.2274103172740387e-05, + "loss": 0.2857785224914551, + "step": 3433 + }, + { + "epoch": 0.9119638826185101, + "grad_norm": 1.039533312390003, + "learning_rate": 1.2269827230573986e-05, + "loss": 0.23961025476455688, + "step": 3434 + }, + { + "epoch": 0.9122294516000531, + "grad_norm": 1.1192521835175562, + "learning_rate": 1.2265550850811663e-05, + "loss": 0.2791004478931427, + "step": 3435 + }, + { + "epoch": 0.912495020581596, + "grad_norm": 1.052040685054951, + "learning_rate": 1.2261274034277858e-05, + "loss": 0.2875480651855469, + "step": 3436 + }, + { + "epoch": 0.912760589563139, + "grad_norm": 1.12188070500717, + "learning_rate": 1.2256996781797086e-05, + "loss": 0.29422929883003235, + "step": 3437 + }, + { + "epoch": 0.9130261585446819, + "grad_norm": 1.2976046274469295, + "learning_rate": 1.225271909419395e-05, + "loss": 0.27114444971084595, + "step": 3438 + }, + { + "epoch": 0.9132917275262249, + "grad_norm": 1.0684416452719028, + "learning_rate": 1.2248440972293146e-05, + "loss": 0.3007166385650635, + "step": 3439 + }, + { + "epoch": 0.9135572965077678, + "grad_norm": 1.1408150577224654, + "learning_rate": 1.224416241691944e-05, + "loss": 0.28550055623054504, + "step": 3440 + }, + { + "epoch": 0.9138228654893108, + "grad_norm": 1.1159473328967766, + "learning_rate": 1.2239883428897687e-05, + "loss": 0.2861761450767517, + "step": 3441 + }, + { + "epoch": 0.9140884344708538, + "grad_norm": 1.1186358936011263, + "learning_rate": 1.2235604009052823e-05, + "loss": 0.3288506865501404, + "step": 3442 + }, + { + "epoch": 0.9143540034523968, + "grad_norm": 1.2101661293343442, + "learning_rate": 1.2231324158209876e-05, + "loss": 0.33189019560813904, + "step": 3443 + }, + { + "epoch": 0.9146195724339398, + "grad_norm": 0.9931883995236199, + "learning_rate": 1.2227043877193947e-05, + "loss": 0.20846885442733765, + "step": 3444 + }, + { + "epoch": 0.9148851414154827, + "grad_norm": 0.9579263575635046, + "learning_rate": 1.2222763166830223e-05, + "loss": 0.25184741616249084, + "step": 3445 + }, + { + "epoch": 0.9151507103970257, + "grad_norm": 1.0775642304955, + "learning_rate": 1.2218482027943977e-05, + "loss": 0.2954701781272888, + "step": 3446 + }, + { + "epoch": 0.9154162793785686, + "grad_norm": 1.055908963813806, + "learning_rate": 1.221420046136056e-05, + "loss": 0.263336718082428, + "step": 3447 + }, + { + "epoch": 0.9156818483601116, + "grad_norm": 1.2181481624195412, + "learning_rate": 1.2209918467905405e-05, + "loss": 0.31178128719329834, + "step": 3448 + }, + { + "epoch": 0.9159474173416545, + "grad_norm": 1.1248939907914326, + "learning_rate": 1.2205636048404037e-05, + "loss": 0.30373090505599976, + "step": 3449 + }, + { + "epoch": 0.9162129863231975, + "grad_norm": 1.1316476755108689, + "learning_rate": 1.2201353203682052e-05, + "loss": 0.31057459115982056, + "step": 3450 + }, + { + "epoch": 0.9164785553047404, + "grad_norm": 1.0432699213656527, + "learning_rate": 1.2197069934565126e-05, + "loss": 0.26834744215011597, + "step": 3451 + }, + { + "epoch": 0.9167441242862834, + "grad_norm": 1.0235490532622333, + "learning_rate": 1.2192786241879033e-05, + "loss": 0.30224066972732544, + "step": 3452 + }, + { + "epoch": 0.9170096932678263, + "grad_norm": 1.1136690118430506, + "learning_rate": 1.2188502126449616e-05, + "loss": 0.28249508142471313, + "step": 3453 + }, + { + "epoch": 0.9172752622493693, + "grad_norm": 1.0210144972314754, + "learning_rate": 1.2184217589102798e-05, + "loss": 0.24823793768882751, + "step": 3454 + }, + { + "epoch": 0.9175408312309122, + "grad_norm": 1.1878687209379464, + "learning_rate": 1.2179932630664589e-05, + "loss": 0.32556289434432983, + "step": 3455 + }, + { + "epoch": 0.9178064002124552, + "grad_norm": 1.0899520670240972, + "learning_rate": 1.217564725196108e-05, + "loss": 0.29420584440231323, + "step": 3456 + }, + { + "epoch": 0.9180719691939981, + "grad_norm": 1.028247015068141, + "learning_rate": 1.2171361453818437e-05, + "loss": 0.29294469952583313, + "step": 3457 + }, + { + "epoch": 0.9183375381755411, + "grad_norm": 1.0399893903415627, + "learning_rate": 1.2167075237062918e-05, + "loss": 0.3173823952674866, + "step": 3458 + }, + { + "epoch": 0.918603107157084, + "grad_norm": 1.1571492956528482, + "learning_rate": 1.2162788602520851e-05, + "loss": 0.32950159907341003, + "step": 3459 + }, + { + "epoch": 0.918868676138627, + "grad_norm": 1.0478118037587627, + "learning_rate": 1.2158501551018647e-05, + "loss": 0.3011544942855835, + "step": 3460 + }, + { + "epoch": 0.91913424512017, + "grad_norm": 1.0135067760604335, + "learning_rate": 1.2154214083382802e-05, + "loss": 0.25775954127311707, + "step": 3461 + }, + { + "epoch": 0.9193998141017129, + "grad_norm": 1.0514508898774713, + "learning_rate": 1.214992620043989e-05, + "loss": 0.286748468875885, + "step": 3462 + }, + { + "epoch": 0.9196653830832558, + "grad_norm": 1.1050004366949897, + "learning_rate": 1.214563790301656e-05, + "loss": 0.30588221549987793, + "step": 3463 + }, + { + "epoch": 0.9199309520647988, + "grad_norm": 1.0079666808538812, + "learning_rate": 1.214134919193955e-05, + "loss": 0.23506608605384827, + "step": 3464 + }, + { + "epoch": 0.9201965210463418, + "grad_norm": 1.037364536446331, + "learning_rate": 1.2137060068035672e-05, + "loss": 0.2612350285053253, + "step": 3465 + }, + { + "epoch": 0.9204620900278847, + "grad_norm": 1.0810309706979688, + "learning_rate": 1.2132770532131815e-05, + "loss": 0.3268318772315979, + "step": 3466 + }, + { + "epoch": 0.9207276590094277, + "grad_norm": 1.0723394192428657, + "learning_rate": 1.2128480585054951e-05, + "loss": 0.2970179319381714, + "step": 3467 + }, + { + "epoch": 0.9209932279909706, + "grad_norm": 1.0036147426745694, + "learning_rate": 1.2124190227632138e-05, + "loss": 0.2910206615924835, + "step": 3468 + }, + { + "epoch": 0.9212587969725136, + "grad_norm": 1.1089890742219906, + "learning_rate": 1.2119899460690496e-05, + "loss": 0.3000222444534302, + "step": 3469 + }, + { + "epoch": 0.9215243659540565, + "grad_norm": 1.1166450826016983, + "learning_rate": 1.2115608285057242e-05, + "loss": 0.30304765701293945, + "step": 3470 + }, + { + "epoch": 0.9217899349355996, + "grad_norm": 0.9893826238823328, + "learning_rate": 1.2111316701559663e-05, + "loss": 0.26393038034439087, + "step": 3471 + }, + { + "epoch": 0.9220555039171425, + "grad_norm": 1.1384217438340345, + "learning_rate": 1.2107024711025128e-05, + "loss": 0.3111063838005066, + "step": 3472 + }, + { + "epoch": 0.9223210728986855, + "grad_norm": 0.9599961450252364, + "learning_rate": 1.2102732314281073e-05, + "loss": 0.2897321581840515, + "step": 3473 + }, + { + "epoch": 0.9225866418802284, + "grad_norm": 1.1396280258666305, + "learning_rate": 1.2098439512155028e-05, + "loss": 0.2835896611213684, + "step": 3474 + }, + { + "epoch": 0.9228522108617714, + "grad_norm": 1.0165194494005183, + "learning_rate": 1.2094146305474596e-05, + "loss": 0.27648821473121643, + "step": 3475 + }, + { + "epoch": 0.9231177798433143, + "grad_norm": 1.1221504506656363, + "learning_rate": 1.2089852695067457e-05, + "loss": 0.2528097629547119, + "step": 3476 + }, + { + "epoch": 0.9233833488248573, + "grad_norm": 1.1105562286202324, + "learning_rate": 1.2085558681761361e-05, + "loss": 0.2750067412853241, + "step": 3477 + }, + { + "epoch": 0.9236489178064002, + "grad_norm": 1.1199967050670125, + "learning_rate": 1.2081264266384148e-05, + "loss": 0.3115938901901245, + "step": 3478 + }, + { + "epoch": 0.9239144867879432, + "grad_norm": 1.1203071431737686, + "learning_rate": 1.2076969449763734e-05, + "loss": 0.2858419418334961, + "step": 3479 + }, + { + "epoch": 0.9241800557694861, + "grad_norm": 1.051118385350032, + "learning_rate": 1.2072674232728105e-05, + "loss": 0.24990032613277435, + "step": 3480 + }, + { + "epoch": 0.9244456247510291, + "grad_norm": 1.2991104394876676, + "learning_rate": 1.206837861610533e-05, + "loss": 0.23106999695301056, + "step": 3481 + }, + { + "epoch": 0.924711193732572, + "grad_norm": 1.0396779513824141, + "learning_rate": 1.2064082600723546e-05, + "loss": 0.2737967371940613, + "step": 3482 + }, + { + "epoch": 0.924976762714115, + "grad_norm": 1.1890061925781694, + "learning_rate": 1.2059786187410984e-05, + "loss": 0.2810317873954773, + "step": 3483 + }, + { + "epoch": 0.925242331695658, + "grad_norm": 1.1358698893490913, + "learning_rate": 1.2055489376995938e-05, + "loss": 0.30852559208869934, + "step": 3484 + }, + { + "epoch": 0.9255079006772009, + "grad_norm": 1.1003932874354148, + "learning_rate": 1.2051192170306784e-05, + "loss": 0.2956348657608032, + "step": 3485 + }, + { + "epoch": 0.9257734696587439, + "grad_norm": 1.18261367067389, + "learning_rate": 1.204689456817197e-05, + "loss": 0.2825953960418701, + "step": 3486 + }, + { + "epoch": 0.9260390386402868, + "grad_norm": 1.2502616697865143, + "learning_rate": 1.2042596571420025e-05, + "loss": 0.3351168632507324, + "step": 3487 + }, + { + "epoch": 0.9263046076218298, + "grad_norm": 1.2354469073344645, + "learning_rate": 1.2038298180879548e-05, + "loss": 0.2718926668167114, + "step": 3488 + }, + { + "epoch": 0.9265701766033727, + "grad_norm": 1.1387239259181285, + "learning_rate": 1.2033999397379223e-05, + "loss": 0.29036587476730347, + "step": 3489 + }, + { + "epoch": 0.9268357455849157, + "grad_norm": 0.9499049433325992, + "learning_rate": 1.2029700221747804e-05, + "loss": 0.22917689383029938, + "step": 3490 + }, + { + "epoch": 0.9271013145664586, + "grad_norm": 1.2322966399012754, + "learning_rate": 1.2025400654814119e-05, + "loss": 0.2963443398475647, + "step": 3491 + }, + { + "epoch": 0.9273668835480016, + "grad_norm": 1.100231072465541, + "learning_rate": 1.2021100697407075e-05, + "loss": 0.2866464853286743, + "step": 3492 + }, + { + "epoch": 0.9276324525295445, + "grad_norm": 1.1717529025248212, + "learning_rate": 1.2016800350355654e-05, + "loss": 0.3069216012954712, + "step": 3493 + }, + { + "epoch": 0.9278980215110875, + "grad_norm": 1.0745448017128252, + "learning_rate": 1.2012499614488913e-05, + "loss": 0.27206870913505554, + "step": 3494 + }, + { + "epoch": 0.9281635904926304, + "grad_norm": 1.0995365532444106, + "learning_rate": 1.2008198490635978e-05, + "loss": 0.32130372524261475, + "step": 3495 + }, + { + "epoch": 0.9284291594741734, + "grad_norm": 1.151015013814654, + "learning_rate": 1.2003896979626061e-05, + "loss": 0.30631259083747864, + "step": 3496 + }, + { + "epoch": 0.9286947284557163, + "grad_norm": 1.125856079122124, + "learning_rate": 1.199959508228844e-05, + "loss": 0.3005716800689697, + "step": 3497 + }, + { + "epoch": 0.9289602974372593, + "grad_norm": 0.9983757548693274, + "learning_rate": 1.1995292799452472e-05, + "loss": 0.2381039410829544, + "step": 3498 + }, + { + "epoch": 0.9292258664188023, + "grad_norm": 1.1338580261514946, + "learning_rate": 1.1990990131947582e-05, + "loss": 0.31764286756515503, + "step": 3499 + }, + { + "epoch": 0.9294914354003453, + "grad_norm": 1.1445030838538803, + "learning_rate": 1.1986687080603273e-05, + "loss": 0.3029370903968811, + "step": 3500 + }, + { + "epoch": 0.9297570043818882, + "grad_norm": 1.0814133109661386, + "learning_rate": 1.198238364624913e-05, + "loss": 0.30967646837234497, + "step": 3501 + }, + { + "epoch": 0.9300225733634312, + "grad_norm": 1.0376796287878236, + "learning_rate": 1.1978079829714799e-05, + "loss": 0.24687506258487701, + "step": 3502 + }, + { + "epoch": 0.9302881423449741, + "grad_norm": 1.0529899744692286, + "learning_rate": 1.1973775631830007e-05, + "loss": 0.25909408926963806, + "step": 3503 + }, + { + "epoch": 0.9305537113265171, + "grad_norm": 1.1136411983367804, + "learning_rate": 1.196947105342455e-05, + "loss": 0.281025230884552, + "step": 3504 + }, + { + "epoch": 0.93081928030806, + "grad_norm": 1.2858712177395888, + "learning_rate": 1.1965166095328302e-05, + "loss": 0.33401811122894287, + "step": 3505 + }, + { + "epoch": 0.931084849289603, + "grad_norm": 0.9732764276792689, + "learning_rate": 1.1960860758371208e-05, + "loss": 0.25839388370513916, + "step": 3506 + }, + { + "epoch": 0.931350418271146, + "grad_norm": 0.954364218435113, + "learning_rate": 1.1956555043383286e-05, + "loss": 0.23343560099601746, + "step": 3507 + }, + { + "epoch": 0.9316159872526889, + "grad_norm": 1.176408931412559, + "learning_rate": 1.1952248951194629e-05, + "loss": 0.31106436252593994, + "step": 3508 + }, + { + "epoch": 0.9318815562342319, + "grad_norm": 1.108418204277134, + "learning_rate": 1.1947942482635395e-05, + "loss": 0.29152095317840576, + "step": 3509 + }, + { + "epoch": 0.9321471252157748, + "grad_norm": 1.2651732065185788, + "learning_rate": 1.1943635638535827e-05, + "loss": 0.31517675518989563, + "step": 3510 + }, + { + "epoch": 0.9324126941973178, + "grad_norm": 1.2309480505410157, + "learning_rate": 1.1939328419726231e-05, + "loss": 0.33221137523651123, + "step": 3511 + }, + { + "epoch": 0.9326782631788607, + "grad_norm": 1.2277892053470791, + "learning_rate": 1.193502082703699e-05, + "loss": 0.314359575510025, + "step": 3512 + }, + { + "epoch": 0.9329438321604037, + "grad_norm": 1.129757464324541, + "learning_rate": 1.1930712861298553e-05, + "loss": 0.2879924178123474, + "step": 3513 + }, + { + "epoch": 0.9332094011419466, + "grad_norm": 1.1622909402406336, + "learning_rate": 1.1926404523341443e-05, + "loss": 0.2732955515384674, + "step": 3514 + }, + { + "epoch": 0.9334749701234896, + "grad_norm": 1.1586501434218468, + "learning_rate": 1.1922095813996264e-05, + "loss": 0.32156097888946533, + "step": 3515 + }, + { + "epoch": 0.9337405391050325, + "grad_norm": 1.110486475282156, + "learning_rate": 1.1917786734093682e-05, + "loss": 0.2694319486618042, + "step": 3516 + }, + { + "epoch": 0.9340061080865755, + "grad_norm": 1.0871387001943549, + "learning_rate": 1.1913477284464434e-05, + "loss": 0.3049655258655548, + "step": 3517 + }, + { + "epoch": 0.9342716770681184, + "grad_norm": 1.0962864613999421, + "learning_rate": 1.1909167465939334e-05, + "loss": 0.30053725838661194, + "step": 3518 + }, + { + "epoch": 0.9345372460496614, + "grad_norm": 1.0261517334123498, + "learning_rate": 1.1904857279349265e-05, + "loss": 0.2611788809299469, + "step": 3519 + }, + { + "epoch": 0.9348028150312043, + "grad_norm": 1.1400957154071245, + "learning_rate": 1.1900546725525175e-05, + "loss": 0.28344646096229553, + "step": 3520 + }, + { + "epoch": 0.9350683840127473, + "grad_norm": 1.067093022484818, + "learning_rate": 1.1896235805298093e-05, + "loss": 0.2504042685031891, + "step": 3521 + }, + { + "epoch": 0.9353339529942902, + "grad_norm": 1.0534608212516616, + "learning_rate": 1.1891924519499113e-05, + "loss": 0.27877938747406006, + "step": 3522 + }, + { + "epoch": 0.9355995219758332, + "grad_norm": 1.046331705593262, + "learning_rate": 1.1887612868959394e-05, + "loss": 0.28176525235176086, + "step": 3523 + }, + { + "epoch": 0.9358650909573761, + "grad_norm": 1.1750063194789062, + "learning_rate": 1.1883300854510178e-05, + "loss": 0.32376354932785034, + "step": 3524 + }, + { + "epoch": 0.9361306599389191, + "grad_norm": 1.0908366283033504, + "learning_rate": 1.1878988476982772e-05, + "loss": 0.2846054434776306, + "step": 3525 + }, + { + "epoch": 0.936396228920462, + "grad_norm": 1.0507783491664777, + "learning_rate": 1.1874675737208546e-05, + "loss": 0.25711044669151306, + "step": 3526 + }, + { + "epoch": 0.9366617979020051, + "grad_norm": 1.078360429057703, + "learning_rate": 1.1870362636018946e-05, + "loss": 0.2810837924480438, + "step": 3527 + }, + { + "epoch": 0.936927366883548, + "grad_norm": 1.2088151262046463, + "learning_rate": 1.186604917424549e-05, + "loss": 0.3090322017669678, + "step": 3528 + }, + { + "epoch": 0.937192935865091, + "grad_norm": 1.061646146170892, + "learning_rate": 1.1861735352719763e-05, + "loss": 0.2797972559928894, + "step": 3529 + }, + { + "epoch": 0.937458504846634, + "grad_norm": 1.3937474116807773, + "learning_rate": 1.1857421172273415e-05, + "loss": 0.3124893605709076, + "step": 3530 + }, + { + "epoch": 0.9377240738281769, + "grad_norm": 1.1043040217194096, + "learning_rate": 1.1853106633738174e-05, + "loss": 0.28317195177078247, + "step": 3531 + }, + { + "epoch": 0.9379896428097199, + "grad_norm": 1.0483798154842934, + "learning_rate": 1.1848791737945823e-05, + "loss": 0.27804574370384216, + "step": 3532 + }, + { + "epoch": 0.9382552117912628, + "grad_norm": 1.1007797171562173, + "learning_rate": 1.1844476485728236e-05, + "loss": 0.24936731159687042, + "step": 3533 + }, + { + "epoch": 0.9385207807728058, + "grad_norm": 1.16922301793574, + "learning_rate": 1.1840160877917335e-05, + "loss": 0.296974778175354, + "step": 3534 + }, + { + "epoch": 0.9387863497543487, + "grad_norm": 1.1172266681075624, + "learning_rate": 1.1835844915345117e-05, + "loss": 0.3048890233039856, + "step": 3535 + }, + { + "epoch": 0.9390519187358917, + "grad_norm": 1.0372698095624082, + "learning_rate": 1.1831528598843654e-05, + "loss": 0.2703601121902466, + "step": 3536 + }, + { + "epoch": 0.9393174877174346, + "grad_norm": 1.123009081238491, + "learning_rate": 1.1827211929245075e-05, + "loss": 0.30738013982772827, + "step": 3537 + }, + { + "epoch": 0.9395830566989776, + "grad_norm": 1.0660333251952498, + "learning_rate": 1.1822894907381589e-05, + "loss": 0.26538529992103577, + "step": 3538 + }, + { + "epoch": 0.9398486256805205, + "grad_norm": 1.1050453871275616, + "learning_rate": 1.1818577534085462e-05, + "loss": 0.26795464754104614, + "step": 3539 + }, + { + "epoch": 0.9401141946620635, + "grad_norm": 1.1533311536850575, + "learning_rate": 1.1814259810189034e-05, + "loss": 0.30891868472099304, + "step": 3540 + }, + { + "epoch": 0.9403797636436064, + "grad_norm": 1.8167204702159565, + "learning_rate": 1.1809941736524713e-05, + "loss": 0.29164037108421326, + "step": 3541 + }, + { + "epoch": 0.9406453326251494, + "grad_norm": 1.0875424396631934, + "learning_rate": 1.180562331392497e-05, + "loss": 0.30322739481925964, + "step": 3542 + }, + { + "epoch": 0.9409109016066923, + "grad_norm": 1.0765622649066557, + "learning_rate": 1.1801304543222349e-05, + "loss": 0.275432288646698, + "step": 3543 + }, + { + "epoch": 0.9411764705882353, + "grad_norm": 1.1566847425916267, + "learning_rate": 1.1796985425249459e-05, + "loss": 0.2788141965866089, + "step": 3544 + }, + { + "epoch": 0.9414420395697782, + "grad_norm": 1.203313197377309, + "learning_rate": 1.1792665960838967e-05, + "loss": 0.24254676699638367, + "step": 3545 + }, + { + "epoch": 0.9417076085513212, + "grad_norm": 1.1050026210111878, + "learning_rate": 1.1788346150823625e-05, + "loss": 0.2803058326244354, + "step": 3546 + }, + { + "epoch": 0.9419731775328641, + "grad_norm": 1.0993090963339842, + "learning_rate": 1.1784025996036232e-05, + "loss": 0.3068317174911499, + "step": 3547 + }, + { + "epoch": 0.9422387465144071, + "grad_norm": 0.9977731134117688, + "learning_rate": 1.1779705497309673e-05, + "loss": 0.23124024271965027, + "step": 3548 + }, + { + "epoch": 0.94250431549595, + "grad_norm": 1.080710306089679, + "learning_rate": 1.177538465547688e-05, + "loss": 0.2815462648868561, + "step": 3549 + }, + { + "epoch": 0.942769884477493, + "grad_norm": 1.1118952137889662, + "learning_rate": 1.1771063471370862e-05, + "loss": 0.29448196291923523, + "step": 3550 + }, + { + "epoch": 0.9430354534590359, + "grad_norm": 1.2691077751501818, + "learning_rate": 1.1766741945824698e-05, + "loss": 0.3176615834236145, + "step": 3551 + }, + { + "epoch": 0.9433010224405789, + "grad_norm": 1.1390071879475103, + "learning_rate": 1.1762420079671527e-05, + "loss": 0.29126274585723877, + "step": 3552 + }, + { + "epoch": 0.9435665914221218, + "grad_norm": 1.084504171285626, + "learning_rate": 1.1758097873744547e-05, + "loss": 0.27074337005615234, + "step": 3553 + }, + { + "epoch": 0.9438321604036648, + "grad_norm": 1.0495499557301764, + "learning_rate": 1.175377532887703e-05, + "loss": 0.2756083011627197, + "step": 3554 + }, + { + "epoch": 0.9440977293852079, + "grad_norm": 1.1028881447166687, + "learning_rate": 1.1749452445902315e-05, + "loss": 0.26918384432792664, + "step": 3555 + }, + { + "epoch": 0.9443632983667508, + "grad_norm": 1.0856468025535497, + "learning_rate": 1.17451292256538e-05, + "loss": 0.2550349235534668, + "step": 3556 + }, + { + "epoch": 0.9446288673482938, + "grad_norm": 1.0791996633460945, + "learning_rate": 1.1740805668964954e-05, + "loss": 0.2601481080055237, + "step": 3557 + }, + { + "epoch": 0.9448944363298367, + "grad_norm": 1.1367109564667788, + "learning_rate": 1.1736481776669307e-05, + "loss": 0.2848352789878845, + "step": 3558 + }, + { + "epoch": 0.9451600053113797, + "grad_norm": 1.1168278064757895, + "learning_rate": 1.173215754960045e-05, + "loss": 0.266584575176239, + "step": 3559 + }, + { + "epoch": 0.9454255742929226, + "grad_norm": 0.9979692557530664, + "learning_rate": 1.172783298859205e-05, + "loss": 0.25037410855293274, + "step": 3560 + }, + { + "epoch": 0.9456911432744656, + "grad_norm": 1.1049326363207628, + "learning_rate": 1.1723508094477825e-05, + "loss": 0.30239278078079224, + "step": 3561 + }, + { + "epoch": 0.9459567122560085, + "grad_norm": 1.0413977608943958, + "learning_rate": 1.1719182868091567e-05, + "loss": 0.2893553078174591, + "step": 3562 + }, + { + "epoch": 0.9462222812375515, + "grad_norm": 1.215187947788902, + "learning_rate": 1.1714857310267124e-05, + "loss": 0.2840202748775482, + "step": 3563 + }, + { + "epoch": 0.9464878502190944, + "grad_norm": 1.0615180068139964, + "learning_rate": 1.1710531421838422e-05, + "loss": 0.2614031732082367, + "step": 3564 + }, + { + "epoch": 0.9467534192006374, + "grad_norm": 1.0290230331800772, + "learning_rate": 1.1706205203639433e-05, + "loss": 0.267095148563385, + "step": 3565 + }, + { + "epoch": 0.9470189881821803, + "grad_norm": 1.2397291626994196, + "learning_rate": 1.1701878656504206e-05, + "loss": 0.25835227966308594, + "step": 3566 + }, + { + "epoch": 0.9472845571637233, + "grad_norm": 1.1319162410146095, + "learning_rate": 1.1697551781266845e-05, + "loss": 0.27547580003738403, + "step": 3567 + }, + { + "epoch": 0.9475501261452662, + "grad_norm": 1.089656044815204, + "learning_rate": 1.169322457876152e-05, + "loss": 0.251165509223938, + "step": 3568 + }, + { + "epoch": 0.9478156951268092, + "grad_norm": 1.2350323802819905, + "learning_rate": 1.1688897049822467e-05, + "loss": 0.2738516926765442, + "step": 3569 + }, + { + "epoch": 0.9480812641083521, + "grad_norm": 1.0315369616879289, + "learning_rate": 1.1684569195283981e-05, + "loss": 0.2745274305343628, + "step": 3570 + }, + { + "epoch": 0.9483468330898951, + "grad_norm": 1.180099592022995, + "learning_rate": 1.1680241015980423e-05, + "loss": 0.28586819767951965, + "step": 3571 + }, + { + "epoch": 0.948612402071438, + "grad_norm": 1.2233918967574897, + "learning_rate": 1.167591251274621e-05, + "loss": 0.2559577524662018, + "step": 3572 + }, + { + "epoch": 0.948877971052981, + "grad_norm": 1.155824963337958, + "learning_rate": 1.1671583686415833e-05, + "loss": 0.26069143414497375, + "step": 3573 + }, + { + "epoch": 0.949143540034524, + "grad_norm": 1.078529730225554, + "learning_rate": 1.1667254537823838e-05, + "loss": 0.26866453886032104, + "step": 3574 + }, + { + "epoch": 0.9494091090160669, + "grad_norm": 1.0772599867154102, + "learning_rate": 1.166292506780483e-05, + "loss": 0.25285348296165466, + "step": 3575 + }, + { + "epoch": 0.9496746779976099, + "grad_norm": 1.1335172942215501, + "learning_rate": 1.1658595277193479e-05, + "loss": 0.3330434262752533, + "step": 3576 + }, + { + "epoch": 0.9499402469791528, + "grad_norm": 1.076438251163932, + "learning_rate": 1.1654265166824522e-05, + "loss": 0.2789473533630371, + "step": 3577 + }, + { + "epoch": 0.9502058159606958, + "grad_norm": 1.2746037306212283, + "learning_rate": 1.164993473753275e-05, + "loss": 0.30984824895858765, + "step": 3578 + }, + { + "epoch": 0.9504713849422387, + "grad_norm": 1.0517088315750878, + "learning_rate": 1.164560399015302e-05, + "loss": 0.23881833255290985, + "step": 3579 + }, + { + "epoch": 0.9507369539237817, + "grad_norm": 1.1012484750770577, + "learning_rate": 1.164127292552025e-05, + "loss": 0.3027937114238739, + "step": 3580 + }, + { + "epoch": 0.9510025229053246, + "grad_norm": 1.1998484228117954, + "learning_rate": 1.1636941544469413e-05, + "loss": 0.2901906371116638, + "step": 3581 + }, + { + "epoch": 0.9512680918868676, + "grad_norm": 1.069491787313744, + "learning_rate": 1.1632609847835556e-05, + "loss": 0.28961148858070374, + "step": 3582 + }, + { + "epoch": 0.9515336608684106, + "grad_norm": 1.0782542825887276, + "learning_rate": 1.1628277836453774e-05, + "loss": 0.2730783224105835, + "step": 3583 + }, + { + "epoch": 0.9517992298499536, + "grad_norm": 1.0952017771476839, + "learning_rate": 1.1623945511159232e-05, + "loss": 0.3195485770702362, + "step": 3584 + }, + { + "epoch": 0.9520647988314965, + "grad_norm": 1.1514370971708257, + "learning_rate": 1.1619612872787144e-05, + "loss": 0.3097516894340515, + "step": 3585 + }, + { + "epoch": 0.9523303678130395, + "grad_norm": 1.0422990071728377, + "learning_rate": 1.1615279922172796e-05, + "loss": 0.2716284692287445, + "step": 3586 + }, + { + "epoch": 0.9525959367945824, + "grad_norm": 0.9669355988334725, + "learning_rate": 1.1610946660151531e-05, + "loss": 0.2601209878921509, + "step": 3587 + }, + { + "epoch": 0.9528615057761254, + "grad_norm": 1.1027425019898653, + "learning_rate": 1.1606613087558748e-05, + "loss": 0.28665289282798767, + "step": 3588 + }, + { + "epoch": 0.9531270747576683, + "grad_norm": 1.082078861677668, + "learning_rate": 1.1602279205229912e-05, + "loss": 0.3019893765449524, + "step": 3589 + }, + { + "epoch": 0.9533926437392113, + "grad_norm": 0.9778282797717269, + "learning_rate": 1.1597945014000537e-05, + "loss": 0.2635146677494049, + "step": 3590 + }, + { + "epoch": 0.9536582127207542, + "grad_norm": 1.0527782897227813, + "learning_rate": 1.1593610514706217e-05, + "loss": 0.2704858183860779, + "step": 3591 + }, + { + "epoch": 0.9539237817022972, + "grad_norm": 1.2295509988273574, + "learning_rate": 1.1589275708182581e-05, + "loss": 0.31997931003570557, + "step": 3592 + }, + { + "epoch": 0.9541893506838401, + "grad_norm": 1.1529907760165448, + "learning_rate": 1.1584940595265332e-05, + "loss": 0.2308788150548935, + "step": 3593 + }, + { + "epoch": 0.9544549196653831, + "grad_norm": 1.0980235303762964, + "learning_rate": 1.1580605176790229e-05, + "loss": 0.28886470198631287, + "step": 3594 + }, + { + "epoch": 0.954720488646926, + "grad_norm": 1.313883667721807, + "learning_rate": 1.157626945359309e-05, + "loss": 0.30698686838150024, + "step": 3595 + }, + { + "epoch": 0.954986057628469, + "grad_norm": 1.1087251273709688, + "learning_rate": 1.1571933426509789e-05, + "loss": 0.27475905418395996, + "step": 3596 + }, + { + "epoch": 0.955251626610012, + "grad_norm": 1.1064883207545173, + "learning_rate": 1.1567597096376264e-05, + "loss": 0.2568071484565735, + "step": 3597 + }, + { + "epoch": 0.9555171955915549, + "grad_norm": 1.28706485993144, + "learning_rate": 1.1563260464028507e-05, + "loss": 0.2574060261249542, + "step": 3598 + }, + { + "epoch": 0.9557827645730979, + "grad_norm": 1.193494963897618, + "learning_rate": 1.1558923530302571e-05, + "loss": 0.2847997546195984, + "step": 3599 + }, + { + "epoch": 0.9560483335546408, + "grad_norm": 1.0723094070831873, + "learning_rate": 1.155458629603456e-05, + "loss": 0.2594734728336334, + "step": 3600 + }, + { + "epoch": 0.9563139025361838, + "grad_norm": 1.0020160427681732, + "learning_rate": 1.155024876206065e-05, + "loss": 0.2300589680671692, + "step": 3601 + }, + { + "epoch": 0.9565794715177267, + "grad_norm": 1.1475438454718678, + "learning_rate": 1.1545910929217059e-05, + "loss": 0.29174795746803284, + "step": 3602 + }, + { + "epoch": 0.9568450404992697, + "grad_norm": 1.0425930414114217, + "learning_rate": 1.1541572798340076e-05, + "loss": 0.2666400074958801, + "step": 3603 + }, + { + "epoch": 0.9571106094808126, + "grad_norm": 1.0067559469755134, + "learning_rate": 1.1537234370266035e-05, + "loss": 0.24651308357715607, + "step": 3604 + }, + { + "epoch": 0.9573761784623556, + "grad_norm": 1.1542471481522265, + "learning_rate": 1.1532895645831339e-05, + "loss": 0.29991376399993896, + "step": 3605 + }, + { + "epoch": 0.9576417474438985, + "grad_norm": 1.0631305192934537, + "learning_rate": 1.1528556625872443e-05, + "loss": 0.27713578939437866, + "step": 3606 + }, + { + "epoch": 0.9579073164254415, + "grad_norm": 1.0497999275546905, + "learning_rate": 1.1524217311225857e-05, + "loss": 0.26503294706344604, + "step": 3607 + }, + { + "epoch": 0.9581728854069844, + "grad_norm": 1.1479000180189152, + "learning_rate": 1.1519877702728149e-05, + "loss": 0.28627675771713257, + "step": 3608 + }, + { + "epoch": 0.9584384543885274, + "grad_norm": 1.0333891142616893, + "learning_rate": 1.1515537801215944e-05, + "loss": 0.26862916350364685, + "step": 3609 + }, + { + "epoch": 0.9587040233700703, + "grad_norm": 1.2518522451268181, + "learning_rate": 1.1511197607525926e-05, + "loss": 0.29697147011756897, + "step": 3610 + }, + { + "epoch": 0.9589695923516134, + "grad_norm": 1.0668919106736792, + "learning_rate": 1.1506857122494832e-05, + "loss": 0.2980155944824219, + "step": 3611 + }, + { + "epoch": 0.9592351613331563, + "grad_norm": 1.1016644329026075, + "learning_rate": 1.1502516346959458e-05, + "loss": 0.2847440838813782, + "step": 3612 + }, + { + "epoch": 0.9595007303146993, + "grad_norm": 1.1131533712076647, + "learning_rate": 1.149817528175665e-05, + "loss": 0.2812016010284424, + "step": 3613 + }, + { + "epoch": 0.9597662992962422, + "grad_norm": 1.0387818826049915, + "learning_rate": 1.1493833927723319e-05, + "loss": 0.26856982707977295, + "step": 3614 + }, + { + "epoch": 0.9600318682777852, + "grad_norm": 1.0595715138301371, + "learning_rate": 1.1489492285696424e-05, + "loss": 0.2651693820953369, + "step": 3615 + }, + { + "epoch": 0.9602974372593281, + "grad_norm": 1.1384265947297394, + "learning_rate": 1.1485150356512986e-05, + "loss": 0.29811644554138184, + "step": 3616 + }, + { + "epoch": 0.9605630062408711, + "grad_norm": 1.0449713925688802, + "learning_rate": 1.1480808141010071e-05, + "loss": 0.2622855007648468, + "step": 3617 + }, + { + "epoch": 0.960828575222414, + "grad_norm": 1.1964334046740135, + "learning_rate": 1.1476465640024814e-05, + "loss": 0.3067246377468109, + "step": 3618 + }, + { + "epoch": 0.961094144203957, + "grad_norm": 1.0999678942020576, + "learning_rate": 1.1472122854394394e-05, + "loss": 0.25928011536598206, + "step": 3619 + }, + { + "epoch": 0.9613597131855, + "grad_norm": 1.0356853160291564, + "learning_rate": 1.146777978495605e-05, + "loss": 0.2574170231819153, + "step": 3620 + }, + { + "epoch": 0.9616252821670429, + "grad_norm": 1.1366453776894136, + "learning_rate": 1.1463436432547073e-05, + "loss": 0.2845388650894165, + "step": 3621 + }, + { + "epoch": 0.9618908511485859, + "grad_norm": 1.1067131961561003, + "learning_rate": 1.145909279800481e-05, + "loss": 0.28735876083374023, + "step": 3622 + }, + { + "epoch": 0.9621564201301288, + "grad_norm": 1.100639151702203, + "learning_rate": 1.1454748882166666e-05, + "loss": 0.25739723443984985, + "step": 3623 + }, + { + "epoch": 0.9624219891116718, + "grad_norm": 1.0743852778260963, + "learning_rate": 1.1450404685870098e-05, + "loss": 0.25144338607788086, + "step": 3624 + }, + { + "epoch": 0.9626875580932147, + "grad_norm": 1.0451944769292063, + "learning_rate": 1.144606020995261e-05, + "loss": 0.23981891572475433, + "step": 3625 + }, + { + "epoch": 0.9629531270747577, + "grad_norm": 1.1215387475511582, + "learning_rate": 1.1441715455251764e-05, + "loss": 0.30925339460372925, + "step": 3626 + }, + { + "epoch": 0.9632186960563006, + "grad_norm": 1.1193965021491372, + "learning_rate": 1.1437370422605184e-05, + "loss": 0.2559184432029724, + "step": 3627 + }, + { + "epoch": 0.9634842650378436, + "grad_norm": 1.221260182162867, + "learning_rate": 1.1433025112850542e-05, + "loss": 0.3001229166984558, + "step": 3628 + }, + { + "epoch": 0.9637498340193865, + "grad_norm": 0.9957913669659347, + "learning_rate": 1.1428679526825557e-05, + "loss": 0.24304218590259552, + "step": 3629 + }, + { + "epoch": 0.9640154030009295, + "grad_norm": 1.0405086595778643, + "learning_rate": 1.1424333665368011e-05, + "loss": 0.25677186250686646, + "step": 3630 + }, + { + "epoch": 0.9642809719824724, + "grad_norm": 1.0362119568252992, + "learning_rate": 1.141998752931573e-05, + "loss": 0.2589085102081299, + "step": 3631 + }, + { + "epoch": 0.9645465409640154, + "grad_norm": 1.1004952842028541, + "learning_rate": 1.1415641119506601e-05, + "loss": 0.2588059604167938, + "step": 3632 + }, + { + "epoch": 0.9648121099455583, + "grad_norm": 1.1379378571012249, + "learning_rate": 1.1411294436778562e-05, + "loss": 0.26097869873046875, + "step": 3633 + }, + { + "epoch": 0.9650776789271013, + "grad_norm": 1.2218308438631786, + "learning_rate": 1.1406947481969598e-05, + "loss": 0.26022520661354065, + "step": 3634 + }, + { + "epoch": 0.9653432479086442, + "grad_norm": 1.0737420773814035, + "learning_rate": 1.140260025591775e-05, + "loss": 0.26242876052856445, + "step": 3635 + }, + { + "epoch": 0.9656088168901872, + "grad_norm": 1.1396910340144906, + "learning_rate": 1.1398252759461119e-05, + "loss": 0.30035555362701416, + "step": 3636 + }, + { + "epoch": 0.9658743858717301, + "grad_norm": 1.1365210980452296, + "learning_rate": 1.1393904993437848e-05, + "loss": 0.26388341188430786, + "step": 3637 + }, + { + "epoch": 0.9661399548532731, + "grad_norm": 1.06242333907382, + "learning_rate": 1.1389556958686132e-05, + "loss": 0.28116434812545776, + "step": 3638 + }, + { + "epoch": 0.966405523834816, + "grad_norm": 1.0513966621960738, + "learning_rate": 1.1385208656044222e-05, + "loss": 0.25372493267059326, + "step": 3639 + }, + { + "epoch": 0.9666710928163591, + "grad_norm": 1.1171784181414381, + "learning_rate": 1.1380860086350422e-05, + "loss": 0.2648317813873291, + "step": 3640 + }, + { + "epoch": 0.966936661797902, + "grad_norm": 1.0508956007113521, + "learning_rate": 1.1376511250443082e-05, + "loss": 0.26981276273727417, + "step": 3641 + }, + { + "epoch": 0.967202230779445, + "grad_norm": 1.1513465918880585, + "learning_rate": 1.1372162149160608e-05, + "loss": 0.2934207618236542, + "step": 3642 + }, + { + "epoch": 0.967467799760988, + "grad_norm": 0.9705407845284122, + "learning_rate": 1.1367812783341454e-05, + "loss": 0.24250900745391846, + "step": 3643 + }, + { + "epoch": 0.9677333687425309, + "grad_norm": 1.0409007473472116, + "learning_rate": 1.1363463153824125e-05, + "loss": 0.2565772235393524, + "step": 3644 + }, + { + "epoch": 0.9679989377240739, + "grad_norm": 1.2386980142351325, + "learning_rate": 1.1359113261447183e-05, + "loss": 0.28407829999923706, + "step": 3645 + }, + { + "epoch": 0.9682645067056168, + "grad_norm": 1.1134220293120092, + "learning_rate": 1.1354763107049234e-05, + "loss": 0.2974489629268646, + "step": 3646 + }, + { + "epoch": 0.9685300756871598, + "grad_norm": 1.1611486704366027, + "learning_rate": 1.1350412691468935e-05, + "loss": 0.27539899945259094, + "step": 3647 + }, + { + "epoch": 0.9687956446687027, + "grad_norm": 1.1777496863563888, + "learning_rate": 1.1346062015544997e-05, + "loss": 0.28256523609161377, + "step": 3648 + }, + { + "epoch": 0.9690612136502457, + "grad_norm": 1.0910813538672366, + "learning_rate": 1.1341711080116176e-05, + "loss": 0.27582883834838867, + "step": 3649 + }, + { + "epoch": 0.9693267826317886, + "grad_norm": 1.2299419127493794, + "learning_rate": 1.1337359886021285e-05, + "loss": 0.3199389576911926, + "step": 3650 + }, + { + "epoch": 0.9695923516133316, + "grad_norm": 1.078226808322517, + "learning_rate": 1.1333008434099178e-05, + "loss": 0.2922326922416687, + "step": 3651 + }, + { + "epoch": 0.9698579205948745, + "grad_norm": 1.1833154338367669, + "learning_rate": 1.1328656725188767e-05, + "loss": 0.285635381937027, + "step": 3652 + }, + { + "epoch": 0.9701234895764175, + "grad_norm": 1.1606724829825772, + "learning_rate": 1.1324304760129009e-05, + "loss": 0.3347492814064026, + "step": 3653 + }, + { + "epoch": 0.9703890585579604, + "grad_norm": 1.1079831575977723, + "learning_rate": 1.1319952539758912e-05, + "loss": 0.27379873394966125, + "step": 3654 + }, + { + "epoch": 0.9706546275395034, + "grad_norm": 1.2487680540467303, + "learning_rate": 1.1315600064917534e-05, + "loss": 0.27911311388015747, + "step": 3655 + }, + { + "epoch": 0.9709201965210463, + "grad_norm": 1.187492816658345, + "learning_rate": 1.1311247336443982e-05, + "loss": 0.25750118494033813, + "step": 3656 + }, + { + "epoch": 0.9711857655025893, + "grad_norm": 1.1010343448161526, + "learning_rate": 1.1306894355177405e-05, + "loss": 0.28723078966140747, + "step": 3657 + }, + { + "epoch": 0.9714513344841322, + "grad_norm": 1.0378840795289885, + "learning_rate": 1.1302541121957008e-05, + "loss": 0.25269389152526855, + "step": 3658 + }, + { + "epoch": 0.9717169034656752, + "grad_norm": 1.1923604766845932, + "learning_rate": 1.1298187637622046e-05, + "loss": 0.3041607439517975, + "step": 3659 + }, + { + "epoch": 0.9719824724472181, + "grad_norm": 1.0812687625707742, + "learning_rate": 1.1293833903011819e-05, + "loss": 0.2826605439186096, + "step": 3660 + }, + { + "epoch": 0.9722480414287611, + "grad_norm": 1.1010565715724137, + "learning_rate": 1.1289479918965675e-05, + "loss": 0.2830520570278168, + "step": 3661 + }, + { + "epoch": 0.972513610410304, + "grad_norm": 1.0160541896764337, + "learning_rate": 1.1285125686323011e-05, + "loss": 0.24295952916145325, + "step": 3662 + }, + { + "epoch": 0.972779179391847, + "grad_norm": 1.108181435484162, + "learning_rate": 1.1280771205923269e-05, + "loss": 0.28775808215141296, + "step": 3663 + }, + { + "epoch": 0.97304474837339, + "grad_norm": 0.9715417125511246, + "learning_rate": 1.127641647860595e-05, + "loss": 0.24650296568870544, + "step": 3664 + }, + { + "epoch": 0.9733103173549329, + "grad_norm": 0.9305293200248026, + "learning_rate": 1.1272061505210584e-05, + "loss": 0.22344040870666504, + "step": 3665 + }, + { + "epoch": 0.9735758863364758, + "grad_norm": 1.0859092127038839, + "learning_rate": 1.1267706286576759e-05, + "loss": 0.26920852065086365, + "step": 3666 + }, + { + "epoch": 0.9738414553180188, + "grad_norm": 1.1792674236289236, + "learning_rate": 1.1263350823544115e-05, + "loss": 0.27615875005722046, + "step": 3667 + }, + { + "epoch": 0.9741070242995619, + "grad_norm": 1.0470064037587914, + "learning_rate": 1.1258995116952334e-05, + "loss": 0.2768712043762207, + "step": 3668 + }, + { + "epoch": 0.9743725932811048, + "grad_norm": 1.0568329464095596, + "learning_rate": 1.1254639167641141e-05, + "loss": 0.27764153480529785, + "step": 3669 + }, + { + "epoch": 0.9746381622626478, + "grad_norm": 1.139437307258024, + "learning_rate": 1.1250282976450316e-05, + "loss": 0.27423611283302307, + "step": 3670 + }, + { + "epoch": 0.9749037312441907, + "grad_norm": 1.1238013222894891, + "learning_rate": 1.1245926544219676e-05, + "loss": 0.2626228332519531, + "step": 3671 + }, + { + "epoch": 0.9751693002257337, + "grad_norm": 1.2807555997920204, + "learning_rate": 1.1241569871789096e-05, + "loss": 0.25524014234542847, + "step": 3672 + }, + { + "epoch": 0.9754348692072766, + "grad_norm": 1.1042234540757712, + "learning_rate": 1.1237212959998485e-05, + "loss": 0.30857735872268677, + "step": 3673 + }, + { + "epoch": 0.9757004381888196, + "grad_norm": 1.0235359310129009, + "learning_rate": 1.1232855809687807e-05, + "loss": 0.25099021196365356, + "step": 3674 + }, + { + "epoch": 0.9759660071703625, + "grad_norm": 1.0116202981123898, + "learning_rate": 1.1228498421697068e-05, + "loss": 0.22664576768875122, + "step": 3675 + }, + { + "epoch": 0.9762315761519055, + "grad_norm": 1.151038777130998, + "learning_rate": 1.1224140796866322e-05, + "loss": 0.24727366864681244, + "step": 3676 + }, + { + "epoch": 0.9764971451334484, + "grad_norm": 1.160849411640656, + "learning_rate": 1.121978293603567e-05, + "loss": 0.2561935782432556, + "step": 3677 + }, + { + "epoch": 0.9767627141149914, + "grad_norm": 1.10648815955184, + "learning_rate": 1.1215424840045254e-05, + "loss": 0.2594214677810669, + "step": 3678 + }, + { + "epoch": 0.9770282830965343, + "grad_norm": 1.130419852826836, + "learning_rate": 1.1211066509735265e-05, + "loss": 0.2383778691291809, + "step": 3679 + }, + { + "epoch": 0.9772938520780773, + "grad_norm": 1.2393377504128167, + "learning_rate": 1.1206707945945934e-05, + "loss": 0.2864387035369873, + "step": 3680 + }, + { + "epoch": 0.9775594210596202, + "grad_norm": 1.2012269867709167, + "learning_rate": 1.1202349149517541e-05, + "loss": 0.30415672063827515, + "step": 3681 + }, + { + "epoch": 0.9778249900411632, + "grad_norm": 1.1590063847406842, + "learning_rate": 1.1197990121290415e-05, + "loss": 0.3030807375907898, + "step": 3682 + }, + { + "epoch": 0.9780905590227061, + "grad_norm": 1.1251124481371277, + "learning_rate": 1.1193630862104922e-05, + "loss": 0.2518938481807709, + "step": 3683 + }, + { + "epoch": 0.9783561280042491, + "grad_norm": 1.2096921428918863, + "learning_rate": 1.1189271372801474e-05, + "loss": 0.25353187322616577, + "step": 3684 + }, + { + "epoch": 0.978621696985792, + "grad_norm": 1.401372369430627, + "learning_rate": 1.1184911654220534e-05, + "loss": 0.30639684200286865, + "step": 3685 + }, + { + "epoch": 0.978887265967335, + "grad_norm": 1.1636733460077495, + "learning_rate": 1.1180551707202602e-05, + "loss": 0.295099139213562, + "step": 3686 + }, + { + "epoch": 0.979152834948878, + "grad_norm": 1.0596592048702305, + "learning_rate": 1.1176191532588224e-05, + "loss": 0.2428167164325714, + "step": 3687 + }, + { + "epoch": 0.9794184039304209, + "grad_norm": 1.0401088292404943, + "learning_rate": 1.1171831131217989e-05, + "loss": 0.2716362774372101, + "step": 3688 + }, + { + "epoch": 0.9796839729119639, + "grad_norm": 1.1130709970940986, + "learning_rate": 1.1167470503932534e-05, + "loss": 0.28350287675857544, + "step": 3689 + }, + { + "epoch": 0.9799495418935068, + "grad_norm": 1.0214004744947676, + "learning_rate": 1.1163109651572535e-05, + "loss": 0.2776945233345032, + "step": 3690 + }, + { + "epoch": 0.9802151108750498, + "grad_norm": 1.041237294346951, + "learning_rate": 1.115874857497871e-05, + "loss": 0.2712942063808441, + "step": 3691 + }, + { + "epoch": 0.9804806798565927, + "grad_norm": 1.058232702389033, + "learning_rate": 1.1154387274991829e-05, + "loss": 0.2530008852481842, + "step": 3692 + }, + { + "epoch": 0.9807462488381357, + "grad_norm": 1.0327043619893976, + "learning_rate": 1.1150025752452693e-05, + "loss": 0.24889500439167023, + "step": 3693 + }, + { + "epoch": 0.9810118178196786, + "grad_norm": 1.1013842404358833, + "learning_rate": 1.1145664008202158e-05, + "loss": 0.3051255941390991, + "step": 3694 + }, + { + "epoch": 0.9812773868012216, + "grad_norm": 1.0503003262830894, + "learning_rate": 1.1141302043081112e-05, + "loss": 0.24781765043735504, + "step": 3695 + }, + { + "epoch": 0.9815429557827646, + "grad_norm": 1.2510153019418302, + "learning_rate": 1.1136939857930497e-05, + "loss": 0.3021858036518097, + "step": 3696 + }, + { + "epoch": 0.9818085247643076, + "grad_norm": 1.1052947984569603, + "learning_rate": 1.1132577453591284e-05, + "loss": 0.3026372194290161, + "step": 3697 + }, + { + "epoch": 0.9820740937458505, + "grad_norm": 1.2367828155450835, + "learning_rate": 1.1128214830904494e-05, + "loss": 0.31511861085891724, + "step": 3698 + }, + { + "epoch": 0.9823396627273935, + "grad_norm": 1.076549494496895, + "learning_rate": 1.112385199071119e-05, + "loss": 0.27885258197784424, + "step": 3699 + }, + { + "epoch": 0.9826052317089364, + "grad_norm": 1.0546536629749794, + "learning_rate": 1.1119488933852477e-05, + "loss": 0.2724893391132355, + "step": 3700 + }, + { + "epoch": 0.9828708006904794, + "grad_norm": 1.0683428715266594, + "learning_rate": 1.1115125661169503e-05, + "loss": 0.2836218774318695, + "step": 3701 + }, + { + "epoch": 0.9831363696720223, + "grad_norm": 1.1039385208642913, + "learning_rate": 1.111076217350345e-05, + "loss": 0.24220457673072815, + "step": 3702 + }, + { + "epoch": 0.9834019386535653, + "grad_norm": 1.1586770288767172, + "learning_rate": 1.1106398471695554e-05, + "loss": 0.28599557280540466, + "step": 3703 + }, + { + "epoch": 0.9836675076351082, + "grad_norm": 1.0806945340822165, + "learning_rate": 1.110203455658708e-05, + "loss": 0.30559849739074707, + "step": 3704 + }, + { + "epoch": 0.9839330766166512, + "grad_norm": 1.0573640293446354, + "learning_rate": 1.109767042901934e-05, + "loss": 0.2763117551803589, + "step": 3705 + }, + { + "epoch": 0.9841986455981941, + "grad_norm": 0.9563131800944344, + "learning_rate": 1.109330608983369e-05, + "loss": 0.2028101086616516, + "step": 3706 + }, + { + "epoch": 0.9844642145797371, + "grad_norm": 0.9787835815750591, + "learning_rate": 1.1088941539871515e-05, + "loss": 0.25386112928390503, + "step": 3707 + }, + { + "epoch": 0.98472978356128, + "grad_norm": 1.075996733851366, + "learning_rate": 1.1084576779974257e-05, + "loss": 0.2588289976119995, + "step": 3708 + }, + { + "epoch": 0.984995352542823, + "grad_norm": 1.3003014971272602, + "learning_rate": 1.1080211810983385e-05, + "loss": 0.3201071321964264, + "step": 3709 + }, + { + "epoch": 0.985260921524366, + "grad_norm": 1.2030478206249715, + "learning_rate": 1.107584663374042e-05, + "loss": 0.28439003229141235, + "step": 3710 + }, + { + "epoch": 0.9855264905059089, + "grad_norm": 1.060347062251152, + "learning_rate": 1.1071481249086908e-05, + "loss": 0.2734091579914093, + "step": 3711 + }, + { + "epoch": 0.9857920594874519, + "grad_norm": 1.2115603819692051, + "learning_rate": 1.1067115657864451e-05, + "loss": 0.2917581796646118, + "step": 3712 + }, + { + "epoch": 0.9860576284689948, + "grad_norm": 1.2063997459644484, + "learning_rate": 1.1062749860914681e-05, + "loss": 0.3569914996623993, + "step": 3713 + }, + { + "epoch": 0.9863231974505378, + "grad_norm": 1.127711451799425, + "learning_rate": 1.1058383859079271e-05, + "loss": 0.2574514150619507, + "step": 3714 + }, + { + "epoch": 0.9865887664320807, + "grad_norm": 1.119813552337215, + "learning_rate": 1.1054017653199936e-05, + "loss": 0.3035826086997986, + "step": 3715 + }, + { + "epoch": 0.9868543354136237, + "grad_norm": 1.5863085854725767, + "learning_rate": 1.1049651244118424e-05, + "loss": 0.28067824244499207, + "step": 3716 + }, + { + "epoch": 0.9871199043951666, + "grad_norm": 1.0916600834300794, + "learning_rate": 1.1045284632676535e-05, + "loss": 0.2511579394340515, + "step": 3717 + }, + { + "epoch": 0.9873854733767096, + "grad_norm": 1.2657546371764674, + "learning_rate": 1.1040917819716097e-05, + "loss": 0.3059889078140259, + "step": 3718 + }, + { + "epoch": 0.9876510423582525, + "grad_norm": 1.1224253435238671, + "learning_rate": 1.103655080607898e-05, + "loss": 0.2642200291156769, + "step": 3719 + }, + { + "epoch": 0.9879166113397955, + "grad_norm": 1.0969568004465404, + "learning_rate": 1.1032183592607094e-05, + "loss": 0.2743483781814575, + "step": 3720 + }, + { + "epoch": 0.9881821803213384, + "grad_norm": 1.1317768374698567, + "learning_rate": 1.1027816180142383e-05, + "loss": 0.2597433030605316, + "step": 3721 + }, + { + "epoch": 0.9884477493028814, + "grad_norm": 1.0759312888673545, + "learning_rate": 1.1023448569526834e-05, + "loss": 0.24439337849617004, + "step": 3722 + }, + { + "epoch": 0.9887133182844243, + "grad_norm": 1.0386429343076329, + "learning_rate": 1.1019080761602473e-05, + "loss": 0.2520195245742798, + "step": 3723 + }, + { + "epoch": 0.9889788872659674, + "grad_norm": 1.0921837996926786, + "learning_rate": 1.1014712757211359e-05, + "loss": 0.2904737889766693, + "step": 3724 + }, + { + "epoch": 0.9892444562475103, + "grad_norm": 1.12008182824954, + "learning_rate": 1.1010344557195588e-05, + "loss": 0.28096869587898254, + "step": 3725 + }, + { + "epoch": 0.9895100252290533, + "grad_norm": 1.8392230806075218, + "learning_rate": 1.1005976162397309e-05, + "loss": 0.317839652299881, + "step": 3726 + }, + { + "epoch": 0.9897755942105962, + "grad_norm": 1.19381185696067, + "learning_rate": 1.100160757365869e-05, + "loss": 0.29213201999664307, + "step": 3727 + }, + { + "epoch": 0.9900411631921392, + "grad_norm": 1.215113877896921, + "learning_rate": 1.0997238791821943e-05, + "loss": 0.27034991979599, + "step": 3728 + }, + { + "epoch": 0.9903067321736821, + "grad_norm": 1.2893524723691567, + "learning_rate": 1.0992869817729317e-05, + "loss": 0.30504971742630005, + "step": 3729 + }, + { + "epoch": 0.9905723011552251, + "grad_norm": 1.109889585740049, + "learning_rate": 1.09885006522231e-05, + "loss": 0.30673110485076904, + "step": 3730 + }, + { + "epoch": 0.990837870136768, + "grad_norm": 1.0963153712692437, + "learning_rate": 1.0984131296145616e-05, + "loss": 0.27990686893463135, + "step": 3731 + }, + { + "epoch": 0.991103439118311, + "grad_norm": 1.0228240366531471, + "learning_rate": 1.0979761750339225e-05, + "loss": 0.24379019439220428, + "step": 3732 + }, + { + "epoch": 0.991369008099854, + "grad_norm": 1.1055702239918885, + "learning_rate": 1.0975392015646323e-05, + "loss": 0.30554595589637756, + "step": 3733 + }, + { + "epoch": 0.9916345770813969, + "grad_norm": 1.062606047652276, + "learning_rate": 1.0971022092909342e-05, + "loss": 0.245269775390625, + "step": 3734 + }, + { + "epoch": 0.9919001460629399, + "grad_norm": 1.0977829197687445, + "learning_rate": 1.0966651982970757e-05, + "loss": 0.2732948064804077, + "step": 3735 + }, + { + "epoch": 0.9921657150444828, + "grad_norm": 0.992060831416128, + "learning_rate": 1.0962281686673071e-05, + "loss": 0.25989004969596863, + "step": 3736 + }, + { + "epoch": 0.9924312840260258, + "grad_norm": 1.1415489224758493, + "learning_rate": 1.0957911204858824e-05, + "loss": 0.32891198992729187, + "step": 3737 + }, + { + "epoch": 0.9926968530075687, + "grad_norm": 1.094277657297916, + "learning_rate": 1.0953540538370591e-05, + "loss": 0.29184675216674805, + "step": 3738 + }, + { + "epoch": 0.9929624219891117, + "grad_norm": 1.1381026162174743, + "learning_rate": 1.094916968805099e-05, + "loss": 0.2784018814563751, + "step": 3739 + }, + { + "epoch": 0.9932279909706546, + "grad_norm": 1.1670677505581852, + "learning_rate": 1.094479865474267e-05, + "loss": 0.26586195826530457, + "step": 3740 + }, + { + "epoch": 0.9934935599521976, + "grad_norm": 0.9575913416137994, + "learning_rate": 1.094042743928831e-05, + "loss": 0.24593298137187958, + "step": 3741 + }, + { + "epoch": 0.9937591289337405, + "grad_norm": 1.065966707682552, + "learning_rate": 1.0936056042530632e-05, + "loss": 0.2462792694568634, + "step": 3742 + }, + { + "epoch": 0.9940246979152835, + "grad_norm": 1.2074020558104472, + "learning_rate": 1.0931684465312388e-05, + "loss": 0.2688900828361511, + "step": 3743 + }, + { + "epoch": 0.9942902668968264, + "grad_norm": 1.099682442025033, + "learning_rate": 1.0927312708476367e-05, + "loss": 0.2842782735824585, + "step": 3744 + }, + { + "epoch": 0.9945558358783694, + "grad_norm": 1.0548829148077135, + "learning_rate": 1.0922940772865393e-05, + "loss": 0.249299556016922, + "step": 3745 + }, + { + "epoch": 0.9948214048599123, + "grad_norm": 1.175705262338143, + "learning_rate": 1.0918568659322325e-05, + "loss": 0.2765413522720337, + "step": 3746 + }, + { + "epoch": 0.9950869738414553, + "grad_norm": 1.1414819691892306, + "learning_rate": 1.0914196368690049e-05, + "loss": 0.29750365018844604, + "step": 3747 + }, + { + "epoch": 0.9953525428229982, + "grad_norm": 1.153321336461836, + "learning_rate": 1.0909823901811496e-05, + "loss": 0.25272879004478455, + "step": 3748 + }, + { + "epoch": 0.9956181118045412, + "grad_norm": 1.1906489486154657, + "learning_rate": 1.0905451259529626e-05, + "loss": 0.3056861460208893, + "step": 3749 + }, + { + "epoch": 0.9958836807860841, + "grad_norm": 1.1596775625362263, + "learning_rate": 1.090107844268743e-05, + "loss": 0.26723814010620117, + "step": 3750 + }, + { + "epoch": 0.9961492497676271, + "grad_norm": 1.167023454532776, + "learning_rate": 1.0896705452127943e-05, + "loss": 0.29998716711997986, + "step": 3751 + }, + { + "epoch": 0.9964148187491702, + "grad_norm": 1.1519689723038142, + "learning_rate": 1.0892332288694216e-05, + "loss": 0.2690891623497009, + "step": 3752 + }, + { + "epoch": 0.9966803877307131, + "grad_norm": 1.1385088428140973, + "learning_rate": 1.0887958953229349e-05, + "loss": 0.25555333495140076, + "step": 3753 + }, + { + "epoch": 0.996945956712256, + "grad_norm": 1.1617836993376212, + "learning_rate": 1.088358544657647e-05, + "loss": 0.27788421511650085, + "step": 3754 + }, + { + "epoch": 0.997211525693799, + "grad_norm": 1.0981105518173184, + "learning_rate": 1.0879211769578734e-05, + "loss": 0.2566586136817932, + "step": 3755 + }, + { + "epoch": 0.997477094675342, + "grad_norm": 1.1742409056404244, + "learning_rate": 1.0874837923079339e-05, + "loss": 0.3028980493545532, + "step": 3756 + }, + { + "epoch": 0.9977426636568849, + "grad_norm": 1.151070664269376, + "learning_rate": 1.0870463907921512e-05, + "loss": 0.30244824290275574, + "step": 3757 + }, + { + "epoch": 0.9980082326384279, + "grad_norm": 1.0175517300218122, + "learning_rate": 1.086608972494851e-05, + "loss": 0.2610962390899658, + "step": 3758 + }, + { + "epoch": 0.9982738016199708, + "grad_norm": 1.1587347636182326, + "learning_rate": 1.0861715375003623e-05, + "loss": 0.2733536660671234, + "step": 3759 + }, + { + "epoch": 0.9985393706015138, + "grad_norm": 1.094010099730521, + "learning_rate": 1.0857340858930175e-05, + "loss": 0.2915020287036896, + "step": 3760 + }, + { + "epoch": 0.9988049395830567, + "grad_norm": 1.1164899423303463, + "learning_rate": 1.085296617757152e-05, + "loss": 0.2940186560153961, + "step": 3761 + }, + { + "epoch": 0.9990705085645997, + "grad_norm": 1.1441195343158572, + "learning_rate": 1.0848591331771045e-05, + "loss": 0.3002738952636719, + "step": 3762 + }, + { + "epoch": 0.9993360775461426, + "grad_norm": 1.0530840422742196, + "learning_rate": 1.0844216322372172e-05, + "loss": 0.284588485956192, + "step": 3763 + }, + { + "epoch": 0.9996016465276856, + "grad_norm": 1.0971261053209735, + "learning_rate": 1.0839841150218347e-05, + "loss": 0.29395923018455505, + "step": 3764 + }, + { + "epoch": 0.9998672155092285, + "grad_norm": 1.1355876604442514, + "learning_rate": 1.083546581615305e-05, + "loss": 0.2574613094329834, + "step": 3765 + }, + { + "epoch": 1.0, + "grad_norm": 1.535375625820537, + "learning_rate": 1.0831090321019801e-05, + "loss": 0.177712082862854, + "step": 3766 + }, + { + "epoch": 1.000265568981543, + "grad_norm": 1.1101315935040728, + "learning_rate": 1.0826714665662139e-05, + "loss": 0.29758381843566895, + "step": 3767 + }, + { + "epoch": 1.000531137963086, + "grad_norm": 1.055973006911073, + "learning_rate": 1.0822338850923644e-05, + "loss": 0.23377545177936554, + "step": 3768 + }, + { + "epoch": 1.0007967069446289, + "grad_norm": 1.1573191222761028, + "learning_rate": 1.0817962877647911e-05, + "loss": 0.2505020797252655, + "step": 3769 + }, + { + "epoch": 1.0010622759261718, + "grad_norm": 1.0395021899779042, + "learning_rate": 1.0813586746678584e-05, + "loss": 0.26122647523880005, + "step": 3770 + }, + { + "epoch": 1.0013278449077148, + "grad_norm": 1.1508778318464672, + "learning_rate": 1.0809210458859327e-05, + "loss": 0.27962177991867065, + "step": 3771 + }, + { + "epoch": 1.0015934138892577, + "grad_norm": 1.0479777844917506, + "learning_rate": 1.080483401503384e-05, + "loss": 0.21921640634536743, + "step": 3772 + }, + { + "epoch": 1.0018589828708007, + "grad_norm": 1.1277812491041006, + "learning_rate": 1.0800457416045845e-05, + "loss": 0.24623796343803406, + "step": 3773 + }, + { + "epoch": 1.0021245518523436, + "grad_norm": 1.259401152466985, + "learning_rate": 1.0796080662739098e-05, + "loss": 0.3130728006362915, + "step": 3774 + }, + { + "epoch": 1.0023901208338866, + "grad_norm": 1.1209083810179328, + "learning_rate": 1.0791703755957392e-05, + "loss": 0.2548064589500427, + "step": 3775 + }, + { + "epoch": 1.0026556898154295, + "grad_norm": 1.1167206534835417, + "learning_rate": 1.078732669654454e-05, + "loss": 0.20517288148403168, + "step": 3776 + }, + { + "epoch": 1.0029212587969725, + "grad_norm": 1.1055374385175383, + "learning_rate": 1.0782949485344385e-05, + "loss": 0.2634897530078888, + "step": 3777 + }, + { + "epoch": 1.0031868277785154, + "grad_norm": 1.3696848286677328, + "learning_rate": 1.0778572123200804e-05, + "loss": 0.2743223309516907, + "step": 3778 + }, + { + "epoch": 1.0034523967600584, + "grad_norm": 0.9930991365195264, + "learning_rate": 1.0774194610957695e-05, + "loss": 0.24595436453819275, + "step": 3779 + }, + { + "epoch": 1.0037179657416013, + "grad_norm": 1.0885778480679946, + "learning_rate": 1.0769816949459002e-05, + "loss": 0.2508128881454468, + "step": 3780 + }, + { + "epoch": 1.0039835347231443, + "grad_norm": 1.1243431648812525, + "learning_rate": 1.0765439139548677e-05, + "loss": 0.2326367199420929, + "step": 3781 + }, + { + "epoch": 1.0042491037046872, + "grad_norm": 1.1514050771182385, + "learning_rate": 1.0761061182070716e-05, + "loss": 0.2888404130935669, + "step": 3782 + }, + { + "epoch": 1.0045146726862302, + "grad_norm": 1.1399638718055765, + "learning_rate": 1.0756683077869133e-05, + "loss": 0.2804296612739563, + "step": 3783 + }, + { + "epoch": 1.0047802416677731, + "grad_norm": 1.1286027319524963, + "learning_rate": 1.0752304827787979e-05, + "loss": 0.2644953429698944, + "step": 3784 + }, + { + "epoch": 1.005045810649316, + "grad_norm": 1.2396532451569051, + "learning_rate": 1.0747926432671323e-05, + "loss": 0.297788143157959, + "step": 3785 + }, + { + "epoch": 1.005311379630859, + "grad_norm": 1.065071455363874, + "learning_rate": 1.0743547893363276e-05, + "loss": 0.2644156515598297, + "step": 3786 + }, + { + "epoch": 1.005576948612402, + "grad_norm": 1.1640867578019738, + "learning_rate": 1.073916921070796e-05, + "loss": 0.23818905651569366, + "step": 3787 + }, + { + "epoch": 1.005842517593945, + "grad_norm": 1.11872081222192, + "learning_rate": 1.0734790385549538e-05, + "loss": 0.2544933259487152, + "step": 3788 + }, + { + "epoch": 1.006108086575488, + "grad_norm": 1.0836442452511366, + "learning_rate": 1.0730411418732198e-05, + "loss": 0.2569275498390198, + "step": 3789 + }, + { + "epoch": 1.0063736555570308, + "grad_norm": 1.0348585374954582, + "learning_rate": 1.0726032311100153e-05, + "loss": 0.2248159945011139, + "step": 3790 + }, + { + "epoch": 1.0066392245385738, + "grad_norm": 1.1242207493876892, + "learning_rate": 1.072165306349764e-05, + "loss": 0.25541940331459045, + "step": 3791 + }, + { + "epoch": 1.0069047935201167, + "grad_norm": 9.328291099250833, + "learning_rate": 1.0717273676768924e-05, + "loss": 0.24429568648338318, + "step": 3792 + }, + { + "epoch": 1.0071703625016597, + "grad_norm": 1.0574884647737486, + "learning_rate": 1.0712894151758306e-05, + "loss": 0.2586621344089508, + "step": 3793 + }, + { + "epoch": 1.0074359314832027, + "grad_norm": 1.165205157800888, + "learning_rate": 1.0708514489310103e-05, + "loss": 0.28685104846954346, + "step": 3794 + }, + { + "epoch": 1.0077015004647458, + "grad_norm": 1.1536672746294196, + "learning_rate": 1.0704134690268661e-05, + "loss": 0.2847924530506134, + "step": 3795 + }, + { + "epoch": 1.0079670694462888, + "grad_norm": 1.1168453704329862, + "learning_rate": 1.0699754755478358e-05, + "loss": 0.24646440148353577, + "step": 3796 + }, + { + "epoch": 1.0082326384278317, + "grad_norm": 1.217438590106057, + "learning_rate": 1.0695374685783586e-05, + "loss": 0.22286385297775269, + "step": 3797 + }, + { + "epoch": 1.0084982074093747, + "grad_norm": 1.1352166249232278, + "learning_rate": 1.069099448202878e-05, + "loss": 0.2524179518222809, + "step": 3798 + }, + { + "epoch": 1.0087637763909176, + "grad_norm": 1.109981913009372, + "learning_rate": 1.0686614145058387e-05, + "loss": 0.2625758647918701, + "step": 3799 + }, + { + "epoch": 1.0090293453724606, + "grad_norm": 1.0622342238121125, + "learning_rate": 1.0682233675716884e-05, + "loss": 0.25318068265914917, + "step": 3800 + }, + { + "epoch": 1.0092949143540035, + "grad_norm": 1.073699024276181, + "learning_rate": 1.0677853074848774e-05, + "loss": 0.24224570393562317, + "step": 3801 + }, + { + "epoch": 1.0095604833355465, + "grad_norm": 1.1995813349182267, + "learning_rate": 1.0673472343298588e-05, + "loss": 0.28595417737960815, + "step": 3802 + }, + { + "epoch": 1.0098260523170894, + "grad_norm": 1.1558738404506108, + "learning_rate": 1.0669091481910874e-05, + "loss": 0.26894015073776245, + "step": 3803 + }, + { + "epoch": 1.0100916212986324, + "grad_norm": 1.0901744125075639, + "learning_rate": 1.0664710491530214e-05, + "loss": 0.2605208158493042, + "step": 3804 + }, + { + "epoch": 1.0103571902801753, + "grad_norm": 1.082458382717597, + "learning_rate": 1.0660329373001212e-05, + "loss": 0.2595113515853882, + "step": 3805 + }, + { + "epoch": 1.0106227592617183, + "grad_norm": 1.2467081294979763, + "learning_rate": 1.0655948127168494e-05, + "loss": 0.27478674054145813, + "step": 3806 + }, + { + "epoch": 1.0108883282432612, + "grad_norm": 1.0742167098010935, + "learning_rate": 1.0651566754876715e-05, + "loss": 0.2587064504623413, + "step": 3807 + }, + { + "epoch": 1.0111538972248042, + "grad_norm": 1.0593019665426413, + "learning_rate": 1.064718525697055e-05, + "loss": 0.2420537769794464, + "step": 3808 + }, + { + "epoch": 1.0114194662063472, + "grad_norm": 1.1660072059036033, + "learning_rate": 1.0642803634294699e-05, + "loss": 0.29424652457237244, + "step": 3809 + }, + { + "epoch": 1.01168503518789, + "grad_norm": 1.0902934718743655, + "learning_rate": 1.0638421887693887e-05, + "loss": 0.25162142515182495, + "step": 3810 + }, + { + "epoch": 1.011950604169433, + "grad_norm": 1.1456242703963635, + "learning_rate": 1.0634040018012865e-05, + "loss": 0.25661247968673706, + "step": 3811 + }, + { + "epoch": 1.012216173150976, + "grad_norm": 1.0060634238068926, + "learning_rate": 1.0629658026096408e-05, + "loss": 0.2042091339826584, + "step": 3812 + }, + { + "epoch": 1.012481742132519, + "grad_norm": 1.0129340658577524, + "learning_rate": 1.0625275912789307e-05, + "loss": 0.22496266663074493, + "step": 3813 + }, + { + "epoch": 1.012747311114062, + "grad_norm": 1.1382961966722176, + "learning_rate": 1.0620893678936385e-05, + "loss": 0.23609521985054016, + "step": 3814 + }, + { + "epoch": 1.0130128800956049, + "grad_norm": 1.2645443214744188, + "learning_rate": 1.0616511325382486e-05, + "loss": 0.2561722993850708, + "step": 3815 + }, + { + "epoch": 1.0132784490771478, + "grad_norm": 1.1379816472778304, + "learning_rate": 1.0612128852972474e-05, + "loss": 0.2617529630661011, + "step": 3816 + }, + { + "epoch": 1.0135440180586908, + "grad_norm": 1.1862833237483508, + "learning_rate": 1.060774626255124e-05, + "loss": 0.2633543014526367, + "step": 3817 + }, + { + "epoch": 1.0138095870402337, + "grad_norm": 1.0263666085354948, + "learning_rate": 1.0603363554963693e-05, + "loss": 0.19401729106903076, + "step": 3818 + }, + { + "epoch": 1.0140751560217767, + "grad_norm": 1.0891094169836097, + "learning_rate": 1.0598980731054765e-05, + "loss": 0.2583369016647339, + "step": 3819 + }, + { + "epoch": 1.0143407250033196, + "grad_norm": 1.1826598806695992, + "learning_rate": 1.0594597791669419e-05, + "loss": 0.26138922572135925, + "step": 3820 + }, + { + "epoch": 1.0146062939848626, + "grad_norm": 1.1580137447688548, + "learning_rate": 1.0590214737652632e-05, + "loss": 0.2506800591945648, + "step": 3821 + }, + { + "epoch": 1.0148718629664055, + "grad_norm": 1.032579662550809, + "learning_rate": 1.0585831569849405e-05, + "loss": 0.21569974720478058, + "step": 3822 + }, + { + "epoch": 1.0151374319479485, + "grad_norm": 1.37079648056154, + "learning_rate": 1.0581448289104759e-05, + "loss": 0.2765602767467499, + "step": 3823 + }, + { + "epoch": 1.0154030009294914, + "grad_norm": 1.2046968903946047, + "learning_rate": 1.0577064896263743e-05, + "loss": 0.25180384516716003, + "step": 3824 + }, + { + "epoch": 1.0156685699110344, + "grad_norm": 1.0796182560924539, + "learning_rate": 1.0572681392171417e-05, + "loss": 0.24164071679115295, + "step": 3825 + }, + { + "epoch": 1.0159341388925773, + "grad_norm": 1.1523354919316235, + "learning_rate": 1.0568297777672875e-05, + "loss": 0.24206972122192383, + "step": 3826 + }, + { + "epoch": 1.0161997078741203, + "grad_norm": 1.115771237946875, + "learning_rate": 1.0563914053613227e-05, + "loss": 0.24563468992710114, + "step": 3827 + }, + { + "epoch": 1.0164652768556632, + "grad_norm": 1.121826691352643, + "learning_rate": 1.0559530220837593e-05, + "loss": 0.23226243257522583, + "step": 3828 + }, + { + "epoch": 1.0167308458372062, + "grad_norm": 1.4499652400392462, + "learning_rate": 1.0555146280191137e-05, + "loss": 0.2245083749294281, + "step": 3829 + }, + { + "epoch": 1.0169964148187491, + "grad_norm": 1.1230707875328865, + "learning_rate": 1.0550762232519023e-05, + "loss": 0.24455049633979797, + "step": 3830 + }, + { + "epoch": 1.017261983800292, + "grad_norm": 1.1434011419253403, + "learning_rate": 1.0546378078666448e-05, + "loss": 0.2540651857852936, + "step": 3831 + }, + { + "epoch": 1.017527552781835, + "grad_norm": 1.222189193306495, + "learning_rate": 1.0541993819478622e-05, + "loss": 0.23392565548419952, + "step": 3832 + }, + { + "epoch": 1.017793121763378, + "grad_norm": 1.239236731837986, + "learning_rate": 1.053760945580078e-05, + "loss": 0.21601927280426025, + "step": 3833 + }, + { + "epoch": 1.018058690744921, + "grad_norm": 1.1697918037357793, + "learning_rate": 1.0533224988478176e-05, + "loss": 0.24622616171836853, + "step": 3834 + }, + { + "epoch": 1.018324259726464, + "grad_norm": 1.186224891573799, + "learning_rate": 1.0528840418356086e-05, + "loss": 0.2774650752544403, + "step": 3835 + }, + { + "epoch": 1.0185898287080069, + "grad_norm": 1.1218094293898884, + "learning_rate": 1.0524455746279795e-05, + "loss": 0.22323890030384064, + "step": 3836 + }, + { + "epoch": 1.0188553976895498, + "grad_norm": 1.0569207532138136, + "learning_rate": 1.0520070973094622e-05, + "loss": 0.21901552379131317, + "step": 3837 + }, + { + "epoch": 1.0191209666710928, + "grad_norm": 1.1936231752235407, + "learning_rate": 1.0515686099645901e-05, + "loss": 0.3037784695625305, + "step": 3838 + }, + { + "epoch": 1.0193865356526357, + "grad_norm": 1.0847362828180318, + "learning_rate": 1.0511301126778984e-05, + "loss": 0.22658365964889526, + "step": 3839 + }, + { + "epoch": 1.0196521046341787, + "grad_norm": 1.09040618490447, + "learning_rate": 1.0506916055339237e-05, + "loss": 0.23144160211086273, + "step": 3840 + }, + { + "epoch": 1.0199176736157216, + "grad_norm": 1.28339134317777, + "learning_rate": 1.0502530886172055e-05, + "loss": 0.25658899545669556, + "step": 3841 + }, + { + "epoch": 1.0201832425972646, + "grad_norm": 0.9689646092731519, + "learning_rate": 1.0498145620122845e-05, + "loss": 0.19658756256103516, + "step": 3842 + }, + { + "epoch": 1.0204488115788075, + "grad_norm": 1.0949311372526576, + "learning_rate": 1.049376025803703e-05, + "loss": 0.19045208394527435, + "step": 3843 + }, + { + "epoch": 1.0207143805603505, + "grad_norm": 1.1626763108379607, + "learning_rate": 1.0489374800760066e-05, + "loss": 0.2577810287475586, + "step": 3844 + }, + { + "epoch": 1.0209799495418934, + "grad_norm": 1.1521055149329589, + "learning_rate": 1.048498924913741e-05, + "loss": 0.2807403802871704, + "step": 3845 + }, + { + "epoch": 1.0212455185234364, + "grad_norm": 1.2275557893789377, + "learning_rate": 1.0480603604014545e-05, + "loss": 0.2710269093513489, + "step": 3846 + }, + { + "epoch": 1.0215110875049793, + "grad_norm": 1.173604136076929, + "learning_rate": 1.0476217866236974e-05, + "loss": 0.2560620903968811, + "step": 3847 + }, + { + "epoch": 1.0217766564865223, + "grad_norm": 1.1571778426612858, + "learning_rate": 1.0471832036650217e-05, + "loss": 0.2599894404411316, + "step": 3848 + }, + { + "epoch": 1.0220422254680652, + "grad_norm": 1.1339420848197217, + "learning_rate": 1.046744611609981e-05, + "loss": 0.2411944717168808, + "step": 3849 + }, + { + "epoch": 1.0223077944496084, + "grad_norm": 1.1528658942490468, + "learning_rate": 1.0463060105431303e-05, + "loss": 0.25216251611709595, + "step": 3850 + }, + { + "epoch": 1.0225733634311513, + "grad_norm": 1.1884423925105638, + "learning_rate": 1.0458674005490263e-05, + "loss": 0.255629301071167, + "step": 3851 + }, + { + "epoch": 1.0228389324126943, + "grad_norm": 1.0777718220336832, + "learning_rate": 1.0454287817122291e-05, + "loss": 0.24032849073410034, + "step": 3852 + }, + { + "epoch": 1.0231045013942373, + "grad_norm": 1.1154013609024198, + "learning_rate": 1.0449901541172983e-05, + "loss": 0.23188306391239166, + "step": 3853 + }, + { + "epoch": 1.0233700703757802, + "grad_norm": 1.149374478972437, + "learning_rate": 1.0445515178487965e-05, + "loss": 0.2718146741390228, + "step": 3854 + }, + { + "epoch": 1.0236356393573232, + "grad_norm": 1.460691184866812, + "learning_rate": 1.0441128729912876e-05, + "loss": 0.30279839038848877, + "step": 3855 + }, + { + "epoch": 1.023901208338866, + "grad_norm": 1.0711762201816422, + "learning_rate": 1.0436742196293368e-05, + "loss": 0.2185024917125702, + "step": 3856 + }, + { + "epoch": 1.024166777320409, + "grad_norm": 1.2737960148140446, + "learning_rate": 1.0432355578475118e-05, + "loss": 0.2956481873989105, + "step": 3857 + }, + { + "epoch": 1.024432346301952, + "grad_norm": 1.1913794327080105, + "learning_rate": 1.0427968877303809e-05, + "loss": 0.28460678458213806, + "step": 3858 + }, + { + "epoch": 1.024697915283495, + "grad_norm": 1.1716718579119476, + "learning_rate": 1.0423582093625146e-05, + "loss": 0.24597057700157166, + "step": 3859 + }, + { + "epoch": 1.024963484265038, + "grad_norm": 0.987642591779768, + "learning_rate": 1.0419195228284856e-05, + "loss": 0.23986583948135376, + "step": 3860 + }, + { + "epoch": 1.0252290532465809, + "grad_norm": 1.0867576400643644, + "learning_rate": 1.0414808282128668e-05, + "loss": 0.2489446997642517, + "step": 3861 + }, + { + "epoch": 1.0254946222281238, + "grad_norm": 1.1200031637603385, + "learning_rate": 1.0410421256002334e-05, + "loss": 0.26777884364128113, + "step": 3862 + }, + { + "epoch": 1.0257601912096668, + "grad_norm": 1.1645962699086565, + "learning_rate": 1.0406034150751625e-05, + "loss": 0.23506489396095276, + "step": 3863 + }, + { + "epoch": 1.0260257601912097, + "grad_norm": 1.1861093965134106, + "learning_rate": 1.040164696722232e-05, + "loss": 0.2526484429836273, + "step": 3864 + }, + { + "epoch": 1.0262913291727527, + "grad_norm": 1.1320109702434422, + "learning_rate": 1.0397259706260216e-05, + "loss": 0.2179267853498459, + "step": 3865 + }, + { + "epoch": 1.0265568981542956, + "grad_norm": 1.0267487594121727, + "learning_rate": 1.0392872368711126e-05, + "loss": 0.2431088387966156, + "step": 3866 + }, + { + "epoch": 1.0268224671358386, + "grad_norm": 1.1394336459602463, + "learning_rate": 1.0388484955420877e-05, + "loss": 0.26101407408714294, + "step": 3867 + }, + { + "epoch": 1.0270880361173815, + "grad_norm": 1.0741553283028158, + "learning_rate": 1.0384097467235308e-05, + "loss": 0.23780573904514313, + "step": 3868 + }, + { + "epoch": 1.0273536050989245, + "grad_norm": 1.467981467949694, + "learning_rate": 1.0379709905000278e-05, + "loss": 0.2469894289970398, + "step": 3869 + }, + { + "epoch": 1.0276191740804674, + "grad_norm": 1.074989572738127, + "learning_rate": 1.0375322269561658e-05, + "loss": 0.21271926164627075, + "step": 3870 + }, + { + "epoch": 1.0278847430620104, + "grad_norm": 1.1192343716648714, + "learning_rate": 1.0370934561765331e-05, + "loss": 0.22995726764202118, + "step": 3871 + }, + { + "epoch": 1.0281503120435533, + "grad_norm": 1.2051770162428763, + "learning_rate": 1.0366546782457196e-05, + "loss": 0.27448171377182007, + "step": 3872 + }, + { + "epoch": 1.0284158810250963, + "grad_norm": 1.232887313588547, + "learning_rate": 1.0362158932483165e-05, + "loss": 0.25459539890289307, + "step": 3873 + }, + { + "epoch": 1.0286814500066392, + "grad_norm": 1.1436601222318827, + "learning_rate": 1.0357771012689162e-05, + "loss": 0.23213380575180054, + "step": 3874 + }, + { + "epoch": 1.0289470189881822, + "grad_norm": 1.107979602389345, + "learning_rate": 1.0353383023921127e-05, + "loss": 0.2219776064157486, + "step": 3875 + }, + { + "epoch": 1.0292125879697251, + "grad_norm": 1.2445278934711803, + "learning_rate": 1.0348994967025012e-05, + "loss": 0.27059125900268555, + "step": 3876 + }, + { + "epoch": 1.029478156951268, + "grad_norm": 1.2314072238589235, + "learning_rate": 1.034460684284678e-05, + "loss": 0.26921501755714417, + "step": 3877 + }, + { + "epoch": 1.029743725932811, + "grad_norm": 1.153389282583655, + "learning_rate": 1.0340218652232419e-05, + "loss": 0.24727991223335266, + "step": 3878 + }, + { + "epoch": 1.030009294914354, + "grad_norm": 1.2105369925319034, + "learning_rate": 1.0335830396027912e-05, + "loss": 0.26276054978370667, + "step": 3879 + }, + { + "epoch": 1.030274863895897, + "grad_norm": 1.1222835146983237, + "learning_rate": 1.0331442075079268e-05, + "loss": 0.25906458497047424, + "step": 3880 + }, + { + "epoch": 1.03054043287744, + "grad_norm": 1.1936099182612667, + "learning_rate": 1.0327053690232498e-05, + "loss": 0.2708794176578522, + "step": 3881 + }, + { + "epoch": 1.0308060018589829, + "grad_norm": 1.1283814494585969, + "learning_rate": 1.0322665242333634e-05, + "loss": 0.24968653917312622, + "step": 3882 + }, + { + "epoch": 1.0310715708405258, + "grad_norm": 1.1912763351930955, + "learning_rate": 1.0318276732228716e-05, + "loss": 0.2669135332107544, + "step": 3883 + }, + { + "epoch": 1.0313371398220688, + "grad_norm": 1.0733368423352447, + "learning_rate": 1.0313888160763799e-05, + "loss": 0.24173730611801147, + "step": 3884 + }, + { + "epoch": 1.0316027088036117, + "grad_norm": 1.4084549111395024, + "learning_rate": 1.0309499528784948e-05, + "loss": 0.27513059973716736, + "step": 3885 + }, + { + "epoch": 1.0318682777851547, + "grad_norm": 1.163470416419209, + "learning_rate": 1.0305110837138235e-05, + "loss": 0.2512688934803009, + "step": 3886 + }, + { + "epoch": 1.0321338467666976, + "grad_norm": 1.100016135139411, + "learning_rate": 1.0300722086669753e-05, + "loss": 0.2584962844848633, + "step": 3887 + }, + { + "epoch": 1.0323994157482406, + "grad_norm": 1.1125458904355436, + "learning_rate": 1.0296333278225599e-05, + "loss": 0.23692303895950317, + "step": 3888 + }, + { + "epoch": 1.0326649847297835, + "grad_norm": 1.1981051682884363, + "learning_rate": 1.0291944412651884e-05, + "loss": 0.2570871114730835, + "step": 3889 + }, + { + "epoch": 1.0329305537113265, + "grad_norm": 1.1839354606788588, + "learning_rate": 1.028755549079473e-05, + "loss": 0.2896367609500885, + "step": 3890 + }, + { + "epoch": 1.0331961226928694, + "grad_norm": 0.958593784491898, + "learning_rate": 1.0283166513500267e-05, + "loss": 0.19990365207195282, + "step": 3891 + }, + { + "epoch": 1.0334616916744124, + "grad_norm": 1.1157517117826752, + "learning_rate": 1.0278777481614639e-05, + "loss": 0.25235646963119507, + "step": 3892 + }, + { + "epoch": 1.0337272606559553, + "grad_norm": 1.1808927381569394, + "learning_rate": 1.0274388395984003e-05, + "loss": 0.23675012588500977, + "step": 3893 + }, + { + "epoch": 1.0339928296374983, + "grad_norm": 1.1370597202642294, + "learning_rate": 1.026999925745452e-05, + "loss": 0.250516414642334, + "step": 3894 + }, + { + "epoch": 1.0342583986190412, + "grad_norm": 1.0692414219621886, + "learning_rate": 1.0265610066872365e-05, + "loss": 0.24573490023612976, + "step": 3895 + }, + { + "epoch": 1.0345239676005842, + "grad_norm": 1.085358990363196, + "learning_rate": 1.026122082508372e-05, + "loss": 0.2473086714744568, + "step": 3896 + }, + { + "epoch": 1.0347895365821271, + "grad_norm": 1.162338198859519, + "learning_rate": 1.0256831532934783e-05, + "loss": 0.26546406745910645, + "step": 3897 + }, + { + "epoch": 1.03505510556367, + "grad_norm": 1.1034436628854154, + "learning_rate": 1.0252442191271754e-05, + "loss": 0.2565246522426605, + "step": 3898 + }, + { + "epoch": 1.035320674545213, + "grad_norm": 1.0272875416109402, + "learning_rate": 1.0248052800940846e-05, + "loss": 0.24923476576805115, + "step": 3899 + }, + { + "epoch": 1.035586243526756, + "grad_norm": 1.1519345059696067, + "learning_rate": 1.0243663362788286e-05, + "loss": 0.3079240322113037, + "step": 3900 + }, + { + "epoch": 1.035851812508299, + "grad_norm": 1.0586971174066726, + "learning_rate": 1.0239273877660302e-05, + "loss": 0.2482951581478119, + "step": 3901 + }, + { + "epoch": 1.036117381489842, + "grad_norm": 1.1495296797401515, + "learning_rate": 1.0234884346403138e-05, + "loss": 0.2626204192638397, + "step": 3902 + }, + { + "epoch": 1.0363829504713848, + "grad_norm": 1.0578834148114886, + "learning_rate": 1.023049476986304e-05, + "loss": 0.23181654512882233, + "step": 3903 + }, + { + "epoch": 1.0366485194529278, + "grad_norm": 1.2527800012652353, + "learning_rate": 1.0226105148886272e-05, + "loss": 0.29164040088653564, + "step": 3904 + }, + { + "epoch": 1.0369140884344707, + "grad_norm": 1.034136654365203, + "learning_rate": 1.0221715484319094e-05, + "loss": 0.22025801241397858, + "step": 3905 + }, + { + "epoch": 1.0371796574160137, + "grad_norm": 1.1162047929812215, + "learning_rate": 1.021732577700779e-05, + "loss": 0.2819385826587677, + "step": 3906 + }, + { + "epoch": 1.0374452263975567, + "grad_norm": 1.0524498644463125, + "learning_rate": 1.0212936027798637e-05, + "loss": 0.24709002673625946, + "step": 3907 + }, + { + "epoch": 1.0377107953790998, + "grad_norm": 0.9984579723832369, + "learning_rate": 1.0208546237537928e-05, + "loss": 0.22570034861564636, + "step": 3908 + }, + { + "epoch": 1.0379763643606428, + "grad_norm": 1.1543900299803864, + "learning_rate": 1.0204156407071964e-05, + "loss": 0.25642865896224976, + "step": 3909 + }, + { + "epoch": 1.0382419333421857, + "grad_norm": 1.1657404882715603, + "learning_rate": 1.0199766537247053e-05, + "loss": 0.25970256328582764, + "step": 3910 + }, + { + "epoch": 1.0385075023237287, + "grad_norm": 1.1347864223586095, + "learning_rate": 1.019537662890951e-05, + "loss": 0.2560003101825714, + "step": 3911 + }, + { + "epoch": 1.0387730713052716, + "grad_norm": 1.3160565196765366, + "learning_rate": 1.0190986682905656e-05, + "loss": 0.28138649463653564, + "step": 3912 + }, + { + "epoch": 1.0390386402868146, + "grad_norm": 1.4353879235637104, + "learning_rate": 1.0186596700081825e-05, + "loss": 0.23531222343444824, + "step": 3913 + }, + { + "epoch": 1.0393042092683575, + "grad_norm": 1.1850676655471586, + "learning_rate": 1.018220668128435e-05, + "loss": 0.24912862479686737, + "step": 3914 + }, + { + "epoch": 1.0395697782499005, + "grad_norm": 1.0811585337632708, + "learning_rate": 1.0177816627359575e-05, + "loss": 0.24188724160194397, + "step": 3915 + }, + { + "epoch": 1.0398353472314434, + "grad_norm": 1.2093489820950423, + "learning_rate": 1.0173426539153853e-05, + "loss": 0.2709474563598633, + "step": 3916 + }, + { + "epoch": 1.0401009162129864, + "grad_norm": 1.1793292324294091, + "learning_rate": 1.0169036417513538e-05, + "loss": 0.2400204837322235, + "step": 3917 + }, + { + "epoch": 1.0403664851945293, + "grad_norm": 1.0489256907825586, + "learning_rate": 1.0164646263284993e-05, + "loss": 0.2687132954597473, + "step": 3918 + }, + { + "epoch": 1.0406320541760723, + "grad_norm": 1.1628887826217675, + "learning_rate": 1.0160256077314592e-05, + "loss": 0.25139346718788147, + "step": 3919 + }, + { + "epoch": 1.0408976231576152, + "grad_norm": 1.1762633281473511, + "learning_rate": 1.0155865860448712e-05, + "loss": 0.25873464345932007, + "step": 3920 + }, + { + "epoch": 1.0411631921391582, + "grad_norm": 1.1207165962030725, + "learning_rate": 1.0151475613533732e-05, + "loss": 0.2510434687137604, + "step": 3921 + }, + { + "epoch": 1.0414287611207012, + "grad_norm": 1.2260247662339232, + "learning_rate": 1.0147085337416036e-05, + "loss": 0.24567106366157532, + "step": 3922 + }, + { + "epoch": 1.041694330102244, + "grad_norm": 1.1642096823951156, + "learning_rate": 1.0142695032942024e-05, + "loss": 0.25028282403945923, + "step": 3923 + }, + { + "epoch": 1.041959899083787, + "grad_norm": 1.140963361472911, + "learning_rate": 1.0138304700958096e-05, + "loss": 0.23542484641075134, + "step": 3924 + }, + { + "epoch": 1.04222546806533, + "grad_norm": 1.2475887570620718, + "learning_rate": 1.0133914342310649e-05, + "loss": 0.28974449634552, + "step": 3925 + }, + { + "epoch": 1.042491037046873, + "grad_norm": 1.0648736453755918, + "learning_rate": 1.0129523957846097e-05, + "loss": 0.23417247831821442, + "step": 3926 + }, + { + "epoch": 1.042756606028416, + "grad_norm": 1.1427047582178407, + "learning_rate": 1.0125133548410852e-05, + "loss": 0.23247018456459045, + "step": 3927 + }, + { + "epoch": 1.0430221750099589, + "grad_norm": 1.1496713132119072, + "learning_rate": 1.0120743114851337e-05, + "loss": 0.23860129714012146, + "step": 3928 + }, + { + "epoch": 1.0432877439915018, + "grad_norm": 1.1567405333157526, + "learning_rate": 1.0116352658013973e-05, + "loss": 0.2609105706214905, + "step": 3929 + }, + { + "epoch": 1.0435533129730448, + "grad_norm": 1.2453984448185509, + "learning_rate": 1.0111962178745187e-05, + "loss": 0.2559507489204407, + "step": 3930 + }, + { + "epoch": 1.0438188819545877, + "grad_norm": 1.2247288020965454, + "learning_rate": 1.0107571677891415e-05, + "loss": 0.2708527147769928, + "step": 3931 + }, + { + "epoch": 1.0440844509361307, + "grad_norm": 1.2373037230453465, + "learning_rate": 1.0103181156299091e-05, + "loss": 0.25884875655174255, + "step": 3932 + }, + { + "epoch": 1.0443500199176736, + "grad_norm": 1.3022673165052032, + "learning_rate": 1.0098790614814658e-05, + "loss": 0.2631877660751343, + "step": 3933 + }, + { + "epoch": 1.0446155888992166, + "grad_norm": 1.0267097797291302, + "learning_rate": 1.0094400054284559e-05, + "loss": 0.27179086208343506, + "step": 3934 + }, + { + "epoch": 1.0448811578807595, + "grad_norm": 2.1081344450494144, + "learning_rate": 1.0090009475555245e-05, + "loss": 0.21690386533737183, + "step": 3935 + }, + { + "epoch": 1.0451467268623025, + "grad_norm": 1.0188398651288513, + "learning_rate": 1.0085618879473162e-05, + "loss": 0.20192815363407135, + "step": 3936 + }, + { + "epoch": 1.0454122958438454, + "grad_norm": 1.213624997308106, + "learning_rate": 1.0081228266884773e-05, + "loss": 0.2680777907371521, + "step": 3937 + }, + { + "epoch": 1.0456778648253884, + "grad_norm": 1.1871222610891168, + "learning_rate": 1.007683763863653e-05, + "loss": 0.2566579580307007, + "step": 3938 + }, + { + "epoch": 1.0459434338069313, + "grad_norm": 1.1229802475790265, + "learning_rate": 1.0072446995574895e-05, + "loss": 0.2508152723312378, + "step": 3939 + }, + { + "epoch": 1.0462090027884743, + "grad_norm": 1.0850640213400236, + "learning_rate": 1.0068056338546335e-05, + "loss": 0.2880190908908844, + "step": 3940 + }, + { + "epoch": 1.0464745717700172, + "grad_norm": 1.1129549761108044, + "learning_rate": 1.0063665668397316e-05, + "loss": 0.2646787464618683, + "step": 3941 + }, + { + "epoch": 1.0467401407515602, + "grad_norm": 1.1116528447502043, + "learning_rate": 1.0059274985974305e-05, + "loss": 0.2327616810798645, + "step": 3942 + }, + { + "epoch": 1.0470057097331031, + "grad_norm": 1.1644185595792014, + "learning_rate": 1.0054884292123778e-05, + "loss": 0.24756258726119995, + "step": 3943 + }, + { + "epoch": 1.047271278714646, + "grad_norm": 1.1010853288322209, + "learning_rate": 1.0050493587692207e-05, + "loss": 0.23657771944999695, + "step": 3944 + }, + { + "epoch": 1.047536847696189, + "grad_norm": 1.1386107444709148, + "learning_rate": 1.0046102873526068e-05, + "loss": 0.2541351616382599, + "step": 3945 + }, + { + "epoch": 1.047802416677732, + "grad_norm": 1.0912263009271301, + "learning_rate": 1.0041712150471839e-05, + "loss": 0.2330317348241806, + "step": 3946 + }, + { + "epoch": 1.048067985659275, + "grad_norm": 1.0696190454357721, + "learning_rate": 1.0037321419375997e-05, + "loss": 0.23411181569099426, + "step": 3947 + }, + { + "epoch": 1.048333554640818, + "grad_norm": 1.1223872975815399, + "learning_rate": 1.0032930681085028e-05, + "loss": 0.2605017125606537, + "step": 3948 + }, + { + "epoch": 1.0485991236223609, + "grad_norm": 1.1766579775240698, + "learning_rate": 1.0028539936445407e-05, + "loss": 0.28651514649391174, + "step": 3949 + }, + { + "epoch": 1.0488646926039038, + "grad_norm": 1.1469362905517786, + "learning_rate": 1.0024149186303628e-05, + "loss": 0.22912876307964325, + "step": 3950 + }, + { + "epoch": 1.0491302615854468, + "grad_norm": 1.206814749340921, + "learning_rate": 1.001975843150617e-05, + "loss": 0.24032847583293915, + "step": 3951 + }, + { + "epoch": 1.0493958305669897, + "grad_norm": 1.0089656289438405, + "learning_rate": 1.0015367672899521e-05, + "loss": 0.17826229333877563, + "step": 3952 + }, + { + "epoch": 1.0496613995485327, + "grad_norm": 1.1440301784208975, + "learning_rate": 1.0010976911330163e-05, + "loss": 0.2619745433330536, + "step": 3953 + }, + { + "epoch": 1.0499269685300756, + "grad_norm": 1.1124743886634039, + "learning_rate": 1.0006586147644585e-05, + "loss": 0.24104374647140503, + "step": 3954 + }, + { + "epoch": 1.0501925375116186, + "grad_norm": 1.2465051058358483, + "learning_rate": 1.0002195382689277e-05, + "loss": 0.22913998365402222, + "step": 3955 + }, + { + "epoch": 1.0504581064931615, + "grad_norm": 1.2288244416278613, + "learning_rate": 9.997804617310724e-06, + "loss": 0.2625126838684082, + "step": 3956 + }, + { + "epoch": 1.0507236754747045, + "grad_norm": 1.1016811290492863, + "learning_rate": 9.993413852355416e-06, + "loss": 0.23098430037498474, + "step": 3957 + }, + { + "epoch": 1.0509892444562474, + "grad_norm": 1.2581954843436995, + "learning_rate": 9.98902308866984e-06, + "loss": 0.2866731882095337, + "step": 3958 + }, + { + "epoch": 1.0512548134377904, + "grad_norm": 1.2595027481112393, + "learning_rate": 9.984632327100482e-06, + "loss": 0.2520306706428528, + "step": 3959 + }, + { + "epoch": 1.0515203824193333, + "grad_norm": 1.2731218614589663, + "learning_rate": 9.980241568493834e-06, + "loss": 0.29688766598701477, + "step": 3960 + }, + { + "epoch": 1.0517859514008763, + "grad_norm": 1.2865298416208544, + "learning_rate": 9.975850813696375e-06, + "loss": 0.2876695990562439, + "step": 3961 + }, + { + "epoch": 1.0520515203824194, + "grad_norm": 1.1190033835182807, + "learning_rate": 9.971460063554595e-06, + "loss": 0.2402629554271698, + "step": 3962 + }, + { + "epoch": 1.0523170893639624, + "grad_norm": 1.288030170241207, + "learning_rate": 9.967069318914977e-06, + "loss": 0.32080164551734924, + "step": 3963 + }, + { + "epoch": 1.0525826583455054, + "grad_norm": 1.3484684025161604, + "learning_rate": 9.962678580624008e-06, + "loss": 0.2642936110496521, + "step": 3964 + }, + { + "epoch": 1.0528482273270483, + "grad_norm": 1.1668064537758471, + "learning_rate": 9.958287849528163e-06, + "loss": 0.255870521068573, + "step": 3965 + }, + { + "epoch": 1.0531137963085913, + "grad_norm": 1.1779058124731279, + "learning_rate": 9.953897126473933e-06, + "loss": 0.2695184350013733, + "step": 3966 + }, + { + "epoch": 1.0533793652901342, + "grad_norm": 1.1937956388734083, + "learning_rate": 9.949506412307795e-06, + "loss": 0.24576464295387268, + "step": 3967 + }, + { + "epoch": 1.0536449342716772, + "grad_norm": 1.210893055599799, + "learning_rate": 9.945115707876224e-06, + "loss": 0.26517459750175476, + "step": 3968 + }, + { + "epoch": 1.05391050325322, + "grad_norm": 1.261309936483727, + "learning_rate": 9.940725014025696e-06, + "loss": 0.30468082427978516, + "step": 3969 + }, + { + "epoch": 1.054176072234763, + "grad_norm": 1.1007633858966879, + "learning_rate": 9.936334331602687e-06, + "loss": 0.25299298763275146, + "step": 3970 + }, + { + "epoch": 1.054441641216306, + "grad_norm": 1.1621642625136148, + "learning_rate": 9.931943661453668e-06, + "loss": 0.2659488320350647, + "step": 3971 + }, + { + "epoch": 1.054707210197849, + "grad_norm": 1.129768041847351, + "learning_rate": 9.92755300442511e-06, + "loss": 0.25957295298576355, + "step": 3972 + }, + { + "epoch": 1.054972779179392, + "grad_norm": 1.0969185518732962, + "learning_rate": 9.923162361363476e-06, + "loss": 0.2416645884513855, + "step": 3973 + }, + { + "epoch": 1.0552383481609349, + "grad_norm": 1.1032067417924427, + "learning_rate": 9.91877173311523e-06, + "loss": 0.2627662122249603, + "step": 3974 + }, + { + "epoch": 1.0555039171424778, + "grad_norm": 1.1485553701369502, + "learning_rate": 9.91438112052684e-06, + "loss": 0.2876631021499634, + "step": 3975 + }, + { + "epoch": 1.0557694861240208, + "grad_norm": 1.1306607772682384, + "learning_rate": 9.90999052444476e-06, + "loss": 0.28336596488952637, + "step": 3976 + }, + { + "epoch": 1.0560350551055637, + "grad_norm": 1.266085815857313, + "learning_rate": 9.905599945715443e-06, + "loss": 0.2970484495162964, + "step": 3977 + }, + { + "epoch": 1.0563006240871067, + "grad_norm": 1.188464425479595, + "learning_rate": 9.901209385185345e-06, + "loss": 0.27202755212783813, + "step": 3978 + }, + { + "epoch": 1.0565661930686496, + "grad_norm": 1.0823738866829473, + "learning_rate": 9.896818843700912e-06, + "loss": 0.2702459990978241, + "step": 3979 + }, + { + "epoch": 1.0568317620501926, + "grad_norm": 1.2166105195755876, + "learning_rate": 9.89242832210859e-06, + "loss": 0.26057881116867065, + "step": 3980 + }, + { + "epoch": 1.0570973310317355, + "grad_norm": 1.1526398422075472, + "learning_rate": 9.888037821254816e-06, + "loss": 0.24006876349449158, + "step": 3981 + }, + { + "epoch": 1.0573629000132785, + "grad_norm": 1.0864441989704317, + "learning_rate": 9.883647341986032e-06, + "loss": 0.2437625676393509, + "step": 3982 + }, + { + "epoch": 1.0576284689948214, + "grad_norm": 1.0572722810626467, + "learning_rate": 9.879256885148666e-06, + "loss": 0.24256819486618042, + "step": 3983 + }, + { + "epoch": 1.0578940379763644, + "grad_norm": 1.2008491436753201, + "learning_rate": 9.874866451589151e-06, + "loss": 0.2714581787586212, + "step": 3984 + }, + { + "epoch": 1.0581596069579073, + "grad_norm": 1.1859043120388024, + "learning_rate": 9.870476042153907e-06, + "loss": 0.30309075117111206, + "step": 3985 + }, + { + "epoch": 1.0584251759394503, + "grad_norm": 1.3001941243887445, + "learning_rate": 9.866085657689355e-06, + "loss": 0.2938288450241089, + "step": 3986 + }, + { + "epoch": 1.0586907449209932, + "grad_norm": 1.1041962963159588, + "learning_rate": 9.86169529904191e-06, + "loss": 0.23748518526554108, + "step": 3987 + }, + { + "epoch": 1.0589563139025362, + "grad_norm": 1.2345572480055271, + "learning_rate": 9.857304967057977e-06, + "loss": 0.2883969247341156, + "step": 3988 + }, + { + "epoch": 1.0592218828840791, + "grad_norm": 1.0871048681541509, + "learning_rate": 9.852914662583966e-06, + "loss": 0.28301289677619934, + "step": 3989 + }, + { + "epoch": 1.059487451865622, + "grad_norm": 1.0733060702724175, + "learning_rate": 9.848524386466273e-06, + "loss": 0.22616548836231232, + "step": 3990 + }, + { + "epoch": 1.059753020847165, + "grad_norm": 1.06530549901144, + "learning_rate": 9.844134139551291e-06, + "loss": 0.2282804250717163, + "step": 3991 + }, + { + "epoch": 1.060018589828708, + "grad_norm": 1.154557745213229, + "learning_rate": 9.839743922685408e-06, + "loss": 0.2407834678888321, + "step": 3992 + }, + { + "epoch": 1.060284158810251, + "grad_norm": 1.0504099183304738, + "learning_rate": 9.835353736715007e-06, + "loss": 0.22690361738204956, + "step": 3993 + }, + { + "epoch": 1.060549727791794, + "grad_norm": 1.529267187296219, + "learning_rate": 9.830963582486465e-06, + "loss": 0.23291411995887756, + "step": 3994 + }, + { + "epoch": 1.0608152967733369, + "grad_norm": 1.0804914844168854, + "learning_rate": 9.82657346084615e-06, + "loss": 0.24524198472499847, + "step": 3995 + }, + { + "epoch": 1.0610808657548798, + "grad_norm": 1.130929241291739, + "learning_rate": 9.822183372640426e-06, + "loss": 0.22087743878364563, + "step": 3996 + }, + { + "epoch": 1.0613464347364228, + "grad_norm": 1.1374060021264791, + "learning_rate": 9.817793318715652e-06, + "loss": 0.2459079772233963, + "step": 3997 + }, + { + "epoch": 1.0616120037179657, + "grad_norm": 1.1393890830478974, + "learning_rate": 9.813403299918178e-06, + "loss": 0.24429920315742493, + "step": 3998 + }, + { + "epoch": 1.0618775726995087, + "grad_norm": 1.140499707599593, + "learning_rate": 9.809013317094345e-06, + "loss": 0.2332335114479065, + "step": 3999 + }, + { + "epoch": 1.0621431416810516, + "grad_norm": 1.2157908167694267, + "learning_rate": 9.804623371090493e-06, + "loss": 0.2861659526824951, + "step": 4000 + }, + { + "epoch": 1.0624087106625946, + "grad_norm": 1.1293440606459217, + "learning_rate": 9.800233462752949e-06, + "loss": 0.22731532156467438, + "step": 4001 + }, + { + "epoch": 1.0626742796441375, + "grad_norm": 1.127775309467411, + "learning_rate": 9.795843592928036e-06, + "loss": 0.245025634765625, + "step": 4002 + }, + { + "epoch": 1.0629398486256805, + "grad_norm": 1.2380242649872155, + "learning_rate": 9.791453762462075e-06, + "loss": 0.2826273441314697, + "step": 4003 + }, + { + "epoch": 1.0632054176072234, + "grad_norm": 1.1330484645300947, + "learning_rate": 9.787063972201368e-06, + "loss": 0.24737229943275452, + "step": 4004 + }, + { + "epoch": 1.0634709865887664, + "grad_norm": 1.3814870803010457, + "learning_rate": 9.782674222992214e-06, + "loss": 0.23368477821350098, + "step": 4005 + }, + { + "epoch": 1.0637365555703093, + "grad_norm": 1.2631953536046527, + "learning_rate": 9.778284515680908e-06, + "loss": 0.2754492461681366, + "step": 4006 + }, + { + "epoch": 1.0640021245518523, + "grad_norm": 1.1906091191722363, + "learning_rate": 9.773894851113732e-06, + "loss": 0.2814168334007263, + "step": 4007 + }, + { + "epoch": 1.0642676935333952, + "grad_norm": 1.1594492512554253, + "learning_rate": 9.769505230136962e-06, + "loss": 0.25388047099113464, + "step": 4008 + }, + { + "epoch": 1.0645332625149382, + "grad_norm": 1.2618382745485697, + "learning_rate": 9.765115653596867e-06, + "loss": 0.25435230135917664, + "step": 4009 + }, + { + "epoch": 1.0647988314964811, + "grad_norm": 1.2251032153283614, + "learning_rate": 9.760726122339698e-06, + "loss": 0.265840083360672, + "step": 4010 + }, + { + "epoch": 1.065064400478024, + "grad_norm": 1.1297656349054435, + "learning_rate": 9.756336637211716e-06, + "loss": 0.2533451020717621, + "step": 4011 + }, + { + "epoch": 1.065329969459567, + "grad_norm": 1.0890158421111886, + "learning_rate": 9.751947199059155e-06, + "loss": 0.25214290618896484, + "step": 4012 + }, + { + "epoch": 1.06559553844111, + "grad_norm": 1.0603532415232781, + "learning_rate": 9.74755780872825e-06, + "loss": 0.25039419531822205, + "step": 4013 + }, + { + "epoch": 1.065861107422653, + "grad_norm": 1.0177623632775965, + "learning_rate": 9.74316846706522e-06, + "loss": 0.21251091361045837, + "step": 4014 + }, + { + "epoch": 1.066126676404196, + "grad_norm": 1.123294230398497, + "learning_rate": 9.738779174916281e-06, + "loss": 0.25898969173431396, + "step": 4015 + }, + { + "epoch": 1.0663922453857388, + "grad_norm": 1.1054663361669936, + "learning_rate": 9.734389933127639e-06, + "loss": 0.2655499577522278, + "step": 4016 + }, + { + "epoch": 1.0666578143672818, + "grad_norm": 1.1153507141873742, + "learning_rate": 9.730000742545485e-06, + "loss": 0.2221338450908661, + "step": 4017 + }, + { + "epoch": 1.0669233833488247, + "grad_norm": 1.1746716643835395, + "learning_rate": 9.725611604016002e-06, + "loss": 0.2567589581012726, + "step": 4018 + }, + { + "epoch": 1.0671889523303677, + "grad_norm": 1.1090772377521565, + "learning_rate": 9.721222518385361e-06, + "loss": 0.24440976977348328, + "step": 4019 + }, + { + "epoch": 1.0674545213119107, + "grad_norm": 1.061787642846094, + "learning_rate": 9.716833486499735e-06, + "loss": 0.2229192852973938, + "step": 4020 + }, + { + "epoch": 1.0677200902934538, + "grad_norm": 1.1014121727705226, + "learning_rate": 9.712444509205273e-06, + "loss": 0.26231470704078674, + "step": 4021 + }, + { + "epoch": 1.0679856592749968, + "grad_norm": 1.2531191320236732, + "learning_rate": 9.708055587348119e-06, + "loss": 0.25099092721939087, + "step": 4022 + }, + { + "epoch": 1.0682512282565397, + "grad_norm": 1.1402160070516023, + "learning_rate": 9.703666721774403e-06, + "loss": 0.22979633510112762, + "step": 4023 + }, + { + "epoch": 1.0685167972380827, + "grad_norm": 1.09571485621585, + "learning_rate": 9.699277913330252e-06, + "loss": 0.2361093908548355, + "step": 4024 + }, + { + "epoch": 1.0687823662196256, + "grad_norm": 1.0765448804717204, + "learning_rate": 9.694889162861768e-06, + "loss": 0.2390863001346588, + "step": 4025 + }, + { + "epoch": 1.0690479352011686, + "grad_norm": 1.2569917808844517, + "learning_rate": 9.690500471215057e-06, + "loss": 0.24917885661125183, + "step": 4026 + }, + { + "epoch": 1.0693135041827115, + "grad_norm": 1.1387127210628816, + "learning_rate": 9.686111839236206e-06, + "loss": 0.24215272068977356, + "step": 4027 + }, + { + "epoch": 1.0695790731642545, + "grad_norm": 1.2809085503832063, + "learning_rate": 9.681723267771284e-06, + "loss": 0.27874231338500977, + "step": 4028 + }, + { + "epoch": 1.0698446421457974, + "grad_norm": 1.1707122559783085, + "learning_rate": 9.677334757666368e-06, + "loss": 0.24076086282730103, + "step": 4029 + }, + { + "epoch": 1.0701102111273404, + "grad_norm": 1.1092369229920938, + "learning_rate": 9.672946309767504e-06, + "loss": 0.2444242238998413, + "step": 4030 + }, + { + "epoch": 1.0703757801088833, + "grad_norm": 1.2086874522857378, + "learning_rate": 9.668557924920735e-06, + "loss": 0.2737279236316681, + "step": 4031 + }, + { + "epoch": 1.0706413490904263, + "grad_norm": 1.1006436240463247, + "learning_rate": 9.664169603972091e-06, + "loss": 0.24105575680732727, + "step": 4032 + }, + { + "epoch": 1.0709069180719692, + "grad_norm": 1.336482466569566, + "learning_rate": 9.659781347767584e-06, + "loss": 0.27791836857795715, + "step": 4033 + }, + { + "epoch": 1.0711724870535122, + "grad_norm": 1.1518461528529822, + "learning_rate": 9.655393157153221e-06, + "loss": 0.255472868680954, + "step": 4034 + }, + { + "epoch": 1.0714380560350552, + "grad_norm": 1.371220848551681, + "learning_rate": 9.651005032974994e-06, + "loss": 0.2523707151412964, + "step": 4035 + }, + { + "epoch": 1.071703625016598, + "grad_norm": 1.235756547113907, + "learning_rate": 9.64661697607888e-06, + "loss": 0.24584606289863586, + "step": 4036 + }, + { + "epoch": 1.071969193998141, + "grad_norm": 1.1497174260677319, + "learning_rate": 9.64222898731084e-06, + "loss": 0.25182732939720154, + "step": 4037 + }, + { + "epoch": 1.072234762979684, + "grad_norm": 1.0822892740683951, + "learning_rate": 9.637841067516837e-06, + "loss": 0.254008412361145, + "step": 4038 + }, + { + "epoch": 1.072500331961227, + "grad_norm": 1.080204167750926, + "learning_rate": 9.633453217542806e-06, + "loss": 0.2314324826002121, + "step": 4039 + }, + { + "epoch": 1.07276590094277, + "grad_norm": 1.1139945732367915, + "learning_rate": 9.62906543823467e-06, + "loss": 0.2256058305501938, + "step": 4040 + }, + { + "epoch": 1.0730314699243129, + "grad_norm": 1.283214941862177, + "learning_rate": 9.624677730438344e-06, + "loss": 0.2577894330024719, + "step": 4041 + }, + { + "epoch": 1.0732970389058558, + "grad_norm": 1.0911199623079508, + "learning_rate": 9.620290094999723e-06, + "loss": 0.23520560562610626, + "step": 4042 + }, + { + "epoch": 1.0735626078873988, + "grad_norm": 1.1791405346126818, + "learning_rate": 9.615902532764695e-06, + "loss": 0.2472849190235138, + "step": 4043 + }, + { + "epoch": 1.0738281768689417, + "grad_norm": 1.2195787110249676, + "learning_rate": 9.611515044579128e-06, + "loss": 0.25053414702415466, + "step": 4044 + }, + { + "epoch": 1.0740937458504847, + "grad_norm": 1.1090102650773974, + "learning_rate": 9.607127631288879e-06, + "loss": 0.24229007959365845, + "step": 4045 + }, + { + "epoch": 1.0743593148320276, + "grad_norm": 1.4628298980675831, + "learning_rate": 9.602740293739786e-06, + "loss": 0.2793073058128357, + "step": 4046 + }, + { + "epoch": 1.0746248838135706, + "grad_norm": 1.225079236387791, + "learning_rate": 9.598353032777682e-06, + "loss": 0.24547399580478668, + "step": 4047 + }, + { + "epoch": 1.0748904527951135, + "grad_norm": 1.1980997957436126, + "learning_rate": 9.593965849248378e-06, + "loss": 0.2776937186717987, + "step": 4048 + }, + { + "epoch": 1.0751560217766565, + "grad_norm": 1.0781858695117066, + "learning_rate": 9.589578743997668e-06, + "loss": 0.22677727043628693, + "step": 4049 + }, + { + "epoch": 1.0754215907581994, + "grad_norm": 1.4867723677136682, + "learning_rate": 9.585191717871336e-06, + "loss": 0.23254704475402832, + "step": 4050 + }, + { + "epoch": 1.0756871597397424, + "grad_norm": 1.3243435003953368, + "learning_rate": 9.580804771715148e-06, + "loss": 0.2899828255176544, + "step": 4051 + }, + { + "epoch": 1.0759527287212853, + "grad_norm": 1.1397018772236696, + "learning_rate": 9.576417906374856e-06, + "loss": 0.24632850289344788, + "step": 4052 + }, + { + "epoch": 1.0762182977028283, + "grad_norm": 1.2322214200527608, + "learning_rate": 9.572031122696196e-06, + "loss": 0.2661561369895935, + "step": 4053 + }, + { + "epoch": 1.0764838666843712, + "grad_norm": 1.1394013200357536, + "learning_rate": 9.567644421524889e-06, + "loss": 0.22364279627799988, + "step": 4054 + }, + { + "epoch": 1.0767494356659142, + "grad_norm": 1.5026366502842776, + "learning_rate": 9.563257803706635e-06, + "loss": 0.26748427748680115, + "step": 4055 + }, + { + "epoch": 1.0770150046474571, + "grad_norm": 1.1794922225625246, + "learning_rate": 9.55887127008713e-06, + "loss": 0.22851283848285675, + "step": 4056 + }, + { + "epoch": 1.077280573629, + "grad_norm": 1.1340260741391435, + "learning_rate": 9.554484821512037e-06, + "loss": 0.2456260323524475, + "step": 4057 + }, + { + "epoch": 1.077546142610543, + "grad_norm": 1.2884657617459025, + "learning_rate": 9.55009845882702e-06, + "loss": 0.2556169629096985, + "step": 4058 + }, + { + "epoch": 1.077811711592086, + "grad_norm": 1.274618544457263, + "learning_rate": 9.545712182877714e-06, + "loss": 0.280727744102478, + "step": 4059 + }, + { + "epoch": 1.078077280573629, + "grad_norm": 1.1205087247319334, + "learning_rate": 9.54132599450974e-06, + "loss": 0.25315386056900024, + "step": 4060 + }, + { + "epoch": 1.078342849555172, + "grad_norm": 1.1990539773915618, + "learning_rate": 9.536939894568704e-06, + "loss": 0.21985477209091187, + "step": 4061 + }, + { + "epoch": 1.0786084185367149, + "grad_norm": 1.1575613416248978, + "learning_rate": 9.532553883900196e-06, + "loss": 0.24329043924808502, + "step": 4062 + }, + { + "epoch": 1.0788739875182578, + "grad_norm": 1.173950465827748, + "learning_rate": 9.528167963349786e-06, + "loss": 0.2362256497144699, + "step": 4063 + }, + { + "epoch": 1.0791395564998008, + "grad_norm": 1.1458704347110154, + "learning_rate": 9.523782133763027e-06, + "loss": 0.23685476183891296, + "step": 4064 + }, + { + "epoch": 1.0794051254813437, + "grad_norm": 1.2383774104342302, + "learning_rate": 9.519396395985456e-06, + "loss": 0.26232481002807617, + "step": 4065 + }, + { + "epoch": 1.0796706944628867, + "grad_norm": 1.2768574792534622, + "learning_rate": 9.515010750862594e-06, + "loss": 0.25196313858032227, + "step": 4066 + }, + { + "epoch": 1.0799362634444296, + "grad_norm": 1.082792256362845, + "learning_rate": 9.510625199239939e-06, + "loss": 0.22520464658737183, + "step": 4067 + }, + { + "epoch": 1.0802018324259726, + "grad_norm": 1.190229461562689, + "learning_rate": 9.506239741962971e-06, + "loss": 0.27422505617141724, + "step": 4068 + }, + { + "epoch": 1.0804674014075155, + "grad_norm": 1.3120430811123187, + "learning_rate": 9.50185437987716e-06, + "loss": 0.2646682560443878, + "step": 4069 + }, + { + "epoch": 1.0807329703890585, + "grad_norm": 1.3425819541318131, + "learning_rate": 9.497469113827949e-06, + "loss": 0.2661365866661072, + "step": 4070 + }, + { + "epoch": 1.0809985393706014, + "grad_norm": 1.1101351469883673, + "learning_rate": 9.493083944660766e-06, + "loss": 0.23156839609146118, + "step": 4071 + }, + { + "epoch": 1.0812641083521444, + "grad_norm": 1.1805541153651362, + "learning_rate": 9.488698873221021e-06, + "loss": 0.25353243947029114, + "step": 4072 + }, + { + "epoch": 1.0815296773336873, + "grad_norm": 1.2862671823918606, + "learning_rate": 9.484313900354099e-06, + "loss": 0.27488404512405396, + "step": 4073 + }, + { + "epoch": 1.0817952463152305, + "grad_norm": 1.4041005997261422, + "learning_rate": 9.479929026905378e-06, + "loss": 0.2580753564834595, + "step": 4074 + }, + { + "epoch": 1.0820608152967734, + "grad_norm": 1.1405056260482733, + "learning_rate": 9.475544253720206e-06, + "loss": 0.2425471544265747, + "step": 4075 + }, + { + "epoch": 1.0823263842783164, + "grad_norm": 1.2040355319488043, + "learning_rate": 9.471159581643918e-06, + "loss": 0.25268295407295227, + "step": 4076 + }, + { + "epoch": 1.0825919532598594, + "grad_norm": 1.1573228524057126, + "learning_rate": 9.466775011521825e-06, + "loss": 0.2683602571487427, + "step": 4077 + }, + { + "epoch": 1.0828575222414023, + "grad_norm": 1.1300610618916742, + "learning_rate": 9.462390544199221e-06, + "loss": 0.24945034086704254, + "step": 4078 + }, + { + "epoch": 1.0831230912229453, + "grad_norm": 1.1698494765527112, + "learning_rate": 9.458006180521379e-06, + "loss": 0.21784156560897827, + "step": 4079 + }, + { + "epoch": 1.0833886602044882, + "grad_norm": 1.136268907040887, + "learning_rate": 9.453621921333554e-06, + "loss": 0.22704020142555237, + "step": 4080 + }, + { + "epoch": 1.0836542291860312, + "grad_norm": 1.1373990713388034, + "learning_rate": 9.449237767480979e-06, + "loss": 0.2532106637954712, + "step": 4081 + }, + { + "epoch": 1.0839197981675741, + "grad_norm": 1.1568862012297532, + "learning_rate": 9.444853719808864e-06, + "loss": 0.27809134125709534, + "step": 4082 + }, + { + "epoch": 1.084185367149117, + "grad_norm": 1.2102387789201872, + "learning_rate": 9.440469779162407e-06, + "loss": 0.25704264640808105, + "step": 4083 + }, + { + "epoch": 1.08445093613066, + "grad_norm": 1.1827141084910668, + "learning_rate": 9.436085946386778e-06, + "loss": 0.2656276226043701, + "step": 4084 + }, + { + "epoch": 1.084716505112203, + "grad_norm": 1.256991317445651, + "learning_rate": 9.431702222327126e-06, + "loss": 0.277826726436615, + "step": 4085 + }, + { + "epoch": 1.084982074093746, + "grad_norm": 1.2975495041461134, + "learning_rate": 9.427318607828584e-06, + "loss": 0.24656976759433746, + "step": 4086 + }, + { + "epoch": 1.0852476430752889, + "grad_norm": 1.1974770836803283, + "learning_rate": 9.42293510373626e-06, + "loss": 0.2498110830783844, + "step": 4087 + }, + { + "epoch": 1.0855132120568318, + "grad_norm": 1.1492935678310237, + "learning_rate": 9.418551710895243e-06, + "loss": 0.24574093520641327, + "step": 4088 + }, + { + "epoch": 1.0857787810383748, + "grad_norm": 1.2274895872775384, + "learning_rate": 9.414168430150601e-06, + "loss": 0.25271761417388916, + "step": 4089 + }, + { + "epoch": 1.0860443500199177, + "grad_norm": 1.1759358027679858, + "learning_rate": 9.409785262347373e-06, + "loss": 0.29269370436668396, + "step": 4090 + }, + { + "epoch": 1.0863099190014607, + "grad_norm": 1.1247973273146177, + "learning_rate": 9.405402208330581e-06, + "loss": 0.244449645280838, + "step": 4091 + }, + { + "epoch": 1.0865754879830036, + "grad_norm": 1.186787867713906, + "learning_rate": 9.401019268945237e-06, + "loss": 0.23785406351089478, + "step": 4092 + }, + { + "epoch": 1.0868410569645466, + "grad_norm": 1.1479686632621091, + "learning_rate": 9.39663644503631e-06, + "loss": 0.2493479996919632, + "step": 4093 + }, + { + "epoch": 1.0871066259460895, + "grad_norm": 1.1474347559215512, + "learning_rate": 9.392253737448764e-06, + "loss": 0.23758000135421753, + "step": 4094 + }, + { + "epoch": 1.0873721949276325, + "grad_norm": 1.0946885138749496, + "learning_rate": 9.387871147027528e-06, + "loss": 0.22560475766658783, + "step": 4095 + }, + { + "epoch": 1.0876377639091754, + "grad_norm": 1.1552533162715968, + "learning_rate": 9.383488674617515e-06, + "loss": 0.2558273673057556, + "step": 4096 + }, + { + "epoch": 1.0879033328907184, + "grad_norm": 1.2619180705972233, + "learning_rate": 9.379106321063618e-06, + "loss": 0.2822023034095764, + "step": 4097 + }, + { + "epoch": 1.0881689018722613, + "grad_norm": 1.2076346653444254, + "learning_rate": 9.374724087210698e-06, + "loss": 0.2596978545188904, + "step": 4098 + }, + { + "epoch": 1.0884344708538043, + "grad_norm": 1.6785014002913365, + "learning_rate": 9.370341973903597e-06, + "loss": 0.25353628396987915, + "step": 4099 + }, + { + "epoch": 1.0887000398353472, + "grad_norm": 1.2184499887942242, + "learning_rate": 9.365959981987135e-06, + "loss": 0.2547294497489929, + "step": 4100 + }, + { + "epoch": 1.0889656088168902, + "grad_norm": 1.40658558629773, + "learning_rate": 9.361578112306115e-06, + "loss": 0.2688470780849457, + "step": 4101 + }, + { + "epoch": 1.0892311777984331, + "grad_norm": 1.207208011814592, + "learning_rate": 9.357196365705303e-06, + "loss": 0.25772029161453247, + "step": 4102 + }, + { + "epoch": 1.089496746779976, + "grad_norm": 1.3552039168974384, + "learning_rate": 9.352814743029454e-06, + "loss": 0.2875550091266632, + "step": 4103 + }, + { + "epoch": 1.089762315761519, + "grad_norm": 1.4164869081453233, + "learning_rate": 9.34843324512329e-06, + "loss": 0.23085735738277435, + "step": 4104 + }, + { + "epoch": 1.090027884743062, + "grad_norm": 1.2013725541896922, + "learning_rate": 9.34405187283151e-06, + "loss": 0.2607901096343994, + "step": 4105 + }, + { + "epoch": 1.090293453724605, + "grad_norm": 1.1738523720935938, + "learning_rate": 9.339670626998791e-06, + "loss": 0.26165345311164856, + "step": 4106 + }, + { + "epoch": 1.090559022706148, + "grad_norm": 1.1931234826270498, + "learning_rate": 9.335289508469789e-06, + "loss": 0.27884238958358765, + "step": 4107 + }, + { + "epoch": 1.0908245916876909, + "grad_norm": 1.283025870689831, + "learning_rate": 9.33090851808913e-06, + "loss": 0.2689289152622223, + "step": 4108 + }, + { + "epoch": 1.0910901606692338, + "grad_norm": 1.2574326426613287, + "learning_rate": 9.326527656701414e-06, + "loss": 0.2633207440376282, + "step": 4109 + }, + { + "epoch": 1.0913557296507768, + "grad_norm": 1.1611202948336292, + "learning_rate": 9.322146925151226e-06, + "loss": 0.26001888513565063, + "step": 4110 + }, + { + "epoch": 1.0916212986323197, + "grad_norm": 1.1436383156785508, + "learning_rate": 9.31776632428312e-06, + "loss": 0.2739099860191345, + "step": 4111 + }, + { + "epoch": 1.0918868676138627, + "grad_norm": 1.1080458686771364, + "learning_rate": 9.313385854941616e-06, + "loss": 0.24885550141334534, + "step": 4112 + }, + { + "epoch": 1.0921524365954056, + "grad_norm": 1.1643870148920956, + "learning_rate": 9.309005517971222e-06, + "loss": 0.2609873414039612, + "step": 4113 + }, + { + "epoch": 1.0924180055769486, + "grad_norm": 1.427636157796487, + "learning_rate": 9.304625314216415e-06, + "loss": 0.28853538632392883, + "step": 4114 + }, + { + "epoch": 1.0926835745584915, + "grad_norm": 1.072833070391428, + "learning_rate": 9.300245244521647e-06, + "loss": 0.2629924714565277, + "step": 4115 + }, + { + "epoch": 1.0929491435400345, + "grad_norm": 1.1804644749067619, + "learning_rate": 9.295865309731342e-06, + "loss": 0.2687820494174957, + "step": 4116 + }, + { + "epoch": 1.0932147125215774, + "grad_norm": 1.0831905202820669, + "learning_rate": 9.2914855106899e-06, + "loss": 0.2293676733970642, + "step": 4117 + }, + { + "epoch": 1.0934802815031204, + "grad_norm": 1.1645005992728827, + "learning_rate": 9.287105848241694e-06, + "loss": 0.25261443853378296, + "step": 4118 + }, + { + "epoch": 1.0937458504846633, + "grad_norm": 1.1209341991417805, + "learning_rate": 9.282726323231077e-06, + "loss": 0.26238197088241577, + "step": 4119 + }, + { + "epoch": 1.0940114194662063, + "grad_norm": 1.1230838898563178, + "learning_rate": 9.278346936502364e-06, + "loss": 0.25718310475349426, + "step": 4120 + }, + { + "epoch": 1.0942769884477492, + "grad_norm": 1.1872711264618019, + "learning_rate": 9.273967688899849e-06, + "loss": 0.23810459673404694, + "step": 4121 + }, + { + "epoch": 1.0945425574292922, + "grad_norm": 1.0680734314830214, + "learning_rate": 9.269588581267804e-06, + "loss": 0.2197081446647644, + "step": 4122 + }, + { + "epoch": 1.0948081264108351, + "grad_norm": 1.1043223190124707, + "learning_rate": 9.265209614450463e-06, + "loss": 0.2429335117340088, + "step": 4123 + }, + { + "epoch": 1.095073695392378, + "grad_norm": 1.1380552272436657, + "learning_rate": 9.260830789292043e-06, + "loss": 0.23028087615966797, + "step": 4124 + }, + { + "epoch": 1.095339264373921, + "grad_norm": 1.2203393500716264, + "learning_rate": 9.25645210663673e-06, + "loss": 0.2783699035644531, + "step": 4125 + }, + { + "epoch": 1.095604833355464, + "grad_norm": 1.1686978964802806, + "learning_rate": 9.25207356732868e-06, + "loss": 0.25055867433547974, + "step": 4126 + }, + { + "epoch": 1.095870402337007, + "grad_norm": 1.2313132067115398, + "learning_rate": 9.247695172212026e-06, + "loss": 0.28629350662231445, + "step": 4127 + }, + { + "epoch": 1.09613597131855, + "grad_norm": 1.2403423880097748, + "learning_rate": 9.24331692213087e-06, + "loss": 0.2626604735851288, + "step": 4128 + }, + { + "epoch": 1.0964015403000928, + "grad_norm": 1.2478078302425437, + "learning_rate": 9.238938817929288e-06, + "loss": 0.237881600856781, + "step": 4129 + }, + { + "epoch": 1.0966671092816358, + "grad_norm": 1.144955023428898, + "learning_rate": 9.234560860451325e-06, + "loss": 0.2602109909057617, + "step": 4130 + }, + { + "epoch": 1.0969326782631788, + "grad_norm": 1.1775071297104545, + "learning_rate": 9.230183050541001e-06, + "loss": 0.2721475064754486, + "step": 4131 + }, + { + "epoch": 1.0971982472447217, + "grad_norm": 1.7664052681173497, + "learning_rate": 9.225805389042307e-06, + "loss": 0.25844910740852356, + "step": 4132 + }, + { + "epoch": 1.0974638162262647, + "grad_norm": 1.1612334633259545, + "learning_rate": 9.221427876799201e-06, + "loss": 0.26671040058135986, + "step": 4133 + }, + { + "epoch": 1.0977293852078078, + "grad_norm": 1.3116748641368057, + "learning_rate": 9.21705051465562e-06, + "loss": 0.2610115706920624, + "step": 4134 + }, + { + "epoch": 1.0979949541893508, + "grad_norm": 1.1348320206960383, + "learning_rate": 9.212673303455464e-06, + "loss": 0.2518802881240845, + "step": 4135 + }, + { + "epoch": 1.0982605231708937, + "grad_norm": 1.2313324732863455, + "learning_rate": 9.20829624404261e-06, + "loss": 0.28600364923477173, + "step": 4136 + }, + { + "epoch": 1.0985260921524367, + "grad_norm": 1.0787729379648288, + "learning_rate": 9.203919337260903e-06, + "loss": 0.2649504840373993, + "step": 4137 + }, + { + "epoch": 1.0987916611339796, + "grad_norm": 1.0717018301402161, + "learning_rate": 9.199542583954159e-06, + "loss": 0.22613298892974854, + "step": 4138 + }, + { + "epoch": 1.0990572301155226, + "grad_norm": 1.1049408193201318, + "learning_rate": 9.195165984966163e-06, + "loss": 0.22546961903572083, + "step": 4139 + }, + { + "epoch": 1.0993227990970655, + "grad_norm": 1.1132579479037434, + "learning_rate": 9.190789541140675e-06, + "loss": 0.20618169009685516, + "step": 4140 + }, + { + "epoch": 1.0995883680786085, + "grad_norm": 1.1910818165933836, + "learning_rate": 9.18641325332142e-06, + "loss": 0.2434382289648056, + "step": 4141 + }, + { + "epoch": 1.0998539370601514, + "grad_norm": 1.0160349259469954, + "learning_rate": 9.182037122352092e-06, + "loss": 0.19114840030670166, + "step": 4142 + }, + { + "epoch": 1.1001195060416944, + "grad_norm": 1.371175220167047, + "learning_rate": 9.17766114907636e-06, + "loss": 0.2793614864349365, + "step": 4143 + }, + { + "epoch": 1.1003850750232373, + "grad_norm": 1.3230746818872392, + "learning_rate": 9.173285334337863e-06, + "loss": 0.2908466160297394, + "step": 4144 + }, + { + "epoch": 1.1006506440047803, + "grad_norm": 1.1707475106499343, + "learning_rate": 9.168909678980199e-06, + "loss": 0.260933518409729, + "step": 4145 + }, + { + "epoch": 1.1009162129863233, + "grad_norm": 1.170079737982666, + "learning_rate": 9.16453418384695e-06, + "loss": 0.2819761037826538, + "step": 4146 + }, + { + "epoch": 1.1011817819678662, + "grad_norm": 1.251357168283767, + "learning_rate": 9.160158849781657e-06, + "loss": 0.25290411710739136, + "step": 4147 + }, + { + "epoch": 1.1014473509494092, + "grad_norm": 1.0782378998536035, + "learning_rate": 9.155783677627831e-06, + "loss": 0.21255841851234436, + "step": 4148 + }, + { + "epoch": 1.101712919930952, + "grad_norm": 0.9808101112826028, + "learning_rate": 9.151408668228958e-06, + "loss": 0.20631751418113708, + "step": 4149 + }, + { + "epoch": 1.101978488912495, + "grad_norm": 1.0273447794760797, + "learning_rate": 9.147033822428484e-06, + "loss": 0.20976273715496063, + "step": 4150 + }, + { + "epoch": 1.102244057894038, + "grad_norm": 1.0193138467531315, + "learning_rate": 9.142659141069828e-06, + "loss": 0.21464477479457855, + "step": 4151 + }, + { + "epoch": 1.102509626875581, + "grad_norm": 1.182770191723374, + "learning_rate": 9.13828462499638e-06, + "loss": 0.2262338101863861, + "step": 4152 + }, + { + "epoch": 1.102775195857124, + "grad_norm": 1.2057409707570275, + "learning_rate": 9.133910275051493e-06, + "loss": 0.26331469416618347, + "step": 4153 + }, + { + "epoch": 1.1030407648386669, + "grad_norm": 1.1729382721759571, + "learning_rate": 9.129536092078488e-06, + "loss": 0.26280921697616577, + "step": 4154 + }, + { + "epoch": 1.1033063338202098, + "grad_norm": 1.1474203361843618, + "learning_rate": 9.12516207692066e-06, + "loss": 0.2527182698249817, + "step": 4155 + }, + { + "epoch": 1.1035719028017528, + "grad_norm": 1.114868090084267, + "learning_rate": 9.120788230421267e-06, + "loss": 0.21416455507278442, + "step": 4156 + }, + { + "epoch": 1.1038374717832957, + "grad_norm": 1.149698502937602, + "learning_rate": 9.116414553423535e-06, + "loss": 0.25882014632225037, + "step": 4157 + }, + { + "epoch": 1.1041030407648387, + "grad_norm": 1.1615644224212993, + "learning_rate": 9.112041046770653e-06, + "loss": 0.20510248839855194, + "step": 4158 + }, + { + "epoch": 1.1043686097463816, + "grad_norm": 1.372282887646487, + "learning_rate": 9.107667711305786e-06, + "loss": 0.2348058819770813, + "step": 4159 + }, + { + "epoch": 1.1046341787279246, + "grad_norm": 1.2389958643414019, + "learning_rate": 9.10329454787206e-06, + "loss": 0.24561384320259094, + "step": 4160 + }, + { + "epoch": 1.1048997477094675, + "grad_norm": 1.133562757165387, + "learning_rate": 9.098921557312573e-06, + "loss": 0.23025226593017578, + "step": 4161 + }, + { + "epoch": 1.1051653166910105, + "grad_norm": 1.2483870007074676, + "learning_rate": 9.094548740470375e-06, + "loss": 0.2724589705467224, + "step": 4162 + }, + { + "epoch": 1.1054308856725534, + "grad_norm": 1.2319217483915181, + "learning_rate": 9.090176098188504e-06, + "loss": 0.25196704268455505, + "step": 4163 + }, + { + "epoch": 1.1056964546540964, + "grad_norm": 1.0723466269314343, + "learning_rate": 9.085803631309953e-06, + "loss": 0.22673696279525757, + "step": 4164 + }, + { + "epoch": 1.1059620236356393, + "grad_norm": 1.3129015386402236, + "learning_rate": 9.081431340677679e-06, + "loss": 0.23913519084453583, + "step": 4165 + }, + { + "epoch": 1.1062275926171823, + "grad_norm": 1.3859005835374885, + "learning_rate": 9.07705922713461e-06, + "loss": 0.2723861336708069, + "step": 4166 + }, + { + "epoch": 1.1064931615987252, + "grad_norm": 1.15651219284811, + "learning_rate": 9.072687291523636e-06, + "loss": 0.262167364358902, + "step": 4167 + }, + { + "epoch": 1.1067587305802682, + "grad_norm": 1.4186208937810438, + "learning_rate": 9.068315534687615e-06, + "loss": 0.2394658625125885, + "step": 4168 + }, + { + "epoch": 1.1070242995618111, + "grad_norm": 1.116555661084851, + "learning_rate": 9.063943957469373e-06, + "loss": 0.2547619938850403, + "step": 4169 + }, + { + "epoch": 1.107289868543354, + "grad_norm": 1.1242129377429575, + "learning_rate": 9.059572560711697e-06, + "loss": 0.24057570099830627, + "step": 4170 + }, + { + "epoch": 1.107555437524897, + "grad_norm": 1.057297781351654, + "learning_rate": 9.055201345257331e-06, + "loss": 0.21729445457458496, + "step": 4171 + }, + { + "epoch": 1.10782100650644, + "grad_norm": 1.2310508574302907, + "learning_rate": 9.05083031194901e-06, + "loss": 0.26590001583099365, + "step": 4172 + }, + { + "epoch": 1.108086575487983, + "grad_norm": 1.2932563576951384, + "learning_rate": 9.04645946162941e-06, + "loss": 0.26114848256111145, + "step": 4173 + }, + { + "epoch": 1.108352144469526, + "grad_norm": 1.1776684059902396, + "learning_rate": 9.04208879514118e-06, + "loss": 0.2255469262599945, + "step": 4174 + }, + { + "epoch": 1.1086177134510689, + "grad_norm": 1.1791871226781019, + "learning_rate": 9.037718313326932e-06, + "loss": 0.2597671151161194, + "step": 4175 + }, + { + "epoch": 1.1088832824326118, + "grad_norm": 1.1140795273935102, + "learning_rate": 9.033348017029247e-06, + "loss": 0.24820469319820404, + "step": 4176 + }, + { + "epoch": 1.1091488514141548, + "grad_norm": 1.2459789693741423, + "learning_rate": 9.028977907090661e-06, + "loss": 0.23886600136756897, + "step": 4177 + }, + { + "epoch": 1.1094144203956977, + "grad_norm": 1.091274384086243, + "learning_rate": 9.024607984353682e-06, + "loss": 0.24204152822494507, + "step": 4178 + }, + { + "epoch": 1.1096799893772407, + "grad_norm": 1.0934112812518066, + "learning_rate": 9.02023824966078e-06, + "loss": 0.23246638476848602, + "step": 4179 + }, + { + "epoch": 1.1099455583587836, + "grad_norm": 1.124332043141092, + "learning_rate": 9.015868703854386e-06, + "loss": 0.25057342648506165, + "step": 4180 + }, + { + "epoch": 1.1102111273403266, + "grad_norm": 1.117105393632997, + "learning_rate": 9.011499347776902e-06, + "loss": 0.2316257357597351, + "step": 4181 + }, + { + "epoch": 1.1104766963218695, + "grad_norm": 1.4294765240232425, + "learning_rate": 9.007130182270685e-06, + "loss": 0.24824783205986023, + "step": 4182 + }, + { + "epoch": 1.1107422653034125, + "grad_norm": 1.1667528236187257, + "learning_rate": 9.002761208178059e-06, + "loss": 0.25174480676651, + "step": 4183 + }, + { + "epoch": 1.1110078342849554, + "grad_norm": 1.0615254217045484, + "learning_rate": 8.998392426341313e-06, + "loss": 0.22364717721939087, + "step": 4184 + }, + { + "epoch": 1.1112734032664984, + "grad_norm": 1.0478203412338092, + "learning_rate": 8.994023837602694e-06, + "loss": 0.2205432504415512, + "step": 4185 + }, + { + "epoch": 1.1115389722480415, + "grad_norm": 1.4181125559874541, + "learning_rate": 8.989655442804413e-06, + "loss": 0.23303675651550293, + "step": 4186 + }, + { + "epoch": 1.1118045412295845, + "grad_norm": 1.2558407878646785, + "learning_rate": 8.985287242788646e-06, + "loss": 0.3003222644329071, + "step": 4187 + }, + { + "epoch": 1.1120701102111274, + "grad_norm": 1.146183553652687, + "learning_rate": 8.980919238397532e-06, + "loss": 0.2734413146972656, + "step": 4188 + }, + { + "epoch": 1.1123356791926704, + "grad_norm": 1.200748942223162, + "learning_rate": 8.976551430473166e-06, + "loss": 0.24086692929267883, + "step": 4189 + }, + { + "epoch": 1.1126012481742134, + "grad_norm": 1.2277073829430902, + "learning_rate": 8.972183819857618e-06, + "loss": 0.2531188130378723, + "step": 4190 + }, + { + "epoch": 1.1128668171557563, + "grad_norm": 1.1067327267341682, + "learning_rate": 8.96781640739291e-06, + "loss": 0.25059640407562256, + "step": 4191 + }, + { + "epoch": 1.1131323861372993, + "grad_norm": 1.1987793097859372, + "learning_rate": 8.963449193921023e-06, + "loss": 0.22427335381507874, + "step": 4192 + }, + { + "epoch": 1.1133979551188422, + "grad_norm": 1.1842662472837817, + "learning_rate": 8.959082180283906e-06, + "loss": 0.28835898637771606, + "step": 4193 + }, + { + "epoch": 1.1136635241003852, + "grad_norm": 1.1161865281550452, + "learning_rate": 8.954715367323468e-06, + "loss": 0.23919034004211426, + "step": 4194 + }, + { + "epoch": 1.1139290930819281, + "grad_norm": 1.186821665962327, + "learning_rate": 8.950348755881578e-06, + "loss": 0.24583986401557922, + "step": 4195 + }, + { + "epoch": 1.114194662063471, + "grad_norm": 1.2519292440490923, + "learning_rate": 8.94598234680007e-06, + "loss": 0.23869696259498596, + "step": 4196 + }, + { + "epoch": 1.114460231045014, + "grad_norm": 1.1662462204488522, + "learning_rate": 8.941616140920734e-06, + "loss": 0.2672434449195862, + "step": 4197 + }, + { + "epoch": 1.114725800026557, + "grad_norm": 1.2253961517889995, + "learning_rate": 8.937250139085322e-06, + "loss": 0.2660336494445801, + "step": 4198 + }, + { + "epoch": 1.1149913690081, + "grad_norm": 1.1608224464613695, + "learning_rate": 8.932884342135552e-06, + "loss": 0.26461780071258545, + "step": 4199 + }, + { + "epoch": 1.1152569379896429, + "grad_norm": 1.1632580978978435, + "learning_rate": 8.928518750913094e-06, + "loss": 0.22947481274604797, + "step": 4200 + }, + { + "epoch": 1.1155225069711858, + "grad_norm": 1.116659758904741, + "learning_rate": 8.924153366259584e-06, + "loss": 0.22715970873832703, + "step": 4201 + }, + { + "epoch": 1.1157880759527288, + "grad_norm": 1.3785482068816968, + "learning_rate": 8.919788189016618e-06, + "loss": 0.2994215190410614, + "step": 4202 + }, + { + "epoch": 1.1160536449342717, + "grad_norm": 1.158412598714371, + "learning_rate": 8.915423220025747e-06, + "loss": 0.2290656566619873, + "step": 4203 + }, + { + "epoch": 1.1163192139158147, + "grad_norm": 1.093685203516635, + "learning_rate": 8.911058460128489e-06, + "loss": 0.22284844517707825, + "step": 4204 + }, + { + "epoch": 1.1165847828973576, + "grad_norm": 1.0534371355750514, + "learning_rate": 8.906693910166316e-06, + "loss": 0.2095392495393753, + "step": 4205 + }, + { + "epoch": 1.1168503518789006, + "grad_norm": 1.197609739800315, + "learning_rate": 8.902329570980665e-06, + "loss": 0.25098133087158203, + "step": 4206 + }, + { + "epoch": 1.1171159208604435, + "grad_norm": 1.1630125842119448, + "learning_rate": 8.897965443412923e-06, + "loss": 0.24768148362636566, + "step": 4207 + }, + { + "epoch": 1.1173814898419865, + "grad_norm": 1.1213395777051767, + "learning_rate": 8.89360152830445e-06, + "loss": 0.22255480289459229, + "step": 4208 + }, + { + "epoch": 1.1176470588235294, + "grad_norm": 1.2306365389400118, + "learning_rate": 8.889237826496551e-06, + "loss": 0.23721200227737427, + "step": 4209 + }, + { + "epoch": 1.1179126278050724, + "grad_norm": 1.1422779685655824, + "learning_rate": 8.8848743388305e-06, + "loss": 0.25002530217170715, + "step": 4210 + }, + { + "epoch": 1.1181781967866153, + "grad_norm": 1.2862841308153614, + "learning_rate": 8.880511066147524e-06, + "loss": 0.27188029885292053, + "step": 4211 + }, + { + "epoch": 1.1184437657681583, + "grad_norm": 1.1517061730387759, + "learning_rate": 8.876148009288813e-06, + "loss": 0.23056066036224365, + "step": 4212 + }, + { + "epoch": 1.1187093347497012, + "grad_norm": 1.172676602980077, + "learning_rate": 8.87178516909551e-06, + "loss": 0.2336079478263855, + "step": 4213 + }, + { + "epoch": 1.1189749037312442, + "grad_norm": 1.1868473876345316, + "learning_rate": 8.86742254640872e-06, + "loss": 0.27449533343315125, + "step": 4214 + }, + { + "epoch": 1.1192404727127871, + "grad_norm": 1.1500112066365369, + "learning_rate": 8.863060142069508e-06, + "loss": 0.24714893102645874, + "step": 4215 + }, + { + "epoch": 1.11950604169433, + "grad_norm": 1.072070573678295, + "learning_rate": 8.858697956918886e-06, + "loss": 0.2155439257621765, + "step": 4216 + }, + { + "epoch": 1.119771610675873, + "grad_norm": 1.1798452175680678, + "learning_rate": 8.854335991797842e-06, + "loss": 0.23189155757427216, + "step": 4217 + }, + { + "epoch": 1.120037179657416, + "grad_norm": 1.0773206236657924, + "learning_rate": 8.849974247547307e-06, + "loss": 0.23413527011871338, + "step": 4218 + }, + { + "epoch": 1.120302748638959, + "grad_norm": 1.1991513784988423, + "learning_rate": 8.845612725008173e-06, + "loss": 0.2569039463996887, + "step": 4219 + }, + { + "epoch": 1.120568317620502, + "grad_norm": 1.1795807532964264, + "learning_rate": 8.84125142502129e-06, + "loss": 0.2699541449546814, + "step": 4220 + }, + { + "epoch": 1.1208338866020449, + "grad_norm": 1.1092727759218166, + "learning_rate": 8.836890348427468e-06, + "loss": 0.27172449231147766, + "step": 4221 + }, + { + "epoch": 1.1210994555835878, + "grad_norm": 1.2315684717645485, + "learning_rate": 8.83252949606747e-06, + "loss": 0.2839444875717163, + "step": 4222 + }, + { + "epoch": 1.1213650245651308, + "grad_norm": 1.1676850588618106, + "learning_rate": 8.828168868782013e-06, + "loss": 0.22782178223133087, + "step": 4223 + }, + { + "epoch": 1.1216305935466737, + "grad_norm": 1.132889704492098, + "learning_rate": 8.82380846741178e-06, + "loss": 0.2567726671695709, + "step": 4224 + }, + { + "epoch": 1.1218961625282167, + "grad_norm": 1.1872540675130212, + "learning_rate": 8.8194482927974e-06, + "loss": 0.25879523158073425, + "step": 4225 + }, + { + "epoch": 1.1221617315097596, + "grad_norm": 1.0193477801534692, + "learning_rate": 8.815088345779466e-06, + "loss": 0.22109058499336243, + "step": 4226 + }, + { + "epoch": 1.1224273004913026, + "grad_norm": 1.1414592493281657, + "learning_rate": 8.810728627198526e-06, + "loss": 0.23615925014019012, + "step": 4227 + }, + { + "epoch": 1.1226928694728455, + "grad_norm": 1.160290266155045, + "learning_rate": 8.806369137895081e-06, + "loss": 0.2751353085041046, + "step": 4228 + }, + { + "epoch": 1.1229584384543885, + "grad_norm": 1.2566953981709197, + "learning_rate": 8.802009878709587e-06, + "loss": 0.2361963391304016, + "step": 4229 + }, + { + "epoch": 1.1232240074359314, + "grad_norm": 1.186723455251228, + "learning_rate": 8.79765085048246e-06, + "loss": 0.22435930371284485, + "step": 4230 + }, + { + "epoch": 1.1234895764174744, + "grad_norm": 1.1759467333820823, + "learning_rate": 8.79329205405407e-06, + "loss": 0.2355855256319046, + "step": 4231 + }, + { + "epoch": 1.1237551453990173, + "grad_norm": 1.1450490838951077, + "learning_rate": 8.78893349026474e-06, + "loss": 0.24127572774887085, + "step": 4232 + }, + { + "epoch": 1.1240207143805603, + "grad_norm": 1.222656849347683, + "learning_rate": 8.784575159954748e-06, + "loss": 0.2677989602088928, + "step": 4233 + }, + { + "epoch": 1.1242862833621032, + "grad_norm": 1.109384474337522, + "learning_rate": 8.78021706396433e-06, + "loss": 0.2283135950565338, + "step": 4234 + }, + { + "epoch": 1.1245518523436462, + "grad_norm": 1.1669732456316693, + "learning_rate": 8.775859203133678e-06, + "loss": 0.2686103582382202, + "step": 4235 + }, + { + "epoch": 1.1248174213251891, + "grad_norm": 1.3869789172842044, + "learning_rate": 8.771501578302934e-06, + "loss": 0.2638726234436035, + "step": 4236 + }, + { + "epoch": 1.125082990306732, + "grad_norm": 1.0752600847920544, + "learning_rate": 8.767144190312196e-06, + "loss": 0.2517441511154175, + "step": 4237 + }, + { + "epoch": 1.125348559288275, + "grad_norm": 1.1903096570499558, + "learning_rate": 8.762787040001518e-06, + "loss": 0.2593642771244049, + "step": 4238 + }, + { + "epoch": 1.125614128269818, + "grad_norm": 1.123653942868709, + "learning_rate": 8.758430128210908e-06, + "loss": 0.23758336901664734, + "step": 4239 + }, + { + "epoch": 1.125879697251361, + "grad_norm": 1.182033088729647, + "learning_rate": 8.754073455780327e-06, + "loss": 0.2557980716228485, + "step": 4240 + }, + { + "epoch": 1.126145266232904, + "grad_norm": 1.1182311632466304, + "learning_rate": 8.74971702354969e-06, + "loss": 0.2484067678451538, + "step": 4241 + }, + { + "epoch": 1.1264108352144468, + "grad_norm": 1.121886097833982, + "learning_rate": 8.745360832358864e-06, + "loss": 0.23103098571300507, + "step": 4242 + }, + { + "epoch": 1.1266764041959898, + "grad_norm": 1.1856800379472048, + "learning_rate": 8.741004883047667e-06, + "loss": 0.2630731463432312, + "step": 4243 + }, + { + "epoch": 1.1269419731775328, + "grad_norm": 1.1814851216743405, + "learning_rate": 8.736649176455885e-06, + "loss": 0.2413114309310913, + "step": 4244 + }, + { + "epoch": 1.1272075421590757, + "grad_norm": 1.1465608986560651, + "learning_rate": 8.732293713423243e-06, + "loss": 0.22463169693946838, + "step": 4245 + }, + { + "epoch": 1.1274731111406187, + "grad_norm": 1.1943136125759177, + "learning_rate": 8.727938494789421e-06, + "loss": 0.23641429841518402, + "step": 4246 + }, + { + "epoch": 1.1277386801221616, + "grad_norm": 1.399290186521162, + "learning_rate": 8.723583521394054e-06, + "loss": 0.2547767162322998, + "step": 4247 + }, + { + "epoch": 1.1280042491037048, + "grad_norm": 1.1274578262359225, + "learning_rate": 8.719228794076733e-06, + "loss": 0.25753074884414673, + "step": 4248 + }, + { + "epoch": 1.1282698180852477, + "grad_norm": 1.2581544322188265, + "learning_rate": 8.714874313676992e-06, + "loss": 0.30602240562438965, + "step": 4249 + }, + { + "epoch": 1.1285353870667907, + "grad_norm": 1.3693509289176364, + "learning_rate": 8.710520081034328e-06, + "loss": 0.28336623311042786, + "step": 4250 + }, + { + "epoch": 1.1288009560483336, + "grad_norm": 1.179198933472593, + "learning_rate": 8.706166096988185e-06, + "loss": 0.24065867066383362, + "step": 4251 + }, + { + "epoch": 1.1290665250298766, + "grad_norm": 1.1350442144429624, + "learning_rate": 8.701812362377954e-06, + "loss": 0.25674968957901, + "step": 4252 + }, + { + "epoch": 1.1293320940114195, + "grad_norm": 1.0526431620404462, + "learning_rate": 8.697458878042992e-06, + "loss": 0.21502923965454102, + "step": 4253 + }, + { + "epoch": 1.1295976629929625, + "grad_norm": 1.199807552125115, + "learning_rate": 8.693105644822598e-06, + "loss": 0.26848286390304565, + "step": 4254 + }, + { + "epoch": 1.1298632319745054, + "grad_norm": 1.1632395937948599, + "learning_rate": 8.688752663556022e-06, + "loss": 0.24283824861049652, + "step": 4255 + }, + { + "epoch": 1.1301288009560484, + "grad_norm": 1.231861138079484, + "learning_rate": 8.684399935082468e-06, + "loss": 0.2511506974697113, + "step": 4256 + }, + { + "epoch": 1.1303943699375913, + "grad_norm": 1.1293067099587706, + "learning_rate": 8.68004746024109e-06, + "loss": 0.23932483792304993, + "step": 4257 + }, + { + "epoch": 1.1306599389191343, + "grad_norm": 1.229437521917496, + "learning_rate": 8.675695239870993e-06, + "loss": 0.30030694603919983, + "step": 4258 + }, + { + "epoch": 1.1309255079006773, + "grad_norm": 1.1154596754627621, + "learning_rate": 8.671343274811238e-06, + "loss": 0.24699059128761292, + "step": 4259 + }, + { + "epoch": 1.1311910768822202, + "grad_norm": 1.1288414782501015, + "learning_rate": 8.666991565900827e-06, + "loss": 0.26828041672706604, + "step": 4260 + }, + { + "epoch": 1.1314566458637632, + "grad_norm": 1.0765132569205758, + "learning_rate": 8.662640113978717e-06, + "loss": 0.2372082769870758, + "step": 4261 + }, + { + "epoch": 1.131722214845306, + "grad_norm": 1.2100447285144145, + "learning_rate": 8.658288919883824e-06, + "loss": 0.26367881894111633, + "step": 4262 + }, + { + "epoch": 1.131987783826849, + "grad_norm": 1.1035052537421275, + "learning_rate": 8.653937984455007e-06, + "loss": 0.2287222146987915, + "step": 4263 + }, + { + "epoch": 1.132253352808392, + "grad_norm": 1.1417963040520365, + "learning_rate": 8.649587308531067e-06, + "loss": 0.244521826505661, + "step": 4264 + }, + { + "epoch": 1.132518921789935, + "grad_norm": 1.2243689126496846, + "learning_rate": 8.64523689295077e-06, + "loss": 0.26912257075309753, + "step": 4265 + }, + { + "epoch": 1.132784490771478, + "grad_norm": 1.2384832947619873, + "learning_rate": 8.64088673855282e-06, + "loss": 0.23002780973911285, + "step": 4266 + }, + { + "epoch": 1.1330500597530209, + "grad_norm": 1.253742603342847, + "learning_rate": 8.636536846175878e-06, + "loss": 0.2561958432197571, + "step": 4267 + }, + { + "epoch": 1.1333156287345638, + "grad_norm": 1.2156026453092519, + "learning_rate": 8.63218721665855e-06, + "loss": 0.25553008913993835, + "step": 4268 + }, + { + "epoch": 1.1335811977161068, + "grad_norm": 1.1992385112791626, + "learning_rate": 8.627837850839398e-06, + "loss": 0.1992083340883255, + "step": 4269 + }, + { + "epoch": 1.1338467666976497, + "grad_norm": 1.3643398602160783, + "learning_rate": 8.62348874955692e-06, + "loss": 0.23075388371944427, + "step": 4270 + }, + { + "epoch": 1.1341123356791927, + "grad_norm": 1.1072751580070286, + "learning_rate": 8.619139913649582e-06, + "loss": 0.23691913485527039, + "step": 4271 + }, + { + "epoch": 1.1343779046607356, + "grad_norm": 1.2656689209279672, + "learning_rate": 8.61479134395578e-06, + "loss": 0.2536017894744873, + "step": 4272 + }, + { + "epoch": 1.1346434736422786, + "grad_norm": 1.2870409796681632, + "learning_rate": 8.61044304131387e-06, + "loss": 0.3014161288738251, + "step": 4273 + }, + { + "epoch": 1.1349090426238215, + "grad_norm": 1.1669055614665604, + "learning_rate": 8.606095006562156e-06, + "loss": 0.26333582401275635, + "step": 4274 + }, + { + "epoch": 1.1351746116053645, + "grad_norm": 1.2370251285176135, + "learning_rate": 8.601747240538883e-06, + "loss": 0.23796264827251434, + "step": 4275 + }, + { + "epoch": 1.1354401805869074, + "grad_norm": 1.1989417705813543, + "learning_rate": 8.597399744082251e-06, + "loss": 0.23737141489982605, + "step": 4276 + }, + { + "epoch": 1.1357057495684504, + "grad_norm": 1.1281376384049915, + "learning_rate": 8.593052518030407e-06, + "loss": 0.21073032915592194, + "step": 4277 + }, + { + "epoch": 1.1359713185499933, + "grad_norm": 1.2935455290015059, + "learning_rate": 8.588705563221444e-06, + "loss": 0.2597163915634155, + "step": 4278 + }, + { + "epoch": 1.1362368875315363, + "grad_norm": 1.137636804234172, + "learning_rate": 8.584358880493402e-06, + "loss": 0.24541154503822327, + "step": 4279 + }, + { + "epoch": 1.1365024565130792, + "grad_norm": 1.1331800338594176, + "learning_rate": 8.580012470684273e-06, + "loss": 0.19294027984142303, + "step": 4280 + }, + { + "epoch": 1.1367680254946222, + "grad_norm": 1.2387583554091215, + "learning_rate": 8.575666334631994e-06, + "loss": 0.26909738779067993, + "step": 4281 + }, + { + "epoch": 1.1370335944761651, + "grad_norm": 1.2850664046416893, + "learning_rate": 8.571320473174444e-06, + "loss": 0.2550502121448517, + "step": 4282 + }, + { + "epoch": 1.137299163457708, + "grad_norm": 1.138070930000495, + "learning_rate": 8.566974887149461e-06, + "loss": 0.2256634682416916, + "step": 4283 + }, + { + "epoch": 1.137564732439251, + "grad_norm": 1.3289753418379673, + "learning_rate": 8.562629577394817e-06, + "loss": 0.26154983043670654, + "step": 4284 + }, + { + "epoch": 1.137830301420794, + "grad_norm": 1.2426566834274124, + "learning_rate": 8.558284544748239e-06, + "loss": 0.24685145914554596, + "step": 4285 + }, + { + "epoch": 1.138095870402337, + "grad_norm": 1.177162412641928, + "learning_rate": 8.553939790047396e-06, + "loss": 0.2584421932697296, + "step": 4286 + }, + { + "epoch": 1.13836143938388, + "grad_norm": 1.2486541463378953, + "learning_rate": 8.549595314129907e-06, + "loss": 0.24582788348197937, + "step": 4287 + }, + { + "epoch": 1.1386270083654229, + "grad_norm": 1.1978925998644077, + "learning_rate": 8.545251117833334e-06, + "loss": 0.26023977994918823, + "step": 4288 + }, + { + "epoch": 1.1388925773469658, + "grad_norm": 1.2566090334130535, + "learning_rate": 8.54090720199519e-06, + "loss": 0.25575515627861023, + "step": 4289 + }, + { + "epoch": 1.1391581463285088, + "grad_norm": 1.2234599227483165, + "learning_rate": 8.53656356745293e-06, + "loss": 0.2784460783004761, + "step": 4290 + }, + { + "epoch": 1.1394237153100517, + "grad_norm": 1.11922615590049, + "learning_rate": 8.532220215043953e-06, + "loss": 0.24723297357559204, + "step": 4291 + }, + { + "epoch": 1.1396892842915947, + "grad_norm": 1.1960822646368614, + "learning_rate": 8.52787714560561e-06, + "loss": 0.24694418907165527, + "step": 4292 + }, + { + "epoch": 1.1399548532731376, + "grad_norm": 1.2073723964066632, + "learning_rate": 8.52353435997519e-06, + "loss": 0.19976040720939636, + "step": 4293 + }, + { + "epoch": 1.1402204222546806, + "grad_norm": 1.0875644999756633, + "learning_rate": 8.519191858989932e-06, + "loss": 0.21742458641529083, + "step": 4294 + }, + { + "epoch": 1.1404859912362235, + "grad_norm": 1.2040315384402727, + "learning_rate": 8.514849643487018e-06, + "loss": 0.26382917165756226, + "step": 4295 + }, + { + "epoch": 1.1407515602177665, + "grad_norm": 1.3073789721234685, + "learning_rate": 8.510507714303577e-06, + "loss": 0.30778488516807556, + "step": 4296 + }, + { + "epoch": 1.1410171291993096, + "grad_norm": 1.0727267660957265, + "learning_rate": 8.506166072276681e-06, + "loss": 0.20894449949264526, + "step": 4297 + }, + { + "epoch": 1.1412826981808526, + "grad_norm": 1.2119089915252295, + "learning_rate": 8.50182471824335e-06, + "loss": 0.2389567494392395, + "step": 4298 + }, + { + "epoch": 1.1415482671623955, + "grad_norm": 1.0286533711803312, + "learning_rate": 8.497483653040545e-06, + "loss": 0.20531126856803894, + "step": 4299 + }, + { + "epoch": 1.1418138361439385, + "grad_norm": 1.2153067733576255, + "learning_rate": 8.49314287750517e-06, + "loss": 0.2577363848686218, + "step": 4300 + }, + { + "epoch": 1.1420794051254815, + "grad_norm": 1.211343687077752, + "learning_rate": 8.488802392474076e-06, + "loss": 0.24225997924804688, + "step": 4301 + }, + { + "epoch": 1.1423449741070244, + "grad_norm": 1.2698570110354703, + "learning_rate": 8.484462198784058e-06, + "loss": 0.26494917273521423, + "step": 4302 + }, + { + "epoch": 1.1426105430885674, + "grad_norm": 1.2988704892129896, + "learning_rate": 8.480122297271855e-06, + "loss": 0.24903994798660278, + "step": 4303 + }, + { + "epoch": 1.1428761120701103, + "grad_norm": 1.1681075442122268, + "learning_rate": 8.475782688774147e-06, + "loss": 0.25291907787323, + "step": 4304 + }, + { + "epoch": 1.1431416810516533, + "grad_norm": 1.1301459507046017, + "learning_rate": 8.47144337412756e-06, + "loss": 0.22958475351333618, + "step": 4305 + }, + { + "epoch": 1.1434072500331962, + "grad_norm": 1.175766015682232, + "learning_rate": 8.46710435416866e-06, + "loss": 0.2305452972650528, + "step": 4306 + }, + { + "epoch": 1.1436728190147392, + "grad_norm": 1.2105790475425935, + "learning_rate": 8.462765629733965e-06, + "loss": 0.25028055906295776, + "step": 4307 + }, + { + "epoch": 1.1439383879962821, + "grad_norm": 1.2809924485725674, + "learning_rate": 8.458427201659926e-06, + "loss": 0.24873222410678864, + "step": 4308 + }, + { + "epoch": 1.144203956977825, + "grad_norm": 1.2345010944986379, + "learning_rate": 8.454089070782943e-06, + "loss": 0.23396535217761993, + "step": 4309 + }, + { + "epoch": 1.144469525959368, + "grad_norm": 1.1955062282547588, + "learning_rate": 8.449751237939354e-06, + "loss": 0.27120494842529297, + "step": 4310 + }, + { + "epoch": 1.144735094940911, + "grad_norm": 1.182924840045628, + "learning_rate": 8.445413703965441e-06, + "loss": 0.2734759449958801, + "step": 4311 + }, + { + "epoch": 1.145000663922454, + "grad_norm": 1.1584309667252248, + "learning_rate": 8.441076469697434e-06, + "loss": 0.25353512167930603, + "step": 4312 + }, + { + "epoch": 1.1452662329039969, + "grad_norm": 1.1913513856414861, + "learning_rate": 8.436739535971497e-06, + "loss": 0.23851020634174347, + "step": 4313 + }, + { + "epoch": 1.1455318018855398, + "grad_norm": 1.2006838398252668, + "learning_rate": 8.432402903623741e-06, + "loss": 0.26320093870162964, + "step": 4314 + }, + { + "epoch": 1.1457973708670828, + "grad_norm": 1.1065666799118796, + "learning_rate": 8.428066573490211e-06, + "loss": 0.23859955370426178, + "step": 4315 + }, + { + "epoch": 1.1460629398486257, + "grad_norm": 1.197716796975668, + "learning_rate": 8.423730546406911e-06, + "loss": 0.2636772096157074, + "step": 4316 + }, + { + "epoch": 1.1463285088301687, + "grad_norm": 1.2459962038175347, + "learning_rate": 8.419394823209773e-06, + "loss": 0.2656415104866028, + "step": 4317 + }, + { + "epoch": 1.1465940778117116, + "grad_norm": 1.2225993542972535, + "learning_rate": 8.41505940473467e-06, + "loss": 0.2872830033302307, + "step": 4318 + }, + { + "epoch": 1.1468596467932546, + "grad_norm": 1.4653362839323858, + "learning_rate": 8.410724291817422e-06, + "loss": 0.229783833026886, + "step": 4319 + }, + { + "epoch": 1.1471252157747975, + "grad_norm": 4.273944826146497, + "learning_rate": 8.406389485293786e-06, + "loss": 0.24418675899505615, + "step": 4320 + }, + { + "epoch": 1.1473907847563405, + "grad_norm": 1.2385236183806463, + "learning_rate": 8.402054985999464e-06, + "loss": 0.2535584270954132, + "step": 4321 + }, + { + "epoch": 1.1476563537378834, + "grad_norm": 1.2116145926695832, + "learning_rate": 8.397720794770093e-06, + "loss": 0.23207828402519226, + "step": 4322 + }, + { + "epoch": 1.1479219227194264, + "grad_norm": 1.8129143471218838, + "learning_rate": 8.393386912441257e-06, + "loss": 0.27990391850471497, + "step": 4323 + }, + { + "epoch": 1.1481874917009693, + "grad_norm": 1.059877272327032, + "learning_rate": 8.38905333984847e-06, + "loss": 0.2098318189382553, + "step": 4324 + }, + { + "epoch": 1.1484530606825123, + "grad_norm": 1.1462464609840002, + "learning_rate": 8.384720077827204e-06, + "loss": 0.25303804874420166, + "step": 4325 + }, + { + "epoch": 1.1487186296640552, + "grad_norm": 1.0794728099252306, + "learning_rate": 8.380387127212858e-06, + "loss": 0.23481838405132294, + "step": 4326 + }, + { + "epoch": 1.1489841986455982, + "grad_norm": 1.1782142095551065, + "learning_rate": 8.376054488840771e-06, + "loss": 0.24842356145381927, + "step": 4327 + }, + { + "epoch": 1.1492497676271411, + "grad_norm": 1.136832039914945, + "learning_rate": 8.37172216354623e-06, + "loss": 0.23927366733551025, + "step": 4328 + }, + { + "epoch": 1.149515336608684, + "grad_norm": 1.1577812724546028, + "learning_rate": 8.367390152164448e-06, + "loss": 0.23836453258991241, + "step": 4329 + }, + { + "epoch": 1.149780905590227, + "grad_norm": 1.2492179140984832, + "learning_rate": 8.36305845553059e-06, + "loss": 0.2562161982059479, + "step": 4330 + }, + { + "epoch": 1.15004647457177, + "grad_norm": 1.120151700121908, + "learning_rate": 8.358727074479755e-06, + "loss": 0.21255920827388763, + "step": 4331 + }, + { + "epoch": 1.150312043553313, + "grad_norm": 1.1011600870179878, + "learning_rate": 8.354396009846985e-06, + "loss": 0.24200043082237244, + "step": 4332 + }, + { + "epoch": 1.150577612534856, + "grad_norm": 1.1644551235897023, + "learning_rate": 8.35006526246725e-06, + "loss": 0.23582379519939423, + "step": 4333 + }, + { + "epoch": 1.1508431815163989, + "grad_norm": 1.093546349726341, + "learning_rate": 8.34573483317548e-06, + "loss": 0.21554499864578247, + "step": 4334 + }, + { + "epoch": 1.1511087504979418, + "grad_norm": 1.2460346716976907, + "learning_rate": 8.341404722806525e-06, + "loss": 0.2789759039878845, + "step": 4335 + }, + { + "epoch": 1.1513743194794848, + "grad_norm": 1.212813860768853, + "learning_rate": 8.337074932195175e-06, + "loss": 0.24677832424640656, + "step": 4336 + }, + { + "epoch": 1.1516398884610277, + "grad_norm": 1.2351497128261646, + "learning_rate": 8.332745462176166e-06, + "loss": 0.28122392296791077, + "step": 4337 + }, + { + "epoch": 1.1519054574425707, + "grad_norm": 1.2447069177647443, + "learning_rate": 8.328416313584169e-06, + "loss": 0.23219403624534607, + "step": 4338 + }, + { + "epoch": 1.1521710264241136, + "grad_norm": 1.1258797089625292, + "learning_rate": 8.324087487253792e-06, + "loss": 0.19928379356861115, + "step": 4339 + }, + { + "epoch": 1.1524365954056566, + "grad_norm": 1.2737910298174706, + "learning_rate": 8.31975898401958e-06, + "loss": 0.27730467915534973, + "step": 4340 + }, + { + "epoch": 1.1527021643871995, + "grad_norm": 1.3906235348842741, + "learning_rate": 8.315430804716022e-06, + "loss": 0.25462737679481506, + "step": 4341 + }, + { + "epoch": 1.1529677333687425, + "grad_norm": 1.1703737499238527, + "learning_rate": 8.311102950177533e-06, + "loss": 0.2363007366657257, + "step": 4342 + }, + { + "epoch": 1.1532333023502854, + "grad_norm": 1.2498285131266695, + "learning_rate": 8.306775421238482e-06, + "loss": 0.2648352384567261, + "step": 4343 + }, + { + "epoch": 1.1534988713318284, + "grad_norm": 1.394847110607811, + "learning_rate": 8.302448218733158e-06, + "loss": 0.25645309686660767, + "step": 4344 + }, + { + "epoch": 1.1537644403133713, + "grad_norm": 1.2178564426244172, + "learning_rate": 8.298121343495797e-06, + "loss": 0.22962522506713867, + "step": 4345 + }, + { + "epoch": 1.1540300092949143, + "grad_norm": 1.132403649349265, + "learning_rate": 8.293794796360569e-06, + "loss": 0.21269623935222626, + "step": 4346 + }, + { + "epoch": 1.1542955782764572, + "grad_norm": 1.1646919704485588, + "learning_rate": 8.289468578161581e-06, + "loss": 0.2518436014652252, + "step": 4347 + }, + { + "epoch": 1.1545611472580002, + "grad_norm": 1.193830808481187, + "learning_rate": 8.285142689732877e-06, + "loss": 0.2318439483642578, + "step": 4348 + }, + { + "epoch": 1.1548267162395431, + "grad_norm": 1.0953821300718658, + "learning_rate": 8.280817131908438e-06, + "loss": 0.2278512567281723, + "step": 4349 + }, + { + "epoch": 1.155092285221086, + "grad_norm": 1.3446091578493078, + "learning_rate": 8.27649190552218e-06, + "loss": 0.2521114945411682, + "step": 4350 + }, + { + "epoch": 1.155357854202629, + "grad_norm": 1.1722019112748296, + "learning_rate": 8.272167011407955e-06, + "loss": 0.2565760016441345, + "step": 4351 + }, + { + "epoch": 1.155623423184172, + "grad_norm": 1.3209067321897832, + "learning_rate": 8.267842450399552e-06, + "loss": 0.2603546679019928, + "step": 4352 + }, + { + "epoch": 1.155888992165715, + "grad_norm": 1.1697050726438265, + "learning_rate": 8.263518223330698e-06, + "loss": 0.2175855189561844, + "step": 4353 + }, + { + "epoch": 1.156154561147258, + "grad_norm": 1.1937135661774867, + "learning_rate": 8.25919433103505e-06, + "loss": 0.24521774053573608, + "step": 4354 + }, + { + "epoch": 1.1564201301288008, + "grad_norm": 1.3267445452853517, + "learning_rate": 8.254870774346203e-06, + "loss": 0.29673823714256287, + "step": 4355 + }, + { + "epoch": 1.1566856991103438, + "grad_norm": 1.260162624950344, + "learning_rate": 8.25054755409769e-06, + "loss": 0.26994144916534424, + "step": 4356 + }, + { + "epoch": 1.1569512680918868, + "grad_norm": 1.1578908727655277, + "learning_rate": 8.246224671122974e-06, + "loss": 0.2545935809612274, + "step": 4357 + }, + { + "epoch": 1.1572168370734297, + "grad_norm": 1.1469888258961152, + "learning_rate": 8.241902126255458e-06, + "loss": 0.23589034378528595, + "step": 4358 + }, + { + "epoch": 1.1574824060549727, + "grad_norm": 1.229284708155894, + "learning_rate": 8.237579920328478e-06, + "loss": 0.2617190480232239, + "step": 4359 + }, + { + "epoch": 1.1577479750365158, + "grad_norm": 1.2741716320060574, + "learning_rate": 8.233258054175302e-06, + "loss": 0.3092418313026428, + "step": 4360 + }, + { + "epoch": 1.1580135440180588, + "grad_norm": 1.1377305602079475, + "learning_rate": 8.228936528629138e-06, + "loss": 0.22873908281326294, + "step": 4361 + }, + { + "epoch": 1.1582791129996017, + "grad_norm": 1.0592847205754, + "learning_rate": 8.224615344523123e-06, + "loss": 0.22549089789390564, + "step": 4362 + }, + { + "epoch": 1.1585446819811447, + "grad_norm": 1.0288617285826194, + "learning_rate": 8.22029450269033e-06, + "loss": 0.19141459465026855, + "step": 4363 + }, + { + "epoch": 1.1588102509626876, + "grad_norm": 1.1679333849265336, + "learning_rate": 8.21597400396377e-06, + "loss": 0.24277547001838684, + "step": 4364 + }, + { + "epoch": 1.1590758199442306, + "grad_norm": 1.1463053400858605, + "learning_rate": 8.21165384917638e-06, + "loss": 0.2429513931274414, + "step": 4365 + }, + { + "epoch": 1.1593413889257735, + "grad_norm": 1.0775583631999657, + "learning_rate": 8.207334039161035e-06, + "loss": 0.24710172414779663, + "step": 4366 + }, + { + "epoch": 1.1596069579073165, + "grad_norm": 1.1226530732908067, + "learning_rate": 8.203014574750546e-06, + "loss": 0.2553783357143402, + "step": 4367 + }, + { + "epoch": 1.1598725268888594, + "grad_norm": 1.1664625510577165, + "learning_rate": 8.198695456777653e-06, + "loss": 0.2558436095714569, + "step": 4368 + }, + { + "epoch": 1.1601380958704024, + "grad_norm": 1.093371491828669, + "learning_rate": 8.19437668607503e-06, + "loss": 0.20780377089977264, + "step": 4369 + }, + { + "epoch": 1.1604036648519453, + "grad_norm": 1.0184271240235683, + "learning_rate": 8.190058263475288e-06, + "loss": 0.22397254407405853, + "step": 4370 + }, + { + "epoch": 1.1606692338334883, + "grad_norm": 1.1123966470918765, + "learning_rate": 8.185740189810967e-06, + "loss": 0.2763773798942566, + "step": 4371 + }, + { + "epoch": 1.1609348028150313, + "grad_norm": 1.234569017856286, + "learning_rate": 8.181422465914541e-06, + "loss": 0.2801940441131592, + "step": 4372 + }, + { + "epoch": 1.1612003717965742, + "grad_norm": 1.3078225086374202, + "learning_rate": 8.177105092618413e-06, + "loss": 0.20949441194534302, + "step": 4373 + }, + { + "epoch": 1.1614659407781172, + "grad_norm": 1.020800458401727, + "learning_rate": 8.172788070754927e-06, + "loss": 0.24503354728221893, + "step": 4374 + }, + { + "epoch": 1.16173150975966, + "grad_norm": 1.212252624187319, + "learning_rate": 8.16847140115635e-06, + "loss": 0.256147563457489, + "step": 4375 + }, + { + "epoch": 1.161997078741203, + "grad_norm": 1.079933692504349, + "learning_rate": 8.164155084654886e-06, + "loss": 0.2178848683834076, + "step": 4376 + }, + { + "epoch": 1.162262647722746, + "grad_norm": 1.0121292441974634, + "learning_rate": 8.159839122082668e-06, + "loss": 0.22624582052230835, + "step": 4377 + }, + { + "epoch": 1.162528216704289, + "grad_norm": 1.0294597777179986, + "learning_rate": 8.155523514271764e-06, + "loss": 0.2184191346168518, + "step": 4378 + }, + { + "epoch": 1.162793785685832, + "grad_norm": 1.2825595051682412, + "learning_rate": 8.151208262054175e-06, + "loss": 0.2623840868473053, + "step": 4379 + }, + { + "epoch": 1.1630593546673749, + "grad_norm": 1.2529929341607686, + "learning_rate": 8.14689336626183e-06, + "loss": 0.27181199193000793, + "step": 4380 + }, + { + "epoch": 1.1633249236489178, + "grad_norm": 1.282994089786083, + "learning_rate": 8.142578827726587e-06, + "loss": 0.2791554629802704, + "step": 4381 + }, + { + "epoch": 1.1635904926304608, + "grad_norm": 1.221608581014812, + "learning_rate": 8.13826464728024e-06, + "loss": 0.2466641068458557, + "step": 4382 + }, + { + "epoch": 1.1638560616120037, + "grad_norm": 0.9724735599541757, + "learning_rate": 8.133950825754511e-06, + "loss": 0.1951724737882614, + "step": 4383 + }, + { + "epoch": 1.1641216305935467, + "grad_norm": 1.2462068833977051, + "learning_rate": 8.129637363981056e-06, + "loss": 0.2520062029361725, + "step": 4384 + }, + { + "epoch": 1.1643871995750896, + "grad_norm": 1.230128345167748, + "learning_rate": 8.12532426279146e-06, + "loss": 0.24101334810256958, + "step": 4385 + }, + { + "epoch": 1.1646527685566326, + "grad_norm": 1.244671245504639, + "learning_rate": 8.121011523017235e-06, + "loss": 0.2741190791130066, + "step": 4386 + }, + { + "epoch": 1.1649183375381755, + "grad_norm": 1.1570746383559662, + "learning_rate": 8.116699145489822e-06, + "loss": 0.2575281858444214, + "step": 4387 + }, + { + "epoch": 1.1651839065197185, + "grad_norm": 1.157233381368316, + "learning_rate": 8.112387131040608e-06, + "loss": 0.2557298243045807, + "step": 4388 + }, + { + "epoch": 1.1654494755012614, + "grad_norm": 1.2560692108341776, + "learning_rate": 8.108075480500892e-06, + "loss": 0.27485036849975586, + "step": 4389 + }, + { + "epoch": 1.1657150444828044, + "grad_norm": 1.2517544472207511, + "learning_rate": 8.103764194701909e-06, + "loss": 0.26458340883255005, + "step": 4390 + }, + { + "epoch": 1.1659806134643473, + "grad_norm": 1.2310585386329624, + "learning_rate": 8.099453274474827e-06, + "loss": 0.2281840592622757, + "step": 4391 + }, + { + "epoch": 1.1662461824458903, + "grad_norm": 1.2367230880082285, + "learning_rate": 8.095142720650739e-06, + "loss": 0.24956555664539337, + "step": 4392 + }, + { + "epoch": 1.1665117514274332, + "grad_norm": 1.109202461245095, + "learning_rate": 8.090832534060671e-06, + "loss": 0.22619420289993286, + "step": 4393 + }, + { + "epoch": 1.1667773204089762, + "grad_norm": 1.2922206575995636, + "learning_rate": 8.086522715535571e-06, + "loss": 0.2780688405036926, + "step": 4394 + }, + { + "epoch": 1.1670428893905191, + "grad_norm": 1.2699378735794575, + "learning_rate": 8.082213265906323e-06, + "loss": 0.2600886821746826, + "step": 4395 + }, + { + "epoch": 1.167308458372062, + "grad_norm": 1.244234758234162, + "learning_rate": 8.077904186003736e-06, + "loss": 0.25049078464508057, + "step": 4396 + }, + { + "epoch": 1.167574027353605, + "grad_norm": 1.2327544821473595, + "learning_rate": 8.073595476658558e-06, + "loss": 0.27745798230171204, + "step": 4397 + }, + { + "epoch": 1.167839596335148, + "grad_norm": 1.1682547274263488, + "learning_rate": 8.069287138701452e-06, + "loss": 0.2191929668188095, + "step": 4398 + }, + { + "epoch": 1.168105165316691, + "grad_norm": 1.297306908163856, + "learning_rate": 8.064979172963014e-06, + "loss": 0.24307313561439514, + "step": 4399 + }, + { + "epoch": 1.168370734298234, + "grad_norm": 1.1837345133145987, + "learning_rate": 8.060671580273772e-06, + "loss": 0.23036238551139832, + "step": 4400 + }, + { + "epoch": 1.1686363032797769, + "grad_norm": 1.096627050675377, + "learning_rate": 8.056364361464176e-06, + "loss": 0.2394433617591858, + "step": 4401 + }, + { + "epoch": 1.1689018722613198, + "grad_norm": 1.183557399538609, + "learning_rate": 8.052057517364608e-06, + "loss": 0.24099211394786835, + "step": 4402 + }, + { + "epoch": 1.1691674412428628, + "grad_norm": 1.1293667282926971, + "learning_rate": 8.047751048805376e-06, + "loss": 0.22036173939704895, + "step": 4403 + }, + { + "epoch": 1.1694330102244057, + "grad_norm": 1.185484128157471, + "learning_rate": 8.043444956616717e-06, + "loss": 0.22400429844856262, + "step": 4404 + }, + { + "epoch": 1.1696985792059487, + "grad_norm": 1.0594769241160498, + "learning_rate": 8.039139241628792e-06, + "loss": 0.21649131178855896, + "step": 4405 + }, + { + "epoch": 1.1699641481874916, + "grad_norm": 1.150957898906185, + "learning_rate": 8.034833904671698e-06, + "loss": 0.23412205278873444, + "step": 4406 + }, + { + "epoch": 1.1702297171690346, + "grad_norm": 1.2025485392569255, + "learning_rate": 8.030528946575453e-06, + "loss": 0.23822304606437683, + "step": 4407 + }, + { + "epoch": 1.1704952861505775, + "grad_norm": 1.2929661052617345, + "learning_rate": 8.026224368169998e-06, + "loss": 0.29250186681747437, + "step": 4408 + }, + { + "epoch": 1.1707608551321207, + "grad_norm": 1.4098437716027425, + "learning_rate": 8.021920170285205e-06, + "loss": 0.26794207096099854, + "step": 4409 + }, + { + "epoch": 1.1710264241136636, + "grad_norm": 1.2469013694849018, + "learning_rate": 8.017616353750874e-06, + "loss": 0.2573787271976471, + "step": 4410 + }, + { + "epoch": 1.1712919930952066, + "grad_norm": 1.1835378975512396, + "learning_rate": 8.01331291939673e-06, + "loss": 0.2744356691837311, + "step": 4411 + }, + { + "epoch": 1.1715575620767495, + "grad_norm": 1.4542599881672131, + "learning_rate": 8.009009868052424e-06, + "loss": 0.2582886815071106, + "step": 4412 + }, + { + "epoch": 1.1718231310582925, + "grad_norm": 1.1766031171819216, + "learning_rate": 8.004707200547534e-06, + "loss": 0.2553568482398987, + "step": 4413 + }, + { + "epoch": 1.1720887000398355, + "grad_norm": 1.144579662849428, + "learning_rate": 8.00040491771156e-06, + "loss": 0.2670289874076843, + "step": 4414 + }, + { + "epoch": 1.1723542690213784, + "grad_norm": 1.1520006084984327, + "learning_rate": 7.99610302037394e-06, + "loss": 0.215460866689682, + "step": 4415 + }, + { + "epoch": 1.1726198380029214, + "grad_norm": 1.2764670908026035, + "learning_rate": 7.991801509364023e-06, + "loss": 0.26481571793556213, + "step": 4416 + }, + { + "epoch": 1.1728854069844643, + "grad_norm": 1.0239999030663398, + "learning_rate": 7.98750038551109e-06, + "loss": 0.2060776650905609, + "step": 4417 + }, + { + "epoch": 1.1731509759660073, + "grad_norm": 1.147707044406535, + "learning_rate": 7.983199649644349e-06, + "loss": 0.2401561588048935, + "step": 4418 + }, + { + "epoch": 1.1734165449475502, + "grad_norm": 1.3064882111410037, + "learning_rate": 7.978899302592927e-06, + "loss": 0.2545842230319977, + "step": 4419 + }, + { + "epoch": 1.1736821139290932, + "grad_norm": 1.199445262296627, + "learning_rate": 7.974599345185884e-06, + "loss": 0.29925093054771423, + "step": 4420 + }, + { + "epoch": 1.1739476829106361, + "grad_norm": 1.7583031900565322, + "learning_rate": 7.9702997782522e-06, + "loss": 0.23944757878780365, + "step": 4421 + }, + { + "epoch": 1.174213251892179, + "grad_norm": 1.057746400765015, + "learning_rate": 7.96600060262078e-06, + "loss": 0.23745761811733246, + "step": 4422 + }, + { + "epoch": 1.174478820873722, + "grad_norm": 1.1164780002442092, + "learning_rate": 7.961701819120453e-06, + "loss": 0.22170330584049225, + "step": 4423 + }, + { + "epoch": 1.174744389855265, + "grad_norm": 1.2607094160663312, + "learning_rate": 7.95740342857998e-06, + "loss": 0.2645890712738037, + "step": 4424 + }, + { + "epoch": 1.175009958836808, + "grad_norm": 1.2171129338535713, + "learning_rate": 7.953105431828032e-06, + "loss": 0.25232207775115967, + "step": 4425 + }, + { + "epoch": 1.1752755278183509, + "grad_norm": 1.20503293579659, + "learning_rate": 7.948807829693219e-06, + "loss": 0.2656644880771637, + "step": 4426 + }, + { + "epoch": 1.1755410967998938, + "grad_norm": 1.069230366230624, + "learning_rate": 7.944510623004063e-06, + "loss": 0.25290653109550476, + "step": 4427 + }, + { + "epoch": 1.1758066657814368, + "grad_norm": 1.1825821036814732, + "learning_rate": 7.940213812589018e-06, + "loss": 0.27464741468429565, + "step": 4428 + }, + { + "epoch": 1.1760722347629797, + "grad_norm": 1.4910942744639428, + "learning_rate": 7.935917399276455e-06, + "loss": 0.2562064528465271, + "step": 4429 + }, + { + "epoch": 1.1763378037445227, + "grad_norm": 1.2720371671465533, + "learning_rate": 7.931621383894676e-06, + "loss": 0.267793208360672, + "step": 4430 + }, + { + "epoch": 1.1766033727260656, + "grad_norm": 1.1490167098873316, + "learning_rate": 7.9273257672719e-06, + "loss": 0.23651085793972015, + "step": 4431 + }, + { + "epoch": 1.1768689417076086, + "grad_norm": 1.0804412076412697, + "learning_rate": 7.923030550236267e-06, + "loss": 0.23691008985042572, + "step": 4432 + }, + { + "epoch": 1.1771345106891515, + "grad_norm": 1.1540873295746452, + "learning_rate": 7.918735733615852e-06, + "loss": 0.24495704472064972, + "step": 4433 + }, + { + "epoch": 1.1774000796706945, + "grad_norm": 1.4423069413713672, + "learning_rate": 7.91444131823864e-06, + "loss": 0.25423017144203186, + "step": 4434 + }, + { + "epoch": 1.1776656486522374, + "grad_norm": 1.1113893983435537, + "learning_rate": 7.910147304932548e-06, + "loss": 0.22870390117168427, + "step": 4435 + }, + { + "epoch": 1.1779312176337804, + "grad_norm": 1.0473620824498977, + "learning_rate": 7.905853694525405e-06, + "loss": 0.23037508130073547, + "step": 4436 + }, + { + "epoch": 1.1781967866153233, + "grad_norm": 1.2886040363623328, + "learning_rate": 7.901560487844973e-06, + "loss": 0.31184864044189453, + "step": 4437 + }, + { + "epoch": 1.1784623555968663, + "grad_norm": 1.302197101799982, + "learning_rate": 7.89726768571893e-06, + "loss": 0.24140426516532898, + "step": 4438 + }, + { + "epoch": 1.1787279245784092, + "grad_norm": 1.2134032336682008, + "learning_rate": 7.892975288974877e-06, + "loss": 0.25602301955223083, + "step": 4439 + }, + { + "epoch": 1.1789934935599522, + "grad_norm": 1.1868063067331378, + "learning_rate": 7.888683298440339e-06, + "loss": 0.2717514932155609, + "step": 4440 + }, + { + "epoch": 1.1792590625414952, + "grad_norm": 1.1670818939848298, + "learning_rate": 7.884391714942757e-06, + "loss": 0.252475380897522, + "step": 4441 + }, + { + "epoch": 1.179524631523038, + "grad_norm": 1.161546405047816, + "learning_rate": 7.880100539309506e-06, + "loss": 0.24777942895889282, + "step": 4442 + }, + { + "epoch": 1.179790200504581, + "grad_norm": 1.194146333188245, + "learning_rate": 7.875809772367867e-06, + "loss": 0.25111010670661926, + "step": 4443 + }, + { + "epoch": 1.180055769486124, + "grad_norm": 1.163412583383914, + "learning_rate": 7.87151941494505e-06, + "loss": 0.26183217763900757, + "step": 4444 + }, + { + "epoch": 1.180321338467667, + "grad_norm": 1.2974065116766642, + "learning_rate": 7.867229467868189e-06, + "loss": 0.27538490295410156, + "step": 4445 + }, + { + "epoch": 1.18058690744921, + "grad_norm": 1.078206017492716, + "learning_rate": 7.862939931964333e-06, + "loss": 0.2192106693983078, + "step": 4446 + }, + { + "epoch": 1.1808524764307529, + "grad_norm": 1.2415747879020278, + "learning_rate": 7.858650808060453e-06, + "loss": 0.26506057381629944, + "step": 4447 + }, + { + "epoch": 1.1811180454122958, + "grad_norm": 1.103375758703505, + "learning_rate": 7.854362096983443e-06, + "loss": 0.2345719337463379, + "step": 4448 + }, + { + "epoch": 1.1813836143938388, + "grad_norm": 1.1651284585435833, + "learning_rate": 7.850073799560114e-06, + "loss": 0.21404311060905457, + "step": 4449 + }, + { + "epoch": 1.1816491833753817, + "grad_norm": 1.1572235550991925, + "learning_rate": 7.8457859166172e-06, + "loss": 0.24332138895988464, + "step": 4450 + }, + { + "epoch": 1.1819147523569247, + "grad_norm": 1.1687901862394692, + "learning_rate": 7.841498448981354e-06, + "loss": 0.25025150179862976, + "step": 4451 + }, + { + "epoch": 1.1821803213384676, + "grad_norm": 1.167419454587793, + "learning_rate": 7.837211397479152e-06, + "loss": 0.21918940544128418, + "step": 4452 + }, + { + "epoch": 1.1824458903200106, + "grad_norm": 1.1517463754639392, + "learning_rate": 7.832924762937083e-06, + "loss": 0.24976079165935516, + "step": 4453 + }, + { + "epoch": 1.1827114593015535, + "grad_norm": 1.1165052000707918, + "learning_rate": 7.828638546181565e-06, + "loss": 0.21146243810653687, + "step": 4454 + }, + { + "epoch": 1.1829770282830965, + "grad_norm": 1.1110608449393633, + "learning_rate": 7.824352748038924e-06, + "loss": 0.22921445965766907, + "step": 4455 + }, + { + "epoch": 1.1832425972646394, + "grad_norm": 1.1833669908026252, + "learning_rate": 7.820067369335413e-06, + "loss": 0.24401478469371796, + "step": 4456 + }, + { + "epoch": 1.1835081662461824, + "grad_norm": 1.2543977272663969, + "learning_rate": 7.815782410897209e-06, + "loss": 0.2717207074165344, + "step": 4457 + }, + { + "epoch": 1.1837737352277253, + "grad_norm": 1.0934075655453726, + "learning_rate": 7.81149787355039e-06, + "loss": 0.20752058923244476, + "step": 4458 + }, + { + "epoch": 1.1840393042092683, + "grad_norm": 1.3448722481333402, + "learning_rate": 7.807213758120965e-06, + "loss": 0.31095850467681885, + "step": 4459 + }, + { + "epoch": 1.1843048731908112, + "grad_norm": 1.1769654791590503, + "learning_rate": 7.802930065434874e-06, + "loss": 0.23761102557182312, + "step": 4460 + }, + { + "epoch": 1.1845704421723542, + "grad_norm": 1.3225327364557968, + "learning_rate": 7.798646796317952e-06, + "loss": 0.2509460151195526, + "step": 4461 + }, + { + "epoch": 1.1848360111538971, + "grad_norm": 1.472525937697874, + "learning_rate": 7.794363951595966e-06, + "loss": 0.25903213024139404, + "step": 4462 + }, + { + "epoch": 1.18510158013544, + "grad_norm": 1.1904413554334654, + "learning_rate": 7.790081532094596e-06, + "loss": 0.23304736614227295, + "step": 4463 + }, + { + "epoch": 1.185367149116983, + "grad_norm": 1.311875765456408, + "learning_rate": 7.785799538639445e-06, + "loss": 0.28707265853881836, + "step": 4464 + }, + { + "epoch": 1.185632718098526, + "grad_norm": 1.0202920254712324, + "learning_rate": 7.781517972056028e-06, + "loss": 0.20282745361328125, + "step": 4465 + }, + { + "epoch": 1.185898287080069, + "grad_norm": 1.2606153791729335, + "learning_rate": 7.777236833169782e-06, + "loss": 0.24056631326675415, + "step": 4466 + }, + { + "epoch": 1.186163856061612, + "grad_norm": 1.4946194524955894, + "learning_rate": 7.772956122806058e-06, + "loss": 0.2677255868911743, + "step": 4467 + }, + { + "epoch": 1.1864294250431549, + "grad_norm": 1.2681064192856966, + "learning_rate": 7.768675841790124e-06, + "loss": 0.22032876312732697, + "step": 4468 + }, + { + "epoch": 1.1866949940246978, + "grad_norm": 1.3138325978828467, + "learning_rate": 7.764395990947177e-06, + "loss": 0.2980336546897888, + "step": 4469 + }, + { + "epoch": 1.1869605630062408, + "grad_norm": 1.2624280680532078, + "learning_rate": 7.760116571102314e-06, + "loss": 0.2562638521194458, + "step": 4470 + }, + { + "epoch": 1.1872261319877837, + "grad_norm": 1.2207997545500016, + "learning_rate": 7.755837583080561e-06, + "loss": 0.262576699256897, + "step": 4471 + }, + { + "epoch": 1.1874917009693267, + "grad_norm": 1.2672893771429377, + "learning_rate": 7.751559027706858e-06, + "loss": 0.2654029130935669, + "step": 4472 + }, + { + "epoch": 1.1877572699508698, + "grad_norm": 1.2996444615622489, + "learning_rate": 7.747280905806051e-06, + "loss": 0.2946662902832031, + "step": 4473 + }, + { + "epoch": 1.1880228389324128, + "grad_norm": 1.193974235945654, + "learning_rate": 7.743003218202921e-06, + "loss": 0.25140905380249023, + "step": 4474 + }, + { + "epoch": 1.1882884079139557, + "grad_norm": 1.2240016583398612, + "learning_rate": 7.738725965722149e-06, + "loss": 0.2601654529571533, + "step": 4475 + }, + { + "epoch": 1.1885539768954987, + "grad_norm": 1.9675422662507516, + "learning_rate": 7.73444914918834e-06, + "loss": 0.2639954090118408, + "step": 4476 + }, + { + "epoch": 1.1888195458770416, + "grad_norm": 1.174151986382161, + "learning_rate": 7.730172769426014e-06, + "loss": 0.23391291499137878, + "step": 4477 + }, + { + "epoch": 1.1890851148585846, + "grad_norm": 2.254589386622623, + "learning_rate": 7.725896827259613e-06, + "loss": 0.2912144958972931, + "step": 4478 + }, + { + "epoch": 1.1893506838401275, + "grad_norm": 1.0905445077469016, + "learning_rate": 7.72162132351348e-06, + "loss": 0.23867549002170563, + "step": 4479 + }, + { + "epoch": 1.1896162528216705, + "grad_norm": 1.1124853975848743, + "learning_rate": 7.717346259011888e-06, + "loss": 0.22434742748737335, + "step": 4480 + }, + { + "epoch": 1.1898818218032134, + "grad_norm": 1.2440839352544732, + "learning_rate": 7.713071634579017e-06, + "loss": 0.2504398822784424, + "step": 4481 + }, + { + "epoch": 1.1901473907847564, + "grad_norm": 1.1759629506533034, + "learning_rate": 7.70879745103896e-06, + "loss": 0.24887195229530334, + "step": 4482 + }, + { + "epoch": 1.1904129597662994, + "grad_norm": 1.2603454999195398, + "learning_rate": 7.704523709215732e-06, + "loss": 0.2730141580104828, + "step": 4483 + }, + { + "epoch": 1.1906785287478423, + "grad_norm": 1.2285382464481551, + "learning_rate": 7.70025040993326e-06, + "loss": 0.22197315096855164, + "step": 4484 + }, + { + "epoch": 1.1909440977293853, + "grad_norm": 1.2004564929121084, + "learning_rate": 7.695977554015387e-06, + "loss": 0.2852731943130493, + "step": 4485 + }, + { + "epoch": 1.1912096667109282, + "grad_norm": 1.2815387200597224, + "learning_rate": 7.691705142285863e-06, + "loss": 0.2577238976955414, + "step": 4486 + }, + { + "epoch": 1.1914752356924712, + "grad_norm": 1.066499567502605, + "learning_rate": 7.68743317556837e-06, + "loss": 0.23510503768920898, + "step": 4487 + }, + { + "epoch": 1.191740804674014, + "grad_norm": 1.557745891642732, + "learning_rate": 7.683161654686486e-06, + "loss": 0.2553985118865967, + "step": 4488 + }, + { + "epoch": 1.192006373655557, + "grad_norm": 1.1965147913981737, + "learning_rate": 7.67889058046371e-06, + "loss": 0.2778642475605011, + "step": 4489 + }, + { + "epoch": 1.1922719426371, + "grad_norm": 1.1622951487110165, + "learning_rate": 7.674619953723455e-06, + "loss": 0.24740618467330933, + "step": 4490 + }, + { + "epoch": 1.192537511618643, + "grad_norm": 1.1598996003550786, + "learning_rate": 7.670349775289047e-06, + "loss": 0.2453901171684265, + "step": 4491 + }, + { + "epoch": 1.192803080600186, + "grad_norm": 1.1444233008842855, + "learning_rate": 7.666080045983726e-06, + "loss": 0.2336064875125885, + "step": 4492 + }, + { + "epoch": 1.1930686495817289, + "grad_norm": 1.18047841753512, + "learning_rate": 7.661810766630648e-06, + "loss": 0.2375800907611847, + "step": 4493 + }, + { + "epoch": 1.1933342185632718, + "grad_norm": 1.1241813274405275, + "learning_rate": 7.657541938052876e-06, + "loss": 0.21272733807563782, + "step": 4494 + }, + { + "epoch": 1.1935997875448148, + "grad_norm": 1.1531042348696576, + "learning_rate": 7.65327356107339e-06, + "loss": 0.26597708463668823, + "step": 4495 + }, + { + "epoch": 1.1938653565263577, + "grad_norm": 1.1715955143508257, + "learning_rate": 7.649005636515088e-06, + "loss": 0.267806738615036, + "step": 4496 + }, + { + "epoch": 1.1941309255079007, + "grad_norm": 1.1812545197713797, + "learning_rate": 7.64473816520077e-06, + "loss": 0.2260194569826126, + "step": 4497 + }, + { + "epoch": 1.1943964944894436, + "grad_norm": 1.298416110387325, + "learning_rate": 7.640471147953157e-06, + "loss": 0.24523532390594482, + "step": 4498 + }, + { + "epoch": 1.1946620634709866, + "grad_norm": 1.1020194586485352, + "learning_rate": 7.636204585594879e-06, + "loss": 0.23230910301208496, + "step": 4499 + }, + { + "epoch": 1.1949276324525295, + "grad_norm": 1.1141631171804318, + "learning_rate": 7.631938478948478e-06, + "loss": 0.23322705924510956, + "step": 4500 + }, + { + "epoch": 1.1951932014340725, + "grad_norm": 1.3011711597097497, + "learning_rate": 7.6276728288364086e-06, + "loss": 0.25614386796951294, + "step": 4501 + }, + { + "epoch": 1.1954587704156154, + "grad_norm": 1.2188058731839337, + "learning_rate": 7.62340763608104e-06, + "loss": 0.22921821475028992, + "step": 4502 + }, + { + "epoch": 1.1957243393971584, + "grad_norm": 1.1538976889459698, + "learning_rate": 7.619142901504649e-06, + "loss": 0.25528913736343384, + "step": 4503 + }, + { + "epoch": 1.1959899083787013, + "grad_norm": 1.1730292690453887, + "learning_rate": 7.614878625929425e-06, + "loss": 0.2528502643108368, + "step": 4504 + }, + { + "epoch": 1.1962554773602443, + "grad_norm": 1.2636827238002009, + "learning_rate": 7.610614810177474e-06, + "loss": 0.2519027590751648, + "step": 4505 + }, + { + "epoch": 1.1965210463417872, + "grad_norm": 1.3563109831905724, + "learning_rate": 7.606351455070808e-06, + "loss": 0.2895655333995819, + "step": 4506 + }, + { + "epoch": 1.1967866153233302, + "grad_norm": 1.2317858842714817, + "learning_rate": 7.6020885614313515e-06, + "loss": 0.24588793516159058, + "step": 4507 + }, + { + "epoch": 1.1970521843048731, + "grad_norm": 1.3148149004868621, + "learning_rate": 7.597826130080938e-06, + "loss": 0.2996830940246582, + "step": 4508 + }, + { + "epoch": 1.197317753286416, + "grad_norm": 1.2289139982746875, + "learning_rate": 7.593564161841318e-06, + "loss": 0.2654343247413635, + "step": 4509 + }, + { + "epoch": 1.197583322267959, + "grad_norm": 1.2104660234722762, + "learning_rate": 7.589302657534144e-06, + "loss": 0.24949109554290771, + "step": 4510 + }, + { + "epoch": 1.197848891249502, + "grad_norm": 1.1785955409512114, + "learning_rate": 7.5850416179809886e-06, + "loss": 0.23205731809139252, + "step": 4511 + }, + { + "epoch": 1.198114460231045, + "grad_norm": 3.351023225066079, + "learning_rate": 7.580781044003324e-06, + "loss": 0.232904314994812, + "step": 4512 + }, + { + "epoch": 1.198380029212588, + "grad_norm": 1.0569352775404934, + "learning_rate": 7.576520936422542e-06, + "loss": 0.25071364641189575, + "step": 4513 + }, + { + "epoch": 1.1986455981941309, + "grad_norm": 1.3613643273685416, + "learning_rate": 7.572261296059944e-06, + "loss": 0.2574467658996582, + "step": 4514 + }, + { + "epoch": 1.1989111671756738, + "grad_norm": 1.1866331959407248, + "learning_rate": 7.568002123736735e-06, + "loss": 0.23134055733680725, + "step": 4515 + }, + { + "epoch": 1.1991767361572168, + "grad_norm": 1.093870770411857, + "learning_rate": 7.5637434202740334e-06, + "loss": 0.22163332998752594, + "step": 4516 + }, + { + "epoch": 1.1994423051387597, + "grad_norm": 1.182308432196374, + "learning_rate": 7.559485186492868e-06, + "loss": 0.2665749788284302, + "step": 4517 + }, + { + "epoch": 1.1997078741203027, + "grad_norm": 1.0758759053634162, + "learning_rate": 7.555227423214174e-06, + "loss": 0.2237103432416916, + "step": 4518 + }, + { + "epoch": 1.1999734431018456, + "grad_norm": 1.2216323349035507, + "learning_rate": 7.550970131258801e-06, + "loss": 0.23287461698055267, + "step": 4519 + }, + { + "epoch": 1.2002390120833886, + "grad_norm": 1.1237156855078405, + "learning_rate": 7.5467133114475025e-06, + "loss": 0.2296323925256729, + "step": 4520 + }, + { + "epoch": 1.2005045810649315, + "grad_norm": 1.0900498705064874, + "learning_rate": 7.542456964600944e-06, + "loss": 0.21358339488506317, + "step": 4521 + }, + { + "epoch": 1.2007701500464747, + "grad_norm": 1.2516498821908515, + "learning_rate": 7.5382010915396954e-06, + "loss": 0.2355872094631195, + "step": 4522 + }, + { + "epoch": 1.2010357190280176, + "grad_norm": 1.2039029354448443, + "learning_rate": 7.5339456930842455e-06, + "loss": 0.25397661328315735, + "step": 4523 + }, + { + "epoch": 1.2013012880095606, + "grad_norm": 1.1762399479435963, + "learning_rate": 7.52969077005498e-06, + "loss": 0.26658257842063904, + "step": 4524 + }, + { + "epoch": 1.2015668569911035, + "grad_norm": 1.1889790145170218, + "learning_rate": 7.525436323272201e-06, + "loss": 0.27207136154174805, + "step": 4525 + }, + { + "epoch": 1.2018324259726465, + "grad_norm": 1.1867510172835751, + "learning_rate": 7.521182353556114e-06, + "loss": 0.25889313220977783, + "step": 4526 + }, + { + "epoch": 1.2020979949541895, + "grad_norm": 1.3095753328357655, + "learning_rate": 7.516928861726834e-06, + "loss": 0.272185742855072, + "step": 4527 + }, + { + "epoch": 1.2023635639357324, + "grad_norm": 1.156226984644319, + "learning_rate": 7.512675848604385e-06, + "loss": 0.25371503829956055, + "step": 4528 + }, + { + "epoch": 1.2026291329172754, + "grad_norm": 1.2028831911106082, + "learning_rate": 7.5084233150086964e-06, + "loss": 0.2554902732372284, + "step": 4529 + }, + { + "epoch": 1.2028947018988183, + "grad_norm": 1.1714528701705076, + "learning_rate": 7.50417126175961e-06, + "loss": 0.22007369995117188, + "step": 4530 + }, + { + "epoch": 1.2031602708803613, + "grad_norm": 1.2057968317835202, + "learning_rate": 7.499919689676861e-06, + "loss": 0.27492445707321167, + "step": 4531 + }, + { + "epoch": 1.2034258398619042, + "grad_norm": 1.1229280499713745, + "learning_rate": 7.4956685995801144e-06, + "loss": 0.2321021854877472, + "step": 4532 + }, + { + "epoch": 1.2036914088434472, + "grad_norm": 1.1735641467762012, + "learning_rate": 7.491417992288927e-06, + "loss": 0.25410759449005127, + "step": 4533 + }, + { + "epoch": 1.2039569778249901, + "grad_norm": 1.0638924164212193, + "learning_rate": 7.487167868622765e-06, + "loss": 0.2080576866865158, + "step": 4534 + }, + { + "epoch": 1.204222546806533, + "grad_norm": 1.115815492341061, + "learning_rate": 7.482918229401001e-06, + "loss": 0.2333327978849411, + "step": 4535 + }, + { + "epoch": 1.204488115788076, + "grad_norm": 1.1999209092526242, + "learning_rate": 7.478669075442917e-06, + "loss": 0.23160479962825775, + "step": 4536 + }, + { + "epoch": 1.204753684769619, + "grad_norm": 1.2136747509439494, + "learning_rate": 7.474420407567699e-06, + "loss": 0.2627696394920349, + "step": 4537 + }, + { + "epoch": 1.205019253751162, + "grad_norm": 1.0694648198090266, + "learning_rate": 7.470172226594441e-06, + "loss": 0.18656940758228302, + "step": 4538 + }, + { + "epoch": 1.2052848227327049, + "grad_norm": 1.2245138263513848, + "learning_rate": 7.465924533342139e-06, + "loss": 0.2749083340167999, + "step": 4539 + }, + { + "epoch": 1.2055503917142478, + "grad_norm": 1.3944907322006155, + "learning_rate": 7.461677328629696e-06, + "loss": 0.27484387159347534, + "step": 4540 + }, + { + "epoch": 1.2058159606957908, + "grad_norm": 1.254197138569937, + "learning_rate": 7.457430613275934e-06, + "loss": 0.26357588171958923, + "step": 4541 + }, + { + "epoch": 1.2060815296773337, + "grad_norm": 1.2004336778554112, + "learning_rate": 7.453184388099559e-06, + "loss": 0.23495343327522278, + "step": 4542 + }, + { + "epoch": 1.2063470986588767, + "grad_norm": 1.2123259782755003, + "learning_rate": 7.4489386539192e-06, + "loss": 0.253970205783844, + "step": 4543 + }, + { + "epoch": 1.2066126676404196, + "grad_norm": 1.1523820852778563, + "learning_rate": 7.444693411553383e-06, + "loss": 0.24919062852859497, + "step": 4544 + }, + { + "epoch": 1.2068782366219626, + "grad_norm": 1.2181666045865969, + "learning_rate": 7.440448661820536e-06, + "loss": 0.24373450875282288, + "step": 4545 + }, + { + "epoch": 1.2071438056035055, + "grad_norm": 1.3762501451890354, + "learning_rate": 7.436204405539002e-06, + "loss": 0.24739482998847961, + "step": 4546 + }, + { + "epoch": 1.2074093745850485, + "grad_norm": 1.2982074074943253, + "learning_rate": 7.4319606435270195e-06, + "loss": 0.27041494846343994, + "step": 4547 + }, + { + "epoch": 1.2076749435665914, + "grad_norm": 1.1359942984852744, + "learning_rate": 7.427717376602739e-06, + "loss": 0.23243938386440277, + "step": 4548 + }, + { + "epoch": 1.2079405125481344, + "grad_norm": 1.3118758722508392, + "learning_rate": 7.423474605584206e-06, + "loss": 0.2346343696117401, + "step": 4549 + }, + { + "epoch": 1.2082060815296773, + "grad_norm": 1.1819354183035133, + "learning_rate": 7.419232331289385e-06, + "loss": 0.2587367296218872, + "step": 4550 + }, + { + "epoch": 1.2084716505112203, + "grad_norm": 1.195922174249915, + "learning_rate": 7.414990554536134e-06, + "loss": 0.2552938461303711, + "step": 4551 + }, + { + "epoch": 1.2087372194927632, + "grad_norm": 1.2688216449772127, + "learning_rate": 7.410749276142221e-06, + "loss": 0.2693648040294647, + "step": 4552 + }, + { + "epoch": 1.2090027884743062, + "grad_norm": 1.1997939452425357, + "learning_rate": 7.406508496925307e-06, + "loss": 0.21543294191360474, + "step": 4553 + }, + { + "epoch": 1.2092683574558492, + "grad_norm": 1.2385892147047024, + "learning_rate": 7.402268217702966e-06, + "loss": 0.2913009524345398, + "step": 4554 + }, + { + "epoch": 1.209533926437392, + "grad_norm": 1.0671356100150298, + "learning_rate": 7.398028439292675e-06, + "loss": 0.23279520869255066, + "step": 4555 + }, + { + "epoch": 1.209799495418935, + "grad_norm": 1.0946575444558022, + "learning_rate": 7.393789162511815e-06, + "loss": 0.25086939334869385, + "step": 4556 + }, + { + "epoch": 1.210065064400478, + "grad_norm": 1.0964890001200192, + "learning_rate": 7.389550388177662e-06, + "loss": 0.21704714000225067, + "step": 4557 + }, + { + "epoch": 1.210330633382021, + "grad_norm": 1.126699331966135, + "learning_rate": 7.3853121171074115e-06, + "loss": 0.230219304561615, + "step": 4558 + }, + { + "epoch": 1.210596202363564, + "grad_norm": 1.1809668678269754, + "learning_rate": 7.381074350118149e-06, + "loss": 0.26073017716407776, + "step": 4559 + }, + { + "epoch": 1.2108617713451069, + "grad_norm": 1.2065072762311946, + "learning_rate": 7.376837088026863e-06, + "loss": 0.25186216831207275, + "step": 4560 + }, + { + "epoch": 1.2111273403266498, + "grad_norm": 1.3978877577958326, + "learning_rate": 7.372600331650449e-06, + "loss": 0.28719040751457214, + "step": 4561 + }, + { + "epoch": 1.2113929093081928, + "grad_norm": 1.16073083909203, + "learning_rate": 7.368364081805704e-06, + "loss": 0.23972755670547485, + "step": 4562 + }, + { + "epoch": 1.2116584782897357, + "grad_norm": 1.096919114864748, + "learning_rate": 7.364128339309326e-06, + "loss": 0.23053769767284393, + "step": 4563 + }, + { + "epoch": 1.2119240472712787, + "grad_norm": 1.2910615683085556, + "learning_rate": 7.359893104977917e-06, + "loss": 0.25124189257621765, + "step": 4564 + }, + { + "epoch": 1.2121896162528216, + "grad_norm": 1.1863697592423188, + "learning_rate": 7.355658379627981e-06, + "loss": 0.2243686318397522, + "step": 4565 + }, + { + "epoch": 1.2124551852343646, + "grad_norm": 1.244591161752608, + "learning_rate": 7.3514241640759175e-06, + "loss": 0.26047343015670776, + "step": 4566 + }, + { + "epoch": 1.2127207542159075, + "grad_norm": 1.1775978450301259, + "learning_rate": 7.3471904591380434e-06, + "loss": 0.23603469133377075, + "step": 4567 + }, + { + "epoch": 1.2129863231974505, + "grad_norm": 1.2261707581126196, + "learning_rate": 7.342957265630561e-06, + "loss": 0.31320711970329285, + "step": 4568 + }, + { + "epoch": 1.2132518921789934, + "grad_norm": 1.22464158648852, + "learning_rate": 7.338724584369581e-06, + "loss": 0.22159788012504578, + "step": 4569 + }, + { + "epoch": 1.2135174611605364, + "grad_norm": 1.1206153371836056, + "learning_rate": 7.334492416171114e-06, + "loss": 0.21992239356040955, + "step": 4570 + }, + { + "epoch": 1.2137830301420793, + "grad_norm": 1.3229661253734524, + "learning_rate": 7.330260761851071e-06, + "loss": 0.20708827674388885, + "step": 4571 + }, + { + "epoch": 1.2140485991236223, + "grad_norm": 1.1899658624900848, + "learning_rate": 7.326029622225269e-06, + "loss": 0.2846507132053375, + "step": 4572 + }, + { + "epoch": 1.2143141681051652, + "grad_norm": 1.2218224134688922, + "learning_rate": 7.321798998109417e-06, + "loss": 0.24903801083564758, + "step": 4573 + }, + { + "epoch": 1.2145797370867082, + "grad_norm": 1.1817295734811926, + "learning_rate": 7.317568890319134e-06, + "loss": 0.23426681756973267, + "step": 4574 + }, + { + "epoch": 1.2148453060682511, + "grad_norm": 1.1685993771040228, + "learning_rate": 7.31333929966993e-06, + "loss": 0.2374490350484848, + "step": 4575 + }, + { + "epoch": 1.215110875049794, + "grad_norm": 1.13335327598736, + "learning_rate": 7.309110226977223e-06, + "loss": 0.24035832285881042, + "step": 4576 + }, + { + "epoch": 1.215376444031337, + "grad_norm": 1.2837405582571324, + "learning_rate": 7.30488167305633e-06, + "loss": 0.21872258186340332, + "step": 4577 + }, + { + "epoch": 1.21564201301288, + "grad_norm": 1.3425258296129825, + "learning_rate": 7.300653638722463e-06, + "loss": 0.2940255403518677, + "step": 4578 + }, + { + "epoch": 1.215907581994423, + "grad_norm": 1.1158795437619367, + "learning_rate": 7.29642612479074e-06, + "loss": 0.20970892906188965, + "step": 4579 + }, + { + "epoch": 1.216173150975966, + "grad_norm": 1.1571301789790744, + "learning_rate": 7.292199132076175e-06, + "loss": 0.21217449009418488, + "step": 4580 + }, + { + "epoch": 1.2164387199575089, + "grad_norm": 1.2448503896532135, + "learning_rate": 7.28797266139368e-06, + "loss": 0.2463359832763672, + "step": 4581 + }, + { + "epoch": 1.2167042889390518, + "grad_norm": 1.132320428820701, + "learning_rate": 7.283746713558071e-06, + "loss": 0.21921415627002716, + "step": 4582 + }, + { + "epoch": 1.2169698579205948, + "grad_norm": 1.2437376760058587, + "learning_rate": 7.279521289384059e-06, + "loss": 0.2412380576133728, + "step": 4583 + }, + { + "epoch": 1.2172354269021377, + "grad_norm": 1.180878934188553, + "learning_rate": 7.275296389686258e-06, + "loss": 0.2558564245700836, + "step": 4584 + }, + { + "epoch": 1.2175009958836809, + "grad_norm": 1.2566060880081307, + "learning_rate": 7.271072015279179e-06, + "loss": 0.2548869848251343, + "step": 4585 + }, + { + "epoch": 1.2177665648652238, + "grad_norm": 1.4407566508510072, + "learning_rate": 7.2668481669772304e-06, + "loss": 0.22183407843112946, + "step": 4586 + }, + { + "epoch": 1.2180321338467668, + "grad_norm": 1.20165829214997, + "learning_rate": 7.262624845594721e-06, + "loss": 0.24722473323345184, + "step": 4587 + }, + { + "epoch": 1.2182977028283097, + "grad_norm": 1.190564524584547, + "learning_rate": 7.258402051945858e-06, + "loss": 0.2678988575935364, + "step": 4588 + }, + { + "epoch": 1.2185632718098527, + "grad_norm": 1.187777405395345, + "learning_rate": 7.2541797868447435e-06, + "loss": 0.2116469144821167, + "step": 4589 + }, + { + "epoch": 1.2188288407913956, + "grad_norm": 1.2500071795758152, + "learning_rate": 7.249958051105383e-06, + "loss": 0.23897933959960938, + "step": 4590 + }, + { + "epoch": 1.2190944097729386, + "grad_norm": 1.2473885744661077, + "learning_rate": 7.245736845541676e-06, + "loss": 0.25434061884880066, + "step": 4591 + }, + { + "epoch": 1.2193599787544815, + "grad_norm": 1.2108382272450464, + "learning_rate": 7.2415161709674235e-06, + "loss": 0.2602628469467163, + "step": 4592 + }, + { + "epoch": 1.2196255477360245, + "grad_norm": 3.1633443202169764, + "learning_rate": 7.2372960281963165e-06, + "loss": 0.2519065737724304, + "step": 4593 + }, + { + "epoch": 1.2198911167175674, + "grad_norm": 1.550903602515833, + "learning_rate": 7.233076418041954e-06, + "loss": 0.24404102563858032, + "step": 4594 + }, + { + "epoch": 1.2201566856991104, + "grad_norm": 1.1561711817096534, + "learning_rate": 7.228857341317825e-06, + "loss": 0.23633979260921478, + "step": 4595 + }, + { + "epoch": 1.2204222546806534, + "grad_norm": 1.2128002082313463, + "learning_rate": 7.224638798837319e-06, + "loss": 0.2513781189918518, + "step": 4596 + }, + { + "epoch": 1.2206878236621963, + "grad_norm": 1.2409533600026899, + "learning_rate": 7.220420791413721e-06, + "loss": 0.23270189762115479, + "step": 4597 + }, + { + "epoch": 1.2209533926437393, + "grad_norm": 1.2503409564498669, + "learning_rate": 7.21620331986021e-06, + "loss": 0.2770010530948639, + "step": 4598 + }, + { + "epoch": 1.2212189616252822, + "grad_norm": 1.1284522462719728, + "learning_rate": 7.2119863849898684e-06, + "loss": 0.2312745451927185, + "step": 4599 + }, + { + "epoch": 1.2214845306068252, + "grad_norm": 1.2725314186948387, + "learning_rate": 7.20776998761567e-06, + "loss": 0.231276735663414, + "step": 4600 + }, + { + "epoch": 1.221750099588368, + "grad_norm": 1.1715742737590393, + "learning_rate": 7.203554128550486e-06, + "loss": 0.24927708506584167, + "step": 4601 + }, + { + "epoch": 1.222015668569911, + "grad_norm": 1.1138441718661785, + "learning_rate": 7.199338808607084e-06, + "loss": 0.23033373057842255, + "step": 4602 + }, + { + "epoch": 1.222281237551454, + "grad_norm": 1.2545098885673684, + "learning_rate": 7.195124028598131e-06, + "loss": 0.24003425240516663, + "step": 4603 + }, + { + "epoch": 1.222546806532997, + "grad_norm": 1.1872708193619057, + "learning_rate": 7.190909789336185e-06, + "loss": 0.22648809850215912, + "step": 4604 + }, + { + "epoch": 1.22281237551454, + "grad_norm": 1.2511860493227276, + "learning_rate": 7.1866960916337006e-06, + "loss": 0.2605816125869751, + "step": 4605 + }, + { + "epoch": 1.2230779444960829, + "grad_norm": 1.1424629632361756, + "learning_rate": 7.1824829363030305e-06, + "loss": 0.21549202501773834, + "step": 4606 + }, + { + "epoch": 1.2233435134776258, + "grad_norm": 1.1532084986944064, + "learning_rate": 7.17827032415642e-06, + "loss": 0.23113220930099487, + "step": 4607 + }, + { + "epoch": 1.2236090824591688, + "grad_norm": 1.1649312720163907, + "learning_rate": 7.174058256006012e-06, + "loss": 0.22736643254756927, + "step": 4608 + }, + { + "epoch": 1.2238746514407117, + "grad_norm": 1.172011833362534, + "learning_rate": 7.169846732663845e-06, + "loss": 0.2686663866043091, + "step": 4609 + }, + { + "epoch": 1.2241402204222547, + "grad_norm": 1.1555217624379808, + "learning_rate": 7.1656357549418485e-06, + "loss": 0.1980462670326233, + "step": 4610 + }, + { + "epoch": 1.2244057894037976, + "grad_norm": 1.2401629806715768, + "learning_rate": 7.161425323651846e-06, + "loss": 0.22997641563415527, + "step": 4611 + }, + { + "epoch": 1.2246713583853406, + "grad_norm": 1.3367939845671126, + "learning_rate": 7.157215439605567e-06, + "loss": 0.28781357407569885, + "step": 4612 + }, + { + "epoch": 1.2249369273668835, + "grad_norm": 1.2895382897388425, + "learning_rate": 7.153006103614624e-06, + "loss": 0.22558270394802094, + "step": 4613 + }, + { + "epoch": 1.2252024963484265, + "grad_norm": 1.1860196927831441, + "learning_rate": 7.148797316490527e-06, + "loss": 0.2435922622680664, + "step": 4614 + }, + { + "epoch": 1.2254680653299694, + "grad_norm": 1.2828543438888096, + "learning_rate": 7.14458907904468e-06, + "loss": 0.27840936183929443, + "step": 4615 + }, + { + "epoch": 1.2257336343115124, + "grad_norm": 1.2350405670943831, + "learning_rate": 7.1403813920883825e-06, + "loss": 0.2775651812553406, + "step": 4616 + }, + { + "epoch": 1.2259992032930553, + "grad_norm": 1.2738452228129284, + "learning_rate": 7.136174256432828e-06, + "loss": 0.2430988848209381, + "step": 4617 + }, + { + "epoch": 1.2262647722745983, + "grad_norm": 1.0618083363199646, + "learning_rate": 7.131967672889101e-06, + "loss": 0.2018759697675705, + "step": 4618 + }, + { + "epoch": 1.2265303412561412, + "grad_norm": 1.2320094058432127, + "learning_rate": 7.127761642268179e-06, + "loss": 0.25314825773239136, + "step": 4619 + }, + { + "epoch": 1.2267959102376842, + "grad_norm": 1.409693024729639, + "learning_rate": 7.123556165380935e-06, + "loss": 0.2542746365070343, + "step": 4620 + }, + { + "epoch": 1.2270614792192271, + "grad_norm": 1.2571649384815597, + "learning_rate": 7.119351243038142e-06, + "loss": 0.2912300229072571, + "step": 4621 + }, + { + "epoch": 1.22732704820077, + "grad_norm": 1.3877507856901592, + "learning_rate": 7.115146876050454e-06, + "loss": 0.26893284916877747, + "step": 4622 + }, + { + "epoch": 1.227592617182313, + "grad_norm": 1.3833428208823224, + "learning_rate": 7.110943065228425e-06, + "loss": 0.2711215317249298, + "step": 4623 + }, + { + "epoch": 1.227858186163856, + "grad_norm": 1.346165350849743, + "learning_rate": 7.106739811382501e-06, + "loss": 0.25530266761779785, + "step": 4624 + }, + { + "epoch": 1.228123755145399, + "grad_norm": 1.268299981159743, + "learning_rate": 7.102537115323018e-06, + "loss": 0.2547178864479065, + "step": 4625 + }, + { + "epoch": 1.228389324126942, + "grad_norm": 1.5802606545447795, + "learning_rate": 7.0983349778602064e-06, + "loss": 0.27973634004592896, + "step": 4626 + }, + { + "epoch": 1.2286548931084849, + "grad_norm": 1.205257873334912, + "learning_rate": 7.0941333998041884e-06, + "loss": 0.24066339433193207, + "step": 4627 + }, + { + "epoch": 1.2289204620900278, + "grad_norm": 1.1798307734371165, + "learning_rate": 7.0899323819649816e-06, + "loss": 0.24305742979049683, + "step": 4628 + }, + { + "epoch": 1.2291860310715708, + "grad_norm": 1.163221794708842, + "learning_rate": 7.085731925152484e-06, + "loss": 0.22478783130645752, + "step": 4629 + }, + { + "epoch": 1.2294516000531137, + "grad_norm": 1.1812808698189172, + "learning_rate": 7.081532030176506e-06, + "loss": 0.24995659291744232, + "step": 4630 + }, + { + "epoch": 1.2297171690346567, + "grad_norm": 1.1575900439946216, + "learning_rate": 7.077332697846733e-06, + "loss": 0.2579454183578491, + "step": 4631 + }, + { + "epoch": 1.2299827380161996, + "grad_norm": 1.2378373931288529, + "learning_rate": 7.073133928972745e-06, + "loss": 0.2513299286365509, + "step": 4632 + }, + { + "epoch": 1.2302483069977426, + "grad_norm": 1.0751310135047412, + "learning_rate": 7.068935724364016e-06, + "loss": 0.23344315588474274, + "step": 4633 + }, + { + "epoch": 1.2305138759792857, + "grad_norm": 1.1882346043976466, + "learning_rate": 7.064738084829912e-06, + "loss": 0.26750341057777405, + "step": 4634 + }, + { + "epoch": 1.2307794449608287, + "grad_norm": 1.1622882344241228, + "learning_rate": 7.0605410111796855e-06, + "loss": 0.22424373030662537, + "step": 4635 + }, + { + "epoch": 1.2310450139423716, + "grad_norm": 1.0711348851881108, + "learning_rate": 7.056344504222485e-06, + "loss": 0.24261844158172607, + "step": 4636 + }, + { + "epoch": 1.2313105829239146, + "grad_norm": 1.1382788327638453, + "learning_rate": 7.052148564767347e-06, + "loss": 0.22273704409599304, + "step": 4637 + }, + { + "epoch": 1.2315761519054576, + "grad_norm": 1.217398110209698, + "learning_rate": 7.047953193623195e-06, + "loss": 0.23726603388786316, + "step": 4638 + }, + { + "epoch": 1.2318417208870005, + "grad_norm": 1.1961933626954258, + "learning_rate": 7.043758391598856e-06, + "loss": 0.2612340748310089, + "step": 4639 + }, + { + "epoch": 1.2321072898685435, + "grad_norm": 1.3828917417203295, + "learning_rate": 7.039564159503034e-06, + "loss": 0.25722867250442505, + "step": 4640 + }, + { + "epoch": 1.2323728588500864, + "grad_norm": 1.2106898963951274, + "learning_rate": 7.035370498144325e-06, + "loss": 0.25940731167793274, + "step": 4641 + }, + { + "epoch": 1.2326384278316294, + "grad_norm": 1.1431229158704634, + "learning_rate": 7.03117740833122e-06, + "loss": 0.2328685224056244, + "step": 4642 + }, + { + "epoch": 1.2329039968131723, + "grad_norm": 1.360549509974518, + "learning_rate": 7.0269848908720965e-06, + "loss": 0.3019352853298187, + "step": 4643 + }, + { + "epoch": 1.2331695657947153, + "grad_norm": 1.370123584713732, + "learning_rate": 7.022792946575222e-06, + "loss": 0.2665002942085266, + "step": 4644 + }, + { + "epoch": 1.2334351347762582, + "grad_norm": 1.2172549009924116, + "learning_rate": 7.018601576248755e-06, + "loss": 0.2425101399421692, + "step": 4645 + }, + { + "epoch": 1.2337007037578012, + "grad_norm": 1.2088470091841177, + "learning_rate": 7.014410780700743e-06, + "loss": 0.23319771885871887, + "step": 4646 + }, + { + "epoch": 1.2339662727393441, + "grad_norm": 1.1714631765087196, + "learning_rate": 7.010220560739116e-06, + "loss": 0.23033195734024048, + "step": 4647 + }, + { + "epoch": 1.234231841720887, + "grad_norm": 1.211199620492339, + "learning_rate": 7.006030917171707e-06, + "loss": 0.24682006239891052, + "step": 4648 + }, + { + "epoch": 1.23449741070243, + "grad_norm": 1.2881207045369418, + "learning_rate": 7.001841850806228e-06, + "loss": 0.25566285848617554, + "step": 4649 + }, + { + "epoch": 1.234762979683973, + "grad_norm": 1.32329780476303, + "learning_rate": 6.9976533624502784e-06, + "loss": 0.2791779339313507, + "step": 4650 + }, + { + "epoch": 1.235028548665516, + "grad_norm": 1.3093366388831746, + "learning_rate": 6.993465452911352e-06, + "loss": 0.25597846508026123, + "step": 4651 + }, + { + "epoch": 1.2352941176470589, + "grad_norm": 1.197170425293823, + "learning_rate": 6.9892781229968275e-06, + "loss": 0.24034728109836578, + "step": 4652 + }, + { + "epoch": 1.2355596866286018, + "grad_norm": 1.2583607623295634, + "learning_rate": 6.985091373513972e-06, + "loss": 0.2209509015083313, + "step": 4653 + }, + { + "epoch": 1.2358252556101448, + "grad_norm": 1.298261075070858, + "learning_rate": 6.980905205269942e-06, + "loss": 0.29106947779655457, + "step": 4654 + }, + { + "epoch": 1.2360908245916877, + "grad_norm": 1.226505577270481, + "learning_rate": 6.976719619071782e-06, + "loss": 0.24014753103256226, + "step": 4655 + }, + { + "epoch": 1.2363563935732307, + "grad_norm": 1.2297022971330018, + "learning_rate": 6.972534615726422e-06, + "loss": 0.27135470509529114, + "step": 4656 + }, + { + "epoch": 1.2366219625547736, + "grad_norm": 1.2219120714336154, + "learning_rate": 6.968350196040683e-06, + "loss": 0.23386257886886597, + "step": 4657 + }, + { + "epoch": 1.2368875315363166, + "grad_norm": 1.1452987159774544, + "learning_rate": 6.964166360821271e-06, + "loss": 0.23119661211967468, + "step": 4658 + }, + { + "epoch": 1.2371531005178595, + "grad_norm": 1.1767967288021879, + "learning_rate": 6.959983110874782e-06, + "loss": 0.2399922013282776, + "step": 4659 + }, + { + "epoch": 1.2374186694994025, + "grad_norm": 1.0521231856668218, + "learning_rate": 6.9558004470076944e-06, + "loss": 0.18323534727096558, + "step": 4660 + }, + { + "epoch": 1.2376842384809454, + "grad_norm": 1.1985431375912965, + "learning_rate": 6.951618370026378e-06, + "loss": 0.25683268904685974, + "step": 4661 + }, + { + "epoch": 1.2379498074624884, + "grad_norm": 1.307367140627743, + "learning_rate": 6.947436880737089e-06, + "loss": 0.2861499786376953, + "step": 4662 + }, + { + "epoch": 1.2382153764440313, + "grad_norm": 1.3831407282476516, + "learning_rate": 6.943255979945965e-06, + "loss": 0.28021398186683655, + "step": 4663 + }, + { + "epoch": 1.2384809454255743, + "grad_norm": 1.2940713851528283, + "learning_rate": 6.939075668459039e-06, + "loss": 0.2739776074886322, + "step": 4664 + }, + { + "epoch": 1.2387465144071172, + "grad_norm": 1.3433235944815516, + "learning_rate": 6.934895947082221e-06, + "loss": 0.26015231013298035, + "step": 4665 + }, + { + "epoch": 1.2390120833886602, + "grad_norm": 1.3230400884249285, + "learning_rate": 6.930716816621317e-06, + "loss": 0.2572113871574402, + "step": 4666 + }, + { + "epoch": 1.2392776523702032, + "grad_norm": 1.266134559335497, + "learning_rate": 6.926538277882012e-06, + "loss": 0.24094708263874054, + "step": 4667 + }, + { + "epoch": 1.239543221351746, + "grad_norm": 1.1175335748548278, + "learning_rate": 6.92236033166988e-06, + "loss": 0.22803835570812225, + "step": 4668 + }, + { + "epoch": 1.239808790333289, + "grad_norm": 1.1198379137737728, + "learning_rate": 6.9181829787903774e-06, + "loss": 0.23672322928905487, + "step": 4669 + }, + { + "epoch": 1.240074359314832, + "grad_norm": 1.3356297624894082, + "learning_rate": 6.91400622004885e-06, + "loss": 0.2568579912185669, + "step": 4670 + }, + { + "epoch": 1.240339928296375, + "grad_norm": 1.1768710116388783, + "learning_rate": 6.909830056250527e-06, + "loss": 0.25267845392227173, + "step": 4671 + }, + { + "epoch": 1.240605497277918, + "grad_norm": 1.2702969549109802, + "learning_rate": 6.905654488200524e-06, + "loss": 0.30336999893188477, + "step": 4672 + }, + { + "epoch": 1.2408710662594609, + "grad_norm": 1.17710991443045, + "learning_rate": 6.901479516703842e-06, + "loss": 0.2741299867630005, + "step": 4673 + }, + { + "epoch": 1.2411366352410038, + "grad_norm": 1.276658372251755, + "learning_rate": 6.897305142565363e-06, + "loss": 0.2896823585033417, + "step": 4674 + }, + { + "epoch": 1.2414022042225468, + "grad_norm": 1.2718591233587666, + "learning_rate": 6.8931313665898625e-06, + "loss": 0.23102329671382904, + "step": 4675 + }, + { + "epoch": 1.2416677732040897, + "grad_norm": 1.3209479857777737, + "learning_rate": 6.8889581895819915e-06, + "loss": 0.2600775361061096, + "step": 4676 + }, + { + "epoch": 1.2419333421856327, + "grad_norm": 1.1932453661715805, + "learning_rate": 6.884785612346291e-06, + "loss": 0.23589132726192474, + "step": 4677 + }, + { + "epoch": 1.2421989111671756, + "grad_norm": 1.155454248544126, + "learning_rate": 6.880613635687184e-06, + "loss": 0.24419361352920532, + "step": 4678 + }, + { + "epoch": 1.2424644801487186, + "grad_norm": 1.1323309321599895, + "learning_rate": 6.876442260408977e-06, + "loss": 0.23267227411270142, + "step": 4679 + }, + { + "epoch": 1.2427300491302615, + "grad_norm": 1.2244929254620942, + "learning_rate": 6.8722714873158635e-06, + "loss": 0.2507064938545227, + "step": 4680 + }, + { + "epoch": 1.2429956181118045, + "grad_norm": 1.2079227486812785, + "learning_rate": 6.868101317211922e-06, + "loss": 0.2529929280281067, + "step": 4681 + }, + { + "epoch": 1.2432611870933474, + "grad_norm": 1.1627205371245832, + "learning_rate": 6.863931750901107e-06, + "loss": 0.23255379498004913, + "step": 4682 + }, + { + "epoch": 1.2435267560748904, + "grad_norm": 1.1997195000446994, + "learning_rate": 6.859762789187259e-06, + "loss": 0.22757332026958466, + "step": 4683 + }, + { + "epoch": 1.2437923250564333, + "grad_norm": 1.2115398233652928, + "learning_rate": 6.8555944328741145e-06, + "loss": 0.2578364312648773, + "step": 4684 + }, + { + "epoch": 1.2440578940379763, + "grad_norm": 1.1854445431935166, + "learning_rate": 6.851426682765278e-06, + "loss": 0.27568408846855164, + "step": 4685 + }, + { + "epoch": 1.2443234630195192, + "grad_norm": 1.19754548578965, + "learning_rate": 6.847259539664244e-06, + "loss": 0.25595831871032715, + "step": 4686 + }, + { + "epoch": 1.2445890320010622, + "grad_norm": 1.1807617266458326, + "learning_rate": 6.843093004374386e-06, + "loss": 0.2195426970720291, + "step": 4687 + }, + { + "epoch": 1.2448546009826051, + "grad_norm": 1.1623631531241645, + "learning_rate": 6.838927077698967e-06, + "loss": 0.23247741162776947, + "step": 4688 + }, + { + "epoch": 1.245120169964148, + "grad_norm": 1.2953467781322094, + "learning_rate": 6.834761760441127e-06, + "loss": 0.26149916648864746, + "step": 4689 + }, + { + "epoch": 1.245385738945691, + "grad_norm": 1.1310243964126157, + "learning_rate": 6.830597053403885e-06, + "loss": 0.2521447241306305, + "step": 4690 + }, + { + "epoch": 1.245651307927234, + "grad_norm": 1.1803812700297758, + "learning_rate": 6.826432957390155e-06, + "loss": 0.23401981592178345, + "step": 4691 + }, + { + "epoch": 1.245916876908777, + "grad_norm": 1.3114713754211442, + "learning_rate": 6.822269473202714e-06, + "loss": 0.25341230630874634, + "step": 4692 + }, + { + "epoch": 1.24618244589032, + "grad_norm": 1.2025537581570156, + "learning_rate": 6.818106601644248e-06, + "loss": 0.2513907551765442, + "step": 4693 + }, + { + "epoch": 1.2464480148718629, + "grad_norm": 1.2263403478965602, + "learning_rate": 6.8139443435173005e-06, + "loss": 0.2682073414325714, + "step": 4694 + }, + { + "epoch": 1.2467135838534058, + "grad_norm": 1.1801313342439474, + "learning_rate": 6.809782699624308e-06, + "loss": 0.22726872563362122, + "step": 4695 + }, + { + "epoch": 1.2469791528349488, + "grad_norm": 1.3004812874511507, + "learning_rate": 6.805621670767588e-06, + "loss": 0.24184030294418335, + "step": 4696 + }, + { + "epoch": 1.247244721816492, + "grad_norm": 1.0395051535883466, + "learning_rate": 6.801461257749334e-06, + "loss": 0.203639417886734, + "step": 4697 + }, + { + "epoch": 1.2475102907980349, + "grad_norm": 1.1786557175840897, + "learning_rate": 6.797301461371626e-06, + "loss": 0.2170606106519699, + "step": 4698 + }, + { + "epoch": 1.2477758597795778, + "grad_norm": 1.1231113548110434, + "learning_rate": 6.7931422824364245e-06, + "loss": 0.2225056290626526, + "step": 4699 + }, + { + "epoch": 1.2480414287611208, + "grad_norm": 1.1702414518259399, + "learning_rate": 6.788983721745569e-06, + "loss": 0.2388974130153656, + "step": 4700 + }, + { + "epoch": 1.2483069977426637, + "grad_norm": 1.14649445863332, + "learning_rate": 6.784825780100776e-06, + "loss": 0.2291644811630249, + "step": 4701 + }, + { + "epoch": 1.2485725667242067, + "grad_norm": 1.3474164807852358, + "learning_rate": 6.7806684583036595e-06, + "loss": 0.23793739080429077, + "step": 4702 + }, + { + "epoch": 1.2488381357057496, + "grad_norm": 1.2839354787463726, + "learning_rate": 6.776511757155695e-06, + "loss": 0.2756902277469635, + "step": 4703 + }, + { + "epoch": 1.2491037046872926, + "grad_norm": 1.3039866822855, + "learning_rate": 6.772355677458249e-06, + "loss": 0.25046268105506897, + "step": 4704 + }, + { + "epoch": 1.2493692736688355, + "grad_norm": 1.3053078100109528, + "learning_rate": 6.7682002200125575e-06, + "loss": 0.238486647605896, + "step": 4705 + }, + { + "epoch": 1.2496348426503785, + "grad_norm": 1.1855651210182463, + "learning_rate": 6.764045385619751e-06, + "loss": 0.2366628348827362, + "step": 4706 + }, + { + "epoch": 1.2499004116319214, + "grad_norm": 1.21176387977239, + "learning_rate": 6.759891175080827e-06, + "loss": 0.24825221300125122, + "step": 4707 + }, + { + "epoch": 1.2501659806134644, + "grad_norm": 1.2922207381934139, + "learning_rate": 6.755737589196673e-06, + "loss": 0.2304186224937439, + "step": 4708 + }, + { + "epoch": 1.2504315495950074, + "grad_norm": 1.200468035859197, + "learning_rate": 6.7515846287680476e-06, + "loss": 0.2824471592903137, + "step": 4709 + }, + { + "epoch": 1.2506971185765503, + "grad_norm": 1.1994302764371214, + "learning_rate": 6.747432294595591e-06, + "loss": 0.23130697011947632, + "step": 4710 + }, + { + "epoch": 1.2509626875580933, + "grad_norm": 1.3183641444794993, + "learning_rate": 6.7432805874798334e-06, + "loss": 0.28371602296829224, + "step": 4711 + }, + { + "epoch": 1.2512282565396362, + "grad_norm": 1.1529924861272876, + "learning_rate": 6.739129508221167e-06, + "loss": 0.23452092707157135, + "step": 4712 + }, + { + "epoch": 1.2514938255211792, + "grad_norm": 1.245806995398341, + "learning_rate": 6.734979057619873e-06, + "loss": 0.22486859560012817, + "step": 4713 + }, + { + "epoch": 1.2517593945027221, + "grad_norm": 1.3481589110906722, + "learning_rate": 6.730829236476111e-06, + "loss": 0.2818532884120941, + "step": 4714 + }, + { + "epoch": 1.252024963484265, + "grad_norm": 1.172531442878329, + "learning_rate": 6.7266800455899125e-06, + "loss": 0.2060810923576355, + "step": 4715 + }, + { + "epoch": 1.252290532465808, + "grad_norm": 1.2183128764116598, + "learning_rate": 6.722531485761199e-06, + "loss": 0.2183244377374649, + "step": 4716 + }, + { + "epoch": 1.252556101447351, + "grad_norm": 1.2596677279915016, + "learning_rate": 6.71838355778976e-06, + "loss": 0.24757327139377594, + "step": 4717 + }, + { + "epoch": 1.252821670428894, + "grad_norm": 1.3267776765958388, + "learning_rate": 6.714236262475268e-06, + "loss": 0.3058333396911621, + "step": 4718 + }, + { + "epoch": 1.2530872394104369, + "grad_norm": 1.1893155452841293, + "learning_rate": 6.71008960061727e-06, + "loss": 0.24095620214939117, + "step": 4719 + }, + { + "epoch": 1.2533528083919798, + "grad_norm": 1.3050165159615794, + "learning_rate": 6.705943573015199e-06, + "loss": 0.25614839792251587, + "step": 4720 + }, + { + "epoch": 1.2536183773735228, + "grad_norm": 1.2537185610498753, + "learning_rate": 6.701798180468356e-06, + "loss": 0.22295254468917847, + "step": 4721 + }, + { + "epoch": 1.2538839463550657, + "grad_norm": 1.1724661677534984, + "learning_rate": 6.697653423775926e-06, + "loss": 0.24783796072006226, + "step": 4722 + }, + { + "epoch": 1.2541495153366087, + "grad_norm": 1.5676339911360846, + "learning_rate": 6.693509303736969e-06, + "loss": 0.19702200591564178, + "step": 4723 + }, + { + "epoch": 1.2544150843181516, + "grad_norm": 1.2713976115459882, + "learning_rate": 6.689365821150421e-06, + "loss": 0.2539074122905731, + "step": 4724 + }, + { + "epoch": 1.2546806532996946, + "grad_norm": 1.2015875463338734, + "learning_rate": 6.6852229768150976e-06, + "loss": 0.2480372041463852, + "step": 4725 + }, + { + "epoch": 1.2549462222812375, + "grad_norm": 1.1742876462412417, + "learning_rate": 6.68108077152969e-06, + "loss": 0.2231048047542572, + "step": 4726 + }, + { + "epoch": 1.2552117912627805, + "grad_norm": 1.1571308721577904, + "learning_rate": 6.676939206092766e-06, + "loss": 0.260783851146698, + "step": 4727 + }, + { + "epoch": 1.2554773602443234, + "grad_norm": 1.2569537102203152, + "learning_rate": 6.67279828130277e-06, + "loss": 0.24069254100322723, + "step": 4728 + }, + { + "epoch": 1.2557429292258664, + "grad_norm": 1.1732343490674524, + "learning_rate": 6.668657997958027e-06, + "loss": 0.2578867971897125, + "step": 4729 + }, + { + "epoch": 1.2560084982074093, + "grad_norm": 1.102080552368197, + "learning_rate": 6.664518356856732e-06, + "loss": 0.20724457502365112, + "step": 4730 + }, + { + "epoch": 1.2562740671889523, + "grad_norm": 1.1527224778451435, + "learning_rate": 6.6603793587969586e-06, + "loss": 0.23107580840587616, + "step": 4731 + }, + { + "epoch": 1.2565396361704952, + "grad_norm": 1.123633807819834, + "learning_rate": 6.656241004576659e-06, + "loss": 0.2481832504272461, + "step": 4732 + }, + { + "epoch": 1.2568052051520382, + "grad_norm": 1.1353422900728998, + "learning_rate": 6.652103294993657e-06, + "loss": 0.2219698578119278, + "step": 4733 + }, + { + "epoch": 1.2570707741335811, + "grad_norm": 1.1538807443087884, + "learning_rate": 6.647966230845655e-06, + "loss": 0.2245863974094391, + "step": 4734 + }, + { + "epoch": 1.257336343115124, + "grad_norm": 1.1991392114731283, + "learning_rate": 6.643829812930231e-06, + "loss": 0.2086387574672699, + "step": 4735 + }, + { + "epoch": 1.257601912096667, + "grad_norm": 1.1702949625685939, + "learning_rate": 6.6396940420448355e-06, + "loss": 0.23484499752521515, + "step": 4736 + }, + { + "epoch": 1.25786748107821, + "grad_norm": 1.1449620939429583, + "learning_rate": 6.635558918986797e-06, + "loss": 0.22011062502861023, + "step": 4737 + }, + { + "epoch": 1.258133050059753, + "grad_norm": 1.240312422577115, + "learning_rate": 6.631424444553319e-06, + "loss": 0.2426830381155014, + "step": 4738 + }, + { + "epoch": 1.258398619041296, + "grad_norm": 1.2472398676845469, + "learning_rate": 6.627290619541481e-06, + "loss": 0.2702174484729767, + "step": 4739 + }, + { + "epoch": 1.2586641880228389, + "grad_norm": 1.4005529994015682, + "learning_rate": 6.623157444748234e-06, + "loss": 0.26594820618629456, + "step": 4740 + }, + { + "epoch": 1.2589297570043818, + "grad_norm": 1.2550785934224764, + "learning_rate": 6.619024920970405e-06, + "loss": 0.2546013593673706, + "step": 4741 + }, + { + "epoch": 1.2591953259859248, + "grad_norm": 1.425429985784882, + "learning_rate": 6.614893049004696e-06, + "loss": 0.27207985520362854, + "step": 4742 + }, + { + "epoch": 1.259460894967468, + "grad_norm": 1.4445692953489113, + "learning_rate": 6.610761829647685e-06, + "loss": 0.2640937566757202, + "step": 4743 + }, + { + "epoch": 1.2597264639490109, + "grad_norm": 1.4095791296432063, + "learning_rate": 6.60663126369582e-06, + "loss": 0.2890278697013855, + "step": 4744 + }, + { + "epoch": 1.2599920329305538, + "grad_norm": 1.1225606468440805, + "learning_rate": 6.602501351945425e-06, + "loss": 0.24610492587089539, + "step": 4745 + }, + { + "epoch": 1.2602576019120968, + "grad_norm": 1.5273064552741338, + "learning_rate": 6.598372095192699e-06, + "loss": 0.24946746230125427, + "step": 4746 + }, + { + "epoch": 1.2605231708936397, + "grad_norm": 1.0546449518544165, + "learning_rate": 6.594243494233717e-06, + "loss": 0.2369944453239441, + "step": 4747 + }, + { + "epoch": 1.2607887398751827, + "grad_norm": 1.180556169492091, + "learning_rate": 6.590115549864421e-06, + "loss": 0.20980143547058105, + "step": 4748 + }, + { + "epoch": 1.2610543088567256, + "grad_norm": 1.1524244978042124, + "learning_rate": 6.5859882628806315e-06, + "loss": 0.22930344939231873, + "step": 4749 + }, + { + "epoch": 1.2613198778382686, + "grad_norm": 1.1353386909454481, + "learning_rate": 6.5818616340780405e-06, + "loss": 0.22352416813373566, + "step": 4750 + }, + { + "epoch": 1.2615854468198116, + "grad_norm": 1.0615225488277533, + "learning_rate": 6.577735664252214e-06, + "loss": 0.2049327939748764, + "step": 4751 + }, + { + "epoch": 1.2618510158013545, + "grad_norm": 1.3420243952278277, + "learning_rate": 6.573610354198587e-06, + "loss": 0.21858355402946472, + "step": 4752 + }, + { + "epoch": 1.2621165847828975, + "grad_norm": 1.1248247337478985, + "learning_rate": 6.5694857047124786e-06, + "loss": 0.225118950009346, + "step": 4753 + }, + { + "epoch": 1.2623821537644404, + "grad_norm": 1.1623337764465298, + "learning_rate": 6.565361716589063e-06, + "loss": 0.25780409574508667, + "step": 4754 + }, + { + "epoch": 1.2626477227459834, + "grad_norm": 1.1580907073042885, + "learning_rate": 6.5612383906233964e-06, + "loss": 0.23507939279079437, + "step": 4755 + }, + { + "epoch": 1.2629132917275263, + "grad_norm": 1.1733914893757196, + "learning_rate": 6.557115727610417e-06, + "loss": 0.27884477376937866, + "step": 4756 + }, + { + "epoch": 1.2631788607090693, + "grad_norm": 1.145599873702901, + "learning_rate": 6.552993728344921e-06, + "loss": 0.2564120888710022, + "step": 4757 + }, + { + "epoch": 1.2634444296906122, + "grad_norm": 1.3139857622357067, + "learning_rate": 6.548872393621578e-06, + "loss": 0.259651243686676, + "step": 4758 + }, + { + "epoch": 1.2637099986721552, + "grad_norm": 1.2930462493551071, + "learning_rate": 6.544751724234937e-06, + "loss": 0.23473814129829407, + "step": 4759 + }, + { + "epoch": 1.2639755676536981, + "grad_norm": 1.4411652435541018, + "learning_rate": 6.540631720979411e-06, + "loss": 0.2447129189968109, + "step": 4760 + }, + { + "epoch": 1.264241136635241, + "grad_norm": 1.1968236723875711, + "learning_rate": 6.536512384649294e-06, + "loss": 0.22695237398147583, + "step": 4761 + }, + { + "epoch": 1.264506705616784, + "grad_norm": 1.117214929215876, + "learning_rate": 6.532393716038738e-06, + "loss": 0.24303656816482544, + "step": 4762 + }, + { + "epoch": 1.264772274598327, + "grad_norm": 1.2106972269991043, + "learning_rate": 6.528275715941776e-06, + "loss": 0.23911908268928528, + "step": 4763 + }, + { + "epoch": 1.26503784357987, + "grad_norm": 1.0480584899589354, + "learning_rate": 6.524158385152309e-06, + "loss": 0.19766747951507568, + "step": 4764 + }, + { + "epoch": 1.2653034125614129, + "grad_norm": 1.390914844473808, + "learning_rate": 6.520041724464114e-06, + "loss": 0.24074134230613708, + "step": 4765 + }, + { + "epoch": 1.2655689815429558, + "grad_norm": 1.3379815630375766, + "learning_rate": 6.515925734670834e-06, + "loss": 0.27557867765426636, + "step": 4766 + }, + { + "epoch": 1.2658345505244988, + "grad_norm": 1.3286252957995823, + "learning_rate": 6.511810416565979e-06, + "loss": 0.24387787282466888, + "step": 4767 + }, + { + "epoch": 1.2661001195060417, + "grad_norm": 1.4234035593814256, + "learning_rate": 6.507695770942939e-06, + "loss": 0.27863091230392456, + "step": 4768 + }, + { + "epoch": 1.2663656884875847, + "grad_norm": 1.1364646133588507, + "learning_rate": 6.503581798594965e-06, + "loss": 0.23589591681957245, + "step": 4769 + }, + { + "epoch": 1.2666312574691276, + "grad_norm": 1.1932509985997282, + "learning_rate": 6.499468500315185e-06, + "loss": 0.22869807481765747, + "step": 4770 + }, + { + "epoch": 1.2668968264506706, + "grad_norm": 1.2498634762148577, + "learning_rate": 6.495355876896592e-06, + "loss": 0.2351568192243576, + "step": 4771 + }, + { + "epoch": 1.2671623954322135, + "grad_norm": 1.1271253337210285, + "learning_rate": 6.491243929132052e-06, + "loss": 0.2291228175163269, + "step": 4772 + }, + { + "epoch": 1.2674279644137565, + "grad_norm": 1.2013953219342957, + "learning_rate": 6.487132657814297e-06, + "loss": 0.23203743994235992, + "step": 4773 + }, + { + "epoch": 1.2676935333952994, + "grad_norm": 1.0887907712326863, + "learning_rate": 6.483022063735938e-06, + "loss": 0.22035656869411469, + "step": 4774 + }, + { + "epoch": 1.2679591023768424, + "grad_norm": 1.1270651148723736, + "learning_rate": 6.478912147689448e-06, + "loss": 0.21576716005802155, + "step": 4775 + }, + { + "epoch": 1.2682246713583853, + "grad_norm": 1.3174966546949713, + "learning_rate": 6.474802910467171e-06, + "loss": 0.27764660120010376, + "step": 4776 + }, + { + "epoch": 1.2684902403399283, + "grad_norm": 1.2418434137314485, + "learning_rate": 6.4706943528613135e-06, + "loss": 0.23715822398662567, + "step": 4777 + }, + { + "epoch": 1.2687558093214713, + "grad_norm": 1.1794293567561218, + "learning_rate": 6.4665864756639606e-06, + "loss": 0.27764302492141724, + "step": 4778 + }, + { + "epoch": 1.2690213783030142, + "grad_norm": 1.2157630211554828, + "learning_rate": 6.4624792796670624e-06, + "loss": 0.21634885668754578, + "step": 4779 + }, + { + "epoch": 1.2692869472845572, + "grad_norm": 1.2217447541656432, + "learning_rate": 6.458372765662438e-06, + "loss": 0.27262234687805176, + "step": 4780 + }, + { + "epoch": 1.2695525162661, + "grad_norm": 1.1716437260315133, + "learning_rate": 6.454266934441775e-06, + "loss": 0.2219458371400833, + "step": 4781 + }, + { + "epoch": 1.269818085247643, + "grad_norm": 1.2515340549821425, + "learning_rate": 6.450161786796625e-06, + "loss": 0.22181497514247894, + "step": 4782 + }, + { + "epoch": 1.270083654229186, + "grad_norm": 1.1858127036353512, + "learning_rate": 6.446057323518422e-06, + "loss": 0.22642338275909424, + "step": 4783 + }, + { + "epoch": 1.270349223210729, + "grad_norm": 1.2243357553110101, + "learning_rate": 6.441953545398451e-06, + "loss": 0.239711195230484, + "step": 4784 + }, + { + "epoch": 1.270614792192272, + "grad_norm": 1.29507599792429, + "learning_rate": 6.437850453227872e-06, + "loss": 0.2422255128622055, + "step": 4785 + }, + { + "epoch": 1.2708803611738149, + "grad_norm": 1.3013507424737665, + "learning_rate": 6.433748047797715e-06, + "loss": 0.23184439539909363, + "step": 4786 + }, + { + "epoch": 1.2711459301553578, + "grad_norm": 1.3032581886502261, + "learning_rate": 6.429646329898873e-06, + "loss": 0.2737428843975067, + "step": 4787 + }, + { + "epoch": 1.2714114991369008, + "grad_norm": 1.2565288812855064, + "learning_rate": 6.4255453003221115e-06, + "loss": 0.23565897345542908, + "step": 4788 + }, + { + "epoch": 1.2716770681184437, + "grad_norm": 1.3665497750328797, + "learning_rate": 6.421444959858059e-06, + "loss": 0.24349254369735718, + "step": 4789 + }, + { + "epoch": 1.2719426370999867, + "grad_norm": 1.2050219186384792, + "learning_rate": 6.4173453092972115e-06, + "loss": 0.2637769281864166, + "step": 4790 + }, + { + "epoch": 1.2722082060815296, + "grad_norm": 1.0381858832581394, + "learning_rate": 6.413246349429934e-06, + "loss": 0.21420228481292725, + "step": 4791 + }, + { + "epoch": 1.2724737750630726, + "grad_norm": 1.1333618917642097, + "learning_rate": 6.409148081046461e-06, + "loss": 0.25270405411720276, + "step": 4792 + }, + { + "epoch": 1.2727393440446155, + "grad_norm": 1.270676964933882, + "learning_rate": 6.405050504936887e-06, + "loss": 0.2710546851158142, + "step": 4793 + }, + { + "epoch": 1.2730049130261585, + "grad_norm": 1.1608891040490155, + "learning_rate": 6.400953621891178e-06, + "loss": 0.2388489842414856, + "step": 4794 + }, + { + "epoch": 1.2732704820077014, + "grad_norm": 1.1600463634666516, + "learning_rate": 6.396857432699164e-06, + "loss": 0.24581485986709595, + "step": 4795 + }, + { + "epoch": 1.2735360509892444, + "grad_norm": 1.18464881130754, + "learning_rate": 6.3927619381505404e-06, + "loss": 0.24219104647636414, + "step": 4796 + }, + { + "epoch": 1.2738016199707873, + "grad_norm": 1.0878857914267965, + "learning_rate": 6.388667139034873e-06, + "loss": 0.22722014784812927, + "step": 4797 + }, + { + "epoch": 1.2740671889523303, + "grad_norm": 1.275017638940232, + "learning_rate": 6.384573036141589e-06, + "loss": 0.25177234411239624, + "step": 4798 + }, + { + "epoch": 1.2743327579338732, + "grad_norm": 1.2824350948041237, + "learning_rate": 6.380479630259983e-06, + "loss": 0.2291412651538849, + "step": 4799 + }, + { + "epoch": 1.2745983269154162, + "grad_norm": 1.3215047708165757, + "learning_rate": 6.376386922179216e-06, + "loss": 0.2528606951236725, + "step": 4800 + }, + { + "epoch": 1.2748638958969591, + "grad_norm": 1.11001311385955, + "learning_rate": 6.372294912688315e-06, + "loss": 0.21383032202720642, + "step": 4801 + }, + { + "epoch": 1.275129464878502, + "grad_norm": 1.2162134010863295, + "learning_rate": 6.368203602576168e-06, + "loss": 0.2538087069988251, + "step": 4802 + }, + { + "epoch": 1.275395033860045, + "grad_norm": 1.2127822206191197, + "learning_rate": 6.364112992631537e-06, + "loss": 0.24437417089939117, + "step": 4803 + }, + { + "epoch": 1.275660602841588, + "grad_norm": 1.1678428848154245, + "learning_rate": 6.360023083643036e-06, + "loss": 0.2347753942012787, + "step": 4804 + }, + { + "epoch": 1.275926171823131, + "grad_norm": 1.226812886332051, + "learning_rate": 6.3559338763991576e-06, + "loss": 0.271645188331604, + "step": 4805 + }, + { + "epoch": 1.276191740804674, + "grad_norm": 1.2088165730060163, + "learning_rate": 6.35184537168825e-06, + "loss": 0.2465275228023529, + "step": 4806 + }, + { + "epoch": 1.2764573097862169, + "grad_norm": 1.216147524532817, + "learning_rate": 6.347757570298527e-06, + "loss": 0.26494044065475464, + "step": 4807 + }, + { + "epoch": 1.2767228787677598, + "grad_norm": 3.360286997098956, + "learning_rate": 6.343670473018071e-06, + "loss": 0.28292080760002136, + "step": 4808 + }, + { + "epoch": 1.2769884477493028, + "grad_norm": 1.2160142828428218, + "learning_rate": 6.339584080634824e-06, + "loss": 0.2525850534439087, + "step": 4809 + }, + { + "epoch": 1.2772540167308457, + "grad_norm": 1.224576908350391, + "learning_rate": 6.335498393936597e-06, + "loss": 0.22056345641613007, + "step": 4810 + }, + { + "epoch": 1.2775195857123887, + "grad_norm": 1.1603347806824698, + "learning_rate": 6.331413413711061e-06, + "loss": 0.23081058263778687, + "step": 4811 + }, + { + "epoch": 1.2777851546939316, + "grad_norm": 1.2309265633693007, + "learning_rate": 6.327329140745751e-06, + "loss": 0.2722470760345459, + "step": 4812 + }, + { + "epoch": 1.2780507236754748, + "grad_norm": 1.2598117885787161, + "learning_rate": 6.32324557582807e-06, + "loss": 0.24454641342163086, + "step": 4813 + }, + { + "epoch": 1.2783162926570177, + "grad_norm": 1.2713820573097572, + "learning_rate": 6.319162719745277e-06, + "loss": 0.21884413063526154, + "step": 4814 + }, + { + "epoch": 1.2785818616385607, + "grad_norm": 1.276590514388197, + "learning_rate": 6.3150805732845e-06, + "loss": 0.2737545669078827, + "step": 4815 + }, + { + "epoch": 1.2788474306201036, + "grad_norm": 1.1747258996206047, + "learning_rate": 6.31099913723273e-06, + "loss": 0.2478230595588684, + "step": 4816 + }, + { + "epoch": 1.2791129996016466, + "grad_norm": 1.2461752717378811, + "learning_rate": 6.306918412376817e-06, + "loss": 0.2508094310760498, + "step": 4817 + }, + { + "epoch": 1.2793785685831895, + "grad_norm": 1.267840547546021, + "learning_rate": 6.302838399503477e-06, + "loss": 0.24666383862495422, + "step": 4818 + }, + { + "epoch": 1.2796441375647325, + "grad_norm": 1.176059099377582, + "learning_rate": 6.298759099399292e-06, + "loss": 0.27833491563796997, + "step": 4819 + }, + { + "epoch": 1.2799097065462754, + "grad_norm": 1.1948595147219725, + "learning_rate": 6.294680512850699e-06, + "loss": 0.23092475533485413, + "step": 4820 + }, + { + "epoch": 1.2801752755278184, + "grad_norm": 1.1935160504644853, + "learning_rate": 6.290602640644005e-06, + "loss": 0.2714667022228241, + "step": 4821 + }, + { + "epoch": 1.2804408445093614, + "grad_norm": 1.1769422055863235, + "learning_rate": 6.286525483565373e-06, + "loss": 0.23292411863803864, + "step": 4822 + }, + { + "epoch": 1.2807064134909043, + "grad_norm": 1.1322856806053188, + "learning_rate": 6.282449042400831e-06, + "loss": 0.23809143900871277, + "step": 4823 + }, + { + "epoch": 1.2809719824724473, + "grad_norm": 1.0235534573008647, + "learning_rate": 6.278373317936269e-06, + "loss": 0.22593267261981964, + "step": 4824 + }, + { + "epoch": 1.2812375514539902, + "grad_norm": 1.2491300300411192, + "learning_rate": 6.274298310957439e-06, + "loss": 0.26024624705314636, + "step": 4825 + }, + { + "epoch": 1.2815031204355332, + "grad_norm": 1.138185007529017, + "learning_rate": 6.270224022249957e-06, + "loss": 0.22418126463890076, + "step": 4826 + }, + { + "epoch": 1.2817686894170761, + "grad_norm": 1.2374650134400174, + "learning_rate": 6.266150452599288e-06, + "loss": 0.26452577114105225, + "step": 4827 + }, + { + "epoch": 1.282034258398619, + "grad_norm": 1.2453587043668277, + "learning_rate": 6.262077602790779e-06, + "loss": 0.24412381649017334, + "step": 4828 + }, + { + "epoch": 1.282299827380162, + "grad_norm": 1.1670875672055734, + "learning_rate": 6.258005473609623e-06, + "loss": 0.22476118803024292, + "step": 4829 + }, + { + "epoch": 1.282565396361705, + "grad_norm": 1.1744502576491334, + "learning_rate": 6.25393406584088e-06, + "loss": 0.2208547294139862, + "step": 4830 + }, + { + "epoch": 1.282830965343248, + "grad_norm": 1.340282271944368, + "learning_rate": 6.249863380269467e-06, + "loss": 0.2903650999069214, + "step": 4831 + }, + { + "epoch": 1.2830965343247909, + "grad_norm": 1.2018727401561922, + "learning_rate": 6.245793417680168e-06, + "loss": 0.24413639307022095, + "step": 4832 + }, + { + "epoch": 1.2833621033063338, + "grad_norm": 1.162422850806728, + "learning_rate": 6.241724178857621e-06, + "loss": 0.2193944752216339, + "step": 4833 + }, + { + "epoch": 1.2836276722878768, + "grad_norm": 1.2159517583191957, + "learning_rate": 6.237655664586326e-06, + "loss": 0.22847513854503632, + "step": 4834 + }, + { + "epoch": 1.2838932412694197, + "grad_norm": 1.4211501406512423, + "learning_rate": 6.233587875650648e-06, + "loss": 0.269639253616333, + "step": 4835 + }, + { + "epoch": 1.2841588102509627, + "grad_norm": 1.3153478129856002, + "learning_rate": 6.229520812834801e-06, + "loss": 0.26329392194747925, + "step": 4836 + }, + { + "epoch": 1.2844243792325056, + "grad_norm": 1.0811891602166492, + "learning_rate": 6.225454476922877e-06, + "loss": 0.18800514936447144, + "step": 4837 + }, + { + "epoch": 1.2846899482140486, + "grad_norm": 1.2987987933289529, + "learning_rate": 6.2213888686988125e-06, + "loss": 0.2617965340614319, + "step": 4838 + }, + { + "epoch": 1.2849555171955915, + "grad_norm": 1.2029687476094635, + "learning_rate": 6.217323988946411e-06, + "loss": 0.22468717396259308, + "step": 4839 + }, + { + "epoch": 1.2852210861771345, + "grad_norm": 1.2126923104659393, + "learning_rate": 6.213259838449333e-06, + "loss": 0.22465646266937256, + "step": 4840 + }, + { + "epoch": 1.2854866551586774, + "grad_norm": 1.243457795287806, + "learning_rate": 6.209196417991096e-06, + "loss": 0.2655075490474701, + "step": 4841 + }, + { + "epoch": 1.2857522241402204, + "grad_norm": 1.2818071805394324, + "learning_rate": 6.205133728355081e-06, + "loss": 0.25313282012939453, + "step": 4842 + }, + { + "epoch": 1.2860177931217633, + "grad_norm": 1.2136879668034726, + "learning_rate": 6.201071770324527e-06, + "loss": 0.23176322877407074, + "step": 4843 + }, + { + "epoch": 1.2862833621033063, + "grad_norm": 1.3628911983979357, + "learning_rate": 6.197010544682531e-06, + "loss": 0.27396953105926514, + "step": 4844 + }, + { + "epoch": 1.2865489310848492, + "grad_norm": 1.2333432651370633, + "learning_rate": 6.192950052212046e-06, + "loss": 0.24966171383857727, + "step": 4845 + }, + { + "epoch": 1.2868145000663922, + "grad_norm": 1.184789059228899, + "learning_rate": 6.188890293695895e-06, + "loss": 0.23290866613388062, + "step": 4846 + }, + { + "epoch": 1.2870800690479351, + "grad_norm": 1.2080105834836115, + "learning_rate": 6.184831269916749e-06, + "loss": 0.2368975132703781, + "step": 4847 + }, + { + "epoch": 1.287345638029478, + "grad_norm": 1.35199057217418, + "learning_rate": 6.180772981657139e-06, + "loss": 0.25305312871932983, + "step": 4848 + }, + { + "epoch": 1.287611207011021, + "grad_norm": 1.1825950927599171, + "learning_rate": 6.176715429699452e-06, + "loss": 0.22752982378005981, + "step": 4849 + }, + { + "epoch": 1.287876775992564, + "grad_norm": 1.152582857494987, + "learning_rate": 6.1726586148259395e-06, + "loss": 0.22426503896713257, + "step": 4850 + }, + { + "epoch": 1.288142344974107, + "grad_norm": 1.2203273234703247, + "learning_rate": 6.168602537818706e-06, + "loss": 0.21261993050575256, + "step": 4851 + }, + { + "epoch": 1.28840791395565, + "grad_norm": 1.1907151660933317, + "learning_rate": 6.1645471994597185e-06, + "loss": 0.237461656332016, + "step": 4852 + }, + { + "epoch": 1.2886734829371929, + "grad_norm": 1.113120156932308, + "learning_rate": 6.160492600530794e-06, + "loss": 0.1926390826702118, + "step": 4853 + }, + { + "epoch": 1.2889390519187358, + "grad_norm": 1.6824005161064397, + "learning_rate": 6.156438741813608e-06, + "loss": 0.22673740983009338, + "step": 4854 + }, + { + "epoch": 1.289204620900279, + "grad_norm": 1.1453361708789405, + "learning_rate": 6.15238562408971e-06, + "loss": 0.22148582339286804, + "step": 4855 + }, + { + "epoch": 1.289470189881822, + "grad_norm": 1.3581323367394031, + "learning_rate": 6.148333248140483e-06, + "loss": 0.28319716453552246, + "step": 4856 + }, + { + "epoch": 1.289735758863365, + "grad_norm": 1.4367360633574449, + "learning_rate": 6.14428161474718e-06, + "loss": 0.23505647480487823, + "step": 4857 + }, + { + "epoch": 1.2900013278449078, + "grad_norm": 1.2052965186154045, + "learning_rate": 6.140230724690908e-06, + "loss": 0.24323523044586182, + "step": 4858 + }, + { + "epoch": 1.2902668968264508, + "grad_norm": 1.2357784405363281, + "learning_rate": 6.136180578752629e-06, + "loss": 0.22818386554718018, + "step": 4859 + }, + { + "epoch": 1.2905324658079937, + "grad_norm": 1.2670464740614045, + "learning_rate": 6.132131177713165e-06, + "loss": 0.24285198748111725, + "step": 4860 + }, + { + "epoch": 1.2907980347895367, + "grad_norm": 1.1369753370104339, + "learning_rate": 6.128082522353194e-06, + "loss": 0.24115213751792908, + "step": 4861 + }, + { + "epoch": 1.2910636037710796, + "grad_norm": 1.2213111344560537, + "learning_rate": 6.124034613453247e-06, + "loss": 0.21564510464668274, + "step": 4862 + }, + { + "epoch": 1.2913291727526226, + "grad_norm": 1.299973209896211, + "learning_rate": 6.119987451793711e-06, + "loss": 0.2329743504524231, + "step": 4863 + }, + { + "epoch": 1.2915947417341656, + "grad_norm": 1.2218786239106318, + "learning_rate": 6.115941038154835e-06, + "loss": 0.2161208689212799, + "step": 4864 + }, + { + "epoch": 1.2918603107157085, + "grad_norm": 1.2078035628631776, + "learning_rate": 6.111895373316721e-06, + "loss": 0.22765520215034485, + "step": 4865 + }, + { + "epoch": 1.2921258796972515, + "grad_norm": 1.2199257873933993, + "learning_rate": 6.107850458059322e-06, + "loss": 0.25506818294525146, + "step": 4866 + }, + { + "epoch": 1.2923914486787944, + "grad_norm": 1.2014544077782259, + "learning_rate": 6.1038062931624505e-06, + "loss": 0.22543852031230927, + "step": 4867 + }, + { + "epoch": 1.2926570176603374, + "grad_norm": 1.282222410309602, + "learning_rate": 6.099762879405776e-06, + "loss": 0.24295030534267426, + "step": 4868 + }, + { + "epoch": 1.2929225866418803, + "grad_norm": 1.2221545432256802, + "learning_rate": 6.095720217568819e-06, + "loss": 0.2385009229183197, + "step": 4869 + }, + { + "epoch": 1.2931881556234233, + "grad_norm": 1.119514297375773, + "learning_rate": 6.091678308430956e-06, + "loss": 0.21410472691059113, + "step": 4870 + }, + { + "epoch": 1.2934537246049662, + "grad_norm": 1.299309717988783, + "learning_rate": 6.087637152771422e-06, + "loss": 0.25934773683547974, + "step": 4871 + }, + { + "epoch": 1.2937192935865092, + "grad_norm": 1.1783576597419445, + "learning_rate": 6.0835967513693e-06, + "loss": 0.24584373831748962, + "step": 4872 + }, + { + "epoch": 1.2939848625680521, + "grad_norm": 1.3413866916188153, + "learning_rate": 6.079557105003537e-06, + "loss": 0.2403055876493454, + "step": 4873 + }, + { + "epoch": 1.294250431549595, + "grad_norm": 1.2348806886655737, + "learning_rate": 6.075518214452927e-06, + "loss": 0.23861736059188843, + "step": 4874 + }, + { + "epoch": 1.294516000531138, + "grad_norm": 1.2099712971645404, + "learning_rate": 6.071480080496119e-06, + "loss": 0.21356427669525146, + "step": 4875 + }, + { + "epoch": 1.294781569512681, + "grad_norm": 1.314183683224707, + "learning_rate": 6.067442703911621e-06, + "loss": 0.2835869789123535, + "step": 4876 + }, + { + "epoch": 1.295047138494224, + "grad_norm": 1.1868362719294436, + "learning_rate": 6.063406085477788e-06, + "loss": 0.24233242869377136, + "step": 4877 + }, + { + "epoch": 1.2953127074757669, + "grad_norm": 1.2596980829406919, + "learning_rate": 6.059370225972834e-06, + "loss": 0.24986369907855988, + "step": 4878 + }, + { + "epoch": 1.2955782764573098, + "grad_norm": 1.2583930460503605, + "learning_rate": 6.055335126174826e-06, + "loss": 0.2445756494998932, + "step": 4879 + }, + { + "epoch": 1.2958438454388528, + "grad_norm": 1.0635663336037695, + "learning_rate": 6.0513007868616825e-06, + "loss": 0.21331898868083954, + "step": 4880 + }, + { + "epoch": 1.2961094144203957, + "grad_norm": 1.1578193819974294, + "learning_rate": 6.047267208811174e-06, + "loss": 0.2782329320907593, + "step": 4881 + }, + { + "epoch": 1.2963749834019387, + "grad_norm": 2.326385436360766, + "learning_rate": 6.043234392800932e-06, + "loss": 0.20866765081882477, + "step": 4882 + }, + { + "epoch": 1.2966405523834816, + "grad_norm": 1.3211750202424803, + "learning_rate": 6.039202339608432e-06, + "loss": 0.2517815828323364, + "step": 4883 + }, + { + "epoch": 1.2969061213650246, + "grad_norm": 1.283845753322191, + "learning_rate": 6.03517105001101e-06, + "loss": 0.2617926597595215, + "step": 4884 + }, + { + "epoch": 1.2971716903465675, + "grad_norm": 1.3255504140080887, + "learning_rate": 6.0311405247858465e-06, + "loss": 0.24753305315971375, + "step": 4885 + }, + { + "epoch": 1.2974372593281105, + "grad_norm": 1.1805849927447047, + "learning_rate": 6.027110764709982e-06, + "loss": 0.19791719317436218, + "step": 4886 + }, + { + "epoch": 1.2977028283096534, + "grad_norm": 1.236398594932959, + "learning_rate": 6.023081770560307e-06, + "loss": 0.243608757853508, + "step": 4887 + }, + { + "epoch": 1.2979683972911964, + "grad_norm": 1.3652744342035896, + "learning_rate": 6.019053543113564e-06, + "loss": 0.20469853281974792, + "step": 4888 + }, + { + "epoch": 1.2982339662727393, + "grad_norm": 1.4682720215540639, + "learning_rate": 6.015026083146345e-06, + "loss": 0.25613903999328613, + "step": 4889 + }, + { + "epoch": 1.2984995352542823, + "grad_norm": 1.236223607561111, + "learning_rate": 6.010999391435097e-06, + "loss": 0.23349006474018097, + "step": 4890 + }, + { + "epoch": 1.2987651042358253, + "grad_norm": 1.1137410591057113, + "learning_rate": 6.006973468756124e-06, + "loss": 0.23646268248558044, + "step": 4891 + }, + { + "epoch": 1.2990306732173682, + "grad_norm": 1.2845979720118916, + "learning_rate": 6.002948315885572e-06, + "loss": 0.2371794581413269, + "step": 4892 + }, + { + "epoch": 1.2992962421989112, + "grad_norm": 1.1150236044260142, + "learning_rate": 5.998923933599443e-06, + "loss": 0.23791949450969696, + "step": 4893 + }, + { + "epoch": 1.299561811180454, + "grad_norm": 1.2865838186648229, + "learning_rate": 5.994900322673593e-06, + "loss": 0.26923009753227234, + "step": 4894 + }, + { + "epoch": 1.299827380161997, + "grad_norm": 1.2724647699376699, + "learning_rate": 5.990877483883723e-06, + "loss": 0.20164884626865387, + "step": 4895 + }, + { + "epoch": 1.30009294914354, + "grad_norm": 1.1263986142938482, + "learning_rate": 5.986855418005393e-06, + "loss": 0.22345462441444397, + "step": 4896 + }, + { + "epoch": 1.300358518125083, + "grad_norm": 1.2936789930425872, + "learning_rate": 5.982834125814007e-06, + "loss": 0.26678675413131714, + "step": 4897 + }, + { + "epoch": 1.300624087106626, + "grad_norm": 1.3112472329084983, + "learning_rate": 5.978813608084825e-06, + "loss": 0.24674496054649353, + "step": 4898 + }, + { + "epoch": 1.3008896560881689, + "grad_norm": 1.3746634467420622, + "learning_rate": 5.974793865592947e-06, + "loss": 0.2804900109767914, + "step": 4899 + }, + { + "epoch": 1.3011552250697118, + "grad_norm": 1.3113866221822363, + "learning_rate": 5.970774899113345e-06, + "loss": 0.2413155734539032, + "step": 4900 + }, + { + "epoch": 1.3014207940512548, + "grad_norm": 1.139036608300987, + "learning_rate": 5.96675670942082e-06, + "loss": 0.21217301487922668, + "step": 4901 + }, + { + "epoch": 1.3016863630327977, + "grad_norm": 1.2012277530250777, + "learning_rate": 5.962739297290035e-06, + "loss": 0.23362940549850464, + "step": 4902 + }, + { + "epoch": 1.3019519320143407, + "grad_norm": 1.251148135143295, + "learning_rate": 5.958722663495499e-06, + "loss": 0.2669242322444916, + "step": 4903 + }, + { + "epoch": 1.3022175009958836, + "grad_norm": 1.2365395348631665, + "learning_rate": 5.95470680881157e-06, + "loss": 0.2234608232975006, + "step": 4904 + }, + { + "epoch": 1.3024830699774266, + "grad_norm": 1.2441781101215288, + "learning_rate": 5.95069173401246e-06, + "loss": 0.25150394439697266, + "step": 4905 + }, + { + "epoch": 1.3027486389589695, + "grad_norm": 1.127228294882686, + "learning_rate": 5.9466774398722264e-06, + "loss": 0.2408430427312851, + "step": 4906 + }, + { + "epoch": 1.3030142079405125, + "grad_norm": 1.1200862415380408, + "learning_rate": 5.942663927164776e-06, + "loss": 0.2197013795375824, + "step": 4907 + }, + { + "epoch": 1.3032797769220554, + "grad_norm": 1.1474317141184802, + "learning_rate": 5.938651196663865e-06, + "loss": 0.2224964201450348, + "step": 4908 + }, + { + "epoch": 1.3035453459035984, + "grad_norm": 1.313380369558454, + "learning_rate": 5.934639249143108e-06, + "loss": 0.26466232538223267, + "step": 4909 + }, + { + "epoch": 1.3038109148851413, + "grad_norm": 1.2910852400248352, + "learning_rate": 5.930628085375958e-06, + "loss": 0.257996141910553, + "step": 4910 + }, + { + "epoch": 1.3040764838666843, + "grad_norm": 1.2056479933898356, + "learning_rate": 5.92661770613572e-06, + "loss": 0.21995162963867188, + "step": 4911 + }, + { + "epoch": 1.3043420528482272, + "grad_norm": 1.3003100511120855, + "learning_rate": 5.922608112195546e-06, + "loss": 0.26007258892059326, + "step": 4912 + }, + { + "epoch": 1.3046076218297702, + "grad_norm": 1.2951583817832037, + "learning_rate": 5.918599304328442e-06, + "loss": 0.25168827176094055, + "step": 4913 + }, + { + "epoch": 1.3048731908113131, + "grad_norm": 1.1932184000685677, + "learning_rate": 5.9145912833072535e-06, + "loss": 0.24686852097511292, + "step": 4914 + }, + { + "epoch": 1.305138759792856, + "grad_norm": 1.1951264683753895, + "learning_rate": 5.910584049904684e-06, + "loss": 0.247032031416893, + "step": 4915 + }, + { + "epoch": 1.305404328774399, + "grad_norm": 1.1517786776797445, + "learning_rate": 5.906577604893278e-06, + "loss": 0.21644674241542816, + "step": 4916 + }, + { + "epoch": 1.305669897755942, + "grad_norm": 1.3685662184124912, + "learning_rate": 5.9025719490454304e-06, + "loss": 0.28093478083610535, + "step": 4917 + }, + { + "epoch": 1.305935466737485, + "grad_norm": 1.2246452754262638, + "learning_rate": 5.898567083133389e-06, + "loss": 0.23731757700443268, + "step": 4918 + }, + { + "epoch": 1.306201035719028, + "grad_norm": 1.1125400405938466, + "learning_rate": 5.894563007929243e-06, + "loss": 0.20725491642951965, + "step": 4919 + }, + { + "epoch": 1.3064666047005709, + "grad_norm": 1.3186749566879576, + "learning_rate": 5.89055972420493e-06, + "loss": 0.2509433329105377, + "step": 4920 + }, + { + "epoch": 1.3067321736821138, + "grad_norm": 1.2793911736037649, + "learning_rate": 5.886557232732235e-06, + "loss": 0.2611580491065979, + "step": 4921 + }, + { + "epoch": 1.3069977426636568, + "grad_norm": 1.1754660821918204, + "learning_rate": 5.882555534282792e-06, + "loss": 0.20567595958709717, + "step": 4922 + }, + { + "epoch": 1.3072633116451997, + "grad_norm": 1.2179299933591687, + "learning_rate": 5.878554629628081e-06, + "loss": 0.22851137816905975, + "step": 4923 + }, + { + "epoch": 1.3075288806267427, + "grad_norm": 1.2283350051517878, + "learning_rate": 5.874554519539431e-06, + "loss": 0.24295902252197266, + "step": 4924 + }, + { + "epoch": 1.3077944496082856, + "grad_norm": 1.4565590371796837, + "learning_rate": 5.870555204788013e-06, + "loss": 0.29564642906188965, + "step": 4925 + }, + { + "epoch": 1.3080600185898288, + "grad_norm": 1.1906652754397118, + "learning_rate": 5.8665566861448465e-06, + "loss": 0.2399739921092987, + "step": 4926 + }, + { + "epoch": 1.3083255875713717, + "grad_norm": 1.2056826487968673, + "learning_rate": 5.862558964380806e-06, + "loss": 0.23882555961608887, + "step": 4927 + }, + { + "epoch": 1.3085911565529147, + "grad_norm": 1.2167231777259742, + "learning_rate": 5.858562040266599e-06, + "loss": 0.2510842978954315, + "step": 4928 + }, + { + "epoch": 1.3088567255344576, + "grad_norm": 1.3760419048772665, + "learning_rate": 5.854565914572787e-06, + "loss": 0.257358193397522, + "step": 4929 + }, + { + "epoch": 1.3091222945160006, + "grad_norm": 1.1144476904886809, + "learning_rate": 5.850570588069775e-06, + "loss": 0.23228219151496887, + "step": 4930 + }, + { + "epoch": 1.3093878634975435, + "grad_norm": 1.2711888334314898, + "learning_rate": 5.846576061527818e-06, + "loss": 0.2234456092119217, + "step": 4931 + }, + { + "epoch": 1.3096534324790865, + "grad_norm": 1.1978737759145446, + "learning_rate": 5.842582335717009e-06, + "loss": 0.2273438423871994, + "step": 4932 + }, + { + "epoch": 1.3099190014606295, + "grad_norm": 1.2382395020505186, + "learning_rate": 5.838589411407294e-06, + "loss": 0.2423306405544281, + "step": 4933 + }, + { + "epoch": 1.3101845704421724, + "grad_norm": 1.2388376015521172, + "learning_rate": 5.834597289368463e-06, + "loss": 0.266438364982605, + "step": 4934 + }, + { + "epoch": 1.3104501394237154, + "grad_norm": 1.2553012161793193, + "learning_rate": 5.830605970370142e-06, + "loss": 0.2469342052936554, + "step": 4935 + }, + { + "epoch": 1.3107157084052583, + "grad_norm": 1.2077087937137967, + "learning_rate": 5.8266154551818225e-06, + "loss": 0.2834509611129761, + "step": 4936 + }, + { + "epoch": 1.3109812773868013, + "grad_norm": 1.3037377411135151, + "learning_rate": 5.822625744572821e-06, + "loss": 0.2615162134170532, + "step": 4937 + }, + { + "epoch": 1.3112468463683442, + "grad_norm": 1.1529903033018742, + "learning_rate": 5.818636839312309e-06, + "loss": 0.2247931957244873, + "step": 4938 + }, + { + "epoch": 1.3115124153498872, + "grad_norm": 1.162136486746663, + "learning_rate": 5.814648740169299e-06, + "loss": 0.23759335279464722, + "step": 4939 + }, + { + "epoch": 1.3117779843314301, + "grad_norm": 1.2647326324758852, + "learning_rate": 5.8106614479126515e-06, + "loss": 0.23381784558296204, + "step": 4940 + }, + { + "epoch": 1.312043553312973, + "grad_norm": 1.2132087226777075, + "learning_rate": 5.8066749633110675e-06, + "loss": 0.2671264410018921, + "step": 4941 + }, + { + "epoch": 1.312309122294516, + "grad_norm": 1.09997395594631, + "learning_rate": 5.8026892871330944e-06, + "loss": 0.226065531373024, + "step": 4942 + }, + { + "epoch": 1.312574691276059, + "grad_norm": 1.3057172624305828, + "learning_rate": 5.798704420147124e-06, + "loss": 0.2654735743999481, + "step": 4943 + }, + { + "epoch": 1.312840260257602, + "grad_norm": 1.2538641402604982, + "learning_rate": 5.794720363121389e-06, + "loss": 0.23757833242416382, + "step": 4944 + }, + { + "epoch": 1.3131058292391449, + "grad_norm": 1.2131030914710175, + "learning_rate": 5.790737116823975e-06, + "loss": 0.2561591565608978, + "step": 4945 + }, + { + "epoch": 1.3133713982206878, + "grad_norm": 1.1698592689009908, + "learning_rate": 5.7867546820227995e-06, + "loss": 0.22105304896831512, + "step": 4946 + }, + { + "epoch": 1.3136369672022308, + "grad_norm": 1.190016500907537, + "learning_rate": 5.7827730594856325e-06, + "loss": 0.2485857605934143, + "step": 4947 + }, + { + "epoch": 1.3139025361837737, + "grad_norm": 1.2087719424455774, + "learning_rate": 5.7787922499800804e-06, + "loss": 0.21256676316261292, + "step": 4948 + }, + { + "epoch": 1.3141681051653167, + "grad_norm": 1.2561271472593831, + "learning_rate": 5.774812254273604e-06, + "loss": 0.2700715661048889, + "step": 4949 + }, + { + "epoch": 1.3144336741468596, + "grad_norm": 1.072264118800501, + "learning_rate": 5.770833073133488e-06, + "loss": 0.22239381074905396, + "step": 4950 + }, + { + "epoch": 1.3146992431284026, + "grad_norm": 1.2811464089131772, + "learning_rate": 5.766854707326878e-06, + "loss": 0.22973249852657318, + "step": 4951 + }, + { + "epoch": 1.3149648121099455, + "grad_norm": 1.3904264621036453, + "learning_rate": 5.762877157620751e-06, + "loss": 0.27923673391342163, + "step": 4952 + }, + { + "epoch": 1.3152303810914885, + "grad_norm": 1.1321859486950596, + "learning_rate": 5.758900424781939e-06, + "loss": 0.23142218589782715, + "step": 4953 + }, + { + "epoch": 1.3154959500730314, + "grad_norm": 1.2732500147617782, + "learning_rate": 5.754924509577107e-06, + "loss": 0.23697996139526367, + "step": 4954 + }, + { + "epoch": 1.3157615190545744, + "grad_norm": 1.2838523265227373, + "learning_rate": 5.750949412772764e-06, + "loss": 0.27600961923599243, + "step": 4955 + }, + { + "epoch": 1.3160270880361173, + "grad_norm": 1.1644607269636458, + "learning_rate": 5.74697513513526e-06, + "loss": 0.2300705760717392, + "step": 4956 + }, + { + "epoch": 1.3162926570176603, + "grad_norm": 1.2927833273456342, + "learning_rate": 5.743001677430791e-06, + "loss": 0.2771111726760864, + "step": 4957 + }, + { + "epoch": 1.3165582259992032, + "grad_norm": 1.2582954956741819, + "learning_rate": 5.739029040425391e-06, + "loss": 0.2195657342672348, + "step": 4958 + }, + { + "epoch": 1.3168237949807462, + "grad_norm": 1.3450534906440017, + "learning_rate": 5.735057224884939e-06, + "loss": 0.2877159118652344, + "step": 4959 + }, + { + "epoch": 1.3170893639622892, + "grad_norm": 1.2211564124942835, + "learning_rate": 5.731086231575154e-06, + "loss": 0.264115571975708, + "step": 4960 + }, + { + "epoch": 1.317354932943832, + "grad_norm": 1.1286607753384608, + "learning_rate": 5.727116061261593e-06, + "loss": 0.22574637830257416, + "step": 4961 + }, + { + "epoch": 1.317620501925375, + "grad_norm": 1.3177978069758023, + "learning_rate": 5.723146714709664e-06, + "loss": 0.26063698530197144, + "step": 4962 + }, + { + "epoch": 1.317886070906918, + "grad_norm": 1.2211473527893268, + "learning_rate": 5.719178192684611e-06, + "loss": 0.26272428035736084, + "step": 4963 + }, + { + "epoch": 1.318151639888461, + "grad_norm": 1.257373941755789, + "learning_rate": 5.715210495951513e-06, + "loss": 0.27188578248023987, + "step": 4964 + }, + { + "epoch": 1.318417208870004, + "grad_norm": 1.2786927551317604, + "learning_rate": 5.711243625275296e-06, + "loss": 0.26374363899230957, + "step": 4965 + }, + { + "epoch": 1.3186827778515469, + "grad_norm": 1.2469422291735242, + "learning_rate": 5.7072775814207275e-06, + "loss": 0.24819093942642212, + "step": 4966 + }, + { + "epoch": 1.3189483468330898, + "grad_norm": 1.3834225319345155, + "learning_rate": 5.703312365152412e-06, + "loss": 0.24387019872665405, + "step": 4967 + }, + { + "epoch": 1.319213915814633, + "grad_norm": 1.2919715806670669, + "learning_rate": 5.699347977234799e-06, + "loss": 0.2198091745376587, + "step": 4968 + }, + { + "epoch": 1.319479484796176, + "grad_norm": 1.3500197578827224, + "learning_rate": 5.695384418432174e-06, + "loss": 0.24349649250507355, + "step": 4969 + }, + { + "epoch": 1.319745053777719, + "grad_norm": 1.238323956307032, + "learning_rate": 5.691421689508661e-06, + "loss": 0.2330506294965744, + "step": 4970 + }, + { + "epoch": 1.3200106227592618, + "grad_norm": 1.2015417123740977, + "learning_rate": 5.687459791228234e-06, + "loss": 0.22821848094463348, + "step": 4971 + }, + { + "epoch": 1.3202761917408048, + "grad_norm": 1.1813366864368284, + "learning_rate": 5.683498724354699e-06, + "loss": 0.2342798113822937, + "step": 4972 + }, + { + "epoch": 1.3205417607223477, + "grad_norm": 1.0659168750954966, + "learning_rate": 5.679538489651702e-06, + "loss": 0.19689922034740448, + "step": 4973 + }, + { + "epoch": 1.3208073297038907, + "grad_norm": 1.1808385090527131, + "learning_rate": 5.675579087882727e-06, + "loss": 0.23910056054592133, + "step": 4974 + }, + { + "epoch": 1.3210728986854336, + "grad_norm": 1.381638431012013, + "learning_rate": 5.671620519811105e-06, + "loss": 0.25725993514060974, + "step": 4975 + }, + { + "epoch": 1.3213384676669766, + "grad_norm": 1.3528699347449313, + "learning_rate": 5.667662786199997e-06, + "loss": 0.3030434250831604, + "step": 4976 + }, + { + "epoch": 1.3216040366485196, + "grad_norm": 1.1182092617897728, + "learning_rate": 5.6637058878124075e-06, + "loss": 0.223737433552742, + "step": 4977 + }, + { + "epoch": 1.3218696056300625, + "grad_norm": 1.07766141822832, + "learning_rate": 5.659749825411183e-06, + "loss": 0.21480265259742737, + "step": 4978 + }, + { + "epoch": 1.3221351746116055, + "grad_norm": 1.2398269968997129, + "learning_rate": 5.655794599759001e-06, + "loss": 0.23288744688034058, + "step": 4979 + }, + { + "epoch": 1.3224007435931484, + "grad_norm": 1.3344080514533678, + "learning_rate": 5.651840211618387e-06, + "loss": 0.23701068758964539, + "step": 4980 + }, + { + "epoch": 1.3226663125746914, + "grad_norm": 1.2102834630940547, + "learning_rate": 5.647886661751698e-06, + "loss": 0.22164157032966614, + "step": 4981 + }, + { + "epoch": 1.3229318815562343, + "grad_norm": 1.2096538262244674, + "learning_rate": 5.643933950921132e-06, + "loss": 0.23426607251167297, + "step": 4982 + }, + { + "epoch": 1.3231974505377773, + "grad_norm": 1.1880047089826309, + "learning_rate": 5.6399820798887266e-06, + "loss": 0.2567834258079529, + "step": 4983 + }, + { + "epoch": 1.3234630195193202, + "grad_norm": 1.3013809826248692, + "learning_rate": 5.6360310494163525e-06, + "loss": 0.2713038921356201, + "step": 4984 + }, + { + "epoch": 1.3237285885008632, + "grad_norm": 1.2908080991459006, + "learning_rate": 5.632080860265725e-06, + "loss": 0.2548249661922455, + "step": 4985 + }, + { + "epoch": 1.3239941574824061, + "grad_norm": 1.3471244082770852, + "learning_rate": 5.628131513198392e-06, + "loss": 0.2442832589149475, + "step": 4986 + }, + { + "epoch": 1.324259726463949, + "grad_norm": 1.3063670062134878, + "learning_rate": 5.6241830089757435e-06, + "loss": 0.24654853343963623, + "step": 4987 + }, + { + "epoch": 1.324525295445492, + "grad_norm": 1.2792033582455469, + "learning_rate": 5.620235348358997e-06, + "loss": 0.2802797853946686, + "step": 4988 + }, + { + "epoch": 1.324790864427035, + "grad_norm": 1.0588655062771883, + "learning_rate": 5.616288532109225e-06, + "loss": 0.18801404535770416, + "step": 4989 + }, + { + "epoch": 1.325056433408578, + "grad_norm": 1.2235746865490262, + "learning_rate": 5.6123425609873235e-06, + "loss": 0.2685382068157196, + "step": 4990 + }, + { + "epoch": 1.3253220023901209, + "grad_norm": 1.1873888072876837, + "learning_rate": 5.608397435754029e-06, + "loss": 0.23479774594306946, + "step": 4991 + }, + { + "epoch": 1.3255875713716638, + "grad_norm": 1.2164455244711625, + "learning_rate": 5.604453157169914e-06, + "loss": 0.24198031425476074, + "step": 4992 + }, + { + "epoch": 1.3258531403532068, + "grad_norm": 1.3448749532595476, + "learning_rate": 5.60050972599539e-06, + "loss": 0.25523462891578674, + "step": 4993 + }, + { + "epoch": 1.3261187093347497, + "grad_norm": 1.1695382845281797, + "learning_rate": 5.596567142990703e-06, + "loss": 0.23196743428707123, + "step": 4994 + }, + { + "epoch": 1.3263842783162927, + "grad_norm": 1.3145586744837223, + "learning_rate": 5.592625408915939e-06, + "loss": 0.29365748167037964, + "step": 4995 + }, + { + "epoch": 1.3266498472978356, + "grad_norm": 1.1946134760289593, + "learning_rate": 5.588684524531014e-06, + "loss": 0.24509185552597046, + "step": 4996 + }, + { + "epoch": 1.3269154162793786, + "grad_norm": 1.3358300509723116, + "learning_rate": 5.584744490595687e-06, + "loss": 0.27032390236854553, + "step": 4997 + }, + { + "epoch": 1.3271809852609215, + "grad_norm": 1.1645416268641489, + "learning_rate": 5.580805307869549e-06, + "loss": 0.24401508271694183, + "step": 4998 + }, + { + "epoch": 1.3274465542424645, + "grad_norm": 1.1506901325018217, + "learning_rate": 5.576866977112028e-06, + "loss": 0.2216658741235733, + "step": 4999 + }, + { + "epoch": 1.3277121232240074, + "grad_norm": 1.1830944265124126, + "learning_rate": 5.5729294990823875e-06, + "loss": 0.24545373022556305, + "step": 5000 + }, + { + "epoch": 1.3279776922055504, + "grad_norm": 1.377548009409137, + "learning_rate": 5.568992874539728e-06, + "loss": 0.260816752910614, + "step": 5001 + }, + { + "epoch": 1.3282432611870933, + "grad_norm": 1.1392730403811622, + "learning_rate": 5.565057104242984e-06, + "loss": 0.1850551962852478, + "step": 5002 + }, + { + "epoch": 1.3285088301686363, + "grad_norm": 2.1232949408605624, + "learning_rate": 5.561122188950923e-06, + "loss": 0.26854407787323, + "step": 5003 + }, + { + "epoch": 1.3287743991501793, + "grad_norm": 1.1591208934359583, + "learning_rate": 5.557188129422153e-06, + "loss": 0.24294906854629517, + "step": 5004 + }, + { + "epoch": 1.3290399681317222, + "grad_norm": 1.1880501452095942, + "learning_rate": 5.553254926415114e-06, + "loss": 0.2533603310585022, + "step": 5005 + }, + { + "epoch": 1.3293055371132652, + "grad_norm": 1.1756183262516449, + "learning_rate": 5.549322580688077e-06, + "loss": 0.2082313448190689, + "step": 5006 + }, + { + "epoch": 1.329571106094808, + "grad_norm": 1.1602290025540025, + "learning_rate": 5.545391092999158e-06, + "loss": 0.24265842139720917, + "step": 5007 + }, + { + "epoch": 1.329836675076351, + "grad_norm": 1.2321490774961563, + "learning_rate": 5.541460464106301e-06, + "loss": 0.2483578324317932, + "step": 5008 + }, + { + "epoch": 1.330102244057894, + "grad_norm": 1.2798509363454456, + "learning_rate": 5.537530694767281e-06, + "loss": 0.2769540548324585, + "step": 5009 + }, + { + "epoch": 1.330367813039437, + "grad_norm": 1.1781048091325885, + "learning_rate": 5.533601785739714e-06, + "loss": 0.2132025957107544, + "step": 5010 + }, + { + "epoch": 1.33063338202098, + "grad_norm": 1.2726887496075767, + "learning_rate": 5.529673737781047e-06, + "loss": 0.25223806500434875, + "step": 5011 + }, + { + "epoch": 1.3308989510025229, + "grad_norm": 1.13329365262538, + "learning_rate": 5.52574655164856e-06, + "loss": 0.22631296515464783, + "step": 5012 + }, + { + "epoch": 1.3311645199840658, + "grad_norm": 1.1821255064699665, + "learning_rate": 5.5218202280993725e-06, + "loss": 0.23756693303585052, + "step": 5013 + }, + { + "epoch": 1.3314300889656088, + "grad_norm": 1.2775335630974591, + "learning_rate": 5.517894767890427e-06, + "loss": 0.24746376276016235, + "step": 5014 + }, + { + "epoch": 1.3316956579471517, + "grad_norm": 1.105165815318004, + "learning_rate": 5.513970171778504e-06, + "loss": 0.21463070809841156, + "step": 5015 + }, + { + "epoch": 1.3319612269286947, + "grad_norm": 1.2090979668871258, + "learning_rate": 5.510046440520228e-06, + "loss": 0.21256107091903687, + "step": 5016 + }, + { + "epoch": 1.3322267959102376, + "grad_norm": 1.1963664670778913, + "learning_rate": 5.506123574872044e-06, + "loss": 0.25800254940986633, + "step": 5017 + }, + { + "epoch": 1.3324923648917806, + "grad_norm": 1.2726257558813519, + "learning_rate": 5.502201575590236e-06, + "loss": 0.2421891689300537, + "step": 5018 + }, + { + "epoch": 1.3327579338733235, + "grad_norm": 1.3181283061442692, + "learning_rate": 5.498280443430917e-06, + "loss": 0.24375903606414795, + "step": 5019 + }, + { + "epoch": 1.3330235028548665, + "grad_norm": 1.2419078132332353, + "learning_rate": 5.494360179150033e-06, + "loss": 0.22173303365707397, + "step": 5020 + }, + { + "epoch": 1.3332890718364094, + "grad_norm": 1.1754676882141941, + "learning_rate": 5.49044078350337e-06, + "loss": 0.24005022644996643, + "step": 5021 + }, + { + "epoch": 1.3335546408179524, + "grad_norm": 1.194558748352182, + "learning_rate": 5.486522257246538e-06, + "loss": 0.2600201964378357, + "step": 5022 + }, + { + "epoch": 1.3338202097994953, + "grad_norm": 1.2112657273591712, + "learning_rate": 5.482604601134984e-06, + "loss": 0.22889836132526398, + "step": 5023 + }, + { + "epoch": 1.3340857787810383, + "grad_norm": 1.151722502872684, + "learning_rate": 5.478687815923981e-06, + "loss": 0.25045812129974365, + "step": 5024 + }, + { + "epoch": 1.3343513477625812, + "grad_norm": 1.2499612320902753, + "learning_rate": 5.474771902368646e-06, + "loss": 0.24649837613105774, + "step": 5025 + }, + { + "epoch": 1.3346169167441242, + "grad_norm": 1.1975824340507155, + "learning_rate": 5.470856861223919e-06, + "loss": 0.23994389176368713, + "step": 5026 + }, + { + "epoch": 1.3348824857256671, + "grad_norm": 1.2488470912807048, + "learning_rate": 5.466942693244572e-06, + "loss": 0.24381600320339203, + "step": 5027 + }, + { + "epoch": 1.33514805470721, + "grad_norm": 1.1770895947351019, + "learning_rate": 5.463029399185217e-06, + "loss": 0.22110486030578613, + "step": 5028 + }, + { + "epoch": 1.335413623688753, + "grad_norm": 1.2878634690011452, + "learning_rate": 5.459116979800281e-06, + "loss": 0.25733259320259094, + "step": 5029 + }, + { + "epoch": 1.335679192670296, + "grad_norm": 1.2598918710105835, + "learning_rate": 5.4552054358440355e-06, + "loss": 0.22853803634643555, + "step": 5030 + }, + { + "epoch": 1.335944761651839, + "grad_norm": 1.3118793520277159, + "learning_rate": 5.451294768070581e-06, + "loss": 0.27503639459609985, + "step": 5031 + }, + { + "epoch": 1.336210330633382, + "grad_norm": 1.2721314541046291, + "learning_rate": 5.447384977233849e-06, + "loss": 0.27931997179985046, + "step": 5032 + }, + { + "epoch": 1.3364758996149249, + "grad_norm": 1.2287817779118972, + "learning_rate": 5.443476064087596e-06, + "loss": 0.2477954626083374, + "step": 5033 + }, + { + "epoch": 1.3367414685964678, + "grad_norm": 1.2204002745504476, + "learning_rate": 5.439568029385422e-06, + "loss": 0.2195623219013214, + "step": 5034 + }, + { + "epoch": 1.3370070375780108, + "grad_norm": 1.230653492520276, + "learning_rate": 5.435660873880747e-06, + "loss": 0.22160238027572632, + "step": 5035 + }, + { + "epoch": 1.3372726065595537, + "grad_norm": 1.6764380815480615, + "learning_rate": 5.4317545983268235e-06, + "loss": 0.24107405543327332, + "step": 5036 + }, + { + "epoch": 1.3375381755410967, + "grad_norm": 1.2985203082435115, + "learning_rate": 5.427849203476738e-06, + "loss": 0.2480086386203766, + "step": 5037 + }, + { + "epoch": 1.3378037445226398, + "grad_norm": 1.2654518356324462, + "learning_rate": 5.4239446900834005e-06, + "loss": 0.22476691007614136, + "step": 5038 + }, + { + "epoch": 1.3380693135041828, + "grad_norm": 1.217906592075979, + "learning_rate": 5.420041058899559e-06, + "loss": 0.23685473203659058, + "step": 5039 + }, + { + "epoch": 1.3383348824857257, + "grad_norm": 1.215790635675812, + "learning_rate": 5.416138310677784e-06, + "loss": 0.27753746509552, + "step": 5040 + }, + { + "epoch": 1.3386004514672687, + "grad_norm": 1.2682075315501737, + "learning_rate": 5.412236446170482e-06, + "loss": 0.22446027398109436, + "step": 5041 + }, + { + "epoch": 1.3388660204488116, + "grad_norm": 1.2214424011593596, + "learning_rate": 5.4083354661298816e-06, + "loss": 0.2535285949707031, + "step": 5042 + }, + { + "epoch": 1.3391315894303546, + "grad_norm": 1.2982364680013232, + "learning_rate": 5.4044353713080565e-06, + "loss": 0.2412964254617691, + "step": 5043 + }, + { + "epoch": 1.3393971584118975, + "grad_norm": 1.3092797704576777, + "learning_rate": 5.4005361624568895e-06, + "loss": 0.23863038420677185, + "step": 5044 + }, + { + "epoch": 1.3396627273934405, + "grad_norm": 1.159506578977356, + "learning_rate": 5.396637840328105e-06, + "loss": 0.22741727530956268, + "step": 5045 + }, + { + "epoch": 1.3399282963749835, + "grad_norm": 1.285452356277395, + "learning_rate": 5.392740405673251e-06, + "loss": 0.2497379630804062, + "step": 5046 + }, + { + "epoch": 1.3401938653565264, + "grad_norm": 1.2401289485061215, + "learning_rate": 5.388843859243712e-06, + "loss": 0.19558298587799072, + "step": 5047 + }, + { + "epoch": 1.3404594343380694, + "grad_norm": 1.2074615239750155, + "learning_rate": 5.3849482017906914e-06, + "loss": 0.2266748994588852, + "step": 5048 + }, + { + "epoch": 1.3407250033196123, + "grad_norm": 1.2657162316868396, + "learning_rate": 5.381053434065229e-06, + "loss": 0.2410028576850891, + "step": 5049 + }, + { + "epoch": 1.3409905723011553, + "grad_norm": 1.301692886719208, + "learning_rate": 5.37715955681819e-06, + "loss": 0.23965512216091156, + "step": 5050 + }, + { + "epoch": 1.3412561412826982, + "grad_norm": 1.1756365557449155, + "learning_rate": 5.373266570800262e-06, + "loss": 0.22440138459205627, + "step": 5051 + }, + { + "epoch": 1.3415217102642412, + "grad_norm": 1.2562473271519534, + "learning_rate": 5.369374476761975e-06, + "loss": 0.2509710192680359, + "step": 5052 + }, + { + "epoch": 1.3417872792457841, + "grad_norm": 1.3381440207626536, + "learning_rate": 5.365483275453677e-06, + "loss": 0.26555800437927246, + "step": 5053 + }, + { + "epoch": 1.342052848227327, + "grad_norm": 1.2240809600669689, + "learning_rate": 5.361592967625544e-06, + "loss": 0.23089733719825745, + "step": 5054 + }, + { + "epoch": 1.34231841720887, + "grad_norm": 1.1178692263054482, + "learning_rate": 5.357703554027582e-06, + "loss": 0.2040700763463974, + "step": 5055 + }, + { + "epoch": 1.342583986190413, + "grad_norm": 1.309704975193781, + "learning_rate": 5.353815035409624e-06, + "loss": 0.23539039492607117, + "step": 5056 + }, + { + "epoch": 1.342849555171956, + "grad_norm": 1.7065922202358847, + "learning_rate": 5.3499274125213294e-06, + "loss": 0.2190464437007904, + "step": 5057 + }, + { + "epoch": 1.3431151241534989, + "grad_norm": 1.1478595499251703, + "learning_rate": 5.346040686112189e-06, + "loss": 0.21557429432868958, + "step": 5058 + }, + { + "epoch": 1.3433806931350418, + "grad_norm": 1.1934269644730748, + "learning_rate": 5.342154856931515e-06, + "loss": 0.24398267269134521, + "step": 5059 + }, + { + "epoch": 1.3436462621165848, + "grad_norm": 1.1089059625649784, + "learning_rate": 5.338269925728451e-06, + "loss": 0.21652038395404816, + "step": 5060 + }, + { + "epoch": 1.3439118310981277, + "grad_norm": 1.1937531358219302, + "learning_rate": 5.334385893251966e-06, + "loss": 0.2031325101852417, + "step": 5061 + }, + { + "epoch": 1.3441774000796707, + "grad_norm": 1.1621991357090053, + "learning_rate": 5.330502760250853e-06, + "loss": 0.2484835982322693, + "step": 5062 + }, + { + "epoch": 1.3444429690612136, + "grad_norm": 1.2657742595884374, + "learning_rate": 5.326620527473737e-06, + "loss": 0.23698699474334717, + "step": 5063 + }, + { + "epoch": 1.3447085380427566, + "grad_norm": 1.2000433743668328, + "learning_rate": 5.322739195669065e-06, + "loss": 0.23928484320640564, + "step": 5064 + }, + { + "epoch": 1.3449741070242995, + "grad_norm": 1.1828146199314795, + "learning_rate": 5.318858765585115e-06, + "loss": 0.22679512202739716, + "step": 5065 + }, + { + "epoch": 1.3452396760058425, + "grad_norm": 1.2334385564497414, + "learning_rate": 5.314979237969984e-06, + "loss": 0.2115025818347931, + "step": 5066 + }, + { + "epoch": 1.3455052449873854, + "grad_norm": 1.261129899382787, + "learning_rate": 5.311100613571603e-06, + "loss": 0.2441834807395935, + "step": 5067 + }, + { + "epoch": 1.3457708139689284, + "grad_norm": 1.2722125718860966, + "learning_rate": 5.307222893137722e-06, + "loss": 0.2549205720424652, + "step": 5068 + }, + { + "epoch": 1.3460363829504713, + "grad_norm": 1.179054242584843, + "learning_rate": 5.3033460774159185e-06, + "loss": 0.24652990698814392, + "step": 5069 + }, + { + "epoch": 1.3463019519320143, + "grad_norm": 1.2062419936470874, + "learning_rate": 5.299470167153602e-06, + "loss": 0.2403775006532669, + "step": 5070 + }, + { + "epoch": 1.3465675209135572, + "grad_norm": 1.1208895570259512, + "learning_rate": 5.295595163097999e-06, + "loss": 0.2215663194656372, + "step": 5071 + }, + { + "epoch": 1.3468330898951002, + "grad_norm": 1.2914937229567889, + "learning_rate": 5.291721065996167e-06, + "loss": 0.2567424774169922, + "step": 5072 + }, + { + "epoch": 1.3470986588766432, + "grad_norm": 1.0608079556396839, + "learning_rate": 5.287847876594984e-06, + "loss": 0.21162359416484833, + "step": 5073 + }, + { + "epoch": 1.347364227858186, + "grad_norm": 1.221049341797181, + "learning_rate": 5.283975595641155e-06, + "loss": 0.21851085126399994, + "step": 5074 + }, + { + "epoch": 1.347629796839729, + "grad_norm": 1.2935501467753354, + "learning_rate": 5.280104223881212e-06, + "loss": 0.2491171509027481, + "step": 5075 + }, + { + "epoch": 1.347895365821272, + "grad_norm": 1.2921255335421646, + "learning_rate": 5.276233762061507e-06, + "loss": 0.22467780113220215, + "step": 5076 + }, + { + "epoch": 1.348160934802815, + "grad_norm": 1.159790816626821, + "learning_rate": 5.272364210928223e-06, + "loss": 0.24531611800193787, + "step": 5077 + }, + { + "epoch": 1.348426503784358, + "grad_norm": 1.2178282841242851, + "learning_rate": 5.268495571227361e-06, + "loss": 0.2582520544528961, + "step": 5078 + }, + { + "epoch": 1.3486920727659009, + "grad_norm": 1.2175282778251775, + "learning_rate": 5.264627843704749e-06, + "loss": 0.21180811524391174, + "step": 5079 + }, + { + "epoch": 1.348957641747444, + "grad_norm": 1.2942378328530906, + "learning_rate": 5.2607610291060406e-06, + "loss": 0.27026671171188354, + "step": 5080 + }, + { + "epoch": 1.349223210728987, + "grad_norm": 1.1721525183169563, + "learning_rate": 5.256895128176712e-06, + "loss": 0.22954419255256653, + "step": 5081 + }, + { + "epoch": 1.34948877971053, + "grad_norm": 1.3561853541918854, + "learning_rate": 5.253030141662063e-06, + "loss": 0.24064484238624573, + "step": 5082 + }, + { + "epoch": 1.349754348692073, + "grad_norm": 1.1245550279116328, + "learning_rate": 5.249166070307218e-06, + "loss": 0.1981196105480194, + "step": 5083 + }, + { + "epoch": 1.3500199176736158, + "grad_norm": 1.0881909699390468, + "learning_rate": 5.2453029148571226e-06, + "loss": 0.19882233440876007, + "step": 5084 + }, + { + "epoch": 1.3502854866551588, + "grad_norm": 1.2123536275051694, + "learning_rate": 5.24144067605655e-06, + "loss": 0.2409907579421997, + "step": 5085 + }, + { + "epoch": 1.3505510556367017, + "grad_norm": 1.2197874501412473, + "learning_rate": 5.237579354650092e-06, + "loss": 0.2205093652009964, + "step": 5086 + }, + { + "epoch": 1.3508166246182447, + "grad_norm": 1.4716074796051495, + "learning_rate": 5.233718951382163e-06, + "loss": 0.2283058911561966, + "step": 5087 + }, + { + "epoch": 1.3510821935997877, + "grad_norm": 1.2561007307780203, + "learning_rate": 5.229859466997012e-06, + "loss": 0.25584474205970764, + "step": 5088 + }, + { + "epoch": 1.3513477625813306, + "grad_norm": 1.1491167817661179, + "learning_rate": 5.226000902238696e-06, + "loss": 0.22516845166683197, + "step": 5089 + }, + { + "epoch": 1.3516133315628736, + "grad_norm": 1.2604818786719383, + "learning_rate": 5.222143257851102e-06, + "loss": 0.23440764844417572, + "step": 5090 + }, + { + "epoch": 1.3518789005444165, + "grad_norm": 1.2156754572685655, + "learning_rate": 5.218286534577938e-06, + "loss": 0.25858962535858154, + "step": 5091 + }, + { + "epoch": 1.3521444695259595, + "grad_norm": 1.1425154357949754, + "learning_rate": 5.214430733162736e-06, + "loss": 0.20676326751708984, + "step": 5092 + }, + { + "epoch": 1.3524100385075024, + "grad_norm": 1.1266241214136956, + "learning_rate": 5.210575854348853e-06, + "loss": 0.21892425417900085, + "step": 5093 + }, + { + "epoch": 1.3526756074890454, + "grad_norm": 1.2379350388596377, + "learning_rate": 5.206721898879454e-06, + "loss": 0.2538335919380188, + "step": 5094 + }, + { + "epoch": 1.3529411764705883, + "grad_norm": 1.2059035716196298, + "learning_rate": 5.202868867497542e-06, + "loss": 0.24750448763370514, + "step": 5095 + }, + { + "epoch": 1.3532067454521313, + "grad_norm": 1.2602608504342458, + "learning_rate": 5.199016760945931e-06, + "loss": 0.2569364011287689, + "step": 5096 + }, + { + "epoch": 1.3534723144336742, + "grad_norm": 0.9860855220263709, + "learning_rate": 5.19516557996727e-06, + "loss": 0.16788914799690247, + "step": 5097 + }, + { + "epoch": 1.3537378834152172, + "grad_norm": 1.0020852845957948, + "learning_rate": 5.191315325304018e-06, + "loss": 0.19006651639938354, + "step": 5098 + }, + { + "epoch": 1.3540034523967601, + "grad_norm": 1.187896658740898, + "learning_rate": 5.1874659976984575e-06, + "loss": 0.23474551737308502, + "step": 5099 + }, + { + "epoch": 1.354269021378303, + "grad_norm": 1.2829971661643687, + "learning_rate": 5.183617597892694e-06, + "loss": 0.26601099967956543, + "step": 5100 + }, + { + "epoch": 1.354534590359846, + "grad_norm": 1.1758855450162613, + "learning_rate": 5.179770126628654e-06, + "loss": 0.24207550287246704, + "step": 5101 + }, + { + "epoch": 1.354800159341389, + "grad_norm": 1.2535446057143411, + "learning_rate": 5.175923584648083e-06, + "loss": 0.2538307309150696, + "step": 5102 + }, + { + "epoch": 1.355065728322932, + "grad_norm": 1.1865818667829109, + "learning_rate": 5.172077972692553e-06, + "loss": 0.23073242604732513, + "step": 5103 + }, + { + "epoch": 1.3553312973044749, + "grad_norm": 1.348848385270533, + "learning_rate": 5.168233291503448e-06, + "loss": 0.2634595036506653, + "step": 5104 + }, + { + "epoch": 1.3555968662860178, + "grad_norm": 1.225057907199874, + "learning_rate": 5.1643895418219744e-06, + "loss": 0.23282350599765778, + "step": 5105 + }, + { + "epoch": 1.3558624352675608, + "grad_norm": 1.333152685269679, + "learning_rate": 5.160546724389172e-06, + "loss": 0.2543700933456421, + "step": 5106 + }, + { + "epoch": 1.3561280042491037, + "grad_norm": 1.1449256417555271, + "learning_rate": 5.1567048399458855e-06, + "loss": 0.2005772739648819, + "step": 5107 + }, + { + "epoch": 1.3563935732306467, + "grad_norm": 1.2429630346358373, + "learning_rate": 5.152863889232787e-06, + "loss": 0.2367073893547058, + "step": 5108 + }, + { + "epoch": 1.3566591422121896, + "grad_norm": 1.2839253544945022, + "learning_rate": 5.14902387299036e-06, + "loss": 0.25600770115852356, + "step": 5109 + }, + { + "epoch": 1.3569247111937326, + "grad_norm": 1.198566513294344, + "learning_rate": 5.145184791958918e-06, + "loss": 0.21678754687309265, + "step": 5110 + }, + { + "epoch": 1.3571902801752755, + "grad_norm": 1.3894724787206996, + "learning_rate": 5.141346646878591e-06, + "loss": 0.265438973903656, + "step": 5111 + }, + { + "epoch": 1.3574558491568185, + "grad_norm": 1.1239736089383028, + "learning_rate": 5.13750943848933e-06, + "loss": 0.24246999621391296, + "step": 5112 + }, + { + "epoch": 1.3577214181383614, + "grad_norm": 1.299396280421792, + "learning_rate": 5.133673167530899e-06, + "loss": 0.25401771068573, + "step": 5113 + }, + { + "epoch": 1.3579869871199044, + "grad_norm": 1.2329813534125698, + "learning_rate": 5.129837834742885e-06, + "loss": 0.2698017656803131, + "step": 5114 + }, + { + "epoch": 1.3582525561014474, + "grad_norm": 1.2787210937788358, + "learning_rate": 5.126003440864703e-06, + "loss": 0.27006995677948, + "step": 5115 + }, + { + "epoch": 1.3585181250829903, + "grad_norm": 1.2695682196385796, + "learning_rate": 5.122169986635575e-06, + "loss": 0.2370866984128952, + "step": 5116 + }, + { + "epoch": 1.3587836940645333, + "grad_norm": 1.3031561376922138, + "learning_rate": 5.1183374727945425e-06, + "loss": 0.24017807841300964, + "step": 5117 + }, + { + "epoch": 1.3590492630460762, + "grad_norm": 1.1487956614446662, + "learning_rate": 5.114505900080473e-06, + "loss": 0.21664533019065857, + "step": 5118 + }, + { + "epoch": 1.3593148320276192, + "grad_norm": 4.246209132455192, + "learning_rate": 5.110675269232046e-06, + "loss": 0.24561598896980286, + "step": 5119 + }, + { + "epoch": 1.359580401009162, + "grad_norm": 1.3902415348604562, + "learning_rate": 5.106845580987763e-06, + "loss": 0.26678937673568726, + "step": 5120 + }, + { + "epoch": 1.359845969990705, + "grad_norm": 1.354168350096278, + "learning_rate": 5.103016836085943e-06, + "loss": 0.21919070184230804, + "step": 5121 + }, + { + "epoch": 1.360111538972248, + "grad_norm": 1.3057665036353723, + "learning_rate": 5.099189035264722e-06, + "loss": 0.24887943267822266, + "step": 5122 + }, + { + "epoch": 1.360377107953791, + "grad_norm": 1.2017875007060346, + "learning_rate": 5.0953621792620556e-06, + "loss": 0.23597784340381622, + "step": 5123 + }, + { + "epoch": 1.360642676935334, + "grad_norm": 1.2098630506546966, + "learning_rate": 5.091536268815717e-06, + "loss": 0.21265193819999695, + "step": 5124 + }, + { + "epoch": 1.3609082459168769, + "grad_norm": 1.3606980074054404, + "learning_rate": 5.0877113046632945e-06, + "loss": 0.29837465286254883, + "step": 5125 + }, + { + "epoch": 1.3611738148984198, + "grad_norm": 1.1915793844006848, + "learning_rate": 5.0838872875421975e-06, + "loss": 0.2324269413948059, + "step": 5126 + }, + { + "epoch": 1.3614393838799628, + "grad_norm": 1.0970197687294143, + "learning_rate": 5.080064218189652e-06, + "loss": 0.19149541854858398, + "step": 5127 + }, + { + "epoch": 1.3617049528615057, + "grad_norm": 1.1710303609542994, + "learning_rate": 5.0762420973427e-06, + "loss": 0.247644305229187, + "step": 5128 + }, + { + "epoch": 1.3619705218430487, + "grad_norm": 1.1403838601028529, + "learning_rate": 5.0724209257382006e-06, + "loss": 0.2272202968597412, + "step": 5129 + }, + { + "epoch": 1.3622360908245916, + "grad_norm": 1.2012952880900256, + "learning_rate": 5.068600704112832e-06, + "loss": 0.25735989212989807, + "step": 5130 + }, + { + "epoch": 1.3625016598061346, + "grad_norm": 1.1771555574179005, + "learning_rate": 5.064781433203086e-06, + "loss": 0.19970473647117615, + "step": 5131 + }, + { + "epoch": 1.3627672287876775, + "grad_norm": 1.2156620394191346, + "learning_rate": 5.060963113745272e-06, + "loss": 0.24289372563362122, + "step": 5132 + }, + { + "epoch": 1.3630327977692205, + "grad_norm": 1.2352988713677027, + "learning_rate": 5.0571457464755226e-06, + "loss": 0.2757350504398346, + "step": 5133 + }, + { + "epoch": 1.3632983667507634, + "grad_norm": 1.2115447809386193, + "learning_rate": 5.053329332129777e-06, + "loss": 0.24552851915359497, + "step": 5134 + }, + { + "epoch": 1.3635639357323064, + "grad_norm": 1.1546263092618338, + "learning_rate": 5.049513871443797e-06, + "loss": 0.22152797877788544, + "step": 5135 + }, + { + "epoch": 1.3638295047138493, + "grad_norm": 1.2567398712194906, + "learning_rate": 5.045699365153155e-06, + "loss": 0.27098602056503296, + "step": 5136 + }, + { + "epoch": 1.3640950736953923, + "grad_norm": 1.201852433475055, + "learning_rate": 5.041885813993246e-06, + "loss": 0.21275216341018677, + "step": 5137 + }, + { + "epoch": 1.3643606426769352, + "grad_norm": 1.3326670101473788, + "learning_rate": 5.038073218699275e-06, + "loss": 0.2510162591934204, + "step": 5138 + }, + { + "epoch": 1.3646262116584782, + "grad_norm": 1.2702563681918038, + "learning_rate": 5.034261580006269e-06, + "loss": 0.23203429579734802, + "step": 5139 + }, + { + "epoch": 1.3648917806400211, + "grad_norm": 1.137285489869793, + "learning_rate": 5.030450898649064e-06, + "loss": 0.22178995609283447, + "step": 5140 + }, + { + "epoch": 1.365157349621564, + "grad_norm": 1.2415754400243457, + "learning_rate": 5.026641175362316e-06, + "loss": 0.2567412257194519, + "step": 5141 + }, + { + "epoch": 1.365422918603107, + "grad_norm": 1.232487080143156, + "learning_rate": 5.022832410880494e-06, + "loss": 0.21939827501773834, + "step": 5142 + }, + { + "epoch": 1.36568848758465, + "grad_norm": 1.4733425270104286, + "learning_rate": 5.019024605937882e-06, + "loss": 0.2325637936592102, + "step": 5143 + }, + { + "epoch": 1.365954056566193, + "grad_norm": 1.266575596941496, + "learning_rate": 5.015217761268582e-06, + "loss": 0.2416393756866455, + "step": 5144 + }, + { + "epoch": 1.366219625547736, + "grad_norm": 1.289260413423763, + "learning_rate": 5.011411877606507e-06, + "loss": 0.2439568042755127, + "step": 5145 + }, + { + "epoch": 1.3664851945292789, + "grad_norm": 1.1439689034996021, + "learning_rate": 5.007606955685387e-06, + "loss": 0.2495957612991333, + "step": 5146 + }, + { + "epoch": 1.3667507635108218, + "grad_norm": 1.1937127912858143, + "learning_rate": 5.003802996238766e-06, + "loss": 0.23415328562259674, + "step": 5147 + }, + { + "epoch": 1.3670163324923648, + "grad_norm": 1.26410321081345, + "learning_rate": 5.000000000000003e-06, + "loss": 0.2637922465801239, + "step": 5148 + }, + { + "epoch": 1.3672819014739077, + "grad_norm": 1.243307173830296, + "learning_rate": 4.9961979677022696e-06, + "loss": 0.2319526970386505, + "step": 5149 + }, + { + "epoch": 1.3675474704554509, + "grad_norm": 1.2115383829826751, + "learning_rate": 4.992396900078551e-06, + "loss": 0.2338445484638214, + "step": 5150 + }, + { + "epoch": 1.3678130394369938, + "grad_norm": 1.1683439299091893, + "learning_rate": 4.988596797861654e-06, + "loss": 0.19041961431503296, + "step": 5151 + }, + { + "epoch": 1.3680786084185368, + "grad_norm": 1.233073404450011, + "learning_rate": 4.984797661784191e-06, + "loss": 0.2698138952255249, + "step": 5152 + }, + { + "epoch": 1.3683441774000797, + "grad_norm": 1.2592426315358647, + "learning_rate": 4.980999492578588e-06, + "loss": 0.2208167165517807, + "step": 5153 + }, + { + "epoch": 1.3686097463816227, + "grad_norm": 1.1935159953807641, + "learning_rate": 4.9772022909770915e-06, + "loss": 0.2515152096748352, + "step": 5154 + }, + { + "epoch": 1.3688753153631656, + "grad_norm": 1.3110804278343313, + "learning_rate": 4.973406057711755e-06, + "loss": 0.2393365204334259, + "step": 5155 + }, + { + "epoch": 1.3691408843447086, + "grad_norm": 1.302037077529998, + "learning_rate": 4.969610793514446e-06, + "loss": 0.24546492099761963, + "step": 5156 + }, + { + "epoch": 1.3694064533262515, + "grad_norm": 1.5300417364025873, + "learning_rate": 4.965816499116849e-06, + "loss": 0.252412348985672, + "step": 5157 + }, + { + "epoch": 1.3696720223077945, + "grad_norm": 1.1552882128683561, + "learning_rate": 4.962023175250461e-06, + "loss": 0.22654281556606293, + "step": 5158 + }, + { + "epoch": 1.3699375912893375, + "grad_norm": 1.2873880265204376, + "learning_rate": 4.958230822646581e-06, + "loss": 0.2542813718318939, + "step": 5159 + }, + { + "epoch": 1.3702031602708804, + "grad_norm": 1.2851879635778218, + "learning_rate": 4.9544394420363395e-06, + "loss": 0.25376224517822266, + "step": 5160 + }, + { + "epoch": 1.3704687292524234, + "grad_norm": 1.252574665809313, + "learning_rate": 4.950649034150666e-06, + "loss": 0.21911674737930298, + "step": 5161 + }, + { + "epoch": 1.3707342982339663, + "grad_norm": 1.3527776455922371, + "learning_rate": 4.946859599720308e-06, + "loss": 0.2805126905441284, + "step": 5162 + }, + { + "epoch": 1.3709998672155093, + "grad_norm": 1.1716388954292443, + "learning_rate": 4.943071139475824e-06, + "loss": 0.2189590483903885, + "step": 5163 + }, + { + "epoch": 1.3712654361970522, + "grad_norm": 1.2218109142926636, + "learning_rate": 4.939283654147582e-06, + "loss": 0.21837599575519562, + "step": 5164 + }, + { + "epoch": 1.3715310051785952, + "grad_norm": 1.2779646624690562, + "learning_rate": 4.935497144465766e-06, + "loss": 0.25090983510017395, + "step": 5165 + }, + { + "epoch": 1.3717965741601381, + "grad_norm": 1.1988734011828608, + "learning_rate": 4.93171161116037e-06, + "loss": 0.22028754651546478, + "step": 5166 + }, + { + "epoch": 1.372062143141681, + "grad_norm": 1.1554753760684375, + "learning_rate": 4.927927054961201e-06, + "loss": 0.20097196102142334, + "step": 5167 + }, + { + "epoch": 1.372327712123224, + "grad_norm": 1.209557738779129, + "learning_rate": 4.924143476597872e-06, + "loss": 0.230082705616951, + "step": 5168 + }, + { + "epoch": 1.372593281104767, + "grad_norm": 1.1549715219295726, + "learning_rate": 4.920360876799821e-06, + "loss": 0.23701804876327515, + "step": 5169 + }, + { + "epoch": 1.37285885008631, + "grad_norm": 1.2740998730652584, + "learning_rate": 4.9165792562962834e-06, + "loss": 0.22357231378555298, + "step": 5170 + }, + { + "epoch": 1.3731244190678529, + "grad_norm": 1.2042473616661704, + "learning_rate": 4.912798615816312e-06, + "loss": 0.2533026337623596, + "step": 5171 + }, + { + "epoch": 1.3733899880493958, + "grad_norm": 1.3342025781776312, + "learning_rate": 4.90901895608877e-06, + "loss": 0.24878138303756714, + "step": 5172 + }, + { + "epoch": 1.3736555570309388, + "grad_norm": 1.5415419516618216, + "learning_rate": 4.905240277842335e-06, + "loss": 0.22641420364379883, + "step": 5173 + }, + { + "epoch": 1.3739211260124817, + "grad_norm": 1.2916997982097302, + "learning_rate": 4.901462581805483e-06, + "loss": 0.24495793879032135, + "step": 5174 + }, + { + "epoch": 1.3741866949940247, + "grad_norm": 1.3531795848957913, + "learning_rate": 4.897685868706512e-06, + "loss": 0.2688868045806885, + "step": 5175 + }, + { + "epoch": 1.3744522639755676, + "grad_norm": 1.2828126418821555, + "learning_rate": 4.893910139273531e-06, + "loss": 0.25796642899513245, + "step": 5176 + }, + { + "epoch": 1.3747178329571106, + "grad_norm": 1.4091718050104127, + "learning_rate": 4.890135394234451e-06, + "loss": 0.27557405829429626, + "step": 5177 + }, + { + "epoch": 1.3749834019386535, + "grad_norm": 1.620605499986823, + "learning_rate": 4.886361634317004e-06, + "loss": 0.23553809523582458, + "step": 5178 + }, + { + "epoch": 1.3752489709201965, + "grad_norm": 1.2608742989736732, + "learning_rate": 4.882588860248725e-06, + "loss": 0.2454400360584259, + "step": 5179 + }, + { + "epoch": 1.3755145399017394, + "grad_norm": 1.1743865548501493, + "learning_rate": 4.878817072756959e-06, + "loss": 0.19460657238960266, + "step": 5180 + }, + { + "epoch": 1.3757801088832824, + "grad_norm": 1.2528300475452, + "learning_rate": 4.875046272568863e-06, + "loss": 0.24833449721336365, + "step": 5181 + }, + { + "epoch": 1.3760456778648253, + "grad_norm": 1.3263672125712147, + "learning_rate": 4.871276460411403e-06, + "loss": 0.2774161994457245, + "step": 5182 + }, + { + "epoch": 1.3763112468463683, + "grad_norm": 2.6268834337513667, + "learning_rate": 4.867507637011353e-06, + "loss": 0.2277964949607849, + "step": 5183 + }, + { + "epoch": 1.3765768158279112, + "grad_norm": 1.8924198767245841, + "learning_rate": 4.863739803095299e-06, + "loss": 0.2176733911037445, + "step": 5184 + }, + { + "epoch": 1.3768423848094542, + "grad_norm": 1.3153810073025014, + "learning_rate": 4.859972959389634e-06, + "loss": 0.23529113829135895, + "step": 5185 + }, + { + "epoch": 1.3771079537909972, + "grad_norm": 1.3909544444662505, + "learning_rate": 4.856207106620557e-06, + "loss": 0.2646695077419281, + "step": 5186 + }, + { + "epoch": 1.37737352277254, + "grad_norm": 1.2095108180861869, + "learning_rate": 4.852442245514093e-06, + "loss": 0.23179873824119568, + "step": 5187 + }, + { + "epoch": 1.377639091754083, + "grad_norm": 1.1084014698771758, + "learning_rate": 4.84867837679605e-06, + "loss": 0.2127494066953659, + "step": 5188 + }, + { + "epoch": 1.377904660735626, + "grad_norm": 1.2275201950569183, + "learning_rate": 4.844915501192062e-06, + "loss": 0.2204679548740387, + "step": 5189 + }, + { + "epoch": 1.378170229717169, + "grad_norm": 1.2078653060668294, + "learning_rate": 4.841153619427567e-06, + "loss": 0.20271794497966766, + "step": 5190 + }, + { + "epoch": 1.378435798698712, + "grad_norm": 1.4269963155687142, + "learning_rate": 4.837392732227811e-06, + "loss": 0.2785792052745819, + "step": 5191 + }, + { + "epoch": 1.3787013676802549, + "grad_norm": 1.2501319487764966, + "learning_rate": 4.8336328403178486e-06, + "loss": 0.24904468655586243, + "step": 5192 + }, + { + "epoch": 1.378966936661798, + "grad_norm": 1.1230965332904321, + "learning_rate": 4.829873944422544e-06, + "loss": 0.20045346021652222, + "step": 5193 + }, + { + "epoch": 1.379232505643341, + "grad_norm": 1.1339816903135191, + "learning_rate": 4.826116045266565e-06, + "loss": 0.21814313530921936, + "step": 5194 + }, + { + "epoch": 1.379498074624884, + "grad_norm": 1.236126479276255, + "learning_rate": 4.82235914357439e-06, + "loss": 0.2408592253923416, + "step": 5195 + }, + { + "epoch": 1.379763643606427, + "grad_norm": 1.1229995433845732, + "learning_rate": 4.818603240070311e-06, + "loss": 0.21453416347503662, + "step": 5196 + }, + { + "epoch": 1.3800292125879698, + "grad_norm": 1.2915687788203387, + "learning_rate": 4.814848335478418e-06, + "loss": 0.2578599154949188, + "step": 5197 + }, + { + "epoch": 1.3802947815695128, + "grad_norm": 1.0696662022967476, + "learning_rate": 4.811094430522613e-06, + "loss": 0.1980094015598297, + "step": 5198 + }, + { + "epoch": 1.3805603505510557, + "grad_norm": 1.202740960535961, + "learning_rate": 4.807341525926604e-06, + "loss": 0.24620960652828217, + "step": 5199 + }, + { + "epoch": 1.3808259195325987, + "grad_norm": 1.2486655803425535, + "learning_rate": 4.803589622413908e-06, + "loss": 0.23525282740592957, + "step": 5200 + }, + { + "epoch": 1.3810914885141417, + "grad_norm": 1.1657735912575689, + "learning_rate": 4.799838720707847e-06, + "loss": 0.2277744859457016, + "step": 5201 + }, + { + "epoch": 1.3813570574956846, + "grad_norm": 1.2927728942283212, + "learning_rate": 4.796088821531549e-06, + "loss": 0.2727074921131134, + "step": 5202 + }, + { + "epoch": 1.3816226264772276, + "grad_norm": 1.2370931993726209, + "learning_rate": 4.7923399256079525e-06, + "loss": 0.21686753630638123, + "step": 5203 + }, + { + "epoch": 1.3818881954587705, + "grad_norm": 1.2572583885252075, + "learning_rate": 4.788592033659799e-06, + "loss": 0.2841380834579468, + "step": 5204 + }, + { + "epoch": 1.3821537644403135, + "grad_norm": 1.1157272204593003, + "learning_rate": 4.78484514640964e-06, + "loss": 0.24577853083610535, + "step": 5205 + }, + { + "epoch": 1.3824193334218564, + "grad_norm": 1.2077705032221964, + "learning_rate": 4.7810992645798285e-06, + "loss": 0.22289782762527466, + "step": 5206 + }, + { + "epoch": 1.3826849024033994, + "grad_norm": 1.1476107334002954, + "learning_rate": 4.7773543888925274e-06, + "loss": 0.2223999947309494, + "step": 5207 + }, + { + "epoch": 1.3829504713849423, + "grad_norm": 1.2183085137487102, + "learning_rate": 4.773610520069706e-06, + "loss": 0.23938870429992676, + "step": 5208 + }, + { + "epoch": 1.3832160403664853, + "grad_norm": 1.219370193725879, + "learning_rate": 4.769867658833136e-06, + "loss": 0.260856568813324, + "step": 5209 + }, + { + "epoch": 1.3834816093480282, + "grad_norm": 1.2333269697463725, + "learning_rate": 4.766125805904398e-06, + "loss": 0.23602089285850525, + "step": 5210 + }, + { + "epoch": 1.3837471783295712, + "grad_norm": 1.156747833138865, + "learning_rate": 4.762384962004877e-06, + "loss": 0.22543978691101074, + "step": 5211 + }, + { + "epoch": 1.3840127473111141, + "grad_norm": 1.3639051201807257, + "learning_rate": 4.758645127855763e-06, + "loss": 0.2432224452495575, + "step": 5212 + }, + { + "epoch": 1.384278316292657, + "grad_norm": 1.3947016936895973, + "learning_rate": 4.754906304178049e-06, + "loss": 0.22764597833156586, + "step": 5213 + }, + { + "epoch": 1.3845438852742, + "grad_norm": 1.2064067504011344, + "learning_rate": 4.751168491692541e-06, + "loss": 0.22503387928009033, + "step": 5214 + }, + { + "epoch": 1.384809454255743, + "grad_norm": 1.1066861130484609, + "learning_rate": 4.747431691119846e-06, + "loss": 0.21889932453632355, + "step": 5215 + }, + { + "epoch": 1.385075023237286, + "grad_norm": 1.3903278318809302, + "learning_rate": 4.743695903180372e-06, + "loss": 0.2695825695991516, + "step": 5216 + }, + { + "epoch": 1.3853405922188289, + "grad_norm": 1.2921759622470506, + "learning_rate": 4.739961128594336e-06, + "loss": 0.265118271112442, + "step": 5217 + }, + { + "epoch": 1.3856061612003718, + "grad_norm": 1.1349207398090602, + "learning_rate": 4.736227368081757e-06, + "loss": 0.2050788253545761, + "step": 5218 + }, + { + "epoch": 1.3858717301819148, + "grad_norm": 1.23951121142384, + "learning_rate": 4.7324946223624625e-06, + "loss": 0.274588406085968, + "step": 5219 + }, + { + "epoch": 1.3861372991634577, + "grad_norm": 1.209560473571303, + "learning_rate": 4.728762892156079e-06, + "loss": 0.2242514044046402, + "step": 5220 + }, + { + "epoch": 1.3864028681450007, + "grad_norm": 1.1337174836883812, + "learning_rate": 4.725032178182042e-06, + "loss": 0.19989261031150818, + "step": 5221 + }, + { + "epoch": 1.3866684371265436, + "grad_norm": 1.1989339880554155, + "learning_rate": 4.721302481159588e-06, + "loss": 0.24409207701683044, + "step": 5222 + }, + { + "epoch": 1.3869340061080866, + "grad_norm": 1.2425140627800753, + "learning_rate": 4.71757380180776e-06, + "loss": 0.25146353244781494, + "step": 5223 + }, + { + "epoch": 1.3871995750896295, + "grad_norm": 1.245669068902739, + "learning_rate": 4.713846140845401e-06, + "loss": 0.23076622188091278, + "step": 5224 + }, + { + "epoch": 1.3874651440711725, + "grad_norm": 1.1122357580396618, + "learning_rate": 4.7101194989911635e-06, + "loss": 0.2159188687801361, + "step": 5225 + }, + { + "epoch": 1.3877307130527154, + "grad_norm": 1.433039209205417, + "learning_rate": 4.706393876963497e-06, + "loss": 0.24891307950019836, + "step": 5226 + }, + { + "epoch": 1.3879962820342584, + "grad_norm": 1.2167285098476437, + "learning_rate": 4.702669275480659e-06, + "loss": 0.26254773139953613, + "step": 5227 + }, + { + "epoch": 1.3882618510158014, + "grad_norm": 1.0872799599118763, + "learning_rate": 4.698945695260709e-06, + "loss": 0.19589121639728546, + "step": 5228 + }, + { + "epoch": 1.3885274199973443, + "grad_norm": 1.273899860234835, + "learning_rate": 4.695223137021509e-06, + "loss": 0.23796147108078003, + "step": 5229 + }, + { + "epoch": 1.3887929889788873, + "grad_norm": 1.1566738109261303, + "learning_rate": 4.6915016014807235e-06, + "loss": 0.21211156249046326, + "step": 5230 + }, + { + "epoch": 1.3890585579604302, + "grad_norm": 1.1477189909918881, + "learning_rate": 4.687781089355817e-06, + "loss": 0.22418555617332458, + "step": 5231 + }, + { + "epoch": 1.3893241269419732, + "grad_norm": 1.1999712861158167, + "learning_rate": 4.68406160136407e-06, + "loss": 0.24140511453151703, + "step": 5232 + }, + { + "epoch": 1.389589695923516, + "grad_norm": 1.3515422291949701, + "learning_rate": 4.68034313822255e-06, + "loss": 0.2863473892211914, + "step": 5233 + }, + { + "epoch": 1.389855264905059, + "grad_norm": 1.1002404477789451, + "learning_rate": 4.676625700648133e-06, + "loss": 0.21283546090126038, + "step": 5234 + }, + { + "epoch": 1.390120833886602, + "grad_norm": 1.311958297113244, + "learning_rate": 4.672909289357498e-06, + "loss": 0.2701990008354187, + "step": 5235 + }, + { + "epoch": 1.390386402868145, + "grad_norm": 1.1672674472381515, + "learning_rate": 4.669193905067124e-06, + "loss": 0.23807264864444733, + "step": 5236 + }, + { + "epoch": 1.390651971849688, + "grad_norm": 1.3282268361230456, + "learning_rate": 4.665479548493298e-06, + "loss": 0.22204206883907318, + "step": 5237 + }, + { + "epoch": 1.3909175408312309, + "grad_norm": 1.2590492281878678, + "learning_rate": 4.661766220352098e-06, + "loss": 0.22389569878578186, + "step": 5238 + }, + { + "epoch": 1.3911831098127738, + "grad_norm": 1.2844920522393721, + "learning_rate": 4.65805392135941e-06, + "loss": 0.23752997815608978, + "step": 5239 + }, + { + "epoch": 1.3914486787943168, + "grad_norm": 1.8677910056359206, + "learning_rate": 4.654342652230921e-06, + "loss": 0.24055880308151245, + "step": 5240 + }, + { + "epoch": 1.3917142477758597, + "grad_norm": 1.2030621240735913, + "learning_rate": 4.6506324136821255e-06, + "loss": 0.22136151790618896, + "step": 5241 + }, + { + "epoch": 1.3919798167574027, + "grad_norm": 1.299031121789001, + "learning_rate": 4.646923206428311e-06, + "loss": 0.2616429924964905, + "step": 5242 + }, + { + "epoch": 1.3922453857389456, + "grad_norm": 1.218734267375269, + "learning_rate": 4.643215031184569e-06, + "loss": 0.24827662110328674, + "step": 5243 + }, + { + "epoch": 1.3925109547204886, + "grad_norm": 1.3223478407487963, + "learning_rate": 4.639507888665792e-06, + "loss": 0.21999669075012207, + "step": 5244 + }, + { + "epoch": 1.3927765237020315, + "grad_norm": 1.3241857590600639, + "learning_rate": 4.6358017795866715e-06, + "loss": 0.24511300027370453, + "step": 5245 + }, + { + "epoch": 1.3930420926835745, + "grad_norm": 1.2459535025826622, + "learning_rate": 4.632096704661704e-06, + "loss": 0.2410753220319748, + "step": 5246 + }, + { + "epoch": 1.3933076616651174, + "grad_norm": 1.157173292152249, + "learning_rate": 4.628392664605184e-06, + "loss": 0.2160021960735321, + "step": 5247 + }, + { + "epoch": 1.3935732306466604, + "grad_norm": 1.2204303717623475, + "learning_rate": 4.624689660131204e-06, + "loss": 0.22672782838344574, + "step": 5248 + }, + { + "epoch": 1.3938387996282033, + "grad_norm": 1.3056904555347544, + "learning_rate": 4.620987691953659e-06, + "loss": 0.25474926829338074, + "step": 5249 + }, + { + "epoch": 1.3941043686097463, + "grad_norm": 1.3078938706976893, + "learning_rate": 4.617286760786252e-06, + "loss": 0.2449323832988739, + "step": 5250 + }, + { + "epoch": 1.3943699375912892, + "grad_norm": 1.4350253205296164, + "learning_rate": 4.613586867342473e-06, + "loss": 0.23727643489837646, + "step": 5251 + }, + { + "epoch": 1.3946355065728322, + "grad_norm": 1.492440797106639, + "learning_rate": 4.609888012335624e-06, + "loss": 0.23727962374687195, + "step": 5252 + }, + { + "epoch": 1.3949010755543751, + "grad_norm": 1.1595482332609377, + "learning_rate": 4.60619019647879e-06, + "loss": 0.21957805752754211, + "step": 5253 + }, + { + "epoch": 1.395166644535918, + "grad_norm": 1.1972608851584254, + "learning_rate": 4.6024934204848745e-06, + "loss": 0.24184471368789673, + "step": 5254 + }, + { + "epoch": 1.395432213517461, + "grad_norm": 1.2654091836286674, + "learning_rate": 4.598797685066568e-06, + "loss": 0.239216148853302, + "step": 5255 + }, + { + "epoch": 1.395697782499004, + "grad_norm": 1.1503034311319646, + "learning_rate": 4.595102990936367e-06, + "loss": 0.17741018533706665, + "step": 5256 + }, + { + "epoch": 1.395963351480547, + "grad_norm": 1.2669115039567294, + "learning_rate": 4.591409338806566e-06, + "loss": 0.26139867305755615, + "step": 5257 + }, + { + "epoch": 1.39622892046209, + "grad_norm": 1.1295627244433792, + "learning_rate": 4.587716729389251e-06, + "loss": 0.23689255118370056, + "step": 5258 + }, + { + "epoch": 1.3964944894436329, + "grad_norm": 1.3449494333614898, + "learning_rate": 4.584025163396323e-06, + "loss": 0.22679267823696136, + "step": 5259 + }, + { + "epoch": 1.3967600584251758, + "grad_norm": 1.4665032620533849, + "learning_rate": 4.580334641539467e-06, + "loss": 0.2743435204029083, + "step": 5260 + }, + { + "epoch": 1.3970256274067188, + "grad_norm": 1.166091966014122, + "learning_rate": 4.5766451645301735e-06, + "loss": 0.22738990187644958, + "step": 5261 + }, + { + "epoch": 1.3972911963882617, + "grad_norm": 1.2398512539901747, + "learning_rate": 4.57295673307973e-06, + "loss": 0.24826082587242126, + "step": 5262 + }, + { + "epoch": 1.3975567653698049, + "grad_norm": 1.2172880570038314, + "learning_rate": 4.569269347899222e-06, + "loss": 0.23121042549610138, + "step": 5263 + }, + { + "epoch": 1.3978223343513478, + "grad_norm": 2.1881918032824443, + "learning_rate": 4.5655830096995345e-06, + "loss": 0.21382957696914673, + "step": 5264 + }, + { + "epoch": 1.3980879033328908, + "grad_norm": 1.6700623666107715, + "learning_rate": 4.561897719191349e-06, + "loss": 0.24439184367656708, + "step": 5265 + }, + { + "epoch": 1.3983534723144337, + "grad_norm": 1.1734120938371422, + "learning_rate": 4.558213477085148e-06, + "loss": 0.2106003314256668, + "step": 5266 + }, + { + "epoch": 1.3986190412959767, + "grad_norm": 1.568387486793487, + "learning_rate": 4.554530284091209e-06, + "loss": 0.3073291480541229, + "step": 5267 + }, + { + "epoch": 1.3988846102775196, + "grad_norm": 1.226744359266016, + "learning_rate": 4.550848140919606e-06, + "loss": 0.2448226660490036, + "step": 5268 + }, + { + "epoch": 1.3991501792590626, + "grad_norm": 1.4434974870419186, + "learning_rate": 4.5471670482802165e-06, + "loss": 0.25378671288490295, + "step": 5269 + }, + { + "epoch": 1.3994157482406056, + "grad_norm": 1.243366792714921, + "learning_rate": 4.5434870068827086e-06, + "loss": 0.2735089659690857, + "step": 5270 + }, + { + "epoch": 1.3996813172221485, + "grad_norm": 1.3983115308066707, + "learning_rate": 4.539808017436552e-06, + "loss": 0.2530548870563507, + "step": 5271 + }, + { + "epoch": 1.3999468862036915, + "grad_norm": 1.2566722493021396, + "learning_rate": 4.536130080651015e-06, + "loss": 0.23692254722118378, + "step": 5272 + }, + { + "epoch": 1.4002124551852344, + "grad_norm": 1.257120121799197, + "learning_rate": 4.532453197235155e-06, + "loss": 0.24554882943630219, + "step": 5273 + }, + { + "epoch": 1.4004780241667774, + "grad_norm": 1.2106096425654094, + "learning_rate": 4.528777367897837e-06, + "loss": 0.20152084529399872, + "step": 5274 + }, + { + "epoch": 1.4007435931483203, + "grad_norm": 1.207683737630722, + "learning_rate": 4.525102593347714e-06, + "loss": 0.20908965170383453, + "step": 5275 + }, + { + "epoch": 1.4010091621298633, + "grad_norm": 1.2398706056963738, + "learning_rate": 4.521428874293238e-06, + "loss": 0.23158209025859833, + "step": 5276 + }, + { + "epoch": 1.4012747311114062, + "grad_norm": 1.2494835342931663, + "learning_rate": 4.517756211442664e-06, + "loss": 0.2483675330877304, + "step": 5277 + }, + { + "epoch": 1.4015403000929492, + "grad_norm": 1.1662936164598174, + "learning_rate": 4.514084605504035e-06, + "loss": 0.23435397446155548, + "step": 5278 + }, + { + "epoch": 1.4018058690744921, + "grad_norm": 1.242534131664269, + "learning_rate": 4.510414057185195e-06, + "loss": 0.2605316936969757, + "step": 5279 + }, + { + "epoch": 1.402071438056035, + "grad_norm": 1.148911142729499, + "learning_rate": 4.506744567193782e-06, + "loss": 0.2279929518699646, + "step": 5280 + }, + { + "epoch": 1.402337007037578, + "grad_norm": 1.1849060379752767, + "learning_rate": 4.503076136237228e-06, + "loss": 0.23011639714241028, + "step": 5281 + }, + { + "epoch": 1.402602576019121, + "grad_norm": 1.1735153050753564, + "learning_rate": 4.499408765022765e-06, + "loss": 0.213611900806427, + "step": 5282 + }, + { + "epoch": 1.402868145000664, + "grad_norm": 1.3225078215525052, + "learning_rate": 4.495742454257418e-06, + "loss": 0.25555503368377686, + "step": 5283 + }, + { + "epoch": 1.4031337139822069, + "grad_norm": 1.331030123703595, + "learning_rate": 4.4920772046480095e-06, + "loss": 0.2694614827632904, + "step": 5284 + }, + { + "epoch": 1.4033992829637498, + "grad_norm": 1.3958578164403037, + "learning_rate": 4.4884130169011565e-06, + "loss": 0.2160607874393463, + "step": 5285 + }, + { + "epoch": 1.4036648519452928, + "grad_norm": 1.4996515147203022, + "learning_rate": 4.48474989172327e-06, + "loss": 0.2556128203868866, + "step": 5286 + }, + { + "epoch": 1.4039304209268357, + "grad_norm": 1.2506403611380352, + "learning_rate": 4.481087829820558e-06, + "loss": 0.2251313328742981, + "step": 5287 + }, + { + "epoch": 1.4041959899083787, + "grad_norm": 1.380992563161254, + "learning_rate": 4.477426831899024e-06, + "loss": 0.26856666803359985, + "step": 5288 + }, + { + "epoch": 1.4044615588899216, + "grad_norm": 1.2429158128712894, + "learning_rate": 4.473766898664464e-06, + "loss": 0.25573840737342834, + "step": 5289 + }, + { + "epoch": 1.4047271278714646, + "grad_norm": 1.2559748496125192, + "learning_rate": 4.4701080308224685e-06, + "loss": 0.26519301533699036, + "step": 5290 + }, + { + "epoch": 1.4049926968530075, + "grad_norm": 1.5959863642176566, + "learning_rate": 4.466450229078427e-06, + "loss": 0.2329619824886322, + "step": 5291 + }, + { + "epoch": 1.4052582658345505, + "grad_norm": 1.208485124140325, + "learning_rate": 4.4627934941375185e-06, + "loss": 0.2243901491165161, + "step": 5292 + }, + { + "epoch": 1.4055238348160934, + "grad_norm": 1.2042065274178317, + "learning_rate": 4.45913782670472e-06, + "loss": 0.22516998648643494, + "step": 5293 + }, + { + "epoch": 1.4057894037976364, + "grad_norm": 1.2427926273641645, + "learning_rate": 4.455483227484796e-06, + "loss": 0.25573113560676575, + "step": 5294 + }, + { + "epoch": 1.4060549727791793, + "grad_norm": 1.3935629686917204, + "learning_rate": 4.451829697182317e-06, + "loss": 0.2568536698818207, + "step": 5295 + }, + { + "epoch": 1.4063205417607223, + "grad_norm": 1.293797792298673, + "learning_rate": 4.448177236501638e-06, + "loss": 0.24510663747787476, + "step": 5296 + }, + { + "epoch": 1.4065861107422652, + "grad_norm": 1.3445763390180965, + "learning_rate": 4.444525846146911e-06, + "loss": 0.24890470504760742, + "step": 5297 + }, + { + "epoch": 1.4068516797238082, + "grad_norm": 1.3096169257052843, + "learning_rate": 4.440875526822081e-06, + "loss": 0.21442994475364685, + "step": 5298 + }, + { + "epoch": 1.4071172487053512, + "grad_norm": 1.2628911672392604, + "learning_rate": 4.437226279230884e-06, + "loss": 0.24281370639801025, + "step": 5299 + }, + { + "epoch": 1.407382817686894, + "grad_norm": 1.2336479145010515, + "learning_rate": 4.433578104076853e-06, + "loss": 0.19542500376701355, + "step": 5300 + }, + { + "epoch": 1.407648386668437, + "grad_norm": 1.256359230599367, + "learning_rate": 4.429931002063315e-06, + "loss": 0.22688990831375122, + "step": 5301 + }, + { + "epoch": 1.40791395564998, + "grad_norm": 1.3692436485711592, + "learning_rate": 4.42628497389339e-06, + "loss": 0.2520858347415924, + "step": 5302 + }, + { + "epoch": 1.408179524631523, + "grad_norm": 1.1723697651028326, + "learning_rate": 4.42264002026998e-06, + "loss": 0.237991064786911, + "step": 5303 + }, + { + "epoch": 1.408445093613066, + "grad_norm": 1.1277997255078087, + "learning_rate": 4.418996141895797e-06, + "loss": 0.20164436101913452, + "step": 5304 + }, + { + "epoch": 1.408710662594609, + "grad_norm": 1.2657361694815492, + "learning_rate": 4.415353339473338e-06, + "loss": 0.24009189009666443, + "step": 5305 + }, + { + "epoch": 1.408976231576152, + "grad_norm": 1.138145945953283, + "learning_rate": 4.411711613704889e-06, + "loss": 0.23170322179794312, + "step": 5306 + }, + { + "epoch": 1.409241800557695, + "grad_norm": 1.2244077415708243, + "learning_rate": 4.408070965292534e-06, + "loss": 0.2280617356300354, + "step": 5307 + }, + { + "epoch": 1.409507369539238, + "grad_norm": 1.2724409466040383, + "learning_rate": 4.404431394938145e-06, + "loss": 0.21982887387275696, + "step": 5308 + }, + { + "epoch": 1.409772938520781, + "grad_norm": 1.265647410959733, + "learning_rate": 4.40079290334339e-06, + "loss": 0.25295430421829224, + "step": 5309 + }, + { + "epoch": 1.4100385075023238, + "grad_norm": 1.1099961782761754, + "learning_rate": 4.397155491209727e-06, + "loss": 0.20109041035175323, + "step": 5310 + }, + { + "epoch": 1.4103040764838668, + "grad_norm": 1.3436616824827443, + "learning_rate": 4.393519159238405e-06, + "loss": 0.2487715482711792, + "step": 5311 + }, + { + "epoch": 1.4105696454654097, + "grad_norm": 1.1475311486694626, + "learning_rate": 4.389883908130465e-06, + "loss": 0.2031790167093277, + "step": 5312 + }, + { + "epoch": 1.4108352144469527, + "grad_norm": 1.277969729475343, + "learning_rate": 4.386249738586744e-06, + "loss": 0.23029211163520813, + "step": 5313 + }, + { + "epoch": 1.4111007834284957, + "grad_norm": 1.2100830863469687, + "learning_rate": 4.382616651307866e-06, + "loss": 0.23080995678901672, + "step": 5314 + }, + { + "epoch": 1.4113663524100386, + "grad_norm": 1.2376227742095711, + "learning_rate": 4.378984646994248e-06, + "loss": 0.2450534999370575, + "step": 5315 + }, + { + "epoch": 1.4116319213915816, + "grad_norm": 1.266655148641824, + "learning_rate": 4.375353726346094e-06, + "loss": 0.24349799752235413, + "step": 5316 + }, + { + "epoch": 1.4118974903731245, + "grad_norm": 1.2696628766548714, + "learning_rate": 4.371723890063411e-06, + "loss": 0.2431599199771881, + "step": 5317 + }, + { + "epoch": 1.4121630593546675, + "grad_norm": 1.3688178233929764, + "learning_rate": 4.368095138845978e-06, + "loss": 0.2051251232624054, + "step": 5318 + }, + { + "epoch": 1.4124286283362104, + "grad_norm": 1.1726447102511934, + "learning_rate": 4.36446747339338e-06, + "loss": 0.21346575021743774, + "step": 5319 + }, + { + "epoch": 1.4126941973177534, + "grad_norm": 1.2726406383058895, + "learning_rate": 4.360840894404989e-06, + "loss": 0.22193217277526855, + "step": 5320 + }, + { + "epoch": 1.4129597662992963, + "grad_norm": 1.2762131056761095, + "learning_rate": 4.357215402579961e-06, + "loss": 0.2112501859664917, + "step": 5321 + }, + { + "epoch": 1.4132253352808393, + "grad_norm": 1.1864412536946314, + "learning_rate": 4.3535909986172565e-06, + "loss": 0.2648766040802002, + "step": 5322 + }, + { + "epoch": 1.4134909042623822, + "grad_norm": 1.1533413783243194, + "learning_rate": 4.349967683215614e-06, + "loss": 0.22139690816402435, + "step": 5323 + }, + { + "epoch": 1.4137564732439252, + "grad_norm": 1.0259028802936685, + "learning_rate": 4.346345457073568e-06, + "loss": 0.21558481454849243, + "step": 5324 + }, + { + "epoch": 1.4140220422254681, + "grad_norm": 1.2763949378052617, + "learning_rate": 4.342724320889438e-06, + "loss": 0.2013886272907257, + "step": 5325 + }, + { + "epoch": 1.414287611207011, + "grad_norm": 1.2216640015824227, + "learning_rate": 4.3391042753613375e-06, + "loss": 0.2428729385137558, + "step": 5326 + }, + { + "epoch": 1.414553180188554, + "grad_norm": 1.2385329501903242, + "learning_rate": 4.3354853211871696e-06, + "loss": 0.20930354297161102, + "step": 5327 + }, + { + "epoch": 1.414818749170097, + "grad_norm": 1.1373474530618315, + "learning_rate": 4.331867459064623e-06, + "loss": 0.18988853693008423, + "step": 5328 + }, + { + "epoch": 1.41508431815164, + "grad_norm": 1.2833653393491664, + "learning_rate": 4.328250689691182e-06, + "loss": 0.24618801474571228, + "step": 5329 + }, + { + "epoch": 1.4153498871331829, + "grad_norm": 1.2635824567099267, + "learning_rate": 4.324635013764113e-06, + "loss": 0.23857265710830688, + "step": 5330 + }, + { + "epoch": 1.4156154561147258, + "grad_norm": 1.3200622076177175, + "learning_rate": 4.321020431980483e-06, + "loss": 0.21869014203548431, + "step": 5331 + }, + { + "epoch": 1.4158810250962688, + "grad_norm": 1.2317649692424293, + "learning_rate": 4.317406945037138e-06, + "loss": 0.2508969008922577, + "step": 5332 + }, + { + "epoch": 1.4161465940778117, + "grad_norm": 1.2114692744130235, + "learning_rate": 4.313794553630711e-06, + "loss": 0.2406233549118042, + "step": 5333 + }, + { + "epoch": 1.4164121630593547, + "grad_norm": 1.3314396378070763, + "learning_rate": 4.310183258457632e-06, + "loss": 0.2376224398612976, + "step": 5334 + }, + { + "epoch": 1.4166777320408976, + "grad_norm": 1.4802475566731417, + "learning_rate": 4.306573060214115e-06, + "loss": 0.2818688750267029, + "step": 5335 + }, + { + "epoch": 1.4169433010224406, + "grad_norm": 1.2248721858463099, + "learning_rate": 4.302963959596165e-06, + "loss": 0.2279777228832245, + "step": 5336 + }, + { + "epoch": 1.4172088700039835, + "grad_norm": 1.3681495314955672, + "learning_rate": 4.299355957299573e-06, + "loss": 0.2652052640914917, + "step": 5337 + }, + { + "epoch": 1.4174744389855265, + "grad_norm": 1.2814638931564002, + "learning_rate": 4.2957490540199185e-06, + "loss": 0.24415750801563263, + "step": 5338 + }, + { + "epoch": 1.4177400079670694, + "grad_norm": 1.2028147011593575, + "learning_rate": 4.292143250452569e-06, + "loss": 0.2318287044763565, + "step": 5339 + }, + { + "epoch": 1.4180055769486124, + "grad_norm": 1.1621443407054215, + "learning_rate": 4.288538547292685e-06, + "loss": 0.19914361834526062, + "step": 5340 + }, + { + "epoch": 1.4182711459301554, + "grad_norm": 1.2533818722517012, + "learning_rate": 4.2849349452352095e-06, + "loss": 0.22550678253173828, + "step": 5341 + }, + { + "epoch": 1.4185367149116983, + "grad_norm": 1.3481328868952585, + "learning_rate": 4.281332444974874e-06, + "loss": 0.25001436471939087, + "step": 5342 + }, + { + "epoch": 1.4188022838932413, + "grad_norm": 1.2557895781680242, + "learning_rate": 4.277731047206197e-06, + "loss": 0.24873407185077667, + "step": 5343 + }, + { + "epoch": 1.4190678528747842, + "grad_norm": 1.2532145662207181, + "learning_rate": 4.274130752623487e-06, + "loss": 0.25732600688934326, + "step": 5344 + }, + { + "epoch": 1.4193334218563272, + "grad_norm": 1.1956499236331526, + "learning_rate": 4.270531561920836e-06, + "loss": 0.1894054263830185, + "step": 5345 + }, + { + "epoch": 1.4195989908378701, + "grad_norm": 1.2861805940078326, + "learning_rate": 4.2669334757921284e-06, + "loss": 0.2632025480270386, + "step": 5346 + }, + { + "epoch": 1.419864559819413, + "grad_norm": 1.1223708980675566, + "learning_rate": 4.2633364949310315e-06, + "loss": 0.22106415033340454, + "step": 5347 + }, + { + "epoch": 1.420130128800956, + "grad_norm": 1.2191554963858982, + "learning_rate": 4.259740620031e-06, + "loss": 0.2246699184179306, + "step": 5348 + }, + { + "epoch": 1.420395697782499, + "grad_norm": 1.2377251567235985, + "learning_rate": 4.256145851785277e-06, + "loss": 0.2335890382528305, + "step": 5349 + }, + { + "epoch": 1.420661266764042, + "grad_norm": 1.3200881727026734, + "learning_rate": 4.252552190886892e-06, + "loss": 0.25485220551490784, + "step": 5350 + }, + { + "epoch": 1.4209268357455849, + "grad_norm": 1.406483107573335, + "learning_rate": 4.248959638028659e-06, + "loss": 0.26234719157218933, + "step": 5351 + }, + { + "epoch": 1.4211924047271278, + "grad_norm": 1.1946878328095272, + "learning_rate": 4.245368193903181e-06, + "loss": 0.22083795070648193, + "step": 5352 + }, + { + "epoch": 1.4214579737086708, + "grad_norm": 1.288602079194267, + "learning_rate": 4.241777859202846e-06, + "loss": 0.1886332929134369, + "step": 5353 + }, + { + "epoch": 1.4217235426902137, + "grad_norm": 1.506700165302322, + "learning_rate": 4.238188634619826e-06, + "loss": 0.26154160499572754, + "step": 5354 + }, + { + "epoch": 1.4219891116717567, + "grad_norm": 1.1472960297751262, + "learning_rate": 4.234600520846085e-06, + "loss": 0.24761158227920532, + "step": 5355 + }, + { + "epoch": 1.4222546806532996, + "grad_norm": 1.154393443673505, + "learning_rate": 4.2310135185733625e-06, + "loss": 0.20936736464500427, + "step": 5356 + }, + { + "epoch": 1.4225202496348426, + "grad_norm": 1.15600424022186, + "learning_rate": 4.227427628493198e-06, + "loss": 0.2173127979040146, + "step": 5357 + }, + { + "epoch": 1.4227858186163855, + "grad_norm": 1.217414245555098, + "learning_rate": 4.223842851296907e-06, + "loss": 0.2598559260368347, + "step": 5358 + }, + { + "epoch": 1.4230513875979285, + "grad_norm": 1.224021391863692, + "learning_rate": 4.22025918767559e-06, + "loss": 0.23701196908950806, + "step": 5359 + }, + { + "epoch": 1.4233169565794714, + "grad_norm": 1.2134140712383175, + "learning_rate": 4.216676638320135e-06, + "loss": 0.26052403450012207, + "step": 5360 + }, + { + "epoch": 1.4235825255610144, + "grad_norm": 1.2465682642545985, + "learning_rate": 4.213095203921217e-06, + "loss": 0.2464584857225418, + "step": 5361 + }, + { + "epoch": 1.4238480945425573, + "grad_norm": 1.2646547527576821, + "learning_rate": 4.209514885169294e-06, + "loss": 0.25889426469802856, + "step": 5362 + }, + { + "epoch": 1.4241136635241003, + "grad_norm": 1.2990812156107416, + "learning_rate": 4.2059356827546076e-06, + "loss": 0.26529380679130554, + "step": 5363 + }, + { + "epoch": 1.4243792325056432, + "grad_norm": 1.1509506747022789, + "learning_rate": 4.202357597367187e-06, + "loss": 0.2284630388021469, + "step": 5364 + }, + { + "epoch": 1.4246448014871862, + "grad_norm": 1.1509689814009059, + "learning_rate": 4.198780629696845e-06, + "loss": 0.2361873984336853, + "step": 5365 + }, + { + "epoch": 1.4249103704687291, + "grad_norm": 1.2489364054166838, + "learning_rate": 4.195204780433179e-06, + "loss": 0.2473624348640442, + "step": 5366 + }, + { + "epoch": 1.425175939450272, + "grad_norm": 1.2584581044476912, + "learning_rate": 4.19163005026557e-06, + "loss": 0.24852773547172546, + "step": 5367 + }, + { + "epoch": 1.425441508431815, + "grad_norm": 1.413523972125062, + "learning_rate": 4.188056439883183e-06, + "loss": 0.28409647941589355, + "step": 5368 + }, + { + "epoch": 1.425707077413358, + "grad_norm": 1.2672381227374172, + "learning_rate": 4.18448394997497e-06, + "loss": 0.2500985562801361, + "step": 5369 + }, + { + "epoch": 1.425972646394901, + "grad_norm": 1.2421534737421158, + "learning_rate": 4.1809125812296635e-06, + "loss": 0.23475977778434753, + "step": 5370 + }, + { + "epoch": 1.426238215376444, + "grad_norm": 1.3107626948919207, + "learning_rate": 4.177342334335782e-06, + "loss": 0.22925345599651337, + "step": 5371 + }, + { + "epoch": 1.4265037843579869, + "grad_norm": 1.1701714137905739, + "learning_rate": 4.173773209981627e-06, + "loss": 0.24463894963264465, + "step": 5372 + }, + { + "epoch": 1.4267693533395298, + "grad_norm": 1.2600839330793319, + "learning_rate": 4.170205208855281e-06, + "loss": 0.2451590746641159, + "step": 5373 + }, + { + "epoch": 1.4270349223210728, + "grad_norm": 1.192456234510782, + "learning_rate": 4.166638331644613e-06, + "loss": 0.21078437566757202, + "step": 5374 + }, + { + "epoch": 1.427300491302616, + "grad_norm": 1.1548728286132999, + "learning_rate": 4.163072579037279e-06, + "loss": 0.21466529369354248, + "step": 5375 + }, + { + "epoch": 1.4275660602841589, + "grad_norm": 1.3327200015078104, + "learning_rate": 4.159507951720713e-06, + "loss": 0.20103147625923157, + "step": 5376 + }, + { + "epoch": 1.4278316292657018, + "grad_norm": 1.2634022835060015, + "learning_rate": 4.15594445038213e-06, + "loss": 0.2618871331214905, + "step": 5377 + }, + { + "epoch": 1.4280971982472448, + "grad_norm": 1.314150540124243, + "learning_rate": 4.152382075708534e-06, + "loss": 0.2496388852596283, + "step": 5378 + }, + { + "epoch": 1.4283627672287877, + "grad_norm": 1.2776066314767451, + "learning_rate": 4.148820828386707e-06, + "loss": 0.2663899064064026, + "step": 5379 + }, + { + "epoch": 1.4286283362103307, + "grad_norm": 1.223751737565641, + "learning_rate": 4.145260709103216e-06, + "loss": 0.23617541790008545, + "step": 5380 + }, + { + "epoch": 1.4288939051918736, + "grad_norm": 1.2184450229688006, + "learning_rate": 4.141701718544411e-06, + "loss": 0.200006365776062, + "step": 5381 + }, + { + "epoch": 1.4291594741734166, + "grad_norm": 1.2899877428495155, + "learning_rate": 4.138143857396425e-06, + "loss": 0.22707203030586243, + "step": 5382 + }, + { + "epoch": 1.4294250431549596, + "grad_norm": 1.210998695531734, + "learning_rate": 4.134587126345162e-06, + "loss": 0.23903624713420868, + "step": 5383 + }, + { + "epoch": 1.4296906121365025, + "grad_norm": 1.56990305006701, + "learning_rate": 4.131031526076329e-06, + "loss": 0.2308908998966217, + "step": 5384 + }, + { + "epoch": 1.4299561811180455, + "grad_norm": 1.2125776866133393, + "learning_rate": 4.127477057275398e-06, + "loss": 0.18762601912021637, + "step": 5385 + }, + { + "epoch": 1.4302217500995884, + "grad_norm": 1.3670823879917342, + "learning_rate": 4.123923720627633e-06, + "loss": 0.281406044960022, + "step": 5386 + }, + { + "epoch": 1.4304873190811314, + "grad_norm": 1.24677960623226, + "learning_rate": 4.120371516818071e-06, + "loss": 0.24858589470386505, + "step": 5387 + }, + { + "epoch": 1.4307528880626743, + "grad_norm": 1.2017896897650255, + "learning_rate": 4.116820446531538e-06, + "loss": 0.22179371118545532, + "step": 5388 + }, + { + "epoch": 1.4310184570442173, + "grad_norm": 1.1523445225939053, + "learning_rate": 4.113270510452636e-06, + "loss": 0.22086869180202484, + "step": 5389 + }, + { + "epoch": 1.4312840260257602, + "grad_norm": 1.295626323300653, + "learning_rate": 4.109721709265753e-06, + "loss": 0.231503427028656, + "step": 5390 + }, + { + "epoch": 1.4315495950073032, + "grad_norm": 1.31237620612278, + "learning_rate": 4.106174043655054e-06, + "loss": 0.255252867937088, + "step": 5391 + }, + { + "epoch": 1.4318151639888461, + "grad_norm": 1.2773394357808008, + "learning_rate": 4.1026275143044854e-06, + "loss": 0.23336587846279144, + "step": 5392 + }, + { + "epoch": 1.432080732970389, + "grad_norm": 1.3267952754600625, + "learning_rate": 4.099082121897783e-06, + "loss": 0.2468583881855011, + "step": 5393 + }, + { + "epoch": 1.432346301951932, + "grad_norm": 1.2137255679394872, + "learning_rate": 4.095537867118452e-06, + "loss": 0.21211153268814087, + "step": 5394 + }, + { + "epoch": 1.432611870933475, + "grad_norm": 1.2552061461264346, + "learning_rate": 4.091994750649783e-06, + "loss": 0.23173204064369202, + "step": 5395 + }, + { + "epoch": 1.432877439915018, + "grad_norm": 1.2420339991667666, + "learning_rate": 4.088452773174853e-06, + "loss": 0.2606658935546875, + "step": 5396 + }, + { + "epoch": 1.4331430088965609, + "grad_norm": 1.2141954954044303, + "learning_rate": 4.084911935376502e-06, + "loss": 0.21198314428329468, + "step": 5397 + }, + { + "epoch": 1.4334085778781038, + "grad_norm": 1.273859413406427, + "learning_rate": 4.08137223793737e-06, + "loss": 0.216193288564682, + "step": 5398 + }, + { + "epoch": 1.4336741468596468, + "grad_norm": 1.3862686522767422, + "learning_rate": 4.077833681539866e-06, + "loss": 0.27767330408096313, + "step": 5399 + }, + { + "epoch": 1.4339397158411897, + "grad_norm": 1.193043888736233, + "learning_rate": 4.0742962668661826e-06, + "loss": 0.21584349870681763, + "step": 5400 + }, + { + "epoch": 1.4342052848227327, + "grad_norm": 1.2801175216615184, + "learning_rate": 4.070759994598288e-06, + "loss": 0.220070481300354, + "step": 5401 + }, + { + "epoch": 1.4344708538042756, + "grad_norm": 1.4276288870785, + "learning_rate": 4.067224865417941e-06, + "loss": 0.26035353541374207, + "step": 5402 + }, + { + "epoch": 1.4347364227858186, + "grad_norm": 1.1784144309393945, + "learning_rate": 4.063690880006671e-06, + "loss": 0.23704876005649567, + "step": 5403 + }, + { + "epoch": 1.4350019917673615, + "grad_norm": 1.2793709287846655, + "learning_rate": 4.060158039045785e-06, + "loss": 0.2345760464668274, + "step": 5404 + }, + { + "epoch": 1.4352675607489045, + "grad_norm": 1.2583985201804126, + "learning_rate": 4.056626343216377e-06, + "loss": 0.21307331323623657, + "step": 5405 + }, + { + "epoch": 1.4355331297304474, + "grad_norm": 1.2401804894465362, + "learning_rate": 4.053095793199313e-06, + "loss": 0.22029465436935425, + "step": 5406 + }, + { + "epoch": 1.4357986987119904, + "grad_norm": 1.3865770800537958, + "learning_rate": 4.049566389675244e-06, + "loss": 0.23419252038002014, + "step": 5407 + }, + { + "epoch": 1.4360642676935333, + "grad_norm": 1.2114754283066453, + "learning_rate": 4.046038133324595e-06, + "loss": 0.21648669242858887, + "step": 5408 + }, + { + "epoch": 1.4363298366750763, + "grad_norm": 1.3682353450989566, + "learning_rate": 4.042511024827573e-06, + "loss": 0.2343464195728302, + "step": 5409 + }, + { + "epoch": 1.4365954056566193, + "grad_norm": 1.28417678054491, + "learning_rate": 4.0389850648641615e-06, + "loss": 0.20108605921268463, + "step": 5410 + }, + { + "epoch": 1.4368609746381622, + "grad_norm": 1.2806759093192033, + "learning_rate": 4.0354602541141315e-06, + "loss": 0.21885806322097778, + "step": 5411 + }, + { + "epoch": 1.4371265436197052, + "grad_norm": 1.276580988371958, + "learning_rate": 4.031936593257017e-06, + "loss": 0.2382376492023468, + "step": 5412 + }, + { + "epoch": 1.437392112601248, + "grad_norm": 1.1333519329501958, + "learning_rate": 4.028414082972141e-06, + "loss": 0.21434128284454346, + "step": 5413 + }, + { + "epoch": 1.437657681582791, + "grad_norm": 1.2161992893188567, + "learning_rate": 4.024892723938601e-06, + "loss": 0.2345191240310669, + "step": 5414 + }, + { + "epoch": 1.437923250564334, + "grad_norm": 1.309666461481554, + "learning_rate": 4.021372516835273e-06, + "loss": 0.2478899210691452, + "step": 5415 + }, + { + "epoch": 1.438188819545877, + "grad_norm": 1.2593045594203824, + "learning_rate": 4.017853462340813e-06, + "loss": 0.21356827020645142, + "step": 5416 + }, + { + "epoch": 1.4384543885274201, + "grad_norm": 1.3891493537034765, + "learning_rate": 4.014335561133652e-06, + "loss": 0.26329827308654785, + "step": 5417 + }, + { + "epoch": 1.438719957508963, + "grad_norm": 1.3689872343615141, + "learning_rate": 4.010818813892e-06, + "loss": 0.25880998373031616, + "step": 5418 + }, + { + "epoch": 1.438985526490506, + "grad_norm": 1.2738388972586026, + "learning_rate": 4.007303221293844e-06, + "loss": 0.22749441862106323, + "step": 5419 + }, + { + "epoch": 1.439251095472049, + "grad_norm": 1.2267331489472144, + "learning_rate": 4.00378878401695e-06, + "loss": 0.2242615520954132, + "step": 5420 + }, + { + "epoch": 1.439516664453592, + "grad_norm": 1.168704950265394, + "learning_rate": 4.000275502738862e-06, + "loss": 0.19751839339733124, + "step": 5421 + }, + { + "epoch": 1.439782233435135, + "grad_norm": 1.4000090999513362, + "learning_rate": 3.996763378136895e-06, + "loss": 0.27319905161857605, + "step": 5422 + }, + { + "epoch": 1.4400478024166778, + "grad_norm": 1.1483039760635705, + "learning_rate": 3.993252410888149e-06, + "loss": 0.21676769852638245, + "step": 5423 + }, + { + "epoch": 1.4403133713982208, + "grad_norm": 1.222649759682682, + "learning_rate": 3.989742601669494e-06, + "loss": 0.22788718342781067, + "step": 5424 + }, + { + "epoch": 1.4405789403797638, + "grad_norm": 1.1800102666876688, + "learning_rate": 3.986233951157581e-06, + "loss": 0.23224875330924988, + "step": 5425 + }, + { + "epoch": 1.4408445093613067, + "grad_norm": 1.3242271211713557, + "learning_rate": 3.982726460028836e-06, + "loss": 0.23625247180461884, + "step": 5426 + }, + { + "epoch": 1.4411100783428497, + "grad_norm": 1.237043381628487, + "learning_rate": 3.979220128959463e-06, + "loss": 0.2092093527317047, + "step": 5427 + }, + { + "epoch": 1.4413756473243926, + "grad_norm": 1.164989095324882, + "learning_rate": 3.975714958625442e-06, + "loss": 0.22196070849895477, + "step": 5428 + }, + { + "epoch": 1.4416412163059356, + "grad_norm": 1.248575755705502, + "learning_rate": 3.972210949702525e-06, + "loss": 0.21276375651359558, + "step": 5429 + }, + { + "epoch": 1.4419067852874785, + "grad_norm": 1.2714203744447936, + "learning_rate": 3.968708102866247e-06, + "loss": 0.22150103747844696, + "step": 5430 + }, + { + "epoch": 1.4421723542690215, + "grad_norm": 1.2519929176778726, + "learning_rate": 3.965206418791914e-06, + "loss": 0.24529573321342468, + "step": 5431 + }, + { + "epoch": 1.4424379232505644, + "grad_norm": 1.3331662749929607, + "learning_rate": 3.961705898154609e-06, + "loss": 0.24349135160446167, + "step": 5432 + }, + { + "epoch": 1.4427034922321074, + "grad_norm": 1.3094668545917496, + "learning_rate": 3.9582065416291926e-06, + "loss": 0.23481428623199463, + "step": 5433 + }, + { + "epoch": 1.4429690612136503, + "grad_norm": 1.2664431166747565, + "learning_rate": 3.954708349890299e-06, + "loss": 0.2366936057806015, + "step": 5434 + }, + { + "epoch": 1.4432346301951933, + "grad_norm": 1.2699903819491114, + "learning_rate": 3.951211323612336e-06, + "loss": 0.24792322516441345, + "step": 5435 + }, + { + "epoch": 1.4435001991767362, + "grad_norm": 1.1943208090894295, + "learning_rate": 3.947715463469493e-06, + "loss": 0.22601652145385742, + "step": 5436 + }, + { + "epoch": 1.4437657681582792, + "grad_norm": 1.1333130191791405, + "learning_rate": 3.9442207701357235e-06, + "loss": 0.19603165984153748, + "step": 5437 + }, + { + "epoch": 1.4440313371398221, + "grad_norm": 1.26512939224431, + "learning_rate": 3.940727244284772e-06, + "loss": 0.22619353234767914, + "step": 5438 + }, + { + "epoch": 1.444296906121365, + "grad_norm": 1.3207139711857465, + "learning_rate": 3.937234886590146e-06, + "loss": 0.24836638569831848, + "step": 5439 + }, + { + "epoch": 1.444562475102908, + "grad_norm": 1.2114237797025103, + "learning_rate": 3.933743697725129e-06, + "loss": 0.21585768461227417, + "step": 5440 + }, + { + "epoch": 1.444828044084451, + "grad_norm": 1.2037953387653635, + "learning_rate": 3.930253678362784e-06, + "loss": 0.20876167714595795, + "step": 5441 + }, + { + "epoch": 1.445093613065994, + "grad_norm": 1.2825218153573943, + "learning_rate": 3.926764829175943e-06, + "loss": 0.24337999522686005, + "step": 5442 + }, + { + "epoch": 1.4453591820475369, + "grad_norm": 1.2238662957767994, + "learning_rate": 3.9232771508372155e-06, + "loss": 0.2511219084262848, + "step": 5443 + }, + { + "epoch": 1.4456247510290798, + "grad_norm": 1.2796769482653771, + "learning_rate": 3.919790644018986e-06, + "loss": 0.26257213950157166, + "step": 5444 + }, + { + "epoch": 1.4458903200106228, + "grad_norm": 1.3570371082898334, + "learning_rate": 3.91630530939341e-06, + "loss": 0.2720959782600403, + "step": 5445 + }, + { + "epoch": 1.4461558889921657, + "grad_norm": 1.2897968589877258, + "learning_rate": 3.912821147632421e-06, + "loss": 0.23849177360534668, + "step": 5446 + }, + { + "epoch": 1.4464214579737087, + "grad_norm": 1.2539273982781811, + "learning_rate": 3.909338159407722e-06, + "loss": 0.2366214245557785, + "step": 5447 + }, + { + "epoch": 1.4466870269552516, + "grad_norm": 1.21348130376658, + "learning_rate": 3.905856345390793e-06, + "loss": 0.21905584633350372, + "step": 5448 + }, + { + "epoch": 1.4469525959367946, + "grad_norm": 1.3001423574977207, + "learning_rate": 3.902375706252887e-06, + "loss": 0.23964065313339233, + "step": 5449 + }, + { + "epoch": 1.4472181649183375, + "grad_norm": 1.2161208716702177, + "learning_rate": 3.89889624266503e-06, + "loss": 0.22246500849723816, + "step": 5450 + }, + { + "epoch": 1.4474837338998805, + "grad_norm": 1.2845367508241097, + "learning_rate": 3.895417955298022e-06, + "loss": 0.22980710864067078, + "step": 5451 + }, + { + "epoch": 1.4477493028814234, + "grad_norm": 1.4690832477509688, + "learning_rate": 3.8919408448224346e-06, + "loss": 0.21276253461837769, + "step": 5452 + }, + { + "epoch": 1.4480148718629664, + "grad_norm": 1.3515036942552143, + "learning_rate": 3.888464911908616e-06, + "loss": 0.23925542831420898, + "step": 5453 + }, + { + "epoch": 1.4482804408445094, + "grad_norm": 1.1871457723177183, + "learning_rate": 3.884990157226683e-06, + "loss": 0.21528369188308716, + "step": 5454 + }, + { + "epoch": 1.4485460098260523, + "grad_norm": 1.2673056278722348, + "learning_rate": 3.8815165814465235e-06, + "loss": 0.24563542008399963, + "step": 5455 + }, + { + "epoch": 1.4488115788075953, + "grad_norm": 1.2561210989748839, + "learning_rate": 3.87804418523781e-06, + "loss": 0.2721150517463684, + "step": 5456 + }, + { + "epoch": 1.4490771477891382, + "grad_norm": 1.3721328159682122, + "learning_rate": 3.874572969269976e-06, + "loss": 0.23716527223587036, + "step": 5457 + }, + { + "epoch": 1.4493427167706812, + "grad_norm": 1.5185790933002854, + "learning_rate": 3.871102934212231e-06, + "loss": 0.2182254046201706, + "step": 5458 + }, + { + "epoch": 1.4496082857522241, + "grad_norm": 1.233204842662738, + "learning_rate": 3.867634080733557e-06, + "loss": 0.2179020643234253, + "step": 5459 + }, + { + "epoch": 1.449873854733767, + "grad_norm": 1.2633976965193632, + "learning_rate": 3.864166409502706e-06, + "loss": 0.22901684045791626, + "step": 5460 + }, + { + "epoch": 1.45013942371531, + "grad_norm": 1.209132482684757, + "learning_rate": 3.860699921188211e-06, + "loss": 0.2287352979183197, + "step": 5461 + }, + { + "epoch": 1.450404992696853, + "grad_norm": 1.214494370780124, + "learning_rate": 3.85723461645836e-06, + "loss": 0.2448873668909073, + "step": 5462 + }, + { + "epoch": 1.450670561678396, + "grad_norm": 1.323933009108344, + "learning_rate": 3.85377049598123e-06, + "loss": 0.2693510055541992, + "step": 5463 + }, + { + "epoch": 1.4509361306599389, + "grad_norm": 1.1826355120377283, + "learning_rate": 3.8503075604246554e-06, + "loss": 0.25414884090423584, + "step": 5464 + }, + { + "epoch": 1.4512016996414818, + "grad_norm": 1.3400776704302024, + "learning_rate": 3.846845810456258e-06, + "loss": 0.27798837423324585, + "step": 5465 + }, + { + "epoch": 1.4514672686230248, + "grad_norm": 1.3109571985733361, + "learning_rate": 3.8433852467434175e-06, + "loss": 0.23348593711853027, + "step": 5466 + }, + { + "epoch": 1.4517328376045677, + "grad_norm": 1.148921292979252, + "learning_rate": 3.839925869953292e-06, + "loss": 0.20993635058403015, + "step": 5467 + }, + { + "epoch": 1.4519984065861107, + "grad_norm": 1.1967150813107374, + "learning_rate": 3.836467680752808e-06, + "loss": 0.225263774394989, + "step": 5468 + }, + { + "epoch": 1.4522639755676536, + "grad_norm": 4.549069881323283, + "learning_rate": 3.833010679808662e-06, + "loss": 0.2481595277786255, + "step": 5469 + }, + { + "epoch": 1.4525295445491966, + "grad_norm": 1.098861894900169, + "learning_rate": 3.829554867787324e-06, + "loss": 0.20755310356616974, + "step": 5470 + }, + { + "epoch": 1.4527951135307395, + "grad_norm": 1.3031978879220207, + "learning_rate": 3.826100245355034e-06, + "loss": 0.22124455869197845, + "step": 5471 + }, + { + "epoch": 1.4530606825122825, + "grad_norm": 1.1779333046553406, + "learning_rate": 3.822646813177803e-06, + "loss": 0.23461398482322693, + "step": 5472 + }, + { + "epoch": 1.4533262514938254, + "grad_norm": 1.123494857736561, + "learning_rate": 3.819194571921407e-06, + "loss": 0.22890526056289673, + "step": 5473 + }, + { + "epoch": 1.4535918204753684, + "grad_norm": 1.1163449125196687, + "learning_rate": 3.815743522251406e-06, + "loss": 0.23236533999443054, + "step": 5474 + }, + { + "epoch": 1.4538573894569113, + "grad_norm": 1.204733497516731, + "learning_rate": 3.8122936648331164e-06, + "loss": 0.2192365825176239, + "step": 5475 + }, + { + "epoch": 1.4541229584384543, + "grad_norm": 1.3061324350348682, + "learning_rate": 3.8088450003316346e-06, + "loss": 0.23970162868499756, + "step": 5476 + }, + { + "epoch": 1.4543885274199972, + "grad_norm": 1.256131451943752, + "learning_rate": 3.8053975294118163e-06, + "loss": 0.24270984530448914, + "step": 5477 + }, + { + "epoch": 1.4546540964015402, + "grad_norm": 1.1616491435133687, + "learning_rate": 3.801951252738295e-06, + "loss": 0.22228944301605225, + "step": 5478 + }, + { + "epoch": 1.4549196653830831, + "grad_norm": 1.2998939083384287, + "learning_rate": 3.7985061709754735e-06, + "loss": 0.25029584765434265, + "step": 5479 + }, + { + "epoch": 1.455185234364626, + "grad_norm": 1.1546196330858232, + "learning_rate": 3.795062284787522e-06, + "loss": 0.23831725120544434, + "step": 5480 + }, + { + "epoch": 1.455450803346169, + "grad_norm": 1.2698177511587796, + "learning_rate": 3.7916195948383817e-06, + "loss": 0.2571605145931244, + "step": 5481 + }, + { + "epoch": 1.455716372327712, + "grad_norm": 1.4321109332673951, + "learning_rate": 3.7881781017917586e-06, + "loss": 0.2660857141017914, + "step": 5482 + }, + { + "epoch": 1.455981941309255, + "grad_norm": 1.3406733437493707, + "learning_rate": 3.7847378063111394e-06, + "loss": 0.2468302845954895, + "step": 5483 + }, + { + "epoch": 1.456247510290798, + "grad_norm": 1.363296358111954, + "learning_rate": 3.7812987090597696e-06, + "loss": 0.2559482753276825, + "step": 5484 + }, + { + "epoch": 1.4565130792723409, + "grad_norm": 1.2144737578388247, + "learning_rate": 3.7778608107006654e-06, + "loss": 0.24484393000602722, + "step": 5485 + }, + { + "epoch": 1.4567786482538838, + "grad_norm": 1.1782087302857855, + "learning_rate": 3.774424111896614e-06, + "loss": 0.2376541644334793, + "step": 5486 + }, + { + "epoch": 1.4570442172354268, + "grad_norm": 1.1748479481028287, + "learning_rate": 3.770988613310169e-06, + "loss": 0.22265875339508057, + "step": 5487 + }, + { + "epoch": 1.45730978621697, + "grad_norm": 1.2316185421612622, + "learning_rate": 3.7675543156036555e-06, + "loss": 0.2511552572250366, + "step": 5488 + }, + { + "epoch": 1.457575355198513, + "grad_norm": 1.2601957381413438, + "learning_rate": 3.764121219439165e-06, + "loss": 0.2412843108177185, + "step": 5489 + }, + { + "epoch": 1.4578409241800558, + "grad_norm": 1.2622123015546969, + "learning_rate": 3.760689325478559e-06, + "loss": 0.26342809200286865, + "step": 5490 + }, + { + "epoch": 1.4581064931615988, + "grad_norm": 1.2994089172948287, + "learning_rate": 3.7572586343834638e-06, + "loss": 0.23315641283988953, + "step": 5491 + }, + { + "epoch": 1.4583720621431417, + "grad_norm": 1.0927170518216454, + "learning_rate": 3.753829146815279e-06, + "loss": 0.24148929119110107, + "step": 5492 + }, + { + "epoch": 1.4586376311246847, + "grad_norm": 1.363697618202234, + "learning_rate": 3.750400863435166e-06, + "loss": 0.22838115692138672, + "step": 5493 + }, + { + "epoch": 1.4589032001062276, + "grad_norm": 1.2083898158968958, + "learning_rate": 3.746973784904061e-06, + "loss": 0.21669608354568481, + "step": 5494 + }, + { + "epoch": 1.4591687690877706, + "grad_norm": 1.4819576271076944, + "learning_rate": 3.743547911882662e-06, + "loss": 0.25619322061538696, + "step": 5495 + }, + { + "epoch": 1.4594343380693136, + "grad_norm": 1.2058542987095502, + "learning_rate": 3.7401232450314384e-06, + "loss": 0.23629480600357056, + "step": 5496 + }, + { + "epoch": 1.4596999070508565, + "grad_norm": 1.189438722154431, + "learning_rate": 3.7366997850106245e-06, + "loss": 0.21799582242965698, + "step": 5497 + }, + { + "epoch": 1.4599654760323995, + "grad_norm": 1.372571579127378, + "learning_rate": 3.733277532480223e-06, + "loss": 0.2582590579986572, + "step": 5498 + }, + { + "epoch": 1.4602310450139424, + "grad_norm": 1.1675281771435806, + "learning_rate": 3.729856488100003e-06, + "loss": 0.23641736805438995, + "step": 5499 + }, + { + "epoch": 1.4604966139954854, + "grad_norm": 1.3024331747300109, + "learning_rate": 3.7264366525295e-06, + "loss": 0.24150417745113373, + "step": 5500 + }, + { + "epoch": 1.4607621829770283, + "grad_norm": 1.2012687985267718, + "learning_rate": 3.7230180264280245e-06, + "loss": 0.2474009394645691, + "step": 5501 + }, + { + "epoch": 1.4610277519585713, + "grad_norm": 1.3411668359609863, + "learning_rate": 3.7196006104546435e-06, + "loss": 0.269604355096817, + "step": 5502 + }, + { + "epoch": 1.4612933209401142, + "grad_norm": 1.3014753471077654, + "learning_rate": 3.716184405268194e-06, + "loss": 0.24324679374694824, + "step": 5503 + }, + { + "epoch": 1.4615588899216572, + "grad_norm": 1.1306865007600708, + "learning_rate": 3.7127694115272805e-06, + "loss": 0.2249709963798523, + "step": 5504 + }, + { + "epoch": 1.4618244589032001, + "grad_norm": 1.2915165646779034, + "learning_rate": 3.7093556298902734e-06, + "loss": 0.2560918629169464, + "step": 5505 + }, + { + "epoch": 1.462090027884743, + "grad_norm": 1.154084739271703, + "learning_rate": 3.705943061015309e-06, + "loss": 0.22693020105361938, + "step": 5506 + }, + { + "epoch": 1.462355596866286, + "grad_norm": 1.2640727525169442, + "learning_rate": 3.702531705560292e-06, + "loss": 0.2617371678352356, + "step": 5507 + }, + { + "epoch": 1.462621165847829, + "grad_norm": 1.2561844307954502, + "learning_rate": 3.6991215641828903e-06, + "loss": 0.2314397394657135, + "step": 5508 + }, + { + "epoch": 1.462886734829372, + "grad_norm": 1.1063207547372251, + "learning_rate": 3.6957126375405383e-06, + "loss": 0.23186162114143372, + "step": 5509 + }, + { + "epoch": 1.4631523038109149, + "grad_norm": 1.2602306615156422, + "learning_rate": 3.6923049262904375e-06, + "loss": 0.21775083243846893, + "step": 5510 + }, + { + "epoch": 1.4634178727924578, + "grad_norm": 1.2619669881473867, + "learning_rate": 3.688898431089556e-06, + "loss": 0.24707889556884766, + "step": 5511 + }, + { + "epoch": 1.4636834417740008, + "grad_norm": 1.0923805026421214, + "learning_rate": 3.6854931525946237e-06, + "loss": 0.1941150575876236, + "step": 5512 + }, + { + "epoch": 1.4639490107555437, + "grad_norm": 1.0123090946182933, + "learning_rate": 3.6820890914621376e-06, + "loss": 0.17808857560157776, + "step": 5513 + }, + { + "epoch": 1.4642145797370867, + "grad_norm": 1.2139965705715394, + "learning_rate": 3.678686248348363e-06, + "loss": 0.2150077074766159, + "step": 5514 + }, + { + "epoch": 1.4644801487186296, + "grad_norm": 1.4267562521267494, + "learning_rate": 3.6752846239093276e-06, + "loss": 0.2605292797088623, + "step": 5515 + }, + { + "epoch": 1.4647457177001726, + "grad_norm": 1.202920213288267, + "learning_rate": 3.671884218800822e-06, + "loss": 0.22481867671012878, + "step": 5516 + }, + { + "epoch": 1.4650112866817155, + "grad_norm": 5.588780783186036, + "learning_rate": 3.668485033678406e-06, + "loss": 0.24453294277191162, + "step": 5517 + }, + { + "epoch": 1.4652768556632585, + "grad_norm": 1.379432138271627, + "learning_rate": 3.6650870691973996e-06, + "loss": 0.2672286033630371, + "step": 5518 + }, + { + "epoch": 1.4655424246448014, + "grad_norm": 1.2625747265975353, + "learning_rate": 3.661690326012897e-06, + "loss": 0.2514987587928772, + "step": 5519 + }, + { + "epoch": 1.4658079936263444, + "grad_norm": 1.3337549906693908, + "learning_rate": 3.6582948047797438e-06, + "loss": 0.25671514868736267, + "step": 5520 + }, + { + "epoch": 1.4660735626078873, + "grad_norm": 1.3535247420304835, + "learning_rate": 3.654900506152561e-06, + "loss": 0.25485602021217346, + "step": 5521 + }, + { + "epoch": 1.4663391315894303, + "grad_norm": 1.1813027271086827, + "learning_rate": 3.6515074307857257e-06, + "loss": 0.23556292057037354, + "step": 5522 + }, + { + "epoch": 1.4666047005709733, + "grad_norm": 1.15604598759747, + "learning_rate": 3.6481155793333855e-06, + "loss": 0.23347696661949158, + "step": 5523 + }, + { + "epoch": 1.4668702695525162, + "grad_norm": 1.218328581124676, + "learning_rate": 3.6447249524494466e-06, + "loss": 0.2405884712934494, + "step": 5524 + }, + { + "epoch": 1.4671358385340592, + "grad_norm": 1.2423110513745568, + "learning_rate": 3.6413355507875845e-06, + "loss": 0.23668336868286133, + "step": 5525 + }, + { + "epoch": 1.467401407515602, + "grad_norm": 1.207526661238473, + "learning_rate": 3.6379473750012375e-06, + "loss": 0.25534945726394653, + "step": 5526 + }, + { + "epoch": 1.467666976497145, + "grad_norm": 1.267472887202726, + "learning_rate": 3.634560425743596e-06, + "loss": 0.22227410972118378, + "step": 5527 + }, + { + "epoch": 1.467932545478688, + "grad_norm": 1.4853214348875312, + "learning_rate": 3.631174703667636e-06, + "loss": 0.23395927250385284, + "step": 5528 + }, + { + "epoch": 1.468198114460231, + "grad_norm": 1.2396534638298151, + "learning_rate": 3.6277902094260785e-06, + "loss": 0.23419208824634552, + "step": 5529 + }, + { + "epoch": 1.4684636834417741, + "grad_norm": 1.3441597355302621, + "learning_rate": 3.6244069436714158e-06, + "loss": 0.22185654938220978, + "step": 5530 + }, + { + "epoch": 1.468729252423317, + "grad_norm": 1.2489989202798994, + "learning_rate": 3.621024907055901e-06, + "loss": 0.2705134153366089, + "step": 5531 + }, + { + "epoch": 1.46899482140486, + "grad_norm": 1.23195362246657, + "learning_rate": 3.617644100231551e-06, + "loss": 0.23426109552383423, + "step": 5532 + }, + { + "epoch": 1.469260390386403, + "grad_norm": 1.2477206941188708, + "learning_rate": 3.6142645238501462e-06, + "loss": 0.25527146458625793, + "step": 5533 + }, + { + "epoch": 1.469525959367946, + "grad_norm": 1.1030456616341389, + "learning_rate": 3.610886178563228e-06, + "loss": 0.1882668435573578, + "step": 5534 + }, + { + "epoch": 1.469791528349489, + "grad_norm": 1.2622509171219458, + "learning_rate": 3.607509065022101e-06, + "loss": 0.24060532450675964, + "step": 5535 + }, + { + "epoch": 1.4700570973310318, + "grad_norm": 1.2245038712856335, + "learning_rate": 3.6041331838778325e-06, + "loss": 0.23555803298950195, + "step": 5536 + }, + { + "epoch": 1.4703226663125748, + "grad_norm": 1.2192798079575136, + "learning_rate": 3.6007585357812557e-06, + "loss": 0.23126551508903503, + "step": 5537 + }, + { + "epoch": 1.4705882352941178, + "grad_norm": 1.139497037450913, + "learning_rate": 3.597385121382961e-06, + "loss": 0.24203836917877197, + "step": 5538 + }, + { + "epoch": 1.4708538042756607, + "grad_norm": 1.2467383616518404, + "learning_rate": 3.5940129413333046e-06, + "loss": 0.239767923951149, + "step": 5539 + }, + { + "epoch": 1.4711193732572037, + "grad_norm": 1.158137574546163, + "learning_rate": 3.5906419962824002e-06, + "loss": 0.24732957780361176, + "step": 5540 + }, + { + "epoch": 1.4713849422387466, + "grad_norm": 1.2722296085836442, + "learning_rate": 3.587272286880131e-06, + "loss": 0.2296421229839325, + "step": 5541 + }, + { + "epoch": 1.4716505112202896, + "grad_norm": 1.2453973567418024, + "learning_rate": 3.583903813776132e-06, + "loss": 0.2339775711297989, + "step": 5542 + }, + { + "epoch": 1.4719160802018325, + "grad_norm": 1.194940832073201, + "learning_rate": 3.5805365776198052e-06, + "loss": 0.230351984500885, + "step": 5543 + }, + { + "epoch": 1.4721816491833755, + "grad_norm": 1.2792126719917591, + "learning_rate": 3.5771705790603163e-06, + "loss": 0.2501414716243744, + "step": 5544 + }, + { + "epoch": 1.4724472181649184, + "grad_norm": 1.2327284472179139, + "learning_rate": 3.5738058187465864e-06, + "loss": 0.23387153446674347, + "step": 5545 + }, + { + "epoch": 1.4727127871464614, + "grad_norm": 1.2921618045206031, + "learning_rate": 3.570442297327307e-06, + "loss": 0.23874594271183014, + "step": 5546 + }, + { + "epoch": 1.4729783561280043, + "grad_norm": 1.2841826918754735, + "learning_rate": 3.5670800154509245e-06, + "loss": 0.21867451071739197, + "step": 5547 + }, + { + "epoch": 1.4732439251095473, + "grad_norm": 1.2937830650411482, + "learning_rate": 3.563718973765644e-06, + "loss": 0.24124100804328918, + "step": 5548 + }, + { + "epoch": 1.4735094940910902, + "grad_norm": 1.2156419794246578, + "learning_rate": 3.5603591729194377e-06, + "loss": 0.22185327112674713, + "step": 5549 + }, + { + "epoch": 1.4737750630726332, + "grad_norm": 1.1571779294098303, + "learning_rate": 3.5570006135600345e-06, + "loss": 0.21193793416023254, + "step": 5550 + }, + { + "epoch": 1.4740406320541761, + "grad_norm": 1.3939617841899903, + "learning_rate": 3.553643296334924e-06, + "loss": 0.2615143656730652, + "step": 5551 + }, + { + "epoch": 1.474306201035719, + "grad_norm": 1.1936451275051074, + "learning_rate": 3.5502872218913597e-06, + "loss": 0.24937541782855988, + "step": 5552 + }, + { + "epoch": 1.474571770017262, + "grad_norm": 1.0736225386439564, + "learning_rate": 3.5469323908763507e-06, + "loss": 0.22849224507808685, + "step": 5553 + }, + { + "epoch": 1.474837338998805, + "grad_norm": 1.6488166459783042, + "learning_rate": 3.5435788039366657e-06, + "loss": 0.2209717333316803, + "step": 5554 + }, + { + "epoch": 1.475102907980348, + "grad_norm": 1.2992665215674652, + "learning_rate": 3.5402264617188453e-06, + "loss": 0.2529235780239105, + "step": 5555 + }, + { + "epoch": 1.4753684769618909, + "grad_norm": 1.2133685762997675, + "learning_rate": 3.536875364869181e-06, + "loss": 0.2045450657606125, + "step": 5556 + }, + { + "epoch": 1.4756340459434338, + "grad_norm": 1.0591536248970717, + "learning_rate": 3.5335255140337167e-06, + "loss": 0.1973644196987152, + "step": 5557 + }, + { + "epoch": 1.4758996149249768, + "grad_norm": 1.3059187006673687, + "learning_rate": 3.5301769098582685e-06, + "loss": 0.27417299151420593, + "step": 5558 + }, + { + "epoch": 1.4761651839065197, + "grad_norm": 1.2500382678843112, + "learning_rate": 3.5268295529884077e-06, + "loss": 0.24541756510734558, + "step": 5559 + }, + { + "epoch": 1.4764307528880627, + "grad_norm": 1.4461383875060436, + "learning_rate": 3.5234834440694655e-06, + "loss": 0.25785958766937256, + "step": 5560 + }, + { + "epoch": 1.4766963218696056, + "grad_norm": 1.1676448271023605, + "learning_rate": 3.5201385837465307e-06, + "loss": 0.21099212765693665, + "step": 5561 + }, + { + "epoch": 1.4769618908511486, + "grad_norm": 1.1787333048605453, + "learning_rate": 3.5167949726644545e-06, + "loss": 0.26023173332214355, + "step": 5562 + }, + { + "epoch": 1.4772274598326915, + "grad_norm": 1.6670162101301063, + "learning_rate": 3.5134526114678426e-06, + "loss": 0.22882963716983795, + "step": 5563 + }, + { + "epoch": 1.4774930288142345, + "grad_norm": 1.312450944331431, + "learning_rate": 3.5101115008010677e-06, + "loss": 0.21987251937389374, + "step": 5564 + }, + { + "epoch": 1.4777585977957775, + "grad_norm": 1.163985983495263, + "learning_rate": 3.506771641308255e-06, + "loss": 0.2169610857963562, + "step": 5565 + }, + { + "epoch": 1.4780241667773204, + "grad_norm": 4.440133890295746, + "learning_rate": 3.50343303363329e-06, + "loss": 0.22723034024238586, + "step": 5566 + }, + { + "epoch": 1.4782897357588634, + "grad_norm": 1.2392064660120468, + "learning_rate": 3.5000956784198157e-06, + "loss": 0.23738276958465576, + "step": 5567 + }, + { + "epoch": 1.4785553047404063, + "grad_norm": 1.1818266174210303, + "learning_rate": 3.496759576311235e-06, + "loss": 0.19922251999378204, + "step": 5568 + }, + { + "epoch": 1.4788208737219493, + "grad_norm": 1.294067668946831, + "learning_rate": 3.4934247279507092e-06, + "loss": 0.22529268264770508, + "step": 5569 + }, + { + "epoch": 1.4790864427034922, + "grad_norm": 1.3551359298814187, + "learning_rate": 3.4900911339811583e-06, + "loss": 0.26758015155792236, + "step": 5570 + }, + { + "epoch": 1.4793520116850352, + "grad_norm": 1.2627897957153122, + "learning_rate": 3.48675879504526e-06, + "loss": 0.24752648174762726, + "step": 5571 + }, + { + "epoch": 1.4796175806665781, + "grad_norm": 1.3085621441307098, + "learning_rate": 3.483427711785449e-06, + "loss": 0.25337618589401245, + "step": 5572 + }, + { + "epoch": 1.479883149648121, + "grad_norm": 1.3543288061594618, + "learning_rate": 3.480097884843919e-06, + "loss": 0.24504786729812622, + "step": 5573 + }, + { + "epoch": 1.480148718629664, + "grad_norm": 1.1750849317955903, + "learning_rate": 3.4767693148626223e-06, + "loss": 0.21255145967006683, + "step": 5574 + }, + { + "epoch": 1.480414287611207, + "grad_norm": 1.2853041773936769, + "learning_rate": 3.473442002483267e-06, + "loss": 0.2501891553401947, + "step": 5575 + }, + { + "epoch": 1.48067985659275, + "grad_norm": 1.195974425335747, + "learning_rate": 3.4701159483473202e-06, + "loss": 0.25276634097099304, + "step": 5576 + }, + { + "epoch": 1.4809454255742929, + "grad_norm": 1.427206116406706, + "learning_rate": 3.4667911530960052e-06, + "loss": 0.2760567367076874, + "step": 5577 + }, + { + "epoch": 1.4812109945558358, + "grad_norm": 1.2442739080424003, + "learning_rate": 3.463467617370305e-06, + "loss": 0.22686481475830078, + "step": 5578 + }, + { + "epoch": 1.4814765635373788, + "grad_norm": 1.2374194002920247, + "learning_rate": 3.4601453418109554e-06, + "loss": 0.23262599110603333, + "step": 5579 + }, + { + "epoch": 1.4817421325189217, + "grad_norm": 1.2263890428702933, + "learning_rate": 3.4568243270584545e-06, + "loss": 0.22231365740299225, + "step": 5580 + }, + { + "epoch": 1.4820077015004647, + "grad_norm": 1.2193067799394695, + "learning_rate": 3.4535045737530504e-06, + "loss": 0.22237855195999146, + "step": 5581 + }, + { + "epoch": 1.4822732704820076, + "grad_norm": 1.208437884817879, + "learning_rate": 3.4501860825347587e-06, + "loss": 0.2260412871837616, + "step": 5582 + }, + { + "epoch": 1.4825388394635506, + "grad_norm": 1.3488909026023506, + "learning_rate": 3.4468688540433425e-06, + "loss": 0.2133496105670929, + "step": 5583 + }, + { + "epoch": 1.4828044084450935, + "grad_norm": 1.231358912436915, + "learning_rate": 3.4435528889183245e-06, + "loss": 0.24750375747680664, + "step": 5584 + }, + { + "epoch": 1.4830699774266365, + "grad_norm": 1.2053641188090713, + "learning_rate": 3.440238187798983e-06, + "loss": 0.23673412203788757, + "step": 5585 + }, + { + "epoch": 1.4833355464081794, + "grad_norm": 1.312048381493266, + "learning_rate": 3.436924751324354e-06, + "loss": 0.2505243420600891, + "step": 5586 + }, + { + "epoch": 1.4836011153897224, + "grad_norm": 1.2769153596955758, + "learning_rate": 3.433612580133229e-06, + "loss": 0.276151180267334, + "step": 5587 + }, + { + "epoch": 1.4838666843712653, + "grad_norm": 1.0245497892529305, + "learning_rate": 3.430301674864154e-06, + "loss": 0.1756816953420639, + "step": 5588 + }, + { + "epoch": 1.4841322533528083, + "grad_norm": 1.2667973514811224, + "learning_rate": 3.4269920361554342e-06, + "loss": 0.25901898741722107, + "step": 5589 + }, + { + "epoch": 1.4843978223343512, + "grad_norm": 1.2034260428652863, + "learning_rate": 3.4236836646451286e-06, + "loss": 0.21196085214614868, + "step": 5590 + }, + { + "epoch": 1.4846633913158942, + "grad_norm": 1.2887221468811698, + "learning_rate": 3.4203765609710525e-06, + "loss": 0.24153128266334534, + "step": 5591 + }, + { + "epoch": 1.4849289602974372, + "grad_norm": 1.2285562462634616, + "learning_rate": 3.4170707257707757e-06, + "loss": 0.25715887546539307, + "step": 5592 + }, + { + "epoch": 1.48519452927898, + "grad_norm": 1.430212837200284, + "learning_rate": 3.413766159681624e-06, + "loss": 0.2920379042625427, + "step": 5593 + }, + { + "epoch": 1.485460098260523, + "grad_norm": 1.2173970332611068, + "learning_rate": 3.41046286334068e-06, + "loss": 0.22127456963062286, + "step": 5594 + }, + { + "epoch": 1.485725667242066, + "grad_norm": 1.2534339617557788, + "learning_rate": 3.4071608373847786e-06, + "loss": 0.23103584349155426, + "step": 5595 + }, + { + "epoch": 1.485991236223609, + "grad_norm": 1.2999427041349472, + "learning_rate": 3.403860082450513e-06, + "loss": 0.29068222641944885, + "step": 5596 + }, + { + "epoch": 1.486256805205152, + "grad_norm": 1.2532608064541852, + "learning_rate": 3.4005605991742296e-06, + "loss": 0.23703888058662415, + "step": 5597 + }, + { + "epoch": 1.4865223741866949, + "grad_norm": 1.4039489349034764, + "learning_rate": 3.3972623881920296e-06, + "loss": 0.23348261415958405, + "step": 5598 + }, + { + "epoch": 1.4867879431682378, + "grad_norm": 1.1603139615742908, + "learning_rate": 3.3939654501397645e-06, + "loss": 0.24733223021030426, + "step": 5599 + }, + { + "epoch": 1.487053512149781, + "grad_norm": 1.1220204153088178, + "learning_rate": 3.3906697856530548e-06, + "loss": 0.22576835751533508, + "step": 5600 + }, + { + "epoch": 1.487319081131324, + "grad_norm": 1.1809335952834177, + "learning_rate": 3.3873753953672593e-06, + "loss": 0.20863527059555054, + "step": 5601 + }, + { + "epoch": 1.487584650112867, + "grad_norm": 1.1823379745083873, + "learning_rate": 3.384082279917499e-06, + "loss": 0.2299712598323822, + "step": 5602 + }, + { + "epoch": 1.4878502190944098, + "grad_norm": 1.1858521746021262, + "learning_rate": 3.380790439938648e-06, + "loss": 0.23058944940567017, + "step": 5603 + }, + { + "epoch": 1.4881157880759528, + "grad_norm": 1.1304663814123712, + "learning_rate": 3.3774998760653344e-06, + "loss": 0.20307201147079468, + "step": 5604 + }, + { + "epoch": 1.4883813570574957, + "grad_norm": 1.112411027996001, + "learning_rate": 3.3742105889319388e-06, + "loss": 0.2296266108751297, + "step": 5605 + }, + { + "epoch": 1.4886469260390387, + "grad_norm": 1.3206442060716181, + "learning_rate": 3.370922579172601e-06, + "loss": 0.22702309489250183, + "step": 5606 + }, + { + "epoch": 1.4889124950205816, + "grad_norm": 1.4590848907033545, + "learning_rate": 3.3676358474212035e-06, + "loss": 0.30432331562042236, + "step": 5607 + }, + { + "epoch": 1.4891780640021246, + "grad_norm": 1.201356120373459, + "learning_rate": 3.3643503943113907e-06, + "loss": 0.2488052248954773, + "step": 5608 + }, + { + "epoch": 1.4894436329836676, + "grad_norm": 1.2096846483257637, + "learning_rate": 3.361066220476564e-06, + "loss": 0.2221754938364029, + "step": 5609 + }, + { + "epoch": 1.4897092019652105, + "grad_norm": 1.289556223007011, + "learning_rate": 3.3577833265498728e-06, + "loss": 0.2547761797904968, + "step": 5610 + }, + { + "epoch": 1.4899747709467535, + "grad_norm": 1.3306628367975963, + "learning_rate": 3.3545017131642164e-06, + "loss": 0.21811938285827637, + "step": 5611 + }, + { + "epoch": 1.4902403399282964, + "grad_norm": 1.4022029015386877, + "learning_rate": 3.3512213809522554e-06, + "loss": 0.30436158180236816, + "step": 5612 + }, + { + "epoch": 1.4905059089098394, + "grad_norm": 1.2224150283856856, + "learning_rate": 3.3479423305463953e-06, + "loss": 0.2053622156381607, + "step": 5613 + }, + { + "epoch": 1.4907714778913823, + "grad_norm": 1.3026832238379669, + "learning_rate": 3.344664562578801e-06, + "loss": 0.2017601728439331, + "step": 5614 + }, + { + "epoch": 1.4910370468729253, + "grad_norm": 1.2856046275416113, + "learning_rate": 3.341388077681387e-06, + "loss": 0.23668046295642853, + "step": 5615 + }, + { + "epoch": 1.4913026158544682, + "grad_norm": 1.1460002150937032, + "learning_rate": 3.338112876485821e-06, + "loss": 0.20016951858997345, + "step": 5616 + }, + { + "epoch": 1.4915681848360112, + "grad_norm": 1.3606548245166536, + "learning_rate": 3.3348389596235177e-06, + "loss": 0.25477850437164307, + "step": 5617 + }, + { + "epoch": 1.4918337538175541, + "grad_norm": 1.2758175160721472, + "learning_rate": 3.3315663277256594e-06, + "loss": 0.24063366651535034, + "step": 5618 + }, + { + "epoch": 1.492099322799097, + "grad_norm": 1.2737128535751616, + "learning_rate": 3.328294981423165e-06, + "loss": 0.23443251848220825, + "step": 5619 + }, + { + "epoch": 1.49236489178064, + "grad_norm": 1.1580169148577781, + "learning_rate": 3.325024921346717e-06, + "loss": 0.21191264688968658, + "step": 5620 + }, + { + "epoch": 1.492630460762183, + "grad_norm": 1.213323558189925, + "learning_rate": 3.3217561481267367e-06, + "loss": 0.22062326967716217, + "step": 5621 + }, + { + "epoch": 1.492896029743726, + "grad_norm": 1.1757529457487401, + "learning_rate": 3.318488662393409e-06, + "loss": 0.2235480695962906, + "step": 5622 + }, + { + "epoch": 1.4931615987252689, + "grad_norm": 1.2611472240425432, + "learning_rate": 3.315222464776665e-06, + "loss": 0.26665517687797546, + "step": 5623 + }, + { + "epoch": 1.4934271677068118, + "grad_norm": 1.270220596773442, + "learning_rate": 3.3119575559061902e-06, + "loss": 0.24300602078437805, + "step": 5624 + }, + { + "epoch": 1.4936927366883548, + "grad_norm": 1.2622444254847978, + "learning_rate": 3.308693936411421e-06, + "loss": 0.25441884994506836, + "step": 5625 + }, + { + "epoch": 1.4939583056698977, + "grad_norm": 1.2781695234171213, + "learning_rate": 3.3054316069215407e-06, + "loss": 0.23236152529716492, + "step": 5626 + }, + { + "epoch": 1.4942238746514407, + "grad_norm": 1.2299113342509724, + "learning_rate": 3.3021705680654946e-06, + "loss": 0.24535568058490753, + "step": 5627 + }, + { + "epoch": 1.4944894436329836, + "grad_norm": 1.3635919919461823, + "learning_rate": 3.29891082047197e-06, + "loss": 0.2542986273765564, + "step": 5628 + }, + { + "epoch": 1.4947550126145266, + "grad_norm": 1.3442816383357798, + "learning_rate": 3.295652364769407e-06, + "loss": 0.26490268111228943, + "step": 5629 + }, + { + "epoch": 1.4950205815960695, + "grad_norm": 1.2455944135633985, + "learning_rate": 3.292395201585997e-06, + "loss": 0.25576913356781006, + "step": 5630 + }, + { + "epoch": 1.4952861505776125, + "grad_norm": 1.321982811797117, + "learning_rate": 3.2891393315496846e-06, + "loss": 0.2930823266506195, + "step": 5631 + }, + { + "epoch": 1.4955517195591554, + "grad_norm": 1.3029577245101889, + "learning_rate": 3.285884755288161e-06, + "loss": 0.2426074892282486, + "step": 5632 + }, + { + "epoch": 1.4958172885406984, + "grad_norm": 1.1912484566122454, + "learning_rate": 3.2826314734288713e-06, + "loss": 0.24090878665447235, + "step": 5633 + }, + { + "epoch": 1.4960828575222413, + "grad_norm": 1.291391881665867, + "learning_rate": 3.2793794865990092e-06, + "loss": 0.26155173778533936, + "step": 5634 + }, + { + "epoch": 1.4963484265037843, + "grad_norm": 1.2581171617638447, + "learning_rate": 3.2761287954255195e-06, + "loss": 0.2594009041786194, + "step": 5635 + }, + { + "epoch": 1.4966139954853273, + "grad_norm": 1.248912763921314, + "learning_rate": 3.2728794005350972e-06, + "loss": 0.24434763193130493, + "step": 5636 + }, + { + "epoch": 1.4968795644668702, + "grad_norm": 1.3459414061970596, + "learning_rate": 3.269631302554188e-06, + "loss": 0.2622208297252655, + "step": 5637 + }, + { + "epoch": 1.4971451334484132, + "grad_norm": 1.2222057610309294, + "learning_rate": 3.266384502108987e-06, + "loss": 0.18913154304027557, + "step": 5638 + }, + { + "epoch": 1.497410702429956, + "grad_norm": 1.260519406868159, + "learning_rate": 3.263138999825437e-06, + "loss": 0.2610907554626465, + "step": 5639 + }, + { + "epoch": 1.497676271411499, + "grad_norm": 1.2585537664404678, + "learning_rate": 3.2598947963292337e-06, + "loss": 0.25841569900512695, + "step": 5640 + }, + { + "epoch": 1.497941840393042, + "grad_norm": 1.1680179490188496, + "learning_rate": 3.256651892245822e-06, + "loss": 0.2066381573677063, + "step": 5641 + }, + { + "epoch": 1.4982074093745852, + "grad_norm": 1.1877407935219242, + "learning_rate": 3.253410288200396e-06, + "loss": 0.23956719040870667, + "step": 5642 + }, + { + "epoch": 1.4984729783561281, + "grad_norm": 1.1996406642135662, + "learning_rate": 3.250169984817897e-06, + "loss": 0.23999394476413727, + "step": 5643 + }, + { + "epoch": 1.498738547337671, + "grad_norm": 1.4056134439986134, + "learning_rate": 3.2469309827230156e-06, + "loss": 0.24273940920829773, + "step": 5644 + }, + { + "epoch": 1.499004116319214, + "grad_norm": 1.193555704549332, + "learning_rate": 3.2436932825401977e-06, + "loss": 0.2212621569633484, + "step": 5645 + }, + { + "epoch": 1.499269685300757, + "grad_norm": 1.293874995027958, + "learning_rate": 3.2404568848936325e-06, + "loss": 0.2487148940563202, + "step": 5646 + }, + { + "epoch": 1.4995352542823, + "grad_norm": 1.2610121684030642, + "learning_rate": 3.237221790407259e-06, + "loss": 0.29314422607421875, + "step": 5647 + }, + { + "epoch": 1.499800823263843, + "grad_norm": 1.1765702458871505, + "learning_rate": 3.233987999704763e-06, + "loss": 0.22727417945861816, + "step": 5648 + }, + { + "epoch": 1.5000663922453858, + "grad_norm": 1.1578089091098656, + "learning_rate": 3.230755513409585e-06, + "loss": 0.18877442181110382, + "step": 5649 + }, + { + "epoch": 1.5003319612269288, + "grad_norm": 1.2855274132536632, + "learning_rate": 3.2275243321449068e-06, + "loss": 0.2504552900791168, + "step": 5650 + }, + { + "epoch": 1.5005975302084718, + "grad_norm": 1.1905373910388852, + "learning_rate": 3.224294456533663e-06, + "loss": 0.23579174280166626, + "step": 5651 + }, + { + "epoch": 1.5008630991900147, + "grad_norm": 1.3692203179408873, + "learning_rate": 3.221065887198537e-06, + "loss": 0.29236793518066406, + "step": 5652 + }, + { + "epoch": 1.5011286681715577, + "grad_norm": 1.3245217175369617, + "learning_rate": 3.2178386247619577e-06, + "loss": 0.2735568881034851, + "step": 5653 + }, + { + "epoch": 1.5013942371531006, + "grad_norm": 1.240462888838021, + "learning_rate": 3.214612669846103e-06, + "loss": 0.2391616702079773, + "step": 5654 + }, + { + "epoch": 1.5016598061346436, + "grad_norm": 1.3766117264936455, + "learning_rate": 3.2113880230729e-06, + "loss": 0.24532485008239746, + "step": 5655 + }, + { + "epoch": 1.5019253751161865, + "grad_norm": 1.3310069624279295, + "learning_rate": 3.2081646850640215e-06, + "loss": 0.2605767250061035, + "step": 5656 + }, + { + "epoch": 1.5021909440977295, + "grad_norm": 1.2109489933208193, + "learning_rate": 3.2049426564408893e-06, + "loss": 0.2651350200176239, + "step": 5657 + }, + { + "epoch": 1.5024565130792724, + "grad_norm": 1.3305800775425032, + "learning_rate": 3.2017219378246734e-06, + "loss": 0.2719389498233795, + "step": 5658 + }, + { + "epoch": 1.5027220820608154, + "grad_norm": 1.2359239723239188, + "learning_rate": 3.198502529836288e-06, + "loss": 0.23077815771102905, + "step": 5659 + }, + { + "epoch": 1.5029876510423583, + "grad_norm": 1.0838054114896152, + "learning_rate": 3.1952844330964007e-06, + "loss": 0.21954959630966187, + "step": 5660 + }, + { + "epoch": 1.5032532200239013, + "grad_norm": 1.3480229773492907, + "learning_rate": 3.1920676482254186e-06, + "loss": 0.28229185938835144, + "step": 5661 + }, + { + "epoch": 1.5035187890054442, + "grad_norm": 1.2587796771658648, + "learning_rate": 3.1888521758435e-06, + "loss": 0.24612295627593994, + "step": 5662 + }, + { + "epoch": 1.5037843579869872, + "grad_norm": 1.2649379995915024, + "learning_rate": 3.185638016570555e-06, + "loss": 0.24191413819789886, + "step": 5663 + }, + { + "epoch": 1.5040499269685301, + "grad_norm": 1.225446339219085, + "learning_rate": 3.1824251710262323e-06, + "loss": 0.2427935004234314, + "step": 5664 + }, + { + "epoch": 1.504315495950073, + "grad_norm": 1.2595635392757376, + "learning_rate": 3.17921363982993e-06, + "loss": 0.2600318193435669, + "step": 5665 + }, + { + "epoch": 1.504581064931616, + "grad_norm": 1.2817020254494476, + "learning_rate": 3.1760034236007954e-06, + "loss": 0.25215205550193787, + "step": 5666 + }, + { + "epoch": 1.504846633913159, + "grad_norm": 1.2568573714231897, + "learning_rate": 3.1727945229577183e-06, + "loss": 0.24460548162460327, + "step": 5667 + }, + { + "epoch": 1.505112202894702, + "grad_norm": 1.2881955251422392, + "learning_rate": 3.169586938519338e-06, + "loss": 0.2812577486038208, + "step": 5668 + }, + { + "epoch": 1.5053777718762449, + "grad_norm": 1.1272225605105841, + "learning_rate": 3.166380670904039e-06, + "loss": 0.23297616839408875, + "step": 5669 + }, + { + "epoch": 1.5056433408577878, + "grad_norm": 1.1954331932042688, + "learning_rate": 3.163175720729954e-06, + "loss": 0.21659572422504425, + "step": 5670 + }, + { + "epoch": 1.5059089098393308, + "grad_norm": 1.2142230208725098, + "learning_rate": 3.1599720886149508e-06, + "loss": 0.22246181964874268, + "step": 5671 + }, + { + "epoch": 1.5061744788208737, + "grad_norm": 1.132636194795227, + "learning_rate": 3.1567697751766624e-06, + "loss": 0.20020918548107147, + "step": 5672 + }, + { + "epoch": 1.5064400478024167, + "grad_norm": 1.363041735701654, + "learning_rate": 3.1535687810324523e-06, + "loss": 0.25693628191947937, + "step": 5673 + }, + { + "epoch": 1.5067056167839596, + "grad_norm": 1.5250673507385644, + "learning_rate": 3.150369106799436e-06, + "loss": 0.21841923892498016, + "step": 5674 + }, + { + "epoch": 1.5069711857655026, + "grad_norm": 1.1710254495806258, + "learning_rate": 3.1471707530944707e-06, + "loss": 0.18131780624389648, + "step": 5675 + }, + { + "epoch": 1.5072367547470455, + "grad_norm": 1.180596749481675, + "learning_rate": 3.143973720534164e-06, + "loss": 0.22510449588298798, + "step": 5676 + }, + { + "epoch": 1.5075023237285885, + "grad_norm": 1.3952546557365002, + "learning_rate": 3.1407780097348627e-06, + "loss": 0.23721462488174438, + "step": 5677 + }, + { + "epoch": 1.5077678927101315, + "grad_norm": 1.2200574848273704, + "learning_rate": 3.1375836213126653e-06, + "loss": 0.24281899631023407, + "step": 5678 + }, + { + "epoch": 1.5080334616916744, + "grad_norm": 1.3211068465604292, + "learning_rate": 3.134390555883412e-06, + "loss": 0.23910081386566162, + "step": 5679 + }, + { + "epoch": 1.5082990306732174, + "grad_norm": 1.357027881520108, + "learning_rate": 3.1311988140626825e-06, + "loss": 0.2635132670402527, + "step": 5680 + }, + { + "epoch": 1.5085645996547603, + "grad_norm": 1.239638674575543, + "learning_rate": 3.1280083964658147e-06, + "loss": 0.24802634119987488, + "step": 5681 + }, + { + "epoch": 1.5088301686363033, + "grad_norm": 1.3861680174510138, + "learning_rate": 3.1248193037078823e-06, + "loss": 0.24081437289714813, + "step": 5682 + }, + { + "epoch": 1.5090957376178462, + "grad_norm": 1.2124748227090532, + "learning_rate": 3.121631536403701e-06, + "loss": 0.19550001621246338, + "step": 5683 + }, + { + "epoch": 1.5093613065993892, + "grad_norm": 1.309177755877421, + "learning_rate": 3.118445095167837e-06, + "loss": 0.2397807538509369, + "step": 5684 + }, + { + "epoch": 1.5096268755809321, + "grad_norm": 1.2243819490197418, + "learning_rate": 3.115259980614602e-06, + "loss": 0.2185651659965515, + "step": 5685 + }, + { + "epoch": 1.509892444562475, + "grad_norm": 1.2555724014592389, + "learning_rate": 3.1120761933580414e-06, + "loss": 0.22214055061340332, + "step": 5686 + }, + { + "epoch": 1.510158013544018, + "grad_norm": 1.4127254863789025, + "learning_rate": 3.108893734011955e-06, + "loss": 0.23971091210842133, + "step": 5687 + }, + { + "epoch": 1.510423582525561, + "grad_norm": 1.3331222718828735, + "learning_rate": 3.1057126031898843e-06, + "loss": 0.26458197832107544, + "step": 5688 + }, + { + "epoch": 1.510689151507104, + "grad_norm": 1.3487790050882777, + "learning_rate": 3.1025328015051093e-06, + "loss": 0.23730339109897614, + "step": 5689 + }, + { + "epoch": 1.5109547204886469, + "grad_norm": 1.2964784198979393, + "learning_rate": 3.0993543295706653e-06, + "loss": 0.21981677412986755, + "step": 5690 + }, + { + "epoch": 1.5112202894701898, + "grad_norm": 1.1812817656913812, + "learning_rate": 3.0961771879993206e-06, + "loss": 0.21984878182411194, + "step": 5691 + }, + { + "epoch": 1.5114858584517328, + "grad_norm": 1.2732802047873515, + "learning_rate": 3.093001377403592e-06, + "loss": 0.23086440563201904, + "step": 5692 + }, + { + "epoch": 1.5117514274332757, + "grad_norm": 2.3681680891314953, + "learning_rate": 3.0898268983957368e-06, + "loss": 0.2355024814605713, + "step": 5693 + }, + { + "epoch": 1.5120169964148187, + "grad_norm": 1.3061363772251866, + "learning_rate": 3.0866537515877584e-06, + "loss": 0.21210229396820068, + "step": 5694 + }, + { + "epoch": 1.5122825653963616, + "grad_norm": 1.3436771657394675, + "learning_rate": 3.0834819375914003e-06, + "loss": 0.2387622594833374, + "step": 5695 + }, + { + "epoch": 1.5125481343779046, + "grad_norm": 1.3482258979232278, + "learning_rate": 3.0803114570181527e-06, + "loss": 0.23822402954101562, + "step": 5696 + }, + { + "epoch": 1.5128137033594475, + "grad_norm": 1.3248058910768958, + "learning_rate": 3.0771423104792454e-06, + "loss": 0.26844173669815063, + "step": 5697 + }, + { + "epoch": 1.5130792723409905, + "grad_norm": 1.2131778927640824, + "learning_rate": 3.07397449858565e-06, + "loss": 0.23288767039775848, + "step": 5698 + }, + { + "epoch": 1.5133448413225334, + "grad_norm": 1.2716046597052009, + "learning_rate": 3.0708080219480896e-06, + "loss": 0.23273086547851562, + "step": 5699 + }, + { + "epoch": 1.5136104103040764, + "grad_norm": 1.4240236624695346, + "learning_rate": 3.067642881177023e-06, + "loss": 0.2505509555339813, + "step": 5700 + }, + { + "epoch": 1.5138759792856193, + "grad_norm": 1.1441752919653974, + "learning_rate": 3.0644790768826473e-06, + "loss": 0.22801508009433746, + "step": 5701 + }, + { + "epoch": 1.5141415482671623, + "grad_norm": 1.1462347465841034, + "learning_rate": 3.061316609674908e-06, + "loss": 0.2110593169927597, + "step": 5702 + }, + { + "epoch": 1.5144071172487052, + "grad_norm": 1.2145033288630525, + "learning_rate": 3.0581554801634927e-06, + "loss": 0.22201795876026154, + "step": 5703 + }, + { + "epoch": 1.5146726862302482, + "grad_norm": 1.2993896506173446, + "learning_rate": 3.054995688957829e-06, + "loss": 0.23104460537433624, + "step": 5704 + }, + { + "epoch": 1.5149382552117912, + "grad_norm": 1.5590161841107484, + "learning_rate": 3.0518372366670877e-06, + "loss": 0.23373261094093323, + "step": 5705 + }, + { + "epoch": 1.515203824193334, + "grad_norm": 1.368121139637646, + "learning_rate": 3.0486801239001806e-06, + "loss": 0.2404957264661789, + "step": 5706 + }, + { + "epoch": 1.515469393174877, + "grad_norm": 1.2346548477581518, + "learning_rate": 3.0455243512657606e-06, + "loss": 0.23209382593631744, + "step": 5707 + }, + { + "epoch": 1.51573496215642, + "grad_norm": 1.156984368318911, + "learning_rate": 3.042369919372228e-06, + "loss": 0.218237042427063, + "step": 5708 + }, + { + "epoch": 1.516000531137963, + "grad_norm": 12.380411974697722, + "learning_rate": 3.039216828827717e-06, + "loss": 0.25025027990341187, + "step": 5709 + }, + { + "epoch": 1.516266100119506, + "grad_norm": 1.3454644235463973, + "learning_rate": 3.036065080240106e-06, + "loss": 0.24729448556900024, + "step": 5710 + }, + { + "epoch": 1.5165316691010489, + "grad_norm": 1.246980236713752, + "learning_rate": 3.032914674217017e-06, + "loss": 0.23614796996116638, + "step": 5711 + }, + { + "epoch": 1.5167972380825918, + "grad_norm": 1.1947534591327391, + "learning_rate": 3.029765611365808e-06, + "loss": 0.2313452661037445, + "step": 5712 + }, + { + "epoch": 1.5170628070641348, + "grad_norm": 1.2169352172923076, + "learning_rate": 3.0266178922935842e-06, + "loss": 0.22152003645896912, + "step": 5713 + }, + { + "epoch": 1.5173283760456777, + "grad_norm": 1.3132034423317465, + "learning_rate": 3.0234715176071874e-06, + "loss": 0.25942179560661316, + "step": 5714 + }, + { + "epoch": 1.5175939450272207, + "grad_norm": 1.213532583392701, + "learning_rate": 3.0203264879132e-06, + "loss": 0.25030237436294556, + "step": 5715 + }, + { + "epoch": 1.5178595140087636, + "grad_norm": 1.212709044397772, + "learning_rate": 3.0171828038179497e-06, + "loss": 0.2025807797908783, + "step": 5716 + }, + { + "epoch": 1.5181250829903066, + "grad_norm": 1.3035190960753136, + "learning_rate": 3.014040465927499e-06, + "loss": 0.20455190539360046, + "step": 5717 + }, + { + "epoch": 1.5183906519718495, + "grad_norm": 1.2171025232725439, + "learning_rate": 3.010899474847655e-06, + "loss": 0.24197113513946533, + "step": 5718 + }, + { + "epoch": 1.5186562209533925, + "grad_norm": 1.243656057613246, + "learning_rate": 3.007759831183964e-06, + "loss": 0.22290384769439697, + "step": 5719 + }, + { + "epoch": 1.5189217899349357, + "grad_norm": 1.133911078511842, + "learning_rate": 3.0046215355417117e-06, + "loss": 0.23087520897388458, + "step": 5720 + }, + { + "epoch": 1.5191873589164786, + "grad_norm": 1.3329430419316783, + "learning_rate": 3.0014845885259236e-06, + "loss": 0.24425405263900757, + "step": 5721 + }, + { + "epoch": 1.5194529278980216, + "grad_norm": 1.310265396817766, + "learning_rate": 2.9983489907413675e-06, + "loss": 0.24888862669467926, + "step": 5722 + }, + { + "epoch": 1.5197184968795645, + "grad_norm": 1.3023172954247402, + "learning_rate": 2.9952147427925493e-06, + "loss": 0.23556756973266602, + "step": 5723 + }, + { + "epoch": 1.5199840658611075, + "grad_norm": 1.3924872169111115, + "learning_rate": 2.992081845283715e-06, + "loss": 0.2532619833946228, + "step": 5724 + }, + { + "epoch": 1.5202496348426504, + "grad_norm": 1.3351422936737996, + "learning_rate": 2.988950298818848e-06, + "loss": 0.2574974000453949, + "step": 5725 + }, + { + "epoch": 1.5205152038241934, + "grad_norm": 1.1244851887087242, + "learning_rate": 2.9858201040016775e-06, + "loss": 0.21997734904289246, + "step": 5726 + }, + { + "epoch": 1.5207807728057363, + "grad_norm": 1.3952335702566243, + "learning_rate": 2.982691261435666e-06, + "loss": 0.2174127697944641, + "step": 5727 + }, + { + "epoch": 1.5210463417872793, + "grad_norm": 1.4277294646697747, + "learning_rate": 2.979563771724019e-06, + "loss": 0.22455093264579773, + "step": 5728 + }, + { + "epoch": 1.5213119107688222, + "grad_norm": 1.2606427849530746, + "learning_rate": 2.976437635469678e-06, + "loss": 0.270727276802063, + "step": 5729 + }, + { + "epoch": 1.5215774797503652, + "grad_norm": 1.1901052998095392, + "learning_rate": 2.9733128532753254e-06, + "loss": 0.2233714610338211, + "step": 5730 + }, + { + "epoch": 1.5218430487319081, + "grad_norm": 1.364720864117707, + "learning_rate": 2.970189425743383e-06, + "loss": 0.23599566519260406, + "step": 5731 + }, + { + "epoch": 1.522108617713451, + "grad_norm": 1.2707197493270106, + "learning_rate": 2.967067353476011e-06, + "loss": 0.23598654568195343, + "step": 5732 + }, + { + "epoch": 1.522374186694994, + "grad_norm": 1.1793549120144597, + "learning_rate": 2.963946637075107e-06, + "loss": 0.205197274684906, + "step": 5733 + }, + { + "epoch": 1.522639755676537, + "grad_norm": 1.1887492971446227, + "learning_rate": 2.9608272771423073e-06, + "loss": 0.23581506311893463, + "step": 5734 + }, + { + "epoch": 1.52290532465808, + "grad_norm": 1.2937911951812968, + "learning_rate": 2.9577092742789915e-06, + "loss": 0.2088197022676468, + "step": 5735 + }, + { + "epoch": 1.5231708936396229, + "grad_norm": 1.2943182118738674, + "learning_rate": 2.95459262908627e-06, + "loss": 0.22607067227363586, + "step": 5736 + }, + { + "epoch": 1.5234364626211658, + "grad_norm": 1.1748118237242067, + "learning_rate": 2.951477342164998e-06, + "loss": 0.22242344915866852, + "step": 5737 + }, + { + "epoch": 1.5237020316027088, + "grad_norm": 1.3280405020263697, + "learning_rate": 2.9483634141157636e-06, + "loss": 0.25626271963119507, + "step": 5738 + }, + { + "epoch": 1.5239676005842517, + "grad_norm": 1.2212084732536523, + "learning_rate": 2.9452508455388975e-06, + "loss": 0.2241421341896057, + "step": 5739 + }, + { + "epoch": 1.5242331695657947, + "grad_norm": 1.5088982481303157, + "learning_rate": 2.9421396370344648e-06, + "loss": 0.2191103994846344, + "step": 5740 + }, + { + "epoch": 1.5244987385473376, + "grad_norm": 1.2411878451658047, + "learning_rate": 2.9390297892022703e-06, + "loss": 0.26252660155296326, + "step": 5741 + }, + { + "epoch": 1.5247643075288806, + "grad_norm": 1.3964551352557335, + "learning_rate": 2.9359213026418567e-06, + "loss": 0.21522507071495056, + "step": 5742 + }, + { + "epoch": 1.5250298765104235, + "grad_norm": 1.0905013771622027, + "learning_rate": 2.932814177952499e-06, + "loss": 0.20159044861793518, + "step": 5743 + }, + { + "epoch": 1.5252954454919665, + "grad_norm": 1.138416177249403, + "learning_rate": 2.929708415733221e-06, + "loss": 0.22679558396339417, + "step": 5744 + }, + { + "epoch": 1.5255610144735094, + "grad_norm": 1.199157018703913, + "learning_rate": 2.926604016582776e-06, + "loss": 0.2315664291381836, + "step": 5745 + }, + { + "epoch": 1.5258265834550524, + "grad_norm": 1.2568252329386058, + "learning_rate": 2.923500981099652e-06, + "loss": 0.229634091258049, + "step": 5746 + }, + { + "epoch": 1.5260921524365954, + "grad_norm": 1.2179751735416722, + "learning_rate": 2.9203993098820793e-06, + "loss": 0.20657674968242645, + "step": 5747 + }, + { + "epoch": 1.5263577214181385, + "grad_norm": 1.2447733239425043, + "learning_rate": 2.9172990035280237e-06, + "loss": 0.2306358814239502, + "step": 5748 + }, + { + "epoch": 1.5266232903996815, + "grad_norm": 1.2950411042959078, + "learning_rate": 2.9142000626351875e-06, + "loss": 0.2608031928539276, + "step": 5749 + }, + { + "epoch": 1.5268888593812244, + "grad_norm": 1.337100599856471, + "learning_rate": 2.911102487801013e-06, + "loss": 0.24675670266151428, + "step": 5750 + }, + { + "epoch": 1.5271544283627674, + "grad_norm": 1.3568337572597398, + "learning_rate": 2.908006279622667e-06, + "loss": 0.22544966638088226, + "step": 5751 + }, + { + "epoch": 1.5274199973443103, + "grad_norm": 1.3214418017258782, + "learning_rate": 2.904911438697071e-06, + "loss": 0.2328556478023529, + "step": 5752 + }, + { + "epoch": 1.5276855663258533, + "grad_norm": 1.25396823790717, + "learning_rate": 2.901817965620871e-06, + "loss": 0.2316005825996399, + "step": 5753 + }, + { + "epoch": 1.5279511353073962, + "grad_norm": 1.2976508240318196, + "learning_rate": 2.8987258609904522e-06, + "loss": 0.2332756370306015, + "step": 5754 + }, + { + "epoch": 1.5282167042889392, + "grad_norm": 1.3432276903845415, + "learning_rate": 2.8956351254019355e-06, + "loss": 0.24855142831802368, + "step": 5755 + }, + { + "epoch": 1.5284822732704821, + "grad_norm": 1.2138875439685706, + "learning_rate": 2.8925457594511775e-06, + "loss": 0.18745368719100952, + "step": 5756 + }, + { + "epoch": 1.528747842252025, + "grad_norm": 1.877743895818308, + "learning_rate": 2.889457763733774e-06, + "loss": 0.22402942180633545, + "step": 5757 + }, + { + "epoch": 1.529013411233568, + "grad_norm": 1.292567134146249, + "learning_rate": 2.886371138845051e-06, + "loss": 0.2156108319759369, + "step": 5758 + }, + { + "epoch": 1.529278980215111, + "grad_norm": 1.2848231417758293, + "learning_rate": 2.883285885380076e-06, + "loss": 0.22866520285606384, + "step": 5759 + }, + { + "epoch": 1.529544549196654, + "grad_norm": 1.2907471990668473, + "learning_rate": 2.880202003933645e-06, + "loss": 0.2486938238143921, + "step": 5760 + }, + { + "epoch": 1.529810118178197, + "grad_norm": 1.34098643692872, + "learning_rate": 2.877119495100301e-06, + "loss": 0.2565295696258545, + "step": 5761 + }, + { + "epoch": 1.5300756871597399, + "grad_norm": 1.1480290388256142, + "learning_rate": 2.8740383594743116e-06, + "loss": 0.21510455012321472, + "step": 5762 + }, + { + "epoch": 1.5303412561412828, + "grad_norm": 1.266250058472157, + "learning_rate": 2.8709585976496825e-06, + "loss": 0.2122025489807129, + "step": 5763 + }, + { + "epoch": 1.5306068251228258, + "grad_norm": 1.3017513152107745, + "learning_rate": 2.8678802102201575e-06, + "loss": 0.24274399876594543, + "step": 5764 + }, + { + "epoch": 1.5308723941043687, + "grad_norm": 1.4573413266326471, + "learning_rate": 2.864803197779216e-06, + "loss": 0.22325341403484344, + "step": 5765 + }, + { + "epoch": 1.5311379630859117, + "grad_norm": 1.3303976558080437, + "learning_rate": 2.8617275609200625e-06, + "loss": 0.25205284357070923, + "step": 5766 + }, + { + "epoch": 1.5314035320674546, + "grad_norm": 1.2638986714524767, + "learning_rate": 2.8586533002356465e-06, + "loss": 0.2047557830810547, + "step": 5767 + }, + { + "epoch": 1.5316691010489976, + "grad_norm": 1.2195584514594966, + "learning_rate": 2.8555804163186508e-06, + "loss": 0.2166992425918579, + "step": 5768 + }, + { + "epoch": 1.5319346700305405, + "grad_norm": 1.2333416807696795, + "learning_rate": 2.8525089097614867e-06, + "loss": 0.26253193616867065, + "step": 5769 + }, + { + "epoch": 1.5322002390120835, + "grad_norm": 1.2030637435961495, + "learning_rate": 2.8494387811563108e-06, + "loss": 0.23307687044143677, + "step": 5770 + }, + { + "epoch": 1.5324658079936264, + "grad_norm": 1.2191481171426857, + "learning_rate": 2.8463700310950047e-06, + "loss": 0.22128549218177795, + "step": 5771 + }, + { + "epoch": 1.5327313769751694, + "grad_norm": 1.272136705974986, + "learning_rate": 2.8433026601691883e-06, + "loss": 0.21966281533241272, + "step": 5772 + }, + { + "epoch": 1.5329969459567123, + "grad_norm": 1.341088625881783, + "learning_rate": 2.840236668970213e-06, + "loss": 0.22869305312633514, + "step": 5773 + }, + { + "epoch": 1.5332625149382553, + "grad_norm": 1.2257027323986465, + "learning_rate": 2.837172058089167e-06, + "loss": 0.21431279182434082, + "step": 5774 + }, + { + "epoch": 1.5335280839197982, + "grad_norm": 1.3512853622822856, + "learning_rate": 2.8341088281168693e-06, + "loss": 0.24610282480716705, + "step": 5775 + }, + { + "epoch": 1.5337936529013412, + "grad_norm": 1.3400303957635655, + "learning_rate": 2.8310469796438767e-06, + "loss": 0.24414925277233124, + "step": 5776 + }, + { + "epoch": 1.5340592218828841, + "grad_norm": 1.3597459613858938, + "learning_rate": 2.8279865132604766e-06, + "loss": 0.2330513596534729, + "step": 5777 + }, + { + "epoch": 1.534324790864427, + "grad_norm": 1.2551411616890042, + "learning_rate": 2.8249274295566863e-06, + "loss": 0.23048308491706848, + "step": 5778 + }, + { + "epoch": 1.53459035984597, + "grad_norm": 1.2566974883874766, + "learning_rate": 2.821869729122273e-06, + "loss": 0.2411375492811203, + "step": 5779 + }, + { + "epoch": 1.534855928827513, + "grad_norm": 1.384873838300398, + "learning_rate": 2.818813412546715e-06, + "loss": 0.22985543310642242, + "step": 5780 + }, + { + "epoch": 1.535121497809056, + "grad_norm": 1.320574666083159, + "learning_rate": 2.815758480419235e-06, + "loss": 0.20867247879505157, + "step": 5781 + }, + { + "epoch": 1.5353870667905989, + "grad_norm": 2.0414068761810182, + "learning_rate": 2.8127049333287913e-06, + "loss": 0.26378586888313293, + "step": 5782 + }, + { + "epoch": 1.5356526357721418, + "grad_norm": 1.552041032509997, + "learning_rate": 2.8096527718640687e-06, + "loss": 0.2690306305885315, + "step": 5783 + }, + { + "epoch": 1.5359182047536848, + "grad_norm": 1.1602606034579108, + "learning_rate": 2.8066019966134907e-06, + "loss": 0.22226165235042572, + "step": 5784 + }, + { + "epoch": 1.5361837737352277, + "grad_norm": 1.2201060637055436, + "learning_rate": 2.803552608165209e-06, + "loss": 0.23370322585105896, + "step": 5785 + }, + { + "epoch": 1.5364493427167707, + "grad_norm": 1.3067141176486328, + "learning_rate": 2.8005046071071107e-06, + "loss": 0.26137909293174744, + "step": 5786 + }, + { + "epoch": 1.5367149116983136, + "grad_norm": 1.3588127622676833, + "learning_rate": 2.7974579940268096e-06, + "loss": 0.22630617022514343, + "step": 5787 + }, + { + "epoch": 1.5369804806798566, + "grad_norm": 1.2356618590652273, + "learning_rate": 2.7944127695116663e-06, + "loss": 0.22641140222549438, + "step": 5788 + }, + { + "epoch": 1.5372460496613995, + "grad_norm": 1.266648551925957, + "learning_rate": 2.791368934148757e-06, + "loss": 0.19647541642189026, + "step": 5789 + }, + { + "epoch": 1.5375116186429425, + "grad_norm": 1.212906210017999, + "learning_rate": 2.788326488524901e-06, + "loss": 0.22399532794952393, + "step": 5790 + }, + { + "epoch": 1.5377771876244855, + "grad_norm": 1.2862970389756843, + "learning_rate": 2.7852854332266434e-06, + "loss": 0.22549685835838318, + "step": 5791 + }, + { + "epoch": 1.5380427566060284, + "grad_norm": 1.168406987557996, + "learning_rate": 2.7822457688402637e-06, + "loss": 0.2129821628332138, + "step": 5792 + }, + { + "epoch": 1.5383083255875714, + "grad_norm": 1.2301298306170827, + "learning_rate": 2.7792074959517755e-06, + "loss": 0.25330638885498047, + "step": 5793 + }, + { + "epoch": 1.5385738945691143, + "grad_norm": 1.3148661968254225, + "learning_rate": 2.7761706151469204e-06, + "loss": 0.2413945198059082, + "step": 5794 + }, + { + "epoch": 1.5388394635506573, + "grad_norm": 1.2551515744231165, + "learning_rate": 2.773135127011174e-06, + "loss": 0.21930523216724396, + "step": 5795 + }, + { + "epoch": 1.5391050325322002, + "grad_norm": 1.2506577052831476, + "learning_rate": 2.7701010321297416e-06, + "loss": 0.25499141216278076, + "step": 5796 + }, + { + "epoch": 1.5393706015137432, + "grad_norm": 1.1567311669751301, + "learning_rate": 2.7670683310875613e-06, + "loss": 0.19475680589675903, + "step": 5797 + }, + { + "epoch": 1.5396361704952861, + "grad_norm": 1.3159422945276043, + "learning_rate": 2.7640370244693026e-06, + "loss": 0.22155825793743134, + "step": 5798 + }, + { + "epoch": 1.539901739476829, + "grad_norm": 1.1818601031709017, + "learning_rate": 2.761007112859365e-06, + "loss": 0.2146138846874237, + "step": 5799 + }, + { + "epoch": 1.540167308458372, + "grad_norm": 1.146035478957987, + "learning_rate": 2.7579785968418804e-06, + "loss": 0.22698411345481873, + "step": 5800 + }, + { + "epoch": 1.540432877439915, + "grad_norm": 1.2904710642906891, + "learning_rate": 2.75495147700071e-06, + "loss": 0.23889532685279846, + "step": 5801 + }, + { + "epoch": 1.540698446421458, + "grad_norm": 1.2353012354195356, + "learning_rate": 2.7519257539194488e-06, + "loss": 0.2514609694480896, + "step": 5802 + }, + { + "epoch": 1.5409640154030009, + "grad_norm": 1.2405153867334813, + "learning_rate": 2.7489014281814185e-06, + "loss": 0.22332100570201874, + "step": 5803 + }, + { + "epoch": 1.5412295843845438, + "grad_norm": 1.1768236369414826, + "learning_rate": 2.745878500369673e-06, + "loss": 0.21316683292388916, + "step": 5804 + }, + { + "epoch": 1.5414951533660868, + "grad_norm": 1.2446325297163028, + "learning_rate": 2.742856971066996e-06, + "loss": 0.2228018194437027, + "step": 5805 + }, + { + "epoch": 1.5417607223476297, + "grad_norm": 1.3243067869686356, + "learning_rate": 2.7398368408559084e-06, + "loss": 0.22217239439487457, + "step": 5806 + }, + { + "epoch": 1.5420262913291727, + "grad_norm": 1.331116794742511, + "learning_rate": 2.736818110318652e-06, + "loss": 0.21147233247756958, + "step": 5807 + }, + { + "epoch": 1.5422918603107156, + "grad_norm": 1.2851526092309566, + "learning_rate": 2.7338007800372024e-06, + "loss": 0.23844698071479797, + "step": 5808 + }, + { + "epoch": 1.5425574292922586, + "grad_norm": 1.3238454632326748, + "learning_rate": 2.7307848505932653e-06, + "loss": 0.2361423820257187, + "step": 5809 + }, + { + "epoch": 1.5428229982738015, + "grad_norm": 1.1977956377916248, + "learning_rate": 2.727770322568277e-06, + "loss": 0.21585656702518463, + "step": 5810 + }, + { + "epoch": 1.5430885672553445, + "grad_norm": 1.172295737533699, + "learning_rate": 2.724757196543403e-06, + "loss": 0.233969584107399, + "step": 5811 + }, + { + "epoch": 1.5433541362368874, + "grad_norm": 1.3309852612756656, + "learning_rate": 2.7217454730995363e-06, + "loss": 0.25040164589881897, + "step": 5812 + }, + { + "epoch": 1.5436197052184304, + "grad_norm": 1.5198455877328005, + "learning_rate": 2.7187351528173046e-06, + "loss": 0.25848713517189026, + "step": 5813 + }, + { + "epoch": 1.5438852741999733, + "grad_norm": 1.409976572144199, + "learning_rate": 2.715726236277061e-06, + "loss": 0.22255051136016846, + "step": 5814 + }, + { + "epoch": 1.5441508431815163, + "grad_norm": 1.1799889920310853, + "learning_rate": 2.7127187240588883e-06, + "loss": 0.1882694661617279, + "step": 5815 + }, + { + "epoch": 1.5444164121630592, + "grad_norm": 1.178741445510241, + "learning_rate": 2.7097126167426002e-06, + "loss": 0.20070400834083557, + "step": 5816 + }, + { + "epoch": 1.5446819811446022, + "grad_norm": 1.2959554460073714, + "learning_rate": 2.706707914907739e-06, + "loss": 0.25316092371940613, + "step": 5817 + }, + { + "epoch": 1.5449475501261452, + "grad_norm": 1.334925654094324, + "learning_rate": 2.703704619133576e-06, + "loss": 0.24665585160255432, + "step": 5818 + }, + { + "epoch": 1.545213119107688, + "grad_norm": 1.290703779819622, + "learning_rate": 2.7007027299991095e-06, + "loss": 0.24172846972942352, + "step": 5819 + }, + { + "epoch": 1.545478688089231, + "grad_norm": 1.2781945872260183, + "learning_rate": 2.6977022480830708e-06, + "loss": 0.2405129075050354, + "step": 5820 + }, + { + "epoch": 1.545744257070774, + "grad_norm": 1.075296946307477, + "learning_rate": 2.694703173963914e-06, + "loss": 0.19716276228427887, + "step": 5821 + }, + { + "epoch": 1.546009826052317, + "grad_norm": 1.1434881656258093, + "learning_rate": 2.6917055082198284e-06, + "loss": 0.20343703031539917, + "step": 5822 + }, + { + "epoch": 1.54627539503386, + "grad_norm": 1.5985849963050902, + "learning_rate": 2.688709251428725e-06, + "loss": 0.24382619559764862, + "step": 5823 + }, + { + "epoch": 1.5465409640154029, + "grad_norm": 1.7314575476063523, + "learning_rate": 2.6857144041682514e-06, + "loss": 0.2962399423122406, + "step": 5824 + }, + { + "epoch": 1.5468065329969458, + "grad_norm": 1.2699118659079873, + "learning_rate": 2.6827209670157774e-06, + "loss": 0.24034687876701355, + "step": 5825 + }, + { + "epoch": 1.5470721019784888, + "grad_norm": 1.3757632125147359, + "learning_rate": 2.6797289405484016e-06, + "loss": 0.2575085163116455, + "step": 5826 + }, + { + "epoch": 1.5473376709600317, + "grad_norm": 1.556424910652697, + "learning_rate": 2.6767383253429515e-06, + "loss": 0.2586629092693329, + "step": 5827 + }, + { + "epoch": 1.5476032399415747, + "grad_norm": 1.096117045688234, + "learning_rate": 2.6737491219759815e-06, + "loss": 0.18447624146938324, + "step": 5828 + }, + { + "epoch": 1.5478688089231176, + "grad_norm": 1.3930188378643134, + "learning_rate": 2.670761331023779e-06, + "loss": 0.244853213429451, + "step": 5829 + }, + { + "epoch": 1.5481343779046606, + "grad_norm": 1.3163693020327074, + "learning_rate": 2.66777495306235e-06, + "loss": 0.24641919136047363, + "step": 5830 + }, + { + "epoch": 1.5483999468862035, + "grad_norm": 1.4086337954424433, + "learning_rate": 2.6647899886674323e-06, + "loss": 0.2364550232887268, + "step": 5831 + }, + { + "epoch": 1.5486655158677467, + "grad_norm": 1.1695450852938096, + "learning_rate": 2.6618064384144925e-06, + "loss": 0.17760278284549713, + "step": 5832 + }, + { + "epoch": 1.5489310848492897, + "grad_norm": 1.1988872335295608, + "learning_rate": 2.6588243028787274e-06, + "loss": 0.18571510910987854, + "step": 5833 + }, + { + "epoch": 1.5491966538308326, + "grad_norm": 1.2537289047953852, + "learning_rate": 2.655843582635057e-06, + "loss": 0.23693162202835083, + "step": 5834 + }, + { + "epoch": 1.5494622228123756, + "grad_norm": 1.3552352092705502, + "learning_rate": 2.652864278258126e-06, + "loss": 0.26481011509895325, + "step": 5835 + }, + { + "epoch": 1.5497277917939185, + "grad_norm": 1.4182429828127188, + "learning_rate": 2.6498863903223115e-06, + "loss": 0.23405003547668457, + "step": 5836 + }, + { + "epoch": 1.5499933607754615, + "grad_norm": 2.5576796684815686, + "learning_rate": 2.6469099194017144e-06, + "loss": 0.20662814378738403, + "step": 5837 + }, + { + "epoch": 1.5502589297570044, + "grad_norm": 1.3124069479853646, + "learning_rate": 2.6439348660701634e-06, + "loss": 0.2722313404083252, + "step": 5838 + }, + { + "epoch": 1.5505244987385474, + "grad_norm": 1.3906100112719377, + "learning_rate": 2.6409612309012134e-06, + "loss": 0.2288864552974701, + "step": 5839 + }, + { + "epoch": 1.5507900677200903, + "grad_norm": 1.322570753297788, + "learning_rate": 2.6379890144681464e-06, + "loss": 0.2286190539598465, + "step": 5840 + }, + { + "epoch": 1.5510556367016333, + "grad_norm": 1.2231420705695173, + "learning_rate": 2.6350182173439666e-06, + "loss": 0.22478938102722168, + "step": 5841 + }, + { + "epoch": 1.5513212056831762, + "grad_norm": 1.415848841276022, + "learning_rate": 2.6320488401014166e-06, + "loss": 0.2520615756511688, + "step": 5842 + }, + { + "epoch": 1.5515867746647192, + "grad_norm": 1.3741284890856262, + "learning_rate": 2.629080883312952e-06, + "loss": 0.2121289074420929, + "step": 5843 + }, + { + "epoch": 1.5518523436462621, + "grad_norm": 1.3092311759839703, + "learning_rate": 2.6261143475507656e-06, + "loss": 0.2252352237701416, + "step": 5844 + }, + { + "epoch": 1.552117912627805, + "grad_norm": 1.191285245143269, + "learning_rate": 2.6231492333867626e-06, + "loss": 0.21188892424106598, + "step": 5845 + }, + { + "epoch": 1.552383481609348, + "grad_norm": 1.1276138403597054, + "learning_rate": 2.6201855413925857e-06, + "loss": 0.21534699201583862, + "step": 5846 + }, + { + "epoch": 1.552649050590891, + "grad_norm": 1.2849885490704696, + "learning_rate": 2.6172232721395998e-06, + "loss": 0.21781614422798157, + "step": 5847 + }, + { + "epoch": 1.552914619572434, + "grad_norm": 1.3317886914724781, + "learning_rate": 2.6142624261988947e-06, + "loss": 0.2476508915424347, + "step": 5848 + }, + { + "epoch": 1.5531801885539769, + "grad_norm": 1.3439658215829489, + "learning_rate": 2.611303004141287e-06, + "loss": 0.2692151665687561, + "step": 5849 + }, + { + "epoch": 1.5534457575355198, + "grad_norm": 1.2839746536411722, + "learning_rate": 2.6083450065373163e-06, + "loss": 0.24868687987327576, + "step": 5850 + }, + { + "epoch": 1.5537113265170628, + "grad_norm": 1.2704813852574235, + "learning_rate": 2.6053884339572543e-06, + "loss": 0.24215853214263916, + "step": 5851 + }, + { + "epoch": 1.5539768954986057, + "grad_norm": 1.2100819665594098, + "learning_rate": 2.602433286971091e-06, + "loss": 0.2157444804906845, + "step": 5852 + }, + { + "epoch": 1.5542424644801487, + "grad_norm": 1.369237575424674, + "learning_rate": 2.599479566148544e-06, + "loss": 0.22152379155158997, + "step": 5853 + }, + { + "epoch": 1.5545080334616916, + "grad_norm": 1.1930490692336162, + "learning_rate": 2.596527272059055e-06, + "loss": 0.2278299182653427, + "step": 5854 + }, + { + "epoch": 1.5547736024432346, + "grad_norm": 1.406485645097326, + "learning_rate": 2.593576405271793e-06, + "loss": 0.23183950781822205, + "step": 5855 + }, + { + "epoch": 1.5550391714247775, + "grad_norm": 1.209726796816396, + "learning_rate": 2.5906269663556484e-06, + "loss": 0.22167566418647766, + "step": 5856 + }, + { + "epoch": 1.5553047404063205, + "grad_norm": 1.1790986825354977, + "learning_rate": 2.5876789558792403e-06, + "loss": 0.24111366271972656, + "step": 5857 + }, + { + "epoch": 1.5555703093878634, + "grad_norm": 1.1706391072024214, + "learning_rate": 2.5847323744109087e-06, + "loss": 0.2090388983488083, + "step": 5858 + }, + { + "epoch": 1.5558358783694064, + "grad_norm": 1.2588154614837785, + "learning_rate": 2.58178722251872e-06, + "loss": 0.2087189108133316, + "step": 5859 + }, + { + "epoch": 1.5561014473509496, + "grad_norm": 1.300626487965864, + "learning_rate": 2.578843500770465e-06, + "loss": 0.2277342677116394, + "step": 5860 + }, + { + "epoch": 1.5563670163324925, + "grad_norm": 1.3517116904487896, + "learning_rate": 2.57590120973366e-06, + "loss": 0.2204241305589676, + "step": 5861 + }, + { + "epoch": 1.5566325853140355, + "grad_norm": 1.213807933631201, + "learning_rate": 2.5729603499755416e-06, + "loss": 0.2138606607913971, + "step": 5862 + }, + { + "epoch": 1.5568981542955784, + "grad_norm": 1.4669648743657906, + "learning_rate": 2.5700209220630733e-06, + "loss": 0.21257862448692322, + "step": 5863 + }, + { + "epoch": 1.5571637232771214, + "grad_norm": 1.2314998246120414, + "learning_rate": 2.5670829265629437e-06, + "loss": 0.20991909503936768, + "step": 5864 + }, + { + "epoch": 1.5574292922586643, + "grad_norm": 1.294980658460416, + "learning_rate": 2.5641463640415633e-06, + "loss": 0.23745422065258026, + "step": 5865 + }, + { + "epoch": 1.5576948612402073, + "grad_norm": 1.2425796180120088, + "learning_rate": 2.561211235065065e-06, + "loss": 0.21482989192008972, + "step": 5866 + }, + { + "epoch": 1.5579604302217502, + "grad_norm": 1.008120888370748, + "learning_rate": 2.558277540199309e-06, + "loss": 0.17866572737693787, + "step": 5867 + }, + { + "epoch": 1.5582259992032932, + "grad_norm": 1.2966262005019353, + "learning_rate": 2.555345280009872e-06, + "loss": 0.223822683095932, + "step": 5868 + }, + { + "epoch": 1.5584915681848361, + "grad_norm": 1.339606961190666, + "learning_rate": 2.552414455062068e-06, + "loss": 0.2293519228696823, + "step": 5869 + }, + { + "epoch": 1.558757137166379, + "grad_norm": 1.3023504432012787, + "learning_rate": 2.5494850659209203e-06, + "loss": 0.2556726038455963, + "step": 5870 + }, + { + "epoch": 1.559022706147922, + "grad_norm": 1.255574464472328, + "learning_rate": 2.546557113151181e-06, + "loss": 0.26891303062438965, + "step": 5871 + }, + { + "epoch": 1.559288275129465, + "grad_norm": 1.1754509839553133, + "learning_rate": 2.5436305973173257e-06, + "loss": 0.19510813057422638, + "step": 5872 + }, + { + "epoch": 1.559553844111008, + "grad_norm": 1.2819966401856495, + "learning_rate": 2.5407055189835518e-06, + "loss": 0.22906547784805298, + "step": 5873 + }, + { + "epoch": 1.559819413092551, + "grad_norm": 1.3121165067922245, + "learning_rate": 2.5377818787137788e-06, + "loss": 0.25452786684036255, + "step": 5874 + }, + { + "epoch": 1.5600849820740939, + "grad_norm": 1.2743199898597464, + "learning_rate": 2.5348596770716503e-06, + "loss": 0.205597922205925, + "step": 5875 + }, + { + "epoch": 1.5603505510556368, + "grad_norm": 1.3020148941868286, + "learning_rate": 2.5319389146205344e-06, + "loss": 0.24009352922439575, + "step": 5876 + }, + { + "epoch": 1.5606161200371798, + "grad_norm": 1.433983972963341, + "learning_rate": 2.5290195919235173e-06, + "loss": 0.23381268978118896, + "step": 5877 + }, + { + "epoch": 1.5608816890187227, + "grad_norm": 1.1554092234943296, + "learning_rate": 2.52610170954341e-06, + "loss": 0.2267276644706726, + "step": 5878 + }, + { + "epoch": 1.5611472580002657, + "grad_norm": 1.2742422977156036, + "learning_rate": 2.5231852680427482e-06, + "loss": 0.24330289661884308, + "step": 5879 + }, + { + "epoch": 1.5614128269818086, + "grad_norm": 1.2802855767249914, + "learning_rate": 2.5202702679837852e-06, + "loss": 0.24877145886421204, + "step": 5880 + }, + { + "epoch": 1.5616783959633516, + "grad_norm": 1.1377670913842177, + "learning_rate": 2.5173567099285e-06, + "loss": 0.20410388708114624, + "step": 5881 + }, + { + "epoch": 1.5619439649448945, + "grad_norm": 1.2268765869469427, + "learning_rate": 2.514444594438591e-06, + "loss": 0.21524877846240997, + "step": 5882 + }, + { + "epoch": 1.5622095339264375, + "grad_norm": 1.1986269244208958, + "learning_rate": 2.5115339220754796e-06, + "loss": 0.18785043060779572, + "step": 5883 + }, + { + "epoch": 1.5624751029079804, + "grad_norm": 1.3539528047627718, + "learning_rate": 2.5086246934003113e-06, + "loss": 0.21200208365917206, + "step": 5884 + }, + { + "epoch": 1.5627406718895234, + "grad_norm": 1.6373531833898813, + "learning_rate": 2.5057169089739485e-06, + "loss": 0.20752021670341492, + "step": 5885 + }, + { + "epoch": 1.5630062408710663, + "grad_norm": 1.1717071963534185, + "learning_rate": 2.502810569356976e-06, + "loss": 0.21395736932754517, + "step": 5886 + }, + { + "epoch": 1.5632718098526093, + "grad_norm": 1.2664848714228343, + "learning_rate": 2.499905675109707e-06, + "loss": 0.26949262619018555, + "step": 5887 + }, + { + "epoch": 1.5635373788341522, + "grad_norm": 1.5283985889023297, + "learning_rate": 2.497002226792169e-06, + "loss": 0.2309839278459549, + "step": 5888 + }, + { + "epoch": 1.5638029478156952, + "grad_norm": 1.2596143819163301, + "learning_rate": 2.4941002249641123e-06, + "loss": 0.24415400624275208, + "step": 5889 + }, + { + "epoch": 1.5640685167972381, + "grad_norm": 1.3074402223027564, + "learning_rate": 2.4911996701850083e-06, + "loss": 0.23493322730064392, + "step": 5890 + }, + { + "epoch": 1.564334085778781, + "grad_norm": 1.260748243658743, + "learning_rate": 2.488300563014049e-06, + "loss": 0.23824438452720642, + "step": 5891 + }, + { + "epoch": 1.564599654760324, + "grad_norm": 1.2534870916273309, + "learning_rate": 2.4854029040101503e-06, + "loss": 0.2523414194583893, + "step": 5892 + }, + { + "epoch": 1.564865223741867, + "grad_norm": 1.2879106186872462, + "learning_rate": 2.482506693731944e-06, + "loss": 0.21360887587070465, + "step": 5893 + }, + { + "epoch": 1.56513079272341, + "grad_norm": 1.1951820042572139, + "learning_rate": 2.47961193273779e-06, + "loss": 0.21182934939861298, + "step": 5894 + }, + { + "epoch": 1.5653963617049529, + "grad_norm": 1.4293886797193323, + "learning_rate": 2.4767186215857542e-06, + "loss": 0.23104771971702576, + "step": 5895 + }, + { + "epoch": 1.5656619306864958, + "grad_norm": 1.2606491547398977, + "learning_rate": 2.473826760833643e-06, + "loss": 0.22297397255897522, + "step": 5896 + }, + { + "epoch": 1.5659274996680388, + "grad_norm": 1.176802218612286, + "learning_rate": 2.4709363510389684e-06, + "loss": 0.21597865223884583, + "step": 5897 + }, + { + "epoch": 1.5661930686495817, + "grad_norm": 1.4303555951561693, + "learning_rate": 2.468047392758969e-06, + "loss": 0.27620527148246765, + "step": 5898 + }, + { + "epoch": 1.5664586376311247, + "grad_norm": 1.373809252877093, + "learning_rate": 2.465159886550601e-06, + "loss": 0.25262463092803955, + "step": 5899 + }, + { + "epoch": 1.5667242066126676, + "grad_norm": 1.376719462816966, + "learning_rate": 2.462273832970542e-06, + "loss": 0.2729034125804901, + "step": 5900 + }, + { + "epoch": 1.5669897755942106, + "grad_norm": 1.3637563490895455, + "learning_rate": 2.459389232575188e-06, + "loss": 0.2313854992389679, + "step": 5901 + }, + { + "epoch": 1.5672553445757536, + "grad_norm": 1.3202318144066494, + "learning_rate": 2.456506085920658e-06, + "loss": 0.22513791918754578, + "step": 5902 + }, + { + "epoch": 1.5675209135572965, + "grad_norm": 1.3152362934287614, + "learning_rate": 2.4536243935627856e-06, + "loss": 0.2658824026584625, + "step": 5903 + }, + { + "epoch": 1.5677864825388395, + "grad_norm": 1.1721087348112986, + "learning_rate": 2.4507441560571275e-06, + "loss": 0.21781010925769806, + "step": 5904 + }, + { + "epoch": 1.5680520515203824, + "grad_norm": 1.3393030222309363, + "learning_rate": 2.4478653739589632e-06, + "loss": 0.21047937870025635, + "step": 5905 + }, + { + "epoch": 1.5683176205019254, + "grad_norm": 1.2196979825563006, + "learning_rate": 2.4449880478232858e-06, + "loss": 0.21674057841300964, + "step": 5906 + }, + { + "epoch": 1.5685831894834683, + "grad_norm": 1.200112520021674, + "learning_rate": 2.44211217820481e-06, + "loss": 0.22062627971172333, + "step": 5907 + }, + { + "epoch": 1.5688487584650113, + "grad_norm": 1.3158234051142574, + "learning_rate": 2.439237765657968e-06, + "loss": 0.22440886497497559, + "step": 5908 + }, + { + "epoch": 1.5691143274465542, + "grad_norm": 1.129873307165861, + "learning_rate": 2.4363648107369175e-06, + "loss": 0.21888123452663422, + "step": 5909 + }, + { + "epoch": 1.5693798964280972, + "grad_norm": 1.2586007199788052, + "learning_rate": 2.433493313995524e-06, + "loss": 0.23104462027549744, + "step": 5910 + }, + { + "epoch": 1.5696454654096401, + "grad_norm": 1.427902558182486, + "learning_rate": 2.4306232759873803e-06, + "loss": 0.23032237589359283, + "step": 5911 + }, + { + "epoch": 1.569911034391183, + "grad_norm": 1.3780752776280365, + "learning_rate": 2.4277546972657974e-06, + "loss": 0.2588527202606201, + "step": 5912 + }, + { + "epoch": 1.570176603372726, + "grad_norm": 1.4647042397629928, + "learning_rate": 2.424887578383799e-06, + "loss": 0.2845698893070221, + "step": 5913 + }, + { + "epoch": 1.570442172354269, + "grad_norm": 1.338246310760916, + "learning_rate": 2.4220219198941384e-06, + "loss": 0.23010894656181335, + "step": 5914 + }, + { + "epoch": 1.570707741335812, + "grad_norm": 1.3783426416349442, + "learning_rate": 2.419157722349278e-06, + "loss": 0.2623594403266907, + "step": 5915 + }, + { + "epoch": 1.5709733103173549, + "grad_norm": 1.2349976574308903, + "learning_rate": 2.416294986301401e-06, + "loss": 0.2107153981924057, + "step": 5916 + }, + { + "epoch": 1.5712388792988978, + "grad_norm": 1.3633626366853218, + "learning_rate": 2.413433712302409e-06, + "loss": 0.2115003615617752, + "step": 5917 + }, + { + "epoch": 1.5715044482804408, + "grad_norm": 1.3738602333573011, + "learning_rate": 2.410573900903921e-06, + "loss": 0.22406762838363647, + "step": 5918 + }, + { + "epoch": 1.5717700172619837, + "grad_norm": 1.3017270649216575, + "learning_rate": 2.407715552657277e-06, + "loss": 0.24878525733947754, + "step": 5919 + }, + { + "epoch": 1.5720355862435267, + "grad_norm": 1.5003273963811, + "learning_rate": 2.404858668113532e-06, + "loss": 0.24546805024147034, + "step": 5920 + }, + { + "epoch": 1.5723011552250696, + "grad_norm": 1.5650848412040055, + "learning_rate": 2.402003247823459e-06, + "loss": 0.23430263996124268, + "step": 5921 + }, + { + "epoch": 1.5725667242066126, + "grad_norm": 1.3939131226044492, + "learning_rate": 2.399149292337547e-06, + "loss": 0.26935267448425293, + "step": 5922 + }, + { + "epoch": 1.5728322931881555, + "grad_norm": 1.1554138984093538, + "learning_rate": 2.3962968022060097e-06, + "loss": 0.21104472875595093, + "step": 5923 + }, + { + "epoch": 1.5730978621696985, + "grad_norm": 1.147816084956367, + "learning_rate": 2.3934457779787755e-06, + "loss": 0.17162750661373138, + "step": 5924 + }, + { + "epoch": 1.5733634311512414, + "grad_norm": 1.2036391990293953, + "learning_rate": 2.390596220205481e-06, + "loss": 0.22233474254608154, + "step": 5925 + }, + { + "epoch": 1.5736290001327844, + "grad_norm": 1.456348691360017, + "learning_rate": 2.387748129435491e-06, + "loss": 0.2326992005109787, + "step": 5926 + }, + { + "epoch": 1.5738945691143273, + "grad_norm": 1.2656294085970974, + "learning_rate": 2.3849015062178835e-06, + "loss": 0.245779350399971, + "step": 5927 + }, + { + "epoch": 1.5741601380958703, + "grad_norm": 1.2198185109849795, + "learning_rate": 2.382056351101454e-06, + "loss": 0.24269379675388336, + "step": 5928 + }, + { + "epoch": 1.5744257070774133, + "grad_norm": 1.2241918308854736, + "learning_rate": 2.3792126646347138e-06, + "loss": 0.23644019663333893, + "step": 5929 + }, + { + "epoch": 1.5746912760589562, + "grad_norm": 1.2680435600362268, + "learning_rate": 2.376370447365893e-06, + "loss": 0.254330575466156, + "step": 5930 + }, + { + "epoch": 1.5749568450404992, + "grad_norm": 1.4146409212378834, + "learning_rate": 2.373529699842936e-06, + "loss": 0.2728506922721863, + "step": 5931 + }, + { + "epoch": 1.575222414022042, + "grad_norm": 1.3627178065769006, + "learning_rate": 2.3706904226135087e-06, + "loss": 0.23671439290046692, + "step": 5932 + }, + { + "epoch": 1.575487983003585, + "grad_norm": 1.409873356618632, + "learning_rate": 2.367852616224989e-06, + "loss": 0.24205748736858368, + "step": 5933 + }, + { + "epoch": 1.575753551985128, + "grad_norm": 1.2728197754861583, + "learning_rate": 2.3650162812244725e-06, + "loss": 0.1915436089038849, + "step": 5934 + }, + { + "epoch": 1.576019120966671, + "grad_norm": 1.2091326643578577, + "learning_rate": 2.3621814181587697e-06, + "loss": 0.23453299701213837, + "step": 5935 + }, + { + "epoch": 1.576284689948214, + "grad_norm": 1.3060415308267561, + "learning_rate": 2.3593480275744106e-06, + "loss": 0.24066327512264252, + "step": 5936 + }, + { + "epoch": 1.5765502589297569, + "grad_norm": 1.246429396187596, + "learning_rate": 2.356516110017639e-06, + "loss": 0.22510530054569244, + "step": 5937 + }, + { + "epoch": 1.5768158279112998, + "grad_norm": 1.2889494549478113, + "learning_rate": 2.3536856660344144e-06, + "loss": 0.22967353463172913, + "step": 5938 + }, + { + "epoch": 1.5770813968928428, + "grad_norm": 1.2404139099674472, + "learning_rate": 2.3508566961704127e-06, + "loss": 0.2299107313156128, + "step": 5939 + }, + { + "epoch": 1.5773469658743857, + "grad_norm": 1.2560783974284127, + "learning_rate": 2.3480292009710282e-06, + "loss": 0.23418918251991272, + "step": 5940 + }, + { + "epoch": 1.5776125348559287, + "grad_norm": 1.2857056044544095, + "learning_rate": 2.3452031809813657e-06, + "loss": 0.26528510451316833, + "step": 5941 + }, + { + "epoch": 1.5778781038374716, + "grad_norm": 1.1247059842406957, + "learning_rate": 2.342378636746251e-06, + "loss": 0.21878717839717865, + "step": 5942 + }, + { + "epoch": 1.5781436728190146, + "grad_norm": 1.1637472196421235, + "learning_rate": 2.339555568810221e-06, + "loss": 0.19697530567646027, + "step": 5943 + }, + { + "epoch": 1.5784092418005577, + "grad_norm": 1.3422665805434115, + "learning_rate": 2.3367339777175313e-06, + "loss": 0.24812257289886475, + "step": 5944 + }, + { + "epoch": 1.5786748107821007, + "grad_norm": 1.3285793357341238, + "learning_rate": 2.3339138640121504e-06, + "loss": 0.27651745080947876, + "step": 5945 + }, + { + "epoch": 1.5789403797636437, + "grad_norm": 1.308131821171991, + "learning_rate": 2.3310952282377643e-06, + "loss": 0.2651634216308594, + "step": 5946 + }, + { + "epoch": 1.5792059487451866, + "grad_norm": 1.3163549633798883, + "learning_rate": 2.328278070937772e-06, + "loss": 0.23799028992652893, + "step": 5947 + }, + { + "epoch": 1.5794715177267296, + "grad_norm": 1.4229706240812914, + "learning_rate": 2.3254623926552867e-06, + "loss": 0.2528802752494812, + "step": 5948 + }, + { + "epoch": 1.5797370867082725, + "grad_norm": 1.2071666314804592, + "learning_rate": 2.322648193933137e-06, + "loss": 0.23819346725940704, + "step": 5949 + }, + { + "epoch": 1.5800026556898155, + "grad_norm": 1.2694222057013376, + "learning_rate": 2.319835475313873e-06, + "loss": 0.2510845959186554, + "step": 5950 + }, + { + "epoch": 1.5802682246713584, + "grad_norm": 1.0731141255180743, + "learning_rate": 2.31702423733975e-06, + "loss": 0.20156612992286682, + "step": 5951 + }, + { + "epoch": 1.5805337936529014, + "grad_norm": 1.320010192923148, + "learning_rate": 2.3142144805527413e-06, + "loss": 0.23375174403190613, + "step": 5952 + }, + { + "epoch": 1.5807993626344443, + "grad_norm": 1.187058092026163, + "learning_rate": 2.311406205494535e-06, + "loss": 0.2378280758857727, + "step": 5953 + }, + { + "epoch": 1.5810649316159873, + "grad_norm": 1.4550533599389408, + "learning_rate": 2.308599412706535e-06, + "loss": 0.2087683081626892, + "step": 5954 + }, + { + "epoch": 1.5813305005975302, + "grad_norm": 1.2856302099767283, + "learning_rate": 2.3057941027298557e-06, + "loss": 0.2228693962097168, + "step": 5955 + }, + { + "epoch": 1.5815960695790732, + "grad_norm": 1.4738789364963756, + "learning_rate": 2.302990276105329e-06, + "loss": 0.22694727778434753, + "step": 5956 + }, + { + "epoch": 1.5818616385606161, + "grad_norm": 1.2486840544551192, + "learning_rate": 2.300187933373499e-06, + "loss": 0.22996942698955536, + "step": 5957 + }, + { + "epoch": 1.582127207542159, + "grad_norm": 1.331719034245123, + "learning_rate": 2.2973870750746253e-06, + "loss": 0.2440253496170044, + "step": 5958 + }, + { + "epoch": 1.582392776523702, + "grad_norm": 1.3266637203740035, + "learning_rate": 2.2945877017486782e-06, + "loss": 0.2507309019565582, + "step": 5959 + }, + { + "epoch": 1.582658345505245, + "grad_norm": 2.8683041985739677, + "learning_rate": 2.2917898139353467e-06, + "loss": 0.24790918827056885, + "step": 5960 + }, + { + "epoch": 1.582923914486788, + "grad_norm": 1.4168604850261965, + "learning_rate": 2.2889934121740287e-06, + "loss": 0.22106975317001343, + "step": 5961 + }, + { + "epoch": 1.5831894834683309, + "grad_norm": 1.5726662217531726, + "learning_rate": 2.2861984970038385e-06, + "loss": 0.2410939633846283, + "step": 5962 + }, + { + "epoch": 1.5834550524498738, + "grad_norm": 1.1559016560001114, + "learning_rate": 2.283405068963601e-06, + "loss": 0.22821484506130219, + "step": 5963 + }, + { + "epoch": 1.5837206214314168, + "grad_norm": 1.2324685594628142, + "learning_rate": 2.2806131285918588e-06, + "loss": 0.21425281465053558, + "step": 5964 + }, + { + "epoch": 1.5839861904129597, + "grad_norm": 1.2434376170807215, + "learning_rate": 2.277822676426863e-06, + "loss": 0.22428902983665466, + "step": 5965 + }, + { + "epoch": 1.5842517593945027, + "grad_norm": 1.4592375031786005, + "learning_rate": 2.27503371300658e-06, + "loss": 0.2986769676208496, + "step": 5966 + }, + { + "epoch": 1.5845173283760456, + "grad_norm": 1.4384957681975041, + "learning_rate": 2.272246238868687e-06, + "loss": 0.24697065353393555, + "step": 5967 + }, + { + "epoch": 1.5847828973575886, + "grad_norm": 1.3175254870878064, + "learning_rate": 2.269460254550583e-06, + "loss": 0.23725461959838867, + "step": 5968 + }, + { + "epoch": 1.5850484663391315, + "grad_norm": 1.5010497616053564, + "learning_rate": 2.2666757605893664e-06, + "loss": 0.2661248445510864, + "step": 5969 + }, + { + "epoch": 1.5853140353206745, + "grad_norm": 1.2390278830143426, + "learning_rate": 2.263892757521858e-06, + "loss": 0.23328733444213867, + "step": 5970 + }, + { + "epoch": 1.5855796043022174, + "grad_norm": 1.2547818797647754, + "learning_rate": 2.2611112458845873e-06, + "loss": 0.22886580228805542, + "step": 5971 + }, + { + "epoch": 1.5858451732837606, + "grad_norm": 1.1882681583888588, + "learning_rate": 2.2583312262137966e-06, + "loss": 0.25051698088645935, + "step": 5972 + }, + { + "epoch": 1.5861107422653036, + "grad_norm": 1.2988472953319592, + "learning_rate": 2.2555526990454413e-06, + "loss": 0.2400815784931183, + "step": 5973 + }, + { + "epoch": 1.5863763112468465, + "grad_norm": 1.1598677166947555, + "learning_rate": 2.2527756649151912e-06, + "loss": 0.2212347537279129, + "step": 5974 + }, + { + "epoch": 1.5866418802283895, + "grad_norm": 1.355013417523964, + "learning_rate": 2.2500001243584204e-06, + "loss": 0.3002026379108429, + "step": 5975 + }, + { + "epoch": 1.5869074492099324, + "grad_norm": 1.1899701199057289, + "learning_rate": 2.2472260779102185e-06, + "loss": 0.19813531637191772, + "step": 5976 + }, + { + "epoch": 1.5871730181914754, + "grad_norm": 1.2404972223723234, + "learning_rate": 2.2444535261053968e-06, + "loss": 0.2233983874320984, + "step": 5977 + }, + { + "epoch": 1.5874385871730183, + "grad_norm": 1.417840431772693, + "learning_rate": 2.2416824694784676e-06, + "loss": 0.26059988141059875, + "step": 5978 + }, + { + "epoch": 1.5877041561545613, + "grad_norm": 1.2961846276739968, + "learning_rate": 2.2389129085636573e-06, + "loss": 0.23058606684207916, + "step": 5979 + }, + { + "epoch": 1.5879697251361042, + "grad_norm": 1.3397298592095879, + "learning_rate": 2.236144843894904e-06, + "loss": 0.2414383739233017, + "step": 5980 + }, + { + "epoch": 1.5882352941176472, + "grad_norm": 1.2013757541083616, + "learning_rate": 2.23337827600586e-06, + "loss": 0.21688291430473328, + "step": 5981 + }, + { + "epoch": 1.5885008630991901, + "grad_norm": 1.2977536190104755, + "learning_rate": 2.2306132054298847e-06, + "loss": 0.24297408759593964, + "step": 5982 + }, + { + "epoch": 1.588766432080733, + "grad_norm": 1.449081017944755, + "learning_rate": 2.227849632700052e-06, + "loss": 0.2655821442604065, + "step": 5983 + }, + { + "epoch": 1.589032001062276, + "grad_norm": 1.2305338711146763, + "learning_rate": 2.225087558349146e-06, + "loss": 0.20545080304145813, + "step": 5984 + }, + { + "epoch": 1.589297570043819, + "grad_norm": 1.470607418959754, + "learning_rate": 2.2223269829096593e-06, + "loss": 0.24151475727558136, + "step": 5985 + }, + { + "epoch": 1.589563139025362, + "grad_norm": 1.2194062039730535, + "learning_rate": 2.2195679069138043e-06, + "loss": 0.2294519543647766, + "step": 5986 + }, + { + "epoch": 1.589828708006905, + "grad_norm": 1.3319096935394759, + "learning_rate": 2.2168103308934953e-06, + "loss": 0.2041824758052826, + "step": 5987 + }, + { + "epoch": 1.5900942769884479, + "grad_norm": 1.181577384258167, + "learning_rate": 2.21405425538036e-06, + "loss": 0.1856188029050827, + "step": 5988 + }, + { + "epoch": 1.5903598459699908, + "grad_norm": 1.2644853901124522, + "learning_rate": 2.2112996809057395e-06, + "loss": 0.24337685108184814, + "step": 5989 + }, + { + "epoch": 1.5906254149515338, + "grad_norm": 1.1714048449744126, + "learning_rate": 2.20854660800068e-06, + "loss": 0.2201787382364273, + "step": 5990 + }, + { + "epoch": 1.5908909839330767, + "grad_norm": 1.322531300676563, + "learning_rate": 2.2057950371959427e-06, + "loss": 0.23505619168281555, + "step": 5991 + }, + { + "epoch": 1.5911565529146197, + "grad_norm": 1.4085526679551708, + "learning_rate": 2.203044969021997e-06, + "loss": 0.19528049230575562, + "step": 5992 + }, + { + "epoch": 1.5914221218961626, + "grad_norm": 1.2299879902160842, + "learning_rate": 2.2002964040090256e-06, + "loss": 0.22281290590763092, + "step": 5993 + }, + { + "epoch": 1.5916876908777056, + "grad_norm": 1.310771483519368, + "learning_rate": 2.1975493426869155e-06, + "loss": 0.19606761634349823, + "step": 5994 + }, + { + "epoch": 1.5919532598592485, + "grad_norm": 1.2570005315725017, + "learning_rate": 2.1948037855852733e-06, + "loss": 0.22559323906898499, + "step": 5995 + }, + { + "epoch": 1.5922188288407915, + "grad_norm": 1.2326545276620708, + "learning_rate": 2.192059733233408e-06, + "loss": 0.20417393743991852, + "step": 5996 + }, + { + "epoch": 1.5924843978223344, + "grad_norm": 1.351064737074131, + "learning_rate": 2.18931718616034e-06, + "loss": 0.2579960525035858, + "step": 5997 + }, + { + "epoch": 1.5927499668038774, + "grad_norm": 1.2980140620122547, + "learning_rate": 2.1865761448948e-06, + "loss": 0.23339781165122986, + "step": 5998 + }, + { + "epoch": 1.5930155357854203, + "grad_norm": 1.2588476812522966, + "learning_rate": 2.1838366099652274e-06, + "loss": 0.2368197739124298, + "step": 5999 + }, + { + "epoch": 1.5932811047669633, + "grad_norm": 1.2980274155826699, + "learning_rate": 2.1810985818997743e-06, + "loss": 0.2225847840309143, + "step": 6000 + }, + { + "epoch": 1.5935466737485062, + "grad_norm": 1.3094945647641514, + "learning_rate": 2.1783620612263e-06, + "loss": 0.2426701784133911, + "step": 6001 + }, + { + "epoch": 1.5938122427300492, + "grad_norm": 1.284834767608695, + "learning_rate": 2.175627048472372e-06, + "loss": 0.23647268116474152, + "step": 6002 + }, + { + "epoch": 1.5940778117115921, + "grad_norm": 1.2525920428706867, + "learning_rate": 2.1728935441652687e-06, + "loss": 0.22843337059020996, + "step": 6003 + }, + { + "epoch": 1.594343380693135, + "grad_norm": 1.1786632019087344, + "learning_rate": 2.1701615488319785e-06, + "loss": 0.21524465084075928, + "step": 6004 + }, + { + "epoch": 1.594608949674678, + "grad_norm": 1.225831889373155, + "learning_rate": 2.167431062999197e-06, + "loss": 0.2160830795764923, + "step": 6005 + }, + { + "epoch": 1.594874518656221, + "grad_norm": 1.238709201727011, + "learning_rate": 2.1647020871933288e-06, + "loss": 0.2321595996618271, + "step": 6006 + }, + { + "epoch": 1.595140087637764, + "grad_norm": 1.164283210992047, + "learning_rate": 2.1619746219404916e-06, + "loss": 0.21255026757717133, + "step": 6007 + }, + { + "epoch": 1.5954056566193069, + "grad_norm": 1.3822319128280973, + "learning_rate": 2.1592486677665047e-06, + "loss": 0.22851255536079407, + "step": 6008 + }, + { + "epoch": 1.5956712256008498, + "grad_norm": 1.3982384304626327, + "learning_rate": 2.1565242251969022e-06, + "loss": 0.23844364285469055, + "step": 6009 + }, + { + "epoch": 1.5959367945823928, + "grad_norm": 1.3184134341650149, + "learning_rate": 2.153801294756924e-06, + "loss": 0.2592385411262512, + "step": 6010 + }, + { + "epoch": 1.5962023635639357, + "grad_norm": 1.221300094567036, + "learning_rate": 2.151079876971519e-06, + "loss": 0.22163718938827515, + "step": 6011 + }, + { + "epoch": 1.5964679325454787, + "grad_norm": 1.1840952132259899, + "learning_rate": 2.1483599723653415e-06, + "loss": 0.1960998773574829, + "step": 6012 + }, + { + "epoch": 1.5967335015270216, + "grad_norm": 1.1732770789502442, + "learning_rate": 2.145641581462762e-06, + "loss": 0.20811150968074799, + "step": 6013 + }, + { + "epoch": 1.5969990705085646, + "grad_norm": 1.2065470685478314, + "learning_rate": 2.1429247047878534e-06, + "loss": 0.23184621334075928, + "step": 6014 + }, + { + "epoch": 1.5972646394901076, + "grad_norm": 1.3338850940720004, + "learning_rate": 2.1402093428643942e-06, + "loss": 0.22043758630752563, + "step": 6015 + }, + { + "epoch": 1.5975302084716505, + "grad_norm": 1.1736165993383876, + "learning_rate": 2.137495496215878e-06, + "loss": 0.18621152639389038, + "step": 6016 + }, + { + "epoch": 1.5977957774531935, + "grad_norm": 1.332636421894691, + "learning_rate": 2.1347831653654995e-06, + "loss": 0.2422473132610321, + "step": 6017 + }, + { + "epoch": 1.5980613464347364, + "grad_norm": 1.5933227500597664, + "learning_rate": 2.132072350836164e-06, + "loss": 0.2147202491760254, + "step": 6018 + }, + { + "epoch": 1.5983269154162794, + "grad_norm": 1.5455916288717333, + "learning_rate": 2.1293630531504873e-06, + "loss": 0.23091933131217957, + "step": 6019 + }, + { + "epoch": 1.5985924843978223, + "grad_norm": 1.290869089573798, + "learning_rate": 2.1266552728307876e-06, + "loss": 0.220037579536438, + "step": 6020 + }, + { + "epoch": 1.5988580533793653, + "grad_norm": 1.3343924424387823, + "learning_rate": 2.1239490103990946e-06, + "loss": 0.25520551204681396, + "step": 6021 + }, + { + "epoch": 1.5991236223609082, + "grad_norm": 1.412222062207012, + "learning_rate": 2.1212442663771427e-06, + "loss": 0.23216915130615234, + "step": 6022 + }, + { + "epoch": 1.5993891913424512, + "grad_norm": 1.381515312381825, + "learning_rate": 2.118541041286374e-06, + "loss": 0.22098806500434875, + "step": 6023 + }, + { + "epoch": 1.5996547603239941, + "grad_norm": 1.4609594644715316, + "learning_rate": 2.11583933564794e-06, + "loss": 0.261300265789032, + "step": 6024 + }, + { + "epoch": 1.599920329305537, + "grad_norm": 1.2095539498781858, + "learning_rate": 2.113139149982698e-06, + "loss": 0.20427154004573822, + "step": 6025 + }, + { + "epoch": 1.60018589828708, + "grad_norm": 1.2158101663646808, + "learning_rate": 2.110440484811209e-06, + "loss": 0.20700547099113464, + "step": 6026 + }, + { + "epoch": 1.600451467268623, + "grad_norm": 1.4331467444820847, + "learning_rate": 2.1077433406537475e-06, + "loss": 0.2789752185344696, + "step": 6027 + }, + { + "epoch": 1.600717036250166, + "grad_norm": 1.2991321976135584, + "learning_rate": 2.1050477180302885e-06, + "loss": 0.2205841988325119, + "step": 6028 + }, + { + "epoch": 1.6009826052317089, + "grad_norm": 1.3197920849647402, + "learning_rate": 2.1023536174605184e-06, + "loss": 0.24921822547912598, + "step": 6029 + }, + { + "epoch": 1.6012481742132518, + "grad_norm": 2.014197229906981, + "learning_rate": 2.0996610394638228e-06, + "loss": 0.2516329288482666, + "step": 6030 + }, + { + "epoch": 1.6015137431947948, + "grad_norm": 1.2656936665142342, + "learning_rate": 2.096969984559306e-06, + "loss": 0.21832503378391266, + "step": 6031 + }, + { + "epoch": 1.6017793121763377, + "grad_norm": 1.530808592055088, + "learning_rate": 2.094280453265769e-06, + "loss": 0.2499273419380188, + "step": 6032 + }, + { + "epoch": 1.6020448811578807, + "grad_norm": 1.167125195859278, + "learning_rate": 2.09159244610172e-06, + "loss": 0.21701282262802124, + "step": 6033 + }, + { + "epoch": 1.6023104501394236, + "grad_norm": 1.2536801575307182, + "learning_rate": 2.0889059635853783e-06, + "loss": 0.24446213245391846, + "step": 6034 + }, + { + "epoch": 1.6025760191209666, + "grad_norm": 1.412317581200794, + "learning_rate": 2.0862210062346622e-06, + "loss": 0.27299973368644714, + "step": 6035 + }, + { + "epoch": 1.6028415881025095, + "grad_norm": 1.320945278338079, + "learning_rate": 2.0835375745672027e-06, + "loss": 0.2384832501411438, + "step": 6036 + }, + { + "epoch": 1.6031071570840525, + "grad_norm": 1.340788170535406, + "learning_rate": 2.0808556691003335e-06, + "loss": 0.2563338875770569, + "step": 6037 + }, + { + "epoch": 1.6033727260655954, + "grad_norm": 1.5240284764155023, + "learning_rate": 2.0781752903510954e-06, + "loss": 0.29148975014686584, + "step": 6038 + }, + { + "epoch": 1.6036382950471384, + "grad_norm": 1.1673304070468655, + "learning_rate": 2.0754964388362264e-06, + "loss": 0.24276503920555115, + "step": 6039 + }, + { + "epoch": 1.6039038640286813, + "grad_norm": 1.2629655044665746, + "learning_rate": 2.0728191150721866e-06, + "loss": 0.1863931119441986, + "step": 6040 + }, + { + "epoch": 1.6041694330102243, + "grad_norm": 1.1731073698012655, + "learning_rate": 2.0701433195751286e-06, + "loss": 0.21270868182182312, + "step": 6041 + }, + { + "epoch": 1.6044350019917673, + "grad_norm": 1.2780583308550695, + "learning_rate": 2.0674690528609155e-06, + "loss": 0.21542516350746155, + "step": 6042 + }, + { + "epoch": 1.6047005709733102, + "grad_norm": 1.256432235067539, + "learning_rate": 2.0647963154451124e-06, + "loss": 0.23099860548973083, + "step": 6043 + }, + { + "epoch": 1.6049661399548532, + "grad_norm": 1.1769565332020941, + "learning_rate": 2.062125107842993e-06, + "loss": 0.22757291793823242, + "step": 6044 + }, + { + "epoch": 1.605231708936396, + "grad_norm": 1.317404807729369, + "learning_rate": 2.0594554305695346e-06, + "loss": 0.2370409518480301, + "step": 6045 + }, + { + "epoch": 1.605497277917939, + "grad_norm": 1.1803781252235817, + "learning_rate": 2.0567872841394186e-06, + "loss": 0.21620309352874756, + "step": 6046 + }, + { + "epoch": 1.605762846899482, + "grad_norm": 1.2191738819977833, + "learning_rate": 2.0541206690670324e-06, + "loss": 0.22821158170700073, + "step": 6047 + }, + { + "epoch": 1.606028415881025, + "grad_norm": 1.385940331470305, + "learning_rate": 2.0514555858664663e-06, + "loss": 0.24930253624916077, + "step": 6048 + }, + { + "epoch": 1.606293984862568, + "grad_norm": 1.3966922562239508, + "learning_rate": 2.048792035051521e-06, + "loss": 0.2491561770439148, + "step": 6049 + }, + { + "epoch": 1.6065595538441109, + "grad_norm": 1.3037697337655914, + "learning_rate": 2.046130017135697e-06, + "loss": 0.20652002096176147, + "step": 6050 + }, + { + "epoch": 1.6068251228256538, + "grad_norm": 1.1970911046995705, + "learning_rate": 2.0434695326321975e-06, + "loss": 0.25670793652534485, + "step": 6051 + }, + { + "epoch": 1.6070906918071968, + "grad_norm": 1.2469219040368793, + "learning_rate": 2.0408105820539328e-06, + "loss": 0.2328418493270874, + "step": 6052 + }, + { + "epoch": 1.6073562607887397, + "grad_norm": 1.2657559287734064, + "learning_rate": 2.0381531659135213e-06, + "loss": 0.20811162889003754, + "step": 6053 + }, + { + "epoch": 1.6076218297702827, + "grad_norm": 1.2637409014709644, + "learning_rate": 2.0354972847232756e-06, + "loss": 0.24068522453308105, + "step": 6054 + }, + { + "epoch": 1.6078873987518256, + "grad_norm": 1.3537388998191249, + "learning_rate": 2.032842938995221e-06, + "loss": 0.2519197463989258, + "step": 6055 + }, + { + "epoch": 1.6081529677333686, + "grad_norm": 1.349413355425799, + "learning_rate": 2.030190129241083e-06, + "loss": 0.2293267697095871, + "step": 6056 + }, + { + "epoch": 1.6084185367149118, + "grad_norm": 1.8474927483406436, + "learning_rate": 2.027538855972291e-06, + "loss": 0.22398510575294495, + "step": 6057 + }, + { + "epoch": 1.6086841056964547, + "grad_norm": 1.4186878733418118, + "learning_rate": 2.0248891196999833e-06, + "loss": 0.23074102401733398, + "step": 6058 + }, + { + "epoch": 1.6089496746779977, + "grad_norm": 1.352152679115686, + "learning_rate": 2.0222409209349957e-06, + "loss": 0.2618173658847809, + "step": 6059 + }, + { + "epoch": 1.6092152436595406, + "grad_norm": 1.2898742263880296, + "learning_rate": 2.0195942601878703e-06, + "loss": 0.25361114740371704, + "step": 6060 + }, + { + "epoch": 1.6094808126410836, + "grad_norm": 1.2270527625039152, + "learning_rate": 2.016949137968851e-06, + "loss": 0.2276519238948822, + "step": 6061 + }, + { + "epoch": 1.6097463816226265, + "grad_norm": 1.3155356069823825, + "learning_rate": 2.0143055547878863e-06, + "loss": 0.20834363996982574, + "step": 6062 + }, + { + "epoch": 1.6100119506041695, + "grad_norm": 1.348708703656222, + "learning_rate": 2.011663511154628e-06, + "loss": 0.2579394578933716, + "step": 6063 + }, + { + "epoch": 1.6102775195857124, + "grad_norm": 1.2574503425710122, + "learning_rate": 2.009023007578431e-06, + "loss": 0.22118912637233734, + "step": 6064 + }, + { + "epoch": 1.6105430885672554, + "grad_norm": 1.1631210187007555, + "learning_rate": 2.0063840445683537e-06, + "loss": 0.1881515383720398, + "step": 6065 + }, + { + "epoch": 1.6108086575487983, + "grad_norm": 1.2884662240297928, + "learning_rate": 2.003746622633155e-06, + "loss": 0.2270805984735489, + "step": 6066 + }, + { + "epoch": 1.6110742265303413, + "grad_norm": 1.4261065534360056, + "learning_rate": 2.0011107422813013e-06, + "loss": 0.26356351375579834, + "step": 6067 + }, + { + "epoch": 1.6113397955118842, + "grad_norm": 1.2506363457624738, + "learning_rate": 1.9984764040209615e-06, + "loss": 0.22937676310539246, + "step": 6068 + }, + { + "epoch": 1.6116053644934272, + "grad_norm": 1.329188800311282, + "learning_rate": 1.99584360836e-06, + "loss": 0.25062739849090576, + "step": 6069 + }, + { + "epoch": 1.6118709334749701, + "grad_norm": 1.1593663351806502, + "learning_rate": 1.993212355805989e-06, + "loss": 0.2031324952840805, + "step": 6070 + }, + { + "epoch": 1.612136502456513, + "grad_norm": 1.3722085699931008, + "learning_rate": 1.990582646866206e-06, + "loss": 0.25769656896591187, + "step": 6071 + }, + { + "epoch": 1.612402071438056, + "grad_norm": 1.3184109520906713, + "learning_rate": 1.987954482047626e-06, + "loss": 0.23856252431869507, + "step": 6072 + }, + { + "epoch": 1.612667640419599, + "grad_norm": 1.3452730145342116, + "learning_rate": 1.9853278618569284e-06, + "loss": 0.2336723804473877, + "step": 6073 + }, + { + "epoch": 1.612933209401142, + "grad_norm": 1.3427497614935235, + "learning_rate": 1.9827027868004942e-06, + "loss": 0.22327622771263123, + "step": 6074 + }, + { + "epoch": 1.6131987783826849, + "grad_norm": 1.302817235652594, + "learning_rate": 1.980079257384405e-06, + "loss": 0.26695019006729126, + "step": 6075 + }, + { + "epoch": 1.6134643473642278, + "grad_norm": 1.174792834468628, + "learning_rate": 1.9774572741144514e-06, + "loss": 0.2467387616634369, + "step": 6076 + }, + { + "epoch": 1.6137299163457708, + "grad_norm": 1.3974546997540778, + "learning_rate": 1.9748368374961193e-06, + "loss": 0.25473737716674805, + "step": 6077 + }, + { + "epoch": 1.6139954853273137, + "grad_norm": 1.295354894556923, + "learning_rate": 1.972217948034596e-06, + "loss": 0.25508594512939453, + "step": 6078 + }, + { + "epoch": 1.6142610543088567, + "grad_norm": 1.2627621502033493, + "learning_rate": 1.969600606234774e-06, + "loss": 0.23020131886005402, + "step": 6079 + }, + { + "epoch": 1.6145266232903996, + "grad_norm": 1.2036992831321345, + "learning_rate": 1.9669848126012447e-06, + "loss": 0.249805748462677, + "step": 6080 + }, + { + "epoch": 1.6147921922719426, + "grad_norm": 1.2304217597704168, + "learning_rate": 1.964370567638303e-06, + "loss": 0.2377707064151764, + "step": 6081 + }, + { + "epoch": 1.6150577612534855, + "grad_norm": 1.3812388616949685, + "learning_rate": 1.9617578718499452e-06, + "loss": 0.28656789660453796, + "step": 6082 + }, + { + "epoch": 1.6153233302350285, + "grad_norm": 1.3083477730508752, + "learning_rate": 1.9591467257398668e-06, + "loss": 0.22079989314079285, + "step": 6083 + }, + { + "epoch": 1.6155888992165715, + "grad_norm": 1.048982897357468, + "learning_rate": 1.9565371298114666e-06, + "loss": 0.1993042230606079, + "step": 6084 + }, + { + "epoch": 1.6158544681981146, + "grad_norm": 1.1837758778278344, + "learning_rate": 1.9539290845678438e-06, + "loss": 0.20818357169628143, + "step": 6085 + }, + { + "epoch": 1.6161200371796576, + "grad_norm": 1.2192677831294998, + "learning_rate": 1.9513225905117996e-06, + "loss": 0.20531761646270752, + "step": 6086 + }, + { + "epoch": 1.6163856061612005, + "grad_norm": 1.2499003349392819, + "learning_rate": 1.948717648145834e-06, + "loss": 0.23414376378059387, + "step": 6087 + }, + { + "epoch": 1.6166511751427435, + "grad_norm": 1.2073482694002922, + "learning_rate": 1.9461142579721493e-06, + "loss": 0.2025471031665802, + "step": 6088 + }, + { + "epoch": 1.6169167441242864, + "grad_norm": 1.4729414889087271, + "learning_rate": 1.943512420492649e-06, + "loss": 0.19130446016788483, + "step": 6089 + }, + { + "epoch": 1.6171823131058294, + "grad_norm": 1.1947055473554775, + "learning_rate": 1.940912136208938e-06, + "loss": 0.21637848019599915, + "step": 6090 + }, + { + "epoch": 1.6174478820873723, + "grad_norm": 1.301401884532825, + "learning_rate": 1.9383134056223176e-06, + "loss": 0.26844075322151184, + "step": 6091 + }, + { + "epoch": 1.6177134510689153, + "grad_norm": 1.1755891449306313, + "learning_rate": 1.935716229233794e-06, + "loss": 0.19573305547237396, + "step": 6092 + }, + { + "epoch": 1.6179790200504582, + "grad_norm": 1.2705214543802177, + "learning_rate": 1.93312060754407e-06, + "loss": 0.22705954313278198, + "step": 6093 + }, + { + "epoch": 1.6182445890320012, + "grad_norm": 1.279170245457384, + "learning_rate": 1.9305265410535545e-06, + "loss": 0.2505400478839874, + "step": 6094 + }, + { + "epoch": 1.6185101580135441, + "grad_norm": 1.2108711177458409, + "learning_rate": 1.927934030262353e-06, + "loss": 0.2328193187713623, + "step": 6095 + }, + { + "epoch": 1.618775726995087, + "grad_norm": 1.2588974628750198, + "learning_rate": 1.9253430756702674e-06, + "loss": 0.23876577615737915, + "step": 6096 + }, + { + "epoch": 1.61904129597663, + "grad_norm": 1.3685755624123837, + "learning_rate": 1.9227536777768063e-06, + "loss": 0.2390732318162918, + "step": 6097 + }, + { + "epoch": 1.619306864958173, + "grad_norm": 1.3858306009370809, + "learning_rate": 1.9201658370811736e-06, + "loss": 0.25231993198394775, + "step": 6098 + }, + { + "epoch": 1.619572433939716, + "grad_norm": 1.2520374949609627, + "learning_rate": 1.917579554082274e-06, + "loss": 0.21527352929115295, + "step": 6099 + }, + { + "epoch": 1.619838002921259, + "grad_norm": 1.2236250632687489, + "learning_rate": 1.9149948292787133e-06, + "loss": 0.21394580602645874, + "step": 6100 + }, + { + "epoch": 1.6201035719028019, + "grad_norm": 1.3465338603905943, + "learning_rate": 1.912411663168796e-06, + "loss": 0.26093196868896484, + "step": 6101 + }, + { + "epoch": 1.6203691408843448, + "grad_norm": 1.3518497357465815, + "learning_rate": 1.9098300562505266e-06, + "loss": 0.2631412744522095, + "step": 6102 + }, + { + "epoch": 1.6206347098658878, + "grad_norm": 1.3007944720423297, + "learning_rate": 1.9072500090216073e-06, + "loss": 0.270250141620636, + "step": 6103 + }, + { + "epoch": 1.6209002788474307, + "grad_norm": 1.3385737712068424, + "learning_rate": 1.9046715219794397e-06, + "loss": 0.22944031655788422, + "step": 6104 + }, + { + "epoch": 1.6211658478289737, + "grad_norm": 1.2125488505372424, + "learning_rate": 1.902094595621129e-06, + "loss": 0.24429070949554443, + "step": 6105 + }, + { + "epoch": 1.6214314168105166, + "grad_norm": 1.2581532570405378, + "learning_rate": 1.8995192304434729e-06, + "loss": 0.25656238198280334, + "step": 6106 + }, + { + "epoch": 1.6216969857920596, + "grad_norm": 1.3466122688772229, + "learning_rate": 1.8969454269429743e-06, + "loss": 0.2575233280658722, + "step": 6107 + }, + { + "epoch": 1.6219625547736025, + "grad_norm": 1.245984919504028, + "learning_rate": 1.8943731856158299e-06, + "loss": 0.24881063401699066, + "step": 6108 + }, + { + "epoch": 1.6222281237551455, + "grad_norm": 1.2845731125917577, + "learning_rate": 1.8918025069579382e-06, + "loss": 0.23353847861289978, + "step": 6109 + }, + { + "epoch": 1.6224936927366884, + "grad_norm": 1.2505489106727152, + "learning_rate": 1.8892333914648953e-06, + "loss": 0.21085457503795624, + "step": 6110 + }, + { + "epoch": 1.6227592617182314, + "grad_norm": 1.4134001131082032, + "learning_rate": 1.8866658396319947e-06, + "loss": 0.28600943088531494, + "step": 6111 + }, + { + "epoch": 1.6230248306997743, + "grad_norm": 1.1689838110439057, + "learning_rate": 1.8840998519542352e-06, + "loss": 0.22580507397651672, + "step": 6112 + }, + { + "epoch": 1.6232903996813173, + "grad_norm": 1.212526750953587, + "learning_rate": 1.8815354289263066e-06, + "loss": 0.19310800731182098, + "step": 6113 + }, + { + "epoch": 1.6235559686628602, + "grad_norm": 1.3020905454433194, + "learning_rate": 1.8789725710425988e-06, + "loss": 0.21633204817771912, + "step": 6114 + }, + { + "epoch": 1.6238215376444032, + "grad_norm": 1.4315370828946672, + "learning_rate": 1.8764112787972e-06, + "loss": 0.21346023678779602, + "step": 6115 + }, + { + "epoch": 1.6240871066259461, + "grad_norm": 1.21392020481053, + "learning_rate": 1.8738515526838986e-06, + "loss": 0.21206694841384888, + "step": 6116 + }, + { + "epoch": 1.624352675607489, + "grad_norm": 1.3197096686410696, + "learning_rate": 1.8712933931961773e-06, + "loss": 0.2135339230298996, + "step": 6117 + }, + { + "epoch": 1.624618244589032, + "grad_norm": 1.2484635869956482, + "learning_rate": 1.8687368008272243e-06, + "loss": 0.2168758660554886, + "step": 6118 + }, + { + "epoch": 1.624883813570575, + "grad_norm": 1.1804251189525716, + "learning_rate": 1.866181776069914e-06, + "loss": 0.20825617015361786, + "step": 6119 + }, + { + "epoch": 1.625149382552118, + "grad_norm": 1.291082575518304, + "learning_rate": 1.863628319416826e-06, + "loss": 0.25367867946624756, + "step": 6120 + }, + { + "epoch": 1.625414951533661, + "grad_norm": 1.3053498393136334, + "learning_rate": 1.8610764313602404e-06, + "loss": 0.21604284644126892, + "step": 6121 + }, + { + "epoch": 1.6256805205152038, + "grad_norm": 1.2871138327885168, + "learning_rate": 1.8585261123921283e-06, + "loss": 0.2324865758419037, + "step": 6122 + }, + { + "epoch": 1.6259460894967468, + "grad_norm": 1.2467444217539543, + "learning_rate": 1.8559773630041632e-06, + "loss": 0.2077629417181015, + "step": 6123 + }, + { + "epoch": 1.6262116584782897, + "grad_norm": 1.1704936500874914, + "learning_rate": 1.8534301836877122e-06, + "loss": 0.19919469952583313, + "step": 6124 + }, + { + "epoch": 1.6264772274598327, + "grad_norm": 1.1998850682672693, + "learning_rate": 1.8508845749338412e-06, + "loss": 0.21069160103797913, + "step": 6125 + }, + { + "epoch": 1.6267427964413756, + "grad_norm": 1.218804714337499, + "learning_rate": 1.8483405372333152e-06, + "loss": 0.2286640703678131, + "step": 6126 + }, + { + "epoch": 1.6270083654229186, + "grad_norm": 1.33630910648056, + "learning_rate": 1.8457980710765932e-06, + "loss": 0.2430541068315506, + "step": 6127 + }, + { + "epoch": 1.6272739344044616, + "grad_norm": 1.3713498598627625, + "learning_rate": 1.8432571769538344e-06, + "loss": 0.21875709295272827, + "step": 6128 + }, + { + "epoch": 1.6275395033860045, + "grad_norm": 1.4416966555618131, + "learning_rate": 1.8407178553548876e-06, + "loss": 0.22591018676757812, + "step": 6129 + }, + { + "epoch": 1.6278050723675475, + "grad_norm": 1.362917465597037, + "learning_rate": 1.8381801067693129e-06, + "loss": 0.25429075956344604, + "step": 6130 + }, + { + "epoch": 1.6280706413490904, + "grad_norm": 1.31452454626215, + "learning_rate": 1.8356439316863528e-06, + "loss": 0.2437858283519745, + "step": 6131 + }, + { + "epoch": 1.6283362103306334, + "grad_norm": 1.2489983792436092, + "learning_rate": 1.8331093305949532e-06, + "loss": 0.24196262657642365, + "step": 6132 + }, + { + "epoch": 1.6286017793121763, + "grad_norm": 1.3756170241894088, + "learning_rate": 1.8305763039837576e-06, + "loss": 0.25779271125793457, + "step": 6133 + }, + { + "epoch": 1.6288673482937193, + "grad_norm": 1.223955710903011, + "learning_rate": 1.8280448523410987e-06, + "loss": 0.23418015241622925, + "step": 6134 + }, + { + "epoch": 1.6291329172752622, + "grad_norm": 1.3748973147827792, + "learning_rate": 1.8255149761550128e-06, + "loss": 0.2670775353908539, + "step": 6135 + }, + { + "epoch": 1.6293984862568052, + "grad_norm": 1.423176544673552, + "learning_rate": 1.822986675913231e-06, + "loss": 0.29342639446258545, + "step": 6136 + }, + { + "epoch": 1.6296640552383481, + "grad_norm": 1.244422511511833, + "learning_rate": 1.8204599521031785e-06, + "loss": 0.22768062353134155, + "step": 6137 + }, + { + "epoch": 1.629929624219891, + "grad_norm": 1.6355607569945512, + "learning_rate": 1.817934805211976e-06, + "loss": 0.23938167095184326, + "step": 6138 + }, + { + "epoch": 1.630195193201434, + "grad_norm": 1.311916117620117, + "learning_rate": 1.8154112357264474e-06, + "loss": 0.1982264518737793, + "step": 6139 + }, + { + "epoch": 1.630460762182977, + "grad_norm": 1.3026965235969699, + "learning_rate": 1.8128892441331047e-06, + "loss": 0.23591312766075134, + "step": 6140 + }, + { + "epoch": 1.63072633116452, + "grad_norm": 1.259123916156089, + "learning_rate": 1.8103688309181567e-06, + "loss": 0.20317673683166504, + "step": 6141 + }, + { + "epoch": 1.6309919001460629, + "grad_norm": 1.2846300858550195, + "learning_rate": 1.8078499965675112e-06, + "loss": 0.233676478266716, + "step": 6142 + }, + { + "epoch": 1.6312574691276058, + "grad_norm": 1.3296785293607047, + "learning_rate": 1.8053327415667688e-06, + "loss": 0.22850775718688965, + "step": 6143 + }, + { + "epoch": 1.6315230381091488, + "grad_norm": 1.2850656633806874, + "learning_rate": 1.8028170664012268e-06, + "loss": 0.2603572607040405, + "step": 6144 + }, + { + "epoch": 1.6317886070906917, + "grad_norm": 1.3208849168125785, + "learning_rate": 1.8003029715558773e-06, + "loss": 0.27881523966789246, + "step": 6145 + }, + { + "epoch": 1.6320541760722347, + "grad_norm": 1.225668329292659, + "learning_rate": 1.797790457515406e-06, + "loss": 0.21744176745414734, + "step": 6146 + }, + { + "epoch": 1.6323197450537776, + "grad_norm": 1.2220588910103882, + "learning_rate": 1.7952795247642008e-06, + "loss": 0.20449542999267578, + "step": 6147 + }, + { + "epoch": 1.6325853140353206, + "grad_norm": 1.3015735321136237, + "learning_rate": 1.7927701737863402e-06, + "loss": 0.25641053915023804, + "step": 6148 + }, + { + "epoch": 1.6328508830168635, + "grad_norm": 1.294201240106412, + "learning_rate": 1.7902624050655914e-06, + "loss": 0.23583751916885376, + "step": 6149 + }, + { + "epoch": 1.6331164519984065, + "grad_norm": 1.4310897316272893, + "learning_rate": 1.787756219085427e-06, + "loss": 0.2709866762161255, + "step": 6150 + }, + { + "epoch": 1.6333820209799494, + "grad_norm": 1.2536554341378991, + "learning_rate": 1.785251616329009e-06, + "loss": 0.233103945851326, + "step": 6151 + }, + { + "epoch": 1.6336475899614924, + "grad_norm": 1.2660813048243769, + "learning_rate": 1.7827485972791957e-06, + "loss": 0.2665184438228607, + "step": 6152 + }, + { + "epoch": 1.6339131589430353, + "grad_norm": 1.2551185732946457, + "learning_rate": 1.7802471624185392e-06, + "loss": 0.20934605598449707, + "step": 6153 + }, + { + "epoch": 1.6341787279245783, + "grad_norm": 1.2179362426676639, + "learning_rate": 1.7777473122292866e-06, + "loss": 0.2102464735507965, + "step": 6154 + }, + { + "epoch": 1.6344442969061213, + "grad_norm": 1.2289784110367914, + "learning_rate": 1.7752490471933769e-06, + "loss": 0.22889986634254456, + "step": 6155 + }, + { + "epoch": 1.6347098658876642, + "grad_norm": 1.3627659705359922, + "learning_rate": 1.772752367792452e-06, + "loss": 0.2261584997177124, + "step": 6156 + }, + { + "epoch": 1.6349754348692072, + "grad_norm": 1.2186249427048736, + "learning_rate": 1.7702572745078395e-06, + "loss": 0.21456710994243622, + "step": 6157 + }, + { + "epoch": 1.63524100385075, + "grad_norm": 1.1535452073956258, + "learning_rate": 1.7677637678205627e-06, + "loss": 0.22762097418308258, + "step": 6158 + }, + { + "epoch": 1.635506572832293, + "grad_norm": 1.306484526102534, + "learning_rate": 1.7652718482113417e-06, + "loss": 0.24772633612155914, + "step": 6159 + }, + { + "epoch": 1.635772141813836, + "grad_norm": 1.3290630048425123, + "learning_rate": 1.7627815161605887e-06, + "loss": 0.22980757057666779, + "step": 6160 + }, + { + "epoch": 1.636037710795379, + "grad_norm": 1.1593602123779645, + "learning_rate": 1.760292772148411e-06, + "loss": 0.19560125470161438, + "step": 6161 + }, + { + "epoch": 1.636303279776922, + "grad_norm": 1.388673809129743, + "learning_rate": 1.7578056166546086e-06, + "loss": 0.23733064532279968, + "step": 6162 + }, + { + "epoch": 1.6365688487584649, + "grad_norm": 1.2026681813349183, + "learning_rate": 1.7553200501586743e-06, + "loss": 0.21064560115337372, + "step": 6163 + }, + { + "epoch": 1.6368344177400078, + "grad_norm": 1.3444341606502546, + "learning_rate": 1.7528360731397986e-06, + "loss": 0.26709994673728943, + "step": 6164 + }, + { + "epoch": 1.6370999867215508, + "grad_norm": 1.2755110888757868, + "learning_rate": 1.750353686076861e-06, + "loss": 0.26555943489074707, + "step": 6165 + }, + { + "epoch": 1.6373655557030937, + "grad_norm": 1.3299250322981557, + "learning_rate": 1.7478728894484375e-06, + "loss": 0.24480760097503662, + "step": 6166 + }, + { + "epoch": 1.6376311246846367, + "grad_norm": 1.2560095314061934, + "learning_rate": 1.7453936837327967e-06, + "loss": 0.2170884907245636, + "step": 6167 + }, + { + "epoch": 1.6378966936661796, + "grad_norm": 1.340756013397369, + "learning_rate": 1.7429160694078983e-06, + "loss": 0.24728982150554657, + "step": 6168 + }, + { + "epoch": 1.6381622626477228, + "grad_norm": 1.1911402182063675, + "learning_rate": 1.7404400469513994e-06, + "loss": 0.20886945724487305, + "step": 6169 + }, + { + "epoch": 1.6384278316292658, + "grad_norm": 1.2150445755778985, + "learning_rate": 1.7379656168406467e-06, + "loss": 0.1892474740743637, + "step": 6170 + }, + { + "epoch": 1.6386934006108087, + "grad_norm": 1.3004801024505461, + "learning_rate": 1.7354927795526821e-06, + "loss": 0.24953782558441162, + "step": 6171 + }, + { + "epoch": 1.6389589695923517, + "grad_norm": 1.2292705802712374, + "learning_rate": 1.7330215355642377e-06, + "loss": 0.2311600148677826, + "step": 6172 + }, + { + "epoch": 1.6392245385738946, + "grad_norm": 1.2596864005467026, + "learning_rate": 1.73055188535174e-06, + "loss": 0.24018675088882446, + "step": 6173 + }, + { + "epoch": 1.6394901075554376, + "grad_norm": 1.3394449685829455, + "learning_rate": 1.7280838293913116e-06, + "loss": 0.22607022523880005, + "step": 6174 + }, + { + "epoch": 1.6397556765369805, + "grad_norm": 1.2860534255043978, + "learning_rate": 1.7256173681587619e-06, + "loss": 0.23725482821464539, + "step": 6175 + }, + { + "epoch": 1.6400212455185235, + "grad_norm": 1.2500709715234832, + "learning_rate": 1.723152502129597e-06, + "loss": 0.241235613822937, + "step": 6176 + }, + { + "epoch": 1.6402868145000664, + "grad_norm": 1.2070755501863832, + "learning_rate": 1.7206892317790136e-06, + "loss": 0.2150690108537674, + "step": 6177 + }, + { + "epoch": 1.6405523834816094, + "grad_norm": 1.2557873581014805, + "learning_rate": 1.7182275575819007e-06, + "loss": 0.22133421897888184, + "step": 6178 + }, + { + "epoch": 1.6408179524631523, + "grad_norm": 1.1297884729403, + "learning_rate": 1.7157674800128399e-06, + "loss": 0.1937463879585266, + "step": 6179 + }, + { + "epoch": 1.6410835214446953, + "grad_norm": 1.0851305240668396, + "learning_rate": 1.7133089995461062e-06, + "loss": 0.18938027322292328, + "step": 6180 + }, + { + "epoch": 1.6413490904262382, + "grad_norm": 1.2621430482402598, + "learning_rate": 1.7108521166556646e-06, + "loss": 0.23577997088432312, + "step": 6181 + }, + { + "epoch": 1.6416146594077812, + "grad_norm": 1.2915526813468403, + "learning_rate": 1.7083968318151734e-06, + "loss": 0.2712448537349701, + "step": 6182 + }, + { + "epoch": 1.6418802283893241, + "grad_norm": 1.276409938985324, + "learning_rate": 1.7059431454979825e-06, + "loss": 0.24242255091667175, + "step": 6183 + }, + { + "epoch": 1.642145797370867, + "grad_norm": 1.3152058895449834, + "learning_rate": 1.7034910581771347e-06, + "loss": 0.22521010041236877, + "step": 6184 + }, + { + "epoch": 1.64241136635241, + "grad_norm": 1.3840145244958133, + "learning_rate": 1.7010405703253618e-06, + "loss": 0.22026273608207703, + "step": 6185 + }, + { + "epoch": 1.642676935333953, + "grad_norm": 1.458737402535225, + "learning_rate": 1.6985916824150894e-06, + "loss": 0.22726528346538544, + "step": 6186 + }, + { + "epoch": 1.642942504315496, + "grad_norm": 1.3396783040947258, + "learning_rate": 1.6961443949184353e-06, + "loss": 0.25172409415245056, + "step": 6187 + }, + { + "epoch": 1.6432080732970389, + "grad_norm": 1.1393591185728944, + "learning_rate": 1.6936987083072065e-06, + "loss": 0.21173113584518433, + "step": 6188 + }, + { + "epoch": 1.6434736422785818, + "grad_norm": 1.3589729407555038, + "learning_rate": 1.6912546230529036e-06, + "loss": 0.22596749663352966, + "step": 6189 + }, + { + "epoch": 1.6437392112601248, + "grad_norm": 1.3604263454917045, + "learning_rate": 1.6888121396267166e-06, + "loss": 0.2749077081680298, + "step": 6190 + }, + { + "epoch": 1.6440047802416677, + "grad_norm": 2.5555069132462283, + "learning_rate": 1.6863712584995252e-06, + "loss": 0.22150780260562897, + "step": 6191 + }, + { + "epoch": 1.6442703492232107, + "grad_norm": 1.2838243253096144, + "learning_rate": 1.6839319801419073e-06, + "loss": 0.23437368869781494, + "step": 6192 + }, + { + "epoch": 1.6445359182047536, + "grad_norm": 1.3069256977628543, + "learning_rate": 1.681494305024125e-06, + "loss": 0.22949008643627167, + "step": 6193 + }, + { + "epoch": 1.6448014871862966, + "grad_norm": 1.2956112975441718, + "learning_rate": 1.6790582336161332e-06, + "loss": 0.24147525429725647, + "step": 6194 + }, + { + "epoch": 1.6450670561678395, + "grad_norm": 1.180082798545332, + "learning_rate": 1.6766237663875773e-06, + "loss": 0.2001456618309021, + "step": 6195 + }, + { + "epoch": 1.6453326251493825, + "grad_norm": 1.2710753216206616, + "learning_rate": 1.674190903807794e-06, + "loss": 0.17668186128139496, + "step": 6196 + }, + { + "epoch": 1.6455981941309257, + "grad_norm": 1.369840319031622, + "learning_rate": 1.6717596463458107e-06, + "loss": 0.24585255980491638, + "step": 6197 + }, + { + "epoch": 1.6458637631124686, + "grad_norm": 1.2328642285488454, + "learning_rate": 1.6693299944703479e-06, + "loss": 0.2234572172164917, + "step": 6198 + }, + { + "epoch": 1.6461293320940116, + "grad_norm": 1.2369910191993496, + "learning_rate": 1.6669019486498083e-06, + "loss": 0.2007240653038025, + "step": 6199 + }, + { + "epoch": 1.6463949010755545, + "grad_norm": 1.317383450933259, + "learning_rate": 1.6644755093522913e-06, + "loss": 0.21926215291023254, + "step": 6200 + }, + { + "epoch": 1.6466604700570975, + "grad_norm": 1.3404302006039666, + "learning_rate": 1.662050677045589e-06, + "loss": 0.24797898530960083, + "step": 6201 + }, + { + "epoch": 1.6469260390386404, + "grad_norm": 1.285343354391859, + "learning_rate": 1.65962745219718e-06, + "loss": 0.22087037563323975, + "step": 6202 + }, + { + "epoch": 1.6471916080201834, + "grad_norm": 1.2765781805195457, + "learning_rate": 1.6572058352742327e-06, + "loss": 0.23073960840702057, + "step": 6203 + }, + { + "epoch": 1.6474571770017263, + "grad_norm": 1.3644493807061109, + "learning_rate": 1.6547858267436056e-06, + "loss": 0.2430298924446106, + "step": 6204 + }, + { + "epoch": 1.6477227459832693, + "grad_norm": 1.286198443262182, + "learning_rate": 1.6523674270718493e-06, + "loss": 0.23337247967720032, + "step": 6205 + }, + { + "epoch": 1.6479883149648122, + "grad_norm": 1.2144238817830517, + "learning_rate": 1.6499506367252016e-06, + "loss": 0.22141093015670776, + "step": 6206 + }, + { + "epoch": 1.6482538839463552, + "grad_norm": 1.280282959866893, + "learning_rate": 1.647535456169591e-06, + "loss": 0.23247988522052765, + "step": 6207 + }, + { + "epoch": 1.6485194529278981, + "grad_norm": 1.3728921390628253, + "learning_rate": 1.6451218858706374e-06, + "loss": 0.2659391760826111, + "step": 6208 + }, + { + "epoch": 1.648785021909441, + "grad_norm": 1.2534645715863684, + "learning_rate": 1.642709926293644e-06, + "loss": 0.2154998630285263, + "step": 6209 + }, + { + "epoch": 1.649050590890984, + "grad_norm": 1.322825591754104, + "learning_rate": 1.6402995779036146e-06, + "loss": 0.20363599061965942, + "step": 6210 + }, + { + "epoch": 1.649316159872527, + "grad_norm": 1.3775669953664806, + "learning_rate": 1.6378908411652328e-06, + "loss": 0.23388779163360596, + "step": 6211 + }, + { + "epoch": 1.64958172885407, + "grad_norm": 1.205059730534318, + "learning_rate": 1.6354837165428772e-06, + "loss": 0.20465341210365295, + "step": 6212 + }, + { + "epoch": 1.649847297835613, + "grad_norm": 1.2409004364034002, + "learning_rate": 1.6330782045006088e-06, + "loss": 0.2233584225177765, + "step": 6213 + }, + { + "epoch": 1.6501128668171559, + "grad_norm": 1.313264623251788, + "learning_rate": 1.6306743055021834e-06, + "loss": 0.2880077064037323, + "step": 6214 + }, + { + "epoch": 1.6503784357986988, + "grad_norm": 1.2769524753658168, + "learning_rate": 1.6282720200110458e-06, + "loss": 0.23332230746746063, + "step": 6215 + }, + { + "epoch": 1.6506440047802418, + "grad_norm": 1.2682336609825682, + "learning_rate": 1.6258713484903266e-06, + "loss": 0.22191204130649567, + "step": 6216 + }, + { + "epoch": 1.6509095737617847, + "grad_norm": 1.2899982671052521, + "learning_rate": 1.6234722914028478e-06, + "loss": 0.2403659224510193, + "step": 6217 + }, + { + "epoch": 1.6511751427433277, + "grad_norm": 1.2823746538865957, + "learning_rate": 1.6210748492111161e-06, + "loss": 0.2230256348848343, + "step": 6218 + }, + { + "epoch": 1.6514407117248706, + "grad_norm": 1.233703409456991, + "learning_rate": 1.6186790223773375e-06, + "loss": 0.2086302787065506, + "step": 6219 + }, + { + "epoch": 1.6517062807064136, + "grad_norm": 1.2696219439991872, + "learning_rate": 1.6162848113633934e-06, + "loss": 0.22336703538894653, + "step": 6220 + }, + { + "epoch": 1.6519718496879565, + "grad_norm": 1.2026474951561137, + "learning_rate": 1.6138922166308613e-06, + "loss": 0.2354746013879776, + "step": 6221 + }, + { + "epoch": 1.6522374186694995, + "grad_norm": 1.212799588563382, + "learning_rate": 1.6115012386410045e-06, + "loss": 0.23983564972877502, + "step": 6222 + }, + { + "epoch": 1.6525029876510424, + "grad_norm": 1.3394195242071623, + "learning_rate": 1.6091118778547765e-06, + "loss": 0.25468897819519043, + "step": 6223 + }, + { + "epoch": 1.6527685566325854, + "grad_norm": 1.2085737685975797, + "learning_rate": 1.6067241347328166e-06, + "loss": 0.2225346863269806, + "step": 6224 + }, + { + "epoch": 1.6530341256141283, + "grad_norm": 1.4474708027397767, + "learning_rate": 1.6043380097354543e-06, + "loss": 0.28801992535591125, + "step": 6225 + }, + { + "epoch": 1.6532996945956713, + "grad_norm": 1.1308003259460488, + "learning_rate": 1.6019535033227063e-06, + "loss": 0.1869816929101944, + "step": 6226 + }, + { + "epoch": 1.6535652635772142, + "grad_norm": 1.3022141110443597, + "learning_rate": 1.5995706159542768e-06, + "loss": 0.2569049894809723, + "step": 6227 + }, + { + "epoch": 1.6538308325587572, + "grad_norm": 1.2689496619282572, + "learning_rate": 1.5971893480895583e-06, + "loss": 0.19138488173484802, + "step": 6228 + }, + { + "epoch": 1.6540964015403001, + "grad_norm": 1.2583553251304942, + "learning_rate": 1.5948097001876318e-06, + "loss": 0.23107777535915375, + "step": 6229 + }, + { + "epoch": 1.654361970521843, + "grad_norm": 1.4140324563807463, + "learning_rate": 1.5924316727072652e-06, + "loss": 0.21682313084602356, + "step": 6230 + }, + { + "epoch": 1.654627539503386, + "grad_norm": 1.6445896965406597, + "learning_rate": 1.5900552661069135e-06, + "loss": 0.27629974484443665, + "step": 6231 + }, + { + "epoch": 1.654893108484929, + "grad_norm": 1.2060133562172235, + "learning_rate": 1.587680480844721e-06, + "loss": 0.21919876337051392, + "step": 6232 + }, + { + "epoch": 1.655158677466472, + "grad_norm": 1.4827934801999716, + "learning_rate": 1.5853073173785183e-06, + "loss": 0.2556184232234955, + "step": 6233 + }, + { + "epoch": 1.655424246448015, + "grad_norm": 1.1362954303327644, + "learning_rate": 1.5829357761658214e-06, + "loss": 0.1904449462890625, + "step": 6234 + }, + { + "epoch": 1.6556898154295578, + "grad_norm": 1.2410374365127181, + "learning_rate": 1.5805658576638372e-06, + "loss": 0.1991434246301651, + "step": 6235 + }, + { + "epoch": 1.6559553844111008, + "grad_norm": 1.4428347821081515, + "learning_rate": 1.5781975623294554e-06, + "loss": 0.2609177231788635, + "step": 6236 + }, + { + "epoch": 1.6562209533926437, + "grad_norm": 1.276051044481299, + "learning_rate": 1.575830890619261e-06, + "loss": 0.2481592893600464, + "step": 6237 + }, + { + "epoch": 1.6564865223741867, + "grad_norm": 1.2930470444266673, + "learning_rate": 1.5734658429895156e-06, + "loss": 0.23855090141296387, + "step": 6238 + }, + { + "epoch": 1.6567520913557297, + "grad_norm": 1.326739898505445, + "learning_rate": 1.5711024198961745e-06, + "loss": 0.2480623573064804, + "step": 6239 + }, + { + "epoch": 1.6570176603372726, + "grad_norm": 1.4145385747738486, + "learning_rate": 1.5687406217948775e-06, + "loss": 0.2504739463329315, + "step": 6240 + }, + { + "epoch": 1.6572832293188156, + "grad_norm": 1.1843269954841462, + "learning_rate": 1.5663804491409506e-06, + "loss": 0.2068580538034439, + "step": 6241 + }, + { + "epoch": 1.6575487983003585, + "grad_norm": 1.45151426190796, + "learning_rate": 1.5640219023894077e-06, + "loss": 0.2448163628578186, + "step": 6242 + }, + { + "epoch": 1.6578143672819015, + "grad_norm": 1.3391765527579818, + "learning_rate": 1.5616649819949492e-06, + "loss": 0.2514716386795044, + "step": 6243 + }, + { + "epoch": 1.6580799362634444, + "grad_norm": 1.1884099966156902, + "learning_rate": 1.559309688411962e-06, + "loss": 0.2067629098892212, + "step": 6244 + }, + { + "epoch": 1.6583455052449874, + "grad_norm": 1.2042735442206352, + "learning_rate": 1.5569560220945168e-06, + "loss": 0.22909750044345856, + "step": 6245 + }, + { + "epoch": 1.6586110742265303, + "grad_norm": 1.4646403481954997, + "learning_rate": 1.5546039834963745e-06, + "loss": 0.203629732131958, + "step": 6246 + }, + { + "epoch": 1.6588766432080733, + "grad_norm": 1.2050936311763847, + "learning_rate": 1.552253573070981e-06, + "loss": 0.21919086575508118, + "step": 6247 + }, + { + "epoch": 1.6591422121896162, + "grad_norm": 1.4379501702554756, + "learning_rate": 1.549904791271466e-06, + "loss": 0.2535661458969116, + "step": 6248 + }, + { + "epoch": 1.6594077811711592, + "grad_norm": 1.2609582047884877, + "learning_rate": 1.5475576385506475e-06, + "loss": 0.224460631608963, + "step": 6249 + }, + { + "epoch": 1.6596733501527021, + "grad_norm": 1.2625738742925756, + "learning_rate": 1.5452121153610288e-06, + "loss": 0.21925818920135498, + "step": 6250 + }, + { + "epoch": 1.659938919134245, + "grad_norm": 1.2787763694898493, + "learning_rate": 1.5428682221547997e-06, + "loss": 0.2100696563720703, + "step": 6251 + }, + { + "epoch": 1.660204488115788, + "grad_norm": 1.3484219674096825, + "learning_rate": 1.540525959383834e-06, + "loss": 0.25982293486595154, + "step": 6252 + }, + { + "epoch": 1.660470057097331, + "grad_norm": 1.2527966644905648, + "learning_rate": 1.538185327499694e-06, + "loss": 0.23615162074565887, + "step": 6253 + }, + { + "epoch": 1.660735626078874, + "grad_norm": 1.2738910414784854, + "learning_rate": 1.5358463269536218e-06, + "loss": 0.2454022467136383, + "step": 6254 + }, + { + "epoch": 1.6610011950604169, + "grad_norm": 1.3825181535789863, + "learning_rate": 1.5335089581965556e-06, + "loss": 0.2330605536699295, + "step": 6255 + }, + { + "epoch": 1.6612667640419598, + "grad_norm": 1.2169082012465264, + "learning_rate": 1.5311732216791087e-06, + "loss": 0.23193006217479706, + "step": 6256 + }, + { + "epoch": 1.6615323330235028, + "grad_norm": 1.2690481284418431, + "learning_rate": 1.5288391178515838e-06, + "loss": 0.23254770040512085, + "step": 6257 + }, + { + "epoch": 1.6617979020050457, + "grad_norm": 1.2246821396199268, + "learning_rate": 1.5265066471639701e-06, + "loss": 0.23240572214126587, + "step": 6258 + }, + { + "epoch": 1.6620634709865887, + "grad_norm": 1.3414134094293932, + "learning_rate": 1.5241758100659386e-06, + "loss": 0.2765730619430542, + "step": 6259 + }, + { + "epoch": 1.6623290399681316, + "grad_norm": 1.2956291225041994, + "learning_rate": 1.5218466070068472e-06, + "loss": 0.26366496086120605, + "step": 6260 + }, + { + "epoch": 1.6625946089496746, + "grad_norm": 1.240730160583952, + "learning_rate": 1.5195190384357405e-06, + "loss": 0.22322653234004974, + "step": 6261 + }, + { + "epoch": 1.6628601779312175, + "grad_norm": 1.2433877123660553, + "learning_rate": 1.5171931048013466e-06, + "loss": 0.24144116044044495, + "step": 6262 + }, + { + "epoch": 1.6631257469127605, + "grad_norm": 1.3783130308299147, + "learning_rate": 1.5148688065520734e-06, + "loss": 0.24559618532657623, + "step": 6263 + }, + { + "epoch": 1.6633913158943034, + "grad_norm": 1.3258590224160887, + "learning_rate": 1.5125461441360223e-06, + "loss": 0.24337056279182434, + "step": 6264 + }, + { + "epoch": 1.6636568848758464, + "grad_norm": 1.3292875380649603, + "learning_rate": 1.5102251180009752e-06, + "loss": 0.2733612358570099, + "step": 6265 + }, + { + "epoch": 1.6639224538573893, + "grad_norm": 1.2329811544038785, + "learning_rate": 1.5079057285943976e-06, + "loss": 0.2116459757089615, + "step": 6266 + }, + { + "epoch": 1.6641880228389323, + "grad_norm": 1.2335642813115397, + "learning_rate": 1.5055879763634407e-06, + "loss": 0.21221664547920227, + "step": 6267 + }, + { + "epoch": 1.6644535918204753, + "grad_norm": 1.2500150658336624, + "learning_rate": 1.503271861754939e-06, + "loss": 0.21166589856147766, + "step": 6268 + }, + { + "epoch": 1.6647191608020182, + "grad_norm": 1.5113123418333367, + "learning_rate": 1.5009573852154136e-06, + "loss": 0.2652161121368408, + "step": 6269 + }, + { + "epoch": 1.6649847297835612, + "grad_norm": 1.262834880378694, + "learning_rate": 1.4986445471910672e-06, + "loss": 0.22142267227172852, + "step": 6270 + }, + { + "epoch": 1.665250298765104, + "grad_norm": 1.4442965183949772, + "learning_rate": 1.4963333481277874e-06, + "loss": 0.2307332456111908, + "step": 6271 + }, + { + "epoch": 1.665515867746647, + "grad_norm": 1.411326986781179, + "learning_rate": 1.494023788471144e-06, + "loss": 0.2669411897659302, + "step": 6272 + }, + { + "epoch": 1.66578143672819, + "grad_norm": 1.2823998109594834, + "learning_rate": 1.4917158686663992e-06, + "loss": 0.2468804121017456, + "step": 6273 + }, + { + "epoch": 1.666047005709733, + "grad_norm": 1.2639666166307362, + "learning_rate": 1.4894095891584882e-06, + "loss": 0.24152463674545288, + "step": 6274 + }, + { + "epoch": 1.666312574691276, + "grad_norm": 1.098201760932299, + "learning_rate": 1.4871049503920353e-06, + "loss": 0.1966545283794403, + "step": 6275 + }, + { + "epoch": 1.6665781436728189, + "grad_norm": 1.2773845282560163, + "learning_rate": 1.4848019528113477e-06, + "loss": 0.24772626161575317, + "step": 6276 + }, + { + "epoch": 1.6668437126543618, + "grad_norm": 1.3731672204722256, + "learning_rate": 1.4825005968604189e-06, + "loss": 0.22138851881027222, + "step": 6277 + }, + { + "epoch": 1.6671092816359048, + "grad_norm": 1.2245583238686863, + "learning_rate": 1.4802008829829172e-06, + "loss": 0.24345465004444122, + "step": 6278 + }, + { + "epoch": 1.6673748506174477, + "grad_norm": 1.3209828849983516, + "learning_rate": 1.477902811622205e-06, + "loss": 0.22862716019153595, + "step": 6279 + }, + { + "epoch": 1.6676404195989907, + "grad_norm": 1.2914770883474422, + "learning_rate": 1.4756063832213207e-06, + "loss": 0.2763083577156067, + "step": 6280 + }, + { + "epoch": 1.6679059885805336, + "grad_norm": 1.3142139937070516, + "learning_rate": 1.4733115982229885e-06, + "loss": 0.24631357192993164, + "step": 6281 + }, + { + "epoch": 1.6681715575620768, + "grad_norm": 1.322429969576976, + "learning_rate": 1.4710184570696184e-06, + "loss": 0.22650030255317688, + "step": 6282 + }, + { + "epoch": 1.6684371265436198, + "grad_norm": 1.3243342318873437, + "learning_rate": 1.4687269602033006e-06, + "loss": 0.2455909103155136, + "step": 6283 + }, + { + "epoch": 1.6687026955251627, + "grad_norm": 1.3711517369784783, + "learning_rate": 1.4664371080658079e-06, + "loss": 0.25625506043434143, + "step": 6284 + }, + { + "epoch": 1.6689682645067057, + "grad_norm": 1.1450036681372322, + "learning_rate": 1.4641489010985954e-06, + "loss": 0.22178369760513306, + "step": 6285 + }, + { + "epoch": 1.6692338334882486, + "grad_norm": 1.2644620602089436, + "learning_rate": 1.4618623397428055e-06, + "loss": 0.23936234414577484, + "step": 6286 + }, + { + "epoch": 1.6694994024697916, + "grad_norm": 1.2667144776178243, + "learning_rate": 1.459577424439258e-06, + "loss": 0.21629829704761505, + "step": 6287 + }, + { + "epoch": 1.6697649714513345, + "grad_norm": 1.3486786043134158, + "learning_rate": 1.457294155628457e-06, + "loss": 0.238427072763443, + "step": 6288 + }, + { + "epoch": 1.6700305404328775, + "grad_norm": 1.412674472973442, + "learning_rate": 1.4550125337505926e-06, + "loss": 0.23168250918388367, + "step": 6289 + }, + { + "epoch": 1.6702961094144204, + "grad_norm": 1.3185872633193214, + "learning_rate": 1.45273255924553e-06, + "loss": 0.25518402457237244, + "step": 6290 + }, + { + "epoch": 1.6705616783959634, + "grad_norm": 1.2092220747685465, + "learning_rate": 1.450454232552826e-06, + "loss": 0.2488553822040558, + "step": 6291 + }, + { + "epoch": 1.6708272473775063, + "grad_norm": 1.4309048190710245, + "learning_rate": 1.448177554111716e-06, + "loss": 0.2684085965156555, + "step": 6292 + }, + { + "epoch": 1.6710928163590493, + "grad_norm": 1.3645105519242562, + "learning_rate": 1.4459025243611124e-06, + "loss": 0.24627447128295898, + "step": 6293 + }, + { + "epoch": 1.6713583853405922, + "grad_norm": 1.2960987120962004, + "learning_rate": 1.4436291437396156e-06, + "loss": 0.24725376069545746, + "step": 6294 + }, + { + "epoch": 1.6716239543221352, + "grad_norm": 1.2752333210419433, + "learning_rate": 1.4413574126855067e-06, + "loss": 0.23488914966583252, + "step": 6295 + }, + { + "epoch": 1.6718895233036781, + "grad_norm": 1.2385365684534737, + "learning_rate": 1.4390873316367492e-06, + "loss": 0.2031177133321762, + "step": 6296 + }, + { + "epoch": 1.672155092285221, + "grad_norm": 1.265889760948498, + "learning_rate": 1.4368189010309874e-06, + "loss": 0.25378018617630005, + "step": 6297 + }, + { + "epoch": 1.672420661266764, + "grad_norm": 1.2443137764428682, + "learning_rate": 1.434552121305548e-06, + "loss": 0.21305282413959503, + "step": 6298 + }, + { + "epoch": 1.672686230248307, + "grad_norm": 1.1925787762252436, + "learning_rate": 1.432286992897437e-06, + "loss": 0.20908987522125244, + "step": 6299 + }, + { + "epoch": 1.67295179922985, + "grad_norm": 1.2228377563088515, + "learning_rate": 1.4300235162433496e-06, + "loss": 0.21945340931415558, + "step": 6300 + }, + { + "epoch": 1.6732173682113929, + "grad_norm": 1.3659267409445854, + "learning_rate": 1.4277616917796544e-06, + "loss": 0.22096669673919678, + "step": 6301 + }, + { + "epoch": 1.6734829371929358, + "grad_norm": 1.2773291306452106, + "learning_rate": 1.425501519942406e-06, + "loss": 0.2233850657939911, + "step": 6302 + }, + { + "epoch": 1.6737485061744788, + "grad_norm": 1.2672720076411363, + "learning_rate": 1.423243001167337e-06, + "loss": 0.21432995796203613, + "step": 6303 + }, + { + "epoch": 1.6740140751560217, + "grad_norm": 1.3864014459258447, + "learning_rate": 1.4209861358898636e-06, + "loss": 0.2649557590484619, + "step": 6304 + }, + { + "epoch": 1.6742796441375647, + "grad_norm": 1.2642836811067808, + "learning_rate": 1.418730924545083e-06, + "loss": 0.24918347597122192, + "step": 6305 + }, + { + "epoch": 1.6745452131191076, + "grad_norm": 1.3089175693989048, + "learning_rate": 1.4164773675677745e-06, + "loss": 0.24121029675006866, + "step": 6306 + }, + { + "epoch": 1.6748107821006506, + "grad_norm": 1.2569762960026158, + "learning_rate": 1.4142254653923949e-06, + "loss": 0.24401789903640747, + "step": 6307 + }, + { + "epoch": 1.6750763510821935, + "grad_norm": 1.3272546708188746, + "learning_rate": 1.4119752184530867e-06, + "loss": 0.2374853938817978, + "step": 6308 + }, + { + "epoch": 1.6753419200637365, + "grad_norm": 1.2973848864698938, + "learning_rate": 1.4097266271836695e-06, + "loss": 0.2351088970899582, + "step": 6309 + }, + { + "epoch": 1.6756074890452797, + "grad_norm": 1.301417674196528, + "learning_rate": 1.407479692017647e-06, + "loss": 0.19560754299163818, + "step": 6310 + }, + { + "epoch": 1.6758730580268226, + "grad_norm": 1.390250023674765, + "learning_rate": 1.405234413388199e-06, + "loss": 0.24124252796173096, + "step": 6311 + }, + { + "epoch": 1.6761386270083656, + "grad_norm": 1.3742469305206364, + "learning_rate": 1.4029907917281903e-06, + "loss": 0.2208215445280075, + "step": 6312 + }, + { + "epoch": 1.6764041959899085, + "grad_norm": 1.2125662977366807, + "learning_rate": 1.4007488274701653e-06, + "loss": 0.23888292908668518, + "step": 6313 + }, + { + "epoch": 1.6766697649714515, + "grad_norm": 1.2936432356109655, + "learning_rate": 1.3985085210463479e-06, + "loss": 0.24079063534736633, + "step": 6314 + }, + { + "epoch": 1.6769353339529944, + "grad_norm": 1.2011852751375642, + "learning_rate": 1.3962698728886414e-06, + "loss": 0.18975606560707092, + "step": 6315 + }, + { + "epoch": 1.6772009029345374, + "grad_norm": 1.322599968285396, + "learning_rate": 1.3940328834286333e-06, + "loss": 0.201214998960495, + "step": 6316 + }, + { + "epoch": 1.6774664719160803, + "grad_norm": 1.2090909210103018, + "learning_rate": 1.3917975530975836e-06, + "loss": 0.20079322159290314, + "step": 6317 + }, + { + "epoch": 1.6777320408976233, + "grad_norm": 1.2732868066143843, + "learning_rate": 1.3895638823264447e-06, + "loss": 0.23593586683273315, + "step": 6318 + }, + { + "epoch": 1.6779976098791662, + "grad_norm": 1.3931846809533017, + "learning_rate": 1.3873318715458383e-06, + "loss": 0.26574259996414185, + "step": 6319 + }, + { + "epoch": 1.6782631788607092, + "grad_norm": 1.252943610173436, + "learning_rate": 1.3851015211860696e-06, + "loss": 0.20573323965072632, + "step": 6320 + }, + { + "epoch": 1.6785287478422521, + "grad_norm": 1.4484920974875073, + "learning_rate": 1.3828728316771244e-06, + "loss": 0.25610506534576416, + "step": 6321 + }, + { + "epoch": 1.678794316823795, + "grad_norm": 1.330338299337135, + "learning_rate": 1.380645803448668e-06, + "loss": 0.2138693630695343, + "step": 6322 + }, + { + "epoch": 1.679059885805338, + "grad_norm": 1.1479105398064924, + "learning_rate": 1.3784204369300447e-06, + "loss": 0.21522866189479828, + "step": 6323 + }, + { + "epoch": 1.679325454786881, + "grad_norm": 1.441538971613898, + "learning_rate": 1.376196732550279e-06, + "loss": 0.25622743368148804, + "step": 6324 + }, + { + "epoch": 1.679591023768424, + "grad_norm": 1.354050705773023, + "learning_rate": 1.3739746907380757e-06, + "loss": 0.18025386333465576, + "step": 6325 + }, + { + "epoch": 1.679856592749967, + "grad_norm": 1.1665775097977176, + "learning_rate": 1.3717543119218168e-06, + "loss": 0.18785078823566437, + "step": 6326 + }, + { + "epoch": 1.6801221617315099, + "grad_norm": 1.3771154706722653, + "learning_rate": 1.3695355965295653e-06, + "loss": 0.24682481586933136, + "step": 6327 + }, + { + "epoch": 1.6803877307130528, + "grad_norm": 1.2994385931646761, + "learning_rate": 1.3673185449890647e-06, + "loss": 0.2193487137556076, + "step": 6328 + }, + { + "epoch": 1.6806532996945958, + "grad_norm": 1.2960131024456552, + "learning_rate": 1.3651031577277351e-06, + "loss": 0.24963265657424927, + "step": 6329 + }, + { + "epoch": 1.6809188686761387, + "grad_norm": 1.2714587333981215, + "learning_rate": 1.3628894351726785e-06, + "loss": 0.21473057568073273, + "step": 6330 + }, + { + "epoch": 1.6811844376576817, + "grad_norm": 1.4508064568072063, + "learning_rate": 1.3606773777506731e-06, + "loss": 0.2539534866809845, + "step": 6331 + }, + { + "epoch": 1.6814500066392246, + "grad_norm": 1.5049767699399101, + "learning_rate": 1.3584669858881771e-06, + "loss": 0.2671799659729004, + "step": 6332 + }, + { + "epoch": 1.6817155756207676, + "grad_norm": 1.211295376852026, + "learning_rate": 1.3562582600113295e-06, + "loss": 0.24291013181209564, + "step": 6333 + }, + { + "epoch": 1.6819811446023105, + "grad_norm": 1.3672105989135315, + "learning_rate": 1.354051200545946e-06, + "loss": 0.24249233305454254, + "step": 6334 + }, + { + "epoch": 1.6822467135838535, + "grad_norm": 1.2855842039831968, + "learning_rate": 1.351845807917519e-06, + "loss": 0.21647261083126068, + "step": 6335 + }, + { + "epoch": 1.6825122825653964, + "grad_norm": 1.2764605035604815, + "learning_rate": 1.349642082551227e-06, + "loss": 0.2348332703113556, + "step": 6336 + }, + { + "epoch": 1.6827778515469394, + "grad_norm": 1.3049495455341118, + "learning_rate": 1.34744002487192e-06, + "loss": 0.22503259778022766, + "step": 6337 + }, + { + "epoch": 1.6830434205284823, + "grad_norm": 1.3236190891705721, + "learning_rate": 1.3452396353041286e-06, + "loss": 0.2397763580083847, + "step": 6338 + }, + { + "epoch": 1.6833089895100253, + "grad_norm": 1.156426557066381, + "learning_rate": 1.3430409142720624e-06, + "loss": 0.23345956206321716, + "step": 6339 + }, + { + "epoch": 1.6835745584915682, + "grad_norm": 1.1932341696009043, + "learning_rate": 1.3408438621996088e-06, + "loss": 0.19660598039627075, + "step": 6340 + }, + { + "epoch": 1.6838401274731112, + "grad_norm": 1.262928020262074, + "learning_rate": 1.3386484795103327e-06, + "loss": 0.19148695468902588, + "step": 6341 + }, + { + "epoch": 1.6841056964546541, + "grad_norm": 1.2112774084067142, + "learning_rate": 1.3364547666274819e-06, + "loss": 0.2078169733285904, + "step": 6342 + }, + { + "epoch": 1.684371265436197, + "grad_norm": 1.3703852622718744, + "learning_rate": 1.3342627239739715e-06, + "loss": 0.23122575879096985, + "step": 6343 + }, + { + "epoch": 1.68463683441774, + "grad_norm": 1.350523705417422, + "learning_rate": 1.3320723519724032e-06, + "loss": 0.2744083106517792, + "step": 6344 + }, + { + "epoch": 1.684902403399283, + "grad_norm": 1.3462449472678248, + "learning_rate": 1.3298836510450597e-06, + "loss": 0.26361098885536194, + "step": 6345 + }, + { + "epoch": 1.685167972380826, + "grad_norm": 1.2550654654863131, + "learning_rate": 1.3276966216138932e-06, + "loss": 0.21833205223083496, + "step": 6346 + }, + { + "epoch": 1.685433541362369, + "grad_norm": 1.306325021058624, + "learning_rate": 1.3255112641005374e-06, + "loss": 0.22075100243091583, + "step": 6347 + }, + { + "epoch": 1.6856991103439118, + "grad_norm": 1.4286786068270776, + "learning_rate": 1.3233275789263034e-06, + "loss": 0.24352343380451202, + "step": 6348 + }, + { + "epoch": 1.6859646793254548, + "grad_norm": 1.5476580340833483, + "learning_rate": 1.3211455665121808e-06, + "loss": 0.2331303060054779, + "step": 6349 + }, + { + "epoch": 1.6862302483069977, + "grad_norm": 1.398559395598541, + "learning_rate": 1.3189652272788356e-06, + "loss": 0.2511689066886902, + "step": 6350 + }, + { + "epoch": 1.6864958172885407, + "grad_norm": 1.1704691076383393, + "learning_rate": 1.3167865616466113e-06, + "loss": 0.18535873293876648, + "step": 6351 + }, + { + "epoch": 1.6867613862700837, + "grad_norm": 1.3097469055952822, + "learning_rate": 1.3146095700355289e-06, + "loss": 0.23924914002418518, + "step": 6352 + }, + { + "epoch": 1.6870269552516266, + "grad_norm": 1.1591649275755667, + "learning_rate": 1.3124342528652845e-06, + "loss": 0.19710025191307068, + "step": 6353 + }, + { + "epoch": 1.6872925242331696, + "grad_norm": 1.393629731020981, + "learning_rate": 1.3102606105552585e-06, + "loss": 0.21439281105995178, + "step": 6354 + }, + { + "epoch": 1.6875580932147125, + "grad_norm": 1.3051512833867451, + "learning_rate": 1.3080886435245e-06, + "loss": 0.2647722363471985, + "step": 6355 + }, + { + "epoch": 1.6878236621962555, + "grad_norm": 2.6038516980586355, + "learning_rate": 1.3059183521917396e-06, + "loss": 0.2202019840478897, + "step": 6356 + }, + { + "epoch": 1.6880892311777984, + "grad_norm": 1.3022104210295473, + "learning_rate": 1.3037497369753871e-06, + "loss": 0.25833001732826233, + "step": 6357 + }, + { + "epoch": 1.6883548001593414, + "grad_norm": 1.1906464618269579, + "learning_rate": 1.3015827982935192e-06, + "loss": 0.19984321296215057, + "step": 6358 + }, + { + "epoch": 1.6886203691408843, + "grad_norm": 1.3347301103088016, + "learning_rate": 1.2994175365638996e-06, + "loss": 0.2190552055835724, + "step": 6359 + }, + { + "epoch": 1.6888859381224273, + "grad_norm": 1.265894337049371, + "learning_rate": 1.2972539522039652e-06, + "loss": 0.26262593269348145, + "step": 6360 + }, + { + "epoch": 1.6891515071039702, + "grad_norm": 1.285416913994909, + "learning_rate": 1.2950920456308292e-06, + "loss": 0.2665651738643646, + "step": 6361 + }, + { + "epoch": 1.6894170760855132, + "grad_norm": 1.213162722605336, + "learning_rate": 1.2929318172612803e-06, + "loss": 0.22369208931922913, + "step": 6362 + }, + { + "epoch": 1.6896826450670561, + "grad_norm": 1.2234073567984471, + "learning_rate": 1.2907732675117878e-06, + "loss": 0.21063543856143951, + "step": 6363 + }, + { + "epoch": 1.689948214048599, + "grad_norm": 1.3608426715056905, + "learning_rate": 1.2886163967984944e-06, + "loss": 0.2303045690059662, + "step": 6364 + }, + { + "epoch": 1.690213783030142, + "grad_norm": 1.1473656525455074, + "learning_rate": 1.2864612055372182e-06, + "loss": 0.20185884833335876, + "step": 6365 + }, + { + "epoch": 1.690479352011685, + "grad_norm": 1.2673026097919315, + "learning_rate": 1.284307694143455e-06, + "loss": 0.22900527715682983, + "step": 6366 + }, + { + "epoch": 1.690744920993228, + "grad_norm": 1.2373147270640896, + "learning_rate": 1.282155863032377e-06, + "loss": 0.21405862271785736, + "step": 6367 + }, + { + "epoch": 1.6910104899747709, + "grad_norm": 1.3139606008654157, + "learning_rate": 1.2800057126188304e-06, + "loss": 0.26143258810043335, + "step": 6368 + }, + { + "epoch": 1.6912760589563138, + "grad_norm": 1.319330305112879, + "learning_rate": 1.2778572433173397e-06, + "loss": 0.24437926709651947, + "step": 6369 + }, + { + "epoch": 1.6915416279378568, + "grad_norm": 1.1954155676954614, + "learning_rate": 1.275710455542104e-06, + "loss": 0.24862337112426758, + "step": 6370 + }, + { + "epoch": 1.6918071969193997, + "grad_norm": 1.2264107157331223, + "learning_rate": 1.2735653497069978e-06, + "loss": 0.2146604359149933, + "step": 6371 + }, + { + "epoch": 1.6920727659009427, + "grad_norm": 1.3217815480091177, + "learning_rate": 1.2714219262255777e-06, + "loss": 0.2525256872177124, + "step": 6372 + }, + { + "epoch": 1.6923383348824856, + "grad_norm": 1.289957068010404, + "learning_rate": 1.2692801855110638e-06, + "loss": 0.23462912440299988, + "step": 6373 + }, + { + "epoch": 1.6926039038640286, + "grad_norm": 1.3468375801476438, + "learning_rate": 1.2671401279763595e-06, + "loss": 0.21551170945167542, + "step": 6374 + }, + { + "epoch": 1.6928694728455715, + "grad_norm": 1.4457180200872415, + "learning_rate": 1.2650017540340454e-06, + "loss": 0.24094407260417938, + "step": 6375 + }, + { + "epoch": 1.6931350418271145, + "grad_norm": 1.2168123169553724, + "learning_rate": 1.2628650640963736e-06, + "loss": 0.23101133108139038, + "step": 6376 + }, + { + "epoch": 1.6934006108086574, + "grad_norm": 1.4830646801660192, + "learning_rate": 1.2607300585752724e-06, + "loss": 0.2513899803161621, + "step": 6377 + }, + { + "epoch": 1.6936661797902004, + "grad_norm": 1.417144859782869, + "learning_rate": 1.258596737882345e-06, + "loss": 0.2490600198507309, + "step": 6378 + }, + { + "epoch": 1.6939317487717434, + "grad_norm": 1.3403225341914131, + "learning_rate": 1.256465102428872e-06, + "loss": 0.25767675042152405, + "step": 6379 + }, + { + "epoch": 1.6941973177532863, + "grad_norm": 1.2775246675329248, + "learning_rate": 1.254335152625804e-06, + "loss": 0.2231348305940628, + "step": 6380 + }, + { + "epoch": 1.6944628867348293, + "grad_norm": 1.4410136520558763, + "learning_rate": 1.2522068888837758e-06, + "loss": 0.25873979926109314, + "step": 6381 + }, + { + "epoch": 1.6947284557163722, + "grad_norm": 1.4111151195923193, + "learning_rate": 1.2500803116130887e-06, + "loss": 0.2848423421382904, + "step": 6382 + }, + { + "epoch": 1.6949940246979152, + "grad_norm": 1.1110125207312456, + "learning_rate": 1.247955421223721e-06, + "loss": 0.21343804895877838, + "step": 6383 + }, + { + "epoch": 1.695259593679458, + "grad_norm": 1.3025436504976033, + "learning_rate": 1.245832218125328e-06, + "loss": 0.23080062866210938, + "step": 6384 + }, + { + "epoch": 1.695525162661001, + "grad_norm": 1.3020267493975237, + "learning_rate": 1.2437107027272376e-06, + "loss": 0.2397225797176361, + "step": 6385 + }, + { + "epoch": 1.695790731642544, + "grad_norm": 1.3120966348534624, + "learning_rate": 1.2415908754384532e-06, + "loss": 0.22798654437065125, + "step": 6386 + }, + { + "epoch": 1.696056300624087, + "grad_norm": 1.3399304326822938, + "learning_rate": 1.2394727366676518e-06, + "loss": 0.2534061074256897, + "step": 6387 + }, + { + "epoch": 1.69632186960563, + "grad_norm": 1.2269756633197797, + "learning_rate": 1.2373562868231858e-06, + "loss": 0.2127036452293396, + "step": 6388 + }, + { + "epoch": 1.6965874385871729, + "grad_norm": 1.341525895521795, + "learning_rate": 1.2352415263130813e-06, + "loss": 0.22341205179691315, + "step": 6389 + }, + { + "epoch": 1.6968530075687158, + "grad_norm": 1.316572711467383, + "learning_rate": 1.2331284555450406e-06, + "loss": 0.2435426563024521, + "step": 6390 + }, + { + "epoch": 1.6971185765502588, + "grad_norm": 1.3203864338710647, + "learning_rate": 1.2310170749264383e-06, + "loss": 0.24652531743049622, + "step": 6391 + }, + { + "epoch": 1.6973841455318017, + "grad_norm": 1.251250109623578, + "learning_rate": 1.228907384864323e-06, + "loss": 0.24172671139240265, + "step": 6392 + }, + { + "epoch": 1.6976497145133447, + "grad_norm": 1.293405881850453, + "learning_rate": 1.2267993857654182e-06, + "loss": 0.21534420549869537, + "step": 6393 + }, + { + "epoch": 1.6979152834948879, + "grad_norm": 2.1259133697182575, + "learning_rate": 1.2246930780361221e-06, + "loss": 0.2617778182029724, + "step": 6394 + }, + { + "epoch": 1.6981808524764308, + "grad_norm": 1.1793022391098469, + "learning_rate": 1.2225884620825046e-06, + "loss": 0.20388583838939667, + "step": 6395 + }, + { + "epoch": 1.6984464214579738, + "grad_norm": 1.289033320527503, + "learning_rate": 1.220485538310312e-06, + "loss": 0.23714327812194824, + "step": 6396 + }, + { + "epoch": 1.6987119904395167, + "grad_norm": 1.3592785135687544, + "learning_rate": 1.2183843071249634e-06, + "loss": 0.2495463341474533, + "step": 6397 + }, + { + "epoch": 1.6989775594210597, + "grad_norm": 1.2730498991215184, + "learning_rate": 1.2162847689315483e-06, + "loss": 0.2419012188911438, + "step": 6398 + }, + { + "epoch": 1.6992431284026026, + "grad_norm": 1.2226640861076554, + "learning_rate": 1.214186924134838e-06, + "loss": 0.23392438888549805, + "step": 6399 + }, + { + "epoch": 1.6995086973841456, + "grad_norm": 1.3210458214149883, + "learning_rate": 1.2120907731392695e-06, + "loss": 0.22855526208877563, + "step": 6400 + }, + { + "epoch": 1.6997742663656885, + "grad_norm": 1.2152782326664608, + "learning_rate": 1.2099963163489558e-06, + "loss": 0.22393949329853058, + "step": 6401 + }, + { + "epoch": 1.7000398353472315, + "grad_norm": 1.3855673404796554, + "learning_rate": 1.2079035541676832e-06, + "loss": 0.2539960741996765, + "step": 6402 + }, + { + "epoch": 1.7003054043287744, + "grad_norm": 1.3330270743987416, + "learning_rate": 1.2058124869989129e-06, + "loss": 0.23716852068901062, + "step": 6403 + }, + { + "epoch": 1.7005709733103174, + "grad_norm": 1.347782549245642, + "learning_rate": 1.2037231152457773e-06, + "loss": 0.24658545851707458, + "step": 6404 + }, + { + "epoch": 1.7008365422918603, + "grad_norm": 1.2494300647338343, + "learning_rate": 1.201635439311083e-06, + "loss": 0.2316630333662033, + "step": 6405 + }, + { + "epoch": 1.7011021112734033, + "grad_norm": 1.0834142572483991, + "learning_rate": 1.1995494595973089e-06, + "loss": 0.20434345304965973, + "step": 6406 + }, + { + "epoch": 1.7013676802549462, + "grad_norm": 1.3445140884275912, + "learning_rate": 1.197465176506607e-06, + "loss": 0.2585931420326233, + "step": 6407 + }, + { + "epoch": 1.7016332492364892, + "grad_norm": 1.2567668360829787, + "learning_rate": 1.1953825904408033e-06, + "loss": 0.23007069528102875, + "step": 6408 + }, + { + "epoch": 1.7018988182180321, + "grad_norm": 1.2770978609777501, + "learning_rate": 1.1933017018013948e-06, + "loss": 0.21822810173034668, + "step": 6409 + }, + { + "epoch": 1.702164387199575, + "grad_norm": 1.2875752799081717, + "learning_rate": 1.1912225109895526e-06, + "loss": 0.241228848695755, + "step": 6410 + }, + { + "epoch": 1.702429956181118, + "grad_norm": 1.3509759956774154, + "learning_rate": 1.1891450184061203e-06, + "loss": 0.28803908824920654, + "step": 6411 + }, + { + "epoch": 1.702695525162661, + "grad_norm": 1.3018941028318989, + "learning_rate": 1.1870692244516147e-06, + "loss": 0.2387516349554062, + "step": 6412 + }, + { + "epoch": 1.702961094144204, + "grad_norm": 1.2538051398244094, + "learning_rate": 1.1849951295262242e-06, + "loss": 0.19774140417575836, + "step": 6413 + }, + { + "epoch": 1.7032266631257469, + "grad_norm": 1.269953409174644, + "learning_rate": 1.1829227340298088e-06, + "loss": 0.22842247784137726, + "step": 6414 + }, + { + "epoch": 1.7034922321072898, + "grad_norm": 1.1987695898844528, + "learning_rate": 1.1808520383619015e-06, + "loss": 0.21994739770889282, + "step": 6415 + }, + { + "epoch": 1.7037578010888328, + "grad_norm": 1.2719096074486522, + "learning_rate": 1.1787830429217084e-06, + "loss": 0.22328051924705505, + "step": 6416 + }, + { + "epoch": 1.7040233700703757, + "grad_norm": 1.3583279531737376, + "learning_rate": 1.1767157481081092e-06, + "loss": 0.26704326272010803, + "step": 6417 + }, + { + "epoch": 1.7042889390519187, + "grad_norm": 1.2796404749500392, + "learning_rate": 1.174650154319653e-06, + "loss": 0.2148481160402298, + "step": 6418 + }, + { + "epoch": 1.7045545080334616, + "grad_norm": 1.1912742761204351, + "learning_rate": 1.1725862619545625e-06, + "loss": 0.21731218695640564, + "step": 6419 + }, + { + "epoch": 1.7048200770150046, + "grad_norm": 1.3502505047017879, + "learning_rate": 1.1705240714107301e-06, + "loss": 0.20832043886184692, + "step": 6420 + }, + { + "epoch": 1.7050856459965475, + "grad_norm": 1.2922565511595965, + "learning_rate": 1.1684635830857249e-06, + "loss": 0.21739046275615692, + "step": 6421 + }, + { + "epoch": 1.7053512149780907, + "grad_norm": 1.3041232291639149, + "learning_rate": 1.1664047973767811e-06, + "loss": 0.23972246050834656, + "step": 6422 + }, + { + "epoch": 1.7056167839596337, + "grad_norm": 1.2420174603299015, + "learning_rate": 1.1643477146808092e-06, + "loss": 0.2471289187669754, + "step": 6423 + }, + { + "epoch": 1.7058823529411766, + "grad_norm": 1.2148999014811244, + "learning_rate": 1.1622923353943916e-06, + "loss": 0.2014283537864685, + "step": 6424 + }, + { + "epoch": 1.7061479219227196, + "grad_norm": 1.1799937956162947, + "learning_rate": 1.1602386599137782e-06, + "loss": 0.21680915355682373, + "step": 6425 + }, + { + "epoch": 1.7064134909042625, + "grad_norm": 1.2221660563202492, + "learning_rate": 1.158186688634898e-06, + "loss": 0.2101205736398697, + "step": 6426 + }, + { + "epoch": 1.7066790598858055, + "grad_norm": 1.2879683442276364, + "learning_rate": 1.1561364219533444e-06, + "loss": 0.22114071249961853, + "step": 6427 + }, + { + "epoch": 1.7069446288673484, + "grad_norm": 1.2910925736026095, + "learning_rate": 1.1540878602643858e-06, + "loss": 0.20608706772327423, + "step": 6428 + }, + { + "epoch": 1.7072101978488914, + "grad_norm": 1.2486066037383718, + "learning_rate": 1.1520410039629593e-06, + "loss": 0.2247905433177948, + "step": 6429 + }, + { + "epoch": 1.7074757668304343, + "grad_norm": 1.1718742986299986, + "learning_rate": 1.1499958534436751e-06, + "loss": 0.22623226046562195, + "step": 6430 + }, + { + "epoch": 1.7077413358119773, + "grad_norm": 1.2776253558863635, + "learning_rate": 1.1479524091008142e-06, + "loss": 0.2063906192779541, + "step": 6431 + }, + { + "epoch": 1.7080069047935202, + "grad_norm": 1.4035125322254989, + "learning_rate": 1.1459106713283286e-06, + "loss": 0.2787795960903168, + "step": 6432 + }, + { + "epoch": 1.7082724737750632, + "grad_norm": 1.2096674582385407, + "learning_rate": 1.1438706405198419e-06, + "loss": 0.23090440034866333, + "step": 6433 + }, + { + "epoch": 1.7085380427566061, + "grad_norm": 1.288319877687408, + "learning_rate": 1.141832317068645e-06, + "loss": 0.23690670728683472, + "step": 6434 + }, + { + "epoch": 1.708803611738149, + "grad_norm": 1.2499926164056985, + "learning_rate": 1.1397957013677064e-06, + "loss": 0.209202378988266, + "step": 6435 + }, + { + "epoch": 1.709069180719692, + "grad_norm": 1.2311768368116, + "learning_rate": 1.1377607938096635e-06, + "loss": 0.22541575133800507, + "step": 6436 + }, + { + "epoch": 1.709334749701235, + "grad_norm": 1.3505125458173146, + "learning_rate": 1.1357275947868162e-06, + "loss": 0.2460884153842926, + "step": 6437 + }, + { + "epoch": 1.709600318682778, + "grad_norm": 1.195327574575731, + "learning_rate": 1.1336961046911443e-06, + "loss": 0.21967202425003052, + "step": 6438 + }, + { + "epoch": 1.709865887664321, + "grad_norm": 1.346022527152768, + "learning_rate": 1.1316663239142954e-06, + "loss": 0.23619329929351807, + "step": 6439 + }, + { + "epoch": 1.7101314566458639, + "grad_norm": 1.3033234842407981, + "learning_rate": 1.129638252847587e-06, + "loss": 0.24563436210155487, + "step": 6440 + }, + { + "epoch": 1.7103970256274068, + "grad_norm": 1.3840933006905622, + "learning_rate": 1.1276118918820068e-06, + "loss": 0.25508859753608704, + "step": 6441 + }, + { + "epoch": 1.7106625946089498, + "grad_norm": 1.3406379279103604, + "learning_rate": 1.1255872414082136e-06, + "loss": 0.24761545658111572, + "step": 6442 + }, + { + "epoch": 1.7109281635904927, + "grad_norm": 4.632018568484065, + "learning_rate": 1.1235643018165344e-06, + "loss": 0.2355962097644806, + "step": 6443 + }, + { + "epoch": 1.7111937325720357, + "grad_norm": 1.3274457548497118, + "learning_rate": 1.1215430734969723e-06, + "loss": 0.2534273862838745, + "step": 6444 + }, + { + "epoch": 1.7114593015535786, + "grad_norm": 1.2846712625276346, + "learning_rate": 1.1195235568391938e-06, + "loss": 0.2756424844264984, + "step": 6445 + }, + { + "epoch": 1.7117248705351216, + "grad_norm": 1.2126020570228762, + "learning_rate": 1.1175057522325383e-06, + "loss": 0.2198309451341629, + "step": 6446 + }, + { + "epoch": 1.7119904395166645, + "grad_norm": 1.2343738377988847, + "learning_rate": 1.1154896600660136e-06, + "loss": 0.21767666935920715, + "step": 6447 + }, + { + "epoch": 1.7122560084982075, + "grad_norm": 1.4965895030859304, + "learning_rate": 1.1134752807283e-06, + "loss": 0.2679128348827362, + "step": 6448 + }, + { + "epoch": 1.7125215774797504, + "grad_norm": 1.292131622576057, + "learning_rate": 1.1114626146077457e-06, + "loss": 0.2268792986869812, + "step": 6449 + }, + { + "epoch": 1.7127871464612934, + "grad_norm": 1.224637524783582, + "learning_rate": 1.109451662092369e-06, + "loss": 0.21585378050804138, + "step": 6450 + }, + { + "epoch": 1.7130527154428363, + "grad_norm": 1.3157463227820392, + "learning_rate": 1.1074424235698567e-06, + "loss": 0.2258647382259369, + "step": 6451 + }, + { + "epoch": 1.7133182844243793, + "grad_norm": 1.3742268123946286, + "learning_rate": 1.1054348994275677e-06, + "loss": 0.2456682175397873, + "step": 6452 + }, + { + "epoch": 1.7135838534059222, + "grad_norm": 1.4853732102975625, + "learning_rate": 1.1034290900525279e-06, + "loss": 0.22897745668888092, + "step": 6453 + }, + { + "epoch": 1.7138494223874652, + "grad_norm": 1.133114987282755, + "learning_rate": 1.101424995831435e-06, + "loss": 0.1910650134086609, + "step": 6454 + }, + { + "epoch": 1.7141149913690081, + "grad_norm": 1.2728981818199352, + "learning_rate": 1.0994226171506529e-06, + "loss": 0.2519158720970154, + "step": 6455 + }, + { + "epoch": 1.714380560350551, + "grad_norm": 1.259309948081026, + "learning_rate": 1.0974219543962184e-06, + "loss": 0.24191951751708984, + "step": 6456 + }, + { + "epoch": 1.714646129332094, + "grad_norm": 1.3159238719963862, + "learning_rate": 1.0954230079538352e-06, + "loss": 0.2560814619064331, + "step": 6457 + }, + { + "epoch": 1.714911698313637, + "grad_norm": 1.2640782659289207, + "learning_rate": 1.0934257782088763e-06, + "loss": 0.22969035804271698, + "step": 6458 + }, + { + "epoch": 1.71517726729518, + "grad_norm": 1.3584917562872394, + "learning_rate": 1.0914302655463837e-06, + "loss": 0.26114046573638916, + "step": 6459 + }, + { + "epoch": 1.715442836276723, + "grad_norm": 1.2235177756044688, + "learning_rate": 1.0894364703510685e-06, + "loss": 0.21457752585411072, + "step": 6460 + }, + { + "epoch": 1.7157084052582658, + "grad_norm": 1.164559577491723, + "learning_rate": 1.0874443930073098e-06, + "loss": 0.19998760521411896, + "step": 6461 + }, + { + "epoch": 1.7159739742398088, + "grad_norm": 1.2278101157674874, + "learning_rate": 1.0854540338991615e-06, + "loss": 0.2379671037197113, + "step": 6462 + }, + { + "epoch": 1.7162395432213517, + "grad_norm": 1.3827652808641404, + "learning_rate": 1.0834653934103367e-06, + "loss": 0.2236609309911728, + "step": 6463 + }, + { + "epoch": 1.7165051122028947, + "grad_norm": 1.2673726734268553, + "learning_rate": 1.0814784719242234e-06, + "loss": 0.22507379949092865, + "step": 6464 + }, + { + "epoch": 1.7167706811844377, + "grad_norm": 1.3174434539455087, + "learning_rate": 1.079493269823877e-06, + "loss": 0.22138816118240356, + "step": 6465 + }, + { + "epoch": 1.7170362501659806, + "grad_norm": 1.3880746036316538, + "learning_rate": 1.0775097874920204e-06, + "loss": 0.227338969707489, + "step": 6466 + }, + { + "epoch": 1.7173018191475236, + "grad_norm": 1.2588670866885754, + "learning_rate": 1.0755280253110466e-06, + "loss": 0.23694375157356262, + "step": 6467 + }, + { + "epoch": 1.7175673881290665, + "grad_norm": 1.365387614603678, + "learning_rate": 1.0735479836630136e-06, + "loss": 0.26219409704208374, + "step": 6468 + }, + { + "epoch": 1.7178329571106095, + "grad_norm": 1.20539748496599, + "learning_rate": 1.0715696629296524e-06, + "loss": 0.22215887904167175, + "step": 6469 + }, + { + "epoch": 1.7180985260921524, + "grad_norm": 1.3543481839639284, + "learning_rate": 1.0695930634923602e-06, + "loss": 0.25434768199920654, + "step": 6470 + }, + { + "epoch": 1.7183640950736954, + "grad_norm": 1.1809119822759757, + "learning_rate": 1.0676181857321998e-06, + "loss": 0.2092076987028122, + "step": 6471 + }, + { + "epoch": 1.7186296640552383, + "grad_norm": 1.330663320526799, + "learning_rate": 1.0656450300299048e-06, + "loss": 0.2710237503051758, + "step": 6472 + }, + { + "epoch": 1.7188952330367813, + "grad_norm": 1.2715188060789504, + "learning_rate": 1.0636735967658785e-06, + "loss": 0.2533886432647705, + "step": 6473 + }, + { + "epoch": 1.7191608020183242, + "grad_norm": 1.2174102707049457, + "learning_rate": 1.0617038863201878e-06, + "loss": 0.2545754909515381, + "step": 6474 + }, + { + "epoch": 1.7194263709998672, + "grad_norm": 1.2560655592374788, + "learning_rate": 1.0597358990725703e-06, + "loss": 0.26010993123054504, + "step": 6475 + }, + { + "epoch": 1.7196919399814101, + "grad_norm": 1.2632076366916114, + "learning_rate": 1.0577696354024314e-06, + "loss": 0.22529907524585724, + "step": 6476 + }, + { + "epoch": 1.719957508962953, + "grad_norm": 1.157260113755536, + "learning_rate": 1.0558050956888433e-06, + "loss": 0.1897469311952591, + "step": 6477 + }, + { + "epoch": 1.720223077944496, + "grad_norm": 1.31651804495616, + "learning_rate": 1.0538422803105441e-06, + "loss": 0.24663670361042023, + "step": 6478 + }, + { + "epoch": 1.720488646926039, + "grad_norm": 1.343902959790046, + "learning_rate": 1.0518811896459423e-06, + "loss": 0.2462892383337021, + "step": 6479 + }, + { + "epoch": 1.720754215907582, + "grad_norm": 1.117431347891292, + "learning_rate": 1.0499218240731157e-06, + "loss": 0.18652144074440002, + "step": 6480 + }, + { + "epoch": 1.7210197848891249, + "grad_norm": 1.2234103731079693, + "learning_rate": 1.0479641839698052e-06, + "loss": 0.24614468216896057, + "step": 6481 + }, + { + "epoch": 1.7212853538706678, + "grad_norm": 1.2632894895468527, + "learning_rate": 1.046008269713421e-06, + "loss": 0.27925312519073486, + "step": 6482 + }, + { + "epoch": 1.7215509228522108, + "grad_norm": 1.3426272887839532, + "learning_rate": 1.0440540816810395e-06, + "loss": 0.2626710832118988, + "step": 6483 + }, + { + "epoch": 1.7218164918337537, + "grad_norm": 1.2982212521269376, + "learning_rate": 1.042101620249405e-06, + "loss": 0.23039895296096802, + "step": 6484 + }, + { + "epoch": 1.7220820608152967, + "grad_norm": 1.2564768074123291, + "learning_rate": 1.0401508857949295e-06, + "loss": 0.19559775292873383, + "step": 6485 + }, + { + "epoch": 1.7223476297968396, + "grad_norm": 1.222035384596064, + "learning_rate": 1.0382018786936943e-06, + "loss": 0.24982990324497223, + "step": 6486 + }, + { + "epoch": 1.7226131987783826, + "grad_norm": 1.356827120814655, + "learning_rate": 1.0362545993214402e-06, + "loss": 0.26212313771247864, + "step": 6487 + }, + { + "epoch": 1.7228787677599255, + "grad_norm": 1.2583181328160484, + "learning_rate": 1.0343090480535788e-06, + "loss": 0.22827446460723877, + "step": 6488 + }, + { + "epoch": 1.7231443367414685, + "grad_norm": 1.3650470156220376, + "learning_rate": 1.032365225265196e-06, + "loss": 0.2710435390472412, + "step": 6489 + }, + { + "epoch": 1.7234099057230114, + "grad_norm": 1.560435811081079, + "learning_rate": 1.030423131331033e-06, + "loss": 0.25116702914237976, + "step": 6490 + }, + { + "epoch": 1.7236754747045544, + "grad_norm": 1.2598369270207033, + "learning_rate": 1.0284827666255048e-06, + "loss": 0.1980481743812561, + "step": 6491 + }, + { + "epoch": 1.7239410436860974, + "grad_norm": 1.3159445178277585, + "learning_rate": 1.0265441315226898e-06, + "loss": 0.2777971625328064, + "step": 6492 + }, + { + "epoch": 1.7242066126676403, + "grad_norm": 1.3290253215924488, + "learning_rate": 1.0246072263963336e-06, + "loss": 0.23041702806949615, + "step": 6493 + }, + { + "epoch": 1.7244721816491833, + "grad_norm": 1.2761862568921072, + "learning_rate": 1.0226720516198495e-06, + "loss": 0.21428728103637695, + "step": 6494 + }, + { + "epoch": 1.7247377506307262, + "grad_norm": 1.2965072992275601, + "learning_rate": 1.020738607566316e-06, + "loss": 0.22577518224716187, + "step": 6495 + }, + { + "epoch": 1.7250033196122692, + "grad_norm": 1.2489154030372867, + "learning_rate": 1.0188068946084783e-06, + "loss": 0.21080979704856873, + "step": 6496 + }, + { + "epoch": 1.7252688885938121, + "grad_norm": 1.1941107816051266, + "learning_rate": 1.0168769131187472e-06, + "loss": 0.21232858300209045, + "step": 6497 + }, + { + "epoch": 1.725534457575355, + "grad_norm": 1.3035016990745079, + "learning_rate": 1.0149486634692019e-06, + "loss": 0.25525614619255066, + "step": 6498 + }, + { + "epoch": 1.725800026556898, + "grad_norm": 1.2742578592858531, + "learning_rate": 1.0130221460315858e-06, + "loss": 0.26291778683662415, + "step": 6499 + }, + { + "epoch": 1.726065595538441, + "grad_norm": 1.1747703502148148, + "learning_rate": 1.011097361177308e-06, + "loss": 0.21314382553100586, + "step": 6500 + }, + { + "epoch": 1.726331164519984, + "grad_norm": 1.3027182735878766, + "learning_rate": 1.0091743092774474e-06, + "loss": 0.2106419950723648, + "step": 6501 + }, + { + "epoch": 1.7265967335015269, + "grad_norm": 1.2753206037657139, + "learning_rate": 1.0072529907027407e-06, + "loss": 0.22456032037734985, + "step": 6502 + }, + { + "epoch": 1.7268623024830698, + "grad_norm": 2.1059170179774807, + "learning_rate": 1.0053334058235975e-06, + "loss": 0.2301097959280014, + "step": 6503 + }, + { + "epoch": 1.7271278714646128, + "grad_norm": 1.4062353485935484, + "learning_rate": 1.0034155550100922e-06, + "loss": 0.21207617223262787, + "step": 6504 + }, + { + "epoch": 1.7273934404461557, + "grad_norm": 1.3379977808716934, + "learning_rate": 1.0014994386319621e-06, + "loss": 0.24378664791584015, + "step": 6505 + }, + { + "epoch": 1.727659009427699, + "grad_norm": 1.402146752515372, + "learning_rate": 9.995850570586107e-07, + "loss": 0.24914023280143738, + "step": 6506 + }, + { + "epoch": 1.7279245784092419, + "grad_norm": 1.2949159811476645, + "learning_rate": 9.976724106591128e-07, + "loss": 0.23235921561717987, + "step": 6507 + }, + { + "epoch": 1.7281901473907848, + "grad_norm": 1.295455173430887, + "learning_rate": 9.957614998022015e-07, + "loss": 0.22441455721855164, + "step": 6508 + }, + { + "epoch": 1.7284557163723278, + "grad_norm": 1.4195770964317103, + "learning_rate": 9.93852324856278e-07, + "loss": 0.2559920847415924, + "step": 6509 + }, + { + "epoch": 1.7287212853538707, + "grad_norm": 1.2106097617539484, + "learning_rate": 9.919448861894088e-07, + "loss": 0.21378321945667267, + "step": 6510 + }, + { + "epoch": 1.7289868543354137, + "grad_norm": 1.223247289196822, + "learning_rate": 9.900391841693247e-07, + "loss": 0.23622627556324005, + "step": 6511 + }, + { + "epoch": 1.7292524233169566, + "grad_norm": 1.2354266119490807, + "learning_rate": 9.88135219163424e-07, + "loss": 0.217013418674469, + "step": 6512 + }, + { + "epoch": 1.7295179922984996, + "grad_norm": 1.342902376475473, + "learning_rate": 9.862329915387669e-07, + "loss": 0.2221517264842987, + "step": 6513 + }, + { + "epoch": 1.7297835612800425, + "grad_norm": 1.3136496001371853, + "learning_rate": 9.84332501662083e-07, + "loss": 0.24377144873142242, + "step": 6514 + }, + { + "epoch": 1.7300491302615855, + "grad_norm": 1.2574348774674273, + "learning_rate": 9.824337498997593e-07, + "loss": 0.23368799686431885, + "step": 6515 + }, + { + "epoch": 1.7303146992431284, + "grad_norm": 1.1949944292188206, + "learning_rate": 9.805367366178608e-07, + "loss": 0.23061680793762207, + "step": 6516 + }, + { + "epoch": 1.7305802682246714, + "grad_norm": 1.2715048223769598, + "learning_rate": 9.78641462182104e-07, + "loss": 0.24157950282096863, + "step": 6517 + }, + { + "epoch": 1.7308458372062143, + "grad_norm": 1.3248165077712177, + "learning_rate": 9.76747926957875e-07, + "loss": 0.2122395783662796, + "step": 6518 + }, + { + "epoch": 1.7311114061877573, + "grad_norm": 1.320024810941134, + "learning_rate": 9.748561313102266e-07, + "loss": 0.2351134717464447, + "step": 6519 + }, + { + "epoch": 1.7313769751693002, + "grad_norm": 1.2421546716744003, + "learning_rate": 9.729660756038738e-07, + "loss": 0.22462692856788635, + "step": 6520 + }, + { + "epoch": 1.7316425441508432, + "grad_norm": 1.191887437920794, + "learning_rate": 9.710777602031985e-07, + "loss": 0.2140806019306183, + "step": 6521 + }, + { + "epoch": 1.7319081131323861, + "grad_norm": 1.1138928252794336, + "learning_rate": 9.691911854722447e-07, + "loss": 0.22256694734096527, + "step": 6522 + }, + { + "epoch": 1.732173682113929, + "grad_norm": 1.3703383963226383, + "learning_rate": 9.673063517747216e-07, + "loss": 0.26044604182243347, + "step": 6523 + }, + { + "epoch": 1.732439251095472, + "grad_norm": 1.2598416492801234, + "learning_rate": 9.65423259474001e-07, + "loss": 0.22553196549415588, + "step": 6524 + }, + { + "epoch": 1.732704820077015, + "grad_norm": 1.351471142700479, + "learning_rate": 9.635419089331255e-07, + "loss": 0.2240113914012909, + "step": 6525 + }, + { + "epoch": 1.732970389058558, + "grad_norm": 1.1814437793767476, + "learning_rate": 9.616623005147952e-07, + "loss": 0.2239987701177597, + "step": 6526 + }, + { + "epoch": 1.7332359580401009, + "grad_norm": 1.3385972692968178, + "learning_rate": 9.597844345813746e-07, + "loss": 0.2779507040977478, + "step": 6527 + }, + { + "epoch": 1.7335015270216438, + "grad_norm": 1.24243402144453, + "learning_rate": 9.57908311494896e-07, + "loss": 0.20211297273635864, + "step": 6528 + }, + { + "epoch": 1.7337670960031868, + "grad_norm": 1.3764658259437736, + "learning_rate": 9.560339316170542e-07, + "loss": 0.2552817165851593, + "step": 6529 + }, + { + "epoch": 1.7340326649847297, + "grad_norm": 1.2797541334315956, + "learning_rate": 9.54161295309206e-07, + "loss": 0.248790442943573, + "step": 6530 + }, + { + "epoch": 1.7342982339662727, + "grad_norm": 1.2952054804389268, + "learning_rate": 9.522904029323754e-07, + "loss": 0.22865381836891174, + "step": 6531 + }, + { + "epoch": 1.7345638029478156, + "grad_norm": 1.2248102039230788, + "learning_rate": 9.504212548472458e-07, + "loss": 0.212583988904953, + "step": 6532 + }, + { + "epoch": 1.7348293719293586, + "grad_norm": 1.3834113478738954, + "learning_rate": 9.48553851414169e-07, + "loss": 0.24632221460342407, + "step": 6533 + }, + { + "epoch": 1.7350949409109018, + "grad_norm": 1.2843254083507383, + "learning_rate": 9.466881929931582e-07, + "loss": 0.2264299988746643, + "step": 6534 + }, + { + "epoch": 1.7353605098924447, + "grad_norm": 1.1969400150248917, + "learning_rate": 9.4482427994389e-07, + "loss": 0.21560585498809814, + "step": 6535 + }, + { + "epoch": 1.7356260788739877, + "grad_norm": 1.2133784097522973, + "learning_rate": 9.429621126257038e-07, + "loss": 0.24358224868774414, + "step": 6536 + }, + { + "epoch": 1.7358916478555306, + "grad_norm": 1.2714225965713206, + "learning_rate": 9.411016913976045e-07, + "loss": 0.23307816684246063, + "step": 6537 + }, + { + "epoch": 1.7361572168370736, + "grad_norm": 1.3040669928143356, + "learning_rate": 9.392430166182597e-07, + "loss": 0.28001490235328674, + "step": 6538 + }, + { + "epoch": 1.7364227858186165, + "grad_norm": 1.271471324412232, + "learning_rate": 9.373860886459996e-07, + "loss": 0.22544093430042267, + "step": 6539 + }, + { + "epoch": 1.7366883548001595, + "grad_norm": 1.196472605989987, + "learning_rate": 9.355309078388186e-07, + "loss": 0.2066478282213211, + "step": 6540 + }, + { + "epoch": 1.7369539237817024, + "grad_norm": 1.3162468805281542, + "learning_rate": 9.336774745543697e-07, + "loss": 0.21185964345932007, + "step": 6541 + }, + { + "epoch": 1.7372194927632454, + "grad_norm": 1.2806137892507987, + "learning_rate": 9.318257891499793e-07, + "loss": 0.2337890863418579, + "step": 6542 + }, + { + "epoch": 1.7374850617447883, + "grad_norm": 1.3468215205180822, + "learning_rate": 9.299758519826274e-07, + "loss": 0.2430594563484192, + "step": 6543 + }, + { + "epoch": 1.7377506307263313, + "grad_norm": 1.4072339591675835, + "learning_rate": 9.281276634089609e-07, + "loss": 0.24799269437789917, + "step": 6544 + }, + { + "epoch": 1.7380161997078742, + "grad_norm": 1.3533264573117185, + "learning_rate": 9.26281223785287e-07, + "loss": 0.24756166338920593, + "step": 6545 + }, + { + "epoch": 1.7382817686894172, + "grad_norm": 1.281195516970091, + "learning_rate": 9.244365334675787e-07, + "loss": 0.23465190827846527, + "step": 6546 + }, + { + "epoch": 1.7385473376709601, + "grad_norm": 1.22953964144765, + "learning_rate": 9.225935928114716e-07, + "loss": 0.2039640098810196, + "step": 6547 + }, + { + "epoch": 1.738812906652503, + "grad_norm": 1.3426382286400422, + "learning_rate": 9.207524021722602e-07, + "loss": 0.22304412722587585, + "step": 6548 + }, + { + "epoch": 1.739078475634046, + "grad_norm": 1.2253196898929546, + "learning_rate": 9.189129619049064e-07, + "loss": 0.19985908269882202, + "step": 6549 + }, + { + "epoch": 1.739344044615589, + "grad_norm": 1.3354963919439176, + "learning_rate": 9.17075272364032e-07, + "loss": 0.2335432469844818, + "step": 6550 + }, + { + "epoch": 1.739609613597132, + "grad_norm": 1.6822196536181961, + "learning_rate": 9.152393339039223e-07, + "loss": 0.2313593327999115, + "step": 6551 + }, + { + "epoch": 1.739875182578675, + "grad_norm": 1.310977344619443, + "learning_rate": 9.134051468785243e-07, + "loss": 0.2320600152015686, + "step": 6552 + }, + { + "epoch": 1.7401407515602179, + "grad_norm": 1.0942022372096942, + "learning_rate": 9.115727116414475e-07, + "loss": 0.1870848387479782, + "step": 6553 + }, + { + "epoch": 1.7404063205417608, + "grad_norm": 1.340037469005655, + "learning_rate": 9.097420285459635e-07, + "loss": 0.22922812402248383, + "step": 6554 + }, + { + "epoch": 1.7406718895233038, + "grad_norm": 1.3705243227438364, + "learning_rate": 9.079130979450068e-07, + "loss": 0.2505050301551819, + "step": 6555 + }, + { + "epoch": 1.7409374585048467, + "grad_norm": 1.3187608464438627, + "learning_rate": 9.060859201911732e-07, + "loss": 0.20445439219474792, + "step": 6556 + }, + { + "epoch": 1.7412030274863897, + "grad_norm": 1.1489822386745985, + "learning_rate": 9.042604956367218e-07, + "loss": 0.22338441014289856, + "step": 6557 + }, + { + "epoch": 1.7414685964679326, + "grad_norm": 1.2900464387857213, + "learning_rate": 9.024368246335735e-07, + "loss": 0.24923941493034363, + "step": 6558 + }, + { + "epoch": 1.7417341654494756, + "grad_norm": 1.3383952744906746, + "learning_rate": 9.006149075333071e-07, + "loss": 0.22842931747436523, + "step": 6559 + }, + { + "epoch": 1.7419997344310185, + "grad_norm": 1.391145524863548, + "learning_rate": 8.987947446871703e-07, + "loss": 0.22451579570770264, + "step": 6560 + }, + { + "epoch": 1.7422653034125615, + "grad_norm": 1.3218089225892669, + "learning_rate": 8.969763364460682e-07, + "loss": 0.2521047592163086, + "step": 6561 + }, + { + "epoch": 1.7425308723941044, + "grad_norm": 1.1675892500249985, + "learning_rate": 8.951596831605691e-07, + "loss": 0.25001099705696106, + "step": 6562 + }, + { + "epoch": 1.7427964413756474, + "grad_norm": 1.175521207104519, + "learning_rate": 8.933447851809007e-07, + "loss": 0.19592508673667908, + "step": 6563 + }, + { + "epoch": 1.7430620103571903, + "grad_norm": 1.399887131584603, + "learning_rate": 8.915316428569554e-07, + "loss": 0.2785179018974304, + "step": 6564 + }, + { + "epoch": 1.7433275793387333, + "grad_norm": 1.1688351316361159, + "learning_rate": 8.897202565382845e-07, + "loss": 0.20700594782829285, + "step": 6565 + }, + { + "epoch": 1.7435931483202762, + "grad_norm": 1.2225569857896341, + "learning_rate": 8.879106265741044e-07, + "loss": 0.253167062997818, + "step": 6566 + }, + { + "epoch": 1.7438587173018192, + "grad_norm": 1.4278912909015264, + "learning_rate": 8.861027533132859e-07, + "loss": 0.27672937512397766, + "step": 6567 + }, + { + "epoch": 1.7441242862833621, + "grad_norm": 1.3136368448280313, + "learning_rate": 8.842966371043671e-07, + "loss": 0.23050950467586517, + "step": 6568 + }, + { + "epoch": 1.744389855264905, + "grad_norm": 1.2790658189865058, + "learning_rate": 8.824922782955481e-07, + "loss": 0.23529425263404846, + "step": 6569 + }, + { + "epoch": 1.744655424246448, + "grad_norm": 1.2887213562899031, + "learning_rate": 8.806896772346873e-07, + "loss": 0.21803250908851624, + "step": 6570 + }, + { + "epoch": 1.744920993227991, + "grad_norm": 1.3669961004756481, + "learning_rate": 8.788888342693047e-07, + "loss": 0.24237293004989624, + "step": 6571 + }, + { + "epoch": 1.745186562209534, + "grad_norm": 1.1957319745445254, + "learning_rate": 8.770897497465803e-07, + "loss": 0.2008107602596283, + "step": 6572 + }, + { + "epoch": 1.745452131191077, + "grad_norm": 1.2693790937709173, + "learning_rate": 8.752924240133587e-07, + "loss": 0.23106279969215393, + "step": 6573 + }, + { + "epoch": 1.7457177001726198, + "grad_norm": 1.377716829660982, + "learning_rate": 8.734968574161406e-07, + "loss": 0.23726215958595276, + "step": 6574 + }, + { + "epoch": 1.7459832691541628, + "grad_norm": 1.211024095215965, + "learning_rate": 8.717030503010915e-07, + "loss": 0.26349812746047974, + "step": 6575 + }, + { + "epoch": 1.7462488381357057, + "grad_norm": 1.2871963140003055, + "learning_rate": 8.699110030140367e-07, + "loss": 0.23226451873779297, + "step": 6576 + }, + { + "epoch": 1.7465144071172487, + "grad_norm": 1.3173524718115384, + "learning_rate": 8.68120715900459e-07, + "loss": 0.22188402712345123, + "step": 6577 + }, + { + "epoch": 1.7467799760987917, + "grad_norm": 1.2367242455559135, + "learning_rate": 8.663321893055087e-07, + "loss": 0.21238234639167786, + "step": 6578 + }, + { + "epoch": 1.7470455450803346, + "grad_norm": 1.3423960800972676, + "learning_rate": 8.645454235739903e-07, + "loss": 0.2700675427913666, + "step": 6579 + }, + { + "epoch": 1.7473111140618776, + "grad_norm": 1.2737029023524005, + "learning_rate": 8.627604190503714e-07, + "loss": 0.24463894963264465, + "step": 6580 + }, + { + "epoch": 1.7475766830434205, + "grad_norm": 1.2537801110870739, + "learning_rate": 8.609771760787822e-07, + "loss": 0.23429079353809357, + "step": 6581 + }, + { + "epoch": 1.7478422520249635, + "grad_norm": 1.342775712878445, + "learning_rate": 8.591956950030067e-07, + "loss": 0.21767663955688477, + "step": 6582 + }, + { + "epoch": 1.7481078210065064, + "grad_norm": 1.3390334282971272, + "learning_rate": 8.574159761664957e-07, + "loss": 0.2499813735485077, + "step": 6583 + }, + { + "epoch": 1.7483733899880494, + "grad_norm": 1.471955255689367, + "learning_rate": 8.556380199123582e-07, + "loss": 0.28065958619117737, + "step": 6584 + }, + { + "epoch": 1.7486389589695923, + "grad_norm": 1.3012440070718, + "learning_rate": 8.538618265833621e-07, + "loss": 0.2166985273361206, + "step": 6585 + }, + { + "epoch": 1.7489045279511353, + "grad_norm": 1.2228700023368582, + "learning_rate": 8.520873965219356e-07, + "loss": 0.22835782170295715, + "step": 6586 + }, + { + "epoch": 1.7491700969326782, + "grad_norm": 1.2209097376008975, + "learning_rate": 8.503147300701709e-07, + "loss": 0.23575961589813232, + "step": 6587 + }, + { + "epoch": 1.7494356659142212, + "grad_norm": 1.1275514661567778, + "learning_rate": 8.485438275698154e-07, + "loss": 0.183369442820549, + "step": 6588 + }, + { + "epoch": 1.7497012348957641, + "grad_norm": 1.519810508178025, + "learning_rate": 8.467746893622786e-07, + "loss": 0.2731352746486664, + "step": 6589 + }, + { + "epoch": 1.749966803877307, + "grad_norm": 1.2913957246056922, + "learning_rate": 8.450073157886296e-07, + "loss": 0.20177578926086426, + "step": 6590 + }, + { + "epoch": 1.75023237285885, + "grad_norm": 1.2742798574628598, + "learning_rate": 8.432417071895982e-07, + "loss": 0.21672385931015015, + "step": 6591 + }, + { + "epoch": 1.750497941840393, + "grad_norm": 1.370933216008306, + "learning_rate": 8.414778639055699e-07, + "loss": 0.2503831386566162, + "step": 6592 + }, + { + "epoch": 1.750763510821936, + "grad_norm": 1.2884133202144494, + "learning_rate": 8.397157862765959e-07, + "loss": 0.2427521049976349, + "step": 6593 + }, + { + "epoch": 1.7510290798034789, + "grad_norm": 1.3424141731181953, + "learning_rate": 8.379554746423824e-07, + "loss": 0.23128533363342285, + "step": 6594 + }, + { + "epoch": 1.7512946487850218, + "grad_norm": 1.2353999110478557, + "learning_rate": 8.361969293422967e-07, + "loss": 0.2470957189798355, + "step": 6595 + }, + { + "epoch": 1.7515602177665648, + "grad_norm": 1.3335789710762707, + "learning_rate": 8.344401507153665e-07, + "loss": 0.29447510838508606, + "step": 6596 + }, + { + "epoch": 1.7518257867481077, + "grad_norm": 1.197223419032368, + "learning_rate": 8.326851391002777e-07, + "loss": 0.21585828065872192, + "step": 6597 + }, + { + "epoch": 1.7520913557296507, + "grad_norm": 1.2653558688292899, + "learning_rate": 8.30931894835375e-07, + "loss": 0.24081121385097504, + "step": 6598 + }, + { + "epoch": 1.7523569247111936, + "grad_norm": 1.3408805119391818, + "learning_rate": 8.291804182586638e-07, + "loss": 0.23052063584327698, + "step": 6599 + }, + { + "epoch": 1.7526224936927366, + "grad_norm": 1.2126901970374089, + "learning_rate": 8.274307097078093e-07, + "loss": 0.19008183479309082, + "step": 6600 + }, + { + "epoch": 1.7528880626742795, + "grad_norm": 1.3285441470167585, + "learning_rate": 8.25682769520132e-07, + "loss": 0.2632960379123688, + "step": 6601 + }, + { + "epoch": 1.7531536316558225, + "grad_norm": 1.4350439941988302, + "learning_rate": 8.239365980326175e-07, + "loss": 0.25958624482154846, + "step": 6602 + }, + { + "epoch": 1.7534192006373654, + "grad_norm": 1.304275360361708, + "learning_rate": 8.221921955819035e-07, + "loss": 0.22370605170726776, + "step": 6603 + }, + { + "epoch": 1.7536847696189084, + "grad_norm": 1.2385957043075924, + "learning_rate": 8.204495625042919e-07, + "loss": 0.22018703818321228, + "step": 6604 + }, + { + "epoch": 1.7539503386004514, + "grad_norm": 1.3626754196729718, + "learning_rate": 8.187086991357418e-07, + "loss": 0.26802191138267517, + "step": 6605 + }, + { + "epoch": 1.7542159075819943, + "grad_norm": 1.5313825040978437, + "learning_rate": 8.169696058118725e-07, + "loss": 0.21560518443584442, + "step": 6606 + }, + { + "epoch": 1.7544814765635373, + "grad_norm": 1.270508998157205, + "learning_rate": 8.152322828679593e-07, + "loss": 0.23222430050373077, + "step": 6607 + }, + { + "epoch": 1.7547470455450802, + "grad_norm": 1.1542994886817455, + "learning_rate": 8.134967306389374e-07, + "loss": 0.17638427019119263, + "step": 6608 + }, + { + "epoch": 1.7550126145266232, + "grad_norm": 1.3257823658984844, + "learning_rate": 8.117629494594015e-07, + "loss": 0.21539513766765594, + "step": 6609 + }, + { + "epoch": 1.7552781835081661, + "grad_norm": 1.3431199934216977, + "learning_rate": 8.100309396636031e-07, + "loss": 0.2265736162662506, + "step": 6610 + }, + { + "epoch": 1.755543752489709, + "grad_norm": 1.3478032961337874, + "learning_rate": 8.083007015854549e-07, + "loss": 0.2688787281513214, + "step": 6611 + }, + { + "epoch": 1.755809321471252, + "grad_norm": 1.3027271078273857, + "learning_rate": 8.065722355585249e-07, + "loss": 0.19756367802619934, + "step": 6612 + }, + { + "epoch": 1.756074890452795, + "grad_norm": 1.3749986253881121, + "learning_rate": 8.048455419160405e-07, + "loss": 0.19934290647506714, + "step": 6613 + }, + { + "epoch": 1.756340459434338, + "grad_norm": 1.5756000064179743, + "learning_rate": 8.031206209908904e-07, + "loss": 0.2523588538169861, + "step": 6614 + }, + { + "epoch": 1.7566060284158809, + "grad_norm": 1.2988900493114706, + "learning_rate": 8.01397473115616e-07, + "loss": 0.22825747728347778, + "step": 6615 + }, + { + "epoch": 1.7568715973974238, + "grad_norm": 1.3238944187902402, + "learning_rate": 7.996760986224228e-07, + "loss": 0.24525251984596252, + "step": 6616 + }, + { + "epoch": 1.7571371663789668, + "grad_norm": 1.366323962207031, + "learning_rate": 7.979564978431687e-07, + "loss": 0.21883559226989746, + "step": 6617 + }, + { + "epoch": 1.7574027353605097, + "grad_norm": 1.5827948860142422, + "learning_rate": 7.96238671109374e-07, + "loss": 0.2642098069190979, + "step": 6618 + }, + { + "epoch": 1.757668304342053, + "grad_norm": 1.3345016667633411, + "learning_rate": 7.945226187522159e-07, + "loss": 0.24094998836517334, + "step": 6619 + }, + { + "epoch": 1.7579338733235959, + "grad_norm": 1.2243450261876818, + "learning_rate": 7.928083411025278e-07, + "loss": 0.2225762903690338, + "step": 6620 + }, + { + "epoch": 1.7581994423051388, + "grad_norm": 1.2991544127435968, + "learning_rate": 7.910958384908041e-07, + "loss": 0.26722851395606995, + "step": 6621 + }, + { + "epoch": 1.7584650112866818, + "grad_norm": 1.3206157533666447, + "learning_rate": 7.893851112471907e-07, + "loss": 0.2176910787820816, + "step": 6622 + }, + { + "epoch": 1.7587305802682247, + "grad_norm": 1.3618122023344794, + "learning_rate": 7.876761597015003e-07, + "loss": 0.20261354744434357, + "step": 6623 + }, + { + "epoch": 1.7589961492497677, + "grad_norm": 1.1728416456458601, + "learning_rate": 7.859689841831975e-07, + "loss": 0.23314467072486877, + "step": 6624 + }, + { + "epoch": 1.7592617182313106, + "grad_norm": 1.3115277523344588, + "learning_rate": 7.842635850214054e-07, + "loss": 0.19854989647865295, + "step": 6625 + }, + { + "epoch": 1.7595272872128536, + "grad_norm": 1.2614486006783794, + "learning_rate": 7.825599625449043e-07, + "loss": 0.2422565519809723, + "step": 6626 + }, + { + "epoch": 1.7597928561943965, + "grad_norm": 1.342773057026848, + "learning_rate": 7.808581170821328e-07, + "loss": 0.27029529213905334, + "step": 6627 + }, + { + "epoch": 1.7600584251759395, + "grad_norm": 1.1918292148332001, + "learning_rate": 7.791580489611872e-07, + "loss": 0.23596832156181335, + "step": 6628 + }, + { + "epoch": 1.7603239941574824, + "grad_norm": 1.2062344481848934, + "learning_rate": 7.774597585098198e-07, + "loss": 0.218271404504776, + "step": 6629 + }, + { + "epoch": 1.7605895631390254, + "grad_norm": 1.3762692469809215, + "learning_rate": 7.75763246055441e-07, + "loss": 0.2551255226135254, + "step": 6630 + }, + { + "epoch": 1.7608551321205683, + "grad_norm": 1.3049962391533094, + "learning_rate": 7.740685119251179e-07, + "loss": 0.24410653114318848, + "step": 6631 + }, + { + "epoch": 1.7611207011021113, + "grad_norm": 1.2577276419448338, + "learning_rate": 7.723755564455771e-07, + "loss": 0.23044872283935547, + "step": 6632 + }, + { + "epoch": 1.7613862700836542, + "grad_norm": 1.334208934461724, + "learning_rate": 7.706843799431985e-07, + "loss": 0.24569427967071533, + "step": 6633 + }, + { + "epoch": 1.7616518390651972, + "grad_norm": 1.1605227177029394, + "learning_rate": 7.689949827440224e-07, + "loss": 0.200277179479599, + "step": 6634 + }, + { + "epoch": 1.7619174080467401, + "grad_norm": 1.1742759165978003, + "learning_rate": 7.673073651737428e-07, + "loss": 0.19217821955680847, + "step": 6635 + }, + { + "epoch": 1.762182977028283, + "grad_norm": 1.281151649074766, + "learning_rate": 7.656215275577151e-07, + "loss": 0.227005273103714, + "step": 6636 + }, + { + "epoch": 1.762448546009826, + "grad_norm": 1.2211778988331632, + "learning_rate": 7.639374702209468e-07, + "loss": 0.21359863877296448, + "step": 6637 + }, + { + "epoch": 1.762714114991369, + "grad_norm": 1.267969218396632, + "learning_rate": 7.62255193488105e-07, + "loss": 0.24056711792945862, + "step": 6638 + }, + { + "epoch": 1.762979683972912, + "grad_norm": 1.28035138481303, + "learning_rate": 7.605746976835127e-07, + "loss": 0.20897413790225983, + "step": 6639 + }, + { + "epoch": 1.763245252954455, + "grad_norm": 1.2567764889990254, + "learning_rate": 7.588959831311493e-07, + "loss": 0.20395967364311218, + "step": 6640 + }, + { + "epoch": 1.7635108219359978, + "grad_norm": 1.4827108993688454, + "learning_rate": 7.572190501546517e-07, + "loss": 0.2334095984697342, + "step": 6641 + }, + { + "epoch": 1.7637763909175408, + "grad_norm": 1.3358734576215814, + "learning_rate": 7.555438990773134e-07, + "loss": 0.23892858624458313, + "step": 6642 + }, + { + "epoch": 1.7640419598990837, + "grad_norm": 1.3063666339869877, + "learning_rate": 7.538705302220839e-07, + "loss": 0.23515449464321136, + "step": 6643 + }, + { + "epoch": 1.7643075288806267, + "grad_norm": 1.1919354046726482, + "learning_rate": 7.521989439115674e-07, + "loss": 0.19728611409664154, + "step": 6644 + }, + { + "epoch": 1.7645730978621696, + "grad_norm": 1.2609989060636697, + "learning_rate": 7.505291404680281e-07, + "loss": 0.22277355194091797, + "step": 6645 + }, + { + "epoch": 1.7648386668437126, + "grad_norm": 1.2129119488866849, + "learning_rate": 7.488611202133822e-07, + "loss": 0.24117602407932281, + "step": 6646 + }, + { + "epoch": 1.7651042358252558, + "grad_norm": 1.3643314179100876, + "learning_rate": 7.471948834692045e-07, + "loss": 0.24675750732421875, + "step": 6647 + }, + { + "epoch": 1.7653698048067987, + "grad_norm": 1.3261352525807495, + "learning_rate": 7.455304305567279e-07, + "loss": 0.2413899004459381, + "step": 6648 + }, + { + "epoch": 1.7656353737883417, + "grad_norm": 1.3357210816225529, + "learning_rate": 7.438677617968348e-07, + "loss": 0.22125428915023804, + "step": 6649 + }, + { + "epoch": 1.7659009427698846, + "grad_norm": 1.2099689083776513, + "learning_rate": 7.422068775100732e-07, + "loss": 0.205051988363266, + "step": 6650 + }, + { + "epoch": 1.7661665117514276, + "grad_norm": 1.2734255069971199, + "learning_rate": 7.405477780166415e-07, + "loss": 0.23711715638637543, + "step": 6651 + }, + { + "epoch": 1.7664320807329705, + "grad_norm": 1.4063590395204508, + "learning_rate": 7.388904636363914e-07, + "loss": 0.2591046988964081, + "step": 6652 + }, + { + "epoch": 1.7666976497145135, + "grad_norm": 1.4323150626725398, + "learning_rate": 7.372349346888363e-07, + "loss": 0.24837243556976318, + "step": 6653 + }, + { + "epoch": 1.7669632186960564, + "grad_norm": 1.1492996795155954, + "learning_rate": 7.35581191493141e-07, + "loss": 0.20910412073135376, + "step": 6654 + }, + { + "epoch": 1.7672287876775994, + "grad_norm": 1.113119722429438, + "learning_rate": 7.339292343681282e-07, + "loss": 0.2056204229593277, + "step": 6655 + }, + { + "epoch": 1.7674943566591423, + "grad_norm": 1.2927092177897141, + "learning_rate": 7.322790636322764e-07, + "loss": 0.2496742308139801, + "step": 6656 + }, + { + "epoch": 1.7677599256406853, + "grad_norm": 1.3571185149739835, + "learning_rate": 7.306306796037188e-07, + "loss": 0.24432921409606934, + "step": 6657 + }, + { + "epoch": 1.7680254946222282, + "grad_norm": 1.3006085174415165, + "learning_rate": 7.289840826002414e-07, + "loss": 0.2492775321006775, + "step": 6658 + }, + { + "epoch": 1.7682910636037712, + "grad_norm": 1.3256617876861967, + "learning_rate": 7.273392729392936e-07, + "loss": 0.22673827409744263, + "step": 6659 + }, + { + "epoch": 1.7685566325853141, + "grad_norm": 1.3730978211523115, + "learning_rate": 7.25696250937975e-07, + "loss": 0.2225622981786728, + "step": 6660 + }, + { + "epoch": 1.768822201566857, + "grad_norm": 1.2296766172450786, + "learning_rate": 7.240550169130378e-07, + "loss": 0.24896883964538574, + "step": 6661 + }, + { + "epoch": 1.7690877705484, + "grad_norm": 1.2103035123370711, + "learning_rate": 7.224155711808923e-07, + "loss": 0.2395302951335907, + "step": 6662 + }, + { + "epoch": 1.769353339529943, + "grad_norm": 1.2658162555194572, + "learning_rate": 7.207779140576066e-07, + "loss": 0.2255886197090149, + "step": 6663 + }, + { + "epoch": 1.769618908511486, + "grad_norm": 1.2518907529925698, + "learning_rate": 7.191420458589005e-07, + "loss": 0.24029678106307983, + "step": 6664 + }, + { + "epoch": 1.769884477493029, + "grad_norm": 1.1016484922093457, + "learning_rate": 7.175079669001506e-07, + "loss": 0.19399142265319824, + "step": 6665 + }, + { + "epoch": 1.7701500464745719, + "grad_norm": 1.2291425924678119, + "learning_rate": 7.158756774963882e-07, + "loss": 0.24569162726402283, + "step": 6666 + }, + { + "epoch": 1.7704156154561148, + "grad_norm": 1.2180012837263907, + "learning_rate": 7.142451779622971e-07, + "loss": 0.2484329342842102, + "step": 6667 + }, + { + "epoch": 1.7706811844376578, + "grad_norm": 1.2505833357389051, + "learning_rate": 7.126164686122216e-07, + "loss": 0.24423512816429138, + "step": 6668 + }, + { + "epoch": 1.7709467534192007, + "grad_norm": 1.1277554918017485, + "learning_rate": 7.109895497601571e-07, + "loss": 0.20146678388118744, + "step": 6669 + }, + { + "epoch": 1.7712123224007437, + "grad_norm": 1.2945002187740315, + "learning_rate": 7.093644217197526e-07, + "loss": 0.23329001665115356, + "step": 6670 + }, + { + "epoch": 1.7714778913822866, + "grad_norm": 1.1689758736288713, + "learning_rate": 7.077410848043165e-07, + "loss": 0.2290019690990448, + "step": 6671 + }, + { + "epoch": 1.7717434603638296, + "grad_norm": 1.2744441159542537, + "learning_rate": 7.061195393268061e-07, + "loss": 0.2329377382993698, + "step": 6672 + }, + { + "epoch": 1.7720090293453725, + "grad_norm": 1.1430677052322078, + "learning_rate": 7.04499785599837e-07, + "loss": 0.21513575315475464, + "step": 6673 + }, + { + "epoch": 1.7722745983269155, + "grad_norm": 1.1659646021132744, + "learning_rate": 7.028818239356794e-07, + "loss": 0.19022463262081146, + "step": 6674 + }, + { + "epoch": 1.7725401673084584, + "grad_norm": 1.2837523861206293, + "learning_rate": 7.012656546462571e-07, + "loss": 0.2097887396812439, + "step": 6675 + }, + { + "epoch": 1.7728057362900014, + "grad_norm": 1.3991640357566577, + "learning_rate": 6.996512780431486e-07, + "loss": 0.2559792101383209, + "step": 6676 + }, + { + "epoch": 1.7730713052715443, + "grad_norm": 1.3219531410357084, + "learning_rate": 6.980386944375849e-07, + "loss": 0.24624274671077728, + "step": 6677 + }, + { + "epoch": 1.7733368742530873, + "grad_norm": 1.2405076465604956, + "learning_rate": 6.964279041404553e-07, + "loss": 0.22904372215270996, + "step": 6678 + }, + { + "epoch": 1.7736024432346302, + "grad_norm": 1.216707646052236, + "learning_rate": 6.948189074623002e-07, + "loss": 0.20808623731136322, + "step": 6679 + }, + { + "epoch": 1.7738680122161732, + "grad_norm": 1.229477200185015, + "learning_rate": 6.932117047133158e-07, + "loss": 0.1931435763835907, + "step": 6680 + }, + { + "epoch": 1.7741335811977161, + "grad_norm": 1.2962984681963328, + "learning_rate": 6.91606296203351e-07, + "loss": 0.22938531637191772, + "step": 6681 + }, + { + "epoch": 1.774399150179259, + "grad_norm": 1.2921857742770726, + "learning_rate": 6.900026822419103e-07, + "loss": 0.240365132689476, + "step": 6682 + }, + { + "epoch": 1.774664719160802, + "grad_norm": 1.3560359754116593, + "learning_rate": 6.8840086313815e-07, + "loss": 0.26665499806404114, + "step": 6683 + }, + { + "epoch": 1.774930288142345, + "grad_norm": 1.1827095382370005, + "learning_rate": 6.86800839200884e-07, + "loss": 0.19775834679603577, + "step": 6684 + }, + { + "epoch": 1.775195857123888, + "grad_norm": 1.2698613362606737, + "learning_rate": 6.852026107385756e-07, + "loss": 0.20334021747112274, + "step": 6685 + }, + { + "epoch": 1.775461426105431, + "grad_norm": 1.1845529296493982, + "learning_rate": 6.836061780593484e-07, + "loss": 0.20670340955257416, + "step": 6686 + }, + { + "epoch": 1.7757269950869738, + "grad_norm": 1.2940248868651125, + "learning_rate": 6.820115414709727e-07, + "loss": 0.2033209353685379, + "step": 6687 + }, + { + "epoch": 1.7759925640685168, + "grad_norm": 1.101442360403221, + "learning_rate": 6.804187012808761e-07, + "loss": 0.23827815055847168, + "step": 6688 + }, + { + "epoch": 1.7762581330500598, + "grad_norm": 1.200357834005043, + "learning_rate": 6.788276577961394e-07, + "loss": 0.2054731547832489, + "step": 6689 + }, + { + "epoch": 1.7765237020316027, + "grad_norm": 1.3006753644657554, + "learning_rate": 6.772384113234987e-07, + "loss": 0.25553691387176514, + "step": 6690 + }, + { + "epoch": 1.7767892710131457, + "grad_norm": 1.2800516387465457, + "learning_rate": 6.756509621693385e-07, + "loss": 0.23650874197483063, + "step": 6691 + }, + { + "epoch": 1.7770548399946886, + "grad_norm": 1.2987358367196533, + "learning_rate": 6.740653106397033e-07, + "loss": 0.2353624701499939, + "step": 6692 + }, + { + "epoch": 1.7773204089762316, + "grad_norm": 1.3578478166739052, + "learning_rate": 6.724814570402871e-07, + "loss": 0.26034629344940186, + "step": 6693 + }, + { + "epoch": 1.7775859779577745, + "grad_norm": 1.2070636800070726, + "learning_rate": 6.70899401676438e-07, + "loss": 0.2272130399942398, + "step": 6694 + }, + { + "epoch": 1.7778515469393175, + "grad_norm": 1.353295285146214, + "learning_rate": 6.693191448531589e-07, + "loss": 0.27940404415130615, + "step": 6695 + }, + { + "epoch": 1.7781171159208604, + "grad_norm": 1.2726244327901954, + "learning_rate": 6.677406868751013e-07, + "loss": 0.22997702658176422, + "step": 6696 + }, + { + "epoch": 1.7783826849024034, + "grad_norm": 1.2569026906720413, + "learning_rate": 6.661640280465775e-07, + "loss": 0.22918452322483063, + "step": 6697 + }, + { + "epoch": 1.7786482538839463, + "grad_norm": 1.2456580683228033, + "learning_rate": 6.645891686715456e-07, + "loss": 0.18456090986728668, + "step": 6698 + }, + { + "epoch": 1.7789138228654893, + "grad_norm": 1.3290472252808803, + "learning_rate": 6.630161090536214e-07, + "loss": 0.23256534337997437, + "step": 6699 + }, + { + "epoch": 1.7791793918470322, + "grad_norm": 1.2224316750050632, + "learning_rate": 6.614448494960713e-07, + "loss": 0.21171879768371582, + "step": 6700 + }, + { + "epoch": 1.7794449608285752, + "grad_norm": 1.201224789246079, + "learning_rate": 6.598753903018163e-07, + "loss": 0.21382400393486023, + "step": 6701 + }, + { + "epoch": 1.7797105298101181, + "grad_norm": 1.2240177347792593, + "learning_rate": 6.583077317734299e-07, + "loss": 0.22954748570919037, + "step": 6702 + }, + { + "epoch": 1.779976098791661, + "grad_norm": 1.519530195710278, + "learning_rate": 6.56741874213136e-07, + "loss": 0.25691086053848267, + "step": 6703 + }, + { + "epoch": 1.780241667773204, + "grad_norm": 1.4662002194098382, + "learning_rate": 6.551778179228174e-07, + "loss": 0.23413901031017303, + "step": 6704 + }, + { + "epoch": 1.780507236754747, + "grad_norm": 1.2775019242293946, + "learning_rate": 6.536155632040031e-07, + "loss": 0.2493733912706375, + "step": 6705 + }, + { + "epoch": 1.78077280573629, + "grad_norm": 1.2512747936457356, + "learning_rate": 6.520551103578776e-07, + "loss": 0.26094138622283936, + "step": 6706 + }, + { + "epoch": 1.7810383747178329, + "grad_norm": 1.3016608765448805, + "learning_rate": 6.504964596852781e-07, + "loss": 0.23509518802165985, + "step": 6707 + }, + { + "epoch": 1.7813039436993758, + "grad_norm": 1.4726929969063267, + "learning_rate": 6.489396114866942e-07, + "loss": 0.2471122294664383, + "step": 6708 + }, + { + "epoch": 1.7815695126809188, + "grad_norm": 1.3034668854019054, + "learning_rate": 6.47384566062268e-07, + "loss": 0.2363303005695343, + "step": 6709 + }, + { + "epoch": 1.7818350816624617, + "grad_norm": 1.1801501968168786, + "learning_rate": 6.458313237117953e-07, + "loss": 0.18868233263492584, + "step": 6710 + }, + { + "epoch": 1.7821006506440047, + "grad_norm": 1.3437880175802723, + "learning_rate": 6.442798847347187e-07, + "loss": 0.23380546271800995, + "step": 6711 + }, + { + "epoch": 1.7823662196255476, + "grad_norm": 1.471740030592424, + "learning_rate": 6.42730249430139e-07, + "loss": 0.24112167954444885, + "step": 6712 + }, + { + "epoch": 1.7826317886070906, + "grad_norm": 1.2664184946697812, + "learning_rate": 6.411824180968096e-07, + "loss": 0.2397521436214447, + "step": 6713 + }, + { + "epoch": 1.7828973575886335, + "grad_norm": 1.309174308390434, + "learning_rate": 6.396363910331338e-07, + "loss": 0.23775406181812286, + "step": 6714 + }, + { + "epoch": 1.7831629265701765, + "grad_norm": 1.4327166340451307, + "learning_rate": 6.380921685371655e-07, + "loss": 0.23278602957725525, + "step": 6715 + }, + { + "epoch": 1.7834284955517195, + "grad_norm": 1.1135605228940266, + "learning_rate": 6.365497509066143e-07, + "loss": 0.20028996467590332, + "step": 6716 + }, + { + "epoch": 1.7836940645332624, + "grad_norm": 1.146963533940078, + "learning_rate": 6.35009138438839e-07, + "loss": 0.20862875878810883, + "step": 6717 + }, + { + "epoch": 1.7839596335148054, + "grad_norm": 1.3257848293601993, + "learning_rate": 6.334703314308521e-07, + "loss": 0.23522542417049408, + "step": 6718 + }, + { + "epoch": 1.7842252024963483, + "grad_norm": 1.2172150430538355, + "learning_rate": 6.319333301793173e-07, + "loss": 0.24633824825286865, + "step": 6719 + }, + { + "epoch": 1.7844907714778913, + "grad_norm": 1.3131451310460658, + "learning_rate": 6.30398134980551e-07, + "loss": 0.22141410410404205, + "step": 6720 + }, + { + "epoch": 1.7847563404594342, + "grad_norm": 1.3593079444355614, + "learning_rate": 6.288647461305186e-07, + "loss": 0.23313754796981812, + "step": 6721 + }, + { + "epoch": 1.7850219094409772, + "grad_norm": 1.2751593889081192, + "learning_rate": 6.273331639248414e-07, + "loss": 0.22015389800071716, + "step": 6722 + }, + { + "epoch": 1.7852874784225201, + "grad_norm": 1.2716859790694561, + "learning_rate": 6.258033886587911e-07, + "loss": 0.21154522895812988, + "step": 6723 + }, + { + "epoch": 1.785553047404063, + "grad_norm": 1.3319130935282857, + "learning_rate": 6.242754206272883e-07, + "loss": 0.2320503294467926, + "step": 6724 + }, + { + "epoch": 1.785818616385606, + "grad_norm": 1.2016740259413836, + "learning_rate": 6.227492601249097e-07, + "loss": 0.21778921782970428, + "step": 6725 + }, + { + "epoch": 1.786084185367149, + "grad_norm": 1.2321504813505204, + "learning_rate": 6.212249074458776e-07, + "loss": 0.2368871569633484, + "step": 6726 + }, + { + "epoch": 1.786349754348692, + "grad_norm": 1.5195368545073897, + "learning_rate": 6.197023628840704e-07, + "loss": 0.27269479632377625, + "step": 6727 + }, + { + "epoch": 1.7866153233302349, + "grad_norm": 1.2744130185555103, + "learning_rate": 6.181816267330177e-07, + "loss": 0.2414151132106781, + "step": 6728 + }, + { + "epoch": 1.7868808923117778, + "grad_norm": 1.1197825562175172, + "learning_rate": 6.166626992858993e-07, + "loss": 0.2156972736120224, + "step": 6729 + }, + { + "epoch": 1.7871464612933208, + "grad_norm": 1.2748992996552195, + "learning_rate": 6.151455808355455e-07, + "loss": 0.2510441541671753, + "step": 6730 + }, + { + "epoch": 1.787412030274864, + "grad_norm": 1.2924509412618195, + "learning_rate": 6.136302716744402e-07, + "loss": 0.20290088653564453, + "step": 6731 + }, + { + "epoch": 1.787677599256407, + "grad_norm": 1.3705736121123597, + "learning_rate": 6.121167720947174e-07, + "loss": 0.25088101625442505, + "step": 6732 + }, + { + "epoch": 1.7879431682379499, + "grad_norm": 1.3723338572382136, + "learning_rate": 6.106050823881604e-07, + "loss": 0.2566376328468323, + "step": 6733 + }, + { + "epoch": 1.7882087372194928, + "grad_norm": 1.1043772478174716, + "learning_rate": 6.09095202846206e-07, + "loss": 0.1882714033126831, + "step": 6734 + }, + { + "epoch": 1.7884743062010358, + "grad_norm": 1.2323780172305254, + "learning_rate": 6.075871337599404e-07, + "loss": 0.18705856800079346, + "step": 6735 + }, + { + "epoch": 1.7887398751825787, + "grad_norm": 1.1976910574931858, + "learning_rate": 6.060808754201031e-07, + "loss": 0.24756133556365967, + "step": 6736 + }, + { + "epoch": 1.7890054441641217, + "grad_norm": 1.3197777974144425, + "learning_rate": 6.045764281170818e-07, + "loss": 0.2537599205970764, + "step": 6737 + }, + { + "epoch": 1.7892710131456646, + "grad_norm": 1.330362234255321, + "learning_rate": 6.030737921409169e-07, + "loss": 0.22049202024936676, + "step": 6738 + }, + { + "epoch": 1.7895365821272076, + "grad_norm": 1.1222347914068396, + "learning_rate": 6.015729677812965e-07, + "loss": 0.20820394158363342, + "step": 6739 + }, + { + "epoch": 1.7898021511087505, + "grad_norm": 1.3153590716408405, + "learning_rate": 6.00073955327567e-07, + "loss": 0.2339879721403122, + "step": 6740 + }, + { + "epoch": 1.7900677200902935, + "grad_norm": 1.2483259153993207, + "learning_rate": 5.98576755068715e-07, + "loss": 0.22082161903381348, + "step": 6741 + }, + { + "epoch": 1.7903332890718364, + "grad_norm": 1.28162605766883, + "learning_rate": 5.97081367293385e-07, + "loss": 0.21883058547973633, + "step": 6742 + }, + { + "epoch": 1.7905988580533794, + "grad_norm": 1.1591166092235485, + "learning_rate": 5.955877922898712e-07, + "loss": 0.214680016040802, + "step": 6743 + }, + { + "epoch": 1.7908644270349223, + "grad_norm": 1.37628370977899, + "learning_rate": 5.940960303461152e-07, + "loss": 0.24533744156360626, + "step": 6744 + }, + { + "epoch": 1.7911299960164653, + "grad_norm": 1.3046535737377691, + "learning_rate": 5.926060817497137e-07, + "loss": 0.19857585430145264, + "step": 6745 + }, + { + "epoch": 1.7913955649980082, + "grad_norm": 1.4468975368000232, + "learning_rate": 5.911179467879081e-07, + "loss": 0.27493876218795776, + "step": 6746 + }, + { + "epoch": 1.7916611339795512, + "grad_norm": 1.1490145590407708, + "learning_rate": 5.896316257475954e-07, + "loss": 0.20560544729232788, + "step": 6747 + }, + { + "epoch": 1.7919267029610941, + "grad_norm": 1.2213631424870741, + "learning_rate": 5.881471189153199e-07, + "loss": 0.23559418320655823, + "step": 6748 + }, + { + "epoch": 1.792192271942637, + "grad_norm": 1.3144055462601232, + "learning_rate": 5.866644265772769e-07, + "loss": 0.23055103421211243, + "step": 6749 + }, + { + "epoch": 1.79245784092418, + "grad_norm": 1.4747052812755685, + "learning_rate": 5.851835490193136e-07, + "loss": 0.2780724763870239, + "step": 6750 + }, + { + "epoch": 1.792723409905723, + "grad_norm": 1.2354333862915858, + "learning_rate": 5.837044865269248e-07, + "loss": 0.20216618478298187, + "step": 6751 + }, + { + "epoch": 1.792988978887266, + "grad_norm": 1.308066661539038, + "learning_rate": 5.822272393852557e-07, + "loss": 0.2289930284023285, + "step": 6752 + }, + { + "epoch": 1.793254547868809, + "grad_norm": 1.2952454297764495, + "learning_rate": 5.80751807879103e-07, + "loss": 0.2028929740190506, + "step": 6753 + }, + { + "epoch": 1.7935201168503518, + "grad_norm": 1.2960791997009702, + "learning_rate": 5.792781922929114e-07, + "loss": 0.1964842826128006, + "step": 6754 + }, + { + "epoch": 1.7937856858318948, + "grad_norm": 1.4512315838061285, + "learning_rate": 5.77806392910778e-07, + "loss": 0.2617039084434509, + "step": 6755 + }, + { + "epoch": 1.7940512548134377, + "grad_norm": 1.325466585449178, + "learning_rate": 5.76336410016447e-07, + "loss": 0.2582395374774933, + "step": 6756 + }, + { + "epoch": 1.7943168237949807, + "grad_norm": 1.2587701407069858, + "learning_rate": 5.74868243893314e-07, + "loss": 0.23379334807395935, + "step": 6757 + }, + { + "epoch": 1.7945823927765236, + "grad_norm": 1.2979435124807637, + "learning_rate": 5.734018948244247e-07, + "loss": 0.2376977801322937, + "step": 6758 + }, + { + "epoch": 1.7948479617580668, + "grad_norm": 1.414785341098569, + "learning_rate": 5.719373630924741e-07, + "loss": 0.21816037595272064, + "step": 6759 + }, + { + "epoch": 1.7951135307396098, + "grad_norm": 1.1404163081963787, + "learning_rate": 5.704746489798063e-07, + "loss": 0.22156387567520142, + "step": 6760 + }, + { + "epoch": 1.7953790997211527, + "grad_norm": 1.195358056085369, + "learning_rate": 5.690137527684147e-07, + "loss": 0.20818129181861877, + "step": 6761 + }, + { + "epoch": 1.7956446687026957, + "grad_norm": 1.1501993150491747, + "learning_rate": 5.67554674739944e-07, + "loss": 0.18672943115234375, + "step": 6762 + }, + { + "epoch": 1.7959102376842386, + "grad_norm": 1.2143392515173568, + "learning_rate": 5.66097415175686e-07, + "loss": 0.2023036777973175, + "step": 6763 + }, + { + "epoch": 1.7961758066657816, + "grad_norm": 1.3551091626165586, + "learning_rate": 5.646419743565845e-07, + "loss": 0.24798424541950226, + "step": 6764 + }, + { + "epoch": 1.7964413756473245, + "grad_norm": 1.2034553304236573, + "learning_rate": 5.631883525632297e-07, + "loss": 0.1885790377855301, + "step": 6765 + }, + { + "epoch": 1.7967069446288675, + "grad_norm": 1.3693229184747842, + "learning_rate": 5.617365500758631e-07, + "loss": 0.24120381474494934, + "step": 6766 + }, + { + "epoch": 1.7969725136104104, + "grad_norm": 1.2063823939207, + "learning_rate": 5.602865671743763e-07, + "loss": 0.24238690733909607, + "step": 6767 + }, + { + "epoch": 1.7972380825919534, + "grad_norm": 1.2611645650605894, + "learning_rate": 5.588384041383089e-07, + "loss": 0.22928190231323242, + "step": 6768 + }, + { + "epoch": 1.7975036515734963, + "grad_norm": 1.3148280979127052, + "learning_rate": 5.573920612468486e-07, + "loss": 0.2464730143547058, + "step": 6769 + }, + { + "epoch": 1.7977692205550393, + "grad_norm": 1.149985298163883, + "learning_rate": 5.559475387788348e-07, + "loss": 0.2167670875787735, + "step": 6770 + }, + { + "epoch": 1.7980347895365822, + "grad_norm": 1.3365719233561757, + "learning_rate": 5.545048370127526e-07, + "loss": 0.24080663919448853, + "step": 6771 + }, + { + "epoch": 1.7983003585181252, + "grad_norm": 1.3571891328346308, + "learning_rate": 5.530639562267382e-07, + "loss": 0.25481417775154114, + "step": 6772 + }, + { + "epoch": 1.7985659274996681, + "grad_norm": 1.3525822075957274, + "learning_rate": 5.51624896698576e-07, + "loss": 0.23328909277915955, + "step": 6773 + }, + { + "epoch": 1.798831496481211, + "grad_norm": 1.136424514008492, + "learning_rate": 5.50187658705702e-07, + "loss": 0.18779747188091278, + "step": 6774 + }, + { + "epoch": 1.799097065462754, + "grad_norm": 1.3089016035676113, + "learning_rate": 5.487522425251968e-07, + "loss": 0.24840545654296875, + "step": 6775 + }, + { + "epoch": 1.799362634444297, + "grad_norm": 1.4658187281761286, + "learning_rate": 5.473186484337911e-07, + "loss": 0.2559642791748047, + "step": 6776 + }, + { + "epoch": 1.79962820342584, + "grad_norm": 1.3714243263968933, + "learning_rate": 5.458868767078673e-07, + "loss": 0.2005981206893921, + "step": 6777 + }, + { + "epoch": 1.799893772407383, + "grad_norm": 1.4085177100377464, + "learning_rate": 5.444569276234523e-07, + "loss": 0.2480883002281189, + "step": 6778 + }, + { + "epoch": 1.8001593413889259, + "grad_norm": 1.2203856732153913, + "learning_rate": 5.430288014562235e-07, + "loss": 0.23043295741081238, + "step": 6779 + }, + { + "epoch": 1.8004249103704688, + "grad_norm": 1.4245462518797845, + "learning_rate": 5.416024984815072e-07, + "loss": 0.22702521085739136, + "step": 6780 + }, + { + "epoch": 1.8006904793520118, + "grad_norm": 1.153610007644359, + "learning_rate": 5.401780189742789e-07, + "loss": 0.19955751299858093, + "step": 6781 + }, + { + "epoch": 1.8009560483335547, + "grad_norm": 1.2560139759300732, + "learning_rate": 5.387553632091591e-07, + "loss": 0.19743162393569946, + "step": 6782 + }, + { + "epoch": 1.8012216173150977, + "grad_norm": 1.3072968250539403, + "learning_rate": 5.373345314604206e-07, + "loss": 0.2262525111436844, + "step": 6783 + }, + { + "epoch": 1.8014871862966406, + "grad_norm": 1.2987858405959638, + "learning_rate": 5.359155240019809e-07, + "loss": 0.249632328748703, + "step": 6784 + }, + { + "epoch": 1.8017527552781836, + "grad_norm": 1.1804135507002813, + "learning_rate": 5.344983411074111e-07, + "loss": 0.19300231337547302, + "step": 6785 + }, + { + "epoch": 1.8020183242597265, + "grad_norm": 1.293291337799575, + "learning_rate": 5.330829830499263e-07, + "loss": 0.22256134450435638, + "step": 6786 + }, + { + "epoch": 1.8022838932412695, + "grad_norm": 1.283065855572867, + "learning_rate": 5.316694501023911e-07, + "loss": 0.2666356563568115, + "step": 6787 + }, + { + "epoch": 1.8025494622228124, + "grad_norm": 1.239663996945653, + "learning_rate": 5.302577425373156e-07, + "loss": 0.223050057888031, + "step": 6788 + }, + { + "epoch": 1.8028150312043554, + "grad_norm": 1.3011452698852823, + "learning_rate": 5.288478606268632e-07, + "loss": 0.2298094481229782, + "step": 6789 + }, + { + "epoch": 1.8030806001858983, + "grad_norm": 1.4761708863150307, + "learning_rate": 5.27439804642843e-07, + "loss": 0.23596417903900146, + "step": 6790 + }, + { + "epoch": 1.8033461691674413, + "grad_norm": 1.226229776793909, + "learning_rate": 5.26033574856708e-07, + "loss": 0.19501623511314392, + "step": 6791 + }, + { + "epoch": 1.8036117381489842, + "grad_norm": 1.2825838070785722, + "learning_rate": 5.246291715395657e-07, + "loss": 0.23518472909927368, + "step": 6792 + }, + { + "epoch": 1.8038773071305272, + "grad_norm": 1.1820374841237484, + "learning_rate": 5.232265949621651e-07, + "loss": 0.2251899093389511, + "step": 6793 + }, + { + "epoch": 1.8041428761120701, + "grad_norm": 1.1527654541489951, + "learning_rate": 5.218258453949099e-07, + "loss": 0.1764119267463684, + "step": 6794 + }, + { + "epoch": 1.804408445093613, + "grad_norm": 1.2895741356204065, + "learning_rate": 5.204269231078484e-07, + "loss": 0.20768773555755615, + "step": 6795 + }, + { + "epoch": 1.804674014075156, + "grad_norm": 1.3841780370828203, + "learning_rate": 5.19029828370674e-07, + "loss": 0.2115546613931656, + "step": 6796 + }, + { + "epoch": 1.804939583056699, + "grad_norm": 1.315680847185169, + "learning_rate": 5.176345614527312e-07, + "loss": 0.2465972602367401, + "step": 6797 + }, + { + "epoch": 1.805205152038242, + "grad_norm": 1.379203464130328, + "learning_rate": 5.162411226230102e-07, + "loss": 0.2359803020954132, + "step": 6798 + }, + { + "epoch": 1.805470721019785, + "grad_norm": 1.4106819634653143, + "learning_rate": 5.148495121501506e-07, + "loss": 0.27518990635871887, + "step": 6799 + }, + { + "epoch": 1.8057362900013278, + "grad_norm": 1.3653410113402416, + "learning_rate": 5.134597303024391e-07, + "loss": 0.23914849758148193, + "step": 6800 + }, + { + "epoch": 1.8060018589828708, + "grad_norm": 1.256847668479307, + "learning_rate": 5.120717773478068e-07, + "loss": 0.21771098673343658, + "step": 6801 + }, + { + "epoch": 1.8062674279644138, + "grad_norm": 1.2716100664289411, + "learning_rate": 5.106856535538363e-07, + "loss": 0.235421285033226, + "step": 6802 + }, + { + "epoch": 1.8065329969459567, + "grad_norm": 1.4167241401735549, + "learning_rate": 5.093013591877561e-07, + "loss": 0.23973548412322998, + "step": 6803 + }, + { + "epoch": 1.8067985659274997, + "grad_norm": 1.484886222602596, + "learning_rate": 5.079188945164426e-07, + "loss": 0.24059349298477173, + "step": 6804 + }, + { + "epoch": 1.8070641349090426, + "grad_norm": 1.3840991454067133, + "learning_rate": 5.065382598064161e-07, + "loss": 0.25188207626342773, + "step": 6805 + }, + { + "epoch": 1.8073297038905856, + "grad_norm": 1.1866308474402574, + "learning_rate": 5.051594553238482e-07, + "loss": 0.20124536752700806, + "step": 6806 + }, + { + "epoch": 1.8075952728721285, + "grad_norm": 1.2234769875088154, + "learning_rate": 5.037824813345571e-07, + "loss": 0.2059330940246582, + "step": 6807 + }, + { + "epoch": 1.8078608418536715, + "grad_norm": 1.2468279665046458, + "learning_rate": 5.024073381040052e-07, + "loss": 0.2122621238231659, + "step": 6808 + }, + { + "epoch": 1.8081264108352144, + "grad_norm": 1.2203093249465347, + "learning_rate": 5.010340258973046e-07, + "loss": 0.20064303278923035, + "step": 6809 + }, + { + "epoch": 1.8083919798167574, + "grad_norm": 1.3685187895509534, + "learning_rate": 4.996625449792147e-07, + "loss": 0.24773281812667847, + "step": 6810 + }, + { + "epoch": 1.8086575487983003, + "grad_norm": 1.149837064877599, + "learning_rate": 4.982928956141375e-07, + "loss": 0.2111661732196808, + "step": 6811 + }, + { + "epoch": 1.8089231177798433, + "grad_norm": 1.2721912706796665, + "learning_rate": 4.969250780661306e-07, + "loss": 0.24823394417762756, + "step": 6812 + }, + { + "epoch": 1.8091886867613862, + "grad_norm": 1.410632443971984, + "learning_rate": 4.955590925988896e-07, + "loss": 0.24726605415344238, + "step": 6813 + }, + { + "epoch": 1.8094542557429292, + "grad_norm": 1.3112520269484638, + "learning_rate": 4.941949394757605e-07, + "loss": 0.2269962728023529, + "step": 6814 + }, + { + "epoch": 1.8097198247244721, + "grad_norm": 1.311172380903373, + "learning_rate": 4.928326189597377e-07, + "loss": 0.2336469292640686, + "step": 6815 + }, + { + "epoch": 1.809985393706015, + "grad_norm": 1.3372206959113173, + "learning_rate": 4.914721313134585e-07, + "loss": 0.24872124195098877, + "step": 6816 + }, + { + "epoch": 1.810250962687558, + "grad_norm": 1.3116570930981006, + "learning_rate": 4.901134767992099e-07, + "loss": 0.2484157383441925, + "step": 6817 + }, + { + "epoch": 1.810516531669101, + "grad_norm": 1.5234901533359522, + "learning_rate": 4.887566556789247e-07, + "loss": 0.24683158099651337, + "step": 6818 + }, + { + "epoch": 1.810782100650644, + "grad_norm": 1.1959899225802055, + "learning_rate": 4.874016682141802e-07, + "loss": 0.18717995285987854, + "step": 6819 + }, + { + "epoch": 1.8110476696321869, + "grad_norm": 1.2862771000886628, + "learning_rate": 4.860485146662053e-07, + "loss": 0.2220807671546936, + "step": 6820 + }, + { + "epoch": 1.8113132386137298, + "grad_norm": 1.196369102162481, + "learning_rate": 4.84697195295869e-07, + "loss": 0.2178400307893753, + "step": 6821 + }, + { + "epoch": 1.8115788075952728, + "grad_norm": 1.2250082051849178, + "learning_rate": 4.833477103636908e-07, + "loss": 0.2056645154953003, + "step": 6822 + }, + { + "epoch": 1.8118443765768157, + "grad_norm": 1.1729075702986809, + "learning_rate": 4.820000601298358e-07, + "loss": 0.21441905200481415, + "step": 6823 + }, + { + "epoch": 1.8121099455583587, + "grad_norm": 1.4445497728186703, + "learning_rate": 4.806542448541151e-07, + "loss": 0.17688237130641937, + "step": 6824 + }, + { + "epoch": 1.8123755145399016, + "grad_norm": 1.3216659704658935, + "learning_rate": 4.793102647959847e-07, + "loss": 0.22405505180358887, + "step": 6825 + }, + { + "epoch": 1.8126410835214446, + "grad_norm": 1.4226735460298432, + "learning_rate": 4.779681202145503e-07, + "loss": 0.21617908775806427, + "step": 6826 + }, + { + "epoch": 1.8129066525029875, + "grad_norm": 1.3284639992790963, + "learning_rate": 4.766278113685596e-07, + "loss": 0.23570871353149414, + "step": 6827 + }, + { + "epoch": 1.8131722214845305, + "grad_norm": 1.222373726415007, + "learning_rate": 4.7528933851641036e-07, + "loss": 0.23806743323802948, + "step": 6828 + }, + { + "epoch": 1.8134377904660735, + "grad_norm": 1.3312930220149763, + "learning_rate": 4.739527019161405e-07, + "loss": 0.24859179556369781, + "step": 6829 + }, + { + "epoch": 1.8137033594476164, + "grad_norm": 1.2143252342774762, + "learning_rate": 4.726179018254418e-07, + "loss": 0.21314260363578796, + "step": 6830 + }, + { + "epoch": 1.8139689284291594, + "grad_norm": 1.272910058647325, + "learning_rate": 4.7128493850164715e-07, + "loss": 0.25290659070014954, + "step": 6831 + }, + { + "epoch": 1.8142344974107023, + "grad_norm": 1.1800117497978073, + "learning_rate": 4.699538122017355e-07, + "loss": 0.22606703639030457, + "step": 6832 + }, + { + "epoch": 1.8145000663922453, + "grad_norm": 1.3037958158309495, + "learning_rate": 4.6862452318233275e-07, + "loss": 0.23973071575164795, + "step": 6833 + }, + { + "epoch": 1.8147656353737882, + "grad_norm": 1.2341358358957555, + "learning_rate": 4.672970716997094e-07, + "loss": 0.2225341498851776, + "step": 6834 + }, + { + "epoch": 1.8150312043553312, + "grad_norm": 1.441833447404081, + "learning_rate": 4.6597145800978183e-07, + "loss": 0.19153356552124023, + "step": 6835 + }, + { + "epoch": 1.8152967733368741, + "grad_norm": 1.2010339801105188, + "learning_rate": 4.646476823681145e-07, + "loss": 0.19694843888282776, + "step": 6836 + }, + { + "epoch": 1.815562342318417, + "grad_norm": 1.2719437537675773, + "learning_rate": 4.6332574502991554e-07, + "loss": 0.2353869527578354, + "step": 6837 + }, + { + "epoch": 1.81582791129996, + "grad_norm": 1.3504470280928214, + "learning_rate": 4.6200564625003775e-07, + "loss": 0.20919787883758545, + "step": 6838 + }, + { + "epoch": 1.816093480281503, + "grad_norm": 1.1775336742921327, + "learning_rate": 4.6068738628298193e-07, + "loss": 0.18352919816970825, + "step": 6839 + }, + { + "epoch": 1.816359049263046, + "grad_norm": 1.3571378213568392, + "learning_rate": 4.5937096538289147e-07, + "loss": 0.24711212515830994, + "step": 6840 + }, + { + "epoch": 1.8166246182445889, + "grad_norm": 1.2216287617055834, + "learning_rate": 4.580563838035579e-07, + "loss": 0.2350531816482544, + "step": 6841 + }, + { + "epoch": 1.8168901872261318, + "grad_norm": 1.3731447849726235, + "learning_rate": 4.5674364179841614e-07, + "loss": 0.26124465465545654, + "step": 6842 + }, + { + "epoch": 1.8171557562076748, + "grad_norm": 1.3819435677197398, + "learning_rate": 4.5543273962054934e-07, + "loss": 0.2110440880060196, + "step": 6843 + }, + { + "epoch": 1.817421325189218, + "grad_norm": 1.425540844923539, + "learning_rate": 4.5412367752268094e-07, + "loss": 0.2409415990114212, + "step": 6844 + }, + { + "epoch": 1.817686894170761, + "grad_norm": 1.2827549712815094, + "learning_rate": 4.528164557571857e-07, + "loss": 0.2280777543783188, + "step": 6845 + }, + { + "epoch": 1.8179524631523039, + "grad_norm": 1.111661347066374, + "learning_rate": 4.515110745760787e-07, + "loss": 0.201339989900589, + "step": 6846 + }, + { + "epoch": 1.8182180321338468, + "grad_norm": 1.2576623337538495, + "learning_rate": 4.5020753423102083e-07, + "loss": 0.22910752892494202, + "step": 6847 + }, + { + "epoch": 1.8184836011153898, + "grad_norm": 1.2835742527474332, + "learning_rate": 4.4890583497332327e-07, + "loss": 0.21736779808998108, + "step": 6848 + }, + { + "epoch": 1.8187491700969327, + "grad_norm": 1.282796826855034, + "learning_rate": 4.476059770539354e-07, + "loss": 0.20898449420928955, + "step": 6849 + }, + { + "epoch": 1.8190147390784757, + "grad_norm": 1.2514312774528749, + "learning_rate": 4.463079607234555e-07, + "loss": 0.22159051895141602, + "step": 6850 + }, + { + "epoch": 1.8192803080600186, + "grad_norm": 1.290667660986327, + "learning_rate": 4.450117862321246e-07, + "loss": 0.24081172049045563, + "step": 6851 + }, + { + "epoch": 1.8195458770415616, + "grad_norm": 1.2092663587603776, + "learning_rate": 4.4371745382983164e-07, + "loss": 0.17856758832931519, + "step": 6852 + }, + { + "epoch": 1.8198114460231045, + "grad_norm": 1.2002967167521004, + "learning_rate": 4.424249637661071e-07, + "loss": 0.20796868205070496, + "step": 6853 + }, + { + "epoch": 1.8200770150046475, + "grad_norm": 1.5683273026632796, + "learning_rate": 4.4113431629013046e-07, + "loss": 0.24277149140834808, + "step": 6854 + }, + { + "epoch": 1.8203425839861904, + "grad_norm": 1.1767967505464594, + "learning_rate": 4.3984551165071944e-07, + "loss": 0.19315838813781738, + "step": 6855 + }, + { + "epoch": 1.8206081529677334, + "grad_norm": 1.2457379727303777, + "learning_rate": 4.3855855009634075e-07, + "loss": 0.20789340138435364, + "step": 6856 + }, + { + "epoch": 1.8208737219492763, + "grad_norm": 1.4246348317049922, + "learning_rate": 4.372734318751082e-07, + "loss": 0.2871186137199402, + "step": 6857 + }, + { + "epoch": 1.8211392909308193, + "grad_norm": 1.3878283876849893, + "learning_rate": 4.359901572347758e-07, + "loss": 0.2419736236333847, + "step": 6858 + }, + { + "epoch": 1.8214048599123622, + "grad_norm": 1.3237602075469659, + "learning_rate": 4.3470872642274455e-07, + "loss": 0.2190292328596115, + "step": 6859 + }, + { + "epoch": 1.8216704288939052, + "grad_norm": 1.3879953178475168, + "learning_rate": 4.3342913968605903e-07, + "loss": 0.2654367685317993, + "step": 6860 + }, + { + "epoch": 1.8219359978754481, + "grad_norm": 1.3362249609314758, + "learning_rate": 4.321513972714075e-07, + "loss": 0.2536984086036682, + "step": 6861 + }, + { + "epoch": 1.822201566856991, + "grad_norm": 1.3804156416489965, + "learning_rate": 4.308754994251252e-07, + "loss": 0.260431170463562, + "step": 6862 + }, + { + "epoch": 1.822467135838534, + "grad_norm": 1.1376782237723586, + "learning_rate": 4.2960144639318855e-07, + "loss": 0.19348303973674774, + "step": 6863 + }, + { + "epoch": 1.822732704820077, + "grad_norm": 1.3505211109720399, + "learning_rate": 4.283292384212201e-07, + "loss": 0.2284386157989502, + "step": 6864 + }, + { + "epoch": 1.82299827380162, + "grad_norm": 1.2449697035186624, + "learning_rate": 4.270588757544869e-07, + "loss": 0.23439526557922363, + "step": 6865 + }, + { + "epoch": 1.823263842783163, + "grad_norm": 1.247098399621602, + "learning_rate": 4.2579035863790086e-07, + "loss": 0.2123441994190216, + "step": 6866 + }, + { + "epoch": 1.8235294117647058, + "grad_norm": 1.251423525262008, + "learning_rate": 4.245236873160163e-07, + "loss": 0.24568180739879608, + "step": 6867 + }, + { + "epoch": 1.8237949807462488, + "grad_norm": 1.4504253184377665, + "learning_rate": 4.232588620330325e-07, + "loss": 0.24078285694122314, + "step": 6868 + }, + { + "epoch": 1.8240605497277917, + "grad_norm": 1.157509101798501, + "learning_rate": 4.2199588303279414e-07, + "loss": 0.2003621608018875, + "step": 6869 + }, + { + "epoch": 1.8243261187093347, + "grad_norm": 1.3049050095763572, + "learning_rate": 4.2073475055878664e-07, + "loss": 0.21201889216899872, + "step": 6870 + }, + { + "epoch": 1.8245916876908777, + "grad_norm": 1.429124542908126, + "learning_rate": 4.1947546485414215e-07, + "loss": 0.23175427317619324, + "step": 6871 + }, + { + "epoch": 1.8248572566724208, + "grad_norm": 1.3101487536079581, + "learning_rate": 4.182180261616364e-07, + "loss": 0.2391383945941925, + "step": 6872 + }, + { + "epoch": 1.8251228256539638, + "grad_norm": 1.341869026992186, + "learning_rate": 4.169624347236878e-07, + "loss": 0.23120146989822388, + "step": 6873 + }, + { + "epoch": 1.8253883946355067, + "grad_norm": 1.1699948636498165, + "learning_rate": 4.157086907823604e-07, + "loss": 0.22541432082653046, + "step": 6874 + }, + { + "epoch": 1.8256539636170497, + "grad_norm": 1.3354293669412138, + "learning_rate": 4.1445679457936094e-07, + "loss": 0.25613510608673096, + "step": 6875 + }, + { + "epoch": 1.8259195325985926, + "grad_norm": 1.191861909098097, + "learning_rate": 4.1320674635604186e-07, + "loss": 0.21002547442913055, + "step": 6876 + }, + { + "epoch": 1.8261851015801356, + "grad_norm": 1.230870532242656, + "learning_rate": 4.119585463533959e-07, + "loss": 0.2593066692352295, + "step": 6877 + }, + { + "epoch": 1.8264506705616785, + "grad_norm": 1.4772106156087776, + "learning_rate": 4.1071219481206184e-07, + "loss": 0.23771531879901886, + "step": 6878 + }, + { + "epoch": 1.8267162395432215, + "grad_norm": 1.3106459571340912, + "learning_rate": 4.094676919723206e-07, + "loss": 0.2069541960954666, + "step": 6879 + }, + { + "epoch": 1.8269818085247644, + "grad_norm": 1.2065450512433227, + "learning_rate": 4.082250380740993e-07, + "loss": 0.21314311027526855, + "step": 6880 + }, + { + "epoch": 1.8272473775063074, + "grad_norm": 1.2723957233809677, + "learning_rate": 4.069842333569662e-07, + "loss": 0.198696106672287, + "step": 6881 + }, + { + "epoch": 1.8275129464878503, + "grad_norm": 1.2365636263350124, + "learning_rate": 4.057452780601334e-07, + "loss": 0.22771228849887848, + "step": 6882 + }, + { + "epoch": 1.8277785154693933, + "grad_norm": 1.3935711018120034, + "learning_rate": 4.045081724224564e-07, + "loss": 0.24176150560379028, + "step": 6883 + }, + { + "epoch": 1.8280440844509362, + "grad_norm": 1.1711714123320747, + "learning_rate": 4.0327291668243785e-07, + "loss": 0.18257084488868713, + "step": 6884 + }, + { + "epoch": 1.8283096534324792, + "grad_norm": 1.7740145369201021, + "learning_rate": 4.02039511078216e-07, + "loss": 0.2317531704902649, + "step": 6885 + }, + { + "epoch": 1.8285752224140222, + "grad_norm": 1.237685133468282, + "learning_rate": 4.008079558475797e-07, + "loss": 0.22523516416549683, + "step": 6886 + }, + { + "epoch": 1.828840791395565, + "grad_norm": 1.338469580607285, + "learning_rate": 3.995782512279578e-07, + "loss": 0.22351330518722534, + "step": 6887 + }, + { + "epoch": 1.829106360377108, + "grad_norm": 1.3272231861758204, + "learning_rate": 3.983503974564229e-07, + "loss": 0.22151902318000793, + "step": 6888 + }, + { + "epoch": 1.829371929358651, + "grad_norm": 1.2483501881623744, + "learning_rate": 3.971243947696901e-07, + "loss": 0.20800583064556122, + "step": 6889 + }, + { + "epoch": 1.829637498340194, + "grad_norm": 1.189419989304772, + "learning_rate": 3.959002434041181e-07, + "loss": 0.21332690119743347, + "step": 6890 + }, + { + "epoch": 1.829903067321737, + "grad_norm": 1.3040750377284556, + "learning_rate": 3.946779435957093e-07, + "loss": 0.2561502456665039, + "step": 6891 + }, + { + "epoch": 1.8301686363032799, + "grad_norm": 1.2150229659643972, + "learning_rate": 3.934574955801074e-07, + "loss": 0.23636910319328308, + "step": 6892 + }, + { + "epoch": 1.8304342052848228, + "grad_norm": 1.303931878967275, + "learning_rate": 3.922388995926041e-07, + "loss": 0.26683998107910156, + "step": 6893 + }, + { + "epoch": 1.8306997742663658, + "grad_norm": 1.319570373744726, + "learning_rate": 3.910221558681271e-07, + "loss": 0.2779492735862732, + "step": 6894 + }, + { + "epoch": 1.8309653432479087, + "grad_norm": 1.473106593059021, + "learning_rate": 3.8980726464125095e-07, + "loss": 0.20174488425254822, + "step": 6895 + }, + { + "epoch": 1.8312309122294517, + "grad_norm": 1.3128034885814306, + "learning_rate": 3.885942261461928e-07, + "loss": 0.21486055850982666, + "step": 6896 + }, + { + "epoch": 1.8314964812109946, + "grad_norm": 1.2201269476427121, + "learning_rate": 3.8738304061681107e-07, + "loss": 0.25637733936309814, + "step": 6897 + }, + { + "epoch": 1.8317620501925376, + "grad_norm": 1.3661274524986262, + "learning_rate": 3.8617370828661014e-07, + "loss": 0.2518364489078522, + "step": 6898 + }, + { + "epoch": 1.8320276191740805, + "grad_norm": 1.2902396654446358, + "learning_rate": 3.849662293887324e-07, + "loss": 0.25752246379852295, + "step": 6899 + }, + { + "epoch": 1.8322931881556235, + "grad_norm": 1.1514833439027936, + "learning_rate": 3.8376060415596826e-07, + "loss": 0.20891718566417694, + "step": 6900 + }, + { + "epoch": 1.8325587571371664, + "grad_norm": 1.378720679176223, + "learning_rate": 3.825568328207452e-07, + "loss": 0.20491960644721985, + "step": 6901 + }, + { + "epoch": 1.8328243261187094, + "grad_norm": 1.2540067790590503, + "learning_rate": 3.813549156151386e-07, + "loss": 0.22183339297771454, + "step": 6902 + }, + { + "epoch": 1.8330898951002523, + "grad_norm": 1.3321077338345055, + "learning_rate": 3.801548527708621e-07, + "loss": 0.2476987987756729, + "step": 6903 + }, + { + "epoch": 1.8333554640817953, + "grad_norm": 1.470629998110282, + "learning_rate": 3.7895664451927493e-07, + "loss": 0.26486238837242126, + "step": 6904 + }, + { + "epoch": 1.8336210330633382, + "grad_norm": 1.2524745099106778, + "learning_rate": 3.777602910913769e-07, + "loss": 0.25922873616218567, + "step": 6905 + }, + { + "epoch": 1.8338866020448812, + "grad_norm": 1.317563058388092, + "learning_rate": 3.7656579271781127e-07, + "loss": 0.22682476043701172, + "step": 6906 + }, + { + "epoch": 1.8341521710264241, + "grad_norm": 1.2391277284536568, + "learning_rate": 3.753731496288626e-07, + "loss": 0.20371592044830322, + "step": 6907 + }, + { + "epoch": 1.834417740007967, + "grad_norm": 1.2444383452097851, + "learning_rate": 3.7418236205445826e-07, + "loss": 0.23857446014881134, + "step": 6908 + }, + { + "epoch": 1.83468330898951, + "grad_norm": 2.6487436557467645, + "learning_rate": 3.729934302241689e-07, + "loss": 0.27119290828704834, + "step": 6909 + }, + { + "epoch": 1.834948877971053, + "grad_norm": 1.254159773595776, + "learning_rate": 3.7180635436720567e-07, + "loss": 0.2354927361011505, + "step": 6910 + }, + { + "epoch": 1.835214446952596, + "grad_norm": 1.301136184663389, + "learning_rate": 3.706211347124233e-07, + "loss": 0.26378512382507324, + "step": 6911 + }, + { + "epoch": 1.835480015934139, + "grad_norm": 1.3296098934003593, + "learning_rate": 3.6943777148831907e-07, + "loss": 0.20725026726722717, + "step": 6912 + }, + { + "epoch": 1.8357455849156818, + "grad_norm": 1.2212362377090786, + "learning_rate": 3.682562649230304e-07, + "loss": 0.2049856185913086, + "step": 6913 + }, + { + "epoch": 1.8360111538972248, + "grad_norm": 1.2555620791922353, + "learning_rate": 3.6707661524433833e-07, + "loss": 0.19303423166275024, + "step": 6914 + }, + { + "epoch": 1.8362767228787678, + "grad_norm": 1.2395332139010746, + "learning_rate": 3.6589882267966445e-07, + "loss": 0.21510104835033417, + "step": 6915 + }, + { + "epoch": 1.8365422918603107, + "grad_norm": 1.1669418633603965, + "learning_rate": 3.6472288745607376e-07, + "loss": 0.1933138072490692, + "step": 6916 + }, + { + "epoch": 1.8368078608418537, + "grad_norm": 1.112367559966563, + "learning_rate": 3.6354880980027373e-07, + "loss": 0.2015206664800644, + "step": 6917 + }, + { + "epoch": 1.8370734298233966, + "grad_norm": 1.2823070307410491, + "learning_rate": 3.6237658993861114e-07, + "loss": 0.20550866425037384, + "step": 6918 + }, + { + "epoch": 1.8373389988049396, + "grad_norm": 1.3067689335737758, + "learning_rate": 3.612062280970763e-07, + "loss": 0.221620112657547, + "step": 6919 + }, + { + "epoch": 1.8376045677864825, + "grad_norm": 1.3556317520839982, + "learning_rate": 3.6003772450130315e-07, + "loss": 0.23098941147327423, + "step": 6920 + }, + { + "epoch": 1.8378701367680255, + "grad_norm": 1.147765516964157, + "learning_rate": 3.588710793765626e-07, + "loss": 0.2119837999343872, + "step": 6921 + }, + { + "epoch": 1.8381357057495684, + "grad_norm": 1.3802709807389941, + "learning_rate": 3.5770629294777146e-07, + "loss": 0.24879229068756104, + "step": 6922 + }, + { + "epoch": 1.8384012747311114, + "grad_norm": 1.3060365647669372, + "learning_rate": 3.565433654394879e-07, + "loss": 0.18895789980888367, + "step": 6923 + }, + { + "epoch": 1.8386668437126543, + "grad_norm": 1.2553378569117732, + "learning_rate": 3.55382297075908e-07, + "loss": 0.23148275911808014, + "step": 6924 + }, + { + "epoch": 1.8389324126941973, + "grad_norm": 1.212120061404488, + "learning_rate": 3.542230880808739e-07, + "loss": 0.20919913053512573, + "step": 6925 + }, + { + "epoch": 1.8391979816757402, + "grad_norm": 1.4703495422250146, + "learning_rate": 3.53065738677868e-07, + "loss": 0.22832845151424408, + "step": 6926 + }, + { + "epoch": 1.8394635506572832, + "grad_norm": 1.2792392305491092, + "learning_rate": 3.519102490900117e-07, + "loss": 0.25866004824638367, + "step": 6927 + }, + { + "epoch": 1.8397291196388261, + "grad_norm": 1.4425441758777668, + "learning_rate": 3.507566195400691e-07, + "loss": 0.23372048139572144, + "step": 6928 + }, + { + "epoch": 1.839994688620369, + "grad_norm": 1.3100572186568338, + "learning_rate": 3.496048502504501e-07, + "loss": 0.2516997158527374, + "step": 6929 + }, + { + "epoch": 1.840260257601912, + "grad_norm": 1.3352189279547024, + "learning_rate": 3.4845494144320036e-07, + "loss": 0.21170508861541748, + "step": 6930 + }, + { + "epoch": 1.840525826583455, + "grad_norm": 1.3970465930645521, + "learning_rate": 3.473068933400081e-07, + "loss": 0.2642953395843506, + "step": 6931 + }, + { + "epoch": 1.840791395564998, + "grad_norm": 1.2429277065520816, + "learning_rate": 3.461607061622041e-07, + "loss": 0.2294994294643402, + "step": 6932 + }, + { + "epoch": 1.8410569645465409, + "grad_norm": 1.3898674163561502, + "learning_rate": 3.450163801307582e-07, + "loss": 0.2554621696472168, + "step": 6933 + }, + { + "epoch": 1.8413225335280838, + "grad_norm": 1.5251200097904765, + "learning_rate": 3.4387391546628733e-07, + "loss": 0.2291295826435089, + "step": 6934 + }, + { + "epoch": 1.8415881025096268, + "grad_norm": 1.2253918775229307, + "learning_rate": 3.4273331238903974e-07, + "loss": 0.1996842920780182, + "step": 6935 + }, + { + "epoch": 1.8418536714911697, + "grad_norm": 1.3974356568527164, + "learning_rate": 3.415945711189128e-07, + "loss": 0.248038187623024, + "step": 6936 + }, + { + "epoch": 1.8421192404727127, + "grad_norm": 1.4224083213114915, + "learning_rate": 3.4045769187544096e-07, + "loss": 0.232235848903656, + "step": 6937 + }, + { + "epoch": 1.8423848094542556, + "grad_norm": 1.2811247103872994, + "learning_rate": 3.3932267487780333e-07, + "loss": 0.2526085376739502, + "step": 6938 + }, + { + "epoch": 1.8426503784357986, + "grad_norm": 1.324059920588895, + "learning_rate": 3.381895203448182e-07, + "loss": 0.22401389479637146, + "step": 6939 + }, + { + "epoch": 1.8429159474173415, + "grad_norm": 1.2904044842651823, + "learning_rate": 3.3705822849494195e-07, + "loss": 0.2509264647960663, + "step": 6940 + }, + { + "epoch": 1.8431815163988845, + "grad_norm": 1.2502849304352568, + "learning_rate": 3.3592879954627564e-07, + "loss": 0.2451169192790985, + "step": 6941 + }, + { + "epoch": 1.8434470853804275, + "grad_norm": 1.2774613485778883, + "learning_rate": 3.3480123371655957e-07, + "loss": 0.2361738532781601, + "step": 6942 + }, + { + "epoch": 1.8437126543619704, + "grad_norm": 1.1823675774441849, + "learning_rate": 3.3367553122317544e-07, + "loss": 0.22336295247077942, + "step": 6943 + }, + { + "epoch": 1.8439782233435134, + "grad_norm": 1.4218109729535482, + "learning_rate": 3.325516922831451e-07, + "loss": 0.22287659347057343, + "step": 6944 + }, + { + "epoch": 1.8442437923250563, + "grad_norm": 1.2819242467045069, + "learning_rate": 3.3142971711312975e-07, + "loss": 0.21845945715904236, + "step": 6945 + }, + { + "epoch": 1.8445093613065993, + "grad_norm": 1.2822597279006254, + "learning_rate": 3.303096059294364e-07, + "loss": 0.2650350332260132, + "step": 6946 + }, + { + "epoch": 1.8447749302881422, + "grad_norm": 1.346661503925149, + "learning_rate": 3.291913589480078e-07, + "loss": 0.21282124519348145, + "step": 6947 + }, + { + "epoch": 1.8450404992696852, + "grad_norm": 1.1254422779054267, + "learning_rate": 3.280749763844293e-07, + "loss": 0.17899346351623535, + "step": 6948 + }, + { + "epoch": 1.8453060682512281, + "grad_norm": 1.3295675928838626, + "learning_rate": 3.269604584539254e-07, + "loss": 0.23462103307247162, + "step": 6949 + }, + { + "epoch": 1.845571637232771, + "grad_norm": 1.2573990354862534, + "learning_rate": 3.2584780537136206e-07, + "loss": 0.20188388228416443, + "step": 6950 + }, + { + "epoch": 1.845837206214314, + "grad_norm": 1.3823133322277716, + "learning_rate": 3.247370173512443e-07, + "loss": 0.2760109305381775, + "step": 6951 + }, + { + "epoch": 1.846102775195857, + "grad_norm": 1.1542508493730164, + "learning_rate": 3.236280946077219e-07, + "loss": 0.20977352559566498, + "step": 6952 + }, + { + "epoch": 1.8463683441774, + "grad_norm": 1.299549634983184, + "learning_rate": 3.225210373545806e-07, + "loss": 0.26468873023986816, + "step": 6953 + }, + { + "epoch": 1.8466339131589429, + "grad_norm": 1.287524526318513, + "learning_rate": 3.214158458052463e-07, + "loss": 0.2362184375524521, + "step": 6954 + }, + { + "epoch": 1.8468994821404858, + "grad_norm": 1.29131597308928, + "learning_rate": 3.2031252017278966e-07, + "loss": 0.21406327188014984, + "step": 6955 + }, + { + "epoch": 1.847165051122029, + "grad_norm": 1.4794600314925854, + "learning_rate": 3.1921106066991835e-07, + "loss": 0.2698758840560913, + "step": 6956 + }, + { + "epoch": 1.847430620103572, + "grad_norm": 1.3029413719135112, + "learning_rate": 3.1811146750898025e-07, + "loss": 0.22954389452934265, + "step": 6957 + }, + { + "epoch": 1.847696189085115, + "grad_norm": 1.149631756175727, + "learning_rate": 3.170137409019636e-07, + "loss": 0.23005755245685577, + "step": 6958 + }, + { + "epoch": 1.8479617580666579, + "grad_norm": 1.270561680049171, + "learning_rate": 3.159178810604968e-07, + "loss": 0.22408893704414368, + "step": 6959 + }, + { + "epoch": 1.8482273270482008, + "grad_norm": 1.1761716687553918, + "learning_rate": 3.14823888195851e-07, + "loss": 0.1983698308467865, + "step": 6960 + }, + { + "epoch": 1.8484928960297438, + "grad_norm": 1.387251984339494, + "learning_rate": 3.137317625189329e-07, + "loss": 0.24643054604530334, + "step": 6961 + }, + { + "epoch": 1.8487584650112867, + "grad_norm": 1.3612119090250128, + "learning_rate": 3.1264150424029083e-07, + "loss": 0.274917870759964, + "step": 6962 + }, + { + "epoch": 1.8490240339928297, + "grad_norm": 1.2836957141365997, + "learning_rate": 3.115531135701155e-07, + "loss": 0.2129468023777008, + "step": 6963 + }, + { + "epoch": 1.8492896029743726, + "grad_norm": 1.3421884287788837, + "learning_rate": 3.1046659071823695e-07, + "loss": 0.24127928912639618, + "step": 6964 + }, + { + "epoch": 1.8495551719559156, + "grad_norm": 1.2737231627436634, + "learning_rate": 3.093819358941208e-07, + "loss": 0.2528054416179657, + "step": 6965 + }, + { + "epoch": 1.8498207409374585, + "grad_norm": 1.253824703575336, + "learning_rate": 3.0829914930687767e-07, + "loss": 0.23623798787593842, + "step": 6966 + }, + { + "epoch": 1.8500863099190015, + "grad_norm": 1.231408637511902, + "learning_rate": 3.0721823116525497e-07, + "loss": 0.20241659879684448, + "step": 6967 + }, + { + "epoch": 1.8503518789005444, + "grad_norm": 1.264350645442844, + "learning_rate": 3.0613918167764156e-07, + "loss": 0.24365916848182678, + "step": 6968 + }, + { + "epoch": 1.8506174478820874, + "grad_norm": 1.311846273217192, + "learning_rate": 3.0506200105206554e-07, + "loss": 0.2550637722015381, + "step": 6969 + }, + { + "epoch": 1.8508830168636303, + "grad_norm": 1.1438212130974086, + "learning_rate": 3.0398668949619515e-07, + "loss": 0.21531938016414642, + "step": 6970 + }, + { + "epoch": 1.8511485858451733, + "grad_norm": 1.3468646282560623, + "learning_rate": 3.029132472173368e-07, + "loss": 0.22749900817871094, + "step": 6971 + }, + { + "epoch": 1.8514141548267162, + "grad_norm": 1.186404759445675, + "learning_rate": 3.018416744224373e-07, + "loss": 0.1826775223016739, + "step": 6972 + }, + { + "epoch": 1.8516797238082592, + "grad_norm": 1.1782373460713542, + "learning_rate": 3.0077197131808344e-07, + "loss": 0.21982814371585846, + "step": 6973 + }, + { + "epoch": 1.8519452927898021, + "grad_norm": 1.2874557997839566, + "learning_rate": 2.997041381105026e-07, + "loss": 0.23515473306179047, + "step": 6974 + }, + { + "epoch": 1.852210861771345, + "grad_norm": 1.2184369208885015, + "learning_rate": 2.9863817500556e-07, + "loss": 0.19620616734027863, + "step": 6975 + }, + { + "epoch": 1.852476430752888, + "grad_norm": 1.208715706835639, + "learning_rate": 2.975740822087603e-07, + "loss": 0.22158116102218628, + "step": 6976 + }, + { + "epoch": 1.852741999734431, + "grad_norm": 1.5176127203291871, + "learning_rate": 2.96511859925247e-07, + "loss": 0.23082244396209717, + "step": 6977 + }, + { + "epoch": 1.853007568715974, + "grad_norm": 1.286088700644728, + "learning_rate": 2.954515083598064e-07, + "loss": 0.22743141651153564, + "step": 6978 + }, + { + "epoch": 1.853273137697517, + "grad_norm": 1.3437900472909596, + "learning_rate": 2.943930277168594e-07, + "loss": 0.2329188883304596, + "step": 6979 + }, + { + "epoch": 1.8535387066790598, + "grad_norm": 1.1892741095151198, + "learning_rate": 2.9333641820047055e-07, + "loss": 0.20360302925109863, + "step": 6980 + }, + { + "epoch": 1.8538042756606028, + "grad_norm": 1.1771915113483071, + "learning_rate": 2.922816800143402e-07, + "loss": 0.1903664767742157, + "step": 6981 + }, + { + "epoch": 1.8540698446421457, + "grad_norm": 1.2252145672801615, + "learning_rate": 2.912288133618102e-07, + "loss": 0.2247854322195053, + "step": 6982 + }, + { + "epoch": 1.8543354136236887, + "grad_norm": 1.305215823982529, + "learning_rate": 2.9017781844586035e-07, + "loss": 0.22693192958831787, + "step": 6983 + }, + { + "epoch": 1.8546009826052319, + "grad_norm": 1.3213552294005186, + "learning_rate": 2.891286954691108e-07, + "loss": 0.23769894242286682, + "step": 6984 + }, + { + "epoch": 1.8548665515867748, + "grad_norm": 1.267542763443237, + "learning_rate": 2.880814446338198e-07, + "loss": 0.23251450061798096, + "step": 6985 + }, + { + "epoch": 1.8551321205683178, + "grad_norm": 1.3253334264213772, + "learning_rate": 2.870360661418847e-07, + "loss": 0.20828741788864136, + "step": 6986 + }, + { + "epoch": 1.8553976895498607, + "grad_norm": 1.2448815733296377, + "learning_rate": 2.859925601948421e-07, + "loss": 0.2324519008398056, + "step": 6987 + }, + { + "epoch": 1.8556632585314037, + "grad_norm": 1.2799176737952995, + "learning_rate": 2.8495092699386774e-07, + "loss": 0.2166297733783722, + "step": 6988 + }, + { + "epoch": 1.8559288275129466, + "grad_norm": 1.416567928880924, + "learning_rate": 2.839111667397765e-07, + "loss": 0.2760158181190491, + "step": 6989 + }, + { + "epoch": 1.8561943964944896, + "grad_norm": 1.1117414218952344, + "learning_rate": 2.8287327963302025e-07, + "loss": 0.2263752520084381, + "step": 6990 + }, + { + "epoch": 1.8564599654760325, + "grad_norm": 1.328135206527719, + "learning_rate": 2.8183726587369455e-07, + "loss": 0.2490656077861786, + "step": 6991 + }, + { + "epoch": 1.8567255344575755, + "grad_norm": 1.4860885268210424, + "learning_rate": 2.808031256615285e-07, + "loss": 0.22495508193969727, + "step": 6992 + }, + { + "epoch": 1.8569911034391184, + "grad_norm": 1.297235121122649, + "learning_rate": 2.7977085919589253e-07, + "loss": 0.2671046853065491, + "step": 6993 + }, + { + "epoch": 1.8572566724206614, + "grad_norm": 1.2050300397617886, + "learning_rate": 2.7874046667579535e-07, + "loss": 0.19782954454421997, + "step": 6994 + }, + { + "epoch": 1.8575222414022043, + "grad_norm": 1.3009259795352104, + "learning_rate": 2.777119482998847e-07, + "loss": 0.24458879232406616, + "step": 6995 + }, + { + "epoch": 1.8577878103837473, + "grad_norm": 1.203325902936209, + "learning_rate": 2.7668530426644637e-07, + "loss": 0.23476794362068176, + "step": 6996 + }, + { + "epoch": 1.8580533793652902, + "grad_norm": 1.3828799415147273, + "learning_rate": 2.7566053477340535e-07, + "loss": 0.2318287342786789, + "step": 6997 + }, + { + "epoch": 1.8583189483468332, + "grad_norm": 1.1075382213650395, + "learning_rate": 2.746376400183259e-07, + "loss": 0.21341973543167114, + "step": 6998 + }, + { + "epoch": 1.8585845173283762, + "grad_norm": 1.3634634009375282, + "learning_rate": 2.7361662019840916e-07, + "loss": 0.25269803404808044, + "step": 6999 + }, + { + "epoch": 1.858850086309919, + "grad_norm": 1.2242004376785176, + "learning_rate": 2.7259747551049653e-07, + "loss": 0.24590039253234863, + "step": 7000 + }, + { + "epoch": 1.859115655291462, + "grad_norm": 1.2116643717780577, + "learning_rate": 2.715802061510664e-07, + "loss": 0.19907096028327942, + "step": 7001 + }, + { + "epoch": 1.859381224273005, + "grad_norm": 1.319285786592131, + "learning_rate": 2.705648123162363e-07, + "loss": 0.24304917454719543, + "step": 7002 + }, + { + "epoch": 1.859646793254548, + "grad_norm": 1.3884525546157216, + "learning_rate": 2.6955129420176193e-07, + "loss": 0.24846915900707245, + "step": 7003 + }, + { + "epoch": 1.859912362236091, + "grad_norm": 1.365283429552511, + "learning_rate": 2.685396520030381e-07, + "loss": 0.21709200739860535, + "step": 7004 + }, + { + "epoch": 1.8601779312176339, + "grad_norm": 1.3687506828870908, + "learning_rate": 2.675298859150977e-07, + "loss": 0.28031325340270996, + "step": 7005 + }, + { + "epoch": 1.8604435001991768, + "grad_norm": 1.1527129171653896, + "learning_rate": 2.6652199613261155e-07, + "loss": 0.20367707312107086, + "step": 7006 + }, + { + "epoch": 1.8607090691807198, + "grad_norm": 1.1875101722790007, + "learning_rate": 2.6551598284988877e-07, + "loss": 0.20737403631210327, + "step": 7007 + }, + { + "epoch": 1.8609746381622627, + "grad_norm": 1.3375926225189751, + "learning_rate": 2.6451184626087646e-07, + "loss": 0.2504046559333801, + "step": 7008 + }, + { + "epoch": 1.8612402071438057, + "grad_norm": 1.3403751507501938, + "learning_rate": 2.635095865591608e-07, + "loss": 0.26347339153289795, + "step": 7009 + }, + { + "epoch": 1.8615057761253486, + "grad_norm": 1.1832867553985462, + "learning_rate": 2.625092039379662e-07, + "loss": 0.2347220480442047, + "step": 7010 + }, + { + "epoch": 1.8617713451068916, + "grad_norm": 1.2487098903864389, + "learning_rate": 2.6151069859015386e-07, + "loss": 0.23565630614757538, + "step": 7011 + }, + { + "epoch": 1.8620369140884345, + "grad_norm": 1.2377624004623402, + "learning_rate": 2.605140707082243e-07, + "loss": 0.21462437510490417, + "step": 7012 + }, + { + "epoch": 1.8623024830699775, + "grad_norm": 1.2992774401284823, + "learning_rate": 2.595193204843149e-07, + "loss": 0.24224728345870972, + "step": 7013 + }, + { + "epoch": 1.8625680520515204, + "grad_norm": 1.3531530893390702, + "learning_rate": 2.5852644811020344e-07, + "loss": 0.24200880527496338, + "step": 7014 + }, + { + "epoch": 1.8628336210330634, + "grad_norm": 1.2331149203562455, + "learning_rate": 2.5753545377730227e-07, + "loss": 0.23315191268920898, + "step": 7015 + }, + { + "epoch": 1.8630991900146063, + "grad_norm": 1.4360061023192454, + "learning_rate": 2.56546337676663e-07, + "loss": 0.31112274527549744, + "step": 7016 + }, + { + "epoch": 1.8633647589961493, + "grad_norm": 1.1775380155652753, + "learning_rate": 2.555590999989754e-07, + "loss": 0.2291945070028305, + "step": 7017 + }, + { + "epoch": 1.8636303279776922, + "grad_norm": 1.3248749602779475, + "learning_rate": 2.5457374093457057e-07, + "loss": 0.2324746549129486, + "step": 7018 + }, + { + "epoch": 1.8638958969592352, + "grad_norm": 1.3333311590100283, + "learning_rate": 2.5359026067341086e-07, + "loss": 0.2585206627845764, + "step": 7019 + }, + { + "epoch": 1.8641614659407781, + "grad_norm": 1.254813387894953, + "learning_rate": 2.5260865940510027e-07, + "loss": 0.22986871004104614, + "step": 7020 + }, + { + "epoch": 1.864427034922321, + "grad_norm": 1.3302473304174876, + "learning_rate": 2.5162893731888074e-07, + "loss": 0.22615428268909454, + "step": 7021 + }, + { + "epoch": 1.864692603903864, + "grad_norm": 1.2311139475810073, + "learning_rate": 2.5065109460363113e-07, + "loss": 0.21324753761291504, + "step": 7022 + }, + { + "epoch": 1.864958172885407, + "grad_norm": 1.2499721276179248, + "learning_rate": 2.4967513144786736e-07, + "loss": 0.2247733324766159, + "step": 7023 + }, + { + "epoch": 1.86522374186695, + "grad_norm": 1.198842298043478, + "learning_rate": 2.4870104803974336e-07, + "loss": 0.22080597281455994, + "step": 7024 + }, + { + "epoch": 1.865489310848493, + "grad_norm": 1.3721040923851937, + "learning_rate": 2.4772884456705224e-07, + "loss": 0.23669888079166412, + "step": 7025 + }, + { + "epoch": 1.8657548798300359, + "grad_norm": 1.2946969495879501, + "learning_rate": 2.4675852121722075e-07, + "loss": 0.2320847064256668, + "step": 7026 + }, + { + "epoch": 1.8660204488115788, + "grad_norm": 1.374404266409337, + "learning_rate": 2.4579007817731925e-07, + "loss": 0.2595662474632263, + "step": 7027 + }, + { + "epoch": 1.8662860177931218, + "grad_norm": 1.2351512812852723, + "learning_rate": 2.4482351563405174e-07, + "loss": 0.22152045369148254, + "step": 7028 + }, + { + "epoch": 1.8665515867746647, + "grad_norm": 1.270416082371449, + "learning_rate": 2.4385883377375683e-07, + "loss": 0.2391948401927948, + "step": 7029 + }, + { + "epoch": 1.8668171557562077, + "grad_norm": 1.3234796115140017, + "learning_rate": 2.428960327824159e-07, + "loss": 0.23117749392986298, + "step": 7030 + }, + { + "epoch": 1.8670827247377506, + "grad_norm": 1.313106749776766, + "learning_rate": 2.41935112845646e-07, + "loss": 0.24019500613212585, + "step": 7031 + }, + { + "epoch": 1.8673482937192936, + "grad_norm": 1.253088890729472, + "learning_rate": 2.4097607414869995e-07, + "loss": 0.19560202956199646, + "step": 7032 + }, + { + "epoch": 1.8676138627008365, + "grad_norm": 1.3625686769003584, + "learning_rate": 2.4001891687647103e-07, + "loss": 0.23110055923461914, + "step": 7033 + }, + { + "epoch": 1.8678794316823795, + "grad_norm": 1.3388200482229684, + "learning_rate": 2.39063641213485e-07, + "loss": 0.2214709371328354, + "step": 7034 + }, + { + "epoch": 1.8681450006639224, + "grad_norm": 1.2700799842548796, + "learning_rate": 2.381102473439101e-07, + "loss": 0.22123369574546814, + "step": 7035 + }, + { + "epoch": 1.8684105696454654, + "grad_norm": 1.4629863869289934, + "learning_rate": 2.371587354515481e-07, + "loss": 0.23984813690185547, + "step": 7036 + }, + { + "epoch": 1.8686761386270083, + "grad_norm": 1.4496870886295976, + "learning_rate": 2.3620910571984124e-07, + "loss": 0.26089030504226685, + "step": 7037 + }, + { + "epoch": 1.8689417076085513, + "grad_norm": 1.2076380290124689, + "learning_rate": 2.3526135833186527e-07, + "loss": 0.2344229370355606, + "step": 7038 + }, + { + "epoch": 1.8692072765900942, + "grad_norm": 1.290620691312973, + "learning_rate": 2.34315493470334e-07, + "loss": 0.24499498307704926, + "step": 7039 + }, + { + "epoch": 1.8694728455716372, + "grad_norm": 1.2975050166282813, + "learning_rate": 2.333715113176005e-07, + "loss": 0.21971477568149567, + "step": 7040 + }, + { + "epoch": 1.8697384145531801, + "grad_norm": 1.2659856510175163, + "learning_rate": 2.3242941205565362e-07, + "loss": 0.2594453990459442, + "step": 7041 + }, + { + "epoch": 1.870003983534723, + "grad_norm": 1.3125676617059407, + "learning_rate": 2.3148919586611806e-07, + "loss": 0.24689960479736328, + "step": 7042 + }, + { + "epoch": 1.870269552516266, + "grad_norm": 1.2165345453138858, + "learning_rate": 2.3055086293025665e-07, + "loss": 0.19972509145736694, + "step": 7043 + }, + { + "epoch": 1.870535121497809, + "grad_norm": 1.2460782677559714, + "learning_rate": 2.2961441342896795e-07, + "loss": 0.2139236032962799, + "step": 7044 + }, + { + "epoch": 1.870800690479352, + "grad_norm": 1.196552292185578, + "learning_rate": 2.286798475427898e-07, + "loss": 0.2251984179019928, + "step": 7045 + }, + { + "epoch": 1.8710662594608949, + "grad_norm": 1.2395291577625112, + "learning_rate": 2.277471654518959e-07, + "loss": 0.24517378211021423, + "step": 7046 + }, + { + "epoch": 1.8713318284424378, + "grad_norm": 1.3048847468612028, + "learning_rate": 2.2681636733609457e-07, + "loss": 0.19115275144577026, + "step": 7047 + }, + { + "epoch": 1.8715973974239808, + "grad_norm": 1.2997607659373802, + "learning_rate": 2.2588745337483454e-07, + "loss": 0.26092633605003357, + "step": 7048 + }, + { + "epoch": 1.8718629664055237, + "grad_norm": 1.2646212726473884, + "learning_rate": 2.2496042374719807e-07, + "loss": 0.18862302601337433, + "step": 7049 + }, + { + "epoch": 1.8721285353870667, + "grad_norm": 1.1602330038245767, + "learning_rate": 2.2403527863190554e-07, + "loss": 0.20728996396064758, + "step": 7050 + }, + { + "epoch": 1.8723941043686096, + "grad_norm": 1.236025812615254, + "learning_rate": 2.231120182073143e-07, + "loss": 0.24244122207164764, + "step": 7051 + }, + { + "epoch": 1.8726596733501526, + "grad_norm": 1.205655043915546, + "learning_rate": 2.2219064265141866e-07, + "loss": 0.18956953287124634, + "step": 7052 + }, + { + "epoch": 1.8729252423316956, + "grad_norm": 1.1159089015267554, + "learning_rate": 2.2127115214184868e-07, + "loss": 0.19873176515102386, + "step": 7053 + }, + { + "epoch": 1.8731908113132385, + "grad_norm": 1.2896839736015335, + "learning_rate": 2.203535468558704e-07, + "loss": 0.23717360198497772, + "step": 7054 + }, + { + "epoch": 1.8734563802947815, + "grad_norm": 1.3203924338573048, + "learning_rate": 2.1943782697038896e-07, + "loss": 0.24051904678344727, + "step": 7055 + }, + { + "epoch": 1.8737219492763244, + "grad_norm": 1.3193670550613668, + "learning_rate": 2.1852399266194312e-07, + "loss": 0.23541691899299622, + "step": 7056 + }, + { + "epoch": 1.8739875182578674, + "grad_norm": 1.3395958296451687, + "learning_rate": 2.1761204410671088e-07, + "loss": 0.22566163539886475, + "step": 7057 + }, + { + "epoch": 1.8742530872394103, + "grad_norm": 1.297432294479727, + "learning_rate": 2.167019814805027e-07, + "loss": 0.25771743059158325, + "step": 7058 + }, + { + "epoch": 1.8745186562209533, + "grad_norm": 1.1482951648622821, + "learning_rate": 2.1579380495876934e-07, + "loss": 0.22624637186527252, + "step": 7059 + }, + { + "epoch": 1.8747842252024962, + "grad_norm": 1.3036126318267591, + "learning_rate": 2.148875147165963e-07, + "loss": 0.24671627581119537, + "step": 7060 + }, + { + "epoch": 1.8750497941840392, + "grad_norm": 1.1983704285109544, + "learning_rate": 2.1398311092870605e-07, + "loss": 0.21607278287410736, + "step": 7061 + }, + { + "epoch": 1.8753153631655821, + "grad_norm": 1.1102939736369823, + "learning_rate": 2.1308059376945689e-07, + "loss": 0.1960655301809311, + "step": 7062 + }, + { + "epoch": 1.875580932147125, + "grad_norm": 1.2816228458436618, + "learning_rate": 2.1217996341284297e-07, + "loss": 0.22005721926689148, + "step": 7063 + }, + { + "epoch": 1.875846501128668, + "grad_norm": 1.2746284533707484, + "learning_rate": 2.1128122003249541e-07, + "loss": 0.21442776918411255, + "step": 7064 + }, + { + "epoch": 1.876112070110211, + "grad_norm": 1.1849768238897622, + "learning_rate": 2.1038436380168114e-07, + "loss": 0.23126785457134247, + "step": 7065 + }, + { + "epoch": 1.876377639091754, + "grad_norm": 1.4246070766583077, + "learning_rate": 2.094893948933041e-07, + "loss": 0.24286629259586334, + "step": 7066 + }, + { + "epoch": 1.8766432080732969, + "grad_norm": 1.3706445020134141, + "learning_rate": 2.0859631347990406e-07, + "loss": 0.25771957635879517, + "step": 7067 + }, + { + "epoch": 1.87690877705484, + "grad_norm": 1.1754559873110961, + "learning_rate": 2.0770511973365436e-07, + "loss": 0.19837790727615356, + "step": 7068 + }, + { + "epoch": 1.877174346036383, + "grad_norm": 1.2372359407501599, + "learning_rate": 2.0681581382636984e-07, + "loss": 0.21209359169006348, + "step": 7069 + }, + { + "epoch": 1.877439915017926, + "grad_norm": 1.9178204608286211, + "learning_rate": 2.0592839592949554e-07, + "loss": 0.26641422510147095, + "step": 7070 + }, + { + "epoch": 1.877705483999469, + "grad_norm": 1.3604176831947503, + "learning_rate": 2.050428662141146e-07, + "loss": 0.21609601378440857, + "step": 7071 + }, + { + "epoch": 1.8779710529810119, + "grad_norm": 1.2861845280896875, + "learning_rate": 2.0415922485095051e-07, + "loss": 0.23642000555992126, + "step": 7072 + }, + { + "epoch": 1.8782366219625548, + "grad_norm": 1.3854568667341272, + "learning_rate": 2.0327747201035587e-07, + "loss": 0.24564675986766815, + "step": 7073 + }, + { + "epoch": 1.8785021909440978, + "grad_norm": 1.229212126818568, + "learning_rate": 2.0239760786232355e-07, + "loss": 0.20001479983329773, + "step": 7074 + }, + { + "epoch": 1.8787677599256407, + "grad_norm": 1.2817747323253132, + "learning_rate": 2.015196325764801e-07, + "loss": 0.2590208649635315, + "step": 7075 + }, + { + "epoch": 1.8790333289071837, + "grad_norm": 1.2462050168824985, + "learning_rate": 2.0064354632208904e-07, + "loss": 0.23298504948616028, + "step": 7076 + }, + { + "epoch": 1.8792988978887266, + "grad_norm": 1.2573573484068483, + "learning_rate": 1.997693492680497e-07, + "loss": 0.22409996390342712, + "step": 7077 + }, + { + "epoch": 1.8795644668702696, + "grad_norm": 1.410723892029772, + "learning_rate": 1.9889704158289724e-07, + "loss": 0.27316784858703613, + "step": 7078 + }, + { + "epoch": 1.8798300358518125, + "grad_norm": 1.2924796650338854, + "learning_rate": 1.980266234348016e-07, + "loss": 0.2271946519613266, + "step": 7079 + }, + { + "epoch": 1.8800956048333555, + "grad_norm": 1.2438429761767338, + "learning_rate": 1.9715809499156858e-07, + "loss": 0.20887964963912964, + "step": 7080 + }, + { + "epoch": 1.8803611738148984, + "grad_norm": 1.2112268618082698, + "learning_rate": 1.9629145642064197e-07, + "loss": 0.23468685150146484, + "step": 7081 + }, + { + "epoch": 1.8806267427964414, + "grad_norm": 1.308865144497765, + "learning_rate": 1.9542670788909813e-07, + "loss": 0.21624556183815002, + "step": 7082 + }, + { + "epoch": 1.8808923117779843, + "grad_norm": 1.1751415989571612, + "learning_rate": 1.9456384956365149e-07, + "loss": 0.22328166663646698, + "step": 7083 + }, + { + "epoch": 1.8811578807595273, + "grad_norm": 1.3508603820961609, + "learning_rate": 1.93702881610649e-07, + "loss": 0.2526431381702423, + "step": 7084 + }, + { + "epoch": 1.8814234497410702, + "grad_norm": 1.3562256445660688, + "learning_rate": 1.9284380419607784e-07, + "loss": 0.23668771982192993, + "step": 7085 + }, + { + "epoch": 1.8816890187226132, + "grad_norm": 1.2668189225170288, + "learning_rate": 1.9198661748555557e-07, + "loss": 0.24710845947265625, + "step": 7086 + }, + { + "epoch": 1.8819545877041561, + "grad_norm": 1.4047256701053605, + "learning_rate": 1.911313216443389e-07, + "loss": 0.22696900367736816, + "step": 7087 + }, + { + "epoch": 1.882220156685699, + "grad_norm": 1.3717447863189725, + "learning_rate": 1.9027791683731922e-07, + "loss": 0.21652163565158844, + "step": 7088 + }, + { + "epoch": 1.882485725667242, + "grad_norm": 1.3189608691767827, + "learning_rate": 1.894264032290205e-07, + "loss": 0.2166716307401657, + "step": 7089 + }, + { + "epoch": 1.882751294648785, + "grad_norm": 1.3746931913110367, + "learning_rate": 1.8857678098360698e-07, + "loss": 0.26200050115585327, + "step": 7090 + }, + { + "epoch": 1.883016863630328, + "grad_norm": 1.2945644704190118, + "learning_rate": 1.8772905026487654e-07, + "loss": 0.2292764037847519, + "step": 7091 + }, + { + "epoch": 1.883282432611871, + "grad_norm": 1.3106590918741248, + "learning_rate": 1.8688321123625842e-07, + "loss": 0.23893016576766968, + "step": 7092 + }, + { + "epoch": 1.8835480015934138, + "grad_norm": 1.2241030970764724, + "learning_rate": 1.860392640608244e-07, + "loss": 0.2509230673313141, + "step": 7093 + }, + { + "epoch": 1.8838135705749568, + "grad_norm": 1.2218686374923997, + "learning_rate": 1.8519720890127434e-07, + "loss": 0.24156486988067627, + "step": 7094 + }, + { + "epoch": 1.8840791395564997, + "grad_norm": 1.2859122561460798, + "learning_rate": 1.843570459199462e-07, + "loss": 0.2120019942522049, + "step": 7095 + }, + { + "epoch": 1.884344708538043, + "grad_norm": 1.6579646138710773, + "learning_rate": 1.835187752788159e-07, + "loss": 0.23400259017944336, + "step": 7096 + }, + { + "epoch": 1.8846102775195859, + "grad_norm": 1.281132346942695, + "learning_rate": 1.8268239713949087e-07, + "loss": 0.20913103222846985, + "step": 7097 + }, + { + "epoch": 1.8848758465011288, + "grad_norm": 1.3381319381686223, + "learning_rate": 1.8184791166321546e-07, + "loss": 0.24468877911567688, + "step": 7098 + }, + { + "epoch": 1.8851414154826718, + "grad_norm": 1.236616212709848, + "learning_rate": 1.8101531901086767e-07, + "loss": 0.2038918137550354, + "step": 7099 + }, + { + "epoch": 1.8854069844642147, + "grad_norm": 1.3201086548941574, + "learning_rate": 1.8018461934296239e-07, + "loss": 0.24191413819789886, + "step": 7100 + }, + { + "epoch": 1.8856725534457577, + "grad_norm": 1.277539269643606, + "learning_rate": 1.793558128196493e-07, + "loss": 0.24394474923610687, + "step": 7101 + }, + { + "epoch": 1.8859381224273006, + "grad_norm": 1.1561225023553612, + "learning_rate": 1.7852889960071063e-07, + "loss": 0.22630709409713745, + "step": 7102 + }, + { + "epoch": 1.8862036914088436, + "grad_norm": 1.5472360212555962, + "learning_rate": 1.7770387984556768e-07, + "loss": 0.23936980962753296, + "step": 7103 + }, + { + "epoch": 1.8864692603903865, + "grad_norm": 1.275471897769737, + "learning_rate": 1.768807537132733e-07, + "loss": 0.24808618426322937, + "step": 7104 + }, + { + "epoch": 1.8867348293719295, + "grad_norm": 1.273035999339445, + "learning_rate": 1.7605952136251603e-07, + "loss": 0.23934635519981384, + "step": 7105 + }, + { + "epoch": 1.8870003983534724, + "grad_norm": 1.189686791776393, + "learning_rate": 1.7524018295162148e-07, + "loss": 0.22107656300067902, + "step": 7106 + }, + { + "epoch": 1.8872659673350154, + "grad_norm": 1.3496800848037154, + "learning_rate": 1.7442273863854553e-07, + "loss": 0.23253028094768524, + "step": 7107 + }, + { + "epoch": 1.8875315363165583, + "grad_norm": 1.3028365552765204, + "learning_rate": 1.7360718858088542e-07, + "loss": 0.2501102387905121, + "step": 7108 + }, + { + "epoch": 1.8877971052981013, + "grad_norm": 1.4057988238229884, + "learning_rate": 1.7279353293586765e-07, + "loss": 0.25537967681884766, + "step": 7109 + }, + { + "epoch": 1.8880626742796442, + "grad_norm": 2.7876746143917033, + "learning_rate": 1.7198177186035447e-07, + "loss": 0.25701045989990234, + "step": 7110 + }, + { + "epoch": 1.8883282432611872, + "grad_norm": 1.1447271563365653, + "learning_rate": 1.7117190551084628e-07, + "loss": 0.2109440565109253, + "step": 7111 + }, + { + "epoch": 1.8885938122427302, + "grad_norm": 1.2454061070152636, + "learning_rate": 1.7036393404347373e-07, + "loss": 0.22767721116542816, + "step": 7112 + }, + { + "epoch": 1.888859381224273, + "grad_norm": 1.1572937395529788, + "learning_rate": 1.6955785761400444e-07, + "loss": 0.1976814568042755, + "step": 7113 + }, + { + "epoch": 1.889124950205816, + "grad_norm": 1.1727224852039306, + "learning_rate": 1.687536763778419e-07, + "loss": 0.21109873056411743, + "step": 7114 + }, + { + "epoch": 1.889390519187359, + "grad_norm": 1.1916227822459606, + "learning_rate": 1.6795139049002095e-07, + "loss": 0.2165786623954773, + "step": 7115 + }, + { + "epoch": 1.889656088168902, + "grad_norm": 1.2917556149315792, + "learning_rate": 1.6715100010521347e-07, + "loss": 0.23962441086769104, + "step": 7116 + }, + { + "epoch": 1.889921657150445, + "grad_norm": 1.2423009900583697, + "learning_rate": 1.6635250537772596e-07, + "loss": 0.23351140320301056, + "step": 7117 + }, + { + "epoch": 1.8901872261319879, + "grad_norm": 1.3034348272306633, + "learning_rate": 1.6555590646149866e-07, + "loss": 0.19999945163726807, + "step": 7118 + }, + { + "epoch": 1.8904527951135308, + "grad_norm": 1.432201467842623, + "learning_rate": 1.647612035101054e-07, + "loss": 0.27142196893692017, + "step": 7119 + }, + { + "epoch": 1.8907183640950738, + "grad_norm": 1.2861780172834696, + "learning_rate": 1.6396839667675691e-07, + "loss": 0.21525685489177704, + "step": 7120 + }, + { + "epoch": 1.8909839330766167, + "grad_norm": 3.2062699859400396, + "learning_rate": 1.631774861142965e-07, + "loss": 0.24305005371570587, + "step": 7121 + }, + { + "epoch": 1.8912495020581597, + "grad_norm": 1.2019998279555377, + "learning_rate": 1.6238847197520113e-07, + "loss": 0.23202842473983765, + "step": 7122 + }, + { + "epoch": 1.8915150710397026, + "grad_norm": 1.4409003412080332, + "learning_rate": 1.6160135441158576e-07, + "loss": 0.24373790621757507, + "step": 7123 + }, + { + "epoch": 1.8917806400212456, + "grad_norm": 1.2360359431057044, + "learning_rate": 1.6081613357519565e-07, + "loss": 0.22774222493171692, + "step": 7124 + }, + { + "epoch": 1.8920462090027885, + "grad_norm": 1.2064368847282083, + "learning_rate": 1.6003280961741196e-07, + "loss": 0.20660057663917542, + "step": 7125 + }, + { + "epoch": 1.8923117779843315, + "grad_norm": 1.3070998228758686, + "learning_rate": 1.5925138268925166e-07, + "loss": 0.23578912019729614, + "step": 7126 + }, + { + "epoch": 1.8925773469658744, + "grad_norm": 1.2737250152668298, + "learning_rate": 1.5847185294136313e-07, + "loss": 0.20852091908454895, + "step": 7127 + }, + { + "epoch": 1.8928429159474174, + "grad_norm": 1.1465883719364975, + "learning_rate": 1.5769422052403172e-07, + "loss": 0.17455898225307465, + "step": 7128 + }, + { + "epoch": 1.8931084849289603, + "grad_norm": 1.5036497092390075, + "learning_rate": 1.5691848558717638e-07, + "loss": 0.29552748799324036, + "step": 7129 + }, + { + "epoch": 1.8933740539105033, + "grad_norm": 1.3009458238394367, + "learning_rate": 1.5614464828034746e-07, + "loss": 0.22972649335861206, + "step": 7130 + }, + { + "epoch": 1.8936396228920462, + "grad_norm": 1.2296689152648304, + "learning_rate": 1.5537270875273348e-07, + "loss": 0.2134108692407608, + "step": 7131 + }, + { + "epoch": 1.8939051918735892, + "grad_norm": 1.4119584533896288, + "learning_rate": 1.546026671531542e-07, + "loss": 0.24145451188087463, + "step": 7132 + }, + { + "epoch": 1.8941707608551321, + "grad_norm": 1.355860353407812, + "learning_rate": 1.5383452363006534e-07, + "loss": 0.2323920726776123, + "step": 7133 + }, + { + "epoch": 1.894436329836675, + "grad_norm": 1.197617700552455, + "learning_rate": 1.5306827833155403e-07, + "loss": 0.20091015100479126, + "step": 7134 + }, + { + "epoch": 1.894701898818218, + "grad_norm": 1.370489911603159, + "learning_rate": 1.523039314053465e-07, + "loss": 0.2451317310333252, + "step": 7135 + }, + { + "epoch": 1.894967467799761, + "grad_norm": 1.2946538259097045, + "learning_rate": 1.5154148299879822e-07, + "loss": 0.22744594514369965, + "step": 7136 + }, + { + "epoch": 1.895233036781304, + "grad_norm": 1.2046527835430252, + "learning_rate": 1.5078093325889943e-07, + "loss": 0.2460673749446869, + "step": 7137 + }, + { + "epoch": 1.895498605762847, + "grad_norm": 1.4172423595206858, + "learning_rate": 1.5002228233227722e-07, + "loss": 0.2524537444114685, + "step": 7138 + }, + { + "epoch": 1.8957641747443899, + "grad_norm": 1.1840127480017744, + "learning_rate": 1.4926553036518798e-07, + "loss": 0.2056279480457306, + "step": 7139 + }, + { + "epoch": 1.8960297437259328, + "grad_norm": 1.2144930845419581, + "learning_rate": 1.485106775035261e-07, + "loss": 0.2656184732913971, + "step": 7140 + }, + { + "epoch": 1.8962953127074758, + "grad_norm": 1.1903286988332102, + "learning_rate": 1.477577238928185e-07, + "loss": 0.2190116047859192, + "step": 7141 + }, + { + "epoch": 1.8965608816890187, + "grad_norm": 1.206151177902952, + "learning_rate": 1.4700666967822574e-07, + "loss": 0.22984017431735992, + "step": 7142 + }, + { + "epoch": 1.8968264506705617, + "grad_norm": 1.1949819121682481, + "learning_rate": 1.462575150045409e-07, + "loss": 0.17947378754615784, + "step": 7143 + }, + { + "epoch": 1.8970920196521046, + "grad_norm": 1.2649423314993642, + "learning_rate": 1.4551026001619395e-07, + "loss": 0.24965715408325195, + "step": 7144 + }, + { + "epoch": 1.8973575886336476, + "grad_norm": 1.236302993447548, + "learning_rate": 1.4476490485724526e-07, + "loss": 0.2337307333946228, + "step": 7145 + }, + { + "epoch": 1.8976231576151905, + "grad_norm": 1.2205039464348546, + "learning_rate": 1.4402144967139098e-07, + "loss": 0.22668538987636566, + "step": 7146 + }, + { + "epoch": 1.8978887265967335, + "grad_norm": 1.350785859399433, + "learning_rate": 1.4327989460196091e-07, + "loss": 0.21934781968593597, + "step": 7147 + }, + { + "epoch": 1.8981542955782764, + "grad_norm": 1.2212959594670445, + "learning_rate": 1.4254023979191844e-07, + "loss": 0.1957930624485016, + "step": 7148 + }, + { + "epoch": 1.8984198645598194, + "grad_norm": 1.1724780894008597, + "learning_rate": 1.4180248538385956e-07, + "loss": 0.22351369261741638, + "step": 7149 + }, + { + "epoch": 1.8986854335413623, + "grad_norm": 1.3930947329130605, + "learning_rate": 1.4106663152001487e-07, + "loss": 0.2603265047073364, + "step": 7150 + }, + { + "epoch": 1.8989510025229053, + "grad_norm": 1.260479860356455, + "learning_rate": 1.4033267834224873e-07, + "loss": 0.2566663324832916, + "step": 7151 + }, + { + "epoch": 1.8992165715044482, + "grad_norm": 1.2799319314175146, + "learning_rate": 1.3960062599205682e-07, + "loss": 0.23130206763744354, + "step": 7152 + }, + { + "epoch": 1.8994821404859912, + "grad_norm": 1.1757231252562024, + "learning_rate": 1.3887047461057179e-07, + "loss": 0.17946425080299377, + "step": 7153 + }, + { + "epoch": 1.8997477094675341, + "grad_norm": 1.2434099546308155, + "learning_rate": 1.3814222433855884e-07, + "loss": 0.23946328461170197, + "step": 7154 + }, + { + "epoch": 1.900013278449077, + "grad_norm": 1.2249367291717066, + "learning_rate": 1.3741587531641566e-07, + "loss": 0.21002715826034546, + "step": 7155 + }, + { + "epoch": 1.90027884743062, + "grad_norm": 1.3062374823275615, + "learning_rate": 1.3669142768417242e-07, + "loss": 0.2121986746788025, + "step": 7156 + }, + { + "epoch": 1.900544416412163, + "grad_norm": 1.373871289837254, + "learning_rate": 1.3596888158149525e-07, + "loss": 0.26400670409202576, + "step": 7157 + }, + { + "epoch": 1.900809985393706, + "grad_norm": 1.1813353744292436, + "learning_rate": 1.3524823714768375e-07, + "loss": 0.18764406442642212, + "step": 7158 + }, + { + "epoch": 1.9010755543752489, + "grad_norm": 1.415975931925435, + "learning_rate": 1.3452949452166686e-07, + "loss": 0.2550342381000519, + "step": 7159 + }, + { + "epoch": 1.9013411233567918, + "grad_norm": 1.304366194966887, + "learning_rate": 1.3381265384201035e-07, + "loss": 0.23188576102256775, + "step": 7160 + }, + { + "epoch": 1.9016066923383348, + "grad_norm": 1.2473914592639561, + "learning_rate": 1.3309771524691372e-07, + "loss": 0.23124513030052185, + "step": 7161 + }, + { + "epoch": 1.9018722613198777, + "grad_norm": 1.2056745011797427, + "learning_rate": 1.323846788742078e-07, + "loss": 0.19941067695617676, + "step": 7162 + }, + { + "epoch": 1.9021378303014207, + "grad_norm": 1.4624998875104938, + "learning_rate": 1.316735448613593e-07, + "loss": 0.22510412335395813, + "step": 7163 + }, + { + "epoch": 1.9024033992829636, + "grad_norm": 1.2448961229015743, + "learning_rate": 1.309643133454641e-07, + "loss": 0.19102326035499573, + "step": 7164 + }, + { + "epoch": 1.9026689682645066, + "grad_norm": 1.2307397875458914, + "learning_rate": 1.3025698446325618e-07, + "loss": 0.20826731622219086, + "step": 7165 + }, + { + "epoch": 1.9029345372460496, + "grad_norm": 1.3483240422328144, + "learning_rate": 1.2955155835109757e-07, + "loss": 0.23238909244537354, + "step": 7166 + }, + { + "epoch": 1.9032001062275925, + "grad_norm": 1.4338552298496805, + "learning_rate": 1.2884803514498833e-07, + "loss": 0.2635011374950409, + "step": 7167 + }, + { + "epoch": 1.9034656752091355, + "grad_norm": 1.1745725675637841, + "learning_rate": 1.281464149805578e-07, + "loss": 0.2073322981595993, + "step": 7168 + }, + { + "epoch": 1.9037312441906784, + "grad_norm": 1.2344038568124596, + "learning_rate": 1.274466979930711e-07, + "loss": 0.22091326117515564, + "step": 7169 + }, + { + "epoch": 1.9039968131722214, + "grad_norm": 1.114689842836081, + "learning_rate": 1.2674888431742472e-07, + "loss": 0.18613001704216003, + "step": 7170 + }, + { + "epoch": 1.9042623821537643, + "grad_norm": 1.2788383965135535, + "learning_rate": 1.2605297408814887e-07, + "loss": 0.2165849655866623, + "step": 7171 + }, + { + "epoch": 1.9045279511353073, + "grad_norm": 1.294203512401496, + "learning_rate": 1.2535896743940844e-07, + "loss": 0.21317794919013977, + "step": 7172 + }, + { + "epoch": 1.9047935201168502, + "grad_norm": 1.47127212987638, + "learning_rate": 1.2466686450499866e-07, + "loss": 0.25221073627471924, + "step": 7173 + }, + { + "epoch": 1.9050590890983932, + "grad_norm": 1.2647474973058104, + "learning_rate": 1.239766654183472e-07, + "loss": 0.21598559617996216, + "step": 7174 + }, + { + "epoch": 1.9053246580799361, + "grad_norm": 1.2635227030316536, + "learning_rate": 1.232883703125187e-07, + "loss": 0.2284495085477829, + "step": 7175 + }, + { + "epoch": 1.905590227061479, + "grad_norm": 1.1825527167306378, + "learning_rate": 1.2260197932020713e-07, + "loss": 0.21899332106113434, + "step": 7176 + }, + { + "epoch": 1.905855796043022, + "grad_norm": 1.3588902485974734, + "learning_rate": 1.2191749257374097e-07, + "loss": 0.2633277177810669, + "step": 7177 + }, + { + "epoch": 1.906121365024565, + "grad_norm": 1.2643904365611611, + "learning_rate": 1.2123491020508137e-07, + "loss": 0.2330140471458435, + "step": 7178 + }, + { + "epoch": 1.906386934006108, + "grad_norm": 1.2757939155257039, + "learning_rate": 1.2055423234582087e-07, + "loss": 0.21859750151634216, + "step": 7179 + }, + { + "epoch": 1.9066525029876509, + "grad_norm": 1.3985563606047093, + "learning_rate": 1.198754591271878e-07, + "loss": 0.252164363861084, + "step": 7180 + }, + { + "epoch": 1.906918071969194, + "grad_norm": 1.4365501399575176, + "learning_rate": 1.191985906800408e-07, + "loss": 0.24968160688877106, + "step": 7181 + }, + { + "epoch": 1.907183640950737, + "grad_norm": 1.199067091736319, + "learning_rate": 1.185236271348722e-07, + "loss": 0.2083423137664795, + "step": 7182 + }, + { + "epoch": 1.90744920993228, + "grad_norm": 1.258208503364781, + "learning_rate": 1.1785056862180789e-07, + "loss": 0.2468394935131073, + "step": 7183 + }, + { + "epoch": 1.907714778913823, + "grad_norm": 1.2908738922715033, + "learning_rate": 1.1717941527060405e-07, + "loss": 0.22417521476745605, + "step": 7184 + }, + { + "epoch": 1.9079803478953659, + "grad_norm": 1.2789853859840312, + "learning_rate": 1.1651016721065167e-07, + "loss": 0.2411842793226242, + "step": 7185 + }, + { + "epoch": 1.9082459168769088, + "grad_norm": 1.311967953603668, + "learning_rate": 1.1584282457097417e-07, + "loss": 0.24650761485099792, + "step": 7186 + }, + { + "epoch": 1.9085114858584518, + "grad_norm": 1.3305923315328496, + "learning_rate": 1.1517738748022755e-07, + "loss": 0.22433717548847198, + "step": 7187 + }, + { + "epoch": 1.9087770548399947, + "grad_norm": 1.2666444248015347, + "learning_rate": 1.145138560667003e-07, + "loss": 0.20867910981178284, + "step": 7188 + }, + { + "epoch": 1.9090426238215377, + "grad_norm": 1.2511449541105855, + "learning_rate": 1.138522304583134e-07, + "loss": 0.21889618039131165, + "step": 7189 + }, + { + "epoch": 1.9093081928030806, + "grad_norm": 1.113107479716362, + "learning_rate": 1.1319251078261928e-07, + "loss": 0.19350749254226685, + "step": 7190 + }, + { + "epoch": 1.9095737617846236, + "grad_norm": 1.183265546980091, + "learning_rate": 1.125346971668051e-07, + "loss": 0.19123657047748566, + "step": 7191 + }, + { + "epoch": 1.9098393307661665, + "grad_norm": 1.2653223306994201, + "learning_rate": 1.118787897376905e-07, + "loss": 0.21433782577514648, + "step": 7192 + }, + { + "epoch": 1.9101048997477095, + "grad_norm": 1.474925382041675, + "learning_rate": 1.1122478862172437e-07, + "loss": 0.2521187663078308, + "step": 7193 + }, + { + "epoch": 1.9103704687292524, + "grad_norm": 1.2835872924926361, + "learning_rate": 1.1057269394499248e-07, + "loss": 0.2141486555337906, + "step": 7194 + }, + { + "epoch": 1.9106360377107954, + "grad_norm": 1.271472683987379, + "learning_rate": 1.0992250583320985e-07, + "loss": 0.22960343956947327, + "step": 7195 + }, + { + "epoch": 1.9109016066923383, + "grad_norm": 1.3433609684783299, + "learning_rate": 1.092742244117262e-07, + "loss": 0.21809744834899902, + "step": 7196 + }, + { + "epoch": 1.9111671756738813, + "grad_norm": 1.248347973820862, + "learning_rate": 1.0862784980552044e-07, + "loss": 0.22418212890625, + "step": 7197 + }, + { + "epoch": 1.9114327446554242, + "grad_norm": 1.2504701200893746, + "learning_rate": 1.0798338213920845e-07, + "loss": 0.22050701081752777, + "step": 7198 + }, + { + "epoch": 1.9116983136369672, + "grad_norm": 1.206849931438756, + "learning_rate": 1.0734082153703418e-07, + "loss": 0.23200345039367676, + "step": 7199 + }, + { + "epoch": 1.9119638826185101, + "grad_norm": 1.1102825382626649, + "learning_rate": 1.0670016812287631e-07, + "loss": 0.18366631865501404, + "step": 7200 + }, + { + "epoch": 1.912229451600053, + "grad_norm": 1.2844567521026582, + "learning_rate": 1.0606142202024605e-07, + "loss": 0.24362193048000336, + "step": 7201 + }, + { + "epoch": 1.912495020581596, + "grad_norm": 1.2822631921528913, + "learning_rate": 1.0542458335228601e-07, + "loss": 0.2216200977563858, + "step": 7202 + }, + { + "epoch": 1.912760589563139, + "grad_norm": 1.0921875359661608, + "learning_rate": 1.0478965224176907e-07, + "loss": 0.20216065645217896, + "step": 7203 + }, + { + "epoch": 1.913026158544682, + "grad_norm": 1.254966671592246, + "learning_rate": 1.041566288111051e-07, + "loss": 0.22054359316825867, + "step": 7204 + }, + { + "epoch": 1.913291727526225, + "grad_norm": 1.3532366246655447, + "learning_rate": 1.0352551318233206e-07, + "loss": 0.21569015085697174, + "step": 7205 + }, + { + "epoch": 1.9135572965077678, + "grad_norm": 1.2826756039782425, + "learning_rate": 1.028963054771226e-07, + "loss": 0.22967267036437988, + "step": 7206 + }, + { + "epoch": 1.9138228654893108, + "grad_norm": 1.3494789006319945, + "learning_rate": 1.0226900581677968e-07, + "loss": 0.2422460913658142, + "step": 7207 + }, + { + "epoch": 1.9140884344708538, + "grad_norm": 1.3606228589652338, + "learning_rate": 1.0164361432223879e-07, + "loss": 0.25891292095184326, + "step": 7208 + }, + { + "epoch": 1.914354003452397, + "grad_norm": 1.3570561855059022, + "learning_rate": 1.0102013111406905e-07, + "loss": 0.26915764808654785, + "step": 7209 + }, + { + "epoch": 1.9146195724339399, + "grad_norm": 1.3889996377213247, + "learning_rate": 1.0039855631247097e-07, + "loss": 0.2268485426902771, + "step": 7210 + }, + { + "epoch": 1.9148851414154828, + "grad_norm": 1.254622691077732, + "learning_rate": 9.977889003727647e-08, + "loss": 0.22551512718200684, + "step": 7211 + }, + { + "epoch": 1.9151507103970258, + "grad_norm": 1.233084698895248, + "learning_rate": 9.91611324079489e-08, + "loss": 0.24224743247032166, + "step": 7212 + }, + { + "epoch": 1.9154162793785687, + "grad_norm": 1.2426176239380708, + "learning_rate": 9.854528354358517e-08, + "loss": 0.19550879299640656, + "step": 7213 + }, + { + "epoch": 1.9156818483601117, + "grad_norm": 1.3449782320604147, + "learning_rate": 9.793134356291478e-08, + "loss": 0.24986523389816284, + "step": 7214 + }, + { + "epoch": 1.9159474173416546, + "grad_norm": 1.3340583070384961, + "learning_rate": 9.731931258429638e-08, + "loss": 0.2565170228481293, + "step": 7215 + }, + { + "epoch": 1.9162129863231976, + "grad_norm": 1.185156912642083, + "learning_rate": 9.670919072572449e-08, + "loss": 0.2166958749294281, + "step": 7216 + }, + { + "epoch": 1.9164785553047405, + "grad_norm": 1.2903999319183896, + "learning_rate": 9.610097810482166e-08, + "loss": 0.2002115249633789, + "step": 7217 + }, + { + "epoch": 1.9167441242862835, + "grad_norm": 1.1589813054229285, + "learning_rate": 9.549467483884412e-08, + "loss": 0.209486186504364, + "step": 7218 + }, + { + "epoch": 1.9170096932678264, + "grad_norm": 1.2748483155423624, + "learning_rate": 9.489028104468056e-08, + "loss": 0.22061321139335632, + "step": 7219 + }, + { + "epoch": 1.9172752622493694, + "grad_norm": 1.3916500275624957, + "learning_rate": 9.428779683885114e-08, + "loss": 0.21880047023296356, + "step": 7220 + }, + { + "epoch": 1.9175408312309123, + "grad_norm": 1.174801358834737, + "learning_rate": 9.368722233750849e-08, + "loss": 0.22674325108528137, + "step": 7221 + }, + { + "epoch": 1.9178064002124553, + "grad_norm": 1.2877078963500264, + "learning_rate": 9.308855765643332e-08, + "loss": 0.22100718319416046, + "step": 7222 + }, + { + "epoch": 1.9180719691939982, + "grad_norm": 1.3291196619762962, + "learning_rate": 9.249180291104553e-08, + "loss": 0.23105254769325256, + "step": 7223 + }, + { + "epoch": 1.9183375381755412, + "grad_norm": 1.2897395451200044, + "learning_rate": 9.189695821638755e-08, + "loss": 0.22483405470848083, + "step": 7224 + }, + { + "epoch": 1.9186031071570842, + "grad_norm": 1.0701399001286365, + "learning_rate": 9.130402368714208e-08, + "loss": 0.1939004510641098, + "step": 7225 + }, + { + "epoch": 1.918868676138627, + "grad_norm": 1.2349263677236755, + "learning_rate": 9.071299943761769e-08, + "loss": 0.21722440421581268, + "step": 7226 + }, + { + "epoch": 1.91913424512017, + "grad_norm": 1.2911544131515666, + "learning_rate": 9.012388558175877e-08, + "loss": 0.24213966727256775, + "step": 7227 + }, + { + "epoch": 1.919399814101713, + "grad_norm": 1.2266941536480729, + "learning_rate": 8.953668223313783e-08, + "loss": 0.2305546998977661, + "step": 7228 + }, + { + "epoch": 1.919665383083256, + "grad_norm": 1.3932840646040938, + "learning_rate": 8.895138950496207e-08, + "loss": 0.2678033709526062, + "step": 7229 + }, + { + "epoch": 1.919930952064799, + "grad_norm": 1.2449965535251106, + "learning_rate": 8.836800751006791e-08, + "loss": 0.2491014301776886, + "step": 7230 + }, + { + "epoch": 1.9201965210463419, + "grad_norm": 1.2551836576043742, + "learning_rate": 8.778653636092537e-08, + "loss": 0.21837326884269714, + "step": 7231 + }, + { + "epoch": 1.9204620900278848, + "grad_norm": 1.2745391136427304, + "learning_rate": 8.72069761696348e-08, + "loss": 0.24149999022483826, + "step": 7232 + }, + { + "epoch": 1.9207276590094278, + "grad_norm": 1.3444140835580012, + "learning_rate": 8.662932704792793e-08, + "loss": 0.2124684453010559, + "step": 7233 + }, + { + "epoch": 1.9209932279909707, + "grad_norm": 1.3660213009765734, + "learning_rate": 8.60535891071712e-08, + "loss": 0.2452150285243988, + "step": 7234 + }, + { + "epoch": 1.9212587969725137, + "grad_norm": 1.2005299446152509, + "learning_rate": 8.547976245835698e-08, + "loss": 0.23598846793174744, + "step": 7235 + }, + { + "epoch": 1.9215243659540566, + "grad_norm": 1.3152974069295431, + "learning_rate": 8.490784721211454e-08, + "loss": 0.2105225920677185, + "step": 7236 + }, + { + "epoch": 1.9217899349355996, + "grad_norm": 1.4424977304862223, + "learning_rate": 8.433784347870122e-08, + "loss": 0.2585388720035553, + "step": 7237 + }, + { + "epoch": 1.9220555039171425, + "grad_norm": 1.2300698994172445, + "learning_rate": 8.376975136800691e-08, + "loss": 0.21703900396823883, + "step": 7238 + }, + { + "epoch": 1.9223210728986855, + "grad_norm": 1.2580366958382383, + "learning_rate": 8.3203570989554e-08, + "loss": 0.22771210968494415, + "step": 7239 + }, + { + "epoch": 1.9225866418802284, + "grad_norm": 1.1645003525207898, + "learning_rate": 8.263930245249408e-08, + "loss": 0.22535575926303864, + "step": 7240 + }, + { + "epoch": 1.9228522108617714, + "grad_norm": 1.1822452042500315, + "learning_rate": 8.207694586561344e-08, + "loss": 0.2052595466375351, + "step": 7241 + }, + { + "epoch": 1.9231177798433143, + "grad_norm": 1.2683012213528768, + "learning_rate": 8.151650133732536e-08, + "loss": 0.19611456990242004, + "step": 7242 + }, + { + "epoch": 1.9233833488248573, + "grad_norm": 1.2762939262923303, + "learning_rate": 8.095796897567787e-08, + "loss": 0.20256826281547546, + "step": 7243 + }, + { + "epoch": 1.9236489178064002, + "grad_norm": 1.5444723931343434, + "learning_rate": 8.040134888835038e-08, + "loss": 0.25462138652801514, + "step": 7244 + }, + { + "epoch": 1.9239144867879432, + "grad_norm": 1.2813246309729553, + "learning_rate": 7.984664118265262e-08, + "loss": 0.27362316846847534, + "step": 7245 + }, + { + "epoch": 1.9241800557694861, + "grad_norm": 1.3526739723939418, + "learning_rate": 7.929384596552459e-08, + "loss": 0.23749098181724548, + "step": 7246 + }, + { + "epoch": 1.924445624751029, + "grad_norm": 1.3016147885306604, + "learning_rate": 7.874296334353882e-08, + "loss": 0.2472018599510193, + "step": 7247 + }, + { + "epoch": 1.924711193732572, + "grad_norm": 1.3451463766339227, + "learning_rate": 7.819399342290034e-08, + "loss": 0.23181989789009094, + "step": 7248 + }, + { + "epoch": 1.924976762714115, + "grad_norm": 1.2415200588572097, + "learning_rate": 7.764693630944231e-08, + "loss": 0.21363665163516998, + "step": 7249 + }, + { + "epoch": 1.925242331695658, + "grad_norm": 1.1849821155034532, + "learning_rate": 7.710179210863144e-08, + "loss": 0.21239221096038818, + "step": 7250 + }, + { + "epoch": 1.925507900677201, + "grad_norm": 1.4494720585200522, + "learning_rate": 7.655856092556591e-08, + "loss": 0.2643742263317108, + "step": 7251 + }, + { + "epoch": 1.9257734696587439, + "grad_norm": 1.251877664981762, + "learning_rate": 7.601724286497414e-08, + "loss": 0.2232428789138794, + "step": 7252 + }, + { + "epoch": 1.9260390386402868, + "grad_norm": 1.313277386530887, + "learning_rate": 7.547783803121489e-08, + "loss": 0.2052377462387085, + "step": 7253 + }, + { + "epoch": 1.9263046076218298, + "grad_norm": 1.2540878413614547, + "learning_rate": 7.494034652827942e-08, + "loss": 0.22194740176200867, + "step": 7254 + }, + { + "epoch": 1.9265701766033727, + "grad_norm": 1.2500554609811554, + "learning_rate": 7.440476845979038e-08, + "loss": 0.22004084289073944, + "step": 7255 + }, + { + "epoch": 1.9268357455849157, + "grad_norm": 1.5480704193409933, + "learning_rate": 7.387110392899965e-08, + "loss": 0.2218078374862671, + "step": 7256 + }, + { + "epoch": 1.9271013145664586, + "grad_norm": 1.3006193889830067, + "learning_rate": 7.33393530387927e-08, + "loss": 0.23272839188575745, + "step": 7257 + }, + { + "epoch": 1.9273668835480016, + "grad_norm": 1.3119971487868216, + "learning_rate": 7.280951589168417e-08, + "loss": 0.23666653037071228, + "step": 7258 + }, + { + "epoch": 1.9276324525295445, + "grad_norm": 1.235294099691234, + "learning_rate": 7.228159258982126e-08, + "loss": 0.21946533024311066, + "step": 7259 + }, + { + "epoch": 1.9278980215110875, + "grad_norm": 1.252328485116134, + "learning_rate": 7.175558323498033e-08, + "loss": 0.22158634662628174, + "step": 7260 + }, + { + "epoch": 1.9281635904926304, + "grad_norm": 1.1330771135999202, + "learning_rate": 7.123148792857026e-08, + "loss": 0.19978654384613037, + "step": 7261 + }, + { + "epoch": 1.9284291594741734, + "grad_norm": 1.2859436875650823, + "learning_rate": 7.070930677163023e-08, + "loss": 0.21197813749313354, + "step": 7262 + }, + { + "epoch": 1.9286947284557163, + "grad_norm": 1.2611518825786316, + "learning_rate": 7.018903986483083e-08, + "loss": 0.22650468349456787, + "step": 7263 + }, + { + "epoch": 1.9289602974372593, + "grad_norm": 1.2701948406662635, + "learning_rate": 6.967068730847293e-08, + "loss": 0.22257481515407562, + "step": 7264 + }, + { + "epoch": 1.9292258664188022, + "grad_norm": 1.3219742856760701, + "learning_rate": 6.915424920248992e-08, + "loss": 0.24899804592132568, + "step": 7265 + }, + { + "epoch": 1.9294914354003452, + "grad_norm": 1.2996576951077934, + "learning_rate": 6.863972564644328e-08, + "loss": 0.250610888004303, + "step": 7266 + }, + { + "epoch": 1.9297570043818881, + "grad_norm": 1.251137163804366, + "learning_rate": 6.81271167395292e-08, + "loss": 0.22786292433738708, + "step": 7267 + }, + { + "epoch": 1.930022573363431, + "grad_norm": 1.2890465128808872, + "learning_rate": 6.761642258056977e-08, + "loss": 0.22816789150238037, + "step": 7268 + }, + { + "epoch": 1.930288142344974, + "grad_norm": 1.3522601458627446, + "learning_rate": 6.7107643268024e-08, + "loss": 0.2589687407016754, + "step": 7269 + }, + { + "epoch": 1.930553711326517, + "grad_norm": 1.1963236616697677, + "learning_rate": 6.660077889997673e-08, + "loss": 0.2281583547592163, + "step": 7270 + }, + { + "epoch": 1.93081928030806, + "grad_norm": 1.3347065729182181, + "learning_rate": 6.60958295741454e-08, + "loss": 0.22833740711212158, + "step": 7271 + }, + { + "epoch": 1.931084849289603, + "grad_norm": 1.1611313283452582, + "learning_rate": 6.559279538787877e-08, + "loss": 0.20720313489437103, + "step": 7272 + }, + { + "epoch": 1.9313504182711458, + "grad_norm": 1.1884544288263172, + "learning_rate": 6.509167643815594e-08, + "loss": 0.17191773653030396, + "step": 7273 + }, + { + "epoch": 1.9316159872526888, + "grad_norm": 1.1354230474675757, + "learning_rate": 6.459247282158632e-08, + "loss": 0.23586943745613098, + "step": 7274 + }, + { + "epoch": 1.9318815562342317, + "grad_norm": 1.3318856895013969, + "learning_rate": 6.409518463441067e-08, + "loss": 0.21353168785572052, + "step": 7275 + }, + { + "epoch": 1.9321471252157747, + "grad_norm": 1.404937308132313, + "learning_rate": 6.359981197250009e-08, + "loss": 0.23148195445537567, + "step": 7276 + }, + { + "epoch": 1.9324126941973176, + "grad_norm": 1.3040478141172254, + "learning_rate": 6.310635493135709e-08, + "loss": 0.2113666534423828, + "step": 7277 + }, + { + "epoch": 1.9326782631788606, + "grad_norm": 1.3399999009479682, + "learning_rate": 6.261481360611332e-08, + "loss": 0.27689510583877563, + "step": 7278 + }, + { + "epoch": 1.9329438321604036, + "grad_norm": 1.2809237898551964, + "learning_rate": 6.2125188091533e-08, + "loss": 0.23746277391910553, + "step": 7279 + }, + { + "epoch": 1.9332094011419465, + "grad_norm": 1.4215326252349767, + "learning_rate": 6.163747848201062e-08, + "loss": 0.23123708367347717, + "step": 7280 + }, + { + "epoch": 1.9334749701234895, + "grad_norm": 1.3095914464878196, + "learning_rate": 6.115168487157097e-08, + "loss": 0.23640167713165283, + "step": 7281 + }, + { + "epoch": 1.9337405391050324, + "grad_norm": 1.3278235730632808, + "learning_rate": 6.066780735386801e-08, + "loss": 0.2259385585784912, + "step": 7282 + }, + { + "epoch": 1.9340061080865754, + "grad_norm": 1.230137664492021, + "learning_rate": 6.018584602218824e-08, + "loss": 0.219761461019516, + "step": 7283 + }, + { + "epoch": 1.9342716770681183, + "grad_norm": 1.43054331413576, + "learning_rate": 5.970580096944733e-08, + "loss": 0.24411989748477936, + "step": 7284 + }, + { + "epoch": 1.9345372460496613, + "grad_norm": 1.196712051616964, + "learning_rate": 5.922767228819459e-08, + "loss": 0.232415571808815, + "step": 7285 + }, + { + "epoch": 1.9348028150312042, + "grad_norm": 1.341424963494065, + "learning_rate": 5.875146007060517e-08, + "loss": 0.25938165187835693, + "step": 7286 + }, + { + "epoch": 1.9350683840127472, + "grad_norm": 1.253589726996753, + "learning_rate": 5.827716440848785e-08, + "loss": 0.22138425707817078, + "step": 7287 + }, + { + "epoch": 1.9353339529942901, + "grad_norm": 1.12038038288381, + "learning_rate": 5.7804785393282825e-08, + "loss": 0.19724398851394653, + "step": 7288 + }, + { + "epoch": 1.935599521975833, + "grad_norm": 1.4840167690508577, + "learning_rate": 5.7334323116056136e-08, + "loss": 0.25307583808898926, + "step": 7289 + }, + { + "epoch": 1.935865090957376, + "grad_norm": 1.2525903433235852, + "learning_rate": 5.686577766751078e-08, + "loss": 0.2436421811580658, + "step": 7290 + }, + { + "epoch": 1.936130659938919, + "grad_norm": 1.2518328182394873, + "learning_rate": 5.6399149137973394e-08, + "loss": 0.2164984941482544, + "step": 7291 + }, + { + "epoch": 1.936396228920462, + "grad_norm": 1.2277499731042363, + "learning_rate": 5.5934437617407576e-08, + "loss": 0.22526800632476807, + "step": 7292 + }, + { + "epoch": 1.936661797902005, + "grad_norm": 2.195756796154145, + "learning_rate": 5.547164319540277e-08, + "loss": 0.27787747979164124, + "step": 7293 + }, + { + "epoch": 1.936927366883548, + "grad_norm": 1.2647979578451993, + "learning_rate": 5.5010765961179825e-08, + "loss": 0.2188001275062561, + "step": 7294 + }, + { + "epoch": 1.937192935865091, + "grad_norm": 1.2454775538056309, + "learning_rate": 5.4551806003591e-08, + "loss": 0.22620335221290588, + "step": 7295 + }, + { + "epoch": 1.937458504846634, + "grad_norm": 1.186081247005514, + "learning_rate": 5.409476341111775e-08, + "loss": 0.20357783138751984, + "step": 7296 + }, + { + "epoch": 1.937724073828177, + "grad_norm": 1.2316030990526627, + "learning_rate": 5.3639638271872906e-08, + "loss": 0.22717830538749695, + "step": 7297 + }, + { + "epoch": 1.9379896428097199, + "grad_norm": 1.1600371116406252, + "learning_rate": 5.318643067360074e-08, + "loss": 0.20139163732528687, + "step": 7298 + }, + { + "epoch": 1.9382552117912628, + "grad_norm": 1.3377291184643103, + "learning_rate": 5.273514070367247e-08, + "loss": 0.2620807886123657, + "step": 7299 + }, + { + "epoch": 1.9385207807728058, + "grad_norm": 1.2240680803779018, + "learning_rate": 5.2285768449091834e-08, + "loss": 0.2102596014738083, + "step": 7300 + }, + { + "epoch": 1.9387863497543487, + "grad_norm": 1.3057613284367482, + "learning_rate": 5.183831399649175e-08, + "loss": 0.2105238288640976, + "step": 7301 + }, + { + "epoch": 1.9390519187358917, + "grad_norm": 1.2241670740951547, + "learning_rate": 5.1392777432138773e-08, + "loss": 0.22178848087787628, + "step": 7302 + }, + { + "epoch": 1.9393174877174346, + "grad_norm": 1.3648564311332518, + "learning_rate": 5.094915884192419e-08, + "loss": 0.23375345766544342, + "step": 7303 + }, + { + "epoch": 1.9395830566989776, + "grad_norm": 1.3411332724549108, + "learning_rate": 5.050745831137405e-08, + "loss": 0.22709332406520844, + "step": 7304 + }, + { + "epoch": 1.9398486256805205, + "grad_norm": 1.270429998105922, + "learning_rate": 5.0067675925642437e-08, + "loss": 0.2312362790107727, + "step": 7305 + }, + { + "epoch": 1.9401141946620635, + "grad_norm": 1.159162680689607, + "learning_rate": 4.962981176951376e-08, + "loss": 0.2014419138431549, + "step": 7306 + }, + { + "epoch": 1.9403797636436064, + "grad_norm": 1.4294147842238243, + "learning_rate": 4.9193865927404936e-08, + "loss": 0.23700466752052307, + "step": 7307 + }, + { + "epoch": 1.9406453326251494, + "grad_norm": 1.3814639969092575, + "learning_rate": 4.8759838483358745e-08, + "loss": 0.23362770676612854, + "step": 7308 + }, + { + "epoch": 1.9409109016066923, + "grad_norm": 1.4217349736822034, + "learning_rate": 4.832772952105269e-08, + "loss": 0.26057323813438416, + "step": 7309 + }, + { + "epoch": 1.9411764705882353, + "grad_norm": 1.1693504727058668, + "learning_rate": 4.789753912379014e-08, + "loss": 0.20954950153827667, + "step": 7310 + }, + { + "epoch": 1.9414420395697782, + "grad_norm": 1.1532528532836688, + "learning_rate": 4.746926737450919e-08, + "loss": 0.2100827842950821, + "step": 7311 + }, + { + "epoch": 1.9417076085513212, + "grad_norm": 1.2509560196931713, + "learning_rate": 4.7042914355773795e-08, + "loss": 0.216691792011261, + "step": 7312 + }, + { + "epoch": 1.9419731775328641, + "grad_norm": 1.2086430330598397, + "learning_rate": 4.6618480149780434e-08, + "loss": 0.22815749049186707, + "step": 7313 + }, + { + "epoch": 1.942238746514407, + "grad_norm": 1.3440658280324072, + "learning_rate": 4.6195964838353646e-08, + "loss": 0.23365731537342072, + "step": 7314 + }, + { + "epoch": 1.94250431549595, + "grad_norm": 1.5301363693806977, + "learning_rate": 4.577536850295161e-08, + "loss": 0.2112172693014145, + "step": 7315 + }, + { + "epoch": 1.942769884477493, + "grad_norm": 1.1945701714854287, + "learning_rate": 4.5356691224659466e-08, + "loss": 0.21821950376033783, + "step": 7316 + }, + { + "epoch": 1.943035453459036, + "grad_norm": 1.1491339078592526, + "learning_rate": 4.4939933084192646e-08, + "loss": 0.2374412566423416, + "step": 7317 + }, + { + "epoch": 1.943301022440579, + "grad_norm": 1.3549046355713708, + "learning_rate": 4.4525094161897987e-08, + "loss": 0.2483779489994049, + "step": 7318 + }, + { + "epoch": 1.9435665914221218, + "grad_norm": 1.327945477663327, + "learning_rate": 4.411217453775152e-08, + "loss": 0.23641882836818695, + "step": 7319 + }, + { + "epoch": 1.9438321604036648, + "grad_norm": 1.3586245026219714, + "learning_rate": 4.370117429135956e-08, + "loss": 0.24779492616653442, + "step": 7320 + }, + { + "epoch": 1.944097729385208, + "grad_norm": 1.1641395539357577, + "learning_rate": 4.329209350195651e-08, + "loss": 0.20288071036338806, + "step": 7321 + }, + { + "epoch": 1.944363298366751, + "grad_norm": 1.2676649817410126, + "learning_rate": 4.288493224840928e-08, + "loss": 0.24286144971847534, + "step": 7322 + }, + { + "epoch": 1.9446288673482939, + "grad_norm": 1.3164985028745375, + "learning_rate": 4.2479690609213976e-08, + "loss": 0.22825902700424194, + "step": 7323 + }, + { + "epoch": 1.9448944363298368, + "grad_norm": 1.255280762331411, + "learning_rate": 4.207636866249587e-08, + "loss": 0.22563335299491882, + "step": 7324 + }, + { + "epoch": 1.9451600053113798, + "grad_norm": 1.2990544857906836, + "learning_rate": 4.167496648601166e-08, + "loss": 0.22853273153305054, + "step": 7325 + }, + { + "epoch": 1.9454255742929227, + "grad_norm": 1.1281442356079434, + "learning_rate": 4.1275484157147216e-08, + "loss": 0.20790672302246094, + "step": 7326 + }, + { + "epoch": 1.9456911432744657, + "grad_norm": 1.1980029703513235, + "learning_rate": 4.087792175291649e-08, + "loss": 0.2165423035621643, + "step": 7327 + }, + { + "epoch": 1.9459567122560086, + "grad_norm": 1.3858946395294593, + "learning_rate": 4.048227934996485e-08, + "loss": 0.2605394721031189, + "step": 7328 + }, + { + "epoch": 1.9462222812375516, + "grad_norm": 1.280554987273632, + "learning_rate": 4.008855702456904e-08, + "loss": 0.22624900937080383, + "step": 7329 + }, + { + "epoch": 1.9464878502190945, + "grad_norm": 1.1967949808184344, + "learning_rate": 3.9696754852632804e-08, + "loss": 0.23086196184158325, + "step": 7330 + }, + { + "epoch": 1.9467534192006375, + "grad_norm": 1.4330145211347993, + "learning_rate": 3.9306872909691265e-08, + "loss": 0.24633410573005676, + "step": 7331 + }, + { + "epoch": 1.9470189881821804, + "grad_norm": 2.2568432653955894, + "learning_rate": 3.8918911270908745e-08, + "loss": 0.2535535395145416, + "step": 7332 + }, + { + "epoch": 1.9472845571637234, + "grad_norm": 1.3555855555438505, + "learning_rate": 3.853287001108097e-08, + "loss": 0.23904260993003845, + "step": 7333 + }, + { + "epoch": 1.9475501261452663, + "grad_norm": 1.3963340527453718, + "learning_rate": 3.814874920463063e-08, + "loss": 0.22525179386138916, + "step": 7334 + }, + { + "epoch": 1.9478156951268093, + "grad_norm": 1.415360473918547, + "learning_rate": 3.776654892561293e-08, + "loss": 0.21139883995056152, + "step": 7335 + }, + { + "epoch": 1.9480812641083523, + "grad_norm": 1.2272269269066283, + "learning_rate": 3.738626924771005e-08, + "loss": 0.21939310431480408, + "step": 7336 + }, + { + "epoch": 1.9483468330898952, + "grad_norm": 1.1845473795192814, + "learning_rate": 3.7007910244236664e-08, + "loss": 0.22852283716201782, + "step": 7337 + }, + { + "epoch": 1.9486124020714382, + "grad_norm": 1.2529721413425112, + "learning_rate": 3.663147198813666e-08, + "loss": 0.20769211649894714, + "step": 7338 + }, + { + "epoch": 1.948877971052981, + "grad_norm": 1.216093250313145, + "learning_rate": 3.625695455198086e-08, + "loss": 0.21721890568733215, + "step": 7339 + }, + { + "epoch": 1.949143540034524, + "grad_norm": 1.261493312403511, + "learning_rate": 3.588435800797263e-08, + "loss": 0.24236848950386047, + "step": 7340 + }, + { + "epoch": 1.949409109016067, + "grad_norm": 1.21142050375974, + "learning_rate": 3.5513682427944505e-08, + "loss": 0.2300192266702652, + "step": 7341 + }, + { + "epoch": 1.94967467799761, + "grad_norm": 1.1850825722481098, + "learning_rate": 3.5144927883358215e-08, + "loss": 0.21636728942394257, + "step": 7342 + }, + { + "epoch": 1.949940246979153, + "grad_norm": 1.3000939007920165, + "learning_rate": 3.477809444530578e-08, + "loss": 0.25367966294288635, + "step": 7343 + }, + { + "epoch": 1.9502058159606959, + "grad_norm": 1.4245768388392126, + "learning_rate": 3.4413182184507285e-08, + "loss": 0.24514247477054596, + "step": 7344 + }, + { + "epoch": 1.9504713849422388, + "grad_norm": 1.1048557155163508, + "learning_rate": 3.405019117131425e-08, + "loss": 0.18460404872894287, + "step": 7345 + }, + { + "epoch": 1.9507369539237818, + "grad_norm": 1.275062396510646, + "learning_rate": 3.3689121475706244e-08, + "loss": 0.2096845805644989, + "step": 7346 + }, + { + "epoch": 1.9510025229053247, + "grad_norm": 1.2314050158221594, + "learning_rate": 3.332997316729536e-08, + "loss": 0.22435057163238525, + "step": 7347 + }, + { + "epoch": 1.9512680918868677, + "grad_norm": 1.208912476805739, + "learning_rate": 3.2972746315318436e-08, + "loss": 0.20798128843307495, + "step": 7348 + }, + { + "epoch": 1.9515336608684106, + "grad_norm": 1.2922181556866412, + "learning_rate": 3.2617440988645945e-08, + "loss": 0.23958316445350647, + "step": 7349 + }, + { + "epoch": 1.9517992298499536, + "grad_norm": 1.3799363972113297, + "learning_rate": 3.2264057255777525e-08, + "loss": 0.21934574842453003, + "step": 7350 + }, + { + "epoch": 1.9520647988314965, + "grad_norm": 1.2014453671941887, + "learning_rate": 3.1912595184839804e-08, + "loss": 0.24321375787258148, + "step": 7351 + }, + { + "epoch": 1.9523303678130395, + "grad_norm": 1.1661737247347086, + "learning_rate": 3.156305484359079e-08, + "loss": 0.20932736992835999, + "step": 7352 + }, + { + "epoch": 1.9525959367945824, + "grad_norm": 1.2983329607047998, + "learning_rate": 3.12154362994177e-08, + "loss": 0.19824840128421783, + "step": 7353 + }, + { + "epoch": 1.9528615057761254, + "grad_norm": 1.3128795915591134, + "learning_rate": 3.0869739619338034e-08, + "loss": 0.212745800614357, + "step": 7354 + }, + { + "epoch": 1.9531270747576683, + "grad_norm": 1.247129470001585, + "learning_rate": 3.0525964869997374e-08, + "loss": 0.23044779896736145, + "step": 7355 + }, + { + "epoch": 1.9533926437392113, + "grad_norm": 1.2323689907378315, + "learning_rate": 3.018411211767158e-08, + "loss": 0.2237459123134613, + "step": 7356 + }, + { + "epoch": 1.9536582127207542, + "grad_norm": 1.3228713238231502, + "learning_rate": 2.984418142826684e-08, + "loss": 0.2592429518699646, + "step": 7357 + }, + { + "epoch": 1.9539237817022972, + "grad_norm": 1.1444806738907807, + "learning_rate": 2.9506172867315163e-08, + "loss": 0.17559123039245605, + "step": 7358 + }, + { + "epoch": 1.9541893506838401, + "grad_norm": 1.287127142439038, + "learning_rate": 2.917008649998332e-08, + "loss": 0.24143017828464508, + "step": 7359 + }, + { + "epoch": 1.954454919665383, + "grad_norm": 1.310526275865734, + "learning_rate": 2.883592239106392e-08, + "loss": 0.23560799658298492, + "step": 7360 + }, + { + "epoch": 1.954720488646926, + "grad_norm": 1.357586181070064, + "learning_rate": 2.8503680604979878e-08, + "loss": 0.2456119805574417, + "step": 7361 + }, + { + "epoch": 1.954986057628469, + "grad_norm": 1.2143945666113656, + "learning_rate": 2.817336120578329e-08, + "loss": 0.21878069639205933, + "step": 7362 + }, + { + "epoch": 1.955251626610012, + "grad_norm": 1.2288786099560105, + "learning_rate": 2.7844964257155438e-08, + "loss": 0.20496608316898346, + "step": 7363 + }, + { + "epoch": 1.955517195591555, + "grad_norm": 1.2067776880816419, + "learning_rate": 2.7518489822407902e-08, + "loss": 0.23219498991966248, + "step": 7364 + }, + { + "epoch": 1.9557827645730979, + "grad_norm": 1.3499865013336032, + "learning_rate": 2.7193937964481442e-08, + "loss": 0.2284272015094757, + "step": 7365 + }, + { + "epoch": 1.9560483335546408, + "grad_norm": 1.3177047034961433, + "learning_rate": 2.68713087459449e-08, + "loss": 0.22303974628448486, + "step": 7366 + }, + { + "epoch": 1.9563139025361838, + "grad_norm": 1.337791009624748, + "learning_rate": 2.655060222899741e-08, + "loss": 0.22489243745803833, + "step": 7367 + }, + { + "epoch": 1.9565794715177267, + "grad_norm": 1.2719472133739602, + "learning_rate": 2.6231818475468407e-08, + "loss": 0.27986854314804077, + "step": 7368 + }, + { + "epoch": 1.9568450404992697, + "grad_norm": 1.3884495118427658, + "learning_rate": 2.591495754681539e-08, + "loss": 0.29321208596229553, + "step": 7369 + }, + { + "epoch": 1.9571106094808126, + "grad_norm": 1.3942541242432065, + "learning_rate": 2.5600019504125053e-08, + "loss": 0.2560982406139374, + "step": 7370 + }, + { + "epoch": 1.9573761784623556, + "grad_norm": 1.4283472016053, + "learning_rate": 2.528700440811438e-08, + "loss": 0.264164537191391, + "step": 7371 + }, + { + "epoch": 1.9576417474438985, + "grad_norm": 1.1832183058517125, + "learning_rate": 2.4975912319127326e-08, + "loss": 0.2135474979877472, + "step": 7372 + }, + { + "epoch": 1.9579073164254415, + "grad_norm": 1.265205421311282, + "learning_rate": 2.466674329714036e-08, + "loss": 0.2100939154624939, + "step": 7373 + }, + { + "epoch": 1.9581728854069844, + "grad_norm": 1.395586955333931, + "learning_rate": 2.4359497401758026e-08, + "loss": 0.23327934741973877, + "step": 7374 + }, + { + "epoch": 1.9584384543885274, + "grad_norm": 1.0722904974981595, + "learning_rate": 2.405417469221183e-08, + "loss": 0.18830639123916626, + "step": 7375 + }, + { + "epoch": 1.9587040233700703, + "grad_norm": 1.284092871282835, + "learning_rate": 2.3750775227364686e-08, + "loss": 0.2558823227882385, + "step": 7376 + }, + { + "epoch": 1.9589695923516133, + "grad_norm": 1.2598399224501151, + "learning_rate": 2.3449299065710917e-08, + "loss": 0.24241580069065094, + "step": 7377 + }, + { + "epoch": 1.9592351613331562, + "grad_norm": 1.1684337819721369, + "learning_rate": 2.3149746265368478e-08, + "loss": 0.21678534150123596, + "step": 7378 + }, + { + "epoch": 1.9595007303146992, + "grad_norm": 1.2804084693654512, + "learning_rate": 2.2852116884088947e-08, + "loss": 0.20956794917583466, + "step": 7379 + }, + { + "epoch": 1.9597662992962421, + "grad_norm": 1.2682321373225172, + "learning_rate": 2.2556410979253095e-08, + "loss": 0.2185555249452591, + "step": 7380 + }, + { + "epoch": 1.960031868277785, + "grad_norm": 1.3369178147645102, + "learning_rate": 2.226262860786643e-08, + "loss": 0.21802933514118195, + "step": 7381 + }, + { + "epoch": 1.960297437259328, + "grad_norm": 1.4565773631347612, + "learning_rate": 2.1970769826570317e-08, + "loss": 0.22842684388160706, + "step": 7382 + }, + { + "epoch": 1.960563006240871, + "grad_norm": 1.2737807469252465, + "learning_rate": 2.1680834691628627e-08, + "loss": 0.23380814492702484, + "step": 7383 + }, + { + "epoch": 1.960828575222414, + "grad_norm": 1.311531421948895, + "learning_rate": 2.1392823258938877e-08, + "loss": 0.23476335406303406, + "step": 7384 + }, + { + "epoch": 1.961094144203957, + "grad_norm": 1.2100451325455786, + "learning_rate": 2.110673558402554e-08, + "loss": 0.19657662510871887, + "step": 7385 + }, + { + "epoch": 1.9613597131854998, + "grad_norm": 1.191542044024077, + "learning_rate": 2.0822571722044494e-08, + "loss": 0.1724000722169876, + "step": 7386 + }, + { + "epoch": 1.9616252821670428, + "grad_norm": 1.3535695538712786, + "learning_rate": 2.0540331727777475e-08, + "loss": 0.22960031032562256, + "step": 7387 + }, + { + "epoch": 1.9618908511485857, + "grad_norm": 1.4028518726902017, + "learning_rate": 2.0260015655637623e-08, + "loss": 0.2601638436317444, + "step": 7388 + }, + { + "epoch": 1.9621564201301287, + "grad_norm": 1.3907771240802078, + "learning_rate": 1.998162355966726e-08, + "loss": 0.2562445402145386, + "step": 7389 + }, + { + "epoch": 1.9624219891116716, + "grad_norm": 1.1881922077977833, + "learning_rate": 1.9705155493535688e-08, + "loss": 0.20073221623897552, + "step": 7390 + }, + { + "epoch": 1.9626875580932146, + "grad_norm": 1.2076860773847395, + "learning_rate": 1.9430611510544707e-08, + "loss": 0.18454071879386902, + "step": 7391 + }, + { + "epoch": 1.9629531270747576, + "grad_norm": 1.1878203901407238, + "learning_rate": 1.915799166362087e-08, + "loss": 0.18515023589134216, + "step": 7392 + }, + { + "epoch": 1.9632186960563005, + "grad_norm": 1.3323308983960227, + "learning_rate": 1.8887296005323242e-08, + "loss": 0.25658512115478516, + "step": 7393 + }, + { + "epoch": 1.9634842650378435, + "grad_norm": 1.4122913637661163, + "learning_rate": 1.861852458783897e-08, + "loss": 0.2219933569431305, + "step": 7394 + }, + { + "epoch": 1.9637498340193864, + "grad_norm": 1.3005286775146463, + "learning_rate": 1.8351677462983276e-08, + "loss": 0.24949616193771362, + "step": 7395 + }, + { + "epoch": 1.9640154030009294, + "grad_norm": 1.4026906711741571, + "learning_rate": 1.808675468220167e-08, + "loss": 0.24348726868629456, + "step": 7396 + }, + { + "epoch": 1.9642809719824723, + "grad_norm": 1.3848607909391346, + "learning_rate": 1.782375629656885e-08, + "loss": 0.2329033762216568, + "step": 7397 + }, + { + "epoch": 1.9645465409640153, + "grad_norm": 1.2075544796662319, + "learning_rate": 1.7562682356786488e-08, + "loss": 0.22265426814556122, + "step": 7398 + }, + { + "epoch": 1.9648121099455582, + "grad_norm": 1.2895787739524316, + "learning_rate": 1.730353291318654e-08, + "loss": 0.24438990652561188, + "step": 7399 + }, + { + "epoch": 1.9650776789271012, + "grad_norm": 1.3518107746112518, + "learning_rate": 1.704630801573015e-08, + "loss": 0.2632136642932892, + "step": 7400 + }, + { + "epoch": 1.9653432479086441, + "grad_norm": 1.3377019916165274, + "learning_rate": 1.6791007714008766e-08, + "loss": 0.22230927646160126, + "step": 7401 + }, + { + "epoch": 1.965608816890187, + "grad_norm": 1.3577982430958546, + "learning_rate": 1.653763205723968e-08, + "loss": 0.26317098736763, + "step": 7402 + }, + { + "epoch": 1.96587438587173, + "grad_norm": 1.3261620865973216, + "learning_rate": 1.628618109427049e-08, + "loss": 0.23205846548080444, + "step": 7403 + }, + { + "epoch": 1.966139954853273, + "grad_norm": 1.1507090645553337, + "learning_rate": 1.6036654873579084e-08, + "loss": 0.202583909034729, + "step": 7404 + }, + { + "epoch": 1.966405523834816, + "grad_norm": 1.3959078486467311, + "learning_rate": 1.5789053443270308e-08, + "loss": 0.2579672038555145, + "step": 7405 + }, + { + "epoch": 1.966671092816359, + "grad_norm": 1.4293268160842907, + "learning_rate": 1.5543376851080428e-08, + "loss": 0.27483606338500977, + "step": 7406 + }, + { + "epoch": 1.966936661797902, + "grad_norm": 1.6466914863601023, + "learning_rate": 1.5299625144370444e-08, + "loss": 0.22510311007499695, + "step": 7407 + }, + { + "epoch": 1.967202230779445, + "grad_norm": 1.3926470224592478, + "learning_rate": 1.505779837013499e-08, + "loss": 0.24941131472587585, + "step": 7408 + }, + { + "epoch": 1.967467799760988, + "grad_norm": 1.316826202799614, + "learning_rate": 1.481789657499344e-08, + "loss": 0.22301170229911804, + "step": 7409 + }, + { + "epoch": 1.967733368742531, + "grad_norm": 1.4513024231529628, + "learning_rate": 1.4579919805198795e-08, + "loss": 0.23045194149017334, + "step": 7410 + }, + { + "epoch": 1.9679989377240739, + "grad_norm": 1.2632313332378347, + "learning_rate": 1.4343868106627689e-08, + "loss": 0.25892990827560425, + "step": 7411 + }, + { + "epoch": 1.9682645067056168, + "grad_norm": 1.316940344896203, + "learning_rate": 1.4109741524788167e-08, + "loss": 0.23086567223072052, + "step": 7412 + }, + { + "epoch": 1.9685300756871598, + "grad_norm": 1.2838593122102535, + "learning_rate": 1.3877540104818566e-08, + "loss": 0.2514735460281372, + "step": 7413 + }, + { + "epoch": 1.9687956446687027, + "grad_norm": 1.2787980812943278, + "learning_rate": 1.3647263891484187e-08, + "loss": 0.21824213862419128, + "step": 7414 + }, + { + "epoch": 1.9690612136502457, + "grad_norm": 1.3351479110439386, + "learning_rate": 1.3418912929178407e-08, + "loss": 0.2262609452009201, + "step": 7415 + }, + { + "epoch": 1.9693267826317886, + "grad_norm": 1.2373165426791106, + "learning_rate": 1.3192487261926013e-08, + "loss": 0.23119492828845978, + "step": 7416 + }, + { + "epoch": 1.9695923516133316, + "grad_norm": 1.2213219567044962, + "learning_rate": 1.2967986933378751e-08, + "loss": 0.20173534750938416, + "step": 7417 + }, + { + "epoch": 1.9698579205948745, + "grad_norm": 1.3102471335629409, + "learning_rate": 1.2745411986816447e-08, + "loss": 0.2212662547826767, + "step": 7418 + }, + { + "epoch": 1.9701234895764175, + "grad_norm": 1.2461352597734543, + "learning_rate": 1.2524762465151442e-08, + "loss": 0.21990706026554108, + "step": 7419 + }, + { + "epoch": 1.9703890585579604, + "grad_norm": 1.2130065240866306, + "learning_rate": 1.2306038410919707e-08, + "loss": 0.18648189306259155, + "step": 7420 + }, + { + "epoch": 1.9706546275395034, + "grad_norm": 1.334350070832243, + "learning_rate": 1.2089239866289737e-08, + "loss": 0.23273484408855438, + "step": 7421 + }, + { + "epoch": 1.9709201965210463, + "grad_norm": 1.3083344252475524, + "learning_rate": 1.1874366873059206e-08, + "loss": 0.21514324843883514, + "step": 7422 + }, + { + "epoch": 1.9711857655025893, + "grad_norm": 1.2628839077455776, + "learning_rate": 1.1661419472650538e-08, + "loss": 0.2544926106929779, + "step": 7423 + }, + { + "epoch": 1.9714513344841322, + "grad_norm": 1.1881271398224822, + "learning_rate": 1.1450397706119776e-08, + "loss": 0.235082745552063, + "step": 7424 + }, + { + "epoch": 1.9717169034656752, + "grad_norm": 1.3712056139426412, + "learning_rate": 1.1241301614147715e-08, + "loss": 0.24777358770370483, + "step": 7425 + }, + { + "epoch": 1.9719824724472181, + "grad_norm": 1.5271853101134352, + "learning_rate": 1.1034131237045443e-08, + "loss": 0.23714174330234528, + "step": 7426 + }, + { + "epoch": 1.972248041428761, + "grad_norm": 1.3430700979817631, + "learning_rate": 1.0828886614754342e-08, + "loss": 0.24665668606758118, + "step": 7427 + }, + { + "epoch": 1.972513610410304, + "grad_norm": 1.3931055934155485, + "learning_rate": 1.062556778684276e-08, + "loss": 0.23421131074428558, + "step": 7428 + }, + { + "epoch": 1.972779179391847, + "grad_norm": 1.274566697934482, + "learning_rate": 1.0424174792508234e-08, + "loss": 0.23443526029586792, + "step": 7429 + }, + { + "epoch": 1.97304474837339, + "grad_norm": 1.3315316306417777, + "learning_rate": 1.0224707670576373e-08, + "loss": 0.24177192151546478, + "step": 7430 + }, + { + "epoch": 1.973310317354933, + "grad_norm": 1.4439736433803494, + "learning_rate": 1.002716645950197e-08, + "loss": 0.20957472920417786, + "step": 7431 + }, + { + "epoch": 1.9735758863364758, + "grad_norm": 1.2252184749081894, + "learning_rate": 9.831551197370116e-09, + "loss": 0.21594710648059845, + "step": 7432 + }, + { + "epoch": 1.9738414553180188, + "grad_norm": 1.4445839220306718, + "learning_rate": 9.637861921891756e-09, + "loss": 0.2372155487537384, + "step": 7433 + }, + { + "epoch": 1.974107024299562, + "grad_norm": 1.295551996082086, + "learning_rate": 9.446098670408132e-09, + "loss": 0.211237370967865, + "step": 7434 + }, + { + "epoch": 1.974372593281105, + "grad_norm": 1.3006326416512255, + "learning_rate": 9.256261479888562e-09, + "loss": 0.25123757123947144, + "step": 7435 + }, + { + "epoch": 1.9746381622626479, + "grad_norm": 1.2670719422156809, + "learning_rate": 9.068350386932655e-09, + "loss": 0.23048831522464752, + "step": 7436 + }, + { + "epoch": 1.9749037312441908, + "grad_norm": 1.2157385411321804, + "learning_rate": 8.882365427765883e-09, + "loss": 0.22923544049263, + "step": 7437 + }, + { + "epoch": 1.9751693002257338, + "grad_norm": 1.1040485462060259, + "learning_rate": 8.698306638245114e-09, + "loss": 0.199529767036438, + "step": 7438 + }, + { + "epoch": 1.9754348692072767, + "grad_norm": 1.314383264088006, + "learning_rate": 8.516174053854187e-09, + "loss": 0.22778059542179108, + "step": 7439 + }, + { + "epoch": 1.9757004381888197, + "grad_norm": 1.3428968973890816, + "learning_rate": 8.335967709706128e-09, + "loss": 0.22807848453521729, + "step": 7440 + }, + { + "epoch": 1.9759660071703626, + "grad_norm": 1.3347725648799278, + "learning_rate": 8.157687640543143e-09, + "loss": 0.24764932692050934, + "step": 7441 + }, + { + "epoch": 1.9762315761519056, + "grad_norm": 1.376463462320243, + "learning_rate": 7.98133388073552e-09, + "loss": 0.22213312983512878, + "step": 7442 + }, + { + "epoch": 1.9764971451334485, + "grad_norm": 1.2799794398059858, + "learning_rate": 7.806906464281617e-09, + "loss": 0.22822709381580353, + "step": 7443 + }, + { + "epoch": 1.9767627141149915, + "grad_norm": 1.2148981447749936, + "learning_rate": 7.634405424808977e-09, + "loss": 0.2236599326133728, + "step": 7444 + }, + { + "epoch": 1.9770282830965344, + "grad_norm": 1.263255403192069, + "learning_rate": 7.463830795574334e-09, + "loss": 0.20294487476348877, + "step": 7445 + }, + { + "epoch": 1.9772938520780774, + "grad_norm": 1.3034015114742201, + "learning_rate": 7.295182609461382e-09, + "loss": 0.2187870740890503, + "step": 7446 + }, + { + "epoch": 1.9775594210596203, + "grad_norm": 1.362800468373944, + "learning_rate": 7.128460898984113e-09, + "loss": 0.2629002630710602, + "step": 7447 + }, + { + "epoch": 1.9778249900411633, + "grad_norm": 1.3155096560899557, + "learning_rate": 6.963665696285704e-09, + "loss": 0.24024136364459991, + "step": 7448 + }, + { + "epoch": 1.9780905590227063, + "grad_norm": 1.240780926418524, + "learning_rate": 6.800797033134077e-09, + "loss": 0.22334401309490204, + "step": 7449 + }, + { + "epoch": 1.9783561280042492, + "grad_norm": 1.2853076050759633, + "learning_rate": 6.639854940930779e-09, + "loss": 0.21535055339336395, + "step": 7450 + }, + { + "epoch": 1.9786216969857922, + "grad_norm": 1.3182931470109147, + "learning_rate": 6.480839450703214e-09, + "loss": 0.26096785068511963, + "step": 7451 + }, + { + "epoch": 1.978887265967335, + "grad_norm": 1.2393293544951642, + "learning_rate": 6.323750593106859e-09, + "loss": 0.22461384534835815, + "step": 7452 + }, + { + "epoch": 1.979152834948878, + "grad_norm": 1.2999818118404687, + "learning_rate": 6.168588398426378e-09, + "loss": 0.24372713267803192, + "step": 7453 + }, + { + "epoch": 1.979418403930421, + "grad_norm": 1.2743158428703243, + "learning_rate": 6.015352896576732e-09, + "loss": 0.19544872641563416, + "step": 7454 + }, + { + "epoch": 1.979683972911964, + "grad_norm": 1.1957228310016947, + "learning_rate": 5.864044117097623e-09, + "loss": 0.22004768252372742, + "step": 7455 + }, + { + "epoch": 1.979949541893507, + "grad_norm": 1.3624679399119848, + "learning_rate": 5.714662089162381e-09, + "loss": 0.2509492337703705, + "step": 7456 + }, + { + "epoch": 1.9802151108750499, + "grad_norm": 1.1563599654889156, + "learning_rate": 5.567206841567974e-09, + "loss": 0.19315078854560852, + "step": 7457 + }, + { + "epoch": 1.9804806798565928, + "grad_norm": 1.1652222675857882, + "learning_rate": 5.421678402741659e-09, + "loss": 0.20722024142742157, + "step": 7458 + }, + { + "epoch": 1.9807462488381358, + "grad_norm": 1.2430974429352135, + "learning_rate": 5.278076800742105e-09, + "loss": 0.2041238397359848, + "step": 7459 + }, + { + "epoch": 1.9810118178196787, + "grad_norm": 1.226308526828602, + "learning_rate": 5.136402063251611e-09, + "loss": 0.21889238059520721, + "step": 7460 + }, + { + "epoch": 1.9812773868012217, + "grad_norm": 1.2925316754685727, + "learning_rate": 4.996654217584995e-09, + "loss": 0.23580557107925415, + "step": 7461 + }, + { + "epoch": 1.9815429557827646, + "grad_norm": 1.5912986799887796, + "learning_rate": 4.858833290684039e-09, + "loss": 0.24967315793037415, + "step": 7462 + }, + { + "epoch": 1.9818085247643076, + "grad_norm": 1.3642305983011473, + "learning_rate": 4.722939309116381e-09, + "loss": 0.21802274882793427, + "step": 7463 + }, + { + "epoch": 1.9820740937458505, + "grad_norm": 1.2778589071361273, + "learning_rate": 4.588972299084393e-09, + "loss": 0.2641376554965973, + "step": 7464 + }, + { + "epoch": 1.9823396627273935, + "grad_norm": 1.181293128126433, + "learning_rate": 4.456932286412974e-09, + "loss": 0.20166629552841187, + "step": 7465 + }, + { + "epoch": 1.9826052317089364, + "grad_norm": 1.3531318882305197, + "learning_rate": 4.3268192965573164e-09, + "loss": 0.22796592116355896, + "step": 7466 + }, + { + "epoch": 1.9828708006904794, + "grad_norm": 1.1849961491022751, + "learning_rate": 4.19863335460402e-09, + "loss": 0.19833455979824066, + "step": 7467 + }, + { + "epoch": 1.9831363696720223, + "grad_norm": 1.273561592311718, + "learning_rate": 4.07237448526554e-09, + "loss": 0.23009257018566132, + "step": 7468 + }, + { + "epoch": 1.9834019386535653, + "grad_norm": 1.2188380225442625, + "learning_rate": 3.9480427128812945e-09, + "loss": 0.22418440878391266, + "step": 7469 + }, + { + "epoch": 1.9836675076351082, + "grad_norm": 1.2878640211544259, + "learning_rate": 3.825638061421e-09, + "loss": 0.2015800178050995, + "step": 7470 + }, + { + "epoch": 1.9839330766166512, + "grad_norm": 1.2488639013131106, + "learning_rate": 3.705160554485776e-09, + "loss": 0.22166767716407776, + "step": 7471 + }, + { + "epoch": 1.9841986455981941, + "grad_norm": 1.476152466944419, + "learning_rate": 3.5866102152981586e-09, + "loss": 0.3154509961605072, + "step": 7472 + }, + { + "epoch": 1.984464214579737, + "grad_norm": 1.3338840715084874, + "learning_rate": 3.4699870667165292e-09, + "loss": 0.25891417264938354, + "step": 7473 + }, + { + "epoch": 1.98472978356128, + "grad_norm": 1.2984805204003045, + "learning_rate": 3.355291131222904e-09, + "loss": 0.24837851524353027, + "step": 7474 + }, + { + "epoch": 1.984995352542823, + "grad_norm": 1.2923319105031845, + "learning_rate": 3.2425224309307055e-09, + "loss": 0.24254213273525238, + "step": 7475 + }, + { + "epoch": 1.985260921524366, + "grad_norm": 1.3479980629574153, + "learning_rate": 3.1316809875781005e-09, + "loss": 0.24822884798049927, + "step": 7476 + }, + { + "epoch": 1.985526490505909, + "grad_norm": 1.2515754926310612, + "learning_rate": 3.022766822535772e-09, + "loss": 0.19553488492965698, + "step": 7477 + }, + { + "epoch": 1.9857920594874519, + "grad_norm": 1.289139949226706, + "learning_rate": 2.9157799568002576e-09, + "loss": 0.24758943915367126, + "step": 7478 + }, + { + "epoch": 1.9860576284689948, + "grad_norm": 1.3254058481790592, + "learning_rate": 2.810720410998391e-09, + "loss": 0.22947746515274048, + "step": 7479 + }, + { + "epoch": 1.9863231974505378, + "grad_norm": 1.1718425441422213, + "learning_rate": 2.7075882053828605e-09, + "loss": 0.20573696494102478, + "step": 7480 + }, + { + "epoch": 1.9865887664320807, + "grad_norm": 1.3248019948595686, + "learning_rate": 2.606383359837761e-09, + "loss": 0.2547800838947296, + "step": 7481 + }, + { + "epoch": 1.9868543354136237, + "grad_norm": 1.3239089800396548, + "learning_rate": 2.507105893874151e-09, + "loss": 0.22227191925048828, + "step": 7482 + }, + { + "epoch": 1.9871199043951666, + "grad_norm": 1.379027057566697, + "learning_rate": 2.409755826630056e-09, + "loss": 0.24687603116035461, + "step": 7483 + }, + { + "epoch": 1.9873854733767096, + "grad_norm": 1.3626347731044859, + "learning_rate": 2.3143331768749053e-09, + "loss": 0.23577818274497986, + "step": 7484 + }, + { + "epoch": 1.9876510423582525, + "grad_norm": 1.2429616783261994, + "learning_rate": 2.2208379630039858e-09, + "loss": 0.23012465238571167, + "step": 7485 + }, + { + "epoch": 1.9879166113397955, + "grad_norm": 1.2667278392117014, + "learning_rate": 2.129270203043987e-09, + "loss": 0.21479251980781555, + "step": 7486 + }, + { + "epoch": 1.9881821803213384, + "grad_norm": 1.2419157692275362, + "learning_rate": 2.039629914645236e-09, + "loss": 0.24436548352241516, + "step": 7487 + }, + { + "epoch": 1.9884477493028814, + "grad_norm": 1.3198752588445606, + "learning_rate": 1.951917115091684e-09, + "loss": 0.22225134074687958, + "step": 7488 + }, + { + "epoch": 1.9887133182844243, + "grad_norm": 1.4243538533938824, + "learning_rate": 1.8661318212920275e-09, + "loss": 0.22320827841758728, + "step": 7489 + }, + { + "epoch": 1.9889788872659673, + "grad_norm": 1.3025984911365984, + "learning_rate": 1.7822740497852597e-09, + "loss": 0.2317924201488495, + "step": 7490 + }, + { + "epoch": 1.9892444562475102, + "grad_norm": 1.370204940685918, + "learning_rate": 1.700343816738448e-09, + "loss": 0.2275170385837555, + "step": 7491 + }, + { + "epoch": 1.9895100252290532, + "grad_norm": 1.652167024814656, + "learning_rate": 1.6203411379456247e-09, + "loss": 0.24541540443897247, + "step": 7492 + }, + { + "epoch": 1.9897755942105961, + "grad_norm": 1.311164124852614, + "learning_rate": 1.5422660288322288e-09, + "loss": 0.23041896522045135, + "step": 7493 + }, + { + "epoch": 1.990041163192139, + "grad_norm": 1.301476042648128, + "learning_rate": 1.4661185044484438e-09, + "loss": 0.22362437844276428, + "step": 7494 + }, + { + "epoch": 1.990306732173682, + "grad_norm": 1.1872303288026824, + "learning_rate": 1.3918985794747486e-09, + "loss": 0.22082944214344025, + "step": 7495 + }, + { + "epoch": 1.990572301155225, + "grad_norm": 1.2985516009859217, + "learning_rate": 1.3196062682208078e-09, + "loss": 0.2210516780614853, + "step": 7496 + }, + { + "epoch": 1.990837870136768, + "grad_norm": 1.2609254238659025, + "learning_rate": 1.249241584623251e-09, + "loss": 0.21891455352306366, + "step": 7497 + }, + { + "epoch": 1.991103439118311, + "grad_norm": 1.2687100133579783, + "learning_rate": 1.1808045422478932e-09, + "loss": 0.23363247513771057, + "step": 7498 + }, + { + "epoch": 1.9913690080998538, + "grad_norm": 1.188481032582791, + "learning_rate": 1.1142951542875146e-09, + "loss": 0.20676104724407196, + "step": 7499 + }, + { + "epoch": 1.9916345770813968, + "grad_norm": 1.2983095103442552, + "learning_rate": 1.0497134335663018e-09, + "loss": 0.23037788271903992, + "step": 7500 + }, + { + "epoch": 1.9919001460629397, + "grad_norm": 1.1706822471326355, + "learning_rate": 9.870593925320748e-10, + "loss": 0.21958573162555695, + "step": 7501 + }, + { + "epoch": 1.9921657150444827, + "grad_norm": 1.3574206120623875, + "learning_rate": 9.263330432662809e-10, + "loss": 0.23280993103981018, + "step": 7502 + }, + { + "epoch": 1.9924312840260257, + "grad_norm": 1.2662411212973668, + "learning_rate": 8.675343974762219e-10, + "loss": 0.2254818230867386, + "step": 7503 + }, + { + "epoch": 1.9926968530075686, + "grad_norm": 1.255709874874282, + "learning_rate": 8.106634664950541e-10, + "loss": 0.1850586235523224, + "step": 7504 + }, + { + "epoch": 1.9929624219891116, + "grad_norm": 1.1965362861662039, + "learning_rate": 7.557202612895609e-10, + "loss": 0.21080443263053894, + "step": 7505 + }, + { + "epoch": 1.9932279909706545, + "grad_norm": 1.2788710791805473, + "learning_rate": 7.027047924512698e-10, + "loss": 0.21604907512664795, + "step": 7506 + }, + { + "epoch": 1.9934935599521975, + "grad_norm": 1.287068201404914, + "learning_rate": 6.516170701997837e-10, + "loss": 0.24684564769268036, + "step": 7507 + }, + { + "epoch": 1.9937591289337404, + "grad_norm": 1.2013851004960618, + "learning_rate": 6.024571043861116e-10, + "loss": 0.21735510230064392, + "step": 7508 + }, + { + "epoch": 1.9940246979152834, + "grad_norm": 1.2853945699676002, + "learning_rate": 5.552249044860069e-10, + "loss": 0.23616179823875427, + "step": 7509 + }, + { + "epoch": 1.9942902668968263, + "grad_norm": 1.280261468721699, + "learning_rate": 5.099204796066293e-10, + "loss": 0.23930129408836365, + "step": 7510 + }, + { + "epoch": 1.9945558358783693, + "grad_norm": 1.30216307212454, + "learning_rate": 4.665438384809928e-10, + "loss": 0.2354714274406433, + "step": 7511 + }, + { + "epoch": 1.9948214048599122, + "grad_norm": 1.4489462806357751, + "learning_rate": 4.250949894724077e-10, + "loss": 0.28315576910972595, + "step": 7512 + }, + { + "epoch": 1.9950869738414552, + "grad_norm": 1.1749720994980957, + "learning_rate": 3.8557394057114895e-10, + "loss": 0.19599778950214386, + "step": 7513 + }, + { + "epoch": 1.9953525428229981, + "grad_norm": 1.5080290285974376, + "learning_rate": 3.4798069939667725e-10, + "loss": 0.2295808494091034, + "step": 7514 + }, + { + "epoch": 1.995618111804541, + "grad_norm": 1.2840127096725462, + "learning_rate": 3.1231527319763864e-10, + "loss": 0.23212578892707825, + "step": 7515 + }, + { + "epoch": 1.995883680786084, + "grad_norm": 1.2763709143213344, + "learning_rate": 2.78577668847424e-10, + "loss": 0.2408447265625, + "step": 7516 + }, + { + "epoch": 1.996149249767627, + "grad_norm": 1.325995428985527, + "learning_rate": 2.4676789285305034e-10, + "loss": 0.25482073426246643, + "step": 7517 + }, + { + "epoch": 1.9964148187491702, + "grad_norm": 1.2453043840474796, + "learning_rate": 2.1688595134516932e-10, + "loss": 0.21228459477424622, + "step": 7518 + }, + { + "epoch": 1.996680387730713, + "grad_norm": 1.3949495270151018, + "learning_rate": 1.8893185008472814e-10, + "loss": 0.2467353343963623, + "step": 7519 + }, + { + "epoch": 1.996945956712256, + "grad_norm": 1.3819791453502894, + "learning_rate": 1.6290559446185962e-10, + "loss": 0.24475792050361633, + "step": 7520 + }, + { + "epoch": 1.997211525693799, + "grad_norm": 1.3766398068169023, + "learning_rate": 1.3880718949366155e-10, + "loss": 0.24821621179580688, + "step": 7521 + }, + { + "epoch": 1.997477094675342, + "grad_norm": 1.2860965423885737, + "learning_rate": 1.1663663982530715e-10, + "loss": 0.24725303053855896, + "step": 7522 + }, + { + "epoch": 1.997742663656885, + "grad_norm": 1.2302869290522314, + "learning_rate": 9.639394973226523e-11, + "loss": 0.2319290041923523, + "step": 7523 + }, + { + "epoch": 1.9980082326384279, + "grad_norm": 1.3169058540691405, + "learning_rate": 7.807912311696974e-11, + "loss": 0.22183239459991455, + "step": 7524 + }, + { + "epoch": 1.9982738016199708, + "grad_norm": 1.3038532813647647, + "learning_rate": 6.169216350881968e-11, + "loss": 0.2154427468776703, + "step": 7525 + }, + { + "epoch": 1.9985393706015138, + "grad_norm": 1.3153427866812037, + "learning_rate": 4.723307406973021e-11, + "loss": 0.22269389033317566, + "step": 7526 + }, + { + "epoch": 1.9988049395830567, + "grad_norm": 1.1809886655167368, + "learning_rate": 3.4701857584140686e-11, + "loss": 0.20317527651786804, + "step": 7527 + }, + { + "epoch": 1.9990705085645997, + "grad_norm": 1.2813479125348537, + "learning_rate": 2.409851647011685e-11, + "loss": 0.20792551338672638, + "step": 7528 + }, + { + "epoch": 1.9993360775461426, + "grad_norm": 1.1774217019209885, + "learning_rate": 1.5423052770469072e-11, + "loss": 0.2128266990184784, + "step": 7529 + }, + { + "epoch": 1.9996016465276856, + "grad_norm": 1.2535950646579268, + "learning_rate": 8.67546815941367e-12, + "loss": 0.23220527172088623, + "step": 7530 + }, + { + "epoch": 1.9998672155092285, + "grad_norm": 1.234107937433565, + "learning_rate": 3.8557639359115826e-12, + "loss": 0.22269386053085327, + "step": 7531 + }, + { + "epoch": 2.0, + "grad_norm": 2.3086652843747557, + "learning_rate": 9.63941030329707e-13, + "loss": 0.2053365409374237, + "step": 7532 + } + ], + "logging_steps": 1, + "max_steps": 7532, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 5704003196682240.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}