{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 3765, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0007972892166633447, "grad_norm": 1.1188605106489524, "learning_rate": 0.0, "loss": 0.8451830148696899, "step": 1 }, { "epoch": 0.0015945784333266893, "grad_norm": 0.8699833026405545, "learning_rate": 5.291005291005291e-07, "loss": 0.8462981581687927, "step": 2 }, { "epoch": 0.002391867649990034, "grad_norm": 0.9520781965447559, "learning_rate": 1.0582010582010582e-06, "loss": 1.0181745290756226, "step": 3 }, { "epoch": 0.0031891568666533787, "grad_norm": 1.0487502833167572, "learning_rate": 1.5873015873015873e-06, "loss": 1.0049467086791992, "step": 4 }, { "epoch": 0.003986446083316724, "grad_norm": 1.0107534514425207, "learning_rate": 2.1164021164021164e-06, "loss": 0.9837175607681274, "step": 5 }, { "epoch": 0.004783735299980068, "grad_norm": 1.14852508181288, "learning_rate": 2.6455026455026455e-06, "loss": 0.9785027503967285, "step": 6 }, { "epoch": 0.0055810245166434125, "grad_norm": 1.045343060625274, "learning_rate": 3.1746031746031746e-06, "loss": 0.9235939979553223, "step": 7 }, { "epoch": 0.006378313733306757, "grad_norm": 1.0023003017182361, "learning_rate": 3.7037037037037037e-06, "loss": 0.9700994491577148, "step": 8 }, { "epoch": 0.007175602949970101, "grad_norm": 1.1213289208913457, "learning_rate": 4.232804232804233e-06, "loss": 1.1149085760116577, "step": 9 }, { "epoch": 0.007972892166633447, "grad_norm": 1.017427766861512, "learning_rate": 4.7619047619047615e-06, "loss": 0.9658799171447754, "step": 10 }, { "epoch": 0.00877018138329679, "grad_norm": 1.0720075032133045, "learning_rate": 5.291005291005291e-06, "loss": 1.0690594911575317, "step": 11 }, { "epoch": 0.009567470599960135, "grad_norm": 0.9438954436367889, "learning_rate": 5.82010582010582e-06, "loss": 0.9390145540237427, "step": 12 }, { "epoch": 0.01036475981662348, "grad_norm": 1.0040255644100964, "learning_rate": 6.349206349206349e-06, "loss": 0.9223315715789795, "step": 13 }, { "epoch": 0.011162049033286825, "grad_norm": 0.986722940822448, "learning_rate": 6.878306878306878e-06, "loss": 0.8579893112182617, "step": 14 }, { "epoch": 0.01195933824995017, "grad_norm": 0.8711595168551494, "learning_rate": 7.4074074074074075e-06, "loss": 0.899712085723877, "step": 15 }, { "epoch": 0.012756627466613515, "grad_norm": 0.9345187889725753, "learning_rate": 7.936507936507936e-06, "loss": 0.9955716729164124, "step": 16 }, { "epoch": 0.013553916683276858, "grad_norm": 0.8347025796752333, "learning_rate": 8.465608465608466e-06, "loss": 0.9417692422866821, "step": 17 }, { "epoch": 0.014351205899940203, "grad_norm": 0.8506783530917961, "learning_rate": 8.994708994708995e-06, "loss": 0.9631177186965942, "step": 18 }, { "epoch": 0.015148495116603548, "grad_norm": 0.8307307747622344, "learning_rate": 9.523809523809523e-06, "loss": 0.8896239995956421, "step": 19 }, { "epoch": 0.015945784333266894, "grad_norm": 0.7206868810443214, "learning_rate": 1.0052910052910053e-05, "loss": 0.8034681677818298, "step": 20 }, { "epoch": 0.016743073549930237, "grad_norm": 0.6217558508678762, "learning_rate": 1.0582010582010582e-05, "loss": 0.9133136868476868, "step": 21 }, { "epoch": 0.01754036276659358, "grad_norm": 0.6036016124039175, "learning_rate": 1.1111111111111112e-05, "loss": 0.8133245706558228, "step": 22 }, { "epoch": 0.018337651983256927, "grad_norm": 0.627062163347783, "learning_rate": 1.164021164021164e-05, "loss": 0.7000857591629028, "step": 23 }, { "epoch": 0.01913494119992027, "grad_norm": 0.6138277459509189, "learning_rate": 1.2169312169312169e-05, "loss": 0.7752558588981628, "step": 24 }, { "epoch": 0.019932230416583617, "grad_norm": 0.5992067632790454, "learning_rate": 1.2698412698412699e-05, "loss": 0.8245622515678406, "step": 25 }, { "epoch": 0.02072951963324696, "grad_norm": 0.6212745170614481, "learning_rate": 1.3227513227513228e-05, "loss": 0.888140857219696, "step": 26 }, { "epoch": 0.021526808849910303, "grad_norm": 0.5286325321033547, "learning_rate": 1.3756613756613756e-05, "loss": 0.636292040348053, "step": 27 }, { "epoch": 0.02232409806657365, "grad_norm": 0.6269785530727698, "learning_rate": 1.4285714285714285e-05, "loss": 0.82733154296875, "step": 28 }, { "epoch": 0.023121387283236993, "grad_norm": 0.6128136859973192, "learning_rate": 1.4814814814814815e-05, "loss": 0.6614896655082703, "step": 29 }, { "epoch": 0.02391867649990034, "grad_norm": 0.7311008893325611, "learning_rate": 1.5343915343915344e-05, "loss": 0.8509453535079956, "step": 30 }, { "epoch": 0.024715965716563683, "grad_norm": 0.6904526192295033, "learning_rate": 1.5873015873015872e-05, "loss": 0.9308737516403198, "step": 31 }, { "epoch": 0.02551325493322703, "grad_norm": 0.5546429638270666, "learning_rate": 1.6402116402116404e-05, "loss": 0.7365565299987793, "step": 32 }, { "epoch": 0.026310544149890373, "grad_norm": 0.7009979306461872, "learning_rate": 1.693121693121693e-05, "loss": 0.7287313342094421, "step": 33 }, { "epoch": 0.027107833366553716, "grad_norm": 0.6467623955652316, "learning_rate": 1.746031746031746e-05, "loss": 0.7720486521720886, "step": 34 }, { "epoch": 0.027905122583217062, "grad_norm": 0.6313822382791102, "learning_rate": 1.798941798941799e-05, "loss": 0.6935867071151733, "step": 35 }, { "epoch": 0.028702411799880406, "grad_norm": 0.6315993187160613, "learning_rate": 1.8518518518518518e-05, "loss": 0.78423011302948, "step": 36 }, { "epoch": 0.029499701016543752, "grad_norm": 0.674650793465051, "learning_rate": 1.9047619047619046e-05, "loss": 0.7429696917533875, "step": 37 }, { "epoch": 0.030296990233207095, "grad_norm": 0.6188131750017825, "learning_rate": 1.9576719576719577e-05, "loss": 0.7526654601097107, "step": 38 }, { "epoch": 0.031094279449870442, "grad_norm": 0.6280147133916636, "learning_rate": 2.0105820105820105e-05, "loss": 0.7430333495140076, "step": 39 }, { "epoch": 0.03189156866653379, "grad_norm": 0.6053240102682963, "learning_rate": 2.0634920634920636e-05, "loss": 0.5816086530685425, "step": 40 }, { "epoch": 0.03268885788319713, "grad_norm": 0.5867212807637767, "learning_rate": 2.1164021164021164e-05, "loss": 0.6919189095497131, "step": 41 }, { "epoch": 0.033486147099860475, "grad_norm": 0.5573308992110599, "learning_rate": 2.1693121693121692e-05, "loss": 0.739343523979187, "step": 42 }, { "epoch": 0.03428343631652382, "grad_norm": 0.6841031158160551, "learning_rate": 2.2222222222222223e-05, "loss": 0.8663535714149475, "step": 43 }, { "epoch": 0.03508072553318716, "grad_norm": 0.5764370724131378, "learning_rate": 2.275132275132275e-05, "loss": 0.7593598961830139, "step": 44 }, { "epoch": 0.03587801474985051, "grad_norm": 0.5990500152719835, "learning_rate": 2.328042328042328e-05, "loss": 0.6732544898986816, "step": 45 }, { "epoch": 0.036675303966513854, "grad_norm": 0.6348417313788108, "learning_rate": 2.380952380952381e-05, "loss": 0.6807636022567749, "step": 46 }, { "epoch": 0.037472593183177194, "grad_norm": 0.631587102288571, "learning_rate": 2.4338624338624338e-05, "loss": 0.6802229881286621, "step": 47 }, { "epoch": 0.03826988239984054, "grad_norm": 0.5314105071961448, "learning_rate": 2.4867724867724866e-05, "loss": 0.5194002389907837, "step": 48 }, { "epoch": 0.03906717161650389, "grad_norm": 0.6936267469849475, "learning_rate": 2.5396825396825397e-05, "loss": 0.8001822829246521, "step": 49 }, { "epoch": 0.039864460833167234, "grad_norm": 0.6136287091794124, "learning_rate": 2.5925925925925925e-05, "loss": 0.6253584623336792, "step": 50 }, { "epoch": 0.040661750049830574, "grad_norm": 0.630882268706655, "learning_rate": 2.6455026455026456e-05, "loss": 0.6106195449829102, "step": 51 }, { "epoch": 0.04145903926649392, "grad_norm": 0.7520955689403463, "learning_rate": 2.6984126984126984e-05, "loss": 0.7964527606964111, "step": 52 }, { "epoch": 0.04225632848315727, "grad_norm": 0.7956853616921452, "learning_rate": 2.7513227513227512e-05, "loss": 0.7826541066169739, "step": 53 }, { "epoch": 0.04305361769982061, "grad_norm": 0.6678296370580534, "learning_rate": 2.8042328042328043e-05, "loss": 0.6770031452178955, "step": 54 }, { "epoch": 0.04385090691648395, "grad_norm": 0.7692447052103629, "learning_rate": 2.857142857142857e-05, "loss": 0.5636471509933472, "step": 55 }, { "epoch": 0.0446481961331473, "grad_norm": 0.7294124382254855, "learning_rate": 2.91005291005291e-05, "loss": 0.6527897119522095, "step": 56 }, { "epoch": 0.045445485349810646, "grad_norm": 0.5721650642662541, "learning_rate": 2.962962962962963e-05, "loss": 0.5694578289985657, "step": 57 }, { "epoch": 0.046242774566473986, "grad_norm": 0.6613686411775581, "learning_rate": 3.0158730158730158e-05, "loss": 0.7540780305862427, "step": 58 }, { "epoch": 0.04704006378313733, "grad_norm": 0.7255213484486487, "learning_rate": 3.068783068783069e-05, "loss": 0.7258141040802002, "step": 59 }, { "epoch": 0.04783735299980068, "grad_norm": 0.6536332621415744, "learning_rate": 3.121693121693122e-05, "loss": 0.5830332040786743, "step": 60 }, { "epoch": 0.04863464221646402, "grad_norm": 0.6735162525918211, "learning_rate": 3.1746031746031745e-05, "loss": 0.6373740434646606, "step": 61 }, { "epoch": 0.049431931433127366, "grad_norm": 0.6741029646315322, "learning_rate": 3.227513227513227e-05, "loss": 0.650443434715271, "step": 62 }, { "epoch": 0.05022922064979071, "grad_norm": 0.6388886887569873, "learning_rate": 3.280423280423281e-05, "loss": 0.6124881505966187, "step": 63 }, { "epoch": 0.05102650986645406, "grad_norm": 0.6332194564234735, "learning_rate": 3.3333333333333335e-05, "loss": 0.5588638782501221, "step": 64 }, { "epoch": 0.0518237990831174, "grad_norm": 0.6858847817921032, "learning_rate": 3.386243386243386e-05, "loss": 0.7258895039558411, "step": 65 }, { "epoch": 0.052621088299780745, "grad_norm": 0.6375697373980477, "learning_rate": 3.439153439153439e-05, "loss": 0.5896444916725159, "step": 66 }, { "epoch": 0.05341837751644409, "grad_norm": 0.6465553086714068, "learning_rate": 3.492063492063492e-05, "loss": 0.6349502801895142, "step": 67 }, { "epoch": 0.05421566673310743, "grad_norm": 0.7073873342961886, "learning_rate": 3.5449735449735446e-05, "loss": 0.718804121017456, "step": 68 }, { "epoch": 0.05501295594977078, "grad_norm": 0.6222727401170883, "learning_rate": 3.597883597883598e-05, "loss": 0.6692647933959961, "step": 69 }, { "epoch": 0.055810245166434125, "grad_norm": 0.6777174463956971, "learning_rate": 3.650793650793651e-05, "loss": 0.5698482990264893, "step": 70 }, { "epoch": 0.05660753438309747, "grad_norm": 0.6821699365327132, "learning_rate": 3.7037037037037037e-05, "loss": 0.7232347726821899, "step": 71 }, { "epoch": 0.05740482359976081, "grad_norm": 0.7033012020820858, "learning_rate": 3.7566137566137564e-05, "loss": 0.776823103427887, "step": 72 }, { "epoch": 0.05820211281642416, "grad_norm": 0.6914359768190611, "learning_rate": 3.809523809523809e-05, "loss": 0.5638333559036255, "step": 73 }, { "epoch": 0.058999402033087504, "grad_norm": 0.754414089205192, "learning_rate": 3.862433862433863e-05, "loss": 0.8180814385414124, "step": 74 }, { "epoch": 0.059796691249750844, "grad_norm": 0.6569898714922326, "learning_rate": 3.9153439153439155e-05, "loss": 0.6265695691108704, "step": 75 }, { "epoch": 0.06059398046641419, "grad_norm": 0.6327651972809227, "learning_rate": 3.968253968253968e-05, "loss": 0.6066941022872925, "step": 76 }, { "epoch": 0.06139126968307754, "grad_norm": 0.7038791857259671, "learning_rate": 4.021164021164021e-05, "loss": 0.6579304933547974, "step": 77 }, { "epoch": 0.062188558899740884, "grad_norm": 0.8008162829715784, "learning_rate": 4.074074074074074e-05, "loss": 0.7492961883544922, "step": 78 }, { "epoch": 0.06298584811640423, "grad_norm": 0.672244082559622, "learning_rate": 4.126984126984127e-05, "loss": 0.6155840754508972, "step": 79 }, { "epoch": 0.06378313733306758, "grad_norm": 0.6965977438063428, "learning_rate": 4.17989417989418e-05, "loss": 0.5750330686569214, "step": 80 }, { "epoch": 0.06458042654973091, "grad_norm": 0.6205680290372362, "learning_rate": 4.232804232804233e-05, "loss": 0.5844976305961609, "step": 81 }, { "epoch": 0.06537771576639426, "grad_norm": 0.746495345412428, "learning_rate": 4.2857142857142856e-05, "loss": 0.6817867755889893, "step": 82 }, { "epoch": 0.0661750049830576, "grad_norm": 0.6614336234042518, "learning_rate": 4.3386243386243384e-05, "loss": 0.5549920201301575, "step": 83 }, { "epoch": 0.06697229419972095, "grad_norm": 0.6419387989966399, "learning_rate": 4.391534391534391e-05, "loss": 0.6742616891860962, "step": 84 }, { "epoch": 0.0677695834163843, "grad_norm": 0.667957051480894, "learning_rate": 4.4444444444444447e-05, "loss": 0.7917462587356567, "step": 85 }, { "epoch": 0.06856687263304764, "grad_norm": 0.6717592584519512, "learning_rate": 4.4973544973544974e-05, "loss": 0.5981391668319702, "step": 86 }, { "epoch": 0.06936416184971098, "grad_norm": 0.6455649866233759, "learning_rate": 4.55026455026455e-05, "loss": 0.5726906061172485, "step": 87 }, { "epoch": 0.07016145106637432, "grad_norm": 0.7099290381359605, "learning_rate": 4.603174603174603e-05, "loss": 0.686972975730896, "step": 88 }, { "epoch": 0.07095874028303767, "grad_norm": 0.7133381161161251, "learning_rate": 4.656084656084656e-05, "loss": 0.7525381445884705, "step": 89 }, { "epoch": 0.07175602949970102, "grad_norm": 0.6787735195396777, "learning_rate": 4.708994708994709e-05, "loss": 0.6638092994689941, "step": 90 }, { "epoch": 0.07255331871636436, "grad_norm": 0.6761427420158901, "learning_rate": 4.761904761904762e-05, "loss": 0.6075881123542786, "step": 91 }, { "epoch": 0.07335060793302771, "grad_norm": 0.6233754356068947, "learning_rate": 4.814814814814815e-05, "loss": 0.5231776237487793, "step": 92 }, { "epoch": 0.07414789714969106, "grad_norm": 0.6851309658096618, "learning_rate": 4.8677248677248676e-05, "loss": 0.6355250477790833, "step": 93 }, { "epoch": 0.07494518636635439, "grad_norm": 0.7537371021964191, "learning_rate": 4.9206349206349204e-05, "loss": 0.7101712226867676, "step": 94 }, { "epoch": 0.07574247558301773, "grad_norm": 0.6667127018162496, "learning_rate": 4.973544973544973e-05, "loss": 0.555098831653595, "step": 95 }, { "epoch": 0.07653976479968108, "grad_norm": 0.5963550331165396, "learning_rate": 5.026455026455027e-05, "loss": 0.5201166272163391, "step": 96 }, { "epoch": 0.07733705401634443, "grad_norm": 0.662903277562847, "learning_rate": 5.0793650793650794e-05, "loss": 0.6484946012496948, "step": 97 }, { "epoch": 0.07813434323300777, "grad_norm": 0.689235612956613, "learning_rate": 5.132275132275133e-05, "loss": 0.6864213347434998, "step": 98 }, { "epoch": 0.07893163244967112, "grad_norm": 0.6699340149711482, "learning_rate": 5.185185185185185e-05, "loss": 0.7234477996826172, "step": 99 }, { "epoch": 0.07972892166633447, "grad_norm": 0.7137439614369674, "learning_rate": 5.2380952380952384e-05, "loss": 0.6080222129821777, "step": 100 }, { "epoch": 0.0805262108829978, "grad_norm": 0.5726596746304087, "learning_rate": 5.291005291005291e-05, "loss": 0.4615941047668457, "step": 101 }, { "epoch": 0.08132350009966115, "grad_norm": 0.5950488290944137, "learning_rate": 5.343915343915345e-05, "loss": 0.6018553376197815, "step": 102 }, { "epoch": 0.0821207893163245, "grad_norm": 0.6680114953555774, "learning_rate": 5.396825396825397e-05, "loss": 0.7352499961853027, "step": 103 }, { "epoch": 0.08291807853298784, "grad_norm": 0.6627247649838822, "learning_rate": 5.44973544973545e-05, "loss": 0.6194936633110046, "step": 104 }, { "epoch": 0.08371536774965119, "grad_norm": 0.6521720843677864, "learning_rate": 5.5026455026455024e-05, "loss": 0.6316890120506287, "step": 105 }, { "epoch": 0.08451265696631453, "grad_norm": 0.6149009181694113, "learning_rate": 5.555555555555556e-05, "loss": 0.5337021946907043, "step": 106 }, { "epoch": 0.08530994618297788, "grad_norm": 0.6605550729482619, "learning_rate": 5.6084656084656086e-05, "loss": 0.5606067180633545, "step": 107 }, { "epoch": 0.08610723539964121, "grad_norm": 0.6358462691610958, "learning_rate": 5.661375661375662e-05, "loss": 0.46860021352767944, "step": 108 }, { "epoch": 0.08690452461630456, "grad_norm": 0.6047451236684837, "learning_rate": 5.714285714285714e-05, "loss": 0.5496193170547485, "step": 109 }, { "epoch": 0.0877018138329679, "grad_norm": 0.7955791566756587, "learning_rate": 5.7671957671957676e-05, "loss": 0.6632341742515564, "step": 110 }, { "epoch": 0.08849910304963125, "grad_norm": 0.805378232708959, "learning_rate": 5.82010582010582e-05, "loss": 0.7120374441146851, "step": 111 }, { "epoch": 0.0892963922662946, "grad_norm": 0.6633161671700595, "learning_rate": 5.873015873015873e-05, "loss": 0.5854419469833374, "step": 112 }, { "epoch": 0.09009368148295795, "grad_norm": 0.5803589920825066, "learning_rate": 5.925925925925926e-05, "loss": 0.5638005137443542, "step": 113 }, { "epoch": 0.09089097069962129, "grad_norm": 0.6242875681627857, "learning_rate": 5.9788359788359794e-05, "loss": 0.6103172898292542, "step": 114 }, { "epoch": 0.09168825991628463, "grad_norm": 0.5893175871577293, "learning_rate": 6.0317460317460316e-05, "loss": 0.48989635705947876, "step": 115 }, { "epoch": 0.09248554913294797, "grad_norm": 0.5831577205098064, "learning_rate": 6.084656084656085e-05, "loss": 0.6758100390434265, "step": 116 }, { "epoch": 0.09328283834961132, "grad_norm": 0.6168994409409496, "learning_rate": 6.137566137566138e-05, "loss": 0.7169866561889648, "step": 117 }, { "epoch": 0.09408012756627467, "grad_norm": 0.6056951193752271, "learning_rate": 6.19047619047619e-05, "loss": 0.6483331918716431, "step": 118 }, { "epoch": 0.09487741678293801, "grad_norm": 0.5713994504837342, "learning_rate": 6.243386243386243e-05, "loss": 0.6775704622268677, "step": 119 }, { "epoch": 0.09567470599960136, "grad_norm": 0.655818596435538, "learning_rate": 6.296296296296296e-05, "loss": 0.6473055481910706, "step": 120 }, { "epoch": 0.0964719952162647, "grad_norm": 0.5724557205413847, "learning_rate": 6.349206349206349e-05, "loss": 0.5742613077163696, "step": 121 }, { "epoch": 0.09726928443292804, "grad_norm": 0.5475881320132908, "learning_rate": 6.402116402116403e-05, "loss": 0.5449977517127991, "step": 122 }, { "epoch": 0.09806657364959138, "grad_norm": 0.623895381687462, "learning_rate": 6.455026455026454e-05, "loss": 0.5297889709472656, "step": 123 }, { "epoch": 0.09886386286625473, "grad_norm": 0.581735979722867, "learning_rate": 6.507936507936509e-05, "loss": 0.628908634185791, "step": 124 }, { "epoch": 0.09966115208291808, "grad_norm": 0.555025818456113, "learning_rate": 6.560846560846561e-05, "loss": 0.5576918125152588, "step": 125 }, { "epoch": 0.10045844129958142, "grad_norm": 0.605868616774835, "learning_rate": 6.613756613756614e-05, "loss": 0.6588343977928162, "step": 126 }, { "epoch": 0.10125573051624477, "grad_norm": 0.577982682305058, "learning_rate": 6.666666666666667e-05, "loss": 0.5618064999580383, "step": 127 }, { "epoch": 0.10205301973290812, "grad_norm": 0.6659893753133435, "learning_rate": 6.71957671957672e-05, "loss": 0.7562912702560425, "step": 128 }, { "epoch": 0.10285030894957145, "grad_norm": 0.5638237378901895, "learning_rate": 6.772486772486773e-05, "loss": 0.6268394589424133, "step": 129 }, { "epoch": 0.1036475981662348, "grad_norm": 0.5432927450722417, "learning_rate": 6.825396825396825e-05, "loss": 0.5464900732040405, "step": 130 }, { "epoch": 0.10444488738289814, "grad_norm": 0.7189175522831021, "learning_rate": 6.878306878306878e-05, "loss": 0.5940570831298828, "step": 131 }, { "epoch": 0.10524217659956149, "grad_norm": 0.6160616009299601, "learning_rate": 6.931216931216932e-05, "loss": 0.630046010017395, "step": 132 }, { "epoch": 0.10603946581622484, "grad_norm": 0.615535136969569, "learning_rate": 6.984126984126984e-05, "loss": 0.7178760766983032, "step": 133 }, { "epoch": 0.10683675503288818, "grad_norm": 0.6368792673073925, "learning_rate": 7.037037037037038e-05, "loss": 0.7056331634521484, "step": 134 }, { "epoch": 0.10763404424955153, "grad_norm": 0.6356009949273699, "learning_rate": 7.089947089947089e-05, "loss": 0.5743733644485474, "step": 135 }, { "epoch": 0.10843133346621486, "grad_norm": 0.6134906086486488, "learning_rate": 7.142857142857143e-05, "loss": 0.6977953910827637, "step": 136 }, { "epoch": 0.10922862268287821, "grad_norm": 0.6840888535398655, "learning_rate": 7.195767195767196e-05, "loss": 0.7893917560577393, "step": 137 }, { "epoch": 0.11002591189954156, "grad_norm": 0.5546637972992274, "learning_rate": 7.248677248677249e-05, "loss": 0.5476382970809937, "step": 138 }, { "epoch": 0.1108232011162049, "grad_norm": 0.6039854826363897, "learning_rate": 7.301587301587302e-05, "loss": 0.6769068837165833, "step": 139 }, { "epoch": 0.11162049033286825, "grad_norm": 0.506087565982272, "learning_rate": 7.354497354497355e-05, "loss": 0.4965619444847107, "step": 140 }, { "epoch": 0.1124177795495316, "grad_norm": 0.5689918661015069, "learning_rate": 7.407407407407407e-05, "loss": 0.6099745631217957, "step": 141 }, { "epoch": 0.11321506876619494, "grad_norm": 0.6063509116612147, "learning_rate": 7.460317460317461e-05, "loss": 0.5823173522949219, "step": 142 }, { "epoch": 0.11401235798285828, "grad_norm": 0.6003523544530807, "learning_rate": 7.513227513227513e-05, "loss": 0.6946728229522705, "step": 143 }, { "epoch": 0.11480964719952162, "grad_norm": 0.6082709398937443, "learning_rate": 7.566137566137567e-05, "loss": 0.6324775218963623, "step": 144 }, { "epoch": 0.11560693641618497, "grad_norm": 0.5691767516231895, "learning_rate": 7.619047619047618e-05, "loss": 0.5173876881599426, "step": 145 }, { "epoch": 0.11640422563284832, "grad_norm": 0.9294229282952667, "learning_rate": 7.671957671957673e-05, "loss": 0.676567018032074, "step": 146 }, { "epoch": 0.11720151484951166, "grad_norm": 0.6095924896307525, "learning_rate": 7.724867724867725e-05, "loss": 0.6398120522499084, "step": 147 }, { "epoch": 0.11799880406617501, "grad_norm": 0.5835055437472435, "learning_rate": 7.777777777777778e-05, "loss": 0.4974275529384613, "step": 148 }, { "epoch": 0.11879609328283836, "grad_norm": 0.5879162014024971, "learning_rate": 7.830687830687831e-05, "loss": 0.7410945296287537, "step": 149 }, { "epoch": 0.11959338249950169, "grad_norm": 0.5558549574009657, "learning_rate": 7.883597883597884e-05, "loss": 0.594329833984375, "step": 150 }, { "epoch": 0.12039067171616503, "grad_norm": 0.5439924204830261, "learning_rate": 7.936507936507937e-05, "loss": 0.7496218681335449, "step": 151 }, { "epoch": 0.12118796093282838, "grad_norm": 0.5372227828241345, "learning_rate": 7.989417989417989e-05, "loss": 0.5481572151184082, "step": 152 }, { "epoch": 0.12198525014949173, "grad_norm": 0.6185687039381349, "learning_rate": 8.042328042328042e-05, "loss": 0.6521518230438232, "step": 153 }, { "epoch": 0.12278253936615507, "grad_norm": 0.6253111468784984, "learning_rate": 8.095238095238096e-05, "loss": 0.5910241603851318, "step": 154 }, { "epoch": 0.12357982858281842, "grad_norm": 0.518864951620714, "learning_rate": 8.148148148148148e-05, "loss": 0.6129660606384277, "step": 155 }, { "epoch": 0.12437711779948177, "grad_norm": 0.6479980947997911, "learning_rate": 8.201058201058202e-05, "loss": 0.5905724763870239, "step": 156 }, { "epoch": 0.1251744070161451, "grad_norm": 0.46482578153168475, "learning_rate": 8.253968253968255e-05, "loss": 0.29997217655181885, "step": 157 }, { "epoch": 0.12597169623280846, "grad_norm": 0.6469988468682148, "learning_rate": 8.306878306878307e-05, "loss": 0.7109941840171814, "step": 158 }, { "epoch": 0.1267689854494718, "grad_norm": 0.5824076621030191, "learning_rate": 8.35978835978836e-05, "loss": 0.5769335627555847, "step": 159 }, { "epoch": 0.12756627466613515, "grad_norm": 0.5112152555110699, "learning_rate": 8.412698412698413e-05, "loss": 0.428470641374588, "step": 160 }, { "epoch": 0.1283635638827985, "grad_norm": 0.557684800970914, "learning_rate": 8.465608465608466e-05, "loss": 0.5258879065513611, "step": 161 }, { "epoch": 0.12916085309946182, "grad_norm": 0.5206500090859404, "learning_rate": 8.518518518518518e-05, "loss": 0.6293078064918518, "step": 162 }, { "epoch": 0.12995814231612518, "grad_norm": 0.5457097017530537, "learning_rate": 8.571428571428571e-05, "loss": 0.5716619491577148, "step": 163 }, { "epoch": 0.1307554315327885, "grad_norm": 0.4880531021400616, "learning_rate": 8.624338624338625e-05, "loss": 0.5707421898841858, "step": 164 }, { "epoch": 0.13155272074945187, "grad_norm": 0.5364280068936383, "learning_rate": 8.677248677248677e-05, "loss": 0.67533940076828, "step": 165 }, { "epoch": 0.1323500099661152, "grad_norm": 0.5891947964223225, "learning_rate": 8.730158730158731e-05, "loss": 0.5344416499137878, "step": 166 }, { "epoch": 0.13314729918277857, "grad_norm": 0.5086278059457084, "learning_rate": 8.783068783068782e-05, "loss": 0.5131420493125916, "step": 167 }, { "epoch": 0.1339445883994419, "grad_norm": 0.449009931974179, "learning_rate": 8.835978835978837e-05, "loss": 0.43399137258529663, "step": 168 }, { "epoch": 0.13474187761610523, "grad_norm": 0.5725961446222205, "learning_rate": 8.888888888888889e-05, "loss": 0.6143461465835571, "step": 169 }, { "epoch": 0.1355391668327686, "grad_norm": 0.572175359734591, "learning_rate": 8.941798941798942e-05, "loss": 0.5715004801750183, "step": 170 }, { "epoch": 0.13633645604943193, "grad_norm": 0.562225748352357, "learning_rate": 8.994708994708995e-05, "loss": 0.7009696960449219, "step": 171 }, { "epoch": 0.13713374526609529, "grad_norm": 0.4839471040457175, "learning_rate": 9.047619047619048e-05, "loss": 0.4348934292793274, "step": 172 }, { "epoch": 0.13793103448275862, "grad_norm": 0.5630340812296597, "learning_rate": 9.1005291005291e-05, "loss": 0.5407055616378784, "step": 173 }, { "epoch": 0.13872832369942195, "grad_norm": 0.49644171468266995, "learning_rate": 9.153439153439155e-05, "loss": 0.5209079384803772, "step": 174 }, { "epoch": 0.1395256129160853, "grad_norm": 0.5925936168857415, "learning_rate": 9.206349206349206e-05, "loss": 0.691206693649292, "step": 175 }, { "epoch": 0.14032290213274864, "grad_norm": 0.5075114632464603, "learning_rate": 9.25925925925926e-05, "loss": 0.5864906311035156, "step": 176 }, { "epoch": 0.141120191349412, "grad_norm": 0.5607655363673762, "learning_rate": 9.312169312169312e-05, "loss": 0.6587859392166138, "step": 177 }, { "epoch": 0.14191748056607534, "grad_norm": 0.5041309787712785, "learning_rate": 9.365079365079366e-05, "loss": 0.5076941251754761, "step": 178 }, { "epoch": 0.1427147697827387, "grad_norm": 0.40144872655467106, "learning_rate": 9.417989417989419e-05, "loss": 0.43114036321640015, "step": 179 }, { "epoch": 0.14351205899940203, "grad_norm": 0.5211463180937618, "learning_rate": 9.470899470899471e-05, "loss": 0.5542252659797668, "step": 180 }, { "epoch": 0.14430934821606536, "grad_norm": 0.5372561336439176, "learning_rate": 9.523809523809524e-05, "loss": 0.64825040102005, "step": 181 }, { "epoch": 0.14510663743272872, "grad_norm": 0.5445851251476479, "learning_rate": 9.576719576719577e-05, "loss": 0.770200252532959, "step": 182 }, { "epoch": 0.14590392664939206, "grad_norm": 0.5330260150558548, "learning_rate": 9.62962962962963e-05, "loss": 0.6521638035774231, "step": 183 }, { "epoch": 0.14670121586605542, "grad_norm": 0.6601754736379108, "learning_rate": 9.682539682539682e-05, "loss": 0.7438864707946777, "step": 184 }, { "epoch": 0.14749850508271875, "grad_norm": 0.5090448038352426, "learning_rate": 9.735449735449735e-05, "loss": 0.4346984028816223, "step": 185 }, { "epoch": 0.1482957942993821, "grad_norm": 0.49740604690006646, "learning_rate": 9.78835978835979e-05, "loss": 0.48929333686828613, "step": 186 }, { "epoch": 0.14909308351604544, "grad_norm": 0.5552125662152761, "learning_rate": 9.841269841269841e-05, "loss": 0.6882792711257935, "step": 187 }, { "epoch": 0.14989037273270878, "grad_norm": 0.5849196451249816, "learning_rate": 9.894179894179895e-05, "loss": 0.6517977714538574, "step": 188 }, { "epoch": 0.15068766194937214, "grad_norm": 0.537263200878466, "learning_rate": 9.947089947089946e-05, "loss": 0.6538422107696533, "step": 189 }, { "epoch": 0.15148495116603547, "grad_norm": 0.5805577738708395, "learning_rate": 0.0001, "loss": 0.6561852693557739, "step": 190 }, { "epoch": 0.15228224038269883, "grad_norm": 0.4894459663374945, "learning_rate": 9.999998070500343e-05, "loss": 0.5324437022209167, "step": 191 }, { "epoch": 0.15307952959936216, "grad_norm": 0.5856840804262196, "learning_rate": 9.999992282002859e-05, "loss": 0.5449459552764893, "step": 192 }, { "epoch": 0.15387681881602552, "grad_norm": 0.513874770641767, "learning_rate": 9.999982634512015e-05, "loss": 0.6142128705978394, "step": 193 }, { "epoch": 0.15467410803268886, "grad_norm": 0.5596220414791703, "learning_rate": 9.99996912803526e-05, "loss": 0.6218991875648499, "step": 194 }, { "epoch": 0.1554713972493522, "grad_norm": 0.5652163168932208, "learning_rate": 9.999951762583015e-05, "loss": 0.5910047888755798, "step": 195 }, { "epoch": 0.15626868646601555, "grad_norm": 0.502581664769647, "learning_rate": 9.999930538168685e-05, "loss": 0.5436276793479919, "step": 196 }, { "epoch": 0.15706597568267888, "grad_norm": 0.46387192289887036, "learning_rate": 9.99990545480865e-05, "loss": 0.4433811902999878, "step": 197 }, { "epoch": 0.15786326489934224, "grad_norm": 0.4083328155608287, "learning_rate": 9.999876512522269e-05, "loss": 0.5550883412361145, "step": 198 }, { "epoch": 0.15866055411600558, "grad_norm": 0.5550398604599691, "learning_rate": 9.99984371133188e-05, "loss": 0.6075869202613831, "step": 199 }, { "epoch": 0.15945784333266894, "grad_norm": 0.5651563868838965, "learning_rate": 9.999807051262799e-05, "loss": 0.6965482831001282, "step": 200 }, { "epoch": 0.16025513254933227, "grad_norm": 0.5440505099660013, "learning_rate": 9.99976653234332e-05, "loss": 0.7042391300201416, "step": 201 }, { "epoch": 0.1610524217659956, "grad_norm": 0.5340005710044862, "learning_rate": 9.999722154604716e-05, "loss": 0.6763291954994202, "step": 202 }, { "epoch": 0.16184971098265896, "grad_norm": 0.5017905562028049, "learning_rate": 9.999673918081238e-05, "loss": 0.54468834400177, "step": 203 }, { "epoch": 0.1626470001993223, "grad_norm": 0.5338350095777552, "learning_rate": 9.999621822810114e-05, "loss": 0.637951672077179, "step": 204 }, { "epoch": 0.16344428941598566, "grad_norm": 0.519335565110686, "learning_rate": 9.999565868831551e-05, "loss": 0.708831787109375, "step": 205 }, { "epoch": 0.164241578632649, "grad_norm": 0.4986759845967597, "learning_rate": 9.999506056188737e-05, "loss": 0.6399855017662048, "step": 206 }, { "epoch": 0.16503886784931235, "grad_norm": 0.5307255577359274, "learning_rate": 9.999442384927831e-05, "loss": 0.6679644584655762, "step": 207 }, { "epoch": 0.16583615706597568, "grad_norm": 0.4731616915773854, "learning_rate": 9.999374855097978e-05, "loss": 0.634097158908844, "step": 208 }, { "epoch": 0.16663344628263901, "grad_norm": 0.5360569944269579, "learning_rate": 9.999303466751295e-05, "loss": 0.6805696487426758, "step": 209 }, { "epoch": 0.16743073549930237, "grad_norm": 0.5178007773919179, "learning_rate": 9.999228219942881e-05, "loss": 0.5799918174743652, "step": 210 }, { "epoch": 0.1682280247159657, "grad_norm": 0.49490201430468955, "learning_rate": 9.999149114730811e-05, "loss": 0.5753475427627563, "step": 211 }, { "epoch": 0.16902531393262907, "grad_norm": 0.5310591039596549, "learning_rate": 9.99906615117614e-05, "loss": 0.5268412828445435, "step": 212 }, { "epoch": 0.1698226031492924, "grad_norm": 0.6205275982042163, "learning_rate": 9.998979329342897e-05, "loss": 0.7786662578582764, "step": 213 }, { "epoch": 0.17061989236595576, "grad_norm": 0.4693653209510958, "learning_rate": 9.99888864929809e-05, "loss": 0.6556232571601868, "step": 214 }, { "epoch": 0.1714171815826191, "grad_norm": 0.4776723219288572, "learning_rate": 9.998794111111711e-05, "loss": 0.656956136226654, "step": 215 }, { "epoch": 0.17221447079928243, "grad_norm": 0.5388118212999502, "learning_rate": 9.998695714856719e-05, "loss": 0.6596938967704773, "step": 216 }, { "epoch": 0.1730117600159458, "grad_norm": 0.4951356861308604, "learning_rate": 9.99859346060906e-05, "loss": 0.4920451045036316, "step": 217 }, { "epoch": 0.17380904923260912, "grad_norm": 0.5360216407478556, "learning_rate": 9.998487348447652e-05, "loss": 0.5162692666053772, "step": 218 }, { "epoch": 0.17460633844927248, "grad_norm": 0.5606662680883827, "learning_rate": 9.998377378454391e-05, "loss": 0.6339883208274841, "step": 219 }, { "epoch": 0.1754036276659358, "grad_norm": 0.4800614422785935, "learning_rate": 9.998263550714156e-05, "loss": 0.4067378342151642, "step": 220 }, { "epoch": 0.17620091688259917, "grad_norm": 0.552685218183279, "learning_rate": 9.998145865314795e-05, "loss": 0.6040431261062622, "step": 221 }, { "epoch": 0.1769982060992625, "grad_norm": 1.1655578956859127, "learning_rate": 9.998024322347141e-05, "loss": 0.6176721453666687, "step": 222 }, { "epoch": 0.17779549531592584, "grad_norm": 0.5239864492134085, "learning_rate": 9.997898921904997e-05, "loss": 0.614580512046814, "step": 223 }, { "epoch": 0.1785927845325892, "grad_norm": 0.5367199451551828, "learning_rate": 9.99776966408515e-05, "loss": 0.599067211151123, "step": 224 }, { "epoch": 0.17939007374925253, "grad_norm": 0.7608593583166087, "learning_rate": 9.997636548987362e-05, "loss": 0.6465499401092529, "step": 225 }, { "epoch": 0.1801873629659159, "grad_norm": 0.48853670200857297, "learning_rate": 9.997499576714368e-05, "loss": 0.5656086206436157, "step": 226 }, { "epoch": 0.18098465218257923, "grad_norm": 0.5422398223060215, "learning_rate": 9.997358747371885e-05, "loss": 0.6954162120819092, "step": 227 }, { "epoch": 0.18178194139924259, "grad_norm": 0.4552664327941517, "learning_rate": 9.997214061068604e-05, "loss": 0.6330534219741821, "step": 228 }, { "epoch": 0.18257923061590592, "grad_norm": 0.4930702591534668, "learning_rate": 9.997065517916197e-05, "loss": 0.478765070438385, "step": 229 }, { "epoch": 0.18337651983256925, "grad_norm": 0.48760335855994247, "learning_rate": 9.996913118029306e-05, "loss": 0.5922573804855347, "step": 230 }, { "epoch": 0.1841738090492326, "grad_norm": 0.6112279671740566, "learning_rate": 9.996756861525555e-05, "loss": 0.45633360743522644, "step": 231 }, { "epoch": 0.18497109826589594, "grad_norm": 0.5294223754311395, "learning_rate": 9.996596748525541e-05, "loss": 0.5053507685661316, "step": 232 }, { "epoch": 0.1857683874825593, "grad_norm": 0.5253250604708931, "learning_rate": 9.996432779152842e-05, "loss": 0.5291705131530762, "step": 233 }, { "epoch": 0.18656567669922264, "grad_norm": 0.49997665345531456, "learning_rate": 9.996264953534007e-05, "loss": 0.4802708625793457, "step": 234 }, { "epoch": 0.187362965915886, "grad_norm": 0.5712800005366027, "learning_rate": 9.996093271798565e-05, "loss": 0.5854290723800659, "step": 235 }, { "epoch": 0.18816025513254933, "grad_norm": 0.5158552570504509, "learning_rate": 9.99591773407902e-05, "loss": 0.601582407951355, "step": 236 }, { "epoch": 0.18895754434921266, "grad_norm": 0.5426310661385518, "learning_rate": 9.995738340510852e-05, "loss": 0.6026809811592102, "step": 237 }, { "epoch": 0.18975483356587602, "grad_norm": 0.49729355412467274, "learning_rate": 9.995555091232516e-05, "loss": 0.6141121983528137, "step": 238 }, { "epoch": 0.19055212278253936, "grad_norm": 0.5120049645738499, "learning_rate": 9.995367986385445e-05, "loss": 0.6970478296279907, "step": 239 }, { "epoch": 0.19134941199920272, "grad_norm": 0.451753893455427, "learning_rate": 9.995177026114044e-05, "loss": 0.545665979385376, "step": 240 }, { "epoch": 0.19214670121586605, "grad_norm": 0.4882639279549517, "learning_rate": 9.9949822105657e-05, "loss": 0.6046403050422668, "step": 241 }, { "epoch": 0.1929439904325294, "grad_norm": 0.5047586617122564, "learning_rate": 9.99478353989077e-05, "loss": 0.5940122008323669, "step": 242 }, { "epoch": 0.19374127964919274, "grad_norm": 0.52494760381446, "learning_rate": 9.994581014242586e-05, "loss": 0.5034940838813782, "step": 243 }, { "epoch": 0.19453856886585608, "grad_norm": 0.5121847154494702, "learning_rate": 9.99437463377746e-05, "loss": 0.5243697166442871, "step": 244 }, { "epoch": 0.19533585808251944, "grad_norm": 0.46868305832950935, "learning_rate": 9.994164398654676e-05, "loss": 0.5527983903884888, "step": 245 }, { "epoch": 0.19613314729918277, "grad_norm": 0.5235596349473837, "learning_rate": 9.99395030903649e-05, "loss": 0.5947701930999756, "step": 246 }, { "epoch": 0.19693043651584613, "grad_norm": 0.48168751261179643, "learning_rate": 9.993732365088142e-05, "loss": 0.5003915429115295, "step": 247 }, { "epoch": 0.19772772573250946, "grad_norm": 0.5587508115155733, "learning_rate": 9.993510566977837e-05, "loss": 0.5694053173065186, "step": 248 }, { "epoch": 0.19852501494917282, "grad_norm": 0.47445500632542836, "learning_rate": 9.993284914876761e-05, "loss": 0.4716844856739044, "step": 249 }, { "epoch": 0.19932230416583616, "grad_norm": 0.47144394750211954, "learning_rate": 9.99305540895907e-05, "loss": 0.6478769779205322, "step": 250 }, { "epoch": 0.2001195933824995, "grad_norm": 0.47707113773074317, "learning_rate": 9.992822049401898e-05, "loss": 0.6261820793151855, "step": 251 }, { "epoch": 0.20091688259916285, "grad_norm": 0.4487806474567857, "learning_rate": 9.992584836385352e-05, "loss": 0.650500476360321, "step": 252 }, { "epoch": 0.20171417181582618, "grad_norm": 0.43659375951197904, "learning_rate": 9.992343770092513e-05, "loss": 0.5629013776779175, "step": 253 }, { "epoch": 0.20251146103248954, "grad_norm": 0.467103821740291, "learning_rate": 9.992098850709435e-05, "loss": 0.6068912744522095, "step": 254 }, { "epoch": 0.20330875024915288, "grad_norm": 0.4947187292400198, "learning_rate": 9.991850078425147e-05, "loss": 0.6766856908798218, "step": 255 }, { "epoch": 0.20410603946581624, "grad_norm": 0.4826622967770273, "learning_rate": 9.991597453431652e-05, "loss": 0.48296865820884705, "step": 256 }, { "epoch": 0.20490332868247957, "grad_norm": 0.47463688268246573, "learning_rate": 9.991340975923927e-05, "loss": 0.6013198494911194, "step": 257 }, { "epoch": 0.2057006178991429, "grad_norm": 0.5899688190241534, "learning_rate": 9.99108064609992e-05, "loss": 0.6499711275100708, "step": 258 }, { "epoch": 0.20649790711580626, "grad_norm": 0.47798964365256574, "learning_rate": 9.990816464160552e-05, "loss": 0.6036586165428162, "step": 259 }, { "epoch": 0.2072951963324696, "grad_norm": 0.46688971081691516, "learning_rate": 9.99054843030972e-05, "loss": 0.5272042751312256, "step": 260 }, { "epoch": 0.20809248554913296, "grad_norm": 0.4810857350252768, "learning_rate": 9.990276544754294e-05, "loss": 0.6708547472953796, "step": 261 }, { "epoch": 0.2088897747657963, "grad_norm": 0.5347858804864486, "learning_rate": 9.990000807704114e-05, "loss": 0.6009984016418457, "step": 262 }, { "epoch": 0.20968706398245965, "grad_norm": 0.5506997740082348, "learning_rate": 9.989721219371992e-05, "loss": 0.6689156889915466, "step": 263 }, { "epoch": 0.21048435319912298, "grad_norm": 0.5097439971812652, "learning_rate": 9.989437779973718e-05, "loss": 0.6574123501777649, "step": 264 }, { "epoch": 0.2112816424157863, "grad_norm": 0.5042433746285608, "learning_rate": 9.989150489728047e-05, "loss": 0.8204598426818848, "step": 265 }, { "epoch": 0.21207893163244967, "grad_norm": 0.43795581150040946, "learning_rate": 9.988859348856712e-05, "loss": 0.7045704126358032, "step": 266 }, { "epoch": 0.212876220849113, "grad_norm": 0.5208238751413873, "learning_rate": 9.988564357584413e-05, "loss": 0.7731660604476929, "step": 267 }, { "epoch": 0.21367351006577637, "grad_norm": 0.44276227963939485, "learning_rate": 9.988265516138827e-05, "loss": 0.530709445476532, "step": 268 }, { "epoch": 0.2144707992824397, "grad_norm": 0.47937554952435724, "learning_rate": 9.987962824750598e-05, "loss": 0.6376925110816956, "step": 269 }, { "epoch": 0.21526808849910306, "grad_norm": 0.4578579639180989, "learning_rate": 9.987656283653343e-05, "loss": 0.5797412395477295, "step": 270 }, { "epoch": 0.2160653777157664, "grad_norm": 0.49017085885416617, "learning_rate": 9.987345893083651e-05, "loss": 0.8157128691673279, "step": 271 }, { "epoch": 0.21686266693242973, "grad_norm": 0.5152975722276302, "learning_rate": 9.987031653281083e-05, "loss": 0.6416962146759033, "step": 272 }, { "epoch": 0.2176599561490931, "grad_norm": 0.4198088675771505, "learning_rate": 9.986713564488166e-05, "loss": 0.5577811002731323, "step": 273 }, { "epoch": 0.21845724536575642, "grad_norm": 0.5098593790436827, "learning_rate": 9.986391626950404e-05, "loss": 0.6627807021141052, "step": 274 }, { "epoch": 0.21925453458241978, "grad_norm": 0.48644856749966714, "learning_rate": 9.986065840916265e-05, "loss": 0.45618441700935364, "step": 275 }, { "epoch": 0.2200518237990831, "grad_norm": 0.46607372191583657, "learning_rate": 9.985736206637195e-05, "loss": 0.5854500532150269, "step": 276 }, { "epoch": 0.22084911301574647, "grad_norm": 0.47871878204007834, "learning_rate": 9.985402724367602e-05, "loss": 0.5510873794555664, "step": 277 }, { "epoch": 0.2216464022324098, "grad_norm": 0.4780893273095335, "learning_rate": 9.985065394364869e-05, "loss": 0.7215077877044678, "step": 278 }, { "epoch": 0.22244369144907314, "grad_norm": 0.4989844005904445, "learning_rate": 9.984724216889347e-05, "loss": 0.7177236080169678, "step": 279 }, { "epoch": 0.2232409806657365, "grad_norm": 0.4660832694400703, "learning_rate": 9.984379192204357e-05, "loss": 0.5279465913772583, "step": 280 }, { "epoch": 0.22403826988239983, "grad_norm": 0.508048926500413, "learning_rate": 9.984030320576188e-05, "loss": 0.7287507057189941, "step": 281 }, { "epoch": 0.2248355590990632, "grad_norm": 0.5206971947980278, "learning_rate": 9.983677602274099e-05, "loss": 0.6400103569030762, "step": 282 }, { "epoch": 0.22563284831572653, "grad_norm": 0.642116391456589, "learning_rate": 9.983321037570319e-05, "loss": 0.7817617654800415, "step": 283 }, { "epoch": 0.22643013753238989, "grad_norm": 0.48124287290606205, "learning_rate": 9.982960626740045e-05, "loss": 0.714570939540863, "step": 284 }, { "epoch": 0.22722742674905322, "grad_norm": 0.4772002064016699, "learning_rate": 9.982596370061443e-05, "loss": 0.6681346297264099, "step": 285 }, { "epoch": 0.22802471596571655, "grad_norm": 0.48726517449616075, "learning_rate": 9.982228267815643e-05, "loss": 0.5823335647583008, "step": 286 }, { "epoch": 0.2288220051823799, "grad_norm": 0.5277693001011462, "learning_rate": 9.981856320286748e-05, "loss": 0.6041414737701416, "step": 287 }, { "epoch": 0.22961929439904324, "grad_norm": 0.431405416966105, "learning_rate": 9.981480527761826e-05, "loss": 0.4923948049545288, "step": 288 }, { "epoch": 0.2304165836157066, "grad_norm": 0.45116500200412973, "learning_rate": 9.981100890530917e-05, "loss": 0.6001613140106201, "step": 289 }, { "epoch": 0.23121387283236994, "grad_norm": 0.4320032666546067, "learning_rate": 9.980717408887022e-05, "loss": 0.4850527346134186, "step": 290 }, { "epoch": 0.2320111620490333, "grad_norm": 0.47277389903034506, "learning_rate": 9.98033008312611e-05, "loss": 0.61723792552948, "step": 291 }, { "epoch": 0.23280845126569663, "grad_norm": 0.5402708147134132, "learning_rate": 9.979938913547124e-05, "loss": 0.7778280377388, "step": 292 }, { "epoch": 0.23360574048235996, "grad_norm": 0.48318254447789244, "learning_rate": 9.979543900451966e-05, "loss": 0.5611835718154907, "step": 293 }, { "epoch": 0.23440302969902332, "grad_norm": 0.5475594132867138, "learning_rate": 9.979145044145506e-05, "loss": 0.7875928282737732, "step": 294 }, { "epoch": 0.23520031891568666, "grad_norm": 0.44226677033887535, "learning_rate": 9.978742344935583e-05, "loss": 0.5510091781616211, "step": 295 }, { "epoch": 0.23599760813235002, "grad_norm": 0.4523183005347748, "learning_rate": 9.978335803133001e-05, "loss": 0.6997008323669434, "step": 296 }, { "epoch": 0.23679489734901335, "grad_norm": 0.4906937222391488, "learning_rate": 9.977925419051526e-05, "loss": 0.5965918898582458, "step": 297 }, { "epoch": 0.2375921865656767, "grad_norm": 0.4130767981781061, "learning_rate": 9.977511193007896e-05, "loss": 0.5479072332382202, "step": 298 }, { "epoch": 0.23838947578234004, "grad_norm": 0.4580945875883026, "learning_rate": 9.977093125321807e-05, "loss": 0.7516599893569946, "step": 299 }, { "epoch": 0.23918676499900338, "grad_norm": 0.4525347004723234, "learning_rate": 9.976671216315925e-05, "loss": 0.5704783201217651, "step": 300 }, { "epoch": 0.23998405421566674, "grad_norm": 0.46872551097767257, "learning_rate": 9.97624546631588e-05, "loss": 0.6114388704299927, "step": 301 }, { "epoch": 0.24078134343233007, "grad_norm": 0.48749800362199314, "learning_rate": 9.975815875650266e-05, "loss": 0.5496057271957397, "step": 302 }, { "epoch": 0.24157863264899343, "grad_norm": 0.5134913959022418, "learning_rate": 9.975382444650638e-05, "loss": 0.6354531645774841, "step": 303 }, { "epoch": 0.24237592186565676, "grad_norm": 0.5485710661698464, "learning_rate": 9.974945173651523e-05, "loss": 0.7779744267463684, "step": 304 }, { "epoch": 0.24317321108232012, "grad_norm": 0.4812565633854899, "learning_rate": 9.974504062990402e-05, "loss": 0.5857188701629639, "step": 305 }, { "epoch": 0.24397050029898346, "grad_norm": 0.488232213123252, "learning_rate": 9.974059113007728e-05, "loss": 0.657517671585083, "step": 306 }, { "epoch": 0.2447677895156468, "grad_norm": 0.6114328616452749, "learning_rate": 9.97361032404691e-05, "loss": 0.906180739402771, "step": 307 }, { "epoch": 0.24556507873231015, "grad_norm": 0.4652900899551811, "learning_rate": 9.973157696454325e-05, "loss": 0.4666250944137573, "step": 308 }, { "epoch": 0.24636236794897348, "grad_norm": 0.42848653926739844, "learning_rate": 9.97270123057931e-05, "loss": 0.5379083752632141, "step": 309 }, { "epoch": 0.24715965716563684, "grad_norm": 0.45302589995459175, "learning_rate": 9.972240926774168e-05, "loss": 0.562437891960144, "step": 310 }, { "epoch": 0.24795694638230018, "grad_norm": 0.45794532118385206, "learning_rate": 9.971776785394158e-05, "loss": 0.582377016544342, "step": 311 }, { "epoch": 0.24875423559896354, "grad_norm": 0.5189512840324854, "learning_rate": 9.971308806797506e-05, "loss": 0.5751237869262695, "step": 312 }, { "epoch": 0.24955152481562687, "grad_norm": 0.45049575094137506, "learning_rate": 9.970836991345397e-05, "loss": 0.6156517267227173, "step": 313 }, { "epoch": 0.2503488140322902, "grad_norm": 0.48548247609951645, "learning_rate": 9.970361339401979e-05, "loss": 0.6537054777145386, "step": 314 }, { "epoch": 0.25114610324895353, "grad_norm": 0.4598628060740275, "learning_rate": 9.969881851334359e-05, "loss": 0.5939797759056091, "step": 315 }, { "epoch": 0.2519433924656169, "grad_norm": 0.47855461177969383, "learning_rate": 9.969398527512606e-05, "loss": 0.5568140149116516, "step": 316 }, { "epoch": 0.25274068168228025, "grad_norm": 0.4346719568240684, "learning_rate": 9.968911368309751e-05, "loss": 0.5056378245353699, "step": 317 }, { "epoch": 0.2535379708989436, "grad_norm": 0.46350701988916276, "learning_rate": 9.968420374101781e-05, "loss": 0.6505611538887024, "step": 318 }, { "epoch": 0.2543352601156069, "grad_norm": 0.4646271528414299, "learning_rate": 9.967925545267647e-05, "loss": 0.5402166247367859, "step": 319 }, { "epoch": 0.2551325493322703, "grad_norm": 0.5095266847255124, "learning_rate": 9.967426882189258e-05, "loss": 0.5531996488571167, "step": 320 }, { "epoch": 0.25592983854893364, "grad_norm": 0.4928414835388224, "learning_rate": 9.96692438525148e-05, "loss": 0.5827917456626892, "step": 321 }, { "epoch": 0.256727127765597, "grad_norm": 0.4417258489212668, "learning_rate": 9.966418054842143e-05, "loss": 0.5324844121932983, "step": 322 }, { "epoch": 0.2575244169822603, "grad_norm": 0.4598348063791111, "learning_rate": 9.965907891352029e-05, "loss": 0.5500366687774658, "step": 323 }, { "epoch": 0.25832170619892364, "grad_norm": 0.5711723951094343, "learning_rate": 9.965393895174886e-05, "loss": 0.7471845149993896, "step": 324 }, { "epoch": 0.25911899541558703, "grad_norm": 0.48887224364057436, "learning_rate": 9.964876066707413e-05, "loss": 0.6946783661842346, "step": 325 }, { "epoch": 0.25991628463225036, "grad_norm": 0.5227766184254139, "learning_rate": 9.964354406349273e-05, "loss": 0.5181636810302734, "step": 326 }, { "epoch": 0.2607135738489137, "grad_norm": 0.46900568060731657, "learning_rate": 9.963828914503079e-05, "loss": 0.46674424409866333, "step": 327 }, { "epoch": 0.261510863065577, "grad_norm": 0.4886473099268501, "learning_rate": 9.96329959157441e-05, "loss": 0.5832171440124512, "step": 328 }, { "epoch": 0.26230815228224036, "grad_norm": 0.4393566074899197, "learning_rate": 9.962766437971796e-05, "loss": 0.5666168928146362, "step": 329 }, { "epoch": 0.26310544149890375, "grad_norm": 0.49740152520121855, "learning_rate": 9.962229454106724e-05, "loss": 0.6694560647010803, "step": 330 }, { "epoch": 0.2639027307155671, "grad_norm": 0.43846407863590053, "learning_rate": 9.961688640393639e-05, "loss": 0.5220842957496643, "step": 331 }, { "epoch": 0.2647000199322304, "grad_norm": 0.5071819987865847, "learning_rate": 9.96114399724994e-05, "loss": 0.6320483088493347, "step": 332 }, { "epoch": 0.26549730914889375, "grad_norm": 0.5218022286512702, "learning_rate": 9.960595525095984e-05, "loss": 0.6672880053520203, "step": 333 }, { "epoch": 0.26629459836555713, "grad_norm": 0.45569006840780596, "learning_rate": 9.96004322435508e-05, "loss": 0.47796210646629333, "step": 334 }, { "epoch": 0.26709188758222047, "grad_norm": 0.4253672341229229, "learning_rate": 9.959487095453494e-05, "loss": 0.425771564245224, "step": 335 }, { "epoch": 0.2678891767988838, "grad_norm": 0.49067562548045823, "learning_rate": 9.958927138820447e-05, "loss": 0.6916324496269226, "step": 336 }, { "epoch": 0.26868646601554713, "grad_norm": 0.43560565848799676, "learning_rate": 9.958363354888113e-05, "loss": 0.5097326636314392, "step": 337 }, { "epoch": 0.26948375523221046, "grad_norm": 0.47438529500752785, "learning_rate": 9.957795744091621e-05, "loss": 0.5923242568969727, "step": 338 }, { "epoch": 0.27028104444887385, "grad_norm": 0.7431898475093441, "learning_rate": 9.957224306869053e-05, "loss": 0.6381533741950989, "step": 339 }, { "epoch": 0.2710783336655372, "grad_norm": 0.4554272023220337, "learning_rate": 9.956649043661442e-05, "loss": 0.5776190757751465, "step": 340 }, { "epoch": 0.2718756228822005, "grad_norm": 0.440594942842347, "learning_rate": 9.956069954912779e-05, "loss": 0.4558720886707306, "step": 341 }, { "epoch": 0.27267291209886385, "grad_norm": 0.43902683600523196, "learning_rate": 9.955487041070002e-05, "loss": 0.4977036118507385, "step": 342 }, { "epoch": 0.2734702013155272, "grad_norm": 0.47578300056643325, "learning_rate": 9.954900302583005e-05, "loss": 0.6339856386184692, "step": 343 }, { "epoch": 0.27426749053219057, "grad_norm": 0.43758516507912826, "learning_rate": 9.954309739904633e-05, "loss": 0.5387046933174133, "step": 344 }, { "epoch": 0.2750647797488539, "grad_norm": 0.47173850439942877, "learning_rate": 9.953715353490683e-05, "loss": 0.58820641040802, "step": 345 }, { "epoch": 0.27586206896551724, "grad_norm": 0.4828505382173731, "learning_rate": 9.953117143799899e-05, "loss": 0.6135766506195068, "step": 346 }, { "epoch": 0.27665935818218057, "grad_norm": 0.47909826264334937, "learning_rate": 9.952515111293984e-05, "loss": 0.6029750108718872, "step": 347 }, { "epoch": 0.2774566473988439, "grad_norm": 0.502503287566963, "learning_rate": 9.951909256437582e-05, "loss": 0.652255117893219, "step": 348 }, { "epoch": 0.2782539366155073, "grad_norm": 0.4905071808609595, "learning_rate": 9.951299579698295e-05, "loss": 0.6211282014846802, "step": 349 }, { "epoch": 0.2790512258321706, "grad_norm": 0.4947107921832806, "learning_rate": 9.95068608154667e-05, "loss": 0.7261977195739746, "step": 350 }, { "epoch": 0.27984851504883396, "grad_norm": 0.4751079237141207, "learning_rate": 9.950068762456203e-05, "loss": 0.6289840340614319, "step": 351 }, { "epoch": 0.2806458042654973, "grad_norm": 0.41735887208383027, "learning_rate": 9.949447622903346e-05, "loss": 0.5315046310424805, "step": 352 }, { "epoch": 0.2814430934821607, "grad_norm": 0.42095898747740934, "learning_rate": 9.94882266336749e-05, "loss": 0.46853071451187134, "step": 353 }, { "epoch": 0.282240382698824, "grad_norm": 0.46323615091872217, "learning_rate": 9.948193884330978e-05, "loss": 0.6165741682052612, "step": 354 }, { "epoch": 0.28303767191548734, "grad_norm": 0.445611696162592, "learning_rate": 9.947561286279105e-05, "loss": 0.5427416563034058, "step": 355 }, { "epoch": 0.2838349611321507, "grad_norm": 0.44377795616092613, "learning_rate": 9.94692486970011e-05, "loss": 0.5470292568206787, "step": 356 }, { "epoch": 0.284632250348814, "grad_norm": 0.42697484501633837, "learning_rate": 9.946284635085175e-05, "loss": 0.49538227915763855, "step": 357 }, { "epoch": 0.2854295395654774, "grad_norm": 0.5795503338589468, "learning_rate": 9.945640582928437e-05, "loss": 0.6338120102882385, "step": 358 }, { "epoch": 0.28622682878214073, "grad_norm": 0.4544087676418052, "learning_rate": 9.944992713726974e-05, "loss": 0.579820990562439, "step": 359 }, { "epoch": 0.28702411799880406, "grad_norm": 0.43847017146405626, "learning_rate": 9.944341027980811e-05, "loss": 0.587826132774353, "step": 360 }, { "epoch": 0.2878214072154674, "grad_norm": 0.5140336026729736, "learning_rate": 9.94368552619292e-05, "loss": 0.5005619525909424, "step": 361 }, { "epoch": 0.28861869643213073, "grad_norm": 0.41294220032163764, "learning_rate": 9.943026208869217e-05, "loss": 0.5103585720062256, "step": 362 }, { "epoch": 0.2894159856487941, "grad_norm": 0.46304332477930066, "learning_rate": 9.942363076518562e-05, "loss": 0.6852410435676575, "step": 363 }, { "epoch": 0.29021327486545745, "grad_norm": 0.5438171927669393, "learning_rate": 9.94169612965276e-05, "loss": 0.4887428879737854, "step": 364 }, { "epoch": 0.2910105640821208, "grad_norm": 0.4804327017017936, "learning_rate": 9.941025368786563e-05, "loss": 0.6052253246307373, "step": 365 }, { "epoch": 0.2918078532987841, "grad_norm": 0.4809482521466942, "learning_rate": 9.940350794437663e-05, "loss": 0.6085155010223389, "step": 366 }, { "epoch": 0.2926051425154475, "grad_norm": 0.4972552671052146, "learning_rate": 9.939672407126696e-05, "loss": 0.623116135597229, "step": 367 }, { "epoch": 0.29340243173211084, "grad_norm": 0.4589603631753697, "learning_rate": 9.938990207377242e-05, "loss": 0.42201995849609375, "step": 368 }, { "epoch": 0.29419972094877417, "grad_norm": 0.37927449688645953, "learning_rate": 9.93830419571582e-05, "loss": 0.4208407402038574, "step": 369 }, { "epoch": 0.2949970101654375, "grad_norm": 0.48321283820456146, "learning_rate": 9.937614372671897e-05, "loss": 0.6236253380775452, "step": 370 }, { "epoch": 0.29579429938210083, "grad_norm": 0.3716781562093702, "learning_rate": 9.936920738777876e-05, "loss": 0.44359952211380005, "step": 371 }, { "epoch": 0.2965915885987642, "grad_norm": 0.49326630603488064, "learning_rate": 9.936223294569105e-05, "loss": 0.5632373690605164, "step": 372 }, { "epoch": 0.29738887781542755, "grad_norm": 0.4655994439510978, "learning_rate": 9.935522040583871e-05, "loss": 0.5966030359268188, "step": 373 }, { "epoch": 0.2981861670320909, "grad_norm": 0.4116086620059068, "learning_rate": 9.934816977363403e-05, "loss": 0.5367377400398254, "step": 374 }, { "epoch": 0.2989834562487542, "grad_norm": 0.4702477572237078, "learning_rate": 9.934108105451867e-05, "loss": 0.665490984916687, "step": 375 }, { "epoch": 0.29978074546541755, "grad_norm": 0.4458220738192757, "learning_rate": 9.93339542539637e-05, "loss": 0.6292649507522583, "step": 376 }, { "epoch": 0.30057803468208094, "grad_norm": 0.5195188089977972, "learning_rate": 9.932678937746959e-05, "loss": 0.5892680883407593, "step": 377 }, { "epoch": 0.3013753238987443, "grad_norm": 0.47147777605624086, "learning_rate": 9.931958643056618e-05, "loss": 0.6030368804931641, "step": 378 }, { "epoch": 0.3021726131154076, "grad_norm": 0.5509802940315162, "learning_rate": 9.931234541881273e-05, "loss": 0.6131481528282166, "step": 379 }, { "epoch": 0.30296990233207094, "grad_norm": 0.4911651007229327, "learning_rate": 9.930506634779783e-05, "loss": 0.6468270421028137, "step": 380 }, { "epoch": 0.3037671915487343, "grad_norm": 0.4846846865780176, "learning_rate": 9.929774922313948e-05, "loss": 0.5818793773651123, "step": 381 }, { "epoch": 0.30456448076539766, "grad_norm": 0.4399376820529813, "learning_rate": 9.929039405048501e-05, "loss": 0.5764477252960205, "step": 382 }, { "epoch": 0.305361769982061, "grad_norm": 0.4415492001454133, "learning_rate": 9.928300083551119e-05, "loss": 0.620733916759491, "step": 383 }, { "epoch": 0.3061590591987243, "grad_norm": 0.48778997512266237, "learning_rate": 9.927556958392404e-05, "loss": 0.6159343123435974, "step": 384 }, { "epoch": 0.30695634841538766, "grad_norm": 0.43167110655700414, "learning_rate": 9.926810030145906e-05, "loss": 0.6212811470031738, "step": 385 }, { "epoch": 0.30775363763205105, "grad_norm": 0.39846069297414555, "learning_rate": 9.926059299388098e-05, "loss": 0.5959455966949463, "step": 386 }, { "epoch": 0.3085509268487144, "grad_norm": 0.38594924860018265, "learning_rate": 9.925304766698397e-05, "loss": 0.4813476800918579, "step": 387 }, { "epoch": 0.3093482160653777, "grad_norm": 0.44728958853945877, "learning_rate": 9.924546432659154e-05, "loss": 0.5625510215759277, "step": 388 }, { "epoch": 0.31014550528204105, "grad_norm": 0.45401723475533556, "learning_rate": 9.923784297855648e-05, "loss": 0.5711951851844788, "step": 389 }, { "epoch": 0.3109427944987044, "grad_norm": 0.44088540933782033, "learning_rate": 9.923018362876093e-05, "loss": 0.50668865442276, "step": 390 }, { "epoch": 0.31174008371536777, "grad_norm": 0.4856295422247378, "learning_rate": 9.922248628311641e-05, "loss": 0.5901417136192322, "step": 391 }, { "epoch": 0.3125373729320311, "grad_norm": 0.4625391155519684, "learning_rate": 9.921475094756371e-05, "loss": 0.6308175325393677, "step": 392 }, { "epoch": 0.31333466214869443, "grad_norm": 0.4414935424406626, "learning_rate": 9.920697762807296e-05, "loss": 0.541878879070282, "step": 393 }, { "epoch": 0.31413195136535776, "grad_norm": 0.4688509818947221, "learning_rate": 9.919916633064362e-05, "loss": 0.6517980098724365, "step": 394 }, { "epoch": 0.31492924058202115, "grad_norm": 0.4652753549593818, "learning_rate": 9.919131706130445e-05, "loss": 0.48931068181991577, "step": 395 }, { "epoch": 0.3157265297986845, "grad_norm": 0.4591654889411999, "learning_rate": 9.918342982611348e-05, "loss": 0.5997287034988403, "step": 396 }, { "epoch": 0.3165238190153478, "grad_norm": 0.46328563921386984, "learning_rate": 9.917550463115812e-05, "loss": 0.6720573902130127, "step": 397 }, { "epoch": 0.31732110823201115, "grad_norm": 0.4663812991363668, "learning_rate": 9.916754148255501e-05, "loss": 0.6288709044456482, "step": 398 }, { "epoch": 0.3181183974486745, "grad_norm": 0.4482140208724434, "learning_rate": 9.915954038645012e-05, "loss": 0.7110555768013, "step": 399 }, { "epoch": 0.31891568666533787, "grad_norm": 0.3923925780988048, "learning_rate": 9.91515013490187e-05, "loss": 0.46181586384773254, "step": 400 }, { "epoch": 0.3197129758820012, "grad_norm": 0.523134820211431, "learning_rate": 9.914342437646524e-05, "loss": 0.662895679473877, "step": 401 }, { "epoch": 0.32051026509866454, "grad_norm": 0.43733953668350123, "learning_rate": 9.913530947502359e-05, "loss": 0.6702781915664673, "step": 402 }, { "epoch": 0.32130755431532787, "grad_norm": 0.4304161611710282, "learning_rate": 9.912715665095682e-05, "loss": 0.5562620162963867, "step": 403 }, { "epoch": 0.3221048435319912, "grad_norm": 0.4722172757623949, "learning_rate": 9.911896591055725e-05, "loss": 0.5548921227455139, "step": 404 }, { "epoch": 0.3229021327486546, "grad_norm": 0.4961066675766669, "learning_rate": 9.911073726014652e-05, "loss": 0.5065759420394897, "step": 405 }, { "epoch": 0.3236994219653179, "grad_norm": 0.46728888382659706, "learning_rate": 9.910247070607552e-05, "loss": 0.5553586483001709, "step": 406 }, { "epoch": 0.32449671118198126, "grad_norm": 0.45414818656326306, "learning_rate": 9.909416625472431e-05, "loss": 0.6570261716842651, "step": 407 }, { "epoch": 0.3252940003986446, "grad_norm": 0.4944011193754291, "learning_rate": 9.908582391250234e-05, "loss": 0.6729617714881897, "step": 408 }, { "epoch": 0.326091289615308, "grad_norm": 0.39635946508552544, "learning_rate": 9.90774436858482e-05, "loss": 0.4252096116542816, "step": 409 }, { "epoch": 0.3268885788319713, "grad_norm": 0.5349176366575032, "learning_rate": 9.90690255812297e-05, "loss": 0.6979129314422607, "step": 410 }, { "epoch": 0.32768586804863464, "grad_norm": 0.4649634957412004, "learning_rate": 9.906056960514401e-05, "loss": 0.6572585105895996, "step": 411 }, { "epoch": 0.328483157265298, "grad_norm": 0.44730905626623263, "learning_rate": 9.905207576411739e-05, "loss": 0.5420549511909485, "step": 412 }, { "epoch": 0.3292804464819613, "grad_norm": 0.4894108591825683, "learning_rate": 9.904354406470542e-05, "loss": 0.573428750038147, "step": 413 }, { "epoch": 0.3300777356986247, "grad_norm": 0.46144138772495313, "learning_rate": 9.903497451349287e-05, "loss": 0.5927143096923828, "step": 414 }, { "epoch": 0.33087502491528803, "grad_norm": 0.44628154611343485, "learning_rate": 9.902636711709368e-05, "loss": 0.5663644075393677, "step": 415 }, { "epoch": 0.33167231413195136, "grad_norm": 0.45209196152818787, "learning_rate": 9.901772188215107e-05, "loss": 0.5965126752853394, "step": 416 }, { "epoch": 0.3324696033486147, "grad_norm": 0.43795243872673967, "learning_rate": 9.90090388153374e-05, "loss": 0.5430083870887756, "step": 417 }, { "epoch": 0.33326689256527803, "grad_norm": 0.46614431981420323, "learning_rate": 9.900031792335432e-05, "loss": 0.681374192237854, "step": 418 }, { "epoch": 0.3340641817819414, "grad_norm": 0.42058817202529175, "learning_rate": 9.899155921293255e-05, "loss": 0.5767754316329956, "step": 419 }, { "epoch": 0.33486147099860475, "grad_norm": 0.4387616506099145, "learning_rate": 9.898276269083209e-05, "loss": 0.6101611256599426, "step": 420 }, { "epoch": 0.3356587602152681, "grad_norm": 0.4439036658128423, "learning_rate": 9.897392836384209e-05, "loss": 0.5465982556343079, "step": 421 }, { "epoch": 0.3364560494319314, "grad_norm": 0.43917081411128267, "learning_rate": 9.896505623878087e-05, "loss": 0.4745447039604187, "step": 422 }, { "epoch": 0.3372533386485948, "grad_norm": 0.45956655079477327, "learning_rate": 9.895614632249597e-05, "loss": 0.569831371307373, "step": 423 }, { "epoch": 0.33805062786525814, "grad_norm": 0.47933049156815927, "learning_rate": 9.894719862186403e-05, "loss": 0.691043496131897, "step": 424 }, { "epoch": 0.33884791708192147, "grad_norm": 0.4960294801120083, "learning_rate": 9.89382131437909e-05, "loss": 0.6781651377677917, "step": 425 }, { "epoch": 0.3396452062985848, "grad_norm": 0.47155853825957816, "learning_rate": 9.892918989521156e-05, "loss": 0.6106862425804138, "step": 426 }, { "epoch": 0.34044249551524813, "grad_norm": 0.47809304230598904, "learning_rate": 9.892012888309015e-05, "loss": 0.6437129378318787, "step": 427 }, { "epoch": 0.3412397847319115, "grad_norm": 0.44495583730316174, "learning_rate": 9.891103011441998e-05, "loss": 0.5531318187713623, "step": 428 }, { "epoch": 0.34203707394857485, "grad_norm": 0.4846166801770658, "learning_rate": 9.890189359622343e-05, "loss": 0.669035792350769, "step": 429 }, { "epoch": 0.3428343631652382, "grad_norm": 0.4446306370571513, "learning_rate": 9.889271933555213e-05, "loss": 0.5434859991073608, "step": 430 }, { "epoch": 0.3436316523819015, "grad_norm": 0.40097641405682516, "learning_rate": 9.888350733948674e-05, "loss": 0.5466962456703186, "step": 431 }, { "epoch": 0.34442894159856485, "grad_norm": 0.4640744754993888, "learning_rate": 9.887425761513707e-05, "loss": 0.7391524314880371, "step": 432 }, { "epoch": 0.34522623081522824, "grad_norm": 0.44192196082050816, "learning_rate": 9.886497016964205e-05, "loss": 0.6058928966522217, "step": 433 }, { "epoch": 0.3460235200318916, "grad_norm": 0.40102569089133644, "learning_rate": 9.885564501016975e-05, "loss": 0.5142593383789062, "step": 434 }, { "epoch": 0.3468208092485549, "grad_norm": 0.4314030274914796, "learning_rate": 9.884628214391733e-05, "loss": 0.6543936133384705, "step": 435 }, { "epoch": 0.34761809846521824, "grad_norm": 0.46885113124025474, "learning_rate": 9.883688157811102e-05, "loss": 0.6159886121749878, "step": 436 }, { "epoch": 0.3484153876818816, "grad_norm": 0.49724373802301314, "learning_rate": 9.88274433200062e-05, "loss": 0.6463795304298401, "step": 437 }, { "epoch": 0.34921267689854496, "grad_norm": 0.3981456376390215, "learning_rate": 9.881796737688731e-05, "loss": 0.5202028751373291, "step": 438 }, { "epoch": 0.3500099661152083, "grad_norm": 0.45138955021203814, "learning_rate": 9.880845375606788e-05, "loss": 0.6172412037849426, "step": 439 }, { "epoch": 0.3508072553318716, "grad_norm": 0.4852374398813873, "learning_rate": 9.879890246489052e-05, "loss": 0.5672765970230103, "step": 440 }, { "epoch": 0.35160454454853496, "grad_norm": 0.4583900630678909, "learning_rate": 9.878931351072693e-05, "loss": 0.5679107904434204, "step": 441 }, { "epoch": 0.35240183376519835, "grad_norm": 0.4679513755769699, "learning_rate": 9.877968690097784e-05, "loss": 0.5920689105987549, "step": 442 }, { "epoch": 0.3531991229818617, "grad_norm": 0.41119439016083037, "learning_rate": 9.877002264307309e-05, "loss": 0.498252809047699, "step": 443 }, { "epoch": 0.353996412198525, "grad_norm": 0.4407853087369141, "learning_rate": 9.876032074447152e-05, "loss": 0.5669230222702026, "step": 444 }, { "epoch": 0.35479370141518835, "grad_norm": 0.39007172167967485, "learning_rate": 9.875058121266109e-05, "loss": 0.505557656288147, "step": 445 }, { "epoch": 0.3555909906318517, "grad_norm": 0.42221470849036274, "learning_rate": 9.874080405515874e-05, "loss": 0.6117410659790039, "step": 446 }, { "epoch": 0.35638827984851507, "grad_norm": 0.41494975771842485, "learning_rate": 9.87309892795105e-05, "loss": 0.5894678831100464, "step": 447 }, { "epoch": 0.3571855690651784, "grad_norm": 0.45323125988708934, "learning_rate": 9.872113689329141e-05, "loss": 0.6855286359786987, "step": 448 }, { "epoch": 0.35798285828184173, "grad_norm": 0.6008370767952844, "learning_rate": 9.871124690410554e-05, "loss": 0.6156555414199829, "step": 449 }, { "epoch": 0.35878014749850506, "grad_norm": 0.43752262918239515, "learning_rate": 9.870131931958595e-05, "loss": 0.5626705288887024, "step": 450 }, { "epoch": 0.35957743671516845, "grad_norm": 0.43162073882986357, "learning_rate": 9.86913541473948e-05, "loss": 0.4869219660758972, "step": 451 }, { "epoch": 0.3603747259318318, "grad_norm": 0.45907575665566325, "learning_rate": 9.868135139522318e-05, "loss": 0.6213362216949463, "step": 452 }, { "epoch": 0.3611720151484951, "grad_norm": 0.4539633523307908, "learning_rate": 9.867131107079121e-05, "loss": 0.5594902038574219, "step": 453 }, { "epoch": 0.36196930436515845, "grad_norm": 0.5340114284940718, "learning_rate": 9.866123318184803e-05, "loss": 0.7223303318023682, "step": 454 }, { "epoch": 0.3627665935818218, "grad_norm": 0.45658718063307097, "learning_rate": 9.865111773617172e-05, "loss": 0.4734572172164917, "step": 455 }, { "epoch": 0.36356388279848517, "grad_norm": 0.4402809647739277, "learning_rate": 9.864096474156942e-05, "loss": 0.6529785990715027, "step": 456 }, { "epoch": 0.3643611720151485, "grad_norm": 0.46157109824825526, "learning_rate": 9.863077420587719e-05, "loss": 0.6718568801879883, "step": 457 }, { "epoch": 0.36515846123181184, "grad_norm": 0.4448122700964172, "learning_rate": 9.862054613696008e-05, "loss": 0.6439890265464783, "step": 458 }, { "epoch": 0.36595575044847517, "grad_norm": 0.4047379684798494, "learning_rate": 9.861028054271211e-05, "loss": 0.5966838598251343, "step": 459 }, { "epoch": 0.3667530396651385, "grad_norm": 0.3849719835749956, "learning_rate": 9.85999774310563e-05, "loss": 0.5894796252250671, "step": 460 }, { "epoch": 0.3675503288818019, "grad_norm": 0.4131803903578482, "learning_rate": 9.858963680994453e-05, "loss": 0.6025094985961914, "step": 461 }, { "epoch": 0.3683476180984652, "grad_norm": 0.5133887365625189, "learning_rate": 9.857925868735773e-05, "loss": 0.7098044753074646, "step": 462 }, { "epoch": 0.36914490731512856, "grad_norm": 0.4735489555758055, "learning_rate": 9.856884307130572e-05, "loss": 0.5647828578948975, "step": 463 }, { "epoch": 0.3699421965317919, "grad_norm": 0.39240830016358097, "learning_rate": 9.855838996982729e-05, "loss": 0.5959030389785767, "step": 464 }, { "epoch": 0.3707394857484553, "grad_norm": 0.45435348659103597, "learning_rate": 9.854789939099011e-05, "loss": 0.6308571100234985, "step": 465 }, { "epoch": 0.3715367749651186, "grad_norm": 0.3852088708318438, "learning_rate": 9.853737134289085e-05, "loss": 0.48355937004089355, "step": 466 }, { "epoch": 0.37233406418178194, "grad_norm": 0.440528822374536, "learning_rate": 9.852680583365501e-05, "loss": 0.5775323510169983, "step": 467 }, { "epoch": 0.3731313533984453, "grad_norm": 0.4558296991920749, "learning_rate": 9.851620287143707e-05, "loss": 0.6038222312927246, "step": 468 }, { "epoch": 0.3739286426151086, "grad_norm": 0.4451704988009219, "learning_rate": 9.850556246442042e-05, "loss": 0.510440468788147, "step": 469 }, { "epoch": 0.374725931831772, "grad_norm": 0.40626782747593504, "learning_rate": 9.849488462081728e-05, "loss": 0.47087177634239197, "step": 470 }, { "epoch": 0.37552322104843533, "grad_norm": 0.42883829677601676, "learning_rate": 9.848416934886883e-05, "loss": 0.44809436798095703, "step": 471 }, { "epoch": 0.37632051026509866, "grad_norm": 0.5234310544702266, "learning_rate": 9.847341665684511e-05, "loss": 0.6576576232910156, "step": 472 }, { "epoch": 0.377117799481762, "grad_norm": 0.45360901459403397, "learning_rate": 9.846262655304506e-05, "loss": 0.5555305480957031, "step": 473 }, { "epoch": 0.3779150886984253, "grad_norm": 0.4731178211821702, "learning_rate": 9.845179904579647e-05, "loss": 0.5824108123779297, "step": 474 }, { "epoch": 0.3787123779150887, "grad_norm": 0.4556853453695963, "learning_rate": 9.8440934143456e-05, "loss": 0.4932464361190796, "step": 475 }, { "epoch": 0.37950966713175205, "grad_norm": 0.49403694960039185, "learning_rate": 9.843003185440918e-05, "loss": 0.5764445066452026, "step": 476 }, { "epoch": 0.3803069563484154, "grad_norm": 0.4177823738715831, "learning_rate": 9.841909218707043e-05, "loss": 0.47162002325057983, "step": 477 }, { "epoch": 0.3811042455650787, "grad_norm": 0.5462544020938537, "learning_rate": 9.840811514988294e-05, "loss": 0.5898071527481079, "step": 478 }, { "epoch": 0.3819015347817421, "grad_norm": 0.49984579794809514, "learning_rate": 9.83971007513188e-05, "loss": 0.5459364652633667, "step": 479 }, { "epoch": 0.38269882399840544, "grad_norm": 0.510375644138215, "learning_rate": 9.838604899987895e-05, "loss": 0.6346120834350586, "step": 480 }, { "epoch": 0.38349611321506877, "grad_norm": 0.47011213025385307, "learning_rate": 9.837495990409309e-05, "loss": 0.6454225182533264, "step": 481 }, { "epoch": 0.3842934024317321, "grad_norm": 0.9945993824851015, "learning_rate": 9.83638334725198e-05, "loss": 0.7244710922241211, "step": 482 }, { "epoch": 0.38509069164839543, "grad_norm": 0.383386442707786, "learning_rate": 9.835266971374645e-05, "loss": 0.5478464365005493, "step": 483 }, { "epoch": 0.3858879808650588, "grad_norm": 0.4467084347970285, "learning_rate": 9.834146863638923e-05, "loss": 0.6279883980751038, "step": 484 }, { "epoch": 0.38668527008172215, "grad_norm": 0.48856344190885004, "learning_rate": 9.833023024909315e-05, "loss": 0.6187289953231812, "step": 485 }, { "epoch": 0.3874825592983855, "grad_norm": 0.45002627707940673, "learning_rate": 9.831895456053197e-05, "loss": 0.5958802700042725, "step": 486 }, { "epoch": 0.3882798485150488, "grad_norm": 0.4321031921623947, "learning_rate": 9.830764157940827e-05, "loss": 0.5445156097412109, "step": 487 }, { "epoch": 0.38907713773171215, "grad_norm": 0.4129324471707705, "learning_rate": 9.829629131445342e-05, "loss": 0.6393333077430725, "step": 488 }, { "epoch": 0.38987442694837554, "grad_norm": 0.45960409231051724, "learning_rate": 9.828490377442754e-05, "loss": 0.6715397834777832, "step": 489 }, { "epoch": 0.3906717161650389, "grad_norm": 0.4408154269746818, "learning_rate": 9.827347896811953e-05, "loss": 0.6882790327072144, "step": 490 }, { "epoch": 0.3914690053817022, "grad_norm": 0.4550459882838028, "learning_rate": 9.826201690434708e-05, "loss": 0.7144509553909302, "step": 491 }, { "epoch": 0.39226629459836554, "grad_norm": 0.4101725741694093, "learning_rate": 9.825051759195658e-05, "loss": 0.6013184785842896, "step": 492 }, { "epoch": 0.3930635838150289, "grad_norm": 0.4720683228702644, "learning_rate": 9.823898103982321e-05, "loss": 0.6422757506370544, "step": 493 }, { "epoch": 0.39386087303169226, "grad_norm": 0.4459655087663578, "learning_rate": 9.822740725685086e-05, "loss": 0.5126609802246094, "step": 494 }, { "epoch": 0.3946581622483556, "grad_norm": 0.45834229803896714, "learning_rate": 9.821579625197222e-05, "loss": 0.5752924084663391, "step": 495 }, { "epoch": 0.3954554514650189, "grad_norm": 0.42630152913773495, "learning_rate": 9.820414803414862e-05, "loss": 0.6253732442855835, "step": 496 }, { "epoch": 0.39625274068168226, "grad_norm": 0.41331921880901007, "learning_rate": 9.819246261237013e-05, "loss": 0.45170891284942627, "step": 497 }, { "epoch": 0.39705002989834565, "grad_norm": 0.4471114721827365, "learning_rate": 9.818073999565565e-05, "loss": 0.7678471207618713, "step": 498 }, { "epoch": 0.397847319115009, "grad_norm": 0.42398763640417453, "learning_rate": 9.81689801930526e-05, "loss": 0.5922031998634338, "step": 499 }, { "epoch": 0.3986446083316723, "grad_norm": 0.44633469313734553, "learning_rate": 9.815718321363724e-05, "loss": 0.6470973491668701, "step": 500 }, { "epoch": 0.39944189754833564, "grad_norm": 0.4619063895237721, "learning_rate": 9.814534906651448e-05, "loss": 0.5900227427482605, "step": 501 }, { "epoch": 0.400239186764999, "grad_norm": 0.4334261541678682, "learning_rate": 9.813347776081789e-05, "loss": 0.5794212818145752, "step": 502 }, { "epoch": 0.40103647598166237, "grad_norm": 0.4537004178795101, "learning_rate": 9.812156930570974e-05, "loss": 0.5096148252487183, "step": 503 }, { "epoch": 0.4018337651983257, "grad_norm": 0.4592464967532519, "learning_rate": 9.810962371038101e-05, "loss": 0.5234513282775879, "step": 504 }, { "epoch": 0.40263105441498903, "grad_norm": 0.43769712846361575, "learning_rate": 9.809764098405127e-05, "loss": 0.6274980306625366, "step": 505 }, { "epoch": 0.40342834363165236, "grad_norm": 0.4486424583616185, "learning_rate": 9.808562113596882e-05, "loss": 0.5349489450454712, "step": 506 }, { "epoch": 0.40422563284831575, "grad_norm": 0.4675606850049261, "learning_rate": 9.807356417541057e-05, "loss": 0.6336418390274048, "step": 507 }, { "epoch": 0.4050229220649791, "grad_norm": 0.45503636927238206, "learning_rate": 9.806147011168203e-05, "loss": 0.5456087589263916, "step": 508 }, { "epoch": 0.4058202112816424, "grad_norm": 0.4539268351094348, "learning_rate": 9.804933895411746e-05, "loss": 0.5967632532119751, "step": 509 }, { "epoch": 0.40661750049830575, "grad_norm": 0.45535514774065083, "learning_rate": 9.803717071207966e-05, "loss": 0.5308210253715515, "step": 510 }, { "epoch": 0.4074147897149691, "grad_norm": 0.4628529533068637, "learning_rate": 9.802496539496008e-05, "loss": 0.655828058719635, "step": 511 }, { "epoch": 0.40821207893163247, "grad_norm": 0.4514390324729588, "learning_rate": 9.801272301217877e-05, "loss": 0.5765966773033142, "step": 512 }, { "epoch": 0.4090093681482958, "grad_norm": 0.6476715641802971, "learning_rate": 9.800044357318443e-05, "loss": 0.734703540802002, "step": 513 }, { "epoch": 0.40980665736495914, "grad_norm": 0.4164622987143602, "learning_rate": 9.79881270874543e-05, "loss": 0.5139474868774414, "step": 514 }, { "epoch": 0.41060394658162247, "grad_norm": 0.4206776883027899, "learning_rate": 9.797577356449425e-05, "loss": 0.596643328666687, "step": 515 }, { "epoch": 0.4114012357982858, "grad_norm": 0.46852937526407323, "learning_rate": 9.796338301383875e-05, "loss": 0.636924147605896, "step": 516 }, { "epoch": 0.4121985250149492, "grad_norm": 0.41587465107835614, "learning_rate": 9.795095544505079e-05, "loss": 0.5834908485412598, "step": 517 }, { "epoch": 0.4129958142316125, "grad_norm": 0.4680345829946573, "learning_rate": 9.793849086772198e-05, "loss": 0.5619515776634216, "step": 518 }, { "epoch": 0.41379310344827586, "grad_norm": 0.4732590611192024, "learning_rate": 9.79259892914725e-05, "loss": 0.6468405723571777, "step": 519 }, { "epoch": 0.4145903926649392, "grad_norm": 0.4750629999365875, "learning_rate": 9.791345072595101e-05, "loss": 0.6082079410552979, "step": 520 }, { "epoch": 0.4153876818816026, "grad_norm": 0.41050267661261336, "learning_rate": 9.790087518083483e-05, "loss": 0.4785745143890381, "step": 521 }, { "epoch": 0.4161849710982659, "grad_norm": 0.38252921803730805, "learning_rate": 9.788826266582976e-05, "loss": 0.44516491889953613, "step": 522 }, { "epoch": 0.41698226031492924, "grad_norm": 0.4697908372649267, "learning_rate": 9.78756131906701e-05, "loss": 0.5814577341079712, "step": 523 }, { "epoch": 0.4177795495315926, "grad_norm": 0.454057510186388, "learning_rate": 9.786292676511873e-05, "loss": 0.5784962773323059, "step": 524 }, { "epoch": 0.4185768387482559, "grad_norm": 0.4716495452815906, "learning_rate": 9.785020339896703e-05, "loss": 0.6327345371246338, "step": 525 }, { "epoch": 0.4193741279649193, "grad_norm": 0.4509280790911056, "learning_rate": 9.783744310203491e-05, "loss": 0.5963488221168518, "step": 526 }, { "epoch": 0.42017141718158263, "grad_norm": 0.5180231873595948, "learning_rate": 9.782464588417074e-05, "loss": 0.7146288156509399, "step": 527 }, { "epoch": 0.42096870639824596, "grad_norm": 0.4433020700457637, "learning_rate": 9.781181175525143e-05, "loss": 0.5872855186462402, "step": 528 }, { "epoch": 0.4217659956149093, "grad_norm": 0.49373208196679536, "learning_rate": 9.779894072518235e-05, "loss": 0.7114073038101196, "step": 529 }, { "epoch": 0.4225632848315726, "grad_norm": 0.47778499554726256, "learning_rate": 9.778603280389738e-05, "loss": 0.6166990995407104, "step": 530 }, { "epoch": 0.423360574048236, "grad_norm": 0.554145386372645, "learning_rate": 9.77730880013588e-05, "loss": 0.5458114743232727, "step": 531 }, { "epoch": 0.42415786326489935, "grad_norm": 0.45058768724272696, "learning_rate": 9.776010632755745e-05, "loss": 0.5869497060775757, "step": 532 }, { "epoch": 0.4249551524815627, "grad_norm": 0.42336314238460393, "learning_rate": 9.77470877925126e-05, "loss": 0.5507436394691467, "step": 533 }, { "epoch": 0.425752441698226, "grad_norm": 0.4935454305343217, "learning_rate": 9.77340324062719e-05, "loss": 0.646842896938324, "step": 534 }, { "epoch": 0.4265497309148894, "grad_norm": 0.42230833720609434, "learning_rate": 9.772094017891154e-05, "loss": 0.5471802949905396, "step": 535 }, { "epoch": 0.42734702013155274, "grad_norm": 0.4392295291909372, "learning_rate": 9.770781112053606e-05, "loss": 0.6093338131904602, "step": 536 }, { "epoch": 0.42814430934821607, "grad_norm": 0.45904523862206903, "learning_rate": 9.769464524127852e-05, "loss": 0.6374368667602539, "step": 537 }, { "epoch": 0.4289415985648794, "grad_norm": 0.4614456964931972, "learning_rate": 9.768144255130028e-05, "loss": 0.6235023140907288, "step": 538 }, { "epoch": 0.42973888778154273, "grad_norm": 0.45517160687135255, "learning_rate": 9.766820306079123e-05, "loss": 0.6615297198295593, "step": 539 }, { "epoch": 0.4305361769982061, "grad_norm": 0.49039787886618913, "learning_rate": 9.765492677996956e-05, "loss": 0.7057394981384277, "step": 540 }, { "epoch": 0.43133346621486945, "grad_norm": 0.48831258233285013, "learning_rate": 9.764161371908195e-05, "loss": 0.5800653100013733, "step": 541 }, { "epoch": 0.4321307554315328, "grad_norm": 0.455718654179006, "learning_rate": 9.762826388840338e-05, "loss": 0.5659594535827637, "step": 542 }, { "epoch": 0.4329280446481961, "grad_norm": 0.6742087302487737, "learning_rate": 9.761487729823726e-05, "loss": 0.6558187007904053, "step": 543 }, { "epoch": 0.43372533386485945, "grad_norm": 1.0003583089073802, "learning_rate": 9.760145395891536e-05, "loss": 0.574370265007019, "step": 544 }, { "epoch": 0.43452262308152284, "grad_norm": 0.5068128024126043, "learning_rate": 9.75879938807978e-05, "loss": 0.6502658724784851, "step": 545 }, { "epoch": 0.4353199122981862, "grad_norm": 0.5741670950196105, "learning_rate": 9.75744970742731e-05, "loss": 0.6901801824569702, "step": 546 }, { "epoch": 0.4361172015148495, "grad_norm": 0.4378126049543129, "learning_rate": 9.756096354975806e-05, "loss": 0.5809288620948792, "step": 547 }, { "epoch": 0.43691449073151284, "grad_norm": 0.5290406287275471, "learning_rate": 9.754739331769785e-05, "loss": 0.6275231838226318, "step": 548 }, { "epoch": 0.4377117799481762, "grad_norm": 0.47760712333517996, "learning_rate": 9.7533786388566e-05, "loss": 0.6691612005233765, "step": 549 }, { "epoch": 0.43850906916483956, "grad_norm": 0.45268661703588225, "learning_rate": 9.752014277286432e-05, "loss": 0.5508706569671631, "step": 550 }, { "epoch": 0.4393063583815029, "grad_norm": 0.6920135467915488, "learning_rate": 9.750646248112295e-05, "loss": 0.7351214289665222, "step": 551 }, { "epoch": 0.4401036475981662, "grad_norm": 0.4460416900330197, "learning_rate": 9.749274552390035e-05, "loss": 0.6694363951683044, "step": 552 }, { "epoch": 0.44090093681482956, "grad_norm": 0.4455962272140273, "learning_rate": 9.747899191178325e-05, "loss": 0.6685846447944641, "step": 553 }, { "epoch": 0.44169822603149295, "grad_norm": 0.4355026698303592, "learning_rate": 9.746520165538669e-05, "loss": 0.5683099031448364, "step": 554 }, { "epoch": 0.4424955152481563, "grad_norm": 0.45326364500855476, "learning_rate": 9.745137476535398e-05, "loss": 0.6304695010185242, "step": 555 }, { "epoch": 0.4432928044648196, "grad_norm": 0.38762416412412276, "learning_rate": 9.743751125235674e-05, "loss": 0.5226762294769287, "step": 556 }, { "epoch": 0.44409009368148294, "grad_norm": 0.5393630206726171, "learning_rate": 9.742361112709479e-05, "loss": 0.6815453171730042, "step": 557 }, { "epoch": 0.4448873828981463, "grad_norm": 0.4330244912650573, "learning_rate": 9.740967440029628e-05, "loss": 0.559374213218689, "step": 558 }, { "epoch": 0.44568467211480967, "grad_norm": 0.42350533370878257, "learning_rate": 9.739570108271755e-05, "loss": 0.4697226583957672, "step": 559 }, { "epoch": 0.446481961331473, "grad_norm": 0.4630612925772285, "learning_rate": 9.738169118514323e-05, "loss": 0.6222900152206421, "step": 560 }, { "epoch": 0.44727925054813633, "grad_norm": 0.4682374413087705, "learning_rate": 9.73676447183861e-05, "loss": 0.6029460430145264, "step": 561 }, { "epoch": 0.44807653976479966, "grad_norm": 0.47694127662446667, "learning_rate": 9.735356169328729e-05, "loss": 0.4911309480667114, "step": 562 }, { "epoch": 0.44887382898146305, "grad_norm": 0.5138170423774807, "learning_rate": 9.733944212071604e-05, "loss": 0.5558739304542542, "step": 563 }, { "epoch": 0.4496711181981264, "grad_norm": 0.4561701657448304, "learning_rate": 9.732528601156985e-05, "loss": 0.5636507868766785, "step": 564 }, { "epoch": 0.4504684074147897, "grad_norm": 0.47054488046182724, "learning_rate": 9.731109337677437e-05, "loss": 0.6375722289085388, "step": 565 }, { "epoch": 0.45126569663145305, "grad_norm": 0.4902668877637484, "learning_rate": 9.729686422728353e-05, "loss": 0.584967851638794, "step": 566 }, { "epoch": 0.4520629858481164, "grad_norm": 0.4585449156757475, "learning_rate": 9.728259857407934e-05, "loss": 0.4943973124027252, "step": 567 }, { "epoch": 0.45286027506477977, "grad_norm": 0.5682404666067367, "learning_rate": 9.726829642817204e-05, "loss": 0.6016470789909363, "step": 568 }, { "epoch": 0.4536575642814431, "grad_norm": 0.44711165669905395, "learning_rate": 9.725395780060002e-05, "loss": 0.6920987367630005, "step": 569 }, { "epoch": 0.45445485349810644, "grad_norm": 0.4620992773712448, "learning_rate": 9.723958270242985e-05, "loss": 0.5105484127998352, "step": 570 }, { "epoch": 0.45525214271476977, "grad_norm": 0.43142194128901024, "learning_rate": 9.722517114475622e-05, "loss": 0.5696845054626465, "step": 571 }, { "epoch": 0.4560494319314331, "grad_norm": 0.49289037691792514, "learning_rate": 9.721072313870195e-05, "loss": 0.7879433631896973, "step": 572 }, { "epoch": 0.4568467211480965, "grad_norm": 0.4691634809536185, "learning_rate": 9.719623869541803e-05, "loss": 0.7068961262702942, "step": 573 }, { "epoch": 0.4576440103647598, "grad_norm": 0.41781441459424323, "learning_rate": 9.718171782608356e-05, "loss": 0.6006783246994019, "step": 574 }, { "epoch": 0.45844129958142316, "grad_norm": 0.41889078112505457, "learning_rate": 9.716716054190571e-05, "loss": 0.5192777514457703, "step": 575 }, { "epoch": 0.4592385887980865, "grad_norm": 0.4464998779710712, "learning_rate": 9.715256685411982e-05, "loss": 0.5750688314437866, "step": 576 }, { "epoch": 0.4600358780147499, "grad_norm": 0.40290531845195765, "learning_rate": 9.713793677398927e-05, "loss": 0.572567343711853, "step": 577 }, { "epoch": 0.4608331672314132, "grad_norm": 0.39366706186008965, "learning_rate": 9.71232703128056e-05, "loss": 0.5388445854187012, "step": 578 }, { "epoch": 0.46163045644807654, "grad_norm": 0.4679708154538769, "learning_rate": 9.710856748188835e-05, "loss": 0.5599831342697144, "step": 579 }, { "epoch": 0.4624277456647399, "grad_norm": 0.4489354433362738, "learning_rate": 9.709382829258515e-05, "loss": 0.6588541269302368, "step": 580 }, { "epoch": 0.4632250348814032, "grad_norm": 0.4380522227425299, "learning_rate": 9.707905275627173e-05, "loss": 0.6432113647460938, "step": 581 }, { "epoch": 0.4640223240980666, "grad_norm": 0.43127735372388915, "learning_rate": 9.706424088435182e-05, "loss": 0.5280407071113586, "step": 582 }, { "epoch": 0.46481961331472993, "grad_norm": 0.46736450148045516, "learning_rate": 9.704939268825726e-05, "loss": 0.534479558467865, "step": 583 }, { "epoch": 0.46561690253139326, "grad_norm": 0.4510651190768869, "learning_rate": 9.703450817944786e-05, "loss": 0.49648934602737427, "step": 584 }, { "epoch": 0.4664141917480566, "grad_norm": 0.48783506791638637, "learning_rate": 9.701958736941148e-05, "loss": 0.5398257970809937, "step": 585 }, { "epoch": 0.4672114809647199, "grad_norm": 0.44156029966186994, "learning_rate": 9.7004630269664e-05, "loss": 0.5235372185707092, "step": 586 }, { "epoch": 0.4680087701813833, "grad_norm": 0.46439241005424364, "learning_rate": 9.698963689174932e-05, "loss": 0.5505254864692688, "step": 587 }, { "epoch": 0.46880605939804665, "grad_norm": 0.7113778543853244, "learning_rate": 9.69746072472393e-05, "loss": 0.6892968416213989, "step": 588 }, { "epoch": 0.46960334861471, "grad_norm": 0.49519095827323084, "learning_rate": 9.695954134773386e-05, "loss": 0.5082768797874451, "step": 589 }, { "epoch": 0.4704006378313733, "grad_norm": 0.4297284998750724, "learning_rate": 9.694443920486082e-05, "loss": 0.582457959651947, "step": 590 }, { "epoch": 0.4711979270480367, "grad_norm": 0.4000057150953857, "learning_rate": 9.692930083027605e-05, "loss": 0.4383409023284912, "step": 591 }, { "epoch": 0.47199521626470003, "grad_norm": 0.39540750213955156, "learning_rate": 9.691412623566332e-05, "loss": 0.4802122712135315, "step": 592 }, { "epoch": 0.47279250548136337, "grad_norm": 0.46748053473754103, "learning_rate": 9.689891543273436e-05, "loss": 0.6536918878555298, "step": 593 }, { "epoch": 0.4735897946980267, "grad_norm": 0.4798699284601644, "learning_rate": 9.688366843322891e-05, "loss": 0.603112518787384, "step": 594 }, { "epoch": 0.47438708391469003, "grad_norm": 0.4511584491798817, "learning_rate": 9.686838524891458e-05, "loss": 0.6060042381286621, "step": 595 }, { "epoch": 0.4751843731313534, "grad_norm": 0.48268515413364377, "learning_rate": 9.685306589158691e-05, "loss": 0.5447802543640137, "step": 596 }, { "epoch": 0.47598166234801675, "grad_norm": 0.494423621872368, "learning_rate": 9.683771037306942e-05, "loss": 0.6877695322036743, "step": 597 }, { "epoch": 0.4767789515646801, "grad_norm": 0.47405829540366073, "learning_rate": 9.682231870521347e-05, "loss": 0.57517409324646, "step": 598 }, { "epoch": 0.4775762407813434, "grad_norm": 0.46889712564179015, "learning_rate": 9.680689089989834e-05, "loss": 0.6663451790809631, "step": 599 }, { "epoch": 0.47837352999800675, "grad_norm": 0.465250212230262, "learning_rate": 9.679142696903123e-05, "loss": 0.6634753942489624, "step": 600 }, { "epoch": 0.47917081921467014, "grad_norm": 0.5093570023328545, "learning_rate": 9.67759269245472e-05, "loss": 0.7008824348449707, "step": 601 }, { "epoch": 0.4799681084313335, "grad_norm": 0.6125032965238416, "learning_rate": 9.676039077840916e-05, "loss": 0.5067781209945679, "step": 602 }, { "epoch": 0.4807653976479968, "grad_norm": 0.4490431367154909, "learning_rate": 9.674481854260793e-05, "loss": 0.6212892532348633, "step": 603 }, { "epoch": 0.48156268686466014, "grad_norm": 0.43709876125698693, "learning_rate": 9.672921022916214e-05, "loss": 0.598461925983429, "step": 604 }, { "epoch": 0.4823599760813235, "grad_norm": 0.4327850371890699, "learning_rate": 9.671356585011829e-05, "loss": 0.5238300561904907, "step": 605 }, { "epoch": 0.48315726529798686, "grad_norm": 0.45184279097743224, "learning_rate": 9.669788541755072e-05, "loss": 0.5629293918609619, "step": 606 }, { "epoch": 0.4839545545146502, "grad_norm": 0.440045767851692, "learning_rate": 9.668216894356157e-05, "loss": 0.4987711012363434, "step": 607 }, { "epoch": 0.4847518437313135, "grad_norm": 0.43275056930255484, "learning_rate": 9.66664164402808e-05, "loss": 0.5592584609985352, "step": 608 }, { "epoch": 0.48554913294797686, "grad_norm": 0.43211733326183954, "learning_rate": 9.665062791986624e-05, "loss": 0.5570223331451416, "step": 609 }, { "epoch": 0.48634642216464025, "grad_norm": 0.5064688599453248, "learning_rate": 9.663480339450344e-05, "loss": 0.6797738075256348, "step": 610 }, { "epoch": 0.4871437113813036, "grad_norm": 0.4232490038283235, "learning_rate": 9.661894287640574e-05, "loss": 0.526086688041687, "step": 611 }, { "epoch": 0.4879410005979669, "grad_norm": 0.453537458844833, "learning_rate": 9.660304637781432e-05, "loss": 0.6317825317382812, "step": 612 }, { "epoch": 0.48873828981463024, "grad_norm": 0.43614627063394634, "learning_rate": 9.658711391099808e-05, "loss": 0.5666950941085815, "step": 613 }, { "epoch": 0.4895355790312936, "grad_norm": 0.4892050713290902, "learning_rate": 9.657114548825371e-05, "loss": 0.6072052717208862, "step": 614 }, { "epoch": 0.49033286824795697, "grad_norm": 0.4922150442334722, "learning_rate": 9.655514112190562e-05, "loss": 0.4791298508644104, "step": 615 }, { "epoch": 0.4911301574646203, "grad_norm": 0.4464910014511995, "learning_rate": 9.6539100824306e-05, "loss": 0.6209923624992371, "step": 616 }, { "epoch": 0.49192744668128363, "grad_norm": 0.44463946546242245, "learning_rate": 9.652302460783472e-05, "loss": 0.5557315945625305, "step": 617 }, { "epoch": 0.49272473589794696, "grad_norm": 0.4455796787171109, "learning_rate": 9.650691248489941e-05, "loss": 0.5895600914955139, "step": 618 }, { "epoch": 0.49352202511461035, "grad_norm": 0.40579565533770734, "learning_rate": 9.649076446793543e-05, "loss": 0.5449697971343994, "step": 619 }, { "epoch": 0.4943193143312737, "grad_norm": 0.46705433904411436, "learning_rate": 9.647458056940578e-05, "loss": 0.5242958068847656, "step": 620 }, { "epoch": 0.495116603547937, "grad_norm": 0.4266207298768299, "learning_rate": 9.645836080180121e-05, "loss": 0.594025194644928, "step": 621 }, { "epoch": 0.49591389276460035, "grad_norm": 0.44515609202491196, "learning_rate": 9.644210517764014e-05, "loss": 0.5790475606918335, "step": 622 }, { "epoch": 0.4967111819812637, "grad_norm": 0.49143604312235567, "learning_rate": 9.642581370946866e-05, "loss": 0.6067743301391602, "step": 623 }, { "epoch": 0.49750847119792707, "grad_norm": 0.449995168262537, "learning_rate": 9.64094864098605e-05, "loss": 0.6541000604629517, "step": 624 }, { "epoch": 0.4983057604145904, "grad_norm": 0.45054360581643443, "learning_rate": 9.639312329141708e-05, "loss": 0.6169480085372925, "step": 625 }, { "epoch": 0.49910304963125374, "grad_norm": 0.4551518046521785, "learning_rate": 9.637672436676746e-05, "loss": 0.5123620629310608, "step": 626 }, { "epoch": 0.49990033884791707, "grad_norm": 0.4435218278651129, "learning_rate": 9.636028964856832e-05, "loss": 0.538784384727478, "step": 627 }, { "epoch": 0.5006976280645804, "grad_norm": 0.41366622854759216, "learning_rate": 9.634381914950399e-05, "loss": 0.5251980423927307, "step": 628 }, { "epoch": 0.5014949172812437, "grad_norm": 0.45298359860916454, "learning_rate": 9.632731288228635e-05, "loss": 0.7366654872894287, "step": 629 }, { "epoch": 0.5022922064979071, "grad_norm": 0.46950348426850164, "learning_rate": 9.6310770859655e-05, "loss": 0.6027372479438782, "step": 630 }, { "epoch": 0.5030894957145705, "grad_norm": 0.38739249836747675, "learning_rate": 9.629419309437703e-05, "loss": 0.553181529045105, "step": 631 }, { "epoch": 0.5038867849312338, "grad_norm": 0.4204011145166443, "learning_rate": 9.627757959924716e-05, "loss": 0.5062991976737976, "step": 632 }, { "epoch": 0.5046840741478972, "grad_norm": 0.4253104451098354, "learning_rate": 9.626093038708771e-05, "loss": 0.5618441104888916, "step": 633 }, { "epoch": 0.5054813633645605, "grad_norm": 0.4314501704414475, "learning_rate": 9.62442454707485e-05, "loss": 0.6008468270301819, "step": 634 }, { "epoch": 0.5062786525812238, "grad_norm": 0.5025833712393862, "learning_rate": 9.622752486310698e-05, "loss": 0.5119160413742065, "step": 635 }, { "epoch": 0.5070759417978872, "grad_norm": 0.4386820265000625, "learning_rate": 9.62107685770681e-05, "loss": 0.5579307079315186, "step": 636 }, { "epoch": 0.5078732310145505, "grad_norm": 0.4263099384732608, "learning_rate": 9.619397662556435e-05, "loss": 0.56053227186203, "step": 637 }, { "epoch": 0.5086705202312138, "grad_norm": 0.40169193566937134, "learning_rate": 9.617714902155575e-05, "loss": 0.5168663263320923, "step": 638 }, { "epoch": 0.5094678094478772, "grad_norm": 0.49812630224852067, "learning_rate": 9.616028577802986e-05, "loss": 0.6658270359039307, "step": 639 }, { "epoch": 0.5102650986645406, "grad_norm": 0.4925135327051764, "learning_rate": 9.614338690800175e-05, "loss": 0.6534044742584229, "step": 640 }, { "epoch": 0.511062387881204, "grad_norm": 0.42357753113946073, "learning_rate": 9.612645242451393e-05, "loss": 0.5385574102401733, "step": 641 }, { "epoch": 0.5118596770978673, "grad_norm": 0.46239632147086124, "learning_rate": 9.610948234063644e-05, "loss": 0.586579442024231, "step": 642 }, { "epoch": 0.5126569663145306, "grad_norm": 0.45155704524989554, "learning_rate": 9.60924766694668e-05, "loss": 0.5849452018737793, "step": 643 }, { "epoch": 0.513454255531194, "grad_norm": 0.43140265275193274, "learning_rate": 9.607543542412996e-05, "loss": 0.6400216221809387, "step": 644 }, { "epoch": 0.5142515447478573, "grad_norm": 0.392492202435951, "learning_rate": 9.605835861777837e-05, "loss": 0.5916197896003723, "step": 645 }, { "epoch": 0.5150488339645206, "grad_norm": 0.4463422965383645, "learning_rate": 9.60412462635919e-05, "loss": 0.5232757925987244, "step": 646 }, { "epoch": 0.515846123181184, "grad_norm": 0.3924134180092752, "learning_rate": 9.602409837477789e-05, "loss": 0.5253641605377197, "step": 647 }, { "epoch": 0.5166434123978473, "grad_norm": 0.4956481112941794, "learning_rate": 9.600691496457102e-05, "loss": 0.7093149423599243, "step": 648 }, { "epoch": 0.5174407016145106, "grad_norm": 0.4073256645114191, "learning_rate": 9.598969604623349e-05, "loss": 0.5896748304367065, "step": 649 }, { "epoch": 0.5182379908311741, "grad_norm": 0.41257299615353654, "learning_rate": 9.597244163305486e-05, "loss": 0.6031298637390137, "step": 650 }, { "epoch": 0.5190352800478374, "grad_norm": 0.39963493710743686, "learning_rate": 9.595515173835205e-05, "loss": 0.5582987070083618, "step": 651 }, { "epoch": 0.5198325692645007, "grad_norm": 0.4611840246510215, "learning_rate": 9.593782637546944e-05, "loss": 0.5976774096488953, "step": 652 }, { "epoch": 0.520629858481164, "grad_norm": 0.4780507572591094, "learning_rate": 9.592046555777872e-05, "loss": 0.6398218274116516, "step": 653 }, { "epoch": 0.5214271476978274, "grad_norm": 0.5236695032393756, "learning_rate": 9.590306929867896e-05, "loss": 0.6230917572975159, "step": 654 }, { "epoch": 0.5222244369144907, "grad_norm": 0.4120848087895042, "learning_rate": 9.58856376115966e-05, "loss": 0.5302759408950806, "step": 655 }, { "epoch": 0.523021726131154, "grad_norm": 0.4382316312237303, "learning_rate": 9.586817050998543e-05, "loss": 0.631851077079773, "step": 656 }, { "epoch": 0.5238190153478174, "grad_norm": 0.4620593194896478, "learning_rate": 9.585066800732652e-05, "loss": 0.5415442585945129, "step": 657 }, { "epoch": 0.5246163045644807, "grad_norm": 0.43317754677043263, "learning_rate": 9.583313011712831e-05, "loss": 0.5001699328422546, "step": 658 }, { "epoch": 0.5254135937811442, "grad_norm": 0.5115386751464382, "learning_rate": 9.581555685292655e-05, "loss": 0.7069197297096252, "step": 659 }, { "epoch": 0.5262108829978075, "grad_norm": 0.4653070398643882, "learning_rate": 9.579794822828431e-05, "loss": 0.5768203139305115, "step": 660 }, { "epoch": 0.5270081722144708, "grad_norm": 0.42846338603052103, "learning_rate": 9.578030425679187e-05, "loss": 0.5426847338676453, "step": 661 }, { "epoch": 0.5278054614311342, "grad_norm": 0.5081975221635538, "learning_rate": 9.57626249520669e-05, "loss": 0.7744450569152832, "step": 662 }, { "epoch": 0.5286027506477975, "grad_norm": 0.4316138461613879, "learning_rate": 9.574491032775423e-05, "loss": 0.5469563007354736, "step": 663 }, { "epoch": 0.5294000398644608, "grad_norm": 0.4897137157502005, "learning_rate": 9.572716039752603e-05, "loss": 0.7004687786102295, "step": 664 }, { "epoch": 0.5301973290811242, "grad_norm": 0.37347377662026304, "learning_rate": 9.57093751750817e-05, "loss": 0.44365787506103516, "step": 665 }, { "epoch": 0.5309946182977875, "grad_norm": 0.45623966700631774, "learning_rate": 9.569155467414788e-05, "loss": 0.7861499190330505, "step": 666 }, { "epoch": 0.5317919075144508, "grad_norm": 0.4057948055610417, "learning_rate": 9.56736989084784e-05, "loss": 0.5135864615440369, "step": 667 }, { "epoch": 0.5325891967311143, "grad_norm": 0.41434434935974146, "learning_rate": 9.565580789185436e-05, "loss": 0.5364166498184204, "step": 668 }, { "epoch": 0.5333864859477776, "grad_norm": 0.44456771508774123, "learning_rate": 9.563788163808405e-05, "loss": 0.5763890743255615, "step": 669 }, { "epoch": 0.5341837751644409, "grad_norm": 0.4556301428820817, "learning_rate": 9.561992016100293e-05, "loss": 0.6676862239837646, "step": 670 }, { "epoch": 0.5349810643811043, "grad_norm": 0.43388907420159045, "learning_rate": 9.560192347447367e-05, "loss": 0.5443181991577148, "step": 671 }, { "epoch": 0.5357783535977676, "grad_norm": 0.41896892096808924, "learning_rate": 9.558389159238613e-05, "loss": 0.6035862565040588, "step": 672 }, { "epoch": 0.5365756428144309, "grad_norm": 0.4141518286281006, "learning_rate": 9.556582452865728e-05, "loss": 0.581026554107666, "step": 673 }, { "epoch": 0.5373729320310943, "grad_norm": 0.42518540838290936, "learning_rate": 9.55477222972313e-05, "loss": 0.6450905203819275, "step": 674 }, { "epoch": 0.5381702212477576, "grad_norm": 0.4171293051036858, "learning_rate": 9.552958491207949e-05, "loss": 0.5413328409194946, "step": 675 }, { "epoch": 0.5389675104644209, "grad_norm": 0.5060116257008441, "learning_rate": 9.551141238720028e-05, "loss": 0.5412304401397705, "step": 676 }, { "epoch": 0.5397647996810843, "grad_norm": 0.5232971452330628, "learning_rate": 9.549320473661922e-05, "loss": 0.7101565003395081, "step": 677 }, { "epoch": 0.5405620888977477, "grad_norm": 0.4903687326529158, "learning_rate": 9.547496197438897e-05, "loss": 0.6348576545715332, "step": 678 }, { "epoch": 0.541359378114411, "grad_norm": 0.47387527876598246, "learning_rate": 9.545668411458928e-05, "loss": 0.6147969961166382, "step": 679 }, { "epoch": 0.5421566673310744, "grad_norm": 0.4713328635450289, "learning_rate": 9.543837117132703e-05, "loss": 0.6835145950317383, "step": 680 }, { "epoch": 0.5429539565477377, "grad_norm": 0.4974407032094931, "learning_rate": 9.542002315873612e-05, "loss": 0.6679293513298035, "step": 681 }, { "epoch": 0.543751245764401, "grad_norm": 0.7810524146332325, "learning_rate": 9.540164009097757e-05, "loss": 0.6498271226882935, "step": 682 }, { "epoch": 0.5445485349810644, "grad_norm": 0.4334509803579083, "learning_rate": 9.538322198223938e-05, "loss": 0.5142498016357422, "step": 683 }, { "epoch": 0.5453458241977277, "grad_norm": 0.545660814977313, "learning_rate": 9.536476884673671e-05, "loss": 0.6370818018913269, "step": 684 }, { "epoch": 0.546143113414391, "grad_norm": 0.4416004359682847, "learning_rate": 9.534628069871165e-05, "loss": 0.6955193281173706, "step": 685 }, { "epoch": 0.5469404026310544, "grad_norm": 0.9069220674032354, "learning_rate": 9.532775755243334e-05, "loss": 0.5272481441497803, "step": 686 }, { "epoch": 0.5477376918477178, "grad_norm": 0.4497711408155145, "learning_rate": 9.530919942219796e-05, "loss": 0.6363149285316467, "step": 687 }, { "epoch": 0.5485349810643811, "grad_norm": 0.47647512367635325, "learning_rate": 9.529060632232867e-05, "loss": 0.5419552326202393, "step": 688 }, { "epoch": 0.5493322702810445, "grad_norm": 0.533748668141721, "learning_rate": 9.527197826717562e-05, "loss": 0.4912715554237366, "step": 689 }, { "epoch": 0.5501295594977078, "grad_norm": 0.36273636138357795, "learning_rate": 9.525331527111594e-05, "loss": 0.45101815462112427, "step": 690 }, { "epoch": 0.5509268487143711, "grad_norm": 0.4391694404189104, "learning_rate": 9.523461734855372e-05, "loss": 0.4657702147960663, "step": 691 }, { "epoch": 0.5517241379310345, "grad_norm": 0.46916098932117084, "learning_rate": 9.521588451392004e-05, "loss": 0.5663467049598694, "step": 692 }, { "epoch": 0.5525214271476978, "grad_norm": 0.44977365132049796, "learning_rate": 9.519711678167286e-05, "loss": 0.5241972804069519, "step": 693 }, { "epoch": 0.5533187163643611, "grad_norm": 0.44904929706970315, "learning_rate": 9.517831416629716e-05, "loss": 0.5627957582473755, "step": 694 }, { "epoch": 0.5541160055810245, "grad_norm": 0.5001899729976238, "learning_rate": 9.515947668230475e-05, "loss": 0.6135656237602234, "step": 695 }, { "epoch": 0.5549132947976878, "grad_norm": 0.4576958456860752, "learning_rate": 9.514060434423445e-05, "loss": 0.6056007742881775, "step": 696 }, { "epoch": 0.5557105840143512, "grad_norm": 0.4499746130788157, "learning_rate": 9.512169716665185e-05, "loss": 0.6098620891571045, "step": 697 }, { "epoch": 0.5565078732310146, "grad_norm": 0.4623150733346902, "learning_rate": 9.510275516414958e-05, "loss": 0.5459835529327393, "step": 698 }, { "epoch": 0.5573051624476779, "grad_norm": 0.6078312265730901, "learning_rate": 9.508377835134705e-05, "loss": 0.6256232261657715, "step": 699 }, { "epoch": 0.5581024516643412, "grad_norm": 0.38865374092010746, "learning_rate": 9.506476674289056e-05, "loss": 0.5181186199188232, "step": 700 }, { "epoch": 0.5588997408810046, "grad_norm": 0.4197860156260236, "learning_rate": 9.504572035345325e-05, "loss": 0.5532658100128174, "step": 701 }, { "epoch": 0.5596970300976679, "grad_norm": 0.3955824301808604, "learning_rate": 9.502663919773515e-05, "loss": 0.5791688561439514, "step": 702 }, { "epoch": 0.5604943193143312, "grad_norm": 0.4469857736609296, "learning_rate": 9.500752329046308e-05, "loss": 0.6050961017608643, "step": 703 }, { "epoch": 0.5612916085309946, "grad_norm": 0.4121666337170292, "learning_rate": 9.49883726463907e-05, "loss": 0.6427582502365112, "step": 704 }, { "epoch": 0.5620888977476579, "grad_norm": 0.41574253345652995, "learning_rate": 9.496918728029847e-05, "loss": 0.5440660119056702, "step": 705 }, { "epoch": 0.5628861869643214, "grad_norm": 0.4429424369784425, "learning_rate": 9.494996720699363e-05, "loss": 0.6090558767318726, "step": 706 }, { "epoch": 0.5636834761809847, "grad_norm": 0.44034744844898255, "learning_rate": 9.493071244131028e-05, "loss": 0.6683006286621094, "step": 707 }, { "epoch": 0.564480765397648, "grad_norm": 0.5172336033618553, "learning_rate": 9.49114229981092e-05, "loss": 0.7168609499931335, "step": 708 }, { "epoch": 0.5652780546143114, "grad_norm": 0.509299701916808, "learning_rate": 9.489209889227802e-05, "loss": 0.6534023880958557, "step": 709 }, { "epoch": 0.5660753438309747, "grad_norm": 0.37008889043613397, "learning_rate": 9.487274013873105e-05, "loss": 0.4487345218658447, "step": 710 }, { "epoch": 0.566872633047638, "grad_norm": 0.5121890135041738, "learning_rate": 9.485334675240937e-05, "loss": 0.7508560419082642, "step": 711 }, { "epoch": 0.5676699222643014, "grad_norm": 0.5360541267255301, "learning_rate": 9.483391874828081e-05, "loss": 0.7596970200538635, "step": 712 }, { "epoch": 0.5684672114809647, "grad_norm": 0.3944020244363453, "learning_rate": 9.48144561413399e-05, "loss": 0.5351129174232483, "step": 713 }, { "epoch": 0.569264500697628, "grad_norm": 0.42685484784523786, "learning_rate": 9.479495894660789e-05, "loss": 0.6264957189559937, "step": 714 }, { "epoch": 0.5700617899142915, "grad_norm": 0.4089573095047102, "learning_rate": 9.477542717913268e-05, "loss": 0.6344755291938782, "step": 715 }, { "epoch": 0.5708590791309548, "grad_norm": 0.4867688087866793, "learning_rate": 9.47558608539889e-05, "loss": 0.5887293219566345, "step": 716 }, { "epoch": 0.5716563683476181, "grad_norm": 0.5650998398627004, "learning_rate": 9.473625998627786e-05, "loss": 0.70284503698349, "step": 717 }, { "epoch": 0.5724536575642815, "grad_norm": 0.4745679016850819, "learning_rate": 9.471662459112747e-05, "loss": 0.6602259874343872, "step": 718 }, { "epoch": 0.5732509467809448, "grad_norm": 0.4744547933402319, "learning_rate": 9.469695468369235e-05, "loss": 0.6397655010223389, "step": 719 }, { "epoch": 0.5740482359976081, "grad_norm": 0.4795465586824415, "learning_rate": 9.46772502791537e-05, "loss": 0.6321254968643188, "step": 720 }, { "epoch": 0.5748455252142715, "grad_norm": 0.44023103776611844, "learning_rate": 9.465751139271943e-05, "loss": 0.5741268992424011, "step": 721 }, { "epoch": 0.5756428144309348, "grad_norm": 0.41222938422413374, "learning_rate": 9.463773803962397e-05, "loss": 0.5024715662002563, "step": 722 }, { "epoch": 0.5764401036475981, "grad_norm": 0.44949524237797694, "learning_rate": 9.46179302351284e-05, "loss": 0.5833186507225037, "step": 723 }, { "epoch": 0.5772373928642615, "grad_norm": 0.4535350725619186, "learning_rate": 9.45980879945204e-05, "loss": 0.6473857164382935, "step": 724 }, { "epoch": 0.5780346820809249, "grad_norm": 0.4045348579136256, "learning_rate": 9.457821133311415e-05, "loss": 0.48836928606033325, "step": 725 }, { "epoch": 0.5788319712975882, "grad_norm": 0.4294718499287213, "learning_rate": 9.455830026625051e-05, "loss": 0.5590646266937256, "step": 726 }, { "epoch": 0.5796292605142516, "grad_norm": 0.3902954592588113, "learning_rate": 9.453835480929685e-05, "loss": 0.47675004601478577, "step": 727 }, { "epoch": 0.5804265497309149, "grad_norm": 0.47155070683663663, "learning_rate": 9.451837497764703e-05, "loss": 0.6360700130462646, "step": 728 }, { "epoch": 0.5812238389475782, "grad_norm": 0.44811213533614425, "learning_rate": 9.449836078672149e-05, "loss": 0.6172173023223877, "step": 729 }, { "epoch": 0.5820211281642416, "grad_norm": 0.39333317717167027, "learning_rate": 9.447831225196719e-05, "loss": 0.6072366237640381, "step": 730 }, { "epoch": 0.5828184173809049, "grad_norm": 0.47892190686956554, "learning_rate": 9.445822938885758e-05, "loss": 0.6441460847854614, "step": 731 }, { "epoch": 0.5836157065975682, "grad_norm": 0.4491064125061803, "learning_rate": 9.443811221289263e-05, "loss": 0.5575034022331238, "step": 732 }, { "epoch": 0.5844129958142316, "grad_norm": 0.4062678637586915, "learning_rate": 9.441796073959874e-05, "loss": 0.5881840586662292, "step": 733 }, { "epoch": 0.585210285030895, "grad_norm": 0.42889598221390707, "learning_rate": 9.439777498452883e-05, "loss": 0.6694631576538086, "step": 734 }, { "epoch": 0.5860075742475583, "grad_norm": 0.469479719167127, "learning_rate": 9.437755496326226e-05, "loss": 0.6540462374687195, "step": 735 }, { "epoch": 0.5868048634642217, "grad_norm": 0.4476892157091803, "learning_rate": 9.435730069140486e-05, "loss": 0.708655595779419, "step": 736 }, { "epoch": 0.587602152680885, "grad_norm": 0.5815831804106988, "learning_rate": 9.433701218458886e-05, "loss": 0.7895397543907166, "step": 737 }, { "epoch": 0.5883994418975483, "grad_norm": 0.4017356338152868, "learning_rate": 9.431668945847291e-05, "loss": 0.6339403390884399, "step": 738 }, { "epoch": 0.5891967311142117, "grad_norm": 0.39714159671062405, "learning_rate": 9.42963325287421e-05, "loss": 0.4710286259651184, "step": 739 }, { "epoch": 0.589994020330875, "grad_norm": 0.4952129026446847, "learning_rate": 9.427594141110792e-05, "loss": 0.5618221163749695, "step": 740 }, { "epoch": 0.5907913095475383, "grad_norm": 0.3639396239772073, "learning_rate": 9.425551612130823e-05, "loss": 0.4170251488685608, "step": 741 }, { "epoch": 0.5915885987642017, "grad_norm": 0.46399110952275374, "learning_rate": 9.423505667510724e-05, "loss": 0.5604273676872253, "step": 742 }, { "epoch": 0.5923858879808651, "grad_norm": 0.40438793915677185, "learning_rate": 9.421456308829556e-05, "loss": 0.5907918214797974, "step": 743 }, { "epoch": 0.5931831771975284, "grad_norm": 0.43979522190961884, "learning_rate": 9.419403537669014e-05, "loss": 0.5010349154472351, "step": 744 }, { "epoch": 0.5939804664141918, "grad_norm": 0.4551507575925388, "learning_rate": 9.417347355613428e-05, "loss": 0.735142171382904, "step": 745 }, { "epoch": 0.5947777556308551, "grad_norm": 0.38272186961717547, "learning_rate": 9.415287764249757e-05, "loss": 0.4926493763923645, "step": 746 }, { "epoch": 0.5955750448475184, "grad_norm": 0.49461354681815956, "learning_rate": 9.413224765167594e-05, "loss": 0.48153871297836304, "step": 747 }, { "epoch": 0.5963723340641818, "grad_norm": 0.4940034382824494, "learning_rate": 9.41115835995916e-05, "loss": 0.6274658441543579, "step": 748 }, { "epoch": 0.5971696232808451, "grad_norm": 0.4646014924077353, "learning_rate": 9.40908855021931e-05, "loss": 0.6525982618331909, "step": 749 }, { "epoch": 0.5979669124975084, "grad_norm": 0.40285042830878126, "learning_rate": 9.40701533754552e-05, "loss": 0.5155797004699707, "step": 750 }, { "epoch": 0.5987642017141718, "grad_norm": 0.4288219234412225, "learning_rate": 9.404938723537894e-05, "loss": 0.6410408616065979, "step": 751 }, { "epoch": 0.5995614909308351, "grad_norm": 0.42744800364401253, "learning_rate": 9.402858709799164e-05, "loss": 0.5489498972892761, "step": 752 }, { "epoch": 0.6003587801474985, "grad_norm": 0.46326219445892947, "learning_rate": 9.400775297934688e-05, "loss": 0.6268047094345093, "step": 753 }, { "epoch": 0.6011560693641619, "grad_norm": 0.4522334682431089, "learning_rate": 9.398688489552436e-05, "loss": 0.6192749738693237, "step": 754 }, { "epoch": 0.6019533585808252, "grad_norm": 0.4362705885121305, "learning_rate": 9.396598286263011e-05, "loss": 0.5739413499832153, "step": 755 }, { "epoch": 0.6027506477974885, "grad_norm": 0.4246208884567158, "learning_rate": 9.394504689679631e-05, "loss": 0.5874543190002441, "step": 756 }, { "epoch": 0.6035479370141519, "grad_norm": 0.46667729099752114, "learning_rate": 9.392407701418132e-05, "loss": 0.721917986869812, "step": 757 }, { "epoch": 0.6043452262308152, "grad_norm": 0.4655677361393127, "learning_rate": 9.390307323096971e-05, "loss": 0.6763489842414856, "step": 758 }, { "epoch": 0.6051425154474785, "grad_norm": 0.4872841007128653, "learning_rate": 9.388203556337219e-05, "loss": 0.6874793767929077, "step": 759 }, { "epoch": 0.6059398046641419, "grad_norm": 0.46666612113432954, "learning_rate": 9.386096402762563e-05, "loss": 0.6406216025352478, "step": 760 }, { "epoch": 0.6067370938808052, "grad_norm": 0.4336129281800834, "learning_rate": 9.383985863999303e-05, "loss": 0.5401081442832947, "step": 761 }, { "epoch": 0.6075343830974687, "grad_norm": 0.37947525560329, "learning_rate": 9.381871941676355e-05, "loss": 0.4726782739162445, "step": 762 }, { "epoch": 0.608331672314132, "grad_norm": 0.42943612774467244, "learning_rate": 9.37975463742524e-05, "loss": 0.7087767720222473, "step": 763 }, { "epoch": 0.6091289615307953, "grad_norm": 0.42868134903913674, "learning_rate": 9.377633952880097e-05, "loss": 0.5949928760528564, "step": 764 }, { "epoch": 0.6099262507474587, "grad_norm": 0.43860983290501754, "learning_rate": 9.375509889677667e-05, "loss": 0.4907504916191101, "step": 765 }, { "epoch": 0.610723539964122, "grad_norm": 0.42461547173939773, "learning_rate": 9.373382449457304e-05, "loss": 0.49689581990242004, "step": 766 }, { "epoch": 0.6115208291807853, "grad_norm": 0.4416498774671302, "learning_rate": 9.371251633860966e-05, "loss": 0.5376303791999817, "step": 767 }, { "epoch": 0.6123181183974487, "grad_norm": 0.36249317971241984, "learning_rate": 9.369117444533214e-05, "loss": 0.4030584990978241, "step": 768 }, { "epoch": 0.613115407614112, "grad_norm": 0.4719495753721166, "learning_rate": 9.366979883121216e-05, "loss": 0.6134971380233765, "step": 769 }, { "epoch": 0.6139126968307753, "grad_norm": 0.48294431350497935, "learning_rate": 9.364838951274744e-05, "loss": 0.6232562065124512, "step": 770 }, { "epoch": 0.6147099860474388, "grad_norm": 0.4581130083145302, "learning_rate": 9.362694650646167e-05, "loss": 0.712213933467865, "step": 771 }, { "epoch": 0.6155072752641021, "grad_norm": 0.4238218099138077, "learning_rate": 9.360546982890454e-05, "loss": 0.6055985689163208, "step": 772 }, { "epoch": 0.6163045644807654, "grad_norm": 0.441270955985967, "learning_rate": 9.358395949665177e-05, "loss": 0.5182202458381653, "step": 773 }, { "epoch": 0.6171018536974288, "grad_norm": 0.47314213636299274, "learning_rate": 9.356241552630502e-05, "loss": 0.6865096092224121, "step": 774 }, { "epoch": 0.6178991429140921, "grad_norm": 0.47371275893725456, "learning_rate": 9.354083793449194e-05, "loss": 0.7366055250167847, "step": 775 }, { "epoch": 0.6186964321307554, "grad_norm": 0.4598519092988918, "learning_rate": 9.351922673786611e-05, "loss": 0.6360257863998413, "step": 776 }, { "epoch": 0.6194937213474188, "grad_norm": 0.39449662526362955, "learning_rate": 9.349758195310703e-05, "loss": 0.5187716484069824, "step": 777 }, { "epoch": 0.6202910105640821, "grad_norm": 0.4803033220481284, "learning_rate": 9.347590359692014e-05, "loss": 0.6055858135223389, "step": 778 }, { "epoch": 0.6210882997807454, "grad_norm": 0.39705533363642537, "learning_rate": 9.345419168603682e-05, "loss": 0.5126244425773621, "step": 779 }, { "epoch": 0.6218855889974088, "grad_norm": 0.40814886185750426, "learning_rate": 9.343244623721432e-05, "loss": 0.6152849197387695, "step": 780 }, { "epoch": 0.6226828782140722, "grad_norm": 0.41030506228882774, "learning_rate": 9.341066726723573e-05, "loss": 0.41124227643013, "step": 781 }, { "epoch": 0.6234801674307355, "grad_norm": 0.4260042400921611, "learning_rate": 9.338885479291011e-05, "loss": 0.6829202175140381, "step": 782 }, { "epoch": 0.6242774566473989, "grad_norm": 0.41045996413783004, "learning_rate": 9.336700883107228e-05, "loss": 0.5525668859481812, "step": 783 }, { "epoch": 0.6250747458640622, "grad_norm": 0.4346543374933874, "learning_rate": 9.334512939858298e-05, "loss": 0.6195856928825378, "step": 784 }, { "epoch": 0.6258720350807255, "grad_norm": 0.5041017424802687, "learning_rate": 9.332321651232875e-05, "loss": 0.7075793147087097, "step": 785 }, { "epoch": 0.6266693242973889, "grad_norm": 0.3789299312799717, "learning_rate": 9.330127018922194e-05, "loss": 0.4346361756324768, "step": 786 }, { "epoch": 0.6274666135140522, "grad_norm": 0.4124307501453267, "learning_rate": 9.327929044620071e-05, "loss": 0.5476053953170776, "step": 787 }, { "epoch": 0.6282639027307155, "grad_norm": 0.4507522063110351, "learning_rate": 9.325727730022906e-05, "loss": 0.553621232509613, "step": 788 }, { "epoch": 0.6290611919473789, "grad_norm": 0.39469784825238496, "learning_rate": 9.323523076829671e-05, "loss": 0.5213109254837036, "step": 789 }, { "epoch": 0.6298584811640423, "grad_norm": 0.45939343345651495, "learning_rate": 9.321315086741916e-05, "loss": 0.7072631120681763, "step": 790 }, { "epoch": 0.6306557703807056, "grad_norm": 0.4625095675517638, "learning_rate": 9.31910376146377e-05, "loss": 0.5647929906845093, "step": 791 }, { "epoch": 0.631453059597369, "grad_norm": 0.4479614434829708, "learning_rate": 9.316889102701931e-05, "loss": 0.5851097702980042, "step": 792 }, { "epoch": 0.6322503488140323, "grad_norm": 0.41321397542580746, "learning_rate": 9.314671112165674e-05, "loss": 0.6273714900016785, "step": 793 }, { "epoch": 0.6330476380306956, "grad_norm": 0.45652419835130487, "learning_rate": 9.312449791566842e-05, "loss": 0.6627153158187866, "step": 794 }, { "epoch": 0.633844927247359, "grad_norm": 0.42908028232058737, "learning_rate": 9.310225142619852e-05, "loss": 0.5870777368545532, "step": 795 }, { "epoch": 0.6346422164640223, "grad_norm": 0.3866988164212823, "learning_rate": 9.307997167041689e-05, "loss": 0.5666153430938721, "step": 796 }, { "epoch": 0.6354395056806856, "grad_norm": 0.4176333835312135, "learning_rate": 9.3057658665519e-05, "loss": 0.5907472968101501, "step": 797 }, { "epoch": 0.636236794897349, "grad_norm": 0.3345282951964024, "learning_rate": 9.303531242872606e-05, "loss": 0.45666852593421936, "step": 798 }, { "epoch": 0.6370340841140124, "grad_norm": 0.4710300001392961, "learning_rate": 9.301293297728486e-05, "loss": 0.669687032699585, "step": 799 }, { "epoch": 0.6378313733306757, "grad_norm": 0.3962684995973382, "learning_rate": 9.299052032846788e-05, "loss": 0.5052920579910278, "step": 800 }, { "epoch": 0.6386286625473391, "grad_norm": 0.6377302729005404, "learning_rate": 9.29680744995732e-05, "loss": 0.7256404161453247, "step": 801 }, { "epoch": 0.6394259517640024, "grad_norm": 0.4114769365552957, "learning_rate": 9.294559550792449e-05, "loss": 0.6092151403427124, "step": 802 }, { "epoch": 0.6402232409806657, "grad_norm": 0.4485453778174703, "learning_rate": 9.292308337087107e-05, "loss": 0.5356189012527466, "step": 803 }, { "epoch": 0.6410205301973291, "grad_norm": 0.48555238639855947, "learning_rate": 9.290053810578776e-05, "loss": 0.65544593334198, "step": 804 }, { "epoch": 0.6418178194139924, "grad_norm": 0.4597484929623525, "learning_rate": 9.287795973007502e-05, "loss": 0.6348232626914978, "step": 805 }, { "epoch": 0.6426151086306557, "grad_norm": 0.6691297750662635, "learning_rate": 9.285534826115885e-05, "loss": 0.5452053546905518, "step": 806 }, { "epoch": 0.6434123978473191, "grad_norm": 0.46625561230880647, "learning_rate": 9.283270371649073e-05, "loss": 0.7588813900947571, "step": 807 }, { "epoch": 0.6442096870639824, "grad_norm": 0.4527082786642772, "learning_rate": 9.281002611354774e-05, "loss": 0.6823574304580688, "step": 808 }, { "epoch": 0.6450069762806458, "grad_norm": 0.5103418050450867, "learning_rate": 9.278731546983249e-05, "loss": 0.6040791869163513, "step": 809 }, { "epoch": 0.6458042654973092, "grad_norm": 0.45184447059691535, "learning_rate": 9.276457180287299e-05, "loss": 0.5767542719841003, "step": 810 }, { "epoch": 0.6466015547139725, "grad_norm": 0.3422830735157731, "learning_rate": 9.274179513022282e-05, "loss": 0.42830508947372437, "step": 811 }, { "epoch": 0.6473988439306358, "grad_norm": 0.4718768059464205, "learning_rate": 9.271898546946106e-05, "loss": 0.6226209402084351, "step": 812 }, { "epoch": 0.6481961331472992, "grad_norm": 0.4722079837232603, "learning_rate": 9.269614283819213e-05, "loss": 0.7373334765434265, "step": 813 }, { "epoch": 0.6489934223639625, "grad_norm": 0.42632681476761847, "learning_rate": 9.267326725404599e-05, "loss": 0.5425838828086853, "step": 814 }, { "epoch": 0.6497907115806258, "grad_norm": 0.41983028454481564, "learning_rate": 9.265035873467805e-05, "loss": 0.49474430084228516, "step": 815 }, { "epoch": 0.6505880007972892, "grad_norm": 0.45160909304204355, "learning_rate": 9.262741729776907e-05, "loss": 0.6901643872261047, "step": 816 }, { "epoch": 0.6513852900139525, "grad_norm": 0.46838837236000563, "learning_rate": 9.260444296102525e-05, "loss": 0.7365382313728333, "step": 817 }, { "epoch": 0.652182579230616, "grad_norm": 0.4869834307670089, "learning_rate": 9.25814357421782e-05, "loss": 0.6203328967094421, "step": 818 }, { "epoch": 0.6529798684472793, "grad_norm": 0.3771866506807964, "learning_rate": 9.255839565898485e-05, "loss": 0.4721485376358032, "step": 819 }, { "epoch": 0.6537771576639426, "grad_norm": 0.4019769863582025, "learning_rate": 9.253532272922757e-05, "loss": 0.4882972538471222, "step": 820 }, { "epoch": 0.654574446880606, "grad_norm": 0.42593662477658367, "learning_rate": 9.251221697071403e-05, "loss": 0.5404720902442932, "step": 821 }, { "epoch": 0.6553717360972693, "grad_norm": 0.5187634323262547, "learning_rate": 9.248907840127725e-05, "loss": 0.6396200060844421, "step": 822 }, { "epoch": 0.6561690253139326, "grad_norm": 0.5941727889533286, "learning_rate": 9.246590703877558e-05, "loss": 0.5670813322067261, "step": 823 }, { "epoch": 0.656966314530596, "grad_norm": 0.49586661111387687, "learning_rate": 9.244270290109266e-05, "loss": 0.585698127746582, "step": 824 }, { "epoch": 0.6577636037472593, "grad_norm": 0.4031507341568755, "learning_rate": 9.241946600613746e-05, "loss": 0.5668481588363647, "step": 825 }, { "epoch": 0.6585608929639226, "grad_norm": 0.4616484819178229, "learning_rate": 9.23961963718442e-05, "loss": 0.5956938862800598, "step": 826 }, { "epoch": 0.6593581821805861, "grad_norm": 0.43685127681834357, "learning_rate": 9.237289401617237e-05, "loss": 0.6326920986175537, "step": 827 }, { "epoch": 0.6601554713972494, "grad_norm": 0.4695875031584818, "learning_rate": 9.234955895710676e-05, "loss": 0.4382616877555847, "step": 828 }, { "epoch": 0.6609527606139127, "grad_norm": 0.4584624350049309, "learning_rate": 9.232619121265733e-05, "loss": 0.7363680601119995, "step": 829 }, { "epoch": 0.6617500498305761, "grad_norm": 0.46617858855076943, "learning_rate": 9.230279080085932e-05, "loss": 0.6395404934883118, "step": 830 }, { "epoch": 0.6625473390472394, "grad_norm": 0.41192381082613994, "learning_rate": 9.227935773977316e-05, "loss": 0.5252783298492432, "step": 831 }, { "epoch": 0.6633446282639027, "grad_norm": 0.4371762308992192, "learning_rate": 9.225589204748451e-05, "loss": 0.6175894141197205, "step": 832 }, { "epoch": 0.6641419174805661, "grad_norm": 0.41024585768309946, "learning_rate": 9.223239374210413e-05, "loss": 0.5862935781478882, "step": 833 }, { "epoch": 0.6649392066972294, "grad_norm": 0.44472694365449106, "learning_rate": 9.220886284176804e-05, "loss": 0.565878689289093, "step": 834 }, { "epoch": 0.6657364959138927, "grad_norm": 0.4119860160408848, "learning_rate": 9.21852993646374e-05, "loss": 0.5728747844696045, "step": 835 }, { "epoch": 0.6665337851305561, "grad_norm": 0.4580744089865989, "learning_rate": 9.216170332889849e-05, "loss": 0.6716585159301758, "step": 836 }, { "epoch": 0.6673310743472195, "grad_norm": 0.38623497505643223, "learning_rate": 9.21380747527627e-05, "loss": 0.5856902599334717, "step": 837 }, { "epoch": 0.6681283635638828, "grad_norm": 0.395941700195363, "learning_rate": 9.21144136544666e-05, "loss": 0.4948454201221466, "step": 838 }, { "epoch": 0.6689256527805462, "grad_norm": 0.4149870558735529, "learning_rate": 9.209072005227182e-05, "loss": 0.5812603235244751, "step": 839 }, { "epoch": 0.6697229419972095, "grad_norm": 0.3721308765258349, "learning_rate": 9.206699396446504e-05, "loss": 0.5387722849845886, "step": 840 }, { "epoch": 0.6705202312138728, "grad_norm": 0.4464777768424962, "learning_rate": 9.204323540935809e-05, "loss": 0.6103124618530273, "step": 841 }, { "epoch": 0.6713175204305362, "grad_norm": 0.45533641662736046, "learning_rate": 9.20194444052878e-05, "loss": 0.682235836982727, "step": 842 }, { "epoch": 0.6721148096471995, "grad_norm": 0.43025772374733334, "learning_rate": 9.199562097061608e-05, "loss": 0.5561180710792542, "step": 843 }, { "epoch": 0.6729120988638628, "grad_norm": 0.4352364091525913, "learning_rate": 9.197176512372983e-05, "loss": 0.5548825263977051, "step": 844 }, { "epoch": 0.6737093880805262, "grad_norm": 0.43128926364140674, "learning_rate": 9.194787688304101e-05, "loss": 0.6216987371444702, "step": 845 }, { "epoch": 0.6745066772971896, "grad_norm": 0.4194276772180669, "learning_rate": 9.192395626698656e-05, "loss": 0.5835301876068115, "step": 846 }, { "epoch": 0.6753039665138529, "grad_norm": 0.3647772052885477, "learning_rate": 9.190000329402839e-05, "loss": 0.42825770378112793, "step": 847 }, { "epoch": 0.6761012557305163, "grad_norm": 0.4158774257274311, "learning_rate": 9.187601798265341e-05, "loss": 0.6069319844245911, "step": 848 }, { "epoch": 0.6768985449471796, "grad_norm": 0.4353888765973685, "learning_rate": 9.185200035137349e-05, "loss": 0.5601628422737122, "step": 849 }, { "epoch": 0.6776958341638429, "grad_norm": 0.45240171958281616, "learning_rate": 9.182795041872542e-05, "loss": 0.489355206489563, "step": 850 }, { "epoch": 0.6784931233805063, "grad_norm": 0.5465187405813241, "learning_rate": 9.180386820327096e-05, "loss": 0.7259072065353394, "step": 851 }, { "epoch": 0.6792904125971696, "grad_norm": 0.3825153608431051, "learning_rate": 9.177975372359674e-05, "loss": 0.5122416615486145, "step": 852 }, { "epoch": 0.6800877018138329, "grad_norm": 0.40072025090559765, "learning_rate": 9.175560699831431e-05, "loss": 0.6188830733299255, "step": 853 }, { "epoch": 0.6808849910304963, "grad_norm": 0.37930651432239887, "learning_rate": 9.173142804606012e-05, "loss": 0.44100645184516907, "step": 854 }, { "epoch": 0.6816822802471597, "grad_norm": 0.4042343865735694, "learning_rate": 9.170721688549547e-05, "loss": 0.6163058280944824, "step": 855 }, { "epoch": 0.682479569463823, "grad_norm": 0.38621555648148576, "learning_rate": 9.168297353530654e-05, "loss": 0.5097896456718445, "step": 856 }, { "epoch": 0.6832768586804864, "grad_norm": 0.4167452333695989, "learning_rate": 9.165869801420433e-05, "loss": 0.5671826601028442, "step": 857 }, { "epoch": 0.6840741478971497, "grad_norm": 0.41406517767119033, "learning_rate": 9.163439034092471e-05, "loss": 0.5983768701553345, "step": 858 }, { "epoch": 0.684871437113813, "grad_norm": 0.4393848529553713, "learning_rate": 9.161005053422832e-05, "loss": 0.601074755191803, "step": 859 }, { "epoch": 0.6856687263304764, "grad_norm": 0.38775270027073516, "learning_rate": 9.158567861290061e-05, "loss": 0.547869086265564, "step": 860 }, { "epoch": 0.6864660155471397, "grad_norm": 0.4196308506189373, "learning_rate": 9.156127459575187e-05, "loss": 0.6046299934387207, "step": 861 }, { "epoch": 0.687263304763803, "grad_norm": 0.43435444565718184, "learning_rate": 9.153683850161706e-05, "loss": 0.6702632904052734, "step": 862 }, { "epoch": 0.6880605939804664, "grad_norm": 0.4498508245144672, "learning_rate": 9.151237034935597e-05, "loss": 0.6431222558021545, "step": 863 }, { "epoch": 0.6888578831971297, "grad_norm": 0.3898387059613932, "learning_rate": 9.148787015785316e-05, "loss": 0.4870852828025818, "step": 864 }, { "epoch": 0.6896551724137931, "grad_norm": 0.39020640502054627, "learning_rate": 9.146333794601782e-05, "loss": 0.46288999915122986, "step": 865 }, { "epoch": 0.6904524616304565, "grad_norm": 0.42078408578149096, "learning_rate": 9.143877373278395e-05, "loss": 0.5849325656890869, "step": 866 }, { "epoch": 0.6912497508471198, "grad_norm": 0.3761245324008915, "learning_rate": 9.141417753711016e-05, "loss": 0.5517107248306274, "step": 867 }, { "epoch": 0.6920470400637831, "grad_norm": 0.37855632634932546, "learning_rate": 9.138954937797984e-05, "loss": 0.5936501622200012, "step": 868 }, { "epoch": 0.6928443292804465, "grad_norm": 0.40825049758888404, "learning_rate": 9.136488927440096e-05, "loss": 0.5394968390464783, "step": 869 }, { "epoch": 0.6936416184971098, "grad_norm": 0.41880049918838225, "learning_rate": 9.13401972454062e-05, "loss": 0.5944331288337708, "step": 870 }, { "epoch": 0.6944389077137731, "grad_norm": 0.3980892877230139, "learning_rate": 9.131547331005287e-05, "loss": 0.5421624779701233, "step": 871 }, { "epoch": 0.6952361969304365, "grad_norm": 0.4932041491641856, "learning_rate": 9.12907174874229e-05, "loss": 0.7250620722770691, "step": 872 }, { "epoch": 0.6960334861470998, "grad_norm": 0.42564910539657397, "learning_rate": 9.126592979662281e-05, "loss": 0.5061754584312439, "step": 873 }, { "epoch": 0.6968307753637633, "grad_norm": 0.45544453817650365, "learning_rate": 9.124111025678377e-05, "loss": 0.649502158164978, "step": 874 }, { "epoch": 0.6976280645804266, "grad_norm": 0.37439507654782195, "learning_rate": 9.121625888706146e-05, "loss": 0.4838150143623352, "step": 875 }, { "epoch": 0.6984253537970899, "grad_norm": 0.4285246336422614, "learning_rate": 9.119137570663619e-05, "loss": 0.5939500331878662, "step": 876 }, { "epoch": 0.6992226430137533, "grad_norm": 0.37120312709517406, "learning_rate": 9.116646073471279e-05, "loss": 0.4006609320640564, "step": 877 }, { "epoch": 0.7000199322304166, "grad_norm": 0.3796367153708729, "learning_rate": 9.114151399052064e-05, "loss": 0.4729502499103546, "step": 878 }, { "epoch": 0.7008172214470799, "grad_norm": 0.42110455160351745, "learning_rate": 9.11165354933136e-05, "loss": 0.5758118629455566, "step": 879 }, { "epoch": 0.7016145106637433, "grad_norm": 0.43063387549001914, "learning_rate": 9.109152526237012e-05, "loss": 0.5463246703147888, "step": 880 }, { "epoch": 0.7024117998804066, "grad_norm": 0.4390266005902369, "learning_rate": 9.106648331699306e-05, "loss": 0.6177988648414612, "step": 881 }, { "epoch": 0.7032090890970699, "grad_norm": 0.3668060842295724, "learning_rate": 9.104140967650978e-05, "loss": 0.43230879306793213, "step": 882 }, { "epoch": 0.7040063783137334, "grad_norm": 0.4584969288312949, "learning_rate": 9.101630436027216e-05, "loss": 0.5900431275367737, "step": 883 }, { "epoch": 0.7048036675303967, "grad_norm": 0.44316302987508815, "learning_rate": 9.099116738765643e-05, "loss": 0.6576458811759949, "step": 884 }, { "epoch": 0.70560095674706, "grad_norm": 0.3903956862196273, "learning_rate": 9.096599877806333e-05, "loss": 0.5297501683235168, "step": 885 }, { "epoch": 0.7063982459637234, "grad_norm": 0.4572425098041001, "learning_rate": 9.094079855091797e-05, "loss": 0.5625426769256592, "step": 886 }, { "epoch": 0.7071955351803867, "grad_norm": 0.39272597938614145, "learning_rate": 9.09155667256699e-05, "loss": 0.48169857263565063, "step": 887 }, { "epoch": 0.70799282439705, "grad_norm": 0.4217311849387543, "learning_rate": 9.089030332179302e-05, "loss": 0.5065918564796448, "step": 888 }, { "epoch": 0.7087901136137134, "grad_norm": 0.4307214502889378, "learning_rate": 9.086500835878565e-05, "loss": 0.5919649600982666, "step": 889 }, { "epoch": 0.7095874028303767, "grad_norm": 0.44199954858660084, "learning_rate": 9.083968185617042e-05, "loss": 0.6359586715698242, "step": 890 }, { "epoch": 0.71038469204704, "grad_norm": 0.43793824669274733, "learning_rate": 9.081432383349432e-05, "loss": 0.6113216280937195, "step": 891 }, { "epoch": 0.7111819812637034, "grad_norm": 0.4575412145421313, "learning_rate": 9.078893431032866e-05, "loss": 0.6451565027236938, "step": 892 }, { "epoch": 0.7119792704803668, "grad_norm": 0.45041008999465504, "learning_rate": 9.076351330626911e-05, "loss": 0.5993229150772095, "step": 893 }, { "epoch": 0.7127765596970301, "grad_norm": 0.33692435250206254, "learning_rate": 9.073806084093555e-05, "loss": 0.4078628420829773, "step": 894 }, { "epoch": 0.7135738489136935, "grad_norm": 0.48598520737776146, "learning_rate": 9.071257693397222e-05, "loss": 0.5881745219230652, "step": 895 }, { "epoch": 0.7143711381303568, "grad_norm": 0.4474718692785222, "learning_rate": 9.068706160504757e-05, "loss": 0.6910086274147034, "step": 896 }, { "epoch": 0.7151684273470201, "grad_norm": 0.3615006205293258, "learning_rate": 9.066151487385436e-05, "loss": 0.4456910192966461, "step": 897 }, { "epoch": 0.7159657165636835, "grad_norm": 0.41674357141939583, "learning_rate": 9.063593676010953e-05, "loss": 0.6184250712394714, "step": 898 }, { "epoch": 0.7167630057803468, "grad_norm": 0.4158328119505099, "learning_rate": 9.061032728355428e-05, "loss": 0.6271911263465881, "step": 899 }, { "epoch": 0.7175602949970101, "grad_norm": 0.5001225278532977, "learning_rate": 9.058468646395397e-05, "loss": 0.6615949869155884, "step": 900 }, { "epoch": 0.7183575842136735, "grad_norm": 0.427808743860062, "learning_rate": 9.05590143210982e-05, "loss": 0.6414538621902466, "step": 901 }, { "epoch": 0.7191548734303369, "grad_norm": 0.421239111388591, "learning_rate": 9.053331087480076e-05, "loss": 0.6159845590591431, "step": 902 }, { "epoch": 0.7199521626470002, "grad_norm": 0.3758478313623113, "learning_rate": 9.050757614489952e-05, "loss": 0.537022590637207, "step": 903 }, { "epoch": 0.7207494518636636, "grad_norm": 0.4521599786072774, "learning_rate": 9.048181015125656e-05, "loss": 0.6505130529403687, "step": 904 }, { "epoch": 0.7215467410803269, "grad_norm": 0.4268973842929388, "learning_rate": 9.045601291375806e-05, "loss": 0.4794812798500061, "step": 905 }, { "epoch": 0.7223440302969902, "grad_norm": 0.46588157319407625, "learning_rate": 9.043018445231433e-05, "loss": 0.6085473299026489, "step": 906 }, { "epoch": 0.7231413195136536, "grad_norm": 0.4162640542570375, "learning_rate": 9.040432478685978e-05, "loss": 0.5245082378387451, "step": 907 }, { "epoch": 0.7239386087303169, "grad_norm": 0.3919470519905789, "learning_rate": 9.037843393735288e-05, "loss": 0.4513174593448639, "step": 908 }, { "epoch": 0.7247358979469802, "grad_norm": 0.39388317102987513, "learning_rate": 9.03525119237762e-05, "loss": 0.5009539127349854, "step": 909 }, { "epoch": 0.7255331871636436, "grad_norm": 0.37137533005512663, "learning_rate": 9.032655876613636e-05, "loss": 0.4950876235961914, "step": 910 }, { "epoch": 0.726330476380307, "grad_norm": 0.4108861968737162, "learning_rate": 9.030057448446396e-05, "loss": 0.6474599242210388, "step": 911 }, { "epoch": 0.7271277655969703, "grad_norm": 0.42461693874333645, "learning_rate": 9.027455909881371e-05, "loss": 0.5245916247367859, "step": 912 }, { "epoch": 0.7279250548136337, "grad_norm": 0.39307088142705254, "learning_rate": 9.024851262926424e-05, "loss": 0.5279796123504639, "step": 913 }, { "epoch": 0.728722344030297, "grad_norm": 0.47314585826734407, "learning_rate": 9.022243509591823e-05, "loss": 0.6837424039840698, "step": 914 }, { "epoch": 0.7295196332469603, "grad_norm": 0.4766685407030476, "learning_rate": 9.019632651890233e-05, "loss": 0.6727612018585205, "step": 915 }, { "epoch": 0.7303169224636237, "grad_norm": 0.41444594468234053, "learning_rate": 9.017018691836714e-05, "loss": 0.5851683616638184, "step": 916 }, { "epoch": 0.731114211680287, "grad_norm": 0.37678197758049115, "learning_rate": 9.014401631448717e-05, "loss": 0.5373088717460632, "step": 917 }, { "epoch": 0.7319115008969503, "grad_norm": 0.4111381708981532, "learning_rate": 9.01178147274609e-05, "loss": 0.5252950191497803, "step": 918 }, { "epoch": 0.7327087901136137, "grad_norm": 0.44504708260533576, "learning_rate": 9.009158217751071e-05, "loss": 0.5228504538536072, "step": 919 }, { "epoch": 0.733506079330277, "grad_norm": 0.45633808469921705, "learning_rate": 9.00653186848829e-05, "loss": 0.698624312877655, "step": 920 }, { "epoch": 0.7343033685469404, "grad_norm": 0.5025008545101637, "learning_rate": 9.00390242698476e-05, "loss": 0.6851727366447449, "step": 921 }, { "epoch": 0.7351006577636038, "grad_norm": 0.5087986944243474, "learning_rate": 9.001269895269886e-05, "loss": 0.729667067527771, "step": 922 }, { "epoch": 0.7358979469802671, "grad_norm": 0.43274779682597836, "learning_rate": 8.998634275375454e-05, "loss": 0.6295567750930786, "step": 923 }, { "epoch": 0.7366952361969304, "grad_norm": 0.4535546295959354, "learning_rate": 8.995995569335636e-05, "loss": 0.5728248357772827, "step": 924 }, { "epoch": 0.7374925254135938, "grad_norm": 0.424281062926389, "learning_rate": 8.993353779186984e-05, "loss": 0.6549117565155029, "step": 925 }, { "epoch": 0.7382898146302571, "grad_norm": 0.45079311942044065, "learning_rate": 8.990708906968432e-05, "loss": 0.5838459134101868, "step": 926 }, { "epoch": 0.7390871038469204, "grad_norm": 0.4072769655082482, "learning_rate": 8.988060954721292e-05, "loss": 0.6696759462356567, "step": 927 }, { "epoch": 0.7398843930635838, "grad_norm": 0.44384826534949223, "learning_rate": 8.985409924489252e-05, "loss": 0.6275361180305481, "step": 928 }, { "epoch": 0.7406816822802471, "grad_norm": 0.42387547920237734, "learning_rate": 8.98275581831838e-05, "loss": 0.5602185726165771, "step": 929 }, { "epoch": 0.7414789714969106, "grad_norm": 0.4390297911922667, "learning_rate": 8.980098638257111e-05, "loss": 0.6497112512588501, "step": 930 }, { "epoch": 0.7422762607135739, "grad_norm": 0.4382937041123669, "learning_rate": 8.977438386356258e-05, "loss": 0.6219656467437744, "step": 931 }, { "epoch": 0.7430735499302372, "grad_norm": 0.45102746931308707, "learning_rate": 8.974775064669004e-05, "loss": 0.532805323600769, "step": 932 }, { "epoch": 0.7438708391469006, "grad_norm": 0.4111184230131764, "learning_rate": 8.972108675250899e-05, "loss": 0.6003149151802063, "step": 933 }, { "epoch": 0.7446681283635639, "grad_norm": 0.440256016167903, "learning_rate": 8.96943922015986e-05, "loss": 0.6140011548995972, "step": 934 }, { "epoch": 0.7454654175802272, "grad_norm": 0.4479992482157235, "learning_rate": 8.966766701456177e-05, "loss": 0.5494929552078247, "step": 935 }, { "epoch": 0.7462627067968906, "grad_norm": 0.4201227321091381, "learning_rate": 8.964091121202494e-05, "loss": 0.5770664811134338, "step": 936 }, { "epoch": 0.7470599960135539, "grad_norm": 0.40940911987587564, "learning_rate": 8.961412481463829e-05, "loss": 0.5768025517463684, "step": 937 }, { "epoch": 0.7478572852302172, "grad_norm": 0.4465016453831168, "learning_rate": 8.958730784307552e-05, "loss": 0.6582019329071045, "step": 938 }, { "epoch": 0.7486545744468807, "grad_norm": 0.47576885517301365, "learning_rate": 8.956046031803398e-05, "loss": 0.6808803081512451, "step": 939 }, { "epoch": 0.749451863663544, "grad_norm": 0.4569038764023653, "learning_rate": 8.953358226023457e-05, "loss": 0.6558268666267395, "step": 940 }, { "epoch": 0.7502491528802073, "grad_norm": 0.4881275053142363, "learning_rate": 8.950667369042179e-05, "loss": 0.6761746406555176, "step": 941 }, { "epoch": 0.7510464420968707, "grad_norm": 0.42154105312806056, "learning_rate": 8.947973462936366e-05, "loss": 0.6647793054580688, "step": 942 }, { "epoch": 0.751843731313534, "grad_norm": 0.4586483125930324, "learning_rate": 8.945276509785177e-05, "loss": 0.5712122917175293, "step": 943 }, { "epoch": 0.7526410205301973, "grad_norm": 0.4106326062677422, "learning_rate": 8.942576511670115e-05, "loss": 0.5705179572105408, "step": 944 }, { "epoch": 0.7534383097468607, "grad_norm": 0.4075927409302181, "learning_rate": 8.939873470675042e-05, "loss": 0.4910382032394409, "step": 945 }, { "epoch": 0.754235598963524, "grad_norm": 0.40870194500360624, "learning_rate": 8.937167388886163e-05, "loss": 0.637507438659668, "step": 946 }, { "epoch": 0.7550328881801873, "grad_norm": 0.3967333863203224, "learning_rate": 8.934458268392034e-05, "loss": 0.6514694690704346, "step": 947 }, { "epoch": 0.7558301773968507, "grad_norm": 0.37254496549592786, "learning_rate": 8.93174611128355e-05, "loss": 0.43819522857666016, "step": 948 }, { "epoch": 0.7566274666135141, "grad_norm": 0.4252686375745193, "learning_rate": 8.929030919653955e-05, "loss": 0.6674104332923889, "step": 949 }, { "epoch": 0.7574247558301774, "grad_norm": 0.437011858305347, "learning_rate": 8.926312695598837e-05, "loss": 0.6260120272636414, "step": 950 }, { "epoch": 0.7582220450468408, "grad_norm": 0.3933599144853053, "learning_rate": 8.923591441216117e-05, "loss": 0.6040182709693909, "step": 951 }, { "epoch": 0.7590193342635041, "grad_norm": 0.4183910206043107, "learning_rate": 8.920867158606058e-05, "loss": 0.588239312171936, "step": 952 }, { "epoch": 0.7598166234801674, "grad_norm": 0.38175386531667826, "learning_rate": 8.918139849871264e-05, "loss": 0.546373724937439, "step": 953 }, { "epoch": 0.7606139126968308, "grad_norm": 0.38971377677209934, "learning_rate": 8.91540951711667e-05, "loss": 0.4956075847148895, "step": 954 }, { "epoch": 0.7614112019134941, "grad_norm": 0.37558883310105, "learning_rate": 8.912676162449547e-05, "loss": 0.4785592555999756, "step": 955 }, { "epoch": 0.7622084911301574, "grad_norm": 0.42357276600026794, "learning_rate": 8.909939787979498e-05, "loss": 0.5240855813026428, "step": 956 }, { "epoch": 0.7630057803468208, "grad_norm": 0.41770865384580896, "learning_rate": 8.907200395818455e-05, "loss": 0.6333093643188477, "step": 957 }, { "epoch": 0.7638030695634842, "grad_norm": 0.4495475268411399, "learning_rate": 8.904457988080681e-05, "loss": 0.6009361743927002, "step": 958 }, { "epoch": 0.7646003587801475, "grad_norm": 0.5377506234612787, "learning_rate": 8.901712566882767e-05, "loss": 0.7835143804550171, "step": 959 }, { "epoch": 0.7653976479968109, "grad_norm": 0.4506064334291557, "learning_rate": 8.898964134343628e-05, "loss": 0.7268452644348145, "step": 960 }, { "epoch": 0.7661949372134742, "grad_norm": 0.35220815041980663, "learning_rate": 8.896212692584503e-05, "loss": 0.43515831232070923, "step": 961 }, { "epoch": 0.7669922264301375, "grad_norm": 0.4406535168009274, "learning_rate": 8.893458243728956e-05, "loss": 0.5400960445404053, "step": 962 }, { "epoch": 0.7677895156468009, "grad_norm": 0.40856015788060907, "learning_rate": 8.890700789902869e-05, "loss": 0.4826542139053345, "step": 963 }, { "epoch": 0.7685868048634642, "grad_norm": 0.4259133577923441, "learning_rate": 8.887940333234445e-05, "loss": 0.47515442967414856, "step": 964 }, { "epoch": 0.7693840940801275, "grad_norm": 0.3753727331157503, "learning_rate": 8.885176875854205e-05, "loss": 0.4985824227333069, "step": 965 }, { "epoch": 0.7701813832967909, "grad_norm": 0.39241765068063134, "learning_rate": 8.882410419894982e-05, "loss": 0.5049811601638794, "step": 966 }, { "epoch": 0.7709786725134543, "grad_norm": 0.40252078595884627, "learning_rate": 8.87964096749193e-05, "loss": 0.5527772903442383, "step": 967 }, { "epoch": 0.7717759617301176, "grad_norm": 0.40351562010944586, "learning_rate": 8.876868520782509e-05, "loss": 0.5342116355895996, "step": 968 }, { "epoch": 0.772573250946781, "grad_norm": 0.4810258345058989, "learning_rate": 8.874093081906495e-05, "loss": 0.6729565858840942, "step": 969 }, { "epoch": 0.7733705401634443, "grad_norm": 0.4555871028329483, "learning_rate": 8.871314653005972e-05, "loss": 0.6058116555213928, "step": 970 }, { "epoch": 0.7741678293801076, "grad_norm": 0.41855359437106404, "learning_rate": 8.868533236225329e-05, "loss": 0.5161345601081848, "step": 971 }, { "epoch": 0.774965118596771, "grad_norm": 0.450584774160487, "learning_rate": 8.865748833711264e-05, "loss": 0.5826140642166138, "step": 972 }, { "epoch": 0.7757624078134343, "grad_norm": 0.42363203636330066, "learning_rate": 8.862961447612777e-05, "loss": 0.6237989664077759, "step": 973 }, { "epoch": 0.7765596970300976, "grad_norm": 0.42620069947280115, "learning_rate": 8.860171080081174e-05, "loss": 0.5964707136154175, "step": 974 }, { "epoch": 0.777356986246761, "grad_norm": 0.4234797599948656, "learning_rate": 8.85737773327006e-05, "loss": 0.5482522249221802, "step": 975 }, { "epoch": 0.7781542754634243, "grad_norm": 0.47177733084932616, "learning_rate": 8.854581409335341e-05, "loss": 0.660527229309082, "step": 976 }, { "epoch": 0.7789515646800877, "grad_norm": 0.4499195661152184, "learning_rate": 8.851782110435216e-05, "loss": 0.491074800491333, "step": 977 }, { "epoch": 0.7797488538967511, "grad_norm": 0.4578709964199619, "learning_rate": 8.848979838730187e-05, "loss": 0.6249175071716309, "step": 978 }, { "epoch": 0.7805461431134144, "grad_norm": 0.3905716385672406, "learning_rate": 8.846174596383044e-05, "loss": 0.5061800479888916, "step": 979 }, { "epoch": 0.7813434323300777, "grad_norm": 0.4805651212597816, "learning_rate": 8.843366385558873e-05, "loss": 0.4976520538330078, "step": 980 }, { "epoch": 0.7821407215467411, "grad_norm": 0.48207249790102363, "learning_rate": 8.840555208425054e-05, "loss": 0.7450715899467468, "step": 981 }, { "epoch": 0.7829380107634044, "grad_norm": 0.43068808997918373, "learning_rate": 8.83774106715125e-05, "loss": 0.5721495151519775, "step": 982 }, { "epoch": 0.7837352999800677, "grad_norm": 0.4925151071508664, "learning_rate": 8.834923963909416e-05, "loss": 0.6041548848152161, "step": 983 }, { "epoch": 0.7845325891967311, "grad_norm": 0.40815234073779905, "learning_rate": 8.832103900873791e-05, "loss": 0.5881200432777405, "step": 984 }, { "epoch": 0.7853298784133944, "grad_norm": 0.4606142684075768, "learning_rate": 8.8292808802209e-05, "loss": 0.5610684156417847, "step": 985 }, { "epoch": 0.7861271676300579, "grad_norm": 0.45948713571910604, "learning_rate": 8.826454904129551e-05, "loss": 0.6563761234283447, "step": 986 }, { "epoch": 0.7869244568467212, "grad_norm": 0.41619281540415004, "learning_rate": 8.823625974780829e-05, "loss": 0.5732560753822327, "step": 987 }, { "epoch": 0.7877217460633845, "grad_norm": 0.3898184731251468, "learning_rate": 8.820794094358104e-05, "loss": 0.557607889175415, "step": 988 }, { "epoch": 0.7885190352800479, "grad_norm": 0.4406466960916001, "learning_rate": 8.81795926504702e-05, "loss": 0.5372775197029114, "step": 989 }, { "epoch": 0.7893163244967112, "grad_norm": 0.3886864601427882, "learning_rate": 8.815121489035498e-05, "loss": 0.45492035150527954, "step": 990 }, { "epoch": 0.7901136137133745, "grad_norm": 0.503023255487228, "learning_rate": 8.812280768513733e-05, "loss": 0.6827413439750671, "step": 991 }, { "epoch": 0.7909109029300379, "grad_norm": 0.41605586953992946, "learning_rate": 8.809437105674192e-05, "loss": 0.5358473062515259, "step": 992 }, { "epoch": 0.7917081921467012, "grad_norm": 0.3876977130823049, "learning_rate": 8.806590502711615e-05, "loss": 0.48171067237854004, "step": 993 }, { "epoch": 0.7925054813633645, "grad_norm": 0.46045689017265545, "learning_rate": 8.803740961823008e-05, "loss": 0.6174921989440918, "step": 994 }, { "epoch": 0.793302770580028, "grad_norm": 0.4185759181128049, "learning_rate": 8.800888485207647e-05, "loss": 0.6076773405075073, "step": 995 }, { "epoch": 0.7941000597966913, "grad_norm": 0.39058933761088677, "learning_rate": 8.798033075067076e-05, "loss": 0.5604955554008484, "step": 996 }, { "epoch": 0.7948973490133546, "grad_norm": 0.3809459772347911, "learning_rate": 8.795174733605097e-05, "loss": 0.5276381969451904, "step": 997 }, { "epoch": 0.795694638230018, "grad_norm": 0.4201154055772482, "learning_rate": 8.792313463027777e-05, "loss": 0.6149541139602661, "step": 998 }, { "epoch": 0.7964919274466813, "grad_norm": 0.3714938525913816, "learning_rate": 8.789449265543446e-05, "loss": 0.4218512773513794, "step": 999 }, { "epoch": 0.7972892166633446, "grad_norm": 0.3925617891613998, "learning_rate": 8.786582143362689e-05, "loss": 0.527498722076416, "step": 1000 }, { "epoch": 0.798086505880008, "grad_norm": 0.35033699096043763, "learning_rate": 8.783712098698354e-05, "loss": 0.4476920962333679, "step": 1001 }, { "epoch": 0.7988837950966713, "grad_norm": 0.4206073238952077, "learning_rate": 8.780839133765539e-05, "loss": 0.5836808681488037, "step": 1002 }, { "epoch": 0.7996810843133346, "grad_norm": 0.45018202560176035, "learning_rate": 8.777963250781598e-05, "loss": 0.5345502495765686, "step": 1003 }, { "epoch": 0.800478373529998, "grad_norm": 0.437485980457382, "learning_rate": 8.775084451966136e-05, "loss": 0.5442671179771423, "step": 1004 }, { "epoch": 0.8012756627466614, "grad_norm": 0.5459789648902484, "learning_rate": 8.772202739541012e-05, "loss": 0.5697953701019287, "step": 1005 }, { "epoch": 0.8020729519633247, "grad_norm": 0.4221962495096397, "learning_rate": 8.76931811573033e-05, "loss": 0.5301165580749512, "step": 1006 }, { "epoch": 0.8028702411799881, "grad_norm": 0.47103534071288194, "learning_rate": 8.76643058276044e-05, "loss": 0.6580334305763245, "step": 1007 }, { "epoch": 0.8036675303966514, "grad_norm": 0.45460253786997684, "learning_rate": 8.763540142859945e-05, "loss": 0.5637500286102295, "step": 1008 }, { "epoch": 0.8044648196133147, "grad_norm": 0.4347794126457204, "learning_rate": 8.760646798259682e-05, "loss": 0.6448011994361877, "step": 1009 }, { "epoch": 0.8052621088299781, "grad_norm": 0.44496369415939346, "learning_rate": 8.757750551192734e-05, "loss": 0.5275229811668396, "step": 1010 }, { "epoch": 0.8060593980466414, "grad_norm": 0.42390427518432927, "learning_rate": 8.754851403894425e-05, "loss": 0.5245242714881897, "step": 1011 }, { "epoch": 0.8068566872633047, "grad_norm": 0.4345193635545909, "learning_rate": 8.751949358602316e-05, "loss": 0.580277681350708, "step": 1012 }, { "epoch": 0.8076539764799681, "grad_norm": 0.4437508405503923, "learning_rate": 8.749044417556207e-05, "loss": 0.6410685777664185, "step": 1013 }, { "epoch": 0.8084512656966315, "grad_norm": 0.4701812867695828, "learning_rate": 8.746136582998131e-05, "loss": 0.6567517518997192, "step": 1014 }, { "epoch": 0.8092485549132948, "grad_norm": 0.4130926136261522, "learning_rate": 8.743225857172351e-05, "loss": 0.620101809501648, "step": 1015 }, { "epoch": 0.8100458441299582, "grad_norm": 0.43108573060277533, "learning_rate": 8.740312242325365e-05, "loss": 0.5640413761138916, "step": 1016 }, { "epoch": 0.8108431333466215, "grad_norm": 0.5622145451979437, "learning_rate": 8.737395740705905e-05, "loss": 0.5416503548622131, "step": 1017 }, { "epoch": 0.8116404225632848, "grad_norm": 0.44368989237689266, "learning_rate": 8.734476354564923e-05, "loss": 0.508804976940155, "step": 1018 }, { "epoch": 0.8124377117799482, "grad_norm": 0.40536217636670385, "learning_rate": 8.731554086155602e-05, "loss": 0.5353458523750305, "step": 1019 }, { "epoch": 0.8132350009966115, "grad_norm": 0.5041818153630854, "learning_rate": 8.72862893773335e-05, "loss": 0.6493529081344604, "step": 1020 }, { "epoch": 0.8140322902132748, "grad_norm": 0.41739224807139635, "learning_rate": 8.725700911555791e-05, "loss": 0.497514009475708, "step": 1021 }, { "epoch": 0.8148295794299382, "grad_norm": 0.44776273686014534, "learning_rate": 8.72277000988278e-05, "loss": 0.5862264037132263, "step": 1022 }, { "epoch": 0.8156268686466015, "grad_norm": 0.4488297098908124, "learning_rate": 8.719836234976386e-05, "loss": 0.5643548965454102, "step": 1023 }, { "epoch": 0.8164241578632649, "grad_norm": 0.3990518764425635, "learning_rate": 8.716899589100893e-05, "loss": 0.6099210381507874, "step": 1024 }, { "epoch": 0.8172214470799283, "grad_norm": 0.4832461336432715, "learning_rate": 8.713960074522807e-05, "loss": 0.62254798412323, "step": 1025 }, { "epoch": 0.8180187362965916, "grad_norm": 0.38652236304744536, "learning_rate": 8.711017693510844e-05, "loss": 0.4672405421733856, "step": 1026 }, { "epoch": 0.8188160255132549, "grad_norm": 0.4022165962140202, "learning_rate": 8.708072448335933e-05, "loss": 0.5432902574539185, "step": 1027 }, { "epoch": 0.8196133147299183, "grad_norm": 0.38613738443249584, "learning_rate": 8.705124341271214e-05, "loss": 0.5492627620697021, "step": 1028 }, { "epoch": 0.8204106039465816, "grad_norm": 0.43965553100160987, "learning_rate": 8.702173374592035e-05, "loss": 0.6000350713729858, "step": 1029 }, { "epoch": 0.8212078931632449, "grad_norm": 0.40759726826120096, "learning_rate": 8.699219550575953e-05, "loss": 0.511928915977478, "step": 1030 }, { "epoch": 0.8220051823799083, "grad_norm": 0.39090856900093074, "learning_rate": 8.696262871502728e-05, "loss": 0.44878634810447693, "step": 1031 }, { "epoch": 0.8228024715965716, "grad_norm": 0.3615126382691587, "learning_rate": 8.693303339654325e-05, "loss": 0.39995068311691284, "step": 1032 }, { "epoch": 0.823599760813235, "grad_norm": 0.42312359806647276, "learning_rate": 8.69034095731491e-05, "loss": 0.5724337100982666, "step": 1033 }, { "epoch": 0.8243970500298984, "grad_norm": 0.42397843715771083, "learning_rate": 8.68737572677085e-05, "loss": 0.632296085357666, "step": 1034 }, { "epoch": 0.8251943392465617, "grad_norm": 0.42069188081855813, "learning_rate": 8.684407650310707e-05, "loss": 0.524156391620636, "step": 1035 }, { "epoch": 0.825991628463225, "grad_norm": 0.44955140591162335, "learning_rate": 8.681436730225246e-05, "loss": 0.6520594954490662, "step": 1036 }, { "epoch": 0.8267889176798884, "grad_norm": 0.46165028245539685, "learning_rate": 8.678462968807419e-05, "loss": 0.5532397627830505, "step": 1037 }, { "epoch": 0.8275862068965517, "grad_norm": 0.39365161003009064, "learning_rate": 8.675486368352375e-05, "loss": 0.5744783878326416, "step": 1038 }, { "epoch": 0.828383496113215, "grad_norm": 0.4050710684844973, "learning_rate": 8.672506931157457e-05, "loss": 0.5540138483047485, "step": 1039 }, { "epoch": 0.8291807853298784, "grad_norm": 0.4780535878806783, "learning_rate": 8.669524659522193e-05, "loss": 0.7367125153541565, "step": 1040 }, { "epoch": 0.8299780745465417, "grad_norm": 0.3901812440362129, "learning_rate": 8.666539555748297e-05, "loss": 0.5764138698577881, "step": 1041 }, { "epoch": 0.8307753637632052, "grad_norm": 0.44094063577773895, "learning_rate": 8.663551622139674e-05, "loss": 0.6376828551292419, "step": 1042 }, { "epoch": 0.8315726529798685, "grad_norm": 0.3769386498538212, "learning_rate": 8.660560861002411e-05, "loss": 0.5306693315505981, "step": 1043 }, { "epoch": 0.8323699421965318, "grad_norm": 0.40852672336747575, "learning_rate": 8.657567274644777e-05, "loss": 0.6005908250808716, "step": 1044 }, { "epoch": 0.8331672314131952, "grad_norm": 0.34620102101849776, "learning_rate": 8.654570865377221e-05, "loss": 0.4871188700199127, "step": 1045 }, { "epoch": 0.8339645206298585, "grad_norm": 0.4101323428010465, "learning_rate": 8.651571635512372e-05, "loss": 0.49140554666519165, "step": 1046 }, { "epoch": 0.8347618098465218, "grad_norm": 0.43019443489153086, "learning_rate": 8.648569587365034e-05, "loss": 0.6349713802337646, "step": 1047 }, { "epoch": 0.8355590990631852, "grad_norm": 0.460681262794943, "learning_rate": 8.645564723252188e-05, "loss": 0.5265627503395081, "step": 1048 }, { "epoch": 0.8363563882798485, "grad_norm": 0.4729868182908888, "learning_rate": 8.642557045492988e-05, "loss": 0.784264087677002, "step": 1049 }, { "epoch": 0.8371536774965118, "grad_norm": 0.43923753045994246, "learning_rate": 8.639546556408757e-05, "loss": 0.6769385933876038, "step": 1050 }, { "epoch": 0.8379509667131751, "grad_norm": 0.41123331505764493, "learning_rate": 8.636533258322994e-05, "loss": 0.5514147877693176, "step": 1051 }, { "epoch": 0.8387482559298386, "grad_norm": 0.4138645038006056, "learning_rate": 8.633517153561359e-05, "loss": 0.6522994637489319, "step": 1052 }, { "epoch": 0.8395455451465019, "grad_norm": 0.44698157547835043, "learning_rate": 8.630498244451682e-05, "loss": 0.6421476602554321, "step": 1053 }, { "epoch": 0.8403428343631653, "grad_norm": 0.4696572084178878, "learning_rate": 8.627476533323957e-05, "loss": 0.6827902793884277, "step": 1054 }, { "epoch": 0.8411401235798286, "grad_norm": 0.3900250663363861, "learning_rate": 8.62445202251034e-05, "loss": 0.4822857677936554, "step": 1055 }, { "epoch": 0.8419374127964919, "grad_norm": 0.4306887437440544, "learning_rate": 8.62142471434515e-05, "loss": 0.6794077157974243, "step": 1056 }, { "epoch": 0.8427347020131553, "grad_norm": 0.4359786561367492, "learning_rate": 8.618394611164859e-05, "loss": 0.5620750188827515, "step": 1057 }, { "epoch": 0.8435319912298186, "grad_norm": 0.43935990039432576, "learning_rate": 8.615361715308104e-05, "loss": 0.6070531606674194, "step": 1058 }, { "epoch": 0.8443292804464819, "grad_norm": 0.5399841096731954, "learning_rate": 8.612326029115671e-05, "loss": 0.6448693871498108, "step": 1059 }, { "epoch": 0.8451265696631453, "grad_norm": 0.5092599176928517, "learning_rate": 8.609287554930504e-05, "loss": 0.7449469566345215, "step": 1060 }, { "epoch": 0.8459238588798087, "grad_norm": 0.44453158212962274, "learning_rate": 8.606246295097697e-05, "loss": 0.6559039950370789, "step": 1061 }, { "epoch": 0.846721148096472, "grad_norm": 0.39533066080937235, "learning_rate": 8.60320225196449e-05, "loss": 0.5308577418327332, "step": 1062 }, { "epoch": 0.8475184373131354, "grad_norm": 0.4303058698869765, "learning_rate": 8.60015542788028e-05, "loss": 0.6042134761810303, "step": 1063 }, { "epoch": 0.8483157265297987, "grad_norm": 0.38094062406511414, "learning_rate": 8.597105825196606e-05, "loss": 0.5747206211090088, "step": 1064 }, { "epoch": 0.849113015746462, "grad_norm": 0.37573168766815096, "learning_rate": 8.594053446267145e-05, "loss": 0.5672894716262817, "step": 1065 }, { "epoch": 0.8499103049631254, "grad_norm": 0.37940957121864527, "learning_rate": 8.590998293447727e-05, "loss": 0.5013814568519592, "step": 1066 }, { "epoch": 0.8507075941797887, "grad_norm": 0.4534574223226905, "learning_rate": 8.587940369096318e-05, "loss": 0.5945917963981628, "step": 1067 }, { "epoch": 0.851504883396452, "grad_norm": 0.415024413135262, "learning_rate": 8.584879675573024e-05, "loss": 0.5201203227043152, "step": 1068 }, { "epoch": 0.8523021726131154, "grad_norm": 0.4377478061505334, "learning_rate": 8.581816215240087e-05, "loss": 0.6019213199615479, "step": 1069 }, { "epoch": 0.8530994618297788, "grad_norm": 0.34734993537829967, "learning_rate": 8.578749990461884e-05, "loss": 0.38367989659309387, "step": 1070 }, { "epoch": 0.8538967510464421, "grad_norm": 0.3923970141357382, "learning_rate": 8.575681003604929e-05, "loss": 0.5673266649246216, "step": 1071 }, { "epoch": 0.8546940402631055, "grad_norm": 0.40699700325737354, "learning_rate": 8.572609257037865e-05, "loss": 0.6373673677444458, "step": 1072 }, { "epoch": 0.8554913294797688, "grad_norm": 0.4527466784449026, "learning_rate": 8.569534753131465e-05, "loss": 0.6106905937194824, "step": 1073 }, { "epoch": 0.8562886186964321, "grad_norm": 0.5153154484627009, "learning_rate": 8.566457494258631e-05, "loss": 0.637147068977356, "step": 1074 }, { "epoch": 0.8570859079130955, "grad_norm": 0.46800223465789553, "learning_rate": 8.563377482794392e-05, "loss": 0.5304033756256104, "step": 1075 }, { "epoch": 0.8578831971297588, "grad_norm": 0.4794299561788583, "learning_rate": 8.5602947211159e-05, "loss": 0.5505977869033813, "step": 1076 }, { "epoch": 0.8586804863464221, "grad_norm": 0.42665418803057464, "learning_rate": 8.557209211602428e-05, "loss": 0.5743616819381714, "step": 1077 }, { "epoch": 0.8594777755630855, "grad_norm": 0.39045463237927786, "learning_rate": 8.554120956635375e-05, "loss": 0.4690971374511719, "step": 1078 }, { "epoch": 0.8602750647797488, "grad_norm": 0.48191348162016445, "learning_rate": 8.551029958598254e-05, "loss": 0.5881896615028381, "step": 1079 }, { "epoch": 0.8610723539964122, "grad_norm": 0.4288895895519534, "learning_rate": 8.547936219876695e-05, "loss": 0.49915003776550293, "step": 1080 }, { "epoch": 0.8618696432130756, "grad_norm": 0.3861704876842269, "learning_rate": 8.544839742858448e-05, "loss": 0.5769656896591187, "step": 1081 }, { "epoch": 0.8626669324297389, "grad_norm": 1.03055021010742, "learning_rate": 8.541740529933374e-05, "loss": 0.5413870215415955, "step": 1082 }, { "epoch": 0.8634642216464022, "grad_norm": 0.3952170308607006, "learning_rate": 8.538638583493444e-05, "loss": 0.5292851328849792, "step": 1083 }, { "epoch": 0.8642615108630656, "grad_norm": 0.3999592159987327, "learning_rate": 8.535533905932738e-05, "loss": 0.5996208190917969, "step": 1084 }, { "epoch": 0.8650588000797289, "grad_norm": 0.40430677797406006, "learning_rate": 8.532426499647448e-05, "loss": 0.5973973870277405, "step": 1085 }, { "epoch": 0.8658560892963922, "grad_norm": 0.3495949738412872, "learning_rate": 8.52931636703587e-05, "loss": 0.4216043949127197, "step": 1086 }, { "epoch": 0.8666533785130556, "grad_norm": 0.36833515704349534, "learning_rate": 8.526203510498402e-05, "loss": 0.5076268911361694, "step": 1087 }, { "epoch": 0.8674506677297189, "grad_norm": 0.40709699673289557, "learning_rate": 8.523087932437548e-05, "loss": 0.5933756828308105, "step": 1088 }, { "epoch": 0.8682479569463823, "grad_norm": 0.42008360810564954, "learning_rate": 8.51996963525791e-05, "loss": 0.5951849222183228, "step": 1089 }, { "epoch": 0.8690452461630457, "grad_norm": 0.38435044289002435, "learning_rate": 8.516848621366188e-05, "loss": 0.5825787782669067, "step": 1090 }, { "epoch": 0.869842535379709, "grad_norm": 0.38617711997655463, "learning_rate": 8.513724893171183e-05, "loss": 0.5637725591659546, "step": 1091 }, { "epoch": 0.8706398245963723, "grad_norm": 0.45488816044237346, "learning_rate": 8.510598453083787e-05, "loss": 0.48315685987472534, "step": 1092 }, { "epoch": 0.8714371138130357, "grad_norm": 0.44374091675703875, "learning_rate": 8.507469303516984e-05, "loss": 0.7241154909133911, "step": 1093 }, { "epoch": 0.872234403029699, "grad_norm": 0.40995632773956403, "learning_rate": 8.504337446885853e-05, "loss": 0.4457489252090454, "step": 1094 }, { "epoch": 0.8730316922463623, "grad_norm": 0.4576991845193317, "learning_rate": 8.50120288560756e-05, "loss": 0.6013844013214111, "step": 1095 }, { "epoch": 0.8738289814630257, "grad_norm": 0.43857656147391544, "learning_rate": 8.49806562210136e-05, "loss": 0.5191975235939026, "step": 1096 }, { "epoch": 0.874626270679689, "grad_norm": 0.42196609801567503, "learning_rate": 8.49492565878859e-05, "loss": 0.5252233743667603, "step": 1097 }, { "epoch": 0.8754235598963525, "grad_norm": 0.4012910224461855, "learning_rate": 8.491782998092676e-05, "loss": 0.5317015647888184, "step": 1098 }, { "epoch": 0.8762208491130158, "grad_norm": 0.41919154638119405, "learning_rate": 8.488637642439121e-05, "loss": 0.5721572637557983, "step": 1099 }, { "epoch": 0.8770181383296791, "grad_norm": 0.38407522650212794, "learning_rate": 8.485489594255513e-05, "loss": 0.47613921761512756, "step": 1100 }, { "epoch": 0.8778154275463425, "grad_norm": 0.4142495837599391, "learning_rate": 8.482338855971512e-05, "loss": 0.6494086384773254, "step": 1101 }, { "epoch": 0.8786127167630058, "grad_norm": 0.42808985301728025, "learning_rate": 8.479185430018858e-05, "loss": 0.6141901612281799, "step": 1102 }, { "epoch": 0.8794100059796691, "grad_norm": 0.5857817899325195, "learning_rate": 8.476029318831368e-05, "loss": 0.6323969960212708, "step": 1103 }, { "epoch": 0.8802072951963325, "grad_norm": 0.47007306308123126, "learning_rate": 8.472870524844925e-05, "loss": 0.6683332324028015, "step": 1104 }, { "epoch": 0.8810045844129958, "grad_norm": 0.3977537891545268, "learning_rate": 8.469709050497486e-05, "loss": 0.5342932343482971, "step": 1105 }, { "epoch": 0.8818018736296591, "grad_norm": 0.4289423276176722, "learning_rate": 8.466544898229075e-05, "loss": 0.6110314130783081, "step": 1106 }, { "epoch": 0.8825991628463224, "grad_norm": 0.39016863224911763, "learning_rate": 8.463378070481787e-05, "loss": 0.5735726952552795, "step": 1107 }, { "epoch": 0.8833964520629859, "grad_norm": 0.40849618544505234, "learning_rate": 8.460208569699778e-05, "loss": 0.6070728302001953, "step": 1108 }, { "epoch": 0.8841937412796492, "grad_norm": 0.4024415968099423, "learning_rate": 8.45703639832927e-05, "loss": 0.6602135300636292, "step": 1109 }, { "epoch": 0.8849910304963126, "grad_norm": 0.4380550379997064, "learning_rate": 8.453861558818542e-05, "loss": 0.6099100112915039, "step": 1110 }, { "epoch": 0.8857883197129759, "grad_norm": 0.4557951349835033, "learning_rate": 8.450684053617935e-05, "loss": 0.5653393864631653, "step": 1111 }, { "epoch": 0.8865856089296392, "grad_norm": 0.3905032944473295, "learning_rate": 8.44750388517985e-05, "loss": 0.5718656182289124, "step": 1112 }, { "epoch": 0.8873828981463026, "grad_norm": 0.40776999133332953, "learning_rate": 8.444321055958736e-05, "loss": 0.532712996006012, "step": 1113 }, { "epoch": 0.8881801873629659, "grad_norm": 0.4170793251661713, "learning_rate": 8.441135568411103e-05, "loss": 0.6141367554664612, "step": 1114 }, { "epoch": 0.8889774765796292, "grad_norm": 0.40814052274353907, "learning_rate": 8.437947424995509e-05, "loss": 0.6463122367858887, "step": 1115 }, { "epoch": 0.8897747657962926, "grad_norm": 0.397121369278699, "learning_rate": 8.434756628172564e-05, "loss": 0.5368135571479797, "step": 1116 }, { "epoch": 0.890572055012956, "grad_norm": 0.4173214849005214, "learning_rate": 8.431563180404924e-05, "loss": 0.68674635887146, "step": 1117 }, { "epoch": 0.8913693442296193, "grad_norm": 0.38365930475428117, "learning_rate": 8.428367084157292e-05, "loss": 0.5169991254806519, "step": 1118 }, { "epoch": 0.8921666334462827, "grad_norm": 0.3861261775999919, "learning_rate": 8.425168341896411e-05, "loss": 0.5740624070167542, "step": 1119 }, { "epoch": 0.892963922662946, "grad_norm": 0.419207241787629, "learning_rate": 8.421966956091074e-05, "loss": 0.6594865918159485, "step": 1120 }, { "epoch": 0.8937612118796093, "grad_norm": 0.4618034678658246, "learning_rate": 8.418762929212108e-05, "loss": 0.6024535298347473, "step": 1121 }, { "epoch": 0.8945585010962727, "grad_norm": 0.4333142099395535, "learning_rate": 8.415556263732383e-05, "loss": 0.6825629472732544, "step": 1122 }, { "epoch": 0.895355790312936, "grad_norm": 0.4622143714886195, "learning_rate": 8.412346962126797e-05, "loss": 0.5696868300437927, "step": 1123 }, { "epoch": 0.8961530795295993, "grad_norm": 0.4201851909548047, "learning_rate": 8.409135026872297e-05, "loss": 0.5873940587043762, "step": 1124 }, { "epoch": 0.8969503687462627, "grad_norm": 0.45507602283688847, "learning_rate": 8.405920460447849e-05, "loss": 0.5737271308898926, "step": 1125 }, { "epoch": 0.8977476579629261, "grad_norm": 0.4081342162751121, "learning_rate": 8.402703265334455e-05, "loss": 0.5154270529747009, "step": 1126 }, { "epoch": 0.8985449471795894, "grad_norm": 0.4228792788666116, "learning_rate": 8.399483444015147e-05, "loss": 0.5345121026039124, "step": 1127 }, { "epoch": 0.8993422363962528, "grad_norm": 0.4588131146117594, "learning_rate": 8.396260998974983e-05, "loss": 0.6625559329986572, "step": 1128 }, { "epoch": 0.9001395256129161, "grad_norm": 0.4192201763775922, "learning_rate": 8.393035932701044e-05, "loss": 0.6431373357772827, "step": 1129 }, { "epoch": 0.9009368148295794, "grad_norm": 0.3889223592679824, "learning_rate": 8.389808247682436e-05, "loss": 0.4959959387779236, "step": 1130 }, { "epoch": 0.9017341040462428, "grad_norm": 0.4073222671418678, "learning_rate": 8.386577946410288e-05, "loss": 0.4970683455467224, "step": 1131 }, { "epoch": 0.9025313932629061, "grad_norm": 0.4481946534704734, "learning_rate": 8.383345031377745e-05, "loss": 0.5772395730018616, "step": 1132 }, { "epoch": 0.9033286824795694, "grad_norm": 0.37728841007435154, "learning_rate": 8.380109505079968e-05, "loss": 0.5491273403167725, "step": 1133 }, { "epoch": 0.9041259716962328, "grad_norm": 0.4661248339710892, "learning_rate": 8.376871370014139e-05, "loss": 0.5637055039405823, "step": 1134 }, { "epoch": 0.9049232609128961, "grad_norm": 0.4022595773112011, "learning_rate": 8.373630628679447e-05, "loss": 0.6275621652603149, "step": 1135 }, { "epoch": 0.9057205501295595, "grad_norm": 0.41195775864148737, "learning_rate": 8.3703872835771e-05, "loss": 0.6700772047042847, "step": 1136 }, { "epoch": 0.9065178393462229, "grad_norm": 0.44895239867317066, "learning_rate": 8.367141337210307e-05, "loss": 0.5766112208366394, "step": 1137 }, { "epoch": 0.9073151285628862, "grad_norm": 0.38529781743630376, "learning_rate": 8.363892792084291e-05, "loss": 0.5747822523117065, "step": 1138 }, { "epoch": 0.9081124177795495, "grad_norm": 0.40470742095562534, "learning_rate": 8.36064165070628e-05, "loss": 0.5802819132804871, "step": 1139 }, { "epoch": 0.9089097069962129, "grad_norm": 0.431856145690662, "learning_rate": 8.357387915585502e-05, "loss": 0.6096622943878174, "step": 1140 }, { "epoch": 0.9097069962128762, "grad_norm": 0.40725521541053583, "learning_rate": 8.35413158923319e-05, "loss": 0.6488831639289856, "step": 1141 }, { "epoch": 0.9105042854295395, "grad_norm": 0.4466967345100223, "learning_rate": 8.350872674162578e-05, "loss": 0.670214056968689, "step": 1142 }, { "epoch": 0.9113015746462029, "grad_norm": 0.4297753304687678, "learning_rate": 8.347611172888893e-05, "loss": 0.6184495687484741, "step": 1143 }, { "epoch": 0.9120988638628662, "grad_norm": 0.38654282536978385, "learning_rate": 8.344347087929365e-05, "loss": 0.5114691257476807, "step": 1144 }, { "epoch": 0.9128961530795296, "grad_norm": 0.38907204513030935, "learning_rate": 8.34108042180321e-05, "loss": 0.46298593282699585, "step": 1145 }, { "epoch": 0.913693442296193, "grad_norm": 0.41507245813873767, "learning_rate": 8.337811177031645e-05, "loss": 0.5777847766876221, "step": 1146 }, { "epoch": 0.9144907315128563, "grad_norm": 0.40379542650178907, "learning_rate": 8.334539356137869e-05, "loss": 0.532257080078125, "step": 1147 }, { "epoch": 0.9152880207295196, "grad_norm": 0.4405762757360495, "learning_rate": 8.331264961647074e-05, "loss": 0.5866928100585938, "step": 1148 }, { "epoch": 0.916085309946183, "grad_norm": 0.3666779386413746, "learning_rate": 8.327987996086439e-05, "loss": 0.4672767221927643, "step": 1149 }, { "epoch": 0.9168825991628463, "grad_norm": 0.40771409559896415, "learning_rate": 8.324708461985124e-05, "loss": 0.61552894115448, "step": 1150 }, { "epoch": 0.9176798883795096, "grad_norm": 0.4146195892719989, "learning_rate": 8.321426361874273e-05, "loss": 0.5895347595214844, "step": 1151 }, { "epoch": 0.918477177596173, "grad_norm": 0.4174757947889022, "learning_rate": 8.31814169828701e-05, "loss": 0.5896688103675842, "step": 1152 }, { "epoch": 0.9192744668128363, "grad_norm": 0.3646316403168599, "learning_rate": 8.314854473758438e-05, "loss": 0.46427619457244873, "step": 1153 }, { "epoch": 0.9200717560294998, "grad_norm": 0.38667052744024555, "learning_rate": 8.311564690825638e-05, "loss": 0.5047367215156555, "step": 1154 }, { "epoch": 0.9208690452461631, "grad_norm": 0.4013208332062222, "learning_rate": 8.308272352027662e-05, "loss": 0.6035627126693726, "step": 1155 }, { "epoch": 0.9216663344628264, "grad_norm": 0.4580248146184982, "learning_rate": 8.304977459905539e-05, "loss": 0.5817784070968628, "step": 1156 }, { "epoch": 0.9224636236794898, "grad_norm": 0.3786002330516096, "learning_rate": 8.301680017002264e-05, "loss": 0.42648953199386597, "step": 1157 }, { "epoch": 0.9232609128961531, "grad_norm": 0.45173600143446735, "learning_rate": 8.298380025862804e-05, "loss": 0.6511542201042175, "step": 1158 }, { "epoch": 0.9240582021128164, "grad_norm": 0.4661386765204832, "learning_rate": 8.295077489034092e-05, "loss": 0.6939324736595154, "step": 1159 }, { "epoch": 0.9248554913294798, "grad_norm": 0.4637284265924118, "learning_rate": 8.291772409065025e-05, "loss": 0.5981205105781555, "step": 1160 }, { "epoch": 0.9256527805461431, "grad_norm": 0.44900861967288946, "learning_rate": 8.288464788506462e-05, "loss": 0.6487246751785278, "step": 1161 }, { "epoch": 0.9264500697628064, "grad_norm": 0.4266117643382244, "learning_rate": 8.285154629911227e-05, "loss": 0.5903390645980835, "step": 1162 }, { "epoch": 0.9272473589794697, "grad_norm": 0.4209801161353294, "learning_rate": 8.281841935834099e-05, "loss": 0.6485974788665771, "step": 1163 }, { "epoch": 0.9280446481961332, "grad_norm": 0.37238858347708226, "learning_rate": 8.278526708831811e-05, "loss": 0.4278629720211029, "step": 1164 }, { "epoch": 0.9288419374127965, "grad_norm": 0.49056111220623955, "learning_rate": 8.27520895146306e-05, "loss": 0.6191521286964417, "step": 1165 }, { "epoch": 0.9296392266294599, "grad_norm": 0.39108547357707824, "learning_rate": 8.271888666288488e-05, "loss": 0.4572655260562897, "step": 1166 }, { "epoch": 0.9304365158461232, "grad_norm": 0.42937292878099653, "learning_rate": 8.268565855870692e-05, "loss": 0.5635271072387695, "step": 1167 }, { "epoch": 0.9312338050627865, "grad_norm": 0.4070487995341932, "learning_rate": 8.265240522774213e-05, "loss": 0.6372859477996826, "step": 1168 }, { "epoch": 0.9320310942794499, "grad_norm": 0.40115156913218025, "learning_rate": 8.261912669565549e-05, "loss": 0.4815105199813843, "step": 1169 }, { "epoch": 0.9328283834961132, "grad_norm": 0.3594227067830663, "learning_rate": 8.258582298813133e-05, "loss": 0.4830341339111328, "step": 1170 }, { "epoch": 0.9336256727127765, "grad_norm": 0.4188181992578724, "learning_rate": 8.255249413087343e-05, "loss": 0.5210269093513489, "step": 1171 }, { "epoch": 0.9344229619294399, "grad_norm": 0.44212504278143194, "learning_rate": 8.251914014960503e-05, "loss": 0.6164445281028748, "step": 1172 }, { "epoch": 0.9352202511461033, "grad_norm": 0.43200536592923766, "learning_rate": 8.248576107006871e-05, "loss": 0.48517340421676636, "step": 1173 }, { "epoch": 0.9360175403627666, "grad_norm": 0.4414942222102041, "learning_rate": 8.245235691802644e-05, "loss": 0.6077653765678406, "step": 1174 }, { "epoch": 0.93681482957943, "grad_norm": 0.43330797571306956, "learning_rate": 8.241892771925954e-05, "loss": 0.5722517967224121, "step": 1175 }, { "epoch": 0.9376121187960933, "grad_norm": 0.5484103905494527, "learning_rate": 8.238547349956866e-05, "loss": 0.6367721557617188, "step": 1176 }, { "epoch": 0.9384094080127566, "grad_norm": 0.41686125410567704, "learning_rate": 8.235199428477378e-05, "loss": 0.5280842781066895, "step": 1177 }, { "epoch": 0.93920669722942, "grad_norm": 0.4472759472179358, "learning_rate": 8.231849010071413e-05, "loss": 0.5696661472320557, "step": 1178 }, { "epoch": 0.9400039864460833, "grad_norm": 0.41410435885724367, "learning_rate": 8.228496097324824e-05, "loss": 0.4903353452682495, "step": 1179 }, { "epoch": 0.9408012756627466, "grad_norm": 0.45858594291615423, "learning_rate": 8.22514069282539e-05, "loss": 0.5818476676940918, "step": 1180 }, { "epoch": 0.94159856487941, "grad_norm": 0.3815635455845773, "learning_rate": 8.221782799162812e-05, "loss": 0.43895113468170166, "step": 1181 }, { "epoch": 0.9423958540960734, "grad_norm": 0.45397550133770403, "learning_rate": 8.218422418928709e-05, "loss": 0.5865646600723267, "step": 1182 }, { "epoch": 0.9431931433127367, "grad_norm": 0.4250250573621219, "learning_rate": 8.215059554716625e-05, "loss": 0.5127947330474854, "step": 1183 }, { "epoch": 0.9439904325294001, "grad_norm": 0.40275938733118444, "learning_rate": 8.211694209122014e-05, "loss": 0.632763683795929, "step": 1184 }, { "epoch": 0.9447877217460634, "grad_norm": 0.41825060357616545, "learning_rate": 8.208326384742254e-05, "loss": 0.5997350811958313, "step": 1185 }, { "epoch": 0.9455850109627267, "grad_norm": 0.43662644723368227, "learning_rate": 8.20495608417663e-05, "loss": 0.6040158867835999, "step": 1186 }, { "epoch": 0.9463823001793901, "grad_norm": 0.4193525184389282, "learning_rate": 8.201583310026337e-05, "loss": 0.6457774639129639, "step": 1187 }, { "epoch": 0.9471795893960534, "grad_norm": 0.44311723710571854, "learning_rate": 8.198208064894484e-05, "loss": 0.533602774143219, "step": 1188 }, { "epoch": 0.9479768786127167, "grad_norm": 0.44843701013702975, "learning_rate": 8.194830351386086e-05, "loss": 0.5861932635307312, "step": 1189 }, { "epoch": 0.9487741678293801, "grad_norm": 0.4764012808532865, "learning_rate": 8.191450172108058e-05, "loss": 0.5999536514282227, "step": 1190 }, { "epoch": 0.9495714570460434, "grad_norm": 0.43834928955990504, "learning_rate": 8.188067529669226e-05, "loss": 0.7290194034576416, "step": 1191 }, { "epoch": 0.9503687462627068, "grad_norm": 0.3976154142755603, "learning_rate": 8.18468242668031e-05, "loss": 0.6288789510726929, "step": 1192 }, { "epoch": 0.9511660354793702, "grad_norm": 0.30267345364227866, "learning_rate": 8.181294865753932e-05, "loss": 0.33869531750679016, "step": 1193 }, { "epoch": 0.9519633246960335, "grad_norm": 0.4306120595747675, "learning_rate": 8.177904849504614e-05, "loss": 0.6098452806472778, "step": 1194 }, { "epoch": 0.9527606139126968, "grad_norm": 0.4302207978619367, "learning_rate": 8.174512380548767e-05, "loss": 0.6291964650154114, "step": 1195 }, { "epoch": 0.9535579031293602, "grad_norm": 0.4166416275358811, "learning_rate": 8.171117461504701e-05, "loss": 0.7053741216659546, "step": 1196 }, { "epoch": 0.9543551923460235, "grad_norm": 0.3828106664680279, "learning_rate": 8.16772009499261e-05, "loss": 0.559660017490387, "step": 1197 }, { "epoch": 0.9551524815626868, "grad_norm": 0.39359839108620187, "learning_rate": 8.164320283634585e-05, "loss": 0.5558567047119141, "step": 1198 }, { "epoch": 0.9559497707793502, "grad_norm": 0.36810625184920504, "learning_rate": 8.160918030054598e-05, "loss": 0.5399413704872131, "step": 1199 }, { "epoch": 0.9567470599960135, "grad_norm": 0.4685258908679461, "learning_rate": 8.157513336878507e-05, "loss": 0.7106207013130188, "step": 1200 }, { "epoch": 0.957544349212677, "grad_norm": 0.3986857649814555, "learning_rate": 8.154106206734056e-05, "loss": 0.5742528438568115, "step": 1201 }, { "epoch": 0.9583416384293403, "grad_norm": 0.4229789402752719, "learning_rate": 8.150696642250866e-05, "loss": 0.6440906524658203, "step": 1202 }, { "epoch": 0.9591389276460036, "grad_norm": 0.41106170739557757, "learning_rate": 8.147284646060438e-05, "loss": 0.5851889848709106, "step": 1203 }, { "epoch": 0.959936216862667, "grad_norm": 0.49426736298414947, "learning_rate": 8.14387022079615e-05, "loss": 0.5727464556694031, "step": 1204 }, { "epoch": 0.9607335060793303, "grad_norm": 0.46223192873836194, "learning_rate": 8.140453369093259e-05, "loss": 0.6086017489433289, "step": 1205 }, { "epoch": 0.9615307952959936, "grad_norm": 0.4409976052982152, "learning_rate": 8.137034093588886e-05, "loss": 0.6495932936668396, "step": 1206 }, { "epoch": 0.962328084512657, "grad_norm": 0.4406268635135246, "learning_rate": 8.133612396922029e-05, "loss": 0.6352964639663696, "step": 1207 }, { "epoch": 0.9631253737293203, "grad_norm": 0.4686936635968288, "learning_rate": 8.130188281733552e-05, "loss": 0.7819342613220215, "step": 1208 }, { "epoch": 0.9639226629459836, "grad_norm": 0.40099166213506837, "learning_rate": 8.12676175066619e-05, "loss": 0.4839407205581665, "step": 1209 }, { "epoch": 0.964719952162647, "grad_norm": 0.41607241266345907, "learning_rate": 8.123332806364537e-05, "loss": 0.6288001537322998, "step": 1210 }, { "epoch": 0.9655172413793104, "grad_norm": 0.40209420353834613, "learning_rate": 8.119901451475049e-05, "loss": 0.5806839466094971, "step": 1211 }, { "epoch": 0.9663145305959737, "grad_norm": 0.4081839211861573, "learning_rate": 8.11646768864605e-05, "loss": 0.5540825128555298, "step": 1212 }, { "epoch": 0.967111819812637, "grad_norm": 0.45384724088241235, "learning_rate": 8.113031520527714e-05, "loss": 0.6484171748161316, "step": 1213 }, { "epoch": 0.9679091090293004, "grad_norm": 0.4030225026378551, "learning_rate": 8.109592949772077e-05, "loss": 0.550308883190155, "step": 1214 }, { "epoch": 0.9687063982459637, "grad_norm": 0.4382227238460056, "learning_rate": 8.106151979033027e-05, "loss": 0.5120720863342285, "step": 1215 }, { "epoch": 0.969503687462627, "grad_norm": 0.41513839519439016, "learning_rate": 8.102708610966306e-05, "loss": 0.562924325466156, "step": 1216 }, { "epoch": 0.9703009766792904, "grad_norm": 0.3810738718428801, "learning_rate": 8.099262848229501e-05, "loss": 0.5111925601959229, "step": 1217 }, { "epoch": 0.9710982658959537, "grad_norm": 0.4575378955344783, "learning_rate": 8.095814693482057e-05, "loss": 0.6057713627815247, "step": 1218 }, { "epoch": 0.971895555112617, "grad_norm": 0.4417768156584349, "learning_rate": 8.092364149385254e-05, "loss": 0.6590816378593445, "step": 1219 }, { "epoch": 0.9726928443292805, "grad_norm": 0.4492622152684125, "learning_rate": 8.088911218602225e-05, "loss": 0.6292006373405457, "step": 1220 }, { "epoch": 0.9734901335459438, "grad_norm": 0.4574117857900282, "learning_rate": 8.085455903797941e-05, "loss": 0.5614067912101746, "step": 1221 }, { "epoch": 0.9742874227626072, "grad_norm": 0.45132949655993176, "learning_rate": 8.081998207639212e-05, "loss": 0.7289793491363525, "step": 1222 }, { "epoch": 0.9750847119792705, "grad_norm": 0.40025732673418823, "learning_rate": 8.078538132794689e-05, "loss": 0.5534725189208984, "step": 1223 }, { "epoch": 0.9758820011959338, "grad_norm": 0.3550897320779602, "learning_rate": 8.075075681934856e-05, "loss": 0.5413033962249756, "step": 1224 }, { "epoch": 0.9766792904125972, "grad_norm": 0.4417426050857849, "learning_rate": 8.071610857732033e-05, "loss": 0.596700131893158, "step": 1225 }, { "epoch": 0.9774765796292605, "grad_norm": 0.4398445724499878, "learning_rate": 8.068143662860368e-05, "loss": 0.6464220285415649, "step": 1226 }, { "epoch": 0.9782738688459238, "grad_norm": 0.3966406607271019, "learning_rate": 8.064674099995847e-05, "loss": 0.4889727234840393, "step": 1227 }, { "epoch": 0.9790711580625872, "grad_norm": 0.42798137155851457, "learning_rate": 8.061202171816273e-05, "loss": 0.648052453994751, "step": 1228 }, { "epoch": 0.9798684472792506, "grad_norm": 0.3785339254199302, "learning_rate": 8.057727881001284e-05, "loss": 0.5008516907691956, "step": 1229 }, { "epoch": 0.9806657364959139, "grad_norm": 0.4160169591372152, "learning_rate": 8.054251230232333e-05, "loss": 0.46178120374679565, "step": 1230 }, { "epoch": 0.9814630257125773, "grad_norm": 0.3827612915872092, "learning_rate": 8.050772222192702e-05, "loss": 0.46299129724502563, "step": 1231 }, { "epoch": 0.9822603149292406, "grad_norm": 0.4360990106678067, "learning_rate": 8.047290859567486e-05, "loss": 0.5615251660346985, "step": 1232 }, { "epoch": 0.9830576041459039, "grad_norm": 0.4497048624803235, "learning_rate": 8.043807145043604e-05, "loss": 0.6083569526672363, "step": 1233 }, { "epoch": 0.9838548933625673, "grad_norm": 0.3773063499768787, "learning_rate": 8.040321081309782e-05, "loss": 0.5510833263397217, "step": 1234 }, { "epoch": 0.9846521825792306, "grad_norm": 0.4305520741350576, "learning_rate": 8.036832671056567e-05, "loss": 0.64714515209198, "step": 1235 }, { "epoch": 0.9854494717958939, "grad_norm": 0.4066474801150064, "learning_rate": 8.033341916976313e-05, "loss": 0.6275158524513245, "step": 1236 }, { "epoch": 0.9862467610125573, "grad_norm": 0.4211610591135688, "learning_rate": 8.02984882176318e-05, "loss": 0.57306969165802, "step": 1237 }, { "epoch": 0.9870440502292207, "grad_norm": 0.41820964224274326, "learning_rate": 8.026353388113143e-05, "loss": 0.5892846584320068, "step": 1238 }, { "epoch": 0.987841339445884, "grad_norm": 0.40618115578431113, "learning_rate": 8.022855618723974e-05, "loss": 0.5568780303001404, "step": 1239 }, { "epoch": 0.9886386286625474, "grad_norm": 0.38065374933180024, "learning_rate": 8.019355516295254e-05, "loss": 0.5148648023605347, "step": 1240 }, { "epoch": 0.9894359178792107, "grad_norm": 0.3622294231870027, "learning_rate": 8.015853083528358e-05, "loss": 0.5978236198425293, "step": 1241 }, { "epoch": 0.990233207095874, "grad_norm": 0.41274264182514464, "learning_rate": 8.012348323126466e-05, "loss": 0.5505573749542236, "step": 1242 }, { "epoch": 0.9910304963125374, "grad_norm": 0.3961037190908662, "learning_rate": 8.008841237794551e-05, "loss": 0.6136634349822998, "step": 1243 }, { "epoch": 0.9918277855292007, "grad_norm": 0.37827955977390004, "learning_rate": 8.00533183023938e-05, "loss": 0.5497356653213501, "step": 1244 }, { "epoch": 0.992625074745864, "grad_norm": 0.38999661051610945, "learning_rate": 8.001820103169513e-05, "loss": 0.5363799333572388, "step": 1245 }, { "epoch": 0.9934223639625274, "grad_norm": 0.4099412074238942, "learning_rate": 7.998306059295301e-05, "loss": 0.5506718754768372, "step": 1246 }, { "epoch": 0.9942196531791907, "grad_norm": 0.418509171775409, "learning_rate": 7.994789701328886e-05, "loss": 0.60845947265625, "step": 1247 }, { "epoch": 0.9950169423958541, "grad_norm": 0.3975568377429219, "learning_rate": 7.991271031984186e-05, "loss": 0.5581965446472168, "step": 1248 }, { "epoch": 0.9958142316125175, "grad_norm": 0.38669893331573274, "learning_rate": 7.987750053976916e-05, "loss": 0.4850766658782959, "step": 1249 }, { "epoch": 0.9966115208291808, "grad_norm": 0.4034598659725264, "learning_rate": 7.984226770024561e-05, "loss": 0.5503137111663818, "step": 1250 }, { "epoch": 0.9974088100458441, "grad_norm": 0.4599391012763866, "learning_rate": 7.980701182846395e-05, "loss": 0.616321325302124, "step": 1251 }, { "epoch": 0.9982060992625075, "grad_norm": 0.4533473055074272, "learning_rate": 7.977173295163463e-05, "loss": 0.5370123386383057, "step": 1252 }, { "epoch": 0.9990033884791708, "grad_norm": 0.40846278841172545, "learning_rate": 7.973643109698589e-05, "loss": 0.5585544109344482, "step": 1253 }, { "epoch": 0.9998006776958341, "grad_norm": 0.4435980015450221, "learning_rate": 7.970110629176371e-05, "loss": 0.5564839839935303, "step": 1254 }, { "epoch": 1.0, "grad_norm": 0.8247945587005544, "learning_rate": 7.966575856323175e-05, "loss": 0.46917644143104553, "step": 1255 }, { "epoch": 1.0007972892166634, "grad_norm": 0.37276915906458086, "learning_rate": 7.963038793867139e-05, "loss": 0.5710886716842651, "step": 1256 }, { "epoch": 1.0015945784333267, "grad_norm": 0.3882623159911572, "learning_rate": 7.959499444538168e-05, "loss": 0.46737027168273926, "step": 1257 }, { "epoch": 1.00239186764999, "grad_norm": 0.4301865905887802, "learning_rate": 7.955957811067931e-05, "loss": 0.6371545195579529, "step": 1258 }, { "epoch": 1.0031891568666533, "grad_norm": 0.32332745361947246, "learning_rate": 7.952413896189861e-05, "loss": 0.30173712968826294, "step": 1259 }, { "epoch": 1.0039864460833168, "grad_norm": 0.41953502279699056, "learning_rate": 7.948867702639149e-05, "loss": 0.5181361436843872, "step": 1260 }, { "epoch": 1.00478373529998, "grad_norm": 0.3874181820913582, "learning_rate": 7.945319233152748e-05, "loss": 0.40506547689437866, "step": 1261 }, { "epoch": 1.0055810245166434, "grad_norm": 0.4316575176904466, "learning_rate": 7.941768490469367e-05, "loss": 0.5702318549156189, "step": 1262 }, { "epoch": 1.0063783137333067, "grad_norm": 0.4085964871431682, "learning_rate": 7.938215477329466e-05, "loss": 0.4611324667930603, "step": 1263 }, { "epoch": 1.00717560294997, "grad_norm": 0.45117205541624417, "learning_rate": 7.934660196475263e-05, "loss": 0.5248788595199585, "step": 1264 }, { "epoch": 1.0079728921666335, "grad_norm": 0.43707210335631924, "learning_rate": 7.931102650650721e-05, "loss": 0.49813902378082275, "step": 1265 }, { "epoch": 1.0087701813832968, "grad_norm": 0.498263481456251, "learning_rate": 7.927542842601555e-05, "loss": 0.5871745944023132, "step": 1266 }, { "epoch": 1.0095674705999602, "grad_norm": 0.48090941991983893, "learning_rate": 7.923980775075224e-05, "loss": 0.6097631454467773, "step": 1267 }, { "epoch": 1.0103647598166234, "grad_norm": 0.4919068725519782, "learning_rate": 7.920416450820931e-05, "loss": 0.45273786783218384, "step": 1268 }, { "epoch": 1.0111620490332869, "grad_norm": 0.3861504836575802, "learning_rate": 7.91684987258962e-05, "loss": 0.4130266010761261, "step": 1269 }, { "epoch": 1.01195933824995, "grad_norm": 0.42118078673275117, "learning_rate": 7.913281043133978e-05, "loss": 0.5147322416305542, "step": 1270 }, { "epoch": 1.0127566274666135, "grad_norm": 0.4006477160860645, "learning_rate": 7.909709965208425e-05, "loss": 0.44209349155426025, "step": 1271 }, { "epoch": 1.0135539166832768, "grad_norm": 0.49157765568706796, "learning_rate": 7.90613664156912e-05, "loss": 0.5839757919311523, "step": 1272 }, { "epoch": 1.0143512058999402, "grad_norm": 0.44783993776056186, "learning_rate": 7.902561074973952e-05, "loss": 0.5160073637962341, "step": 1273 }, { "epoch": 1.0151484951166037, "grad_norm": 0.5114530979064419, "learning_rate": 7.898983268182544e-05, "loss": 0.5757949352264404, "step": 1274 }, { "epoch": 1.0159457843332669, "grad_norm": 0.44255863595712996, "learning_rate": 7.895403223956246e-05, "loss": 0.5975279808044434, "step": 1275 }, { "epoch": 1.0167430735499303, "grad_norm": 0.39324774394946327, "learning_rate": 7.891820945058137e-05, "loss": 0.4992014169692993, "step": 1276 }, { "epoch": 1.0175403627665935, "grad_norm": 0.3860578203000381, "learning_rate": 7.888236434253018e-05, "loss": 0.5493947863578796, "step": 1277 }, { "epoch": 1.018337651983257, "grad_norm": 0.4693868679686389, "learning_rate": 7.884649694307413e-05, "loss": 0.5582137107849121, "step": 1278 }, { "epoch": 1.0191349411999202, "grad_norm": 0.46151513329355937, "learning_rate": 7.881060727989571e-05, "loss": 0.5353618860244751, "step": 1279 }, { "epoch": 1.0199322304165837, "grad_norm": 0.4575477526092908, "learning_rate": 7.877469538069453e-05, "loss": 0.500811755657196, "step": 1280 }, { "epoch": 1.0207295196332469, "grad_norm": 0.4038702351878148, "learning_rate": 7.873876127318738e-05, "loss": 0.5082919597625732, "step": 1281 }, { "epoch": 1.0215268088499103, "grad_norm": 0.4718776004286571, "learning_rate": 7.870280498510824e-05, "loss": 0.5172199010848999, "step": 1282 }, { "epoch": 1.0223240980665738, "grad_norm": 0.43364200357582255, "learning_rate": 7.866682654420811e-05, "loss": 0.4309069514274597, "step": 1283 }, { "epoch": 1.023121387283237, "grad_norm": 0.39563857256010304, "learning_rate": 7.863082597825519e-05, "loss": 0.428300678730011, "step": 1284 }, { "epoch": 1.0239186764999004, "grad_norm": 0.4120480088263029, "learning_rate": 7.85948033150347e-05, "loss": 0.5419866442680359, "step": 1285 }, { "epoch": 1.0247159657165636, "grad_norm": 0.4027727149708381, "learning_rate": 7.855875858234893e-05, "loss": 0.4744648337364197, "step": 1286 }, { "epoch": 1.025513254933227, "grad_norm": 0.4602167582907263, "learning_rate": 7.85226918080172e-05, "loss": 0.5644003748893738, "step": 1287 }, { "epoch": 1.0263105441498903, "grad_norm": 0.441197879851069, "learning_rate": 7.848660301987585e-05, "loss": 0.4753655195236206, "step": 1288 }, { "epoch": 1.0271078333665538, "grad_norm": 0.4905944872443682, "learning_rate": 7.845049224577817e-05, "loss": 0.6700382828712463, "step": 1289 }, { "epoch": 1.027905122583217, "grad_norm": 0.4713885897423349, "learning_rate": 7.841435951359446e-05, "loss": 0.5174194574356079, "step": 1290 }, { "epoch": 1.0287024117998804, "grad_norm": 0.5010466383775212, "learning_rate": 7.837820485121196e-05, "loss": 0.6600837111473083, "step": 1291 }, { "epoch": 1.0294997010165436, "grad_norm": 0.39304025943456766, "learning_rate": 7.834202828653487e-05, "loss": 0.40685147047042847, "step": 1292 }, { "epoch": 1.030296990233207, "grad_norm": 0.4131806278230161, "learning_rate": 7.83058298474842e-05, "loss": 0.509067177772522, "step": 1293 }, { "epoch": 1.0310942794498705, "grad_norm": 0.463114178251694, "learning_rate": 7.826960956199794e-05, "loss": 0.5271415114402771, "step": 1294 }, { "epoch": 1.0318915686665338, "grad_norm": 0.401461891936321, "learning_rate": 7.82333674580309e-05, "loss": 0.479606568813324, "step": 1295 }, { "epoch": 1.0326888578831972, "grad_norm": 0.4106086658033351, "learning_rate": 7.819710356355472e-05, "loss": 0.5155538320541382, "step": 1296 }, { "epoch": 1.0334861470998604, "grad_norm": 0.4384321661195685, "learning_rate": 7.816081790655785e-05, "loss": 0.5494290590286255, "step": 1297 }, { "epoch": 1.0342834363165239, "grad_norm": 0.43690619690940746, "learning_rate": 7.812451051504559e-05, "loss": 0.48548316955566406, "step": 1298 }, { "epoch": 1.035080725533187, "grad_norm": 0.44048156667434585, "learning_rate": 7.808818141703998e-05, "loss": 0.47656363248825073, "step": 1299 }, { "epoch": 1.0358780147498505, "grad_norm": 0.3937480021188485, "learning_rate": 7.805183064057977e-05, "loss": 0.4734817445278168, "step": 1300 }, { "epoch": 1.0366753039665138, "grad_norm": 0.4265781101214335, "learning_rate": 7.801545821372051e-05, "loss": 0.46750929951667786, "step": 1301 }, { "epoch": 1.0374725931831772, "grad_norm": 0.4269011557627048, "learning_rate": 7.797906416453445e-05, "loss": 0.5079821348190308, "step": 1302 }, { "epoch": 1.0382698823998406, "grad_norm": 0.4453320727964163, "learning_rate": 7.794264852111049e-05, "loss": 0.5078684091567993, "step": 1303 }, { "epoch": 1.0390671716165039, "grad_norm": 0.38903914717620647, "learning_rate": 7.790621131155424e-05, "loss": 0.40937691926956177, "step": 1304 }, { "epoch": 1.0398644608331673, "grad_norm": 0.45661004557340457, "learning_rate": 7.78697525639879e-05, "loss": 0.5164692401885986, "step": 1305 }, { "epoch": 1.0406617500498305, "grad_norm": 0.4706296691901682, "learning_rate": 7.783327230655035e-05, "loss": 0.518214225769043, "step": 1306 }, { "epoch": 1.041459039266494, "grad_norm": 0.43047612354724535, "learning_rate": 7.779677056739705e-05, "loss": 0.44543129205703735, "step": 1307 }, { "epoch": 1.0422563284831572, "grad_norm": 0.41894909824196774, "learning_rate": 7.776024737470004e-05, "loss": 0.41595175862312317, "step": 1308 }, { "epoch": 1.0430536176998206, "grad_norm": 0.4425269132023713, "learning_rate": 7.772370275664789e-05, "loss": 0.43113037943840027, "step": 1309 }, { "epoch": 1.0438509069164839, "grad_norm": 0.39624699436371263, "learning_rate": 7.768713674144577e-05, "loss": 0.36065107583999634, "step": 1310 }, { "epoch": 1.0446481961331473, "grad_norm": 0.5169007236116326, "learning_rate": 7.765054935731528e-05, "loss": 0.5943465232849121, "step": 1311 }, { "epoch": 1.0454454853498107, "grad_norm": 0.45056014400362915, "learning_rate": 7.761394063249459e-05, "loss": 0.49855685234069824, "step": 1312 }, { "epoch": 1.046242774566474, "grad_norm": 0.4127100079371469, "learning_rate": 7.75773105952383e-05, "loss": 0.44212374091148376, "step": 1313 }, { "epoch": 1.0470400637831374, "grad_norm": 0.45639137495049537, "learning_rate": 7.754065927381746e-05, "loss": 0.5005576610565186, "step": 1314 }, { "epoch": 1.0478373529998006, "grad_norm": 0.37586921174965265, "learning_rate": 7.750398669651957e-05, "loss": 0.3599758446216583, "step": 1315 }, { "epoch": 1.048634642216464, "grad_norm": 0.45015812214667006, "learning_rate": 7.746729289164851e-05, "loss": 0.4821392297744751, "step": 1316 }, { "epoch": 1.0494319314331273, "grad_norm": 0.4258173267425795, "learning_rate": 7.743057788752457e-05, "loss": 0.48365235328674316, "step": 1317 }, { "epoch": 1.0502292206497907, "grad_norm": 0.4620837524304386, "learning_rate": 7.739384171248435e-05, "loss": 0.45600688457489014, "step": 1318 }, { "epoch": 1.051026509866454, "grad_norm": 0.39536563676947756, "learning_rate": 7.735708439488085e-05, "loss": 0.44999897480010986, "step": 1319 }, { "epoch": 1.0518237990831174, "grad_norm": 0.45543424675986216, "learning_rate": 7.732030596308337e-05, "loss": 0.5398444533348083, "step": 1320 }, { "epoch": 1.0526210882997808, "grad_norm": 0.48772526019411, "learning_rate": 7.728350644547749e-05, "loss": 0.5677058100700378, "step": 1321 }, { "epoch": 1.053418377516444, "grad_norm": 0.46546400769250296, "learning_rate": 7.724668587046505e-05, "loss": 0.6156116724014282, "step": 1322 }, { "epoch": 1.0542156667331075, "grad_norm": 0.37565696727661513, "learning_rate": 7.720984426646419e-05, "loss": 0.4044550955295563, "step": 1323 }, { "epoch": 1.0550129559497707, "grad_norm": 0.4287723082537283, "learning_rate": 7.717298166190926e-05, "loss": 0.505020797252655, "step": 1324 }, { "epoch": 1.0558102451664342, "grad_norm": 0.45512980098100914, "learning_rate": 7.713609808525079e-05, "loss": 0.5960977673530579, "step": 1325 }, { "epoch": 1.0566075343830974, "grad_norm": 0.48814531093694746, "learning_rate": 7.709919356495556e-05, "loss": 0.5570214986801147, "step": 1326 }, { "epoch": 1.0574048235997608, "grad_norm": 0.379278309434597, "learning_rate": 7.70622681295064e-05, "loss": 0.4623227119445801, "step": 1327 }, { "epoch": 1.058202112816424, "grad_norm": 0.44114046098900056, "learning_rate": 7.702532180740245e-05, "loss": 0.5316198468208313, "step": 1328 }, { "epoch": 1.0589994020330875, "grad_norm": 0.43195070152086507, "learning_rate": 7.69883546271588e-05, "loss": 0.5256252288818359, "step": 1329 }, { "epoch": 1.0597966912497507, "grad_norm": 0.4785694614452866, "learning_rate": 7.695136661730676e-05, "loss": 0.5965954065322876, "step": 1330 }, { "epoch": 1.0605939804664142, "grad_norm": 0.38969362364693394, "learning_rate": 7.691435780639365e-05, "loss": 0.44816282391548157, "step": 1331 }, { "epoch": 1.0613912696830776, "grad_norm": 0.43935202077082636, "learning_rate": 7.687732822298285e-05, "loss": 0.46832340955734253, "step": 1332 }, { "epoch": 1.0621885588997408, "grad_norm": 0.4713289868083838, "learning_rate": 7.684027789565384e-05, "loss": 0.4393313229084015, "step": 1333 }, { "epoch": 1.0629858481164043, "grad_norm": 0.4607305305912381, "learning_rate": 7.6803206853002e-05, "loss": 0.4395507872104645, "step": 1334 }, { "epoch": 1.0637831373330675, "grad_norm": 0.383160121295378, "learning_rate": 7.676611512363879e-05, "loss": 0.4790024161338806, "step": 1335 }, { "epoch": 1.064580426549731, "grad_norm": 0.38726864004801437, "learning_rate": 7.672900273619158e-05, "loss": 0.4720363914966583, "step": 1336 }, { "epoch": 1.0653777157663942, "grad_norm": 0.3937679901632482, "learning_rate": 7.669186971930371e-05, "loss": 0.4378887712955475, "step": 1337 }, { "epoch": 1.0661750049830576, "grad_norm": 0.46900836221354575, "learning_rate": 7.665471610163447e-05, "loss": 0.5678130984306335, "step": 1338 }, { "epoch": 1.0669722941997208, "grad_norm": 0.4519775342169544, "learning_rate": 7.661754191185898e-05, "loss": 0.4801293611526489, "step": 1339 }, { "epoch": 1.0677695834163843, "grad_norm": 0.3985552429444468, "learning_rate": 7.658034717866829e-05, "loss": 0.43749651312828064, "step": 1340 }, { "epoch": 1.0685668726330477, "grad_norm": 0.44145791301134885, "learning_rate": 7.654313193076929e-05, "loss": 0.4939076602458954, "step": 1341 }, { "epoch": 1.069364161849711, "grad_norm": 0.4249119436162043, "learning_rate": 7.650589619688469e-05, "loss": 0.43495550751686096, "step": 1342 }, { "epoch": 1.0701614510663744, "grad_norm": 0.4561929918196966, "learning_rate": 7.646864000575303e-05, "loss": 0.5118223428726196, "step": 1343 }, { "epoch": 1.0709587402830376, "grad_norm": 0.38936961602130915, "learning_rate": 7.643136338612864e-05, "loss": 0.33904510736465454, "step": 1344 }, { "epoch": 1.071756029499701, "grad_norm": 0.4351535742630361, "learning_rate": 7.639406636678162e-05, "loss": 0.4034179449081421, "step": 1345 }, { "epoch": 1.0725533187163643, "grad_norm": 0.4710400082045431, "learning_rate": 7.635674897649776e-05, "loss": 0.48322218656539917, "step": 1346 }, { "epoch": 1.0733506079330277, "grad_norm": 0.4918718032859412, "learning_rate": 7.631941124407867e-05, "loss": 0.5914115905761719, "step": 1347 }, { "epoch": 1.074147897149691, "grad_norm": 0.5451578457891623, "learning_rate": 7.628205319834159e-05, "loss": 0.6362003684043884, "step": 1348 }, { "epoch": 1.0749451863663544, "grad_norm": 0.45976516930038247, "learning_rate": 7.624467486811943e-05, "loss": 0.48883602023124695, "step": 1349 }, { "epoch": 1.0757424755830178, "grad_norm": 0.47359418514117796, "learning_rate": 7.62072762822608e-05, "loss": 0.5514037609100342, "step": 1350 }, { "epoch": 1.076539764799681, "grad_norm": 0.4629615879361288, "learning_rate": 7.616985746962995e-05, "loss": 0.5680970549583435, "step": 1351 }, { "epoch": 1.0773370540163445, "grad_norm": 0.47074114727781724, "learning_rate": 7.613241845910668e-05, "loss": 0.5356358885765076, "step": 1352 }, { "epoch": 1.0781343432330077, "grad_norm": 0.42577108941513936, "learning_rate": 7.60949592795864e-05, "loss": 0.45187896490097046, "step": 1353 }, { "epoch": 1.0789316324496712, "grad_norm": 0.4506111305078421, "learning_rate": 7.605747995998013e-05, "loss": 0.5221598744392395, "step": 1354 }, { "epoch": 1.0797289216663344, "grad_norm": 0.4490922037275809, "learning_rate": 7.60199805292144e-05, "loss": 0.44877806305885315, "step": 1355 }, { "epoch": 1.0805262108829978, "grad_norm": 0.48908081016792554, "learning_rate": 7.598246101623126e-05, "loss": 0.6115208864212036, "step": 1356 }, { "epoch": 1.081323500099661, "grad_norm": 0.41827413400254015, "learning_rate": 7.594492144998825e-05, "loss": 0.4250984787940979, "step": 1357 }, { "epoch": 1.0821207893163245, "grad_norm": 0.49763621575989603, "learning_rate": 7.590736185945843e-05, "loss": 0.5221043229103088, "step": 1358 }, { "epoch": 1.082918078532988, "grad_norm": 0.5090428077444095, "learning_rate": 7.586978227363026e-05, "loss": 0.5116435289382935, "step": 1359 }, { "epoch": 1.0837153677496512, "grad_norm": 0.5021552960473107, "learning_rate": 7.583218272150769e-05, "loss": 0.5580320358276367, "step": 1360 }, { "epoch": 1.0845126569663146, "grad_norm": 0.4389253330464656, "learning_rate": 7.579456323211001e-05, "loss": 0.38861551880836487, "step": 1361 }, { "epoch": 1.0853099461829778, "grad_norm": 0.45492845576333074, "learning_rate": 7.575692383447199e-05, "loss": 0.501214861869812, "step": 1362 }, { "epoch": 1.0861072353996413, "grad_norm": 0.5025616488279568, "learning_rate": 7.571926455764364e-05, "loss": 0.5596179962158203, "step": 1363 }, { "epoch": 1.0869045246163045, "grad_norm": 0.4430013500178393, "learning_rate": 7.568158543069047e-05, "loss": 0.5441907048225403, "step": 1364 }, { "epoch": 1.087701813832968, "grad_norm": 0.4052354601358164, "learning_rate": 7.564388648269314e-05, "loss": 0.4745023846626282, "step": 1365 }, { "epoch": 1.0884991030496312, "grad_norm": 0.40327232482431347, "learning_rate": 7.560616774274775e-05, "loss": 0.5152475833892822, "step": 1366 }, { "epoch": 1.0892963922662946, "grad_norm": 0.45182015828328775, "learning_rate": 7.556842923996561e-05, "loss": 0.5001053214073181, "step": 1367 }, { "epoch": 1.090093681482958, "grad_norm": 0.4171708788356318, "learning_rate": 7.553067100347326e-05, "loss": 0.4711199700832367, "step": 1368 }, { "epoch": 1.0908909706996213, "grad_norm": 0.48763289015136513, "learning_rate": 7.549289306241256e-05, "loss": 0.5137256383895874, "step": 1369 }, { "epoch": 1.0916882599162847, "grad_norm": 0.4476584181732116, "learning_rate": 7.545509544594045e-05, "loss": 0.5027186870574951, "step": 1370 }, { "epoch": 1.092485549132948, "grad_norm": 0.43711645236741675, "learning_rate": 7.541727818322916e-05, "loss": 0.6085895895957947, "step": 1371 }, { "epoch": 1.0932828383496114, "grad_norm": 0.44050628059616065, "learning_rate": 7.537944130346607e-05, "loss": 0.48190581798553467, "step": 1372 }, { "epoch": 1.0940801275662746, "grad_norm": 0.4408507275927816, "learning_rate": 7.534158483585364e-05, "loss": 0.5537252426147461, "step": 1373 }, { "epoch": 1.094877416782938, "grad_norm": 0.466907424824075, "learning_rate": 7.530370880960951e-05, "loss": 0.5499142408370972, "step": 1374 }, { "epoch": 1.0956747059996013, "grad_norm": 0.46333358778948963, "learning_rate": 7.526581325396637e-05, "loss": 0.46132564544677734, "step": 1375 }, { "epoch": 1.0964719952162647, "grad_norm": 0.48326815309393967, "learning_rate": 7.522789819817204e-05, "loss": 0.5599722266197205, "step": 1376 }, { "epoch": 1.0972692844329281, "grad_norm": 0.4689219935590651, "learning_rate": 7.518996367148933e-05, "loss": 0.4482729434967041, "step": 1377 }, { "epoch": 1.0980665736495914, "grad_norm": 0.44673111685870304, "learning_rate": 7.51520097031961e-05, "loss": 0.5398910641670227, "step": 1378 }, { "epoch": 1.0988638628662548, "grad_norm": 0.4401977266847545, "learning_rate": 7.511403632258522e-05, "loss": 0.5326400995254517, "step": 1379 }, { "epoch": 1.099661152082918, "grad_norm": 0.41166388830573686, "learning_rate": 7.507604355896457e-05, "loss": 0.47373878955841064, "step": 1380 }, { "epoch": 1.1004584412995815, "grad_norm": 0.4669453639150514, "learning_rate": 7.50380314416569e-05, "loss": 0.5356267690658569, "step": 1381 }, { "epoch": 1.1012557305162447, "grad_norm": 0.4036827884204457, "learning_rate": 7.500000000000001e-05, "loss": 0.5012600421905518, "step": 1382 }, { "epoch": 1.1020530197329081, "grad_norm": 0.4614299650054126, "learning_rate": 7.496194926334655e-05, "loss": 0.5335882902145386, "step": 1383 }, { "epoch": 1.1028503089495714, "grad_norm": 0.4121183810931479, "learning_rate": 7.492387926106404e-05, "loss": 0.5400184392929077, "step": 1384 }, { "epoch": 1.1036475981662348, "grad_norm": 0.4032875514680088, "learning_rate": 7.488579002253494e-05, "loss": 0.42131680250167847, "step": 1385 }, { "epoch": 1.1044448873828983, "grad_norm": 0.4575429665490599, "learning_rate": 7.484768157715649e-05, "loss": 0.5884301662445068, "step": 1386 }, { "epoch": 1.1052421765995615, "grad_norm": 0.46445358001830833, "learning_rate": 7.480955395434082e-05, "loss": 0.5226682424545288, "step": 1387 }, { "epoch": 1.106039465816225, "grad_norm": 0.4563982836290216, "learning_rate": 7.47714071835148e-05, "loss": 0.4356810748577118, "step": 1388 }, { "epoch": 1.1068367550328881, "grad_norm": 0.48616915239601133, "learning_rate": 7.473324129412012e-05, "loss": 0.5608357787132263, "step": 1389 }, { "epoch": 1.1076340442495516, "grad_norm": 0.4441919927497563, "learning_rate": 7.469505631561317e-05, "loss": 0.5283908247947693, "step": 1390 }, { "epoch": 1.1084313334662148, "grad_norm": 0.455297077372604, "learning_rate": 7.465685227746517e-05, "loss": 0.4676363468170166, "step": 1391 }, { "epoch": 1.1092286226828783, "grad_norm": 0.4423724679739213, "learning_rate": 7.461862920916191e-05, "loss": 0.46249890327453613, "step": 1392 }, { "epoch": 1.1100259118995415, "grad_norm": 0.44005287475291627, "learning_rate": 7.458038714020406e-05, "loss": 0.4506583511829376, "step": 1393 }, { "epoch": 1.110823201116205, "grad_norm": 0.4871538241012882, "learning_rate": 7.454212610010672e-05, "loss": 0.5601943731307983, "step": 1394 }, { "epoch": 1.1116204903328684, "grad_norm": 0.4064923971000881, "learning_rate": 7.450384611839985e-05, "loss": 0.44235754013061523, "step": 1395 }, { "epoch": 1.1124177795495316, "grad_norm": 0.4422324177576657, "learning_rate": 7.446554722462789e-05, "loss": 0.4287424087524414, "step": 1396 }, { "epoch": 1.113215068766195, "grad_norm": 0.49390971696516056, "learning_rate": 7.442722944834993e-05, "loss": 0.5601609945297241, "step": 1397 }, { "epoch": 1.1140123579828582, "grad_norm": 0.44385255956223213, "learning_rate": 7.438889281913962e-05, "loss": 0.5206173658370972, "step": 1398 }, { "epoch": 1.1148096471995217, "grad_norm": 0.44862258384294973, "learning_rate": 7.435053736658517e-05, "loss": 0.5094597339630127, "step": 1399 }, { "epoch": 1.115606936416185, "grad_norm": 0.46464862381252764, "learning_rate": 7.431216312028932e-05, "loss": 0.52358078956604, "step": 1400 }, { "epoch": 1.1164042256328484, "grad_norm": 0.42264835296013564, "learning_rate": 7.42737701098693e-05, "loss": 0.5196784734725952, "step": 1401 }, { "epoch": 1.1172015148495116, "grad_norm": 0.4010948889657914, "learning_rate": 7.423535836495683e-05, "loss": 0.4597955644130707, "step": 1402 }, { "epoch": 1.117998804066175, "grad_norm": 0.48488961108599493, "learning_rate": 7.41969279151981e-05, "loss": 0.6369611024856567, "step": 1403 }, { "epoch": 1.1187960932828385, "grad_norm": 0.42713391447403964, "learning_rate": 7.415847879025372e-05, "loss": 0.4591018557548523, "step": 1404 }, { "epoch": 1.1195933824995017, "grad_norm": 0.4159761348391667, "learning_rate": 7.41200110197987e-05, "loss": 0.5014282464981079, "step": 1405 }, { "epoch": 1.1203906717161651, "grad_norm": 0.4527688938089587, "learning_rate": 7.40815246335225e-05, "loss": 0.5580105781555176, "step": 1406 }, { "epoch": 1.1211879609328284, "grad_norm": 0.44651753361130647, "learning_rate": 7.404301966112885e-05, "loss": 0.5115188360214233, "step": 1407 }, { "epoch": 1.1219852501494918, "grad_norm": 0.4073651131107603, "learning_rate": 7.400449613233596e-05, "loss": 0.48261332511901855, "step": 1408 }, { "epoch": 1.122782539366155, "grad_norm": 0.4883452263162925, "learning_rate": 7.39659540768762e-05, "loss": 0.43564435839653015, "step": 1409 }, { "epoch": 1.1235798285828185, "grad_norm": 0.439969486979431, "learning_rate": 7.392739352449638e-05, "loss": 0.46871405839920044, "step": 1410 }, { "epoch": 1.1243771177994817, "grad_norm": 0.41268709159420164, "learning_rate": 7.388881450495752e-05, "loss": 0.45209211111068726, "step": 1411 }, { "epoch": 1.1251744070161451, "grad_norm": 0.48398353087071017, "learning_rate": 7.385021704803489e-05, "loss": 0.5744408965110779, "step": 1412 }, { "epoch": 1.1259716962328086, "grad_norm": 0.46006648681849727, "learning_rate": 7.381160118351801e-05, "loss": 0.49958595633506775, "step": 1413 }, { "epoch": 1.1267689854494718, "grad_norm": 0.4950710572785882, "learning_rate": 7.377296694121058e-05, "loss": 0.5428223609924316, "step": 1414 }, { "epoch": 1.1275662746661352, "grad_norm": 0.43407250820612414, "learning_rate": 7.373431435093055e-05, "loss": 0.4602851867675781, "step": 1415 }, { "epoch": 1.1283635638827985, "grad_norm": 0.4269599006002697, "learning_rate": 7.369564344250992e-05, "loss": 0.4110310971736908, "step": 1416 }, { "epoch": 1.129160853099462, "grad_norm": 0.4791931583639768, "learning_rate": 7.365695424579495e-05, "loss": 0.4895980954170227, "step": 1417 }, { "epoch": 1.1299581423161251, "grad_norm": 0.4486498910275994, "learning_rate": 7.361824679064591e-05, "loss": 0.48449280858039856, "step": 1418 }, { "epoch": 1.1307554315327886, "grad_norm": 0.49134605297585676, "learning_rate": 7.357952110693725e-05, "loss": 0.44632387161254883, "step": 1419 }, { "epoch": 1.1315527207494518, "grad_norm": 0.4183484293885515, "learning_rate": 7.354077722455742e-05, "loss": 0.3923068642616272, "step": 1420 }, { "epoch": 1.1323500099661152, "grad_norm": 0.46836370149866335, "learning_rate": 7.350201517340896e-05, "loss": 0.6487841606140137, "step": 1421 }, { "epoch": 1.1331472991827787, "grad_norm": 0.49651159733181216, "learning_rate": 7.34632349834084e-05, "loss": 0.6212894916534424, "step": 1422 }, { "epoch": 1.133944588399442, "grad_norm": 0.40111280892383255, "learning_rate": 7.342443668448628e-05, "loss": 0.3745911121368408, "step": 1423 }, { "epoch": 1.1347418776161051, "grad_norm": 0.44210163545219855, "learning_rate": 7.338562030658713e-05, "loss": 0.49464043974876404, "step": 1424 }, { "epoch": 1.1355391668327686, "grad_norm": 0.4271551951577077, "learning_rate": 7.334678587966944e-05, "loss": 0.4697701632976532, "step": 1425 }, { "epoch": 1.136336456049432, "grad_norm": 0.4296751717104552, "learning_rate": 7.33079334337056e-05, "loss": 0.4739733338356018, "step": 1426 }, { "epoch": 1.1371337452660952, "grad_norm": 0.48164261091200333, "learning_rate": 7.326906299868193e-05, "loss": 0.6205064654350281, "step": 1427 }, { "epoch": 1.1379310344827587, "grad_norm": 0.4126058608867619, "learning_rate": 7.323017460459862e-05, "loss": 0.4536704421043396, "step": 1428 }, { "epoch": 1.138728323699422, "grad_norm": 0.4641291887720184, "learning_rate": 7.319126828146974e-05, "loss": 0.530032753944397, "step": 1429 }, { "epoch": 1.1395256129160853, "grad_norm": 0.4699712734094953, "learning_rate": 7.315234405932317e-05, "loss": 0.5986607074737549, "step": 1430 }, { "epoch": 1.1403229021327486, "grad_norm": 0.4390419911067186, "learning_rate": 7.311340196820062e-05, "loss": 0.43126600980758667, "step": 1431 }, { "epoch": 1.141120191349412, "grad_norm": 0.4193625584436858, "learning_rate": 7.30744420381576e-05, "loss": 0.42810630798339844, "step": 1432 }, { "epoch": 1.1419174805660752, "grad_norm": 0.43913631798257224, "learning_rate": 7.303546429926336e-05, "loss": 0.5311804413795471, "step": 1433 }, { "epoch": 1.1427147697827387, "grad_norm": 0.44225524038143205, "learning_rate": 7.299646878160094e-05, "loss": 0.512696385383606, "step": 1434 }, { "epoch": 1.1435120589994021, "grad_norm": 0.383869374101978, "learning_rate": 7.295745551526707e-05, "loss": 0.46928277611732483, "step": 1435 }, { "epoch": 1.1443093482160653, "grad_norm": 0.4053535795315278, "learning_rate": 7.291842453037217e-05, "loss": 0.4392014145851135, "step": 1436 }, { "epoch": 1.1451066374327288, "grad_norm": 0.4598816517265106, "learning_rate": 7.287937585704035e-05, "loss": 0.6456787586212158, "step": 1437 }, { "epoch": 1.145903926649392, "grad_norm": 0.4185199121804393, "learning_rate": 7.284030952540937e-05, "loss": 0.4318179190158844, "step": 1438 }, { "epoch": 1.1467012158660554, "grad_norm": 0.48209518296722026, "learning_rate": 7.280122556563062e-05, "loss": 0.563558042049408, "step": 1439 }, { "epoch": 1.1474985050827187, "grad_norm": 0.38051575850036434, "learning_rate": 7.276212400786911e-05, "loss": 0.4279117286205292, "step": 1440 }, { "epoch": 1.148295794299382, "grad_norm": 0.5007676581415444, "learning_rate": 7.272300488230341e-05, "loss": 0.5801602005958557, "step": 1441 }, { "epoch": 1.1490930835160453, "grad_norm": 0.48274412558062874, "learning_rate": 7.268386821912564e-05, "loss": 0.5667632818222046, "step": 1442 }, { "epoch": 1.1498903727327088, "grad_norm": 0.4738138485409575, "learning_rate": 7.26447140485415e-05, "loss": 0.4756554961204529, "step": 1443 }, { "epoch": 1.1506876619493722, "grad_norm": 0.4987620051533735, "learning_rate": 7.260554240077016e-05, "loss": 0.5176194310188293, "step": 1444 }, { "epoch": 1.1514849511660354, "grad_norm": 0.44565836032967604, "learning_rate": 7.256635330604426e-05, "loss": 0.4120911955833435, "step": 1445 }, { "epoch": 1.1522822403826989, "grad_norm": 0.4018456655567581, "learning_rate": 7.252714679461e-05, "loss": 0.4321288466453552, "step": 1446 }, { "epoch": 1.153079529599362, "grad_norm": 0.4287366004010353, "learning_rate": 7.248792289672692e-05, "loss": 0.4821227192878723, "step": 1447 }, { "epoch": 1.1538768188160256, "grad_norm": 0.4779396836136716, "learning_rate": 7.244868164266803e-05, "loss": 0.5511746406555176, "step": 1448 }, { "epoch": 1.1546741080326888, "grad_norm": 0.3938621753318888, "learning_rate": 7.24094230627197e-05, "loss": 0.4434305429458618, "step": 1449 }, { "epoch": 1.1554713972493522, "grad_norm": 0.47512405608328134, "learning_rate": 7.237014718718174e-05, "loss": 0.5027536749839783, "step": 1450 }, { "epoch": 1.1562686864660154, "grad_norm": 0.5199190478729299, "learning_rate": 7.233085404636723e-05, "loss": 0.5581514835357666, "step": 1451 }, { "epoch": 1.1570659756826789, "grad_norm": 0.4004262470046865, "learning_rate": 7.229154367060263e-05, "loss": 0.37170350551605225, "step": 1452 }, { "epoch": 1.1578632648993423, "grad_norm": 0.5169289636779059, "learning_rate": 7.225221609022766e-05, "loss": 0.5939280986785889, "step": 1453 }, { "epoch": 1.1586605541160055, "grad_norm": 0.3972858081453212, "learning_rate": 7.221287133559537e-05, "loss": 0.42927196621894836, "step": 1454 }, { "epoch": 1.159457843332669, "grad_norm": 0.45202573762959913, "learning_rate": 7.217350943707202e-05, "loss": 0.581291675567627, "step": 1455 }, { "epoch": 1.1602551325493322, "grad_norm": 0.40210721337301286, "learning_rate": 7.213413042503714e-05, "loss": 0.41605785489082336, "step": 1456 }, { "epoch": 1.1610524217659957, "grad_norm": 0.4235691049872662, "learning_rate": 7.20947343298834e-05, "loss": 0.4721585512161255, "step": 1457 }, { "epoch": 1.1618497109826589, "grad_norm": 0.4518473204572692, "learning_rate": 7.205532118201676e-05, "loss": 0.5275952816009521, "step": 1458 }, { "epoch": 1.1626470001993223, "grad_norm": 0.415188991655604, "learning_rate": 7.201589101185622e-05, "loss": 0.4567352533340454, "step": 1459 }, { "epoch": 1.1634442894159855, "grad_norm": 0.42081649609994565, "learning_rate": 7.197644384983405e-05, "loss": 0.49267077445983887, "step": 1460 }, { "epoch": 1.164241578632649, "grad_norm": 0.4524981135813123, "learning_rate": 7.19369797263955e-05, "loss": 0.4182363450527191, "step": 1461 }, { "epoch": 1.1650388678493124, "grad_norm": 0.4922966952044974, "learning_rate": 7.189749867199899e-05, "loss": 0.5348106026649475, "step": 1462 }, { "epoch": 1.1658361570659757, "grad_norm": 0.45224353753296836, "learning_rate": 7.185800071711601e-05, "loss": 0.48603498935699463, "step": 1463 }, { "epoch": 1.166633446282639, "grad_norm": 0.4290808980344157, "learning_rate": 7.181848589223108e-05, "loss": 0.4816068410873413, "step": 1464 }, { "epoch": 1.1674307354993023, "grad_norm": 0.45592615360335703, "learning_rate": 7.17789542278417e-05, "loss": 0.464562326669693, "step": 1465 }, { "epoch": 1.1682280247159658, "grad_norm": 0.3988232466367728, "learning_rate": 7.173940575445846e-05, "loss": 0.44519251585006714, "step": 1466 }, { "epoch": 1.169025313932629, "grad_norm": 0.42105538709386564, "learning_rate": 7.16998405026048e-05, "loss": 0.4534478783607483, "step": 1467 }, { "epoch": 1.1698226031492924, "grad_norm": 0.40128398126848536, "learning_rate": 7.166025850281723e-05, "loss": 0.428994745016098, "step": 1468 }, { "epoch": 1.1706198923659556, "grad_norm": 0.4130695302956996, "learning_rate": 7.162065978564511e-05, "loss": 0.4195587933063507, "step": 1469 }, { "epoch": 1.171417181582619, "grad_norm": 0.42406757862447825, "learning_rate": 7.158104438165073e-05, "loss": 0.444065123796463, "step": 1470 }, { "epoch": 1.1722144707992825, "grad_norm": 0.5317434660483155, "learning_rate": 7.154141232140925e-05, "loss": 0.5536863803863525, "step": 1471 }, { "epoch": 1.1730117600159458, "grad_norm": 0.49486398161335315, "learning_rate": 7.150176363550869e-05, "loss": 0.6395037770271301, "step": 1472 }, { "epoch": 1.1738090492326092, "grad_norm": 0.46165430362037513, "learning_rate": 7.146209835454992e-05, "loss": 0.6023667454719543, "step": 1473 }, { "epoch": 1.1746063384492724, "grad_norm": 0.47089745255469456, "learning_rate": 7.142241650914654e-05, "loss": 0.490632563829422, "step": 1474 }, { "epoch": 1.1754036276659359, "grad_norm": 0.4097510167559564, "learning_rate": 7.138271812992503e-05, "loss": 0.5075156688690186, "step": 1475 }, { "epoch": 1.176200916882599, "grad_norm": 0.463967073611326, "learning_rate": 7.134300324752463e-05, "loss": 0.454807311296463, "step": 1476 }, { "epoch": 1.1769982060992625, "grad_norm": 0.45714710956234045, "learning_rate": 7.130327189259723e-05, "loss": 0.5764347314834595, "step": 1477 }, { "epoch": 1.1777954953159258, "grad_norm": 0.4804681941917389, "learning_rate": 7.126352409580748e-05, "loss": 0.5317808389663696, "step": 1478 }, { "epoch": 1.1785927845325892, "grad_norm": 0.4587073807486625, "learning_rate": 7.122375988783276e-05, "loss": 0.478884220123291, "step": 1479 }, { "epoch": 1.1793900737492526, "grad_norm": 0.46056677466287244, "learning_rate": 7.118397929936306e-05, "loss": 0.49898773431777954, "step": 1480 }, { "epoch": 1.1801873629659159, "grad_norm": 0.42461583732108904, "learning_rate": 7.114418236110105e-05, "loss": 0.46337753534317017, "step": 1481 }, { "epoch": 1.1809846521825793, "grad_norm": 0.4815994312748056, "learning_rate": 7.110436910376197e-05, "loss": 0.4758963882923126, "step": 1482 }, { "epoch": 1.1817819413992425, "grad_norm": 0.4463694650587255, "learning_rate": 7.106453955807371e-05, "loss": 0.49004653096199036, "step": 1483 }, { "epoch": 1.182579230615906, "grad_norm": 0.527057285217107, "learning_rate": 7.102469375477672e-05, "loss": 0.5233070254325867, "step": 1484 }, { "epoch": 1.1833765198325692, "grad_norm": 0.5786765656268136, "learning_rate": 7.098483172462394e-05, "loss": 0.5329142808914185, "step": 1485 }, { "epoch": 1.1841738090492326, "grad_norm": 0.41259028642464785, "learning_rate": 7.094495349838092e-05, "loss": 0.3886168897151947, "step": 1486 }, { "epoch": 1.1849710982658959, "grad_norm": 0.3860729235755012, "learning_rate": 7.090505910682568e-05, "loss": 0.4169931709766388, "step": 1487 }, { "epoch": 1.1857683874825593, "grad_norm": 0.5015857842291525, "learning_rate": 7.086514858074864e-05, "loss": 0.5246478915214539, "step": 1488 }, { "epoch": 1.1865656766992227, "grad_norm": 0.4588545494214363, "learning_rate": 7.082522195095282e-05, "loss": 0.4751738905906677, "step": 1489 }, { "epoch": 1.187362965915886, "grad_norm": 0.484504106025966, "learning_rate": 7.078527924825352e-05, "loss": 0.577305793762207, "step": 1490 }, { "epoch": 1.1881602551325494, "grad_norm": 0.4042848728954278, "learning_rate": 7.074532050347855e-05, "loss": 0.49864161014556885, "step": 1491 }, { "epoch": 1.1889575443492126, "grad_norm": 0.49344107392319725, "learning_rate": 7.070534574746806e-05, "loss": 0.538289487361908, "step": 1492 }, { "epoch": 1.189754833565876, "grad_norm": 0.48372096203657106, "learning_rate": 7.066535501107456e-05, "loss": 0.536859929561615, "step": 1493 }, { "epoch": 1.1905521227825393, "grad_norm": 0.42049854841067413, "learning_rate": 7.062534832516287e-05, "loss": 0.4297150671482086, "step": 1494 }, { "epoch": 1.1913494119992027, "grad_norm": 0.4842427938800027, "learning_rate": 7.058532572061018e-05, "loss": 0.5851834416389465, "step": 1495 }, { "epoch": 1.192146701215866, "grad_norm": 0.46750952331497414, "learning_rate": 7.054528722830593e-05, "loss": 0.5412791967391968, "step": 1496 }, { "epoch": 1.1929439904325294, "grad_norm": 0.5120438143229424, "learning_rate": 7.05052328791518e-05, "loss": 0.541975736618042, "step": 1497 }, { "epoch": 1.1937412796491929, "grad_norm": 0.4337333352199283, "learning_rate": 7.046516270406174e-05, "loss": 0.4815516471862793, "step": 1498 }, { "epoch": 1.194538568865856, "grad_norm": 0.4530596538662744, "learning_rate": 7.042507673396191e-05, "loss": 0.6014719605445862, "step": 1499 }, { "epoch": 1.1953358580825195, "grad_norm": 0.4714313723301209, "learning_rate": 7.038497499979064e-05, "loss": 0.531376838684082, "step": 1500 }, { "epoch": 1.1961331472991827, "grad_norm": 0.45473138956520875, "learning_rate": 7.034485753249846e-05, "loss": 0.49929383397102356, "step": 1501 }, { "epoch": 1.1969304365158462, "grad_norm": 0.46082514918239836, "learning_rate": 7.030472436304803e-05, "loss": 0.4502291977405548, "step": 1502 }, { "epoch": 1.1977277257325094, "grad_norm": 0.45269184612953006, "learning_rate": 7.026457552241411e-05, "loss": 0.3640419542789459, "step": 1503 }, { "epoch": 1.1985250149491729, "grad_norm": 0.49856104128251105, "learning_rate": 7.022441104158357e-05, "loss": 0.4956182539463043, "step": 1504 }, { "epoch": 1.199322304165836, "grad_norm": 0.41232074924018025, "learning_rate": 7.018423095155537e-05, "loss": 0.40079593658447266, "step": 1505 }, { "epoch": 1.2001195933824995, "grad_norm": 0.45954528748131057, "learning_rate": 7.014403528334048e-05, "loss": 0.5451599359512329, "step": 1506 }, { "epoch": 1.200916882599163, "grad_norm": 0.42008976363434597, "learning_rate": 7.01038240679619e-05, "loss": 0.442186176776886, "step": 1507 }, { "epoch": 1.2017141718158262, "grad_norm": 0.4272871237362925, "learning_rate": 7.006359733645468e-05, "loss": 0.49154943227767944, "step": 1508 }, { "epoch": 1.2025114610324896, "grad_norm": 0.4529803766667225, "learning_rate": 7.002335511986577e-05, "loss": 0.5178903341293335, "step": 1509 }, { "epoch": 1.2033087502491528, "grad_norm": 0.4237875082149065, "learning_rate": 6.998309744925411e-05, "loss": 0.4682721495628357, "step": 1510 }, { "epoch": 1.2041060394658163, "grad_norm": 0.481166731073205, "learning_rate": 6.994282435569059e-05, "loss": 0.5302301645278931, "step": 1511 }, { "epoch": 1.2049033286824795, "grad_norm": 0.4488514596269912, "learning_rate": 6.990253587025797e-05, "loss": 0.5108544826507568, "step": 1512 }, { "epoch": 1.205700617899143, "grad_norm": 0.436808946286642, "learning_rate": 6.986223202405088e-05, "loss": 0.5186824798583984, "step": 1513 }, { "epoch": 1.2064979071158062, "grad_norm": 0.47822767963068447, "learning_rate": 6.982191284817581e-05, "loss": 0.5296720862388611, "step": 1514 }, { "epoch": 1.2072951963324696, "grad_norm": 0.4381222455604449, "learning_rate": 6.978157837375114e-05, "loss": 0.4202471077442169, "step": 1515 }, { "epoch": 1.208092485549133, "grad_norm": 0.4338464828965573, "learning_rate": 6.9741228631907e-05, "loss": 0.4488116204738617, "step": 1516 }, { "epoch": 1.2088897747657963, "grad_norm": 0.4125998235826632, "learning_rate": 6.970086365378527e-05, "loss": 0.46495041251182556, "step": 1517 }, { "epoch": 1.2096870639824597, "grad_norm": 0.4470200535882345, "learning_rate": 6.96604834705397e-05, "loss": 0.5239953994750977, "step": 1518 }, { "epoch": 1.210484353199123, "grad_norm": 0.4072789990209941, "learning_rate": 6.962008811333567e-05, "loss": 0.4182446002960205, "step": 1519 }, { "epoch": 1.2112816424157864, "grad_norm": 0.39723637683482815, "learning_rate": 6.957967761335031e-05, "loss": 0.47481268644332886, "step": 1520 }, { "epoch": 1.2120789316324496, "grad_norm": 0.46666741275451284, "learning_rate": 6.953925200177248e-05, "loss": 0.5410287380218506, "step": 1521 }, { "epoch": 1.212876220849113, "grad_norm": 0.45641090324055816, "learning_rate": 6.949881130980259e-05, "loss": 0.5488059520721436, "step": 1522 }, { "epoch": 1.2136735100657763, "grad_norm": 0.45280792404026277, "learning_rate": 6.945835556865281e-05, "loss": 0.4638601243495941, "step": 1523 }, { "epoch": 1.2144707992824397, "grad_norm": 0.5498590227995847, "learning_rate": 6.941788480954687e-05, "loss": 0.4831535220146179, "step": 1524 }, { "epoch": 1.2152680884991032, "grad_norm": 0.4963595920642049, "learning_rate": 6.937739906372011e-05, "loss": 0.5259831547737122, "step": 1525 }, { "epoch": 1.2160653777157664, "grad_norm": 0.41499509739285534, "learning_rate": 6.93368983624194e-05, "loss": 0.3997352123260498, "step": 1526 }, { "epoch": 1.2168626669324296, "grad_norm": 0.41582342941523764, "learning_rate": 6.929638273690316e-05, "loss": 0.48160895705223083, "step": 1527 }, { "epoch": 1.217659956149093, "grad_norm": 0.4599540229008718, "learning_rate": 6.925585221844136e-05, "loss": 0.47102442383766174, "step": 1528 }, { "epoch": 1.2184572453657565, "grad_norm": 0.4276600096954411, "learning_rate": 6.921530683831548e-05, "loss": 0.45254769921302795, "step": 1529 }, { "epoch": 1.2192545345824197, "grad_norm": 0.4585146730877282, "learning_rate": 6.91747466278184e-05, "loss": 0.5099950432777405, "step": 1530 }, { "epoch": 1.2200518237990832, "grad_norm": 0.44398109531863333, "learning_rate": 6.91341716182545e-05, "loss": 0.5317036509513855, "step": 1531 }, { "epoch": 1.2208491130157464, "grad_norm": 0.4184073161517232, "learning_rate": 6.909358184093956e-05, "loss": 0.44132906198501587, "step": 1532 }, { "epoch": 1.2216464022324098, "grad_norm": 0.4359208908745489, "learning_rate": 6.905297732720077e-05, "loss": 0.447479248046875, "step": 1533 }, { "epoch": 1.222443691449073, "grad_norm": 0.4244692605398414, "learning_rate": 6.901235810837669e-05, "loss": 0.46434175968170166, "step": 1534 }, { "epoch": 1.2232409806657365, "grad_norm": 0.4417743734421653, "learning_rate": 6.897172421581723e-05, "loss": 0.5178321003913879, "step": 1535 }, { "epoch": 1.2240382698823997, "grad_norm": 0.42800171243070356, "learning_rate": 6.893107568088359e-05, "loss": 0.4472936689853668, "step": 1536 }, { "epoch": 1.2248355590990632, "grad_norm": 0.48512389200101363, "learning_rate": 6.889041253494837e-05, "loss": 0.5533809065818787, "step": 1537 }, { "epoch": 1.2256328483157266, "grad_norm": 0.4632453570617167, "learning_rate": 6.884973480939531e-05, "loss": 0.4700123071670532, "step": 1538 }, { "epoch": 1.2264301375323898, "grad_norm": 0.44137830478120205, "learning_rate": 6.880904253561953e-05, "loss": 0.5532336235046387, "step": 1539 }, { "epoch": 1.2272274267490533, "grad_norm": 0.5014603279005532, "learning_rate": 6.876833574502728e-05, "loss": 0.5889062285423279, "step": 1540 }, { "epoch": 1.2280247159657165, "grad_norm": 0.4546855356368596, "learning_rate": 6.872761446903607e-05, "loss": 0.44784489274024963, "step": 1541 }, { "epoch": 1.22882200518238, "grad_norm": 0.4376673594237557, "learning_rate": 6.868687873907457e-05, "loss": 0.4821138381958008, "step": 1542 }, { "epoch": 1.2296192943990432, "grad_norm": 0.4335634047699875, "learning_rate": 6.864612858658263e-05, "loss": 0.49023035168647766, "step": 1543 }, { "epoch": 1.2304165836157066, "grad_norm": 0.4527903320820594, "learning_rate": 6.860536404301122e-05, "loss": 0.5102931261062622, "step": 1544 }, { "epoch": 1.2312138728323698, "grad_norm": 0.4701358893443655, "learning_rate": 6.856458513982235e-05, "loss": 0.5063461661338806, "step": 1545 }, { "epoch": 1.2320111620490333, "grad_norm": 0.5006121605231899, "learning_rate": 6.852379190848922e-05, "loss": 0.5705239772796631, "step": 1546 }, { "epoch": 1.2328084512656967, "grad_norm": 0.44636982299550887, "learning_rate": 6.848298438049603e-05, "loss": 0.5276082158088684, "step": 1547 }, { "epoch": 1.23360574048236, "grad_norm": 0.5090620989579024, "learning_rate": 6.844216258733805e-05, "loss": 0.5149741172790527, "step": 1548 }, { "epoch": 1.2344030296990234, "grad_norm": 0.39395999476582316, "learning_rate": 6.840132656052149e-05, "loss": 0.36361241340637207, "step": 1549 }, { "epoch": 1.2352003189156866, "grad_norm": 0.4557078931634818, "learning_rate": 6.83604763315636e-05, "loss": 0.5179538130760193, "step": 1550 }, { "epoch": 1.23599760813235, "grad_norm": 0.49111695406189976, "learning_rate": 6.831961193199261e-05, "loss": 0.564212441444397, "step": 1551 }, { "epoch": 1.2367948973490133, "grad_norm": 0.4254585540021649, "learning_rate": 6.827873339334762e-05, "loss": 0.4942207634449005, "step": 1552 }, { "epoch": 1.2375921865656767, "grad_norm": 0.5140410377030243, "learning_rate": 6.823784074717872e-05, "loss": 0.524287760257721, "step": 1553 }, { "epoch": 1.23838947578234, "grad_norm": 0.4324045812369501, "learning_rate": 6.819693402504681e-05, "loss": 0.44322583079338074, "step": 1554 }, { "epoch": 1.2391867649990034, "grad_norm": 0.5041306676195727, "learning_rate": 6.815601325852372e-05, "loss": 0.5185147523880005, "step": 1555 }, { "epoch": 1.2399840542156668, "grad_norm": 0.4709367567738293, "learning_rate": 6.811507847919208e-05, "loss": 0.5668642520904541, "step": 1556 }, { "epoch": 1.24078134343233, "grad_norm": 0.5190023517784941, "learning_rate": 6.807412971864536e-05, "loss": 0.5851553678512573, "step": 1557 }, { "epoch": 1.2415786326489935, "grad_norm": 0.4096932961525138, "learning_rate": 6.803316700848779e-05, "loss": 0.4508688747882843, "step": 1558 }, { "epoch": 1.2423759218656567, "grad_norm": 0.38065501288857817, "learning_rate": 6.799219038033438e-05, "loss": 0.38686829805374146, "step": 1559 }, { "epoch": 1.2431732110823202, "grad_norm": 0.46848211273414364, "learning_rate": 6.79511998658109e-05, "loss": 0.5236266255378723, "step": 1560 }, { "epoch": 1.2439705002989834, "grad_norm": 0.4381666561096925, "learning_rate": 6.791019549655383e-05, "loss": 0.5232095122337341, "step": 1561 }, { "epoch": 1.2447677895156468, "grad_norm": 0.38931739054716763, "learning_rate": 6.786917730421032e-05, "loss": 0.4062214493751526, "step": 1562 }, { "epoch": 1.24556507873231, "grad_norm": 0.5091538958643107, "learning_rate": 6.782814532043822e-05, "loss": 0.6299790740013123, "step": 1563 }, { "epoch": 1.2463623679489735, "grad_norm": 0.4967656563146406, "learning_rate": 6.7787099576906e-05, "loss": 0.4654979705810547, "step": 1564 }, { "epoch": 1.247159657165637, "grad_norm": 0.4204227044347179, "learning_rate": 6.774604010529277e-05, "loss": 0.4481666386127472, "step": 1565 }, { "epoch": 1.2479569463823001, "grad_norm": 0.4652509424910204, "learning_rate": 6.77049669372882e-05, "loss": 0.536744236946106, "step": 1566 }, { "epoch": 1.2487542355989636, "grad_norm": 0.3982266288678375, "learning_rate": 6.766388010459256e-05, "loss": 0.38330745697021484, "step": 1567 }, { "epoch": 1.2495515248156268, "grad_norm": 0.4837101712017087, "learning_rate": 6.76227796389167e-05, "loss": 0.5140476226806641, "step": 1568 }, { "epoch": 1.2503488140322903, "grad_norm": 0.5145671039146849, "learning_rate": 6.758166557198192e-05, "loss": 0.6239028573036194, "step": 1569 }, { "epoch": 1.2511461032489535, "grad_norm": 0.46152360272974485, "learning_rate": 6.754053793552006e-05, "loss": 0.4876564145088196, "step": 1570 }, { "epoch": 1.251943392465617, "grad_norm": 0.5205654002131365, "learning_rate": 6.749939676127341e-05, "loss": 0.5736894011497498, "step": 1571 }, { "epoch": 1.2527406816822801, "grad_norm": 0.4291641267496757, "learning_rate": 6.745824208099474e-05, "loss": 0.4822772741317749, "step": 1572 }, { "epoch": 1.2535379708989436, "grad_norm": 0.5128212230879317, "learning_rate": 6.741707392644722e-05, "loss": 0.5816131830215454, "step": 1573 }, { "epoch": 1.254335260115607, "grad_norm": 0.5172334125046711, "learning_rate": 6.737589232940444e-05, "loss": 0.5013480186462402, "step": 1574 }, { "epoch": 1.2551325493322703, "grad_norm": 0.49615740598102426, "learning_rate": 6.733469732165034e-05, "loss": 0.6210629343986511, "step": 1575 }, { "epoch": 1.2559298385489337, "grad_norm": 0.47517455103248224, "learning_rate": 6.72934889349792e-05, "loss": 0.5003795027732849, "step": 1576 }, { "epoch": 1.256727127765597, "grad_norm": 0.46853105762508723, "learning_rate": 6.725226720119568e-05, "loss": 0.4428825378417969, "step": 1577 }, { "epoch": 1.2575244169822604, "grad_norm": 0.4211760416035636, "learning_rate": 6.72110321521147e-05, "loss": 0.4154621958732605, "step": 1578 }, { "epoch": 1.2583217061989236, "grad_norm": 0.5017923617325816, "learning_rate": 6.716978381956145e-05, "loss": 0.5936499238014221, "step": 1579 }, { "epoch": 1.259118995415587, "grad_norm": 0.4030504376259383, "learning_rate": 6.71285222353714e-05, "loss": 0.45504939556121826, "step": 1580 }, { "epoch": 1.2599162846322502, "grad_norm": 0.4676566760670475, "learning_rate": 6.708724743139025e-05, "loss": 0.5734666585922241, "step": 1581 }, { "epoch": 1.2607135738489137, "grad_norm": 0.40158193662623964, "learning_rate": 6.704595943947385e-05, "loss": 0.4296387732028961, "step": 1582 }, { "epoch": 1.2615108630655771, "grad_norm": 0.4562669535438313, "learning_rate": 6.70046582914883e-05, "loss": 0.5607947111129761, "step": 1583 }, { "epoch": 1.2623081522822404, "grad_norm": 0.4138023257640188, "learning_rate": 6.696334401930981e-05, "loss": 0.41126155853271484, "step": 1584 }, { "epoch": 1.2631054414989038, "grad_norm": 0.4185604604994389, "learning_rate": 6.692201665482472e-05, "loss": 0.4422800838947296, "step": 1585 }, { "epoch": 1.263902730715567, "grad_norm": 0.45301306197975877, "learning_rate": 6.68806762299295e-05, "loss": 0.4411412179470062, "step": 1586 }, { "epoch": 1.2647000199322305, "grad_norm": 0.3925797302752333, "learning_rate": 6.683932277653067e-05, "loss": 0.34669268131256104, "step": 1587 }, { "epoch": 1.2654973091488937, "grad_norm": 0.4724251287227581, "learning_rate": 6.679795632654481e-05, "loss": 0.580497682094574, "step": 1588 }, { "epoch": 1.2662945983655571, "grad_norm": 0.5061630280031838, "learning_rate": 6.675657691189858e-05, "loss": 0.5519345998764038, "step": 1589 }, { "epoch": 1.2670918875822204, "grad_norm": 0.4736259683331562, "learning_rate": 6.671518456452859e-05, "loss": 0.5066143870353699, "step": 1590 }, { "epoch": 1.2678891767988838, "grad_norm": 0.42348528578266725, "learning_rate": 6.667377931638144e-05, "loss": 0.46788614988327026, "step": 1591 }, { "epoch": 1.2686864660155472, "grad_norm": 0.501212977136781, "learning_rate": 6.663236119941368e-05, "loss": 0.4851151704788208, "step": 1592 }, { "epoch": 1.2694837552322105, "grad_norm": 0.43875797615220663, "learning_rate": 6.659093024559182e-05, "loss": 0.4821646213531494, "step": 1593 }, { "epoch": 1.270281044448874, "grad_norm": 0.5039581150176602, "learning_rate": 6.654948648689228e-05, "loss": 0.5214994549751282, "step": 1594 }, { "epoch": 1.2710783336655371, "grad_norm": 0.429553000002067, "learning_rate": 6.650802995530132e-05, "loss": 0.4804784059524536, "step": 1595 }, { "epoch": 1.2718756228822006, "grad_norm": 0.4620342712009682, "learning_rate": 6.646656068281511e-05, "loss": 0.510259211063385, "step": 1596 }, { "epoch": 1.2726729120988638, "grad_norm": 0.49008982449919036, "learning_rate": 6.642507870143963e-05, "loss": 0.5420605540275574, "step": 1597 }, { "epoch": 1.2734702013155272, "grad_norm": 0.42128498634991063, "learning_rate": 6.638358404319063e-05, "loss": 0.4693329632282257, "step": 1598 }, { "epoch": 1.2742674905321905, "grad_norm": 0.5849849690193772, "learning_rate": 6.634207674009374e-05, "loss": 0.582932710647583, "step": 1599 }, { "epoch": 1.275064779748854, "grad_norm": 0.4685827782321243, "learning_rate": 6.630055682418424e-05, "loss": 0.5217303037643433, "step": 1600 }, { "epoch": 1.2758620689655173, "grad_norm": 0.41371943700488223, "learning_rate": 6.625902432750722e-05, "loss": 0.47999435663223267, "step": 1601 }, { "epoch": 1.2766593581821806, "grad_norm": 0.5128081714047115, "learning_rate": 6.621747928211745e-05, "loss": 0.6690784692764282, "step": 1602 }, { "epoch": 1.2774566473988438, "grad_norm": 0.5081662131584379, "learning_rate": 6.617592172007938e-05, "loss": 0.5649018287658691, "step": 1603 }, { "epoch": 1.2782539366155072, "grad_norm": 0.4564511504540423, "learning_rate": 6.613435167346715e-05, "loss": 0.4591326117515564, "step": 1604 }, { "epoch": 1.2790512258321707, "grad_norm": 0.44337940486209126, "learning_rate": 6.60927691743645e-05, "loss": 0.5408031344413757, "step": 1605 }, { "epoch": 1.279848515048834, "grad_norm": 0.45388477450265835, "learning_rate": 6.605117425486482e-05, "loss": 0.5312472581863403, "step": 1606 }, { "epoch": 1.2806458042654973, "grad_norm": 0.43835946576614854, "learning_rate": 6.600956694707103e-05, "loss": 0.4766298532485962, "step": 1607 }, { "epoch": 1.2814430934821606, "grad_norm": 0.40621576787948094, "learning_rate": 6.596794728309567e-05, "loss": 0.4883287847042084, "step": 1608 }, { "epoch": 1.282240382698824, "grad_norm": 0.5257451637435071, "learning_rate": 6.592631529506079e-05, "loss": 0.6106482744216919, "step": 1609 }, { "epoch": 1.2830376719154875, "grad_norm": 0.45546311161803693, "learning_rate": 6.588467101509793e-05, "loss": 0.4402807056903839, "step": 1610 }, { "epoch": 1.2838349611321507, "grad_norm": 0.4057211936895401, "learning_rate": 6.584301447534817e-05, "loss": 0.4559544026851654, "step": 1611 }, { "epoch": 1.284632250348814, "grad_norm": 0.4703310275487352, "learning_rate": 6.580134570796199e-05, "loss": 0.5284522175788879, "step": 1612 }, { "epoch": 1.2854295395654773, "grad_norm": 0.5083260249247837, "learning_rate": 6.575966474509937e-05, "loss": 0.5571845769882202, "step": 1613 }, { "epoch": 1.2862268287821408, "grad_norm": 0.43271049677234563, "learning_rate": 6.571797161892963e-05, "loss": 0.4750712811946869, "step": 1614 }, { "epoch": 1.287024117998804, "grad_norm": 0.4989550032734919, "learning_rate": 6.567626636163158e-05, "loss": 0.6241389513015747, "step": 1615 }, { "epoch": 1.2878214072154675, "grad_norm": 0.5180883774431019, "learning_rate": 6.56345490053933e-05, "loss": 0.5247232913970947, "step": 1616 }, { "epoch": 1.2886186964321307, "grad_norm": 0.4847804337595767, "learning_rate": 6.559281958241221e-05, "loss": 0.5594719648361206, "step": 1617 }, { "epoch": 1.2894159856487941, "grad_norm": 0.5813406793906513, "learning_rate": 6.555107812489513e-05, "loss": 0.6504477262496948, "step": 1618 }, { "epoch": 1.2902132748654576, "grad_norm": 0.46491970114941616, "learning_rate": 6.550932466505806e-05, "loss": 0.4737130105495453, "step": 1619 }, { "epoch": 1.2910105640821208, "grad_norm": 0.4722673310251764, "learning_rate": 6.546755923512634e-05, "loss": 0.44050654768943787, "step": 1620 }, { "epoch": 1.291807853298784, "grad_norm": 0.4886694227758835, "learning_rate": 6.542578186733453e-05, "loss": 0.5468002557754517, "step": 1621 }, { "epoch": 1.2926051425154474, "grad_norm": 0.4937062875096124, "learning_rate": 6.538399259392637e-05, "loss": 0.5746578574180603, "step": 1622 }, { "epoch": 1.293402431732111, "grad_norm": 0.5056609475466062, "learning_rate": 6.534219144715484e-05, "loss": 0.6495195627212524, "step": 1623 }, { "epoch": 1.2941997209487741, "grad_norm": 0.41737844534230745, "learning_rate": 6.530037845928204e-05, "loss": 0.4784516394138336, "step": 1624 }, { "epoch": 1.2949970101654376, "grad_norm": 0.46798977246826035, "learning_rate": 6.525855366257926e-05, "loss": 0.4820546507835388, "step": 1625 }, { "epoch": 1.2957942993821008, "grad_norm": 0.47216959212610604, "learning_rate": 6.521671708932684e-05, "loss": 0.5192359089851379, "step": 1626 }, { "epoch": 1.2965915885987642, "grad_norm": 0.48243461791465564, "learning_rate": 6.517486877181425e-05, "loss": 0.5110974311828613, "step": 1627 }, { "epoch": 1.2973888778154277, "grad_norm": 0.4702772875900189, "learning_rate": 6.513300874234002e-05, "loss": 0.5215347409248352, "step": 1628 }, { "epoch": 1.2981861670320909, "grad_norm": 0.4727792728394112, "learning_rate": 6.509113703321174e-05, "loss": 0.5824134945869446, "step": 1629 }, { "epoch": 1.298983456248754, "grad_norm": 0.4878233721928825, "learning_rate": 6.504925367674594e-05, "loss": 0.5934640765190125, "step": 1630 }, { "epoch": 1.2997807454654176, "grad_norm": 0.42983809957042296, "learning_rate": 6.500735870526822e-05, "loss": 0.5003533363342285, "step": 1631 }, { "epoch": 1.300578034682081, "grad_norm": 0.4579115859890835, "learning_rate": 6.49654521511131e-05, "loss": 0.5683318376541138, "step": 1632 }, { "epoch": 1.3013753238987442, "grad_norm": 0.46864365167086613, "learning_rate": 6.492353404662406e-05, "loss": 0.5180444717407227, "step": 1633 }, { "epoch": 1.3021726131154077, "grad_norm": 0.44811523674841275, "learning_rate": 6.488160442415349e-05, "loss": 0.5062253475189209, "step": 1634 }, { "epoch": 1.3029699023320709, "grad_norm": 0.42883620862564825, "learning_rate": 6.483966331606265e-05, "loss": 0.47391775250434875, "step": 1635 }, { "epoch": 1.3037671915487343, "grad_norm": 0.4719937310869798, "learning_rate": 6.47977107547217e-05, "loss": 0.5067468285560608, "step": 1636 }, { "epoch": 1.3045644807653978, "grad_norm": 0.41319265270339434, "learning_rate": 6.475574677250962e-05, "loss": 0.4175983965396881, "step": 1637 }, { "epoch": 1.305361769982061, "grad_norm": 0.43584095465907785, "learning_rate": 6.47137714018142e-05, "loss": 0.5315756797790527, "step": 1638 }, { "epoch": 1.3061590591987242, "grad_norm": 0.5136189256952794, "learning_rate": 6.467178467503203e-05, "loss": 0.6006282567977905, "step": 1639 }, { "epoch": 1.3069563484153877, "grad_norm": 0.4101948265820167, "learning_rate": 6.462978662456843e-05, "loss": 0.42632830142974854, "step": 1640 }, { "epoch": 1.307753637632051, "grad_norm": 0.45648271945190844, "learning_rate": 6.458777728283754e-05, "loss": 0.49287858605384827, "step": 1641 }, { "epoch": 1.3085509268487143, "grad_norm": 0.4881417316044207, "learning_rate": 6.454575668226215e-05, "loss": 0.5806134939193726, "step": 1642 }, { "epoch": 1.3093482160653778, "grad_norm": 0.4926306631051945, "learning_rate": 6.450372485527373e-05, "loss": 0.5407088994979858, "step": 1643 }, { "epoch": 1.310145505282041, "grad_norm": 0.449598358806197, "learning_rate": 6.446168183431244e-05, "loss": 0.5542231798171997, "step": 1644 }, { "epoch": 1.3109427944987044, "grad_norm": 0.44514530941702996, "learning_rate": 6.441962765182711e-05, "loss": 0.44734346866607666, "step": 1645 }, { "epoch": 1.3117400837153679, "grad_norm": 0.4777249743562594, "learning_rate": 6.437756234027512e-05, "loss": 0.49089106917381287, "step": 1646 }, { "epoch": 1.312537372932031, "grad_norm": 0.46410517944709245, "learning_rate": 6.43354859321225e-05, "loss": 0.5057041645050049, "step": 1647 }, { "epoch": 1.3133346621486943, "grad_norm": 0.5506105960211825, "learning_rate": 6.429339845984376e-05, "loss": 0.5880754590034485, "step": 1648 }, { "epoch": 1.3141319513653578, "grad_norm": 0.5303765694513712, "learning_rate": 6.42512999559221e-05, "loss": 0.6427692174911499, "step": 1649 }, { "epoch": 1.3149292405820212, "grad_norm": 0.5188175935644069, "learning_rate": 6.420919045284904e-05, "loss": 0.560006856918335, "step": 1650 }, { "epoch": 1.3157265297986844, "grad_norm": 0.4409351989048046, "learning_rate": 6.416706998312476e-05, "loss": 0.4211207926273346, "step": 1651 }, { "epoch": 1.3165238190153479, "grad_norm": 0.4755737779489395, "learning_rate": 6.41249385792578e-05, "loss": 0.5165649652481079, "step": 1652 }, { "epoch": 1.317321108232011, "grad_norm": 0.4304387803937783, "learning_rate": 6.408279627376516e-05, "loss": 0.4144556522369385, "step": 1653 }, { "epoch": 1.3181183974486745, "grad_norm": 0.4425676441103072, "learning_rate": 6.404064309917231e-05, "loss": 0.4798901379108429, "step": 1654 }, { "epoch": 1.318915686665338, "grad_norm": 0.4372785619437003, "learning_rate": 6.399847908801303e-05, "loss": 0.43967393040657043, "step": 1655 }, { "epoch": 1.3197129758820012, "grad_norm": 0.45632470710834666, "learning_rate": 6.39563042728295e-05, "loss": 0.5157422423362732, "step": 1656 }, { "epoch": 1.3205102650986644, "grad_norm": 0.5192443101481089, "learning_rate": 6.391411868617226e-05, "loss": 0.6544426083564758, "step": 1657 }, { "epoch": 1.3213075543153279, "grad_norm": 0.47826474908129046, "learning_rate": 6.38719223606001e-05, "loss": 0.5363085269927979, "step": 1658 }, { "epoch": 1.3221048435319913, "grad_norm": 0.4866157946304911, "learning_rate": 6.382971532868018e-05, "loss": 0.5176933407783508, "step": 1659 }, { "epoch": 1.3229021327486545, "grad_norm": 0.4597870831510972, "learning_rate": 6.378749762298785e-05, "loss": 0.4919046461582184, "step": 1660 }, { "epoch": 1.323699421965318, "grad_norm": 0.3611744312613514, "learning_rate": 6.374526927610674e-05, "loss": 0.3468974530696869, "step": 1661 }, { "epoch": 1.3244967111819812, "grad_norm": 0.50739448237261, "learning_rate": 6.370303032062868e-05, "loss": 0.5785070061683655, "step": 1662 }, { "epoch": 1.3252940003986446, "grad_norm": 0.48227629779299236, "learning_rate": 6.366078078915372e-05, "loss": 0.4900100827217102, "step": 1663 }, { "epoch": 1.326091289615308, "grad_norm": 0.3615858857704086, "learning_rate": 6.361852071428999e-05, "loss": 0.3681463599205017, "step": 1664 }, { "epoch": 1.3268885788319713, "grad_norm": 0.487793586306688, "learning_rate": 6.357625012865385e-05, "loss": 0.49310585856437683, "step": 1665 }, { "epoch": 1.3276858680486345, "grad_norm": 0.46332401926009836, "learning_rate": 6.353396906486971e-05, "loss": 0.5131553411483765, "step": 1666 }, { "epoch": 1.328483157265298, "grad_norm": 0.45294056807345195, "learning_rate": 6.34916775555701e-05, "loss": 0.5310300588607788, "step": 1667 }, { "epoch": 1.3292804464819614, "grad_norm": 0.46921263667776736, "learning_rate": 6.344937563339561e-05, "loss": 0.47366803884506226, "step": 1668 }, { "epoch": 1.3300777356986246, "grad_norm": 0.44010744821348174, "learning_rate": 6.340706333099484e-05, "loss": 0.45985350012779236, "step": 1669 }, { "epoch": 1.330875024915288, "grad_norm": 0.42973402372394415, "learning_rate": 6.336474068102443e-05, "loss": 0.4982597231864929, "step": 1670 }, { "epoch": 1.3316723141319513, "grad_norm": 0.4963817724381276, "learning_rate": 6.332240771614898e-05, "loss": 0.5466208457946777, "step": 1671 }, { "epoch": 1.3324696033486148, "grad_norm": 0.4536702370696285, "learning_rate": 6.32800644690411e-05, "loss": 0.4615926146507263, "step": 1672 }, { "epoch": 1.333266892565278, "grad_norm": 0.4341959602981462, "learning_rate": 6.323771097238127e-05, "loss": 0.4577060639858246, "step": 1673 }, { "epoch": 1.3340641817819414, "grad_norm": 0.47207363648457723, "learning_rate": 6.319534725885792e-05, "loss": 0.5384096503257751, "step": 1674 }, { "epoch": 1.3348614709986046, "grad_norm": 0.42235743399287434, "learning_rate": 6.315297336116736e-05, "loss": 0.4686552882194519, "step": 1675 }, { "epoch": 1.335658760215268, "grad_norm": 0.4392093351241457, "learning_rate": 6.311058931201376e-05, "loss": 0.5149237513542175, "step": 1676 }, { "epoch": 1.3364560494319315, "grad_norm": 0.452456055923063, "learning_rate": 6.306819514410914e-05, "loss": 0.4913617968559265, "step": 1677 }, { "epoch": 1.3372533386485947, "grad_norm": 0.5016650477229664, "learning_rate": 6.302579089017327e-05, "loss": 0.5559096336364746, "step": 1678 }, { "epoch": 1.3380506278652582, "grad_norm": 0.4722844176859, "learning_rate": 6.298337658293379e-05, "loss": 0.514155924320221, "step": 1679 }, { "epoch": 1.3388479170819214, "grad_norm": 0.3982803616720566, "learning_rate": 6.294095225512603e-05, "loss": 0.3633560240268707, "step": 1680 }, { "epoch": 1.3396452062985849, "grad_norm": 0.5059117668438907, "learning_rate": 6.289851793949311e-05, "loss": 0.5423291921615601, "step": 1681 }, { "epoch": 1.340442495515248, "grad_norm": 0.38483477503087127, "learning_rate": 6.28560736687858e-05, "loss": 0.34767112135887146, "step": 1682 }, { "epoch": 1.3412397847319115, "grad_norm": 0.45099820131884416, "learning_rate": 6.281361947576261e-05, "loss": 0.4875020682811737, "step": 1683 }, { "epoch": 1.3420370739485747, "grad_norm": 0.520914193268141, "learning_rate": 6.277115539318963e-05, "loss": 0.5020010471343994, "step": 1684 }, { "epoch": 1.3428343631652382, "grad_norm": 0.5100348955680284, "learning_rate": 6.272868145384069e-05, "loss": 0.5660788416862488, "step": 1685 }, { "epoch": 1.3436316523819016, "grad_norm": 0.46757933133243407, "learning_rate": 6.268619769049713e-05, "loss": 0.4886610209941864, "step": 1686 }, { "epoch": 1.3444289415985649, "grad_norm": 0.4216397735794579, "learning_rate": 6.264370413594793e-05, "loss": 0.49027135968208313, "step": 1687 }, { "epoch": 1.3452262308152283, "grad_norm": 0.5806356491959862, "learning_rate": 6.260120082298962e-05, "loss": 0.5910488367080688, "step": 1688 }, { "epoch": 1.3460235200318915, "grad_norm": 0.4864539251357754, "learning_rate": 6.255868778442623e-05, "loss": 0.5393929481506348, "step": 1689 }, { "epoch": 1.346820809248555, "grad_norm": 0.46612882445675846, "learning_rate": 6.251616505306933e-05, "loss": 0.49283477663993835, "step": 1690 }, { "epoch": 1.3476180984652182, "grad_norm": 0.4351874441827576, "learning_rate": 6.247363266173796e-05, "loss": 0.4798882007598877, "step": 1691 }, { "epoch": 1.3484153876818816, "grad_norm": 0.5437408383205365, "learning_rate": 6.24310906432586e-05, "loss": 0.41987156867980957, "step": 1692 }, { "epoch": 1.3492126768985448, "grad_norm": 0.46002603193729125, "learning_rate": 6.23885390304652e-05, "loss": 0.5146379470825195, "step": 1693 }, { "epoch": 1.3500099661152083, "grad_norm": 0.47630020280506385, "learning_rate": 6.234597785619905e-05, "loss": 0.50115966796875, "step": 1694 }, { "epoch": 1.3508072553318717, "grad_norm": 0.46456476733723356, "learning_rate": 6.230340715330889e-05, "loss": 0.4563947916030884, "step": 1695 }, { "epoch": 1.351604544548535, "grad_norm": 0.4844807563837915, "learning_rate": 6.22608269546508e-05, "loss": 0.5505249500274658, "step": 1696 }, { "epoch": 1.3524018337651984, "grad_norm": 0.5176515511931443, "learning_rate": 6.221823729308811e-05, "loss": 0.6409153938293457, "step": 1697 }, { "epoch": 1.3531991229818616, "grad_norm": 0.4022522942997168, "learning_rate": 6.217563820149157e-05, "loss": 0.4576549530029297, "step": 1698 }, { "epoch": 1.353996412198525, "grad_norm": 0.5684400888722582, "learning_rate": 6.213302971273911e-05, "loss": 0.6325719952583313, "step": 1699 }, { "epoch": 1.3547937014151883, "grad_norm": 0.47277081642467844, "learning_rate": 6.209041185971599e-05, "loss": 0.592699408531189, "step": 1700 }, { "epoch": 1.3555909906318517, "grad_norm": 0.45513083339923044, "learning_rate": 6.204778467531466e-05, "loss": 0.525570273399353, "step": 1701 }, { "epoch": 1.356388279848515, "grad_norm": 0.5191047300000383, "learning_rate": 6.200514819243476e-05, "loss": 0.5483945608139038, "step": 1702 }, { "epoch": 1.3571855690651784, "grad_norm": 0.4518694515926253, "learning_rate": 6.196250244398312e-05, "loss": 0.45427972078323364, "step": 1703 }, { "epoch": 1.3579828582818418, "grad_norm": 0.5031391856573866, "learning_rate": 6.191984746287374e-05, "loss": 0.5627346634864807, "step": 1704 }, { "epoch": 1.358780147498505, "grad_norm": 0.47588958802608883, "learning_rate": 6.187718328202772e-05, "loss": 0.5703861117362976, "step": 1705 }, { "epoch": 1.3595774367151685, "grad_norm": 0.49433418655418493, "learning_rate": 6.183450993437327e-05, "loss": 0.5336759090423584, "step": 1706 }, { "epoch": 1.3603747259318317, "grad_norm": 0.43473205839692625, "learning_rate": 6.179182745284567e-05, "loss": 0.5186454653739929, "step": 1707 }, { "epoch": 1.3611720151484952, "grad_norm": 0.4472487241514416, "learning_rate": 6.174913587038724e-05, "loss": 0.4652584195137024, "step": 1708 }, { "epoch": 1.3619693043651584, "grad_norm": 0.43585047240371344, "learning_rate": 6.170643521994738e-05, "loss": 0.44982442259788513, "step": 1709 }, { "epoch": 1.3627665935818218, "grad_norm": 0.49250832947723855, "learning_rate": 6.166372553448241e-05, "loss": 0.5337331295013428, "step": 1710 }, { "epoch": 1.363563882798485, "grad_norm": 0.46577790882790016, "learning_rate": 6.162100684695567e-05, "loss": 0.5563732385635376, "step": 1711 }, { "epoch": 1.3643611720151485, "grad_norm": 0.5048121113029718, "learning_rate": 6.157827919033745e-05, "loss": 0.6077046990394592, "step": 1712 }, { "epoch": 1.365158461231812, "grad_norm": 0.4770554122304231, "learning_rate": 6.15355425976049e-05, "loss": 0.5311509370803833, "step": 1713 }, { "epoch": 1.3659557504484752, "grad_norm": 0.5176931831544979, "learning_rate": 6.149279710174218e-05, "loss": 0.49574607610702515, "step": 1714 }, { "epoch": 1.3667530396651384, "grad_norm": 0.48689944867799156, "learning_rate": 6.145004273574023e-05, "loss": 0.5102576017379761, "step": 1715 }, { "epoch": 1.3675503288818018, "grad_norm": 0.43501026186024827, "learning_rate": 6.140727953259685e-05, "loss": 0.498335599899292, "step": 1716 }, { "epoch": 1.3683476180984653, "grad_norm": 0.4689552030349595, "learning_rate": 6.13645075253167e-05, "loss": 0.5465537905693054, "step": 1717 }, { "epoch": 1.3691449073151285, "grad_norm": 0.4971721646013969, "learning_rate": 6.132172674691119e-05, "loss": 0.5545504689216614, "step": 1718 }, { "epoch": 1.369942196531792, "grad_norm": 0.4491889224786699, "learning_rate": 6.127893723039852e-05, "loss": 0.47210025787353516, "step": 1719 }, { "epoch": 1.3707394857484552, "grad_norm": 0.39006634723848765, "learning_rate": 6.123613900880366e-05, "loss": 0.4592728614807129, "step": 1720 }, { "epoch": 1.3715367749651186, "grad_norm": 0.52757048883828, "learning_rate": 6.119333211515823e-05, "loss": 0.6252458095550537, "step": 1721 }, { "epoch": 1.372334064181782, "grad_norm": 0.46931763936117676, "learning_rate": 6.115051658250063e-05, "loss": 0.497394859790802, "step": 1722 }, { "epoch": 1.3731313533984453, "grad_norm": 0.4683993477757472, "learning_rate": 6.110769244387583e-05, "loss": 0.5576981902122498, "step": 1723 }, { "epoch": 1.3739286426151085, "grad_norm": 0.5431748565456203, "learning_rate": 6.106485973233556e-05, "loss": 0.7075527310371399, "step": 1724 }, { "epoch": 1.374725931831772, "grad_norm": 0.45182094784591165, "learning_rate": 6.102201848093804e-05, "loss": 0.48359233140945435, "step": 1725 }, { "epoch": 1.3755232210484354, "grad_norm": 0.4561303407523485, "learning_rate": 6.097916872274815e-05, "loss": 0.44662201404571533, "step": 1726 }, { "epoch": 1.3763205102650986, "grad_norm": 0.44508216862011996, "learning_rate": 6.093631049083737e-05, "loss": 0.49609440565109253, "step": 1727 }, { "epoch": 1.377117799481762, "grad_norm": 0.454390154463056, "learning_rate": 6.089344381828363e-05, "loss": 0.526128351688385, "step": 1728 }, { "epoch": 1.3779150886984253, "grad_norm": 0.429748911954788, "learning_rate": 6.085056873817144e-05, "loss": 0.46070659160614014, "step": 1729 }, { "epoch": 1.3787123779150887, "grad_norm": 0.5407537256970921, "learning_rate": 6.08076852835918e-05, "loss": 0.6032283306121826, "step": 1730 }, { "epoch": 1.3795096671317522, "grad_norm": 0.44663799334848187, "learning_rate": 6.076479348764214e-05, "loss": 0.49246662855148315, "step": 1731 }, { "epoch": 1.3803069563484154, "grad_norm": 0.4842518976986465, "learning_rate": 6.072189338342632e-05, "loss": 0.5196794867515564, "step": 1732 }, { "epoch": 1.3811042455650786, "grad_norm": 0.48215669264645683, "learning_rate": 6.0678985004054664e-05, "loss": 0.5471093058586121, "step": 1733 }, { "epoch": 1.381901534781742, "grad_norm": 0.5458762690099679, "learning_rate": 6.063606838264384e-05, "loss": 0.5862388610839844, "step": 1734 }, { "epoch": 1.3826988239984055, "grad_norm": 0.46143117039423187, "learning_rate": 6.059314355231691e-05, "loss": 0.43560534715652466, "step": 1735 }, { "epoch": 1.3834961132150687, "grad_norm": 0.5005989207868533, "learning_rate": 6.055021054620322e-05, "loss": 0.5110834836959839, "step": 1736 }, { "epoch": 1.3842934024317322, "grad_norm": 0.44883745251718554, "learning_rate": 6.050726939743848e-05, "loss": 0.4792291522026062, "step": 1737 }, { "epoch": 1.3850906916483954, "grad_norm": 0.4511230512834271, "learning_rate": 6.0464320139164665e-05, "loss": 0.47305208444595337, "step": 1738 }, { "epoch": 1.3858879808650588, "grad_norm": 0.4906055173386411, "learning_rate": 6.042136280452998e-05, "loss": 0.48942261934280396, "step": 1739 }, { "epoch": 1.3866852700817223, "grad_norm": 0.5188395984285818, "learning_rate": 6.037839742668893e-05, "loss": 0.5652337670326233, "step": 1740 }, { "epoch": 1.3874825592983855, "grad_norm": 0.44816407371712497, "learning_rate": 6.0335424038802156e-05, "loss": 0.4621368944644928, "step": 1741 }, { "epoch": 1.3882798485150487, "grad_norm": 0.4591194411172283, "learning_rate": 6.029244267403652e-05, "loss": 0.5019533634185791, "step": 1742 }, { "epoch": 1.3890771377317122, "grad_norm": 0.49955089438673306, "learning_rate": 6.0249453365565045e-05, "loss": 0.6402943134307861, "step": 1743 }, { "epoch": 1.3898744269483756, "grad_norm": 0.4348199084337703, "learning_rate": 6.020645614656686e-05, "loss": 0.41986584663391113, "step": 1744 }, { "epoch": 1.3906717161650388, "grad_norm": 0.4385615843771412, "learning_rate": 6.016345105022722e-05, "loss": 0.45487841963768005, "step": 1745 }, { "epoch": 1.3914690053817023, "grad_norm": 0.3854808513192511, "learning_rate": 6.0120438109737454e-05, "loss": 0.3540864884853363, "step": 1746 }, { "epoch": 1.3922662945983655, "grad_norm": 0.4886763795301402, "learning_rate": 6.007741735829493e-05, "loss": 0.5004433989524841, "step": 1747 }, { "epoch": 1.393063583815029, "grad_norm": 0.47428375687424024, "learning_rate": 6.0034388829103086e-05, "loss": 0.5705279111862183, "step": 1748 }, { "epoch": 1.3938608730316924, "grad_norm": 0.4882242983941074, "learning_rate": 5.999135255537132e-05, "loss": 0.4707436263561249, "step": 1749 }, { "epoch": 1.3946581622483556, "grad_norm": 0.47207930106069856, "learning_rate": 5.994830857031499e-05, "loss": 0.4840267598628998, "step": 1750 }, { "epoch": 1.3954554514650188, "grad_norm": 0.5320523426364019, "learning_rate": 5.990525690715549e-05, "loss": 0.515528678894043, "step": 1751 }, { "epoch": 1.3962527406816823, "grad_norm": 0.47695810303586256, "learning_rate": 5.9862197599120054e-05, "loss": 0.5318611264228821, "step": 1752 }, { "epoch": 1.3970500298983457, "grad_norm": 0.4868729689140135, "learning_rate": 5.981913067944187e-05, "loss": 0.47635751962661743, "step": 1753 }, { "epoch": 1.397847319115009, "grad_norm": 0.44887829623143627, "learning_rate": 5.9776056181359964e-05, "loss": 0.4685096740722656, "step": 1754 }, { "epoch": 1.3986446083316724, "grad_norm": 0.4002299910485173, "learning_rate": 5.973297413811923e-05, "loss": 0.39276841282844543, "step": 1755 }, { "epoch": 1.3994418975483356, "grad_norm": 0.46159437272461223, "learning_rate": 5.96898845829704e-05, "loss": 0.5248412489891052, "step": 1756 }, { "epoch": 1.400239186764999, "grad_norm": 0.4379359275543135, "learning_rate": 5.964678754916998e-05, "loss": 0.46773216128349304, "step": 1757 }, { "epoch": 1.4010364759816625, "grad_norm": 0.4987832184901371, "learning_rate": 5.960368306998023e-05, "loss": 0.6108304262161255, "step": 1758 }, { "epoch": 1.4018337651983257, "grad_norm": 0.48083913198708966, "learning_rate": 5.956057117866922e-05, "loss": 0.6313371658325195, "step": 1759 }, { "epoch": 1.402631054414989, "grad_norm": 0.4603787216675076, "learning_rate": 5.9517451908510666e-05, "loss": 0.5098650455474854, "step": 1760 }, { "epoch": 1.4034283436316524, "grad_norm": 0.47773152630326415, "learning_rate": 5.947432529278405e-05, "loss": 0.516000509262085, "step": 1761 }, { "epoch": 1.4042256328483158, "grad_norm": 0.44291982237682115, "learning_rate": 5.9431191364774484e-05, "loss": 0.49521496891975403, "step": 1762 }, { "epoch": 1.405022922064979, "grad_norm": 0.4428037127877572, "learning_rate": 5.938805015777268e-05, "loss": 0.46004652976989746, "step": 1763 }, { "epoch": 1.4058202112816425, "grad_norm": 0.44975947475182826, "learning_rate": 5.934490170507506e-05, "loss": 0.510112464427948, "step": 1764 }, { "epoch": 1.4066175004983057, "grad_norm": 0.4628228236202424, "learning_rate": 5.930174603998359e-05, "loss": 0.5048148036003113, "step": 1765 }, { "epoch": 1.4074147897149691, "grad_norm": 0.5142379108852085, "learning_rate": 5.9258583195805797e-05, "loss": 0.586459219455719, "step": 1766 }, { "epoch": 1.4082120789316326, "grad_norm": 0.5146690578173109, "learning_rate": 5.9215413205854754e-05, "loss": 0.4926565885543823, "step": 1767 }, { "epoch": 1.4090093681482958, "grad_norm": 0.4828013792048509, "learning_rate": 5.917223610344906e-05, "loss": 0.5353639125823975, "step": 1768 }, { "epoch": 1.409806657364959, "grad_norm": 0.4824587662944911, "learning_rate": 5.91290519219128e-05, "loss": 0.5120559930801392, "step": 1769 }, { "epoch": 1.4106039465816225, "grad_norm": 0.40850302556551277, "learning_rate": 5.908586069457552e-05, "loss": 0.47584158182144165, "step": 1770 }, { "epoch": 1.411401235798286, "grad_norm": 0.42709423191031537, "learning_rate": 5.904266245477219e-05, "loss": 0.44415444135665894, "step": 1771 }, { "epoch": 1.4121985250149491, "grad_norm": 0.4991029772556794, "learning_rate": 5.899945723584324e-05, "loss": 0.6199991703033447, "step": 1772 }, { "epoch": 1.4129958142316126, "grad_norm": 0.4833509939557793, "learning_rate": 5.895624507113441e-05, "loss": 0.5195547342300415, "step": 1773 }, { "epoch": 1.4137931034482758, "grad_norm": 0.47679946893949576, "learning_rate": 5.891302599399685e-05, "loss": 0.6356580257415771, "step": 1774 }, { "epoch": 1.4145903926649392, "grad_norm": 0.42230312966629063, "learning_rate": 5.886980003778707e-05, "loss": 0.46468648314476013, "step": 1775 }, { "epoch": 1.4153876818816027, "grad_norm": 0.44926928912961955, "learning_rate": 5.8826567235866814e-05, "loss": 0.4759521484375, "step": 1776 }, { "epoch": 1.416184971098266, "grad_norm": 0.5057929385365133, "learning_rate": 5.878332762160319e-05, "loss": 0.5225579142570496, "step": 1777 }, { "epoch": 1.4169822603149291, "grad_norm": 0.5680839416377449, "learning_rate": 5.874008122836849e-05, "loss": 0.663547158241272, "step": 1778 }, { "epoch": 1.4177795495315926, "grad_norm": 0.44667720187006776, "learning_rate": 5.8696828089540314e-05, "loss": 0.44894009828567505, "step": 1779 }, { "epoch": 1.418576838748256, "grad_norm": 0.5396704131986362, "learning_rate": 5.865356823850141e-05, "loss": 0.5051974058151245, "step": 1780 }, { "epoch": 1.4193741279649192, "grad_norm": 0.44067500094222156, "learning_rate": 5.86103017086397e-05, "loss": 0.4172391891479492, "step": 1781 }, { "epoch": 1.4201714171815827, "grad_norm": 0.4786043581591068, "learning_rate": 5.8567028533348336e-05, "loss": 0.4653257131576538, "step": 1782 }, { "epoch": 1.420968706398246, "grad_norm": 0.4142471487061719, "learning_rate": 5.8523748746025517e-05, "loss": 0.4168768525123596, "step": 1783 }, { "epoch": 1.4217659956149094, "grad_norm": 0.4805525604557964, "learning_rate": 5.848046238007457e-05, "loss": 0.534494936466217, "step": 1784 }, { "epoch": 1.4225632848315726, "grad_norm": 0.47605518323336166, "learning_rate": 5.8437169468903926e-05, "loss": 0.5261463522911072, "step": 1785 }, { "epoch": 1.423360574048236, "grad_norm": 0.4461897460394223, "learning_rate": 5.839387004592705e-05, "loss": 0.5490058660507202, "step": 1786 }, { "epoch": 1.4241578632648992, "grad_norm": 0.452975844274656, "learning_rate": 5.83505641445624e-05, "loss": 0.5365421772003174, "step": 1787 }, { "epoch": 1.4249551524815627, "grad_norm": 0.4825813307188572, "learning_rate": 5.830725179823352e-05, "loss": 0.4485633671283722, "step": 1788 }, { "epoch": 1.4257524416982261, "grad_norm": 0.4488804718797838, "learning_rate": 5.826393304036881e-05, "loss": 0.4610632359981537, "step": 1789 }, { "epoch": 1.4265497309148893, "grad_norm": 0.4418216245436144, "learning_rate": 5.8220607904401725e-05, "loss": 0.40615254640579224, "step": 1790 }, { "epoch": 1.4273470201315528, "grad_norm": 0.44461369085267965, "learning_rate": 5.8177276423770574e-05, "loss": 0.5038137435913086, "step": 1791 }, { "epoch": 1.428144309348216, "grad_norm": 0.40315775627310707, "learning_rate": 5.81339386319186e-05, "loss": 0.39354777336120605, "step": 1792 }, { "epoch": 1.4289415985648795, "grad_norm": 0.49077772817630233, "learning_rate": 5.809059456229392e-05, "loss": 0.548068642616272, "step": 1793 }, { "epoch": 1.4297388877815427, "grad_norm": 0.49863911932376903, "learning_rate": 5.804724424834944e-05, "loss": 0.5291998386383057, "step": 1794 }, { "epoch": 1.4305361769982061, "grad_norm": 0.5385671545640839, "learning_rate": 5.800388772354297e-05, "loss": 0.6042247414588928, "step": 1795 }, { "epoch": 1.4313334662148693, "grad_norm": 0.4706702848087096, "learning_rate": 5.796052502133704e-05, "loss": 0.4132844805717468, "step": 1796 }, { "epoch": 1.4321307554315328, "grad_norm": 0.49156010237916103, "learning_rate": 5.7917156175198995e-05, "loss": 0.5617932081222534, "step": 1797 }, { "epoch": 1.4329280446481962, "grad_norm": 0.44483502200416725, "learning_rate": 5.78737812186009e-05, "loss": 0.4341007471084595, "step": 1798 }, { "epoch": 1.4337253338648595, "grad_norm": 0.4702116918906812, "learning_rate": 5.783040018501953e-05, "loss": 0.4763357639312744, "step": 1799 }, { "epoch": 1.434522623081523, "grad_norm": 0.5508044613161389, "learning_rate": 5.778701310793636e-05, "loss": 0.556450605392456, "step": 1800 }, { "epoch": 1.4353199122981861, "grad_norm": 0.4820833912724316, "learning_rate": 5.774362002083756e-05, "loss": 0.5044284462928772, "step": 1801 }, { "epoch": 1.4361172015148496, "grad_norm": 0.4540303428593206, "learning_rate": 5.7700220957213877e-05, "loss": 0.5377346277236938, "step": 1802 }, { "epoch": 1.4369144907315128, "grad_norm": 0.4824962082260507, "learning_rate": 5.765681595056072e-05, "loss": 0.4817813038825989, "step": 1803 }, { "epoch": 1.4377117799481762, "grad_norm": 0.4107439761411929, "learning_rate": 5.7613405034378065e-05, "loss": 0.44365012645721436, "step": 1804 }, { "epoch": 1.4385090691648394, "grad_norm": 0.4382808702263369, "learning_rate": 5.7569988242170426e-05, "loss": 0.45413392782211304, "step": 1805 }, { "epoch": 1.439306358381503, "grad_norm": 0.4301672599713722, "learning_rate": 5.752656560744692e-05, "loss": 0.47929704189300537, "step": 1806 }, { "epoch": 1.4401036475981663, "grad_norm": 0.47661063815092697, "learning_rate": 5.748313716372108e-05, "loss": 0.47215014696121216, "step": 1807 }, { "epoch": 1.4409009368148296, "grad_norm": 0.43642356789260206, "learning_rate": 5.743970294451102e-05, "loss": 0.4788683354854584, "step": 1808 }, { "epoch": 1.441698226031493, "grad_norm": 0.516376375820169, "learning_rate": 5.739626298333926e-05, "loss": 0.5151042342185974, "step": 1809 }, { "epoch": 1.4424955152481562, "grad_norm": 0.5143263034181578, "learning_rate": 5.735281731373271e-05, "loss": 0.4680730998516083, "step": 1810 }, { "epoch": 1.4432928044648197, "grad_norm": 0.5537260855021798, "learning_rate": 5.7309365969222784e-05, "loss": 0.6311664581298828, "step": 1811 }, { "epoch": 1.444090093681483, "grad_norm": 0.48295132072351094, "learning_rate": 5.7265908983345206e-05, "loss": 0.5305048227310181, "step": 1812 }, { "epoch": 1.4448873828981463, "grad_norm": 0.4607514550358971, "learning_rate": 5.722244638964005e-05, "loss": 0.5374394655227661, "step": 1813 }, { "epoch": 1.4456846721148096, "grad_norm": 0.5414496657387331, "learning_rate": 5.7178978221651794e-05, "loss": 0.473479688167572, "step": 1814 }, { "epoch": 1.446481961331473, "grad_norm": 0.46704509090857055, "learning_rate": 5.7135504512929095e-05, "loss": 0.48847487568855286, "step": 1815 }, { "epoch": 1.4472792505481364, "grad_norm": 0.49416486811605975, "learning_rate": 5.709202529702501e-05, "loss": 0.6065043807029724, "step": 1816 }, { "epoch": 1.4480765397647997, "grad_norm": 0.4159873752949442, "learning_rate": 5.704854060749676e-05, "loss": 0.5019790530204773, "step": 1817 }, { "epoch": 1.448873828981463, "grad_norm": 0.49648362651876216, "learning_rate": 5.700505047790583e-05, "loss": 0.5364262461662292, "step": 1818 }, { "epoch": 1.4496711181981263, "grad_norm": 0.5210092100155486, "learning_rate": 5.696155494181792e-05, "loss": 0.5156529545783997, "step": 1819 }, { "epoch": 1.4504684074147898, "grad_norm": 0.4760733939661432, "learning_rate": 5.6918054032802835e-05, "loss": 0.5664858222007751, "step": 1820 }, { "epoch": 1.451265696631453, "grad_norm": 0.46886538255291205, "learning_rate": 5.6874547784434605e-05, "loss": 0.5615605711936951, "step": 1821 }, { "epoch": 1.4520629858481164, "grad_norm": 0.41241654288536017, "learning_rate": 5.683103623029135e-05, "loss": 0.4234973192214966, "step": 1822 }, { "epoch": 1.4528602750647797, "grad_norm": 0.5238374128456499, "learning_rate": 5.678751940395525e-05, "loss": 0.5182336568832397, "step": 1823 }, { "epoch": 1.453657564281443, "grad_norm": 0.5274216248513236, "learning_rate": 5.67439973390126e-05, "loss": 0.48294582962989807, "step": 1824 }, { "epoch": 1.4544548534981065, "grad_norm": 0.4492920156359109, "learning_rate": 5.6700470069053746e-05, "loss": 0.5135596990585327, "step": 1825 }, { "epoch": 1.4552521427147698, "grad_norm": 0.5082835992562269, "learning_rate": 5.665693762767299e-05, "loss": 0.5518569350242615, "step": 1826 }, { "epoch": 1.456049431931433, "grad_norm": 0.5105832670959402, "learning_rate": 5.661340004846871e-05, "loss": 0.6280573606491089, "step": 1827 }, { "epoch": 1.4568467211480964, "grad_norm": 0.4470539031202028, "learning_rate": 5.6569857365043144e-05, "loss": 0.4664788246154785, "step": 1828 }, { "epoch": 1.4576440103647599, "grad_norm": 0.4925919643965613, "learning_rate": 5.6526309611002594e-05, "loss": 0.5405832529067993, "step": 1829 }, { "epoch": 1.458441299581423, "grad_norm": 0.4894252605012073, "learning_rate": 5.648275681995716e-05, "loss": 0.5501021146774292, "step": 1830 }, { "epoch": 1.4592385887980865, "grad_norm": 0.4978480947131828, "learning_rate": 5.64391990255209e-05, "loss": 0.5300042629241943, "step": 1831 }, { "epoch": 1.4600358780147498, "grad_norm": 0.47644339004539527, "learning_rate": 5.639563626131172e-05, "loss": 0.47845524549484253, "step": 1832 }, { "epoch": 1.4608331672314132, "grad_norm": 0.4664892966485065, "learning_rate": 5.635206856095134e-05, "loss": 0.46916043758392334, "step": 1833 }, { "epoch": 1.4616304564480767, "grad_norm": 0.4837756731933026, "learning_rate": 5.630849595806533e-05, "loss": 0.5758082866668701, "step": 1834 }, { "epoch": 1.4624277456647399, "grad_norm": 0.489554701300057, "learning_rate": 5.6264918486283004e-05, "loss": 0.4290483593940735, "step": 1835 }, { "epoch": 1.463225034881403, "grad_norm": 0.5012184790488022, "learning_rate": 5.6221336179237425e-05, "loss": 0.5252904295921326, "step": 1836 }, { "epoch": 1.4640223240980665, "grad_norm": 0.586665586201468, "learning_rate": 5.617774907056546e-05, "loss": 0.589228630065918, "step": 1837 }, { "epoch": 1.46481961331473, "grad_norm": 0.5050257900226283, "learning_rate": 5.613415719390759e-05, "loss": 0.4625256061553955, "step": 1838 }, { "epoch": 1.4656169025313932, "grad_norm": 0.5193775122493987, "learning_rate": 5.6090560582908035e-05, "loss": 0.5599784255027771, "step": 1839 }, { "epoch": 1.4664141917480567, "grad_norm": 0.5449320749391791, "learning_rate": 5.604695927121468e-05, "loss": 0.5358104705810547, "step": 1840 }, { "epoch": 1.4672114809647199, "grad_norm": 0.5017758763074468, "learning_rate": 5.6003353292478963e-05, "loss": 0.5211753845214844, "step": 1841 }, { "epoch": 1.4680087701813833, "grad_norm": 0.4838445140904663, "learning_rate": 5.595974268035601e-05, "loss": 0.578395426273346, "step": 1842 }, { "epoch": 1.4688060593980468, "grad_norm": 0.4124149635356844, "learning_rate": 5.591612746850447e-05, "loss": 0.48411017656326294, "step": 1843 }, { "epoch": 1.46960334861471, "grad_norm": 0.4987673614926415, "learning_rate": 5.587250769058655e-05, "loss": 0.5591435432434082, "step": 1844 }, { "epoch": 1.4704006378313732, "grad_norm": 0.42106733094282317, "learning_rate": 5.582888338026802e-05, "loss": 0.45086944103240967, "step": 1845 }, { "epoch": 1.4711979270480366, "grad_norm": 0.4796862766500565, "learning_rate": 5.578525457121807e-05, "loss": 0.4772908687591553, "step": 1846 }, { "epoch": 1.4719952162647, "grad_norm": 0.4926490205428977, "learning_rate": 5.574162129710945e-05, "loss": 0.549340546131134, "step": 1847 }, { "epoch": 1.4727925054813633, "grad_norm": 0.4725162944562323, "learning_rate": 5.5697983591618295e-05, "loss": 0.4827421009540558, "step": 1848 }, { "epoch": 1.4735897946980268, "grad_norm": 0.4692918370330288, "learning_rate": 5.565434148842417e-05, "loss": 0.4421544373035431, "step": 1849 }, { "epoch": 1.47438708391469, "grad_norm": 0.5023707977674424, "learning_rate": 5.561069502121008e-05, "loss": 0.589576005935669, "step": 1850 }, { "epoch": 1.4751843731313534, "grad_norm": 0.42877581365363204, "learning_rate": 5.5567044223662335e-05, "loss": 0.443621963262558, "step": 1851 }, { "epoch": 1.4759816623480169, "grad_norm": 0.4676788162297739, "learning_rate": 5.5523389129470606e-05, "loss": 0.50635826587677, "step": 1852 }, { "epoch": 1.47677895156468, "grad_norm": 0.48258044390640426, "learning_rate": 5.547972977232792e-05, "loss": 0.45960304141044617, "step": 1853 }, { "epoch": 1.4775762407813433, "grad_norm": 0.4421358351608877, "learning_rate": 5.543606618593054e-05, "loss": 0.43942052125930786, "step": 1854 }, { "epoch": 1.4783735299980068, "grad_norm": 0.38368170426211856, "learning_rate": 5.539239840397802e-05, "loss": 0.4400025010108948, "step": 1855 }, { "epoch": 1.4791708192146702, "grad_norm": 0.513932919064797, "learning_rate": 5.5348726460173165e-05, "loss": 0.5759945511817932, "step": 1856 }, { "epoch": 1.4799681084313334, "grad_norm": 0.47415187292737226, "learning_rate": 5.5305050388221935e-05, "loss": 0.4201805889606476, "step": 1857 }, { "epoch": 1.4807653976479969, "grad_norm": 0.48046718244896064, "learning_rate": 5.5261370221833555e-05, "loss": 0.49198609590530396, "step": 1858 }, { "epoch": 1.48156268686466, "grad_norm": 0.4546795830537235, "learning_rate": 5.5217685994720355e-05, "loss": 0.4799003601074219, "step": 1859 }, { "epoch": 1.4823599760813235, "grad_norm": 0.5679964563957849, "learning_rate": 5.517399774059782e-05, "loss": 0.5356204509735107, "step": 1860 }, { "epoch": 1.483157265297987, "grad_norm": 0.4460841010290021, "learning_rate": 5.513030549318453e-05, "loss": 0.4727621078491211, "step": 1861 }, { "epoch": 1.4839545545146502, "grad_norm": 0.49929711660779036, "learning_rate": 5.508660928620216e-05, "loss": 0.4533916711807251, "step": 1862 }, { "epoch": 1.4847518437313134, "grad_norm": 0.4379972444358608, "learning_rate": 5.504290915337545e-05, "loss": 0.4933019280433655, "step": 1863 }, { "epoch": 1.4855491329479769, "grad_norm": 0.48324717730738714, "learning_rate": 5.499920512843215e-05, "loss": 0.4394932985305786, "step": 1864 }, { "epoch": 1.4863464221646403, "grad_norm": 0.48618927966154424, "learning_rate": 5.495549724510301e-05, "loss": 0.4517173171043396, "step": 1865 }, { "epoch": 1.4871437113813035, "grad_norm": 0.4842085738276252, "learning_rate": 5.4911785537121777e-05, "loss": 0.4791920781135559, "step": 1866 }, { "epoch": 1.487941000597967, "grad_norm": 0.5142825577215902, "learning_rate": 5.486807003822514e-05, "loss": 0.4907638132572174, "step": 1867 }, { "epoch": 1.4887382898146302, "grad_norm": 0.5500655721442735, "learning_rate": 5.482435078215271e-05, "loss": 0.5627091526985168, "step": 1868 }, { "epoch": 1.4895355790312936, "grad_norm": 0.42438366788372106, "learning_rate": 5.4780627802647014e-05, "loss": 0.4631059169769287, "step": 1869 }, { "epoch": 1.490332868247957, "grad_norm": 0.4905726800557758, "learning_rate": 5.473690113345342e-05, "loss": 0.5326109528541565, "step": 1870 }, { "epoch": 1.4911301574646203, "grad_norm": 0.45345666805807955, "learning_rate": 5.469317080832019e-05, "loss": 0.49178269505500793, "step": 1871 }, { "epoch": 1.4919274466812835, "grad_norm": 0.4448629277448677, "learning_rate": 5.464943686099837e-05, "loss": 0.4508042633533478, "step": 1872 }, { "epoch": 1.492724735897947, "grad_norm": 0.5437154381729732, "learning_rate": 5.460569932524181e-05, "loss": 0.6461951732635498, "step": 1873 }, { "epoch": 1.4935220251146104, "grad_norm": 0.43910238601898854, "learning_rate": 5.456195823480715e-05, "loss": 0.39220982789993286, "step": 1874 }, { "epoch": 1.4943193143312736, "grad_norm": 0.5272843406293447, "learning_rate": 5.4518213623453754e-05, "loss": 0.58455491065979, "step": 1875 }, { "epoch": 1.495116603547937, "grad_norm": 0.435202946995717, "learning_rate": 5.447446552494369e-05, "loss": 0.39802294969558716, "step": 1876 }, { "epoch": 1.4959138927646003, "grad_norm": 0.43527491941680113, "learning_rate": 5.443071397304176e-05, "loss": 0.44612568616867065, "step": 1877 }, { "epoch": 1.4967111819812637, "grad_norm": 0.4686385792732995, "learning_rate": 5.438695900151537e-05, "loss": 0.546696662902832, "step": 1878 }, { "epoch": 1.4975084711979272, "grad_norm": 0.4966353405599858, "learning_rate": 5.434320064413464e-05, "loss": 0.5137814283370972, "step": 1879 }, { "epoch": 1.4983057604145904, "grad_norm": 0.42433868171932554, "learning_rate": 5.429943893467224e-05, "loss": 0.492544949054718, "step": 1880 }, { "epoch": 1.4991030496312536, "grad_norm": 0.44721065183477426, "learning_rate": 5.425567390690347e-05, "loss": 0.45401161909103394, "step": 1881 }, { "epoch": 1.499900338847917, "grad_norm": 0.4539731297914826, "learning_rate": 5.421190559460616e-05, "loss": 0.5121893286705017, "step": 1882 }, { "epoch": 1.5006976280645805, "grad_norm": 0.4536942330378719, "learning_rate": 5.416813403156068e-05, "loss": 0.5047990083694458, "step": 1883 }, { "epoch": 1.5014949172812437, "grad_norm": 0.5403903530032707, "learning_rate": 5.412435925154993e-05, "loss": 0.6318309903144836, "step": 1884 }, { "epoch": 1.502292206497907, "grad_norm": 0.47254357734737595, "learning_rate": 5.4080581288359275e-05, "loss": 0.5608416199684143, "step": 1885 }, { "epoch": 1.5030894957145704, "grad_norm": 0.49294827647165074, "learning_rate": 5.4036800175776525e-05, "loss": 0.5622866153717041, "step": 1886 }, { "epoch": 1.5038867849312338, "grad_norm": 0.5216872113380826, "learning_rate": 5.399301594759197e-05, "loss": 0.5325763821601868, "step": 1887 }, { "epoch": 1.5046840741478973, "grad_norm": 0.4458995983530114, "learning_rate": 5.3949228637598235e-05, "loss": 0.4473879635334015, "step": 1888 }, { "epoch": 1.5054813633645605, "grad_norm": 0.44955034095899166, "learning_rate": 5.39054382795904e-05, "loss": 0.511492133140564, "step": 1889 }, { "epoch": 1.5062786525812237, "grad_norm": 0.46393341780945624, "learning_rate": 5.386164490736583e-05, "loss": 0.5278236269950867, "step": 1890 }, { "epoch": 1.5070759417978872, "grad_norm": 0.45572635995376715, "learning_rate": 5.381784855472424e-05, "loss": 0.46308138966560364, "step": 1891 }, { "epoch": 1.5078732310145506, "grad_norm": 0.499821938481772, "learning_rate": 5.377404925546766e-05, "loss": 0.5724700689315796, "step": 1892 }, { "epoch": 1.5086705202312138, "grad_norm": 0.5169741404545188, "learning_rate": 5.373024704340038e-05, "loss": 0.5428746938705444, "step": 1893 }, { "epoch": 1.509467809447877, "grad_norm": 0.49375430628880235, "learning_rate": 5.368644195232896e-05, "loss": 0.5592851638793945, "step": 1894 }, { "epoch": 1.5102650986645405, "grad_norm": 0.4595665075468746, "learning_rate": 5.364263401606213e-05, "loss": 0.47514888644218445, "step": 1895 }, { "epoch": 1.511062387881204, "grad_norm": 0.44843836317969776, "learning_rate": 5.359882326841087e-05, "loss": 0.524956226348877, "step": 1896 }, { "epoch": 1.5118596770978674, "grad_norm": 0.4488953832152159, "learning_rate": 5.35550097431883e-05, "loss": 0.46802884340286255, "step": 1897 }, { "epoch": 1.5126569663145306, "grad_norm": 0.5153511531544412, "learning_rate": 5.35111934742097e-05, "loss": 0.5619144439697266, "step": 1898 }, { "epoch": 1.5134542555311938, "grad_norm": 0.4360741395399156, "learning_rate": 5.3467374495292444e-05, "loss": 0.4369947612285614, "step": 1899 }, { "epoch": 1.5142515447478573, "grad_norm": 0.41894512385146254, "learning_rate": 5.342355284025605e-05, "loss": 0.42649590969085693, "step": 1900 }, { "epoch": 1.5150488339645207, "grad_norm": 0.5349925150132679, "learning_rate": 5.337972854292203e-05, "loss": 0.5842547416687012, "step": 1901 }, { "epoch": 1.515846123181184, "grad_norm": 0.47096714769876574, "learning_rate": 5.3335901637113986e-05, "loss": 0.6301301717758179, "step": 1902 }, { "epoch": 1.5166434123978472, "grad_norm": 0.46568654506445717, "learning_rate": 5.329207215665751e-05, "loss": 0.45424115657806396, "step": 1903 }, { "epoch": 1.5174407016145106, "grad_norm": 0.45225183027563237, "learning_rate": 5.324824013538019e-05, "loss": 0.5519524216651917, "step": 1904 }, { "epoch": 1.518237990831174, "grad_norm": 0.3779440344318418, "learning_rate": 5.32044056071116e-05, "loss": 0.4149254262447357, "step": 1905 }, { "epoch": 1.5190352800478375, "grad_norm": 0.4860868699160449, "learning_rate": 5.316056860568318e-05, "loss": 0.5237791538238525, "step": 1906 }, { "epoch": 1.5198325692645007, "grad_norm": 0.5050484522822531, "learning_rate": 5.311672916492836e-05, "loss": 0.5181390643119812, "step": 1907 }, { "epoch": 1.520629858481164, "grad_norm": 0.5169733891948587, "learning_rate": 5.3072887318682386e-05, "loss": 0.5822312831878662, "step": 1908 }, { "epoch": 1.5214271476978274, "grad_norm": 0.550190944431667, "learning_rate": 5.302904310078239e-05, "loss": 0.5187931060791016, "step": 1909 }, { "epoch": 1.5222244369144908, "grad_norm": 0.469222207331518, "learning_rate": 5.298519654506736e-05, "loss": 0.5583351850509644, "step": 1910 }, { "epoch": 1.523021726131154, "grad_norm": 0.4830299921265798, "learning_rate": 5.294134768537804e-05, "loss": 0.5129443407058716, "step": 1911 }, { "epoch": 1.5238190153478173, "grad_norm": 0.4824677179505062, "learning_rate": 5.289749655555698e-05, "loss": 0.4784897565841675, "step": 1912 }, { "epoch": 1.5246163045644807, "grad_norm": 0.457113941954699, "learning_rate": 5.2853643189448476e-05, "loss": 0.5194821953773499, "step": 1913 }, { "epoch": 1.5254135937811442, "grad_norm": 0.411273667464201, "learning_rate": 5.2809787620898544e-05, "loss": 0.40892088413238525, "step": 1914 }, { "epoch": 1.5262108829978076, "grad_norm": 0.5150512563668989, "learning_rate": 5.2765929883754904e-05, "loss": 0.5737799406051636, "step": 1915 }, { "epoch": 1.5270081722144708, "grad_norm": 0.468897684712428, "learning_rate": 5.272207001186698e-05, "loss": 0.5936537384986877, "step": 1916 }, { "epoch": 1.527805461431134, "grad_norm": 0.48265308819032826, "learning_rate": 5.2678208039085775e-05, "loss": 0.5485925078392029, "step": 1917 }, { "epoch": 1.5286027506477975, "grad_norm": 0.4207002595768308, "learning_rate": 5.263434399926398e-05, "loss": 0.45061054825782776, "step": 1918 }, { "epoch": 1.529400039864461, "grad_norm": 0.4671436768084123, "learning_rate": 5.2590477926255845e-05, "loss": 0.4886104166507721, "step": 1919 }, { "epoch": 1.5301973290811242, "grad_norm": 0.4556434670857662, "learning_rate": 5.2546609853917205e-05, "loss": 0.46587851643562317, "step": 1920 }, { "epoch": 1.5309946182977874, "grad_norm": 0.46567717183843677, "learning_rate": 5.250273981610543e-05, "loss": 0.5097188353538513, "step": 1921 }, { "epoch": 1.5317919075144508, "grad_norm": 0.43588169193306037, "learning_rate": 5.245886784667939e-05, "loss": 0.39122435450553894, "step": 1922 }, { "epoch": 1.5325891967311143, "grad_norm": 0.4150930583953998, "learning_rate": 5.241499397949951e-05, "loss": 0.3963327705860138, "step": 1923 }, { "epoch": 1.5333864859477777, "grad_norm": 0.4580730394976739, "learning_rate": 5.237111824842759e-05, "loss": 0.41999372839927673, "step": 1924 }, { "epoch": 1.534183775164441, "grad_norm": 0.42432440610194316, "learning_rate": 5.232724068732693e-05, "loss": 0.44481977820396423, "step": 1925 }, { "epoch": 1.5349810643811042, "grad_norm": 0.4833755539028183, "learning_rate": 5.228336133006223e-05, "loss": 0.5155543088912964, "step": 1926 }, { "epoch": 1.5357783535977676, "grad_norm": 0.4869812016975556, "learning_rate": 5.223948021049957e-05, "loss": 0.46267175674438477, "step": 1927 }, { "epoch": 1.536575642814431, "grad_norm": 0.5396125120078955, "learning_rate": 5.2195597362506375e-05, "loss": 0.6869015097618103, "step": 1928 }, { "epoch": 1.5373729320310943, "grad_norm": 0.4858758962236281, "learning_rate": 5.215171281995145e-05, "loss": 0.45937004685401917, "step": 1929 }, { "epoch": 1.5381702212477575, "grad_norm": 0.4741527102643884, "learning_rate": 5.2107826616704855e-05, "loss": 0.3841989040374756, "step": 1930 }, { "epoch": 1.538967510464421, "grad_norm": 0.4568592476366963, "learning_rate": 5.206393878663798e-05, "loss": 0.4753262996673584, "step": 1931 }, { "epoch": 1.5397647996810844, "grad_norm": 0.5036569617926753, "learning_rate": 5.202004936362344e-05, "loss": 0.4916149973869324, "step": 1932 }, { "epoch": 1.5405620888977478, "grad_norm": 0.4410973208750554, "learning_rate": 5.197615838153506e-05, "loss": 0.4211105704307556, "step": 1933 }, { "epoch": 1.541359378114411, "grad_norm": 0.48247889652413284, "learning_rate": 5.193226587424792e-05, "loss": 0.5079454183578491, "step": 1934 }, { "epoch": 1.5421566673310743, "grad_norm": 0.5172998546507195, "learning_rate": 5.188837187563823e-05, "loss": 0.4749060869216919, "step": 1935 }, { "epoch": 1.5429539565477377, "grad_norm": 0.49316110444089306, "learning_rate": 5.184447641958339e-05, "loss": 0.42316538095474243, "step": 1936 }, { "epoch": 1.5437512457644011, "grad_norm": 0.4877486882911781, "learning_rate": 5.180057953996191e-05, "loss": 0.5776586532592773, "step": 1937 }, { "epoch": 1.5445485349810644, "grad_norm": 0.43185908110952154, "learning_rate": 5.175668127065337e-05, "loss": 0.43600836396217346, "step": 1938 }, { "epoch": 1.5453458241977276, "grad_norm": 0.4792090577320377, "learning_rate": 5.1712781645538485e-05, "loss": 0.49186408519744873, "step": 1939 }, { "epoch": 1.546143113414391, "grad_norm": 0.4994533637553548, "learning_rate": 5.166888069849896e-05, "loss": 0.5745625495910645, "step": 1940 }, { "epoch": 1.5469404026310545, "grad_norm": 0.479905123335369, "learning_rate": 5.162497846341753e-05, "loss": 0.4691859483718872, "step": 1941 }, { "epoch": 1.547737691847718, "grad_norm": 0.5132725995710397, "learning_rate": 5.158107497417795e-05, "loss": 0.5062724351882935, "step": 1942 }, { "epoch": 1.5485349810643811, "grad_norm": 0.4318368147549285, "learning_rate": 5.1537170264664914e-05, "loss": 0.36414822936058044, "step": 1943 }, { "epoch": 1.5493322702810444, "grad_norm": 0.4459100194313837, "learning_rate": 5.149326436876408e-05, "loss": 0.5313982963562012, "step": 1944 }, { "epoch": 1.5501295594977078, "grad_norm": 0.5042340457540169, "learning_rate": 5.144935732036202e-05, "loss": 0.6017592549324036, "step": 1945 }, { "epoch": 1.5509268487143713, "grad_norm": 0.5195173064825229, "learning_rate": 5.1405449153346175e-05, "loss": 0.5859835147857666, "step": 1946 }, { "epoch": 1.5517241379310345, "grad_norm": 0.4846240151143562, "learning_rate": 5.1361539901604874e-05, "loss": 0.5516513586044312, "step": 1947 }, { "epoch": 1.5525214271476977, "grad_norm": 0.46789265220690085, "learning_rate": 5.131762959902724e-05, "loss": 0.4865471124649048, "step": 1948 }, { "epoch": 1.5533187163643611, "grad_norm": 0.46837843716793115, "learning_rate": 5.127371827950328e-05, "loss": 0.5139905214309692, "step": 1949 }, { "epoch": 1.5541160055810246, "grad_norm": 0.4000293517321295, "learning_rate": 5.122980597692372e-05, "loss": 0.4526653587818146, "step": 1950 }, { "epoch": 1.5549132947976878, "grad_norm": 0.5232902771029695, "learning_rate": 5.1185892725180074e-05, "loss": 0.5537616014480591, "step": 1951 }, { "epoch": 1.5557105840143512, "grad_norm": 0.5049194150663447, "learning_rate": 5.1141978558164584e-05, "loss": 0.54034024477005, "step": 1952 }, { "epoch": 1.5565078732310145, "grad_norm": 0.47951704217746155, "learning_rate": 5.109806350977021e-05, "loss": 0.6178165674209595, "step": 1953 }, { "epoch": 1.557305162447678, "grad_norm": 0.49177804237033607, "learning_rate": 5.1054147613890555e-05, "loss": 0.5972080230712891, "step": 1954 }, { "epoch": 1.5581024516643414, "grad_norm": 0.4935934114561318, "learning_rate": 5.101023090441993e-05, "loss": 0.5773733258247375, "step": 1955 }, { "epoch": 1.5588997408810046, "grad_norm": 0.5195716803028727, "learning_rate": 5.096631341525321e-05, "loss": 0.5259584784507751, "step": 1956 }, { "epoch": 1.5596970300976678, "grad_norm": 0.42845501602221964, "learning_rate": 5.0922395180285954e-05, "loss": 0.45627620816230774, "step": 1957 }, { "epoch": 1.5604943193143312, "grad_norm": 0.4800037370259321, "learning_rate": 5.0878476233414205e-05, "loss": 0.5306329131126404, "step": 1958 }, { "epoch": 1.5612916085309947, "grad_norm": 0.42193369443315276, "learning_rate": 5.0834556608534635e-05, "loss": 0.48554712533950806, "step": 1959 }, { "epoch": 1.562088897747658, "grad_norm": 0.4725768535132972, "learning_rate": 5.0790636339544385e-05, "loss": 0.4865756034851074, "step": 1960 }, { "epoch": 1.5628861869643214, "grad_norm": 0.4418747082945728, "learning_rate": 5.074671546034111e-05, "loss": 0.5258670449256897, "step": 1961 }, { "epoch": 1.5636834761809846, "grad_norm": 0.5065606394420668, "learning_rate": 5.0702794004822953e-05, "loss": 0.5104618072509766, "step": 1962 }, { "epoch": 1.564480765397648, "grad_norm": 0.45998700121565766, "learning_rate": 5.065887200688847e-05, "loss": 0.45644310116767883, "step": 1963 }, { "epoch": 1.5652780546143115, "grad_norm": 0.4423584094842723, "learning_rate": 5.0614949500436657e-05, "loss": 0.4271678626537323, "step": 1964 }, { "epoch": 1.5660753438309747, "grad_norm": 0.43695192763953117, "learning_rate": 5.057102651936692e-05, "loss": 0.49452340602874756, "step": 1965 }, { "epoch": 1.566872633047638, "grad_norm": 0.4890728802229282, "learning_rate": 5.052710309757899e-05, "loss": 0.5512958765029907, "step": 1966 }, { "epoch": 1.5676699222643014, "grad_norm": 0.5251741780656394, "learning_rate": 5.048317926897295e-05, "loss": 0.5286526083946228, "step": 1967 }, { "epoch": 1.5684672114809648, "grad_norm": 0.37724725732648046, "learning_rate": 5.043925506744922e-05, "loss": 0.3932845890522003, "step": 1968 }, { "epoch": 1.569264500697628, "grad_norm": 0.4288466058290961, "learning_rate": 5.0395330526908504e-05, "loss": 0.42416852712631226, "step": 1969 }, { "epoch": 1.5700617899142915, "grad_norm": 0.5277116812827145, "learning_rate": 5.035140568125172e-05, "loss": 0.5684529542922974, "step": 1970 }, { "epoch": 1.5708590791309547, "grad_norm": 0.4258547416014673, "learning_rate": 5.03074805643801e-05, "loss": 0.37101757526397705, "step": 1971 }, { "epoch": 1.5716563683476181, "grad_norm": 0.45546559575882917, "learning_rate": 5.0263555210195e-05, "loss": 0.5087990760803223, "step": 1972 }, { "epoch": 1.5724536575642816, "grad_norm": 0.5906976019898883, "learning_rate": 5.021962965259804e-05, "loss": 0.5135700702667236, "step": 1973 }, { "epoch": 1.5732509467809448, "grad_norm": 0.47790030917263565, "learning_rate": 5.0175703925490936e-05, "loss": 0.5741754174232483, "step": 1974 }, { "epoch": 1.574048235997608, "grad_norm": 0.47311352329478, "learning_rate": 5.0131778062775583e-05, "loss": 0.5384047627449036, "step": 1975 }, { "epoch": 1.5748455252142715, "grad_norm": 0.4634888946716118, "learning_rate": 5.008785209835393e-05, "loss": 0.5336609482765198, "step": 1976 }, { "epoch": 1.575642814430935, "grad_norm": 0.4342499495552089, "learning_rate": 5.004392606612802e-05, "loss": 0.4349193871021271, "step": 1977 }, { "epoch": 1.5764401036475981, "grad_norm": 0.43303339679921454, "learning_rate": 5e-05, "loss": 0.41276389360427856, "step": 1978 }, { "epoch": 1.5772373928642613, "grad_norm": 0.48283674045005437, "learning_rate": 4.9956073933871985e-05, "loss": 0.5224923491477966, "step": 1979 }, { "epoch": 1.5780346820809248, "grad_norm": 0.5017147348314628, "learning_rate": 4.991214790164608e-05, "loss": 0.527434766292572, "step": 1980 }, { "epoch": 1.5788319712975882, "grad_norm": 0.4011764620401812, "learning_rate": 4.986822193722443e-05, "loss": 0.35419392585754395, "step": 1981 }, { "epoch": 1.5796292605142517, "grad_norm": 0.5108233251306306, "learning_rate": 4.982429607450907e-05, "loss": 0.5202866792678833, "step": 1982 }, { "epoch": 1.580426549730915, "grad_norm": 0.4729689104030786, "learning_rate": 4.9780370347401964e-05, "loss": 0.44605469703674316, "step": 1983 }, { "epoch": 1.5812238389475781, "grad_norm": 0.4756485204585003, "learning_rate": 4.9736444789805e-05, "loss": 0.4956095218658447, "step": 1984 }, { "epoch": 1.5820211281642416, "grad_norm": 0.4258891799498581, "learning_rate": 4.969251943561991e-05, "loss": 0.45818355679512024, "step": 1985 }, { "epoch": 1.582818417380905, "grad_norm": 0.46301891696041814, "learning_rate": 4.9648594318748284e-05, "loss": 0.4768552780151367, "step": 1986 }, { "epoch": 1.5836157065975682, "grad_norm": 0.43860857270457404, "learning_rate": 4.9604669473091514e-05, "loss": 0.4250813126564026, "step": 1987 }, { "epoch": 1.5844129958142315, "grad_norm": 0.40283142535843414, "learning_rate": 4.956074493255078e-05, "loss": 0.40152817964553833, "step": 1988 }, { "epoch": 1.585210285030895, "grad_norm": 0.44098271842127024, "learning_rate": 4.951682073102705e-05, "loss": 0.4065626859664917, "step": 1989 }, { "epoch": 1.5860075742475583, "grad_norm": 0.5076978904487688, "learning_rate": 4.947289690242102e-05, "loss": 0.5281823873519897, "step": 1990 }, { "epoch": 1.5868048634642218, "grad_norm": 0.396887225255597, "learning_rate": 4.942897348063308e-05, "loss": 0.39981183409690857, "step": 1991 }, { "epoch": 1.587602152680885, "grad_norm": 0.4716729746090661, "learning_rate": 4.938505049956335e-05, "loss": 0.4479895234107971, "step": 1992 }, { "epoch": 1.5883994418975482, "grad_norm": 0.43046591424718234, "learning_rate": 4.934112799311155e-05, "loss": 0.48249924182891846, "step": 1993 }, { "epoch": 1.5891967311142117, "grad_norm": 0.515882833451454, "learning_rate": 4.929720599517708e-05, "loss": 0.5385246276855469, "step": 1994 }, { "epoch": 1.5899940203308751, "grad_norm": 0.45328664677624675, "learning_rate": 4.925328453965891e-05, "loss": 0.4954071640968323, "step": 1995 }, { "epoch": 1.5907913095475383, "grad_norm": 0.49860916369253394, "learning_rate": 4.920936366045564e-05, "loss": 0.48832249641418457, "step": 1996 }, { "epoch": 1.5915885987642016, "grad_norm": 0.4320851490894965, "learning_rate": 4.916544339146539e-05, "loss": 0.3925401270389557, "step": 1997 }, { "epoch": 1.592385887980865, "grad_norm": 0.4950065003889634, "learning_rate": 4.91215237665858e-05, "loss": 0.48259782791137695, "step": 1998 }, { "epoch": 1.5931831771975284, "grad_norm": 0.48144656012333864, "learning_rate": 4.9077604819714065e-05, "loss": 0.48803865909576416, "step": 1999 }, { "epoch": 1.5939804664141919, "grad_norm": 0.48425460252466823, "learning_rate": 4.9033686584746796e-05, "loss": 0.492999404668808, "step": 2000 }, { "epoch": 1.594777755630855, "grad_norm": 0.4340276919791215, "learning_rate": 4.8989769095580084e-05, "loss": 0.4609432816505432, "step": 2001 }, { "epoch": 1.5955750448475183, "grad_norm": 0.47358131394149594, "learning_rate": 4.8945852386109456e-05, "loss": 0.5191185474395752, "step": 2002 }, { "epoch": 1.5963723340641818, "grad_norm": 0.4465681198558669, "learning_rate": 4.89019364902298e-05, "loss": 0.39155229926109314, "step": 2003 }, { "epoch": 1.5971696232808452, "grad_norm": 0.4809729136881667, "learning_rate": 4.885802144183542e-05, "loss": 0.40640103816986084, "step": 2004 }, { "epoch": 1.5979669124975084, "grad_norm": 0.4804870333222526, "learning_rate": 4.8814107274819945e-05, "loss": 0.5120691657066345, "step": 2005 }, { "epoch": 1.5987642017141717, "grad_norm": 0.47762947897093294, "learning_rate": 4.877019402307629e-05, "loss": 0.5474914312362671, "step": 2006 }, { "epoch": 1.599561490930835, "grad_norm": 0.47649527043327833, "learning_rate": 4.872628172049674e-05, "loss": 0.5116087198257446, "step": 2007 }, { "epoch": 1.6003587801474985, "grad_norm": 0.45857555751094387, "learning_rate": 4.868237040097277e-05, "loss": 0.5111743211746216, "step": 2008 }, { "epoch": 1.601156069364162, "grad_norm": 0.49439089906731126, "learning_rate": 4.8638460098395144e-05, "loss": 0.5007218718528748, "step": 2009 }, { "epoch": 1.6019533585808252, "grad_norm": 0.39675866087166317, "learning_rate": 4.8594550846653836e-05, "loss": 0.382869690656662, "step": 2010 }, { "epoch": 1.6027506477974884, "grad_norm": 0.48186415170391184, "learning_rate": 4.8550642679637984e-05, "loss": 0.4754393994808197, "step": 2011 }, { "epoch": 1.6035479370141519, "grad_norm": 0.47223930471156156, "learning_rate": 4.850673563123592e-05, "loss": 0.5075907111167908, "step": 2012 }, { "epoch": 1.6043452262308153, "grad_norm": 0.4234284745111132, "learning_rate": 4.846282973533509e-05, "loss": 0.4746629595756531, "step": 2013 }, { "epoch": 1.6051425154474785, "grad_norm": 0.4821376711949653, "learning_rate": 4.841892502582206e-05, "loss": 0.5364595651626587, "step": 2014 }, { "epoch": 1.6059398046641418, "grad_norm": 0.4362565944969237, "learning_rate": 4.837502153658248e-05, "loss": 0.45682311058044434, "step": 2015 }, { "epoch": 1.6067370938808052, "grad_norm": 0.49216295150447714, "learning_rate": 4.833111930150105e-05, "loss": 0.4794192910194397, "step": 2016 }, { "epoch": 1.6075343830974687, "grad_norm": 0.48070618550806576, "learning_rate": 4.828721835446151e-05, "loss": 0.48840978741645813, "step": 2017 }, { "epoch": 1.608331672314132, "grad_norm": 0.4870235308345208, "learning_rate": 4.824331872934664e-05, "loss": 0.5210964679718018, "step": 2018 }, { "epoch": 1.6091289615307953, "grad_norm": 0.509063712045922, "learning_rate": 4.8199420460038115e-05, "loss": 0.4979393482208252, "step": 2019 }, { "epoch": 1.6099262507474585, "grad_norm": 0.48729176932630835, "learning_rate": 4.815552358041663e-05, "loss": 0.5492841005325317, "step": 2020 }, { "epoch": 1.610723539964122, "grad_norm": 0.5013296463407043, "learning_rate": 4.811162812436178e-05, "loss": 0.5798001289367676, "step": 2021 }, { "epoch": 1.6115208291807854, "grad_norm": 0.439818171353275, "learning_rate": 4.8067734125752104e-05, "loss": 0.4220429062843323, "step": 2022 }, { "epoch": 1.6123181183974487, "grad_norm": 0.5245642716391623, "learning_rate": 4.802384161846497e-05, "loss": 0.6282564401626587, "step": 2023 }, { "epoch": 1.6131154076141119, "grad_norm": 0.5066468201812554, "learning_rate": 4.797995063637658e-05, "loss": 0.5015411376953125, "step": 2024 }, { "epoch": 1.6139126968307753, "grad_norm": 0.3801214468212537, "learning_rate": 4.793606121336203e-05, "loss": 0.3647180497646332, "step": 2025 }, { "epoch": 1.6147099860474388, "grad_norm": 0.4872968842389571, "learning_rate": 4.789217338329515e-05, "loss": 0.6326854825019836, "step": 2026 }, { "epoch": 1.6155072752641022, "grad_norm": 0.4588000847487999, "learning_rate": 4.7848287180048554e-05, "loss": 0.4585835039615631, "step": 2027 }, { "epoch": 1.6163045644807654, "grad_norm": 0.512249485556306, "learning_rate": 4.780440263749364e-05, "loss": 0.49125123023986816, "step": 2028 }, { "epoch": 1.6171018536974286, "grad_norm": 0.4524422097386433, "learning_rate": 4.7760519789500447e-05, "loss": 0.49295732378959656, "step": 2029 }, { "epoch": 1.617899142914092, "grad_norm": 0.45596056833778065, "learning_rate": 4.7716638669937786e-05, "loss": 0.5116190910339355, "step": 2030 }, { "epoch": 1.6186964321307555, "grad_norm": 0.6237317764454667, "learning_rate": 4.767275931267309e-05, "loss": 0.549274206161499, "step": 2031 }, { "epoch": 1.6194937213474188, "grad_norm": 0.469448237568274, "learning_rate": 4.7628881751572424e-05, "loss": 0.5588555335998535, "step": 2032 }, { "epoch": 1.620291010564082, "grad_norm": 0.48025655426700775, "learning_rate": 4.7585006020500504e-05, "loss": 0.549206018447876, "step": 2033 }, { "epoch": 1.6210882997807454, "grad_norm": 0.43741627739523153, "learning_rate": 4.754113215332062e-05, "loss": 0.47213178873062134, "step": 2034 }, { "epoch": 1.6218855889974089, "grad_norm": 0.415168254484229, "learning_rate": 4.749726018389458e-05, "loss": 0.4442673325538635, "step": 2035 }, { "epoch": 1.6226828782140723, "grad_norm": 0.4536649926858789, "learning_rate": 4.745339014608281e-05, "loss": 0.5580207109451294, "step": 2036 }, { "epoch": 1.6234801674307355, "grad_norm": 0.44102063905397754, "learning_rate": 4.740952207374416e-05, "loss": 0.5259444713592529, "step": 2037 }, { "epoch": 1.6242774566473988, "grad_norm": 0.44835704304908824, "learning_rate": 4.736565600073602e-05, "loss": 0.3587202727794647, "step": 2038 }, { "epoch": 1.6250747458640622, "grad_norm": 0.4990956364940413, "learning_rate": 4.732179196091423e-05, "loss": 0.5775196552276611, "step": 2039 }, { "epoch": 1.6258720350807256, "grad_norm": 0.39021784321746295, "learning_rate": 4.7277929988133027e-05, "loss": 0.33443501591682434, "step": 2040 }, { "epoch": 1.6266693242973889, "grad_norm": 0.4703427748036003, "learning_rate": 4.723407011624509e-05, "loss": 0.595404326915741, "step": 2041 }, { "epoch": 1.627466613514052, "grad_norm": 0.4614581394986319, "learning_rate": 4.719021237910146e-05, "loss": 0.46221867203712463, "step": 2042 }, { "epoch": 1.6282639027307155, "grad_norm": 0.4887343893175338, "learning_rate": 4.714635681055153e-05, "loss": 0.43335452675819397, "step": 2043 }, { "epoch": 1.629061191947379, "grad_norm": 0.4622837876535961, "learning_rate": 4.7102503444443046e-05, "loss": 0.44484180212020874, "step": 2044 }, { "epoch": 1.6298584811640424, "grad_norm": 0.47577110546168194, "learning_rate": 4.705865231462198e-05, "loss": 0.5565328001976013, "step": 2045 }, { "epoch": 1.6306557703807056, "grad_norm": 0.4589010969837316, "learning_rate": 4.701480345493266e-05, "loss": 0.4704294800758362, "step": 2046 }, { "epoch": 1.6314530595973689, "grad_norm": 0.5653724447526145, "learning_rate": 4.697095689921762e-05, "loss": 0.5969666838645935, "step": 2047 }, { "epoch": 1.6322503488140323, "grad_norm": 0.5773443028998292, "learning_rate": 4.692711268131764e-05, "loss": 0.5804845094680786, "step": 2048 }, { "epoch": 1.6330476380306957, "grad_norm": 0.4590166831179682, "learning_rate": 4.6883270835071666e-05, "loss": 0.4709867835044861, "step": 2049 }, { "epoch": 1.633844927247359, "grad_norm": 0.48568964840047774, "learning_rate": 4.683943139431683e-05, "loss": 0.44637900590896606, "step": 2050 }, { "epoch": 1.6346422164640222, "grad_norm": 0.5377568725470214, "learning_rate": 4.679559439288842e-05, "loss": 0.5323460102081299, "step": 2051 }, { "epoch": 1.6354395056806856, "grad_norm": 0.45956219529418946, "learning_rate": 4.675175986461982e-05, "loss": 0.45339539647102356, "step": 2052 }, { "epoch": 1.636236794897349, "grad_norm": 0.4700562138137359, "learning_rate": 4.67079278433425e-05, "loss": 0.4826333820819855, "step": 2053 }, { "epoch": 1.6370340841140125, "grad_norm": 0.5112482606487925, "learning_rate": 4.666409836288603e-05, "loss": 0.5562089085578918, "step": 2054 }, { "epoch": 1.6378313733306757, "grad_norm": 0.5040001684230047, "learning_rate": 4.662027145707799e-05, "loss": 0.5592242479324341, "step": 2055 }, { "epoch": 1.638628662547339, "grad_norm": 0.537734267543141, "learning_rate": 4.657644715974396e-05, "loss": 0.6538245677947998, "step": 2056 }, { "epoch": 1.6394259517640024, "grad_norm": 0.4840856802940744, "learning_rate": 4.653262550470756e-05, "loss": 0.5192695260047913, "step": 2057 }, { "epoch": 1.6402232409806659, "grad_norm": 0.4508882588570121, "learning_rate": 4.6488806525790306e-05, "loss": 0.42996418476104736, "step": 2058 }, { "epoch": 1.641020530197329, "grad_norm": 0.4323000949625412, "learning_rate": 4.644499025681171e-05, "loss": 0.4406481683254242, "step": 2059 }, { "epoch": 1.6418178194139923, "grad_norm": 0.38740082964905237, "learning_rate": 4.6401176731589146e-05, "loss": 0.3789903521537781, "step": 2060 }, { "epoch": 1.6426151086306557, "grad_norm": 0.4650344728538094, "learning_rate": 4.6357365983937875e-05, "loss": 0.4646250009536743, "step": 2061 }, { "epoch": 1.6434123978473192, "grad_norm": 0.4249922335365216, "learning_rate": 4.631355804767105e-05, "loss": 0.47908395528793335, "step": 2062 }, { "epoch": 1.6442096870639824, "grad_norm": 0.49722267640755125, "learning_rate": 4.6269752956599624e-05, "loss": 0.5476301908493042, "step": 2063 }, { "epoch": 1.6450069762806458, "grad_norm": 0.4705744267424959, "learning_rate": 4.6225950744532336e-05, "loss": 0.5386606454849243, "step": 2064 }, { "epoch": 1.645804265497309, "grad_norm": 0.4849524210699648, "learning_rate": 4.6182151445275767e-05, "loss": 0.5252748131752014, "step": 2065 }, { "epoch": 1.6466015547139725, "grad_norm": 0.4903342110389741, "learning_rate": 4.6138355092634175e-05, "loss": 0.6520310640335083, "step": 2066 }, { "epoch": 1.647398843930636, "grad_norm": 0.49719005077968603, "learning_rate": 4.60945617204096e-05, "loss": 0.6191816329956055, "step": 2067 }, { "epoch": 1.6481961331472992, "grad_norm": 0.503063956370581, "learning_rate": 4.605077136240176e-05, "loss": 0.4872092604637146, "step": 2068 }, { "epoch": 1.6489934223639624, "grad_norm": 0.4378862649120015, "learning_rate": 4.6006984052408034e-05, "loss": 0.47652941942214966, "step": 2069 }, { "epoch": 1.6497907115806258, "grad_norm": 0.46688788447232465, "learning_rate": 4.596319982422347e-05, "loss": 0.5951347947120667, "step": 2070 }, { "epoch": 1.6505880007972893, "grad_norm": 0.40004183391263015, "learning_rate": 4.591941871164075e-05, "loss": 0.35688215494155884, "step": 2071 }, { "epoch": 1.6513852900139525, "grad_norm": 0.4892090697744779, "learning_rate": 4.58756407484501e-05, "loss": 0.5785177946090698, "step": 2072 }, { "epoch": 1.652182579230616, "grad_norm": 0.5145100665278141, "learning_rate": 4.583186596843933e-05, "loss": 0.6220821142196655, "step": 2073 }, { "epoch": 1.6529798684472792, "grad_norm": 0.4672869473171125, "learning_rate": 4.578809440539386e-05, "loss": 0.603569507598877, "step": 2074 }, { "epoch": 1.6537771576639426, "grad_norm": 0.4625575203600626, "learning_rate": 4.5744326093096544e-05, "loss": 0.5474835634231567, "step": 2075 }, { "epoch": 1.654574446880606, "grad_norm": 0.4440306983229596, "learning_rate": 4.570056106532776e-05, "loss": 0.39051929116249084, "step": 2076 }, { "epoch": 1.6553717360972693, "grad_norm": 0.42851023248831827, "learning_rate": 4.565679935586537e-05, "loss": 0.43851399421691895, "step": 2077 }, { "epoch": 1.6561690253139325, "grad_norm": 0.32883349025870917, "learning_rate": 4.561304099848464e-05, "loss": 0.2865898013114929, "step": 2078 }, { "epoch": 1.656966314530596, "grad_norm": 0.3971906888575605, "learning_rate": 4.556928602695826e-05, "loss": 0.3890320658683777, "step": 2079 }, { "epoch": 1.6577636037472594, "grad_norm": 0.45526756474919483, "learning_rate": 4.552553447505632e-05, "loss": 0.5292671918869019, "step": 2080 }, { "epoch": 1.6585608929639226, "grad_norm": 0.4205910928849849, "learning_rate": 4.5481786376546264e-05, "loss": 0.40974271297454834, "step": 2081 }, { "epoch": 1.659358182180586, "grad_norm": 0.5319490710899668, "learning_rate": 4.5438041765192854e-05, "loss": 0.48440641164779663, "step": 2082 }, { "epoch": 1.6601554713972493, "grad_norm": 0.5026251985969673, "learning_rate": 4.5394300674758196e-05, "loss": 0.4887389540672302, "step": 2083 }, { "epoch": 1.6609527606139127, "grad_norm": 0.58000772630619, "learning_rate": 4.5350563139001635e-05, "loss": 0.46783992648124695, "step": 2084 }, { "epoch": 1.6617500498305762, "grad_norm": 0.43501554775289236, "learning_rate": 4.530682919167982e-05, "loss": 0.4649938642978668, "step": 2085 }, { "epoch": 1.6625473390472394, "grad_norm": 0.46479313003955597, "learning_rate": 4.5263098866546586e-05, "loss": 0.4659399390220642, "step": 2086 }, { "epoch": 1.6633446282639026, "grad_norm": 0.46294005580584746, "learning_rate": 4.521937219735299e-05, "loss": 0.5309715270996094, "step": 2087 }, { "epoch": 1.664141917480566, "grad_norm": 0.49983535930561024, "learning_rate": 4.51756492178473e-05, "loss": 0.5383461713790894, "step": 2088 }, { "epoch": 1.6649392066972295, "grad_norm": 0.4995891403518647, "learning_rate": 4.513192996177487e-05, "loss": 0.5565457344055176, "step": 2089 }, { "epoch": 1.6657364959138927, "grad_norm": 0.4664262400629205, "learning_rate": 4.508821446287823e-05, "loss": 0.45715904235839844, "step": 2090 }, { "epoch": 1.666533785130556, "grad_norm": 0.4897180514940944, "learning_rate": 4.504450275489699e-05, "loss": 0.46482032537460327, "step": 2091 }, { "epoch": 1.6673310743472194, "grad_norm": 0.4713785003721443, "learning_rate": 4.5000794871567854e-05, "loss": 0.4811255633831024, "step": 2092 }, { "epoch": 1.6681283635638828, "grad_norm": 0.5500245874995927, "learning_rate": 4.495709084662454e-05, "loss": 0.5653396248817444, "step": 2093 }, { "epoch": 1.6689256527805463, "grad_norm": 0.4658603778196583, "learning_rate": 4.491339071379783e-05, "loss": 0.4564715623855591, "step": 2094 }, { "epoch": 1.6697229419972095, "grad_norm": 0.5032154408465277, "learning_rate": 4.486969450681546e-05, "loss": 0.5712124705314636, "step": 2095 }, { "epoch": 1.6705202312138727, "grad_norm": 0.42550483932171934, "learning_rate": 4.4826002259402186e-05, "loss": 0.38866591453552246, "step": 2096 }, { "epoch": 1.6713175204305362, "grad_norm": 0.47944253640307843, "learning_rate": 4.478231400527966e-05, "loss": 0.537717342376709, "step": 2097 }, { "epoch": 1.6721148096471996, "grad_norm": 0.49528086854802267, "learning_rate": 4.473862977816647e-05, "loss": 0.5233160257339478, "step": 2098 }, { "epoch": 1.6729120988638628, "grad_norm": 0.5145677198544815, "learning_rate": 4.469494961177809e-05, "loss": 0.5398626923561096, "step": 2099 }, { "epoch": 1.673709388080526, "grad_norm": 0.5362129651092524, "learning_rate": 4.465127353982687e-05, "loss": 0.5386818647384644, "step": 2100 }, { "epoch": 1.6745066772971895, "grad_norm": 0.45410405354203004, "learning_rate": 4.4607601596021996e-05, "loss": 0.4810185730457306, "step": 2101 }, { "epoch": 1.675303966513853, "grad_norm": 0.436188850434176, "learning_rate": 4.4563933814069466e-05, "loss": 0.5086308717727661, "step": 2102 }, { "epoch": 1.6761012557305164, "grad_norm": 0.48882639578710474, "learning_rate": 4.4520270227672095e-05, "loss": 0.5173102021217346, "step": 2103 }, { "epoch": 1.6768985449471796, "grad_norm": 0.4329474639228526, "learning_rate": 4.4476610870529405e-05, "loss": 0.4798124432563782, "step": 2104 }, { "epoch": 1.6776958341638428, "grad_norm": 0.5114417163196959, "learning_rate": 4.443295577633768e-05, "loss": 0.4927440285682678, "step": 2105 }, { "epoch": 1.6784931233805063, "grad_norm": 0.4761188255125498, "learning_rate": 4.4389304978789935e-05, "loss": 0.48580288887023926, "step": 2106 }, { "epoch": 1.6792904125971697, "grad_norm": 0.4709365152745899, "learning_rate": 4.434565851157584e-05, "loss": 0.550413966178894, "step": 2107 }, { "epoch": 1.680087701813833, "grad_norm": 0.4499646825821384, "learning_rate": 4.430201640838172e-05, "loss": 0.5176081657409668, "step": 2108 }, { "epoch": 1.6808849910304962, "grad_norm": 0.48816882224067154, "learning_rate": 4.425837870289057e-05, "loss": 0.5822881460189819, "step": 2109 }, { "epoch": 1.6816822802471596, "grad_norm": 0.5351099638031886, "learning_rate": 4.421474542878195e-05, "loss": 0.5557562112808228, "step": 2110 }, { "epoch": 1.682479569463823, "grad_norm": 0.5040359789612221, "learning_rate": 4.417111661973199e-05, "loss": 0.5457931756973267, "step": 2111 }, { "epoch": 1.6832768586804865, "grad_norm": 0.4337164688490284, "learning_rate": 4.412749230941346e-05, "loss": 0.4522227346897125, "step": 2112 }, { "epoch": 1.6840741478971497, "grad_norm": 0.4553151674659458, "learning_rate": 4.408387253149553e-05, "loss": 0.49258559942245483, "step": 2113 }, { "epoch": 1.684871437113813, "grad_norm": 0.47793241544428017, "learning_rate": 4.4040257319644e-05, "loss": 0.5514311194419861, "step": 2114 }, { "epoch": 1.6856687263304764, "grad_norm": 0.4389438173863725, "learning_rate": 4.399664670752104e-05, "loss": 0.5020496845245361, "step": 2115 }, { "epoch": 1.6864660155471398, "grad_norm": 0.46922983539253194, "learning_rate": 4.3953040728785325e-05, "loss": 0.5012001395225525, "step": 2116 }, { "epoch": 1.687263304763803, "grad_norm": 0.5014239793413763, "learning_rate": 4.390943941709196e-05, "loss": 0.5087147355079651, "step": 2117 }, { "epoch": 1.6880605939804663, "grad_norm": 0.44787097401845605, "learning_rate": 4.3865842806092414e-05, "loss": 0.4401724338531494, "step": 2118 }, { "epoch": 1.6888578831971297, "grad_norm": 0.49485083868114854, "learning_rate": 4.382225092943455e-05, "loss": 0.5171929001808167, "step": 2119 }, { "epoch": 1.6896551724137931, "grad_norm": 0.4932733785569558, "learning_rate": 4.377866382076257e-05, "loss": 0.5787883996963501, "step": 2120 }, { "epoch": 1.6904524616304566, "grad_norm": 0.5674071773247014, "learning_rate": 4.3735081513717e-05, "loss": 0.5874959230422974, "step": 2121 }, { "epoch": 1.6912497508471198, "grad_norm": 0.6609020816414256, "learning_rate": 4.3691504041934674e-05, "loss": 0.456844687461853, "step": 2122 }, { "epoch": 1.692047040063783, "grad_norm": 0.40335332847845945, "learning_rate": 4.364793143904867e-05, "loss": 0.4041115939617157, "step": 2123 }, { "epoch": 1.6928443292804465, "grad_norm": 0.42265086208818237, "learning_rate": 4.3604363738688296e-05, "loss": 0.40679967403411865, "step": 2124 }, { "epoch": 1.69364161849711, "grad_norm": 0.47031721502476626, "learning_rate": 4.356080097447912e-05, "loss": 0.44752198457717896, "step": 2125 }, { "epoch": 1.6944389077137731, "grad_norm": 0.44925395748509656, "learning_rate": 4.351724318004285e-05, "loss": 0.4495357573032379, "step": 2126 }, { "epoch": 1.6952361969304364, "grad_norm": 0.538607240567144, "learning_rate": 4.347369038899744e-05, "loss": 0.6120071411132812, "step": 2127 }, { "epoch": 1.6960334861470998, "grad_norm": 0.43215245465078406, "learning_rate": 4.343014263495686e-05, "loss": 0.4525986611843109, "step": 2128 }, { "epoch": 1.6968307753637633, "grad_norm": 0.40459865994169936, "learning_rate": 4.3386599951531313e-05, "loss": 0.4348326325416565, "step": 2129 }, { "epoch": 1.6976280645804267, "grad_norm": 0.43327528676415583, "learning_rate": 4.3343062372327025e-05, "loss": 0.41013672947883606, "step": 2130 }, { "epoch": 1.69842535379709, "grad_norm": 0.4943606209941072, "learning_rate": 4.329952993094627e-05, "loss": 0.4972572922706604, "step": 2131 }, { "epoch": 1.6992226430137531, "grad_norm": 0.46085038834631314, "learning_rate": 4.3256002660987405e-05, "loss": 0.45110106468200684, "step": 2132 }, { "epoch": 1.7000199322304166, "grad_norm": 0.49133010197079396, "learning_rate": 4.321248059604477e-05, "loss": 0.5405274629592896, "step": 2133 }, { "epoch": 1.70081722144708, "grad_norm": 0.5013870275818467, "learning_rate": 4.316896376970866e-05, "loss": 0.5338960886001587, "step": 2134 }, { "epoch": 1.7016145106637433, "grad_norm": 0.4045582809176958, "learning_rate": 4.31254522155654e-05, "loss": 0.34784311056137085, "step": 2135 }, { "epoch": 1.7024117998804065, "grad_norm": 0.42064021910978827, "learning_rate": 4.3081945967197176e-05, "loss": 0.40068280696868896, "step": 2136 }, { "epoch": 1.70320908909707, "grad_norm": 0.4389754533151887, "learning_rate": 4.30384450581821e-05, "loss": 0.46404924988746643, "step": 2137 }, { "epoch": 1.7040063783137334, "grad_norm": 0.4306981348992219, "learning_rate": 4.299494952209417e-05, "loss": 0.47260788083076477, "step": 2138 }, { "epoch": 1.7048036675303968, "grad_norm": 0.4941507382041921, "learning_rate": 4.295145939250325e-05, "loss": 0.5117053985595703, "step": 2139 }, { "epoch": 1.70560095674706, "grad_norm": 0.49309278088625236, "learning_rate": 4.290797470297501e-05, "loss": 0.49512791633605957, "step": 2140 }, { "epoch": 1.7063982459637232, "grad_norm": 0.4673706475361271, "learning_rate": 4.2864495487070917e-05, "loss": 0.5003269910812378, "step": 2141 }, { "epoch": 1.7071955351803867, "grad_norm": 0.46125683577717536, "learning_rate": 4.282102177834822e-05, "loss": 0.46091127395629883, "step": 2142 }, { "epoch": 1.7079928243970501, "grad_norm": 0.4553987482103577, "learning_rate": 4.277755361035994e-05, "loss": 0.40685635805130005, "step": 2143 }, { "epoch": 1.7087901136137134, "grad_norm": 0.4913804664378059, "learning_rate": 4.273409101665481e-05, "loss": 0.5814262628555298, "step": 2144 }, { "epoch": 1.7095874028303766, "grad_norm": 0.41759339219669533, "learning_rate": 4.2690634030777214e-05, "loss": 0.38650041818618774, "step": 2145 }, { "epoch": 1.71038469204704, "grad_norm": 0.4827416838413985, "learning_rate": 4.264718268626729e-05, "loss": 0.5392318964004517, "step": 2146 }, { "epoch": 1.7111819812637035, "grad_norm": 0.44360514066994605, "learning_rate": 4.260373701666076e-05, "loss": 0.4460008144378662, "step": 2147 }, { "epoch": 1.711979270480367, "grad_norm": 0.4324326406982163, "learning_rate": 4.256029705548897e-05, "loss": 0.4693434238433838, "step": 2148 }, { "epoch": 1.7127765596970301, "grad_norm": 0.6103796782730069, "learning_rate": 4.251686283627893e-05, "loss": 0.5460285544395447, "step": 2149 }, { "epoch": 1.7135738489136934, "grad_norm": 0.42011122666340855, "learning_rate": 4.2473434392553115e-05, "loss": 0.40865856409072876, "step": 2150 }, { "epoch": 1.7143711381303568, "grad_norm": 0.4670585645642599, "learning_rate": 4.24300117578296e-05, "loss": 0.5011352300643921, "step": 2151 }, { "epoch": 1.7151684273470202, "grad_norm": 0.44596812288929183, "learning_rate": 4.238659496562196e-05, "loss": 0.4383903741836548, "step": 2152 }, { "epoch": 1.7159657165636835, "grad_norm": 0.5408412080430632, "learning_rate": 4.23431840494393e-05, "loss": 0.6716495752334595, "step": 2153 }, { "epoch": 1.7167630057803467, "grad_norm": 0.5315504012217394, "learning_rate": 4.229977904278614e-05, "loss": 0.5496014356613159, "step": 2154 }, { "epoch": 1.7175602949970101, "grad_norm": 0.5008003935554748, "learning_rate": 4.225637997916245e-05, "loss": 0.4381727874279022, "step": 2155 }, { "epoch": 1.7183575842136736, "grad_norm": 0.4409083483489546, "learning_rate": 4.221298689206365e-05, "loss": 0.4983893930912018, "step": 2156 }, { "epoch": 1.719154873430337, "grad_norm": 0.5247539946876508, "learning_rate": 4.216959981498048e-05, "loss": 0.6357954144477844, "step": 2157 }, { "epoch": 1.7199521626470002, "grad_norm": 0.4703321164561755, "learning_rate": 4.212621878139912e-05, "loss": 0.5039327144622803, "step": 2158 }, { "epoch": 1.7207494518636635, "grad_norm": 0.4106862093152239, "learning_rate": 4.208284382480102e-05, "loss": 0.39287635684013367, "step": 2159 }, { "epoch": 1.721546741080327, "grad_norm": 0.4884210421915648, "learning_rate": 4.2039474978662963e-05, "loss": 0.45836833119392395, "step": 2160 }, { "epoch": 1.7223440302969903, "grad_norm": 0.42453673922214047, "learning_rate": 4.199611227645704e-05, "loss": 0.4383794069290161, "step": 2161 }, { "epoch": 1.7231413195136536, "grad_norm": 0.42801681430284555, "learning_rate": 4.1952755751650566e-05, "loss": 0.39482739567756653, "step": 2162 }, { "epoch": 1.7239386087303168, "grad_norm": 0.52179335793938, "learning_rate": 4.1909405437706094e-05, "loss": 0.5842040777206421, "step": 2163 }, { "epoch": 1.7247358979469802, "grad_norm": 0.4591435075578651, "learning_rate": 4.186606136808141e-05, "loss": 0.43722379207611084, "step": 2164 }, { "epoch": 1.7255331871636437, "grad_norm": 0.4820801903668401, "learning_rate": 4.182272357622944e-05, "loss": 0.5227453708648682, "step": 2165 }, { "epoch": 1.7263304763803071, "grad_norm": 0.477784451157349, "learning_rate": 4.1779392095598286e-05, "loss": 0.48623666167259216, "step": 2166 }, { "epoch": 1.7271277655969703, "grad_norm": 0.4213542244916531, "learning_rate": 4.17360669596312e-05, "loss": 0.5211964249610901, "step": 2167 }, { "epoch": 1.7279250548136336, "grad_norm": 0.4630031348024719, "learning_rate": 4.169274820176649e-05, "loss": 0.49473124742507935, "step": 2168 }, { "epoch": 1.728722344030297, "grad_norm": 0.49519239065111015, "learning_rate": 4.164943585543759e-05, "loss": 0.4943283200263977, "step": 2169 }, { "epoch": 1.7295196332469605, "grad_norm": 0.4890033294596724, "learning_rate": 4.1606129954072955e-05, "loss": 0.49952101707458496, "step": 2170 }, { "epoch": 1.7303169224636237, "grad_norm": 0.4710658996187607, "learning_rate": 4.156283053109607e-05, "loss": 0.4346727132797241, "step": 2171 }, { "epoch": 1.731114211680287, "grad_norm": 0.5011339534525604, "learning_rate": 4.151953761992543e-05, "loss": 0.5288316607475281, "step": 2172 }, { "epoch": 1.7319115008969503, "grad_norm": 0.5232313852089747, "learning_rate": 4.1476251253974495e-05, "loss": 0.5154266357421875, "step": 2173 }, { "epoch": 1.7327087901136138, "grad_norm": 0.5081722600601766, "learning_rate": 4.143297146665166e-05, "loss": 0.47528934478759766, "step": 2174 }, { "epoch": 1.733506079330277, "grad_norm": 0.44020001608917647, "learning_rate": 4.138969829136029e-05, "loss": 0.4043598473072052, "step": 2175 }, { "epoch": 1.7343033685469404, "grad_norm": 0.4211617493201827, "learning_rate": 4.134643176149862e-05, "loss": 0.4484303295612335, "step": 2176 }, { "epoch": 1.7351006577636037, "grad_norm": 0.5005455349043705, "learning_rate": 4.1303171910459705e-05, "loss": 0.4893571734428406, "step": 2177 }, { "epoch": 1.7358979469802671, "grad_norm": 0.46283340995079997, "learning_rate": 4.1259918771631515e-05, "loss": 0.5370721220970154, "step": 2178 }, { "epoch": 1.7366952361969306, "grad_norm": 0.5046990693978984, "learning_rate": 4.121667237839683e-05, "loss": 0.5730204582214355, "step": 2179 }, { "epoch": 1.7374925254135938, "grad_norm": 0.5018891584004398, "learning_rate": 4.1173432764133205e-05, "loss": 0.5322356224060059, "step": 2180 }, { "epoch": 1.738289814630257, "grad_norm": 0.4215313841381742, "learning_rate": 4.1130199962212954e-05, "loss": 0.4318227767944336, "step": 2181 }, { "epoch": 1.7390871038469204, "grad_norm": 0.4182750950081613, "learning_rate": 4.108697400600316e-05, "loss": 0.3124406933784485, "step": 2182 }, { "epoch": 1.739884393063584, "grad_norm": 0.5223426958009959, "learning_rate": 4.10437549288656e-05, "loss": 0.6039837598800659, "step": 2183 }, { "epoch": 1.7406816822802471, "grad_norm": 0.5352523335556447, "learning_rate": 4.100054276415678e-05, "loss": 0.632463812828064, "step": 2184 }, { "epoch": 1.7414789714969106, "grad_norm": 0.4751541900124715, "learning_rate": 4.095733754522781e-05, "loss": 0.5247578620910645, "step": 2185 }, { "epoch": 1.7422762607135738, "grad_norm": 0.46945097467247027, "learning_rate": 4.0914139305424484e-05, "loss": 0.468034029006958, "step": 2186 }, { "epoch": 1.7430735499302372, "grad_norm": 0.4259248864694435, "learning_rate": 4.087094807808721e-05, "loss": 0.41987499594688416, "step": 2187 }, { "epoch": 1.7438708391469007, "grad_norm": 0.4553273801379878, "learning_rate": 4.0827763896550955e-05, "loss": 0.4865603744983673, "step": 2188 }, { "epoch": 1.7446681283635639, "grad_norm": 0.41880771864617866, "learning_rate": 4.078458679414525e-05, "loss": 0.46277880668640137, "step": 2189 }, { "epoch": 1.745465417580227, "grad_norm": 0.4848215478512359, "learning_rate": 4.074141680419422e-05, "loss": 0.5830406546592712, "step": 2190 }, { "epoch": 1.7462627067968906, "grad_norm": 0.5172594085666984, "learning_rate": 4.069825396001643e-05, "loss": 0.5576241612434387, "step": 2191 }, { "epoch": 1.747059996013554, "grad_norm": 0.44436381086133947, "learning_rate": 4.0655098294924944e-05, "loss": 0.5271766185760498, "step": 2192 }, { "epoch": 1.7478572852302172, "grad_norm": 0.4772096315846895, "learning_rate": 4.0611949842227334e-05, "loss": 0.5504609942436218, "step": 2193 }, { "epoch": 1.7486545744468807, "grad_norm": 0.48630690427845463, "learning_rate": 4.056880863522553e-05, "loss": 0.5276205539703369, "step": 2194 }, { "epoch": 1.7494518636635439, "grad_norm": 0.5044921571254066, "learning_rate": 4.052567470721594e-05, "loss": 0.47938162088394165, "step": 2195 }, { "epoch": 1.7502491528802073, "grad_norm": 0.4847345571647915, "learning_rate": 4.0482548091489325e-05, "loss": 0.5099669694900513, "step": 2196 }, { "epoch": 1.7510464420968708, "grad_norm": 0.49587781454075186, "learning_rate": 4.043942882133078e-05, "loss": 0.5596211552619934, "step": 2197 }, { "epoch": 1.751843731313534, "grad_norm": 0.4572535847188611, "learning_rate": 4.039631693001976e-05, "loss": 0.4183407127857208, "step": 2198 }, { "epoch": 1.7526410205301972, "grad_norm": 0.472449770760318, "learning_rate": 4.035321245083003e-05, "loss": 0.43530088663101196, "step": 2199 }, { "epoch": 1.7534383097468607, "grad_norm": 0.45640693061415266, "learning_rate": 4.031011541702959e-05, "loss": 0.39254137873649597, "step": 2200 }, { "epoch": 1.754235598963524, "grad_norm": 0.5169277444889357, "learning_rate": 4.026702586188077e-05, "loss": 0.5472736358642578, "step": 2201 }, { "epoch": 1.7550328881801873, "grad_norm": 0.5348630216310861, "learning_rate": 4.0223943818640054e-05, "loss": 0.5276006460189819, "step": 2202 }, { "epoch": 1.7558301773968505, "grad_norm": 0.4507952292535031, "learning_rate": 4.018086932055816e-05, "loss": 0.4569789171218872, "step": 2203 }, { "epoch": 1.756627466613514, "grad_norm": 0.4618021277629074, "learning_rate": 4.013780240087996e-05, "loss": 0.504827082157135, "step": 2204 }, { "epoch": 1.7574247558301774, "grad_norm": 0.46637837262612764, "learning_rate": 4.0094743092844533e-05, "loss": 0.5539575815200806, "step": 2205 }, { "epoch": 1.7582220450468409, "grad_norm": 0.5063163330428976, "learning_rate": 4.005169142968503e-05, "loss": 0.5138503313064575, "step": 2206 }, { "epoch": 1.759019334263504, "grad_norm": 0.4633343463424551, "learning_rate": 4.000864744462871e-05, "loss": 0.5191101431846619, "step": 2207 }, { "epoch": 1.7598166234801673, "grad_norm": 0.48530769717768835, "learning_rate": 3.996561117089693e-05, "loss": 0.46422526240348816, "step": 2208 }, { "epoch": 1.7606139126968308, "grad_norm": 0.43093663196975807, "learning_rate": 3.992258264170508e-05, "loss": 0.4933486580848694, "step": 2209 }, { "epoch": 1.7614112019134942, "grad_norm": 0.4617159715789238, "learning_rate": 3.987956189026256e-05, "loss": 0.5148743391036987, "step": 2210 }, { "epoch": 1.7622084911301574, "grad_norm": 0.4651002655253669, "learning_rate": 3.9836548949772794e-05, "loss": 0.49729710817337036, "step": 2211 }, { "epoch": 1.7630057803468207, "grad_norm": 0.5583207966460515, "learning_rate": 3.9793543853433144e-05, "loss": 0.5888391137123108, "step": 2212 }, { "epoch": 1.763803069563484, "grad_norm": 0.4666767999429528, "learning_rate": 3.9750546634434966e-05, "loss": 0.5302126407623291, "step": 2213 }, { "epoch": 1.7646003587801475, "grad_norm": 0.4957078412875888, "learning_rate": 3.970755732596349e-05, "loss": 0.46859341859817505, "step": 2214 }, { "epoch": 1.765397647996811, "grad_norm": 0.44373616695281815, "learning_rate": 3.9664575961197855e-05, "loss": 0.4837612211704254, "step": 2215 }, { "epoch": 1.7661949372134742, "grad_norm": 0.4389423927651162, "learning_rate": 3.9621602573311085e-05, "loss": 0.4380437135696411, "step": 2216 }, { "epoch": 1.7669922264301374, "grad_norm": 0.48026780568567884, "learning_rate": 3.9578637195470026e-05, "loss": 0.5353031754493713, "step": 2217 }, { "epoch": 1.7677895156468009, "grad_norm": 0.45958782997012515, "learning_rate": 3.9535679860835347e-05, "loss": 0.5169510841369629, "step": 2218 }, { "epoch": 1.7685868048634643, "grad_norm": 0.4264746096030852, "learning_rate": 3.949273060256153e-05, "loss": 0.4359395205974579, "step": 2219 }, { "epoch": 1.7693840940801275, "grad_norm": 0.46618410135296096, "learning_rate": 3.944978945379679e-05, "loss": 0.5229651927947998, "step": 2220 }, { "epoch": 1.7701813832967908, "grad_norm": 0.4943307087816052, "learning_rate": 3.94068564476831e-05, "loss": 0.5417607426643372, "step": 2221 }, { "epoch": 1.7709786725134542, "grad_norm": 0.4895360498411457, "learning_rate": 3.936393161735616e-05, "loss": 0.5576687455177307, "step": 2222 }, { "epoch": 1.7717759617301176, "grad_norm": 0.4325507914426074, "learning_rate": 3.9321014995945334e-05, "loss": 0.48658865690231323, "step": 2223 }, { "epoch": 1.772573250946781, "grad_norm": 0.41439058935980505, "learning_rate": 3.927810661657368e-05, "loss": 0.4119236469268799, "step": 2224 }, { "epoch": 1.7733705401634443, "grad_norm": 0.5174505687117993, "learning_rate": 3.923520651235787e-05, "loss": 0.47661852836608887, "step": 2225 }, { "epoch": 1.7741678293801075, "grad_norm": 0.4492206335744675, "learning_rate": 3.9192314716408194e-05, "loss": 0.48735296726226807, "step": 2226 }, { "epoch": 1.774965118596771, "grad_norm": 0.44213909003896995, "learning_rate": 3.9149431261828556e-05, "loss": 0.4693095088005066, "step": 2227 }, { "epoch": 1.7757624078134344, "grad_norm": 0.4915184034543795, "learning_rate": 3.910655618171639e-05, "loss": 0.4939384162425995, "step": 2228 }, { "epoch": 1.7765596970300976, "grad_norm": 0.4857358199815753, "learning_rate": 3.906368950916266e-05, "loss": 0.5440147519111633, "step": 2229 }, { "epoch": 1.7773569862467609, "grad_norm": 0.47154188482440723, "learning_rate": 3.9020831277251863e-05, "loss": 0.529879629611969, "step": 2230 }, { "epoch": 1.7781542754634243, "grad_norm": 0.46025270574557214, "learning_rate": 3.897798151906199e-05, "loss": 0.45831942558288574, "step": 2231 }, { "epoch": 1.7789515646800877, "grad_norm": 0.4832640236795688, "learning_rate": 3.8935140267664473e-05, "loss": 0.5149707198143005, "step": 2232 }, { "epoch": 1.7797488538967512, "grad_norm": 0.45305382915112424, "learning_rate": 3.889230755612417e-05, "loss": 0.49009713530540466, "step": 2233 }, { "epoch": 1.7805461431134144, "grad_norm": 0.45129351092907477, "learning_rate": 3.884948341749939e-05, "loss": 0.4482331871986389, "step": 2234 }, { "epoch": 1.7813434323300776, "grad_norm": 0.5067143115477163, "learning_rate": 3.880666788484178e-05, "loss": 0.4704906940460205, "step": 2235 }, { "epoch": 1.782140721546741, "grad_norm": 0.48871862432745766, "learning_rate": 3.876386099119635e-05, "loss": 0.5029533505439758, "step": 2236 }, { "epoch": 1.7829380107634045, "grad_norm": 0.4416766384704108, "learning_rate": 3.8721062769601485e-05, "loss": 0.44069311022758484, "step": 2237 }, { "epoch": 1.7837352999800677, "grad_norm": 0.43618673747517694, "learning_rate": 3.8678273253088815e-05, "loss": 0.4199385643005371, "step": 2238 }, { "epoch": 1.784532589196731, "grad_norm": 0.4842983204063288, "learning_rate": 3.863549247468331e-05, "loss": 0.5278534889221191, "step": 2239 }, { "epoch": 1.7853298784133944, "grad_norm": 0.5112969341540942, "learning_rate": 3.859272046740316e-05, "loss": 0.5107297897338867, "step": 2240 }, { "epoch": 1.7861271676300579, "grad_norm": 0.4269639318430279, "learning_rate": 3.854995726425978e-05, "loss": 0.38903650641441345, "step": 2241 }, { "epoch": 1.7869244568467213, "grad_norm": 0.5013445275802898, "learning_rate": 3.850720289825783e-05, "loss": 0.47619569301605225, "step": 2242 }, { "epoch": 1.7877217460633845, "grad_norm": 0.5175849315095062, "learning_rate": 3.8464457402395104e-05, "loss": 0.5726200342178345, "step": 2243 }, { "epoch": 1.7885190352800477, "grad_norm": 0.4939783661320638, "learning_rate": 3.8421720809662565e-05, "loss": 0.49277985095977783, "step": 2244 }, { "epoch": 1.7893163244967112, "grad_norm": 0.5577975509441456, "learning_rate": 3.837899315304433e-05, "loss": 0.6267544031143188, "step": 2245 }, { "epoch": 1.7901136137133746, "grad_norm": 0.4661663418517736, "learning_rate": 3.8336274465517595e-05, "loss": 0.5382815599441528, "step": 2246 }, { "epoch": 1.7909109029300379, "grad_norm": 0.5437846580104608, "learning_rate": 3.829356478005262e-05, "loss": 0.6004168391227722, "step": 2247 }, { "epoch": 1.791708192146701, "grad_norm": 0.40735036890322723, "learning_rate": 3.825086412961275e-05, "loss": 0.3646554946899414, "step": 2248 }, { "epoch": 1.7925054813633645, "grad_norm": 0.4372301203113717, "learning_rate": 3.820817254715433e-05, "loss": 0.42994874715805054, "step": 2249 }, { "epoch": 1.793302770580028, "grad_norm": 0.4916542620050233, "learning_rate": 3.8165490065626736e-05, "loss": 0.548194944858551, "step": 2250 }, { "epoch": 1.7941000597966914, "grad_norm": 0.4488105107162057, "learning_rate": 3.812281671797229e-05, "loss": 0.3948429822921753, "step": 2251 }, { "epoch": 1.7948973490133546, "grad_norm": 0.49870806047571137, "learning_rate": 3.8080152537126256e-05, "loss": 0.640418529510498, "step": 2252 }, { "epoch": 1.7956946382300178, "grad_norm": 0.5071177995327278, "learning_rate": 3.8037497556016884e-05, "loss": 0.5800844430923462, "step": 2253 }, { "epoch": 1.7964919274466813, "grad_norm": 0.4779590684836842, "learning_rate": 3.7994851807565254e-05, "loss": 0.5117017030715942, "step": 2254 }, { "epoch": 1.7972892166633447, "grad_norm": 0.4650504713553412, "learning_rate": 3.795221532468537e-05, "loss": 0.507012128829956, "step": 2255 }, { "epoch": 1.798086505880008, "grad_norm": 0.48563226516050373, "learning_rate": 3.7909588140284026e-05, "loss": 0.5633031725883484, "step": 2256 }, { "epoch": 1.7988837950966712, "grad_norm": 0.47687689196601557, "learning_rate": 3.7866970287260904e-05, "loss": 0.5466035604476929, "step": 2257 }, { "epoch": 1.7996810843133346, "grad_norm": 0.45827531875930727, "learning_rate": 3.782436179850846e-05, "loss": 0.4811246395111084, "step": 2258 }, { "epoch": 1.800478373529998, "grad_norm": 0.4707622738619363, "learning_rate": 3.7781762706911906e-05, "loss": 0.5481853485107422, "step": 2259 }, { "epoch": 1.8012756627466615, "grad_norm": 0.4750708784063069, "learning_rate": 3.773917304534923e-05, "loss": 0.5176928043365479, "step": 2260 }, { "epoch": 1.8020729519633247, "grad_norm": 0.48210199632306583, "learning_rate": 3.769659284669112e-05, "loss": 0.53791743516922, "step": 2261 }, { "epoch": 1.802870241179988, "grad_norm": 0.42721535156205137, "learning_rate": 3.765402214380095e-05, "loss": 0.492635577917099, "step": 2262 }, { "epoch": 1.8036675303966514, "grad_norm": 0.41058185763847593, "learning_rate": 3.761146096953482e-05, "loss": 0.425847589969635, "step": 2263 }, { "epoch": 1.8044648196133148, "grad_norm": 0.5157126611134432, "learning_rate": 3.7568909356741414e-05, "loss": 0.5250444412231445, "step": 2264 }, { "epoch": 1.805262108829978, "grad_norm": 0.5082361363217509, "learning_rate": 3.752636733826205e-05, "loss": 0.5127905011177063, "step": 2265 }, { "epoch": 1.8060593980466413, "grad_norm": 0.49752175376493546, "learning_rate": 3.748383494693068e-05, "loss": 0.5394729971885681, "step": 2266 }, { "epoch": 1.8068566872633047, "grad_norm": 0.5315078459468929, "learning_rate": 3.744131221557378e-05, "loss": 0.5334067940711975, "step": 2267 }, { "epoch": 1.8076539764799682, "grad_norm": 0.47048827572713225, "learning_rate": 3.739879917701039e-05, "loss": 0.5163922309875488, "step": 2268 }, { "epoch": 1.8084512656966316, "grad_norm": 0.3971988092576666, "learning_rate": 3.735629586405208e-05, "loss": 0.38149988651275635, "step": 2269 }, { "epoch": 1.8092485549132948, "grad_norm": 0.49220442755322635, "learning_rate": 3.731380230950288e-05, "loss": 0.45064204931259155, "step": 2270 }, { "epoch": 1.810045844129958, "grad_norm": 0.5194136416358663, "learning_rate": 3.727131854615933e-05, "loss": 0.4928259253501892, "step": 2271 }, { "epoch": 1.8108431333466215, "grad_norm": 0.5119728501103152, "learning_rate": 3.7228844606810384e-05, "loss": 0.512791633605957, "step": 2272 }, { "epoch": 1.811640422563285, "grad_norm": 0.5183105195497875, "learning_rate": 3.7186380524237404e-05, "loss": 0.53575199842453, "step": 2273 }, { "epoch": 1.8124377117799482, "grad_norm": 0.5065335996316827, "learning_rate": 3.714392633121421e-05, "loss": 0.5468844771385193, "step": 2274 }, { "epoch": 1.8132350009966114, "grad_norm": 0.5385916432653636, "learning_rate": 3.71014820605069e-05, "loss": 0.599644124507904, "step": 2275 }, { "epoch": 1.8140322902132748, "grad_norm": 0.5213517336768771, "learning_rate": 3.705904774487396e-05, "loss": 0.5067418217658997, "step": 2276 }, { "epoch": 1.8148295794299383, "grad_norm": 0.4828021933031718, "learning_rate": 3.701662341706621e-05, "loss": 0.3663436770439148, "step": 2277 }, { "epoch": 1.8156268686466015, "grad_norm": 0.4476765828874631, "learning_rate": 3.6974209109826726e-05, "loss": 0.508708119392395, "step": 2278 }, { "epoch": 1.816424157863265, "grad_norm": 0.47532489318871285, "learning_rate": 3.6931804855890874e-05, "loss": 0.507599949836731, "step": 2279 }, { "epoch": 1.8172214470799282, "grad_norm": 0.4738267642661633, "learning_rate": 3.688941068798624e-05, "loss": 0.48141542077064514, "step": 2280 }, { "epoch": 1.8180187362965916, "grad_norm": 0.43931322363983294, "learning_rate": 3.684702663883266e-05, "loss": 0.4186022877693176, "step": 2281 }, { "epoch": 1.818816025513255, "grad_norm": 0.44938832767135256, "learning_rate": 3.680465274114211e-05, "loss": 0.4298703670501709, "step": 2282 }, { "epoch": 1.8196133147299183, "grad_norm": 0.4948026302400921, "learning_rate": 3.676228902761875e-05, "loss": 0.595637857913971, "step": 2283 }, { "epoch": 1.8204106039465815, "grad_norm": 0.44215248045255556, "learning_rate": 3.6719935530958926e-05, "loss": 0.46066707372665405, "step": 2284 }, { "epoch": 1.821207893163245, "grad_norm": 0.3917803857416546, "learning_rate": 3.667759228385103e-05, "loss": 0.4413895905017853, "step": 2285 }, { "epoch": 1.8220051823799084, "grad_norm": 0.4021569528330963, "learning_rate": 3.663525931897559e-05, "loss": 0.3869418799877167, "step": 2286 }, { "epoch": 1.8228024715965716, "grad_norm": 0.4365187853987174, "learning_rate": 3.659293666900518e-05, "loss": 0.44480180740356445, "step": 2287 }, { "epoch": 1.823599760813235, "grad_norm": 0.5823084060377283, "learning_rate": 3.6550624366604405e-05, "loss": 0.5740994215011597, "step": 2288 }, { "epoch": 1.8243970500298983, "grad_norm": 0.4360723126719446, "learning_rate": 3.650832244442991e-05, "loss": 0.4996200203895569, "step": 2289 }, { "epoch": 1.8251943392465617, "grad_norm": 0.4865178890658815, "learning_rate": 3.6466030935130305e-05, "loss": 0.46342039108276367, "step": 2290 }, { "epoch": 1.8259916284632252, "grad_norm": 0.5532517084200784, "learning_rate": 3.642374987134616e-05, "loss": 0.43349167704582214, "step": 2291 }, { "epoch": 1.8267889176798884, "grad_norm": 0.4729202407161879, "learning_rate": 3.638147928571003e-05, "loss": 0.5090664625167847, "step": 2292 }, { "epoch": 1.8275862068965516, "grad_norm": 0.5144961632058492, "learning_rate": 3.63392192108463e-05, "loss": 0.5186576843261719, "step": 2293 }, { "epoch": 1.828383496113215, "grad_norm": 0.5194755848935048, "learning_rate": 3.6296969679371324e-05, "loss": 0.528731107711792, "step": 2294 }, { "epoch": 1.8291807853298785, "grad_norm": 0.469042201630895, "learning_rate": 3.6254730723893275e-05, "loss": 0.5311959385871887, "step": 2295 }, { "epoch": 1.8299780745465417, "grad_norm": 0.5157835313114204, "learning_rate": 3.621250237701217e-05, "loss": 0.458829402923584, "step": 2296 }, { "epoch": 1.8307753637632052, "grad_norm": 0.5160114681074387, "learning_rate": 3.617028467131984e-05, "loss": 0.5403294563293457, "step": 2297 }, { "epoch": 1.8315726529798684, "grad_norm": 0.43349133999824657, "learning_rate": 3.612807763939992e-05, "loss": 0.4861155152320862, "step": 2298 }, { "epoch": 1.8323699421965318, "grad_norm": 0.4100658595704923, "learning_rate": 3.6085881313827754e-05, "loss": 0.4213009476661682, "step": 2299 }, { "epoch": 1.8331672314131953, "grad_norm": 0.416152587114497, "learning_rate": 3.60436957271705e-05, "loss": 0.42582303285598755, "step": 2300 }, { "epoch": 1.8339645206298585, "grad_norm": 0.47982710219957775, "learning_rate": 3.600152091198698e-05, "loss": 0.5117221474647522, "step": 2301 }, { "epoch": 1.8347618098465217, "grad_norm": 0.4420254318122513, "learning_rate": 3.595935690082769e-05, "loss": 0.5055834650993347, "step": 2302 }, { "epoch": 1.8355590990631852, "grad_norm": 0.4799796288734757, "learning_rate": 3.591720372623484e-05, "loss": 0.5743011236190796, "step": 2303 }, { "epoch": 1.8363563882798486, "grad_norm": 0.466323406176251, "learning_rate": 3.587506142074221e-05, "loss": 0.4736800789833069, "step": 2304 }, { "epoch": 1.8371536774965118, "grad_norm": 0.461469878855747, "learning_rate": 3.5832930016875243e-05, "loss": 0.4940425753593445, "step": 2305 }, { "epoch": 1.837950966713175, "grad_norm": 0.45435018514244024, "learning_rate": 3.579080954715096e-05, "loss": 0.47785356640815735, "step": 2306 }, { "epoch": 1.8387482559298385, "grad_norm": 0.4158377561249959, "learning_rate": 3.574870004407793e-05, "loss": 0.40946054458618164, "step": 2307 }, { "epoch": 1.839545545146502, "grad_norm": 0.47804855641879795, "learning_rate": 3.570660154015625e-05, "loss": 0.49405771493911743, "step": 2308 }, { "epoch": 1.8403428343631654, "grad_norm": 0.5062572538707163, "learning_rate": 3.566451406787753e-05, "loss": 0.49035900831222534, "step": 2309 }, { "epoch": 1.8411401235798286, "grad_norm": 0.49925922632031, "learning_rate": 3.56224376597249e-05, "loss": 0.45204663276672363, "step": 2310 }, { "epoch": 1.8419374127964918, "grad_norm": 0.43329809765224486, "learning_rate": 3.5580372348172906e-05, "loss": 0.4169992506504059, "step": 2311 }, { "epoch": 1.8427347020131553, "grad_norm": 0.6624973694796609, "learning_rate": 3.553831816568757e-05, "loss": 0.6305347084999084, "step": 2312 }, { "epoch": 1.8435319912298187, "grad_norm": 0.4711080234463921, "learning_rate": 3.54962751447263e-05, "loss": 0.540981650352478, "step": 2313 }, { "epoch": 1.844329280446482, "grad_norm": 0.42495003724378455, "learning_rate": 3.545424331773787e-05, "loss": 0.4197847247123718, "step": 2314 }, { "epoch": 1.8451265696631451, "grad_norm": 0.4783875525073993, "learning_rate": 3.541222271716247e-05, "loss": 0.43224406242370605, "step": 2315 }, { "epoch": 1.8459238588798086, "grad_norm": 0.408393341402126, "learning_rate": 3.5370213375431577e-05, "loss": 0.35315966606140137, "step": 2316 }, { "epoch": 1.846721148096472, "grad_norm": 0.4291740348828108, "learning_rate": 3.532821532496799e-05, "loss": 0.4069916605949402, "step": 2317 }, { "epoch": 1.8475184373131355, "grad_norm": 0.5042770060644375, "learning_rate": 3.528622859818582e-05, "loss": 0.5475220084190369, "step": 2318 }, { "epoch": 1.8483157265297987, "grad_norm": 0.4582148436626212, "learning_rate": 3.52442532274904e-05, "loss": 0.48951053619384766, "step": 2319 }, { "epoch": 1.849113015746462, "grad_norm": 0.5245783987395768, "learning_rate": 3.520228924527831e-05, "loss": 0.5944608449935913, "step": 2320 }, { "epoch": 1.8499103049631254, "grad_norm": 0.44757694826515004, "learning_rate": 3.5160336683937364e-05, "loss": 0.48002249002456665, "step": 2321 }, { "epoch": 1.8507075941797888, "grad_norm": 0.45503703136642637, "learning_rate": 3.5118395575846525e-05, "loss": 0.5052036046981812, "step": 2322 }, { "epoch": 1.851504883396452, "grad_norm": 0.4629721599615239, "learning_rate": 3.5076465953375955e-05, "loss": 0.5111009478569031, "step": 2323 }, { "epoch": 1.8523021726131152, "grad_norm": 0.43609603337079306, "learning_rate": 3.503454784888691e-05, "loss": 0.45378533005714417, "step": 2324 }, { "epoch": 1.8530994618297787, "grad_norm": 0.4983489081198811, "learning_rate": 3.499264129473179e-05, "loss": 0.4217856526374817, "step": 2325 }, { "epoch": 1.8538967510464421, "grad_norm": 0.44091155163476653, "learning_rate": 3.495074632325407e-05, "loss": 0.42982804775238037, "step": 2326 }, { "epoch": 1.8546940402631056, "grad_norm": 0.5320571488301072, "learning_rate": 3.490886296678828e-05, "loss": 0.5463424324989319, "step": 2327 }, { "epoch": 1.8554913294797688, "grad_norm": 0.4443101342474504, "learning_rate": 3.486699125765997e-05, "loss": 0.4556514620780945, "step": 2328 }, { "epoch": 1.856288618696432, "grad_norm": 0.5935708378845523, "learning_rate": 3.482513122818575e-05, "loss": 0.5992587804794312, "step": 2329 }, { "epoch": 1.8570859079130955, "grad_norm": 0.4635563113073608, "learning_rate": 3.478328291067316e-05, "loss": 0.4330010414123535, "step": 2330 }, { "epoch": 1.857883197129759, "grad_norm": 0.4946128888891604, "learning_rate": 3.474144633742074e-05, "loss": 0.5311582088470459, "step": 2331 }, { "epoch": 1.8586804863464221, "grad_norm": 0.5015517856559839, "learning_rate": 3.4699621540717956e-05, "loss": 0.5802497863769531, "step": 2332 }, { "epoch": 1.8594777755630854, "grad_norm": 0.4230919483248154, "learning_rate": 3.4657808552845185e-05, "loss": 0.4705192446708679, "step": 2333 }, { "epoch": 1.8602750647797488, "grad_norm": 0.44659436730437513, "learning_rate": 3.461600740607366e-05, "loss": 0.4772969186306, "step": 2334 }, { "epoch": 1.8610723539964122, "grad_norm": 0.47201742608009845, "learning_rate": 3.457421813266549e-05, "loss": 0.524689793586731, "step": 2335 }, { "epoch": 1.8618696432130757, "grad_norm": 0.5168844444359223, "learning_rate": 3.453244076487368e-05, "loss": 0.5901569724082947, "step": 2336 }, { "epoch": 1.862666932429739, "grad_norm": 0.48914229157159994, "learning_rate": 3.449067533494197e-05, "loss": 0.47909992933273315, "step": 2337 }, { "epoch": 1.8634642216464021, "grad_norm": 0.5003029313925287, "learning_rate": 3.444892187510489e-05, "loss": 0.49898189306259155, "step": 2338 }, { "epoch": 1.8642615108630656, "grad_norm": 0.48311219624231794, "learning_rate": 3.44071804175878e-05, "loss": 0.4687238335609436, "step": 2339 }, { "epoch": 1.865058800079729, "grad_norm": 0.4867375145634405, "learning_rate": 3.436545099460672e-05, "loss": 0.5396848320960999, "step": 2340 }, { "epoch": 1.8658560892963922, "grad_norm": 0.48762013478269906, "learning_rate": 3.432373363836843e-05, "loss": 0.5190573930740356, "step": 2341 }, { "epoch": 1.8666533785130555, "grad_norm": 0.5461088010172069, "learning_rate": 3.428202838107036e-05, "loss": 0.5988553762435913, "step": 2342 }, { "epoch": 1.867450667729719, "grad_norm": 0.4781057607152089, "learning_rate": 3.424033525490064e-05, "loss": 0.5457171201705933, "step": 2343 }, { "epoch": 1.8682479569463823, "grad_norm": 0.5022605495286917, "learning_rate": 3.4198654292038016e-05, "loss": 0.5080080628395081, "step": 2344 }, { "epoch": 1.8690452461630458, "grad_norm": 0.47461367871446275, "learning_rate": 3.415698552465185e-05, "loss": 0.48335000872612, "step": 2345 }, { "epoch": 1.869842535379709, "grad_norm": 0.575229183571367, "learning_rate": 3.411532898490207e-05, "loss": 0.5155913829803467, "step": 2346 }, { "epoch": 1.8706398245963722, "grad_norm": 0.522015746356492, "learning_rate": 3.407368470493922e-05, "loss": 0.5380173921585083, "step": 2347 }, { "epoch": 1.8714371138130357, "grad_norm": 0.4591472975475734, "learning_rate": 3.403205271690433e-05, "loss": 0.5079054236412048, "step": 2348 }, { "epoch": 1.8722344030296991, "grad_norm": 0.439598675326923, "learning_rate": 3.399043305292897e-05, "loss": 0.4692186713218689, "step": 2349 }, { "epoch": 1.8730316922463623, "grad_norm": 0.4522352092257785, "learning_rate": 3.394882574513519e-05, "loss": 0.520854651927948, "step": 2350 }, { "epoch": 1.8738289814630256, "grad_norm": 0.48957358594478745, "learning_rate": 3.3907230825635494e-05, "loss": 0.555504322052002, "step": 2351 }, { "epoch": 1.874626270679689, "grad_norm": 0.5152616816166501, "learning_rate": 3.386564832653285e-05, "loss": 0.5869249105453491, "step": 2352 }, { "epoch": 1.8754235598963525, "grad_norm": 0.49170340607933505, "learning_rate": 3.3824078279920625e-05, "loss": 0.5576410293579102, "step": 2353 }, { "epoch": 1.876220849113016, "grad_norm": 0.46695074146404203, "learning_rate": 3.378252071788255e-05, "loss": 0.4385836124420166, "step": 2354 }, { "epoch": 1.8770181383296791, "grad_norm": 0.44882964188972063, "learning_rate": 3.374097567249279e-05, "loss": 0.4991183876991272, "step": 2355 }, { "epoch": 1.8778154275463423, "grad_norm": 0.5473005618791371, "learning_rate": 3.369944317581577e-05, "loss": 0.6478971838951111, "step": 2356 }, { "epoch": 1.8786127167630058, "grad_norm": 0.45267863450889123, "learning_rate": 3.3657923259906264e-05, "loss": 0.4821853041648865, "step": 2357 }, { "epoch": 1.8794100059796692, "grad_norm": 0.5031683287677461, "learning_rate": 3.3616415956809366e-05, "loss": 0.43522679805755615, "step": 2358 }, { "epoch": 1.8802072951963325, "grad_norm": 0.4239230052709435, "learning_rate": 3.357492129856037e-05, "loss": 0.4551738500595093, "step": 2359 }, { "epoch": 1.8810045844129957, "grad_norm": 0.48953135313520896, "learning_rate": 3.3533439317184904e-05, "loss": 0.581479012966156, "step": 2360 }, { "epoch": 1.8818018736296591, "grad_norm": 0.48715916024724853, "learning_rate": 3.349197004469869e-05, "loss": 0.6160458326339722, "step": 2361 }, { "epoch": 1.8825991628463226, "grad_norm": 0.4959781118248289, "learning_rate": 3.345051351310774e-05, "loss": 0.5055485367774963, "step": 2362 }, { "epoch": 1.883396452062986, "grad_norm": 0.42379889061228687, "learning_rate": 3.34090697544082e-05, "loss": 0.47209084033966064, "step": 2363 }, { "epoch": 1.8841937412796492, "grad_norm": 0.5042966321319825, "learning_rate": 3.336763880058634e-05, "loss": 0.5944509506225586, "step": 2364 }, { "epoch": 1.8849910304963124, "grad_norm": 0.4239522111110681, "learning_rate": 3.3326220683618595e-05, "loss": 0.4800717830657959, "step": 2365 }, { "epoch": 1.885788319712976, "grad_norm": 0.42692537877077297, "learning_rate": 3.328481543547142e-05, "loss": 0.41244417428970337, "step": 2366 }, { "epoch": 1.8865856089296393, "grad_norm": 0.4414185638959494, "learning_rate": 3.324342308810142e-05, "loss": 0.4029223918914795, "step": 2367 }, { "epoch": 1.8873828981463026, "grad_norm": 0.48956450749212793, "learning_rate": 3.320204367345519e-05, "loss": 0.46174511313438416, "step": 2368 }, { "epoch": 1.8881801873629658, "grad_norm": 0.5056233048607428, "learning_rate": 3.316067722346934e-05, "loss": 0.4938368499279022, "step": 2369 }, { "epoch": 1.8889774765796292, "grad_norm": 0.48258442419470127, "learning_rate": 3.311932377007052e-05, "loss": 0.5463810563087463, "step": 2370 }, { "epoch": 1.8897747657962927, "grad_norm": 0.4468561634970142, "learning_rate": 3.3077983345175295e-05, "loss": 0.39382073283195496, "step": 2371 }, { "epoch": 1.890572055012956, "grad_norm": 0.5082417605985178, "learning_rate": 3.30366559806902e-05, "loss": 0.4664785861968994, "step": 2372 }, { "epoch": 1.8913693442296193, "grad_norm": 0.5082456396514397, "learning_rate": 3.2995341708511704e-05, "loss": 0.5251672267913818, "step": 2373 }, { "epoch": 1.8921666334462826, "grad_norm": 0.39056909188316, "learning_rate": 3.295404056052616e-05, "loss": 0.43346667289733887, "step": 2374 }, { "epoch": 1.892963922662946, "grad_norm": 0.509609897218585, "learning_rate": 3.2912752568609765e-05, "loss": 0.48178479075431824, "step": 2375 }, { "epoch": 1.8937612118796094, "grad_norm": 0.48852688293030083, "learning_rate": 3.28714777646286e-05, "loss": 0.5690582990646362, "step": 2376 }, { "epoch": 1.8945585010962727, "grad_norm": 0.48970106725608903, "learning_rate": 3.283021618043855e-05, "loss": 0.5281620025634766, "step": 2377 }, { "epoch": 1.8953557903129359, "grad_norm": 0.49136113485566074, "learning_rate": 3.278896784788531e-05, "loss": 0.5232473015785217, "step": 2378 }, { "epoch": 1.8961530795295993, "grad_norm": 0.4948363960025479, "learning_rate": 3.274773279880433e-05, "loss": 0.5304704904556274, "step": 2379 }, { "epoch": 1.8969503687462628, "grad_norm": 0.43816795605213144, "learning_rate": 3.2706511065020796e-05, "loss": 0.4691748321056366, "step": 2380 }, { "epoch": 1.8977476579629262, "grad_norm": 0.48295393043460094, "learning_rate": 3.2665302678349676e-05, "loss": 0.5336175560951233, "step": 2381 }, { "epoch": 1.8985449471795894, "grad_norm": 0.4994036634547068, "learning_rate": 3.262410767059557e-05, "loss": 0.5008664727210999, "step": 2382 }, { "epoch": 1.8993422363962527, "grad_norm": 0.5080520338368854, "learning_rate": 3.258292607355277e-05, "loss": 0.46541762351989746, "step": 2383 }, { "epoch": 1.900139525612916, "grad_norm": 0.49371401590628544, "learning_rate": 3.254175791900526e-05, "loss": 0.4890346825122833, "step": 2384 }, { "epoch": 1.9009368148295795, "grad_norm": 0.4707898033248143, "learning_rate": 3.25006032387266e-05, "loss": 0.4980608820915222, "step": 2385 }, { "epoch": 1.9017341040462428, "grad_norm": 0.501919032949857, "learning_rate": 3.2459462064479976e-05, "loss": 0.4971160888671875, "step": 2386 }, { "epoch": 1.902531393262906, "grad_norm": 0.4386536837491404, "learning_rate": 3.24183344280181e-05, "loss": 0.45464491844177246, "step": 2387 }, { "epoch": 1.9033286824795694, "grad_norm": 0.42343217864464827, "learning_rate": 3.2377220361083315e-05, "loss": 0.41628825664520264, "step": 2388 }, { "epoch": 1.9041259716962329, "grad_norm": 0.4677807595819877, "learning_rate": 3.233611989540745e-05, "loss": 0.4382380247116089, "step": 2389 }, { "epoch": 1.904923260912896, "grad_norm": 0.4470994210360193, "learning_rate": 3.2295033062711824e-05, "loss": 0.4349832832813263, "step": 2390 }, { "epoch": 1.9057205501295595, "grad_norm": 0.5107627565999947, "learning_rate": 3.225395989470726e-05, "loss": 0.5652886629104614, "step": 2391 }, { "epoch": 1.9065178393462228, "grad_norm": 0.4832230211385373, "learning_rate": 3.2212900423094026e-05, "loss": 0.47517549991607666, "step": 2392 }, { "epoch": 1.9073151285628862, "grad_norm": 0.5780644432747474, "learning_rate": 3.21718546795618e-05, "loss": 0.5583841800689697, "step": 2393 }, { "epoch": 1.9081124177795497, "grad_norm": 0.5186649167415073, "learning_rate": 3.21308226957897e-05, "loss": 0.5108955502510071, "step": 2394 }, { "epoch": 1.9089097069962129, "grad_norm": 0.4317023924181084, "learning_rate": 3.2089804503446185e-05, "loss": 0.41820383071899414, "step": 2395 }, { "epoch": 1.909706996212876, "grad_norm": 0.43983343040186956, "learning_rate": 3.204880013418911e-05, "loss": 0.38972318172454834, "step": 2396 }, { "epoch": 1.9105042854295395, "grad_norm": 0.443310733233353, "learning_rate": 3.200780961966564e-05, "loss": 0.4714217185974121, "step": 2397 }, { "epoch": 1.911301574646203, "grad_norm": 0.49692847377451593, "learning_rate": 3.196683299151223e-05, "loss": 0.5917254090309143, "step": 2398 }, { "epoch": 1.9120988638628662, "grad_norm": 0.4159966683953029, "learning_rate": 3.192587028135466e-05, "loss": 0.44801828265190125, "step": 2399 }, { "epoch": 1.9128961530795296, "grad_norm": 0.4184893732069209, "learning_rate": 3.188492152080793e-05, "loss": 0.40149348974227905, "step": 2400 }, { "epoch": 1.9136934422961929, "grad_norm": 0.502517422030922, "learning_rate": 3.184398674147628e-05, "loss": 0.5544445514678955, "step": 2401 }, { "epoch": 1.9144907315128563, "grad_norm": 0.44633598472932506, "learning_rate": 3.1803065974953196e-05, "loss": 0.4937151074409485, "step": 2402 }, { "epoch": 1.9152880207295198, "grad_norm": 0.47406704260355054, "learning_rate": 3.176215925282129e-05, "loss": 0.5168018341064453, "step": 2403 }, { "epoch": 1.916085309946183, "grad_norm": 0.4401291553067982, "learning_rate": 3.172126660665238e-05, "loss": 0.4961961805820465, "step": 2404 }, { "epoch": 1.9168825991628462, "grad_norm": 0.45771750137609296, "learning_rate": 3.1680388068007404e-05, "loss": 0.4714515805244446, "step": 2405 }, { "epoch": 1.9176798883795096, "grad_norm": 0.47788116792854385, "learning_rate": 3.16395236684364e-05, "loss": 0.5541231036186218, "step": 2406 }, { "epoch": 1.918477177596173, "grad_norm": 0.49366140426955923, "learning_rate": 3.1598673439478524e-05, "loss": 0.4089202582836151, "step": 2407 }, { "epoch": 1.9192744668128363, "grad_norm": 0.430757991728144, "learning_rate": 3.155783741266196e-05, "loss": 0.41179606318473816, "step": 2408 }, { "epoch": 1.9200717560294998, "grad_norm": 0.4739522692591944, "learning_rate": 3.151701561950396e-05, "loss": 0.5086155533790588, "step": 2409 }, { "epoch": 1.920869045246163, "grad_norm": 0.41823345861282873, "learning_rate": 3.147620809151078e-05, "loss": 0.43684348464012146, "step": 2410 }, { "epoch": 1.9216663344628264, "grad_norm": 0.49593644690994554, "learning_rate": 3.143541486017766e-05, "loss": 0.5191595554351807, "step": 2411 }, { "epoch": 1.9224636236794899, "grad_norm": 0.48695858970893247, "learning_rate": 3.1394635956988817e-05, "loss": 0.5239454507827759, "step": 2412 }, { "epoch": 1.923260912896153, "grad_norm": 0.43634577669281954, "learning_rate": 3.135387141341738e-05, "loss": 0.4309311807155609, "step": 2413 }, { "epoch": 1.9240582021128163, "grad_norm": 0.43949999759922564, "learning_rate": 3.131312126092544e-05, "loss": 0.49322086572647095, "step": 2414 }, { "epoch": 1.9248554913294798, "grad_norm": 0.5074692321780667, "learning_rate": 3.127238553096395e-05, "loss": 0.5673158764839172, "step": 2415 }, { "epoch": 1.9256527805461432, "grad_norm": 0.49529407607931286, "learning_rate": 3.1231664254972736e-05, "loss": 0.4953680634498596, "step": 2416 }, { "epoch": 1.9264500697628064, "grad_norm": 0.5210581316603438, "learning_rate": 3.1190957464380496e-05, "loss": 0.572614312171936, "step": 2417 }, { "epoch": 1.9272473589794696, "grad_norm": 0.5127072523954985, "learning_rate": 3.1150265190604703e-05, "loss": 0.599911093711853, "step": 2418 }, { "epoch": 1.928044648196133, "grad_norm": 0.5408543753855958, "learning_rate": 3.110958746505165e-05, "loss": 0.64491206407547, "step": 2419 }, { "epoch": 1.9288419374127965, "grad_norm": 0.5068721440307042, "learning_rate": 3.106892431911641e-05, "loss": 0.5892620086669922, "step": 2420 }, { "epoch": 1.92963922662946, "grad_norm": 0.45624488397257795, "learning_rate": 3.1028275784182785e-05, "loss": 0.49471837282180786, "step": 2421 }, { "epoch": 1.9304365158461232, "grad_norm": 0.44711371182099946, "learning_rate": 3.098764189162332e-05, "loss": 0.3987007439136505, "step": 2422 }, { "epoch": 1.9312338050627864, "grad_norm": 0.45106793248263716, "learning_rate": 3.094702267279924e-05, "loss": 0.4272152781486511, "step": 2423 }, { "epoch": 1.9320310942794499, "grad_norm": 0.5385673623110782, "learning_rate": 3.090641815906044e-05, "loss": 0.5265756845474243, "step": 2424 }, { "epoch": 1.9328283834961133, "grad_norm": 0.4774116003793981, "learning_rate": 3.086582838174551e-05, "loss": 0.5107181668281555, "step": 2425 }, { "epoch": 1.9336256727127765, "grad_norm": 0.5207718876624642, "learning_rate": 3.082525337218162e-05, "loss": 0.5313001275062561, "step": 2426 }, { "epoch": 1.9344229619294397, "grad_norm": 0.46130599538638284, "learning_rate": 3.078469316168453e-05, "loss": 0.4791850447654724, "step": 2427 }, { "epoch": 1.9352202511461032, "grad_norm": 0.4575087534333671, "learning_rate": 3.074414778155864e-05, "loss": 0.43007755279541016, "step": 2428 }, { "epoch": 1.9360175403627666, "grad_norm": 0.42649038741332124, "learning_rate": 3.0703617263096865e-05, "loss": 0.4233744740486145, "step": 2429 }, { "epoch": 1.93681482957943, "grad_norm": 0.4246598105166287, "learning_rate": 3.066310163758063e-05, "loss": 0.47498786449432373, "step": 2430 }, { "epoch": 1.9376121187960933, "grad_norm": 0.4872556575428426, "learning_rate": 3.062260093627991e-05, "loss": 0.5081727504730225, "step": 2431 }, { "epoch": 1.9384094080127565, "grad_norm": 0.44855308869129396, "learning_rate": 3.058211519045312e-05, "loss": 0.49129247665405273, "step": 2432 }, { "epoch": 1.93920669722942, "grad_norm": 0.456106702302393, "learning_rate": 3.054164443134719e-05, "loss": 0.42142581939697266, "step": 2433 }, { "epoch": 1.9400039864460834, "grad_norm": 0.4639795471051293, "learning_rate": 3.050118869019742e-05, "loss": 0.4926954209804535, "step": 2434 }, { "epoch": 1.9408012756627466, "grad_norm": 0.5112403545026024, "learning_rate": 3.0460747998227535e-05, "loss": 0.5394572019577026, "step": 2435 }, { "epoch": 1.9415985648794098, "grad_norm": 0.4609007301398124, "learning_rate": 3.0420322386649684e-05, "loss": 0.4884849488735199, "step": 2436 }, { "epoch": 1.9423958540960733, "grad_norm": 0.4447897417936368, "learning_rate": 3.0379911886664335e-05, "loss": 0.39962220191955566, "step": 2437 }, { "epoch": 1.9431931433127367, "grad_norm": 0.46098636986494296, "learning_rate": 3.03395165294603e-05, "loss": 0.47724735736846924, "step": 2438 }, { "epoch": 1.9439904325294002, "grad_norm": 0.4835321153133148, "learning_rate": 3.029913634621473e-05, "loss": 0.4964035153388977, "step": 2439 }, { "epoch": 1.9447877217460634, "grad_norm": 0.5049502393511368, "learning_rate": 3.0258771368093024e-05, "loss": 0.5079831480979919, "step": 2440 }, { "epoch": 1.9455850109627266, "grad_norm": 0.4819494784404239, "learning_rate": 3.0218421626248872e-05, "loss": 0.5202668905258179, "step": 2441 }, { "epoch": 1.94638230017939, "grad_norm": 0.4902135682529399, "learning_rate": 3.0178087151824197e-05, "loss": 0.5101970434188843, "step": 2442 }, { "epoch": 1.9471795893960535, "grad_norm": 0.4936092253538246, "learning_rate": 3.013776797594915e-05, "loss": 0.5307422876358032, "step": 2443 }, { "epoch": 1.9479768786127167, "grad_norm": 0.5086962629092366, "learning_rate": 3.009746412974206e-05, "loss": 0.5357460975646973, "step": 2444 }, { "epoch": 1.94877416782938, "grad_norm": 0.47159996193209797, "learning_rate": 3.005717564430942e-05, "loss": 0.4772723615169525, "step": 2445 }, { "epoch": 1.9495714570460434, "grad_norm": 0.42755736577632225, "learning_rate": 3.0016902550745897e-05, "loss": 0.465602844953537, "step": 2446 }, { "epoch": 1.9503687462627068, "grad_norm": 0.5174596063042198, "learning_rate": 2.9976644880134253e-05, "loss": 0.618943452835083, "step": 2447 }, { "epoch": 1.9511660354793703, "grad_norm": 0.4964457706853122, "learning_rate": 2.993640266354534e-05, "loss": 0.5159696340560913, "step": 2448 }, { "epoch": 1.9519633246960335, "grad_norm": 0.4690506725263776, "learning_rate": 2.9896175932038106e-05, "loss": 0.46117356419563293, "step": 2449 }, { "epoch": 1.9527606139126967, "grad_norm": 0.4603357962694867, "learning_rate": 2.9855964716659535e-05, "loss": 0.49445411562919617, "step": 2450 }, { "epoch": 1.9535579031293602, "grad_norm": 0.42772410978832875, "learning_rate": 2.9815769048444642e-05, "loss": 0.4460833668708801, "step": 2451 }, { "epoch": 1.9543551923460236, "grad_norm": 0.49290730312228426, "learning_rate": 2.9775588958416445e-05, "loss": 0.47203391790390015, "step": 2452 }, { "epoch": 1.9551524815626868, "grad_norm": 0.4886272121053032, "learning_rate": 2.97354244775859e-05, "loss": 0.4685468375682831, "step": 2453 }, { "epoch": 1.95594977077935, "grad_norm": 0.4517036156088303, "learning_rate": 2.969527563695198e-05, "loss": 0.4429199695587158, "step": 2454 }, { "epoch": 1.9567470599960135, "grad_norm": 0.4093397913750703, "learning_rate": 2.965514246750155e-05, "loss": 0.38585981726646423, "step": 2455 }, { "epoch": 1.957544349212677, "grad_norm": 0.5818428798784874, "learning_rate": 2.9615025000209368e-05, "loss": 0.6210814714431763, "step": 2456 }, { "epoch": 1.9583416384293404, "grad_norm": 0.4777895544184144, "learning_rate": 2.9574923266038114e-05, "loss": 0.4520055651664734, "step": 2457 }, { "epoch": 1.9591389276460036, "grad_norm": 0.4677084867154778, "learning_rate": 2.9534837295938267e-05, "loss": 0.46907341480255127, "step": 2458 }, { "epoch": 1.9599362168626668, "grad_norm": 0.4306082521051819, "learning_rate": 2.949476712084821e-05, "loss": 0.41311460733413696, "step": 2459 }, { "epoch": 1.9607335060793303, "grad_norm": 0.49068858768083706, "learning_rate": 2.9454712771694083e-05, "loss": 0.49426326155662537, "step": 2460 }, { "epoch": 1.9615307952959937, "grad_norm": 0.4731938966808968, "learning_rate": 2.941467427938981e-05, "loss": 0.5115640759468079, "step": 2461 }, { "epoch": 1.962328084512657, "grad_norm": 0.468009715826152, "learning_rate": 2.9374651674837127e-05, "loss": 0.4571795165538788, "step": 2462 }, { "epoch": 1.9631253737293202, "grad_norm": 0.43956968964077037, "learning_rate": 2.9334644988925453e-05, "loss": 0.4217934012413025, "step": 2463 }, { "epoch": 1.9639226629459836, "grad_norm": 0.4842888569785687, "learning_rate": 2.929465425253194e-05, "loss": 0.5108646154403687, "step": 2464 }, { "epoch": 1.964719952162647, "grad_norm": 0.4765428957877468, "learning_rate": 2.9254679496521463e-05, "loss": 0.4999402165412903, "step": 2465 }, { "epoch": 1.9655172413793105, "grad_norm": 0.4257026243759351, "learning_rate": 2.9214720751746492e-05, "loss": 0.4849398136138916, "step": 2466 }, { "epoch": 1.9663145305959737, "grad_norm": 0.50340775869072, "learning_rate": 2.9174778049047213e-05, "loss": 0.5649405121803284, "step": 2467 }, { "epoch": 1.967111819812637, "grad_norm": 0.5336892495389157, "learning_rate": 2.913485141925136e-05, "loss": 0.5300091505050659, "step": 2468 }, { "epoch": 1.9679091090293004, "grad_norm": 0.5152208437894743, "learning_rate": 2.909494089317436e-05, "loss": 0.4642132520675659, "step": 2469 }, { "epoch": 1.9687063982459638, "grad_norm": 0.5249852828893629, "learning_rate": 2.905504650161909e-05, "loss": 0.5427931547164917, "step": 2470 }, { "epoch": 1.969503687462627, "grad_norm": 0.5065157472995888, "learning_rate": 2.9015168275376058e-05, "loss": 0.49116045236587524, "step": 2471 }, { "epoch": 1.9703009766792903, "grad_norm": 0.5164060814081222, "learning_rate": 2.897530624522329e-05, "loss": 0.5614076852798462, "step": 2472 }, { "epoch": 1.9710982658959537, "grad_norm": 0.468077140629046, "learning_rate": 2.89354604419263e-05, "loss": 0.5389944911003113, "step": 2473 }, { "epoch": 1.9718955551126172, "grad_norm": 0.44743438289844745, "learning_rate": 2.8895630896238033e-05, "loss": 0.4174045920372009, "step": 2474 }, { "epoch": 1.9726928443292806, "grad_norm": 0.414844332011201, "learning_rate": 2.8855817638898953e-05, "loss": 0.4189109206199646, "step": 2475 }, { "epoch": 1.9734901335459438, "grad_norm": 0.4599894321709978, "learning_rate": 2.8816020700636948e-05, "loss": 0.47148531675338745, "step": 2476 }, { "epoch": 1.974287422762607, "grad_norm": 0.4636904040179016, "learning_rate": 2.877624011216724e-05, "loss": 0.4241091310977936, "step": 2477 }, { "epoch": 1.9750847119792705, "grad_norm": 0.4577173614279463, "learning_rate": 2.8736475904192516e-05, "loss": 0.4518607258796692, "step": 2478 }, { "epoch": 1.975882001195934, "grad_norm": 0.5290237622658168, "learning_rate": 2.8696728107402794e-05, "loss": 0.5837839841842651, "step": 2479 }, { "epoch": 1.9766792904125972, "grad_norm": 0.4394943861325193, "learning_rate": 2.8656996752475386e-05, "loss": 0.4371342062950134, "step": 2480 }, { "epoch": 1.9774765796292604, "grad_norm": 0.4631905727959778, "learning_rate": 2.8617281870074953e-05, "loss": 0.5378214120864868, "step": 2481 }, { "epoch": 1.9782738688459238, "grad_norm": 0.43317744943018455, "learning_rate": 2.8577583490853478e-05, "loss": 0.4289478659629822, "step": 2482 }, { "epoch": 1.9790711580625873, "grad_norm": 0.44107703447416474, "learning_rate": 2.8537901645450103e-05, "loss": 0.4695112109184265, "step": 2483 }, { "epoch": 1.9798684472792507, "grad_norm": 0.5228406003434833, "learning_rate": 2.8498236364491304e-05, "loss": 0.5446277856826782, "step": 2484 }, { "epoch": 1.980665736495914, "grad_norm": 0.46928677592171925, "learning_rate": 2.8458587678590752e-05, "loss": 0.443320631980896, "step": 2485 }, { "epoch": 1.9814630257125772, "grad_norm": 0.4816971230818061, "learning_rate": 2.8418955618349268e-05, "loss": 0.5046685338020325, "step": 2486 }, { "epoch": 1.9822603149292406, "grad_norm": 0.555986111041745, "learning_rate": 2.8379340214354878e-05, "loss": 0.557136058807373, "step": 2487 }, { "epoch": 1.983057604145904, "grad_norm": 0.44748840353044017, "learning_rate": 2.8339741497182774e-05, "loss": 0.4402580261230469, "step": 2488 }, { "epoch": 1.9838548933625673, "grad_norm": 0.48440989023272263, "learning_rate": 2.8300159497395197e-05, "loss": 0.5081099271774292, "step": 2489 }, { "epoch": 1.9846521825792305, "grad_norm": 0.4341392978668408, "learning_rate": 2.8260594245541548e-05, "loss": 0.40825772285461426, "step": 2490 }, { "epoch": 1.985449471795894, "grad_norm": 0.5290959241517884, "learning_rate": 2.8221045772158304e-05, "loss": 0.4534068703651428, "step": 2491 }, { "epoch": 1.9862467610125574, "grad_norm": 0.5387169692955452, "learning_rate": 2.8181514107768957e-05, "loss": 0.5812690258026123, "step": 2492 }, { "epoch": 1.9870440502292208, "grad_norm": 0.5136933651259985, "learning_rate": 2.8141999282884013e-05, "loss": 0.5082804560661316, "step": 2493 }, { "epoch": 1.987841339445884, "grad_norm": 0.5228396024138148, "learning_rate": 2.810250132800103e-05, "loss": 0.4913046061992645, "step": 2494 }, { "epoch": 1.9886386286625473, "grad_norm": 0.4389797836145636, "learning_rate": 2.806302027360454e-05, "loss": 0.44463908672332764, "step": 2495 }, { "epoch": 1.9894359178792107, "grad_norm": 0.4436857771950829, "learning_rate": 2.8023556150165985e-05, "loss": 0.49323809146881104, "step": 2496 }, { "epoch": 1.9902332070958741, "grad_norm": 0.4971869334798599, "learning_rate": 2.798410898814378e-05, "loss": 0.5950908660888672, "step": 2497 }, { "epoch": 1.9910304963125374, "grad_norm": 0.5422170994214504, "learning_rate": 2.794467881798325e-05, "loss": 0.45908403396606445, "step": 2498 }, { "epoch": 1.9918277855292006, "grad_norm": 0.5212066469533777, "learning_rate": 2.7905265670116604e-05, "loss": 0.5250855684280396, "step": 2499 }, { "epoch": 1.992625074745864, "grad_norm": 0.4732325552095432, "learning_rate": 2.7865869574962873e-05, "loss": 0.4916102886199951, "step": 2500 }, { "epoch": 1.9934223639625275, "grad_norm": 0.4763704759537067, "learning_rate": 2.782649056292797e-05, "loss": 0.5160295963287354, "step": 2501 }, { "epoch": 1.9942196531791907, "grad_norm": 0.4313761568459929, "learning_rate": 2.7787128664404638e-05, "loss": 0.40432077646255493, "step": 2502 }, { "epoch": 1.9950169423958541, "grad_norm": 0.5925771169636425, "learning_rate": 2.774778390977234e-05, "loss": 0.6630966663360596, "step": 2503 }, { "epoch": 1.9958142316125174, "grad_norm": 0.41530772181442277, "learning_rate": 2.770845632939738e-05, "loss": 0.42565053701400757, "step": 2504 }, { "epoch": 1.9966115208291808, "grad_norm": 0.4357239307463584, "learning_rate": 2.766914595363279e-05, "loss": 0.40726134181022644, "step": 2505 }, { "epoch": 1.9974088100458443, "grad_norm": 0.5023322374916439, "learning_rate": 2.762985281281828e-05, "loss": 0.48836299777030945, "step": 2506 }, { "epoch": 1.9982060992625075, "grad_norm": 0.45472118374840287, "learning_rate": 2.75905769372803e-05, "loss": 0.4257217049598694, "step": 2507 }, { "epoch": 1.9990033884791707, "grad_norm": 0.45416963420074946, "learning_rate": 2.7551318357331997e-05, "loss": 0.493725061416626, "step": 2508 }, { "epoch": 1.9998006776958341, "grad_norm": 0.4656672066250159, "learning_rate": 2.7512077103273094e-05, "loss": 0.40535202622413635, "step": 2509 }, { "epoch": 2.0, "grad_norm": 1.2046743521147603, "learning_rate": 2.7472853205389996e-05, "loss": 0.4107244908809662, "step": 2510 }, { "epoch": 2.0007972892166634, "grad_norm": 0.41240316238034136, "learning_rate": 2.743364669395574e-05, "loss": 0.3428141474723816, "step": 2511 }, { "epoch": 2.001594578433327, "grad_norm": 0.43546899804528627, "learning_rate": 2.739445759922985e-05, "loss": 0.38523298501968384, "step": 2512 }, { "epoch": 2.00239186764999, "grad_norm": 0.4373385913526956, "learning_rate": 2.7355285951458494e-05, "loss": 0.4717475175857544, "step": 2513 }, { "epoch": 2.0031891568666533, "grad_norm": 0.4144683859161786, "learning_rate": 2.731613178087436e-05, "loss": 0.43002188205718994, "step": 2514 }, { "epoch": 2.0039864460833168, "grad_norm": 0.43384086034766367, "learning_rate": 2.727699511769659e-05, "loss": 0.4284818470478058, "step": 2515 }, { "epoch": 2.00478373529998, "grad_norm": 0.46582551296547264, "learning_rate": 2.7237875992130877e-05, "loss": 0.44465625286102295, "step": 2516 }, { "epoch": 2.005581024516643, "grad_norm": 0.46235153829646036, "learning_rate": 2.7198774434369385e-05, "loss": 0.4255943298339844, "step": 2517 }, { "epoch": 2.0063783137333067, "grad_norm": 0.43265623639335915, "learning_rate": 2.715969047459066e-05, "loss": 0.3979130685329437, "step": 2518 }, { "epoch": 2.00717560294997, "grad_norm": 0.5158116071763137, "learning_rate": 2.7120624142959678e-05, "loss": 0.35068392753601074, "step": 2519 }, { "epoch": 2.0079728921666335, "grad_norm": 0.45582844583040116, "learning_rate": 2.708157546962785e-05, "loss": 0.4053882360458374, "step": 2520 }, { "epoch": 2.008770181383297, "grad_norm": 0.43662465167369524, "learning_rate": 2.7042544484732955e-05, "loss": 0.38947397470474243, "step": 2521 }, { "epoch": 2.00956747059996, "grad_norm": 0.5226490183101599, "learning_rate": 2.7003531218399068e-05, "loss": 0.43282005190849304, "step": 2522 }, { "epoch": 2.0103647598166234, "grad_norm": 0.4813971845687137, "learning_rate": 2.6964535700736644e-05, "loss": 0.38331854343414307, "step": 2523 }, { "epoch": 2.011162049033287, "grad_norm": 0.5160115839787668, "learning_rate": 2.692555796184243e-05, "loss": 0.44792672991752625, "step": 2524 }, { "epoch": 2.0119593382499503, "grad_norm": 0.5211570558024915, "learning_rate": 2.688659803179939e-05, "loss": 0.43156546354293823, "step": 2525 }, { "epoch": 2.0127566274666133, "grad_norm": 0.44483351906441543, "learning_rate": 2.684765594067684e-05, "loss": 0.4014327824115753, "step": 2526 }, { "epoch": 2.0135539166832768, "grad_norm": 0.5387794615867437, "learning_rate": 2.680873171853026e-05, "loss": 0.41500356793403625, "step": 2527 }, { "epoch": 2.01435120589994, "grad_norm": 0.503927623124635, "learning_rate": 2.6769825395401388e-05, "loss": 0.4107219874858856, "step": 2528 }, { "epoch": 2.0151484951166037, "grad_norm": 0.6023653867546285, "learning_rate": 2.6730937001318074e-05, "loss": 0.45825880765914917, "step": 2529 }, { "epoch": 2.015945784333267, "grad_norm": 0.49562080929627117, "learning_rate": 2.6692066566294392e-05, "loss": 0.33911916613578796, "step": 2530 }, { "epoch": 2.01674307354993, "grad_norm": 0.4909074038739314, "learning_rate": 2.665321412033057e-05, "loss": 0.39496809244155884, "step": 2531 }, { "epoch": 2.0175403627665935, "grad_norm": 0.5157129747487309, "learning_rate": 2.6614379693412873e-05, "loss": 0.39433348178863525, "step": 2532 }, { "epoch": 2.018337651983257, "grad_norm": 0.5464105676990172, "learning_rate": 2.657556331551373e-05, "loss": 0.4320933520793915, "step": 2533 }, { "epoch": 2.0191349411999204, "grad_norm": 0.5480176972608422, "learning_rate": 2.6536765016591624e-05, "loss": 0.41145041584968567, "step": 2534 }, { "epoch": 2.0199322304165834, "grad_norm": 0.5707239786870449, "learning_rate": 2.649798482659106e-05, "loss": 0.44036367535591125, "step": 2535 }, { "epoch": 2.020729519633247, "grad_norm": 0.5014269445782608, "learning_rate": 2.6459222775442582e-05, "loss": 0.3958403170108795, "step": 2536 }, { "epoch": 2.0215268088499103, "grad_norm": 0.5998683768652899, "learning_rate": 2.6420478893062763e-05, "loss": 0.44459664821624756, "step": 2537 }, { "epoch": 2.0223240980665738, "grad_norm": 0.5074711510452966, "learning_rate": 2.6381753209354086e-05, "loss": 0.36215275526046753, "step": 2538 }, { "epoch": 2.023121387283237, "grad_norm": 0.5740943171662394, "learning_rate": 2.6343045754205053e-05, "loss": 0.39484772086143494, "step": 2539 }, { "epoch": 2.0239186764999, "grad_norm": 0.5567260360721823, "learning_rate": 2.6304356557490084e-05, "loss": 0.42621713876724243, "step": 2540 }, { "epoch": 2.0247159657165636, "grad_norm": 0.5303270386501631, "learning_rate": 2.626568564906946e-05, "loss": 0.3913501799106598, "step": 2541 }, { "epoch": 2.025513254933227, "grad_norm": 0.5776776170842505, "learning_rate": 2.6227033058789408e-05, "loss": 0.40086644887924194, "step": 2542 }, { "epoch": 2.0263105441498905, "grad_norm": 0.4356308045215088, "learning_rate": 2.6188398816482006e-05, "loss": 0.34685078263282776, "step": 2543 }, { "epoch": 2.0271078333665535, "grad_norm": 0.5670352698332299, "learning_rate": 2.6149782951965136e-05, "loss": 0.4591815173625946, "step": 2544 }, { "epoch": 2.027905122583217, "grad_norm": 0.4425979166422807, "learning_rate": 2.61111854950425e-05, "loss": 0.3226779103279114, "step": 2545 }, { "epoch": 2.0287024117998804, "grad_norm": 0.532384746255433, "learning_rate": 2.6072606475503624e-05, "loss": 0.3806024491786957, "step": 2546 }, { "epoch": 2.029499701016544, "grad_norm": 0.48513364435540574, "learning_rate": 2.603404592312382e-05, "loss": 0.3849235475063324, "step": 2547 }, { "epoch": 2.0302969902332073, "grad_norm": 0.468163383583891, "learning_rate": 2.599550386766407e-05, "loss": 0.29992327094078064, "step": 2548 }, { "epoch": 2.0310942794498703, "grad_norm": 0.5181854132364339, "learning_rate": 2.595698033887115e-05, "loss": 0.4144233763217926, "step": 2549 }, { "epoch": 2.0318915686665338, "grad_norm": 0.5049224220750694, "learning_rate": 2.5918475366477533e-05, "loss": 0.4386288523674011, "step": 2550 }, { "epoch": 2.032688857883197, "grad_norm": 0.44346912518895854, "learning_rate": 2.587998898020131e-05, "loss": 0.336753785610199, "step": 2551 }, { "epoch": 2.0334861470998606, "grad_norm": 0.4814461973314408, "learning_rate": 2.5841521209746294e-05, "loss": 0.367242693901062, "step": 2552 }, { "epoch": 2.0342834363165236, "grad_norm": 0.5916948026423817, "learning_rate": 2.58030720848019e-05, "loss": 0.4798583686351776, "step": 2553 }, { "epoch": 2.035080725533187, "grad_norm": 0.5104684567272127, "learning_rate": 2.5764641635043175e-05, "loss": 0.4205506443977356, "step": 2554 }, { "epoch": 2.0358780147498505, "grad_norm": 0.5419082222672567, "learning_rate": 2.5726229890130706e-05, "loss": 0.4456963539123535, "step": 2555 }, { "epoch": 2.036675303966514, "grad_norm": 0.524257581140756, "learning_rate": 2.5687836879710682e-05, "loss": 0.31449246406555176, "step": 2556 }, { "epoch": 2.0374725931831774, "grad_norm": 0.5054535610244263, "learning_rate": 2.5649462633414845e-05, "loss": 0.44809433817863464, "step": 2557 }, { "epoch": 2.0382698823998404, "grad_norm": 0.5595146047616042, "learning_rate": 2.5611107180860395e-05, "loss": 0.4848392605781555, "step": 2558 }, { "epoch": 2.039067171616504, "grad_norm": 0.5800489978379789, "learning_rate": 2.5572770551650083e-05, "loss": 0.4121823310852051, "step": 2559 }, { "epoch": 2.0398644608331673, "grad_norm": 0.4866743511448262, "learning_rate": 2.553445277537213e-05, "loss": 0.36580416560173035, "step": 2560 }, { "epoch": 2.0406617500498307, "grad_norm": 0.4716051135032755, "learning_rate": 2.549615388160016e-05, "loss": 0.43307241797447205, "step": 2561 }, { "epoch": 2.0414590392664937, "grad_norm": 0.5657754616844002, "learning_rate": 2.5457873899893277e-05, "loss": 0.5126803517341614, "step": 2562 }, { "epoch": 2.042256328483157, "grad_norm": 0.5391129903971162, "learning_rate": 2.541961285979597e-05, "loss": 0.42378515005111694, "step": 2563 }, { "epoch": 2.0430536176998206, "grad_norm": 0.5387395735256747, "learning_rate": 2.5381370790838076e-05, "loss": 0.38468655943870544, "step": 2564 }, { "epoch": 2.043850906916484, "grad_norm": 0.4620143486243759, "learning_rate": 2.534314772253484e-05, "loss": 0.3651203215122223, "step": 2565 }, { "epoch": 2.0446481961331475, "grad_norm": 0.6071663880498281, "learning_rate": 2.530494368438683e-05, "loss": 0.4361479580402374, "step": 2566 }, { "epoch": 2.0454454853498105, "grad_norm": 0.4818891565221009, "learning_rate": 2.526675870587989e-05, "loss": 0.39769673347473145, "step": 2567 }, { "epoch": 2.046242774566474, "grad_norm": 0.4660427257991003, "learning_rate": 2.5228592816485184e-05, "loss": 0.3957539200782776, "step": 2568 }, { "epoch": 2.0470400637831374, "grad_norm": 0.7806810796714682, "learning_rate": 2.5190446045659183e-05, "loss": 0.35947561264038086, "step": 2569 }, { "epoch": 2.047837352999801, "grad_norm": 0.4820123587095753, "learning_rate": 2.5152318422843525e-05, "loss": 0.353276789188385, "step": 2570 }, { "epoch": 2.048634642216464, "grad_norm": 0.48898046340457035, "learning_rate": 2.5114209977465086e-05, "loss": 0.3053602874279022, "step": 2571 }, { "epoch": 2.0494319314331273, "grad_norm": 0.5132414427037506, "learning_rate": 2.5076120738935975e-05, "loss": 0.44966867566108704, "step": 2572 }, { "epoch": 2.0502292206497907, "grad_norm": 0.5788069537312582, "learning_rate": 2.503805073665349e-05, "loss": 0.47166502475738525, "step": 2573 }, { "epoch": 2.051026509866454, "grad_norm": 0.5976830501094383, "learning_rate": 2.500000000000001e-05, "loss": 0.3952937424182892, "step": 2574 }, { "epoch": 2.051823799083117, "grad_norm": 0.504277743181379, "learning_rate": 2.4961968558343107e-05, "loss": 0.38925060629844666, "step": 2575 }, { "epoch": 2.0526210882997806, "grad_norm": 0.47466141803516115, "learning_rate": 2.4923956441035467e-05, "loss": 0.3781808018684387, "step": 2576 }, { "epoch": 2.053418377516444, "grad_norm": 0.5444749399612051, "learning_rate": 2.4885963677414787e-05, "loss": 0.3193039894104004, "step": 2577 }, { "epoch": 2.0542156667331075, "grad_norm": 0.5317729453319402, "learning_rate": 2.4847990296803908e-05, "loss": 0.3838326334953308, "step": 2578 }, { "epoch": 2.055012955949771, "grad_norm": 0.508646273372026, "learning_rate": 2.4810036328510693e-05, "loss": 0.36821654438972473, "step": 2579 }, { "epoch": 2.055810245166434, "grad_norm": 0.5002780820910881, "learning_rate": 2.4772101801827967e-05, "loss": 0.41470637917518616, "step": 2580 }, { "epoch": 2.0566075343830974, "grad_norm": 0.5139196535290627, "learning_rate": 2.4734186746033626e-05, "loss": 0.4429609477519989, "step": 2581 }, { "epoch": 2.057404823599761, "grad_norm": 0.48358436524076287, "learning_rate": 2.469629119039049e-05, "loss": 0.33906492590904236, "step": 2582 }, { "epoch": 2.0582021128164243, "grad_norm": 0.5026937650737402, "learning_rate": 2.465841516414637e-05, "loss": 0.38362929224967957, "step": 2583 }, { "epoch": 2.0589994020330873, "grad_norm": 0.5710228637718929, "learning_rate": 2.462055869653394e-05, "loss": 0.47473788261413574, "step": 2584 }, { "epoch": 2.0597966912497507, "grad_norm": 0.6161269877382713, "learning_rate": 2.458272181677083e-05, "loss": 0.42480340600013733, "step": 2585 }, { "epoch": 2.060593980466414, "grad_norm": 0.4823416904018156, "learning_rate": 2.4544904554059566e-05, "loss": 0.36649107933044434, "step": 2586 }, { "epoch": 2.0613912696830776, "grad_norm": 0.5273604621392073, "learning_rate": 2.450710693758746e-05, "loss": 0.42881065607070923, "step": 2587 }, { "epoch": 2.062188558899741, "grad_norm": 0.5869803985837986, "learning_rate": 2.446932899652673e-05, "loss": 0.4627222716808319, "step": 2588 }, { "epoch": 2.062985848116404, "grad_norm": 0.5222524318336329, "learning_rate": 2.4431570760034406e-05, "loss": 0.3709850609302521, "step": 2589 }, { "epoch": 2.0637831373330675, "grad_norm": 0.4894403279658339, "learning_rate": 2.4393832257252252e-05, "loss": 0.3839566111564636, "step": 2590 }, { "epoch": 2.064580426549731, "grad_norm": 0.5509070076101923, "learning_rate": 2.4356113517306854e-05, "loss": 0.3647838830947876, "step": 2591 }, { "epoch": 2.0653777157663944, "grad_norm": 0.621581146926722, "learning_rate": 2.4318414569309556e-05, "loss": 0.5094685554504395, "step": 2592 }, { "epoch": 2.0661750049830574, "grad_norm": 0.5438063166062009, "learning_rate": 2.4280735442356355e-05, "loss": 0.42001157999038696, "step": 2593 }, { "epoch": 2.066972294199721, "grad_norm": 0.5236151126043059, "learning_rate": 2.4243076165528018e-05, "loss": 0.3945579528808594, "step": 2594 }, { "epoch": 2.0677695834163843, "grad_norm": 0.45768163796891426, "learning_rate": 2.4205436767889992e-05, "loss": 0.2978895902633667, "step": 2595 }, { "epoch": 2.0685668726330477, "grad_norm": 0.5272270518248873, "learning_rate": 2.416781727849234e-05, "loss": 0.3511428236961365, "step": 2596 }, { "epoch": 2.069364161849711, "grad_norm": 0.5177247879923098, "learning_rate": 2.413021772636975e-05, "loss": 0.46484190225601196, "step": 2597 }, { "epoch": 2.070161451066374, "grad_norm": 0.5284224656642545, "learning_rate": 2.4092638140541585e-05, "loss": 0.4351509213447571, "step": 2598 }, { "epoch": 2.0709587402830376, "grad_norm": 0.5353699973505789, "learning_rate": 2.4055078550011774e-05, "loss": 0.39285412430763245, "step": 2599 }, { "epoch": 2.071756029499701, "grad_norm": 0.5187119559368576, "learning_rate": 2.401753898376876e-05, "loss": 0.392058789730072, "step": 2600 }, { "epoch": 2.0725533187163645, "grad_norm": 0.5649333075974022, "learning_rate": 2.3980019470785607e-05, "loss": 0.44884344935417175, "step": 2601 }, { "epoch": 2.0733506079330275, "grad_norm": 0.538059973065819, "learning_rate": 2.394252004001989e-05, "loss": 0.3256934881210327, "step": 2602 }, { "epoch": 2.074147897149691, "grad_norm": 0.462476080789869, "learning_rate": 2.3905040720413612e-05, "loss": 0.4072318971157074, "step": 2603 }, { "epoch": 2.0749451863663544, "grad_norm": 0.5536275677289484, "learning_rate": 2.3867581540893336e-05, "loss": 0.4606662392616272, "step": 2604 }, { "epoch": 2.075742475583018, "grad_norm": 0.56000088276469, "learning_rate": 2.383014253037007e-05, "loss": 0.41388893127441406, "step": 2605 }, { "epoch": 2.0765397647996813, "grad_norm": 0.5281082662897822, "learning_rate": 2.3792723717739195e-05, "loss": 0.46552443504333496, "step": 2606 }, { "epoch": 2.0773370540163443, "grad_norm": 0.4884810846800001, "learning_rate": 2.3755325131880575e-05, "loss": 0.3863532841205597, "step": 2607 }, { "epoch": 2.0781343432330077, "grad_norm": 0.576082879618922, "learning_rate": 2.3717946801658413e-05, "loss": 0.42493295669555664, "step": 2608 }, { "epoch": 2.078931632449671, "grad_norm": 0.5114632553217519, "learning_rate": 2.368058875592134e-05, "loss": 0.3314210772514343, "step": 2609 }, { "epoch": 2.0797289216663346, "grad_norm": 0.5124554914507401, "learning_rate": 2.3643251023502244e-05, "loss": 0.366494357585907, "step": 2610 }, { "epoch": 2.0805262108829976, "grad_norm": 0.52627659620517, "learning_rate": 2.3605933633218396e-05, "loss": 0.3743566870689392, "step": 2611 }, { "epoch": 2.081323500099661, "grad_norm": 0.4528688744764385, "learning_rate": 2.3568636613871376e-05, "loss": 0.3208684027194977, "step": 2612 }, { "epoch": 2.0821207893163245, "grad_norm": 0.6573013025686952, "learning_rate": 2.3531359994246983e-05, "loss": 0.45893874764442444, "step": 2613 }, { "epoch": 2.082918078532988, "grad_norm": 0.6176064613709428, "learning_rate": 2.349410380311532e-05, "loss": 0.3915063738822937, "step": 2614 }, { "epoch": 2.0837153677496514, "grad_norm": 0.6247934481379668, "learning_rate": 2.3456868069230732e-05, "loss": 0.3985448181629181, "step": 2615 }, { "epoch": 2.0845126569663144, "grad_norm": 0.46247669327608565, "learning_rate": 2.3419652821331715e-05, "loss": 0.3464108407497406, "step": 2616 }, { "epoch": 2.085309946182978, "grad_norm": 0.5908066190356446, "learning_rate": 2.338245808814102e-05, "loss": 0.44872939586639404, "step": 2617 }, { "epoch": 2.0861072353996413, "grad_norm": 0.5349866100397311, "learning_rate": 2.3345283898365538e-05, "loss": 0.42108604311943054, "step": 2618 }, { "epoch": 2.0869045246163047, "grad_norm": 0.5819463656437797, "learning_rate": 2.3308130280696283e-05, "loss": 0.4191708266735077, "step": 2619 }, { "epoch": 2.0877018138329677, "grad_norm": 0.5092921328035112, "learning_rate": 2.3270997263808418e-05, "loss": 0.36356139183044434, "step": 2620 }, { "epoch": 2.088499103049631, "grad_norm": 0.5143812491483, "learning_rate": 2.3233884876361227e-05, "loss": 0.3849013149738312, "step": 2621 }, { "epoch": 2.0892963922662946, "grad_norm": 0.5527134072851602, "learning_rate": 2.319679314699801e-05, "loss": 0.40358030796051025, "step": 2622 }, { "epoch": 2.090093681482958, "grad_norm": 0.5010509444894016, "learning_rate": 2.3159722104346187e-05, "loss": 0.37265026569366455, "step": 2623 }, { "epoch": 2.0908909706996215, "grad_norm": 0.5762935018468084, "learning_rate": 2.3122671777017152e-05, "loss": 0.4606088399887085, "step": 2624 }, { "epoch": 2.0916882599162845, "grad_norm": 0.589611230981757, "learning_rate": 2.3085642193606378e-05, "loss": 0.38967767357826233, "step": 2625 }, { "epoch": 2.092485549132948, "grad_norm": 0.5831275115064749, "learning_rate": 2.304863338269326e-05, "loss": 0.4304373562335968, "step": 2626 }, { "epoch": 2.0932828383496114, "grad_norm": 0.5024687206982632, "learning_rate": 2.30116453728412e-05, "loss": 0.3423686623573303, "step": 2627 }, { "epoch": 2.094080127566275, "grad_norm": 0.5693360463036162, "learning_rate": 2.297467819259757e-05, "loss": 0.43288201093673706, "step": 2628 }, { "epoch": 2.094877416782938, "grad_norm": 0.506950872868089, "learning_rate": 2.2937731870493595e-05, "loss": 0.41645950078964233, "step": 2629 }, { "epoch": 2.0956747059996013, "grad_norm": 0.4983086260893431, "learning_rate": 2.290080643504446e-05, "loss": 0.29266685247421265, "step": 2630 }, { "epoch": 2.0964719952162647, "grad_norm": 0.54868980341535, "learning_rate": 2.286390191474922e-05, "loss": 0.3841140866279602, "step": 2631 }, { "epoch": 2.097269284432928, "grad_norm": 0.5933675819537605, "learning_rate": 2.282701833809075e-05, "loss": 0.43266761302948, "step": 2632 }, { "epoch": 2.0980665736495916, "grad_norm": 0.4669480217645435, "learning_rate": 2.2790155733535812e-05, "loss": 0.35650816559791565, "step": 2633 }, { "epoch": 2.0988638628662546, "grad_norm": 0.5194237979235766, "learning_rate": 2.275331412953497e-05, "loss": 0.33917534351348877, "step": 2634 }, { "epoch": 2.099661152082918, "grad_norm": 0.5941239328886242, "learning_rate": 2.2716493554522532e-05, "loss": 0.4139540493488312, "step": 2635 }, { "epoch": 2.1004584412995815, "grad_norm": 0.5370268038946518, "learning_rate": 2.267969403691664e-05, "loss": 0.3891141712665558, "step": 2636 }, { "epoch": 2.101255730516245, "grad_norm": 0.630752157011966, "learning_rate": 2.2642915605119146e-05, "loss": 0.43457967042922974, "step": 2637 }, { "epoch": 2.102053019732908, "grad_norm": 0.5302685930392742, "learning_rate": 2.260615828751566e-05, "loss": 0.4122369587421417, "step": 2638 }, { "epoch": 2.1028503089495714, "grad_norm": 0.4541344342059693, "learning_rate": 2.2569422112475446e-05, "loss": 0.32668498158454895, "step": 2639 }, { "epoch": 2.103647598166235, "grad_norm": 0.5073261747739701, "learning_rate": 2.2532707108351487e-05, "loss": 0.36279910802841187, "step": 2640 }, { "epoch": 2.1044448873828983, "grad_norm": 0.5108899959110472, "learning_rate": 2.249601330348044e-05, "loss": 0.41195499897003174, "step": 2641 }, { "epoch": 2.1052421765995617, "grad_norm": 1.271158477781097, "learning_rate": 2.245934072618254e-05, "loss": 0.39594122767448425, "step": 2642 }, { "epoch": 2.1060394658162247, "grad_norm": 0.514709956191584, "learning_rate": 2.24226894047617e-05, "loss": 0.3497564196586609, "step": 2643 }, { "epoch": 2.106836755032888, "grad_norm": 0.5219222672583844, "learning_rate": 2.2386059367505424e-05, "loss": 0.3678882122039795, "step": 2644 }, { "epoch": 2.1076340442495516, "grad_norm": 0.5402568344711822, "learning_rate": 2.234945064268472e-05, "loss": 0.4104117155075073, "step": 2645 }, { "epoch": 2.108431333466215, "grad_norm": 0.5157002850109877, "learning_rate": 2.2312863258554235e-05, "loss": 0.3972019553184509, "step": 2646 }, { "epoch": 2.109228622682878, "grad_norm": 0.4625796690532155, "learning_rate": 2.2276297243352112e-05, "loss": 0.293329656124115, "step": 2647 }, { "epoch": 2.1100259118995415, "grad_norm": 0.5790533918133622, "learning_rate": 2.2239752625299965e-05, "loss": 0.39169007539749146, "step": 2648 }, { "epoch": 2.110823201116205, "grad_norm": 0.5376475125529137, "learning_rate": 2.2203229432602956e-05, "loss": 0.4136650562286377, "step": 2649 }, { "epoch": 2.1116204903328684, "grad_norm": 0.5467294584240464, "learning_rate": 2.2166727693449644e-05, "loss": 0.3384951055049896, "step": 2650 }, { "epoch": 2.112417779549532, "grad_norm": 0.5191433301265876, "learning_rate": 2.213024743601212e-05, "loss": 0.37342745065689087, "step": 2651 }, { "epoch": 2.113215068766195, "grad_norm": 0.5506991869018356, "learning_rate": 2.2093788688445783e-05, "loss": 0.43812963366508484, "step": 2652 }, { "epoch": 2.1140123579828582, "grad_norm": 0.5668370559533044, "learning_rate": 2.2057351478889514e-05, "loss": 0.38999953866004944, "step": 2653 }, { "epoch": 2.1148096471995217, "grad_norm": 0.5788098221899525, "learning_rate": 2.2020935835465567e-05, "loss": 0.4646732211112976, "step": 2654 }, { "epoch": 2.115606936416185, "grad_norm": 0.5117726286046119, "learning_rate": 2.19845417862795e-05, "loss": 0.37137240171432495, "step": 2655 }, { "epoch": 2.116404225632848, "grad_norm": 0.4639011656759293, "learning_rate": 2.194816935942024e-05, "loss": 0.3630253076553345, "step": 2656 }, { "epoch": 2.1172015148495116, "grad_norm": 0.5937625775117759, "learning_rate": 2.1911818582960053e-05, "loss": 0.409474641084671, "step": 2657 }, { "epoch": 2.117998804066175, "grad_norm": 0.5627251675743291, "learning_rate": 2.187548948495442e-05, "loss": 0.3952358067035675, "step": 2658 }, { "epoch": 2.1187960932828385, "grad_norm": 0.47469503942739383, "learning_rate": 2.183918209344215e-05, "loss": 0.3196803331375122, "step": 2659 }, { "epoch": 2.1195933824995015, "grad_norm": 0.6256078514267795, "learning_rate": 2.1802896436445303e-05, "loss": 0.43055450916290283, "step": 2660 }, { "epoch": 2.120390671716165, "grad_norm": 0.5742304126889781, "learning_rate": 2.1766632541969112e-05, "loss": 0.40075796842575073, "step": 2661 }, { "epoch": 2.1211879609328284, "grad_norm": 0.5128387660611565, "learning_rate": 2.173039043800206e-05, "loss": 0.33446675539016724, "step": 2662 }, { "epoch": 2.121985250149492, "grad_norm": 0.5797385744192208, "learning_rate": 2.169417015251579e-05, "loss": 0.38519155979156494, "step": 2663 }, { "epoch": 2.1227825393661552, "grad_norm": 0.4846822259574801, "learning_rate": 2.1657971713465147e-05, "loss": 0.3646356165409088, "step": 2664 }, { "epoch": 2.1235798285828182, "grad_norm": 0.5279625581095404, "learning_rate": 2.162179514878804e-05, "loss": 0.3881179690361023, "step": 2665 }, { "epoch": 2.1243771177994817, "grad_norm": 0.6300772644283922, "learning_rate": 2.1585640486405544e-05, "loss": 0.4458649158477783, "step": 2666 }, { "epoch": 2.125174407016145, "grad_norm": 0.5474692436316542, "learning_rate": 2.1549507754221853e-05, "loss": 0.4045790731906891, "step": 2667 }, { "epoch": 2.1259716962328086, "grad_norm": 0.5749466482793693, "learning_rate": 2.1513396980124167e-05, "loss": 0.5015423893928528, "step": 2668 }, { "epoch": 2.1267689854494716, "grad_norm": 0.5759180540529796, "learning_rate": 2.1477308191982793e-05, "loss": 0.442526638507843, "step": 2669 }, { "epoch": 2.127566274666135, "grad_norm": 0.6683918091172545, "learning_rate": 2.144124141765107e-05, "loss": 0.41758713126182556, "step": 2670 }, { "epoch": 2.1283635638827985, "grad_norm": 0.6156454591269864, "learning_rate": 2.140519668496529e-05, "loss": 0.41928428411483765, "step": 2671 }, { "epoch": 2.129160853099462, "grad_norm": 0.5829291342952122, "learning_rate": 2.1369174021744797e-05, "loss": 0.4588075876235962, "step": 2672 }, { "epoch": 2.1299581423161253, "grad_norm": 0.5518713299793397, "learning_rate": 2.133317345579189e-05, "loss": 0.3675617575645447, "step": 2673 }, { "epoch": 2.1307554315327883, "grad_norm": 0.5901793022676101, "learning_rate": 2.1297195014891767e-05, "loss": 0.42004847526550293, "step": 2674 }, { "epoch": 2.131552720749452, "grad_norm": 0.5275443782655415, "learning_rate": 2.1261238726812626e-05, "loss": 0.3397993743419647, "step": 2675 }, { "epoch": 2.1323500099661152, "grad_norm": 0.48076246148958046, "learning_rate": 2.122530461930548e-05, "loss": 0.3341997265815735, "step": 2676 }, { "epoch": 2.1331472991827787, "grad_norm": 0.5213179279893753, "learning_rate": 2.1189392720104307e-05, "loss": 0.30850547552108765, "step": 2677 }, { "epoch": 2.1339445883994417, "grad_norm": 0.6256085466539845, "learning_rate": 2.115350305692587e-05, "loss": 0.5216628313064575, "step": 2678 }, { "epoch": 2.134741877616105, "grad_norm": 0.5832040492341503, "learning_rate": 2.111763565746983e-05, "loss": 0.4113471806049347, "step": 2679 }, { "epoch": 2.1355391668327686, "grad_norm": 0.5969380788483487, "learning_rate": 2.108179054941865e-05, "loss": 0.46123507618904114, "step": 2680 }, { "epoch": 2.136336456049432, "grad_norm": 0.5128959418549647, "learning_rate": 2.104596776043755e-05, "loss": 0.4124094247817993, "step": 2681 }, { "epoch": 2.1371337452660955, "grad_norm": 0.5460213517837159, "learning_rate": 2.1010167318174568e-05, "loss": 0.36438897252082825, "step": 2682 }, { "epoch": 2.1379310344827585, "grad_norm": 0.6032457469583524, "learning_rate": 2.0974389250260503e-05, "loss": 0.49179187417030334, "step": 2683 }, { "epoch": 2.138728323699422, "grad_norm": 0.5803500897452231, "learning_rate": 2.0938633584308816e-05, "loss": 0.4333716034889221, "step": 2684 }, { "epoch": 2.1395256129160853, "grad_norm": 0.5324707908562752, "learning_rate": 2.0902900347915753e-05, "loss": 0.40017205476760864, "step": 2685 }, { "epoch": 2.140322902132749, "grad_norm": 0.49636546496685957, "learning_rate": 2.086718956866024e-05, "loss": 0.3574758768081665, "step": 2686 }, { "epoch": 2.141120191349412, "grad_norm": 0.5322638947995143, "learning_rate": 2.0831501274103804e-05, "loss": 0.437277227640152, "step": 2687 }, { "epoch": 2.1419174805660752, "grad_norm": 0.51982758158586, "learning_rate": 2.07958354917907e-05, "loss": 0.3655397593975067, "step": 2688 }, { "epoch": 2.1427147697827387, "grad_norm": 0.45370962944971027, "learning_rate": 2.076019224924778e-05, "loss": 0.3046844005584717, "step": 2689 }, { "epoch": 2.143512058999402, "grad_norm": 0.5063329639258715, "learning_rate": 2.0724571573984462e-05, "loss": 0.3711724281311035, "step": 2690 }, { "epoch": 2.1443093482160656, "grad_norm": 0.45553413262001585, "learning_rate": 2.0688973493492797e-05, "loss": 0.35046327114105225, "step": 2691 }, { "epoch": 2.1451066374327286, "grad_norm": 0.5797724724714121, "learning_rate": 2.0653398035247378e-05, "loss": 0.3769068419933319, "step": 2692 }, { "epoch": 2.145903926649392, "grad_norm": 0.5124449446955325, "learning_rate": 2.061784522670535e-05, "loss": 0.32739606499671936, "step": 2693 }, { "epoch": 2.1467012158660554, "grad_norm": 0.45497989121087234, "learning_rate": 2.0582315095306344e-05, "loss": 0.28550460934638977, "step": 2694 }, { "epoch": 2.147498505082719, "grad_norm": 0.5879055016562771, "learning_rate": 2.0546807668472517e-05, "loss": 0.4850584864616394, "step": 2695 }, { "epoch": 2.148295794299382, "grad_norm": 0.4405423588599789, "learning_rate": 2.0511322973608514e-05, "loss": 0.3045344650745392, "step": 2696 }, { "epoch": 2.1490930835160453, "grad_norm": 0.538425163525223, "learning_rate": 2.0475861038101397e-05, "loss": 0.3863498270511627, "step": 2697 }, { "epoch": 2.1498903727327088, "grad_norm": 0.4276229310991066, "learning_rate": 2.0440421889320677e-05, "loss": 0.28554943203926086, "step": 2698 }, { "epoch": 2.150687661949372, "grad_norm": 0.5635586123337913, "learning_rate": 2.0405005554618318e-05, "loss": 0.35471951961517334, "step": 2699 }, { "epoch": 2.1514849511660357, "grad_norm": 0.6097289286870908, "learning_rate": 2.036961206132861e-05, "loss": 0.4218328297138214, "step": 2700 }, { "epoch": 2.1522822403826987, "grad_norm": 0.5729183187508651, "learning_rate": 2.0334241436768248e-05, "loss": 0.40059277415275574, "step": 2701 }, { "epoch": 2.153079529599362, "grad_norm": 0.5854049071406139, "learning_rate": 2.0298893708236306e-05, "loss": 0.3839888870716095, "step": 2702 }, { "epoch": 2.1538768188160256, "grad_norm": 0.659505793572343, "learning_rate": 2.026356890301413e-05, "loss": 0.5049005746841431, "step": 2703 }, { "epoch": 2.154674108032689, "grad_norm": 0.5649084257113214, "learning_rate": 2.022826704836539e-05, "loss": 0.40705758333206177, "step": 2704 }, { "epoch": 2.155471397249352, "grad_norm": 0.5514168490528895, "learning_rate": 2.0192988171536066e-05, "loss": 0.38438206911087036, "step": 2705 }, { "epoch": 2.1562686864660154, "grad_norm": 0.6308498556704932, "learning_rate": 2.0157732299754412e-05, "loss": 0.4591555595397949, "step": 2706 }, { "epoch": 2.157065975682679, "grad_norm": 0.5048117079968224, "learning_rate": 2.012249946023086e-05, "loss": 0.322212278842926, "step": 2707 }, { "epoch": 2.1578632648993423, "grad_norm": 0.5532540249608238, "learning_rate": 2.008728968015814e-05, "loss": 0.3814984858036041, "step": 2708 }, { "epoch": 2.1586605541160058, "grad_norm": 0.4854261493680422, "learning_rate": 2.0052102986711163e-05, "loss": 0.2675703167915344, "step": 2709 }, { "epoch": 2.1594578433326688, "grad_norm": 0.5849652257779875, "learning_rate": 2.0016939407046987e-05, "loss": 0.4476856291294098, "step": 2710 }, { "epoch": 2.160255132549332, "grad_norm": 0.5383870942218962, "learning_rate": 1.998179896830487e-05, "loss": 0.3587649166584015, "step": 2711 }, { "epoch": 2.1610524217659957, "grad_norm": 0.5997967771952034, "learning_rate": 1.9946681697606223e-05, "loss": 0.38662731647491455, "step": 2712 }, { "epoch": 2.161849710982659, "grad_norm": 0.5693074239877208, "learning_rate": 1.9911587622054507e-05, "loss": 0.45611563324928284, "step": 2713 }, { "epoch": 2.162647000199322, "grad_norm": 0.561212114502945, "learning_rate": 1.987651676873534e-05, "loss": 0.4437008202075958, "step": 2714 }, { "epoch": 2.1634442894159855, "grad_norm": 0.6667499243227296, "learning_rate": 1.984146916471643e-05, "loss": 0.4090425372123718, "step": 2715 }, { "epoch": 2.164241578632649, "grad_norm": 0.5795162492999137, "learning_rate": 1.9806444837047477e-05, "loss": 0.4230348765850067, "step": 2716 }, { "epoch": 2.1650388678493124, "grad_norm": 0.6243758446058871, "learning_rate": 1.977144381276026e-05, "loss": 0.4497097432613373, "step": 2717 }, { "epoch": 2.165836157065976, "grad_norm": 0.5820151498441836, "learning_rate": 1.9736466118868576e-05, "loss": 0.4388848543167114, "step": 2718 }, { "epoch": 2.166633446282639, "grad_norm": 0.5562279694921366, "learning_rate": 1.970151178236821e-05, "loss": 0.4167141318321228, "step": 2719 }, { "epoch": 2.1674307354993023, "grad_norm": 0.5299488816010552, "learning_rate": 1.9666580830236885e-05, "loss": 0.40935230255126953, "step": 2720 }, { "epoch": 2.1682280247159658, "grad_norm": 0.5761887444604019, "learning_rate": 1.9631673289434326e-05, "loss": 0.43036776781082153, "step": 2721 }, { "epoch": 2.169025313932629, "grad_norm": 0.5148913565288216, "learning_rate": 1.9596789186902182e-05, "loss": 0.36561453342437744, "step": 2722 }, { "epoch": 2.169822603149292, "grad_norm": 0.5749897497793935, "learning_rate": 1.9561928549563968e-05, "loss": 0.3901434540748596, "step": 2723 }, { "epoch": 2.1706198923659556, "grad_norm": 0.6057513245751737, "learning_rate": 1.9527091404325126e-05, "loss": 0.45776236057281494, "step": 2724 }, { "epoch": 2.171417181582619, "grad_norm": 0.5684923015473723, "learning_rate": 1.9492277778072986e-05, "loss": 0.38395604491233826, "step": 2725 }, { "epoch": 2.1722144707992825, "grad_norm": 0.5481330976302399, "learning_rate": 1.9457487697676667e-05, "loss": 0.4334850013256073, "step": 2726 }, { "epoch": 2.173011760015946, "grad_norm": 0.5705852534305699, "learning_rate": 1.942272118998716e-05, "loss": 0.40701931715011597, "step": 2727 }, { "epoch": 2.173809049232609, "grad_norm": 0.6185752516925933, "learning_rate": 1.9387978281837272e-05, "loss": 0.4217440187931061, "step": 2728 }, { "epoch": 2.1746063384492724, "grad_norm": 0.6195079364967139, "learning_rate": 1.9353259000041553e-05, "loss": 0.4124920964241028, "step": 2729 }, { "epoch": 2.175403627665936, "grad_norm": 0.5631719666983294, "learning_rate": 1.9318563371396324e-05, "loss": 0.3671838939189911, "step": 2730 }, { "epoch": 2.1762009168825993, "grad_norm": 0.535270709308157, "learning_rate": 1.928389142267969e-05, "loss": 0.41167357563972473, "step": 2731 }, { "epoch": 2.1769982060992623, "grad_norm": 0.579366976641574, "learning_rate": 1.9249243180651466e-05, "loss": 0.41126832365989685, "step": 2732 }, { "epoch": 2.1777954953159258, "grad_norm": 0.6257525135297837, "learning_rate": 1.921461867205313e-05, "loss": 0.42171216011047363, "step": 2733 }, { "epoch": 2.178592784532589, "grad_norm": 0.5549426302345742, "learning_rate": 1.9180017923607886e-05, "loss": 0.3953387141227722, "step": 2734 }, { "epoch": 2.1793900737492526, "grad_norm": 0.5710256461084555, "learning_rate": 1.9145440962020605e-05, "loss": 0.3895798325538635, "step": 2735 }, { "epoch": 2.180187362965916, "grad_norm": 0.5331500047515881, "learning_rate": 1.9110887813977752e-05, "loss": 0.35467809438705444, "step": 2736 }, { "epoch": 2.180984652182579, "grad_norm": 0.5386519983929249, "learning_rate": 1.9076358506147456e-05, "loss": 0.37931028008461, "step": 2737 }, { "epoch": 2.1817819413992425, "grad_norm": 0.5304765513186183, "learning_rate": 1.904185306517945e-05, "loss": 0.41350311040878296, "step": 2738 }, { "epoch": 2.182579230615906, "grad_norm": 0.4606835628832249, "learning_rate": 1.900737151770499e-05, "loss": 0.26186102628707886, "step": 2739 }, { "epoch": 2.1833765198325694, "grad_norm": 0.5107746422813567, "learning_rate": 1.8972913890336953e-05, "loss": 0.34940436482429504, "step": 2740 }, { "epoch": 2.1841738090492324, "grad_norm": 0.5744583484649819, "learning_rate": 1.8938480209669745e-05, "loss": 0.46034830808639526, "step": 2741 }, { "epoch": 2.184971098265896, "grad_norm": 0.5385425091613817, "learning_rate": 1.890407050227924e-05, "loss": 0.3952460289001465, "step": 2742 }, { "epoch": 2.1857683874825593, "grad_norm": 0.557917478492134, "learning_rate": 1.886968479472287e-05, "loss": 0.4314436614513397, "step": 2743 }, { "epoch": 2.1865656766992227, "grad_norm": 0.530183614093757, "learning_rate": 1.8835323113539527e-05, "loss": 0.3548696041107178, "step": 2744 }, { "epoch": 2.187362965915886, "grad_norm": 0.4476688693314649, "learning_rate": 1.8800985485249523e-05, "loss": 0.28766554594039917, "step": 2745 }, { "epoch": 2.188160255132549, "grad_norm": 0.4636967173471016, "learning_rate": 1.8766671936354647e-05, "loss": 0.32750290632247925, "step": 2746 }, { "epoch": 2.1889575443492126, "grad_norm": 0.561116749779508, "learning_rate": 1.8732382493338095e-05, "loss": 0.44000816345214844, "step": 2747 }, { "epoch": 2.189754833565876, "grad_norm": 0.5511562910819595, "learning_rate": 1.8698117182664477e-05, "loss": 0.3881528377532959, "step": 2748 }, { "epoch": 2.1905521227825395, "grad_norm": 0.6082878758938792, "learning_rate": 1.8663876030779714e-05, "loss": 0.4727729558944702, "step": 2749 }, { "epoch": 2.1913494119992025, "grad_norm": 0.5748319903238123, "learning_rate": 1.8629659064111138e-05, "loss": 0.3744213879108429, "step": 2750 }, { "epoch": 2.192146701215866, "grad_norm": 0.5179939367279912, "learning_rate": 1.8595466309067424e-05, "loss": 0.3846222162246704, "step": 2751 }, { "epoch": 2.1929439904325294, "grad_norm": 0.6120490622456095, "learning_rate": 1.856129779203849e-05, "loss": 0.3880252242088318, "step": 2752 }, { "epoch": 2.193741279649193, "grad_norm": 0.5982630621889573, "learning_rate": 1.8527153539395618e-05, "loss": 0.4227294921875, "step": 2753 }, { "epoch": 2.1945385688658563, "grad_norm": 0.5790253176850915, "learning_rate": 1.849303357749136e-05, "loss": 0.40795934200286865, "step": 2754 }, { "epoch": 2.1953358580825193, "grad_norm": 0.47573014811375863, "learning_rate": 1.845893793265947e-05, "loss": 0.3512038588523865, "step": 2755 }, { "epoch": 2.1961331472991827, "grad_norm": 0.5477360808821913, "learning_rate": 1.842486663121495e-05, "loss": 0.4017782211303711, "step": 2756 }, { "epoch": 2.196930436515846, "grad_norm": 0.607323178931381, "learning_rate": 1.8390819699454037e-05, "loss": 0.47922125458717346, "step": 2757 }, { "epoch": 2.1977277257325096, "grad_norm": 0.6073355735905097, "learning_rate": 1.835679716365417e-05, "loss": 0.4466491937637329, "step": 2758 }, { "epoch": 2.1985250149491726, "grad_norm": 0.5268497784047205, "learning_rate": 1.8322799050073912e-05, "loss": 0.38180193305015564, "step": 2759 }, { "epoch": 2.199322304165836, "grad_norm": 0.5219875754607022, "learning_rate": 1.8288825384953007e-05, "loss": 0.3383881747722626, "step": 2760 }, { "epoch": 2.2001195933824995, "grad_norm": 0.6228713075835057, "learning_rate": 1.8254876194512342e-05, "loss": 0.48487919569015503, "step": 2761 }, { "epoch": 2.200916882599163, "grad_norm": 0.5549012639835401, "learning_rate": 1.822095150495387e-05, "loss": 0.4684241712093353, "step": 2762 }, { "epoch": 2.2017141718158264, "grad_norm": 0.5703629558048608, "learning_rate": 1.8187051342460676e-05, "loss": 0.4601238965988159, "step": 2763 }, { "epoch": 2.2025114610324894, "grad_norm": 0.48212402698442625, "learning_rate": 1.815317573319692e-05, "loss": 0.37218180298805237, "step": 2764 }, { "epoch": 2.203308750249153, "grad_norm": 0.5581281695790354, "learning_rate": 1.8119324703307757e-05, "loss": 0.43681493401527405, "step": 2765 }, { "epoch": 2.2041060394658163, "grad_norm": 0.5071648211076474, "learning_rate": 1.808549827891942e-05, "loss": 0.37181341648101807, "step": 2766 }, { "epoch": 2.2049033286824797, "grad_norm": 0.5503374128852235, "learning_rate": 1.8051696486139162e-05, "loss": 0.3734795153141022, "step": 2767 }, { "epoch": 2.2057006178991427, "grad_norm": 0.5088093273826164, "learning_rate": 1.801791935105516e-05, "loss": 0.3798905313014984, "step": 2768 }, { "epoch": 2.206497907115806, "grad_norm": 0.5493705079456173, "learning_rate": 1.7984166899736638e-05, "loss": 0.3927134573459625, "step": 2769 }, { "epoch": 2.2072951963324696, "grad_norm": 0.5430052404211366, "learning_rate": 1.7950439158233727e-05, "loss": 0.35710638761520386, "step": 2770 }, { "epoch": 2.208092485549133, "grad_norm": 0.5902624310054316, "learning_rate": 1.7916736152577474e-05, "loss": 0.4824860692024231, "step": 2771 }, { "epoch": 2.2088897747657965, "grad_norm": 0.6286604616794016, "learning_rate": 1.788305790877986e-05, "loss": 0.44355839490890503, "step": 2772 }, { "epoch": 2.2096870639824595, "grad_norm": 0.5598181503362175, "learning_rate": 1.784940445283376e-05, "loss": 0.40347838401794434, "step": 2773 }, { "epoch": 2.210484353199123, "grad_norm": 0.6176140811306661, "learning_rate": 1.781577581071292e-05, "loss": 0.43095070123672485, "step": 2774 }, { "epoch": 2.2112816424157864, "grad_norm": 0.6695942962768733, "learning_rate": 1.7782172008371888e-05, "loss": 0.4177628457546234, "step": 2775 }, { "epoch": 2.21207893163245, "grad_norm": 0.5100357078143489, "learning_rate": 1.774859307174609e-05, "loss": 0.3581973910331726, "step": 2776 }, { "epoch": 2.212876220849113, "grad_norm": 0.5143096578090365, "learning_rate": 1.7715039026751756e-05, "loss": 0.38256362080574036, "step": 2777 }, { "epoch": 2.2136735100657763, "grad_norm": 0.5582572129756274, "learning_rate": 1.768150989928587e-05, "loss": 0.38917869329452515, "step": 2778 }, { "epoch": 2.2144707992824397, "grad_norm": 0.5736219936483484, "learning_rate": 1.764800571522622e-05, "loss": 0.38434284925460815, "step": 2779 }, { "epoch": 2.215268088499103, "grad_norm": 0.551140126507011, "learning_rate": 1.7614526500431344e-05, "loss": 0.4052828550338745, "step": 2780 }, { "epoch": 2.2160653777157666, "grad_norm": 0.5368829346821329, "learning_rate": 1.7581072280740478e-05, "loss": 0.44304049015045166, "step": 2781 }, { "epoch": 2.2168626669324296, "grad_norm": 0.5292146339406234, "learning_rate": 1.754764308197358e-05, "loss": 0.3999442756175995, "step": 2782 }, { "epoch": 2.217659956149093, "grad_norm": 0.8141362247155031, "learning_rate": 1.7514238929931303e-05, "loss": 0.4291272461414337, "step": 2783 }, { "epoch": 2.2184572453657565, "grad_norm": 0.558665561856897, "learning_rate": 1.7480859850394992e-05, "loss": 0.3593091368675232, "step": 2784 }, { "epoch": 2.21925453458242, "grad_norm": 0.5385233804782704, "learning_rate": 1.7447505869126577e-05, "loss": 0.3999871611595154, "step": 2785 }, { "epoch": 2.220051823799083, "grad_norm": 0.4826669532275527, "learning_rate": 1.741417701186868e-05, "loss": 0.3300282955169678, "step": 2786 }, { "epoch": 2.2208491130157464, "grad_norm": 0.5111735243334631, "learning_rate": 1.7380873304344526e-05, "loss": 0.35488763451576233, "step": 2787 }, { "epoch": 2.22164640223241, "grad_norm": 0.5731955039019221, "learning_rate": 1.7347594772257865e-05, "loss": 0.41247230768203735, "step": 2788 }, { "epoch": 2.2224436914490733, "grad_norm": 0.5553361315239153, "learning_rate": 1.7314341441293093e-05, "loss": 0.3917520046234131, "step": 2789 }, { "epoch": 2.2232409806657367, "grad_norm": 0.6321180021599002, "learning_rate": 1.728111333711514e-05, "loss": 0.39506474137306213, "step": 2790 }, { "epoch": 2.2240382698823997, "grad_norm": 0.6310636692233432, "learning_rate": 1.7247910485369417e-05, "loss": 0.4454514980316162, "step": 2791 }, { "epoch": 2.224835559099063, "grad_norm": 0.5726558819700394, "learning_rate": 1.7214732911681897e-05, "loss": 0.39044690132141113, "step": 2792 }, { "epoch": 2.2256328483157266, "grad_norm": 0.5509099119006109, "learning_rate": 1.7181580641659045e-05, "loss": 0.34556734561920166, "step": 2793 }, { "epoch": 2.22643013753239, "grad_norm": 0.59339407202227, "learning_rate": 1.7148453700887744e-05, "loss": 0.38913241028785706, "step": 2794 }, { "epoch": 2.227227426749053, "grad_norm": 0.5777637199026481, "learning_rate": 1.711535211493538e-05, "loss": 0.38675594329833984, "step": 2795 }, { "epoch": 2.2280247159657165, "grad_norm": 0.5962813687550252, "learning_rate": 1.708227590934977e-05, "loss": 0.42789044976234436, "step": 2796 }, { "epoch": 2.22882200518238, "grad_norm": 0.6271598921953613, "learning_rate": 1.704922510965909e-05, "loss": 0.41925328969955444, "step": 2797 }, { "epoch": 2.2296192943990434, "grad_norm": 0.5420943338698522, "learning_rate": 1.701619974137196e-05, "loss": 0.37794625759124756, "step": 2798 }, { "epoch": 2.230416583615707, "grad_norm": 0.5676366268125934, "learning_rate": 1.698319982997737e-05, "loss": 0.3948010802268982, "step": 2799 }, { "epoch": 2.23121387283237, "grad_norm": 0.5185874076002887, "learning_rate": 1.6950225400944615e-05, "loss": 0.316039502620697, "step": 2800 }, { "epoch": 2.2320111620490333, "grad_norm": 0.5417598631779073, "learning_rate": 1.691727647972337e-05, "loss": 0.3967132568359375, "step": 2801 }, { "epoch": 2.2328084512656967, "grad_norm": 0.5276477908879026, "learning_rate": 1.688435309174361e-05, "loss": 0.4037865996360779, "step": 2802 }, { "epoch": 2.23360574048236, "grad_norm": 0.6195561523934606, "learning_rate": 1.685145526241562e-05, "loss": 0.43581750988960266, "step": 2803 }, { "epoch": 2.234403029699023, "grad_norm": 0.5041618299936564, "learning_rate": 1.6818583017129902e-05, "loss": 0.36653393507003784, "step": 2804 }, { "epoch": 2.2352003189156866, "grad_norm": 0.5830465011984654, "learning_rate": 1.678573638125727e-05, "loss": 0.40140533447265625, "step": 2805 }, { "epoch": 2.23599760813235, "grad_norm": 0.5471291161884551, "learning_rate": 1.675291538014877e-05, "loss": 0.4165595471858978, "step": 2806 }, { "epoch": 2.2367948973490135, "grad_norm": 0.5429305859944348, "learning_rate": 1.6720120039135627e-05, "loss": 0.37880945205688477, "step": 2807 }, { "epoch": 2.237592186565677, "grad_norm": 0.587522587513771, "learning_rate": 1.6687350383529267e-05, "loss": 0.4531378149986267, "step": 2808 }, { "epoch": 2.23838947578234, "grad_norm": 0.5728160478543182, "learning_rate": 1.665460643862132e-05, "loss": 0.3768921196460724, "step": 2809 }, { "epoch": 2.2391867649990034, "grad_norm": 0.5583550430451676, "learning_rate": 1.6621888229683573e-05, "loss": 0.37107565999031067, "step": 2810 }, { "epoch": 2.239984054215667, "grad_norm": 0.5469332185280069, "learning_rate": 1.6589195781967908e-05, "loss": 0.3483424186706543, "step": 2811 }, { "epoch": 2.2407813434323303, "grad_norm": 0.5510407026342198, "learning_rate": 1.6556529120706365e-05, "loss": 0.38066989183425903, "step": 2812 }, { "epoch": 2.2415786326489933, "grad_norm": 0.534397679952964, "learning_rate": 1.652388827111109e-05, "loss": 0.3942036032676697, "step": 2813 }, { "epoch": 2.2423759218656567, "grad_norm": 0.5772938497159597, "learning_rate": 1.649127325837424e-05, "loss": 0.42870569229125977, "step": 2814 }, { "epoch": 2.24317321108232, "grad_norm": 0.5326576341456252, "learning_rate": 1.6458684107668105e-05, "loss": 0.3696829080581665, "step": 2815 }, { "epoch": 2.2439705002989836, "grad_norm": 0.48767842498476327, "learning_rate": 1.6426120844145e-05, "loss": 0.312699556350708, "step": 2816 }, { "epoch": 2.2447677895156466, "grad_norm": 0.5506589914218389, "learning_rate": 1.6393583492937216e-05, "loss": 0.3853512108325958, "step": 2817 }, { "epoch": 2.24556507873231, "grad_norm": 0.47357502612051733, "learning_rate": 1.636107207915709e-05, "loss": 0.32332268357276917, "step": 2818 }, { "epoch": 2.2463623679489735, "grad_norm": 0.563928402367261, "learning_rate": 1.6328586627896948e-05, "loss": 0.37901246547698975, "step": 2819 }, { "epoch": 2.247159657165637, "grad_norm": 0.5917805208774932, "learning_rate": 1.6296127164229018e-05, "loss": 0.41805964708328247, "step": 2820 }, { "epoch": 2.2479569463823004, "grad_norm": 0.5582759471378356, "learning_rate": 1.626369371320553e-05, "loss": 0.435054749250412, "step": 2821 }, { "epoch": 2.2487542355989634, "grad_norm": 0.4953211009163747, "learning_rate": 1.6231286299858635e-05, "loss": 0.38018473982810974, "step": 2822 }, { "epoch": 2.249551524815627, "grad_norm": 0.5757901563608385, "learning_rate": 1.6198904949200333e-05, "loss": 0.3656071424484253, "step": 2823 }, { "epoch": 2.2503488140322903, "grad_norm": 0.5269305399434708, "learning_rate": 1.6166549686222566e-05, "loss": 0.3583618998527527, "step": 2824 }, { "epoch": 2.2511461032489537, "grad_norm": 0.63310012564342, "learning_rate": 1.6134220535897133e-05, "loss": 0.4208824634552002, "step": 2825 }, { "epoch": 2.251943392465617, "grad_norm": 0.6133170500133097, "learning_rate": 1.610191752317564e-05, "loss": 0.4704321622848511, "step": 2826 }, { "epoch": 2.25274068168228, "grad_norm": 0.5830722443447859, "learning_rate": 1.606964067298956e-05, "loss": 0.4011741876602173, "step": 2827 }, { "epoch": 2.2535379708989436, "grad_norm": 0.5952049512644862, "learning_rate": 1.603739001025017e-05, "loss": 0.414475679397583, "step": 2828 }, { "epoch": 2.254335260115607, "grad_norm": 0.543643171102606, "learning_rate": 1.6005165559848535e-05, "loss": 0.3882423937320709, "step": 2829 }, { "epoch": 2.2551325493322705, "grad_norm": 0.5809083788088892, "learning_rate": 1.5972967346655448e-05, "loss": 0.3856762647628784, "step": 2830 }, { "epoch": 2.2559298385489335, "grad_norm": 0.46172029497864797, "learning_rate": 1.5940795395521507e-05, "loss": 0.33134588599205017, "step": 2831 }, { "epoch": 2.256727127765597, "grad_norm": 0.6442268148900728, "learning_rate": 1.590864973127703e-05, "loss": 0.46707379817962646, "step": 2832 }, { "epoch": 2.2575244169822604, "grad_norm": 0.478800192497805, "learning_rate": 1.587653037873203e-05, "loss": 0.3438432812690735, "step": 2833 }, { "epoch": 2.258321706198924, "grad_norm": 0.5230734895554935, "learning_rate": 1.584443736267619e-05, "loss": 0.3691922426223755, "step": 2834 }, { "epoch": 2.2591189954155872, "grad_norm": 0.6187482026123748, "learning_rate": 1.581237070787892e-05, "loss": 0.3899630904197693, "step": 2835 }, { "epoch": 2.2599162846322502, "grad_norm": 0.5449316930110454, "learning_rate": 1.578033043908928e-05, "loss": 0.363080233335495, "step": 2836 }, { "epoch": 2.2607135738489137, "grad_norm": 0.4901616052378673, "learning_rate": 1.5748316581035898e-05, "loss": 0.3352287709712982, "step": 2837 }, { "epoch": 2.261510863065577, "grad_norm": 0.5228530511701286, "learning_rate": 1.5716329158427094e-05, "loss": 0.3580567538738251, "step": 2838 }, { "epoch": 2.26230815228224, "grad_norm": 0.5298931123460856, "learning_rate": 1.568436819595077e-05, "loss": 0.4160604774951935, "step": 2839 }, { "epoch": 2.2631054414989036, "grad_norm": 0.5296994829481922, "learning_rate": 1.5652433718274357e-05, "loss": 0.31693851947784424, "step": 2840 }, { "epoch": 2.263902730715567, "grad_norm": 0.605390267390386, "learning_rate": 1.5620525750044902e-05, "loss": 0.46627917885780334, "step": 2841 }, { "epoch": 2.2647000199322305, "grad_norm": 0.5305296938862121, "learning_rate": 1.558864431588898e-05, "loss": 0.31110161542892456, "step": 2842 }, { "epoch": 2.265497309148894, "grad_norm": 0.5943522064717666, "learning_rate": 1.555678944041265e-05, "loss": 0.4232354760169983, "step": 2843 }, { "epoch": 2.2662945983655574, "grad_norm": 0.6112962589990563, "learning_rate": 1.552496114820151e-05, "loss": 0.43727168440818787, "step": 2844 }, { "epoch": 2.2670918875822204, "grad_norm": 0.8568186422470632, "learning_rate": 1.5493159463820657e-05, "loss": 0.45860618352890015, "step": 2845 }, { "epoch": 2.267889176798884, "grad_norm": 0.4898363258817632, "learning_rate": 1.546138441181459e-05, "loss": 0.28965821862220764, "step": 2846 }, { "epoch": 2.2686864660155472, "grad_norm": 0.5756958478312746, "learning_rate": 1.5429636016707305e-05, "loss": 0.45079687237739563, "step": 2847 }, { "epoch": 2.2694837552322102, "grad_norm": 0.5593991039254461, "learning_rate": 1.5397914303002227e-05, "loss": 0.33118879795074463, "step": 2848 }, { "epoch": 2.2702810444488737, "grad_norm": 0.48778349435634966, "learning_rate": 1.536621929518214e-05, "loss": 0.3099055290222168, "step": 2849 }, { "epoch": 2.271078333665537, "grad_norm": 0.4603023752498812, "learning_rate": 1.5334551017709253e-05, "loss": 0.30477216839790344, "step": 2850 }, { "epoch": 2.2718756228822006, "grad_norm": 0.6241901878786048, "learning_rate": 1.5302909495025164e-05, "loss": 0.4720269441604614, "step": 2851 }, { "epoch": 2.272672912098864, "grad_norm": 0.6297807209185678, "learning_rate": 1.5271294751550763e-05, "loss": 0.5060440301895142, "step": 2852 }, { "epoch": 2.273470201315527, "grad_norm": 0.6176126161358992, "learning_rate": 1.5239706811686322e-05, "loss": 0.4575200378894806, "step": 2853 }, { "epoch": 2.2742674905321905, "grad_norm": 0.5545739423509098, "learning_rate": 1.5208145699811415e-05, "loss": 0.3675934076309204, "step": 2854 }, { "epoch": 2.275064779748854, "grad_norm": 0.593051858633469, "learning_rate": 1.5176611440284883e-05, "loss": 0.3924582600593567, "step": 2855 }, { "epoch": 2.2758620689655173, "grad_norm": 0.5247861446963671, "learning_rate": 1.514510405744487e-05, "loss": 0.35661131143569946, "step": 2856 }, { "epoch": 2.2766593581821803, "grad_norm": 0.6461404730860926, "learning_rate": 1.5113623575608776e-05, "loss": 0.5116153955459595, "step": 2857 }, { "epoch": 2.277456647398844, "grad_norm": 0.5899130005498685, "learning_rate": 1.508217001907325e-05, "loss": 0.4860977530479431, "step": 2858 }, { "epoch": 2.2782539366155072, "grad_norm": 0.612324314312676, "learning_rate": 1.5050743412114122e-05, "loss": 0.3525826930999756, "step": 2859 }, { "epoch": 2.2790512258321707, "grad_norm": 0.5881972415522854, "learning_rate": 1.5019343778986428e-05, "loss": 0.3500008285045624, "step": 2860 }, { "epoch": 2.279848515048834, "grad_norm": 0.6159406288775567, "learning_rate": 1.4987971143924428e-05, "loss": 0.46199631690979004, "step": 2861 }, { "epoch": 2.280645804265497, "grad_norm": 0.5978451332481352, "learning_rate": 1.4956625531141493e-05, "loss": 0.4181378483772278, "step": 2862 }, { "epoch": 2.2814430934821606, "grad_norm": 0.5843915259531329, "learning_rate": 1.4925306964830182e-05, "loss": 0.3929179310798645, "step": 2863 }, { "epoch": 2.282240382698824, "grad_norm": 0.5290126098205694, "learning_rate": 1.4894015469162149e-05, "loss": 0.3527885675430298, "step": 2864 }, { "epoch": 2.2830376719154875, "grad_norm": 0.5568609773348054, "learning_rate": 1.486275106828819e-05, "loss": 0.40574362874031067, "step": 2865 }, { "epoch": 2.2838349611321505, "grad_norm": 0.5647538894332601, "learning_rate": 1.4831513786338125e-05, "loss": 0.4108649790287018, "step": 2866 }, { "epoch": 2.284632250348814, "grad_norm": 0.48190194959743665, "learning_rate": 1.480030364742091e-05, "loss": 0.3691405653953552, "step": 2867 }, { "epoch": 2.2854295395654773, "grad_norm": 0.4414990215064448, "learning_rate": 1.4769120675624536e-05, "loss": 0.2977346181869507, "step": 2868 }, { "epoch": 2.286226828782141, "grad_norm": 0.5966235113825042, "learning_rate": 1.4737964895015988e-05, "loss": 0.4111267626285553, "step": 2869 }, { "epoch": 2.2870241179988042, "grad_norm": 0.5585317584902911, "learning_rate": 1.4706836329641311e-05, "loss": 0.3858644366264343, "step": 2870 }, { "epoch": 2.2878214072154672, "grad_norm": 0.588887984784563, "learning_rate": 1.4675735003525532e-05, "loss": 0.37877923250198364, "step": 2871 }, { "epoch": 2.2886186964321307, "grad_norm": 0.5786833193953466, "learning_rate": 1.4644660940672627e-05, "loss": 0.3732781410217285, "step": 2872 }, { "epoch": 2.289415985648794, "grad_norm": 0.6774937266571471, "learning_rate": 1.461361416506557e-05, "loss": 0.46353936195373535, "step": 2873 }, { "epoch": 2.2902132748654576, "grad_norm": 0.5353369074445453, "learning_rate": 1.4582594700666274e-05, "loss": 0.3865011930465698, "step": 2874 }, { "epoch": 2.2910105640821206, "grad_norm": 0.546477856411973, "learning_rate": 1.4551602571415518e-05, "loss": 0.42576709389686584, "step": 2875 }, { "epoch": 2.291807853298784, "grad_norm": 0.5471154430615921, "learning_rate": 1.452063780123305e-05, "loss": 0.4166170060634613, "step": 2876 }, { "epoch": 2.2926051425154474, "grad_norm": 0.6200180482047334, "learning_rate": 1.4489700414017488e-05, "loss": 0.41361820697784424, "step": 2877 }, { "epoch": 2.293402431732111, "grad_norm": 0.5056331325913241, "learning_rate": 1.4458790433646263e-05, "loss": 0.40239766240119934, "step": 2878 }, { "epoch": 2.2941997209487743, "grad_norm": 0.6073516080658152, "learning_rate": 1.4427907883975722e-05, "loss": 0.42955130338668823, "step": 2879 }, { "epoch": 2.2949970101654373, "grad_norm": 0.49609426992615585, "learning_rate": 1.439705278884102e-05, "loss": 0.3386748731136322, "step": 2880 }, { "epoch": 2.295794299382101, "grad_norm": 0.44928635673350337, "learning_rate": 1.4366225172056086e-05, "loss": 0.283164918422699, "step": 2881 }, { "epoch": 2.296591588598764, "grad_norm": 0.5375721437810609, "learning_rate": 1.4335425057413687e-05, "loss": 0.3540676236152649, "step": 2882 }, { "epoch": 2.2973888778154277, "grad_norm": 0.5925850232922619, "learning_rate": 1.4304652468685348e-05, "loss": 0.49057528376579285, "step": 2883 }, { "epoch": 2.2981861670320907, "grad_norm": 0.5449426535862454, "learning_rate": 1.4273907429621363e-05, "loss": 0.34001868963241577, "step": 2884 }, { "epoch": 2.298983456248754, "grad_norm": 0.49571928398860593, "learning_rate": 1.4243189963950715e-05, "loss": 0.3510063886642456, "step": 2885 }, { "epoch": 2.2997807454654176, "grad_norm": 0.5147949145284724, "learning_rate": 1.4212500095381177e-05, "loss": 0.37198689579963684, "step": 2886 }, { "epoch": 2.300578034682081, "grad_norm": 0.6882456468091729, "learning_rate": 1.4181837847599162e-05, "loss": 0.5025501251220703, "step": 2887 }, { "epoch": 2.3013753238987444, "grad_norm": 0.5655689878424237, "learning_rate": 1.4151203244269785e-05, "loss": 0.3788100779056549, "step": 2888 }, { "epoch": 2.3021726131154074, "grad_norm": 0.6193764172032198, "learning_rate": 1.4120596309036827e-05, "loss": 0.4454669654369354, "step": 2889 }, { "epoch": 2.302969902332071, "grad_norm": 0.5445004930118158, "learning_rate": 1.4090017065522732e-05, "loss": 0.37039023637771606, "step": 2890 }, { "epoch": 2.3037671915487343, "grad_norm": 0.5676467362357331, "learning_rate": 1.4059465537328565e-05, "loss": 0.3852652311325073, "step": 2891 }, { "epoch": 2.3045644807653978, "grad_norm": 0.46910170562870956, "learning_rate": 1.4028941748033963e-05, "loss": 0.33894360065460205, "step": 2892 }, { "epoch": 2.3053617699820608, "grad_norm": 0.5627720118487195, "learning_rate": 1.3998445721197195e-05, "loss": 0.3806559145450592, "step": 2893 }, { "epoch": 2.306159059198724, "grad_norm": 0.5572232160804469, "learning_rate": 1.3967977480355105e-05, "loss": 0.4116613268852234, "step": 2894 }, { "epoch": 2.3069563484153877, "grad_norm": 0.6210072674554983, "learning_rate": 1.3937537049023048e-05, "loss": 0.401995450258255, "step": 2895 }, { "epoch": 2.307753637632051, "grad_norm": 0.5722636924808752, "learning_rate": 1.3907124450694959e-05, "loss": 0.38010725378990173, "step": 2896 }, { "epoch": 2.3085509268487145, "grad_norm": 0.6570508642626222, "learning_rate": 1.3876739708843295e-05, "loss": 0.4776437282562256, "step": 2897 }, { "epoch": 2.3093482160653775, "grad_norm": 0.5767133454578435, "learning_rate": 1.3846382846918965e-05, "loss": 0.39437779784202576, "step": 2898 }, { "epoch": 2.310145505282041, "grad_norm": 0.5031023461021745, "learning_rate": 1.3816053888351405e-05, "loss": 0.3631185293197632, "step": 2899 }, { "epoch": 2.3109427944987044, "grad_norm": 0.5650366935423546, "learning_rate": 1.3785752856548512e-05, "loss": 0.3529535233974457, "step": 2900 }, { "epoch": 2.311740083715368, "grad_norm": 0.5407031412783161, "learning_rate": 1.3755479774896596e-05, "loss": 0.3156425356864929, "step": 2901 }, { "epoch": 2.312537372932031, "grad_norm": 0.5475389983054122, "learning_rate": 1.3725234666760428e-05, "loss": 0.38539668917655945, "step": 2902 }, { "epoch": 2.3133346621486943, "grad_norm": 0.5704033177604744, "learning_rate": 1.3695017555483192e-05, "loss": 0.39023658633232117, "step": 2903 }, { "epoch": 2.3141319513653578, "grad_norm": 0.5675362002715041, "learning_rate": 1.366482846438642e-05, "loss": 0.40497857332229614, "step": 2904 }, { "epoch": 2.314929240582021, "grad_norm": 0.5974154781267562, "learning_rate": 1.363466741677007e-05, "loss": 0.43929851055145264, "step": 2905 }, { "epoch": 2.3157265297986847, "grad_norm": 0.5695435619022592, "learning_rate": 1.3604534435912441e-05, "loss": 0.3497362732887268, "step": 2906 }, { "epoch": 2.3165238190153477, "grad_norm": 0.5817786985980871, "learning_rate": 1.357442954507014e-05, "loss": 0.38760825991630554, "step": 2907 }, { "epoch": 2.317321108232011, "grad_norm": 0.47192119520220205, "learning_rate": 1.354435276747813e-05, "loss": 0.3510738015174866, "step": 2908 }, { "epoch": 2.3181183974486745, "grad_norm": 0.5402360875092989, "learning_rate": 1.3514304126349675e-05, "loss": 0.32054564356803894, "step": 2909 }, { "epoch": 2.318915686665338, "grad_norm": 0.5326064982727523, "learning_rate": 1.3484283644876289e-05, "loss": 0.3505931496620178, "step": 2910 }, { "epoch": 2.319712975882001, "grad_norm": 0.5507476562916237, "learning_rate": 1.3454291346227788e-05, "loss": 0.36956140398979187, "step": 2911 }, { "epoch": 2.3205102650986644, "grad_norm": 0.5499428169774911, "learning_rate": 1.3424327253552232e-05, "loss": 0.39960503578186035, "step": 2912 }, { "epoch": 2.321307554315328, "grad_norm": 0.7050377455673807, "learning_rate": 1.3394391389975902e-05, "loss": 0.4693441092967987, "step": 2913 }, { "epoch": 2.3221048435319913, "grad_norm": 0.5498842770501616, "learning_rate": 1.336448377860327e-05, "loss": 0.404085636138916, "step": 2914 }, { "epoch": 2.3229021327486548, "grad_norm": 0.49849714346417856, "learning_rate": 1.333460444251704e-05, "loss": 0.3466857671737671, "step": 2915 }, { "epoch": 2.3236994219653178, "grad_norm": 0.5694739414296545, "learning_rate": 1.3304753404778097e-05, "loss": 0.3781156539916992, "step": 2916 }, { "epoch": 2.324496711181981, "grad_norm": 0.4828991470343684, "learning_rate": 1.3274930688425441e-05, "loss": 0.34231865406036377, "step": 2917 }, { "epoch": 2.3252940003986446, "grad_norm": 0.5727026051056524, "learning_rate": 1.3245136316476252e-05, "loss": 0.42990028858184814, "step": 2918 }, { "epoch": 2.326091289615308, "grad_norm": 0.5358519911291761, "learning_rate": 1.3215370311925822e-05, "loss": 0.3291170299053192, "step": 2919 }, { "epoch": 2.326888578831971, "grad_norm": 0.44846738577274, "learning_rate": 1.3185632697747568e-05, "loss": 0.3219546675682068, "step": 2920 }, { "epoch": 2.3276858680486345, "grad_norm": 0.6420663087141895, "learning_rate": 1.315592349689294e-05, "loss": 0.42693230509757996, "step": 2921 }, { "epoch": 2.328483157265298, "grad_norm": 0.46074746815846224, "learning_rate": 1.3126242732291516e-05, "loss": 0.2859739065170288, "step": 2922 }, { "epoch": 2.3292804464819614, "grad_norm": 0.5383811234278226, "learning_rate": 1.3096590426850914e-05, "loss": 0.4229266941547394, "step": 2923 }, { "epoch": 2.330077735698625, "grad_norm": 0.5269632254761971, "learning_rate": 1.3066966603456759e-05, "loss": 0.34208184480667114, "step": 2924 }, { "epoch": 2.330875024915288, "grad_norm": 0.6739077190955846, "learning_rate": 1.3037371284972721e-05, "loss": 0.4796290993690491, "step": 2925 }, { "epoch": 2.3316723141319513, "grad_norm": 0.6500898379893539, "learning_rate": 1.3007804494240478e-05, "loss": 0.48221486806869507, "step": 2926 }, { "epoch": 2.3324696033486148, "grad_norm": 0.5081254424459621, "learning_rate": 1.2978266254079651e-05, "loss": 0.3847363591194153, "step": 2927 }, { "epoch": 2.333266892565278, "grad_norm": 0.554256887806846, "learning_rate": 1.2948756587287863e-05, "loss": 0.3488319218158722, "step": 2928 }, { "epoch": 2.334064181781941, "grad_norm": 0.5563322235119603, "learning_rate": 1.2919275516640684e-05, "loss": 0.43148624897003174, "step": 2929 }, { "epoch": 2.3348614709986046, "grad_norm": 0.6205749175401128, "learning_rate": 1.2889823064891564e-05, "loss": 0.40979060530662537, "step": 2930 }, { "epoch": 2.335658760215268, "grad_norm": 0.5003674957309654, "learning_rate": 1.2860399254771927e-05, "loss": 0.3681198060512543, "step": 2931 }, { "epoch": 2.3364560494319315, "grad_norm": 0.5791342879234376, "learning_rate": 1.2831004108991079e-05, "loss": 0.4154352843761444, "step": 2932 }, { "epoch": 2.337253338648595, "grad_norm": 0.5162670844070265, "learning_rate": 1.2801637650236154e-05, "loss": 0.34559178352355957, "step": 2933 }, { "epoch": 2.338050627865258, "grad_norm": 0.5225861985498856, "learning_rate": 1.2772299901172197e-05, "loss": 0.3808538317680359, "step": 2934 }, { "epoch": 2.3388479170819214, "grad_norm": 0.47904397439072854, "learning_rate": 1.2742990884442097e-05, "loss": 0.29383960366249084, "step": 2935 }, { "epoch": 2.339645206298585, "grad_norm": 0.475056680859015, "learning_rate": 1.2713710622666519e-05, "loss": 0.3090176582336426, "step": 2936 }, { "epoch": 2.3404424955152483, "grad_norm": 0.6246519575159549, "learning_rate": 1.268445913844397e-05, "loss": 0.459750235080719, "step": 2937 }, { "epoch": 2.3412397847319113, "grad_norm": 0.558905695809203, "learning_rate": 1.2655236454350777e-05, "loss": 0.3917646110057831, "step": 2938 }, { "epoch": 2.3420370739485747, "grad_norm": 0.5885876152295155, "learning_rate": 1.2626042592940968e-05, "loss": 0.3999767005443573, "step": 2939 }, { "epoch": 2.342834363165238, "grad_norm": 0.6783080699231474, "learning_rate": 1.2596877576746358e-05, "loss": 0.4681207537651062, "step": 2940 }, { "epoch": 2.3436316523819016, "grad_norm": 0.7005542006969058, "learning_rate": 1.2567741428276513e-05, "loss": 0.5284163355827332, "step": 2941 }, { "epoch": 2.344428941598565, "grad_norm": 0.5303667712764476, "learning_rate": 1.2538634170018727e-05, "loss": 0.3935745060443878, "step": 2942 }, { "epoch": 2.345226230815228, "grad_norm": 0.5685216215871621, "learning_rate": 1.250955582443794e-05, "loss": 0.377205491065979, "step": 2943 }, { "epoch": 2.3460235200318915, "grad_norm": 0.5459826188803208, "learning_rate": 1.2480506413976845e-05, "loss": 0.3941291272640228, "step": 2944 }, { "epoch": 2.346820809248555, "grad_norm": 0.5506406649340152, "learning_rate": 1.2451485961055759e-05, "loss": 0.3423709571361542, "step": 2945 }, { "epoch": 2.3476180984652184, "grad_norm": 0.5690461816659802, "learning_rate": 1.2422494488072678e-05, "loss": 0.4043852388858795, "step": 2946 }, { "epoch": 2.3484153876818814, "grad_norm": 0.5822947497794906, "learning_rate": 1.2393532017403198e-05, "loss": 0.403683602809906, "step": 2947 }, { "epoch": 2.349212676898545, "grad_norm": 0.6181939451960975, "learning_rate": 1.236459857140056e-05, "loss": 0.4372858703136444, "step": 2948 }, { "epoch": 2.3500099661152083, "grad_norm": 0.6048486841900437, "learning_rate": 1.2335694172395606e-05, "loss": 0.4158094823360443, "step": 2949 }, { "epoch": 2.3508072553318717, "grad_norm": 0.6523675399695452, "learning_rate": 1.2306818842696716e-05, "loss": 0.39452141523361206, "step": 2950 }, { "epoch": 2.351604544548535, "grad_norm": 0.6164520961567749, "learning_rate": 1.2277972604589888e-05, "loss": 0.3464305102825165, "step": 2951 }, { "epoch": 2.352401833765198, "grad_norm": 0.519456600262302, "learning_rate": 1.2249155480338654e-05, "loss": 0.417542427778244, "step": 2952 }, { "epoch": 2.3531991229818616, "grad_norm": 0.5654466905319446, "learning_rate": 1.2220367492184038e-05, "loss": 0.31955111026763916, "step": 2953 }, { "epoch": 2.353996412198525, "grad_norm": 0.5315754763377101, "learning_rate": 1.2191608662344617e-05, "loss": 0.33018922805786133, "step": 2954 }, { "epoch": 2.3547937014151885, "grad_norm": 0.5191395944314494, "learning_rate": 1.216287901301647e-05, "loss": 0.41029950976371765, "step": 2955 }, { "epoch": 2.3555909906318515, "grad_norm": 0.5922996285090982, "learning_rate": 1.2134178566373111e-05, "loss": 0.4194818437099457, "step": 2956 }, { "epoch": 2.356388279848515, "grad_norm": 0.47429790216412065, "learning_rate": 1.2105507344565548e-05, "loss": 0.35975363850593567, "step": 2957 }, { "epoch": 2.3571855690651784, "grad_norm": 0.5212011302712196, "learning_rate": 1.2076865369722245e-05, "loss": 0.34535032510757446, "step": 2958 }, { "epoch": 2.357982858281842, "grad_norm": 0.4924475755461846, "learning_rate": 1.2048252663949039e-05, "loss": 0.26523104310035706, "step": 2959 }, { "epoch": 2.3587801474985053, "grad_norm": 0.5566021124229725, "learning_rate": 1.2019669249329235e-05, "loss": 0.37449178099632263, "step": 2960 }, { "epoch": 2.3595774367151683, "grad_norm": 0.5949357217891699, "learning_rate": 1.199111514792352e-05, "loss": 0.42326435446739197, "step": 2961 }, { "epoch": 2.3603747259318317, "grad_norm": 0.5114379858145126, "learning_rate": 1.1962590381769922e-05, "loss": 0.39657795429229736, "step": 2962 }, { "epoch": 2.361172015148495, "grad_norm": 0.5815021001730115, "learning_rate": 1.1934094972883852e-05, "loss": 0.44913965463638306, "step": 2963 }, { "epoch": 2.3619693043651586, "grad_norm": 0.501028094562462, "learning_rate": 1.190562894325809e-05, "loss": 0.40733224153518677, "step": 2964 }, { "epoch": 2.3627665935818216, "grad_norm": 0.515453230865946, "learning_rate": 1.1877192314862696e-05, "loss": 0.3343517780303955, "step": 2965 }, { "epoch": 2.363563882798485, "grad_norm": 0.5121271967488198, "learning_rate": 1.1848785109645039e-05, "loss": 0.34764131903648376, "step": 2966 }, { "epoch": 2.3643611720151485, "grad_norm": 0.48291532323236736, "learning_rate": 1.1820407349529811e-05, "loss": 0.3697676360607147, "step": 2967 }, { "epoch": 2.365158461231812, "grad_norm": 0.5520196349250898, "learning_rate": 1.179205905641898e-05, "loss": 0.3831638693809509, "step": 2968 }, { "epoch": 2.3659557504484754, "grad_norm": 0.47421813401996754, "learning_rate": 1.1763740252191724e-05, "loss": 0.3300672173500061, "step": 2969 }, { "epoch": 2.3667530396651384, "grad_norm": 0.5410183083391789, "learning_rate": 1.1735450958704502e-05, "loss": 0.4149124026298523, "step": 2970 }, { "epoch": 2.367550328881802, "grad_norm": 0.5488076713074448, "learning_rate": 1.1707191197791012e-05, "loss": 0.37884625792503357, "step": 2971 }, { "epoch": 2.3683476180984653, "grad_norm": 0.6033705053609744, "learning_rate": 1.16789609912621e-05, "loss": 0.45495396852493286, "step": 2972 }, { "epoch": 2.3691449073151287, "grad_norm": 0.5390168961862707, "learning_rate": 1.1650760360905849e-05, "loss": 0.38069280982017517, "step": 2973 }, { "epoch": 2.3699421965317917, "grad_norm": 0.4569865648741897, "learning_rate": 1.1622589328487504e-05, "loss": 0.26459139585494995, "step": 2974 }, { "epoch": 2.370739485748455, "grad_norm": 0.5735225008146518, "learning_rate": 1.1594447915749473e-05, "loss": 0.3942721486091614, "step": 2975 }, { "epoch": 2.3715367749651186, "grad_norm": 0.5764359875322304, "learning_rate": 1.1566336144411265e-05, "loss": 0.4314473867416382, "step": 2976 }, { "epoch": 2.372334064181782, "grad_norm": 0.6417030100617336, "learning_rate": 1.1538254036169566e-05, "loss": 0.46725234389305115, "step": 2977 }, { "epoch": 2.3731313533984455, "grad_norm": 0.6712753815058439, "learning_rate": 1.1510201612698152e-05, "loss": 0.46410244703292847, "step": 2978 }, { "epoch": 2.3739286426151085, "grad_norm": 0.5668748668935215, "learning_rate": 1.1482178895647844e-05, "loss": 0.38902294635772705, "step": 2979 }, { "epoch": 2.374725931831772, "grad_norm": 0.5107939130714284, "learning_rate": 1.1454185906646598e-05, "loss": 0.29805633425712585, "step": 2980 }, { "epoch": 2.3755232210484354, "grad_norm": 0.4796949658322925, "learning_rate": 1.1426222667299403e-05, "loss": 0.32789599895477295, "step": 2981 }, { "epoch": 2.376320510265099, "grad_norm": 0.5328633783156256, "learning_rate": 1.1398289199188262e-05, "loss": 0.3713456690311432, "step": 2982 }, { "epoch": 2.377117799481762, "grad_norm": 0.6388034576280683, "learning_rate": 1.1370385523872228e-05, "loss": 0.4553091824054718, "step": 2983 }, { "epoch": 2.3779150886984253, "grad_norm": 0.5253537166532729, "learning_rate": 1.1342511662887378e-05, "loss": 0.3350941240787506, "step": 2984 }, { "epoch": 2.3787123779150887, "grad_norm": 0.6471821421011473, "learning_rate": 1.1314667637746717e-05, "loss": 0.4054361581802368, "step": 2985 }, { "epoch": 2.379509667131752, "grad_norm": 0.530767691952913, "learning_rate": 1.1286853469940278e-05, "loss": 0.33032459020614624, "step": 2986 }, { "epoch": 2.3803069563484156, "grad_norm": 0.5945912861299009, "learning_rate": 1.125906918093505e-05, "loss": 0.41627037525177, "step": 2987 }, { "epoch": 2.3811042455650786, "grad_norm": 0.592404869759946, "learning_rate": 1.1231314792174908e-05, "loss": 0.37148189544677734, "step": 2988 }, { "epoch": 2.381901534781742, "grad_norm": 0.5796819759729702, "learning_rate": 1.1203590325080705e-05, "loss": 0.46716293692588806, "step": 2989 }, { "epoch": 2.3826988239984055, "grad_norm": 0.5984501135993744, "learning_rate": 1.1175895801050185e-05, "loss": 0.4035344123840332, "step": 2990 }, { "epoch": 2.383496113215069, "grad_norm": 0.5226980076821796, "learning_rate": 1.114823124145798e-05, "loss": 0.34139835834503174, "step": 2991 }, { "epoch": 2.384293402431732, "grad_norm": 0.5480029319616901, "learning_rate": 1.1120596667655564e-05, "loss": 0.3540887236595154, "step": 2992 }, { "epoch": 2.3850906916483954, "grad_norm": 0.6315126783240017, "learning_rate": 1.1092992100971322e-05, "loss": 0.4436336159706116, "step": 2993 }, { "epoch": 2.385887980865059, "grad_norm": 0.5030936327052669, "learning_rate": 1.1065417562710462e-05, "loss": 0.3255704939365387, "step": 2994 }, { "epoch": 2.3866852700817223, "grad_norm": 0.5389550400990514, "learning_rate": 1.1037873074154981e-05, "loss": 0.43866243958473206, "step": 2995 }, { "epoch": 2.3874825592983857, "grad_norm": 0.6027643368782467, "learning_rate": 1.1010358656563736e-05, "loss": 0.3681175112724304, "step": 2996 }, { "epoch": 2.3882798485150487, "grad_norm": 0.6865042349476449, "learning_rate": 1.0982874331172349e-05, "loss": 0.41908419132232666, "step": 2997 }, { "epoch": 2.389077137731712, "grad_norm": 0.5285459180419836, "learning_rate": 1.0955420119193199e-05, "loss": 0.3338533043861389, "step": 2998 }, { "epoch": 2.3898744269483756, "grad_norm": 0.6114952202603268, "learning_rate": 1.0927996041815459e-05, "loss": 0.36752378940582275, "step": 2999 }, { "epoch": 2.390671716165039, "grad_norm": 0.6402322188143689, "learning_rate": 1.0900602120205028e-05, "loss": 0.4086535573005676, "step": 3000 }, { "epoch": 2.391469005381702, "grad_norm": 0.48784581925104276, "learning_rate": 1.0873238375504536e-05, "loss": 0.3536694049835205, "step": 3001 }, { "epoch": 2.3922662945983655, "grad_norm": 0.4946459063558401, "learning_rate": 1.0845904828833297e-05, "loss": 0.31287336349487305, "step": 3002 }, { "epoch": 2.393063583815029, "grad_norm": 0.524962752150012, "learning_rate": 1.0818601501287357e-05, "loss": 0.38032567501068115, "step": 3003 }, { "epoch": 2.3938608730316924, "grad_norm": 0.626743301408106, "learning_rate": 1.0791328413939422e-05, "loss": 0.44376513361930847, "step": 3004 }, { "epoch": 2.394658162248356, "grad_norm": 0.6027007707093804, "learning_rate": 1.076408558783884e-05, "loss": 0.3851691484451294, "step": 3005 }, { "epoch": 2.395455451465019, "grad_norm": 0.5787029974208682, "learning_rate": 1.073687304401163e-05, "loss": 0.40882188081741333, "step": 3006 }, { "epoch": 2.3962527406816823, "grad_norm": 0.6351549868160286, "learning_rate": 1.0709690803460442e-05, "loss": 0.39337313175201416, "step": 3007 }, { "epoch": 2.3970500298983457, "grad_norm": 0.6216746967586447, "learning_rate": 1.0682538887164506e-05, "loss": 0.3867277503013611, "step": 3008 }, { "epoch": 2.397847319115009, "grad_norm": 0.5897200758625453, "learning_rate": 1.065541731607967e-05, "loss": 0.41856956481933594, "step": 3009 }, { "epoch": 2.398644608331672, "grad_norm": 0.5261097744996297, "learning_rate": 1.0628326111138375e-05, "loss": 0.3971220850944519, "step": 3010 }, { "epoch": 2.3994418975483356, "grad_norm": 0.5797501200545798, "learning_rate": 1.0601265293249585e-05, "loss": 0.37441498041152954, "step": 3011 }, { "epoch": 2.400239186764999, "grad_norm": 0.5901589323184017, "learning_rate": 1.057423488329885e-05, "loss": 0.41874784231185913, "step": 3012 }, { "epoch": 2.4010364759816625, "grad_norm": 0.5114845049838462, "learning_rate": 1.054723490214825e-05, "loss": 0.34576836228370667, "step": 3013 }, { "epoch": 2.401833765198326, "grad_norm": 0.5279639489409597, "learning_rate": 1.052026537063634e-05, "loss": 0.293587863445282, "step": 3014 }, { "epoch": 2.402631054414989, "grad_norm": 0.6421803524759242, "learning_rate": 1.0493326309578205e-05, "loss": 0.4365427792072296, "step": 3015 }, { "epoch": 2.4034283436316524, "grad_norm": 0.5447696510184843, "learning_rate": 1.0466417739765439e-05, "loss": 0.3419388234615326, "step": 3016 }, { "epoch": 2.404225632848316, "grad_norm": 0.5959346516133295, "learning_rate": 1.0439539681966044e-05, "loss": 0.4001530706882477, "step": 3017 }, { "epoch": 2.4050229220649793, "grad_norm": 0.5323579970429727, "learning_rate": 1.0412692156924498e-05, "loss": 0.3831475079059601, "step": 3018 }, { "epoch": 2.4058202112816423, "grad_norm": 0.610237260051266, "learning_rate": 1.038587518536172e-05, "loss": 0.5030947327613831, "step": 3019 }, { "epoch": 2.4066175004983057, "grad_norm": 0.573872135956977, "learning_rate": 1.0359088787975068e-05, "loss": 0.4350951313972473, "step": 3020 }, { "epoch": 2.407414789714969, "grad_norm": 0.5448422215182507, "learning_rate": 1.0332332985438248e-05, "loss": 0.3821776807308197, "step": 3021 }, { "epoch": 2.4082120789316326, "grad_norm": 0.49799847669715624, "learning_rate": 1.03056077984014e-05, "loss": 0.376516193151474, "step": 3022 }, { "epoch": 2.409009368148296, "grad_norm": 0.6058600869693986, "learning_rate": 1.027891324749104e-05, "loss": 0.4070271849632263, "step": 3023 }, { "epoch": 2.409806657364959, "grad_norm": 0.5918738989977509, "learning_rate": 1.0252249353309979e-05, "loss": 0.41364043951034546, "step": 3024 }, { "epoch": 2.4106039465816225, "grad_norm": 0.48072313248672555, "learning_rate": 1.0225616136437422e-05, "loss": 0.3579602837562561, "step": 3025 }, { "epoch": 2.411401235798286, "grad_norm": 0.5603244843712487, "learning_rate": 1.0199013617428904e-05, "loss": 0.43681585788726807, "step": 3026 }, { "epoch": 2.4121985250149494, "grad_norm": 0.5418753166161324, "learning_rate": 1.0172441816816214e-05, "loss": 0.35702961683273315, "step": 3027 }, { "epoch": 2.4129958142316124, "grad_norm": 0.6897195905942677, "learning_rate": 1.014590075510748e-05, "loss": 0.38392072916030884, "step": 3028 }, { "epoch": 2.413793103448276, "grad_norm": 0.5810976793367155, "learning_rate": 1.0119390452787086e-05, "loss": 0.4496387243270874, "step": 3029 }, { "epoch": 2.4145903926649392, "grad_norm": 0.5737237945777802, "learning_rate": 1.0092910930315697e-05, "loss": 0.34426742792129517, "step": 3030 }, { "epoch": 2.4153876818816027, "grad_norm": 0.5914503946602162, "learning_rate": 1.0066462208130173e-05, "loss": 0.38094091415405273, "step": 3031 }, { "epoch": 2.416184971098266, "grad_norm": 0.5349940527927092, "learning_rate": 1.0040044306643648e-05, "loss": 0.32520705461502075, "step": 3032 }, { "epoch": 2.416982260314929, "grad_norm": 0.6392777305239044, "learning_rate": 1.0013657246245472e-05, "loss": 0.4568195343017578, "step": 3033 }, { "epoch": 2.4177795495315926, "grad_norm": 0.49035590882299196, "learning_rate": 9.98730104730115e-06, "loss": 0.3256309926509857, "step": 3034 }, { "epoch": 2.418576838748256, "grad_norm": 0.6242100716360187, "learning_rate": 9.9609757301524e-06, "loss": 0.44562599062919617, "step": 3035 }, { "epoch": 2.4193741279649195, "grad_norm": 0.6590792002558906, "learning_rate": 9.934681315117117e-06, "loss": 0.4218868017196655, "step": 3036 }, { "epoch": 2.4201714171815825, "grad_norm": 0.4925420835554254, "learning_rate": 9.908417822489291e-06, "loss": 0.3679753243923187, "step": 3037 }, { "epoch": 2.420968706398246, "grad_norm": 0.6341895594685409, "learning_rate": 9.882185272539108e-06, "loss": 0.4594114422798157, "step": 3038 }, { "epoch": 2.4217659956149094, "grad_norm": 0.5229117627663151, "learning_rate": 9.855983685512848e-06, "loss": 0.34453925490379333, "step": 3039 }, { "epoch": 2.422563284831573, "grad_norm": 0.5048092473315707, "learning_rate": 9.829813081632872e-06, "loss": 0.32801640033721924, "step": 3040 }, { "epoch": 2.4233605740482362, "grad_norm": 0.48749682948529616, "learning_rate": 9.803673481097663e-06, "loss": 0.38293784856796265, "step": 3041 }, { "epoch": 2.4241578632648992, "grad_norm": 0.557057425625147, "learning_rate": 9.77756490408177e-06, "loss": 0.39376360177993774, "step": 3042 }, { "epoch": 2.4249551524815627, "grad_norm": 0.6102335014305459, "learning_rate": 9.75148737073578e-06, "loss": 0.40936338901519775, "step": 3043 }, { "epoch": 2.425752441698226, "grad_norm": 0.5852824456147703, "learning_rate": 9.725440901186316e-06, "loss": 0.39862576127052307, "step": 3044 }, { "epoch": 2.4265497309148896, "grad_norm": 0.6128360595337078, "learning_rate": 9.699425515536048e-06, "loss": 0.38061287999153137, "step": 3045 }, { "epoch": 2.4273470201315526, "grad_norm": 0.5289407321142185, "learning_rate": 9.673441233863662e-06, "loss": 0.4060685634613037, "step": 3046 }, { "epoch": 2.428144309348216, "grad_norm": 0.5764773627237675, "learning_rate": 9.647488076223805e-06, "loss": 0.39636898040771484, "step": 3047 }, { "epoch": 2.4289415985648795, "grad_norm": 0.6743824569588063, "learning_rate": 9.621566062647119e-06, "loss": 0.4582592844963074, "step": 3048 }, { "epoch": 2.429738887781543, "grad_norm": 0.5315235008805514, "learning_rate": 9.595675213140237e-06, "loss": 0.3445022404193878, "step": 3049 }, { "epoch": 2.4305361769982063, "grad_norm": 0.5950529278527275, "learning_rate": 9.569815547685678e-06, "loss": 0.4206591248512268, "step": 3050 }, { "epoch": 2.4313334662148693, "grad_norm": 0.6217694760978789, "learning_rate": 9.543987086241952e-06, "loss": 0.35537174344062805, "step": 3051 }, { "epoch": 2.432130755431533, "grad_norm": 0.6346534947685432, "learning_rate": 9.518189848743464e-06, "loss": 0.3548535704612732, "step": 3052 }, { "epoch": 2.4329280446481962, "grad_norm": 0.560499422275515, "learning_rate": 9.492423855100491e-06, "loss": 0.36059242486953735, "step": 3053 }, { "epoch": 2.4337253338648592, "grad_norm": 0.5613329128450476, "learning_rate": 9.466689125199246e-06, "loss": 0.38900694251060486, "step": 3054 }, { "epoch": 2.4345226230815227, "grad_norm": 0.5637473883027203, "learning_rate": 9.440985678901787e-06, "loss": 0.40991920232772827, "step": 3055 }, { "epoch": 2.435319912298186, "grad_norm": 0.5135715535581881, "learning_rate": 9.415313536046039e-06, "loss": 0.4011596143245697, "step": 3056 }, { "epoch": 2.4361172015148496, "grad_norm": 0.6009068614611034, "learning_rate": 9.389672716445742e-06, "loss": 0.3867872357368469, "step": 3057 }, { "epoch": 2.436914490731513, "grad_norm": 0.4981388985687127, "learning_rate": 9.364063239890476e-06, "loss": 0.30235207080841064, "step": 3058 }, { "epoch": 2.4377117799481764, "grad_norm": 0.5715084083190242, "learning_rate": 9.338485126145652e-06, "loss": 0.39095425605773926, "step": 3059 }, { "epoch": 2.4385090691648394, "grad_norm": 0.5579957053771735, "learning_rate": 9.312938394952431e-06, "loss": 0.4324857294559479, "step": 3060 }, { "epoch": 2.439306358381503, "grad_norm": 0.5171635275686787, "learning_rate": 9.28742306602779e-06, "loss": 0.3546736240386963, "step": 3061 }, { "epoch": 2.4401036475981663, "grad_norm": 0.5273171756959664, "learning_rate": 9.261939159064465e-06, "loss": 0.328052818775177, "step": 3062 }, { "epoch": 2.4409009368148293, "grad_norm": 0.5834932664349325, "learning_rate": 9.2364866937309e-06, "loss": 0.4233499765396118, "step": 3063 }, { "epoch": 2.441698226031493, "grad_norm": 0.553890865133612, "learning_rate": 9.211065689671334e-06, "loss": 0.32251298427581787, "step": 3064 }, { "epoch": 2.4424955152481562, "grad_norm": 0.6826498425676606, "learning_rate": 9.185676166505697e-06, "loss": 0.48125091195106506, "step": 3065 }, { "epoch": 2.4432928044648197, "grad_norm": 0.6751815263941593, "learning_rate": 9.160318143829588e-06, "loss": 0.48469382524490356, "step": 3066 }, { "epoch": 2.444090093681483, "grad_norm": 0.49658489875893164, "learning_rate": 9.134991641214353e-06, "loss": 0.33828386664390564, "step": 3067 }, { "epoch": 2.444887382898146, "grad_norm": 0.4901526923638096, "learning_rate": 9.109696678206986e-06, "loss": 0.37218472361564636, "step": 3068 }, { "epoch": 2.4456846721148096, "grad_norm": 0.548531422118351, "learning_rate": 9.084433274330112e-06, "loss": 0.37975817918777466, "step": 3069 }, { "epoch": 2.446481961331473, "grad_norm": 0.6376402538288319, "learning_rate": 9.059201449082045e-06, "loss": 0.4655555486679077, "step": 3070 }, { "epoch": 2.4472792505481364, "grad_norm": 0.46993842180285195, "learning_rate": 9.03400122193669e-06, "loss": 0.29572585225105286, "step": 3071 }, { "epoch": 2.4480765397647994, "grad_norm": 0.5371870676337251, "learning_rate": 9.008832612343592e-06, "loss": 0.37012332677841187, "step": 3072 }, { "epoch": 2.448873828981463, "grad_norm": 0.5934391072186764, "learning_rate": 8.983695639727857e-06, "loss": 0.4286786913871765, "step": 3073 }, { "epoch": 2.4496711181981263, "grad_norm": 0.5559424082388893, "learning_rate": 8.958590323490218e-06, "loss": 0.41284266114234924, "step": 3074 }, { "epoch": 2.4504684074147898, "grad_norm": 0.5583859080533315, "learning_rate": 8.933516683006959e-06, "loss": 0.3645273447036743, "step": 3075 }, { "epoch": 2.451265696631453, "grad_norm": 0.5340999575983516, "learning_rate": 8.908474737629896e-06, "loss": 0.3570456802845001, "step": 3076 }, { "epoch": 2.452062985848116, "grad_norm": 0.5266834570073811, "learning_rate": 8.8834645066864e-06, "loss": 0.3668473958969116, "step": 3077 }, { "epoch": 2.4528602750647797, "grad_norm": 0.5912501685849607, "learning_rate": 8.858486009479383e-06, "loss": 0.4611150622367859, "step": 3078 }, { "epoch": 2.453657564281443, "grad_norm": 0.612232780289165, "learning_rate": 8.833539265287217e-06, "loss": 0.432400107383728, "step": 3079 }, { "epoch": 2.4544548534981065, "grad_norm": 0.6009489266713003, "learning_rate": 8.808624293363815e-06, "loss": 0.41770777106285095, "step": 3080 }, { "epoch": 2.4552521427147695, "grad_norm": 0.5824968914260534, "learning_rate": 8.783741112938554e-06, "loss": 0.44138696789741516, "step": 3081 }, { "epoch": 2.456049431931433, "grad_norm": 0.5316066345134923, "learning_rate": 8.758889743216247e-06, "loss": 0.3280312418937683, "step": 3082 }, { "epoch": 2.4568467211480964, "grad_norm": 0.5649866701108014, "learning_rate": 8.734070203377198e-06, "loss": 0.4562263786792755, "step": 3083 }, { "epoch": 2.45764401036476, "grad_norm": 0.5271608219554035, "learning_rate": 8.70928251257711e-06, "loss": 0.3604361414909363, "step": 3084 }, { "epoch": 2.4584412995814233, "grad_norm": 0.6133356872272083, "learning_rate": 8.684526689947142e-06, "loss": 0.3983261287212372, "step": 3085 }, { "epoch": 2.4592385887980863, "grad_norm": 0.5482375104097464, "learning_rate": 8.659802754593804e-06, "loss": 0.3585200607776642, "step": 3086 }, { "epoch": 2.4600358780147498, "grad_norm": 0.4479196323192748, "learning_rate": 8.635110725599043e-06, "loss": 0.2821616232395172, "step": 3087 }, { "epoch": 2.460833167231413, "grad_norm": 0.564747867594857, "learning_rate": 8.610450622020178e-06, "loss": 0.36202213168144226, "step": 3088 }, { "epoch": 2.4616304564480767, "grad_norm": 0.655526170392364, "learning_rate": 8.585822462889836e-06, "loss": 0.44524115324020386, "step": 3089 }, { "epoch": 2.4624277456647397, "grad_norm": 0.6360535590555193, "learning_rate": 8.561226267216055e-06, "loss": 0.44493815302848816, "step": 3090 }, { "epoch": 2.463225034881403, "grad_norm": 0.6126179270012106, "learning_rate": 8.53666205398218e-06, "loss": 0.3956524133682251, "step": 3091 }, { "epoch": 2.4640223240980665, "grad_norm": 0.5413303621468675, "learning_rate": 8.512129842146838e-06, "loss": 0.35723960399627686, "step": 3092 }, { "epoch": 2.46481961331473, "grad_norm": 0.4830815631232667, "learning_rate": 8.487629650644014e-06, "loss": 0.36363455653190613, "step": 3093 }, { "epoch": 2.4656169025313934, "grad_norm": 0.5687729893192417, "learning_rate": 8.463161498382948e-06, "loss": 0.37670308351516724, "step": 3094 }, { "epoch": 2.4664141917480564, "grad_norm": 0.6419555508678333, "learning_rate": 8.438725404248144e-06, "loss": 0.43069055676460266, "step": 3095 }, { "epoch": 2.46721148096472, "grad_norm": 0.5402104500856526, "learning_rate": 8.414321387099389e-06, "loss": 0.35280078649520874, "step": 3096 }, { "epoch": 2.4680087701813833, "grad_norm": 0.5265633417746661, "learning_rate": 8.389949465771691e-06, "loss": 0.37253284454345703, "step": 3097 }, { "epoch": 2.4688060593980468, "grad_norm": 0.5390830815995389, "learning_rate": 8.365609659075308e-06, "loss": 0.3347204029560089, "step": 3098 }, { "epoch": 2.4696033486147098, "grad_norm": 0.5948030143529253, "learning_rate": 8.341301985795674e-06, "loss": 0.41241008043289185, "step": 3099 }, { "epoch": 2.470400637831373, "grad_norm": 0.5575848436199362, "learning_rate": 8.317026464693472e-06, "loss": 0.3834206163883209, "step": 3100 }, { "epoch": 2.4711979270480366, "grad_norm": 0.5917973812691569, "learning_rate": 8.292783114504548e-06, "loss": 0.4162955582141876, "step": 3101 }, { "epoch": 2.4719952162647, "grad_norm": 0.6160259143580633, "learning_rate": 8.268571953939896e-06, "loss": 0.4204873740673065, "step": 3102 }, { "epoch": 2.4727925054813635, "grad_norm": 0.6156133866065258, "learning_rate": 8.244393001685697e-06, "loss": 0.4164203107357025, "step": 3103 }, { "epoch": 2.4735897946980265, "grad_norm": 0.6557249654406704, "learning_rate": 8.220246276403277e-06, "loss": 0.4873819053173065, "step": 3104 }, { "epoch": 2.47438708391469, "grad_norm": 0.5949938125929913, "learning_rate": 8.196131796729045e-06, "loss": 0.3675978481769562, "step": 3105 }, { "epoch": 2.4751843731313534, "grad_norm": 0.651363382859668, "learning_rate": 8.17204958127457e-06, "loss": 0.4179675281047821, "step": 3106 }, { "epoch": 2.475981662348017, "grad_norm": 0.5774365776755822, "learning_rate": 8.147999648626519e-06, "loss": 0.41289183497428894, "step": 3107 }, { "epoch": 2.47677895156468, "grad_norm": 0.6157620211471619, "learning_rate": 8.123982017346599e-06, "loss": 0.4323911666870117, "step": 3108 }, { "epoch": 2.4775762407813433, "grad_norm": 0.6320777886683898, "learning_rate": 8.099996705971619e-06, "loss": 0.427425742149353, "step": 3109 }, { "epoch": 2.4783735299980068, "grad_norm": 0.6206946396963852, "learning_rate": 8.07604373301345e-06, "loss": 0.38477781414985657, "step": 3110 }, { "epoch": 2.47917081921467, "grad_norm": 0.6143516215430688, "learning_rate": 8.052123116959004e-06, "loss": 0.39360523223876953, "step": 3111 }, { "epoch": 2.4799681084313336, "grad_norm": 0.5621207003571481, "learning_rate": 8.028234876270175e-06, "loss": 0.3551103472709656, "step": 3112 }, { "epoch": 2.4807653976479966, "grad_norm": 0.5893048420283103, "learning_rate": 8.004379029383929e-06, "loss": 0.40097278356552124, "step": 3113 }, { "epoch": 2.48156268686466, "grad_norm": 0.5795914589144245, "learning_rate": 7.980555594712208e-06, "loss": 0.30808860063552856, "step": 3114 }, { "epoch": 2.4823599760813235, "grad_norm": 0.6048271291818287, "learning_rate": 7.956764590641918e-06, "loss": 0.42802196741104126, "step": 3115 }, { "epoch": 2.483157265297987, "grad_norm": 0.5489438298070868, "learning_rate": 7.933006035534956e-06, "loss": 0.41561368107795715, "step": 3116 }, { "epoch": 2.48395455451465, "grad_norm": 0.6549893029416408, "learning_rate": 7.909279947728198e-06, "loss": 0.48019495606422424, "step": 3117 }, { "epoch": 2.4847518437313134, "grad_norm": 0.5571569673167729, "learning_rate": 7.885586345533397e-06, "loss": 0.3578467071056366, "step": 3118 }, { "epoch": 2.485549132947977, "grad_norm": 0.5955084757758554, "learning_rate": 7.86192524723729e-06, "loss": 0.37844061851501465, "step": 3119 }, { "epoch": 2.4863464221646403, "grad_norm": 0.668791905983678, "learning_rate": 7.838296671101519e-06, "loss": 0.48848938941955566, "step": 3120 }, { "epoch": 2.4871437113813037, "grad_norm": 0.5822306805799973, "learning_rate": 7.814700635362598e-06, "loss": 0.4066412150859833, "step": 3121 }, { "epoch": 2.4879410005979667, "grad_norm": 0.6472441911220514, "learning_rate": 7.791137158231965e-06, "loss": 0.38161972165107727, "step": 3122 }, { "epoch": 2.48873828981463, "grad_norm": 0.5815535759872685, "learning_rate": 7.767606257895877e-06, "loss": 0.3867443799972534, "step": 3123 }, { "epoch": 2.4895355790312936, "grad_norm": 0.5363393588167472, "learning_rate": 7.744107952515517e-06, "loss": 0.387591689825058, "step": 3124 }, { "epoch": 2.490332868247957, "grad_norm": 0.4844199803966091, "learning_rate": 7.72064226022684e-06, "loss": 0.3521724343299866, "step": 3125 }, { "epoch": 2.49113015746462, "grad_norm": 0.574121003783443, "learning_rate": 7.697209199140676e-06, "loss": 0.46519768238067627, "step": 3126 }, { "epoch": 2.4919274466812835, "grad_norm": 0.5728929149059092, "learning_rate": 7.673808787342679e-06, "loss": 0.4945107400417328, "step": 3127 }, { "epoch": 2.492724735897947, "grad_norm": 0.5100815364067196, "learning_rate": 7.650441042893248e-06, "loss": 0.3312646448612213, "step": 3128 }, { "epoch": 2.4935220251146104, "grad_norm": 0.5275770795941236, "learning_rate": 7.627105983827626e-06, "loss": 0.34310945868492126, "step": 3129 }, { "epoch": 2.494319314331274, "grad_norm": 0.6759424412296315, "learning_rate": 7.60380362815582e-06, "loss": 0.4336388409137726, "step": 3130 }, { "epoch": 2.495116603547937, "grad_norm": 0.6494197492942207, "learning_rate": 7.5805339938625545e-06, "loss": 0.37157800793647766, "step": 3131 }, { "epoch": 2.4959138927646003, "grad_norm": 0.5270401344197639, "learning_rate": 7.557297098907346e-06, "loss": 0.35886672139167786, "step": 3132 }, { "epoch": 2.4967111819812637, "grad_norm": 0.5933957484525627, "learning_rate": 7.534092961224442e-06, "loss": 0.3887011408805847, "step": 3133 }, { "epoch": 2.497508471197927, "grad_norm": 0.5938822465992545, "learning_rate": 7.510921598722764e-06, "loss": 0.3672727048397064, "step": 3134 }, { "epoch": 2.49830576041459, "grad_norm": 0.5224846746286286, "learning_rate": 7.487783029285978e-06, "loss": 0.33035922050476074, "step": 3135 }, { "epoch": 2.4991030496312536, "grad_norm": 0.6874200737559257, "learning_rate": 7.464677270772441e-06, "loss": 0.37078142166137695, "step": 3136 }, { "epoch": 2.499900338847917, "grad_norm": 0.5326930178917794, "learning_rate": 7.441604341015157e-06, "loss": 0.3463507890701294, "step": 3137 }, { "epoch": 2.5006976280645805, "grad_norm": 0.593674439535288, "learning_rate": 7.418564257821814e-06, "loss": 0.39522579312324524, "step": 3138 }, { "epoch": 2.501494917281244, "grad_norm": 0.864788186165685, "learning_rate": 7.395557038974749e-06, "loss": 0.3906126320362091, "step": 3139 }, { "epoch": 2.502292206497907, "grad_norm": 0.5110370568061529, "learning_rate": 7.37258270223094e-06, "loss": 0.33179202675819397, "step": 3140 }, { "epoch": 2.5030894957145704, "grad_norm": 0.6235619940605331, "learning_rate": 7.349641265321955e-06, "loss": 0.4006156325340271, "step": 3141 }, { "epoch": 2.503886784931234, "grad_norm": 0.45301479125926053, "learning_rate": 7.3267327459540015e-06, "loss": 0.3341095447540283, "step": 3142 }, { "epoch": 2.5046840741478973, "grad_norm": 0.5808755961913088, "learning_rate": 7.303857161807887e-06, "loss": 0.4009498953819275, "step": 3143 }, { "epoch": 2.5054813633645603, "grad_norm": 0.5766297209543877, "learning_rate": 7.28101453053896e-06, "loss": 0.39632564783096313, "step": 3144 }, { "epoch": 2.5062786525812237, "grad_norm": 0.5561669674036871, "learning_rate": 7.258204869777169e-06, "loss": 0.35861483216285706, "step": 3145 }, { "epoch": 2.507075941797887, "grad_norm": 0.6368971473791438, "learning_rate": 7.23542819712702e-06, "loss": 0.42921996116638184, "step": 3146 }, { "epoch": 2.5078732310145506, "grad_norm": 0.5736514392492706, "learning_rate": 7.212684530167524e-06, "loss": 0.3800908923149109, "step": 3147 }, { "epoch": 2.508670520231214, "grad_norm": 0.5614760924327277, "learning_rate": 7.1899738864522505e-06, "loss": 0.35107314586639404, "step": 3148 }, { "epoch": 2.509467809447877, "grad_norm": 0.5369986840537099, "learning_rate": 7.167296283509279e-06, "loss": 0.35098516941070557, "step": 3149 }, { "epoch": 2.5102650986645405, "grad_norm": 0.5226500671317931, "learning_rate": 7.144651738841174e-06, "loss": 0.3695114254951477, "step": 3150 }, { "epoch": 2.511062387881204, "grad_norm": 0.5446883222515059, "learning_rate": 7.1220402699249846e-06, "loss": 0.3058769106864929, "step": 3151 }, { "epoch": 2.5118596770978674, "grad_norm": 0.5215954293481961, "learning_rate": 7.099461894212239e-06, "loss": 0.3814972937107086, "step": 3152 }, { "epoch": 2.5126569663145304, "grad_norm": 0.5370483693182942, "learning_rate": 7.076916629128944e-06, "loss": 0.39538174867630005, "step": 3153 }, { "epoch": 2.513454255531194, "grad_norm": 0.5158230753701766, "learning_rate": 7.054404492075512e-06, "loss": 0.3633831739425659, "step": 3154 }, { "epoch": 2.5142515447478573, "grad_norm": 0.5793480987338396, "learning_rate": 7.031925500426806e-06, "loss": 0.42230910062789917, "step": 3155 }, { "epoch": 2.5150488339645207, "grad_norm": 0.5723477085373176, "learning_rate": 7.0094796715321375e-06, "loss": 0.37032514810562134, "step": 3156 }, { "epoch": 2.515846123181184, "grad_norm": 0.6485656424907339, "learning_rate": 6.987067022715155e-06, "loss": 0.4579048752784729, "step": 3157 }, { "epoch": 2.516643412397847, "grad_norm": 0.5236833589678472, "learning_rate": 6.964687571273959e-06, "loss": 0.3969863951206207, "step": 3158 }, { "epoch": 2.5174407016145106, "grad_norm": 0.5743048195960551, "learning_rate": 6.942341334481017e-06, "loss": 0.3862801492214203, "step": 3159 }, { "epoch": 2.518237990831174, "grad_norm": 0.5327927252137695, "learning_rate": 6.920028329583128e-06, "loss": 0.37638992071151733, "step": 3160 }, { "epoch": 2.5190352800478375, "grad_norm": 0.4583125032069009, "learning_rate": 6.897748573801471e-06, "loss": 0.3126966953277588, "step": 3161 }, { "epoch": 2.5198325692645005, "grad_norm": 0.6025333719234693, "learning_rate": 6.875502084331581e-06, "loss": 0.4614095687866211, "step": 3162 }, { "epoch": 2.520629858481164, "grad_norm": 0.5380845214114455, "learning_rate": 6.853288878343273e-06, "loss": 0.32757529616355896, "step": 3163 }, { "epoch": 2.5214271476978274, "grad_norm": 0.5463283459438673, "learning_rate": 6.8311089729806935e-06, "loss": 0.29830366373062134, "step": 3164 }, { "epoch": 2.522224436914491, "grad_norm": 0.6151365351639995, "learning_rate": 6.808962385362305e-06, "loss": 0.3959811329841614, "step": 3165 }, { "epoch": 2.5230217261311543, "grad_norm": 0.5085697837700855, "learning_rate": 6.786849132580842e-06, "loss": 0.3649958372116089, "step": 3166 }, { "epoch": 2.5238190153478173, "grad_norm": 0.5408581753770055, "learning_rate": 6.764769231703294e-06, "loss": 0.4377138316631317, "step": 3167 }, { "epoch": 2.5246163045644807, "grad_norm": 0.640612999210062, "learning_rate": 6.742722699770937e-06, "loss": 0.4121410548686981, "step": 3168 }, { "epoch": 2.525413593781144, "grad_norm": 0.49924210046137674, "learning_rate": 6.720709553799287e-06, "loss": 0.31409257650375366, "step": 3169 }, { "epoch": 2.5262108829978076, "grad_norm": 0.5376881202754634, "learning_rate": 6.698729810778065e-06, "loss": 0.389652281999588, "step": 3170 }, { "epoch": 2.5270081722144706, "grad_norm": 0.6146496548432254, "learning_rate": 6.676783487671251e-06, "loss": 0.3938755691051483, "step": 3171 }, { "epoch": 2.527805461431134, "grad_norm": 0.5136322575976735, "learning_rate": 6.654870601417024e-06, "loss": 0.43856847286224365, "step": 3172 }, { "epoch": 2.5286027506477975, "grad_norm": 0.6100736031978331, "learning_rate": 6.632991168927721e-06, "loss": 0.4216829836368561, "step": 3173 }, { "epoch": 2.529400039864461, "grad_norm": 0.5700122789081427, "learning_rate": 6.611145207089897e-06, "loss": 0.40337368845939636, "step": 3174 }, { "epoch": 2.5301973290811244, "grad_norm": 0.5753554048675101, "learning_rate": 6.589332732764275e-06, "loss": 0.4119567573070526, "step": 3175 }, { "epoch": 2.5309946182977874, "grad_norm": 0.4505368140956578, "learning_rate": 6.5675537627857054e-06, "loss": 0.2353774607181549, "step": 3176 }, { "epoch": 2.531791907514451, "grad_norm": 0.5528349168328219, "learning_rate": 6.545808313963187e-06, "loss": 0.3595430254936218, "step": 3177 }, { "epoch": 2.5325891967311143, "grad_norm": 0.6200400602092433, "learning_rate": 6.5240964030798605e-06, "loss": 0.3819778859615326, "step": 3178 }, { "epoch": 2.5333864859477777, "grad_norm": 0.6191510991302225, "learning_rate": 6.502418046892988e-06, "loss": 0.4070875346660614, "step": 3179 }, { "epoch": 2.5341837751644407, "grad_norm": 0.5440427562065581, "learning_rate": 6.4807732621339105e-06, "loss": 0.3277583122253418, "step": 3180 }, { "epoch": 2.534981064381104, "grad_norm": 0.4982557363976704, "learning_rate": 6.459162065508067e-06, "loss": 0.3701302111148834, "step": 3181 }, { "epoch": 2.5357783535977676, "grad_norm": 0.6172326630507717, "learning_rate": 6.4375844736949904e-06, "loss": 0.40911397337913513, "step": 3182 }, { "epoch": 2.536575642814431, "grad_norm": 0.5821548498520767, "learning_rate": 6.416040503348242e-06, "loss": 0.387284517288208, "step": 3183 }, { "epoch": 2.5373729320310945, "grad_norm": 0.5252463062657449, "learning_rate": 6.39453017109547e-06, "loss": 0.3451211750507355, "step": 3184 }, { "epoch": 2.5381702212477575, "grad_norm": 0.5578102313079499, "learning_rate": 6.373053493538355e-06, "loss": 0.4198966324329376, "step": 3185 }, { "epoch": 2.538967510464421, "grad_norm": 0.6356880138883196, "learning_rate": 6.351610487252568e-06, "loss": 0.47351378202438354, "step": 3186 }, { "epoch": 2.5397647996810844, "grad_norm": 0.5437896018451045, "learning_rate": 6.330201168787836e-06, "loss": 0.3861709237098694, "step": 3187 }, { "epoch": 2.540562088897748, "grad_norm": 0.5644994400409974, "learning_rate": 6.308825554667868e-06, "loss": 0.40111714601516724, "step": 3188 }, { "epoch": 2.541359378114411, "grad_norm": 0.5662634209220282, "learning_rate": 6.287483661390353e-06, "loss": 0.40941181778907776, "step": 3189 }, { "epoch": 2.5421566673310743, "grad_norm": 0.6426913927000336, "learning_rate": 6.266175505426958e-06, "loss": 0.4386158287525177, "step": 3190 }, { "epoch": 2.5429539565477377, "grad_norm": 0.5373134631985741, "learning_rate": 6.2449011032233384e-06, "loss": 0.36776450276374817, "step": 3191 }, { "epoch": 2.543751245764401, "grad_norm": 0.5858594471929294, "learning_rate": 6.223660471199039e-06, "loss": 0.40949109196662903, "step": 3192 }, { "epoch": 2.5445485349810646, "grad_norm": 0.5690505608344593, "learning_rate": 6.202453625747601e-06, "loss": 0.3771588206291199, "step": 3193 }, { "epoch": 2.5453458241977276, "grad_norm": 0.6378876036735014, "learning_rate": 6.181280583236459e-06, "loss": 0.4455053210258484, "step": 3194 }, { "epoch": 2.546143113414391, "grad_norm": 0.6122594294782593, "learning_rate": 6.160141360006977e-06, "loss": 0.37406569719314575, "step": 3195 }, { "epoch": 2.5469404026310545, "grad_norm": 0.5762713578478503, "learning_rate": 6.139035972374374e-06, "loss": 0.41650986671447754, "step": 3196 }, { "epoch": 2.547737691847718, "grad_norm": 0.6422904155818707, "learning_rate": 6.11796443662781e-06, "loss": 0.4426405727863312, "step": 3197 }, { "epoch": 2.548534981064381, "grad_norm": 0.5557038544139129, "learning_rate": 6.0969267690302976e-06, "loss": 0.4170720875263214, "step": 3198 }, { "epoch": 2.5493322702810444, "grad_norm": 0.5187649627925145, "learning_rate": 6.075922985818683e-06, "loss": 0.4024465084075928, "step": 3199 }, { "epoch": 2.550129559497708, "grad_norm": 0.6022788216765766, "learning_rate": 6.054953103203697e-06, "loss": 0.4003203213214874, "step": 3200 }, { "epoch": 2.5509268487143713, "grad_norm": 0.5383212554571271, "learning_rate": 6.034017137369896e-06, "loss": 0.30679669976234436, "step": 3201 }, { "epoch": 2.5517241379310347, "grad_norm": 0.6756069668907732, "learning_rate": 6.013115104475653e-06, "loss": 0.4570748805999756, "step": 3202 }, { "epoch": 2.5525214271476977, "grad_norm": 0.6279649684805797, "learning_rate": 5.992247020653141e-06, "loss": 0.38452956080436707, "step": 3203 }, { "epoch": 2.553318716364361, "grad_norm": 0.6505441612973988, "learning_rate": 5.971412902008356e-06, "loss": 0.42472487688064575, "step": 3204 }, { "epoch": 2.5541160055810246, "grad_norm": 0.6685041894625414, "learning_rate": 5.950612764621072e-06, "loss": 0.45850247144699097, "step": 3205 }, { "epoch": 2.5549132947976876, "grad_norm": 0.5782303807590142, "learning_rate": 5.929846624544821e-06, "loss": 0.4098803997039795, "step": 3206 }, { "epoch": 2.555710584014351, "grad_norm": 0.5377165863069461, "learning_rate": 5.909114497806911e-06, "loss": 0.2996128797531128, "step": 3207 }, { "epoch": 2.5565078732310145, "grad_norm": 0.5335654265192379, "learning_rate": 5.888416400408403e-06, "loss": 0.45460787415504456, "step": 3208 }, { "epoch": 2.557305162447678, "grad_norm": 0.5552816117467462, "learning_rate": 5.8677523483240734e-06, "loss": 0.4068324565887451, "step": 3209 }, { "epoch": 2.5581024516643414, "grad_norm": 0.5839339923967911, "learning_rate": 5.847122357502438e-06, "loss": 0.41578760743141174, "step": 3210 }, { "epoch": 2.558899740881005, "grad_norm": 0.63009061607353, "learning_rate": 5.8265264438657315e-06, "loss": 0.40214186906814575, "step": 3211 }, { "epoch": 2.559697030097668, "grad_norm": 0.5699692422528574, "learning_rate": 5.805964623309862e-06, "loss": 0.3685140609741211, "step": 3212 }, { "epoch": 2.5604943193143312, "grad_norm": 0.5606253172932394, "learning_rate": 5.7854369117044446e-06, "loss": 0.39264559745788574, "step": 3213 }, { "epoch": 2.5612916085309947, "grad_norm": 0.5153040580882539, "learning_rate": 5.76494332489278e-06, "loss": 0.35876306891441345, "step": 3214 }, { "epoch": 2.5620888977476577, "grad_norm": 0.5085581708610399, "learning_rate": 5.744483878691786e-06, "loss": 0.3058330714702606, "step": 3215 }, { "epoch": 2.562886186964321, "grad_norm": 0.5681881643640336, "learning_rate": 5.724058588892078e-06, "loss": 0.4092772901058197, "step": 3216 }, { "epoch": 2.5636834761809846, "grad_norm": 0.5279935189660231, "learning_rate": 5.703667471257901e-06, "loss": 0.34576913714408875, "step": 3217 }, { "epoch": 2.564480765397648, "grad_norm": 0.5062998528545207, "learning_rate": 5.683310541527098e-06, "loss": 0.32789912819862366, "step": 3218 }, { "epoch": 2.5652780546143115, "grad_norm": 0.595018616172831, "learning_rate": 5.6629878154111525e-06, "loss": 0.4290085434913635, "step": 3219 }, { "epoch": 2.566075343830975, "grad_norm": 0.6182283810776877, "learning_rate": 5.6426993085951396e-06, "loss": 0.40317249298095703, "step": 3220 }, { "epoch": 2.566872633047638, "grad_norm": 0.6001130965384238, "learning_rate": 5.622445036737739e-06, "loss": 0.4233166575431824, "step": 3221 }, { "epoch": 2.5676699222643014, "grad_norm": 0.5975184133527178, "learning_rate": 5.602225015471174e-06, "loss": 0.4484713673591614, "step": 3222 }, { "epoch": 2.568467211480965, "grad_norm": 0.6632145351789633, "learning_rate": 5.582039260401267e-06, "loss": 0.5243499279022217, "step": 3223 }, { "epoch": 2.569264500697628, "grad_norm": 0.5341652798274934, "learning_rate": 5.561887787107384e-06, "loss": 0.37947601079940796, "step": 3224 }, { "epoch": 2.5700617899142912, "grad_norm": 0.5892552765906486, "learning_rate": 5.541770611142416e-06, "loss": 0.37676766514778137, "step": 3225 }, { "epoch": 2.5708590791309547, "grad_norm": 0.5778664586787197, "learning_rate": 5.521687748032806e-06, "loss": 0.39179933071136475, "step": 3226 }, { "epoch": 2.571656368347618, "grad_norm": 0.5063306886683911, "learning_rate": 5.501639213278514e-06, "loss": 0.30148109793663025, "step": 3227 }, { "epoch": 2.5724536575642816, "grad_norm": 0.5984414089316794, "learning_rate": 5.481625022352987e-06, "loss": 0.3757040500640869, "step": 3228 }, { "epoch": 2.573250946780945, "grad_norm": 0.6143243474871299, "learning_rate": 5.461645190703158e-06, "loss": 0.4902521073818207, "step": 3229 }, { "epoch": 2.574048235997608, "grad_norm": 0.5597385557791515, "learning_rate": 5.441699733749478e-06, "loss": 0.42827874422073364, "step": 3230 }, { "epoch": 2.5748455252142715, "grad_norm": 0.6784151759885616, "learning_rate": 5.421788666885852e-06, "loss": 0.5321083068847656, "step": 3231 }, { "epoch": 2.575642814430935, "grad_norm": 0.5843959926364901, "learning_rate": 5.401912005479626e-06, "loss": 0.42624759674072266, "step": 3232 }, { "epoch": 2.576440103647598, "grad_norm": 0.5777915176924178, "learning_rate": 5.382069764871606e-06, "loss": 0.432757169008255, "step": 3233 }, { "epoch": 2.5772373928642613, "grad_norm": 0.5436736207431287, "learning_rate": 5.3622619603760415e-06, "loss": 0.38021886348724365, "step": 3234 }, { "epoch": 2.578034682080925, "grad_norm": 0.6311712257937765, "learning_rate": 5.342488607280577e-06, "loss": 0.41046077013015747, "step": 3235 }, { "epoch": 2.5788319712975882, "grad_norm": 0.6320787594929117, "learning_rate": 5.3227497208462925e-06, "loss": 0.43985795974731445, "step": 3236 }, { "epoch": 2.5796292605142517, "grad_norm": 0.6142673662270045, "learning_rate": 5.303045316307664e-06, "loss": 0.39781835675239563, "step": 3237 }, { "epoch": 2.580426549730915, "grad_norm": 0.6483361834456666, "learning_rate": 5.283375408872537e-06, "loss": 0.5238796472549438, "step": 3238 }, { "epoch": 2.581223838947578, "grad_norm": 0.6163747007482252, "learning_rate": 5.263740013722146e-06, "loss": 0.46693089604377747, "step": 3239 }, { "epoch": 2.5820211281642416, "grad_norm": 0.5023329582825798, "learning_rate": 5.244139146011101e-06, "loss": 0.2723483145236969, "step": 3240 }, { "epoch": 2.582818417380905, "grad_norm": 0.4909209889301303, "learning_rate": 5.224572820867324e-06, "loss": 0.3408634066581726, "step": 3241 }, { "epoch": 2.583615706597568, "grad_norm": 0.5100714406456988, "learning_rate": 5.205041053392118e-06, "loss": 0.323640912771225, "step": 3242 }, { "epoch": 2.5844129958142315, "grad_norm": 0.5238422254040902, "learning_rate": 5.185543858660102e-06, "loss": 0.359829306602478, "step": 3243 }, { "epoch": 2.585210285030895, "grad_norm": 0.5883803879279365, "learning_rate": 5.1660812517191905e-06, "loss": 0.3635004460811615, "step": 3244 }, { "epoch": 2.5860075742475583, "grad_norm": 0.6450629200253724, "learning_rate": 5.146653247590638e-06, "loss": 0.4144681692123413, "step": 3245 }, { "epoch": 2.586804863464222, "grad_norm": 0.5030386707769006, "learning_rate": 5.127259861268974e-06, "loss": 0.3152557611465454, "step": 3246 }, { "epoch": 2.5876021526808852, "grad_norm": 0.6257468044666978, "learning_rate": 5.107901107721996e-06, "loss": 0.46707189083099365, "step": 3247 }, { "epoch": 2.5883994418975482, "grad_norm": 0.5698138215480201, "learning_rate": 5.088577001890799e-06, "loss": 0.3996257781982422, "step": 3248 }, { "epoch": 2.5891967311142117, "grad_norm": 0.5561039422803669, "learning_rate": 5.069287558689723e-06, "loss": 0.4037163257598877, "step": 3249 }, { "epoch": 2.589994020330875, "grad_norm": 0.592317645125164, "learning_rate": 5.05003279300637e-06, "loss": 0.4273968040943146, "step": 3250 }, { "epoch": 2.590791309547538, "grad_norm": 0.5898505119056147, "learning_rate": 5.030812719701544e-06, "loss": 0.3504106402397156, "step": 3251 }, { "epoch": 2.5915885987642016, "grad_norm": 0.6075964134228174, "learning_rate": 5.011627353609305e-06, "loss": 0.4138500988483429, "step": 3252 }, { "epoch": 2.592385887980865, "grad_norm": 0.5280176516322641, "learning_rate": 4.992476709536925e-06, "loss": 0.40450844168663025, "step": 3253 }, { "epoch": 2.5931831771975284, "grad_norm": 0.505297992989471, "learning_rate": 4.973360802264859e-06, "loss": 0.33351820707321167, "step": 3254 }, { "epoch": 2.593980466414192, "grad_norm": 0.5705029875760708, "learning_rate": 4.954279646546756e-06, "loss": 0.4282197058200836, "step": 3255 }, { "epoch": 2.5947777556308553, "grad_norm": 0.5877137219625077, "learning_rate": 4.935233257109456e-06, "loss": 0.3564456105232239, "step": 3256 }, { "epoch": 2.5955750448475183, "grad_norm": 0.6252234693768958, "learning_rate": 4.9162216486529635e-06, "loss": 0.4258411228656769, "step": 3257 }, { "epoch": 2.5963723340641818, "grad_norm": 0.6148521347948422, "learning_rate": 4.897244835850428e-06, "loss": 0.4302756190299988, "step": 3258 }, { "epoch": 2.597169623280845, "grad_norm": 0.5718407077480976, "learning_rate": 4.8783028333481525e-06, "loss": 0.3820505440235138, "step": 3259 }, { "epoch": 2.597966912497508, "grad_norm": 0.5503198640534841, "learning_rate": 4.859395655765581e-06, "loss": 0.3550432026386261, "step": 3260 }, { "epoch": 2.5987642017141717, "grad_norm": 0.6412294818297838, "learning_rate": 4.84052331769525e-06, "loss": 0.436762273311615, "step": 3261 }, { "epoch": 2.599561490930835, "grad_norm": 0.6583258678467551, "learning_rate": 4.821685833702849e-06, "loss": 0.44586890935897827, "step": 3262 }, { "epoch": 2.6003587801474985, "grad_norm": 0.6245458182994861, "learning_rate": 4.802883218327142e-06, "loss": 0.39270275831222534, "step": 3263 }, { "epoch": 2.601156069364162, "grad_norm": 0.6392935586153266, "learning_rate": 4.784115486079971e-06, "loss": 0.3856537938117981, "step": 3264 }, { "epoch": 2.6019533585808254, "grad_norm": 0.4490728589476078, "learning_rate": 4.765382651446276e-06, "loss": 0.2960383892059326, "step": 3265 }, { "epoch": 2.6027506477974884, "grad_norm": 0.5391479509785253, "learning_rate": 4.746684728884071e-06, "loss": 0.3713158369064331, "step": 3266 }, { "epoch": 2.603547937014152, "grad_norm": 0.5167043790644132, "learning_rate": 4.728021732824389e-06, "loss": 0.36749160289764404, "step": 3267 }, { "epoch": 2.6043452262308153, "grad_norm": 0.5456518664743542, "learning_rate": 4.709393677671336e-06, "loss": 0.3659873306751251, "step": 3268 }, { "epoch": 2.6051425154474783, "grad_norm": 0.5153654165075905, "learning_rate": 4.690800577802051e-06, "loss": 0.35837322473526, "step": 3269 }, { "epoch": 2.6059398046641418, "grad_norm": 0.5308592883175387, "learning_rate": 4.672242447566671e-06, "loss": 0.3598008155822754, "step": 3270 }, { "epoch": 2.606737093880805, "grad_norm": 0.5780851344462797, "learning_rate": 4.653719301288362e-06, "loss": 0.4146574139595032, "step": 3271 }, { "epoch": 2.6075343830974687, "grad_norm": 0.6502865025295225, "learning_rate": 4.635231153263298e-06, "loss": 0.43880367279052734, "step": 3272 }, { "epoch": 2.608331672314132, "grad_norm": 0.5276231826149671, "learning_rate": 4.616778017760614e-06, "loss": 0.39163047075271606, "step": 3273 }, { "epoch": 2.6091289615307955, "grad_norm": 0.5357386032909458, "learning_rate": 4.598359909022443e-06, "loss": 0.40065819025039673, "step": 3274 }, { "epoch": 2.6099262507474585, "grad_norm": 0.45872282546942816, "learning_rate": 4.579976841263877e-06, "loss": 0.3270605206489563, "step": 3275 }, { "epoch": 2.610723539964122, "grad_norm": 0.6451670866781843, "learning_rate": 4.561628828672976e-06, "loss": 0.43902862071990967, "step": 3276 }, { "epoch": 2.6115208291807854, "grad_norm": 0.5592625010405465, "learning_rate": 4.543315885410715e-06, "loss": 0.39629459381103516, "step": 3277 }, { "epoch": 2.6123181183974484, "grad_norm": 0.5602288373739684, "learning_rate": 4.5250380256110335e-06, "loss": 0.3648795187473297, "step": 3278 }, { "epoch": 2.613115407614112, "grad_norm": 0.6282227886056793, "learning_rate": 4.506795263380792e-06, "loss": 0.4355687201023102, "step": 3279 }, { "epoch": 2.6139126968307753, "grad_norm": 0.5638351195421907, "learning_rate": 4.488587612799733e-06, "loss": 0.4349854588508606, "step": 3280 }, { "epoch": 2.6147099860474388, "grad_norm": 0.618994276572602, "learning_rate": 4.470415087920515e-06, "loss": 0.4348711371421814, "step": 3281 }, { "epoch": 2.615507275264102, "grad_norm": 0.5736232887985412, "learning_rate": 4.452277702768703e-06, "loss": 0.41419297456741333, "step": 3282 }, { "epoch": 2.6163045644807656, "grad_norm": 0.490626039810147, "learning_rate": 4.434175471342733e-06, "loss": 0.3438268303871155, "step": 3283 }, { "epoch": 2.6171018536974286, "grad_norm": 0.6062574826191359, "learning_rate": 4.416108407613889e-06, "loss": 0.3405243754386902, "step": 3284 }, { "epoch": 2.617899142914092, "grad_norm": 0.5809043179337058, "learning_rate": 4.398076525526334e-06, "loss": 0.3894893229007721, "step": 3285 }, { "epoch": 2.6186964321307555, "grad_norm": 0.49065692018536816, "learning_rate": 4.380079838997086e-06, "loss": 0.2956354320049286, "step": 3286 }, { "epoch": 2.6194937213474185, "grad_norm": 0.6209327053845071, "learning_rate": 4.362118361915962e-06, "loss": 0.37232309579849243, "step": 3287 }, { "epoch": 2.620291010564082, "grad_norm": 0.6349268281581741, "learning_rate": 4.344192108145639e-06, "loss": 0.5017603635787964, "step": 3288 }, { "epoch": 2.6210882997807454, "grad_norm": 0.5941683075658589, "learning_rate": 4.3263010915216065e-06, "loss": 0.3429720997810364, "step": 3289 }, { "epoch": 2.621885588997409, "grad_norm": 0.6044357519547785, "learning_rate": 4.308445325852134e-06, "loss": 0.34745141863822937, "step": 3290 }, { "epoch": 2.6226828782140723, "grad_norm": 0.5702098916350192, "learning_rate": 4.290624824918299e-06, "loss": 0.4176359176635742, "step": 3291 }, { "epoch": 2.6234801674307358, "grad_norm": 0.5500436052183092, "learning_rate": 4.272839602473977e-06, "loss": 0.3539411127567291, "step": 3292 }, { "epoch": 2.6242774566473988, "grad_norm": 0.566069271506748, "learning_rate": 4.255089672245783e-06, "loss": 0.4092276096343994, "step": 3293 }, { "epoch": 2.625074745864062, "grad_norm": 0.4862308413705672, "learning_rate": 4.237375047933118e-06, "loss": 0.2957075834274292, "step": 3294 }, { "epoch": 2.6258720350807256, "grad_norm": 0.636965765864332, "learning_rate": 4.219695743208135e-06, "loss": 0.3732312023639679, "step": 3295 }, { "epoch": 2.6266693242973886, "grad_norm": 0.5911995111893361, "learning_rate": 4.2020517717157035e-06, "loss": 0.35998424887657166, "step": 3296 }, { "epoch": 2.627466613514052, "grad_norm": 0.5448187676935433, "learning_rate": 4.184443147073441e-06, "loss": 0.3664681017398834, "step": 3297 }, { "epoch": 2.6282639027307155, "grad_norm": 0.6326154291127932, "learning_rate": 4.166869882871699e-06, "loss": 0.5162670612335205, "step": 3298 }, { "epoch": 2.629061191947379, "grad_norm": 0.523660400345433, "learning_rate": 4.1493319926735e-06, "loss": 0.3290325999259949, "step": 3299 }, { "epoch": 2.6298584811640424, "grad_norm": 0.6281197105967452, "learning_rate": 4.1318294900145885e-06, "loss": 0.4456695020198822, "step": 3300 }, { "epoch": 2.630655770380706, "grad_norm": 0.6067209401518562, "learning_rate": 4.114362388403409e-06, "loss": 0.4935718774795532, "step": 3301 }, { "epoch": 2.631453059597369, "grad_norm": 0.6010237892716511, "learning_rate": 4.096930701321044e-06, "loss": 0.45071882009506226, "step": 3302 }, { "epoch": 2.6322503488140323, "grad_norm": 0.5511092616236943, "learning_rate": 4.079534442221283e-06, "loss": 0.391877681016922, "step": 3303 }, { "epoch": 2.6330476380306957, "grad_norm": 0.5989401757946128, "learning_rate": 4.062173624530558e-06, "loss": 0.3890798091888428, "step": 3304 }, { "epoch": 2.6338449272473587, "grad_norm": 0.508116057924069, "learning_rate": 4.044848261647943e-06, "loss": 0.33978551626205444, "step": 3305 }, { "epoch": 2.634642216464022, "grad_norm": 0.5999689336496787, "learning_rate": 4.02755836694515e-06, "loss": 0.423325777053833, "step": 3306 }, { "epoch": 2.6354395056806856, "grad_norm": 0.5998416489905567, "learning_rate": 4.010303953766509e-06, "loss": 0.3823911249637604, "step": 3307 }, { "epoch": 2.636236794897349, "grad_norm": 0.5815739119030059, "learning_rate": 3.993085035428989e-06, "loss": 0.38808003067970276, "step": 3308 }, { "epoch": 2.6370340841140125, "grad_norm": 0.5994272263168263, "learning_rate": 3.975901625222133e-06, "loss": 0.39395660161972046, "step": 3309 }, { "epoch": 2.637831373330676, "grad_norm": 0.5688670838259227, "learning_rate": 3.958753736408105e-06, "loss": 0.39472341537475586, "step": 3310 }, { "epoch": 2.638628662547339, "grad_norm": 0.5062653405419026, "learning_rate": 3.941641382221639e-06, "loss": 0.37984925508499146, "step": 3311 }, { "epoch": 2.6394259517640024, "grad_norm": 0.5743439482600934, "learning_rate": 3.92456457587006e-06, "loss": 0.4209235906600952, "step": 3312 }, { "epoch": 2.640223240980666, "grad_norm": 0.49902714826684935, "learning_rate": 3.907523330533219e-06, "loss": 0.37017935514450073, "step": 3313 }, { "epoch": 2.641020530197329, "grad_norm": 0.5825596184457104, "learning_rate": 3.890517659363569e-06, "loss": 0.338087260723114, "step": 3314 }, { "epoch": 2.6418178194139923, "grad_norm": 0.4739233013406104, "learning_rate": 3.8735475754860865e-06, "loss": 0.36009976267814636, "step": 3315 }, { "epoch": 2.6426151086306557, "grad_norm": 0.622699315312551, "learning_rate": 3.856613091998257e-06, "loss": 0.4799412488937378, "step": 3316 }, { "epoch": 2.643412397847319, "grad_norm": 0.6182866360336345, "learning_rate": 3.839714221970131e-06, "loss": 0.4120270609855652, "step": 3317 }, { "epoch": 2.6442096870639826, "grad_norm": 0.5582959296916156, "learning_rate": 3.822850978444254e-06, "loss": 0.3915528953075409, "step": 3318 }, { "epoch": 2.645006976280646, "grad_norm": 0.6079236293628945, "learning_rate": 3.8060233744356633e-06, "loss": 0.42705923318862915, "step": 3319 }, { "epoch": 2.645804265497309, "grad_norm": 0.5778059421388706, "learning_rate": 3.7892314229319102e-06, "loss": 0.39385586977005005, "step": 3320 }, { "epoch": 2.6466015547139725, "grad_norm": 0.58276665586706, "learning_rate": 3.7724751368930276e-06, "loss": 0.41850659251213074, "step": 3321 }, { "epoch": 2.647398843930636, "grad_norm": 0.5260609507111988, "learning_rate": 3.7557545292514983e-06, "loss": 0.3830314874649048, "step": 3322 }, { "epoch": 2.648196133147299, "grad_norm": 0.50779352745427, "learning_rate": 3.7390696129122937e-06, "loss": 0.33612850308418274, "step": 3323 }, { "epoch": 2.6489934223639624, "grad_norm": 0.5252744721393233, "learning_rate": 3.7224204007528353e-06, "loss": 0.3556079864501953, "step": 3324 }, { "epoch": 2.649790711580626, "grad_norm": 0.5041911302170897, "learning_rate": 3.7058069056229717e-06, "loss": 0.3357621431350708, "step": 3325 }, { "epoch": 2.6505880007972893, "grad_norm": 0.5921831656185763, "learning_rate": 3.6892291403449965e-06, "loss": 0.3740721344947815, "step": 3326 }, { "epoch": 2.6513852900139527, "grad_norm": 0.4730338813234771, "learning_rate": 3.672687117713641e-06, "loss": 0.30988144874572754, "step": 3327 }, { "epoch": 2.652182579230616, "grad_norm": 0.5107048186366436, "learning_rate": 3.6561808504960217e-06, "loss": 0.3278537690639496, "step": 3328 }, { "epoch": 2.652979868447279, "grad_norm": 0.580834106299546, "learning_rate": 3.6397103514316743e-06, "loss": 0.38835981488227844, "step": 3329 }, { "epoch": 2.6537771576639426, "grad_norm": 0.5408656399977939, "learning_rate": 3.623275633232537e-06, "loss": 0.4080798327922821, "step": 3330 }, { "epoch": 2.654574446880606, "grad_norm": 0.5563930988582938, "learning_rate": 3.606876708582918e-06, "loss": 0.4093663692474365, "step": 3331 }, { "epoch": 2.655371736097269, "grad_norm": 0.6314182433820711, "learning_rate": 3.5905135901395048e-06, "loss": 0.4209539592266083, "step": 3332 }, { "epoch": 2.6561690253139325, "grad_norm": 0.5958527634910518, "learning_rate": 3.5741862905313506e-06, "loss": 0.42281070351600647, "step": 3333 }, { "epoch": 2.656966314530596, "grad_norm": 0.475602105190175, "learning_rate": 3.557894822359864e-06, "loss": 0.3307143449783325, "step": 3334 }, { "epoch": 2.6577636037472594, "grad_norm": 0.7325137655936371, "learning_rate": 3.541639198198793e-06, "loss": 0.33829882740974426, "step": 3335 }, { "epoch": 2.658560892963923, "grad_norm": 0.5496225064748405, "learning_rate": 3.5254194305942234e-06, "loss": 0.36527079343795776, "step": 3336 }, { "epoch": 2.6593581821805863, "grad_norm": 0.593611329598848, "learning_rate": 3.5092355320645807e-06, "loss": 0.41060322523117065, "step": 3337 }, { "epoch": 2.6601554713972493, "grad_norm": 0.561416373260713, "learning_rate": 3.4930875151005904e-06, "loss": 0.3704788088798523, "step": 3338 }, { "epoch": 2.6609527606139127, "grad_norm": 0.6021213722352462, "learning_rate": 3.4769753921652883e-06, "loss": 0.3748849928379059, "step": 3339 }, { "epoch": 2.661750049830576, "grad_norm": 0.5723861918910029, "learning_rate": 3.46089917569401e-06, "loss": 0.3959687352180481, "step": 3340 }, { "epoch": 2.662547339047239, "grad_norm": 0.5919229546277053, "learning_rate": 3.4448588780943814e-06, "loss": 0.4141688942909241, "step": 3341 }, { "epoch": 2.6633446282639026, "grad_norm": 0.5883687036122389, "learning_rate": 3.4288545117462934e-06, "loss": 0.4035644829273224, "step": 3342 }, { "epoch": 2.664141917480566, "grad_norm": 0.5057959844521119, "learning_rate": 3.4128860890019154e-06, "loss": 0.386327862739563, "step": 3343 }, { "epoch": 2.6649392066972295, "grad_norm": 0.5419130954630647, "learning_rate": 3.396953622185689e-06, "loss": 0.3449072539806366, "step": 3344 }, { "epoch": 2.665736495913893, "grad_norm": 0.7096960414663316, "learning_rate": 3.3810571235942666e-06, "loss": 0.43556642532348633, "step": 3345 }, { "epoch": 2.666533785130556, "grad_norm": 0.5181526564095721, "learning_rate": 3.3651966054965733e-06, "loss": 0.3671996593475342, "step": 3346 }, { "epoch": 2.6673310743472194, "grad_norm": 0.6728745169436251, "learning_rate": 3.349372080133767e-06, "loss": 0.48375144600868225, "step": 3347 }, { "epoch": 2.668128363563883, "grad_norm": 0.5272481907318305, "learning_rate": 3.3335835597191944e-06, "loss": 0.37469616532325745, "step": 3348 }, { "epoch": 2.6689256527805463, "grad_norm": 0.577460912291613, "learning_rate": 3.317831056438442e-06, "loss": 0.3211839497089386, "step": 3349 }, { "epoch": 2.6697229419972093, "grad_norm": 0.5450218049852381, "learning_rate": 3.302114582449295e-06, "loss": 0.36216533184051514, "step": 3350 }, { "epoch": 2.6705202312138727, "grad_norm": 0.5853051540658324, "learning_rate": 3.2864341498817173e-06, "loss": 0.48323509097099304, "step": 3351 }, { "epoch": 2.671317520430536, "grad_norm": 0.5990072273407928, "learning_rate": 3.2707897708378677e-06, "loss": 0.42206358909606934, "step": 3352 }, { "epoch": 2.6721148096471996, "grad_norm": 0.5453533986438879, "learning_rate": 3.2551814573920815e-06, "loss": 0.3284844756126404, "step": 3353 }, { "epoch": 2.672912098863863, "grad_norm": 0.5750480920434924, "learning_rate": 3.2396092215908446e-06, "loss": 0.3761211335659027, "step": 3354 }, { "epoch": 2.673709388080526, "grad_norm": 0.627610442522674, "learning_rate": 3.2240730754528048e-06, "loss": 0.4286021590232849, "step": 3355 }, { "epoch": 2.6745066772971895, "grad_norm": 0.5913500066229775, "learning_rate": 3.20857303096877e-06, "loss": 0.39545178413391113, "step": 3356 }, { "epoch": 2.675303966513853, "grad_norm": 0.5787715112305555, "learning_rate": 3.19310910010166e-06, "loss": 0.3788149654865265, "step": 3357 }, { "epoch": 2.6761012557305164, "grad_norm": 0.5787839475018883, "learning_rate": 3.1776812947865385e-06, "loss": 0.3659389913082123, "step": 3358 }, { "epoch": 2.6768985449471794, "grad_norm": 0.5562469018438275, "learning_rate": 3.1622896269305924e-06, "loss": 0.41709697246551514, "step": 3359 }, { "epoch": 2.677695834163843, "grad_norm": 0.556392544136696, "learning_rate": 3.146934108413102e-06, "loss": 0.41574740409851074, "step": 3360 }, { "epoch": 2.6784931233805063, "grad_norm": 0.6397019754919123, "learning_rate": 3.1316147510854432e-06, "loss": 0.46635982394218445, "step": 3361 }, { "epoch": 2.6792904125971697, "grad_norm": 0.5369752306659592, "learning_rate": 3.1163315667711024e-06, "loss": 0.33261817693710327, "step": 3362 }, { "epoch": 2.680087701813833, "grad_norm": 0.5203595663920614, "learning_rate": 3.1010845672656497e-06, "loss": 0.41046980023384094, "step": 3363 }, { "epoch": 2.680884991030496, "grad_norm": 0.5314548383425276, "learning_rate": 3.0858737643367e-06, "loss": 0.37154561281204224, "step": 3364 }, { "epoch": 2.6816822802471596, "grad_norm": 0.47257721824713445, "learning_rate": 3.070699169723956e-06, "loss": 0.3223976790904999, "step": 3365 }, { "epoch": 2.682479569463823, "grad_norm": 0.559954171597075, "learning_rate": 3.055560795139173e-06, "loss": 0.36151549220085144, "step": 3366 }, { "epoch": 2.6832768586804865, "grad_norm": 0.510385576290566, "learning_rate": 3.0404586522661482e-06, "loss": 0.300226092338562, "step": 3367 }, { "epoch": 2.6840741478971495, "grad_norm": 0.6415894727878307, "learning_rate": 3.0253927527606973e-06, "loss": 0.4407046437263489, "step": 3368 }, { "epoch": 2.684871437113813, "grad_norm": 0.5369476886578979, "learning_rate": 3.0103631082506913e-06, "loss": 0.35652270913124084, "step": 3369 }, { "epoch": 2.6856687263304764, "grad_norm": 0.5276570350423065, "learning_rate": 2.995369730336012e-06, "loss": 0.3988882005214691, "step": 3370 }, { "epoch": 2.68646601554714, "grad_norm": 0.6215907523278238, "learning_rate": 2.980412630588536e-06, "loss": 0.4174453020095825, "step": 3371 }, { "epoch": 2.6872633047638033, "grad_norm": 0.5489153398071038, "learning_rate": 2.965491820552152e-06, "loss": 0.3842538595199585, "step": 3372 }, { "epoch": 2.6880605939804663, "grad_norm": 0.465014673735717, "learning_rate": 2.9506073117427536e-06, "loss": 0.32069307565689087, "step": 3373 }, { "epoch": 2.6888578831971297, "grad_norm": 0.6677994339092919, "learning_rate": 2.9357591156481788e-06, "loss": 0.4972512423992157, "step": 3374 }, { "epoch": 2.689655172413793, "grad_norm": 0.507512904347421, "learning_rate": 2.9209472437282783e-06, "loss": 0.2867007255554199, "step": 3375 }, { "epoch": 2.6904524616304566, "grad_norm": 0.611206279122291, "learning_rate": 2.906171707414862e-06, "loss": 0.4372481405735016, "step": 3376 }, { "epoch": 2.6912497508471196, "grad_norm": 0.6665928393845185, "learning_rate": 2.8914325181116643e-06, "loss": 0.4970610737800598, "step": 3377 }, { "epoch": 2.692047040063783, "grad_norm": 0.5635143384300856, "learning_rate": 2.8767296871944014e-06, "loss": 0.42109978199005127, "step": 3378 }, { "epoch": 2.6928443292804465, "grad_norm": 0.5988078783773374, "learning_rate": 2.8620632260107195e-06, "loss": 0.3895643353462219, "step": 3379 }, { "epoch": 2.69364161849711, "grad_norm": 0.6159409228650826, "learning_rate": 2.847433145880185e-06, "loss": 0.45033952593803406, "step": 3380 }, { "epoch": 2.6944389077137734, "grad_norm": 0.5778200464995307, "learning_rate": 2.8328394580942883e-06, "loss": 0.3509983718395233, "step": 3381 }, { "epoch": 2.6952361969304364, "grad_norm": 0.5439824092235667, "learning_rate": 2.8182821739164534e-06, "loss": 0.4009159207344055, "step": 3382 }, { "epoch": 2.6960334861471, "grad_norm": 0.6708315987261966, "learning_rate": 2.80376130458197e-06, "loss": 0.482560932636261, "step": 3383 }, { "epoch": 2.6968307753637633, "grad_norm": 0.614457662118961, "learning_rate": 2.7892768612980514e-06, "loss": 0.46296045184135437, "step": 3384 }, { "epoch": 2.6976280645804267, "grad_norm": 0.5843913171613763, "learning_rate": 2.774828855243794e-06, "loss": 0.4086743891239166, "step": 3385 }, { "epoch": 2.6984253537970897, "grad_norm": 0.5929058640736584, "learning_rate": 2.7604172975701614e-06, "loss": 0.4156409502029419, "step": 3386 }, { "epoch": 2.699222643013753, "grad_norm": 0.4627417420812677, "learning_rate": 2.746042199399984e-06, "loss": 0.2856704592704773, "step": 3387 }, { "epoch": 2.7000199322304166, "grad_norm": 0.6008546922639916, "learning_rate": 2.7317035718279704e-06, "loss": 0.406379371881485, "step": 3388 }, { "epoch": 2.70081722144708, "grad_norm": 0.5661634034438421, "learning_rate": 2.717401425920679e-06, "loss": 0.32125306129455566, "step": 3389 }, { "epoch": 2.7016145106637435, "grad_norm": 0.7125431270521412, "learning_rate": 2.7031357727164863e-06, "loss": 0.5193711519241333, "step": 3390 }, { "epoch": 2.7024117998804065, "grad_norm": 0.6039263040429675, "learning_rate": 2.6889066232256286e-06, "loss": 0.4198905825614929, "step": 3391 }, { "epoch": 2.70320908909707, "grad_norm": 0.6579815243422511, "learning_rate": 2.674713988430161e-06, "loss": 0.4792756736278534, "step": 3392 }, { "epoch": 2.7040063783137334, "grad_norm": 0.5638473846971415, "learning_rate": 2.66055787928397e-06, "loss": 0.3748607337474823, "step": 3393 }, { "epoch": 2.704803667530397, "grad_norm": 0.5941605506882321, "learning_rate": 2.6464383067127173e-06, "loss": 0.3762661814689636, "step": 3394 }, { "epoch": 2.70560095674706, "grad_norm": 0.6270790999697714, "learning_rate": 2.6323552816138963e-06, "loss": 0.42704862356185913, "step": 3395 }, { "epoch": 2.7063982459637232, "grad_norm": 0.5425496213090057, "learning_rate": 2.618308814856796e-06, "loss": 0.3634372353553772, "step": 3396 }, { "epoch": 2.7071955351803867, "grad_norm": 0.6501616059070923, "learning_rate": 2.604298917282455e-06, "loss": 0.41391539573669434, "step": 3397 }, { "epoch": 2.70799282439705, "grad_norm": 0.6563881651468811, "learning_rate": 2.5903255997037245e-06, "loss": 0.42359763383865356, "step": 3398 }, { "epoch": 2.7087901136137136, "grad_norm": 0.5206044146829581, "learning_rate": 2.5763888729052164e-06, "loss": 0.3095484673976898, "step": 3399 }, { "epoch": 2.7095874028303766, "grad_norm": 0.5820661446541545, "learning_rate": 2.562488747643271e-06, "loss": 0.4039982557296753, "step": 3400 }, { "epoch": 2.71038469204704, "grad_norm": 0.6040946444908012, "learning_rate": 2.548625234646024e-06, "loss": 0.44002050161361694, "step": 3401 }, { "epoch": 2.7111819812637035, "grad_norm": 0.6102940895427096, "learning_rate": 2.534798344613326e-06, "loss": 0.43627822399139404, "step": 3402 }, { "epoch": 2.711979270480367, "grad_norm": 0.46241432346476136, "learning_rate": 2.521008088216764e-06, "loss": 0.24899248778820038, "step": 3403 }, { "epoch": 2.71277655969703, "grad_norm": 0.5261160150519449, "learning_rate": 2.5072544760996574e-06, "loss": 0.32208967208862305, "step": 3404 }, { "epoch": 2.7135738489136934, "grad_norm": 0.5259896231644183, "learning_rate": 2.4935375188770538e-06, "loss": 0.33316174149513245, "step": 3405 }, { "epoch": 2.714371138130357, "grad_norm": 0.46160849584079017, "learning_rate": 2.4798572271356846e-06, "loss": 0.34251683950424194, "step": 3406 }, { "epoch": 2.7151684273470202, "grad_norm": 0.6172549048557755, "learning_rate": 2.466213611434004e-06, "loss": 0.40482398867607117, "step": 3407 }, { "epoch": 2.7159657165636837, "grad_norm": 0.5615274427159963, "learning_rate": 2.452606682302155e-06, "loss": 0.4390622675418854, "step": 3408 }, { "epoch": 2.7167630057803467, "grad_norm": 0.6247118707222685, "learning_rate": 2.439036450241949e-06, "loss": 0.45180749893188477, "step": 3409 }, { "epoch": 2.71756029499701, "grad_norm": 0.553632626892962, "learning_rate": 2.425502925726908e-06, "loss": 0.3455575406551361, "step": 3410 }, { "epoch": 2.7183575842136736, "grad_norm": 0.5126300323736227, "learning_rate": 2.412006119202198e-06, "loss": 0.405692994594574, "step": 3411 }, { "epoch": 2.719154873430337, "grad_norm": 0.6670589679655191, "learning_rate": 2.3985460410846538e-06, "loss": 0.4585513472557068, "step": 3412 }, { "epoch": 2.719952162647, "grad_norm": 0.5327157162838313, "learning_rate": 2.3851227017627543e-06, "loss": 0.3709025979042053, "step": 3413 }, { "epoch": 2.7207494518636635, "grad_norm": 0.6863957166182161, "learning_rate": 2.3717361115966342e-06, "loss": 0.44893887639045715, "step": 3414 }, { "epoch": 2.721546741080327, "grad_norm": 0.5883489076849985, "learning_rate": 2.358386280918068e-06, "loss": 0.3784184157848358, "step": 3415 }, { "epoch": 2.7223440302969903, "grad_norm": 0.5764754279231217, "learning_rate": 2.3450732200304416e-06, "loss": 0.421016663312912, "step": 3416 }, { "epoch": 2.723141319513654, "grad_norm": 0.6072858628034518, "learning_rate": 2.3317969392087804e-06, "loss": 0.37158986926078796, "step": 3417 }, { "epoch": 2.723938608730317, "grad_norm": 0.5941591253941817, "learning_rate": 2.3185574486997262e-06, "loss": 0.39142751693725586, "step": 3418 }, { "epoch": 2.7247358979469802, "grad_norm": 0.5784573028015747, "learning_rate": 2.305354758721495e-06, "loss": 0.37153327465057373, "step": 3419 }, { "epoch": 2.7255331871636437, "grad_norm": 0.5811259820218143, "learning_rate": 2.292188879463936e-06, "loss": 0.4123116433620453, "step": 3420 }, { "epoch": 2.726330476380307, "grad_norm": 0.6465083997287896, "learning_rate": 2.2790598210884694e-06, "loss": 0.4454333484172821, "step": 3421 }, { "epoch": 2.72712776559697, "grad_norm": 0.5597700728075993, "learning_rate": 2.265967593728108e-06, "loss": 0.3874860405921936, "step": 3422 }, { "epoch": 2.7279250548136336, "grad_norm": 0.5747404363813486, "learning_rate": 2.252912207487412e-06, "loss": 0.37700602412223816, "step": 3423 }, { "epoch": 2.728722344030297, "grad_norm": 0.5407405080711244, "learning_rate": 2.239893672442539e-06, "loss": 0.34443914890289307, "step": 3424 }, { "epoch": 2.7295196332469605, "grad_norm": 0.5742833761877677, "learning_rate": 2.2269119986412013e-06, "loss": 0.3367951512336731, "step": 3425 }, { "epoch": 2.730316922463624, "grad_norm": 0.6028872622776192, "learning_rate": 2.21396719610264e-06, "loss": 0.40082409977912903, "step": 3426 }, { "epoch": 2.731114211680287, "grad_norm": 0.5247872866077224, "learning_rate": 2.2010592748176518e-06, "loss": 0.36947396397590637, "step": 3427 }, { "epoch": 2.7319115008969503, "grad_norm": 0.5855878204773637, "learning_rate": 2.1881882447485736e-06, "loss": 0.3707790970802307, "step": 3428 }, { "epoch": 2.732708790113614, "grad_norm": 0.5552133953944708, "learning_rate": 2.1753541158292644e-06, "loss": 0.37187090516090393, "step": 3429 }, { "epoch": 2.733506079330277, "grad_norm": 0.6252439129082273, "learning_rate": 2.1625568979651014e-06, "loss": 0.44985049962997437, "step": 3430 }, { "epoch": 2.7343033685469402, "grad_norm": 0.5467444445138814, "learning_rate": 2.149796601032977e-06, "loss": 0.36394062638282776, "step": 3431 }, { "epoch": 2.7351006577636037, "grad_norm": 0.515227581193449, "learning_rate": 2.137073234881282e-06, "loss": 0.3278900384902954, "step": 3432 }, { "epoch": 2.735897946980267, "grad_norm": 0.5297747706940763, "learning_rate": 2.1243868093299145e-06, "loss": 0.32453811168670654, "step": 3433 }, { "epoch": 2.7366952361969306, "grad_norm": 0.6358308785553088, "learning_rate": 2.1117373341702606e-06, "loss": 0.3903607130050659, "step": 3434 }, { "epoch": 2.737492525413594, "grad_norm": 0.5317395670458561, "learning_rate": 2.099124819165166e-06, "loss": 0.3269319236278534, "step": 3435 }, { "epoch": 2.738289814630257, "grad_norm": 0.5140986169591174, "learning_rate": 2.0865492740489823e-06, "loss": 0.31491708755493164, "step": 3436 }, { "epoch": 2.7390871038469204, "grad_norm": 0.5867299606332262, "learning_rate": 2.0740107085275184e-06, "loss": 0.36478477716445923, "step": 3437 }, { "epoch": 2.739884393063584, "grad_norm": 0.5933404319411574, "learning_rate": 2.0615091322780277e-06, "loss": 0.4287503957748413, "step": 3438 }, { "epoch": 2.740681682280247, "grad_norm": 0.6036574889567282, "learning_rate": 2.0490445549492197e-06, "loss": 0.403832346200943, "step": 3439 }, { "epoch": 2.7414789714969103, "grad_norm": 0.5830291514979882, "learning_rate": 2.0366169861612604e-06, "loss": 0.4060303568840027, "step": 3440 }, { "epoch": 2.7422762607135738, "grad_norm": 0.5699686137894419, "learning_rate": 2.0242264355057493e-06, "loss": 0.3318461775779724, "step": 3441 }, { "epoch": 2.743073549930237, "grad_norm": 0.5716463029682526, "learning_rate": 2.0118729125457037e-06, "loss": 0.3998161554336548, "step": 3442 }, { "epoch": 2.7438708391469007, "grad_norm": 0.5197305553265262, "learning_rate": 1.999556426815574e-06, "loss": 0.33129262924194336, "step": 3443 }, { "epoch": 2.744668128363564, "grad_norm": 0.49321053302365697, "learning_rate": 1.987276987821235e-06, "loss": 0.3029579520225525, "step": 3444 }, { "epoch": 2.745465417580227, "grad_norm": 0.5444224729210474, "learning_rate": 1.9750346050399327e-06, "loss": 0.3678513169288635, "step": 3445 }, { "epoch": 2.7462627067968906, "grad_norm": 0.6003677679511115, "learning_rate": 1.962829287920348e-06, "loss": 0.3847750425338745, "step": 3446 }, { "epoch": 2.747059996013554, "grad_norm": 0.4662726893856003, "learning_rate": 1.950661045882546e-06, "loss": 0.278730571269989, "step": 3447 }, { "epoch": 2.747857285230217, "grad_norm": 0.573160645847384, "learning_rate": 1.9385298883179746e-06, "loss": 0.4093448519706726, "step": 3448 }, { "epoch": 2.7486545744468804, "grad_norm": 0.5565975834484947, "learning_rate": 1.9264358245894564e-06, "loss": 0.35238224267959595, "step": 3449 }, { "epoch": 2.749451863663544, "grad_norm": 0.5692577963679629, "learning_rate": 1.91437886403118e-06, "loss": 0.46149736642837524, "step": 3450 }, { "epoch": 2.7502491528802073, "grad_norm": 0.6899577558194874, "learning_rate": 1.9023590159487238e-06, "loss": 0.4763375222682953, "step": 3451 }, { "epoch": 2.7510464420968708, "grad_norm": 0.5229699685500927, "learning_rate": 1.8903762896189892e-06, "loss": 0.3055652379989624, "step": 3452 }, { "epoch": 2.751843731313534, "grad_norm": 0.6028217383493605, "learning_rate": 1.8784306942902507e-06, "loss": 0.3716338574886322, "step": 3453 }, { "epoch": 2.752641020530197, "grad_norm": 0.5688222096010295, "learning_rate": 1.8665222391821169e-06, "loss": 0.4058264195919037, "step": 3454 }, { "epoch": 2.7534383097468607, "grad_norm": 0.5964250653925535, "learning_rate": 1.85465093348553e-06, "loss": 0.3777772784233093, "step": 3455 }, { "epoch": 2.754235598963524, "grad_norm": 0.5776952383865648, "learning_rate": 1.8428167863627555e-06, "loss": 0.380662202835083, "step": 3456 }, { "epoch": 2.755032888180187, "grad_norm": 0.5523701177664847, "learning_rate": 1.8310198069474038e-06, "loss": 0.37201234698295593, "step": 3457 }, { "epoch": 2.7558301773968505, "grad_norm": 0.5387504199900869, "learning_rate": 1.819260004344364e-06, "loss": 0.3685578405857086, "step": 3458 }, { "epoch": 2.756627466613514, "grad_norm": 0.5118760160199515, "learning_rate": 1.807537387629854e-06, "loss": 0.3144787847995758, "step": 3459 }, { "epoch": 2.7574247558301774, "grad_norm": 0.5257212883776767, "learning_rate": 1.7958519658514027e-06, "loss": 0.3511994481086731, "step": 3460 }, { "epoch": 2.758222045046841, "grad_norm": 0.5068935603285344, "learning_rate": 1.7842037480277963e-06, "loss": 0.29232698678970337, "step": 3461 }, { "epoch": 2.7590193342635043, "grad_norm": 0.5486248189952582, "learning_rate": 1.7725927431491373e-06, "loss": 0.33639445900917053, "step": 3462 }, { "epoch": 2.7598166234801673, "grad_norm": 0.6248563827468978, "learning_rate": 1.7610189601768024e-06, "loss": 0.38584116101264954, "step": 3463 }, { "epoch": 2.7606139126968308, "grad_norm": 0.5561831747643081, "learning_rate": 1.7494824080434347e-06, "loss": 0.4027893841266632, "step": 3464 }, { "epoch": 2.761411201913494, "grad_norm": 0.5902382208076223, "learning_rate": 1.7379830956529287e-06, "loss": 0.4132477343082428, "step": 3465 }, { "epoch": 2.762208491130157, "grad_norm": 0.5877449305300448, "learning_rate": 1.7265210318804682e-06, "loss": 0.4038558006286621, "step": 3466 }, { "epoch": 2.7630057803468207, "grad_norm": 0.5573832992421568, "learning_rate": 1.715096225572471e-06, "loss": 0.3834325671195984, "step": 3467 }, { "epoch": 2.763803069563484, "grad_norm": 0.5384556270671734, "learning_rate": 1.70370868554659e-06, "loss": 0.32279062271118164, "step": 3468 }, { "epoch": 2.7646003587801475, "grad_norm": 0.583509603934112, "learning_rate": 1.692358420591733e-06, "loss": 0.3989024758338928, "step": 3469 }, { "epoch": 2.765397647996811, "grad_norm": 0.6664296515629792, "learning_rate": 1.6810454394680431e-06, "loss": 0.4301661252975464, "step": 3470 }, { "epoch": 2.7661949372134744, "grad_norm": 0.5136536510144026, "learning_rate": 1.6697697509068632e-06, "loss": 0.3039530813694, "step": 3471 }, { "epoch": 2.7669922264301374, "grad_norm": 0.6331545829065828, "learning_rate": 1.6585313636107714e-06, "loss": 0.4091618061065674, "step": 3472 }, { "epoch": 2.767789515646801, "grad_norm": 0.6011037228468299, "learning_rate": 1.6473302862535623e-06, "loss": 0.44938117265701294, "step": 3473 }, { "epoch": 2.7685868048634643, "grad_norm": 0.5963735408865906, "learning_rate": 1.636166527480215e-06, "loss": 0.43031418323516846, "step": 3474 }, { "epoch": 2.7693840940801273, "grad_norm": 0.6289651235711956, "learning_rate": 1.6250400959069211e-06, "loss": 0.45556640625, "step": 3475 }, { "epoch": 2.7701813832967908, "grad_norm": 0.6284728017710296, "learning_rate": 1.6139510001210557e-06, "loss": 0.4318508505821228, "step": 3476 }, { "epoch": 2.770978672513454, "grad_norm": 0.6087686348168285, "learning_rate": 1.6028992486811944e-06, "loss": 0.43209147453308105, "step": 3477 }, { "epoch": 2.7717759617301176, "grad_norm": 0.5866191636725803, "learning_rate": 1.5918848501170647e-06, "loss": 0.44723066687583923, "step": 3478 }, { "epoch": 2.772573250946781, "grad_norm": 0.5871225617882092, "learning_rate": 1.5809078129295773e-06, "loss": 0.401987761259079, "step": 3479 }, { "epoch": 2.7733705401634445, "grad_norm": 0.5706959703991119, "learning_rate": 1.5699681455908167e-06, "loss": 0.34448161721229553, "step": 3480 }, { "epoch": 2.7741678293801075, "grad_norm": 0.5828880465988512, "learning_rate": 1.5590658565440063e-06, "loss": 0.3782805800437927, "step": 3481 }, { "epoch": 2.774965118596771, "grad_norm": 0.5716255695927109, "learning_rate": 1.5482009542035381e-06, "loss": 0.4240151047706604, "step": 3482 }, { "epoch": 2.7757624078134344, "grad_norm": 0.548264197189575, "learning_rate": 1.5373734469549482e-06, "loss": 0.45670080184936523, "step": 3483 }, { "epoch": 2.7765596970300974, "grad_norm": 0.6139288594978115, "learning_rate": 1.5265833431548916e-06, "loss": 0.3560226559638977, "step": 3484 }, { "epoch": 2.777356986246761, "grad_norm": 0.6256556563900157, "learning_rate": 1.5158306511311727e-06, "loss": 0.40817752480506897, "step": 3485 }, { "epoch": 2.7781542754634243, "grad_norm": 0.5586112898792329, "learning_rate": 1.505115379182731e-06, "loss": 0.33994489908218384, "step": 3486 }, { "epoch": 2.7789515646800877, "grad_norm": 0.5705451616189857, "learning_rate": 1.4944375355795903e-06, "loss": 0.37449952960014343, "step": 3487 }, { "epoch": 2.779748853896751, "grad_norm": 0.5702745783916756, "learning_rate": 1.4837971285629248e-06, "loss": 0.39055490493774414, "step": 3488 }, { "epoch": 2.7805461431134146, "grad_norm": 0.5759035708582587, "learning_rate": 1.4731941663449933e-06, "loss": 0.35294896364212036, "step": 3489 }, { "epoch": 2.7813434323300776, "grad_norm": 0.6389488216439045, "learning_rate": 1.4626286571091663e-06, "loss": 0.3697643578052521, "step": 3490 }, { "epoch": 2.782140721546741, "grad_norm": 0.5977686851943573, "learning_rate": 1.4521006090098932e-06, "loss": 0.4122718274593353, "step": 3491 }, { "epoch": 2.7829380107634045, "grad_norm": 0.5462584764099637, "learning_rate": 1.4416100301727188e-06, "loss": 0.3954659700393677, "step": 3492 }, { "epoch": 2.7837352999800675, "grad_norm": 0.5405836949052132, "learning_rate": 1.4311569286942883e-06, "loss": 0.3150964379310608, "step": 3493 }, { "epoch": 2.784532589196731, "grad_norm": 0.5332493001244898, "learning_rate": 1.420741312642282e-06, "loss": 0.3876032531261444, "step": 3494 }, { "epoch": 2.7853298784133944, "grad_norm": 0.542019275899045, "learning_rate": 1.4103631900554804e-06, "loss": 0.38946908712387085, "step": 3495 }, { "epoch": 2.786127167630058, "grad_norm": 0.6280785424302548, "learning_rate": 1.4000225689437263e-06, "loss": 0.41322657465934753, "step": 3496 }, { "epoch": 2.7869244568467213, "grad_norm": 0.640078289418895, "learning_rate": 1.3897194572878912e-06, "loss": 0.49730777740478516, "step": 3497 }, { "epoch": 2.7877217460633847, "grad_norm": 0.5875006327557606, "learning_rate": 1.3794538630399257e-06, "loss": 0.40828585624694824, "step": 3498 }, { "epoch": 2.7885190352800477, "grad_norm": 0.5759882425025463, "learning_rate": 1.3692257941228193e-06, "loss": 0.4087575078010559, "step": 3499 }, { "epoch": 2.789316324496711, "grad_norm": 0.5873163213070236, "learning_rate": 1.3590352584305854e-06, "loss": 0.3318338394165039, "step": 3500 }, { "epoch": 2.7901136137133746, "grad_norm": 0.548922312364999, "learning_rate": 1.3488822638282771e-06, "loss": 0.3452531397342682, "step": 3501 }, { "epoch": 2.7909109029300376, "grad_norm": 0.6854212799771675, "learning_rate": 1.338766818151982e-06, "loss": 0.49326759576797485, "step": 3502 }, { "epoch": 2.791708192146701, "grad_norm": 0.5690669633266208, "learning_rate": 1.328688929208799e-06, "loss": 0.36016401648521423, "step": 3503 }, { "epoch": 2.7925054813633645, "grad_norm": 0.6187489727705654, "learning_rate": 1.3186486047768286e-06, "loss": 0.3429969847202301, "step": 3504 }, { "epoch": 2.793302770580028, "grad_norm": 0.5776373039746467, "learning_rate": 1.3086458526052058e-06, "loss": 0.42477717995643616, "step": 3505 }, { "epoch": 2.7941000597966914, "grad_norm": 0.5328873654876706, "learning_rate": 1.2986806804140494e-06, "loss": 0.32977813482284546, "step": 3506 }, { "epoch": 2.794897349013355, "grad_norm": 0.5677116299774857, "learning_rate": 1.2887530958944738e-06, "loss": 0.37529847025871277, "step": 3507 }, { "epoch": 2.795694638230018, "grad_norm": 0.5506385112775849, "learning_rate": 1.2788631067085942e-06, "loss": 0.3535630404949188, "step": 3508 }, { "epoch": 2.7964919274466813, "grad_norm": 0.5512230863221745, "learning_rate": 1.2690107204894997e-06, "loss": 0.3525133430957794, "step": 3509 }, { "epoch": 2.7972892166633447, "grad_norm": 0.6004277941635064, "learning_rate": 1.2591959448412626e-06, "loss": 0.30920085310935974, "step": 3510 }, { "epoch": 2.7980865058800077, "grad_norm": 0.5210126876138356, "learning_rate": 1.2494187873389185e-06, "loss": 0.3800197243690491, "step": 3511 }, { "epoch": 2.798883795096671, "grad_norm": 0.5488343406036781, "learning_rate": 1.2396792555284865e-06, "loss": 0.36965399980545044, "step": 3512 }, { "epoch": 2.7996810843133346, "grad_norm": 0.5885407873644802, "learning_rate": 1.2299773569269201e-06, "loss": 0.40308111906051636, "step": 3513 }, { "epoch": 2.800478373529998, "grad_norm": 0.4641031649632885, "learning_rate": 1.2203130990221634e-06, "loss": 0.34355488419532776, "step": 3514 }, { "epoch": 2.8012756627466615, "grad_norm": 0.7062342646615712, "learning_rate": 1.2106864892730774e-06, "loss": 0.4699465036392212, "step": 3515 }, { "epoch": 2.802072951963325, "grad_norm": 0.4990913307555107, "learning_rate": 1.2010975351094744e-06, "loss": 0.3563637137413025, "step": 3516 }, { "epoch": 2.802870241179988, "grad_norm": 0.5818730828534252, "learning_rate": 1.1915462439321234e-06, "loss": 0.41239187121391296, "step": 3517 }, { "epoch": 2.8036675303966514, "grad_norm": 0.6007896303766557, "learning_rate": 1.1820326231126943e-06, "loss": 0.42118147015571594, "step": 3518 }, { "epoch": 2.804464819613315, "grad_norm": 0.5690009917299145, "learning_rate": 1.1725566799938082e-06, "loss": 0.4286114275455475, "step": 3519 }, { "epoch": 2.805262108829978, "grad_norm": 0.5380225377787781, "learning_rate": 1.1631184218889869e-06, "loss": 0.35103699564933777, "step": 3520 }, { "epoch": 2.8060593980466413, "grad_norm": 0.6302396252365808, "learning_rate": 1.1537178560826868e-06, "loss": 0.3916354179382324, "step": 3521 }, { "epoch": 2.8068566872633047, "grad_norm": 0.5218371308368669, "learning_rate": 1.1443549898302597e-06, "loss": 0.3797284960746765, "step": 3522 }, { "epoch": 2.807653976479968, "grad_norm": 0.5928207805281223, "learning_rate": 1.1350298303579588e-06, "loss": 0.39701947569847107, "step": 3523 }, { "epoch": 2.8084512656966316, "grad_norm": 0.6676592549588406, "learning_rate": 1.1257423848629433e-06, "loss": 0.49112486839294434, "step": 3524 }, { "epoch": 2.809248554913295, "grad_norm": 0.4765492323385493, "learning_rate": 1.116492660513274e-06, "loss": 0.3120116591453552, "step": 3525 }, { "epoch": 2.810045844129958, "grad_norm": 0.48530165094254385, "learning_rate": 1.1072806644478739e-06, "loss": 0.30452531576156616, "step": 3526 }, { "epoch": 2.8108431333466215, "grad_norm": 0.6761777249702935, "learning_rate": 1.098106403776561e-06, "loss": 0.3774540424346924, "step": 3527 }, { "epoch": 2.811640422563285, "grad_norm": 0.5744615336783693, "learning_rate": 1.0889698855800378e-06, "loss": 0.37122392654418945, "step": 3528 }, { "epoch": 2.812437711779948, "grad_norm": 0.5981425822066411, "learning_rate": 1.0798711169098585e-06, "loss": 0.3723526895046234, "step": 3529 }, { "epoch": 2.8132350009966114, "grad_norm": 0.5875679512895964, "learning_rate": 1.07081010478845e-06, "loss": 0.34059688448905945, "step": 3530 }, { "epoch": 2.814032290213275, "grad_norm": 0.503067305892195, "learning_rate": 1.0617868562091071e-06, "loss": 0.32751381397247314, "step": 3531 }, { "epoch": 2.8148295794299383, "grad_norm": 0.5992634657014955, "learning_rate": 1.0528013781359757e-06, "loss": 0.41079503297805786, "step": 3532 }, { "epoch": 2.8156268686466017, "grad_norm": 0.5944622171525423, "learning_rate": 1.0438536775040363e-06, "loss": 0.40658026933670044, "step": 3533 }, { "epoch": 2.816424157863265, "grad_norm": 0.5922186246347407, "learning_rate": 1.034943761219126e-06, "loss": 0.4416784346103668, "step": 3534 }, { "epoch": 2.817221447079928, "grad_norm": 0.5934746454568489, "learning_rate": 1.0260716361579215e-06, "loss": 0.3367970585823059, "step": 3535 }, { "epoch": 2.8180187362965916, "grad_norm": 0.4901047800288754, "learning_rate": 1.0172373091679176e-06, "loss": 0.3629915416240692, "step": 3536 }, { "epoch": 2.818816025513255, "grad_norm": 0.52388067812127, "learning_rate": 1.008440787067455e-06, "loss": 0.3729637861251831, "step": 3537 }, { "epoch": 2.819613314729918, "grad_norm": 0.5670123012240446, "learning_rate": 9.996820766456917e-07, "loss": 0.3788610100746155, "step": 3538 }, { "epoch": 2.8204106039465815, "grad_norm": 0.6271192345075777, "learning_rate": 9.90961184662592e-07, "loss": 0.41973385214805603, "step": 3539 }, { "epoch": 2.821207893163245, "grad_norm": 0.6710225667580979, "learning_rate": 9.822781178489392e-07, "loss": 0.5104718208312988, "step": 3540 }, { "epoch": 2.8220051823799084, "grad_norm": 0.5142544175725703, "learning_rate": 9.73632882906328e-07, "loss": 0.39760318398475647, "step": 3541 }, { "epoch": 2.822802471596572, "grad_norm": 0.5186294286356652, "learning_rate": 9.650254865071428e-07, "loss": 0.3571227788925171, "step": 3542 }, { "epoch": 2.8235997608132353, "grad_norm": 0.5283161928409609, "learning_rate": 9.564559352945811e-07, "loss": 0.2601865530014038, "step": 3543 }, { "epoch": 2.8243970500298983, "grad_norm": 0.5428455453562764, "learning_rate": 9.479242358826135e-07, "loss": 0.380282998085022, "step": 3544 }, { "epoch": 2.8251943392465617, "grad_norm": 0.6633896996575913, "learning_rate": 9.394303948560057e-07, "loss": 0.4485586881637573, "step": 3545 }, { "epoch": 2.825991628463225, "grad_norm": 0.5692421827994398, "learning_rate": 9.309744187702974e-07, "loss": 0.35691002011299133, "step": 3546 }, { "epoch": 2.826788917679888, "grad_norm": 0.6639202113003746, "learning_rate": 9.225563141518178e-07, "loss": 0.5262545347213745, "step": 3547 }, { "epoch": 2.8275862068965516, "grad_norm": 0.5169348552938348, "learning_rate": 9.141760874976646e-07, "loss": 0.3751649856567383, "step": 3548 }, { "epoch": 2.828383496113215, "grad_norm": 0.4975124484401928, "learning_rate": 9.058337452756804e-07, "loss": 0.3541361093521118, "step": 3549 }, { "epoch": 2.8291807853298785, "grad_norm": 0.4778692633940945, "learning_rate": 8.975292939244928e-07, "loss": 0.32955995202064514, "step": 3550 }, { "epoch": 2.829978074546542, "grad_norm": 0.5397810690695606, "learning_rate": 8.892627398534748e-07, "loss": 0.3983113765716553, "step": 3551 }, { "epoch": 2.8307753637632054, "grad_norm": 0.6882422340751579, "learning_rate": 8.810340894427505e-07, "loss": 0.4584479331970215, "step": 3552 }, { "epoch": 2.8315726529798684, "grad_norm": 0.547680754848746, "learning_rate": 8.728433490431897e-07, "loss": 0.38034725189208984, "step": 3553 }, { "epoch": 2.832369942196532, "grad_norm": 0.6247705430707312, "learning_rate": 8.646905249764137e-07, "loss": 0.43260258436203003, "step": 3554 }, { "epoch": 2.8331672314131953, "grad_norm": 0.539938646545602, "learning_rate": 8.565756235347611e-07, "loss": 0.3844519257545471, "step": 3555 }, { "epoch": 2.8339645206298583, "grad_norm": 0.6536019790056304, "learning_rate": 8.484986509813164e-07, "loss": 0.41403305530548096, "step": 3556 }, { "epoch": 2.8347618098465217, "grad_norm": 0.5378210462801013, "learning_rate": 8.404596135498821e-07, "loss": 0.28846389055252075, "step": 3557 }, { "epoch": 2.835559099063185, "grad_norm": 0.5641179962148947, "learning_rate": 8.324585174449894e-07, "loss": 0.4206916093826294, "step": 3558 }, { "epoch": 2.8363563882798486, "grad_norm": 0.63400558076177, "learning_rate": 8.244953688418822e-07, "loss": 0.390312522649765, "step": 3559 }, { "epoch": 2.837153677496512, "grad_norm": 0.5807393953602003, "learning_rate": 8.165701738865217e-07, "loss": 0.3737761676311493, "step": 3560 }, { "epoch": 2.837950966713175, "grad_norm": 0.6855456246127498, "learning_rate": 8.086829386955652e-07, "loss": 0.4299160838127136, "step": 3561 }, { "epoch": 2.8387482559298385, "grad_norm": 0.551222617986066, "learning_rate": 8.008336693563823e-07, "loss": 0.36759889125823975, "step": 3562 }, { "epoch": 2.839545545146502, "grad_norm": 0.6751879157485198, "learning_rate": 7.93022371927038e-07, "loss": 0.3976407051086426, "step": 3563 }, { "epoch": 2.8403428343631654, "grad_norm": 0.549922911876012, "learning_rate": 7.852490524362988e-07, "loss": 0.3567848205566406, "step": 3564 }, { "epoch": 2.8411401235798284, "grad_norm": 0.6423437449628179, "learning_rate": 7.775137168835933e-07, "loss": 0.456561803817749, "step": 3565 }, { "epoch": 2.841937412796492, "grad_norm": 0.6248676238860377, "learning_rate": 7.698163712390683e-07, "loss": 0.4317227005958557, "step": 3566 }, { "epoch": 2.8427347020131553, "grad_norm": 0.6046703850281908, "learning_rate": 7.621570214435325e-07, "loss": 0.4300510585308075, "step": 3567 }, { "epoch": 2.8435319912298187, "grad_norm": 0.5811408286149276, "learning_rate": 7.54535673408463e-07, "loss": 0.40870726108551025, "step": 3568 }, { "epoch": 2.844329280446482, "grad_norm": 0.6816296824687926, "learning_rate": 7.469523330160211e-07, "loss": 0.46035391092300415, "step": 3569 }, { "epoch": 2.845126569663145, "grad_norm": 0.5456629148483582, "learning_rate": 7.39407006119025e-07, "loss": 0.39543768763542175, "step": 3570 }, { "epoch": 2.8459238588798086, "grad_norm": 0.5475965387198869, "learning_rate": 7.31899698540961e-07, "loss": 0.4121628999710083, "step": 3571 }, { "epoch": 2.846721148096472, "grad_norm": 0.5901905242082391, "learning_rate": 7.244304160759608e-07, "loss": 0.3733591139316559, "step": 3572 }, { "epoch": 2.8475184373131355, "grad_norm": 0.6235435071932239, "learning_rate": 7.169991644888241e-07, "loss": 0.41255682706832886, "step": 3573 }, { "epoch": 2.8483157265297985, "grad_norm": 0.6603573652637299, "learning_rate": 7.096059495149854e-07, "loss": 0.43097570538520813, "step": 3574 }, { "epoch": 2.849113015746462, "grad_norm": 0.6384899058772125, "learning_rate": 7.022507768605247e-07, "loss": 0.47448301315307617, "step": 3575 }, { "epoch": 2.8499103049631254, "grad_norm": 0.5812186944307116, "learning_rate": 6.949336522021677e-07, "loss": 0.4128512740135193, "step": 3576 }, { "epoch": 2.850707594179789, "grad_norm": 0.6472296661987712, "learning_rate": 6.876545811872747e-07, "loss": 0.4612996280193329, "step": 3577 }, { "epoch": 2.8515048833964523, "grad_norm": 0.5556107105366199, "learning_rate": 6.804135694338187e-07, "loss": 0.423197865486145, "step": 3578 }, { "epoch": 2.8523021726131152, "grad_norm": 0.5663784653946357, "learning_rate": 6.73210622530418e-07, "loss": 0.4351773262023926, "step": 3579 }, { "epoch": 2.8530994618297787, "grad_norm": 0.6240142665854657, "learning_rate": 6.660457460363145e-07, "loss": 0.36546528339385986, "step": 3580 }, { "epoch": 2.853896751046442, "grad_norm": 0.5863691678537407, "learning_rate": 6.589189454813405e-07, "loss": 0.4228714108467102, "step": 3581 }, { "epoch": 2.8546940402631056, "grad_norm": 0.5868664338315498, "learning_rate": 6.518302263659737e-07, "loss": 0.37118974328041077, "step": 3582 }, { "epoch": 2.8554913294797686, "grad_norm": 0.6048436720229278, "learning_rate": 6.447795941612877e-07, "loss": 0.3891400694847107, "step": 3583 }, { "epoch": 2.856288618696432, "grad_norm": 0.5623949967084926, "learning_rate": 6.37767054308952e-07, "loss": 0.32278352975845337, "step": 3584 }, { "epoch": 2.8570859079130955, "grad_norm": 0.5044991798102292, "learning_rate": 6.307926122212426e-07, "loss": 0.35882896184921265, "step": 3585 }, { "epoch": 2.857883197129759, "grad_norm": 0.5061357848184798, "learning_rate": 6.238562732810426e-07, "loss": 0.3327827453613281, "step": 3586 }, { "epoch": 2.8586804863464224, "grad_norm": 0.5767414717734927, "learning_rate": 6.169580428418087e-07, "loss": 0.3851569890975952, "step": 3587 }, { "epoch": 2.8594777755630854, "grad_norm": 0.5470744318893792, "learning_rate": 6.100979262275986e-07, "loss": 0.3108038604259491, "step": 3588 }, { "epoch": 2.860275064779749, "grad_norm": 0.5680998590266001, "learning_rate": 6.032759287330491e-07, "loss": 0.326829731464386, "step": 3589 }, { "epoch": 2.8610723539964122, "grad_norm": 0.6402618392482461, "learning_rate": 5.964920556233767e-07, "loss": 0.408569872379303, "step": 3590 }, { "epoch": 2.8618696432130757, "grad_norm": 0.5677187110153736, "learning_rate": 5.897463121343705e-07, "loss": 0.4120437800884247, "step": 3591 }, { "epoch": 2.8626669324297387, "grad_norm": 0.5418378234041165, "learning_rate": 5.830387034723938e-07, "loss": 0.3323689103126526, "step": 3592 }, { "epoch": 2.863464221646402, "grad_norm": 0.5957188502135813, "learning_rate": 5.76369234814389e-07, "loss": 0.37576454877853394, "step": 3593 }, { "epoch": 2.8642615108630656, "grad_norm": 0.647966850087848, "learning_rate": 5.697379113078383e-07, "loss": 0.42364367842674255, "step": 3594 }, { "epoch": 2.865058800079729, "grad_norm": 0.5108001717764223, "learning_rate": 5.631447380708033e-07, "loss": 0.34786829352378845, "step": 3595 }, { "epoch": 2.8658560892963925, "grad_norm": 0.5699843239059034, "learning_rate": 5.565897201918913e-07, "loss": 0.3826954960823059, "step": 3596 }, { "epoch": 2.8666533785130555, "grad_norm": 0.5546663201525801, "learning_rate": 5.500728627302664e-07, "loss": 0.3493894338607788, "step": 3597 }, { "epoch": 2.867450667729719, "grad_norm": 0.5583257623230978, "learning_rate": 5.435941707156389e-07, "loss": 0.3773672580718994, "step": 3598 }, { "epoch": 2.8682479569463823, "grad_norm": 0.5066278630796374, "learning_rate": 5.371536491482531e-07, "loss": 0.3610362708568573, "step": 3599 }, { "epoch": 2.869045246163046, "grad_norm": 0.5244576981848574, "learning_rate": 5.307513029989164e-07, "loss": 0.44151467084884644, "step": 3600 }, { "epoch": 2.869842535379709, "grad_norm": 0.6021507111115793, "learning_rate": 5.243871372089481e-07, "loss": 0.4339079260826111, "step": 3601 }, { "epoch": 2.8706398245963722, "grad_norm": 0.5851557510781479, "learning_rate": 5.180611566902194e-07, "loss": 0.3489859700202942, "step": 3602 }, { "epoch": 2.8714371138130357, "grad_norm": 0.6060006934519103, "learning_rate": 5.117733663251134e-07, "loss": 0.42443156242370605, "step": 3603 }, { "epoch": 2.872234403029699, "grad_norm": 0.6000312719121937, "learning_rate": 5.055237709665484e-07, "loss": 0.3873860836029053, "step": 3604 }, { "epoch": 2.8730316922463626, "grad_norm": 0.5456884407101501, "learning_rate": 4.993123754379603e-07, "loss": 0.3215893805027008, "step": 3605 }, { "epoch": 2.8738289814630256, "grad_norm": 0.59838502921084, "learning_rate": 4.931391845333089e-07, "loss": 0.42339208722114563, "step": 3606 }, { "epoch": 2.874626270679689, "grad_norm": 0.5462014561071209, "learning_rate": 4.870042030170552e-07, "loss": 0.4111260175704956, "step": 3607 }, { "epoch": 2.8754235598963525, "grad_norm": 0.6001804858524501, "learning_rate": 4.809074356241839e-07, "loss": 0.4514489769935608, "step": 3608 }, { "epoch": 2.876220849113016, "grad_norm": 0.6400870763955033, "learning_rate": 4.7484888706017526e-07, "loss": 0.3815283477306366, "step": 3609 }, { "epoch": 2.877018138329679, "grad_norm": 0.5038526709625704, "learning_rate": 4.688285620010113e-07, "loss": 0.32880616188049316, "step": 3610 }, { "epoch": 2.8778154275463423, "grad_norm": 0.7172694072535681, "learning_rate": 4.6284646509318075e-07, "loss": 0.4124400019645691, "step": 3611 }, { "epoch": 2.878612716763006, "grad_norm": 0.7235640685674903, "learning_rate": 4.5690260095367363e-07, "loss": 0.44870615005493164, "step": 3612 }, { "epoch": 2.8794100059796692, "grad_norm": 0.5877071005887494, "learning_rate": 4.509969741699538e-07, "loss": 0.39842143654823303, "step": 3613 }, { "epoch": 2.8802072951963327, "grad_norm": 0.5637763443318, "learning_rate": 4.4512958929998625e-07, "loss": 0.33213964104652405, "step": 3614 }, { "epoch": 2.8810045844129957, "grad_norm": 0.5939380013717543, "learning_rate": 4.393004508722209e-07, "loss": 0.34777358174324036, "step": 3615 }, { "epoch": 2.881801873629659, "grad_norm": 0.5012764540881911, "learning_rate": 4.335095633855868e-07, "loss": 0.31210601329803467, "step": 3616 }, { "epoch": 2.8825991628463226, "grad_norm": 0.5583682112942241, "learning_rate": 4.277569313094809e-07, "loss": 0.39089640974998474, "step": 3617 }, { "epoch": 2.883396452062986, "grad_norm": 0.6276924373306468, "learning_rate": 4.220425590837906e-07, "loss": 0.44167864322662354, "step": 3618 }, { "epoch": 2.884193741279649, "grad_norm": 0.5185337525660919, "learning_rate": 4.1636645111886583e-07, "loss": 0.3902936577796936, "step": 3619 }, { "epoch": 2.8849910304963124, "grad_norm": 0.5009310436636493, "learning_rate": 4.1072861179553e-07, "loss": 0.31363919377326965, "step": 3620 }, { "epoch": 2.885788319712976, "grad_norm": 0.6140873189238838, "learning_rate": 4.0512904546505805e-07, "loss": 0.3794901371002197, "step": 3621 }, { "epoch": 2.8865856089296393, "grad_norm": 0.5558121445693487, "learning_rate": 3.9956775644920395e-07, "loss": 0.34907016158103943, "step": 3622 }, { "epoch": 2.887382898146303, "grad_norm": 0.5584996439567358, "learning_rate": 3.9404474904016775e-07, "loss": 0.37143707275390625, "step": 3623 }, { "epoch": 2.888180187362966, "grad_norm": 0.5136350282051405, "learning_rate": 3.8856002750060073e-07, "loss": 0.3431438207626343, "step": 3624 }, { "epoch": 2.8889774765796292, "grad_norm": 0.6268695589280181, "learning_rate": 3.831135960636112e-07, "loss": 0.39189469814300537, "step": 3625 }, { "epoch": 2.8897747657962927, "grad_norm": 0.447575377763609, "learning_rate": 3.777054589327589e-07, "loss": 0.2562876343727112, "step": 3626 }, { "epoch": 2.890572055012956, "grad_norm": 0.5186570906615376, "learning_rate": 3.7233562028203826e-07, "loss": 0.3385850787162781, "step": 3627 }, { "epoch": 2.891369344229619, "grad_norm": 0.6437361645390662, "learning_rate": 3.6700408425589525e-07, "loss": 0.42208176851272583, "step": 3628 }, { "epoch": 2.8921666334462826, "grad_norm": 0.6308257187865036, "learning_rate": 3.6171085496920496e-07, "loss": 0.41894295811653137, "step": 3629 }, { "epoch": 2.892963922662946, "grad_norm": 0.6695109263565269, "learning_rate": 3.564559365072828e-07, "loss": 0.42637258768081665, "step": 3630 }, { "epoch": 2.8937612118796094, "grad_norm": 0.5047443209980287, "learning_rate": 3.512393329258734e-07, "loss": 0.30017799139022827, "step": 3631 }, { "epoch": 2.894558501096273, "grad_norm": 0.5778012991120328, "learning_rate": 3.460610482511506e-07, "loss": 0.34583038091659546, "step": 3632 }, { "epoch": 2.895355790312936, "grad_norm": 0.630777360750874, "learning_rate": 3.409210864797119e-07, "loss": 0.40304580330848694, "step": 3633 }, { "epoch": 2.8961530795295993, "grad_norm": 0.6271723939742335, "learning_rate": 3.358194515785784e-07, "loss": 0.38790810108184814, "step": 3634 }, { "epoch": 2.8969503687462628, "grad_norm": 0.5405729922667756, "learning_rate": 3.3075614748520057e-07, "loss": 0.3670065999031067, "step": 3635 }, { "epoch": 2.897747657962926, "grad_norm": 0.5306022458462278, "learning_rate": 3.257311781074246e-07, "loss": 0.3170243501663208, "step": 3636 }, { "epoch": 2.898544947179589, "grad_norm": 0.5871170988834558, "learning_rate": 3.20744547323526e-07, "loss": 0.3718588352203369, "step": 3637 }, { "epoch": 2.8993422363962527, "grad_norm": 0.5809940760088358, "learning_rate": 3.157962589821872e-07, "loss": 0.39870306849479675, "step": 3638 }, { "epoch": 2.900139525612916, "grad_norm": 0.5855379363940195, "learning_rate": 3.1088631690249225e-07, "loss": 0.4254349172115326, "step": 3639 }, { "epoch": 2.9009368148295795, "grad_norm": 0.5618318820747888, "learning_rate": 3.060147248739376e-07, "loss": 0.3508654832839966, "step": 3640 }, { "epoch": 2.901734104046243, "grad_norm": 0.5069217371197832, "learning_rate": 3.0118148665641553e-07, "loss": 0.3795837461948395, "step": 3641 }, { "epoch": 2.902531393262906, "grad_norm": 0.6296377886271936, "learning_rate": 2.963866059802201e-07, "loss": 0.439308226108551, "step": 3642 }, { "epoch": 2.9033286824795694, "grad_norm": 0.5132553364809542, "learning_rate": 2.916300865460353e-07, "loss": 0.3338620662689209, "step": 3643 }, { "epoch": 2.904125971696233, "grad_norm": 0.6744172485223664, "learning_rate": 2.869119320249469e-07, "loss": 0.3988448679447174, "step": 3644 }, { "epoch": 2.904923260912896, "grad_norm": 0.5649028200751376, "learning_rate": 2.8223214605842543e-07, "loss": 0.3885508179664612, "step": 3645 }, { "epoch": 2.9057205501295593, "grad_norm": 0.550118705763141, "learning_rate": 2.77590732258326e-07, "loss": 0.36322617530822754, "step": 3646 }, { "epoch": 2.9065178393462228, "grad_norm": 0.5268231872631923, "learning_rate": 2.7298769420688876e-07, "loss": 0.35310184955596924, "step": 3647 }, { "epoch": 2.907315128562886, "grad_norm": 0.4787925190313202, "learning_rate": 2.684230354567496e-07, "loss": 0.35471439361572266, "step": 3648 }, { "epoch": 2.9081124177795497, "grad_norm": 0.6190155697299695, "learning_rate": 2.638967595309072e-07, "loss": 0.36252421140670776, "step": 3649 }, { "epoch": 2.908909706996213, "grad_norm": 0.5687781366201988, "learning_rate": 2.5940886992272814e-07, "loss": 0.3774687945842743, "step": 3650 }, { "epoch": 2.909706996212876, "grad_norm": 0.54733509947138, "learning_rate": 2.549593700959751e-07, "loss": 0.36249807476997375, "step": 3651 }, { "epoch": 2.9105042854295395, "grad_norm": 0.6130051586019982, "learning_rate": 2.505482634847789e-07, "loss": 0.4215911328792572, "step": 3652 }, { "epoch": 2.911301574646203, "grad_norm": 0.6636167554702739, "learning_rate": 2.461755534936161e-07, "loss": 0.4839073419570923, "step": 3653 }, { "epoch": 2.912098863862866, "grad_norm": 0.5234789503718331, "learning_rate": 2.4184124349734827e-07, "loss": 0.2906040847301483, "step": 3654 }, { "epoch": 2.9128961530795294, "grad_norm": 0.5588031626825342, "learning_rate": 2.3754533684120504e-07, "loss": 0.3571120798587799, "step": 3655 }, { "epoch": 2.913693442296193, "grad_norm": 0.5628545969931479, "learning_rate": 2.3328783684075096e-07, "loss": 0.37966787815093994, "step": 3656 }, { "epoch": 2.9144907315128563, "grad_norm": 0.5501077089064246, "learning_rate": 2.2906874678193524e-07, "loss": 0.38316479325294495, "step": 3657 }, { "epoch": 2.9152880207295198, "grad_norm": 0.502456401079251, "learning_rate": 2.2488806992105315e-07, "loss": 0.27141979336738586, "step": 3658 }, { "epoch": 2.916085309946183, "grad_norm": 0.5453940823327635, "learning_rate": 2.2074580948474034e-07, "loss": 0.3375670909881592, "step": 3659 }, { "epoch": 2.916882599162846, "grad_norm": 0.5539199057464472, "learning_rate": 2.1664196866999497e-07, "loss": 0.37777701020240784, "step": 3660 }, { "epoch": 2.9176798883795096, "grad_norm": 0.6277129175788118, "learning_rate": 2.1257655064416683e-07, "loss": 0.4407883286476135, "step": 3661 }, { "epoch": 2.918477177596173, "grad_norm": 0.516022006317826, "learning_rate": 2.085495585449404e-07, "loss": 0.31122884154319763, "step": 3662 }, { "epoch": 2.919274466812836, "grad_norm": 0.560707206767619, "learning_rate": 2.0456099548035179e-07, "loss": 0.35472190380096436, "step": 3663 }, { "epoch": 2.9200717560294995, "grad_norm": 0.5545591303437103, "learning_rate": 2.0061086452876633e-07, "loss": 0.3202042877674103, "step": 3664 }, { "epoch": 2.920869045246163, "grad_norm": 0.5199264722031383, "learning_rate": 1.966991687389008e-07, "loss": 0.3926413953304291, "step": 3665 }, { "epoch": 2.9216663344628264, "grad_norm": 0.5989196119292497, "learning_rate": 1.9282591112979586e-07, "loss": 0.3470114469528198, "step": 3666 }, { "epoch": 2.92246362367949, "grad_norm": 0.5825204968218406, "learning_rate": 1.8899109469083798e-07, "loss": 0.3969470262527466, "step": 3667 }, { "epoch": 2.9232609128961533, "grad_norm": 0.6019847517649975, "learning_rate": 1.851947223817374e-07, "loss": 0.44566357135772705, "step": 3668 }, { "epoch": 2.9240582021128163, "grad_norm": 0.4793389538767735, "learning_rate": 1.8143679713252814e-07, "loss": 0.3518367409706116, "step": 3669 }, { "epoch": 2.9248554913294798, "grad_norm": 0.5395504181422623, "learning_rate": 1.7771732184357904e-07, "loss": 0.35270974040031433, "step": 3670 }, { "epoch": 2.925652780546143, "grad_norm": 0.614749605727218, "learning_rate": 1.7403629938558264e-07, "loss": 0.3860675096511841, "step": 3671 }, { "epoch": 2.926450069762806, "grad_norm": 0.6225892080416643, "learning_rate": 1.7039373259954415e-07, "loss": 0.44081902503967285, "step": 3672 }, { "epoch": 2.9272473589794696, "grad_norm": 0.5516497522884811, "learning_rate": 1.6678962429680922e-07, "loss": 0.32287436723709106, "step": 3673 }, { "epoch": 2.928044648196133, "grad_norm": 0.6643538691762393, "learning_rate": 1.6322397725901384e-07, "loss": 0.4317154884338379, "step": 3674 }, { "epoch": 2.9288419374127965, "grad_norm": 0.5597921352786361, "learning_rate": 1.5969679423814e-07, "loss": 0.3393734395503998, "step": 3675 }, { "epoch": 2.92963922662946, "grad_norm": 0.62562627016321, "learning_rate": 1.56208077956449e-07, "loss": 0.467535138130188, "step": 3676 }, { "epoch": 2.9304365158461234, "grad_norm": 0.6020685564077741, "learning_rate": 1.5275783110654252e-07, "loss": 0.3778578042984009, "step": 3677 }, { "epoch": 2.9312338050627864, "grad_norm": 0.5892649497559602, "learning_rate": 1.493460563513238e-07, "loss": 0.3788735568523407, "step": 3678 }, { "epoch": 2.93203109427945, "grad_norm": 0.5509842020195961, "learning_rate": 1.4597275632398655e-07, "loss": 0.36080285906791687, "step": 3679 }, { "epoch": 2.9328283834961133, "grad_norm": 0.5960998945116354, "learning_rate": 1.426379336280592e-07, "loss": 0.380865216255188, "step": 3680 }, { "epoch": 2.9336256727127763, "grad_norm": 0.5675365292079115, "learning_rate": 1.393415908373441e-07, "loss": 0.3545058071613312, "step": 3681 }, { "epoch": 2.9344229619294397, "grad_norm": 0.5902424528115554, "learning_rate": 1.3608373049596724e-07, "loss": 0.45715808868408203, "step": 3682 }, { "epoch": 2.935220251146103, "grad_norm": 0.6285990255302039, "learning_rate": 1.328643551183395e-07, "loss": 0.3755960464477539, "step": 3683 }, { "epoch": 2.9360175403627666, "grad_norm": 0.7184043029068421, "learning_rate": 1.296834671891789e-07, "loss": 0.48334938287734985, "step": 3684 }, { "epoch": 2.93681482957943, "grad_norm": 0.6747873687610004, "learning_rate": 1.265410691634883e-07, "loss": 0.447987824678421, "step": 3685 }, { "epoch": 2.9376121187960935, "grad_norm": 0.7294165139126572, "learning_rate": 1.234371634665721e-07, "loss": 0.4385327100753784, "step": 3686 }, { "epoch": 2.9384094080127565, "grad_norm": 0.5713523129375397, "learning_rate": 1.2037175249403066e-07, "loss": 0.37826794385910034, "step": 3687 }, { "epoch": 2.93920669722942, "grad_norm": 0.5300235128043751, "learning_rate": 1.1734483861173817e-07, "loss": 0.35062849521636963, "step": 3688 }, { "epoch": 2.9400039864460834, "grad_norm": 0.5890089722055727, "learning_rate": 1.1435642415587588e-07, "loss": 0.44199180603027344, "step": 3689 }, { "epoch": 2.9408012756627464, "grad_norm": 0.5360831888494848, "learning_rate": 1.1140651143289327e-07, "loss": 0.4128521978855133, "step": 3690 }, { "epoch": 2.94159856487941, "grad_norm": 0.5483662942164476, "learning_rate": 1.0849510271953578e-07, "loss": 0.41890567541122437, "step": 3691 }, { "epoch": 2.9423958540960733, "grad_norm": 0.5268458600547761, "learning_rate": 1.0562220026282821e-07, "loss": 0.3725431263446808, "step": 3692 }, { "epoch": 2.9431931433127367, "grad_norm": 0.5829679155483185, "learning_rate": 1.0278780628008022e-07, "loss": 0.41444146633148193, "step": 3693 }, { "epoch": 2.9439904325294, "grad_norm": 0.6422910927246825, "learning_rate": 9.999192295886972e-08, "loss": 0.4599803388118744, "step": 3694 }, { "epoch": 2.9447877217460636, "grad_norm": 0.542252865455752, "learning_rate": 9.723455245706503e-08, "loss": 0.38050466775894165, "step": 3695 }, { "epoch": 2.9455850109627266, "grad_norm": 0.5852367800524465, "learning_rate": 9.451569690279716e-08, "loss": 0.4263037443161011, "step": 3696 }, { "epoch": 2.94638230017939, "grad_norm": 0.5794835629931637, "learning_rate": 9.183535839448753e-08, "loss": 0.417125940322876, "step": 3697 }, { "epoch": 2.9471795893960535, "grad_norm": 0.5462773145916864, "learning_rate": 8.919353900081473e-08, "loss": 0.3439648747444153, "step": 3698 }, { "epoch": 2.9479768786127165, "grad_norm": 0.48517668272314535, "learning_rate": 8.659024076073663e-08, "loss": 0.3206111490726471, "step": 3699 }, { "epoch": 2.94877416782938, "grad_norm": 0.5353924249391965, "learning_rate": 8.402546568347935e-08, "loss": 0.3591378629207611, "step": 3700 }, { "epoch": 2.9495714570460434, "grad_norm": 0.5967182104783332, "learning_rate": 8.14992157485317e-08, "loss": 0.3966243863105774, "step": 3701 }, { "epoch": 2.950368746262707, "grad_norm": 0.5412515855109105, "learning_rate": 7.901149290566179e-08, "loss": 0.3725879192352295, "step": 3702 }, { "epoch": 2.9511660354793703, "grad_norm": 0.5615870268861233, "learning_rate": 7.656229907487822e-08, "loss": 0.39857161045074463, "step": 3703 }, { "epoch": 2.9519633246960337, "grad_norm": 0.5095167591215918, "learning_rate": 7.415163614648556e-08, "loss": 0.37426677346229553, "step": 3704 }, { "epoch": 2.9527606139126967, "grad_norm": 0.46379999932947846, "learning_rate": 7.177950598102334e-08, "loss": 0.31076130270957947, "step": 3705 }, { "epoch": 2.95355790312936, "grad_norm": 0.5366016693073636, "learning_rate": 6.944591040930481e-08, "loss": 0.360372394323349, "step": 3706 }, { "epoch": 2.9543551923460236, "grad_norm": 0.514929513281497, "learning_rate": 6.71508512324004e-08, "loss": 0.3412725031375885, "step": 3707 }, { "epoch": 2.9551524815626866, "grad_norm": 0.5534572108480401, "learning_rate": 6.489433022163205e-08, "loss": 0.40425148606300354, "step": 3708 }, { "epoch": 2.95594977077935, "grad_norm": 0.6010232441478089, "learning_rate": 6.267634911858445e-08, "loss": 0.3968646824359894, "step": 3709 }, { "epoch": 2.9567470599960135, "grad_norm": 0.4769307919801164, "learning_rate": 6.04969096350938e-08, "loss": 0.2678525149822235, "step": 3710 }, { "epoch": 2.957544349212677, "grad_norm": 0.5422286539712684, "learning_rate": 5.835601345324792e-08, "loss": 0.3460836410522461, "step": 3711 }, { "epoch": 2.9583416384293404, "grad_norm": 0.5985911594960792, "learning_rate": 5.6253662225402846e-08, "loss": 0.4500443935394287, "step": 3712 }, { "epoch": 2.959138927646004, "grad_norm": 0.5437646554443065, "learning_rate": 5.4189857574138415e-08, "loss": 0.33598190546035767, "step": 3713 }, { "epoch": 2.959936216862667, "grad_norm": 0.5766612300729905, "learning_rate": 5.2164601092308254e-08, "loss": 0.39850616455078125, "step": 3714 }, { "epoch": 2.9607335060793303, "grad_norm": 0.5547836737602562, "learning_rate": 5.017789434300091e-08, "loss": 0.3888060748577118, "step": 3715 }, { "epoch": 2.9615307952959937, "grad_norm": 0.5444909075081418, "learning_rate": 4.8229738859556506e-08, "loss": 0.3807793855667114, "step": 3716 }, { "epoch": 2.9623280845126567, "grad_norm": 0.6148754510907154, "learning_rate": 4.632013614556119e-08, "loss": 0.39851123094558716, "step": 3717 }, { "epoch": 2.96312537372932, "grad_norm": 0.5257286530148879, "learning_rate": 4.4449087674847125e-08, "loss": 0.36020779609680176, "step": 3718 }, { "epoch": 2.9639226629459836, "grad_norm": 0.5092245809057336, "learning_rate": 4.261659489148695e-08, "loss": 0.35982391238212585, "step": 3719 }, { "epoch": 2.964719952162647, "grad_norm": 0.5166108472982656, "learning_rate": 4.082265920980488e-08, "loss": 0.337514728307724, "step": 3720 }, { "epoch": 2.9655172413793105, "grad_norm": 0.542468462012599, "learning_rate": 3.90672820143545e-08, "loss": 0.33224916458129883, "step": 3721 }, { "epoch": 2.966314530595974, "grad_norm": 0.5759178926721465, "learning_rate": 3.7350464659935416e-08, "loss": 0.36703187227249146, "step": 3722 }, { "epoch": 2.967111819812637, "grad_norm": 0.5047239890672665, "learning_rate": 3.56722084715877e-08, "loss": 0.3154442310333252, "step": 3723 }, { "epoch": 2.9679091090293004, "grad_norm": 0.7034723554653614, "learning_rate": 3.4032514744591904e-08, "loss": 0.4975269138813019, "step": 3724 }, { "epoch": 2.968706398245964, "grad_norm": 0.5547336780040306, "learning_rate": 3.243138474445795e-08, "loss": 0.36965757608413696, "step": 3725 }, { "epoch": 2.969503687462627, "grad_norm": 0.5531197698766331, "learning_rate": 3.0868819706947325e-08, "loss": 0.36441469192504883, "step": 3726 }, { "epoch": 2.9703009766792903, "grad_norm": 0.5678280855733944, "learning_rate": 2.934482083803425e-08, "loss": 0.3731723427772522, "step": 3727 }, { "epoch": 2.9710982658959537, "grad_norm": 0.5690856232930416, "learning_rate": 2.7859389313955597e-08, "loss": 0.45030638575553894, "step": 3728 }, { "epoch": 2.971895555112617, "grad_norm": 0.5644461241837451, "learning_rate": 2.6412526281155425e-08, "loss": 0.3915750980377197, "step": 3729 }, { "epoch": 2.9726928443292806, "grad_norm": 0.5127471672139374, "learning_rate": 2.5004232856323805e-08, "loss": 0.3735301196575165, "step": 3730 }, { "epoch": 2.973490133545944, "grad_norm": 0.5841985387239175, "learning_rate": 2.3634510126391284e-08, "loss": 0.3886120617389679, "step": 3731 }, { "epoch": 2.974287422762607, "grad_norm": 0.5939644020232466, "learning_rate": 2.2303359148495573e-08, "loss": 0.4424504041671753, "step": 3732 }, { "epoch": 2.9750847119792705, "grad_norm": 0.49256026950866216, "learning_rate": 2.1010780950031506e-08, "loss": 0.3255407214164734, "step": 3733 }, { "epoch": 2.975882001195934, "grad_norm": 0.4187009192177689, "learning_rate": 1.9756776528601083e-08, "loss": 0.2439863532781601, "step": 3734 }, { "epoch": 2.976679290412597, "grad_norm": 0.5161753867049735, "learning_rate": 1.8541346852052334e-08, "loss": 0.2900104522705078, "step": 3735 }, { "epoch": 2.9774765796292604, "grad_norm": 0.5466772109977683, "learning_rate": 1.736449285845154e-08, "loss": 0.3691480755805969, "step": 3736 }, { "epoch": 2.978273868845924, "grad_norm": 0.5839180062349696, "learning_rate": 1.6226215456088823e-08, "loss": 0.4583936333656311, "step": 3737 }, { "epoch": 2.9790711580625873, "grad_norm": 0.5554903628029597, "learning_rate": 1.5126515523489203e-08, "loss": 0.377361536026001, "step": 3738 }, { "epoch": 2.9798684472792507, "grad_norm": 0.5503267916454597, "learning_rate": 1.4065393909407087e-08, "loss": 0.3826066255569458, "step": 3739 }, { "epoch": 2.980665736495914, "grad_norm": 0.6452174657347118, "learning_rate": 1.3042851432809588e-08, "loss": 0.3713170289993286, "step": 3740 }, { "epoch": 2.981463025712577, "grad_norm": 0.6071788569121168, "learning_rate": 1.2058888882898744e-08, "loss": 0.39457252621650696, "step": 3741 }, { "epoch": 2.9822603149292406, "grad_norm": 0.6472065099093252, "learning_rate": 1.111350701909486e-08, "loss": 0.3923817276954651, "step": 3742 }, { "epoch": 2.983057604145904, "grad_norm": 0.559624408285568, "learning_rate": 1.0206706571042057e-08, "loss": 0.3836771845817566, "step": 3743 }, { "epoch": 2.983854893362567, "grad_norm": 0.5743081800746961, "learning_rate": 9.338488238608279e-09, "loss": 0.3612498342990875, "step": 3744 }, { "epoch": 2.9846521825792305, "grad_norm": 0.5274367868787783, "learning_rate": 8.508852691890834e-09, "loss": 0.3744524121284485, "step": 3745 }, { "epoch": 2.985449471795894, "grad_norm": 0.5230397722622576, "learning_rate": 7.7178005711942e-09, "loss": 0.33026862144470215, "step": 3746 }, { "epoch": 2.9862467610125574, "grad_norm": 0.554988776582405, "learning_rate": 6.965332487052223e-09, "loss": 0.3429415822029114, "step": 3747 }, { "epoch": 2.987044050229221, "grad_norm": 0.5826962341047096, "learning_rate": 6.251449020228117e-09, "loss": 0.375468909740448, "step": 3748 }, { "epoch": 2.9878413394458843, "grad_norm": 0.5861715657529336, "learning_rate": 5.576150721692264e-09, "loss": 0.4350898265838623, "step": 3749 }, { "epoch": 2.9886386286625473, "grad_norm": 0.5637367594864076, "learning_rate": 4.939438112638861e-09, "loss": 0.39124324917793274, "step": 3750 }, { "epoch": 2.9894359178792107, "grad_norm": 0.5734378598635372, "learning_rate": 4.341311684485927e-09, "loss": 0.3921917974948883, "step": 3751 }, { "epoch": 2.990233207095874, "grad_norm": 0.6002590533908257, "learning_rate": 3.7817718988586434e-09, "loss": 0.4442606568336487, "step": 3752 }, { "epoch": 2.991030496312537, "grad_norm": 0.5478256910860083, "learning_rate": 3.2608191876226658e-09, "loss": 0.4084850251674652, "step": 3753 }, { "epoch": 2.9918277855292006, "grad_norm": 0.4724837091985969, "learning_rate": 2.7784539528397103e-09, "loss": 0.28333067893981934, "step": 3754 }, { "epoch": 2.992625074745864, "grad_norm": 0.5701170525782691, "learning_rate": 2.334676566800864e-09, "loss": 0.373496413230896, "step": 3755 }, { "epoch": 2.9934223639625275, "grad_norm": 0.6289792102382159, "learning_rate": 1.9294873720154816e-09, "loss": 0.35047027468681335, "step": 3756 }, { "epoch": 2.994219653179191, "grad_norm": 0.5310041310569156, "learning_rate": 1.5628866812056332e-09, "loss": 0.30539780855178833, "step": 3757 }, { "epoch": 2.9950169423958544, "grad_norm": 0.5584160048153188, "learning_rate": 1.2348747773172075e-09, "loss": 0.3384200632572174, "step": 3758 }, { "epoch": 2.9958142316125174, "grad_norm": 0.6198555803014397, "learning_rate": 9.454519135088103e-10, "loss": 0.39542460441589355, "step": 3759 }, { "epoch": 2.996611520829181, "grad_norm": 0.6402445867532001, "learning_rate": 6.946183131573136e-10, "loss": 0.42988044023513794, "step": 3760 }, { "epoch": 2.9974088100458443, "grad_norm": 0.562272248335671, "learning_rate": 4.823741698523065e-10, "loss": 0.3465660512447357, "step": 3761 }, { "epoch": 2.9982060992625073, "grad_norm": 0.5658680020964573, "learning_rate": 3.0871964740719626e-10, "loss": 0.37933433055877686, "step": 3762 }, { "epoch": 2.9990033884791707, "grad_norm": 0.5326400218512993, "learning_rate": 1.7365487984810636e-10, "loss": 0.32629749178886414, "step": 3763 }, { "epoch": 2.999800677695834, "grad_norm": 0.4049887058478659, "learning_rate": 7.717997141942768e-11, "loss": 0.20771485567092896, "step": 3764 }, { "epoch": 3.0, "grad_norm": 2.2154128515487757, "learning_rate": 1.9294996578267388e-11, "loss": 0.5176386833190918, "step": 3765 }, { "epoch": 3.0, "step": 3765, "total_flos": 2252442977517568.0, "train_loss": 0.49659017846878784, "train_runtime": 36543.5014, "train_samples_per_second": 0.824, "train_steps_per_second": 0.103 } ], "logging_steps": 1, "max_steps": 3765, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2252442977517568.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }