{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 696, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0028776978417266188, "grad_norm": 0.42690583075609034, "learning_rate": 0.0, "loss": 1.3486042022705078, "step": 1 }, { "epoch": 0.0057553956834532375, "grad_norm": 0.3882655966887319, "learning_rate": 5.714285714285714e-08, "loss": 1.2684719562530518, "step": 2 }, { "epoch": 0.008633093525179856, "grad_norm": 0.4443954365616111, "learning_rate": 1.1428571428571427e-07, "loss": 1.177267074584961, "step": 3 }, { "epoch": 0.011510791366906475, "grad_norm": 0.4253698770879055, "learning_rate": 1.7142857142857143e-07, "loss": 1.181878924369812, "step": 4 }, { "epoch": 0.014388489208633094, "grad_norm": 0.3720421036789158, "learning_rate": 2.2857142857142855e-07, "loss": 1.1671853065490723, "step": 5 }, { "epoch": 0.017266187050359712, "grad_norm": 0.3622614120618611, "learning_rate": 2.857142857142857e-07, "loss": 1.1122022867202759, "step": 6 }, { "epoch": 0.02014388489208633, "grad_norm": 0.38384210342955205, "learning_rate": 3.4285714285714286e-07, "loss": 1.1873408555984497, "step": 7 }, { "epoch": 0.02302158273381295, "grad_norm": 0.4284045629754763, "learning_rate": 4e-07, "loss": 1.1845028400421143, "step": 8 }, { "epoch": 0.025899280575539568, "grad_norm": 0.41551879217805887, "learning_rate": 4.571428571428571e-07, "loss": 1.2185403108596802, "step": 9 }, { "epoch": 0.02877697841726619, "grad_norm": 0.42087035069001, "learning_rate": 5.142857142857142e-07, "loss": 1.0747895240783691, "step": 10 }, { "epoch": 0.031654676258992806, "grad_norm": 0.5150810858798297, "learning_rate": 5.714285714285714e-07, "loss": 1.2050367593765259, "step": 11 }, { "epoch": 0.034532374100719423, "grad_norm": 0.3402347213407099, "learning_rate": 6.285714285714286e-07, "loss": 1.1960644721984863, "step": 12 }, { "epoch": 0.03741007194244604, "grad_norm": 0.3833689066105734, "learning_rate": 6.857142857142857e-07, "loss": 1.2497148513793945, "step": 13 }, { "epoch": 0.04028776978417266, "grad_norm": 0.3335104915047139, "learning_rate": 7.428571428571429e-07, "loss": 1.1446340084075928, "step": 14 }, { "epoch": 0.04316546762589928, "grad_norm": 0.3492060423539416, "learning_rate": 8e-07, "loss": 1.1868774890899658, "step": 15 }, { "epoch": 0.0460431654676259, "grad_norm": 0.36339916703647873, "learning_rate": 8.57142857142857e-07, "loss": 1.1652871370315552, "step": 16 }, { "epoch": 0.04892086330935252, "grad_norm": 0.35128380927769104, "learning_rate": 9.142857142857142e-07, "loss": 1.1377315521240234, "step": 17 }, { "epoch": 0.051798561151079135, "grad_norm": 0.3216270031913542, "learning_rate": 9.714285714285715e-07, "loss": 1.179404377937317, "step": 18 }, { "epoch": 0.05467625899280575, "grad_norm": 0.3626006607419513, "learning_rate": 1.0285714285714284e-06, "loss": 1.272096872329712, "step": 19 }, { "epoch": 0.05755395683453238, "grad_norm": 0.37548463438614677, "learning_rate": 1.0857142857142856e-06, "loss": 1.1252775192260742, "step": 20 }, { "epoch": 0.060431654676258995, "grad_norm": 0.39203682362934145, "learning_rate": 1.1428571428571428e-06, "loss": 1.2636396884918213, "step": 21 }, { "epoch": 0.06330935251798561, "grad_norm": 0.3929267980473854, "learning_rate": 1.2e-06, "loss": 1.1296113729476929, "step": 22 }, { "epoch": 0.06618705035971223, "grad_norm": 0.3580571203740857, "learning_rate": 1.2571428571428571e-06, "loss": 1.2140036821365356, "step": 23 }, { "epoch": 0.06906474820143885, "grad_norm": 0.40128457938538337, "learning_rate": 1.3142857142857143e-06, "loss": 1.3345097303390503, "step": 24 }, { "epoch": 0.07194244604316546, "grad_norm": 0.3624963705827193, "learning_rate": 1.3714285714285715e-06, "loss": 1.2285950183868408, "step": 25 }, { "epoch": 0.07482014388489208, "grad_norm": 0.3891545493397791, "learning_rate": 1.4285714285714286e-06, "loss": 1.1885042190551758, "step": 26 }, { "epoch": 0.0776978417266187, "grad_norm": 0.45890158291879024, "learning_rate": 1.4857142857142858e-06, "loss": 1.1534702777862549, "step": 27 }, { "epoch": 0.08057553956834532, "grad_norm": 0.4134301257295623, "learning_rate": 1.5428571428571428e-06, "loss": 1.1666285991668701, "step": 28 }, { "epoch": 0.08345323741007195, "grad_norm": 0.42695898719570075, "learning_rate": 1.6e-06, "loss": 1.0518786907196045, "step": 29 }, { "epoch": 0.08633093525179857, "grad_norm": 0.40529199232299007, "learning_rate": 1.657142857142857e-06, "loss": 1.2913450002670288, "step": 30 }, { "epoch": 0.08920863309352518, "grad_norm": 0.42614405561233504, "learning_rate": 1.714285714285714e-06, "loss": 1.1956894397735596, "step": 31 }, { "epoch": 0.0920863309352518, "grad_norm": 0.4536901431733599, "learning_rate": 1.7714285714285712e-06, "loss": 1.2299771308898926, "step": 32 }, { "epoch": 0.09496402877697842, "grad_norm": 0.4275911040935052, "learning_rate": 1.8285714285714284e-06, "loss": 1.230122685432434, "step": 33 }, { "epoch": 0.09784172661870504, "grad_norm": 0.40907326990128035, "learning_rate": 1.8857142857142856e-06, "loss": 1.2399665117263794, "step": 34 }, { "epoch": 0.10071942446043165, "grad_norm": 0.42873759553168767, "learning_rate": 1.942857142857143e-06, "loss": 1.209876298904419, "step": 35 }, { "epoch": 0.10359712230215827, "grad_norm": 0.3875078895858393, "learning_rate": 2e-06, "loss": 1.3261746168136597, "step": 36 }, { "epoch": 0.10647482014388489, "grad_norm": 0.3895695917104401, "learning_rate": 1.999988705525916e-06, "loss": 1.1430740356445312, "step": 37 }, { "epoch": 0.1093525179856115, "grad_norm": 0.390575759377815, "learning_rate": 1.9999548223587944e-06, "loss": 1.0920931100845337, "step": 38 }, { "epoch": 0.11223021582733812, "grad_norm": 0.4547783385877052, "learning_rate": 1.9998983512640208e-06, "loss": 1.1944105625152588, "step": 39 }, { "epoch": 0.11510791366906475, "grad_norm": 0.46472429487773786, "learning_rate": 1.9998192935172177e-06, "loss": 1.2673561573028564, "step": 40 }, { "epoch": 0.11798561151079137, "grad_norm": 0.41362852460476074, "learning_rate": 1.9997176509042157e-06, "loss": 1.2279549837112427, "step": 41 }, { "epoch": 0.12086330935251799, "grad_norm": 0.4271272595194793, "learning_rate": 1.9995934257210153e-06, "loss": 1.1604218482971191, "step": 42 }, { "epoch": 0.1237410071942446, "grad_norm": 0.3804983247156394, "learning_rate": 1.9994466207737324e-06, "loss": 1.1747047901153564, "step": 43 }, { "epoch": 0.12661870503597122, "grad_norm": 0.3650820732490411, "learning_rate": 1.9992772393785363e-06, "loss": 1.0480847358703613, "step": 44 }, { "epoch": 0.12949640287769784, "grad_norm": 0.41209897515894023, "learning_rate": 1.9990852853615746e-06, "loss": 1.2965943813323975, "step": 45 }, { "epoch": 0.13237410071942446, "grad_norm": 0.4043694740064971, "learning_rate": 1.9988707630588874e-06, "loss": 1.1381937265396118, "step": 46 }, { "epoch": 0.13525179856115108, "grad_norm": 0.4147421237580474, "learning_rate": 1.9986336773163066e-06, "loss": 1.1976345777511597, "step": 47 }, { "epoch": 0.1381294964028777, "grad_norm": 0.46096126219291444, "learning_rate": 1.99837403348935e-06, "loss": 1.1909739971160889, "step": 48 }, { "epoch": 0.1410071942446043, "grad_norm": 0.48554729123892804, "learning_rate": 1.9980918374430994e-06, "loss": 1.1516118049621582, "step": 49 }, { "epoch": 0.14388489208633093, "grad_norm": 0.46308911997767715, "learning_rate": 1.997787095552066e-06, "loss": 1.2086803913116455, "step": 50 }, { "epoch": 0.14676258992805755, "grad_norm": 0.4490535916599434, "learning_rate": 1.9974598147000487e-06, "loss": 1.2800133228302002, "step": 51 }, { "epoch": 0.14964028776978416, "grad_norm": 0.40996217705477556, "learning_rate": 1.997110002279978e-06, "loss": 1.2382150888442993, "step": 52 }, { "epoch": 0.15251798561151078, "grad_norm": 0.40988669582643505, "learning_rate": 1.9967376661937477e-06, "loss": 1.1741186380386353, "step": 53 }, { "epoch": 0.1553956834532374, "grad_norm": 0.5464526851837473, "learning_rate": 1.9963428148520393e-06, "loss": 1.1607799530029297, "step": 54 }, { "epoch": 0.15827338129496402, "grad_norm": 0.42016622274268145, "learning_rate": 1.9959254571741285e-06, "loss": 1.2755463123321533, "step": 55 }, { "epoch": 0.16115107913669063, "grad_norm": 0.4490415553167208, "learning_rate": 1.995485602587687e-06, "loss": 1.261953592300415, "step": 56 }, { "epoch": 0.16402877697841728, "grad_norm": 0.5169029226242617, "learning_rate": 1.995023261028567e-06, "loss": 1.1530394554138184, "step": 57 }, { "epoch": 0.1669064748201439, "grad_norm": 0.43016608573228415, "learning_rate": 1.9945384429405776e-06, "loss": 1.268787145614624, "step": 58 }, { "epoch": 0.1697841726618705, "grad_norm": 0.4793080238946335, "learning_rate": 1.99403115927525e-06, "loss": 1.2686214447021484, "step": 59 }, { "epoch": 0.17266187050359713, "grad_norm": 0.4249978192550724, "learning_rate": 1.9935014214915883e-06, "loss": 1.201757550239563, "step": 60 }, { "epoch": 0.17553956834532375, "grad_norm": 0.4481671623233787, "learning_rate": 1.992949241555812e-06, "loss": 1.1886329650878906, "step": 61 }, { "epoch": 0.17841726618705037, "grad_norm": 0.5402187081810303, "learning_rate": 1.9923746319410847e-06, "loss": 1.2228707075119019, "step": 62 }, { "epoch": 0.18129496402877698, "grad_norm": 0.4000631018631766, "learning_rate": 1.991777605627234e-06, "loss": 1.0736989974975586, "step": 63 }, { "epoch": 0.1841726618705036, "grad_norm": 0.4881119026873745, "learning_rate": 1.9911581761004556e-06, "loss": 1.213085651397705, "step": 64 }, { "epoch": 0.18705035971223022, "grad_norm": 0.5274580867703768, "learning_rate": 1.990516357353011e-06, "loss": 1.2776455879211426, "step": 65 }, { "epoch": 0.18992805755395684, "grad_norm": 0.5316412618386857, "learning_rate": 1.989852163882911e-06, "loss": 1.1717431545257568, "step": 66 }, { "epoch": 0.19280575539568345, "grad_norm": 0.5326686824141037, "learning_rate": 1.9891656106935873e-06, "loss": 1.1462079286575317, "step": 67 }, { "epoch": 0.19568345323741007, "grad_norm": 0.4445628555318413, "learning_rate": 1.988456713293554e-06, "loss": 1.174164056777954, "step": 68 }, { "epoch": 0.1985611510791367, "grad_norm": 0.5068823690157335, "learning_rate": 1.987725487696059e-06, "loss": 1.3018139600753784, "step": 69 }, { "epoch": 0.2014388489208633, "grad_norm": 0.5106162613433823, "learning_rate": 1.9869719504187175e-06, "loss": 1.273469090461731, "step": 70 }, { "epoch": 0.20431654676258992, "grad_norm": 0.46468998684527285, "learning_rate": 1.9861961184831453e-06, "loss": 1.2473914623260498, "step": 71 }, { "epoch": 0.20719424460431654, "grad_norm": 0.5345828232737263, "learning_rate": 1.9853980094145696e-06, "loss": 1.193030834197998, "step": 72 }, { "epoch": 0.21007194244604316, "grad_norm": 0.5271717020423939, "learning_rate": 1.9845776412414346e-06, "loss": 1.1826913356781006, "step": 73 }, { "epoch": 0.21294964028776978, "grad_norm": 0.4004103214424577, "learning_rate": 1.9837350324949944e-06, "loss": 1.055051565170288, "step": 74 }, { "epoch": 0.2158273381294964, "grad_norm": 0.5075363846617762, "learning_rate": 1.9828702022088942e-06, "loss": 1.1969430446624756, "step": 75 }, { "epoch": 0.218705035971223, "grad_norm": 0.5116674728159791, "learning_rate": 1.9819831699187407e-06, "loss": 1.2737852334976196, "step": 76 }, { "epoch": 0.22158273381294963, "grad_norm": 0.5134518143732013, "learning_rate": 1.9810739556616607e-06, "loss": 1.1505439281463623, "step": 77 }, { "epoch": 0.22446043165467625, "grad_norm": 0.4961762001577513, "learning_rate": 1.980142579975847e-06, "loss": 1.1265602111816406, "step": 78 }, { "epoch": 0.2273381294964029, "grad_norm": 0.5222448272100187, "learning_rate": 1.9791890639000973e-06, "loss": 1.1243963241577148, "step": 79 }, { "epoch": 0.2302158273381295, "grad_norm": 0.500186205073849, "learning_rate": 1.9782134289733374e-06, "loss": 1.2614185810089111, "step": 80 }, { "epoch": 0.23309352517985613, "grad_norm": 0.49912691652286095, "learning_rate": 1.9772156972341326e-06, "loss": 1.1954736709594727, "step": 81 }, { "epoch": 0.23597122302158274, "grad_norm": 0.4383210281801482, "learning_rate": 1.9761958912201945e-06, "loss": 1.125051736831665, "step": 82 }, { "epoch": 0.23884892086330936, "grad_norm": 0.4960615716168437, "learning_rate": 1.9751540339678683e-06, "loss": 1.0172779560089111, "step": 83 }, { "epoch": 0.24172661870503598, "grad_norm": 0.6116032459432448, "learning_rate": 1.9740901490116133e-06, "loss": 1.1732102632522583, "step": 84 }, { "epoch": 0.2446043165467626, "grad_norm": 0.5176440293767078, "learning_rate": 1.973004260383471e-06, "loss": 1.225417137145996, "step": 85 }, { "epoch": 0.2474820143884892, "grad_norm": 0.5678048576376955, "learning_rate": 1.9718963926125244e-06, "loss": 1.0927081108093262, "step": 86 }, { "epoch": 0.2503597122302158, "grad_norm": 0.6364015437310658, "learning_rate": 1.9707665707243406e-06, "loss": 1.2957037687301636, "step": 87 }, { "epoch": 0.25323741007194245, "grad_norm": 0.5809276103827633, "learning_rate": 1.969614820240407e-06, "loss": 1.187430500984192, "step": 88 }, { "epoch": 0.25611510791366904, "grad_norm": 0.47690557886764195, "learning_rate": 1.9684411671775568e-06, "loss": 1.1036494970321655, "step": 89 }, { "epoch": 0.2589928057553957, "grad_norm": 0.6365313959891131, "learning_rate": 1.967245638047378e-06, "loss": 1.1274656057357788, "step": 90 }, { "epoch": 0.26187050359712233, "grad_norm": 0.542056873386327, "learning_rate": 1.9660282598556155e-06, "loss": 1.1317627429962158, "step": 91 }, { "epoch": 0.2647482014388489, "grad_norm": 0.5772496326462602, "learning_rate": 1.964789060101563e-06, "loss": 1.1629116535186768, "step": 92 }, { "epoch": 0.26762589928057556, "grad_norm": 0.5006687207127247, "learning_rate": 1.9635280667774385e-06, "loss": 1.0691213607788086, "step": 93 }, { "epoch": 0.27050359712230215, "grad_norm": 0.4948922663102226, "learning_rate": 1.9622453083677555e-06, "loss": 1.0724678039550781, "step": 94 }, { "epoch": 0.2733812949640288, "grad_norm": 0.6310621741039645, "learning_rate": 1.9609408138486773e-06, "loss": 1.1892088651657104, "step": 95 }, { "epoch": 0.2762589928057554, "grad_norm": 0.5720157291662107, "learning_rate": 1.959614612687363e-06, "loss": 1.1208692789077759, "step": 96 }, { "epoch": 0.27913669064748203, "grad_norm": 0.4968629674990631, "learning_rate": 1.9582667348413013e-06, "loss": 1.1870933771133423, "step": 97 }, { "epoch": 0.2820143884892086, "grad_norm": 0.562208720820431, "learning_rate": 1.9568972107576355e-06, "loss": 1.1234577894210815, "step": 98 }, { "epoch": 0.28489208633093527, "grad_norm": 0.5269154276860062, "learning_rate": 1.9555060713724737e-06, "loss": 1.0910080671310425, "step": 99 }, { "epoch": 0.28776978417266186, "grad_norm": 0.5740033705130164, "learning_rate": 1.9540933481101923e-06, "loss": 1.1712496280670166, "step": 100 }, { "epoch": 0.2906474820143885, "grad_norm": 0.5087584410520719, "learning_rate": 1.952659072882723e-06, "loss": 1.1709492206573486, "step": 101 }, { "epoch": 0.2935251798561151, "grad_norm": 0.507763542956114, "learning_rate": 1.9512032780888346e-06, "loss": 1.2015647888183594, "step": 102 }, { "epoch": 0.29640287769784174, "grad_norm": 0.46170542788719804, "learning_rate": 1.9497259966134005e-06, "loss": 1.100395679473877, "step": 103 }, { "epoch": 0.2992805755395683, "grad_norm": 0.5868783299117551, "learning_rate": 1.9482272618266554e-06, "loss": 1.1746639013290405, "step": 104 }, { "epoch": 0.302158273381295, "grad_norm": 0.5697876288298696, "learning_rate": 1.946707107583442e-06, "loss": 1.106672763824463, "step": 105 }, { "epoch": 0.30503597122302156, "grad_norm": 0.569742102482636, "learning_rate": 1.945165568222445e-06, "loss": 1.213707685470581, "step": 106 }, { "epoch": 0.3079136690647482, "grad_norm": 0.538364984750885, "learning_rate": 1.9436026785654175e-06, "loss": 1.0930910110473633, "step": 107 }, { "epoch": 0.3107913669064748, "grad_norm": 0.5312913632919434, "learning_rate": 1.942018473916393e-06, "loss": 1.142619252204895, "step": 108 }, { "epoch": 0.31366906474820144, "grad_norm": 0.5975458242690872, "learning_rate": 1.940412990060888e-06, "loss": 1.2266335487365723, "step": 109 }, { "epoch": 0.31654676258992803, "grad_norm": 0.46671843811650277, "learning_rate": 1.9387862632650944e-06, "loss": 1.1608915328979492, "step": 110 }, { "epoch": 0.3194244604316547, "grad_norm": 0.5834160016362078, "learning_rate": 1.937138330275059e-06, "loss": 1.183951735496521, "step": 111 }, { "epoch": 0.32230215827338127, "grad_norm": 0.5226121969782161, "learning_rate": 1.9354692283158553e-06, "loss": 1.1224737167358398, "step": 112 }, { "epoch": 0.3251798561151079, "grad_norm": 0.5495555876202739, "learning_rate": 1.9337789950907407e-06, "loss": 1.0857056379318237, "step": 113 }, { "epoch": 0.32805755395683456, "grad_norm": 0.4562188941273161, "learning_rate": 1.9320676687803055e-06, "loss": 1.0629336833953857, "step": 114 }, { "epoch": 0.33093525179856115, "grad_norm": 0.4181080129706698, "learning_rate": 1.930335288041612e-06, "loss": 1.1054446697235107, "step": 115 }, { "epoch": 0.3338129496402878, "grad_norm": 0.4892645824645763, "learning_rate": 1.928581892007318e-06, "loss": 1.0204641819000244, "step": 116 }, { "epoch": 0.3366906474820144, "grad_norm": 0.42126704545419896, "learning_rate": 1.926807520284796e-06, "loss": 1.0821490287780762, "step": 117 }, { "epoch": 0.339568345323741, "grad_norm": 0.5344372210987457, "learning_rate": 1.9250122129552364e-06, "loss": 1.1084657907485962, "step": 118 }, { "epoch": 0.3424460431654676, "grad_norm": 0.502255473105476, "learning_rate": 1.923196010572744e-06, "loss": 1.1403509378433228, "step": 119 }, { "epoch": 0.34532374100719426, "grad_norm": 0.4478418679539473, "learning_rate": 1.92135895416342e-06, "loss": 1.1791510581970215, "step": 120 }, { "epoch": 0.34820143884892085, "grad_norm": 0.5040410026582576, "learning_rate": 1.9195010852244366e-06, "loss": 1.1240849494934082, "step": 121 }, { "epoch": 0.3510791366906475, "grad_norm": 0.3694753679987858, "learning_rate": 1.917622445723099e-06, "loss": 0.9320825338363647, "step": 122 }, { "epoch": 0.3539568345323741, "grad_norm": 0.5088967091444829, "learning_rate": 1.9157230780958975e-06, "loss": 1.1261234283447266, "step": 123 }, { "epoch": 0.35683453237410073, "grad_norm": 0.48547365885933336, "learning_rate": 1.9138030252475484e-06, "loss": 1.0831753015518188, "step": 124 }, { "epoch": 0.3597122302158273, "grad_norm": 0.5680540539558109, "learning_rate": 1.911862330550027e-06, "loss": 1.1547625064849854, "step": 125 }, { "epoch": 0.36258992805755397, "grad_norm": 0.482809160276131, "learning_rate": 1.9099010378415844e-06, "loss": 0.9853061437606812, "step": 126 }, { "epoch": 0.36546762589928056, "grad_norm": 0.5999988506556185, "learning_rate": 1.90791919142576e-06, "loss": 1.0587449073791504, "step": 127 }, { "epoch": 0.3683453237410072, "grad_norm": 0.4786126684230341, "learning_rate": 1.9059168360703803e-06, "loss": 1.0581047534942627, "step": 128 }, { "epoch": 0.3712230215827338, "grad_norm": 0.4721916332432008, "learning_rate": 1.9038940170065466e-06, "loss": 1.2065491676330566, "step": 129 }, { "epoch": 0.37410071942446044, "grad_norm": 0.43066899683927695, "learning_rate": 1.9018507799276131e-06, "loss": 0.9673759937286377, "step": 130 }, { "epoch": 0.376978417266187, "grad_norm": 0.4819631279931072, "learning_rate": 1.8997871709881567e-06, "loss": 1.055248498916626, "step": 131 }, { "epoch": 0.37985611510791367, "grad_norm": 0.49209482786786624, "learning_rate": 1.8977032368029332e-06, "loss": 1.0030591487884521, "step": 132 }, { "epoch": 0.38273381294964026, "grad_norm": 0.5018665575618141, "learning_rate": 1.8955990244458233e-06, "loss": 1.1191744804382324, "step": 133 }, { "epoch": 0.3856115107913669, "grad_norm": 0.5576523584422169, "learning_rate": 1.8934745814487712e-06, "loss": 1.0733585357666016, "step": 134 }, { "epoch": 0.38848920863309355, "grad_norm": 0.5008878898473639, "learning_rate": 1.8913299558007095e-06, "loss": 1.1800191402435303, "step": 135 }, { "epoch": 0.39136690647482014, "grad_norm": 0.43511113369960597, "learning_rate": 1.8891651959464758e-06, "loss": 1.027364730834961, "step": 136 }, { "epoch": 0.3942446043165468, "grad_norm": 0.4765093745936347, "learning_rate": 1.8869803507857185e-06, "loss": 1.107445478439331, "step": 137 }, { "epoch": 0.3971223021582734, "grad_norm": 0.46129319329450635, "learning_rate": 1.884775469671791e-06, "loss": 1.187384009361267, "step": 138 }, { "epoch": 0.4, "grad_norm": 0.48437113838726986, "learning_rate": 1.8825506024106396e-06, "loss": 1.0362842082977295, "step": 139 }, { "epoch": 0.4028776978417266, "grad_norm": 0.4645234599714407, "learning_rate": 1.8803057992596747e-06, "loss": 1.0802561044692993, "step": 140 }, { "epoch": 0.40575539568345326, "grad_norm": 0.4280947125747255, "learning_rate": 1.8780411109266385e-06, "loss": 1.1036925315856934, "step": 141 }, { "epoch": 0.40863309352517985, "grad_norm": 0.407505252457033, "learning_rate": 1.8757565885684584e-06, "loss": 1.0300638675689697, "step": 142 }, { "epoch": 0.4115107913669065, "grad_norm": 0.3623983611321653, "learning_rate": 1.8734522837900915e-06, "loss": 0.982805609703064, "step": 143 }, { "epoch": 0.4143884892086331, "grad_norm": 0.4850168244727846, "learning_rate": 1.8711282486433594e-06, "loss": 1.1880314350128174, "step": 144 }, { "epoch": 0.4172661870503597, "grad_norm": 0.39657849815671453, "learning_rate": 1.8687845356257705e-06, "loss": 1.001549482345581, "step": 145 }, { "epoch": 0.4201438848920863, "grad_norm": 0.43575502402332317, "learning_rate": 1.866421197679338e-06, "loss": 1.1122441291809082, "step": 146 }, { "epoch": 0.42302158273381296, "grad_norm": 0.42214821227567706, "learning_rate": 1.8640382881893797e-06, "loss": 0.9325125217437744, "step": 147 }, { "epoch": 0.42589928057553955, "grad_norm": 0.49793043845219986, "learning_rate": 1.8616358609833144e-06, "loss": 1.1867802143096924, "step": 148 }, { "epoch": 0.4287769784172662, "grad_norm": 0.5003465171318868, "learning_rate": 1.8592139703294456e-06, "loss": 1.1244676113128662, "step": 149 }, { "epoch": 0.4316546762589928, "grad_norm": 0.4321152662969621, "learning_rate": 1.8567726709357365e-06, "loss": 1.0435458421707153, "step": 150 }, { "epoch": 0.43453237410071943, "grad_norm": 0.42883319369137934, "learning_rate": 1.854312017948572e-06, "loss": 0.9999338388442993, "step": 151 }, { "epoch": 0.437410071942446, "grad_norm": 0.41457898959091355, "learning_rate": 1.8518320669515145e-06, "loss": 1.0550625324249268, "step": 152 }, { "epoch": 0.44028776978417267, "grad_norm": 0.37601633308325455, "learning_rate": 1.8493328739640494e-06, "loss": 1.1828843355178833, "step": 153 }, { "epoch": 0.44316546762589926, "grad_norm": 0.48403246563497276, "learning_rate": 1.8468144954403174e-06, "loss": 1.0219019651412964, "step": 154 }, { "epoch": 0.4460431654676259, "grad_norm": 0.4080458449115876, "learning_rate": 1.8442769882678397e-06, "loss": 1.064319372177124, "step": 155 }, { "epoch": 0.4489208633093525, "grad_norm": 0.4307991968666485, "learning_rate": 1.8417204097662348e-06, "loss": 0.9629073143005371, "step": 156 }, { "epoch": 0.45179856115107914, "grad_norm": 0.42498698874553337, "learning_rate": 1.8391448176859221e-06, "loss": 0.9967993497848511, "step": 157 }, { "epoch": 0.4546762589928058, "grad_norm": 0.3864381339362373, "learning_rate": 1.8365502702068176e-06, "loss": 0.9952638149261475, "step": 158 }, { "epoch": 0.45755395683453237, "grad_norm": 0.40273255363591914, "learning_rate": 1.8339368259370196e-06, "loss": 1.1115927696228027, "step": 159 }, { "epoch": 0.460431654676259, "grad_norm": 0.41064614803619237, "learning_rate": 1.8313045439114854e-06, "loss": 1.0840253829956055, "step": 160 }, { "epoch": 0.4633093525179856, "grad_norm": 0.5037888020430983, "learning_rate": 1.8286534835906967e-06, "loss": 0.9446510076522827, "step": 161 }, { "epoch": 0.46618705035971225, "grad_norm": 0.5305475182215801, "learning_rate": 1.8259837048593187e-06, "loss": 1.074650526046753, "step": 162 }, { "epoch": 0.46906474820143884, "grad_norm": 0.4475049031892367, "learning_rate": 1.8232952680248439e-06, "loss": 1.1149487495422363, "step": 163 }, { "epoch": 0.4719424460431655, "grad_norm": 0.36490672571189003, "learning_rate": 1.8205882338162333e-06, "loss": 1.0125229358673096, "step": 164 }, { "epoch": 0.4748201438848921, "grad_norm": 0.41629528136632005, "learning_rate": 1.8178626633825417e-06, "loss": 1.079350471496582, "step": 165 }, { "epoch": 0.4776978417266187, "grad_norm": 0.49379487126662264, "learning_rate": 1.8151186182915383e-06, "loss": 1.057182788848877, "step": 166 }, { "epoch": 0.4805755395683453, "grad_norm": 0.4055282585841769, "learning_rate": 1.8123561605283163e-06, "loss": 0.9132846593856812, "step": 167 }, { "epoch": 0.48345323741007196, "grad_norm": 0.3366384943701116, "learning_rate": 1.8095753524938903e-06, "loss": 1.0845749378204346, "step": 168 }, { "epoch": 0.48633093525179855, "grad_norm": 0.40679871398886064, "learning_rate": 1.8067762570037885e-06, "loss": 1.042318344116211, "step": 169 }, { "epoch": 0.4892086330935252, "grad_norm": 0.35055813919371737, "learning_rate": 1.8039589372866347e-06, "loss": 1.01352858543396, "step": 170 }, { "epoch": 0.4920863309352518, "grad_norm": 0.441317686613448, "learning_rate": 1.8011234569827172e-06, "loss": 1.0617296695709229, "step": 171 }, { "epoch": 0.4949640287769784, "grad_norm": 0.41119908042871567, "learning_rate": 1.798269880142554e-06, "loss": 1.0274578332901, "step": 172 }, { "epoch": 0.497841726618705, "grad_norm": 0.4140046822969208, "learning_rate": 1.7953982712254446e-06, "loss": 1.1444511413574219, "step": 173 }, { "epoch": 0.5007194244604316, "grad_norm": 0.360331860974586, "learning_rate": 1.7925086950980134e-06, "loss": 1.0049320459365845, "step": 174 }, { "epoch": 0.5035971223021583, "grad_norm": 0.46448969705810783, "learning_rate": 1.7896012170327466e-06, "loss": 1.0649842023849487, "step": 175 }, { "epoch": 0.5064748201438849, "grad_norm": 0.379616869145886, "learning_rate": 1.7866759027065149e-06, "loss": 1.0953956842422485, "step": 176 }, { "epoch": 0.5093525179856115, "grad_norm": 0.46655176135320064, "learning_rate": 1.783732818199092e-06, "loss": 1.1652858257293701, "step": 177 }, { "epoch": 0.5122302158273381, "grad_norm": 0.4331501137557297, "learning_rate": 1.7807720299916613e-06, "loss": 1.0503497123718262, "step": 178 }, { "epoch": 0.5151079136690647, "grad_norm": 0.4528588241142532, "learning_rate": 1.7777936049653146e-06, "loss": 0.9811398983001709, "step": 179 }, { "epoch": 0.5179856115107914, "grad_norm": 0.3875771596875948, "learning_rate": 1.77479761039954e-06, "loss": 1.0009725093841553, "step": 180 }, { "epoch": 0.520863309352518, "grad_norm": 0.379690279869989, "learning_rate": 1.7717841139707038e-06, "loss": 1.1046425104141235, "step": 181 }, { "epoch": 0.5237410071942447, "grad_norm": 0.40872536920209385, "learning_rate": 1.76875318375052e-06, "loss": 0.9237216711044312, "step": 182 }, { "epoch": 0.5266187050359712, "grad_norm": 0.3943687213438678, "learning_rate": 1.7657048882045149e-06, "loss": 1.0758323669433594, "step": 183 }, { "epoch": 0.5294964028776978, "grad_norm": 0.4366095757286378, "learning_rate": 1.7626392961904783e-06, "loss": 1.103142261505127, "step": 184 }, { "epoch": 0.5323741007194245, "grad_norm": 0.4230178437251199, "learning_rate": 1.7595564769569094e-06, "loss": 0.9749042987823486, "step": 185 }, { "epoch": 0.5352517985611511, "grad_norm": 0.3416533764601269, "learning_rate": 1.7564565001414522e-06, "loss": 0.8281745910644531, "step": 186 }, { "epoch": 0.5381294964028777, "grad_norm": 0.33708804570536016, "learning_rate": 1.753339435769322e-06, "loss": 1.102489709854126, "step": 187 }, { "epoch": 0.5410071942446043, "grad_norm": 0.37891052167871625, "learning_rate": 1.7502053542517244e-06, "loss": 1.0745601654052734, "step": 188 }, { "epoch": 0.543884892086331, "grad_norm": 0.3738524538746129, "learning_rate": 1.7470543263842642e-06, "loss": 1.111441969871521, "step": 189 }, { "epoch": 0.5467625899280576, "grad_norm": 0.39050521266738286, "learning_rate": 1.7438864233453473e-06, "loss": 1.1269681453704834, "step": 190 }, { "epoch": 0.5496402877697841, "grad_norm": 0.31482595417583814, "learning_rate": 1.7407017166945706e-06, "loss": 1.0488468408584595, "step": 191 }, { "epoch": 0.5525179856115108, "grad_norm": 0.3136290302777941, "learning_rate": 1.7375002783711076e-06, "loss": 0.9358277320861816, "step": 192 }, { "epoch": 0.5553956834532374, "grad_norm": 0.3338322035032311, "learning_rate": 1.7342821806920829e-06, "loss": 1.072392225265503, "step": 193 }, { "epoch": 0.5582733812949641, "grad_norm": 0.3471468140531117, "learning_rate": 1.7310474963509378e-06, "loss": 1.0486462116241455, "step": 194 }, { "epoch": 0.5611510791366906, "grad_norm": 0.38596584622793473, "learning_rate": 1.72779629841579e-06, "loss": 0.9716250896453857, "step": 195 }, { "epoch": 0.5640287769784172, "grad_norm": 0.3798579435668601, "learning_rate": 1.7245286603277803e-06, "loss": 1.033220648765564, "step": 196 }, { "epoch": 0.5669064748201439, "grad_norm": 0.3425201594360531, "learning_rate": 1.721244655899416e-06, "loss": 0.9934518337249756, "step": 197 }, { "epoch": 0.5697841726618705, "grad_norm": 0.3427994445976512, "learning_rate": 1.717944359312904e-06, "loss": 1.134864330291748, "step": 198 }, { "epoch": 0.5726618705035971, "grad_norm": 0.3723446907907705, "learning_rate": 1.7146278451184717e-06, "loss": 1.1344006061553955, "step": 199 }, { "epoch": 0.5755395683453237, "grad_norm": 0.33571105673864887, "learning_rate": 1.7112951882326869e-06, "loss": 0.9915531873703003, "step": 200 }, { "epoch": 0.5784172661870504, "grad_norm": 0.3692434093950694, "learning_rate": 1.7079464639367632e-06, "loss": 1.028855800628662, "step": 201 }, { "epoch": 0.581294964028777, "grad_norm": 0.3094858241361718, "learning_rate": 1.7045817478748598e-06, "loss": 0.9810290932655334, "step": 202 }, { "epoch": 0.5841726618705037, "grad_norm": 0.33222888657473965, "learning_rate": 1.701201116052374e-06, "loss": 0.8440494537353516, "step": 203 }, { "epoch": 0.5870503597122302, "grad_norm": 0.3220131017798883, "learning_rate": 1.6978046448342226e-06, "loss": 1.0670182704925537, "step": 204 }, { "epoch": 0.5899280575539568, "grad_norm": 0.4023809574277352, "learning_rate": 1.6943924109431179e-06, "loss": 1.038970708847046, "step": 205 }, { "epoch": 0.5928057553956835, "grad_norm": 0.36736787076416194, "learning_rate": 1.690964491457834e-06, "loss": 1.0510860681533813, "step": 206 }, { "epoch": 0.5956834532374101, "grad_norm": 0.3488299733915227, "learning_rate": 1.687520963811467e-06, "loss": 0.913723886013031, "step": 207 }, { "epoch": 0.5985611510791367, "grad_norm": 0.48422640633599995, "learning_rate": 1.684061905789684e-06, "loss": 0.9846644401550293, "step": 208 }, { "epoch": 0.6014388489208633, "grad_norm": 0.31689088814964833, "learning_rate": 1.6805873955289678e-06, "loss": 1.038316249847412, "step": 209 }, { "epoch": 0.60431654676259, "grad_norm": 0.31535153436268476, "learning_rate": 1.6770975115148503e-06, "loss": 1.1639020442962646, "step": 210 }, { "epoch": 0.6071942446043166, "grad_norm": 0.32901232219616355, "learning_rate": 1.6735923325801406e-06, "loss": 1.0157148838043213, "step": 211 }, { "epoch": 0.6100719424460431, "grad_norm": 0.32346917708292794, "learning_rate": 1.670071937903144e-06, "loss": 0.9528936743736267, "step": 212 }, { "epoch": 0.6129496402877698, "grad_norm": 0.3431066323853164, "learning_rate": 1.6665364070058736e-06, "loss": 1.089216709136963, "step": 213 }, { "epoch": 0.6158273381294964, "grad_norm": 0.3096527786452577, "learning_rate": 1.6629858197522535e-06, "loss": 1.0500307083129883, "step": 214 }, { "epoch": 0.6187050359712231, "grad_norm": 0.34740584906307037, "learning_rate": 1.6594202563463149e-06, "loss": 0.9973140954971313, "step": 215 }, { "epoch": 0.6215827338129496, "grad_norm": 0.3076575246625187, "learning_rate": 1.6558397973303851e-06, "loss": 0.9394571781158447, "step": 216 }, { "epoch": 0.6244604316546762, "grad_norm": 0.35489785566062343, "learning_rate": 1.652244523583267e-06, "loss": 0.9569211006164551, "step": 217 }, { "epoch": 0.6273381294964029, "grad_norm": 0.33512033241700295, "learning_rate": 1.6486345163184129e-06, "loss": 1.0791332721710205, "step": 218 }, { "epoch": 0.6302158273381295, "grad_norm": 0.3626683432890907, "learning_rate": 1.6450098570820896e-06, "loss": 1.0544092655181885, "step": 219 }, { "epoch": 0.6330935251798561, "grad_norm": 0.358904663222277, "learning_rate": 1.6413706277515373e-06, "loss": 0.9803202152252197, "step": 220 }, { "epoch": 0.6359712230215827, "grad_norm": 0.32815545381559164, "learning_rate": 1.6377169105331182e-06, "loss": 0.9604759216308594, "step": 221 }, { "epoch": 0.6388489208633094, "grad_norm": 0.32597650541963474, "learning_rate": 1.6340487879604617e-06, "loss": 1.0064623355865479, "step": 222 }, { "epoch": 0.641726618705036, "grad_norm": 0.3506857994251924, "learning_rate": 1.630366342892598e-06, "loss": 1.014646053314209, "step": 223 }, { "epoch": 0.6446043165467625, "grad_norm": 0.41874730381325936, "learning_rate": 1.626669658512088e-06, "loss": 0.9256491661071777, "step": 224 }, { "epoch": 0.6474820143884892, "grad_norm": 0.3188217704851316, "learning_rate": 1.6229588183231434e-06, "loss": 0.9941632151603699, "step": 225 }, { "epoch": 0.6503597122302158, "grad_norm": 0.32011807732834047, "learning_rate": 1.6192339061497413e-06, "loss": 0.9773931503295898, "step": 226 }, { "epoch": 0.6532374100719425, "grad_norm": 0.3189594924614036, "learning_rate": 1.615495006133729e-06, "loss": 0.9987149238586426, "step": 227 }, { "epoch": 0.6561151079136691, "grad_norm": 0.35037906857078205, "learning_rate": 1.6117422027329263e-06, "loss": 0.9832175374031067, "step": 228 }, { "epoch": 0.6589928057553956, "grad_norm": 0.38864609779113907, "learning_rate": 1.6079755807192136e-06, "loss": 1.0916314125061035, "step": 229 }, { "epoch": 0.6618705035971223, "grad_norm": 0.30929668859135395, "learning_rate": 1.604195225176621e-06, "loss": 0.9629628658294678, "step": 230 }, { "epoch": 0.6647482014388489, "grad_norm": 0.32671840835956706, "learning_rate": 1.6004012214994035e-06, "loss": 0.9343143701553345, "step": 231 }, { "epoch": 0.6676258992805756, "grad_norm": 0.33641494062099064, "learning_rate": 1.5965936553901136e-06, "loss": 1.0556144714355469, "step": 232 }, { "epoch": 0.6705035971223021, "grad_norm": 0.3187574882066994, "learning_rate": 1.592772612857665e-06, "loss": 0.9991135597229004, "step": 233 }, { "epoch": 0.6733812949640288, "grad_norm": 0.3480834665064568, "learning_rate": 1.5889381802153896e-06, "loss": 1.0254430770874023, "step": 234 }, { "epoch": 0.6762589928057554, "grad_norm": 0.3072999299525753, "learning_rate": 1.585090444079087e-06, "loss": 0.985275149345398, "step": 235 }, { "epoch": 0.679136690647482, "grad_norm": 0.35851120910777423, "learning_rate": 1.5812294913650694e-06, "loss": 0.9904893636703491, "step": 236 }, { "epoch": 0.6820143884892086, "grad_norm": 0.31773614133543254, "learning_rate": 1.5773554092881984e-06, "loss": 1.0499398708343506, "step": 237 }, { "epoch": 0.6848920863309352, "grad_norm": 0.30921377977469555, "learning_rate": 1.5734682853599122e-06, "loss": 1.0339066982269287, "step": 238 }, { "epoch": 0.6877697841726619, "grad_norm": 0.35671236366028325, "learning_rate": 1.5695682073862525e-06, "loss": 0.9532429575920105, "step": 239 }, { "epoch": 0.6906474820143885, "grad_norm": 0.3015523412700019, "learning_rate": 1.5656552634658776e-06, "loss": 1.038594365119934, "step": 240 }, { "epoch": 0.6935251798561151, "grad_norm": 0.36455740495219996, "learning_rate": 1.561729541988076e-06, "loss": 1.0890312194824219, "step": 241 }, { "epoch": 0.6964028776978417, "grad_norm": 0.3680930663786755, "learning_rate": 1.5577911316307658e-06, "loss": 1.0601049661636353, "step": 242 }, { "epoch": 0.6992805755395683, "grad_norm": 0.2880128205816018, "learning_rate": 1.5538401213584948e-06, "loss": 0.8997229337692261, "step": 243 }, { "epoch": 0.702158273381295, "grad_norm": 0.32285821272462195, "learning_rate": 1.549876600420429e-06, "loss": 0.9955217242240906, "step": 244 }, { "epoch": 0.7050359712230215, "grad_norm": 0.29115419774124135, "learning_rate": 1.545900658348338e-06, "loss": 0.8849923610687256, "step": 245 }, { "epoch": 0.7079136690647482, "grad_norm": 0.2706455457776965, "learning_rate": 1.5419123849545708e-06, "loss": 0.9076135754585266, "step": 246 }, { "epoch": 0.7107913669064748, "grad_norm": 0.36531081434638296, "learning_rate": 1.5379118703300282e-06, "loss": 1.0192983150482178, "step": 247 }, { "epoch": 0.7136690647482015, "grad_norm": 0.35715016923044796, "learning_rate": 1.533899204842128e-06, "loss": 1.034571647644043, "step": 248 }, { "epoch": 0.7165467625899281, "grad_norm": 0.30344023495404443, "learning_rate": 1.529874479132763e-06, "loss": 0.8733739256858826, "step": 249 }, { "epoch": 0.7194244604316546, "grad_norm": 0.33635279495274495, "learning_rate": 1.5258377841162533e-06, "loss": 0.9661943316459656, "step": 250 }, { "epoch": 0.7223021582733813, "grad_norm": 0.3375538070903443, "learning_rate": 1.5217892109772935e-06, "loss": 1.0986987352371216, "step": 251 }, { "epoch": 0.7251798561151079, "grad_norm": 0.2662157952853344, "learning_rate": 1.5177288511688927e-06, "loss": 0.9541377425193787, "step": 252 }, { "epoch": 0.7280575539568346, "grad_norm": 0.30886908842504907, "learning_rate": 1.5136567964103076e-06, "loss": 1.0753300189971924, "step": 253 }, { "epoch": 0.7309352517985611, "grad_norm": 0.30701154449906404, "learning_rate": 1.5095731386849723e-06, "loss": 0.9976100921630859, "step": 254 }, { "epoch": 0.7338129496402878, "grad_norm": 0.303376410309656, "learning_rate": 1.5054779702384198e-06, "loss": 1.0058211088180542, "step": 255 }, { "epoch": 0.7366906474820144, "grad_norm": 0.3652950100731028, "learning_rate": 1.5013713835761975e-06, "loss": 1.0633628368377686, "step": 256 }, { "epoch": 0.739568345323741, "grad_norm": 0.3390438283446466, "learning_rate": 1.497253471461779e-06, "loss": 0.8934162259101868, "step": 257 }, { "epoch": 0.7424460431654676, "grad_norm": 0.3246861490189164, "learning_rate": 1.493124326914467e-06, "loss": 1.0370798110961914, "step": 258 }, { "epoch": 0.7453237410071942, "grad_norm": 0.30533785722726153, "learning_rate": 1.4889840432072945e-06, "loss": 0.9263877868652344, "step": 259 }, { "epoch": 0.7482014388489209, "grad_norm": 0.31370371579277184, "learning_rate": 1.484832713864915e-06, "loss": 0.9624022245407104, "step": 260 }, { "epoch": 0.7510791366906475, "grad_norm": 0.32008108759680487, "learning_rate": 1.4806704326614918e-06, "loss": 0.8735676407814026, "step": 261 }, { "epoch": 0.753956834532374, "grad_norm": 0.3566203918476789, "learning_rate": 1.4764972936185795e-06, "loss": 1.0989207029342651, "step": 262 }, { "epoch": 0.7568345323741007, "grad_norm": 0.36407543844243995, "learning_rate": 1.4723133910029996e-06, "loss": 0.9619901180267334, "step": 263 }, { "epoch": 0.7597122302158273, "grad_norm": 0.29266238338520917, "learning_rate": 1.4681188193247115e-06, "loss": 0.9620180130004883, "step": 264 }, { "epoch": 0.762589928057554, "grad_norm": 0.32115744502647553, "learning_rate": 1.4639136733346776e-06, "loss": 0.9723782539367676, "step": 265 }, { "epoch": 0.7654676258992805, "grad_norm": 0.32955472439646183, "learning_rate": 1.4596980480227222e-06, "loss": 1.01808762550354, "step": 266 }, { "epoch": 0.7683453237410072, "grad_norm": 0.30150737980380415, "learning_rate": 1.4554720386153869e-06, "loss": 1.0717837810516357, "step": 267 }, { "epoch": 0.7712230215827338, "grad_norm": 0.2886477892998947, "learning_rate": 1.4512357405737797e-06, "loss": 0.8863840699195862, "step": 268 }, { "epoch": 0.7741007194244605, "grad_norm": 0.3310173561528487, "learning_rate": 1.4469892495914172e-06, "loss": 0.964940071105957, "step": 269 }, { "epoch": 0.7769784172661871, "grad_norm": 0.2856238879913019, "learning_rate": 1.4427326615920641e-06, "loss": 0.9396013021469116, "step": 270 }, { "epoch": 0.7798561151079136, "grad_norm": 0.2842593394923139, "learning_rate": 1.4384660727275662e-06, "loss": 1.0147062540054321, "step": 271 }, { "epoch": 0.7827338129496403, "grad_norm": 0.3377858534929305, "learning_rate": 1.4341895793756781e-06, "loss": 1.0019702911376953, "step": 272 }, { "epoch": 0.7856115107913669, "grad_norm": 0.28919748050640776, "learning_rate": 1.4299032781378863e-06, "loss": 0.9657357931137085, "step": 273 }, { "epoch": 0.7884892086330936, "grad_norm": 0.2761094660745925, "learning_rate": 1.4256072658372278e-06, "loss": 0.9581419229507446, "step": 274 }, { "epoch": 0.7913669064748201, "grad_norm": 0.32811507081877733, "learning_rate": 1.4213016395161016e-06, "loss": 0.9768601655960083, "step": 275 }, { "epoch": 0.7942446043165468, "grad_norm": 0.3197698268118257, "learning_rate": 1.416986496434077e-06, "loss": 1.0802795886993408, "step": 276 }, { "epoch": 0.7971223021582734, "grad_norm": 0.32014493538109184, "learning_rate": 1.412661934065698e-06, "loss": 1.129173994064331, "step": 277 }, { "epoch": 0.8, "grad_norm": 0.3408260667112233, "learning_rate": 1.4083280500982796e-06, "loss": 1.0172650814056396, "step": 278 }, { "epoch": 0.8028776978417266, "grad_norm": 0.2994608201736648, "learning_rate": 1.4039849424297022e-06, "loss": 1.002464771270752, "step": 279 }, { "epoch": 0.8057553956834532, "grad_norm": 0.27936016058449986, "learning_rate": 1.3996327091661994e-06, "loss": 0.9435924887657166, "step": 280 }, { "epoch": 0.8086330935251799, "grad_norm": 0.3421589464369171, "learning_rate": 1.3952714486201433e-06, "loss": 0.9648728370666504, "step": 281 }, { "epoch": 0.8115107913669065, "grad_norm": 0.3027125759086274, "learning_rate": 1.3909012593078223e-06, "loss": 1.0883413553237915, "step": 282 }, { "epoch": 0.814388489208633, "grad_norm": 0.2718451517981759, "learning_rate": 1.3865222399472154e-06, "loss": 0.9606098532676697, "step": 283 }, { "epoch": 0.8172661870503597, "grad_norm": 0.3439278935498304, "learning_rate": 1.382134489455765e-06, "loss": 1.006915807723999, "step": 284 }, { "epoch": 0.8201438848920863, "grad_norm": 0.25579346143996035, "learning_rate": 1.3777381069481396e-06, "loss": 0.9337391257286072, "step": 285 }, { "epoch": 0.823021582733813, "grad_norm": 0.3050859668016162, "learning_rate": 1.373333191733995e-06, "loss": 0.9900962710380554, "step": 286 }, { "epoch": 0.8258992805755395, "grad_norm": 0.30270443732056235, "learning_rate": 1.3689198433157332e-06, "loss": 0.8408849835395813, "step": 287 }, { "epoch": 0.8287769784172662, "grad_norm": 0.32722776782068325, "learning_rate": 1.3644981613862523e-06, "loss": 0.9334912300109863, "step": 288 }, { "epoch": 0.8316546762589928, "grad_norm": 0.30271696679801074, "learning_rate": 1.360068245826697e-06, "loss": 0.9546651840209961, "step": 289 }, { "epoch": 0.8345323741007195, "grad_norm": 0.30274211349049623, "learning_rate": 1.3556301967041997e-06, "loss": 0.9813221096992493, "step": 290 }, { "epoch": 0.837410071942446, "grad_norm": 0.2635388567144702, "learning_rate": 1.351184114269622e-06, "loss": 0.9474866390228271, "step": 291 }, { "epoch": 0.8402877697841726, "grad_norm": 0.3010633615089385, "learning_rate": 1.34673009895529e-06, "loss": 0.986327588558197, "step": 292 }, { "epoch": 0.8431654676258993, "grad_norm": 0.3250052939342708, "learning_rate": 1.3422682513727243e-06, "loss": 0.9753819704055786, "step": 293 }, { "epoch": 0.8460431654676259, "grad_norm": 0.33588340814315554, "learning_rate": 1.3377986723103692e-06, "loss": 0.9891970753669739, "step": 294 }, { "epoch": 0.8489208633093526, "grad_norm": 0.31646789049784285, "learning_rate": 1.3333214627313138e-06, "loss": 0.9514651298522949, "step": 295 }, { "epoch": 0.8517985611510791, "grad_norm": 0.2813259845708673, "learning_rate": 1.3288367237710139e-06, "loss": 0.9831069707870483, "step": 296 }, { "epoch": 0.8546762589928057, "grad_norm": 0.30604078940680873, "learning_rate": 1.3243445567350046e-06, "loss": 0.9211512207984924, "step": 297 }, { "epoch": 0.8575539568345324, "grad_norm": 0.3167520608936244, "learning_rate": 1.319845063096615e-06, "loss": 1.0003859996795654, "step": 298 }, { "epoch": 0.860431654676259, "grad_norm": 0.31829945664441645, "learning_rate": 1.3153383444946735e-06, "loss": 0.8789474964141846, "step": 299 }, { "epoch": 0.8633093525179856, "grad_norm": 0.31495160151302437, "learning_rate": 1.3108245027312128e-06, "loss": 1.0840336084365845, "step": 300 }, { "epoch": 0.8661870503597122, "grad_norm": 0.30915276693739346, "learning_rate": 1.3063036397691708e-06, "loss": 1.0036927461624146, "step": 301 }, { "epoch": 0.8690647482014389, "grad_norm": 0.2941453011820651, "learning_rate": 1.3017758577300862e-06, "loss": 1.0740652084350586, "step": 302 }, { "epoch": 0.8719424460431655, "grad_norm": 0.29455577634561325, "learning_rate": 1.297241258891793e-06, "loss": 0.989548921585083, "step": 303 }, { "epoch": 0.874820143884892, "grad_norm": 0.3299592819973091, "learning_rate": 1.2926999456861096e-06, "loss": 1.0820207595825195, "step": 304 }, { "epoch": 0.8776978417266187, "grad_norm": 0.2673487326485298, "learning_rate": 1.2881520206965243e-06, "loss": 0.9292148351669312, "step": 305 }, { "epoch": 0.8805755395683453, "grad_norm": 0.28532631258001817, "learning_rate": 1.2835975866558792e-06, "loss": 0.9342219233512878, "step": 306 }, { "epoch": 0.883453237410072, "grad_norm": 0.3025210511532024, "learning_rate": 1.2790367464440484e-06, "loss": 0.9670717120170593, "step": 307 }, { "epoch": 0.8863309352517985, "grad_norm": 0.28896814534982135, "learning_rate": 1.2744696030856153e-06, "loss": 0.9335446357727051, "step": 308 }, { "epoch": 0.8892086330935252, "grad_norm": 0.3084903177297785, "learning_rate": 1.2698962597475445e-06, "loss": 0.9629756808280945, "step": 309 }, { "epoch": 0.8920863309352518, "grad_norm": 0.32422156062771545, "learning_rate": 1.2653168197368519e-06, "loss": 0.9787018299102783, "step": 310 }, { "epoch": 0.8949640287769784, "grad_norm": 0.30159646505494975, "learning_rate": 1.2607313864982697e-06, "loss": 0.9642415642738342, "step": 311 }, { "epoch": 0.897841726618705, "grad_norm": 0.31856979960613646, "learning_rate": 1.2561400636119124e-06, "loss": 1.0449435710906982, "step": 312 }, { "epoch": 0.9007194244604316, "grad_norm": 0.3458241524079836, "learning_rate": 1.2515429547909346e-06, "loss": 1.0429253578186035, "step": 313 }, { "epoch": 0.9035971223021583, "grad_norm": 0.30946600198200386, "learning_rate": 1.246940163879189e-06, "loss": 1.0028799772262573, "step": 314 }, { "epoch": 0.9064748201438849, "grad_norm": 0.31702914654332653, "learning_rate": 1.2423317948488813e-06, "loss": 0.9168355464935303, "step": 315 }, { "epoch": 0.9093525179856116, "grad_norm": 0.2568478715797543, "learning_rate": 1.23771795179822e-06, "loss": 0.9950739145278931, "step": 316 }, { "epoch": 0.9122302158273381, "grad_norm": 0.31321859143517206, "learning_rate": 1.233098738949067e-06, "loss": 1.0762598514556885, "step": 317 }, { "epoch": 0.9151079136690647, "grad_norm": 0.30023123590979206, "learning_rate": 1.2284742606445817e-06, "loss": 0.9474934339523315, "step": 318 }, { "epoch": 0.9179856115107914, "grad_norm": 0.29274105879380363, "learning_rate": 1.2238446213468653e-06, "loss": 0.9199013710021973, "step": 319 }, { "epoch": 0.920863309352518, "grad_norm": 0.3343568620635621, "learning_rate": 1.2192099256345999e-06, "loss": 1.0041630268096924, "step": 320 }, { "epoch": 0.9237410071942446, "grad_norm": 0.3310327147204012, "learning_rate": 1.2145702782006862e-06, "loss": 0.8189488649368286, "step": 321 }, { "epoch": 0.9266187050359712, "grad_norm": 0.2594459346828645, "learning_rate": 1.2099257838498797e-06, "loss": 0.8715246915817261, "step": 322 }, { "epoch": 0.9294964028776979, "grad_norm": 0.34108254418878664, "learning_rate": 1.205276547496423e-06, "loss": 0.9883395433425903, "step": 323 }, { "epoch": 0.9323741007194245, "grad_norm": 0.2900507060656894, "learning_rate": 1.200622674161675e-06, "loss": 1.0369722843170166, "step": 324 }, { "epoch": 0.935251798561151, "grad_norm": 0.2793616872911977, "learning_rate": 1.195964268971739e-06, "loss": 0.923148512840271, "step": 325 }, { "epoch": 0.9381294964028777, "grad_norm": 0.31499649360389437, "learning_rate": 1.191301437155088e-06, "loss": 0.9886481165885925, "step": 326 }, { "epoch": 0.9410071942446043, "grad_norm": 0.3421793579841603, "learning_rate": 1.186634284040189e-06, "loss": 1.049983263015747, "step": 327 }, { "epoch": 0.943884892086331, "grad_norm": 0.2770772261448908, "learning_rate": 1.1819629150531216e-06, "loss": 0.9720487594604492, "step": 328 }, { "epoch": 0.9467625899280575, "grad_norm": 0.31715029343065254, "learning_rate": 1.1772874357151978e-06, "loss": 0.9858945608139038, "step": 329 }, { "epoch": 0.9496402877697842, "grad_norm": 0.27382247211499205, "learning_rate": 1.1726079516405775e-06, "loss": 0.8920480012893677, "step": 330 }, { "epoch": 0.9525179856115108, "grad_norm": 0.3233112136350598, "learning_rate": 1.1679245685338845e-06, "loss": 1.059034824371338, "step": 331 }, { "epoch": 0.9553956834532374, "grad_norm": 0.3014575533302111, "learning_rate": 1.1632373921878167e-06, "loss": 0.9916867017745972, "step": 332 }, { "epoch": 0.958273381294964, "grad_norm": 0.2733813212594252, "learning_rate": 1.1585465284807575e-06, "loss": 1.0110840797424316, "step": 333 }, { "epoch": 0.9611510791366906, "grad_norm": 0.28448057189574405, "learning_rate": 1.1538520833743843e-06, "loss": 0.9681780338287354, "step": 334 }, { "epoch": 0.9640287769784173, "grad_norm": 0.28115226388230347, "learning_rate": 1.1491541629112744e-06, "loss": 0.9256088733673096, "step": 335 }, { "epoch": 0.9669064748201439, "grad_norm": 0.29939601958186174, "learning_rate": 1.1444528732125096e-06, "loss": 0.9332914352416992, "step": 336 }, { "epoch": 0.9697841726618706, "grad_norm": 0.32298682544185786, "learning_rate": 1.1397483204752789e-06, "loss": 0.9759551882743835, "step": 337 }, { "epoch": 0.9726618705035971, "grad_norm": 0.32833751314810994, "learning_rate": 1.1350406109704804e-06, "loss": 0.955263614654541, "step": 338 }, { "epoch": 0.9755395683453237, "grad_norm": 0.3075747126988841, "learning_rate": 1.1303298510403204e-06, "loss": 1.0056906938552856, "step": 339 }, { "epoch": 0.9784172661870504, "grad_norm": 0.3107177190802721, "learning_rate": 1.1256161470959105e-06, "loss": 1.0631227493286133, "step": 340 }, { "epoch": 0.981294964028777, "grad_norm": 0.26705526854232686, "learning_rate": 1.1208996056148645e-06, "loss": 0.901911735534668, "step": 341 }, { "epoch": 0.9841726618705036, "grad_norm": 0.35096026513434014, "learning_rate": 1.116180333138894e-06, "loss": 0.9325671195983887, "step": 342 }, { "epoch": 0.9870503597122302, "grad_norm": 0.3163252628308116, "learning_rate": 1.1114584362714004e-06, "loss": 0.9670236706733704, "step": 343 }, { "epoch": 0.9899280575539569, "grad_norm": 0.3187089894926652, "learning_rate": 1.1067340216750666e-06, "loss": 0.9988418221473694, "step": 344 }, { "epoch": 0.9928057553956835, "grad_norm": 0.31077878018423455, "learning_rate": 1.1020071960694498e-06, "loss": 0.9381593465805054, "step": 345 }, { "epoch": 0.99568345323741, "grad_norm": 0.3019571122091896, "learning_rate": 1.0972780662285681e-06, "loss": 0.993405818939209, "step": 346 }, { "epoch": 0.9985611510791367, "grad_norm": 0.30414175155215467, "learning_rate": 1.0925467389784904e-06, "loss": 0.9964547753334045, "step": 347 }, { "epoch": 1.0, "grad_norm": 0.39789673725223623, "learning_rate": 1.0878133211949227e-06, "loss": 0.8202004432678223, "step": 348 }, { "epoch": 1.0028776978417266, "grad_norm": 0.2656098302983397, "learning_rate": 1.0830779198007942e-06, "loss": 0.9116101264953613, "step": 349 }, { "epoch": 1.0057553956834533, "grad_norm": 0.3008389282200421, "learning_rate": 1.0783406417638417e-06, "loss": 0.9478936791419983, "step": 350 }, { "epoch": 1.00863309352518, "grad_norm": 0.35871582917260014, "learning_rate": 1.0736015940941926e-06, "loss": 0.8595709800720215, "step": 351 }, { "epoch": 1.0115107913669066, "grad_norm": 0.31305380574483693, "learning_rate": 1.0688608838419494e-06, "loss": 0.8735829591751099, "step": 352 }, { "epoch": 1.014388489208633, "grad_norm": 0.3548820441522539, "learning_rate": 1.0641186180947708e-06, "loss": 0.9741727113723755, "step": 353 }, { "epoch": 1.0172661870503596, "grad_norm": 0.36321462939223775, "learning_rate": 1.059374903975451e-06, "loss": 0.8974572420120239, "step": 354 }, { "epoch": 1.0201438848920863, "grad_norm": 0.29496183204532933, "learning_rate": 1.0546298486395032e-06, "loss": 0.9210361242294312, "step": 355 }, { "epoch": 1.023021582733813, "grad_norm": 0.2928921022780455, "learning_rate": 1.0498835592727356e-06, "loss": 0.9430476427078247, "step": 356 }, { "epoch": 1.0258992805755396, "grad_norm": 0.3049343344878922, "learning_rate": 1.0451361430888335e-06, "loss": 0.861330509185791, "step": 357 }, { "epoch": 1.0287769784172662, "grad_norm": 0.329234989560513, "learning_rate": 1.0403877073269346e-06, "loss": 0.9548070430755615, "step": 358 }, { "epoch": 1.0316546762589929, "grad_norm": 0.2927660935027829, "learning_rate": 1.0356383592492083e-06, "loss": 0.9394206404685974, "step": 359 }, { "epoch": 1.0345323741007195, "grad_norm": 0.28029486425646316, "learning_rate": 1.0308882061384322e-06, "loss": 0.940388560295105, "step": 360 }, { "epoch": 1.037410071942446, "grad_norm": 0.3277189443862227, "learning_rate": 1.0261373552955689e-06, "loss": 1.0485488176345825, "step": 361 }, { "epoch": 1.0402877697841726, "grad_norm": 0.3336979554860064, "learning_rate": 1.021385914037341e-06, "loss": 1.006148338317871, "step": 362 }, { "epoch": 1.0431654676258992, "grad_norm": 0.3034246503039526, "learning_rate": 1.0166339896938096e-06, "loss": 1.040244460105896, "step": 363 }, { "epoch": 1.0460431654676259, "grad_norm": 0.2909854917188287, "learning_rate": 1.0118816896059472e-06, "loss": 1.0620298385620117, "step": 364 }, { "epoch": 1.0489208633093525, "grad_norm": 0.3342829322918414, "learning_rate": 1.0071291211232142e-06, "loss": 1.0369703769683838, "step": 365 }, { "epoch": 1.0517985611510792, "grad_norm": 0.3620459894772739, "learning_rate": 1.0023763916011337e-06, "loss": 1.005780816078186, "step": 366 }, { "epoch": 1.0546762589928058, "grad_norm": 0.3245799271587319, "learning_rate": 9.976236083988662e-07, "loss": 1.0978028774261475, "step": 367 }, { "epoch": 1.0575539568345325, "grad_norm": 0.2906620053944161, "learning_rate": 9.928708788767857e-07, "loss": 1.0192337036132812, "step": 368 }, { "epoch": 1.0604316546762589, "grad_norm": 0.2788940484645077, "learning_rate": 9.881183103940525e-07, "loss": 1.0336426496505737, "step": 369 }, { "epoch": 1.0633093525179855, "grad_norm": 0.2646864305544589, "learning_rate": 9.833660103061903e-07, "loss": 0.9359861612319946, "step": 370 }, { "epoch": 1.0661870503597122, "grad_norm": 0.3024535696584821, "learning_rate": 9.78614085962659e-07, "loss": 0.9596098065376282, "step": 371 }, { "epoch": 1.0690647482014388, "grad_norm": 0.3433854130018685, "learning_rate": 9.738626447044315e-07, "loss": 0.9648246169090271, "step": 372 }, { "epoch": 1.0719424460431655, "grad_norm": 0.29727053840895096, "learning_rate": 9.691117938615677e-07, "loss": 0.937362551689148, "step": 373 }, { "epoch": 1.074820143884892, "grad_norm": 0.3148473300114735, "learning_rate": 9.643616407507916e-07, "loss": 1.0278003215789795, "step": 374 }, { "epoch": 1.0776978417266188, "grad_norm": 0.34189586023197116, "learning_rate": 9.596122926730653e-07, "loss": 1.053139090538025, "step": 375 }, { "epoch": 1.0805755395683454, "grad_norm": 0.2979805327926045, "learning_rate": 9.548638569111664e-07, "loss": 0.9690728187561035, "step": 376 }, { "epoch": 1.083453237410072, "grad_norm": 0.32530063715847013, "learning_rate": 9.501164407272641e-07, "loss": 0.9638134837150574, "step": 377 }, { "epoch": 1.0863309352517985, "grad_norm": 0.28340436102152416, "learning_rate": 9.453701513604971e-07, "loss": 1.0154237747192383, "step": 378 }, { "epoch": 1.0892086330935251, "grad_norm": 0.3344518694998693, "learning_rate": 9.406250960245492e-07, "loss": 1.0023622512817383, "step": 379 }, { "epoch": 1.0920863309352518, "grad_norm": 0.3057560258941549, "learning_rate": 9.358813819052293e-07, "loss": 0.943859338760376, "step": 380 }, { "epoch": 1.0949640287769784, "grad_norm": 0.31581539413489, "learning_rate": 9.311391161580505e-07, "loss": 0.9440896511077881, "step": 381 }, { "epoch": 1.097841726618705, "grad_norm": 0.28253845985432785, "learning_rate": 9.263984059058073e-07, "loss": 0.9603610634803772, "step": 382 }, { "epoch": 1.1007194244604317, "grad_norm": 0.3743060929428334, "learning_rate": 9.216593582361584e-07, "loss": 0.9380893707275391, "step": 383 }, { "epoch": 1.1035971223021583, "grad_norm": 0.29782880129060824, "learning_rate": 9.169220801992054e-07, "loss": 0.9593515396118164, "step": 384 }, { "epoch": 1.106474820143885, "grad_norm": 0.25830569907542367, "learning_rate": 9.121866788050772e-07, "loss": 0.9210997819900513, "step": 385 }, { "epoch": 1.1093525179856114, "grad_norm": 0.29300460156324154, "learning_rate": 9.074532610215097e-07, "loss": 0.9233313798904419, "step": 386 }, { "epoch": 1.112230215827338, "grad_norm": 0.2922799068070249, "learning_rate": 9.027219337714323e-07, "loss": 0.9572200179100037, "step": 387 }, { "epoch": 1.1151079136690647, "grad_norm": 0.294905286599111, "learning_rate": 8.979928039305502e-07, "loss": 1.0306824445724487, "step": 388 }, { "epoch": 1.1179856115107913, "grad_norm": 0.3096443969458927, "learning_rate": 8.932659783249332e-07, "loss": 0.9011950492858887, "step": 389 }, { "epoch": 1.120863309352518, "grad_norm": 0.269705652072412, "learning_rate": 8.885415637285997e-07, "loss": 0.9103861451148987, "step": 390 }, { "epoch": 1.1237410071942446, "grad_norm": 0.3045551669252975, "learning_rate": 8.838196668611056e-07, "loss": 0.9500089883804321, "step": 391 }, { "epoch": 1.1266187050359713, "grad_norm": 0.3584458914570723, "learning_rate": 8.791003943851352e-07, "loss": 1.0625544786453247, "step": 392 }, { "epoch": 1.129496402877698, "grad_norm": 0.40204111666436204, "learning_rate": 8.743838529040896e-07, "loss": 1.0451273918151855, "step": 393 }, { "epoch": 1.1323741007194243, "grad_norm": 0.2801446258828758, "learning_rate": 8.696701489596796e-07, "loss": 0.8780025839805603, "step": 394 }, { "epoch": 1.135251798561151, "grad_norm": 0.30848025340500973, "learning_rate": 8.649593890295195e-07, "loss": 0.853165328502655, "step": 395 }, { "epoch": 1.1381294964028776, "grad_norm": 0.2774601092911939, "learning_rate": 8.602516795247212e-07, "loss": 0.997830331325531, "step": 396 }, { "epoch": 1.1410071942446043, "grad_norm": 0.3339281391141689, "learning_rate": 8.555471267874904e-07, "loss": 1.0442490577697754, "step": 397 }, { "epoch": 1.143884892086331, "grad_norm": 0.2740872772386324, "learning_rate": 8.508458370887254e-07, "loss": 0.9518193602561951, "step": 398 }, { "epoch": 1.1467625899280576, "grad_norm": 0.2720117171082711, "learning_rate": 8.461479166256155e-07, "loss": 0.8949469327926636, "step": 399 }, { "epoch": 1.1496402877697842, "grad_norm": 0.296704642333982, "learning_rate": 8.414534715192424e-07, "loss": 1.002563714981079, "step": 400 }, { "epoch": 1.1525179856115109, "grad_norm": 0.3435410162561758, "learning_rate": 8.367626078121836e-07, "loss": 0.972290575504303, "step": 401 }, { "epoch": 1.1553956834532375, "grad_norm": 0.3129884619450547, "learning_rate": 8.320754314661158e-07, "loss": 1.01462984085083, "step": 402 }, { "epoch": 1.158273381294964, "grad_norm": 0.30417813847501757, "learning_rate": 8.273920483594224e-07, "loss": 0.9698868989944458, "step": 403 }, { "epoch": 1.1611510791366906, "grad_norm": 0.3020411325830846, "learning_rate": 8.227125642848023e-07, "loss": 0.9128695726394653, "step": 404 }, { "epoch": 1.1640287769784172, "grad_norm": 0.346879261409609, "learning_rate": 8.180370849468783e-07, "loss": 0.9776325821876526, "step": 405 }, { "epoch": 1.1669064748201439, "grad_norm": 0.36045310471755976, "learning_rate": 8.133657159598107e-07, "loss": 1.0711374282836914, "step": 406 }, { "epoch": 1.1697841726618705, "grad_norm": 0.33405600358385434, "learning_rate": 8.086985628449118e-07, "loss": 1.01808500289917, "step": 407 }, { "epoch": 1.1726618705035972, "grad_norm": 0.33784058532809186, "learning_rate": 8.040357310282614e-07, "loss": 1.073177695274353, "step": 408 }, { "epoch": 1.1755395683453238, "grad_norm": 0.31432792819184735, "learning_rate": 7.993773258383251e-07, "loss": 0.9766973257064819, "step": 409 }, { "epoch": 1.1784172661870504, "grad_norm": 0.29143664503321964, "learning_rate": 7.94723452503577e-07, "loss": 0.8510106801986694, "step": 410 }, { "epoch": 1.181294964028777, "grad_norm": 0.3117754281019984, "learning_rate": 7.900742161501203e-07, "loss": 0.9605945348739624, "step": 411 }, { "epoch": 1.1841726618705035, "grad_norm": 0.3083610748282882, "learning_rate": 7.854297217993138e-07, "loss": 1.0148074626922607, "step": 412 }, { "epoch": 1.1870503597122302, "grad_norm": 0.3292067324035066, "learning_rate": 7.807900743654003e-07, "loss": 1.0121517181396484, "step": 413 }, { "epoch": 1.1899280575539568, "grad_norm": 0.2906819013001279, "learning_rate": 7.761553786531344e-07, "loss": 0.9553067684173584, "step": 414 }, { "epoch": 1.1928057553956835, "grad_norm": 0.25709992178123586, "learning_rate": 7.71525739355418e-07, "loss": 0.8994815945625305, "step": 415 }, { "epoch": 1.19568345323741, "grad_norm": 0.36452886143608954, "learning_rate": 7.669012610509332e-07, "loss": 0.953561544418335, "step": 416 }, { "epoch": 1.1985611510791367, "grad_norm": 0.3247835868196829, "learning_rate": 7.622820482017803e-07, "loss": 0.9593473076820374, "step": 417 }, { "epoch": 1.2014388489208634, "grad_norm": 0.2805164760032836, "learning_rate": 7.57668205151119e-07, "loss": 0.9459452629089355, "step": 418 }, { "epoch": 1.2043165467625898, "grad_norm": 0.28523487135593184, "learning_rate": 7.53059836120811e-07, "loss": 0.9797439575195312, "step": 419 }, { "epoch": 1.2071942446043165, "grad_norm": 0.3003875524590878, "learning_rate": 7.484570452090654e-07, "loss": 0.9212760925292969, "step": 420 }, { "epoch": 1.210071942446043, "grad_norm": 0.34132960956027913, "learning_rate": 7.438599363880873e-07, "loss": 1.0429980754852295, "step": 421 }, { "epoch": 1.2129496402877697, "grad_norm": 0.30371204747015557, "learning_rate": 7.3926861350173e-07, "loss": 0.9603173732757568, "step": 422 }, { "epoch": 1.2158273381294964, "grad_norm": 0.3162676331919284, "learning_rate": 7.346831802631485e-07, "loss": 1.010259985923767, "step": 423 }, { "epoch": 1.218705035971223, "grad_norm": 0.335784603934896, "learning_rate": 7.301037402524554e-07, "loss": 0.9941245913505554, "step": 424 }, { "epoch": 1.2215827338129497, "grad_norm": 0.34689945542903367, "learning_rate": 7.255303969143847e-07, "loss": 0.9076559543609619, "step": 425 }, { "epoch": 1.2244604316546763, "grad_norm": 0.30699564205037894, "learning_rate": 7.209632535559517e-07, "loss": 0.9393267035484314, "step": 426 }, { "epoch": 1.227338129496403, "grad_norm": 0.3645881875578552, "learning_rate": 7.164024133441209e-07, "loss": 1.0797785520553589, "step": 427 }, { "epoch": 1.2302158273381294, "grad_norm": 0.30842152383790683, "learning_rate": 7.118479793034757e-07, "loss": 0.8502181172370911, "step": 428 }, { "epoch": 1.233093525179856, "grad_norm": 0.2837918860269475, "learning_rate": 7.073000543138903e-07, "loss": 0.8781344294548035, "step": 429 }, { "epoch": 1.2359712230215827, "grad_norm": 0.30258666124852246, "learning_rate": 7.027587411082068e-07, "loss": 0.9787595272064209, "step": 430 }, { "epoch": 1.2388489208633093, "grad_norm": 0.32255457306195484, "learning_rate": 6.98224142269914e-07, "loss": 1.0447101593017578, "step": 431 }, { "epoch": 1.241726618705036, "grad_norm": 0.2842145651170118, "learning_rate": 6.936963602308296e-07, "loss": 0.8477309942245483, "step": 432 }, { "epoch": 1.2446043165467626, "grad_norm": 0.2857078174523759, "learning_rate": 6.891754972687872e-07, "loss": 0.973019003868103, "step": 433 }, { "epoch": 1.2474820143884893, "grad_norm": 0.26675519419868937, "learning_rate": 6.846616555053265e-07, "loss": 0.8788484930992126, "step": 434 }, { "epoch": 1.2503597122302157, "grad_norm": 0.2722315460093143, "learning_rate": 6.80154936903385e-07, "loss": 0.8963809013366699, "step": 435 }, { "epoch": 1.2532374100719426, "grad_norm": 0.321353554344839, "learning_rate": 6.756554432649952e-07, "loss": 0.9304237365722656, "step": 436 }, { "epoch": 1.256115107913669, "grad_norm": 0.8802677887155806, "learning_rate": 6.711632762289863e-07, "loss": 0.9569498300552368, "step": 437 }, { "epoch": 1.2589928057553956, "grad_norm": 0.3027788818991336, "learning_rate": 6.666785372686862e-07, "loss": 0.9950339198112488, "step": 438 }, { "epoch": 1.2618705035971223, "grad_norm": 0.3278298564206992, "learning_rate": 6.622013276896309e-07, "loss": 1.0428767204284668, "step": 439 }, { "epoch": 1.264748201438849, "grad_norm": 0.322273469382939, "learning_rate": 6.577317486272756e-07, "loss": 1.0519962310791016, "step": 440 }, { "epoch": 1.2676258992805756, "grad_norm": 0.27000899918490673, "learning_rate": 6.5326990104471e-07, "loss": 0.9430403709411621, "step": 441 }, { "epoch": 1.2705035971223022, "grad_norm": 0.2733597667465118, "learning_rate": 6.488158857303778e-07, "loss": 0.8923604488372803, "step": 442 }, { "epoch": 1.2733812949640289, "grad_norm": 0.3379057723615061, "learning_rate": 6.443698032958003e-07, "loss": 0.8423130512237549, "step": 443 }, { "epoch": 1.2762589928057553, "grad_norm": 0.298352488485131, "learning_rate": 6.399317541733029e-07, "loss": 0.8984063863754272, "step": 444 }, { "epoch": 1.2791366906474821, "grad_norm": 0.3079552906979132, "learning_rate": 6.355018386137474e-07, "loss": 1.0057708024978638, "step": 445 }, { "epoch": 1.2820143884892086, "grad_norm": 0.3494450738277216, "learning_rate": 6.310801566842671e-07, "loss": 1.0255926847457886, "step": 446 }, { "epoch": 1.2848920863309352, "grad_norm": 0.32073469498291907, "learning_rate": 6.266668082660051e-07, "loss": 0.9159607291221619, "step": 447 }, { "epoch": 1.2877697841726619, "grad_norm": 0.30277181047008334, "learning_rate": 6.222618930518604e-07, "loss": 0.9396940469741821, "step": 448 }, { "epoch": 1.2906474820143885, "grad_norm": 0.2996729716461448, "learning_rate": 6.178655105442347e-07, "loss": 0.9432433247566223, "step": 449 }, { "epoch": 1.2935251798561151, "grad_norm": 0.31105342806959047, "learning_rate": 6.134777600527845e-07, "loss": 0.963239848613739, "step": 450 }, { "epoch": 1.2964028776978418, "grad_norm": 0.30886949969557964, "learning_rate": 6.09098740692178e-07, "loss": 0.8668818473815918, "step": 451 }, { "epoch": 1.2992805755395684, "grad_norm": 0.34250157527572056, "learning_rate": 6.047285513798568e-07, "loss": 0.9877142310142517, "step": 452 }, { "epoch": 1.3021582733812949, "grad_norm": 0.3321433510552703, "learning_rate": 6.003672908338008e-07, "loss": 1.1121788024902344, "step": 453 }, { "epoch": 1.3050359712230215, "grad_norm": 0.34432256224945607, "learning_rate": 5.96015057570298e-07, "loss": 0.9205185770988464, "step": 454 }, { "epoch": 1.3079136690647482, "grad_norm": 0.3129154481448231, "learning_rate": 5.916719499017206e-07, "loss": 0.9529520869255066, "step": 455 }, { "epoch": 1.3107913669064748, "grad_norm": 0.3072358733262211, "learning_rate": 5.873380659343021e-07, "loss": 0.8947219252586365, "step": 456 }, { "epoch": 1.3136690647482014, "grad_norm": 0.2689806016977398, "learning_rate": 5.83013503565923e-07, "loss": 0.8450409173965454, "step": 457 }, { "epoch": 1.316546762589928, "grad_norm": 0.30257611336308615, "learning_rate": 5.786983604838983e-07, "loss": 0.9042650461196899, "step": 458 }, { "epoch": 1.3194244604316547, "grad_norm": 0.30536933730041105, "learning_rate": 5.743927341627722e-07, "loss": 0.9189790487289429, "step": 459 }, { "epoch": 1.3223021582733812, "grad_norm": 0.3116994635317501, "learning_rate": 5.700967218621133e-07, "loss": 0.9711490869522095, "step": 460 }, { "epoch": 1.325179856115108, "grad_norm": 0.29567216593755763, "learning_rate": 5.658104206243221e-07, "loss": 0.9121512174606323, "step": 461 }, { "epoch": 1.3280575539568344, "grad_norm": 0.33097880600820834, "learning_rate": 5.615339272724337e-07, "loss": 1.0232415199279785, "step": 462 }, { "epoch": 1.330935251798561, "grad_norm": 0.30421121109984933, "learning_rate": 5.572673384079361e-07, "loss": 0.9682353734970093, "step": 463 }, { "epoch": 1.3338129496402877, "grad_norm": 0.2989421078796733, "learning_rate": 5.530107504085829e-07, "loss": 1.0788567066192627, "step": 464 }, { "epoch": 1.3366906474820144, "grad_norm": 0.3368608300644779, "learning_rate": 5.487642594262203e-07, "loss": 1.0391610860824585, "step": 465 }, { "epoch": 1.339568345323741, "grad_norm": 0.31237915616425, "learning_rate": 5.445279613846132e-07, "loss": 0.97783362865448, "step": 466 }, { "epoch": 1.3424460431654677, "grad_norm": 0.2917517042157253, "learning_rate": 5.40301951977278e-07, "loss": 0.9356849193572998, "step": 467 }, { "epoch": 1.3453237410071943, "grad_norm": 0.2906559333604664, "learning_rate": 5.360863266653227e-07, "loss": 0.9209206104278564, "step": 468 }, { "epoch": 1.3482014388489207, "grad_norm": 0.2762311855557999, "learning_rate": 5.318811806752883e-07, "loss": 0.8892006278038025, "step": 469 }, { "epoch": 1.3510791366906476, "grad_norm": 0.2843773069845965, "learning_rate": 5.276866089970004e-07, "loss": 0.8870881795883179, "step": 470 }, { "epoch": 1.353956834532374, "grad_norm": 0.3214966447300032, "learning_rate": 5.235027063814204e-07, "loss": 1.05729341506958, "step": 471 }, { "epoch": 1.3568345323741007, "grad_norm": 0.29929162633018896, "learning_rate": 5.193295673385081e-07, "loss": 0.9966158866882324, "step": 472 }, { "epoch": 1.3597122302158273, "grad_norm": 0.2845609225335763, "learning_rate": 5.151672861350849e-07, "loss": 0.983919084072113, "step": 473 }, { "epoch": 1.362589928057554, "grad_norm": 0.2872905985027109, "learning_rate": 5.110159567927056e-07, "loss": 0.9776226282119751, "step": 474 }, { "epoch": 1.3654676258992806, "grad_norm": 0.3004471218975015, "learning_rate": 5.068756730855328e-07, "loss": 0.8701659440994263, "step": 475 }, { "epoch": 1.3683453237410073, "grad_norm": 0.27295033239262645, "learning_rate": 5.027465285382213e-07, "loss": 0.8881811499595642, "step": 476 }, { "epoch": 1.371223021582734, "grad_norm": 0.2943944847225173, "learning_rate": 4.986286164238025e-07, "loss": 0.8865438103675842, "step": 477 }, { "epoch": 1.3741007194244603, "grad_norm": 0.2867589741491455, "learning_rate": 4.945220297615805e-07, "loss": 0.9757734537124634, "step": 478 }, { "epoch": 1.376978417266187, "grad_norm": 0.30445366971206, "learning_rate": 4.904268613150278e-07, "loss": 0.9451441764831543, "step": 479 }, { "epoch": 1.3798561151079136, "grad_norm": 0.3764482983471657, "learning_rate": 4.863432035896924e-07, "loss": 1.0263563394546509, "step": 480 }, { "epoch": 1.3827338129496403, "grad_norm": 0.28674275025350787, "learning_rate": 4.822711488311076e-07, "loss": 0.9233589768409729, "step": 481 }, { "epoch": 1.385611510791367, "grad_norm": 0.28357755576379123, "learning_rate": 4.782107890227065e-07, "loss": 0.8945414423942566, "step": 482 }, { "epoch": 1.3884892086330936, "grad_norm": 0.3296678488028909, "learning_rate": 4.7416221588374695e-07, "loss": 0.92512047290802, "step": 483 }, { "epoch": 1.3913669064748202, "grad_norm": 0.34335013571463424, "learning_rate": 4.701255208672371e-07, "loss": 0.8945969343185425, "step": 484 }, { "epoch": 1.3942446043165468, "grad_norm": 0.2681643725763488, "learning_rate": 4.6610079515787217e-07, "loss": 0.8868216872215271, "step": 485 }, { "epoch": 1.3971223021582735, "grad_norm": 0.29646359252343524, "learning_rate": 4.620881296699718e-07, "loss": 0.8830418586730957, "step": 486 }, { "epoch": 1.4, "grad_norm": 0.34972600472566895, "learning_rate": 4.5808761504542915e-07, "loss": 1.0035524368286133, "step": 487 }, { "epoch": 1.4028776978417266, "grad_norm": 0.3131507468407678, "learning_rate": 4.5409934165166174e-07, "loss": 0.9483344554901123, "step": 488 }, { "epoch": 1.4057553956834532, "grad_norm": 0.3077131853121991, "learning_rate": 4.501233995795708e-07, "loss": 1.042191982269287, "step": 489 }, { "epoch": 1.4086330935251798, "grad_norm": 0.30297924853409636, "learning_rate": 4.4615987864150517e-07, "loss": 0.9574159383773804, "step": 490 }, { "epoch": 1.4115107913669065, "grad_norm": 0.3101670489465847, "learning_rate": 4.4220886836923443e-07, "loss": 0.9550837874412537, "step": 491 }, { "epoch": 1.4143884892086331, "grad_norm": 0.3028518200998954, "learning_rate": 4.382704580119242e-07, "loss": 0.9465584754943848, "step": 492 }, { "epoch": 1.4172661870503598, "grad_norm": 0.3563033540312973, "learning_rate": 4.343447365341225e-07, "loss": 1.0004384517669678, "step": 493 }, { "epoch": 1.4201438848920862, "grad_norm": 0.33396685382103786, "learning_rate": 4.3043179261374775e-07, "loss": 0.9882891774177551, "step": 494 }, { "epoch": 1.423021582733813, "grad_norm": 0.30772910816683374, "learning_rate": 4.265317146400876e-07, "loss": 0.9874916672706604, "step": 495 }, { "epoch": 1.4258992805755395, "grad_norm": 0.30524559446834776, "learning_rate": 4.226445907118018e-07, "loss": 0.9210883378982544, "step": 496 }, { "epoch": 1.4287769784172661, "grad_norm": 0.3267114002295349, "learning_rate": 4.1877050863493037e-07, "loss": 0.9688763618469238, "step": 497 }, { "epoch": 1.4316546762589928, "grad_norm": 0.27339720775223914, "learning_rate": 4.1490955592091325e-07, "loss": 0.8747698068618774, "step": 498 }, { "epoch": 1.4345323741007194, "grad_norm": 0.28341881156979953, "learning_rate": 4.110618197846105e-07, "loss": 0.9002431035041809, "step": 499 }, { "epoch": 1.437410071942446, "grad_norm": 0.3157537655118046, "learning_rate": 4.0722738714233475e-07, "loss": 0.9333710074424744, "step": 500 }, { "epoch": 1.4402877697841727, "grad_norm": 0.3385136686246348, "learning_rate": 4.0340634460988634e-07, "loss": 0.9397541284561157, "step": 501 }, { "epoch": 1.4431654676258994, "grad_norm": 0.2799609416561718, "learning_rate": 3.9959877850059654e-07, "loss": 0.9181256890296936, "step": 502 }, { "epoch": 1.4460431654676258, "grad_norm": 0.32743527244615317, "learning_rate": 3.958047748233789e-07, "loss": 0.9613093733787537, "step": 503 }, { "epoch": 1.4489208633093524, "grad_norm": 0.31262261972142885, "learning_rate": 3.920244192807864e-07, "loss": 1.006971836090088, "step": 504 }, { "epoch": 1.451798561151079, "grad_norm": 0.2756374429613347, "learning_rate": 3.8825779726707363e-07, "loss": 0.9426612854003906, "step": 505 }, { "epoch": 1.4546762589928057, "grad_norm": 0.31900967384513096, "learning_rate": 3.845049938662709e-07, "loss": 1.0267070531845093, "step": 506 }, { "epoch": 1.4575539568345324, "grad_norm": 0.2806375956545195, "learning_rate": 3.807660938502588e-07, "loss": 0.8537903428077698, "step": 507 }, { "epoch": 1.460431654676259, "grad_norm": 0.2679665420161734, "learning_rate": 3.770411816768567e-07, "loss": 0.8869454860687256, "step": 508 }, { "epoch": 1.4633093525179857, "grad_norm": 0.2940220938489727, "learning_rate": 3.733303414879121e-07, "loss": 0.9901649951934814, "step": 509 }, { "epoch": 1.4661870503597123, "grad_norm": 0.3002023454804642, "learning_rate": 3.696336571074019e-07, "loss": 0.893314003944397, "step": 510 }, { "epoch": 1.469064748201439, "grad_norm": 0.2989449706350416, "learning_rate": 3.659512120395384e-07, "loss": 0.9651301503181458, "step": 511 }, { "epoch": 1.4719424460431654, "grad_norm": 0.31184751443962444, "learning_rate": 3.6228308946688156e-07, "loss": 0.9276424646377563, "step": 512 }, { "epoch": 1.474820143884892, "grad_norm": 0.3280970794871676, "learning_rate": 3.586293722484628e-07, "loss": 0.9295877814292908, "step": 513 }, { "epoch": 1.4776978417266187, "grad_norm": 0.30361568371256686, "learning_rate": 3.549901429179103e-07, "loss": 0.9780776500701904, "step": 514 }, { "epoch": 1.4805755395683453, "grad_norm": 0.31276714037726877, "learning_rate": 3.513654836815871e-07, "loss": 0.9611223936080933, "step": 515 }, { "epoch": 1.483453237410072, "grad_norm": 0.29813652177832717, "learning_rate": 3.477554764167333e-07, "loss": 0.9399750232696533, "step": 516 }, { "epoch": 1.4863309352517986, "grad_norm": 0.2879175131540814, "learning_rate": 3.44160202669615e-07, "loss": 0.9704416990280151, "step": 517 }, { "epoch": 1.4892086330935252, "grad_norm": 0.27203404497254424, "learning_rate": 3.4057974365368494e-07, "loss": 0.9447322487831116, "step": 518 }, { "epoch": 1.4920863309352517, "grad_norm": 0.3345564889146381, "learning_rate": 3.3701418024774654e-07, "loss": 0.9653472900390625, "step": 519 }, { "epoch": 1.4949640287769785, "grad_norm": 0.3592025624857194, "learning_rate": 3.334635929941262e-07, "loss": 1.0416182279586792, "step": 520 }, { "epoch": 1.497841726618705, "grad_norm": 0.3021027064210215, "learning_rate": 3.29928062096856e-07, "loss": 0.8763036727905273, "step": 521 }, { "epoch": 1.5007194244604316, "grad_norm": 0.3300914239445515, "learning_rate": 3.264076674198594e-07, "loss": 0.9283760786056519, "step": 522 }, { "epoch": 1.5035971223021583, "grad_norm": 0.30648914339336775, "learning_rate": 3.229024884851499e-07, "loss": 1.0218451023101807, "step": 523 }, { "epoch": 1.506474820143885, "grad_norm": 0.3028904431521258, "learning_rate": 3.1941260447103226e-07, "loss": 0.9676252603530884, "step": 524 }, { "epoch": 1.5093525179856115, "grad_norm": 0.31498989874096944, "learning_rate": 3.159380942103158e-07, "loss": 1.0615897178649902, "step": 525 }, { "epoch": 1.512230215827338, "grad_norm": 0.3812643080356087, "learning_rate": 3.1247903618853323e-07, "loss": 1.0494942665100098, "step": 526 }, { "epoch": 1.5151079136690648, "grad_norm": 0.3015637470031748, "learning_rate": 3.0903550854216597e-07, "loss": 0.9968015551567078, "step": 527 }, { "epoch": 1.5179856115107913, "grad_norm": 0.31980913910153197, "learning_rate": 3.0560758905688243e-07, "loss": 0.8924911022186279, "step": 528 }, { "epoch": 1.5208633093525181, "grad_norm": 0.3219645985844337, "learning_rate": 3.021953551657773e-07, "loss": 0.9802518486976624, "step": 529 }, { "epoch": 1.5237410071942445, "grad_norm": 0.32235606474899925, "learning_rate": 2.9879888394762576e-07, "loss": 1.0681138038635254, "step": 530 }, { "epoch": 1.5266187050359712, "grad_norm": 0.29753869212587086, "learning_rate": 2.9541825212514006e-07, "loss": 1.0018823146820068, "step": 531 }, { "epoch": 1.5294964028776978, "grad_norm": 0.32215332495706295, "learning_rate": 2.920535360632368e-07, "loss": 1.0154387950897217, "step": 532 }, { "epoch": 1.5323741007194245, "grad_norm": 0.33197651591712296, "learning_rate": 2.8870481176731287e-07, "loss": 1.0280838012695312, "step": 533 }, { "epoch": 1.5352517985611511, "grad_norm": 0.31343044639526885, "learning_rate": 2.853721548815283e-07, "loss": 1.0017954111099243, "step": 534 }, { "epoch": 1.5381294964028775, "grad_norm": 0.3073520760945164, "learning_rate": 2.8205564068709596e-07, "loss": 0.9721263647079468, "step": 535 }, { "epoch": 1.5410071942446044, "grad_norm": 0.27794918211270764, "learning_rate": 2.787553441005839e-07, "loss": 0.8929443955421448, "step": 536 }, { "epoch": 1.5438848920863308, "grad_norm": 0.36462602887473333, "learning_rate": 2.754713396722198e-07, "loss": 0.8837905526161194, "step": 537 }, { "epoch": 1.5467625899280577, "grad_norm": 0.2590201800878289, "learning_rate": 2.7220370158421026e-07, "loss": 0.9194425344467163, "step": 538 }, { "epoch": 1.5496402877697841, "grad_norm": 0.29622142248800076, "learning_rate": 2.68952503649062e-07, "loss": 0.9988787174224854, "step": 539 }, { "epoch": 1.5525179856115108, "grad_norm": 0.3126140379352631, "learning_rate": 2.65717819307917e-07, "loss": 0.9965845942497253, "step": 540 }, { "epoch": 1.5553956834532374, "grad_norm": 0.3415760999590957, "learning_rate": 2.6249972162889244e-07, "loss": 1.0078110694885254, "step": 541 }, { "epoch": 1.558273381294964, "grad_norm": 0.3369977516727391, "learning_rate": 2.5929828330542935e-07, "loss": 1.0197123289108276, "step": 542 }, { "epoch": 1.5611510791366907, "grad_norm": 0.32678249950063587, "learning_rate": 2.561135766546525e-07, "loss": 0.9625093936920166, "step": 543 }, { "epoch": 1.5640287769784171, "grad_norm": 0.3014751724857403, "learning_rate": 2.529456736157356e-07, "loss": 0.8934499621391296, "step": 544 }, { "epoch": 1.566906474820144, "grad_norm": 0.27876968496773213, "learning_rate": 2.4979464574827555e-07, "loss": 0.8468393087387085, "step": 545 }, { "epoch": 1.5697841726618704, "grad_norm": 0.2757645778822954, "learning_rate": 2.4666056423067825e-07, "loss": 0.9273233413696289, "step": 546 }, { "epoch": 1.572661870503597, "grad_norm": 0.313799523824532, "learning_rate": 2.4354349985854795e-07, "loss": 0.9406145811080933, "step": 547 }, { "epoch": 1.5755395683453237, "grad_norm": 0.3175009917256148, "learning_rate": 2.4044352304309044e-07, "loss": 0.9628616571426392, "step": 548 }, { "epoch": 1.5784172661870504, "grad_norm": 0.27743168737673624, "learning_rate": 2.3736070380952165e-07, "loss": 0.9222140312194824, "step": 549 }, { "epoch": 1.581294964028777, "grad_norm": 0.2851178701079908, "learning_rate": 2.34295111795485e-07, "loss": 1.0274484157562256, "step": 550 }, { "epoch": 1.5841726618705037, "grad_norm": 0.38515099715665085, "learning_rate": 2.3124681624948006e-07, "loss": 1.0696377754211426, "step": 551 }, { "epoch": 1.5870503597122303, "grad_norm": 0.3133249799626146, "learning_rate": 2.2821588602929632e-07, "loss": 0.9214944839477539, "step": 552 }, { "epoch": 1.5899280575539567, "grad_norm": 0.3307355043232978, "learning_rate": 2.252023896004601e-07, "loss": 0.8194340467453003, "step": 553 }, { "epoch": 1.5928057553956836, "grad_norm": 0.2899499890420293, "learning_rate": 2.2220639503468542e-07, "loss": 0.9256591796875, "step": 554 }, { "epoch": 1.59568345323741, "grad_norm": 0.3059563087954579, "learning_rate": 2.192279700083385e-07, "loss": 0.9171819686889648, "step": 555 }, { "epoch": 1.5985611510791367, "grad_norm": 0.2910581911029511, "learning_rate": 2.162671818009082e-07, "loss": 0.8385239839553833, "step": 556 }, { "epoch": 1.6014388489208633, "grad_norm": 0.3077283177341958, "learning_rate": 2.133240972934852e-07, "loss": 0.9529489278793335, "step": 557 }, { "epoch": 1.60431654676259, "grad_norm": 0.33438792027940295, "learning_rate": 2.1039878296725332e-07, "loss": 0.9289690256118774, "step": 558 }, { "epoch": 1.6071942446043166, "grad_norm": 0.33957986792953665, "learning_rate": 2.0749130490198652e-07, "loss": 1.0393140316009521, "step": 559 }, { "epoch": 1.610071942446043, "grad_norm": 0.34246531637554406, "learning_rate": 2.046017287745554e-07, "loss": 1.0560503005981445, "step": 560 }, { "epoch": 1.6129496402877699, "grad_norm": 0.33305609974874595, "learning_rate": 2.0173011985744603e-07, "loss": 0.9776498079299927, "step": 561 }, { "epoch": 1.6158273381294963, "grad_norm": 0.33804380816718965, "learning_rate": 1.9887654301728286e-07, "loss": 0.9953919053077698, "step": 562 }, { "epoch": 1.6187050359712232, "grad_norm": 0.33510637626675815, "learning_rate": 1.960410627133654e-07, "loss": 0.9703421592712402, "step": 563 }, { "epoch": 1.6215827338129496, "grad_norm": 0.3528056926245532, "learning_rate": 1.9322374299621157e-07, "loss": 0.986659824848175, "step": 564 }, { "epoch": 1.6244604316546762, "grad_norm": 0.3190776044982604, "learning_rate": 1.9042464750610987e-07, "loss": 1.047306776046753, "step": 565 }, { "epoch": 1.6273381294964029, "grad_norm": 0.32915934915976663, "learning_rate": 1.8764383947168383e-07, "loss": 0.9910968542098999, "step": 566 }, { "epoch": 1.6302158273381295, "grad_norm": 0.2786176626211202, "learning_rate": 1.8488138170846146e-07, "loss": 0.9353040456771851, "step": 567 }, { "epoch": 1.6330935251798562, "grad_norm": 0.31795714142791043, "learning_rate": 1.8213733661745855e-07, "loss": 0.9244099259376526, "step": 568 }, { "epoch": 1.6359712230215826, "grad_norm": 0.313138188279038, "learning_rate": 1.7941176618376686e-07, "loss": 0.9275581240653992, "step": 569 }, { "epoch": 1.6388489208633095, "grad_norm": 0.2983504345513285, "learning_rate": 1.767047319751559e-07, "loss": 1.0571556091308594, "step": 570 }, { "epoch": 1.641726618705036, "grad_norm": 0.2900247015191147, "learning_rate": 1.7401629514068116e-07, "loss": 0.8571426868438721, "step": 571 }, { "epoch": 1.6446043165467625, "grad_norm": 0.32073923970300267, "learning_rate": 1.713465164093031e-07, "loss": 0.962169349193573, "step": 572 }, { "epoch": 1.6474820143884892, "grad_norm": 0.3426036390922273, "learning_rate": 1.6869545608851465e-07, "loss": 1.068155288696289, "step": 573 }, { "epoch": 1.6503597122302158, "grad_norm": 0.2912198669008646, "learning_rate": 1.6606317406298044e-07, "loss": 0.9102658033370972, "step": 574 }, { "epoch": 1.6532374100719425, "grad_norm": 0.2783664324123141, "learning_rate": 1.6344972979318227e-07, "loss": 0.863929033279419, "step": 575 }, { "epoch": 1.6561151079136691, "grad_norm": 0.3166046506897087, "learning_rate": 1.608551823140778e-07, "loss": 0.9644492268562317, "step": 576 }, { "epoch": 1.6589928057553958, "grad_norm": 0.3032692441409985, "learning_rate": 1.5827959023376503e-07, "loss": 0.9762970209121704, "step": 577 }, { "epoch": 1.6618705035971222, "grad_norm": 0.3016744079808176, "learning_rate": 1.5572301173216018e-07, "loss": 0.9287898540496826, "step": 578 }, { "epoch": 1.664748201438849, "grad_norm": 0.27431423825024975, "learning_rate": 1.5318550455968282e-07, "loss": 0.9573485851287842, "step": 579 }, { "epoch": 1.6676258992805755, "grad_norm": 0.3087425552754159, "learning_rate": 1.5066712603595045e-07, "loss": 0.9209293127059937, "step": 580 }, { "epoch": 1.6705035971223021, "grad_norm": 0.2899753485027158, "learning_rate": 1.481679330484854e-07, "loss": 0.8685443997383118, "step": 581 }, { "epoch": 1.6733812949640288, "grad_norm": 0.30804307167476375, "learning_rate": 1.4568798205142818e-07, "loss": 1.0030007362365723, "step": 582 }, { "epoch": 1.6762589928057554, "grad_norm": 0.3269639712901895, "learning_rate": 1.4322732906426361e-07, "loss": 0.9561444520950317, "step": 583 }, { "epoch": 1.679136690647482, "grad_norm": 0.24889504344208443, "learning_rate": 1.407860296705542e-07, "loss": 0.9087004661560059, "step": 584 }, { "epoch": 1.6820143884892085, "grad_norm": 0.3206254281376367, "learning_rate": 1.3836413901668563e-07, "loss": 1.040391206741333, "step": 585 }, { "epoch": 1.6848920863309353, "grad_norm": 0.35576899441592297, "learning_rate": 1.359617118106202e-07, "loss": 0.9220665097236633, "step": 586 }, { "epoch": 1.6877697841726618, "grad_norm": 0.33992161289171674, "learning_rate": 1.3357880232066188e-07, "loss": 1.0048316717147827, "step": 587 }, { "epoch": 1.6906474820143886, "grad_norm": 0.3155381507693763, "learning_rate": 1.3121546437422915e-07, "loss": 0.9235495924949646, "step": 588 }, { "epoch": 1.693525179856115, "grad_norm": 0.2712504024495115, "learning_rate": 1.2887175135664085e-07, "loss": 0.9516848921775818, "step": 589 }, { "epoch": 1.6964028776978417, "grad_norm": 0.2729229399526208, "learning_rate": 1.2654771620990845e-07, "loss": 0.8648025989532471, "step": 590 }, { "epoch": 1.6992805755395683, "grad_norm": 0.28643485819645237, "learning_rate": 1.242434114315417e-07, "loss": 0.9381082057952881, "step": 591 }, { "epoch": 1.702158273381295, "grad_norm": 0.3583701644093793, "learning_rate": 1.219588890733616e-07, "loss": 0.9976767301559448, "step": 592 }, { "epoch": 1.7050359712230216, "grad_norm": 0.3058650158564105, "learning_rate": 1.1969420074032532e-07, "loss": 0.9010272026062012, "step": 593 }, { "epoch": 1.707913669064748, "grad_norm": 0.3079535255773853, "learning_rate": 1.1744939758936045e-07, "loss": 0.9119488000869751, "step": 594 }, { "epoch": 1.710791366906475, "grad_norm": 0.3112816376998103, "learning_rate": 1.1522453032820867e-07, "loss": 1.0138704776763916, "step": 595 }, { "epoch": 1.7136690647482014, "grad_norm": 0.37249689212589393, "learning_rate": 1.1301964921428164e-07, "loss": 1.0309240818023682, "step": 596 }, { "epoch": 1.7165467625899282, "grad_norm": 0.2919396976165667, "learning_rate": 1.1083480405352419e-07, "loss": 1.02201509475708, "step": 597 }, { "epoch": 1.7194244604316546, "grad_norm": 0.28931756763377436, "learning_rate": 1.086700441992906e-07, "loss": 0.9084526896476746, "step": 598 }, { "epoch": 1.7223021582733813, "grad_norm": 0.29523211564831336, "learning_rate": 1.0652541855122888e-07, "loss": 1.0073999166488647, "step": 599 }, { "epoch": 1.725179856115108, "grad_norm": 0.3072460132576141, "learning_rate": 1.044009755541766e-07, "loss": 0.9768160581588745, "step": 600 }, { "epoch": 1.7280575539568346, "grad_norm": 0.3021099960297105, "learning_rate": 1.0229676319706671e-07, "loss": 1.0317999124526978, "step": 601 }, { "epoch": 1.7309352517985612, "grad_norm": 0.32444961570494624, "learning_rate": 1.0021282901184314e-07, "loss": 0.953796923160553, "step": 602 }, { "epoch": 1.7338129496402876, "grad_norm": 0.31997858537370105, "learning_rate": 9.814922007238691e-08, "loss": 0.8879704475402832, "step": 603 }, { "epoch": 1.7366906474820145, "grad_norm": 0.3001763106646087, "learning_rate": 9.610598299345363e-08, "loss": 0.9384487271308899, "step": 604 }, { "epoch": 1.739568345323741, "grad_norm": 0.3076940778037735, "learning_rate": 9.408316392961946e-08, "loss": 0.8896828889846802, "step": 605 }, { "epoch": 1.7424460431654676, "grad_norm": 0.30206497856260484, "learning_rate": 9.208080857423983e-08, "loss": 0.991470456123352, "step": 606 }, { "epoch": 1.7453237410071942, "grad_norm": 0.31112092266827757, "learning_rate": 9.009896215841561e-08, "loss": 0.9205191135406494, "step": 607 }, { "epoch": 1.7482014388489209, "grad_norm": 0.34267209965236567, "learning_rate": 8.813766944997292e-08, "loss": 0.9923685789108276, "step": 608 }, { "epoch": 1.7510791366906475, "grad_norm": 0.2775856465253116, "learning_rate": 8.619697475245135e-08, "loss": 0.906508207321167, "step": 609 }, { "epoch": 1.753956834532374, "grad_norm": 0.3695265847423868, "learning_rate": 8.427692190410252e-08, "loss": 1.1132643222808838, "step": 610 }, { "epoch": 1.7568345323741008, "grad_norm": 0.335951419865602, "learning_rate": 8.237755427690097e-08, "loss": 0.958720326423645, "step": 611 }, { "epoch": 1.7597122302158272, "grad_norm": 0.26398237511432854, "learning_rate": 8.049891477556325e-08, "loss": 0.8326461315155029, "step": 612 }, { "epoch": 1.762589928057554, "grad_norm": 0.29113012627846874, "learning_rate": 7.864104583657994e-08, "loss": 0.92642742395401, "step": 613 }, { "epoch": 1.7654676258992805, "grad_norm": 0.32510114636650206, "learning_rate": 7.680398942725607e-08, "loss": 1.0428296327590942, "step": 614 }, { "epoch": 1.7683453237410072, "grad_norm": 0.3401761886813798, "learning_rate": 7.498778704476372e-08, "loss": 1.0307958126068115, "step": 615 }, { "epoch": 1.7712230215827338, "grad_norm": 0.31568033622101, "learning_rate": 7.319247971520426e-08, "loss": 0.922683835029602, "step": 616 }, { "epoch": 1.7741007194244605, "grad_norm": 0.2903936993592098, "learning_rate": 7.141810799268222e-08, "loss": 0.8616385459899902, "step": 617 }, { "epoch": 1.776978417266187, "grad_norm": 0.28996044598452053, "learning_rate": 6.966471195838807e-08, "loss": 0.9075828194618225, "step": 618 }, { "epoch": 1.7798561151079135, "grad_norm": 0.30488859521834044, "learning_rate": 6.793233121969422e-08, "loss": 0.9607424736022949, "step": 619 }, { "epoch": 1.7827338129496404, "grad_norm": 0.29464217851633684, "learning_rate": 6.622100490925919e-08, "loss": 0.9187620878219604, "step": 620 }, { "epoch": 1.7856115107913668, "grad_norm": 0.316636600630024, "learning_rate": 6.453077168414455e-08, "loss": 0.9384863972663879, "step": 621 }, { "epoch": 1.7884892086330937, "grad_norm": 0.3042112992195363, "learning_rate": 6.286166972494079e-08, "loss": 0.9122720956802368, "step": 622 }, { "epoch": 1.79136690647482, "grad_norm": 0.2799449479478083, "learning_rate": 6.121373673490548e-08, "loss": 0.9125893712043762, "step": 623 }, { "epoch": 1.7942446043165468, "grad_norm": 0.287767507487325, "learning_rate": 5.958700993911192e-08, "loss": 0.8173254132270813, "step": 624 }, { "epoch": 1.7971223021582734, "grad_norm": 0.2716254786056972, "learning_rate": 5.798152608360696e-08, "loss": 0.9122398495674133, "step": 625 }, { "epoch": 1.8, "grad_norm": 0.31952149094868726, "learning_rate": 5.6397321434582534e-08, "loss": 1.0111041069030762, "step": 626 }, { "epoch": 1.8028776978417267, "grad_norm": 0.27995436477971375, "learning_rate": 5.483443177755498e-08, "loss": 0.9707604646682739, "step": 627 }, { "epoch": 1.8057553956834531, "grad_norm": 0.3243898181487502, "learning_rate": 5.32928924165581e-08, "loss": 1.0739054679870605, "step": 628 }, { "epoch": 1.80863309352518, "grad_norm": 0.2984254760085138, "learning_rate": 5.177273817334438e-08, "loss": 0.9249017238616943, "step": 629 }, { "epoch": 1.8115107913669064, "grad_norm": 0.3146364757154643, "learning_rate": 5.027400338659926e-08, "loss": 0.9324784278869629, "step": 630 }, { "epoch": 1.814388489208633, "grad_norm": 0.2942034665756954, "learning_rate": 4.879672191116524e-08, "loss": 0.92160564661026, "step": 631 }, { "epoch": 1.8172661870503597, "grad_norm": 0.3093230418919872, "learning_rate": 4.7340927117277105e-08, "loss": 0.9626412987709045, "step": 632 }, { "epoch": 1.8201438848920863, "grad_norm": 0.2737771051650981, "learning_rate": 4.590665188980769e-08, "loss": 0.9344724416732788, "step": 633 }, { "epoch": 1.823021582733813, "grad_norm": 0.2868842184615828, "learning_rate": 4.44939286275261e-08, "loss": 0.969592809677124, "step": 634 }, { "epoch": 1.8258992805755394, "grad_norm": 0.30108362908099795, "learning_rate": 4.310278924236454e-08, "loss": 0.9268302917480469, "step": 635 }, { "epoch": 1.8287769784172663, "grad_norm": 0.3038411300603624, "learning_rate": 4.173326515869879e-08, "loss": 0.9940468072891235, "step": 636 }, { "epoch": 1.8316546762589927, "grad_norm": 0.30517693172745697, "learning_rate": 4.038538731263719e-08, "loss": 0.9976387023925781, "step": 637 }, { "epoch": 1.8345323741007196, "grad_norm": 0.3273494194690698, "learning_rate": 3.9059186151322534e-08, "loss": 1.05000638961792, "step": 638 }, { "epoch": 1.837410071942446, "grad_norm": 0.29782336017676786, "learning_rate": 3.775469163224432e-08, "loss": 0.9021062850952148, "step": 639 }, { "epoch": 1.8402877697841726, "grad_norm": 0.2796086934847699, "learning_rate": 3.647193322256137e-08, "loss": 0.8977291584014893, "step": 640 }, { "epoch": 1.8431654676258993, "grad_norm": 0.32101134920342667, "learning_rate": 3.5210939898437154e-08, "loss": 0.9850113391876221, "step": 641 }, { "epoch": 1.846043165467626, "grad_norm": 0.3578812552257961, "learning_rate": 3.397174014438431e-08, "loss": 0.9935275316238403, "step": 642 }, { "epoch": 1.8489208633093526, "grad_norm": 0.3018396148934059, "learning_rate": 3.275436195262193e-08, "loss": 0.9289500713348389, "step": 643 }, { "epoch": 1.851798561151079, "grad_norm": 0.2833335903840955, "learning_rate": 3.155883282244287e-08, "loss": 0.8204896450042725, "step": 644 }, { "epoch": 1.8546762589928059, "grad_norm": 0.3430760638933196, "learning_rate": 3.038517975959276e-08, "loss": 1.0271613597869873, "step": 645 }, { "epoch": 1.8575539568345323, "grad_norm": 0.3085433261480597, "learning_rate": 2.923342927565964e-08, "loss": 0.9084464311599731, "step": 646 }, { "epoch": 1.8604316546762591, "grad_norm": 0.30704980866497883, "learning_rate": 2.8103607387475746e-08, "loss": 0.9712929725646973, "step": 647 }, { "epoch": 1.8633093525179856, "grad_norm": 0.28312145339260103, "learning_rate": 2.69957396165289e-08, "loss": 0.9905073642730713, "step": 648 }, { "epoch": 1.8661870503597122, "grad_norm": 0.2770551368568327, "learning_rate": 2.5909850988386937e-08, "loss": 0.9023991227149963, "step": 649 }, { "epoch": 1.8690647482014389, "grad_norm": 0.31953951865146846, "learning_rate": 2.4845966032131628e-08, "loss": 1.0285024642944336, "step": 650 }, { "epoch": 1.8719424460431655, "grad_norm": 0.3452314742008196, "learning_rate": 2.380410877980532e-08, "loss": 1.004025936126709, "step": 651 }, { "epoch": 1.8748201438848922, "grad_norm": 0.33870243092263896, "learning_rate": 2.278430276586729e-08, "loss": 0.9524623155593872, "step": 652 }, { "epoch": 1.8776978417266186, "grad_norm": 0.32350246830260215, "learning_rate": 2.1786571026662702e-08, "loss": 1.0455811023712158, "step": 653 }, { "epoch": 1.8805755395683454, "grad_norm": 0.3328778935315255, "learning_rate": 2.0810936099902364e-08, "loss": 0.9919840693473816, "step": 654 }, { "epoch": 1.8834532374100719, "grad_norm": 0.29952519081119605, "learning_rate": 1.9857420024152806e-08, "loss": 0.9986833930015564, "step": 655 }, { "epoch": 1.8863309352517985, "grad_norm": 0.33793774006407235, "learning_rate": 1.892604433833933e-08, "loss": 0.931689441204071, "step": 656 }, { "epoch": 1.8892086330935252, "grad_norm": 0.30901405969162027, "learning_rate": 1.8016830081259094e-08, "loss": 0.9114639759063721, "step": 657 }, { "epoch": 1.8920863309352518, "grad_norm": 0.32095691544451443, "learning_rate": 1.712979779110568e-08, "loss": 1.0065832138061523, "step": 658 }, { "epoch": 1.8949640287769784, "grad_norm": 0.30923568734316464, "learning_rate": 1.6264967505005612e-08, "loss": 0.9409237504005432, "step": 659 }, { "epoch": 1.8978417266187049, "grad_norm": 0.31465289359066045, "learning_rate": 1.5422358758565344e-08, "loss": 0.9561393857002258, "step": 660 }, { "epoch": 1.9007194244604317, "grad_norm": 0.2782146122603473, "learning_rate": 1.4601990585430212e-08, "loss": 0.8838869333267212, "step": 661 }, { "epoch": 1.9035971223021582, "grad_norm": 0.3476179534958916, "learning_rate": 1.380388151685441e-08, "loss": 1.1335151195526123, "step": 662 }, { "epoch": 1.906474820143885, "grad_norm": 0.30248705756655914, "learning_rate": 1.302804958128223e-08, "loss": 0.9658051133155823, "step": 663 }, { "epoch": 1.9093525179856115, "grad_norm": 0.30985357673524433, "learning_rate": 1.2274512303941164e-08, "loss": 0.8988052606582642, "step": 664 }, { "epoch": 1.912230215827338, "grad_norm": 0.3105920840120309, "learning_rate": 1.1543286706445553e-08, "loss": 0.9807320237159729, "step": 665 }, { "epoch": 1.9151079136690647, "grad_norm": 0.30776301282911345, "learning_rate": 1.0834389306412673e-08, "loss": 0.9672824144363403, "step": 666 }, { "epoch": 1.9179856115107914, "grad_norm": 0.3355211414551835, "learning_rate": 1.0147836117088915e-08, "loss": 0.9862767457962036, "step": 667 }, { "epoch": 1.920863309352518, "grad_norm": 0.29700692569905607, "learning_rate": 9.483642646988977e-09, "loss": 0.9207549095153809, "step": 668 }, { "epoch": 1.9237410071942445, "grad_norm": 0.32529111277227224, "learning_rate": 8.841823899544577e-09, "loss": 1.05051851272583, "step": 669 }, { "epoch": 1.9266187050359713, "grad_norm": 0.3097839939998225, "learning_rate": 8.222394372766173e-09, "loss": 0.9589816927909851, "step": 670 }, { "epoch": 1.9294964028776977, "grad_norm": 0.30754678215336145, "learning_rate": 7.625368058915226e-09, "loss": 0.9071527719497681, "step": 671 }, { "epoch": 1.9323741007194246, "grad_norm": 0.28515154254996145, "learning_rate": 7.05075844418812e-09, "loss": 0.98401939868927, "step": 672 }, { "epoch": 1.935251798561151, "grad_norm": 0.32501900842828985, "learning_rate": 6.498578508411734e-09, "loss": 0.9431239366531372, "step": 673 }, { "epoch": 1.9381294964028777, "grad_norm": 0.2997493450074929, "learning_rate": 5.9688407247500124e-09, "loss": 0.8510617017745972, "step": 674 }, { "epoch": 1.9410071942446043, "grad_norm": 0.2931086066866291, "learning_rate": 5.461557059422306e-09, "loss": 0.9289333820343018, "step": 675 }, { "epoch": 1.943884892086331, "grad_norm": 0.3450018212922675, "learning_rate": 4.9767389714330256e-09, "loss": 0.988121509552002, "step": 676 }, { "epoch": 1.9467625899280576, "grad_norm": 0.3140422887654555, "learning_rate": 4.514397412312965e-09, "loss": 1.0198135375976562, "step": 677 }, { "epoch": 1.949640287769784, "grad_norm": 0.2885512942736773, "learning_rate": 4.074542825871275e-09, "loss": 0.8961633443832397, "step": 678 }, { "epoch": 1.952517985611511, "grad_norm": 0.35409915766999916, "learning_rate": 3.657185147960762e-09, "loss": 1.0393238067626953, "step": 679 }, { "epoch": 1.9553956834532373, "grad_norm": 0.30601728285109125, "learning_rate": 3.2623338062522933e-09, "loss": 0.9444550275802612, "step": 680 }, { "epoch": 1.958273381294964, "grad_norm": 0.2924289258676955, "learning_rate": 2.889997720022297e-09, "loss": 0.9215587377548218, "step": 681 }, { "epoch": 1.9611510791366906, "grad_norm": 0.30327925646552367, "learning_rate": 2.5401852999512586e-09, "loss": 1.000258445739746, "step": 682 }, { "epoch": 1.9640287769784173, "grad_norm": 0.29260027512430775, "learning_rate": 2.212904447933983e-09, "loss": 0.9252768754959106, "step": 683 }, { "epoch": 1.966906474820144, "grad_norm": 0.3011559234991841, "learning_rate": 1.908162556900628e-09, "loss": 0.9935536980628967, "step": 684 }, { "epoch": 1.9697841726618706, "grad_norm": 0.27115587245365835, "learning_rate": 1.6259665106498344e-09, "loss": 0.9564770460128784, "step": 685 }, { "epoch": 1.9726618705035972, "grad_norm": 0.27403209801565, "learning_rate": 1.3663226836936326e-09, "loss": 0.8805955052375793, "step": 686 }, { "epoch": 1.9755395683453236, "grad_norm": 0.3044315007410778, "learning_rate": 1.1292369411127766e-09, "loss": 0.8896344304084778, "step": 687 }, { "epoch": 1.9784172661870505, "grad_norm": 0.30681720361851966, "learning_rate": 9.147146384250737e-10, "loss": 0.980034351348877, "step": 688 }, { "epoch": 1.981294964028777, "grad_norm": 0.32187194306044253, "learning_rate": 7.227606214635917e-10, "loss": 0.9895438551902771, "step": 689 }, { "epoch": 1.9841726618705036, "grad_norm": 0.3136795801610112, "learning_rate": 5.533792262675252e-10, "loss": 0.9838018417358398, "step": 690 }, { "epoch": 1.9870503597122302, "grad_norm": 0.29300997638864673, "learning_rate": 4.0657427898460603e-10, "loss": 0.8921380043029785, "step": 691 }, { "epoch": 1.9899280575539569, "grad_norm": 0.2711131667782687, "learning_rate": 2.8234909578417344e-10, "loss": 0.8311777114868164, "step": 692 }, { "epoch": 1.9928057553956835, "grad_norm": 0.3390732741445483, "learning_rate": 1.8070648278234457e-10, "loss": 1.0223444700241089, "step": 693 }, { "epoch": 1.99568345323741, "grad_norm": 0.27722007418335426, "learning_rate": 1.0164873597895419e-10, "loss": 0.908263087272644, "step": 694 }, { "epoch": 1.9985611510791368, "grad_norm": 0.32715406813626025, "learning_rate": 4.5177641205262906e-11, "loss": 0.950904369354248, "step": 695 }, { "epoch": 2.0, "grad_norm": 0.45329927875611203, "learning_rate": 1.1294474083878292e-11, "loss": 1.1292500495910645, "step": 696 }, { "epoch": 2.0, "step": 696, "total_flos": 1203702673702912.0, "train_loss": 1.0133290295114463, "train_runtime": 7255.9911, "train_samples_per_second": 0.766, "train_steps_per_second": 0.096 } ], "logging_steps": 1, "max_steps": 696, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1203702673702912.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }