diversity_base_adapter / trainer_state.json
wonwonn's picture
Upload LoRA adapter (Qwen2.5-VL-7B-diversifier)
8e9ebe1 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 696,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0028776978417266188,
"grad_norm": 0.42690583075609034,
"learning_rate": 0.0,
"loss": 1.3486042022705078,
"step": 1
},
{
"epoch": 0.0057553956834532375,
"grad_norm": 0.3882655966887319,
"learning_rate": 5.714285714285714e-08,
"loss": 1.2684719562530518,
"step": 2
},
{
"epoch": 0.008633093525179856,
"grad_norm": 0.4443954365616111,
"learning_rate": 1.1428571428571427e-07,
"loss": 1.177267074584961,
"step": 3
},
{
"epoch": 0.011510791366906475,
"grad_norm": 0.4253698770879055,
"learning_rate": 1.7142857142857143e-07,
"loss": 1.181878924369812,
"step": 4
},
{
"epoch": 0.014388489208633094,
"grad_norm": 0.3720421036789158,
"learning_rate": 2.2857142857142855e-07,
"loss": 1.1671853065490723,
"step": 5
},
{
"epoch": 0.017266187050359712,
"grad_norm": 0.3622614120618611,
"learning_rate": 2.857142857142857e-07,
"loss": 1.1122022867202759,
"step": 6
},
{
"epoch": 0.02014388489208633,
"grad_norm": 0.38384210342955205,
"learning_rate": 3.4285714285714286e-07,
"loss": 1.1873408555984497,
"step": 7
},
{
"epoch": 0.02302158273381295,
"grad_norm": 0.4284045629754763,
"learning_rate": 4e-07,
"loss": 1.1845028400421143,
"step": 8
},
{
"epoch": 0.025899280575539568,
"grad_norm": 0.41551879217805887,
"learning_rate": 4.571428571428571e-07,
"loss": 1.2185403108596802,
"step": 9
},
{
"epoch": 0.02877697841726619,
"grad_norm": 0.42087035069001,
"learning_rate": 5.142857142857142e-07,
"loss": 1.0747895240783691,
"step": 10
},
{
"epoch": 0.031654676258992806,
"grad_norm": 0.5150810858798297,
"learning_rate": 5.714285714285714e-07,
"loss": 1.2050367593765259,
"step": 11
},
{
"epoch": 0.034532374100719423,
"grad_norm": 0.3402347213407099,
"learning_rate": 6.285714285714286e-07,
"loss": 1.1960644721984863,
"step": 12
},
{
"epoch": 0.03741007194244604,
"grad_norm": 0.3833689066105734,
"learning_rate": 6.857142857142857e-07,
"loss": 1.2497148513793945,
"step": 13
},
{
"epoch": 0.04028776978417266,
"grad_norm": 0.3335104915047139,
"learning_rate": 7.428571428571429e-07,
"loss": 1.1446340084075928,
"step": 14
},
{
"epoch": 0.04316546762589928,
"grad_norm": 0.3492060423539416,
"learning_rate": 8e-07,
"loss": 1.1868774890899658,
"step": 15
},
{
"epoch": 0.0460431654676259,
"grad_norm": 0.36339916703647873,
"learning_rate": 8.57142857142857e-07,
"loss": 1.1652871370315552,
"step": 16
},
{
"epoch": 0.04892086330935252,
"grad_norm": 0.35128380927769104,
"learning_rate": 9.142857142857142e-07,
"loss": 1.1377315521240234,
"step": 17
},
{
"epoch": 0.051798561151079135,
"grad_norm": 0.3216270031913542,
"learning_rate": 9.714285714285715e-07,
"loss": 1.179404377937317,
"step": 18
},
{
"epoch": 0.05467625899280575,
"grad_norm": 0.3626006607419513,
"learning_rate": 1.0285714285714284e-06,
"loss": 1.272096872329712,
"step": 19
},
{
"epoch": 0.05755395683453238,
"grad_norm": 0.37548463438614677,
"learning_rate": 1.0857142857142856e-06,
"loss": 1.1252775192260742,
"step": 20
},
{
"epoch": 0.060431654676258995,
"grad_norm": 0.39203682362934145,
"learning_rate": 1.1428571428571428e-06,
"loss": 1.2636396884918213,
"step": 21
},
{
"epoch": 0.06330935251798561,
"grad_norm": 0.3929267980473854,
"learning_rate": 1.2e-06,
"loss": 1.1296113729476929,
"step": 22
},
{
"epoch": 0.06618705035971223,
"grad_norm": 0.3580571203740857,
"learning_rate": 1.2571428571428571e-06,
"loss": 1.2140036821365356,
"step": 23
},
{
"epoch": 0.06906474820143885,
"grad_norm": 0.40128457938538337,
"learning_rate": 1.3142857142857143e-06,
"loss": 1.3345097303390503,
"step": 24
},
{
"epoch": 0.07194244604316546,
"grad_norm": 0.3624963705827193,
"learning_rate": 1.3714285714285715e-06,
"loss": 1.2285950183868408,
"step": 25
},
{
"epoch": 0.07482014388489208,
"grad_norm": 0.3891545493397791,
"learning_rate": 1.4285714285714286e-06,
"loss": 1.1885042190551758,
"step": 26
},
{
"epoch": 0.0776978417266187,
"grad_norm": 0.45890158291879024,
"learning_rate": 1.4857142857142858e-06,
"loss": 1.1534702777862549,
"step": 27
},
{
"epoch": 0.08057553956834532,
"grad_norm": 0.4134301257295623,
"learning_rate": 1.5428571428571428e-06,
"loss": 1.1666285991668701,
"step": 28
},
{
"epoch": 0.08345323741007195,
"grad_norm": 0.42695898719570075,
"learning_rate": 1.6e-06,
"loss": 1.0518786907196045,
"step": 29
},
{
"epoch": 0.08633093525179857,
"grad_norm": 0.40529199232299007,
"learning_rate": 1.657142857142857e-06,
"loss": 1.2913450002670288,
"step": 30
},
{
"epoch": 0.08920863309352518,
"grad_norm": 0.42614405561233504,
"learning_rate": 1.714285714285714e-06,
"loss": 1.1956894397735596,
"step": 31
},
{
"epoch": 0.0920863309352518,
"grad_norm": 0.4536901431733599,
"learning_rate": 1.7714285714285712e-06,
"loss": 1.2299771308898926,
"step": 32
},
{
"epoch": 0.09496402877697842,
"grad_norm": 0.4275911040935052,
"learning_rate": 1.8285714285714284e-06,
"loss": 1.230122685432434,
"step": 33
},
{
"epoch": 0.09784172661870504,
"grad_norm": 0.40907326990128035,
"learning_rate": 1.8857142857142856e-06,
"loss": 1.2399665117263794,
"step": 34
},
{
"epoch": 0.10071942446043165,
"grad_norm": 0.42873759553168767,
"learning_rate": 1.942857142857143e-06,
"loss": 1.209876298904419,
"step": 35
},
{
"epoch": 0.10359712230215827,
"grad_norm": 0.3875078895858393,
"learning_rate": 2e-06,
"loss": 1.3261746168136597,
"step": 36
},
{
"epoch": 0.10647482014388489,
"grad_norm": 0.3895695917104401,
"learning_rate": 1.999988705525916e-06,
"loss": 1.1430740356445312,
"step": 37
},
{
"epoch": 0.1093525179856115,
"grad_norm": 0.390575759377815,
"learning_rate": 1.9999548223587944e-06,
"loss": 1.0920931100845337,
"step": 38
},
{
"epoch": 0.11223021582733812,
"grad_norm": 0.4547783385877052,
"learning_rate": 1.9998983512640208e-06,
"loss": 1.1944105625152588,
"step": 39
},
{
"epoch": 0.11510791366906475,
"grad_norm": 0.46472429487773786,
"learning_rate": 1.9998192935172177e-06,
"loss": 1.2673561573028564,
"step": 40
},
{
"epoch": 0.11798561151079137,
"grad_norm": 0.41362852460476074,
"learning_rate": 1.9997176509042157e-06,
"loss": 1.2279549837112427,
"step": 41
},
{
"epoch": 0.12086330935251799,
"grad_norm": 0.4271272595194793,
"learning_rate": 1.9995934257210153e-06,
"loss": 1.1604218482971191,
"step": 42
},
{
"epoch": 0.1237410071942446,
"grad_norm": 0.3804983247156394,
"learning_rate": 1.9994466207737324e-06,
"loss": 1.1747047901153564,
"step": 43
},
{
"epoch": 0.12661870503597122,
"grad_norm": 0.3650820732490411,
"learning_rate": 1.9992772393785363e-06,
"loss": 1.0480847358703613,
"step": 44
},
{
"epoch": 0.12949640287769784,
"grad_norm": 0.41209897515894023,
"learning_rate": 1.9990852853615746e-06,
"loss": 1.2965943813323975,
"step": 45
},
{
"epoch": 0.13237410071942446,
"grad_norm": 0.4043694740064971,
"learning_rate": 1.9988707630588874e-06,
"loss": 1.1381937265396118,
"step": 46
},
{
"epoch": 0.13525179856115108,
"grad_norm": 0.4147421237580474,
"learning_rate": 1.9986336773163066e-06,
"loss": 1.1976345777511597,
"step": 47
},
{
"epoch": 0.1381294964028777,
"grad_norm": 0.46096126219291444,
"learning_rate": 1.99837403348935e-06,
"loss": 1.1909739971160889,
"step": 48
},
{
"epoch": 0.1410071942446043,
"grad_norm": 0.48554729123892804,
"learning_rate": 1.9980918374430994e-06,
"loss": 1.1516118049621582,
"step": 49
},
{
"epoch": 0.14388489208633093,
"grad_norm": 0.46308911997767715,
"learning_rate": 1.997787095552066e-06,
"loss": 1.2086803913116455,
"step": 50
},
{
"epoch": 0.14676258992805755,
"grad_norm": 0.4490535916599434,
"learning_rate": 1.9974598147000487e-06,
"loss": 1.2800133228302002,
"step": 51
},
{
"epoch": 0.14964028776978416,
"grad_norm": 0.40996217705477556,
"learning_rate": 1.997110002279978e-06,
"loss": 1.2382150888442993,
"step": 52
},
{
"epoch": 0.15251798561151078,
"grad_norm": 0.40988669582643505,
"learning_rate": 1.9967376661937477e-06,
"loss": 1.1741186380386353,
"step": 53
},
{
"epoch": 0.1553956834532374,
"grad_norm": 0.5464526851837473,
"learning_rate": 1.9963428148520393e-06,
"loss": 1.1607799530029297,
"step": 54
},
{
"epoch": 0.15827338129496402,
"grad_norm": 0.42016622274268145,
"learning_rate": 1.9959254571741285e-06,
"loss": 1.2755463123321533,
"step": 55
},
{
"epoch": 0.16115107913669063,
"grad_norm": 0.4490415553167208,
"learning_rate": 1.995485602587687e-06,
"loss": 1.261953592300415,
"step": 56
},
{
"epoch": 0.16402877697841728,
"grad_norm": 0.5169029226242617,
"learning_rate": 1.995023261028567e-06,
"loss": 1.1530394554138184,
"step": 57
},
{
"epoch": 0.1669064748201439,
"grad_norm": 0.43016608573228415,
"learning_rate": 1.9945384429405776e-06,
"loss": 1.268787145614624,
"step": 58
},
{
"epoch": 0.1697841726618705,
"grad_norm": 0.4793080238946335,
"learning_rate": 1.99403115927525e-06,
"loss": 1.2686214447021484,
"step": 59
},
{
"epoch": 0.17266187050359713,
"grad_norm": 0.4249978192550724,
"learning_rate": 1.9935014214915883e-06,
"loss": 1.201757550239563,
"step": 60
},
{
"epoch": 0.17553956834532375,
"grad_norm": 0.4481671623233787,
"learning_rate": 1.992949241555812e-06,
"loss": 1.1886329650878906,
"step": 61
},
{
"epoch": 0.17841726618705037,
"grad_norm": 0.5402187081810303,
"learning_rate": 1.9923746319410847e-06,
"loss": 1.2228707075119019,
"step": 62
},
{
"epoch": 0.18129496402877698,
"grad_norm": 0.4000631018631766,
"learning_rate": 1.991777605627234e-06,
"loss": 1.0736989974975586,
"step": 63
},
{
"epoch": 0.1841726618705036,
"grad_norm": 0.4881119026873745,
"learning_rate": 1.9911581761004556e-06,
"loss": 1.213085651397705,
"step": 64
},
{
"epoch": 0.18705035971223022,
"grad_norm": 0.5274580867703768,
"learning_rate": 1.990516357353011e-06,
"loss": 1.2776455879211426,
"step": 65
},
{
"epoch": 0.18992805755395684,
"grad_norm": 0.5316412618386857,
"learning_rate": 1.989852163882911e-06,
"loss": 1.1717431545257568,
"step": 66
},
{
"epoch": 0.19280575539568345,
"grad_norm": 0.5326686824141037,
"learning_rate": 1.9891656106935873e-06,
"loss": 1.1462079286575317,
"step": 67
},
{
"epoch": 0.19568345323741007,
"grad_norm": 0.4445628555318413,
"learning_rate": 1.988456713293554e-06,
"loss": 1.174164056777954,
"step": 68
},
{
"epoch": 0.1985611510791367,
"grad_norm": 0.5068823690157335,
"learning_rate": 1.987725487696059e-06,
"loss": 1.3018139600753784,
"step": 69
},
{
"epoch": 0.2014388489208633,
"grad_norm": 0.5106162613433823,
"learning_rate": 1.9869719504187175e-06,
"loss": 1.273469090461731,
"step": 70
},
{
"epoch": 0.20431654676258992,
"grad_norm": 0.46468998684527285,
"learning_rate": 1.9861961184831453e-06,
"loss": 1.2473914623260498,
"step": 71
},
{
"epoch": 0.20719424460431654,
"grad_norm": 0.5345828232737263,
"learning_rate": 1.9853980094145696e-06,
"loss": 1.193030834197998,
"step": 72
},
{
"epoch": 0.21007194244604316,
"grad_norm": 0.5271717020423939,
"learning_rate": 1.9845776412414346e-06,
"loss": 1.1826913356781006,
"step": 73
},
{
"epoch": 0.21294964028776978,
"grad_norm": 0.4004103214424577,
"learning_rate": 1.9837350324949944e-06,
"loss": 1.055051565170288,
"step": 74
},
{
"epoch": 0.2158273381294964,
"grad_norm": 0.5075363846617762,
"learning_rate": 1.9828702022088942e-06,
"loss": 1.1969430446624756,
"step": 75
},
{
"epoch": 0.218705035971223,
"grad_norm": 0.5116674728159791,
"learning_rate": 1.9819831699187407e-06,
"loss": 1.2737852334976196,
"step": 76
},
{
"epoch": 0.22158273381294963,
"grad_norm": 0.5134518143732013,
"learning_rate": 1.9810739556616607e-06,
"loss": 1.1505439281463623,
"step": 77
},
{
"epoch": 0.22446043165467625,
"grad_norm": 0.4961762001577513,
"learning_rate": 1.980142579975847e-06,
"loss": 1.1265602111816406,
"step": 78
},
{
"epoch": 0.2273381294964029,
"grad_norm": 0.5222448272100187,
"learning_rate": 1.9791890639000973e-06,
"loss": 1.1243963241577148,
"step": 79
},
{
"epoch": 0.2302158273381295,
"grad_norm": 0.500186205073849,
"learning_rate": 1.9782134289733374e-06,
"loss": 1.2614185810089111,
"step": 80
},
{
"epoch": 0.23309352517985613,
"grad_norm": 0.49912691652286095,
"learning_rate": 1.9772156972341326e-06,
"loss": 1.1954736709594727,
"step": 81
},
{
"epoch": 0.23597122302158274,
"grad_norm": 0.4383210281801482,
"learning_rate": 1.9761958912201945e-06,
"loss": 1.125051736831665,
"step": 82
},
{
"epoch": 0.23884892086330936,
"grad_norm": 0.4960615716168437,
"learning_rate": 1.9751540339678683e-06,
"loss": 1.0172779560089111,
"step": 83
},
{
"epoch": 0.24172661870503598,
"grad_norm": 0.6116032459432448,
"learning_rate": 1.9740901490116133e-06,
"loss": 1.1732102632522583,
"step": 84
},
{
"epoch": 0.2446043165467626,
"grad_norm": 0.5176440293767078,
"learning_rate": 1.973004260383471e-06,
"loss": 1.225417137145996,
"step": 85
},
{
"epoch": 0.2474820143884892,
"grad_norm": 0.5678048576376955,
"learning_rate": 1.9718963926125244e-06,
"loss": 1.0927081108093262,
"step": 86
},
{
"epoch": 0.2503597122302158,
"grad_norm": 0.6364015437310658,
"learning_rate": 1.9707665707243406e-06,
"loss": 1.2957037687301636,
"step": 87
},
{
"epoch": 0.25323741007194245,
"grad_norm": 0.5809276103827633,
"learning_rate": 1.969614820240407e-06,
"loss": 1.187430500984192,
"step": 88
},
{
"epoch": 0.25611510791366904,
"grad_norm": 0.47690557886764195,
"learning_rate": 1.9684411671775568e-06,
"loss": 1.1036494970321655,
"step": 89
},
{
"epoch": 0.2589928057553957,
"grad_norm": 0.6365313959891131,
"learning_rate": 1.967245638047378e-06,
"loss": 1.1274656057357788,
"step": 90
},
{
"epoch": 0.26187050359712233,
"grad_norm": 0.542056873386327,
"learning_rate": 1.9660282598556155e-06,
"loss": 1.1317627429962158,
"step": 91
},
{
"epoch": 0.2647482014388489,
"grad_norm": 0.5772496326462602,
"learning_rate": 1.964789060101563e-06,
"loss": 1.1629116535186768,
"step": 92
},
{
"epoch": 0.26762589928057556,
"grad_norm": 0.5006687207127247,
"learning_rate": 1.9635280667774385e-06,
"loss": 1.0691213607788086,
"step": 93
},
{
"epoch": 0.27050359712230215,
"grad_norm": 0.4948922663102226,
"learning_rate": 1.9622453083677555e-06,
"loss": 1.0724678039550781,
"step": 94
},
{
"epoch": 0.2733812949640288,
"grad_norm": 0.6310621741039645,
"learning_rate": 1.9609408138486773e-06,
"loss": 1.1892088651657104,
"step": 95
},
{
"epoch": 0.2762589928057554,
"grad_norm": 0.5720157291662107,
"learning_rate": 1.959614612687363e-06,
"loss": 1.1208692789077759,
"step": 96
},
{
"epoch": 0.27913669064748203,
"grad_norm": 0.4968629674990631,
"learning_rate": 1.9582667348413013e-06,
"loss": 1.1870933771133423,
"step": 97
},
{
"epoch": 0.2820143884892086,
"grad_norm": 0.562208720820431,
"learning_rate": 1.9568972107576355e-06,
"loss": 1.1234577894210815,
"step": 98
},
{
"epoch": 0.28489208633093527,
"grad_norm": 0.5269154276860062,
"learning_rate": 1.9555060713724737e-06,
"loss": 1.0910080671310425,
"step": 99
},
{
"epoch": 0.28776978417266186,
"grad_norm": 0.5740033705130164,
"learning_rate": 1.9540933481101923e-06,
"loss": 1.1712496280670166,
"step": 100
},
{
"epoch": 0.2906474820143885,
"grad_norm": 0.5087584410520719,
"learning_rate": 1.952659072882723e-06,
"loss": 1.1709492206573486,
"step": 101
},
{
"epoch": 0.2935251798561151,
"grad_norm": 0.507763542956114,
"learning_rate": 1.9512032780888346e-06,
"loss": 1.2015647888183594,
"step": 102
},
{
"epoch": 0.29640287769784174,
"grad_norm": 0.46170542788719804,
"learning_rate": 1.9497259966134005e-06,
"loss": 1.100395679473877,
"step": 103
},
{
"epoch": 0.2992805755395683,
"grad_norm": 0.5868783299117551,
"learning_rate": 1.9482272618266554e-06,
"loss": 1.1746639013290405,
"step": 104
},
{
"epoch": 0.302158273381295,
"grad_norm": 0.5697876288298696,
"learning_rate": 1.946707107583442e-06,
"loss": 1.106672763824463,
"step": 105
},
{
"epoch": 0.30503597122302156,
"grad_norm": 0.569742102482636,
"learning_rate": 1.945165568222445e-06,
"loss": 1.213707685470581,
"step": 106
},
{
"epoch": 0.3079136690647482,
"grad_norm": 0.538364984750885,
"learning_rate": 1.9436026785654175e-06,
"loss": 1.0930910110473633,
"step": 107
},
{
"epoch": 0.3107913669064748,
"grad_norm": 0.5312913632919434,
"learning_rate": 1.942018473916393e-06,
"loss": 1.142619252204895,
"step": 108
},
{
"epoch": 0.31366906474820144,
"grad_norm": 0.5975458242690872,
"learning_rate": 1.940412990060888e-06,
"loss": 1.2266335487365723,
"step": 109
},
{
"epoch": 0.31654676258992803,
"grad_norm": 0.46671843811650277,
"learning_rate": 1.9387862632650944e-06,
"loss": 1.1608915328979492,
"step": 110
},
{
"epoch": 0.3194244604316547,
"grad_norm": 0.5834160016362078,
"learning_rate": 1.937138330275059e-06,
"loss": 1.183951735496521,
"step": 111
},
{
"epoch": 0.32230215827338127,
"grad_norm": 0.5226121969782161,
"learning_rate": 1.9354692283158553e-06,
"loss": 1.1224737167358398,
"step": 112
},
{
"epoch": 0.3251798561151079,
"grad_norm": 0.5495555876202739,
"learning_rate": 1.9337789950907407e-06,
"loss": 1.0857056379318237,
"step": 113
},
{
"epoch": 0.32805755395683456,
"grad_norm": 0.4562188941273161,
"learning_rate": 1.9320676687803055e-06,
"loss": 1.0629336833953857,
"step": 114
},
{
"epoch": 0.33093525179856115,
"grad_norm": 0.4181080129706698,
"learning_rate": 1.930335288041612e-06,
"loss": 1.1054446697235107,
"step": 115
},
{
"epoch": 0.3338129496402878,
"grad_norm": 0.4892645824645763,
"learning_rate": 1.928581892007318e-06,
"loss": 1.0204641819000244,
"step": 116
},
{
"epoch": 0.3366906474820144,
"grad_norm": 0.42126704545419896,
"learning_rate": 1.926807520284796e-06,
"loss": 1.0821490287780762,
"step": 117
},
{
"epoch": 0.339568345323741,
"grad_norm": 0.5344372210987457,
"learning_rate": 1.9250122129552364e-06,
"loss": 1.1084657907485962,
"step": 118
},
{
"epoch": 0.3424460431654676,
"grad_norm": 0.502255473105476,
"learning_rate": 1.923196010572744e-06,
"loss": 1.1403509378433228,
"step": 119
},
{
"epoch": 0.34532374100719426,
"grad_norm": 0.4478418679539473,
"learning_rate": 1.92135895416342e-06,
"loss": 1.1791510581970215,
"step": 120
},
{
"epoch": 0.34820143884892085,
"grad_norm": 0.5040410026582576,
"learning_rate": 1.9195010852244366e-06,
"loss": 1.1240849494934082,
"step": 121
},
{
"epoch": 0.3510791366906475,
"grad_norm": 0.3694753679987858,
"learning_rate": 1.917622445723099e-06,
"loss": 0.9320825338363647,
"step": 122
},
{
"epoch": 0.3539568345323741,
"grad_norm": 0.5088967091444829,
"learning_rate": 1.9157230780958975e-06,
"loss": 1.1261234283447266,
"step": 123
},
{
"epoch": 0.35683453237410073,
"grad_norm": 0.48547365885933336,
"learning_rate": 1.9138030252475484e-06,
"loss": 1.0831753015518188,
"step": 124
},
{
"epoch": 0.3597122302158273,
"grad_norm": 0.5680540539558109,
"learning_rate": 1.911862330550027e-06,
"loss": 1.1547625064849854,
"step": 125
},
{
"epoch": 0.36258992805755397,
"grad_norm": 0.482809160276131,
"learning_rate": 1.9099010378415844e-06,
"loss": 0.9853061437606812,
"step": 126
},
{
"epoch": 0.36546762589928056,
"grad_norm": 0.5999988506556185,
"learning_rate": 1.90791919142576e-06,
"loss": 1.0587449073791504,
"step": 127
},
{
"epoch": 0.3683453237410072,
"grad_norm": 0.4786126684230341,
"learning_rate": 1.9059168360703803e-06,
"loss": 1.0581047534942627,
"step": 128
},
{
"epoch": 0.3712230215827338,
"grad_norm": 0.4721916332432008,
"learning_rate": 1.9038940170065466e-06,
"loss": 1.2065491676330566,
"step": 129
},
{
"epoch": 0.37410071942446044,
"grad_norm": 0.43066899683927695,
"learning_rate": 1.9018507799276131e-06,
"loss": 0.9673759937286377,
"step": 130
},
{
"epoch": 0.376978417266187,
"grad_norm": 0.4819631279931072,
"learning_rate": 1.8997871709881567e-06,
"loss": 1.055248498916626,
"step": 131
},
{
"epoch": 0.37985611510791367,
"grad_norm": 0.49209482786786624,
"learning_rate": 1.8977032368029332e-06,
"loss": 1.0030591487884521,
"step": 132
},
{
"epoch": 0.38273381294964026,
"grad_norm": 0.5018665575618141,
"learning_rate": 1.8955990244458233e-06,
"loss": 1.1191744804382324,
"step": 133
},
{
"epoch": 0.3856115107913669,
"grad_norm": 0.5576523584422169,
"learning_rate": 1.8934745814487712e-06,
"loss": 1.0733585357666016,
"step": 134
},
{
"epoch": 0.38848920863309355,
"grad_norm": 0.5008878898473639,
"learning_rate": 1.8913299558007095e-06,
"loss": 1.1800191402435303,
"step": 135
},
{
"epoch": 0.39136690647482014,
"grad_norm": 0.43511113369960597,
"learning_rate": 1.8891651959464758e-06,
"loss": 1.027364730834961,
"step": 136
},
{
"epoch": 0.3942446043165468,
"grad_norm": 0.4765093745936347,
"learning_rate": 1.8869803507857185e-06,
"loss": 1.107445478439331,
"step": 137
},
{
"epoch": 0.3971223021582734,
"grad_norm": 0.46129319329450635,
"learning_rate": 1.884775469671791e-06,
"loss": 1.187384009361267,
"step": 138
},
{
"epoch": 0.4,
"grad_norm": 0.48437113838726986,
"learning_rate": 1.8825506024106396e-06,
"loss": 1.0362842082977295,
"step": 139
},
{
"epoch": 0.4028776978417266,
"grad_norm": 0.4645234599714407,
"learning_rate": 1.8803057992596747e-06,
"loss": 1.0802561044692993,
"step": 140
},
{
"epoch": 0.40575539568345326,
"grad_norm": 0.4280947125747255,
"learning_rate": 1.8780411109266385e-06,
"loss": 1.1036925315856934,
"step": 141
},
{
"epoch": 0.40863309352517985,
"grad_norm": 0.407505252457033,
"learning_rate": 1.8757565885684584e-06,
"loss": 1.0300638675689697,
"step": 142
},
{
"epoch": 0.4115107913669065,
"grad_norm": 0.3623983611321653,
"learning_rate": 1.8734522837900915e-06,
"loss": 0.982805609703064,
"step": 143
},
{
"epoch": 0.4143884892086331,
"grad_norm": 0.4850168244727846,
"learning_rate": 1.8711282486433594e-06,
"loss": 1.1880314350128174,
"step": 144
},
{
"epoch": 0.4172661870503597,
"grad_norm": 0.39657849815671453,
"learning_rate": 1.8687845356257705e-06,
"loss": 1.001549482345581,
"step": 145
},
{
"epoch": 0.4201438848920863,
"grad_norm": 0.43575502402332317,
"learning_rate": 1.866421197679338e-06,
"loss": 1.1122441291809082,
"step": 146
},
{
"epoch": 0.42302158273381296,
"grad_norm": 0.42214821227567706,
"learning_rate": 1.8640382881893797e-06,
"loss": 0.9325125217437744,
"step": 147
},
{
"epoch": 0.42589928057553955,
"grad_norm": 0.49793043845219986,
"learning_rate": 1.8616358609833144e-06,
"loss": 1.1867802143096924,
"step": 148
},
{
"epoch": 0.4287769784172662,
"grad_norm": 0.5003465171318868,
"learning_rate": 1.8592139703294456e-06,
"loss": 1.1244676113128662,
"step": 149
},
{
"epoch": 0.4316546762589928,
"grad_norm": 0.4321152662969621,
"learning_rate": 1.8567726709357365e-06,
"loss": 1.0435458421707153,
"step": 150
},
{
"epoch": 0.43453237410071943,
"grad_norm": 0.42883319369137934,
"learning_rate": 1.854312017948572e-06,
"loss": 0.9999338388442993,
"step": 151
},
{
"epoch": 0.437410071942446,
"grad_norm": 0.41457898959091355,
"learning_rate": 1.8518320669515145e-06,
"loss": 1.0550625324249268,
"step": 152
},
{
"epoch": 0.44028776978417267,
"grad_norm": 0.37601633308325455,
"learning_rate": 1.8493328739640494e-06,
"loss": 1.1828843355178833,
"step": 153
},
{
"epoch": 0.44316546762589926,
"grad_norm": 0.48403246563497276,
"learning_rate": 1.8468144954403174e-06,
"loss": 1.0219019651412964,
"step": 154
},
{
"epoch": 0.4460431654676259,
"grad_norm": 0.4080458449115876,
"learning_rate": 1.8442769882678397e-06,
"loss": 1.064319372177124,
"step": 155
},
{
"epoch": 0.4489208633093525,
"grad_norm": 0.4307991968666485,
"learning_rate": 1.8417204097662348e-06,
"loss": 0.9629073143005371,
"step": 156
},
{
"epoch": 0.45179856115107914,
"grad_norm": 0.42498698874553337,
"learning_rate": 1.8391448176859221e-06,
"loss": 0.9967993497848511,
"step": 157
},
{
"epoch": 0.4546762589928058,
"grad_norm": 0.3864381339362373,
"learning_rate": 1.8365502702068176e-06,
"loss": 0.9952638149261475,
"step": 158
},
{
"epoch": 0.45755395683453237,
"grad_norm": 0.40273255363591914,
"learning_rate": 1.8339368259370196e-06,
"loss": 1.1115927696228027,
"step": 159
},
{
"epoch": 0.460431654676259,
"grad_norm": 0.41064614803619237,
"learning_rate": 1.8313045439114854e-06,
"loss": 1.0840253829956055,
"step": 160
},
{
"epoch": 0.4633093525179856,
"grad_norm": 0.5037888020430983,
"learning_rate": 1.8286534835906967e-06,
"loss": 0.9446510076522827,
"step": 161
},
{
"epoch": 0.46618705035971225,
"grad_norm": 0.5305475182215801,
"learning_rate": 1.8259837048593187e-06,
"loss": 1.074650526046753,
"step": 162
},
{
"epoch": 0.46906474820143884,
"grad_norm": 0.4475049031892367,
"learning_rate": 1.8232952680248439e-06,
"loss": 1.1149487495422363,
"step": 163
},
{
"epoch": 0.4719424460431655,
"grad_norm": 0.36490672571189003,
"learning_rate": 1.8205882338162333e-06,
"loss": 1.0125229358673096,
"step": 164
},
{
"epoch": 0.4748201438848921,
"grad_norm": 0.41629528136632005,
"learning_rate": 1.8178626633825417e-06,
"loss": 1.079350471496582,
"step": 165
},
{
"epoch": 0.4776978417266187,
"grad_norm": 0.49379487126662264,
"learning_rate": 1.8151186182915383e-06,
"loss": 1.057182788848877,
"step": 166
},
{
"epoch": 0.4805755395683453,
"grad_norm": 0.4055282585841769,
"learning_rate": 1.8123561605283163e-06,
"loss": 0.9132846593856812,
"step": 167
},
{
"epoch": 0.48345323741007196,
"grad_norm": 0.3366384943701116,
"learning_rate": 1.8095753524938903e-06,
"loss": 1.0845749378204346,
"step": 168
},
{
"epoch": 0.48633093525179855,
"grad_norm": 0.40679871398886064,
"learning_rate": 1.8067762570037885e-06,
"loss": 1.042318344116211,
"step": 169
},
{
"epoch": 0.4892086330935252,
"grad_norm": 0.35055813919371737,
"learning_rate": 1.8039589372866347e-06,
"loss": 1.01352858543396,
"step": 170
},
{
"epoch": 0.4920863309352518,
"grad_norm": 0.441317686613448,
"learning_rate": 1.8011234569827172e-06,
"loss": 1.0617296695709229,
"step": 171
},
{
"epoch": 0.4949640287769784,
"grad_norm": 0.41119908042871567,
"learning_rate": 1.798269880142554e-06,
"loss": 1.0274578332901,
"step": 172
},
{
"epoch": 0.497841726618705,
"grad_norm": 0.4140046822969208,
"learning_rate": 1.7953982712254446e-06,
"loss": 1.1444511413574219,
"step": 173
},
{
"epoch": 0.5007194244604316,
"grad_norm": 0.360331860974586,
"learning_rate": 1.7925086950980134e-06,
"loss": 1.0049320459365845,
"step": 174
},
{
"epoch": 0.5035971223021583,
"grad_norm": 0.46448969705810783,
"learning_rate": 1.7896012170327466e-06,
"loss": 1.0649842023849487,
"step": 175
},
{
"epoch": 0.5064748201438849,
"grad_norm": 0.379616869145886,
"learning_rate": 1.7866759027065149e-06,
"loss": 1.0953956842422485,
"step": 176
},
{
"epoch": 0.5093525179856115,
"grad_norm": 0.46655176135320064,
"learning_rate": 1.783732818199092e-06,
"loss": 1.1652858257293701,
"step": 177
},
{
"epoch": 0.5122302158273381,
"grad_norm": 0.4331501137557297,
"learning_rate": 1.7807720299916613e-06,
"loss": 1.0503497123718262,
"step": 178
},
{
"epoch": 0.5151079136690647,
"grad_norm": 0.4528588241142532,
"learning_rate": 1.7777936049653146e-06,
"loss": 0.9811398983001709,
"step": 179
},
{
"epoch": 0.5179856115107914,
"grad_norm": 0.3875771596875948,
"learning_rate": 1.77479761039954e-06,
"loss": 1.0009725093841553,
"step": 180
},
{
"epoch": 0.520863309352518,
"grad_norm": 0.379690279869989,
"learning_rate": 1.7717841139707038e-06,
"loss": 1.1046425104141235,
"step": 181
},
{
"epoch": 0.5237410071942447,
"grad_norm": 0.40872536920209385,
"learning_rate": 1.76875318375052e-06,
"loss": 0.9237216711044312,
"step": 182
},
{
"epoch": 0.5266187050359712,
"grad_norm": 0.3943687213438678,
"learning_rate": 1.7657048882045149e-06,
"loss": 1.0758323669433594,
"step": 183
},
{
"epoch": 0.5294964028776978,
"grad_norm": 0.4366095757286378,
"learning_rate": 1.7626392961904783e-06,
"loss": 1.103142261505127,
"step": 184
},
{
"epoch": 0.5323741007194245,
"grad_norm": 0.4230178437251199,
"learning_rate": 1.7595564769569094e-06,
"loss": 0.9749042987823486,
"step": 185
},
{
"epoch": 0.5352517985611511,
"grad_norm": 0.3416533764601269,
"learning_rate": 1.7564565001414522e-06,
"loss": 0.8281745910644531,
"step": 186
},
{
"epoch": 0.5381294964028777,
"grad_norm": 0.33708804570536016,
"learning_rate": 1.753339435769322e-06,
"loss": 1.102489709854126,
"step": 187
},
{
"epoch": 0.5410071942446043,
"grad_norm": 0.37891052167871625,
"learning_rate": 1.7502053542517244e-06,
"loss": 1.0745601654052734,
"step": 188
},
{
"epoch": 0.543884892086331,
"grad_norm": 0.3738524538746129,
"learning_rate": 1.7470543263842642e-06,
"loss": 1.111441969871521,
"step": 189
},
{
"epoch": 0.5467625899280576,
"grad_norm": 0.39050521266738286,
"learning_rate": 1.7438864233453473e-06,
"loss": 1.1269681453704834,
"step": 190
},
{
"epoch": 0.5496402877697841,
"grad_norm": 0.31482595417583814,
"learning_rate": 1.7407017166945706e-06,
"loss": 1.0488468408584595,
"step": 191
},
{
"epoch": 0.5525179856115108,
"grad_norm": 0.3136290302777941,
"learning_rate": 1.7375002783711076e-06,
"loss": 0.9358277320861816,
"step": 192
},
{
"epoch": 0.5553956834532374,
"grad_norm": 0.3338322035032311,
"learning_rate": 1.7342821806920829e-06,
"loss": 1.072392225265503,
"step": 193
},
{
"epoch": 0.5582733812949641,
"grad_norm": 0.3471468140531117,
"learning_rate": 1.7310474963509378e-06,
"loss": 1.0486462116241455,
"step": 194
},
{
"epoch": 0.5611510791366906,
"grad_norm": 0.38596584622793473,
"learning_rate": 1.72779629841579e-06,
"loss": 0.9716250896453857,
"step": 195
},
{
"epoch": 0.5640287769784172,
"grad_norm": 0.3798579435668601,
"learning_rate": 1.7245286603277803e-06,
"loss": 1.033220648765564,
"step": 196
},
{
"epoch": 0.5669064748201439,
"grad_norm": 0.3425201594360531,
"learning_rate": 1.721244655899416e-06,
"loss": 0.9934518337249756,
"step": 197
},
{
"epoch": 0.5697841726618705,
"grad_norm": 0.3427994445976512,
"learning_rate": 1.717944359312904e-06,
"loss": 1.134864330291748,
"step": 198
},
{
"epoch": 0.5726618705035971,
"grad_norm": 0.3723446907907705,
"learning_rate": 1.7146278451184717e-06,
"loss": 1.1344006061553955,
"step": 199
},
{
"epoch": 0.5755395683453237,
"grad_norm": 0.33571105673864887,
"learning_rate": 1.7112951882326869e-06,
"loss": 0.9915531873703003,
"step": 200
},
{
"epoch": 0.5784172661870504,
"grad_norm": 0.3692434093950694,
"learning_rate": 1.7079464639367632e-06,
"loss": 1.028855800628662,
"step": 201
},
{
"epoch": 0.581294964028777,
"grad_norm": 0.3094858241361718,
"learning_rate": 1.7045817478748598e-06,
"loss": 0.9810290932655334,
"step": 202
},
{
"epoch": 0.5841726618705037,
"grad_norm": 0.33222888657473965,
"learning_rate": 1.701201116052374e-06,
"loss": 0.8440494537353516,
"step": 203
},
{
"epoch": 0.5870503597122302,
"grad_norm": 0.3220131017798883,
"learning_rate": 1.6978046448342226e-06,
"loss": 1.0670182704925537,
"step": 204
},
{
"epoch": 0.5899280575539568,
"grad_norm": 0.4023809574277352,
"learning_rate": 1.6943924109431179e-06,
"loss": 1.038970708847046,
"step": 205
},
{
"epoch": 0.5928057553956835,
"grad_norm": 0.36736787076416194,
"learning_rate": 1.690964491457834e-06,
"loss": 1.0510860681533813,
"step": 206
},
{
"epoch": 0.5956834532374101,
"grad_norm": 0.3488299733915227,
"learning_rate": 1.687520963811467e-06,
"loss": 0.913723886013031,
"step": 207
},
{
"epoch": 0.5985611510791367,
"grad_norm": 0.48422640633599995,
"learning_rate": 1.684061905789684e-06,
"loss": 0.9846644401550293,
"step": 208
},
{
"epoch": 0.6014388489208633,
"grad_norm": 0.31689088814964833,
"learning_rate": 1.6805873955289678e-06,
"loss": 1.038316249847412,
"step": 209
},
{
"epoch": 0.60431654676259,
"grad_norm": 0.31535153436268476,
"learning_rate": 1.6770975115148503e-06,
"loss": 1.1639020442962646,
"step": 210
},
{
"epoch": 0.6071942446043166,
"grad_norm": 0.32901232219616355,
"learning_rate": 1.6735923325801406e-06,
"loss": 1.0157148838043213,
"step": 211
},
{
"epoch": 0.6100719424460431,
"grad_norm": 0.32346917708292794,
"learning_rate": 1.670071937903144e-06,
"loss": 0.9528936743736267,
"step": 212
},
{
"epoch": 0.6129496402877698,
"grad_norm": 0.3431066323853164,
"learning_rate": 1.6665364070058736e-06,
"loss": 1.089216709136963,
"step": 213
},
{
"epoch": 0.6158273381294964,
"grad_norm": 0.3096527786452577,
"learning_rate": 1.6629858197522535e-06,
"loss": 1.0500307083129883,
"step": 214
},
{
"epoch": 0.6187050359712231,
"grad_norm": 0.34740584906307037,
"learning_rate": 1.6594202563463149e-06,
"loss": 0.9973140954971313,
"step": 215
},
{
"epoch": 0.6215827338129496,
"grad_norm": 0.3076575246625187,
"learning_rate": 1.6558397973303851e-06,
"loss": 0.9394571781158447,
"step": 216
},
{
"epoch": 0.6244604316546762,
"grad_norm": 0.35489785566062343,
"learning_rate": 1.652244523583267e-06,
"loss": 0.9569211006164551,
"step": 217
},
{
"epoch": 0.6273381294964029,
"grad_norm": 0.33512033241700295,
"learning_rate": 1.6486345163184129e-06,
"loss": 1.0791332721710205,
"step": 218
},
{
"epoch": 0.6302158273381295,
"grad_norm": 0.3626683432890907,
"learning_rate": 1.6450098570820896e-06,
"loss": 1.0544092655181885,
"step": 219
},
{
"epoch": 0.6330935251798561,
"grad_norm": 0.358904663222277,
"learning_rate": 1.6413706277515373e-06,
"loss": 0.9803202152252197,
"step": 220
},
{
"epoch": 0.6359712230215827,
"grad_norm": 0.32815545381559164,
"learning_rate": 1.6377169105331182e-06,
"loss": 0.9604759216308594,
"step": 221
},
{
"epoch": 0.6388489208633094,
"grad_norm": 0.32597650541963474,
"learning_rate": 1.6340487879604617e-06,
"loss": 1.0064623355865479,
"step": 222
},
{
"epoch": 0.641726618705036,
"grad_norm": 0.3506857994251924,
"learning_rate": 1.630366342892598e-06,
"loss": 1.014646053314209,
"step": 223
},
{
"epoch": 0.6446043165467625,
"grad_norm": 0.41874730381325936,
"learning_rate": 1.626669658512088e-06,
"loss": 0.9256491661071777,
"step": 224
},
{
"epoch": 0.6474820143884892,
"grad_norm": 0.3188217704851316,
"learning_rate": 1.6229588183231434e-06,
"loss": 0.9941632151603699,
"step": 225
},
{
"epoch": 0.6503597122302158,
"grad_norm": 0.32011807732834047,
"learning_rate": 1.6192339061497413e-06,
"loss": 0.9773931503295898,
"step": 226
},
{
"epoch": 0.6532374100719425,
"grad_norm": 0.3189594924614036,
"learning_rate": 1.615495006133729e-06,
"loss": 0.9987149238586426,
"step": 227
},
{
"epoch": 0.6561151079136691,
"grad_norm": 0.35037906857078205,
"learning_rate": 1.6117422027329263e-06,
"loss": 0.9832175374031067,
"step": 228
},
{
"epoch": 0.6589928057553956,
"grad_norm": 0.38864609779113907,
"learning_rate": 1.6079755807192136e-06,
"loss": 1.0916314125061035,
"step": 229
},
{
"epoch": 0.6618705035971223,
"grad_norm": 0.30929668859135395,
"learning_rate": 1.604195225176621e-06,
"loss": 0.9629628658294678,
"step": 230
},
{
"epoch": 0.6647482014388489,
"grad_norm": 0.32671840835956706,
"learning_rate": 1.6004012214994035e-06,
"loss": 0.9343143701553345,
"step": 231
},
{
"epoch": 0.6676258992805756,
"grad_norm": 0.33641494062099064,
"learning_rate": 1.5965936553901136e-06,
"loss": 1.0556144714355469,
"step": 232
},
{
"epoch": 0.6705035971223021,
"grad_norm": 0.3187574882066994,
"learning_rate": 1.592772612857665e-06,
"loss": 0.9991135597229004,
"step": 233
},
{
"epoch": 0.6733812949640288,
"grad_norm": 0.3480834665064568,
"learning_rate": 1.5889381802153896e-06,
"loss": 1.0254430770874023,
"step": 234
},
{
"epoch": 0.6762589928057554,
"grad_norm": 0.3072999299525753,
"learning_rate": 1.585090444079087e-06,
"loss": 0.985275149345398,
"step": 235
},
{
"epoch": 0.679136690647482,
"grad_norm": 0.35851120910777423,
"learning_rate": 1.5812294913650694e-06,
"loss": 0.9904893636703491,
"step": 236
},
{
"epoch": 0.6820143884892086,
"grad_norm": 0.31773614133543254,
"learning_rate": 1.5773554092881984e-06,
"loss": 1.0499398708343506,
"step": 237
},
{
"epoch": 0.6848920863309352,
"grad_norm": 0.30921377977469555,
"learning_rate": 1.5734682853599122e-06,
"loss": 1.0339066982269287,
"step": 238
},
{
"epoch": 0.6877697841726619,
"grad_norm": 0.35671236366028325,
"learning_rate": 1.5695682073862525e-06,
"loss": 0.9532429575920105,
"step": 239
},
{
"epoch": 0.6906474820143885,
"grad_norm": 0.3015523412700019,
"learning_rate": 1.5656552634658776e-06,
"loss": 1.038594365119934,
"step": 240
},
{
"epoch": 0.6935251798561151,
"grad_norm": 0.36455740495219996,
"learning_rate": 1.561729541988076e-06,
"loss": 1.0890312194824219,
"step": 241
},
{
"epoch": 0.6964028776978417,
"grad_norm": 0.3680930663786755,
"learning_rate": 1.5577911316307658e-06,
"loss": 1.0601049661636353,
"step": 242
},
{
"epoch": 0.6992805755395683,
"grad_norm": 0.2880128205816018,
"learning_rate": 1.5538401213584948e-06,
"loss": 0.8997229337692261,
"step": 243
},
{
"epoch": 0.702158273381295,
"grad_norm": 0.32285821272462195,
"learning_rate": 1.549876600420429e-06,
"loss": 0.9955217242240906,
"step": 244
},
{
"epoch": 0.7050359712230215,
"grad_norm": 0.29115419774124135,
"learning_rate": 1.545900658348338e-06,
"loss": 0.8849923610687256,
"step": 245
},
{
"epoch": 0.7079136690647482,
"grad_norm": 0.2706455457776965,
"learning_rate": 1.5419123849545708e-06,
"loss": 0.9076135754585266,
"step": 246
},
{
"epoch": 0.7107913669064748,
"grad_norm": 0.36531081434638296,
"learning_rate": 1.5379118703300282e-06,
"loss": 1.0192983150482178,
"step": 247
},
{
"epoch": 0.7136690647482015,
"grad_norm": 0.35715016923044796,
"learning_rate": 1.533899204842128e-06,
"loss": 1.034571647644043,
"step": 248
},
{
"epoch": 0.7165467625899281,
"grad_norm": 0.30344023495404443,
"learning_rate": 1.529874479132763e-06,
"loss": 0.8733739256858826,
"step": 249
},
{
"epoch": 0.7194244604316546,
"grad_norm": 0.33635279495274495,
"learning_rate": 1.5258377841162533e-06,
"loss": 0.9661943316459656,
"step": 250
},
{
"epoch": 0.7223021582733813,
"grad_norm": 0.3375538070903443,
"learning_rate": 1.5217892109772935e-06,
"loss": 1.0986987352371216,
"step": 251
},
{
"epoch": 0.7251798561151079,
"grad_norm": 0.2662157952853344,
"learning_rate": 1.5177288511688927e-06,
"loss": 0.9541377425193787,
"step": 252
},
{
"epoch": 0.7280575539568346,
"grad_norm": 0.30886908842504907,
"learning_rate": 1.5136567964103076e-06,
"loss": 1.0753300189971924,
"step": 253
},
{
"epoch": 0.7309352517985611,
"grad_norm": 0.30701154449906404,
"learning_rate": 1.5095731386849723e-06,
"loss": 0.9976100921630859,
"step": 254
},
{
"epoch": 0.7338129496402878,
"grad_norm": 0.303376410309656,
"learning_rate": 1.5054779702384198e-06,
"loss": 1.0058211088180542,
"step": 255
},
{
"epoch": 0.7366906474820144,
"grad_norm": 0.3652950100731028,
"learning_rate": 1.5013713835761975e-06,
"loss": 1.0633628368377686,
"step": 256
},
{
"epoch": 0.739568345323741,
"grad_norm": 0.3390438283446466,
"learning_rate": 1.497253471461779e-06,
"loss": 0.8934162259101868,
"step": 257
},
{
"epoch": 0.7424460431654676,
"grad_norm": 0.3246861490189164,
"learning_rate": 1.493124326914467e-06,
"loss": 1.0370798110961914,
"step": 258
},
{
"epoch": 0.7453237410071942,
"grad_norm": 0.30533785722726153,
"learning_rate": 1.4889840432072945e-06,
"loss": 0.9263877868652344,
"step": 259
},
{
"epoch": 0.7482014388489209,
"grad_norm": 0.31370371579277184,
"learning_rate": 1.484832713864915e-06,
"loss": 0.9624022245407104,
"step": 260
},
{
"epoch": 0.7510791366906475,
"grad_norm": 0.32008108759680487,
"learning_rate": 1.4806704326614918e-06,
"loss": 0.8735676407814026,
"step": 261
},
{
"epoch": 0.753956834532374,
"grad_norm": 0.3566203918476789,
"learning_rate": 1.4764972936185795e-06,
"loss": 1.0989207029342651,
"step": 262
},
{
"epoch": 0.7568345323741007,
"grad_norm": 0.36407543844243995,
"learning_rate": 1.4723133910029996e-06,
"loss": 0.9619901180267334,
"step": 263
},
{
"epoch": 0.7597122302158273,
"grad_norm": 0.29266238338520917,
"learning_rate": 1.4681188193247115e-06,
"loss": 0.9620180130004883,
"step": 264
},
{
"epoch": 0.762589928057554,
"grad_norm": 0.32115744502647553,
"learning_rate": 1.4639136733346776e-06,
"loss": 0.9723782539367676,
"step": 265
},
{
"epoch": 0.7654676258992805,
"grad_norm": 0.32955472439646183,
"learning_rate": 1.4596980480227222e-06,
"loss": 1.01808762550354,
"step": 266
},
{
"epoch": 0.7683453237410072,
"grad_norm": 0.30150737980380415,
"learning_rate": 1.4554720386153869e-06,
"loss": 1.0717837810516357,
"step": 267
},
{
"epoch": 0.7712230215827338,
"grad_norm": 0.2886477892998947,
"learning_rate": 1.4512357405737797e-06,
"loss": 0.8863840699195862,
"step": 268
},
{
"epoch": 0.7741007194244605,
"grad_norm": 0.3310173561528487,
"learning_rate": 1.4469892495914172e-06,
"loss": 0.964940071105957,
"step": 269
},
{
"epoch": 0.7769784172661871,
"grad_norm": 0.2856238879913019,
"learning_rate": 1.4427326615920641e-06,
"loss": 0.9396013021469116,
"step": 270
},
{
"epoch": 0.7798561151079136,
"grad_norm": 0.2842593394923139,
"learning_rate": 1.4384660727275662e-06,
"loss": 1.0147062540054321,
"step": 271
},
{
"epoch": 0.7827338129496403,
"grad_norm": 0.3377858534929305,
"learning_rate": 1.4341895793756781e-06,
"loss": 1.0019702911376953,
"step": 272
},
{
"epoch": 0.7856115107913669,
"grad_norm": 0.28919748050640776,
"learning_rate": 1.4299032781378863e-06,
"loss": 0.9657357931137085,
"step": 273
},
{
"epoch": 0.7884892086330936,
"grad_norm": 0.2761094660745925,
"learning_rate": 1.4256072658372278e-06,
"loss": 0.9581419229507446,
"step": 274
},
{
"epoch": 0.7913669064748201,
"grad_norm": 0.32811507081877733,
"learning_rate": 1.4213016395161016e-06,
"loss": 0.9768601655960083,
"step": 275
},
{
"epoch": 0.7942446043165468,
"grad_norm": 0.3197698268118257,
"learning_rate": 1.416986496434077e-06,
"loss": 1.0802795886993408,
"step": 276
},
{
"epoch": 0.7971223021582734,
"grad_norm": 0.32014493538109184,
"learning_rate": 1.412661934065698e-06,
"loss": 1.129173994064331,
"step": 277
},
{
"epoch": 0.8,
"grad_norm": 0.3408260667112233,
"learning_rate": 1.4083280500982796e-06,
"loss": 1.0172650814056396,
"step": 278
},
{
"epoch": 0.8028776978417266,
"grad_norm": 0.2994608201736648,
"learning_rate": 1.4039849424297022e-06,
"loss": 1.002464771270752,
"step": 279
},
{
"epoch": 0.8057553956834532,
"grad_norm": 0.27936016058449986,
"learning_rate": 1.3996327091661994e-06,
"loss": 0.9435924887657166,
"step": 280
},
{
"epoch": 0.8086330935251799,
"grad_norm": 0.3421589464369171,
"learning_rate": 1.3952714486201433e-06,
"loss": 0.9648728370666504,
"step": 281
},
{
"epoch": 0.8115107913669065,
"grad_norm": 0.3027125759086274,
"learning_rate": 1.3909012593078223e-06,
"loss": 1.0883413553237915,
"step": 282
},
{
"epoch": 0.814388489208633,
"grad_norm": 0.2718451517981759,
"learning_rate": 1.3865222399472154e-06,
"loss": 0.9606098532676697,
"step": 283
},
{
"epoch": 0.8172661870503597,
"grad_norm": 0.3439278935498304,
"learning_rate": 1.382134489455765e-06,
"loss": 1.006915807723999,
"step": 284
},
{
"epoch": 0.8201438848920863,
"grad_norm": 0.25579346143996035,
"learning_rate": 1.3777381069481396e-06,
"loss": 0.9337391257286072,
"step": 285
},
{
"epoch": 0.823021582733813,
"grad_norm": 0.3050859668016162,
"learning_rate": 1.373333191733995e-06,
"loss": 0.9900962710380554,
"step": 286
},
{
"epoch": 0.8258992805755395,
"grad_norm": 0.30270443732056235,
"learning_rate": 1.3689198433157332e-06,
"loss": 0.8408849835395813,
"step": 287
},
{
"epoch": 0.8287769784172662,
"grad_norm": 0.32722776782068325,
"learning_rate": 1.3644981613862523e-06,
"loss": 0.9334912300109863,
"step": 288
},
{
"epoch": 0.8316546762589928,
"grad_norm": 0.30271696679801074,
"learning_rate": 1.360068245826697e-06,
"loss": 0.9546651840209961,
"step": 289
},
{
"epoch": 0.8345323741007195,
"grad_norm": 0.30274211349049623,
"learning_rate": 1.3556301967041997e-06,
"loss": 0.9813221096992493,
"step": 290
},
{
"epoch": 0.837410071942446,
"grad_norm": 0.2635388567144702,
"learning_rate": 1.351184114269622e-06,
"loss": 0.9474866390228271,
"step": 291
},
{
"epoch": 0.8402877697841726,
"grad_norm": 0.3010633615089385,
"learning_rate": 1.34673009895529e-06,
"loss": 0.986327588558197,
"step": 292
},
{
"epoch": 0.8431654676258993,
"grad_norm": 0.3250052939342708,
"learning_rate": 1.3422682513727243e-06,
"loss": 0.9753819704055786,
"step": 293
},
{
"epoch": 0.8460431654676259,
"grad_norm": 0.33588340814315554,
"learning_rate": 1.3377986723103692e-06,
"loss": 0.9891970753669739,
"step": 294
},
{
"epoch": 0.8489208633093526,
"grad_norm": 0.31646789049784285,
"learning_rate": 1.3333214627313138e-06,
"loss": 0.9514651298522949,
"step": 295
},
{
"epoch": 0.8517985611510791,
"grad_norm": 0.2813259845708673,
"learning_rate": 1.3288367237710139e-06,
"loss": 0.9831069707870483,
"step": 296
},
{
"epoch": 0.8546762589928057,
"grad_norm": 0.30604078940680873,
"learning_rate": 1.3243445567350046e-06,
"loss": 0.9211512207984924,
"step": 297
},
{
"epoch": 0.8575539568345324,
"grad_norm": 0.3167520608936244,
"learning_rate": 1.319845063096615e-06,
"loss": 1.0003859996795654,
"step": 298
},
{
"epoch": 0.860431654676259,
"grad_norm": 0.31829945664441645,
"learning_rate": 1.3153383444946735e-06,
"loss": 0.8789474964141846,
"step": 299
},
{
"epoch": 0.8633093525179856,
"grad_norm": 0.31495160151302437,
"learning_rate": 1.3108245027312128e-06,
"loss": 1.0840336084365845,
"step": 300
},
{
"epoch": 0.8661870503597122,
"grad_norm": 0.30915276693739346,
"learning_rate": 1.3063036397691708e-06,
"loss": 1.0036927461624146,
"step": 301
},
{
"epoch": 0.8690647482014389,
"grad_norm": 0.2941453011820651,
"learning_rate": 1.3017758577300862e-06,
"loss": 1.0740652084350586,
"step": 302
},
{
"epoch": 0.8719424460431655,
"grad_norm": 0.29455577634561325,
"learning_rate": 1.297241258891793e-06,
"loss": 0.989548921585083,
"step": 303
},
{
"epoch": 0.874820143884892,
"grad_norm": 0.3299592819973091,
"learning_rate": 1.2926999456861096e-06,
"loss": 1.0820207595825195,
"step": 304
},
{
"epoch": 0.8776978417266187,
"grad_norm": 0.2673487326485298,
"learning_rate": 1.2881520206965243e-06,
"loss": 0.9292148351669312,
"step": 305
},
{
"epoch": 0.8805755395683453,
"grad_norm": 0.28532631258001817,
"learning_rate": 1.2835975866558792e-06,
"loss": 0.9342219233512878,
"step": 306
},
{
"epoch": 0.883453237410072,
"grad_norm": 0.3025210511532024,
"learning_rate": 1.2790367464440484e-06,
"loss": 0.9670717120170593,
"step": 307
},
{
"epoch": 0.8863309352517985,
"grad_norm": 0.28896814534982135,
"learning_rate": 1.2744696030856153e-06,
"loss": 0.9335446357727051,
"step": 308
},
{
"epoch": 0.8892086330935252,
"grad_norm": 0.3084903177297785,
"learning_rate": 1.2698962597475445e-06,
"loss": 0.9629756808280945,
"step": 309
},
{
"epoch": 0.8920863309352518,
"grad_norm": 0.32422156062771545,
"learning_rate": 1.2653168197368519e-06,
"loss": 0.9787018299102783,
"step": 310
},
{
"epoch": 0.8949640287769784,
"grad_norm": 0.30159646505494975,
"learning_rate": 1.2607313864982697e-06,
"loss": 0.9642415642738342,
"step": 311
},
{
"epoch": 0.897841726618705,
"grad_norm": 0.31856979960613646,
"learning_rate": 1.2561400636119124e-06,
"loss": 1.0449435710906982,
"step": 312
},
{
"epoch": 0.9007194244604316,
"grad_norm": 0.3458241524079836,
"learning_rate": 1.2515429547909346e-06,
"loss": 1.0429253578186035,
"step": 313
},
{
"epoch": 0.9035971223021583,
"grad_norm": 0.30946600198200386,
"learning_rate": 1.246940163879189e-06,
"loss": 1.0028799772262573,
"step": 314
},
{
"epoch": 0.9064748201438849,
"grad_norm": 0.31702914654332653,
"learning_rate": 1.2423317948488813e-06,
"loss": 0.9168355464935303,
"step": 315
},
{
"epoch": 0.9093525179856116,
"grad_norm": 0.2568478715797543,
"learning_rate": 1.23771795179822e-06,
"loss": 0.9950739145278931,
"step": 316
},
{
"epoch": 0.9122302158273381,
"grad_norm": 0.31321859143517206,
"learning_rate": 1.233098738949067e-06,
"loss": 1.0762598514556885,
"step": 317
},
{
"epoch": 0.9151079136690647,
"grad_norm": 0.30023123590979206,
"learning_rate": 1.2284742606445817e-06,
"loss": 0.9474934339523315,
"step": 318
},
{
"epoch": 0.9179856115107914,
"grad_norm": 0.29274105879380363,
"learning_rate": 1.2238446213468653e-06,
"loss": 0.9199013710021973,
"step": 319
},
{
"epoch": 0.920863309352518,
"grad_norm": 0.3343568620635621,
"learning_rate": 1.2192099256345999e-06,
"loss": 1.0041630268096924,
"step": 320
},
{
"epoch": 0.9237410071942446,
"grad_norm": 0.3310327147204012,
"learning_rate": 1.2145702782006862e-06,
"loss": 0.8189488649368286,
"step": 321
},
{
"epoch": 0.9266187050359712,
"grad_norm": 0.2594459346828645,
"learning_rate": 1.2099257838498797e-06,
"loss": 0.8715246915817261,
"step": 322
},
{
"epoch": 0.9294964028776979,
"grad_norm": 0.34108254418878664,
"learning_rate": 1.205276547496423e-06,
"loss": 0.9883395433425903,
"step": 323
},
{
"epoch": 0.9323741007194245,
"grad_norm": 0.2900507060656894,
"learning_rate": 1.200622674161675e-06,
"loss": 1.0369722843170166,
"step": 324
},
{
"epoch": 0.935251798561151,
"grad_norm": 0.2793616872911977,
"learning_rate": 1.195964268971739e-06,
"loss": 0.923148512840271,
"step": 325
},
{
"epoch": 0.9381294964028777,
"grad_norm": 0.31499649360389437,
"learning_rate": 1.191301437155088e-06,
"loss": 0.9886481165885925,
"step": 326
},
{
"epoch": 0.9410071942446043,
"grad_norm": 0.3421793579841603,
"learning_rate": 1.186634284040189e-06,
"loss": 1.049983263015747,
"step": 327
},
{
"epoch": 0.943884892086331,
"grad_norm": 0.2770772261448908,
"learning_rate": 1.1819629150531216e-06,
"loss": 0.9720487594604492,
"step": 328
},
{
"epoch": 0.9467625899280575,
"grad_norm": 0.31715029343065254,
"learning_rate": 1.1772874357151978e-06,
"loss": 0.9858945608139038,
"step": 329
},
{
"epoch": 0.9496402877697842,
"grad_norm": 0.27382247211499205,
"learning_rate": 1.1726079516405775e-06,
"loss": 0.8920480012893677,
"step": 330
},
{
"epoch": 0.9525179856115108,
"grad_norm": 0.3233112136350598,
"learning_rate": 1.1679245685338845e-06,
"loss": 1.059034824371338,
"step": 331
},
{
"epoch": 0.9553956834532374,
"grad_norm": 0.3014575533302111,
"learning_rate": 1.1632373921878167e-06,
"loss": 0.9916867017745972,
"step": 332
},
{
"epoch": 0.958273381294964,
"grad_norm": 0.2733813212594252,
"learning_rate": 1.1585465284807575e-06,
"loss": 1.0110840797424316,
"step": 333
},
{
"epoch": 0.9611510791366906,
"grad_norm": 0.28448057189574405,
"learning_rate": 1.1538520833743843e-06,
"loss": 0.9681780338287354,
"step": 334
},
{
"epoch": 0.9640287769784173,
"grad_norm": 0.28115226388230347,
"learning_rate": 1.1491541629112744e-06,
"loss": 0.9256088733673096,
"step": 335
},
{
"epoch": 0.9669064748201439,
"grad_norm": 0.29939601958186174,
"learning_rate": 1.1444528732125096e-06,
"loss": 0.9332914352416992,
"step": 336
},
{
"epoch": 0.9697841726618706,
"grad_norm": 0.32298682544185786,
"learning_rate": 1.1397483204752789e-06,
"loss": 0.9759551882743835,
"step": 337
},
{
"epoch": 0.9726618705035971,
"grad_norm": 0.32833751314810994,
"learning_rate": 1.1350406109704804e-06,
"loss": 0.955263614654541,
"step": 338
},
{
"epoch": 0.9755395683453237,
"grad_norm": 0.3075747126988841,
"learning_rate": 1.1303298510403204e-06,
"loss": 1.0056906938552856,
"step": 339
},
{
"epoch": 0.9784172661870504,
"grad_norm": 0.3107177190802721,
"learning_rate": 1.1256161470959105e-06,
"loss": 1.0631227493286133,
"step": 340
},
{
"epoch": 0.981294964028777,
"grad_norm": 0.26705526854232686,
"learning_rate": 1.1208996056148645e-06,
"loss": 0.901911735534668,
"step": 341
},
{
"epoch": 0.9841726618705036,
"grad_norm": 0.35096026513434014,
"learning_rate": 1.116180333138894e-06,
"loss": 0.9325671195983887,
"step": 342
},
{
"epoch": 0.9870503597122302,
"grad_norm": 0.3163252628308116,
"learning_rate": 1.1114584362714004e-06,
"loss": 0.9670236706733704,
"step": 343
},
{
"epoch": 0.9899280575539569,
"grad_norm": 0.3187089894926652,
"learning_rate": 1.1067340216750666e-06,
"loss": 0.9988418221473694,
"step": 344
},
{
"epoch": 0.9928057553956835,
"grad_norm": 0.31077878018423455,
"learning_rate": 1.1020071960694498e-06,
"loss": 0.9381593465805054,
"step": 345
},
{
"epoch": 0.99568345323741,
"grad_norm": 0.3019571122091896,
"learning_rate": 1.0972780662285681e-06,
"loss": 0.993405818939209,
"step": 346
},
{
"epoch": 0.9985611510791367,
"grad_norm": 0.30414175155215467,
"learning_rate": 1.0925467389784904e-06,
"loss": 0.9964547753334045,
"step": 347
},
{
"epoch": 1.0,
"grad_norm": 0.39789673725223623,
"learning_rate": 1.0878133211949227e-06,
"loss": 0.8202004432678223,
"step": 348
},
{
"epoch": 1.0028776978417266,
"grad_norm": 0.2656098302983397,
"learning_rate": 1.0830779198007942e-06,
"loss": 0.9116101264953613,
"step": 349
},
{
"epoch": 1.0057553956834533,
"grad_norm": 0.3008389282200421,
"learning_rate": 1.0783406417638417e-06,
"loss": 0.9478936791419983,
"step": 350
},
{
"epoch": 1.00863309352518,
"grad_norm": 0.35871582917260014,
"learning_rate": 1.0736015940941926e-06,
"loss": 0.8595709800720215,
"step": 351
},
{
"epoch": 1.0115107913669066,
"grad_norm": 0.31305380574483693,
"learning_rate": 1.0688608838419494e-06,
"loss": 0.8735829591751099,
"step": 352
},
{
"epoch": 1.014388489208633,
"grad_norm": 0.3548820441522539,
"learning_rate": 1.0641186180947708e-06,
"loss": 0.9741727113723755,
"step": 353
},
{
"epoch": 1.0172661870503596,
"grad_norm": 0.36321462939223775,
"learning_rate": 1.059374903975451e-06,
"loss": 0.8974572420120239,
"step": 354
},
{
"epoch": 1.0201438848920863,
"grad_norm": 0.29496183204532933,
"learning_rate": 1.0546298486395032e-06,
"loss": 0.9210361242294312,
"step": 355
},
{
"epoch": 1.023021582733813,
"grad_norm": 0.2928921022780455,
"learning_rate": 1.0498835592727356e-06,
"loss": 0.9430476427078247,
"step": 356
},
{
"epoch": 1.0258992805755396,
"grad_norm": 0.3049343344878922,
"learning_rate": 1.0451361430888335e-06,
"loss": 0.861330509185791,
"step": 357
},
{
"epoch": 1.0287769784172662,
"grad_norm": 0.329234989560513,
"learning_rate": 1.0403877073269346e-06,
"loss": 0.9548070430755615,
"step": 358
},
{
"epoch": 1.0316546762589929,
"grad_norm": 0.2927660935027829,
"learning_rate": 1.0356383592492083e-06,
"loss": 0.9394206404685974,
"step": 359
},
{
"epoch": 1.0345323741007195,
"grad_norm": 0.28029486425646316,
"learning_rate": 1.0308882061384322e-06,
"loss": 0.940388560295105,
"step": 360
},
{
"epoch": 1.037410071942446,
"grad_norm": 0.3277189443862227,
"learning_rate": 1.0261373552955689e-06,
"loss": 1.0485488176345825,
"step": 361
},
{
"epoch": 1.0402877697841726,
"grad_norm": 0.3336979554860064,
"learning_rate": 1.021385914037341e-06,
"loss": 1.006148338317871,
"step": 362
},
{
"epoch": 1.0431654676258992,
"grad_norm": 0.3034246503039526,
"learning_rate": 1.0166339896938096e-06,
"loss": 1.040244460105896,
"step": 363
},
{
"epoch": 1.0460431654676259,
"grad_norm": 0.2909854917188287,
"learning_rate": 1.0118816896059472e-06,
"loss": 1.0620298385620117,
"step": 364
},
{
"epoch": 1.0489208633093525,
"grad_norm": 0.3342829322918414,
"learning_rate": 1.0071291211232142e-06,
"loss": 1.0369703769683838,
"step": 365
},
{
"epoch": 1.0517985611510792,
"grad_norm": 0.3620459894772739,
"learning_rate": 1.0023763916011337e-06,
"loss": 1.005780816078186,
"step": 366
},
{
"epoch": 1.0546762589928058,
"grad_norm": 0.3245799271587319,
"learning_rate": 9.976236083988662e-07,
"loss": 1.0978028774261475,
"step": 367
},
{
"epoch": 1.0575539568345325,
"grad_norm": 0.2906620053944161,
"learning_rate": 9.928708788767857e-07,
"loss": 1.0192337036132812,
"step": 368
},
{
"epoch": 1.0604316546762589,
"grad_norm": 0.2788940484645077,
"learning_rate": 9.881183103940525e-07,
"loss": 1.0336426496505737,
"step": 369
},
{
"epoch": 1.0633093525179855,
"grad_norm": 0.2646864305544589,
"learning_rate": 9.833660103061903e-07,
"loss": 0.9359861612319946,
"step": 370
},
{
"epoch": 1.0661870503597122,
"grad_norm": 0.3024535696584821,
"learning_rate": 9.78614085962659e-07,
"loss": 0.9596098065376282,
"step": 371
},
{
"epoch": 1.0690647482014388,
"grad_norm": 0.3433854130018685,
"learning_rate": 9.738626447044315e-07,
"loss": 0.9648246169090271,
"step": 372
},
{
"epoch": 1.0719424460431655,
"grad_norm": 0.29727053840895096,
"learning_rate": 9.691117938615677e-07,
"loss": 0.937362551689148,
"step": 373
},
{
"epoch": 1.074820143884892,
"grad_norm": 0.3148473300114735,
"learning_rate": 9.643616407507916e-07,
"loss": 1.0278003215789795,
"step": 374
},
{
"epoch": 1.0776978417266188,
"grad_norm": 0.34189586023197116,
"learning_rate": 9.596122926730653e-07,
"loss": 1.053139090538025,
"step": 375
},
{
"epoch": 1.0805755395683454,
"grad_norm": 0.2979805327926045,
"learning_rate": 9.548638569111664e-07,
"loss": 0.9690728187561035,
"step": 376
},
{
"epoch": 1.083453237410072,
"grad_norm": 0.32530063715847013,
"learning_rate": 9.501164407272641e-07,
"loss": 0.9638134837150574,
"step": 377
},
{
"epoch": 1.0863309352517985,
"grad_norm": 0.28340436102152416,
"learning_rate": 9.453701513604971e-07,
"loss": 1.0154237747192383,
"step": 378
},
{
"epoch": 1.0892086330935251,
"grad_norm": 0.3344518694998693,
"learning_rate": 9.406250960245492e-07,
"loss": 1.0023622512817383,
"step": 379
},
{
"epoch": 1.0920863309352518,
"grad_norm": 0.3057560258941549,
"learning_rate": 9.358813819052293e-07,
"loss": 0.943859338760376,
"step": 380
},
{
"epoch": 1.0949640287769784,
"grad_norm": 0.31581539413489,
"learning_rate": 9.311391161580505e-07,
"loss": 0.9440896511077881,
"step": 381
},
{
"epoch": 1.097841726618705,
"grad_norm": 0.28253845985432785,
"learning_rate": 9.263984059058073e-07,
"loss": 0.9603610634803772,
"step": 382
},
{
"epoch": 1.1007194244604317,
"grad_norm": 0.3743060929428334,
"learning_rate": 9.216593582361584e-07,
"loss": 0.9380893707275391,
"step": 383
},
{
"epoch": 1.1035971223021583,
"grad_norm": 0.29782880129060824,
"learning_rate": 9.169220801992054e-07,
"loss": 0.9593515396118164,
"step": 384
},
{
"epoch": 1.106474820143885,
"grad_norm": 0.25830569907542367,
"learning_rate": 9.121866788050772e-07,
"loss": 0.9210997819900513,
"step": 385
},
{
"epoch": 1.1093525179856114,
"grad_norm": 0.29300460156324154,
"learning_rate": 9.074532610215097e-07,
"loss": 0.9233313798904419,
"step": 386
},
{
"epoch": 1.112230215827338,
"grad_norm": 0.2922799068070249,
"learning_rate": 9.027219337714323e-07,
"loss": 0.9572200179100037,
"step": 387
},
{
"epoch": 1.1151079136690647,
"grad_norm": 0.294905286599111,
"learning_rate": 8.979928039305502e-07,
"loss": 1.0306824445724487,
"step": 388
},
{
"epoch": 1.1179856115107913,
"grad_norm": 0.3096443969458927,
"learning_rate": 8.932659783249332e-07,
"loss": 0.9011950492858887,
"step": 389
},
{
"epoch": 1.120863309352518,
"grad_norm": 0.269705652072412,
"learning_rate": 8.885415637285997e-07,
"loss": 0.9103861451148987,
"step": 390
},
{
"epoch": 1.1237410071942446,
"grad_norm": 0.3045551669252975,
"learning_rate": 8.838196668611056e-07,
"loss": 0.9500089883804321,
"step": 391
},
{
"epoch": 1.1266187050359713,
"grad_norm": 0.3584458914570723,
"learning_rate": 8.791003943851352e-07,
"loss": 1.0625544786453247,
"step": 392
},
{
"epoch": 1.129496402877698,
"grad_norm": 0.40204111666436204,
"learning_rate": 8.743838529040896e-07,
"loss": 1.0451273918151855,
"step": 393
},
{
"epoch": 1.1323741007194243,
"grad_norm": 0.2801446258828758,
"learning_rate": 8.696701489596796e-07,
"loss": 0.8780025839805603,
"step": 394
},
{
"epoch": 1.135251798561151,
"grad_norm": 0.30848025340500973,
"learning_rate": 8.649593890295195e-07,
"loss": 0.853165328502655,
"step": 395
},
{
"epoch": 1.1381294964028776,
"grad_norm": 0.2774601092911939,
"learning_rate": 8.602516795247212e-07,
"loss": 0.997830331325531,
"step": 396
},
{
"epoch": 1.1410071942446043,
"grad_norm": 0.3339281391141689,
"learning_rate": 8.555471267874904e-07,
"loss": 1.0442490577697754,
"step": 397
},
{
"epoch": 1.143884892086331,
"grad_norm": 0.2740872772386324,
"learning_rate": 8.508458370887254e-07,
"loss": 0.9518193602561951,
"step": 398
},
{
"epoch": 1.1467625899280576,
"grad_norm": 0.2720117171082711,
"learning_rate": 8.461479166256155e-07,
"loss": 0.8949469327926636,
"step": 399
},
{
"epoch": 1.1496402877697842,
"grad_norm": 0.296704642333982,
"learning_rate": 8.414534715192424e-07,
"loss": 1.002563714981079,
"step": 400
},
{
"epoch": 1.1525179856115109,
"grad_norm": 0.3435410162561758,
"learning_rate": 8.367626078121836e-07,
"loss": 0.972290575504303,
"step": 401
},
{
"epoch": 1.1553956834532375,
"grad_norm": 0.3129884619450547,
"learning_rate": 8.320754314661158e-07,
"loss": 1.01462984085083,
"step": 402
},
{
"epoch": 1.158273381294964,
"grad_norm": 0.30417813847501757,
"learning_rate": 8.273920483594224e-07,
"loss": 0.9698868989944458,
"step": 403
},
{
"epoch": 1.1611510791366906,
"grad_norm": 0.3020411325830846,
"learning_rate": 8.227125642848023e-07,
"loss": 0.9128695726394653,
"step": 404
},
{
"epoch": 1.1640287769784172,
"grad_norm": 0.346879261409609,
"learning_rate": 8.180370849468783e-07,
"loss": 0.9776325821876526,
"step": 405
},
{
"epoch": 1.1669064748201439,
"grad_norm": 0.36045310471755976,
"learning_rate": 8.133657159598107e-07,
"loss": 1.0711374282836914,
"step": 406
},
{
"epoch": 1.1697841726618705,
"grad_norm": 0.33405600358385434,
"learning_rate": 8.086985628449118e-07,
"loss": 1.01808500289917,
"step": 407
},
{
"epoch": 1.1726618705035972,
"grad_norm": 0.33784058532809186,
"learning_rate": 8.040357310282614e-07,
"loss": 1.073177695274353,
"step": 408
},
{
"epoch": 1.1755395683453238,
"grad_norm": 0.31432792819184735,
"learning_rate": 7.993773258383251e-07,
"loss": 0.9766973257064819,
"step": 409
},
{
"epoch": 1.1784172661870504,
"grad_norm": 0.29143664503321964,
"learning_rate": 7.94723452503577e-07,
"loss": 0.8510106801986694,
"step": 410
},
{
"epoch": 1.181294964028777,
"grad_norm": 0.3117754281019984,
"learning_rate": 7.900742161501203e-07,
"loss": 0.9605945348739624,
"step": 411
},
{
"epoch": 1.1841726618705035,
"grad_norm": 0.3083610748282882,
"learning_rate": 7.854297217993138e-07,
"loss": 1.0148074626922607,
"step": 412
},
{
"epoch": 1.1870503597122302,
"grad_norm": 0.3292067324035066,
"learning_rate": 7.807900743654003e-07,
"loss": 1.0121517181396484,
"step": 413
},
{
"epoch": 1.1899280575539568,
"grad_norm": 0.2906819013001279,
"learning_rate": 7.761553786531344e-07,
"loss": 0.9553067684173584,
"step": 414
},
{
"epoch": 1.1928057553956835,
"grad_norm": 0.25709992178123586,
"learning_rate": 7.71525739355418e-07,
"loss": 0.8994815945625305,
"step": 415
},
{
"epoch": 1.19568345323741,
"grad_norm": 0.36452886143608954,
"learning_rate": 7.669012610509332e-07,
"loss": 0.953561544418335,
"step": 416
},
{
"epoch": 1.1985611510791367,
"grad_norm": 0.3247835868196829,
"learning_rate": 7.622820482017803e-07,
"loss": 0.9593473076820374,
"step": 417
},
{
"epoch": 1.2014388489208634,
"grad_norm": 0.2805164760032836,
"learning_rate": 7.57668205151119e-07,
"loss": 0.9459452629089355,
"step": 418
},
{
"epoch": 1.2043165467625898,
"grad_norm": 0.28523487135593184,
"learning_rate": 7.53059836120811e-07,
"loss": 0.9797439575195312,
"step": 419
},
{
"epoch": 1.2071942446043165,
"grad_norm": 0.3003875524590878,
"learning_rate": 7.484570452090654e-07,
"loss": 0.9212760925292969,
"step": 420
},
{
"epoch": 1.210071942446043,
"grad_norm": 0.34132960956027913,
"learning_rate": 7.438599363880873e-07,
"loss": 1.0429980754852295,
"step": 421
},
{
"epoch": 1.2129496402877697,
"grad_norm": 0.30371204747015557,
"learning_rate": 7.3926861350173e-07,
"loss": 0.9603173732757568,
"step": 422
},
{
"epoch": 1.2158273381294964,
"grad_norm": 0.3162676331919284,
"learning_rate": 7.346831802631485e-07,
"loss": 1.010259985923767,
"step": 423
},
{
"epoch": 1.218705035971223,
"grad_norm": 0.335784603934896,
"learning_rate": 7.301037402524554e-07,
"loss": 0.9941245913505554,
"step": 424
},
{
"epoch": 1.2215827338129497,
"grad_norm": 0.34689945542903367,
"learning_rate": 7.255303969143847e-07,
"loss": 0.9076559543609619,
"step": 425
},
{
"epoch": 1.2244604316546763,
"grad_norm": 0.30699564205037894,
"learning_rate": 7.209632535559517e-07,
"loss": 0.9393267035484314,
"step": 426
},
{
"epoch": 1.227338129496403,
"grad_norm": 0.3645881875578552,
"learning_rate": 7.164024133441209e-07,
"loss": 1.0797785520553589,
"step": 427
},
{
"epoch": 1.2302158273381294,
"grad_norm": 0.30842152383790683,
"learning_rate": 7.118479793034757e-07,
"loss": 0.8502181172370911,
"step": 428
},
{
"epoch": 1.233093525179856,
"grad_norm": 0.2837918860269475,
"learning_rate": 7.073000543138903e-07,
"loss": 0.8781344294548035,
"step": 429
},
{
"epoch": 1.2359712230215827,
"grad_norm": 0.30258666124852246,
"learning_rate": 7.027587411082068e-07,
"loss": 0.9787595272064209,
"step": 430
},
{
"epoch": 1.2388489208633093,
"grad_norm": 0.32255457306195484,
"learning_rate": 6.98224142269914e-07,
"loss": 1.0447101593017578,
"step": 431
},
{
"epoch": 1.241726618705036,
"grad_norm": 0.2842145651170118,
"learning_rate": 6.936963602308296e-07,
"loss": 0.8477309942245483,
"step": 432
},
{
"epoch": 1.2446043165467626,
"grad_norm": 0.2857078174523759,
"learning_rate": 6.891754972687872e-07,
"loss": 0.973019003868103,
"step": 433
},
{
"epoch": 1.2474820143884893,
"grad_norm": 0.26675519419868937,
"learning_rate": 6.846616555053265e-07,
"loss": 0.8788484930992126,
"step": 434
},
{
"epoch": 1.2503597122302157,
"grad_norm": 0.2722315460093143,
"learning_rate": 6.80154936903385e-07,
"loss": 0.8963809013366699,
"step": 435
},
{
"epoch": 1.2532374100719426,
"grad_norm": 0.321353554344839,
"learning_rate": 6.756554432649952e-07,
"loss": 0.9304237365722656,
"step": 436
},
{
"epoch": 1.256115107913669,
"grad_norm": 0.8802677887155806,
"learning_rate": 6.711632762289863e-07,
"loss": 0.9569498300552368,
"step": 437
},
{
"epoch": 1.2589928057553956,
"grad_norm": 0.3027788818991336,
"learning_rate": 6.666785372686862e-07,
"loss": 0.9950339198112488,
"step": 438
},
{
"epoch": 1.2618705035971223,
"grad_norm": 0.3278298564206992,
"learning_rate": 6.622013276896309e-07,
"loss": 1.0428767204284668,
"step": 439
},
{
"epoch": 1.264748201438849,
"grad_norm": 0.322273469382939,
"learning_rate": 6.577317486272756e-07,
"loss": 1.0519962310791016,
"step": 440
},
{
"epoch": 1.2676258992805756,
"grad_norm": 0.27000899918490673,
"learning_rate": 6.5326990104471e-07,
"loss": 0.9430403709411621,
"step": 441
},
{
"epoch": 1.2705035971223022,
"grad_norm": 0.2733597667465118,
"learning_rate": 6.488158857303778e-07,
"loss": 0.8923604488372803,
"step": 442
},
{
"epoch": 1.2733812949640289,
"grad_norm": 0.3379057723615061,
"learning_rate": 6.443698032958003e-07,
"loss": 0.8423130512237549,
"step": 443
},
{
"epoch": 1.2762589928057553,
"grad_norm": 0.298352488485131,
"learning_rate": 6.399317541733029e-07,
"loss": 0.8984063863754272,
"step": 444
},
{
"epoch": 1.2791366906474821,
"grad_norm": 0.3079552906979132,
"learning_rate": 6.355018386137474e-07,
"loss": 1.0057708024978638,
"step": 445
},
{
"epoch": 1.2820143884892086,
"grad_norm": 0.3494450738277216,
"learning_rate": 6.310801566842671e-07,
"loss": 1.0255926847457886,
"step": 446
},
{
"epoch": 1.2848920863309352,
"grad_norm": 0.32073469498291907,
"learning_rate": 6.266668082660051e-07,
"loss": 0.9159607291221619,
"step": 447
},
{
"epoch": 1.2877697841726619,
"grad_norm": 0.30277181047008334,
"learning_rate": 6.222618930518604e-07,
"loss": 0.9396940469741821,
"step": 448
},
{
"epoch": 1.2906474820143885,
"grad_norm": 0.2996729716461448,
"learning_rate": 6.178655105442347e-07,
"loss": 0.9432433247566223,
"step": 449
},
{
"epoch": 1.2935251798561151,
"grad_norm": 0.31105342806959047,
"learning_rate": 6.134777600527845e-07,
"loss": 0.963239848613739,
"step": 450
},
{
"epoch": 1.2964028776978418,
"grad_norm": 0.30886949969557964,
"learning_rate": 6.09098740692178e-07,
"loss": 0.8668818473815918,
"step": 451
},
{
"epoch": 1.2992805755395684,
"grad_norm": 0.34250157527572056,
"learning_rate": 6.047285513798568e-07,
"loss": 0.9877142310142517,
"step": 452
},
{
"epoch": 1.3021582733812949,
"grad_norm": 0.3321433510552703,
"learning_rate": 6.003672908338008e-07,
"loss": 1.1121788024902344,
"step": 453
},
{
"epoch": 1.3050359712230215,
"grad_norm": 0.34432256224945607,
"learning_rate": 5.96015057570298e-07,
"loss": 0.9205185770988464,
"step": 454
},
{
"epoch": 1.3079136690647482,
"grad_norm": 0.3129154481448231,
"learning_rate": 5.916719499017206e-07,
"loss": 0.9529520869255066,
"step": 455
},
{
"epoch": 1.3107913669064748,
"grad_norm": 0.3072358733262211,
"learning_rate": 5.873380659343021e-07,
"loss": 0.8947219252586365,
"step": 456
},
{
"epoch": 1.3136690647482014,
"grad_norm": 0.2689806016977398,
"learning_rate": 5.83013503565923e-07,
"loss": 0.8450409173965454,
"step": 457
},
{
"epoch": 1.316546762589928,
"grad_norm": 0.30257611336308615,
"learning_rate": 5.786983604838983e-07,
"loss": 0.9042650461196899,
"step": 458
},
{
"epoch": 1.3194244604316547,
"grad_norm": 0.30536933730041105,
"learning_rate": 5.743927341627722e-07,
"loss": 0.9189790487289429,
"step": 459
},
{
"epoch": 1.3223021582733812,
"grad_norm": 0.3116994635317501,
"learning_rate": 5.700967218621133e-07,
"loss": 0.9711490869522095,
"step": 460
},
{
"epoch": 1.325179856115108,
"grad_norm": 0.29567216593755763,
"learning_rate": 5.658104206243221e-07,
"loss": 0.9121512174606323,
"step": 461
},
{
"epoch": 1.3280575539568344,
"grad_norm": 0.33097880600820834,
"learning_rate": 5.615339272724337e-07,
"loss": 1.0232415199279785,
"step": 462
},
{
"epoch": 1.330935251798561,
"grad_norm": 0.30421121109984933,
"learning_rate": 5.572673384079361e-07,
"loss": 0.9682353734970093,
"step": 463
},
{
"epoch": 1.3338129496402877,
"grad_norm": 0.2989421078796733,
"learning_rate": 5.530107504085829e-07,
"loss": 1.0788567066192627,
"step": 464
},
{
"epoch": 1.3366906474820144,
"grad_norm": 0.3368608300644779,
"learning_rate": 5.487642594262203e-07,
"loss": 1.0391610860824585,
"step": 465
},
{
"epoch": 1.339568345323741,
"grad_norm": 0.31237915616425,
"learning_rate": 5.445279613846132e-07,
"loss": 0.97783362865448,
"step": 466
},
{
"epoch": 1.3424460431654677,
"grad_norm": 0.2917517042157253,
"learning_rate": 5.40301951977278e-07,
"loss": 0.9356849193572998,
"step": 467
},
{
"epoch": 1.3453237410071943,
"grad_norm": 0.2906559333604664,
"learning_rate": 5.360863266653227e-07,
"loss": 0.9209206104278564,
"step": 468
},
{
"epoch": 1.3482014388489207,
"grad_norm": 0.2762311855557999,
"learning_rate": 5.318811806752883e-07,
"loss": 0.8892006278038025,
"step": 469
},
{
"epoch": 1.3510791366906476,
"grad_norm": 0.2843773069845965,
"learning_rate": 5.276866089970004e-07,
"loss": 0.8870881795883179,
"step": 470
},
{
"epoch": 1.353956834532374,
"grad_norm": 0.3214966447300032,
"learning_rate": 5.235027063814204e-07,
"loss": 1.05729341506958,
"step": 471
},
{
"epoch": 1.3568345323741007,
"grad_norm": 0.29929162633018896,
"learning_rate": 5.193295673385081e-07,
"loss": 0.9966158866882324,
"step": 472
},
{
"epoch": 1.3597122302158273,
"grad_norm": 0.2845609225335763,
"learning_rate": 5.151672861350849e-07,
"loss": 0.983919084072113,
"step": 473
},
{
"epoch": 1.362589928057554,
"grad_norm": 0.2872905985027109,
"learning_rate": 5.110159567927056e-07,
"loss": 0.9776226282119751,
"step": 474
},
{
"epoch": 1.3654676258992806,
"grad_norm": 0.3004471218975015,
"learning_rate": 5.068756730855328e-07,
"loss": 0.8701659440994263,
"step": 475
},
{
"epoch": 1.3683453237410073,
"grad_norm": 0.27295033239262645,
"learning_rate": 5.027465285382213e-07,
"loss": 0.8881811499595642,
"step": 476
},
{
"epoch": 1.371223021582734,
"grad_norm": 0.2943944847225173,
"learning_rate": 4.986286164238025e-07,
"loss": 0.8865438103675842,
"step": 477
},
{
"epoch": 1.3741007194244603,
"grad_norm": 0.2867589741491455,
"learning_rate": 4.945220297615805e-07,
"loss": 0.9757734537124634,
"step": 478
},
{
"epoch": 1.376978417266187,
"grad_norm": 0.30445366971206,
"learning_rate": 4.904268613150278e-07,
"loss": 0.9451441764831543,
"step": 479
},
{
"epoch": 1.3798561151079136,
"grad_norm": 0.3764482983471657,
"learning_rate": 4.863432035896924e-07,
"loss": 1.0263563394546509,
"step": 480
},
{
"epoch": 1.3827338129496403,
"grad_norm": 0.28674275025350787,
"learning_rate": 4.822711488311076e-07,
"loss": 0.9233589768409729,
"step": 481
},
{
"epoch": 1.385611510791367,
"grad_norm": 0.28357755576379123,
"learning_rate": 4.782107890227065e-07,
"loss": 0.8945414423942566,
"step": 482
},
{
"epoch": 1.3884892086330936,
"grad_norm": 0.3296678488028909,
"learning_rate": 4.7416221588374695e-07,
"loss": 0.92512047290802,
"step": 483
},
{
"epoch": 1.3913669064748202,
"grad_norm": 0.34335013571463424,
"learning_rate": 4.701255208672371e-07,
"loss": 0.8945969343185425,
"step": 484
},
{
"epoch": 1.3942446043165468,
"grad_norm": 0.2681643725763488,
"learning_rate": 4.6610079515787217e-07,
"loss": 0.8868216872215271,
"step": 485
},
{
"epoch": 1.3971223021582735,
"grad_norm": 0.29646359252343524,
"learning_rate": 4.620881296699718e-07,
"loss": 0.8830418586730957,
"step": 486
},
{
"epoch": 1.4,
"grad_norm": 0.34972600472566895,
"learning_rate": 4.5808761504542915e-07,
"loss": 1.0035524368286133,
"step": 487
},
{
"epoch": 1.4028776978417266,
"grad_norm": 0.3131507468407678,
"learning_rate": 4.5409934165166174e-07,
"loss": 0.9483344554901123,
"step": 488
},
{
"epoch": 1.4057553956834532,
"grad_norm": 0.3077131853121991,
"learning_rate": 4.501233995795708e-07,
"loss": 1.042191982269287,
"step": 489
},
{
"epoch": 1.4086330935251798,
"grad_norm": 0.30297924853409636,
"learning_rate": 4.4615987864150517e-07,
"loss": 0.9574159383773804,
"step": 490
},
{
"epoch": 1.4115107913669065,
"grad_norm": 0.3101670489465847,
"learning_rate": 4.4220886836923443e-07,
"loss": 0.9550837874412537,
"step": 491
},
{
"epoch": 1.4143884892086331,
"grad_norm": 0.3028518200998954,
"learning_rate": 4.382704580119242e-07,
"loss": 0.9465584754943848,
"step": 492
},
{
"epoch": 1.4172661870503598,
"grad_norm": 0.3563033540312973,
"learning_rate": 4.343447365341225e-07,
"loss": 1.0004384517669678,
"step": 493
},
{
"epoch": 1.4201438848920862,
"grad_norm": 0.33396685382103786,
"learning_rate": 4.3043179261374775e-07,
"loss": 0.9882891774177551,
"step": 494
},
{
"epoch": 1.423021582733813,
"grad_norm": 0.30772910816683374,
"learning_rate": 4.265317146400876e-07,
"loss": 0.9874916672706604,
"step": 495
},
{
"epoch": 1.4258992805755395,
"grad_norm": 0.30524559446834776,
"learning_rate": 4.226445907118018e-07,
"loss": 0.9210883378982544,
"step": 496
},
{
"epoch": 1.4287769784172661,
"grad_norm": 0.3267114002295349,
"learning_rate": 4.1877050863493037e-07,
"loss": 0.9688763618469238,
"step": 497
},
{
"epoch": 1.4316546762589928,
"grad_norm": 0.27339720775223914,
"learning_rate": 4.1490955592091325e-07,
"loss": 0.8747698068618774,
"step": 498
},
{
"epoch": 1.4345323741007194,
"grad_norm": 0.28341881156979953,
"learning_rate": 4.110618197846105e-07,
"loss": 0.9002431035041809,
"step": 499
},
{
"epoch": 1.437410071942446,
"grad_norm": 0.3157537655118046,
"learning_rate": 4.0722738714233475e-07,
"loss": 0.9333710074424744,
"step": 500
},
{
"epoch": 1.4402877697841727,
"grad_norm": 0.3385136686246348,
"learning_rate": 4.0340634460988634e-07,
"loss": 0.9397541284561157,
"step": 501
},
{
"epoch": 1.4431654676258994,
"grad_norm": 0.2799609416561718,
"learning_rate": 3.9959877850059654e-07,
"loss": 0.9181256890296936,
"step": 502
},
{
"epoch": 1.4460431654676258,
"grad_norm": 0.32743527244615317,
"learning_rate": 3.958047748233789e-07,
"loss": 0.9613093733787537,
"step": 503
},
{
"epoch": 1.4489208633093524,
"grad_norm": 0.31262261972142885,
"learning_rate": 3.920244192807864e-07,
"loss": 1.006971836090088,
"step": 504
},
{
"epoch": 1.451798561151079,
"grad_norm": 0.2756374429613347,
"learning_rate": 3.8825779726707363e-07,
"loss": 0.9426612854003906,
"step": 505
},
{
"epoch": 1.4546762589928057,
"grad_norm": 0.31900967384513096,
"learning_rate": 3.845049938662709e-07,
"loss": 1.0267070531845093,
"step": 506
},
{
"epoch": 1.4575539568345324,
"grad_norm": 0.2806375956545195,
"learning_rate": 3.807660938502588e-07,
"loss": 0.8537903428077698,
"step": 507
},
{
"epoch": 1.460431654676259,
"grad_norm": 0.2679665420161734,
"learning_rate": 3.770411816768567e-07,
"loss": 0.8869454860687256,
"step": 508
},
{
"epoch": 1.4633093525179857,
"grad_norm": 0.2940220938489727,
"learning_rate": 3.733303414879121e-07,
"loss": 0.9901649951934814,
"step": 509
},
{
"epoch": 1.4661870503597123,
"grad_norm": 0.3002023454804642,
"learning_rate": 3.696336571074019e-07,
"loss": 0.893314003944397,
"step": 510
},
{
"epoch": 1.469064748201439,
"grad_norm": 0.2989449706350416,
"learning_rate": 3.659512120395384e-07,
"loss": 0.9651301503181458,
"step": 511
},
{
"epoch": 1.4719424460431654,
"grad_norm": 0.31184751443962444,
"learning_rate": 3.6228308946688156e-07,
"loss": 0.9276424646377563,
"step": 512
},
{
"epoch": 1.474820143884892,
"grad_norm": 0.3280970794871676,
"learning_rate": 3.586293722484628e-07,
"loss": 0.9295877814292908,
"step": 513
},
{
"epoch": 1.4776978417266187,
"grad_norm": 0.30361568371256686,
"learning_rate": 3.549901429179103e-07,
"loss": 0.9780776500701904,
"step": 514
},
{
"epoch": 1.4805755395683453,
"grad_norm": 0.31276714037726877,
"learning_rate": 3.513654836815871e-07,
"loss": 0.9611223936080933,
"step": 515
},
{
"epoch": 1.483453237410072,
"grad_norm": 0.29813652177832717,
"learning_rate": 3.477554764167333e-07,
"loss": 0.9399750232696533,
"step": 516
},
{
"epoch": 1.4863309352517986,
"grad_norm": 0.2879175131540814,
"learning_rate": 3.44160202669615e-07,
"loss": 0.9704416990280151,
"step": 517
},
{
"epoch": 1.4892086330935252,
"grad_norm": 0.27203404497254424,
"learning_rate": 3.4057974365368494e-07,
"loss": 0.9447322487831116,
"step": 518
},
{
"epoch": 1.4920863309352517,
"grad_norm": 0.3345564889146381,
"learning_rate": 3.3701418024774654e-07,
"loss": 0.9653472900390625,
"step": 519
},
{
"epoch": 1.4949640287769785,
"grad_norm": 0.3592025624857194,
"learning_rate": 3.334635929941262e-07,
"loss": 1.0416182279586792,
"step": 520
},
{
"epoch": 1.497841726618705,
"grad_norm": 0.3021027064210215,
"learning_rate": 3.29928062096856e-07,
"loss": 0.8763036727905273,
"step": 521
},
{
"epoch": 1.5007194244604316,
"grad_norm": 0.3300914239445515,
"learning_rate": 3.264076674198594e-07,
"loss": 0.9283760786056519,
"step": 522
},
{
"epoch": 1.5035971223021583,
"grad_norm": 0.30648914339336775,
"learning_rate": 3.229024884851499e-07,
"loss": 1.0218451023101807,
"step": 523
},
{
"epoch": 1.506474820143885,
"grad_norm": 0.3028904431521258,
"learning_rate": 3.1941260447103226e-07,
"loss": 0.9676252603530884,
"step": 524
},
{
"epoch": 1.5093525179856115,
"grad_norm": 0.31498989874096944,
"learning_rate": 3.159380942103158e-07,
"loss": 1.0615897178649902,
"step": 525
},
{
"epoch": 1.512230215827338,
"grad_norm": 0.3812643080356087,
"learning_rate": 3.1247903618853323e-07,
"loss": 1.0494942665100098,
"step": 526
},
{
"epoch": 1.5151079136690648,
"grad_norm": 0.3015637470031748,
"learning_rate": 3.0903550854216597e-07,
"loss": 0.9968015551567078,
"step": 527
},
{
"epoch": 1.5179856115107913,
"grad_norm": 0.31980913910153197,
"learning_rate": 3.0560758905688243e-07,
"loss": 0.8924911022186279,
"step": 528
},
{
"epoch": 1.5208633093525181,
"grad_norm": 0.3219645985844337,
"learning_rate": 3.021953551657773e-07,
"loss": 0.9802518486976624,
"step": 529
},
{
"epoch": 1.5237410071942445,
"grad_norm": 0.32235606474899925,
"learning_rate": 2.9879888394762576e-07,
"loss": 1.0681138038635254,
"step": 530
},
{
"epoch": 1.5266187050359712,
"grad_norm": 0.29753869212587086,
"learning_rate": 2.9541825212514006e-07,
"loss": 1.0018823146820068,
"step": 531
},
{
"epoch": 1.5294964028776978,
"grad_norm": 0.32215332495706295,
"learning_rate": 2.920535360632368e-07,
"loss": 1.0154387950897217,
"step": 532
},
{
"epoch": 1.5323741007194245,
"grad_norm": 0.33197651591712296,
"learning_rate": 2.8870481176731287e-07,
"loss": 1.0280838012695312,
"step": 533
},
{
"epoch": 1.5352517985611511,
"grad_norm": 0.31343044639526885,
"learning_rate": 2.853721548815283e-07,
"loss": 1.0017954111099243,
"step": 534
},
{
"epoch": 1.5381294964028775,
"grad_norm": 0.3073520760945164,
"learning_rate": 2.8205564068709596e-07,
"loss": 0.9721263647079468,
"step": 535
},
{
"epoch": 1.5410071942446044,
"grad_norm": 0.27794918211270764,
"learning_rate": 2.787553441005839e-07,
"loss": 0.8929443955421448,
"step": 536
},
{
"epoch": 1.5438848920863308,
"grad_norm": 0.36462602887473333,
"learning_rate": 2.754713396722198e-07,
"loss": 0.8837905526161194,
"step": 537
},
{
"epoch": 1.5467625899280577,
"grad_norm": 0.2590201800878289,
"learning_rate": 2.7220370158421026e-07,
"loss": 0.9194425344467163,
"step": 538
},
{
"epoch": 1.5496402877697841,
"grad_norm": 0.29622142248800076,
"learning_rate": 2.68952503649062e-07,
"loss": 0.9988787174224854,
"step": 539
},
{
"epoch": 1.5525179856115108,
"grad_norm": 0.3126140379352631,
"learning_rate": 2.65717819307917e-07,
"loss": 0.9965845942497253,
"step": 540
},
{
"epoch": 1.5553956834532374,
"grad_norm": 0.3415760999590957,
"learning_rate": 2.6249972162889244e-07,
"loss": 1.0078110694885254,
"step": 541
},
{
"epoch": 1.558273381294964,
"grad_norm": 0.3369977516727391,
"learning_rate": 2.5929828330542935e-07,
"loss": 1.0197123289108276,
"step": 542
},
{
"epoch": 1.5611510791366907,
"grad_norm": 0.32678249950063587,
"learning_rate": 2.561135766546525e-07,
"loss": 0.9625093936920166,
"step": 543
},
{
"epoch": 1.5640287769784171,
"grad_norm": 0.3014751724857403,
"learning_rate": 2.529456736157356e-07,
"loss": 0.8934499621391296,
"step": 544
},
{
"epoch": 1.566906474820144,
"grad_norm": 0.27876968496773213,
"learning_rate": 2.4979464574827555e-07,
"loss": 0.8468393087387085,
"step": 545
},
{
"epoch": 1.5697841726618704,
"grad_norm": 0.2757645778822954,
"learning_rate": 2.4666056423067825e-07,
"loss": 0.9273233413696289,
"step": 546
},
{
"epoch": 1.572661870503597,
"grad_norm": 0.313799523824532,
"learning_rate": 2.4354349985854795e-07,
"loss": 0.9406145811080933,
"step": 547
},
{
"epoch": 1.5755395683453237,
"grad_norm": 0.3175009917256148,
"learning_rate": 2.4044352304309044e-07,
"loss": 0.9628616571426392,
"step": 548
},
{
"epoch": 1.5784172661870504,
"grad_norm": 0.27743168737673624,
"learning_rate": 2.3736070380952165e-07,
"loss": 0.9222140312194824,
"step": 549
},
{
"epoch": 1.581294964028777,
"grad_norm": 0.2851178701079908,
"learning_rate": 2.34295111795485e-07,
"loss": 1.0274484157562256,
"step": 550
},
{
"epoch": 1.5841726618705037,
"grad_norm": 0.38515099715665085,
"learning_rate": 2.3124681624948006e-07,
"loss": 1.0696377754211426,
"step": 551
},
{
"epoch": 1.5870503597122303,
"grad_norm": 0.3133249799626146,
"learning_rate": 2.2821588602929632e-07,
"loss": 0.9214944839477539,
"step": 552
},
{
"epoch": 1.5899280575539567,
"grad_norm": 0.3307355043232978,
"learning_rate": 2.252023896004601e-07,
"loss": 0.8194340467453003,
"step": 553
},
{
"epoch": 1.5928057553956836,
"grad_norm": 0.2899499890420293,
"learning_rate": 2.2220639503468542e-07,
"loss": 0.9256591796875,
"step": 554
},
{
"epoch": 1.59568345323741,
"grad_norm": 0.3059563087954579,
"learning_rate": 2.192279700083385e-07,
"loss": 0.9171819686889648,
"step": 555
},
{
"epoch": 1.5985611510791367,
"grad_norm": 0.2910581911029511,
"learning_rate": 2.162671818009082e-07,
"loss": 0.8385239839553833,
"step": 556
},
{
"epoch": 1.6014388489208633,
"grad_norm": 0.3077283177341958,
"learning_rate": 2.133240972934852e-07,
"loss": 0.9529489278793335,
"step": 557
},
{
"epoch": 1.60431654676259,
"grad_norm": 0.33438792027940295,
"learning_rate": 2.1039878296725332e-07,
"loss": 0.9289690256118774,
"step": 558
},
{
"epoch": 1.6071942446043166,
"grad_norm": 0.33957986792953665,
"learning_rate": 2.0749130490198652e-07,
"loss": 1.0393140316009521,
"step": 559
},
{
"epoch": 1.610071942446043,
"grad_norm": 0.34246531637554406,
"learning_rate": 2.046017287745554e-07,
"loss": 1.0560503005981445,
"step": 560
},
{
"epoch": 1.6129496402877699,
"grad_norm": 0.33305609974874595,
"learning_rate": 2.0173011985744603e-07,
"loss": 0.9776498079299927,
"step": 561
},
{
"epoch": 1.6158273381294963,
"grad_norm": 0.33804380816718965,
"learning_rate": 1.9887654301728286e-07,
"loss": 0.9953919053077698,
"step": 562
},
{
"epoch": 1.6187050359712232,
"grad_norm": 0.33510637626675815,
"learning_rate": 1.960410627133654e-07,
"loss": 0.9703421592712402,
"step": 563
},
{
"epoch": 1.6215827338129496,
"grad_norm": 0.3528056926245532,
"learning_rate": 1.9322374299621157e-07,
"loss": 0.986659824848175,
"step": 564
},
{
"epoch": 1.6244604316546762,
"grad_norm": 0.3190776044982604,
"learning_rate": 1.9042464750610987e-07,
"loss": 1.047306776046753,
"step": 565
},
{
"epoch": 1.6273381294964029,
"grad_norm": 0.32915934915976663,
"learning_rate": 1.8764383947168383e-07,
"loss": 0.9910968542098999,
"step": 566
},
{
"epoch": 1.6302158273381295,
"grad_norm": 0.2786176626211202,
"learning_rate": 1.8488138170846146e-07,
"loss": 0.9353040456771851,
"step": 567
},
{
"epoch": 1.6330935251798562,
"grad_norm": 0.31795714142791043,
"learning_rate": 1.8213733661745855e-07,
"loss": 0.9244099259376526,
"step": 568
},
{
"epoch": 1.6359712230215826,
"grad_norm": 0.313138188279038,
"learning_rate": 1.7941176618376686e-07,
"loss": 0.9275581240653992,
"step": 569
},
{
"epoch": 1.6388489208633095,
"grad_norm": 0.2983504345513285,
"learning_rate": 1.767047319751559e-07,
"loss": 1.0571556091308594,
"step": 570
},
{
"epoch": 1.641726618705036,
"grad_norm": 0.2900247015191147,
"learning_rate": 1.7401629514068116e-07,
"loss": 0.8571426868438721,
"step": 571
},
{
"epoch": 1.6446043165467625,
"grad_norm": 0.32073923970300267,
"learning_rate": 1.713465164093031e-07,
"loss": 0.962169349193573,
"step": 572
},
{
"epoch": 1.6474820143884892,
"grad_norm": 0.3426036390922273,
"learning_rate": 1.6869545608851465e-07,
"loss": 1.068155288696289,
"step": 573
},
{
"epoch": 1.6503597122302158,
"grad_norm": 0.2912198669008646,
"learning_rate": 1.6606317406298044e-07,
"loss": 0.9102658033370972,
"step": 574
},
{
"epoch": 1.6532374100719425,
"grad_norm": 0.2783664324123141,
"learning_rate": 1.6344972979318227e-07,
"loss": 0.863929033279419,
"step": 575
},
{
"epoch": 1.6561151079136691,
"grad_norm": 0.3166046506897087,
"learning_rate": 1.608551823140778e-07,
"loss": 0.9644492268562317,
"step": 576
},
{
"epoch": 1.6589928057553958,
"grad_norm": 0.3032692441409985,
"learning_rate": 1.5827959023376503e-07,
"loss": 0.9762970209121704,
"step": 577
},
{
"epoch": 1.6618705035971222,
"grad_norm": 0.3016744079808176,
"learning_rate": 1.5572301173216018e-07,
"loss": 0.9287898540496826,
"step": 578
},
{
"epoch": 1.664748201438849,
"grad_norm": 0.27431423825024975,
"learning_rate": 1.5318550455968282e-07,
"loss": 0.9573485851287842,
"step": 579
},
{
"epoch": 1.6676258992805755,
"grad_norm": 0.3087425552754159,
"learning_rate": 1.5066712603595045e-07,
"loss": 0.9209293127059937,
"step": 580
},
{
"epoch": 1.6705035971223021,
"grad_norm": 0.2899753485027158,
"learning_rate": 1.481679330484854e-07,
"loss": 0.8685443997383118,
"step": 581
},
{
"epoch": 1.6733812949640288,
"grad_norm": 0.30804307167476375,
"learning_rate": 1.4568798205142818e-07,
"loss": 1.0030007362365723,
"step": 582
},
{
"epoch": 1.6762589928057554,
"grad_norm": 0.3269639712901895,
"learning_rate": 1.4322732906426361e-07,
"loss": 0.9561444520950317,
"step": 583
},
{
"epoch": 1.679136690647482,
"grad_norm": 0.24889504344208443,
"learning_rate": 1.407860296705542e-07,
"loss": 0.9087004661560059,
"step": 584
},
{
"epoch": 1.6820143884892085,
"grad_norm": 0.3206254281376367,
"learning_rate": 1.3836413901668563e-07,
"loss": 1.040391206741333,
"step": 585
},
{
"epoch": 1.6848920863309353,
"grad_norm": 0.35576899441592297,
"learning_rate": 1.359617118106202e-07,
"loss": 0.9220665097236633,
"step": 586
},
{
"epoch": 1.6877697841726618,
"grad_norm": 0.33992161289171674,
"learning_rate": 1.3357880232066188e-07,
"loss": 1.0048316717147827,
"step": 587
},
{
"epoch": 1.6906474820143886,
"grad_norm": 0.3155381507693763,
"learning_rate": 1.3121546437422915e-07,
"loss": 0.9235495924949646,
"step": 588
},
{
"epoch": 1.693525179856115,
"grad_norm": 0.2712504024495115,
"learning_rate": 1.2887175135664085e-07,
"loss": 0.9516848921775818,
"step": 589
},
{
"epoch": 1.6964028776978417,
"grad_norm": 0.2729229399526208,
"learning_rate": 1.2654771620990845e-07,
"loss": 0.8648025989532471,
"step": 590
},
{
"epoch": 1.6992805755395683,
"grad_norm": 0.28643485819645237,
"learning_rate": 1.242434114315417e-07,
"loss": 0.9381082057952881,
"step": 591
},
{
"epoch": 1.702158273381295,
"grad_norm": 0.3583701644093793,
"learning_rate": 1.219588890733616e-07,
"loss": 0.9976767301559448,
"step": 592
},
{
"epoch": 1.7050359712230216,
"grad_norm": 0.3058650158564105,
"learning_rate": 1.1969420074032532e-07,
"loss": 0.9010272026062012,
"step": 593
},
{
"epoch": 1.707913669064748,
"grad_norm": 0.3079535255773853,
"learning_rate": 1.1744939758936045e-07,
"loss": 0.9119488000869751,
"step": 594
},
{
"epoch": 1.710791366906475,
"grad_norm": 0.3112816376998103,
"learning_rate": 1.1522453032820867e-07,
"loss": 1.0138704776763916,
"step": 595
},
{
"epoch": 1.7136690647482014,
"grad_norm": 0.37249689212589393,
"learning_rate": 1.1301964921428164e-07,
"loss": 1.0309240818023682,
"step": 596
},
{
"epoch": 1.7165467625899282,
"grad_norm": 0.2919396976165667,
"learning_rate": 1.1083480405352419e-07,
"loss": 1.02201509475708,
"step": 597
},
{
"epoch": 1.7194244604316546,
"grad_norm": 0.28931756763377436,
"learning_rate": 1.086700441992906e-07,
"loss": 0.9084526896476746,
"step": 598
},
{
"epoch": 1.7223021582733813,
"grad_norm": 0.29523211564831336,
"learning_rate": 1.0652541855122888e-07,
"loss": 1.0073999166488647,
"step": 599
},
{
"epoch": 1.725179856115108,
"grad_norm": 0.3072460132576141,
"learning_rate": 1.044009755541766e-07,
"loss": 0.9768160581588745,
"step": 600
},
{
"epoch": 1.7280575539568346,
"grad_norm": 0.3021099960297105,
"learning_rate": 1.0229676319706671e-07,
"loss": 1.0317999124526978,
"step": 601
},
{
"epoch": 1.7309352517985612,
"grad_norm": 0.32444961570494624,
"learning_rate": 1.0021282901184314e-07,
"loss": 0.953796923160553,
"step": 602
},
{
"epoch": 1.7338129496402876,
"grad_norm": 0.31997858537370105,
"learning_rate": 9.814922007238691e-08,
"loss": 0.8879704475402832,
"step": 603
},
{
"epoch": 1.7366906474820145,
"grad_norm": 0.3001763106646087,
"learning_rate": 9.610598299345363e-08,
"loss": 0.9384487271308899,
"step": 604
},
{
"epoch": 1.739568345323741,
"grad_norm": 0.3076940778037735,
"learning_rate": 9.408316392961946e-08,
"loss": 0.8896828889846802,
"step": 605
},
{
"epoch": 1.7424460431654676,
"grad_norm": 0.30206497856260484,
"learning_rate": 9.208080857423983e-08,
"loss": 0.991470456123352,
"step": 606
},
{
"epoch": 1.7453237410071942,
"grad_norm": 0.31112092266827757,
"learning_rate": 9.009896215841561e-08,
"loss": 0.9205191135406494,
"step": 607
},
{
"epoch": 1.7482014388489209,
"grad_norm": 0.34267209965236567,
"learning_rate": 8.813766944997292e-08,
"loss": 0.9923685789108276,
"step": 608
},
{
"epoch": 1.7510791366906475,
"grad_norm": 0.2775856465253116,
"learning_rate": 8.619697475245135e-08,
"loss": 0.906508207321167,
"step": 609
},
{
"epoch": 1.753956834532374,
"grad_norm": 0.3695265847423868,
"learning_rate": 8.427692190410252e-08,
"loss": 1.1132643222808838,
"step": 610
},
{
"epoch": 1.7568345323741008,
"grad_norm": 0.335951419865602,
"learning_rate": 8.237755427690097e-08,
"loss": 0.958720326423645,
"step": 611
},
{
"epoch": 1.7597122302158272,
"grad_norm": 0.26398237511432854,
"learning_rate": 8.049891477556325e-08,
"loss": 0.8326461315155029,
"step": 612
},
{
"epoch": 1.762589928057554,
"grad_norm": 0.29113012627846874,
"learning_rate": 7.864104583657994e-08,
"loss": 0.92642742395401,
"step": 613
},
{
"epoch": 1.7654676258992805,
"grad_norm": 0.32510114636650206,
"learning_rate": 7.680398942725607e-08,
"loss": 1.0428296327590942,
"step": 614
},
{
"epoch": 1.7683453237410072,
"grad_norm": 0.3401761886813798,
"learning_rate": 7.498778704476372e-08,
"loss": 1.0307958126068115,
"step": 615
},
{
"epoch": 1.7712230215827338,
"grad_norm": 0.31568033622101,
"learning_rate": 7.319247971520426e-08,
"loss": 0.922683835029602,
"step": 616
},
{
"epoch": 1.7741007194244605,
"grad_norm": 0.2903936993592098,
"learning_rate": 7.141810799268222e-08,
"loss": 0.8616385459899902,
"step": 617
},
{
"epoch": 1.776978417266187,
"grad_norm": 0.28996044598452053,
"learning_rate": 6.966471195838807e-08,
"loss": 0.9075828194618225,
"step": 618
},
{
"epoch": 1.7798561151079135,
"grad_norm": 0.30488859521834044,
"learning_rate": 6.793233121969422e-08,
"loss": 0.9607424736022949,
"step": 619
},
{
"epoch": 1.7827338129496404,
"grad_norm": 0.29464217851633684,
"learning_rate": 6.622100490925919e-08,
"loss": 0.9187620878219604,
"step": 620
},
{
"epoch": 1.7856115107913668,
"grad_norm": 0.316636600630024,
"learning_rate": 6.453077168414455e-08,
"loss": 0.9384863972663879,
"step": 621
},
{
"epoch": 1.7884892086330937,
"grad_norm": 0.3042112992195363,
"learning_rate": 6.286166972494079e-08,
"loss": 0.9122720956802368,
"step": 622
},
{
"epoch": 1.79136690647482,
"grad_norm": 0.2799449479478083,
"learning_rate": 6.121373673490548e-08,
"loss": 0.9125893712043762,
"step": 623
},
{
"epoch": 1.7942446043165468,
"grad_norm": 0.287767507487325,
"learning_rate": 5.958700993911192e-08,
"loss": 0.8173254132270813,
"step": 624
},
{
"epoch": 1.7971223021582734,
"grad_norm": 0.2716254786056972,
"learning_rate": 5.798152608360696e-08,
"loss": 0.9122398495674133,
"step": 625
},
{
"epoch": 1.8,
"grad_norm": 0.31952149094868726,
"learning_rate": 5.6397321434582534e-08,
"loss": 1.0111041069030762,
"step": 626
},
{
"epoch": 1.8028776978417267,
"grad_norm": 0.27995436477971375,
"learning_rate": 5.483443177755498e-08,
"loss": 0.9707604646682739,
"step": 627
},
{
"epoch": 1.8057553956834531,
"grad_norm": 0.3243898181487502,
"learning_rate": 5.32928924165581e-08,
"loss": 1.0739054679870605,
"step": 628
},
{
"epoch": 1.80863309352518,
"grad_norm": 0.2984254760085138,
"learning_rate": 5.177273817334438e-08,
"loss": 0.9249017238616943,
"step": 629
},
{
"epoch": 1.8115107913669064,
"grad_norm": 0.3146364757154643,
"learning_rate": 5.027400338659926e-08,
"loss": 0.9324784278869629,
"step": 630
},
{
"epoch": 1.814388489208633,
"grad_norm": 0.2942034665756954,
"learning_rate": 4.879672191116524e-08,
"loss": 0.92160564661026,
"step": 631
},
{
"epoch": 1.8172661870503597,
"grad_norm": 0.3093230418919872,
"learning_rate": 4.7340927117277105e-08,
"loss": 0.9626412987709045,
"step": 632
},
{
"epoch": 1.8201438848920863,
"grad_norm": 0.2737771051650981,
"learning_rate": 4.590665188980769e-08,
"loss": 0.9344724416732788,
"step": 633
},
{
"epoch": 1.823021582733813,
"grad_norm": 0.2868842184615828,
"learning_rate": 4.44939286275261e-08,
"loss": 0.969592809677124,
"step": 634
},
{
"epoch": 1.8258992805755394,
"grad_norm": 0.30108362908099795,
"learning_rate": 4.310278924236454e-08,
"loss": 0.9268302917480469,
"step": 635
},
{
"epoch": 1.8287769784172663,
"grad_norm": 0.3038411300603624,
"learning_rate": 4.173326515869879e-08,
"loss": 0.9940468072891235,
"step": 636
},
{
"epoch": 1.8316546762589927,
"grad_norm": 0.30517693172745697,
"learning_rate": 4.038538731263719e-08,
"loss": 0.9976387023925781,
"step": 637
},
{
"epoch": 1.8345323741007196,
"grad_norm": 0.3273494194690698,
"learning_rate": 3.9059186151322534e-08,
"loss": 1.05000638961792,
"step": 638
},
{
"epoch": 1.837410071942446,
"grad_norm": 0.29782336017676786,
"learning_rate": 3.775469163224432e-08,
"loss": 0.9021062850952148,
"step": 639
},
{
"epoch": 1.8402877697841726,
"grad_norm": 0.2796086934847699,
"learning_rate": 3.647193322256137e-08,
"loss": 0.8977291584014893,
"step": 640
},
{
"epoch": 1.8431654676258993,
"grad_norm": 0.32101134920342667,
"learning_rate": 3.5210939898437154e-08,
"loss": 0.9850113391876221,
"step": 641
},
{
"epoch": 1.846043165467626,
"grad_norm": 0.3578812552257961,
"learning_rate": 3.397174014438431e-08,
"loss": 0.9935275316238403,
"step": 642
},
{
"epoch": 1.8489208633093526,
"grad_norm": 0.3018396148934059,
"learning_rate": 3.275436195262193e-08,
"loss": 0.9289500713348389,
"step": 643
},
{
"epoch": 1.851798561151079,
"grad_norm": 0.2833335903840955,
"learning_rate": 3.155883282244287e-08,
"loss": 0.8204896450042725,
"step": 644
},
{
"epoch": 1.8546762589928059,
"grad_norm": 0.3430760638933196,
"learning_rate": 3.038517975959276e-08,
"loss": 1.0271613597869873,
"step": 645
},
{
"epoch": 1.8575539568345323,
"grad_norm": 0.3085433261480597,
"learning_rate": 2.923342927565964e-08,
"loss": 0.9084464311599731,
"step": 646
},
{
"epoch": 1.8604316546762591,
"grad_norm": 0.30704980866497883,
"learning_rate": 2.8103607387475746e-08,
"loss": 0.9712929725646973,
"step": 647
},
{
"epoch": 1.8633093525179856,
"grad_norm": 0.28312145339260103,
"learning_rate": 2.69957396165289e-08,
"loss": 0.9905073642730713,
"step": 648
},
{
"epoch": 1.8661870503597122,
"grad_norm": 0.2770551368568327,
"learning_rate": 2.5909850988386937e-08,
"loss": 0.9023991227149963,
"step": 649
},
{
"epoch": 1.8690647482014389,
"grad_norm": 0.31953951865146846,
"learning_rate": 2.4845966032131628e-08,
"loss": 1.0285024642944336,
"step": 650
},
{
"epoch": 1.8719424460431655,
"grad_norm": 0.3452314742008196,
"learning_rate": 2.380410877980532e-08,
"loss": 1.004025936126709,
"step": 651
},
{
"epoch": 1.8748201438848922,
"grad_norm": 0.33870243092263896,
"learning_rate": 2.278430276586729e-08,
"loss": 0.9524623155593872,
"step": 652
},
{
"epoch": 1.8776978417266186,
"grad_norm": 0.32350246830260215,
"learning_rate": 2.1786571026662702e-08,
"loss": 1.0455811023712158,
"step": 653
},
{
"epoch": 1.8805755395683454,
"grad_norm": 0.3328778935315255,
"learning_rate": 2.0810936099902364e-08,
"loss": 0.9919840693473816,
"step": 654
},
{
"epoch": 1.8834532374100719,
"grad_norm": 0.29952519081119605,
"learning_rate": 1.9857420024152806e-08,
"loss": 0.9986833930015564,
"step": 655
},
{
"epoch": 1.8863309352517985,
"grad_norm": 0.33793774006407235,
"learning_rate": 1.892604433833933e-08,
"loss": 0.931689441204071,
"step": 656
},
{
"epoch": 1.8892086330935252,
"grad_norm": 0.30901405969162027,
"learning_rate": 1.8016830081259094e-08,
"loss": 0.9114639759063721,
"step": 657
},
{
"epoch": 1.8920863309352518,
"grad_norm": 0.32095691544451443,
"learning_rate": 1.712979779110568e-08,
"loss": 1.0065832138061523,
"step": 658
},
{
"epoch": 1.8949640287769784,
"grad_norm": 0.30923568734316464,
"learning_rate": 1.6264967505005612e-08,
"loss": 0.9409237504005432,
"step": 659
},
{
"epoch": 1.8978417266187049,
"grad_norm": 0.31465289359066045,
"learning_rate": 1.5422358758565344e-08,
"loss": 0.9561393857002258,
"step": 660
},
{
"epoch": 1.9007194244604317,
"grad_norm": 0.2782146122603473,
"learning_rate": 1.4601990585430212e-08,
"loss": 0.8838869333267212,
"step": 661
},
{
"epoch": 1.9035971223021582,
"grad_norm": 0.3476179534958916,
"learning_rate": 1.380388151685441e-08,
"loss": 1.1335151195526123,
"step": 662
},
{
"epoch": 1.906474820143885,
"grad_norm": 0.30248705756655914,
"learning_rate": 1.302804958128223e-08,
"loss": 0.9658051133155823,
"step": 663
},
{
"epoch": 1.9093525179856115,
"grad_norm": 0.30985357673524433,
"learning_rate": 1.2274512303941164e-08,
"loss": 0.8988052606582642,
"step": 664
},
{
"epoch": 1.912230215827338,
"grad_norm": 0.3105920840120309,
"learning_rate": 1.1543286706445553e-08,
"loss": 0.9807320237159729,
"step": 665
},
{
"epoch": 1.9151079136690647,
"grad_norm": 0.30776301282911345,
"learning_rate": 1.0834389306412673e-08,
"loss": 0.9672824144363403,
"step": 666
},
{
"epoch": 1.9179856115107914,
"grad_norm": 0.3355211414551835,
"learning_rate": 1.0147836117088915e-08,
"loss": 0.9862767457962036,
"step": 667
},
{
"epoch": 1.920863309352518,
"grad_norm": 0.29700692569905607,
"learning_rate": 9.483642646988977e-09,
"loss": 0.9207549095153809,
"step": 668
},
{
"epoch": 1.9237410071942445,
"grad_norm": 0.32529111277227224,
"learning_rate": 8.841823899544577e-09,
"loss": 1.05051851272583,
"step": 669
},
{
"epoch": 1.9266187050359713,
"grad_norm": 0.3097839939998225,
"learning_rate": 8.222394372766173e-09,
"loss": 0.9589816927909851,
"step": 670
},
{
"epoch": 1.9294964028776977,
"grad_norm": 0.30754678215336145,
"learning_rate": 7.625368058915226e-09,
"loss": 0.9071527719497681,
"step": 671
},
{
"epoch": 1.9323741007194246,
"grad_norm": 0.28515154254996145,
"learning_rate": 7.05075844418812e-09,
"loss": 0.98401939868927,
"step": 672
},
{
"epoch": 1.935251798561151,
"grad_norm": 0.32501900842828985,
"learning_rate": 6.498578508411734e-09,
"loss": 0.9431239366531372,
"step": 673
},
{
"epoch": 1.9381294964028777,
"grad_norm": 0.2997493450074929,
"learning_rate": 5.9688407247500124e-09,
"loss": 0.8510617017745972,
"step": 674
},
{
"epoch": 1.9410071942446043,
"grad_norm": 0.2931086066866291,
"learning_rate": 5.461557059422306e-09,
"loss": 0.9289333820343018,
"step": 675
},
{
"epoch": 1.943884892086331,
"grad_norm": 0.3450018212922675,
"learning_rate": 4.9767389714330256e-09,
"loss": 0.988121509552002,
"step": 676
},
{
"epoch": 1.9467625899280576,
"grad_norm": 0.3140422887654555,
"learning_rate": 4.514397412312965e-09,
"loss": 1.0198135375976562,
"step": 677
},
{
"epoch": 1.949640287769784,
"grad_norm": 0.2885512942736773,
"learning_rate": 4.074542825871275e-09,
"loss": 0.8961633443832397,
"step": 678
},
{
"epoch": 1.952517985611511,
"grad_norm": 0.35409915766999916,
"learning_rate": 3.657185147960762e-09,
"loss": 1.0393238067626953,
"step": 679
},
{
"epoch": 1.9553956834532373,
"grad_norm": 0.30601728285109125,
"learning_rate": 3.2623338062522933e-09,
"loss": 0.9444550275802612,
"step": 680
},
{
"epoch": 1.958273381294964,
"grad_norm": 0.2924289258676955,
"learning_rate": 2.889997720022297e-09,
"loss": 0.9215587377548218,
"step": 681
},
{
"epoch": 1.9611510791366906,
"grad_norm": 0.30327925646552367,
"learning_rate": 2.5401852999512586e-09,
"loss": 1.000258445739746,
"step": 682
},
{
"epoch": 1.9640287769784173,
"grad_norm": 0.29260027512430775,
"learning_rate": 2.212904447933983e-09,
"loss": 0.9252768754959106,
"step": 683
},
{
"epoch": 1.966906474820144,
"grad_norm": 0.3011559234991841,
"learning_rate": 1.908162556900628e-09,
"loss": 0.9935536980628967,
"step": 684
},
{
"epoch": 1.9697841726618706,
"grad_norm": 0.27115587245365835,
"learning_rate": 1.6259665106498344e-09,
"loss": 0.9564770460128784,
"step": 685
},
{
"epoch": 1.9726618705035972,
"grad_norm": 0.27403209801565,
"learning_rate": 1.3663226836936326e-09,
"loss": 0.8805955052375793,
"step": 686
},
{
"epoch": 1.9755395683453236,
"grad_norm": 0.3044315007410778,
"learning_rate": 1.1292369411127766e-09,
"loss": 0.8896344304084778,
"step": 687
},
{
"epoch": 1.9784172661870505,
"grad_norm": 0.30681720361851966,
"learning_rate": 9.147146384250737e-10,
"loss": 0.980034351348877,
"step": 688
},
{
"epoch": 1.981294964028777,
"grad_norm": 0.32187194306044253,
"learning_rate": 7.227606214635917e-10,
"loss": 0.9895438551902771,
"step": 689
},
{
"epoch": 1.9841726618705036,
"grad_norm": 0.3136795801610112,
"learning_rate": 5.533792262675252e-10,
"loss": 0.9838018417358398,
"step": 690
},
{
"epoch": 1.9870503597122302,
"grad_norm": 0.29300997638864673,
"learning_rate": 4.0657427898460603e-10,
"loss": 0.8921380043029785,
"step": 691
},
{
"epoch": 1.9899280575539569,
"grad_norm": 0.2711131667782687,
"learning_rate": 2.8234909578417344e-10,
"loss": 0.8311777114868164,
"step": 692
},
{
"epoch": 1.9928057553956835,
"grad_norm": 0.3390732741445483,
"learning_rate": 1.8070648278234457e-10,
"loss": 1.0223444700241089,
"step": 693
},
{
"epoch": 1.99568345323741,
"grad_norm": 0.27722007418335426,
"learning_rate": 1.0164873597895419e-10,
"loss": 0.908263087272644,
"step": 694
},
{
"epoch": 1.9985611510791368,
"grad_norm": 0.32715406813626025,
"learning_rate": 4.5177641205262906e-11,
"loss": 0.950904369354248,
"step": 695
},
{
"epoch": 2.0,
"grad_norm": 0.45329927875611203,
"learning_rate": 1.1294474083878292e-11,
"loss": 1.1292500495910645,
"step": 696
},
{
"epoch": 2.0,
"step": 696,
"total_flos": 1203702673702912.0,
"train_loss": 1.0133290295114463,
"train_runtime": 7255.9911,
"train_samples_per_second": 0.766,
"train_steps_per_second": 0.096
}
],
"logging_steps": 1,
"max_steps": 696,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1203702673702912.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}