agent_duplicate_adapter / trainer_state.json
wonwonn's picture
Upload LoRA adapter (Qwen2.5-VL-7B-sft-duplicate)
355b4dd verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 1662,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0012040939193257074,
"grad_norm": 1.3857311022773227,
"learning_rate": 0.0,
"loss": 1.2624930143356323,
"step": 1
},
{
"epoch": 0.002408187838651415,
"grad_norm": 1.2463740707126811,
"learning_rate": 1.1904761904761904e-06,
"loss": 1.2957890033721924,
"step": 2
},
{
"epoch": 0.003612281757977122,
"grad_norm": 1.3015760498903324,
"learning_rate": 2.3809523809523808e-06,
"loss": 1.1986539363861084,
"step": 3
},
{
"epoch": 0.00481637567730283,
"grad_norm": 1.2379219361149816,
"learning_rate": 3.5714285714285714e-06,
"loss": 1.3096256256103516,
"step": 4
},
{
"epoch": 0.006020469596628537,
"grad_norm": 1.3581559932810647,
"learning_rate": 4.7619047619047615e-06,
"loss": 1.1937272548675537,
"step": 5
},
{
"epoch": 0.007224563515954244,
"grad_norm": 1.0634295654218369,
"learning_rate": 5.9523809523809525e-06,
"loss": 1.2732062339782715,
"step": 6
},
{
"epoch": 0.008428657435279952,
"grad_norm": 1.2627731612400113,
"learning_rate": 7.142857142857143e-06,
"loss": 1.2242463827133179,
"step": 7
},
{
"epoch": 0.00963275135460566,
"grad_norm": 1.1398494957999068,
"learning_rate": 8.333333333333334e-06,
"loss": 1.1949011087417603,
"step": 8
},
{
"epoch": 0.010836845273931367,
"grad_norm": 1.026465962704984,
"learning_rate": 9.523809523809523e-06,
"loss": 1.1705152988433838,
"step": 9
},
{
"epoch": 0.012040939193257074,
"grad_norm": 1.1041557775646487,
"learning_rate": 1.0714285714285714e-05,
"loss": 1.2213799953460693,
"step": 10
},
{
"epoch": 0.013245033112582781,
"grad_norm": 0.9017315332617877,
"learning_rate": 1.1904761904761905e-05,
"loss": 1.028003454208374,
"step": 11
},
{
"epoch": 0.014449127031908489,
"grad_norm": 0.9237344161484694,
"learning_rate": 1.3095238095238096e-05,
"loss": 1.1745611429214478,
"step": 12
},
{
"epoch": 0.015653220951234198,
"grad_norm": 0.8711175509387572,
"learning_rate": 1.4285714285714285e-05,
"loss": 1.101892352104187,
"step": 13
},
{
"epoch": 0.016857314870559904,
"grad_norm": 0.8000142119880304,
"learning_rate": 1.5476190476190476e-05,
"loss": 1.2238388061523438,
"step": 14
},
{
"epoch": 0.018061408789885613,
"grad_norm": 0.8029962706958844,
"learning_rate": 1.6666666666666667e-05,
"loss": 1.0508818626403809,
"step": 15
},
{
"epoch": 0.01926550270921132,
"grad_norm": 0.6849848840354494,
"learning_rate": 1.785714285714286e-05,
"loss": 1.1084110736846924,
"step": 16
},
{
"epoch": 0.020469596628537028,
"grad_norm": 0.6093252535402999,
"learning_rate": 1.9047619047619046e-05,
"loss": 1.0637344121932983,
"step": 17
},
{
"epoch": 0.021673690547862733,
"grad_norm": 0.7183132273243993,
"learning_rate": 2.023809523809524e-05,
"loss": 1.227840542793274,
"step": 18
},
{
"epoch": 0.022877784467188442,
"grad_norm": 0.648545110248291,
"learning_rate": 2.1428571428571428e-05,
"loss": 1.035664677619934,
"step": 19
},
{
"epoch": 0.024081878386514148,
"grad_norm": 0.7486365623781298,
"learning_rate": 2.261904761904762e-05,
"loss": 1.1192017793655396,
"step": 20
},
{
"epoch": 0.025285972305839857,
"grad_norm": 0.6944922028048586,
"learning_rate": 2.380952380952381e-05,
"loss": 1.0208429098129272,
"step": 21
},
{
"epoch": 0.026490066225165563,
"grad_norm": 0.7196682603599855,
"learning_rate": 2.5e-05,
"loss": 0.9876303672790527,
"step": 22
},
{
"epoch": 0.027694160144491272,
"grad_norm": 0.6870272402541916,
"learning_rate": 2.6190476190476192e-05,
"loss": 0.9562600255012512,
"step": 23
},
{
"epoch": 0.028898254063816978,
"grad_norm": 0.7001195731104751,
"learning_rate": 2.7380952380952383e-05,
"loss": 0.9452283382415771,
"step": 24
},
{
"epoch": 0.030102347983142687,
"grad_norm": 0.6921327909531948,
"learning_rate": 2.857142857142857e-05,
"loss": 0.9548719525337219,
"step": 25
},
{
"epoch": 0.031306441902468396,
"grad_norm": 0.8825048360102998,
"learning_rate": 2.9761904761904762e-05,
"loss": 0.98993980884552,
"step": 26
},
{
"epoch": 0.0325105358217941,
"grad_norm": 0.9116554836534111,
"learning_rate": 3.095238095238095e-05,
"loss": 0.983814001083374,
"step": 27
},
{
"epoch": 0.03371462974111981,
"grad_norm": 0.6444611662989064,
"learning_rate": 3.2142857142857144e-05,
"loss": 0.9923904538154602,
"step": 28
},
{
"epoch": 0.034918723660445516,
"grad_norm": 0.6886039243167973,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.9659771919250488,
"step": 29
},
{
"epoch": 0.036122817579771226,
"grad_norm": 0.6381424384485729,
"learning_rate": 3.4523809523809526e-05,
"loss": 0.9863267540931702,
"step": 30
},
{
"epoch": 0.03732691149909693,
"grad_norm": 0.6548053846918751,
"learning_rate": 3.571428571428572e-05,
"loss": 0.8822348117828369,
"step": 31
},
{
"epoch": 0.03853100541842264,
"grad_norm": 0.6826015392587083,
"learning_rate": 3.690476190476191e-05,
"loss": 0.9292355179786682,
"step": 32
},
{
"epoch": 0.039735099337748346,
"grad_norm": 0.6273525551782426,
"learning_rate": 3.809523809523809e-05,
"loss": 0.905829906463623,
"step": 33
},
{
"epoch": 0.040939193257074055,
"grad_norm": 0.5893650834603397,
"learning_rate": 3.928571428571429e-05,
"loss": 0.9168639183044434,
"step": 34
},
{
"epoch": 0.04214328717639976,
"grad_norm": 0.6804002970035018,
"learning_rate": 4.047619047619048e-05,
"loss": 0.934005618095398,
"step": 35
},
{
"epoch": 0.04334738109572547,
"grad_norm": 0.6222574179731591,
"learning_rate": 4.166666666666667e-05,
"loss": 1.032753825187683,
"step": 36
},
{
"epoch": 0.044551475015051176,
"grad_norm": 0.6431266610244738,
"learning_rate": 4.2857142857142856e-05,
"loss": 0.9134799242019653,
"step": 37
},
{
"epoch": 0.045755568934376885,
"grad_norm": 0.6578290497463257,
"learning_rate": 4.404761904761905e-05,
"loss": 1.029952049255371,
"step": 38
},
{
"epoch": 0.04695966285370259,
"grad_norm": 0.5695498869134297,
"learning_rate": 4.523809523809524e-05,
"loss": 0.822999119758606,
"step": 39
},
{
"epoch": 0.048163756773028296,
"grad_norm": 0.632165325404353,
"learning_rate": 4.642857142857143e-05,
"loss": 0.9547191858291626,
"step": 40
},
{
"epoch": 0.049367850692354005,
"grad_norm": 0.620883509847803,
"learning_rate": 4.761904761904762e-05,
"loss": 0.862228512763977,
"step": 41
},
{
"epoch": 0.050571944611679714,
"grad_norm": 0.6108129542721347,
"learning_rate": 4.880952380952381e-05,
"loss": 0.7873831987380981,
"step": 42
},
{
"epoch": 0.05177603853100542,
"grad_norm": 0.7030432783023145,
"learning_rate": 5e-05,
"loss": 1.0008782148361206,
"step": 43
},
{
"epoch": 0.052980132450331126,
"grad_norm": 0.6505410737487353,
"learning_rate": 5.119047619047619e-05,
"loss": 0.9275888800621033,
"step": 44
},
{
"epoch": 0.054184226369656835,
"grad_norm": 0.6154655728629782,
"learning_rate": 5.2380952380952384e-05,
"loss": 0.8537961840629578,
"step": 45
},
{
"epoch": 0.055388320288982544,
"grad_norm": 0.7449340178952516,
"learning_rate": 5.3571428571428575e-05,
"loss": 0.9411592483520508,
"step": 46
},
{
"epoch": 0.056592414208308246,
"grad_norm": 0.7101160305866654,
"learning_rate": 5.4761904761904766e-05,
"loss": 0.9629615545272827,
"step": 47
},
{
"epoch": 0.057796508127633955,
"grad_norm": 0.8823843880033381,
"learning_rate": 5.595238095238096e-05,
"loss": 0.9812487363815308,
"step": 48
},
{
"epoch": 0.059000602046959665,
"grad_norm": 0.5608003870598052,
"learning_rate": 5.714285714285714e-05,
"loss": 0.8206362724304199,
"step": 49
},
{
"epoch": 0.060204695966285374,
"grad_norm": 0.638805862955044,
"learning_rate": 5.833333333333334e-05,
"loss": 0.8263431787490845,
"step": 50
},
{
"epoch": 0.061408789885611076,
"grad_norm": 0.664746029928191,
"learning_rate": 5.9523809523809524e-05,
"loss": 0.9181925058364868,
"step": 51
},
{
"epoch": 0.06261288380493679,
"grad_norm": 0.7289678488950536,
"learning_rate": 6.0714285714285715e-05,
"loss": 1.0222965478897095,
"step": 52
},
{
"epoch": 0.0638169777242625,
"grad_norm": 0.6652149139696909,
"learning_rate": 6.19047619047619e-05,
"loss": 0.9594002962112427,
"step": 53
},
{
"epoch": 0.0650210716435882,
"grad_norm": 0.7637365045333203,
"learning_rate": 6.30952380952381e-05,
"loss": 0.9369411468505859,
"step": 54
},
{
"epoch": 0.06622516556291391,
"grad_norm": 0.6519732231987009,
"learning_rate": 6.428571428571429e-05,
"loss": 0.8848404884338379,
"step": 55
},
{
"epoch": 0.06742925948223961,
"grad_norm": 0.7035051117504962,
"learning_rate": 6.547619047619048e-05,
"loss": 0.8579400777816772,
"step": 56
},
{
"epoch": 0.06863335340156532,
"grad_norm": 0.6792729200988498,
"learning_rate": 6.666666666666667e-05,
"loss": 0.861051082611084,
"step": 57
},
{
"epoch": 0.06983744732089103,
"grad_norm": 0.7158674926842507,
"learning_rate": 6.785714285714286e-05,
"loss": 0.7903656363487244,
"step": 58
},
{
"epoch": 0.07104154124021674,
"grad_norm": 0.6760969797512926,
"learning_rate": 6.904761904761905e-05,
"loss": 1.018286943435669,
"step": 59
},
{
"epoch": 0.07224563515954245,
"grad_norm": 0.730090732097835,
"learning_rate": 7.023809523809524e-05,
"loss": 0.9277474284172058,
"step": 60
},
{
"epoch": 0.07344972907886815,
"grad_norm": 0.8107099747044093,
"learning_rate": 7.142857142857143e-05,
"loss": 0.8761380910873413,
"step": 61
},
{
"epoch": 0.07465382299819386,
"grad_norm": 0.7483132054674674,
"learning_rate": 7.261904761904762e-05,
"loss": 0.9940742254257202,
"step": 62
},
{
"epoch": 0.07585791691751957,
"grad_norm": 0.6592025247156321,
"learning_rate": 7.380952380952382e-05,
"loss": 0.8578092455863953,
"step": 63
},
{
"epoch": 0.07706201083684527,
"grad_norm": 0.666663929914337,
"learning_rate": 7.500000000000001e-05,
"loss": 1.0562655925750732,
"step": 64
},
{
"epoch": 0.07826610475617098,
"grad_norm": 0.6553657959902202,
"learning_rate": 7.619047619047618e-05,
"loss": 0.8854537010192871,
"step": 65
},
{
"epoch": 0.07947019867549669,
"grad_norm": 0.6801527830093095,
"learning_rate": 7.738095238095239e-05,
"loss": 0.9619255065917969,
"step": 66
},
{
"epoch": 0.0806742925948224,
"grad_norm": 0.7091511971130606,
"learning_rate": 7.857142857142858e-05,
"loss": 0.8897154331207275,
"step": 67
},
{
"epoch": 0.08187838651414811,
"grad_norm": 0.6217568908430099,
"learning_rate": 7.976190476190477e-05,
"loss": 0.8639489412307739,
"step": 68
},
{
"epoch": 0.08308248043347381,
"grad_norm": 0.6691126348267878,
"learning_rate": 8.095238095238096e-05,
"loss": 0.9247884154319763,
"step": 69
},
{
"epoch": 0.08428657435279951,
"grad_norm": 0.7919455951914144,
"learning_rate": 8.214285714285714e-05,
"loss": 0.9096778631210327,
"step": 70
},
{
"epoch": 0.08549066827212523,
"grad_norm": 0.644505073274393,
"learning_rate": 8.333333333333334e-05,
"loss": 0.9896690845489502,
"step": 71
},
{
"epoch": 0.08669476219145093,
"grad_norm": 0.6459900993473042,
"learning_rate": 8.452380952380952e-05,
"loss": 0.8692221641540527,
"step": 72
},
{
"epoch": 0.08789885611077664,
"grad_norm": 0.6921380271706044,
"learning_rate": 8.571428571428571e-05,
"loss": 0.9840501546859741,
"step": 73
},
{
"epoch": 0.08910295003010235,
"grad_norm": 0.6546405385949223,
"learning_rate": 8.690476190476192e-05,
"loss": 0.9007392525672913,
"step": 74
},
{
"epoch": 0.09030704394942805,
"grad_norm": 0.7145883364692728,
"learning_rate": 8.80952380952381e-05,
"loss": 0.8619614839553833,
"step": 75
},
{
"epoch": 0.09151113786875377,
"grad_norm": 0.5739847978816365,
"learning_rate": 8.92857142857143e-05,
"loss": 0.837583065032959,
"step": 76
},
{
"epoch": 0.09271523178807947,
"grad_norm": 0.6341980887282052,
"learning_rate": 9.047619047619048e-05,
"loss": 0.9038036465644836,
"step": 77
},
{
"epoch": 0.09391932570740517,
"grad_norm": 0.6715336469582549,
"learning_rate": 9.166666666666667e-05,
"loss": 0.8892405033111572,
"step": 78
},
{
"epoch": 0.09512341962673089,
"grad_norm": 0.6903293650509718,
"learning_rate": 9.285714285714286e-05,
"loss": 0.8257960081100464,
"step": 79
},
{
"epoch": 0.09632751354605659,
"grad_norm": 0.6991914027861378,
"learning_rate": 9.404761904761905e-05,
"loss": 0.9379273056983948,
"step": 80
},
{
"epoch": 0.0975316074653823,
"grad_norm": 0.6517772318319879,
"learning_rate": 9.523809523809524e-05,
"loss": 0.8167468905448914,
"step": 81
},
{
"epoch": 0.09873570138470801,
"grad_norm": 0.6350406572797412,
"learning_rate": 9.642857142857143e-05,
"loss": 0.870227575302124,
"step": 82
},
{
"epoch": 0.09993979530403371,
"grad_norm": 0.6386697594141318,
"learning_rate": 9.761904761904762e-05,
"loss": 0.8133725523948669,
"step": 83
},
{
"epoch": 0.10114388922335943,
"grad_norm": 0.8274992982474421,
"learning_rate": 9.880952380952381e-05,
"loss": 0.895470917224884,
"step": 84
},
{
"epoch": 0.10234798314268513,
"grad_norm": 0.7218441065279982,
"learning_rate": 0.0001,
"loss": 0.8310229778289795,
"step": 85
},
{
"epoch": 0.10355207706201083,
"grad_norm": 0.7086943059605396,
"learning_rate": 9.999990091096183e-05,
"loss": 0.9554932117462158,
"step": 86
},
{
"epoch": 0.10475617098133655,
"grad_norm": 0.8132692794887189,
"learning_rate": 9.999960364424007e-05,
"loss": 0.9255356192588806,
"step": 87
},
{
"epoch": 0.10596026490066225,
"grad_norm": 0.6711594152752894,
"learning_rate": 9.999910820101293e-05,
"loss": 0.8907528519630432,
"step": 88
},
{
"epoch": 0.10716435881998795,
"grad_norm": 0.7050931327380768,
"learning_rate": 9.999841458324413e-05,
"loss": 0.8549885749816895,
"step": 89
},
{
"epoch": 0.10836845273931367,
"grad_norm": 0.7100005408954425,
"learning_rate": 9.999752279368291e-05,
"loss": 0.9431029558181763,
"step": 90
},
{
"epoch": 0.10957254665863937,
"grad_norm": 0.6371089599634813,
"learning_rate": 9.999643283586388e-05,
"loss": 0.9296759963035583,
"step": 91
},
{
"epoch": 0.11077664057796509,
"grad_norm": 0.6734224937891277,
"learning_rate": 9.999514471410719e-05,
"loss": 0.899579644203186,
"step": 92
},
{
"epoch": 0.11198073449729079,
"grad_norm": 0.6438486311871371,
"learning_rate": 9.999365843351838e-05,
"loss": 0.9152452945709229,
"step": 93
},
{
"epoch": 0.11318482841661649,
"grad_norm": 0.6110376662502763,
"learning_rate": 9.999197399998841e-05,
"loss": 0.7135411500930786,
"step": 94
},
{
"epoch": 0.11438892233594221,
"grad_norm": 0.624592733508939,
"learning_rate": 9.999009142019364e-05,
"loss": 0.890850305557251,
"step": 95
},
{
"epoch": 0.11559301625526791,
"grad_norm": 0.6776171599806363,
"learning_rate": 9.99880107015958e-05,
"loss": 0.871957004070282,
"step": 96
},
{
"epoch": 0.11679711017459361,
"grad_norm": 0.6468008693554984,
"learning_rate": 9.998573185244192e-05,
"loss": 0.8072552680969238,
"step": 97
},
{
"epoch": 0.11800120409391933,
"grad_norm": 0.6533711890608097,
"learning_rate": 9.998325488176437e-05,
"loss": 0.8927024602890015,
"step": 98
},
{
"epoch": 0.11920529801324503,
"grad_norm": 0.651193212466641,
"learning_rate": 9.99805797993808e-05,
"loss": 0.7971667051315308,
"step": 99
},
{
"epoch": 0.12040939193257075,
"grad_norm": 0.6833450262543173,
"learning_rate": 9.997770661589403e-05,
"loss": 0.8304203748703003,
"step": 100
},
{
"epoch": 0.12161348585189645,
"grad_norm": 0.5967083397414709,
"learning_rate": 9.997463534269213e-05,
"loss": 0.8748705983161926,
"step": 101
},
{
"epoch": 0.12281757977122215,
"grad_norm": 0.6694705277935548,
"learning_rate": 9.997136599194825e-05,
"loss": 0.8278296589851379,
"step": 102
},
{
"epoch": 0.12402167369054787,
"grad_norm": 0.5722304555846056,
"learning_rate": 9.996789857662068e-05,
"loss": 0.7870973944664001,
"step": 103
},
{
"epoch": 0.12522576760987358,
"grad_norm": 0.6261642343902029,
"learning_rate": 9.996423311045273e-05,
"loss": 0.9398536086082458,
"step": 104
},
{
"epoch": 0.12642986152919927,
"grad_norm": 0.7013040298414001,
"learning_rate": 9.99603696079727e-05,
"loss": 0.7681595087051392,
"step": 105
},
{
"epoch": 0.127633955448525,
"grad_norm": 0.6763036369561215,
"learning_rate": 9.995630808449383e-05,
"loss": 0.7789179086685181,
"step": 106
},
{
"epoch": 0.1288380493678507,
"grad_norm": 0.6292862927531453,
"learning_rate": 9.99520485561142e-05,
"loss": 0.8259806632995605,
"step": 107
},
{
"epoch": 0.1300421432871764,
"grad_norm": 0.5750463614305219,
"learning_rate": 9.994759103971673e-05,
"loss": 0.8091164231300354,
"step": 108
},
{
"epoch": 0.1312462372065021,
"grad_norm": 0.6900239826497377,
"learning_rate": 9.994293555296904e-05,
"loss": 0.8229101300239563,
"step": 109
},
{
"epoch": 0.13245033112582782,
"grad_norm": 0.7090621897550452,
"learning_rate": 9.993808211432346e-05,
"loss": 0.8720242381095886,
"step": 110
},
{
"epoch": 0.1336544250451535,
"grad_norm": 0.649720004600575,
"learning_rate": 9.993303074301689e-05,
"loss": 0.9825680255889893,
"step": 111
},
{
"epoch": 0.13485851896447923,
"grad_norm": 0.596066035750241,
"learning_rate": 9.992778145907073e-05,
"loss": 0.835587203502655,
"step": 112
},
{
"epoch": 0.13606261288380495,
"grad_norm": 0.6114422326601442,
"learning_rate": 9.992233428329088e-05,
"loss": 0.8825037479400635,
"step": 113
},
{
"epoch": 0.13726670680313063,
"grad_norm": 0.6139625161179929,
"learning_rate": 9.99166892372675e-05,
"loss": 0.8165640830993652,
"step": 114
},
{
"epoch": 0.13847080072245635,
"grad_norm": 0.6400803908308438,
"learning_rate": 9.991084634337511e-05,
"loss": 0.8673977851867676,
"step": 115
},
{
"epoch": 0.13967489464178207,
"grad_norm": 0.63045091304516,
"learning_rate": 9.99048056247724e-05,
"loss": 0.9138743877410889,
"step": 116
},
{
"epoch": 0.14087898856110775,
"grad_norm": 0.6538480320196308,
"learning_rate": 9.989856710540209e-05,
"loss": 0.8727657794952393,
"step": 117
},
{
"epoch": 0.14208308248043347,
"grad_norm": 0.5908593583990588,
"learning_rate": 9.989213080999097e-05,
"loss": 0.7986258864402771,
"step": 118
},
{
"epoch": 0.1432871763997592,
"grad_norm": 0.6062471765966283,
"learning_rate": 9.988549676404965e-05,
"loss": 0.9581261873245239,
"step": 119
},
{
"epoch": 0.1444912703190849,
"grad_norm": 0.6628181567530304,
"learning_rate": 9.987866499387264e-05,
"loss": 0.8676153421401978,
"step": 120
},
{
"epoch": 0.1456953642384106,
"grad_norm": 0.6088706936578496,
"learning_rate": 9.987163552653802e-05,
"loss": 0.8428996205329895,
"step": 121
},
{
"epoch": 0.1468994581577363,
"grad_norm": 0.6022625865028414,
"learning_rate": 9.986440838990755e-05,
"loss": 0.8001778721809387,
"step": 122
},
{
"epoch": 0.14810355207706202,
"grad_norm": 0.5976622504629961,
"learning_rate": 9.985698361262642e-05,
"loss": 0.8853356242179871,
"step": 123
},
{
"epoch": 0.1493076459963877,
"grad_norm": 0.6384576361013036,
"learning_rate": 9.984936122412319e-05,
"loss": 0.8536797165870667,
"step": 124
},
{
"epoch": 0.15051173991571343,
"grad_norm": 0.5757065790067829,
"learning_rate": 9.984154125460969e-05,
"loss": 0.730737566947937,
"step": 125
},
{
"epoch": 0.15171583383503914,
"grad_norm": 0.6478459969598638,
"learning_rate": 9.983352373508081e-05,
"loss": 0.896142840385437,
"step": 126
},
{
"epoch": 0.15291992775436483,
"grad_norm": 0.6340985936115769,
"learning_rate": 9.982530869731451e-05,
"loss": 0.8389994502067566,
"step": 127
},
{
"epoch": 0.15412402167369055,
"grad_norm": 0.6141960403382065,
"learning_rate": 9.98168961738716e-05,
"loss": 0.7525704503059387,
"step": 128
},
{
"epoch": 0.15532811559301626,
"grad_norm": 0.5593301763063173,
"learning_rate": 9.980828619809561e-05,
"loss": 0.8256592154502869,
"step": 129
},
{
"epoch": 0.15653220951234195,
"grad_norm": 0.5599457834475475,
"learning_rate": 9.979947880411273e-05,
"loss": 0.8039137125015259,
"step": 130
},
{
"epoch": 0.15773630343166767,
"grad_norm": 0.6241383827018707,
"learning_rate": 9.97904740268316e-05,
"loss": 0.8460755348205566,
"step": 131
},
{
"epoch": 0.15894039735099338,
"grad_norm": 0.6316516334216284,
"learning_rate": 9.978127190194322e-05,
"loss": 0.8612096309661865,
"step": 132
},
{
"epoch": 0.16014449127031907,
"grad_norm": 0.6125586110900785,
"learning_rate": 9.977187246592076e-05,
"loss": 0.855193018913269,
"step": 133
},
{
"epoch": 0.1613485851896448,
"grad_norm": 0.6292847564168722,
"learning_rate": 9.976227575601947e-05,
"loss": 0.988248348236084,
"step": 134
},
{
"epoch": 0.1625526791089705,
"grad_norm": 0.5508080355058005,
"learning_rate": 9.97524818102765e-05,
"loss": 0.8570771217346191,
"step": 135
},
{
"epoch": 0.16375677302829622,
"grad_norm": 0.6701649957289219,
"learning_rate": 9.974249066751077e-05,
"loss": 0.8548328280448914,
"step": 136
},
{
"epoch": 0.1649608669476219,
"grad_norm": 0.6260940616203523,
"learning_rate": 9.973230236732276e-05,
"loss": 0.8489102125167847,
"step": 137
},
{
"epoch": 0.16616496086694763,
"grad_norm": 0.6448311566984258,
"learning_rate": 9.972191695009446e-05,
"loss": 0.814854085445404,
"step": 138
},
{
"epoch": 0.16736905478627334,
"grad_norm": 0.6006579952276618,
"learning_rate": 9.971133445698908e-05,
"loss": 0.8287836909294128,
"step": 139
},
{
"epoch": 0.16857314870559903,
"grad_norm": 0.595069768052264,
"learning_rate": 9.9700554929951e-05,
"loss": 0.8828743100166321,
"step": 140
},
{
"epoch": 0.16977724262492475,
"grad_norm": 0.5992523879349947,
"learning_rate": 9.968957841170554e-05,
"loss": 0.7269818782806396,
"step": 141
},
{
"epoch": 0.17098133654425046,
"grad_norm": 0.6358277373490012,
"learning_rate": 9.967840494575879e-05,
"loss": 0.8707228302955627,
"step": 142
},
{
"epoch": 0.17218543046357615,
"grad_norm": 0.6256085596888589,
"learning_rate": 9.966703457639748e-05,
"loss": 0.7957820892333984,
"step": 143
},
{
"epoch": 0.17338952438290187,
"grad_norm": 0.633651328717841,
"learning_rate": 9.965546734868875e-05,
"loss": 0.7849557995796204,
"step": 144
},
{
"epoch": 0.17459361830222758,
"grad_norm": 0.6650954162324398,
"learning_rate": 9.964370330848005e-05,
"loss": 0.86981201171875,
"step": 145
},
{
"epoch": 0.17579771222155327,
"grad_norm": 0.6662715455940965,
"learning_rate": 9.963174250239888e-05,
"loss": 0.874397337436676,
"step": 146
},
{
"epoch": 0.177001806140879,
"grad_norm": 0.6256157448531097,
"learning_rate": 9.96195849778526e-05,
"loss": 0.8385191559791565,
"step": 147
},
{
"epoch": 0.1782059000602047,
"grad_norm": 0.7580851610244488,
"learning_rate": 9.960723078302832e-05,
"loss": 0.8500886559486389,
"step": 148
},
{
"epoch": 0.1794099939795304,
"grad_norm": 0.5473969800960325,
"learning_rate": 9.959467996689264e-05,
"loss": 0.7514045238494873,
"step": 149
},
{
"epoch": 0.1806140878988561,
"grad_norm": 0.5617783736374764,
"learning_rate": 9.95819325791915e-05,
"loss": 0.8130397200584412,
"step": 150
},
{
"epoch": 0.18181818181818182,
"grad_norm": 0.5255953623788495,
"learning_rate": 9.956898867044999e-05,
"loss": 0.7567414045333862,
"step": 151
},
{
"epoch": 0.18302227573750754,
"grad_norm": 0.5802570951276651,
"learning_rate": 9.955584829197203e-05,
"loss": 0.8222522139549255,
"step": 152
},
{
"epoch": 0.18422636965683323,
"grad_norm": 0.5516508722368936,
"learning_rate": 9.954251149584036e-05,
"loss": 0.7163509130477905,
"step": 153
},
{
"epoch": 0.18543046357615894,
"grad_norm": 0.5840085174535194,
"learning_rate": 9.952897833491617e-05,
"loss": 0.8305390477180481,
"step": 154
},
{
"epoch": 0.18663455749548466,
"grad_norm": 0.5998319848472415,
"learning_rate": 9.951524886283899e-05,
"loss": 0.8803359270095825,
"step": 155
},
{
"epoch": 0.18783865141481035,
"grad_norm": 0.5801346337633988,
"learning_rate": 9.950132313402642e-05,
"loss": 0.8195471167564392,
"step": 156
},
{
"epoch": 0.18904274533413606,
"grad_norm": 0.6021003112824673,
"learning_rate": 9.948720120367394e-05,
"loss": 0.8305729627609253,
"step": 157
},
{
"epoch": 0.19024683925346178,
"grad_norm": 0.5722705278603031,
"learning_rate": 9.947288312775471e-05,
"loss": 0.911281406879425,
"step": 158
},
{
"epoch": 0.19145093317278747,
"grad_norm": 0.7115092503847944,
"learning_rate": 9.945836896301927e-05,
"loss": 0.8648508787155151,
"step": 159
},
{
"epoch": 0.19265502709211318,
"grad_norm": 0.5561571792875855,
"learning_rate": 9.944365876699544e-05,
"loss": 0.8270993828773499,
"step": 160
},
{
"epoch": 0.1938591210114389,
"grad_norm": 0.5047158553714911,
"learning_rate": 9.942875259798796e-05,
"loss": 0.6847013831138611,
"step": 161
},
{
"epoch": 0.1950632149307646,
"grad_norm": 0.6071931068999377,
"learning_rate": 9.941365051507836e-05,
"loss": 0.6603276133537292,
"step": 162
},
{
"epoch": 0.1962673088500903,
"grad_norm": 0.6975658094834045,
"learning_rate": 9.939835257812468e-05,
"loss": 0.8116236925125122,
"step": 163
},
{
"epoch": 0.19747140276941602,
"grad_norm": 0.6144333372016162,
"learning_rate": 9.938285884776121e-05,
"loss": 0.7998278141021729,
"step": 164
},
{
"epoch": 0.1986754966887417,
"grad_norm": 0.6746772669051367,
"learning_rate": 9.936716938539834e-05,
"loss": 0.7614130973815918,
"step": 165
},
{
"epoch": 0.19987959060806743,
"grad_norm": 0.6058387154386324,
"learning_rate": 9.93512842532222e-05,
"loss": 0.8001455664634705,
"step": 166
},
{
"epoch": 0.20108368452739314,
"grad_norm": 0.709154481648146,
"learning_rate": 9.93352035141945e-05,
"loss": 0.898850679397583,
"step": 167
},
{
"epoch": 0.20228777844671886,
"grad_norm": 0.7417717941267389,
"learning_rate": 9.931892723205221e-05,
"loss": 0.9218953251838684,
"step": 168
},
{
"epoch": 0.20349187236604455,
"grad_norm": 0.6209828279431264,
"learning_rate": 9.93024554713074e-05,
"loss": 0.7685527801513672,
"step": 169
},
{
"epoch": 0.20469596628537026,
"grad_norm": 0.6366701630614862,
"learning_rate": 9.92857882972469e-05,
"loss": 0.8374059796333313,
"step": 170
},
{
"epoch": 0.20590006020469598,
"grad_norm": 0.5595323430918345,
"learning_rate": 9.926892577593208e-05,
"loss": 0.9538528919219971,
"step": 171
},
{
"epoch": 0.20710415412402167,
"grad_norm": 0.5686220609783365,
"learning_rate": 9.925186797419858e-05,
"loss": 0.7435036301612854,
"step": 172
},
{
"epoch": 0.20830824804334738,
"grad_norm": 0.5896909739473452,
"learning_rate": 9.923461495965607e-05,
"loss": 0.9060875177383423,
"step": 173
},
{
"epoch": 0.2095123419626731,
"grad_norm": 0.5732335188188169,
"learning_rate": 9.92171668006879e-05,
"loss": 0.7095259428024292,
"step": 174
},
{
"epoch": 0.2107164358819988,
"grad_norm": 0.5413026055480401,
"learning_rate": 9.919952356645092e-05,
"loss": 0.8325881958007812,
"step": 175
},
{
"epoch": 0.2119205298013245,
"grad_norm": 0.6250854503279384,
"learning_rate": 9.91816853268752e-05,
"loss": 0.8085938096046448,
"step": 176
},
{
"epoch": 0.21312462372065022,
"grad_norm": 0.5191367916965406,
"learning_rate": 9.91636521526637e-05,
"loss": 0.7798078656196594,
"step": 177
},
{
"epoch": 0.2143287176399759,
"grad_norm": 0.5853528270128076,
"learning_rate": 9.9145424115292e-05,
"loss": 0.8047578930854797,
"step": 178
},
{
"epoch": 0.21553281155930162,
"grad_norm": 0.5620566052956103,
"learning_rate": 9.912700128700805e-05,
"loss": 0.7659736275672913,
"step": 179
},
{
"epoch": 0.21673690547862734,
"grad_norm": 0.6328693181609747,
"learning_rate": 9.910838374083185e-05,
"loss": 0.900551438331604,
"step": 180
},
{
"epoch": 0.21794099939795303,
"grad_norm": 0.6120172533297469,
"learning_rate": 9.908957155055523e-05,
"loss": 0.7945046424865723,
"step": 181
},
{
"epoch": 0.21914509331727874,
"grad_norm": 0.5837876424034375,
"learning_rate": 9.907056479074142e-05,
"loss": 0.792214035987854,
"step": 182
},
{
"epoch": 0.22034918723660446,
"grad_norm": 0.643354812405774,
"learning_rate": 9.905136353672492e-05,
"loss": 0.765371561050415,
"step": 183
},
{
"epoch": 0.22155328115593018,
"grad_norm": 0.6018854214062566,
"learning_rate": 9.903196786461106e-05,
"loss": 0.86408931016922,
"step": 184
},
{
"epoch": 0.22275737507525586,
"grad_norm": 0.6035354187551014,
"learning_rate": 9.901237785127577e-05,
"loss": 0.8438395261764526,
"step": 185
},
{
"epoch": 0.22396146899458158,
"grad_norm": 0.6087113771907617,
"learning_rate": 9.89925935743653e-05,
"loss": 0.8094379305839539,
"step": 186
},
{
"epoch": 0.2251655629139073,
"grad_norm": 0.5726663549851495,
"learning_rate": 9.897261511229583e-05,
"loss": 0.7722645998001099,
"step": 187
},
{
"epoch": 0.22636965683323299,
"grad_norm": 0.5725020014585523,
"learning_rate": 9.895244254425322e-05,
"loss": 0.7963128685951233,
"step": 188
},
{
"epoch": 0.2275737507525587,
"grad_norm": 0.5671424265608609,
"learning_rate": 9.893207595019269e-05,
"loss": 0.861395001411438,
"step": 189
},
{
"epoch": 0.22877784467188442,
"grad_norm": 0.6148069567225031,
"learning_rate": 9.891151541083852e-05,
"loss": 0.8708590269088745,
"step": 190
},
{
"epoch": 0.2299819385912101,
"grad_norm": 0.5373717373175128,
"learning_rate": 9.889076100768362e-05,
"loss": 0.8136815428733826,
"step": 191
},
{
"epoch": 0.23118603251053582,
"grad_norm": 0.5623442601909269,
"learning_rate": 9.886981282298937e-05,
"loss": 0.8031935095787048,
"step": 192
},
{
"epoch": 0.23239012642986154,
"grad_norm": 0.5877417215377068,
"learning_rate": 9.884867093978519e-05,
"loss": 0.9116311073303223,
"step": 193
},
{
"epoch": 0.23359422034918723,
"grad_norm": 0.5940782648264424,
"learning_rate": 9.882733544186825e-05,
"loss": 0.8557519912719727,
"step": 194
},
{
"epoch": 0.23479831426851294,
"grad_norm": 0.5877233860993956,
"learning_rate": 9.880580641380306e-05,
"loss": 0.8720890283584595,
"step": 195
},
{
"epoch": 0.23600240818783866,
"grad_norm": 0.6100600335906609,
"learning_rate": 9.878408394092129e-05,
"loss": 0.796465277671814,
"step": 196
},
{
"epoch": 0.23720650210716435,
"grad_norm": 0.5224524929320521,
"learning_rate": 9.876216810932129e-05,
"loss": 0.7119709253311157,
"step": 197
},
{
"epoch": 0.23841059602649006,
"grad_norm": 0.5882114020843635,
"learning_rate": 9.87400590058678e-05,
"loss": 0.7778695225715637,
"step": 198
},
{
"epoch": 0.23961468994581578,
"grad_norm": 0.5646252854821827,
"learning_rate": 9.871775671819162e-05,
"loss": 0.7835485935211182,
"step": 199
},
{
"epoch": 0.2408187838651415,
"grad_norm": 0.5534101097338066,
"learning_rate": 9.869526133468923e-05,
"loss": 0.7811274528503418,
"step": 200
},
{
"epoch": 0.24202287778446718,
"grad_norm": 0.659757981549362,
"learning_rate": 9.867257294452245e-05,
"loss": 0.8600764274597168,
"step": 201
},
{
"epoch": 0.2432269717037929,
"grad_norm": 0.5828795040656405,
"learning_rate": 9.864969163761817e-05,
"loss": 0.8677747845649719,
"step": 202
},
{
"epoch": 0.24443106562311862,
"grad_norm": 0.5735738787878958,
"learning_rate": 9.86266175046678e-05,
"loss": 0.7098298072814941,
"step": 203
},
{
"epoch": 0.2456351595424443,
"grad_norm": 0.6109429085852309,
"learning_rate": 9.86033506371271e-05,
"loss": 0.8356621265411377,
"step": 204
},
{
"epoch": 0.24683925346177002,
"grad_norm": 0.6590037848494839,
"learning_rate": 9.857989112721574e-05,
"loss": 0.8513151407241821,
"step": 205
},
{
"epoch": 0.24804334738109574,
"grad_norm": 0.5843263839007125,
"learning_rate": 9.855623906791693e-05,
"loss": 0.7554444074630737,
"step": 206
},
{
"epoch": 0.24924744130042142,
"grad_norm": 0.6097379652117024,
"learning_rate": 9.853239455297705e-05,
"loss": 0.7517697811126709,
"step": 207
},
{
"epoch": 0.25045153521974717,
"grad_norm": 0.5909426785905095,
"learning_rate": 9.850835767690532e-05,
"loss": 0.8333016633987427,
"step": 208
},
{
"epoch": 0.25165562913907286,
"grad_norm": 0.5632590506371093,
"learning_rate": 9.848412853497337e-05,
"loss": 0.7474313974380493,
"step": 209
},
{
"epoch": 0.25285972305839854,
"grad_norm": 0.6515626929603409,
"learning_rate": 9.845970722321489e-05,
"loss": 0.8400745987892151,
"step": 210
},
{
"epoch": 0.2540638169777243,
"grad_norm": 0.5758739986380993,
"learning_rate": 9.843509383842525e-05,
"loss": 0.8299905061721802,
"step": 211
},
{
"epoch": 0.25526791089705,
"grad_norm": 0.5990535640426988,
"learning_rate": 9.841028847816112e-05,
"loss": 0.871423602104187,
"step": 212
},
{
"epoch": 0.25647200481637566,
"grad_norm": 0.6381070316663646,
"learning_rate": 9.838529124074006e-05,
"loss": 0.8027063012123108,
"step": 213
},
{
"epoch": 0.2576760987357014,
"grad_norm": 0.5637299048863595,
"learning_rate": 9.836010222524018e-05,
"loss": 0.7195415496826172,
"step": 214
},
{
"epoch": 0.2588801926550271,
"grad_norm": 0.5432784432670418,
"learning_rate": 9.833472153149968e-05,
"loss": 0.7662504315376282,
"step": 215
},
{
"epoch": 0.2600842865743528,
"grad_norm": 0.5861530629284589,
"learning_rate": 9.830914926011651e-05,
"loss": 0.7310256958007812,
"step": 216
},
{
"epoch": 0.26128838049367853,
"grad_norm": 0.5619164717308941,
"learning_rate": 9.828338551244794e-05,
"loss": 0.83974289894104,
"step": 217
},
{
"epoch": 0.2624924744130042,
"grad_norm": 0.5584180288947086,
"learning_rate": 9.825743039061015e-05,
"loss": 0.8033380508422852,
"step": 218
},
{
"epoch": 0.2636965683323299,
"grad_norm": 0.6521530238007373,
"learning_rate": 9.823128399747788e-05,
"loss": 0.8010194301605225,
"step": 219
},
{
"epoch": 0.26490066225165565,
"grad_norm": 0.5866740761389695,
"learning_rate": 9.820494643668396e-05,
"loss": 0.7578902244567871,
"step": 220
},
{
"epoch": 0.26610475617098134,
"grad_norm": 0.5752716889039778,
"learning_rate": 9.817841781261894e-05,
"loss": 0.889061450958252,
"step": 221
},
{
"epoch": 0.267308850090307,
"grad_norm": 0.661749410785086,
"learning_rate": 9.815169823043066e-05,
"loss": 0.9390289783477783,
"step": 222
},
{
"epoch": 0.26851294400963277,
"grad_norm": 0.6005185789152208,
"learning_rate": 9.812478779602381e-05,
"loss": 0.8111711740493774,
"step": 223
},
{
"epoch": 0.26971703792895846,
"grad_norm": 0.6462056969592548,
"learning_rate": 9.809768661605956e-05,
"loss": 0.8043175935745239,
"step": 224
},
{
"epoch": 0.27092113184828415,
"grad_norm": 0.6425301724501173,
"learning_rate": 9.80703947979551e-05,
"loss": 0.8524254560470581,
"step": 225
},
{
"epoch": 0.2721252257676099,
"grad_norm": 0.5728100192205237,
"learning_rate": 9.804291244988324e-05,
"loss": 0.7850819230079651,
"step": 226
},
{
"epoch": 0.2733293196869356,
"grad_norm": 0.5821116752238383,
"learning_rate": 9.801523968077195e-05,
"loss": 0.8302472829818726,
"step": 227
},
{
"epoch": 0.27453341360626127,
"grad_norm": 0.569993484923619,
"learning_rate": 9.798737660030397e-05,
"loss": 0.7643336653709412,
"step": 228
},
{
"epoch": 0.275737507525587,
"grad_norm": 0.5524447832896014,
"learning_rate": 9.79593233189163e-05,
"loss": 0.742889404296875,
"step": 229
},
{
"epoch": 0.2769416014449127,
"grad_norm": 0.5482676333114046,
"learning_rate": 9.793107994779988e-05,
"loss": 0.8601056933403015,
"step": 230
},
{
"epoch": 0.2781456953642384,
"grad_norm": 0.5800717674989954,
"learning_rate": 9.790264659889903e-05,
"loss": 0.7449058294296265,
"step": 231
},
{
"epoch": 0.27934978928356413,
"grad_norm": 0.5564258654759603,
"learning_rate": 9.787402338491108e-05,
"loss": 0.7483078837394714,
"step": 232
},
{
"epoch": 0.2805538832028898,
"grad_norm": 0.5661348995726302,
"learning_rate": 9.78452104192859e-05,
"loss": 0.7628078460693359,
"step": 233
},
{
"epoch": 0.2817579771222155,
"grad_norm": 0.5603718171110846,
"learning_rate": 9.781620781622546e-05,
"loss": 0.7319855093955994,
"step": 234
},
{
"epoch": 0.28296207104154125,
"grad_norm": 0.5406763145079039,
"learning_rate": 9.778701569068336e-05,
"loss": 0.7549390196800232,
"step": 235
},
{
"epoch": 0.28416616496086694,
"grad_norm": 0.5351766880508916,
"learning_rate": 9.775763415836439e-05,
"loss": 0.8256465792655945,
"step": 236
},
{
"epoch": 0.28537025888019263,
"grad_norm": 0.5467048130924815,
"learning_rate": 9.772806333572405e-05,
"loss": 0.6397162675857544,
"step": 237
},
{
"epoch": 0.2865743527995184,
"grad_norm": 0.5620154613953218,
"learning_rate": 9.76983033399681e-05,
"loss": 0.7978140711784363,
"step": 238
},
{
"epoch": 0.28777844671884406,
"grad_norm": 0.5606858092665854,
"learning_rate": 9.766835428905213e-05,
"loss": 0.6563281416893005,
"step": 239
},
{
"epoch": 0.2889825406381698,
"grad_norm": 0.6874263093912247,
"learning_rate": 9.763821630168106e-05,
"loss": 0.7781293392181396,
"step": 240
},
{
"epoch": 0.2901866345574955,
"grad_norm": 0.5891472596464531,
"learning_rate": 9.760788949730866e-05,
"loss": 0.8141942024230957,
"step": 241
},
{
"epoch": 0.2913907284768212,
"grad_norm": 0.5988083590231383,
"learning_rate": 9.757737399613706e-05,
"loss": 0.8612946271896362,
"step": 242
},
{
"epoch": 0.2925948223961469,
"grad_norm": 0.6160835727269925,
"learning_rate": 9.754666991911633e-05,
"loss": 0.8862748742103577,
"step": 243
},
{
"epoch": 0.2937989163154726,
"grad_norm": 0.5661716993339028,
"learning_rate": 9.751577738794398e-05,
"loss": 0.8197008371353149,
"step": 244
},
{
"epoch": 0.2950030102347983,
"grad_norm": 0.5678861897622992,
"learning_rate": 9.748469652506446e-05,
"loss": 0.8541512489318848,
"step": 245
},
{
"epoch": 0.29620710415412405,
"grad_norm": 0.5823103566349462,
"learning_rate": 9.745342745366867e-05,
"loss": 0.8963796496391296,
"step": 246
},
{
"epoch": 0.29741119807344973,
"grad_norm": 0.5798680494503358,
"learning_rate": 9.74219702976935e-05,
"loss": 0.9132851958274841,
"step": 247
},
{
"epoch": 0.2986152919927754,
"grad_norm": 0.6178066264758173,
"learning_rate": 9.739032518182134e-05,
"loss": 0.8326083421707153,
"step": 248
},
{
"epoch": 0.29981938591210117,
"grad_norm": 0.611524198156477,
"learning_rate": 9.735849223147952e-05,
"loss": 0.6978630423545837,
"step": 249
},
{
"epoch": 0.30102347983142685,
"grad_norm": 0.5413119691863977,
"learning_rate": 9.732647157283994e-05,
"loss": 0.8263933062553406,
"step": 250
},
{
"epoch": 0.30222757375075254,
"grad_norm": 0.6044662490988156,
"learning_rate": 9.729426333281841e-05,
"loss": 0.7137265801429749,
"step": 251
},
{
"epoch": 0.3034316676700783,
"grad_norm": 0.503313581222299,
"learning_rate": 9.726186763907432e-05,
"loss": 0.6936661005020142,
"step": 252
},
{
"epoch": 0.304635761589404,
"grad_norm": 0.558556757807548,
"learning_rate": 9.722928462000995e-05,
"loss": 0.8272273540496826,
"step": 253
},
{
"epoch": 0.30583985550872966,
"grad_norm": 0.5223729769763801,
"learning_rate": 9.71965144047701e-05,
"loss": 0.8500688076019287,
"step": 254
},
{
"epoch": 0.3070439494280554,
"grad_norm": 0.6471571685002796,
"learning_rate": 9.716355712324156e-05,
"loss": 0.7432481050491333,
"step": 255
},
{
"epoch": 0.3082480433473811,
"grad_norm": 0.5381987465091518,
"learning_rate": 9.713041290605254e-05,
"loss": 0.6739128232002258,
"step": 256
},
{
"epoch": 0.3094521372667068,
"grad_norm": 0.5731618503283799,
"learning_rate": 9.709708188457218e-05,
"loss": 0.7067856192588806,
"step": 257
},
{
"epoch": 0.3106562311860325,
"grad_norm": 0.5847726819974521,
"learning_rate": 9.706356419091003e-05,
"loss": 0.7837916612625122,
"step": 258
},
{
"epoch": 0.3118603251053582,
"grad_norm": 0.5928405149092865,
"learning_rate": 9.702985995791554e-05,
"loss": 0.7222775816917419,
"step": 259
},
{
"epoch": 0.3130644190246839,
"grad_norm": 0.6372265852544939,
"learning_rate": 9.69959693191775e-05,
"loss": 0.68992018699646,
"step": 260
},
{
"epoch": 0.31426851294400965,
"grad_norm": 0.5987802715969925,
"learning_rate": 9.696189240902353e-05,
"loss": 0.737679660320282,
"step": 261
},
{
"epoch": 0.31547260686333534,
"grad_norm": 0.5674891954353958,
"learning_rate": 9.69276293625196e-05,
"loss": 0.7600986957550049,
"step": 262
},
{
"epoch": 0.316676700782661,
"grad_norm": 0.5401274241101958,
"learning_rate": 9.689318031546935e-05,
"loss": 0.8289561867713928,
"step": 263
},
{
"epoch": 0.31788079470198677,
"grad_norm": 0.562252292461447,
"learning_rate": 9.685854540441374e-05,
"loss": 0.846182107925415,
"step": 264
},
{
"epoch": 0.31908488862131246,
"grad_norm": 0.5780906172982114,
"learning_rate": 9.682372476663037e-05,
"loss": 0.8012008666992188,
"step": 265
},
{
"epoch": 0.32028898254063815,
"grad_norm": 0.5746296189877343,
"learning_rate": 9.678871854013296e-05,
"loss": 0.7840061187744141,
"step": 266
},
{
"epoch": 0.3214930764599639,
"grad_norm": 0.5832924605117712,
"learning_rate": 9.675352686367086e-05,
"loss": 0.8156418800354004,
"step": 267
},
{
"epoch": 0.3226971703792896,
"grad_norm": 0.5614106568130766,
"learning_rate": 9.671814987672842e-05,
"loss": 0.8256844282150269,
"step": 268
},
{
"epoch": 0.32390126429861527,
"grad_norm": 0.5254943908985892,
"learning_rate": 9.668258771952453e-05,
"loss": 0.74140465259552,
"step": 269
},
{
"epoch": 0.325105358217941,
"grad_norm": 0.6200746421357911,
"learning_rate": 9.664684053301199e-05,
"loss": 0.7934343218803406,
"step": 270
},
{
"epoch": 0.3263094521372667,
"grad_norm": 0.5831834419812628,
"learning_rate": 9.661090845887693e-05,
"loss": 0.8534310460090637,
"step": 271
},
{
"epoch": 0.32751354605659244,
"grad_norm": 0.6345113330165018,
"learning_rate": 9.657479163953839e-05,
"loss": 0.835474967956543,
"step": 272
},
{
"epoch": 0.32871763997591813,
"grad_norm": 0.4683700127754648,
"learning_rate": 9.653849021814759e-05,
"loss": 0.6988995671272278,
"step": 273
},
{
"epoch": 0.3299217338952438,
"grad_norm": 0.5886264074969564,
"learning_rate": 9.650200433858741e-05,
"loss": 0.8585962057113647,
"step": 274
},
{
"epoch": 0.33112582781456956,
"grad_norm": 0.6135065491148268,
"learning_rate": 9.646533414547193e-05,
"loss": 0.8581076860427856,
"step": 275
},
{
"epoch": 0.33232992173389525,
"grad_norm": 0.5812007416519458,
"learning_rate": 9.642847978414569e-05,
"loss": 0.850233793258667,
"step": 276
},
{
"epoch": 0.33353401565322094,
"grad_norm": 0.5437246153982104,
"learning_rate": 9.639144140068324e-05,
"loss": 0.7476022243499756,
"step": 277
},
{
"epoch": 0.3347381095725467,
"grad_norm": 0.6098016790315451,
"learning_rate": 9.635421914188845e-05,
"loss": 0.9571095705032349,
"step": 278
},
{
"epoch": 0.33594220349187237,
"grad_norm": 0.5658420004769046,
"learning_rate": 9.631681315529408e-05,
"loss": 0.9088435173034668,
"step": 279
},
{
"epoch": 0.33714629741119806,
"grad_norm": 0.5550044767870903,
"learning_rate": 9.627922358916102e-05,
"loss": 0.7667790651321411,
"step": 280
},
{
"epoch": 0.3383503913305238,
"grad_norm": 0.6100185501331828,
"learning_rate": 9.624145059247787e-05,
"loss": 0.670824408531189,
"step": 281
},
{
"epoch": 0.3395544852498495,
"grad_norm": 0.5331576033417736,
"learning_rate": 9.620349431496018e-05,
"loss": 0.7122035026550293,
"step": 282
},
{
"epoch": 0.3407585791691752,
"grad_norm": 0.636645893021669,
"learning_rate": 9.616535490705004e-05,
"loss": 0.8380610942840576,
"step": 283
},
{
"epoch": 0.3419626730885009,
"grad_norm": 0.5606243551760431,
"learning_rate": 9.612703251991528e-05,
"loss": 0.8130165934562683,
"step": 284
},
{
"epoch": 0.3431667670078266,
"grad_norm": 0.6041929535466637,
"learning_rate": 9.60885273054491e-05,
"loss": 0.7357275485992432,
"step": 285
},
{
"epoch": 0.3443708609271523,
"grad_norm": 0.6002414673444332,
"learning_rate": 9.604983941626924e-05,
"loss": 0.824700117111206,
"step": 286
},
{
"epoch": 0.34557495484647804,
"grad_norm": 0.5391453496259582,
"learning_rate": 9.601096900571757e-05,
"loss": 0.696814775466919,
"step": 287
},
{
"epoch": 0.34677904876580373,
"grad_norm": 0.6179913262039818,
"learning_rate": 9.597191622785932e-05,
"loss": 0.7977198362350464,
"step": 288
},
{
"epoch": 0.3479831426851294,
"grad_norm": 0.5785274608800763,
"learning_rate": 9.593268123748259e-05,
"loss": 0.7644785642623901,
"step": 289
},
{
"epoch": 0.34918723660445516,
"grad_norm": 0.6184780253497415,
"learning_rate": 9.589326419009768e-05,
"loss": 0.7752266526222229,
"step": 290
},
{
"epoch": 0.35039133052378085,
"grad_norm": 0.6496568369594222,
"learning_rate": 9.585366524193648e-05,
"loss": 0.7995858788490295,
"step": 291
},
{
"epoch": 0.35159542444310654,
"grad_norm": 0.5903922954900828,
"learning_rate": 9.581388454995187e-05,
"loss": 0.7844491004943848,
"step": 292
},
{
"epoch": 0.3527995183624323,
"grad_norm": 0.649900135181538,
"learning_rate": 9.577392227181706e-05,
"loss": 0.7254087924957275,
"step": 293
},
{
"epoch": 0.354003612281758,
"grad_norm": 0.6727462360620303,
"learning_rate": 9.573377856592499e-05,
"loss": 0.7682409286499023,
"step": 294
},
{
"epoch": 0.35520770620108366,
"grad_norm": 0.6276787315652674,
"learning_rate": 9.569345359138771e-05,
"loss": 0.7973129153251648,
"step": 295
},
{
"epoch": 0.3564118001204094,
"grad_norm": 0.5608406404647831,
"learning_rate": 9.565294750803575e-05,
"loss": 0.7603083252906799,
"step": 296
},
{
"epoch": 0.3576158940397351,
"grad_norm": 0.5496790325518602,
"learning_rate": 9.561226047641745e-05,
"loss": 0.7687323689460754,
"step": 297
},
{
"epoch": 0.3588199879590608,
"grad_norm": 0.5616097936793484,
"learning_rate": 9.557139265779838e-05,
"loss": 0.776694118976593,
"step": 298
},
{
"epoch": 0.3600240818783865,
"grad_norm": 0.6531406072657592,
"learning_rate": 9.553034421416062e-05,
"loss": 0.8216550350189209,
"step": 299
},
{
"epoch": 0.3612281757977122,
"grad_norm": 0.5721508721189617,
"learning_rate": 9.548911530820226e-05,
"loss": 0.7561111450195312,
"step": 300
},
{
"epoch": 0.3624322697170379,
"grad_norm": 0.5824837418513067,
"learning_rate": 9.544770610333655e-05,
"loss": 0.7453889846801758,
"step": 301
},
{
"epoch": 0.36363636363636365,
"grad_norm": 0.5634396077904287,
"learning_rate": 9.540611676369145e-05,
"loss": 0.6875409483909607,
"step": 302
},
{
"epoch": 0.36484045755568933,
"grad_norm": 0.5849500369013592,
"learning_rate": 9.536434745410885e-05,
"loss": 0.7186317443847656,
"step": 303
},
{
"epoch": 0.3660445514750151,
"grad_norm": 0.5939765738642575,
"learning_rate": 9.5322398340144e-05,
"loss": 0.8197835087776184,
"step": 304
},
{
"epoch": 0.36724864539434077,
"grad_norm": 0.64306629173261,
"learning_rate": 9.528026958806477e-05,
"loss": 0.7928416728973389,
"step": 305
},
{
"epoch": 0.36845273931366646,
"grad_norm": 0.5549268774068403,
"learning_rate": 9.523796136485109e-05,
"loss": 0.6800048351287842,
"step": 306
},
{
"epoch": 0.3696568332329922,
"grad_norm": 0.5308696731387338,
"learning_rate": 9.519547383819416e-05,
"loss": 0.7719547152519226,
"step": 307
},
{
"epoch": 0.3708609271523179,
"grad_norm": 0.6662801838868186,
"learning_rate": 9.515280717649594e-05,
"loss": 0.7264106869697571,
"step": 308
},
{
"epoch": 0.3720650210716436,
"grad_norm": 0.5933085678392126,
"learning_rate": 9.510996154886839e-05,
"loss": 0.8447253704071045,
"step": 309
},
{
"epoch": 0.3732691149909693,
"grad_norm": 0.5531316218293344,
"learning_rate": 9.506693712513274e-05,
"loss": 0.7589705586433411,
"step": 310
},
{
"epoch": 0.374473208910295,
"grad_norm": 0.603459887836217,
"learning_rate": 9.502373407581897e-05,
"loss": 0.9212093949317932,
"step": 311
},
{
"epoch": 0.3756773028296207,
"grad_norm": 0.5767559783598122,
"learning_rate": 9.498035257216501e-05,
"loss": 0.8521254062652588,
"step": 312
},
{
"epoch": 0.37688139674894644,
"grad_norm": 0.5561371970294019,
"learning_rate": 9.493679278611616e-05,
"loss": 0.764145016670227,
"step": 313
},
{
"epoch": 0.37808549066827213,
"grad_norm": 0.5298149174186499,
"learning_rate": 9.489305489032425e-05,
"loss": 0.7402347326278687,
"step": 314
},
{
"epoch": 0.3792895845875978,
"grad_norm": 0.5235142194281388,
"learning_rate": 9.484913905814716e-05,
"loss": 0.7398232221603394,
"step": 315
},
{
"epoch": 0.38049367850692356,
"grad_norm": 0.5632871497547195,
"learning_rate": 9.4805045463648e-05,
"loss": 0.7935575246810913,
"step": 316
},
{
"epoch": 0.38169777242624925,
"grad_norm": 0.5820816300843275,
"learning_rate": 9.47607742815944e-05,
"loss": 0.7310054898262024,
"step": 317
},
{
"epoch": 0.38290186634557494,
"grad_norm": 0.5191642572711835,
"learning_rate": 9.471632568745794e-05,
"loss": 0.758508563041687,
"step": 318
},
{
"epoch": 0.3841059602649007,
"grad_norm": 0.6079805560804347,
"learning_rate": 9.467169985741337e-05,
"loss": 0.8284726142883301,
"step": 319
},
{
"epoch": 0.38531005418422637,
"grad_norm": 0.5980141887150925,
"learning_rate": 9.462689696833791e-05,
"loss": 0.7411810159683228,
"step": 320
},
{
"epoch": 0.38651414810355206,
"grad_norm": 0.48258653724680256,
"learning_rate": 9.458191719781056e-05,
"loss": 0.7031044960021973,
"step": 321
},
{
"epoch": 0.3877182420228778,
"grad_norm": 0.6225633581026373,
"learning_rate": 9.453676072411142e-05,
"loss": 0.7624230980873108,
"step": 322
},
{
"epoch": 0.3889223359422035,
"grad_norm": 0.55976593820311,
"learning_rate": 9.449142772622092e-05,
"loss": 0.7776026129722595,
"step": 323
},
{
"epoch": 0.3901264298615292,
"grad_norm": 0.5805339346207273,
"learning_rate": 9.444591838381922e-05,
"loss": 0.8038637638092041,
"step": 324
},
{
"epoch": 0.3913305237808549,
"grad_norm": 0.5837248578485592,
"learning_rate": 9.440023287728537e-05,
"loss": 0.8646640777587891,
"step": 325
},
{
"epoch": 0.3925346177001806,
"grad_norm": 0.6434430956932562,
"learning_rate": 9.435437138769672e-05,
"loss": 0.7498881816864014,
"step": 326
},
{
"epoch": 0.3937387116195063,
"grad_norm": 0.6004450235996133,
"learning_rate": 9.430833409682806e-05,
"loss": 0.8004214763641357,
"step": 327
},
{
"epoch": 0.39494280553883204,
"grad_norm": 0.6572172072860755,
"learning_rate": 9.426212118715108e-05,
"loss": 0.7760463356971741,
"step": 328
},
{
"epoch": 0.39614689945815773,
"grad_norm": 0.6308238250396566,
"learning_rate": 9.421573284183345e-05,
"loss": 0.8375146985054016,
"step": 329
},
{
"epoch": 0.3973509933774834,
"grad_norm": 0.6157113149433467,
"learning_rate": 9.416916924473825e-05,
"loss": 0.890914797782898,
"step": 330
},
{
"epoch": 0.39855508729680916,
"grad_norm": 0.5906990383341111,
"learning_rate": 9.412243058042315e-05,
"loss": 0.7460801601409912,
"step": 331
},
{
"epoch": 0.39975918121613485,
"grad_norm": 0.5562002948702587,
"learning_rate": 9.407551703413973e-05,
"loss": 0.7909257411956787,
"step": 332
},
{
"epoch": 0.40096327513546054,
"grad_norm": 0.520246518281898,
"learning_rate": 9.402842879183272e-05,
"loss": 0.7020269632339478,
"step": 333
},
{
"epoch": 0.4021673690547863,
"grad_norm": 0.627730167180195,
"learning_rate": 9.398116604013925e-05,
"loss": 0.7831474542617798,
"step": 334
},
{
"epoch": 0.40337146297411197,
"grad_norm": 0.5344410381831145,
"learning_rate": 9.393372896638816e-05,
"loss": 0.7443587183952332,
"step": 335
},
{
"epoch": 0.4045755568934377,
"grad_norm": 0.6042157074471304,
"learning_rate": 9.38861177585992e-05,
"loss": 0.7584066987037659,
"step": 336
},
{
"epoch": 0.4057796508127634,
"grad_norm": 0.5847615768814335,
"learning_rate": 9.383833260548233e-05,
"loss": 0.8609414100646973,
"step": 337
},
{
"epoch": 0.4069837447320891,
"grad_norm": 0.5622382695154348,
"learning_rate": 9.379037369643694e-05,
"loss": 0.7267239093780518,
"step": 338
},
{
"epoch": 0.40818783865141484,
"grad_norm": 0.5895609636792623,
"learning_rate": 9.374224122155111e-05,
"loss": 0.7831911444664001,
"step": 339
},
{
"epoch": 0.4093919325707405,
"grad_norm": 0.944345150650652,
"learning_rate": 9.369393537160089e-05,
"loss": 0.8276694416999817,
"step": 340
},
{
"epoch": 0.4105960264900662,
"grad_norm": 0.5093947879414813,
"learning_rate": 9.364545633804946e-05,
"loss": 0.7780739665031433,
"step": 341
},
{
"epoch": 0.41180012040939196,
"grad_norm": 0.568549199083402,
"learning_rate": 9.359680431304647e-05,
"loss": 0.7830153703689575,
"step": 342
},
{
"epoch": 0.41300421432871764,
"grad_norm": 0.6085826245045816,
"learning_rate": 9.354797948942719e-05,
"loss": 0.753618597984314,
"step": 343
},
{
"epoch": 0.41420830824804333,
"grad_norm": 0.6106136073955027,
"learning_rate": 9.349898206071186e-05,
"loss": 0.7702220678329468,
"step": 344
},
{
"epoch": 0.4154124021673691,
"grad_norm": 0.5509805230268469,
"learning_rate": 9.344981222110476e-05,
"loss": 0.819717526435852,
"step": 345
},
{
"epoch": 0.41661649608669477,
"grad_norm": 0.6223463566186167,
"learning_rate": 9.340047016549358e-05,
"loss": 0.7494675517082214,
"step": 346
},
{
"epoch": 0.41782059000602045,
"grad_norm": 0.613692765201492,
"learning_rate": 9.335095608944861e-05,
"loss": 0.7567797899246216,
"step": 347
},
{
"epoch": 0.4190246839253462,
"grad_norm": 0.6165249146181281,
"learning_rate": 9.330127018922194e-05,
"loss": 0.7949779629707336,
"step": 348
},
{
"epoch": 0.4202287778446719,
"grad_norm": 0.5897658535768848,
"learning_rate": 9.325141266174666e-05,
"loss": 0.7259105443954468,
"step": 349
},
{
"epoch": 0.4214328717639976,
"grad_norm": 0.5861335410071635,
"learning_rate": 9.320138370463618e-05,
"loss": 0.8436710238456726,
"step": 350
},
{
"epoch": 0.4226369656833233,
"grad_norm": 0.6206635975538285,
"learning_rate": 9.315118351618335e-05,
"loss": 0.822520911693573,
"step": 351
},
{
"epoch": 0.423841059602649,
"grad_norm": 0.5834071691310755,
"learning_rate": 9.310081229535968e-05,
"loss": 0.7531993389129639,
"step": 352
},
{
"epoch": 0.4250451535219747,
"grad_norm": 0.5501379471214572,
"learning_rate": 9.305027024181462e-05,
"loss": 0.7817683815956116,
"step": 353
},
{
"epoch": 0.42624924744130044,
"grad_norm": 0.5918807199612602,
"learning_rate": 9.29995575558747e-05,
"loss": 0.7909103631973267,
"step": 354
},
{
"epoch": 0.4274533413606261,
"grad_norm": 0.5265214658989071,
"learning_rate": 9.294867443854278e-05,
"loss": 0.730841875076294,
"step": 355
},
{
"epoch": 0.4286574352799518,
"grad_norm": 0.5787134950870648,
"learning_rate": 9.289762109149723e-05,
"loss": 0.8346636295318604,
"step": 356
},
{
"epoch": 0.42986152919927756,
"grad_norm": 0.5816052225271856,
"learning_rate": 9.284639771709112e-05,
"loss": 0.8406312465667725,
"step": 357
},
{
"epoch": 0.43106562311860325,
"grad_norm": 0.6192951790977504,
"learning_rate": 9.279500451835145e-05,
"loss": 0.7195670008659363,
"step": 358
},
{
"epoch": 0.43226971703792894,
"grad_norm": 0.5649438183167637,
"learning_rate": 9.274344169897834e-05,
"loss": 0.8138882517814636,
"step": 359
},
{
"epoch": 0.4334738109572547,
"grad_norm": 0.5319891543066664,
"learning_rate": 9.269170946334418e-05,
"loss": 0.8081603050231934,
"step": 360
},
{
"epoch": 0.43467790487658037,
"grad_norm": 0.638552666613924,
"learning_rate": 9.263980801649286e-05,
"loss": 0.7729615569114685,
"step": 361
},
{
"epoch": 0.43588199879590606,
"grad_norm": 0.7164438531076048,
"learning_rate": 9.258773756413898e-05,
"loss": 0.9047574400901794,
"step": 362
},
{
"epoch": 0.4370860927152318,
"grad_norm": 0.5450974052349067,
"learning_rate": 9.253549831266696e-05,
"loss": 0.7925511002540588,
"step": 363
},
{
"epoch": 0.4382901866345575,
"grad_norm": 0.5333374546278858,
"learning_rate": 9.248309046913032e-05,
"loss": 0.7976597547531128,
"step": 364
},
{
"epoch": 0.4394942805538832,
"grad_norm": 0.5224661281212655,
"learning_rate": 9.243051424125075e-05,
"loss": 0.7459884285926819,
"step": 365
},
{
"epoch": 0.4406983744732089,
"grad_norm": 0.5459657091037837,
"learning_rate": 9.237776983741735e-05,
"loss": 0.7713004946708679,
"step": 366
},
{
"epoch": 0.4419024683925346,
"grad_norm": 0.5719149619614183,
"learning_rate": 9.232485746668584e-05,
"loss": 0.816899836063385,
"step": 367
},
{
"epoch": 0.44310656231186035,
"grad_norm": 0.5823561778378619,
"learning_rate": 9.227177733877763e-05,
"loss": 0.7673546075820923,
"step": 368
},
{
"epoch": 0.44431065623118604,
"grad_norm": 0.5492936414986261,
"learning_rate": 9.221852966407909e-05,
"loss": 0.7557410001754761,
"step": 369
},
{
"epoch": 0.44551475015051173,
"grad_norm": 0.6408557113497547,
"learning_rate": 9.216511465364066e-05,
"loss": 0.8476218581199646,
"step": 370
},
{
"epoch": 0.4467188440698375,
"grad_norm": 0.6172862708094095,
"learning_rate": 9.2111532519176e-05,
"loss": 0.8076975345611572,
"step": 371
},
{
"epoch": 0.44792293798916316,
"grad_norm": 0.6297321011778938,
"learning_rate": 9.205778347306121e-05,
"loss": 0.8561894297599792,
"step": 372
},
{
"epoch": 0.44912703190848885,
"grad_norm": 0.7190904011514955,
"learning_rate": 9.200386772833394e-05,
"loss": 0.7783414125442505,
"step": 373
},
{
"epoch": 0.4503311258278146,
"grad_norm": 0.5593767357275289,
"learning_rate": 9.194978549869256e-05,
"loss": 0.6968886852264404,
"step": 374
},
{
"epoch": 0.4515352197471403,
"grad_norm": 0.5615898220032896,
"learning_rate": 9.189553699849531e-05,
"loss": 0.7256219387054443,
"step": 375
},
{
"epoch": 0.45273931366646597,
"grad_norm": 0.5662616923058666,
"learning_rate": 9.184112244275948e-05,
"loss": 0.7661458849906921,
"step": 376
},
{
"epoch": 0.4539434075857917,
"grad_norm": 0.5725640510920725,
"learning_rate": 9.178654204716048e-05,
"loss": 0.7258007526397705,
"step": 377
},
{
"epoch": 0.4551475015051174,
"grad_norm": 0.6209428990213935,
"learning_rate": 9.173179602803108e-05,
"loss": 0.766287088394165,
"step": 378
},
{
"epoch": 0.4563515954244431,
"grad_norm": 0.5765706455483566,
"learning_rate": 9.167688460236049e-05,
"loss": 0.6859148740768433,
"step": 379
},
{
"epoch": 0.45755568934376883,
"grad_norm": 0.5924278501045154,
"learning_rate": 9.162180798779354e-05,
"loss": 0.7121888995170593,
"step": 380
},
{
"epoch": 0.4587597832630945,
"grad_norm": 0.5864758105935457,
"learning_rate": 9.156656640262975e-05,
"loss": 0.7574361562728882,
"step": 381
},
{
"epoch": 0.4599638771824202,
"grad_norm": 0.6056336103242377,
"learning_rate": 9.151116006582259e-05,
"loss": 0.8138922452926636,
"step": 382
},
{
"epoch": 0.46116797110174595,
"grad_norm": 0.6514266482602168,
"learning_rate": 9.145558919697844e-05,
"loss": 0.8442139625549316,
"step": 383
},
{
"epoch": 0.46237206502107164,
"grad_norm": 0.6042841510601016,
"learning_rate": 9.139985401635587e-05,
"loss": 0.7353885173797607,
"step": 384
},
{
"epoch": 0.46357615894039733,
"grad_norm": 0.5761011230486508,
"learning_rate": 9.13439547448647e-05,
"loss": 0.7756004333496094,
"step": 385
},
{
"epoch": 0.4647802528597231,
"grad_norm": 0.5495187483625902,
"learning_rate": 9.128789160406512e-05,
"loss": 0.7558329105377197,
"step": 386
},
{
"epoch": 0.46598434677904876,
"grad_norm": 0.5519235417262857,
"learning_rate": 9.123166481616687e-05,
"loss": 0.7433863282203674,
"step": 387
},
{
"epoch": 0.46718844069837445,
"grad_norm": 0.544452007225767,
"learning_rate": 9.117527460402826e-05,
"loss": 0.7605905532836914,
"step": 388
},
{
"epoch": 0.4683925346177002,
"grad_norm": 0.5977968855932455,
"learning_rate": 9.111872119115537e-05,
"loss": 0.7053298950195312,
"step": 389
},
{
"epoch": 0.4695966285370259,
"grad_norm": 0.5052512812965578,
"learning_rate": 9.106200480170113e-05,
"loss": 0.8009657263755798,
"step": 390
},
{
"epoch": 0.4708007224563516,
"grad_norm": 0.5692785579794742,
"learning_rate": 9.100512566046444e-05,
"loss": 0.7023915648460388,
"step": 391
},
{
"epoch": 0.4720048163756773,
"grad_norm": 0.5916944517293747,
"learning_rate": 9.094808399288927e-05,
"loss": 0.8082914352416992,
"step": 392
},
{
"epoch": 0.473208910295003,
"grad_norm": 0.5809105407389136,
"learning_rate": 9.089088002506379e-05,
"loss": 0.6799631118774414,
"step": 393
},
{
"epoch": 0.4744130042143287,
"grad_norm": 0.6109564629035809,
"learning_rate": 9.083351398371944e-05,
"loss": 0.7526841759681702,
"step": 394
},
{
"epoch": 0.47561709813365444,
"grad_norm": 0.5883358248629554,
"learning_rate": 9.077598609623006e-05,
"loss": 0.7367865443229675,
"step": 395
},
{
"epoch": 0.4768211920529801,
"grad_norm": 0.7262416033015108,
"learning_rate": 9.071829659061097e-05,
"loss": 0.826897382736206,
"step": 396
},
{
"epoch": 0.4780252859723058,
"grad_norm": 0.5615108473838226,
"learning_rate": 9.066044569551807e-05,
"loss": 0.7454221248626709,
"step": 397
},
{
"epoch": 0.47922937989163156,
"grad_norm": 0.5688987580359471,
"learning_rate": 9.060243364024692e-05,
"loss": 0.7147552967071533,
"step": 398
},
{
"epoch": 0.48043347381095725,
"grad_norm": 0.5756967620004046,
"learning_rate": 9.054426065473192e-05,
"loss": 0.8094872236251831,
"step": 399
},
{
"epoch": 0.481637567730283,
"grad_norm": 0.5597229238751422,
"learning_rate": 9.048592696954524e-05,
"loss": 0.7492781281471252,
"step": 400
},
{
"epoch": 0.4828416616496087,
"grad_norm": 0.5614528171918515,
"learning_rate": 9.042743281589605e-05,
"loss": 0.7033244967460632,
"step": 401
},
{
"epoch": 0.48404575556893437,
"grad_norm": 0.5838529800692324,
"learning_rate": 9.036877842562952e-05,
"loss": 0.756244957447052,
"step": 402
},
{
"epoch": 0.4852498494882601,
"grad_norm": 0.5380290614149196,
"learning_rate": 9.030996403122592e-05,
"loss": 0.7965587377548218,
"step": 403
},
{
"epoch": 0.4864539434075858,
"grad_norm": 0.6225423144307125,
"learning_rate": 9.025098986579975e-05,
"loss": 0.7602047324180603,
"step": 404
},
{
"epoch": 0.4876580373269115,
"grad_norm": 0.5453305974936316,
"learning_rate": 9.019185616309871e-05,
"loss": 0.7680432796478271,
"step": 405
},
{
"epoch": 0.48886213124623723,
"grad_norm": 0.5609975583465159,
"learning_rate": 9.013256315750291e-05,
"loss": 0.7896559834480286,
"step": 406
},
{
"epoch": 0.4900662251655629,
"grad_norm": 0.5548846029745622,
"learning_rate": 9.007311108402379e-05,
"loss": 0.834828794002533,
"step": 407
},
{
"epoch": 0.4912703190848886,
"grad_norm": 0.5833075178168359,
"learning_rate": 9.001350017830331e-05,
"loss": 0.7566395998001099,
"step": 408
},
{
"epoch": 0.49247441300421435,
"grad_norm": 0.5609827683617111,
"learning_rate": 8.995373067661296e-05,
"loss": 0.8615512251853943,
"step": 409
},
{
"epoch": 0.49367850692354004,
"grad_norm": 0.5709117378373297,
"learning_rate": 8.989380281585287e-05,
"loss": 0.8020814657211304,
"step": 410
},
{
"epoch": 0.4948826008428657,
"grad_norm": 0.5255264602267204,
"learning_rate": 8.983371683355075e-05,
"loss": 0.7462515234947205,
"step": 411
},
{
"epoch": 0.49608669476219147,
"grad_norm": 0.48719953187362913,
"learning_rate": 8.977347296786113e-05,
"loss": 0.7431973814964294,
"step": 412
},
{
"epoch": 0.49729078868151716,
"grad_norm": 0.547147139382234,
"learning_rate": 8.971307145756426e-05,
"loss": 0.7564660906791687,
"step": 413
},
{
"epoch": 0.49849488260084285,
"grad_norm": 0.5596198093447211,
"learning_rate": 8.965251254206524e-05,
"loss": 0.7865108847618103,
"step": 414
},
{
"epoch": 0.4996989765201686,
"grad_norm": 0.5645497006901606,
"learning_rate": 8.959179646139306e-05,
"loss": 0.7137467861175537,
"step": 415
},
{
"epoch": 0.5009030704394943,
"grad_norm": 0.5134547551858623,
"learning_rate": 8.953092345619964e-05,
"loss": 0.719763994216919,
"step": 416
},
{
"epoch": 0.50210716435882,
"grad_norm": 0.6104010003384248,
"learning_rate": 8.94698937677589e-05,
"loss": 0.7604578733444214,
"step": 417
},
{
"epoch": 0.5033112582781457,
"grad_norm": 0.5439651203338742,
"learning_rate": 8.940870763796574e-05,
"loss": 0.7409037351608276,
"step": 418
},
{
"epoch": 0.5045153521974715,
"grad_norm": 0.5197572864894027,
"learning_rate": 8.934736530933519e-05,
"loss": 0.7605472803115845,
"step": 419
},
{
"epoch": 0.5057194461167971,
"grad_norm": 0.5353143318938045,
"learning_rate": 8.928586702500128e-05,
"loss": 0.7507858276367188,
"step": 420
},
{
"epoch": 0.5069235400361228,
"grad_norm": 0.5637812105352008,
"learning_rate": 8.922421302871629e-05,
"loss": 0.7504753470420837,
"step": 421
},
{
"epoch": 0.5081276339554486,
"grad_norm": 0.5649396855266844,
"learning_rate": 8.916240356484961e-05,
"loss": 0.7545074820518494,
"step": 422
},
{
"epoch": 0.5093317278747742,
"grad_norm": 0.5829178160113707,
"learning_rate": 8.910043887838686e-05,
"loss": 0.7903565764427185,
"step": 423
},
{
"epoch": 0.5105358217941,
"grad_norm": 0.6218209307526726,
"learning_rate": 8.903831921492889e-05,
"loss": 0.8612103462219238,
"step": 424
},
{
"epoch": 0.5117399157134257,
"grad_norm": 0.6387963390219014,
"learning_rate": 8.89760448206908e-05,
"loss": 0.8914673924446106,
"step": 425
},
{
"epoch": 0.5129440096327513,
"grad_norm": 0.6087141281483449,
"learning_rate": 8.8913615942501e-05,
"loss": 0.8402252793312073,
"step": 426
},
{
"epoch": 0.5141481035520771,
"grad_norm": 0.6106105673585972,
"learning_rate": 8.885103282780016e-05,
"loss": 0.8295918107032776,
"step": 427
},
{
"epoch": 0.5153521974714028,
"grad_norm": 0.5214324089267341,
"learning_rate": 8.878829572464034e-05,
"loss": 0.796715259552002,
"step": 428
},
{
"epoch": 0.5165562913907285,
"grad_norm": 0.5637780696473946,
"learning_rate": 8.872540488168389e-05,
"loss": 0.6490369439125061,
"step": 429
},
{
"epoch": 0.5177603853100542,
"grad_norm": 0.6656062762101366,
"learning_rate": 8.866236054820251e-05,
"loss": 0.7382924556732178,
"step": 430
},
{
"epoch": 0.5189644792293799,
"grad_norm": 0.6229872811322728,
"learning_rate": 8.859916297407636e-05,
"loss": 0.8123342394828796,
"step": 431
},
{
"epoch": 0.5201685731487056,
"grad_norm": 0.5340311114394617,
"learning_rate": 8.853581240979285e-05,
"loss": 0.8523218035697937,
"step": 432
},
{
"epoch": 0.5213726670680313,
"grad_norm": 0.5667729939597453,
"learning_rate": 8.847230910644586e-05,
"loss": 0.7326020002365112,
"step": 433
},
{
"epoch": 0.5225767609873571,
"grad_norm": 0.5482598087283126,
"learning_rate": 8.840865331573465e-05,
"loss": 0.8485729098320007,
"step": 434
},
{
"epoch": 0.5237808549066827,
"grad_norm": 0.5374391259559329,
"learning_rate": 8.834484528996287e-05,
"loss": 0.7684690952301025,
"step": 435
},
{
"epoch": 0.5249849488260084,
"grad_norm": 0.5767084046670321,
"learning_rate": 8.828088528203753e-05,
"loss": 0.8233696818351746,
"step": 436
},
{
"epoch": 0.5261890427453342,
"grad_norm": 0.5420921694939217,
"learning_rate": 8.821677354546807e-05,
"loss": 0.7202430963516235,
"step": 437
},
{
"epoch": 0.5273931366646598,
"grad_norm": 0.6105133980701312,
"learning_rate": 8.815251033436531e-05,
"loss": 0.8520496487617493,
"step": 438
},
{
"epoch": 0.5285972305839856,
"grad_norm": 0.5302524197197112,
"learning_rate": 8.808809590344042e-05,
"loss": 0.7050381898880005,
"step": 439
},
{
"epoch": 0.5298013245033113,
"grad_norm": 0.5771515531186625,
"learning_rate": 8.802353050800398e-05,
"loss": 0.7315058708190918,
"step": 440
},
{
"epoch": 0.5310054184226369,
"grad_norm": 0.548741589615364,
"learning_rate": 8.795881440396491e-05,
"loss": 0.7780054807662964,
"step": 441
},
{
"epoch": 0.5322095123419627,
"grad_norm": 0.544655477761206,
"learning_rate": 8.789394784782945e-05,
"loss": 0.7249109148979187,
"step": 442
},
{
"epoch": 0.5334136062612884,
"grad_norm": 0.5540166863806754,
"learning_rate": 8.78289310967002e-05,
"loss": 0.7666537165641785,
"step": 443
},
{
"epoch": 0.534617700180614,
"grad_norm": 0.5636852062723088,
"learning_rate": 8.776376440827505e-05,
"loss": 0.6913390159606934,
"step": 444
},
{
"epoch": 0.5358217940999398,
"grad_norm": 0.6448340062856655,
"learning_rate": 8.769844804084619e-05,
"loss": 0.7798296213150024,
"step": 445
},
{
"epoch": 0.5370258880192655,
"grad_norm": 0.5149311000198883,
"learning_rate": 8.763298225329903e-05,
"loss": 0.7092111110687256,
"step": 446
},
{
"epoch": 0.5382299819385912,
"grad_norm": 0.5363101206709715,
"learning_rate": 8.756736730511128e-05,
"loss": 0.8767518401145935,
"step": 447
},
{
"epoch": 0.5394340758579169,
"grad_norm": 0.5282501825840302,
"learning_rate": 8.750160345635183e-05,
"loss": 0.7216475009918213,
"step": 448
},
{
"epoch": 0.5406381697772427,
"grad_norm": 0.5425626987052361,
"learning_rate": 8.74356909676797e-05,
"loss": 0.7362173199653625,
"step": 449
},
{
"epoch": 0.5418422636965683,
"grad_norm": 0.5366839210544753,
"learning_rate": 8.736963010034311e-05,
"loss": 0.8743109107017517,
"step": 450
},
{
"epoch": 0.543046357615894,
"grad_norm": 0.5679692157639264,
"learning_rate": 8.73034211161784e-05,
"loss": 0.780762791633606,
"step": 451
},
{
"epoch": 0.5442504515352198,
"grad_norm": 0.4983927207727879,
"learning_rate": 8.723706427760892e-05,
"loss": 0.8108317852020264,
"step": 452
},
{
"epoch": 0.5454545454545454,
"grad_norm": 0.5557144789278132,
"learning_rate": 8.717055984764411e-05,
"loss": 0.8065282702445984,
"step": 453
},
{
"epoch": 0.5466586393738712,
"grad_norm": 0.5216018109164337,
"learning_rate": 8.710390808987833e-05,
"loss": 0.6993690729141235,
"step": 454
},
{
"epoch": 0.5478627332931969,
"grad_norm": 0.5606971368620551,
"learning_rate": 8.703710926848995e-05,
"loss": 0.7065219283103943,
"step": 455
},
{
"epoch": 0.5490668272125225,
"grad_norm": 0.5577030499855022,
"learning_rate": 8.697016364824023e-05,
"loss": 0.7574766874313354,
"step": 456
},
{
"epoch": 0.5502709211318483,
"grad_norm": 0.537041894590591,
"learning_rate": 8.690307149447221e-05,
"loss": 0.7309029698371887,
"step": 457
},
{
"epoch": 0.551475015051174,
"grad_norm": 0.5731425233928719,
"learning_rate": 8.683583307310978e-05,
"loss": 0.793787956237793,
"step": 458
},
{
"epoch": 0.5526791089704997,
"grad_norm": 0.5283494585175387,
"learning_rate": 8.676844865065659e-05,
"loss": 0.6971169114112854,
"step": 459
},
{
"epoch": 0.5538832028898254,
"grad_norm": 0.5874759003745078,
"learning_rate": 8.67009184941949e-05,
"loss": 0.7948325872421265,
"step": 460
},
{
"epoch": 0.5550872968091511,
"grad_norm": 0.5573331677427176,
"learning_rate": 8.663324287138469e-05,
"loss": 0.8879362344741821,
"step": 461
},
{
"epoch": 0.5562913907284768,
"grad_norm": 0.6214491273546168,
"learning_rate": 8.656542205046243e-05,
"loss": 0.7630327939987183,
"step": 462
},
{
"epoch": 0.5574954846478025,
"grad_norm": 0.6231045323942624,
"learning_rate": 8.64974563002401e-05,
"loss": 0.7660893797874451,
"step": 463
},
{
"epoch": 0.5586995785671283,
"grad_norm": 0.5678786786218427,
"learning_rate": 8.642934589010414e-05,
"loss": 0.8774542808532715,
"step": 464
},
{
"epoch": 0.5599036724864539,
"grad_norm": 0.6671183114861469,
"learning_rate": 8.636109109001438e-05,
"loss": 0.8003222346305847,
"step": 465
},
{
"epoch": 0.5611077664057796,
"grad_norm": 0.585215674412474,
"learning_rate": 8.629269217050289e-05,
"loss": 0.7785161733627319,
"step": 466
},
{
"epoch": 0.5623118603251054,
"grad_norm": 0.5431169098112358,
"learning_rate": 8.6224149402673e-05,
"loss": 0.7165198922157288,
"step": 467
},
{
"epoch": 0.563515954244431,
"grad_norm": 0.5307201420800518,
"learning_rate": 8.61554630581982e-05,
"loss": 0.7273076772689819,
"step": 468
},
{
"epoch": 0.5647200481637568,
"grad_norm": 0.570313651414482,
"learning_rate": 8.608663340932104e-05,
"loss": 0.812558650970459,
"step": 469
},
{
"epoch": 0.5659241420830825,
"grad_norm": 0.5044626155006432,
"learning_rate": 8.601766072885204e-05,
"loss": 0.7102110385894775,
"step": 470
},
{
"epoch": 0.5671282360024081,
"grad_norm": 0.5478570075366566,
"learning_rate": 8.594854529016872e-05,
"loss": 0.785699725151062,
"step": 471
},
{
"epoch": 0.5683323299217339,
"grad_norm": 0.5781025269640302,
"learning_rate": 8.587928736721432e-05,
"loss": 0.8369163274765015,
"step": 472
},
{
"epoch": 0.5695364238410596,
"grad_norm": 0.5601783407883638,
"learning_rate": 8.580988723449688e-05,
"loss": 0.7228402495384216,
"step": 473
},
{
"epoch": 0.5707405177603853,
"grad_norm": 0.545526634078626,
"learning_rate": 8.574034516708814e-05,
"loss": 0.7141140699386597,
"step": 474
},
{
"epoch": 0.571944611679711,
"grad_norm": 0.5065095422123954,
"learning_rate": 8.567066144062232e-05,
"loss": 0.6694948673248291,
"step": 475
},
{
"epoch": 0.5731487055990367,
"grad_norm": 0.583153560561957,
"learning_rate": 8.560083633129519e-05,
"loss": 0.6797178387641907,
"step": 476
},
{
"epoch": 0.5743527995183624,
"grad_norm": 0.5353291979784289,
"learning_rate": 8.553087011586284e-05,
"loss": 0.7320327162742615,
"step": 477
},
{
"epoch": 0.5755568934376881,
"grad_norm": 0.5405490064071823,
"learning_rate": 8.546076307164068e-05,
"loss": 0.6791343688964844,
"step": 478
},
{
"epoch": 0.5767609873570139,
"grad_norm": 0.6177363789709537,
"learning_rate": 8.53905154765023e-05,
"loss": 0.6731370091438293,
"step": 479
},
{
"epoch": 0.5779650812763396,
"grad_norm": 0.587188606703384,
"learning_rate": 8.532012760887837e-05,
"loss": 0.7344556450843811,
"step": 480
},
{
"epoch": 0.5791691751956652,
"grad_norm": 0.5381712982934647,
"learning_rate": 8.524959974775551e-05,
"loss": 0.6820102334022522,
"step": 481
},
{
"epoch": 0.580373269114991,
"grad_norm": 0.5813804980616727,
"learning_rate": 8.517893217267525e-05,
"loss": 0.6940742135047913,
"step": 482
},
{
"epoch": 0.5815773630343167,
"grad_norm": 0.5307198642693565,
"learning_rate": 8.510812516373288e-05,
"loss": 0.7252935767173767,
"step": 483
},
{
"epoch": 0.5827814569536424,
"grad_norm": 0.6349127487825815,
"learning_rate": 8.503717900157632e-05,
"loss": 0.7607162594795227,
"step": 484
},
{
"epoch": 0.5839855508729681,
"grad_norm": 0.6572581702723835,
"learning_rate": 8.496609396740506e-05,
"loss": 0.7956105470657349,
"step": 485
},
{
"epoch": 0.5851896447922939,
"grad_norm": 0.5505925649699112,
"learning_rate": 8.489487034296902e-05,
"loss": 0.7797254323959351,
"step": 486
},
{
"epoch": 0.5863937387116195,
"grad_norm": 0.5625295583685044,
"learning_rate": 8.482350841056737e-05,
"loss": 0.726678729057312,
"step": 487
},
{
"epoch": 0.5875978326309452,
"grad_norm": 0.6290241986823146,
"learning_rate": 8.475200845304758e-05,
"loss": 0.8688774704933167,
"step": 488
},
{
"epoch": 0.588801926550271,
"grad_norm": 0.5393301616881966,
"learning_rate": 8.468037075380408e-05,
"loss": 0.6241949796676636,
"step": 489
},
{
"epoch": 0.5900060204695966,
"grad_norm": 0.5311210590721807,
"learning_rate": 8.460859559677734e-05,
"loss": 0.6885173320770264,
"step": 490
},
{
"epoch": 0.5912101143889223,
"grad_norm": 0.5218045008876208,
"learning_rate": 8.453668326645259e-05,
"loss": 0.7178826928138733,
"step": 491
},
{
"epoch": 0.5924142083082481,
"grad_norm": 0.5713951728613547,
"learning_rate": 8.446463404785875e-05,
"loss": 0.8139700889587402,
"step": 492
},
{
"epoch": 0.5936183022275737,
"grad_norm": 0.5563435445452487,
"learning_rate": 8.43924482265674e-05,
"loss": 0.7864251136779785,
"step": 493
},
{
"epoch": 0.5948223961468995,
"grad_norm": 0.5122495450523467,
"learning_rate": 8.432012608869141e-05,
"loss": 0.7013481855392456,
"step": 494
},
{
"epoch": 0.5960264900662252,
"grad_norm": 0.5210851560064924,
"learning_rate": 8.424766792088408e-05,
"loss": 0.6481162309646606,
"step": 495
},
{
"epoch": 0.5972305839855508,
"grad_norm": 0.552284790130884,
"learning_rate": 8.417507401033779e-05,
"loss": 0.8301807641983032,
"step": 496
},
{
"epoch": 0.5984346779048766,
"grad_norm": 0.5596842624663915,
"learning_rate": 8.410234464478297e-05,
"loss": 0.8332841396331787,
"step": 497
},
{
"epoch": 0.5996387718242023,
"grad_norm": 0.4748866291903979,
"learning_rate": 8.402948011248692e-05,
"loss": 0.6934956312179565,
"step": 498
},
{
"epoch": 0.600842865743528,
"grad_norm": 0.5664132495739204,
"learning_rate": 8.395648070225272e-05,
"loss": 0.8205459713935852,
"step": 499
},
{
"epoch": 0.6020469596628537,
"grad_norm": 0.5655754830551384,
"learning_rate": 8.388334670341805e-05,
"loss": 0.7597204446792603,
"step": 500
},
{
"epoch": 0.6032510535821795,
"grad_norm": 0.5456716872518325,
"learning_rate": 8.381007840585395e-05,
"loss": 0.7730255126953125,
"step": 501
},
{
"epoch": 0.6044551475015051,
"grad_norm": 0.5413433172388354,
"learning_rate": 8.373667609996387e-05,
"loss": 0.7178712487220764,
"step": 502
},
{
"epoch": 0.6056592414208308,
"grad_norm": 0.5420830010757542,
"learning_rate": 8.366314007668235e-05,
"loss": 0.6995259523391724,
"step": 503
},
{
"epoch": 0.6068633353401566,
"grad_norm": 0.4922977060080209,
"learning_rate": 8.358947062747397e-05,
"loss": 0.7817350625991821,
"step": 504
},
{
"epoch": 0.6080674292594822,
"grad_norm": 0.5822315121920465,
"learning_rate": 8.351566804433207e-05,
"loss": 0.8230698704719543,
"step": 505
},
{
"epoch": 0.609271523178808,
"grad_norm": 0.5215831606763998,
"learning_rate": 8.344173261977777e-05,
"loss": 0.7596744298934937,
"step": 506
},
{
"epoch": 0.6104756170981337,
"grad_norm": 0.567461405001868,
"learning_rate": 8.336766464685869e-05,
"loss": 0.7933334708213806,
"step": 507
},
{
"epoch": 0.6116797110174593,
"grad_norm": 0.5889715242341521,
"learning_rate": 8.329346441914774e-05,
"loss": 0.8186151385307312,
"step": 508
},
{
"epoch": 0.6128838049367851,
"grad_norm": 0.5185763633504242,
"learning_rate": 8.321913223074212e-05,
"loss": 0.7934544086456299,
"step": 509
},
{
"epoch": 0.6140878988561108,
"grad_norm": 0.5511567159839881,
"learning_rate": 8.314466837626205e-05,
"loss": 0.7858798503875732,
"step": 510
},
{
"epoch": 0.6152919927754364,
"grad_norm": 0.5730508983694548,
"learning_rate": 8.307007315084958e-05,
"loss": 0.7628536224365234,
"step": 511
},
{
"epoch": 0.6164960866947622,
"grad_norm": 0.5924347050159982,
"learning_rate": 8.299534685016747e-05,
"loss": 0.8055329322814941,
"step": 512
},
{
"epoch": 0.6177001806140879,
"grad_norm": 0.6096498357709286,
"learning_rate": 8.292048977039801e-05,
"loss": 0.8355273008346558,
"step": 513
},
{
"epoch": 0.6189042745334136,
"grad_norm": 0.5492309824373928,
"learning_rate": 8.284550220824187e-05,
"loss": 0.6998053789138794,
"step": 514
},
{
"epoch": 0.6201083684527393,
"grad_norm": 0.520941643837888,
"learning_rate": 8.277038446091683e-05,
"loss": 0.6920537948608398,
"step": 515
},
{
"epoch": 0.621312462372065,
"grad_norm": 0.5057283105220394,
"learning_rate": 8.269513682615672e-05,
"loss": 0.7318291664123535,
"step": 516
},
{
"epoch": 0.6225165562913907,
"grad_norm": 0.6088069875770302,
"learning_rate": 8.261975960221017e-05,
"loss": 0.7531617879867554,
"step": 517
},
{
"epoch": 0.6237206502107164,
"grad_norm": 0.534726366599209,
"learning_rate": 8.254425308783944e-05,
"loss": 0.6863809823989868,
"step": 518
},
{
"epoch": 0.6249247441300422,
"grad_norm": 0.5927917820962418,
"learning_rate": 8.246861758231925e-05,
"loss": 0.6661443114280701,
"step": 519
},
{
"epoch": 0.6261288380493678,
"grad_norm": 0.5809166240496674,
"learning_rate": 8.239285338543558e-05,
"loss": 0.7542550563812256,
"step": 520
},
{
"epoch": 0.6273329319686936,
"grad_norm": 0.6225849001672006,
"learning_rate": 8.23169607974845e-05,
"loss": 0.7155928611755371,
"step": 521
},
{
"epoch": 0.6285370258880193,
"grad_norm": 0.5194276411439794,
"learning_rate": 8.224094011927091e-05,
"loss": 0.7266699075698853,
"step": 522
},
{
"epoch": 0.6297411198073449,
"grad_norm": 0.5517873773853348,
"learning_rate": 8.216479165210748e-05,
"loss": 0.7413134574890137,
"step": 523
},
{
"epoch": 0.6309452137266707,
"grad_norm": 0.5339243729900964,
"learning_rate": 8.208851569781335e-05,
"loss": 0.7612078785896301,
"step": 524
},
{
"epoch": 0.6321493076459964,
"grad_norm": 0.6013511837590858,
"learning_rate": 8.201211255871293e-05,
"loss": 0.7581101655960083,
"step": 525
},
{
"epoch": 0.633353401565322,
"grad_norm": 0.5291974250735563,
"learning_rate": 8.193558253763478e-05,
"loss": 0.7113319635391235,
"step": 526
},
{
"epoch": 0.6345574954846478,
"grad_norm": 0.6185893376455726,
"learning_rate": 8.185892593791034e-05,
"loss": 0.8097169995307922,
"step": 527
},
{
"epoch": 0.6357615894039735,
"grad_norm": 0.5710668028977469,
"learning_rate": 8.178214306337278e-05,
"loss": 0.7629044055938721,
"step": 528
},
{
"epoch": 0.6369656833232992,
"grad_norm": 0.590842018166521,
"learning_rate": 8.170523421835572e-05,
"loss": 0.6748006343841553,
"step": 529
},
{
"epoch": 0.6381697772426249,
"grad_norm": 0.5232863047530079,
"learning_rate": 8.162819970769211e-05,
"loss": 0.729299783706665,
"step": 530
},
{
"epoch": 0.6393738711619507,
"grad_norm": 0.5110057849737956,
"learning_rate": 8.155103983671297e-05,
"loss": 0.8379458785057068,
"step": 531
},
{
"epoch": 0.6405779650812763,
"grad_norm": 0.615473066825679,
"learning_rate": 8.14737549112462e-05,
"loss": 0.7374885082244873,
"step": 532
},
{
"epoch": 0.641782059000602,
"grad_norm": 0.4711847774090933,
"learning_rate": 8.139634523761537e-05,
"loss": 0.7441030740737915,
"step": 533
},
{
"epoch": 0.6429861529199278,
"grad_norm": 0.5630505830242891,
"learning_rate": 8.131881112263845e-05,
"loss": 0.760844349861145,
"step": 534
},
{
"epoch": 0.6441902468392534,
"grad_norm": 0.4902045102048299,
"learning_rate": 8.12411528736267e-05,
"loss": 0.747408390045166,
"step": 535
},
{
"epoch": 0.6453943407585792,
"grad_norm": 0.5239339457722322,
"learning_rate": 8.116337079838337e-05,
"loss": 0.6766310334205627,
"step": 536
},
{
"epoch": 0.6465984346779049,
"grad_norm": 0.5639067025890481,
"learning_rate": 8.10854652052025e-05,
"loss": 0.7553038001060486,
"step": 537
},
{
"epoch": 0.6478025285972305,
"grad_norm": 0.5652729576179896,
"learning_rate": 8.100743640286768e-05,
"loss": 0.7927436828613281,
"step": 538
},
{
"epoch": 0.6490066225165563,
"grad_norm": 0.5899930998288447,
"learning_rate": 8.092928470065091e-05,
"loss": 0.746442437171936,
"step": 539
},
{
"epoch": 0.650210716435882,
"grad_norm": 0.5734926100588223,
"learning_rate": 8.085101040831122e-05,
"loss": 0.7299135327339172,
"step": 540
},
{
"epoch": 0.6514148103552077,
"grad_norm": 0.5445239473427346,
"learning_rate": 8.077261383609363e-05,
"loss": 0.7429971694946289,
"step": 541
},
{
"epoch": 0.6526189042745334,
"grad_norm": 0.5377764536655968,
"learning_rate": 8.069409529472774e-05,
"loss": 0.6965886354446411,
"step": 542
},
{
"epoch": 0.6538229981938591,
"grad_norm": 0.5495556573143111,
"learning_rate": 8.061545509542663e-05,
"loss": 0.8017982244491577,
"step": 543
},
{
"epoch": 0.6550270921131849,
"grad_norm": 0.6319154675647844,
"learning_rate": 8.05366935498856e-05,
"loss": 0.8424440026283264,
"step": 544
},
{
"epoch": 0.6562311860325105,
"grad_norm": 0.5851580380009528,
"learning_rate": 8.045781097028083e-05,
"loss": 0.7136452198028564,
"step": 545
},
{
"epoch": 0.6574352799518363,
"grad_norm": 0.5768159366295683,
"learning_rate": 8.037880766926833e-05,
"loss": 0.7658728957176208,
"step": 546
},
{
"epoch": 0.658639373871162,
"grad_norm": 0.5153849506337413,
"learning_rate": 8.02996839599825e-05,
"loss": 0.7741184234619141,
"step": 547
},
{
"epoch": 0.6598434677904876,
"grad_norm": 0.5439727935199963,
"learning_rate": 8.022044015603505e-05,
"loss": 0.682166337966919,
"step": 548
},
{
"epoch": 0.6610475617098134,
"grad_norm": 0.6200730052850133,
"learning_rate": 8.014107657151369e-05,
"loss": 0.7769534587860107,
"step": 549
},
{
"epoch": 0.6622516556291391,
"grad_norm": 0.8898978861581679,
"learning_rate": 8.006159352098082e-05,
"loss": 0.8437709808349609,
"step": 550
},
{
"epoch": 0.6634557495484648,
"grad_norm": 0.5710658960553652,
"learning_rate": 7.998199131947247e-05,
"loss": 0.7517293691635132,
"step": 551
},
{
"epoch": 0.6646598434677905,
"grad_norm": 0.5766184289770093,
"learning_rate": 7.990227028249678e-05,
"loss": 0.767498254776001,
"step": 552
},
{
"epoch": 0.6658639373871162,
"grad_norm": 0.48344386565097125,
"learning_rate": 7.982243072603306e-05,
"loss": 0.7911073565483093,
"step": 553
},
{
"epoch": 0.6670680313064419,
"grad_norm": 0.5720685454250555,
"learning_rate": 7.974247296653028e-05,
"loss": 0.7882490158081055,
"step": 554
},
{
"epoch": 0.6682721252257676,
"grad_norm": 0.551845274541981,
"learning_rate": 7.966239732090592e-05,
"loss": 0.7455636262893677,
"step": 555
},
{
"epoch": 0.6694762191450934,
"grad_norm": 0.5008402819182415,
"learning_rate": 7.958220410654475e-05,
"loss": 0.6583361029624939,
"step": 556
},
{
"epoch": 0.670680313064419,
"grad_norm": 0.629379844851495,
"learning_rate": 7.95018936412975e-05,
"loss": 0.8074872493743896,
"step": 557
},
{
"epoch": 0.6718844069837447,
"grad_norm": 0.5046050642514287,
"learning_rate": 7.942146624347964e-05,
"loss": 0.7117890119552612,
"step": 558
},
{
"epoch": 0.6730885009030705,
"grad_norm": 0.5606691219626582,
"learning_rate": 7.93409222318701e-05,
"loss": 0.776159942150116,
"step": 559
},
{
"epoch": 0.6742925948223961,
"grad_norm": 0.5675319565450898,
"learning_rate": 7.926026192571007e-05,
"loss": 0.7273181676864624,
"step": 560
},
{
"epoch": 0.6754966887417219,
"grad_norm": 0.6807229006452853,
"learning_rate": 7.917948564470157e-05,
"loss": 0.7717639207839966,
"step": 561
},
{
"epoch": 0.6767007826610476,
"grad_norm": 0.5643784281894073,
"learning_rate": 7.909859370900642e-05,
"loss": 0.850112795829773,
"step": 562
},
{
"epoch": 0.6779048765803732,
"grad_norm": 0.5624356458337382,
"learning_rate": 7.901758643924475e-05,
"loss": 0.717825174331665,
"step": 563
},
{
"epoch": 0.679108970499699,
"grad_norm": 0.5779451560266392,
"learning_rate": 7.893646415649384e-05,
"loss": 0.6891994476318359,
"step": 564
},
{
"epoch": 0.6803130644190247,
"grad_norm": 0.5460823910922556,
"learning_rate": 7.88552271822869e-05,
"loss": 0.7132343649864197,
"step": 565
},
{
"epoch": 0.6815171583383504,
"grad_norm": 0.5463816904759803,
"learning_rate": 7.877387583861165e-05,
"loss": 0.8201718926429749,
"step": 566
},
{
"epoch": 0.6827212522576761,
"grad_norm": 0.5019769893477617,
"learning_rate": 7.869241044790915e-05,
"loss": 0.6515538096427917,
"step": 567
},
{
"epoch": 0.6839253461770018,
"grad_norm": 0.5617633221602063,
"learning_rate": 7.861083133307247e-05,
"loss": 0.7625322341918945,
"step": 568
},
{
"epoch": 0.6851294400963275,
"grad_norm": 0.54964042039411,
"learning_rate": 7.852913881744547e-05,
"loss": 0.7261612415313721,
"step": 569
},
{
"epoch": 0.6863335340156532,
"grad_norm": 0.6263394024336052,
"learning_rate": 7.844733322482145e-05,
"loss": 0.7486558556556702,
"step": 570
},
{
"epoch": 0.687537627934979,
"grad_norm": 0.5555893896503016,
"learning_rate": 7.836541487944193e-05,
"loss": 0.7491647005081177,
"step": 571
},
{
"epoch": 0.6887417218543046,
"grad_norm": 0.5820402015407633,
"learning_rate": 7.82833841059953e-05,
"loss": 0.7191346883773804,
"step": 572
},
{
"epoch": 0.6899458157736303,
"grad_norm": 0.555977833657025,
"learning_rate": 7.820124122961557e-05,
"loss": 0.7905391454696655,
"step": 573
},
{
"epoch": 0.6911499096929561,
"grad_norm": 0.5037913291859483,
"learning_rate": 7.811898657588109e-05,
"loss": 0.7211226224899292,
"step": 574
},
{
"epoch": 0.6923540036122817,
"grad_norm": 0.5729626893502113,
"learning_rate": 7.803662047081323e-05,
"loss": 0.8028491735458374,
"step": 575
},
{
"epoch": 0.6935580975316075,
"grad_norm": 0.5860473434891468,
"learning_rate": 7.795414324087515e-05,
"loss": 0.829058825969696,
"step": 576
},
{
"epoch": 0.6947621914509332,
"grad_norm": 0.5958131631643511,
"learning_rate": 7.78715552129704e-05,
"loss": 0.8834158182144165,
"step": 577
},
{
"epoch": 0.6959662853702588,
"grad_norm": 0.5431734521373482,
"learning_rate": 7.778885671444169e-05,
"loss": 0.726544976234436,
"step": 578
},
{
"epoch": 0.6971703792895846,
"grad_norm": 0.5375518534571525,
"learning_rate": 7.770604807306966e-05,
"loss": 0.6950877904891968,
"step": 579
},
{
"epoch": 0.6983744732089103,
"grad_norm": 0.5646475926608178,
"learning_rate": 7.762312961707141e-05,
"loss": 0.7466782331466675,
"step": 580
},
{
"epoch": 0.699578567128236,
"grad_norm": 0.48963828108768087,
"learning_rate": 7.754010167509935e-05,
"loss": 0.6853356957435608,
"step": 581
},
{
"epoch": 0.7007826610475617,
"grad_norm": 0.586236539500212,
"learning_rate": 7.745696457623986e-05,
"loss": 0.740483283996582,
"step": 582
},
{
"epoch": 0.7019867549668874,
"grad_norm": 0.6274150858960484,
"learning_rate": 7.73737186500119e-05,
"loss": 0.7569112181663513,
"step": 583
},
{
"epoch": 0.7031908488862131,
"grad_norm": 0.5631748478704627,
"learning_rate": 7.729036422636589e-05,
"loss": 0.8061918020248413,
"step": 584
},
{
"epoch": 0.7043949428055388,
"grad_norm": 0.5841006909268124,
"learning_rate": 7.720690163568214e-05,
"loss": 0.8308575749397278,
"step": 585
},
{
"epoch": 0.7055990367248646,
"grad_norm": 0.5699658900127791,
"learning_rate": 7.712333120876983e-05,
"loss": 0.7775663137435913,
"step": 586
},
{
"epoch": 0.7068031306441902,
"grad_norm": 0.5889370522339232,
"learning_rate": 7.703965327686544e-05,
"loss": 0.7314500212669373,
"step": 587
},
{
"epoch": 0.708007224563516,
"grad_norm": 0.5923804291432415,
"learning_rate": 7.695586817163163e-05,
"loss": 0.7363287210464478,
"step": 588
},
{
"epoch": 0.7092113184828417,
"grad_norm": 0.6533117299804164,
"learning_rate": 7.68719762251558e-05,
"loss": 0.651695966720581,
"step": 589
},
{
"epoch": 0.7104154124021673,
"grad_norm": 0.5479326254295158,
"learning_rate": 7.678797776994886e-05,
"loss": 0.7530328631401062,
"step": 590
},
{
"epoch": 0.7116195063214931,
"grad_norm": 0.5299024142422354,
"learning_rate": 7.670387313894384e-05,
"loss": 0.7354722023010254,
"step": 591
},
{
"epoch": 0.7128236002408188,
"grad_norm": 0.5472573973168082,
"learning_rate": 7.661966266549463e-05,
"loss": 0.7562400102615356,
"step": 592
},
{
"epoch": 0.7140276941601444,
"grad_norm": 0.5854890840875948,
"learning_rate": 7.653534668337463e-05,
"loss": 0.8579235076904297,
"step": 593
},
{
"epoch": 0.7152317880794702,
"grad_norm": 0.5697072947758892,
"learning_rate": 7.645092552677539e-05,
"loss": 0.7369991540908813,
"step": 594
},
{
"epoch": 0.7164358819987959,
"grad_norm": 0.5685555722350157,
"learning_rate": 7.636639953030541e-05,
"loss": 0.8678609132766724,
"step": 595
},
{
"epoch": 0.7176399759181216,
"grad_norm": 0.567416577687503,
"learning_rate": 7.628176902898863e-05,
"loss": 0.791597843170166,
"step": 596
},
{
"epoch": 0.7188440698374473,
"grad_norm": 0.6148265638311559,
"learning_rate": 7.619703435826328e-05,
"loss": 0.749405026435852,
"step": 597
},
{
"epoch": 0.720048163756773,
"grad_norm": 0.5414167107715409,
"learning_rate": 7.61121958539804e-05,
"loss": 0.8846501708030701,
"step": 598
},
{
"epoch": 0.7212522576760987,
"grad_norm": 0.5416681872805827,
"learning_rate": 7.602725385240268e-05,
"loss": 0.7774538993835449,
"step": 599
},
{
"epoch": 0.7224563515954244,
"grad_norm": 0.588344345196164,
"learning_rate": 7.594220869020293e-05,
"loss": 0.814307451248169,
"step": 600
},
{
"epoch": 0.7236604455147502,
"grad_norm": 0.5576823157325073,
"learning_rate": 7.585706070446288e-05,
"loss": 0.7856515645980835,
"step": 601
},
{
"epoch": 0.7248645394340758,
"grad_norm": 0.5985127235800698,
"learning_rate": 7.577181023267185e-05,
"loss": 0.8144470453262329,
"step": 602
},
{
"epoch": 0.7260686333534015,
"grad_norm": 0.5779044201512746,
"learning_rate": 7.568645761272527e-05,
"loss": 0.7946053743362427,
"step": 603
},
{
"epoch": 0.7272727272727273,
"grad_norm": 0.5488619890930316,
"learning_rate": 7.560100318292355e-05,
"loss": 0.7577052116394043,
"step": 604
},
{
"epoch": 0.7284768211920529,
"grad_norm": 0.5459077250816263,
"learning_rate": 7.551544728197057e-05,
"loss": 0.7259520888328552,
"step": 605
},
{
"epoch": 0.7296809151113787,
"grad_norm": 0.6119689669390659,
"learning_rate": 7.542979024897239e-05,
"loss": 0.8580405712127686,
"step": 606
},
{
"epoch": 0.7308850090307044,
"grad_norm": 0.5147985669255489,
"learning_rate": 7.534403242343595e-05,
"loss": 0.7137761116027832,
"step": 607
},
{
"epoch": 0.7320891029500302,
"grad_norm": 0.5193410319727177,
"learning_rate": 7.525817414526764e-05,
"loss": 0.7543395757675171,
"step": 608
},
{
"epoch": 0.7332931968693558,
"grad_norm": 0.5573511973877319,
"learning_rate": 7.517221575477209e-05,
"loss": 0.7173647284507751,
"step": 609
},
{
"epoch": 0.7344972907886815,
"grad_norm": 0.5580550054944537,
"learning_rate": 7.508615759265059e-05,
"loss": 0.7329656481742859,
"step": 610
},
{
"epoch": 0.7357013847080073,
"grad_norm": 0.551637721601536,
"learning_rate": 7.500000000000001e-05,
"loss": 0.7801769375801086,
"step": 611
},
{
"epoch": 0.7369054786273329,
"grad_norm": 0.5329464486402283,
"learning_rate": 7.491374331831125e-05,
"loss": 0.7641035318374634,
"step": 612
},
{
"epoch": 0.7381095725466587,
"grad_norm": 0.5580597551172531,
"learning_rate": 7.482738788946799e-05,
"loss": 0.8063909411430359,
"step": 613
},
{
"epoch": 0.7393136664659844,
"grad_norm": 0.5536354711211005,
"learning_rate": 7.474093405574527e-05,
"loss": 0.8169757127761841,
"step": 614
},
{
"epoch": 0.74051776038531,
"grad_norm": 0.5416837758281863,
"learning_rate": 7.465438215980819e-05,
"loss": 0.7172082662582397,
"step": 615
},
{
"epoch": 0.7417218543046358,
"grad_norm": 0.5757826134716865,
"learning_rate": 7.456773254471053e-05,
"loss": 0.8059848546981812,
"step": 616
},
{
"epoch": 0.7429259482239615,
"grad_norm": 0.5552693082023876,
"learning_rate": 7.448098555389333e-05,
"loss": 0.888195276260376,
"step": 617
},
{
"epoch": 0.7441300421432872,
"grad_norm": 0.5247113920620894,
"learning_rate": 7.439414153118364e-05,
"loss": 0.7386849522590637,
"step": 618
},
{
"epoch": 0.7453341360626129,
"grad_norm": 0.5778342940116027,
"learning_rate": 7.43072008207931e-05,
"loss": 0.7103273868560791,
"step": 619
},
{
"epoch": 0.7465382299819386,
"grad_norm": 0.5785723142524426,
"learning_rate": 7.422016376731658e-05,
"loss": 0.772225558757782,
"step": 620
},
{
"epoch": 0.7477423239012643,
"grad_norm": 0.5459844270560734,
"learning_rate": 7.413303071573077e-05,
"loss": 0.7679145336151123,
"step": 621
},
{
"epoch": 0.74894641782059,
"grad_norm": 0.5723341998985462,
"learning_rate": 7.404580201139286e-05,
"loss": 0.7297276258468628,
"step": 622
},
{
"epoch": 0.7501505117399158,
"grad_norm": 0.5641616655306876,
"learning_rate": 7.395847800003925e-05,
"loss": 0.7312839031219482,
"step": 623
},
{
"epoch": 0.7513546056592414,
"grad_norm": 0.5733662946104073,
"learning_rate": 7.387105902778397e-05,
"loss": 0.7721570730209351,
"step": 624
},
{
"epoch": 0.7525586995785671,
"grad_norm": 0.5362108749320107,
"learning_rate": 7.378354544111755e-05,
"loss": 0.8026418089866638,
"step": 625
},
{
"epoch": 0.7537627934978929,
"grad_norm": 0.5633254415228907,
"learning_rate": 7.36959375869054e-05,
"loss": 0.6819289326667786,
"step": 626
},
{
"epoch": 0.7549668874172185,
"grad_norm": 0.529897150259017,
"learning_rate": 7.360823581238672e-05,
"loss": 0.8319383859634399,
"step": 627
},
{
"epoch": 0.7561709813365443,
"grad_norm": 0.5106910013108146,
"learning_rate": 7.352044046517285e-05,
"loss": 0.676674485206604,
"step": 628
},
{
"epoch": 0.75737507525587,
"grad_norm": 0.5484578007680182,
"learning_rate": 7.343255189324605e-05,
"loss": 0.6932168006896973,
"step": 629
},
{
"epoch": 0.7585791691751956,
"grad_norm": 0.5650930759635948,
"learning_rate": 7.334457044495811e-05,
"loss": 0.7660653591156006,
"step": 630
},
{
"epoch": 0.7597832630945214,
"grad_norm": 0.5584992927312921,
"learning_rate": 7.325649646902887e-05,
"loss": 0.7922879457473755,
"step": 631
},
{
"epoch": 0.7609873570138471,
"grad_norm": 0.5513822198360595,
"learning_rate": 7.316833031454498e-05,
"loss": 0.7717798948287964,
"step": 632
},
{
"epoch": 0.7621914509331728,
"grad_norm": 0.5903937591677999,
"learning_rate": 7.30800723309584e-05,
"loss": 0.709945797920227,
"step": 633
},
{
"epoch": 0.7633955448524985,
"grad_norm": 0.5243311870823861,
"learning_rate": 7.299172286808511e-05,
"loss": 0.6364589929580688,
"step": 634
},
{
"epoch": 0.7645996387718242,
"grad_norm": 0.5645975620167715,
"learning_rate": 7.290328227610362e-05,
"loss": 0.7189136743545532,
"step": 635
},
{
"epoch": 0.7658037326911499,
"grad_norm": 0.5989333495083847,
"learning_rate": 7.281475090555365e-05,
"loss": 0.8010446429252625,
"step": 636
},
{
"epoch": 0.7670078266104756,
"grad_norm": 0.5392019595111714,
"learning_rate": 7.272612910733475e-05,
"loss": 0.7160613536834717,
"step": 637
},
{
"epoch": 0.7682119205298014,
"grad_norm": 0.526699638579813,
"learning_rate": 7.263741723270486e-05,
"loss": 0.6541684865951538,
"step": 638
},
{
"epoch": 0.769416014449127,
"grad_norm": 0.5502399148075039,
"learning_rate": 7.254861563327896e-05,
"loss": 0.7666326761245728,
"step": 639
},
{
"epoch": 0.7706201083684527,
"grad_norm": 0.5367816011630834,
"learning_rate": 7.245972466102766e-05,
"loss": 0.7381945848464966,
"step": 640
},
{
"epoch": 0.7718242022877785,
"grad_norm": 0.5673276609505631,
"learning_rate": 7.237074466827579e-05,
"loss": 0.7307241559028625,
"step": 641
},
{
"epoch": 0.7730282962071041,
"grad_norm": 0.5374169130324764,
"learning_rate": 7.228167600770101e-05,
"loss": 0.752852201461792,
"step": 642
},
{
"epoch": 0.7742323901264299,
"grad_norm": 0.5222964595544717,
"learning_rate": 7.219251903233246e-05,
"loss": 0.7814919948577881,
"step": 643
},
{
"epoch": 0.7754364840457556,
"grad_norm": 0.5353375865532896,
"learning_rate": 7.210327409554926e-05,
"loss": 0.7453927993774414,
"step": 644
},
{
"epoch": 0.7766405779650812,
"grad_norm": 0.5420182525639181,
"learning_rate": 7.201394155107928e-05,
"loss": 0.748891294002533,
"step": 645
},
{
"epoch": 0.777844671884407,
"grad_norm": 0.5877559832438797,
"learning_rate": 7.192452175299748e-05,
"loss": 0.6877246499061584,
"step": 646
},
{
"epoch": 0.7790487658037327,
"grad_norm": 0.5760557644626407,
"learning_rate": 7.183501505572478e-05,
"loss": 0.8391183614730835,
"step": 647
},
{
"epoch": 0.7802528597230584,
"grad_norm": 0.5140131622082071,
"learning_rate": 7.174542181402646e-05,
"loss": 0.6214525699615479,
"step": 648
},
{
"epoch": 0.7814569536423841,
"grad_norm": 0.5920652975441979,
"learning_rate": 7.165574238301085e-05,
"loss": 0.7245489954948425,
"step": 649
},
{
"epoch": 0.7826610475617098,
"grad_norm": 0.5291539489099727,
"learning_rate": 7.15659771181279e-05,
"loss": 0.6523704528808594,
"step": 650
},
{
"epoch": 0.7838651414810355,
"grad_norm": 0.5184868746629374,
"learning_rate": 7.147612637516775e-05,
"loss": 0.7590177059173584,
"step": 651
},
{
"epoch": 0.7850692354003612,
"grad_norm": 0.5767737493812523,
"learning_rate": 7.138619051025935e-05,
"loss": 0.7685889005661011,
"step": 652
},
{
"epoch": 0.786273329319687,
"grad_norm": 0.5316769794527457,
"learning_rate": 7.129616987986905e-05,
"loss": 0.612991213798523,
"step": 653
},
{
"epoch": 0.7874774232390126,
"grad_norm": 0.566437732909868,
"learning_rate": 7.120606484079912e-05,
"loss": 0.8298099040985107,
"step": 654
},
{
"epoch": 0.7886815171583383,
"grad_norm": 0.5819731356601086,
"learning_rate": 7.111587575018648e-05,
"loss": 0.7112371921539307,
"step": 655
},
{
"epoch": 0.7898856110776641,
"grad_norm": 0.5939381717337568,
"learning_rate": 7.102560296550109e-05,
"loss": 0.7533249855041504,
"step": 656
},
{
"epoch": 0.7910897049969897,
"grad_norm": 0.5977179736055875,
"learning_rate": 7.093524684454471e-05,
"loss": 0.8646591305732727,
"step": 657
},
{
"epoch": 0.7922937989163155,
"grad_norm": 0.5811627493929847,
"learning_rate": 7.084480774544937e-05,
"loss": 0.7045024037361145,
"step": 658
},
{
"epoch": 0.7934978928356412,
"grad_norm": 0.46222093389370705,
"learning_rate": 7.075428602667602e-05,
"loss": 0.5506249666213989,
"step": 659
},
{
"epoch": 0.7947019867549668,
"grad_norm": 0.5645543669115538,
"learning_rate": 7.066368204701306e-05,
"loss": 0.7758337259292603,
"step": 660
},
{
"epoch": 0.7959060806742926,
"grad_norm": 0.4915448078569477,
"learning_rate": 7.057299616557493e-05,
"loss": 0.651608943939209,
"step": 661
},
{
"epoch": 0.7971101745936183,
"grad_norm": 0.5522172829410883,
"learning_rate": 7.048222874180072e-05,
"loss": 0.7136639356613159,
"step": 662
},
{
"epoch": 0.798314268512944,
"grad_norm": 0.5893396997765099,
"learning_rate": 7.039138013545265e-05,
"loss": 0.8380235433578491,
"step": 663
},
{
"epoch": 0.7995183624322697,
"grad_norm": 0.6133987101537153,
"learning_rate": 7.030045070661484e-05,
"loss": 0.7751943469047546,
"step": 664
},
{
"epoch": 0.8007224563515954,
"grad_norm": 0.5282165625345195,
"learning_rate": 7.020944081569158e-05,
"loss": 0.7577475309371948,
"step": 665
},
{
"epoch": 0.8019265502709211,
"grad_norm": 0.5594249036363087,
"learning_rate": 7.011835082340625e-05,
"loss": 0.8250494599342346,
"step": 666
},
{
"epoch": 0.8031306441902468,
"grad_norm": 0.5851363252245912,
"learning_rate": 7.002718109079964e-05,
"loss": 0.7192866802215576,
"step": 667
},
{
"epoch": 0.8043347381095726,
"grad_norm": 0.5451363099285061,
"learning_rate": 6.993593197922852e-05,
"loss": 0.7122324705123901,
"step": 668
},
{
"epoch": 0.8055388320288982,
"grad_norm": 0.5665959912586943,
"learning_rate": 6.984460385036442e-05,
"loss": 0.674079179763794,
"step": 669
},
{
"epoch": 0.8067429259482239,
"grad_norm": 0.5607832145712084,
"learning_rate": 6.975319706619197e-05,
"loss": 0.7614448070526123,
"step": 670
},
{
"epoch": 0.8079470198675497,
"grad_norm": 0.516789952393803,
"learning_rate": 6.966171198900761e-05,
"loss": 0.7189384698867798,
"step": 671
},
{
"epoch": 0.8091511137868754,
"grad_norm": 0.6772184953913467,
"learning_rate": 6.957014898141805e-05,
"loss": 0.6840660572052002,
"step": 672
},
{
"epoch": 0.8103552077062011,
"grad_norm": 0.531942812379234,
"learning_rate": 6.947850840633892e-05,
"loss": 0.6458152532577515,
"step": 673
},
{
"epoch": 0.8115593016255268,
"grad_norm": 0.5519490767522439,
"learning_rate": 6.938679062699327e-05,
"loss": 0.7066282629966736,
"step": 674
},
{
"epoch": 0.8127633955448526,
"grad_norm": 0.48652561414966566,
"learning_rate": 6.929499600691014e-05,
"loss": 0.7652988433837891,
"step": 675
},
{
"epoch": 0.8139674894641782,
"grad_norm": 0.6226477503268696,
"learning_rate": 6.92031249099232e-05,
"loss": 0.7655966281890869,
"step": 676
},
{
"epoch": 0.8151715833835039,
"grad_norm": 0.5595978038997756,
"learning_rate": 6.911117770016915e-05,
"loss": 0.7019493579864502,
"step": 677
},
{
"epoch": 0.8163756773028297,
"grad_norm": 0.5495627855637409,
"learning_rate": 6.901915474208644e-05,
"loss": 0.70711350440979,
"step": 678
},
{
"epoch": 0.8175797712221553,
"grad_norm": 0.5712315500872475,
"learning_rate": 6.892705640041373e-05,
"loss": 0.7052388787269592,
"step": 679
},
{
"epoch": 0.818783865141481,
"grad_norm": 0.5500499336299753,
"learning_rate": 6.883488304018844e-05,
"loss": 0.7443139553070068,
"step": 680
},
{
"epoch": 0.8199879590608068,
"grad_norm": 0.5387862831681539,
"learning_rate": 6.874263502674538e-05,
"loss": 0.7455554604530334,
"step": 681
},
{
"epoch": 0.8211920529801324,
"grad_norm": 0.5925515236740267,
"learning_rate": 6.86503127257152e-05,
"loss": 0.7441598773002625,
"step": 682
},
{
"epoch": 0.8223961468994582,
"grad_norm": 0.6281201889624337,
"learning_rate": 6.855791650302305e-05,
"loss": 0.825596272945404,
"step": 683
},
{
"epoch": 0.8236002408187839,
"grad_norm": 0.5638562915547312,
"learning_rate": 6.846544672488701e-05,
"loss": 0.8428453803062439,
"step": 684
},
{
"epoch": 0.8248043347381095,
"grad_norm": 0.5505970416330618,
"learning_rate": 6.837290375781678e-05,
"loss": 0.7983956336975098,
"step": 685
},
{
"epoch": 0.8260084286574353,
"grad_norm": 0.5721512425660251,
"learning_rate": 6.828028796861207e-05,
"loss": 0.7709592580795288,
"step": 686
},
{
"epoch": 0.827212522576761,
"grad_norm": 0.6645005011132197,
"learning_rate": 6.818759972436125e-05,
"loss": 0.7213572263717651,
"step": 687
},
{
"epoch": 0.8284166164960867,
"grad_norm": 0.5017848479381612,
"learning_rate": 6.809483939243992e-05,
"loss": 0.7169408798217773,
"step": 688
},
{
"epoch": 0.8296207104154124,
"grad_norm": 0.5943825592817649,
"learning_rate": 6.800200734050931e-05,
"loss": 0.6034449338912964,
"step": 689
},
{
"epoch": 0.8308248043347382,
"grad_norm": 0.5544289785627361,
"learning_rate": 6.790910393651502e-05,
"loss": 0.7898417711257935,
"step": 690
},
{
"epoch": 0.8320288982540638,
"grad_norm": 0.5851190478412958,
"learning_rate": 6.781612954868538e-05,
"loss": 0.735333263874054,
"step": 691
},
{
"epoch": 0.8332329921733895,
"grad_norm": 0.5132314600936285,
"learning_rate": 6.77230845455301e-05,
"loss": 0.6649155616760254,
"step": 692
},
{
"epoch": 0.8344370860927153,
"grad_norm": 0.5717715226236111,
"learning_rate": 6.762996929583878e-05,
"loss": 0.7077334523200989,
"step": 693
},
{
"epoch": 0.8356411800120409,
"grad_norm": 0.5295441327900904,
"learning_rate": 6.753678416867944e-05,
"loss": 0.7083529233932495,
"step": 694
},
{
"epoch": 0.8368452739313667,
"grad_norm": 0.5409070425072329,
"learning_rate": 6.744352953339706e-05,
"loss": 0.6848558783531189,
"step": 695
},
{
"epoch": 0.8380493678506924,
"grad_norm": 11.456453932391845,
"learning_rate": 6.735020575961213e-05,
"loss": 2.429105281829834,
"step": 696
},
{
"epoch": 0.839253461770018,
"grad_norm": 0.5030799338663503,
"learning_rate": 6.725681321721916e-05,
"loss": 0.6945353150367737,
"step": 697
},
{
"epoch": 0.8404575556893438,
"grad_norm": 0.5019492592535822,
"learning_rate": 6.716335227638525e-05,
"loss": 0.6985099911689758,
"step": 698
},
{
"epoch": 0.8416616496086695,
"grad_norm": 0.5343651236614299,
"learning_rate": 6.706982330754858e-05,
"loss": 0.7406755089759827,
"step": 699
},
{
"epoch": 0.8428657435279951,
"grad_norm": 0.5772034919820741,
"learning_rate": 6.697622668141698e-05,
"loss": 0.8253107070922852,
"step": 700
},
{
"epoch": 0.8440698374473209,
"grad_norm": 0.5340743353018809,
"learning_rate": 6.688256276896643e-05,
"loss": 0.6895221471786499,
"step": 701
},
{
"epoch": 0.8452739313666466,
"grad_norm": 0.5653672583177308,
"learning_rate": 6.678883194143962e-05,
"loss": 0.7509863376617432,
"step": 702
},
{
"epoch": 0.8464780252859723,
"grad_norm": 0.6952333150720577,
"learning_rate": 6.669503457034446e-05,
"loss": 0.7106937766075134,
"step": 703
},
{
"epoch": 0.847682119205298,
"grad_norm": 0.5851587415926052,
"learning_rate": 6.660117102745256e-05,
"loss": 0.8080302476882935,
"step": 704
},
{
"epoch": 0.8488862131246238,
"grad_norm": 0.5889137432099876,
"learning_rate": 6.650724168479789e-05,
"loss": 0.7220859527587891,
"step": 705
},
{
"epoch": 0.8500903070439494,
"grad_norm": 0.5691084934905695,
"learning_rate": 6.641324691467514e-05,
"loss": 0.7487884759902954,
"step": 706
},
{
"epoch": 0.8512944009632751,
"grad_norm": 0.5298216455216096,
"learning_rate": 6.63191870896384e-05,
"loss": 0.7792977690696716,
"step": 707
},
{
"epoch": 0.8524984948826009,
"grad_norm": 0.5772535684835562,
"learning_rate": 6.622506258249956e-05,
"loss": 0.6996712684631348,
"step": 708
},
{
"epoch": 0.8537025888019265,
"grad_norm": 0.5155556118938752,
"learning_rate": 6.61308737663269e-05,
"loss": 0.6337662935256958,
"step": 709
},
{
"epoch": 0.8549066827212523,
"grad_norm": 0.5924702334508825,
"learning_rate": 6.60366210144436e-05,
"loss": 0.8370622396469116,
"step": 710
},
{
"epoch": 0.856110776640578,
"grad_norm": 0.5183147485813695,
"learning_rate": 6.59423047004262e-05,
"loss": 0.6973636746406555,
"step": 711
},
{
"epoch": 0.8573148705599036,
"grad_norm": 0.5572135137974717,
"learning_rate": 6.584792519810325e-05,
"loss": 0.7278915047645569,
"step": 712
},
{
"epoch": 0.8585189644792294,
"grad_norm": 0.5420202758959125,
"learning_rate": 6.57534828815537e-05,
"loss": 0.7928230166435242,
"step": 713
},
{
"epoch": 0.8597230583985551,
"grad_norm": 0.574639133513042,
"learning_rate": 6.565897812510549e-05,
"loss": 0.8664229512214661,
"step": 714
},
{
"epoch": 0.8609271523178808,
"grad_norm": 0.4700403610654423,
"learning_rate": 6.556441130333403e-05,
"loss": 0.5933694243431091,
"step": 715
},
{
"epoch": 0.8621312462372065,
"grad_norm": 0.5312249498337128,
"learning_rate": 6.546978279106074e-05,
"loss": 0.733972430229187,
"step": 716
},
{
"epoch": 0.8633353401565322,
"grad_norm": 0.5216302635057909,
"learning_rate": 6.537509296335155e-05,
"loss": 0.6753861904144287,
"step": 717
},
{
"epoch": 0.8645394340758579,
"grad_norm": 0.4790376690745771,
"learning_rate": 6.528034219551543e-05,
"loss": 0.6640701293945312,
"step": 718
},
{
"epoch": 0.8657435279951836,
"grad_norm": 0.5065707454061958,
"learning_rate": 6.518553086310285e-05,
"loss": 0.7376319169998169,
"step": 719
},
{
"epoch": 0.8669476219145094,
"grad_norm": 0.5816099925628252,
"learning_rate": 6.509065934190437e-05,
"loss": 0.6989994049072266,
"step": 720
},
{
"epoch": 0.868151715833835,
"grad_norm": 0.533855911666009,
"learning_rate": 6.499572800794911e-05,
"loss": 0.6418149471282959,
"step": 721
},
{
"epoch": 0.8693558097531607,
"grad_norm": 0.5808342911427314,
"learning_rate": 6.490073723750326e-05,
"loss": 0.7712098956108093,
"step": 722
},
{
"epoch": 0.8705599036724865,
"grad_norm": 0.5618125174120853,
"learning_rate": 6.480568740706856e-05,
"loss": 0.6786316633224487,
"step": 723
},
{
"epoch": 0.8717639975918121,
"grad_norm": 0.5442790186136376,
"learning_rate": 6.471057889338089e-05,
"loss": 0.6759684681892395,
"step": 724
},
{
"epoch": 0.8729680915111379,
"grad_norm": 0.5575091652765518,
"learning_rate": 6.461541207340866e-05,
"loss": 0.8004689812660217,
"step": 725
},
{
"epoch": 0.8741721854304636,
"grad_norm": 0.6087037735248484,
"learning_rate": 6.452018732435145e-05,
"loss": 0.702217161655426,
"step": 726
},
{
"epoch": 0.8753762793497892,
"grad_norm": 0.523391785114522,
"learning_rate": 6.442490502363838e-05,
"loss": 0.7841488122940063,
"step": 727
},
{
"epoch": 0.876580373269115,
"grad_norm": 0.5893015983604554,
"learning_rate": 6.432956554892675e-05,
"loss": 0.806492030620575,
"step": 728
},
{
"epoch": 0.8777844671884407,
"grad_norm": 0.49897942350210794,
"learning_rate": 6.42341692781004e-05,
"loss": 0.7724281549453735,
"step": 729
},
{
"epoch": 0.8789885611077664,
"grad_norm": 0.5025156193616485,
"learning_rate": 6.413871658926833e-05,
"loss": 0.7163165211677551,
"step": 730
},
{
"epoch": 0.8801926550270921,
"grad_norm": 0.5411433468483965,
"learning_rate": 6.404320786076317e-05,
"loss": 0.7383996248245239,
"step": 731
},
{
"epoch": 0.8813967489464178,
"grad_norm": 0.5363738863779194,
"learning_rate": 6.39476434711396e-05,
"loss": 0.7603915929794312,
"step": 732
},
{
"epoch": 0.8826008428657435,
"grad_norm": 0.5771663171868379,
"learning_rate": 6.385202379917297e-05,
"loss": 0.7711158990859985,
"step": 733
},
{
"epoch": 0.8838049367850692,
"grad_norm": 0.5477282394048203,
"learning_rate": 6.375634922385775e-05,
"loss": 0.755664587020874,
"step": 734
},
{
"epoch": 0.885009030704395,
"grad_norm": 0.5880459246570379,
"learning_rate": 6.366062012440599e-05,
"loss": 0.7198222875595093,
"step": 735
},
{
"epoch": 0.8862131246237207,
"grad_norm": 0.6360552256124195,
"learning_rate": 6.356483688024588e-05,
"loss": 0.7838082909584045,
"step": 736
},
{
"epoch": 0.8874172185430463,
"grad_norm": 0.5369270802924788,
"learning_rate": 6.346899987102019e-05,
"loss": 0.7577356100082397,
"step": 737
},
{
"epoch": 0.8886213124623721,
"grad_norm": 0.5556367435929235,
"learning_rate": 6.337310947658478e-05,
"loss": 0.7227284908294678,
"step": 738
},
{
"epoch": 0.8898254063816978,
"grad_norm": 0.5361468921378463,
"learning_rate": 6.327716607700719e-05,
"loss": 0.7303032875061035,
"step": 739
},
{
"epoch": 0.8910295003010235,
"grad_norm": 0.5395989339749642,
"learning_rate": 6.318117005256494e-05,
"loss": 0.7400148510932922,
"step": 740
},
{
"epoch": 0.8922335942203492,
"grad_norm": 0.5723788997800525,
"learning_rate": 6.308512178374419e-05,
"loss": 0.7611583471298218,
"step": 741
},
{
"epoch": 0.893437688139675,
"grad_norm": 0.5521688567876344,
"learning_rate": 6.298902165123815e-05,
"loss": 0.8619204759597778,
"step": 742
},
{
"epoch": 0.8946417820590006,
"grad_norm": 0.5582827301628421,
"learning_rate": 6.289287003594564e-05,
"loss": 0.7897509932518005,
"step": 743
},
{
"epoch": 0.8958458759783263,
"grad_norm": 0.5093785587034908,
"learning_rate": 6.279666731896946e-05,
"loss": 0.7222546935081482,
"step": 744
},
{
"epoch": 0.8970499698976521,
"grad_norm": 0.6095242070371871,
"learning_rate": 6.270041388161503e-05,
"loss": 0.798790454864502,
"step": 745
},
{
"epoch": 0.8982540638169777,
"grad_norm": 0.5698300911071993,
"learning_rate": 6.26041101053888e-05,
"loss": 0.73308926820755,
"step": 746
},
{
"epoch": 0.8994581577363034,
"grad_norm": 0.5540329200739117,
"learning_rate": 6.250775637199661e-05,
"loss": 0.7667993903160095,
"step": 747
},
{
"epoch": 0.9006622516556292,
"grad_norm": 0.49980897472798463,
"learning_rate": 6.241135306334254e-05,
"loss": 0.7137086987495422,
"step": 748
},
{
"epoch": 0.9018663455749548,
"grad_norm": 0.5795317113908485,
"learning_rate": 6.231490056152692e-05,
"loss": 0.8151491284370422,
"step": 749
},
{
"epoch": 0.9030704394942806,
"grad_norm": 0.5582206562940626,
"learning_rate": 6.221839924884527e-05,
"loss": 0.7875903844833374,
"step": 750
},
{
"epoch": 0.9042745334136063,
"grad_norm": 0.5491858152450069,
"learning_rate": 6.21218495077864e-05,
"loss": 0.7306442260742188,
"step": 751
},
{
"epoch": 0.9054786273329319,
"grad_norm": 0.6055567996900213,
"learning_rate": 6.20252517210312e-05,
"loss": 0.754359781742096,
"step": 752
},
{
"epoch": 0.9066827212522577,
"grad_norm": 0.5322913036677438,
"learning_rate": 6.192860627145094e-05,
"loss": 0.6508967280387878,
"step": 753
},
{
"epoch": 0.9078868151715834,
"grad_norm": 0.5656082361795239,
"learning_rate": 6.183191354210577e-05,
"loss": 0.7721344232559204,
"step": 754
},
{
"epoch": 0.9090909090909091,
"grad_norm": 0.5362562825791712,
"learning_rate": 6.173517391624331e-05,
"loss": 0.7396098971366882,
"step": 755
},
{
"epoch": 0.9102950030102348,
"grad_norm": 0.5552002010414196,
"learning_rate": 6.163838777729699e-05,
"loss": 0.6969982385635376,
"step": 756
},
{
"epoch": 0.9114990969295605,
"grad_norm": 0.5806759798417249,
"learning_rate": 6.154155550888466e-05,
"loss": 0.7862981557846069,
"step": 757
},
{
"epoch": 0.9127031908488862,
"grad_norm": 0.5618012271452348,
"learning_rate": 6.144467749480695e-05,
"loss": 0.7197493314743042,
"step": 758
},
{
"epoch": 0.9139072847682119,
"grad_norm": 0.6015842930717432,
"learning_rate": 6.134775411904581e-05,
"loss": 0.801885724067688,
"step": 759
},
{
"epoch": 0.9151113786875377,
"grad_norm": 0.648697014238488,
"learning_rate": 6.125078576576306e-05,
"loss": 0.762459397315979,
"step": 760
},
{
"epoch": 0.9163154726068633,
"grad_norm": 1.006231715341414,
"learning_rate": 6.115377281929867e-05,
"loss": 0.7917613983154297,
"step": 761
},
{
"epoch": 0.917519566526189,
"grad_norm": 0.5730425991520282,
"learning_rate": 6.105671566416947e-05,
"loss": 0.702721893787384,
"step": 762
},
{
"epoch": 0.9187236604455148,
"grad_norm": 0.5497044062021355,
"learning_rate": 6.0959614685067444e-05,
"loss": 0.6928747892379761,
"step": 763
},
{
"epoch": 0.9199277543648404,
"grad_norm": 0.5209247297432111,
"learning_rate": 6.086247026685832e-05,
"loss": 0.7445698380470276,
"step": 764
},
{
"epoch": 0.9211318482841662,
"grad_norm": 0.49471431299243296,
"learning_rate": 6.0765282794579934e-05,
"loss": 0.6368311643600464,
"step": 765
},
{
"epoch": 0.9223359422034919,
"grad_norm": 0.4948084280976926,
"learning_rate": 6.066805265344084e-05,
"loss": 0.7579319477081299,
"step": 766
},
{
"epoch": 0.9235400361228175,
"grad_norm": 0.5782172565746628,
"learning_rate": 6.0570780228818705e-05,
"loss": 0.7203896641731262,
"step": 767
},
{
"epoch": 0.9247441300421433,
"grad_norm": 0.5239174148850319,
"learning_rate": 6.047346590625872e-05,
"loss": 0.7110145092010498,
"step": 768
},
{
"epoch": 0.925948223961469,
"grad_norm": 0.5425491477842784,
"learning_rate": 6.0376110071472234e-05,
"loss": 0.7392451763153076,
"step": 769
},
{
"epoch": 0.9271523178807947,
"grad_norm": 0.5268921834335735,
"learning_rate": 6.027871311033506e-05,
"loss": 0.777907133102417,
"step": 770
},
{
"epoch": 0.9283564118001204,
"grad_norm": 0.5572897315618067,
"learning_rate": 6.0181275408886064e-05,
"loss": 0.8401760458946228,
"step": 771
},
{
"epoch": 0.9295605057194462,
"grad_norm": 0.5641552558734205,
"learning_rate": 6.008379735332556e-05,
"loss": 0.6873654127120972,
"step": 772
},
{
"epoch": 0.9307645996387718,
"grad_norm": 0.5208475397793321,
"learning_rate": 5.998627933001381e-05,
"loss": 0.6632927656173706,
"step": 773
},
{
"epoch": 0.9319686935580975,
"grad_norm": 0.5600358850446495,
"learning_rate": 5.988872172546952e-05,
"loss": 0.7082634568214417,
"step": 774
},
{
"epoch": 0.9331727874774233,
"grad_norm": 0.5693732220311049,
"learning_rate": 5.979112492636824e-05,
"loss": 0.8022473454475403,
"step": 775
},
{
"epoch": 0.9343768813967489,
"grad_norm": 0.5193498332315772,
"learning_rate": 5.9693489319540906e-05,
"loss": 0.7477356195449829,
"step": 776
},
{
"epoch": 0.9355809753160746,
"grad_norm": 0.5273371583121212,
"learning_rate": 5.959581529197225e-05,
"loss": 0.7064501047134399,
"step": 777
},
{
"epoch": 0.9367850692354004,
"grad_norm": 0.542237859966888,
"learning_rate": 5.9498103230799274e-05,
"loss": 0.7777222394943237,
"step": 778
},
{
"epoch": 0.937989163154726,
"grad_norm": 0.5099252902637973,
"learning_rate": 5.940035352330975e-05,
"loss": 0.6861948370933533,
"step": 779
},
{
"epoch": 0.9391932570740518,
"grad_norm": 0.5315788954781036,
"learning_rate": 5.9302566556940654e-05,
"loss": 0.7673315405845642,
"step": 780
},
{
"epoch": 0.9403973509933775,
"grad_norm": 0.5656250044454243,
"learning_rate": 5.9204742719276676e-05,
"loss": 0.6729450225830078,
"step": 781
},
{
"epoch": 0.9416014449127031,
"grad_norm": 0.5245159798265684,
"learning_rate": 5.910688239804857e-05,
"loss": 0.6607034206390381,
"step": 782
},
{
"epoch": 0.9428055388320289,
"grad_norm": 0.5362612853904927,
"learning_rate": 5.900898598113177e-05,
"loss": 0.6874101161956787,
"step": 783
},
{
"epoch": 0.9440096327513546,
"grad_norm": 0.5619032845760895,
"learning_rate": 5.891105385654474e-05,
"loss": 0.8311077356338501,
"step": 784
},
{
"epoch": 0.9452137266706803,
"grad_norm": 0.5298715941448011,
"learning_rate": 5.881308641244747e-05,
"loss": 0.7484720349311829,
"step": 785
},
{
"epoch": 0.946417820590006,
"grad_norm": 0.5638478600748363,
"learning_rate": 5.871508403713997e-05,
"loss": 0.7567854523658752,
"step": 786
},
{
"epoch": 0.9476219145093318,
"grad_norm": 0.5652256134001232,
"learning_rate": 5.861704711906067e-05,
"loss": 0.6978403329849243,
"step": 787
},
{
"epoch": 0.9488260084286574,
"grad_norm": 0.5120740518680585,
"learning_rate": 5.8518976046784944e-05,
"loss": 0.6337791085243225,
"step": 788
},
{
"epoch": 0.9500301023479831,
"grad_norm": 0.5806417082771747,
"learning_rate": 5.842087120902351e-05,
"loss": 0.7493042945861816,
"step": 789
},
{
"epoch": 0.9512341962673089,
"grad_norm": 0.5320004350541582,
"learning_rate": 5.832273299462092e-05,
"loss": 0.7247514128684998,
"step": 790
},
{
"epoch": 0.9524382901866345,
"grad_norm": 0.5687996430845107,
"learning_rate": 5.8224561792554036e-05,
"loss": 0.7841694355010986,
"step": 791
},
{
"epoch": 0.9536423841059603,
"grad_norm": 0.5448943870212728,
"learning_rate": 5.812635799193046e-05,
"loss": 0.7085949182510376,
"step": 792
},
{
"epoch": 0.954846478025286,
"grad_norm": 0.5208551754781878,
"learning_rate": 5.802812198198699e-05,
"loss": 0.6806622743606567,
"step": 793
},
{
"epoch": 0.9560505719446116,
"grad_norm": 0.5808285218584335,
"learning_rate": 5.792985415208809e-05,
"loss": 0.6692842841148376,
"step": 794
},
{
"epoch": 0.9572546658639374,
"grad_norm": 0.5329672698419077,
"learning_rate": 5.783155489172437e-05,
"loss": 0.7136318683624268,
"step": 795
},
{
"epoch": 0.9584587597832631,
"grad_norm": 0.6087188300193648,
"learning_rate": 5.773322459051098e-05,
"loss": 0.6897290945053101,
"step": 796
},
{
"epoch": 0.9596628537025887,
"grad_norm": 0.5370418233498948,
"learning_rate": 5.763486363818613e-05,
"loss": 0.6508396863937378,
"step": 797
},
{
"epoch": 0.9608669476219145,
"grad_norm": 0.5582963877436334,
"learning_rate": 5.75364724246095e-05,
"loss": 0.7444456219673157,
"step": 798
},
{
"epoch": 0.9620710415412402,
"grad_norm": 0.5276580787140643,
"learning_rate": 5.743805133976071e-05,
"loss": 0.6803913116455078,
"step": 799
},
{
"epoch": 0.963275135460566,
"grad_norm": 0.566464776830601,
"learning_rate": 5.733960077373781e-05,
"loss": 0.7227286696434021,
"step": 800
},
{
"epoch": 0.9644792293798916,
"grad_norm": 0.49230944093225026,
"learning_rate": 5.7241121116755646e-05,
"loss": 0.6570050716400146,
"step": 801
},
{
"epoch": 0.9656833232992174,
"grad_norm": 0.5354273978335398,
"learning_rate": 5.714261275914442e-05,
"loss": 0.6928553581237793,
"step": 802
},
{
"epoch": 0.9668874172185431,
"grad_norm": 0.5840417013891573,
"learning_rate": 5.704407609134805e-05,
"loss": 0.7683090567588806,
"step": 803
},
{
"epoch": 0.9680915111378687,
"grad_norm": 0.5687796495614603,
"learning_rate": 5.694551150392271e-05,
"loss": 0.6832355856895447,
"step": 804
},
{
"epoch": 0.9692956050571945,
"grad_norm": 0.6253085231688535,
"learning_rate": 5.684691938753517e-05,
"loss": 0.6900801658630371,
"step": 805
},
{
"epoch": 0.9704996989765202,
"grad_norm": 0.5656488130538716,
"learning_rate": 5.674830013296137e-05,
"loss": 0.757134199142456,
"step": 806
},
{
"epoch": 0.9717037928958459,
"grad_norm": 0.9657780904921088,
"learning_rate": 5.664965413108481e-05,
"loss": 0.6606450080871582,
"step": 807
},
{
"epoch": 0.9729078868151716,
"grad_norm": 0.53118259104115,
"learning_rate": 5.655098177289496e-05,
"loss": 0.789542555809021,
"step": 808
},
{
"epoch": 0.9741119807344973,
"grad_norm": 0.5739635967821072,
"learning_rate": 5.6452283449485774e-05,
"loss": 0.7988462448120117,
"step": 809
},
{
"epoch": 0.975316074653823,
"grad_norm": 0.5657096105950147,
"learning_rate": 5.635355955205416e-05,
"loss": 0.690794825553894,
"step": 810
},
{
"epoch": 0.9765201685731487,
"grad_norm": 0.5651903319137221,
"learning_rate": 5.625481047189835e-05,
"loss": 0.7849836945533752,
"step": 811
},
{
"epoch": 0.9777242624924745,
"grad_norm": 0.5296771731346789,
"learning_rate": 5.6156036600416385e-05,
"loss": 0.6022036671638489,
"step": 812
},
{
"epoch": 0.9789283564118001,
"grad_norm": 0.5615215691551119,
"learning_rate": 5.60572383291046e-05,
"loss": 0.7593386769294739,
"step": 813
},
{
"epoch": 0.9801324503311258,
"grad_norm": 0.5820599724079297,
"learning_rate": 5.595841604955601e-05,
"loss": 0.8309260606765747,
"step": 814
},
{
"epoch": 0.9813365442504516,
"grad_norm": 0.5772941004909043,
"learning_rate": 5.585957015345882e-05,
"loss": 0.7122389078140259,
"step": 815
},
{
"epoch": 0.9825406381697772,
"grad_norm": 0.5708808623062601,
"learning_rate": 5.5760701032594775e-05,
"loss": 0.8290776014328003,
"step": 816
},
{
"epoch": 0.983744732089103,
"grad_norm": 0.5082628334979072,
"learning_rate": 5.566180907883777e-05,
"loss": 0.6864231824874878,
"step": 817
},
{
"epoch": 0.9849488260084287,
"grad_norm": 0.5583106407232503,
"learning_rate": 5.556289468415212e-05,
"loss": 0.6441450119018555,
"step": 818
},
{
"epoch": 0.9861529199277543,
"grad_norm": 0.5249727637966792,
"learning_rate": 5.546395824059113e-05,
"loss": 0.5966594219207764,
"step": 819
},
{
"epoch": 0.9873570138470801,
"grad_norm": 0.5469008116884886,
"learning_rate": 5.536500014029547e-05,
"loss": 0.6438560485839844,
"step": 820
},
{
"epoch": 0.9885611077664058,
"grad_norm": 0.5748625071857363,
"learning_rate": 5.5266020775491654e-05,
"loss": 0.7250472903251648,
"step": 821
},
{
"epoch": 0.9897652016857315,
"grad_norm": 0.6173582251606323,
"learning_rate": 5.5167020538490514e-05,
"loss": 0.6797659397125244,
"step": 822
},
{
"epoch": 0.9909692956050572,
"grad_norm": 0.5611640576854714,
"learning_rate": 5.506799982168553e-05,
"loss": 0.6531401872634888,
"step": 823
},
{
"epoch": 0.9921733895243829,
"grad_norm": 0.5641246723973709,
"learning_rate": 5.496895901755145e-05,
"loss": 0.7849875688552856,
"step": 824
},
{
"epoch": 0.9933774834437086,
"grad_norm": 0.554146547967545,
"learning_rate": 5.486989851864258e-05,
"loss": 0.6360095143318176,
"step": 825
},
{
"epoch": 0.9945815773630343,
"grad_norm": 0.6921418168357653,
"learning_rate": 5.47708187175913e-05,
"loss": 0.7262243032455444,
"step": 826
},
{
"epoch": 0.9957856712823601,
"grad_norm": 0.5242750840999799,
"learning_rate": 5.4671720007106507e-05,
"loss": 0.7025319337844849,
"step": 827
},
{
"epoch": 0.9969897652016857,
"grad_norm": 0.5499578008394467,
"learning_rate": 5.4572602779972006e-05,
"loss": 0.7388126850128174,
"step": 828
},
{
"epoch": 0.9981938591210114,
"grad_norm": 0.5614719219533634,
"learning_rate": 5.447346742904508e-05,
"loss": 0.7533327341079712,
"step": 829
},
{
"epoch": 0.9993979530403372,
"grad_norm": 0.5784650062660298,
"learning_rate": 5.437431434725473e-05,
"loss": 0.763211727142334,
"step": 830
},
{
"epoch": 1.0,
"grad_norm": 0.7637282781143653,
"learning_rate": 5.427514392760034e-05,
"loss": 0.848166823387146,
"step": 831
},
{
"epoch": 1.0012040939193256,
"grad_norm": 0.5817442892771696,
"learning_rate": 5.417595656314997e-05,
"loss": 0.6827788949012756,
"step": 832
},
{
"epoch": 1.0024081878386515,
"grad_norm": 0.549560327372644,
"learning_rate": 5.40767526470388e-05,
"loss": 0.5953079462051392,
"step": 833
},
{
"epoch": 1.0036122817579771,
"grad_norm": 0.5967166975159005,
"learning_rate": 5.39775325724677e-05,
"loss": 0.7045449018478394,
"step": 834
},
{
"epoch": 1.0048163756773028,
"grad_norm": 0.6029293682103201,
"learning_rate": 5.3878296732701515e-05,
"loss": 0.7496785521507263,
"step": 835
},
{
"epoch": 1.0060204695966286,
"grad_norm": 0.47488284344673654,
"learning_rate": 5.377904552106763e-05,
"loss": 0.5429801940917969,
"step": 836
},
{
"epoch": 1.0072245635159542,
"grad_norm": 0.527003189159265,
"learning_rate": 5.367977933095428e-05,
"loss": 0.6961348652839661,
"step": 837
},
{
"epoch": 1.0084286574352799,
"grad_norm": 0.550893561119634,
"learning_rate": 5.3580498555809163e-05,
"loss": 0.6526985764503479,
"step": 838
},
{
"epoch": 1.0096327513546057,
"grad_norm": 0.5220566727238085,
"learning_rate": 5.348120358913773e-05,
"loss": 0.6380011439323425,
"step": 839
},
{
"epoch": 1.0108368452739314,
"grad_norm": 0.5298767597315704,
"learning_rate": 5.338189482450167e-05,
"loss": 0.639511227607727,
"step": 840
},
{
"epoch": 1.012040939193257,
"grad_norm": 0.573656957306192,
"learning_rate": 5.3282572655517416e-05,
"loss": 0.6803268194198608,
"step": 841
},
{
"epoch": 1.0132450331125828,
"grad_norm": 0.5058702143914242,
"learning_rate": 5.318323747585444e-05,
"loss": 0.6620562076568604,
"step": 842
},
{
"epoch": 1.0144491270319085,
"grad_norm": 0.5695889927973388,
"learning_rate": 5.308388967923391e-05,
"loss": 0.6041232943534851,
"step": 843
},
{
"epoch": 1.0156532209512341,
"grad_norm": 0.49054820465846416,
"learning_rate": 5.298452965942687e-05,
"loss": 0.6772005558013916,
"step": 844
},
{
"epoch": 1.01685731487056,
"grad_norm": 0.5392475480353385,
"learning_rate": 5.2885157810252915e-05,
"loss": 0.5783129930496216,
"step": 845
},
{
"epoch": 1.0180614087898856,
"grad_norm": 0.5468382185617752,
"learning_rate": 5.278577452557845e-05,
"loss": 0.5806093215942383,
"step": 846
},
{
"epoch": 1.0192655027092112,
"grad_norm": 0.49514274753944165,
"learning_rate": 5.2686380199315244e-05,
"loss": 0.5545554757118225,
"step": 847
},
{
"epoch": 1.020469596628537,
"grad_norm": 0.5862857103625438,
"learning_rate": 5.2586975225418854e-05,
"loss": 0.5811598896980286,
"step": 848
},
{
"epoch": 1.0216736905478627,
"grad_norm": 0.5703320491579602,
"learning_rate": 5.248755999788699e-05,
"loss": 0.6371333599090576,
"step": 849
},
{
"epoch": 1.0228777844671884,
"grad_norm": 0.5603274134406141,
"learning_rate": 5.2388134910758015e-05,
"loss": 0.4923644959926605,
"step": 850
},
{
"epoch": 1.0240818783865142,
"grad_norm": 0.6170978531608771,
"learning_rate": 5.2288700358109375e-05,
"loss": 0.6948882937431335,
"step": 851
},
{
"epoch": 1.0252859723058398,
"grad_norm": 0.5781012388613602,
"learning_rate": 5.218925673405607e-05,
"loss": 0.6110134124755859,
"step": 852
},
{
"epoch": 1.0264900662251655,
"grad_norm": 0.5804572358644217,
"learning_rate": 5.208980443274899e-05,
"loss": 0.6287789344787598,
"step": 853
},
{
"epoch": 1.0276941601444913,
"grad_norm": 0.5611152572630231,
"learning_rate": 5.199034384837345e-05,
"loss": 0.6175764203071594,
"step": 854
},
{
"epoch": 1.028898254063817,
"grad_norm": 0.5484030238940184,
"learning_rate": 5.189087537514763e-05,
"loss": 0.481817364692688,
"step": 855
},
{
"epoch": 1.0301023479831426,
"grad_norm": 0.5940370858562336,
"learning_rate": 5.179139940732091e-05,
"loss": 0.6743038296699524,
"step": 856
},
{
"epoch": 1.0313064419024685,
"grad_norm": 0.564636164581345,
"learning_rate": 5.169191633917242e-05,
"loss": 0.5929891467094421,
"step": 857
},
{
"epoch": 1.032510535821794,
"grad_norm": 0.6259147551368491,
"learning_rate": 5.159242656500942e-05,
"loss": 0.5457702875137329,
"step": 858
},
{
"epoch": 1.0337146297411197,
"grad_norm": 0.5984443219192407,
"learning_rate": 5.149293047916576e-05,
"loss": 0.6046398878097534,
"step": 859
},
{
"epoch": 1.0349187236604456,
"grad_norm": 0.6903029875072887,
"learning_rate": 5.139342847600028e-05,
"loss": 0.6197146773338318,
"step": 860
},
{
"epoch": 1.0361228175797712,
"grad_norm": 0.5944824318158347,
"learning_rate": 5.12939209498953e-05,
"loss": 0.5844958424568176,
"step": 861
},
{
"epoch": 1.0373269114990968,
"grad_norm": 0.5353294399186319,
"learning_rate": 5.119440829525504e-05,
"loss": 0.6081749796867371,
"step": 862
},
{
"epoch": 1.0385310054184227,
"grad_norm": 0.5960984748872302,
"learning_rate": 5.1094890906504e-05,
"loss": 0.5811648368835449,
"step": 863
},
{
"epoch": 1.0397350993377483,
"grad_norm": 0.6588748465392122,
"learning_rate": 5.0995369178085484e-05,
"loss": 0.5508978366851807,
"step": 864
},
{
"epoch": 1.040939193257074,
"grad_norm": 0.6020747390502208,
"learning_rate": 5.0895843504460005e-05,
"loss": 0.5702750086784363,
"step": 865
},
{
"epoch": 1.0421432871763998,
"grad_norm": 0.6177373296812404,
"learning_rate": 5.0796314280103664e-05,
"loss": 0.6664659976959229,
"step": 866
},
{
"epoch": 1.0433473810957254,
"grad_norm": 0.6117972385380832,
"learning_rate": 5.0696781899506686e-05,
"loss": 0.5547976493835449,
"step": 867
},
{
"epoch": 1.044551475015051,
"grad_norm": 0.5865234270411791,
"learning_rate": 5.059724675717177e-05,
"loss": 0.598779559135437,
"step": 868
},
{
"epoch": 1.045755568934377,
"grad_norm": 0.5835961031116876,
"learning_rate": 5.049770924761259e-05,
"loss": 0.6203871965408325,
"step": 869
},
{
"epoch": 1.0469596628537026,
"grad_norm": 0.6905086635237377,
"learning_rate": 5.039816976535219e-05,
"loss": 0.6205575466156006,
"step": 870
},
{
"epoch": 1.0481637567730282,
"grad_norm": 0.6796150825926938,
"learning_rate": 5.029862870492142e-05,
"loss": 0.5886781215667725,
"step": 871
},
{
"epoch": 1.049367850692354,
"grad_norm": 0.615583079532075,
"learning_rate": 5.0199086460857406e-05,
"loss": 0.6542514562606812,
"step": 872
},
{
"epoch": 1.0505719446116797,
"grad_norm": 0.5678350633461089,
"learning_rate": 5.0099543427701956e-05,
"loss": 0.676892876625061,
"step": 873
},
{
"epoch": 1.0517760385310053,
"grad_norm": 0.6713559084994786,
"learning_rate": 5e-05,
"loss": 0.5744781494140625,
"step": 874
},
{
"epoch": 1.0529801324503312,
"grad_norm": 0.6267712685550183,
"learning_rate": 4.9900456572298055e-05,
"loss": 0.5414940118789673,
"step": 875
},
{
"epoch": 1.0541842263696568,
"grad_norm": 0.6324262535300782,
"learning_rate": 4.980091353914259e-05,
"loss": 0.6841826438903809,
"step": 876
},
{
"epoch": 1.0553883202889824,
"grad_norm": 0.6735743184646561,
"learning_rate": 4.9701371295078603e-05,
"loss": 0.611656665802002,
"step": 877
},
{
"epoch": 1.0565924142083083,
"grad_norm": 0.55330409545385,
"learning_rate": 4.9601830234647824e-05,
"loss": 0.5333291888237,
"step": 878
},
{
"epoch": 1.057796508127634,
"grad_norm": 0.7011089474115021,
"learning_rate": 4.950229075238742e-05,
"loss": 0.6680951118469238,
"step": 879
},
{
"epoch": 1.0590006020469596,
"grad_norm": 0.6334316945847226,
"learning_rate": 4.940275324282824e-05,
"loss": 0.6174753904342651,
"step": 880
},
{
"epoch": 1.0602046959662854,
"grad_norm": 0.6348827129842721,
"learning_rate": 4.930321810049334e-05,
"loss": 0.5034064054489136,
"step": 881
},
{
"epoch": 1.061408789885611,
"grad_norm": 0.5948207083494964,
"learning_rate": 4.920368571989636e-05,
"loss": 0.5429894924163818,
"step": 882
},
{
"epoch": 1.062612883804937,
"grad_norm": 0.6375757341176411,
"learning_rate": 4.910415649554001e-05,
"loss": 0.6564920544624329,
"step": 883
},
{
"epoch": 1.0638169777242625,
"grad_norm": 0.6050162694473955,
"learning_rate": 4.900463082191452e-05,
"loss": 0.606536865234375,
"step": 884
},
{
"epoch": 1.0650210716435882,
"grad_norm": 0.606266694568716,
"learning_rate": 4.890510909349602e-05,
"loss": 0.679326593875885,
"step": 885
},
{
"epoch": 1.0662251655629138,
"grad_norm": 0.6435014713478211,
"learning_rate": 4.880559170474499e-05,
"loss": 0.5882464647293091,
"step": 886
},
{
"epoch": 1.0674292594822397,
"grad_norm": 0.6539530957089141,
"learning_rate": 4.870607905010471e-05,
"loss": 0.6037100553512573,
"step": 887
},
{
"epoch": 1.0686333534015653,
"grad_norm": 0.5801005040789394,
"learning_rate": 4.860657152399973e-05,
"loss": 0.5796064138412476,
"step": 888
},
{
"epoch": 1.0698374473208911,
"grad_norm": 0.6187442737201257,
"learning_rate": 4.850706952083426e-05,
"loss": 0.6228768825531006,
"step": 889
},
{
"epoch": 1.0710415412402168,
"grad_norm": 0.59568276458961,
"learning_rate": 4.840757343499059e-05,
"loss": 0.5712155103683472,
"step": 890
},
{
"epoch": 1.0722456351595424,
"grad_norm": 0.5760016138201668,
"learning_rate": 4.83080836608276e-05,
"loss": 0.5738168954849243,
"step": 891
},
{
"epoch": 1.073449729078868,
"grad_norm": 0.6743277918913058,
"learning_rate": 4.82086005926791e-05,
"loss": 0.5057202577590942,
"step": 892
},
{
"epoch": 1.074653822998194,
"grad_norm": 0.7173948834253054,
"learning_rate": 4.8109124624852386e-05,
"loss": 0.5986368060112,
"step": 893
},
{
"epoch": 1.0758579169175195,
"grad_norm": 0.6066019981357941,
"learning_rate": 4.800965615162655e-05,
"loss": 0.5507107377052307,
"step": 894
},
{
"epoch": 1.0770620108368454,
"grad_norm": 0.570699707922851,
"learning_rate": 4.791019556725104e-05,
"loss": 0.5539663434028625,
"step": 895
},
{
"epoch": 1.078266104756171,
"grad_norm": 0.5990803961904384,
"learning_rate": 4.7810743265943955e-05,
"loss": 0.5781638622283936,
"step": 896
},
{
"epoch": 1.0794701986754967,
"grad_norm": 0.634286495555675,
"learning_rate": 4.771129964189063e-05,
"loss": 0.6107989549636841,
"step": 897
},
{
"epoch": 1.0806742925948223,
"grad_norm": 0.6625886830962654,
"learning_rate": 4.7611865089242004e-05,
"loss": 0.6336471438407898,
"step": 898
},
{
"epoch": 1.0818783865141481,
"grad_norm": 0.5953458742603315,
"learning_rate": 4.751244000211302e-05,
"loss": 0.5361660122871399,
"step": 899
},
{
"epoch": 1.0830824804334738,
"grad_norm": 0.5697913331256542,
"learning_rate": 4.741302477458116e-05,
"loss": 0.4925832450389862,
"step": 900
},
{
"epoch": 1.0842865743527996,
"grad_norm": 0.6567592758325008,
"learning_rate": 4.731361980068476e-05,
"loss": 0.5178099870681763,
"step": 901
},
{
"epoch": 1.0854906682721253,
"grad_norm": 0.6649466885669353,
"learning_rate": 4.7214225474421556e-05,
"loss": 0.6095975637435913,
"step": 902
},
{
"epoch": 1.086694762191451,
"grad_norm": 0.6076096228525512,
"learning_rate": 4.7114842189747096e-05,
"loss": 0.5674281120300293,
"step": 903
},
{
"epoch": 1.0878988561107765,
"grad_norm": 0.6999035612711257,
"learning_rate": 4.701547034057313e-05,
"loss": 0.599638819694519,
"step": 904
},
{
"epoch": 1.0891029500301024,
"grad_norm": 0.653847984948624,
"learning_rate": 4.691611032076611e-05,
"loss": 0.6121338605880737,
"step": 905
},
{
"epoch": 1.090307043949428,
"grad_norm": 0.6955239544796425,
"learning_rate": 4.6816762524145565e-05,
"loss": 0.5960685014724731,
"step": 906
},
{
"epoch": 1.0915111378687539,
"grad_norm": 0.6417736180252925,
"learning_rate": 4.67174273444826e-05,
"loss": 0.6004455089569092,
"step": 907
},
{
"epoch": 1.0927152317880795,
"grad_norm": 0.607562397077653,
"learning_rate": 4.6618105175498336e-05,
"loss": 0.5403653383255005,
"step": 908
},
{
"epoch": 1.0939193257074051,
"grad_norm": 0.6408984013716601,
"learning_rate": 4.6518796410862284e-05,
"loss": 0.6267843246459961,
"step": 909
},
{
"epoch": 1.095123419626731,
"grad_norm": 0.6650116314895101,
"learning_rate": 4.641950144419085e-05,
"loss": 0.5727046728134155,
"step": 910
},
{
"epoch": 1.0963275135460566,
"grad_norm": 0.618267638272318,
"learning_rate": 4.632022066904573e-05,
"loss": 0.5612502098083496,
"step": 911
},
{
"epoch": 1.0975316074653823,
"grad_norm": 0.5895105842263793,
"learning_rate": 4.622095447893238e-05,
"loss": 0.6283978223800659,
"step": 912
},
{
"epoch": 1.098735701384708,
"grad_norm": 0.6817362753022886,
"learning_rate": 4.612170326729849e-05,
"loss": 0.6637049913406372,
"step": 913
},
{
"epoch": 1.0999397953040337,
"grad_norm": 0.6415408879379791,
"learning_rate": 4.6022467427532316e-05,
"loss": 0.5799641013145447,
"step": 914
},
{
"epoch": 1.1011438892233594,
"grad_norm": 0.5739861139089872,
"learning_rate": 4.592324735296122e-05,
"loss": 0.5028109550476074,
"step": 915
},
{
"epoch": 1.1023479831426852,
"grad_norm": 0.6172673919687738,
"learning_rate": 4.582404343685005e-05,
"loss": 0.6666399240493774,
"step": 916
},
{
"epoch": 1.1035520770620109,
"grad_norm": 0.6268529091591767,
"learning_rate": 4.5724856072399666e-05,
"loss": 0.559740424156189,
"step": 917
},
{
"epoch": 1.1047561709813365,
"grad_norm": 0.6492454517429083,
"learning_rate": 4.562568565274528e-05,
"loss": 0.6159163117408752,
"step": 918
},
{
"epoch": 1.1059602649006623,
"grad_norm": 0.6215951034168719,
"learning_rate": 4.552653257095495e-05,
"loss": 0.573366641998291,
"step": 919
},
{
"epoch": 1.107164358819988,
"grad_norm": 0.6066675582411075,
"learning_rate": 4.5427397220028006e-05,
"loss": 0.5606663823127747,
"step": 920
},
{
"epoch": 1.1083684527393136,
"grad_norm": 0.6478045382159789,
"learning_rate": 4.5328279992893505e-05,
"loss": 0.6397379040718079,
"step": 921
},
{
"epoch": 1.1095725466586395,
"grad_norm": 0.6273407365981201,
"learning_rate": 4.5229181282408705e-05,
"loss": 0.5965734720230103,
"step": 922
},
{
"epoch": 1.110776640577965,
"grad_norm": 0.6355763281639094,
"learning_rate": 4.513010148135743e-05,
"loss": 0.6133944988250732,
"step": 923
},
{
"epoch": 1.1119807344972907,
"grad_norm": 0.6119414377862017,
"learning_rate": 4.5031040982448564e-05,
"loss": 0.5194113850593567,
"step": 924
},
{
"epoch": 1.1131848284166166,
"grad_norm": 0.6221044213040818,
"learning_rate": 4.493200017831448e-05,
"loss": 0.5849362015724182,
"step": 925
},
{
"epoch": 1.1143889223359422,
"grad_norm": 0.7107162972479569,
"learning_rate": 4.48329794615095e-05,
"loss": 0.6704531908035278,
"step": 926
},
{
"epoch": 1.1155930162552679,
"grad_norm": 0.5899361795444248,
"learning_rate": 4.4733979224508344e-05,
"loss": 0.5973348021507263,
"step": 927
},
{
"epoch": 1.1167971101745937,
"grad_norm": 0.6255731716471912,
"learning_rate": 4.4634999859704546e-05,
"loss": 0.6177182197570801,
"step": 928
},
{
"epoch": 1.1180012040939193,
"grad_norm": 0.6081461188270509,
"learning_rate": 4.453604175940888e-05,
"loss": 0.5532832741737366,
"step": 929
},
{
"epoch": 1.119205298013245,
"grad_norm": 0.6341727177094064,
"learning_rate": 4.443710531584789e-05,
"loss": 0.6038954257965088,
"step": 930
},
{
"epoch": 1.1204093919325708,
"grad_norm": 0.6377047278223871,
"learning_rate": 4.433819092116223e-05,
"loss": 0.6358925104141235,
"step": 931
},
{
"epoch": 1.1216134858518965,
"grad_norm": 0.613011312662429,
"learning_rate": 4.423929896740522e-05,
"loss": 0.611016571521759,
"step": 932
},
{
"epoch": 1.122817579771222,
"grad_norm": 0.6108189480738041,
"learning_rate": 4.41404298465412e-05,
"loss": 0.5910207033157349,
"step": 933
},
{
"epoch": 1.124021673690548,
"grad_norm": 0.6836800479540479,
"learning_rate": 4.4041583950444004e-05,
"loss": 0.6607179641723633,
"step": 934
},
{
"epoch": 1.1252257676098736,
"grad_norm": 0.7220371157830175,
"learning_rate": 4.3942761670895416e-05,
"loss": 0.7475870847702026,
"step": 935
},
{
"epoch": 1.1264298615291992,
"grad_norm": 0.575873966311362,
"learning_rate": 4.384396339958361e-05,
"loss": 0.5753701329231262,
"step": 936
},
{
"epoch": 1.127633955448525,
"grad_norm": 0.6975267174936021,
"learning_rate": 4.374518952810167e-05,
"loss": 0.5938791632652283,
"step": 937
},
{
"epoch": 1.1288380493678507,
"grad_norm": 0.810609543542089,
"learning_rate": 4.364644044794585e-05,
"loss": 0.5994819402694702,
"step": 938
},
{
"epoch": 1.1300421432871763,
"grad_norm": 0.6441192086179356,
"learning_rate": 4.354771655051424e-05,
"loss": 0.6630600690841675,
"step": 939
},
{
"epoch": 1.1312462372065022,
"grad_norm": 0.6535537361722771,
"learning_rate": 4.344901822710506e-05,
"loss": 0.7066477537155151,
"step": 940
},
{
"epoch": 1.1324503311258278,
"grad_norm": 0.635730964066639,
"learning_rate": 4.33503458689152e-05,
"loss": 0.650691568851471,
"step": 941
},
{
"epoch": 1.1336544250451535,
"grad_norm": 0.666506605551561,
"learning_rate": 4.3251699867038634e-05,
"loss": 0.6128287315368652,
"step": 942
},
{
"epoch": 1.1348585189644793,
"grad_norm": 0.6429322160847161,
"learning_rate": 4.3153080612464835e-05,
"loss": 0.605588972568512,
"step": 943
},
{
"epoch": 1.136062612883805,
"grad_norm": 0.6596630841100535,
"learning_rate": 4.305448849607731e-05,
"loss": 0.5547311305999756,
"step": 944
},
{
"epoch": 1.1372667068031306,
"grad_norm": 0.59544837453043,
"learning_rate": 4.295592390865194e-05,
"loss": 0.5454503297805786,
"step": 945
},
{
"epoch": 1.1384708007224564,
"grad_norm": 0.6496665040917174,
"learning_rate": 4.28573872408556e-05,
"loss": 0.657252311706543,
"step": 946
},
{
"epoch": 1.139674894641782,
"grad_norm": 0.6213252577534326,
"learning_rate": 4.2758878883244366e-05,
"loss": 0.5490322709083557,
"step": 947
},
{
"epoch": 1.1408789885611077,
"grad_norm": 0.6459212910724774,
"learning_rate": 4.2660399226262205e-05,
"loss": 0.6364617943763733,
"step": 948
},
{
"epoch": 1.1420830824804336,
"grad_norm": 0.6483965157725265,
"learning_rate": 4.256194866023929e-05,
"loss": 0.6644007563591003,
"step": 949
},
{
"epoch": 1.1432871763997592,
"grad_norm": 0.6555747499386102,
"learning_rate": 4.24635275753905e-05,
"loss": 0.5882210731506348,
"step": 950
},
{
"epoch": 1.1444912703190848,
"grad_norm": 0.6474747054742495,
"learning_rate": 4.236513636181389e-05,
"loss": 0.6130434274673462,
"step": 951
},
{
"epoch": 1.1456953642384107,
"grad_norm": 0.6528070309969348,
"learning_rate": 4.2266775409489023e-05,
"loss": 0.6332091689109802,
"step": 952
},
{
"epoch": 1.1468994581577363,
"grad_norm": 0.6326281677158688,
"learning_rate": 4.2168445108275636e-05,
"loss": 0.6028267741203308,
"step": 953
},
{
"epoch": 1.148103552077062,
"grad_norm": 0.7328255228339239,
"learning_rate": 4.2070145847911915e-05,
"loss": 0.7219833135604858,
"step": 954
},
{
"epoch": 1.1493076459963878,
"grad_norm": 0.6963105749236023,
"learning_rate": 4.197187801801301e-05,
"loss": 0.6642370820045471,
"step": 955
},
{
"epoch": 1.1505117399157134,
"grad_norm": 0.7480824143370934,
"learning_rate": 4.187364200806956e-05,
"loss": 0.6453176140785217,
"step": 956
},
{
"epoch": 1.151715833835039,
"grad_norm": 0.613906527836311,
"learning_rate": 4.177543820744597e-05,
"loss": 0.5650044083595276,
"step": 957
},
{
"epoch": 1.152919927754365,
"grad_norm": 0.6359766463721599,
"learning_rate": 4.167726700537909e-05,
"loss": 0.6176016330718994,
"step": 958
},
{
"epoch": 1.1541240216736905,
"grad_norm": 0.6531273793295372,
"learning_rate": 4.157912879097651e-05,
"loss": 0.5973043441772461,
"step": 959
},
{
"epoch": 1.1553281155930162,
"grad_norm": 0.6954746070004237,
"learning_rate": 4.1481023953215074e-05,
"loss": 0.5698620080947876,
"step": 960
},
{
"epoch": 1.156532209512342,
"grad_norm": 0.6301476445212942,
"learning_rate": 4.1382952880939346e-05,
"loss": 0.6511484980583191,
"step": 961
},
{
"epoch": 1.1577363034316677,
"grad_norm": 0.6063599734043299,
"learning_rate": 4.128491596286004e-05,
"loss": 0.6116698384284973,
"step": 962
},
{
"epoch": 1.1589403973509933,
"grad_norm": 0.6159011100734033,
"learning_rate": 4.118691358755254e-05,
"loss": 0.6456314325332642,
"step": 963
},
{
"epoch": 1.1601444912703192,
"grad_norm": 0.6053144734793151,
"learning_rate": 4.108894614345527e-05,
"loss": 0.6062647700309753,
"step": 964
},
{
"epoch": 1.1613485851896448,
"grad_norm": 0.6739455259407251,
"learning_rate": 4.0991014018868255e-05,
"loss": 0.6302129626274109,
"step": 965
},
{
"epoch": 1.1625526791089704,
"grad_norm": 0.7191486362024705,
"learning_rate": 4.0893117601951434e-05,
"loss": 0.5403755903244019,
"step": 966
},
{
"epoch": 1.1637567730282963,
"grad_norm": 0.6314741330518358,
"learning_rate": 4.079525728072334e-05,
"loss": 0.574742317199707,
"step": 967
},
{
"epoch": 1.164960866947622,
"grad_norm": 0.5923746377492743,
"learning_rate": 4.0697433443059344e-05,
"loss": 0.5776370763778687,
"step": 968
},
{
"epoch": 1.1661649608669475,
"grad_norm": 0.5947576096895085,
"learning_rate": 4.059964647669025e-05,
"loss": 0.5982620120048523,
"step": 969
},
{
"epoch": 1.1673690547862734,
"grad_norm": 0.6520630378541489,
"learning_rate": 4.050189676920075e-05,
"loss": 0.5546575784683228,
"step": 970
},
{
"epoch": 1.168573148705599,
"grad_norm": 0.5658561021115951,
"learning_rate": 4.0404184708027764e-05,
"loss": 0.5330312252044678,
"step": 971
},
{
"epoch": 1.1697772426249247,
"grad_norm": 0.5873224802923641,
"learning_rate": 4.03065106804591e-05,
"loss": 0.5556476712226868,
"step": 972
},
{
"epoch": 1.1709813365442505,
"grad_norm": 0.6830593650409802,
"learning_rate": 4.0208875073631767e-05,
"loss": 0.6657742261886597,
"step": 973
},
{
"epoch": 1.1721854304635762,
"grad_norm": 0.5743563134699543,
"learning_rate": 4.01112782745305e-05,
"loss": 0.5370659828186035,
"step": 974
},
{
"epoch": 1.1733895243829018,
"grad_norm": 0.697574785052048,
"learning_rate": 4.001372066998621e-05,
"loss": 0.6451708078384399,
"step": 975
},
{
"epoch": 1.1745936183022276,
"grad_norm": 0.6980507337519486,
"learning_rate": 3.9916202646674454e-05,
"loss": 0.6033217310905457,
"step": 976
},
{
"epoch": 1.1757977122215533,
"grad_norm": 0.6669048212504026,
"learning_rate": 3.981872459111394e-05,
"loss": 0.5445959568023682,
"step": 977
},
{
"epoch": 1.177001806140879,
"grad_norm": 0.6679306152523738,
"learning_rate": 3.9721286889664946e-05,
"loss": 0.6711142659187317,
"step": 978
},
{
"epoch": 1.1782059000602048,
"grad_norm": 0.5850303780605345,
"learning_rate": 3.962388992852778e-05,
"loss": 0.5332244038581848,
"step": 979
},
{
"epoch": 1.1794099939795304,
"grad_norm": 0.6775732780820883,
"learning_rate": 3.9526534093741294e-05,
"loss": 0.5913684368133545,
"step": 980
},
{
"epoch": 1.180614087898856,
"grad_norm": 0.6428401954730476,
"learning_rate": 3.94292197711813e-05,
"loss": 0.599868893623352,
"step": 981
},
{
"epoch": 1.1818181818181819,
"grad_norm": 0.6257846327079057,
"learning_rate": 3.933194734655916e-05,
"loss": 0.6274338960647583,
"step": 982
},
{
"epoch": 1.1830222757375075,
"grad_norm": 0.6036724683992304,
"learning_rate": 3.923471720542007e-05,
"loss": 0.6208685636520386,
"step": 983
},
{
"epoch": 1.1842263696568331,
"grad_norm": 0.6007109898517125,
"learning_rate": 3.9137529733141705e-05,
"loss": 0.5919234752655029,
"step": 984
},
{
"epoch": 1.185430463576159,
"grad_norm": 0.6913099793661222,
"learning_rate": 3.904038531493257e-05,
"loss": 0.6586284041404724,
"step": 985
},
{
"epoch": 1.1866345574954846,
"grad_norm": 0.7117714377535543,
"learning_rate": 3.894328433583053e-05,
"loss": 0.5739585757255554,
"step": 986
},
{
"epoch": 1.1878386514148103,
"grad_norm": 0.6280207911331188,
"learning_rate": 3.8846227180701335e-05,
"loss": 0.6130982637405396,
"step": 987
},
{
"epoch": 1.1890427453341361,
"grad_norm": 0.6768881599258235,
"learning_rate": 3.874921423423697e-05,
"loss": 0.5182419419288635,
"step": 988
},
{
"epoch": 1.1902468392534618,
"grad_norm": 0.6047329103995114,
"learning_rate": 3.86522458809542e-05,
"loss": 0.6246413588523865,
"step": 989
},
{
"epoch": 1.1914509331727874,
"grad_norm": 0.6555178443633155,
"learning_rate": 3.855532250519307e-05,
"loss": 0.5194020867347717,
"step": 990
},
{
"epoch": 1.1926550270921132,
"grad_norm": 0.6377027852734348,
"learning_rate": 3.845844449111535e-05,
"loss": 0.6242698431015015,
"step": 991
},
{
"epoch": 1.1938591210114389,
"grad_norm": 0.6862351164925463,
"learning_rate": 3.8361612222703015e-05,
"loss": 0.6675893068313599,
"step": 992
},
{
"epoch": 1.1950632149307645,
"grad_norm": 0.6449272420655037,
"learning_rate": 3.826482608375671e-05,
"loss": 0.5611934065818787,
"step": 993
},
{
"epoch": 1.1962673088500904,
"grad_norm": 0.6042233998603995,
"learning_rate": 3.816808645789425e-05,
"loss": 0.5754957795143127,
"step": 994
},
{
"epoch": 1.197471402769416,
"grad_norm": 0.678102187170135,
"learning_rate": 3.8071393728549074e-05,
"loss": 0.5788442492485046,
"step": 995
},
{
"epoch": 1.1986754966887416,
"grad_norm": 0.6247552561127608,
"learning_rate": 3.79747482789688e-05,
"loss": 0.5228564143180847,
"step": 996
},
{
"epoch": 1.1998795906080675,
"grad_norm": 0.6400466034518847,
"learning_rate": 3.787815049221361e-05,
"loss": 0.5728960037231445,
"step": 997
},
{
"epoch": 1.2010836845273931,
"grad_norm": 0.6421828793157212,
"learning_rate": 3.778160075115476e-05,
"loss": 0.6080710887908936,
"step": 998
},
{
"epoch": 1.2022877784467187,
"grad_norm": 0.5804730465715611,
"learning_rate": 3.768509943847309e-05,
"loss": 0.49825185537338257,
"step": 999
},
{
"epoch": 1.2034918723660446,
"grad_norm": 0.6623008537622053,
"learning_rate": 3.758864693665748e-05,
"loss": 0.6732375621795654,
"step": 1000
},
{
"epoch": 1.2046959662853702,
"grad_norm": 0.6622552680722864,
"learning_rate": 3.749224362800338e-05,
"loss": 0.62694251537323,
"step": 1001
},
{
"epoch": 1.205900060204696,
"grad_norm": 0.6714213622905841,
"learning_rate": 3.739588989461123e-05,
"loss": 0.5858567357063293,
"step": 1002
},
{
"epoch": 1.2071041541240217,
"grad_norm": 0.6380215631945886,
"learning_rate": 3.729958611838496e-05,
"loss": 0.5421775579452515,
"step": 1003
},
{
"epoch": 1.2083082480433474,
"grad_norm": 0.618393287531136,
"learning_rate": 3.720333268103055e-05,
"loss": 0.6375395059585571,
"step": 1004
},
{
"epoch": 1.209512341962673,
"grad_norm": 0.7051542848169918,
"learning_rate": 3.7107129964054366e-05,
"loss": 0.6745729446411133,
"step": 1005
},
{
"epoch": 1.2107164358819988,
"grad_norm": 0.6196842057552577,
"learning_rate": 3.701097834876185e-05,
"loss": 0.5497210025787354,
"step": 1006
},
{
"epoch": 1.2119205298013245,
"grad_norm": 0.6326550691274164,
"learning_rate": 3.691487821625583e-05,
"loss": 0.6627382040023804,
"step": 1007
},
{
"epoch": 1.2131246237206503,
"grad_norm": 0.6180702890104406,
"learning_rate": 3.6818829947435076e-05,
"loss": 0.563970685005188,
"step": 1008
},
{
"epoch": 1.214328717639976,
"grad_norm": 0.6254730729959757,
"learning_rate": 3.672283392299282e-05,
"loss": 0.5731421709060669,
"step": 1009
},
{
"epoch": 1.2155328115593016,
"grad_norm": 0.6346449994435577,
"learning_rate": 3.66268905234152e-05,
"loss": 0.6485154032707214,
"step": 1010
},
{
"epoch": 1.2167369054786272,
"grad_norm": 0.6120641977727209,
"learning_rate": 3.653100012897983e-05,
"loss": 0.6196368932723999,
"step": 1011
},
{
"epoch": 1.217940999397953,
"grad_norm": 0.6484469517458605,
"learning_rate": 3.643516311975413e-05,
"loss": 0.6733512878417969,
"step": 1012
},
{
"epoch": 1.2191450933172787,
"grad_norm": 0.6420590529471717,
"learning_rate": 3.633937987559402e-05,
"loss": 0.6339029669761658,
"step": 1013
},
{
"epoch": 1.2203491872366046,
"grad_norm": 0.6565599374629973,
"learning_rate": 3.624365077614226e-05,
"loss": 0.5736485719680786,
"step": 1014
},
{
"epoch": 1.2215532811559302,
"grad_norm": 0.7366624144305671,
"learning_rate": 3.614797620082703e-05,
"loss": 0.6090103387832642,
"step": 1015
},
{
"epoch": 1.2227573750752558,
"grad_norm": 0.6316786850562287,
"learning_rate": 3.605235652886042e-05,
"loss": 0.6597657203674316,
"step": 1016
},
{
"epoch": 1.2239614689945815,
"grad_norm": 0.6785270644589948,
"learning_rate": 3.595679213923685e-05,
"loss": 0.5462619066238403,
"step": 1017
},
{
"epoch": 1.2251655629139073,
"grad_norm": 0.667186505230287,
"learning_rate": 3.586128341073167e-05,
"loss": 0.6368972063064575,
"step": 1018
},
{
"epoch": 1.226369656833233,
"grad_norm": 0.6823288520473675,
"learning_rate": 3.57658307218996e-05,
"loss": 0.6157046556472778,
"step": 1019
},
{
"epoch": 1.2275737507525588,
"grad_norm": 0.6431142858536705,
"learning_rate": 3.567043445107326e-05,
"loss": 0.6580021381378174,
"step": 1020
},
{
"epoch": 1.2287778446718844,
"grad_norm": 0.69172361080416,
"learning_rate": 3.5575094976361625e-05,
"loss": 0.6095155477523804,
"step": 1021
},
{
"epoch": 1.22998193859121,
"grad_norm": 0.6221213905726743,
"learning_rate": 3.5479812675648575e-05,
"loss": 0.5716145634651184,
"step": 1022
},
{
"epoch": 1.2311860325105357,
"grad_norm": 0.8043447507612648,
"learning_rate": 3.5384587926591355e-05,
"loss": 0.6103835701942444,
"step": 1023
},
{
"epoch": 1.2323901264298616,
"grad_norm": 0.6959804548679159,
"learning_rate": 3.5289421106619126e-05,
"loss": 0.5943867564201355,
"step": 1024
},
{
"epoch": 1.2335942203491872,
"grad_norm": 0.6142985696828224,
"learning_rate": 3.519431259293147e-05,
"loss": 0.5688106417655945,
"step": 1025
},
{
"epoch": 1.234798314268513,
"grad_norm": 0.6539902425602039,
"learning_rate": 3.509926276249676e-05,
"loss": 0.5941796898841858,
"step": 1026
},
{
"epoch": 1.2360024081878387,
"grad_norm": 0.6420380526278909,
"learning_rate": 3.500427199205091e-05,
"loss": 0.6068412661552429,
"step": 1027
},
{
"epoch": 1.2372065021071643,
"grad_norm": 0.6456452908855624,
"learning_rate": 3.4909340658095646e-05,
"loss": 0.6149383783340454,
"step": 1028
},
{
"epoch": 1.23841059602649,
"grad_norm": 0.5554157694390899,
"learning_rate": 3.4814469136897165e-05,
"loss": 0.4858720898628235,
"step": 1029
},
{
"epoch": 1.2396146899458158,
"grad_norm": 0.6429406598986497,
"learning_rate": 3.471965780448461e-05,
"loss": 0.5543316602706909,
"step": 1030
},
{
"epoch": 1.2408187838651414,
"grad_norm": 0.6541248801974615,
"learning_rate": 3.462490703664846e-05,
"loss": 0.5952856540679932,
"step": 1031
},
{
"epoch": 1.2420228777844673,
"grad_norm": 0.6604177891554712,
"learning_rate": 3.4530217208939274e-05,
"loss": 0.6849150061607361,
"step": 1032
},
{
"epoch": 1.243226971703793,
"grad_norm": 0.6675006411124658,
"learning_rate": 3.443558869666598e-05,
"loss": 0.5796087980270386,
"step": 1033
},
{
"epoch": 1.2444310656231186,
"grad_norm": 0.6948023904823086,
"learning_rate": 3.434102187489451e-05,
"loss": 0.665795624256134,
"step": 1034
},
{
"epoch": 1.2456351595424442,
"grad_norm": 0.6579390174814185,
"learning_rate": 3.424651711844632e-05,
"loss": 0.6483998894691467,
"step": 1035
},
{
"epoch": 1.24683925346177,
"grad_norm": 0.7001785815128132,
"learning_rate": 3.415207480189676e-05,
"loss": 0.6428192853927612,
"step": 1036
},
{
"epoch": 1.2480433473810957,
"grad_norm": 0.5758736239689276,
"learning_rate": 3.405769529957381e-05,
"loss": 0.603247880935669,
"step": 1037
},
{
"epoch": 1.2492474413004215,
"grad_norm": 0.6703140182317202,
"learning_rate": 3.396337898555642e-05,
"loss": 0.6470834016799927,
"step": 1038
},
{
"epoch": 1.2504515352197472,
"grad_norm": 0.6663209203892997,
"learning_rate": 3.386912623367311e-05,
"loss": 0.6754087805747986,
"step": 1039
},
{
"epoch": 1.2516556291390728,
"grad_norm": 0.7160516255838019,
"learning_rate": 3.377493741750044e-05,
"loss": 0.5863697528839111,
"step": 1040
},
{
"epoch": 1.2528597230583984,
"grad_norm": 0.6856060560436505,
"learning_rate": 3.36808129103616e-05,
"loss": 0.6012188196182251,
"step": 1041
},
{
"epoch": 1.2540638169777243,
"grad_norm": 0.5676057074982405,
"learning_rate": 3.358675308532486e-05,
"loss": 0.5652576684951782,
"step": 1042
},
{
"epoch": 1.25526791089705,
"grad_norm": 0.5680031504105864,
"learning_rate": 3.349275831520212e-05,
"loss": 0.5598222017288208,
"step": 1043
},
{
"epoch": 1.2564720048163758,
"grad_norm": 0.6063379708273372,
"learning_rate": 3.339882897254746e-05,
"loss": 0.5237823128700256,
"step": 1044
},
{
"epoch": 1.2576760987357014,
"grad_norm": 0.6099579368555109,
"learning_rate": 3.330496542965556e-05,
"loss": 0.6714369058609009,
"step": 1045
},
{
"epoch": 1.258880192655027,
"grad_norm": 0.6315160010925641,
"learning_rate": 3.3211168058560386e-05,
"loss": 0.6626559495925903,
"step": 1046
},
{
"epoch": 1.2600842865743527,
"grad_norm": 0.6603687480331342,
"learning_rate": 3.311743723103357e-05,
"loss": 0.5565869808197021,
"step": 1047
},
{
"epoch": 1.2612883804936785,
"grad_norm": 0.6159593860438909,
"learning_rate": 3.302377331858302e-05,
"loss": 0.6076995134353638,
"step": 1048
},
{
"epoch": 1.2624924744130042,
"grad_norm": 0.6607517926642036,
"learning_rate": 3.293017669245144e-05,
"loss": 0.6004924774169922,
"step": 1049
},
{
"epoch": 1.26369656833233,
"grad_norm": 0.6679520254927858,
"learning_rate": 3.283664772361476e-05,
"loss": 0.5795238018035889,
"step": 1050
},
{
"epoch": 1.2649006622516556,
"grad_norm": 0.6506529440156168,
"learning_rate": 3.274318678278085e-05,
"loss": 0.5556989312171936,
"step": 1051
},
{
"epoch": 1.2661047561709813,
"grad_norm": 0.6321116119404896,
"learning_rate": 3.264979424038789e-05,
"loss": 0.6258856058120728,
"step": 1052
},
{
"epoch": 1.267308850090307,
"grad_norm": 0.6225820795770387,
"learning_rate": 3.255647046660296e-05,
"loss": 0.5914543271064758,
"step": 1053
},
{
"epoch": 1.2685129440096328,
"grad_norm": 0.5777885553006885,
"learning_rate": 3.246321583132058e-05,
"loss": 0.5762636065483093,
"step": 1054
},
{
"epoch": 1.2697170379289584,
"grad_norm": 0.68651261991656,
"learning_rate": 3.237003070416122e-05,
"loss": 0.5639084577560425,
"step": 1055
},
{
"epoch": 1.2709211318482843,
"grad_norm": 0.6935824769960947,
"learning_rate": 3.22769154544699e-05,
"loss": 0.5805132985115051,
"step": 1056
},
{
"epoch": 1.27212522576761,
"grad_norm": 0.6309093425474914,
"learning_rate": 3.2183870451314624e-05,
"loss": 0.5962166786193848,
"step": 1057
},
{
"epoch": 1.2733293196869355,
"grad_norm": 0.6298740995123034,
"learning_rate": 3.2090896063485e-05,
"loss": 0.519180178642273,
"step": 1058
},
{
"epoch": 1.2745334136062612,
"grad_norm": 0.6912341376043626,
"learning_rate": 3.19979926594907e-05,
"loss": 0.5523577332496643,
"step": 1059
},
{
"epoch": 1.275737507525587,
"grad_norm": 0.6018902645477407,
"learning_rate": 3.190516060756009e-05,
"loss": 0.500494122505188,
"step": 1060
},
{
"epoch": 1.2769416014449126,
"grad_norm": 0.6754925868661024,
"learning_rate": 3.181240027563875e-05,
"loss": 0.5533350706100464,
"step": 1061
},
{
"epoch": 1.2781456953642385,
"grad_norm": 0.626419141611913,
"learning_rate": 3.171971203138795e-05,
"loss": 0.5055978894233704,
"step": 1062
},
{
"epoch": 1.2793497892835641,
"grad_norm": 0.7176869318393736,
"learning_rate": 3.1627096242183243e-05,
"loss": 0.6873006224632263,
"step": 1063
},
{
"epoch": 1.2805538832028898,
"grad_norm": 0.6521112813116352,
"learning_rate": 3.1534553275112994e-05,
"loss": 0.5471648573875427,
"step": 1064
},
{
"epoch": 1.2817579771222154,
"grad_norm": 0.6266761704518603,
"learning_rate": 3.144208349697696e-05,
"loss": 0.5388270616531372,
"step": 1065
},
{
"epoch": 1.2829620710415413,
"grad_norm": 0.6968291098173685,
"learning_rate": 3.13496872742848e-05,
"loss": 0.6154459714889526,
"step": 1066
},
{
"epoch": 1.2841661649608669,
"grad_norm": 0.6527459812005079,
"learning_rate": 3.125736497325464e-05,
"loss": 0.7208917140960693,
"step": 1067
},
{
"epoch": 1.2853702588801927,
"grad_norm": 0.7151546194251819,
"learning_rate": 3.116511695981157e-05,
"loss": 0.5722376108169556,
"step": 1068
},
{
"epoch": 1.2865743527995184,
"grad_norm": 0.6583949661812686,
"learning_rate": 3.107294359958628e-05,
"loss": 0.559638500213623,
"step": 1069
},
{
"epoch": 1.287778446718844,
"grad_norm": 0.6587475851250851,
"learning_rate": 3.098084525791356e-05,
"loss": 0.6353858709335327,
"step": 1070
},
{
"epoch": 1.2889825406381699,
"grad_norm": 0.652538682115768,
"learning_rate": 3.0888822299830854e-05,
"loss": 0.6150951385498047,
"step": 1071
},
{
"epoch": 1.2901866345574955,
"grad_norm": 0.6586896874481181,
"learning_rate": 3.079687509007682e-05,
"loss": 0.6220561861991882,
"step": 1072
},
{
"epoch": 1.2913907284768211,
"grad_norm": 0.6552350351155425,
"learning_rate": 3.070500399308987e-05,
"loss": 0.675493597984314,
"step": 1073
},
{
"epoch": 1.292594822396147,
"grad_norm": 0.6060971894323066,
"learning_rate": 3.0613209373006745e-05,
"loss": 0.5046559572219849,
"step": 1074
},
{
"epoch": 1.2937989163154726,
"grad_norm": 0.6233886266058096,
"learning_rate": 3.052149159366109e-05,
"loss": 0.6072496175765991,
"step": 1075
},
{
"epoch": 1.2950030102347982,
"grad_norm": 0.6311730480045631,
"learning_rate": 3.0429851018581955e-05,
"loss": 0.5360444784164429,
"step": 1076
},
{
"epoch": 1.296207104154124,
"grad_norm": 0.6939180477433627,
"learning_rate": 3.0338288010992398e-05,
"loss": 0.5549867153167725,
"step": 1077
},
{
"epoch": 1.2974111980734497,
"grad_norm": 0.6219878385314985,
"learning_rate": 3.024680293380804e-05,
"loss": 0.5277044177055359,
"step": 1078
},
{
"epoch": 1.2986152919927754,
"grad_norm": 0.6140075443332544,
"learning_rate": 3.0155396149635585e-05,
"loss": 0.571826159954071,
"step": 1079
},
{
"epoch": 1.2998193859121012,
"grad_norm": 0.6707947564189365,
"learning_rate": 3.0064068020771486e-05,
"loss": 0.6137290596961975,
"step": 1080
},
{
"epoch": 1.3010234798314269,
"grad_norm": 0.7065984932280848,
"learning_rate": 2.9972818909200396e-05,
"loss": 0.6794793009757996,
"step": 1081
},
{
"epoch": 1.3022275737507525,
"grad_norm": 0.6139158900775069,
"learning_rate": 2.988164917659375e-05,
"loss": 0.5482147336006165,
"step": 1082
},
{
"epoch": 1.3034316676700783,
"grad_norm": 0.7064635472503787,
"learning_rate": 2.979055918430842e-05,
"loss": 0.6055623292922974,
"step": 1083
},
{
"epoch": 1.304635761589404,
"grad_norm": 0.6979920492052719,
"learning_rate": 2.9699549293385176e-05,
"loss": 0.6477084159851074,
"step": 1084
},
{
"epoch": 1.3058398555087296,
"grad_norm": 0.7089875208013002,
"learning_rate": 2.9608619864547337e-05,
"loss": 0.5869253873825073,
"step": 1085
},
{
"epoch": 1.3070439494280555,
"grad_norm": 0.5833004568834135,
"learning_rate": 2.95177712581993e-05,
"loss": 0.5308574438095093,
"step": 1086
},
{
"epoch": 1.308248043347381,
"grad_norm": 0.6853013870438273,
"learning_rate": 2.9427003834425075e-05,
"loss": 0.6042900681495667,
"step": 1087
},
{
"epoch": 1.3094521372667067,
"grad_norm": 0.5952406502795179,
"learning_rate": 2.9336317952986946e-05,
"loss": 0.5966813564300537,
"step": 1088
},
{
"epoch": 1.3106562311860326,
"grad_norm": 0.5739932817506537,
"learning_rate": 2.924571397332398e-05,
"loss": 0.5839823484420776,
"step": 1089
},
{
"epoch": 1.3118603251053582,
"grad_norm": 0.6937286005428881,
"learning_rate": 2.915519225455065e-05,
"loss": 0.6247888803482056,
"step": 1090
},
{
"epoch": 1.3130644190246838,
"grad_norm": 0.6289172233700483,
"learning_rate": 2.906475315545532e-05,
"loss": 0.582708477973938,
"step": 1091
},
{
"epoch": 1.3142685129440097,
"grad_norm": 0.648225763610444,
"learning_rate": 2.8974397034498917e-05,
"loss": 0.532221257686615,
"step": 1092
},
{
"epoch": 1.3154726068633353,
"grad_norm": 0.6334032293391493,
"learning_rate": 2.8884124249813526e-05,
"loss": 0.582087516784668,
"step": 1093
},
{
"epoch": 1.316676700782661,
"grad_norm": 0.6097615056252118,
"learning_rate": 2.879393515920087e-05,
"loss": 0.5749963521957397,
"step": 1094
},
{
"epoch": 1.3178807947019868,
"grad_norm": 0.6754307122551152,
"learning_rate": 2.8703830120130976e-05,
"loss": 0.6027169227600098,
"step": 1095
},
{
"epoch": 1.3190848886213125,
"grad_norm": 0.6522477036329215,
"learning_rate": 2.8613809489740662e-05,
"loss": 0.5660452246665955,
"step": 1096
},
{
"epoch": 1.320288982540638,
"grad_norm": 0.6597895217196497,
"learning_rate": 2.8523873624832247e-05,
"loss": 0.6122511029243469,
"step": 1097
},
{
"epoch": 1.321493076459964,
"grad_norm": 0.6648075572953821,
"learning_rate": 2.8434022881872103e-05,
"loss": 0.6023167967796326,
"step": 1098
},
{
"epoch": 1.3226971703792896,
"grad_norm": 0.6484160772837273,
"learning_rate": 2.8344257616989144e-05,
"loss": 0.6276779770851135,
"step": 1099
},
{
"epoch": 1.3239012642986152,
"grad_norm": 0.737747975114455,
"learning_rate": 2.8254578185973556e-05,
"loss": 0.6234055757522583,
"step": 1100
},
{
"epoch": 1.325105358217941,
"grad_norm": 0.6489780855502737,
"learning_rate": 2.8164984944275242e-05,
"loss": 0.5611757040023804,
"step": 1101
},
{
"epoch": 1.3263094521372667,
"grad_norm": 0.7117233686951562,
"learning_rate": 2.8075478247002518e-05,
"loss": 0.5399574637413025,
"step": 1102
},
{
"epoch": 1.3275135460565926,
"grad_norm": 0.6407050506409804,
"learning_rate": 2.7986058448920728e-05,
"loss": 0.5748783946037292,
"step": 1103
},
{
"epoch": 1.3287176399759182,
"grad_norm": 0.667933099755268,
"learning_rate": 2.7896725904450748e-05,
"loss": 0.6935731768608093,
"step": 1104
},
{
"epoch": 1.3299217338952438,
"grad_norm": 0.6860475556498444,
"learning_rate": 2.7807480967667576e-05,
"loss": 0.7096599340438843,
"step": 1105
},
{
"epoch": 1.3311258278145695,
"grad_norm": 0.6332362503131266,
"learning_rate": 2.7718323992299e-05,
"loss": 0.5988775491714478,
"step": 1106
},
{
"epoch": 1.3323299217338953,
"grad_norm": 0.6014992532536464,
"learning_rate": 2.7629255331724225e-05,
"loss": 0.5470643639564514,
"step": 1107
},
{
"epoch": 1.333534015653221,
"grad_norm": 0.5861204952940166,
"learning_rate": 2.7540275338972343e-05,
"loss": 0.518825352191925,
"step": 1108
},
{
"epoch": 1.3347381095725468,
"grad_norm": 0.6738797248522155,
"learning_rate": 2.7451384366721057e-05,
"loss": 0.6365870237350464,
"step": 1109
},
{
"epoch": 1.3359422034918724,
"grad_norm": 0.6801087675041407,
"learning_rate": 2.7362582767295158e-05,
"loss": 0.6318552494049072,
"step": 1110
},
{
"epoch": 1.337146297411198,
"grad_norm": 0.6750940307980116,
"learning_rate": 2.7273870892665253e-05,
"loss": 0.5534715056419373,
"step": 1111
},
{
"epoch": 1.3383503913305237,
"grad_norm": 0.6013798363241066,
"learning_rate": 2.718524909444635e-05,
"loss": 0.529080867767334,
"step": 1112
},
{
"epoch": 1.3395544852498495,
"grad_norm": 0.615240489839793,
"learning_rate": 2.709671772389639e-05,
"loss": 0.5268905162811279,
"step": 1113
},
{
"epoch": 1.3407585791691752,
"grad_norm": 0.6916244806655932,
"learning_rate": 2.7008277131914916e-05,
"loss": 0.7194089293479919,
"step": 1114
},
{
"epoch": 1.341962673088501,
"grad_norm": 0.6586961779117202,
"learning_rate": 2.691992766904161e-05,
"loss": 0.5073974132537842,
"step": 1115
},
{
"epoch": 1.3431667670078267,
"grad_norm": 0.6538582927789098,
"learning_rate": 2.683166968545503e-05,
"loss": 0.5996333360671997,
"step": 1116
},
{
"epoch": 1.3443708609271523,
"grad_norm": 0.630993703763636,
"learning_rate": 2.6743503530971138e-05,
"loss": 0.6223669648170471,
"step": 1117
},
{
"epoch": 1.345574954846478,
"grad_norm": 0.5828107330626588,
"learning_rate": 2.6655429555041922e-05,
"loss": 0.5439039468765259,
"step": 1118
},
{
"epoch": 1.3467790487658038,
"grad_norm": 0.956766479698746,
"learning_rate": 2.656744810675397e-05,
"loss": 0.621475338935852,
"step": 1119
},
{
"epoch": 1.3479831426851294,
"grad_norm": 0.6867270570515961,
"learning_rate": 2.6479559534827168e-05,
"loss": 0.6019476652145386,
"step": 1120
},
{
"epoch": 1.3491872366044553,
"grad_norm": 0.6197122045694213,
"learning_rate": 2.6391764187613277e-05,
"loss": 0.5331425070762634,
"step": 1121
},
{
"epoch": 1.350391330523781,
"grad_norm": 0.6590947385881453,
"learning_rate": 2.6304062413094588e-05,
"loss": 0.5331645011901855,
"step": 1122
},
{
"epoch": 1.3515954244431065,
"grad_norm": 0.6661403360540019,
"learning_rate": 2.6216454558882486e-05,
"loss": 0.559219479560852,
"step": 1123
},
{
"epoch": 1.3527995183624322,
"grad_norm": 0.6209742376495854,
"learning_rate": 2.6128940972216044e-05,
"loss": 0.5601733922958374,
"step": 1124
},
{
"epoch": 1.354003612281758,
"grad_norm": 0.6299069556714798,
"learning_rate": 2.604152199996077e-05,
"loss": 0.6254225969314575,
"step": 1125
},
{
"epoch": 1.3552077062010837,
"grad_norm": 0.6211292902545004,
"learning_rate": 2.5954197988607133e-05,
"loss": 0.5158826112747192,
"step": 1126
},
{
"epoch": 1.3564118001204095,
"grad_norm": 0.6890145800181504,
"learning_rate": 2.586696928426926e-05,
"loss": 0.6442182064056396,
"step": 1127
},
{
"epoch": 1.3576158940397351,
"grad_norm": 0.7574739136778598,
"learning_rate": 2.5779836232683442e-05,
"loss": 0.6257715821266174,
"step": 1128
},
{
"epoch": 1.3588199879590608,
"grad_norm": 0.7222962521399886,
"learning_rate": 2.5692799179206906e-05,
"loss": 0.5151680111885071,
"step": 1129
},
{
"epoch": 1.3600240818783864,
"grad_norm": 0.6764668123220063,
"learning_rate": 2.5605858468816358e-05,
"loss": 0.5849635004997253,
"step": 1130
},
{
"epoch": 1.3612281757977123,
"grad_norm": 0.68913282009391,
"learning_rate": 2.5519014446106682e-05,
"loss": 0.6753305792808533,
"step": 1131
},
{
"epoch": 1.362432269717038,
"grad_norm": 0.615830753952669,
"learning_rate": 2.5432267455289503e-05,
"loss": 0.4868830442428589,
"step": 1132
},
{
"epoch": 1.3636363636363638,
"grad_norm": 0.6358775421404815,
"learning_rate": 2.5345617840191828e-05,
"loss": 0.5930942296981812,
"step": 1133
},
{
"epoch": 1.3648404575556894,
"grad_norm": 0.6783047897821732,
"learning_rate": 2.5259065944254746e-05,
"loss": 0.6682963371276855,
"step": 1134
},
{
"epoch": 1.366044551475015,
"grad_norm": 0.8039581825606367,
"learning_rate": 2.5172612110532012e-05,
"loss": 0.5669816732406616,
"step": 1135
},
{
"epoch": 1.3672486453943407,
"grad_norm": 0.6634970884500128,
"learning_rate": 2.5086256681688745e-05,
"loss": 0.5131814479827881,
"step": 1136
},
{
"epoch": 1.3684527393136665,
"grad_norm": 0.6637549272097328,
"learning_rate": 2.500000000000001e-05,
"loss": 0.6064308881759644,
"step": 1137
},
{
"epoch": 1.3696568332329921,
"grad_norm": 0.6874956301642076,
"learning_rate": 2.491384240734943e-05,
"loss": 0.6396753787994385,
"step": 1138
},
{
"epoch": 1.370860927152318,
"grad_norm": 0.7435580202739259,
"learning_rate": 2.4827784245227937e-05,
"loss": 0.5631410479545593,
"step": 1139
},
{
"epoch": 1.3720650210716436,
"grad_norm": 0.6683288312051743,
"learning_rate": 2.4741825854732344e-05,
"loss": 0.6863007545471191,
"step": 1140
},
{
"epoch": 1.3732691149909693,
"grad_norm": 0.6739448022666904,
"learning_rate": 2.4655967576564064e-05,
"loss": 0.6584625244140625,
"step": 1141
},
{
"epoch": 1.374473208910295,
"grad_norm": 0.6100124389564276,
"learning_rate": 2.4570209751027623e-05,
"loss": 0.5866068005561829,
"step": 1142
},
{
"epoch": 1.3756773028296208,
"grad_norm": 0.7068260860149022,
"learning_rate": 2.4484552718029448e-05,
"loss": 0.5527026653289795,
"step": 1143
},
{
"epoch": 1.3768813967489464,
"grad_norm": 0.7130310928948262,
"learning_rate": 2.439899681707646e-05,
"loss": 0.6723543405532837,
"step": 1144
},
{
"epoch": 1.3780854906682722,
"grad_norm": 0.6784531758296957,
"learning_rate": 2.4313542387274717e-05,
"loss": 0.5644799470901489,
"step": 1145
},
{
"epoch": 1.3792895845875979,
"grad_norm": 0.7129481328571383,
"learning_rate": 2.4228189767328173e-05,
"loss": 0.5912647843360901,
"step": 1146
},
{
"epoch": 1.3804936785069235,
"grad_norm": 0.6444292423961997,
"learning_rate": 2.4142939295537126e-05,
"loss": 0.5156216025352478,
"step": 1147
},
{
"epoch": 1.3816977724262491,
"grad_norm": 0.6970413746112917,
"learning_rate": 2.405779130979709e-05,
"loss": 0.5808528065681458,
"step": 1148
},
{
"epoch": 1.382901866345575,
"grad_norm": 0.5978220622551951,
"learning_rate": 2.3972746147597335e-05,
"loss": 0.4604603946208954,
"step": 1149
},
{
"epoch": 1.3841059602649006,
"grad_norm": 0.6748388586558529,
"learning_rate": 2.388780414601959e-05,
"loss": 0.5760262608528137,
"step": 1150
},
{
"epoch": 1.3853100541842265,
"grad_norm": 0.707341335363254,
"learning_rate": 2.380296564173674e-05,
"loss": 0.5673799514770508,
"step": 1151
},
{
"epoch": 1.3865141481035521,
"grad_norm": 0.6505915334242497,
"learning_rate": 2.3718230971011386e-05,
"loss": 0.5893689393997192,
"step": 1152
},
{
"epoch": 1.3877182420228777,
"grad_norm": 0.5960802185045463,
"learning_rate": 2.3633600469694606e-05,
"loss": 0.5265970230102539,
"step": 1153
},
{
"epoch": 1.3889223359422034,
"grad_norm": 0.6562878017537669,
"learning_rate": 2.3549074473224612e-05,
"loss": 0.6685402989387512,
"step": 1154
},
{
"epoch": 1.3901264298615292,
"grad_norm": 0.6714792590660233,
"learning_rate": 2.3464653316625385e-05,
"loss": 0.6193652153015137,
"step": 1155
},
{
"epoch": 1.3913305237808549,
"grad_norm": 0.652333247187965,
"learning_rate": 2.3380337334505374e-05,
"loss": 0.5320765972137451,
"step": 1156
},
{
"epoch": 1.3925346177001807,
"grad_norm": 0.6694326966899518,
"learning_rate": 2.3296126861056167e-05,
"loss": 0.7142533659934998,
"step": 1157
},
{
"epoch": 1.3937387116195064,
"grad_norm": 0.6755620258584408,
"learning_rate": 2.3212022230051154e-05,
"loss": 0.6147712469100952,
"step": 1158
},
{
"epoch": 1.394942805538832,
"grad_norm": 0.6526357166615497,
"learning_rate": 2.3128023774844194e-05,
"loss": 0.57289719581604,
"step": 1159
},
{
"epoch": 1.3961468994581576,
"grad_norm": 0.6763701082565594,
"learning_rate": 2.3044131828368386e-05,
"loss": 0.610175609588623,
"step": 1160
},
{
"epoch": 1.3973509933774835,
"grad_norm": 0.6664594873329567,
"learning_rate": 2.2960346723134575e-05,
"loss": 0.5026771426200867,
"step": 1161
},
{
"epoch": 1.398555087296809,
"grad_norm": 0.6139734265290661,
"learning_rate": 2.2876668791230193e-05,
"loss": 0.5563870668411255,
"step": 1162
},
{
"epoch": 1.399759181216135,
"grad_norm": 0.65603675473224,
"learning_rate": 2.2793098364317868e-05,
"loss": 0.6313542127609253,
"step": 1163
},
{
"epoch": 1.4009632751354606,
"grad_norm": 0.6628406945641241,
"learning_rate": 2.2709635773634113e-05,
"loss": 0.5728123784065247,
"step": 1164
},
{
"epoch": 1.4021673690547862,
"grad_norm": 0.6610390146768919,
"learning_rate": 2.2626281349988103e-05,
"loss": 0.590998113155365,
"step": 1165
},
{
"epoch": 1.4033714629741119,
"grad_norm": 0.7093784263783569,
"learning_rate": 2.254303542376016e-05,
"loss": 0.578934371471405,
"step": 1166
},
{
"epoch": 1.4045755568934377,
"grad_norm": 0.6722358307812577,
"learning_rate": 2.2459898324900662e-05,
"loss": 0.5327061414718628,
"step": 1167
},
{
"epoch": 1.4057796508127633,
"grad_norm": 0.6720764767134005,
"learning_rate": 2.2376870382928607e-05,
"loss": 0.6328048706054688,
"step": 1168
},
{
"epoch": 1.4069837447320892,
"grad_norm": 0.6072052751210661,
"learning_rate": 2.2293951926930356e-05,
"loss": 0.5807158946990967,
"step": 1169
},
{
"epoch": 1.4081878386514148,
"grad_norm": 0.7239788229686193,
"learning_rate": 2.2211143285558312e-05,
"loss": 0.6846528053283691,
"step": 1170
},
{
"epoch": 1.4093919325707405,
"grad_norm": 0.6983348518524077,
"learning_rate": 2.2128444787029618e-05,
"loss": 0.6707192063331604,
"step": 1171
},
{
"epoch": 1.410596026490066,
"grad_norm": 0.7229792916497166,
"learning_rate": 2.204585675912486e-05,
"loss": 0.5944390296936035,
"step": 1172
},
{
"epoch": 1.411800120409392,
"grad_norm": 0.68467720786428,
"learning_rate": 2.1963379529186768e-05,
"loss": 0.5784805417060852,
"step": 1173
},
{
"epoch": 1.4130042143287176,
"grad_norm": 0.6441557349730764,
"learning_rate": 2.1881013424118922e-05,
"loss": 0.5677670836448669,
"step": 1174
},
{
"epoch": 1.4142083082480434,
"grad_norm": 0.6645841903588222,
"learning_rate": 2.1798758770384442e-05,
"loss": 0.6499326229095459,
"step": 1175
},
{
"epoch": 1.415412402167369,
"grad_norm": 0.6514998788694832,
"learning_rate": 2.171661589400471e-05,
"loss": 0.559750497341156,
"step": 1176
},
{
"epoch": 1.4166164960866947,
"grad_norm": 0.6827011585978081,
"learning_rate": 2.1634585120558078e-05,
"loss": 0.5925350189208984,
"step": 1177
},
{
"epoch": 1.4178205900060203,
"grad_norm": 0.6948830521197485,
"learning_rate": 2.1552666775178548e-05,
"loss": 0.5711615681648254,
"step": 1178
},
{
"epoch": 1.4190246839253462,
"grad_norm": 0.6671986963240619,
"learning_rate": 2.1470861182554534e-05,
"loss": 0.5662252306938171,
"step": 1179
},
{
"epoch": 1.4202287778446718,
"grad_norm": 0.6851768329023631,
"learning_rate": 2.138916866692754e-05,
"loss": 0.683719277381897,
"step": 1180
},
{
"epoch": 1.4214328717639977,
"grad_norm": 0.7266715462148113,
"learning_rate": 2.1307589552090866e-05,
"loss": 0.6323405504226685,
"step": 1181
},
{
"epoch": 1.4226369656833233,
"grad_norm": 0.6143920744082411,
"learning_rate": 2.1226124161388354e-05,
"loss": 0.6116073131561279,
"step": 1182
},
{
"epoch": 1.423841059602649,
"grad_norm": 0.679159711504491,
"learning_rate": 2.1144772817713103e-05,
"loss": 0.7198662161827087,
"step": 1183
},
{
"epoch": 1.4250451535219746,
"grad_norm": 0.6757307692020434,
"learning_rate": 2.106353584350616e-05,
"loss": 0.6091858148574829,
"step": 1184
},
{
"epoch": 1.4262492474413004,
"grad_norm": 0.6961418661890654,
"learning_rate": 2.0982413560755272e-05,
"loss": 0.6085526347160339,
"step": 1185
},
{
"epoch": 1.427453341360626,
"grad_norm": 0.7679493274935778,
"learning_rate": 2.0901406290993598e-05,
"loss": 0.6127029061317444,
"step": 1186
},
{
"epoch": 1.428657435279952,
"grad_norm": 0.6327120331998163,
"learning_rate": 2.0820514355298432e-05,
"loss": 0.5237135887145996,
"step": 1187
},
{
"epoch": 1.4298615291992776,
"grad_norm": 0.6777932530712685,
"learning_rate": 2.073973807428995e-05,
"loss": 0.5015468597412109,
"step": 1188
},
{
"epoch": 1.4310656231186032,
"grad_norm": 0.6570717414906649,
"learning_rate": 2.0659077768129898e-05,
"loss": 0.5828713178634644,
"step": 1189
},
{
"epoch": 1.4322697170379288,
"grad_norm": 0.6689525315217688,
"learning_rate": 2.0578533756520375e-05,
"loss": 0.6016378402709961,
"step": 1190
},
{
"epoch": 1.4334738109572547,
"grad_norm": 0.6626194231928022,
"learning_rate": 2.049810635870251e-05,
"loss": 0.5744947791099548,
"step": 1191
},
{
"epoch": 1.4346779048765803,
"grad_norm": 0.654853715590486,
"learning_rate": 2.0417795893455265e-05,
"loss": 0.4999788999557495,
"step": 1192
},
{
"epoch": 1.4358819987959062,
"grad_norm": 0.6631813785864065,
"learning_rate": 2.0337602679094092e-05,
"loss": 0.640974223613739,
"step": 1193
},
{
"epoch": 1.4370860927152318,
"grad_norm": 0.6656503589041513,
"learning_rate": 2.0257527033469735e-05,
"loss": 0.6686643958091736,
"step": 1194
},
{
"epoch": 1.4382901866345574,
"grad_norm": 0.7195749887839146,
"learning_rate": 2.0177569273966945e-05,
"loss": 0.6436607837677002,
"step": 1195
},
{
"epoch": 1.439494280553883,
"grad_norm": 0.6224049363358496,
"learning_rate": 2.009772971750322e-05,
"loss": 0.6621146202087402,
"step": 1196
},
{
"epoch": 1.440698374473209,
"grad_norm": 0.6870667027942325,
"learning_rate": 2.0018008680527556e-05,
"loss": 0.6553419232368469,
"step": 1197
},
{
"epoch": 1.4419024683925346,
"grad_norm": 0.752594057427092,
"learning_rate": 1.9938406479019183e-05,
"loss": 0.5890825390815735,
"step": 1198
},
{
"epoch": 1.4431065623118604,
"grad_norm": 0.6295842476537755,
"learning_rate": 1.985892342848633e-05,
"loss": 0.5660571455955505,
"step": 1199
},
{
"epoch": 1.444310656231186,
"grad_norm": 0.6784299946025513,
"learning_rate": 1.9779559843964957e-05,
"loss": 0.6301198601722717,
"step": 1200
},
{
"epoch": 1.4455147501505117,
"grad_norm": 0.7184243923628549,
"learning_rate": 1.9700316040017515e-05,
"loss": 0.5631546378135681,
"step": 1201
},
{
"epoch": 1.4467188440698375,
"grad_norm": 0.705782713183386,
"learning_rate": 1.9621192330731684e-05,
"loss": 0.5814705491065979,
"step": 1202
},
{
"epoch": 1.4479229379891632,
"grad_norm": 0.7357316231438419,
"learning_rate": 1.954218902971917e-05,
"loss": 0.6509934663772583,
"step": 1203
},
{
"epoch": 1.4491270319084888,
"grad_norm": 0.6211989488690487,
"learning_rate": 1.9463306450114416e-05,
"loss": 0.5680814385414124,
"step": 1204
},
{
"epoch": 1.4503311258278146,
"grad_norm": 0.689236766464551,
"learning_rate": 1.938454490457337e-05,
"loss": 0.6668490171432495,
"step": 1205
},
{
"epoch": 1.4515352197471403,
"grad_norm": 0.761382009234011,
"learning_rate": 1.9305904705272275e-05,
"loss": 0.7243137359619141,
"step": 1206
},
{
"epoch": 1.452739313666466,
"grad_norm": 0.6726820235370389,
"learning_rate": 1.922738616390639e-05,
"loss": 0.6479090452194214,
"step": 1207
},
{
"epoch": 1.4539434075857918,
"grad_norm": 0.6927010843022338,
"learning_rate": 1.9148989591688786e-05,
"loss": 0.5434556007385254,
"step": 1208
},
{
"epoch": 1.4551475015051174,
"grad_norm": 0.6286082992167814,
"learning_rate": 1.9070715299349105e-05,
"loss": 0.6021962761878967,
"step": 1209
},
{
"epoch": 1.456351595424443,
"grad_norm": 0.6374198604384195,
"learning_rate": 1.8992563597132323e-05,
"loss": 0.6144254207611084,
"step": 1210
},
{
"epoch": 1.457555689343769,
"grad_norm": 0.7752040368710191,
"learning_rate": 1.8914534794797513e-05,
"loss": 0.6344243288040161,
"step": 1211
},
{
"epoch": 1.4587597832630945,
"grad_norm": 0.679800091113536,
"learning_rate": 1.8836629201616635e-05,
"loss": 0.5787262320518494,
"step": 1212
},
{
"epoch": 1.4599638771824202,
"grad_norm": 0.6538717861512221,
"learning_rate": 1.8758847126373303e-05,
"loss": 0.6544387340545654,
"step": 1213
},
{
"epoch": 1.461167971101746,
"grad_norm": 0.681978026013326,
"learning_rate": 1.8681188877361555e-05,
"loss": 0.6070996522903442,
"step": 1214
},
{
"epoch": 1.4623720650210716,
"grad_norm": 0.6455730314080899,
"learning_rate": 1.8603654762384642e-05,
"loss": 0.5753454566001892,
"step": 1215
},
{
"epoch": 1.4635761589403973,
"grad_norm": 0.6832422399485334,
"learning_rate": 1.85262450887538e-05,
"loss": 0.5864018201828003,
"step": 1216
},
{
"epoch": 1.4647802528597231,
"grad_norm": 0.6343117890568124,
"learning_rate": 1.8448960163287034e-05,
"loss": 0.5790785551071167,
"step": 1217
},
{
"epoch": 1.4659843467790488,
"grad_norm": 0.6645088122577267,
"learning_rate": 1.83718002923079e-05,
"loss": 0.6346677541732788,
"step": 1218
},
{
"epoch": 1.4671884406983744,
"grad_norm": 0.863960139767857,
"learning_rate": 1.8294765781644285e-05,
"loss": 0.5457351803779602,
"step": 1219
},
{
"epoch": 1.4683925346177003,
"grad_norm": 0.6434124997628975,
"learning_rate": 1.821785693662724e-05,
"loss": 0.5659875869750977,
"step": 1220
},
{
"epoch": 1.4695966285370259,
"grad_norm": 0.6846377550127212,
"learning_rate": 1.8141074062089657e-05,
"loss": 0.5451058149337769,
"step": 1221
},
{
"epoch": 1.4708007224563515,
"grad_norm": 0.695185719098649,
"learning_rate": 1.8064417462365226e-05,
"loss": 0.5791363716125488,
"step": 1222
},
{
"epoch": 1.4720048163756774,
"grad_norm": 0.6200127760471036,
"learning_rate": 1.798788744128707e-05,
"loss": 0.625119686126709,
"step": 1223
},
{
"epoch": 1.473208910295003,
"grad_norm": 0.647067603031808,
"learning_rate": 1.791148430218666e-05,
"loss": 0.5558938384056091,
"step": 1224
},
{
"epoch": 1.4744130042143286,
"grad_norm": 0.6434041757670454,
"learning_rate": 1.7835208347892535e-05,
"loss": 0.5209598541259766,
"step": 1225
},
{
"epoch": 1.4756170981336545,
"grad_norm": 0.6768163709486766,
"learning_rate": 1.775905988072909e-05,
"loss": 0.601038932800293,
"step": 1226
},
{
"epoch": 1.4768211920529801,
"grad_norm": 0.6413983452096825,
"learning_rate": 1.768303920251551e-05,
"loss": 0.5623950958251953,
"step": 1227
},
{
"epoch": 1.4780252859723058,
"grad_norm": 0.6684889175160904,
"learning_rate": 1.7607146614564418e-05,
"loss": 0.610763430595398,
"step": 1228
},
{
"epoch": 1.4792293798916316,
"grad_norm": 0.6646920531869194,
"learning_rate": 1.753138241768075e-05,
"loss": 0.6074750423431396,
"step": 1229
},
{
"epoch": 1.4804334738109572,
"grad_norm": 0.6835837698413788,
"learning_rate": 1.7455746912160564e-05,
"loss": 0.615565299987793,
"step": 1230
},
{
"epoch": 1.4816375677302829,
"grad_norm": 0.6498789722824945,
"learning_rate": 1.7380240397789836e-05,
"loss": 0.5732934474945068,
"step": 1231
},
{
"epoch": 1.4828416616496087,
"grad_norm": 0.680806756887946,
"learning_rate": 1.7304863173843283e-05,
"loss": 0.68631911277771,
"step": 1232
},
{
"epoch": 1.4840457555689344,
"grad_norm": 0.6975195256011921,
"learning_rate": 1.722961553908318e-05,
"loss": 0.6374117136001587,
"step": 1233
},
{
"epoch": 1.4852498494882602,
"grad_norm": 0.6956467250143136,
"learning_rate": 1.7154497791758157e-05,
"loss": 0.5769176483154297,
"step": 1234
},
{
"epoch": 1.4864539434075859,
"grad_norm": 0.7363676191301148,
"learning_rate": 1.707951022960199e-05,
"loss": 0.5100231766700745,
"step": 1235
},
{
"epoch": 1.4876580373269115,
"grad_norm": 0.6796322902287394,
"learning_rate": 1.700465314983254e-05,
"loss": 0.6184688806533813,
"step": 1236
},
{
"epoch": 1.4888621312462371,
"grad_norm": 0.6941299138170351,
"learning_rate": 1.6929926849150428e-05,
"loss": 0.6970233917236328,
"step": 1237
},
{
"epoch": 1.490066225165563,
"grad_norm": 0.7001168068471418,
"learning_rate": 1.6855331623737958e-05,
"loss": 0.6311406493186951,
"step": 1238
},
{
"epoch": 1.4912703190848886,
"grad_norm": 0.6645492248463752,
"learning_rate": 1.6780867769257898e-05,
"loss": 0.5672847628593445,
"step": 1239
},
{
"epoch": 1.4924744130042145,
"grad_norm": 0.6797801656176244,
"learning_rate": 1.6706535580852267e-05,
"loss": 0.645459771156311,
"step": 1240
},
{
"epoch": 1.49367850692354,
"grad_norm": 0.6868214191099717,
"learning_rate": 1.6632335353141333e-05,
"loss": 0.6530009508132935,
"step": 1241
},
{
"epoch": 1.4948826008428657,
"grad_norm": 0.6501647723803492,
"learning_rate": 1.6558267380222224e-05,
"loss": 0.5365598201751709,
"step": 1242
},
{
"epoch": 1.4960866947621914,
"grad_norm": 0.648368798889899,
"learning_rate": 1.6484331955667947e-05,
"loss": 0.6006693840026855,
"step": 1243
},
{
"epoch": 1.4972907886815172,
"grad_norm": 0.7484073803647112,
"learning_rate": 1.6410529372526057e-05,
"loss": 0.5684210062026978,
"step": 1244
},
{
"epoch": 1.4984948826008428,
"grad_norm": 0.6875511798711057,
"learning_rate": 1.6336859923317643e-05,
"loss": 0.6225722432136536,
"step": 1245
},
{
"epoch": 1.4996989765201687,
"grad_norm": 0.7548689908076737,
"learning_rate": 1.6263323900036126e-05,
"loss": 0.6161872148513794,
"step": 1246
},
{
"epoch": 1.5009030704394943,
"grad_norm": 0.6794880455222678,
"learning_rate": 1.6189921594146046e-05,
"loss": 0.5291401743888855,
"step": 1247
},
{
"epoch": 1.50210716435882,
"grad_norm": 0.6910873883895179,
"learning_rate": 1.6116653296581975e-05,
"loss": 0.6173139810562134,
"step": 1248
},
{
"epoch": 1.5033112582781456,
"grad_norm": 0.6557935458779608,
"learning_rate": 1.6043519297747285e-05,
"loss": 0.5563411116600037,
"step": 1249
},
{
"epoch": 1.5045153521974715,
"grad_norm": 0.6196590618016037,
"learning_rate": 1.5970519887513073e-05,
"loss": 0.6068015694618225,
"step": 1250
},
{
"epoch": 1.505719446116797,
"grad_norm": 0.6555417209864466,
"learning_rate": 1.5897655355217038e-05,
"loss": 0.5607477426528931,
"step": 1251
},
{
"epoch": 1.506923540036123,
"grad_norm": 0.6513591496358758,
"learning_rate": 1.5824925989662216e-05,
"loss": 0.5354853868484497,
"step": 1252
},
{
"epoch": 1.5081276339554486,
"grad_norm": 0.6864021439176216,
"learning_rate": 1.5752332079115932e-05,
"loss": 0.5652515888214111,
"step": 1253
},
{
"epoch": 1.5093317278747742,
"grad_norm": 0.6462104553767192,
"learning_rate": 1.5679873911308597e-05,
"loss": 0.5760456323623657,
"step": 1254
},
{
"epoch": 1.5105358217940998,
"grad_norm": 0.5920722939954449,
"learning_rate": 1.56075517734326e-05,
"loss": 0.49027904868125916,
"step": 1255
},
{
"epoch": 1.5117399157134257,
"grad_norm": 0.6109690781215307,
"learning_rate": 1.5535365952141233e-05,
"loss": 0.5099217891693115,
"step": 1256
},
{
"epoch": 1.5129440096327513,
"grad_norm": 0.6448781186889819,
"learning_rate": 1.5463316733547434e-05,
"loss": 0.5549823045730591,
"step": 1257
},
{
"epoch": 1.5141481035520772,
"grad_norm": 0.7241234229442524,
"learning_rate": 1.5391404403222676e-05,
"loss": 0.5943726301193237,
"step": 1258
},
{
"epoch": 1.5153521974714028,
"grad_norm": 0.6620022620542522,
"learning_rate": 1.5319629246195917e-05,
"loss": 0.5054406523704529,
"step": 1259
},
{
"epoch": 1.5165562913907285,
"grad_norm": 0.6841163729400295,
"learning_rate": 1.5247991546952428e-05,
"loss": 0.6340876817703247,
"step": 1260
},
{
"epoch": 1.517760385310054,
"grad_norm": 0.6536235321860999,
"learning_rate": 1.5176491589432628e-05,
"loss": 0.5980866551399231,
"step": 1261
},
{
"epoch": 1.51896447922938,
"grad_norm": 0.7072002026747592,
"learning_rate": 1.5105129657031009e-05,
"loss": 0.5912050008773804,
"step": 1262
},
{
"epoch": 1.5201685731487056,
"grad_norm": 0.7518022002323144,
"learning_rate": 1.5033906032594958e-05,
"loss": 0.5446956157684326,
"step": 1263
},
{
"epoch": 1.5213726670680314,
"grad_norm": 0.616787689480563,
"learning_rate": 1.4962820998423683e-05,
"loss": 0.5494934320449829,
"step": 1264
},
{
"epoch": 1.522576760987357,
"grad_norm": 0.6903510168817233,
"learning_rate": 1.4891874836267127e-05,
"loss": 0.6838136315345764,
"step": 1265
},
{
"epoch": 1.5237808549066827,
"grad_norm": 0.5750532179822861,
"learning_rate": 1.4821067827324753e-05,
"loss": 0.5225636959075928,
"step": 1266
},
{
"epoch": 1.5249849488260083,
"grad_norm": 0.6497995243005985,
"learning_rate": 1.4750400252244511e-05,
"loss": 0.5877646207809448,
"step": 1267
},
{
"epoch": 1.5261890427453342,
"grad_norm": 0.6410420397877039,
"learning_rate": 1.4679872391121651e-05,
"loss": 0.5297002196311951,
"step": 1268
},
{
"epoch": 1.5273931366646598,
"grad_norm": 0.7002210097985194,
"learning_rate": 1.4609484523497696e-05,
"loss": 0.5521526336669922,
"step": 1269
},
{
"epoch": 1.5285972305839857,
"grad_norm": 0.6633293289955295,
"learning_rate": 1.4539236928359318e-05,
"loss": 0.5375245809555054,
"step": 1270
},
{
"epoch": 1.5298013245033113,
"grad_norm": 0.6984242296793548,
"learning_rate": 1.4469129884137177e-05,
"loss": 0.5870165228843689,
"step": 1271
},
{
"epoch": 1.531005418422637,
"grad_norm": 0.6265819271455159,
"learning_rate": 1.4399163668704829e-05,
"loss": 0.5064294338226318,
"step": 1272
},
{
"epoch": 1.5322095123419626,
"grad_norm": 0.6005110980118469,
"learning_rate": 1.4329338559377691e-05,
"loss": 0.5902453660964966,
"step": 1273
},
{
"epoch": 1.5334136062612884,
"grad_norm": 0.6313797290602909,
"learning_rate": 1.4259654832911867e-05,
"loss": 0.5448839664459229,
"step": 1274
},
{
"epoch": 1.534617700180614,
"grad_norm": 0.6737155419119836,
"learning_rate": 1.4190112765503117e-05,
"loss": 0.5859847664833069,
"step": 1275
},
{
"epoch": 1.53582179409994,
"grad_norm": 0.6485814439643522,
"learning_rate": 1.412071263278571e-05,
"loss": 0.5323665738105774,
"step": 1276
},
{
"epoch": 1.5370258880192655,
"grad_norm": 0.684534293923461,
"learning_rate": 1.4051454709831308e-05,
"loss": 0.5728932619094849,
"step": 1277
},
{
"epoch": 1.5382299819385912,
"grad_norm": 0.6788308121489335,
"learning_rate": 1.398233927114797e-05,
"loss": 0.5794711112976074,
"step": 1278
},
{
"epoch": 1.5394340758579168,
"grad_norm": 0.7020969052887231,
"learning_rate": 1.3913366590678966e-05,
"loss": 0.5733374357223511,
"step": 1279
},
{
"epoch": 1.5406381697772427,
"grad_norm": 0.7129657171420142,
"learning_rate": 1.38445369418018e-05,
"loss": 0.6412773728370667,
"step": 1280
},
{
"epoch": 1.5418422636965683,
"grad_norm": 0.7981258082670758,
"learning_rate": 1.377585059732701e-05,
"loss": 0.5884507298469543,
"step": 1281
},
{
"epoch": 1.5430463576158941,
"grad_norm": 0.67599894319694,
"learning_rate": 1.370730782949713e-05,
"loss": 0.617125391960144,
"step": 1282
},
{
"epoch": 1.5442504515352198,
"grad_norm": 0.6131897663905975,
"learning_rate": 1.3638908909985621e-05,
"loss": 0.5849452018737793,
"step": 1283
},
{
"epoch": 1.5454545454545454,
"grad_norm": 0.698854961826203,
"learning_rate": 1.3570654109895853e-05,
"loss": 0.4832739531993866,
"step": 1284
},
{
"epoch": 1.546658639373871,
"grad_norm": 0.6471604421208978,
"learning_rate": 1.3502543699759917e-05,
"loss": 0.6543363332748413,
"step": 1285
},
{
"epoch": 1.547862733293197,
"grad_norm": 0.6242702470342856,
"learning_rate": 1.3434577949537592e-05,
"loss": 0.5883569717407227,
"step": 1286
},
{
"epoch": 1.5490668272125225,
"grad_norm": 0.6238250088321301,
"learning_rate": 1.336675712861532e-05,
"loss": 0.5618208646774292,
"step": 1287
},
{
"epoch": 1.5502709211318484,
"grad_norm": 0.6612496125172123,
"learning_rate": 1.3299081505805088e-05,
"loss": 0.5929635167121887,
"step": 1288
},
{
"epoch": 1.551475015051174,
"grad_norm": 0.6692758960448262,
"learning_rate": 1.3231551349343413e-05,
"loss": 0.5423666834831238,
"step": 1289
},
{
"epoch": 1.5526791089704997,
"grad_norm": 0.6752769648200759,
"learning_rate": 1.3164166926890226e-05,
"loss": 0.5197986364364624,
"step": 1290
},
{
"epoch": 1.5538832028898253,
"grad_norm": 0.669092768800806,
"learning_rate": 1.3096928505527811e-05,
"loss": 0.5313164591789246,
"step": 1291
},
{
"epoch": 1.5550872968091511,
"grad_norm": 0.7182716714011864,
"learning_rate": 1.3029836351759795e-05,
"loss": 0.5484466552734375,
"step": 1292
},
{
"epoch": 1.5562913907284768,
"grad_norm": 0.6431119213649995,
"learning_rate": 1.296289073151004e-05,
"loss": 0.5544946193695068,
"step": 1293
},
{
"epoch": 1.5574954846478026,
"grad_norm": 0.6058203791827846,
"learning_rate": 1.2896091910121666e-05,
"loss": 0.6230573654174805,
"step": 1294
},
{
"epoch": 1.5586995785671283,
"grad_norm": 0.658413335183263,
"learning_rate": 1.2829440152355915e-05,
"loss": 0.6586100459098816,
"step": 1295
},
{
"epoch": 1.559903672486454,
"grad_norm": 0.7016584672362768,
"learning_rate": 1.2762935722391089e-05,
"loss": 0.5778795480728149,
"step": 1296
},
{
"epoch": 1.5611077664057795,
"grad_norm": 0.6971910577092758,
"learning_rate": 1.2696578883821614e-05,
"loss": 0.6585246324539185,
"step": 1297
},
{
"epoch": 1.5623118603251054,
"grad_norm": 0.706176096353826,
"learning_rate": 1.263036989965688e-05,
"loss": 0.5676591396331787,
"step": 1298
},
{
"epoch": 1.563515954244431,
"grad_norm": 0.6466527762936504,
"learning_rate": 1.2564309032320315e-05,
"loss": 0.5486147999763489,
"step": 1299
},
{
"epoch": 1.5647200481637569,
"grad_norm": 0.7894485392513839,
"learning_rate": 1.2498396543648195e-05,
"loss": 0.5645613670349121,
"step": 1300
},
{
"epoch": 1.5659241420830825,
"grad_norm": 0.7000651764045831,
"learning_rate": 1.2432632694888724e-05,
"loss": 0.4964786469936371,
"step": 1301
},
{
"epoch": 1.5671282360024081,
"grad_norm": 0.6786788880580892,
"learning_rate": 1.2367017746700977e-05,
"loss": 0.5440781116485596,
"step": 1302
},
{
"epoch": 1.5683323299217338,
"grad_norm": 0.6737203989678393,
"learning_rate": 1.2301551959153813e-05,
"loss": 0.5844026803970337,
"step": 1303
},
{
"epoch": 1.5695364238410596,
"grad_norm": 0.668549947601081,
"learning_rate": 1.223623559172496e-05,
"loss": 0.678715169429779,
"step": 1304
},
{
"epoch": 1.5707405177603853,
"grad_norm": 0.6699944490261841,
"learning_rate": 1.217106890329981e-05,
"loss": 0.48936742544174194,
"step": 1305
},
{
"epoch": 1.5719446116797111,
"grad_norm": 0.6981875741840513,
"learning_rate": 1.2106052152170561e-05,
"loss": 0.5707062482833862,
"step": 1306
},
{
"epoch": 1.5731487055990367,
"grad_norm": 0.6747467637430019,
"learning_rate": 1.204118559603511e-05,
"loss": 0.6040956974029541,
"step": 1307
},
{
"epoch": 1.5743527995183624,
"grad_norm": 0.6910940201995631,
"learning_rate": 1.1976469491996028e-05,
"loss": 0.6196696758270264,
"step": 1308
},
{
"epoch": 1.575556893437688,
"grad_norm": 0.6870109501772094,
"learning_rate": 1.1911904096559589e-05,
"loss": 0.5733761191368103,
"step": 1309
},
{
"epoch": 1.5767609873570139,
"grad_norm": 0.6586659405360901,
"learning_rate": 1.1847489665634709e-05,
"loss": 0.5610096454620361,
"step": 1310
},
{
"epoch": 1.5779650812763397,
"grad_norm": 0.7460735946310162,
"learning_rate": 1.1783226454531942e-05,
"loss": 0.6038789749145508,
"step": 1311
},
{
"epoch": 1.5791691751956654,
"grad_norm": 0.7395726996909909,
"learning_rate": 1.1719114717962476e-05,
"loss": 0.6537466049194336,
"step": 1312
},
{
"epoch": 1.580373269114991,
"grad_norm": 0.6499783317009531,
"learning_rate": 1.1655154710037153e-05,
"loss": 0.5639055967330933,
"step": 1313
},
{
"epoch": 1.5815773630343166,
"grad_norm": 0.681242545134431,
"learning_rate": 1.1591346684265359e-05,
"loss": 0.593967616558075,
"step": 1314
},
{
"epoch": 1.5827814569536423,
"grad_norm": 0.6531301486627312,
"learning_rate": 1.1527690893554156e-05,
"loss": 0.531619131565094,
"step": 1315
},
{
"epoch": 1.583985550872968,
"grad_norm": 0.7163893807357503,
"learning_rate": 1.1464187590207171e-05,
"loss": 0.5939278602600098,
"step": 1316
},
{
"epoch": 1.585189644792294,
"grad_norm": 0.7463494625207332,
"learning_rate": 1.1400837025923649e-05,
"loss": 0.5914647579193115,
"step": 1317
},
{
"epoch": 1.5863937387116196,
"grad_norm": 0.6410015091159691,
"learning_rate": 1.1337639451797494e-05,
"loss": 0.5456799864768982,
"step": 1318
},
{
"epoch": 1.5875978326309452,
"grad_norm": 0.6713142729133758,
"learning_rate": 1.1274595118316134e-05,
"loss": 0.5442759990692139,
"step": 1319
},
{
"epoch": 1.5888019265502709,
"grad_norm": 0.6467986073462993,
"learning_rate": 1.1211704275359675e-05,
"loss": 0.48860234022140503,
"step": 1320
},
{
"epoch": 1.5900060204695965,
"grad_norm": 0.7519139615127933,
"learning_rate": 1.1148967172199848e-05,
"loss": 0.4999341666698456,
"step": 1321
},
{
"epoch": 1.5912101143889223,
"grad_norm": 0.6808399870270115,
"learning_rate": 1.1086384057499011e-05,
"loss": 0.5638273358345032,
"step": 1322
},
{
"epoch": 1.5924142083082482,
"grad_norm": 0.6954877809436288,
"learning_rate": 1.1023955179309203e-05,
"loss": 0.6613055467605591,
"step": 1323
},
{
"epoch": 1.5936183022275738,
"grad_norm": 0.6903795960686822,
"learning_rate": 1.0961680785071116e-05,
"loss": 0.5615702271461487,
"step": 1324
},
{
"epoch": 1.5948223961468995,
"grad_norm": 0.6750172122223536,
"learning_rate": 1.0899561121613145e-05,
"loss": 0.5752872228622437,
"step": 1325
},
{
"epoch": 1.596026490066225,
"grad_norm": 0.7785301940553601,
"learning_rate": 1.0837596435150398e-05,
"loss": 0.5692745447158813,
"step": 1326
},
{
"epoch": 1.5972305839855507,
"grad_norm": 0.6494681462447573,
"learning_rate": 1.0775786971283725e-05,
"loss": 0.5178340673446655,
"step": 1327
},
{
"epoch": 1.5984346779048766,
"grad_norm": 0.6744443029597672,
"learning_rate": 1.0714132974998731e-05,
"loss": 0.6879583597183228,
"step": 1328
},
{
"epoch": 1.5996387718242024,
"grad_norm": 0.677952563994588,
"learning_rate": 1.065263469066483e-05,
"loss": 0.5413495302200317,
"step": 1329
},
{
"epoch": 1.600842865743528,
"grad_norm": 0.6537301805420873,
"learning_rate": 1.0591292362034255e-05,
"loss": 0.5920218825340271,
"step": 1330
},
{
"epoch": 1.6020469596628537,
"grad_norm": 0.6469373008066952,
"learning_rate": 1.0530106232241099e-05,
"loss": 0.5061972141265869,
"step": 1331
},
{
"epoch": 1.6032510535821793,
"grad_norm": 0.6708685902250847,
"learning_rate": 1.0469076543800355e-05,
"loss": 0.49697619676589966,
"step": 1332
},
{
"epoch": 1.604455147501505,
"grad_norm": 0.6993408295725457,
"learning_rate": 1.0408203538606948e-05,
"loss": 0.6032021641731262,
"step": 1333
},
{
"epoch": 1.6056592414208308,
"grad_norm": 0.6393911060563842,
"learning_rate": 1.0347487457934768e-05,
"loss": 0.5719197988510132,
"step": 1334
},
{
"epoch": 1.6068633353401567,
"grad_norm": 0.7226199767221391,
"learning_rate": 1.0286928542435754e-05,
"loss": 0.6187776923179626,
"step": 1335
},
{
"epoch": 1.6080674292594823,
"grad_norm": 0.642305535476551,
"learning_rate": 1.0226527032138878e-05,
"loss": 0.613179087638855,
"step": 1336
},
{
"epoch": 1.609271523178808,
"grad_norm": 0.673311015402003,
"learning_rate": 1.0166283166449253e-05,
"loss": 0.5710175037384033,
"step": 1337
},
{
"epoch": 1.6104756170981336,
"grad_norm": 0.632179375454808,
"learning_rate": 1.0106197184147143e-05,
"loss": 0.5210025310516357,
"step": 1338
},
{
"epoch": 1.6116797110174592,
"grad_norm": 0.6957067778486582,
"learning_rate": 1.0046269323387036e-05,
"loss": 0.6410612463951111,
"step": 1339
},
{
"epoch": 1.612883804936785,
"grad_norm": 0.6287930862009652,
"learning_rate": 9.986499821696693e-06,
"loss": 0.5632354617118835,
"step": 1340
},
{
"epoch": 1.614087898856111,
"grad_norm": 0.622387312688554,
"learning_rate": 9.926888915976219e-06,
"loss": 0.4282957911491394,
"step": 1341
},
{
"epoch": 1.6152919927754366,
"grad_norm": 0.8669900709105164,
"learning_rate": 9.867436842497103e-06,
"loss": 0.5236736536026001,
"step": 1342
},
{
"epoch": 1.6164960866947622,
"grad_norm": 0.7350159046721363,
"learning_rate": 9.808143836901284e-06,
"loss": 0.593165934085846,
"step": 1343
},
{
"epoch": 1.6177001806140878,
"grad_norm": 0.6869753376200917,
"learning_rate": 9.74901013420026e-06,
"loss": 0.5582044720649719,
"step": 1344
},
{
"epoch": 1.6189042745334135,
"grad_norm": 0.7091818776395036,
"learning_rate": 9.69003596877408e-06,
"loss": 0.7148651480674744,
"step": 1345
},
{
"epoch": 1.6201083684527393,
"grad_norm": 0.684701088586173,
"learning_rate": 9.631221574370491e-06,
"loss": 0.5752968192100525,
"step": 1346
},
{
"epoch": 1.6213124623720652,
"grad_norm": 0.6805443936750752,
"learning_rate": 9.572567184103958e-06,
"loss": 0.5721158981323242,
"step": 1347
},
{
"epoch": 1.6225165562913908,
"grad_norm": 0.7358892263289364,
"learning_rate": 9.514073030454762e-06,
"loss": 0.5454210042953491,
"step": 1348
},
{
"epoch": 1.6237206502107164,
"grad_norm": 0.706456329071073,
"learning_rate": 9.455739345268088e-06,
"loss": 0.5805840492248535,
"step": 1349
},
{
"epoch": 1.624924744130042,
"grad_norm": 0.737656503682747,
"learning_rate": 9.39756635975308e-06,
"loss": 0.7166793942451477,
"step": 1350
},
{
"epoch": 1.6261288380493677,
"grad_norm": 0.6758146231670337,
"learning_rate": 9.339554304481951e-06,
"loss": 0.5964561700820923,
"step": 1351
},
{
"epoch": 1.6273329319686936,
"grad_norm": 0.6025978301665178,
"learning_rate": 9.281703409389043e-06,
"loss": 0.5290141105651855,
"step": 1352
},
{
"epoch": 1.6285370258880194,
"grad_norm": 0.7145707303310174,
"learning_rate": 9.224013903769946e-06,
"loss": 0.5035989284515381,
"step": 1353
},
{
"epoch": 1.629741119807345,
"grad_norm": 0.6565918156785405,
"learning_rate": 9.166486016280562e-06,
"loss": 0.6106424927711487,
"step": 1354
},
{
"epoch": 1.6309452137266707,
"grad_norm": 0.5599536234506536,
"learning_rate": 9.109119974936214e-06,
"loss": 0.46169233322143555,
"step": 1355
},
{
"epoch": 1.6321493076459963,
"grad_norm": 0.7207701549427209,
"learning_rate": 9.051916007110734e-06,
"loss": 0.659675121307373,
"step": 1356
},
{
"epoch": 1.633353401565322,
"grad_norm": 0.6601045291460892,
"learning_rate": 8.994874339535569e-06,
"loss": 0.5975547432899475,
"step": 1357
},
{
"epoch": 1.6345574954846478,
"grad_norm": 0.7374844627180586,
"learning_rate": 8.937995198298876e-06,
"loss": 0.596606433391571,
"step": 1358
},
{
"epoch": 1.6357615894039736,
"grad_norm": 0.6794216656947183,
"learning_rate": 8.881278808844634e-06,
"loss": 0.6227086782455444,
"step": 1359
},
{
"epoch": 1.6369656833232993,
"grad_norm": 0.7310840030823296,
"learning_rate": 8.824725395971745e-06,
"loss": 0.6003000736236572,
"step": 1360
},
{
"epoch": 1.638169777242625,
"grad_norm": 0.6313339460251725,
"learning_rate": 8.768335183833132e-06,
"loss": 0.5620385408401489,
"step": 1361
},
{
"epoch": 1.6393738711619505,
"grad_norm": 0.735750852863019,
"learning_rate": 8.712108395934881e-06,
"loss": 0.6669098734855652,
"step": 1362
},
{
"epoch": 1.6405779650812762,
"grad_norm": 0.6513943005675726,
"learning_rate": 8.656045255135314e-06,
"loss": 0.5156469941139221,
"step": 1363
},
{
"epoch": 1.641782059000602,
"grad_norm": 0.7068955426438429,
"learning_rate": 8.600145983644148e-06,
"loss": 0.578862190246582,
"step": 1364
},
{
"epoch": 1.642986152919928,
"grad_norm": 0.6293611458383194,
"learning_rate": 8.544410803021574e-06,
"loss": 0.5733503699302673,
"step": 1365
},
{
"epoch": 1.6441902468392535,
"grad_norm": 0.7237414922994768,
"learning_rate": 8.488839934177422e-06,
"loss": 0.5321314334869385,
"step": 1366
},
{
"epoch": 1.6453943407585792,
"grad_norm": 0.6599014377859602,
"learning_rate": 8.43343359737025e-06,
"loss": 0.5745839476585388,
"step": 1367
},
{
"epoch": 1.6465984346779048,
"grad_norm": 0.6674019927935357,
"learning_rate": 8.37819201220647e-06,
"loss": 0.6119813919067383,
"step": 1368
},
{
"epoch": 1.6478025285972304,
"grad_norm": 0.6646405574559846,
"learning_rate": 8.323115397639513e-06,
"loss": 0.5637862682342529,
"step": 1369
},
{
"epoch": 1.6490066225165563,
"grad_norm": 0.6603179812112369,
"learning_rate": 8.268203971968924e-06,
"loss": 0.5550099015235901,
"step": 1370
},
{
"epoch": 1.6502107164358821,
"grad_norm": 0.599521665605895,
"learning_rate": 8.213457952839527e-06,
"loss": 0.5197803378105164,
"step": 1371
},
{
"epoch": 1.6514148103552078,
"grad_norm": 0.7614423749809117,
"learning_rate": 8.158877557240529e-06,
"loss": 0.6506602168083191,
"step": 1372
},
{
"epoch": 1.6526189042745334,
"grad_norm": 0.699033560188023,
"learning_rate": 8.104463001504697e-06,
"loss": 0.6326109766960144,
"step": 1373
},
{
"epoch": 1.653822998193859,
"grad_norm": 0.6623964529587189,
"learning_rate": 8.05021450130744e-06,
"loss": 0.6270788311958313,
"step": 1374
},
{
"epoch": 1.6550270921131849,
"grad_norm": 0.6281369047593645,
"learning_rate": 7.996132271666062e-06,
"loss": 0.5085688233375549,
"step": 1375
},
{
"epoch": 1.6562311860325105,
"grad_norm": 0.7100130784152302,
"learning_rate": 7.942216526938795e-06,
"loss": 0.6412090063095093,
"step": 1376
},
{
"epoch": 1.6574352799518364,
"grad_norm": 0.720921172066268,
"learning_rate": 7.888467480824003e-06,
"loss": 0.6398442983627319,
"step": 1377
},
{
"epoch": 1.658639373871162,
"grad_norm": 0.7341990833868546,
"learning_rate": 7.83488534635936e-06,
"loss": 0.6106516122817993,
"step": 1378
},
{
"epoch": 1.6598434677904876,
"grad_norm": 0.6821836584715316,
"learning_rate": 7.78147033592091e-06,
"loss": 0.6357331275939941,
"step": 1379
},
{
"epoch": 1.6610475617098133,
"grad_norm": 0.685946591607971,
"learning_rate": 7.728222661222372e-06,
"loss": 0.5777029395103455,
"step": 1380
},
{
"epoch": 1.6622516556291391,
"grad_norm": 0.6891670202059894,
"learning_rate": 7.675142533314172e-06,
"loss": 0.6097980737686157,
"step": 1381
},
{
"epoch": 1.6634557495484648,
"grad_norm": 0.6667769182542659,
"learning_rate": 7.622230162582656e-06,
"loss": 0.5467538833618164,
"step": 1382
},
{
"epoch": 1.6646598434677906,
"grad_norm": 0.6889746140530888,
"learning_rate": 7.569485758749262e-06,
"loss": 0.5487406849861145,
"step": 1383
},
{
"epoch": 1.6658639373871162,
"grad_norm": 0.65882966833545,
"learning_rate": 7.5169095308696865e-06,
"loss": 0.48606792092323303,
"step": 1384
},
{
"epoch": 1.6670680313064419,
"grad_norm": 0.7348759467898891,
"learning_rate": 7.4645016873330366e-06,
"loss": 0.5994123220443726,
"step": 1385
},
{
"epoch": 1.6682721252257675,
"grad_norm": 0.6509418908408136,
"learning_rate": 7.412262435861023e-06,
"loss": 0.5139130353927612,
"step": 1386
},
{
"epoch": 1.6694762191450934,
"grad_norm": 0.681233192126596,
"learning_rate": 7.360191983507153e-06,
"loss": 0.6144307255744934,
"step": 1387
},
{
"epoch": 1.670680313064419,
"grad_norm": 0.7257999803609204,
"learning_rate": 7.308290536655832e-06,
"loss": 0.5968878865242004,
"step": 1388
},
{
"epoch": 1.6718844069837449,
"grad_norm": 0.697738830575009,
"learning_rate": 7.256558301021665e-06,
"loss": 0.5899304747581482,
"step": 1389
},
{
"epoch": 1.6730885009030705,
"grad_norm": 0.7287346454906112,
"learning_rate": 7.2049954816485465e-06,
"loss": 0.5723932981491089,
"step": 1390
},
{
"epoch": 1.6742925948223961,
"grad_norm": 0.6922298902594373,
"learning_rate": 7.153602282908877e-06,
"loss": 0.5339739322662354,
"step": 1391
},
{
"epoch": 1.6754966887417218,
"grad_norm": 0.7005947853476255,
"learning_rate": 7.102378908502783e-06,
"loss": 0.614686906337738,
"step": 1392
},
{
"epoch": 1.6767007826610476,
"grad_norm": 0.6338169439501814,
"learning_rate": 7.051325561457217e-06,
"loss": 0.5092990398406982,
"step": 1393
},
{
"epoch": 1.6779048765803732,
"grad_norm": 0.6466781558640031,
"learning_rate": 7.000442444125299e-06,
"loss": 0.503153383731842,
"step": 1394
},
{
"epoch": 1.679108970499699,
"grad_norm": 0.668788103838701,
"learning_rate": 6.949729758185386e-06,
"loss": 0.5666583776473999,
"step": 1395
},
{
"epoch": 1.6803130644190247,
"grad_norm": 0.6946575939430065,
"learning_rate": 6.899187704640325e-06,
"loss": 0.5352746844291687,
"step": 1396
},
{
"epoch": 1.6815171583383504,
"grad_norm": 0.799147133363122,
"learning_rate": 6.848816483816672e-06,
"loss": 0.5560225248336792,
"step": 1397
},
{
"epoch": 1.682721252257676,
"grad_norm": 0.6879951160707153,
"learning_rate": 6.798616295363819e-06,
"loss": 0.5174521207809448,
"step": 1398
},
{
"epoch": 1.6839253461770018,
"grad_norm": 0.7052209200272666,
"learning_rate": 6.748587338253337e-06,
"loss": 0.6782345175743103,
"step": 1399
},
{
"epoch": 1.6851294400963275,
"grad_norm": 0.7362031149865755,
"learning_rate": 6.698729810778065e-06,
"loss": 0.6185617446899414,
"step": 1400
},
{
"epoch": 1.6863335340156533,
"grad_norm": 0.7131545728436787,
"learning_rate": 6.649043910551394e-06,
"loss": 0.5694747567176819,
"step": 1401
},
{
"epoch": 1.687537627934979,
"grad_norm": 0.7013307903011614,
"learning_rate": 6.59952983450643e-06,
"loss": 0.5360475778579712,
"step": 1402
},
{
"epoch": 1.6887417218543046,
"grad_norm": 0.7313975893593249,
"learning_rate": 6.550187778895245e-06,
"loss": 0.554715633392334,
"step": 1403
},
{
"epoch": 1.6899458157736302,
"grad_norm": 0.6248799726855148,
"learning_rate": 6.501017939288145e-06,
"loss": 0.5001120567321777,
"step": 1404
},
{
"epoch": 1.691149909692956,
"grad_norm": 0.7635923193270232,
"learning_rate": 6.452020510572798e-06,
"loss": 0.5844587087631226,
"step": 1405
},
{
"epoch": 1.6923540036122817,
"grad_norm": 0.8672468095630893,
"learning_rate": 6.4031956869535446e-06,
"loss": 0.4895361065864563,
"step": 1406
},
{
"epoch": 1.6935580975316076,
"grad_norm": 0.7019485128193276,
"learning_rate": 6.354543661950552e-06,
"loss": 0.6151779294013977,
"step": 1407
},
{
"epoch": 1.6947621914509332,
"grad_norm": 0.6413354451667462,
"learning_rate": 6.3060646283991106e-06,
"loss": 0.4792034327983856,
"step": 1408
},
{
"epoch": 1.6959662853702588,
"grad_norm": 0.7400460139659534,
"learning_rate": 6.257758778448886e-06,
"loss": 0.5733928680419922,
"step": 1409
},
{
"epoch": 1.6971703792895845,
"grad_norm": 0.6924835847483812,
"learning_rate": 6.20962630356306e-06,
"loss": 0.587846040725708,
"step": 1410
},
{
"epoch": 1.6983744732089103,
"grad_norm": 0.6792062461473658,
"learning_rate": 6.1616673945176836e-06,
"loss": 0.5776320695877075,
"step": 1411
},
{
"epoch": 1.699578567128236,
"grad_norm": 0.7374799355809821,
"learning_rate": 6.113882241400804e-06,
"loss": 0.6477718353271484,
"step": 1412
},
{
"epoch": 1.7007826610475618,
"grad_norm": 0.778627782734248,
"learning_rate": 6.0662710336118425e-06,
"loss": 0.5870640277862549,
"step": 1413
},
{
"epoch": 1.7019867549668874,
"grad_norm": 0.697885294998567,
"learning_rate": 6.018833959860753e-06,
"loss": 0.5413687825202942,
"step": 1414
},
{
"epoch": 1.703190848886213,
"grad_norm": 0.6131464328004254,
"learning_rate": 5.971571208167298e-06,
"loss": 0.5253198146820068,
"step": 1415
},
{
"epoch": 1.7043949428055387,
"grad_norm": 0.7124343955537419,
"learning_rate": 5.9244829658602845e-06,
"loss": 0.5268945097923279,
"step": 1416
},
{
"epoch": 1.7055990367248646,
"grad_norm": 0.7371279372996249,
"learning_rate": 5.87756941957685e-06,
"loss": 0.5446827411651611,
"step": 1417
},
{
"epoch": 1.7068031306441902,
"grad_norm": 0.7247041443586373,
"learning_rate": 5.830830755261751e-06,
"loss": 0.6003855466842651,
"step": 1418
},
{
"epoch": 1.708007224563516,
"grad_norm": 0.6784247281296139,
"learning_rate": 5.784267158166551e-06,
"loss": 0.5970977544784546,
"step": 1419
},
{
"epoch": 1.7092113184828417,
"grad_norm": 0.7067472045358276,
"learning_rate": 5.737878812848929e-06,
"loss": 0.688196063041687,
"step": 1420
},
{
"epoch": 1.7104154124021673,
"grad_norm": 0.731618554019377,
"learning_rate": 5.691665903171939e-06,
"loss": 0.6994629502296448,
"step": 1421
},
{
"epoch": 1.711619506321493,
"grad_norm": 0.6589123791710798,
"learning_rate": 5.6456286123032884e-06,
"loss": 0.6352708339691162,
"step": 1422
},
{
"epoch": 1.7128236002408188,
"grad_norm": 0.6365405554814509,
"learning_rate": 5.599767122714627e-06,
"loss": 0.5105507969856262,
"step": 1423
},
{
"epoch": 1.7140276941601444,
"grad_norm": 0.6764045969911787,
"learning_rate": 5.554081616180801e-06,
"loss": 0.645250678062439,
"step": 1424
},
{
"epoch": 1.7152317880794703,
"grad_norm": 0.6774797351412346,
"learning_rate": 5.508572273779089e-06,
"loss": 0.5692201256752014,
"step": 1425
},
{
"epoch": 1.716435881998796,
"grad_norm": 0.7162737130025575,
"learning_rate": 5.4632392758885985e-06,
"loss": 0.5054356455802917,
"step": 1426
},
{
"epoch": 1.7176399759181216,
"grad_norm": 0.7437270462318553,
"learning_rate": 5.418082802189434e-06,
"loss": 0.5896080732345581,
"step": 1427
},
{
"epoch": 1.7188440698374472,
"grad_norm": 0.655533046664382,
"learning_rate": 5.37310303166208e-06,
"loss": 0.5693444609642029,
"step": 1428
},
{
"epoch": 1.720048163756773,
"grad_norm": 0.6190555705713453,
"learning_rate": 5.328300142586629e-06,
"loss": 0.4733911156654358,
"step": 1429
},
{
"epoch": 1.7212522576760987,
"grad_norm": 0.7238456789470515,
"learning_rate": 5.283674312542064e-06,
"loss": 0.6123558878898621,
"step": 1430
},
{
"epoch": 1.7224563515954245,
"grad_norm": 0.663890497997296,
"learning_rate": 5.239225718405616e-06,
"loss": 0.6106290817260742,
"step": 1431
},
{
"epoch": 1.7236604455147502,
"grad_norm": 0.6818691796419768,
"learning_rate": 5.194954536352021e-06,
"loss": 0.6485813856124878,
"step": 1432
},
{
"epoch": 1.7248645394340758,
"grad_norm": 0.6300589817718674,
"learning_rate": 5.150860941852842e-06,
"loss": 0.5373544096946716,
"step": 1433
},
{
"epoch": 1.7260686333534014,
"grad_norm": 0.6532037585377417,
"learning_rate": 5.106945109675759e-06,
"loss": 0.548999011516571,
"step": 1434
},
{
"epoch": 1.7272727272727273,
"grad_norm": 0.6811476109415601,
"learning_rate": 5.0632072138838584e-06,
"loss": 0.6281917691230774,
"step": 1435
},
{
"epoch": 1.728476821192053,
"grad_norm": 0.6972572080478234,
"learning_rate": 5.019647427834978e-06,
"loss": 0.59413743019104,
"step": 1436
},
{
"epoch": 1.7296809151113788,
"grad_norm": 0.661799751509244,
"learning_rate": 4.9762659241810285e-06,
"loss": 0.5843008756637573,
"step": 1437
},
{
"epoch": 1.7308850090307044,
"grad_norm": 0.8659419030131159,
"learning_rate": 4.933062874867267e-06,
"loss": 0.6004117727279663,
"step": 1438
},
{
"epoch": 1.73208910295003,
"grad_norm": 0.7619365375619501,
"learning_rate": 4.8900384511316234e-06,
"loss": 0.6101161241531372,
"step": 1439
},
{
"epoch": 1.7332931968693557,
"grad_norm": 0.7687675218930419,
"learning_rate": 4.847192823504054e-06,
"loss": 0.620930016040802,
"step": 1440
},
{
"epoch": 1.7344972907886815,
"grad_norm": 0.6590959462066254,
"learning_rate": 4.804526161805833e-06,
"loss": 0.5227761268615723,
"step": 1441
},
{
"epoch": 1.7357013847080074,
"grad_norm": 0.6513758806530793,
"learning_rate": 4.7620386351489185e-06,
"loss": 0.5266942381858826,
"step": 1442
},
{
"epoch": 1.736905478627333,
"grad_norm": 0.6879255547887067,
"learning_rate": 4.7197304119352355e-06,
"loss": 0.6544359922409058,
"step": 1443
},
{
"epoch": 1.7381095725466587,
"grad_norm": 0.6605301633143231,
"learning_rate": 4.6776016598560124e-06,
"loss": 0.5252479314804077,
"step": 1444
},
{
"epoch": 1.7393136664659843,
"grad_norm": 0.7144320006304289,
"learning_rate": 4.635652545891156e-06,
"loss": 0.5649040937423706,
"step": 1445
},
{
"epoch": 1.74051776038531,
"grad_norm": 0.6693059953084293,
"learning_rate": 4.5938832363085495e-06,
"loss": 0.6241734623908997,
"step": 1446
},
{
"epoch": 1.7417218543046358,
"grad_norm": 0.7297204441588223,
"learning_rate": 4.552293896663451e-06,
"loss": 0.5992779731750488,
"step": 1447
},
{
"epoch": 1.7429259482239616,
"grad_norm": 0.7963775154058986,
"learning_rate": 4.510884691797751e-06,
"loss": 0.6442251801490784,
"step": 1448
},
{
"epoch": 1.7441300421432873,
"grad_norm": 0.6363117180758585,
"learning_rate": 4.469655785839377e-06,
"loss": 0.5162489414215088,
"step": 1449
},
{
"epoch": 1.745334136062613,
"grad_norm": 0.712831553241241,
"learning_rate": 4.428607342201635e-06,
"loss": 0.49798154830932617,
"step": 1450
},
{
"epoch": 1.7465382299819385,
"grad_norm": 0.6606384761286709,
"learning_rate": 4.387739523582551e-06,
"loss": 0.5387279987335205,
"step": 1451
},
{
"epoch": 1.7477423239012642,
"grad_norm": 0.6740116224699733,
"learning_rate": 4.347052491964265e-06,
"loss": 0.6302919983863831,
"step": 1452
},
{
"epoch": 1.74894641782059,
"grad_norm": 0.6463122078337356,
"learning_rate": 4.306546408612306e-06,
"loss": 0.5648490786552429,
"step": 1453
},
{
"epoch": 1.7501505117399159,
"grad_norm": 0.639843667179173,
"learning_rate": 4.26622143407503e-06,
"loss": 0.6026947498321533,
"step": 1454
},
{
"epoch": 1.7513546056592415,
"grad_norm": 0.7543801349162795,
"learning_rate": 4.226077728182959e-06,
"loss": 0.7119262218475342,
"step": 1455
},
{
"epoch": 1.7525586995785671,
"grad_norm": 0.6804514900474806,
"learning_rate": 4.186115450048128e-06,
"loss": 0.531987726688385,
"step": 1456
},
{
"epoch": 1.7537627934978928,
"grad_norm": 0.7156377356812621,
"learning_rate": 4.1463347580635195e-06,
"loss": 0.5670576095581055,
"step": 1457
},
{
"epoch": 1.7549668874172184,
"grad_norm": 0.6809341862091342,
"learning_rate": 4.106735809902324e-06,
"loss": 0.5247880220413208,
"step": 1458
},
{
"epoch": 1.7561709813365443,
"grad_norm": 0.7650789820662952,
"learning_rate": 4.0673187625174195e-06,
"loss": 0.5779381990432739,
"step": 1459
},
{
"epoch": 1.7573750752558701,
"grad_norm": 0.695708303509977,
"learning_rate": 4.028083772140689e-06,
"loss": 0.641172468662262,
"step": 1460
},
{
"epoch": 1.7585791691751957,
"grad_norm": 0.7230143002792645,
"learning_rate": 3.989030994282434e-06,
"loss": 0.6113120317459106,
"step": 1461
},
{
"epoch": 1.7597832630945214,
"grad_norm": 0.7409818026797323,
"learning_rate": 3.950160583730761e-06,
"loss": 0.5901451110839844,
"step": 1462
},
{
"epoch": 1.760987357013847,
"grad_norm": 0.6895041829720423,
"learning_rate": 3.911472694550916e-06,
"loss": 0.6240176558494568,
"step": 1463
},
{
"epoch": 1.7621914509331726,
"grad_norm": 0.6529779300382468,
"learning_rate": 3.872967480084727e-06,
"loss": 0.6033583283424377,
"step": 1464
},
{
"epoch": 1.7633955448524985,
"grad_norm": 0.68212653410366,
"learning_rate": 3.834645092949973e-06,
"loss": 0.5997433662414551,
"step": 1465
},
{
"epoch": 1.7645996387718244,
"grad_norm": 0.7005451911969677,
"learning_rate": 3.796505685039825e-06,
"loss": 0.6138598322868347,
"step": 1466
},
{
"epoch": 1.76580373269115,
"grad_norm": 0.6415625167401455,
"learning_rate": 3.758549407522144e-06,
"loss": 0.6035425662994385,
"step": 1467
},
{
"epoch": 1.7670078266104756,
"grad_norm": 0.687504172592215,
"learning_rate": 3.720776410838983e-06,
"loss": 0.5945162177085876,
"step": 1468
},
{
"epoch": 1.7682119205298013,
"grad_norm": 0.6456334680135388,
"learning_rate": 3.6831868447059324e-06,
"loss": 0.6363914608955383,
"step": 1469
},
{
"epoch": 1.7694160144491269,
"grad_norm": 0.6900446693261378,
"learning_rate": 3.645780858111547e-06,
"loss": 0.5730565786361694,
"step": 1470
},
{
"epoch": 1.7706201083684527,
"grad_norm": 0.7394950510818776,
"learning_rate": 3.6085585993167805e-06,
"loss": 0.6147753596305847,
"step": 1471
},
{
"epoch": 1.7718242022877786,
"grad_norm": 0.6520440526913354,
"learning_rate": 3.5715202158543125e-06,
"loss": 0.5810579061508179,
"step": 1472
},
{
"epoch": 1.7730282962071042,
"grad_norm": 0.7094833291959122,
"learning_rate": 3.5346658545280795e-06,
"loss": 0.5907972455024719,
"step": 1473
},
{
"epoch": 1.7742323901264299,
"grad_norm": 0.6344277681212521,
"learning_rate": 3.4979956614125953e-06,
"loss": 0.5560249090194702,
"step": 1474
},
{
"epoch": 1.7754364840457555,
"grad_norm": 0.7300026392268726,
"learning_rate": 3.4615097818524235e-06,
"loss": 0.5345858335494995,
"step": 1475
},
{
"epoch": 1.7766405779650811,
"grad_norm": 0.7359455923428527,
"learning_rate": 3.4252083604616182e-06,
"loss": 0.543491542339325,
"step": 1476
},
{
"epoch": 1.777844671884407,
"grad_norm": 0.6823106864670261,
"learning_rate": 3.389091541123074e-06,
"loss": 0.6181703209877014,
"step": 1477
},
{
"epoch": 1.7790487658037328,
"grad_norm": 0.6620693865982018,
"learning_rate": 3.353159466988032e-06,
"loss": 0.6111152768135071,
"step": 1478
},
{
"epoch": 1.7802528597230585,
"grad_norm": 0.6872931183670186,
"learning_rate": 3.3174122804754738e-06,
"loss": 0.5422138571739197,
"step": 1479
},
{
"epoch": 1.781456953642384,
"grad_norm": 0.738156392149178,
"learning_rate": 3.2818501232715794e-06,
"loss": 0.6552243232727051,
"step": 1480
},
{
"epoch": 1.7826610475617097,
"grad_norm": 0.7742629334981834,
"learning_rate": 3.246473136329148e-06,
"loss": 0.6418430805206299,
"step": 1481
},
{
"epoch": 1.7838651414810354,
"grad_norm": 0.6522806054479117,
"learning_rate": 3.211281459867038e-06,
"loss": 0.5439977645874023,
"step": 1482
},
{
"epoch": 1.7850692354003612,
"grad_norm": 0.7528643880285534,
"learning_rate": 3.1762752333696297e-06,
"loss": 0.6083682775497437,
"step": 1483
},
{
"epoch": 1.786273329319687,
"grad_norm": 0.7089494466170355,
"learning_rate": 3.141454595586252e-06,
"loss": 0.5474369525909424,
"step": 1484
},
{
"epoch": 1.7874774232390127,
"grad_norm": 0.6457667187979976,
"learning_rate": 3.1068196845306487e-06,
"loss": 0.49917763471603394,
"step": 1485
},
{
"epoch": 1.7886815171583383,
"grad_norm": 0.6738475782712636,
"learning_rate": 3.072370637480415e-06,
"loss": 0.6236350536346436,
"step": 1486
},
{
"epoch": 1.789885611077664,
"grad_norm": 0.6510418666873684,
"learning_rate": 3.0381075909764744e-06,
"loss": 0.5416608452796936,
"step": 1487
},
{
"epoch": 1.7910897049969896,
"grad_norm": 0.6646652265787325,
"learning_rate": 3.004030680822517e-06,
"loss": 0.5300649404525757,
"step": 1488
},
{
"epoch": 1.7922937989163155,
"grad_norm": 0.6458235745232335,
"learning_rate": 2.9701400420844737e-06,
"loss": 0.5278496742248535,
"step": 1489
},
{
"epoch": 1.7934978928356413,
"grad_norm": 0.6928692675216639,
"learning_rate": 2.9364358090899766e-06,
"loss": 0.5711668729782104,
"step": 1490
},
{
"epoch": 1.794701986754967,
"grad_norm": 0.6739524076286412,
"learning_rate": 2.9029181154278274e-06,
"loss": 0.5389432907104492,
"step": 1491
},
{
"epoch": 1.7959060806742926,
"grad_norm": 0.621068216798583,
"learning_rate": 2.8695870939474624e-06,
"loss": 0.5566880702972412,
"step": 1492
},
{
"epoch": 1.7971101745936182,
"grad_norm": 0.7315346301465079,
"learning_rate": 2.836442876758438e-06,
"loss": 0.5933565497398376,
"step": 1493
},
{
"epoch": 1.7983142685129438,
"grad_norm": 0.6815608529608811,
"learning_rate": 2.8034855952299045e-06,
"loss": 0.5041368007659912,
"step": 1494
},
{
"epoch": 1.7995183624322697,
"grad_norm": 0.6732751718221458,
"learning_rate": 2.770715379990069e-06,
"loss": 0.5126627087593079,
"step": 1495
},
{
"epoch": 1.8007224563515956,
"grad_norm": 0.7039758611332892,
"learning_rate": 2.7381323609256937e-06,
"loss": 0.6248936057090759,
"step": 1496
},
{
"epoch": 1.8019265502709212,
"grad_norm": 0.670116476742304,
"learning_rate": 2.7057366671815856e-06,
"loss": 0.5207480192184448,
"step": 1497
},
{
"epoch": 1.8031306441902468,
"grad_norm": 0.7104022628418962,
"learning_rate": 2.6735284271600657e-06,
"loss": 0.555051326751709,
"step": 1498
},
{
"epoch": 1.8043347381095725,
"grad_norm": 0.673778042025423,
"learning_rate": 2.641507768520479e-06,
"loss": 0.534259557723999,
"step": 1499
},
{
"epoch": 1.805538832028898,
"grad_norm": 0.6919570475804793,
"learning_rate": 2.6096748181786758e-06,
"loss": 0.5753833651542664,
"step": 1500
},
{
"epoch": 1.806742925948224,
"grad_norm": 0.6782523801970958,
"learning_rate": 2.5780297023065057e-06,
"loss": 0.5746898055076599,
"step": 1501
},
{
"epoch": 1.8079470198675498,
"grad_norm": 0.697364851316635,
"learning_rate": 2.546572546331338e-06,
"loss": 0.6060160994529724,
"step": 1502
},
{
"epoch": 1.8091511137868754,
"grad_norm": 0.6463204577526431,
"learning_rate": 2.5153034749355487e-06,
"loss": 0.5508155226707458,
"step": 1503
},
{
"epoch": 1.810355207706201,
"grad_norm": 0.7144416015878513,
"learning_rate": 2.4842226120560255e-06,
"loss": 0.5839451551437378,
"step": 1504
},
{
"epoch": 1.8115593016255267,
"grad_norm": 0.8541030914341952,
"learning_rate": 2.4533300808836757e-06,
"loss": 0.5724666118621826,
"step": 1505
},
{
"epoch": 1.8127633955448526,
"grad_norm": 0.6619563314372996,
"learning_rate": 2.4226260038629545e-06,
"loss": 0.5439346432685852,
"step": 1506
},
{
"epoch": 1.8139674894641782,
"grad_norm": 0.7163163973797126,
"learning_rate": 2.3921105026913527e-06,
"loss": 0.5885302424430847,
"step": 1507
},
{
"epoch": 1.815171583383504,
"grad_norm": 0.6950296954121478,
"learning_rate": 2.3617836983189366e-06,
"loss": 0.6386774182319641,
"step": 1508
},
{
"epoch": 1.8163756773028297,
"grad_norm": 0.6784365328534507,
"learning_rate": 2.3316457109478716e-06,
"loss": 0.5825845003128052,
"step": 1509
},
{
"epoch": 1.8175797712221553,
"grad_norm": 0.6554890874355428,
"learning_rate": 2.3016966600319154e-06,
"loss": 0.5724809765815735,
"step": 1510
},
{
"epoch": 1.818783865141481,
"grad_norm": 0.7627203175968272,
"learning_rate": 2.2719366642759754e-06,
"loss": 0.6278424263000488,
"step": 1511
},
{
"epoch": 1.8199879590608068,
"grad_norm": 0.6914546854567296,
"learning_rate": 2.2423658416356296e-06,
"loss": 0.6198673844337463,
"step": 1512
},
{
"epoch": 1.8211920529801324,
"grad_norm": 0.6351326710785647,
"learning_rate": 2.212984309316646e-06,
"loss": 0.5905113220214844,
"step": 1513
},
{
"epoch": 1.8223961468994583,
"grad_norm": 0.6574505382855732,
"learning_rate": 2.183792183774541e-06,
"loss": 0.6554192900657654,
"step": 1514
},
{
"epoch": 1.823600240818784,
"grad_norm": 0.6342635623446866,
"learning_rate": 2.1547895807141004e-06,
"loss": 0.5438601970672607,
"step": 1515
},
{
"epoch": 1.8248043347381095,
"grad_norm": 0.6923580951549989,
"learning_rate": 2.125976615088926e-06,
"loss": 0.686562180519104,
"step": 1516
},
{
"epoch": 1.8260084286574352,
"grad_norm": 0.7515369516579626,
"learning_rate": 2.0973534011009823e-06,
"loss": 0.5416876673698425,
"step": 1517
},
{
"epoch": 1.827212522576761,
"grad_norm": 0.6886410841599845,
"learning_rate": 2.0689200522001294e-06,
"loss": 0.5325416922569275,
"step": 1518
},
{
"epoch": 1.8284166164960867,
"grad_norm": 0.6682374450698709,
"learning_rate": 2.040676681083703e-06,
"loss": 0.6342188119888306,
"step": 1519
},
{
"epoch": 1.8296207104154125,
"grad_norm": 0.7000955513877171,
"learning_rate": 2.01262339969604e-06,
"loss": 0.6164153814315796,
"step": 1520
},
{
"epoch": 1.8308248043347382,
"grad_norm": 0.6704393653313119,
"learning_rate": 1.9847603192280515e-06,
"loss": 0.6320425271987915,
"step": 1521
},
{
"epoch": 1.8320288982540638,
"grad_norm": 0.5730845391915349,
"learning_rate": 1.957087550116765e-06,
"loss": 0.4951869249343872,
"step": 1522
},
{
"epoch": 1.8332329921733894,
"grad_norm": 0.6338525544790719,
"learning_rate": 1.929605202044904e-06,
"loss": 0.5820206999778748,
"step": 1523
},
{
"epoch": 1.8344370860927153,
"grad_norm": 0.717794910834416,
"learning_rate": 1.9023133839404517e-06,
"loss": 0.6417974829673767,
"step": 1524
},
{
"epoch": 1.835641180012041,
"grad_norm": 0.6795481581739131,
"learning_rate": 1.875212203976201e-06,
"loss": 0.5976875424385071,
"step": 1525
},
{
"epoch": 1.8368452739313668,
"grad_norm": 0.6441331199887967,
"learning_rate": 1.8483017695693494e-06,
"loss": 0.5258415341377258,
"step": 1526
},
{
"epoch": 1.8380493678506924,
"grad_norm": 0.67966884524053,
"learning_rate": 1.8215821873810601e-06,
"loss": 0.6156249642372131,
"step": 1527
},
{
"epoch": 1.839253461770018,
"grad_norm": 0.6888070858144395,
"learning_rate": 1.7950535633160403e-06,
"loss": 0.498773455619812,
"step": 1528
},
{
"epoch": 1.8404575556893437,
"grad_norm": 0.7001813340781752,
"learning_rate": 1.768716002522125e-06,
"loss": 0.6881451606750488,
"step": 1529
},
{
"epoch": 1.8416616496086695,
"grad_norm": 0.7282826525728465,
"learning_rate": 1.7425696093898548e-06,
"loss": 0.5823349952697754,
"step": 1530
},
{
"epoch": 1.8428657435279951,
"grad_norm": 0.6411953283518155,
"learning_rate": 1.7166144875520763e-06,
"loss": 0.635110080242157,
"step": 1531
},
{
"epoch": 1.844069837447321,
"grad_norm": 0.6786755533256277,
"learning_rate": 1.6908507398834927e-06,
"loss": 0.5210625529289246,
"step": 1532
},
{
"epoch": 1.8452739313666466,
"grad_norm": 0.6804080746910104,
"learning_rate": 1.6652784685003197e-06,
"loss": 0.5672831535339355,
"step": 1533
},
{
"epoch": 1.8464780252859723,
"grad_norm": 0.7292886106274454,
"learning_rate": 1.6398977747598243e-06,
"loss": 0.5520044565200806,
"step": 1534
},
{
"epoch": 1.847682119205298,
"grad_norm": 0.7230620358286436,
"learning_rate": 1.614708759259942e-06,
"loss": 0.5762327313423157,
"step": 1535
},
{
"epoch": 1.8488862131246238,
"grad_norm": 0.6191659203515569,
"learning_rate": 1.5897115218388936e-06,
"loss": 0.4777446985244751,
"step": 1536
},
{
"epoch": 1.8500903070439494,
"grad_norm": 0.613001967206192,
"learning_rate": 1.564906161574764e-06,
"loss": 0.5762559175491333,
"step": 1537
},
{
"epoch": 1.8512944009632752,
"grad_norm": 0.6529567725151024,
"learning_rate": 1.5402927767851239e-06,
"loss": 0.6167515516281128,
"step": 1538
},
{
"epoch": 1.8524984948826009,
"grad_norm": 0.6218098628710633,
"learning_rate": 1.5158714650266414e-06,
"loss": 0.6211032867431641,
"step": 1539
},
{
"epoch": 1.8537025888019265,
"grad_norm": 0.7555424829761173,
"learning_rate": 1.4916423230946885e-06,
"loss": 0.6565650701522827,
"step": 1540
},
{
"epoch": 1.8549066827212521,
"grad_norm": 0.6588724386208783,
"learning_rate": 1.4676054470229517e-06,
"loss": 0.5031234622001648,
"step": 1541
},
{
"epoch": 1.856110776640578,
"grad_norm": 0.770945448661847,
"learning_rate": 1.443760932083077e-06,
"loss": 0.6038906574249268,
"step": 1542
},
{
"epoch": 1.8573148705599036,
"grad_norm": 0.691453070814277,
"learning_rate": 1.4201088727842648e-06,
"loss": 0.5204800367355347,
"step": 1543
},
{
"epoch": 1.8585189644792295,
"grad_norm": 0.6861089494242295,
"learning_rate": 1.3966493628729039e-06,
"loss": 0.596354603767395,
"step": 1544
},
{
"epoch": 1.8597230583985551,
"grad_norm": 0.6216607077411774,
"learning_rate": 1.373382495332215e-06,
"loss": 0.5718226432800293,
"step": 1545
},
{
"epoch": 1.8609271523178808,
"grad_norm": 1.0035977848891333,
"learning_rate": 1.3503083623818412e-06,
"loss": 0.6382907629013062,
"step": 1546
},
{
"epoch": 1.8621312462372064,
"grad_norm": 0.7040053351661644,
"learning_rate": 1.3274270554775425e-06,
"loss": 0.586823582649231,
"step": 1547
},
{
"epoch": 1.8633353401565322,
"grad_norm": 0.8213637768031666,
"learning_rate": 1.3047386653107784e-06,
"loss": 0.5361060500144958,
"step": 1548
},
{
"epoch": 1.8645394340758579,
"grad_norm": 0.7300514807359588,
"learning_rate": 1.282243281808393e-06,
"loss": 0.5263211131095886,
"step": 1549
},
{
"epoch": 1.8657435279951837,
"grad_norm": 0.6977522358911603,
"learning_rate": 1.2599409941322081e-06,
"loss": 0.546600878238678,
"step": 1550
},
{
"epoch": 1.8669476219145094,
"grad_norm": 0.7671572569220524,
"learning_rate": 1.2378318906787145e-06,
"loss": 0.663577675819397,
"step": 1551
},
{
"epoch": 1.868151715833835,
"grad_norm": 0.6663382304458376,
"learning_rate": 1.2159160590787143e-06,
"loss": 0.6312112212181091,
"step": 1552
},
{
"epoch": 1.8693558097531606,
"grad_norm": 0.6876839860406914,
"learning_rate": 1.1941935861969455e-06,
"loss": 0.5542502403259277,
"step": 1553
},
{
"epoch": 1.8705599036724865,
"grad_norm": 0.6687338089550334,
"learning_rate": 1.1726645581317752e-06,
"loss": 0.5027328133583069,
"step": 1554
},
{
"epoch": 1.8717639975918121,
"grad_norm": 0.6967733086755344,
"learning_rate": 1.1513290602148174e-06,
"loss": 0.5309032201766968,
"step": 1555
},
{
"epoch": 1.872968091511138,
"grad_norm": 0.6524410503120639,
"learning_rate": 1.1301871770106332e-06,
"loss": 0.5936433672904968,
"step": 1556
},
{
"epoch": 1.8741721854304636,
"grad_norm": 0.6925436889231327,
"learning_rate": 1.1092389923163915e-06,
"loss": 0.6055241823196411,
"step": 1557
},
{
"epoch": 1.8753762793497892,
"grad_norm": 0.6373219587941897,
"learning_rate": 1.0884845891614925e-06,
"loss": 0.5215670466423035,
"step": 1558
},
{
"epoch": 1.8765803732691149,
"grad_norm": 0.727383203209103,
"learning_rate": 1.0679240498073118e-06,
"loss": 0.6773104667663574,
"step": 1559
},
{
"epoch": 1.8777844671884407,
"grad_norm": 0.618711203403452,
"learning_rate": 1.0475574557467837e-06,
"loss": 0.49172860383987427,
"step": 1560
},
{
"epoch": 1.8789885611077664,
"grad_norm": 0.768599713429045,
"learning_rate": 1.0273848877041802e-06,
"loss": 0.5530818700790405,
"step": 1561
},
{
"epoch": 1.8801926550270922,
"grad_norm": 0.6410580132596866,
"learning_rate": 1.0074064256347104e-06,
"loss": 0.5300968289375305,
"step": 1562
},
{
"epoch": 1.8813967489464178,
"grad_norm": 0.6688489587617759,
"learning_rate": 9.876221487242322e-07,
"loss": 0.5452580451965332,
"step": 1563
},
{
"epoch": 1.8826008428657435,
"grad_norm": 0.6890360731041622,
"learning_rate": 9.680321353889576e-07,
"loss": 0.5452010631561279,
"step": 1564
},
{
"epoch": 1.883804936785069,
"grad_norm": 0.6122470601394586,
"learning_rate": 9.486364632750878e-07,
"loss": 0.5611681342124939,
"step": 1565
},
{
"epoch": 1.885009030704395,
"grad_norm": 0.6572902573416992,
"learning_rate": 9.294352092585779e-07,
"loss": 0.527140736579895,
"step": 1566
},
{
"epoch": 1.8862131246237208,
"grad_norm": 0.7618143214306591,
"learning_rate": 9.104284494447779e-07,
"loss": 0.5629008412361145,
"step": 1567
},
{
"epoch": 1.8874172185430464,
"grad_norm": 0.6885541395424113,
"learning_rate": 8.916162591681543e-07,
"loss": 0.5700817108154297,
"step": 1568
},
{
"epoch": 1.888621312462372,
"grad_norm": 0.6865506853312878,
"learning_rate": 8.729987129919682e-07,
"loss": 0.5483814477920532,
"step": 1569
},
{
"epoch": 1.8898254063816977,
"grad_norm": 0.710893895157567,
"learning_rate": 8.545758847080143e-07,
"loss": 0.5944420099258423,
"step": 1570
},
{
"epoch": 1.8910295003010233,
"grad_norm": 0.6079309909831784,
"learning_rate": 8.363478473363107e-07,
"loss": 0.5602034330368042,
"step": 1571
},
{
"epoch": 1.8922335942203492,
"grad_norm": 0.6434961027642722,
"learning_rate": 8.183146731247982e-07,
"loss": 0.6112760305404663,
"step": 1572
},
{
"epoch": 1.893437688139675,
"grad_norm": 0.7210093955232695,
"learning_rate": 8.004764335490856e-07,
"loss": 0.5606849193572998,
"step": 1573
},
{
"epoch": 1.8946417820590007,
"grad_norm": 0.6659584597671264,
"learning_rate": 7.828331993121163e-07,
"loss": 0.5550767183303833,
"step": 1574
},
{
"epoch": 1.8958458759783263,
"grad_norm": 0.6587516929317906,
"learning_rate": 7.653850403439411e-07,
"loss": 0.5370407104492188,
"step": 1575
},
{
"epoch": 1.897049969897652,
"grad_norm": 0.7189514634341319,
"learning_rate": 7.481320258014124e-07,
"loss": 0.5744485259056091,
"step": 1576
},
{
"epoch": 1.8982540638169776,
"grad_norm": 0.7486631699615646,
"learning_rate": 7.31074224067918e-07,
"loss": 0.5951935052871704,
"step": 1577
},
{
"epoch": 1.8994581577363034,
"grad_norm": 0.6632069887545978,
"learning_rate": 7.142117027531092e-07,
"loss": 0.5946663022041321,
"step": 1578
},
{
"epoch": 1.9006622516556293,
"grad_norm": 0.7508644684135262,
"learning_rate": 6.975445286926063e-07,
"loss": 0.58709716796875,
"step": 1579
},
{
"epoch": 1.901866345574955,
"grad_norm": 0.6684309384208824,
"learning_rate": 6.810727679477935e-07,
"loss": 0.5619924664497375,
"step": 1580
},
{
"epoch": 1.9030704394942806,
"grad_norm": 0.6844806194718719,
"learning_rate": 6.647964858055133e-07,
"loss": 0.6591289639472961,
"step": 1581
},
{
"epoch": 1.9042745334136062,
"grad_norm": 0.6686552666556163,
"learning_rate": 6.48715746777806e-07,
"loss": 0.6070966720581055,
"step": 1582
},
{
"epoch": 1.9054786273329318,
"grad_norm": 0.6563726007677,
"learning_rate": 6.328306146016593e-07,
"loss": 0.5923444628715515,
"step": 1583
},
{
"epoch": 1.9066827212522577,
"grad_norm": 0.6744572496615946,
"learning_rate": 6.171411522387871e-07,
"loss": 0.6328150629997253,
"step": 1584
},
{
"epoch": 1.9078868151715835,
"grad_norm": 0.851438330026497,
"learning_rate": 6.016474218753288e-07,
"loss": 0.6075662970542908,
"step": 1585
},
{
"epoch": 1.9090909090909092,
"grad_norm": 0.6382780096006331,
"learning_rate": 5.863494849216444e-07,
"loss": 0.6110202074050903,
"step": 1586
},
{
"epoch": 1.9102950030102348,
"grad_norm": 0.6697355611749591,
"learning_rate": 5.712474020120484e-07,
"loss": 0.5693877935409546,
"step": 1587
},
{
"epoch": 1.9114990969295604,
"grad_norm": 0.6802556251191029,
"learning_rate": 5.563412330045758e-07,
"loss": 0.583171546459198,
"step": 1588
},
{
"epoch": 1.912703190848886,
"grad_norm": 0.6263710497464096,
"learning_rate": 5.416310369807331e-07,
"loss": 0.5916058421134949,
"step": 1589
},
{
"epoch": 1.913907284768212,
"grad_norm": 0.6464567646077097,
"learning_rate": 5.271168722453035e-07,
"loss": 0.5331957936286926,
"step": 1590
},
{
"epoch": 1.9151113786875378,
"grad_norm": 0.6325951844367099,
"learning_rate": 5.127987963260583e-07,
"loss": 0.576506495475769,
"step": 1591
},
{
"epoch": 1.9163154726068634,
"grad_norm": 0.680529169178207,
"learning_rate": 4.986768659735852e-07,
"loss": 0.5742542147636414,
"step": 1592
},
{
"epoch": 1.917519566526189,
"grad_norm": 0.709862252702507,
"learning_rate": 4.847511371610159e-07,
"loss": 0.5625314712524414,
"step": 1593
},
{
"epoch": 1.9187236604455147,
"grad_norm": 0.705410663948067,
"learning_rate": 4.710216650838317e-07,
"loss": 0.5976494550704956,
"step": 1594
},
{
"epoch": 1.9199277543648403,
"grad_norm": 0.6621288019120659,
"learning_rate": 4.5748850415964774e-07,
"loss": 0.5507940649986267,
"step": 1595
},
{
"epoch": 1.9211318482841662,
"grad_norm": 0.6863149874567348,
"learning_rate": 4.4415170802797333e-07,
"loss": 0.5350108742713928,
"step": 1596
},
{
"epoch": 1.922335942203492,
"grad_norm": 0.6288628977776699,
"learning_rate": 4.3101132955002396e-07,
"loss": 0.5225965976715088,
"step": 1597
},
{
"epoch": 1.9235400361228177,
"grad_norm": 0.6523787284202942,
"learning_rate": 4.180674208084989e-07,
"loss": 0.5456944108009338,
"step": 1598
},
{
"epoch": 1.9247441300421433,
"grad_norm": 0.6723823452661183,
"learning_rate": 4.0532003310736475e-07,
"loss": 0.5596846342086792,
"step": 1599
},
{
"epoch": 1.925948223961469,
"grad_norm": 0.6639128844336288,
"learning_rate": 3.9276921697169455e-07,
"loss": 0.543656051158905,
"step": 1600
},
{
"epoch": 1.9271523178807946,
"grad_norm": 0.6242422771084308,
"learning_rate": 3.804150221474179e-07,
"loss": 0.6506460309028625,
"step": 1601
},
{
"epoch": 1.9283564118001204,
"grad_norm": 0.7424854047490422,
"learning_rate": 3.6825749760113215e-07,
"loss": 0.605479896068573,
"step": 1602
},
{
"epoch": 1.9295605057194463,
"grad_norm": 0.6471089250698551,
"learning_rate": 3.5629669151994725e-07,
"loss": 0.6640603542327881,
"step": 1603
},
{
"epoch": 1.930764599638772,
"grad_norm": 0.7264230357435266,
"learning_rate": 3.4453265131124677e-07,
"loss": 0.6173760890960693,
"step": 1604
},
{
"epoch": 1.9319686935580975,
"grad_norm": 0.6707412555489995,
"learning_rate": 3.3296542360253256e-07,
"loss": 0.5396270155906677,
"step": 1605
},
{
"epoch": 1.9331727874774232,
"grad_norm": 0.7768420000306318,
"learning_rate": 3.2159505424122495e-07,
"loss": 0.6355475187301636,
"step": 1606
},
{
"epoch": 1.9343768813967488,
"grad_norm": 0.7302604152833332,
"learning_rate": 3.1042158829447385e-07,
"loss": 0.6131929159164429,
"step": 1607
},
{
"epoch": 1.9355809753160746,
"grad_norm": 0.9956608539221433,
"learning_rate": 2.9944507004900367e-07,
"loss": 0.5304654240608215,
"step": 1608
},
{
"epoch": 1.9367850692354005,
"grad_norm": 0.739504539921323,
"learning_rate": 2.8866554301091866e-07,
"loss": 0.5288342833518982,
"step": 1609
},
{
"epoch": 1.9379891631547261,
"grad_norm": 0.6816680137264456,
"learning_rate": 2.780830499055476e-07,
"loss": 0.6204798817634583,
"step": 1610
},
{
"epoch": 1.9391932570740518,
"grad_norm": 0.6774156941317607,
"learning_rate": 2.6769763267723845e-07,
"loss": 0.5852087140083313,
"step": 1611
},
{
"epoch": 1.9403973509933774,
"grad_norm": 0.6706146200911259,
"learning_rate": 2.575093324892364e-07,
"loss": 0.5640581250190735,
"step": 1612
},
{
"epoch": 1.941601444912703,
"grad_norm": 0.7937578533972202,
"learning_rate": 2.4751818972350016e-07,
"loss": 0.572894811630249,
"step": 1613
},
{
"epoch": 1.9428055388320289,
"grad_norm": 0.7337506916245448,
"learning_rate": 2.377242439805305e-07,
"loss": 0.6091702580451965,
"step": 1614
},
{
"epoch": 1.9440096327513547,
"grad_norm": 1.3236024161162703,
"learning_rate": 2.281275340792477e-07,
"loss": 0.5569441914558411,
"step": 1615
},
{
"epoch": 1.9452137266706804,
"grad_norm": 0.6928530830843692,
"learning_rate": 2.187280980567863e-07,
"loss": 0.620290994644165,
"step": 1616
},
{
"epoch": 1.946417820590006,
"grad_norm": 0.6937580624955899,
"learning_rate": 2.095259731684007e-07,
"loss": 0.5869073867797852,
"step": 1617
},
{
"epoch": 1.9476219145093316,
"grad_norm": 0.7353184221706057,
"learning_rate": 2.0052119588727103e-07,
"loss": 0.5709312558174133,
"step": 1618
},
{
"epoch": 1.9488260084286573,
"grad_norm": 0.6548506998663534,
"learning_rate": 1.917138019043918e-07,
"loss": 0.5279954075813293,
"step": 1619
},
{
"epoch": 1.9500301023479831,
"grad_norm": 0.6634588715985461,
"learning_rate": 1.8310382612841125e-07,
"loss": 0.5710673332214355,
"step": 1620
},
{
"epoch": 1.951234196267309,
"grad_norm": 0.6236608187574747,
"learning_rate": 1.7469130268549238e-07,
"loss": 0.5501574873924255,
"step": 1621
},
{
"epoch": 1.9524382901866346,
"grad_norm": 0.6457007549327352,
"learning_rate": 1.6647626491919088e-07,
"loss": 0.5679866671562195,
"step": 1622
},
{
"epoch": 1.9536423841059603,
"grad_norm": 0.6627218971204165,
"learning_rate": 1.5845874539032192e-07,
"loss": 0.6286696195602417,
"step": 1623
},
{
"epoch": 1.9548464780252859,
"grad_norm": 0.6773761258451763,
"learning_rate": 1.5063877587681019e-07,
"loss": 0.5981844663619995,
"step": 1624
},
{
"epoch": 1.9560505719446115,
"grad_norm": 0.6653439928090288,
"learning_rate": 1.4301638737358459e-07,
"loss": 0.5640007853507996,
"step": 1625
},
{
"epoch": 1.9572546658639374,
"grad_norm": 0.6589267219551842,
"learning_rate": 1.3559161009246146e-07,
"loss": 0.566085159778595,
"step": 1626
},
{
"epoch": 1.9584587597832632,
"grad_norm": 0.6618794429902688,
"learning_rate": 1.283644734619893e-07,
"loss": 0.5385036468505859,
"step": 1627
},
{
"epoch": 1.9596628537025889,
"grad_norm": 0.7223226717477191,
"learning_rate": 1.2133500612737103e-07,
"loss": 0.5916330814361572,
"step": 1628
},
{
"epoch": 1.9608669476219145,
"grad_norm": 0.6882613273174467,
"learning_rate": 1.1450323595034174e-07,
"loss": 0.556686282157898,
"step": 1629
},
{
"epoch": 1.9620710415412401,
"grad_norm": 0.7171451577063112,
"learning_rate": 1.0786919000903562e-07,
"loss": 0.5608625411987305,
"step": 1630
},
{
"epoch": 1.963275135460566,
"grad_norm": 0.6384421145532632,
"learning_rate": 1.0143289459790816e-07,
"loss": 0.5630712509155273,
"step": 1631
},
{
"epoch": 1.9644792293798916,
"grad_norm": 0.6703048979056866,
"learning_rate": 9.519437522760299e-08,
"loss": 0.5901123285293579,
"step": 1632
},
{
"epoch": 1.9656833232992175,
"grad_norm": 0.6867348554872079,
"learning_rate": 8.915365662488518e-08,
"loss": 0.613411545753479,
"step": 1633
},
{
"epoch": 1.966887417218543,
"grad_norm": 0.7400259582135543,
"learning_rate": 8.331076273250249e-08,
"loss": 0.5884881615638733,
"step": 1634
},
{
"epoch": 1.9680915111378687,
"grad_norm": 0.6915837149997415,
"learning_rate": 7.766571670913547e-08,
"loss": 0.5767718553543091,
"step": 1635
},
{
"epoch": 1.9692956050571944,
"grad_norm": 0.615336117944421,
"learning_rate": 7.221854092926971e-08,
"loss": 0.5186179876327515,
"step": 1636
},
{
"epoch": 1.9704996989765202,
"grad_norm": 0.7202939771776644,
"learning_rate": 6.696925698311817e-08,
"loss": 0.5988378524780273,
"step": 1637
},
{
"epoch": 1.9717037928958459,
"grad_norm": 0.6185657178722601,
"learning_rate": 6.191788567654344e-08,
"loss": 0.49759912490844727,
"step": 1638
},
{
"epoch": 1.9729078868151717,
"grad_norm": 0.6946831772383305,
"learning_rate": 5.706444703096336e-08,
"loss": 0.5950595736503601,
"step": 1639
},
{
"epoch": 1.9741119807344973,
"grad_norm": 0.7611465759908403,
"learning_rate": 5.240896028327891e-08,
"loss": 0.6370514631271362,
"step": 1640
},
{
"epoch": 1.975316074653823,
"grad_norm": 0.647055596484101,
"learning_rate": 4.7951443885807524e-08,
"loss": 0.5466381311416626,
"step": 1641
},
{
"epoch": 1.9765201685731486,
"grad_norm": 0.709007923344838,
"learning_rate": 4.3691915506177686e-08,
"loss": 0.5643912553787231,
"step": 1642
},
{
"epoch": 1.9777242624924745,
"grad_norm": 0.7269801621655702,
"learning_rate": 3.963039202730112e-08,
"loss": 0.5674261450767517,
"step": 1643
},
{
"epoch": 1.9789283564118,
"grad_norm": 0.6442326121814204,
"learning_rate": 3.576688954727847e-08,
"loss": 0.5666972398757935,
"step": 1644
},
{
"epoch": 1.980132450331126,
"grad_norm": 0.7459964922296664,
"learning_rate": 3.210142337932709e-08,
"loss": 0.5523781776428223,
"step": 1645
},
{
"epoch": 1.9813365442504516,
"grad_norm": 0.7581081381677305,
"learning_rate": 2.8634008051758864e-08,
"loss": 0.6662766337394714,
"step": 1646
},
{
"epoch": 1.9825406381697772,
"grad_norm": 0.7216186682510903,
"learning_rate": 2.536465730788029e-08,
"loss": 0.5974636077880859,
"step": 1647
},
{
"epoch": 1.9837447320891028,
"grad_norm": 0.6628837622577649,
"learning_rate": 2.229338410597026e-08,
"loss": 0.4923580288887024,
"step": 1648
},
{
"epoch": 1.9849488260084287,
"grad_norm": 0.704122217590992,
"learning_rate": 1.9420200619207907e-08,
"loss": 0.6617586612701416,
"step": 1649
},
{
"epoch": 1.9861529199277543,
"grad_norm": 0.7241628925331735,
"learning_rate": 1.6745118235628188e-08,
"loss": 0.6709647178649902,
"step": 1650
},
{
"epoch": 1.9873570138470802,
"grad_norm": 0.6785703998166114,
"learning_rate": 1.4268147558088585e-08,
"loss": 0.5783538818359375,
"step": 1651
},
{
"epoch": 1.9885611077664058,
"grad_norm": 0.7847685208073799,
"learning_rate": 1.1989298404213588e-08,
"loss": 0.6351412534713745,
"step": 1652
},
{
"epoch": 1.9897652016857315,
"grad_norm": 0.6116629465922838,
"learning_rate": 9.908579806361396e-09,
"loss": 0.5521553754806519,
"step": 1653
},
{
"epoch": 1.990969295605057,
"grad_norm": 0.6162078650916201,
"learning_rate": 8.026000011596146e-09,
"loss": 0.5397777557373047,
"step": 1654
},
{
"epoch": 1.992173389524383,
"grad_norm": 0.6638506733828293,
"learning_rate": 6.341566481626871e-09,
"loss": 0.566266655921936,
"step": 1655
},
{
"epoch": 1.9933774834437086,
"grad_norm": 0.6652397648214484,
"learning_rate": 4.855285892813033e-09,
"loss": 0.6032072305679321,
"step": 1656
},
{
"epoch": 1.9945815773630344,
"grad_norm": 0.6598149842452187,
"learning_rate": 3.567164136120127e-09,
"loss": 0.5143309235572815,
"step": 1657
},
{
"epoch": 1.99578567128236,
"grad_norm": 0.6618621283675333,
"learning_rate": 2.47720631710302e-09,
"loss": 0.5726733207702637,
"step": 1658
},
{
"epoch": 1.9969897652016857,
"grad_norm": 0.6933592150593764,
"learning_rate": 1.5854167558670975e-09,
"loss": 0.5318538546562195,
"step": 1659
},
{
"epoch": 1.9981938591210113,
"grad_norm": 0.6341432390196854,
"learning_rate": 8.917989870849131e-10,
"loss": 0.5127713680267334,
"step": 1660
},
{
"epoch": 1.9993979530403372,
"grad_norm": 0.7221071105709749,
"learning_rate": 3.9635575994623196e-10,
"loss": 0.6321007013320923,
"step": 1661
},
{
"epoch": 2.0,
"grad_norm": 0.9210968506833704,
"learning_rate": 9.908903817468229e-11,
"loss": 0.5240795016288757,
"step": 1662
},
{
"epoch": 2.0,
"step": 1662,
"total_flos": 1138324862599168.0,
"train_loss": 0.6906307002733474,
"train_runtime": 17072.7016,
"train_samples_per_second": 0.778,
"train_steps_per_second": 0.097
}
],
"logging_steps": 1,
"max_steps": 1662,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1138324862599168.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}