{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9983753046303816,
"eval_steps": 500,
"global_step": 2460,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0012185215272136475,
"grad_norm": 6.834534852371434,
"learning_rate": 4.0650406504065046e-08,
"loss": 1.0814,
"step": 1
},
{
"epoch": 0.002437043054427295,
"grad_norm": 6.434639482368721,
"learning_rate": 8.130081300813009e-08,
"loss": 1.0829,
"step": 2
},
{
"epoch": 0.0036555645816409425,
"grad_norm": 6.863769364155968,
"learning_rate": 1.2195121951219514e-07,
"loss": 1.1046,
"step": 3
},
{
"epoch": 0.00487408610885459,
"grad_norm": 6.5857528488136925,
"learning_rate": 1.6260162601626018e-07,
"loss": 1.0853,
"step": 4
},
{
"epoch": 0.006092607636068237,
"grad_norm": 6.521091645053231,
"learning_rate": 2.0325203252032523e-07,
"loss": 1.0773,
"step": 5
},
{
"epoch": 0.007311129163281885,
"grad_norm": 6.29456329359191,
"learning_rate": 2.439024390243903e-07,
"loss": 1.0569,
"step": 6
},
{
"epoch": 0.008529650690495532,
"grad_norm": 6.6897100511742575,
"learning_rate": 2.845528455284553e-07,
"loss": 1.0615,
"step": 7
},
{
"epoch": 0.00974817221770918,
"grad_norm": 6.645918686532055,
"learning_rate": 3.2520325203252037e-07,
"loss": 1.11,
"step": 8
},
{
"epoch": 0.010966693744922826,
"grad_norm": 6.5860967765132505,
"learning_rate": 3.6585365853658536e-07,
"loss": 1.0968,
"step": 9
},
{
"epoch": 0.012185215272136474,
"grad_norm": 6.316550148573923,
"learning_rate": 4.0650406504065046e-07,
"loss": 1.0402,
"step": 10
},
{
"epoch": 0.013403736799350122,
"grad_norm": 6.589233946221034,
"learning_rate": 4.471544715447155e-07,
"loss": 1.0883,
"step": 11
},
{
"epoch": 0.01462225832656377,
"grad_norm": 5.923561630066145,
"learning_rate": 4.878048780487805e-07,
"loss": 1.0577,
"step": 12
},
{
"epoch": 0.015840779853777416,
"grad_norm": 6.107921911359583,
"learning_rate": 5.284552845528456e-07,
"loss": 1.0624,
"step": 13
},
{
"epoch": 0.017059301380991064,
"grad_norm": 5.923788461868563,
"learning_rate": 5.691056910569106e-07,
"loss": 1.0736,
"step": 14
},
{
"epoch": 0.018277822908204712,
"grad_norm": 5.874057009305477,
"learning_rate": 6.097560975609757e-07,
"loss": 1.0447,
"step": 15
},
{
"epoch": 0.01949634443541836,
"grad_norm": 5.031342070027601,
"learning_rate": 6.504065040650407e-07,
"loss": 1.0433,
"step": 16
},
{
"epoch": 0.020714865962632008,
"grad_norm": 4.869478502925938,
"learning_rate": 6.910569105691058e-07,
"loss": 1.0411,
"step": 17
},
{
"epoch": 0.021933387489845652,
"grad_norm": 4.534685569961031,
"learning_rate": 7.317073170731707e-07,
"loss": 0.9874,
"step": 18
},
{
"epoch": 0.0231519090170593,
"grad_norm": 4.357854034553876,
"learning_rate": 7.723577235772359e-07,
"loss": 1.0014,
"step": 19
},
{
"epoch": 0.024370430544272948,
"grad_norm": 4.315329088535952,
"learning_rate": 8.130081300813009e-07,
"loss": 0.9925,
"step": 20
},
{
"epoch": 0.025588952071486596,
"grad_norm": 3.3054166230275337,
"learning_rate": 8.53658536585366e-07,
"loss": 1.003,
"step": 21
},
{
"epoch": 0.026807473598700244,
"grad_norm": 2.726952834638993,
"learning_rate": 8.94308943089431e-07,
"loss": 0.9692,
"step": 22
},
{
"epoch": 0.028025995125913892,
"grad_norm": 2.7243029466264,
"learning_rate": 9.349593495934959e-07,
"loss": 1.0212,
"step": 23
},
{
"epoch": 0.02924451665312754,
"grad_norm": 2.5896267051583313,
"learning_rate": 9.75609756097561e-07,
"loss": 0.9757,
"step": 24
},
{
"epoch": 0.030463038180341188,
"grad_norm": 2.6706226110708498,
"learning_rate": 1.0162601626016261e-06,
"loss": 0.9801,
"step": 25
},
{
"epoch": 0.03168155970755483,
"grad_norm": 2.437924378367497,
"learning_rate": 1.0569105691056912e-06,
"loss": 0.9768,
"step": 26
},
{
"epoch": 0.03290008123476848,
"grad_norm": 2.5803827822332397,
"learning_rate": 1.0975609756097562e-06,
"loss": 0.9895,
"step": 27
},
{
"epoch": 0.03411860276198213,
"grad_norm": 2.203043465720474,
"learning_rate": 1.1382113821138213e-06,
"loss": 0.9726,
"step": 28
},
{
"epoch": 0.035337124289195776,
"grad_norm": 1.805085101507344,
"learning_rate": 1.1788617886178863e-06,
"loss": 0.9237,
"step": 29
},
{
"epoch": 0.036555645816409424,
"grad_norm": 2.1165116479858215,
"learning_rate": 1.2195121951219514e-06,
"loss": 0.9338,
"step": 30
},
{
"epoch": 0.03777416734362307,
"grad_norm": 2.3435725524236735,
"learning_rate": 1.2601626016260162e-06,
"loss": 0.9365,
"step": 31
},
{
"epoch": 0.03899268887083672,
"grad_norm": 2.2985182006567326,
"learning_rate": 1.3008130081300815e-06,
"loss": 0.9135,
"step": 32
},
{
"epoch": 0.04021121039805037,
"grad_norm": 2.1356048436615036,
"learning_rate": 1.3414634146341465e-06,
"loss": 0.9196,
"step": 33
},
{
"epoch": 0.041429731925264016,
"grad_norm": 2.028116965668269,
"learning_rate": 1.3821138211382116e-06,
"loss": 0.9074,
"step": 34
},
{
"epoch": 0.042648253452477664,
"grad_norm": 1.7266339650438713,
"learning_rate": 1.4227642276422766e-06,
"loss": 0.8969,
"step": 35
},
{
"epoch": 0.043866774979691305,
"grad_norm": 1.548462619619361,
"learning_rate": 1.4634146341463414e-06,
"loss": 0.9009,
"step": 36
},
{
"epoch": 0.04508529650690495,
"grad_norm": 1.2325234503287605,
"learning_rate": 1.5040650406504067e-06,
"loss": 0.8942,
"step": 37
},
{
"epoch": 0.0463038180341186,
"grad_norm": 1.119008985117944,
"learning_rate": 1.5447154471544717e-06,
"loss": 0.8869,
"step": 38
},
{
"epoch": 0.04752233956133225,
"grad_norm": 1.232748685157114,
"learning_rate": 1.5853658536585368e-06,
"loss": 0.8542,
"step": 39
},
{
"epoch": 0.048740861088545896,
"grad_norm": 1.6677472249438465,
"learning_rate": 1.6260162601626018e-06,
"loss": 0.8594,
"step": 40
},
{
"epoch": 0.049959382615759544,
"grad_norm": 1.5964345757340976,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.8481,
"step": 41
},
{
"epoch": 0.05117790414297319,
"grad_norm": 1.5241421915515152,
"learning_rate": 1.707317073170732e-06,
"loss": 0.8478,
"step": 42
},
{
"epoch": 0.05239642567018684,
"grad_norm": 1.350659230101709,
"learning_rate": 1.747967479674797e-06,
"loss": 0.8669,
"step": 43
},
{
"epoch": 0.05361494719740049,
"grad_norm": 1.0459886477510316,
"learning_rate": 1.788617886178862e-06,
"loss": 0.8229,
"step": 44
},
{
"epoch": 0.054833468724614136,
"grad_norm": 0.9790437025811132,
"learning_rate": 1.8292682926829268e-06,
"loss": 0.8214,
"step": 45
},
{
"epoch": 0.056051990251827784,
"grad_norm": 1.1555035661921353,
"learning_rate": 1.8699186991869919e-06,
"loss": 0.8106,
"step": 46
},
{
"epoch": 0.05727051177904143,
"grad_norm": 1.246916947603362,
"learning_rate": 1.9105691056910574e-06,
"loss": 0.8193,
"step": 47
},
{
"epoch": 0.05848903330625508,
"grad_norm": 1.1905103203002494,
"learning_rate": 1.951219512195122e-06,
"loss": 0.8075,
"step": 48
},
{
"epoch": 0.05970755483346873,
"grad_norm": 0.918493036902164,
"learning_rate": 1.991869918699187e-06,
"loss": 0.7842,
"step": 49
},
{
"epoch": 0.060926076360682375,
"grad_norm": 0.961427277586569,
"learning_rate": 2.0325203252032523e-06,
"loss": 0.8206,
"step": 50
},
{
"epoch": 0.062144597887896016,
"grad_norm": 0.7135293393398392,
"learning_rate": 2.073170731707317e-06,
"loss": 0.7784,
"step": 51
},
{
"epoch": 0.06336311941510966,
"grad_norm": 0.8687421281930399,
"learning_rate": 2.1138211382113824e-06,
"loss": 0.7953,
"step": 52
},
{
"epoch": 0.06458164094232331,
"grad_norm": 0.8575700781368814,
"learning_rate": 2.154471544715447e-06,
"loss": 0.7926,
"step": 53
},
{
"epoch": 0.06580016246953696,
"grad_norm": 0.9435599171162209,
"learning_rate": 2.1951219512195125e-06,
"loss": 0.7766,
"step": 54
},
{
"epoch": 0.06701868399675061,
"grad_norm": 0.7215369508734659,
"learning_rate": 2.2357723577235773e-06,
"loss": 0.7686,
"step": 55
},
{
"epoch": 0.06823720552396426,
"grad_norm": 0.6329822213923535,
"learning_rate": 2.2764227642276426e-06,
"loss": 0.7868,
"step": 56
},
{
"epoch": 0.0694557270511779,
"grad_norm": 0.6559439915679887,
"learning_rate": 2.317073170731708e-06,
"loss": 0.7672,
"step": 57
},
{
"epoch": 0.07067424857839155,
"grad_norm": 0.6424064072265188,
"learning_rate": 2.3577235772357727e-06,
"loss": 0.7876,
"step": 58
},
{
"epoch": 0.0718927701056052,
"grad_norm": 0.5670684781739027,
"learning_rate": 2.3983739837398375e-06,
"loss": 0.7545,
"step": 59
},
{
"epoch": 0.07311129163281885,
"grad_norm": 0.62119888744641,
"learning_rate": 2.4390243902439027e-06,
"loss": 0.7778,
"step": 60
},
{
"epoch": 0.0743298131600325,
"grad_norm": 0.5945888357559133,
"learning_rate": 2.4796747967479676e-06,
"loss": 0.7593,
"step": 61
},
{
"epoch": 0.07554833468724614,
"grad_norm": 0.5566344615882963,
"learning_rate": 2.5203252032520324e-06,
"loss": 0.7783,
"step": 62
},
{
"epoch": 0.07676685621445979,
"grad_norm": 0.6010029344681969,
"learning_rate": 2.5609756097560977e-06,
"loss": 0.7824,
"step": 63
},
{
"epoch": 0.07798537774167344,
"grad_norm": 0.5620208665027641,
"learning_rate": 2.601626016260163e-06,
"loss": 0.7538,
"step": 64
},
{
"epoch": 0.07920389926888709,
"grad_norm": 0.629839488738847,
"learning_rate": 2.6422764227642278e-06,
"loss": 0.7478,
"step": 65
},
{
"epoch": 0.08042242079610074,
"grad_norm": 0.5843721125393191,
"learning_rate": 2.682926829268293e-06,
"loss": 0.7551,
"step": 66
},
{
"epoch": 0.08164094232331438,
"grad_norm": 0.6807297637633912,
"learning_rate": 2.723577235772358e-06,
"loss": 0.7643,
"step": 67
},
{
"epoch": 0.08285946385052803,
"grad_norm": 0.5337293638802343,
"learning_rate": 2.764227642276423e-06,
"loss": 0.7581,
"step": 68
},
{
"epoch": 0.08407798537774168,
"grad_norm": 0.5557859370743894,
"learning_rate": 2.8048780487804884e-06,
"loss": 0.7485,
"step": 69
},
{
"epoch": 0.08529650690495533,
"grad_norm": 0.5518435051656209,
"learning_rate": 2.845528455284553e-06,
"loss": 0.7503,
"step": 70
},
{
"epoch": 0.08651502843216897,
"grad_norm": 0.5585475097227505,
"learning_rate": 2.8861788617886185e-06,
"loss": 0.7456,
"step": 71
},
{
"epoch": 0.08773354995938261,
"grad_norm": 0.5842062776799712,
"learning_rate": 2.926829268292683e-06,
"loss": 0.7457,
"step": 72
},
{
"epoch": 0.08895207148659626,
"grad_norm": 0.5691413037940362,
"learning_rate": 2.967479674796748e-06,
"loss": 0.7316,
"step": 73
},
{
"epoch": 0.0901705930138099,
"grad_norm": 0.592000953180703,
"learning_rate": 3.0081300813008134e-06,
"loss": 0.7379,
"step": 74
},
{
"epoch": 0.09138911454102355,
"grad_norm": 0.4986095202064355,
"learning_rate": 3.0487804878048782e-06,
"loss": 0.7026,
"step": 75
},
{
"epoch": 0.0926076360682372,
"grad_norm": 0.5610598503101445,
"learning_rate": 3.0894308943089435e-06,
"loss": 0.7366,
"step": 76
},
{
"epoch": 0.09382615759545085,
"grad_norm": 0.5189286785140359,
"learning_rate": 3.1300813008130083e-06,
"loss": 0.7343,
"step": 77
},
{
"epoch": 0.0950446791226645,
"grad_norm": 0.5352050364602269,
"learning_rate": 3.1707317073170736e-06,
"loss": 0.7128,
"step": 78
},
{
"epoch": 0.09626320064987814,
"grad_norm": 0.589544520130887,
"learning_rate": 3.211382113821139e-06,
"loss": 0.7377,
"step": 79
},
{
"epoch": 0.09748172217709179,
"grad_norm": 0.5170292516124821,
"learning_rate": 3.2520325203252037e-06,
"loss": 0.751,
"step": 80
},
{
"epoch": 0.09870024370430544,
"grad_norm": 0.5178115988752247,
"learning_rate": 3.292682926829269e-06,
"loss": 0.7263,
"step": 81
},
{
"epoch": 0.09991876523151909,
"grad_norm": 0.5758324455305359,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.7204,
"step": 82
},
{
"epoch": 0.10113728675873274,
"grad_norm": 0.5191922407454059,
"learning_rate": 3.3739837398373986e-06,
"loss": 0.7323,
"step": 83
},
{
"epoch": 0.10235580828594638,
"grad_norm": 0.5706404216543195,
"learning_rate": 3.414634146341464e-06,
"loss": 0.7343,
"step": 84
},
{
"epoch": 0.10357432981316003,
"grad_norm": 0.5166974408338545,
"learning_rate": 3.4552845528455287e-06,
"loss": 0.7347,
"step": 85
},
{
"epoch": 0.10479285134037368,
"grad_norm": 0.575076347441057,
"learning_rate": 3.495934959349594e-06,
"loss": 0.729,
"step": 86
},
{
"epoch": 0.10601137286758733,
"grad_norm": 0.5503219216241421,
"learning_rate": 3.5365853658536588e-06,
"loss": 0.7247,
"step": 87
},
{
"epoch": 0.10722989439480098,
"grad_norm": 0.5315644262103328,
"learning_rate": 3.577235772357724e-06,
"loss": 0.7188,
"step": 88
},
{
"epoch": 0.10844841592201462,
"grad_norm": 0.5283688627559194,
"learning_rate": 3.6178861788617893e-06,
"loss": 0.7328,
"step": 89
},
{
"epoch": 0.10966693744922827,
"grad_norm": 0.5657078936164937,
"learning_rate": 3.6585365853658537e-06,
"loss": 0.7317,
"step": 90
},
{
"epoch": 0.11088545897644192,
"grad_norm": 0.5285271136308272,
"learning_rate": 3.699186991869919e-06,
"loss": 0.7259,
"step": 91
},
{
"epoch": 0.11210398050365557,
"grad_norm": 0.5665339591374581,
"learning_rate": 3.7398373983739838e-06,
"loss": 0.71,
"step": 92
},
{
"epoch": 0.11332250203086922,
"grad_norm": 0.5408789367861271,
"learning_rate": 3.780487804878049e-06,
"loss": 0.7287,
"step": 93
},
{
"epoch": 0.11454102355808286,
"grad_norm": 0.530024765222071,
"learning_rate": 3.821138211382115e-06,
"loss": 0.7158,
"step": 94
},
{
"epoch": 0.11575954508529651,
"grad_norm": 0.525972820727265,
"learning_rate": 3.861788617886179e-06,
"loss": 0.6953,
"step": 95
},
{
"epoch": 0.11697806661251016,
"grad_norm": 0.5538892198758983,
"learning_rate": 3.902439024390244e-06,
"loss": 0.7213,
"step": 96
},
{
"epoch": 0.11819658813972381,
"grad_norm": 0.544929946076996,
"learning_rate": 3.943089430894309e-06,
"loss": 0.6996,
"step": 97
},
{
"epoch": 0.11941510966693746,
"grad_norm": 0.5734775021447369,
"learning_rate": 3.983739837398374e-06,
"loss": 0.7154,
"step": 98
},
{
"epoch": 0.1206336311941511,
"grad_norm": 0.5045397032963369,
"learning_rate": 4.024390243902439e-06,
"loss": 0.7024,
"step": 99
},
{
"epoch": 0.12185215272136475,
"grad_norm": 0.5236114609222794,
"learning_rate": 4.0650406504065046e-06,
"loss": 0.7226,
"step": 100
},
{
"epoch": 0.12307067424857839,
"grad_norm": 0.5641036987903533,
"learning_rate": 4.10569105691057e-06,
"loss": 0.7054,
"step": 101
},
{
"epoch": 0.12428919577579203,
"grad_norm": 0.508465869676476,
"learning_rate": 4.146341463414634e-06,
"loss": 0.7003,
"step": 102
},
{
"epoch": 0.1255077173030057,
"grad_norm": 0.5202630797257376,
"learning_rate": 4.1869918699186995e-06,
"loss": 0.7204,
"step": 103
},
{
"epoch": 0.12672623883021933,
"grad_norm": 0.5552933325377176,
"learning_rate": 4.227642276422765e-06,
"loss": 0.7279,
"step": 104
},
{
"epoch": 0.127944760357433,
"grad_norm": 0.5416012915563714,
"learning_rate": 4.268292682926829e-06,
"loss": 0.7093,
"step": 105
},
{
"epoch": 0.12916328188464662,
"grad_norm": 0.5104004921064896,
"learning_rate": 4.308943089430894e-06,
"loss": 0.7013,
"step": 106
},
{
"epoch": 0.1303818034118603,
"grad_norm": 0.5765782104977603,
"learning_rate": 4.34959349593496e-06,
"loss": 0.7045,
"step": 107
},
{
"epoch": 0.13160032493907392,
"grad_norm": 0.5017287673543831,
"learning_rate": 4.390243902439025e-06,
"loss": 0.6997,
"step": 108
},
{
"epoch": 0.13281884646628758,
"grad_norm": 0.48808935518722807,
"learning_rate": 4.43089430894309e-06,
"loss": 0.7184,
"step": 109
},
{
"epoch": 0.13403736799350122,
"grad_norm": 0.531216252027469,
"learning_rate": 4.471544715447155e-06,
"loss": 0.7069,
"step": 110
},
{
"epoch": 0.13525588952071488,
"grad_norm": 0.5149543102282697,
"learning_rate": 4.51219512195122e-06,
"loss": 0.7023,
"step": 111
},
{
"epoch": 0.1364744110479285,
"grad_norm": 0.5291257352871406,
"learning_rate": 4.552845528455285e-06,
"loss": 0.7185,
"step": 112
},
{
"epoch": 0.13769293257514217,
"grad_norm": 0.47858897961333036,
"learning_rate": 4.59349593495935e-06,
"loss": 0.7149,
"step": 113
},
{
"epoch": 0.1389114541023558,
"grad_norm": 0.5359903721383661,
"learning_rate": 4.634146341463416e-06,
"loss": 0.7118,
"step": 114
},
{
"epoch": 0.14012997562956944,
"grad_norm": 0.5023325811416011,
"learning_rate": 4.67479674796748e-06,
"loss": 0.6986,
"step": 115
},
{
"epoch": 0.1413484971567831,
"grad_norm": 0.507102678949565,
"learning_rate": 4.715447154471545e-06,
"loss": 0.7053,
"step": 116
},
{
"epoch": 0.14256701868399674,
"grad_norm": 0.5162377887307996,
"learning_rate": 4.75609756097561e-06,
"loss": 0.6971,
"step": 117
},
{
"epoch": 0.1437855402112104,
"grad_norm": 0.5228780554370066,
"learning_rate": 4.796747967479675e-06,
"loss": 0.6915,
"step": 118
},
{
"epoch": 0.14500406173842403,
"grad_norm": 0.5539538888660016,
"learning_rate": 4.83739837398374e-06,
"loss": 0.6979,
"step": 119
},
{
"epoch": 0.1462225832656377,
"grad_norm": 0.6135340785022244,
"learning_rate": 4.8780487804878055e-06,
"loss": 0.7197,
"step": 120
},
{
"epoch": 0.14744110479285133,
"grad_norm": 0.5261935119823417,
"learning_rate": 4.918699186991871e-06,
"loss": 0.6957,
"step": 121
},
{
"epoch": 0.148659626320065,
"grad_norm": 0.5941876044718514,
"learning_rate": 4.959349593495935e-06,
"loss": 0.7031,
"step": 122
},
{
"epoch": 0.14987814784727863,
"grad_norm": 0.5436866255986976,
"learning_rate": 5e-06,
"loss": 0.7068,
"step": 123
},
{
"epoch": 0.1510966693744923,
"grad_norm": 0.5295736343510782,
"learning_rate": 5.040650406504065e-06,
"loss": 0.686,
"step": 124
},
{
"epoch": 0.15231519090170592,
"grad_norm": 0.5536691790810129,
"learning_rate": 5.081300813008131e-06,
"loss": 0.681,
"step": 125
},
{
"epoch": 0.15353371242891958,
"grad_norm": 0.6057295035493935,
"learning_rate": 5.121951219512195e-06,
"loss": 0.7195,
"step": 126
},
{
"epoch": 0.15475223395613322,
"grad_norm": 0.49006287650569935,
"learning_rate": 5.162601626016261e-06,
"loss": 0.6846,
"step": 127
},
{
"epoch": 0.15597075548334688,
"grad_norm": 0.5312531193234717,
"learning_rate": 5.203252032520326e-06,
"loss": 0.7038,
"step": 128
},
{
"epoch": 0.1571892770105605,
"grad_norm": 0.5581018411430491,
"learning_rate": 5.243902439024391e-06,
"loss": 0.7184,
"step": 129
},
{
"epoch": 0.15840779853777417,
"grad_norm": 0.5133234429893759,
"learning_rate": 5.2845528455284555e-06,
"loss": 0.6792,
"step": 130
},
{
"epoch": 0.1596263200649878,
"grad_norm": 0.5191238918744202,
"learning_rate": 5.32520325203252e-06,
"loss": 0.6921,
"step": 131
},
{
"epoch": 0.16084484159220147,
"grad_norm": 0.6134860482308477,
"learning_rate": 5.365853658536586e-06,
"loss": 0.6764,
"step": 132
},
{
"epoch": 0.1620633631194151,
"grad_norm": 0.6271296104201189,
"learning_rate": 5.4065040650406504e-06,
"loss": 0.6829,
"step": 133
},
{
"epoch": 0.16328188464662877,
"grad_norm": 0.6385973016730124,
"learning_rate": 5.447154471544716e-06,
"loss": 0.6957,
"step": 134
},
{
"epoch": 0.1645004061738424,
"grad_norm": 0.5686127235373731,
"learning_rate": 5.487804878048781e-06,
"loss": 0.698,
"step": 135
},
{
"epoch": 0.16571892770105606,
"grad_norm": 0.6833156687934043,
"learning_rate": 5.528455284552846e-06,
"loss": 0.6851,
"step": 136
},
{
"epoch": 0.1669374492282697,
"grad_norm": 0.700308205780514,
"learning_rate": 5.569105691056911e-06,
"loss": 0.6806,
"step": 137
},
{
"epoch": 0.16815597075548336,
"grad_norm": 0.5462617932277235,
"learning_rate": 5.609756097560977e-06,
"loss": 0.7091,
"step": 138
},
{
"epoch": 0.169374492282697,
"grad_norm": 0.7852807393490868,
"learning_rate": 5.650406504065041e-06,
"loss": 0.7123,
"step": 139
},
{
"epoch": 0.17059301380991065,
"grad_norm": 0.6057177569541422,
"learning_rate": 5.691056910569106e-06,
"loss": 0.7078,
"step": 140
},
{
"epoch": 0.1718115353371243,
"grad_norm": 0.6623630085315475,
"learning_rate": 5.731707317073171e-06,
"loss": 0.6685,
"step": 141
},
{
"epoch": 0.17303005686433795,
"grad_norm": 0.6456913396682378,
"learning_rate": 5.772357723577237e-06,
"loss": 0.7076,
"step": 142
},
{
"epoch": 0.17424857839155158,
"grad_norm": 0.5490338737139909,
"learning_rate": 5.813008130081301e-06,
"loss": 0.6679,
"step": 143
},
{
"epoch": 0.17546709991876522,
"grad_norm": 0.6684035267057585,
"learning_rate": 5.853658536585366e-06,
"loss": 0.6862,
"step": 144
},
{
"epoch": 0.17668562144597888,
"grad_norm": 0.5217072648450134,
"learning_rate": 5.894308943089432e-06,
"loss": 0.6846,
"step": 145
},
{
"epoch": 0.17790414297319251,
"grad_norm": 0.5999701195487821,
"learning_rate": 5.934959349593496e-06,
"loss": 0.6971,
"step": 146
},
{
"epoch": 0.17912266450040618,
"grad_norm": 0.6672016415742844,
"learning_rate": 5.9756097560975615e-06,
"loss": 0.7023,
"step": 147
},
{
"epoch": 0.1803411860276198,
"grad_norm": 0.5638253505945849,
"learning_rate": 6.016260162601627e-06,
"loss": 0.6718,
"step": 148
},
{
"epoch": 0.18155970755483347,
"grad_norm": 0.5443164616899534,
"learning_rate": 6.056910569105692e-06,
"loss": 0.678,
"step": 149
},
{
"epoch": 0.1827782290820471,
"grad_norm": 0.5515636708718161,
"learning_rate": 6.0975609756097564e-06,
"loss": 0.6914,
"step": 150
},
{
"epoch": 0.18399675060926077,
"grad_norm": 0.6385969983161707,
"learning_rate": 6.138211382113821e-06,
"loss": 0.6796,
"step": 151
},
{
"epoch": 0.1852152721364744,
"grad_norm": 0.6113406592810082,
"learning_rate": 6.178861788617887e-06,
"loss": 0.682,
"step": 152
},
{
"epoch": 0.18643379366368806,
"grad_norm": 0.6906350808865743,
"learning_rate": 6.219512195121951e-06,
"loss": 0.6671,
"step": 153
},
{
"epoch": 0.1876523151909017,
"grad_norm": 0.7020113339328089,
"learning_rate": 6.260162601626017e-06,
"loss": 0.6835,
"step": 154
},
{
"epoch": 0.18887083671811536,
"grad_norm": 0.5548828056807938,
"learning_rate": 6.300813008130082e-06,
"loss": 0.6809,
"step": 155
},
{
"epoch": 0.190089358245329,
"grad_norm": 0.8352572415357199,
"learning_rate": 6.341463414634147e-06,
"loss": 0.6809,
"step": 156
},
{
"epoch": 0.19130787977254265,
"grad_norm": 0.6517742914384106,
"learning_rate": 6.3821138211382115e-06,
"loss": 0.6791,
"step": 157
},
{
"epoch": 0.1925264012997563,
"grad_norm": 0.6204344146843959,
"learning_rate": 6.422764227642278e-06,
"loss": 0.6766,
"step": 158
},
{
"epoch": 0.19374492282696995,
"grad_norm": 0.8219899409754744,
"learning_rate": 6.463414634146342e-06,
"loss": 0.6726,
"step": 159
},
{
"epoch": 0.19496344435418358,
"grad_norm": 0.6541183549209502,
"learning_rate": 6.504065040650407e-06,
"loss": 0.6781,
"step": 160
},
{
"epoch": 0.19618196588139725,
"grad_norm": 0.566310879262149,
"learning_rate": 6.544715447154472e-06,
"loss": 0.6702,
"step": 161
},
{
"epoch": 0.19740048740861088,
"grad_norm": 0.775755089339994,
"learning_rate": 6.585365853658538e-06,
"loss": 0.6905,
"step": 162
},
{
"epoch": 0.19861900893582454,
"grad_norm": 0.6288678821845954,
"learning_rate": 6.626016260162602e-06,
"loss": 0.6881,
"step": 163
},
{
"epoch": 0.19983753046303818,
"grad_norm": 0.7640676261377178,
"learning_rate": 6.666666666666667e-06,
"loss": 0.6737,
"step": 164
},
{
"epoch": 0.20105605199025184,
"grad_norm": 0.5731637259066372,
"learning_rate": 6.707317073170733e-06,
"loss": 0.674,
"step": 165
},
{
"epoch": 0.20227457351746547,
"grad_norm": 0.7761516718399211,
"learning_rate": 6.747967479674797e-06,
"loss": 0.6928,
"step": 166
},
{
"epoch": 0.20349309504467913,
"grad_norm": 0.7100095841961804,
"learning_rate": 6.788617886178862e-06,
"loss": 0.6727,
"step": 167
},
{
"epoch": 0.20471161657189277,
"grad_norm": 0.569930635478734,
"learning_rate": 6.829268292682928e-06,
"loss": 0.689,
"step": 168
},
{
"epoch": 0.20593013809910643,
"grad_norm": 0.7691268212355195,
"learning_rate": 6.869918699186993e-06,
"loss": 0.6973,
"step": 169
},
{
"epoch": 0.20714865962632006,
"grad_norm": 0.560097362553805,
"learning_rate": 6.910569105691057e-06,
"loss": 0.6681,
"step": 170
},
{
"epoch": 0.20836718115353373,
"grad_norm": 0.6849837037178143,
"learning_rate": 6.951219512195122e-06,
"loss": 0.6592,
"step": 171
},
{
"epoch": 0.20958570268074736,
"grad_norm": 0.7951681541297303,
"learning_rate": 6.991869918699188e-06,
"loss": 0.6812,
"step": 172
},
{
"epoch": 0.210804224207961,
"grad_norm": 0.5428585266109707,
"learning_rate": 7.032520325203252e-06,
"loss": 0.696,
"step": 173
},
{
"epoch": 0.21202274573517466,
"grad_norm": 0.7462142080092842,
"learning_rate": 7.0731707317073175e-06,
"loss": 0.6793,
"step": 174
},
{
"epoch": 0.2132412672623883,
"grad_norm": 0.6370138105851062,
"learning_rate": 7.113821138211383e-06,
"loss": 0.6717,
"step": 175
},
{
"epoch": 0.21445978878960195,
"grad_norm": 0.566025113423941,
"learning_rate": 7.154471544715448e-06,
"loss": 0.6694,
"step": 176
},
{
"epoch": 0.21567831031681559,
"grad_norm": 0.6632467338949928,
"learning_rate": 7.1951219512195125e-06,
"loss": 0.679,
"step": 177
},
{
"epoch": 0.21689683184402925,
"grad_norm": 0.5775437329049822,
"learning_rate": 7.2357723577235786e-06,
"loss": 0.6738,
"step": 178
},
{
"epoch": 0.21811535337124288,
"grad_norm": 0.6763254821774859,
"learning_rate": 7.276422764227643e-06,
"loss": 0.6885,
"step": 179
},
{
"epoch": 0.21933387489845654,
"grad_norm": 0.6525555364778458,
"learning_rate": 7.317073170731707e-06,
"loss": 0.6829,
"step": 180
},
{
"epoch": 0.22055239642567018,
"grad_norm": 0.6376488223620492,
"learning_rate": 7.357723577235773e-06,
"loss": 0.6611,
"step": 181
},
{
"epoch": 0.22177091795288384,
"grad_norm": 0.6135443136132807,
"learning_rate": 7.398373983739838e-06,
"loss": 0.6875,
"step": 182
},
{
"epoch": 0.22298943948009747,
"grad_norm": 0.6616707267536054,
"learning_rate": 7.439024390243903e-06,
"loss": 0.6637,
"step": 183
},
{
"epoch": 0.22420796100731114,
"grad_norm": 0.6601543949811752,
"learning_rate": 7.4796747967479676e-06,
"loss": 0.6714,
"step": 184
},
{
"epoch": 0.22542648253452477,
"grad_norm": 0.689531862633905,
"learning_rate": 7.520325203252034e-06,
"loss": 0.6717,
"step": 185
},
{
"epoch": 0.22664500406173843,
"grad_norm": 0.6693067594219624,
"learning_rate": 7.560975609756098e-06,
"loss": 0.6538,
"step": 186
},
{
"epoch": 0.22786352558895206,
"grad_norm": 0.6877489613732909,
"learning_rate": 7.601626016260163e-06,
"loss": 0.6791,
"step": 187
},
{
"epoch": 0.22908204711616573,
"grad_norm": 0.6102935004924294,
"learning_rate": 7.64227642276423e-06,
"loss": 0.6697,
"step": 188
},
{
"epoch": 0.23030056864337936,
"grad_norm": 0.7109056322063843,
"learning_rate": 7.682926829268293e-06,
"loss": 0.679,
"step": 189
},
{
"epoch": 0.23151909017059302,
"grad_norm": 0.6183616410187914,
"learning_rate": 7.723577235772358e-06,
"loss": 0.662,
"step": 190
},
{
"epoch": 0.23273761169780666,
"grad_norm": 0.6117992409555106,
"learning_rate": 7.764227642276424e-06,
"loss": 0.6671,
"step": 191
},
{
"epoch": 0.23395613322502032,
"grad_norm": 0.7288006220266883,
"learning_rate": 7.804878048780489e-06,
"loss": 0.7049,
"step": 192
},
{
"epoch": 0.23517465475223395,
"grad_norm": 0.6795369812377434,
"learning_rate": 7.845528455284554e-06,
"loss": 0.6638,
"step": 193
},
{
"epoch": 0.23639317627944761,
"grad_norm": 0.6336366896960686,
"learning_rate": 7.886178861788618e-06,
"loss": 0.6738,
"step": 194
},
{
"epoch": 0.23761169780666125,
"grad_norm": 0.7184491761674651,
"learning_rate": 7.926829268292685e-06,
"loss": 0.6628,
"step": 195
},
{
"epoch": 0.2388302193338749,
"grad_norm": 0.659177288266525,
"learning_rate": 7.967479674796748e-06,
"loss": 0.6805,
"step": 196
},
{
"epoch": 0.24004874086108854,
"grad_norm": 0.578465157473097,
"learning_rate": 8.008130081300813e-06,
"loss": 0.6894,
"step": 197
},
{
"epoch": 0.2412672623883022,
"grad_norm": 0.6868825593189901,
"learning_rate": 8.048780487804879e-06,
"loss": 0.6847,
"step": 198
},
{
"epoch": 0.24248578391551584,
"grad_norm": 0.6185564483778565,
"learning_rate": 8.089430894308944e-06,
"loss": 0.6667,
"step": 199
},
{
"epoch": 0.2437043054427295,
"grad_norm": 0.7195682220690447,
"learning_rate": 8.130081300813009e-06,
"loss": 0.6681,
"step": 200
},
{
"epoch": 0.24492282696994314,
"grad_norm": 0.5988887592571761,
"learning_rate": 8.170731707317073e-06,
"loss": 0.663,
"step": 201
},
{
"epoch": 0.24614134849715677,
"grad_norm": 0.6728160874756142,
"learning_rate": 8.21138211382114e-06,
"loss": 0.6688,
"step": 202
},
{
"epoch": 0.24735987002437043,
"grad_norm": 0.6167676512934452,
"learning_rate": 8.252032520325203e-06,
"loss": 0.6597,
"step": 203
},
{
"epoch": 0.24857839155158407,
"grad_norm": 0.6963420894322225,
"learning_rate": 8.292682926829268e-06,
"loss": 0.6739,
"step": 204
},
{
"epoch": 0.24979691307879773,
"grad_norm": 0.6374482813383724,
"learning_rate": 8.333333333333334e-06,
"loss": 0.6782,
"step": 205
},
{
"epoch": 0.2510154346060114,
"grad_norm": 0.7061607202293944,
"learning_rate": 8.373983739837399e-06,
"loss": 0.6784,
"step": 206
},
{
"epoch": 0.252233956133225,
"grad_norm": 0.6588145062673179,
"learning_rate": 8.414634146341464e-06,
"loss": 0.6785,
"step": 207
},
{
"epoch": 0.25345247766043866,
"grad_norm": 0.5575233234513415,
"learning_rate": 8.45528455284553e-06,
"loss": 0.659,
"step": 208
},
{
"epoch": 0.2546709991876523,
"grad_norm": 0.660881658687861,
"learning_rate": 8.495934959349595e-06,
"loss": 0.672,
"step": 209
},
{
"epoch": 0.255889520714866,
"grad_norm": 0.8528802704630485,
"learning_rate": 8.536585365853658e-06,
"loss": 0.6758,
"step": 210
},
{
"epoch": 0.2571080422420796,
"grad_norm": 0.5423728405429434,
"learning_rate": 8.577235772357724e-06,
"loss": 0.6526,
"step": 211
},
{
"epoch": 0.25832656376929325,
"grad_norm": 0.7828577081027186,
"learning_rate": 8.617886178861789e-06,
"loss": 0.6619,
"step": 212
},
{
"epoch": 0.2595450852965069,
"grad_norm": 0.683409796151077,
"learning_rate": 8.658536585365854e-06,
"loss": 0.6609,
"step": 213
},
{
"epoch": 0.2607636068237206,
"grad_norm": 0.7965663856791516,
"learning_rate": 8.69918699186992e-06,
"loss": 0.6771,
"step": 214
},
{
"epoch": 0.2619821283509342,
"grad_norm": 0.5296355193224814,
"learning_rate": 8.739837398373985e-06,
"loss": 0.6631,
"step": 215
},
{
"epoch": 0.26320064987814784,
"grad_norm": 0.6996343517682662,
"learning_rate": 8.78048780487805e-06,
"loss": 0.6778,
"step": 216
},
{
"epoch": 0.2644191714053615,
"grad_norm": 0.5685534992796657,
"learning_rate": 8.821138211382113e-06,
"loss": 0.6542,
"step": 217
},
{
"epoch": 0.26563769293257516,
"grad_norm": 0.5812188798996168,
"learning_rate": 8.86178861788618e-06,
"loss": 0.6665,
"step": 218
},
{
"epoch": 0.2668562144597888,
"grad_norm": 0.5872098802829594,
"learning_rate": 8.902439024390244e-06,
"loss": 0.6793,
"step": 219
},
{
"epoch": 0.26807473598700243,
"grad_norm": 0.6138295440628797,
"learning_rate": 8.94308943089431e-06,
"loss": 0.6845,
"step": 220
},
{
"epoch": 0.26929325751421607,
"grad_norm": 0.5555943793492026,
"learning_rate": 8.983739837398374e-06,
"loss": 0.6456,
"step": 221
},
{
"epoch": 0.27051177904142976,
"grad_norm": 0.5716563368438368,
"learning_rate": 9.02439024390244e-06,
"loss": 0.6881,
"step": 222
},
{
"epoch": 0.2717303005686434,
"grad_norm": 0.5657678117161605,
"learning_rate": 9.065040650406505e-06,
"loss": 0.6721,
"step": 223
},
{
"epoch": 0.272948822095857,
"grad_norm": 0.6494717196083141,
"learning_rate": 9.10569105691057e-06,
"loss": 0.6856,
"step": 224
},
{
"epoch": 0.27416734362307066,
"grad_norm": 0.5769888402008441,
"learning_rate": 9.146341463414635e-06,
"loss": 0.65,
"step": 225
},
{
"epoch": 0.27538586515028435,
"grad_norm": 0.6372976291357912,
"learning_rate": 9.1869918699187e-06,
"loss": 0.679,
"step": 226
},
{
"epoch": 0.276604386677498,
"grad_norm": 0.61020413240267,
"learning_rate": 9.227642276422764e-06,
"loss": 0.6462,
"step": 227
},
{
"epoch": 0.2778229082047116,
"grad_norm": 0.7347279477946881,
"learning_rate": 9.268292682926831e-06,
"loss": 0.6502,
"step": 228
},
{
"epoch": 0.27904142973192525,
"grad_norm": 0.6513241840116742,
"learning_rate": 9.308943089430895e-06,
"loss": 0.6707,
"step": 229
},
{
"epoch": 0.2802599512591389,
"grad_norm": 0.6578226137183896,
"learning_rate": 9.34959349593496e-06,
"loss": 0.6559,
"step": 230
},
{
"epoch": 0.2814784727863526,
"grad_norm": 0.6443126179924461,
"learning_rate": 9.390243902439025e-06,
"loss": 0.6815,
"step": 231
},
{
"epoch": 0.2826969943135662,
"grad_norm": 0.5681908489104979,
"learning_rate": 9.43089430894309e-06,
"loss": 0.6483,
"step": 232
},
{
"epoch": 0.28391551584077984,
"grad_norm": 0.638868530973396,
"learning_rate": 9.471544715447156e-06,
"loss": 0.6663,
"step": 233
},
{
"epoch": 0.2851340373679935,
"grad_norm": 0.5345735736702238,
"learning_rate": 9.51219512195122e-06,
"loss": 0.6507,
"step": 234
},
{
"epoch": 0.28635255889520717,
"grad_norm": 0.6170557049684545,
"learning_rate": 9.552845528455286e-06,
"loss": 0.6533,
"step": 235
},
{
"epoch": 0.2875710804224208,
"grad_norm": 0.6282001318911594,
"learning_rate": 9.59349593495935e-06,
"loss": 0.6715,
"step": 236
},
{
"epoch": 0.28878960194963443,
"grad_norm": 0.548783110101442,
"learning_rate": 9.634146341463415e-06,
"loss": 0.6536,
"step": 237
},
{
"epoch": 0.29000812347684807,
"grad_norm": 0.6300302160047813,
"learning_rate": 9.67479674796748e-06,
"loss": 0.657,
"step": 238
},
{
"epoch": 0.29122664500406176,
"grad_norm": 0.5955216072274768,
"learning_rate": 9.715447154471546e-06,
"loss": 0.6767,
"step": 239
},
{
"epoch": 0.2924451665312754,
"grad_norm": 0.6216921714562351,
"learning_rate": 9.756097560975611e-06,
"loss": 0.6492,
"step": 240
},
{
"epoch": 0.293663688058489,
"grad_norm": 0.6909539613975563,
"learning_rate": 9.796747967479675e-06,
"loss": 0.6618,
"step": 241
},
{
"epoch": 0.29488220958570266,
"grad_norm": 0.8137292747107515,
"learning_rate": 9.837398373983741e-06,
"loss": 0.6614,
"step": 242
},
{
"epoch": 0.29610073111291635,
"grad_norm": 0.5855911517789665,
"learning_rate": 9.878048780487805e-06,
"loss": 0.6561,
"step": 243
},
{
"epoch": 0.29731925264013,
"grad_norm": 0.8851136874577217,
"learning_rate": 9.91869918699187e-06,
"loss": 0.6498,
"step": 244
},
{
"epoch": 0.2985377741673436,
"grad_norm": 0.57227502230073,
"learning_rate": 9.959349593495936e-06,
"loss": 0.6606,
"step": 245
},
{
"epoch": 0.29975629569455725,
"grad_norm": 0.9576157821693805,
"learning_rate": 1e-05,
"loss": 0.648,
"step": 246
},
{
"epoch": 0.30097481722177094,
"grad_norm": 0.574426873878406,
"learning_rate": 9.999994966333388e-06,
"loss": 0.6543,
"step": 247
},
{
"epoch": 0.3021933387489846,
"grad_norm": 0.7230465083023617,
"learning_rate": 9.99997986534369e-06,
"loss": 0.6654,
"step": 248
},
{
"epoch": 0.3034118602761982,
"grad_norm": 0.5421626680587527,
"learning_rate": 9.999954697061305e-06,
"loss": 0.6343,
"step": 249
},
{
"epoch": 0.30463038180341184,
"grad_norm": 0.6129301937842085,
"learning_rate": 9.999919461536915e-06,
"loss": 0.6449,
"step": 250
},
{
"epoch": 0.30584890333062553,
"grad_norm": 0.563497786259594,
"learning_rate": 9.999874158841462e-06,
"loss": 0.66,
"step": 251
},
{
"epoch": 0.30706742485783917,
"grad_norm": 0.6709530297921161,
"learning_rate": 9.999818789066164e-06,
"loss": 0.6575,
"step": 252
},
{
"epoch": 0.3082859463850528,
"grad_norm": 0.6033112191541231,
"learning_rate": 9.999753352322502e-06,
"loss": 0.6745,
"step": 253
},
{
"epoch": 0.30950446791226643,
"grad_norm": 0.7085418197042371,
"learning_rate": 9.999677848742238e-06,
"loss": 0.645,
"step": 254
},
{
"epoch": 0.3107229894394801,
"grad_norm": 0.6149439429340515,
"learning_rate": 9.999592278477389e-06,
"loss": 0.6553,
"step": 255
},
{
"epoch": 0.31194151096669376,
"grad_norm": 0.5361824485289747,
"learning_rate": 9.999496641700249e-06,
"loss": 0.6394,
"step": 256
},
{
"epoch": 0.3131600324939074,
"grad_norm": 0.7876266919973667,
"learning_rate": 9.99939093860338e-06,
"loss": 0.651,
"step": 257
},
{
"epoch": 0.314378554021121,
"grad_norm": 0.5240336550865616,
"learning_rate": 9.999275169399614e-06,
"loss": 0.6445,
"step": 258
},
{
"epoch": 0.31559707554833466,
"grad_norm": 0.9003012478867778,
"learning_rate": 9.999149334322047e-06,
"loss": 0.6759,
"step": 259
},
{
"epoch": 0.31681559707554835,
"grad_norm": 0.520552428762164,
"learning_rate": 9.999013433624042e-06,
"loss": 0.6656,
"step": 260
},
{
"epoch": 0.318034118602762,
"grad_norm": 0.8451285058918907,
"learning_rate": 9.998867467579234e-06,
"loss": 0.6393,
"step": 261
},
{
"epoch": 0.3192526401299756,
"grad_norm": 0.6368634173244008,
"learning_rate": 9.998711436481519e-06,
"loss": 0.6544,
"step": 262
},
{
"epoch": 0.32047116165718925,
"grad_norm": 0.690099709138949,
"learning_rate": 9.998545340645058e-06,
"loss": 0.6609,
"step": 263
},
{
"epoch": 0.32168968318440294,
"grad_norm": 0.7144861500132949,
"learning_rate": 9.998369180404283e-06,
"loss": 0.6647,
"step": 264
},
{
"epoch": 0.3229082047116166,
"grad_norm": 0.6362319514002672,
"learning_rate": 9.998182956113885e-06,
"loss": 0.6533,
"step": 265
},
{
"epoch": 0.3241267262388302,
"grad_norm": 0.6488964510495924,
"learning_rate": 9.99798666814882e-06,
"loss": 0.6504,
"step": 266
},
{
"epoch": 0.32534524776604384,
"grad_norm": 0.6063198470537309,
"learning_rate": 9.99778031690431e-06,
"loss": 0.6563,
"step": 267
},
{
"epoch": 0.32656376929325753,
"grad_norm": 0.5938533025522102,
"learning_rate": 9.997563902795834e-06,
"loss": 0.6675,
"step": 268
},
{
"epoch": 0.32778229082047117,
"grad_norm": 0.7515871090930308,
"learning_rate": 9.997337426259134e-06,
"loss": 0.6792,
"step": 269
},
{
"epoch": 0.3290008123476848,
"grad_norm": 0.703279934707329,
"learning_rate": 9.997100887750215e-06,
"loss": 0.6635,
"step": 270
},
{
"epoch": 0.33021933387489844,
"grad_norm": 0.695544945955001,
"learning_rate": 9.996854287745337e-06,
"loss": 0.645,
"step": 271
},
{
"epoch": 0.3314378554021121,
"grad_norm": 0.7462833994362996,
"learning_rate": 9.996597626741023e-06,
"loss": 0.6478,
"step": 272
},
{
"epoch": 0.33265637692932576,
"grad_norm": 0.6876699055946316,
"learning_rate": 9.99633090525405e-06,
"loss": 0.6495,
"step": 273
},
{
"epoch": 0.3338748984565394,
"grad_norm": 0.6161949269900944,
"learning_rate": 9.996054123821455e-06,
"loss": 0.6477,
"step": 274
},
{
"epoch": 0.335093419983753,
"grad_norm": 0.6992818714334844,
"learning_rate": 9.995767283000526e-06,
"loss": 0.6471,
"step": 275
},
{
"epoch": 0.3363119415109667,
"grad_norm": 0.6649545633189144,
"learning_rate": 9.995470383368808e-06,
"loss": 0.6526,
"step": 276
},
{
"epoch": 0.33753046303818035,
"grad_norm": 0.7069772548058584,
"learning_rate": 9.995163425524097e-06,
"loss": 0.6622,
"step": 277
},
{
"epoch": 0.338748984565394,
"grad_norm": 0.7343365884623839,
"learning_rate": 9.994846410084447e-06,
"loss": 0.6401,
"step": 278
},
{
"epoch": 0.3399675060926076,
"grad_norm": 0.7666383023534878,
"learning_rate": 9.994519337688152e-06,
"loss": 0.6351,
"step": 279
},
{
"epoch": 0.3411860276198213,
"grad_norm": 0.7101687784996984,
"learning_rate": 9.994182208993766e-06,
"loss": 0.6686,
"step": 280
},
{
"epoch": 0.34240454914703494,
"grad_norm": 0.794098416336116,
"learning_rate": 9.993835024680084e-06,
"loss": 0.6534,
"step": 281
},
{
"epoch": 0.3436230706742486,
"grad_norm": 0.6476191969862704,
"learning_rate": 9.993477785446151e-06,
"loss": 0.6321,
"step": 282
},
{
"epoch": 0.3448415922014622,
"grad_norm": 0.7027462161925977,
"learning_rate": 9.993110492011256e-06,
"loss": 0.6677,
"step": 283
},
{
"epoch": 0.3460601137286759,
"grad_norm": 0.7368948502336647,
"learning_rate": 9.992733145114932e-06,
"loss": 0.6332,
"step": 284
},
{
"epoch": 0.34727863525588953,
"grad_norm": 0.769793462172428,
"learning_rate": 9.992345745516954e-06,
"loss": 0.6627,
"step": 285
},
{
"epoch": 0.34849715678310317,
"grad_norm": 0.6391657112532801,
"learning_rate": 9.99194829399734e-06,
"loss": 0.6364,
"step": 286
},
{
"epoch": 0.3497156783103168,
"grad_norm": 0.8671328129231476,
"learning_rate": 9.991540791356342e-06,
"loss": 0.6558,
"step": 287
},
{
"epoch": 0.35093419983753044,
"grad_norm": 0.6143371180878986,
"learning_rate": 9.991123238414455e-06,
"loss": 0.6725,
"step": 288
},
{
"epoch": 0.3521527213647441,
"grad_norm": 0.7114612477683598,
"learning_rate": 9.99069563601241e-06,
"loss": 0.6386,
"step": 289
},
{
"epoch": 0.35337124289195776,
"grad_norm": 0.5910112375855043,
"learning_rate": 9.990257985011168e-06,
"loss": 0.6648,
"step": 290
},
{
"epoch": 0.3545897644191714,
"grad_norm": 0.6709399619542642,
"learning_rate": 9.989810286291923e-06,
"loss": 0.6641,
"step": 291
},
{
"epoch": 0.35580828594638503,
"grad_norm": 0.5876086675256037,
"learning_rate": 9.989352540756103e-06,
"loss": 0.6519,
"step": 292
},
{
"epoch": 0.3570268074735987,
"grad_norm": 0.4993245470857056,
"learning_rate": 9.988884749325366e-06,
"loss": 0.6409,
"step": 293
},
{
"epoch": 0.35824532900081235,
"grad_norm": 0.6361394412220084,
"learning_rate": 9.988406912941591e-06,
"loss": 0.6543,
"step": 294
},
{
"epoch": 0.359463850528026,
"grad_norm": 0.5972665446098098,
"learning_rate": 9.987919032566885e-06,
"loss": 0.6379,
"step": 295
},
{
"epoch": 0.3606823720552396,
"grad_norm": 0.5332779981117456,
"learning_rate": 9.987421109183581e-06,
"loss": 0.6362,
"step": 296
},
{
"epoch": 0.3619008935824533,
"grad_norm": 0.6057994457076236,
"learning_rate": 9.986913143794232e-06,
"loss": 0.6455,
"step": 297
},
{
"epoch": 0.36311941510966694,
"grad_norm": 0.6075132715056499,
"learning_rate": 9.986395137421607e-06,
"loss": 0.6624,
"step": 298
},
{
"epoch": 0.3643379366368806,
"grad_norm": 0.5258247408109219,
"learning_rate": 9.985867091108697e-06,
"loss": 0.638,
"step": 299
},
{
"epoch": 0.3655564581640942,
"grad_norm": 0.5267906230313797,
"learning_rate": 9.985329005918702e-06,
"loss": 0.6362,
"step": 300
},
{
"epoch": 0.3667749796913079,
"grad_norm": 0.5638416352250496,
"learning_rate": 9.984780882935043e-06,
"loss": 0.6301,
"step": 301
},
{
"epoch": 0.36799350121852153,
"grad_norm": 0.545011579464239,
"learning_rate": 9.984222723261344e-06,
"loss": 0.6599,
"step": 302
},
{
"epoch": 0.36921202274573517,
"grad_norm": 0.5606014722546357,
"learning_rate": 9.983654528021442e-06,
"loss": 0.6542,
"step": 303
},
{
"epoch": 0.3704305442729488,
"grad_norm": 0.6018343388636366,
"learning_rate": 9.98307629835938e-06,
"loss": 0.6368,
"step": 304
},
{
"epoch": 0.3716490658001625,
"grad_norm": 0.6118602452372705,
"learning_rate": 9.982488035439401e-06,
"loss": 0.6513,
"step": 305
},
{
"epoch": 0.3728675873273761,
"grad_norm": 0.6022653337990805,
"learning_rate": 9.981889740445958e-06,
"loss": 0.6496,
"step": 306
},
{
"epoch": 0.37408610885458976,
"grad_norm": 0.569004250440184,
"learning_rate": 9.981281414583693e-06,
"loss": 0.6598,
"step": 307
},
{
"epoch": 0.3753046303818034,
"grad_norm": 0.5713014740165444,
"learning_rate": 9.980663059077453e-06,
"loss": 0.6613,
"step": 308
},
{
"epoch": 0.3765231519090171,
"grad_norm": 0.6154580840564017,
"learning_rate": 9.980034675172274e-06,
"loss": 0.6442,
"step": 309
},
{
"epoch": 0.3777416734362307,
"grad_norm": 0.5917553562402863,
"learning_rate": 9.979396264133388e-06,
"loss": 0.6431,
"step": 310
},
{
"epoch": 0.37896019496344435,
"grad_norm": 0.578864320620872,
"learning_rate": 9.978747827246214e-06,
"loss": 0.6589,
"step": 311
},
{
"epoch": 0.380178716490658,
"grad_norm": 0.6460070122884725,
"learning_rate": 9.978089365816357e-06,
"loss": 0.6267,
"step": 312
},
{
"epoch": 0.3813972380178717,
"grad_norm": 0.6165901634865715,
"learning_rate": 9.977420881169607e-06,
"loss": 0.6357,
"step": 313
},
{
"epoch": 0.3826157595450853,
"grad_norm": 0.6862027434641219,
"learning_rate": 9.976742374651936e-06,
"loss": 0.6607,
"step": 314
},
{
"epoch": 0.38383428107229894,
"grad_norm": 0.6447789605505084,
"learning_rate": 9.976053847629496e-06,
"loss": 0.6464,
"step": 315
},
{
"epoch": 0.3850528025995126,
"grad_norm": 0.597882927094437,
"learning_rate": 9.97535530148861e-06,
"loss": 0.6337,
"step": 316
},
{
"epoch": 0.3862713241267262,
"grad_norm": 0.6296819593414332,
"learning_rate": 9.974646737635781e-06,
"loss": 0.6474,
"step": 317
},
{
"epoch": 0.3874898456539399,
"grad_norm": 0.6313838311506389,
"learning_rate": 9.973928157497675e-06,
"loss": 0.6289,
"step": 318
},
{
"epoch": 0.38870836718115354,
"grad_norm": 0.6255452790127047,
"learning_rate": 9.97319956252113e-06,
"loss": 0.6418,
"step": 319
},
{
"epoch": 0.38992688870836717,
"grad_norm": 0.501125482187719,
"learning_rate": 9.972460954173149e-06,
"loss": 0.6469,
"step": 320
},
{
"epoch": 0.3911454102355808,
"grad_norm": 0.5644277137540713,
"learning_rate": 9.971712333940896e-06,
"loss": 0.6431,
"step": 321
},
{
"epoch": 0.3923639317627945,
"grad_norm": 0.5401625089221826,
"learning_rate": 9.970953703331692e-06,
"loss": 0.6399,
"step": 322
},
{
"epoch": 0.3935824532900081,
"grad_norm": 0.6126970579614653,
"learning_rate": 9.970185063873012e-06,
"loss": 0.6312,
"step": 323
},
{
"epoch": 0.39480097481722176,
"grad_norm": 0.6237167355625934,
"learning_rate": 9.969406417112489e-06,
"loss": 0.6492,
"step": 324
},
{
"epoch": 0.3960194963444354,
"grad_norm": 0.6083530680570769,
"learning_rate": 9.9686177646179e-06,
"loss": 0.6404,
"step": 325
},
{
"epoch": 0.3972380178716491,
"grad_norm": 0.6156210783234582,
"learning_rate": 9.967819107977175e-06,
"loss": 0.626,
"step": 326
},
{
"epoch": 0.3984565393988627,
"grad_norm": 0.6913246389420981,
"learning_rate": 9.967010448798376e-06,
"loss": 0.6464,
"step": 327
},
{
"epoch": 0.39967506092607635,
"grad_norm": 0.6430895031047548,
"learning_rate": 9.966191788709716e-06,
"loss": 0.6482,
"step": 328
},
{
"epoch": 0.40089358245329,
"grad_norm": 0.670581453307023,
"learning_rate": 9.965363129359537e-06,
"loss": 0.649,
"step": 329
},
{
"epoch": 0.4021121039805037,
"grad_norm": 0.6373745499675882,
"learning_rate": 9.964524472416319e-06,
"loss": 0.6231,
"step": 330
},
{
"epoch": 0.4033306255077173,
"grad_norm": 0.5729524017518108,
"learning_rate": 9.96367581956867e-06,
"loss": 0.639,
"step": 331
},
{
"epoch": 0.40454914703493094,
"grad_norm": 0.60528048612915,
"learning_rate": 9.962817172525323e-06,
"loss": 0.6412,
"step": 332
},
{
"epoch": 0.4057676685621446,
"grad_norm": 0.5439146819119978,
"learning_rate": 9.961948533015135e-06,
"loss": 0.6463,
"step": 333
},
{
"epoch": 0.40698619008935827,
"grad_norm": 0.6696342043794363,
"learning_rate": 9.961069902787082e-06,
"loss": 0.6559,
"step": 334
},
{
"epoch": 0.4082047116165719,
"grad_norm": 0.6137113821251218,
"learning_rate": 9.96018128361026e-06,
"loss": 0.6186,
"step": 335
},
{
"epoch": 0.40942323314378554,
"grad_norm": 0.7521896228588043,
"learning_rate": 9.959282677273869e-06,
"loss": 0.6585,
"step": 336
},
{
"epoch": 0.41064175467099917,
"grad_norm": 0.6161644621872354,
"learning_rate": 9.958374085587228e-06,
"loss": 0.6511,
"step": 337
},
{
"epoch": 0.41186027619821286,
"grad_norm": 0.6232166791838529,
"learning_rate": 9.957455510379753e-06,
"loss": 0.6421,
"step": 338
},
{
"epoch": 0.4130787977254265,
"grad_norm": 0.6575837363786434,
"learning_rate": 9.956526953500965e-06,
"loss": 0.6288,
"step": 339
},
{
"epoch": 0.41429731925264013,
"grad_norm": 0.624761687952515,
"learning_rate": 9.955588416820482e-06,
"loss": 0.6397,
"step": 340
},
{
"epoch": 0.41551584077985376,
"grad_norm": 0.6332930756907055,
"learning_rate": 9.954639902228018e-06,
"loss": 0.6444,
"step": 341
},
{
"epoch": 0.41673436230706745,
"grad_norm": 0.5746664206825376,
"learning_rate": 9.953681411633376e-06,
"loss": 0.6414,
"step": 342
},
{
"epoch": 0.4179528838342811,
"grad_norm": 0.6762777021979247,
"learning_rate": 9.952712946966441e-06,
"loss": 0.6306,
"step": 343
},
{
"epoch": 0.4191714053614947,
"grad_norm": 0.6244129529931802,
"learning_rate": 9.951734510177187e-06,
"loss": 0.6366,
"step": 344
},
{
"epoch": 0.42038992688870835,
"grad_norm": 0.6226787509569254,
"learning_rate": 9.950746103235663e-06,
"loss": 0.6302,
"step": 345
},
{
"epoch": 0.421608448415922,
"grad_norm": 0.6520199261370837,
"learning_rate": 9.949747728131994e-06,
"loss": 0.6816,
"step": 346
},
{
"epoch": 0.4228269699431357,
"grad_norm": 0.6026134976644628,
"learning_rate": 9.948739386876376e-06,
"loss": 0.6385,
"step": 347
},
{
"epoch": 0.4240454914703493,
"grad_norm": 0.6012466224483265,
"learning_rate": 9.947721081499068e-06,
"loss": 0.6458,
"step": 348
},
{
"epoch": 0.42526401299756295,
"grad_norm": 0.5524226925373649,
"learning_rate": 9.946692814050396e-06,
"loss": 0.6281,
"step": 349
},
{
"epoch": 0.4264825345247766,
"grad_norm": 0.6055953304742949,
"learning_rate": 9.945654586600741e-06,
"loss": 0.6467,
"step": 350
},
{
"epoch": 0.42770105605199027,
"grad_norm": 0.586137745210729,
"learning_rate": 9.944606401240538e-06,
"loss": 0.6379,
"step": 351
},
{
"epoch": 0.4289195775792039,
"grad_norm": 0.5125599093697626,
"learning_rate": 9.943548260080277e-06,
"loss": 0.6523,
"step": 352
},
{
"epoch": 0.43013809910641754,
"grad_norm": 0.6305973658118967,
"learning_rate": 9.942480165250487e-06,
"loss": 0.6389,
"step": 353
},
{
"epoch": 0.43135662063363117,
"grad_norm": 0.5220411272087411,
"learning_rate": 9.941402118901743e-06,
"loss": 0.6425,
"step": 354
},
{
"epoch": 0.43257514216084486,
"grad_norm": 0.5753441957701829,
"learning_rate": 9.940314123204656e-06,
"loss": 0.6441,
"step": 355
},
{
"epoch": 0.4337936636880585,
"grad_norm": 0.584328279121849,
"learning_rate": 9.939216180349864e-06,
"loss": 0.6359,
"step": 356
},
{
"epoch": 0.43501218521527213,
"grad_norm": 0.6135441335146246,
"learning_rate": 9.938108292548044e-06,
"loss": 0.6267,
"step": 357
},
{
"epoch": 0.43623070674248576,
"grad_norm": 0.5429972724232678,
"learning_rate": 9.93699046202989e-06,
"loss": 0.611,
"step": 358
},
{
"epoch": 0.43744922826969945,
"grad_norm": 0.6487815842031103,
"learning_rate": 9.935862691046114e-06,
"loss": 0.6395,
"step": 359
},
{
"epoch": 0.4386677497969131,
"grad_norm": 0.5638558609882317,
"learning_rate": 9.934724981867447e-06,
"loss": 0.6398,
"step": 360
},
{
"epoch": 0.4398862713241267,
"grad_norm": 0.7915256825394801,
"learning_rate": 9.93357733678463e-06,
"loss": 0.6275,
"step": 361
},
{
"epoch": 0.44110479285134035,
"grad_norm": 0.6072564790199728,
"learning_rate": 9.932419758108403e-06,
"loss": 0.6313,
"step": 362
},
{
"epoch": 0.44232331437855404,
"grad_norm": 0.7829204972438968,
"learning_rate": 9.931252248169518e-06,
"loss": 0.6334,
"step": 363
},
{
"epoch": 0.4435418359057677,
"grad_norm": 0.6029448727505217,
"learning_rate": 9.930074809318714e-06,
"loss": 0.6469,
"step": 364
},
{
"epoch": 0.4447603574329813,
"grad_norm": 0.6793840267075067,
"learning_rate": 9.928887443926725e-06,
"loss": 0.6334,
"step": 365
},
{
"epoch": 0.44597887896019495,
"grad_norm": 0.5488302948299049,
"learning_rate": 9.927690154384273e-06,
"loss": 0.6213,
"step": 366
},
{
"epoch": 0.44719740048740864,
"grad_norm": 0.7346734434148855,
"learning_rate": 9.92648294310206e-06,
"loss": 0.6295,
"step": 367
},
{
"epoch": 0.44841592201462227,
"grad_norm": 0.7457059967309784,
"learning_rate": 9.925265812510767e-06,
"loss": 0.6379,
"step": 368
},
{
"epoch": 0.4496344435418359,
"grad_norm": 0.621543177481449,
"learning_rate": 9.924038765061042e-06,
"loss": 0.641,
"step": 369
},
{
"epoch": 0.45085296506904954,
"grad_norm": 0.8188643504363363,
"learning_rate": 9.922801803223506e-06,
"loss": 0.6481,
"step": 370
},
{
"epoch": 0.45207148659626323,
"grad_norm": 0.6040894853255576,
"learning_rate": 9.921554929488741e-06,
"loss": 0.6493,
"step": 371
},
{
"epoch": 0.45329000812347686,
"grad_norm": 0.8455545003582287,
"learning_rate": 9.920298146367287e-06,
"loss": 0.6436,
"step": 372
},
{
"epoch": 0.4545085296506905,
"grad_norm": 0.626392939964308,
"learning_rate": 9.919031456389632e-06,
"loss": 0.6303,
"step": 373
},
{
"epoch": 0.45572705117790413,
"grad_norm": 0.7483260656404666,
"learning_rate": 9.917754862106216e-06,
"loss": 0.6306,
"step": 374
},
{
"epoch": 0.45694557270511776,
"grad_norm": 0.6122181327172058,
"learning_rate": 9.916468366087418e-06,
"loss": 0.6409,
"step": 375
},
{
"epoch": 0.45816409423233145,
"grad_norm": 0.5593648989087618,
"learning_rate": 9.915171970923556e-06,
"loss": 0.6583,
"step": 376
},
{
"epoch": 0.4593826157595451,
"grad_norm": 0.7626157086282944,
"learning_rate": 9.913865679224876e-06,
"loss": 0.648,
"step": 377
},
{
"epoch": 0.4606011372867587,
"grad_norm": 0.5027545868887003,
"learning_rate": 9.912549493621555e-06,
"loss": 0.6378,
"step": 378
},
{
"epoch": 0.46181965881397236,
"grad_norm": 0.6593540069533284,
"learning_rate": 9.911223416763689e-06,
"loss": 0.6487,
"step": 379
},
{
"epoch": 0.46303818034118605,
"grad_norm": 0.7507657782021496,
"learning_rate": 9.909887451321288e-06,
"loss": 0.6628,
"step": 380
},
{
"epoch": 0.4642567018683997,
"grad_norm": 0.5963371403892291,
"learning_rate": 9.908541599984276e-06,
"loss": 0.6304,
"step": 381
},
{
"epoch": 0.4654752233956133,
"grad_norm": 0.7456866534587581,
"learning_rate": 9.907185865462476e-06,
"loss": 0.6362,
"step": 382
},
{
"epoch": 0.46669374492282695,
"grad_norm": 0.5547991254906135,
"learning_rate": 9.905820250485619e-06,
"loss": 0.631,
"step": 383
},
{
"epoch": 0.46791226645004064,
"grad_norm": 0.7089080919365149,
"learning_rate": 9.904444757803322e-06,
"loss": 0.6281,
"step": 384
},
{
"epoch": 0.46913078797725427,
"grad_norm": 0.5003916403857714,
"learning_rate": 9.903059390185093e-06,
"loss": 0.6412,
"step": 385
},
{
"epoch": 0.4703493095044679,
"grad_norm": 0.6729850093918749,
"learning_rate": 9.901664150420328e-06,
"loss": 0.6329,
"step": 386
},
{
"epoch": 0.47156783103168154,
"grad_norm": 0.5557718878026181,
"learning_rate": 9.90025904131829e-06,
"loss": 0.6226,
"step": 387
},
{
"epoch": 0.47278635255889523,
"grad_norm": 0.6260971706778755,
"learning_rate": 9.898844065708121e-06,
"loss": 0.6257,
"step": 388
},
{
"epoch": 0.47400487408610886,
"grad_norm": 0.5411961675821981,
"learning_rate": 9.89741922643883e-06,
"loss": 0.6517,
"step": 389
},
{
"epoch": 0.4752233956133225,
"grad_norm": 0.5597130938499267,
"learning_rate": 9.895984526379282e-06,
"loss": 0.6157,
"step": 390
},
{
"epoch": 0.47644191714053613,
"grad_norm": 0.58052501455543,
"learning_rate": 9.894539968418195e-06,
"loss": 0.6322,
"step": 391
},
{
"epoch": 0.4776604386677498,
"grad_norm": 0.5211161945377233,
"learning_rate": 9.893085555464143e-06,
"loss": 0.6089,
"step": 392
},
{
"epoch": 0.47887896019496345,
"grad_norm": 0.6838111314182518,
"learning_rate": 9.891621290445534e-06,
"loss": 0.632,
"step": 393
},
{
"epoch": 0.4800974817221771,
"grad_norm": 0.5785699696283433,
"learning_rate": 9.890147176310618e-06,
"loss": 0.623,
"step": 394
},
{
"epoch": 0.4813160032493907,
"grad_norm": 0.6260781225868985,
"learning_rate": 9.888663216027477e-06,
"loss": 0.6433,
"step": 395
},
{
"epoch": 0.4825345247766044,
"grad_norm": 0.5634389513735794,
"learning_rate": 9.887169412584012e-06,
"loss": 0.6359,
"step": 396
},
{
"epoch": 0.48375304630381805,
"grad_norm": 0.576861556797157,
"learning_rate": 9.885665768987947e-06,
"loss": 0.6289,
"step": 397
},
{
"epoch": 0.4849715678310317,
"grad_norm": 0.5991685983326442,
"learning_rate": 9.88415228826682e-06,
"loss": 0.6345,
"step": 398
},
{
"epoch": 0.4861900893582453,
"grad_norm": 0.5331826337156919,
"learning_rate": 9.882628973467972e-06,
"loss": 0.6282,
"step": 399
},
{
"epoch": 0.487408610885459,
"grad_norm": 0.5052439699487477,
"learning_rate": 9.881095827658548e-06,
"loss": 0.629,
"step": 400
},
{
"epoch": 0.48862713241267264,
"grad_norm": 0.5842564825983466,
"learning_rate": 9.879552853925486e-06,
"loss": 0.6518,
"step": 401
},
{
"epoch": 0.48984565393988627,
"grad_norm": 0.5538659465643975,
"learning_rate": 9.878000055375512e-06,
"loss": 0.6333,
"step": 402
},
{
"epoch": 0.4910641754670999,
"grad_norm": 0.5200827864775698,
"learning_rate": 9.876437435135133e-06,
"loss": 0.6348,
"step": 403
},
{
"epoch": 0.49228269699431354,
"grad_norm": 0.6043127912027646,
"learning_rate": 9.874864996350633e-06,
"loss": 0.6136,
"step": 404
},
{
"epoch": 0.49350121852152723,
"grad_norm": 0.4948272003142496,
"learning_rate": 9.873282742188066e-06,
"loss": 0.6301,
"step": 405
},
{
"epoch": 0.49471974004874086,
"grad_norm": 0.5983030540970795,
"learning_rate": 9.871690675833248e-06,
"loss": 0.6354,
"step": 406
},
{
"epoch": 0.4959382615759545,
"grad_norm": 0.5309927588463559,
"learning_rate": 9.87008880049175e-06,
"loss": 0.6316,
"step": 407
},
{
"epoch": 0.49715678310316813,
"grad_norm": 0.46510544628039285,
"learning_rate": 9.868477119388897e-06,
"loss": 0.641,
"step": 408
},
{
"epoch": 0.4983753046303818,
"grad_norm": 0.4745237655389145,
"learning_rate": 9.866855635769753e-06,
"loss": 0.6484,
"step": 409
},
{
"epoch": 0.49959382615759546,
"grad_norm": 0.562173043770555,
"learning_rate": 9.86522435289912e-06,
"loss": 0.6263,
"step": 410
},
{
"epoch": 0.5008123476848091,
"grad_norm": 0.5419982591023096,
"learning_rate": 9.863583274061535e-06,
"loss": 0.6197,
"step": 411
},
{
"epoch": 0.5020308692120228,
"grad_norm": 0.5709095665576734,
"learning_rate": 9.861932402561253e-06,
"loss": 0.6253,
"step": 412
},
{
"epoch": 0.5032493907392364,
"grad_norm": 0.5575561882923015,
"learning_rate": 9.86027174172225e-06,
"loss": 0.6257,
"step": 413
},
{
"epoch": 0.50446791226645,
"grad_norm": 0.5818761313113621,
"learning_rate": 9.858601294888212e-06,
"loss": 0.6375,
"step": 414
},
{
"epoch": 0.5056864337936637,
"grad_norm": 0.55560278003152,
"learning_rate": 9.856921065422527e-06,
"loss": 0.6327,
"step": 415
},
{
"epoch": 0.5069049553208773,
"grad_norm": 0.5142680238787152,
"learning_rate": 9.855231056708281e-06,
"loss": 0.6347,
"step": 416
},
{
"epoch": 0.508123476848091,
"grad_norm": 0.5468260799033448,
"learning_rate": 9.853531272148248e-06,
"loss": 0.6165,
"step": 417
},
{
"epoch": 0.5093419983753046,
"grad_norm": 0.5366215405716666,
"learning_rate": 9.851821715164891e-06,
"loss": 0.6232,
"step": 418
},
{
"epoch": 0.5105605199025183,
"grad_norm": 0.6815769917483668,
"learning_rate": 9.850102389200346e-06,
"loss": 0.6375,
"step": 419
},
{
"epoch": 0.511779041429732,
"grad_norm": 0.5766636790628379,
"learning_rate": 9.848373297716414e-06,
"loss": 0.6411,
"step": 420
},
{
"epoch": 0.5129975629569455,
"grad_norm": 0.6508434213004275,
"learning_rate": 9.846634444194568e-06,
"loss": 0.6277,
"step": 421
},
{
"epoch": 0.5142160844841592,
"grad_norm": 0.5654811023161467,
"learning_rate": 9.844885832135928e-06,
"loss": 0.6192,
"step": 422
},
{
"epoch": 0.5154346060113729,
"grad_norm": 0.6220408843438429,
"learning_rate": 9.84312746506127e-06,
"loss": 0.6254,
"step": 423
},
{
"epoch": 0.5166531275385865,
"grad_norm": 0.5550144456923615,
"learning_rate": 9.841359346511004e-06,
"loss": 0.6288,
"step": 424
},
{
"epoch": 0.5178716490658002,
"grad_norm": 0.5804117244385404,
"learning_rate": 9.83958148004518e-06,
"loss": 0.6244,
"step": 425
},
{
"epoch": 0.5190901705930138,
"grad_norm": 0.6245742605810847,
"learning_rate": 9.837793869243468e-06,
"loss": 0.6209,
"step": 426
},
{
"epoch": 0.5203086921202275,
"grad_norm": 0.5661037548895256,
"learning_rate": 9.83599651770517e-06,
"loss": 0.6279,
"step": 427
},
{
"epoch": 0.5215272136474411,
"grad_norm": 0.5358603369119569,
"learning_rate": 9.834189429049188e-06,
"loss": 0.6307,
"step": 428
},
{
"epoch": 0.5227457351746547,
"grad_norm": 0.6122007731857034,
"learning_rate": 9.832372606914038e-06,
"loss": 0.6158,
"step": 429
},
{
"epoch": 0.5239642567018684,
"grad_norm": 0.5972271574369769,
"learning_rate": 9.830546054957828e-06,
"loss": 0.6204,
"step": 430
},
{
"epoch": 0.525182778229082,
"grad_norm": 0.5443858161988891,
"learning_rate": 9.82870977685826e-06,
"loss": 0.621,
"step": 431
},
{
"epoch": 0.5264012997562957,
"grad_norm": 0.6250123596443754,
"learning_rate": 9.826863776312621e-06,
"loss": 0.6408,
"step": 432
},
{
"epoch": 0.5276198212835094,
"grad_norm": 0.5933038352389216,
"learning_rate": 9.825008057037769e-06,
"loss": 0.6588,
"step": 433
},
{
"epoch": 0.528838342810723,
"grad_norm": 0.6567920347058966,
"learning_rate": 9.823142622770135e-06,
"loss": 0.625,
"step": 434
},
{
"epoch": 0.5300568643379366,
"grad_norm": 0.5779776066299945,
"learning_rate": 9.821267477265705e-06,
"loss": 0.6387,
"step": 435
},
{
"epoch": 0.5312753858651503,
"grad_norm": 0.570082080677981,
"learning_rate": 9.819382624300027e-06,
"loss": 0.6324,
"step": 436
},
{
"epoch": 0.5324939073923639,
"grad_norm": 0.5818606827175574,
"learning_rate": 9.817488067668186e-06,
"loss": 0.644,
"step": 437
},
{
"epoch": 0.5337124289195776,
"grad_norm": 0.5476824827124901,
"learning_rate": 9.815583811184809e-06,
"loss": 0.6189,
"step": 438
},
{
"epoch": 0.5349309504467912,
"grad_norm": 0.5768267508522074,
"learning_rate": 9.813669858684054e-06,
"loss": 0.6222,
"step": 439
},
{
"epoch": 0.5361494719740049,
"grad_norm": 0.5120867453918215,
"learning_rate": 9.8117462140196e-06,
"loss": 0.6204,
"step": 440
},
{
"epoch": 0.5373679935012186,
"grad_norm": 0.5186146607717382,
"learning_rate": 9.80981288106464e-06,
"loss": 0.6195,
"step": 441
},
{
"epoch": 0.5385865150284321,
"grad_norm": 0.5895698622661449,
"learning_rate": 9.807869863711878e-06,
"loss": 0.6205,
"step": 442
},
{
"epoch": 0.5398050365556458,
"grad_norm": 0.5421346973971489,
"learning_rate": 9.805917165873515e-06,
"loss": 0.6303,
"step": 443
},
{
"epoch": 0.5410235580828595,
"grad_norm": 0.5227058266380313,
"learning_rate": 9.803954791481239e-06,
"loss": 0.6196,
"step": 444
},
{
"epoch": 0.5422420796100731,
"grad_norm": 0.4665750459631165,
"learning_rate": 9.801982744486229e-06,
"loss": 0.628,
"step": 445
},
{
"epoch": 0.5434606011372868,
"grad_norm": 0.5340051839015313,
"learning_rate": 9.800001028859135e-06,
"loss": 0.6321,
"step": 446
},
{
"epoch": 0.5446791226645004,
"grad_norm": 0.49569443009344233,
"learning_rate": 9.798009648590073e-06,
"loss": 0.6295,
"step": 447
},
{
"epoch": 0.545897644191714,
"grad_norm": 0.5589394978947685,
"learning_rate": 9.796008607688624e-06,
"loss": 0.6458,
"step": 448
},
{
"epoch": 0.5471161657189277,
"grad_norm": 0.5349825334411198,
"learning_rate": 9.793997910183815e-06,
"loss": 0.6348,
"step": 449
},
{
"epoch": 0.5483346872461413,
"grad_norm": 0.5406756193824626,
"learning_rate": 9.79197756012412e-06,
"loss": 0.6352,
"step": 450
},
{
"epoch": 0.549553208773355,
"grad_norm": 0.5590939249326192,
"learning_rate": 9.789947561577445e-06,
"loss": 0.6345,
"step": 451
},
{
"epoch": 0.5507717303005687,
"grad_norm": 0.5138272981689205,
"learning_rate": 9.787907918631125e-06,
"loss": 0.6457,
"step": 452
},
{
"epoch": 0.5519902518277823,
"grad_norm": 0.5967975071520696,
"learning_rate": 9.785858635391913e-06,
"loss": 0.6059,
"step": 453
},
{
"epoch": 0.553208773354996,
"grad_norm": 0.4912288949887055,
"learning_rate": 9.783799715985973e-06,
"loss": 0.6254,
"step": 454
},
{
"epoch": 0.5544272948822095,
"grad_norm": 0.5903941074513651,
"learning_rate": 9.78173116455887e-06,
"loss": 0.6108,
"step": 455
},
{
"epoch": 0.5556458164094232,
"grad_norm": 0.5632794329839387,
"learning_rate": 9.779652985275562e-06,
"loss": 0.6187,
"step": 456
},
{
"epoch": 0.5568643379366369,
"grad_norm": 0.5941486268629673,
"learning_rate": 9.777565182320396e-06,
"loss": 0.6184,
"step": 457
},
{
"epoch": 0.5580828594638505,
"grad_norm": 0.6416650599158464,
"learning_rate": 9.775467759897092e-06,
"loss": 0.6331,
"step": 458
},
{
"epoch": 0.5593013809910642,
"grad_norm": 0.5651281069823211,
"learning_rate": 9.773360722228742e-06,
"loss": 0.6307,
"step": 459
},
{
"epoch": 0.5605199025182778,
"grad_norm": 0.6620891236551917,
"learning_rate": 9.771244073557792e-06,
"loss": 0.6078,
"step": 460
},
{
"epoch": 0.5617384240454915,
"grad_norm": 0.6015785675867341,
"learning_rate": 9.769117818146048e-06,
"loss": 0.6237,
"step": 461
},
{
"epoch": 0.5629569455727051,
"grad_norm": 0.8038047522794796,
"learning_rate": 9.766981960274653e-06,
"loss": 0.6173,
"step": 462
},
{
"epoch": 0.5641754670999187,
"grad_norm": 0.6163269598618792,
"learning_rate": 9.764836504244086e-06,
"loss": 0.6264,
"step": 463
},
{
"epoch": 0.5653939886271324,
"grad_norm": 0.6244153487192251,
"learning_rate": 9.762681454374148e-06,
"loss": 0.6112,
"step": 464
},
{
"epoch": 0.5666125101543461,
"grad_norm": 0.724456218504814,
"learning_rate": 9.760516815003965e-06,
"loss": 0.6255,
"step": 465
},
{
"epoch": 0.5678310316815597,
"grad_norm": 0.580652096091434,
"learning_rate": 9.758342590491961e-06,
"loss": 0.6342,
"step": 466
},
{
"epoch": 0.5690495532087734,
"grad_norm": 0.6644456071205537,
"learning_rate": 9.756158785215866e-06,
"loss": 0.6127,
"step": 467
},
{
"epoch": 0.570268074735987,
"grad_norm": 0.5736293156748269,
"learning_rate": 9.753965403572703e-06,
"loss": 0.6313,
"step": 468
},
{
"epoch": 0.5714865962632006,
"grad_norm": 0.6178186373387958,
"learning_rate": 9.751762449978767e-06,
"loss": 0.643,
"step": 469
},
{
"epoch": 0.5727051177904143,
"grad_norm": 0.584712916385393,
"learning_rate": 9.749549928869636e-06,
"loss": 0.5948,
"step": 470
},
{
"epoch": 0.5739236393176279,
"grad_norm": 0.6116917271773714,
"learning_rate": 9.747327844700147e-06,
"loss": 0.6297,
"step": 471
},
{
"epoch": 0.5751421608448416,
"grad_norm": 0.4903955649085751,
"learning_rate": 9.745096201944391e-06,
"loss": 0.6251,
"step": 472
},
{
"epoch": 0.5763606823720553,
"grad_norm": 0.6968313476556924,
"learning_rate": 9.742855005095706e-06,
"loss": 0.6117,
"step": 473
},
{
"epoch": 0.5775792038992689,
"grad_norm": 0.48897486873959584,
"learning_rate": 9.740604258666668e-06,
"loss": 0.6058,
"step": 474
},
{
"epoch": 0.5787977254264826,
"grad_norm": 0.7217629239411762,
"learning_rate": 9.73834396718908e-06,
"loss": 0.6265,
"step": 475
},
{
"epoch": 0.5800162469536961,
"grad_norm": 0.613354162646377,
"learning_rate": 9.736074135213962e-06,
"loss": 0.6399,
"step": 476
},
{
"epoch": 0.5812347684809098,
"grad_norm": 0.6447055703309105,
"learning_rate": 9.733794767311545e-06,
"loss": 0.6335,
"step": 477
},
{
"epoch": 0.5824532900081235,
"grad_norm": 0.5823448015058018,
"learning_rate": 9.731505868071262e-06,
"loss": 0.6262,
"step": 478
},
{
"epoch": 0.5836718115353371,
"grad_norm": 0.5125864553684497,
"learning_rate": 9.729207442101736e-06,
"loss": 0.6101,
"step": 479
},
{
"epoch": 0.5848903330625508,
"grad_norm": 0.6147081791430226,
"learning_rate": 9.726899494030768e-06,
"loss": 0.6411,
"step": 480
},
{
"epoch": 0.5861088545897645,
"grad_norm": 0.5467046907537908,
"learning_rate": 9.724582028505336e-06,
"loss": 0.6203,
"step": 481
},
{
"epoch": 0.587327376116978,
"grad_norm": 0.5741960101018327,
"learning_rate": 9.72225505019158e-06,
"loss": 0.624,
"step": 482
},
{
"epoch": 0.5885458976441917,
"grad_norm": 0.6709034274446143,
"learning_rate": 9.719918563774793e-06,
"loss": 0.6316,
"step": 483
},
{
"epoch": 0.5897644191714053,
"grad_norm": 0.5633926121392079,
"learning_rate": 9.71757257395941e-06,
"loss": 0.6205,
"step": 484
},
{
"epoch": 0.590982940698619,
"grad_norm": 0.5752003286544818,
"learning_rate": 9.715217085469009e-06,
"loss": 0.601,
"step": 485
},
{
"epoch": 0.5922014622258327,
"grad_norm": 0.6676085473844594,
"learning_rate": 9.712852103046281e-06,
"loss": 0.6425,
"step": 486
},
{
"epoch": 0.5934199837530463,
"grad_norm": 0.43714860457984767,
"learning_rate": 9.710477631453044e-06,
"loss": 0.6264,
"step": 487
},
{
"epoch": 0.59463850528026,
"grad_norm": 0.7834186015627101,
"learning_rate": 9.708093675470214e-06,
"loss": 0.6294,
"step": 488
},
{
"epoch": 0.5958570268074735,
"grad_norm": 0.5229823852593044,
"learning_rate": 9.705700239897809e-06,
"loss": 0.6253,
"step": 489
},
{
"epoch": 0.5970755483346872,
"grad_norm": 0.6641427142623177,
"learning_rate": 9.70329732955493e-06,
"loss": 0.6208,
"step": 490
},
{
"epoch": 0.5982940698619009,
"grad_norm": 0.5777300627058165,
"learning_rate": 9.70088494927976e-06,
"loss": 0.62,
"step": 491
},
{
"epoch": 0.5995125913891145,
"grad_norm": 0.47427848956457735,
"learning_rate": 9.698463103929542e-06,
"loss": 0.6168,
"step": 492
},
{
"epoch": 0.6007311129163282,
"grad_norm": 0.6176694192284208,
"learning_rate": 9.696031798380586e-06,
"loss": 0.6192,
"step": 493
},
{
"epoch": 0.6019496344435419,
"grad_norm": 0.5380294280704867,
"learning_rate": 9.693591037528239e-06,
"loss": 0.6324,
"step": 494
},
{
"epoch": 0.6031681559707555,
"grad_norm": 0.5270092433580651,
"learning_rate": 9.691140826286893e-06,
"loss": 0.6275,
"step": 495
},
{
"epoch": 0.6043866774979691,
"grad_norm": 0.5928211370503502,
"learning_rate": 9.688681169589971e-06,
"loss": 0.6295,
"step": 496
},
{
"epoch": 0.6056051990251827,
"grad_norm": 0.487281690093329,
"learning_rate": 9.686212072389904e-06,
"loss": 0.6157,
"step": 497
},
{
"epoch": 0.6068237205523964,
"grad_norm": 0.5179266059337351,
"learning_rate": 9.68373353965814e-06,
"loss": 0.6098,
"step": 498
},
{
"epoch": 0.6080422420796101,
"grad_norm": 0.5314913870970437,
"learning_rate": 9.68124557638512e-06,
"loss": 0.6173,
"step": 499
},
{
"epoch": 0.6092607636068237,
"grad_norm": 0.4844744555610714,
"learning_rate": 9.678748187580278e-06,
"loss": 0.6186,
"step": 500
},
{
"epoch": 0.6104792851340374,
"grad_norm": 0.5188776477142794,
"learning_rate": 9.676241378272022e-06,
"loss": 0.6168,
"step": 501
},
{
"epoch": 0.6116978066612511,
"grad_norm": 0.49668970689497427,
"learning_rate": 9.673725153507727e-06,
"loss": 0.6128,
"step": 502
},
{
"epoch": 0.6129163281884646,
"grad_norm": 0.5049088012633238,
"learning_rate": 9.67119951835373e-06,
"loss": 0.6204,
"step": 503
},
{
"epoch": 0.6141348497156783,
"grad_norm": 0.5286755135827618,
"learning_rate": 9.66866447789531e-06,
"loss": 0.6321,
"step": 504
},
{
"epoch": 0.6153533712428919,
"grad_norm": 0.5414829955250333,
"learning_rate": 9.666120037236692e-06,
"loss": 0.6073,
"step": 505
},
{
"epoch": 0.6165718927701056,
"grad_norm": 0.5929807296645003,
"learning_rate": 9.663566201501017e-06,
"loss": 0.6219,
"step": 506
},
{
"epoch": 0.6177904142973193,
"grad_norm": 0.565513002212362,
"learning_rate": 9.66100297583035e-06,
"loss": 0.6218,
"step": 507
},
{
"epoch": 0.6190089358245329,
"grad_norm": 0.48043459347807704,
"learning_rate": 9.65843036538566e-06,
"loss": 0.607,
"step": 508
},
{
"epoch": 0.6202274573517466,
"grad_norm": 0.6289509926942585,
"learning_rate": 9.655848375346812e-06,
"loss": 0.6396,
"step": 509
},
{
"epoch": 0.6214459788789602,
"grad_norm": 0.5609440147588081,
"learning_rate": 9.65325701091256e-06,
"loss": 0.6303,
"step": 510
},
{
"epoch": 0.6226645004061738,
"grad_norm": 0.5893573188478602,
"learning_rate": 9.650656277300525e-06,
"loss": 0.6166,
"step": 511
},
{
"epoch": 0.6238830219333875,
"grad_norm": 0.5628137809478111,
"learning_rate": 9.6480461797472e-06,
"loss": 0.6291,
"step": 512
},
{
"epoch": 0.6251015434606011,
"grad_norm": 0.5493464215154626,
"learning_rate": 9.645426723507929e-06,
"loss": 0.6222,
"step": 513
},
{
"epoch": 0.6263200649878148,
"grad_norm": 0.5629698357909129,
"learning_rate": 9.6427979138569e-06,
"loss": 0.6317,
"step": 514
},
{
"epoch": 0.6275385865150285,
"grad_norm": 0.6664927672498832,
"learning_rate": 9.640159756087136e-06,
"loss": 0.6382,
"step": 515
},
{
"epoch": 0.628757108042242,
"grad_norm": 0.5522749634660304,
"learning_rate": 9.637512255510475e-06,
"loss": 0.6143,
"step": 516
},
{
"epoch": 0.6299756295694557,
"grad_norm": 0.5532267628661862,
"learning_rate": 9.63485541745757e-06,
"loss": 0.6374,
"step": 517
},
{
"epoch": 0.6311941510966693,
"grad_norm": 0.6876124654936631,
"learning_rate": 9.632189247277885e-06,
"loss": 0.6392,
"step": 518
},
{
"epoch": 0.632412672623883,
"grad_norm": 0.653192030137328,
"learning_rate": 9.629513750339656e-06,
"loss": 0.6146,
"step": 519
},
{
"epoch": 0.6336311941510967,
"grad_norm": 0.5264590327684809,
"learning_rate": 9.626828932029907e-06,
"loss": 0.6187,
"step": 520
},
{
"epoch": 0.6348497156783103,
"grad_norm": 0.6140627235902801,
"learning_rate": 9.624134797754437e-06,
"loss": 0.5948,
"step": 521
},
{
"epoch": 0.636068237205524,
"grad_norm": 0.715948251788629,
"learning_rate": 9.62143135293779e-06,
"loss": 0.6221,
"step": 522
},
{
"epoch": 0.6372867587327377,
"grad_norm": 0.6814424426040064,
"learning_rate": 9.618718603023261e-06,
"loss": 0.6279,
"step": 523
},
{
"epoch": 0.6385052802599512,
"grad_norm": 0.600168318088034,
"learning_rate": 9.615996553472885e-06,
"loss": 0.6267,
"step": 524
},
{
"epoch": 0.6397238017871649,
"grad_norm": 0.5619413500131725,
"learning_rate": 9.613265209767417e-06,
"loss": 0.6288,
"step": 525
},
{
"epoch": 0.6409423233143785,
"grad_norm": 0.5903652755615201,
"learning_rate": 9.610524577406325e-06,
"loss": 0.6305,
"step": 526
},
{
"epoch": 0.6421608448415922,
"grad_norm": 0.5087861988940737,
"learning_rate": 9.607774661907783e-06,
"loss": 0.6192,
"step": 527
},
{
"epoch": 0.6433793663688059,
"grad_norm": 0.6555944853088764,
"learning_rate": 9.605015468808651e-06,
"loss": 0.6255,
"step": 528
},
{
"epoch": 0.6445978878960195,
"grad_norm": 0.6123139168204214,
"learning_rate": 9.602247003664476e-06,
"loss": 0.6185,
"step": 529
},
{
"epoch": 0.6458164094232332,
"grad_norm": 0.5503960050113602,
"learning_rate": 9.599469272049468e-06,
"loss": 0.6385,
"step": 530
},
{
"epoch": 0.6470349309504468,
"grad_norm": 0.5823472571150912,
"learning_rate": 9.596682279556499e-06,
"loss": 0.6241,
"step": 531
},
{
"epoch": 0.6482534524776604,
"grad_norm": 0.5840631388468679,
"learning_rate": 9.593886031797081e-06,
"loss": 0.625,
"step": 532
},
{
"epoch": 0.6494719740048741,
"grad_norm": 0.5622117171111194,
"learning_rate": 9.591080534401371e-06,
"loss": 0.6192,
"step": 533
},
{
"epoch": 0.6506904955320877,
"grad_norm": 0.5707745901206253,
"learning_rate": 9.588265793018141e-06,
"loss": 0.6391,
"step": 534
},
{
"epoch": 0.6519090170593014,
"grad_norm": 0.5896800585312665,
"learning_rate": 9.58544181331478e-06,
"loss": 0.6339,
"step": 535
},
{
"epoch": 0.6531275385865151,
"grad_norm": 0.5209906229065117,
"learning_rate": 9.582608600977276e-06,
"loss": 0.601,
"step": 536
},
{
"epoch": 0.6543460601137286,
"grad_norm": 0.5155011577582275,
"learning_rate": 9.579766161710209e-06,
"loss": 0.6015,
"step": 537
},
{
"epoch": 0.6555645816409423,
"grad_norm": 0.48807425767261786,
"learning_rate": 9.576914501236734e-06,
"loss": 0.6167,
"step": 538
},
{
"epoch": 0.656783103168156,
"grad_norm": 0.5579148908182612,
"learning_rate": 9.574053625298577e-06,
"loss": 0.6193,
"step": 539
},
{
"epoch": 0.6580016246953696,
"grad_norm": 0.5287053319535842,
"learning_rate": 9.571183539656011e-06,
"loss": 0.6291,
"step": 540
},
{
"epoch": 0.6592201462225833,
"grad_norm": 0.6191360016551267,
"learning_rate": 9.568304250087864e-06,
"loss": 0.6139,
"step": 541
},
{
"epoch": 0.6604386677497969,
"grad_norm": 0.5099069268786582,
"learning_rate": 9.565415762391485e-06,
"loss": 0.6013,
"step": 542
},
{
"epoch": 0.6616571892770106,
"grad_norm": 0.5421293076141,
"learning_rate": 9.562518082382751e-06,
"loss": 0.5907,
"step": 543
},
{
"epoch": 0.6628757108042242,
"grad_norm": 0.5498541039203616,
"learning_rate": 9.559611215896041e-06,
"loss": 0.627,
"step": 544
},
{
"epoch": 0.6640942323314378,
"grad_norm": 0.5680961983046815,
"learning_rate": 9.556695168784236e-06,
"loss": 0.5952,
"step": 545
},
{
"epoch": 0.6653127538586515,
"grad_norm": 0.5218060004228549,
"learning_rate": 9.553769946918698e-06,
"loss": 0.6228,
"step": 546
},
{
"epoch": 0.6665312753858651,
"grad_norm": 0.5543031912725007,
"learning_rate": 9.550835556189264e-06,
"loss": 0.6338,
"step": 547
},
{
"epoch": 0.6677497969130788,
"grad_norm": 0.5668524593324846,
"learning_rate": 9.547892002504233e-06,
"loss": 0.6219,
"step": 548
},
{
"epoch": 0.6689683184402925,
"grad_norm": 0.5873694380478705,
"learning_rate": 9.544939291790352e-06,
"loss": 0.624,
"step": 549
},
{
"epoch": 0.670186839967506,
"grad_norm": 0.5399986226537774,
"learning_rate": 9.541977429992803e-06,
"loss": 0.6385,
"step": 550
},
{
"epoch": 0.6714053614947197,
"grad_norm": 0.7171400926799747,
"learning_rate": 9.5390064230752e-06,
"loss": 0.621,
"step": 551
},
{
"epoch": 0.6726238830219334,
"grad_norm": 0.6092647452638789,
"learning_rate": 9.536026277019562e-06,
"loss": 0.6223,
"step": 552
},
{
"epoch": 0.673842404549147,
"grad_norm": 0.683988747327427,
"learning_rate": 9.533036997826315e-06,
"loss": 0.6199,
"step": 553
},
{
"epoch": 0.6750609260763607,
"grad_norm": 0.5791819914636441,
"learning_rate": 9.530038591514275e-06,
"loss": 0.6328,
"step": 554
},
{
"epoch": 0.6762794476035743,
"grad_norm": 0.6782628719672897,
"learning_rate": 9.527031064120632e-06,
"loss": 0.6127,
"step": 555
},
{
"epoch": 0.677497969130788,
"grad_norm": 0.6767775073979123,
"learning_rate": 9.524014421700942e-06,
"loss": 0.6186,
"step": 556
},
{
"epoch": 0.6787164906580017,
"grad_norm": 0.5114857558759379,
"learning_rate": 9.520988670329114e-06,
"loss": 0.63,
"step": 557
},
{
"epoch": 0.6799350121852152,
"grad_norm": 0.5501380880007342,
"learning_rate": 9.517953816097396e-06,
"loss": 0.5915,
"step": 558
},
{
"epoch": 0.6811535337124289,
"grad_norm": 0.6714746829201106,
"learning_rate": 9.514909865116368e-06,
"loss": 0.6067,
"step": 559
},
{
"epoch": 0.6823720552396426,
"grad_norm": 0.5375092336126965,
"learning_rate": 9.511856823514924e-06,
"loss": 0.596,
"step": 560
},
{
"epoch": 0.6835905767668562,
"grad_norm": 0.6176188040728243,
"learning_rate": 9.508794697440257e-06,
"loss": 0.6333,
"step": 561
},
{
"epoch": 0.6848090982940699,
"grad_norm": 0.6212303271054956,
"learning_rate": 9.505723493057862e-06,
"loss": 0.6178,
"step": 562
},
{
"epoch": 0.6860276198212835,
"grad_norm": 0.5377188134801542,
"learning_rate": 9.502643216551502e-06,
"loss": 0.6017,
"step": 563
},
{
"epoch": 0.6872461413484972,
"grad_norm": 0.6362000539969834,
"learning_rate": 9.499553874123213e-06,
"loss": 0.6392,
"step": 564
},
{
"epoch": 0.6884646628757108,
"grad_norm": 0.5480382319562058,
"learning_rate": 9.496455471993284e-06,
"loss": 0.6113,
"step": 565
},
{
"epoch": 0.6896831844029244,
"grad_norm": 0.6994517506614581,
"learning_rate": 9.49334801640024e-06,
"loss": 0.6327,
"step": 566
},
{
"epoch": 0.6909017059301381,
"grad_norm": 0.5335729160289857,
"learning_rate": 9.490231513600842e-06,
"loss": 0.6218,
"step": 567
},
{
"epoch": 0.6921202274573518,
"grad_norm": 0.6063268804347564,
"learning_rate": 9.487105969870068e-06,
"loss": 0.6174,
"step": 568
},
{
"epoch": 0.6933387489845654,
"grad_norm": 0.6267394635949436,
"learning_rate": 9.48397139150109e-06,
"loss": 0.605,
"step": 569
},
{
"epoch": 0.6945572705117791,
"grad_norm": 0.48229350211609867,
"learning_rate": 9.480827784805278e-06,
"loss": 0.6138,
"step": 570
},
{
"epoch": 0.6957757920389926,
"grad_norm": 0.6094361236823382,
"learning_rate": 9.477675156112183e-06,
"loss": 0.616,
"step": 571
},
{
"epoch": 0.6969943135662063,
"grad_norm": 0.5646668548267415,
"learning_rate": 9.474513511769513e-06,
"loss": 0.6257,
"step": 572
},
{
"epoch": 0.69821283509342,
"grad_norm": 0.5605266691062354,
"learning_rate": 9.47134285814314e-06,
"loss": 0.623,
"step": 573
},
{
"epoch": 0.6994313566206336,
"grad_norm": 0.5976205093855237,
"learning_rate": 9.468163201617063e-06,
"loss": 0.6182,
"step": 574
},
{
"epoch": 0.7006498781478473,
"grad_norm": 0.5736754942220608,
"learning_rate": 9.464974548593415e-06,
"loss": 0.5973,
"step": 575
},
{
"epoch": 0.7018683996750609,
"grad_norm": 0.5782971035374301,
"learning_rate": 9.461776905492446e-06,
"loss": 0.6021,
"step": 576
},
{
"epoch": 0.7030869212022746,
"grad_norm": 0.5094228164183464,
"learning_rate": 9.458570278752501e-06,
"loss": 0.6028,
"step": 577
},
{
"epoch": 0.7043054427294883,
"grad_norm": 0.5803305530484321,
"learning_rate": 9.455354674830016e-06,
"loss": 0.6224,
"step": 578
},
{
"epoch": 0.7055239642567018,
"grad_norm": 0.5229464149205902,
"learning_rate": 9.452130100199504e-06,
"loss": 0.6157,
"step": 579
},
{
"epoch": 0.7067424857839155,
"grad_norm": 0.5965075801420928,
"learning_rate": 9.448896561353536e-06,
"loss": 0.6062,
"step": 580
},
{
"epoch": 0.7079610073111292,
"grad_norm": 0.5275236801559984,
"learning_rate": 9.445654064802738e-06,
"loss": 0.611,
"step": 581
},
{
"epoch": 0.7091795288383428,
"grad_norm": 0.511555457965572,
"learning_rate": 9.442402617075765e-06,
"loss": 0.6263,
"step": 582
},
{
"epoch": 0.7103980503655565,
"grad_norm": 0.5490562182756723,
"learning_rate": 9.439142224719302e-06,
"loss": 0.6236,
"step": 583
},
{
"epoch": 0.7116165718927701,
"grad_norm": 0.5258200584782562,
"learning_rate": 9.435872894298037e-06,
"loss": 0.6106,
"step": 584
},
{
"epoch": 0.7128350934199837,
"grad_norm": 0.5189357566107585,
"learning_rate": 9.43259463239466e-06,
"loss": 0.636,
"step": 585
},
{
"epoch": 0.7140536149471974,
"grad_norm": 0.5097577073371684,
"learning_rate": 9.429307445609841e-06,
"loss": 0.6337,
"step": 586
},
{
"epoch": 0.715272136474411,
"grad_norm": 0.6069103268356187,
"learning_rate": 9.426011340562222e-06,
"loss": 0.6177,
"step": 587
},
{
"epoch": 0.7164906580016247,
"grad_norm": 0.48842546371203027,
"learning_rate": 9.422706323888398e-06,
"loss": 0.6011,
"step": 588
},
{
"epoch": 0.7177091795288384,
"grad_norm": 0.5365657101299985,
"learning_rate": 9.419392402242912e-06,
"loss": 0.6007,
"step": 589
},
{
"epoch": 0.718927701056052,
"grad_norm": 0.5101507591790149,
"learning_rate": 9.416069582298236e-06,
"loss": 0.6175,
"step": 590
},
{
"epoch": 0.7201462225832657,
"grad_norm": 0.4516555710559031,
"learning_rate": 9.412737870744752e-06,
"loss": 0.6107,
"step": 591
},
{
"epoch": 0.7213647441104792,
"grad_norm": 0.4881759934731241,
"learning_rate": 9.409397274290756e-06,
"loss": 0.6224,
"step": 592
},
{
"epoch": 0.7225832656376929,
"grad_norm": 0.45459978443672416,
"learning_rate": 9.406047799662426e-06,
"loss": 0.6089,
"step": 593
},
{
"epoch": 0.7238017871649066,
"grad_norm": 0.505751917086364,
"learning_rate": 9.402689453603815e-06,
"loss": 0.6244,
"step": 594
},
{
"epoch": 0.7250203086921202,
"grad_norm": 0.5110751597586063,
"learning_rate": 9.399322242876843e-06,
"loss": 0.601,
"step": 595
},
{
"epoch": 0.7262388302193339,
"grad_norm": 0.504579475445371,
"learning_rate": 9.395946174261274e-06,
"loss": 0.6216,
"step": 596
},
{
"epoch": 0.7274573517465476,
"grad_norm": 0.534595723022526,
"learning_rate": 9.392561254554712e-06,
"loss": 0.6067,
"step": 597
},
{
"epoch": 0.7286758732737612,
"grad_norm": 0.5583009202449097,
"learning_rate": 9.38916749057258e-06,
"loss": 0.6249,
"step": 598
},
{
"epoch": 0.7298943948009748,
"grad_norm": 0.5059716144312469,
"learning_rate": 9.385764889148107e-06,
"loss": 0.6115,
"step": 599
},
{
"epoch": 0.7311129163281884,
"grad_norm": 0.6121449401534393,
"learning_rate": 9.382353457132318e-06,
"loss": 0.6077,
"step": 600
},
{
"epoch": 0.7323314378554021,
"grad_norm": 0.4829522546788395,
"learning_rate": 9.378933201394019e-06,
"loss": 0.6216,
"step": 601
},
{
"epoch": 0.7335499593826158,
"grad_norm": 0.5436028145378481,
"learning_rate": 9.375504128819779e-06,
"loss": 0.6185,
"step": 602
},
{
"epoch": 0.7347684809098294,
"grad_norm": 0.5172970082009579,
"learning_rate": 9.372066246313922e-06,
"loss": 0.644,
"step": 603
},
{
"epoch": 0.7359870024370431,
"grad_norm": 0.4738987982796835,
"learning_rate": 9.368619560798511e-06,
"loss": 0.6246,
"step": 604
},
{
"epoch": 0.7372055239642566,
"grad_norm": 0.4525040495867516,
"learning_rate": 9.36516407921333e-06,
"loss": 0.6109,
"step": 605
},
{
"epoch": 0.7384240454914703,
"grad_norm": 0.5076237716007553,
"learning_rate": 9.361699808515877e-06,
"loss": 0.6151,
"step": 606
},
{
"epoch": 0.739642567018684,
"grad_norm": 0.5074655977130175,
"learning_rate": 9.358226755681342e-06,
"loss": 0.6082,
"step": 607
},
{
"epoch": 0.7408610885458976,
"grad_norm": 0.4840933107276308,
"learning_rate": 9.354744927702607e-06,
"loss": 0.615,
"step": 608
},
{
"epoch": 0.7420796100731113,
"grad_norm": 0.5219253787729252,
"learning_rate": 9.351254331590216e-06,
"loss": 0.5996,
"step": 609
},
{
"epoch": 0.743298131600325,
"grad_norm": 0.5601150249253273,
"learning_rate": 9.347754974372365e-06,
"loss": 0.6032,
"step": 610
},
{
"epoch": 0.7445166531275386,
"grad_norm": 0.4986838680038737,
"learning_rate": 9.344246863094893e-06,
"loss": 0.5976,
"step": 611
},
{
"epoch": 0.7457351746547523,
"grad_norm": 0.4948788586568317,
"learning_rate": 9.340730004821266e-06,
"loss": 0.6085,
"step": 612
},
{
"epoch": 0.7469536961819658,
"grad_norm": 0.5238689007424114,
"learning_rate": 9.33720440663256e-06,
"loss": 0.6129,
"step": 613
},
{
"epoch": 0.7481722177091795,
"grad_norm": 0.47607891045094536,
"learning_rate": 9.33367007562745e-06,
"loss": 0.6199,
"step": 614
},
{
"epoch": 0.7493907392363932,
"grad_norm": 0.4955962984701164,
"learning_rate": 9.330127018922195e-06,
"loss": 0.5949,
"step": 615
},
{
"epoch": 0.7506092607636068,
"grad_norm": 0.6100851359106775,
"learning_rate": 9.326575243650618e-06,
"loss": 0.6143,
"step": 616
},
{
"epoch": 0.7518277822908205,
"grad_norm": 0.48084331485799453,
"learning_rate": 9.323014756964104e-06,
"loss": 0.6064,
"step": 617
},
{
"epoch": 0.7530463038180342,
"grad_norm": 0.6768728956598579,
"learning_rate": 9.31944556603157e-06,
"loss": 0.6229,
"step": 618
},
{
"epoch": 0.7542648253452477,
"grad_norm": 0.6664441895394185,
"learning_rate": 9.315867678039469e-06,
"loss": 0.631,
"step": 619
},
{
"epoch": 0.7554833468724614,
"grad_norm": 0.6265982250759069,
"learning_rate": 9.312281100191752e-06,
"loss": 0.63,
"step": 620
},
{
"epoch": 0.756701868399675,
"grad_norm": 0.6297592873763573,
"learning_rate": 9.308685839709878e-06,
"loss": 0.6264,
"step": 621
},
{
"epoch": 0.7579203899268887,
"grad_norm": 0.5583877292859594,
"learning_rate": 9.305081903832784e-06,
"loss": 0.5974,
"step": 622
},
{
"epoch": 0.7591389114541024,
"grad_norm": 0.5001555304308823,
"learning_rate": 9.301469299816874e-06,
"loss": 0.6117,
"step": 623
},
{
"epoch": 0.760357432981316,
"grad_norm": 0.5390093336249369,
"learning_rate": 9.297848034936007e-06,
"loss": 0.6088,
"step": 624
},
{
"epoch": 0.7615759545085297,
"grad_norm": 0.5678848176997396,
"learning_rate": 9.294218116481476e-06,
"loss": 0.6018,
"step": 625
},
{
"epoch": 0.7627944760357434,
"grad_norm": 0.5844799796481355,
"learning_rate": 9.290579551762002e-06,
"loss": 0.604,
"step": 626
},
{
"epoch": 0.7640129975629569,
"grad_norm": 0.5159143134307803,
"learning_rate": 9.286932348103716e-06,
"loss": 0.6083,
"step": 627
},
{
"epoch": 0.7652315190901706,
"grad_norm": 0.5326620021016965,
"learning_rate": 9.283276512850137e-06,
"loss": 0.6206,
"step": 628
},
{
"epoch": 0.7664500406173842,
"grad_norm": 0.5963411548189359,
"learning_rate": 9.27961205336217e-06,
"loss": 0.6108,
"step": 629
},
{
"epoch": 0.7676685621445979,
"grad_norm": 0.5014319447503888,
"learning_rate": 9.275938977018082e-06,
"loss": 0.6034,
"step": 630
},
{
"epoch": 0.7688870836718116,
"grad_norm": 0.5126870488620024,
"learning_rate": 9.272257291213488e-06,
"loss": 0.6176,
"step": 631
},
{
"epoch": 0.7701056051990252,
"grad_norm": 0.4787184158365945,
"learning_rate": 9.268567003361341e-06,
"loss": 0.607,
"step": 632
},
{
"epoch": 0.7713241267262388,
"grad_norm": 0.557057771330538,
"learning_rate": 9.264868120891913e-06,
"loss": 0.6318,
"step": 633
},
{
"epoch": 0.7725426482534524,
"grad_norm": 0.535409561474859,
"learning_rate": 9.261160651252778e-06,
"loss": 0.62,
"step": 634
},
{
"epoch": 0.7737611697806661,
"grad_norm": 0.4814507650875912,
"learning_rate": 9.257444601908806e-06,
"loss": 0.6074,
"step": 635
},
{
"epoch": 0.7749796913078798,
"grad_norm": 0.6101990877396614,
"learning_rate": 9.253719980342134e-06,
"loss": 0.6208,
"step": 636
},
{
"epoch": 0.7761982128350934,
"grad_norm": 0.5403900228621851,
"learning_rate": 9.249986794052168e-06,
"loss": 0.5968,
"step": 637
},
{
"epoch": 0.7774167343623071,
"grad_norm": 0.5703352381203307,
"learning_rate": 9.24624505055555e-06,
"loss": 0.626,
"step": 638
},
{
"epoch": 0.7786352558895208,
"grad_norm": 0.5241053254774348,
"learning_rate": 9.24249475738616e-06,
"loss": 0.5959,
"step": 639
},
{
"epoch": 0.7798537774167343,
"grad_norm": 0.5780889050780196,
"learning_rate": 9.238735922095083e-06,
"loss": 0.5783,
"step": 640
},
{
"epoch": 0.781072298943948,
"grad_norm": 0.5164354758896532,
"learning_rate": 9.234968552250612e-06,
"loss": 0.6192,
"step": 641
},
{
"epoch": 0.7822908204711616,
"grad_norm": 0.5672667605052139,
"learning_rate": 9.231192655438222e-06,
"loss": 0.6003,
"step": 642
},
{
"epoch": 0.7835093419983753,
"grad_norm": 0.5135255221881695,
"learning_rate": 9.22740823926055e-06,
"loss": 0.6082,
"step": 643
},
{
"epoch": 0.784727863525589,
"grad_norm": 0.5584536390516718,
"learning_rate": 9.223615311337395e-06,
"loss": 0.614,
"step": 644
},
{
"epoch": 0.7859463850528026,
"grad_norm": 0.5216134140261057,
"learning_rate": 9.219813879305692e-06,
"loss": 0.6012,
"step": 645
},
{
"epoch": 0.7871649065800163,
"grad_norm": 0.5736410922364097,
"learning_rate": 9.216003950819497e-06,
"loss": 0.6194,
"step": 646
},
{
"epoch": 0.7883834281072299,
"grad_norm": 0.5049300976776431,
"learning_rate": 9.21218553354997e-06,
"loss": 0.6115,
"step": 647
},
{
"epoch": 0.7896019496344435,
"grad_norm": 0.5596092247163901,
"learning_rate": 9.208358635185372e-06,
"loss": 0.6002,
"step": 648
},
{
"epoch": 0.7908204711616572,
"grad_norm": 0.6492697062225624,
"learning_rate": 9.204523263431034e-06,
"loss": 0.6087,
"step": 649
},
{
"epoch": 0.7920389926888708,
"grad_norm": 0.5493287831302429,
"learning_rate": 9.200679426009347e-06,
"loss": 0.6134,
"step": 650
},
{
"epoch": 0.7932575142160845,
"grad_norm": 0.5393423473357866,
"learning_rate": 9.196827130659752e-06,
"loss": 0.6077,
"step": 651
},
{
"epoch": 0.7944760357432982,
"grad_norm": 0.4822437257768845,
"learning_rate": 9.192966385138714e-06,
"loss": 0.6206,
"step": 652
},
{
"epoch": 0.7956945572705117,
"grad_norm": 0.5489723911011465,
"learning_rate": 9.189097197219718e-06,
"loss": 0.6237,
"step": 653
},
{
"epoch": 0.7969130787977254,
"grad_norm": 0.465446021569481,
"learning_rate": 9.185219574693242e-06,
"loss": 0.5969,
"step": 654
},
{
"epoch": 0.7981316003249391,
"grad_norm": 0.5608574163560325,
"learning_rate": 9.181333525366756e-06,
"loss": 0.6116,
"step": 655
},
{
"epoch": 0.7993501218521527,
"grad_norm": 0.47338894132856235,
"learning_rate": 9.177439057064684e-06,
"loss": 0.5898,
"step": 656
},
{
"epoch": 0.8005686433793664,
"grad_norm": 0.5538432939088667,
"learning_rate": 9.17353617762841e-06,
"loss": 0.6042,
"step": 657
},
{
"epoch": 0.80178716490658,
"grad_norm": 0.5129997268787104,
"learning_rate": 9.169624894916252e-06,
"loss": 0.6045,
"step": 658
},
{
"epoch": 0.8030056864337937,
"grad_norm": 0.491484979669411,
"learning_rate": 9.165705216803446e-06,
"loss": 0.6159,
"step": 659
},
{
"epoch": 0.8042242079610074,
"grad_norm": 0.4865407913972347,
"learning_rate": 9.161777151182137e-06,
"loss": 0.6095,
"step": 660
},
{
"epoch": 0.8054427294882209,
"grad_norm": 0.5482167186016993,
"learning_rate": 9.15784070596135e-06,
"loss": 0.6063,
"step": 661
},
{
"epoch": 0.8066612510154346,
"grad_norm": 0.4899874123032885,
"learning_rate": 9.153895889066988e-06,
"loss": 0.5993,
"step": 662
},
{
"epoch": 0.8078797725426482,
"grad_norm": 0.4971658879090838,
"learning_rate": 9.149942708441808e-06,
"loss": 0.6349,
"step": 663
},
{
"epoch": 0.8090982940698619,
"grad_norm": 0.4774943646678603,
"learning_rate": 9.145981172045407e-06,
"loss": 0.5937,
"step": 664
},
{
"epoch": 0.8103168155970756,
"grad_norm": 0.5239506111079297,
"learning_rate": 9.142011287854206e-06,
"loss": 0.596,
"step": 665
},
{
"epoch": 0.8115353371242892,
"grad_norm": 0.49171964255133527,
"learning_rate": 9.138033063861436e-06,
"loss": 0.5866,
"step": 666
},
{
"epoch": 0.8127538586515028,
"grad_norm": 0.5198610207245239,
"learning_rate": 9.134046508077116e-06,
"loss": 0.6022,
"step": 667
},
{
"epoch": 0.8139723801787165,
"grad_norm": 0.4768598644726109,
"learning_rate": 9.130051628528046e-06,
"loss": 0.6057,
"step": 668
},
{
"epoch": 0.8151909017059301,
"grad_norm": 0.539806947114795,
"learning_rate": 9.12604843325778e-06,
"loss": 0.6175,
"step": 669
},
{
"epoch": 0.8164094232331438,
"grad_norm": 0.49480984634291075,
"learning_rate": 9.122036930326618e-06,
"loss": 0.6214,
"step": 670
},
{
"epoch": 0.8176279447603574,
"grad_norm": 0.5006857848218066,
"learning_rate": 9.118017127811591e-06,
"loss": 0.6084,
"step": 671
},
{
"epoch": 0.8188464662875711,
"grad_norm": 0.4713529456554149,
"learning_rate": 9.113989033806434e-06,
"loss": 0.6177,
"step": 672
},
{
"epoch": 0.8200649878147848,
"grad_norm": 0.5234744664186434,
"learning_rate": 9.10995265642158e-06,
"loss": 0.623,
"step": 673
},
{
"epoch": 0.8212835093419983,
"grad_norm": 0.46959588708419714,
"learning_rate": 9.105908003784142e-06,
"loss": 0.6223,
"step": 674
},
{
"epoch": 0.822502030869212,
"grad_norm": 0.483130564646199,
"learning_rate": 9.101855084037893e-06,
"loss": 0.6079,
"step": 675
},
{
"epoch": 0.8237205523964257,
"grad_norm": 0.4707432015389284,
"learning_rate": 9.097793905343251e-06,
"loss": 0.6246,
"step": 676
},
{
"epoch": 0.8249390739236393,
"grad_norm": 0.5109208158836949,
"learning_rate": 9.093724475877262e-06,
"loss": 0.6223,
"step": 677
},
{
"epoch": 0.826157595450853,
"grad_norm": 0.524528742300806,
"learning_rate": 9.089646803833589e-06,
"loss": 0.6054,
"step": 678
},
{
"epoch": 0.8273761169780666,
"grad_norm": 0.48479589382874644,
"learning_rate": 9.085560897422487e-06,
"loss": 0.5978,
"step": 679
},
{
"epoch": 0.8285946385052803,
"grad_norm": 0.520310530932384,
"learning_rate": 9.081466764870795e-06,
"loss": 0.6141,
"step": 680
},
{
"epoch": 0.829813160032494,
"grad_norm": 0.5320998645898771,
"learning_rate": 9.07736441442191e-06,
"loss": 0.5952,
"step": 681
},
{
"epoch": 0.8310316815597075,
"grad_norm": 0.522944143229052,
"learning_rate": 9.073253854335777e-06,
"loss": 0.5966,
"step": 682
},
{
"epoch": 0.8322502030869212,
"grad_norm": 0.5438608445694643,
"learning_rate": 9.069135092888874e-06,
"loss": 0.6036,
"step": 683
},
{
"epoch": 0.8334687246141349,
"grad_norm": 0.4929729088140395,
"learning_rate": 9.06500813837419e-06,
"loss": 0.603,
"step": 684
},
{
"epoch": 0.8346872461413485,
"grad_norm": 0.5376420120613337,
"learning_rate": 9.060872999101206e-06,
"loss": 0.6151,
"step": 685
},
{
"epoch": 0.8359057676685622,
"grad_norm": 0.52471690520972,
"learning_rate": 9.056729683395892e-06,
"loss": 0.581,
"step": 686
},
{
"epoch": 0.8371242891957758,
"grad_norm": 0.49865247625736375,
"learning_rate": 9.052578199600675e-06,
"loss": 0.6067,
"step": 687
},
{
"epoch": 0.8383428107229894,
"grad_norm": 0.5035636474694776,
"learning_rate": 9.048418556074425e-06,
"loss": 0.605,
"step": 688
},
{
"epoch": 0.8395613322502031,
"grad_norm": 0.5460518150855164,
"learning_rate": 9.04425076119245e-06,
"loss": 0.6008,
"step": 689
},
{
"epoch": 0.8407798537774167,
"grad_norm": 0.5154326591857874,
"learning_rate": 9.040074823346466e-06,
"loss": 0.612,
"step": 690
},
{
"epoch": 0.8419983753046304,
"grad_norm": 0.41895451726050503,
"learning_rate": 9.035890750944583e-06,
"loss": 0.5947,
"step": 691
},
{
"epoch": 0.843216896831844,
"grad_norm": 0.49674088276174516,
"learning_rate": 9.03169855241129e-06,
"loss": 0.625,
"step": 692
},
{
"epoch": 0.8444354183590577,
"grad_norm": 0.5650371934623263,
"learning_rate": 9.02749823618744e-06,
"loss": 0.5954,
"step": 693
},
{
"epoch": 0.8456539398862714,
"grad_norm": 0.5010709938981562,
"learning_rate": 9.02328981073023e-06,
"loss": 0.6071,
"step": 694
},
{
"epoch": 0.8468724614134849,
"grad_norm": 0.5831039880668286,
"learning_rate": 9.019073284513184e-06,
"loss": 0.5989,
"step": 695
},
{
"epoch": 0.8480909829406986,
"grad_norm": 0.5796544622455602,
"learning_rate": 9.014848666026138e-06,
"loss": 0.6328,
"step": 696
},
{
"epoch": 0.8493095044679123,
"grad_norm": 0.5898423233515925,
"learning_rate": 9.01061596377522e-06,
"loss": 0.6316,
"step": 697
},
{
"epoch": 0.8505280259951259,
"grad_norm": 0.576717321636104,
"learning_rate": 9.006375186282832e-06,
"loss": 0.6129,
"step": 698
},
{
"epoch": 0.8517465475223396,
"grad_norm": 0.5274725251295577,
"learning_rate": 9.002126342087643e-06,
"loss": 0.6103,
"step": 699
},
{
"epoch": 0.8529650690495532,
"grad_norm": 0.5405289062395403,
"learning_rate": 8.997869439744555e-06,
"loss": 0.6252,
"step": 700
},
{
"epoch": 0.8541835905767668,
"grad_norm": 0.5521347732238037,
"learning_rate": 8.993604487824701e-06,
"loss": 0.6008,
"step": 701
},
{
"epoch": 0.8554021121039805,
"grad_norm": 0.5196724445810474,
"learning_rate": 8.989331494915417e-06,
"loss": 0.6185,
"step": 702
},
{
"epoch": 0.8566206336311941,
"grad_norm": 0.5683878673891257,
"learning_rate": 8.985050469620236e-06,
"loss": 0.6245,
"step": 703
},
{
"epoch": 0.8578391551584078,
"grad_norm": 0.5407694973000146,
"learning_rate": 8.980761420558855e-06,
"loss": 0.6142,
"step": 704
},
{
"epoch": 0.8590576766856215,
"grad_norm": 0.5649995760138024,
"learning_rate": 8.976464356367133e-06,
"loss": 0.5985,
"step": 705
},
{
"epoch": 0.8602761982128351,
"grad_norm": 0.4922853729727254,
"learning_rate": 8.972159285697066e-06,
"loss": 0.6128,
"step": 706
},
{
"epoch": 0.8614947197400488,
"grad_norm": 0.5653149236554849,
"learning_rate": 8.967846217216771e-06,
"loss": 0.6085,
"step": 707
},
{
"epoch": 0.8627132412672623,
"grad_norm": 0.5367471044143063,
"learning_rate": 8.963525159610465e-06,
"loss": 0.6148,
"step": 708
},
{
"epoch": 0.863931762794476,
"grad_norm": 0.6165337631503633,
"learning_rate": 8.959196121578455e-06,
"loss": 0.6152,
"step": 709
},
{
"epoch": 0.8651502843216897,
"grad_norm": 0.4805242301641202,
"learning_rate": 8.954859111837115e-06,
"loss": 0.6012,
"step": 710
},
{
"epoch": 0.8663688058489033,
"grad_norm": 0.5673830583367931,
"learning_rate": 8.950514139118868e-06,
"loss": 0.6137,
"step": 711
},
{
"epoch": 0.867587327376117,
"grad_norm": 0.6116666852593193,
"learning_rate": 8.946161212172172e-06,
"loss": 0.6067,
"step": 712
},
{
"epoch": 0.8688058489033307,
"grad_norm": 0.4787324171983748,
"learning_rate": 8.941800339761503e-06,
"loss": 0.6229,
"step": 713
},
{
"epoch": 0.8700243704305443,
"grad_norm": 0.5603801815973803,
"learning_rate": 8.937431530667329e-06,
"loss": 0.6105,
"step": 714
},
{
"epoch": 0.871242891957758,
"grad_norm": 0.5681506397184968,
"learning_rate": 8.933054793686102e-06,
"loss": 0.6196,
"step": 715
},
{
"epoch": 0.8724614134849715,
"grad_norm": 0.4745590461881841,
"learning_rate": 8.928670137630236e-06,
"loss": 0.6041,
"step": 716
},
{
"epoch": 0.8736799350121852,
"grad_norm": 0.5290850478804046,
"learning_rate": 8.924277571328091e-06,
"loss": 0.5968,
"step": 717
},
{
"epoch": 0.8748984565393989,
"grad_norm": 0.4724468577981056,
"learning_rate": 8.919877103623949e-06,
"loss": 0.5888,
"step": 718
},
{
"epoch": 0.8761169780666125,
"grad_norm": 0.4710021425585232,
"learning_rate": 8.915468743378009e-06,
"loss": 0.6039,
"step": 719
},
{
"epoch": 0.8773354995938262,
"grad_norm": 0.5615817507996624,
"learning_rate": 8.911052499466358e-06,
"loss": 0.611,
"step": 720
},
{
"epoch": 0.8785540211210398,
"grad_norm": 0.5372617716587773,
"learning_rate": 8.906628380780951e-06,
"loss": 0.5853,
"step": 721
},
{
"epoch": 0.8797725426482534,
"grad_norm": 0.4671881493526463,
"learning_rate": 8.902196396229605e-06,
"loss": 0.6135,
"step": 722
},
{
"epoch": 0.8809910641754671,
"grad_norm": 0.6571538751607443,
"learning_rate": 8.897756554735976e-06,
"loss": 0.6166,
"step": 723
},
{
"epoch": 0.8822095857026807,
"grad_norm": 0.5407143640334066,
"learning_rate": 8.893308865239536e-06,
"loss": 0.5946,
"step": 724
},
{
"epoch": 0.8834281072298944,
"grad_norm": 0.53845654868447,
"learning_rate": 8.888853336695558e-06,
"loss": 0.6056,
"step": 725
},
{
"epoch": 0.8846466287571081,
"grad_norm": 0.5501103328024185,
"learning_rate": 8.884389978075098e-06,
"loss": 0.5983,
"step": 726
},
{
"epoch": 0.8858651502843217,
"grad_norm": 0.5308109296782529,
"learning_rate": 8.879918798364984e-06,
"loss": 0.5777,
"step": 727
},
{
"epoch": 0.8870836718115354,
"grad_norm": 0.5017325039220928,
"learning_rate": 8.875439806567786e-06,
"loss": 0.6045,
"step": 728
},
{
"epoch": 0.8883021933387489,
"grad_norm": 0.5901206372277947,
"learning_rate": 8.870953011701804e-06,
"loss": 0.604,
"step": 729
},
{
"epoch": 0.8895207148659626,
"grad_norm": 0.45439896535640995,
"learning_rate": 8.866458422801048e-06,
"loss": 0.6073,
"step": 730
},
{
"epoch": 0.8907392363931763,
"grad_norm": 0.5577426986098635,
"learning_rate": 8.861956048915225e-06,
"loss": 0.5915,
"step": 731
},
{
"epoch": 0.8919577579203899,
"grad_norm": 0.6016567936834477,
"learning_rate": 8.857445899109716e-06,
"loss": 0.6046,
"step": 732
},
{
"epoch": 0.8931762794476036,
"grad_norm": 0.5445868957449489,
"learning_rate": 8.852927982465553e-06,
"loss": 0.6106,
"step": 733
},
{
"epoch": 0.8943948009748173,
"grad_norm": 0.74687623190731,
"learning_rate": 8.848402308079415e-06,
"loss": 0.6106,
"step": 734
},
{
"epoch": 0.8956133225020309,
"grad_norm": 0.5720296451679941,
"learning_rate": 8.843868885063594e-06,
"loss": 0.6051,
"step": 735
},
{
"epoch": 0.8968318440292445,
"grad_norm": 0.6556133763306434,
"learning_rate": 8.839327722545985e-06,
"loss": 0.6167,
"step": 736
},
{
"epoch": 0.8980503655564581,
"grad_norm": 0.564067584928174,
"learning_rate": 8.83477882967007e-06,
"loss": 0.5994,
"step": 737
},
{
"epoch": 0.8992688870836718,
"grad_norm": 0.7349456844478599,
"learning_rate": 8.83022221559489e-06,
"loss": 0.6114,
"step": 738
},
{
"epoch": 0.9004874086108855,
"grad_norm": 0.5690040907358448,
"learning_rate": 8.82565788949504e-06,
"loss": 0.5881,
"step": 739
},
{
"epoch": 0.9017059301380991,
"grad_norm": 0.6984688918514965,
"learning_rate": 8.821085860560633e-06,
"loss": 0.5983,
"step": 740
},
{
"epoch": 0.9029244516653128,
"grad_norm": 0.5870268436598589,
"learning_rate": 8.8165061379973e-06,
"loss": 0.6158,
"step": 741
},
{
"epoch": 0.9041429731925265,
"grad_norm": 0.730806962459982,
"learning_rate": 8.81191873102616e-06,
"loss": 0.6058,
"step": 742
},
{
"epoch": 0.90536149471974,
"grad_norm": 0.5520509944838993,
"learning_rate": 8.807323648883802e-06,
"loss": 0.6076,
"step": 743
},
{
"epoch": 0.9065800162469537,
"grad_norm": 0.5674479495642151,
"learning_rate": 8.80272090082227e-06,
"loss": 0.6017,
"step": 744
},
{
"epoch": 0.9077985377741673,
"grad_norm": 0.6471015570221698,
"learning_rate": 8.798110496109047e-06,
"loss": 0.6114,
"step": 745
},
{
"epoch": 0.909017059301381,
"grad_norm": 0.5077905529540144,
"learning_rate": 8.793492444027027e-06,
"loss": 0.6086,
"step": 746
},
{
"epoch": 0.9102355808285947,
"grad_norm": 0.5684591151412205,
"learning_rate": 8.788866753874504e-06,
"loss": 0.5939,
"step": 747
},
{
"epoch": 0.9114541023558083,
"grad_norm": 0.5373473945369368,
"learning_rate": 8.784233434965149e-06,
"loss": 0.605,
"step": 748
},
{
"epoch": 0.912672623883022,
"grad_norm": 0.4922150085749876,
"learning_rate": 8.779592496627998e-06,
"loss": 0.6016,
"step": 749
},
{
"epoch": 0.9138911454102355,
"grad_norm": 0.5346368247367626,
"learning_rate": 8.774943948207427e-06,
"loss": 0.5894,
"step": 750
},
{
"epoch": 0.9151096669374492,
"grad_norm": 0.5910293461390073,
"learning_rate": 8.770287799063128e-06,
"loss": 0.5928,
"step": 751
},
{
"epoch": 0.9163281884646629,
"grad_norm": 0.45941353467858154,
"learning_rate": 8.765624058570106e-06,
"loss": 0.606,
"step": 752
},
{
"epoch": 0.9175467099918765,
"grad_norm": 0.5187731411231332,
"learning_rate": 8.760952736118645e-06,
"loss": 0.6128,
"step": 753
},
{
"epoch": 0.9187652315190902,
"grad_norm": 0.5257713049314863,
"learning_rate": 8.756273841114297e-06,
"loss": 0.5954,
"step": 754
},
{
"epoch": 0.9199837530463039,
"grad_norm": 0.5158216045537021,
"learning_rate": 8.751587382977862e-06,
"loss": 0.6016,
"step": 755
},
{
"epoch": 0.9212022745735174,
"grad_norm": 0.48265635843326626,
"learning_rate": 8.746893371145367e-06,
"loss": 0.6023,
"step": 756
},
{
"epoch": 0.9224207961007311,
"grad_norm": 0.56635290896361,
"learning_rate": 8.742191815068048e-06,
"loss": 0.6168,
"step": 757
},
{
"epoch": 0.9236393176279447,
"grad_norm": 0.5246869149929032,
"learning_rate": 8.737482724212331e-06,
"loss": 0.6073,
"step": 758
},
{
"epoch": 0.9248578391551584,
"grad_norm": 0.5675558144411569,
"learning_rate": 8.732766108059814e-06,
"loss": 0.6089,
"step": 759
},
{
"epoch": 0.9260763606823721,
"grad_norm": 0.5373680000020842,
"learning_rate": 8.728041976107247e-06,
"loss": 0.6229,
"step": 760
},
{
"epoch": 0.9272948822095857,
"grad_norm": 0.4781724675355625,
"learning_rate": 8.723310337866508e-06,
"loss": 0.6109,
"step": 761
},
{
"epoch": 0.9285134037367994,
"grad_norm": 0.5425148864348092,
"learning_rate": 8.718571202864598e-06,
"loss": 0.6135,
"step": 762
},
{
"epoch": 0.929731925264013,
"grad_norm": 0.5848574183660457,
"learning_rate": 8.713824580643606e-06,
"loss": 0.5856,
"step": 763
},
{
"epoch": 0.9309504467912266,
"grad_norm": 0.5359644668976268,
"learning_rate": 8.709070480760696e-06,
"loss": 0.6005,
"step": 764
},
{
"epoch": 0.9321689683184403,
"grad_norm": 0.620026762890768,
"learning_rate": 8.70430891278809e-06,
"loss": 0.6068,
"step": 765
},
{
"epoch": 0.9333874898456539,
"grad_norm": 0.47117448230839937,
"learning_rate": 8.699539886313047e-06,
"loss": 0.6252,
"step": 766
},
{
"epoch": 0.9346060113728676,
"grad_norm": 0.5057879313386596,
"learning_rate": 8.69476341093784e-06,
"loss": 0.6043,
"step": 767
},
{
"epoch": 0.9358245329000813,
"grad_norm": 0.5719864673466165,
"learning_rate": 8.689979496279747e-06,
"loss": 0.6021,
"step": 768
},
{
"epoch": 0.9370430544272949,
"grad_norm": 0.4550279435061135,
"learning_rate": 8.685188151971018e-06,
"loss": 0.5903,
"step": 769
},
{
"epoch": 0.9382615759545085,
"grad_norm": 0.5815823584929373,
"learning_rate": 8.680389387658866e-06,
"loss": 0.5994,
"step": 770
},
{
"epoch": 0.9394800974817222,
"grad_norm": 0.5037028317625714,
"learning_rate": 8.675583213005443e-06,
"loss": 0.619,
"step": 771
},
{
"epoch": 0.9406986190089358,
"grad_norm": 0.5242690261886358,
"learning_rate": 8.67076963768782e-06,
"loss": 0.6048,
"step": 772
},
{
"epoch": 0.9419171405361495,
"grad_norm": 0.6218367099845817,
"learning_rate": 8.66594867139797e-06,
"loss": 0.5839,
"step": 773
},
{
"epoch": 0.9431356620633631,
"grad_norm": 0.47012822627564055,
"learning_rate": 8.661120323842751e-06,
"loss": 0.5901,
"step": 774
},
{
"epoch": 0.9443541835905768,
"grad_norm": 0.5922308137676237,
"learning_rate": 8.656284604743877e-06,
"loss": 0.5949,
"step": 775
},
{
"epoch": 0.9455727051177905,
"grad_norm": 0.5371260230634575,
"learning_rate": 8.651441523837908e-06,
"loss": 0.623,
"step": 776
},
{
"epoch": 0.946791226645004,
"grad_norm": 0.5773759686297267,
"learning_rate": 8.646591090876225e-06,
"loss": 0.6234,
"step": 777
},
{
"epoch": 0.9480097481722177,
"grad_norm": 0.5887590407239388,
"learning_rate": 8.641733315625014e-06,
"loss": 0.6111,
"step": 778
},
{
"epoch": 0.9492282696994313,
"grad_norm": 0.5226241995561731,
"learning_rate": 8.636868207865244e-06,
"loss": 0.6206,
"step": 779
},
{
"epoch": 0.950446791226645,
"grad_norm": 0.6014897765265561,
"learning_rate": 8.631995777392645e-06,
"loss": 0.6098,
"step": 780
},
{
"epoch": 0.9516653127538587,
"grad_norm": 0.4728664792789181,
"learning_rate": 8.627116034017697e-06,
"loss": 0.6175,
"step": 781
},
{
"epoch": 0.9528838342810723,
"grad_norm": 0.5599521599776955,
"learning_rate": 8.622228987565597e-06,
"loss": 0.6121,
"step": 782
},
{
"epoch": 0.954102355808286,
"grad_norm": 0.45561297167703785,
"learning_rate": 8.61733464787625e-06,
"loss": 0.585,
"step": 783
},
{
"epoch": 0.9553208773354996,
"grad_norm": 0.4965712938546266,
"learning_rate": 8.612433024804246e-06,
"loss": 0.5844,
"step": 784
},
{
"epoch": 0.9565393988627132,
"grad_norm": 0.49923609484853176,
"learning_rate": 8.607524128218842e-06,
"loss": 0.6056,
"step": 785
},
{
"epoch": 0.9577579203899269,
"grad_norm": 0.5194489854997212,
"learning_rate": 8.602607968003935e-06,
"loss": 0.6157,
"step": 786
},
{
"epoch": 0.9589764419171405,
"grad_norm": 0.45374807644787585,
"learning_rate": 8.597684554058053e-06,
"loss": 0.6131,
"step": 787
},
{
"epoch": 0.9601949634443542,
"grad_norm": 0.48980331599376176,
"learning_rate": 8.59275389629432e-06,
"loss": 0.6277,
"step": 788
},
{
"epoch": 0.9614134849715679,
"grad_norm": 0.512984376262805,
"learning_rate": 8.587816004640456e-06,
"loss": 0.6079,
"step": 789
},
{
"epoch": 0.9626320064987814,
"grad_norm": 0.46938679490869983,
"learning_rate": 8.58287088903874e-06,
"loss": 0.6024,
"step": 790
},
{
"epoch": 0.9638505280259951,
"grad_norm": 0.5727370279954419,
"learning_rate": 8.577918559445994e-06,
"loss": 0.6133,
"step": 791
},
{
"epoch": 0.9650690495532088,
"grad_norm": 0.46813355754433694,
"learning_rate": 8.572959025833573e-06,
"loss": 0.6091,
"step": 792
},
{
"epoch": 0.9662875710804224,
"grad_norm": 0.5352006872401892,
"learning_rate": 8.56799229818733e-06,
"loss": 0.5926,
"step": 793
},
{
"epoch": 0.9675060926076361,
"grad_norm": 0.5423797070420179,
"learning_rate": 8.563018386507607e-06,
"loss": 0.6055,
"step": 794
},
{
"epoch": 0.9687246141348497,
"grad_norm": 0.5598760717169532,
"learning_rate": 8.558037300809209e-06,
"loss": 0.601,
"step": 795
},
{
"epoch": 0.9699431356620634,
"grad_norm": 0.5899307915518814,
"learning_rate": 8.553049051121383e-06,
"loss": 0.5925,
"step": 796
},
{
"epoch": 0.971161657189277,
"grad_norm": 0.5817700253793735,
"learning_rate": 8.548053647487808e-06,
"loss": 0.5794,
"step": 797
},
{
"epoch": 0.9723801787164906,
"grad_norm": 0.6684891953193655,
"learning_rate": 8.543051099966558e-06,
"loss": 0.6158,
"step": 798
},
{
"epoch": 0.9735987002437043,
"grad_norm": 0.6186641627844115,
"learning_rate": 8.538041418630099e-06,
"loss": 0.6045,
"step": 799
},
{
"epoch": 0.974817221770918,
"grad_norm": 0.5620245115548018,
"learning_rate": 8.533024613565256e-06,
"loss": 0.6074,
"step": 800
},
{
"epoch": 0.9760357432981316,
"grad_norm": 0.5360734619477909,
"learning_rate": 8.5280006948732e-06,
"loss": 0.5781,
"step": 801
},
{
"epoch": 0.9772542648253453,
"grad_norm": 0.5649861774930516,
"learning_rate": 8.522969672669419e-06,
"loss": 0.603,
"step": 802
},
{
"epoch": 0.9784727863525589,
"grad_norm": 0.5524388375136041,
"learning_rate": 8.517931557083713e-06,
"loss": 0.5927,
"step": 803
},
{
"epoch": 0.9796913078797725,
"grad_norm": 0.5048333497363491,
"learning_rate": 8.512886358260162e-06,
"loss": 0.6218,
"step": 804
},
{
"epoch": 0.9809098294069862,
"grad_norm": 0.5532699810235799,
"learning_rate": 8.5078340863571e-06,
"loss": 0.5935,
"step": 805
},
{
"epoch": 0.9821283509341998,
"grad_norm": 0.482227106626454,
"learning_rate": 8.502774751547108e-06,
"loss": 0.5946,
"step": 806
},
{
"epoch": 0.9833468724614135,
"grad_norm": 0.5612628853741157,
"learning_rate": 8.49770836401699e-06,
"loss": 0.6174,
"step": 807
},
{
"epoch": 0.9845653939886271,
"grad_norm": 0.5165079876207431,
"learning_rate": 8.492634933967749e-06,
"loss": 0.586,
"step": 808
},
{
"epoch": 0.9857839155158408,
"grad_norm": 0.5243350260461674,
"learning_rate": 8.487554471614568e-06,
"loss": 0.598,
"step": 809
},
{
"epoch": 0.9870024370430545,
"grad_norm": 0.5334138693548346,
"learning_rate": 8.482466987186785e-06,
"loss": 0.6156,
"step": 810
},
{
"epoch": 0.988220958570268,
"grad_norm": 0.5183630888601999,
"learning_rate": 8.477372490927882e-06,
"loss": 0.6043,
"step": 811
},
{
"epoch": 0.9894394800974817,
"grad_norm": 0.5064511107410842,
"learning_rate": 8.47227099309546e-06,
"loss": 0.618,
"step": 812
},
{
"epoch": 0.9906580016246954,
"grad_norm": 0.502910387382079,
"learning_rate": 8.467162503961209e-06,
"loss": 0.5921,
"step": 813
},
{
"epoch": 0.991876523151909,
"grad_norm": 0.6360985673189292,
"learning_rate": 8.462047033810906e-06,
"loss": 0.6196,
"step": 814
},
{
"epoch": 0.9930950446791227,
"grad_norm": 0.48804000994343705,
"learning_rate": 8.456924592944377e-06,
"loss": 0.5874,
"step": 815
},
{
"epoch": 0.9943135662063363,
"grad_norm": 0.5525784026778128,
"learning_rate": 8.451795191675488e-06,
"loss": 0.6121,
"step": 816
},
{
"epoch": 0.99553208773355,
"grad_norm": 0.6244758885512404,
"learning_rate": 8.446658840332115e-06,
"loss": 0.6117,
"step": 817
},
{
"epoch": 0.9967506092607636,
"grad_norm": 0.5125354504575084,
"learning_rate": 8.441515549256134e-06,
"loss": 0.6029,
"step": 818
},
{
"epoch": 0.9979691307879772,
"grad_norm": 0.48689738688414835,
"learning_rate": 8.436365328803386e-06,
"loss": 0.6118,
"step": 819
},
{
"epoch": 0.9991876523151909,
"grad_norm": 0.6498259018985348,
"learning_rate": 8.43120818934367e-06,
"loss": 0.6102,
"step": 820
},
{
"epoch": 1.0008123476848092,
"grad_norm": 0.9638337013283915,
"learning_rate": 8.426044141260712e-06,
"loss": 0.9573,
"step": 821
},
{
"epoch": 1.0020308692120228,
"grad_norm": 0.49843778312392245,
"learning_rate": 8.420873194952153e-06,
"loss": 0.5312,
"step": 822
},
{
"epoch": 1.0032493907392364,
"grad_norm": 0.5736142039977695,
"learning_rate": 8.415695360829521e-06,
"loss": 0.5481,
"step": 823
},
{
"epoch": 1.00446791226645,
"grad_norm": 0.5588125856539439,
"learning_rate": 8.410510649318211e-06,
"loss": 0.6112,
"step": 824
},
{
"epoch": 1.0056864337936637,
"grad_norm": 0.5238730088532109,
"learning_rate": 8.405319070857466e-06,
"loss": 0.5738,
"step": 825
},
{
"epoch": 1.0069049553208773,
"grad_norm": 0.5729923093577028,
"learning_rate": 8.40012063590036e-06,
"loss": 0.563,
"step": 826
},
{
"epoch": 1.008123476848091,
"grad_norm": 0.542308645982848,
"learning_rate": 8.394915354913763e-06,
"loss": 0.5825,
"step": 827
},
{
"epoch": 1.0093419983753047,
"grad_norm": 0.5635399800755453,
"learning_rate": 8.38970323837834e-06,
"loss": 0.5596,
"step": 828
},
{
"epoch": 1.0105605199025183,
"grad_norm": 0.5412240812641438,
"learning_rate": 8.384484296788509e-06,
"loss": 0.583,
"step": 829
},
{
"epoch": 1.0117790414297319,
"grad_norm": 0.4985722523246039,
"learning_rate": 8.379258540652438e-06,
"loss": 0.5269,
"step": 830
},
{
"epoch": 1.0129975629569457,
"grad_norm": 0.5577073237880519,
"learning_rate": 8.37402598049201e-06,
"loss": 0.5971,
"step": 831
},
{
"epoch": 1.0142160844841592,
"grad_norm": 0.5397320632233633,
"learning_rate": 8.368786626842815e-06,
"loss": 0.576,
"step": 832
},
{
"epoch": 1.0154346060113728,
"grad_norm": 0.5446374373068642,
"learning_rate": 8.363540490254111e-06,
"loss": 0.5604,
"step": 833
},
{
"epoch": 1.0166531275385866,
"grad_norm": 0.5916157265480478,
"learning_rate": 8.358287581288824e-06,
"loss": 0.5977,
"step": 834
},
{
"epoch": 1.0178716490658002,
"grad_norm": 0.44317757053413465,
"learning_rate": 8.353027910523506e-06,
"loss": 0.5386,
"step": 835
},
{
"epoch": 1.0190901705930138,
"grad_norm": 0.5306644900080113,
"learning_rate": 8.347761488548334e-06,
"loss": 0.5685,
"step": 836
},
{
"epoch": 1.0203086921202273,
"grad_norm": 0.554634789156319,
"learning_rate": 8.342488325967068e-06,
"loss": 0.5906,
"step": 837
},
{
"epoch": 1.0215272136474411,
"grad_norm": 0.46926134132806735,
"learning_rate": 8.337208433397051e-06,
"loss": 0.5518,
"step": 838
},
{
"epoch": 1.0227457351746547,
"grad_norm": 0.5223237573306092,
"learning_rate": 8.331921821469164e-06,
"loss": 0.5482,
"step": 839
},
{
"epoch": 1.0239642567018683,
"grad_norm": 0.6456110639127597,
"learning_rate": 8.326628500827826e-06,
"loss": 0.5533,
"step": 840
},
{
"epoch": 1.025182778229082,
"grad_norm": 0.49817045727119846,
"learning_rate": 8.321328482130967e-06,
"loss": 0.5828,
"step": 841
},
{
"epoch": 1.0264012997562957,
"grad_norm": 0.6439926526967455,
"learning_rate": 8.31602177604999e-06,
"loss": 0.5445,
"step": 842
},
{
"epoch": 1.0276198212835093,
"grad_norm": 0.5597217590326287,
"learning_rate": 8.310708393269773e-06,
"loss": 0.5919,
"step": 843
},
{
"epoch": 1.028838342810723,
"grad_norm": 0.5067484108191753,
"learning_rate": 8.305388344488636e-06,
"loss": 0.5119,
"step": 844
},
{
"epoch": 1.0300568643379366,
"grad_norm": 0.6138111359383427,
"learning_rate": 8.300061640418322e-06,
"loss": 0.5819,
"step": 845
},
{
"epoch": 1.0312753858651502,
"grad_norm": 0.5228439245226578,
"learning_rate": 8.294728291783967e-06,
"loss": 0.5488,
"step": 846
},
{
"epoch": 1.032493907392364,
"grad_norm": 0.5069119735333029,
"learning_rate": 8.289388309324094e-06,
"loss": 0.5531,
"step": 847
},
{
"epoch": 1.0337124289195776,
"grad_norm": 0.6055259774711721,
"learning_rate": 8.284041703790578e-06,
"loss": 0.6323,
"step": 848
},
{
"epoch": 1.0349309504467912,
"grad_norm": 0.40577407124920994,
"learning_rate": 8.278688485948634e-06,
"loss": 0.5171,
"step": 849
},
{
"epoch": 1.036149471974005,
"grad_norm": 0.5480653507617855,
"learning_rate": 8.273328666576783e-06,
"loss": 0.5708,
"step": 850
},
{
"epoch": 1.0373679935012186,
"grad_norm": 0.5332307457846426,
"learning_rate": 8.267962256466845e-06,
"loss": 0.5802,
"step": 851
},
{
"epoch": 1.0385865150284321,
"grad_norm": 0.45617231239236866,
"learning_rate": 8.262589266423908e-06,
"loss": 0.5367,
"step": 852
},
{
"epoch": 1.0398050365556457,
"grad_norm": 0.4487718203264924,
"learning_rate": 8.257209707266308e-06,
"loss": 0.5412,
"step": 853
},
{
"epoch": 1.0410235580828595,
"grad_norm": 0.49617901681065096,
"learning_rate": 8.251823589825608e-06,
"loss": 0.582,
"step": 854
},
{
"epoch": 1.042242079610073,
"grad_norm": 0.47465221989539974,
"learning_rate": 8.246430924946575e-06,
"loss": 0.5377,
"step": 855
},
{
"epoch": 1.0434606011372867,
"grad_norm": 0.4988725203576914,
"learning_rate": 8.24103172348716e-06,
"loss": 0.6148,
"step": 856
},
{
"epoch": 1.0446791226645005,
"grad_norm": 0.4769299659284957,
"learning_rate": 8.235625996318475e-06,
"loss": 0.5376,
"step": 857
},
{
"epoch": 1.045897644191714,
"grad_norm": 0.5418879737499556,
"learning_rate": 8.230213754324773e-06,
"loss": 0.5688,
"step": 858
},
{
"epoch": 1.0471161657189276,
"grad_norm": 0.4361367720716124,
"learning_rate": 8.22479500840342e-06,
"loss": 0.5337,
"step": 859
},
{
"epoch": 1.0483346872461414,
"grad_norm": 0.5323815827851344,
"learning_rate": 8.219369769464883e-06,
"loss": 0.6055,
"step": 860
},
{
"epoch": 1.049553208773355,
"grad_norm": 0.5879673529136081,
"learning_rate": 8.213938048432697e-06,
"loss": 0.5415,
"step": 861
},
{
"epoch": 1.0507717303005686,
"grad_norm": 0.4684259408064238,
"learning_rate": 8.208499856243453e-06,
"loss": 0.5515,
"step": 862
},
{
"epoch": 1.0519902518277824,
"grad_norm": 0.5196995774290054,
"learning_rate": 8.20305520384677e-06,
"loss": 0.5934,
"step": 863
},
{
"epoch": 1.053208773354996,
"grad_norm": 0.555821404956024,
"learning_rate": 8.19760410220527e-06,
"loss": 0.5608,
"step": 864
},
{
"epoch": 1.0544272948822095,
"grad_norm": 0.49067810902195214,
"learning_rate": 8.19214656229457e-06,
"loss": 0.5338,
"step": 865
},
{
"epoch": 1.0556458164094233,
"grad_norm": 0.5035110725818862,
"learning_rate": 8.186682595103241e-06,
"loss": 0.579,
"step": 866
},
{
"epoch": 1.056864337936637,
"grad_norm": 0.5005979772843533,
"learning_rate": 8.1812122116328e-06,
"loss": 0.5824,
"step": 867
},
{
"epoch": 1.0580828594638505,
"grad_norm": 0.5504829458164456,
"learning_rate": 8.175735422897682e-06,
"loss": 0.5574,
"step": 868
},
{
"epoch": 1.059301380991064,
"grad_norm": 0.5207101568397476,
"learning_rate": 8.170252239925215e-06,
"loss": 0.5894,
"step": 869
},
{
"epoch": 1.0605199025182779,
"grad_norm": 0.41793216877614997,
"learning_rate": 8.16476267375561e-06,
"loss": 0.509,
"step": 870
},
{
"epoch": 1.0617384240454915,
"grad_norm": 0.5270083025323902,
"learning_rate": 8.159266735441922e-06,
"loss": 0.584,
"step": 871
},
{
"epoch": 1.062956945572705,
"grad_norm": 0.4966922910229618,
"learning_rate": 8.15376443605004e-06,
"loss": 0.5269,
"step": 872
},
{
"epoch": 1.0641754670999188,
"grad_norm": 0.4961677071135526,
"learning_rate": 8.148255786658661e-06,
"loss": 0.6035,
"step": 873
},
{
"epoch": 1.0653939886271324,
"grad_norm": 0.4946533201405728,
"learning_rate": 8.142740798359268e-06,
"loss": 0.5932,
"step": 874
},
{
"epoch": 1.066612510154346,
"grad_norm": 0.49312465250267673,
"learning_rate": 8.137219482256102e-06,
"loss": 0.5337,
"step": 875
},
{
"epoch": 1.0678310316815598,
"grad_norm": 0.5074238436289318,
"learning_rate": 8.131691849466154e-06,
"loss": 0.5536,
"step": 876
},
{
"epoch": 1.0690495532087734,
"grad_norm": 0.5179722934326702,
"learning_rate": 8.126157911119124e-06,
"loss": 0.5781,
"step": 877
},
{
"epoch": 1.070268074735987,
"grad_norm": 0.42106727984073683,
"learning_rate": 8.120617678357415e-06,
"loss": 0.5364,
"step": 878
},
{
"epoch": 1.0714865962632008,
"grad_norm": 0.5619541047984238,
"learning_rate": 8.115071162336099e-06,
"loss": 0.6302,
"step": 879
},
{
"epoch": 1.0727051177904143,
"grad_norm": 0.48218497269212,
"learning_rate": 8.109518374222902e-06,
"loss": 0.5081,
"step": 880
},
{
"epoch": 1.073923639317628,
"grad_norm": 0.5288776434466912,
"learning_rate": 8.103959325198178e-06,
"loss": 0.6161,
"step": 881
},
{
"epoch": 1.0751421608448415,
"grad_norm": 0.4396305550189922,
"learning_rate": 8.098394026454886e-06,
"loss": 0.5269,
"step": 882
},
{
"epoch": 1.0763606823720553,
"grad_norm": 0.5705187563085431,
"learning_rate": 8.09282248919857e-06,
"loss": 0.5918,
"step": 883
},
{
"epoch": 1.0775792038992689,
"grad_norm": 0.5173394574008403,
"learning_rate": 8.087244724647333e-06,
"loss": 0.55,
"step": 884
},
{
"epoch": 1.0787977254264824,
"grad_norm": 0.5259195540857357,
"learning_rate": 8.081660744031818e-06,
"loss": 0.5587,
"step": 885
},
{
"epoch": 1.0800162469536962,
"grad_norm": 0.5013768900277689,
"learning_rate": 8.076070558595188e-06,
"loss": 0.5847,
"step": 886
},
{
"epoch": 1.0812347684809098,
"grad_norm": 0.5113716323758455,
"learning_rate": 8.070474179593088e-06,
"loss": 0.5841,
"step": 887
},
{
"epoch": 1.0824532900081234,
"grad_norm": 0.4304893769830929,
"learning_rate": 8.064871618293647e-06,
"loss": 0.474,
"step": 888
},
{
"epoch": 1.0836718115353372,
"grad_norm": 0.5581590870053381,
"learning_rate": 8.05926288597743e-06,
"loss": 0.5883,
"step": 889
},
{
"epoch": 1.0848903330625508,
"grad_norm": 0.5966885478295298,
"learning_rate": 8.053647993937436e-06,
"loss": 0.6114,
"step": 890
},
{
"epoch": 1.0861088545897644,
"grad_norm": 0.45798182910038504,
"learning_rate": 8.048026953479062e-06,
"loss": 0.5349,
"step": 891
},
{
"epoch": 1.0873273761169782,
"grad_norm": 0.5977190234288519,
"learning_rate": 8.042399775920084e-06,
"loss": 0.5822,
"step": 892
},
{
"epoch": 1.0885458976441917,
"grad_norm": 0.5579549068887683,
"learning_rate": 8.036766472590636e-06,
"loss": 0.5892,
"step": 893
},
{
"epoch": 1.0897644191714053,
"grad_norm": 0.5035624965150097,
"learning_rate": 8.031127054833192e-06,
"loss": 0.5278,
"step": 894
},
{
"epoch": 1.090982940698619,
"grad_norm": 0.569184764093924,
"learning_rate": 8.025481534002524e-06,
"loss": 0.5904,
"step": 895
},
{
"epoch": 1.0922014622258327,
"grad_norm": 0.47339482033152885,
"learning_rate": 8.019829921465703e-06,
"loss": 0.5598,
"step": 896
},
{
"epoch": 1.0934199837530463,
"grad_norm": 0.4510131001279952,
"learning_rate": 8.014172228602063e-06,
"loss": 0.5218,
"step": 897
},
{
"epoch": 1.0946385052802599,
"grad_norm": 0.5778676271124781,
"learning_rate": 8.00850846680318e-06,
"loss": 0.6047,
"step": 898
},
{
"epoch": 1.0958570268074737,
"grad_norm": 0.437095810398411,
"learning_rate": 8.002838647472848e-06,
"loss": 0.5497,
"step": 899
},
{
"epoch": 1.0970755483346872,
"grad_norm": 0.5562520913467127,
"learning_rate": 7.997162782027061e-06,
"loss": 0.5555,
"step": 900
},
{
"epoch": 1.0982940698619008,
"grad_norm": 0.49447252137766545,
"learning_rate": 7.991480881893982e-06,
"loss": 0.5282,
"step": 901
},
{
"epoch": 1.0995125913891146,
"grad_norm": 0.5223776301957348,
"learning_rate": 7.985792958513932e-06,
"loss": 0.5936,
"step": 902
},
{
"epoch": 1.1007311129163282,
"grad_norm": 0.43743454592876513,
"learning_rate": 7.98009902333935e-06,
"loss": 0.5209,
"step": 903
},
{
"epoch": 1.1019496344435418,
"grad_norm": 0.48630293369462313,
"learning_rate": 7.974399087834786e-06,
"loss": 0.5629,
"step": 904
},
{
"epoch": 1.1031681559707556,
"grad_norm": 0.4518898797022784,
"learning_rate": 7.968693163476872e-06,
"loss": 0.5469,
"step": 905
},
{
"epoch": 1.1043866774979691,
"grad_norm": 0.5599257334925746,
"learning_rate": 7.962981261754295e-06,
"loss": 0.6093,
"step": 906
},
{
"epoch": 1.1056051990251827,
"grad_norm": 0.508379851023288,
"learning_rate": 7.957263394167778e-06,
"loss": 0.5502,
"step": 907
},
{
"epoch": 1.1068237205523965,
"grad_norm": 0.46905549399423435,
"learning_rate": 7.951539572230058e-06,
"loss": 0.5498,
"step": 908
},
{
"epoch": 1.10804224207961,
"grad_norm": 0.5331570716206057,
"learning_rate": 7.945809807465857e-06,
"loss": 0.5936,
"step": 909
},
{
"epoch": 1.1092607636068237,
"grad_norm": 0.43447287523932976,
"learning_rate": 7.940074111411869e-06,
"loss": 0.5205,
"step": 910
},
{
"epoch": 1.1104792851340373,
"grad_norm": 0.4675250634574423,
"learning_rate": 7.934332495616723e-06,
"loss": 0.5921,
"step": 911
},
{
"epoch": 1.111697806661251,
"grad_norm": 0.5710382430607513,
"learning_rate": 7.928584971640974e-06,
"loss": 0.5528,
"step": 912
},
{
"epoch": 1.1129163281884646,
"grad_norm": 0.43616129376419555,
"learning_rate": 7.922831551057068e-06,
"loss": 0.5304,
"step": 913
},
{
"epoch": 1.1141348497156782,
"grad_norm": 0.4931780007557348,
"learning_rate": 7.917072245449327e-06,
"loss": 0.5667,
"step": 914
},
{
"epoch": 1.115353371242892,
"grad_norm": 0.46266355232192513,
"learning_rate": 7.91130706641392e-06,
"loss": 0.557,
"step": 915
},
{
"epoch": 1.1165718927701056,
"grad_norm": 0.4769121004651534,
"learning_rate": 7.90553602555884e-06,
"loss": 0.5761,
"step": 916
},
{
"epoch": 1.1177904142973192,
"grad_norm": 0.4543130942521957,
"learning_rate": 7.899759134503888e-06,
"loss": 0.5667,
"step": 917
},
{
"epoch": 1.119008935824533,
"grad_norm": 0.4622820207175306,
"learning_rate": 7.893976404880643e-06,
"loss": 0.5217,
"step": 918
},
{
"epoch": 1.1202274573517466,
"grad_norm": 0.45946359941638926,
"learning_rate": 7.888187848332434e-06,
"loss": 0.552,
"step": 919
},
{
"epoch": 1.1214459788789601,
"grad_norm": 0.5221530283186372,
"learning_rate": 7.88239347651433e-06,
"loss": 0.6037,
"step": 920
},
{
"epoch": 1.122664500406174,
"grad_norm": 0.490304437758209,
"learning_rate": 7.876593301093104e-06,
"loss": 0.5435,
"step": 921
},
{
"epoch": 1.1238830219333875,
"grad_norm": 0.5353872887084351,
"learning_rate": 7.870787333747216e-06,
"loss": 0.5465,
"step": 922
},
{
"epoch": 1.125101543460601,
"grad_norm": 0.5305459219097892,
"learning_rate": 7.864975586166788e-06,
"loss": 0.5401,
"step": 923
},
{
"epoch": 1.126320064987815,
"grad_norm": 0.4522121891276298,
"learning_rate": 7.859158070053578e-06,
"loss": 0.56,
"step": 924
},
{
"epoch": 1.1275385865150285,
"grad_norm": 0.5400674612069138,
"learning_rate": 7.853334797120961e-06,
"loss": 0.5938,
"step": 925
},
{
"epoch": 1.128757108042242,
"grad_norm": 0.4735679351556697,
"learning_rate": 7.847505779093906e-06,
"loss": 0.5517,
"step": 926
},
{
"epoch": 1.1299756295694556,
"grad_norm": 0.48850903658646466,
"learning_rate": 7.841671027708945e-06,
"loss": 0.5805,
"step": 927
},
{
"epoch": 1.1311941510966694,
"grad_norm": 0.4465079826503964,
"learning_rate": 7.835830554714153e-06,
"loss": 0.5332,
"step": 928
},
{
"epoch": 1.132412672623883,
"grad_norm": 0.5630070888376983,
"learning_rate": 7.82998437186913e-06,
"loss": 0.5744,
"step": 929
},
{
"epoch": 1.1336311941510966,
"grad_norm": 0.4850227941162986,
"learning_rate": 7.824132490944968e-06,
"loss": 0.5284,
"step": 930
},
{
"epoch": 1.1348497156783104,
"grad_norm": 0.5473017535296978,
"learning_rate": 7.818274923724237e-06,
"loss": 0.5853,
"step": 931
},
{
"epoch": 1.136068237205524,
"grad_norm": 0.6180360857968815,
"learning_rate": 7.81241168200095e-06,
"loss": 0.6005,
"step": 932
},
{
"epoch": 1.1372867587327375,
"grad_norm": 0.606221772548701,
"learning_rate": 7.80654277758055e-06,
"loss": 0.5534,
"step": 933
},
{
"epoch": 1.1385052802599513,
"grad_norm": 0.4683974906247182,
"learning_rate": 7.80066822227988e-06,
"loss": 0.5557,
"step": 934
},
{
"epoch": 1.139723801787165,
"grad_norm": 0.5733918926578689,
"learning_rate": 7.794788027927165e-06,
"loss": 0.5617,
"step": 935
},
{
"epoch": 1.1409423233143785,
"grad_norm": 0.5394769205967501,
"learning_rate": 7.788902206361974e-06,
"loss": 0.5949,
"step": 936
},
{
"epoch": 1.1421608448415923,
"grad_norm": 0.4616046919338432,
"learning_rate": 7.783010769435216e-06,
"loss": 0.5173,
"step": 937
},
{
"epoch": 1.1433793663688059,
"grad_norm": 0.5796955213884182,
"learning_rate": 7.7771137290091e-06,
"loss": 0.5924,
"step": 938
},
{
"epoch": 1.1445978878960195,
"grad_norm": 0.5847720129488866,
"learning_rate": 7.771211096957125e-06,
"loss": 0.5562,
"step": 939
},
{
"epoch": 1.145816409423233,
"grad_norm": 0.5171314095714995,
"learning_rate": 7.765302885164038e-06,
"loss": 0.5548,
"step": 940
},
{
"epoch": 1.1470349309504468,
"grad_norm": 0.49901458608547633,
"learning_rate": 7.759389105525832e-06,
"loss": 0.5725,
"step": 941
},
{
"epoch": 1.1482534524776604,
"grad_norm": 0.5352472484551857,
"learning_rate": 7.753469769949701e-06,
"loss": 0.5582,
"step": 942
},
{
"epoch": 1.149471974004874,
"grad_norm": 0.6669984026812862,
"learning_rate": 7.747544890354031e-06,
"loss": 0.6313,
"step": 943
},
{
"epoch": 1.1506904955320878,
"grad_norm": 0.4640017618478166,
"learning_rate": 7.74161447866837e-06,
"loss": 0.5275,
"step": 944
},
{
"epoch": 1.1519090170593014,
"grad_norm": 0.5032260303359475,
"learning_rate": 7.735678546833403e-06,
"loss": 0.5405,
"step": 945
},
{
"epoch": 1.153127538586515,
"grad_norm": 0.545384651096698,
"learning_rate": 7.729737106800932e-06,
"loss": 0.5856,
"step": 946
},
{
"epoch": 1.1543460601137288,
"grad_norm": 0.5735240939112272,
"learning_rate": 7.723790170533848e-06,
"loss": 0.571,
"step": 947
},
{
"epoch": 1.1555645816409423,
"grad_norm": 0.4552234746793405,
"learning_rate": 7.717837750006106e-06,
"loss": 0.5067,
"step": 948
},
{
"epoch": 1.156783103168156,
"grad_norm": 0.49406048197174507,
"learning_rate": 7.71187985720271e-06,
"loss": 0.592,
"step": 949
},
{
"epoch": 1.1580016246953697,
"grad_norm": 0.5489847996831881,
"learning_rate": 7.705916504119679e-06,
"loss": 0.5716,
"step": 950
},
{
"epoch": 1.1592201462225833,
"grad_norm": 0.48074624532511123,
"learning_rate": 7.699947702764021e-06,
"loss": 0.5287,
"step": 951
},
{
"epoch": 1.1604386677497969,
"grad_norm": 0.4833115004977427,
"learning_rate": 7.693973465153724e-06,
"loss": 0.5667,
"step": 952
},
{
"epoch": 1.1616571892770104,
"grad_norm": 0.5472052571967937,
"learning_rate": 7.68799380331771e-06,
"loss": 0.5806,
"step": 953
},
{
"epoch": 1.1628757108042242,
"grad_norm": 0.4381241429842595,
"learning_rate": 7.682008729295834e-06,
"loss": 0.5448,
"step": 954
},
{
"epoch": 1.1640942323314378,
"grad_norm": 0.6129536550799662,
"learning_rate": 7.676018255138841e-06,
"loss": 0.6091,
"step": 955
},
{
"epoch": 1.1653127538586514,
"grad_norm": 0.524234969513479,
"learning_rate": 7.67002239290835e-06,
"loss": 0.5363,
"step": 956
},
{
"epoch": 1.1665312753858652,
"grad_norm": 0.43755065750263256,
"learning_rate": 7.664021154676828e-06,
"loss": 0.5683,
"step": 957
},
{
"epoch": 1.1677497969130788,
"grad_norm": 0.4767439220213808,
"learning_rate": 7.658014552527572e-06,
"loss": 0.5201,
"step": 958
},
{
"epoch": 1.1689683184402924,
"grad_norm": 0.6051473086713034,
"learning_rate": 7.652002598554675e-06,
"loss": 0.6148,
"step": 959
},
{
"epoch": 1.1701868399675062,
"grad_norm": 0.442810424258257,
"learning_rate": 7.645985304863004e-06,
"loss": 0.5089,
"step": 960
},
{
"epoch": 1.1714053614947197,
"grad_norm": 0.5212534237408961,
"learning_rate": 7.639962683568178e-06,
"loss": 0.6398,
"step": 961
},
{
"epoch": 1.1726238830219333,
"grad_norm": 0.4782128214916858,
"learning_rate": 7.633934746796545e-06,
"loss": 0.5247,
"step": 962
},
{
"epoch": 1.1738424045491471,
"grad_norm": 0.555997733569589,
"learning_rate": 7.627901506685157e-06,
"loss": 0.57,
"step": 963
},
{
"epoch": 1.1750609260763607,
"grad_norm": 0.4524690440478936,
"learning_rate": 7.621862975381739e-06,
"loss": 0.5032,
"step": 964
},
{
"epoch": 1.1762794476035743,
"grad_norm": 0.5558207018565952,
"learning_rate": 7.615819165044671e-06,
"loss": 0.6055,
"step": 965
},
{
"epoch": 1.1774979691307879,
"grad_norm": 0.5285401986639633,
"learning_rate": 7.609770087842969e-06,
"loss": 0.5232,
"step": 966
},
{
"epoch": 1.1787164906580017,
"grad_norm": 0.4906926197877719,
"learning_rate": 7.603715755956243e-06,
"loss": 0.6184,
"step": 967
},
{
"epoch": 1.1799350121852152,
"grad_norm": 0.5453800647325697,
"learning_rate": 7.597656181574691e-06,
"loss": 0.5449,
"step": 968
},
{
"epoch": 1.181153533712429,
"grad_norm": 0.532023507332386,
"learning_rate": 7.5915913768990615e-06,
"loss": 0.574,
"step": 969
},
{
"epoch": 1.1823720552396426,
"grad_norm": 0.46068002444123424,
"learning_rate": 7.585521354140638e-06,
"loss": 0.5616,
"step": 970
},
{
"epoch": 1.1835905767668562,
"grad_norm": 0.45366600351939207,
"learning_rate": 7.57944612552121e-06,
"loss": 0.5576,
"step": 971
},
{
"epoch": 1.1848090982940698,
"grad_norm": 0.5035963241142227,
"learning_rate": 7.573365703273045e-06,
"loss": 0.5842,
"step": 972
},
{
"epoch": 1.1860276198212836,
"grad_norm": 0.46429524269523453,
"learning_rate": 7.567280099638874e-06,
"loss": 0.5603,
"step": 973
},
{
"epoch": 1.1872461413484972,
"grad_norm": 0.4391995658392802,
"learning_rate": 7.561189326871854e-06,
"loss": 0.5483,
"step": 974
},
{
"epoch": 1.1884646628757107,
"grad_norm": 0.5688078918566764,
"learning_rate": 7.555093397235553e-06,
"loss": 0.6145,
"step": 975
},
{
"epoch": 1.1896831844029245,
"grad_norm": 0.4535069143341333,
"learning_rate": 7.548992323003923e-06,
"loss": 0.529,
"step": 976
},
{
"epoch": 1.190901705930138,
"grad_norm": 0.5610828923463264,
"learning_rate": 7.542886116461272e-06,
"loss": 0.5604,
"step": 977
},
{
"epoch": 1.1921202274573517,
"grad_norm": 0.49771566362561265,
"learning_rate": 7.536774789902246e-06,
"loss": 0.5339,
"step": 978
},
{
"epoch": 1.1933387489845655,
"grad_norm": 0.5055933911391732,
"learning_rate": 7.530658355631795e-06,
"loss": 0.5307,
"step": 979
},
{
"epoch": 1.194557270511779,
"grad_norm": 0.5075577294535538,
"learning_rate": 7.524536825965154e-06,
"loss": 0.5604,
"step": 980
},
{
"epoch": 1.1957757920389926,
"grad_norm": 0.5520230309503728,
"learning_rate": 7.518410213227823e-06,
"loss": 0.6162,
"step": 981
},
{
"epoch": 1.1969943135662064,
"grad_norm": 0.5218152039597276,
"learning_rate": 7.512278529755529e-06,
"loss": 0.5613,
"step": 982
},
{
"epoch": 1.19821283509342,
"grad_norm": 0.4971095496314555,
"learning_rate": 7.506141787894214e-06,
"loss": 0.5643,
"step": 983
},
{
"epoch": 1.1994313566206336,
"grad_norm": 0.5351931771239321,
"learning_rate": 7.500000000000001e-06,
"loss": 0.5365,
"step": 984
},
{
"epoch": 1.2006498781478472,
"grad_norm": 0.49713221603010127,
"learning_rate": 7.493853178439177e-06,
"loss": 0.5276,
"step": 985
},
{
"epoch": 1.201868399675061,
"grad_norm": 0.49687942243856253,
"learning_rate": 7.48770133558816e-06,
"loss": 0.5705,
"step": 986
},
{
"epoch": 1.2030869212022746,
"grad_norm": 0.4638420387813551,
"learning_rate": 7.481544483833485e-06,
"loss": 0.5143,
"step": 987
},
{
"epoch": 1.2043054427294881,
"grad_norm": 0.5737984880330318,
"learning_rate": 7.475382635571761e-06,
"loss": 0.6105,
"step": 988
},
{
"epoch": 1.205523964256702,
"grad_norm": 0.4548720894167483,
"learning_rate": 7.4692158032096706e-06,
"loss": 0.5409,
"step": 989
},
{
"epoch": 1.2067424857839155,
"grad_norm": 0.49711497244164915,
"learning_rate": 7.463043999163919e-06,
"loss": 0.5803,
"step": 990
},
{
"epoch": 1.207961007311129,
"grad_norm": 0.47268020267724503,
"learning_rate": 7.456867235861231e-06,
"loss": 0.563,
"step": 991
},
{
"epoch": 1.209179528838343,
"grad_norm": 0.4431695796449243,
"learning_rate": 7.450685525738315e-06,
"loss": 0.5458,
"step": 992
},
{
"epoch": 1.2103980503655565,
"grad_norm": 0.5514220959709781,
"learning_rate": 7.444498881241835e-06,
"loss": 0.5719,
"step": 993
},
{
"epoch": 1.21161657189277,
"grad_norm": 0.48730651156910637,
"learning_rate": 7.4383073148283945e-06,
"loss": 0.5547,
"step": 994
},
{
"epoch": 1.2128350934199839,
"grad_norm": 0.48026701020561735,
"learning_rate": 7.432110838964508e-06,
"loss": 0.5446,
"step": 995
},
{
"epoch": 1.2140536149471974,
"grad_norm": 0.49526550877005804,
"learning_rate": 7.4259094661265685e-06,
"loss": 0.5539,
"step": 996
},
{
"epoch": 1.215272136474411,
"grad_norm": 0.5033075517007225,
"learning_rate": 7.419703208800839e-06,
"loss": 0.5885,
"step": 997
},
{
"epoch": 1.2164906580016246,
"grad_norm": 0.4591330610679407,
"learning_rate": 7.413492079483405e-06,
"loss": 0.4958,
"step": 998
},
{
"epoch": 1.2177091795288384,
"grad_norm": 0.5435516527726211,
"learning_rate": 7.407276090680173e-06,
"loss": 0.5941,
"step": 999
},
{
"epoch": 1.218927701056052,
"grad_norm": 0.5014818934661753,
"learning_rate": 7.401055254906829e-06,
"loss": 0.5674,
"step": 1000
},
{
"epoch": 1.2201462225832655,
"grad_norm": 0.5506374382220622,
"learning_rate": 7.394829584688816e-06,
"loss": 0.5623,
"step": 1001
},
{
"epoch": 1.2213647441104794,
"grad_norm": 0.47988582460651985,
"learning_rate": 7.388599092561315e-06,
"loss": 0.579,
"step": 1002
},
{
"epoch": 1.222583265637693,
"grad_norm": 0.5116646928435937,
"learning_rate": 7.382363791069214e-06,
"loss": 0.5789,
"step": 1003
},
{
"epoch": 1.2238017871649065,
"grad_norm": 0.5815639981335669,
"learning_rate": 7.376123692767084e-06,
"loss": 0.5306,
"step": 1004
},
{
"epoch": 1.2250203086921203,
"grad_norm": 0.47545875532554605,
"learning_rate": 7.369878810219154e-06,
"loss": 0.574,
"step": 1005
},
{
"epoch": 1.2262388302193339,
"grad_norm": 0.5843762256050973,
"learning_rate": 7.363629155999289e-06,
"loss": 0.5835,
"step": 1006
},
{
"epoch": 1.2274573517465475,
"grad_norm": 0.49038029629420044,
"learning_rate": 7.357374742690956e-06,
"loss": 0.5277,
"step": 1007
},
{
"epoch": 1.2286758732737613,
"grad_norm": 0.4825203440227731,
"learning_rate": 7.351115582887212e-06,
"loss": 0.5749,
"step": 1008
},
{
"epoch": 1.2298943948009748,
"grad_norm": 0.5230621508499962,
"learning_rate": 7.344851689190662e-06,
"loss": 0.5494,
"step": 1009
},
{
"epoch": 1.2311129163281884,
"grad_norm": 0.49942387299855917,
"learning_rate": 7.33858307421345e-06,
"loss": 0.5684,
"step": 1010
},
{
"epoch": 1.232331437855402,
"grad_norm": 0.5550781071831415,
"learning_rate": 7.3323097505772225e-06,
"loss": 0.5552,
"step": 1011
},
{
"epoch": 1.2335499593826158,
"grad_norm": 0.5160851429477965,
"learning_rate": 7.326031730913107e-06,
"loss": 0.5365,
"step": 1012
},
{
"epoch": 1.2347684809098294,
"grad_norm": 0.5594132080926748,
"learning_rate": 7.319749027861687e-06,
"loss": 0.5805,
"step": 1013
},
{
"epoch": 1.235987002437043,
"grad_norm": 0.5035664881102385,
"learning_rate": 7.313461654072974e-06,
"loss": 0.5572,
"step": 1014
},
{
"epoch": 1.2372055239642568,
"grad_norm": 0.5011647298301126,
"learning_rate": 7.3071696222063874e-06,
"loss": 0.5736,
"step": 1015
},
{
"epoch": 1.2384240454914703,
"grad_norm": 0.5003447796526637,
"learning_rate": 7.300872944930724e-06,
"loss": 0.5724,
"step": 1016
},
{
"epoch": 1.239642567018684,
"grad_norm": 0.4488541730554654,
"learning_rate": 7.2945716349241305e-06,
"loss": 0.5271,
"step": 1017
},
{
"epoch": 1.2408610885458977,
"grad_norm": 0.48397897498100484,
"learning_rate": 7.288265704874089e-06,
"loss": 0.5702,
"step": 1018
},
{
"epoch": 1.2420796100731113,
"grad_norm": 0.46076984680494393,
"learning_rate": 7.281955167477372e-06,
"loss": 0.5235,
"step": 1019
},
{
"epoch": 1.2432981316003249,
"grad_norm": 0.46851694123351845,
"learning_rate": 7.2756400354400445e-06,
"loss": 0.5237,
"step": 1020
},
{
"epoch": 1.2445166531275387,
"grad_norm": 0.48677378118465786,
"learning_rate": 7.2693203214774084e-06,
"loss": 0.6109,
"step": 1021
},
{
"epoch": 1.2457351746547523,
"grad_norm": 0.4780766187805638,
"learning_rate": 7.262996038314001e-06,
"loss": 0.5765,
"step": 1022
},
{
"epoch": 1.2469536961819658,
"grad_norm": 0.4640167779478858,
"learning_rate": 7.2566671986835515e-06,
"loss": 0.5642,
"step": 1023
},
{
"epoch": 1.2481722177091794,
"grad_norm": 0.48778459720464146,
"learning_rate": 7.25033381532897e-06,
"loss": 0.4946,
"step": 1024
},
{
"epoch": 1.2493907392363932,
"grad_norm": 0.4659728876017271,
"learning_rate": 7.243995901002312e-06,
"loss": 0.5638,
"step": 1025
},
{
"epoch": 1.2506092607636068,
"grad_norm": 0.4038916973792116,
"learning_rate": 7.237653468464756e-06,
"loss": 0.5607,
"step": 1026
},
{
"epoch": 1.2518277822908206,
"grad_norm": 0.5567339438269147,
"learning_rate": 7.231306530486579e-06,
"loss": 0.5561,
"step": 1027
},
{
"epoch": 1.2530463038180342,
"grad_norm": 0.4641852200663108,
"learning_rate": 7.224955099847129e-06,
"loss": 0.6096,
"step": 1028
},
{
"epoch": 1.2542648253452477,
"grad_norm": 0.4411515265169084,
"learning_rate": 7.218599189334799e-06,
"loss": 0.4709,
"step": 1029
},
{
"epoch": 1.2554833468724613,
"grad_norm": 0.5058133934757223,
"learning_rate": 7.212238811747003e-06,
"loss": 0.5904,
"step": 1030
},
{
"epoch": 1.2567018683996751,
"grad_norm": 0.41291563737696013,
"learning_rate": 7.205873979890151e-06,
"loss": 0.5436,
"step": 1031
},
{
"epoch": 1.2579203899268887,
"grad_norm": 0.4994662597356207,
"learning_rate": 7.199504706579617e-06,
"loss": 0.6102,
"step": 1032
},
{
"epoch": 1.2591389114541023,
"grad_norm": 0.419031706073167,
"learning_rate": 7.193131004639722e-06,
"loss": 0.5104,
"step": 1033
},
{
"epoch": 1.260357432981316,
"grad_norm": 0.4373098819276125,
"learning_rate": 7.186752886903702e-06,
"loss": 0.5539,
"step": 1034
},
{
"epoch": 1.2615759545085297,
"grad_norm": 0.42312469752099624,
"learning_rate": 7.180370366213684e-06,
"loss": 0.5685,
"step": 1035
},
{
"epoch": 1.2627944760357432,
"grad_norm": 0.4976440200214435,
"learning_rate": 7.173983455420659e-06,
"loss": 0.5886,
"step": 1036
},
{
"epoch": 1.2640129975629568,
"grad_norm": 0.4458571719063019,
"learning_rate": 7.167592167384461e-06,
"loss": 0.5481,
"step": 1037
},
{
"epoch": 1.2652315190901706,
"grad_norm": 0.5011046191959967,
"learning_rate": 7.161196514973735e-06,
"loss": 0.591,
"step": 1038
},
{
"epoch": 1.2664500406173842,
"grad_norm": 0.49133842958144974,
"learning_rate": 7.154796511065914e-06,
"loss": 0.5523,
"step": 1039
},
{
"epoch": 1.267668562144598,
"grad_norm": 0.47022131838731085,
"learning_rate": 7.148392168547191e-06,
"loss": 0.5736,
"step": 1040
},
{
"epoch": 1.2688870836718116,
"grad_norm": 0.41386960779050597,
"learning_rate": 7.141983500312498e-06,
"loss": 0.5529,
"step": 1041
},
{
"epoch": 1.2701056051990252,
"grad_norm": 0.44977069875020453,
"learning_rate": 7.135570519265473e-06,
"loss": 0.548,
"step": 1042
},
{
"epoch": 1.2713241267262387,
"grad_norm": 0.505607270978524,
"learning_rate": 7.129153238318441e-06,
"loss": 0.5685,
"step": 1043
},
{
"epoch": 1.2725426482534525,
"grad_norm": 0.4473291490790123,
"learning_rate": 7.122731670392381e-06,
"loss": 0.5914,
"step": 1044
},
{
"epoch": 1.2737611697806661,
"grad_norm": 0.42761462653683685,
"learning_rate": 7.116305828416907e-06,
"loss": 0.5596,
"step": 1045
},
{
"epoch": 1.2749796913078797,
"grad_norm": 0.5367569602527996,
"learning_rate": 7.109875725330239e-06,
"loss": 0.5705,
"step": 1046
},
{
"epoch": 1.2761982128350935,
"grad_norm": 0.4239534982631823,
"learning_rate": 7.1034413740791705e-06,
"loss": 0.4988,
"step": 1047
},
{
"epoch": 1.277416734362307,
"grad_norm": 0.5193109373280052,
"learning_rate": 7.097002787619059e-06,
"loss": 0.5812,
"step": 1048
},
{
"epoch": 1.2786352558895206,
"grad_norm": 0.5147411712979314,
"learning_rate": 7.090559978913781e-06,
"loss": 0.5916,
"step": 1049
},
{
"epoch": 1.2798537774167342,
"grad_norm": 0.4224143215053458,
"learning_rate": 7.0841129609357165e-06,
"loss": 0.4905,
"step": 1050
},
{
"epoch": 1.281072298943948,
"grad_norm": 0.47217055541643876,
"learning_rate": 7.0776617466657196e-06,
"loss": 0.5592,
"step": 1051
},
{
"epoch": 1.2822908204711616,
"grad_norm": 0.4826081486423026,
"learning_rate": 7.071206349093097e-06,
"loss": 0.5822,
"step": 1052
},
{
"epoch": 1.2835093419983754,
"grad_norm": 0.42489592319050484,
"learning_rate": 7.064746781215578e-06,
"loss": 0.539,
"step": 1053
},
{
"epoch": 1.284727863525589,
"grad_norm": 0.4378036437269882,
"learning_rate": 7.058283056039283e-06,
"loss": 0.5224,
"step": 1054
},
{
"epoch": 1.2859463850528026,
"grad_norm": 0.5090205584091956,
"learning_rate": 7.051815186578711e-06,
"loss": 0.6022,
"step": 1055
},
{
"epoch": 1.2871649065800161,
"grad_norm": 0.421460820182392,
"learning_rate": 7.045343185856701e-06,
"loss": 0.5371,
"step": 1056
},
{
"epoch": 1.28838342810723,
"grad_norm": 0.45568572401745694,
"learning_rate": 7.038867066904407e-06,
"loss": 0.5549,
"step": 1057
},
{
"epoch": 1.2896019496344435,
"grad_norm": 0.4249363344861208,
"learning_rate": 7.032386842761282e-06,
"loss": 0.5434,
"step": 1058
},
{
"epoch": 1.2908204711616573,
"grad_norm": 0.4562034562178344,
"learning_rate": 7.025902526475039e-06,
"loss": 0.5494,
"step": 1059
},
{
"epoch": 1.292038992688871,
"grad_norm": 0.5341880271433396,
"learning_rate": 7.0194141311016336e-06,
"loss": 0.613,
"step": 1060
},
{
"epoch": 1.2932575142160845,
"grad_norm": 0.4504428137448532,
"learning_rate": 7.0129216697052345e-06,
"loss": 0.5016,
"step": 1061
},
{
"epoch": 1.294476035743298,
"grad_norm": 0.48710310604219204,
"learning_rate": 7.006425155358195e-06,
"loss": 0.5966,
"step": 1062
},
{
"epoch": 1.2956945572705119,
"grad_norm": 0.4178638324054384,
"learning_rate": 6.99992460114103e-06,
"loss": 0.518,
"step": 1063
},
{
"epoch": 1.2969130787977254,
"grad_norm": 0.4592904842250764,
"learning_rate": 6.993420020142389e-06,
"loss": 0.5731,
"step": 1064
},
{
"epoch": 1.298131600324939,
"grad_norm": 0.44542709276757847,
"learning_rate": 6.986911425459028e-06,
"loss": 0.5713,
"step": 1065
},
{
"epoch": 1.2993501218521528,
"grad_norm": 0.43271038208431817,
"learning_rate": 6.980398830195785e-06,
"loss": 0.5394,
"step": 1066
},
{
"epoch": 1.3005686433793664,
"grad_norm": 0.42858083262689106,
"learning_rate": 6.9738822474655555e-06,
"loss": 0.5593,
"step": 1067
},
{
"epoch": 1.30178716490658,
"grad_norm": 0.45958843226910784,
"learning_rate": 6.967361690389258e-06,
"loss": 0.6054,
"step": 1068
},
{
"epoch": 1.3030056864337936,
"grad_norm": 0.4289960695158536,
"learning_rate": 6.960837172095822e-06,
"loss": 0.5548,
"step": 1069
},
{
"epoch": 1.3042242079610074,
"grad_norm": 0.47468738466334404,
"learning_rate": 6.954308705722142e-06,
"loss": 0.572,
"step": 1070
},
{
"epoch": 1.305442729488221,
"grad_norm": 0.47013938140744177,
"learning_rate": 6.947776304413072e-06,
"loss": 0.5705,
"step": 1071
},
{
"epoch": 1.3066612510154347,
"grad_norm": 0.42486037624655837,
"learning_rate": 6.941239981321379e-06,
"loss": 0.5541,
"step": 1072
},
{
"epoch": 1.3078797725426483,
"grad_norm": 0.49246997027712336,
"learning_rate": 6.9346997496077365e-06,
"loss": 0.5955,
"step": 1073
},
{
"epoch": 1.309098294069862,
"grad_norm": 0.4472253157123058,
"learning_rate": 6.92815562244068e-06,
"loss": 0.5347,
"step": 1074
},
{
"epoch": 1.3103168155970755,
"grad_norm": 0.4795845777067209,
"learning_rate": 6.921607612996591e-06,
"loss": 0.544,
"step": 1075
},
{
"epoch": 1.3115353371242893,
"grad_norm": 0.4858592748082412,
"learning_rate": 6.915055734459669e-06,
"loss": 0.5825,
"step": 1076
},
{
"epoch": 1.3127538586515028,
"grad_norm": 0.440529958757846,
"learning_rate": 6.908500000021905e-06,
"loss": 0.4894,
"step": 1077
},
{
"epoch": 1.3139723801787164,
"grad_norm": 0.49777302193386763,
"learning_rate": 6.9019404228830465e-06,
"loss": 0.6143,
"step": 1078
},
{
"epoch": 1.3151909017059302,
"grad_norm": 0.42490987305110145,
"learning_rate": 6.895377016250589e-06,
"loss": 0.5383,
"step": 1079
},
{
"epoch": 1.3164094232331438,
"grad_norm": 0.4250241686101231,
"learning_rate": 6.888809793339729e-06,
"loss": 0.5343,
"step": 1080
},
{
"epoch": 1.3176279447603574,
"grad_norm": 0.47908573640303,
"learning_rate": 6.882238767373352e-06,
"loss": 0.5766,
"step": 1081
},
{
"epoch": 1.318846466287571,
"grad_norm": 0.4393923199378749,
"learning_rate": 6.875663951582e-06,
"loss": 0.518,
"step": 1082
},
{
"epoch": 1.3200649878147848,
"grad_norm": 0.5298761025962999,
"learning_rate": 6.869085359203844e-06,
"loss": 0.5687,
"step": 1083
},
{
"epoch": 1.3212835093419983,
"grad_norm": 0.4742825873608696,
"learning_rate": 6.862503003484662e-06,
"loss": 0.5804,
"step": 1084
},
{
"epoch": 1.3225020308692121,
"grad_norm": 0.4633225475847929,
"learning_rate": 6.855916897677806e-06,
"loss": 0.556,
"step": 1085
},
{
"epoch": 1.3237205523964257,
"grad_norm": 0.5225783981999679,
"learning_rate": 6.849327055044182e-06,
"loss": 0.5814,
"step": 1086
},
{
"epoch": 1.3249390739236393,
"grad_norm": 0.4288152429153542,
"learning_rate": 6.842733488852218e-06,
"loss": 0.5576,
"step": 1087
},
{
"epoch": 1.3261575954508529,
"grad_norm": 0.5221719185878941,
"learning_rate": 6.836136212377839e-06,
"loss": 0.5535,
"step": 1088
},
{
"epoch": 1.3273761169780667,
"grad_norm": 0.5296939222461858,
"learning_rate": 6.82953523890444e-06,
"loss": 0.5367,
"step": 1089
},
{
"epoch": 1.3285946385052803,
"grad_norm": 0.4975997883807605,
"learning_rate": 6.822930581722864e-06,
"loss": 0.5888,
"step": 1090
},
{
"epoch": 1.3298131600324938,
"grad_norm": 0.5680495533922292,
"learning_rate": 6.8163222541313646e-06,
"loss": 0.5797,
"step": 1091
},
{
"epoch": 1.3310316815597076,
"grad_norm": 0.4587905010305772,
"learning_rate": 6.80971026943559e-06,
"loss": 0.5202,
"step": 1092
},
{
"epoch": 1.3322502030869212,
"grad_norm": 0.551574996506335,
"learning_rate": 6.803094640948553e-06,
"loss": 0.5777,
"step": 1093
},
{
"epoch": 1.3334687246141348,
"grad_norm": 0.5703735684360373,
"learning_rate": 6.796475381990598e-06,
"loss": 0.5764,
"step": 1094
},
{
"epoch": 1.3346872461413484,
"grad_norm": 0.4925036270565778,
"learning_rate": 6.789852505889384e-06,
"loss": 0.528,
"step": 1095
},
{
"epoch": 1.3359057676685622,
"grad_norm": 0.47585637004253845,
"learning_rate": 6.78322602597985e-06,
"loss": 0.5379,
"step": 1096
},
{
"epoch": 1.3371242891957758,
"grad_norm": 0.5098349120934949,
"learning_rate": 6.776595955604192e-06,
"loss": 0.5564,
"step": 1097
},
{
"epoch": 1.3383428107229896,
"grad_norm": 0.45580609051116194,
"learning_rate": 6.769962308111839e-06,
"loss": 0.5753,
"step": 1098
},
{
"epoch": 1.3395613322502031,
"grad_norm": 0.5171674920493432,
"learning_rate": 6.7633250968594145e-06,
"loss": 0.5949,
"step": 1099
},
{
"epoch": 1.3407798537774167,
"grad_norm": 0.4877120256762604,
"learning_rate": 6.756684335210724e-06,
"loss": 0.515,
"step": 1100
},
{
"epoch": 1.3419983753046303,
"grad_norm": 0.4814845112101113,
"learning_rate": 6.750040036536718e-06,
"loss": 0.5684,
"step": 1101
},
{
"epoch": 1.343216896831844,
"grad_norm": 0.5705372014720597,
"learning_rate": 6.743392214215473e-06,
"loss": 0.6171,
"step": 1102
},
{
"epoch": 1.3444354183590577,
"grad_norm": 0.41955386315882853,
"learning_rate": 6.736740881632156e-06,
"loss": 0.5509,
"step": 1103
},
{
"epoch": 1.3456539398862712,
"grad_norm": 0.5028763983027598,
"learning_rate": 6.7300860521790034e-06,
"loss": 0.5519,
"step": 1104
},
{
"epoch": 1.346872461413485,
"grad_norm": 0.4751712922206779,
"learning_rate": 6.723427739255291e-06,
"loss": 0.5871,
"step": 1105
},
{
"epoch": 1.3480909829406986,
"grad_norm": 0.44250278427343415,
"learning_rate": 6.716765956267313e-06,
"loss": 0.5563,
"step": 1106
},
{
"epoch": 1.3493095044679122,
"grad_norm": 0.42447760271061347,
"learning_rate": 6.710100716628345e-06,
"loss": 0.5246,
"step": 1107
},
{
"epoch": 1.3505280259951258,
"grad_norm": 0.4884332973463199,
"learning_rate": 6.7034320337586236e-06,
"loss": 0.5906,
"step": 1108
},
{
"epoch": 1.3517465475223396,
"grad_norm": 0.47868995347975324,
"learning_rate": 6.696759921085321e-06,
"loss": 0.56,
"step": 1109
},
{
"epoch": 1.3529650690495532,
"grad_norm": 0.4891201962969247,
"learning_rate": 6.690084392042514e-06,
"loss": 0.5387,
"step": 1110
},
{
"epoch": 1.354183590576767,
"grad_norm": 0.4799279297524152,
"learning_rate": 6.683405460071158e-06,
"loss": 0.5584,
"step": 1111
},
{
"epoch": 1.3554021121039805,
"grad_norm": 0.468730614409241,
"learning_rate": 6.676723138619056e-06,
"loss": 0.5639,
"step": 1112
},
{
"epoch": 1.3566206336311941,
"grad_norm": 0.4691028535874034,
"learning_rate": 6.670037441140844e-06,
"loss": 0.5249,
"step": 1113
},
{
"epoch": 1.3578391551584077,
"grad_norm": 0.5055139224683095,
"learning_rate": 6.663348381097949e-06,
"loss": 0.5668,
"step": 1114
},
{
"epoch": 1.3590576766856215,
"grad_norm": 0.4641835440622289,
"learning_rate": 6.656655971958569e-06,
"loss": 0.5168,
"step": 1115
},
{
"epoch": 1.360276198212835,
"grad_norm": 0.5446202821644559,
"learning_rate": 6.649960227197648e-06,
"loss": 0.613,
"step": 1116
},
{
"epoch": 1.3614947197400489,
"grad_norm": 0.4947120887114883,
"learning_rate": 6.6432611602968445e-06,
"loss": 0.5567,
"step": 1117
},
{
"epoch": 1.3627132412672625,
"grad_norm": 0.43139439093199355,
"learning_rate": 6.636558784744507e-06,
"loss": 0.5242,
"step": 1118
},
{
"epoch": 1.363931762794476,
"grad_norm": 0.5614131203778131,
"learning_rate": 6.629853114035643e-06,
"loss": 0.5333,
"step": 1119
},
{
"epoch": 1.3651502843216896,
"grad_norm": 0.47984259019139797,
"learning_rate": 6.623144161671899e-06,
"loss": 0.6073,
"step": 1120
},
{
"epoch": 1.3663688058489034,
"grad_norm": 0.48772092734746075,
"learning_rate": 6.616431941161525e-06,
"loss": 0.519,
"step": 1121
},
{
"epoch": 1.367587327376117,
"grad_norm": 0.4944650047147664,
"learning_rate": 6.609716466019356e-06,
"loss": 0.5982,
"step": 1122
},
{
"epoch": 1.3688058489033306,
"grad_norm": 0.4514730750801606,
"learning_rate": 6.602997749766773e-06,
"loss": 0.5215,
"step": 1123
},
{
"epoch": 1.3700243704305444,
"grad_norm": 0.4806270554361702,
"learning_rate": 6.596275805931691e-06,
"loss": 0.6507,
"step": 1124
},
{
"epoch": 1.371242891957758,
"grad_norm": 0.42879599863826967,
"learning_rate": 6.589550648048517e-06,
"loss": 0.5263,
"step": 1125
},
{
"epoch": 1.3724614134849715,
"grad_norm": 0.5002076010149914,
"learning_rate": 6.582822289658134e-06,
"loss": 0.544,
"step": 1126
},
{
"epoch": 1.373679935012185,
"grad_norm": 0.49996651647577767,
"learning_rate": 6.576090744307866e-06,
"loss": 0.6115,
"step": 1127
},
{
"epoch": 1.374898456539399,
"grad_norm": 0.47192523752862847,
"learning_rate": 6.569356025551454e-06,
"loss": 0.5044,
"step": 1128
},
{
"epoch": 1.3761169780666125,
"grad_norm": 0.5486850848812702,
"learning_rate": 6.562618146949033e-06,
"loss": 0.5963,
"step": 1129
},
{
"epoch": 1.3773354995938263,
"grad_norm": 0.44516782959863155,
"learning_rate": 6.5558771220670935e-06,
"loss": 0.5424,
"step": 1130
},
{
"epoch": 1.3785540211210399,
"grad_norm": 0.49271550503516953,
"learning_rate": 6.5491329644784655e-06,
"loss": 0.5241,
"step": 1131
},
{
"epoch": 1.3797725426482534,
"grad_norm": 0.5660845065509308,
"learning_rate": 6.542385687762287e-06,
"loss": 0.6154,
"step": 1132
},
{
"epoch": 1.380991064175467,
"grad_norm": 0.4271740206518289,
"learning_rate": 6.53563530550397e-06,
"loss": 0.4689,
"step": 1133
},
{
"epoch": 1.3822095857026808,
"grad_norm": 0.5195908868358481,
"learning_rate": 6.5288818312951886e-06,
"loss": 0.5462,
"step": 1134
},
{
"epoch": 1.3834281072298944,
"grad_norm": 0.5034196032593611,
"learning_rate": 6.5221252787338365e-06,
"loss": 0.587,
"step": 1135
},
{
"epoch": 1.384646628757108,
"grad_norm": 0.5196583715973591,
"learning_rate": 6.515365661424007e-06,
"loss": 0.577,
"step": 1136
},
{
"epoch": 1.3858651502843218,
"grad_norm": 0.47148796040432117,
"learning_rate": 6.508602992975963e-06,
"loss": 0.5516,
"step": 1137
},
{
"epoch": 1.3870836718115354,
"grad_norm": 0.47240263639853314,
"learning_rate": 6.501837287006112e-06,
"loss": 0.5017,
"step": 1138
},
{
"epoch": 1.388302193338749,
"grad_norm": 0.4848195827079731,
"learning_rate": 6.495068557136979e-06,
"loss": 0.6068,
"step": 1139
},
{
"epoch": 1.3895207148659625,
"grad_norm": 0.464916968432065,
"learning_rate": 6.4882968169971734e-06,
"loss": 0.5114,
"step": 1140
},
{
"epoch": 1.3907392363931763,
"grad_norm": 0.4672169290921844,
"learning_rate": 6.4815220802213705e-06,
"loss": 0.571,
"step": 1141
},
{
"epoch": 1.39195775792039,
"grad_norm": 0.45354629086847004,
"learning_rate": 6.474744360450274e-06,
"loss": 0.559,
"step": 1142
},
{
"epoch": 1.3931762794476037,
"grad_norm": 0.49697460412752753,
"learning_rate": 6.467963671330602e-06,
"loss": 0.5712,
"step": 1143
},
{
"epoch": 1.3943948009748173,
"grad_norm": 0.42597106705666193,
"learning_rate": 6.461180026515038e-06,
"loss": 0.4836,
"step": 1144
},
{
"epoch": 1.3956133225020309,
"grad_norm": 0.5696838757187256,
"learning_rate": 6.45439343966223e-06,
"loss": 0.6293,
"step": 1145
},
{
"epoch": 1.3968318440292444,
"grad_norm": 0.44015111009766694,
"learning_rate": 6.447603924436744e-06,
"loss": 0.5672,
"step": 1146
},
{
"epoch": 1.3980503655564582,
"grad_norm": 0.5171923824405892,
"learning_rate": 6.44081149450904e-06,
"loss": 0.543,
"step": 1147
},
{
"epoch": 1.3992688870836718,
"grad_norm": 0.4861104307146921,
"learning_rate": 6.434016163555452e-06,
"loss": 0.5536,
"step": 1148
},
{
"epoch": 1.4004874086108854,
"grad_norm": 0.4672428316707098,
"learning_rate": 6.4272179452581505e-06,
"loss": 0.5513,
"step": 1149
},
{
"epoch": 1.4017059301380992,
"grad_norm": 0.5041699642923018,
"learning_rate": 6.42041685330512e-06,
"loss": 0.5579,
"step": 1150
},
{
"epoch": 1.4029244516653128,
"grad_norm": 0.5689659183656529,
"learning_rate": 6.413612901390136e-06,
"loss": 0.5171,
"step": 1151
},
{
"epoch": 1.4041429731925263,
"grad_norm": 0.4852858521398993,
"learning_rate": 6.406806103212725e-06,
"loss": 0.619,
"step": 1152
},
{
"epoch": 1.40536149471974,
"grad_norm": 0.5193110473839417,
"learning_rate": 6.39999647247815e-06,
"loss": 0.549,
"step": 1153
},
{
"epoch": 1.4065800162469537,
"grad_norm": 0.4769214566829931,
"learning_rate": 6.393184022897375e-06,
"loss": 0.526,
"step": 1154
},
{
"epoch": 1.4077985377741673,
"grad_norm": 0.4334565428180597,
"learning_rate": 6.38636876818704e-06,
"loss": 0.5511,
"step": 1155
},
{
"epoch": 1.409017059301381,
"grad_norm": 0.7019468412425452,
"learning_rate": 6.3795507220694335e-06,
"loss": 0.6058,
"step": 1156
},
{
"epoch": 1.4102355808285947,
"grad_norm": 0.4559746858499104,
"learning_rate": 6.372729898272463e-06,
"loss": 0.5623,
"step": 1157
},
{
"epoch": 1.4114541023558083,
"grad_norm": 0.4873072450531043,
"learning_rate": 6.365906310529631e-06,
"loss": 0.526,
"step": 1158
},
{
"epoch": 1.4126726238830218,
"grad_norm": 0.4930877948197653,
"learning_rate": 6.359079972580001e-06,
"loss": 0.5417,
"step": 1159
},
{
"epoch": 1.4138911454102356,
"grad_norm": 0.4585436187425684,
"learning_rate": 6.352250898168181e-06,
"loss": 0.5558,
"step": 1160
},
{
"epoch": 1.4151096669374492,
"grad_norm": 0.4843507493218003,
"learning_rate": 6.345419101044281e-06,
"loss": 0.6178,
"step": 1161
},
{
"epoch": 1.4163281884646628,
"grad_norm": 0.4103238225687281,
"learning_rate": 6.338584594963898e-06,
"loss": 0.486,
"step": 1162
},
{
"epoch": 1.4175467099918766,
"grad_norm": 0.4339709319145015,
"learning_rate": 6.3317473936880814e-06,
"loss": 0.5516,
"step": 1163
},
{
"epoch": 1.4187652315190902,
"grad_norm": 0.5006035176222503,
"learning_rate": 6.32490751098331e-06,
"loss": 0.5893,
"step": 1164
},
{
"epoch": 1.4199837530463038,
"grad_norm": 0.43944118536778887,
"learning_rate": 6.318064960621456e-06,
"loss": 0.554,
"step": 1165
},
{
"epoch": 1.4212022745735173,
"grad_norm": 0.4205988668702698,
"learning_rate": 6.31121975637977e-06,
"loss": 0.5705,
"step": 1166
},
{
"epoch": 1.4224207961007311,
"grad_norm": 0.42492946208091176,
"learning_rate": 6.30437191204084e-06,
"loss": 0.5382,
"step": 1167
},
{
"epoch": 1.4236393176279447,
"grad_norm": 0.4782081072972405,
"learning_rate": 6.297521441392572e-06,
"loss": 0.6081,
"step": 1168
},
{
"epoch": 1.4248578391551585,
"grad_norm": 0.4056428801301219,
"learning_rate": 6.290668358228162e-06,
"loss": 0.5448,
"step": 1169
},
{
"epoch": 1.426076360682372,
"grad_norm": 0.4346131086300656,
"learning_rate": 6.2838126763460635e-06,
"loss": 0.5339,
"step": 1170
},
{
"epoch": 1.4272948822095857,
"grad_norm": 0.4104447709327887,
"learning_rate": 6.276954409549963e-06,
"loss": 0.5399,
"step": 1171
},
{
"epoch": 1.4285134037367992,
"grad_norm": 0.46444896204069186,
"learning_rate": 6.270093571648752e-06,
"loss": 0.5941,
"step": 1172
},
{
"epoch": 1.429731925264013,
"grad_norm": 0.4451786529645794,
"learning_rate": 6.263230176456497e-06,
"loss": 0.5384,
"step": 1173
},
{
"epoch": 1.4309504467912266,
"grad_norm": 0.47981749578622157,
"learning_rate": 6.256364237792419e-06,
"loss": 0.5765,
"step": 1174
},
{
"epoch": 1.4321689683184404,
"grad_norm": 0.4367054717344673,
"learning_rate": 6.249495769480856e-06,
"loss": 0.5124,
"step": 1175
},
{
"epoch": 1.433387489845654,
"grad_norm": 0.42899069684022384,
"learning_rate": 6.2426247853512355e-06,
"loss": 0.5524,
"step": 1176
},
{
"epoch": 1.4346060113728676,
"grad_norm": 0.4904917718170387,
"learning_rate": 6.23575129923806e-06,
"loss": 0.5613,
"step": 1177
},
{
"epoch": 1.4358245329000812,
"grad_norm": 0.7624825038153906,
"learning_rate": 6.228875324980862e-06,
"loss": 0.5469,
"step": 1178
},
{
"epoch": 1.437043054427295,
"grad_norm": 0.48032758007828885,
"learning_rate": 6.221996876424186e-06,
"loss": 0.6088,
"step": 1179
},
{
"epoch": 1.4382615759545085,
"grad_norm": 0.4261676949483954,
"learning_rate": 6.21511596741756e-06,
"loss": 0.5269,
"step": 1180
},
{
"epoch": 1.4394800974817221,
"grad_norm": 0.44938704101588606,
"learning_rate": 6.208232611815463e-06,
"loss": 0.5497,
"step": 1181
},
{
"epoch": 1.440698619008936,
"grad_norm": 0.47843420481431187,
"learning_rate": 6.2013468234773034e-06,
"loss": 0.5673,
"step": 1182
},
{
"epoch": 1.4419171405361495,
"grad_norm": 0.4143118724051908,
"learning_rate": 6.194458616267388e-06,
"loss": 0.5561,
"step": 1183
},
{
"epoch": 1.443135662063363,
"grad_norm": 0.4687400706518928,
"learning_rate": 6.187568004054888e-06,
"loss": 0.5599,
"step": 1184
},
{
"epoch": 1.4443541835905767,
"grad_norm": 0.43117586360472987,
"learning_rate": 6.180675000713825e-06,
"loss": 0.5579,
"step": 1185
},
{
"epoch": 1.4455727051177905,
"grad_norm": 0.4677168526332838,
"learning_rate": 6.173779620123028e-06,
"loss": 0.5377,
"step": 1186
},
{
"epoch": 1.446791226645004,
"grad_norm": 0.4684613773900322,
"learning_rate": 6.166881876166119e-06,
"loss": 0.5505,
"step": 1187
},
{
"epoch": 1.4480097481722178,
"grad_norm": 0.45099302981330264,
"learning_rate": 6.1599817827314744e-06,
"loss": 0.5349,
"step": 1188
},
{
"epoch": 1.4492282696994314,
"grad_norm": 0.44725643758516653,
"learning_rate": 6.153079353712201e-06,
"loss": 0.5445,
"step": 1189
},
{
"epoch": 1.450446791226645,
"grad_norm": 0.509822041445509,
"learning_rate": 6.14617460300611e-06,
"loss": 0.6048,
"step": 1190
},
{
"epoch": 1.4516653127538586,
"grad_norm": 0.48251767820083963,
"learning_rate": 6.139267544515689e-06,
"loss": 0.5214,
"step": 1191
},
{
"epoch": 1.4528838342810724,
"grad_norm": 0.462469865969966,
"learning_rate": 6.132358192148065e-06,
"loss": 0.5628,
"step": 1192
},
{
"epoch": 1.454102355808286,
"grad_norm": 0.42592720114566546,
"learning_rate": 6.125446559814994e-06,
"loss": 0.4844,
"step": 1193
},
{
"epoch": 1.4553208773354995,
"grad_norm": 0.49275532261036237,
"learning_rate": 6.118532661432812e-06,
"loss": 0.5944,
"step": 1194
},
{
"epoch": 1.4565393988627133,
"grad_norm": 0.4649906751266784,
"learning_rate": 6.111616510922426e-06,
"loss": 0.5493,
"step": 1195
},
{
"epoch": 1.457757920389927,
"grad_norm": 0.46291320623399196,
"learning_rate": 6.104698122209274e-06,
"loss": 0.5172,
"step": 1196
},
{
"epoch": 1.4589764419171405,
"grad_norm": 0.5426739834419568,
"learning_rate": 6.097777509223299e-06,
"loss": 0.5666,
"step": 1197
},
{
"epoch": 1.460194963444354,
"grad_norm": 0.45093365966871296,
"learning_rate": 6.090854685898928e-06,
"loss": 0.5357,
"step": 1198
},
{
"epoch": 1.4614134849715679,
"grad_norm": 0.46357917186858966,
"learning_rate": 6.083929666175031e-06,
"loss": 0.5102,
"step": 1199
},
{
"epoch": 1.4626320064987814,
"grad_norm": 0.42735860218881255,
"learning_rate": 6.077002463994908e-06,
"loss": 0.5353,
"step": 1200
},
{
"epoch": 1.4638505280259952,
"grad_norm": 0.48773472737225,
"learning_rate": 6.070073093306246e-06,
"loss": 0.5969,
"step": 1201
},
{
"epoch": 1.4650690495532088,
"grad_norm": 0.45583834308371346,
"learning_rate": 6.063141568061104e-06,
"loss": 0.5501,
"step": 1202
},
{
"epoch": 1.4662875710804224,
"grad_norm": 0.48230795906015783,
"learning_rate": 6.056207902215874e-06,
"loss": 0.5943,
"step": 1203
},
{
"epoch": 1.467506092607636,
"grad_norm": 0.48530024356797447,
"learning_rate": 6.049272109731266e-06,
"loss": 0.535,
"step": 1204
},
{
"epoch": 1.4687246141348498,
"grad_norm": 0.39847364405399893,
"learning_rate": 6.042334204572261e-06,
"loss": 0.5088,
"step": 1205
},
{
"epoch": 1.4699431356620634,
"grad_norm": 0.4192802944065179,
"learning_rate": 6.035394200708104e-06,
"loss": 0.5541,
"step": 1206
},
{
"epoch": 1.471161657189277,
"grad_norm": 0.5095459416726968,
"learning_rate": 6.02845211211226e-06,
"loss": 0.6044,
"step": 1207
},
{
"epoch": 1.4723801787164907,
"grad_norm": 0.4834365213328995,
"learning_rate": 6.021507952762392e-06,
"loss": 0.5698,
"step": 1208
},
{
"epoch": 1.4735987002437043,
"grad_norm": 0.43629510532697163,
"learning_rate": 6.014561736640334e-06,
"loss": 0.536,
"step": 1209
},
{
"epoch": 1.474817221770918,
"grad_norm": 0.469188019208721,
"learning_rate": 6.007613477732061e-06,
"loss": 0.5495,
"step": 1210
},
{
"epoch": 1.4760357432981315,
"grad_norm": 0.4901471440756352,
"learning_rate": 6.000663190027658e-06,
"loss": 0.5661,
"step": 1211
},
{
"epoch": 1.4772542648253453,
"grad_norm": 0.4686562631964871,
"learning_rate": 5.993710887521302e-06,
"loss": 0.5812,
"step": 1212
},
{
"epoch": 1.4784727863525589,
"grad_norm": 0.48734085024012297,
"learning_rate": 5.986756584211217e-06,
"loss": 0.5335,
"step": 1213
},
{
"epoch": 1.4796913078797727,
"grad_norm": 0.5326878131009583,
"learning_rate": 5.979800294099666e-06,
"loss": 0.5689,
"step": 1214
},
{
"epoch": 1.4809098294069862,
"grad_norm": 0.4253596342133157,
"learning_rate": 5.972842031192901e-06,
"loss": 0.5265,
"step": 1215
},
{
"epoch": 1.4821283509341998,
"grad_norm": 0.4985627825685433,
"learning_rate": 5.965881809501158e-06,
"loss": 0.5632,
"step": 1216
},
{
"epoch": 1.4833468724614134,
"grad_norm": 0.45204140138324095,
"learning_rate": 5.958919643038609e-06,
"loss": 0.5569,
"step": 1217
},
{
"epoch": 1.4845653939886272,
"grad_norm": 0.4483567522219748,
"learning_rate": 5.951955545823342e-06,
"loss": 0.5731,
"step": 1218
},
{
"epoch": 1.4857839155158408,
"grad_norm": 0.4426846776302582,
"learning_rate": 5.944989531877337e-06,
"loss": 0.528,
"step": 1219
},
{
"epoch": 1.4870024370430543,
"grad_norm": 0.44399265576382146,
"learning_rate": 5.938021615226431e-06,
"loss": 0.5489,
"step": 1220
},
{
"epoch": 1.4882209585702681,
"grad_norm": 0.4581264239244066,
"learning_rate": 5.93105180990029e-06,
"loss": 0.5794,
"step": 1221
},
{
"epoch": 1.4894394800974817,
"grad_norm": 0.4198932355319627,
"learning_rate": 5.924080129932386e-06,
"loss": 0.5179,
"step": 1222
},
{
"epoch": 1.4906580016246953,
"grad_norm": 0.47789225286091797,
"learning_rate": 5.9171065893599625e-06,
"loss": 0.5638,
"step": 1223
},
{
"epoch": 1.4918765231519089,
"grad_norm": 0.4321558944637844,
"learning_rate": 5.910131202224011e-06,
"loss": 0.5057,
"step": 1224
},
{
"epoch": 1.4930950446791227,
"grad_norm": 0.4450307808888705,
"learning_rate": 5.903153982569243e-06,
"loss": 0.5421,
"step": 1225
},
{
"epoch": 1.4943135662063363,
"grad_norm": 0.5226652256866916,
"learning_rate": 5.8961749444440555e-06,
"loss": 0.576,
"step": 1226
},
{
"epoch": 1.49553208773355,
"grad_norm": 0.42973467257152986,
"learning_rate": 5.8891941019005095e-06,
"loss": 0.6013,
"step": 1227
},
{
"epoch": 1.4967506092607636,
"grad_norm": 0.40820805235816976,
"learning_rate": 5.882211468994299e-06,
"loss": 0.5175,
"step": 1228
},
{
"epoch": 1.4979691307879772,
"grad_norm": 0.4801201178398426,
"learning_rate": 5.87522705978472e-06,
"loss": 0.5833,
"step": 1229
},
{
"epoch": 1.4991876523151908,
"grad_norm": 0.45266942815985767,
"learning_rate": 5.8682408883346535e-06,
"loss": 0.5284,
"step": 1230
},
{
"epoch": 1.5004061738424046,
"grad_norm": 0.4353303644229792,
"learning_rate": 5.8612529687105156e-06,
"loss": 0.526,
"step": 1231
},
{
"epoch": 1.5016246953696182,
"grad_norm": 0.48827760996687436,
"learning_rate": 5.854263314982252e-06,
"loss": 0.5955,
"step": 1232
},
{
"epoch": 1.502843216896832,
"grad_norm": 0.4255509928321056,
"learning_rate": 5.847271941223301e-06,
"loss": 0.5442,
"step": 1233
},
{
"epoch": 1.5040617384240456,
"grad_norm": 0.43808076740492513,
"learning_rate": 5.840278861510555e-06,
"loss": 0.5433,
"step": 1234
},
{
"epoch": 1.5052802599512591,
"grad_norm": 0.4760892817675488,
"learning_rate": 5.83328408992435e-06,
"loss": 0.5702,
"step": 1235
},
{
"epoch": 1.5064987814784727,
"grad_norm": 0.4481600783932247,
"learning_rate": 5.826287640548425e-06,
"loss": 0.5946,
"step": 1236
},
{
"epoch": 1.5077173030056863,
"grad_norm": 0.42699536589107157,
"learning_rate": 5.819289527469897e-06,
"loss": 0.5642,
"step": 1237
},
{
"epoch": 1.5089358245329,
"grad_norm": 0.4633211620631564,
"learning_rate": 5.812289764779232e-06,
"loss": 0.4845,
"step": 1238
},
{
"epoch": 1.510154346060114,
"grad_norm": 0.4571770115639661,
"learning_rate": 5.80528836657022e-06,
"loss": 0.5513,
"step": 1239
},
{
"epoch": 1.5113728675873275,
"grad_norm": 0.4701604947751102,
"learning_rate": 5.798285346939942e-06,
"loss": 0.559,
"step": 1240
},
{
"epoch": 1.512591389114541,
"grad_norm": 0.4865903026982408,
"learning_rate": 5.791280719988747e-06,
"loss": 0.5878,
"step": 1241
},
{
"epoch": 1.5138099106417546,
"grad_norm": 0.4412282841651163,
"learning_rate": 5.784274499820214e-06,
"loss": 0.5197,
"step": 1242
},
{
"epoch": 1.5150284321689682,
"grad_norm": 0.5747443137859876,
"learning_rate": 5.777266700541134e-06,
"loss": 0.6011,
"step": 1243
},
{
"epoch": 1.516246953696182,
"grad_norm": 0.4564303892112499,
"learning_rate": 5.770257336261482e-06,
"loss": 0.5279,
"step": 1244
},
{
"epoch": 1.5174654752233956,
"grad_norm": 0.45997367471162187,
"learning_rate": 5.763246421094373e-06,
"loss": 0.5255,
"step": 1245
},
{
"epoch": 1.5186839967506094,
"grad_norm": 0.4695480650402549,
"learning_rate": 5.7562339691560556e-06,
"loss": 0.5885,
"step": 1246
},
{
"epoch": 1.519902518277823,
"grad_norm": 0.5356612979245375,
"learning_rate": 5.749219994565863e-06,
"loss": 0.5569,
"step": 1247
},
{
"epoch": 1.5211210398050365,
"grad_norm": 0.5813954013182587,
"learning_rate": 5.742204511446203e-06,
"loss": 0.5544,
"step": 1248
},
{
"epoch": 1.5223395613322501,
"grad_norm": 0.43618938610834346,
"learning_rate": 5.7351875339225164e-06,
"loss": 0.5374,
"step": 1249
},
{
"epoch": 1.5235580828594637,
"grad_norm": 0.4937073666394837,
"learning_rate": 5.7281690761232515e-06,
"loss": 0.5162,
"step": 1250
},
{
"epoch": 1.5247766043866775,
"grad_norm": 0.4780704238400619,
"learning_rate": 5.72114915217984e-06,
"loss": 0.542,
"step": 1251
},
{
"epoch": 1.5259951259138913,
"grad_norm": 0.458787226662822,
"learning_rate": 5.714127776226667e-06,
"loss": 0.5708,
"step": 1252
},
{
"epoch": 1.5272136474411049,
"grad_norm": 0.4727970564003603,
"learning_rate": 5.707104962401034e-06,
"loss": 0.5678,
"step": 1253
},
{
"epoch": 1.5284321689683185,
"grad_norm": 0.42019947987975415,
"learning_rate": 5.7000807248431466e-06,
"loss": 0.4449,
"step": 1254
},
{
"epoch": 1.529650690495532,
"grad_norm": 0.5313576192243948,
"learning_rate": 5.693055077696069e-06,
"loss": 0.62,
"step": 1255
},
{
"epoch": 1.5308692120227456,
"grad_norm": 0.4133150947222481,
"learning_rate": 5.686028035105711e-06,
"loss": 0.5446,
"step": 1256
},
{
"epoch": 1.5320877335499594,
"grad_norm": 0.5182558216138413,
"learning_rate": 5.6789996112207865e-06,
"loss": 0.5589,
"step": 1257
},
{
"epoch": 1.533306255077173,
"grad_norm": 0.5235310986043601,
"learning_rate": 5.671969820192794e-06,
"loss": 0.5516,
"step": 1258
},
{
"epoch": 1.5345247766043868,
"grad_norm": 0.43543733715186,
"learning_rate": 5.664938676175982e-06,
"loss": 0.5463,
"step": 1259
},
{
"epoch": 1.5357432981316004,
"grad_norm": 0.542523163223611,
"learning_rate": 5.657906193327325e-06,
"loss": 0.5289,
"step": 1260
},
{
"epoch": 1.536961819658814,
"grad_norm": 0.6705586961902954,
"learning_rate": 5.650872385806492e-06,
"loss": 0.6,
"step": 1261
},
{
"epoch": 1.5381803411860275,
"grad_norm": 0.4252405119039053,
"learning_rate": 5.64383726777582e-06,
"loss": 0.5558,
"step": 1262
},
{
"epoch": 1.5393988627132411,
"grad_norm": 0.5168792668343379,
"learning_rate": 5.636800853400285e-06,
"loss": 0.5427,
"step": 1263
},
{
"epoch": 1.540617384240455,
"grad_norm": 0.56808607878734,
"learning_rate": 5.6297631568474705e-06,
"loss": 0.5785,
"step": 1264
},
{
"epoch": 1.5418359057676687,
"grad_norm": 0.4194312889852155,
"learning_rate": 5.622724192287548e-06,
"loss": 0.5061,
"step": 1265
},
{
"epoch": 1.5430544272948823,
"grad_norm": 0.46739113422443607,
"learning_rate": 5.615683973893235e-06,
"loss": 0.5543,
"step": 1266
},
{
"epoch": 1.5442729488220959,
"grad_norm": 0.4711436329274137,
"learning_rate": 5.608642515839777e-06,
"loss": 0.5468,
"step": 1267
},
{
"epoch": 1.5454914703493094,
"grad_norm": 0.45925976085714865,
"learning_rate": 5.601599832304915e-06,
"loss": 0.5533,
"step": 1268
},
{
"epoch": 1.546709991876523,
"grad_norm": 0.4629430310984532,
"learning_rate": 5.594555937468856e-06,
"loss": 0.6238,
"step": 1269
},
{
"epoch": 1.5479285134037368,
"grad_norm": 0.4430875583116207,
"learning_rate": 5.587510845514249e-06,
"loss": 0.5334,
"step": 1270
},
{
"epoch": 1.5491470349309504,
"grad_norm": 0.4964005647402626,
"learning_rate": 5.5804645706261515e-06,
"loss": 0.5563,
"step": 1271
},
{
"epoch": 1.5503655564581642,
"grad_norm": 0.47908339446690995,
"learning_rate": 5.573417126992004e-06,
"loss": 0.5761,
"step": 1272
},
{
"epoch": 1.5515840779853778,
"grad_norm": 0.4320719099995596,
"learning_rate": 5.5663685288015955e-06,
"loss": 0.5519,
"step": 1273
},
{
"epoch": 1.5528025995125914,
"grad_norm": 0.45893470814872167,
"learning_rate": 5.5593187902470465e-06,
"loss": 0.5122,
"step": 1274
},
{
"epoch": 1.554021121039805,
"grad_norm": 0.47949830848407404,
"learning_rate": 5.55226792552277e-06,
"loss": 0.5839,
"step": 1275
},
{
"epoch": 1.5552396425670185,
"grad_norm": 0.415731009852519,
"learning_rate": 5.545215948825447e-06,
"loss": 0.5378,
"step": 1276
},
{
"epoch": 1.5564581640942323,
"grad_norm": 0.466056698108541,
"learning_rate": 5.538162874353994e-06,
"loss": 0.4983,
"step": 1277
},
{
"epoch": 1.5576766856214461,
"grad_norm": 0.5916240577351891,
"learning_rate": 5.5311087163095475e-06,
"loss": 0.6251,
"step": 1278
},
{
"epoch": 1.5588952071486597,
"grad_norm": 0.44367509738450317,
"learning_rate": 5.524053488895413e-06,
"loss": 0.5488,
"step": 1279
},
{
"epoch": 1.5601137286758733,
"grad_norm": 0.47062048808194906,
"learning_rate": 5.516997206317061e-06,
"loss": 0.5563,
"step": 1280
},
{
"epoch": 1.5613322502030869,
"grad_norm": 0.5420478722656378,
"learning_rate": 5.509939882782077e-06,
"loss": 0.5416,
"step": 1281
},
{
"epoch": 1.5625507717303004,
"grad_norm": 0.5222284367927739,
"learning_rate": 5.502881532500149e-06,
"loss": 0.5965,
"step": 1282
},
{
"epoch": 1.5637692932575142,
"grad_norm": 0.42208342526415327,
"learning_rate": 5.49582216968303e-06,
"loss": 0.5467,
"step": 1283
},
{
"epoch": 1.5649878147847278,
"grad_norm": 0.4294650898913376,
"learning_rate": 5.4887618085445094e-06,
"loss": 0.5287,
"step": 1284
},
{
"epoch": 1.5662063363119416,
"grad_norm": 0.46855647055671784,
"learning_rate": 5.48170046330039e-06,
"loss": 0.5628,
"step": 1285
},
{
"epoch": 1.5674248578391552,
"grad_norm": 0.4699651333558714,
"learning_rate": 5.474638148168456e-06,
"loss": 0.5574,
"step": 1286
},
{
"epoch": 1.5686433793663688,
"grad_norm": 0.5135379339848296,
"learning_rate": 5.467574877368441e-06,
"loss": 0.547,
"step": 1287
},
{
"epoch": 1.5698619008935824,
"grad_norm": 0.4810680839376017,
"learning_rate": 5.460510665122007e-06,
"loss": 0.557,
"step": 1288
},
{
"epoch": 1.5710804224207962,
"grad_norm": 0.4098166771088161,
"learning_rate": 5.453445525652711e-06,
"loss": 0.5418,
"step": 1289
},
{
"epoch": 1.5722989439480097,
"grad_norm": 0.450215288957951,
"learning_rate": 5.446379473185972e-06,
"loss": 0.5357,
"step": 1290
},
{
"epoch": 1.5735174654752235,
"grad_norm": 0.5294521431799539,
"learning_rate": 5.4393125219490536e-06,
"loss": 0.5643,
"step": 1291
},
{
"epoch": 1.574735987002437,
"grad_norm": 0.4592328236388863,
"learning_rate": 5.432244686171025e-06,
"loss": 0.5579,
"step": 1292
},
{
"epoch": 1.5759545085296507,
"grad_norm": 0.43283051916010107,
"learning_rate": 5.42517598008274e-06,
"loss": 0.5045,
"step": 1293
},
{
"epoch": 1.5771730300568643,
"grad_norm": 0.5659434705667795,
"learning_rate": 5.418106417916799e-06,
"loss": 0.6214,
"step": 1294
},
{
"epoch": 1.5783915515840778,
"grad_norm": 0.43767902318474483,
"learning_rate": 5.411036013907534e-06,
"loss": 0.4785,
"step": 1295
},
{
"epoch": 1.5796100731112916,
"grad_norm": 0.49107247160929135,
"learning_rate": 5.403964782290962e-06,
"loss": 0.6033,
"step": 1296
},
{
"epoch": 1.5808285946385054,
"grad_norm": 0.4941184832970728,
"learning_rate": 5.396892737304779e-06,
"loss": 0.5625,
"step": 1297
},
{
"epoch": 1.582047116165719,
"grad_norm": 0.45207210705440176,
"learning_rate": 5.389819893188304e-06,
"loss": 0.5955,
"step": 1298
},
{
"epoch": 1.5832656376929326,
"grad_norm": 0.41624551022025036,
"learning_rate": 5.38274626418248e-06,
"loss": 0.4859,
"step": 1299
},
{
"epoch": 1.5844841592201462,
"grad_norm": 0.5355211526596017,
"learning_rate": 5.375671864529817e-06,
"loss": 0.5847,
"step": 1300
},
{
"epoch": 1.5857026807473598,
"grad_norm": 0.4975201469339488,
"learning_rate": 5.368596708474388e-06,
"loss": 0.5338,
"step": 1301
},
{
"epoch": 1.5869212022745736,
"grad_norm": 0.4863357216575736,
"learning_rate": 5.361520810261779e-06,
"loss": 0.5535,
"step": 1302
},
{
"epoch": 1.5881397238017871,
"grad_norm": 0.4458515473467672,
"learning_rate": 5.354444184139077e-06,
"loss": 0.5457,
"step": 1303
},
{
"epoch": 1.589358245329001,
"grad_norm": 0.4614906452198629,
"learning_rate": 5.347366844354833e-06,
"loss": 0.5398,
"step": 1304
},
{
"epoch": 1.5905767668562145,
"grad_norm": 0.4685010422012627,
"learning_rate": 5.340288805159037e-06,
"loss": 0.5407,
"step": 1305
},
{
"epoch": 1.591795288383428,
"grad_norm": 0.48804182586096323,
"learning_rate": 5.33321008080308e-06,
"loss": 0.547,
"step": 1306
},
{
"epoch": 1.5930138099106417,
"grad_norm": 0.44694564705893386,
"learning_rate": 5.3261306855397395e-06,
"loss": 0.5459,
"step": 1307
},
{
"epoch": 1.5942323314378553,
"grad_norm": 0.4139859944920655,
"learning_rate": 5.319050633623141e-06,
"loss": 0.5519,
"step": 1308
},
{
"epoch": 1.595450852965069,
"grad_norm": 0.5097755056565069,
"learning_rate": 5.311969939308736e-06,
"loss": 0.5901,
"step": 1309
},
{
"epoch": 1.5966693744922829,
"grad_norm": 0.47592489399723925,
"learning_rate": 5.304888616853265e-06,
"loss": 0.5324,
"step": 1310
},
{
"epoch": 1.5978878960194964,
"grad_norm": 0.4276883892776071,
"learning_rate": 5.297806680514731e-06,
"loss": 0.5106,
"step": 1311
},
{
"epoch": 1.59910641754671,
"grad_norm": 0.4681244968477927,
"learning_rate": 5.290724144552379e-06,
"loss": 0.6054,
"step": 1312
},
{
"epoch": 1.6003249390739236,
"grad_norm": 0.4896701927637777,
"learning_rate": 5.283641023226661e-06,
"loss": 0.5455,
"step": 1313
},
{
"epoch": 1.6015434606011372,
"grad_norm": 0.4245053792739156,
"learning_rate": 5.276557330799203e-06,
"loss": 0.5471,
"step": 1314
},
{
"epoch": 1.602761982128351,
"grad_norm": 0.4874649206218259,
"learning_rate": 5.269473081532785e-06,
"loss": 0.5782,
"step": 1315
},
{
"epoch": 1.6039805036555645,
"grad_norm": 0.47549962008011226,
"learning_rate": 5.262388289691303e-06,
"loss": 0.575,
"step": 1316
},
{
"epoch": 1.6051990251827783,
"grad_norm": 0.42642213924678707,
"learning_rate": 5.255302969539753e-06,
"loss": 0.5805,
"step": 1317
},
{
"epoch": 1.606417546709992,
"grad_norm": 0.42684200856960786,
"learning_rate": 5.248217135344191e-06,
"loss": 0.5072,
"step": 1318
},
{
"epoch": 1.6076360682372055,
"grad_norm": 0.4365701459872912,
"learning_rate": 5.241130801371704e-06,
"loss": 0.5658,
"step": 1319
},
{
"epoch": 1.608854589764419,
"grad_norm": 0.42471390001052695,
"learning_rate": 5.234043981890395e-06,
"loss": 0.5698,
"step": 1320
},
{
"epoch": 1.6100731112916327,
"grad_norm": 0.4535238587027896,
"learning_rate": 5.226956691169332e-06,
"loss": 0.5839,
"step": 1321
},
{
"epoch": 1.6112916328188465,
"grad_norm": 0.4247946464572348,
"learning_rate": 5.219868943478542e-06,
"loss": 0.5577,
"step": 1322
},
{
"epoch": 1.6125101543460603,
"grad_norm": 0.43376338736220743,
"learning_rate": 5.212780753088968e-06,
"loss": 0.5449,
"step": 1323
},
{
"epoch": 1.6137286758732738,
"grad_norm": 0.4061841147634886,
"learning_rate": 5.205692134272445e-06,
"loss": 0.5179,
"step": 1324
},
{
"epoch": 1.6149471974004874,
"grad_norm": 0.4596996267175098,
"learning_rate": 5.1986031013016706e-06,
"loss": 0.5818,
"step": 1325
},
{
"epoch": 1.616165718927701,
"grad_norm": 0.43123766272618486,
"learning_rate": 5.191513668450178e-06,
"loss": 0.5687,
"step": 1326
},
{
"epoch": 1.6173842404549146,
"grad_norm": 0.4329937345499755,
"learning_rate": 5.184423849992299e-06,
"loss": 0.5348,
"step": 1327
},
{
"epoch": 1.6186027619821284,
"grad_norm": 0.49663961496101067,
"learning_rate": 5.177333660203153e-06,
"loss": 0.5956,
"step": 1328
},
{
"epoch": 1.619821283509342,
"grad_norm": 0.3924685962518714,
"learning_rate": 5.170243113358594e-06,
"loss": 0.5125,
"step": 1329
},
{
"epoch": 1.6210398050365558,
"grad_norm": 0.4856207429888876,
"learning_rate": 5.163152223735206e-06,
"loss": 0.5778,
"step": 1330
},
{
"epoch": 1.6222583265637693,
"grad_norm": 0.45002527423182,
"learning_rate": 5.156061005610258e-06,
"loss": 0.5584,
"step": 1331
},
{
"epoch": 1.623476848090983,
"grad_norm": 0.4310106517218945,
"learning_rate": 5.1489694732616805e-06,
"loss": 0.5377,
"step": 1332
},
{
"epoch": 1.6246953696181965,
"grad_norm": 0.49448879444066074,
"learning_rate": 5.141877640968037e-06,
"loss": 0.623,
"step": 1333
},
{
"epoch": 1.62591389114541,
"grad_norm": 0.40362533961876157,
"learning_rate": 5.134785523008496e-06,
"loss": 0.5014,
"step": 1334
},
{
"epoch": 1.6271324126726239,
"grad_norm": 0.4269483197368071,
"learning_rate": 5.127693133662801e-06,
"loss": 0.573,
"step": 1335
},
{
"epoch": 1.6283509341998377,
"grad_norm": 0.4258879503760348,
"learning_rate": 5.12060048721124e-06,
"loss": 0.5314,
"step": 1336
},
{
"epoch": 1.6295694557270513,
"grad_norm": 0.44120462268057764,
"learning_rate": 5.11350759793462e-06,
"loss": 0.5373,
"step": 1337
},
{
"epoch": 1.6307879772542648,
"grad_norm": 0.4276083907367786,
"learning_rate": 5.106414480114238e-06,
"loss": 0.5276,
"step": 1338
},
{
"epoch": 1.6320064987814784,
"grad_norm": 0.4517524664721021,
"learning_rate": 5.099321148031851e-06,
"loss": 0.5504,
"step": 1339
},
{
"epoch": 1.633225020308692,
"grad_norm": 0.44913374968040776,
"learning_rate": 5.092227615969643e-06,
"loss": 0.553,
"step": 1340
},
{
"epoch": 1.6344435418359058,
"grad_norm": 0.49845971138611844,
"learning_rate": 5.085133898210208e-06,
"loss": 0.5653,
"step": 1341
},
{
"epoch": 1.6356620633631194,
"grad_norm": 0.4427260322632497,
"learning_rate": 5.078040009036509e-06,
"loss": 0.5213,
"step": 1342
},
{
"epoch": 1.6368805848903332,
"grad_norm": 0.4177253316358593,
"learning_rate": 5.070945962731854e-06,
"loss": 0.5397,
"step": 1343
},
{
"epoch": 1.6380991064175467,
"grad_norm": 0.47651126334983296,
"learning_rate": 5.06385177357987e-06,
"loss": 0.5595,
"step": 1344
},
{
"epoch": 1.6393176279447603,
"grad_norm": 0.5627892918210755,
"learning_rate": 5.056757455864469e-06,
"loss": 0.6096,
"step": 1345
},
{
"epoch": 1.640536149471974,
"grad_norm": 0.44180856064958623,
"learning_rate": 5.049663023869824e-06,
"loss": 0.5025,
"step": 1346
},
{
"epoch": 1.6417546709991877,
"grad_norm": 0.460979656039155,
"learning_rate": 5.042568491880338e-06,
"loss": 0.5982,
"step": 1347
},
{
"epoch": 1.6429731925264013,
"grad_norm": 0.4821324897781787,
"learning_rate": 5.035473874180612e-06,
"loss": 0.5598,
"step": 1348
},
{
"epoch": 1.644191714053615,
"grad_norm": 0.45517260087056105,
"learning_rate": 5.028379185055424e-06,
"loss": 0.5246,
"step": 1349
},
{
"epoch": 1.6454102355808287,
"grad_norm": 0.4413055629736707,
"learning_rate": 5.021284438789694e-06,
"loss": 0.5341,
"step": 1350
},
{
"epoch": 1.6466287571080422,
"grad_norm": 0.4614955719864221,
"learning_rate": 5.014189649668456e-06,
"loss": 0.5578,
"step": 1351
},
{
"epoch": 1.6478472786352558,
"grad_norm": 0.4953936356649888,
"learning_rate": 5.007094831976832e-06,
"loss": 0.5765,
"step": 1352
},
{
"epoch": 1.6490658001624694,
"grad_norm": 0.39648893153136167,
"learning_rate": 5e-06,
"loss": 0.5342,
"step": 1353
},
{
"epoch": 1.6502843216896832,
"grad_norm": 0.43855043725681864,
"learning_rate": 4.992905168023169e-06,
"loss": 0.543,
"step": 1354
},
{
"epoch": 1.6515028432168968,
"grad_norm": 0.5301209980205615,
"learning_rate": 4.985810350331544e-06,
"loss": 0.6293,
"step": 1355
},
{
"epoch": 1.6527213647441106,
"grad_norm": 0.38590596359640195,
"learning_rate": 4.9787155612103076e-06,
"loss": 0.5296,
"step": 1356
},
{
"epoch": 1.6539398862713242,
"grad_norm": 0.42738095322238806,
"learning_rate": 4.9716208149445776e-06,
"loss": 0.5308,
"step": 1357
},
{
"epoch": 1.6551584077985377,
"grad_norm": 0.4555041349632123,
"learning_rate": 4.96452612581939e-06,
"loss": 0.5788,
"step": 1358
},
{
"epoch": 1.6563769293257513,
"grad_norm": 0.4558921081759917,
"learning_rate": 4.9574315081196634e-06,
"loss": 0.5609,
"step": 1359
},
{
"epoch": 1.6575954508529651,
"grad_norm": 0.4503929824518257,
"learning_rate": 4.950336976130176e-06,
"loss": 0.5341,
"step": 1360
},
{
"epoch": 1.6588139723801787,
"grad_norm": 0.43711031728275695,
"learning_rate": 4.9432425441355334e-06,
"loss": 0.5793,
"step": 1361
},
{
"epoch": 1.6600324939073925,
"grad_norm": 0.39568528756580684,
"learning_rate": 4.936148226420133e-06,
"loss": 0.5069,
"step": 1362
},
{
"epoch": 1.661251015434606,
"grad_norm": 0.4309659404250017,
"learning_rate": 4.929054037268147e-06,
"loss": 0.5872,
"step": 1363
},
{
"epoch": 1.6624695369618196,
"grad_norm": 0.482908985444469,
"learning_rate": 4.921959990963493e-06,
"loss": 0.5583,
"step": 1364
},
{
"epoch": 1.6636880584890332,
"grad_norm": 0.4133363420753277,
"learning_rate": 4.914866101789793e-06,
"loss": 0.484,
"step": 1365
},
{
"epoch": 1.6649065800162468,
"grad_norm": 0.46336848283664533,
"learning_rate": 4.907772384030357e-06,
"loss": 0.6055,
"step": 1366
},
{
"epoch": 1.6661251015434606,
"grad_norm": 0.4021280914849084,
"learning_rate": 4.900678851968152e-06,
"loss": 0.4953,
"step": 1367
},
{
"epoch": 1.6673436230706744,
"grad_norm": 0.4496122068891948,
"learning_rate": 4.893585519885764e-06,
"loss": 0.5631,
"step": 1368
},
{
"epoch": 1.668562144597888,
"grad_norm": 0.4386416975070193,
"learning_rate": 4.886492402065381e-06,
"loss": 0.5632,
"step": 1369
},
{
"epoch": 1.6697806661251016,
"grad_norm": 0.4335033691327931,
"learning_rate": 4.8793995127887615e-06,
"loss": 0.5377,
"step": 1370
},
{
"epoch": 1.6709991876523151,
"grad_norm": 0.4639132609070873,
"learning_rate": 4.8723068663372005e-06,
"loss": 0.5658,
"step": 1371
},
{
"epoch": 1.6722177091795287,
"grad_norm": 0.4186533135703324,
"learning_rate": 4.865214476991506e-06,
"loss": 0.538,
"step": 1372
},
{
"epoch": 1.6734362307067425,
"grad_norm": 0.5100673554858591,
"learning_rate": 4.858122359031964e-06,
"loss": 0.5977,
"step": 1373
},
{
"epoch": 1.674654752233956,
"grad_norm": 0.4284001466166066,
"learning_rate": 4.851030526738321e-06,
"loss": 0.5325,
"step": 1374
},
{
"epoch": 1.67587327376117,
"grad_norm": 0.4048773843920905,
"learning_rate": 4.843938994389744e-06,
"loss": 0.4975,
"step": 1375
},
{
"epoch": 1.6770917952883835,
"grad_norm": 0.4074001135895807,
"learning_rate": 4.836847776264794e-06,
"loss": 0.5762,
"step": 1376
},
{
"epoch": 1.678310316815597,
"grad_norm": 0.41740364142746117,
"learning_rate": 4.829756886641408e-06,
"loss": 0.5731,
"step": 1377
},
{
"epoch": 1.6795288383428106,
"grad_norm": 0.4812773839220182,
"learning_rate": 4.82266633979685e-06,
"loss": 0.5849,
"step": 1378
},
{
"epoch": 1.6807473598700242,
"grad_norm": 0.39560445425868235,
"learning_rate": 4.815576150007702e-06,
"loss": 0.4699,
"step": 1379
},
{
"epoch": 1.681965881397238,
"grad_norm": 0.4414471548591453,
"learning_rate": 4.808486331549824e-06,
"loss": 0.5626,
"step": 1380
},
{
"epoch": 1.6831844029244518,
"grad_norm": 0.38187499198826846,
"learning_rate": 4.801396898698329e-06,
"loss": 0.5071,
"step": 1381
},
{
"epoch": 1.6844029244516654,
"grad_norm": 0.4892251033230591,
"learning_rate": 4.794307865727555e-06,
"loss": 0.5552,
"step": 1382
},
{
"epoch": 1.685621445978879,
"grad_norm": 0.482903388217794,
"learning_rate": 4.787219246911034e-06,
"loss": 0.5492,
"step": 1383
},
{
"epoch": 1.6868399675060926,
"grad_norm": 0.45801551724996875,
"learning_rate": 4.78013105652146e-06,
"loss": 0.5838,
"step": 1384
},
{
"epoch": 1.6880584890333061,
"grad_norm": 0.42866796779932836,
"learning_rate": 4.77304330883067e-06,
"loss": 0.5085,
"step": 1385
},
{
"epoch": 1.68927701056052,
"grad_norm": 0.4475021559493066,
"learning_rate": 4.765956018109607e-06,
"loss": 0.5505,
"step": 1386
},
{
"epoch": 1.6904955320877335,
"grad_norm": 0.4697192585313218,
"learning_rate": 4.758869198628296e-06,
"loss": 0.5479,
"step": 1387
},
{
"epoch": 1.6917140536149473,
"grad_norm": 0.465791753930643,
"learning_rate": 4.7517828646558115e-06,
"loss": 0.56,
"step": 1388
},
{
"epoch": 1.692932575142161,
"grad_norm": 0.4015038202394012,
"learning_rate": 4.744697030460248e-06,
"loss": 0.5492,
"step": 1389
},
{
"epoch": 1.6941510966693745,
"grad_norm": 0.5232226854854597,
"learning_rate": 4.7376117103086974e-06,
"loss": 0.5464,
"step": 1390
},
{
"epoch": 1.695369618196588,
"grad_norm": 0.4518351945360455,
"learning_rate": 4.730526918467217e-06,
"loss": 0.533,
"step": 1391
},
{
"epoch": 1.6965881397238016,
"grad_norm": 0.4769324521614458,
"learning_rate": 4.7234426692007985e-06,
"loss": 0.6265,
"step": 1392
},
{
"epoch": 1.6978066612510154,
"grad_norm": 0.39722631525643654,
"learning_rate": 4.716358976773342e-06,
"loss": 0.4616,
"step": 1393
},
{
"epoch": 1.6990251827782292,
"grad_norm": 0.5143439560883679,
"learning_rate": 4.7092758554476215e-06,
"loss": 0.5927,
"step": 1394
},
{
"epoch": 1.7002437043054428,
"grad_norm": 0.4893384326011186,
"learning_rate": 4.702193319485271e-06,
"loss": 0.581,
"step": 1395
},
{
"epoch": 1.7014622258326564,
"grad_norm": 0.40330674171655206,
"learning_rate": 4.695111383146738e-06,
"loss": 0.5152,
"step": 1396
},
{
"epoch": 1.70268074735987,
"grad_norm": 0.4812638116299566,
"learning_rate": 4.688030060691264e-06,
"loss": 0.6068,
"step": 1397
},
{
"epoch": 1.7038992688870835,
"grad_norm": 0.42808075960070036,
"learning_rate": 4.680949366376858e-06,
"loss": 0.5232,
"step": 1398
},
{
"epoch": 1.7051177904142973,
"grad_norm": 0.4186184139760809,
"learning_rate": 4.673869314460262e-06,
"loss": 0.5375,
"step": 1399
},
{
"epoch": 1.706336311941511,
"grad_norm": 0.4351340155979422,
"learning_rate": 4.666789919196923e-06,
"loss": 0.5493,
"step": 1400
},
{
"epoch": 1.7075548334687247,
"grad_norm": 0.5600164408896984,
"learning_rate": 4.659711194840964e-06,
"loss": 0.587,
"step": 1401
},
{
"epoch": 1.7087733549959383,
"grad_norm": 0.43365827783641364,
"learning_rate": 4.6526331556451674e-06,
"loss": 0.519,
"step": 1402
},
{
"epoch": 1.7099918765231519,
"grad_norm": 0.44015645831753214,
"learning_rate": 4.645555815860923e-06,
"loss": 0.5523,
"step": 1403
},
{
"epoch": 1.7112103980503655,
"grad_norm": 0.4552471646368589,
"learning_rate": 4.638479189738224e-06,
"loss": 0.5404,
"step": 1404
},
{
"epoch": 1.7124289195775793,
"grad_norm": 0.4535728417437257,
"learning_rate": 4.631403291525615e-06,
"loss": 0.5368,
"step": 1405
},
{
"epoch": 1.7136474411047928,
"grad_norm": 0.4734624014107752,
"learning_rate": 4.624328135470184e-06,
"loss": 0.5778,
"step": 1406
},
{
"epoch": 1.7148659626320066,
"grad_norm": 0.4934447889274217,
"learning_rate": 4.617253735817522e-06,
"loss": 0.5476,
"step": 1407
},
{
"epoch": 1.7160844841592202,
"grad_norm": 0.4984539363836997,
"learning_rate": 4.610180106811696e-06,
"loss": 0.5649,
"step": 1408
},
{
"epoch": 1.7173030056864338,
"grad_norm": 0.4848858968212611,
"learning_rate": 4.603107262695225e-06,
"loss": 0.5111,
"step": 1409
},
{
"epoch": 1.7185215272136474,
"grad_norm": 0.47036832640121645,
"learning_rate": 4.596035217709039e-06,
"loss": 0.5948,
"step": 1410
},
{
"epoch": 1.719740048740861,
"grad_norm": 0.44168165703224904,
"learning_rate": 4.588963986092468e-06,
"loss": 0.5941,
"step": 1411
},
{
"epoch": 1.7209585702680747,
"grad_norm": 0.39666220117961165,
"learning_rate": 4.5818935820832014e-06,
"loss": 0.4913,
"step": 1412
},
{
"epoch": 1.7221770917952883,
"grad_norm": 0.5025801254491269,
"learning_rate": 4.574824019917262e-06,
"loss": 0.5932,
"step": 1413
},
{
"epoch": 1.7233956133225021,
"grad_norm": 0.3845664023510723,
"learning_rate": 4.5677553138289764e-06,
"loss": 0.5369,
"step": 1414
},
{
"epoch": 1.7246141348497157,
"grad_norm": 0.42320355598590065,
"learning_rate": 4.560687478050947e-06,
"loss": 0.5294,
"step": 1415
},
{
"epoch": 1.7258326563769293,
"grad_norm": 0.4096157422530506,
"learning_rate": 4.553620526814029e-06,
"loss": 0.519,
"step": 1416
},
{
"epoch": 1.7270511779041429,
"grad_norm": 0.48631875630001814,
"learning_rate": 4.546554474347291e-06,
"loss": 0.6101,
"step": 1417
},
{
"epoch": 1.7282696994313567,
"grad_norm": 0.4768787594020578,
"learning_rate": 4.539489334877992e-06,
"loss": 0.5629,
"step": 1418
},
{
"epoch": 1.7294882209585702,
"grad_norm": 0.41978448851594347,
"learning_rate": 4.532425122631559e-06,
"loss": 0.5365,
"step": 1419
},
{
"epoch": 1.730706742485784,
"grad_norm": 0.4298141402145644,
"learning_rate": 4.5253618518315455e-06,
"loss": 0.5346,
"step": 1420
},
{
"epoch": 1.7319252640129976,
"grad_norm": 0.43330287443239485,
"learning_rate": 4.5182995366996115e-06,
"loss": 0.565,
"step": 1421
},
{
"epoch": 1.7331437855402112,
"grad_norm": 0.4618063094916825,
"learning_rate": 4.511238191455491e-06,
"loss": 0.5669,
"step": 1422
},
{
"epoch": 1.7343623070674248,
"grad_norm": 0.4330349372337,
"learning_rate": 4.504177830316971e-06,
"loss": 0.5563,
"step": 1423
},
{
"epoch": 1.7355808285946384,
"grad_norm": 0.4061046490367817,
"learning_rate": 4.497118467499852e-06,
"loss": 0.5371,
"step": 1424
},
{
"epoch": 1.7367993501218522,
"grad_norm": 0.4524064658816882,
"learning_rate": 4.490060117217925e-06,
"loss": 0.5273,
"step": 1425
},
{
"epoch": 1.738017871649066,
"grad_norm": 0.4153684807216417,
"learning_rate": 4.483002793682941e-06,
"loss": 0.5202,
"step": 1426
},
{
"epoch": 1.7392363931762795,
"grad_norm": 0.5126499568306361,
"learning_rate": 4.475946511104588e-06,
"loss": 0.5964,
"step": 1427
},
{
"epoch": 1.7404549147034931,
"grad_norm": 0.442175693450011,
"learning_rate": 4.468891283690454e-06,
"loss": 0.514,
"step": 1428
},
{
"epoch": 1.7416734362307067,
"grad_norm": 0.421309384527005,
"learning_rate": 4.461837125646007e-06,
"loss": 0.6091,
"step": 1429
},
{
"epoch": 1.7428919577579203,
"grad_norm": 0.4380243684629681,
"learning_rate": 4.4547840511745565e-06,
"loss": 0.4913,
"step": 1430
},
{
"epoch": 1.744110479285134,
"grad_norm": 0.4812216276097867,
"learning_rate": 4.447732074477233e-06,
"loss": 0.5582,
"step": 1431
},
{
"epoch": 1.7453290008123477,
"grad_norm": 0.40488056766666325,
"learning_rate": 4.440681209752955e-06,
"loss": 0.5758,
"step": 1432
},
{
"epoch": 1.7465475223395615,
"grad_norm": 0.4732265653920416,
"learning_rate": 4.433631471198406e-06,
"loss": 0.5962,
"step": 1433
},
{
"epoch": 1.747766043866775,
"grad_norm": 0.42539261148413177,
"learning_rate": 4.426582873007999e-06,
"loss": 0.4769,
"step": 1434
},
{
"epoch": 1.7489845653939886,
"grad_norm": 0.512036705158376,
"learning_rate": 4.4195354293738484e-06,
"loss": 0.582,
"step": 1435
},
{
"epoch": 1.7502030869212022,
"grad_norm": 0.4305096055128761,
"learning_rate": 4.412489154485752e-06,
"loss": 0.5326,
"step": 1436
},
{
"epoch": 1.7514216084484158,
"grad_norm": 0.5036201316708941,
"learning_rate": 4.405444062531145e-06,
"loss": 0.579,
"step": 1437
},
{
"epoch": 1.7526401299756296,
"grad_norm": 0.42814700978676606,
"learning_rate": 4.3984001676950875e-06,
"loss": 0.5706,
"step": 1438
},
{
"epoch": 1.7538586515028434,
"grad_norm": 0.4336700472324628,
"learning_rate": 4.391357484160223e-06,
"loss": 0.5429,
"step": 1439
},
{
"epoch": 1.755077173030057,
"grad_norm": 0.4197620066836796,
"learning_rate": 4.384316026106766e-06,
"loss": 0.5312,
"step": 1440
},
{
"epoch": 1.7562956945572705,
"grad_norm": 0.4358185412850227,
"learning_rate": 4.377275807712453e-06,
"loss": 0.5601,
"step": 1441
},
{
"epoch": 1.757514216084484,
"grad_norm": 0.4593898380941711,
"learning_rate": 4.37023684315253e-06,
"loss": 0.5522,
"step": 1442
},
{
"epoch": 1.7587327376116977,
"grad_norm": 0.41694136338662585,
"learning_rate": 4.363199146599717e-06,
"loss": 0.5436,
"step": 1443
},
{
"epoch": 1.7599512591389115,
"grad_norm": 0.41051974887386045,
"learning_rate": 4.3561627322241815e-06,
"loss": 0.5484,
"step": 1444
},
{
"epoch": 1.761169780666125,
"grad_norm": 0.49654495683052513,
"learning_rate": 4.34912761419351e-06,
"loss": 0.5471,
"step": 1445
},
{
"epoch": 1.7623883021933389,
"grad_norm": 0.46755105267929675,
"learning_rate": 4.342093806672678e-06,
"loss": 0.5675,
"step": 1446
},
{
"epoch": 1.7636068237205524,
"grad_norm": 0.4560949973440655,
"learning_rate": 4.335061323824019e-06,
"loss": 0.5921,
"step": 1447
},
{
"epoch": 1.764825345247766,
"grad_norm": 0.4254462067059595,
"learning_rate": 4.328030179807207e-06,
"loss": 0.4801,
"step": 1448
},
{
"epoch": 1.7660438667749796,
"grad_norm": 0.43590945113760904,
"learning_rate": 4.321000388779214e-06,
"loss": 0.55,
"step": 1449
},
{
"epoch": 1.7672623883021932,
"grad_norm": 0.45385792985801476,
"learning_rate": 4.313971964894289e-06,
"loss": 0.5936,
"step": 1450
},
{
"epoch": 1.768480909829407,
"grad_norm": 0.45173148922198614,
"learning_rate": 4.306944922303932e-06,
"loss": 0.5198,
"step": 1451
},
{
"epoch": 1.7696994313566208,
"grad_norm": 0.4738870866999846,
"learning_rate": 4.299919275156857e-06,
"loss": 0.5695,
"step": 1452
},
{
"epoch": 1.7709179528838344,
"grad_norm": 0.4308222859015806,
"learning_rate": 4.292895037598968e-06,
"loss": 0.5302,
"step": 1453
},
{
"epoch": 1.772136474411048,
"grad_norm": 0.43194034641229945,
"learning_rate": 4.285872223773336e-06,
"loss": 0.5277,
"step": 1454
},
{
"epoch": 1.7733549959382615,
"grad_norm": 0.44969920461261864,
"learning_rate": 4.278850847820161e-06,
"loss": 0.5552,
"step": 1455
},
{
"epoch": 1.774573517465475,
"grad_norm": 0.45716879761679,
"learning_rate": 4.2718309238767485e-06,
"loss": 0.5785,
"step": 1456
},
{
"epoch": 1.775792038992689,
"grad_norm": 0.4340108500215334,
"learning_rate": 4.264812466077486e-06,
"loss": 0.5973,
"step": 1457
},
{
"epoch": 1.7770105605199025,
"grad_norm": 0.40605446125162264,
"learning_rate": 4.2577954885537985e-06,
"loss": 0.5262,
"step": 1458
},
{
"epoch": 1.7782290820471163,
"grad_norm": 0.4862703366986213,
"learning_rate": 4.2507800054341385e-06,
"loss": 0.576,
"step": 1459
},
{
"epoch": 1.7794476035743299,
"grad_norm": 0.48359582578678745,
"learning_rate": 4.243766030843947e-06,
"loss": 0.5998,
"step": 1460
},
{
"epoch": 1.7806661251015434,
"grad_norm": 0.3661204928776939,
"learning_rate": 4.236753578905627e-06,
"loss": 0.4968,
"step": 1461
},
{
"epoch": 1.781884646628757,
"grad_norm": 0.43106534453803774,
"learning_rate": 4.229742663738521e-06,
"loss": 0.5418,
"step": 1462
},
{
"epoch": 1.7831031681559708,
"grad_norm": 0.48202616303627543,
"learning_rate": 4.2227332994588666e-06,
"loss": 0.5486,
"step": 1463
},
{
"epoch": 1.7843216896831844,
"grad_norm": 0.46787213059943966,
"learning_rate": 4.215725500179788e-06,
"loss": 0.5394,
"step": 1464
},
{
"epoch": 1.7855402112103982,
"grad_norm": 0.4786631164932734,
"learning_rate": 4.208719280011255e-06,
"loss": 0.6512,
"step": 1465
},
{
"epoch": 1.7867587327376118,
"grad_norm": 0.5007334603891063,
"learning_rate": 4.2017146530600585e-06,
"loss": 0.5262,
"step": 1466
},
{
"epoch": 1.7879772542648253,
"grad_norm": 0.4766688726563304,
"learning_rate": 4.194711633429782e-06,
"loss": 0.4996,
"step": 1467
},
{
"epoch": 1.789195775792039,
"grad_norm": 0.46491040345633633,
"learning_rate": 4.1877102352207695e-06,
"loss": 0.5968,
"step": 1468
},
{
"epoch": 1.7904142973192525,
"grad_norm": 0.4085352403505702,
"learning_rate": 4.180710472530105e-06,
"loss": 0.5262,
"step": 1469
},
{
"epoch": 1.7916328188464663,
"grad_norm": 0.43126540204858976,
"learning_rate": 4.173712359451576e-06,
"loss": 0.5407,
"step": 1470
},
{
"epoch": 1.7928513403736799,
"grad_norm": 0.5202009737302775,
"learning_rate": 4.16671591007565e-06,
"loss": 0.5644,
"step": 1471
},
{
"epoch": 1.7940698619008937,
"grad_norm": 0.43231572065106405,
"learning_rate": 4.159721138489445e-06,
"loss": 0.5143,
"step": 1472
},
{
"epoch": 1.7952883834281073,
"grad_norm": 0.4626044446914442,
"learning_rate": 4.152728058776701e-06,
"loss": 0.5853,
"step": 1473
},
{
"epoch": 1.7965069049553208,
"grad_norm": 0.43407883748754916,
"learning_rate": 4.145736685017749e-06,
"loss": 0.5239,
"step": 1474
},
{
"epoch": 1.7977254264825344,
"grad_norm": 0.4267857290356126,
"learning_rate": 4.138747031289485e-06,
"loss": 0.5558,
"step": 1475
},
{
"epoch": 1.7989439480097482,
"grad_norm": 0.4315508799133083,
"learning_rate": 4.131759111665349e-06,
"loss": 0.5807,
"step": 1476
},
{
"epoch": 1.8001624695369618,
"grad_norm": 0.4048772175153014,
"learning_rate": 4.124772940215279e-06,
"loss": 0.508,
"step": 1477
},
{
"epoch": 1.8013809910641756,
"grad_norm": 0.4268392177126804,
"learning_rate": 4.1177885310057045e-06,
"loss": 0.552,
"step": 1478
},
{
"epoch": 1.8025995125913892,
"grad_norm": 0.4495564163997895,
"learning_rate": 4.110805898099492e-06,
"loss": 0.5669,
"step": 1479
},
{
"epoch": 1.8038180341186028,
"grad_norm": 0.4570284740109343,
"learning_rate": 4.103825055555947e-06,
"loss": 0.5503,
"step": 1480
},
{
"epoch": 1.8050365556458163,
"grad_norm": 0.45712926339273185,
"learning_rate": 4.096846017430758e-06,
"loss": 0.5861,
"step": 1481
},
{
"epoch": 1.80625507717303,
"grad_norm": 0.4363450699012883,
"learning_rate": 4.0898687977759895e-06,
"loss": 0.5698,
"step": 1482
},
{
"epoch": 1.8074735987002437,
"grad_norm": 0.36642253412778386,
"learning_rate": 4.08289341064004e-06,
"loss": 0.4882,
"step": 1483
},
{
"epoch": 1.8086921202274575,
"grad_norm": 0.4626576143609871,
"learning_rate": 4.075919870067617e-06,
"loss": 0.5695,
"step": 1484
},
{
"epoch": 1.809910641754671,
"grad_norm": 0.46018408439267183,
"learning_rate": 4.068948190099711e-06,
"loss": 0.5529,
"step": 1485
},
{
"epoch": 1.8111291632818847,
"grad_norm": 0.4119449731431994,
"learning_rate": 4.06197838477357e-06,
"loss": 0.5024,
"step": 1486
},
{
"epoch": 1.8123476848090982,
"grad_norm": 0.4015730766144408,
"learning_rate": 4.0550104681226635e-06,
"loss": 0.5656,
"step": 1487
},
{
"epoch": 1.8135662063363118,
"grad_norm": 0.4503343260237984,
"learning_rate": 4.048044454176658e-06,
"loss": 0.5661,
"step": 1488
},
{
"epoch": 1.8147847278635256,
"grad_norm": 0.43240190245880916,
"learning_rate": 4.041080356961393e-06,
"loss": 0.4974,
"step": 1489
},
{
"epoch": 1.8160032493907392,
"grad_norm": 0.4734473361008657,
"learning_rate": 4.034118190498843e-06,
"loss": 0.5663,
"step": 1490
},
{
"epoch": 1.817221770917953,
"grad_norm": 0.43362890265223014,
"learning_rate": 4.0271579688071e-06,
"loss": 0.5531,
"step": 1491
},
{
"epoch": 1.8184402924451666,
"grad_norm": 0.46894586845233727,
"learning_rate": 4.020199705900335e-06,
"loss": 0.5534,
"step": 1492
},
{
"epoch": 1.8196588139723802,
"grad_norm": 0.5328522267534698,
"learning_rate": 4.013243415788783e-06,
"loss": 0.6018,
"step": 1493
},
{
"epoch": 1.8208773354995937,
"grad_norm": 0.41829831723127575,
"learning_rate": 4.0062891124787e-06,
"loss": 0.5562,
"step": 1494
},
{
"epoch": 1.8220958570268073,
"grad_norm": 0.45791268251247896,
"learning_rate": 3.999336809972343e-06,
"loss": 0.5226,
"step": 1495
},
{
"epoch": 1.8233143785540211,
"grad_norm": 0.52749121116633,
"learning_rate": 3.99238652226794e-06,
"loss": 0.5885,
"step": 1496
},
{
"epoch": 1.824532900081235,
"grad_norm": 0.4102080465654976,
"learning_rate": 3.985438263359667e-06,
"loss": 0.4996,
"step": 1497
},
{
"epoch": 1.8257514216084485,
"grad_norm": 0.453636908099918,
"learning_rate": 3.978492047237608e-06,
"loss": 0.568,
"step": 1498
},
{
"epoch": 1.826969943135662,
"grad_norm": 0.49160181331071584,
"learning_rate": 3.971547887887742e-06,
"loss": 0.574,
"step": 1499
},
{
"epoch": 1.8281884646628757,
"grad_norm": 0.4128170671886108,
"learning_rate": 3.964605799291897e-06,
"loss": 0.4792,
"step": 1500
},
{
"epoch": 1.8294069861900892,
"grad_norm": 0.4714468421227976,
"learning_rate": 3.9576657954277406e-06,
"loss": 0.5527,
"step": 1501
},
{
"epoch": 1.830625507717303,
"grad_norm": 0.4759788646029719,
"learning_rate": 3.950727890268736e-06,
"loss": 0.564,
"step": 1502
},
{
"epoch": 1.8318440292445166,
"grad_norm": 0.4269752606026449,
"learning_rate": 3.943792097784126e-06,
"loss": 0.5733,
"step": 1503
},
{
"epoch": 1.8330625507717304,
"grad_norm": 0.4407801675115479,
"learning_rate": 3.936858431938899e-06,
"loss": 0.501,
"step": 1504
},
{
"epoch": 1.834281072298944,
"grad_norm": 0.417785240435009,
"learning_rate": 3.929926906693757e-06,
"loss": 0.5292,
"step": 1505
},
{
"epoch": 1.8354995938261576,
"grad_norm": 0.4954357818413886,
"learning_rate": 3.922997536005094e-06,
"loss": 0.5834,
"step": 1506
},
{
"epoch": 1.8367181153533712,
"grad_norm": 0.4520246987276718,
"learning_rate": 3.91607033382497e-06,
"loss": 0.601,
"step": 1507
},
{
"epoch": 1.8379366368805847,
"grad_norm": 0.41894543258809586,
"learning_rate": 3.909145314101074e-06,
"loss": 0.5201,
"step": 1508
},
{
"epoch": 1.8391551584077985,
"grad_norm": 0.48207218741173996,
"learning_rate": 3.9022224907767e-06,
"loss": 0.5478,
"step": 1509
},
{
"epoch": 1.8403736799350123,
"grad_norm": 0.45664125938234335,
"learning_rate": 3.895301877790728e-06,
"loss": 0.5646,
"step": 1510
},
{
"epoch": 1.841592201462226,
"grad_norm": 0.4171767656165807,
"learning_rate": 3.888383489077576e-06,
"loss": 0.511,
"step": 1511
},
{
"epoch": 1.8428107229894395,
"grad_norm": 0.43354385082610986,
"learning_rate": 3.88146733856719e-06,
"loss": 0.526,
"step": 1512
},
{
"epoch": 1.844029244516653,
"grad_norm": 0.4920523523977966,
"learning_rate": 3.874553440185008e-06,
"loss": 0.5767,
"step": 1513
},
{
"epoch": 1.8452477660438666,
"grad_norm": 0.46705637995124133,
"learning_rate": 3.867641807851935e-06,
"loss": 0.5835,
"step": 1514
},
{
"epoch": 1.8464662875710804,
"grad_norm": 0.4461828469934018,
"learning_rate": 3.860732455484314e-06,
"loss": 0.4961,
"step": 1515
},
{
"epoch": 1.847684809098294,
"grad_norm": 0.4633901834358105,
"learning_rate": 3.853825396993891e-06,
"loss": 0.5811,
"step": 1516
},
{
"epoch": 1.8489033306255078,
"grad_norm": 0.4438313726356196,
"learning_rate": 3.8469206462878e-06,
"loss": 0.5655,
"step": 1517
},
{
"epoch": 1.8501218521527214,
"grad_norm": 0.4546281191367206,
"learning_rate": 3.840018217268527e-06,
"loss": 0.5556,
"step": 1518
},
{
"epoch": 1.851340373679935,
"grad_norm": 0.39692829259522316,
"learning_rate": 3.833118123833881e-06,
"loss": 0.5083,
"step": 1519
},
{
"epoch": 1.8525588952071486,
"grad_norm": 0.4367611773412124,
"learning_rate": 3.826220379876974e-06,
"loss": 0.5621,
"step": 1520
},
{
"epoch": 1.8537774167343624,
"grad_norm": 0.46250207668673327,
"learning_rate": 3.819324999286177e-06,
"loss": 0.5502,
"step": 1521
},
{
"epoch": 1.854995938261576,
"grad_norm": 0.42252748085937586,
"learning_rate": 3.8124319959451133e-06,
"loss": 0.5428,
"step": 1522
},
{
"epoch": 1.8562144597887897,
"grad_norm": 0.42520604252823624,
"learning_rate": 3.8055413837326133e-06,
"loss": 0.5484,
"step": 1523
},
{
"epoch": 1.8574329813160033,
"grad_norm": 0.41242969185302275,
"learning_rate": 3.7986531765226965e-06,
"loss": 0.521,
"step": 1524
},
{
"epoch": 1.858651502843217,
"grad_norm": 0.43001901614946786,
"learning_rate": 3.7917673881845373e-06,
"loss": 0.5943,
"step": 1525
},
{
"epoch": 1.8598700243704305,
"grad_norm": 0.4097185174805625,
"learning_rate": 3.7848840325824428e-06,
"loss": 0.5407,
"step": 1526
},
{
"epoch": 1.861088545897644,
"grad_norm": 0.4509948723964594,
"learning_rate": 3.778003123575815e-06,
"loss": 0.5526,
"step": 1527
},
{
"epoch": 1.8623070674248579,
"grad_norm": 0.458525992527633,
"learning_rate": 3.77112467501914e-06,
"loss": 0.5546,
"step": 1528
},
{
"epoch": 1.8635255889520714,
"grad_norm": 0.407821722457764,
"learning_rate": 3.7642487007619417e-06,
"loss": 0.5205,
"step": 1529
},
{
"epoch": 1.8647441104792852,
"grad_norm": 0.4630986805415289,
"learning_rate": 3.757375214648764e-06,
"loss": 0.5733,
"step": 1530
},
{
"epoch": 1.8659626320064988,
"grad_norm": 0.46336209457236627,
"learning_rate": 3.7505042305191463e-06,
"loss": 0.5653,
"step": 1531
},
{
"epoch": 1.8671811535337124,
"grad_norm": 0.39311369649530103,
"learning_rate": 3.743635762207582e-06,
"loss": 0.5342,
"step": 1532
},
{
"epoch": 1.868399675060926,
"grad_norm": 0.42025451422654897,
"learning_rate": 3.7367698235435036e-06,
"loss": 0.5474,
"step": 1533
},
{
"epoch": 1.8696181965881398,
"grad_norm": 0.42303104824107784,
"learning_rate": 3.72990642835125e-06,
"loss": 0.52,
"step": 1534
},
{
"epoch": 1.8708367181153533,
"grad_norm": 0.40431884043673944,
"learning_rate": 3.7230455904500385e-06,
"loss": 0.5468,
"step": 1535
},
{
"epoch": 1.8720552396425671,
"grad_norm": 0.4312111249145443,
"learning_rate": 3.716187323653939e-06,
"loss": 0.5888,
"step": 1536
},
{
"epoch": 1.8732737611697807,
"grad_norm": 0.41823157797464644,
"learning_rate": 3.7093316417718407e-06,
"loss": 0.5638,
"step": 1537
},
{
"epoch": 1.8744922826969943,
"grad_norm": 0.42521945264285177,
"learning_rate": 3.702478558607429e-06,
"loss": 0.5357,
"step": 1538
},
{
"epoch": 1.8757108042242079,
"grad_norm": 0.4652010212406273,
"learning_rate": 3.695628087959162e-06,
"loss": 0.5809,
"step": 1539
},
{
"epoch": 1.8769293257514215,
"grad_norm": 0.399398106227539,
"learning_rate": 3.6887802436202307e-06,
"loss": 0.5233,
"step": 1540
},
{
"epoch": 1.8781478472786353,
"grad_norm": 0.40874149994794573,
"learning_rate": 3.6819350393785445e-06,
"loss": 0.5534,
"step": 1541
},
{
"epoch": 1.879366368805849,
"grad_norm": 0.4553334343530874,
"learning_rate": 3.675092489016693e-06,
"loss": 0.5369,
"step": 1542
},
{
"epoch": 1.8805848903330626,
"grad_norm": 0.40028666583281924,
"learning_rate": 3.6682526063119206e-06,
"loss": 0.5209,
"step": 1543
},
{
"epoch": 1.8818034118602762,
"grad_norm": 0.41838120396321116,
"learning_rate": 3.661415405036103e-06,
"loss": 0.5752,
"step": 1544
},
{
"epoch": 1.8830219333874898,
"grad_norm": 0.4136128207763801,
"learning_rate": 3.654580898955721e-06,
"loss": 0.5277,
"step": 1545
},
{
"epoch": 1.8842404549147034,
"grad_norm": 0.4024921294917515,
"learning_rate": 3.647749101831821e-06,
"loss": 0.5239,
"step": 1546
},
{
"epoch": 1.8854589764419172,
"grad_norm": 0.4141269485652032,
"learning_rate": 3.640920027420001e-06,
"loss": 0.5508,
"step": 1547
},
{
"epoch": 1.8866774979691308,
"grad_norm": 0.440719562642394,
"learning_rate": 3.6340936894703717e-06,
"loss": 0.5702,
"step": 1548
},
{
"epoch": 1.8878960194963446,
"grad_norm": 0.4786206622488289,
"learning_rate": 3.6272701017275385e-06,
"loss": 0.5721,
"step": 1549
},
{
"epoch": 1.8891145410235581,
"grad_norm": 0.4129960293133917,
"learning_rate": 3.6204492779305678e-06,
"loss": 0.5382,
"step": 1550
},
{
"epoch": 1.8903330625507717,
"grad_norm": 0.425696733632996,
"learning_rate": 3.61363123181296e-06,
"loss": 0.546,
"step": 1551
},
{
"epoch": 1.8915515840779853,
"grad_norm": 0.4653679268493305,
"learning_rate": 3.6068159771026267e-06,
"loss": 0.5789,
"step": 1552
},
{
"epoch": 1.8927701056051989,
"grad_norm": 0.4522003365392251,
"learning_rate": 3.6000035275218515e-06,
"loss": 0.5224,
"step": 1553
},
{
"epoch": 1.8939886271324127,
"grad_norm": 0.3920499944414549,
"learning_rate": 3.593193896787277e-06,
"loss": 0.4976,
"step": 1554
},
{
"epoch": 1.8952071486596265,
"grad_norm": 0.45058258754829983,
"learning_rate": 3.5863870986098655e-06,
"loss": 0.5745,
"step": 1555
},
{
"epoch": 1.89642567018684,
"grad_norm": 0.4244815509498337,
"learning_rate": 3.5795831466948805e-06,
"loss": 0.5414,
"step": 1556
},
{
"epoch": 1.8976441917140536,
"grad_norm": 0.4331016099950002,
"learning_rate": 3.5727820547418525e-06,
"loss": 0.539,
"step": 1557
},
{
"epoch": 1.8988627132412672,
"grad_norm": 0.417995629833821,
"learning_rate": 3.5659838364445505e-06,
"loss": 0.5156,
"step": 1558
},
{
"epoch": 1.9000812347684808,
"grad_norm": 0.4734474494296379,
"learning_rate": 3.5591885054909605e-06,
"loss": 0.5925,
"step": 1559
},
{
"epoch": 1.9012997562956946,
"grad_norm": 0.46115486081361745,
"learning_rate": 3.5523960755632573e-06,
"loss": 0.5091,
"step": 1560
},
{
"epoch": 1.9025182778229082,
"grad_norm": 0.40875274875883305,
"learning_rate": 3.5456065603377697e-06,
"loss": 0.5567,
"step": 1561
},
{
"epoch": 1.903736799350122,
"grad_norm": 0.45386074829816486,
"learning_rate": 3.5388199734849626e-06,
"loss": 0.5578,
"step": 1562
},
{
"epoch": 1.9049553208773355,
"grad_norm": 0.38709828752403175,
"learning_rate": 3.5320363286694015e-06,
"loss": 0.5179,
"step": 1563
},
{
"epoch": 1.9061738424045491,
"grad_norm": 0.42735900716697534,
"learning_rate": 3.5252556395497274e-06,
"loss": 0.5712,
"step": 1564
},
{
"epoch": 1.9073923639317627,
"grad_norm": 0.4181700954501109,
"learning_rate": 3.518477919778631e-06,
"loss": 0.5781,
"step": 1565
},
{
"epoch": 1.9086108854589763,
"grad_norm": 0.421517198534864,
"learning_rate": 3.5117031830028274e-06,
"loss": 0.5214,
"step": 1566
},
{
"epoch": 1.90982940698619,
"grad_norm": 0.44082135382564575,
"learning_rate": 3.504931442863023e-06,
"loss": 0.5929,
"step": 1567
},
{
"epoch": 1.9110479285134039,
"grad_norm": 0.3829707161093217,
"learning_rate": 3.49816271299389e-06,
"loss": 0.4973,
"step": 1568
},
{
"epoch": 1.9122664500406175,
"grad_norm": 0.4241401054720212,
"learning_rate": 3.4913970070240388e-06,
"loss": 0.5694,
"step": 1569
},
{
"epoch": 1.913484971567831,
"grad_norm": 0.4287983948624245,
"learning_rate": 3.484634338575995e-06,
"loss": 0.5123,
"step": 1570
},
{
"epoch": 1.9147034930950446,
"grad_norm": 0.40950888500677163,
"learning_rate": 3.4778747212661647e-06,
"loss": 0.5595,
"step": 1571
},
{
"epoch": 1.9159220146222582,
"grad_norm": 0.4272739781741268,
"learning_rate": 3.4711181687048114e-06,
"loss": 0.5609,
"step": 1572
},
{
"epoch": 1.917140536149472,
"grad_norm": 0.41564421693161757,
"learning_rate": 3.464364694496031e-06,
"loss": 0.5336,
"step": 1573
},
{
"epoch": 1.9183590576766856,
"grad_norm": 0.4359864387668293,
"learning_rate": 3.457614312237716e-06,
"loss": 0.5371,
"step": 1574
},
{
"epoch": 1.9195775792038994,
"grad_norm": 0.4799831871173569,
"learning_rate": 3.450867035521536e-06,
"loss": 0.5299,
"step": 1575
},
{
"epoch": 1.920796100731113,
"grad_norm": 0.4453426614702043,
"learning_rate": 3.4441228779329073e-06,
"loss": 0.5502,
"step": 1576
},
{
"epoch": 1.9220146222583265,
"grad_norm": 0.4238694285973907,
"learning_rate": 3.4373818530509686e-06,
"loss": 0.5275,
"step": 1577
},
{
"epoch": 1.9232331437855401,
"grad_norm": 0.41917397616804203,
"learning_rate": 3.4306439744485453e-06,
"loss": 0.5761,
"step": 1578
},
{
"epoch": 1.924451665312754,
"grad_norm": 0.425933885933589,
"learning_rate": 3.423909255692137e-06,
"loss": 0.515,
"step": 1579
},
{
"epoch": 1.9256701868399675,
"grad_norm": 0.4456890458176112,
"learning_rate": 3.417177710341868e-06,
"loss": 0.5522,
"step": 1580
},
{
"epoch": 1.9268887083671813,
"grad_norm": 0.41653994234849523,
"learning_rate": 3.4104493519514844e-06,
"loss": 0.5675,
"step": 1581
},
{
"epoch": 1.9281072298943949,
"grad_norm": 0.4065472343645043,
"learning_rate": 3.40372419406831e-06,
"loss": 0.5016,
"step": 1582
},
{
"epoch": 1.9293257514216084,
"grad_norm": 0.4554171323483848,
"learning_rate": 3.3970022502332273e-06,
"loss": 0.5919,
"step": 1583
},
{
"epoch": 1.930544272948822,
"grad_norm": 0.4247465501155384,
"learning_rate": 3.3902835339806463e-06,
"loss": 0.565,
"step": 1584
},
{
"epoch": 1.9317627944760356,
"grad_norm": 0.44295772231878283,
"learning_rate": 3.3835680588384767e-06,
"loss": 0.5046,
"step": 1585
},
{
"epoch": 1.9329813160032494,
"grad_norm": 0.4268447826543325,
"learning_rate": 3.3768558383281024e-06,
"loss": 0.5193,
"step": 1586
},
{
"epoch": 1.934199837530463,
"grad_norm": 0.44799827489237776,
"learning_rate": 3.3701468859643583e-06,
"loss": 0.5631,
"step": 1587
},
{
"epoch": 1.9354183590576768,
"grad_norm": 0.4332023943083594,
"learning_rate": 3.363441215255495e-06,
"loss": 0.5724,
"step": 1588
},
{
"epoch": 1.9366368805848904,
"grad_norm": 0.3996520998473681,
"learning_rate": 3.356738839703158e-06,
"loss": 0.5255,
"step": 1589
},
{
"epoch": 1.937855402112104,
"grad_norm": 0.44409520862327806,
"learning_rate": 3.3500397728023536e-06,
"loss": 0.5425,
"step": 1590
},
{
"epoch": 1.9390739236393175,
"grad_norm": 0.4405722390495154,
"learning_rate": 3.343344028041433e-06,
"loss": 0.6053,
"step": 1591
},
{
"epoch": 1.9402924451665313,
"grad_norm": 0.40826303676438436,
"learning_rate": 3.336651618902054e-06,
"loss": 0.5324,
"step": 1592
},
{
"epoch": 1.941510966693745,
"grad_norm": 0.3851707449193732,
"learning_rate": 3.3299625588591568e-06,
"loss": 0.5088,
"step": 1593
},
{
"epoch": 1.9427294882209587,
"grad_norm": 0.40859643518575894,
"learning_rate": 3.3232768613809453e-06,
"loss": 0.581,
"step": 1594
},
{
"epoch": 1.9439480097481723,
"grad_norm": 0.3722542671220263,
"learning_rate": 3.316594539928845e-06,
"loss": 0.4977,
"step": 1595
},
{
"epoch": 1.9451665312753859,
"grad_norm": 0.4383598182132238,
"learning_rate": 3.309915607957487e-06,
"loss": 0.6137,
"step": 1596
},
{
"epoch": 1.9463850528025994,
"grad_norm": 0.4011690274677538,
"learning_rate": 3.303240078914679e-06,
"loss": 0.563,
"step": 1597
},
{
"epoch": 1.947603574329813,
"grad_norm": 0.36889683412600394,
"learning_rate": 3.2965679662413772e-06,
"loss": 0.4968,
"step": 1598
},
{
"epoch": 1.9488220958570268,
"grad_norm": 0.43411779671306283,
"learning_rate": 3.289899283371657e-06,
"loss": 0.5802,
"step": 1599
},
{
"epoch": 1.9500406173842406,
"grad_norm": 0.3803158460715809,
"learning_rate": 3.283234043732689e-06,
"loss": 0.5093,
"step": 1600
},
{
"epoch": 1.9512591389114542,
"grad_norm": 0.4391648613289349,
"learning_rate": 3.276572260744709e-06,
"loss": 0.565,
"step": 1601
},
{
"epoch": 1.9524776604386678,
"grad_norm": 0.40723374350566227,
"learning_rate": 3.2699139478209987e-06,
"loss": 0.514,
"step": 1602
},
{
"epoch": 1.9536961819658814,
"grad_norm": 0.454313203169353,
"learning_rate": 3.263259118367845e-06,
"loss": 0.6135,
"step": 1603
},
{
"epoch": 1.954914703493095,
"grad_norm": 0.4178433726812962,
"learning_rate": 3.256607785784527e-06,
"loss": 0.5301,
"step": 1604
},
{
"epoch": 1.9561332250203087,
"grad_norm": 0.42422765865196377,
"learning_rate": 3.249959963463283e-06,
"loss": 0.5278,
"step": 1605
},
{
"epoch": 1.9573517465475223,
"grad_norm": 0.4507513531340492,
"learning_rate": 3.2433156647892784e-06,
"loss": 0.5154,
"step": 1606
},
{
"epoch": 1.958570268074736,
"grad_norm": 0.4155636470893334,
"learning_rate": 3.2366749031405875e-06,
"loss": 0.5627,
"step": 1607
},
{
"epoch": 1.9597887896019497,
"grad_norm": 0.41760402999256324,
"learning_rate": 3.2300376918881628e-06,
"loss": 0.5779,
"step": 1608
},
{
"epoch": 1.9610073111291633,
"grad_norm": 0.4262673425446038,
"learning_rate": 3.223404044395808e-06,
"loss": 0.5939,
"step": 1609
},
{
"epoch": 1.9622258326563768,
"grad_norm": 0.39002137904137807,
"learning_rate": 3.216773974020152e-06,
"loss": 0.4796,
"step": 1610
},
{
"epoch": 1.9634443541835904,
"grad_norm": 0.48341233875628054,
"learning_rate": 3.210147494110618e-06,
"loss": 0.5623,
"step": 1611
},
{
"epoch": 1.9646628757108042,
"grad_norm": 0.4263996506849499,
"learning_rate": 3.203524618009403e-06,
"loss": 0.5771,
"step": 1612
},
{
"epoch": 1.965881397238018,
"grad_norm": 0.39778372039996124,
"learning_rate": 3.1969053590514487e-06,
"loss": 0.5291,
"step": 1613
},
{
"epoch": 1.9670999187652316,
"grad_norm": 0.4303659167460693,
"learning_rate": 3.19028973056441e-06,
"loss": 0.5488,
"step": 1614
},
{
"epoch": 1.9683184402924452,
"grad_norm": 0.4137612167679553,
"learning_rate": 3.1836777458686363e-06,
"loss": 0.5619,
"step": 1615
},
{
"epoch": 1.9695369618196588,
"grad_norm": 0.36766081180510835,
"learning_rate": 3.177069418277139e-06,
"loss": 0.4946,
"step": 1616
},
{
"epoch": 1.9707554833468723,
"grad_norm": 0.4182717579515874,
"learning_rate": 3.1704647610955618e-06,
"loss": 0.5297,
"step": 1617
},
{
"epoch": 1.9719740048740861,
"grad_norm": 0.4584959850560608,
"learning_rate": 3.163863787622162e-06,
"loss": 0.6143,
"step": 1618
},
{
"epoch": 1.9731925264012997,
"grad_norm": 0.4458263983902489,
"learning_rate": 3.157266511147783e-06,
"loss": 0.5079,
"step": 1619
},
{
"epoch": 1.9744110479285135,
"grad_norm": 0.43613330489917596,
"learning_rate": 3.150672944955818e-06,
"loss": 0.5714,
"step": 1620
},
{
"epoch": 1.975629569455727,
"grad_norm": 0.3858901721024831,
"learning_rate": 3.1440831023221952e-06,
"loss": 0.5283,
"step": 1621
},
{
"epoch": 1.9768480909829407,
"grad_norm": 0.40043306380620164,
"learning_rate": 3.137496996515339e-06,
"loss": 0.5618,
"step": 1622
},
{
"epoch": 1.9780666125101543,
"grad_norm": 0.4155561403542389,
"learning_rate": 3.1309146407961565e-06,
"loss": 0.5793,
"step": 1623
},
{
"epoch": 1.9792851340373678,
"grad_norm": 0.48452491106537393,
"learning_rate": 3.1243360484180012e-06,
"loss": 0.5955,
"step": 1624
},
{
"epoch": 1.9805036555645816,
"grad_norm": 0.4052625054606517,
"learning_rate": 3.117761232626648e-06,
"loss": 0.5113,
"step": 1625
},
{
"epoch": 1.9817221770917954,
"grad_norm": 0.42003854375542504,
"learning_rate": 3.111190206660273e-06,
"loss": 0.5462,
"step": 1626
},
{
"epoch": 1.982940698619009,
"grad_norm": 0.425058799574285,
"learning_rate": 3.1046229837494123e-06,
"loss": 0.5244,
"step": 1627
},
{
"epoch": 1.9841592201462226,
"grad_norm": 0.4113830672023175,
"learning_rate": 3.0980595771169543e-06,
"loss": 0.5297,
"step": 1628
},
{
"epoch": 1.9853777416734362,
"grad_norm": 0.4015669403567964,
"learning_rate": 3.091499999978097e-06,
"loss": 0.5261,
"step": 1629
},
{
"epoch": 1.9865962632006497,
"grad_norm": 0.4283955622994288,
"learning_rate": 3.0849442655403315e-06,
"loss": 0.5755,
"step": 1630
},
{
"epoch": 1.9878147847278635,
"grad_norm": 0.41898536957171045,
"learning_rate": 3.0783923870034094e-06,
"loss": 0.5468,
"step": 1631
},
{
"epoch": 1.9890333062550771,
"grad_norm": 0.39218815049699407,
"learning_rate": 3.0718443775593233e-06,
"loss": 0.5094,
"step": 1632
},
{
"epoch": 1.990251827782291,
"grad_norm": 0.4279916922662056,
"learning_rate": 3.065300250392265e-06,
"loss": 0.5914,
"step": 1633
},
{
"epoch": 1.9914703493095045,
"grad_norm": 0.41267484908021385,
"learning_rate": 3.058760018678622e-06,
"loss": 0.5182,
"step": 1634
},
{
"epoch": 1.992688870836718,
"grad_norm": 0.44135796853573733,
"learning_rate": 3.0522236955869293e-06,
"loss": 0.5306,
"step": 1635
},
{
"epoch": 1.9939073923639317,
"grad_norm": 0.47924737111572846,
"learning_rate": 3.0456912942778585e-06,
"loss": 0.5286,
"step": 1636
},
{
"epoch": 1.9951259138911455,
"grad_norm": 0.42225974588467396,
"learning_rate": 3.0391628279041797e-06,
"loss": 0.5143,
"step": 1637
},
{
"epoch": 1.996344435418359,
"grad_norm": 0.443505986438843,
"learning_rate": 3.0326383096107424e-06,
"loss": 0.603,
"step": 1638
},
{
"epoch": 1.9975629569455728,
"grad_norm": 0.43300461200802,
"learning_rate": 3.0261177525344458e-06,
"loss": 0.529,
"step": 1639
},
{
"epoch": 1.9987814784727864,
"grad_norm": 0.44310730809971327,
"learning_rate": 3.019601169804216e-06,
"loss": 0.5712,
"step": 1640
},
{
"epoch": 2.0004061738424044,
"grad_norm": 0.9468576561635692,
"learning_rate": 3.0130885745409744e-06,
"loss": 0.9149,
"step": 1641
},
{
"epoch": 2.0016246953696184,
"grad_norm": 0.4706438333540491,
"learning_rate": 3.0065799798576146e-06,
"loss": 0.4931,
"step": 1642
},
{
"epoch": 2.002843216896832,
"grad_norm": 0.4720816823665595,
"learning_rate": 3.0000753988589717e-06,
"loss": 0.4837,
"step": 1643
},
{
"epoch": 2.0040617384240456,
"grad_norm": 0.47408797235071,
"learning_rate": 2.993574844641807e-06,
"loss": 0.4923,
"step": 1644
},
{
"epoch": 2.005280259951259,
"grad_norm": 0.4519771428113875,
"learning_rate": 2.987078330294767e-06,
"loss": 0.5211,
"step": 1645
},
{
"epoch": 2.0064987814784727,
"grad_norm": 0.519886504494184,
"learning_rate": 2.9805858688983656e-06,
"loss": 0.5746,
"step": 1646
},
{
"epoch": 2.0077173030056863,
"grad_norm": 0.42316406477644747,
"learning_rate": 2.9740974735249627e-06,
"loss": 0.4762,
"step": 1647
},
{
"epoch": 2.0089358245329,
"grad_norm": 0.4459084131070543,
"learning_rate": 2.96761315723872e-06,
"loss": 0.517,
"step": 1648
},
{
"epoch": 2.010154346060114,
"grad_norm": 0.4787475311586957,
"learning_rate": 2.961132933095595e-06,
"loss": 0.5475,
"step": 1649
},
{
"epoch": 2.0113728675873275,
"grad_norm": 0.44287904587306054,
"learning_rate": 2.9546568141433007e-06,
"loss": 0.513,
"step": 1650
},
{
"epoch": 2.012591389114541,
"grad_norm": 0.3984732881743886,
"learning_rate": 2.94818481342129e-06,
"loss": 0.5093,
"step": 1651
},
{
"epoch": 2.0138099106417546,
"grad_norm": 0.4399121723080827,
"learning_rate": 2.941716943960716e-06,
"loss": 0.511,
"step": 1652
},
{
"epoch": 2.015028432168968,
"grad_norm": 0.44831302014249225,
"learning_rate": 2.9352532187844254e-06,
"loss": 0.4984,
"step": 1653
},
{
"epoch": 2.016246953696182,
"grad_norm": 0.42996322667286735,
"learning_rate": 2.9287936509069036e-06,
"loss": 0.5191,
"step": 1654
},
{
"epoch": 2.017465475223396,
"grad_norm": 0.4183124555440696,
"learning_rate": 2.9223382533342825e-06,
"loss": 0.545,
"step": 1655
},
{
"epoch": 2.0186839967506094,
"grad_norm": 0.40446852524401855,
"learning_rate": 2.915887039064287e-06,
"loss": 0.503,
"step": 1656
},
{
"epoch": 2.019902518277823,
"grad_norm": 0.46166747184685086,
"learning_rate": 2.9094400210862206e-06,
"loss": 0.5397,
"step": 1657
},
{
"epoch": 2.0211210398050365,
"grad_norm": 0.44952433573058875,
"learning_rate": 2.9029972123809425e-06,
"loss": 0.5055,
"step": 1658
},
{
"epoch": 2.02233956133225,
"grad_norm": 0.4250306396302766,
"learning_rate": 2.8965586259208295e-06,
"loss": 0.521,
"step": 1659
},
{
"epoch": 2.0235580828594637,
"grad_norm": 0.4183203719527761,
"learning_rate": 2.890124274669764e-06,
"loss": 0.4974,
"step": 1660
},
{
"epoch": 2.0247766043866773,
"grad_norm": 0.4411815617611041,
"learning_rate": 2.8836941715830943e-06,
"loss": 0.5129,
"step": 1661
},
{
"epoch": 2.0259951259138913,
"grad_norm": 0.44795060381325624,
"learning_rate": 2.8772683296076197e-06,
"loss": 0.5142,
"step": 1662
},
{
"epoch": 2.027213647441105,
"grad_norm": 0.41579452280547835,
"learning_rate": 2.8708467616815606e-06,
"loss": 0.4951,
"step": 1663
},
{
"epoch": 2.0284321689683185,
"grad_norm": 0.4012708682220002,
"learning_rate": 2.864429480734529e-06,
"loss": 0.512,
"step": 1664
},
{
"epoch": 2.029650690495532,
"grad_norm": 0.4273458204316277,
"learning_rate": 2.858016499687503e-06,
"loss": 0.5401,
"step": 1665
},
{
"epoch": 2.0308692120227456,
"grad_norm": 0.45185500116453636,
"learning_rate": 2.8516078314528082e-06,
"loss": 0.4782,
"step": 1666
},
{
"epoch": 2.032087733549959,
"grad_norm": 0.49808257517442245,
"learning_rate": 2.8452034889340874e-06,
"loss": 0.5078,
"step": 1667
},
{
"epoch": 2.033306255077173,
"grad_norm": 0.4147857957964427,
"learning_rate": 2.838803485026265e-06,
"loss": 0.5092,
"step": 1668
},
{
"epoch": 2.034524776604387,
"grad_norm": 0.4270575824323386,
"learning_rate": 2.8324078326155403e-06,
"loss": 0.5239,
"step": 1669
},
{
"epoch": 2.0357432981316004,
"grad_norm": 0.4396795083275381,
"learning_rate": 2.8260165445793417e-06,
"loss": 0.5106,
"step": 1670
},
{
"epoch": 2.036961819658814,
"grad_norm": 0.42042014253641513,
"learning_rate": 2.819629633786319e-06,
"loss": 0.4699,
"step": 1671
},
{
"epoch": 2.0381803411860275,
"grad_norm": 0.4430726691393502,
"learning_rate": 2.8132471130962997e-06,
"loss": 0.4899,
"step": 1672
},
{
"epoch": 2.039398862713241,
"grad_norm": 0.40352990345385337,
"learning_rate": 2.806868995360278e-06,
"loss": 0.5271,
"step": 1673
},
{
"epoch": 2.0406173842404547,
"grad_norm": 0.4264717440525784,
"learning_rate": 2.800495293420384e-06,
"loss": 0.5358,
"step": 1674
},
{
"epoch": 2.0418359057676687,
"grad_norm": 0.406701314146554,
"learning_rate": 2.7941260201098513e-06,
"loss": 0.5347,
"step": 1675
},
{
"epoch": 2.0430544272948823,
"grad_norm": 0.3789120115073334,
"learning_rate": 2.7877611882529978e-06,
"loss": 0.5291,
"step": 1676
},
{
"epoch": 2.044272948822096,
"grad_norm": 0.376105306550124,
"learning_rate": 2.781400810665201e-06,
"loss": 0.4798,
"step": 1677
},
{
"epoch": 2.0454914703493094,
"grad_norm": 0.41689803288099314,
"learning_rate": 2.775044900152873e-06,
"loss": 0.5603,
"step": 1678
},
{
"epoch": 2.046709991876523,
"grad_norm": 0.39084573445190973,
"learning_rate": 2.7686934695134237e-06,
"loss": 0.5172,
"step": 1679
},
{
"epoch": 2.0479285134037366,
"grad_norm": 0.3983625559018197,
"learning_rate": 2.762346531535246e-06,
"loss": 0.5169,
"step": 1680
},
{
"epoch": 2.0491470349309506,
"grad_norm": 0.4214853676308127,
"learning_rate": 2.7560040989976894e-06,
"loss": 0.4956,
"step": 1681
},
{
"epoch": 2.050365556458164,
"grad_norm": 0.3999213184120299,
"learning_rate": 2.749666184671032e-06,
"loss": 0.4772,
"step": 1682
},
{
"epoch": 2.051584077985378,
"grad_norm": 0.47662956549783836,
"learning_rate": 2.7433328013164493e-06,
"loss": 0.5384,
"step": 1683
},
{
"epoch": 2.0528025995125914,
"grad_norm": 0.41183336935775333,
"learning_rate": 2.737003961686e-06,
"loss": 0.5383,
"step": 1684
},
{
"epoch": 2.054021121039805,
"grad_norm": 0.4162288645157988,
"learning_rate": 2.730679678522592e-06,
"loss": 0.4879,
"step": 1685
},
{
"epoch": 2.0552396425670185,
"grad_norm": 0.4208621537377079,
"learning_rate": 2.724359964559958e-06,
"loss": 0.5302,
"step": 1686
},
{
"epoch": 2.0564581640942325,
"grad_norm": 0.46940669999316137,
"learning_rate": 2.7180448325226283e-06,
"loss": 0.5038,
"step": 1687
},
{
"epoch": 2.057676685621446,
"grad_norm": 0.42207329602275184,
"learning_rate": 2.711734295125913e-06,
"loss": 0.5136,
"step": 1688
},
{
"epoch": 2.0588952071486597,
"grad_norm": 0.42923148362073005,
"learning_rate": 2.705428365075868e-06,
"loss": 0.4974,
"step": 1689
},
{
"epoch": 2.0601137286758733,
"grad_norm": 0.4486303504796547,
"learning_rate": 2.6991270550692794e-06,
"loss": 0.4896,
"step": 1690
},
{
"epoch": 2.061332250203087,
"grad_norm": 0.4450084773476215,
"learning_rate": 2.692830377793614e-06,
"loss": 0.5368,
"step": 1691
},
{
"epoch": 2.0625507717303004,
"grad_norm": 0.41459790491115805,
"learning_rate": 2.686538345927027e-06,
"loss": 0.5181,
"step": 1692
},
{
"epoch": 2.063769293257514,
"grad_norm": 0.39428684445951,
"learning_rate": 2.680250972138314e-06,
"loss": 0.5002,
"step": 1693
},
{
"epoch": 2.064987814784728,
"grad_norm": 0.46824872444922333,
"learning_rate": 2.6739682690868947e-06,
"loss": 0.5303,
"step": 1694
},
{
"epoch": 2.0662063363119416,
"grad_norm": 0.4328466707209096,
"learning_rate": 2.6676902494227795e-06,
"loss": 0.5603,
"step": 1695
},
{
"epoch": 2.067424857839155,
"grad_norm": 0.3839348433674553,
"learning_rate": 2.6614169257865513e-06,
"loss": 0.4682,
"step": 1696
},
{
"epoch": 2.0686433793663688,
"grad_norm": 0.4527195063930813,
"learning_rate": 2.6551483108093378e-06,
"loss": 0.5468,
"step": 1697
},
{
"epoch": 2.0698619008935824,
"grad_norm": 0.3864719481645061,
"learning_rate": 2.6488844171127903e-06,
"loss": 0.4596,
"step": 1698
},
{
"epoch": 2.071080422420796,
"grad_norm": 0.46426343352179134,
"learning_rate": 2.6426252573090437e-06,
"loss": 0.56,
"step": 1699
},
{
"epoch": 2.07229894394801,
"grad_norm": 0.4288286079073576,
"learning_rate": 2.6363708440007136e-06,
"loss": 0.5161,
"step": 1700
},
{
"epoch": 2.0735174654752235,
"grad_norm": 0.40637110346427036,
"learning_rate": 2.6301211897808463e-06,
"loss": 0.5389,
"step": 1701
},
{
"epoch": 2.074735987002437,
"grad_norm": 0.3951734512163483,
"learning_rate": 2.623876307232919e-06,
"loss": 0.526,
"step": 1702
},
{
"epoch": 2.0759545085296507,
"grad_norm": 0.3885989062888163,
"learning_rate": 2.6176362089307873e-06,
"loss": 0.4725,
"step": 1703
},
{
"epoch": 2.0771730300568643,
"grad_norm": 0.43823376795203267,
"learning_rate": 2.611400907438685e-06,
"loss": 0.5124,
"step": 1704
},
{
"epoch": 2.078391551584078,
"grad_norm": 0.39530623859105424,
"learning_rate": 2.6051704153111847e-06,
"loss": 0.4934,
"step": 1705
},
{
"epoch": 2.0796100731112914,
"grad_norm": 0.3703827995949618,
"learning_rate": 2.598944745093174e-06,
"loss": 0.477,
"step": 1706
},
{
"epoch": 2.0808285946385054,
"grad_norm": 0.399727045865617,
"learning_rate": 2.5927239093198273e-06,
"loss": 0.5887,
"step": 1707
},
{
"epoch": 2.082047116165719,
"grad_norm": 0.366776126773729,
"learning_rate": 2.5865079205165953e-06,
"loss": 0.4682,
"step": 1708
},
{
"epoch": 2.0832656376929326,
"grad_norm": 0.4208917158039958,
"learning_rate": 2.5802967911991637e-06,
"loss": 0.5203,
"step": 1709
},
{
"epoch": 2.084484159220146,
"grad_norm": 0.4493230756017366,
"learning_rate": 2.574090533873431e-06,
"loss": 0.5273,
"step": 1710
},
{
"epoch": 2.0857026807473598,
"grad_norm": 0.4464314211780269,
"learning_rate": 2.567889161035494e-06,
"loss": 0.589,
"step": 1711
},
{
"epoch": 2.0869212022745733,
"grad_norm": 0.37995030061127644,
"learning_rate": 2.5616926851716055e-06,
"loss": 0.4443,
"step": 1712
},
{
"epoch": 2.0881397238017874,
"grad_norm": 0.43366495148118606,
"learning_rate": 2.555501118758167e-06,
"loss": 0.5068,
"step": 1713
},
{
"epoch": 2.089358245329001,
"grad_norm": 0.43859783988060524,
"learning_rate": 2.549314474261686e-06,
"loss": 0.5061,
"step": 1714
},
{
"epoch": 2.0905767668562145,
"grad_norm": 0.41699646136345586,
"learning_rate": 2.5431327641387682e-06,
"loss": 0.5149,
"step": 1715
},
{
"epoch": 2.091795288383428,
"grad_norm": 0.4456011191053756,
"learning_rate": 2.5369560008360826e-06,
"loss": 0.521,
"step": 1716
},
{
"epoch": 2.0930138099106417,
"grad_norm": 0.3959554858313093,
"learning_rate": 2.5307841967903337e-06,
"loss": 0.5048,
"step": 1717
},
{
"epoch": 2.0942323314378553,
"grad_norm": 0.41396649662012225,
"learning_rate": 2.52461736442824e-06,
"loss": 0.5162,
"step": 1718
},
{
"epoch": 2.095450852965069,
"grad_norm": 0.42258000014128283,
"learning_rate": 2.518455516166517e-06,
"loss": 0.5517,
"step": 1719
},
{
"epoch": 2.096669374492283,
"grad_norm": 0.39450373632220187,
"learning_rate": 2.512298664411841e-06,
"loss": 0.4964,
"step": 1720
},
{
"epoch": 2.0978878960194964,
"grad_norm": 0.3723209543555369,
"learning_rate": 2.5061468215608243e-06,
"loss": 0.5218,
"step": 1721
},
{
"epoch": 2.09910641754671,
"grad_norm": 0.41901342453157836,
"learning_rate": 2.5000000000000015e-06,
"loss": 0.5245,
"step": 1722
},
{
"epoch": 2.1003249390739236,
"grad_norm": 0.40321870380750857,
"learning_rate": 2.493858212105788e-06,
"loss": 0.5008,
"step": 1723
},
{
"epoch": 2.101543460601137,
"grad_norm": 0.41094871301137714,
"learning_rate": 2.487721470244473e-06,
"loss": 0.5255,
"step": 1724
},
{
"epoch": 2.1027619821283507,
"grad_norm": 0.38906496669749396,
"learning_rate": 2.481589786772178e-06,
"loss": 0.5077,
"step": 1725
},
{
"epoch": 2.1039805036555648,
"grad_norm": 0.4041745743019385,
"learning_rate": 2.4754631740348455e-06,
"loss": 0.5387,
"step": 1726
},
{
"epoch": 2.1051990251827783,
"grad_norm": 0.393377125185753,
"learning_rate": 2.4693416443682074e-06,
"loss": 0.5206,
"step": 1727
},
{
"epoch": 2.106417546709992,
"grad_norm": 0.43228252852381843,
"learning_rate": 2.4632252100977567e-06,
"loss": 0.5457,
"step": 1728
},
{
"epoch": 2.1076360682372055,
"grad_norm": 0.3665069578741783,
"learning_rate": 2.4571138835387293e-06,
"loss": 0.4513,
"step": 1729
},
{
"epoch": 2.108854589764419,
"grad_norm": 0.40575471379817674,
"learning_rate": 2.4510076769960784e-06,
"loss": 0.486,
"step": 1730
},
{
"epoch": 2.1100731112916327,
"grad_norm": 0.43487942611154445,
"learning_rate": 2.4449066027644473e-06,
"loss": 0.542,
"step": 1731
},
{
"epoch": 2.1112916328188467,
"grad_norm": 0.41618547734069145,
"learning_rate": 2.4388106731281496e-06,
"loss": 0.5405,
"step": 1732
},
{
"epoch": 2.1125101543460603,
"grad_norm": 0.37250571753343925,
"learning_rate": 2.4327199003611285e-06,
"loss": 0.5298,
"step": 1733
},
{
"epoch": 2.113728675873274,
"grad_norm": 0.38311544589645186,
"learning_rate": 2.426634296726955e-06,
"loss": 0.4806,
"step": 1734
},
{
"epoch": 2.1149471974004874,
"grad_norm": 0.41137106387325834,
"learning_rate": 2.4205538744787904e-06,
"loss": 0.5201,
"step": 1735
},
{
"epoch": 2.116165718927701,
"grad_norm": 0.39341688405639397,
"learning_rate": 2.4144786458593635e-06,
"loss": 0.4973,
"step": 1736
},
{
"epoch": 2.1173842404549146,
"grad_norm": 0.4261520463046671,
"learning_rate": 2.40840862310094e-06,
"loss": 0.5574,
"step": 1737
},
{
"epoch": 2.118602761982128,
"grad_norm": 0.4270827914830235,
"learning_rate": 2.4023438184253115e-06,
"loss": 0.5011,
"step": 1738
},
{
"epoch": 2.119821283509342,
"grad_norm": 0.37282235236530675,
"learning_rate": 2.3962842440437584e-06,
"loss": 0.4675,
"step": 1739
},
{
"epoch": 2.1210398050365558,
"grad_norm": 0.44674425439539434,
"learning_rate": 2.3902299121570332e-06,
"loss": 0.5741,
"step": 1740
},
{
"epoch": 2.1222583265637693,
"grad_norm": 0.41462268384604345,
"learning_rate": 2.384180834955329e-06,
"loss": 0.4876,
"step": 1741
},
{
"epoch": 2.123476848090983,
"grad_norm": 0.4534841107739354,
"learning_rate": 2.378137024618262e-06,
"loss": 0.5135,
"step": 1742
},
{
"epoch": 2.1246953696181965,
"grad_norm": 0.3978485819501479,
"learning_rate": 2.3720984933148443e-06,
"loss": 0.5208,
"step": 1743
},
{
"epoch": 2.12591389114541,
"grad_norm": 0.37184067874428883,
"learning_rate": 2.366065253203456e-06,
"loss": 0.5007,
"step": 1744
},
{
"epoch": 2.1271324126726237,
"grad_norm": 0.4276632980042562,
"learning_rate": 2.360037316431823e-06,
"loss": 0.5317,
"step": 1745
},
{
"epoch": 2.1283509341998377,
"grad_norm": 0.4617864367024641,
"learning_rate": 2.354014695136997e-06,
"loss": 0.5064,
"step": 1746
},
{
"epoch": 2.1295694557270513,
"grad_norm": 0.3859544135248513,
"learning_rate": 2.3479974014453255e-06,
"loss": 0.4865,
"step": 1747
},
{
"epoch": 2.130787977254265,
"grad_norm": 0.39298854898729213,
"learning_rate": 2.3419854474724284e-06,
"loss": 0.5399,
"step": 1748
},
{
"epoch": 2.1320064987814784,
"grad_norm": 0.4537583444692293,
"learning_rate": 2.3359788453231723e-06,
"loss": 0.5134,
"step": 1749
},
{
"epoch": 2.133225020308692,
"grad_norm": 0.4143013070479495,
"learning_rate": 2.329977607091652e-06,
"loss": 0.5128,
"step": 1750
},
{
"epoch": 2.1344435418359056,
"grad_norm": 0.37360321916937234,
"learning_rate": 2.323981744861162e-06,
"loss": 0.5181,
"step": 1751
},
{
"epoch": 2.1356620633631196,
"grad_norm": 0.4044550908338376,
"learning_rate": 2.317991270704167e-06,
"loss": 0.5197,
"step": 1752
},
{
"epoch": 2.136880584890333,
"grad_norm": 0.4067716384869161,
"learning_rate": 2.3120061966822915e-06,
"loss": 0.4899,
"step": 1753
},
{
"epoch": 2.1380991064175467,
"grad_norm": 0.4141293721178242,
"learning_rate": 2.3060265348462777e-06,
"loss": 0.5499,
"step": 1754
},
{
"epoch": 2.1393176279447603,
"grad_norm": 0.4005349519648887,
"learning_rate": 2.3000522972359803e-06,
"loss": 0.5395,
"step": 1755
},
{
"epoch": 2.140536149471974,
"grad_norm": 0.39248292555402464,
"learning_rate": 2.2940834958803228e-06,
"loss": 0.4931,
"step": 1756
},
{
"epoch": 2.1417546709991875,
"grad_norm": 0.38822332167948,
"learning_rate": 2.2881201427972894e-06,
"loss": 0.4722,
"step": 1757
},
{
"epoch": 2.1429731925264015,
"grad_norm": 0.38077200885472606,
"learning_rate": 2.282162249993895e-06,
"loss": 0.5326,
"step": 1758
},
{
"epoch": 2.144191714053615,
"grad_norm": 0.38524212575031547,
"learning_rate": 2.2762098294661556e-06,
"loss": 0.5109,
"step": 1759
},
{
"epoch": 2.1454102355808287,
"grad_norm": 0.40347299946589055,
"learning_rate": 2.27026289319907e-06,
"loss": 0.5579,
"step": 1760
},
{
"epoch": 2.1466287571080422,
"grad_norm": 0.3924233257276901,
"learning_rate": 2.264321453166598e-06,
"loss": 0.5165,
"step": 1761
},
{
"epoch": 2.147847278635256,
"grad_norm": 0.37575656828172116,
"learning_rate": 2.2583855213316326e-06,
"loss": 0.4895,
"step": 1762
},
{
"epoch": 2.1490658001624694,
"grad_norm": 0.3937169968667044,
"learning_rate": 2.2524551096459703e-06,
"loss": 0.53,
"step": 1763
},
{
"epoch": 2.150284321689683,
"grad_norm": 0.39457712101518455,
"learning_rate": 2.2465302300503012e-06,
"loss": 0.4689,
"step": 1764
},
{
"epoch": 2.151502843216897,
"grad_norm": 0.4393918889033505,
"learning_rate": 2.2406108944741696e-06,
"loss": 0.5178,
"step": 1765
},
{
"epoch": 2.1527213647441106,
"grad_norm": 0.4147988916944454,
"learning_rate": 2.234697114835963e-06,
"loss": 0.5385,
"step": 1766
},
{
"epoch": 2.153939886271324,
"grad_norm": 0.40920290551320604,
"learning_rate": 2.228788903042877e-06,
"loss": 0.5229,
"step": 1767
},
{
"epoch": 2.1551584077985377,
"grad_norm": 0.3841912028808192,
"learning_rate": 2.2228862709909e-06,
"loss": 0.4859,
"step": 1768
},
{
"epoch": 2.1563769293257513,
"grad_norm": 0.4017528120034774,
"learning_rate": 2.2169892305647865e-06,
"loss": 0.5067,
"step": 1769
},
{
"epoch": 2.157595450852965,
"grad_norm": 0.4106356542533839,
"learning_rate": 2.211097793638029e-06,
"loss": 0.511,
"step": 1770
},
{
"epoch": 2.158813972380179,
"grad_norm": 0.3922497877364856,
"learning_rate": 2.2052119720728375e-06,
"loss": 0.5213,
"step": 1771
},
{
"epoch": 2.1600324939073925,
"grad_norm": 0.41142355231120237,
"learning_rate": 2.1993317777201197e-06,
"loss": 0.55,
"step": 1772
},
{
"epoch": 2.161251015434606,
"grad_norm": 0.3793501040249683,
"learning_rate": 2.19345722241945e-06,
"loss": 0.4942,
"step": 1773
},
{
"epoch": 2.1624695369618196,
"grad_norm": 0.4181558266950597,
"learning_rate": 2.1875883179990515e-06,
"loss": 0.5179,
"step": 1774
},
{
"epoch": 2.1636880584890332,
"grad_norm": 0.4101546221225696,
"learning_rate": 2.1817250762757657e-06,
"loss": 0.4854,
"step": 1775
},
{
"epoch": 2.164906580016247,
"grad_norm": 0.40370317236247594,
"learning_rate": 2.175867509055033e-06,
"loss": 0.5675,
"step": 1776
},
{
"epoch": 2.166125101543461,
"grad_norm": 0.34161977537337374,
"learning_rate": 2.170015628130871e-06,
"loss": 0.4693,
"step": 1777
},
{
"epoch": 2.1673436230706744,
"grad_norm": 0.3929226138427561,
"learning_rate": 2.1641694452858486e-06,
"loss": 0.4932,
"step": 1778
},
{
"epoch": 2.168562144597888,
"grad_norm": 0.414139184233712,
"learning_rate": 2.158328972291056e-06,
"loss": 0.5428,
"step": 1779
},
{
"epoch": 2.1697806661251016,
"grad_norm": 0.4021702827253732,
"learning_rate": 2.1524942209060944e-06,
"loss": 0.553,
"step": 1780
},
{
"epoch": 2.170999187652315,
"grad_norm": 0.3914173100634304,
"learning_rate": 2.1466652028790384e-06,
"loss": 0.4846,
"step": 1781
},
{
"epoch": 2.1722177091795287,
"grad_norm": 0.4155952702393289,
"learning_rate": 2.1408419299464245e-06,
"loss": 0.5062,
"step": 1782
},
{
"epoch": 2.1734362307067423,
"grad_norm": 0.4029405381679447,
"learning_rate": 2.1350244138332143e-06,
"loss": 0.5543,
"step": 1783
},
{
"epoch": 2.1746547522339563,
"grad_norm": 0.3847467238608974,
"learning_rate": 2.1292126662527846e-06,
"loss": 0.4783,
"step": 1784
},
{
"epoch": 2.17587327376117,
"grad_norm": 0.3774561138941112,
"learning_rate": 2.1234066989068972e-06,
"loss": 0.5736,
"step": 1785
},
{
"epoch": 2.1770917952883835,
"grad_norm": 0.3791792062977483,
"learning_rate": 2.1176065234856725e-06,
"loss": 0.4782,
"step": 1786
},
{
"epoch": 2.178310316815597,
"grad_norm": 0.40849575227250023,
"learning_rate": 2.111812151667567e-06,
"loss": 0.498,
"step": 1787
},
{
"epoch": 2.1795288383428106,
"grad_norm": 0.36289578272484346,
"learning_rate": 2.106023595119358e-06,
"loss": 0.4866,
"step": 1788
},
{
"epoch": 2.180747359870024,
"grad_norm": 0.3869017558290611,
"learning_rate": 2.1002408654961124e-06,
"loss": 0.4643,
"step": 1789
},
{
"epoch": 2.181965881397238,
"grad_norm": 0.4954162333959639,
"learning_rate": 2.0944639744411627e-06,
"loss": 0.5415,
"step": 1790
},
{
"epoch": 2.183184402924452,
"grad_norm": 0.42167227063607843,
"learning_rate": 2.088692933586083e-06,
"loss": 0.5359,
"step": 1791
},
{
"epoch": 2.1844029244516654,
"grad_norm": 0.38236452945033045,
"learning_rate": 2.0829277545506736e-06,
"loss": 0.4971,
"step": 1792
},
{
"epoch": 2.185621445978879,
"grad_norm": 0.4285834277650781,
"learning_rate": 2.077168448942933e-06,
"loss": 0.5475,
"step": 1793
},
{
"epoch": 2.1868399675060926,
"grad_norm": 0.4023920796181869,
"learning_rate": 2.071415028359026e-06,
"loss": 0.4797,
"step": 1794
},
{
"epoch": 2.188058489033306,
"grad_norm": 0.4074540063011702,
"learning_rate": 2.065667504383276e-06,
"loss": 0.5254,
"step": 1795
},
{
"epoch": 2.1892770105605197,
"grad_norm": 0.39510353720495955,
"learning_rate": 2.0599258885881317e-06,
"loss": 0.4899,
"step": 1796
},
{
"epoch": 2.1904955320877337,
"grad_norm": 0.5548900318530214,
"learning_rate": 2.0541901925341446e-06,
"loss": 0.5198,
"step": 1797
},
{
"epoch": 2.1917140536149473,
"grad_norm": 0.3825302397550243,
"learning_rate": 2.0484604277699437e-06,
"loss": 0.5098,
"step": 1798
},
{
"epoch": 2.192932575142161,
"grad_norm": 0.40564085390755233,
"learning_rate": 2.042736605832222e-06,
"loss": 0.5323,
"step": 1799
},
{
"epoch": 2.1941510966693745,
"grad_norm": 0.39704056600422283,
"learning_rate": 2.037018738245707e-06,
"loss": 0.5108,
"step": 1800
},
{
"epoch": 2.195369618196588,
"grad_norm": 0.41600542688505693,
"learning_rate": 2.0313068365231303e-06,
"loss": 0.4978,
"step": 1801
},
{
"epoch": 2.1965881397238016,
"grad_norm": 0.39387757095797193,
"learning_rate": 2.0256009121652147e-06,
"loss": 0.5273,
"step": 1802
},
{
"epoch": 2.1978066612510156,
"grad_norm": 0.36242925936592724,
"learning_rate": 2.019900976660651e-06,
"loss": 0.4982,
"step": 1803
},
{
"epoch": 2.1990251827782292,
"grad_norm": 0.35509245865119315,
"learning_rate": 2.0142070414860704e-06,
"loss": 0.4878,
"step": 1804
},
{
"epoch": 2.200243704305443,
"grad_norm": 0.38952269804673,
"learning_rate": 2.0085191181060176e-06,
"loss": 0.5369,
"step": 1805
},
{
"epoch": 2.2014622258326564,
"grad_norm": 0.3866235545814812,
"learning_rate": 2.0028372179729405e-06,
"loss": 0.4802,
"step": 1806
},
{
"epoch": 2.20268074735987,
"grad_norm": 0.3922622961361272,
"learning_rate": 1.9971613525271523e-06,
"loss": 0.5284,
"step": 1807
},
{
"epoch": 2.2038992688870835,
"grad_norm": 0.38718886845940065,
"learning_rate": 1.9914915331968217e-06,
"loss": 0.4846,
"step": 1808
},
{
"epoch": 2.205117790414297,
"grad_norm": 0.39994907283167946,
"learning_rate": 1.985827771397938e-06,
"loss": 0.5433,
"step": 1809
},
{
"epoch": 2.206336311941511,
"grad_norm": 0.37905307147188894,
"learning_rate": 1.980170078534297e-06,
"loss": 0.5145,
"step": 1810
},
{
"epoch": 2.2075548334687247,
"grad_norm": 0.4192247244192786,
"learning_rate": 1.9745184659974764e-06,
"loss": 0.5118,
"step": 1811
},
{
"epoch": 2.2087733549959383,
"grad_norm": 0.3522566177407128,
"learning_rate": 1.9688729451668116e-06,
"loss": 0.4751,
"step": 1812
},
{
"epoch": 2.209991876523152,
"grad_norm": 0.3772830661699162,
"learning_rate": 1.9632335274093645e-06,
"loss": 0.4859,
"step": 1813
},
{
"epoch": 2.2112103980503655,
"grad_norm": 0.41564321254010056,
"learning_rate": 1.957600224079917e-06,
"loss": 0.5474,
"step": 1814
},
{
"epoch": 2.212428919577579,
"grad_norm": 0.40463784380961515,
"learning_rate": 1.9519730465209384e-06,
"loss": 0.5135,
"step": 1815
},
{
"epoch": 2.213647441104793,
"grad_norm": 0.42048597280314337,
"learning_rate": 1.9463520060625647e-06,
"loss": 0.51,
"step": 1816
},
{
"epoch": 2.2148659626320066,
"grad_norm": 0.4057972852875916,
"learning_rate": 1.940737114022572e-06,
"loss": 0.5291,
"step": 1817
},
{
"epoch": 2.21608448415922,
"grad_norm": 0.3860966909234876,
"learning_rate": 1.935128381706355e-06,
"loss": 0.4638,
"step": 1818
},
{
"epoch": 2.217303005686434,
"grad_norm": 0.3992185398314597,
"learning_rate": 1.9295258204069116e-06,
"loss": 0.4846,
"step": 1819
},
{
"epoch": 2.2185215272136474,
"grad_norm": 0.4414225404994573,
"learning_rate": 1.9239294414048143e-06,
"loss": 0.5729,
"step": 1820
},
{
"epoch": 2.219740048740861,
"grad_norm": 0.38820997327095225,
"learning_rate": 1.9183392559681812e-06,
"loss": 0.4883,
"step": 1821
},
{
"epoch": 2.2209585702680745,
"grad_norm": 0.3830961932249294,
"learning_rate": 1.9127552753526683e-06,
"loss": 0.4959,
"step": 1822
},
{
"epoch": 2.2221770917952886,
"grad_norm": 0.40371127197935186,
"learning_rate": 1.907177510801431e-06,
"loss": 0.5322,
"step": 1823
},
{
"epoch": 2.223395613322502,
"grad_norm": 0.4422584980389884,
"learning_rate": 1.901605973545116e-06,
"loss": 0.544,
"step": 1824
},
{
"epoch": 2.2246141348497157,
"grad_norm": 0.3671460120788879,
"learning_rate": 1.8960406748018229e-06,
"loss": 0.447,
"step": 1825
},
{
"epoch": 2.2258326563769293,
"grad_norm": 0.41787679838261416,
"learning_rate": 1.8904816257770976e-06,
"loss": 0.4837,
"step": 1826
},
{
"epoch": 2.227051177904143,
"grad_norm": 0.40739753945636065,
"learning_rate": 1.884928837663902e-06,
"loss": 0.5215,
"step": 1827
},
{
"epoch": 2.2282696994313564,
"grad_norm": 0.418750898184457,
"learning_rate": 1.8793823216425872e-06,
"loss": 0.5042,
"step": 1828
},
{
"epoch": 2.2294882209585705,
"grad_norm": 0.42352381285835144,
"learning_rate": 1.8738420888808767e-06,
"loss": 0.5266,
"step": 1829
},
{
"epoch": 2.230706742485784,
"grad_norm": 0.3766712142861196,
"learning_rate": 1.8683081505338468e-06,
"loss": 0.4898,
"step": 1830
},
{
"epoch": 2.2319252640129976,
"grad_norm": 0.3646915065525184,
"learning_rate": 1.8627805177438984e-06,
"loss": 0.5102,
"step": 1831
},
{
"epoch": 2.233143785540211,
"grad_norm": 0.3728256406154505,
"learning_rate": 1.8572592016407337e-06,
"loss": 0.5124,
"step": 1832
},
{
"epoch": 2.234362307067425,
"grad_norm": 0.4171588204608096,
"learning_rate": 1.8517442133413405e-06,
"loss": 0.543,
"step": 1833
},
{
"epoch": 2.2355808285946384,
"grad_norm": 0.38601375413922817,
"learning_rate": 1.8462355639499614e-06,
"loss": 0.4802,
"step": 1834
},
{
"epoch": 2.236799350121852,
"grad_norm": 0.41191751363191975,
"learning_rate": 1.8407332645580805e-06,
"loss": 0.498,
"step": 1835
},
{
"epoch": 2.238017871649066,
"grad_norm": 0.36858808021561745,
"learning_rate": 1.8352373262443918e-06,
"loss": 0.5308,
"step": 1836
},
{
"epoch": 2.2392363931762795,
"grad_norm": 0.39319359572416623,
"learning_rate": 1.8297477600747854e-06,
"loss": 0.5147,
"step": 1837
},
{
"epoch": 2.240454914703493,
"grad_norm": 0.4146287760930502,
"learning_rate": 1.8242645771023205e-06,
"loss": 0.4951,
"step": 1838
},
{
"epoch": 2.2416734362307067,
"grad_norm": 0.42074375031523503,
"learning_rate": 1.8187877883672024e-06,
"loss": 0.5238,
"step": 1839
},
{
"epoch": 2.2428919577579203,
"grad_norm": 0.3933564686288827,
"learning_rate": 1.81331740489676e-06,
"loss": 0.5319,
"step": 1840
},
{
"epoch": 2.244110479285134,
"grad_norm": 0.39931640514560685,
"learning_rate": 1.8078534377054303e-06,
"loss": 0.4921,
"step": 1841
},
{
"epoch": 2.245329000812348,
"grad_norm": 0.43198490048399485,
"learning_rate": 1.8023958977947303e-06,
"loss": 0.55,
"step": 1842
},
{
"epoch": 2.2465475223395615,
"grad_norm": 0.4043002892149948,
"learning_rate": 1.7969447961532333e-06,
"loss": 0.4992,
"step": 1843
},
{
"epoch": 2.247766043866775,
"grad_norm": 0.4118718760088592,
"learning_rate": 1.7915001437565481e-06,
"loss": 0.4981,
"step": 1844
},
{
"epoch": 2.2489845653939886,
"grad_norm": 0.4099852822242898,
"learning_rate": 1.7860619515673034e-06,
"loss": 0.5036,
"step": 1845
},
{
"epoch": 2.250203086921202,
"grad_norm": 0.4061790060023943,
"learning_rate": 1.7806302305351191e-06,
"loss": 0.518,
"step": 1846
},
{
"epoch": 2.2514216084484158,
"grad_norm": 0.38920893319388733,
"learning_rate": 1.7752049915965807e-06,
"loss": 0.5347,
"step": 1847
},
{
"epoch": 2.25264012997563,
"grad_norm": 0.378445552638885,
"learning_rate": 1.7697862456752273e-06,
"loss": 0.4489,
"step": 1848
},
{
"epoch": 2.2538586515028434,
"grad_norm": 0.43624250871726583,
"learning_rate": 1.764374003681526e-06,
"loss": 0.5076,
"step": 1849
},
{
"epoch": 2.255077173030057,
"grad_norm": 0.4301972293121319,
"learning_rate": 1.7589682765128424e-06,
"loss": 0.5106,
"step": 1850
},
{
"epoch": 2.2562956945572705,
"grad_norm": 0.4206525298070121,
"learning_rate": 1.7535690750534268e-06,
"loss": 0.5224,
"step": 1851
},
{
"epoch": 2.257514216084484,
"grad_norm": 0.3846425486629517,
"learning_rate": 1.7481764101743925e-06,
"loss": 0.4962,
"step": 1852
},
{
"epoch": 2.2587327376116977,
"grad_norm": 0.3994656170665921,
"learning_rate": 1.7427902927336932e-06,
"loss": 0.5142,
"step": 1853
},
{
"epoch": 2.2599512591389113,
"grad_norm": 0.4156779226062884,
"learning_rate": 1.7374107335760937e-06,
"loss": 0.5224,
"step": 1854
},
{
"epoch": 2.2611697806661253,
"grad_norm": 0.44546616602212363,
"learning_rate": 1.732037743533156e-06,
"loss": 0.49,
"step": 1855
},
{
"epoch": 2.262388302193339,
"grad_norm": 0.41287367815029863,
"learning_rate": 1.7266713334232177e-06,
"loss": 0.5125,
"step": 1856
},
{
"epoch": 2.2636068237205524,
"grad_norm": 0.4171809094817276,
"learning_rate": 1.7213115140513687e-06,
"loss": 0.4866,
"step": 1857
},
{
"epoch": 2.264825345247766,
"grad_norm": 0.4055921589949916,
"learning_rate": 1.7159582962094224e-06,
"loss": 0.5221,
"step": 1858
},
{
"epoch": 2.2660438667749796,
"grad_norm": 0.37574794466013317,
"learning_rate": 1.710611690675908e-06,
"loss": 0.5475,
"step": 1859
},
{
"epoch": 2.267262388302193,
"grad_norm": 0.3923703686975919,
"learning_rate": 1.7052717082160348e-06,
"loss": 0.502,
"step": 1860
},
{
"epoch": 2.2684809098294068,
"grad_norm": 0.4104360688359129,
"learning_rate": 1.6999383595816816e-06,
"loss": 0.4915,
"step": 1861
},
{
"epoch": 2.2696994313566208,
"grad_norm": 0.42448276721497596,
"learning_rate": 1.694611655511365e-06,
"loss": 0.5187,
"step": 1862
},
{
"epoch": 2.2709179528838344,
"grad_norm": 0.4182208678556554,
"learning_rate": 1.6892916067302279e-06,
"loss": 0.5431,
"step": 1863
},
{
"epoch": 2.272136474411048,
"grad_norm": 0.3809524496490651,
"learning_rate": 1.6839782239500114e-06,
"loss": 0.4962,
"step": 1864
},
{
"epoch": 2.2733549959382615,
"grad_norm": 0.4044653821113984,
"learning_rate": 1.6786715178690372e-06,
"loss": 0.5455,
"step": 1865
},
{
"epoch": 2.274573517465475,
"grad_norm": 0.4017956989739632,
"learning_rate": 1.6733714991721738e-06,
"loss": 0.5124,
"step": 1866
},
{
"epoch": 2.275792038992689,
"grad_norm": 0.3948697674235281,
"learning_rate": 1.668078178530837e-06,
"loss": 0.5121,
"step": 1867
},
{
"epoch": 2.2770105605199027,
"grad_norm": 0.406458521824727,
"learning_rate": 1.6627915666029503e-06,
"loss": 0.5111,
"step": 1868
},
{
"epoch": 2.2782290820471163,
"grad_norm": 0.3778951735019038,
"learning_rate": 1.6575116740329316e-06,
"loss": 0.4983,
"step": 1869
},
{
"epoch": 2.27944760357433,
"grad_norm": 0.3577602501518659,
"learning_rate": 1.6522385114516681e-06,
"loss": 0.4748,
"step": 1870
},
{
"epoch": 2.2806661251015434,
"grad_norm": 0.39147597379268634,
"learning_rate": 1.6469720894764945e-06,
"loss": 0.5167,
"step": 1871
},
{
"epoch": 2.281884646628757,
"grad_norm": 0.41526652630800426,
"learning_rate": 1.6417124187111778e-06,
"loss": 0.4856,
"step": 1872
},
{
"epoch": 2.2831031681559706,
"grad_norm": 0.46476506582341715,
"learning_rate": 1.6364595097458901e-06,
"loss": 0.5541,
"step": 1873
},
{
"epoch": 2.2843216896831846,
"grad_norm": 0.4413380147809054,
"learning_rate": 1.6312133731571867e-06,
"loss": 0.5681,
"step": 1874
},
{
"epoch": 2.285540211210398,
"grad_norm": 0.41316580395580427,
"learning_rate": 1.6259740195079903e-06,
"loss": 0.4902,
"step": 1875
},
{
"epoch": 2.2867587327376118,
"grad_norm": 0.375223833084769,
"learning_rate": 1.6207414593475634e-06,
"loss": 0.5059,
"step": 1876
},
{
"epoch": 2.2879772542648253,
"grad_norm": 0.4191217491734361,
"learning_rate": 1.6155157032114926e-06,
"loss": 0.4903,
"step": 1877
},
{
"epoch": 2.289195775792039,
"grad_norm": 0.4104399365050538,
"learning_rate": 1.610296761621662e-06,
"loss": 0.4978,
"step": 1878
},
{
"epoch": 2.2904142973192525,
"grad_norm": 0.452132557586679,
"learning_rate": 1.6050846450862368e-06,
"loss": 0.5529,
"step": 1879
},
{
"epoch": 2.291632818846466,
"grad_norm": 0.38189023256593707,
"learning_rate": 1.5998793640996418e-06,
"loss": 0.4534,
"step": 1880
},
{
"epoch": 2.29285134037368,
"grad_norm": 0.4105896104811722,
"learning_rate": 1.5946809291425352e-06,
"loss": 0.5157,
"step": 1881
},
{
"epoch": 2.2940698619008937,
"grad_norm": 0.39415858749113886,
"learning_rate": 1.589489350681791e-06,
"loss": 0.504,
"step": 1882
},
{
"epoch": 2.2952883834281073,
"grad_norm": 0.35980183287414685,
"learning_rate": 1.5843046391704802e-06,
"loss": 0.5077,
"step": 1883
},
{
"epoch": 2.296506904955321,
"grad_norm": 0.38396469649464077,
"learning_rate": 1.5791268050478487e-06,
"loss": 0.5051,
"step": 1884
},
{
"epoch": 2.2977254264825344,
"grad_norm": 0.3821275005707828,
"learning_rate": 1.573955858739289e-06,
"loss": 0.5345,
"step": 1885
},
{
"epoch": 2.298943948009748,
"grad_norm": 0.39018524126022086,
"learning_rate": 1.5687918106563326e-06,
"loss": 0.4713,
"step": 1886
},
{
"epoch": 2.3001624695369616,
"grad_norm": 0.4269492782915114,
"learning_rate": 1.5636346711966154e-06,
"loss": 0.5396,
"step": 1887
},
{
"epoch": 2.3013809910641756,
"grad_norm": 0.40024049549443624,
"learning_rate": 1.5584844507438678e-06,
"loss": 0.5119,
"step": 1888
},
{
"epoch": 2.302599512591389,
"grad_norm": 0.3769373466661974,
"learning_rate": 1.5533411596678843e-06,
"loss": 0.4858,
"step": 1889
},
{
"epoch": 2.3038180341186028,
"grad_norm": 0.40957300518215145,
"learning_rate": 1.5482048083245116e-06,
"loss": 0.5299,
"step": 1890
},
{
"epoch": 2.3050365556458163,
"grad_norm": 0.3887283184505036,
"learning_rate": 1.543075407055623e-06,
"loss": 0.5276,
"step": 1891
},
{
"epoch": 2.30625507717303,
"grad_norm": 0.3944358213197462,
"learning_rate": 1.5379529661890956e-06,
"loss": 0.512,
"step": 1892
},
{
"epoch": 2.307473598700244,
"grad_norm": 0.38335955851758596,
"learning_rate": 1.532837496038792e-06,
"loss": 0.4802,
"step": 1893
},
{
"epoch": 2.3086921202274575,
"grad_norm": 0.40294432984687933,
"learning_rate": 1.5277290069045414e-06,
"loss": 0.5171,
"step": 1894
},
{
"epoch": 2.309910641754671,
"grad_norm": 0.39774386298303005,
"learning_rate": 1.5226275090721183e-06,
"loss": 0.4993,
"step": 1895
},
{
"epoch": 2.3111291632818847,
"grad_norm": 0.4200593782198228,
"learning_rate": 1.517533012813217e-06,
"loss": 0.5606,
"step": 1896
},
{
"epoch": 2.3123476848090982,
"grad_norm": 0.4204431416124692,
"learning_rate": 1.512445528385434e-06,
"loss": 0.5538,
"step": 1897
},
{
"epoch": 2.313566206336312,
"grad_norm": 0.33671871586550156,
"learning_rate": 1.5073650660322509e-06,
"loss": 0.4575,
"step": 1898
},
{
"epoch": 2.3147847278635254,
"grad_norm": 0.42276990891040056,
"learning_rate": 1.5022916359830114e-06,
"loss": 0.5744,
"step": 1899
},
{
"epoch": 2.3160032493907394,
"grad_norm": 0.35916851603197664,
"learning_rate": 1.4972252484528938e-06,
"loss": 0.4721,
"step": 1900
},
{
"epoch": 2.317221770917953,
"grad_norm": 0.40308654798116644,
"learning_rate": 1.4921659136429022e-06,
"loss": 0.5283,
"step": 1901
},
{
"epoch": 2.3184402924451666,
"grad_norm": 0.36536184604215355,
"learning_rate": 1.4871136417398407e-06,
"loss": 0.4748,
"step": 1902
},
{
"epoch": 2.31965881397238,
"grad_norm": 0.42296472427505094,
"learning_rate": 1.4820684429162879e-06,
"loss": 0.6,
"step": 1903
},
{
"epoch": 2.3208773354995937,
"grad_norm": 0.3598564220383708,
"learning_rate": 1.477030327330582e-06,
"loss": 0.4422,
"step": 1904
},
{
"epoch": 2.3220958570268073,
"grad_norm": 0.3948776090594206,
"learning_rate": 1.4719993051268023e-06,
"loss": 0.5343,
"step": 1905
},
{
"epoch": 2.323314378554021,
"grad_norm": 0.4028197151468667,
"learning_rate": 1.466975386434744e-06,
"loss": 0.5355,
"step": 1906
},
{
"epoch": 2.324532900081235,
"grad_norm": 0.3958394776231179,
"learning_rate": 1.4619585813699032e-06,
"loss": 0.5119,
"step": 1907
},
{
"epoch": 2.3257514216084485,
"grad_norm": 0.3640898960169131,
"learning_rate": 1.4569489000334435e-06,
"loss": 0.4749,
"step": 1908
},
{
"epoch": 2.326969943135662,
"grad_norm": 0.42215058629769103,
"learning_rate": 1.4519463525121934e-06,
"loss": 0.5157,
"step": 1909
},
{
"epoch": 2.3281884646628757,
"grad_norm": 0.38263832424003186,
"learning_rate": 1.4469509488786165e-06,
"loss": 0.509,
"step": 1910
},
{
"epoch": 2.3294069861900892,
"grad_norm": 0.41466783476778923,
"learning_rate": 1.4419626991907925e-06,
"loss": 0.5222,
"step": 1911
},
{
"epoch": 2.330625507717303,
"grad_norm": 0.3818440846598212,
"learning_rate": 1.436981613492394e-06,
"loss": 0.5153,
"step": 1912
},
{
"epoch": 2.331844029244517,
"grad_norm": 0.3744448281484142,
"learning_rate": 1.4320077018126704e-06,
"loss": 0.4932,
"step": 1913
},
{
"epoch": 2.3330625507717304,
"grad_norm": 0.35650000061694337,
"learning_rate": 1.427040974166427e-06,
"loss": 0.4711,
"step": 1914
},
{
"epoch": 2.334281072298944,
"grad_norm": 0.44375949024201433,
"learning_rate": 1.4220814405540067e-06,
"loss": 0.6081,
"step": 1915
},
{
"epoch": 2.3354995938261576,
"grad_norm": 0.3632417016214969,
"learning_rate": 1.4171291109612618e-06,
"loss": 0.4439,
"step": 1916
},
{
"epoch": 2.336718115353371,
"grad_norm": 0.391995578036673,
"learning_rate": 1.412183995359544e-06,
"loss": 0.5208,
"step": 1917
},
{
"epoch": 2.3379366368805847,
"grad_norm": 0.4106831300879619,
"learning_rate": 1.4072461037056806e-06,
"loss": 0.5185,
"step": 1918
},
{
"epoch": 2.3391551584077988,
"grad_norm": 0.37156715583934985,
"learning_rate": 1.4023154459419497e-06,
"loss": 0.492,
"step": 1919
},
{
"epoch": 2.3403736799350123,
"grad_norm": 0.4194748560314305,
"learning_rate": 1.3973920319960654e-06,
"loss": 0.5387,
"step": 1920
},
{
"epoch": 2.341592201462226,
"grad_norm": 0.4089237007134858,
"learning_rate": 1.3924758717811582e-06,
"loss": 0.5258,
"step": 1921
},
{
"epoch": 2.3428107229894395,
"grad_norm": 0.3694012500954815,
"learning_rate": 1.3875669751957548e-06,
"loss": 0.4604,
"step": 1922
},
{
"epoch": 2.344029244516653,
"grad_norm": 0.4054490233784107,
"learning_rate": 1.3826653521237526e-06,
"loss": 0.5113,
"step": 1923
},
{
"epoch": 2.3452477660438666,
"grad_norm": 0.39405086162290015,
"learning_rate": 1.3777710124344058e-06,
"loss": 0.5753,
"step": 1924
},
{
"epoch": 2.3464662875710802,
"grad_norm": 0.3708763755034275,
"learning_rate": 1.3728839659823045e-06,
"loss": 0.5154,
"step": 1925
},
{
"epoch": 2.3476848090982942,
"grad_norm": 0.3772580609058518,
"learning_rate": 1.3680042226073554e-06,
"loss": 0.4871,
"step": 1926
},
{
"epoch": 2.348903330625508,
"grad_norm": 0.39403743871876235,
"learning_rate": 1.3631317921347564e-06,
"loss": 0.5306,
"step": 1927
},
{
"epoch": 2.3501218521527214,
"grad_norm": 0.3873906953232042,
"learning_rate": 1.358266684374987e-06,
"loss": 0.5123,
"step": 1928
},
{
"epoch": 2.351340373679935,
"grad_norm": 0.37942846145056086,
"learning_rate": 1.3534089091237757e-06,
"loss": 0.5054,
"step": 1929
},
{
"epoch": 2.3525588952071486,
"grad_norm": 0.3607773079027305,
"learning_rate": 1.348558476162094e-06,
"loss": 0.481,
"step": 1930
},
{
"epoch": 2.353777416734362,
"grad_norm": 0.4005225791492384,
"learning_rate": 1.343715395256124e-06,
"loss": 0.5331,
"step": 1931
},
{
"epoch": 2.3549959382615757,
"grad_norm": 0.36782048392498773,
"learning_rate": 1.3388796761572493e-06,
"loss": 0.4872,
"step": 1932
},
{
"epoch": 2.3562144597887897,
"grad_norm": 0.38694592195175675,
"learning_rate": 1.3340513286020307e-06,
"loss": 0.5245,
"step": 1933
},
{
"epoch": 2.3574329813160033,
"grad_norm": 0.3951746347940695,
"learning_rate": 1.3292303623121828e-06,
"loss": 0.5296,
"step": 1934
},
{
"epoch": 2.358651502843217,
"grad_norm": 0.4201044387513612,
"learning_rate": 1.324416786994559e-06,
"loss": 0.5284,
"step": 1935
},
{
"epoch": 2.3598700243704305,
"grad_norm": 0.4088086029813002,
"learning_rate": 1.3196106123411345e-06,
"loss": 0.5212,
"step": 1936
},
{
"epoch": 2.361088545897644,
"grad_norm": 0.38621048916855116,
"learning_rate": 1.3148118480289834e-06,
"loss": 0.5078,
"step": 1937
},
{
"epoch": 2.362307067424858,
"grad_norm": 0.38423444639816645,
"learning_rate": 1.310020503720254e-06,
"loss": 0.5363,
"step": 1938
},
{
"epoch": 2.3635255889520717,
"grad_norm": 0.3807864286378039,
"learning_rate": 1.3052365890621615e-06,
"loss": 0.5349,
"step": 1939
},
{
"epoch": 2.3647441104792852,
"grad_norm": 0.3969153122233334,
"learning_rate": 1.3004601136869555e-06,
"loss": 0.5245,
"step": 1940
},
{
"epoch": 2.365962632006499,
"grad_norm": 0.3706070359987908,
"learning_rate": 1.295691087211912e-06,
"loss": 0.4639,
"step": 1941
},
{
"epoch": 2.3671811535337124,
"grad_norm": 0.4076566869065765,
"learning_rate": 1.2909295192393057e-06,
"loss": 0.5623,
"step": 1942
},
{
"epoch": 2.368399675060926,
"grad_norm": 0.3609321931094141,
"learning_rate": 1.2861754193563948e-06,
"loss": 0.4532,
"step": 1943
},
{
"epoch": 2.3696181965881395,
"grad_norm": 0.3775351942907534,
"learning_rate": 1.2814287971354023e-06,
"loss": 0.5515,
"step": 1944
},
{
"epoch": 2.3708367181153536,
"grad_norm": 0.37390993961577534,
"learning_rate": 1.2766896621334928e-06,
"loss": 0.5097,
"step": 1945
},
{
"epoch": 2.372055239642567,
"grad_norm": 0.37384134850125694,
"learning_rate": 1.2719580238927553e-06,
"loss": 0.5557,
"step": 1946
},
{
"epoch": 2.3732737611697807,
"grad_norm": 0.3775237757952146,
"learning_rate": 1.2672338919401866e-06,
"loss": 0.5197,
"step": 1947
},
{
"epoch": 2.3744922826969943,
"grad_norm": 0.3845839355688612,
"learning_rate": 1.2625172757876691e-06,
"loss": 0.5175,
"step": 1948
},
{
"epoch": 2.375710804224208,
"grad_norm": 0.3930000499088082,
"learning_rate": 1.2578081849319547e-06,
"loss": 0.4908,
"step": 1949
},
{
"epoch": 2.3769293257514215,
"grad_norm": 0.3679733160971826,
"learning_rate": 1.253106628854635e-06,
"loss": 0.4807,
"step": 1950
},
{
"epoch": 2.378147847278635,
"grad_norm": 0.41138237630333274,
"learning_rate": 1.2484126170221388e-06,
"loss": 0.5494,
"step": 1951
},
{
"epoch": 2.379366368805849,
"grad_norm": 0.3644013554869518,
"learning_rate": 1.2437261588857037e-06,
"loss": 0.4715,
"step": 1952
},
{
"epoch": 2.3805848903330626,
"grad_norm": 0.3754357671998505,
"learning_rate": 1.2390472638813572e-06,
"loss": 0.5106,
"step": 1953
},
{
"epoch": 2.381803411860276,
"grad_norm": 0.4210050485232648,
"learning_rate": 1.2343759414298955e-06,
"loss": 0.5755,
"step": 1954
},
{
"epoch": 2.38302193338749,
"grad_norm": 0.3648248351254956,
"learning_rate": 1.229712200936874e-06,
"loss": 0.4928,
"step": 1955
},
{
"epoch": 2.3842404549147034,
"grad_norm": 0.34960648428885643,
"learning_rate": 1.2250560517925747e-06,
"loss": 0.4643,
"step": 1956
},
{
"epoch": 2.385458976441917,
"grad_norm": 0.3790545819611439,
"learning_rate": 1.2204075033720025e-06,
"loss": 0.4949,
"step": 1957
},
{
"epoch": 2.386677497969131,
"grad_norm": 0.3711764526859297,
"learning_rate": 1.2157665650348516e-06,
"loss": 0.4838,
"step": 1958
},
{
"epoch": 2.3878960194963446,
"grad_norm": 0.4040275413347584,
"learning_rate": 1.211133246125497e-06,
"loss": 0.5255,
"step": 1959
},
{
"epoch": 2.389114541023558,
"grad_norm": 0.40676437477424116,
"learning_rate": 1.2065075559729749e-06,
"loss": 0.5417,
"step": 1960
},
{
"epoch": 2.3903330625507717,
"grad_norm": 0.3828497235073974,
"learning_rate": 1.201889503890955e-06,
"loss": 0.5003,
"step": 1961
},
{
"epoch": 2.3915515840779853,
"grad_norm": 0.40024563629448995,
"learning_rate": 1.197279099177731e-06,
"loss": 0.5627,
"step": 1962
},
{
"epoch": 2.392770105605199,
"grad_norm": 0.3508330713937016,
"learning_rate": 1.1926763511161993e-06,
"loss": 0.4607,
"step": 1963
},
{
"epoch": 2.393988627132413,
"grad_norm": 0.4280688001820654,
"learning_rate": 1.188081268973842e-06,
"loss": 0.5389,
"step": 1964
},
{
"epoch": 2.3952071486596265,
"grad_norm": 0.3853051982812674,
"learning_rate": 1.183493862002702e-06,
"loss": 0.4576,
"step": 1965
},
{
"epoch": 2.39642567018684,
"grad_norm": 0.4045347572603379,
"learning_rate": 1.1789141394393683e-06,
"loss": 0.5698,
"step": 1966
},
{
"epoch": 2.3976441917140536,
"grad_norm": 0.3983176516664499,
"learning_rate": 1.1743421105049612e-06,
"loss": 0.4725,
"step": 1967
},
{
"epoch": 2.398862713241267,
"grad_norm": 0.39056153824479495,
"learning_rate": 1.1697777844051105e-06,
"loss": 0.5365,
"step": 1968
},
{
"epoch": 2.400081234768481,
"grad_norm": 0.38372959754674263,
"learning_rate": 1.165221170329931e-06,
"loss": 0.5051,
"step": 1969
},
{
"epoch": 2.4012997562956944,
"grad_norm": 0.3938860376877866,
"learning_rate": 1.1606722774540146e-06,
"loss": 0.4948,
"step": 1970
},
{
"epoch": 2.4025182778229084,
"grad_norm": 0.38806865450446354,
"learning_rate": 1.1561311149364075e-06,
"loss": 0.5132,
"step": 1971
},
{
"epoch": 2.403736799350122,
"grad_norm": 0.41405852900105905,
"learning_rate": 1.1515976919205869e-06,
"loss": 0.5287,
"step": 1972
},
{
"epoch": 2.4049553208773355,
"grad_norm": 0.41486030319490325,
"learning_rate": 1.1470720175344473e-06,
"loss": 0.4826,
"step": 1973
},
{
"epoch": 2.406173842404549,
"grad_norm": 0.4073479693226464,
"learning_rate": 1.1425541008902852e-06,
"loss": 0.5061,
"step": 1974
},
{
"epoch": 2.4073923639317627,
"grad_norm": 0.3698628863486537,
"learning_rate": 1.1380439510847757e-06,
"loss": 0.4822,
"step": 1975
},
{
"epoch": 2.4086108854589763,
"grad_norm": 0.40735700660846696,
"learning_rate": 1.1335415771989538e-06,
"loss": 0.5198,
"step": 1976
},
{
"epoch": 2.40982940698619,
"grad_norm": 0.39248680656683244,
"learning_rate": 1.1290469882981987e-06,
"loss": 0.5513,
"step": 1977
},
{
"epoch": 2.411047928513404,
"grad_norm": 0.36602544238604245,
"learning_rate": 1.1245601934322148e-06,
"loss": 0.5042,
"step": 1978
},
{
"epoch": 2.4122664500406175,
"grad_norm": 0.3765733599147946,
"learning_rate": 1.1200812016350172e-06,
"loss": 0.5031,
"step": 1979
},
{
"epoch": 2.413484971567831,
"grad_norm": 0.34319770025526913,
"learning_rate": 1.1156100219249022e-06,
"loss": 0.5049,
"step": 1980
},
{
"epoch": 2.4147034930950446,
"grad_norm": 0.4344345273336274,
"learning_rate": 1.1111466633044448e-06,
"loss": 0.6097,
"step": 1981
},
{
"epoch": 2.415922014622258,
"grad_norm": 0.4011208632543373,
"learning_rate": 1.1066911347604653e-06,
"loss": 0.4408,
"step": 1982
},
{
"epoch": 2.417140536149472,
"grad_norm": 0.3602299607841865,
"learning_rate": 1.1022434452640252e-06,
"loss": 0.4878,
"step": 1983
},
{
"epoch": 2.418359057676686,
"grad_norm": 0.3938174382324399,
"learning_rate": 1.0978036037703955e-06,
"loss": 0.5246,
"step": 1984
},
{
"epoch": 2.4195775792038994,
"grad_norm": 0.40627392150384173,
"learning_rate": 1.0933716192190502e-06,
"loss": 0.5191,
"step": 1985
},
{
"epoch": 2.420796100731113,
"grad_norm": 0.3960581508676551,
"learning_rate": 1.0889475005336447e-06,
"loss": 0.4755,
"step": 1986
},
{
"epoch": 2.4220146222583265,
"grad_norm": 0.3909127795418108,
"learning_rate": 1.0845312566219924e-06,
"loss": 0.5128,
"step": 1987
},
{
"epoch": 2.42323314378554,
"grad_norm": 0.39827799729251556,
"learning_rate": 1.0801228963760518e-06,
"loss": 0.5425,
"step": 1988
},
{
"epoch": 2.4244516653127537,
"grad_norm": 0.37511159704629343,
"learning_rate": 1.075722428671911e-06,
"loss": 0.4761,
"step": 1989
},
{
"epoch": 2.4256701868399677,
"grad_norm": 0.3982398641015277,
"learning_rate": 1.0713298623697654e-06,
"loss": 0.5386,
"step": 1990
},
{
"epoch": 2.4268887083671813,
"grad_norm": 0.3947556572315106,
"learning_rate": 1.0669452063138992e-06,
"loss": 0.4842,
"step": 1991
},
{
"epoch": 2.428107229894395,
"grad_norm": 0.40576214681574757,
"learning_rate": 1.0625684693326727e-06,
"loss": 0.5423,
"step": 1992
},
{
"epoch": 2.4293257514216084,
"grad_norm": 0.40693906455637163,
"learning_rate": 1.0581996602384975e-06,
"loss": 0.5159,
"step": 1993
},
{
"epoch": 2.430544272948822,
"grad_norm": 0.3488770060857356,
"learning_rate": 1.0538387878278283e-06,
"loss": 0.5187,
"step": 1994
},
{
"epoch": 2.4317627944760356,
"grad_norm": 0.4158444319209436,
"learning_rate": 1.0494858608811326e-06,
"loss": 0.5313,
"step": 1995
},
{
"epoch": 2.432981316003249,
"grad_norm": 0.4169970056929273,
"learning_rate": 1.0451408881628855e-06,
"loss": 0.4866,
"step": 1996
},
{
"epoch": 2.434199837530463,
"grad_norm": 0.3823144671122315,
"learning_rate": 1.0408038784215462e-06,
"loss": 0.4871,
"step": 1997
},
{
"epoch": 2.435418359057677,
"grad_norm": 0.38435379244248535,
"learning_rate": 1.0364748403895368e-06,
"loss": 0.5276,
"step": 1998
},
{
"epoch": 2.4366368805848904,
"grad_norm": 0.39978814004238356,
"learning_rate": 1.0321537827832311e-06,
"loss": 0.5374,
"step": 1999
},
{
"epoch": 2.437855402112104,
"grad_norm": 0.3858037469760484,
"learning_rate": 1.0278407143029346e-06,
"loss": 0.4967,
"step": 2000
},
{
"epoch": 2.4390739236393175,
"grad_norm": 0.36427416648269983,
"learning_rate": 1.0235356436328675e-06,
"loss": 0.5147,
"step": 2001
},
{
"epoch": 2.440292445166531,
"grad_norm": 0.42530639031865014,
"learning_rate": 1.019238579441148e-06,
"loss": 0.4949,
"step": 2002
},
{
"epoch": 2.4415109666937447,
"grad_norm": 0.4047119130435624,
"learning_rate": 1.014949530379767e-06,
"loss": 0.491,
"step": 2003
},
{
"epoch": 2.4427294882209587,
"grad_norm": 0.38798880943669517,
"learning_rate": 1.0106685050845838e-06,
"loss": 0.5433,
"step": 2004
},
{
"epoch": 2.4439480097481723,
"grad_norm": 0.4009909590310544,
"learning_rate": 1.0063955121752999e-06,
"loss": 0.5113,
"step": 2005
},
{
"epoch": 2.445166531275386,
"grad_norm": 0.35796810794196016,
"learning_rate": 1.0021305602554459e-06,
"loss": 0.5113,
"step": 2006
},
{
"epoch": 2.4463850528025994,
"grad_norm": 0.3755800564081513,
"learning_rate": 9.978736579123577e-07,
"loss": 0.5004,
"step": 2007
},
{
"epoch": 2.447603574329813,
"grad_norm": 0.3683007765287416,
"learning_rate": 9.936248137171684e-07,
"loss": 0.4974,
"step": 2008
},
{
"epoch": 2.448822095857027,
"grad_norm": 0.3952937239349372,
"learning_rate": 9.893840362247809e-07,
"loss": 0.4971,
"step": 2009
},
{
"epoch": 2.4500406173842406,
"grad_norm": 0.4284741934979282,
"learning_rate": 9.851513339738627e-07,
"loss": 0.561,
"step": 2010
},
{
"epoch": 2.451259138911454,
"grad_norm": 0.4056720067131079,
"learning_rate": 9.809267154868163e-07,
"loss": 0.5179,
"step": 2011
},
{
"epoch": 2.4524776604386678,
"grad_norm": 0.35255993479863745,
"learning_rate": 9.7671018926977e-07,
"loss": 0.4424,
"step": 2012
},
{
"epoch": 2.4536961819658814,
"grad_norm": 0.40547305130915784,
"learning_rate": 9.725017638125612e-07,
"loss": 0.5524,
"step": 2013
},
{
"epoch": 2.454914703493095,
"grad_norm": 0.35656299866798635,
"learning_rate": 9.683014475887126e-07,
"loss": 0.4676,
"step": 2014
},
{
"epoch": 2.4561332250203085,
"grad_norm": 0.37086670515583137,
"learning_rate": 9.641092490554195e-07,
"loss": 0.5398,
"step": 2015
},
{
"epoch": 2.4573517465475225,
"grad_norm": 0.3791246421084454,
"learning_rate": 9.599251766535344e-07,
"loss": 0.4933,
"step": 2016
},
{
"epoch": 2.458570268074736,
"grad_norm": 0.40912338906986023,
"learning_rate": 9.5574923880755e-07,
"loss": 0.562,
"step": 2017
},
{
"epoch": 2.4597887896019497,
"grad_norm": 0.41181426146954847,
"learning_rate": 9.51581443925576e-07,
"loss": 0.4892,
"step": 2018
},
{
"epoch": 2.4610073111291633,
"grad_norm": 0.4026513287049664,
"learning_rate": 9.474218003993275e-07,
"loss": 0.5278,
"step": 2019
},
{
"epoch": 2.462225832656377,
"grad_norm": 0.38613963899490084,
"learning_rate": 9.432703166041085e-07,
"loss": 0.4996,
"step": 2020
},
{
"epoch": 2.4634443541835904,
"grad_norm": 0.38482050851065364,
"learning_rate": 9.391270008987946e-07,
"loss": 0.5189,
"step": 2021
},
{
"epoch": 2.464662875710804,
"grad_norm": 0.39758696168412205,
"learning_rate": 9.349918616258113e-07,
"loss": 0.5078,
"step": 2022
},
{
"epoch": 2.465881397238018,
"grad_norm": 0.38614047610686164,
"learning_rate": 9.308649071111259e-07,
"loss": 0.4729,
"step": 2023
},
{
"epoch": 2.4670999187652316,
"grad_norm": 0.37024789300038397,
"learning_rate": 9.267461456642235e-07,
"loss": 0.5187,
"step": 2024
},
{
"epoch": 2.468318440292445,
"grad_norm": 0.393874443470908,
"learning_rate": 9.226355855780922e-07,
"loss": 0.5266,
"step": 2025
},
{
"epoch": 2.4695369618196588,
"grad_norm": 0.39515255892810097,
"learning_rate": 9.185332351292059e-07,
"loss": 0.4979,
"step": 2026
},
{
"epoch": 2.4707554833468723,
"grad_norm": 0.36209573390534977,
"learning_rate": 9.144391025775123e-07,
"loss": 0.4685,
"step": 2027
},
{
"epoch": 2.471974004874086,
"grad_norm": 0.3690675939364142,
"learning_rate": 9.10353196166412e-07,
"loss": 0.5109,
"step": 2028
},
{
"epoch": 2.4731925264013,
"grad_norm": 0.4126364096164172,
"learning_rate": 9.0627552412274e-07,
"loss": 0.551,
"step": 2029
},
{
"epoch": 2.4744110479285135,
"grad_norm": 0.39808305056022897,
"learning_rate": 9.022060946567512e-07,
"loss": 0.4829,
"step": 2030
},
{
"epoch": 2.475629569455727,
"grad_norm": 0.3791592284080857,
"learning_rate": 8.981449159621075e-07,
"loss": 0.4993,
"step": 2031
},
{
"epoch": 2.4768480909829407,
"grad_norm": 0.3890390516980624,
"learning_rate": 8.940919962158584e-07,
"loss": 0.5213,
"step": 2032
},
{
"epoch": 2.4780666125101543,
"grad_norm": 0.42524657999992466,
"learning_rate": 8.900473435784196e-07,
"loss": 0.5666,
"step": 2033
},
{
"epoch": 2.479285134037368,
"grad_norm": 0.3815964696084361,
"learning_rate": 8.860109661935673e-07,
"loss": 0.4625,
"step": 2034
},
{
"epoch": 2.480503655564582,
"grad_norm": 0.42469861666467223,
"learning_rate": 8.819828721884094e-07,
"loss": 0.5373,
"step": 2035
},
{
"epoch": 2.4817221770917954,
"grad_norm": 0.38320361649924684,
"learning_rate": 8.779630696733821e-07,
"loss": 0.5375,
"step": 2036
},
{
"epoch": 2.482940698619009,
"grad_norm": 0.3687214848508832,
"learning_rate": 8.739515667422211e-07,
"loss": 0.4435,
"step": 2037
},
{
"epoch": 2.4841592201462226,
"grad_norm": 0.40061711007827416,
"learning_rate": 8.699483714719547e-07,
"loss": 0.5467,
"step": 2038
},
{
"epoch": 2.485377741673436,
"grad_norm": 0.40521522379814523,
"learning_rate": 8.659534919228845e-07,
"loss": 0.536,
"step": 2039
},
{
"epoch": 2.4865962632006497,
"grad_norm": 0.3672113864753048,
"learning_rate": 8.619669361385663e-07,
"loss": 0.4978,
"step": 2040
},
{
"epoch": 2.4878147847278633,
"grad_norm": 0.3620893091593676,
"learning_rate": 8.579887121457952e-07,
"loss": 0.5038,
"step": 2041
},
{
"epoch": 2.4890333062550773,
"grad_norm": 0.3663778220669876,
"learning_rate": 8.540188279545942e-07,
"loss": 0.4862,
"step": 2042
},
{
"epoch": 2.490251827782291,
"grad_norm": 0.38043441191253624,
"learning_rate": 8.500572915581923e-07,
"loss": 0.5152,
"step": 2043
},
{
"epoch": 2.4914703493095045,
"grad_norm": 0.3942635437659399,
"learning_rate": 8.461041109330132e-07,
"loss": 0.5055,
"step": 2044
},
{
"epoch": 2.492688870836718,
"grad_norm": 0.3729970950643679,
"learning_rate": 8.421592940386514e-07,
"loss": 0.5022,
"step": 2045
},
{
"epoch": 2.4939073923639317,
"grad_norm": 0.40364708615741257,
"learning_rate": 8.382228488178639e-07,
"loss": 0.5297,
"step": 2046
},
{
"epoch": 2.4951259138911452,
"grad_norm": 0.3841836875471797,
"learning_rate": 8.342947831965537e-07,
"loss": 0.4594,
"step": 2047
},
{
"epoch": 2.496344435418359,
"grad_norm": 0.39167222446559674,
"learning_rate": 8.3037510508375e-07,
"loss": 0.538,
"step": 2048
},
{
"epoch": 2.497562956945573,
"grad_norm": 0.36017475560838597,
"learning_rate": 8.264638223715916e-07,
"loss": 0.4904,
"step": 2049
},
{
"epoch": 2.4987814784727864,
"grad_norm": 0.38494521342543364,
"learning_rate": 8.225609429353187e-07,
"loss": 0.5098,
"step": 2050
},
{
"epoch": 2.5,
"grad_norm": 0.3915568133305174,
"learning_rate": 8.186664746332457e-07,
"loss": 0.5479,
"step": 2051
},
{
"epoch": 2.5012185215272136,
"grad_norm": 0.3653001722783512,
"learning_rate": 8.147804253067581e-07,
"loss": 0.5505,
"step": 2052
},
{
"epoch": 2.502437043054427,
"grad_norm": 0.38529539896383097,
"learning_rate": 8.109028027802834e-07,
"loss": 0.5075,
"step": 2053
},
{
"epoch": 2.503655564581641,
"grad_norm": 0.32985269566739706,
"learning_rate": 8.070336148612873e-07,
"loss": 0.4737,
"step": 2054
},
{
"epoch": 2.5048740861088543,
"grad_norm": 0.3688596078684635,
"learning_rate": 8.031728693402502e-07,
"loss": 0.4933,
"step": 2055
},
{
"epoch": 2.5060926076360683,
"grad_norm": 0.3574147764462336,
"learning_rate": 7.993205739906551e-07,
"loss": 0.5036,
"step": 2056
},
{
"epoch": 2.507311129163282,
"grad_norm": 0.3933673997370336,
"learning_rate": 7.954767365689675e-07,
"loss": 0.5284,
"step": 2057
},
{
"epoch": 2.5085296506904955,
"grad_norm": 0.3804892757598497,
"learning_rate": 7.916413648146282e-07,
"loss": 0.5314,
"step": 2058
},
{
"epoch": 2.509748172217709,
"grad_norm": 0.3972280482401795,
"learning_rate": 7.878144664500304e-07,
"loss": 0.5042,
"step": 2059
},
{
"epoch": 2.5109666937449227,
"grad_norm": 0.4139170649517036,
"learning_rate": 7.839960491805048e-07,
"loss": 0.513,
"step": 2060
},
{
"epoch": 2.5121852152721367,
"grad_norm": 0.3682306221980358,
"learning_rate": 7.80186120694309e-07,
"loss": 0.5082,
"step": 2061
},
{
"epoch": 2.5134037367993503,
"grad_norm": 0.40743403282060575,
"learning_rate": 7.763846886626048e-07,
"loss": 0.4982,
"step": 2062
},
{
"epoch": 2.514622258326564,
"grad_norm": 0.3807959558438016,
"learning_rate": 7.725917607394512e-07,
"loss": 0.4893,
"step": 2063
},
{
"epoch": 2.5158407798537774,
"grad_norm": 0.3774151979891591,
"learning_rate": 7.6880734456178e-07,
"loss": 0.5308,
"step": 2064
},
{
"epoch": 2.517059301380991,
"grad_norm": 0.39200277210093626,
"learning_rate": 7.650314477493875e-07,
"loss": 0.5221,
"step": 2065
},
{
"epoch": 2.5182778229082046,
"grad_norm": 0.3987158423288902,
"learning_rate": 7.612640779049174e-07,
"loss": 0.5387,
"step": 2066
},
{
"epoch": 2.519496344435418,
"grad_norm": 0.3432299316334845,
"learning_rate": 7.575052426138424e-07,
"loss": 0.448,
"step": 2067
},
{
"epoch": 2.520714865962632,
"grad_norm": 0.40306877146829656,
"learning_rate": 7.537549494444502e-07,
"loss": 0.5319,
"step": 2068
},
{
"epoch": 2.5219333874898457,
"grad_norm": 0.3624054180666312,
"learning_rate": 7.500132059478327e-07,
"loss": 0.4755,
"step": 2069
},
{
"epoch": 2.5231519090170593,
"grad_norm": 0.3943720013643357,
"learning_rate": 7.462800196578662e-07,
"loss": 0.5517,
"step": 2070
},
{
"epoch": 2.524370430544273,
"grad_norm": 0.3760692184644974,
"learning_rate": 7.425553980911959e-07,
"loss": 0.5198,
"step": 2071
},
{
"epoch": 2.5255889520714865,
"grad_norm": 0.36875663183404517,
"learning_rate": 7.388393487472223e-07,
"loss": 0.5099,
"step": 2072
},
{
"epoch": 2.5268074735987005,
"grad_norm": 0.3765385801804941,
"learning_rate": 7.351318791080881e-07,
"loss": 0.4877,
"step": 2073
},
{
"epoch": 2.5280259951259136,
"grad_norm": 0.3880867322532877,
"learning_rate": 7.314329966386596e-07,
"loss": 0.5191,
"step": 2074
},
{
"epoch": 2.5292445166531277,
"grad_norm": 0.38480762192630874,
"learning_rate": 7.277427087865124e-07,
"loss": 0.5367,
"step": 2075
},
{
"epoch": 2.5304630381803412,
"grad_norm": 0.37367690807549686,
"learning_rate": 7.240610229819195e-07,
"loss": 0.4796,
"step": 2076
},
{
"epoch": 2.531681559707555,
"grad_norm": 0.356459470205227,
"learning_rate": 7.203879466378311e-07,
"loss": 0.4846,
"step": 2077
},
{
"epoch": 2.5329000812347684,
"grad_norm": 0.368312803237026,
"learning_rate": 7.167234871498646e-07,
"loss": 0.512,
"step": 2078
},
{
"epoch": 2.534118602761982,
"grad_norm": 0.42790949260764394,
"learning_rate": 7.130676518962859e-07,
"loss": 0.5199,
"step": 2079
},
{
"epoch": 2.535337124289196,
"grad_norm": 0.3760245356587111,
"learning_rate": 7.094204482379985e-07,
"loss": 0.5206,
"step": 2080
},
{
"epoch": 2.5365556458164096,
"grad_norm": 0.36529563925832975,
"learning_rate": 7.057818835185243e-07,
"loss": 0.5169,
"step": 2081
},
{
"epoch": 2.537774167343623,
"grad_norm": 0.37415123963436103,
"learning_rate": 7.021519650639952e-07,
"loss": 0.4682,
"step": 2082
},
{
"epoch": 2.5389926888708367,
"grad_norm": 0.3599256024573686,
"learning_rate": 6.985307001831266e-07,
"loss": 0.5237,
"step": 2083
},
{
"epoch": 2.5402112103980503,
"grad_norm": 0.37172969261280475,
"learning_rate": 6.949180961672159e-07,
"loss": 0.5229,
"step": 2084
},
{
"epoch": 2.541429731925264,
"grad_norm": 0.3692464609849223,
"learning_rate": 6.913141602901213e-07,
"loss": 0.4967,
"step": 2085
},
{
"epoch": 2.5426482534524775,
"grad_norm": 0.41481021551912467,
"learning_rate": 6.877188998082484e-07,
"loss": 0.5364,
"step": 2086
},
{
"epoch": 2.5438667749796915,
"grad_norm": 0.3587567944310898,
"learning_rate": 6.841323219605333e-07,
"loss": 0.477,
"step": 2087
},
{
"epoch": 2.545085296506905,
"grad_norm": 0.36227017983644627,
"learning_rate": 6.805544339684295e-07,
"loss": 0.5186,
"step": 2088
},
{
"epoch": 2.5463038180341186,
"grad_norm": 0.3848961894752312,
"learning_rate": 6.769852430358969e-07,
"loss": 0.494,
"step": 2089
},
{
"epoch": 2.5475223395613322,
"grad_norm": 0.400827672871941,
"learning_rate": 6.734247563493829e-07,
"loss": 0.5104,
"step": 2090
},
{
"epoch": 2.548740861088546,
"grad_norm": 0.3858206572812583,
"learning_rate": 6.698729810778065e-07,
"loss": 0.5203,
"step": 2091
},
{
"epoch": 2.5499593826157594,
"grad_norm": 0.39420570104347397,
"learning_rate": 6.663299243725512e-07,
"loss": 0.514,
"step": 2092
},
{
"epoch": 2.551177904142973,
"grad_norm": 0.37623344903141814,
"learning_rate": 6.627955933674412e-07,
"loss": 0.4675,
"step": 2093
},
{
"epoch": 2.552396425670187,
"grad_norm": 0.37984856280561025,
"learning_rate": 6.592699951787362e-07,
"loss": 0.5349,
"step": 2094
},
{
"epoch": 2.5536149471974006,
"grad_norm": 0.38942296808421134,
"learning_rate": 6.55753136905109e-07,
"loss": 0.5222,
"step": 2095
},
{
"epoch": 2.554833468724614,
"grad_norm": 0.38744941426091656,
"learning_rate": 6.522450256276363e-07,
"loss": 0.4997,
"step": 2096
},
{
"epoch": 2.5560519902518277,
"grad_norm": 0.40862429991424404,
"learning_rate": 6.487456684097848e-07,
"loss": 0.5409,
"step": 2097
},
{
"epoch": 2.5572705117790413,
"grad_norm": 0.37635062650001033,
"learning_rate": 6.452550722973927e-07,
"loss": 0.4627,
"step": 2098
},
{
"epoch": 2.5584890333062553,
"grad_norm": 0.4221777822228316,
"learning_rate": 6.417732443186575e-07,
"loss": 0.5358,
"step": 2099
},
{
"epoch": 2.5597075548334685,
"grad_norm": 0.39847174733267055,
"learning_rate": 6.383001914841252e-07,
"loss": 0.5012,
"step": 2100
},
{
"epoch": 2.5609260763606825,
"grad_norm": 0.3748715416676312,
"learning_rate": 6.348359207866722e-07,
"loss": 0.4956,
"step": 2101
},
{
"epoch": 2.562144597887896,
"grad_norm": 0.37750025006496746,
"learning_rate": 6.313804392014905e-07,
"loss": 0.4854,
"step": 2102
},
{
"epoch": 2.5633631194151096,
"grad_norm": 0.3998375296968308,
"learning_rate": 6.279337536860786e-07,
"loss": 0.5143,
"step": 2103
},
{
"epoch": 2.564581640942323,
"grad_norm": 0.3710721048856582,
"learning_rate": 6.244958711802213e-07,
"loss": 0.5591,
"step": 2104
},
{
"epoch": 2.565800162469537,
"grad_norm": 0.34868738151134687,
"learning_rate": 6.210667986059821e-07,
"loss": 0.4551,
"step": 2105
},
{
"epoch": 2.567018683996751,
"grad_norm": 0.35595641503961983,
"learning_rate": 6.17646542867682e-07,
"loss": 0.5152,
"step": 2106
},
{
"epoch": 2.5682372055239644,
"grad_norm": 0.36663979047928985,
"learning_rate": 6.142351108518929e-07,
"loss": 0.503,
"step": 2107
},
{
"epoch": 2.569455727051178,
"grad_norm": 0.34787252687208675,
"learning_rate": 6.108325094274209e-07,
"loss": 0.5031,
"step": 2108
},
{
"epoch": 2.5706742485783916,
"grad_norm": 0.39033263561688103,
"learning_rate": 6.074387454452891e-07,
"loss": 0.5214,
"step": 2109
},
{
"epoch": 2.571892770105605,
"grad_norm": 0.38512927731883373,
"learning_rate": 6.040538257387268e-07,
"loss": 0.5198,
"step": 2110
},
{
"epoch": 2.5731112916328187,
"grad_norm": 0.3590301126097114,
"learning_rate": 6.006777571231587e-07,
"loss": 0.5027,
"step": 2111
},
{
"epoch": 2.5743298131600323,
"grad_norm": 0.3732504638805604,
"learning_rate": 5.973105463961864e-07,
"loss": 0.5066,
"step": 2112
},
{
"epoch": 2.5755483346872463,
"grad_norm": 0.3729739011338398,
"learning_rate": 5.939522003375753e-07,
"loss": 0.4958,
"step": 2113
},
{
"epoch": 2.57676685621446,
"grad_norm": 0.37186730911837346,
"learning_rate": 5.906027257092444e-07,
"loss": 0.4761,
"step": 2114
},
{
"epoch": 2.5779853777416735,
"grad_norm": 0.3661760756265481,
"learning_rate": 5.872621292552477e-07,
"loss": 0.5327,
"step": 2115
},
{
"epoch": 2.579203899268887,
"grad_norm": 0.40542839626324956,
"learning_rate": 5.839304177017663e-07,
"loss": 0.5512,
"step": 2116
},
{
"epoch": 2.5804224207961006,
"grad_norm": 0.3840467276263846,
"learning_rate": 5.806075977570886e-07,
"loss": 0.4793,
"step": 2117
},
{
"epoch": 2.5816409423233146,
"grad_norm": 0.37820337565321277,
"learning_rate": 5.772936761116027e-07,
"loss": 0.506,
"step": 2118
},
{
"epoch": 2.582859463850528,
"grad_norm": 0.3797306170789339,
"learning_rate": 5.739886594377803e-07,
"loss": 0.508,
"step": 2119
},
{
"epoch": 2.584077985377742,
"grad_norm": 0.3828935693851265,
"learning_rate": 5.706925543901609e-07,
"loss": 0.5097,
"step": 2120
},
{
"epoch": 2.5852965069049554,
"grad_norm": 0.3900080504691436,
"learning_rate": 5.674053676053415e-07,
"loss": 0.5168,
"step": 2121
},
{
"epoch": 2.586515028432169,
"grad_norm": 0.3587725291460617,
"learning_rate": 5.641271057019637e-07,
"loss": 0.4565,
"step": 2122
},
{
"epoch": 2.5877335499593825,
"grad_norm": 0.3939424632788925,
"learning_rate": 5.608577752806987e-07,
"loss": 0.5494,
"step": 2123
},
{
"epoch": 2.588952071486596,
"grad_norm": 0.3725432276278501,
"learning_rate": 5.575973829242365e-07,
"loss": 0.4588,
"step": 2124
},
{
"epoch": 2.59017059301381,
"grad_norm": 0.38604468058456287,
"learning_rate": 5.543459351972635e-07,
"loss": 0.529,
"step": 2125
},
{
"epoch": 2.5913891145410237,
"grad_norm": 0.36341318860508387,
"learning_rate": 5.511034386464642e-07,
"loss": 0.494,
"step": 2126
},
{
"epoch": 2.5926076360682373,
"grad_norm": 0.35625493095798805,
"learning_rate": 5.478698998004967e-07,
"loss": 0.5456,
"step": 2127
},
{
"epoch": 2.593826157595451,
"grad_norm": 0.36227564286221264,
"learning_rate": 5.446453251699851e-07,
"loss": 0.514,
"step": 2128
},
{
"epoch": 2.5950446791226645,
"grad_norm": 0.3662431166869742,
"learning_rate": 5.414297212475012e-07,
"loss": 0.5157,
"step": 2129
},
{
"epoch": 2.596263200649878,
"grad_norm": 0.3558072452798451,
"learning_rate": 5.382230945075556e-07,
"loss": 0.4961,
"step": 2130
},
{
"epoch": 2.5974817221770916,
"grad_norm": 0.3795263836967965,
"learning_rate": 5.350254514065856e-07,
"loss": 0.5127,
"step": 2131
},
{
"epoch": 2.5987002437043056,
"grad_norm": 0.3690040036136185,
"learning_rate": 5.318367983829393e-07,
"loss": 0.4908,
"step": 2132
},
{
"epoch": 2.599918765231519,
"grad_norm": 0.3608821461773019,
"learning_rate": 5.286571418568615e-07,
"loss": 0.5289,
"step": 2133
},
{
"epoch": 2.601137286758733,
"grad_norm": 0.4006495491671045,
"learning_rate": 5.254864882304855e-07,
"loss": 0.5254,
"step": 2134
},
{
"epoch": 2.6023558082859464,
"grad_norm": 0.38150929128537214,
"learning_rate": 5.223248438878176e-07,
"loss": 0.4622,
"step": 2135
},
{
"epoch": 2.60357432981316,
"grad_norm": 0.400783680185111,
"learning_rate": 5.191722151947227e-07,
"loss": 0.5474,
"step": 2136
},
{
"epoch": 2.6047928513403735,
"grad_norm": 0.3662412318337768,
"learning_rate": 5.160286084989119e-07,
"loss": 0.536,
"step": 2137
},
{
"epoch": 2.606011372867587,
"grad_norm": 0.37308572148487257,
"learning_rate": 5.128940301299334e-07,
"loss": 0.4731,
"step": 2138
},
{
"epoch": 2.607229894394801,
"grad_norm": 0.39187078975715245,
"learning_rate": 5.097684863991575e-07,
"loss": 0.5249,
"step": 2139
},
{
"epoch": 2.6084484159220147,
"grad_norm": 0.3885064721528569,
"learning_rate": 5.066519835997613e-07,
"loss": 0.5225,
"step": 2140
},
{
"epoch": 2.6096669374492283,
"grad_norm": 0.41543896829402627,
"learning_rate": 5.03544528006718e-07,
"loss": 0.5476,
"step": 2141
},
{
"epoch": 2.610885458976442,
"grad_norm": 0.33915812403705176,
"learning_rate": 5.004461258767873e-07,
"loss": 0.4825,
"step": 2142
},
{
"epoch": 2.6121039805036554,
"grad_norm": 0.39963867108256157,
"learning_rate": 4.973567834484988e-07,
"loss": 0.4868,
"step": 2143
},
{
"epoch": 2.6133225020308695,
"grad_norm": 0.4052069661227251,
"learning_rate": 4.942765069421384e-07,
"loss": 0.5707,
"step": 2144
},
{
"epoch": 2.6145410235580826,
"grad_norm": 0.3744715850855104,
"learning_rate": 4.91205302559743e-07,
"loss": 0.4698,
"step": 2145
},
{
"epoch": 2.6157595450852966,
"grad_norm": 0.39172802789195654,
"learning_rate": 4.881431764850775e-07,
"loss": 0.5429,
"step": 2146
},
{
"epoch": 2.61697806661251,
"grad_norm": 0.3617617734796279,
"learning_rate": 4.850901348836328e-07,
"loss": 0.5195,
"step": 2147
},
{
"epoch": 2.618196588139724,
"grad_norm": 0.3582182319101665,
"learning_rate": 4.820461839026047e-07,
"loss": 0.5237,
"step": 2148
},
{
"epoch": 2.6194151096669374,
"grad_norm": 0.382565389265081,
"learning_rate": 4.79011329670887e-07,
"loss": 0.508,
"step": 2149
},
{
"epoch": 2.620633631194151,
"grad_norm": 0.36371999280375944,
"learning_rate": 4.7598557829905913e-07,
"loss": 0.5138,
"step": 2150
},
{
"epoch": 2.621852152721365,
"grad_norm": 0.36372813807546805,
"learning_rate": 4.729689358793693e-07,
"loss": 0.4863,
"step": 2151
},
{
"epoch": 2.6230706742485785,
"grad_norm": 0.4358272328748702,
"learning_rate": 4.699614084857257e-07,
"loss": 0.5501,
"step": 2152
},
{
"epoch": 2.624289195775792,
"grad_norm": 0.40082789201202496,
"learning_rate": 4.669630021736854e-07,
"loss": 0.4957,
"step": 2153
},
{
"epoch": 2.6255077173030057,
"grad_norm": 0.38531826765138316,
"learning_rate": 4.639737229804403e-07,
"loss": 0.5189,
"step": 2154
},
{
"epoch": 2.6267262388302193,
"grad_norm": 0.3510117904392168,
"learning_rate": 4.609935769248025e-07,
"loss": 0.4438,
"step": 2155
},
{
"epoch": 2.627944760357433,
"grad_norm": 0.3854632098940677,
"learning_rate": 4.5802257000719885e-07,
"loss": 0.5672,
"step": 2156
},
{
"epoch": 2.6291632818846464,
"grad_norm": 0.356713590588076,
"learning_rate": 4.5506070820964973e-07,
"loss": 0.4941,
"step": 2157
},
{
"epoch": 2.6303818034118605,
"grad_norm": 0.37107534196018116,
"learning_rate": 4.5210799749576815e-07,
"loss": 0.537,
"step": 2158
},
{
"epoch": 2.631600324939074,
"grad_norm": 0.36951174703750844,
"learning_rate": 4.4916444381073674e-07,
"loss": 0.487,
"step": 2159
},
{
"epoch": 2.6328188464662876,
"grad_norm": 0.3737744583819628,
"learning_rate": 4.4623005308130243e-07,
"loss": 0.5047,
"step": 2160
},
{
"epoch": 2.634037367993501,
"grad_norm": 0.41814109045623277,
"learning_rate": 4.433048312157651e-07,
"loss": 0.4921,
"step": 2161
},
{
"epoch": 2.6352558895207148,
"grad_norm": 0.38314084064991044,
"learning_rate": 4.4038878410396003e-07,
"loss": 0.545,
"step": 2162
},
{
"epoch": 2.636474411047929,
"grad_norm": 0.34232486717400545,
"learning_rate": 4.374819176172501e-07,
"loss": 0.451,
"step": 2163
},
{
"epoch": 2.637692932575142,
"grad_norm": 0.4161225048009829,
"learning_rate": 4.3458423760851523e-07,
"loss": 0.5468,
"step": 2164
},
{
"epoch": 2.638911454102356,
"grad_norm": 0.3670515864821956,
"learning_rate": 4.316957499121377e-07,
"loss": 0.5067,
"step": 2165
},
{
"epoch": 2.6401299756295695,
"grad_norm": 0.3624129319596192,
"learning_rate": 4.2881646034398926e-07,
"loss": 0.4816,
"step": 2166
},
{
"epoch": 2.641348497156783,
"grad_norm": 0.3972920967019095,
"learning_rate": 4.2594637470142587e-07,
"loss": 0.5452,
"step": 2167
},
{
"epoch": 2.6425670186839967,
"grad_norm": 0.36647997443354524,
"learning_rate": 4.230854987632671e-07,
"loss": 0.4962,
"step": 2168
},
{
"epoch": 2.6437855402112103,
"grad_norm": 0.38616087967711843,
"learning_rate": 4.2023383828979305e-07,
"loss": 0.5471,
"step": 2169
},
{
"epoch": 2.6450040617384243,
"grad_norm": 0.35103710867257426,
"learning_rate": 4.173913990227252e-07,
"loss": 0.4679,
"step": 2170
},
{
"epoch": 2.6462225832656374,
"grad_norm": 0.39309483512948734,
"learning_rate": 4.145581866852211e-07,
"loss": 0.5224,
"step": 2171
},
{
"epoch": 2.6474411047928514,
"grad_norm": 0.38439655446848475,
"learning_rate": 4.1173420698186027e-07,
"loss": 0.504,
"step": 2172
},
{
"epoch": 2.648659626320065,
"grad_norm": 0.3577220323312346,
"learning_rate": 4.089194655986306e-07,
"loss": 0.5131,
"step": 2173
},
{
"epoch": 2.6498781478472786,
"grad_norm": 0.36326136971896916,
"learning_rate": 4.0611396820291915e-07,
"loss": 0.5451,
"step": 2174
},
{
"epoch": 2.651096669374492,
"grad_norm": 0.36056618051796013,
"learning_rate": 4.0331772044350235e-07,
"loss": 0.5175,
"step": 2175
},
{
"epoch": 2.6523151909017058,
"grad_norm": 0.35979010393998906,
"learning_rate": 4.0053072795053163e-07,
"loss": 0.5057,
"step": 2176
},
{
"epoch": 2.6535337124289198,
"grad_norm": 0.38026873249239673,
"learning_rate": 3.9775299633552535e-07,
"loss": 0.5173,
"step": 2177
},
{
"epoch": 2.6547522339561334,
"grad_norm": 0.34812609416605766,
"learning_rate": 3.9498453119134917e-07,
"loss": 0.4774,
"step": 2178
},
{
"epoch": 2.655970755483347,
"grad_norm": 0.37873267778767,
"learning_rate": 3.9222533809221864e-07,
"loss": 0.5171,
"step": 2179
},
{
"epoch": 2.6571892770105605,
"grad_norm": 0.3876778226332137,
"learning_rate": 3.894754225936753e-07,
"loss": 0.5367,
"step": 2180
},
{
"epoch": 2.658407798537774,
"grad_norm": 0.37800746580583733,
"learning_rate": 3.8673479023258464e-07,
"loss": 0.5366,
"step": 2181
},
{
"epoch": 2.6596263200649877,
"grad_norm": 0.3592372247497727,
"learning_rate": 3.840034465271164e-07,
"loss": 0.4612,
"step": 2182
},
{
"epoch": 2.6608448415922012,
"grad_norm": 0.37676898244480056,
"learning_rate": 3.812813969767398e-07,
"loss": 0.5335,
"step": 2183
},
{
"epoch": 2.6620633631194153,
"grad_norm": 0.3889687505966972,
"learning_rate": 3.7856864706221187e-07,
"loss": 0.5379,
"step": 2184
},
{
"epoch": 2.663281884646629,
"grad_norm": 0.3429621452435135,
"learning_rate": 3.7586520224556444e-07,
"loss": 0.4249,
"step": 2185
},
{
"epoch": 2.6645004061738424,
"grad_norm": 0.4100593265019823,
"learning_rate": 3.731710679700923e-07,
"loss": 0.5571,
"step": 2186
},
{
"epoch": 2.665718927701056,
"grad_norm": 0.3754827320099358,
"learning_rate": 3.7048624966034506e-07,
"loss": 0.4772,
"step": 2187
},
{
"epoch": 2.6669374492282696,
"grad_norm": 0.506362418039737,
"learning_rate": 3.6781075272211643e-07,
"loss": 0.4898,
"step": 2188
},
{
"epoch": 2.6681559707554836,
"grad_norm": 0.39960584269392463,
"learning_rate": 3.6514458254242936e-07,
"loss": 0.5355,
"step": 2189
},
{
"epoch": 2.6693744922826967,
"grad_norm": 0.38884516821746157,
"learning_rate": 3.6248774448952695e-07,
"loss": 0.4607,
"step": 2190
},
{
"epoch": 2.6705930138099108,
"grad_norm": 0.38681697956869593,
"learning_rate": 3.598402439128656e-07,
"loss": 0.5662,
"step": 2191
},
{
"epoch": 2.6718115353371243,
"grad_norm": 0.3756082857300239,
"learning_rate": 3.572020861430997e-07,
"loss": 0.5143,
"step": 2192
},
{
"epoch": 2.673030056864338,
"grad_norm": 0.40002189934283794,
"learning_rate": 3.545732764920717e-07,
"loss": 0.5061,
"step": 2193
},
{
"epoch": 2.6742485783915515,
"grad_norm": 0.36384776166641386,
"learning_rate": 3.519538202528011e-07,
"loss": 0.504,
"step": 2194
},
{
"epoch": 2.675467099918765,
"grad_norm": 0.3788979703828696,
"learning_rate": 3.4934372269947613e-07,
"loss": 0.4801,
"step": 2195
},
{
"epoch": 2.676685621445979,
"grad_norm": 0.3818268978478761,
"learning_rate": 3.467429890874424e-07,
"loss": 0.5279,
"step": 2196
},
{
"epoch": 2.6779041429731927,
"grad_norm": 0.35141288719000796,
"learning_rate": 3.4415162465318843e-07,
"loss": 0.4803,
"step": 2197
},
{
"epoch": 2.6791226645004063,
"grad_norm": 0.39220258510601774,
"learning_rate": 3.4156963461434156e-07,
"loss": 0.5009,
"step": 2198
},
{
"epoch": 2.68034118602762,
"grad_norm": 0.4103479084725928,
"learning_rate": 3.3899702416965166e-07,
"loss": 0.6119,
"step": 2199
},
{
"epoch": 2.6815597075548334,
"grad_norm": 0.3797647584117213,
"learning_rate": 3.364337984989846e-07,
"loss": 0.4665,
"step": 2200
},
{
"epoch": 2.682778229082047,
"grad_norm": 0.3540938338082574,
"learning_rate": 3.3387996276330934e-07,
"loss": 0.4382,
"step": 2201
},
{
"epoch": 2.6839967506092606,
"grad_norm": 0.3743322734466896,
"learning_rate": 3.313355221046888e-07,
"loss": 0.5334,
"step": 2202
},
{
"epoch": 2.6852152721364746,
"grad_norm": 0.38933035048539233,
"learning_rate": 3.2880048164627087e-07,
"loss": 0.5351,
"step": 2203
},
{
"epoch": 2.686433793663688,
"grad_norm": 0.37060820135278527,
"learning_rate": 3.262748464922738e-07,
"loss": 0.5097,
"step": 2204
},
{
"epoch": 2.6876523151909018,
"grad_norm": 0.38495794293474345,
"learning_rate": 3.2375862172797866e-07,
"loss": 0.5678,
"step": 2205
},
{
"epoch": 2.6888708367181153,
"grad_norm": 0.36198556723986514,
"learning_rate": 3.212518124197217e-07,
"loss": 0.4704,
"step": 2206
},
{
"epoch": 2.690089358245329,
"grad_norm": 0.36538988897114305,
"learning_rate": 3.1875442361487987e-07,
"loss": 0.5394,
"step": 2207
},
{
"epoch": 2.6913078797725425,
"grad_norm": 0.3529695149923601,
"learning_rate": 3.1626646034186084e-07,
"loss": 0.4924,
"step": 2208
},
{
"epoch": 2.692526401299756,
"grad_norm": 0.3589469252207183,
"learning_rate": 3.1378792761009745e-07,
"loss": 0.5141,
"step": 2209
},
{
"epoch": 2.69374492282697,
"grad_norm": 0.3716457330306638,
"learning_rate": 3.1131883041003065e-07,
"loss": 0.5162,
"step": 2210
},
{
"epoch": 2.6949634443541837,
"grad_norm": 0.39002214118541273,
"learning_rate": 3.0885917371310745e-07,
"loss": 0.5371,
"step": 2211
},
{
"epoch": 2.6961819658813972,
"grad_norm": 0.38634067585511356,
"learning_rate": 3.0640896247176257e-07,
"loss": 0.5303,
"step": 2212
},
{
"epoch": 2.697400487408611,
"grad_norm": 0.3679719670190458,
"learning_rate": 3.039682016194162e-07,
"loss": 0.4844,
"step": 2213
},
{
"epoch": 2.6986190089358244,
"grad_norm": 0.3650561363535968,
"learning_rate": 3.015368960704584e-07,
"loss": 0.5491,
"step": 2214
},
{
"epoch": 2.6998375304630384,
"grad_norm": 0.35589244778023654,
"learning_rate": 2.9911505072024173e-07,
"loss": 0.4435,
"step": 2215
},
{
"epoch": 2.7010560519902516,
"grad_norm": 0.3971577742524068,
"learning_rate": 2.967026704450704e-07,
"loss": 0.5417,
"step": 2216
},
{
"epoch": 2.7022745735174656,
"grad_norm": 0.36768123246070666,
"learning_rate": 2.942997601021924e-07,
"loss": 0.4946,
"step": 2217
},
{
"epoch": 2.703493095044679,
"grad_norm": 0.3868406198179559,
"learning_rate": 2.9190632452978706e-07,
"loss": 0.5273,
"step": 2218
},
{
"epoch": 2.7047116165718927,
"grad_norm": 0.3708200192787325,
"learning_rate": 2.895223685469578e-07,
"loss": 0.5005,
"step": 2219
},
{
"epoch": 2.7059301380991063,
"grad_norm": 0.37953093384871284,
"learning_rate": 2.871478969537206e-07,
"loss": 0.5435,
"step": 2220
},
{
"epoch": 2.70714865962632,
"grad_norm": 0.3614383708651899,
"learning_rate": 2.847829145309933e-07,
"loss": 0.4749,
"step": 2221
},
{
"epoch": 2.708367181153534,
"grad_norm": 0.3737932290872502,
"learning_rate": 2.824274260405896e-07,
"loss": 0.5178,
"step": 2222
},
{
"epoch": 2.7095857026807475,
"grad_norm": 0.3678542573451642,
"learning_rate": 2.800814362252091e-07,
"loss": 0.5328,
"step": 2223
},
{
"epoch": 2.710804224207961,
"grad_norm": 0.3590587724355208,
"learning_rate": 2.7774494980842117e-07,
"loss": 0.488,
"step": 2224
},
{
"epoch": 2.7120227457351747,
"grad_norm": 0.37585088629389257,
"learning_rate": 2.754179714946653e-07,
"loss": 0.4925,
"step": 2225
},
{
"epoch": 2.7132412672623882,
"grad_norm": 0.3719950620904676,
"learning_rate": 2.7310050596923323e-07,
"loss": 0.4999,
"step": 2226
},
{
"epoch": 2.714459788789602,
"grad_norm": 0.3568715514712545,
"learning_rate": 2.7079255789826565e-07,
"loss": 0.4807,
"step": 2227
},
{
"epoch": 2.7156783103168154,
"grad_norm": 0.35482907121703094,
"learning_rate": 2.6849413192873816e-07,
"loss": 0.4793,
"step": 2228
},
{
"epoch": 2.7168968318440294,
"grad_norm": 0.395651827257008,
"learning_rate": 2.662052326884551e-07,
"loss": 0.544,
"step": 2229
},
{
"epoch": 2.718115353371243,
"grad_norm": 0.3935098863850654,
"learning_rate": 2.639258647860399e-07,
"loss": 0.5635,
"step": 2230
},
{
"epoch": 2.7193338748984566,
"grad_norm": 0.40165173172581203,
"learning_rate": 2.616560328109219e-07,
"loss": 0.4864,
"step": 2231
},
{
"epoch": 2.72055239642567,
"grad_norm": 0.37629992760618575,
"learning_rate": 2.593957413333331e-07,
"loss": 0.4642,
"step": 2232
},
{
"epoch": 2.7217709179528837,
"grad_norm": 0.39061948229389115,
"learning_rate": 2.571449949042942e-07,
"loss": 0.4931,
"step": 2233
},
{
"epoch": 2.7229894394800978,
"grad_norm": 0.37406505289417313,
"learning_rate": 2.549037980556096e-07,
"loss": 0.5149,
"step": 2234
},
{
"epoch": 2.724207961007311,
"grad_norm": 0.3865469301953249,
"learning_rate": 2.5267215529985346e-07,
"loss": 0.5662,
"step": 2235
},
{
"epoch": 2.725426482534525,
"grad_norm": 0.4176157521247648,
"learning_rate": 2.5045007113036315e-07,
"loss": 0.4846,
"step": 2236
},
{
"epoch": 2.7266450040617385,
"grad_norm": 0.3593515844357923,
"learning_rate": 2.4823755002123253e-07,
"loss": 0.5028,
"step": 2237
},
{
"epoch": 2.727863525588952,
"grad_norm": 0.3763091273664034,
"learning_rate": 2.4603459642729867e-07,
"loss": 0.4883,
"step": 2238
},
{
"epoch": 2.7290820471161656,
"grad_norm": 0.3254080082849582,
"learning_rate": 2.4384121478413403e-07,
"loss": 0.4552,
"step": 2239
},
{
"epoch": 2.7303005686433792,
"grad_norm": 0.3704714874750766,
"learning_rate": 2.416574095080404e-07,
"loss": 0.5491,
"step": 2240
},
{
"epoch": 2.7315190901705932,
"grad_norm": 0.36236149508058013,
"learning_rate": 2.394831849960377e-07,
"loss": 0.5425,
"step": 2241
},
{
"epoch": 2.732737611697807,
"grad_norm": 0.3749697833768894,
"learning_rate": 2.373185456258531e-07,
"loss": 0.5278,
"step": 2242
},
{
"epoch": 2.7339561332250204,
"grad_norm": 0.34972552754271036,
"learning_rate": 2.3516349575591568e-07,
"loss": 0.4618,
"step": 2243
},
{
"epoch": 2.735174654752234,
"grad_norm": 0.375574385039016,
"learning_rate": 2.330180397253473e-07,
"loss": 0.5175,
"step": 2244
},
{
"epoch": 2.7363931762794476,
"grad_norm": 0.37141386139233595,
"learning_rate": 2.3088218185395195e-07,
"loss": 0.5511,
"step": 2245
},
{
"epoch": 2.737611697806661,
"grad_norm": 0.3813893728380543,
"learning_rate": 2.2875592644220846e-07,
"loss": 0.4508,
"step": 2246
},
{
"epoch": 2.7388302193338747,
"grad_norm": 0.3775198226604257,
"learning_rate": 2.266392777712595e-07,
"loss": 0.4983,
"step": 2247
},
{
"epoch": 2.7400487408610887,
"grad_norm": 0.3750870423354922,
"learning_rate": 2.245322401029082e-07,
"loss": 0.5044,
"step": 2248
},
{
"epoch": 2.7412672623883023,
"grad_norm": 0.40047002764422285,
"learning_rate": 2.2243481767960483e-07,
"loss": 0.5827,
"step": 2249
},
{
"epoch": 2.742485783915516,
"grad_norm": 0.36949294277977374,
"learning_rate": 2.2034701472443854e-07,
"loss": 0.4752,
"step": 2250
},
{
"epoch": 2.7437043054427295,
"grad_norm": 0.3824346198409812,
"learning_rate": 2.1826883544113165e-07,
"loss": 0.5286,
"step": 2251
},
{
"epoch": 2.744922826969943,
"grad_norm": 0.3417914549036838,
"learning_rate": 2.1620028401402815e-07,
"loss": 0.4697,
"step": 2252
},
{
"epoch": 2.7461413484971566,
"grad_norm": 0.41461070303616865,
"learning_rate": 2.141413646080881e-07,
"loss": 0.5349,
"step": 2253
},
{
"epoch": 2.74735987002437,
"grad_norm": 0.37741611628599325,
"learning_rate": 2.1209208136887593e-07,
"loss": 0.5375,
"step": 2254
},
{
"epoch": 2.7485783915515842,
"grad_norm": 0.39321969452161015,
"learning_rate": 2.1005243842255552e-07,
"loss": 0.5025,
"step": 2255
},
{
"epoch": 2.749796913078798,
"grad_norm": 0.36043353231485903,
"learning_rate": 2.0802243987588068e-07,
"loss": 0.479,
"step": 2256
},
{
"epoch": 2.7510154346060114,
"grad_norm": 0.3771749654256085,
"learning_rate": 2.060020898161863e-07,
"loss": 0.5296,
"step": 2257
},
{
"epoch": 2.752233956133225,
"grad_norm": 0.36344961189872743,
"learning_rate": 2.0399139231137731e-07,
"loss": 0.513,
"step": 2258
},
{
"epoch": 2.7534524776604385,
"grad_norm": 0.37387207502104647,
"learning_rate": 2.019903514099275e-07,
"loss": 0.4837,
"step": 2259
},
{
"epoch": 2.7546709991876526,
"grad_norm": 0.4074258198058105,
"learning_rate": 1.999989711408662e-07,
"loss": 0.5165,
"step": 2260
},
{
"epoch": 2.7558895207148657,
"grad_norm": 0.40163838399796564,
"learning_rate": 1.9801725551377217e-07,
"loss": 0.484,
"step": 2261
},
{
"epoch": 2.7571080422420797,
"grad_norm": 0.39748068787202046,
"learning_rate": 1.9604520851876196e-07,
"loss": 0.5346,
"step": 2262
},
{
"epoch": 2.7583265637692933,
"grad_norm": 0.3837696844219795,
"learning_rate": 1.940828341264861e-07,
"loss": 0.5195,
"step": 2263
},
{
"epoch": 2.759545085296507,
"grad_norm": 0.372851636737616,
"learning_rate": 1.9213013628812173e-07,
"loss": 0.5025,
"step": 2264
},
{
"epoch": 2.7607636068237205,
"grad_norm": 0.3824295932103617,
"learning_rate": 1.9018711893535991e-07,
"loss": 0.4982,
"step": 2265
},
{
"epoch": 2.761982128350934,
"grad_norm": 0.3941575511286188,
"learning_rate": 1.8825378598040067e-07,
"loss": 0.4943,
"step": 2266
},
{
"epoch": 2.763200649878148,
"grad_norm": 0.3942171147413845,
"learning_rate": 1.863301413159474e-07,
"loss": 0.5597,
"step": 2267
},
{
"epoch": 2.7644191714053616,
"grad_norm": 0.3934183991195872,
"learning_rate": 1.8441618881519186e-07,
"loss": 0.483,
"step": 2268
},
{
"epoch": 2.765637692932575,
"grad_norm": 0.37982935450069527,
"learning_rate": 1.825119323318153e-07,
"loss": 0.4977,
"step": 2269
},
{
"epoch": 2.766856214459789,
"grad_norm": 0.35859063843934896,
"learning_rate": 1.8061737569997407e-07,
"loss": 0.5082,
"step": 2270
},
{
"epoch": 2.7680747359870024,
"grad_norm": 0.3882067848746523,
"learning_rate": 1.787325227342951e-07,
"loss": 0.5204,
"step": 2271
},
{
"epoch": 2.769293257514216,
"grad_norm": 0.3679939619950258,
"learning_rate": 1.768573772298665e-07,
"loss": 0.5395,
"step": 2272
},
{
"epoch": 2.7705117790414295,
"grad_norm": 0.3720953279482073,
"learning_rate": 1.7499194296223209e-07,
"loss": 0.5176,
"step": 2273
},
{
"epoch": 2.7717303005686436,
"grad_norm": 0.3858227060687811,
"learning_rate": 1.7313622368738014e-07,
"loss": 0.5067,
"step": 2274
},
{
"epoch": 2.772948822095857,
"grad_norm": 0.3747485160463101,
"learning_rate": 1.7129022314174015e-07,
"loss": 0.4811,
"step": 2275
},
{
"epoch": 2.7741673436230707,
"grad_norm": 0.38326441088479096,
"learning_rate": 1.694539450421734e-07,
"loss": 0.4991,
"step": 2276
},
{
"epoch": 2.7753858651502843,
"grad_norm": 0.34922626985376715,
"learning_rate": 1.6762739308596343e-07,
"loss": 0.5068,
"step": 2277
},
{
"epoch": 2.776604386677498,
"grad_norm": 0.37804059096779785,
"learning_rate": 1.6581057095081288e-07,
"loss": 0.4969,
"step": 2278
},
{
"epoch": 2.777822908204712,
"grad_norm": 0.39482724733491026,
"learning_rate": 1.640034822948311e-07,
"loss": 0.5356,
"step": 2279
},
{
"epoch": 2.779041429731925,
"grad_norm": 0.3564142364029908,
"learning_rate": 1.6220613075653201e-07,
"loss": 0.5082,
"step": 2280
},
{
"epoch": 2.780259951259139,
"grad_norm": 0.3808785336291618,
"learning_rate": 1.604185199548225e-07,
"loss": 0.5012,
"step": 2281
},
{
"epoch": 2.7814784727863526,
"grad_norm": 0.3466837847337903,
"learning_rate": 1.586406534889967e-07,
"loss": 0.5215,
"step": 2282
},
{
"epoch": 2.782696994313566,
"grad_norm": 0.35748226035408104,
"learning_rate": 1.5687253493873068e-07,
"loss": 0.4975,
"step": 2283
},
{
"epoch": 2.78391551584078,
"grad_norm": 0.38342498504946887,
"learning_rate": 1.5511416786407164e-07,
"loss": 0.499,
"step": 2284
},
{
"epoch": 2.7851340373679934,
"grad_norm": 0.38368966272625266,
"learning_rate": 1.5336555580543256e-07,
"loss": 0.5289,
"step": 2285
},
{
"epoch": 2.7863525588952074,
"grad_norm": 0.3761350556362916,
"learning_rate": 1.51626702283586e-07,
"loss": 0.5334,
"step": 2286
},
{
"epoch": 2.7875710804224205,
"grad_norm": 0.34488391672914276,
"learning_rate": 1.4989761079965583e-07,
"loss": 0.4731,
"step": 2287
},
{
"epoch": 2.7887896019496345,
"grad_norm": 0.3711824659401226,
"learning_rate": 1.4817828483510933e-07,
"loss": 0.5647,
"step": 2288
},
{
"epoch": 2.790008123476848,
"grad_norm": 0.36364946680537624,
"learning_rate": 1.4646872785175182e-07,
"loss": 0.5068,
"step": 2289
},
{
"epoch": 2.7912266450040617,
"grad_norm": 0.3640609104391418,
"learning_rate": 1.4476894329172042e-07,
"loss": 0.5129,
"step": 2290
},
{
"epoch": 2.7924451665312753,
"grad_norm": 0.36422428829864073,
"learning_rate": 1.4307893457747358e-07,
"loss": 0.5234,
"step": 2291
},
{
"epoch": 2.793663688058489,
"grad_norm": 0.3541446608840156,
"learning_rate": 1.4139870511178767e-07,
"loss": 0.5035,
"step": 2292
},
{
"epoch": 2.794882209585703,
"grad_norm": 0.3772886381428785,
"learning_rate": 1.3972825827774928e-07,
"loss": 0.5069,
"step": 2293
},
{
"epoch": 2.7961007311129165,
"grad_norm": 0.39173775427502877,
"learning_rate": 1.3806759743874688e-07,
"loss": 0.5421,
"step": 2294
},
{
"epoch": 2.79731925264013,
"grad_norm": 0.3652175638097129,
"learning_rate": 1.3641672593846632e-07,
"loss": 0.5213,
"step": 2295
},
{
"epoch": 2.7985377741673436,
"grad_norm": 0.36544225086563453,
"learning_rate": 1.3477564710088097e-07,
"loss": 0.4687,
"step": 2296
},
{
"epoch": 2.799756295694557,
"grad_norm": 0.3740249931778571,
"learning_rate": 1.3314436423024935e-07,
"loss": 0.518,
"step": 2297
},
{
"epoch": 2.8009748172217708,
"grad_norm": 0.37454883087183666,
"learning_rate": 1.3152288061110518e-07,
"loss": 0.4902,
"step": 2298
},
{
"epoch": 2.8021933387489844,
"grad_norm": 0.3629763422238737,
"learning_rate": 1.2991119950825138e-07,
"loss": 0.5329,
"step": 2299
},
{
"epoch": 2.8034118602761984,
"grad_norm": 0.3825610104170137,
"learning_rate": 1.2830932416675323e-07,
"loss": 0.5217,
"step": 2300
},
{
"epoch": 2.804630381803412,
"grad_norm": 0.35458779804689705,
"learning_rate": 1.2671725781193467e-07,
"loss": 0.482,
"step": 2301
},
{
"epoch": 2.8058489033306255,
"grad_norm": 0.4060620936449498,
"learning_rate": 1.251350036493676e-07,
"loss": 0.5396,
"step": 2302
},
{
"epoch": 2.807067424857839,
"grad_norm": 0.37363725202431575,
"learning_rate": 1.2356256486486806e-07,
"loss": 0.4898,
"step": 2303
},
{
"epoch": 2.8082859463850527,
"grad_norm": 0.35835154484517495,
"learning_rate": 1.2199994462448906e-07,
"loss": 0.493,
"step": 2304
},
{
"epoch": 2.8095044679122667,
"grad_norm": 0.42120102584212404,
"learning_rate": 1.2044714607451436e-07,
"loss": 0.5257,
"step": 2305
},
{
"epoch": 2.81072298943948,
"grad_norm": 0.3556943551485984,
"learning_rate": 1.1890417234145246e-07,
"loss": 0.5095,
"step": 2306
},
{
"epoch": 2.811941510966694,
"grad_norm": 0.38386985507916965,
"learning_rate": 1.1737102653202825e-07,
"loss": 0.5279,
"step": 2307
},
{
"epoch": 2.8131600324939074,
"grad_norm": 0.36317152822127746,
"learning_rate": 1.1584771173318076e-07,
"loss": 0.4927,
"step": 2308
},
{
"epoch": 2.814378554021121,
"grad_norm": 0.4011884968558589,
"learning_rate": 1.1433423101205321e-07,
"loss": 0.5282,
"step": 2309
},
{
"epoch": 2.8155970755483346,
"grad_norm": 0.3642862846860186,
"learning_rate": 1.1283058741598962e-07,
"loss": 0.4734,
"step": 2310
},
{
"epoch": 2.816815597075548,
"grad_norm": 0.38393727230766617,
"learning_rate": 1.1133678397252434e-07,
"loss": 0.5357,
"step": 2311
},
{
"epoch": 2.818034118602762,
"grad_norm": 0.3837474969152823,
"learning_rate": 1.0985282368938199e-07,
"loss": 0.5024,
"step": 2312
},
{
"epoch": 2.819252640129976,
"grad_norm": 0.4030179784599761,
"learning_rate": 1.0837870955446639e-07,
"loss": 0.5339,
"step": 2313
},
{
"epoch": 2.8204711616571894,
"grad_norm": 0.3539980646211641,
"learning_rate": 1.0691444453585775e-07,
"loss": 0.4979,
"step": 2314
},
{
"epoch": 2.821689683184403,
"grad_norm": 0.3490699508092425,
"learning_rate": 1.0546003158180496e-07,
"loss": 0.4861,
"step": 2315
},
{
"epoch": 2.8229082047116165,
"grad_norm": 0.3504210969211817,
"learning_rate": 1.0401547362071939e-07,
"loss": 0.4995,
"step": 2316
},
{
"epoch": 2.82412672623883,
"grad_norm": 0.3626756496837975,
"learning_rate": 1.0258077356117057e-07,
"loss": 0.5019,
"step": 2317
},
{
"epoch": 2.8253452477660437,
"grad_norm": 0.39120905301617104,
"learning_rate": 1.0115593429187942e-07,
"loss": 0.5056,
"step": 2318
},
{
"epoch": 2.8265637692932577,
"grad_norm": 0.38360724438244953,
"learning_rate": 9.974095868171164e-08,
"loss": 0.4574,
"step": 2319
},
{
"epoch": 2.8277822908204713,
"grad_norm": 0.4154087623735947,
"learning_rate": 9.833584957967491e-08,
"loss": 0.5459,
"step": 2320
},
{
"epoch": 2.829000812347685,
"grad_norm": 0.3684945590836955,
"learning_rate": 9.694060981490783e-08,
"loss": 0.5044,
"step": 2321
},
{
"epoch": 2.8302193338748984,
"grad_norm": 0.3759625212141255,
"learning_rate": 9.555524219667989e-08,
"loss": 0.5045,
"step": 2322
},
{
"epoch": 2.831437855402112,
"grad_norm": 0.3711414030240098,
"learning_rate": 9.417974951438203e-08,
"loss": 0.4909,
"step": 2323
},
{
"epoch": 2.8326563769293256,
"grad_norm": 0.40793520425182356,
"learning_rate": 9.281413453752386e-08,
"loss": 0.5911,
"step": 2324
},
{
"epoch": 2.833874898456539,
"grad_norm": 0.35038615442889337,
"learning_rate": 9.145840001572537e-08,
"loss": 0.5061,
"step": 2325
},
{
"epoch": 2.835093419983753,
"grad_norm": 0.33298280777144973,
"learning_rate": 9.011254867871244e-08,
"loss": 0.4843,
"step": 2326
},
{
"epoch": 2.8363119415109668,
"grad_norm": 0.37732042868872595,
"learning_rate": 8.877658323631188e-08,
"loss": 0.5434,
"step": 2327
},
{
"epoch": 2.8375304630381804,
"grad_norm": 0.3989852512203333,
"learning_rate": 8.745050637844532e-08,
"loss": 0.5179,
"step": 2328
},
{
"epoch": 2.838748984565394,
"grad_norm": 0.38524826151374814,
"learning_rate": 8.613432077512474e-08,
"loss": 0.5135,
"step": 2329
},
{
"epoch": 2.8399675060926075,
"grad_norm": 0.3561681050422304,
"learning_rate": 8.482802907644528e-08,
"loss": 0.5332,
"step": 2330
},
{
"epoch": 2.8411860276198215,
"grad_norm": 0.35905357929351883,
"learning_rate": 8.353163391258302e-08,
"loss": 0.4736,
"step": 2331
},
{
"epoch": 2.8424045491470347,
"grad_norm": 0.3771943557678179,
"learning_rate": 8.224513789378497e-08,
"loss": 0.4974,
"step": 2332
},
{
"epoch": 2.8436230706742487,
"grad_norm": 0.3689008459698471,
"learning_rate": 8.09685436103691e-08,
"loss": 0.5007,
"step": 2333
},
{
"epoch": 2.8448415922014623,
"grad_norm": 0.3946551692601427,
"learning_rate": 7.970185363271432e-08,
"loss": 0.5555,
"step": 2334
},
{
"epoch": 2.846060113728676,
"grad_norm": 0.3814135760411076,
"learning_rate": 7.844507051125937e-08,
"loss": 0.4953,
"step": 2335
},
{
"epoch": 2.8472786352558894,
"grad_norm": 0.3877741958314108,
"learning_rate": 7.71981967764951e-08,
"loss": 0.5059,
"step": 2336
},
{
"epoch": 2.848497156783103,
"grad_norm": 0.364018962944753,
"learning_rate": 7.59612349389599e-08,
"loss": 0.4948,
"step": 2337
},
{
"epoch": 2.849715678310317,
"grad_norm": 0.4075558874034061,
"learning_rate": 7.473418748923545e-08,
"loss": 0.5948,
"step": 2338
},
{
"epoch": 2.8509341998375306,
"grad_norm": 0.37855730911487784,
"learning_rate": 7.351705689794042e-08,
"loss": 0.424,
"step": 2339
},
{
"epoch": 2.852152721364744,
"grad_norm": 0.3738096901546095,
"learning_rate": 7.230984561572729e-08,
"loss": 0.5434,
"step": 2340
},
{
"epoch": 2.8533712428919578,
"grad_norm": 0.37627980072639083,
"learning_rate": 7.11125560732756e-08,
"loss": 0.4934,
"step": 2341
},
{
"epoch": 2.8545897644191713,
"grad_norm": 0.39211088076356204,
"learning_rate": 6.992519068128701e-08,
"loss": 0.4979,
"step": 2342
},
{
"epoch": 2.855808285946385,
"grad_norm": 0.36180712823693784,
"learning_rate": 6.8747751830483e-08,
"loss": 0.54,
"step": 2343
},
{
"epoch": 2.8570268074735985,
"grad_norm": 0.3422753121701595,
"learning_rate": 6.758024189159718e-08,
"loss": 0.4674,
"step": 2344
},
{
"epoch": 2.8582453290008125,
"grad_norm": 0.3670191252106066,
"learning_rate": 6.64226632153725e-08,
"loss": 0.5208,
"step": 2345
},
{
"epoch": 2.859463850528026,
"grad_norm": 0.3796974952937155,
"learning_rate": 6.527501813255344e-08,
"loss": 0.5399,
"step": 2346
},
{
"epoch": 2.8606823720552397,
"grad_norm": 0.36481445761250786,
"learning_rate": 6.413730895388714e-08,
"loss": 0.5072,
"step": 2347
},
{
"epoch": 2.8619008935824533,
"grad_norm": 0.3636784036139616,
"learning_rate": 6.300953797011178e-08,
"loss": 0.5291,
"step": 2348
},
{
"epoch": 2.863119415109667,
"grad_norm": 0.35701639845422045,
"learning_rate": 6.18917074519565e-08,
"loss": 0.503,
"step": 2349
},
{
"epoch": 2.864337936636881,
"grad_norm": 0.3780638398451471,
"learning_rate": 6.078381965013646e-08,
"loss": 0.526,
"step": 2350
},
{
"epoch": 2.865556458164094,
"grad_norm": 0.3863922669183168,
"learning_rate": 5.968587679534621e-08,
"loss": 0.4887,
"step": 2351
},
{
"epoch": 2.866774979691308,
"grad_norm": 0.36868437000440496,
"learning_rate": 5.8597881098257924e-08,
"loss": 0.535,
"step": 2352
},
{
"epoch": 2.8679935012185216,
"grad_norm": 0.3667806587451281,
"learning_rate": 5.751983474951317e-08,
"loss": 0.5357,
"step": 2353
},
{
"epoch": 2.869212022745735,
"grad_norm": 0.35160456413985003,
"learning_rate": 5.6451739919723417e-08,
"loss": 0.4966,
"step": 2354
},
{
"epoch": 2.8704305442729487,
"grad_norm": 0.3702286255153397,
"learning_rate": 5.539359875946171e-08,
"loss": 0.5364,
"step": 2355
},
{
"epoch": 2.8716490658001623,
"grad_norm": 0.3545425220241147,
"learning_rate": 5.434541339926047e-08,
"loss": 0.4989,
"step": 2356
},
{
"epoch": 2.8728675873273763,
"grad_norm": 0.37190280510667345,
"learning_rate": 5.3307185949605935e-08,
"loss": 0.5177,
"step": 2357
},
{
"epoch": 2.87408610885459,
"grad_norm": 0.39562598350439043,
"learning_rate": 5.227891850093314e-08,
"loss": 0.5489,
"step": 2358
},
{
"epoch": 2.8753046303818035,
"grad_norm": 0.3653775053445267,
"learning_rate": 5.12606131236254e-08,
"loss": 0.485,
"step": 2359
},
{
"epoch": 2.876523151909017,
"grad_norm": 0.3641584823261951,
"learning_rate": 5.025227186800652e-08,
"loss": 0.5217,
"step": 2360
},
{
"epoch": 2.8777416734362307,
"grad_norm": 0.38104906167780134,
"learning_rate": 4.925389676433745e-08,
"loss": 0.485,
"step": 2361
},
{
"epoch": 2.8789601949634442,
"grad_norm": 0.39160093264511175,
"learning_rate": 4.8265489822814094e-08,
"loss": 0.515,
"step": 2362
},
{
"epoch": 2.880178716490658,
"grad_norm": 0.37954553391255796,
"learning_rate": 4.728705303356007e-08,
"loss": 0.4743,
"step": 2363
},
{
"epoch": 2.881397238017872,
"grad_norm": 0.3786501167734631,
"learning_rate": 4.631858836662562e-08,
"loss": 0.5282,
"step": 2364
},
{
"epoch": 2.8826157595450854,
"grad_norm": 0.3847521896579275,
"learning_rate": 4.536009777198203e-08,
"loss": 0.4954,
"step": 2365
},
{
"epoch": 2.883834281072299,
"grad_norm": 0.3686885036028533,
"learning_rate": 4.441158317951777e-08,
"loss": 0.5,
"step": 2366
},
{
"epoch": 2.8850528025995126,
"grad_norm": 0.3476414945698473,
"learning_rate": 4.347304649903572e-08,
"loss": 0.5112,
"step": 2367
},
{
"epoch": 2.886271324126726,
"grad_norm": 0.3501811924875589,
"learning_rate": 4.2544489620248155e-08,
"loss": 0.5212,
"step": 2368
},
{
"epoch": 2.8874898456539397,
"grad_norm": 0.3522575951304983,
"learning_rate": 4.162591441277341e-08,
"loss": 0.5216,
"step": 2369
},
{
"epoch": 2.8887083671811533,
"grad_norm": 0.34261605478893353,
"learning_rate": 4.071732272613149e-08,
"loss": 0.4688,
"step": 2370
},
{
"epoch": 2.8899268887083673,
"grad_norm": 0.3647353980991927,
"learning_rate": 3.981871638974177e-08,
"loss": 0.5131,
"step": 2371
},
{
"epoch": 2.891145410235581,
"grad_norm": 0.37709939404151627,
"learning_rate": 3.8930097212918625e-08,
"loss": 0.5103,
"step": 2372
},
{
"epoch": 2.8923639317627945,
"grad_norm": 0.35808464221731223,
"learning_rate": 3.805146698486695e-08,
"loss": 0.4684,
"step": 2373
},
{
"epoch": 2.893582453290008,
"grad_norm": 0.3812340713874895,
"learning_rate": 3.7182827474678273e-08,
"loss": 0.5575,
"step": 2374
},
{
"epoch": 2.8948009748172217,
"grad_norm": 0.36259528565916516,
"learning_rate": 3.632418043133079e-08,
"loss": 0.513,
"step": 2375
},
{
"epoch": 2.8960194963444357,
"grad_norm": 0.3422911070110535,
"learning_rate": 3.5475527583681005e-08,
"loss": 0.4727,
"step": 2376
},
{
"epoch": 2.897238017871649,
"grad_norm": 0.3609331496351682,
"learning_rate": 3.463687064046317e-08,
"loss": 0.529,
"step": 2377
},
{
"epoch": 2.898456539398863,
"grad_norm": 0.3819585041721209,
"learning_rate": 3.3808211290284886e-08,
"loss": 0.481,
"step": 2378
},
{
"epoch": 2.8996750609260764,
"grad_norm": 0.3731434598411168,
"learning_rate": 3.2989551201624836e-08,
"loss": 0.5226,
"step": 2379
},
{
"epoch": 2.90089358245329,
"grad_norm": 0.3885400700621752,
"learning_rate": 3.2180892022826705e-08,
"loss": 0.5329,
"step": 2380
},
{
"epoch": 2.9021121039805036,
"grad_norm": 0.3572845398778449,
"learning_rate": 3.138223538209973e-08,
"loss": 0.4753,
"step": 2381
},
{
"epoch": 2.903330625507717,
"grad_norm": 0.3614383781834413,
"learning_rate": 3.059358288751202e-08,
"loss": 0.5409,
"step": 2382
},
{
"epoch": 2.904549147034931,
"grad_norm": 0.37189556032401827,
"learning_rate": 2.981493612698838e-08,
"loss": 0.5195,
"step": 2383
},
{
"epoch": 2.9057676685621447,
"grad_norm": 0.3789215584826568,
"learning_rate": 2.9046296668309716e-08,
"loss": 0.5074,
"step": 2384
},
{
"epoch": 2.9069861900893583,
"grad_norm": 0.370512893367381,
"learning_rate": 2.8287666059104713e-08,
"loss": 0.5191,
"step": 2385
},
{
"epoch": 2.908204711616572,
"grad_norm": 0.38297199689289846,
"learning_rate": 2.753904582685096e-08,
"loss": 0.4737,
"step": 2386
},
{
"epoch": 2.9094232331437855,
"grad_norm": 0.3959947838736986,
"learning_rate": 2.6800437478870512e-08,
"loss": 0.5115,
"step": 2387
},
{
"epoch": 2.910641754670999,
"grad_norm": 0.3957470998942836,
"learning_rate": 2.6071842502326526e-08,
"loss": 0.5063,
"step": 2388
},
{
"epoch": 2.9118602761982126,
"grad_norm": 0.3723580924084453,
"learning_rate": 2.535326236422053e-08,
"loss": 0.4892,
"step": 2389
},
{
"epoch": 2.9130787977254267,
"grad_norm": 0.37260731194295516,
"learning_rate": 2.464469851139073e-08,
"loss": 0.5542,
"step": 2390
},
{
"epoch": 2.9142973192526402,
"grad_norm": 0.35868024153034395,
"learning_rate": 2.394615237050535e-08,
"loss": 0.523,
"step": 2391
},
{
"epoch": 2.915515840779854,
"grad_norm": 0.36371587526669685,
"learning_rate": 2.3257625348064306e-08,
"loss": 0.4825,
"step": 2392
},
{
"epoch": 2.9167343623070674,
"grad_norm": 0.38007578965240923,
"learning_rate": 2.2579118830393654e-08,
"loss": 0.5096,
"step": 2393
},
{
"epoch": 2.917952883834281,
"grad_norm": 0.3894212168804138,
"learning_rate": 2.1910634183644475e-08,
"loss": 0.4839,
"step": 2394
},
{
"epoch": 2.9191714053614946,
"grad_norm": 0.40052317739398724,
"learning_rate": 2.1252172753787324e-08,
"loss": 0.5651,
"step": 2395
},
{
"epoch": 2.920389926888708,
"grad_norm": 0.35188112107988123,
"learning_rate": 2.060373586661224e-08,
"loss": 0.4977,
"step": 2396
},
{
"epoch": 2.921608448415922,
"grad_norm": 0.3499426602627878,
"learning_rate": 1.996532482772595e-08,
"loss": 0.4519,
"step": 2397
},
{
"epoch": 2.9228269699431357,
"grad_norm": 0.3812985888964239,
"learning_rate": 1.933694092254801e-08,
"loss": 0.5197,
"step": 2398
},
{
"epoch": 2.9240454914703493,
"grad_norm": 0.37294376101878496,
"learning_rate": 1.8718585416307443e-08,
"loss": 0.5252,
"step": 2399
},
{
"epoch": 2.925264012997563,
"grad_norm": 0.390284947297633,
"learning_rate": 1.811025955404333e-08,
"loss": 0.4939,
"step": 2400
},
{
"epoch": 2.9264825345247765,
"grad_norm": 0.38299656914528474,
"learning_rate": 1.751196456059867e-08,
"loss": 0.5282,
"step": 2401
},
{
"epoch": 2.9277010560519905,
"grad_norm": 0.4033256657794055,
"learning_rate": 1.6923701640621514e-08,
"loss": 0.5516,
"step": 2402
},
{
"epoch": 2.9289195775792036,
"grad_norm": 0.3564131129340729,
"learning_rate": 1.6345471978558847e-08,
"loss": 0.4492,
"step": 2403
},
{
"epoch": 2.9301380991064176,
"grad_norm": 0.3696818550866779,
"learning_rate": 1.577727673865659e-08,
"loss": 0.5282,
"step": 2404
},
{
"epoch": 2.9313566206336312,
"grad_norm": 0.3511572891694367,
"learning_rate": 1.5219117064957934e-08,
"loss": 0.5573,
"step": 2405
},
{
"epoch": 2.932575142160845,
"grad_norm": 0.3777779794080296,
"learning_rate": 1.4670994081297796e-08,
"loss": 0.4964,
"step": 2406
},
{
"epoch": 2.9337936636880584,
"grad_norm": 0.3408974732458375,
"learning_rate": 1.413290889130392e-08,
"loss": 0.5008,
"step": 2407
},
{
"epoch": 2.935012185215272,
"grad_norm": 0.35326628190060844,
"learning_rate": 1.3604862578392996e-08,
"loss": 0.4734,
"step": 2408
},
{
"epoch": 2.936230706742486,
"grad_norm": 0.3885332399006649,
"learning_rate": 1.3086856205768439e-08,
"loss": 0.5695,
"step": 2409
},
{
"epoch": 2.9374492282696996,
"grad_norm": 0.36720005893901214,
"learning_rate": 1.257889081641872e-08,
"loss": 0.4626,
"step": 2410
},
{
"epoch": 2.938667749796913,
"grad_norm": 0.38654367810832296,
"learning_rate": 1.208096743311571e-08,
"loss": 0.5201,
"step": 2411
},
{
"epoch": 2.9398862713241267,
"grad_norm": 0.35989398437475195,
"learning_rate": 1.159308705841078e-08,
"loss": 0.524,
"step": 2412
},
{
"epoch": 2.9411047928513403,
"grad_norm": 0.36447060978418805,
"learning_rate": 1.111525067463537e-08,
"loss": 0.4977,
"step": 2413
},
{
"epoch": 2.942323314378554,
"grad_norm": 0.3941566187065989,
"learning_rate": 1.0647459243897095e-08,
"loss": 0.5241,
"step": 2414
},
{
"epoch": 2.9435418359057675,
"grad_norm": 0.3971195756945423,
"learning_rate": 1.0189713708078086e-08,
"loss": 0.5083,
"step": 2415
},
{
"epoch": 2.9447603574329815,
"grad_norm": 0.3617746907444414,
"learning_rate": 9.74201498883387e-09,
"loss": 0.4824,
"step": 2416
},
{
"epoch": 2.945978878960195,
"grad_norm": 0.38983833247818245,
"learning_rate": 9.304363987591158e-09,
"loss": 0.5426,
"step": 2417
},
{
"epoch": 2.9471974004874086,
"grad_norm": 0.3739423997155145,
"learning_rate": 8.87676158554507e-09,
"loss": 0.4452,
"step": 2418
},
{
"epoch": 2.948415922014622,
"grad_norm": 0.37952476055522766,
"learning_rate": 8.459208643659122e-09,
"loss": 0.5432,
"step": 2419
},
{
"epoch": 2.949634443541836,
"grad_norm": 0.3634354801661458,
"learning_rate": 8.051706002661919e-09,
"loss": 0.5223,
"step": 2420
},
{
"epoch": 2.95085296506905,
"grad_norm": 0.3655917645054435,
"learning_rate": 7.65425448304713e-09,
"loss": 0.4784,
"step": 2421
},
{
"epoch": 2.952071486596263,
"grad_norm": 0.3878857225849821,
"learning_rate": 7.266854885069619e-09,
"loss": 0.536,
"step": 2422
},
{
"epoch": 2.953290008123477,
"grad_norm": 0.3860010124815134,
"learning_rate": 6.889507988745436e-09,
"loss": 0.5343,
"step": 2423
},
{
"epoch": 2.9545085296506906,
"grad_norm": 0.37137826994221546,
"learning_rate": 6.5222145538501595e-09,
"loss": 0.4683,
"step": 2424
},
{
"epoch": 2.955727051177904,
"grad_norm": 0.40358716261973293,
"learning_rate": 6.164975319917221e-09,
"loss": 0.5118,
"step": 2425
},
{
"epoch": 2.9569455727051177,
"grad_norm": 0.3805519912768825,
"learning_rate": 5.817791006235141e-09,
"loss": 0.5446,
"step": 2426
},
{
"epoch": 2.9581640942323313,
"grad_norm": 0.3645099929908209,
"learning_rate": 5.480662311848628e-09,
"loss": 0.4789,
"step": 2427
},
{
"epoch": 2.9593826157595453,
"grad_norm": 0.36366977745298745,
"learning_rate": 5.153589915554702e-09,
"loss": 0.5268,
"step": 2428
},
{
"epoch": 2.960601137286759,
"grad_norm": 0.35084949812297817,
"learning_rate": 4.836574475903244e-09,
"loss": 0.4545,
"step": 2429
},
{
"epoch": 2.9618196588139725,
"grad_norm": 0.3688342331523407,
"learning_rate": 4.5296166311931125e-09,
"loss": 0.5512,
"step": 2430
},
{
"epoch": 2.963038180341186,
"grad_norm": 0.3521398793576215,
"learning_rate": 4.232716999474917e-09,
"loss": 0.5379,
"step": 2431
},
{
"epoch": 2.9642567018683996,
"grad_norm": 0.3706937567678196,
"learning_rate": 3.9458761785460266e-09,
"loss": 0.5445,
"step": 2432
},
{
"epoch": 2.965475223395613,
"grad_norm": 0.35060638658108007,
"learning_rate": 3.669094745950008e-09,
"loss": 0.5027,
"step": 2433
},
{
"epoch": 2.966693744922827,
"grad_norm": 0.33178253347322945,
"learning_rate": 3.4023732589777426e-09,
"loss": 0.4681,
"step": 2434
},
{
"epoch": 2.967912266450041,
"grad_norm": 0.35617115102694386,
"learning_rate": 3.1457122546635353e-09,
"loss": 0.5019,
"step": 2435
},
{
"epoch": 2.9691307879772544,
"grad_norm": 0.3548326105732938,
"learning_rate": 2.899112249786229e-09,
"loss": 0.5276,
"step": 2436
},
{
"epoch": 2.970349309504468,
"grad_norm": 0.3984987781160192,
"learning_rate": 2.6625737408669804e-09,
"loss": 0.5172,
"step": 2437
},
{
"epoch": 2.9715678310316815,
"grad_norm": 0.362754815960633,
"learning_rate": 2.436097204167043e-09,
"loss": 0.5206,
"step": 2438
},
{
"epoch": 2.972786352558895,
"grad_norm": 0.38356526868895263,
"learning_rate": 2.2196830956905392e-09,
"loss": 0.4762,
"step": 2439
},
{
"epoch": 2.9740048740861087,
"grad_norm": 0.37390570550389457,
"learning_rate": 2.0133318511800227e-09,
"loss": 0.5343,
"step": 2440
},
{
"epoch": 2.9752233956133223,
"grad_norm": 0.35679383760064753,
"learning_rate": 1.8170438861159212e-09,
"loss": 0.4894,
"step": 2441
},
{
"epoch": 2.9764419171405363,
"grad_norm": 0.3592057260000064,
"learning_rate": 1.6308195957182028e-09,
"loss": 0.5362,
"step": 2442
},
{
"epoch": 2.97766043866775,
"grad_norm": 0.3259038266407775,
"learning_rate": 1.4546593549424892e-09,
"loss": 0.4727,
"step": 2443
},
{
"epoch": 2.9788789601949635,
"grad_norm": 0.37363652956982973,
"learning_rate": 1.2885635184828326e-09,
"loss": 0.5277,
"step": 2444
},
{
"epoch": 2.980097481722177,
"grad_norm": 0.35379174534673236,
"learning_rate": 1.1325324207667187e-09,
"loss": 0.4837,
"step": 2445
},
{
"epoch": 2.9813160032493906,
"grad_norm": 0.3675995235345453,
"learning_rate": 9.865663759578426e-10,
"loss": 0.5461,
"step": 2446
},
{
"epoch": 2.9825345247766046,
"grad_norm": 0.37636771913858935,
"learning_rate": 8.50665677953888e-10,
"loss": 0.4862,
"step": 2447
},
{
"epoch": 2.9837530463038178,
"grad_norm": 0.3681022019205161,
"learning_rate": 7.24830600386528e-10,
"loss": 0.4963,
"step": 2448
},
{
"epoch": 2.984971567831032,
"grad_norm": 0.3746370138837488,
"learning_rate": 6.09061396620314e-10,
"loss": 0.534,
"step": 2449
},
{
"epoch": 2.9861900893582454,
"grad_norm": 0.36486933688431966,
"learning_rate": 5.033582997526765e-10,
"loss": 0.577,
"step": 2450
},
{
"epoch": 2.987408610885459,
"grad_norm": 0.3650821349016925,
"learning_rate": 4.0772152261336906e-10,
"loss": 0.4508,
"step": 2451
},
{
"epoch": 2.9886271324126725,
"grad_norm": 0.380070504025212,
"learning_rate": 3.221512577639141e-10,
"loss": 0.5051,
"step": 2452
},
{
"epoch": 2.989845653939886,
"grad_norm": 0.3786185974021647,
"learning_rate": 2.466476774970472e-10,
"loss": 0.4845,
"step": 2453
},
{
"epoch": 2.9910641754671,
"grad_norm": 0.360482935976555,
"learning_rate": 1.812109338367174e-10,
"loss": 0.5414,
"step": 2454
},
{
"epoch": 2.9922826969943137,
"grad_norm": 0.372228898631232,
"learning_rate": 1.2584115853808697e-10,
"loss": 0.5194,
"step": 2455
},
{
"epoch": 2.9935012185215273,
"grad_norm": 0.38381176412541346,
"learning_rate": 8.053846308531122e-11,
"loss": 0.4912,
"step": 2456
},
{
"epoch": 2.994719740048741,
"grad_norm": 0.3934048198873252,
"learning_rate": 4.53029386948689e-11,
"loss": 0.5137,
"step": 2457
},
{
"epoch": 2.9959382615759544,
"grad_norm": 0.38117458951656574,
"learning_rate": 2.0134656311676658e-11,
"loss": 0.5353,
"step": 2458
},
{
"epoch": 2.997156783103168,
"grad_norm": 0.38135142008825745,
"learning_rate": 5.033666611864441e-12,
"loss": 0.4967,
"step": 2459
},
{
"epoch": 2.9983753046303816,
"grad_norm": 0.36779717909800114,
"learning_rate": 0.0,
"loss": 0.5127,
"step": 2460
},
{
"epoch": 2.9983753046303816,
"step": 2460,
"total_flos": 2708356203970560.0,
"train_loss": 0.5759637464110444,
"train_runtime": 38974.7056,
"train_samples_per_second": 6.062,
"train_steps_per_second": 0.063
}
],
"logging_steps": 1,
"max_steps": 2460,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2708356203970560.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}