pcagent_human_only_adapter / trainer_state.json
wonwonn's picture
Upload LoRA adapter (pcagent-7B-human-only)
8bb44ef verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 1388,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.001440922190201729,
"grad_norm": 0.7939718961715698,
"learning_rate": 0.0,
"loss": 1.1426048278808594,
"step": 1
},
{
"epoch": 0.002881844380403458,
"grad_norm": 0.7847036123275757,
"learning_rate": 2.8571428571428575e-07,
"loss": 1.2942605018615723,
"step": 2
},
{
"epoch": 0.004322766570605188,
"grad_norm": 0.8253518342971802,
"learning_rate": 5.714285714285715e-07,
"loss": 1.283416748046875,
"step": 3
},
{
"epoch": 0.005763688760806916,
"grad_norm": 0.8199485540390015,
"learning_rate": 8.571428571428572e-07,
"loss": 1.3191412687301636,
"step": 4
},
{
"epoch": 0.007204610951008645,
"grad_norm": 0.7191706895828247,
"learning_rate": 1.142857142857143e-06,
"loss": 1.127804160118103,
"step": 5
},
{
"epoch": 0.008645533141210375,
"grad_norm": 0.7180572748184204,
"learning_rate": 1.4285714285714286e-06,
"loss": 1.280735969543457,
"step": 6
},
{
"epoch": 0.010086455331412104,
"grad_norm": 0.7501729726791382,
"learning_rate": 1.7142857142857145e-06,
"loss": 1.1184178590774536,
"step": 7
},
{
"epoch": 0.011527377521613832,
"grad_norm": 0.7057927846908569,
"learning_rate": 2.0000000000000003e-06,
"loss": 1.2702372074127197,
"step": 8
},
{
"epoch": 0.012968299711815562,
"grad_norm": 0.6871187686920166,
"learning_rate": 2.285714285714286e-06,
"loss": 1.2448116540908813,
"step": 9
},
{
"epoch": 0.01440922190201729,
"grad_norm": 0.8333551287651062,
"learning_rate": 2.571428571428571e-06,
"loss": 1.1602749824523926,
"step": 10
},
{
"epoch": 0.01585014409221902,
"grad_norm": 0.7824198007583618,
"learning_rate": 2.8571428571428573e-06,
"loss": 1.3133487701416016,
"step": 11
},
{
"epoch": 0.01729106628242075,
"grad_norm": 0.7421954274177551,
"learning_rate": 3.142857142857143e-06,
"loss": 1.4090148210525513,
"step": 12
},
{
"epoch": 0.018731988472622477,
"grad_norm": 0.7107607126235962,
"learning_rate": 3.428571428571429e-06,
"loss": 1.0468783378601074,
"step": 13
},
{
"epoch": 0.020172910662824207,
"grad_norm": 0.7520753145217896,
"learning_rate": 3.7142857142857146e-06,
"loss": 1.2376011610031128,
"step": 14
},
{
"epoch": 0.021613832853025938,
"grad_norm": 0.7466248273849487,
"learning_rate": 4.000000000000001e-06,
"loss": 1.1968696117401123,
"step": 15
},
{
"epoch": 0.023054755043227664,
"grad_norm": 0.6783359050750732,
"learning_rate": 4.2857142857142855e-06,
"loss": 1.2191252708435059,
"step": 16
},
{
"epoch": 0.024495677233429394,
"grad_norm": 0.6667131781578064,
"learning_rate": 4.571428571428572e-06,
"loss": 1.088866114616394,
"step": 17
},
{
"epoch": 0.025936599423631124,
"grad_norm": 0.6895946860313416,
"learning_rate": 4.857142857142858e-06,
"loss": 1.3629319667816162,
"step": 18
},
{
"epoch": 0.027377521613832854,
"grad_norm": 0.6927962303161621,
"learning_rate": 5.142857142857142e-06,
"loss": 1.1627918481826782,
"step": 19
},
{
"epoch": 0.02881844380403458,
"grad_norm": 0.5862833857536316,
"learning_rate": 5.428571428571429e-06,
"loss": 0.9320468902587891,
"step": 20
},
{
"epoch": 0.03025936599423631,
"grad_norm": 0.5858862400054932,
"learning_rate": 5.7142857142857145e-06,
"loss": 1.1411528587341309,
"step": 21
},
{
"epoch": 0.03170028818443804,
"grad_norm": 0.691691517829895,
"learning_rate": 6e-06,
"loss": 0.9531513452529907,
"step": 22
},
{
"epoch": 0.03314121037463977,
"grad_norm": 0.49931254982948303,
"learning_rate": 6.285714285714286e-06,
"loss": 0.949053943157196,
"step": 23
},
{
"epoch": 0.0345821325648415,
"grad_norm": 0.5515104532241821,
"learning_rate": 6.571428571428572e-06,
"loss": 1.3075839281082153,
"step": 24
},
{
"epoch": 0.03602305475504323,
"grad_norm": 0.6252418160438538,
"learning_rate": 6.857142857142858e-06,
"loss": 1.134469985961914,
"step": 25
},
{
"epoch": 0.037463976945244955,
"grad_norm": 0.4665541648864746,
"learning_rate": 7.1428571428571436e-06,
"loss": 0.9274802207946777,
"step": 26
},
{
"epoch": 0.03890489913544669,
"grad_norm": 0.47294583916664124,
"learning_rate": 7.428571428571429e-06,
"loss": 0.9820688962936401,
"step": 27
},
{
"epoch": 0.040345821325648415,
"grad_norm": 0.4679860770702362,
"learning_rate": 7.714285714285716e-06,
"loss": 1.0257033109664917,
"step": 28
},
{
"epoch": 0.04178674351585014,
"grad_norm": 0.44461655616760254,
"learning_rate": 8.000000000000001e-06,
"loss": 0.8461464643478394,
"step": 29
},
{
"epoch": 0.043227665706051875,
"grad_norm": 0.5527508854866028,
"learning_rate": 8.285714285714287e-06,
"loss": 1.014765739440918,
"step": 30
},
{
"epoch": 0.0446685878962536,
"grad_norm": 0.44049155712127686,
"learning_rate": 8.571428571428571e-06,
"loss": 1.056575059890747,
"step": 31
},
{
"epoch": 0.04610951008645533,
"grad_norm": 0.46883395314216614,
"learning_rate": 8.857142857142858e-06,
"loss": 1.078040599822998,
"step": 32
},
{
"epoch": 0.04755043227665706,
"grad_norm": 0.40662866830825806,
"learning_rate": 9.142857142857144e-06,
"loss": 1.0002977848052979,
"step": 33
},
{
"epoch": 0.04899135446685879,
"grad_norm": 0.4658549129962921,
"learning_rate": 9.42857142857143e-06,
"loss": 1.2170262336730957,
"step": 34
},
{
"epoch": 0.05043227665706052,
"grad_norm": 0.43307650089263916,
"learning_rate": 9.714285714285715e-06,
"loss": 1.0410033464431763,
"step": 35
},
{
"epoch": 0.05187319884726225,
"grad_norm": 0.4385557174682617,
"learning_rate": 1e-05,
"loss": 1.0051664113998413,
"step": 36
},
{
"epoch": 0.053314121037463975,
"grad_norm": 0.376889705657959,
"learning_rate": 1.0285714285714285e-05,
"loss": 0.9119026064872742,
"step": 37
},
{
"epoch": 0.05475504322766571,
"grad_norm": 0.4563588798046112,
"learning_rate": 1.0571428571428572e-05,
"loss": 1.0321323871612549,
"step": 38
},
{
"epoch": 0.056195965417867436,
"grad_norm": 0.42952460050582886,
"learning_rate": 1.0857142857142858e-05,
"loss": 1.1022179126739502,
"step": 39
},
{
"epoch": 0.05763688760806916,
"grad_norm": 0.4148353934288025,
"learning_rate": 1.1142857142857143e-05,
"loss": 1.003936767578125,
"step": 40
},
{
"epoch": 0.059077809798270896,
"grad_norm": 0.4105982482433319,
"learning_rate": 1.1428571428571429e-05,
"loss": 0.9413349628448486,
"step": 41
},
{
"epoch": 0.06051873198847262,
"grad_norm": 0.4209625720977783,
"learning_rate": 1.1714285714285716e-05,
"loss": 1.0281121730804443,
"step": 42
},
{
"epoch": 0.06195965417867435,
"grad_norm": 0.4445074498653412,
"learning_rate": 1.2e-05,
"loss": 1.103991985321045,
"step": 43
},
{
"epoch": 0.06340057636887608,
"grad_norm": 0.4571291506290436,
"learning_rate": 1.2285714285714288e-05,
"loss": 1.130021572113037,
"step": 44
},
{
"epoch": 0.06484149855907781,
"grad_norm": 0.3988986909389496,
"learning_rate": 1.2571428571428572e-05,
"loss": 0.9458773136138916,
"step": 45
},
{
"epoch": 0.06628242074927954,
"grad_norm": 0.44278568029403687,
"learning_rate": 1.2857142857142859e-05,
"loss": 0.8265559673309326,
"step": 46
},
{
"epoch": 0.06772334293948126,
"grad_norm": 0.4381110966205597,
"learning_rate": 1.3142857142857145e-05,
"loss": 0.8585218191146851,
"step": 47
},
{
"epoch": 0.069164265129683,
"grad_norm": 0.436262845993042,
"learning_rate": 1.3428571428571429e-05,
"loss": 1.0744308233261108,
"step": 48
},
{
"epoch": 0.07060518731988473,
"grad_norm": 0.38992249965667725,
"learning_rate": 1.3714285714285716e-05,
"loss": 0.7727986574172974,
"step": 49
},
{
"epoch": 0.07204610951008646,
"grad_norm": 0.4699057340621948,
"learning_rate": 1.4e-05,
"loss": 1.107350468635559,
"step": 50
},
{
"epoch": 0.07348703170028818,
"grad_norm": 0.42696720361709595,
"learning_rate": 1.4285714285714287e-05,
"loss": 0.8284908533096313,
"step": 51
},
{
"epoch": 0.07492795389048991,
"grad_norm": 0.41258543729782104,
"learning_rate": 1.4571428571428573e-05,
"loss": 0.9984976649284363,
"step": 52
},
{
"epoch": 0.07636887608069164,
"grad_norm": 0.4343370795249939,
"learning_rate": 1.4857142857142858e-05,
"loss": 1.0807418823242188,
"step": 53
},
{
"epoch": 0.07780979827089338,
"grad_norm": 0.41640159487724304,
"learning_rate": 1.5142857142857144e-05,
"loss": 0.9495835304260254,
"step": 54
},
{
"epoch": 0.0792507204610951,
"grad_norm": 0.40046796202659607,
"learning_rate": 1.542857142857143e-05,
"loss": 0.8888975381851196,
"step": 55
},
{
"epoch": 0.08069164265129683,
"grad_norm": 0.5632781386375427,
"learning_rate": 1.5714285714285715e-05,
"loss": 0.7974977493286133,
"step": 56
},
{
"epoch": 0.08213256484149856,
"grad_norm": 0.42231452465057373,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.9832745790481567,
"step": 57
},
{
"epoch": 0.08357348703170028,
"grad_norm": 0.40361636877059937,
"learning_rate": 1.6285714285714287e-05,
"loss": 0.9870190620422363,
"step": 58
},
{
"epoch": 0.08501440922190202,
"grad_norm": 0.39357513189315796,
"learning_rate": 1.6571428571428574e-05,
"loss": 0.8166898488998413,
"step": 59
},
{
"epoch": 0.08645533141210375,
"grad_norm": 0.41122061014175415,
"learning_rate": 1.6857142857142858e-05,
"loss": 0.7634358406066895,
"step": 60
},
{
"epoch": 0.08789625360230548,
"grad_norm": 0.46859362721443176,
"learning_rate": 1.7142857142857142e-05,
"loss": 0.9240067005157471,
"step": 61
},
{
"epoch": 0.0893371757925072,
"grad_norm": 0.45468568801879883,
"learning_rate": 1.742857142857143e-05,
"loss": 0.8535733819007874,
"step": 62
},
{
"epoch": 0.09077809798270893,
"grad_norm": 0.4963301718235016,
"learning_rate": 1.7714285714285717e-05,
"loss": 1.0492768287658691,
"step": 63
},
{
"epoch": 0.09221902017291066,
"grad_norm": 0.4315282106399536,
"learning_rate": 1.8e-05,
"loss": 1.057793378829956,
"step": 64
},
{
"epoch": 0.0936599423631124,
"grad_norm": 0.47847360372543335,
"learning_rate": 1.8285714285714288e-05,
"loss": 1.083353877067566,
"step": 65
},
{
"epoch": 0.09510086455331412,
"grad_norm": 0.4626167118549347,
"learning_rate": 1.8571428571428575e-05,
"loss": 1.078417420387268,
"step": 66
},
{
"epoch": 0.09654178674351585,
"grad_norm": 0.5010611414909363,
"learning_rate": 1.885714285714286e-05,
"loss": 0.9978217482566833,
"step": 67
},
{
"epoch": 0.09798270893371758,
"grad_norm": 0.42384764552116394,
"learning_rate": 1.9142857142857146e-05,
"loss": 0.8274828195571899,
"step": 68
},
{
"epoch": 0.0994236311239193,
"grad_norm": 0.5382806062698364,
"learning_rate": 1.942857142857143e-05,
"loss": 0.8650892972946167,
"step": 69
},
{
"epoch": 0.10086455331412104,
"grad_norm": 0.43172308802604675,
"learning_rate": 1.9714285714285718e-05,
"loss": 0.9850153923034668,
"step": 70
},
{
"epoch": 0.10230547550432277,
"grad_norm": 0.4458475410938263,
"learning_rate": 2e-05,
"loss": 0.8429934978485107,
"step": 71
},
{
"epoch": 0.1037463976945245,
"grad_norm": 0.4776175916194916,
"learning_rate": 1.999997159212662e-05,
"loss": 0.9614291191101074,
"step": 72
},
{
"epoch": 0.10518731988472622,
"grad_norm": 0.43584567308425903,
"learning_rate": 1.9999886368667875e-05,
"loss": 0.7892089486122131,
"step": 73
},
{
"epoch": 0.10662824207492795,
"grad_norm": 0.45101606845855713,
"learning_rate": 1.9999744330107972e-05,
"loss": 1.1100192070007324,
"step": 74
},
{
"epoch": 0.10806916426512968,
"grad_norm": 0.467498242855072,
"learning_rate": 1.999954547725391e-05,
"loss": 0.9660458564758301,
"step": 75
},
{
"epoch": 0.10951008645533142,
"grad_norm": 0.49766960740089417,
"learning_rate": 1.9999289811235492e-05,
"loss": 1.0705103874206543,
"step": 76
},
{
"epoch": 0.11095100864553314,
"grad_norm": 0.43898335099220276,
"learning_rate": 1.9998977333505298e-05,
"loss": 0.8018485903739929,
"step": 77
},
{
"epoch": 0.11239193083573487,
"grad_norm": 0.44093072414398193,
"learning_rate": 1.9998608045838696e-05,
"loss": 0.9763551950454712,
"step": 78
},
{
"epoch": 0.1138328530259366,
"grad_norm": 0.5379366278648376,
"learning_rate": 1.9998181950333825e-05,
"loss": 0.9606471061706543,
"step": 79
},
{
"epoch": 0.11527377521613832,
"grad_norm": 0.5516687035560608,
"learning_rate": 1.999769904941157e-05,
"loss": 1.0976412296295166,
"step": 80
},
{
"epoch": 0.11671469740634005,
"grad_norm": 0.48758774995803833,
"learning_rate": 1.9997159345815577e-05,
"loss": 0.9517735242843628,
"step": 81
},
{
"epoch": 0.11815561959654179,
"grad_norm": 0.4559188485145569,
"learning_rate": 1.9996562842612208e-05,
"loss": 0.9503711462020874,
"step": 82
},
{
"epoch": 0.11959654178674352,
"grad_norm": 0.4746760129928589,
"learning_rate": 1.999590954319054e-05,
"loss": 0.979506254196167,
"step": 83
},
{
"epoch": 0.12103746397694524,
"grad_norm": 0.43155157566070557,
"learning_rate": 1.9995199451262348e-05,
"loss": 0.87384033203125,
"step": 84
},
{
"epoch": 0.12247838616714697,
"grad_norm": 0.49892765283584595,
"learning_rate": 1.999443257086206e-05,
"loss": 0.874519944190979,
"step": 85
},
{
"epoch": 0.1239193083573487,
"grad_norm": 0.4722626507282257,
"learning_rate": 1.9993608906346778e-05,
"loss": 0.9120929837226868,
"step": 86
},
{
"epoch": 0.12536023054755044,
"grad_norm": 0.5804750323295593,
"learning_rate": 1.9992728462396207e-05,
"loss": 0.9798381924629211,
"step": 87
},
{
"epoch": 0.12680115273775217,
"grad_norm": 0.5119799375534058,
"learning_rate": 1.9991791244012654e-05,
"loss": 0.9007123708724976,
"step": 88
},
{
"epoch": 0.1282420749279539,
"grad_norm": 0.529379665851593,
"learning_rate": 1.9990797256521e-05,
"loss": 0.9782993793487549,
"step": 89
},
{
"epoch": 0.12968299711815562,
"grad_norm": 0.4421041011810303,
"learning_rate": 1.9989746505568655e-05,
"loss": 0.8241182565689087,
"step": 90
},
{
"epoch": 0.13112391930835735,
"grad_norm": 0.5022783279418945,
"learning_rate": 1.998863899712554e-05,
"loss": 1.0301092863082886,
"step": 91
},
{
"epoch": 0.13256484149855907,
"grad_norm": 0.4944583773612976,
"learning_rate": 1.998747473748405e-05,
"loss": 0.9057658910751343,
"step": 92
},
{
"epoch": 0.1340057636887608,
"grad_norm": 0.5021255016326904,
"learning_rate": 1.9986253733259004e-05,
"loss": 0.994688868522644,
"step": 93
},
{
"epoch": 0.13544668587896252,
"grad_norm": 0.5356181859970093,
"learning_rate": 1.998497599138764e-05,
"loss": 0.921332061290741,
"step": 94
},
{
"epoch": 0.13688760806916425,
"grad_norm": 0.5648020505905151,
"learning_rate": 1.9983641519129534e-05,
"loss": 0.8739620447158813,
"step": 95
},
{
"epoch": 0.138328530259366,
"grad_norm": 0.46064072847366333,
"learning_rate": 1.99822503240666e-05,
"loss": 0.8407166600227356,
"step": 96
},
{
"epoch": 0.13976945244956773,
"grad_norm": 0.5033822059631348,
"learning_rate": 1.998080241410301e-05,
"loss": 0.9476931095123291,
"step": 97
},
{
"epoch": 0.14121037463976946,
"grad_norm": 0.5009251236915588,
"learning_rate": 1.997929779746517e-05,
"loss": 0.9408062696456909,
"step": 98
},
{
"epoch": 0.14265129682997119,
"grad_norm": 0.5032044649124146,
"learning_rate": 1.997773648270168e-05,
"loss": 0.9481003284454346,
"step": 99
},
{
"epoch": 0.1440922190201729,
"grad_norm": 0.5312591791152954,
"learning_rate": 1.997611847868326e-05,
"loss": 0.8620598316192627,
"step": 100
},
{
"epoch": 0.14553314121037464,
"grad_norm": 0.49578797817230225,
"learning_rate": 1.9974443794602723e-05,
"loss": 0.8824926018714905,
"step": 101
},
{
"epoch": 0.14697406340057637,
"grad_norm": 0.49957969784736633,
"learning_rate": 1.9972712439974912e-05,
"loss": 0.8620983362197876,
"step": 102
},
{
"epoch": 0.1484149855907781,
"grad_norm": 0.5200111865997314,
"learning_rate": 1.9970924424636645e-05,
"loss": 1.0327873229980469,
"step": 103
},
{
"epoch": 0.14985590778097982,
"grad_norm": 0.5515114068984985,
"learning_rate": 1.996907975874667e-05,
"loss": 0.9890369176864624,
"step": 104
},
{
"epoch": 0.15129682997118155,
"grad_norm": 0.621155858039856,
"learning_rate": 1.9967178452785586e-05,
"loss": 1.0814683437347412,
"step": 105
},
{
"epoch": 0.15273775216138327,
"grad_norm": 0.5430831909179688,
"learning_rate": 1.9965220517555814e-05,
"loss": 0.9082866907119751,
"step": 106
},
{
"epoch": 0.15417867435158503,
"grad_norm": 0.5123117566108704,
"learning_rate": 1.9963205964181503e-05,
"loss": 0.8269040584564209,
"step": 107
},
{
"epoch": 0.15561959654178675,
"grad_norm": 0.475583553314209,
"learning_rate": 1.996113480410849e-05,
"loss": 0.9455451965332031,
"step": 108
},
{
"epoch": 0.15706051873198848,
"grad_norm": 0.48762381076812744,
"learning_rate": 1.9959007049104223e-05,
"loss": 0.7549535036087036,
"step": 109
},
{
"epoch": 0.1585014409221902,
"grad_norm": 0.5038346648216248,
"learning_rate": 1.9956822711257708e-05,
"loss": 0.7314675450325012,
"step": 110
},
{
"epoch": 0.15994236311239193,
"grad_norm": 0.5289925932884216,
"learning_rate": 1.995458180297942e-05,
"loss": 0.8430821299552917,
"step": 111
},
{
"epoch": 0.16138328530259366,
"grad_norm": 0.48994776606559753,
"learning_rate": 1.9952284337001238e-05,
"loss": 0.8332911729812622,
"step": 112
},
{
"epoch": 0.1628242074927954,
"grad_norm": 0.536970853805542,
"learning_rate": 1.9949930326376403e-05,
"loss": 0.8364700078964233,
"step": 113
},
{
"epoch": 0.1642651296829971,
"grad_norm": 0.5265873670578003,
"learning_rate": 1.994751978447939e-05,
"loss": 0.9724129438400269,
"step": 114
},
{
"epoch": 0.16570605187319884,
"grad_norm": 0.515876293182373,
"learning_rate": 1.994505272500588e-05,
"loss": 0.8738645315170288,
"step": 115
},
{
"epoch": 0.16714697406340057,
"grad_norm": 0.49826738238334656,
"learning_rate": 1.9942529161972646e-05,
"loss": 0.7798672318458557,
"step": 116
},
{
"epoch": 0.1685878962536023,
"grad_norm": 0.4744509756565094,
"learning_rate": 1.993994910971751e-05,
"loss": 0.779198944568634,
"step": 117
},
{
"epoch": 0.17002881844380405,
"grad_norm": 0.5166738033294678,
"learning_rate": 1.9937312582899224e-05,
"loss": 0.8262094855308533,
"step": 118
},
{
"epoch": 0.17146974063400577,
"grad_norm": 0.5266196131706238,
"learning_rate": 1.993461959649742e-05,
"loss": 0.9057078957557678,
"step": 119
},
{
"epoch": 0.1729106628242075,
"grad_norm": 0.46664103865623474,
"learning_rate": 1.9931870165812492e-05,
"loss": 0.8845268487930298,
"step": 120
},
{
"epoch": 0.17435158501440923,
"grad_norm": 0.49491918087005615,
"learning_rate": 1.9929064306465543e-05,
"loss": 1.0086660385131836,
"step": 121
},
{
"epoch": 0.17579250720461095,
"grad_norm": 0.6369159817695618,
"learning_rate": 1.992620203439827e-05,
"loss": 0.8299688100814819,
"step": 122
},
{
"epoch": 0.17723342939481268,
"grad_norm": 0.5914552211761475,
"learning_rate": 1.9923283365872886e-05,
"loss": 0.6999752521514893,
"step": 123
},
{
"epoch": 0.1786743515850144,
"grad_norm": 0.5571977496147156,
"learning_rate": 1.9920308317472023e-05,
"loss": 0.899816632270813,
"step": 124
},
{
"epoch": 0.18011527377521613,
"grad_norm": 0.5898154377937317,
"learning_rate": 1.9917276906098643e-05,
"loss": 1.0478678941726685,
"step": 125
},
{
"epoch": 0.18155619596541786,
"grad_norm": 0.5443904399871826,
"learning_rate": 1.991418914897593e-05,
"loss": 0.8770928978919983,
"step": 126
},
{
"epoch": 0.1829971181556196,
"grad_norm": 0.43962040543556213,
"learning_rate": 1.9911045063647214e-05,
"loss": 0.7189308404922485,
"step": 127
},
{
"epoch": 0.1844380403458213,
"grad_norm": 0.46299442648887634,
"learning_rate": 1.9907844667975847e-05,
"loss": 0.7834997177124023,
"step": 128
},
{
"epoch": 0.18587896253602307,
"grad_norm": 0.5070964694023132,
"learning_rate": 1.9904587980145117e-05,
"loss": 0.9666174650192261,
"step": 129
},
{
"epoch": 0.1873198847262248,
"grad_norm": 0.5512601733207703,
"learning_rate": 1.990127501865814e-05,
"loss": 1.0248420238494873,
"step": 130
},
{
"epoch": 0.18876080691642652,
"grad_norm": 0.4986175000667572,
"learning_rate": 1.989790580233775e-05,
"loss": 0.8395522832870483,
"step": 131
},
{
"epoch": 0.19020172910662825,
"grad_norm": 0.5736032724380493,
"learning_rate": 1.989448035032641e-05,
"loss": 0.9717695713043213,
"step": 132
},
{
"epoch": 0.19164265129682997,
"grad_norm": 0.560399055480957,
"learning_rate": 1.989099868208607e-05,
"loss": 0.968644917011261,
"step": 133
},
{
"epoch": 0.1930835734870317,
"grad_norm": 0.49599337577819824,
"learning_rate": 1.9887460817398093e-05,
"loss": 0.9145469069480896,
"step": 134
},
{
"epoch": 0.19452449567723343,
"grad_norm": 0.5436801910400391,
"learning_rate": 1.9883866776363123e-05,
"loss": 1.0251821279525757,
"step": 135
},
{
"epoch": 0.19596541786743515,
"grad_norm": 0.5881351232528687,
"learning_rate": 1.9880216579400972e-05,
"loss": 0.9223377108573914,
"step": 136
},
{
"epoch": 0.19740634005763688,
"grad_norm": 0.517955482006073,
"learning_rate": 1.9876510247250506e-05,
"loss": 0.8712332248687744,
"step": 137
},
{
"epoch": 0.1988472622478386,
"grad_norm": 0.5075203776359558,
"learning_rate": 1.9872747800969526e-05,
"loss": 0.9759121537208557,
"step": 138
},
{
"epoch": 0.20028818443804033,
"grad_norm": 0.4947361946105957,
"learning_rate": 1.9868929261934657e-05,
"loss": 0.920539140701294,
"step": 139
},
{
"epoch": 0.2017291066282421,
"grad_norm": 0.693020761013031,
"learning_rate": 1.986505465184121e-05,
"loss": 0.9794489145278931,
"step": 140
},
{
"epoch": 0.20317002881844382,
"grad_norm": 0.5060868859291077,
"learning_rate": 1.986112399270307e-05,
"loss": 0.9132547974586487,
"step": 141
},
{
"epoch": 0.20461095100864554,
"grad_norm": 0.5373198390007019,
"learning_rate": 1.985713730685257e-05,
"loss": 0.9875497817993164,
"step": 142
},
{
"epoch": 0.20605187319884727,
"grad_norm": 0.4931413531303406,
"learning_rate": 1.985309461694037e-05,
"loss": 0.8547732830047607,
"step": 143
},
{
"epoch": 0.207492795389049,
"grad_norm": 0.5622901916503906,
"learning_rate": 1.9848995945935305e-05,
"loss": 0.7855159640312195,
"step": 144
},
{
"epoch": 0.20893371757925072,
"grad_norm": 0.6368292570114136,
"learning_rate": 1.984484131712429e-05,
"loss": 0.8236104249954224,
"step": 145
},
{
"epoch": 0.21037463976945245,
"grad_norm": 0.4789673089981079,
"learning_rate": 1.9840630754112152e-05,
"loss": 0.7789024114608765,
"step": 146
},
{
"epoch": 0.21181556195965417,
"grad_norm": 0.5207936763763428,
"learning_rate": 1.9836364280821522e-05,
"loss": 0.8215268850326538,
"step": 147
},
{
"epoch": 0.2132564841498559,
"grad_norm": 0.5288500189781189,
"learning_rate": 1.9832041921492688e-05,
"loss": 0.883397102355957,
"step": 148
},
{
"epoch": 0.21469740634005763,
"grad_norm": 0.4870454967021942,
"learning_rate": 1.9827663700683454e-05,
"loss": 0.903971791267395,
"step": 149
},
{
"epoch": 0.21613832853025935,
"grad_norm": 0.468478798866272,
"learning_rate": 1.982322964326901e-05,
"loss": 0.7198023796081543,
"step": 150
},
{
"epoch": 0.21757925072046108,
"grad_norm": 0.5701258182525635,
"learning_rate": 1.9818739774441784e-05,
"loss": 0.8818264007568359,
"step": 151
},
{
"epoch": 0.21902017291066284,
"grad_norm": 0.5318727493286133,
"learning_rate": 1.98141941197113e-05,
"loss": 0.7901202440261841,
"step": 152
},
{
"epoch": 0.22046109510086456,
"grad_norm": 0.6329215168952942,
"learning_rate": 1.980959270490404e-05,
"loss": 0.9011144638061523,
"step": 153
},
{
"epoch": 0.2219020172910663,
"grad_norm": 0.5430089235305786,
"learning_rate": 1.980493555616328e-05,
"loss": 0.8788041472434998,
"step": 154
},
{
"epoch": 0.22334293948126802,
"grad_norm": 0.5590965747833252,
"learning_rate": 1.980022269994896e-05,
"loss": 0.9463634490966797,
"step": 155
},
{
"epoch": 0.22478386167146974,
"grad_norm": 0.5332587361335754,
"learning_rate": 1.9795454163037523e-05,
"loss": 0.8602564334869385,
"step": 156
},
{
"epoch": 0.22622478386167147,
"grad_norm": 0.5108247399330139,
"learning_rate": 1.9790629972521772e-05,
"loss": 0.956876277923584,
"step": 157
},
{
"epoch": 0.2276657060518732,
"grad_norm": 0.48440778255462646,
"learning_rate": 1.97857501558107e-05,
"loss": 0.8002164363861084,
"step": 158
},
{
"epoch": 0.22910662824207492,
"grad_norm": 0.5165001749992371,
"learning_rate": 1.9780814740629357e-05,
"loss": 0.7904690504074097,
"step": 159
},
{
"epoch": 0.23054755043227665,
"grad_norm": 0.5920788049697876,
"learning_rate": 1.9775823755018665e-05,
"loss": 1.004716157913208,
"step": 160
},
{
"epoch": 0.23198847262247838,
"grad_norm": 0.46533599495887756,
"learning_rate": 1.9770777227335292e-05,
"loss": 0.7796809673309326,
"step": 161
},
{
"epoch": 0.2334293948126801,
"grad_norm": 0.5575913786888123,
"learning_rate": 1.976567518625145e-05,
"loss": 0.7896379232406616,
"step": 162
},
{
"epoch": 0.23487031700288186,
"grad_norm": 0.5064666271209717,
"learning_rate": 1.976051766075477e-05,
"loss": 0.8482154607772827,
"step": 163
},
{
"epoch": 0.23631123919308358,
"grad_norm": 0.5755187273025513,
"learning_rate": 1.9755304680148125e-05,
"loss": 0.9588966965675354,
"step": 164
},
{
"epoch": 0.2377521613832853,
"grad_norm": 0.5244006514549255,
"learning_rate": 1.9750036274049447e-05,
"loss": 0.8246345520019531,
"step": 165
},
{
"epoch": 0.23919308357348704,
"grad_norm": 0.5705826878547668,
"learning_rate": 1.974471247239158e-05,
"loss": 0.9529173374176025,
"step": 166
},
{
"epoch": 0.24063400576368876,
"grad_norm": 0.5782693028450012,
"learning_rate": 1.97393333054221e-05,
"loss": 0.9156125783920288,
"step": 167
},
{
"epoch": 0.2420749279538905,
"grad_norm": 0.5548809170722961,
"learning_rate": 1.9733898803703145e-05,
"loss": 0.87775719165802,
"step": 168
},
{
"epoch": 0.24351585014409222,
"grad_norm": 0.5045956373214722,
"learning_rate": 1.972840899811125e-05,
"loss": 0.8338059782981873,
"step": 169
},
{
"epoch": 0.24495677233429394,
"grad_norm": 0.4768422245979309,
"learning_rate": 1.9722863919837146e-05,
"loss": 0.8089622259140015,
"step": 170
},
{
"epoch": 0.24639769452449567,
"grad_norm": 0.5043761134147644,
"learning_rate": 1.9717263600385614e-05,
"loss": 0.8071548938751221,
"step": 171
},
{
"epoch": 0.2478386167146974,
"grad_norm": 0.449352502822876,
"learning_rate": 1.9711608071575285e-05,
"loss": 0.7377144694328308,
"step": 172
},
{
"epoch": 0.24927953890489912,
"grad_norm": 0.5425634980201721,
"learning_rate": 1.970589736553847e-05,
"loss": 0.9347266554832458,
"step": 173
},
{
"epoch": 0.2507204610951009,
"grad_norm": 0.5423866510391235,
"learning_rate": 1.970013151472097e-05,
"loss": 0.9006168842315674,
"step": 174
},
{
"epoch": 0.2521613832853026,
"grad_norm": 0.46786877512931824,
"learning_rate": 1.96943105518819e-05,
"loss": 0.8953101634979248,
"step": 175
},
{
"epoch": 0.25360230547550433,
"grad_norm": 0.5013384222984314,
"learning_rate": 1.968843451009349e-05,
"loss": 0.9531070590019226,
"step": 176
},
{
"epoch": 0.25504322766570603,
"grad_norm": 0.5079538226127625,
"learning_rate": 1.9682503422740915e-05,
"loss": 0.7452487945556641,
"step": 177
},
{
"epoch": 0.2564841498559078,
"grad_norm": 0.48233309388160706,
"learning_rate": 1.967651732352209e-05,
"loss": 0.78824782371521,
"step": 178
},
{
"epoch": 0.2579250720461095,
"grad_norm": 0.4961184859275818,
"learning_rate": 1.9670476246447484e-05,
"loss": 0.7305347919464111,
"step": 179
},
{
"epoch": 0.25936599423631124,
"grad_norm": 0.5201149582862854,
"learning_rate": 1.966438022583993e-05,
"loss": 0.896546483039856,
"step": 180
},
{
"epoch": 0.260806916426513,
"grad_norm": 0.5316110849380493,
"learning_rate": 1.9658229296334416e-05,
"loss": 0.9205869436264038,
"step": 181
},
{
"epoch": 0.2622478386167147,
"grad_norm": 0.4991670548915863,
"learning_rate": 1.9652023492877915e-05,
"loss": 0.7877059578895569,
"step": 182
},
{
"epoch": 0.26368876080691644,
"grad_norm": 0.5440250635147095,
"learning_rate": 1.964576285072916e-05,
"loss": 0.7989544868469238,
"step": 183
},
{
"epoch": 0.26512968299711814,
"grad_norm": 0.5110585689544678,
"learning_rate": 1.963944740545846e-05,
"loss": 0.861824631690979,
"step": 184
},
{
"epoch": 0.2665706051873199,
"grad_norm": 0.5013352632522583,
"learning_rate": 1.9633077192947486e-05,
"loss": 0.7943878173828125,
"step": 185
},
{
"epoch": 0.2680115273775216,
"grad_norm": 0.4938945472240448,
"learning_rate": 1.9626652249389076e-05,
"loss": 0.7950688600540161,
"step": 186
},
{
"epoch": 0.26945244956772335,
"grad_norm": 0.5503535270690918,
"learning_rate": 1.9620172611287028e-05,
"loss": 0.7456330060958862,
"step": 187
},
{
"epoch": 0.27089337175792505,
"grad_norm": 0.5915765166282654,
"learning_rate": 1.9613638315455888e-05,
"loss": 0.803912878036499,
"step": 188
},
{
"epoch": 0.2723342939481268,
"grad_norm": 0.5289369821548462,
"learning_rate": 1.9607049399020746e-05,
"loss": 0.8069210052490234,
"step": 189
},
{
"epoch": 0.2737752161383285,
"grad_norm": 0.6146844029426575,
"learning_rate": 1.9600405899417026e-05,
"loss": 0.9639400243759155,
"step": 190
},
{
"epoch": 0.27521613832853026,
"grad_norm": 0.5518732070922852,
"learning_rate": 1.9593707854390263e-05,
"loss": 0.8917955160140991,
"step": 191
},
{
"epoch": 0.276657060518732,
"grad_norm": 0.5757451057434082,
"learning_rate": 1.9586955301995903e-05,
"loss": 1.0030066967010498,
"step": 192
},
{
"epoch": 0.2780979827089337,
"grad_norm": 0.6039057374000549,
"learning_rate": 1.9580148280599075e-05,
"loss": 0.770416796207428,
"step": 193
},
{
"epoch": 0.27953890489913547,
"grad_norm": 0.5454704761505127,
"learning_rate": 1.957328682887438e-05,
"loss": 0.7988390326499939,
"step": 194
},
{
"epoch": 0.28097982708933716,
"grad_norm": 0.5586422085762024,
"learning_rate": 1.9566370985805666e-05,
"loss": 0.8730517625808716,
"step": 195
},
{
"epoch": 0.2824207492795389,
"grad_norm": 0.5465781092643738,
"learning_rate": 1.9559400790685813e-05,
"loss": 0.984703779220581,
"step": 196
},
{
"epoch": 0.2838616714697406,
"grad_norm": 0.5505930185317993,
"learning_rate": 1.9552376283116508e-05,
"loss": 0.8712029457092285,
"step": 197
},
{
"epoch": 0.28530259365994237,
"grad_norm": 0.49047884345054626,
"learning_rate": 1.9545297503008014e-05,
"loss": 0.9437016844749451,
"step": 198
},
{
"epoch": 0.28674351585014407,
"grad_norm": 0.5203781127929688,
"learning_rate": 1.953816449057895e-05,
"loss": 0.9115222096443176,
"step": 199
},
{
"epoch": 0.2881844380403458,
"grad_norm": 0.5116919279098511,
"learning_rate": 1.9530977286356053e-05,
"loss": 0.8222418427467346,
"step": 200
},
{
"epoch": 0.2896253602305475,
"grad_norm": 0.5122188329696655,
"learning_rate": 1.9523735931173964e-05,
"loss": 0.7692494988441467,
"step": 201
},
{
"epoch": 0.2910662824207493,
"grad_norm": 0.48403820395469666,
"learning_rate": 1.951644046617499e-05,
"loss": 0.7008178234100342,
"step": 202
},
{
"epoch": 0.29250720461095103,
"grad_norm": 0.5577651262283325,
"learning_rate": 1.950909093280885e-05,
"loss": 0.9718255996704102,
"step": 203
},
{
"epoch": 0.29394812680115273,
"grad_norm": 0.5913541913032532,
"learning_rate": 1.9501687372832466e-05,
"loss": 1.0085289478302002,
"step": 204
},
{
"epoch": 0.2953890489913545,
"grad_norm": 0.6354373097419739,
"learning_rate": 1.9494229828309724e-05,
"loss": 0.9526057243347168,
"step": 205
},
{
"epoch": 0.2968299711815562,
"grad_norm": 0.5196180939674377,
"learning_rate": 1.948671834161122e-05,
"loss": 0.9512363076210022,
"step": 206
},
{
"epoch": 0.29827089337175794,
"grad_norm": 0.49973103404045105,
"learning_rate": 1.947915295541402e-05,
"loss": 0.8308489322662354,
"step": 207
},
{
"epoch": 0.29971181556195964,
"grad_norm": 0.5216118693351746,
"learning_rate": 1.947153371270144e-05,
"loss": 0.8502541780471802,
"step": 208
},
{
"epoch": 0.3011527377521614,
"grad_norm": 0.6129580140113831,
"learning_rate": 1.946386065676277e-05,
"loss": 0.8418374061584473,
"step": 209
},
{
"epoch": 0.3025936599423631,
"grad_norm": 0.5305296182632446,
"learning_rate": 1.945613383119305e-05,
"loss": 0.8838613033294678,
"step": 210
},
{
"epoch": 0.30403458213256485,
"grad_norm": 0.5104044079780579,
"learning_rate": 1.944835327989282e-05,
"loss": 0.8677416443824768,
"step": 211
},
{
"epoch": 0.30547550432276654,
"grad_norm": 0.5255080461502075,
"learning_rate": 1.944051904706786e-05,
"loss": 0.8306810855865479,
"step": 212
},
{
"epoch": 0.3069164265129683,
"grad_norm": 0.5854450464248657,
"learning_rate": 1.9432631177228948e-05,
"loss": 0.9153035879135132,
"step": 213
},
{
"epoch": 0.30835734870317005,
"grad_norm": 0.6011139154434204,
"learning_rate": 1.942468971519161e-05,
"loss": 0.8749545812606812,
"step": 214
},
{
"epoch": 0.30979827089337175,
"grad_norm": 0.6026611924171448,
"learning_rate": 1.941669470607585e-05,
"loss": 0.9470349550247192,
"step": 215
},
{
"epoch": 0.3112391930835735,
"grad_norm": 0.5269956588745117,
"learning_rate": 1.9408646195305914e-05,
"loss": 0.8375416398048401,
"step": 216
},
{
"epoch": 0.3126801152737752,
"grad_norm": 0.5777774453163147,
"learning_rate": 1.940054422861002e-05,
"loss": 0.8614240884780884,
"step": 217
},
{
"epoch": 0.31412103746397696,
"grad_norm": 0.5759449601173401,
"learning_rate": 1.939238885202009e-05,
"loss": 0.886568546295166,
"step": 218
},
{
"epoch": 0.31556195965417866,
"grad_norm": 0.6054496765136719,
"learning_rate": 1.9384180111871502e-05,
"loss": 0.9906665086746216,
"step": 219
},
{
"epoch": 0.3170028818443804,
"grad_norm": 0.5911257863044739,
"learning_rate": 1.9375918054802836e-05,
"loss": 0.8998844623565674,
"step": 220
},
{
"epoch": 0.3184438040345821,
"grad_norm": 0.5230793356895447,
"learning_rate": 1.936760272775558e-05,
"loss": 0.8659279346466064,
"step": 221
},
{
"epoch": 0.31988472622478387,
"grad_norm": 0.5517547130584717,
"learning_rate": 1.935923417797389e-05,
"loss": 0.8853185176849365,
"step": 222
},
{
"epoch": 0.32132564841498557,
"grad_norm": 0.5559066534042358,
"learning_rate": 1.9350812453004303e-05,
"loss": 0.8165359497070312,
"step": 223
},
{
"epoch": 0.3227665706051873,
"grad_norm": 0.7742547988891602,
"learning_rate": 1.934233760069548e-05,
"loss": 0.9410666227340698,
"step": 224
},
{
"epoch": 0.3242074927953891,
"grad_norm": 0.5944716334342957,
"learning_rate": 1.933380966919792e-05,
"loss": 0.8204824328422546,
"step": 225
},
{
"epoch": 0.3256484149855908,
"grad_norm": 0.5238089561462402,
"learning_rate": 1.9325228706963716e-05,
"loss": 0.7781552672386169,
"step": 226
},
{
"epoch": 0.3270893371757925,
"grad_norm": 0.5538492202758789,
"learning_rate": 1.9316594762746238e-05,
"loss": 0.861137330532074,
"step": 227
},
{
"epoch": 0.3285302593659942,
"grad_norm": 0.49301236867904663,
"learning_rate": 1.9307907885599883e-05,
"loss": 0.7672471404075623,
"step": 228
},
{
"epoch": 0.329971181556196,
"grad_norm": 0.5632166266441345,
"learning_rate": 1.9299168124879798e-05,
"loss": 0.9043705463409424,
"step": 229
},
{
"epoch": 0.3314121037463977,
"grad_norm": 0.517174482345581,
"learning_rate": 1.9290375530241577e-05,
"loss": 0.8089410662651062,
"step": 230
},
{
"epoch": 0.33285302593659943,
"grad_norm": 0.5550019145011902,
"learning_rate": 1.9281530151641016e-05,
"loss": 0.8886780142784119,
"step": 231
},
{
"epoch": 0.33429394812680113,
"grad_norm": 0.6222152709960938,
"learning_rate": 1.9272632039333784e-05,
"loss": 0.7788711786270142,
"step": 232
},
{
"epoch": 0.3357348703170029,
"grad_norm": 0.5116149187088013,
"learning_rate": 1.9263681243875173e-05,
"loss": 0.8461220264434814,
"step": 233
},
{
"epoch": 0.3371757925072046,
"grad_norm": 0.5942230820655823,
"learning_rate": 1.92546778161198e-05,
"loss": 0.856147289276123,
"step": 234
},
{
"epoch": 0.33861671469740634,
"grad_norm": 0.5200196504592896,
"learning_rate": 1.9245621807221306e-05,
"loss": 0.7370104789733887,
"step": 235
},
{
"epoch": 0.3400576368876081,
"grad_norm": 0.4902980625629425,
"learning_rate": 1.9236513268632085e-05,
"loss": 0.8132247924804688,
"step": 236
},
{
"epoch": 0.3414985590778098,
"grad_norm": 0.5125890374183655,
"learning_rate": 1.922735225210298e-05,
"loss": 0.8135820627212524,
"step": 237
},
{
"epoch": 0.34293948126801155,
"grad_norm": 0.6545057892799377,
"learning_rate": 1.9218138809682988e-05,
"loss": 0.9990659952163696,
"step": 238
},
{
"epoch": 0.34438040345821325,
"grad_norm": 0.45519211888313293,
"learning_rate": 1.9208872993718967e-05,
"loss": 0.6385080814361572,
"step": 239
},
{
"epoch": 0.345821325648415,
"grad_norm": 0.5084793567657471,
"learning_rate": 1.919955485685535e-05,
"loss": 0.8031101226806641,
"step": 240
},
{
"epoch": 0.3472622478386167,
"grad_norm": 0.5788770318031311,
"learning_rate": 1.9190184452033828e-05,
"loss": 0.8098981380462646,
"step": 241
},
{
"epoch": 0.34870317002881845,
"grad_norm": 0.5901806354522705,
"learning_rate": 1.9180761832493045e-05,
"loss": 0.770499050617218,
"step": 242
},
{
"epoch": 0.35014409221902015,
"grad_norm": 0.5531857013702393,
"learning_rate": 1.917128705176833e-05,
"loss": 0.7973406314849854,
"step": 243
},
{
"epoch": 0.3515850144092219,
"grad_norm": 0.533688485622406,
"learning_rate": 1.9161760163691347e-05,
"loss": 0.8184660077095032,
"step": 244
},
{
"epoch": 0.3530259365994236,
"grad_norm": 0.6230199933052063,
"learning_rate": 1.915218122238983e-05,
"loss": 0.9237667322158813,
"step": 245
},
{
"epoch": 0.35446685878962536,
"grad_norm": 0.5710411667823792,
"learning_rate": 1.9142550282287247e-05,
"loss": 0.9484732747077942,
"step": 246
},
{
"epoch": 0.3559077809798271,
"grad_norm": 0.5010262727737427,
"learning_rate": 1.9132867398102498e-05,
"loss": 0.7892597317695618,
"step": 247
},
{
"epoch": 0.3573487031700288,
"grad_norm": 0.5981287956237793,
"learning_rate": 1.912313262484962e-05,
"loss": 0.8831825852394104,
"step": 248
},
{
"epoch": 0.35878962536023057,
"grad_norm": 0.5648021697998047,
"learning_rate": 1.911334601783745e-05,
"loss": 0.7330418825149536,
"step": 249
},
{
"epoch": 0.36023054755043227,
"grad_norm": 0.540814220905304,
"learning_rate": 1.910350763266933e-05,
"loss": 0.8082142472267151,
"step": 250
},
{
"epoch": 0.361671469740634,
"grad_norm": 0.5894553065299988,
"learning_rate": 1.9093617525242772e-05,
"loss": 0.9762543439865112,
"step": 251
},
{
"epoch": 0.3631123919308357,
"grad_norm": 0.5327045321464539,
"learning_rate": 1.9083675751749174e-05,
"loss": 0.7824052572250366,
"step": 252
},
{
"epoch": 0.3645533141210375,
"grad_norm": 0.5246372818946838,
"learning_rate": 1.907368236867345e-05,
"loss": 0.7279900312423706,
"step": 253
},
{
"epoch": 0.3659942363112392,
"grad_norm": 0.5794147849082947,
"learning_rate": 1.9063637432793757e-05,
"loss": 0.7523423433303833,
"step": 254
},
{
"epoch": 0.36743515850144093,
"grad_norm": 0.5827282071113586,
"learning_rate": 1.9053541001181156e-05,
"loss": 0.8881576061248779,
"step": 255
},
{
"epoch": 0.3688760806916426,
"grad_norm": 0.5458179712295532,
"learning_rate": 1.9043393131199266e-05,
"loss": 0.7404780983924866,
"step": 256
},
{
"epoch": 0.3703170028818444,
"grad_norm": 0.5116549134254456,
"learning_rate": 1.9033193880503976e-05,
"loss": 0.846668004989624,
"step": 257
},
{
"epoch": 0.37175792507204614,
"grad_norm": 0.5459802746772766,
"learning_rate": 1.9022943307043085e-05,
"loss": 0.7665458917617798,
"step": 258
},
{
"epoch": 0.37319884726224783,
"grad_norm": 0.584256649017334,
"learning_rate": 1.9012641469055998e-05,
"loss": 0.9224525690078735,
"step": 259
},
{
"epoch": 0.3746397694524496,
"grad_norm": 0.5947668552398682,
"learning_rate": 1.9002288425073367e-05,
"loss": 0.8733052611351013,
"step": 260
},
{
"epoch": 0.3760806916426513,
"grad_norm": 0.5581645369529724,
"learning_rate": 1.8991884233916795e-05,
"loss": 0.9351893663406372,
"step": 261
},
{
"epoch": 0.37752161383285304,
"grad_norm": 0.5662564039230347,
"learning_rate": 1.8981428954698466e-05,
"loss": 0.841423749923706,
"step": 262
},
{
"epoch": 0.37896253602305474,
"grad_norm": 0.5291077494621277,
"learning_rate": 1.8970922646820825e-05,
"loss": 0.8083174228668213,
"step": 263
},
{
"epoch": 0.3804034582132565,
"grad_norm": 0.549354612827301,
"learning_rate": 1.8960365369976256e-05,
"loss": 0.9548866152763367,
"step": 264
},
{
"epoch": 0.3818443804034582,
"grad_norm": 0.5359464883804321,
"learning_rate": 1.8949757184146706e-05,
"loss": 0.9965292811393738,
"step": 265
},
{
"epoch": 0.38328530259365995,
"grad_norm": 0.4956778287887573,
"learning_rate": 1.893909814960338e-05,
"loss": 0.7761472463607788,
"step": 266
},
{
"epoch": 0.38472622478386165,
"grad_norm": 0.6444170475006104,
"learning_rate": 1.8928388326906376e-05,
"loss": 0.9611786603927612,
"step": 267
},
{
"epoch": 0.3861671469740634,
"grad_norm": 0.5334309935569763,
"learning_rate": 1.8917627776904352e-05,
"loss": 0.7814089059829712,
"step": 268
},
{
"epoch": 0.38760806916426516,
"grad_norm": 0.5955665707588196,
"learning_rate": 1.8906816560734182e-05,
"loss": 0.7999836206436157,
"step": 269
},
{
"epoch": 0.38904899135446686,
"grad_norm": 0.5542837381362915,
"learning_rate": 1.889595473982059e-05,
"loss": 0.8167480230331421,
"step": 270
},
{
"epoch": 0.3904899135446686,
"grad_norm": 0.5896769762039185,
"learning_rate": 1.8885042375875825e-05,
"loss": 0.8907427787780762,
"step": 271
},
{
"epoch": 0.3919308357348703,
"grad_norm": 0.5406194925308228,
"learning_rate": 1.8874079530899298e-05,
"loss": 0.7174965143203735,
"step": 272
},
{
"epoch": 0.39337175792507206,
"grad_norm": 0.5626124739646912,
"learning_rate": 1.8863066267177234e-05,
"loss": 0.8048909902572632,
"step": 273
},
{
"epoch": 0.39481268011527376,
"grad_norm": 0.5179824233055115,
"learning_rate": 1.885200264728231e-05,
"loss": 0.9267027378082275,
"step": 274
},
{
"epoch": 0.3962536023054755,
"grad_norm": 0.5661771893501282,
"learning_rate": 1.884088873407331e-05,
"loss": 0.9026603698730469,
"step": 275
},
{
"epoch": 0.3976945244956772,
"grad_norm": 0.5045554637908936,
"learning_rate": 1.882972459069476e-05,
"loss": 0.6470727920532227,
"step": 276
},
{
"epoch": 0.39913544668587897,
"grad_norm": 0.56135493516922,
"learning_rate": 1.8818510280576577e-05,
"loss": 0.8532092571258545,
"step": 277
},
{
"epoch": 0.40057636887608067,
"grad_norm": 0.5583396553993225,
"learning_rate": 1.88072458674337e-05,
"loss": 0.9061315059661865,
"step": 278
},
{
"epoch": 0.4020172910662824,
"grad_norm": 0.5593348741531372,
"learning_rate": 1.8795931415265735e-05,
"loss": 0.8761183023452759,
"step": 279
},
{
"epoch": 0.4034582132564842,
"grad_norm": 0.5651080012321472,
"learning_rate": 1.8784566988356586e-05,
"loss": 0.8924948573112488,
"step": 280
},
{
"epoch": 0.4048991354466859,
"grad_norm": 0.5161260962486267,
"learning_rate": 1.877315265127409e-05,
"loss": 0.7666558623313904,
"step": 281
},
{
"epoch": 0.40634005763688763,
"grad_norm": 0.5837459564208984,
"learning_rate": 1.8761688468869658e-05,
"loss": 0.8420302867889404,
"step": 282
},
{
"epoch": 0.40778097982708933,
"grad_norm": 0.4191261827945709,
"learning_rate": 1.8750174506277902e-05,
"loss": 0.6361753940582275,
"step": 283
},
{
"epoch": 0.4092219020172911,
"grad_norm": 0.5724524855613708,
"learning_rate": 1.8738610828916255e-05,
"loss": 0.8530267477035522,
"step": 284
},
{
"epoch": 0.4106628242074928,
"grad_norm": 0.542438268661499,
"learning_rate": 1.8726997502484617e-05,
"loss": 0.8724586963653564,
"step": 285
},
{
"epoch": 0.41210374639769454,
"grad_norm": 0.6427977085113525,
"learning_rate": 1.8715334592964964e-05,
"loss": 0.9001520276069641,
"step": 286
},
{
"epoch": 0.41354466858789624,
"grad_norm": 0.5782722234725952,
"learning_rate": 1.8703622166620995e-05,
"loss": 0.7817631959915161,
"step": 287
},
{
"epoch": 0.414985590778098,
"grad_norm": 0.5602166652679443,
"learning_rate": 1.869186028999773e-05,
"loss": 0.8070363998413086,
"step": 288
},
{
"epoch": 0.4164265129682997,
"grad_norm": 0.4618314504623413,
"learning_rate": 1.868004902992115e-05,
"loss": 0.7365565299987793,
"step": 289
},
{
"epoch": 0.41786743515850144,
"grad_norm": 0.5189284682273865,
"learning_rate": 1.8668188453497814e-05,
"loss": 0.7789565324783325,
"step": 290
},
{
"epoch": 0.41930835734870314,
"grad_norm": 0.5521842241287231,
"learning_rate": 1.865627862811447e-05,
"loss": 0.8625829219818115,
"step": 291
},
{
"epoch": 0.4207492795389049,
"grad_norm": 0.5609997510910034,
"learning_rate": 1.8644319621437677e-05,
"loss": 0.7133926153182983,
"step": 292
},
{
"epoch": 0.42219020172910665,
"grad_norm": 0.541661262512207,
"learning_rate": 1.863231150141343e-05,
"loss": 0.8321035504341125,
"step": 293
},
{
"epoch": 0.42363112391930835,
"grad_norm": 0.5149182677268982,
"learning_rate": 1.8620254336266757e-05,
"loss": 0.874350905418396,
"step": 294
},
{
"epoch": 0.4250720461095101,
"grad_norm": 0.6094365119934082,
"learning_rate": 1.8608148194501343e-05,
"loss": 0.95909583568573,
"step": 295
},
{
"epoch": 0.4265129682997118,
"grad_norm": 0.545134425163269,
"learning_rate": 1.8595993144899135e-05,
"loss": 0.8478891849517822,
"step": 296
},
{
"epoch": 0.42795389048991356,
"grad_norm": 0.5844987630844116,
"learning_rate": 1.858378925651996e-05,
"loss": 0.952475905418396,
"step": 297
},
{
"epoch": 0.42939481268011526,
"grad_norm": 0.6407363414764404,
"learning_rate": 1.8571536598701114e-05,
"loss": 0.890306293964386,
"step": 298
},
{
"epoch": 0.430835734870317,
"grad_norm": 0.5291120409965515,
"learning_rate": 1.8559235241056994e-05,
"loss": 0.7820404767990112,
"step": 299
},
{
"epoch": 0.4322766570605187,
"grad_norm": 0.6523168683052063,
"learning_rate": 1.8546885253478678e-05,
"loss": 0.9190243482589722,
"step": 300
},
{
"epoch": 0.43371757925072046,
"grad_norm": 0.5536230802536011,
"learning_rate": 1.853448670613354e-05,
"loss": 0.8771055936813354,
"step": 301
},
{
"epoch": 0.43515850144092216,
"grad_norm": 0.563988208770752,
"learning_rate": 1.8522039669464863e-05,
"loss": 0.9419461488723755,
"step": 302
},
{
"epoch": 0.4365994236311239,
"grad_norm": 0.5542746186256409,
"learning_rate": 1.8509544214191403e-05,
"loss": 0.8944621086120605,
"step": 303
},
{
"epoch": 0.43804034582132567,
"grad_norm": 0.5350150465965271,
"learning_rate": 1.8497000411307035e-05,
"loss": 0.8363540768623352,
"step": 304
},
{
"epoch": 0.43948126801152737,
"grad_norm": 0.6578159928321838,
"learning_rate": 1.8484408332080298e-05,
"loss": 0.8941947221755981,
"step": 305
},
{
"epoch": 0.4409221902017291,
"grad_norm": 0.5049351453781128,
"learning_rate": 1.847176804805404e-05,
"loss": 0.7112985849380493,
"step": 306
},
{
"epoch": 0.4423631123919308,
"grad_norm": 0.5391475558280945,
"learning_rate": 1.845907963104497e-05,
"loss": 0.8202729225158691,
"step": 307
},
{
"epoch": 0.4438040345821326,
"grad_norm": 0.533073365688324,
"learning_rate": 1.844634315314329e-05,
"loss": 0.7154335379600525,
"step": 308
},
{
"epoch": 0.4452449567723343,
"grad_norm": 0.5108841061592102,
"learning_rate": 1.843355868671224e-05,
"loss": 0.809640109539032,
"step": 309
},
{
"epoch": 0.44668587896253603,
"grad_norm": 0.5836730003356934,
"learning_rate": 1.8420726304387723e-05,
"loss": 0.8588663339614868,
"step": 310
},
{
"epoch": 0.44812680115273773,
"grad_norm": 0.553754448890686,
"learning_rate": 1.840784607907788e-05,
"loss": 0.9030488729476929,
"step": 311
},
{
"epoch": 0.4495677233429395,
"grad_norm": 0.5422321557998657,
"learning_rate": 1.839491808396267e-05,
"loss": 0.9312916994094849,
"step": 312
},
{
"epoch": 0.4510086455331412,
"grad_norm": 0.571288526058197,
"learning_rate": 1.8381942392493464e-05,
"loss": 0.7808306217193604,
"step": 313
},
{
"epoch": 0.45244956772334294,
"grad_norm": 0.6292662024497986,
"learning_rate": 1.836891907839262e-05,
"loss": 0.8910583257675171,
"step": 314
},
{
"epoch": 0.4538904899135447,
"grad_norm": 0.7166162133216858,
"learning_rate": 1.8355848215653073e-05,
"loss": 0.9522457122802734,
"step": 315
},
{
"epoch": 0.4553314121037464,
"grad_norm": 0.530035138130188,
"learning_rate": 1.8342729878537903e-05,
"loss": 0.7427791357040405,
"step": 316
},
{
"epoch": 0.45677233429394815,
"grad_norm": 0.5844604969024658,
"learning_rate": 1.8329564141579924e-05,
"loss": 0.8231528997421265,
"step": 317
},
{
"epoch": 0.45821325648414984,
"grad_norm": 0.5728287100791931,
"learning_rate": 1.831635107958125e-05,
"loss": 0.7926725149154663,
"step": 318
},
{
"epoch": 0.4596541786743516,
"grad_norm": 0.5957269668579102,
"learning_rate": 1.8303090767612882e-05,
"loss": 0.7666646242141724,
"step": 319
},
{
"epoch": 0.4610951008645533,
"grad_norm": 0.5679943561553955,
"learning_rate": 1.828978328101428e-05,
"loss": 0.8417126536369324,
"step": 320
},
{
"epoch": 0.46253602305475505,
"grad_norm": 0.5592843890190125,
"learning_rate": 1.8276428695392908e-05,
"loss": 0.8209915161132812,
"step": 321
},
{
"epoch": 0.46397694524495675,
"grad_norm": 0.5582208633422852,
"learning_rate": 1.8263027086623852e-05,
"loss": 0.8837690353393555,
"step": 322
},
{
"epoch": 0.4654178674351585,
"grad_norm": 0.5516442656517029,
"learning_rate": 1.824957853084935e-05,
"loss": 0.8529192209243774,
"step": 323
},
{
"epoch": 0.4668587896253602,
"grad_norm": 0.46436768770217896,
"learning_rate": 1.8236083104478373e-05,
"loss": 0.7889063358306885,
"step": 324
},
{
"epoch": 0.46829971181556196,
"grad_norm": 0.5388792157173157,
"learning_rate": 1.82225408841862e-05,
"loss": 0.9407418966293335,
"step": 325
},
{
"epoch": 0.4697406340057637,
"grad_norm": 0.5538932681083679,
"learning_rate": 1.8208951946913965e-05,
"loss": 0.7872345447540283,
"step": 326
},
{
"epoch": 0.4711815561959654,
"grad_norm": 0.5267892479896545,
"learning_rate": 1.819531636986823e-05,
"loss": 0.882459282875061,
"step": 327
},
{
"epoch": 0.47262247838616717,
"grad_norm": 0.5909714698791504,
"learning_rate": 1.8181634230520537e-05,
"loss": 0.9235331416130066,
"step": 328
},
{
"epoch": 0.47406340057636887,
"grad_norm": 0.46869757771492004,
"learning_rate": 1.8167905606606995e-05,
"loss": 0.729993462562561,
"step": 329
},
{
"epoch": 0.4755043227665706,
"grad_norm": 0.5067007541656494,
"learning_rate": 1.8154130576127794e-05,
"loss": 0.782564103603363,
"step": 330
},
{
"epoch": 0.4769452449567723,
"grad_norm": 0.5801006555557251,
"learning_rate": 1.8140309217346805e-05,
"loss": 0.8379372358322144,
"step": 331
},
{
"epoch": 0.4783861671469741,
"grad_norm": 0.5479756593704224,
"learning_rate": 1.812644160879111e-05,
"loss": 0.8435878753662109,
"step": 332
},
{
"epoch": 0.47982708933717577,
"grad_norm": 0.5957703590393066,
"learning_rate": 1.8112527829250558e-05,
"loss": 0.9512555599212646,
"step": 333
},
{
"epoch": 0.4812680115273775,
"grad_norm": 0.6068620681762695,
"learning_rate": 1.809856795777733e-05,
"loss": 0.9145616292953491,
"step": 334
},
{
"epoch": 0.4827089337175792,
"grad_norm": 0.6074503064155579,
"learning_rate": 1.8084562073685482e-05,
"loss": 0.882449746131897,
"step": 335
},
{
"epoch": 0.484149855907781,
"grad_norm": 0.525236964225769,
"learning_rate": 1.807051025655048e-05,
"loss": 0.8563383221626282,
"step": 336
},
{
"epoch": 0.48559077809798273,
"grad_norm": 0.5497779250144958,
"learning_rate": 1.8056412586208784e-05,
"loss": 0.9395558834075928,
"step": 337
},
{
"epoch": 0.48703170028818443,
"grad_norm": 0.6293545365333557,
"learning_rate": 1.8042269142757354e-05,
"loss": 0.88382488489151,
"step": 338
},
{
"epoch": 0.4884726224783862,
"grad_norm": 0.577172577381134,
"learning_rate": 1.8028080006553223e-05,
"loss": 0.8618476390838623,
"step": 339
},
{
"epoch": 0.4899135446685879,
"grad_norm": 0.6339530944824219,
"learning_rate": 1.8013845258213024e-05,
"loss": 0.9440295696258545,
"step": 340
},
{
"epoch": 0.49135446685878964,
"grad_norm": 0.5603951215744019,
"learning_rate": 1.7999564978612544e-05,
"loss": 0.8309040069580078,
"step": 341
},
{
"epoch": 0.49279538904899134,
"grad_norm": 0.5788914561271667,
"learning_rate": 1.7985239248886264e-05,
"loss": 0.7537168264389038,
"step": 342
},
{
"epoch": 0.4942363112391931,
"grad_norm": 0.6304644346237183,
"learning_rate": 1.797086815042688e-05,
"loss": 0.8292367458343506,
"step": 343
},
{
"epoch": 0.4956772334293948,
"grad_norm": 0.5928136110305786,
"learning_rate": 1.7956451764884862e-05,
"loss": 0.9440184831619263,
"step": 344
},
{
"epoch": 0.49711815561959655,
"grad_norm": 0.5665022730827332,
"learning_rate": 1.7941990174167987e-05,
"loss": 0.8995509147644043,
"step": 345
},
{
"epoch": 0.49855907780979825,
"grad_norm": 0.5720584392547607,
"learning_rate": 1.7927483460440857e-05,
"loss": 0.76401686668396,
"step": 346
},
{
"epoch": 0.5,
"grad_norm": 0.5267989635467529,
"learning_rate": 1.7912931706124447e-05,
"loss": 0.8446654081344604,
"step": 347
},
{
"epoch": 0.5014409221902018,
"grad_norm": 0.5750987529754639,
"learning_rate": 1.789833499389564e-05,
"loss": 0.9858725070953369,
"step": 348
},
{
"epoch": 0.5028818443804035,
"grad_norm": 0.5349816083908081,
"learning_rate": 1.7883693406686746e-05,
"loss": 0.7775611877441406,
"step": 349
},
{
"epoch": 0.5043227665706052,
"grad_norm": 0.5641323328018188,
"learning_rate": 1.786900702768504e-05,
"loss": 0.8651770353317261,
"step": 350
},
{
"epoch": 0.5057636887608069,
"grad_norm": 0.5117978453636169,
"learning_rate": 1.7854275940332272e-05,
"loss": 0.9010483026504517,
"step": 351
},
{
"epoch": 0.5072046109510087,
"grad_norm": 0.5440452694892883,
"learning_rate": 1.7839500228324223e-05,
"loss": 0.8568730354309082,
"step": 352
},
{
"epoch": 0.5086455331412104,
"grad_norm": 0.6057641506195068,
"learning_rate": 1.78246799756102e-05,
"loss": 0.8103794455528259,
"step": 353
},
{
"epoch": 0.5100864553314121,
"grad_norm": 0.5351923108100891,
"learning_rate": 1.7809815266392575e-05,
"loss": 0.7672939300537109,
"step": 354
},
{
"epoch": 0.5115273775216138,
"grad_norm": 0.5460227131843567,
"learning_rate": 1.779490618512631e-05,
"loss": 0.8088076114654541,
"step": 355
},
{
"epoch": 0.5129682997118156,
"grad_norm": 0.5851891040802002,
"learning_rate": 1.7779952816518454e-05,
"loss": 0.9721853137016296,
"step": 356
},
{
"epoch": 0.5144092219020173,
"grad_norm": 0.5677433013916016,
"learning_rate": 1.7764955245527693e-05,
"loss": 0.8301690220832825,
"step": 357
},
{
"epoch": 0.515850144092219,
"grad_norm": 0.535126805305481,
"learning_rate": 1.7749913557363844e-05,
"loss": 0.7846548557281494,
"step": 358
},
{
"epoch": 0.5172910662824207,
"grad_norm": 0.525834321975708,
"learning_rate": 1.7734827837487386e-05,
"loss": 0.7961332201957703,
"step": 359
},
{
"epoch": 0.5187319884726225,
"grad_norm": 0.5872762203216553,
"learning_rate": 1.771969817160896e-05,
"loss": 0.9165979623794556,
"step": 360
},
{
"epoch": 0.5201729106628242,
"grad_norm": 0.6101512312889099,
"learning_rate": 1.770452464568889e-05,
"loss": 0.8378180861473083,
"step": 361
},
{
"epoch": 0.521613832853026,
"grad_norm": 0.5622274279594421,
"learning_rate": 1.7689307345936705e-05,
"loss": 0.9264764189720154,
"step": 362
},
{
"epoch": 0.5230547550432276,
"grad_norm": 0.5881224274635315,
"learning_rate": 1.767404635881062e-05,
"loss": 0.7161828279495239,
"step": 363
},
{
"epoch": 0.5244956772334294,
"grad_norm": 0.5270618796348572,
"learning_rate": 1.7658741771017076e-05,
"loss": 0.7974847555160522,
"step": 364
},
{
"epoch": 0.5259365994236311,
"grad_norm": 0.6095194816589355,
"learning_rate": 1.7643393669510236e-05,
"loss": 0.9246715307235718,
"step": 365
},
{
"epoch": 0.5273775216138329,
"grad_norm": 0.5328574776649475,
"learning_rate": 1.7628002141491477e-05,
"loss": 0.7695842981338501,
"step": 366
},
{
"epoch": 0.5288184438040345,
"grad_norm": 0.5745126605033875,
"learning_rate": 1.7612567274408925e-05,
"loss": 0.8186914920806885,
"step": 367
},
{
"epoch": 0.5302593659942363,
"grad_norm": 0.628709077835083,
"learning_rate": 1.759708915595692e-05,
"loss": 0.8628635406494141,
"step": 368
},
{
"epoch": 0.531700288184438,
"grad_norm": 0.6294564604759216,
"learning_rate": 1.7581567874075552e-05,
"loss": 0.9215906858444214,
"step": 369
},
{
"epoch": 0.5331412103746398,
"grad_norm": 0.5869788527488708,
"learning_rate": 1.7566003516950146e-05,
"loss": 0.8090559244155884,
"step": 370
},
{
"epoch": 0.5345821325648416,
"grad_norm": 0.5539984703063965,
"learning_rate": 1.755039617301075e-05,
"loss": 0.7528271675109863,
"step": 371
},
{
"epoch": 0.5360230547550432,
"grad_norm": 0.6224076747894287,
"learning_rate": 1.753474593093167e-05,
"loss": 0.8555803298950195,
"step": 372
},
{
"epoch": 0.537463976945245,
"grad_norm": 0.5650821328163147,
"learning_rate": 1.751905287963091e-05,
"loss": 0.7390874624252319,
"step": 373
},
{
"epoch": 0.5389048991354467,
"grad_norm": 0.574177086353302,
"learning_rate": 1.7503317108269722e-05,
"loss": 0.8000231385231018,
"step": 374
},
{
"epoch": 0.5403458213256485,
"grad_norm": 0.5812594294548035,
"learning_rate": 1.7487538706252062e-05,
"loss": 0.7520013451576233,
"step": 375
},
{
"epoch": 0.5417867435158501,
"grad_norm": 0.6823700666427612,
"learning_rate": 1.7471717763224096e-05,
"loss": 0.8118777275085449,
"step": 376
},
{
"epoch": 0.5432276657060519,
"grad_norm": 0.5933303236961365,
"learning_rate": 1.7455854369073703e-05,
"loss": 0.9474261999130249,
"step": 377
},
{
"epoch": 0.5446685878962536,
"grad_norm": 0.5944895148277283,
"learning_rate": 1.7439948613929928e-05,
"loss": 0.8606828451156616,
"step": 378
},
{
"epoch": 0.5461095100864554,
"grad_norm": 0.6672566533088684,
"learning_rate": 1.742400058816252e-05,
"loss": 0.9909709692001343,
"step": 379
},
{
"epoch": 0.547550432276657,
"grad_norm": 0.6100744009017944,
"learning_rate": 1.740801038238137e-05,
"loss": 1.0025837421417236,
"step": 380
},
{
"epoch": 0.5489913544668588,
"grad_norm": 0.5349178910255432,
"learning_rate": 1.7391978087436032e-05,
"loss": 0.7103726863861084,
"step": 381
},
{
"epoch": 0.5504322766570605,
"grad_norm": 0.7046380639076233,
"learning_rate": 1.737590379441518e-05,
"loss": 0.8792040348052979,
"step": 382
},
{
"epoch": 0.5518731988472623,
"grad_norm": 0.4961056411266327,
"learning_rate": 1.735978759464612e-05,
"loss": 0.6803684234619141,
"step": 383
},
{
"epoch": 0.553314121037464,
"grad_norm": 0.679291307926178,
"learning_rate": 1.734362957969423e-05,
"loss": 0.9841272830963135,
"step": 384
},
{
"epoch": 0.5547550432276657,
"grad_norm": 0.6313779950141907,
"learning_rate": 1.7327429841362494e-05,
"loss": 0.9495967030525208,
"step": 385
},
{
"epoch": 0.5561959654178674,
"grad_norm": 0.6582711935043335,
"learning_rate": 1.7311188471690925e-05,
"loss": 0.883407711982727,
"step": 386
},
{
"epoch": 0.5576368876080692,
"grad_norm": 0.5695992112159729,
"learning_rate": 1.729490556295608e-05,
"loss": 0.779451847076416,
"step": 387
},
{
"epoch": 0.5590778097982709,
"grad_norm": 0.5270829796791077,
"learning_rate": 1.7278581207670522e-05,
"loss": 0.8198345899581909,
"step": 388
},
{
"epoch": 0.5605187319884726,
"grad_norm": 0.5536486506462097,
"learning_rate": 1.72622154985823e-05,
"loss": 0.8746044635772705,
"step": 389
},
{
"epoch": 0.5619596541786743,
"grad_norm": 0.570270836353302,
"learning_rate": 1.7245808528674403e-05,
"loss": 0.7180839776992798,
"step": 390
},
{
"epoch": 0.5634005763688761,
"grad_norm": 0.5193256139755249,
"learning_rate": 1.7229360391164256e-05,
"loss": 0.7979388236999512,
"step": 391
},
{
"epoch": 0.5648414985590778,
"grad_norm": 0.6080824136734009,
"learning_rate": 1.7212871179503188e-05,
"loss": 0.8225536346435547,
"step": 392
},
{
"epoch": 0.5662824207492796,
"grad_norm": 0.5873963832855225,
"learning_rate": 1.719634098737588e-05,
"loss": 0.9406713247299194,
"step": 393
},
{
"epoch": 0.5677233429394812,
"grad_norm": 0.5575190782546997,
"learning_rate": 1.7179769908699856e-05,
"loss": 0.8183209896087646,
"step": 394
},
{
"epoch": 0.569164265129683,
"grad_norm": 0.7602899074554443,
"learning_rate": 1.716315803762494e-05,
"loss": 0.7549249529838562,
"step": 395
},
{
"epoch": 0.5706051873198847,
"grad_norm": 0.6657350659370422,
"learning_rate": 1.7146505468532707e-05,
"loss": 0.8715250492095947,
"step": 396
},
{
"epoch": 0.5720461095100865,
"grad_norm": 0.5282604694366455,
"learning_rate": 1.7129812296035985e-05,
"loss": 0.6946278810501099,
"step": 397
},
{
"epoch": 0.5734870317002881,
"grad_norm": 0.7253147959709167,
"learning_rate": 1.711307861497827e-05,
"loss": 0.8823874592781067,
"step": 398
},
{
"epoch": 0.5749279538904899,
"grad_norm": 0.5167055130004883,
"learning_rate": 1.709630452043323e-05,
"loss": 0.7666712999343872,
"step": 399
},
{
"epoch": 0.5763688760806917,
"grad_norm": 0.6416431069374084,
"learning_rate": 1.707949010770413e-05,
"loss": 0.9795528054237366,
"step": 400
},
{
"epoch": 0.5778097982708934,
"grad_norm": 0.640731930732727,
"learning_rate": 1.7062635472323306e-05,
"loss": 0.9208186864852905,
"step": 401
},
{
"epoch": 0.579250720461095,
"grad_norm": 0.549220621585846,
"learning_rate": 1.7045740710051637e-05,
"loss": 0.6796606779098511,
"step": 402
},
{
"epoch": 0.5806916426512968,
"grad_norm": 0.6743412613868713,
"learning_rate": 1.7028805916877975e-05,
"loss": 0.9954819679260254,
"step": 403
},
{
"epoch": 0.5821325648414986,
"grad_norm": 0.609154462814331,
"learning_rate": 1.7011831189018607e-05,
"loss": 0.8255324363708496,
"step": 404
},
{
"epoch": 0.5835734870317003,
"grad_norm": 0.5475680828094482,
"learning_rate": 1.6994816622916726e-05,
"loss": 0.8262126445770264,
"step": 405
},
{
"epoch": 0.5850144092219021,
"grad_norm": 0.5728087425231934,
"learning_rate": 1.697776231524185e-05,
"loss": 0.847625195980072,
"step": 406
},
{
"epoch": 0.5864553314121037,
"grad_norm": 0.5608763098716736,
"learning_rate": 1.696066836288931e-05,
"loss": 0.8561680316925049,
"step": 407
},
{
"epoch": 0.5878962536023055,
"grad_norm": 0.6582566499710083,
"learning_rate": 1.694353486297966e-05,
"loss": 0.9651436805725098,
"step": 408
},
{
"epoch": 0.5893371757925072,
"grad_norm": 0.6420081853866577,
"learning_rate": 1.6926361912858172e-05,
"loss": 0.8645302057266235,
"step": 409
},
{
"epoch": 0.590778097982709,
"grad_norm": 0.5092571377754211,
"learning_rate": 1.6909149610094245e-05,
"loss": 0.6726502180099487,
"step": 410
},
{
"epoch": 0.5922190201729106,
"grad_norm": 0.6565274596214294,
"learning_rate": 1.689189805248085e-05,
"loss": 0.8908772468566895,
"step": 411
},
{
"epoch": 0.5936599423631124,
"grad_norm": 0.5423445105552673,
"learning_rate": 1.6874607338034015e-05,
"loss": 0.815947413444519,
"step": 412
},
{
"epoch": 0.5951008645533141,
"grad_norm": 0.5798755288124084,
"learning_rate": 1.6857277564992212e-05,
"loss": 0.7958804965019226,
"step": 413
},
{
"epoch": 0.5965417867435159,
"grad_norm": 0.5115224123001099,
"learning_rate": 1.683990883181585e-05,
"loss": 0.7472406029701233,
"step": 414
},
{
"epoch": 0.5979827089337176,
"grad_norm": 0.48664143681526184,
"learning_rate": 1.6822501237186677e-05,
"loss": 0.9092934727668762,
"step": 415
},
{
"epoch": 0.5994236311239193,
"grad_norm": 0.5489537715911865,
"learning_rate": 1.680505488000725e-05,
"loss": 0.8212312459945679,
"step": 416
},
{
"epoch": 0.600864553314121,
"grad_norm": 0.643065869808197,
"learning_rate": 1.678756985940034e-05,
"loss": 1.0320334434509277,
"step": 417
},
{
"epoch": 0.6023054755043228,
"grad_norm": 0.5214179754257202,
"learning_rate": 1.6770046274708404e-05,
"loss": 0.8109217882156372,
"step": 418
},
{
"epoch": 0.6037463976945245,
"grad_norm": 0.5978414416313171,
"learning_rate": 1.6752484225493e-05,
"loss": 0.80739426612854,
"step": 419
},
{
"epoch": 0.6051873198847262,
"grad_norm": 0.5854954719543457,
"learning_rate": 1.673488381153421e-05,
"loss": 0.8603742122650146,
"step": 420
},
{
"epoch": 0.6066282420749279,
"grad_norm": 0.5928551554679871,
"learning_rate": 1.6717245132830114e-05,
"loss": 0.781597375869751,
"step": 421
},
{
"epoch": 0.6080691642651297,
"grad_norm": 0.5761047601699829,
"learning_rate": 1.6699568289596175e-05,
"loss": 0.8539882898330688,
"step": 422
},
{
"epoch": 0.6095100864553314,
"grad_norm": 0.5701265931129456,
"learning_rate": 1.66818533822647e-05,
"loss": 0.7630763053894043,
"step": 423
},
{
"epoch": 0.6109510086455331,
"grad_norm": 0.5455740094184875,
"learning_rate": 1.6664100511484252e-05,
"loss": 0.8451640009880066,
"step": 424
},
{
"epoch": 0.6123919308357348,
"grad_norm": 0.6149104833602905,
"learning_rate": 1.66463097781191e-05,
"loss": 0.8973286151885986,
"step": 425
},
{
"epoch": 0.6138328530259366,
"grad_norm": 0.6265655159950256,
"learning_rate": 1.662848128324862e-05,
"loss": 0.8606114983558655,
"step": 426
},
{
"epoch": 0.6152737752161384,
"grad_norm": 0.5285404324531555,
"learning_rate": 1.6610615128166738e-05,
"loss": 0.7635661363601685,
"step": 427
},
{
"epoch": 0.6167146974063401,
"grad_norm": 0.522072434425354,
"learning_rate": 1.659271141438135e-05,
"loss": 0.8387913703918457,
"step": 428
},
{
"epoch": 0.6181556195965417,
"grad_norm": 0.5824821591377258,
"learning_rate": 1.657477024361374e-05,
"loss": 0.8419888019561768,
"step": 429
},
{
"epoch": 0.6195965417867435,
"grad_norm": 0.5804380774497986,
"learning_rate": 1.6556791717798013e-05,
"loss": 0.8360769748687744,
"step": 430
},
{
"epoch": 0.6210374639769453,
"grad_norm": 0.6119903922080994,
"learning_rate": 1.65387759390805e-05,
"loss": 0.8125861883163452,
"step": 431
},
{
"epoch": 0.622478386167147,
"grad_norm": 0.5484216809272766,
"learning_rate": 1.65207230098192e-05,
"loss": 0.7709370851516724,
"step": 432
},
{
"epoch": 0.6239193083573487,
"grad_norm": 0.5545355081558228,
"learning_rate": 1.6502633032583173e-05,
"loss": 0.663250207901001,
"step": 433
},
{
"epoch": 0.6253602305475504,
"grad_norm": 0.5580651760101318,
"learning_rate": 1.6484506110151977e-05,
"loss": 0.7123851776123047,
"step": 434
},
{
"epoch": 0.6268011527377522,
"grad_norm": 0.6118587851524353,
"learning_rate": 1.646634234551508e-05,
"loss": 0.7323435544967651,
"step": 435
},
{
"epoch": 0.6282420749279539,
"grad_norm": 0.5335654020309448,
"learning_rate": 1.6448141841871262e-05,
"loss": 0.704369306564331,
"step": 436
},
{
"epoch": 0.6296829971181557,
"grad_norm": 0.5516027212142944,
"learning_rate": 1.6429904702628044e-05,
"loss": 0.7410569190979004,
"step": 437
},
{
"epoch": 0.6311239193083573,
"grad_norm": 0.6382977366447449,
"learning_rate": 1.64116310314011e-05,
"loss": 0.7958225011825562,
"step": 438
},
{
"epoch": 0.6325648414985591,
"grad_norm": 0.7621927261352539,
"learning_rate": 1.639332093201365e-05,
"loss": 0.9671751856803894,
"step": 439
},
{
"epoch": 0.6340057636887608,
"grad_norm": 0.5713940858840942,
"learning_rate": 1.6374974508495895e-05,
"loss": 0.8552824258804321,
"step": 440
},
{
"epoch": 0.6354466858789626,
"grad_norm": 0.6350247859954834,
"learning_rate": 1.6356591865084413e-05,
"loss": 0.8765571117401123,
"step": 441
},
{
"epoch": 0.6368876080691642,
"grad_norm": 0.5537572503089905,
"learning_rate": 1.633817310622156e-05,
"loss": 0.7851294279098511,
"step": 442
},
{
"epoch": 0.638328530259366,
"grad_norm": 0.5975667834281921,
"learning_rate": 1.631971833655489e-05,
"loss": 0.8070282936096191,
"step": 443
},
{
"epoch": 0.6397694524495677,
"grad_norm": 0.5319865345954895,
"learning_rate": 1.630122766093656e-05,
"loss": 0.8131473660469055,
"step": 444
},
{
"epoch": 0.6412103746397695,
"grad_norm": 0.635881245136261,
"learning_rate": 1.6282701184422717e-05,
"loss": 0.8592699766159058,
"step": 445
},
{
"epoch": 0.6426512968299711,
"grad_norm": 0.8621135950088501,
"learning_rate": 1.6264139012272927e-05,
"loss": 0.8750573992729187,
"step": 446
},
{
"epoch": 0.6440922190201729,
"grad_norm": 0.5308735966682434,
"learning_rate": 1.6245541249949558e-05,
"loss": 0.754439651966095,
"step": 447
},
{
"epoch": 0.6455331412103746,
"grad_norm": 0.583328366279602,
"learning_rate": 1.622690800311718e-05,
"loss": 0.9887423515319824,
"step": 448
},
{
"epoch": 0.6469740634005764,
"grad_norm": 0.5716376304626465,
"learning_rate": 1.620823937764198e-05,
"loss": 0.6914777755737305,
"step": 449
},
{
"epoch": 0.6484149855907781,
"grad_norm": 0.510493814945221,
"learning_rate": 1.618953547959115e-05,
"loss": 0.7186870574951172,
"step": 450
},
{
"epoch": 0.6498559077809798,
"grad_norm": 0.5852826833724976,
"learning_rate": 1.6170796415232278e-05,
"loss": 0.8185505867004395,
"step": 451
},
{
"epoch": 0.6512968299711815,
"grad_norm": 0.5349909067153931,
"learning_rate": 1.615202229103276e-05,
"loss": 0.8269165754318237,
"step": 452
},
{
"epoch": 0.6527377521613833,
"grad_norm": 0.5847293734550476,
"learning_rate": 1.613321321365918e-05,
"loss": 0.7098100185394287,
"step": 453
},
{
"epoch": 0.654178674351585,
"grad_norm": 0.7040385603904724,
"learning_rate": 1.6114369289976727e-05,
"loss": 0.8710377216339111,
"step": 454
},
{
"epoch": 0.6556195965417867,
"grad_norm": 0.519795298576355,
"learning_rate": 1.609549062704855e-05,
"loss": 0.788497805595398,
"step": 455
},
{
"epoch": 0.6570605187319885,
"grad_norm": 0.5721155405044556,
"learning_rate": 1.607657733213519e-05,
"loss": 0.8074417114257812,
"step": 456
},
{
"epoch": 0.6585014409221902,
"grad_norm": 0.5255273580551147,
"learning_rate": 1.6057629512693938e-05,
"loss": 0.8402494788169861,
"step": 457
},
{
"epoch": 0.659942363112392,
"grad_norm": 0.6352676153182983,
"learning_rate": 1.6038647276378246e-05,
"loss": 0.7989722490310669,
"step": 458
},
{
"epoch": 0.6613832853025937,
"grad_norm": 0.5911176800727844,
"learning_rate": 1.601963073103711e-05,
"loss": 0.8456016778945923,
"step": 459
},
{
"epoch": 0.6628242074927954,
"grad_norm": 0.5407702326774597,
"learning_rate": 1.6000579984714453e-05,
"loss": 0.9468159675598145,
"step": 460
},
{
"epoch": 0.6642651296829971,
"grad_norm": 0.6090099215507507,
"learning_rate": 1.5981495145648507e-05,
"loss": 0.8110009431838989,
"step": 461
},
{
"epoch": 0.6657060518731989,
"grad_norm": 0.6194032430648804,
"learning_rate": 1.5962376322271218e-05,
"loss": 0.7852393388748169,
"step": 462
},
{
"epoch": 0.6671469740634006,
"grad_norm": 0.620429515838623,
"learning_rate": 1.5943223623207608e-05,
"loss": 0.6961592435836792,
"step": 463
},
{
"epoch": 0.6685878962536023,
"grad_norm": 0.524815022945404,
"learning_rate": 1.5924037157275156e-05,
"loss": 0.7707295417785645,
"step": 464
},
{
"epoch": 0.670028818443804,
"grad_norm": 0.5691516399383545,
"learning_rate": 1.5904817033483216e-05,
"loss": 0.936732292175293,
"step": 465
},
{
"epoch": 0.6714697406340058,
"grad_norm": 0.5965140461921692,
"learning_rate": 1.588556336103235e-05,
"loss": 0.9365247488021851,
"step": 466
},
{
"epoch": 0.6729106628242075,
"grad_norm": 0.5429266095161438,
"learning_rate": 1.586627624931373e-05,
"loss": 0.7953989505767822,
"step": 467
},
{
"epoch": 0.6743515850144092,
"grad_norm": 0.6360976099967957,
"learning_rate": 1.584695580790853e-05,
"loss": 0.8234362602233887,
"step": 468
},
{
"epoch": 0.6757925072046109,
"grad_norm": 0.5487512350082397,
"learning_rate": 1.5827602146587277e-05,
"loss": 0.8714422583580017,
"step": 469
},
{
"epoch": 0.6772334293948127,
"grad_norm": 0.5767121315002441,
"learning_rate": 1.5808215375309243e-05,
"loss": 0.7444263696670532,
"step": 470
},
{
"epoch": 0.6786743515850144,
"grad_norm": 0.5978402495384216,
"learning_rate": 1.578879560422182e-05,
"loss": 0.7328246235847473,
"step": 471
},
{
"epoch": 0.6801152737752162,
"grad_norm": 0.6980924010276794,
"learning_rate": 1.576934294365988e-05,
"loss": 0.9335816502571106,
"step": 472
},
{
"epoch": 0.6815561959654178,
"grad_norm": 0.578369677066803,
"learning_rate": 1.574985750414518e-05,
"loss": 0.7987563610076904,
"step": 473
},
{
"epoch": 0.6829971181556196,
"grad_norm": 0.5886070728302002,
"learning_rate": 1.5730339396385684e-05,
"loss": 0.8044711351394653,
"step": 474
},
{
"epoch": 0.6844380403458213,
"grad_norm": 0.5407376885414124,
"learning_rate": 1.571078873127499e-05,
"loss": 0.7568783760070801,
"step": 475
},
{
"epoch": 0.6858789625360231,
"grad_norm": 0.5557326674461365,
"learning_rate": 1.569120561989166e-05,
"loss": 0.9148538112640381,
"step": 476
},
{
"epoch": 0.6873198847262247,
"grad_norm": 0.6909852027893066,
"learning_rate": 1.5671590173498602e-05,
"loss": 0.8718932867050171,
"step": 477
},
{
"epoch": 0.6887608069164265,
"grad_norm": 0.5995265245437622,
"learning_rate": 1.5651942503542435e-05,
"loss": 0.815102219581604,
"step": 478
},
{
"epoch": 0.6902017291066282,
"grad_norm": 0.6188293099403381,
"learning_rate": 1.563226272165287e-05,
"loss": 0.8449897766113281,
"step": 479
},
{
"epoch": 0.69164265129683,
"grad_norm": 0.5536366701126099,
"learning_rate": 1.561255093964205e-05,
"loss": 0.6867048740386963,
"step": 480
},
{
"epoch": 0.6930835734870316,
"grad_norm": 0.5375257134437561,
"learning_rate": 1.559280726950395e-05,
"loss": 0.7475601434707642,
"step": 481
},
{
"epoch": 0.6945244956772334,
"grad_norm": 0.5647540092468262,
"learning_rate": 1.557303182341369e-05,
"loss": 0.787548303604126,
"step": 482
},
{
"epoch": 0.6959654178674352,
"grad_norm": 0.6174569129943848,
"learning_rate": 1.5553224713726954e-05,
"loss": 0.806119441986084,
"step": 483
},
{
"epoch": 0.6974063400576369,
"grad_norm": 0.5920992493629456,
"learning_rate": 1.553338605297931e-05,
"loss": 0.7667924761772156,
"step": 484
},
{
"epoch": 0.6988472622478387,
"grad_norm": 0.5590972900390625,
"learning_rate": 1.55135159538856e-05,
"loss": 0.757071852684021,
"step": 485
},
{
"epoch": 0.7002881844380403,
"grad_norm": 0.7274153232574463,
"learning_rate": 1.549361452933926e-05,
"loss": 0.8193639516830444,
"step": 486
},
{
"epoch": 0.7017291066282421,
"grad_norm": 0.6040515899658203,
"learning_rate": 1.5473681892411733e-05,
"loss": 0.7873412370681763,
"step": 487
},
{
"epoch": 0.7031700288184438,
"grad_norm": 0.5784795880317688,
"learning_rate": 1.5453718156351775e-05,
"loss": 0.8369981050491333,
"step": 488
},
{
"epoch": 0.7046109510086456,
"grad_norm": 0.6267423629760742,
"learning_rate": 1.543372343458485e-05,
"loss": 0.9070534110069275,
"step": 489
},
{
"epoch": 0.7060518731988472,
"grad_norm": 0.6215807199478149,
"learning_rate": 1.541369784071246e-05,
"loss": 0.8574085235595703,
"step": 490
},
{
"epoch": 0.707492795389049,
"grad_norm": 0.6305344104766846,
"learning_rate": 1.5393641488511514e-05,
"loss": 0.8450196981430054,
"step": 491
},
{
"epoch": 0.7089337175792507,
"grad_norm": 0.5847838521003723,
"learning_rate": 1.537355449193367e-05,
"loss": 0.7976102828979492,
"step": 492
},
{
"epoch": 0.7103746397694525,
"grad_norm": 0.5669839382171631,
"learning_rate": 1.5353436965104708e-05,
"loss": 0.9691898822784424,
"step": 493
},
{
"epoch": 0.7118155619596542,
"grad_norm": 0.5651892423629761,
"learning_rate": 1.533328902232385e-05,
"loss": 0.8189429044723511,
"step": 494
},
{
"epoch": 0.7132564841498559,
"grad_norm": 0.5902916193008423,
"learning_rate": 1.5313110778063142e-05,
"loss": 0.9311359524726868,
"step": 495
},
{
"epoch": 0.7146974063400576,
"grad_norm": 0.5827770233154297,
"learning_rate": 1.5292902346966782e-05,
"loss": 0.8031175136566162,
"step": 496
},
{
"epoch": 0.7161383285302594,
"grad_norm": 0.5901687741279602,
"learning_rate": 1.5272663843850484e-05,
"loss": 0.7599796056747437,
"step": 497
},
{
"epoch": 0.7175792507204611,
"grad_norm": 0.5782361626625061,
"learning_rate": 1.5252395383700815e-05,
"loss": 0.6886292099952698,
"step": 498
},
{
"epoch": 0.7190201729106628,
"grad_norm": 0.6033108830451965,
"learning_rate": 1.5232097081674542e-05,
"loss": 0.9031864404678345,
"step": 499
},
{
"epoch": 0.7204610951008645,
"grad_norm": 0.5347855091094971,
"learning_rate": 1.5211769053097984e-05,
"loss": 0.8068576455116272,
"step": 500
},
{
"epoch": 0.7219020172910663,
"grad_norm": 0.580989420413971,
"learning_rate": 1.5191411413466357e-05,
"loss": 0.7699841260910034,
"step": 501
},
{
"epoch": 0.723342939481268,
"grad_norm": 0.5658376216888428,
"learning_rate": 1.5171024278443108e-05,
"loss": 0.8388891220092773,
"step": 502
},
{
"epoch": 0.7247838616714697,
"grad_norm": 0.5904268622398376,
"learning_rate": 1.5150607763859266e-05,
"loss": 0.9313668012619019,
"step": 503
},
{
"epoch": 0.7262247838616714,
"grad_norm": 0.5787760019302368,
"learning_rate": 1.5130161985712786e-05,
"loss": 0.7764126062393188,
"step": 504
},
{
"epoch": 0.7276657060518732,
"grad_norm": 0.6377682089805603,
"learning_rate": 1.510968706016788e-05,
"loss": 0.9689415693283081,
"step": 505
},
{
"epoch": 0.729106628242075,
"grad_norm": 0.5599706172943115,
"learning_rate": 1.5089183103554372e-05,
"loss": 0.8220781087875366,
"step": 506
},
{
"epoch": 0.7305475504322767,
"grad_norm": 0.48352760076522827,
"learning_rate": 1.506865023236702e-05,
"loss": 0.7756180763244629,
"step": 507
},
{
"epoch": 0.7319884726224783,
"grad_norm": 0.5906649827957153,
"learning_rate": 1.504808856326486e-05,
"loss": 0.8613312840461731,
"step": 508
},
{
"epoch": 0.7334293948126801,
"grad_norm": 0.7078525424003601,
"learning_rate": 1.5027498213070558e-05,
"loss": 0.9191365838050842,
"step": 509
},
{
"epoch": 0.7348703170028819,
"grad_norm": 0.5789520144462585,
"learning_rate": 1.5006879298769721e-05,
"loss": 0.7827133536338806,
"step": 510
},
{
"epoch": 0.7363112391930836,
"grad_norm": 0.6121973395347595,
"learning_rate": 1.498623193751025e-05,
"loss": 0.8028290271759033,
"step": 511
},
{
"epoch": 0.7377521613832853,
"grad_norm": 0.6222305297851562,
"learning_rate": 1.4965556246601677e-05,
"loss": 1.0173910856246948,
"step": 512
},
{
"epoch": 0.739193083573487,
"grad_norm": 0.6844464540481567,
"learning_rate": 1.4944852343514478e-05,
"loss": 0.9411607980728149,
"step": 513
},
{
"epoch": 0.7406340057636888,
"grad_norm": 0.5612237453460693,
"learning_rate": 1.4924120345879422e-05,
"loss": 0.7112851738929749,
"step": 514
},
{
"epoch": 0.7420749279538905,
"grad_norm": 0.602611780166626,
"learning_rate": 1.490336037148691e-05,
"loss": 0.811668872833252,
"step": 515
},
{
"epoch": 0.7435158501440923,
"grad_norm": 0.5829697251319885,
"learning_rate": 1.4882572538286279e-05,
"loss": 0.8444674015045166,
"step": 516
},
{
"epoch": 0.7449567723342939,
"grad_norm": 0.6352137923240662,
"learning_rate": 1.486175696438516e-05,
"loss": 0.8086113333702087,
"step": 517
},
{
"epoch": 0.7463976945244957,
"grad_norm": 0.5597201585769653,
"learning_rate": 1.4840913768048788e-05,
"loss": 0.7531715631484985,
"step": 518
},
{
"epoch": 0.7478386167146974,
"grad_norm": 0.6141948103904724,
"learning_rate": 1.4820043067699342e-05,
"loss": 0.8946848511695862,
"step": 519
},
{
"epoch": 0.7492795389048992,
"grad_norm": 0.6349887251853943,
"learning_rate": 1.479914498191526e-05,
"loss": 0.9019135236740112,
"step": 520
},
{
"epoch": 0.7507204610951008,
"grad_norm": 0.5442377924919128,
"learning_rate": 1.4778219629430585e-05,
"loss": 0.8436559438705444,
"step": 521
},
{
"epoch": 0.7521613832853026,
"grad_norm": 0.5552181601524353,
"learning_rate": 1.4757267129134266e-05,
"loss": 0.7609561681747437,
"step": 522
},
{
"epoch": 0.7536023054755043,
"grad_norm": 0.5454914569854736,
"learning_rate": 1.4736287600069493e-05,
"loss": 0.6811736822128296,
"step": 523
},
{
"epoch": 0.7550432276657061,
"grad_norm": 0.5283980965614319,
"learning_rate": 1.4715281161433032e-05,
"loss": 0.6988868713378906,
"step": 524
},
{
"epoch": 0.7564841498559077,
"grad_norm": 0.6911367177963257,
"learning_rate": 1.4694247932574533e-05,
"loss": 0.7650970220565796,
"step": 525
},
{
"epoch": 0.7579250720461095,
"grad_norm": 0.6973710060119629,
"learning_rate": 1.4673188032995858e-05,
"loss": 0.8932456970214844,
"step": 526
},
{
"epoch": 0.7593659942363112,
"grad_norm": 0.573573887348175,
"learning_rate": 1.4652101582350394e-05,
"loss": 0.8257400989532471,
"step": 527
},
{
"epoch": 0.760806916426513,
"grad_norm": 0.6292358040809631,
"learning_rate": 1.4630988700442386e-05,
"loss": 0.7898586988449097,
"step": 528
},
{
"epoch": 0.7622478386167147,
"grad_norm": 0.6209089159965515,
"learning_rate": 1.4609849507226254e-05,
"loss": 0.8471835851669312,
"step": 529
},
{
"epoch": 0.7636887608069164,
"grad_norm": 0.5322726368904114,
"learning_rate": 1.4588684122805895e-05,
"loss": 0.6654898524284363,
"step": 530
},
{
"epoch": 0.7651296829971181,
"grad_norm": 0.600330114364624,
"learning_rate": 1.4567492667434031e-05,
"loss": 0.6953059434890747,
"step": 531
},
{
"epoch": 0.7665706051873199,
"grad_norm": 0.6372314691543579,
"learning_rate": 1.4546275261511493e-05,
"loss": 0.8754534721374512,
"step": 532
},
{
"epoch": 0.7680115273775217,
"grad_norm": 0.5352574586868286,
"learning_rate": 1.4525032025586555e-05,
"loss": 0.646438717842102,
"step": 533
},
{
"epoch": 0.7694524495677233,
"grad_norm": 0.5772659182548523,
"learning_rate": 1.450376308035425e-05,
"loss": 0.7625031471252441,
"step": 534
},
{
"epoch": 0.770893371757925,
"grad_norm": 0.6228421926498413,
"learning_rate": 1.4482468546655679e-05,
"loss": 0.8432234525680542,
"step": 535
},
{
"epoch": 0.7723342939481268,
"grad_norm": 0.5398774147033691,
"learning_rate": 1.4461148545477328e-05,
"loss": 0.8652327060699463,
"step": 536
},
{
"epoch": 0.7737752161383286,
"grad_norm": 0.6024429798126221,
"learning_rate": 1.443980319795037e-05,
"loss": 0.853449821472168,
"step": 537
},
{
"epoch": 0.7752161383285303,
"grad_norm": 0.6293278336524963,
"learning_rate": 1.4418432625349997e-05,
"loss": 1.0636059045791626,
"step": 538
},
{
"epoch": 0.776657060518732,
"grad_norm": 0.6416622996330261,
"learning_rate": 1.439703694909471e-05,
"loss": 0.7662414312362671,
"step": 539
},
{
"epoch": 0.7780979827089337,
"grad_norm": 0.5270242094993591,
"learning_rate": 1.437561629074564e-05,
"loss": 0.7614691257476807,
"step": 540
},
{
"epoch": 0.7795389048991355,
"grad_norm": 0.6011450290679932,
"learning_rate": 1.4354170772005862e-05,
"loss": 0.8249303698539734,
"step": 541
},
{
"epoch": 0.7809798270893372,
"grad_norm": 0.570554256439209,
"learning_rate": 1.4332700514719687e-05,
"loss": 0.8420515060424805,
"step": 542
},
{
"epoch": 0.7824207492795389,
"grad_norm": 0.6834957003593445,
"learning_rate": 1.4311205640871985e-05,
"loss": 0.7347505688667297,
"step": 543
},
{
"epoch": 0.7838616714697406,
"grad_norm": 0.5610590577125549,
"learning_rate": 1.4289686272587493e-05,
"loss": 0.7387393712997437,
"step": 544
},
{
"epoch": 0.7853025936599424,
"grad_norm": 0.5757878422737122,
"learning_rate": 1.4268142532130102e-05,
"loss": 0.8649142980575562,
"step": 545
},
{
"epoch": 0.7867435158501441,
"grad_norm": 0.5952373147010803,
"learning_rate": 1.4246574541902187e-05,
"loss": 0.9021813273429871,
"step": 546
},
{
"epoch": 0.7881844380403458,
"grad_norm": 0.564714789390564,
"learning_rate": 1.4224982424443894e-05,
"loss": 0.8700401186943054,
"step": 547
},
{
"epoch": 0.7896253602305475,
"grad_norm": 0.5540585517883301,
"learning_rate": 1.4203366302432447e-05,
"loss": 0.904576301574707,
"step": 548
},
{
"epoch": 0.7910662824207493,
"grad_norm": 0.5750954151153564,
"learning_rate": 1.4181726298681462e-05,
"loss": 0.6933422088623047,
"step": 549
},
{
"epoch": 0.792507204610951,
"grad_norm": 0.6007941365242004,
"learning_rate": 1.4160062536140235e-05,
"loss": 0.7416195869445801,
"step": 550
},
{
"epoch": 0.7939481268011528,
"grad_norm": 0.6115585565567017,
"learning_rate": 1.413837513789305e-05,
"loss": 0.7834339141845703,
"step": 551
},
{
"epoch": 0.7953890489913544,
"grad_norm": 0.5994195342063904,
"learning_rate": 1.4116664227158481e-05,
"loss": 0.7905891537666321,
"step": 552
},
{
"epoch": 0.7968299711815562,
"grad_norm": 0.6097184419631958,
"learning_rate": 1.4094929927288688e-05,
"loss": 0.9327152967453003,
"step": 553
},
{
"epoch": 0.7982708933717579,
"grad_norm": 0.6687369346618652,
"learning_rate": 1.4073172361768715e-05,
"loss": 1.0150576829910278,
"step": 554
},
{
"epoch": 0.7997118155619597,
"grad_norm": 0.5856130719184875,
"learning_rate": 1.4051391654215803e-05,
"loss": 0.7339403629302979,
"step": 555
},
{
"epoch": 0.8011527377521613,
"grad_norm": 0.5845277309417725,
"learning_rate": 1.402958792837866e-05,
"loss": 0.8820422887802124,
"step": 556
},
{
"epoch": 0.8025936599423631,
"grad_norm": 0.5744795203208923,
"learning_rate": 1.4007761308136791e-05,
"loss": 0.7946295738220215,
"step": 557
},
{
"epoch": 0.8040345821325648,
"grad_norm": 0.5813792943954468,
"learning_rate": 1.3985911917499764e-05,
"loss": 0.8150988221168518,
"step": 558
},
{
"epoch": 0.8054755043227666,
"grad_norm": 0.5602363348007202,
"learning_rate": 1.3964039880606522e-05,
"loss": 0.7896089553833008,
"step": 559
},
{
"epoch": 0.8069164265129684,
"grad_norm": 0.5744377374649048,
"learning_rate": 1.3942145321724678e-05,
"loss": 0.6854937076568604,
"step": 560
},
{
"epoch": 0.80835734870317,
"grad_norm": 0.5629292130470276,
"learning_rate": 1.3920228365249807e-05,
"loss": 0.8506255149841309,
"step": 561
},
{
"epoch": 0.8097982708933718,
"grad_norm": 0.6157159209251404,
"learning_rate": 1.3898289135704726e-05,
"loss": 0.8277969360351562,
"step": 562
},
{
"epoch": 0.8112391930835735,
"grad_norm": 0.6000183820724487,
"learning_rate": 1.387632775773881e-05,
"loss": 0.7690261006355286,
"step": 563
},
{
"epoch": 0.8126801152737753,
"grad_norm": 0.5285099744796753,
"learning_rate": 1.3854344356127272e-05,
"loss": 0.8356052041053772,
"step": 564
},
{
"epoch": 0.8141210374639769,
"grad_norm": 0.5782726407051086,
"learning_rate": 1.3832339055770443e-05,
"loss": 0.7185315489768982,
"step": 565
},
{
"epoch": 0.8155619596541787,
"grad_norm": 0.6152368187904358,
"learning_rate": 1.3810311981693084e-05,
"loss": 0.8090786337852478,
"step": 566
},
{
"epoch": 0.8170028818443804,
"grad_norm": 0.6380811929702759,
"learning_rate": 1.378826325904366e-05,
"loss": 0.786938488483429,
"step": 567
},
{
"epoch": 0.8184438040345822,
"grad_norm": 0.5521567463874817,
"learning_rate": 1.3766193013093637e-05,
"loss": 0.754514217376709,
"step": 568
},
{
"epoch": 0.8198847262247838,
"grad_norm": 0.5184625387191772,
"learning_rate": 1.3744101369236765e-05,
"loss": 0.8483080863952637,
"step": 569
},
{
"epoch": 0.8213256484149856,
"grad_norm": 0.5939867496490479,
"learning_rate": 1.3721988452988366e-05,
"loss": 0.7971341609954834,
"step": 570
},
{
"epoch": 0.8227665706051873,
"grad_norm": 0.6107808947563171,
"learning_rate": 1.3699854389984626e-05,
"loss": 0.7329815626144409,
"step": 571
},
{
"epoch": 0.8242074927953891,
"grad_norm": 0.5994635820388794,
"learning_rate": 1.367769930598188e-05,
"loss": 0.6906665563583374,
"step": 572
},
{
"epoch": 0.8256484149855908,
"grad_norm": 0.7389527559280396,
"learning_rate": 1.3655523326855889e-05,
"loss": 0.9652154445648193,
"step": 573
},
{
"epoch": 0.8270893371757925,
"grad_norm": 0.6304334402084351,
"learning_rate": 1.3633326578601133e-05,
"loss": 0.7543058395385742,
"step": 574
},
{
"epoch": 0.8285302593659942,
"grad_norm": 0.6436780095100403,
"learning_rate": 1.36111091873301e-05,
"loss": 0.7568035125732422,
"step": 575
},
{
"epoch": 0.829971181556196,
"grad_norm": 0.6232560873031616,
"learning_rate": 1.3588871279272553e-05,
"loss": 0.821622371673584,
"step": 576
},
{
"epoch": 0.8314121037463977,
"grad_norm": 0.585903525352478,
"learning_rate": 1.356661298077483e-05,
"loss": 0.6720898151397705,
"step": 577
},
{
"epoch": 0.8328530259365994,
"grad_norm": 0.6891927719116211,
"learning_rate": 1.3544334418299115e-05,
"loss": 0.8245535492897034,
"step": 578
},
{
"epoch": 0.8342939481268011,
"grad_norm": 0.6329434514045715,
"learning_rate": 1.3522035718422722e-05,
"loss": 0.7789890766143799,
"step": 579
},
{
"epoch": 0.8357348703170029,
"grad_norm": 0.5948352813720703,
"learning_rate": 1.3499717007837381e-05,
"loss": 0.7675924301147461,
"step": 580
},
{
"epoch": 0.8371757925072046,
"grad_norm": 0.699979841709137,
"learning_rate": 1.3477378413348516e-05,
"loss": 1.040189504623413,
"step": 581
},
{
"epoch": 0.8386167146974063,
"grad_norm": 0.5986906290054321,
"learning_rate": 1.3455020061874517e-05,
"loss": 0.7689225077629089,
"step": 582
},
{
"epoch": 0.840057636887608,
"grad_norm": 0.5747967958450317,
"learning_rate": 1.343264208044603e-05,
"loss": 0.8100346326828003,
"step": 583
},
{
"epoch": 0.8414985590778098,
"grad_norm": 0.5550184845924377,
"learning_rate": 1.3410244596205222e-05,
"loss": 0.6612875461578369,
"step": 584
},
{
"epoch": 0.8429394812680115,
"grad_norm": 0.5810987949371338,
"learning_rate": 1.3387827736405079e-05,
"loss": 0.8044896721839905,
"step": 585
},
{
"epoch": 0.8443804034582133,
"grad_norm": 0.5891236066818237,
"learning_rate": 1.336539162840866e-05,
"loss": 0.7642381191253662,
"step": 586
},
{
"epoch": 0.845821325648415,
"grad_norm": 0.5809030532836914,
"learning_rate": 1.3342936399688387e-05,
"loss": 0.9018023014068604,
"step": 587
},
{
"epoch": 0.8472622478386167,
"grad_norm": 0.5283681750297546,
"learning_rate": 1.3320462177825321e-05,
"loss": 0.6488313674926758,
"step": 588
},
{
"epoch": 0.8487031700288185,
"grad_norm": 0.691789448261261,
"learning_rate": 1.3297969090508434e-05,
"loss": 0.9001842737197876,
"step": 589
},
{
"epoch": 0.8501440922190202,
"grad_norm": 0.5393000245094299,
"learning_rate": 1.3275457265533876e-05,
"loss": 0.7306933403015137,
"step": 590
},
{
"epoch": 0.8515850144092219,
"grad_norm": 0.5468747615814209,
"learning_rate": 1.3252926830804264e-05,
"loss": 0.7733805179595947,
"step": 591
},
{
"epoch": 0.8530259365994236,
"grad_norm": 0.6384521722793579,
"learning_rate": 1.323037791432795e-05,
"loss": 0.8655095100402832,
"step": 592
},
{
"epoch": 0.8544668587896254,
"grad_norm": 0.6655124425888062,
"learning_rate": 1.3207810644218277e-05,
"loss": 0.8915953040122986,
"step": 593
},
{
"epoch": 0.8559077809798271,
"grad_norm": 0.8409086465835571,
"learning_rate": 1.3185225148692884e-05,
"loss": 0.7861893773078918,
"step": 594
},
{
"epoch": 0.8573487031700289,
"grad_norm": 0.529315173625946,
"learning_rate": 1.316262155607295e-05,
"loss": 0.6976417303085327,
"step": 595
},
{
"epoch": 0.8587896253602305,
"grad_norm": 0.7067042589187622,
"learning_rate": 1.3139999994782468e-05,
"loss": 0.7654718160629272,
"step": 596
},
{
"epoch": 0.8602305475504323,
"grad_norm": 0.5580388903617859,
"learning_rate": 1.3117360593347535e-05,
"loss": 0.7656145095825195,
"step": 597
},
{
"epoch": 0.861671469740634,
"grad_norm": 0.5642116665840149,
"learning_rate": 1.3094703480395597e-05,
"loss": 0.8965026140213013,
"step": 598
},
{
"epoch": 0.8631123919308358,
"grad_norm": 0.5922830700874329,
"learning_rate": 1.3072028784654732e-05,
"loss": 0.7152601480484009,
"step": 599
},
{
"epoch": 0.8645533141210374,
"grad_norm": 0.6355752944946289,
"learning_rate": 1.3049336634952918e-05,
"loss": 0.8053656816482544,
"step": 600
},
{
"epoch": 0.8659942363112392,
"grad_norm": 0.5964752435684204,
"learning_rate": 1.3026627160217302e-05,
"loss": 0.8105225563049316,
"step": 601
},
{
"epoch": 0.8674351585014409,
"grad_norm": 0.5729430913925171,
"learning_rate": 1.3003900489473455e-05,
"loss": 0.7246596217155457,
"step": 602
},
{
"epoch": 0.8688760806916427,
"grad_norm": 0.5772424340248108,
"learning_rate": 1.2981156751844659e-05,
"loss": 0.878759503364563,
"step": 603
},
{
"epoch": 0.8703170028818443,
"grad_norm": 0.6068733334541321,
"learning_rate": 1.2958396076551157e-05,
"loss": 0.771674633026123,
"step": 604
},
{
"epoch": 0.8717579250720461,
"grad_norm": 0.7268223762512207,
"learning_rate": 1.2935618592909419e-05,
"loss": 0.8257685899734497,
"step": 605
},
{
"epoch": 0.8731988472622478,
"grad_norm": 0.5236421227455139,
"learning_rate": 1.2912824430331425e-05,
"loss": 0.7433541417121887,
"step": 606
},
{
"epoch": 0.8746397694524496,
"grad_norm": 0.6350269913673401,
"learning_rate": 1.2890013718323913e-05,
"loss": 0.7988302111625671,
"step": 607
},
{
"epoch": 0.8760806916426513,
"grad_norm": 0.5974761843681335,
"learning_rate": 1.2867186586487642e-05,
"loss": 0.69598388671875,
"step": 608
},
{
"epoch": 0.877521613832853,
"grad_norm": 0.6555905938148499,
"learning_rate": 1.2844343164516671e-05,
"loss": 0.8490002155303955,
"step": 609
},
{
"epoch": 0.8789625360230547,
"grad_norm": 0.5933155417442322,
"learning_rate": 1.2821483582197604e-05,
"loss": 0.8980555534362793,
"step": 610
},
{
"epoch": 0.8804034582132565,
"grad_norm": 0.5820237994194031,
"learning_rate": 1.2798607969408865e-05,
"loss": 0.6644209623336792,
"step": 611
},
{
"epoch": 0.8818443804034583,
"grad_norm": 0.7071488499641418,
"learning_rate": 1.2775716456119962e-05,
"loss": 0.8648644685745239,
"step": 612
},
{
"epoch": 0.8832853025936599,
"grad_norm": 0.6479011178016663,
"learning_rate": 1.275280917239073e-05,
"loss": 0.7517762184143066,
"step": 613
},
{
"epoch": 0.8847262247838616,
"grad_norm": 0.5966110825538635,
"learning_rate": 1.2729886248370616e-05,
"loss": 0.8572617173194885,
"step": 614
},
{
"epoch": 0.8861671469740634,
"grad_norm": 0.644463062286377,
"learning_rate": 1.2706947814297923e-05,
"loss": 0.9164141416549683,
"step": 615
},
{
"epoch": 0.8876080691642652,
"grad_norm": 0.5812889933586121,
"learning_rate": 1.2683994000499078e-05,
"loss": 0.8233826160430908,
"step": 616
},
{
"epoch": 0.8890489913544669,
"grad_norm": 0.5846764445304871,
"learning_rate": 1.2661024937387888e-05,
"loss": 0.7792487144470215,
"step": 617
},
{
"epoch": 0.8904899135446686,
"grad_norm": 0.5830758810043335,
"learning_rate": 1.2638040755464802e-05,
"loss": 0.8183486461639404,
"step": 618
},
{
"epoch": 0.8919308357348703,
"grad_norm": 0.600700855255127,
"learning_rate": 1.2615041585316163e-05,
"loss": 0.8133193254470825,
"step": 619
},
{
"epoch": 0.8933717579250721,
"grad_norm": 0.7301390767097473,
"learning_rate": 1.2592027557613476e-05,
"loss": 0.9990606307983398,
"step": 620
},
{
"epoch": 0.8948126801152738,
"grad_norm": 0.6379325985908508,
"learning_rate": 1.2568998803112658e-05,
"loss": 0.9970605373382568,
"step": 621
},
{
"epoch": 0.8962536023054755,
"grad_norm": 0.6173595190048218,
"learning_rate": 1.2545955452653294e-05,
"loss": 0.7785443067550659,
"step": 622
},
{
"epoch": 0.8976945244956772,
"grad_norm": 0.5701093673706055,
"learning_rate": 1.2522897637157905e-05,
"loss": 0.7518781423568726,
"step": 623
},
{
"epoch": 0.899135446685879,
"grad_norm": 0.5610460638999939,
"learning_rate": 1.249982548763119e-05,
"loss": 0.7533529996871948,
"step": 624
},
{
"epoch": 0.9005763688760807,
"grad_norm": 0.5958570837974548,
"learning_rate": 1.2476739135159286e-05,
"loss": 0.7188445329666138,
"step": 625
},
{
"epoch": 0.9020172910662824,
"grad_norm": 0.6020827889442444,
"learning_rate": 1.2453638710909033e-05,
"loss": 0.821806788444519,
"step": 626
},
{
"epoch": 0.9034582132564841,
"grad_norm": 0.6721755266189575,
"learning_rate": 1.2430524346127215e-05,
"loss": 0.829516589641571,
"step": 627
},
{
"epoch": 0.9048991354466859,
"grad_norm": 0.648987889289856,
"learning_rate": 1.2407396172139822e-05,
"loss": 0.8926085233688354,
"step": 628
},
{
"epoch": 0.9063400576368876,
"grad_norm": 0.6156898140907288,
"learning_rate": 1.2384254320351301e-05,
"loss": 0.6972285509109497,
"step": 629
},
{
"epoch": 0.9077809798270894,
"grad_norm": 0.5631294846534729,
"learning_rate": 1.2361098922243812e-05,
"loss": 0.8096356391906738,
"step": 630
},
{
"epoch": 0.909221902017291,
"grad_norm": 0.5868187546730042,
"learning_rate": 1.233793010937648e-05,
"loss": 0.8927261233329773,
"step": 631
},
{
"epoch": 0.9106628242074928,
"grad_norm": 0.7092884182929993,
"learning_rate": 1.2314748013384639e-05,
"loss": 0.8122037649154663,
"step": 632
},
{
"epoch": 0.9121037463976945,
"grad_norm": 0.5598758459091187,
"learning_rate": 1.2291552765979104e-05,
"loss": 0.7103347182273865,
"step": 633
},
{
"epoch": 0.9135446685878963,
"grad_norm": 0.5712397694587708,
"learning_rate": 1.2268344498945404e-05,
"loss": 0.8768525123596191,
"step": 634
},
{
"epoch": 0.9149855907780979,
"grad_norm": 0.6459716558456421,
"learning_rate": 1.2245123344143044e-05,
"loss": 0.8588672876358032,
"step": 635
},
{
"epoch": 0.9164265129682997,
"grad_norm": 0.6647771000862122,
"learning_rate": 1.2221889433504743e-05,
"loss": 0.838790237903595,
"step": 636
},
{
"epoch": 0.9178674351585014,
"grad_norm": 0.630342960357666,
"learning_rate": 1.2198642899035704e-05,
"loss": 0.8317841291427612,
"step": 637
},
{
"epoch": 0.9193083573487032,
"grad_norm": 0.6161531209945679,
"learning_rate": 1.2175383872812851e-05,
"loss": 0.8737014532089233,
"step": 638
},
{
"epoch": 0.920749279538905,
"grad_norm": 0.5237288475036621,
"learning_rate": 1.2152112486984071e-05,
"loss": 0.7703773975372314,
"step": 639
},
{
"epoch": 0.9221902017291066,
"grad_norm": 0.6804758310317993,
"learning_rate": 1.2128828873767487e-05,
"loss": 0.8832876682281494,
"step": 640
},
{
"epoch": 0.9236311239193083,
"grad_norm": 0.6326414942741394,
"learning_rate": 1.210553316545068e-05,
"loss": 0.8082267045974731,
"step": 641
},
{
"epoch": 0.9250720461095101,
"grad_norm": 0.518068253993988,
"learning_rate": 1.2082225494389961e-05,
"loss": 0.8200917840003967,
"step": 642
},
{
"epoch": 0.9265129682997119,
"grad_norm": 0.6239233016967773,
"learning_rate": 1.2058905993009604e-05,
"loss": 0.7693580389022827,
"step": 643
},
{
"epoch": 0.9279538904899135,
"grad_norm": 0.7250560522079468,
"learning_rate": 1.2035574793801095e-05,
"loss": 0.9098770618438721,
"step": 644
},
{
"epoch": 0.9293948126801153,
"grad_norm": 0.5638466477394104,
"learning_rate": 1.2012232029322384e-05,
"loss": 0.8906189799308777,
"step": 645
},
{
"epoch": 0.930835734870317,
"grad_norm": 0.5933250188827515,
"learning_rate": 1.1988877832197135e-05,
"loss": 0.8694485425949097,
"step": 646
},
{
"epoch": 0.9322766570605188,
"grad_norm": 0.5953240394592285,
"learning_rate": 1.1965512335113958e-05,
"loss": 0.6657785177230835,
"step": 647
},
{
"epoch": 0.9337175792507204,
"grad_norm": 0.7607654929161072,
"learning_rate": 1.1942135670825672e-05,
"loss": 0.8424980044364929,
"step": 648
},
{
"epoch": 0.9351585014409222,
"grad_norm": 0.5857916474342346,
"learning_rate": 1.1918747972148541e-05,
"loss": 0.8536664247512817,
"step": 649
},
{
"epoch": 0.9365994236311239,
"grad_norm": 0.6392703056335449,
"learning_rate": 1.189534937196152e-05,
"loss": 0.7639260292053223,
"step": 650
},
{
"epoch": 0.9380403458213257,
"grad_norm": 0.6499249339103699,
"learning_rate": 1.1871940003205505e-05,
"loss": 0.6486794948577881,
"step": 651
},
{
"epoch": 0.9394812680115274,
"grad_norm": 0.5829477906227112,
"learning_rate": 1.1848519998882572e-05,
"loss": 0.8788424730300903,
"step": 652
},
{
"epoch": 0.9409221902017291,
"grad_norm": 0.6531436443328857,
"learning_rate": 1.1825089492055227e-05,
"loss": 0.7516967058181763,
"step": 653
},
{
"epoch": 0.9423631123919308,
"grad_norm": 0.5762933492660522,
"learning_rate": 1.1801648615845638e-05,
"loss": 0.8271253108978271,
"step": 654
},
{
"epoch": 0.9438040345821326,
"grad_norm": 0.5682176351547241,
"learning_rate": 1.17781975034349e-05,
"loss": 0.7439221143722534,
"step": 655
},
{
"epoch": 0.9452449567723343,
"grad_norm": 0.6221916675567627,
"learning_rate": 1.1754736288062256e-05,
"loss": 0.8153043985366821,
"step": 656
},
{
"epoch": 0.946685878962536,
"grad_norm": 0.6802467107772827,
"learning_rate": 1.1731265103024351e-05,
"loss": 0.7968660593032837,
"step": 657
},
{
"epoch": 0.9481268011527377,
"grad_norm": 0.5722571611404419,
"learning_rate": 1.1707784081674482e-05,
"loss": 0.7626321315765381,
"step": 658
},
{
"epoch": 0.9495677233429395,
"grad_norm": 0.6222584247589111,
"learning_rate": 1.1684293357421824e-05,
"loss": 0.7135765552520752,
"step": 659
},
{
"epoch": 0.9510086455331412,
"grad_norm": 0.6431753039360046,
"learning_rate": 1.1660793063730674e-05,
"loss": 0.7373322248458862,
"step": 660
},
{
"epoch": 0.952449567723343,
"grad_norm": 0.6022802591323853,
"learning_rate": 1.1637283334119713e-05,
"loss": 0.7500289678573608,
"step": 661
},
{
"epoch": 0.9538904899135446,
"grad_norm": 0.5709948539733887,
"learning_rate": 1.1613764302161222e-05,
"loss": 0.8752883076667786,
"step": 662
},
{
"epoch": 0.9553314121037464,
"grad_norm": 0.5400457382202148,
"learning_rate": 1.1590236101480339e-05,
"loss": 0.670220673084259,
"step": 663
},
{
"epoch": 0.9567723342939481,
"grad_norm": 0.647760272026062,
"learning_rate": 1.1566698865754291e-05,
"loss": 0.8324419260025024,
"step": 664
},
{
"epoch": 0.9582132564841499,
"grad_norm": 0.6560764312744141,
"learning_rate": 1.154315272871164e-05,
"loss": 0.7774407863616943,
"step": 665
},
{
"epoch": 0.9596541786743515,
"grad_norm": 0.6726030111312866,
"learning_rate": 1.1519597824131527e-05,
"loss": 0.8403815031051636,
"step": 666
},
{
"epoch": 0.9610951008645533,
"grad_norm": 0.7448883056640625,
"learning_rate": 1.1496034285842897e-05,
"loss": 0.8905906677246094,
"step": 667
},
{
"epoch": 0.962536023054755,
"grad_norm": 0.6700533032417297,
"learning_rate": 1.1472462247723752e-05,
"loss": 0.7805229425430298,
"step": 668
},
{
"epoch": 0.9639769452449568,
"grad_norm": 0.6197190284729004,
"learning_rate": 1.1448881843700392e-05,
"loss": 0.7229803800582886,
"step": 669
},
{
"epoch": 0.9654178674351584,
"grad_norm": 0.59717857837677,
"learning_rate": 1.1425293207746638e-05,
"loss": 0.8611892461776733,
"step": 670
},
{
"epoch": 0.9668587896253602,
"grad_norm": 0.796004593372345,
"learning_rate": 1.1401696473883086e-05,
"loss": 0.8944194316864014,
"step": 671
},
{
"epoch": 0.968299711815562,
"grad_norm": 0.5947389602661133,
"learning_rate": 1.1378091776176348e-05,
"loss": 0.8134667873382568,
"step": 672
},
{
"epoch": 0.9697406340057637,
"grad_norm": 0.6667426824569702,
"learning_rate": 1.1354479248738271e-05,
"loss": 0.7905430197715759,
"step": 673
},
{
"epoch": 0.9711815561959655,
"grad_norm": 0.5860411524772644,
"learning_rate": 1.1330859025725193e-05,
"loss": 0.7300920486450195,
"step": 674
},
{
"epoch": 0.9726224783861671,
"grad_norm": 0.5794700384140015,
"learning_rate": 1.130723124133718e-05,
"loss": 0.7038317918777466,
"step": 675
},
{
"epoch": 0.9740634005763689,
"grad_norm": 0.6058333516120911,
"learning_rate": 1.1283596029817248e-05,
"loss": 0.8271040916442871,
"step": 676
},
{
"epoch": 0.9755043227665706,
"grad_norm": 0.6150603294372559,
"learning_rate": 1.1259953525450616e-05,
"loss": 0.7750831842422485,
"step": 677
},
{
"epoch": 0.9769452449567724,
"grad_norm": 0.6314917802810669,
"learning_rate": 1.1236303862563945e-05,
"loss": 0.9058209657669067,
"step": 678
},
{
"epoch": 0.978386167146974,
"grad_norm": 0.6402139067649841,
"learning_rate": 1.1212647175524551e-05,
"loss": 0.8667633533477783,
"step": 679
},
{
"epoch": 0.9798270893371758,
"grad_norm": 0.5912607312202454,
"learning_rate": 1.1188983598739675e-05,
"loss": 0.7763844728469849,
"step": 680
},
{
"epoch": 0.9812680115273775,
"grad_norm": 0.6257530450820923,
"learning_rate": 1.1165313266655698e-05,
"loss": 0.8233456611633301,
"step": 681
},
{
"epoch": 0.9827089337175793,
"grad_norm": 0.6173887252807617,
"learning_rate": 1.1141636313757369e-05,
"loss": 0.7909761667251587,
"step": 682
},
{
"epoch": 0.984149855907781,
"grad_norm": 0.6075451374053955,
"learning_rate": 1.1117952874567073e-05,
"loss": 0.8156836628913879,
"step": 683
},
{
"epoch": 0.9855907780979827,
"grad_norm": 0.6228588819503784,
"learning_rate": 1.1094263083644036e-05,
"loss": 0.7954513430595398,
"step": 684
},
{
"epoch": 0.9870317002881844,
"grad_norm": 0.5579982399940491,
"learning_rate": 1.1070567075583572e-05,
"loss": 0.781398594379425,
"step": 685
},
{
"epoch": 0.9884726224783862,
"grad_norm": 0.5715999603271484,
"learning_rate": 1.1046864985016326e-05,
"loss": 0.7486555576324463,
"step": 686
},
{
"epoch": 0.9899135446685879,
"grad_norm": 0.7021177411079407,
"learning_rate": 1.1023156946607485e-05,
"loss": 0.866464376449585,
"step": 687
},
{
"epoch": 0.9913544668587896,
"grad_norm": 0.7282413840293884,
"learning_rate": 1.0999443095056051e-05,
"loss": 1.0165081024169922,
"step": 688
},
{
"epoch": 0.9927953890489913,
"grad_norm": 0.6795728802680969,
"learning_rate": 1.0975723565094036e-05,
"loss": 0.659792959690094,
"step": 689
},
{
"epoch": 0.9942363112391931,
"grad_norm": 0.5526747107505798,
"learning_rate": 1.0951998491485722e-05,
"loss": 0.6671736836433411,
"step": 690
},
{
"epoch": 0.9956772334293948,
"grad_norm": 0.6293612122535706,
"learning_rate": 1.0928268009026885e-05,
"loss": 0.7951771020889282,
"step": 691
},
{
"epoch": 0.9971181556195965,
"grad_norm": 0.5912222266197205,
"learning_rate": 1.090453225254404e-05,
"loss": 0.764556884765625,
"step": 692
},
{
"epoch": 0.9985590778097982,
"grad_norm": 0.6562872529029846,
"learning_rate": 1.0880791356893652e-05,
"loss": 0.7850635647773743,
"step": 693
},
{
"epoch": 1.0,
"grad_norm": 0.6331807971000671,
"learning_rate": 1.0857045456961394e-05,
"loss": 0.8403358459472656,
"step": 694
},
{
"epoch": 1.0014409221902016,
"grad_norm": 0.53999924659729,
"learning_rate": 1.0833294687661376e-05,
"loss": 0.7220283150672913,
"step": 695
},
{
"epoch": 1.0028818443804035,
"grad_norm": 0.5253156423568726,
"learning_rate": 1.0809539183935358e-05,
"loss": 0.6956285238265991,
"step": 696
},
{
"epoch": 1.0043227665706052,
"grad_norm": 0.5885496139526367,
"learning_rate": 1.0785779080752012e-05,
"loss": 0.7220051288604736,
"step": 697
},
{
"epoch": 1.005763688760807,
"grad_norm": 0.5988635420799255,
"learning_rate": 1.0762014513106143e-05,
"loss": 0.6951655745506287,
"step": 698
},
{
"epoch": 1.0072046109510087,
"grad_norm": 0.49586206674575806,
"learning_rate": 1.073824561601791e-05,
"loss": 0.6196475625038147,
"step": 699
},
{
"epoch": 1.0086455331412103,
"grad_norm": 0.7789514064788818,
"learning_rate": 1.0714472524532085e-05,
"loss": 0.8181064128875732,
"step": 700
},
{
"epoch": 1.0100864553314122,
"grad_norm": 0.5491945147514343,
"learning_rate": 1.0690695373717254e-05,
"loss": 0.6493509411811829,
"step": 701
},
{
"epoch": 1.0115273775216138,
"grad_norm": 0.7047191262245178,
"learning_rate": 1.0666914298665079e-05,
"loss": 0.7577710151672363,
"step": 702
},
{
"epoch": 1.0129682997118155,
"grad_norm": 0.7199708223342896,
"learning_rate": 1.0643129434489514e-05,
"loss": 0.7761712074279785,
"step": 703
},
{
"epoch": 1.0144092219020173,
"grad_norm": 0.5329734086990356,
"learning_rate": 1.0619340916326039e-05,
"loss": 0.6484905481338501,
"step": 704
},
{
"epoch": 1.015850144092219,
"grad_norm": 0.6241481304168701,
"learning_rate": 1.05955488793309e-05,
"loss": 0.669824481010437,
"step": 705
},
{
"epoch": 1.0172910662824208,
"grad_norm": 0.5850144028663635,
"learning_rate": 1.0571753458680329e-05,
"loss": 0.7012614011764526,
"step": 706
},
{
"epoch": 1.0187319884726225,
"grad_norm": 0.5404962301254272,
"learning_rate": 1.0547954789569785e-05,
"loss": 0.7472232580184937,
"step": 707
},
{
"epoch": 1.0201729106628241,
"grad_norm": 0.7023850679397583,
"learning_rate": 1.0524153007213185e-05,
"loss": 0.8266449570655823,
"step": 708
},
{
"epoch": 1.021613832853026,
"grad_norm": 0.6288999319076538,
"learning_rate": 1.0500348246842136e-05,
"loss": 0.8678215742111206,
"step": 709
},
{
"epoch": 1.0230547550432276,
"grad_norm": 0.7006089091300964,
"learning_rate": 1.0476540643705153e-05,
"loss": 0.7670779228210449,
"step": 710
},
{
"epoch": 1.0244956772334295,
"grad_norm": 0.7132288217544556,
"learning_rate": 1.0452730333066919e-05,
"loss": 0.8972759246826172,
"step": 711
},
{
"epoch": 1.0259365994236311,
"grad_norm": 0.7113478779792786,
"learning_rate": 1.0428917450207489e-05,
"loss": 0.6248783469200134,
"step": 712
},
{
"epoch": 1.0273775216138328,
"grad_norm": 0.5936718583106995,
"learning_rate": 1.0405102130421536e-05,
"loss": 0.7173407077789307,
"step": 713
},
{
"epoch": 1.0288184438040346,
"grad_norm": 0.6577355265617371,
"learning_rate": 1.0381284509017578e-05,
"loss": 0.8124462366104126,
"step": 714
},
{
"epoch": 1.0302593659942363,
"grad_norm": 0.5350339412689209,
"learning_rate": 1.035746472131721e-05,
"loss": 0.6757724285125732,
"step": 715
},
{
"epoch": 1.031700288184438,
"grad_norm": 0.625091016292572,
"learning_rate": 1.033364290265433e-05,
"loss": 0.6315501928329468,
"step": 716
},
{
"epoch": 1.0331412103746398,
"grad_norm": 0.7091370820999146,
"learning_rate": 1.0309819188374386e-05,
"loss": 0.6656221151351929,
"step": 717
},
{
"epoch": 1.0345821325648414,
"grad_norm": 0.6561499238014221,
"learning_rate": 1.0285993713833586e-05,
"loss": 0.7100173234939575,
"step": 718
},
{
"epoch": 1.0360230547550433,
"grad_norm": 0.637753963470459,
"learning_rate": 1.0262166614398144e-05,
"loss": 0.6528655290603638,
"step": 719
},
{
"epoch": 1.037463976945245,
"grad_norm": 0.6694713234901428,
"learning_rate": 1.0238338025443507e-05,
"loss": 0.8758798837661743,
"step": 720
},
{
"epoch": 1.0389048991354466,
"grad_norm": 0.6426526308059692,
"learning_rate": 1.0214508082353578e-05,
"loss": 0.6384798288345337,
"step": 721
},
{
"epoch": 1.0403458213256485,
"grad_norm": 0.6031687259674072,
"learning_rate": 1.019067692051996e-05,
"loss": 0.7043944597244263,
"step": 722
},
{
"epoch": 1.04178674351585,
"grad_norm": 0.6691206693649292,
"learning_rate": 1.016684467534118e-05,
"loss": 0.7097047567367554,
"step": 723
},
{
"epoch": 1.043227665706052,
"grad_norm": 0.646927535533905,
"learning_rate": 1.0143011482221916e-05,
"loss": 0.6287018060684204,
"step": 724
},
{
"epoch": 1.0446685878962536,
"grad_norm": 0.6911733150482178,
"learning_rate": 1.0119177476572237e-05,
"loss": 0.645012617111206,
"step": 725
},
{
"epoch": 1.0461095100864553,
"grad_norm": 0.567036509513855,
"learning_rate": 1.0095342793806828e-05,
"loss": 0.6956562995910645,
"step": 726
},
{
"epoch": 1.0475504322766571,
"grad_norm": 0.679738461971283,
"learning_rate": 1.0071507569344213e-05,
"loss": 0.6614462733268738,
"step": 727
},
{
"epoch": 1.0489913544668588,
"grad_norm": 0.7040454149246216,
"learning_rate": 1.0047671938606002e-05,
"loss": 0.7205630540847778,
"step": 728
},
{
"epoch": 1.0504322766570606,
"grad_norm": 0.8227221369743347,
"learning_rate": 1.0023836037016115e-05,
"loss": 0.684057354927063,
"step": 729
},
{
"epoch": 1.0518731988472623,
"grad_norm": 0.6533084511756897,
"learning_rate": 1e-05,
"loss": 0.5894599556922913,
"step": 730
},
{
"epoch": 1.053314121037464,
"grad_norm": 0.6469436287879944,
"learning_rate": 9.976163962983889e-06,
"loss": 0.6846705675125122,
"step": 731
},
{
"epoch": 1.0547550432276658,
"grad_norm": 0.6716520190238953,
"learning_rate": 9.952328061394001e-06,
"loss": 0.6836794018745422,
"step": 732
},
{
"epoch": 1.0561959654178674,
"grad_norm": 0.6842796802520752,
"learning_rate": 9.92849243065579e-06,
"loss": 0.7929626107215881,
"step": 733
},
{
"epoch": 1.057636887608069,
"grad_norm": 0.8187405467033386,
"learning_rate": 9.904657206193175e-06,
"loss": 0.6658978462219238,
"step": 734
},
{
"epoch": 1.059077809798271,
"grad_norm": 0.6604411602020264,
"learning_rate": 9.880822523427766e-06,
"loss": 0.6329740285873413,
"step": 735
},
{
"epoch": 1.0605187319884726,
"grad_norm": 0.6714515089988708,
"learning_rate": 9.856988517778086e-06,
"loss": 0.7321251630783081,
"step": 736
},
{
"epoch": 1.0619596541786744,
"grad_norm": 0.7017390727996826,
"learning_rate": 9.833155324658823e-06,
"loss": 0.744154691696167,
"step": 737
},
{
"epoch": 1.063400576368876,
"grad_norm": 0.7215039134025574,
"learning_rate": 9.809323079480043e-06,
"loss": 0.7160513401031494,
"step": 738
},
{
"epoch": 1.0648414985590777,
"grad_norm": 0.5923436284065247,
"learning_rate": 9.785491917646425e-06,
"loss": 0.6206352710723877,
"step": 739
},
{
"epoch": 1.0662824207492796,
"grad_norm": 0.6986830830574036,
"learning_rate": 9.761661974556495e-06,
"loss": 0.7324307560920715,
"step": 740
},
{
"epoch": 1.0677233429394812,
"grad_norm": 0.5980766415596008,
"learning_rate": 9.737833385601858e-06,
"loss": 0.6454845666885376,
"step": 741
},
{
"epoch": 1.069164265129683,
"grad_norm": 0.9852787852287292,
"learning_rate": 9.714006286166416e-06,
"loss": 0.6832539439201355,
"step": 742
},
{
"epoch": 1.0706051873198847,
"grad_norm": 0.9407469034194946,
"learning_rate": 9.690180811625618e-06,
"loss": 0.8757802248001099,
"step": 743
},
{
"epoch": 1.0720461095100864,
"grad_norm": 0.612558126449585,
"learning_rate": 9.666357097345672e-06,
"loss": 0.8261449337005615,
"step": 744
},
{
"epoch": 1.0734870317002883,
"grad_norm": 0.5846421122550964,
"learning_rate": 9.642535278682795e-06,
"loss": 0.6925964951515198,
"step": 745
},
{
"epoch": 1.07492795389049,
"grad_norm": 0.7762342691421509,
"learning_rate": 9.618715490982425e-06,
"loss": 0.7280269861221313,
"step": 746
},
{
"epoch": 1.0763688760806915,
"grad_norm": 0.689094066619873,
"learning_rate": 9.594897869578466e-06,
"loss": 0.8407827615737915,
"step": 747
},
{
"epoch": 1.0778097982708934,
"grad_norm": 0.6733883619308472,
"learning_rate": 9.571082549792513e-06,
"loss": 0.7604755163192749,
"step": 748
},
{
"epoch": 1.079250720461095,
"grad_norm": 0.608144223690033,
"learning_rate": 9.547269666933085e-06,
"loss": 0.646695613861084,
"step": 749
},
{
"epoch": 1.080691642651297,
"grad_norm": 0.7533197999000549,
"learning_rate": 9.523459356294849e-06,
"loss": 0.7508174180984497,
"step": 750
},
{
"epoch": 1.0821325648414986,
"grad_norm": 0.7083786129951477,
"learning_rate": 9.499651753157869e-06,
"loss": 0.7907015085220337,
"step": 751
},
{
"epoch": 1.0835734870317002,
"grad_norm": 0.7183002233505249,
"learning_rate": 9.475846992786817e-06,
"loss": 0.8525063991546631,
"step": 752
},
{
"epoch": 1.085014409221902,
"grad_norm": 0.6429514288902283,
"learning_rate": 9.452045210430218e-06,
"loss": 0.6777982711791992,
"step": 753
},
{
"epoch": 1.0864553314121037,
"grad_norm": 0.6770809292793274,
"learning_rate": 9.428246541319673e-06,
"loss": 0.7967904806137085,
"step": 754
},
{
"epoch": 1.0878962536023056,
"grad_norm": 0.6980239152908325,
"learning_rate": 9.404451120669102e-06,
"loss": 0.6735846400260925,
"step": 755
},
{
"epoch": 1.0893371757925072,
"grad_norm": 0.699763834476471,
"learning_rate": 9.380659083673963e-06,
"loss": 0.7672224044799805,
"step": 756
},
{
"epoch": 1.0907780979827089,
"grad_norm": 0.6815734505653381,
"learning_rate": 9.356870565510488e-06,
"loss": 0.665432333946228,
"step": 757
},
{
"epoch": 1.0922190201729107,
"grad_norm": 0.6672005653381348,
"learning_rate": 9.333085701334925e-06,
"loss": 0.6282204389572144,
"step": 758
},
{
"epoch": 1.0936599423631124,
"grad_norm": 0.8683066368103027,
"learning_rate": 9.30930462628275e-06,
"loss": 0.800000011920929,
"step": 759
},
{
"epoch": 1.0951008645533142,
"grad_norm": 0.6048387289047241,
"learning_rate": 9.285527475467918e-06,
"loss": 0.530065655708313,
"step": 760
},
{
"epoch": 1.0965417867435159,
"grad_norm": 0.7173153758049011,
"learning_rate": 9.261754383982093e-06,
"loss": 0.8034170866012573,
"step": 761
},
{
"epoch": 1.0979827089337175,
"grad_norm": 0.5964096784591675,
"learning_rate": 9.23798548689386e-06,
"loss": 0.6461498141288757,
"step": 762
},
{
"epoch": 1.0994236311239194,
"grad_norm": 0.6942315697669983,
"learning_rate": 9.21422091924799e-06,
"loss": 0.6815940737724304,
"step": 763
},
{
"epoch": 1.100864553314121,
"grad_norm": 0.7249640226364136,
"learning_rate": 9.190460816064649e-06,
"loss": 0.7779183387756348,
"step": 764
},
{
"epoch": 1.1023054755043227,
"grad_norm": 0.7075570821762085,
"learning_rate": 9.16670531233863e-06,
"loss": 0.8409022688865662,
"step": 765
},
{
"epoch": 1.1037463976945245,
"grad_norm": 0.6892013549804688,
"learning_rate": 9.14295454303861e-06,
"loss": 0.7865520715713501,
"step": 766
},
{
"epoch": 1.1051873198847262,
"grad_norm": 0.6577411890029907,
"learning_rate": 9.119208643106353e-06,
"loss": 0.8110712766647339,
"step": 767
},
{
"epoch": 1.106628242074928,
"grad_norm": 0.6635581254959106,
"learning_rate": 9.095467747455965e-06,
"loss": 0.63739013671875,
"step": 768
},
{
"epoch": 1.1080691642651297,
"grad_norm": 0.6412572264671326,
"learning_rate": 9.071731990973118e-06,
"loss": 0.6418280601501465,
"step": 769
},
{
"epoch": 1.1095100864553313,
"grad_norm": 0.7638756036758423,
"learning_rate": 9.048001508514283e-06,
"loss": 0.7094175815582275,
"step": 770
},
{
"epoch": 1.1109510086455332,
"grad_norm": 0.5744118094444275,
"learning_rate": 9.024276434905969e-06,
"loss": 0.6916787624359131,
"step": 771
},
{
"epoch": 1.1123919308357348,
"grad_norm": 0.793855607509613,
"learning_rate": 9.000556904943956e-06,
"loss": 0.7775775790214539,
"step": 772
},
{
"epoch": 1.1138328530259365,
"grad_norm": 0.6273725032806396,
"learning_rate": 8.976843053392518e-06,
"loss": 0.6217244267463684,
"step": 773
},
{
"epoch": 1.1152737752161384,
"grad_norm": 0.6361021399497986,
"learning_rate": 8.95313501498368e-06,
"loss": 0.659927487373352,
"step": 774
},
{
"epoch": 1.11671469740634,
"grad_norm": 0.7996638417243958,
"learning_rate": 8.929432924416433e-06,
"loss": 0.7214843034744263,
"step": 775
},
{
"epoch": 1.1181556195965419,
"grad_norm": 0.6677613854408264,
"learning_rate": 8.905736916355969e-06,
"loss": 0.8332221508026123,
"step": 776
},
{
"epoch": 1.1195965417867435,
"grad_norm": 0.6600406765937805,
"learning_rate": 8.882047125432929e-06,
"loss": 0.601822555065155,
"step": 777
},
{
"epoch": 1.1210374639769451,
"grad_norm": 0.6558908820152283,
"learning_rate": 8.858363686242635e-06,
"loss": 0.6736506223678589,
"step": 778
},
{
"epoch": 1.122478386167147,
"grad_norm": 0.6801712512969971,
"learning_rate": 8.834686733344309e-06,
"loss": 0.8313186168670654,
"step": 779
},
{
"epoch": 1.1239193083573487,
"grad_norm": 0.6852150559425354,
"learning_rate": 8.811016401260327e-06,
"loss": 0.6597498655319214,
"step": 780
},
{
"epoch": 1.1253602305475505,
"grad_norm": 0.7473975419998169,
"learning_rate": 8.787352824475454e-06,
"loss": 0.6347630023956299,
"step": 781
},
{
"epoch": 1.1268011527377522,
"grad_norm": 0.6975881457328796,
"learning_rate": 8.76369613743606e-06,
"loss": 0.7058587074279785,
"step": 782
},
{
"epoch": 1.1282420749279538,
"grad_norm": 0.7085602879524231,
"learning_rate": 8.740046474549387e-06,
"loss": 0.835166335105896,
"step": 783
},
{
"epoch": 1.1296829971181557,
"grad_norm": 0.6902181506156921,
"learning_rate": 8.716403970182759e-06,
"loss": 0.7125815153121948,
"step": 784
},
{
"epoch": 1.1311239193083573,
"grad_norm": 0.7300699353218079,
"learning_rate": 8.692768758662827e-06,
"loss": 0.6701489686965942,
"step": 785
},
{
"epoch": 1.1325648414985592,
"grad_norm": 0.747675895690918,
"learning_rate": 8.66914097427481e-06,
"loss": 0.6480385661125183,
"step": 786
},
{
"epoch": 1.1340057636887608,
"grad_norm": 0.7518520355224609,
"learning_rate": 8.645520751261736e-06,
"loss": 0.7200244665145874,
"step": 787
},
{
"epoch": 1.1354466858789625,
"grad_norm": 0.7091997861862183,
"learning_rate": 8.621908223823657e-06,
"loss": 0.7781722545623779,
"step": 788
},
{
"epoch": 1.1368876080691643,
"grad_norm": 0.6493226289749146,
"learning_rate": 8.598303526116916e-06,
"loss": 0.659550666809082,
"step": 789
},
{
"epoch": 1.138328530259366,
"grad_norm": 0.7172505855560303,
"learning_rate": 8.574706792253367e-06,
"loss": 0.7001605033874512,
"step": 790
},
{
"epoch": 1.1397694524495678,
"grad_norm": 0.7529043555259705,
"learning_rate": 8.551118156299613e-06,
"loss": 0.7431622743606567,
"step": 791
},
{
"epoch": 1.1412103746397695,
"grad_norm": 0.646467387676239,
"learning_rate": 8.527537752276251e-06,
"loss": 0.687673807144165,
"step": 792
},
{
"epoch": 1.1426512968299711,
"grad_norm": 0.7232325673103333,
"learning_rate": 8.503965714157108e-06,
"loss": 0.567053496837616,
"step": 793
},
{
"epoch": 1.144092219020173,
"grad_norm": 0.6364091634750366,
"learning_rate": 8.480402175868477e-06,
"loss": 0.6352185010910034,
"step": 794
},
{
"epoch": 1.1455331412103746,
"grad_norm": 0.6260091066360474,
"learning_rate": 8.456847271288365e-06,
"loss": 0.6209022998809814,
"step": 795
},
{
"epoch": 1.1469740634005763,
"grad_norm": 0.780681312084198,
"learning_rate": 8.43330113424571e-06,
"loss": 0.8006823658943176,
"step": 796
},
{
"epoch": 1.1484149855907781,
"grad_norm": 0.7389974594116211,
"learning_rate": 8.409763898519665e-06,
"loss": 0.7085101008415222,
"step": 797
},
{
"epoch": 1.1498559077809798,
"grad_norm": 0.6930822134017944,
"learning_rate": 8.38623569783878e-06,
"loss": 0.7008879780769348,
"step": 798
},
{
"epoch": 1.1512968299711814,
"grad_norm": 0.7401261329650879,
"learning_rate": 8.362716665880289e-06,
"loss": 0.8319974541664124,
"step": 799
},
{
"epoch": 1.1527377521613833,
"grad_norm": 0.8224151730537415,
"learning_rate": 8.339206936269328e-06,
"loss": 0.8165872097015381,
"step": 800
},
{
"epoch": 1.154178674351585,
"grad_norm": 0.7225102186203003,
"learning_rate": 8.315706642578178e-06,
"loss": 0.7181413173675537,
"step": 801
},
{
"epoch": 1.1556195965417868,
"grad_norm": 0.6846278309822083,
"learning_rate": 8.292215918325518e-06,
"loss": 0.5527253746986389,
"step": 802
},
{
"epoch": 1.1570605187319885,
"grad_norm": 0.7421664595603943,
"learning_rate": 8.268734896975649e-06,
"loss": 0.7071849703788757,
"step": 803
},
{
"epoch": 1.15850144092219,
"grad_norm": 0.7124081254005432,
"learning_rate": 8.245263711937746e-06,
"loss": 0.7202374339103699,
"step": 804
},
{
"epoch": 1.159942363112392,
"grad_norm": 0.6946137547492981,
"learning_rate": 8.221802496565102e-06,
"loss": 0.7475452423095703,
"step": 805
},
{
"epoch": 1.1613832853025936,
"grad_norm": 0.6117473244667053,
"learning_rate": 8.198351384154363e-06,
"loss": 0.6516153216362,
"step": 806
},
{
"epoch": 1.1628242074927955,
"grad_norm": 0.7568015456199646,
"learning_rate": 8.174910507944775e-06,
"loss": 0.7098596096038818,
"step": 807
},
{
"epoch": 1.1642651296829971,
"grad_norm": 0.7341271638870239,
"learning_rate": 8.15148000111743e-06,
"loss": 0.7767957448959351,
"step": 808
},
{
"epoch": 1.1657060518731988,
"grad_norm": 0.6909346580505371,
"learning_rate": 8.128059996794495e-06,
"loss": 0.7601003646850586,
"step": 809
},
{
"epoch": 1.1671469740634006,
"grad_norm": 0.7617642879486084,
"learning_rate": 8.10465062803848e-06,
"loss": 0.8070919513702393,
"step": 810
},
{
"epoch": 1.1685878962536023,
"grad_norm": 0.7109162211418152,
"learning_rate": 8.08125202785146e-06,
"loss": 0.7039991617202759,
"step": 811
},
{
"epoch": 1.1700288184438041,
"grad_norm": 0.611440122127533,
"learning_rate": 8.05786432917433e-06,
"loss": 0.7397060394287109,
"step": 812
},
{
"epoch": 1.1714697406340058,
"grad_norm": 0.6908255219459534,
"learning_rate": 8.034487664886042e-06,
"loss": 0.6378216743469238,
"step": 813
},
{
"epoch": 1.1729106628242074,
"grad_norm": 0.7022346258163452,
"learning_rate": 8.011122167802869e-06,
"loss": 0.7086902856826782,
"step": 814
},
{
"epoch": 1.1743515850144093,
"grad_norm": 0.8581727147102356,
"learning_rate": 7.987767970677618e-06,
"loss": 0.8129750490188599,
"step": 815
},
{
"epoch": 1.175792507204611,
"grad_norm": 0.7053741216659546,
"learning_rate": 7.964425206198907e-06,
"loss": 0.7385943531990051,
"step": 816
},
{
"epoch": 1.1772334293948128,
"grad_norm": 0.686326801776886,
"learning_rate": 7.941094006990398e-06,
"loss": 0.7873866558074951,
"step": 817
},
{
"epoch": 1.1786743515850144,
"grad_norm": 0.695410430431366,
"learning_rate": 7.917774505610039e-06,
"loss": 0.6239868402481079,
"step": 818
},
{
"epoch": 1.180115273775216,
"grad_norm": 0.7279312014579773,
"learning_rate": 7.89446683454932e-06,
"loss": 0.6867477893829346,
"step": 819
},
{
"epoch": 1.181556195965418,
"grad_norm": 0.7835471630096436,
"learning_rate": 7.871171126232516e-06,
"loss": 0.8003014326095581,
"step": 820
},
{
"epoch": 1.1829971181556196,
"grad_norm": 0.6107333898544312,
"learning_rate": 7.84788751301593e-06,
"loss": 0.6153905391693115,
"step": 821
},
{
"epoch": 1.1844380403458212,
"grad_norm": 0.6579324007034302,
"learning_rate": 7.82461612718715e-06,
"loss": 0.6941539645195007,
"step": 822
},
{
"epoch": 1.185878962536023,
"grad_norm": 0.7140527963638306,
"learning_rate": 7.801357100964295e-06,
"loss": 0.6446021795272827,
"step": 823
},
{
"epoch": 1.1873198847262247,
"grad_norm": 0.5526272654533386,
"learning_rate": 7.778110566495256e-06,
"loss": 0.6298232078552246,
"step": 824
},
{
"epoch": 1.1887608069164266,
"grad_norm": 0.7560884356498718,
"learning_rate": 7.754876655856957e-06,
"loss": 0.6755807399749756,
"step": 825
},
{
"epoch": 1.1902017291066282,
"grad_norm": 0.7089958190917969,
"learning_rate": 7.731655501054597e-06,
"loss": 0.8171476125717163,
"step": 826
},
{
"epoch": 1.19164265129683,
"grad_norm": 0.8474247455596924,
"learning_rate": 7.708447234020898e-06,
"loss": 0.6631404161453247,
"step": 827
},
{
"epoch": 1.1930835734870318,
"grad_norm": 0.696233868598938,
"learning_rate": 7.685251986615363e-06,
"loss": 0.679205060005188,
"step": 828
},
{
"epoch": 1.1945244956772334,
"grad_norm": 0.6988159418106079,
"learning_rate": 7.662069890623525e-06,
"loss": 0.6870052218437195,
"step": 829
},
{
"epoch": 1.195965417867435,
"grad_norm": 0.8398119807243347,
"learning_rate": 7.63890107775619e-06,
"loss": 0.6322426795959473,
"step": 830
},
{
"epoch": 1.197406340057637,
"grad_norm": 0.7013004422187805,
"learning_rate": 7.615745679648702e-06,
"loss": 0.7239193916320801,
"step": 831
},
{
"epoch": 1.1988472622478386,
"grad_norm": 0.7037452459335327,
"learning_rate": 7.59260382786018e-06,
"loss": 0.738641083240509,
"step": 832
},
{
"epoch": 1.2002881844380404,
"grad_norm": 0.7014064788818359,
"learning_rate": 7.569475653872787e-06,
"loss": 0.7159215807914734,
"step": 833
},
{
"epoch": 1.201729106628242,
"grad_norm": 0.7071189880371094,
"learning_rate": 7.546361289090971e-06,
"loss": 0.7976879477500916,
"step": 834
},
{
"epoch": 1.2031700288184437,
"grad_norm": 1.5894548892974854,
"learning_rate": 7.5232608648407166e-06,
"loss": 0.8454712629318237,
"step": 835
},
{
"epoch": 1.2046109510086456,
"grad_norm": 0.828970730304718,
"learning_rate": 7.500174512368814e-06,
"loss": 0.759265661239624,
"step": 836
},
{
"epoch": 1.2060518731988472,
"grad_norm": 0.798107385635376,
"learning_rate": 7.477102362842099e-06,
"loss": 0.6458663940429688,
"step": 837
},
{
"epoch": 1.207492795389049,
"grad_norm": 0.7149136066436768,
"learning_rate": 7.454044547346708e-06,
"loss": 0.7283859252929688,
"step": 838
},
{
"epoch": 1.2089337175792507,
"grad_norm": 0.6997484564781189,
"learning_rate": 7.431001196887345e-06,
"loss": 0.7165562510490417,
"step": 839
},
{
"epoch": 1.2103746397694524,
"grad_norm": 0.8367432355880737,
"learning_rate": 7.407972442386527e-06,
"loss": 0.8220187425613403,
"step": 840
},
{
"epoch": 1.2118155619596542,
"grad_norm": 0.7961417436599731,
"learning_rate": 7.384958414683839e-06,
"loss": 0.6605899930000305,
"step": 841
},
{
"epoch": 1.2132564841498559,
"grad_norm": 0.8134687542915344,
"learning_rate": 7.361959244535199e-06,
"loss": 0.7518759965896606,
"step": 842
},
{
"epoch": 1.2146974063400577,
"grad_norm": 0.8612692356109619,
"learning_rate": 7.338975062612115e-06,
"loss": 0.8546530604362488,
"step": 843
},
{
"epoch": 1.2161383285302594,
"grad_norm": 0.7533084154129028,
"learning_rate": 7.316005999500924e-06,
"loss": 0.6535155773162842,
"step": 844
},
{
"epoch": 1.217579250720461,
"grad_norm": 0.756385862827301,
"learning_rate": 7.293052185702079e-06,
"loss": 0.7160431742668152,
"step": 845
},
{
"epoch": 1.219020172910663,
"grad_norm": 0.7518402934074402,
"learning_rate": 7.270113751629388e-06,
"loss": 0.771348237991333,
"step": 846
},
{
"epoch": 1.2204610951008645,
"grad_norm": 0.7385017275810242,
"learning_rate": 7.247190827609273e-06,
"loss": 0.7204279899597168,
"step": 847
},
{
"epoch": 1.2219020172910664,
"grad_norm": 0.7346227765083313,
"learning_rate": 7.224283543880041e-06,
"loss": 0.678016722202301,
"step": 848
},
{
"epoch": 1.223342939481268,
"grad_norm": 0.7345762252807617,
"learning_rate": 7.201392030591137e-06,
"loss": 0.8019239902496338,
"step": 849
},
{
"epoch": 1.2247838616714697,
"grad_norm": 0.8108334541320801,
"learning_rate": 7.178516417802399e-06,
"loss": 0.7505182027816772,
"step": 850
},
{
"epoch": 1.2262247838616716,
"grad_norm": 0.7021329998970032,
"learning_rate": 7.155656835483331e-06,
"loss": 0.8881153464317322,
"step": 851
},
{
"epoch": 1.2276657060518732,
"grad_norm": 0.6703478097915649,
"learning_rate": 7.132813413512361e-06,
"loss": 0.6785498857498169,
"step": 852
},
{
"epoch": 1.2291066282420748,
"grad_norm": 0.6962404251098633,
"learning_rate": 7.10998628167609e-06,
"loss": 0.7082129120826721,
"step": 853
},
{
"epoch": 1.2305475504322767,
"grad_norm": 0.7256265878677368,
"learning_rate": 7.087175569668576e-06,
"loss": 0.5700943470001221,
"step": 854
},
{
"epoch": 1.2319884726224783,
"grad_norm": 0.6702523827552795,
"learning_rate": 7.064381407090584e-06,
"loss": 0.5819450616836548,
"step": 855
},
{
"epoch": 1.23342939481268,
"grad_norm": 0.7409508228302002,
"learning_rate": 7.041603923448847e-06,
"loss": 0.8606373071670532,
"step": 856
},
{
"epoch": 1.2348703170028819,
"grad_norm": 0.7683098316192627,
"learning_rate": 7.018843248155345e-06,
"loss": 0.7102565765380859,
"step": 857
},
{
"epoch": 1.2363112391930835,
"grad_norm": 0.8579858541488647,
"learning_rate": 6.996099510526546e-06,
"loss": 0.7700096368789673,
"step": 858
},
{
"epoch": 1.2377521613832854,
"grad_norm": 0.7399595975875854,
"learning_rate": 6.973372839782699e-06,
"loss": 0.7888767123222351,
"step": 859
},
{
"epoch": 1.239193083573487,
"grad_norm": 0.788192868232727,
"learning_rate": 6.950663365047083e-06,
"loss": 0.824313759803772,
"step": 860
},
{
"epoch": 1.2406340057636887,
"grad_norm": 0.7066922187805176,
"learning_rate": 6.927971215345271e-06,
"loss": 0.7467577457427979,
"step": 861
},
{
"epoch": 1.2420749279538905,
"grad_norm": 0.7298476099967957,
"learning_rate": 6.905296519604407e-06,
"loss": 0.7883299589157104,
"step": 862
},
{
"epoch": 1.2435158501440922,
"grad_norm": 0.7065430283546448,
"learning_rate": 6.8826394066524695e-06,
"loss": 0.7581709623336792,
"step": 863
},
{
"epoch": 1.244956772334294,
"grad_norm": 0.759701669216156,
"learning_rate": 6.860000005217533e-06,
"loss": 0.7661190629005432,
"step": 864
},
{
"epoch": 1.2463976945244957,
"grad_norm": 0.7207411527633667,
"learning_rate": 6.837378443927052e-06,
"loss": 0.6200900077819824,
"step": 865
},
{
"epoch": 1.2478386167146973,
"grad_norm": 0.7673540711402893,
"learning_rate": 6.814774851307118e-06,
"loss": 0.7384034395217896,
"step": 866
},
{
"epoch": 1.2492795389048992,
"grad_norm": 0.7653612494468689,
"learning_rate": 6.7921893557817246e-06,
"loss": 0.6381803750991821,
"step": 867
},
{
"epoch": 1.2507204610951008,
"grad_norm": 0.6993824243545532,
"learning_rate": 6.769622085672054e-06,
"loss": 0.7953975200653076,
"step": 868
},
{
"epoch": 1.2521613832853027,
"grad_norm": 0.8462424278259277,
"learning_rate": 6.747073169195739e-06,
"loss": 0.8438471555709839,
"step": 869
},
{
"epoch": 1.2536023054755043,
"grad_norm": 0.728303074836731,
"learning_rate": 6.724542734466127e-06,
"loss": 0.7537517547607422,
"step": 870
},
{
"epoch": 1.255043227665706,
"grad_norm": 0.8953651189804077,
"learning_rate": 6.70203090949157e-06,
"loss": 0.6626983880996704,
"step": 871
},
{
"epoch": 1.2564841498559078,
"grad_norm": 0.8448413014411926,
"learning_rate": 6.679537822174682e-06,
"loss": 0.7634541988372803,
"step": 872
},
{
"epoch": 1.2579250720461095,
"grad_norm": 0.6933168172836304,
"learning_rate": 6.657063600311616e-06,
"loss": 0.728123128414154,
"step": 873
},
{
"epoch": 1.2593659942363113,
"grad_norm": 0.728682279586792,
"learning_rate": 6.634608371591343e-06,
"loss": 0.7198150157928467,
"step": 874
},
{
"epoch": 1.260806916426513,
"grad_norm": 0.7677860260009766,
"learning_rate": 6.6121722635949244e-06,
"loss": 0.7593638896942139,
"step": 875
},
{
"epoch": 1.2622478386167146,
"grad_norm": 0.8202926516532898,
"learning_rate": 6.58975540379478e-06,
"loss": 0.7541650533676147,
"step": 876
},
{
"epoch": 1.2636887608069165,
"grad_norm": 0.7458558678627014,
"learning_rate": 6.567357919553973e-06,
"loss": 0.7063798904418945,
"step": 877
},
{
"epoch": 1.2651296829971181,
"grad_norm": 0.7055810689926147,
"learning_rate": 6.544979938125485e-06,
"loss": 0.6888713240623474,
"step": 878
},
{
"epoch": 1.26657060518732,
"grad_norm": 0.7375597953796387,
"learning_rate": 6.522621586651485e-06,
"loss": 0.7496410012245178,
"step": 879
},
{
"epoch": 1.2680115273775217,
"grad_norm": 0.8904906511306763,
"learning_rate": 6.5002829921626206e-06,
"loss": 0.7112149000167847,
"step": 880
},
{
"epoch": 1.2694524495677233,
"grad_norm": 0.7383248209953308,
"learning_rate": 6.477964281577282e-06,
"loss": 0.7184029817581177,
"step": 881
},
{
"epoch": 1.270893371757925,
"grad_norm": 0.7049366235733032,
"learning_rate": 6.4556655817008895e-06,
"loss": 0.7123745679855347,
"step": 882
},
{
"epoch": 1.2723342939481268,
"grad_norm": 0.8128007650375366,
"learning_rate": 6.433387019225175e-06,
"loss": 0.7427414059638977,
"step": 883
},
{
"epoch": 1.2737752161383284,
"grad_norm": 0.8199894428253174,
"learning_rate": 6.411128720727448e-06,
"loss": 0.8018887042999268,
"step": 884
},
{
"epoch": 1.2752161383285303,
"grad_norm": 0.7976292967796326,
"learning_rate": 6.3888908126699015e-06,
"loss": 0.7540068626403809,
"step": 885
},
{
"epoch": 1.276657060518732,
"grad_norm": 0.7567126750946045,
"learning_rate": 6.366673421398869e-06,
"loss": 0.6633398532867432,
"step": 886
},
{
"epoch": 1.2780979827089336,
"grad_norm": 0.9206326007843018,
"learning_rate": 6.344476673144113e-06,
"loss": 0.8141340613365173,
"step": 887
},
{
"epoch": 1.2795389048991355,
"grad_norm": 0.7636165618896484,
"learning_rate": 6.322300694018122e-06,
"loss": 0.922012448310852,
"step": 888
},
{
"epoch": 1.280979827089337,
"grad_norm": 0.5804350972175598,
"learning_rate": 6.3001456100153754e-06,
"loss": 0.5236533284187317,
"step": 889
},
{
"epoch": 1.282420749279539,
"grad_norm": 0.6871946454048157,
"learning_rate": 6.278011547011638e-06,
"loss": 0.6777955293655396,
"step": 890
},
{
"epoch": 1.2838616714697406,
"grad_norm": 0.9018382430076599,
"learning_rate": 6.255898630763238e-06,
"loss": 0.7229803204536438,
"step": 891
},
{
"epoch": 1.2853025936599423,
"grad_norm": 0.7636622190475464,
"learning_rate": 6.233806986906367e-06,
"loss": 0.7445772886276245,
"step": 892
},
{
"epoch": 1.2867435158501441,
"grad_norm": 0.8684889674186707,
"learning_rate": 6.211736740956343e-06,
"loss": 0.7453570365905762,
"step": 893
},
{
"epoch": 1.2881844380403458,
"grad_norm": 0.7295882105827332,
"learning_rate": 6.189688018306919e-06,
"loss": 0.7675601243972778,
"step": 894
},
{
"epoch": 1.2896253602305476,
"grad_norm": 0.7328001856803894,
"learning_rate": 6.167660944229561e-06,
"loss": 0.7515370845794678,
"step": 895
},
{
"epoch": 1.2910662824207493,
"grad_norm": 0.7595508694648743,
"learning_rate": 6.145655643872733e-06,
"loss": 0.567493736743927,
"step": 896
},
{
"epoch": 1.292507204610951,
"grad_norm": 0.614960253238678,
"learning_rate": 6.123672242261191e-06,
"loss": 0.645517110824585,
"step": 897
},
{
"epoch": 1.2939481268011528,
"grad_norm": 0.7227185368537903,
"learning_rate": 6.101710864295279e-06,
"loss": 0.7477235794067383,
"step": 898
},
{
"epoch": 1.2953890489913544,
"grad_norm": 0.8557547330856323,
"learning_rate": 6.0797716347502e-06,
"loss": 0.842688798904419,
"step": 899
},
{
"epoch": 1.2968299711815563,
"grad_norm": 0.7734098434448242,
"learning_rate": 6.057854678275326e-06,
"loss": 0.7552927732467651,
"step": 900
},
{
"epoch": 1.298270893371758,
"grad_norm": 0.9069559574127197,
"learning_rate": 6.035960119393483e-06,
"loss": 0.7265192270278931,
"step": 901
},
{
"epoch": 1.2997118155619596,
"grad_norm": 0.7913199067115784,
"learning_rate": 6.014088082500241e-06,
"loss": 0.8187565803527832,
"step": 902
},
{
"epoch": 1.3011527377521614,
"grad_norm": 0.7673888802528381,
"learning_rate": 5.9922386918632145e-06,
"loss": 0.6189776659011841,
"step": 903
},
{
"epoch": 1.302593659942363,
"grad_norm": 0.8198954463005066,
"learning_rate": 5.9704120716213435e-06,
"loss": 0.7065783739089966,
"step": 904
},
{
"epoch": 1.304034582132565,
"grad_norm": 0.7108833193778992,
"learning_rate": 5.948608345784201e-06,
"loss": 0.6191039085388184,
"step": 905
},
{
"epoch": 1.3054755043227666,
"grad_norm": 0.7285876870155334,
"learning_rate": 5.926827638231289e-06,
"loss": 0.645740270614624,
"step": 906
},
{
"epoch": 1.3069164265129682,
"grad_norm": 0.7736101150512695,
"learning_rate": 5.905070072711318e-06,
"loss": 0.6827014684677124,
"step": 907
},
{
"epoch": 1.30835734870317,
"grad_norm": 0.72000652551651,
"learning_rate": 5.883335772841523e-06,
"loss": 0.6328073740005493,
"step": 908
},
{
"epoch": 1.3097982708933718,
"grad_norm": 0.8541474342346191,
"learning_rate": 5.8616248621069545e-06,
"loss": 0.7510843276977539,
"step": 909
},
{
"epoch": 1.3112391930835736,
"grad_norm": 0.801873505115509,
"learning_rate": 5.83993746385977e-06,
"loss": 0.8494454026222229,
"step": 910
},
{
"epoch": 1.3126801152737753,
"grad_norm": 0.6421008110046387,
"learning_rate": 5.818273701318542e-06,
"loss": 0.6766311526298523,
"step": 911
},
{
"epoch": 1.314121037463977,
"grad_norm": 0.6372893452644348,
"learning_rate": 5.796633697567557e-06,
"loss": 0.6513572931289673,
"step": 912
},
{
"epoch": 1.3155619596541785,
"grad_norm": 0.7505905032157898,
"learning_rate": 5.77501757555611e-06,
"loss": 0.7472840547561646,
"step": 913
},
{
"epoch": 1.3170028818443804,
"grad_norm": 0.6798507571220398,
"learning_rate": 5.753425458097817e-06,
"loss": 0.6567862629890442,
"step": 914
},
{
"epoch": 1.318443804034582,
"grad_norm": 0.7536661624908447,
"learning_rate": 5.731857467869902e-06,
"loss": 0.7532881498336792,
"step": 915
},
{
"epoch": 1.319884726224784,
"grad_norm": 0.8154526352882385,
"learning_rate": 5.710313727412513e-06,
"loss": 0.7319618463516235,
"step": 916
},
{
"epoch": 1.3213256484149856,
"grad_norm": 0.7455941438674927,
"learning_rate": 5.688794359128018e-06,
"loss": 0.8852958679199219,
"step": 917
},
{
"epoch": 1.3227665706051872,
"grad_norm": 0.691473662853241,
"learning_rate": 5.6672994852803184e-06,
"loss": 0.7115726470947266,
"step": 918
},
{
"epoch": 1.324207492795389,
"grad_norm": 0.8230622410774231,
"learning_rate": 5.645829227994146e-06,
"loss": 0.7268555164337158,
"step": 919
},
{
"epoch": 1.3256484149855907,
"grad_norm": 0.7255274057388306,
"learning_rate": 5.624383709254363e-06,
"loss": 0.6968173980712891,
"step": 920
},
{
"epoch": 1.3270893371757926,
"grad_norm": 0.7870388627052307,
"learning_rate": 5.602963050905296e-06,
"loss": 0.7143142223358154,
"step": 921
},
{
"epoch": 1.3285302593659942,
"grad_norm": 0.753603994846344,
"learning_rate": 5.58156737465001e-06,
"loss": 0.662851095199585,
"step": 922
},
{
"epoch": 1.3299711815561959,
"grad_norm": 0.7982147336006165,
"learning_rate": 5.560196802049633e-06,
"loss": 0.7520275712013245,
"step": 923
},
{
"epoch": 1.3314121037463977,
"grad_norm": 0.7443618178367615,
"learning_rate": 5.538851454522678e-06,
"loss": 0.6887432932853699,
"step": 924
},
{
"epoch": 1.3328530259365994,
"grad_norm": 0.732810378074646,
"learning_rate": 5.517531453344327e-06,
"loss": 0.6998310685157776,
"step": 925
},
{
"epoch": 1.3342939481268012,
"grad_norm": 0.7935830950737,
"learning_rate": 5.496236919645754e-06,
"loss": 0.8417803645133972,
"step": 926
},
{
"epoch": 1.3357348703170029,
"grad_norm": 0.8099915981292725,
"learning_rate": 5.474967974413451e-06,
"loss": 0.6904634237289429,
"step": 927
},
{
"epoch": 1.3371757925072045,
"grad_norm": 0.803108274936676,
"learning_rate": 5.453724738488511e-06,
"loss": 0.7582980394363403,
"step": 928
},
{
"epoch": 1.3386167146974064,
"grad_norm": 0.7702426910400391,
"learning_rate": 5.432507332565968e-06,
"loss": 0.7501033544540405,
"step": 929
},
{
"epoch": 1.340057636887608,
"grad_norm": 0.5750554800033569,
"learning_rate": 5.411315877194104e-06,
"loss": 0.46313565969467163,
"step": 930
},
{
"epoch": 1.34149855907781,
"grad_norm": 0.7295573949813843,
"learning_rate": 5.390150492773749e-06,
"loss": 0.7082580327987671,
"step": 931
},
{
"epoch": 1.3429394812680115,
"grad_norm": 0.8001100420951843,
"learning_rate": 5.369011299557617e-06,
"loss": 0.5968407988548279,
"step": 932
},
{
"epoch": 1.3443804034582132,
"grad_norm": 0.7285897731781006,
"learning_rate": 5.347898417649609e-06,
"loss": 0.6727792024612427,
"step": 933
},
{
"epoch": 1.345821325648415,
"grad_norm": 0.7374528646469116,
"learning_rate": 5.3268119670041465e-06,
"loss": 0.7759701013565063,
"step": 934
},
{
"epoch": 1.3472622478386167,
"grad_norm": 0.8536549806594849,
"learning_rate": 5.305752067425469e-06,
"loss": 0.7828449010848999,
"step": 935
},
{
"epoch": 1.3487031700288186,
"grad_norm": 0.7041170001029968,
"learning_rate": 5.284718838566968e-06,
"loss": 0.8058781623840332,
"step": 936
},
{
"epoch": 1.3501440922190202,
"grad_norm": 0.7253990173339844,
"learning_rate": 5.26371239993051e-06,
"loss": 0.7985930442810059,
"step": 937
},
{
"epoch": 1.3515850144092219,
"grad_norm": 0.796826183795929,
"learning_rate": 5.242732870865739e-06,
"loss": 0.8930832147598267,
"step": 938
},
{
"epoch": 1.3530259365994235,
"grad_norm": 0.8277836441993713,
"learning_rate": 5.221780370569415e-06,
"loss": 0.8998521566390991,
"step": 939
},
{
"epoch": 1.3544668587896254,
"grad_norm": 0.6864756941795349,
"learning_rate": 5.2008550180847394e-06,
"loss": 0.65891432762146,
"step": 940
},
{
"epoch": 1.3559077809798272,
"grad_norm": 0.8034760355949402,
"learning_rate": 5.1799569323006615e-06,
"loss": 0.754687488079071,
"step": 941
},
{
"epoch": 1.3573487031700289,
"grad_norm": 0.7061858773231506,
"learning_rate": 5.159086231951213e-06,
"loss": 0.6347618103027344,
"step": 942
},
{
"epoch": 1.3587896253602305,
"grad_norm": 0.7044359445571899,
"learning_rate": 5.138243035614842e-06,
"loss": 0.7207168340682983,
"step": 943
},
{
"epoch": 1.3602305475504322,
"grad_norm": 0.795578122138977,
"learning_rate": 5.117427461713724e-06,
"loss": 0.778628945350647,
"step": 944
},
{
"epoch": 1.361671469740634,
"grad_norm": 0.9205237627029419,
"learning_rate": 5.096639628513092e-06,
"loss": 0.7258281111717224,
"step": 945
},
{
"epoch": 1.3631123919308357,
"grad_norm": 0.793719470500946,
"learning_rate": 5.0758796541205794e-06,
"loss": 0.9028610587120056,
"step": 946
},
{
"epoch": 1.3645533141210375,
"grad_norm": 0.803044855594635,
"learning_rate": 5.055147656485526e-06,
"loss": 0.6968406438827515,
"step": 947
},
{
"epoch": 1.3659942363112392,
"grad_norm": 0.8841282725334167,
"learning_rate": 5.034443753398323e-06,
"loss": 0.8499928712844849,
"step": 948
},
{
"epoch": 1.3674351585014408,
"grad_norm": 0.7515528202056885,
"learning_rate": 5.01376806248975e-06,
"loss": 0.6870205998420715,
"step": 949
},
{
"epoch": 1.3688760806916427,
"grad_norm": 0.9015482068061829,
"learning_rate": 4.993120701230283e-06,
"loss": 0.7434237003326416,
"step": 950
},
{
"epoch": 1.3703170028818443,
"grad_norm": 0.726290762424469,
"learning_rate": 4.972501786929443e-06,
"loss": 0.7235680818557739,
"step": 951
},
{
"epoch": 1.3717579250720462,
"grad_norm": 0.8368416428565979,
"learning_rate": 4.951911436735142e-06,
"loss": 0.7924642562866211,
"step": 952
},
{
"epoch": 1.3731988472622478,
"grad_norm": 0.7369707226753235,
"learning_rate": 4.931349767632985e-06,
"loss": 0.7321688532829285,
"step": 953
},
{
"epoch": 1.3746397694524495,
"grad_norm": 0.783839762210846,
"learning_rate": 4.910816896445628e-06,
"loss": 0.7634139060974121,
"step": 954
},
{
"epoch": 1.3760806916426513,
"grad_norm": 0.7845622301101685,
"learning_rate": 4.890312939832119e-06,
"loss": 0.7176026105880737,
"step": 955
},
{
"epoch": 1.377521613832853,
"grad_norm": 0.7255096435546875,
"learning_rate": 4.869838014287217e-06,
"loss": 0.7216761708259583,
"step": 956
},
{
"epoch": 1.3789625360230549,
"grad_norm": 0.7795526385307312,
"learning_rate": 4.849392236140734e-06,
"loss": 0.8003466725349426,
"step": 957
},
{
"epoch": 1.3804034582132565,
"grad_norm": 0.7873338460922241,
"learning_rate": 4.828975721556895e-06,
"loss": 0.7114070653915405,
"step": 958
},
{
"epoch": 1.3818443804034581,
"grad_norm": 0.8338869214057922,
"learning_rate": 4.808588586533646e-06,
"loss": 0.7371832132339478,
"step": 959
},
{
"epoch": 1.38328530259366,
"grad_norm": 0.7012267112731934,
"learning_rate": 4.788230946902015e-06,
"loss": 0.5402542948722839,
"step": 960
},
{
"epoch": 1.3847262247838616,
"grad_norm": 0.9027784466743469,
"learning_rate": 4.76790291832546e-06,
"loss": 0.7232666015625,
"step": 961
},
{
"epoch": 1.3861671469740635,
"grad_norm": 0.7093715667724609,
"learning_rate": 4.747604616299189e-06,
"loss": 0.6918929815292358,
"step": 962
},
{
"epoch": 1.3876080691642652,
"grad_norm": 0.716592013835907,
"learning_rate": 4.727336156149516e-06,
"loss": 0.71802818775177,
"step": 963
},
{
"epoch": 1.3890489913544668,
"grad_norm": 0.7714113593101501,
"learning_rate": 4.707097653033219e-06,
"loss": 0.722802996635437,
"step": 964
},
{
"epoch": 1.3904899135446687,
"grad_norm": 0.7559787034988403,
"learning_rate": 4.686889221936861e-06,
"loss": 0.7283670902252197,
"step": 965
},
{
"epoch": 1.3919308357348703,
"grad_norm": 0.7135756611824036,
"learning_rate": 4.66671097767615e-06,
"loss": 0.6234134435653687,
"step": 966
},
{
"epoch": 1.3933717579250722,
"grad_norm": 0.799081027507782,
"learning_rate": 4.646563034895293e-06,
"loss": 0.7715392112731934,
"step": 967
},
{
"epoch": 1.3948126801152738,
"grad_norm": 0.7190555930137634,
"learning_rate": 4.626445508066329e-06,
"loss": 0.6489726901054382,
"step": 968
},
{
"epoch": 1.3962536023054755,
"grad_norm": 0.97360759973526,
"learning_rate": 4.606358511488486e-06,
"loss": 0.7395188808441162,
"step": 969
},
{
"epoch": 1.397694524495677,
"grad_norm": 0.6608725786209106,
"learning_rate": 4.58630215928754e-06,
"loss": 0.6690818667411804,
"step": 970
},
{
"epoch": 1.399135446685879,
"grad_norm": 0.7637996077537537,
"learning_rate": 4.566276565415152e-06,
"loss": 0.8046863675117493,
"step": 971
},
{
"epoch": 1.4005763688760806,
"grad_norm": 0.8393040299415588,
"learning_rate": 4.5462818436482245e-06,
"loss": 0.825577437877655,
"step": 972
},
{
"epoch": 1.4020172910662825,
"grad_norm": 0.7651957869529724,
"learning_rate": 4.52631810758827e-06,
"loss": 0.7689692378044128,
"step": 973
},
{
"epoch": 1.4034582132564841,
"grad_norm": 0.8406446576118469,
"learning_rate": 4.506385470660742e-06,
"loss": 0.7642035484313965,
"step": 974
},
{
"epoch": 1.4048991354466858,
"grad_norm": 0.9124138951301575,
"learning_rate": 4.486484046114403e-06,
"loss": 0.6852501630783081,
"step": 975
},
{
"epoch": 1.4063400576368876,
"grad_norm": 0.9097302556037903,
"learning_rate": 4.466613947020689e-06,
"loss": 0.7974008917808533,
"step": 976
},
{
"epoch": 1.4077809798270893,
"grad_norm": 0.7557157874107361,
"learning_rate": 4.4467752862730485e-06,
"loss": 0.7818719148635864,
"step": 977
},
{
"epoch": 1.4092219020172911,
"grad_norm": 0.8131412863731384,
"learning_rate": 4.42696817658631e-06,
"loss": 0.6493479609489441,
"step": 978
},
{
"epoch": 1.4106628242074928,
"grad_norm": 0.7907167673110962,
"learning_rate": 4.4071927304960534e-06,
"loss": 0.79290771484375,
"step": 979
},
{
"epoch": 1.4121037463976944,
"grad_norm": 0.7542915344238281,
"learning_rate": 4.38744906035795e-06,
"loss": 0.8156715631484985,
"step": 980
},
{
"epoch": 1.4135446685878963,
"grad_norm": 0.7144057154655457,
"learning_rate": 4.367737278347136e-06,
"loss": 0.7458773851394653,
"step": 981
},
{
"epoch": 1.414985590778098,
"grad_norm": 0.8254581093788147,
"learning_rate": 4.348057496457567e-06,
"loss": 0.6097003221511841,
"step": 982
},
{
"epoch": 1.4164265129682998,
"grad_norm": 0.8161498308181763,
"learning_rate": 4.328409826501403e-06,
"loss": 0.7463165521621704,
"step": 983
},
{
"epoch": 1.4178674351585014,
"grad_norm": 0.8310127258300781,
"learning_rate": 4.3087943801083445e-06,
"loss": 0.6355860233306885,
"step": 984
},
{
"epoch": 1.419308357348703,
"grad_norm": 0.8795257210731506,
"learning_rate": 4.289211268725009e-06,
"loss": 0.7873852252960205,
"step": 985
},
{
"epoch": 1.420749279538905,
"grad_norm": 0.6934751272201538,
"learning_rate": 4.269660603614316e-06,
"loss": 0.6793715953826904,
"step": 986
},
{
"epoch": 1.4221902017291066,
"grad_norm": 0.8139089345932007,
"learning_rate": 4.250142495854825e-06,
"loss": 0.6482336521148682,
"step": 987
},
{
"epoch": 1.4236311239193085,
"grad_norm": 0.812993586063385,
"learning_rate": 4.2306570563401185e-06,
"loss": 0.6544175744056702,
"step": 988
},
{
"epoch": 1.42507204610951,
"grad_norm": 0.8022162318229675,
"learning_rate": 4.211204395778183e-06,
"loss": 0.7107487916946411,
"step": 989
},
{
"epoch": 1.4265129682997117,
"grad_norm": 0.8545569777488708,
"learning_rate": 4.19178462469076e-06,
"loss": 0.8046406507492065,
"step": 990
},
{
"epoch": 1.4279538904899136,
"grad_norm": 0.7910804748535156,
"learning_rate": 4.172397853412725e-06,
"loss": 0.7375363707542419,
"step": 991
},
{
"epoch": 1.4293948126801153,
"grad_norm": 0.8032233715057373,
"learning_rate": 4.1530441920914746e-06,
"loss": 0.7059754133224487,
"step": 992
},
{
"epoch": 1.4308357348703171,
"grad_norm": 0.6835878491401672,
"learning_rate": 4.1337237506862744e-06,
"loss": 0.6616318225860596,
"step": 993
},
{
"epoch": 1.4322766570605188,
"grad_norm": 0.6732160449028015,
"learning_rate": 4.114436638967656e-06,
"loss": 0.5688523054122925,
"step": 994
},
{
"epoch": 1.4337175792507204,
"grad_norm": 0.8257527351379395,
"learning_rate": 4.095182966516787e-06,
"loss": 0.8635351657867432,
"step": 995
},
{
"epoch": 1.435158501440922,
"grad_norm": 0.8186964392662048,
"learning_rate": 4.075962842724847e-06,
"loss": 0.6884078979492188,
"step": 996
},
{
"epoch": 1.436599423631124,
"grad_norm": 0.7682012915611267,
"learning_rate": 4.0567763767923965e-06,
"loss": 0.6973609328269958,
"step": 997
},
{
"epoch": 1.4380403458213258,
"grad_norm": 0.7340428233146667,
"learning_rate": 4.037623677728783e-06,
"loss": 0.645268440246582,
"step": 998
},
{
"epoch": 1.4394812680115274,
"grad_norm": 0.8506885170936584,
"learning_rate": 4.018504854351495e-06,
"loss": 0.639744758605957,
"step": 999
},
{
"epoch": 1.440922190201729,
"grad_norm": 0.7691463828086853,
"learning_rate": 3.999420015285549e-06,
"loss": 0.6750536561012268,
"step": 1000
},
{
"epoch": 1.4423631123919307,
"grad_norm": 0.7501078844070435,
"learning_rate": 3.980369268962893e-06,
"loss": 0.6951167583465576,
"step": 1001
},
{
"epoch": 1.4438040345821326,
"grad_norm": 0.6547222137451172,
"learning_rate": 3.961352723621757e-06,
"loss": 0.5897108912467957,
"step": 1002
},
{
"epoch": 1.4452449567723342,
"grad_norm": 0.7267579436302185,
"learning_rate": 3.942370487306064e-06,
"loss": 0.6418097019195557,
"step": 1003
},
{
"epoch": 1.446685878962536,
"grad_norm": 0.7520357966423035,
"learning_rate": 3.923422667864814e-06,
"loss": 0.7392733693122864,
"step": 1004
},
{
"epoch": 1.4481268011527377,
"grad_norm": 0.9271366000175476,
"learning_rate": 3.904509372951453e-06,
"loss": 0.7005877494812012,
"step": 1005
},
{
"epoch": 1.4495677233429394,
"grad_norm": 0.741805911064148,
"learning_rate": 3.885630710023275e-06,
"loss": 0.7494614124298096,
"step": 1006
},
{
"epoch": 1.4510086455331412,
"grad_norm": 0.7596203684806824,
"learning_rate": 3.866786786340821e-06,
"loss": 0.7183794975280762,
"step": 1007
},
{
"epoch": 1.4524495677233429,
"grad_norm": 0.792771577835083,
"learning_rate": 3.847977708967246e-06,
"loss": 0.6995346546173096,
"step": 1008
},
{
"epoch": 1.4538904899135447,
"grad_norm": 0.772834062576294,
"learning_rate": 3.829203584767724e-06,
"loss": 0.6018137335777283,
"step": 1009
},
{
"epoch": 1.4553314121037464,
"grad_norm": 0.9559422135353088,
"learning_rate": 3.810464520408853e-06,
"loss": 0.7116073966026306,
"step": 1010
},
{
"epoch": 1.456772334293948,
"grad_norm": 0.7630804777145386,
"learning_rate": 3.7917606223580217e-06,
"loss": 0.737439751625061,
"step": 1011
},
{
"epoch": 1.45821325648415,
"grad_norm": 0.7160147428512573,
"learning_rate": 3.7730919968828194e-06,
"loss": 0.6739982962608337,
"step": 1012
},
{
"epoch": 1.4596541786743515,
"grad_norm": 0.9138517379760742,
"learning_rate": 3.754458750050445e-06,
"loss": 0.8231876492500305,
"step": 1013
},
{
"epoch": 1.4610951008645534,
"grad_norm": 0.850914716720581,
"learning_rate": 3.7358609877270746e-06,
"loss": 0.7859776020050049,
"step": 1014
},
{
"epoch": 1.462536023054755,
"grad_norm": 0.6578977704048157,
"learning_rate": 3.717298815577284e-06,
"loss": 0.6639118194580078,
"step": 1015
},
{
"epoch": 1.4639769452449567,
"grad_norm": 0.7375325560569763,
"learning_rate": 3.6987723390634447e-06,
"loss": 0.7305494546890259,
"step": 1016
},
{
"epoch": 1.4654178674351586,
"grad_norm": 0.7297279834747314,
"learning_rate": 3.6802816634451144e-06,
"loss": 0.7086485028266907,
"step": 1017
},
{
"epoch": 1.4668587896253602,
"grad_norm": 0.6900395154953003,
"learning_rate": 3.661826893778443e-06,
"loss": 0.5996535420417786,
"step": 1018
},
{
"epoch": 1.468299711815562,
"grad_norm": 0.9336727261543274,
"learning_rate": 3.6434081349155903e-06,
"loss": 0.8409576416015625,
"step": 1019
},
{
"epoch": 1.4697406340057637,
"grad_norm": 0.8965365290641785,
"learning_rate": 3.6250254915041073e-06,
"loss": 0.8301442861557007,
"step": 1020
},
{
"epoch": 1.4711815561959654,
"grad_norm": 0.7489187717437744,
"learning_rate": 3.6066790679863505e-06,
"loss": 0.6619806289672852,
"step": 1021
},
{
"epoch": 1.4726224783861672,
"grad_norm": 0.7200744152069092,
"learning_rate": 3.588368968598903e-06,
"loss": 0.7702663540840149,
"step": 1022
},
{
"epoch": 1.4740634005763689,
"grad_norm": 0.7389686107635498,
"learning_rate": 3.5700952973719573e-06,
"loss": 0.6748791933059692,
"step": 1023
},
{
"epoch": 1.4755043227665707,
"grad_norm": 0.9621058106422424,
"learning_rate": 3.551858158128739e-06,
"loss": 0.7804979085922241,
"step": 1024
},
{
"epoch": 1.4769452449567724,
"grad_norm": 0.6999828815460205,
"learning_rate": 3.533657654484922e-06,
"loss": 0.7398617267608643,
"step": 1025
},
{
"epoch": 1.478386167146974,
"grad_norm": 0.8494158387184143,
"learning_rate": 3.515493889848025e-06,
"loss": 0.647086501121521,
"step": 1026
},
{
"epoch": 1.4798270893371757,
"grad_norm": 0.7813376784324646,
"learning_rate": 3.49736696741683e-06,
"loss": 0.753842830657959,
"step": 1027
},
{
"epoch": 1.4812680115273775,
"grad_norm": 0.7741125226020813,
"learning_rate": 3.4792769901808043e-06,
"loss": 0.8448225259780884,
"step": 1028
},
{
"epoch": 1.4827089337175792,
"grad_norm": 0.7925018072128296,
"learning_rate": 3.4612240609195034e-06,
"loss": 0.8170247077941895,
"step": 1029
},
{
"epoch": 1.484149855907781,
"grad_norm": 0.7917532324790955,
"learning_rate": 3.443208282201994e-06,
"loss": 0.7810318470001221,
"step": 1030
},
{
"epoch": 1.4855907780979827,
"grad_norm": 0.8103862404823303,
"learning_rate": 3.4252297563862625e-06,
"loss": 0.7185397148132324,
"step": 1031
},
{
"epoch": 1.4870317002881843,
"grad_norm": 0.8068615198135376,
"learning_rate": 3.407288585618654e-06,
"loss": 0.6962016224861145,
"step": 1032
},
{
"epoch": 1.4884726224783862,
"grad_norm": 0.712526261806488,
"learning_rate": 3.3893848718332665e-06,
"loss": 0.6078779101371765,
"step": 1033
},
{
"epoch": 1.4899135446685878,
"grad_norm": 0.8054221868515015,
"learning_rate": 3.371518716751383e-06,
"loss": 0.6642535924911499,
"step": 1034
},
{
"epoch": 1.4913544668587897,
"grad_norm": 0.735863983631134,
"learning_rate": 3.3536902218809043e-06,
"loss": 0.6583288311958313,
"step": 1035
},
{
"epoch": 1.4927953890489913,
"grad_norm": 0.7167906165122986,
"learning_rate": 3.3358994885157537e-06,
"loss": 0.7499520778656006,
"step": 1036
},
{
"epoch": 1.494236311239193,
"grad_norm": 0.9229914546012878,
"learning_rate": 3.318146617735306e-06,
"loss": 0.9205317497253418,
"step": 1037
},
{
"epoch": 1.4956772334293948,
"grad_norm": 0.7089002728462219,
"learning_rate": 3.3004317104038296e-06,
"loss": 0.7000449299812317,
"step": 1038
},
{
"epoch": 1.4971181556195965,
"grad_norm": 0.8045422434806824,
"learning_rate": 3.2827548671698907e-06,
"loss": 0.7404249906539917,
"step": 1039
},
{
"epoch": 1.4985590778097984,
"grad_norm": 0.7974978685379028,
"learning_rate": 3.26511618846579e-06,
"loss": 0.6965054273605347,
"step": 1040
},
{
"epoch": 1.5,
"grad_norm": 0.793845534324646,
"learning_rate": 3.247515774507005e-06,
"loss": 0.6663249731063843,
"step": 1041
},
{
"epoch": 1.5014409221902016,
"grad_norm": 0.6912310123443604,
"learning_rate": 3.2299537252915993e-06,
"loss": 0.5732256770133972,
"step": 1042
},
{
"epoch": 1.5028818443804035,
"grad_norm": 0.779844343662262,
"learning_rate": 3.2124301405996616e-06,
"loss": 0.6914101839065552,
"step": 1043
},
{
"epoch": 1.5043227665706052,
"grad_norm": 0.6761540770530701,
"learning_rate": 3.194945119992755e-06,
"loss": 0.6391370296478271,
"step": 1044
},
{
"epoch": 1.505763688760807,
"grad_norm": 0.7634333372116089,
"learning_rate": 3.177498762813327e-06,
"loss": 0.7757022976875305,
"step": 1045
},
{
"epoch": 1.5072046109510087,
"grad_norm": 0.9414384961128235,
"learning_rate": 3.160091168184154e-06,
"loss": 0.6397742033004761,
"step": 1046
},
{
"epoch": 1.5086455331412103,
"grad_norm": 0.8356814980506897,
"learning_rate": 3.142722435007791e-06,
"loss": 0.749836802482605,
"step": 1047
},
{
"epoch": 1.510086455331412,
"grad_norm": 0.8035895228385925,
"learning_rate": 3.1253926619659912e-06,
"loss": 0.7546325922012329,
"step": 1048
},
{
"epoch": 1.5115273775216138,
"grad_norm": 0.84291672706604,
"learning_rate": 3.108101947519151e-06,
"loss": 0.759354829788208,
"step": 1049
},
{
"epoch": 1.5129682997118157,
"grad_norm": 0.731903076171875,
"learning_rate": 3.0908503899057605e-06,
"loss": 0.6469593048095703,
"step": 1050
},
{
"epoch": 1.5144092219020173,
"grad_norm": 0.7537881731987,
"learning_rate": 3.0736380871418305e-06,
"loss": 0.7219445109367371,
"step": 1051
},
{
"epoch": 1.515850144092219,
"grad_norm": 0.8318817019462585,
"learning_rate": 3.0564651370203414e-06,
"loss": 0.7360014915466309,
"step": 1052
},
{
"epoch": 1.5172910662824206,
"grad_norm": 0.7886281609535217,
"learning_rate": 3.039331637110697e-06,
"loss": 0.7079243063926697,
"step": 1053
},
{
"epoch": 1.5187319884726225,
"grad_norm": 0.8962216973304749,
"learning_rate": 3.0222376847581546e-06,
"loss": 0.7061739563941956,
"step": 1054
},
{
"epoch": 1.5201729106628243,
"grad_norm": 0.8324871063232422,
"learning_rate": 3.005183377083277e-06,
"loss": 0.6976668834686279,
"step": 1055
},
{
"epoch": 1.521613832853026,
"grad_norm": 0.7693350315093994,
"learning_rate": 2.9881688109813933e-06,
"loss": 0.745376467704773,
"step": 1056
},
{
"epoch": 1.5230547550432276,
"grad_norm": 0.7759479880332947,
"learning_rate": 2.971194083122029e-06,
"loss": 0.8127241730690002,
"step": 1057
},
{
"epoch": 1.5244956772334293,
"grad_norm": 0.6519967913627625,
"learning_rate": 2.9542592899483633e-06,
"loss": 0.6398651003837585,
"step": 1058
},
{
"epoch": 1.5259365994236311,
"grad_norm": 0.7922623753547668,
"learning_rate": 2.937364527676697e-06,
"loss": 0.7102863788604736,
"step": 1059
},
{
"epoch": 1.527377521613833,
"grad_norm": 0.8374100923538208,
"learning_rate": 2.920509892295875e-06,
"loss": 0.8550270795822144,
"step": 1060
},
{
"epoch": 1.5288184438040346,
"grad_norm": 0.8438422679901123,
"learning_rate": 2.903695479566774e-06,
"loss": 0.6879276037216187,
"step": 1061
},
{
"epoch": 1.5302593659942363,
"grad_norm": 0.857816755771637,
"learning_rate": 2.886921385021729e-06,
"loss": 0.7720720767974854,
"step": 1062
},
{
"epoch": 1.531700288184438,
"grad_norm": 0.6902361512184143,
"learning_rate": 2.870187703964017e-06,
"loss": 0.5288726091384888,
"step": 1063
},
{
"epoch": 1.5331412103746398,
"grad_norm": 0.8847417235374451,
"learning_rate": 2.8534945314672946e-06,
"loss": 0.648311197757721,
"step": 1064
},
{
"epoch": 1.5345821325648417,
"grad_norm": 0.73543781042099,
"learning_rate": 2.8368419623750633e-06,
"loss": 0.7209224104881287,
"step": 1065
},
{
"epoch": 1.5360230547550433,
"grad_norm": 0.7391148805618286,
"learning_rate": 2.8202300913001445e-06,
"loss": 0.6820803880691528,
"step": 1066
},
{
"epoch": 1.537463976945245,
"grad_norm": 0.7311487793922424,
"learning_rate": 2.8036590126241226e-06,
"loss": 0.7790380120277405,
"step": 1067
},
{
"epoch": 1.5389048991354466,
"grad_norm": 0.8042330145835876,
"learning_rate": 2.7871288204968127e-06,
"loss": 0.7293061017990112,
"step": 1068
},
{
"epoch": 1.5403458213256485,
"grad_norm": 0.8683137893676758,
"learning_rate": 2.7706396088357444e-06,
"loss": 0.6657461524009705,
"step": 1069
},
{
"epoch": 1.54178674351585,
"grad_norm": 0.7877099514007568,
"learning_rate": 2.754191471325601e-06,
"loss": 0.7446212768554688,
"step": 1070
},
{
"epoch": 1.543227665706052,
"grad_norm": 0.7506648302078247,
"learning_rate": 2.737784501417702e-06,
"loss": 0.5783571004867554,
"step": 1071
},
{
"epoch": 1.5446685878962536,
"grad_norm": 0.9356808662414551,
"learning_rate": 2.7214187923294766e-06,
"loss": 0.710444986820221,
"step": 1072
},
{
"epoch": 1.5461095100864553,
"grad_norm": 0.8530304431915283,
"learning_rate": 2.70509443704392e-06,
"loss": 0.7275018095970154,
"step": 1073
},
{
"epoch": 1.547550432276657,
"grad_norm": 0.8602854609489441,
"learning_rate": 2.6888115283090754e-06,
"loss": 0.6969873905181885,
"step": 1074
},
{
"epoch": 1.5489913544668588,
"grad_norm": 0.7868731021881104,
"learning_rate": 2.6725701586375075e-06,
"loss": 0.6938682794570923,
"step": 1075
},
{
"epoch": 1.5504322766570606,
"grad_norm": 0.7867501378059387,
"learning_rate": 2.6563704203057704e-06,
"loss": 0.6999156475067139,
"step": 1076
},
{
"epoch": 1.5518731988472623,
"grad_norm": 0.8717447519302368,
"learning_rate": 2.6402124053538837e-06,
"loss": 0.7442126274108887,
"step": 1077
},
{
"epoch": 1.553314121037464,
"grad_norm": 0.8786858916282654,
"learning_rate": 2.6240962055848196e-06,
"loss": 0.8091111183166504,
"step": 1078
},
{
"epoch": 1.5547550432276656,
"grad_norm": 0.8132948875427246,
"learning_rate": 2.6080219125639703e-06,
"loss": 0.6046196222305298,
"step": 1079
},
{
"epoch": 1.5561959654178674,
"grad_norm": 0.8643730878829956,
"learning_rate": 2.5919896176186287e-06,
"loss": 0.8511631488800049,
"step": 1080
},
{
"epoch": 1.5576368876080693,
"grad_norm": 0.9204576015472412,
"learning_rate": 2.575999411837481e-06,
"loss": 0.8237127661705017,
"step": 1081
},
{
"epoch": 1.559077809798271,
"grad_norm": 0.7882550358772278,
"learning_rate": 2.560051386070073e-06,
"loss": 0.7889937162399292,
"step": 1082
},
{
"epoch": 1.5605187319884726,
"grad_norm": 0.8252015113830566,
"learning_rate": 2.5441456309263e-06,
"loss": 0.6393820643424988,
"step": 1083
},
{
"epoch": 1.5619596541786742,
"grad_norm": 0.8019078969955444,
"learning_rate": 2.5282822367759054e-06,
"loss": 0.6617242693901062,
"step": 1084
},
{
"epoch": 1.563400576368876,
"grad_norm": 0.8571462631225586,
"learning_rate": 2.512461293747942e-06,
"loss": 0.8511845469474792,
"step": 1085
},
{
"epoch": 1.564841498559078,
"grad_norm": 0.7433684468269348,
"learning_rate": 2.496682891730279e-06,
"loss": 0.7948633432388306,
"step": 1086
},
{
"epoch": 1.5662824207492796,
"grad_norm": 0.8066547513008118,
"learning_rate": 2.480947120369089e-06,
"loss": 0.648466944694519,
"step": 1087
},
{
"epoch": 1.5677233429394812,
"grad_norm": 0.816417396068573,
"learning_rate": 2.4652540690683315e-06,
"loss": 0.8480396866798401,
"step": 1088
},
{
"epoch": 1.5691642651296829,
"grad_norm": 0.7453494668006897,
"learning_rate": 2.4496038269892455e-06,
"loss": 0.7550395131111145,
"step": 1089
},
{
"epoch": 1.5706051873198847,
"grad_norm": 0.8361696004867554,
"learning_rate": 2.433996483049855e-06,
"loss": 0.6834908723831177,
"step": 1090
},
{
"epoch": 1.5720461095100866,
"grad_norm": 0.7857866883277893,
"learning_rate": 2.418432125924449e-06,
"loss": 0.7276380062103271,
"step": 1091
},
{
"epoch": 1.5734870317002883,
"grad_norm": 0.7743988037109375,
"learning_rate": 2.4029108440430838e-06,
"loss": 0.7755744457244873,
"step": 1092
},
{
"epoch": 1.57492795389049,
"grad_norm": 0.8050707578659058,
"learning_rate": 2.387432725591078e-06,
"loss": 0.8086447715759277,
"step": 1093
},
{
"epoch": 1.5763688760806915,
"grad_norm": 0.7724820971488953,
"learning_rate": 2.3719978585085234e-06,
"loss": 0.7475936412811279,
"step": 1094
},
{
"epoch": 1.5778097982708934,
"grad_norm": 0.8708832263946533,
"learning_rate": 2.356606330489769e-06,
"loss": 0.6741630434989929,
"step": 1095
},
{
"epoch": 1.579250720461095,
"grad_norm": 0.8784914016723633,
"learning_rate": 2.3412582289829254e-06,
"loss": 0.5807492733001709,
"step": 1096
},
{
"epoch": 1.580691642651297,
"grad_norm": 0.6939861178398132,
"learning_rate": 2.3259536411893836e-06,
"loss": 0.6853386163711548,
"step": 1097
},
{
"epoch": 1.5821325648414986,
"grad_norm": 0.7035260200500488,
"learning_rate": 2.3106926540633e-06,
"loss": 0.6275226473808289,
"step": 1098
},
{
"epoch": 1.5835734870317002,
"grad_norm": 0.8285399079322815,
"learning_rate": 2.2954753543111097e-06,
"loss": 0.7287248373031616,
"step": 1099
},
{
"epoch": 1.585014409221902,
"grad_norm": 0.828209638595581,
"learning_rate": 2.2803018283910415e-06,
"loss": 0.775260865688324,
"step": 1100
},
{
"epoch": 1.5864553314121037,
"grad_norm": 0.717948853969574,
"learning_rate": 2.2651721625126167e-06,
"loss": 0.5827840566635132,
"step": 1101
},
{
"epoch": 1.5878962536023056,
"grad_norm": 0.8269100785255432,
"learning_rate": 2.2500864426361556e-06,
"loss": 0.820456862449646,
"step": 1102
},
{
"epoch": 1.5893371757925072,
"grad_norm": 0.7805430293083191,
"learning_rate": 2.23504475447231e-06,
"loss": 0.6738294363021851,
"step": 1103
},
{
"epoch": 1.5907780979827089,
"grad_norm": 0.7611426115036011,
"learning_rate": 2.2200471834815497e-06,
"loss": 0.6646812558174133,
"step": 1104
},
{
"epoch": 1.5922190201729105,
"grad_norm": 0.7774697542190552,
"learning_rate": 2.2050938148736934e-06,
"loss": 0.7116397619247437,
"step": 1105
},
{
"epoch": 1.5936599423631124,
"grad_norm": 0.8153246641159058,
"learning_rate": 2.1901847336074258e-06,
"loss": 0.7710515260696411,
"step": 1106
},
{
"epoch": 1.5951008645533142,
"grad_norm": 0.7410541772842407,
"learning_rate": 2.1753200243898032e-06,
"loss": 0.7078261375427246,
"step": 1107
},
{
"epoch": 1.5965417867435159,
"grad_norm": 0.7237730622291565,
"learning_rate": 2.160499771675778e-06,
"loss": 0.706581711769104,
"step": 1108
},
{
"epoch": 1.5979827089337175,
"grad_norm": 0.8456921577453613,
"learning_rate": 2.14572405966773e-06,
"loss": 0.7275344133377075,
"step": 1109
},
{
"epoch": 1.5994236311239192,
"grad_norm": 0.7417944669723511,
"learning_rate": 2.130992972314965e-06,
"loss": 0.7175461053848267,
"step": 1110
},
{
"epoch": 1.600864553314121,
"grad_norm": 0.7151730060577393,
"learning_rate": 2.1163065933132544e-06,
"loss": 0.6884589791297913,
"step": 1111
},
{
"epoch": 1.602305475504323,
"grad_norm": 0.8144704699516296,
"learning_rate": 2.101665006104362e-06,
"loss": 0.750603199005127,
"step": 1112
},
{
"epoch": 1.6037463976945245,
"grad_norm": 1.0301843881607056,
"learning_rate": 2.087068293875557e-06,
"loss": 0.6825680732727051,
"step": 1113
},
{
"epoch": 1.6051873198847262,
"grad_norm": 0.7837945222854614,
"learning_rate": 2.0725165395591472e-06,
"loss": 0.6988552212715149,
"step": 1114
},
{
"epoch": 1.6066282420749278,
"grad_norm": 0.7630927562713623,
"learning_rate": 2.0580098258320167e-06,
"loss": 0.7969825267791748,
"step": 1115
},
{
"epoch": 1.6080691642651297,
"grad_norm": 0.90474534034729,
"learning_rate": 2.043548235115139e-06,
"loss": 0.7409637570381165,
"step": 1116
},
{
"epoch": 1.6095100864553316,
"grad_norm": 0.879987895488739,
"learning_rate": 2.0291318495731215e-06,
"loss": 0.7136498689651489,
"step": 1117
},
{
"epoch": 1.6109510086455332,
"grad_norm": 0.7135412096977234,
"learning_rate": 2.014760751113738e-06,
"loss": 0.6333821415901184,
"step": 1118
},
{
"epoch": 1.6123919308357348,
"grad_norm": 0.8625454902648926,
"learning_rate": 2.000435021387457e-06,
"loss": 0.6699397563934326,
"step": 1119
},
{
"epoch": 1.6138328530259365,
"grad_norm": 0.9795721173286438,
"learning_rate": 1.9861547417869776e-06,
"loss": 0.7384968996047974,
"step": 1120
},
{
"epoch": 1.6152737752161384,
"grad_norm": 0.7938385605812073,
"learning_rate": 1.9719199934467804e-06,
"loss": 0.7596741914749146,
"step": 1121
},
{
"epoch": 1.6167146974063402,
"grad_norm": 0.8295004367828369,
"learning_rate": 1.957730857242649e-06,
"loss": 0.7557300329208374,
"step": 1122
},
{
"epoch": 1.6181556195965419,
"grad_norm": 0.8414580821990967,
"learning_rate": 1.943587413791217e-06,
"loss": 0.6814196109771729,
"step": 1123
},
{
"epoch": 1.6195965417867435,
"grad_norm": 0.7273333072662354,
"learning_rate": 1.9294897434495196e-06,
"loss": 0.5870025157928467,
"step": 1124
},
{
"epoch": 1.6210374639769451,
"grad_norm": 0.7663788199424744,
"learning_rate": 1.915437926314523e-06,
"loss": 0.6649688482284546,
"step": 1125
},
{
"epoch": 1.622478386167147,
"grad_norm": 0.8005634546279907,
"learning_rate": 1.9014320422226707e-06,
"loss": 0.6026031374931335,
"step": 1126
},
{
"epoch": 1.6239193083573487,
"grad_norm": 0.7385629415512085,
"learning_rate": 1.8874721707494448e-06,
"loss": 0.6108108162879944,
"step": 1127
},
{
"epoch": 1.6253602305475505,
"grad_norm": 0.7913649082183838,
"learning_rate": 1.8735583912088951e-06,
"loss": 0.7508318424224854,
"step": 1128
},
{
"epoch": 1.6268011527377522,
"grad_norm": 0.7595199346542358,
"learning_rate": 1.8596907826531962e-06,
"loss": 0.7356204986572266,
"step": 1129
},
{
"epoch": 1.6282420749279538,
"grad_norm": 0.8176481127738953,
"learning_rate": 1.8458694238722086e-06,
"loss": 0.6614360213279724,
"step": 1130
},
{
"epoch": 1.6296829971181557,
"grad_norm": 0.7632184028625488,
"learning_rate": 1.8320943933930103e-06,
"loss": 0.70728600025177,
"step": 1131
},
{
"epoch": 1.6311239193083573,
"grad_norm": 0.733985960483551,
"learning_rate": 1.818365769479462e-06,
"loss": 0.720178484916687,
"step": 1132
},
{
"epoch": 1.6325648414985592,
"grad_norm": 0.7500304579734802,
"learning_rate": 1.8046836301317727e-06,
"loss": 0.688285231590271,
"step": 1133
},
{
"epoch": 1.6340057636887608,
"grad_norm": 0.7885209918022156,
"learning_rate": 1.7910480530860363e-06,
"loss": 0.7022037506103516,
"step": 1134
},
{
"epoch": 1.6354466858789625,
"grad_norm": 0.7801220417022705,
"learning_rate": 1.7774591158137977e-06,
"loss": 0.6189717054367065,
"step": 1135
},
{
"epoch": 1.6368876080691641,
"grad_norm": 0.7880612015724182,
"learning_rate": 1.7639168955216257e-06,
"loss": 0.6936331987380981,
"step": 1136
},
{
"epoch": 1.638328530259366,
"grad_norm": 0.7047246694564819,
"learning_rate": 1.7504214691506527e-06,
"loss": 0.8859960436820984,
"step": 1137
},
{
"epoch": 1.6397694524495678,
"grad_norm": 0.7471388578414917,
"learning_rate": 1.7369729133761493e-06,
"loss": 0.6911474466323853,
"step": 1138
},
{
"epoch": 1.6412103746397695,
"grad_norm": 0.8122578263282776,
"learning_rate": 1.7235713046070935e-06,
"loss": 0.8664177656173706,
"step": 1139
},
{
"epoch": 1.6426512968299711,
"grad_norm": 0.8845809102058411,
"learning_rate": 1.7102167189857255e-06,
"loss": 0.7293643951416016,
"step": 1140
},
{
"epoch": 1.6440922190201728,
"grad_norm": 0.7609624266624451,
"learning_rate": 1.6969092323871195e-06,
"loss": 0.5892981886863708,
"step": 1141
},
{
"epoch": 1.6455331412103746,
"grad_norm": 0.7141941785812378,
"learning_rate": 1.6836489204187511e-06,
"loss": 0.6005899906158447,
"step": 1142
},
{
"epoch": 1.6469740634005765,
"grad_norm": 0.8074778318405151,
"learning_rate": 1.6704358584200809e-06,
"loss": 0.7091890573501587,
"step": 1143
},
{
"epoch": 1.6484149855907781,
"grad_norm": 0.7607950568199158,
"learning_rate": 1.6572701214621013e-06,
"loss": 0.6214733719825745,
"step": 1144
},
{
"epoch": 1.6498559077809798,
"grad_norm": 0.8153498768806458,
"learning_rate": 1.6441517843469302e-06,
"loss": 0.6415261030197144,
"step": 1145
},
{
"epoch": 1.6512968299711814,
"grad_norm": 0.7986946105957031,
"learning_rate": 1.631080921607383e-06,
"loss": 0.6604084968566895,
"step": 1146
},
{
"epoch": 1.6527377521613833,
"grad_norm": 0.8780381083488464,
"learning_rate": 1.6180576075065412e-06,
"loss": 0.6568028330802917,
"step": 1147
},
{
"epoch": 1.6541786743515852,
"grad_norm": 0.7540323138237,
"learning_rate": 1.6050819160373331e-06,
"loss": 0.7073581218719482,
"step": 1148
},
{
"epoch": 1.6556195965417868,
"grad_norm": 0.7875475883483887,
"learning_rate": 1.5921539209221238e-06,
"loss": 0.7660901546478271,
"step": 1149
},
{
"epoch": 1.6570605187319885,
"grad_norm": 0.7967971563339233,
"learning_rate": 1.5792736956122801e-06,
"loss": 0.6941828727722168,
"step": 1150
},
{
"epoch": 1.65850144092219,
"grad_norm": 0.804412305355072,
"learning_rate": 1.566441313287762e-06,
"loss": 0.7443846464157104,
"step": 1151
},
{
"epoch": 1.659942363112392,
"grad_norm": 0.7768911719322205,
"learning_rate": 1.5536568468567126e-06,
"loss": 0.783721923828125,
"step": 1152
},
{
"epoch": 1.6613832853025938,
"grad_norm": 0.8279328942298889,
"learning_rate": 1.5409203689550313e-06,
"loss": 0.7449991703033447,
"step": 1153
},
{
"epoch": 1.6628242074927955,
"grad_norm": 0.7922146320343018,
"learning_rate": 1.5282319519459643e-06,
"loss": 0.7389559745788574,
"step": 1154
},
{
"epoch": 1.6642651296829971,
"grad_norm": 0.8328949213027954,
"learning_rate": 1.5155916679197057e-06,
"loss": 0.7289194464683533,
"step": 1155
},
{
"epoch": 1.6657060518731988,
"grad_norm": 0.782349705696106,
"learning_rate": 1.5029995886929717e-06,
"loss": 0.8883833885192871,
"step": 1156
},
{
"epoch": 1.6671469740634006,
"grad_norm": 0.8573653101921082,
"learning_rate": 1.4904557858085967e-06,
"loss": 0.680975079536438,
"step": 1157
},
{
"epoch": 1.6685878962536023,
"grad_norm": 1.300133466720581,
"learning_rate": 1.4779603305351397e-06,
"loss": 0.6703798770904541,
"step": 1158
},
{
"epoch": 1.6700288184438041,
"grad_norm": 0.8434372544288635,
"learning_rate": 1.4655132938664607e-06,
"loss": 0.6783407926559448,
"step": 1159
},
{
"epoch": 1.6714697406340058,
"grad_norm": 0.7665268182754517,
"learning_rate": 1.4531147465213247e-06,
"loss": 0.7479151487350464,
"step": 1160
},
{
"epoch": 1.6729106628242074,
"grad_norm": 0.7832397818565369,
"learning_rate": 1.4407647589430084e-06,
"loss": 0.8307238817214966,
"step": 1161
},
{
"epoch": 1.674351585014409,
"grad_norm": 0.803061306476593,
"learning_rate": 1.4284634012988886e-06,
"loss": 0.719711422920227,
"step": 1162
},
{
"epoch": 1.675792507204611,
"grad_norm": 0.786881685256958,
"learning_rate": 1.4162107434800422e-06,
"loss": 0.7440253496170044,
"step": 1163
},
{
"epoch": 1.6772334293948128,
"grad_norm": 0.8766026496887207,
"learning_rate": 1.4040068551008658e-06,
"loss": 0.7081141471862793,
"step": 1164
},
{
"epoch": 1.6786743515850144,
"grad_norm": 0.7960566878318787,
"learning_rate": 1.3918518054986607e-06,
"loss": 0.5892655253410339,
"step": 1165
},
{
"epoch": 1.680115273775216,
"grad_norm": 0.7898479104042053,
"learning_rate": 1.3797456637332451e-06,
"loss": 0.7909804582595825,
"step": 1166
},
{
"epoch": 1.6815561959654177,
"grad_norm": 0.805218517780304,
"learning_rate": 1.3676884985865735e-06,
"loss": 0.773381233215332,
"step": 1167
},
{
"epoch": 1.6829971181556196,
"grad_norm": 0.8499282598495483,
"learning_rate": 1.3556803785623274e-06,
"loss": 0.7352174520492554,
"step": 1168
},
{
"epoch": 1.6844380403458215,
"grad_norm": 0.7760320901870728,
"learning_rate": 1.3437213718855347e-06,
"loss": 0.7122522592544556,
"step": 1169
},
{
"epoch": 1.685878962536023,
"grad_norm": 0.9259606003761292,
"learning_rate": 1.3318115465021896e-06,
"loss": 0.7463353872299194,
"step": 1170
},
{
"epoch": 1.6873198847262247,
"grad_norm": 0.7640926837921143,
"learning_rate": 1.3199509700788527e-06,
"loss": 0.751839280128479,
"step": 1171
},
{
"epoch": 1.6887608069164264,
"grad_norm": 0.6983827948570251,
"learning_rate": 1.3081397100022718e-06,
"loss": 0.6213783025741577,
"step": 1172
},
{
"epoch": 1.6902017291066282,
"grad_norm": 0.7470341920852661,
"learning_rate": 1.2963778333790067e-06,
"loss": 0.7670629024505615,
"step": 1173
},
{
"epoch": 1.6916426512968301,
"grad_norm": 0.8287291526794434,
"learning_rate": 1.2846654070350372e-06,
"loss": 0.6274840831756592,
"step": 1174
},
{
"epoch": 1.6930835734870318,
"grad_norm": 0.8524205684661865,
"learning_rate": 1.2730024975153854e-06,
"loss": 0.7239786386489868,
"step": 1175
},
{
"epoch": 1.6945244956772334,
"grad_norm": 0.844444215297699,
"learning_rate": 1.2613891710837467e-06,
"loss": 0.7526017427444458,
"step": 1176
},
{
"epoch": 1.695965417867435,
"grad_norm": 0.9229111671447754,
"learning_rate": 1.249825493722101e-06,
"loss": 0.707030177116394,
"step": 1177
},
{
"epoch": 1.697406340057637,
"grad_norm": 0.7492605447769165,
"learning_rate": 1.2383115311303417e-06,
"loss": 0.7857674360275269,
"step": 1178
},
{
"epoch": 1.6988472622478388,
"grad_norm": 0.8065267205238342,
"learning_rate": 1.2268473487259124e-06,
"loss": 0.7206966876983643,
"step": 1179
},
{
"epoch": 1.7002881844380404,
"grad_norm": 0.6866024136543274,
"learning_rate": 1.2154330116434188e-06,
"loss": 0.6249977350234985,
"step": 1180
},
{
"epoch": 1.701729106628242,
"grad_norm": 0.7971569299697876,
"learning_rate": 1.204068584734267e-06,
"loss": 0.6811063289642334,
"step": 1181
},
{
"epoch": 1.7031700288184437,
"grad_norm": 0.8167104721069336,
"learning_rate": 1.1927541325663018e-06,
"loss": 0.6600002646446228,
"step": 1182
},
{
"epoch": 1.7046109510086456,
"grad_norm": 0.7842592000961304,
"learning_rate": 1.1814897194234253e-06,
"loss": 0.6734592914581299,
"step": 1183
},
{
"epoch": 1.7060518731988472,
"grad_norm": 0.8304277658462524,
"learning_rate": 1.1702754093052415e-06,
"loss": 0.7508211135864258,
"step": 1184
},
{
"epoch": 1.707492795389049,
"grad_norm": 0.813774049282074,
"learning_rate": 1.1591112659266934e-06,
"loss": 0.7605842351913452,
"step": 1185
},
{
"epoch": 1.7089337175792507,
"grad_norm": 0.7906317114830017,
"learning_rate": 1.1479973527176935e-06,
"loss": 0.7054247260093689,
"step": 1186
},
{
"epoch": 1.7103746397694524,
"grad_norm": 0.7082518935203552,
"learning_rate": 1.136933732822768e-06,
"loss": 0.6379563212394714,
"step": 1187
},
{
"epoch": 1.7118155619596542,
"grad_norm": 0.7211143970489502,
"learning_rate": 1.125920469100704e-06,
"loss": 0.5920464992523193,
"step": 1188
},
{
"epoch": 1.7132564841498559,
"grad_norm": 0.7928754091262817,
"learning_rate": 1.1149576241241788e-06,
"loss": 0.6615055799484253,
"step": 1189
},
{
"epoch": 1.7146974063400577,
"grad_norm": 0.6627988219261169,
"learning_rate": 1.104045260179415e-06,
"loss": 0.6191346645355225,
"step": 1190
},
{
"epoch": 1.7161383285302594,
"grad_norm": 0.8548330664634705,
"learning_rate": 1.0931834392658213e-06,
"loss": 0.6938613653182983,
"step": 1191
},
{
"epoch": 1.717579250720461,
"grad_norm": 0.806582510471344,
"learning_rate": 1.082372223095647e-06,
"loss": 0.7682313323020935,
"step": 1192
},
{
"epoch": 1.7190201729106627,
"grad_norm": 0.7843705415725708,
"learning_rate": 1.0716116730936254e-06,
"loss": 0.6646119356155396,
"step": 1193
},
{
"epoch": 1.7204610951008645,
"grad_norm": 0.8586521744728088,
"learning_rate": 1.0609018503966207e-06,
"loss": 0.8926165103912354,
"step": 1194
},
{
"epoch": 1.7219020172910664,
"grad_norm": 0.8918136954307556,
"learning_rate": 1.0502428158532952e-06,
"loss": 0.7644927501678467,
"step": 1195
},
{
"epoch": 1.723342939481268,
"grad_norm": 0.7381912469863892,
"learning_rate": 1.039634630023747e-06,
"loss": 0.8272491693496704,
"step": 1196
},
{
"epoch": 1.7247838616714697,
"grad_norm": 0.7400140166282654,
"learning_rate": 1.0290773531791743e-06,
"loss": 0.706849992275238,
"step": 1197
},
{
"epoch": 1.7262247838616713,
"grad_norm": 0.8239127397537231,
"learning_rate": 1.0185710453015374e-06,
"loss": 0.7492132186889648,
"step": 1198
},
{
"epoch": 1.7276657060518732,
"grad_norm": 0.7474268078804016,
"learning_rate": 1.0081157660832086e-06,
"loss": 0.6511049270629883,
"step": 1199
},
{
"epoch": 1.729106628242075,
"grad_norm": 0.8263754844665527,
"learning_rate": 9.977115749266331e-07,
"loss": 0.6856651902198792,
"step": 1200
},
{
"epoch": 1.7305475504322767,
"grad_norm": 0.8760755658149719,
"learning_rate": 9.87358530944006e-07,
"loss": 0.8096874356269836,
"step": 1201
},
{
"epoch": 1.7319884726224783,
"grad_norm": 0.7762811779975891,
"learning_rate": 9.77056692956916e-07,
"loss": 0.7226123809814453,
"step": 1202
},
{
"epoch": 1.73342939481268,
"grad_norm": 0.9807335138320923,
"learning_rate": 9.668061194960255e-07,
"loss": 0.76618891954422,
"step": 1203
},
{
"epoch": 1.7348703170028819,
"grad_norm": 0.8935138583183289,
"learning_rate": 9.566068688007346e-07,
"loss": 0.6808332204818726,
"step": 1204
},
{
"epoch": 1.7363112391930837,
"grad_norm": 0.786347508430481,
"learning_rate": 9.464589988188466e-07,
"loss": 0.758068859577179,
"step": 1205
},
{
"epoch": 1.7377521613832854,
"grad_norm": 0.9107068181037903,
"learning_rate": 9.363625672062427e-07,
"loss": 0.6974388957023621,
"step": 1206
},
{
"epoch": 1.739193083573487,
"grad_norm": 0.8915838599205017,
"learning_rate": 9.263176313265521e-07,
"loss": 0.7282131314277649,
"step": 1207
},
{
"epoch": 1.7406340057636887,
"grad_norm": 0.8899832963943481,
"learning_rate": 9.163242482508306e-07,
"loss": 0.7565990686416626,
"step": 1208
},
{
"epoch": 1.7420749279538905,
"grad_norm": 0.7595863342285156,
"learning_rate": 9.06382474757228e-07,
"loss": 0.6799623370170593,
"step": 1209
},
{
"epoch": 1.7435158501440924,
"grad_norm": 0.7380494475364685,
"learning_rate": 8.964923673306725e-07,
"loss": 0.6662101149559021,
"step": 1210
},
{
"epoch": 1.744956772334294,
"grad_norm": 0.7743575572967529,
"learning_rate": 8.866539821625519e-07,
"loss": 0.8160727620124817,
"step": 1211
},
{
"epoch": 1.7463976945244957,
"grad_norm": 0.8004354238510132,
"learning_rate": 8.76867375150382e-07,
"loss": 0.7559400796890259,
"step": 1212
},
{
"epoch": 1.7478386167146973,
"grad_norm": 0.7768638730049133,
"learning_rate": 8.671326018975024e-07,
"loss": 0.705933690071106,
"step": 1213
},
{
"epoch": 1.7492795389048992,
"grad_norm": 0.8964418172836304,
"learning_rate": 8.574497177127561e-07,
"loss": 0.7099254131317139,
"step": 1214
},
{
"epoch": 1.7507204610951008,
"grad_norm": 0.7826142907142639,
"learning_rate": 8.47818777610172e-07,
"loss": 0.6332671046257019,
"step": 1215
},
{
"epoch": 1.7521613832853027,
"grad_norm": 0.860312819480896,
"learning_rate": 8.38239836308653e-07,
"loss": 0.8072866201400757,
"step": 1216
},
{
"epoch": 1.7536023054755043,
"grad_norm": 0.7804715037345886,
"learning_rate": 8.287129482316725e-07,
"loss": 0.6959720849990845,
"step": 1217
},
{
"epoch": 1.755043227665706,
"grad_norm": 0.865706205368042,
"learning_rate": 8.192381675069561e-07,
"loss": 0.7419095039367676,
"step": 1218
},
{
"epoch": 1.7564841498559076,
"grad_norm": 0.7246455550193787,
"learning_rate": 8.098155479661751e-07,
"loss": 0.6388202905654907,
"step": 1219
},
{
"epoch": 1.7579250720461095,
"grad_norm": 0.7382645606994629,
"learning_rate": 8.004451431446503e-07,
"loss": 0.7727887034416199,
"step": 1220
},
{
"epoch": 1.7593659942363113,
"grad_norm": 0.6949777603149414,
"learning_rate": 7.911270062810338e-07,
"loss": 0.6242851614952087,
"step": 1221
},
{
"epoch": 1.760806916426513,
"grad_norm": 0.7804027795791626,
"learning_rate": 7.818611903170159e-07,
"loss": 0.7478713989257812,
"step": 1222
},
{
"epoch": 1.7622478386167146,
"grad_norm": 0.9338253736495972,
"learning_rate": 7.72647747897024e-07,
"loss": 0.767483115196228,
"step": 1223
},
{
"epoch": 1.7636887608069163,
"grad_norm": 0.8870203495025635,
"learning_rate": 7.634867313679172e-07,
"loss": 0.7354853749275208,
"step": 1224
},
{
"epoch": 1.7651296829971181,
"grad_norm": 0.8656888604164124,
"learning_rate": 7.543781927786953e-07,
"loss": 0.8114850521087646,
"step": 1225
},
{
"epoch": 1.76657060518732,
"grad_norm": 0.7293028831481934,
"learning_rate": 7.453221838802027e-07,
"loss": 0.676995038986206,
"step": 1226
},
{
"epoch": 1.7680115273775217,
"grad_norm": 0.8250572085380554,
"learning_rate": 7.363187561248275e-07,
"loss": 0.7541552782058716,
"step": 1227
},
{
"epoch": 1.7694524495677233,
"grad_norm": 0.7912562489509583,
"learning_rate": 7.273679606662166e-07,
"loss": 0.7518225908279419,
"step": 1228
},
{
"epoch": 1.770893371757925,
"grad_norm": 0.9369992017745972,
"learning_rate": 7.184698483589858e-07,
"loss": 0.6083760261535645,
"step": 1229
},
{
"epoch": 1.7723342939481268,
"grad_norm": 0.7918375134468079,
"learning_rate": 7.096244697584221e-07,
"loss": 0.7271479964256287,
"step": 1230
},
{
"epoch": 1.7737752161383287,
"grad_norm": 0.804225742816925,
"learning_rate": 7.008318751202048e-07,
"loss": 0.7801766991615295,
"step": 1231
},
{
"epoch": 1.7752161383285303,
"grad_norm": 0.8039699196815491,
"learning_rate": 6.92092114400118e-07,
"loss": 0.8230787515640259,
"step": 1232
},
{
"epoch": 1.776657060518732,
"grad_norm": 0.7346729636192322,
"learning_rate": 6.834052372537658e-07,
"loss": 0.6628938913345337,
"step": 1233
},
{
"epoch": 1.7780979827089336,
"grad_norm": 0.7379924654960632,
"learning_rate": 6.747712930362848e-07,
"loss": 0.6801489591598511,
"step": 1234
},
{
"epoch": 1.7795389048991355,
"grad_norm": 0.8555311560630798,
"learning_rate": 6.661903308020801e-07,
"loss": 0.7245817184448242,
"step": 1235
},
{
"epoch": 1.7809798270893373,
"grad_norm": 0.8269429802894592,
"learning_rate": 6.57662399304525e-07,
"loss": 0.632072925567627,
"step": 1236
},
{
"epoch": 1.782420749279539,
"grad_norm": 0.6983954906463623,
"learning_rate": 6.491875469956998e-07,
"loss": 0.6843686699867249,
"step": 1237
},
{
"epoch": 1.7838616714697406,
"grad_norm": 0.8060896992683411,
"learning_rate": 6.407658220261126e-07,
"loss": 0.65267014503479,
"step": 1238
},
{
"epoch": 1.7853025936599423,
"grad_norm": 0.8000738024711609,
"learning_rate": 6.323972722444215e-07,
"loss": 0.6732505559921265,
"step": 1239
},
{
"epoch": 1.7867435158501441,
"grad_norm": 0.7847197651863098,
"learning_rate": 6.240819451971658e-07,
"loss": 0.6458503603935242,
"step": 1240
},
{
"epoch": 1.7881844380403458,
"grad_norm": 0.9293048977851868,
"learning_rate": 6.158198881284994e-07,
"loss": 0.8058085441589355,
"step": 1241
},
{
"epoch": 1.7896253602305476,
"grad_norm": 0.8233553171157837,
"learning_rate": 6.076111479799162e-07,
"loss": 0.7245039343833923,
"step": 1242
},
{
"epoch": 1.7910662824207493,
"grad_norm": 0.8260737061500549,
"learning_rate": 5.994557713899829e-07,
"loss": 0.6901232004165649,
"step": 1243
},
{
"epoch": 1.792507204610951,
"grad_norm": 0.9803736209869385,
"learning_rate": 5.913538046940859e-07,
"loss": 0.6991169452667236,
"step": 1244
},
{
"epoch": 1.7939481268011528,
"grad_norm": 0.7552391290664673,
"learning_rate": 5.833052939241513e-07,
"loss": 0.7670243978500366,
"step": 1245
},
{
"epoch": 1.7953890489913544,
"grad_norm": 0.8138408660888672,
"learning_rate": 5.753102848083924e-07,
"loss": 0.5832873582839966,
"step": 1246
},
{
"epoch": 1.7968299711815563,
"grad_norm": 0.7978786826133728,
"learning_rate": 5.673688227710539e-07,
"loss": 0.682740330696106,
"step": 1247
},
{
"epoch": 1.798270893371758,
"grad_norm": 0.7594887018203735,
"learning_rate": 5.594809529321443e-07,
"loss": 0.6237415671348572,
"step": 1248
},
{
"epoch": 1.7997118155619596,
"grad_norm": 0.9314215779304504,
"learning_rate": 5.516467201071829e-07,
"loss": 0.6325702667236328,
"step": 1249
},
{
"epoch": 1.8011527377521612,
"grad_norm": 0.8046096563339233,
"learning_rate": 5.438661688069513e-07,
"loss": 0.6462284326553345,
"step": 1250
},
{
"epoch": 1.802593659942363,
"grad_norm": 0.8647462129592896,
"learning_rate": 5.361393432372319e-07,
"loss": 0.7181062698364258,
"step": 1251
},
{
"epoch": 1.804034582132565,
"grad_norm": 0.8170526027679443,
"learning_rate": 5.284662872985602e-07,
"loss": 0.7745201587677002,
"step": 1252
},
{
"epoch": 1.8054755043227666,
"grad_norm": 0.9152999520301819,
"learning_rate": 5.208470445859782e-07,
"loss": 0.8000661134719849,
"step": 1253
},
{
"epoch": 1.8069164265129682,
"grad_norm": 0.8332073092460632,
"learning_rate": 5.132816583887812e-07,
"loss": 0.7485237121582031,
"step": 1254
},
{
"epoch": 1.8083573487031699,
"grad_norm": 0.7533081769943237,
"learning_rate": 5.057701716902764e-07,
"loss": 0.6614536046981812,
"step": 1255
},
{
"epoch": 1.8097982708933718,
"grad_norm": 0.7314124703407288,
"learning_rate": 4.983126271675354e-07,
"loss": 0.5718865394592285,
"step": 1256
},
{
"epoch": 1.8112391930835736,
"grad_norm": 0.7745943665504456,
"learning_rate": 4.909090671911554e-07,
"loss": 0.6789857149124146,
"step": 1257
},
{
"epoch": 1.8126801152737753,
"grad_norm": 0.9485689401626587,
"learning_rate": 4.835595338250155e-07,
"loss": 0.7332895398139954,
"step": 1258
},
{
"epoch": 1.814121037463977,
"grad_norm": 0.7740848660469055,
"learning_rate": 4.762640688260356e-07,
"loss": 0.7217935919761658,
"step": 1259
},
{
"epoch": 1.8155619596541785,
"grad_norm": 0.7472824454307556,
"learning_rate": 4.690227136439496e-07,
"loss": 0.7193496227264404,
"step": 1260
},
{
"epoch": 1.8170028818443804,
"grad_norm": 0.8185622692108154,
"learning_rate": 4.618355094210547e-07,
"loss": 0.7183260917663574,
"step": 1261
},
{
"epoch": 1.8184438040345823,
"grad_norm": 0.6518324017524719,
"learning_rate": 4.5470249699198667e-07,
"loss": 0.5979580879211426,
"step": 1262
},
{
"epoch": 1.819884726224784,
"grad_norm": 0.8009700179100037,
"learning_rate": 4.476237168834929e-07,
"loss": 0.9154891967773438,
"step": 1263
},
{
"epoch": 1.8213256484149856,
"grad_norm": 0.814588189125061,
"learning_rate": 4.4059920931418866e-07,
"loss": 0.7386868000030518,
"step": 1264
},
{
"epoch": 1.8227665706051872,
"grad_norm": 0.754295289516449,
"learning_rate": 4.336290141943367e-07,
"loss": 0.8517154455184937,
"step": 1265
},
{
"epoch": 1.824207492795389,
"grad_norm": 0.9901515245437622,
"learning_rate": 4.267131711256245e-07,
"loss": 0.8675104379653931,
"step": 1266
},
{
"epoch": 1.825648414985591,
"grad_norm": 0.815948486328125,
"learning_rate": 4.1985171940092884e-07,
"loss": 0.7671102285385132,
"step": 1267
},
{
"epoch": 1.8270893371757926,
"grad_norm": 0.7330636978149414,
"learning_rate": 4.130446980041003e-07,
"loss": 0.6249233484268188,
"step": 1268
},
{
"epoch": 1.8285302593659942,
"grad_norm": 0.8817590475082397,
"learning_rate": 4.0629214560973907e-07,
"loss": 0.7181810140609741,
"step": 1269
},
{
"epoch": 1.8299711815561959,
"grad_norm": 0.7729085087776184,
"learning_rate": 3.995941005829773e-07,
"loss": 0.5876595973968506,
"step": 1270
},
{
"epoch": 1.8314121037463977,
"grad_norm": 0.7303892970085144,
"learning_rate": 3.92950600979255e-07,
"loss": 0.6904604434967041,
"step": 1271
},
{
"epoch": 1.8328530259365994,
"grad_norm": 0.8344207406044006,
"learning_rate": 3.863616845441154e-07,
"loss": 0.7644379138946533,
"step": 1272
},
{
"epoch": 1.8342939481268012,
"grad_norm": 0.7694862484931946,
"learning_rate": 3.798273887129755e-07,
"loss": 0.6588039398193359,
"step": 1273
},
{
"epoch": 1.8357348703170029,
"grad_norm": 0.788310170173645,
"learning_rate": 3.733477506109262e-07,
"loss": 0.708280086517334,
"step": 1274
},
{
"epoch": 1.8371757925072045,
"grad_norm": 0.67696613073349,
"learning_rate": 3.669228070525177e-07,
"loss": 0.6759251356124878,
"step": 1275
},
{
"epoch": 1.8386167146974062,
"grad_norm": 0.8932890295982361,
"learning_rate": 3.6055259454154334e-07,
"loss": 0.8551055788993835,
"step": 1276
},
{
"epoch": 1.840057636887608,
"grad_norm": 0.8879607915878296,
"learning_rate": 3.5423714927084186e-07,
"loss": 0.8530901670455933,
"step": 1277
},
{
"epoch": 1.84149855907781,
"grad_norm": 0.7950157523155212,
"learning_rate": 3.4797650712208863e-07,
"loss": 0.5972962379455566,
"step": 1278
},
{
"epoch": 1.8429394812680115,
"grad_norm": 0.9025905728340149,
"learning_rate": 3.417707036655882e-07,
"loss": 0.7006347179412842,
"step": 1279
},
{
"epoch": 1.8443804034582132,
"grad_norm": 0.7743578553199768,
"learning_rate": 3.356197741600753e-07,
"loss": 0.7719041705131531,
"step": 1280
},
{
"epoch": 1.8458213256484148,
"grad_norm": 0.8957917094230652,
"learning_rate": 3.2952375355251865e-07,
"loss": 0.7189593315124512,
"step": 1281
},
{
"epoch": 1.8472622478386167,
"grad_norm": 0.7992256283760071,
"learning_rate": 3.234826764779131e-07,
"loss": 0.6644465923309326,
"step": 1282
},
{
"epoch": 1.8487031700288186,
"grad_norm": 0.7817349433898926,
"learning_rate": 3.174965772590866e-07,
"loss": 0.7548238039016724,
"step": 1283
},
{
"epoch": 1.8501440922190202,
"grad_norm": 0.8072176575660706,
"learning_rate": 3.1156548990651237e-07,
"loss": 0.6053937673568726,
"step": 1284
},
{
"epoch": 1.8515850144092219,
"grad_norm": 1.0495251417160034,
"learning_rate": 3.0568944811810497e-07,
"loss": 0.6877189874649048,
"step": 1285
},
{
"epoch": 1.8530259365994235,
"grad_norm": 0.8143266439437866,
"learning_rate": 2.9986848527903347e-07,
"loss": 0.8271951675415039,
"step": 1286
},
{
"epoch": 1.8544668587896254,
"grad_norm": 0.8350967764854431,
"learning_rate": 2.9410263446153385e-07,
"loss": 0.6571352481842041,
"step": 1287
},
{
"epoch": 1.8559077809798272,
"grad_norm": 0.9221143126487732,
"learning_rate": 2.8839192842471943e-07,
"loss": 0.743099570274353,
"step": 1288
},
{
"epoch": 1.8573487031700289,
"grad_norm": 0.7692269682884216,
"learning_rate": 2.827363996143895e-07,
"loss": 0.7288011312484741,
"step": 1289
},
{
"epoch": 1.8587896253602305,
"grad_norm": 0.8214716911315918,
"learning_rate": 2.771360801628575e-07,
"loss": 0.6358669996261597,
"step": 1290
},
{
"epoch": 1.8602305475504322,
"grad_norm": 0.7842360734939575,
"learning_rate": 2.7159100188875355e-07,
"loss": 0.7078214883804321,
"step": 1291
},
{
"epoch": 1.861671469740634,
"grad_norm": 0.7359623312950134,
"learning_rate": 2.6610119629685517e-07,
"loss": 0.6269357204437256,
"step": 1292
},
{
"epoch": 1.8631123919308359,
"grad_norm": 0.8186429142951965,
"learning_rate": 2.606666945779024e-07,
"loss": 0.7744366526603699,
"step": 1293
},
{
"epoch": 1.8645533141210375,
"grad_norm": 0.5552643537521362,
"learning_rate": 2.552875276084232e-07,
"loss": 0.4451786279678345,
"step": 1294
},
{
"epoch": 1.8659942363112392,
"grad_norm": 0.779941976070404,
"learning_rate": 2.4996372595055605e-07,
"loss": 0.7336533069610596,
"step": 1295
},
{
"epoch": 1.8674351585014408,
"grad_norm": 0.9156535863876343,
"learning_rate": 2.446953198518776e-07,
"loss": 0.7989022135734558,
"step": 1296
},
{
"epoch": 1.8688760806916427,
"grad_norm": 0.806794285774231,
"learning_rate": 2.394823392452306e-07,
"loss": 0.7164968252182007,
"step": 1297
},
{
"epoch": 1.8703170028818443,
"grad_norm": 0.793441116809845,
"learning_rate": 2.343248137485532e-07,
"loss": 0.6987060308456421,
"step": 1298
},
{
"epoch": 1.8717579250720462,
"grad_norm": 0.7144315838813782,
"learning_rate": 2.2922277266471226e-07,
"loss": 0.683693528175354,
"step": 1299
},
{
"epoch": 1.8731988472622478,
"grad_norm": 0.788743257522583,
"learning_rate": 2.241762449813345e-07,
"loss": 0.7102502584457397,
"step": 1300
},
{
"epoch": 1.8746397694524495,
"grad_norm": 0.955685555934906,
"learning_rate": 2.191852593706456e-07,
"loss": 0.6945414543151855,
"step": 1301
},
{
"epoch": 1.8760806916426513,
"grad_norm": 0.8227760195732117,
"learning_rate": 2.142498441893004e-07,
"loss": 0.7690091133117676,
"step": 1302
},
{
"epoch": 1.877521613832853,
"grad_norm": 0.9441574811935425,
"learning_rate": 2.0937002747823067e-07,
"loss": 0.7788715362548828,
"step": 1303
},
{
"epoch": 1.8789625360230549,
"grad_norm": 0.7637494802474976,
"learning_rate": 2.0454583696247864e-07,
"loss": 0.6455879807472229,
"step": 1304
},
{
"epoch": 1.8804034582132565,
"grad_norm": 0.7609235644340515,
"learning_rate": 1.997773000510428e-07,
"loss": 0.7027079463005066,
"step": 1305
},
{
"epoch": 1.8818443804034581,
"grad_norm": 0.8110032677650452,
"learning_rate": 1.9506444383672328e-07,
"loss": 0.6902576684951782,
"step": 1306
},
{
"epoch": 1.8832853025936598,
"grad_norm": 0.8210185766220093,
"learning_rate": 1.9040729509596235e-07,
"loss": 0.6750905513763428,
"step": 1307
},
{
"epoch": 1.8847262247838616,
"grad_norm": 0.965930163860321,
"learning_rate": 1.8580588028869972e-07,
"loss": 0.6262344121932983,
"step": 1308
},
{
"epoch": 1.8861671469740635,
"grad_norm": 0.7677191495895386,
"learning_rate": 1.8126022555821742e-07,
"loss": 0.6965380907058716,
"step": 1309
},
{
"epoch": 1.8876080691642652,
"grad_norm": 0.795842170715332,
"learning_rate": 1.7677035673099196e-07,
"loss": 0.6852426528930664,
"step": 1310
},
{
"epoch": 1.8890489913544668,
"grad_norm": 0.829835832118988,
"learning_rate": 1.7233629931654782e-07,
"loss": 0.7562562227249146,
"step": 1311
},
{
"epoch": 1.8904899135446684,
"grad_norm": 0.8017676472663879,
"learning_rate": 1.6795807850731428e-07,
"loss": 0.6990101337432861,
"step": 1312
},
{
"epoch": 1.8919308357348703,
"grad_norm": 0.8548034429550171,
"learning_rate": 1.6363571917847875e-07,
"loss": 0.7861789464950562,
"step": 1313
},
{
"epoch": 1.8933717579250722,
"grad_norm": 0.8647658228874207,
"learning_rate": 1.593692458878482e-07,
"loss": 0.8342926502227783,
"step": 1314
},
{
"epoch": 1.8948126801152738,
"grad_norm": 0.8461260795593262,
"learning_rate": 1.5515868287571124e-07,
"loss": 0.7062436938285828,
"step": 1315
},
{
"epoch": 1.8962536023054755,
"grad_norm": 0.7481532096862793,
"learning_rate": 1.5100405406469508e-07,
"loss": 0.8491038084030151,
"step": 1316
},
{
"epoch": 1.897694524495677,
"grad_norm": 0.846656084060669,
"learning_rate": 1.469053830596323e-07,
"loss": 0.7018347978591919,
"step": 1317
},
{
"epoch": 1.899135446685879,
"grad_norm": 0.8599597811698914,
"learning_rate": 1.4286269314743085e-07,
"loss": 0.729584813117981,
"step": 1318
},
{
"epoch": 1.9005763688760808,
"grad_norm": 0.7672188878059387,
"learning_rate": 1.3887600729693307e-07,
"loss": 0.6782927513122559,
"step": 1319
},
{
"epoch": 1.9020172910662825,
"grad_norm": 0.6663503050804138,
"learning_rate": 1.3494534815879257e-07,
"loss": 0.6440367102622986,
"step": 1320
},
{
"epoch": 1.9034582132564841,
"grad_norm": 0.824517011642456,
"learning_rate": 1.310707380653442e-07,
"loss": 0.6864629983901978,
"step": 1321
},
{
"epoch": 1.9048991354466858,
"grad_norm": 0.7598153948783875,
"learning_rate": 1.2725219903047425e-07,
"loss": 0.7515442371368408,
"step": 1322
},
{
"epoch": 1.9063400576368876,
"grad_norm": 0.9752185940742493,
"learning_rate": 1.2348975274949605e-07,
"loss": 0.8176276683807373,
"step": 1323
},
{
"epoch": 1.9077809798270895,
"grad_norm": 0.7803353667259216,
"learning_rate": 1.1978342059902892e-07,
"loss": 0.6281642913818359,
"step": 1324
},
{
"epoch": 1.9092219020172911,
"grad_norm": 0.8944565057754517,
"learning_rate": 1.161332236368784e-07,
"loss": 0.7009260654449463,
"step": 1325
},
{
"epoch": 1.9106628242074928,
"grad_norm": 0.7747707366943359,
"learning_rate": 1.1253918260190844e-07,
"loss": 0.7126386761665344,
"step": 1326
},
{
"epoch": 1.9121037463976944,
"grad_norm": 0.7823174595832825,
"learning_rate": 1.0900131791393265e-07,
"loss": 0.688586950302124,
"step": 1327
},
{
"epoch": 1.9135446685878963,
"grad_norm": 0.8464524149894714,
"learning_rate": 1.0551964967359441e-07,
"loss": 0.8571232557296753,
"step": 1328
},
{
"epoch": 1.914985590778098,
"grad_norm": 0.8994123935699463,
"learning_rate": 1.0209419766225026e-07,
"loss": 0.819856584072113,
"step": 1329
},
{
"epoch": 1.9164265129682998,
"grad_norm": 0.7324085831642151,
"learning_rate": 9.872498134186115e-08,
"loss": 0.6851845979690552,
"step": 1330
},
{
"epoch": 1.9178674351585014,
"grad_norm": 0.798224925994873,
"learning_rate": 9.541201985488358e-08,
"loss": 0.7378122210502625,
"step": 1331
},
{
"epoch": 1.919308357348703,
"grad_norm": 0.7607042789459229,
"learning_rate": 9.215533202415306e-08,
"loss": 0.7003333568572998,
"step": 1332
},
{
"epoch": 1.920749279538905,
"grad_norm": 0.8446595072746277,
"learning_rate": 8.89549363527864e-08,
"loss": 0.6810410022735596,
"step": 1333
},
{
"epoch": 1.9221902017291066,
"grad_norm": 0.7857621312141418,
"learning_rate": 8.581085102407072e-08,
"loss": 0.6305010318756104,
"step": 1334
},
{
"epoch": 1.9236311239193085,
"grad_norm": 0.8723453879356384,
"learning_rate": 8.272309390136013e-08,
"loss": 0.6027143597602844,
"step": 1335
},
{
"epoch": 1.92507204610951,
"grad_norm": 0.8410208821296692,
"learning_rate": 7.96916825279781e-08,
"loss": 0.7244712114334106,
"step": 1336
},
{
"epoch": 1.9265129682997117,
"grad_norm": 0.8330152034759521,
"learning_rate": 7.671663412711527e-08,
"loss": 0.810680627822876,
"step": 1337
},
{
"epoch": 1.9279538904899134,
"grad_norm": 0.7898954749107361,
"learning_rate": 7.37979656017318e-08,
"loss": 0.759876012802124,
"step": 1338
},
{
"epoch": 1.9293948126801153,
"grad_norm": 0.7904804348945618,
"learning_rate": 7.09356935344585e-08,
"loss": 0.6283839344978333,
"step": 1339
},
{
"epoch": 1.9308357348703171,
"grad_norm": 0.7238035202026367,
"learning_rate": 6.812983418750917e-08,
"loss": 0.6455138921737671,
"step": 1340
},
{
"epoch": 1.9322766570605188,
"grad_norm": 0.8438715934753418,
"learning_rate": 6.538040350258401e-08,
"loss": 0.8216662406921387,
"step": 1341
},
{
"epoch": 1.9337175792507204,
"grad_norm": 0.7600931525230408,
"learning_rate": 6.268741710077741e-08,
"loss": 0.8431342840194702,
"step": 1342
},
{
"epoch": 1.935158501440922,
"grad_norm": 0.8402287364006042,
"learning_rate": 6.005089028249366e-08,
"loss": 0.826948881149292,
"step": 1343
},
{
"epoch": 1.936599423631124,
"grad_norm": 0.8917995691299438,
"learning_rate": 5.747083802735587e-08,
"loss": 0.7549749612808228,
"step": 1344
},
{
"epoch": 1.9380403458213258,
"grad_norm": 0.8245948553085327,
"learning_rate": 5.494727499412489e-08,
"loss": 0.6571122407913208,
"step": 1345
},
{
"epoch": 1.9394812680115274,
"grad_norm": 0.7117204070091248,
"learning_rate": 5.2480215520611665e-08,
"loss": 0.7198864817619324,
"step": 1346
},
{
"epoch": 1.940922190201729,
"grad_norm": 0.8619735240936279,
"learning_rate": 5.006967362359949e-08,
"loss": 0.7122694253921509,
"step": 1347
},
{
"epoch": 1.9423631123919307,
"grad_norm": 0.8721776008605957,
"learning_rate": 4.7715662998760735e-08,
"loss": 0.7027156352996826,
"step": 1348
},
{
"epoch": 1.9438040345821326,
"grad_norm": 0.7703850269317627,
"learning_rate": 4.54181970205847e-08,
"loss": 0.6207553148269653,
"step": 1349
},
{
"epoch": 1.9452449567723344,
"grad_norm": 0.875476598739624,
"learning_rate": 4.317728874229321e-08,
"loss": 0.6593065857887268,
"step": 1350
},
{
"epoch": 1.946685878962536,
"grad_norm": 0.784439742565155,
"learning_rate": 4.0992950895776265e-08,
"loss": 0.6175656914710999,
"step": 1351
},
{
"epoch": 1.9481268011527377,
"grad_norm": 0.8737075924873352,
"learning_rate": 3.8865195891512054e-08,
"loss": 0.8658263087272644,
"step": 1352
},
{
"epoch": 1.9495677233429394,
"grad_norm": 0.8800835609436035,
"learning_rate": 3.679403581849927e-08,
"loss": 0.8155512809753418,
"step": 1353
},
{
"epoch": 1.9510086455331412,
"grad_norm": 0.7003129124641418,
"learning_rate": 3.477948244418716e-08,
"loss": 0.6445010900497437,
"step": 1354
},
{
"epoch": 1.952449567723343,
"grad_norm": 0.8185921311378479,
"learning_rate": 3.2821547214413327e-08,
"loss": 0.687210202217102,
"step": 1355
},
{
"epoch": 1.9538904899135447,
"grad_norm": 0.6671662330627441,
"learning_rate": 3.0920241253331596e-08,
"loss": 0.7458325624465942,
"step": 1356
},
{
"epoch": 1.9553314121037464,
"grad_norm": 0.86750727891922,
"learning_rate": 2.9075575363355368e-08,
"loss": 0.7063475847244263,
"step": 1357
},
{
"epoch": 1.956772334293948,
"grad_norm": 0.6927515864372253,
"learning_rate": 2.728756002508881e-08,
"loss": 0.726108968257904,
"step": 1358
},
{
"epoch": 1.95821325648415,
"grad_norm": 0.7644289135932922,
"learning_rate": 2.555620539727799e-08,
"loss": 0.7151061296463013,
"step": 1359
},
{
"epoch": 1.9596541786743515,
"grad_norm": 0.8003210425376892,
"learning_rate": 2.388152131674093e-08,
"loss": 0.7040859460830688,
"step": 1360
},
{
"epoch": 1.9610951008645534,
"grad_norm": 0.9617341160774231,
"learning_rate": 2.2263517298320992e-08,
"loss": 0.6567614078521729,
"step": 1361
},
{
"epoch": 1.962536023054755,
"grad_norm": 0.8938956260681152,
"learning_rate": 2.070220253483024e-08,
"loss": 0.7761929035186768,
"step": 1362
},
{
"epoch": 1.9639769452449567,
"grad_norm": 0.9245355725288391,
"learning_rate": 1.919758589699283e-08,
"loss": 0.7922526597976685,
"step": 1363
},
{
"epoch": 1.9654178674351583,
"grad_norm": 0.8674725294113159,
"learning_rate": 1.774967593340171e-08,
"loss": 0.7022340297698975,
"step": 1364
},
{
"epoch": 1.9668587896253602,
"grad_norm": 0.8854632377624512,
"learning_rate": 1.635848087046532e-08,
"loss": 0.7119938135147095,
"step": 1365
},
{
"epoch": 1.968299711815562,
"grad_norm": 0.7363027930259705,
"learning_rate": 1.5024008612363196e-08,
"loss": 0.636238694190979,
"step": 1366
},
{
"epoch": 1.9697406340057637,
"grad_norm": 0.830125629901886,
"learning_rate": 1.3746266740997104e-08,
"loss": 0.6909417510032654,
"step": 1367
},
{
"epoch": 1.9711815561959654,
"grad_norm": 0.8109197020530701,
"learning_rate": 1.2525262515954429e-08,
"loss": 0.7618024349212646,
"step": 1368
},
{
"epoch": 1.972622478386167,
"grad_norm": 0.8020060062408447,
"learning_rate": 1.1361002874461512e-08,
"loss": 0.8216003179550171,
"step": 1369
},
{
"epoch": 1.9740634005763689,
"grad_norm": 0.818276047706604,
"learning_rate": 1.0253494431347045e-08,
"loss": 0.7050395011901855,
"step": 1370
},
{
"epoch": 1.9755043227665707,
"grad_norm": 0.7677318453788757,
"learning_rate": 9.202743479002074e-09,
"loss": 0.6805046200752258,
"step": 1371
},
{
"epoch": 1.9769452449567724,
"grad_norm": 0.7596999406814575,
"learning_rate": 8.208755987346717e-09,
"loss": 0.6607006788253784,
"step": 1372
},
{
"epoch": 1.978386167146974,
"grad_norm": 0.7905407547950745,
"learning_rate": 7.27153760379462e-09,
"loss": 0.7673226594924927,
"step": 1373
},
{
"epoch": 1.9798270893371757,
"grad_norm": 0.7896655201911926,
"learning_rate": 6.391093653224101e-09,
"loss": 0.7531790733337402,
"step": 1374
},
{
"epoch": 1.9812680115273775,
"grad_norm": 0.9625368118286133,
"learning_rate": 5.567429137940395e-09,
"loss": 0.6215754151344299,
"step": 1375
},
{
"epoch": 1.9827089337175794,
"grad_norm": 0.8169432282447815,
"learning_rate": 4.800548737656785e-09,
"loss": 0.6860508322715759,
"step": 1376
},
{
"epoch": 1.984149855907781,
"grad_norm": 0.8397043347358704,
"learning_rate": 4.090456809462407e-09,
"loss": 0.631721556186676,
"step": 1377
},
{
"epoch": 1.9855907780979827,
"grad_norm": 0.8882712721824646,
"learning_rate": 3.4371573877944874e-09,
"loss": 0.6776498556137085,
"step": 1378
},
{
"epoch": 1.9870317002881843,
"grad_norm": 0.7635476589202881,
"learning_rate": 2.840654184425029e-09,
"loss": 0.6540400981903076,
"step": 1379
},
{
"epoch": 1.9884726224783862,
"grad_norm": 0.9316484332084656,
"learning_rate": 2.300950588430828e-09,
"loss": 0.6866999268531799,
"step": 1380
},
{
"epoch": 1.989913544668588,
"grad_norm": 0.802742600440979,
"learning_rate": 1.8180496661779346e-09,
"loss": 0.6650329232215881,
"step": 1381
},
{
"epoch": 1.9913544668587897,
"grad_norm": 0.7995099425315857,
"learning_rate": 1.391954161304998e-09,
"loss": 0.7718065977096558,
"step": 1382
},
{
"epoch": 1.9927953890489913,
"grad_norm": 0.7001140713691711,
"learning_rate": 1.0226664947032838e-09,
"loss": 0.570642352104187,
"step": 1383
},
{
"epoch": 1.994236311239193,
"grad_norm": 0.8440130949020386,
"learning_rate": 7.101887645100114e-10,
"loss": 0.8078399896621704,
"step": 1384
},
{
"epoch": 1.9956772334293948,
"grad_norm": 0.8214783072471619,
"learning_rate": 4.54522746090591e-10,
"loss": 0.6115165948867798,
"step": 1385
},
{
"epoch": 1.9971181556195965,
"grad_norm": 0.8176844120025635,
"learning_rate": 2.55669892030852e-10,
"loss": 0.7604609727859497,
"step": 1386
},
{
"epoch": 1.9985590778097984,
"grad_norm": 0.8071082830429077,
"learning_rate": 1.1363133212705102e-10,
"loss": 0.6580838561058044,
"step": 1387
},
{
"epoch": 2.0,
"grad_norm": 0.7433238625526428,
"learning_rate": 2.840787338254103e-11,
"loss": 0.6367690563201904,
"step": 1388
},
{
"epoch": 2.0,
"step": 1388,
"total_flos": 1.0037139745307361e+18,
"train_loss": 0.7883986402571374,
"train_runtime": 11184.8694,
"train_samples_per_second": 0.496,
"train_steps_per_second": 0.124
}
],
"logging_steps": 1,
"max_steps": 1388,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.0037139745307361e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}