{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 1024,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0009765625,
      "grad_norm": 0.6541444063186646,
      "learning_rate": 0.0,
      "loss": 1.0280990600585938,
      "step": 1
    },
    {
      "epoch": 0.001953125,
      "grad_norm": 0.4356674551963806,
      "learning_rate": 4e-05,
      "loss": 0.8305179476737976,
      "step": 2
    },
    {
      "epoch": 0.0029296875,
      "grad_norm": 0.3900858759880066,
      "learning_rate": 8e-05,
      "loss": 0.7835474014282227,
      "step": 3
    },
    {
      "epoch": 0.00390625,
      "grad_norm": 0.3717947006225586,
      "learning_rate": 0.00012,
      "loss": 1.1571688652038574,
      "step": 4
    },
    {
      "epoch": 0.0048828125,
      "grad_norm": 0.2760661542415619,
      "learning_rate": 0.00016,
      "loss": 0.8141135573387146,
      "step": 5
    },
    {
      "epoch": 0.005859375,
      "grad_norm": 0.24524882435798645,
      "learning_rate": 0.0002,
      "loss": 0.29919666051864624,
      "step": 6
    },
    {
      "epoch": 0.0068359375,
      "grad_norm": 0.3155483305454254,
      "learning_rate": 0.00019980372914622178,
      "loss": 0.916366696357727,
      "step": 7
    },
    {
      "epoch": 0.0078125,
      "grad_norm": 1.0419310331344604,
      "learning_rate": 0.00019960745829244357,
      "loss": 0.986505389213562,
      "step": 8
    },
    {
      "epoch": 0.0087890625,
      "grad_norm": 0.32395845651626587,
      "learning_rate": 0.00019941118743866537,
      "loss": 0.7845190167427063,
      "step": 9
    },
    {
      "epoch": 0.009765625,
      "grad_norm": 0.564084529876709,
      "learning_rate": 0.00019921491658488717,
      "loss": 1.0922366380691528,
      "step": 10
    },
    {
      "epoch": 0.0107421875,
      "grad_norm": 0.4066593647003174,
      "learning_rate": 0.00019901864573110893,
      "loss": 1.0279463529586792,
      "step": 11
    },
    {
      "epoch": 0.01171875,
      "grad_norm": 0.43442535400390625,
      "learning_rate": 0.00019882237487733073,
      "loss": 0.9713175892829895,
      "step": 12
    },
    {
      "epoch": 0.0126953125,
      "grad_norm": 0.26689526438713074,
      "learning_rate": 0.0001986261040235525,
      "loss": 0.38461241126060486,
      "step": 13
    },
    {
      "epoch": 0.013671875,
      "grad_norm": 0.41254541277885437,
      "learning_rate": 0.0001984298331697743,
      "loss": 0.7746479511260986,
      "step": 14
    },
    {
      "epoch": 0.0146484375,
      "grad_norm": 0.39432424306869507,
      "learning_rate": 0.0001982335623159961,
      "loss": 0.7843194603919983,
      "step": 15
    },
    {
      "epoch": 0.015625,
      "grad_norm": 0.4303337037563324,
      "learning_rate": 0.0001980372914622179,
      "loss": 0.6613403558731079,
      "step": 16
    },
    {
      "epoch": 0.0166015625,
      "grad_norm": 0.875269889831543,
      "learning_rate": 0.00019784102060843966,
      "loss": 1.0992671251296997,
      "step": 17
    },
    {
      "epoch": 0.017578125,
      "grad_norm": 0.21415413916110992,
      "learning_rate": 0.00019764474975466145,
      "loss": 0.2784216105937958,
      "step": 18
    },
    {
      "epoch": 0.0185546875,
      "grad_norm": 0.4318086504936218,
      "learning_rate": 0.00019744847890088322,
      "loss": 0.6146124005317688,
      "step": 19
    },
    {
      "epoch": 0.01953125,
      "grad_norm": 0.20149515569210052,
      "learning_rate": 0.00019725220804710502,
      "loss": 0.3920556306838989,
      "step": 20
    },
    {
      "epoch": 0.0205078125,
      "grad_norm": 0.358688622713089,
      "learning_rate": 0.0001970559371933268,
      "loss": 0.6672685742378235,
      "step": 21
    },
    {
      "epoch": 0.021484375,
      "grad_norm": 0.5916730165481567,
      "learning_rate": 0.00019685966633954858,
      "loss": 1.0804443359375,
      "step": 22
    },
    {
      "epoch": 0.0224609375,
      "grad_norm": 0.3139825761318207,
      "learning_rate": 0.00019666339548577038,
      "loss": 0.7358766794204712,
      "step": 23
    },
    {
      "epoch": 0.0234375,
      "grad_norm": 0.4019712805747986,
      "learning_rate": 0.00019646712463199215,
      "loss": 0.7362902164459229,
      "step": 24
    },
    {
      "epoch": 0.0244140625,
      "grad_norm": 0.2874290347099304,
      "learning_rate": 0.00019627085377821394,
      "loss": 0.6446189284324646,
      "step": 25
    },
    {
      "epoch": 0.025390625,
      "grad_norm": 0.357494592666626,
      "learning_rate": 0.0001960745829244357,
      "loss": 0.2820976972579956,
      "step": 26
    },
    {
      "epoch": 0.0263671875,
      "grad_norm": 0.22216391563415527,
      "learning_rate": 0.00019587831207065753,
      "loss": 0.6020435094833374,
      "step": 27
    },
    {
      "epoch": 0.02734375,
      "grad_norm": 0.23284995555877686,
      "learning_rate": 0.0001956820412168793,
      "loss": 0.44151532649993896,
      "step": 28
    },
    {
      "epoch": 0.0283203125,
      "grad_norm": 0.3594605028629303,
      "learning_rate": 0.0001954857703631011,
      "loss": 0.9414041042327881,
      "step": 29
    },
    {
      "epoch": 0.029296875,
      "grad_norm": 0.4460504353046417,
      "learning_rate": 0.00019528949950932287,
      "loss": 0.7148531079292297,
      "step": 30
    },
    {
      "epoch": 0.0302734375,
      "grad_norm": 0.3392362892627716,
      "learning_rate": 0.00019509322865554466,
      "loss": 0.7185512781143188,
      "step": 31
    },
    {
      "epoch": 0.03125,
      "grad_norm": 0.3340625464916229,
      "learning_rate": 0.00019489695780176643,
      "loss": 0.6613262891769409,
      "step": 32
    },
    {
      "epoch": 0.0322265625,
      "grad_norm": 0.26223355531692505,
      "learning_rate": 0.00019470068694798826,
      "loss": 0.590149462223053,
      "step": 33
    },
    {
      "epoch": 0.033203125,
      "grad_norm": 0.3481689691543579,
      "learning_rate": 0.00019450441609421002,
      "loss": 0.5590913891792297,
      "step": 34
    },
    {
      "epoch": 0.0341796875,
      "grad_norm": 0.4775488078594208,
      "learning_rate": 0.00019430814524043182,
      "loss": 0.927351176738739,
      "step": 35
    },
    {
      "epoch": 0.03515625,
      "grad_norm": 0.4474835693836212,
      "learning_rate": 0.0001941118743866536,
      "loss": 0.7719380855560303,
      "step": 36
    },
    {
      "epoch": 0.0361328125,
      "grad_norm": 0.3538999855518341,
      "learning_rate": 0.00019391560353287536,
      "loss": 1.0287561416625977,
      "step": 37
    },
    {
      "epoch": 0.037109375,
      "grad_norm": 0.5018237233161926,
      "learning_rate": 0.00019371933267909715,
      "loss": 1.049814224243164,
      "step": 38
    },
    {
      "epoch": 0.0380859375,
      "grad_norm": 0.5052743554115295,
      "learning_rate": 0.00019352306182531895,
      "loss": 0.39767658710479736,
      "step": 39
    },
    {
      "epoch": 0.0390625,
      "grad_norm": 0.46170520782470703,
      "learning_rate": 0.00019332679097154075,
      "loss": 0.9849376678466797,
      "step": 40
    },
    {
      "epoch": 0.0400390625,
      "grad_norm": 0.5961291193962097,
      "learning_rate": 0.00019313052011776251,
      "loss": 0.8527336716651917,
      "step": 41
    },
    {
      "epoch": 0.041015625,
      "grad_norm": 0.4002876579761505,
      "learning_rate": 0.0001929342492639843,
      "loss": 0.7445047497749329,
      "step": 42
    },
    {
      "epoch": 0.0419921875,
      "grad_norm": 0.6382992267608643,
      "learning_rate": 0.00019273797841020608,
      "loss": 0.7587878704071045,
      "step": 43
    },
    {
      "epoch": 0.04296875,
      "grad_norm": 0.4204530715942383,
      "learning_rate": 0.00019254170755642788,
      "loss": 0.943995475769043,
      "step": 44
    },
    {
      "epoch": 0.0439453125,
      "grad_norm": 0.29038068652153015,
      "learning_rate": 0.00019234543670264967,
      "loss": 0.4540131688117981,
      "step": 45
    },
    {
      "epoch": 0.044921875,
      "grad_norm": 0.41968628764152527,
      "learning_rate": 0.00019214916584887147,
      "loss": 0.3900204300880432,
      "step": 46
    },
    {
      "epoch": 0.0458984375,
      "grad_norm": 0.5870251059532166,
      "learning_rate": 0.00019195289499509324,
      "loss": 0.8700598478317261,
      "step": 47
    },
    {
      "epoch": 0.046875,
      "grad_norm": 0.3120124042034149,
      "learning_rate": 0.00019175662414131503,
      "loss": 0.2866731882095337,
      "step": 48
    },
    {
      "epoch": 0.0478515625,
      "grad_norm": 0.31891942024230957,
      "learning_rate": 0.0001915603532875368,
      "loss": 0.7711223363876343,
      "step": 49
    },
    {
      "epoch": 0.048828125,
      "grad_norm": 0.4250207543373108,
      "learning_rate": 0.0001913640824337586,
      "loss": 0.7499758005142212,
      "step": 50
    },
    {
      "epoch": 0.0498046875,
      "grad_norm": 0.4769924581050873,
      "learning_rate": 0.0001911678115799804,
      "loss": 0.8479812145233154,
      "step": 51
    },
    {
      "epoch": 0.05078125,
      "grad_norm": 0.2966979146003723,
      "learning_rate": 0.00019097154072620216,
      "loss": 0.8125182390213013,
      "step": 52
    },
    {
      "epoch": 0.0517578125,
      "grad_norm": 0.4924452006816864,
      "learning_rate": 0.00019077526987242396,
      "loss": 1.006331443786621,
      "step": 53
    },
    {
      "epoch": 0.052734375,
      "grad_norm": 0.5558736324310303,
      "learning_rate": 0.00019057899901864573,
      "loss": 0.8218062520027161,
      "step": 54
    },
    {
      "epoch": 0.0537109375,
      "grad_norm": 0.488903284072876,
      "learning_rate": 0.00019038272816486752,
      "loss": 0.7451006770133972,
      "step": 55
    },
    {
      "epoch": 0.0546875,
      "grad_norm": 0.6092124581336975,
      "learning_rate": 0.00019018645731108932,
      "loss": 0.3371097445487976,
      "step": 56
    },
    {
      "epoch": 0.0556640625,
      "grad_norm": 0.34885621070861816,
      "learning_rate": 0.00018999018645731111,
      "loss": 0.9263520836830139,
      "step": 57
    },
    {
      "epoch": 0.056640625,
      "grad_norm": 0.41470521688461304,
      "learning_rate": 0.00018979391560353288,
      "loss": 0.8741390109062195,
      "step": 58
    },
    {
      "epoch": 0.0576171875,
      "grad_norm": 0.32286664843559265,
      "learning_rate": 0.00018959764474975468,
      "loss": 0.6128658056259155,
      "step": 59
    },
    {
      "epoch": 0.05859375,
      "grad_norm": 0.43667954206466675,
      "learning_rate": 0.00018940137389597645,
      "loss": 0.822106122970581,
      "step": 60
    },
    {
      "epoch": 0.0595703125,
      "grad_norm": 0.5501149892807007,
      "learning_rate": 0.00018920510304219824,
      "loss": 0.2981743812561035,
      "step": 61
    },
    {
      "epoch": 0.060546875,
      "grad_norm": 0.5234649777412415,
      "learning_rate": 0.00018900883218842004,
      "loss": 0.710310161113739,
      "step": 62
    },
    {
      "epoch": 0.0615234375,
      "grad_norm": 0.5040559768676758,
      "learning_rate": 0.00018881256133464184,
      "loss": 1.0355676412582397,
      "step": 63
    },
    {
      "epoch": 0.0625,
      "grad_norm": 0.4435643255710602,
      "learning_rate": 0.0001886162904808636,
      "loss": 1.031105399131775,
      "step": 64
    },
    {
      "epoch": 0.0634765625,
      "grad_norm": 0.4987465441226959,
      "learning_rate": 0.0001884200196270854,
      "loss": 0.7753915190696716,
      "step": 65
    },
    {
      "epoch": 0.064453125,
      "grad_norm": 0.3633696436882019,
      "learning_rate": 0.00018822374877330717,
      "loss": 1.2376799583435059,
      "step": 66
    },
    {
      "epoch": 0.0654296875,
      "grad_norm": 1.0342258214950562,
      "learning_rate": 0.00018802747791952894,
      "loss": 0.6145737171173096,
      "step": 67
    },
    {
      "epoch": 0.06640625,
      "grad_norm": 0.47045138478279114,
      "learning_rate": 0.00018783120706575076,
      "loss": 0.8622407913208008,
      "step": 68
    },
    {
      "epoch": 0.0673828125,
      "grad_norm": 0.47864851355552673,
      "learning_rate": 0.00018763493621197253,
      "loss": 0.6727300882339478,
      "step": 69
    },
    {
      "epoch": 0.068359375,
      "grad_norm": 0.38102060556411743,
      "learning_rate": 0.00018743866535819433,
      "loss": 0.7417519092559814,
      "step": 70
    },
    {
      "epoch": 0.0693359375,
      "grad_norm": 0.4229515492916107,
      "learning_rate": 0.0001872423945044161,
      "loss": 0.46951866149902344,
      "step": 71
    },
    {
      "epoch": 0.0703125,
      "grad_norm": 0.4868115186691284,
      "learning_rate": 0.0001870461236506379,
      "loss": 0.32457292079925537,
      "step": 72
    },
    {
      "epoch": 0.0712890625,
      "grad_norm": 0.298020601272583,
      "learning_rate": 0.00018684985279685966,
      "loss": 0.2501494288444519,
      "step": 73
    },
    {
      "epoch": 0.072265625,
      "grad_norm": 0.49870651960372925,
      "learning_rate": 0.00018665358194308145,
      "loss": 0.5599403381347656,
      "step": 74
    },
    {
      "epoch": 0.0732421875,
      "grad_norm": 0.5717479586601257,
      "learning_rate": 0.00018645731108930325,
      "loss": 0.4725653827190399,
      "step": 75
    },
    {
      "epoch": 0.07421875,
      "grad_norm": 0.5230128765106201,
      "learning_rate": 0.00018626104023552505,
      "loss": 1.0607699155807495,
      "step": 76
    },
    {
      "epoch": 0.0751953125,
      "grad_norm": 0.4279435873031616,
      "learning_rate": 0.00018606476938174682,
      "loss": 0.5628142952919006,
      "step": 77
    },
    {
      "epoch": 0.076171875,
      "grad_norm": 0.6166331171989441,
      "learning_rate": 0.0001858684985279686,
      "loss": 0.44837141036987305,
      "step": 78
    },
    {
      "epoch": 0.0771484375,
      "grad_norm": 0.6329861879348755,
      "learning_rate": 0.00018567222767419038,
      "loss": 0.5013883709907532,
      "step": 79
    },
    {
      "epoch": 0.078125,
      "grad_norm": 0.2921103239059448,
      "learning_rate": 0.00018547595682041218,
      "loss": 0.541824996471405,
      "step": 80
    },
    {
      "epoch": 0.0791015625,
      "grad_norm": 0.36744800209999084,
      "learning_rate": 0.00018527968596663397,
      "loss": 0.3878925144672394,
      "step": 81
    },
    {
      "epoch": 0.080078125,
      "grad_norm": 0.34045904874801636,
      "learning_rate": 0.00018508341511285574,
      "loss": 0.33476194739341736,
      "step": 82
    },
    {
      "epoch": 0.0810546875,
      "grad_norm": 0.48908546566963196,
      "learning_rate": 0.00018488714425907754,
      "loss": 1.003555178642273,
      "step": 83
    },
    {
      "epoch": 0.08203125,
      "grad_norm": 0.4683694839477539,
      "learning_rate": 0.0001846908734052993,
      "loss": 0.7300649285316467,
      "step": 84
    },
    {
      "epoch": 0.0830078125,
      "grad_norm": 0.3560928404331207,
      "learning_rate": 0.0001844946025515211,
      "loss": 0.4525097608566284,
      "step": 85
    },
    {
      "epoch": 0.083984375,
      "grad_norm": 1.481307864189148,
      "learning_rate": 0.0001842983316977429,
      "loss": 0.5444833040237427,
      "step": 86
    },
    {
      "epoch": 0.0849609375,
      "grad_norm": 0.42610403895378113,
      "learning_rate": 0.0001841020608439647,
      "loss": 0.7340827584266663,
      "step": 87
    },
    {
      "epoch": 0.0859375,
      "grad_norm": 0.6035026907920837,
      "learning_rate": 0.00018390578999018646,
      "loss": 0.5589049458503723,
      "step": 88
    },
    {
      "epoch": 0.0869140625,
      "grad_norm": 0.6075074076652527,
      "learning_rate": 0.00018370951913640826,
      "loss": 0.4969009757041931,
      "step": 89
    },
    {
      "epoch": 0.087890625,
      "grad_norm": 0.6751372814178467,
      "learning_rate": 0.00018351324828263003,
      "loss": 0.46451041102409363,
      "step": 90
    },
    {
      "epoch": 0.0888671875,
      "grad_norm": 0.5816373229026794,
      "learning_rate": 0.00018331697742885182,
      "loss": 1.024427056312561,
      "step": 91
    },
    {
      "epoch": 0.08984375,
      "grad_norm": 0.6644161939620972,
      "learning_rate": 0.00018312070657507362,
      "loss": 0.778592586517334,
      "step": 92
    },
    {
      "epoch": 0.0908203125,
      "grad_norm": 0.652209997177124,
      "learning_rate": 0.00018292443572129541,
      "loss": 0.8565710783004761,
      "step": 93
    },
    {
      "epoch": 0.091796875,
      "grad_norm": 0.9109074473381042,
      "learning_rate": 0.00018272816486751718,
      "loss": 0.6693978309631348,
      "step": 94
    },
    {
      "epoch": 0.0927734375,
      "grad_norm": 0.5235186219215393,
      "learning_rate": 0.00018253189401373895,
      "loss": 0.8255172967910767,
      "step": 95
    },
    {
      "epoch": 0.09375,
      "grad_norm": 0.8362122178077698,
      "learning_rate": 0.00018233562315996075,
      "loss": 0.5858157873153687,
      "step": 96
    },
    {
      "epoch": 0.0947265625,
      "grad_norm": 0.6753116846084595,
      "learning_rate": 0.00018213935230618254,
      "loss": 0.6682421565055847,
      "step": 97
    },
    {
      "epoch": 0.095703125,
      "grad_norm": 0.5394794940948486,
      "learning_rate": 0.00018194308145240434,
      "loss": 0.3218158781528473,
      "step": 98
    },
    {
      "epoch": 0.0966796875,
      "grad_norm": 3.2796010971069336,
      "learning_rate": 0.0001817468105986261,
      "loss": 0.681085467338562,
      "step": 99
    },
    {
      "epoch": 0.09765625,
      "grad_norm": 0.38390907645225525,
      "learning_rate": 0.0001815505397448479,
      "loss": 0.39554187655448914,
      "step": 100
    },
    {
      "epoch": 0.0986328125,
      "grad_norm": 0.5289499759674072,
      "learning_rate": 0.00018135426889106967,
      "loss": 1.0264520645141602,
      "step": 101
    },
    {
      "epoch": 0.099609375,
      "grad_norm": 0.8211148977279663,
      "learning_rate": 0.00018115799803729147,
      "loss": 0.8588113784790039,
      "step": 102
    },
    {
      "epoch": 0.1005859375,
      "grad_norm": 0.4771063029766083,
      "learning_rate": 0.00018096172718351327,
      "loss": 0.7471244931221008,
      "step": 103
    },
    {
      "epoch": 0.1015625,
      "grad_norm": 0.6326794624328613,
      "learning_rate": 0.00018076545632973506,
      "loss": 0.6081597805023193,
      "step": 104
    },
    {
      "epoch": 0.1025390625,
      "grad_norm": 0.7229248285293579,
      "learning_rate": 0.00018056918547595683,
      "loss": 0.8315082788467407,
      "step": 105
    },
    {
      "epoch": 0.103515625,
      "grad_norm": 0.6803163290023804,
      "learning_rate": 0.00018037291462217863,
      "loss": 0.8308911323547363,
      "step": 106
    },
    {
      "epoch": 0.1044921875,
      "grad_norm": 0.5268850326538086,
      "learning_rate": 0.0001801766437684004,
      "loss": 0.8480656743049622,
      "step": 107
    },
    {
      "epoch": 0.10546875,
      "grad_norm": 0.7849289178848267,
      "learning_rate": 0.0001799803729146222,
      "loss": 0.8200575113296509,
      "step": 108
    },
    {
      "epoch": 0.1064453125,
      "grad_norm": 0.4259982407093048,
      "learning_rate": 0.00017978410206084396,
      "loss": 0.44367721676826477,
      "step": 109
    },
    {
      "epoch": 0.107421875,
      "grad_norm": 0.4788619577884674,
      "learning_rate": 0.00017958783120706576,
      "loss": 0.6017763018608093,
      "step": 110
    },
    {
      "epoch": 0.1083984375,
      "grad_norm": 0.34434452652931213,
      "learning_rate": 0.00017939156035328755,
      "loss": 0.29681769013404846,
      "step": 111
    },
    {
      "epoch": 0.109375,
      "grad_norm": 1.1506884098052979,
      "learning_rate": 0.00017919528949950932,
      "loss": 0.6520863771438599,
      "step": 112
    },
    {
      "epoch": 0.1103515625,
      "grad_norm": 0.8348999619483948,
      "learning_rate": 0.00017899901864573112,
      "loss": 0.6035414934158325,
      "step": 113
    },
    {
      "epoch": 0.111328125,
      "grad_norm": 0.5550518035888672,
      "learning_rate": 0.00017880274779195289,
      "loss": 0.7711564302444458,
      "step": 114
    },
    {
      "epoch": 0.1123046875,
      "grad_norm": 0.28814634680747986,
      "learning_rate": 0.00017860647693817468,
      "loss": 0.8325987458229065,
      "step": 115
    },
    {
      "epoch": 0.11328125,
      "grad_norm": 0.3833630084991455,
      "learning_rate": 0.00017841020608439648,
      "loss": 0.3345921039581299,
      "step": 116
    },
    {
      "epoch": 0.1142578125,
      "grad_norm": 0.8784507513046265,
      "learning_rate": 0.00017821393523061827,
      "loss": 0.4186948239803314,
      "step": 117
    },
    {
      "epoch": 0.115234375,
      "grad_norm": 0.7263842225074768,
      "learning_rate": 0.00017801766437684004,
      "loss": 0.5570493936538696,
      "step": 118
    },
    {
      "epoch": 0.1162109375,
      "grad_norm": 0.6391569972038269,
      "learning_rate": 0.00017782139352306184,
      "loss": 1.0257431268692017,
      "step": 119
    },
    {
      "epoch": 0.1171875,
      "grad_norm": 0.6025450229644775,
      "learning_rate": 0.0001776251226692836,
      "loss": 0.8676729202270508,
      "step": 120
    },
    {
      "epoch": 0.1181640625,
      "grad_norm": 0.3776579201221466,
      "learning_rate": 0.0001774288518155054,
      "loss": 0.5870720148086548,
      "step": 121
    },
    {
      "epoch": 0.119140625,
      "grad_norm": 0.40912336111068726,
      "learning_rate": 0.0001772325809617272,
      "loss": 0.9210044145584106,
      "step": 122
    },
    {
      "epoch": 0.1201171875,
      "grad_norm": 0.5036085247993469,
      "learning_rate": 0.000177036310107949,
      "loss": 0.47378072142601013,
      "step": 123
    },
    {
      "epoch": 0.12109375,
      "grad_norm": 0.5508134961128235,
      "learning_rate": 0.00017684003925417076,
      "loss": 0.8295834064483643,
      "step": 124
    },
    {
      "epoch": 0.1220703125,
      "grad_norm": 0.5522392392158508,
      "learning_rate": 0.00017664376840039253,
      "loss": 0.793156087398529,
      "step": 125
    },
    {
      "epoch": 0.123046875,
      "grad_norm": 1.0098820924758911,
      "learning_rate": 0.00017644749754661433,
      "loss": 0.5780155658721924,
      "step": 126
    },
    {
      "epoch": 0.1240234375,
      "grad_norm": 0.6178780198097229,
      "learning_rate": 0.00017625122669283612,
      "loss": 0.5129156708717346,
      "step": 127
    },
    {
      "epoch": 0.125,
      "grad_norm": 0.6224352121353149,
      "learning_rate": 0.00017605495583905792,
      "loss": 0.8498928546905518,
      "step": 128
    },
    {
      "epoch": 0.1259765625,
      "grad_norm": 0.7869983315467834,
      "learning_rate": 0.0001758586849852797,
      "loss": 0.9180670976638794,
      "step": 129
    },
    {
      "epoch": 0.126953125,
      "grad_norm": 0.4122680127620697,
      "learning_rate": 0.00017566241413150148,
      "loss": 0.510919988155365,
      "step": 130
    },
    {
      "epoch": 0.1279296875,
      "grad_norm": 0.7221843004226685,
      "learning_rate": 0.00017546614327772325,
      "loss": 0.3977488875389099,
      "step": 131
    },
    {
      "epoch": 0.12890625,
      "grad_norm": 1.155800461769104,
      "learning_rate": 0.00017526987242394505,
      "loss": 0.6549078226089478,
      "step": 132
    },
    {
      "epoch": 0.1298828125,
      "grad_norm": 0.7164724469184875,
      "learning_rate": 0.00017507360157016685,
      "loss": 0.8306566476821899,
      "step": 133
    },
    {
      "epoch": 0.130859375,
      "grad_norm": 0.7600284814834595,
      "learning_rate": 0.00017487733071638864,
      "loss": 0.34278520941734314,
      "step": 134
    },
    {
      "epoch": 0.1318359375,
      "grad_norm": 0.8636081218719482,
      "learning_rate": 0.0001746810598626104,
      "loss": 0.8881778717041016,
      "step": 135
    },
    {
      "epoch": 0.1328125,
      "grad_norm": 1.0904357433319092,
      "learning_rate": 0.0001744847890088322,
      "loss": 0.4423227310180664,
      "step": 136
    },
    {
      "epoch": 0.1337890625,
      "grad_norm": 0.5639862418174744,
      "learning_rate": 0.00017428851815505397,
      "loss": 0.8610935211181641,
      "step": 137
    },
    {
      "epoch": 0.134765625,
      "grad_norm": 1.05929696559906,
      "learning_rate": 0.00017409224730127577,
      "loss": 1.1729753017425537,
      "step": 138
    },
    {
      "epoch": 0.1357421875,
      "grad_norm": 1.0731761455535889,
      "learning_rate": 0.00017389597644749757,
      "loss": 0.6459341049194336,
      "step": 139
    },
    {
      "epoch": 0.13671875,
      "grad_norm": 0.7464702725410461,
      "learning_rate": 0.00017369970559371934,
      "loss": 0.5368601083755493,
      "step": 140
    },
    {
      "epoch": 0.1376953125,
      "grad_norm": 0.5722304582595825,
      "learning_rate": 0.00017350343473994113,
      "loss": 0.9642695784568787,
      "step": 141
    },
    {
      "epoch": 0.138671875,
      "grad_norm": 0.5044945478439331,
      "learning_rate": 0.0001733071638861629,
      "loss": 0.49555253982543945,
      "step": 142
    },
    {
      "epoch": 0.1396484375,
      "grad_norm": 0.8069168329238892,
      "learning_rate": 0.0001731108930323847,
      "loss": 0.8796389698982239,
      "step": 143
    },
    {
      "epoch": 0.140625,
      "grad_norm": 0.5269959568977356,
      "learning_rate": 0.00017291462217860646,
      "loss": 0.9928920269012451,
      "step": 144
    },
    {
      "epoch": 0.1416015625,
      "grad_norm": 0.6606360077857971,
      "learning_rate": 0.0001727183513248283,
      "loss": 1.0528640747070312,
      "step": 145
    },
    {
      "epoch": 0.142578125,
      "grad_norm": 0.7145242691040039,
      "learning_rate": 0.00017252208047105006,
      "loss": 1.1252766847610474,
      "step": 146
    },
    {
      "epoch": 0.1435546875,
      "grad_norm": 0.5808660984039307,
      "learning_rate": 0.00017232580961727185,
      "loss": 0.24914072453975677,
      "step": 147
    },
    {
      "epoch": 0.14453125,
      "grad_norm": 0.8544529676437378,
      "learning_rate": 0.00017212953876349362,
      "loss": 0.4420434832572937,
      "step": 148
    },
    {
      "epoch": 0.1455078125,
      "grad_norm": 0.899334728717804,
      "learning_rate": 0.00017193326790971542,
      "loss": 0.7128512263298035,
      "step": 149
    },
    {
      "epoch": 0.146484375,
      "grad_norm": 0.36327579617500305,
      "learning_rate": 0.00017173699705593719,
      "loss": 0.5503419637680054,
      "step": 150
    },
    {
      "epoch": 0.1474609375,
      "grad_norm": 0.553255021572113,
      "learning_rate": 0.000171540726202159,
      "loss": 0.5796535015106201,
      "step": 151
    },
    {
      "epoch": 0.1484375,
      "grad_norm": 0.41036659479141235,
      "learning_rate": 0.00017134445534838078,
      "loss": 0.8935849666595459,
      "step": 152
    },
    {
      "epoch": 0.1494140625,
      "grad_norm": 0.3723013997077942,
      "learning_rate": 0.00017114818449460257,
      "loss": 0.39106485247612,
      "step": 153
    },
    {
      "epoch": 0.150390625,
      "grad_norm": 0.654262900352478,
      "learning_rate": 0.00017095191364082434,
      "loss": 1.0176405906677246,
      "step": 154
    },
    {
      "epoch": 0.1513671875,
      "grad_norm": 0.5707812309265137,
      "learning_rate": 0.0001707556427870461,
      "loss": 0.6580768823623657,
      "step": 155
    },
    {
      "epoch": 0.15234375,
      "grad_norm": 0.35879406332969666,
      "learning_rate": 0.0001705593719332679,
      "loss": 0.4050876200199127,
      "step": 156
    },
    {
      "epoch": 0.1533203125,
      "grad_norm": 0.5701449513435364,
      "learning_rate": 0.0001703631010794897,
      "loss": 0.9737375974655151,
      "step": 157
    },
    {
      "epoch": 0.154296875,
      "grad_norm": 0.4461202919483185,
      "learning_rate": 0.0001701668302257115,
      "loss": 0.9864733815193176,
      "step": 158
    },
    {
      "epoch": 0.1552734375,
      "grad_norm": 0.6229621767997742,
      "learning_rate": 0.00016997055937193327,
      "loss": 0.35883933305740356,
      "step": 159
    },
    {
      "epoch": 0.15625,
      "grad_norm": 0.5390028357505798,
      "learning_rate": 0.00016977428851815506,
      "loss": 0.5791765451431274,
      "step": 160
    },
    {
      "epoch": 0.1572265625,
      "grad_norm": 0.7851611375808716,
      "learning_rate": 0.00016957801766437683,
      "loss": 0.9032300114631653,
      "step": 161
    },
    {
      "epoch": 0.158203125,
      "grad_norm": 0.6211395263671875,
      "learning_rate": 0.00016938174681059863,
      "loss": 0.5069928765296936,
      "step": 162
    },
    {
      "epoch": 0.1591796875,
      "grad_norm": 0.8290377855300903,
      "learning_rate": 0.00016918547595682042,
      "loss": 0.8917738795280457,
      "step": 163
    },
    {
      "epoch": 0.16015625,
      "grad_norm": 0.42707324028015137,
      "learning_rate": 0.00016898920510304222,
      "loss": 0.606585681438446,
      "step": 164
    },
    {
      "epoch": 0.1611328125,
      "grad_norm": 0.49472010135650635,
      "learning_rate": 0.000168792934249264,
      "loss": 1.0100075006484985,
      "step": 165
    },
    {
      "epoch": 0.162109375,
      "grad_norm": 0.48441267013549805,
      "learning_rate": 0.00016859666339548579,
      "loss": 0.7145558595657349,
      "step": 166
    },
    {
      "epoch": 0.1630859375,
      "grad_norm": 0.5181763172149658,
      "learning_rate": 0.00016840039254170755,
      "loss": 0.8088749647140503,
      "step": 167
    },
    {
      "epoch": 0.1640625,
      "grad_norm": 0.4702328145503998,
      "learning_rate": 0.00016820412168792935,
      "loss": 0.5631542801856995,
      "step": 168
    },
    {
      "epoch": 0.1650390625,
      "grad_norm": 0.35454344749450684,
      "learning_rate": 0.00016800785083415115,
      "loss": 0.31744396686553955,
      "step": 169
    },
    {
      "epoch": 0.166015625,
      "grad_norm": 0.5193122029304504,
      "learning_rate": 0.00016781157998037291,
      "loss": 0.7338438034057617,
      "step": 170
    },
    {
      "epoch": 0.1669921875,
      "grad_norm": 0.49799400568008423,
      "learning_rate": 0.0001676153091265947,
      "loss": 0.7910654544830322,
      "step": 171
    },
    {
      "epoch": 0.16796875,
      "grad_norm": 0.4855571389198303,
      "learning_rate": 0.00016741903827281648,
      "loss": 0.38415610790252686,
      "step": 172
    },
    {
      "epoch": 0.1689453125,
      "grad_norm": 0.8796041011810303,
      "learning_rate": 0.00016722276741903828,
      "loss": 0.6042807102203369,
      "step": 173
    },
    {
      "epoch": 0.169921875,
      "grad_norm": 0.6005135774612427,
      "learning_rate": 0.00016702649656526007,
      "loss": 0.6617047786712646,
      "step": 174
    },
    {
      "epoch": 0.1708984375,
      "grad_norm": 0.6359293460845947,
      "learning_rate": 0.00016683022571148187,
      "loss": 0.5227914452552795,
      "step": 175
    },
    {
      "epoch": 0.171875,
      "grad_norm": 0.46007266640663147,
      "learning_rate": 0.00016663395485770364,
      "loss": 0.6881235837936401,
      "step": 176
    },
    {
      "epoch": 0.1728515625,
      "grad_norm": 0.37411797046661377,
      "learning_rate": 0.00016643768400392543,
      "loss": 0.7384200096130371,
      "step": 177
    },
    {
      "epoch": 0.173828125,
      "grad_norm": 0.4021860659122467,
      "learning_rate": 0.0001662414131501472,
      "loss": 1.1738500595092773,
      "step": 178
    },
    {
      "epoch": 0.1748046875,
      "grad_norm": 0.3674755096435547,
      "learning_rate": 0.000166045142296369,
      "loss": 0.37539663910865784,
      "step": 179
    },
    {
      "epoch": 0.17578125,
      "grad_norm": 0.5051441788673401,
      "learning_rate": 0.0001658488714425908,
      "loss": 0.6273016333580017,
      "step": 180
    },
    {
      "epoch": 0.1767578125,
      "grad_norm": 0.6807597279548645,
      "learning_rate": 0.0001656526005888126,
      "loss": 0.4195510447025299,
      "step": 181
    },
    {
      "epoch": 0.177734375,
      "grad_norm": 0.3345419466495514,
      "learning_rate": 0.00016545632973503436,
      "loss": 0.8546851873397827,
      "step": 182
    },
    {
      "epoch": 0.1787109375,
      "grad_norm": 0.33821800351142883,
      "learning_rate": 0.00016526005888125615,
      "loss": 0.522655725479126,
      "step": 183
    },
    {
      "epoch": 0.1796875,
      "grad_norm": 0.3145562708377838,
      "learning_rate": 0.00016506378802747792,
      "loss": 0.3799128532409668,
      "step": 184
    },
    {
      "epoch": 0.1806640625,
      "grad_norm": 0.44908636808395386,
      "learning_rate": 0.0001648675171736997,
      "loss": 0.6263326406478882,
      "step": 185
    },
    {
      "epoch": 0.181640625,
      "grad_norm": 0.7736865282058716,
      "learning_rate": 0.00016467124631992151,
      "loss": 0.3385460078716278,
      "step": 186
    },
    {
      "epoch": 0.1826171875,
      "grad_norm": 0.5184527635574341,
      "learning_rate": 0.00016447497546614328,
      "loss": 0.7980771064758301,
      "step": 187
    },
    {
      "epoch": 0.18359375,
      "grad_norm": 0.41774502396583557,
      "learning_rate": 0.00016427870461236508,
      "loss": 0.7745299339294434,
      "step": 188
    },
    {
      "epoch": 0.1845703125,
      "grad_norm": 0.43824154138565063,
      "learning_rate": 0.00016408243375858685,
      "loss": 0.9190135598182678,
      "step": 189
    },
    {
      "epoch": 0.185546875,
      "grad_norm": 0.4037880301475525,
      "learning_rate": 0.00016388616290480864,
      "loss": 0.5671911239624023,
      "step": 190
    },
    {
      "epoch": 0.1865234375,
      "grad_norm": 0.3757816255092621,
      "learning_rate": 0.0001636898920510304,
      "loss": 0.39916592836380005,
      "step": 191
    },
    {
      "epoch": 0.1875,
      "grad_norm": 0.4747844636440277,
      "learning_rate": 0.00016349362119725224,
      "loss": 0.9217299818992615,
      "step": 192
    },
    {
      "epoch": 0.1884765625,
      "grad_norm": 0.42307209968566895,
      "learning_rate": 0.000163297350343474,
      "loss": 0.8852982521057129,
      "step": 193
    },
    {
      "epoch": 0.189453125,
      "grad_norm": 0.47294488549232483,
      "learning_rate": 0.0001631010794896958,
      "loss": 1.0635476112365723,
      "step": 194
    },
    {
      "epoch": 0.1904296875,
      "grad_norm": 0.3519342243671417,
      "learning_rate": 0.00016290480863591757,
      "loss": 0.33460623025894165,
      "step": 195
    },
    {
      "epoch": 0.19140625,
      "grad_norm": 0.418151319026947,
      "learning_rate": 0.00016270853778213936,
      "loss": 0.8776851296424866,
      "step": 196
    },
    {
      "epoch": 0.1923828125,
      "grad_norm": 0.3954712152481079,
      "learning_rate": 0.00016251226692836113,
      "loss": 0.9358173608779907,
      "step": 197
    },
    {
      "epoch": 0.193359375,
      "grad_norm": 0.35646897554397583,
      "learning_rate": 0.00016231599607458293,
      "loss": 0.43795716762542725,
      "step": 198
    },
    {
      "epoch": 0.1943359375,
      "grad_norm": 0.41675063967704773,
      "learning_rate": 0.00016211972522080473,
      "loss": 0.8348654508590698,
      "step": 199
    },
    {
      "epoch": 0.1953125,
      "grad_norm": 0.5800544023513794,
      "learning_rate": 0.0001619234543670265,
      "loss": 0.5580507516860962,
      "step": 200
    },
    {
      "epoch": 0.1962890625,
      "grad_norm": 0.44925832748413086,
      "learning_rate": 0.0001617271835132483,
      "loss": 0.47444453835487366,
      "step": 201
    },
    {
      "epoch": 0.197265625,
      "grad_norm": 0.48447439074516296,
      "learning_rate": 0.00016153091265947006,
      "loss": 0.5927308797836304,
      "step": 202
    },
    {
      "epoch": 0.1982421875,
      "grad_norm": 0.37814846634864807,
      "learning_rate": 0.00016133464180569186,
      "loss": 0.8504298329353333,
      "step": 203
    },
    {
      "epoch": 0.19921875,
      "grad_norm": 0.4171026051044464,
      "learning_rate": 0.00016113837095191365,
      "loss": 1.0796414613723755,
      "step": 204
    },
    {
      "epoch": 0.2001953125,
      "grad_norm": 0.4570372402667999,
      "learning_rate": 0.00016094210009813545,
      "loss": 0.6229358315467834,
      "step": 205
    },
    {
      "epoch": 0.201171875,
      "grad_norm": 0.6294324994087219,
      "learning_rate": 0.00016074582924435722,
      "loss": 0.8749011158943176,
      "step": 206
    },
    {
      "epoch": 0.2021484375,
      "grad_norm": 0.42371129989624023,
      "learning_rate": 0.000160549558390579,
      "loss": 0.9866290092468262,
      "step": 207
    },
    {
      "epoch": 0.203125,
      "grad_norm": 0.5329370498657227,
      "learning_rate": 0.00016035328753680078,
      "loss": 0.7568405270576477,
      "step": 208
    },
    {
      "epoch": 0.2041015625,
      "grad_norm": 0.37205901741981506,
      "learning_rate": 0.00016015701668302258,
      "loss": 0.7115534543991089,
      "step": 209
    },
    {
      "epoch": 0.205078125,
      "grad_norm": 0.4536517262458801,
      "learning_rate": 0.00015996074582924437,
      "loss": 0.5152509808540344,
      "step": 210
    },
    {
      "epoch": 0.2060546875,
      "grad_norm": 2.319321393966675,
      "learning_rate": 0.00015976447497546617,
      "loss": 0.2915653586387634,
      "step": 211
    },
    {
      "epoch": 0.20703125,
      "grad_norm": 0.7047526836395264,
      "learning_rate": 0.00015956820412168794,
      "loss": 0.3070187568664551,
      "step": 212
    },
    {
      "epoch": 0.2080078125,
      "grad_norm": 0.6068500280380249,
      "learning_rate": 0.0001593719332679097,
      "loss": 0.8103427290916443,
      "step": 213
    },
    {
      "epoch": 0.208984375,
      "grad_norm": 0.3588794469833374,
      "learning_rate": 0.0001591756624141315,
      "loss": 0.4655485153198242,
      "step": 214
    },
    {
      "epoch": 0.2099609375,
      "grad_norm": 0.6561040878295898,
      "learning_rate": 0.0001589793915603533,
      "loss": 0.5353362560272217,
      "step": 215
    },
    {
      "epoch": 0.2109375,
      "grad_norm": 0.6485084891319275,
      "learning_rate": 0.0001587831207065751,
      "loss": 0.8601769804954529,
      "step": 216
    },
    {
      "epoch": 0.2119140625,
      "grad_norm": 0.4718208909034729,
      "learning_rate": 0.00015858684985279686,
      "loss": 0.6897189617156982,
      "step": 217
    },
    {
      "epoch": 0.212890625,
      "grad_norm": 0.7453560829162598,
      "learning_rate": 0.00015839057899901866,
      "loss": 1.0387171506881714,
      "step": 218
    },
    {
      "epoch": 0.2138671875,
      "grad_norm": 0.41157087683677673,
      "learning_rate": 0.00015819430814524043,
      "loss": 0.4910873770713806,
      "step": 219
    },
    {
      "epoch": 0.21484375,
      "grad_norm": 0.4198990762233734,
      "learning_rate": 0.00015799803729146222,
      "loss": 0.588080108165741,
      "step": 220
    },
    {
      "epoch": 0.2158203125,
      "grad_norm": 0.7791650295257568,
      "learning_rate": 0.00015780176643768402,
      "loss": 0.754984974861145,
      "step": 221
    },
    {
      "epoch": 0.216796875,
      "grad_norm": 1.4430909156799316,
      "learning_rate": 0.00015760549558390581,
      "loss": 0.5313946008682251,
      "step": 222
    },
    {
      "epoch": 0.2177734375,
      "grad_norm": 0.4399142861366272,
      "learning_rate": 0.00015740922473012758,
      "loss": 0.523280918598175,
      "step": 223
    },
    {
      "epoch": 0.21875,
      "grad_norm": 0.4177611470222473,
      "learning_rate": 0.00015721295387634938,
      "loss": 0.7598159313201904,
      "step": 224
    },
    {
      "epoch": 0.2197265625,
      "grad_norm": 0.4408816397190094,
      "learning_rate": 0.00015701668302257115,
      "loss": 0.8131666779518127,
      "step": 225
    },
    {
      "epoch": 0.220703125,
      "grad_norm": 0.4228694438934326,
      "learning_rate": 0.00015682041216879294,
      "loss": 1.0456180572509766,
      "step": 226
    },
    {
      "epoch": 0.2216796875,
      "grad_norm": 0.6313449144363403,
      "learning_rate": 0.00015662414131501474,
      "loss": 0.496864914894104,
      "step": 227
    },
    {
      "epoch": 0.22265625,
      "grad_norm": 0.48103493452072144,
      "learning_rate": 0.0001564278704612365,
      "loss": 0.5967347621917725,
      "step": 228
    },
    {
      "epoch": 0.2236328125,
      "grad_norm": 0.3548172116279602,
      "learning_rate": 0.0001562315996074583,
      "loss": 0.3325611650943756,
      "step": 229
    },
    {
      "epoch": 0.224609375,
      "grad_norm": 0.41543763875961304,
      "learning_rate": 0.00015603532875368007,
      "loss": 0.9223452806472778,
      "step": 230
    },
    {
      "epoch": 0.2255859375,
      "grad_norm": 0.6072061061859131,
      "learning_rate": 0.00015583905789990187,
      "loss": 0.2860236167907715,
      "step": 231
    },
    {
      "epoch": 0.2265625,
      "grad_norm": 0.3232869505882263,
      "learning_rate": 0.00015564278704612364,
      "loss": 0.7308738231658936,
      "step": 232
    },
    {
      "epoch": 0.2275390625,
      "grad_norm": 0.5271327495574951,
      "learning_rate": 0.00015544651619234546,
      "loss": 1.0354498624801636,
      "step": 233
    },
    {
      "epoch": 0.228515625,
      "grad_norm": 0.626105546951294,
      "learning_rate": 0.00015525024533856723,
      "loss": 1.0841856002807617,
      "step": 234
    },
    {
      "epoch": 0.2294921875,
      "grad_norm": 0.5628311634063721,
      "learning_rate": 0.00015505397448478903,
      "loss": 0.8868529200553894,
      "step": 235
    },
    {
      "epoch": 0.23046875,
      "grad_norm": 0.4290577471256256,
      "learning_rate": 0.0001548577036310108,
      "loss": 0.5887943506240845,
      "step": 236
    },
    {
      "epoch": 0.2314453125,
      "grad_norm": 0.743786096572876,
      "learning_rate": 0.0001546614327772326,
      "loss": 0.8314348459243774,
      "step": 237
    },
    {
      "epoch": 0.232421875,
      "grad_norm": 0.34498658776283264,
      "learning_rate": 0.00015446516192345436,
      "loss": 0.6171099543571472,
      "step": 238
    },
    {
      "epoch": 0.2333984375,
      "grad_norm": 0.7894997596740723,
      "learning_rate": 0.00015426889106967616,
      "loss": 0.614283561706543,
      "step": 239
    },
    {
      "epoch": 0.234375,
      "grad_norm": 0.4631381034851074,
      "learning_rate": 0.00015407262021589795,
      "loss": 0.6744101047515869,
      "step": 240
    },
    {
      "epoch": 0.2353515625,
      "grad_norm": 0.44523295760154724,
      "learning_rate": 0.00015387634936211975,
      "loss": 0.7094103097915649,
      "step": 241
    },
    {
      "epoch": 0.236328125,
      "grad_norm": 0.7059242725372314,
      "learning_rate": 0.00015368007850834152,
      "loss": 0.6856737732887268,
      "step": 242
    },
    {
      "epoch": 0.2373046875,
      "grad_norm": 1.0360506772994995,
      "learning_rate": 0.00015348380765456329,
      "loss": 1.101341962814331,
      "step": 243
    },
    {
      "epoch": 0.23828125,
      "grad_norm": 0.6630859375,
      "learning_rate": 0.00015328753680078508,
      "loss": 0.8815068006515503,
      "step": 244
    },
    {
      "epoch": 0.2392578125,
      "grad_norm": 0.4162105321884155,
      "learning_rate": 0.00015309126594700688,
      "loss": 0.39801689982414246,
      "step": 245
    },
    {
      "epoch": 0.240234375,
      "grad_norm": 0.5786510109901428,
      "learning_rate": 0.00015289499509322867,
      "loss": 0.5399383902549744,
      "step": 246
    },
    {
      "epoch": 0.2412109375,
      "grad_norm": 0.5430185794830322,
      "learning_rate": 0.00015269872423945044,
      "loss": 0.5432325601577759,
      "step": 247
    },
    {
      "epoch": 0.2421875,
      "grad_norm": 0.3750382959842682,
      "learning_rate": 0.00015250245338567224,
      "loss": 0.49265092611312866,
      "step": 248
    },
    {
      "epoch": 0.2431640625,
      "grad_norm": 0.5081580877304077,
      "learning_rate": 0.000152306182531894,
      "loss": 0.8720104098320007,
      "step": 249
    },
    {
      "epoch": 0.244140625,
      "grad_norm": 0.5619673728942871,
      "learning_rate": 0.0001521099116781158,
      "loss": 0.4022529125213623,
      "step": 250
    },
    {
      "epoch": 0.2451171875,
      "grad_norm": 0.3996225893497467,
      "learning_rate": 0.0001519136408243376,
      "loss": 0.443879097700119,
      "step": 251
    },
    {
      "epoch": 0.24609375,
      "grad_norm": 0.4688915014266968,
      "learning_rate": 0.0001517173699705594,
      "loss": 0.47562721371650696,
      "step": 252
    },
    {
      "epoch": 0.2470703125,
      "grad_norm": 1.7595641613006592,
      "learning_rate": 0.00015152109911678116,
      "loss": 0.5174474716186523,
      "step": 253
    },
    {
      "epoch": 0.248046875,
      "grad_norm": 0.47813650965690613,
      "learning_rate": 0.00015132482826300296,
      "loss": 0.8565359711647034,
      "step": 254
    },
    {
      "epoch": 0.2490234375,
      "grad_norm": 0.49612802267074585,
      "learning_rate": 0.00015112855740922473,
      "loss": 0.4736977815628052,
      "step": 255
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.4370449483394623,
      "learning_rate": 0.00015093228655544652,
      "loss": 0.7566809058189392,
      "step": 256
    },
    {
      "epoch": 0.2509765625,
      "grad_norm": 0.43916988372802734,
      "learning_rate": 0.00015073601570166832,
      "loss": 0.8396226763725281,
      "step": 257
    },
    {
      "epoch": 0.251953125,
      "grad_norm": 0.7745673060417175,
      "learning_rate": 0.0001505397448478901,
      "loss": 0.3085971772670746,
      "step": 258
    },
    {
      "epoch": 0.2529296875,
      "grad_norm": 0.4097643792629242,
      "learning_rate": 0.00015034347399411188,
      "loss": 0.2730502188205719,
      "step": 259
    },
    {
      "epoch": 0.25390625,
      "grad_norm": 0.4131183624267578,
      "learning_rate": 0.00015014720314033365,
      "loss": 0.5422588586807251,
      "step": 260
    },
    {
      "epoch": 0.2548828125,
      "grad_norm": 0.469498872756958,
      "learning_rate": 0.00014995093228655545,
      "loss": 0.6572885513305664,
      "step": 261
    },
    {
      "epoch": 0.255859375,
      "grad_norm": 0.3662133514881134,
      "learning_rate": 0.00014975466143277725,
      "loss": 0.9272421598434448,
      "step": 262
    },
    {
      "epoch": 0.2568359375,
      "grad_norm": 0.38194844126701355,
      "learning_rate": 0.00014955839057899904,
      "loss": 0.6010634303092957,
      "step": 263
    },
    {
      "epoch": 0.2578125,
      "grad_norm": 0.3645467758178711,
      "learning_rate": 0.0001493621197252208,
      "loss": 0.9131143093109131,
      "step": 264
    },
    {
      "epoch": 0.2587890625,
      "grad_norm": 0.3304290771484375,
      "learning_rate": 0.0001491658488714426,
      "loss": 0.4593530297279358,
      "step": 265
    },
    {
      "epoch": 0.259765625,
      "grad_norm": 0.7529020309448242,
      "learning_rate": 0.00014896957801766437,
      "loss": 0.5219628810882568,
      "step": 266
    },
    {
      "epoch": 0.2607421875,
      "grad_norm": 0.4974548816680908,
      "learning_rate": 0.00014877330716388617,
      "loss": 0.7617945075035095,
      "step": 267
    },
    {
      "epoch": 0.26171875,
      "grad_norm": 0.28884655237197876,
      "learning_rate": 0.00014857703631010797,
      "loss": 0.4288986921310425,
      "step": 268
    },
    {
      "epoch": 0.2626953125,
      "grad_norm": 0.5195730328559875,
      "learning_rate": 0.00014838076545632976,
      "loss": 0.830593466758728,
      "step": 269
    },
    {
      "epoch": 0.263671875,
      "grad_norm": 0.40689924359321594,
      "learning_rate": 0.00014818449460255153,
      "loss": 0.7528857588768005,
      "step": 270
    },
    {
      "epoch": 0.2646484375,
      "grad_norm": 0.33955928683280945,
      "learning_rate": 0.00014798822374877333,
      "loss": 0.5274187326431274,
      "step": 271
    },
    {
      "epoch": 0.265625,
      "grad_norm": 1.0572726726531982,
      "learning_rate": 0.0001477919528949951,
      "loss": 0.7389089465141296,
      "step": 272
    },
    {
      "epoch": 0.2666015625,
      "grad_norm": 0.5191348791122437,
      "learning_rate": 0.00014759568204121686,
      "loss": 0.4842514991760254,
      "step": 273
    },
    {
      "epoch": 0.267578125,
      "grad_norm": 0.3779315650463104,
      "learning_rate": 0.00014739941118743866,
      "loss": 0.7406666278839111,
      "step": 274
    },
    {
      "epoch": 0.2685546875,
      "grad_norm": 0.6065999865531921,
      "learning_rate": 0.00014720314033366046,
      "loss": 0.6771246790885925,
      "step": 275
    },
    {
      "epoch": 0.26953125,
      "grad_norm": 0.537529468536377,
      "learning_rate": 0.00014700686947988225,
      "loss": 0.861257791519165,
      "step": 276
    },
    {
      "epoch": 0.2705078125,
      "grad_norm": 0.3961732089519501,
      "learning_rate": 0.00014681059862610402,
      "loss": 0.9672999382019043,
      "step": 277
    },
    {
      "epoch": 0.271484375,
      "grad_norm": 0.45974740386009216,
      "learning_rate": 0.00014661432777232582,
      "loss": 0.5789016485214233,
      "step": 278
    },
    {
      "epoch": 0.2724609375,
      "grad_norm": 0.7211292386054993,
      "learning_rate": 0.00014641805691854759,
      "loss": 0.867314338684082,
      "step": 279
    },
    {
      "epoch": 0.2734375,
      "grad_norm": 0.6938930749893188,
      "learning_rate": 0.00014622178606476938,
      "loss": 0.4570122957229614,
      "step": 280
    },
    {
      "epoch": 0.2744140625,
      "grad_norm": 0.5093329548835754,
      "learning_rate": 0.00014602551521099118,
      "loss": 0.9487482309341431,
      "step": 281
    },
    {
      "epoch": 0.275390625,
      "grad_norm": 0.4403358995914459,
      "learning_rate": 0.00014582924435721297,
      "loss": 0.5330759286880493,
      "step": 282
    },
    {
      "epoch": 0.2763671875,
      "grad_norm": 0.5305198431015015,
      "learning_rate": 0.00014563297350343474,
      "loss": 0.8727459907531738,
      "step": 283
    },
    {
      "epoch": 0.27734375,
      "grad_norm": 0.49577099084854126,
      "learning_rate": 0.00014543670264965654,
      "loss": 0.6166709065437317,
      "step": 284
    },
    {
      "epoch": 0.2783203125,
      "grad_norm": 0.4856763780117035,
      "learning_rate": 0.0001452404317958783,
      "loss": 0.920722484588623,
      "step": 285
    },
    {
      "epoch": 0.279296875,
      "grad_norm": 0.3397112786769867,
      "learning_rate": 0.0001450441609421001,
      "loss": 1.001542329788208,
      "step": 286
    },
    {
      "epoch": 0.2802734375,
      "grad_norm": 0.591691792011261,
      "learning_rate": 0.0001448478900883219,
      "loss": 0.4898494780063629,
      "step": 287
    },
    {
      "epoch": 0.28125,
      "grad_norm": 0.45293164253234863,
      "learning_rate": 0.00014465161923454367,
      "loss": 0.4958389401435852,
      "step": 288
    },
    {
      "epoch": 0.2822265625,
      "grad_norm": 0.38414305448532104,
      "learning_rate": 0.00014445534838076546,
      "loss": 0.3971215784549713,
      "step": 289
    },
    {
      "epoch": 0.283203125,
      "grad_norm": 0.5568608045578003,
      "learning_rate": 0.00014425907752698723,
      "loss": 0.7953230142593384,
      "step": 290
    },
    {
      "epoch": 0.2841796875,
      "grad_norm": 0.3680984377861023,
      "learning_rate": 0.00014406280667320903,
      "loss": 0.703729510307312,
      "step": 291
    },
    {
      "epoch": 0.28515625,
      "grad_norm": 0.4263870120048523,
      "learning_rate": 0.00014386653581943082,
      "loss": 0.7433100938796997,
      "step": 292
    },
    {
      "epoch": 0.2861328125,
      "grad_norm": 1.3262213468551636,
      "learning_rate": 0.00014367026496565262,
      "loss": 0.8011248111724854,
      "step": 293
    },
    {
      "epoch": 0.287109375,
      "grad_norm": 0.44766104221343994,
      "learning_rate": 0.0001434739941118744,
      "loss": 0.6682827472686768,
      "step": 294
    },
    {
      "epoch": 0.2880859375,
      "grad_norm": 0.7399169206619263,
      "learning_rate": 0.00014327772325809619,
      "loss": 0.8356127142906189,
      "step": 295
    },
    {
      "epoch": 0.2890625,
      "grad_norm": 0.3582242727279663,
      "learning_rate": 0.00014308145240431795,
      "loss": 0.7127545475959778,
      "step": 296
    },
    {
      "epoch": 0.2900390625,
      "grad_norm": 0.5251145958900452,
      "learning_rate": 0.00014288518155053975,
      "loss": 0.7467620968818665,
      "step": 297
    },
    {
      "epoch": 0.291015625,
      "grad_norm": 0.639377772808075,
      "learning_rate": 0.00014268891069676155,
      "loss": 0.434887170791626,
      "step": 298
    },
    {
      "epoch": 0.2919921875,
      "grad_norm": 0.5007404685020447,
      "learning_rate": 0.00014249263984298334,
      "loss": 1.028229832649231,
      "step": 299
    },
    {
      "epoch": 0.29296875,
      "grad_norm": 0.41101035475730896,
      "learning_rate": 0.0001422963689892051,
      "loss": 0.8766242265701294,
      "step": 300
    },
    {
      "epoch": 0.2939453125,
      "grad_norm": 0.3938690721988678,
      "learning_rate": 0.0001421000981354269,
      "loss": 0.7176960706710815,
      "step": 301
    },
    {
      "epoch": 0.294921875,
      "grad_norm": 0.5939344763755798,
      "learning_rate": 0.00014190382728164868,
      "loss": 0.6655953526496887,
      "step": 302
    },
    {
      "epoch": 0.2958984375,
      "grad_norm": 0.47224998474121094,
      "learning_rate": 0.00014170755642787047,
      "loss": 0.9155608415603638,
      "step": 303
    },
    {
      "epoch": 0.296875,
      "grad_norm": 0.41344454884529114,
      "learning_rate": 0.00014151128557409227,
      "loss": 0.6017557382583618,
      "step": 304
    },
    {
      "epoch": 0.2978515625,
      "grad_norm": 0.514320969581604,
      "learning_rate": 0.00014131501472031404,
      "loss": 0.6184566617012024,
      "step": 305
    },
    {
      "epoch": 0.298828125,
      "grad_norm": 0.5005887150764465,
      "learning_rate": 0.00014111874386653583,
      "loss": 0.6652892231941223,
      "step": 306
    },
    {
      "epoch": 0.2998046875,
      "grad_norm": 0.5872619152069092,
      "learning_rate": 0.0001409224730127576,
      "loss": 0.8618959784507751,
      "step": 307
    },
    {
      "epoch": 0.30078125,
      "grad_norm": 0.5114542245864868,
      "learning_rate": 0.0001407262021589794,
      "loss": 0.6637990474700928,
      "step": 308
    },
    {
      "epoch": 0.3017578125,
      "grad_norm": 1.141750693321228,
      "learning_rate": 0.00014052993130520117,
      "loss": 0.6234999299049377,
      "step": 309
    },
    {
      "epoch": 0.302734375,
      "grad_norm": 0.4786873459815979,
      "learning_rate": 0.000140333660451423,
      "loss": 0.9601540565490723,
      "step": 310
    },
    {
      "epoch": 0.3037109375,
      "grad_norm": 0.6048462390899658,
      "learning_rate": 0.00014013738959764476,
      "loss": 0.5895652770996094,
      "step": 311
    },
    {
      "epoch": 0.3046875,
      "grad_norm": 0.7435188889503479,
      "learning_rate": 0.00013994111874386655,
      "loss": 1.196149468421936,
      "step": 312
    },
    {
      "epoch": 0.3056640625,
      "grad_norm": 0.7936303019523621,
      "learning_rate": 0.00013974484789008832,
      "loss": 0.6073983907699585,
      "step": 313
    },
    {
      "epoch": 0.306640625,
      "grad_norm": 0.5199156403541565,
      "learning_rate": 0.00013954857703631012,
      "loss": 0.2734944224357605,
      "step": 314
    },
    {
      "epoch": 0.3076171875,
      "grad_norm": 0.38845276832580566,
      "learning_rate": 0.0001393523061825319,
      "loss": 0.604506254196167,
      "step": 315
    },
    {
      "epoch": 0.30859375,
      "grad_norm": 0.6925122737884521,
      "learning_rate": 0.0001391560353287537,
      "loss": 1.0446012020111084,
      "step": 316
    },
    {
      "epoch": 0.3095703125,
      "grad_norm": 0.4950433075428009,
      "learning_rate": 0.00013895976447497548,
      "loss": 1.027349591255188,
      "step": 317
    },
    {
      "epoch": 0.310546875,
      "grad_norm": 0.36179935932159424,
      "learning_rate": 0.00013876349362119725,
      "loss": 0.6760075688362122,
      "step": 318
    },
    {
      "epoch": 0.3115234375,
      "grad_norm": 0.3730153739452362,
      "learning_rate": 0.00013856722276741904,
      "loss": 0.47779884934425354,
      "step": 319
    },
    {
      "epoch": 0.3125,
      "grad_norm": 0.6181739568710327,
      "learning_rate": 0.0001383709519136408,
      "loss": 0.4747524857521057,
      "step": 320
    },
    {
      "epoch": 0.3134765625,
      "grad_norm": 0.8233240246772766,
      "learning_rate": 0.0001381746810598626,
      "loss": 0.490276575088501,
      "step": 321
    },
    {
      "epoch": 0.314453125,
      "grad_norm": 0.6492604613304138,
      "learning_rate": 0.0001379784102060844,
      "loss": 0.44847172498703003,
      "step": 322
    },
    {
      "epoch": 0.3154296875,
      "grad_norm": 0.5506369471549988,
      "learning_rate": 0.0001377821393523062,
      "loss": 0.47955968976020813,
      "step": 323
    },
    {
      "epoch": 0.31640625,
      "grad_norm": 0.4187554717063904,
      "learning_rate": 0.00013758586849852797,
      "loss": 0.6466250419616699,
      "step": 324
    },
    {
      "epoch": 0.3173828125,
      "grad_norm": 0.3976380527019501,
      "learning_rate": 0.00013738959764474977,
      "loss": 0.756473183631897,
      "step": 325
    },
    {
      "epoch": 0.318359375,
      "grad_norm": 0.6089552044868469,
      "learning_rate": 0.00013719332679097153,
      "loss": 0.9309840202331543,
      "step": 326
    },
    {
      "epoch": 0.3193359375,
      "grad_norm": 0.31628501415252686,
      "learning_rate": 0.00013699705593719333,
      "loss": 0.7739764451980591,
      "step": 327
    },
    {
      "epoch": 0.3203125,
      "grad_norm": 0.6984357237815857,
      "learning_rate": 0.00013680078508341513,
      "loss": 1.0047030448913574,
      "step": 328
    },
    {
      "epoch": 0.3212890625,
      "grad_norm": 0.42705219984054565,
      "learning_rate": 0.00013660451422963692,
      "loss": 0.5215034484863281,
      "step": 329
    },
    {
      "epoch": 0.322265625,
      "grad_norm": 0.3548984229564667,
      "learning_rate": 0.0001364082433758587,
      "loss": 0.777184009552002,
      "step": 330
    },
    {
      "epoch": 0.3232421875,
      "grad_norm": 0.6042805314064026,
      "learning_rate": 0.00013621197252208046,
      "loss": 0.469806432723999,
      "step": 331
    },
    {
      "epoch": 0.32421875,
      "grad_norm": 0.43482446670532227,
      "learning_rate": 0.00013601570166830226,
      "loss": 0.8123322129249573,
      "step": 332
    },
    {
      "epoch": 0.3251953125,
      "grad_norm": 0.4851783812046051,
      "learning_rate": 0.00013581943081452405,
      "loss": 1.1560527086257935,
      "step": 333
    },
    {
      "epoch": 0.326171875,
      "grad_norm": 0.681423008441925,
      "learning_rate": 0.00013562315996074585,
      "loss": 0.5681013464927673,
      "step": 334
    },
    {
      "epoch": 0.3271484375,
      "grad_norm": 0.43838411569595337,
      "learning_rate": 0.00013542688910696762,
      "loss": 0.8758999109268188,
      "step": 335
    },
    {
      "epoch": 0.328125,
      "grad_norm": 0.5508302450180054,
      "learning_rate": 0.0001352306182531894,
      "loss": 0.7725740671157837,
      "step": 336
    },
    {
      "epoch": 0.3291015625,
      "grad_norm": 0.2603519856929779,
      "learning_rate": 0.00013503434739941118,
      "loss": 0.357033908367157,
      "step": 337
    },
    {
      "epoch": 0.330078125,
      "grad_norm": 0.38098394870758057,
      "learning_rate": 0.00013483807654563298,
      "loss": 0.41752922534942627,
      "step": 338
    },
    {
      "epoch": 0.3310546875,
      "grad_norm": 0.5308575630187988,
      "learning_rate": 0.00013464180569185477,
      "loss": 0.6187021732330322,
      "step": 339
    },
    {
      "epoch": 0.33203125,
      "grad_norm": 0.4033392369747162,
      "learning_rate": 0.00013444553483807657,
      "loss": 0.9481551647186279,
      "step": 340
    },
    {
      "epoch": 0.3330078125,
      "grad_norm": 0.3999135494232178,
      "learning_rate": 0.00013424926398429834,
      "loss": 0.6853100657463074,
      "step": 341
    },
    {
      "epoch": 0.333984375,
      "grad_norm": 0.4521353840827942,
      "learning_rate": 0.00013405299313052013,
      "loss": 1.0335659980773926,
      "step": 342
    },
    {
      "epoch": 0.3349609375,
      "grad_norm": 0.3538281321525574,
      "learning_rate": 0.0001338567222767419,
      "loss": 0.821506142616272,
      "step": 343
    },
    {
      "epoch": 0.3359375,
      "grad_norm": 0.49575889110565186,
      "learning_rate": 0.0001336604514229637,
      "loss": 0.6124354004859924,
      "step": 344
    },
    {
      "epoch": 0.3369140625,
      "grad_norm": 0.37985700368881226,
      "learning_rate": 0.0001334641805691855,
      "loss": 0.6803320646286011,
      "step": 345
    },
    {
      "epoch": 0.337890625,
      "grad_norm": 0.3533600866794586,
      "learning_rate": 0.00013326790971540726,
      "loss": 0.7260403037071228,
      "step": 346
    },
    {
      "epoch": 0.3388671875,
      "grad_norm": 0.49213504791259766,
      "learning_rate": 0.00013307163886162906,
      "loss": 0.9051091074943542,
      "step": 347
    },
    {
      "epoch": 0.33984375,
      "grad_norm": 0.37704166769981384,
      "learning_rate": 0.00013287536800785083,
      "loss": 0.4471222460269928,
      "step": 348
    },
    {
      "epoch": 0.3408203125,
      "grad_norm": 0.4309573471546173,
      "learning_rate": 0.00013267909715407262,
      "loss": 0.749025285243988,
      "step": 349
    },
    {
      "epoch": 0.341796875,
      "grad_norm": 0.7491689920425415,
      "learning_rate": 0.0001324828263002944,
      "loss": 1.1318167448043823,
      "step": 350
    },
    {
      "epoch": 0.3427734375,
      "grad_norm": 0.3965498208999634,
      "learning_rate": 0.00013228655544651622,
      "loss": 0.8451839685440063,
      "step": 351
    },
    {
      "epoch": 0.34375,
      "grad_norm": 0.4446418285369873,
      "learning_rate": 0.00013209028459273798,
      "loss": 0.7875360250473022,
      "step": 352
    },
    {
      "epoch": 0.3447265625,
      "grad_norm": 0.3396705985069275,
      "learning_rate": 0.00013189401373895978,
      "loss": 0.8446518182754517,
      "step": 353
    },
    {
      "epoch": 0.345703125,
      "grad_norm": 0.3436250388622284,
      "learning_rate": 0.00013169774288518155,
      "loss": 0.8995112180709839,
      "step": 354
    },
    {
      "epoch": 0.3466796875,
      "grad_norm": 0.33643823862075806,
      "learning_rate": 0.00013150147203140334,
      "loss": 0.6253601312637329,
      "step": 355
    },
    {
      "epoch": 0.34765625,
      "grad_norm": 0.39978718757629395,
      "learning_rate": 0.0001313052011776251,
      "loss": 0.31882500648498535,
      "step": 356
    },
    {
      "epoch": 0.3486328125,
      "grad_norm": 0.3054925799369812,
      "learning_rate": 0.00013110893032384694,
      "loss": 0.3698769807815552,
      "step": 357
    },
    {
      "epoch": 0.349609375,
      "grad_norm": 0.3789948523044586,
      "learning_rate": 0.0001309126594700687,
      "loss": 0.9039162397384644,
      "step": 358
    },
    {
      "epoch": 0.3505859375,
      "grad_norm": 0.4192582964897156,
      "learning_rate": 0.0001307163886162905,
      "loss": 0.7852678298950195,
      "step": 359
    },
    {
      "epoch": 0.3515625,
      "grad_norm": 0.5130710601806641,
      "learning_rate": 0.00013052011776251227,
      "loss": 0.7745686769485474,
      "step": 360
    },
    {
      "epoch": 0.3525390625,
      "grad_norm": 0.39334234595298767,
      "learning_rate": 0.00013032384690873404,
      "loss": 0.7644802331924438,
      "step": 361
    },
    {
      "epoch": 0.353515625,
      "grad_norm": 0.6141180992126465,
      "learning_rate": 0.00013012757605495583,
      "loss": 0.6028044819831848,
      "step": 362
    },
    {
      "epoch": 0.3544921875,
      "grad_norm": 0.33263200521469116,
      "learning_rate": 0.00012993130520117763,
      "loss": 0.6908546090126038,
      "step": 363
    },
    {
      "epoch": 0.35546875,
      "grad_norm": 0.3901807367801666,
      "learning_rate": 0.00012973503434739943,
      "loss": 0.8896909952163696,
      "step": 364
    },
    {
      "epoch": 0.3564453125,
      "grad_norm": 0.3889808654785156,
      "learning_rate": 0.0001295387634936212,
      "loss": 0.622492790222168,
      "step": 365
    },
    {
      "epoch": 0.357421875,
      "grad_norm": 0.41004979610443115,
      "learning_rate": 0.000129342492639843,
      "loss": 0.6293104887008667,
      "step": 366
    },
    {
      "epoch": 0.3583984375,
      "grad_norm": 0.32929369807243347,
      "learning_rate": 0.00012914622178606476,
      "loss": 0.7049382925033569,
      "step": 367
    },
    {
      "epoch": 0.359375,
      "grad_norm": 0.5189999341964722,
      "learning_rate": 0.00012894995093228656,
      "loss": 0.9230547547340393,
      "step": 368
    },
    {
      "epoch": 0.3603515625,
      "grad_norm": 0.290991872549057,
      "learning_rate": 0.00012875368007850835,
      "loss": 0.5716772079467773,
      "step": 369
    },
    {
      "epoch": 0.361328125,
      "grad_norm": 0.3976893126964569,
      "learning_rate": 0.00012855740922473015,
      "loss": 0.4593455493450165,
      "step": 370
    },
    {
      "epoch": 0.3623046875,
      "grad_norm": 0.38385459780693054,
      "learning_rate": 0.00012836113837095192,
      "loss": 0.4766542315483093,
      "step": 371
    },
    {
      "epoch": 0.36328125,
      "grad_norm": 0.45652449131011963,
      "learning_rate": 0.0001281648675171737,
      "loss": 0.9292062520980835,
      "step": 372
    },
    {
      "epoch": 0.3642578125,
      "grad_norm": 0.384463906288147,
      "learning_rate": 0.00012796859666339548,
      "loss": 0.7896109223365784,
      "step": 373
    },
    {
      "epoch": 0.365234375,
      "grad_norm": 0.43412724137306213,
      "learning_rate": 0.00012777232580961728,
      "loss": 0.6185650825500488,
      "step": 374
    },
    {
      "epoch": 0.3662109375,
      "grad_norm": 0.4574507772922516,
      "learning_rate": 0.00012757605495583907,
      "loss": 0.5614027380943298,
      "step": 375
    },
    {
      "epoch": 0.3671875,
      "grad_norm": 0.2921536862850189,
      "learning_rate": 0.00012737978410206084,
      "loss": 0.26786333322525024,
      "step": 376
    },
    {
      "epoch": 0.3681640625,
      "grad_norm": 0.5887529850006104,
      "learning_rate": 0.00012718351324828264,
      "loss": 0.4167410433292389,
      "step": 377
    },
    {
      "epoch": 0.369140625,
      "grad_norm": 0.3651127815246582,
      "learning_rate": 0.0001269872423945044,
      "loss": 1.0140016078948975,
      "step": 378
    },
    {
      "epoch": 0.3701171875,
      "grad_norm": 0.47206228971481323,
      "learning_rate": 0.0001267909715407262,
      "loss": 0.8293377757072449,
      "step": 379
    },
    {
      "epoch": 0.37109375,
      "grad_norm": 0.6319689154624939,
      "learning_rate": 0.000126594700686948,
      "loss": 0.7301446795463562,
      "step": 380
    },
    {
      "epoch": 0.3720703125,
      "grad_norm": 0.5163951516151428,
      "learning_rate": 0.0001263984298331698,
      "loss": 0.9944421648979187,
      "step": 381
    },
    {
      "epoch": 0.373046875,
      "grad_norm": 0.519072949886322,
      "learning_rate": 0.00012620215897939156,
      "loss": 0.6176541447639465,
      "step": 382
    },
    {
      "epoch": 0.3740234375,
      "grad_norm": 3.0750813484191895,
      "learning_rate": 0.00012600588812561336,
      "loss": 0.7531320452690125,
      "step": 383
    },
    {
      "epoch": 0.375,
      "grad_norm": 0.3246331512928009,
      "learning_rate": 0.00012580961727183513,
      "loss": 0.3269459903240204,
      "step": 384
    },
    {
      "epoch": 0.3759765625,
      "grad_norm": 1.1105197668075562,
      "learning_rate": 0.00012561334641805692,
      "loss": 0.4228656589984894,
      "step": 385
    },
    {
      "epoch": 0.376953125,
      "grad_norm": 0.6776182055473328,
      "learning_rate": 0.00012541707556427872,
      "loss": 0.791953980922699,
      "step": 386
    },
    {
      "epoch": 0.3779296875,
      "grad_norm": 0.4413786828517914,
      "learning_rate": 0.00012522080471050052,
      "loss": 0.7953442335128784,
      "step": 387
    },
    {
      "epoch": 0.37890625,
      "grad_norm": 0.4036264419555664,
      "learning_rate": 0.00012502453385672228,
      "loss": 0.6062744855880737,
      "step": 388
    },
    {
      "epoch": 0.3798828125,
      "grad_norm": 1.0638166666030884,
      "learning_rate": 0.00012482826300294408,
      "loss": 1.0578093528747559,
      "step": 389
    },
    {
      "epoch": 0.380859375,
      "grad_norm": 0.2518276572227478,
      "learning_rate": 0.00012463199214916585,
      "loss": 0.5070685148239136,
      "step": 390
    },
    {
      "epoch": 0.3818359375,
      "grad_norm": 0.3338214159011841,
      "learning_rate": 0.00012443572129538762,
      "loss": 0.7665579915046692,
      "step": 391
    },
    {
      "epoch": 0.3828125,
      "grad_norm": 0.4730507433414459,
      "learning_rate": 0.00012423945044160944,
      "loss": 0.48353517055511475,
      "step": 392
    },
    {
      "epoch": 0.3837890625,
      "grad_norm": 0.3488924503326416,
      "learning_rate": 0.0001240431795878312,
      "loss": 0.4422420561313629,
      "step": 393
    },
    {
      "epoch": 0.384765625,
      "grad_norm": 0.2397361695766449,
      "learning_rate": 0.000123846908734053,
      "loss": 0.7025644183158875,
      "step": 394
    },
    {
      "epoch": 0.3857421875,
      "grad_norm": 0.3638167679309845,
      "learning_rate": 0.00012365063788027478,
      "loss": 0.5372107625007629,
      "step": 395
    },
    {
      "epoch": 0.38671875,
      "grad_norm": 0.4088346064090729,
      "learning_rate": 0.00012345436702649657,
      "loss": 0.7636011838912964,
      "step": 396
    },
    {
      "epoch": 0.3876953125,
      "grad_norm": 0.36985111236572266,
      "learning_rate": 0.00012325809617271834,
      "loss": 0.6720612645149231,
      "step": 397
    },
    {
      "epoch": 0.388671875,
      "grad_norm": 0.37556055188179016,
      "learning_rate": 0.00012306182531894016,
      "loss": 0.8087592124938965,
      "step": 398
    },
    {
      "epoch": 0.3896484375,
      "grad_norm": 0.6851724982261658,
      "learning_rate": 0.00012286555446516193,
      "loss": 0.780835747718811,
      "step": 399
    },
    {
      "epoch": 0.390625,
      "grad_norm": 0.3453989326953888,
      "learning_rate": 0.00012266928361138373,
      "loss": 0.8235517740249634,
      "step": 400
    },
    {
      "epoch": 0.3916015625,
      "grad_norm": 0.43622198700904846,
      "learning_rate": 0.0001224730127576055,
      "loss": 0.3758167028427124,
      "step": 401
    },
    {
      "epoch": 0.392578125,
      "grad_norm": 0.4364018142223358,
      "learning_rate": 0.0001222767419038273,
      "loss": 0.7123017907142639,
      "step": 402
    },
    {
      "epoch": 0.3935546875,
      "grad_norm": 0.24169716238975525,
      "learning_rate": 0.00012208047105004906,
      "loss": 0.48390328884124756,
      "step": 403
    },
    {
      "epoch": 0.39453125,
      "grad_norm": 3.4902851581573486,
      "learning_rate": 0.00012188420019627087,
      "loss": 0.8519951105117798,
      "step": 404
    },
    {
      "epoch": 0.3955078125,
      "grad_norm": 0.8332751989364624,
      "learning_rate": 0.00012168792934249264,
      "loss": 0.7562370896339417,
      "step": 405
    },
    {
      "epoch": 0.396484375,
      "grad_norm": 0.3582589030265808,
      "learning_rate": 0.00012149165848871442,
      "loss": 0.3723471164703369,
      "step": 406
    },
    {
      "epoch": 0.3974609375,
      "grad_norm": 0.48302146792411804,
      "learning_rate": 0.00012129538763493622,
      "loss": 1.0008171796798706,
      "step": 407
    },
    {
      "epoch": 0.3984375,
      "grad_norm": 0.3510138988494873,
      "learning_rate": 0.000121099116781158,
      "loss": 0.30772703886032104,
      "step": 408
    },
    {
      "epoch": 0.3994140625,
      "grad_norm": 0.2771015763282776,
      "learning_rate": 0.0001209028459273798,
      "loss": 0.4403090178966522,
      "step": 409
    },
    {
      "epoch": 0.400390625,
      "grad_norm": 0.42239415645599365,
      "learning_rate": 0.00012070657507360156,
      "loss": 0.5451241731643677,
      "step": 410
    },
    {
      "epoch": 0.4013671875,
      "grad_norm": 0.27876874804496765,
      "learning_rate": 0.00012051030421982336,
      "loss": 0.3590753972530365,
      "step": 411
    },
    {
      "epoch": 0.40234375,
      "grad_norm": 0.42854824662208557,
      "learning_rate": 0.00012031403336604514,
      "loss": 1.0192680358886719,
      "step": 412
    },
    {
      "epoch": 0.4033203125,
      "grad_norm": 0.32980695366859436,
      "learning_rate": 0.00012011776251226694,
      "loss": 0.6476566195487976,
      "step": 413
    },
    {
      "epoch": 0.404296875,
      "grad_norm": 0.45046037435531616,
      "learning_rate": 0.00011992149165848872,
      "loss": 0.9548048973083496,
      "step": 414
    },
    {
      "epoch": 0.4052734375,
      "grad_norm": 0.4176082909107208,
      "learning_rate": 0.00011972522080471052,
      "loss": 0.3793225586414337,
      "step": 415
    },
    {
      "epoch": 0.40625,
      "grad_norm": 0.335823118686676,
      "learning_rate": 0.00011952894995093229,
      "loss": 0.5807560086250305,
      "step": 416
    },
    {
      "epoch": 0.4072265625,
      "grad_norm": 0.4758591651916504,
      "learning_rate": 0.00011933267909715408,
      "loss": 0.3924551010131836,
      "step": 417
    },
    {
      "epoch": 0.408203125,
      "grad_norm": 0.21527709066867828,
      "learning_rate": 0.00011913640824337586,
      "loss": 0.1651245653629303,
      "step": 418
    },
    {
      "epoch": 0.4091796875,
      "grad_norm": 0.31255391240119934,
      "learning_rate": 0.00011894013738959766,
      "loss": 0.6133516430854797,
      "step": 419
    },
    {
      "epoch": 0.41015625,
      "grad_norm": 0.40668365359306335,
      "learning_rate": 0.00011874386653581944,
      "loss": 0.894720196723938,
      "step": 420
    },
    {
      "epoch": 0.4111328125,
      "grad_norm": 0.35574087500572205,
      "learning_rate": 0.00011854759568204121,
      "loss": 0.9017484188079834,
      "step": 421
    },
    {
      "epoch": 0.412109375,
      "grad_norm": 0.3389612138271332,
      "learning_rate": 0.00011835132482826301,
      "loss": 0.7961660623550415,
      "step": 422
    },
    {
      "epoch": 0.4130859375,
      "grad_norm": 0.8334202766418457,
      "learning_rate": 0.00011815505397448479,
      "loss": 0.8654063940048218,
      "step": 423
    },
    {
      "epoch": 0.4140625,
      "grad_norm": 0.5917571187019348,
      "learning_rate": 0.00011795878312070659,
      "loss": 0.631730318069458,
      "step": 424
    },
    {
      "epoch": 0.4150390625,
      "grad_norm": 0.4908443093299866,
      "learning_rate": 0.00011776251226692835,
      "loss": 0.3205869495868683,
      "step": 425
    },
    {
      "epoch": 0.416015625,
      "grad_norm": 0.8349789381027222,
      "learning_rate": 0.00011756624141315016,
      "loss": 0.8526176810264587,
      "step": 426
    },
    {
      "epoch": 0.4169921875,
      "grad_norm": 0.38712671399116516,
      "learning_rate": 0.00011736997055937193,
      "loss": 0.6580482125282288,
      "step": 427
    },
    {
      "epoch": 0.41796875,
      "grad_norm": 0.766034722328186,
      "learning_rate": 0.00011717369970559373,
      "loss": 0.5494309663772583,
      "step": 428
    },
    {
      "epoch": 0.4189453125,
      "grad_norm": 0.33322349190711975,
      "learning_rate": 0.00011697742885181551,
      "loss": 0.38351887464523315,
      "step": 429
    },
    {
      "epoch": 0.419921875,
      "grad_norm": 0.411155641078949,
      "learning_rate": 0.00011678115799803731,
      "loss": 0.8139836192131042,
      "step": 430
    },
    {
      "epoch": 0.4208984375,
      "grad_norm": 0.5857217907905579,
      "learning_rate": 0.00011658488714425908,
      "loss": 0.5668150186538696,
      "step": 431
    },
    {
      "epoch": 0.421875,
      "grad_norm": 0.8849710822105408,
      "learning_rate": 0.00011638861629048087,
      "loss": 0.5478008985519409,
      "step": 432
    },
    {
      "epoch": 0.4228515625,
      "grad_norm": 0.6771020293235779,
      "learning_rate": 0.00011619234543670265,
      "loss": 0.608709454536438,
      "step": 433
    },
    {
      "epoch": 0.423828125,
      "grad_norm": 0.30138713121414185,
      "learning_rate": 0.00011599607458292445,
      "loss": 0.8240669369697571,
      "step": 434
    },
    {
      "epoch": 0.4248046875,
      "grad_norm": 0.3273598253726959,
      "learning_rate": 0.00011579980372914623,
      "loss": 0.6287229657173157,
      "step": 435
    },
    {
      "epoch": 0.42578125,
      "grad_norm": 0.5044806003570557,
      "learning_rate": 0.000115603532875368,
      "loss": 0.735835075378418,
      "step": 436
    },
    {
      "epoch": 0.4267578125,
      "grad_norm": 0.34495776891708374,
      "learning_rate": 0.0001154072620215898,
      "loss": 0.7688421010971069,
      "step": 437
    },
    {
      "epoch": 0.427734375,
      "grad_norm": 0.41923069953918457,
      "learning_rate": 0.00011521099116781158,
      "loss": 0.679617166519165,
      "step": 438
    },
    {
      "epoch": 0.4287109375,
      "grad_norm": 0.3509843945503235,
      "learning_rate": 0.00011501472031403338,
      "loss": 0.7478575110435486,
      "step": 439
    },
    {
      "epoch": 0.4296875,
      "grad_norm": 0.4758707582950592,
      "learning_rate": 0.00011481844946025514,
      "loss": 0.48871147632598877,
      "step": 440
    },
    {
      "epoch": 0.4306640625,
      "grad_norm": 0.30272597074508667,
      "learning_rate": 0.00011462217860647695,
      "loss": 0.4311315715312958,
      "step": 441
    },
    {
      "epoch": 0.431640625,
      "grad_norm": 0.5226417779922485,
      "learning_rate": 0.00011442590775269872,
      "loss": 0.8198300004005432,
      "step": 442
    },
    {
      "epoch": 0.4326171875,
      "grad_norm": 0.41183850169181824,
      "learning_rate": 0.00011422963689892052,
      "loss": 0.9958367347717285,
      "step": 443
    },
    {
      "epoch": 0.43359375,
      "grad_norm": 0.384048193693161,
      "learning_rate": 0.0001140333660451423,
      "loss": 0.3194778859615326,
      "step": 444
    },
    {
      "epoch": 0.4345703125,
      "grad_norm": 0.5035115480422974,
      "learning_rate": 0.0001138370951913641,
      "loss": 0.6455928683280945,
      "step": 445
    },
    {
      "epoch": 0.435546875,
      "grad_norm": 0.4875551462173462,
      "learning_rate": 0.00011364082433758587,
      "loss": 0.799978494644165,
      "step": 446
    },
    {
      "epoch": 0.4365234375,
      "grad_norm": 0.3395763337612152,
      "learning_rate": 0.00011344455348380768,
      "loss": 0.47672414779663086,
      "step": 447
    },
    {
      "epoch": 0.4375,
      "grad_norm": 0.5594314932823181,
      "learning_rate": 0.00011324828263002944,
      "loss": 0.4325803518295288,
      "step": 448
    },
    {
      "epoch": 0.4384765625,
      "grad_norm": 0.44647228717803955,
      "learning_rate": 0.00011305201177625124,
      "loss": 0.8119433522224426,
      "step": 449
    },
    {
      "epoch": 0.439453125,
      "grad_norm": 0.3190518915653229,
      "learning_rate": 0.00011285574092247302,
      "loss": 0.4949466288089752,
      "step": 450
    },
    {
      "epoch": 0.4404296875,
      "grad_norm": 0.5943452715873718,
      "learning_rate": 0.00011265947006869479,
      "loss": 0.8245764374732971,
      "step": 451
    },
    {
      "epoch": 0.44140625,
      "grad_norm": 0.8067309260368347,
      "learning_rate": 0.00011246319921491659,
      "loss": 0.39331740140914917,
      "step": 452
    },
    {
      "epoch": 0.4423828125,
      "grad_norm": 0.4130857288837433,
      "learning_rate": 0.00011226692836113837,
      "loss": 1.0005946159362793,
      "step": 453
    },
    {
      "epoch": 0.443359375,
      "grad_norm": 0.6839224100112915,
      "learning_rate": 0.00011207065750736017,
      "loss": 0.453269362449646,
      "step": 454
    },
    {
      "epoch": 0.4443359375,
      "grad_norm": 0.6282085180282593,
      "learning_rate": 0.00011187438665358195,
      "loss": 0.7137607932090759,
      "step": 455
    },
    {
      "epoch": 0.4453125,
      "grad_norm": 0.49894508719444275,
      "learning_rate": 0.00011167811579980374,
      "loss": 0.6289803981781006,
      "step": 456
    },
    {
      "epoch": 0.4462890625,
      "grad_norm": 0.3570895493030548,
      "learning_rate": 0.00011148184494602551,
      "loss": 0.3711976110935211,
      "step": 457
    },
    {
      "epoch": 0.447265625,
      "grad_norm": 0.28931114077568054,
      "learning_rate": 0.00011128557409224731,
      "loss": 0.5629679560661316,
      "step": 458
    },
    {
      "epoch": 0.4482421875,
      "grad_norm": 1.2492791414260864,
      "learning_rate": 0.00011108930323846909,
      "loss": 0.5821082592010498,
      "step": 459
    },
    {
      "epoch": 0.44921875,
      "grad_norm": 0.29861876368522644,
      "learning_rate": 0.00011089303238469089,
      "loss": 0.4129573106765747,
      "step": 460
    },
    {
      "epoch": 0.4501953125,
      "grad_norm": 0.5244950652122498,
      "learning_rate": 0.00011069676153091267,
      "loss": 0.8300201296806335,
      "step": 461
    },
    {
      "epoch": 0.451171875,
      "grad_norm": 0.446435809135437,
      "learning_rate": 0.00011050049067713446,
      "loss": 0.7500958442687988,
      "step": 462
    },
    {
      "epoch": 0.4521484375,
      "grad_norm": 0.4531306028366089,
      "learning_rate": 0.00011030421982335623,
      "loss": 0.8492609262466431,
      "step": 463
    },
    {
      "epoch": 0.453125,
      "grad_norm": 0.46944308280944824,
      "learning_rate": 0.00011010794896957802,
      "loss": 0.6209090948104858,
      "step": 464
    },
    {
      "epoch": 0.4541015625,
      "grad_norm": 0.5465651154518127,
      "learning_rate": 0.00010991167811579981,
      "loss": 0.5176469087600708,
      "step": 465
    },
    {
      "epoch": 0.455078125,
      "grad_norm": 0.36550402641296387,
      "learning_rate": 0.00010971540726202158,
      "loss": 0.6358295679092407,
      "step": 466
    },
    {
      "epoch": 0.4560546875,
      "grad_norm": 0.48919910192489624,
      "learning_rate": 0.00010951913640824338,
      "loss": 0.5903019905090332,
      "step": 467
    },
    {
      "epoch": 0.45703125,
      "grad_norm": 0.4378332793712616,
      "learning_rate": 0.00010932286555446516,
      "loss": 0.6710047721862793,
      "step": 468
    },
    {
      "epoch": 0.4580078125,
      "grad_norm": 0.3095405101776123,
      "learning_rate": 0.00010912659470068696,
      "loss": 0.6787213683128357,
      "step": 469
    },
    {
      "epoch": 0.458984375,
      "grad_norm": 0.40901967883110046,
      "learning_rate": 0.00010893032384690874,
      "loss": 0.6371384859085083,
      "step": 470
    },
    {
      "epoch": 0.4599609375,
      "grad_norm": 0.3962486982345581,
      "learning_rate": 0.00010873405299313053,
      "loss": 0.5823498964309692,
      "step": 471
    },
    {
      "epoch": 0.4609375,
      "grad_norm": 0.4094708263874054,
      "learning_rate": 0.0001085377821393523,
      "loss": 1.0396480560302734,
      "step": 472
    },
    {
      "epoch": 0.4619140625,
      "grad_norm": 0.5117614269256592,
      "learning_rate": 0.0001083415112855741,
      "loss": 0.6320610642433167,
      "step": 473
    },
    {
      "epoch": 0.462890625,
      "grad_norm": 0.28345227241516113,
      "learning_rate": 0.00010814524043179588,
      "loss": 0.33279290795326233,
      "step": 474
    },
    {
      "epoch": 0.4638671875,
      "grad_norm": 0.5475791096687317,
      "learning_rate": 0.00010794896957801768,
      "loss": 0.359570175409317,
      "step": 475
    },
    {
      "epoch": 0.46484375,
      "grad_norm": 0.44176843762397766,
      "learning_rate": 0.00010775269872423946,
      "loss": 0.7576714158058167,
      "step": 476
    },
    {
      "epoch": 0.4658203125,
      "grad_norm": 0.473562628030777,
      "learning_rate": 0.00010755642787046125,
      "loss": 0.8758799433708191,
      "step": 477
    },
    {
      "epoch": 0.466796875,
      "grad_norm": 0.41919219493865967,
      "learning_rate": 0.00010736015701668302,
      "loss": 0.863654375076294,
      "step": 478
    },
    {
      "epoch": 0.4677734375,
      "grad_norm": 0.4215691089630127,
      "learning_rate": 0.0001071638861629048,
      "loss": 0.5004569292068481,
      "step": 479
    },
    {
      "epoch": 0.46875,
      "grad_norm": 0.36801034212112427,
      "learning_rate": 0.0001069676153091266,
      "loss": 0.9330754280090332,
      "step": 480
    },
    {
      "epoch": 0.4697265625,
      "grad_norm": 0.42489972710609436,
      "learning_rate": 0.00010677134445534837,
      "loss": 1.0529820919036865,
      "step": 481
    },
    {
      "epoch": 0.470703125,
      "grad_norm": 0.4067368507385254,
      "learning_rate": 0.00010657507360157018,
      "loss": 0.5453970432281494,
      "step": 482
    },
    {
      "epoch": 0.4716796875,
      "grad_norm": 0.28611162304878235,
      "learning_rate": 0.00010637880274779195,
      "loss": 0.2348572313785553,
      "step": 483
    },
    {
      "epoch": 0.47265625,
      "grad_norm": 0.40047627687454224,
      "learning_rate": 0.00010618253189401374,
      "loss": 0.4776308834552765,
      "step": 484
    },
    {
      "epoch": 0.4736328125,
      "grad_norm": 0.5168628692626953,
      "learning_rate": 0.00010598626104023553,
      "loss": 0.9922167062759399,
      "step": 485
    },
    {
      "epoch": 0.474609375,
      "grad_norm": 0.3620246946811676,
      "learning_rate": 0.00010578999018645732,
      "loss": 0.7285036444664001,
      "step": 486
    },
    {
      "epoch": 0.4755859375,
      "grad_norm": 0.42711782455444336,
      "learning_rate": 0.00010559371933267909,
      "loss": 0.6387231349945068,
      "step": 487
    },
    {
      "epoch": 0.4765625,
      "grad_norm": 0.2139827311038971,
      "learning_rate": 0.0001053974484789009,
      "loss": 0.4295338988304138,
      "step": 488
    },
    {
      "epoch": 0.4775390625,
      "grad_norm": 0.31191739439964294,
      "learning_rate": 0.00010520117762512267,
      "loss": 0.42860671877861023,
      "step": 489
    },
    {
      "epoch": 0.478515625,
      "grad_norm": 0.2909379303455353,
      "learning_rate": 0.00010500490677134447,
      "loss": 0.47065097093582153,
      "step": 490
    },
    {
      "epoch": 0.4794921875,
      "grad_norm": 0.48990437388420105,
      "learning_rate": 0.00010480863591756625,
      "loss": 0.8870656490325928,
      "step": 491
    },
    {
      "epoch": 0.48046875,
      "grad_norm": 0.5662127733230591,
      "learning_rate": 0.00010461236506378804,
      "loss": 0.8007984161376953,
      "step": 492
    },
    {
      "epoch": 0.4814453125,
      "grad_norm": 0.3656634986400604,
      "learning_rate": 0.00010441609421000981,
      "loss": 0.41389334201812744,
      "step": 493
    },
    {
      "epoch": 0.482421875,
      "grad_norm": 0.39840465784072876,
      "learning_rate": 0.0001042198233562316,
      "loss": 0.6927056908607483,
      "step": 494
    },
    {
      "epoch": 0.4833984375,
      "grad_norm": 0.641647219657898,
      "learning_rate": 0.00010402355250245339,
      "loss": 0.7912976145744324,
      "step": 495
    },
    {
      "epoch": 0.484375,
      "grad_norm": 0.4522266685962677,
      "learning_rate": 0.00010382728164867517,
      "loss": 0.615374743938446,
      "step": 496
    },
    {
      "epoch": 0.4853515625,
      "grad_norm": 0.415444016456604,
      "learning_rate": 0.00010363101079489697,
      "loss": 0.8559135794639587,
      "step": 497
    },
    {
      "epoch": 0.486328125,
      "grad_norm": 0.4477578401565552,
      "learning_rate": 0.00010343473994111874,
      "loss": 0.6109384298324585,
      "step": 498
    },
    {
      "epoch": 0.4873046875,
      "grad_norm": 0.33097633719444275,
      "learning_rate": 0.00010323846908734053,
      "loss": 0.6325762271881104,
      "step": 499
    },
    {
      "epoch": 0.48828125,
      "grad_norm": 0.38771572709083557,
      "learning_rate": 0.00010304219823356232,
      "loss": 0.5979640483856201,
      "step": 500
    },
    {
      "epoch": 0.4892578125,
      "grad_norm": 0.3339928984642029,
      "learning_rate": 0.00010284592737978411,
      "loss": 0.6619001626968384,
      "step": 501
    },
    {
      "epoch": 0.490234375,
      "grad_norm": 0.6400135159492493,
      "learning_rate": 0.00010264965652600588,
      "loss": 0.28338727355003357,
      "step": 502
    },
    {
      "epoch": 0.4912109375,
      "grad_norm": 0.35763970017433167,
      "learning_rate": 0.00010245338567222769,
      "loss": 0.6373124122619629,
      "step": 503
    },
    {
      "epoch": 0.4921875,
      "grad_norm": 0.2136622965335846,
      "learning_rate": 0.00010225711481844946,
      "loss": 0.2315329760313034,
      "step": 504
    },
    {
      "epoch": 0.4931640625,
      "grad_norm": 0.6324110627174377,
      "learning_rate": 0.00010206084396467126,
      "loss": 1.0045514106750488,
      "step": 505
    },
    {
      "epoch": 0.494140625,
      "grad_norm": 0.4471307694911957,
      "learning_rate": 0.00010186457311089304,
      "loss": 0.5188390016555786,
      "step": 506
    },
    {
      "epoch": 0.4951171875,
      "grad_norm": 0.38222211599349976,
      "learning_rate": 0.00010166830225711483,
      "loss": 0.7351740598678589,
      "step": 507
    },
    {
      "epoch": 0.49609375,
      "grad_norm": 0.41885000467300415,
      "learning_rate": 0.0001014720314033366,
      "loss": 0.9071688055992126,
      "step": 508
    },
    {
      "epoch": 0.4970703125,
      "grad_norm": 0.8193621635437012,
      "learning_rate": 0.00010127576054955839,
      "loss": 0.7240473031997681,
      "step": 509
    },
    {
      "epoch": 0.498046875,
      "grad_norm": 0.2846645712852478,
      "learning_rate": 0.00010107948969578018,
      "loss": 0.351628839969635,
      "step": 510
    },
    {
      "epoch": 0.4990234375,
      "grad_norm": 0.4778954088687897,
      "learning_rate": 0.00010088321884200196,
      "loss": 0.7705833911895752,
      "step": 511
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.3384702503681183,
      "learning_rate": 0.00010068694798822376,
      "loss": 0.5467265248298645,
      "step": 512
    },
    {
      "epoch": 0.5009765625,
      "grad_norm": 0.43917056918144226,
      "learning_rate": 0.00010049067713444553,
      "loss": 0.9810686707496643,
      "step": 513
    },
    {
      "epoch": 0.501953125,
      "grad_norm": 0.4351615607738495,
      "learning_rate": 0.00010029440628066732,
      "loss": 0.9716764688491821,
      "step": 514
    },
    {
      "epoch": 0.5029296875,
      "grad_norm": 0.49873459339141846,
      "learning_rate": 0.00010009813542688911,
      "loss": 0.9183788299560547,
      "step": 515
    },
    {
      "epoch": 0.50390625,
      "grad_norm": 0.36710789799690247,
      "learning_rate": 9.990186457311089e-05,
      "loss": 0.49884548783302307,
      "step": 516
    },
    {
      "epoch": 0.5048828125,
      "grad_norm": 0.5402531623840332,
      "learning_rate": 9.970559371933269e-05,
      "loss": 0.6645570993423462,
      "step": 517
    },
    {
      "epoch": 0.505859375,
      "grad_norm": 0.4990559220314026,
      "learning_rate": 9.950932286555447e-05,
      "loss": 1.0321924686431885,
      "step": 518
    },
    {
      "epoch": 0.5068359375,
      "grad_norm": 0.4634752869606018,
      "learning_rate": 9.931305201177625e-05,
      "loss": 0.8484972715377808,
      "step": 519
    },
    {
      "epoch": 0.5078125,
      "grad_norm": 0.38584330677986145,
      "learning_rate": 9.911678115799805e-05,
      "loss": 0.3424939513206482,
      "step": 520
    },
    {
      "epoch": 0.5087890625,
      "grad_norm": 0.41148415207862854,
      "learning_rate": 9.892051030421983e-05,
      "loss": 0.7890703678131104,
      "step": 521
    },
    {
      "epoch": 0.509765625,
      "grad_norm": 0.35891374945640564,
      "learning_rate": 9.872423945044161e-05,
      "loss": 0.7387750744819641,
      "step": 522
    },
    {
      "epoch": 0.5107421875,
      "grad_norm": 0.4174203872680664,
      "learning_rate": 9.85279685966634e-05,
      "loss": 0.5610706806182861,
      "step": 523
    },
    {
      "epoch": 0.51171875,
      "grad_norm": 0.4062010645866394,
      "learning_rate": 9.833169774288519e-05,
      "loss": 0.6016039252281189,
      "step": 524
    },
    {
      "epoch": 0.5126953125,
      "grad_norm": 0.35915061831474304,
      "learning_rate": 9.813542688910697e-05,
      "loss": 0.37933990359306335,
      "step": 525
    },
    {
      "epoch": 0.513671875,
      "grad_norm": 0.49826234579086304,
      "learning_rate": 9.793915603532877e-05,
      "loss": 0.9650976657867432,
      "step": 526
    },
    {
      "epoch": 0.5146484375,
      "grad_norm": 0.4122180938720703,
      "learning_rate": 9.774288518155055e-05,
      "loss": 0.5477824211120605,
      "step": 527
    },
    {
      "epoch": 0.515625,
      "grad_norm": 0.3824058175086975,
      "learning_rate": 9.754661432777233e-05,
      "loss": 0.5163108706474304,
      "step": 528
    },
    {
      "epoch": 0.5166015625,
      "grad_norm": 0.4485555589199066,
      "learning_rate": 9.735034347399413e-05,
      "loss": 0.9402418732643127,
      "step": 529
    },
    {
      "epoch": 0.517578125,
      "grad_norm": 0.4053209722042084,
      "learning_rate": 9.715407262021591e-05,
      "loss": 0.9314478039741516,
      "step": 530
    },
    {
      "epoch": 0.5185546875,
      "grad_norm": 0.3183811604976654,
      "learning_rate": 9.695780176643768e-05,
      "loss": 0.6706205606460571,
      "step": 531
    },
    {
      "epoch": 0.51953125,
      "grad_norm": 0.40083932876586914,
      "learning_rate": 9.676153091265947e-05,
      "loss": 1.102424144744873,
      "step": 532
    },
    {
      "epoch": 0.5205078125,
      "grad_norm": 0.5949054956436157,
      "learning_rate": 9.656526005888126e-05,
      "loss": 0.8396608829498291,
      "step": 533
    },
    {
      "epoch": 0.521484375,
      "grad_norm": 0.41966959834098816,
      "learning_rate": 9.636898920510304e-05,
      "loss": 0.5641101002693176,
      "step": 534
    },
    {
      "epoch": 0.5224609375,
      "grad_norm": 0.448281466960907,
      "learning_rate": 9.617271835132484e-05,
      "loss": 0.44873932003974915,
      "step": 535
    },
    {
      "epoch": 0.5234375,
      "grad_norm": 0.47785645723342896,
      "learning_rate": 9.597644749754662e-05,
      "loss": 0.8799008131027222,
      "step": 536
    },
    {
      "epoch": 0.5244140625,
      "grad_norm": 0.45459261536598206,
      "learning_rate": 9.57801766437684e-05,
      "loss": 0.8261788487434387,
      "step": 537
    },
    {
      "epoch": 0.525390625,
      "grad_norm": 0.6168074607849121,
      "learning_rate": 9.55839057899902e-05,
      "loss": 0.9762136936187744,
      "step": 538
    },
    {
      "epoch": 0.5263671875,
      "grad_norm": 0.6500818133354187,
      "learning_rate": 9.538763493621198e-05,
      "loss": 0.9044640064239502,
      "step": 539
    },
    {
      "epoch": 0.52734375,
      "grad_norm": 0.31668490171432495,
      "learning_rate": 9.519136408243376e-05,
      "loss": 0.42503029108047485,
      "step": 540
    },
    {
      "epoch": 0.5283203125,
      "grad_norm": 0.4041314721107483,
      "learning_rate": 9.499509322865556e-05,
      "loss": 0.6643175482749939,
      "step": 541
    },
    {
      "epoch": 0.529296875,
      "grad_norm": 1.011020541191101,
      "learning_rate": 9.479882237487734e-05,
      "loss": 0.7636033892631531,
      "step": 542
    },
    {
      "epoch": 0.5302734375,
      "grad_norm": 0.3690396845340729,
      "learning_rate": 9.460255152109912e-05,
      "loss": 1.0516947507858276,
      "step": 543
    },
    {
      "epoch": 0.53125,
      "grad_norm": 0.288604199886322,
      "learning_rate": 9.440628066732092e-05,
      "loss": 0.3806208372116089,
      "step": 544
    },
    {
      "epoch": 0.5322265625,
      "grad_norm": 0.4247501790523529,
      "learning_rate": 9.42100098135427e-05,
      "loss": 0.8651745319366455,
      "step": 545
    },
    {
      "epoch": 0.533203125,
      "grad_norm": 1.1893255710601807,
      "learning_rate": 9.401373895976447e-05,
      "loss": 0.28601521253585815,
      "step": 546
    },
    {
      "epoch": 0.5341796875,
      "grad_norm": 0.3229619562625885,
      "learning_rate": 9.381746810598626e-05,
      "loss": 0.8316909670829773,
      "step": 547
    },
    {
      "epoch": 0.53515625,
      "grad_norm": 0.390278160572052,
      "learning_rate": 9.362119725220805e-05,
      "loss": 0.7263185977935791,
      "step": 548
    },
    {
      "epoch": 0.5361328125,
      "grad_norm": 0.2949998378753662,
      "learning_rate": 9.342492639842983e-05,
      "loss": 0.5417062044143677,
      "step": 549
    },
    {
      "epoch": 0.537109375,
      "grad_norm": 0.47482210397720337,
      "learning_rate": 9.322865554465163e-05,
      "loss": 0.6505849361419678,
      "step": 550
    },
    {
      "epoch": 0.5380859375,
      "grad_norm": 0.3653123676776886,
      "learning_rate": 9.303238469087341e-05,
      "loss": 0.7270935773849487,
      "step": 551
    },
    {
      "epoch": 0.5390625,
      "grad_norm": 0.5652351975440979,
      "learning_rate": 9.283611383709519e-05,
      "loss": 0.8330069780349731,
      "step": 552
    },
    {
      "epoch": 0.5400390625,
      "grad_norm": 0.448408842086792,
      "learning_rate": 9.263984298331699e-05,
      "loss": 0.8804951310157776,
      "step": 553
    },
    {
      "epoch": 0.541015625,
      "grad_norm": 0.7700690031051636,
      "learning_rate": 9.244357212953877e-05,
      "loss": 0.6466813087463379,
      "step": 554
    },
    {
      "epoch": 0.5419921875,
      "grad_norm": 0.45755863189697266,
      "learning_rate": 9.224730127576055e-05,
      "loss": 0.5548572540283203,
      "step": 555
    },
    {
      "epoch": 0.54296875,
      "grad_norm": 0.4113846719264984,
      "learning_rate": 9.205103042198235e-05,
      "loss": 0.9286736845970154,
      "step": 556
    },
    {
      "epoch": 0.5439453125,
      "grad_norm": 0.4555431604385376,
      "learning_rate": 9.185475956820413e-05,
      "loss": 0.8332977890968323,
      "step": 557
    },
    {
      "epoch": 0.544921875,
      "grad_norm": 0.5103408098220825,
      "learning_rate": 9.165848871442591e-05,
      "loss": 1.0110094547271729,
      "step": 558
    },
    {
      "epoch": 0.5458984375,
      "grad_norm": 0.299912691116333,
      "learning_rate": 9.146221786064771e-05,
      "loss": 0.3136459290981293,
      "step": 559
    },
    {
      "epoch": 0.546875,
      "grad_norm": 0.40499091148376465,
      "learning_rate": 9.126594700686948e-05,
      "loss": 0.6785961389541626,
      "step": 560
    },
    {
      "epoch": 0.5478515625,
      "grad_norm": 0.4190375804901123,
      "learning_rate": 9.106967615309127e-05,
      "loss": 0.9891744256019592,
      "step": 561
    },
    {
      "epoch": 0.548828125,
      "grad_norm": 0.6265519261360168,
      "learning_rate": 9.087340529931305e-05,
      "loss": 0.48712462186813354,
      "step": 562
    },
    {
      "epoch": 0.5498046875,
      "grad_norm": 0.466420978307724,
      "learning_rate": 9.067713444553484e-05,
      "loss": 0.5573943257331848,
      "step": 563
    },
    {
      "epoch": 0.55078125,
      "grad_norm": 0.3990301191806793,
      "learning_rate": 9.048086359175663e-05,
      "loss": 0.5893411040306091,
      "step": 564
    },
    {
      "epoch": 0.5517578125,
      "grad_norm": 0.31471043825149536,
      "learning_rate": 9.028459273797842e-05,
      "loss": 0.593424379825592,
      "step": 565
    },
    {
      "epoch": 0.552734375,
      "grad_norm": 0.46789905428886414,
      "learning_rate": 9.00883218842002e-05,
      "loss": 0.9398684501647949,
      "step": 566
    },
    {
      "epoch": 0.5537109375,
      "grad_norm": 0.48358282446861267,
      "learning_rate": 8.989205103042198e-05,
      "loss": 0.895098865032196,
      "step": 567
    },
    {
      "epoch": 0.5546875,
      "grad_norm": 0.25878453254699707,
      "learning_rate": 8.969578017664378e-05,
      "loss": 0.4817226231098175,
      "step": 568
    },
    {
      "epoch": 0.5556640625,
      "grad_norm": 0.5319378972053528,
      "learning_rate": 8.949950932286556e-05,
      "loss": 0.6119651794433594,
      "step": 569
    },
    {
      "epoch": 0.556640625,
      "grad_norm": 0.3002898097038269,
      "learning_rate": 8.930323846908734e-05,
      "loss": 0.28599199652671814,
      "step": 570
    },
    {
      "epoch": 0.5576171875,
      "grad_norm": 0.37161317467689514,
      "learning_rate": 8.910696761530914e-05,
      "loss": 0.3307079076766968,
      "step": 571
    },
    {
      "epoch": 0.55859375,
      "grad_norm": 0.4755436182022095,
      "learning_rate": 8.891069676153092e-05,
      "loss": 0.5868921279907227,
      "step": 572
    },
    {
      "epoch": 0.5595703125,
      "grad_norm": 0.3264123499393463,
      "learning_rate": 8.87144259077527e-05,
      "loss": 0.6682905554771423,
      "step": 573
    },
    {
      "epoch": 0.560546875,
      "grad_norm": 0.43468573689460754,
      "learning_rate": 8.85181550539745e-05,
      "loss": 0.6316066980361938,
      "step": 574
    },
    {
      "epoch": 0.5615234375,
      "grad_norm": 0.5759416222572327,
      "learning_rate": 8.832188420019627e-05,
      "loss": 0.5687480568885803,
      "step": 575
    },
    {
      "epoch": 0.5625,
      "grad_norm": 0.39352041482925415,
      "learning_rate": 8.812561334641806e-05,
      "loss": 0.3803275525569916,
      "step": 576
    },
    {
      "epoch": 0.5634765625,
      "grad_norm": 0.4155440926551819,
      "learning_rate": 8.792934249263984e-05,
      "loss": 0.3923049569129944,
      "step": 577
    },
    {
      "epoch": 0.564453125,
      "grad_norm": 0.34934133291244507,
      "learning_rate": 8.773307163886163e-05,
      "loss": 0.7100962996482849,
      "step": 578
    },
    {
      "epoch": 0.5654296875,
      "grad_norm": 0.3993069529533386,
      "learning_rate": 8.753680078508342e-05,
      "loss": 0.6711176037788391,
      "step": 579
    },
    {
      "epoch": 0.56640625,
      "grad_norm": 0.3445776700973511,
      "learning_rate": 8.73405299313052e-05,
      "loss": 0.6986067295074463,
      "step": 580
    },
    {
      "epoch": 0.5673828125,
      "grad_norm": 0.45837292075157166,
      "learning_rate": 8.714425907752699e-05,
      "loss": 0.9020513892173767,
      "step": 581
    },
    {
      "epoch": 0.568359375,
      "grad_norm": 0.3630208671092987,
      "learning_rate": 8.694798822374878e-05,
      "loss": 0.42499858140945435,
      "step": 582
    },
    {
      "epoch": 0.5693359375,
      "grad_norm": 0.41205838322639465,
      "learning_rate": 8.675171736997057e-05,
      "loss": 0.5535018444061279,
      "step": 583
    },
    {
      "epoch": 0.5703125,
      "grad_norm": 0.2596284747123718,
      "learning_rate": 8.655544651619235e-05,
      "loss": 0.3234618902206421,
      "step": 584
    },
    {
      "epoch": 0.5712890625,
      "grad_norm": 0.3716956079006195,
      "learning_rate": 8.635917566241414e-05,
      "loss": 0.7567611932754517,
      "step": 585
    },
    {
      "epoch": 0.572265625,
      "grad_norm": 0.42999619245529175,
      "learning_rate": 8.616290480863593e-05,
      "loss": 0.8695427179336548,
      "step": 586
    },
    {
      "epoch": 0.5732421875,
      "grad_norm": 0.3309305012226105,
      "learning_rate": 8.596663395485771e-05,
      "loss": 0.989714503288269,
      "step": 587
    },
    {
      "epoch": 0.57421875,
      "grad_norm": 0.40024474263191223,
      "learning_rate": 8.57703631010795e-05,
      "loss": 1.0608711242675781,
      "step": 588
    },
    {
      "epoch": 0.5751953125,
      "grad_norm": 0.453950434923172,
      "learning_rate": 8.557409224730129e-05,
      "loss": 0.7340632677078247,
      "step": 589
    },
    {
      "epoch": 0.576171875,
      "grad_norm": 0.4473342299461365,
      "learning_rate": 8.537782139352306e-05,
      "loss": 0.7264219522476196,
      "step": 590
    },
    {
      "epoch": 0.5771484375,
      "grad_norm": 0.420469731092453,
      "learning_rate": 8.518155053974485e-05,
      "loss": 0.8141539692878723,
      "step": 591
    },
    {
      "epoch": 0.578125,
      "grad_norm": 0.4068243205547333,
      "learning_rate": 8.498527968596663e-05,
      "loss": 0.5802872180938721,
      "step": 592
    },
    {
      "epoch": 0.5791015625,
      "grad_norm": 0.4243272840976715,
      "learning_rate": 8.478900883218842e-05,
      "loss": 0.350595086812973,
      "step": 593
    },
    {
      "epoch": 0.580078125,
      "grad_norm": 0.4519834518432617,
      "learning_rate": 8.459273797841021e-05,
      "loss": 0.7131458520889282,
      "step": 594
    },
    {
      "epoch": 0.5810546875,
      "grad_norm": 0.34145745635032654,
      "learning_rate": 8.4396467124632e-05,
      "loss": 0.7618221640586853,
      "step": 595
    },
    {
      "epoch": 0.58203125,
      "grad_norm": 0.46494174003601074,
      "learning_rate": 8.420019627085378e-05,
      "loss": 0.5102145075798035,
      "step": 596
    },
    {
      "epoch": 0.5830078125,
      "grad_norm": 0.3305060565471649,
      "learning_rate": 8.400392541707557e-05,
      "loss": 0.7812811732292175,
      "step": 597
    },
    {
      "epoch": 0.583984375,
      "grad_norm": 0.47092583775520325,
      "learning_rate": 8.380765456329736e-05,
      "loss": 0.7497634887695312,
      "step": 598
    },
    {
      "epoch": 0.5849609375,
      "grad_norm": 0.38902655243873596,
      "learning_rate": 8.361138370951914e-05,
      "loss": 0.4198119640350342,
      "step": 599
    },
    {
      "epoch": 0.5859375,
      "grad_norm": 0.43659287691116333,
      "learning_rate": 8.341511285574093e-05,
      "loss": 0.824333667755127,
      "step": 600
    },
    {
      "epoch": 0.5869140625,
      "grad_norm": 0.4277879595756531,
      "learning_rate": 8.321884200196272e-05,
      "loss": 0.445267915725708,
      "step": 601
    },
    {
      "epoch": 0.587890625,
      "grad_norm": 0.3186829090118408,
      "learning_rate": 8.30225711481845e-05,
      "loss": 0.9906235337257385,
      "step": 602
    },
    {
      "epoch": 0.5888671875,
      "grad_norm": 0.2983294427394867,
      "learning_rate": 8.28263002944063e-05,
      "loss": 0.5342146754264832,
      "step": 603
    },
    {
      "epoch": 0.58984375,
      "grad_norm": 0.4127228856086731,
      "learning_rate": 8.263002944062808e-05,
      "loss": 0.41288450360298157,
      "step": 604
    },
    {
      "epoch": 0.5908203125,
      "grad_norm": 0.3961617052555084,
      "learning_rate": 8.243375858684985e-05,
      "loss": 0.43576663732528687,
      "step": 605
    },
    {
      "epoch": 0.591796875,
      "grad_norm": 0.4124387502670288,
      "learning_rate": 8.223748773307164e-05,
      "loss": 0.5837401747703552,
      "step": 606
    },
    {
      "epoch": 0.5927734375,
      "grad_norm": 0.4274151921272278,
      "learning_rate": 8.204121687929342e-05,
      "loss": 0.8666547536849976,
      "step": 607
    },
    {
      "epoch": 0.59375,
      "grad_norm": 0.3881700932979584,
      "learning_rate": 8.18449460255152e-05,
      "loss": 0.9063656330108643,
      "step": 608
    },
    {
      "epoch": 0.5947265625,
      "grad_norm": 0.46216556429862976,
      "learning_rate": 8.1648675171737e-05,
      "loss": 0.4573599696159363,
      "step": 609
    },
    {
      "epoch": 0.595703125,
      "grad_norm": 0.3843960762023926,
      "learning_rate": 8.145240431795878e-05,
      "loss": 0.6214632391929626,
      "step": 610
    },
    {
      "epoch": 0.5966796875,
      "grad_norm": 0.538301408290863,
      "learning_rate": 8.125613346418057e-05,
      "loss": 0.8800979852676392,
      "step": 611
    },
    {
      "epoch": 0.59765625,
      "grad_norm": 0.49643319845199585,
      "learning_rate": 8.105986261040236e-05,
      "loss": 0.48715031147003174,
      "step": 612
    },
    {
      "epoch": 0.5986328125,
      "grad_norm": 0.4753062427043915,
      "learning_rate": 8.086359175662415e-05,
      "loss": 0.8127011060714722,
      "step": 613
    },
    {
      "epoch": 0.599609375,
      "grad_norm": 0.7572022676467896,
      "learning_rate": 8.066732090284593e-05,
      "loss": 0.7151535153388977,
      "step": 614
    },
    {
      "epoch": 0.6005859375,
      "grad_norm": 0.35117295384407043,
      "learning_rate": 8.047105004906772e-05,
      "loss": 0.9221618175506592,
      "step": 615
    },
    {
      "epoch": 0.6015625,
      "grad_norm": 0.2643633186817169,
      "learning_rate": 8.02747791952895e-05,
      "loss": 0.5025840401649475,
      "step": 616
    },
    {
      "epoch": 0.6025390625,
      "grad_norm": 0.45553916692733765,
      "learning_rate": 8.007850834151129e-05,
      "loss": 0.452494740486145,
      "step": 617
    },
    {
      "epoch": 0.603515625,
      "grad_norm": 0.386594295501709,
      "learning_rate": 7.988223748773308e-05,
      "loss": 0.7942792773246765,
      "step": 618
    },
    {
      "epoch": 0.6044921875,
      "grad_norm": 0.3616650700569153,
      "learning_rate": 7.968596663395485e-05,
      "loss": 0.5697340965270996,
      "step": 619
    },
    {
      "epoch": 0.60546875,
      "grad_norm": 0.3885051906108856,
      "learning_rate": 7.948969578017665e-05,
      "loss": 0.7082506418228149,
      "step": 620
    },
    {
      "epoch": 0.6064453125,
      "grad_norm": 0.4484117329120636,
      "learning_rate": 7.929342492639843e-05,
      "loss": 0.5993860960006714,
      "step": 621
    },
    {
      "epoch": 0.607421875,
      "grad_norm": 0.44654563069343567,
      "learning_rate": 7.909715407262021e-05,
      "loss": 0.5804839134216309,
      "step": 622
    },
    {
      "epoch": 0.6083984375,
      "grad_norm": 0.3943687081336975,
      "learning_rate": 7.890088321884201e-05,
      "loss": 0.6422688364982605,
      "step": 623
    },
    {
      "epoch": 0.609375,
      "grad_norm": 0.4153381288051605,
      "learning_rate": 7.870461236506379e-05,
      "loss": 0.6437400579452515,
      "step": 624
    },
    {
      "epoch": 0.6103515625,
      "grad_norm": 0.38221171498298645,
      "learning_rate": 7.850834151128557e-05,
      "loss": 0.8738820552825928,
      "step": 625
    },
    {
      "epoch": 0.611328125,
      "grad_norm": 0.339599609375,
      "learning_rate": 7.831207065750737e-05,
      "loss": 0.517478883266449,
      "step": 626
    },
    {
      "epoch": 0.6123046875,
      "grad_norm": 0.7177076935768127,
      "learning_rate": 7.811579980372915e-05,
      "loss": 0.7372115254402161,
      "step": 627
    },
    {
      "epoch": 0.61328125,
      "grad_norm": 0.47573140263557434,
      "learning_rate": 7.791952894995093e-05,
      "loss": 0.649010181427002,
      "step": 628
    },
    {
      "epoch": 0.6142578125,
      "grad_norm": 0.44851094484329224,
      "learning_rate": 7.772325809617273e-05,
      "loss": 0.6269842386245728,
      "step": 629
    },
    {
      "epoch": 0.615234375,
      "grad_norm": 0.3544669449329376,
      "learning_rate": 7.752698724239451e-05,
      "loss": 0.8870983123779297,
      "step": 630
    },
    {
      "epoch": 0.6162109375,
      "grad_norm": 0.4103491008281708,
      "learning_rate": 7.73307163886163e-05,
      "loss": 0.8711034059524536,
      "step": 631
    },
    {
      "epoch": 0.6171875,
      "grad_norm": 0.3651062548160553,
      "learning_rate": 7.713444553483808e-05,
      "loss": 0.8420337438583374,
      "step": 632
    },
    {
      "epoch": 0.6181640625,
      "grad_norm": 0.4135638475418091,
      "learning_rate": 7.693817468105987e-05,
      "loss": 0.601078450679779,
      "step": 633
    },
    {
      "epoch": 0.619140625,
      "grad_norm": 0.5965299010276794,
      "learning_rate": 7.674190382728164e-05,
      "loss": 0.604471743106842,
      "step": 634
    },
    {
      "epoch": 0.6201171875,
      "grad_norm": 0.4340416491031647,
      "learning_rate": 7.654563297350344e-05,
      "loss": 0.905183732509613,
      "step": 635
    },
    {
      "epoch": 0.62109375,
      "grad_norm": 0.361518919467926,
      "learning_rate": 7.634936211972522e-05,
      "loss": 0.6569675207138062,
      "step": 636
    },
    {
      "epoch": 0.6220703125,
      "grad_norm": 1.04604971408844,
      "learning_rate": 7.6153091265947e-05,
      "loss": 0.7399482727050781,
      "step": 637
    },
    {
      "epoch": 0.623046875,
      "grad_norm": 0.8039460778236389,
      "learning_rate": 7.59568204121688e-05,
      "loss": 0.6003617644309998,
      "step": 638
    },
    {
      "epoch": 0.6240234375,
      "grad_norm": 0.5462118983268738,
      "learning_rate": 7.576054955839058e-05,
      "loss": 0.7750217914581299,
      "step": 639
    },
    {
      "epoch": 0.625,
      "grad_norm": 0.29333505034446716,
      "learning_rate": 7.556427870461236e-05,
      "loss": 0.47371456027030945,
      "step": 640
    },
    {
      "epoch": 0.6259765625,
      "grad_norm": 0.2468312531709671,
      "learning_rate": 7.536800785083416e-05,
      "loss": 0.4615188241004944,
      "step": 641
    },
    {
      "epoch": 0.626953125,
      "grad_norm": 0.48467332124710083,
      "learning_rate": 7.517173699705594e-05,
      "loss": 0.6456693410873413,
      "step": 642
    },
    {
      "epoch": 0.6279296875,
      "grad_norm": 0.5471943020820618,
      "learning_rate": 7.497546614327772e-05,
      "loss": 0.5899155139923096,
      "step": 643
    },
    {
      "epoch": 0.62890625,
      "grad_norm": 0.3715604841709137,
      "learning_rate": 7.477919528949952e-05,
      "loss": 0.7910970449447632,
      "step": 644
    },
    {
      "epoch": 0.6298828125,
      "grad_norm": 0.3298327922821045,
      "learning_rate": 7.45829244357213e-05,
      "loss": 0.5769776701927185,
      "step": 645
    },
    {
      "epoch": 0.630859375,
      "grad_norm": 0.44131916761398315,
      "learning_rate": 7.438665358194309e-05,
      "loss": 0.8805806636810303,
      "step": 646
    },
    {
      "epoch": 0.6318359375,
      "grad_norm": 0.4686948359012604,
      "learning_rate": 7.419038272816488e-05,
      "loss": 0.7262091636657715,
      "step": 647
    },
    {
      "epoch": 0.6328125,
      "grad_norm": 0.48123931884765625,
      "learning_rate": 7.399411187438666e-05,
      "loss": 0.8481992483139038,
      "step": 648
    },
    {
      "epoch": 0.6337890625,
      "grad_norm": 0.5582646131515503,
      "learning_rate": 7.379784102060843e-05,
      "loss": 0.4963653087615967,
      "step": 649
    },
    {
      "epoch": 0.634765625,
      "grad_norm": 0.30464881658554077,
      "learning_rate": 7.360157016683023e-05,
      "loss": 0.6772556900978088,
      "step": 650
    },
    {
      "epoch": 0.6357421875,
      "grad_norm": 0.44710803031921387,
      "learning_rate": 7.340529931305201e-05,
      "loss": 0.5476983189582825,
      "step": 651
    },
    {
      "epoch": 0.63671875,
      "grad_norm": 0.35922887921333313,
      "learning_rate": 7.320902845927379e-05,
      "loss": 0.8256508111953735,
      "step": 652
    },
    {
      "epoch": 0.6376953125,
      "grad_norm": 0.40085500478744507,
      "learning_rate": 7.301275760549559e-05,
      "loss": 0.5783500671386719,
      "step": 653
    },
    {
      "epoch": 0.638671875,
      "grad_norm": 0.47579512000083923,
      "learning_rate": 7.281648675171737e-05,
      "loss": 0.5591031908988953,
      "step": 654
    },
    {
      "epoch": 0.6396484375,
      "grad_norm": 0.5594353675842285,
      "learning_rate": 7.262021589793915e-05,
      "loss": 0.8133666515350342,
      "step": 655
    },
    {
      "epoch": 0.640625,
      "grad_norm": 0.44030821323394775,
      "learning_rate": 7.242394504416095e-05,
      "loss": 1.0282940864562988,
      "step": 656
    },
    {
      "epoch": 0.6416015625,
      "grad_norm": 0.7038627862930298,
      "learning_rate": 7.222767419038273e-05,
      "loss": 0.2322971373796463,
      "step": 657
    },
    {
      "epoch": 0.642578125,
      "grad_norm": 0.223698228597641,
      "learning_rate": 7.203140333660451e-05,
      "loss": 0.7056642174720764,
      "step": 658
    },
    {
      "epoch": 0.6435546875,
      "grad_norm": 0.3815765976905823,
      "learning_rate": 7.183513248282631e-05,
      "loss": 1.074477195739746,
      "step": 659
    },
    {
      "epoch": 0.64453125,
      "grad_norm": 0.35606271028518677,
      "learning_rate": 7.163886162904809e-05,
      "loss": 0.4300801753997803,
      "step": 660
    },
    {
      "epoch": 0.6455078125,
      "grad_norm": 0.32899999618530273,
      "learning_rate": 7.144259077526988e-05,
      "loss": 0.5923078060150146,
      "step": 661
    },
    {
      "epoch": 0.646484375,
      "grad_norm": 0.49968358874320984,
      "learning_rate": 7.124631992149167e-05,
      "loss": 0.8295183181762695,
      "step": 662
    },
    {
      "epoch": 0.6474609375,
      "grad_norm": 0.3393777012825012,
      "learning_rate": 7.105004906771345e-05,
      "loss": 0.30383622646331787,
      "step": 663
    },
    {
      "epoch": 0.6484375,
      "grad_norm": 0.24977968633174896,
      "learning_rate": 7.085377821393524e-05,
      "loss": 0.429612934589386,
      "step": 664
    },
    {
      "epoch": 0.6494140625,
      "grad_norm": 0.35886242985725403,
      "learning_rate": 7.065750736015702e-05,
      "loss": 0.9189084768295288,
      "step": 665
    },
    {
      "epoch": 0.650390625,
      "grad_norm": 0.3856249153614044,
      "learning_rate": 7.04612365063788e-05,
      "loss": 0.4880048930644989,
      "step": 666
    },
    {
      "epoch": 0.6513671875,
      "grad_norm": 0.4439884424209595,
      "learning_rate": 7.026496565260058e-05,
      "loss": 0.7537186145782471,
      "step": 667
    },
    {
      "epoch": 0.65234375,
      "grad_norm": 0.29563215374946594,
      "learning_rate": 7.006869479882238e-05,
      "loss": 0.38701343536376953,
      "step": 668
    },
    {
      "epoch": 0.6533203125,
      "grad_norm": 0.1909576952457428,
      "learning_rate": 6.987242394504416e-05,
      "loss": 0.15140604972839355,
      "step": 669
    },
    {
      "epoch": 0.654296875,
      "grad_norm": 0.3344849944114685,
      "learning_rate": 6.967615309126594e-05,
      "loss": 0.527427077293396,
      "step": 670
    },
    {
      "epoch": 0.6552734375,
      "grad_norm": 0.3609422743320465,
      "learning_rate": 6.947988223748774e-05,
      "loss": 0.29116177558898926,
      "step": 671
    },
    {
      "epoch": 0.65625,
      "grad_norm": 0.4419811964035034,
      "learning_rate": 6.928361138370952e-05,
      "loss": 0.7166855931282043,
      "step": 672
    },
    {
      "epoch": 0.6572265625,
      "grad_norm": 0.31890806555747986,
      "learning_rate": 6.90873405299313e-05,
      "loss": 0.5259425640106201,
      "step": 673
    },
    {
      "epoch": 0.658203125,
      "grad_norm": 0.39572352170944214,
      "learning_rate": 6.88910696761531e-05,
      "loss": 0.5964791774749756,
      "step": 674
    },
    {
      "epoch": 0.6591796875,
      "grad_norm": 0.4501058757305145,
      "learning_rate": 6.869479882237488e-05,
      "loss": 0.2289922833442688,
      "step": 675
    },
    {
      "epoch": 0.66015625,
      "grad_norm": 0.2884235680103302,
      "learning_rate": 6.849852796859666e-05,
      "loss": 0.2730886936187744,
      "step": 676
    },
    {
      "epoch": 0.6611328125,
      "grad_norm": 0.32970431447029114,
      "learning_rate": 6.830225711481846e-05,
      "loss": 0.4283568859100342,
      "step": 677
    },
    {
      "epoch": 0.662109375,
      "grad_norm": 0.39025789499282837,
      "learning_rate": 6.810598626104023e-05,
      "loss": 0.9361288547515869,
      "step": 678
    },
    {
      "epoch": 0.6630859375,
      "grad_norm": 0.48386886715888977,
      "learning_rate": 6.790971540726203e-05,
      "loss": 0.4907494783401489,
      "step": 679
    },
    {
      "epoch": 0.6640625,
      "grad_norm": 0.41783151030540466,
      "learning_rate": 6.771344455348381e-05,
      "loss": 0.7485824823379517,
      "step": 680
    },
    {
      "epoch": 0.6650390625,
      "grad_norm": 0.4826144278049469,
      "learning_rate": 6.751717369970559e-05,
      "loss": 0.6413211226463318,
      "step": 681
    },
    {
      "epoch": 0.666015625,
      "grad_norm": 0.27521079778671265,
      "learning_rate": 6.732090284592739e-05,
      "loss": 0.5747159123420715,
      "step": 682
    },
    {
      "epoch": 0.6669921875,
      "grad_norm": 0.3745660185813904,
      "learning_rate": 6.712463199214917e-05,
      "loss": 0.414341002702713,
      "step": 683
    },
    {
      "epoch": 0.66796875,
      "grad_norm": 0.45048731565475464,
      "learning_rate": 6.692836113837095e-05,
      "loss": 0.3665570318698883,
      "step": 684
    },
    {
      "epoch": 0.6689453125,
      "grad_norm": 0.5048633217811584,
      "learning_rate": 6.673209028459275e-05,
      "loss": 0.5923498272895813,
      "step": 685
    },
    {
      "epoch": 0.669921875,
      "grad_norm": 0.46423155069351196,
      "learning_rate": 6.653581943081453e-05,
      "loss": 0.7506915330886841,
      "step": 686
    },
    {
      "epoch": 0.6708984375,
      "grad_norm": 0.42965108156204224,
      "learning_rate": 6.633954857703631e-05,
      "loss": 0.7576399445533752,
      "step": 687
    },
    {
      "epoch": 0.671875,
      "grad_norm": 0.48331597447395325,
      "learning_rate": 6.614327772325811e-05,
      "loss": 0.5249682068824768,
      "step": 688
    },
    {
      "epoch": 0.6728515625,
      "grad_norm": 0.4685790240764618,
      "learning_rate": 6.594700686947989e-05,
      "loss": 0.8056750297546387,
      "step": 689
    },
    {
      "epoch": 0.673828125,
      "grad_norm": 0.46440044045448303,
      "learning_rate": 6.575073601570167e-05,
      "loss": 0.9252493381500244,
      "step": 690
    },
    {
      "epoch": 0.6748046875,
      "grad_norm": 0.46564289927482605,
      "learning_rate": 6.555446516192347e-05,
      "loss": 0.8182022571563721,
      "step": 691
    },
    {
      "epoch": 0.67578125,
      "grad_norm": 0.4397750496864319,
      "learning_rate": 6.535819430814525e-05,
      "loss": 0.7928388118743896,
      "step": 692
    },
    {
      "epoch": 0.6767578125,
      "grad_norm": 0.3233174681663513,
      "learning_rate": 6.516192345436702e-05,
      "loss": 0.5252426862716675,
      "step": 693
    },
    {
      "epoch": 0.677734375,
      "grad_norm": 0.6012148857116699,
      "learning_rate": 6.496565260058882e-05,
      "loss": 0.44195663928985596,
      "step": 694
    },
    {
      "epoch": 0.6787109375,
      "grad_norm": 0.6329052448272705,
      "learning_rate": 6.47693817468106e-05,
      "loss": 0.5354570150375366,
      "step": 695
    },
    {
      "epoch": 0.6796875,
      "grad_norm": 0.47926270961761475,
      "learning_rate": 6.457311089303238e-05,
      "loss": 0.4950491786003113,
      "step": 696
    },
    {
      "epoch": 0.6806640625,
      "grad_norm": 0.5051383972167969,
      "learning_rate": 6.437684003925418e-05,
      "loss": 0.6795849204063416,
      "step": 697
    },
    {
      "epoch": 0.681640625,
      "grad_norm": 0.4022398591041565,
      "learning_rate": 6.418056918547596e-05,
      "loss": 1.0388166904449463,
      "step": 698
    },
    {
      "epoch": 0.6826171875,
      "grad_norm": 0.4309573471546173,
      "learning_rate": 6.398429833169774e-05,
      "loss": 0.6022897362709045,
      "step": 699
    },
    {
      "epoch": 0.68359375,
      "grad_norm": 0.3301983177661896,
      "learning_rate": 6.378802747791954e-05,
      "loss": 0.6451660394668579,
      "step": 700
    },
    {
      "epoch": 0.6845703125,
      "grad_norm": 0.6647156476974487,
      "learning_rate": 6.359175662414132e-05,
      "loss": 0.9699732661247253,
      "step": 701
    },
    {
      "epoch": 0.685546875,
      "grad_norm": 0.37545597553253174,
      "learning_rate": 6.33954857703631e-05,
      "loss": 0.43181508779525757,
      "step": 702
    },
    {
      "epoch": 0.6865234375,
      "grad_norm": 0.40882429480552673,
      "learning_rate": 6.31992149165849e-05,
      "loss": 0.665264368057251,
      "step": 703
    },
    {
      "epoch": 0.6875,
      "grad_norm": 0.46597936749458313,
      "learning_rate": 6.300294406280668e-05,
      "loss": 0.8813620209693909,
      "step": 704
    },
    {
      "epoch": 0.6884765625,
      "grad_norm": 0.4355461597442627,
      "learning_rate": 6.280667320902846e-05,
      "loss": 0.595770537853241,
      "step": 705
    },
    {
      "epoch": 0.689453125,
      "grad_norm": 0.45896056294441223,
      "learning_rate": 6.261040235525026e-05,
      "loss": 0.7571601271629333,
      "step": 706
    },
    {
      "epoch": 0.6904296875,
      "grad_norm": 0.37643495202064514,
      "learning_rate": 6.241413150147204e-05,
      "loss": 0.47930869460105896,
      "step": 707
    },
    {
      "epoch": 0.69140625,
      "grad_norm": 0.49690738320350647,
      "learning_rate": 6.221786064769381e-05,
      "loss": 0.3727263808250427,
      "step": 708
    },
    {
      "epoch": 0.6923828125,
      "grad_norm": 0.44111907482147217,
      "learning_rate": 6.20215897939156e-05,
      "loss": 0.7276532649993896,
      "step": 709
    },
    {
      "epoch": 0.693359375,
      "grad_norm": 0.44872644543647766,
      "learning_rate": 6.182531894013739e-05,
      "loss": 0.5082123279571533,
      "step": 710
    },
    {
      "epoch": 0.6943359375,
      "grad_norm": 0.3345314562320709,
      "learning_rate": 6.162904808635917e-05,
      "loss": 0.5472716093063354,
      "step": 711
    },
    {
      "epoch": 0.6953125,
      "grad_norm": 0.4269154667854309,
      "learning_rate": 6.143277723258097e-05,
      "loss": 0.7036910057067871,
      "step": 712
    },
    {
      "epoch": 0.6962890625,
      "grad_norm": 0.5314676761627197,
      "learning_rate": 6.123650637880275e-05,
      "loss": 0.8663474917411804,
      "step": 713
    },
    {
      "epoch": 0.697265625,
      "grad_norm": 0.2820166349411011,
      "learning_rate": 6.104023552502453e-05,
      "loss": 0.6397068500518799,
      "step": 714
    },
    {
      "epoch": 0.6982421875,
      "grad_norm": 0.40954726934432983,
      "learning_rate": 6.084396467124632e-05,
      "loss": 0.5477964282035828,
      "step": 715
    },
    {
      "epoch": 0.69921875,
      "grad_norm": 0.6858615279197693,
      "learning_rate": 6.064769381746811e-05,
      "loss": 0.694764256477356,
      "step": 716
    },
    {
      "epoch": 0.7001953125,
      "grad_norm": 2.901998281478882,
      "learning_rate": 6.04514229636899e-05,
      "loss": 0.5803335309028625,
      "step": 717
    },
    {
      "epoch": 0.701171875,
      "grad_norm": 0.6065869927406311,
      "learning_rate": 6.025515210991168e-05,
      "loss": 0.49790292978286743,
      "step": 718
    },
    {
      "epoch": 0.7021484375,
      "grad_norm": 0.3678690195083618,
      "learning_rate": 6.005888125613347e-05,
      "loss": 0.38595882058143616,
      "step": 719
    },
    {
      "epoch": 0.703125,
      "grad_norm": 0.32496991753578186,
      "learning_rate": 5.986261040235526e-05,
      "loss": 0.3554360866546631,
      "step": 720
    },
    {
      "epoch": 0.7041015625,
      "grad_norm": 0.5348960161209106,
      "learning_rate": 5.966633954857704e-05,
      "loss": 1.0386948585510254,
      "step": 721
    },
    {
      "epoch": 0.705078125,
      "grad_norm": 0.42248818278312683,
      "learning_rate": 5.947006869479883e-05,
      "loss": 0.4950508177280426,
      "step": 722
    },
    {
      "epoch": 0.7060546875,
      "grad_norm": 0.36575669050216675,
      "learning_rate": 5.9273797841020606e-05,
      "loss": 0.8793643712997437,
      "step": 723
    },
    {
      "epoch": 0.70703125,
      "grad_norm": 0.30802977085113525,
      "learning_rate": 5.9077526987242395e-05,
      "loss": 0.7557331919670105,
      "step": 724
    },
    {
      "epoch": 0.7080078125,
      "grad_norm": 0.36057788133621216,
      "learning_rate": 5.888125613346418e-05,
      "loss": 0.793386697769165,
      "step": 725
    },
    {
      "epoch": 0.708984375,
      "grad_norm": 0.5049283504486084,
      "learning_rate": 5.8684985279685966e-05,
      "loss": 0.3805343210697174,
      "step": 726
    },
    {
      "epoch": 0.7099609375,
      "grad_norm": 0.4448167681694031,
      "learning_rate": 5.8488714425907756e-05,
      "loss": 0.8297110199928284,
      "step": 727
    },
    {
      "epoch": 0.7109375,
      "grad_norm": 0.5144803524017334,
      "learning_rate": 5.829244357212954e-05,
      "loss": 0.8582932949066162,
      "step": 728
    },
    {
      "epoch": 0.7119140625,
      "grad_norm": 0.48559248447418213,
      "learning_rate": 5.809617271835133e-05,
      "loss": 0.851997971534729,
      "step": 729
    },
    {
      "epoch": 0.712890625,
      "grad_norm": 0.5277959704399109,
      "learning_rate": 5.7899901864573116e-05,
      "loss": 0.8560271859169006,
      "step": 730
    },
    {
      "epoch": 0.7138671875,
      "grad_norm": 0.39055025577545166,
      "learning_rate": 5.77036310107949e-05,
      "loss": 0.5023626685142517,
      "step": 731
    },
    {
      "epoch": 0.71484375,
      "grad_norm": 0.4014328718185425,
      "learning_rate": 5.750736015701669e-05,
      "loss": 0.7782986760139465,
      "step": 732
    },
    {
      "epoch": 0.7158203125,
      "grad_norm": 0.9840988516807556,
      "learning_rate": 5.731108930323848e-05,
      "loss": 0.5097107887268066,
      "step": 733
    },
    {
      "epoch": 0.716796875,
      "grad_norm": 0.512140691280365,
      "learning_rate": 5.711481844946026e-05,
      "loss": 0.5448895692825317,
      "step": 734
    },
    {
      "epoch": 0.7177734375,
      "grad_norm": 0.45195046067237854,
      "learning_rate": 5.691854759568205e-05,
      "loss": 0.7583330273628235,
      "step": 735
    },
    {
      "epoch": 0.71875,
      "grad_norm": 0.4155009090900421,
      "learning_rate": 5.672227674190384e-05,
      "loss": 0.5220797061920166,
      "step": 736
    },
    {
      "epoch": 0.7197265625,
      "grad_norm": 0.552148699760437,
      "learning_rate": 5.652600588812562e-05,
      "loss": 0.8043540716171265,
      "step": 737
    },
    {
      "epoch": 0.720703125,
      "grad_norm": 0.30510297417640686,
      "learning_rate": 5.6329735034347396e-05,
      "loss": 0.5110808610916138,
      "step": 738
    },
    {
      "epoch": 0.7216796875,
      "grad_norm": 0.522339940071106,
      "learning_rate": 5.6133464180569185e-05,
      "loss": 1.0245096683502197,
      "step": 739
    },
    {
      "epoch": 0.72265625,
      "grad_norm": 0.27751341462135315,
      "learning_rate": 5.5937193326790974e-05,
      "loss": 0.6376601457595825,
      "step": 740
    },
    {
      "epoch": 0.7236328125,
      "grad_norm": 0.4283340573310852,
      "learning_rate": 5.5740922473012756e-05,
      "loss": 1.1317777633666992,
      "step": 741
    },
    {
      "epoch": 0.724609375,
      "grad_norm": 0.541248619556427,
      "learning_rate": 5.5544651619234545e-05,
      "loss": 0.8086187839508057,
      "step": 742
    },
    {
      "epoch": 0.7255859375,
      "grad_norm": 0.24750906229019165,
      "learning_rate": 5.5348380765456335e-05,
      "loss": 0.4873177409172058,
      "step": 743
    },
    {
      "epoch": 0.7265625,
      "grad_norm": 0.42374616861343384,
      "learning_rate": 5.515210991167812e-05,
      "loss": 0.41606956720352173,
      "step": 744
    },
    {
      "epoch": 0.7275390625,
      "grad_norm": 0.35455161333084106,
      "learning_rate": 5.4955839057899906e-05,
      "loss": 0.49936947226524353,
      "step": 745
    },
    {
      "epoch": 0.728515625,
      "grad_norm": 0.4243617653846741,
      "learning_rate": 5.475956820412169e-05,
      "loss": 0.6650359630584717,
      "step": 746
    },
    {
      "epoch": 0.7294921875,
      "grad_norm": 0.4106060862541199,
      "learning_rate": 5.456329735034348e-05,
      "loss": 0.37870654463768005,
      "step": 747
    },
    {
      "epoch": 0.73046875,
      "grad_norm": 0.3536394536495209,
      "learning_rate": 5.436702649656527e-05,
      "loss": 1.0944924354553223,
      "step": 748
    },
    {
      "epoch": 0.7314453125,
      "grad_norm": 0.3067559003829956,
      "learning_rate": 5.417075564278705e-05,
      "loss": 0.6380996704101562,
      "step": 749
    },
    {
      "epoch": 0.732421875,
      "grad_norm": 0.40423691272735596,
      "learning_rate": 5.397448478900884e-05,
      "loss": 0.712358295917511,
      "step": 750
    },
    {
      "epoch": 0.7333984375,
      "grad_norm": 0.451038658618927,
      "learning_rate": 5.377821393523063e-05,
      "loss": 0.6221305727958679,
      "step": 751
    },
    {
      "epoch": 0.734375,
      "grad_norm": 0.32606229186058044,
      "learning_rate": 5.35819430814524e-05,
      "loss": 0.6600078344345093,
      "step": 752
    },
    {
      "epoch": 0.7353515625,
      "grad_norm": 0.746896505355835,
      "learning_rate": 5.3385672227674185e-05,
      "loss": 0.5533967614173889,
      "step": 753
    },
    {
      "epoch": 0.736328125,
      "grad_norm": 0.403277724981308,
      "learning_rate": 5.3189401373895974e-05,
      "loss": 0.7483388185501099,
      "step": 754
    },
    {
      "epoch": 0.7373046875,
      "grad_norm": 0.6016709208488464,
      "learning_rate": 5.2993130520117764e-05,
      "loss": 0.539909839630127,
      "step": 755
    },
    {
      "epoch": 0.73828125,
      "grad_norm": 0.39885231852531433,
      "learning_rate": 5.2796859666339546e-05,
      "loss": 0.7900533676147461,
      "step": 756
    },
    {
      "epoch": 0.7392578125,
      "grad_norm": 0.3245362639427185,
      "learning_rate": 5.2600588812561335e-05,
      "loss": 0.42862433195114136,
      "step": 757
    },
    {
      "epoch": 0.740234375,
      "grad_norm": 0.47334104776382446,
      "learning_rate": 5.2404317958783124e-05,
      "loss": 0.3249909281730652,
      "step": 758
    },
    {
      "epoch": 0.7412109375,
      "grad_norm": 0.3029737174510956,
      "learning_rate": 5.220804710500491e-05,
      "loss": 0.4264957308769226,
      "step": 759
    },
    {
      "epoch": 0.7421875,
      "grad_norm": 0.33878564834594727,
      "learning_rate": 5.2011776251226696e-05,
      "loss": 0.4446904957294464,
      "step": 760
    },
    {
      "epoch": 0.7431640625,
      "grad_norm": 0.3307798206806183,
      "learning_rate": 5.1815505397448485e-05,
      "loss": 0.461605966091156,
      "step": 761
    },
    {
      "epoch": 0.744140625,
      "grad_norm": 0.4146850109100342,
      "learning_rate": 5.161923454367027e-05,
      "loss": 0.758568525314331,
      "step": 762
    },
    {
      "epoch": 0.7451171875,
      "grad_norm": 0.3531327545642853,
      "learning_rate": 5.1422963689892056e-05,
      "loss": 0.4580535292625427,
      "step": 763
    },
    {
      "epoch": 0.74609375,
      "grad_norm": 0.3952695429325104,
      "learning_rate": 5.1226692836113846e-05,
      "loss": 0.333244651556015,
      "step": 764
    },
    {
      "epoch": 0.7470703125,
      "grad_norm": 0.5774162411689758,
      "learning_rate": 5.103042198233563e-05,
      "loss": 0.6433362364768982,
      "step": 765
    },
    {
      "epoch": 0.748046875,
      "grad_norm": 0.49668964743614197,
      "learning_rate": 5.083415112855742e-05,
      "loss": 0.8478100895881653,
      "step": 766
    },
    {
      "epoch": 0.7490234375,
      "grad_norm": 0.3303810954093933,
      "learning_rate": 5.063788027477919e-05,
      "loss": 0.7296837568283081,
      "step": 767
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.27652832865715027,
      "learning_rate": 5.044160942100098e-05,
      "loss": 0.6442312598228455,
      "step": 768
    },
    {
      "epoch": 0.7509765625,
      "grad_norm": 1.0828924179077148,
      "learning_rate": 5.0245338567222764e-05,
      "loss": 0.9848635196685791,
      "step": 769
    },
    {
      "epoch": 0.751953125,
      "grad_norm": 0.38959333300590515,
      "learning_rate": 5.0049067713444553e-05,
      "loss": 0.722776472568512,
      "step": 770
    },
    {
      "epoch": 0.7529296875,
      "grad_norm": 0.3470323383808136,
      "learning_rate": 4.985279685966634e-05,
      "loss": 0.6584157943725586,
      "step": 771
    },
    {
      "epoch": 0.75390625,
      "grad_norm": 0.4060254693031311,
      "learning_rate": 4.9656526005888125e-05,
      "loss": 0.6276923418045044,
      "step": 772
    },
    {
      "epoch": 0.7548828125,
      "grad_norm": 0.34566962718963623,
      "learning_rate": 4.9460255152109914e-05,
      "loss": 0.972516655921936,
      "step": 773
    },
    {
      "epoch": 0.755859375,
      "grad_norm": 0.41829708218574524,
      "learning_rate": 4.92639842983317e-05,
      "loss": 0.6937177181243896,
      "step": 774
    },
    {
      "epoch": 0.7568359375,
      "grad_norm": 0.7653974294662476,
      "learning_rate": 4.9067713444553486e-05,
      "loss": 0.6027823090553284,
      "step": 775
    },
    {
      "epoch": 0.7578125,
      "grad_norm": 1.0477155447006226,
      "learning_rate": 4.8871442590775275e-05,
      "loss": 0.925806999206543,
      "step": 776
    },
    {
      "epoch": 0.7587890625,
      "grad_norm": 0.43484824895858765,
      "learning_rate": 4.8675171736997064e-05,
      "loss": 0.7783142328262329,
      "step": 777
    },
    {
      "epoch": 0.759765625,
      "grad_norm": 0.33719849586486816,
      "learning_rate": 4.847890088321884e-05,
      "loss": 0.6108527779579163,
      "step": 778
    },
    {
      "epoch": 0.7607421875,
      "grad_norm": 0.3983028531074524,
      "learning_rate": 4.828263002944063e-05,
      "loss": 0.9976012706756592,
      "step": 779
    },
    {
      "epoch": 0.76171875,
      "grad_norm": 0.3278787136077881,
      "learning_rate": 4.808635917566242e-05,
      "loss": 0.5754845142364502,
      "step": 780
    },
    {
      "epoch": 0.7626953125,
      "grad_norm": 0.42433467507362366,
      "learning_rate": 4.78900883218842e-05,
      "loss": 0.8455826640129089,
      "step": 781
    },
    {
      "epoch": 0.763671875,
      "grad_norm": 0.33245334029197693,
      "learning_rate": 4.769381746810599e-05,
      "loss": 0.5207083225250244,
      "step": 782
    },
    {
      "epoch": 0.7646484375,
      "grad_norm": 0.4390372931957245,
      "learning_rate": 4.749754661432778e-05,
      "loss": 0.7208432555198669,
      "step": 783
    },
    {
      "epoch": 0.765625,
      "grad_norm": 0.325720876455307,
      "learning_rate": 4.730127576054956e-05,
      "loss": 0.3017955422401428,
      "step": 784
    },
    {
      "epoch": 0.7666015625,
      "grad_norm": 0.3036203980445862,
      "learning_rate": 4.710500490677135e-05,
      "loss": 0.47869423031806946,
      "step": 785
    },
    {
      "epoch": 0.767578125,
      "grad_norm": 0.4316065013408661,
      "learning_rate": 4.690873405299313e-05,
      "loss": 0.7984920740127563,
      "step": 786
    },
    {
      "epoch": 0.7685546875,
      "grad_norm": 0.46907728910446167,
      "learning_rate": 4.6712463199214915e-05,
      "loss": 0.7288491725921631,
      "step": 787
    },
    {
      "epoch": 0.76953125,
      "grad_norm": 0.38269418478012085,
      "learning_rate": 4.6516192345436704e-05,
      "loss": 0.46745771169662476,
      "step": 788
    },
    {
      "epoch": 0.7705078125,
      "grad_norm": 0.6045718193054199,
      "learning_rate": 4.631992149165849e-05,
      "loss": 0.5405256152153015,
      "step": 789
    },
    {
      "epoch": 0.771484375,
      "grad_norm": 0.3303053677082062,
      "learning_rate": 4.6123650637880275e-05,
      "loss": 0.6721948981285095,
      "step": 790
    },
    {
      "epoch": 0.7724609375,
      "grad_norm": 0.42014074325561523,
      "learning_rate": 4.5927379784102065e-05,
      "loss": 0.9322581887245178,
      "step": 791
    },
    {
      "epoch": 0.7734375,
      "grad_norm": 0.3720149099826813,
      "learning_rate": 4.5731108930323854e-05,
      "loss": 0.7807843685150146,
      "step": 792
    },
    {
      "epoch": 0.7744140625,
      "grad_norm": 0.31559938192367554,
      "learning_rate": 4.5534838076545636e-05,
      "loss": 0.8503724336624146,
      "step": 793
    },
    {
      "epoch": 0.775390625,
      "grad_norm": 0.4096013903617859,
      "learning_rate": 4.533856722276742e-05,
      "loss": 0.6950633525848389,
      "step": 794
    },
    {
      "epoch": 0.7763671875,
      "grad_norm": 0.3791837990283966,
      "learning_rate": 4.514229636898921e-05,
      "loss": 0.7583197951316833,
      "step": 795
    },
    {
      "epoch": 0.77734375,
      "grad_norm": 0.5274584889411926,
      "learning_rate": 4.494602551521099e-05,
      "loss": 0.4712093770503998,
      "step": 796
    },
    {
      "epoch": 0.7783203125,
      "grad_norm": 0.29654791951179504,
      "learning_rate": 4.474975466143278e-05,
      "loss": 0.552979588508606,
      "step": 797
    },
    {
      "epoch": 0.779296875,
      "grad_norm": 0.25629475712776184,
      "learning_rate": 4.455348380765457e-05,
      "loss": 0.5225521922111511,
      "step": 798
    },
    {
      "epoch": 0.7802734375,
      "grad_norm": 0.2676495611667633,
      "learning_rate": 4.435721295387635e-05,
      "loss": 0.4382556080818176,
      "step": 799
    },
    {
      "epoch": 0.78125,
      "grad_norm": 0.4117366075515747,
      "learning_rate": 4.416094210009813e-05,
      "loss": 0.5639417767524719,
      "step": 800
    },
    {
      "epoch": 0.7822265625,
      "grad_norm": 0.26305386424064636,
      "learning_rate": 4.396467124631992e-05,
      "loss": 0.28840768337249756,
      "step": 801
    },
    {
      "epoch": 0.783203125,
      "grad_norm": 0.7253789305686951,
      "learning_rate": 4.376840039254171e-05,
      "loss": 0.4104336202144623,
      "step": 802
    },
    {
      "epoch": 0.7841796875,
      "grad_norm": 0.371288001537323,
      "learning_rate": 4.3572129538763494e-05,
      "loss": 0.609147310256958,
      "step": 803
    },
    {
      "epoch": 0.78515625,
      "grad_norm": 0.634273111820221,
      "learning_rate": 4.337585868498528e-05,
      "loss": 0.5141665935516357,
      "step": 804
    },
    {
      "epoch": 0.7861328125,
      "grad_norm": 0.4442044496536255,
      "learning_rate": 4.317958783120707e-05,
      "loss": 0.4882044494152069,
      "step": 805
    },
    {
      "epoch": 0.787109375,
      "grad_norm": 0.3099007308483124,
      "learning_rate": 4.2983316977428854e-05,
      "loss": 0.3148588538169861,
      "step": 806
    },
    {
      "epoch": 0.7880859375,
      "grad_norm": 0.41893890500068665,
      "learning_rate": 4.2787046123650643e-05,
      "loss": 0.6678078174591064,
      "step": 807
    },
    {
      "epoch": 0.7890625,
      "grad_norm": 0.47682809829711914,
      "learning_rate": 4.2590775269872426e-05,
      "loss": 0.46614763140678406,
      "step": 808
    },
    {
      "epoch": 0.7900390625,
      "grad_norm": 0.25193366408348083,
      "learning_rate": 4.239450441609421e-05,
      "loss": 0.3707652986049652,
      "step": 809
    },
    {
      "epoch": 0.791015625,
      "grad_norm": 0.3425232768058777,
      "learning_rate": 4.2198233562316e-05,
      "loss": 0.604179859161377,
      "step": 810
    },
    {
      "epoch": 0.7919921875,
      "grad_norm": 0.31459808349609375,
      "learning_rate": 4.2001962708537786e-05,
      "loss": 0.748989999294281,
      "step": 811
    },
    {
      "epoch": 0.79296875,
      "grad_norm": 0.3478514850139618,
      "learning_rate": 4.180569185475957e-05,
      "loss": 0.6651142835617065,
      "step": 812
    },
    {
      "epoch": 0.7939453125,
      "grad_norm": 0.3951675295829773,
      "learning_rate": 4.160942100098136e-05,
      "loss": 0.7293418049812317,
      "step": 813
    },
    {
      "epoch": 0.794921875,
      "grad_norm": 0.26888158917427063,
      "learning_rate": 4.141315014720315e-05,
      "loss": 0.2181730419397354,
      "step": 814
    },
    {
      "epoch": 0.7958984375,
      "grad_norm": 0.17496585845947266,
      "learning_rate": 4.121687929342492e-05,
      "loss": 0.18257993459701538,
      "step": 815
    },
    {
      "epoch": 0.796875,
      "grad_norm": 0.3386918306350708,
      "learning_rate": 4.102060843964671e-05,
      "loss": 0.43010956048965454,
      "step": 816
    },
    {
      "epoch": 0.7978515625,
      "grad_norm": 0.5185137987136841,
      "learning_rate": 4.08243375858685e-05,
      "loss": 0.9117882251739502,
      "step": 817
    },
    {
      "epoch": 0.798828125,
      "grad_norm": 0.499529093503952,
      "learning_rate": 4.0628066732090283e-05,
      "loss": 0.8601939678192139,
      "step": 818
    },
    {
      "epoch": 0.7998046875,
      "grad_norm": 0.44401317834854126,
      "learning_rate": 4.043179587831207e-05,
      "loss": 0.8643960356712341,
      "step": 819
    },
    {
      "epoch": 0.80078125,
      "grad_norm": 0.30553653836250305,
      "learning_rate": 4.023552502453386e-05,
      "loss": 0.7741817235946655,
      "step": 820
    },
    {
      "epoch": 0.8017578125,
      "grad_norm": 0.443541944026947,
      "learning_rate": 4.0039254170755644e-05,
      "loss": 0.9571224451065063,
      "step": 821
    },
    {
      "epoch": 0.802734375,
      "grad_norm": 0.2611587643623352,
      "learning_rate": 3.9842983316977426e-05,
      "loss": 0.4755222201347351,
      "step": 822
    },
    {
      "epoch": 0.8037109375,
      "grad_norm": 0.38695722818374634,
      "learning_rate": 3.9646712463199216e-05,
      "loss": 0.9597996473312378,
      "step": 823
    },
    {
      "epoch": 0.8046875,
      "grad_norm": 0.505346953868866,
      "learning_rate": 3.9450441609421005e-05,
      "loss": 0.328266441822052,
      "step": 824
    },
    {
      "epoch": 0.8056640625,
      "grad_norm": 0.38910478353500366,
      "learning_rate": 3.925417075564279e-05,
      "loss": 0.4758382737636566,
      "step": 825
    },
    {
      "epoch": 0.806640625,
      "grad_norm": 0.4268342852592468,
      "learning_rate": 3.9057899901864576e-05,
      "loss": 0.6131553649902344,
      "step": 826
    },
    {
      "epoch": 0.8076171875,
      "grad_norm": 0.32205328345298767,
      "learning_rate": 3.8861629048086365e-05,
      "loss": 0.6047544479370117,
      "step": 827
    },
    {
      "epoch": 0.80859375,
      "grad_norm": 0.6975948214530945,
      "learning_rate": 3.866535819430815e-05,
      "loss": 0.7599061727523804,
      "step": 828
    },
    {
      "epoch": 0.8095703125,
      "grad_norm": 0.20186780393123627,
      "learning_rate": 3.846908734052994e-05,
      "loss": 0.3639545738697052,
      "step": 829
    },
    {
      "epoch": 0.810546875,
      "grad_norm": 0.443435937166214,
      "learning_rate": 3.827281648675172e-05,
      "loss": 0.6933274269104004,
      "step": 830
    },
    {
      "epoch": 0.8115234375,
      "grad_norm": 0.44157811999320984,
      "learning_rate": 3.80765456329735e-05,
      "loss": 0.5135524272918701,
      "step": 831
    },
    {
      "epoch": 0.8125,
      "grad_norm": 0.3959600031375885,
      "learning_rate": 3.788027477919529e-05,
      "loss": 0.6713152527809143,
      "step": 832
    },
    {
      "epoch": 0.8134765625,
      "grad_norm": 0.5439519882202148,
      "learning_rate": 3.768400392541708e-05,
      "loss": 0.3603706359863281,
      "step": 833
    },
    {
      "epoch": 0.814453125,
      "grad_norm": 0.36693719029426575,
      "learning_rate": 3.748773307163886e-05,
      "loss": 0.8574247360229492,
      "step": 834
    },
    {
      "epoch": 0.8154296875,
      "grad_norm": 0.3476804792881012,
      "learning_rate": 3.729146221786065e-05,
      "loss": 0.6845530867576599,
      "step": 835
    },
    {
      "epoch": 0.81640625,
      "grad_norm": 0.48850229382514954,
      "learning_rate": 3.709519136408244e-05,
      "loss": 0.788569450378418,
      "step": 836
    },
    {
      "epoch": 0.8173828125,
      "grad_norm": 0.5997111797332764,
      "learning_rate": 3.6898920510304216e-05,
      "loss": 0.5885312557220459,
      "step": 837
    },
    {
      "epoch": 0.818359375,
      "grad_norm": 0.43312472105026245,
      "learning_rate": 3.6702649656526005e-05,
      "loss": 0.5300126075744629,
      "step": 838
    },
    {
      "epoch": 0.8193359375,
      "grad_norm": 0.6505857110023499,
      "learning_rate": 3.6506378802747795e-05,
      "loss": 0.7164736986160278,
      "step": 839
    },
    {
      "epoch": 0.8203125,
      "grad_norm": 0.34061765670776367,
      "learning_rate": 3.631010794896958e-05,
      "loss": 0.5405696034431458,
      "step": 840
    },
    {
      "epoch": 0.8212890625,
      "grad_norm": 0.4188057780265808,
      "learning_rate": 3.6113837095191366e-05,
      "loss": 1.0057684183120728,
      "step": 841
    },
    {
      "epoch": 0.822265625,
      "grad_norm": 0.392007052898407,
      "learning_rate": 3.5917566241413155e-05,
      "loss": 0.6687936782836914,
      "step": 842
    },
    {
      "epoch": 0.8232421875,
      "grad_norm": 0.44254210591316223,
      "learning_rate": 3.572129538763494e-05,
      "loss": 0.39150726795196533,
      "step": 843
    },
    {
      "epoch": 0.82421875,
      "grad_norm": 0.41756534576416016,
      "learning_rate": 3.552502453385673e-05,
      "loss": 0.764665961265564,
      "step": 844
    },
    {
      "epoch": 0.8251953125,
      "grad_norm": 0.9839560985565186,
      "learning_rate": 3.532875368007851e-05,
      "loss": 0.45259296894073486,
      "step": 845
    },
    {
      "epoch": 0.826171875,
      "grad_norm": 0.3465111553668976,
      "learning_rate": 3.513248282630029e-05,
      "loss": 0.5895928740501404,
      "step": 846
    },
    {
      "epoch": 0.8271484375,
      "grad_norm": 0.4883447289466858,
      "learning_rate": 3.493621197252208e-05,
      "loss": 0.8401346802711487,
      "step": 847
    },
    {
      "epoch": 0.828125,
      "grad_norm": 0.3590312898159027,
      "learning_rate": 3.473994111874387e-05,
      "loss": 0.6134470105171204,
      "step": 848
    },
    {
      "epoch": 0.8291015625,
      "grad_norm": 0.48273324966430664,
      "learning_rate": 3.454367026496565e-05,
      "loss": 0.6351644992828369,
      "step": 849
    },
    {
      "epoch": 0.830078125,
      "grad_norm": 0.32156500220298767,
      "learning_rate": 3.434739941118744e-05,
      "loss": 0.5098355412483215,
      "step": 850
    },
    {
      "epoch": 0.8310546875,
      "grad_norm": 0.38239747285842896,
      "learning_rate": 3.415112855740923e-05,
      "loss": 1.0178660154342651,
      "step": 851
    },
    {
      "epoch": 0.83203125,
      "grad_norm": 0.6875290274620056,
      "learning_rate": 3.395485770363101e-05,
      "loss": 0.4496825337409973,
      "step": 852
    },
    {
      "epoch": 0.8330078125,
      "grad_norm": 0.27034860849380493,
      "learning_rate": 3.3758586849852795e-05,
      "loss": 0.41253381967544556,
      "step": 853
    },
    {
      "epoch": 0.833984375,
      "grad_norm": 0.5166223049163818,
      "learning_rate": 3.3562315996074584e-05,
      "loss": 0.7344639897346497,
      "step": 854
    },
    {
      "epoch": 0.8349609375,
      "grad_norm": 0.39597758650779724,
      "learning_rate": 3.3366045142296373e-05,
      "loss": 0.6066821217536926,
      "step": 855
    },
    {
      "epoch": 0.8359375,
      "grad_norm": 0.44033098220825195,
      "learning_rate": 3.3169774288518156e-05,
      "loss": 0.7928174734115601,
      "step": 856
    },
    {
      "epoch": 0.8369140625,
      "grad_norm": 0.3340597450733185,
      "learning_rate": 3.2973503434739945e-05,
      "loss": 0.4783233404159546,
      "step": 857
    },
    {
      "epoch": 0.837890625,
      "grad_norm": 0.5634653568267822,
      "learning_rate": 3.2777232580961734e-05,
      "loss": 0.785845935344696,
      "step": 858
    },
    {
      "epoch": 0.8388671875,
      "grad_norm": 0.24581296741962433,
      "learning_rate": 3.258096172718351e-05,
      "loss": 0.36480462551116943,
      "step": 859
    },
    {
      "epoch": 0.83984375,
      "grad_norm": 0.316773384809494,
      "learning_rate": 3.23846908734053e-05,
      "loss": 0.886894941329956,
      "step": 860
    },
    {
      "epoch": 0.8408203125,
      "grad_norm": 0.4605409502983093,
      "learning_rate": 3.218842001962709e-05,
      "loss": 0.7125131487846375,
      "step": 861
    },
    {
      "epoch": 0.841796875,
      "grad_norm": 0.5473557114601135,
      "learning_rate": 3.199214916584887e-05,
      "loss": 0.45582157373428345,
      "step": 862
    },
    {
      "epoch": 0.8427734375,
      "grad_norm": 0.4604926109313965,
      "learning_rate": 3.179587831207066e-05,
      "loss": 0.5392733812332153,
      "step": 863
    },
    {
      "epoch": 0.84375,
      "grad_norm": 0.3192322552204132,
      "learning_rate": 3.159960745829245e-05,
      "loss": 0.3216538727283478,
      "step": 864
    },
    {
      "epoch": 0.8447265625,
      "grad_norm": 0.4225713610649109,
      "learning_rate": 3.140333660451423e-05,
      "loss": 0.36403900384902954,
      "step": 865
    },
    {
      "epoch": 0.845703125,
      "grad_norm": 0.7738484740257263,
      "learning_rate": 3.120706575073602e-05,
      "loss": 0.5428112149238586,
      "step": 866
    },
    {
      "epoch": 0.8466796875,
      "grad_norm": 0.7795976400375366,
      "learning_rate": 3.10107948969578e-05,
      "loss": 0.838668704032898,
      "step": 867
    },
    {
      "epoch": 0.84765625,
      "grad_norm": 0.4240044355392456,
      "learning_rate": 3.0814524043179585e-05,
      "loss": 0.5039677023887634,
      "step": 868
    },
    {
      "epoch": 0.8486328125,
      "grad_norm": 0.7870606780052185,
      "learning_rate": 3.0618253189401374e-05,
      "loss": 0.2639703154563904,
      "step": 869
    },
    {
      "epoch": 0.849609375,
      "grad_norm": 4.898192405700684,
      "learning_rate": 3.042198233562316e-05,
      "loss": 0.9641809463500977,
      "step": 870
    },
    {
      "epoch": 0.8505859375,
      "grad_norm": 0.4090663194656372,
      "learning_rate": 3.022571148184495e-05,
      "loss": 0.5249053835868835,
      "step": 871
    },
    {
      "epoch": 0.8515625,
      "grad_norm": 0.5761129856109619,
      "learning_rate": 3.0029440628066735e-05,
      "loss": 0.8987921476364136,
      "step": 872
    },
    {
      "epoch": 0.8525390625,
      "grad_norm": 0.2440023124217987,
      "learning_rate": 2.983316977428852e-05,
      "loss": 0.3279159367084503,
      "step": 873
    },
    {
      "epoch": 0.853515625,
      "grad_norm": 0.438519150018692,
      "learning_rate": 2.9636898920510303e-05,
      "loss": 0.8272308111190796,
      "step": 874
    },
    {
      "epoch": 0.8544921875,
      "grad_norm": 0.4011988639831543,
      "learning_rate": 2.944062806673209e-05,
      "loss": 0.3140803873538971,
      "step": 875
    },
    {
      "epoch": 0.85546875,
      "grad_norm": 0.5748201012611389,
      "learning_rate": 2.9244357212953878e-05,
      "loss": 0.6699116230010986,
      "step": 876
    },
    {
      "epoch": 0.8564453125,
      "grad_norm": 0.3001462519168854,
      "learning_rate": 2.9048086359175664e-05,
      "loss": 0.19382989406585693,
      "step": 877
    },
    {
      "epoch": 0.857421875,
      "grad_norm": 0.40844887495040894,
      "learning_rate": 2.885181550539745e-05,
      "loss": 0.6494845747947693,
      "step": 878
    },
    {
      "epoch": 0.8583984375,
      "grad_norm": 0.3480914235115051,
      "learning_rate": 2.865554465161924e-05,
      "loss": 0.5555131435394287,
      "step": 879
    },
    {
      "epoch": 0.859375,
      "grad_norm": 0.3903101682662964,
      "learning_rate": 2.8459273797841024e-05,
      "loss": 0.6830955147743225,
      "step": 880
    },
    {
      "epoch": 0.8603515625,
      "grad_norm": 0.3058629333972931,
      "learning_rate": 2.826300294406281e-05,
      "loss": 0.3747236728668213,
      "step": 881
    },
    {
      "epoch": 0.861328125,
      "grad_norm": 0.49275287985801697,
      "learning_rate": 2.8066732090284592e-05,
      "loss": 1.0192487239837646,
      "step": 882
    },
    {
      "epoch": 0.8623046875,
      "grad_norm": 0.4016769826412201,
      "learning_rate": 2.7870461236506378e-05,
      "loss": 0.4012300372123718,
      "step": 883
    },
    {
      "epoch": 0.86328125,
      "grad_norm": 0.4790811240673065,
      "learning_rate": 2.7674190382728167e-05,
      "loss": 0.6936056613922119,
      "step": 884
    },
    {
      "epoch": 0.8642578125,
      "grad_norm": 0.39931413531303406,
      "learning_rate": 2.7477919528949953e-05,
      "loss": 0.3612633943557739,
      "step": 885
    },
    {
      "epoch": 0.865234375,
      "grad_norm": 0.3250795006752014,
      "learning_rate": 2.728164867517174e-05,
      "loss": 0.5146504640579224,
      "step": 886
    },
    {
      "epoch": 0.8662109375,
      "grad_norm": 0.5216737985610962,
      "learning_rate": 2.7085377821393525e-05,
      "loss": 0.6185201406478882,
      "step": 887
    },
    {
      "epoch": 0.8671875,
      "grad_norm": 0.5681923031806946,
      "learning_rate": 2.6889106967615314e-05,
      "loss": 0.9492973685264587,
      "step": 888
    },
    {
      "epoch": 0.8681640625,
      "grad_norm": 0.5284391045570374,
      "learning_rate": 2.6692836113837093e-05,
      "loss": 0.7801765203475952,
      "step": 889
    },
    {
      "epoch": 0.869140625,
      "grad_norm": 0.42510825395584106,
      "learning_rate": 2.6496565260058882e-05,
      "loss": 0.4871942102909088,
      "step": 890
    },
    {
      "epoch": 0.8701171875,
      "grad_norm": 0.39092326164245605,
      "learning_rate": 2.6300294406280668e-05,
      "loss": 0.5123960375785828,
      "step": 891
    },
    {
      "epoch": 0.87109375,
      "grad_norm": 0.37694281339645386,
      "learning_rate": 2.6104023552502453e-05,
      "loss": 0.3543451428413391,
      "step": 892
    },
    {
      "epoch": 0.8720703125,
      "grad_norm": 0.26519376039505005,
      "learning_rate": 2.5907752698724242e-05,
      "loss": 0.2388455718755722,
      "step": 893
    },
    {
      "epoch": 0.873046875,
      "grad_norm": 0.6303861141204834,
      "learning_rate": 2.5711481844946028e-05,
      "loss": 0.7195224761962891,
      "step": 894
    },
    {
      "epoch": 0.8740234375,
      "grad_norm": 0.4436159133911133,
      "learning_rate": 2.5515210991167814e-05,
      "loss": 0.8888048529624939,
      "step": 895
    },
    {
      "epoch": 0.875,
      "grad_norm": 0.6473313570022583,
      "learning_rate": 2.5318940137389596e-05,
      "loss": 0.8557075262069702,
      "step": 896
    },
    {
      "epoch": 0.8759765625,
      "grad_norm": 0.6625436544418335,
      "learning_rate": 2.5122669283611382e-05,
      "loss": 0.7132158279418945,
      "step": 897
    },
    {
      "epoch": 0.876953125,
      "grad_norm": 0.7241202592849731,
      "learning_rate": 2.492639842983317e-05,
      "loss": 0.9367854595184326,
      "step": 898
    },
    {
      "epoch": 0.8779296875,
      "grad_norm": 0.5321157574653625,
      "learning_rate": 2.4730127576054957e-05,
      "loss": 1.0013937950134277,
      "step": 899
    },
    {
      "epoch": 0.87890625,
      "grad_norm": 0.3287423253059387,
      "learning_rate": 2.4533856722276743e-05,
      "loss": 0.4560258984565735,
      "step": 900
    },
    {
      "epoch": 0.8798828125,
      "grad_norm": 0.5040727257728577,
      "learning_rate": 2.4337585868498532e-05,
      "loss": 0.5655212998390198,
      "step": 901
    },
    {
      "epoch": 0.880859375,
      "grad_norm": 0.4150228202342987,
      "learning_rate": 2.4141315014720314e-05,
      "loss": 0.43106216192245483,
      "step": 902
    },
    {
      "epoch": 0.8818359375,
      "grad_norm": 0.4006192684173584,
      "learning_rate": 2.39450441609421e-05,
      "loss": 0.4401901364326477,
      "step": 903
    },
    {
      "epoch": 0.8828125,
      "grad_norm": 0.5145865678787231,
      "learning_rate": 2.374877330716389e-05,
      "loss": 0.9345691800117493,
      "step": 904
    },
    {
      "epoch": 0.8837890625,
      "grad_norm": 0.7273013591766357,
      "learning_rate": 2.3552502453385675e-05,
      "loss": 0.27768659591674805,
      "step": 905
    },
    {
      "epoch": 0.884765625,
      "grad_norm": 0.3039482831954956,
      "learning_rate": 2.3356231599607457e-05,
      "loss": 0.6196010112762451,
      "step": 906
    },
    {
      "epoch": 0.8857421875,
      "grad_norm": 0.35697150230407715,
      "learning_rate": 2.3159960745829247e-05,
      "loss": 0.34777021408081055,
      "step": 907
    },
    {
      "epoch": 0.88671875,
      "grad_norm": 0.356717050075531,
      "learning_rate": 2.2963689892051032e-05,
      "loss": 0.4651508331298828,
      "step": 908
    },
    {
      "epoch": 0.8876953125,
      "grad_norm": 0.485963374376297,
      "learning_rate": 2.2767419038272818e-05,
      "loss": 0.3906201720237732,
      "step": 909
    },
    {
      "epoch": 0.888671875,
      "grad_norm": 0.38827836513519287,
      "learning_rate": 2.2571148184494604e-05,
      "loss": 0.48782849311828613,
      "step": 910
    },
    {
      "epoch": 0.8896484375,
      "grad_norm": 0.39589494466781616,
      "learning_rate": 2.237487733071639e-05,
      "loss": 0.5089969635009766,
      "step": 911
    },
    {
      "epoch": 0.890625,
      "grad_norm": 0.6619493365287781,
      "learning_rate": 2.2178606476938175e-05,
      "loss": 0.9266189932823181,
      "step": 912
    },
    {
      "epoch": 0.8916015625,
      "grad_norm": 0.407817542552948,
      "learning_rate": 2.198233562315996e-05,
      "loss": 0.3518386483192444,
      "step": 913
    },
    {
      "epoch": 0.892578125,
      "grad_norm": 0.4645719826221466,
      "learning_rate": 2.1786064769381747e-05,
      "loss": 0.9297075271606445,
      "step": 914
    },
    {
      "epoch": 0.8935546875,
      "grad_norm": 0.434517502784729,
      "learning_rate": 2.1589793915603536e-05,
      "loss": 0.7716128826141357,
      "step": 915
    },
    {
      "epoch": 0.89453125,
      "grad_norm": 0.49387747049331665,
      "learning_rate": 2.1393523061825322e-05,
      "loss": 0.5475488901138306,
      "step": 916
    },
    {
      "epoch": 0.8955078125,
      "grad_norm": 0.5593905448913574,
      "learning_rate": 2.1197252208047104e-05,
      "loss": 0.7304456233978271,
      "step": 917
    },
    {
      "epoch": 0.896484375,
      "grad_norm": 0.3386078178882599,
      "learning_rate": 2.1000981354268893e-05,
      "loss": 0.7872465252876282,
      "step": 918
    },
    {
      "epoch": 0.8974609375,
      "grad_norm": 0.2872868478298187,
      "learning_rate": 2.080471050049068e-05,
      "loss": 0.3295198976993561,
      "step": 919
    },
    {
      "epoch": 0.8984375,
      "grad_norm": 0.4897945523262024,
      "learning_rate": 2.060843964671246e-05,
      "loss": 0.3939395546913147,
      "step": 920
    },
    {
      "epoch": 0.8994140625,
      "grad_norm": 0.5068129897117615,
      "learning_rate": 2.041216879293425e-05,
      "loss": 0.4646037817001343,
      "step": 921
    },
    {
      "epoch": 0.900390625,
      "grad_norm": 0.3769625425338745,
      "learning_rate": 2.0215897939156036e-05,
      "loss": 0.811498761177063,
      "step": 922
    },
    {
      "epoch": 0.9013671875,
      "grad_norm": 0.380655974149704,
      "learning_rate": 2.0019627085377822e-05,
      "loss": 0.6260181665420532,
      "step": 923
    },
    {
      "epoch": 0.90234375,
      "grad_norm": 0.5810602903366089,
      "learning_rate": 1.9823356231599608e-05,
      "loss": 0.7125158309936523,
      "step": 924
    },
    {
      "epoch": 0.9033203125,
      "grad_norm": 0.4367387592792511,
      "learning_rate": 1.9627085377821394e-05,
      "loss": 0.7728107571601868,
      "step": 925
    },
    {
      "epoch": 0.904296875,
      "grad_norm": 0.604702353477478,
      "learning_rate": 1.9430814524043183e-05,
      "loss": 0.5136534571647644,
      "step": 926
    },
    {
      "epoch": 0.9052734375,
      "grad_norm": 0.40865615010261536,
      "learning_rate": 1.923454367026497e-05,
      "loss": 0.5040115714073181,
      "step": 927
    },
    {
      "epoch": 0.90625,
      "grad_norm": 0.3602078855037689,
      "learning_rate": 1.903827281648675e-05,
      "loss": 0.4498569965362549,
      "step": 928
    },
    {
      "epoch": 0.9072265625,
      "grad_norm": 0.46351152658462524,
      "learning_rate": 1.884200196270854e-05,
      "loss": 0.8635745644569397,
      "step": 929
    },
    {
      "epoch": 0.908203125,
      "grad_norm": 0.5490495562553406,
      "learning_rate": 1.8645731108930326e-05,
      "loss": 0.9265761375427246,
      "step": 930
    },
    {
      "epoch": 0.9091796875,
      "grad_norm": 0.4198157489299774,
      "learning_rate": 1.8449460255152108e-05,
      "loss": 0.8148217797279358,
      "step": 931
    },
    {
      "epoch": 0.91015625,
      "grad_norm": 0.5183578729629517,
      "learning_rate": 1.8253189401373897e-05,
      "loss": 0.7837534546852112,
      "step": 932
    },
    {
      "epoch": 0.9111328125,
      "grad_norm": 0.41839340329170227,
      "learning_rate": 1.8056918547595683e-05,
      "loss": 0.7239848971366882,
      "step": 933
    },
    {
      "epoch": 0.912109375,
      "grad_norm": 0.49158063530921936,
      "learning_rate": 1.786064769381747e-05,
      "loss": 0.7751527428627014,
      "step": 934
    },
    {
      "epoch": 0.9130859375,
      "grad_norm": 0.20171599090099335,
      "learning_rate": 1.7664376840039255e-05,
      "loss": 0.181843563914299,
      "step": 935
    },
    {
      "epoch": 0.9140625,
      "grad_norm": 0.36237961053848267,
      "learning_rate": 1.746810598626104e-05,
      "loss": 0.5150234699249268,
      "step": 936
    },
    {
      "epoch": 0.9150390625,
      "grad_norm": 0.4587535858154297,
      "learning_rate": 1.7271835132482826e-05,
      "loss": 0.6178685426712036,
      "step": 937
    },
    {
      "epoch": 0.916015625,
      "grad_norm": 0.392635703086853,
      "learning_rate": 1.7075564278704615e-05,
      "loss": 0.7002321481704712,
      "step": 938
    },
    {
      "epoch": 0.9169921875,
      "grad_norm": 0.28255772590637207,
      "learning_rate": 1.6879293424926398e-05,
      "loss": 0.6161627769470215,
      "step": 939
    },
    {
      "epoch": 0.91796875,
      "grad_norm": 0.31382182240486145,
      "learning_rate": 1.6683022571148187e-05,
      "loss": 0.6143029928207397,
      "step": 940
    },
    {
      "epoch": 0.9189453125,
      "grad_norm": 0.5099475383758545,
      "learning_rate": 1.6486751717369972e-05,
      "loss": 0.9116108417510986,
      "step": 941
    },
    {
      "epoch": 0.919921875,
      "grad_norm": 0.4015892446041107,
      "learning_rate": 1.6290480863591755e-05,
      "loss": 0.7331390380859375,
      "step": 942
    },
    {
      "epoch": 0.9208984375,
      "grad_norm": 0.4519053101539612,
      "learning_rate": 1.6094210009813544e-05,
      "loss": 0.6662384867668152,
      "step": 943
    },
    {
      "epoch": 0.921875,
      "grad_norm": 0.5565328598022461,
      "learning_rate": 1.589793915603533e-05,
      "loss": 0.37386590242385864,
      "step": 944
    },
    {
      "epoch": 0.9228515625,
      "grad_norm": 0.398419588804245,
      "learning_rate": 1.5701668302257116e-05,
      "loss": 0.9127399325370789,
      "step": 945
    },
    {
      "epoch": 0.923828125,
      "grad_norm": 0.37491804361343384,
      "learning_rate": 1.55053974484789e-05,
      "loss": 0.47025924921035767,
      "step": 946
    },
    {
      "epoch": 0.9248046875,
      "grad_norm": 0.49557894468307495,
      "learning_rate": 1.5309126594700687e-05,
      "loss": 0.6349594593048096,
      "step": 947
    },
    {
      "epoch": 0.92578125,
      "grad_norm": 0.2361314743757248,
      "learning_rate": 1.5112855740922475e-05,
      "loss": 0.3594982922077179,
      "step": 948
    },
    {
      "epoch": 0.9267578125,
      "grad_norm": 0.40022003650665283,
      "learning_rate": 1.491658488714426e-05,
      "loss": 0.41701436042785645,
      "step": 949
    },
    {
      "epoch": 0.927734375,
      "grad_norm": 0.349528431892395,
      "learning_rate": 1.4720314033366044e-05,
      "loss": 0.2943156063556671,
      "step": 950
    },
    {
      "epoch": 0.9287109375,
      "grad_norm": 0.4660559892654419,
      "learning_rate": 1.4524043179587832e-05,
      "loss": 0.3633948564529419,
      "step": 951
    },
    {
      "epoch": 0.9296875,
      "grad_norm": 0.28590673208236694,
      "learning_rate": 1.432777232580962e-05,
      "loss": 0.4886907935142517,
      "step": 952
    },
    {
      "epoch": 0.9306640625,
      "grad_norm": 0.4388448894023895,
      "learning_rate": 1.4131501472031405e-05,
      "loss": 0.6123654246330261,
      "step": 953
    },
    {
      "epoch": 0.931640625,
      "grad_norm": 0.4807531237602234,
      "learning_rate": 1.3935230618253189e-05,
      "loss": 0.32400381565093994,
      "step": 954
    },
    {
      "epoch": 0.9326171875,
      "grad_norm": 0.3903636932373047,
      "learning_rate": 1.3738959764474977e-05,
      "loss": 0.6839208006858826,
      "step": 955
    },
    {
      "epoch": 0.93359375,
      "grad_norm": 0.2925507426261902,
      "learning_rate": 1.3542688910696762e-05,
      "loss": 0.5898708701133728,
      "step": 956
    },
    {
      "epoch": 0.9345703125,
      "grad_norm": 0.39300912618637085,
      "learning_rate": 1.3346418056918546e-05,
      "loss": 0.3898833692073822,
      "step": 957
    },
    {
      "epoch": 0.935546875,
      "grad_norm": 0.4321513772010803,
      "learning_rate": 1.3150147203140334e-05,
      "loss": 0.5717346668243408,
      "step": 958
    },
    {
      "epoch": 0.9365234375,
      "grad_norm": 0.47681212425231934,
      "learning_rate": 1.2953876349362121e-05,
      "loss": 0.9711145162582397,
      "step": 959
    },
    {
      "epoch": 0.9375,
      "grad_norm": 0.524958610534668,
      "learning_rate": 1.2757605495583907e-05,
      "loss": 0.6577808260917664,
      "step": 960
    },
    {
      "epoch": 0.9384765625,
      "grad_norm": 0.40814298391342163,
      "learning_rate": 1.2561334641805691e-05,
      "loss": 0.5148733258247375,
      "step": 961
    },
    {
      "epoch": 0.939453125,
      "grad_norm": 0.3122687041759491,
      "learning_rate": 1.2365063788027479e-05,
      "loss": 0.884072482585907,
      "step": 962
    },
    {
      "epoch": 0.9404296875,
      "grad_norm": 0.4473840594291687,
      "learning_rate": 1.2168792934249266e-05,
      "loss": 0.660685658454895,
      "step": 963
    },
    {
      "epoch": 0.94140625,
      "grad_norm": 0.3491450548171997,
      "learning_rate": 1.197252208047105e-05,
      "loss": 0.8680378794670105,
      "step": 964
    },
    {
      "epoch": 0.9423828125,
      "grad_norm": 0.6323879957199097,
      "learning_rate": 1.1776251226692837e-05,
      "loss": 0.8196921348571777,
      "step": 965
    },
    {
      "epoch": 0.943359375,
      "grad_norm": 0.354900062084198,
      "learning_rate": 1.1579980372914623e-05,
      "loss": 0.5380838513374329,
      "step": 966
    },
    {
      "epoch": 0.9443359375,
      "grad_norm": 0.3235265612602234,
      "learning_rate": 1.1383709519136409e-05,
      "loss": 0.39993464946746826,
      "step": 967
    },
    {
      "epoch": 0.9453125,
      "grad_norm": 0.3700491786003113,
      "learning_rate": 1.1187438665358195e-05,
      "loss": 0.6613435745239258,
      "step": 968
    },
    {
      "epoch": 0.9462890625,
      "grad_norm": 0.29880228638648987,
      "learning_rate": 1.099116781157998e-05,
      "loss": 0.5756196975708008,
      "step": 969
    },
    {
      "epoch": 0.947265625,
      "grad_norm": 0.4585433304309845,
      "learning_rate": 1.0794896957801768e-05,
      "loss": 0.5012968182563782,
      "step": 970
    },
    {
      "epoch": 0.9482421875,
      "grad_norm": 0.5275799632072449,
      "learning_rate": 1.0598626104023552e-05,
      "loss": 0.4986013174057007,
      "step": 971
    },
    {
      "epoch": 0.94921875,
      "grad_norm": 0.30642619729042053,
      "learning_rate": 1.040235525024534e-05,
      "loss": 0.29793277382850647,
      "step": 972
    },
    {
      "epoch": 0.9501953125,
      "grad_norm": 0.7356166243553162,
      "learning_rate": 1.0206084396467125e-05,
      "loss": 0.6518126726150513,
      "step": 973
    },
    {
      "epoch": 0.951171875,
      "grad_norm": 0.6069150567054749,
      "learning_rate": 1.0009813542688911e-05,
      "loss": 0.7005544900894165,
      "step": 974
    },
    {
      "epoch": 0.9521484375,
      "grad_norm": 0.500067949295044,
      "learning_rate": 9.813542688910697e-06,
      "loss": 0.5567950010299683,
      "step": 975
    },
    {
      "epoch": 0.953125,
      "grad_norm": 0.5926097631454468,
      "learning_rate": 9.617271835132484e-06,
      "loss": 0.6974345445632935,
      "step": 976
    },
    {
      "epoch": 0.9541015625,
      "grad_norm": 0.28873002529144287,
      "learning_rate": 9.42100098135427e-06,
      "loss": 0.28231939673423767,
      "step": 977
    },
    {
      "epoch": 0.955078125,
      "grad_norm": 0.6644822359085083,
      "learning_rate": 9.224730127576054e-06,
      "loss": 0.46575701236724854,
      "step": 978
    },
    {
      "epoch": 0.9560546875,
      "grad_norm": 0.34748774766921997,
      "learning_rate": 9.028459273797842e-06,
      "loss": 0.7192713022232056,
      "step": 979
    },
    {
      "epoch": 0.95703125,
      "grad_norm": 0.4444558024406433,
      "learning_rate": 8.832188420019627e-06,
      "loss": 0.34014150500297546,
      "step": 980
    },
    {
      "epoch": 0.9580078125,
      "grad_norm": 0.4814091920852661,
      "learning_rate": 8.635917566241413e-06,
      "loss": 0.8042552471160889,
      "step": 981
    },
    {
      "epoch": 0.958984375,
      "grad_norm": 0.5443412661552429,
      "learning_rate": 8.439646712463199e-06,
      "loss": 0.6534023880958557,
      "step": 982
    },
    {
      "epoch": 0.9599609375,
      "grad_norm": 0.40025195479393005,
      "learning_rate": 8.243375858684986e-06,
      "loss": 0.9056930541992188,
      "step": 983
    },
    {
      "epoch": 0.9609375,
      "grad_norm": 0.41958069801330566,
      "learning_rate": 8.047105004906772e-06,
      "loss": 0.5610394477844238,
      "step": 984
    },
    {
      "epoch": 0.9619140625,
      "grad_norm": 0.33056482672691345,
      "learning_rate": 7.850834151128558e-06,
      "loss": 0.5796000361442566,
      "step": 985
    },
    {
      "epoch": 0.962890625,
      "grad_norm": 0.5056169629096985,
      "learning_rate": 7.654563297350344e-06,
      "loss": 0.7795373201370239,
      "step": 986
    },
    {
      "epoch": 0.9638671875,
      "grad_norm": 0.4030667543411255,
      "learning_rate": 7.45829244357213e-06,
      "loss": 0.761528491973877,
      "step": 987
    },
    {
      "epoch": 0.96484375,
      "grad_norm": 0.22716952860355377,
      "learning_rate": 7.262021589793916e-06,
      "loss": 0.21712671220302582,
      "step": 988
    },
    {
      "epoch": 0.9658203125,
      "grad_norm": 0.4826786518096924,
      "learning_rate": 7.0657507360157025e-06,
      "loss": 0.6192560791969299,
      "step": 989
    },
    {
      "epoch": 0.966796875,
      "grad_norm": 0.3611379861831665,
      "learning_rate": 6.869479882237488e-06,
      "loss": 0.5660407543182373,
      "step": 990
    },
    {
      "epoch": 0.9677734375,
      "grad_norm": 0.44197750091552734,
      "learning_rate": 6.673209028459273e-06,
      "loss": 0.8223164081573486,
      "step": 991
    },
    {
      "epoch": 0.96875,
      "grad_norm": 0.45650866627693176,
      "learning_rate": 6.476938174681061e-06,
      "loss": 0.5810177326202393,
      "step": 992
    },
    {
      "epoch": 0.9697265625,
      "grad_norm": 0.6275922060012817,
      "learning_rate": 6.2806673209028455e-06,
      "loss": 0.46302127838134766,
      "step": 993
    },
    {
      "epoch": 0.970703125,
      "grad_norm": 0.29163289070129395,
      "learning_rate": 6.084396467124633e-06,
      "loss": 0.49744415283203125,
      "step": 994
    },
    {
      "epoch": 0.9716796875,
      "grad_norm": 0.4289768934249878,
      "learning_rate": 5.888125613346419e-06,
      "loss": 0.39710360765457153,
      "step": 995
    },
    {
      "epoch": 0.97265625,
      "grad_norm": 0.43311089277267456,
      "learning_rate": 5.6918547595682045e-06,
      "loss": 0.4934995174407959,
      "step": 996
    },
    {
      "epoch": 0.9736328125,
      "grad_norm": 0.4249640703201294,
      "learning_rate": 5.49558390578999e-06,
      "loss": 0.6822129487991333,
      "step": 997
    },
    {
      "epoch": 0.974609375,
      "grad_norm": 0.4080635607242584,
      "learning_rate": 5.299313052011776e-06,
      "loss": 0.2851019501686096,
      "step": 998
    },
    {
      "epoch": 0.9755859375,
      "grad_norm": 0.3082174062728882,
      "learning_rate": 5.103042198233563e-06,
      "loss": 0.8851650357246399,
      "step": 999
    },
    {
      "epoch": 0.9765625,
      "grad_norm": 0.5285578370094299,
      "learning_rate": 4.906771344455348e-06,
      "loss": 0.5684286952018738,
      "step": 1000
    },
    {
      "epoch": 0.9775390625,
      "grad_norm": 0.37052616477012634,
      "learning_rate": 4.710500490677135e-06,
      "loss": 0.8170924782752991,
      "step": 1001
    },
    {
      "epoch": 0.978515625,
      "grad_norm": 0.46926191449165344,
      "learning_rate": 4.514229636898921e-06,
      "loss": 0.665911853313446,
      "step": 1002
    },
    {
      "epoch": 0.9794921875,
      "grad_norm": 0.38110095262527466,
      "learning_rate": 4.3179587831207065e-06,
      "loss": 0.9365942478179932,
      "step": 1003
    },
    {
      "epoch": 0.98046875,
      "grad_norm": 0.3803754150867462,
      "learning_rate": 4.121687929342493e-06,
      "loss": 0.756361722946167,
      "step": 1004
    },
    {
      "epoch": 0.9814453125,
      "grad_norm": 0.6576887965202332,
      "learning_rate": 3.925417075564279e-06,
      "loss": 0.6846331357955933,
      "step": 1005
    },
    {
      "epoch": 0.982421875,
      "grad_norm": 0.6425113081932068,
      "learning_rate": 3.729146221786065e-06,
      "loss": 0.7665562629699707,
      "step": 1006
    },
    {
      "epoch": 0.9833984375,
      "grad_norm": 0.28858375549316406,
      "learning_rate": 3.5328753680078512e-06,
      "loss": 0.2748746871948242,
      "step": 1007
    },
    {
      "epoch": 0.984375,
      "grad_norm": 0.38693365454673767,
      "learning_rate": 3.3366045142296366e-06,
      "loss": 0.6602081060409546,
      "step": 1008
    },
    {
      "epoch": 0.9853515625,
      "grad_norm": 0.39297735691070557,
      "learning_rate": 3.1403336604514228e-06,
      "loss": 0.43784576654434204,
      "step": 1009
    },
    {
      "epoch": 0.986328125,
      "grad_norm": 0.4182215929031372,
      "learning_rate": 2.9440628066732094e-06,
      "loss": 0.7852948307991028,
      "step": 1010
    },
    {
      "epoch": 0.9873046875,
      "grad_norm": 0.4079328775405884,
      "learning_rate": 2.747791952894995e-06,
      "loss": 0.5413305759429932,
      "step": 1011
    },
    {
      "epoch": 0.98828125,
      "grad_norm": 0.41826963424682617,
      "learning_rate": 2.5515210991167813e-06,
      "loss": 0.449452668428421,
      "step": 1012
    },
    {
      "epoch": 0.9892578125,
      "grad_norm": 0.31969836354255676,
      "learning_rate": 2.3552502453385675e-06,
      "loss": 0.26595592498779297,
      "step": 1013
    },
    {
      "epoch": 0.990234375,
      "grad_norm": 0.466192364692688,
      "learning_rate": 2.1589793915603533e-06,
      "loss": 0.6175995469093323,
      "step": 1014
    },
    {
      "epoch": 0.9912109375,
      "grad_norm": 0.4734349846839905,
      "learning_rate": 1.9627085377821394e-06,
      "loss": 0.6440984010696411,
      "step": 1015
    },
    {
      "epoch": 0.9921875,
      "grad_norm": 0.4446095824241638,
      "learning_rate": 1.7664376840039256e-06,
      "loss": 0.5738557577133179,
      "step": 1016
    },
    {
      "epoch": 0.9931640625,
      "grad_norm": 0.24098840355873108,
      "learning_rate": 1.5701668302257114e-06,
      "loss": 0.6320365071296692,
      "step": 1017
    },
    {
      "epoch": 0.994140625,
      "grad_norm": 0.5342791676521301,
      "learning_rate": 1.3738959764474976e-06,
      "loss": 0.9431695938110352,
      "step": 1018
    },
    {
      "epoch": 0.9951171875,
      "grad_norm": 0.31406712532043457,
      "learning_rate": 1.1776251226692837e-06,
      "loss": 0.6406105160713196,
      "step": 1019
    },
    {
      "epoch": 0.99609375,
      "grad_norm": 0.5162865519523621,
      "learning_rate": 9.813542688910697e-07,
      "loss": 0.7935853004455566,
      "step": 1020
    },
    {
      "epoch": 0.9970703125,
      "grad_norm": 0.4624859690666199,
      "learning_rate": 7.850834151128557e-07,
      "loss": 0.9667851328849792,
      "step": 1021
    },
    {
      "epoch": 0.998046875,
      "grad_norm": 0.43549951910972595,
      "learning_rate": 5.888125613346419e-07,
      "loss": 0.73248291015625,
      "step": 1022
    },
    {
      "epoch": 0.9990234375,
      "grad_norm": 0.6080308556556702,
      "learning_rate": 3.9254170755642785e-07,
      "loss": 0.5045021772384644,
      "step": 1023
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.3927266299724579,
      "learning_rate": 1.9627085377821392e-07,
      "loss": 0.37262263894081116,
      "step": 1024
    }
  ],
  "logging_steps": 1,
  "max_steps": 1024,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 6.871410239702333e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}