{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 12514, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00015983377287620874, "grad_norm": 1.3181974688388804, "learning_rate": 0.0, "loss": 1.0502293109893799, "step": 1 }, { "epoch": 0.0003196675457524175, "grad_norm": 1.2649758100746085, "learning_rate": 3.194888178913738e-08, "loss": 0.9572315216064453, "step": 2 }, { "epoch": 0.0004795013186286262, "grad_norm": 1.2983755166619069, "learning_rate": 6.389776357827476e-08, "loss": 1.1238579750061035, "step": 3 }, { "epoch": 0.000639335091504835, "grad_norm": 1.3271925663360031, "learning_rate": 9.584664536741214e-08, "loss": 1.12874174118042, "step": 4 }, { "epoch": 0.0007991688643810437, "grad_norm": 1.220778610979008, "learning_rate": 1.2779552715654952e-07, "loss": 1.3113226890563965, "step": 5 }, { "epoch": 0.0009590026372572524, "grad_norm": 1.625959591924292, "learning_rate": 1.597444089456869e-07, "loss": 1.0629719495773315, "step": 6 }, { "epoch": 0.0011188364101334613, "grad_norm": 1.2865110454422555, "learning_rate": 1.9169329073482428e-07, "loss": 1.2056427001953125, "step": 7 }, { "epoch": 0.00127867018300967, "grad_norm": 1.5341956357150697, "learning_rate": 2.2364217252396166e-07, "loss": 1.0042500495910645, "step": 8 }, { "epoch": 0.0014385039558858787, "grad_norm": 1.5857021065637524, "learning_rate": 2.5559105431309904e-07, "loss": 1.3802070617675781, "step": 9 }, { "epoch": 0.0015983377287620874, "grad_norm": 1.2080500765892395, "learning_rate": 2.875399361022364e-07, "loss": 0.9500985145568848, "step": 10 }, { "epoch": 0.0017581715016382962, "grad_norm": 1.4403871424211643, "learning_rate": 3.194888178913738e-07, "loss": 1.2184820175170898, "step": 11 }, { "epoch": 0.0019180052745145048, "grad_norm": 1.455768200647343, "learning_rate": 3.514376996805112e-07, "loss": 1.0787396430969238, "step": 12 }, { "epoch": 0.0020778390473907135, "grad_norm": 1.5078914613350027, "learning_rate": 3.8338658146964857e-07, "loss": 1.2352054119110107, "step": 13 }, { "epoch": 0.0022376728202669225, "grad_norm": 1.3337729872107418, "learning_rate": 4.1533546325878595e-07, "loss": 1.0422511100769043, "step": 14 }, { "epoch": 0.002397506593143131, "grad_norm": 1.4904477753606498, "learning_rate": 4.4728434504792333e-07, "loss": 1.0832364559173584, "step": 15 }, { "epoch": 0.00255734036601934, "grad_norm": 1.3273480104443816, "learning_rate": 4.792332268370608e-07, "loss": 1.2150917053222656, "step": 16 }, { "epoch": 0.0027171741388955484, "grad_norm": 1.284327256705105, "learning_rate": 5.111821086261981e-07, "loss": 1.3138893842697144, "step": 17 }, { "epoch": 0.0028770079117717575, "grad_norm": 1.4629103840485582, "learning_rate": 5.431309904153355e-07, "loss": 1.0003154277801514, "step": 18 }, { "epoch": 0.003036841684647966, "grad_norm": 1.2171118130677758, "learning_rate": 5.750798722044729e-07, "loss": 1.2520501613616943, "step": 19 }, { "epoch": 0.0031966754575241747, "grad_norm": 1.2348494773293113, "learning_rate": 6.070287539936103e-07, "loss": 1.0949773788452148, "step": 20 }, { "epoch": 0.003356509230400384, "grad_norm": 1.2986606318016034, "learning_rate": 6.389776357827476e-07, "loss": 1.2555091381072998, "step": 21 }, { "epoch": 0.0035163430032765924, "grad_norm": 1.2306451779018204, "learning_rate": 6.70926517571885e-07, "loss": 1.0022754669189453, "step": 22 }, { "epoch": 0.003676176776152801, "grad_norm": 1.5831712519034546, "learning_rate": 7.028753993610224e-07, "loss": 1.2263541221618652, "step": 23 }, { "epoch": 0.0038360105490290097, "grad_norm": 1.3390955639854611, "learning_rate": 7.348242811501598e-07, "loss": 1.142397403717041, "step": 24 }, { "epoch": 0.003995844321905219, "grad_norm": 1.4315350292426876, "learning_rate": 7.667731629392971e-07, "loss": 1.1085206270217896, "step": 25 }, { "epoch": 0.004155678094781427, "grad_norm": 1.3075862569947134, "learning_rate": 7.987220447284346e-07, "loss": 0.9603530168533325, "step": 26 }, { "epoch": 0.004315511867657636, "grad_norm": 1.4153337133492792, "learning_rate": 8.306709265175719e-07, "loss": 1.249459981918335, "step": 27 }, { "epoch": 0.004475345640533845, "grad_norm": 1.4154195410090542, "learning_rate": 8.626198083067093e-07, "loss": 1.1679058074951172, "step": 28 }, { "epoch": 0.004635179413410053, "grad_norm": 1.3194302494399077, "learning_rate": 8.945686900958467e-07, "loss": 1.057343602180481, "step": 29 }, { "epoch": 0.004795013186286262, "grad_norm": 1.2443718744353829, "learning_rate": 9.265175718849841e-07, "loss": 1.0410945415496826, "step": 30 }, { "epoch": 0.004954846959162471, "grad_norm": 1.323486849660824, "learning_rate": 9.584664536741215e-07, "loss": 1.1863224506378174, "step": 31 }, { "epoch": 0.00511468073203868, "grad_norm": 1.4802590813539558, "learning_rate": 9.904153354632587e-07, "loss": 1.1297975778579712, "step": 32 }, { "epoch": 0.005274514504914889, "grad_norm": 1.3222607958300026, "learning_rate": 1.0223642172523962e-06, "loss": 1.314483880996704, "step": 33 }, { "epoch": 0.005434348277791097, "grad_norm": 1.4167581033373444, "learning_rate": 1.0543130990415336e-06, "loss": 1.1921782493591309, "step": 34 }, { "epoch": 0.005594182050667306, "grad_norm": 1.3053827881002344, "learning_rate": 1.086261980830671e-06, "loss": 1.2519431114196777, "step": 35 }, { "epoch": 0.005754015823543515, "grad_norm": 1.5260906687657927, "learning_rate": 1.1182108626198083e-06, "loss": 1.286122441291809, "step": 36 }, { "epoch": 0.005913849596419723, "grad_norm": 1.4542851137493225, "learning_rate": 1.1501597444089457e-06, "loss": 1.1977086067199707, "step": 37 }, { "epoch": 0.006073683369295932, "grad_norm": 1.5234553121108836, "learning_rate": 1.1821086261980831e-06, "loss": 1.2496362924575806, "step": 38 }, { "epoch": 0.006233517142172141, "grad_norm": 1.2922980970213516, "learning_rate": 1.2140575079872206e-06, "loss": 1.2815275192260742, "step": 39 }, { "epoch": 0.0063933509150483495, "grad_norm": 1.3333271470108043, "learning_rate": 1.2460063897763578e-06, "loss": 1.1019034385681152, "step": 40 }, { "epoch": 0.0065531846879245586, "grad_norm": 1.2516257145086909, "learning_rate": 1.2779552715654952e-06, "loss": 1.1768746376037598, "step": 41 }, { "epoch": 0.006713018460800768, "grad_norm": 1.4599419591399039, "learning_rate": 1.3099041533546329e-06, "loss": 1.143226146697998, "step": 42 }, { "epoch": 0.006872852233676976, "grad_norm": 1.3113850945888383, "learning_rate": 1.34185303514377e-06, "loss": 1.1629767417907715, "step": 43 }, { "epoch": 0.007032686006553185, "grad_norm": 1.5943722698392906, "learning_rate": 1.3738019169329075e-06, "loss": 1.318975567817688, "step": 44 }, { "epoch": 0.007192519779429393, "grad_norm": 1.3424299000371867, "learning_rate": 1.4057507987220447e-06, "loss": 1.2889617681503296, "step": 45 }, { "epoch": 0.007352353552305602, "grad_norm": 1.4645286956542423, "learning_rate": 1.4376996805111824e-06, "loss": 1.2702231407165527, "step": 46 }, { "epoch": 0.007512187325181811, "grad_norm": 1.259859668708174, "learning_rate": 1.4696485623003196e-06, "loss": 1.233471155166626, "step": 47 }, { "epoch": 0.007672021098058019, "grad_norm": 1.4537378713515798, "learning_rate": 1.501597444089457e-06, "loss": 1.2733325958251953, "step": 48 }, { "epoch": 0.007831854870934228, "grad_norm": 1.3720896128457873, "learning_rate": 1.5335463258785943e-06, "loss": 1.3221018314361572, "step": 49 }, { "epoch": 0.007991688643810438, "grad_norm": 1.3086698364678357, "learning_rate": 1.565495207667732e-06, "loss": 1.1992450952529907, "step": 50 }, { "epoch": 0.008151522416686647, "grad_norm": 1.2906698242627062, "learning_rate": 1.5974440894568691e-06, "loss": 1.0617294311523438, "step": 51 }, { "epoch": 0.008311356189562854, "grad_norm": 1.5698313972003528, "learning_rate": 1.6293929712460066e-06, "loss": 1.3697021007537842, "step": 52 }, { "epoch": 0.008471189962439063, "grad_norm": 1.3084637275016762, "learning_rate": 1.6613418530351438e-06, "loss": 1.0854195356369019, "step": 53 }, { "epoch": 0.008631023735315272, "grad_norm": 1.2289790573733612, "learning_rate": 1.6932907348242814e-06, "loss": 1.2875707149505615, "step": 54 }, { "epoch": 0.008790857508191481, "grad_norm": 1.42870628143881, "learning_rate": 1.7252396166134187e-06, "loss": 1.1158974170684814, "step": 55 }, { "epoch": 0.00895069128106769, "grad_norm": 1.2363961160395844, "learning_rate": 1.757188498402556e-06, "loss": 1.0575098991394043, "step": 56 }, { "epoch": 0.009110525053943897, "grad_norm": 1.2583631127082553, "learning_rate": 1.7891373801916933e-06, "loss": 1.0961458683013916, "step": 57 }, { "epoch": 0.009270358826820107, "grad_norm": 1.337454676085886, "learning_rate": 1.821086261980831e-06, "loss": 1.1861414909362793, "step": 58 }, { "epoch": 0.009430192599696316, "grad_norm": 1.3811188292696879, "learning_rate": 1.8530351437699682e-06, "loss": 1.2653028964996338, "step": 59 }, { "epoch": 0.009590026372572525, "grad_norm": 1.4871911404092613, "learning_rate": 1.8849840255591056e-06, "loss": 1.2577351331710815, "step": 60 }, { "epoch": 0.009749860145448734, "grad_norm": 1.3186360116750186, "learning_rate": 1.916932907348243e-06, "loss": 1.187122106552124, "step": 61 }, { "epoch": 0.009909693918324943, "grad_norm": 1.3538036826903752, "learning_rate": 1.9488817891373803e-06, "loss": 1.2659339904785156, "step": 62 }, { "epoch": 0.01006952769120115, "grad_norm": 1.1130460084578466, "learning_rate": 1.9808306709265175e-06, "loss": 1.0905447006225586, "step": 63 }, { "epoch": 0.01022936146407736, "grad_norm": 1.1536789948974882, "learning_rate": 2.012779552715655e-06, "loss": 1.0933492183685303, "step": 64 }, { "epoch": 0.010389195236953568, "grad_norm": 1.3323465151112526, "learning_rate": 2.0447284345047924e-06, "loss": 1.1508063077926636, "step": 65 }, { "epoch": 0.010549029009829777, "grad_norm": 1.0797170904129183, "learning_rate": 2.07667731629393e-06, "loss": 0.9767791628837585, "step": 66 }, { "epoch": 0.010708862782705986, "grad_norm": 1.2161438513173874, "learning_rate": 2.1086261980830672e-06, "loss": 1.0106879472732544, "step": 67 }, { "epoch": 0.010868696555582194, "grad_norm": 1.439354741117747, "learning_rate": 2.140575079872205e-06, "loss": 1.4405637979507446, "step": 68 }, { "epoch": 0.011028530328458403, "grad_norm": 1.2762466192507866, "learning_rate": 2.172523961661342e-06, "loss": 1.0366973876953125, "step": 69 }, { "epoch": 0.011188364101334612, "grad_norm": 1.1967967832087072, "learning_rate": 2.2044728434504793e-06, "loss": 1.2707693576812744, "step": 70 }, { "epoch": 0.011348197874210821, "grad_norm": 1.1370115829963572, "learning_rate": 2.2364217252396165e-06, "loss": 1.0658433437347412, "step": 71 }, { "epoch": 0.01150803164708703, "grad_norm": 1.0510966314694306, "learning_rate": 2.268370607028754e-06, "loss": 1.0458035469055176, "step": 72 }, { "epoch": 0.011667865419963239, "grad_norm": 1.1076067342400793, "learning_rate": 2.3003194888178914e-06, "loss": 0.997923731803894, "step": 73 }, { "epoch": 0.011827699192839446, "grad_norm": 1.0616267877586723, "learning_rate": 2.332268370607029e-06, "loss": 1.0030103921890259, "step": 74 }, { "epoch": 0.011987532965715655, "grad_norm": 1.1912936017464786, "learning_rate": 2.3642172523961663e-06, "loss": 1.0437122583389282, "step": 75 }, { "epoch": 0.012147366738591864, "grad_norm": 1.1223876956906558, "learning_rate": 2.396166134185304e-06, "loss": 0.866408109664917, "step": 76 }, { "epoch": 0.012307200511468074, "grad_norm": 1.2686254426857155, "learning_rate": 2.428115015974441e-06, "loss": 1.3401315212249756, "step": 77 }, { "epoch": 0.012467034284344283, "grad_norm": 1.1545832938327754, "learning_rate": 2.4600638977635784e-06, "loss": 1.0656827688217163, "step": 78 }, { "epoch": 0.01262686805722049, "grad_norm": 0.9353206248079999, "learning_rate": 2.4920127795527156e-06, "loss": 0.9946616888046265, "step": 79 }, { "epoch": 0.012786701830096699, "grad_norm": 1.1164632386674533, "learning_rate": 2.5239616613418532e-06, "loss": 0.994091272354126, "step": 80 }, { "epoch": 0.012946535602972908, "grad_norm": 1.0133627120335766, "learning_rate": 2.5559105431309904e-06, "loss": 0.9931496381759644, "step": 81 }, { "epoch": 0.013106369375849117, "grad_norm": 0.9108229356389546, "learning_rate": 2.5878594249201277e-06, "loss": 0.8210781216621399, "step": 82 }, { "epoch": 0.013266203148725326, "grad_norm": 1.204388790804072, "learning_rate": 2.6198083067092657e-06, "loss": 1.2206480503082275, "step": 83 }, { "epoch": 0.013426036921601535, "grad_norm": 1.0043791546466398, "learning_rate": 2.651757188498403e-06, "loss": 1.196014404296875, "step": 84 }, { "epoch": 0.013585870694477743, "grad_norm": 1.1093586362943146, "learning_rate": 2.68370607028754e-06, "loss": 1.1540277004241943, "step": 85 }, { "epoch": 0.013745704467353952, "grad_norm": 1.0559725017524004, "learning_rate": 2.7156549520766774e-06, "loss": 1.0365521907806396, "step": 86 }, { "epoch": 0.01390553824023016, "grad_norm": 1.0572084292819426, "learning_rate": 2.747603833865815e-06, "loss": 1.0644091367721558, "step": 87 }, { "epoch": 0.01406537201310637, "grad_norm": 1.0105224120692844, "learning_rate": 2.7795527156549523e-06, "loss": 1.2148473262786865, "step": 88 }, { "epoch": 0.014225205785982579, "grad_norm": 0.97064398543111, "learning_rate": 2.8115015974440895e-06, "loss": 1.0565274953842163, "step": 89 }, { "epoch": 0.014385039558858786, "grad_norm": 1.0267158401575383, "learning_rate": 2.8434504792332267e-06, "loss": 1.0598220825195312, "step": 90 }, { "epoch": 0.014544873331734995, "grad_norm": 1.1393876555915452, "learning_rate": 2.8753993610223648e-06, "loss": 1.1263421773910522, "step": 91 }, { "epoch": 0.014704707104611204, "grad_norm": 1.065558436977494, "learning_rate": 2.907348242811502e-06, "loss": 1.1373424530029297, "step": 92 }, { "epoch": 0.014864540877487413, "grad_norm": 0.9332410513351271, "learning_rate": 2.9392971246006392e-06, "loss": 0.9297798871994019, "step": 93 }, { "epoch": 0.015024374650363622, "grad_norm": 0.9933249666449965, "learning_rate": 2.9712460063897764e-06, "loss": 1.0755114555358887, "step": 94 }, { "epoch": 0.01518420842323983, "grad_norm": 1.031760760602797, "learning_rate": 3.003194888178914e-06, "loss": 1.0245609283447266, "step": 95 }, { "epoch": 0.015344042196116039, "grad_norm": 0.8585657275777885, "learning_rate": 3.0351437699680513e-06, "loss": 0.9828968048095703, "step": 96 }, { "epoch": 0.015503875968992248, "grad_norm": 1.1106289618579444, "learning_rate": 3.0670926517571885e-06, "loss": 1.2802739143371582, "step": 97 }, { "epoch": 0.015663709741868457, "grad_norm": 0.8540251691514039, "learning_rate": 3.0990415335463258e-06, "loss": 0.7321071624755859, "step": 98 }, { "epoch": 0.015823543514744666, "grad_norm": 0.9730666033820332, "learning_rate": 3.130990415335464e-06, "loss": 0.9345012903213501, "step": 99 }, { "epoch": 0.015983377287620875, "grad_norm": 1.023838374458908, "learning_rate": 3.162939297124601e-06, "loss": 0.8831745386123657, "step": 100 }, { "epoch": 0.016143211060497084, "grad_norm": 1.072532503982652, "learning_rate": 3.1948881789137383e-06, "loss": 1.0899248123168945, "step": 101 }, { "epoch": 0.016303044833373293, "grad_norm": 1.1215557347973586, "learning_rate": 3.2268370607028755e-06, "loss": 0.9357964992523193, "step": 102 }, { "epoch": 0.016462878606249502, "grad_norm": 0.7863875334816387, "learning_rate": 3.258785942492013e-06, "loss": 0.6844557523727417, "step": 103 }, { "epoch": 0.016622712379125708, "grad_norm": 0.9830183084859795, "learning_rate": 3.2907348242811504e-06, "loss": 0.9507535696029663, "step": 104 }, { "epoch": 0.016782546152001917, "grad_norm": 1.042694241751345, "learning_rate": 3.3226837060702876e-06, "loss": 1.0128626823425293, "step": 105 }, { "epoch": 0.016942379924878126, "grad_norm": 1.1085085188170936, "learning_rate": 3.3546325878594257e-06, "loss": 0.8368178606033325, "step": 106 }, { "epoch": 0.017102213697754335, "grad_norm": 0.9020471008683518, "learning_rate": 3.386581469648563e-06, "loss": 0.8534760475158691, "step": 107 }, { "epoch": 0.017262047470630544, "grad_norm": 1.1395343871437682, "learning_rate": 3.4185303514377e-06, "loss": 0.981425940990448, "step": 108 }, { "epoch": 0.017421881243506753, "grad_norm": 1.0130664813997454, "learning_rate": 3.4504792332268373e-06, "loss": 1.0521316528320312, "step": 109 }, { "epoch": 0.017581715016382962, "grad_norm": 1.048540015407227, "learning_rate": 3.482428115015975e-06, "loss": 1.0010894536972046, "step": 110 }, { "epoch": 0.01774154878925917, "grad_norm": 1.0469890626544727, "learning_rate": 3.514376996805112e-06, "loss": 1.0109913349151611, "step": 111 }, { "epoch": 0.01790138256213538, "grad_norm": 0.9169031499856537, "learning_rate": 3.5463258785942494e-06, "loss": 0.8992091417312622, "step": 112 }, { "epoch": 0.01806121633501159, "grad_norm": 1.2026788468631184, "learning_rate": 3.5782747603833866e-06, "loss": 0.9930199384689331, "step": 113 }, { "epoch": 0.018221050107887795, "grad_norm": 1.052839945949869, "learning_rate": 3.6102236421725247e-06, "loss": 1.1980481147766113, "step": 114 }, { "epoch": 0.018380883880764004, "grad_norm": 1.076557615568507, "learning_rate": 3.642172523961662e-06, "loss": 0.9395987391471863, "step": 115 }, { "epoch": 0.018540717653640213, "grad_norm": 0.9691167707176271, "learning_rate": 3.674121405750799e-06, "loss": 0.7987468242645264, "step": 116 }, { "epoch": 0.018700551426516422, "grad_norm": 1.051957706526938, "learning_rate": 3.7060702875399364e-06, "loss": 0.8879724740982056, "step": 117 }, { "epoch": 0.01886038519939263, "grad_norm": 1.2175879873717184, "learning_rate": 3.738019169329074e-06, "loss": 1.0274040699005127, "step": 118 }, { "epoch": 0.01902021897226884, "grad_norm": 1.046473064567514, "learning_rate": 3.7699680511182112e-06, "loss": 0.7740172147750854, "step": 119 }, { "epoch": 0.01918005274514505, "grad_norm": 1.0543086935686483, "learning_rate": 3.8019169329073485e-06, "loss": 0.8526840806007385, "step": 120 }, { "epoch": 0.01933988651802126, "grad_norm": 1.0100193743031152, "learning_rate": 3.833865814696486e-06, "loss": 1.057236671447754, "step": 121 }, { "epoch": 0.019499720290897467, "grad_norm": 1.2589493037527701, "learning_rate": 3.865814696485624e-06, "loss": 1.0267603397369385, "step": 122 }, { "epoch": 0.019659554063773677, "grad_norm": 0.9564350396862173, "learning_rate": 3.8977635782747605e-06, "loss": 0.8590590953826904, "step": 123 }, { "epoch": 0.019819387836649886, "grad_norm": 1.0098299920260096, "learning_rate": 3.929712460063898e-06, "loss": 0.8989751935005188, "step": 124 }, { "epoch": 0.01997922160952609, "grad_norm": 1.1638461416236399, "learning_rate": 3.961661341853035e-06, "loss": 0.9856419563293457, "step": 125 }, { "epoch": 0.0201390553824023, "grad_norm": 1.0124517023977921, "learning_rate": 3.9936102236421735e-06, "loss": 0.8706690073013306, "step": 126 }, { "epoch": 0.02029888915527851, "grad_norm": 0.9645734497643194, "learning_rate": 4.02555910543131e-06, "loss": 0.8747401237487793, "step": 127 }, { "epoch": 0.02045872292815472, "grad_norm": 1.1620567160297703, "learning_rate": 4.057507987220448e-06, "loss": 1.111987829208374, "step": 128 }, { "epoch": 0.020618556701030927, "grad_norm": 0.9976007446257543, "learning_rate": 4.089456869009585e-06, "loss": 0.961155116558075, "step": 129 }, { "epoch": 0.020778390473907137, "grad_norm": 1.1361426112037494, "learning_rate": 4.121405750798722e-06, "loss": 1.0706353187561035, "step": 130 }, { "epoch": 0.020938224246783346, "grad_norm": 0.9627210941835423, "learning_rate": 4.15335463258786e-06, "loss": 0.9013700485229492, "step": 131 }, { "epoch": 0.021098058019659555, "grad_norm": 0.9836570440324753, "learning_rate": 4.185303514376997e-06, "loss": 0.9287548065185547, "step": 132 }, { "epoch": 0.021257891792535764, "grad_norm": 1.1164162314736905, "learning_rate": 4.2172523961661345e-06, "loss": 1.056089162826538, "step": 133 }, { "epoch": 0.021417725565411973, "grad_norm": 1.1052614543701111, "learning_rate": 4.249201277955272e-06, "loss": 0.9663273096084595, "step": 134 }, { "epoch": 0.021577559338288182, "grad_norm": 0.9359879200342807, "learning_rate": 4.28115015974441e-06, "loss": 0.8704187870025635, "step": 135 }, { "epoch": 0.021737393111164387, "grad_norm": 0.9928800043352217, "learning_rate": 4.3130990415335465e-06, "loss": 0.8470963835716248, "step": 136 }, { "epoch": 0.021897226884040596, "grad_norm": 1.1046900119667922, "learning_rate": 4.345047923322684e-06, "loss": 0.9410116672515869, "step": 137 }, { "epoch": 0.022057060656916806, "grad_norm": 0.9965891570339606, "learning_rate": 4.376996805111822e-06, "loss": 0.7876315116882324, "step": 138 }, { "epoch": 0.022216894429793015, "grad_norm": 1.0688362289665465, "learning_rate": 4.408945686900959e-06, "loss": 0.9143752455711365, "step": 139 }, { "epoch": 0.022376728202669224, "grad_norm": 1.1493983853925718, "learning_rate": 4.440894568690096e-06, "loss": 0.9272838830947876, "step": 140 }, { "epoch": 0.022536561975545433, "grad_norm": 1.186073772831683, "learning_rate": 4.472843450479233e-06, "loss": 0.92360520362854, "step": 141 }, { "epoch": 0.022696395748421642, "grad_norm": 1.024812340069894, "learning_rate": 4.5047923322683716e-06, "loss": 0.884662389755249, "step": 142 }, { "epoch": 0.02285622952129785, "grad_norm": 0.9828452043390083, "learning_rate": 4.536741214057508e-06, "loss": 0.8513422012329102, "step": 143 }, { "epoch": 0.02301606329417406, "grad_norm": 1.2546523723668526, "learning_rate": 4.568690095846646e-06, "loss": 0.9007426500320435, "step": 144 }, { "epoch": 0.02317589706705027, "grad_norm": 1.0848548312029995, "learning_rate": 4.600638977635783e-06, "loss": 0.7869035005569458, "step": 145 }, { "epoch": 0.023335730839926478, "grad_norm": 1.1639367041267967, "learning_rate": 4.6325878594249205e-06, "loss": 0.6652911901473999, "step": 146 }, { "epoch": 0.023495564612802684, "grad_norm": 1.05980874399583, "learning_rate": 4.664536741214058e-06, "loss": 0.8826829195022583, "step": 147 }, { "epoch": 0.023655398385678893, "grad_norm": 1.1189846737031972, "learning_rate": 4.696485623003195e-06, "loss": 0.953906774520874, "step": 148 }, { "epoch": 0.023815232158555102, "grad_norm": 1.1412673377215716, "learning_rate": 4.7284345047923325e-06, "loss": 0.8290791511535645, "step": 149 }, { "epoch": 0.02397506593143131, "grad_norm": 1.0227159841843705, "learning_rate": 4.76038338658147e-06, "loss": 1.0381193161010742, "step": 150 }, { "epoch": 0.02413489970430752, "grad_norm": 1.0858636198827498, "learning_rate": 4.792332268370608e-06, "loss": 0.9834634065628052, "step": 151 }, { "epoch": 0.02429473347718373, "grad_norm": 1.0974828505932643, "learning_rate": 4.824281150159745e-06, "loss": 0.8359044790267944, "step": 152 }, { "epoch": 0.024454567250059938, "grad_norm": 1.1361541887165083, "learning_rate": 4.856230031948882e-06, "loss": 1.028106689453125, "step": 153 }, { "epoch": 0.024614401022936147, "grad_norm": 1.2569291347113225, "learning_rate": 4.88817891373802e-06, "loss": 0.9327988624572754, "step": 154 }, { "epoch": 0.024774234795812356, "grad_norm": 1.094224151784668, "learning_rate": 4.920127795527157e-06, "loss": 0.9944421052932739, "step": 155 }, { "epoch": 0.024934068568688565, "grad_norm": 1.0945575553046996, "learning_rate": 4.952076677316294e-06, "loss": 0.9506489038467407, "step": 156 }, { "epoch": 0.025093902341564774, "grad_norm": 1.206704543883526, "learning_rate": 4.984025559105431e-06, "loss": 0.9064034223556519, "step": 157 }, { "epoch": 0.02525373611444098, "grad_norm": 1.1133797522443956, "learning_rate": 5.015974440894569e-06, "loss": 0.9309333562850952, "step": 158 }, { "epoch": 0.02541356988731719, "grad_norm": 1.0672529732295324, "learning_rate": 5.0479233226837065e-06, "loss": 0.8531267642974854, "step": 159 }, { "epoch": 0.025573403660193398, "grad_norm": 1.0776848540661674, "learning_rate": 5.079872204472844e-06, "loss": 0.837105929851532, "step": 160 }, { "epoch": 0.025733237433069607, "grad_norm": 1.1010567388972268, "learning_rate": 5.111821086261981e-06, "loss": 0.8921215534210205, "step": 161 }, { "epoch": 0.025893071205945816, "grad_norm": 1.0383683528687655, "learning_rate": 5.1437699680511185e-06, "loss": 0.763064444065094, "step": 162 }, { "epoch": 0.026052904978822025, "grad_norm": 1.0514422334301683, "learning_rate": 5.175718849840255e-06, "loss": 0.7697719931602478, "step": 163 }, { "epoch": 0.026212738751698234, "grad_norm": 0.9889635267255543, "learning_rate": 5.207667731629393e-06, "loss": 0.9206843376159668, "step": 164 }, { "epoch": 0.026372572524574443, "grad_norm": 1.1948719048250966, "learning_rate": 5.2396166134185315e-06, "loss": 0.9089791774749756, "step": 165 }, { "epoch": 0.026532406297450652, "grad_norm": 1.353670811111575, "learning_rate": 5.2715654952076674e-06, "loss": 1.0194668769836426, "step": 166 }, { "epoch": 0.02669224007032686, "grad_norm": 1.1580536873665865, "learning_rate": 5.303514376996806e-06, "loss": 0.9085028767585754, "step": 167 }, { "epoch": 0.02685207384320307, "grad_norm": 1.1078181032695045, "learning_rate": 5.3354632587859436e-06, "loss": 0.7427655458450317, "step": 168 }, { "epoch": 0.027011907616079276, "grad_norm": 1.2189841711716163, "learning_rate": 5.36741214057508e-06, "loss": 1.1292150020599365, "step": 169 }, { "epoch": 0.027171741388955485, "grad_norm": 1.1192570705551002, "learning_rate": 5.399361022364218e-06, "loss": 0.8856632709503174, "step": 170 }, { "epoch": 0.027331575161831694, "grad_norm": 1.369419763748786, "learning_rate": 5.431309904153355e-06, "loss": 1.033820390701294, "step": 171 }, { "epoch": 0.027491408934707903, "grad_norm": 1.1701411780407738, "learning_rate": 5.4632587859424925e-06, "loss": 0.8454846143722534, "step": 172 }, { "epoch": 0.027651242707584112, "grad_norm": 1.1682587959721455, "learning_rate": 5.49520766773163e-06, "loss": 0.8327605724334717, "step": 173 }, { "epoch": 0.02781107648046032, "grad_norm": 1.1006695236814168, "learning_rate": 5.527156549520767e-06, "loss": 0.9264702796936035, "step": 174 }, { "epoch": 0.02797091025333653, "grad_norm": 1.054201830478629, "learning_rate": 5.5591054313099045e-06, "loss": 0.8374263048171997, "step": 175 }, { "epoch": 0.02813074402621274, "grad_norm": 1.192351696438423, "learning_rate": 5.591054313099042e-06, "loss": 0.8102055788040161, "step": 176 }, { "epoch": 0.02829057779908895, "grad_norm": 1.5122983072868743, "learning_rate": 5.623003194888179e-06, "loss": 0.8928204774856567, "step": 177 }, { "epoch": 0.028450411571965158, "grad_norm": 1.2029911844303673, "learning_rate": 5.654952076677317e-06, "loss": 0.904794454574585, "step": 178 }, { "epoch": 0.028610245344841367, "grad_norm": 1.0534045893165624, "learning_rate": 5.6869009584664534e-06, "loss": 0.8071985840797424, "step": 179 }, { "epoch": 0.028770079117717572, "grad_norm": 1.0532037912439862, "learning_rate": 5.718849840255591e-06, "loss": 0.7486203908920288, "step": 180 }, { "epoch": 0.02892991289059378, "grad_norm": 1.160045855829123, "learning_rate": 5.7507987220447296e-06, "loss": 1.0163891315460205, "step": 181 }, { "epoch": 0.02908974666346999, "grad_norm": 1.1583775217659187, "learning_rate": 5.7827476038338655e-06, "loss": 1.0505359172821045, "step": 182 }, { "epoch": 0.0292495804363462, "grad_norm": 1.1648602525910763, "learning_rate": 5.814696485623004e-06, "loss": 0.945558488368988, "step": 183 }, { "epoch": 0.02940941420922241, "grad_norm": 1.074807733879136, "learning_rate": 5.846645367412142e-06, "loss": 0.8072291016578674, "step": 184 }, { "epoch": 0.029569247982098618, "grad_norm": 1.0855238338606705, "learning_rate": 5.8785942492012785e-06, "loss": 0.9473419189453125, "step": 185 }, { "epoch": 0.029729081754974827, "grad_norm": 1.0576731048118033, "learning_rate": 5.910543130990416e-06, "loss": 0.7568771839141846, "step": 186 }, { "epoch": 0.029888915527851036, "grad_norm": 1.0254860988777716, "learning_rate": 5.942492012779553e-06, "loss": 0.8538993000984192, "step": 187 }, { "epoch": 0.030048749300727245, "grad_norm": 1.1197847329935535, "learning_rate": 5.9744408945686905e-06, "loss": 0.8712530136108398, "step": 188 }, { "epoch": 0.030208583073603454, "grad_norm": 1.4249112347614585, "learning_rate": 6.006389776357828e-06, "loss": 0.9077597856521606, "step": 189 }, { "epoch": 0.03036841684647966, "grad_norm": 1.087590660724588, "learning_rate": 6.038338658146965e-06, "loss": 1.0415164232254028, "step": 190 }, { "epoch": 0.03052825061935587, "grad_norm": 1.138003116120244, "learning_rate": 6.070287539936103e-06, "loss": 0.8049876689910889, "step": 191 }, { "epoch": 0.030688084392232078, "grad_norm": 1.3085678877046827, "learning_rate": 6.10223642172524e-06, "loss": 0.9171313047409058, "step": 192 }, { "epoch": 0.030847918165108287, "grad_norm": 1.277708308048679, "learning_rate": 6.134185303514377e-06, "loss": 0.8420673608779907, "step": 193 }, { "epoch": 0.031007751937984496, "grad_norm": 1.2664629556771676, "learning_rate": 6.166134185303515e-06, "loss": 0.945099413394928, "step": 194 }, { "epoch": 0.031167585710860705, "grad_norm": 1.4252829239277218, "learning_rate": 6.1980830670926515e-06, "loss": 0.9485107660293579, "step": 195 }, { "epoch": 0.031327419483736914, "grad_norm": 1.2808568405813339, "learning_rate": 6.230031948881789e-06, "loss": 0.8778038024902344, "step": 196 }, { "epoch": 0.03148725325661312, "grad_norm": 1.3687577832313456, "learning_rate": 6.261980830670928e-06, "loss": 1.0483018159866333, "step": 197 }, { "epoch": 0.03164708702948933, "grad_norm": 1.2820421834382618, "learning_rate": 6.293929712460064e-06, "loss": 0.9175823926925659, "step": 198 }, { "epoch": 0.03180692080236554, "grad_norm": 1.3465295103300081, "learning_rate": 6.325878594249202e-06, "loss": 1.0000312328338623, "step": 199 }, { "epoch": 0.03196675457524175, "grad_norm": 1.1677314305823745, "learning_rate": 6.35782747603834e-06, "loss": 0.8024624586105347, "step": 200 }, { "epoch": 0.032126588348117956, "grad_norm": 1.1341559766332245, "learning_rate": 6.3897763578274765e-06, "loss": 0.9432533979415894, "step": 201 }, { "epoch": 0.03228642212099417, "grad_norm": 1.176603768297458, "learning_rate": 6.421725239616614e-06, "loss": 0.8906043767929077, "step": 202 }, { "epoch": 0.032446255893870374, "grad_norm": 1.1898300153755337, "learning_rate": 6.453674121405751e-06, "loss": 0.8444076180458069, "step": 203 }, { "epoch": 0.032606089666746586, "grad_norm": 1.1785756202565136, "learning_rate": 6.485623003194889e-06, "loss": 0.8533221483230591, "step": 204 }, { "epoch": 0.03276592343962279, "grad_norm": 1.033112757454525, "learning_rate": 6.517571884984026e-06, "loss": 0.7145401239395142, "step": 205 }, { "epoch": 0.032925757212499004, "grad_norm": 1.304775224444254, "learning_rate": 6.549520766773163e-06, "loss": 0.8512332439422607, "step": 206 }, { "epoch": 0.03308559098537521, "grad_norm": 1.1063739241675528, "learning_rate": 6.581469648562301e-06, "loss": 0.7991707921028137, "step": 207 }, { "epoch": 0.033245424758251416, "grad_norm": 1.3402963234307177, "learning_rate": 6.613418530351438e-06, "loss": 0.898452639579773, "step": 208 }, { "epoch": 0.03340525853112763, "grad_norm": 1.142098582234626, "learning_rate": 6.645367412140575e-06, "loss": 0.8503364324569702, "step": 209 }, { "epoch": 0.033565092304003834, "grad_norm": 1.191232212080857, "learning_rate": 6.677316293929713e-06, "loss": 0.8502440452575684, "step": 210 }, { "epoch": 0.033724926076880046, "grad_norm": 1.5180165675514783, "learning_rate": 6.709265175718851e-06, "loss": 1.033862590789795, "step": 211 }, { "epoch": 0.03388475984975625, "grad_norm": 1.0423377388688437, "learning_rate": 6.741214057507987e-06, "loss": 0.8612003326416016, "step": 212 }, { "epoch": 0.034044593622632464, "grad_norm": 1.768338603489529, "learning_rate": 6.773162939297126e-06, "loss": 0.9774960279464722, "step": 213 }, { "epoch": 0.03420442739550867, "grad_norm": 1.1977559543881329, "learning_rate": 6.805111821086262e-06, "loss": 0.8575427532196045, "step": 214 }, { "epoch": 0.03436426116838488, "grad_norm": 1.370876768509842, "learning_rate": 6.8370607028754e-06, "loss": 0.8948541879653931, "step": 215 }, { "epoch": 0.03452409494126109, "grad_norm": 1.1895755140898039, "learning_rate": 6.869009584664538e-06, "loss": 0.8142316341400146, "step": 216 }, { "epoch": 0.0346839287141373, "grad_norm": 1.3089587872959563, "learning_rate": 6.900958466453675e-06, "loss": 0.9514938592910767, "step": 217 }, { "epoch": 0.034843762487013506, "grad_norm": 1.1637842580470301, "learning_rate": 6.932907348242812e-06, "loss": 0.9251481294631958, "step": 218 }, { "epoch": 0.03500359625988971, "grad_norm": 1.179019638355714, "learning_rate": 6.96485623003195e-06, "loss": 0.7942936420440674, "step": 219 }, { "epoch": 0.035163430032765924, "grad_norm": 1.326228715045048, "learning_rate": 6.996805111821087e-06, "loss": 0.8948459625244141, "step": 220 }, { "epoch": 0.03532326380564213, "grad_norm": 1.266465936914079, "learning_rate": 7.028753993610224e-06, "loss": 0.9808124303817749, "step": 221 }, { "epoch": 0.03548309757851834, "grad_norm": 1.311242015961831, "learning_rate": 7.060702875399361e-06, "loss": 0.8304955959320068, "step": 222 }, { "epoch": 0.03564293135139455, "grad_norm": 1.2148237668876638, "learning_rate": 7.092651757188499e-06, "loss": 0.7385537624359131, "step": 223 }, { "epoch": 0.03580276512427076, "grad_norm": 1.2611735328402922, "learning_rate": 7.1246006389776365e-06, "loss": 0.6848198175430298, "step": 224 }, { "epoch": 0.035962598897146966, "grad_norm": 1.194300517147493, "learning_rate": 7.156549520766773e-06, "loss": 0.9053319692611694, "step": 225 }, { "epoch": 0.03612243267002318, "grad_norm": 1.444939977496555, "learning_rate": 7.188498402555911e-06, "loss": 0.9047830700874329, "step": 226 }, { "epoch": 0.036282266442899384, "grad_norm": 1.2950163829802193, "learning_rate": 7.220447284345049e-06, "loss": 0.7496174573898315, "step": 227 }, { "epoch": 0.03644210021577559, "grad_norm": 1.2623831455905177, "learning_rate": 7.252396166134185e-06, "loss": 0.9004618525505066, "step": 228 }, { "epoch": 0.0366019339886518, "grad_norm": 1.3499091958689196, "learning_rate": 7.284345047923324e-06, "loss": 0.9380099773406982, "step": 229 }, { "epoch": 0.03676176776152801, "grad_norm": 1.105345013126664, "learning_rate": 7.316293929712461e-06, "loss": 0.9151743054389954, "step": 230 }, { "epoch": 0.03692160153440422, "grad_norm": 1.53983453239729, "learning_rate": 7.348242811501598e-06, "loss": 1.056518316268921, "step": 231 }, { "epoch": 0.037081435307280426, "grad_norm": 1.3626146007606779, "learning_rate": 7.380191693290736e-06, "loss": 0.7918332815170288, "step": 232 }, { "epoch": 0.03724126908015664, "grad_norm": 1.1224271605809795, "learning_rate": 7.412140575079873e-06, "loss": 0.8518538475036621, "step": 233 }, { "epoch": 0.037401102853032844, "grad_norm": 1.1217278753782463, "learning_rate": 7.44408945686901e-06, "loss": 0.7044215202331543, "step": 234 }, { "epoch": 0.03756093662590906, "grad_norm": 1.4793631582826492, "learning_rate": 7.476038338658148e-06, "loss": 1.094738245010376, "step": 235 }, { "epoch": 0.03772077039878526, "grad_norm": 1.100576329285542, "learning_rate": 7.507987220447285e-06, "loss": 0.7402036190032959, "step": 236 }, { "epoch": 0.037880604171661475, "grad_norm": 1.2377024809023973, "learning_rate": 7.5399361022364225e-06, "loss": 0.7000367045402527, "step": 237 }, { "epoch": 0.03804043794453768, "grad_norm": 1.3291253116432482, "learning_rate": 7.571884984025559e-06, "loss": 0.7521114349365234, "step": 238 }, { "epoch": 0.038200271717413886, "grad_norm": 1.5721743373170944, "learning_rate": 7.603833865814697e-06, "loss": 0.9570767879486084, "step": 239 }, { "epoch": 0.0383601054902901, "grad_norm": 1.3568061137185459, "learning_rate": 7.635782747603835e-06, "loss": 0.8635360598564148, "step": 240 }, { "epoch": 0.038519939263166304, "grad_norm": 1.2206294657838253, "learning_rate": 7.667731629392972e-06, "loss": 0.8352243900299072, "step": 241 }, { "epoch": 0.03867977303604252, "grad_norm": 1.6370538463984892, "learning_rate": 7.699680511182109e-06, "loss": 0.8780116438865662, "step": 242 }, { "epoch": 0.03883960680891872, "grad_norm": 1.3614861437557393, "learning_rate": 7.731629392971247e-06, "loss": 0.8677772283554077, "step": 243 }, { "epoch": 0.038999440581794935, "grad_norm": 1.292374997341664, "learning_rate": 7.763578274760384e-06, "loss": 0.9132684469223022, "step": 244 }, { "epoch": 0.03915927435467114, "grad_norm": 1.2654295849261186, "learning_rate": 7.795527156549521e-06, "loss": 0.8442959189414978, "step": 245 }, { "epoch": 0.03931910812754735, "grad_norm": 1.2461355877142128, "learning_rate": 7.827476038338658e-06, "loss": 0.864766001701355, "step": 246 }, { "epoch": 0.03947894190042356, "grad_norm": 1.4327230049761248, "learning_rate": 7.859424920127796e-06, "loss": 0.9173413515090942, "step": 247 }, { "epoch": 0.03963877567329977, "grad_norm": 1.315543715458025, "learning_rate": 7.891373801916933e-06, "loss": 0.7981840372085571, "step": 248 }, { "epoch": 0.03979860944617598, "grad_norm": 1.305053439731054, "learning_rate": 7.92332268370607e-06, "loss": 0.8423348665237427, "step": 249 }, { "epoch": 0.03995844321905218, "grad_norm": 1.2896805655411945, "learning_rate": 7.955271565495208e-06, "loss": 0.8703134655952454, "step": 250 }, { "epoch": 0.040118276991928395, "grad_norm": 1.3224713529069925, "learning_rate": 7.987220447284347e-06, "loss": 0.8292195796966553, "step": 251 }, { "epoch": 0.0402781107648046, "grad_norm": 1.287790991211804, "learning_rate": 8.019169329073482e-06, "loss": 0.7924104928970337, "step": 252 }, { "epoch": 0.04043794453768081, "grad_norm": 1.3260152244492769, "learning_rate": 8.05111821086262e-06, "loss": 0.7682307958602905, "step": 253 }, { "epoch": 0.04059777831055702, "grad_norm": 1.3460020806001924, "learning_rate": 8.083067092651757e-06, "loss": 0.9640902280807495, "step": 254 }, { "epoch": 0.04075761208343323, "grad_norm": 1.8411366470646795, "learning_rate": 8.115015974440896e-06, "loss": 0.866060197353363, "step": 255 }, { "epoch": 0.04091744585630944, "grad_norm": 1.347448217095743, "learning_rate": 8.146964856230033e-06, "loss": 0.838890790939331, "step": 256 }, { "epoch": 0.04107727962918565, "grad_norm": 1.4190103701779069, "learning_rate": 8.17891373801917e-06, "loss": 0.7780252695083618, "step": 257 }, { "epoch": 0.041237113402061855, "grad_norm": 1.2567922380858219, "learning_rate": 8.210862619808308e-06, "loss": 0.9079878926277161, "step": 258 }, { "epoch": 0.04139694717493807, "grad_norm": 1.5674298278706182, "learning_rate": 8.242811501597445e-06, "loss": 0.9344363212585449, "step": 259 }, { "epoch": 0.04155678094781427, "grad_norm": 1.5796575697482462, "learning_rate": 8.274760383386582e-06, "loss": 0.849139928817749, "step": 260 }, { "epoch": 0.04171661472069048, "grad_norm": 1.3140039138062922, "learning_rate": 8.30670926517572e-06, "loss": 0.8214280605316162, "step": 261 }, { "epoch": 0.04187644849356669, "grad_norm": 1.5449448920753683, "learning_rate": 8.338658146964857e-06, "loss": 0.955508828163147, "step": 262 }, { "epoch": 0.0420362822664429, "grad_norm": 1.2379774697424901, "learning_rate": 8.370607028753994e-06, "loss": 0.7696863412857056, "step": 263 }, { "epoch": 0.04219611603931911, "grad_norm": 1.3056652986320798, "learning_rate": 8.402555910543132e-06, "loss": 0.8372495174407959, "step": 264 }, { "epoch": 0.042355949812195315, "grad_norm": 1.3733494755167432, "learning_rate": 8.434504792332269e-06, "loss": 0.7147719860076904, "step": 265 }, { "epoch": 0.04251578358507153, "grad_norm": 1.5102094020339412, "learning_rate": 8.466453674121406e-06, "loss": 0.9111226797103882, "step": 266 }, { "epoch": 0.04267561735794773, "grad_norm": 1.337630527200369, "learning_rate": 8.498402555910544e-06, "loss": 0.9126783609390259, "step": 267 }, { "epoch": 0.042835451130823946, "grad_norm": 1.9867667437794139, "learning_rate": 8.530351437699681e-06, "loss": 0.9659831523895264, "step": 268 }, { "epoch": 0.04299528490370015, "grad_norm": 1.5186736445617055, "learning_rate": 8.56230031948882e-06, "loss": 0.7982177734375, "step": 269 }, { "epoch": 0.043155118676576364, "grad_norm": 1.5767878985918724, "learning_rate": 8.594249201277956e-06, "loss": 0.91988605260849, "step": 270 }, { "epoch": 0.04331495244945257, "grad_norm": 1.4713681293821603, "learning_rate": 8.626198083067093e-06, "loss": 0.8320435285568237, "step": 271 }, { "epoch": 0.043474786222328775, "grad_norm": 1.2667970670981237, "learning_rate": 8.658146964856232e-06, "loss": 0.7971675395965576, "step": 272 }, { "epoch": 0.04363461999520499, "grad_norm": 1.4017784876171442, "learning_rate": 8.690095846645368e-06, "loss": 0.8945772647857666, "step": 273 }, { "epoch": 0.04379445376808119, "grad_norm": 1.22371741504904, "learning_rate": 8.722044728434505e-06, "loss": 0.7579639554023743, "step": 274 }, { "epoch": 0.043954287540957405, "grad_norm": 1.3348876831340866, "learning_rate": 8.753993610223644e-06, "loss": 0.8004035949707031, "step": 275 }, { "epoch": 0.04411412131383361, "grad_norm": 1.2510032492942502, "learning_rate": 8.78594249201278e-06, "loss": 0.8324122428894043, "step": 276 }, { "epoch": 0.044273955086709824, "grad_norm": 1.5651710545877155, "learning_rate": 8.817891373801917e-06, "loss": 0.9940129518508911, "step": 277 }, { "epoch": 0.04443378885958603, "grad_norm": 1.4883026254609137, "learning_rate": 8.849840255591054e-06, "loss": 0.7488629817962646, "step": 278 }, { "epoch": 0.04459362263246224, "grad_norm": 1.3722544742088725, "learning_rate": 8.881789137380193e-06, "loss": 0.8742542862892151, "step": 279 }, { "epoch": 0.04475345640533845, "grad_norm": 1.4066182751685017, "learning_rate": 8.91373801916933e-06, "loss": 0.8406568765640259, "step": 280 }, { "epoch": 0.04491329017821466, "grad_norm": 1.342451289093957, "learning_rate": 8.945686900958466e-06, "loss": 0.7265186309814453, "step": 281 }, { "epoch": 0.045073123951090865, "grad_norm": 1.2391140058491508, "learning_rate": 8.977635782747605e-06, "loss": 0.7040204405784607, "step": 282 }, { "epoch": 0.04523295772396707, "grad_norm": 1.277863467196476, "learning_rate": 9.009584664536743e-06, "loss": 0.8625319600105286, "step": 283 }, { "epoch": 0.045392791496843284, "grad_norm": 1.5549408568956902, "learning_rate": 9.04153354632588e-06, "loss": 0.9476619958877563, "step": 284 }, { "epoch": 0.04555262526971949, "grad_norm": 1.4550793553994856, "learning_rate": 9.073482428115017e-06, "loss": 0.7880963683128357, "step": 285 }, { "epoch": 0.0457124590425957, "grad_norm": 1.5308415915606255, "learning_rate": 9.105431309904154e-06, "loss": 0.7936915755271912, "step": 286 }, { "epoch": 0.04587229281547191, "grad_norm": 1.487411025967749, "learning_rate": 9.137380191693292e-06, "loss": 0.7366125583648682, "step": 287 }, { "epoch": 0.04603212658834812, "grad_norm": 1.4250751960498158, "learning_rate": 9.169329073482429e-06, "loss": 0.864298939704895, "step": 288 }, { "epoch": 0.046191960361224325, "grad_norm": 1.8251182061669315, "learning_rate": 9.201277955271566e-06, "loss": 0.849245548248291, "step": 289 }, { "epoch": 0.04635179413410054, "grad_norm": 1.6014408145053154, "learning_rate": 9.233226837060704e-06, "loss": 0.790162980556488, "step": 290 }, { "epoch": 0.046511627906976744, "grad_norm": 1.5558997133888646, "learning_rate": 9.265175718849841e-06, "loss": 0.8842270374298096, "step": 291 }, { "epoch": 0.046671461679852956, "grad_norm": 1.3978376583525027, "learning_rate": 9.297124600638978e-06, "loss": 0.8526664972305298, "step": 292 }, { "epoch": 0.04683129545272916, "grad_norm": 1.763906270095795, "learning_rate": 9.329073482428116e-06, "loss": 0.7270492315292358, "step": 293 }, { "epoch": 0.04699112922560537, "grad_norm": 1.2825103055106561, "learning_rate": 9.361022364217253e-06, "loss": 0.774446964263916, "step": 294 }, { "epoch": 0.04715096299848158, "grad_norm": 1.3421625768130658, "learning_rate": 9.39297124600639e-06, "loss": 0.6504009962081909, "step": 295 }, { "epoch": 0.047310796771357785, "grad_norm": 1.7213244509043018, "learning_rate": 9.424920127795528e-06, "loss": 0.8961807489395142, "step": 296 }, { "epoch": 0.047470630544234, "grad_norm": 1.3479724624789353, "learning_rate": 9.456869009584665e-06, "loss": 0.8174201846122742, "step": 297 }, { "epoch": 0.047630464317110204, "grad_norm": 1.52545559317666, "learning_rate": 9.488817891373802e-06, "loss": 0.7255045175552368, "step": 298 }, { "epoch": 0.047790298089986416, "grad_norm": 1.1816956041277213, "learning_rate": 9.52076677316294e-06, "loss": 0.765607476234436, "step": 299 }, { "epoch": 0.04795013186286262, "grad_norm": 1.6999700634649817, "learning_rate": 9.552715654952077e-06, "loss": 0.8681449294090271, "step": 300 }, { "epoch": 0.048109965635738834, "grad_norm": 1.5689190965232696, "learning_rate": 9.584664536741216e-06, "loss": 0.8691922426223755, "step": 301 }, { "epoch": 0.04826979940861504, "grad_norm": 1.2900321700630648, "learning_rate": 9.616613418530352e-06, "loss": 0.7994365692138672, "step": 302 }, { "epoch": 0.04842963318149125, "grad_norm": 1.5085024842703538, "learning_rate": 9.64856230031949e-06, "loss": 0.8454890251159668, "step": 303 }, { "epoch": 0.04858946695436746, "grad_norm": 1.324533279714106, "learning_rate": 9.680511182108628e-06, "loss": 0.8292251825332642, "step": 304 }, { "epoch": 0.048749300727243663, "grad_norm": 1.340871360112492, "learning_rate": 9.712460063897765e-06, "loss": 0.7081131339073181, "step": 305 }, { "epoch": 0.048909134500119876, "grad_norm": 1.381587876543518, "learning_rate": 9.744408945686901e-06, "loss": 0.915484607219696, "step": 306 }, { "epoch": 0.04906896827299608, "grad_norm": 1.4240812648900598, "learning_rate": 9.77635782747604e-06, "loss": 0.7804629802703857, "step": 307 }, { "epoch": 0.049228802045872294, "grad_norm": 1.1896785650032013, "learning_rate": 9.808306709265177e-06, "loss": 0.7304016947746277, "step": 308 }, { "epoch": 0.0493886358187485, "grad_norm": 1.5902699342192925, "learning_rate": 9.840255591054313e-06, "loss": 0.9041261076927185, "step": 309 }, { "epoch": 0.04954846959162471, "grad_norm": 1.6032133893160123, "learning_rate": 9.87220447284345e-06, "loss": 0.8213251829147339, "step": 310 }, { "epoch": 0.04970830336450092, "grad_norm": 1.7930486959151493, "learning_rate": 9.904153354632589e-06, "loss": 1.0483702421188354, "step": 311 }, { "epoch": 0.04986813713737713, "grad_norm": 1.4717083231361214, "learning_rate": 9.936102236421726e-06, "loss": 0.71589195728302, "step": 312 }, { "epoch": 0.050027970910253336, "grad_norm": 1.2458890864515535, "learning_rate": 9.968051118210862e-06, "loss": 0.887083113193512, "step": 313 }, { "epoch": 0.05018780468312955, "grad_norm": 1.8137590622467226, "learning_rate": 1e-05, "loss": 0.9847934246063232, "step": 314 }, { "epoch": 0.050347638456005754, "grad_norm": 1.6304230466882303, "learning_rate": 1.0031948881789138e-05, "loss": 0.8516553640365601, "step": 315 }, { "epoch": 0.05050747222888196, "grad_norm": 1.5287494189831683, "learning_rate": 1.0063897763578276e-05, "loss": 1.0078349113464355, "step": 316 }, { "epoch": 0.05066730600175817, "grad_norm": 1.4531240509551573, "learning_rate": 1.0095846645367413e-05, "loss": 0.7621479034423828, "step": 317 }, { "epoch": 0.05082713977463438, "grad_norm": 1.4282876245195184, "learning_rate": 1.012779552715655e-05, "loss": 0.8616100549697876, "step": 318 }, { "epoch": 0.05098697354751059, "grad_norm": 1.5107230615169638, "learning_rate": 1.0159744408945688e-05, "loss": 0.9616912007331848, "step": 319 }, { "epoch": 0.051146807320386796, "grad_norm": 1.4390934759974454, "learning_rate": 1.0191693290734825e-05, "loss": 0.8629004955291748, "step": 320 }, { "epoch": 0.05130664109326301, "grad_norm": 1.4228562491821801, "learning_rate": 1.0223642172523962e-05, "loss": 0.8356399536132812, "step": 321 }, { "epoch": 0.051466474866139214, "grad_norm": 1.6438498312592118, "learning_rate": 1.02555910543131e-05, "loss": 0.777511477470398, "step": 322 }, { "epoch": 0.05162630863901543, "grad_norm": 1.4939119626404065, "learning_rate": 1.0287539936102237e-05, "loss": 0.7846474647521973, "step": 323 }, { "epoch": 0.05178614241189163, "grad_norm": 1.4048833022781255, "learning_rate": 1.0319488817891374e-05, "loss": 0.7021617889404297, "step": 324 }, { "epoch": 0.051945976184767845, "grad_norm": 1.4266839411255663, "learning_rate": 1.035143769968051e-05, "loss": 0.7994639873504639, "step": 325 }, { "epoch": 0.05210580995764405, "grad_norm": 1.6075445102074106, "learning_rate": 1.038338658146965e-05, "loss": 0.7034040689468384, "step": 326 }, { "epoch": 0.052265643730520256, "grad_norm": 1.4468182000202667, "learning_rate": 1.0415335463258786e-05, "loss": 0.779927134513855, "step": 327 }, { "epoch": 0.05242547750339647, "grad_norm": 1.3636690813002696, "learning_rate": 1.0447284345047923e-05, "loss": 0.8955647349357605, "step": 328 }, { "epoch": 0.052585311276272674, "grad_norm": 1.4900129244814446, "learning_rate": 1.0479233226837063e-05, "loss": 0.8396627902984619, "step": 329 }, { "epoch": 0.05274514504914889, "grad_norm": 1.6011041417267724, "learning_rate": 1.05111821086262e-05, "loss": 0.7960445880889893, "step": 330 }, { "epoch": 0.05290497882202509, "grad_norm": 1.8167885146674805, "learning_rate": 1.0543130990415335e-05, "loss": 0.9727675914764404, "step": 331 }, { "epoch": 0.053064812594901305, "grad_norm": 1.47798725221917, "learning_rate": 1.0575079872204475e-05, "loss": 0.7495489120483398, "step": 332 }, { "epoch": 0.05322464636777751, "grad_norm": 1.4084887579117726, "learning_rate": 1.0607028753993612e-05, "loss": 0.9413062334060669, "step": 333 }, { "epoch": 0.05338448014065372, "grad_norm": 1.5062511861867198, "learning_rate": 1.0638977635782749e-05, "loss": 0.9631729125976562, "step": 334 }, { "epoch": 0.05354431391352993, "grad_norm": 1.3545659302104662, "learning_rate": 1.0670926517571887e-05, "loss": 0.824018120765686, "step": 335 }, { "epoch": 0.05370414768640614, "grad_norm": 1.5300835402336124, "learning_rate": 1.0702875399361024e-05, "loss": 0.8615777492523193, "step": 336 }, { "epoch": 0.05386398145928235, "grad_norm": 1.4612562318498115, "learning_rate": 1.073482428115016e-05, "loss": 0.7780876755714417, "step": 337 }, { "epoch": 0.05402381523215855, "grad_norm": 1.4901415506219495, "learning_rate": 1.07667731629393e-05, "loss": 0.808748185634613, "step": 338 }, { "epoch": 0.054183649005034765, "grad_norm": 1.5792846110670389, "learning_rate": 1.0798722044728436e-05, "loss": 0.7605083584785461, "step": 339 }, { "epoch": 0.05434348277791097, "grad_norm": 1.7413351665740398, "learning_rate": 1.0830670926517573e-05, "loss": 0.8639430403709412, "step": 340 }, { "epoch": 0.05450331655078718, "grad_norm": 1.7356324194930712, "learning_rate": 1.086261980830671e-05, "loss": 0.8886935710906982, "step": 341 }, { "epoch": 0.05466315032366339, "grad_norm": 1.230288413656686, "learning_rate": 1.0894568690095848e-05, "loss": 0.698784351348877, "step": 342 }, { "epoch": 0.0548229840965396, "grad_norm": 1.4445168487604407, "learning_rate": 1.0926517571884985e-05, "loss": 0.6817876100540161, "step": 343 }, { "epoch": 0.054982817869415807, "grad_norm": 1.37765299977265, "learning_rate": 1.0958466453674122e-05, "loss": 0.7921793460845947, "step": 344 }, { "epoch": 0.05514265164229202, "grad_norm": 1.3048756708429432, "learning_rate": 1.099041533546326e-05, "loss": 0.7881006002426147, "step": 345 }, { "epoch": 0.055302485415168225, "grad_norm": 1.4105814408007864, "learning_rate": 1.1022364217252397e-05, "loss": 0.8145550489425659, "step": 346 }, { "epoch": 0.05546231918804444, "grad_norm": 1.3390260244736691, "learning_rate": 1.1054313099041534e-05, "loss": 0.6963472366333008, "step": 347 }, { "epoch": 0.05562215296092064, "grad_norm": 1.7649474710033006, "learning_rate": 1.1086261980830672e-05, "loss": 0.9395530819892883, "step": 348 }, { "epoch": 0.05578198673379685, "grad_norm": 1.303591259716812, "learning_rate": 1.1118210862619809e-05, "loss": 0.8336542844772339, "step": 349 }, { "epoch": 0.05594182050667306, "grad_norm": 1.7031700578879212, "learning_rate": 1.1150159744408946e-05, "loss": 0.8212020993232727, "step": 350 }, { "epoch": 0.056101654279549266, "grad_norm": 1.4218116615806091, "learning_rate": 1.1182108626198084e-05, "loss": 0.8393007516860962, "step": 351 }, { "epoch": 0.05626148805242548, "grad_norm": 1.5137782494973875, "learning_rate": 1.1214057507987221e-05, "loss": 0.7295740842819214, "step": 352 }, { "epoch": 0.056421321825301685, "grad_norm": 1.5559579834867996, "learning_rate": 1.1246006389776358e-05, "loss": 0.6821837425231934, "step": 353 }, { "epoch": 0.0565811555981779, "grad_norm": 1.364803336494244, "learning_rate": 1.1277955271565496e-05, "loss": 0.8720686435699463, "step": 354 }, { "epoch": 0.0567409893710541, "grad_norm": 1.6255820006054338, "learning_rate": 1.1309904153354633e-05, "loss": 0.7757724523544312, "step": 355 }, { "epoch": 0.056900823143930315, "grad_norm": 1.5533464439464824, "learning_rate": 1.134185303514377e-05, "loss": 0.6689169406890869, "step": 356 }, { "epoch": 0.05706065691680652, "grad_norm": 1.2950818369612946, "learning_rate": 1.1373801916932907e-05, "loss": 0.9124189615249634, "step": 357 }, { "epoch": 0.05722049068968273, "grad_norm": 1.5137491027023837, "learning_rate": 1.1405750798722045e-05, "loss": 0.7351683378219604, "step": 358 }, { "epoch": 0.05738032446255894, "grad_norm": 1.5927539154853196, "learning_rate": 1.1437699680511182e-05, "loss": 0.7245374321937561, "step": 359 }, { "epoch": 0.057540158235435145, "grad_norm": 1.2453005561539827, "learning_rate": 1.1469648562300319e-05, "loss": 0.6597838401794434, "step": 360 }, { "epoch": 0.05769999200831136, "grad_norm": 1.3839930381425736, "learning_rate": 1.1501597444089459e-05, "loss": 0.767436146736145, "step": 361 }, { "epoch": 0.05785982578118756, "grad_norm": 1.932801115951603, "learning_rate": 1.1533546325878596e-05, "loss": 0.8635532855987549, "step": 362 }, { "epoch": 0.058019659554063775, "grad_norm": 1.366525088140957, "learning_rate": 1.1565495207667731e-05, "loss": 0.7527984976768494, "step": 363 }, { "epoch": 0.05817949332693998, "grad_norm": 1.6412065464184873, "learning_rate": 1.1597444089456871e-05, "loss": 0.865106463432312, "step": 364 }, { "epoch": 0.05833932709981619, "grad_norm": 1.5280369377180154, "learning_rate": 1.1629392971246008e-05, "loss": 0.877324104309082, "step": 365 }, { "epoch": 0.0584991608726924, "grad_norm": 1.4796605787394865, "learning_rate": 1.1661341853035145e-05, "loss": 0.9242693185806274, "step": 366 }, { "epoch": 0.05865899464556861, "grad_norm": 1.6112304692695365, "learning_rate": 1.1693290734824283e-05, "loss": 0.7910684943199158, "step": 367 }, { "epoch": 0.05881882841844482, "grad_norm": 1.1840662611011932, "learning_rate": 1.172523961661342e-05, "loss": 0.8429200649261475, "step": 368 }, { "epoch": 0.05897866219132102, "grad_norm": 1.5495830782118234, "learning_rate": 1.1757188498402557e-05, "loss": 0.8412070870399475, "step": 369 }, { "epoch": 0.059138495964197235, "grad_norm": 1.2857932360229658, "learning_rate": 1.1789137380191695e-05, "loss": 0.841310977935791, "step": 370 }, { "epoch": 0.05929832973707344, "grad_norm": 1.553158461010764, "learning_rate": 1.1821086261980832e-05, "loss": 0.716041088104248, "step": 371 }, { "epoch": 0.05945816350994965, "grad_norm": 1.6501689082353532, "learning_rate": 1.1853035143769969e-05, "loss": 0.8233271241188049, "step": 372 }, { "epoch": 0.05961799728282586, "grad_norm": 1.629574939201863, "learning_rate": 1.1884984025559106e-05, "loss": 0.8554290533065796, "step": 373 }, { "epoch": 0.05977783105570207, "grad_norm": 1.5231995236313778, "learning_rate": 1.1916932907348244e-05, "loss": 0.8242645263671875, "step": 374 }, { "epoch": 0.05993766482857828, "grad_norm": 1.681737527382594, "learning_rate": 1.1948881789137381e-05, "loss": 0.8636946082115173, "step": 375 }, { "epoch": 0.06009749860145449, "grad_norm": 1.609587001650182, "learning_rate": 1.1980830670926518e-05, "loss": 0.815860390663147, "step": 376 }, { "epoch": 0.060257332374330695, "grad_norm": 1.4056989134620899, "learning_rate": 1.2012779552715656e-05, "loss": 0.6679046154022217, "step": 377 }, { "epoch": 0.06041716614720691, "grad_norm": 1.2467535601814819, "learning_rate": 1.2044728434504793e-05, "loss": 0.6614173650741577, "step": 378 }, { "epoch": 0.06057699992008311, "grad_norm": 1.5171653218878849, "learning_rate": 1.207667731629393e-05, "loss": 0.6568672060966492, "step": 379 }, { "epoch": 0.06073683369295932, "grad_norm": 1.2233633475905887, "learning_rate": 1.2108626198083068e-05, "loss": 0.6740138530731201, "step": 380 }, { "epoch": 0.06089666746583553, "grad_norm": 1.7506502876123862, "learning_rate": 1.2140575079872205e-05, "loss": 0.943774938583374, "step": 381 }, { "epoch": 0.06105650123871174, "grad_norm": 1.71587351582452, "learning_rate": 1.2172523961661342e-05, "loss": 0.8156380653381348, "step": 382 }, { "epoch": 0.06121633501158795, "grad_norm": 1.4393759242905142, "learning_rate": 1.220447284345048e-05, "loss": 0.7066268920898438, "step": 383 }, { "epoch": 0.061376168784464155, "grad_norm": 1.2927868951259331, "learning_rate": 1.2236421725239617e-05, "loss": 0.7000384330749512, "step": 384 }, { "epoch": 0.06153600255734037, "grad_norm": 1.4876400149646956, "learning_rate": 1.2268370607028754e-05, "loss": 0.6453815698623657, "step": 385 }, { "epoch": 0.06169583633021657, "grad_norm": 1.3346104315338791, "learning_rate": 1.2300319488817893e-05, "loss": 0.8244464993476868, "step": 386 }, { "epoch": 0.061855670103092786, "grad_norm": 1.5473614835748464, "learning_rate": 1.233226837060703e-05, "loss": 0.8362691402435303, "step": 387 }, { "epoch": 0.06201550387596899, "grad_norm": 1.590272894189532, "learning_rate": 1.2364217252396166e-05, "loss": 0.7947163581848145, "step": 388 }, { "epoch": 0.062175337648845204, "grad_norm": 1.531501381721076, "learning_rate": 1.2396166134185303e-05, "loss": 0.9302787780761719, "step": 389 }, { "epoch": 0.06233517142172141, "grad_norm": 1.4735345725172118, "learning_rate": 1.2428115015974442e-05, "loss": 0.8092745542526245, "step": 390 }, { "epoch": 0.062495005194597615, "grad_norm": 1.3732139509926466, "learning_rate": 1.2460063897763578e-05, "loss": 0.7884571552276611, "step": 391 }, { "epoch": 0.06265483896747383, "grad_norm": 1.5181523338866818, "learning_rate": 1.2492012779552715e-05, "loss": 0.790107250213623, "step": 392 }, { "epoch": 0.06281467274035003, "grad_norm": 1.399547701165128, "learning_rate": 1.2523961661341855e-05, "loss": 0.8292316198348999, "step": 393 }, { "epoch": 0.06297450651322624, "grad_norm": 1.724288836588929, "learning_rate": 1.2555910543130992e-05, "loss": 0.8746632933616638, "step": 394 }, { "epoch": 0.06313434028610246, "grad_norm": 1.2767751403505017, "learning_rate": 1.2587859424920127e-05, "loss": 0.7065081596374512, "step": 395 }, { "epoch": 0.06329417405897866, "grad_norm": 1.5074923842367196, "learning_rate": 1.2619808306709267e-05, "loss": 0.6989595890045166, "step": 396 }, { "epoch": 0.06345400783185487, "grad_norm": 1.313781814860153, "learning_rate": 1.2651757188498404e-05, "loss": 0.645057737827301, "step": 397 }, { "epoch": 0.06361384160473108, "grad_norm": 1.541457818739231, "learning_rate": 1.2683706070287541e-05, "loss": 0.7114013433456421, "step": 398 }, { "epoch": 0.0637736753776073, "grad_norm": 1.641303588847484, "learning_rate": 1.271565495207668e-05, "loss": 0.783957839012146, "step": 399 }, { "epoch": 0.0639335091504835, "grad_norm": 1.4479666512738798, "learning_rate": 1.2747603833865816e-05, "loss": 0.9405421614646912, "step": 400 }, { "epoch": 0.0640933429233597, "grad_norm": 1.3584336369622905, "learning_rate": 1.2779552715654953e-05, "loss": 0.7643344402313232, "step": 401 }, { "epoch": 0.06425317669623591, "grad_norm": 1.5141073462179333, "learning_rate": 1.2811501597444092e-05, "loss": 0.9061086773872375, "step": 402 }, { "epoch": 0.06441301046911212, "grad_norm": 1.483571262037137, "learning_rate": 1.2843450479233228e-05, "loss": 0.6750068068504333, "step": 403 }, { "epoch": 0.06457284424198834, "grad_norm": 1.385627249958975, "learning_rate": 1.2875399361022365e-05, "loss": 0.7608882188796997, "step": 404 }, { "epoch": 0.06473267801486454, "grad_norm": 1.6582638638531004, "learning_rate": 1.2907348242811502e-05, "loss": 0.7326583862304688, "step": 405 }, { "epoch": 0.06489251178774075, "grad_norm": 1.7789502668732347, "learning_rate": 1.293929712460064e-05, "loss": 0.9461251497268677, "step": 406 }, { "epoch": 0.06505234556061695, "grad_norm": 1.5648937637985407, "learning_rate": 1.2971246006389777e-05, "loss": 0.8689418435096741, "step": 407 }, { "epoch": 0.06521217933349317, "grad_norm": 1.5337621652326163, "learning_rate": 1.3003194888178914e-05, "loss": 0.8660377264022827, "step": 408 }, { "epoch": 0.06537201310636938, "grad_norm": 1.604187558730239, "learning_rate": 1.3035143769968053e-05, "loss": 0.7126583456993103, "step": 409 }, { "epoch": 0.06553184687924558, "grad_norm": 1.8745372544204792, "learning_rate": 1.306709265175719e-05, "loss": 0.7148457765579224, "step": 410 }, { "epoch": 0.06569168065212179, "grad_norm": 1.5956395492367812, "learning_rate": 1.3099041533546326e-05, "loss": 0.7185033559799194, "step": 411 }, { "epoch": 0.06585151442499801, "grad_norm": 1.5498658441723852, "learning_rate": 1.3130990415335465e-05, "loss": 0.9170419573783875, "step": 412 }, { "epoch": 0.06601134819787421, "grad_norm": 1.3638006402657745, "learning_rate": 1.3162939297124601e-05, "loss": 0.8708788752555847, "step": 413 }, { "epoch": 0.06617118197075042, "grad_norm": 1.7770071119234836, "learning_rate": 1.3194888178913738e-05, "loss": 0.8004765510559082, "step": 414 }, { "epoch": 0.06633101574362663, "grad_norm": 1.8901962748808492, "learning_rate": 1.3226837060702877e-05, "loss": 0.9904618263244629, "step": 415 }, { "epoch": 0.06649084951650283, "grad_norm": 1.4437856259643826, "learning_rate": 1.3258785942492014e-05, "loss": 0.7822768092155457, "step": 416 }, { "epoch": 0.06665068328937905, "grad_norm": 1.5482146739079592, "learning_rate": 1.329073482428115e-05, "loss": 0.739385724067688, "step": 417 }, { "epoch": 0.06681051706225526, "grad_norm": 1.5187670538465574, "learning_rate": 1.3322683706070289e-05, "loss": 0.6831960082054138, "step": 418 }, { "epoch": 0.06697035083513146, "grad_norm": 1.6534814493567351, "learning_rate": 1.3354632587859426e-05, "loss": 0.6852116584777832, "step": 419 }, { "epoch": 0.06713018460800767, "grad_norm": 1.5489508269732004, "learning_rate": 1.3386581469648562e-05, "loss": 0.8255378007888794, "step": 420 }, { "epoch": 0.06729001838088389, "grad_norm": 1.4736716511336667, "learning_rate": 1.3418530351437703e-05, "loss": 0.8314093947410583, "step": 421 }, { "epoch": 0.06744985215376009, "grad_norm": 1.6353575389054653, "learning_rate": 1.345047923322684e-05, "loss": 0.8057845234870911, "step": 422 }, { "epoch": 0.0676096859266363, "grad_norm": 1.3394984430030235, "learning_rate": 1.3482428115015975e-05, "loss": 0.8222237825393677, "step": 423 }, { "epoch": 0.0677695196995125, "grad_norm": 1.3895284647764823, "learning_rate": 1.3514376996805111e-05, "loss": 0.7582566738128662, "step": 424 }, { "epoch": 0.06792935347238871, "grad_norm": 1.4831542892923946, "learning_rate": 1.3546325878594251e-05, "loss": 0.7641762495040894, "step": 425 }, { "epoch": 0.06808918724526493, "grad_norm": 1.4483023031186848, "learning_rate": 1.3578274760383388e-05, "loss": 0.8219408988952637, "step": 426 }, { "epoch": 0.06824902101814113, "grad_norm": 1.835206051909756, "learning_rate": 1.3610223642172523e-05, "loss": 0.9360288381576538, "step": 427 }, { "epoch": 0.06840885479101734, "grad_norm": 1.6834948134829935, "learning_rate": 1.3642172523961664e-05, "loss": 0.8271459341049194, "step": 428 }, { "epoch": 0.06856868856389355, "grad_norm": 1.4909702130585212, "learning_rate": 1.36741214057508e-05, "loss": 0.5854500532150269, "step": 429 }, { "epoch": 0.06872852233676977, "grad_norm": 1.3823534502571675, "learning_rate": 1.3706070287539937e-05, "loss": 0.7902114391326904, "step": 430 }, { "epoch": 0.06888835610964597, "grad_norm": 1.5506644982582594, "learning_rate": 1.3738019169329076e-05, "loss": 0.8315001726150513, "step": 431 }, { "epoch": 0.06904818988252218, "grad_norm": 1.675415505390212, "learning_rate": 1.3769968051118212e-05, "loss": 0.8187963962554932, "step": 432 }, { "epoch": 0.06920802365539838, "grad_norm": 1.5229666918467277, "learning_rate": 1.380191693290735e-05, "loss": 0.726334810256958, "step": 433 }, { "epoch": 0.0693678574282746, "grad_norm": 1.5494546687809587, "learning_rate": 1.3833865814696488e-05, "loss": 0.8446439504623413, "step": 434 }, { "epoch": 0.0695276912011508, "grad_norm": 1.4487270336843472, "learning_rate": 1.3865814696485625e-05, "loss": 0.7965983152389526, "step": 435 }, { "epoch": 0.06968752497402701, "grad_norm": 1.7264423716821484, "learning_rate": 1.3897763578274761e-05, "loss": 0.8971899747848511, "step": 436 }, { "epoch": 0.06984735874690322, "grad_norm": 1.466167413399896, "learning_rate": 1.39297124600639e-05, "loss": 0.7395395040512085, "step": 437 }, { "epoch": 0.07000719251977942, "grad_norm": 1.4730261334892343, "learning_rate": 1.3961661341853037e-05, "loss": 0.7063359022140503, "step": 438 }, { "epoch": 0.07016702629265564, "grad_norm": 1.5260972003374493, "learning_rate": 1.3993610223642173e-05, "loss": 0.8156409859657288, "step": 439 }, { "epoch": 0.07032686006553185, "grad_norm": 1.489860094361666, "learning_rate": 1.402555910543131e-05, "loss": 0.8181414008140564, "step": 440 }, { "epoch": 0.07048669383840805, "grad_norm": 1.6602841140705602, "learning_rate": 1.4057507987220449e-05, "loss": 0.7766282558441162, "step": 441 }, { "epoch": 0.07064652761128426, "grad_norm": 1.4113033957349042, "learning_rate": 1.4089456869009586e-05, "loss": 0.8293454051017761, "step": 442 }, { "epoch": 0.07080636138416048, "grad_norm": 1.3924119110582514, "learning_rate": 1.4121405750798722e-05, "loss": 0.7134082317352295, "step": 443 }, { "epoch": 0.07096619515703669, "grad_norm": 1.4419009239049672, "learning_rate": 1.415335463258786e-05, "loss": 0.8339556455612183, "step": 444 }, { "epoch": 0.07112602892991289, "grad_norm": 1.5640403418147888, "learning_rate": 1.4185303514376998e-05, "loss": 0.8295791149139404, "step": 445 }, { "epoch": 0.0712858627027891, "grad_norm": 1.469533960884318, "learning_rate": 1.4217252396166134e-05, "loss": 0.7451021671295166, "step": 446 }, { "epoch": 0.0714456964756653, "grad_norm": 1.5799212092905832, "learning_rate": 1.4249201277955273e-05, "loss": 0.724601149559021, "step": 447 }, { "epoch": 0.07160553024854152, "grad_norm": 1.5601459754109819, "learning_rate": 1.428115015974441e-05, "loss": 0.679542064666748, "step": 448 }, { "epoch": 0.07176536402141773, "grad_norm": 1.5427128815804998, "learning_rate": 1.4313099041533547e-05, "loss": 0.8038979768753052, "step": 449 }, { "epoch": 0.07192519779429393, "grad_norm": 1.532927128287707, "learning_rate": 1.4345047923322685e-05, "loss": 0.7035850286483765, "step": 450 }, { "epoch": 0.07208503156717014, "grad_norm": 1.5372875008392355, "learning_rate": 1.4376996805111822e-05, "loss": 0.7285014390945435, "step": 451 }, { "epoch": 0.07224486534004636, "grad_norm": 1.552162496851371, "learning_rate": 1.4408945686900959e-05, "loss": 0.9163622260093689, "step": 452 }, { "epoch": 0.07240469911292256, "grad_norm": 1.5193158005171437, "learning_rate": 1.4440894568690099e-05, "loss": 0.7380140423774719, "step": 453 }, { "epoch": 0.07256453288579877, "grad_norm": 1.585225329270069, "learning_rate": 1.4472843450479236e-05, "loss": 0.8043112754821777, "step": 454 }, { "epoch": 0.07272436665867497, "grad_norm": 1.6575035609290283, "learning_rate": 1.450479233226837e-05, "loss": 0.7057956457138062, "step": 455 }, { "epoch": 0.07288420043155118, "grad_norm": 1.4924836137866209, "learning_rate": 1.4536741214057507e-05, "loss": 0.9526468515396118, "step": 456 }, { "epoch": 0.0730440342044274, "grad_norm": 1.647966661724522, "learning_rate": 1.4568690095846648e-05, "loss": 0.766329288482666, "step": 457 }, { "epoch": 0.0732038679773036, "grad_norm": 1.6164636809411808, "learning_rate": 1.4600638977635784e-05, "loss": 0.757896900177002, "step": 458 }, { "epoch": 0.07336370175017981, "grad_norm": 1.5041094889545612, "learning_rate": 1.4632587859424921e-05, "loss": 0.8064939975738525, "step": 459 }, { "epoch": 0.07352353552305602, "grad_norm": 1.5603732494944316, "learning_rate": 1.466453674121406e-05, "loss": 0.7683442234992981, "step": 460 }, { "epoch": 0.07368336929593224, "grad_norm": 1.6379486688844351, "learning_rate": 1.4696485623003197e-05, "loss": 0.8830092549324036, "step": 461 }, { "epoch": 0.07384320306880844, "grad_norm": 1.5101658813985077, "learning_rate": 1.4728434504792333e-05, "loss": 0.8324140906333923, "step": 462 }, { "epoch": 0.07400303684168465, "grad_norm": 1.7335673852160336, "learning_rate": 1.4760383386581472e-05, "loss": 0.8724834322929382, "step": 463 }, { "epoch": 0.07416287061456085, "grad_norm": 2.024773192416227, "learning_rate": 1.4792332268370609e-05, "loss": 0.9090779423713684, "step": 464 }, { "epoch": 0.07432270438743707, "grad_norm": 1.3634868245095924, "learning_rate": 1.4824281150159745e-05, "loss": 0.7641571760177612, "step": 465 }, { "epoch": 0.07448253816031328, "grad_norm": 1.4309973640452986, "learning_rate": 1.4856230031948884e-05, "loss": 0.7200521230697632, "step": 466 }, { "epoch": 0.07464237193318948, "grad_norm": 1.6122910909834305, "learning_rate": 1.488817891373802e-05, "loss": 0.7306962013244629, "step": 467 }, { "epoch": 0.07480220570606569, "grad_norm": 1.7710282268807016, "learning_rate": 1.4920127795527158e-05, "loss": 0.8132913112640381, "step": 468 }, { "epoch": 0.0749620394789419, "grad_norm": 1.5140219556076975, "learning_rate": 1.4952076677316296e-05, "loss": 0.7234621047973633, "step": 469 }, { "epoch": 0.07512187325181811, "grad_norm": 1.3881758376340037, "learning_rate": 1.4984025559105433e-05, "loss": 0.7677523493766785, "step": 470 }, { "epoch": 0.07528170702469432, "grad_norm": 1.5336849136436728, "learning_rate": 1.501597444089457e-05, "loss": 0.810373842716217, "step": 471 }, { "epoch": 0.07544154079757052, "grad_norm": 1.632336595341793, "learning_rate": 1.5047923322683706e-05, "loss": 0.8625991344451904, "step": 472 }, { "epoch": 0.07560137457044673, "grad_norm": 1.259245944108394, "learning_rate": 1.5079872204472845e-05, "loss": 0.5825401544570923, "step": 473 }, { "epoch": 0.07576120834332295, "grad_norm": 1.549525124919439, "learning_rate": 1.5111821086261982e-05, "loss": 0.7968155145645142, "step": 474 }, { "epoch": 0.07592104211619916, "grad_norm": 1.5384032253799445, "learning_rate": 1.5143769968051119e-05, "loss": 0.7883274555206299, "step": 475 }, { "epoch": 0.07608087588907536, "grad_norm": 1.3129949636010396, "learning_rate": 1.5175718849840257e-05, "loss": 0.6988245844841003, "step": 476 }, { "epoch": 0.07624070966195157, "grad_norm": 1.6206723015000652, "learning_rate": 1.5207667731629394e-05, "loss": 0.8375004529953003, "step": 477 }, { "epoch": 0.07640054343482777, "grad_norm": 1.3312921723249866, "learning_rate": 1.523961661341853e-05, "loss": 0.7210384607315063, "step": 478 }, { "epoch": 0.07656037720770399, "grad_norm": 1.4017219841878383, "learning_rate": 1.527156549520767e-05, "loss": 0.7885026931762695, "step": 479 }, { "epoch": 0.0767202109805802, "grad_norm": 1.8444852818596809, "learning_rate": 1.5303514376996806e-05, "loss": 0.820142388343811, "step": 480 }, { "epoch": 0.0768800447534564, "grad_norm": 1.434129240302957, "learning_rate": 1.5335463258785944e-05, "loss": 0.7430834174156189, "step": 481 }, { "epoch": 0.07703987852633261, "grad_norm": 1.5332446071601449, "learning_rate": 1.5367412140575083e-05, "loss": 0.8062323331832886, "step": 482 }, { "epoch": 0.07719971229920883, "grad_norm": 1.5689799868353669, "learning_rate": 1.5399361022364218e-05, "loss": 0.8116800785064697, "step": 483 }, { "epoch": 0.07735954607208503, "grad_norm": 1.6175338474561896, "learning_rate": 1.5431309904153356e-05, "loss": 0.8073225617408752, "step": 484 }, { "epoch": 0.07751937984496124, "grad_norm": 1.2634580045419297, "learning_rate": 1.5463258785942495e-05, "loss": 0.6973634958267212, "step": 485 }, { "epoch": 0.07767921361783744, "grad_norm": 1.339409551055729, "learning_rate": 1.549520766773163e-05, "loss": 0.7221083641052246, "step": 486 }, { "epoch": 0.07783904739071366, "grad_norm": 1.6718840957977552, "learning_rate": 1.552715654952077e-05, "loss": 0.7697114944458008, "step": 487 }, { "epoch": 0.07799888116358987, "grad_norm": 1.5440116629507095, "learning_rate": 1.5559105431309904e-05, "loss": 0.8050528168678284, "step": 488 }, { "epoch": 0.07815871493646608, "grad_norm": 1.4794998453670887, "learning_rate": 1.5591054313099042e-05, "loss": 0.8214355707168579, "step": 489 }, { "epoch": 0.07831854870934228, "grad_norm": 1.4228848215317833, "learning_rate": 1.562300319488818e-05, "loss": 0.7776778936386108, "step": 490 }, { "epoch": 0.07847838248221849, "grad_norm": 1.5712124846147115, "learning_rate": 1.5654952076677316e-05, "loss": 0.661906361579895, "step": 491 }, { "epoch": 0.0786382162550947, "grad_norm": 2.043152062483003, "learning_rate": 1.5686900958466454e-05, "loss": 0.9041488766670227, "step": 492 }, { "epoch": 0.07879805002797091, "grad_norm": 1.6573817407045852, "learning_rate": 1.5718849840255593e-05, "loss": 0.7607190608978271, "step": 493 }, { "epoch": 0.07895788380084712, "grad_norm": 1.4155575068623465, "learning_rate": 1.5750798722044728e-05, "loss": 0.7890668511390686, "step": 494 }, { "epoch": 0.07911771757372332, "grad_norm": 1.4133975885547518, "learning_rate": 1.5782747603833866e-05, "loss": 0.9790210723876953, "step": 495 }, { "epoch": 0.07927755134659954, "grad_norm": 1.4476334574309748, "learning_rate": 1.5814696485623005e-05, "loss": 0.6419234275817871, "step": 496 }, { "epoch": 0.07943738511947575, "grad_norm": 1.3883705363906125, "learning_rate": 1.584664536741214e-05, "loss": 0.7390645742416382, "step": 497 }, { "epoch": 0.07959721889235195, "grad_norm": 1.559062382142213, "learning_rate": 1.587859424920128e-05, "loss": 0.7597742080688477, "step": 498 }, { "epoch": 0.07975705266522816, "grad_norm": 1.5836620935368766, "learning_rate": 1.5910543130990417e-05, "loss": 0.9833526611328125, "step": 499 }, { "epoch": 0.07991688643810436, "grad_norm": 1.4719261847671516, "learning_rate": 1.5942492012779552e-05, "loss": 0.8242717981338501, "step": 500 }, { "epoch": 0.08007672021098058, "grad_norm": 1.5131571938001669, "learning_rate": 1.5974440894568694e-05, "loss": 0.7697975039482117, "step": 501 }, { "epoch": 0.08023655398385679, "grad_norm": 1.2741626473170096, "learning_rate": 1.600638977635783e-05, "loss": 0.7491664886474609, "step": 502 }, { "epoch": 0.080396387756733, "grad_norm": 1.519663961028758, "learning_rate": 1.6038338658146964e-05, "loss": 0.6907899379730225, "step": 503 }, { "epoch": 0.0805562215296092, "grad_norm": 1.5484321804429417, "learning_rate": 1.6070287539936103e-05, "loss": 0.7607770562171936, "step": 504 }, { "epoch": 0.08071605530248542, "grad_norm": 1.777047326768645, "learning_rate": 1.610223642172524e-05, "loss": 0.9152498245239258, "step": 505 }, { "epoch": 0.08087588907536163, "grad_norm": 1.6657966845048087, "learning_rate": 1.613418530351438e-05, "loss": 0.8261476755142212, "step": 506 }, { "epoch": 0.08103572284823783, "grad_norm": 1.5602307698493232, "learning_rate": 1.6166134185303515e-05, "loss": 0.8784970045089722, "step": 507 }, { "epoch": 0.08119555662111404, "grad_norm": 1.6913588790423613, "learning_rate": 1.6198083067092653e-05, "loss": 0.7912211418151855, "step": 508 }, { "epoch": 0.08135539039399026, "grad_norm": 1.5596915550734873, "learning_rate": 1.623003194888179e-05, "loss": 0.7474700212478638, "step": 509 }, { "epoch": 0.08151522416686646, "grad_norm": 1.9670794277422867, "learning_rate": 1.6261980830670927e-05, "loss": 0.8760364055633545, "step": 510 }, { "epoch": 0.08167505793974267, "grad_norm": 1.7091766480735437, "learning_rate": 1.6293929712460065e-05, "loss": 0.8791464567184448, "step": 511 }, { "epoch": 0.08183489171261887, "grad_norm": 1.416623459282517, "learning_rate": 1.6325878594249204e-05, "loss": 0.8489415645599365, "step": 512 }, { "epoch": 0.08199472548549508, "grad_norm": 1.479978938094, "learning_rate": 1.635782747603834e-05, "loss": 0.8292858600616455, "step": 513 }, { "epoch": 0.0821545592583713, "grad_norm": 1.3452650235496992, "learning_rate": 1.6389776357827477e-05, "loss": 0.8175803422927856, "step": 514 }, { "epoch": 0.0823143930312475, "grad_norm": 1.489532525781157, "learning_rate": 1.6421725239616616e-05, "loss": 0.7451221942901611, "step": 515 }, { "epoch": 0.08247422680412371, "grad_norm": 1.5405458293845844, "learning_rate": 1.645367412140575e-05, "loss": 0.7237639427185059, "step": 516 }, { "epoch": 0.08263406057699992, "grad_norm": 1.7050191426130552, "learning_rate": 1.648562300319489e-05, "loss": 0.7893201112747192, "step": 517 }, { "epoch": 0.08279389434987613, "grad_norm": 1.1524562951640018, "learning_rate": 1.6517571884984028e-05, "loss": 0.681610107421875, "step": 518 }, { "epoch": 0.08295372812275234, "grad_norm": 1.446897774584658, "learning_rate": 1.6549520766773163e-05, "loss": 0.7824424505233765, "step": 519 }, { "epoch": 0.08311356189562855, "grad_norm": 1.4515825238513789, "learning_rate": 1.65814696485623e-05, "loss": 0.8744614720344543, "step": 520 }, { "epoch": 0.08327339566850475, "grad_norm": 1.4105031406095925, "learning_rate": 1.661341853035144e-05, "loss": 0.7370257377624512, "step": 521 }, { "epoch": 0.08343322944138096, "grad_norm": 1.5778496951242102, "learning_rate": 1.6645367412140575e-05, "loss": 0.6885213255882263, "step": 522 }, { "epoch": 0.08359306321425718, "grad_norm": 1.7106849293982411, "learning_rate": 1.6677316293929714e-05, "loss": 0.9816760420799255, "step": 523 }, { "epoch": 0.08375289698713338, "grad_norm": 1.5010909069389673, "learning_rate": 1.6709265175718852e-05, "loss": 0.6874128580093384, "step": 524 }, { "epoch": 0.08391273076000959, "grad_norm": 1.4399469362873571, "learning_rate": 1.6741214057507987e-05, "loss": 0.7271018028259277, "step": 525 }, { "epoch": 0.0840725645328858, "grad_norm": 1.705553533661086, "learning_rate": 1.6773162939297126e-05, "loss": 0.9482641220092773, "step": 526 }, { "epoch": 0.08423239830576201, "grad_norm": 1.5674381860718343, "learning_rate": 1.6805111821086264e-05, "loss": 0.77538001537323, "step": 527 }, { "epoch": 0.08439223207863822, "grad_norm": 1.5020544246936502, "learning_rate": 1.68370607028754e-05, "loss": 0.8026472330093384, "step": 528 }, { "epoch": 0.08455206585151442, "grad_norm": 1.4387776798498706, "learning_rate": 1.6869009584664538e-05, "loss": 0.7807244658470154, "step": 529 }, { "epoch": 0.08471189962439063, "grad_norm": 1.3876580127583298, "learning_rate": 1.6900958466453676e-05, "loss": 0.6274101734161377, "step": 530 }, { "epoch": 0.08487173339726685, "grad_norm": 1.8842783372526333, "learning_rate": 1.693290734824281e-05, "loss": 0.8141864538192749, "step": 531 }, { "epoch": 0.08503156717014305, "grad_norm": 1.4582739706907717, "learning_rate": 1.696485623003195e-05, "loss": 0.8223141431808472, "step": 532 }, { "epoch": 0.08519140094301926, "grad_norm": 1.5981132998281093, "learning_rate": 1.699680511182109e-05, "loss": 0.8022832870483398, "step": 533 }, { "epoch": 0.08535123471589547, "grad_norm": 1.5479489616505089, "learning_rate": 1.7028753993610227e-05, "loss": 0.8065938353538513, "step": 534 }, { "epoch": 0.08551106848877167, "grad_norm": 2.0195299086803544, "learning_rate": 1.7060702875399362e-05, "loss": 0.7543671727180481, "step": 535 }, { "epoch": 0.08567090226164789, "grad_norm": 1.7693742471919405, "learning_rate": 1.70926517571885e-05, "loss": 0.8406636118888855, "step": 536 }, { "epoch": 0.0858307360345241, "grad_norm": 1.5384172524448863, "learning_rate": 1.712460063897764e-05, "loss": 0.7698411345481873, "step": 537 }, { "epoch": 0.0859905698074003, "grad_norm": 1.3171621783167722, "learning_rate": 1.7156549520766774e-05, "loss": 0.6345995664596558, "step": 538 }, { "epoch": 0.08615040358027651, "grad_norm": 1.6489565823197192, "learning_rate": 1.7188498402555913e-05, "loss": 0.7944232821464539, "step": 539 }, { "epoch": 0.08631023735315273, "grad_norm": 1.6808885759031669, "learning_rate": 1.722044728434505e-05, "loss": 0.8220272660255432, "step": 540 }, { "epoch": 0.08647007112602893, "grad_norm": 1.4449415066541706, "learning_rate": 1.7252396166134186e-05, "loss": 0.8833564519882202, "step": 541 }, { "epoch": 0.08662990489890514, "grad_norm": 1.390989431365382, "learning_rate": 1.7284345047923325e-05, "loss": 0.8244155645370483, "step": 542 }, { "epoch": 0.08678973867178134, "grad_norm": 1.6831276767642116, "learning_rate": 1.7316293929712463e-05, "loss": 0.9344595074653625, "step": 543 }, { "epoch": 0.08694957244465755, "grad_norm": 1.4921368407701023, "learning_rate": 1.7348242811501598e-05, "loss": 0.8360371589660645, "step": 544 }, { "epoch": 0.08710940621753377, "grad_norm": 1.4722871613020276, "learning_rate": 1.7380191693290737e-05, "loss": 0.7314562797546387, "step": 545 }, { "epoch": 0.08726923999040997, "grad_norm": 1.4084388337342302, "learning_rate": 1.7412140575079875e-05, "loss": 0.8487749695777893, "step": 546 }, { "epoch": 0.08742907376328618, "grad_norm": 1.3775721702772958, "learning_rate": 1.744408945686901e-05, "loss": 0.7883730530738831, "step": 547 }, { "epoch": 0.08758890753616239, "grad_norm": 1.4609462709883325, "learning_rate": 1.747603833865815e-05, "loss": 0.7240481376647949, "step": 548 }, { "epoch": 0.0877487413090386, "grad_norm": 1.443551048515154, "learning_rate": 1.7507987220447287e-05, "loss": 0.6428011655807495, "step": 549 }, { "epoch": 0.08790857508191481, "grad_norm": 1.3433909559602095, "learning_rate": 1.7539936102236422e-05, "loss": 0.8726882934570312, "step": 550 }, { "epoch": 0.08806840885479102, "grad_norm": 1.2158963844607527, "learning_rate": 1.757188498402556e-05, "loss": 0.6428338289260864, "step": 551 }, { "epoch": 0.08822824262766722, "grad_norm": 1.4916219726297553, "learning_rate": 1.76038338658147e-05, "loss": 0.7753780484199524, "step": 552 }, { "epoch": 0.08838807640054344, "grad_norm": 1.7208975939936595, "learning_rate": 1.7635782747603835e-05, "loss": 0.847137987613678, "step": 553 }, { "epoch": 0.08854791017341965, "grad_norm": 1.4756271158829053, "learning_rate": 1.7667731629392973e-05, "loss": 0.7530514001846313, "step": 554 }, { "epoch": 0.08870774394629585, "grad_norm": 1.519771693014792, "learning_rate": 1.7699680511182108e-05, "loss": 0.7698676586151123, "step": 555 }, { "epoch": 0.08886757771917206, "grad_norm": 1.3876849231395423, "learning_rate": 1.7731629392971247e-05, "loss": 0.7801159620285034, "step": 556 }, { "epoch": 0.08902741149204826, "grad_norm": 1.7670054733466538, "learning_rate": 1.7763578274760385e-05, "loss": 0.8090691566467285, "step": 557 }, { "epoch": 0.08918724526492448, "grad_norm": 1.4077098342605472, "learning_rate": 1.779552715654952e-05, "loss": 0.6844626069068909, "step": 558 }, { "epoch": 0.08934707903780069, "grad_norm": 1.3104742364120174, "learning_rate": 1.782747603833866e-05, "loss": 0.8210827112197876, "step": 559 }, { "epoch": 0.0895069128106769, "grad_norm": 1.336974906683419, "learning_rate": 1.7859424920127797e-05, "loss": 0.6667079329490662, "step": 560 }, { "epoch": 0.0896667465835531, "grad_norm": 1.4883452806161557, "learning_rate": 1.7891373801916932e-05, "loss": 0.8466023206710815, "step": 561 }, { "epoch": 0.08982658035642932, "grad_norm": 1.45738897327972, "learning_rate": 1.7923322683706074e-05, "loss": 0.8312264084815979, "step": 562 }, { "epoch": 0.08998641412930553, "grad_norm": 1.3906083553679176, "learning_rate": 1.795527156549521e-05, "loss": 0.7500077486038208, "step": 563 }, { "epoch": 0.09014624790218173, "grad_norm": 1.5945573831471203, "learning_rate": 1.7987220447284344e-05, "loss": 0.705231785774231, "step": 564 }, { "epoch": 0.09030608167505794, "grad_norm": 1.4880334264255712, "learning_rate": 1.8019169329073486e-05, "loss": 0.850125789642334, "step": 565 }, { "epoch": 0.09046591544793414, "grad_norm": 1.2656677184693932, "learning_rate": 1.805111821086262e-05, "loss": 0.6714374423027039, "step": 566 }, { "epoch": 0.09062574922081036, "grad_norm": 1.516151926463915, "learning_rate": 1.808306709265176e-05, "loss": 0.6789853572845459, "step": 567 }, { "epoch": 0.09078558299368657, "grad_norm": 1.5390201920065187, "learning_rate": 1.81150159744409e-05, "loss": 0.7723639011383057, "step": 568 }, { "epoch": 0.09094541676656277, "grad_norm": 1.3028521581095809, "learning_rate": 1.8146964856230033e-05, "loss": 0.6508913040161133, "step": 569 }, { "epoch": 0.09110525053943898, "grad_norm": 1.4970779462077175, "learning_rate": 1.8178913738019172e-05, "loss": 0.7604965567588806, "step": 570 }, { "epoch": 0.0912650843123152, "grad_norm": 1.4905697246578358, "learning_rate": 1.8210862619808307e-05, "loss": 0.7112371325492859, "step": 571 }, { "epoch": 0.0914249180851914, "grad_norm": 1.3697421449247156, "learning_rate": 1.8242811501597446e-05, "loss": 0.7086530923843384, "step": 572 }, { "epoch": 0.09158475185806761, "grad_norm": 1.5488904807283992, "learning_rate": 1.8274760383386584e-05, "loss": 0.7770938873291016, "step": 573 }, { "epoch": 0.09174458563094381, "grad_norm": 1.640556280477446, "learning_rate": 1.830670926517572e-05, "loss": 0.8871116638183594, "step": 574 }, { "epoch": 0.09190441940382003, "grad_norm": 1.5715602961618689, "learning_rate": 1.8338658146964858e-05, "loss": 0.8133755326271057, "step": 575 }, { "epoch": 0.09206425317669624, "grad_norm": 1.3404326946133842, "learning_rate": 1.8370607028753996e-05, "loss": 0.6994065642356873, "step": 576 }, { "epoch": 0.09222408694957245, "grad_norm": 1.2271528393683497, "learning_rate": 1.840255591054313e-05, "loss": 0.8351587057113647, "step": 577 }, { "epoch": 0.09238392072244865, "grad_norm": 1.7114232943442962, "learning_rate": 1.843450479233227e-05, "loss": 1.0070974826812744, "step": 578 }, { "epoch": 0.09254375449532486, "grad_norm": 1.5041438930324251, "learning_rate": 1.8466453674121408e-05, "loss": 0.7577608227729797, "step": 579 }, { "epoch": 0.09270358826820108, "grad_norm": 1.5792492570101138, "learning_rate": 1.8498402555910543e-05, "loss": 0.8609957695007324, "step": 580 }, { "epoch": 0.09286342204107728, "grad_norm": 1.5386468015262316, "learning_rate": 1.8530351437699682e-05, "loss": 0.9048320055007935, "step": 581 }, { "epoch": 0.09302325581395349, "grad_norm": 1.406442240226185, "learning_rate": 1.856230031948882e-05, "loss": 0.8067824840545654, "step": 582 }, { "epoch": 0.09318308958682969, "grad_norm": 2.6052618034226542, "learning_rate": 1.8594249201277955e-05, "loss": 0.7674081325531006, "step": 583 }, { "epoch": 0.09334292335970591, "grad_norm": 1.2863933142985953, "learning_rate": 1.8626198083067094e-05, "loss": 0.8196754455566406, "step": 584 }, { "epoch": 0.09350275713258212, "grad_norm": 1.4366204375164262, "learning_rate": 1.8658146964856232e-05, "loss": 0.809384822845459, "step": 585 }, { "epoch": 0.09366259090545832, "grad_norm": 1.2110870087295071, "learning_rate": 1.8690095846645367e-05, "loss": 0.7116761803627014, "step": 586 }, { "epoch": 0.09382242467833453, "grad_norm": 1.7971110367284788, "learning_rate": 1.8722044728434506e-05, "loss": 0.7821942567825317, "step": 587 }, { "epoch": 0.09398225845121073, "grad_norm": 1.337876090866723, "learning_rate": 1.8753993610223644e-05, "loss": 0.8136622905731201, "step": 588 }, { "epoch": 0.09414209222408695, "grad_norm": 1.3319757529139085, "learning_rate": 1.878594249201278e-05, "loss": 0.771775484085083, "step": 589 }, { "epoch": 0.09430192599696316, "grad_norm": 1.75406010670273, "learning_rate": 1.8817891373801918e-05, "loss": 0.8561886548995972, "step": 590 }, { "epoch": 0.09446175976983937, "grad_norm": 1.2935917676493496, "learning_rate": 1.8849840255591057e-05, "loss": 0.6031676530838013, "step": 591 }, { "epoch": 0.09462159354271557, "grad_norm": 1.3922644157535065, "learning_rate": 1.888178913738019e-05, "loss": 0.7033494710922241, "step": 592 }, { "epoch": 0.09478142731559179, "grad_norm": 1.3581716203107974, "learning_rate": 1.891373801916933e-05, "loss": 0.6879885196685791, "step": 593 }, { "epoch": 0.094941261088468, "grad_norm": 1.5984468851666849, "learning_rate": 1.894568690095847e-05, "loss": 0.7527498006820679, "step": 594 }, { "epoch": 0.0951010948613442, "grad_norm": 1.7012523451596688, "learning_rate": 1.8977635782747604e-05, "loss": 0.7229347229003906, "step": 595 }, { "epoch": 0.09526092863422041, "grad_norm": 1.5690348327140906, "learning_rate": 1.9009584664536742e-05, "loss": 0.7734091877937317, "step": 596 }, { "epoch": 0.09542076240709661, "grad_norm": 1.7493282893809663, "learning_rate": 1.904153354632588e-05, "loss": 0.8460515737533569, "step": 597 }, { "epoch": 0.09558059617997283, "grad_norm": 1.5390150772687727, "learning_rate": 1.907348242811502e-05, "loss": 0.7697834968566895, "step": 598 }, { "epoch": 0.09574042995284904, "grad_norm": 1.5736366590466921, "learning_rate": 1.9105431309904154e-05, "loss": 0.8942311406135559, "step": 599 }, { "epoch": 0.09590026372572524, "grad_norm": 1.6080268284316228, "learning_rate": 1.9137380191693293e-05, "loss": 0.7691764235496521, "step": 600 }, { "epoch": 0.09606009749860145, "grad_norm": 1.532937732748804, "learning_rate": 1.916932907348243e-05, "loss": 0.7943923473358154, "step": 601 }, { "epoch": 0.09621993127147767, "grad_norm": 1.6686475475996732, "learning_rate": 1.9201277955271566e-05, "loss": 0.7093757390975952, "step": 602 }, { "epoch": 0.09637976504435387, "grad_norm": 1.7107173518028753, "learning_rate": 1.9233226837060705e-05, "loss": 0.8859753012657166, "step": 603 }, { "epoch": 0.09653959881723008, "grad_norm": 1.7061657031784512, "learning_rate": 1.9265175718849843e-05, "loss": 0.8145350217819214, "step": 604 }, { "epoch": 0.09669943259010629, "grad_norm": 1.4196716060563062, "learning_rate": 1.929712460063898e-05, "loss": 0.7225016355514526, "step": 605 }, { "epoch": 0.0968592663629825, "grad_norm": 1.4308393017133094, "learning_rate": 1.9329073482428117e-05, "loss": 0.799528181552887, "step": 606 }, { "epoch": 0.09701910013585871, "grad_norm": 1.4837440296187339, "learning_rate": 1.9361022364217256e-05, "loss": 0.6587074995040894, "step": 607 }, { "epoch": 0.09717893390873492, "grad_norm": 1.4657951289961806, "learning_rate": 1.939297124600639e-05, "loss": 0.691370964050293, "step": 608 }, { "epoch": 0.09733876768161112, "grad_norm": 1.2884168050028788, "learning_rate": 1.942492012779553e-05, "loss": 0.720697283744812, "step": 609 }, { "epoch": 0.09749860145448733, "grad_norm": 1.5337057247682475, "learning_rate": 1.9456869009584668e-05, "loss": 0.8258018493652344, "step": 610 }, { "epoch": 0.09765843522736355, "grad_norm": 1.5608629043792914, "learning_rate": 1.9488817891373803e-05, "loss": 0.5412628054618835, "step": 611 }, { "epoch": 0.09781826900023975, "grad_norm": 1.6016882251676439, "learning_rate": 1.952076677316294e-05, "loss": 0.8545553684234619, "step": 612 }, { "epoch": 0.09797810277311596, "grad_norm": 1.301380235836662, "learning_rate": 1.955271565495208e-05, "loss": 0.6566832065582275, "step": 613 }, { "epoch": 0.09813793654599216, "grad_norm": 1.3581994900828898, "learning_rate": 1.9584664536741215e-05, "loss": 0.6036455631256104, "step": 614 }, { "epoch": 0.09829777031886838, "grad_norm": 1.4157092517924772, "learning_rate": 1.9616613418530353e-05, "loss": 0.7263977527618408, "step": 615 }, { "epoch": 0.09845760409174459, "grad_norm": 1.594280998660133, "learning_rate": 1.9648562300319492e-05, "loss": 0.7667392492294312, "step": 616 }, { "epoch": 0.0986174378646208, "grad_norm": 1.3674278087027065, "learning_rate": 1.9680511182108627e-05, "loss": 0.7357811331748962, "step": 617 }, { "epoch": 0.098777271637497, "grad_norm": 1.463062164780302, "learning_rate": 1.9712460063897765e-05, "loss": 0.7066216468811035, "step": 618 }, { "epoch": 0.0989371054103732, "grad_norm": 1.6209217698499585, "learning_rate": 1.97444089456869e-05, "loss": 0.7781126499176025, "step": 619 }, { "epoch": 0.09909693918324942, "grad_norm": 1.5651276068574795, "learning_rate": 1.977635782747604e-05, "loss": 0.7274935245513916, "step": 620 }, { "epoch": 0.09925677295612563, "grad_norm": 1.2811464167057713, "learning_rate": 1.9808306709265177e-05, "loss": 0.5940160751342773, "step": 621 }, { "epoch": 0.09941660672900184, "grad_norm": 1.6834984047510695, "learning_rate": 1.9840255591054313e-05, "loss": 0.7388969659805298, "step": 622 }, { "epoch": 0.09957644050187804, "grad_norm": 1.1372107159996372, "learning_rate": 1.987220447284345e-05, "loss": 0.679617166519165, "step": 623 }, { "epoch": 0.09973627427475426, "grad_norm": 1.5299899364838703, "learning_rate": 1.990415335463259e-05, "loss": 0.7251070141792297, "step": 624 }, { "epoch": 0.09989610804763047, "grad_norm": 1.4490416313155432, "learning_rate": 1.9936102236421725e-05, "loss": 0.7098166942596436, "step": 625 }, { "epoch": 0.10005594182050667, "grad_norm": 1.5774377800002015, "learning_rate": 1.9968051118210867e-05, "loss": 0.8220812082290649, "step": 626 }, { "epoch": 0.10021577559338288, "grad_norm": 1.3355639207287495, "learning_rate": 2e-05, "loss": 0.6839143633842468, "step": 627 }, { "epoch": 0.1003756093662591, "grad_norm": 1.4567193189059648, "learning_rate": 1.9999999650817754e-05, "loss": 0.7098559141159058, "step": 628 }, { "epoch": 0.1005354431391353, "grad_norm": 1.4965574933350694, "learning_rate": 1.9999998603271044e-05, "loss": 0.7492846250534058, "step": 629 }, { "epoch": 0.10069527691201151, "grad_norm": 1.494552928770556, "learning_rate": 1.9999996857359933e-05, "loss": 0.7745399475097656, "step": 630 }, { "epoch": 0.10085511068488771, "grad_norm": 1.3840858168302719, "learning_rate": 1.999999441308455e-05, "loss": 0.8643372058868408, "step": 631 }, { "epoch": 0.10101494445776392, "grad_norm": 1.4898145008078183, "learning_rate": 1.9999991270445064e-05, "loss": 0.6853726506233215, "step": 632 }, { "epoch": 0.10117477823064014, "grad_norm": 1.3557409346014468, "learning_rate": 1.99999874294417e-05, "loss": 0.6257847547531128, "step": 633 }, { "epoch": 0.10133461200351634, "grad_norm": 1.4806423955233905, "learning_rate": 1.9999982890074715e-05, "loss": 0.8977762460708618, "step": 634 }, { "epoch": 0.10149444577639255, "grad_norm": 1.4827112578890862, "learning_rate": 1.999997765234444e-05, "loss": 0.6285030841827393, "step": 635 }, { "epoch": 0.10165427954926876, "grad_norm": 1.347981294740808, "learning_rate": 1.999997171625123e-05, "loss": 0.7057784795761108, "step": 636 }, { "epoch": 0.10181411332214498, "grad_norm": 1.5947879824427154, "learning_rate": 1.99999650817955e-05, "loss": 0.8639155626296997, "step": 637 }, { "epoch": 0.10197394709502118, "grad_norm": 1.4596111733357988, "learning_rate": 1.9999957748977717e-05, "loss": 0.7214103937149048, "step": 638 }, { "epoch": 0.10213378086789739, "grad_norm": 1.6031274804049949, "learning_rate": 1.9999949717798395e-05, "loss": 0.7008181810379028, "step": 639 }, { "epoch": 0.10229361464077359, "grad_norm": 1.436839886933864, "learning_rate": 1.9999940988258087e-05, "loss": 0.9145289659500122, "step": 640 }, { "epoch": 0.1024534484136498, "grad_norm": 1.397141195612346, "learning_rate": 1.9999931560357414e-05, "loss": 0.7054450511932373, "step": 641 }, { "epoch": 0.10261328218652602, "grad_norm": 1.3422743874015248, "learning_rate": 1.9999921434097025e-05, "loss": 0.778377890586853, "step": 642 }, { "epoch": 0.10277311595940222, "grad_norm": 1.5745463963259547, "learning_rate": 1.999991060947763e-05, "loss": 0.5604584217071533, "step": 643 }, { "epoch": 0.10293294973227843, "grad_norm": 1.3972051874052067, "learning_rate": 1.999989908649999e-05, "loss": 0.785851001739502, "step": 644 }, { "epoch": 0.10309278350515463, "grad_norm": 1.3972132019575454, "learning_rate": 1.9999886865164897e-05, "loss": 0.803881049156189, "step": 645 }, { "epoch": 0.10325261727803085, "grad_norm": 1.3344824898092937, "learning_rate": 1.9999873945473217e-05, "loss": 0.8854486346244812, "step": 646 }, { "epoch": 0.10341245105090706, "grad_norm": 1.598510784653645, "learning_rate": 1.9999860327425846e-05, "loss": 0.86016845703125, "step": 647 }, { "epoch": 0.10357228482378326, "grad_norm": 1.3290250098892573, "learning_rate": 1.9999846011023738e-05, "loss": 0.7498229146003723, "step": 648 }, { "epoch": 0.10373211859665947, "grad_norm": 1.207015379124579, "learning_rate": 1.9999830996267894e-05, "loss": 0.729159951210022, "step": 649 }, { "epoch": 0.10389195236953569, "grad_norm": 1.3009410354245314, "learning_rate": 1.9999815283159356e-05, "loss": 0.8211929202079773, "step": 650 }, { "epoch": 0.1040517861424119, "grad_norm": 1.4102535707092627, "learning_rate": 1.9999798871699227e-05, "loss": 0.6974851489067078, "step": 651 }, { "epoch": 0.1042116199152881, "grad_norm": 1.4441578186035322, "learning_rate": 1.9999781761888656e-05, "loss": 0.7162270545959473, "step": 652 }, { "epoch": 0.1043714536881643, "grad_norm": 1.5153393283157088, "learning_rate": 1.999976395372883e-05, "loss": 0.6694966554641724, "step": 653 }, { "epoch": 0.10453128746104051, "grad_norm": 1.4054449661296442, "learning_rate": 1.9999745447220996e-05, "loss": 0.7492227554321289, "step": 654 }, { "epoch": 0.10469112123391673, "grad_norm": 1.3418587314113692, "learning_rate": 1.9999726242366445e-05, "loss": 0.6083633303642273, "step": 655 }, { "epoch": 0.10485095500679294, "grad_norm": 1.6007082422722518, "learning_rate": 1.999970633916652e-05, "loss": 0.7736701965332031, "step": 656 }, { "epoch": 0.10501078877966914, "grad_norm": 1.3880584893383636, "learning_rate": 1.9999685737622613e-05, "loss": 0.7855695486068726, "step": 657 }, { "epoch": 0.10517062255254535, "grad_norm": 1.4748840529948737, "learning_rate": 1.999966443773616e-05, "loss": 0.7605036497116089, "step": 658 }, { "epoch": 0.10533045632542157, "grad_norm": 1.5037091334362858, "learning_rate": 1.9999642439508648e-05, "loss": 0.7304930686950684, "step": 659 }, { "epoch": 0.10549029009829777, "grad_norm": 1.4325988638550253, "learning_rate": 1.9999619742941617e-05, "loss": 0.6525495648384094, "step": 660 }, { "epoch": 0.10565012387117398, "grad_norm": 1.515310644649115, "learning_rate": 1.9999596348036646e-05, "loss": 0.7739183902740479, "step": 661 }, { "epoch": 0.10580995764405018, "grad_norm": 1.6266998291508052, "learning_rate": 1.9999572254795375e-05, "loss": 0.7643071413040161, "step": 662 }, { "epoch": 0.10596979141692639, "grad_norm": 1.6298003285797567, "learning_rate": 1.9999547463219482e-05, "loss": 0.7631039023399353, "step": 663 }, { "epoch": 0.10612962518980261, "grad_norm": 1.557243069389112, "learning_rate": 1.9999521973310704e-05, "loss": 0.7136729955673218, "step": 664 }, { "epoch": 0.10628945896267881, "grad_norm": 1.546519049669395, "learning_rate": 1.999949578507081e-05, "loss": 0.7530505657196045, "step": 665 }, { "epoch": 0.10644929273555502, "grad_norm": 1.6263556319253982, "learning_rate": 1.999946889850164e-05, "loss": 0.7270312905311584, "step": 666 }, { "epoch": 0.10660912650843123, "grad_norm": 1.678192160451896, "learning_rate": 1.9999441313605068e-05, "loss": 0.7296005487442017, "step": 667 }, { "epoch": 0.10676896028130745, "grad_norm": 1.7304701626526047, "learning_rate": 1.999941303038302e-05, "loss": 0.7601582407951355, "step": 668 }, { "epoch": 0.10692879405418365, "grad_norm": 1.586061488776819, "learning_rate": 1.9999384048837473e-05, "loss": 0.8124114274978638, "step": 669 }, { "epoch": 0.10708862782705986, "grad_norm": 1.3855062745455151, "learning_rate": 1.9999354368970446e-05, "loss": 0.7714579701423645, "step": 670 }, { "epoch": 0.10724846159993606, "grad_norm": 1.3386439044941267, "learning_rate": 1.999932399078402e-05, "loss": 0.7381305694580078, "step": 671 }, { "epoch": 0.10740829537281228, "grad_norm": 1.1916536869163317, "learning_rate": 1.999929291428031e-05, "loss": 0.718634843826294, "step": 672 }, { "epoch": 0.10756812914568849, "grad_norm": 1.75334178124822, "learning_rate": 1.9999261139461485e-05, "loss": 0.9466282725334167, "step": 673 }, { "epoch": 0.1077279629185647, "grad_norm": 1.3640740133150147, "learning_rate": 1.9999228666329767e-05, "loss": 0.7460193634033203, "step": 674 }, { "epoch": 0.1078877966914409, "grad_norm": 1.314944884282478, "learning_rate": 1.999919549488743e-05, "loss": 0.7294358015060425, "step": 675 }, { "epoch": 0.1080476304643171, "grad_norm": 1.3707763771637473, "learning_rate": 1.999916162513678e-05, "loss": 0.6791901588439941, "step": 676 }, { "epoch": 0.10820746423719332, "grad_norm": 1.5038187439835178, "learning_rate": 1.9999127057080185e-05, "loss": 0.7615883946418762, "step": 677 }, { "epoch": 0.10836729801006953, "grad_norm": 1.4356710936692834, "learning_rate": 1.9999091790720064e-05, "loss": 0.9881159663200378, "step": 678 }, { "epoch": 0.10852713178294573, "grad_norm": 1.5369302508927127, "learning_rate": 1.9999055826058877e-05, "loss": 0.7533701658248901, "step": 679 }, { "epoch": 0.10868696555582194, "grad_norm": 1.7202094537885078, "learning_rate": 1.9999019163099133e-05, "loss": 0.8186965584754944, "step": 680 }, { "epoch": 0.10884679932869816, "grad_norm": 1.8588091821617305, "learning_rate": 1.9998981801843395e-05, "loss": 0.8035793900489807, "step": 681 }, { "epoch": 0.10900663310157437, "grad_norm": 1.2667354148925583, "learning_rate": 1.999894374229427e-05, "loss": 0.7888848185539246, "step": 682 }, { "epoch": 0.10916646687445057, "grad_norm": 1.4934992653275359, "learning_rate": 1.999890498445442e-05, "loss": 0.807166576385498, "step": 683 }, { "epoch": 0.10932630064732678, "grad_norm": 1.3595152109349384, "learning_rate": 1.999886552832655e-05, "loss": 0.8332852125167847, "step": 684 }, { "epoch": 0.10948613442020298, "grad_norm": 1.2973924229614227, "learning_rate": 1.999882537391342e-05, "loss": 0.7118600606918335, "step": 685 }, { "epoch": 0.1096459681930792, "grad_norm": 1.3525484478370187, "learning_rate": 1.999878452121782e-05, "loss": 0.6941431760787964, "step": 686 }, { "epoch": 0.10980580196595541, "grad_norm": 1.5521131217930169, "learning_rate": 1.9998742970242614e-05, "loss": 0.7715268135070801, "step": 687 }, { "epoch": 0.10996563573883161, "grad_norm": 1.4435327520582972, "learning_rate": 1.9998700720990706e-05, "loss": 0.7204523086547852, "step": 688 }, { "epoch": 0.11012546951170782, "grad_norm": 1.3223053895669863, "learning_rate": 1.9998657773465044e-05, "loss": 0.7676020860671997, "step": 689 }, { "epoch": 0.11028530328458404, "grad_norm": 1.3950490270505227, "learning_rate": 1.999861412766862e-05, "loss": 0.6631279587745667, "step": 690 }, { "epoch": 0.11044513705746024, "grad_norm": 1.3646711766786552, "learning_rate": 1.999856978360449e-05, "loss": 0.7049078941345215, "step": 691 }, { "epoch": 0.11060497083033645, "grad_norm": 1.322431521773146, "learning_rate": 1.9998524741275755e-05, "loss": 0.7928891181945801, "step": 692 }, { "epoch": 0.11076480460321265, "grad_norm": 1.4787695753803778, "learning_rate": 1.9998479000685545e-05, "loss": 0.8428578972816467, "step": 693 }, { "epoch": 0.11092463837608887, "grad_norm": 1.4049577507833984, "learning_rate": 1.9998432561837068e-05, "loss": 0.9533113837242126, "step": 694 }, { "epoch": 0.11108447214896508, "grad_norm": 1.290068197437625, "learning_rate": 1.9998385424733566e-05, "loss": 0.7669137716293335, "step": 695 }, { "epoch": 0.11124430592184129, "grad_norm": 1.275894656846982, "learning_rate": 1.9998337589378324e-05, "loss": 0.6965047121047974, "step": 696 }, { "epoch": 0.11140413969471749, "grad_norm": 1.4345406982737519, "learning_rate": 1.9998289055774688e-05, "loss": 0.7974309921264648, "step": 697 }, { "epoch": 0.1115639734675937, "grad_norm": 1.473451490581318, "learning_rate": 1.9998239823926045e-05, "loss": 0.7685157656669617, "step": 698 }, { "epoch": 0.11172380724046992, "grad_norm": 1.552861223927574, "learning_rate": 1.999818989383583e-05, "loss": 0.8135314583778381, "step": 699 }, { "epoch": 0.11188364101334612, "grad_norm": 1.2205440282211373, "learning_rate": 1.999813926550754e-05, "loss": 0.698004961013794, "step": 700 }, { "epoch": 0.11204347478622233, "grad_norm": 1.5317837342224954, "learning_rate": 1.9998087938944704e-05, "loss": 0.832384467124939, "step": 701 }, { "epoch": 0.11220330855909853, "grad_norm": 1.5753813936724936, "learning_rate": 1.9998035914150903e-05, "loss": 0.6781207323074341, "step": 702 }, { "epoch": 0.11236314233197475, "grad_norm": 1.6159701352458775, "learning_rate": 1.999798319112978e-05, "loss": 0.7589502334594727, "step": 703 }, { "epoch": 0.11252297610485096, "grad_norm": 1.4221060670984769, "learning_rate": 1.9997929769885007e-05, "loss": 0.7922561168670654, "step": 704 }, { "epoch": 0.11268280987772716, "grad_norm": 1.493630582454643, "learning_rate": 1.9997875650420322e-05, "loss": 0.6877001523971558, "step": 705 }, { "epoch": 0.11284264365060337, "grad_norm": 1.4320228652650717, "learning_rate": 1.9997820832739498e-05, "loss": 0.7138189077377319, "step": 706 }, { "epoch": 0.11300247742347957, "grad_norm": 1.6622686750484885, "learning_rate": 1.999776531684637e-05, "loss": 0.874489963054657, "step": 707 }, { "epoch": 0.1131623111963558, "grad_norm": 1.301329359038166, "learning_rate": 1.999770910274481e-05, "loss": 0.6729953289031982, "step": 708 }, { "epoch": 0.113322144969232, "grad_norm": 1.578571338133926, "learning_rate": 1.9997652190438748e-05, "loss": 0.8198636174201965, "step": 709 }, { "epoch": 0.1134819787421082, "grad_norm": 1.3629496266160952, "learning_rate": 1.9997594579932156e-05, "loss": 0.7178049087524414, "step": 710 }, { "epoch": 0.11364181251498441, "grad_norm": 1.5560369775909741, "learning_rate": 1.9997536271229055e-05, "loss": 0.8800842761993408, "step": 711 }, { "epoch": 0.11380164628786063, "grad_norm": 1.4622672730279045, "learning_rate": 1.999747726433352e-05, "loss": 0.7434945106506348, "step": 712 }, { "epoch": 0.11396148006073684, "grad_norm": 1.4308975084540068, "learning_rate": 1.9997417559249674e-05, "loss": 0.7598833441734314, "step": 713 }, { "epoch": 0.11412131383361304, "grad_norm": 1.3174048076665434, "learning_rate": 1.9997357155981685e-05, "loss": 0.8143091797828674, "step": 714 }, { "epoch": 0.11428114760648925, "grad_norm": 2.562652910439098, "learning_rate": 1.9997296054533768e-05, "loss": 0.8293554782867432, "step": 715 }, { "epoch": 0.11444098137936547, "grad_norm": 1.3029985669011404, "learning_rate": 1.9997234254910193e-05, "loss": 0.8376948237419128, "step": 716 }, { "epoch": 0.11460081515224167, "grad_norm": 1.5864399239467744, "learning_rate": 1.9997171757115273e-05, "loss": 0.7996543645858765, "step": 717 }, { "epoch": 0.11476064892511788, "grad_norm": 1.593986198142795, "learning_rate": 1.999710856115338e-05, "loss": 0.802361011505127, "step": 718 }, { "epoch": 0.11492048269799408, "grad_norm": 1.3694338218636002, "learning_rate": 1.999704466702892e-05, "loss": 0.7593950033187866, "step": 719 }, { "epoch": 0.11508031647087029, "grad_norm": 1.4040264265750129, "learning_rate": 1.999698007474636e-05, "loss": 0.6507510542869568, "step": 720 }, { "epoch": 0.11524015024374651, "grad_norm": 1.3253572414773755, "learning_rate": 1.99969147843102e-05, "loss": 0.7981061339378357, "step": 721 }, { "epoch": 0.11539998401662271, "grad_norm": 3.3545082347298583, "learning_rate": 1.9996848795725013e-05, "loss": 0.7638077735900879, "step": 722 }, { "epoch": 0.11555981778949892, "grad_norm": 1.2873943554955793, "learning_rate": 1.9996782108995406e-05, "loss": 0.6922284364700317, "step": 723 }, { "epoch": 0.11571965156237513, "grad_norm": 1.3713244473441302, "learning_rate": 1.999671472412603e-05, "loss": 0.8139147758483887, "step": 724 }, { "epoch": 0.11587948533525134, "grad_norm": 1.251544991584579, "learning_rate": 1.9996646641121593e-05, "loss": 0.7402356863021851, "step": 725 }, { "epoch": 0.11603931910812755, "grad_norm": 1.2933822840941116, "learning_rate": 1.999657785998685e-05, "loss": 0.6658774614334106, "step": 726 }, { "epoch": 0.11619915288100376, "grad_norm": 1.5015880703153797, "learning_rate": 1.9996508380726608e-05, "loss": 0.6885571479797363, "step": 727 }, { "epoch": 0.11635898665387996, "grad_norm": 1.3853420788559907, "learning_rate": 1.999643820334571e-05, "loss": 0.8202059268951416, "step": 728 }, { "epoch": 0.11651882042675617, "grad_norm": 1.5558200256869792, "learning_rate": 1.999636732784907e-05, "loss": 0.7568144798278809, "step": 729 }, { "epoch": 0.11667865419963239, "grad_norm": 1.6247903673427564, "learning_rate": 1.9996295754241623e-05, "loss": 0.6452083587646484, "step": 730 }, { "epoch": 0.11683848797250859, "grad_norm": 1.5423882179541295, "learning_rate": 1.9996223482528378e-05, "loss": 0.8173599243164062, "step": 731 }, { "epoch": 0.1169983217453848, "grad_norm": 1.3952910663460865, "learning_rate": 1.9996150512714383e-05, "loss": 0.6236687302589417, "step": 732 }, { "epoch": 0.117158155518261, "grad_norm": 1.2000044109273, "learning_rate": 1.9996076844804726e-05, "loss": 0.6606029272079468, "step": 733 }, { "epoch": 0.11731798929113722, "grad_norm": 1.4908786504678813, "learning_rate": 1.9996002478804555e-05, "loss": 0.7458797693252563, "step": 734 }, { "epoch": 0.11747782306401343, "grad_norm": 1.4454457976254882, "learning_rate": 1.9995927414719067e-05, "loss": 0.6776801347732544, "step": 735 }, { "epoch": 0.11763765683688963, "grad_norm": 1.5300129675145215, "learning_rate": 1.99958516525535e-05, "loss": 0.8561967611312866, "step": 736 }, { "epoch": 0.11779749060976584, "grad_norm": 1.4173552203260131, "learning_rate": 1.9995775192313146e-05, "loss": 0.5846202373504639, "step": 737 }, { "epoch": 0.11795732438264205, "grad_norm": 1.3979387978200897, "learning_rate": 1.9995698034003345e-05, "loss": 0.5969233512878418, "step": 738 }, { "epoch": 0.11811715815551826, "grad_norm": 1.6602290440051615, "learning_rate": 1.999562017762949e-05, "loss": 0.791445255279541, "step": 739 }, { "epoch": 0.11827699192839447, "grad_norm": 1.6287211047063639, "learning_rate": 1.9995541623197007e-05, "loss": 0.686720609664917, "step": 740 }, { "epoch": 0.11843682570127068, "grad_norm": 1.4085243920576533, "learning_rate": 1.9995462370711394e-05, "loss": 0.6358303427696228, "step": 741 }, { "epoch": 0.11859665947414688, "grad_norm": 1.4890199988246242, "learning_rate": 1.999538242017818e-05, "loss": 0.6671973466873169, "step": 742 }, { "epoch": 0.1187564932470231, "grad_norm": 1.3131405077766432, "learning_rate": 1.9995301771602946e-05, "loss": 0.6473855972290039, "step": 743 }, { "epoch": 0.1189163270198993, "grad_norm": 1.4252254015531243, "learning_rate": 1.999522042499133e-05, "loss": 0.7837209701538086, "step": 744 }, { "epoch": 0.11907616079277551, "grad_norm": 1.456195962110472, "learning_rate": 1.999513838034901e-05, "loss": 0.8117954730987549, "step": 745 }, { "epoch": 0.11923599456565172, "grad_norm": 1.4624383025576997, "learning_rate": 1.9995055637681715e-05, "loss": 0.6887726783752441, "step": 746 }, { "epoch": 0.11939582833852794, "grad_norm": 1.3644455514063325, "learning_rate": 1.9994972196995223e-05, "loss": 0.7273027896881104, "step": 747 }, { "epoch": 0.11955566211140414, "grad_norm": 1.2354160517375818, "learning_rate": 1.9994888058295367e-05, "loss": 0.709640383720398, "step": 748 }, { "epoch": 0.11971549588428035, "grad_norm": 1.296199372521716, "learning_rate": 1.9994803221588012e-05, "loss": 0.8129841685295105, "step": 749 }, { "epoch": 0.11987532965715655, "grad_norm": 1.3084251672422877, "learning_rate": 1.999471768687909e-05, "loss": 0.7708166837692261, "step": 750 }, { "epoch": 0.12003516343003276, "grad_norm": 1.343136264498384, "learning_rate": 1.999463145417458e-05, "loss": 0.5883492231369019, "step": 751 }, { "epoch": 0.12019499720290898, "grad_norm": 3.086521522101128, "learning_rate": 1.999454452348049e-05, "loss": 0.7705204486846924, "step": 752 }, { "epoch": 0.12035483097578518, "grad_norm": 1.4279052592720383, "learning_rate": 1.9994456894802905e-05, "loss": 0.7730055451393127, "step": 753 }, { "epoch": 0.12051466474866139, "grad_norm": 1.4406602407332787, "learning_rate": 1.9994368568147934e-05, "loss": 0.8094367980957031, "step": 754 }, { "epoch": 0.1206744985215376, "grad_norm": 1.3594160791774532, "learning_rate": 1.9994279543521752e-05, "loss": 0.7939951419830322, "step": 755 }, { "epoch": 0.12083433229441382, "grad_norm": 1.5103698267499288, "learning_rate": 1.9994189820930572e-05, "loss": 0.8104475736618042, "step": 756 }, { "epoch": 0.12099416606729002, "grad_norm": 1.2989703352810038, "learning_rate": 1.9994099400380666e-05, "loss": 0.7015718817710876, "step": 757 }, { "epoch": 0.12115399984016623, "grad_norm": 1.3588807601219495, "learning_rate": 1.9994008281878342e-05, "loss": 0.6815011501312256, "step": 758 }, { "epoch": 0.12131383361304243, "grad_norm": 1.6636369010812362, "learning_rate": 1.9993916465429963e-05, "loss": 0.7900819778442383, "step": 759 }, { "epoch": 0.12147366738591864, "grad_norm": 1.583393568419869, "learning_rate": 1.9993823951041947e-05, "loss": 0.7775341868400574, "step": 760 }, { "epoch": 0.12163350115879486, "grad_norm": 1.3434209687150553, "learning_rate": 1.999373073872075e-05, "loss": 0.695499062538147, "step": 761 }, { "epoch": 0.12179333493167106, "grad_norm": 1.4938747872507019, "learning_rate": 1.9993636828472886e-05, "loss": 0.7884455919265747, "step": 762 }, { "epoch": 0.12195316870454727, "grad_norm": 1.4969269613130165, "learning_rate": 1.999354222030491e-05, "loss": 0.7110012769699097, "step": 763 }, { "epoch": 0.12211300247742347, "grad_norm": 1.5698219108445797, "learning_rate": 1.9993446914223432e-05, "loss": 0.7194823026657104, "step": 764 }, { "epoch": 0.1222728362502997, "grad_norm": 1.3479655886834037, "learning_rate": 1.99933509102351e-05, "loss": 0.6567502021789551, "step": 765 }, { "epoch": 0.1224326700231759, "grad_norm": 1.314662429689603, "learning_rate": 1.999325420834663e-05, "loss": 0.6682953238487244, "step": 766 }, { "epoch": 0.1225925037960521, "grad_norm": 1.3612265444763214, "learning_rate": 1.999315680856477e-05, "loss": 0.6191434264183044, "step": 767 }, { "epoch": 0.12275233756892831, "grad_norm": 1.3586313481804502, "learning_rate": 1.999305871089632e-05, "loss": 0.6948822736740112, "step": 768 }, { "epoch": 0.12291217134180453, "grad_norm": 1.3251596027374912, "learning_rate": 1.999295991534813e-05, "loss": 0.797822117805481, "step": 769 }, { "epoch": 0.12307200511468074, "grad_norm": 1.3861544233596654, "learning_rate": 1.9992860421927105e-05, "loss": 0.7153537273406982, "step": 770 }, { "epoch": 0.12323183888755694, "grad_norm": 1.3715012275030127, "learning_rate": 1.999276023064019e-05, "loss": 0.7628278136253357, "step": 771 }, { "epoch": 0.12339167266043315, "grad_norm": 1.2993806249383313, "learning_rate": 1.9992659341494384e-05, "loss": 0.6915866136550903, "step": 772 }, { "epoch": 0.12355150643330935, "grad_norm": 1.5104671752761194, "learning_rate": 1.999255775449673e-05, "loss": 0.8087785243988037, "step": 773 }, { "epoch": 0.12371134020618557, "grad_norm": 1.3997955534865878, "learning_rate": 1.999245546965432e-05, "loss": 0.6837514042854309, "step": 774 }, { "epoch": 0.12387117397906178, "grad_norm": 1.549410685107072, "learning_rate": 1.9992352486974306e-05, "loss": 0.5994603633880615, "step": 775 }, { "epoch": 0.12403100775193798, "grad_norm": 1.2260153890451526, "learning_rate": 1.999224880646387e-05, "loss": 0.5979177355766296, "step": 776 }, { "epoch": 0.12419084152481419, "grad_norm": 1.3211731900661143, "learning_rate": 1.9992144428130257e-05, "loss": 0.7793248891830444, "step": 777 }, { "epoch": 0.12435067529769041, "grad_norm": 1.4093771565696185, "learning_rate": 1.999203935198076e-05, "loss": 0.7097541689872742, "step": 778 }, { "epoch": 0.12451050907056661, "grad_norm": 1.4697753730701955, "learning_rate": 1.9991933578022714e-05, "loss": 0.8328258991241455, "step": 779 }, { "epoch": 0.12467034284344282, "grad_norm": 1.6176516728490122, "learning_rate": 1.9991827106263506e-05, "loss": 0.7757339477539062, "step": 780 }, { "epoch": 0.12483017661631902, "grad_norm": 1.6779962046337424, "learning_rate": 1.999171993671057e-05, "loss": 0.8943396806716919, "step": 781 }, { "epoch": 0.12499001038919523, "grad_norm": 1.4866385302379304, "learning_rate": 1.9991612069371395e-05, "loss": 0.7564566135406494, "step": 782 }, { "epoch": 0.12514984416207145, "grad_norm": 1.5368204152990008, "learning_rate": 1.9991503504253508e-05, "loss": 0.7480037808418274, "step": 783 }, { "epoch": 0.12530967793494766, "grad_norm": 1.6852182315123174, "learning_rate": 1.9991394241364495e-05, "loss": 0.760720431804657, "step": 784 }, { "epoch": 0.12546951170782386, "grad_norm": 1.4144300572430817, "learning_rate": 1.9991284280711983e-05, "loss": 0.7093279957771301, "step": 785 }, { "epoch": 0.12562934548070007, "grad_norm": 1.664356157087571, "learning_rate": 1.9991173622303653e-05, "loss": 0.8570642471313477, "step": 786 }, { "epoch": 0.12578917925357627, "grad_norm": 1.3558957609212012, "learning_rate": 1.9991062266147237e-05, "loss": 0.6494729518890381, "step": 787 }, { "epoch": 0.12594901302645248, "grad_norm": 1.3913306443382885, "learning_rate": 1.9990950212250507e-05, "loss": 0.6257604360580444, "step": 788 }, { "epoch": 0.1261088467993287, "grad_norm": 1.4700733231427066, "learning_rate": 1.9990837460621287e-05, "loss": 0.7150046229362488, "step": 789 }, { "epoch": 0.12626868057220492, "grad_norm": 2.0082754588488596, "learning_rate": 1.9990724011267456e-05, "loss": 0.7996202707290649, "step": 790 }, { "epoch": 0.12642851434508112, "grad_norm": 1.4745799433787967, "learning_rate": 1.9990609864196938e-05, "loss": 0.7747999429702759, "step": 791 }, { "epoch": 0.12658834811795733, "grad_norm": 1.7346905025549897, "learning_rate": 1.9990495019417696e-05, "loss": 0.9858893156051636, "step": 792 }, { "epoch": 0.12674818189083353, "grad_norm": 1.4574537227380138, "learning_rate": 1.999037947693776e-05, "loss": 0.7675096988677979, "step": 793 }, { "epoch": 0.12690801566370974, "grad_norm": 1.2070336779810367, "learning_rate": 1.9990263236765193e-05, "loss": 0.630946159362793, "step": 794 }, { "epoch": 0.12706784943658594, "grad_norm": 1.6026203484674784, "learning_rate": 1.9990146298908114e-05, "loss": 0.798943042755127, "step": 795 }, { "epoch": 0.12722768320946215, "grad_norm": 1.7806975646927936, "learning_rate": 1.999002866337469e-05, "loss": 0.7412124872207642, "step": 796 }, { "epoch": 0.12738751698233836, "grad_norm": 1.329987653218611, "learning_rate": 1.9989910330173135e-05, "loss": 0.6848167181015015, "step": 797 }, { "epoch": 0.1275473507552146, "grad_norm": 1.5784450644207673, "learning_rate": 1.9989791299311714e-05, "loss": 0.716299295425415, "step": 798 }, { "epoch": 0.1277071845280908, "grad_norm": 1.3655651747844746, "learning_rate": 1.9989671570798742e-05, "loss": 0.7831393480300903, "step": 799 }, { "epoch": 0.127867018300967, "grad_norm": 1.3077867940344519, "learning_rate": 1.9989551144642577e-05, "loss": 0.7548964619636536, "step": 800 }, { "epoch": 0.1280268520738432, "grad_norm": 1.42969725664558, "learning_rate": 1.9989430020851635e-05, "loss": 0.6904217004776001, "step": 801 }, { "epoch": 0.1281866858467194, "grad_norm": 1.5079106201133397, "learning_rate": 1.9989308199434365e-05, "loss": 0.7701213359832764, "step": 802 }, { "epoch": 0.12834651961959562, "grad_norm": 1.4706003204000797, "learning_rate": 1.9989185680399283e-05, "loss": 0.8197448253631592, "step": 803 }, { "epoch": 0.12850635339247182, "grad_norm": 1.6121921657547587, "learning_rate": 1.9989062463754944e-05, "loss": 0.8286414742469788, "step": 804 }, { "epoch": 0.12866618716534803, "grad_norm": 1.3060930626425251, "learning_rate": 1.9988938549509946e-05, "loss": 0.7402040958404541, "step": 805 }, { "epoch": 0.12882602093822423, "grad_norm": 1.2054317414989002, "learning_rate": 1.998881393767295e-05, "loss": 0.6891919374465942, "step": 806 }, { "epoch": 0.12898585471110047, "grad_norm": 1.4517901308748598, "learning_rate": 1.9988688628252656e-05, "loss": 0.678276777267456, "step": 807 }, { "epoch": 0.12914568848397667, "grad_norm": 1.6443159781464198, "learning_rate": 1.9988562621257816e-05, "loss": 0.8682153820991516, "step": 808 }, { "epoch": 0.12930552225685288, "grad_norm": 1.1320584344906806, "learning_rate": 1.9988435916697232e-05, "loss": 0.6385586261749268, "step": 809 }, { "epoch": 0.12946535602972908, "grad_norm": 1.3175463186636651, "learning_rate": 1.9988308514579747e-05, "loss": 0.8323901891708374, "step": 810 }, { "epoch": 0.1296251898026053, "grad_norm": 1.4202938176979525, "learning_rate": 1.9988180414914266e-05, "loss": 0.8119633197784424, "step": 811 }, { "epoch": 0.1297850235754815, "grad_norm": 1.3972852662045623, "learning_rate": 1.9988051617709726e-05, "loss": 0.8807292580604553, "step": 812 }, { "epoch": 0.1299448573483577, "grad_norm": 1.4514071220928828, "learning_rate": 1.9987922122975126e-05, "loss": 0.7514749765396118, "step": 813 }, { "epoch": 0.1301046911212339, "grad_norm": 1.250754897232334, "learning_rate": 1.9987791930719512e-05, "loss": 0.7307324409484863, "step": 814 }, { "epoch": 0.1302645248941101, "grad_norm": 1.3287292658766683, "learning_rate": 1.9987661040951975e-05, "loss": 0.6497594118118286, "step": 815 }, { "epoch": 0.13042435866698635, "grad_norm": 1.372864483564374, "learning_rate": 1.998752945368165e-05, "loss": 0.7597640156745911, "step": 816 }, { "epoch": 0.13058419243986255, "grad_norm": 1.214238284232431, "learning_rate": 1.9987397168917732e-05, "loss": 0.661428689956665, "step": 817 }, { "epoch": 0.13074402621273876, "grad_norm": 1.6072428916416392, "learning_rate": 1.9987264186669462e-05, "loss": 0.6505078673362732, "step": 818 }, { "epoch": 0.13090385998561496, "grad_norm": 1.3950353751417714, "learning_rate": 1.998713050694612e-05, "loss": 0.7483452558517456, "step": 819 }, { "epoch": 0.13106369375849117, "grad_norm": 1.374407439284077, "learning_rate": 1.9986996129757048e-05, "loss": 0.7821881771087646, "step": 820 }, { "epoch": 0.13122352753136737, "grad_norm": 1.2602925367458273, "learning_rate": 1.998686105511163e-05, "loss": 0.7397878170013428, "step": 821 }, { "epoch": 0.13138336130424358, "grad_norm": 1.406918888396706, "learning_rate": 1.998672528301929e-05, "loss": 0.7440090179443359, "step": 822 }, { "epoch": 0.13154319507711978, "grad_norm": 1.4824295324492283, "learning_rate": 1.998658881348952e-05, "loss": 0.9503762722015381, "step": 823 }, { "epoch": 0.13170302884999602, "grad_norm": 1.408553266501561, "learning_rate": 1.9986451646531848e-05, "loss": 0.7895357608795166, "step": 824 }, { "epoch": 0.13186286262287222, "grad_norm": 1.1775462711725244, "learning_rate": 1.9986313782155852e-05, "loss": 0.6279281973838806, "step": 825 }, { "epoch": 0.13202269639574843, "grad_norm": 1.5193201534636243, "learning_rate": 1.998617522037116e-05, "loss": 0.666504979133606, "step": 826 }, { "epoch": 0.13218253016862463, "grad_norm": 1.611762225633308, "learning_rate": 1.998603596118745e-05, "loss": 0.7678489089012146, "step": 827 }, { "epoch": 0.13234236394150084, "grad_norm": 1.4189748585864337, "learning_rate": 1.9985896004614446e-05, "loss": 0.739505410194397, "step": 828 }, { "epoch": 0.13250219771437705, "grad_norm": 1.4980511893106532, "learning_rate": 1.998575535066192e-05, "loss": 0.7873313426971436, "step": 829 }, { "epoch": 0.13266203148725325, "grad_norm": 1.4762134140516143, "learning_rate": 1.9985613999339703e-05, "loss": 0.7374459505081177, "step": 830 }, { "epoch": 0.13282186526012946, "grad_norm": 1.3271403571808393, "learning_rate": 1.9985471950657658e-05, "loss": 0.6338688135147095, "step": 831 }, { "epoch": 0.13298169903300566, "grad_norm": 1.429953704311959, "learning_rate": 1.9985329204625707e-05, "loss": 0.8153502941131592, "step": 832 }, { "epoch": 0.1331415328058819, "grad_norm": 1.5364928067655927, "learning_rate": 1.998518576125382e-05, "loss": 0.7660870552062988, "step": 833 }, { "epoch": 0.1333013665787581, "grad_norm": 1.5817043808111153, "learning_rate": 1.9985041620552015e-05, "loss": 0.7916711568832397, "step": 834 }, { "epoch": 0.1334612003516343, "grad_norm": 1.4192101907093717, "learning_rate": 1.998489678253036e-05, "loss": 0.6593364477157593, "step": 835 }, { "epoch": 0.1336210341245105, "grad_norm": 1.3896437510719697, "learning_rate": 1.9984751247198964e-05, "loss": 0.7173914909362793, "step": 836 }, { "epoch": 0.13378086789738672, "grad_norm": 1.6474760776086803, "learning_rate": 1.9984605014567996e-05, "loss": 0.728963315486908, "step": 837 }, { "epoch": 0.13394070167026292, "grad_norm": 1.361891080326298, "learning_rate": 1.9984458084647665e-05, "loss": 0.720567524433136, "step": 838 }, { "epoch": 0.13410053544313913, "grad_norm": 1.4796880461443134, "learning_rate": 1.9984310457448232e-05, "loss": 0.7465842962265015, "step": 839 }, { "epoch": 0.13426036921601534, "grad_norm": 1.5326301486727583, "learning_rate": 1.9984162132980012e-05, "loss": 0.8140372037887573, "step": 840 }, { "epoch": 0.13442020298889154, "grad_norm": 1.2700185749454123, "learning_rate": 1.9984013111253362e-05, "loss": 0.628426194190979, "step": 841 }, { "epoch": 0.13458003676176777, "grad_norm": 1.4552757570162211, "learning_rate": 1.9983863392278684e-05, "loss": 0.7490667104721069, "step": 842 }, { "epoch": 0.13473987053464398, "grad_norm": 1.6402076659343874, "learning_rate": 1.9983712976066436e-05, "loss": 0.7847870588302612, "step": 843 }, { "epoch": 0.13489970430752019, "grad_norm": 1.3880461118546683, "learning_rate": 1.9983561862627128e-05, "loss": 0.6565139293670654, "step": 844 }, { "epoch": 0.1350595380803964, "grad_norm": 1.4254949378831563, "learning_rate": 1.9983410051971302e-05, "loss": 0.7220372557640076, "step": 845 }, { "epoch": 0.1352193718532726, "grad_norm": 1.4532670302799984, "learning_rate": 1.9983257544109572e-05, "loss": 0.6851153373718262, "step": 846 }, { "epoch": 0.1353792056261488, "grad_norm": 1.3962327599807878, "learning_rate": 1.998310433905258e-05, "loss": 0.5873013138771057, "step": 847 }, { "epoch": 0.135539039399025, "grad_norm": 1.3306405531706538, "learning_rate": 1.9982950436811033e-05, "loss": 0.7105544209480286, "step": 848 }, { "epoch": 0.1356988731719012, "grad_norm": 1.440142775857777, "learning_rate": 1.998279583739567e-05, "loss": 0.6831409931182861, "step": 849 }, { "epoch": 0.13585870694477742, "grad_norm": 1.3037794074795865, "learning_rate": 1.9982640540817296e-05, "loss": 0.6227332353591919, "step": 850 }, { "epoch": 0.13601854071765365, "grad_norm": 1.488276177442909, "learning_rate": 1.9982484547086753e-05, "loss": 0.7171109318733215, "step": 851 }, { "epoch": 0.13617837449052986, "grad_norm": 1.3878072673942827, "learning_rate": 1.998232785621493e-05, "loss": 0.7268008589744568, "step": 852 }, { "epoch": 0.13633820826340606, "grad_norm": 1.6230638363806307, "learning_rate": 1.9982170468212777e-05, "loss": 0.8320509195327759, "step": 853 }, { "epoch": 0.13649804203628227, "grad_norm": 1.3605122559421887, "learning_rate": 1.998201238309128e-05, "loss": 0.5246403217315674, "step": 854 }, { "epoch": 0.13665787580915847, "grad_norm": 1.4164974838911193, "learning_rate": 1.9981853600861484e-05, "loss": 0.7979347705841064, "step": 855 }, { "epoch": 0.13681770958203468, "grad_norm": 1.4061019087050088, "learning_rate": 1.998169412153448e-05, "loss": 0.7623511552810669, "step": 856 }, { "epoch": 0.13697754335491089, "grad_norm": 1.5023940447461404, "learning_rate": 1.9981533945121394e-05, "loss": 0.6425983905792236, "step": 857 }, { "epoch": 0.1371373771277871, "grad_norm": 1.4472273239783606, "learning_rate": 1.998137307163342e-05, "loss": 0.6175265312194824, "step": 858 }, { "epoch": 0.1372972109006633, "grad_norm": 1.5136673792435242, "learning_rate": 1.9981211501081796e-05, "loss": 0.694920539855957, "step": 859 }, { "epoch": 0.13745704467353953, "grad_norm": 1.3204168591407848, "learning_rate": 1.9981049233477803e-05, "loss": 0.6901209354400635, "step": 860 }, { "epoch": 0.13761687844641574, "grad_norm": 1.5204631922033292, "learning_rate": 1.9980886268832766e-05, "loss": 0.7521947622299194, "step": 861 }, { "epoch": 0.13777671221929194, "grad_norm": 1.3868728614615131, "learning_rate": 1.9980722607158075e-05, "loss": 0.7104480266571045, "step": 862 }, { "epoch": 0.13793654599216815, "grad_norm": 1.368351234124227, "learning_rate": 1.9980558248465157e-05, "loss": 0.7963050603866577, "step": 863 }, { "epoch": 0.13809637976504435, "grad_norm": 1.120636966641217, "learning_rate": 1.9980393192765488e-05, "loss": 0.6104671955108643, "step": 864 }, { "epoch": 0.13825621353792056, "grad_norm": 1.5809984712722065, "learning_rate": 1.99802274400706e-05, "loss": 0.8194656372070312, "step": 865 }, { "epoch": 0.13841604731079676, "grad_norm": 1.3509426670607643, "learning_rate": 1.998006099039206e-05, "loss": 0.7100414037704468, "step": 866 }, { "epoch": 0.13857588108367297, "grad_norm": 1.3715565463572017, "learning_rate": 1.9979893843741498e-05, "loss": 0.7166603803634644, "step": 867 }, { "epoch": 0.1387357148565492, "grad_norm": 1.2843837194942023, "learning_rate": 1.9979726000130588e-05, "loss": 0.7846702933311462, "step": 868 }, { "epoch": 0.1388955486294254, "grad_norm": 1.4192625331879574, "learning_rate": 1.9979557459571048e-05, "loss": 0.7989272475242615, "step": 869 }, { "epoch": 0.1390553824023016, "grad_norm": 1.3508658317725502, "learning_rate": 1.997938822207465e-05, "loss": 0.6970962285995483, "step": 870 }, { "epoch": 0.13921521617517782, "grad_norm": 1.3424870348300788, "learning_rate": 1.9979218287653215e-05, "loss": 0.7169700860977173, "step": 871 }, { "epoch": 0.13937504994805403, "grad_norm": 1.3318421620444405, "learning_rate": 1.997904765631861e-05, "loss": 0.6523847579956055, "step": 872 }, { "epoch": 0.13953488372093023, "grad_norm": 1.511973974467927, "learning_rate": 1.9978876328082742e-05, "loss": 0.6740889549255371, "step": 873 }, { "epoch": 0.13969471749380644, "grad_norm": 1.3024752254773933, "learning_rate": 1.997870430295759e-05, "loss": 0.7536131143569946, "step": 874 }, { "epoch": 0.13985455126668264, "grad_norm": 1.2624338581187038, "learning_rate": 1.9978531580955162e-05, "loss": 0.6185048222541809, "step": 875 }, { "epoch": 0.14001438503955885, "grad_norm": 1.4457215182396839, "learning_rate": 1.9978358162087517e-05, "loss": 0.774916410446167, "step": 876 }, { "epoch": 0.14017421881243508, "grad_norm": 1.265215064717787, "learning_rate": 1.997818404636677e-05, "loss": 0.7172949314117432, "step": 877 }, { "epoch": 0.1403340525853113, "grad_norm": 1.3780350528155254, "learning_rate": 1.997800923380508e-05, "loss": 0.7877898216247559, "step": 878 }, { "epoch": 0.1404938863581875, "grad_norm": 1.4348174986505393, "learning_rate": 1.9977833724414656e-05, "loss": 0.749297022819519, "step": 879 }, { "epoch": 0.1406537201310637, "grad_norm": 1.5747294326281343, "learning_rate": 1.997765751820775e-05, "loss": 0.7231800556182861, "step": 880 }, { "epoch": 0.1408135539039399, "grad_norm": 1.4132827639109797, "learning_rate": 1.9977480615196674e-05, "loss": 0.6846503019332886, "step": 881 }, { "epoch": 0.1409733876768161, "grad_norm": 1.3575544343186035, "learning_rate": 1.9977303015393778e-05, "loss": 0.8677273988723755, "step": 882 }, { "epoch": 0.14113322144969231, "grad_norm": 1.5516626550800414, "learning_rate": 1.9977124718811466e-05, "loss": 0.7058049440383911, "step": 883 }, { "epoch": 0.14129305522256852, "grad_norm": 1.5660140356261012, "learning_rate": 1.997694572546219e-05, "loss": 0.7429174184799194, "step": 884 }, { "epoch": 0.14145288899544473, "grad_norm": 1.242676692011116, "learning_rate": 1.997676603535845e-05, "loss": 0.7057037353515625, "step": 885 }, { "epoch": 0.14161272276832096, "grad_norm": 1.434172836038226, "learning_rate": 1.9976585648512795e-05, "loss": 0.8037622570991516, "step": 886 }, { "epoch": 0.14177255654119716, "grad_norm": 1.4566863286161398, "learning_rate": 1.9976404564937825e-05, "loss": 0.8654393553733826, "step": 887 }, { "epoch": 0.14193239031407337, "grad_norm": 1.2693396819999607, "learning_rate": 1.9976222784646183e-05, "loss": 0.8642077445983887, "step": 888 }, { "epoch": 0.14209222408694958, "grad_norm": 1.3278870057678116, "learning_rate": 1.9976040307650565e-05, "loss": 0.6091741323471069, "step": 889 }, { "epoch": 0.14225205785982578, "grad_norm": 1.7121104971101613, "learning_rate": 1.9975857133963714e-05, "loss": 0.7157796025276184, "step": 890 }, { "epoch": 0.142411891632702, "grad_norm": 1.3115190531016032, "learning_rate": 1.997567326359842e-05, "loss": 0.7872158885002136, "step": 891 }, { "epoch": 0.1425717254055782, "grad_norm": 1.2646376523735015, "learning_rate": 1.997548869656753e-05, "loss": 0.6791656613349915, "step": 892 }, { "epoch": 0.1427315591784544, "grad_norm": 1.6302210428691624, "learning_rate": 1.997530343288393e-05, "loss": 0.8641713857650757, "step": 893 }, { "epoch": 0.1428913929513306, "grad_norm": 1.5407938301563782, "learning_rate": 1.9975117472560556e-05, "loss": 0.8025457859039307, "step": 894 }, { "epoch": 0.14305122672420684, "grad_norm": 1.2585900301713049, "learning_rate": 1.99749308156104e-05, "loss": 0.6510149240493774, "step": 895 }, { "epoch": 0.14321106049708304, "grad_norm": 1.1152684807651005, "learning_rate": 1.997474346204649e-05, "loss": 0.6275606155395508, "step": 896 }, { "epoch": 0.14337089426995925, "grad_norm": 1.5287563758203289, "learning_rate": 1.9974555411881917e-05, "loss": 0.8721482753753662, "step": 897 }, { "epoch": 0.14353072804283545, "grad_norm": 1.139634722078462, "learning_rate": 1.9974366665129813e-05, "loss": 0.6199144721031189, "step": 898 }, { "epoch": 0.14369056181571166, "grad_norm": 1.3212025171999888, "learning_rate": 1.9974177221803356e-05, "loss": 0.7045670747756958, "step": 899 }, { "epoch": 0.14385039558858786, "grad_norm": 1.3857178220062722, "learning_rate": 1.9973987081915777e-05, "loss": 0.655850887298584, "step": 900 }, { "epoch": 0.14401022936146407, "grad_norm": 1.495969812025157, "learning_rate": 1.9973796245480357e-05, "loss": 0.700211763381958, "step": 901 }, { "epoch": 0.14417006313434028, "grad_norm": 1.535909117751359, "learning_rate": 1.997360471251042e-05, "loss": 0.7579134702682495, "step": 902 }, { "epoch": 0.14432989690721648, "grad_norm": 1.327487936632262, "learning_rate": 1.9973412483019346e-05, "loss": 0.7369130849838257, "step": 903 }, { "epoch": 0.14448973068009272, "grad_norm": 1.6623115124446655, "learning_rate": 1.9973219557020554e-05, "loss": 0.7552086114883423, "step": 904 }, { "epoch": 0.14464956445296892, "grad_norm": 1.473609831194242, "learning_rate": 1.9973025934527522e-05, "loss": 0.7121331691741943, "step": 905 }, { "epoch": 0.14480939822584513, "grad_norm": 1.1736653193434718, "learning_rate": 1.9972831615553775e-05, "loss": 0.6454912424087524, "step": 906 }, { "epoch": 0.14496923199872133, "grad_norm": 1.3416803706748248, "learning_rate": 1.9972636600112873e-05, "loss": 0.6892168521881104, "step": 907 }, { "epoch": 0.14512906577159754, "grad_norm": 1.478810406109494, "learning_rate": 1.9972440888218445e-05, "loss": 0.6550434827804565, "step": 908 }, { "epoch": 0.14528889954447374, "grad_norm": 1.170605939411479, "learning_rate": 1.9972244479884153e-05, "loss": 0.7010334730148315, "step": 909 }, { "epoch": 0.14544873331734995, "grad_norm": 1.2333109573319108, "learning_rate": 1.9972047375123716e-05, "loss": 0.5657981038093567, "step": 910 }, { "epoch": 0.14560856709022615, "grad_norm": 1.2822715045842685, "learning_rate": 1.99718495739509e-05, "loss": 0.6540981531143188, "step": 911 }, { "epoch": 0.14576840086310236, "grad_norm": 1.7653545695338053, "learning_rate": 1.9971651076379515e-05, "loss": 0.7908906936645508, "step": 912 }, { "epoch": 0.1459282346359786, "grad_norm": 1.4081093991039986, "learning_rate": 1.9971451882423427e-05, "loss": 0.8336319923400879, "step": 913 }, { "epoch": 0.1460880684088548, "grad_norm": 1.767556507305242, "learning_rate": 1.9971251992096544e-05, "loss": 0.7375800013542175, "step": 914 }, { "epoch": 0.146247902181731, "grad_norm": 1.3912407339443467, "learning_rate": 1.997105140541283e-05, "loss": 0.8045055866241455, "step": 915 }, { "epoch": 0.1464077359546072, "grad_norm": 1.3627402532765005, "learning_rate": 1.9970850122386292e-05, "loss": 0.7990760803222656, "step": 916 }, { "epoch": 0.14656756972748342, "grad_norm": 1.3748101646099269, "learning_rate": 1.9970648143030982e-05, "loss": 0.6585942506790161, "step": 917 }, { "epoch": 0.14672740350035962, "grad_norm": 1.281497700936161, "learning_rate": 1.997044546736101e-05, "loss": 0.6348997354507446, "step": 918 }, { "epoch": 0.14688723727323583, "grad_norm": 1.3729524087220657, "learning_rate": 1.9970242095390532e-05, "loss": 0.6184995174407959, "step": 919 }, { "epoch": 0.14704707104611203, "grad_norm": 1.350699455723126, "learning_rate": 1.9970038027133742e-05, "loss": 0.6805800795555115, "step": 920 }, { "epoch": 0.14720690481898827, "grad_norm": 1.4691086765504024, "learning_rate": 1.99698332626049e-05, "loss": 0.7265095710754395, "step": 921 }, { "epoch": 0.14736673859186447, "grad_norm": 1.4981976314800434, "learning_rate": 1.9969627801818308e-05, "loss": 0.7314579486846924, "step": 922 }, { "epoch": 0.14752657236474068, "grad_norm": 1.4778512019452754, "learning_rate": 1.9969421644788306e-05, "loss": 0.7119166851043701, "step": 923 }, { "epoch": 0.14768640613761688, "grad_norm": 1.6699946187267296, "learning_rate": 1.9969214791529294e-05, "loss": 0.8397559523582458, "step": 924 }, { "epoch": 0.1478462399104931, "grad_norm": 1.2686807881182545, "learning_rate": 1.996900724205572e-05, "loss": 0.7030672430992126, "step": 925 }, { "epoch": 0.1480060736833693, "grad_norm": 1.3375425491498822, "learning_rate": 1.996879899638208e-05, "loss": 0.7186651825904846, "step": 926 }, { "epoch": 0.1481659074562455, "grad_norm": 1.5508304238737547, "learning_rate": 1.9968590054522914e-05, "loss": 0.7324919104576111, "step": 927 }, { "epoch": 0.1483257412291217, "grad_norm": 1.596473723497921, "learning_rate": 1.9968380416492814e-05, "loss": 0.8984308242797852, "step": 928 }, { "epoch": 0.1484855750019979, "grad_norm": 1.6719722564544426, "learning_rate": 1.9968170082306423e-05, "loss": 0.8869888782501221, "step": 929 }, { "epoch": 0.14864540877487414, "grad_norm": 1.5081731380779901, "learning_rate": 1.996795905197842e-05, "loss": 0.6465568542480469, "step": 930 }, { "epoch": 0.14880524254775035, "grad_norm": 1.688574850774831, "learning_rate": 1.996774732552356e-05, "loss": 0.8253421783447266, "step": 931 }, { "epoch": 0.14896507632062655, "grad_norm": 1.3507573825099708, "learning_rate": 1.9967534902956618e-05, "loss": 0.6745047569274902, "step": 932 }, { "epoch": 0.14912491009350276, "grad_norm": 1.4049992402820721, "learning_rate": 1.996732178429243e-05, "loss": 0.7645952105522156, "step": 933 }, { "epoch": 0.14928474386637897, "grad_norm": 1.2140000309213352, "learning_rate": 1.9967107969545878e-05, "loss": 0.6390388011932373, "step": 934 }, { "epoch": 0.14944457763925517, "grad_norm": 1.4184590898335492, "learning_rate": 1.9966893458731897e-05, "loss": 0.7879503965377808, "step": 935 }, { "epoch": 0.14960441141213138, "grad_norm": 1.2809890995634239, "learning_rate": 1.9966678251865467e-05, "loss": 0.7547303438186646, "step": 936 }, { "epoch": 0.14976424518500758, "grad_norm": 1.361017693578852, "learning_rate": 1.9966462348961618e-05, "loss": 0.7092487812042236, "step": 937 }, { "epoch": 0.1499240789578838, "grad_norm": 1.2342423951995731, "learning_rate": 1.996624575003543e-05, "loss": 0.588369607925415, "step": 938 }, { "epoch": 0.15008391273076002, "grad_norm": 1.1158747098415516, "learning_rate": 1.9966028455102022e-05, "loss": 0.7102346420288086, "step": 939 }, { "epoch": 0.15024374650363623, "grad_norm": 1.2836019520063893, "learning_rate": 1.9965810464176574e-05, "loss": 0.6842697858810425, "step": 940 }, { "epoch": 0.15040358027651243, "grad_norm": 1.4779235528833483, "learning_rate": 1.996559177727431e-05, "loss": 0.6228888034820557, "step": 941 }, { "epoch": 0.15056341404938864, "grad_norm": 1.4403195141503082, "learning_rate": 1.9965372394410504e-05, "loss": 0.7086974382400513, "step": 942 }, { "epoch": 0.15072324782226484, "grad_norm": 1.164030080376093, "learning_rate": 1.996515231560047e-05, "loss": 0.6259913444519043, "step": 943 }, { "epoch": 0.15088308159514105, "grad_norm": 1.206017008759446, "learning_rate": 1.9964931540859587e-05, "loss": 0.6592695713043213, "step": 944 }, { "epoch": 0.15104291536801726, "grad_norm": 1.6601763220040684, "learning_rate": 1.9964710070203266e-05, "loss": 0.8538177013397217, "step": 945 }, { "epoch": 0.15120274914089346, "grad_norm": 1.5037704827135245, "learning_rate": 1.9964487903646975e-05, "loss": 0.7218484878540039, "step": 946 }, { "epoch": 0.15136258291376967, "grad_norm": 1.2615428241014708, "learning_rate": 1.996426504120623e-05, "loss": 0.5786716938018799, "step": 947 }, { "epoch": 0.1515224166866459, "grad_norm": 1.4093805125367636, "learning_rate": 1.99640414828966e-05, "loss": 0.6617209911346436, "step": 948 }, { "epoch": 0.1516822504595221, "grad_norm": 1.4545485268130283, "learning_rate": 1.996381722873369e-05, "loss": 0.7049237489700317, "step": 949 }, { "epoch": 0.1518420842323983, "grad_norm": 1.4134266756557696, "learning_rate": 1.996359227873316e-05, "loss": 0.7708187103271484, "step": 950 }, { "epoch": 0.15200191800527452, "grad_norm": 1.9805576768600943, "learning_rate": 1.996336663291073e-05, "loss": 0.7421470880508423, "step": 951 }, { "epoch": 0.15216175177815072, "grad_norm": 1.7327465381892342, "learning_rate": 1.9963140291282147e-05, "loss": 0.8278591632843018, "step": 952 }, { "epoch": 0.15232158555102693, "grad_norm": 1.2272260698640785, "learning_rate": 1.9962913253863222e-05, "loss": 0.6209260821342468, "step": 953 }, { "epoch": 0.15248141932390313, "grad_norm": 1.3861265473162634, "learning_rate": 1.9962685520669814e-05, "loss": 0.6611728668212891, "step": 954 }, { "epoch": 0.15264125309677934, "grad_norm": 1.5163936289412105, "learning_rate": 1.9962457091717825e-05, "loss": 0.8616877794265747, "step": 955 }, { "epoch": 0.15280108686965554, "grad_norm": 1.307854652496479, "learning_rate": 1.9962227967023208e-05, "loss": 0.656430721282959, "step": 956 }, { "epoch": 0.15296092064253178, "grad_norm": 1.304730653945871, "learning_rate": 1.9961998146601956e-05, "loss": 0.5790224075317383, "step": 957 }, { "epoch": 0.15312075441540798, "grad_norm": 1.4327376792228963, "learning_rate": 1.9961767630470135e-05, "loss": 0.7594686150550842, "step": 958 }, { "epoch": 0.1532805881882842, "grad_norm": 1.5123930926460334, "learning_rate": 1.996153641864383e-05, "loss": 0.7222437858581543, "step": 959 }, { "epoch": 0.1534404219611604, "grad_norm": 1.6318906681523302, "learning_rate": 1.9961304511139192e-05, "loss": 0.8152687549591064, "step": 960 }, { "epoch": 0.1536002557340366, "grad_norm": 1.2463065893121337, "learning_rate": 1.996107190797242e-05, "loss": 0.6111058592796326, "step": 961 }, { "epoch": 0.1537600895069128, "grad_norm": 1.4092822971997043, "learning_rate": 1.996083860915975e-05, "loss": 0.6819170713424683, "step": 962 }, { "epoch": 0.153919923279789, "grad_norm": 1.4236109673945143, "learning_rate": 1.9960604614717486e-05, "loss": 0.6521663665771484, "step": 963 }, { "epoch": 0.15407975705266522, "grad_norm": 1.4205501906861928, "learning_rate": 1.9960369924661963e-05, "loss": 0.6381034851074219, "step": 964 }, { "epoch": 0.15423959082554145, "grad_norm": 1.4689061504446692, "learning_rate": 1.9960134539009567e-05, "loss": 0.8161357045173645, "step": 965 }, { "epoch": 0.15439942459841766, "grad_norm": 1.6815673620902638, "learning_rate": 1.9959898457776744e-05, "loss": 0.6852189302444458, "step": 966 }, { "epoch": 0.15455925837129386, "grad_norm": 1.2435863755379617, "learning_rate": 1.995966168097998e-05, "loss": 0.7425601482391357, "step": 967 }, { "epoch": 0.15471909214417007, "grad_norm": 1.4229260485405872, "learning_rate": 1.9959424208635805e-05, "loss": 0.6636368036270142, "step": 968 }, { "epoch": 0.15487892591704627, "grad_norm": 1.4783504327652013, "learning_rate": 1.9959186040760806e-05, "loss": 0.8207269310951233, "step": 969 }, { "epoch": 0.15503875968992248, "grad_norm": 1.7712104190822682, "learning_rate": 1.9958947177371618e-05, "loss": 0.792357325553894, "step": 970 }, { "epoch": 0.15519859346279868, "grad_norm": 1.5083206323707308, "learning_rate": 1.995870761848492e-05, "loss": 0.7694542407989502, "step": 971 }, { "epoch": 0.1553584272356749, "grad_norm": 1.3606612891734828, "learning_rate": 1.9958467364117443e-05, "loss": 0.5457226037979126, "step": 972 }, { "epoch": 0.1555182610085511, "grad_norm": 1.492143900184909, "learning_rate": 1.9958226414285972e-05, "loss": 0.6953445672988892, "step": 973 }, { "epoch": 0.15567809478142733, "grad_norm": 1.263633966385886, "learning_rate": 1.995798476900732e-05, "loss": 0.6509208679199219, "step": 974 }, { "epoch": 0.15583792855430353, "grad_norm": 1.302610561217528, "learning_rate": 1.9957742428298373e-05, "loss": 0.5416626930236816, "step": 975 }, { "epoch": 0.15599776232717974, "grad_norm": 1.664730762754315, "learning_rate": 1.995749939217605e-05, "loss": 0.8692383170127869, "step": 976 }, { "epoch": 0.15615759610005595, "grad_norm": 1.5125478203825429, "learning_rate": 1.9957255660657326e-05, "loss": 0.7670928239822388, "step": 977 }, { "epoch": 0.15631742987293215, "grad_norm": 1.414639685021893, "learning_rate": 1.9957011233759225e-05, "loss": 0.6701701283454895, "step": 978 }, { "epoch": 0.15647726364580836, "grad_norm": 1.3074807012873593, "learning_rate": 1.9956766111498815e-05, "loss": 0.6716769933700562, "step": 979 }, { "epoch": 0.15663709741868456, "grad_norm": 1.6055865408745886, "learning_rate": 1.995652029389321e-05, "loss": 0.8069812059402466, "step": 980 }, { "epoch": 0.15679693119156077, "grad_norm": 1.4774224269486786, "learning_rate": 1.9956273780959583e-05, "loss": 0.7159361243247986, "step": 981 }, { "epoch": 0.15695676496443697, "grad_norm": 1.3824736571317728, "learning_rate": 1.9956026572715145e-05, "loss": 0.5984079837799072, "step": 982 }, { "epoch": 0.1571165987373132, "grad_norm": 1.226044854808796, "learning_rate": 1.995577866917717e-05, "loss": 0.6948502063751221, "step": 983 }, { "epoch": 0.1572764325101894, "grad_norm": 1.3463374945540405, "learning_rate": 1.9955530070362957e-05, "loss": 0.6957736611366272, "step": 984 }, { "epoch": 0.15743626628306562, "grad_norm": 1.451964921184834, "learning_rate": 1.9955280776289874e-05, "loss": 0.8155888319015503, "step": 985 }, { "epoch": 0.15759610005594182, "grad_norm": 1.3587527274065607, "learning_rate": 1.995503078697533e-05, "loss": 0.7267242670059204, "step": 986 }, { "epoch": 0.15775593382881803, "grad_norm": 1.5492642071548155, "learning_rate": 1.9954780102436786e-05, "loss": 0.8222640752792358, "step": 987 }, { "epoch": 0.15791576760169423, "grad_norm": 1.316351762305781, "learning_rate": 1.9954528722691746e-05, "loss": 0.7900207042694092, "step": 988 }, { "epoch": 0.15807560137457044, "grad_norm": 1.3379343246518314, "learning_rate": 1.9954276647757768e-05, "loss": 0.6853829622268677, "step": 989 }, { "epoch": 0.15823543514744665, "grad_norm": 1.246087873933639, "learning_rate": 1.9954023877652448e-05, "loss": 0.653404712677002, "step": 990 }, { "epoch": 0.15839526892032285, "grad_norm": 1.5431433764320477, "learning_rate": 1.995377041239345e-05, "loss": 0.8442589640617371, "step": 991 }, { "epoch": 0.15855510269319908, "grad_norm": 1.4456461276597075, "learning_rate": 1.995351625199847e-05, "loss": 0.866415798664093, "step": 992 }, { "epoch": 0.1587149364660753, "grad_norm": 1.3917242574247715, "learning_rate": 1.9953261396485254e-05, "loss": 0.8028926849365234, "step": 993 }, { "epoch": 0.1588747702389515, "grad_norm": 1.4665798079072179, "learning_rate": 1.9953005845871608e-05, "loss": 0.8360021710395813, "step": 994 }, { "epoch": 0.1590346040118277, "grad_norm": 1.5124065434002965, "learning_rate": 1.9952749600175373e-05, "loss": 0.8041058778762817, "step": 995 }, { "epoch": 0.1591944377847039, "grad_norm": 1.3216509424894312, "learning_rate": 1.9952492659414445e-05, "loss": 0.6768205165863037, "step": 996 }, { "epoch": 0.1593542715575801, "grad_norm": 3.126617689166972, "learning_rate": 1.995223502360677e-05, "loss": 0.7393781542778015, "step": 997 }, { "epoch": 0.15951410533045632, "grad_norm": 1.3148726896062688, "learning_rate": 1.9951976692770334e-05, "loss": 0.7113670110702515, "step": 998 }, { "epoch": 0.15967393910333252, "grad_norm": 1.3363706958195052, "learning_rate": 1.9951717666923186e-05, "loss": 0.7462390661239624, "step": 999 }, { "epoch": 0.15983377287620873, "grad_norm": 1.3639920445051754, "learning_rate": 1.9951457946083414e-05, "loss": 0.7384167909622192, "step": 1000 }, { "epoch": 0.15999360664908496, "grad_norm": 1.4573527220318798, "learning_rate": 1.9951197530269152e-05, "loss": 0.61247718334198, "step": 1001 }, { "epoch": 0.16015344042196117, "grad_norm": 1.4199237214563944, "learning_rate": 1.995093641949859e-05, "loss": 0.602272629737854, "step": 1002 }, { "epoch": 0.16031327419483737, "grad_norm": 1.4516791450683415, "learning_rate": 1.995067461378996e-05, "loss": 0.643913984298706, "step": 1003 }, { "epoch": 0.16047310796771358, "grad_norm": 1.4934533391297626, "learning_rate": 1.995041211316155e-05, "loss": 0.8578706979751587, "step": 1004 }, { "epoch": 0.16063294174058979, "grad_norm": 1.4870690984623074, "learning_rate": 1.995014891763169e-05, "loss": 0.7291224002838135, "step": 1005 }, { "epoch": 0.160792775513466, "grad_norm": 1.5765021951535985, "learning_rate": 1.9949885027218756e-05, "loss": 0.7317346930503845, "step": 1006 }, { "epoch": 0.1609526092863422, "grad_norm": 1.396442681856437, "learning_rate": 1.9949620441941183e-05, "loss": 0.6754283905029297, "step": 1007 }, { "epoch": 0.1611124430592184, "grad_norm": 1.5881514532004795, "learning_rate": 1.9949355161817448e-05, "loss": 0.7083402872085571, "step": 1008 }, { "epoch": 0.16127227683209464, "grad_norm": 1.303024089380436, "learning_rate": 1.9949089186866078e-05, "loss": 0.6454806327819824, "step": 1009 }, { "epoch": 0.16143211060497084, "grad_norm": 1.2947883191005831, "learning_rate": 1.9948822517105643e-05, "loss": 0.7186431884765625, "step": 1010 }, { "epoch": 0.16159194437784705, "grad_norm": 1.2029339379999946, "learning_rate": 1.994855515255477e-05, "loss": 0.7341243028640747, "step": 1011 }, { "epoch": 0.16175177815072325, "grad_norm": 1.3262560200688185, "learning_rate": 1.994828709323213e-05, "loss": 0.6126158237457275, "step": 1012 }, { "epoch": 0.16191161192359946, "grad_norm": 1.462810504273161, "learning_rate": 1.9948018339156446e-05, "loss": 0.663185715675354, "step": 1013 }, { "epoch": 0.16207144569647566, "grad_norm": 1.336506264886934, "learning_rate": 1.9947748890346482e-05, "loss": 0.7099394202232361, "step": 1014 }, { "epoch": 0.16223127946935187, "grad_norm": 1.330958919156954, "learning_rate": 1.9947478746821057e-05, "loss": 0.6179676651954651, "step": 1015 }, { "epoch": 0.16239111324222807, "grad_norm": 1.910346161934647, "learning_rate": 1.994720790859904e-05, "loss": 0.7639859914779663, "step": 1016 }, { "epoch": 0.16255094701510428, "grad_norm": 1.3603000420345943, "learning_rate": 1.9946936375699338e-05, "loss": 0.6212798953056335, "step": 1017 }, { "epoch": 0.1627107807879805, "grad_norm": 1.3912571675983587, "learning_rate": 1.994666414814092e-05, "loss": 0.7770909070968628, "step": 1018 }, { "epoch": 0.16287061456085672, "grad_norm": 1.6785640002002342, "learning_rate": 1.99463912259428e-05, "loss": 0.7773163318634033, "step": 1019 }, { "epoch": 0.16303044833373292, "grad_norm": 1.5797605402680177, "learning_rate": 1.9946117609124034e-05, "loss": 0.700624942779541, "step": 1020 }, { "epoch": 0.16319028210660913, "grad_norm": 1.365004618442985, "learning_rate": 1.9945843297703728e-05, "loss": 0.7140263319015503, "step": 1021 }, { "epoch": 0.16335011587948534, "grad_norm": 1.4660563012738965, "learning_rate": 1.994556829170104e-05, "loss": 0.7508946061134338, "step": 1022 }, { "epoch": 0.16350994965236154, "grad_norm": 1.5049387777138001, "learning_rate": 1.9945292591135178e-05, "loss": 0.6493654251098633, "step": 1023 }, { "epoch": 0.16366978342523775, "grad_norm": 1.4992198994613828, "learning_rate": 1.9945016196025396e-05, "loss": 0.7683531641960144, "step": 1024 }, { "epoch": 0.16382961719811395, "grad_norm": 1.294298055760375, "learning_rate": 1.9944739106390993e-05, "loss": 0.6861573457717896, "step": 1025 }, { "epoch": 0.16398945097099016, "grad_norm": 1.3003445444487074, "learning_rate": 1.9944461322251322e-05, "loss": 0.7353789806365967, "step": 1026 }, { "epoch": 0.1641492847438664, "grad_norm": 1.4310496088792415, "learning_rate": 1.9944182843625786e-05, "loss": 0.744941234588623, "step": 1027 }, { "epoch": 0.1643091185167426, "grad_norm": 1.4565275601404162, "learning_rate": 1.9943903670533828e-05, "loss": 0.655401349067688, "step": 1028 }, { "epoch": 0.1644689522896188, "grad_norm": 1.4184373608632246, "learning_rate": 1.9943623802994945e-05, "loss": 0.6169332265853882, "step": 1029 }, { "epoch": 0.164628786062495, "grad_norm": 1.6367757436977304, "learning_rate": 1.9943343241028684e-05, "loss": 0.705575704574585, "step": 1030 }, { "epoch": 0.16478861983537121, "grad_norm": 1.3938231350543486, "learning_rate": 1.9943061984654638e-05, "loss": 0.8016455173492432, "step": 1031 }, { "epoch": 0.16494845360824742, "grad_norm": 1.4665178829548517, "learning_rate": 1.9942780033892445e-05, "loss": 0.8134523630142212, "step": 1032 }, { "epoch": 0.16510828738112363, "grad_norm": 1.38167624699251, "learning_rate": 1.9942497388761803e-05, "loss": 0.6190634965896606, "step": 1033 }, { "epoch": 0.16526812115399983, "grad_norm": 1.4237287017163904, "learning_rate": 1.9942214049282446e-05, "loss": 0.6930725574493408, "step": 1034 }, { "epoch": 0.16542795492687604, "grad_norm": 1.6234750460159209, "learning_rate": 1.9941930015474162e-05, "loss": 0.827562689781189, "step": 1035 }, { "epoch": 0.16558778869975227, "grad_norm": 1.634216999324018, "learning_rate": 1.9941645287356787e-05, "loss": 0.8899297714233398, "step": 1036 }, { "epoch": 0.16574762247262848, "grad_norm": 1.35543750521662, "learning_rate": 1.9941359864950204e-05, "loss": 0.7438822388648987, "step": 1037 }, { "epoch": 0.16590745624550468, "grad_norm": 1.736905288254636, "learning_rate": 1.9941073748274348e-05, "loss": 0.7484678030014038, "step": 1038 }, { "epoch": 0.1660672900183809, "grad_norm": 2.0651441942946227, "learning_rate": 1.99407869373492e-05, "loss": 0.8200445175170898, "step": 1039 }, { "epoch": 0.1662271237912571, "grad_norm": 1.4709334758241983, "learning_rate": 1.994049943219479e-05, "loss": 0.808530330657959, "step": 1040 }, { "epoch": 0.1663869575641333, "grad_norm": 1.5182224013272703, "learning_rate": 1.9940211232831196e-05, "loss": 0.6897738575935364, "step": 1041 }, { "epoch": 0.1665467913370095, "grad_norm": 1.3818107502162122, "learning_rate": 1.9939922339278544e-05, "loss": 0.6088767051696777, "step": 1042 }, { "epoch": 0.1667066251098857, "grad_norm": 1.3632957150144343, "learning_rate": 1.993963275155701e-05, "loss": 0.6425639390945435, "step": 1043 }, { "epoch": 0.16686645888276191, "grad_norm": 1.2653830785836113, "learning_rate": 1.993934246968682e-05, "loss": 0.8192942142486572, "step": 1044 }, { "epoch": 0.16702629265563815, "grad_norm": 2.784018823297878, "learning_rate": 1.9939051493688243e-05, "loss": 0.7274699807167053, "step": 1045 }, { "epoch": 0.16718612642851435, "grad_norm": 1.5656824413264105, "learning_rate": 1.99387598235816e-05, "loss": 0.7395567893981934, "step": 1046 }, { "epoch": 0.16734596020139056, "grad_norm": 1.3491670195660148, "learning_rate": 1.993846745938726e-05, "loss": 0.6142668724060059, "step": 1047 }, { "epoch": 0.16750579397426676, "grad_norm": 1.5675560733551817, "learning_rate": 1.9938174401125646e-05, "loss": 0.7102195024490356, "step": 1048 }, { "epoch": 0.16766562774714297, "grad_norm": 1.3513775497980522, "learning_rate": 1.9937880648817215e-05, "loss": 0.7174410820007324, "step": 1049 }, { "epoch": 0.16782546152001918, "grad_norm": 1.4590406438093906, "learning_rate": 1.993758620248249e-05, "loss": 0.653752326965332, "step": 1050 }, { "epoch": 0.16798529529289538, "grad_norm": 1.395920369657364, "learning_rate": 1.993729106214203e-05, "loss": 0.8101499080657959, "step": 1051 }, { "epoch": 0.1681451290657716, "grad_norm": 1.3201923321551465, "learning_rate": 1.9936995227816445e-05, "loss": 0.7454655170440674, "step": 1052 }, { "epoch": 0.1683049628386478, "grad_norm": 1.5602885953114096, "learning_rate": 1.99366986995264e-05, "loss": 0.7612032294273376, "step": 1053 }, { "epoch": 0.16846479661152403, "grad_norm": 1.1957562587806345, "learning_rate": 1.9936401477292597e-05, "loss": 0.66379714012146, "step": 1054 }, { "epoch": 0.16862463038440023, "grad_norm": 1.647740116928088, "learning_rate": 1.99361035611358e-05, "loss": 0.7867101430892944, "step": 1055 }, { "epoch": 0.16878446415727644, "grad_norm": 1.4881745994048563, "learning_rate": 1.993580495107681e-05, "loss": 0.7544465065002441, "step": 1056 }, { "epoch": 0.16894429793015264, "grad_norm": 1.491574164739546, "learning_rate": 1.993550564713648e-05, "loss": 0.6714593768119812, "step": 1057 }, { "epoch": 0.16910413170302885, "grad_norm": 1.4839586272916174, "learning_rate": 1.9935205649335717e-05, "loss": 0.6167606115341187, "step": 1058 }, { "epoch": 0.16926396547590505, "grad_norm": 1.5098518641834626, "learning_rate": 1.993490495769547e-05, "loss": 0.7658101916313171, "step": 1059 }, { "epoch": 0.16942379924878126, "grad_norm": 1.3156136793157323, "learning_rate": 1.993460357223673e-05, "loss": 0.623932957649231, "step": 1060 }, { "epoch": 0.16958363302165747, "grad_norm": 1.5546796094351716, "learning_rate": 1.9934301492980557e-05, "loss": 0.7102897763252258, "step": 1061 }, { "epoch": 0.1697434667945337, "grad_norm": 1.576482622380038, "learning_rate": 1.9933998719948044e-05, "loss": 0.6239539384841919, "step": 1062 }, { "epoch": 0.1699033005674099, "grad_norm": 1.1860167167893243, "learning_rate": 1.993369525316033e-05, "loss": 0.7209612131118774, "step": 1063 }, { "epoch": 0.1700631343402861, "grad_norm": 1.415319848446742, "learning_rate": 1.9933391092638614e-05, "loss": 0.7445592880249023, "step": 1064 }, { "epoch": 0.17022296811316232, "grad_norm": 1.5664994426954715, "learning_rate": 1.9933086238404137e-05, "loss": 0.9584676027297974, "step": 1065 }, { "epoch": 0.17038280188603852, "grad_norm": 4.634059872645088, "learning_rate": 1.9932780690478184e-05, "loss": 0.6520017385482788, "step": 1066 }, { "epoch": 0.17054263565891473, "grad_norm": 1.2038014055581958, "learning_rate": 1.9932474448882097e-05, "loss": 0.6197675466537476, "step": 1067 }, { "epoch": 0.17070246943179093, "grad_norm": 1.4604003612925882, "learning_rate": 1.993216751363726e-05, "loss": 0.8016868829727173, "step": 1068 }, { "epoch": 0.17086230320466714, "grad_norm": 1.4486654604136855, "learning_rate": 1.9931859884765113e-05, "loss": 0.5726907253265381, "step": 1069 }, { "epoch": 0.17102213697754334, "grad_norm": 1.2093838164256665, "learning_rate": 1.993155156228714e-05, "loss": 0.6149111986160278, "step": 1070 }, { "epoch": 0.17118197075041958, "grad_norm": 1.3185889622555902, "learning_rate": 1.9931242546224868e-05, "loss": 0.7823998928070068, "step": 1071 }, { "epoch": 0.17134180452329578, "grad_norm": 1.3037382767571117, "learning_rate": 1.9930932836599877e-05, "loss": 0.7147391438484192, "step": 1072 }, { "epoch": 0.171501638296172, "grad_norm": 1.471605370585371, "learning_rate": 1.9930622433433805e-05, "loss": 0.8041431903839111, "step": 1073 }, { "epoch": 0.1716614720690482, "grad_norm": 1.42153913054521, "learning_rate": 1.9930311336748317e-05, "loss": 0.7112363576889038, "step": 1074 }, { "epoch": 0.1718213058419244, "grad_norm": 1.4686723532696686, "learning_rate": 1.9929999546565148e-05, "loss": 0.6364198923110962, "step": 1075 }, { "epoch": 0.1719811396148006, "grad_norm": 1.5342368918420477, "learning_rate": 1.9929687062906072e-05, "loss": 0.8081374764442444, "step": 1076 }, { "epoch": 0.1721409733876768, "grad_norm": 1.67005403245223, "learning_rate": 1.9929373885792906e-05, "loss": 0.6716324090957642, "step": 1077 }, { "epoch": 0.17230080716055302, "grad_norm": 1.4683740785622132, "learning_rate": 1.9929060015247524e-05, "loss": 0.730725884437561, "step": 1078 }, { "epoch": 0.17246064093342922, "grad_norm": 1.5098548457983347, "learning_rate": 1.992874545129185e-05, "loss": 0.7582625150680542, "step": 1079 }, { "epoch": 0.17262047470630545, "grad_norm": 1.3010269536663825, "learning_rate": 1.9928430193947847e-05, "loss": 0.639045000076294, "step": 1080 }, { "epoch": 0.17278030847918166, "grad_norm": 1.283726385640876, "learning_rate": 1.9928114243237533e-05, "loss": 0.648330569267273, "step": 1081 }, { "epoch": 0.17294014225205787, "grad_norm": 1.2846217947904404, "learning_rate": 1.9927797599182973e-05, "loss": 0.8496332764625549, "step": 1082 }, { "epoch": 0.17309997602493407, "grad_norm": 1.3568204254983929, "learning_rate": 1.9927480261806277e-05, "loss": 0.8098894953727722, "step": 1083 }, { "epoch": 0.17325980979781028, "grad_norm": 1.2919614566995932, "learning_rate": 1.9927162231129613e-05, "loss": 0.6306595802307129, "step": 1084 }, { "epoch": 0.17341964357068648, "grad_norm": 1.575096964473941, "learning_rate": 1.9926843507175185e-05, "loss": 0.8325341939926147, "step": 1085 }, { "epoch": 0.1735794773435627, "grad_norm": 1.3510001009339379, "learning_rate": 1.992652408996526e-05, "loss": 0.6723941564559937, "step": 1086 }, { "epoch": 0.1737393111164389, "grad_norm": 1.2964920675107137, "learning_rate": 1.992620397952213e-05, "loss": 0.7341580390930176, "step": 1087 }, { "epoch": 0.1738991448893151, "grad_norm": 1.439939213144319, "learning_rate": 1.9925883175868165e-05, "loss": 0.6451663970947266, "step": 1088 }, { "epoch": 0.17405897866219133, "grad_norm": 1.3593318098581115, "learning_rate": 1.9925561679025766e-05, "loss": 0.6865383386611938, "step": 1089 }, { "epoch": 0.17421881243506754, "grad_norm": 1.3960577573628474, "learning_rate": 1.9925239489017376e-05, "loss": 0.7389088869094849, "step": 1090 }, { "epoch": 0.17437864620794374, "grad_norm": 1.3597316269454869, "learning_rate": 1.9924916605865506e-05, "loss": 0.6307468414306641, "step": 1091 }, { "epoch": 0.17453847998081995, "grad_norm": 1.0504542277719269, "learning_rate": 1.9924593029592702e-05, "loss": 0.4898347556591034, "step": 1092 }, { "epoch": 0.17469831375369616, "grad_norm": 1.31899028228928, "learning_rate": 1.9924268760221558e-05, "loss": 0.6519070863723755, "step": 1093 }, { "epoch": 0.17485814752657236, "grad_norm": 1.5283646248839198, "learning_rate": 1.9923943797774724e-05, "loss": 0.9303742051124573, "step": 1094 }, { "epoch": 0.17501798129944857, "grad_norm": 1.4954238408621787, "learning_rate": 1.9923618142274894e-05, "loss": 0.7315076589584351, "step": 1095 }, { "epoch": 0.17517781507232477, "grad_norm": 1.423686548179565, "learning_rate": 1.992329179374481e-05, "loss": 0.700570821762085, "step": 1096 }, { "epoch": 0.17533764884520098, "grad_norm": 1.256413427606751, "learning_rate": 1.9922964752207257e-05, "loss": 0.800347626209259, "step": 1097 }, { "epoch": 0.1754974826180772, "grad_norm": 1.3806815764062652, "learning_rate": 1.992263701768508e-05, "loss": 0.6470175981521606, "step": 1098 }, { "epoch": 0.17565731639095342, "grad_norm": 1.3161745498260238, "learning_rate": 1.9922308590201173e-05, "loss": 0.7205234169960022, "step": 1099 }, { "epoch": 0.17581715016382962, "grad_norm": 1.611586733555736, "learning_rate": 1.992197946977846e-05, "loss": 0.7086703181266785, "step": 1100 }, { "epoch": 0.17597698393670583, "grad_norm": 1.3464135472441086, "learning_rate": 1.9921649656439936e-05, "loss": 0.774569034576416, "step": 1101 }, { "epoch": 0.17613681770958203, "grad_norm": 1.7952167107423722, "learning_rate": 1.992131915020863e-05, "loss": 0.6944275498390198, "step": 1102 }, { "epoch": 0.17629665148245824, "grad_norm": 1.7323031742852155, "learning_rate": 1.9920987951107617e-05, "loss": 0.7453587651252747, "step": 1103 }, { "epoch": 0.17645648525533444, "grad_norm": 1.3576923537685732, "learning_rate": 1.9920656059160034e-05, "loss": 0.7179883718490601, "step": 1104 }, { "epoch": 0.17661631902821065, "grad_norm": 1.589667815730307, "learning_rate": 1.992032347438906e-05, "loss": 0.7739389538764954, "step": 1105 }, { "epoch": 0.17677615280108688, "grad_norm": 1.2196343923435364, "learning_rate": 1.991999019681792e-05, "loss": 0.651526689529419, "step": 1106 }, { "epoch": 0.1769359865739631, "grad_norm": 1.4895006528767163, "learning_rate": 1.991965622646989e-05, "loss": 0.7612435817718506, "step": 1107 }, { "epoch": 0.1770958203468393, "grad_norm": 1.3457430858067245, "learning_rate": 1.9919321563368283e-05, "loss": 0.7394741773605347, "step": 1108 }, { "epoch": 0.1772556541197155, "grad_norm": 1.3701093975168632, "learning_rate": 1.9918986207536484e-05, "loss": 0.697041392326355, "step": 1109 }, { "epoch": 0.1774154878925917, "grad_norm": 1.5265251958619852, "learning_rate": 1.991865015899791e-05, "loss": 0.7596273422241211, "step": 1110 }, { "epoch": 0.1775753216654679, "grad_norm": 1.6086916364535837, "learning_rate": 1.9918313417776024e-05, "loss": 0.7562317848205566, "step": 1111 }, { "epoch": 0.17773515543834412, "grad_norm": 1.31457115965481, "learning_rate": 1.9917975983894352e-05, "loss": 0.6901167631149292, "step": 1112 }, { "epoch": 0.17789498921122032, "grad_norm": 1.2993251407721493, "learning_rate": 1.9917637857376448e-05, "loss": 0.634183406829834, "step": 1113 }, { "epoch": 0.17805482298409653, "grad_norm": 1.3452322418580913, "learning_rate": 1.9917299038245933e-05, "loss": 0.6554555892944336, "step": 1114 }, { "epoch": 0.17821465675697276, "grad_norm": 1.2658707911732816, "learning_rate": 1.9916959526526468e-05, "loss": 0.7572702169418335, "step": 1115 }, { "epoch": 0.17837449052984897, "grad_norm": 1.4058965230551514, "learning_rate": 1.991661932224176e-05, "loss": 0.6066701412200928, "step": 1116 }, { "epoch": 0.17853432430272517, "grad_norm": 1.3789586357786254, "learning_rate": 1.9916278425415573e-05, "loss": 0.7609643936157227, "step": 1117 }, { "epoch": 0.17869415807560138, "grad_norm": 1.5044768551051912, "learning_rate": 1.991593683607171e-05, "loss": 0.7424414157867432, "step": 1118 }, { "epoch": 0.17885399184847758, "grad_norm": 1.3497615550244935, "learning_rate": 1.9915594554234027e-05, "loss": 0.7500286102294922, "step": 1119 }, { "epoch": 0.1790138256213538, "grad_norm": 1.501624923009734, "learning_rate": 1.9915251579926428e-05, "loss": 0.7611908912658691, "step": 1120 }, { "epoch": 0.17917365939423, "grad_norm": 1.4026073427011163, "learning_rate": 1.9914907913172865e-05, "loss": 0.7436773777008057, "step": 1121 }, { "epoch": 0.1793334931671062, "grad_norm": 1.2931229802647175, "learning_rate": 1.991456355399734e-05, "loss": 0.6791903376579285, "step": 1122 }, { "epoch": 0.1794933269399824, "grad_norm": 1.4977426701038827, "learning_rate": 1.99142185024239e-05, "loss": 0.7694204449653625, "step": 1123 }, { "epoch": 0.17965316071285864, "grad_norm": 1.3527033635410413, "learning_rate": 1.991387275847664e-05, "loss": 0.6821222305297852, "step": 1124 }, { "epoch": 0.17981299448573485, "grad_norm": 1.654116555931727, "learning_rate": 1.9913526322179715e-05, "loss": 0.636681079864502, "step": 1125 }, { "epoch": 0.17997282825861105, "grad_norm": 1.2446761958709682, "learning_rate": 1.9913179193557308e-05, "loss": 0.716866135597229, "step": 1126 }, { "epoch": 0.18013266203148726, "grad_norm": 1.3797900296469492, "learning_rate": 1.9912831372633665e-05, "loss": 0.5786306858062744, "step": 1127 }, { "epoch": 0.18029249580436346, "grad_norm": 1.2563355087861467, "learning_rate": 1.9912482859433076e-05, "loss": 0.6823065876960754, "step": 1128 }, { "epoch": 0.18045232957723967, "grad_norm": 1.4519317678462809, "learning_rate": 1.9912133653979887e-05, "loss": 0.6578145623207092, "step": 1129 }, { "epoch": 0.18061216335011587, "grad_norm": 1.4054260973526813, "learning_rate": 1.991178375629847e-05, "loss": 0.6792463064193726, "step": 1130 }, { "epoch": 0.18077199712299208, "grad_norm": 1.311879838318306, "learning_rate": 1.9911433166413277e-05, "loss": 0.6729969382286072, "step": 1131 }, { "epoch": 0.18093183089586828, "grad_norm": 1.4613453973901744, "learning_rate": 1.991108188434878e-05, "loss": 0.868512749671936, "step": 1132 }, { "epoch": 0.18109166466874452, "grad_norm": 1.240581119924001, "learning_rate": 1.9910729910129524e-05, "loss": 0.7306257486343384, "step": 1133 }, { "epoch": 0.18125149844162072, "grad_norm": 1.2931616712106246, "learning_rate": 1.9910377243780076e-05, "loss": 0.6209720373153687, "step": 1134 }, { "epoch": 0.18141133221449693, "grad_norm": 1.2707647066687375, "learning_rate": 1.991002388532507e-05, "loss": 0.6145265698432922, "step": 1135 }, { "epoch": 0.18157116598737313, "grad_norm": 1.4096953093606284, "learning_rate": 1.9909669834789183e-05, "loss": 0.7791049480438232, "step": 1136 }, { "epoch": 0.18173099976024934, "grad_norm": 1.5950022300848443, "learning_rate": 1.9909315092197145e-05, "loss": 0.6921156048774719, "step": 1137 }, { "epoch": 0.18189083353312555, "grad_norm": 1.344343717703087, "learning_rate": 1.9908959657573727e-05, "loss": 0.6820235252380371, "step": 1138 }, { "epoch": 0.18205066730600175, "grad_norm": 1.5504658502683641, "learning_rate": 1.9908603530943747e-05, "loss": 0.7926119565963745, "step": 1139 }, { "epoch": 0.18221050107887796, "grad_norm": 1.5715716613881152, "learning_rate": 1.9908246712332082e-05, "loss": 0.6651533246040344, "step": 1140 }, { "epoch": 0.18237033485175416, "grad_norm": 1.6266963351788006, "learning_rate": 1.9907889201763647e-05, "loss": 0.7227697372436523, "step": 1141 }, { "epoch": 0.1825301686246304, "grad_norm": 1.7365872344715003, "learning_rate": 1.990753099926341e-05, "loss": 0.8080978393554688, "step": 1142 }, { "epoch": 0.1826900023975066, "grad_norm": 1.3150034753397064, "learning_rate": 1.990717210485639e-05, "loss": 0.7323604822158813, "step": 1143 }, { "epoch": 0.1828498361703828, "grad_norm": 1.4513218657659441, "learning_rate": 1.9906812518567648e-05, "loss": 0.8394142389297485, "step": 1144 }, { "epoch": 0.183009669943259, "grad_norm": 1.2719840771281907, "learning_rate": 1.9906452240422295e-05, "loss": 0.649466335773468, "step": 1145 }, { "epoch": 0.18316950371613522, "grad_norm": 1.4033668392555672, "learning_rate": 1.990609127044549e-05, "loss": 0.7704079151153564, "step": 1146 }, { "epoch": 0.18332933748901142, "grad_norm": 1.4582236138353974, "learning_rate": 1.990572960866245e-05, "loss": 0.7098120450973511, "step": 1147 }, { "epoch": 0.18348917126188763, "grad_norm": 1.4469717679383625, "learning_rate": 1.9905367255098423e-05, "loss": 0.9093941450119019, "step": 1148 }, { "epoch": 0.18364900503476383, "grad_norm": 1.4733204042591033, "learning_rate": 1.9905004209778722e-05, "loss": 0.5974063873291016, "step": 1149 }, { "epoch": 0.18380883880764007, "grad_norm": 1.3720767804926242, "learning_rate": 1.9904640472728697e-05, "loss": 0.6387408971786499, "step": 1150 }, { "epoch": 0.18396867258051627, "grad_norm": 1.2757542035751892, "learning_rate": 1.990427604397375e-05, "loss": 0.670793354511261, "step": 1151 }, { "epoch": 0.18412850635339248, "grad_norm": 1.5700556098745384, "learning_rate": 1.990391092353933e-05, "loss": 0.8412415981292725, "step": 1152 }, { "epoch": 0.18428834012626868, "grad_norm": 1.4162223859037735, "learning_rate": 1.9903545111450938e-05, "loss": 0.7571865320205688, "step": 1153 }, { "epoch": 0.1844481738991449, "grad_norm": 1.886223573539259, "learning_rate": 1.9903178607734118e-05, "loss": 0.7440699338912964, "step": 1154 }, { "epoch": 0.1846080076720211, "grad_norm": 1.4645768168325168, "learning_rate": 1.9902811412414472e-05, "loss": 0.6340526342391968, "step": 1155 }, { "epoch": 0.1847678414448973, "grad_norm": 1.244487820750412, "learning_rate": 1.990244352551764e-05, "loss": 0.7896057367324829, "step": 1156 }, { "epoch": 0.1849276752177735, "grad_norm": 1.380668026448721, "learning_rate": 1.990207494706931e-05, "loss": 0.6975103616714478, "step": 1157 }, { "epoch": 0.1850875089906497, "grad_norm": 1.4976789727209303, "learning_rate": 1.9901705677095227e-05, "loss": 0.6923559904098511, "step": 1158 }, { "epoch": 0.18524734276352595, "grad_norm": 1.3838480422125559, "learning_rate": 1.990133571562118e-05, "loss": 0.6791424751281738, "step": 1159 }, { "epoch": 0.18540717653640215, "grad_norm": 1.13765324490782, "learning_rate": 1.9900965062673e-05, "loss": 0.5825923681259155, "step": 1160 }, { "epoch": 0.18556701030927836, "grad_norm": 1.271856432146451, "learning_rate": 1.9900593718276578e-05, "loss": 0.5966122150421143, "step": 1161 }, { "epoch": 0.18572684408215456, "grad_norm": 1.3153536395894208, "learning_rate": 1.9900221682457844e-05, "loss": 0.7398377656936646, "step": 1162 }, { "epoch": 0.18588667785503077, "grad_norm": 1.2627266350881334, "learning_rate": 1.9899848955242782e-05, "loss": 0.5503379106521606, "step": 1163 }, { "epoch": 0.18604651162790697, "grad_norm": 1.4094806181784627, "learning_rate": 1.9899475536657422e-05, "loss": 0.6385091543197632, "step": 1164 }, { "epoch": 0.18620634540078318, "grad_norm": 1.3273431599760823, "learning_rate": 1.989910142672784e-05, "loss": 0.6445037126541138, "step": 1165 }, { "epoch": 0.18636617917365939, "grad_norm": 1.3542730706984203, "learning_rate": 1.9898726625480168e-05, "loss": 0.6375089287757874, "step": 1166 }, { "epoch": 0.1865260129465356, "grad_norm": 1.4262671409482788, "learning_rate": 1.989835113294057e-05, "loss": 0.7246414422988892, "step": 1167 }, { "epoch": 0.18668584671941182, "grad_norm": 1.5333851580270792, "learning_rate": 1.989797494913528e-05, "loss": 0.6978378295898438, "step": 1168 }, { "epoch": 0.18684568049228803, "grad_norm": 1.3615108618540137, "learning_rate": 1.989759807409056e-05, "loss": 0.6485525369644165, "step": 1169 }, { "epoch": 0.18700551426516424, "grad_norm": 1.242983791917634, "learning_rate": 1.9897220507832737e-05, "loss": 0.7427802681922913, "step": 1170 }, { "epoch": 0.18716534803804044, "grad_norm": 1.5213929726182533, "learning_rate": 1.9896842250388178e-05, "loss": 0.8321880102157593, "step": 1171 }, { "epoch": 0.18732518181091665, "grad_norm": 1.2545517867772242, "learning_rate": 1.9896463301783298e-05, "loss": 0.7879867553710938, "step": 1172 }, { "epoch": 0.18748501558379285, "grad_norm": 1.2917998242986375, "learning_rate": 1.9896083662044558e-05, "loss": 0.6922857165336609, "step": 1173 }, { "epoch": 0.18764484935666906, "grad_norm": 1.3490993469217778, "learning_rate": 1.9895703331198476e-05, "loss": 0.7528950572013855, "step": 1174 }, { "epoch": 0.18780468312954526, "grad_norm": 1.323621448041208, "learning_rate": 1.9895322309271606e-05, "loss": 0.743928074836731, "step": 1175 }, { "epoch": 0.18796451690242147, "grad_norm": 1.412833675046163, "learning_rate": 1.9894940596290566e-05, "loss": 0.5722575187683105, "step": 1176 }, { "epoch": 0.1881243506752977, "grad_norm": 1.4002708533156407, "learning_rate": 1.989455819228201e-05, "loss": 0.847992479801178, "step": 1177 }, { "epoch": 0.1882841844481739, "grad_norm": 1.3499070559160746, "learning_rate": 1.9894175097272642e-05, "loss": 0.6606172323226929, "step": 1178 }, { "epoch": 0.1884440182210501, "grad_norm": 1.382409548678394, "learning_rate": 1.9893791311289217e-05, "loss": 0.7947548627853394, "step": 1179 }, { "epoch": 0.18860385199392632, "grad_norm": 1.3802541986700667, "learning_rate": 1.9893406834358537e-05, "loss": 0.5930181741714478, "step": 1180 }, { "epoch": 0.18876368576680252, "grad_norm": 1.207183439367097, "learning_rate": 1.9893021666507453e-05, "loss": 0.6329394578933716, "step": 1181 }, { "epoch": 0.18892351953967873, "grad_norm": 1.1571135783688231, "learning_rate": 1.9892635807762862e-05, "loss": 0.5638591647148132, "step": 1182 }, { "epoch": 0.18908335331255494, "grad_norm": 1.406630345681163, "learning_rate": 1.9892249258151715e-05, "loss": 0.7814106941223145, "step": 1183 }, { "epoch": 0.18924318708543114, "grad_norm": 1.5859573387377952, "learning_rate": 1.9891862017701e-05, "loss": 0.6423671841621399, "step": 1184 }, { "epoch": 0.18940302085830735, "grad_norm": 1.1318389135715554, "learning_rate": 1.989147408643777e-05, "loss": 0.6469918489456177, "step": 1185 }, { "epoch": 0.18956285463118358, "grad_norm": 1.534783222423634, "learning_rate": 1.9891085464389112e-05, "loss": 0.6230617761611938, "step": 1186 }, { "epoch": 0.18972268840405979, "grad_norm": 1.4165396534421337, "learning_rate": 1.989069615158217e-05, "loss": 0.6591713428497314, "step": 1187 }, { "epoch": 0.189882522176936, "grad_norm": 1.8479584138668799, "learning_rate": 1.9890306148044122e-05, "loss": 0.6576615571975708, "step": 1188 }, { "epoch": 0.1900423559498122, "grad_norm": 1.2960170091815706, "learning_rate": 1.988991545380221e-05, "loss": 0.7880216836929321, "step": 1189 }, { "epoch": 0.1902021897226884, "grad_norm": 1.2924005563700036, "learning_rate": 1.988952406888372e-05, "loss": 0.7574440240859985, "step": 1190 }, { "epoch": 0.1903620234955646, "grad_norm": 1.1732516526252623, "learning_rate": 1.9889131993315985e-05, "loss": 0.5732156038284302, "step": 1191 }, { "epoch": 0.19052185726844081, "grad_norm": 1.4789827317791941, "learning_rate": 1.9888739227126387e-05, "loss": 0.8722629547119141, "step": 1192 }, { "epoch": 0.19068169104131702, "grad_norm": 1.536404737103253, "learning_rate": 1.9888345770342354e-05, "loss": 0.7009774446487427, "step": 1193 }, { "epoch": 0.19084152481419323, "grad_norm": 1.4840484543572943, "learning_rate": 1.9887951622991363e-05, "loss": 0.7328234314918518, "step": 1194 }, { "epoch": 0.19100135858706946, "grad_norm": 1.4838402560665058, "learning_rate": 1.988755678510094e-05, "loss": 0.6955565214157104, "step": 1195 }, { "epoch": 0.19116119235994566, "grad_norm": 1.445971762392729, "learning_rate": 1.988716125669866e-05, "loss": 0.6565529704093933, "step": 1196 }, { "epoch": 0.19132102613282187, "grad_norm": 1.3241876555241858, "learning_rate": 1.9886765037812142e-05, "loss": 0.741100549697876, "step": 1197 }, { "epoch": 0.19148085990569808, "grad_norm": 1.365318548888243, "learning_rate": 1.9886368128469063e-05, "loss": 0.6573902368545532, "step": 1198 }, { "epoch": 0.19164069367857428, "grad_norm": 1.2586999558790817, "learning_rate": 1.9885970528697136e-05, "loss": 0.6442805528640747, "step": 1199 }, { "epoch": 0.1918005274514505, "grad_norm": 2.0243647092287462, "learning_rate": 1.988557223852413e-05, "loss": 0.769987940788269, "step": 1200 }, { "epoch": 0.1919603612243267, "grad_norm": 1.2159217949143282, "learning_rate": 1.988517325797786e-05, "loss": 0.67017662525177, "step": 1201 }, { "epoch": 0.1921201949972029, "grad_norm": 1.4408961034994685, "learning_rate": 1.988477358708619e-05, "loss": 0.7221630811691284, "step": 1202 }, { "epoch": 0.19228002877007913, "grad_norm": 1.57837479456252, "learning_rate": 1.988437322587703e-05, "loss": 0.6867338418960571, "step": 1203 }, { "epoch": 0.19243986254295534, "grad_norm": 1.3906363431319846, "learning_rate": 1.9883972174378346e-05, "loss": 0.6141301393508911, "step": 1204 }, { "epoch": 0.19259969631583154, "grad_norm": 1.5175906700080148, "learning_rate": 1.9883570432618136e-05, "loss": 0.6561384797096252, "step": 1205 }, { "epoch": 0.19275953008870775, "grad_norm": 1.4885537597099958, "learning_rate": 1.9883168000624462e-05, "loss": 0.808748722076416, "step": 1206 }, { "epoch": 0.19291936386158395, "grad_norm": 1.6384170310287014, "learning_rate": 1.988276487842543e-05, "loss": 0.7532825469970703, "step": 1207 }, { "epoch": 0.19307919763446016, "grad_norm": 1.5066300974725904, "learning_rate": 1.9882361066049184e-05, "loss": 0.7280316352844238, "step": 1208 }, { "epoch": 0.19323903140733636, "grad_norm": 1.4356795266601192, "learning_rate": 1.9881956563523935e-05, "loss": 0.7127469778060913, "step": 1209 }, { "epoch": 0.19339886518021257, "grad_norm": 1.6076864033814486, "learning_rate": 1.988155137087793e-05, "loss": 0.743291974067688, "step": 1210 }, { "epoch": 0.19355869895308878, "grad_norm": 1.4120126758361888, "learning_rate": 1.988114548813946e-05, "loss": 0.6583206653594971, "step": 1211 }, { "epoch": 0.193718532725965, "grad_norm": 1.3884758886660864, "learning_rate": 1.988073891533688e-05, "loss": 0.7329360246658325, "step": 1212 }, { "epoch": 0.19387836649884121, "grad_norm": 1.281615839320217, "learning_rate": 1.9880331652498575e-05, "loss": 0.5590286254882812, "step": 1213 }, { "epoch": 0.19403820027171742, "grad_norm": 1.3996694611870701, "learning_rate": 1.987992369965299e-05, "loss": 0.8372070789337158, "step": 1214 }, { "epoch": 0.19419803404459363, "grad_norm": 1.4534615132949278, "learning_rate": 1.9879515056828615e-05, "loss": 0.6553635001182556, "step": 1215 }, { "epoch": 0.19435786781746983, "grad_norm": 1.4798717786652853, "learning_rate": 1.987910572405399e-05, "loss": 0.6765850782394409, "step": 1216 }, { "epoch": 0.19451770159034604, "grad_norm": 1.3388940327853684, "learning_rate": 1.9878695701357696e-05, "loss": 0.761551558971405, "step": 1217 }, { "epoch": 0.19467753536322224, "grad_norm": 1.504123421707415, "learning_rate": 1.9878284988768375e-05, "loss": 0.7428072094917297, "step": 1218 }, { "epoch": 0.19483736913609845, "grad_norm": 1.3830237407067658, "learning_rate": 1.9877873586314703e-05, "loss": 0.7106081247329712, "step": 1219 }, { "epoch": 0.19499720290897465, "grad_norm": 1.1270090242746211, "learning_rate": 1.9877461494025418e-05, "loss": 0.6757584810256958, "step": 1220 }, { "epoch": 0.1951570366818509, "grad_norm": 1.3074152878320098, "learning_rate": 1.987704871192929e-05, "loss": 0.7742687463760376, "step": 1221 }, { "epoch": 0.1953168704547271, "grad_norm": 1.2488746142631575, "learning_rate": 1.9876635240055152e-05, "loss": 0.7287697792053223, "step": 1222 }, { "epoch": 0.1954767042276033, "grad_norm": 1.8104208700154676, "learning_rate": 1.987622107843188e-05, "loss": 0.7872811555862427, "step": 1223 }, { "epoch": 0.1956365380004795, "grad_norm": 1.6600217455375563, "learning_rate": 1.987580622708839e-05, "loss": 0.7155132293701172, "step": 1224 }, { "epoch": 0.1957963717733557, "grad_norm": 1.3564249000624846, "learning_rate": 1.9875390686053667e-05, "loss": 0.6928200125694275, "step": 1225 }, { "epoch": 0.19595620554623192, "grad_norm": 1.5010913554821932, "learning_rate": 1.987497445535672e-05, "loss": 0.9728653430938721, "step": 1226 }, { "epoch": 0.19611603931910812, "grad_norm": 1.3799246590533774, "learning_rate": 1.9874557535026623e-05, "loss": 0.7945585250854492, "step": 1227 }, { "epoch": 0.19627587309198433, "grad_norm": 1.5511605103100776, "learning_rate": 1.9874139925092488e-05, "loss": 0.6815810203552246, "step": 1228 }, { "epoch": 0.19643570686486053, "grad_norm": 1.261930134367777, "learning_rate": 1.987372162558348e-05, "loss": 0.7460245490074158, "step": 1229 }, { "epoch": 0.19659554063773677, "grad_norm": 1.3538014818686261, "learning_rate": 1.9873302636528818e-05, "loss": 0.778649091720581, "step": 1230 }, { "epoch": 0.19675537441061297, "grad_norm": 1.3873808799441414, "learning_rate": 1.9872882957957754e-05, "loss": 0.7961175441741943, "step": 1231 }, { "epoch": 0.19691520818348918, "grad_norm": 1.5050800011619943, "learning_rate": 1.9872462589899602e-05, "loss": 0.7200673222541809, "step": 1232 }, { "epoch": 0.19707504195636538, "grad_norm": 1.430203464572746, "learning_rate": 1.9872041532383715e-05, "loss": 0.685125470161438, "step": 1233 }, { "epoch": 0.1972348757292416, "grad_norm": 1.3316208897084862, "learning_rate": 1.9871619785439503e-05, "loss": 0.7427598237991333, "step": 1234 }, { "epoch": 0.1973947095021178, "grad_norm": 1.4224290160943738, "learning_rate": 1.9871197349096415e-05, "loss": 0.7025827169418335, "step": 1235 }, { "epoch": 0.197554543274994, "grad_norm": 1.2904614837704667, "learning_rate": 1.9870774223383956e-05, "loss": 0.7399424314498901, "step": 1236 }, { "epoch": 0.1977143770478702, "grad_norm": 1.1753464626242511, "learning_rate": 1.9870350408331672e-05, "loss": 0.6029114723205566, "step": 1237 }, { "epoch": 0.1978742108207464, "grad_norm": 1.478773595638653, "learning_rate": 1.9869925903969163e-05, "loss": 0.7918970584869385, "step": 1238 }, { "epoch": 0.19803404459362264, "grad_norm": 1.3140398416732135, "learning_rate": 1.986950071032608e-05, "loss": 0.7230225801467896, "step": 1239 }, { "epoch": 0.19819387836649885, "grad_norm": 1.381665666553232, "learning_rate": 1.9869074827432105e-05, "loss": 0.7326455116271973, "step": 1240 }, { "epoch": 0.19835371213937505, "grad_norm": 1.471108622905422, "learning_rate": 1.986864825531699e-05, "loss": 0.7948825359344482, "step": 1241 }, { "epoch": 0.19851354591225126, "grad_norm": 1.3253894793628802, "learning_rate": 1.986822099401052e-05, "loss": 0.639298677444458, "step": 1242 }, { "epoch": 0.19867337968512747, "grad_norm": 1.4425735735109997, "learning_rate": 1.9867793043542532e-05, "loss": 0.6624086499214172, "step": 1243 }, { "epoch": 0.19883321345800367, "grad_norm": 1.272616990502735, "learning_rate": 1.9867364403942925e-05, "loss": 0.7413447499275208, "step": 1244 }, { "epoch": 0.19899304723087988, "grad_norm": 1.4172138071091107, "learning_rate": 1.986693507524162e-05, "loss": 0.8354438543319702, "step": 1245 }, { "epoch": 0.19915288100375608, "grad_norm": 1.3652742204021335, "learning_rate": 1.98665050574686e-05, "loss": 0.7379753589630127, "step": 1246 }, { "epoch": 0.19931271477663232, "grad_norm": 1.2336037430435927, "learning_rate": 1.986607435065391e-05, "loss": 0.6207895278930664, "step": 1247 }, { "epoch": 0.19947254854950852, "grad_norm": 1.3223454872427474, "learning_rate": 1.986564295482761e-05, "loss": 0.5829358100891113, "step": 1248 }, { "epoch": 0.19963238232238473, "grad_norm": 1.4413174395710475, "learning_rate": 1.986521087001984e-05, "loss": 0.9301465153694153, "step": 1249 }, { "epoch": 0.19979221609526093, "grad_norm": 1.4304153045245986, "learning_rate": 1.9864778096260774e-05, "loss": 0.639299750328064, "step": 1250 }, { "epoch": 0.19995204986813714, "grad_norm": 1.4255669347096958, "learning_rate": 1.9864344633580634e-05, "loss": 0.8621479272842407, "step": 1251 }, { "epoch": 0.20011188364101334, "grad_norm": 1.3738456780201207, "learning_rate": 1.9863910482009688e-05, "loss": 0.703559160232544, "step": 1252 }, { "epoch": 0.20027171741388955, "grad_norm": 1.396138806721998, "learning_rate": 1.9863475641578257e-05, "loss": 0.8717089891433716, "step": 1253 }, { "epoch": 0.20043155118676576, "grad_norm": 1.4233902329339903, "learning_rate": 1.9863040112316713e-05, "loss": 0.6799169778823853, "step": 1254 }, { "epoch": 0.20059138495964196, "grad_norm": 1.3048599647723336, "learning_rate": 1.9862603894255468e-05, "loss": 0.5686472654342651, "step": 1255 }, { "epoch": 0.2007512187325182, "grad_norm": 1.5332011103420813, "learning_rate": 1.9862166987424983e-05, "loss": 0.7781459093093872, "step": 1256 }, { "epoch": 0.2009110525053944, "grad_norm": 1.3465587528733627, "learning_rate": 1.986172939185578e-05, "loss": 0.605546236038208, "step": 1257 }, { "epoch": 0.2010708862782706, "grad_norm": 1.2735522526444292, "learning_rate": 1.986129110757841e-05, "loss": 0.6798164248466492, "step": 1258 }, { "epoch": 0.2012307200511468, "grad_norm": 1.4104103886605792, "learning_rate": 1.9860852134623487e-05, "loss": 0.7618591785430908, "step": 1259 }, { "epoch": 0.20139055382402302, "grad_norm": 1.6080297166936761, "learning_rate": 1.9860412473021656e-05, "loss": 0.6884853839874268, "step": 1260 }, { "epoch": 0.20155038759689922, "grad_norm": 1.2545157943320684, "learning_rate": 1.985997212280364e-05, "loss": 0.6320818662643433, "step": 1261 }, { "epoch": 0.20171022136977543, "grad_norm": 1.294802867030037, "learning_rate": 1.9859531084000173e-05, "loss": 0.7378035187721252, "step": 1262 }, { "epoch": 0.20187005514265163, "grad_norm": 1.4396131927015814, "learning_rate": 1.9859089356642068e-05, "loss": 0.7360035181045532, "step": 1263 }, { "epoch": 0.20202988891552784, "grad_norm": 1.4012035587665017, "learning_rate": 1.985864694076017e-05, "loss": 0.7020284533500671, "step": 1264 }, { "epoch": 0.20218972268840407, "grad_norm": 1.26984165632573, "learning_rate": 1.9858203836385374e-05, "loss": 0.6495577692985535, "step": 1265 }, { "epoch": 0.20234955646128028, "grad_norm": 1.2764445789932564, "learning_rate": 1.985776004354863e-05, "loss": 0.6263347864151001, "step": 1266 }, { "epoch": 0.20250939023415648, "grad_norm": 1.4739108103680072, "learning_rate": 1.9857315562280923e-05, "loss": 0.8263877630233765, "step": 1267 }, { "epoch": 0.2026692240070327, "grad_norm": 1.314237538722981, "learning_rate": 1.9856870392613297e-05, "loss": 0.6558773517608643, "step": 1268 }, { "epoch": 0.2028290577799089, "grad_norm": 1.4419220094811713, "learning_rate": 1.9856424534576843e-05, "loss": 0.8104687929153442, "step": 1269 }, { "epoch": 0.2029888915527851, "grad_norm": 1.4887366387257515, "learning_rate": 1.98559779882027e-05, "loss": 0.8065711259841919, "step": 1270 }, { "epoch": 0.2031487253256613, "grad_norm": 1.2929810788199774, "learning_rate": 1.9855530753522047e-05, "loss": 0.7365837693214417, "step": 1271 }, { "epoch": 0.2033085590985375, "grad_norm": 1.2044538090530823, "learning_rate": 1.9855082830566125e-05, "loss": 0.6979198455810547, "step": 1272 }, { "epoch": 0.20346839287141372, "grad_norm": 1.3888586933832172, "learning_rate": 1.9854634219366206e-05, "loss": 0.7299173474311829, "step": 1273 }, { "epoch": 0.20362822664428995, "grad_norm": 1.1856550602489215, "learning_rate": 1.985418491995363e-05, "loss": 0.6974408626556396, "step": 1274 }, { "epoch": 0.20378806041716616, "grad_norm": 1.3179084810955264, "learning_rate": 1.9853734932359766e-05, "loss": 0.7004963159561157, "step": 1275 }, { "epoch": 0.20394789419004236, "grad_norm": 1.4293528526157242, "learning_rate": 1.9853284256616043e-05, "loss": 0.6571593284606934, "step": 1276 }, { "epoch": 0.20410772796291857, "grad_norm": 1.3872217805886324, "learning_rate": 1.9852832892753934e-05, "loss": 0.6254069805145264, "step": 1277 }, { "epoch": 0.20426756173579477, "grad_norm": 1.393780470251697, "learning_rate": 1.985238084080496e-05, "loss": 0.7431704998016357, "step": 1278 }, { "epoch": 0.20442739550867098, "grad_norm": 1.2973416316164927, "learning_rate": 1.985192810080069e-05, "loss": 0.6334558725357056, "step": 1279 }, { "epoch": 0.20458722928154718, "grad_norm": 1.3630344815763369, "learning_rate": 1.9851474672772747e-05, "loss": 0.8103378415107727, "step": 1280 }, { "epoch": 0.2047470630544234, "grad_norm": 1.3304884495589624, "learning_rate": 1.9851020556752793e-05, "loss": 0.6945733428001404, "step": 1281 }, { "epoch": 0.2049068968272996, "grad_norm": 1.1772328716114941, "learning_rate": 1.985056575277254e-05, "loss": 0.6936560869216919, "step": 1282 }, { "epoch": 0.20506673060017583, "grad_norm": 1.18841215996349, "learning_rate": 1.985011026086375e-05, "loss": 0.6094295978546143, "step": 1283 }, { "epoch": 0.20522656437305203, "grad_norm": 1.2792075368753992, "learning_rate": 1.984965408105824e-05, "loss": 0.7048581838607788, "step": 1284 }, { "epoch": 0.20538639814592824, "grad_norm": 1.354589776145139, "learning_rate": 1.984919721338786e-05, "loss": 0.7590210437774658, "step": 1285 }, { "epoch": 0.20554623191880445, "grad_norm": 1.3742422307392188, "learning_rate": 1.9848739657884515e-05, "loss": 0.7275649905204773, "step": 1286 }, { "epoch": 0.20570606569168065, "grad_norm": 1.3766747398141839, "learning_rate": 1.9848281414580167e-05, "loss": 0.7433292865753174, "step": 1287 }, { "epoch": 0.20586589946455686, "grad_norm": 1.384230262055324, "learning_rate": 1.9847822483506813e-05, "loss": 0.6704058647155762, "step": 1288 }, { "epoch": 0.20602573323743306, "grad_norm": 1.3473392339471468, "learning_rate": 1.9847362864696503e-05, "loss": 0.6595830917358398, "step": 1289 }, { "epoch": 0.20618556701030927, "grad_norm": 1.5630705611557354, "learning_rate": 1.984690255818134e-05, "loss": 0.779539942741394, "step": 1290 }, { "epoch": 0.2063454007831855, "grad_norm": 1.3558418308807043, "learning_rate": 1.9846441563993465e-05, "loss": 0.7901930809020996, "step": 1291 }, { "epoch": 0.2065052345560617, "grad_norm": 1.4275820174323155, "learning_rate": 1.984597988216507e-05, "loss": 0.6753349304199219, "step": 1292 }, { "epoch": 0.2066650683289379, "grad_norm": 1.5179426605678907, "learning_rate": 1.9845517512728404e-05, "loss": 0.703722357749939, "step": 1293 }, { "epoch": 0.20682490210181412, "grad_norm": 1.4163290577740373, "learning_rate": 1.9845054455715752e-05, "loss": 0.7355129718780518, "step": 1294 }, { "epoch": 0.20698473587469032, "grad_norm": 1.46079262689329, "learning_rate": 1.9844590711159456e-05, "loss": 0.7062946557998657, "step": 1295 }, { "epoch": 0.20714456964756653, "grad_norm": 1.2178513030363844, "learning_rate": 1.9844126279091898e-05, "loss": 0.4897298812866211, "step": 1296 }, { "epoch": 0.20730440342044273, "grad_norm": 1.5945483274852992, "learning_rate": 1.9843661159545514e-05, "loss": 0.8866924047470093, "step": 1297 }, { "epoch": 0.20746423719331894, "grad_norm": 1.5552796679008394, "learning_rate": 1.984319535255279e-05, "loss": 0.6825282573699951, "step": 1298 }, { "epoch": 0.20762407096619515, "grad_norm": 1.3759210363575491, "learning_rate": 1.9842728858146256e-05, "loss": 0.6289255619049072, "step": 1299 }, { "epoch": 0.20778390473907138, "grad_norm": 1.2905185021935808, "learning_rate": 1.9842261676358483e-05, "loss": 0.678288459777832, "step": 1300 }, { "epoch": 0.20794373851194758, "grad_norm": 1.3510012938625284, "learning_rate": 1.9841793807222102e-05, "loss": 0.6516299247741699, "step": 1301 }, { "epoch": 0.2081035722848238, "grad_norm": 1.3253771045171803, "learning_rate": 1.984132525076979e-05, "loss": 0.6387859582901001, "step": 1302 }, { "epoch": 0.2082634060577, "grad_norm": 1.3246002382631479, "learning_rate": 1.9840856007034264e-05, "loss": 0.6948815584182739, "step": 1303 }, { "epoch": 0.2084232398305762, "grad_norm": 1.2807919582411602, "learning_rate": 1.9840386076048298e-05, "loss": 0.6660309433937073, "step": 1304 }, { "epoch": 0.2085830736034524, "grad_norm": 1.3993115071136566, "learning_rate": 1.983991545784471e-05, "loss": 0.7555513381958008, "step": 1305 }, { "epoch": 0.2087429073763286, "grad_norm": 1.237584147554765, "learning_rate": 1.9839444152456368e-05, "loss": 0.6254390478134155, "step": 1306 }, { "epoch": 0.20890274114920482, "grad_norm": 1.17321800740117, "learning_rate": 1.983897215991618e-05, "loss": 0.5459407567977905, "step": 1307 }, { "epoch": 0.20906257492208102, "grad_norm": 1.58591815844038, "learning_rate": 1.983849948025711e-05, "loss": 0.6434347629547119, "step": 1308 }, { "epoch": 0.20922240869495726, "grad_norm": 1.3457470845548478, "learning_rate": 1.9838026113512176e-05, "loss": 0.6474493741989136, "step": 1309 }, { "epoch": 0.20938224246783346, "grad_norm": 1.222086084724456, "learning_rate": 1.983755205971443e-05, "loss": 0.7394979596138, "step": 1310 }, { "epoch": 0.20954207624070967, "grad_norm": 1.1979106069222532, "learning_rate": 1.983707731889698e-05, "loss": 0.5842689275741577, "step": 1311 }, { "epoch": 0.20970191001358587, "grad_norm": 1.2228882152723508, "learning_rate": 1.9836601891092974e-05, "loss": 0.738640546798706, "step": 1312 }, { "epoch": 0.20986174378646208, "grad_norm": 1.429591641014631, "learning_rate": 1.983612577633562e-05, "loss": 0.6407420635223389, "step": 1313 }, { "epoch": 0.21002157755933828, "grad_norm": 1.333493460326361, "learning_rate": 1.983564897465817e-05, "loss": 0.7112857103347778, "step": 1314 }, { "epoch": 0.2101814113322145, "grad_norm": 1.1283050817757383, "learning_rate": 1.983517148609392e-05, "loss": 0.7495163083076477, "step": 1315 }, { "epoch": 0.2103412451050907, "grad_norm": 1.683902766813452, "learning_rate": 1.9834693310676214e-05, "loss": 0.8102997541427612, "step": 1316 }, { "epoch": 0.2105010788779669, "grad_norm": 1.3694454123082098, "learning_rate": 1.983421444843845e-05, "loss": 0.5760840177536011, "step": 1317 }, { "epoch": 0.21066091265084314, "grad_norm": 1.437796628767111, "learning_rate": 1.9833734899414067e-05, "loss": 0.7583373785018921, "step": 1318 }, { "epoch": 0.21082074642371934, "grad_norm": 1.5064855671212618, "learning_rate": 1.983325466363655e-05, "loss": 0.74403315782547, "step": 1319 }, { "epoch": 0.21098058019659555, "grad_norm": 1.3046707634866257, "learning_rate": 1.983277374113945e-05, "loss": 0.5845941305160522, "step": 1320 }, { "epoch": 0.21114041396947175, "grad_norm": 1.3203522807447434, "learning_rate": 1.9832292131956344e-05, "loss": 0.6312963962554932, "step": 1321 }, { "epoch": 0.21130024774234796, "grad_norm": 1.512187944718962, "learning_rate": 1.983180983612087e-05, "loss": 0.7462144494056702, "step": 1322 }, { "epoch": 0.21146008151522416, "grad_norm": 1.3173443512717016, "learning_rate": 1.98313268536667e-05, "loss": 0.7204917669296265, "step": 1323 }, { "epoch": 0.21161991528810037, "grad_norm": 1.5547719244152463, "learning_rate": 1.983084318462758e-05, "loss": 0.7514877915382385, "step": 1324 }, { "epoch": 0.21177974906097657, "grad_norm": 1.5193930170717038, "learning_rate": 1.9830358829037272e-05, "loss": 0.5743621587753296, "step": 1325 }, { "epoch": 0.21193958283385278, "grad_norm": 1.2646951873933145, "learning_rate": 1.982987378692961e-05, "loss": 0.6736361980438232, "step": 1326 }, { "epoch": 0.212099416606729, "grad_norm": 1.5002574071225498, "learning_rate": 1.982938805833847e-05, "loss": 0.6010693311691284, "step": 1327 }, { "epoch": 0.21225925037960522, "grad_norm": 1.2040673442138063, "learning_rate": 1.9828901643297768e-05, "loss": 0.6446272134780884, "step": 1328 }, { "epoch": 0.21241908415248142, "grad_norm": 1.3689312097161044, "learning_rate": 1.9828414541841474e-05, "loss": 0.8774561882019043, "step": 1329 }, { "epoch": 0.21257891792535763, "grad_norm": 1.2913940621549826, "learning_rate": 1.9827926754003608e-05, "loss": 0.6246641874313354, "step": 1330 }, { "epoch": 0.21273875169823384, "grad_norm": 1.318773623493509, "learning_rate": 1.9827438279818235e-05, "loss": 0.7197943925857544, "step": 1331 }, { "epoch": 0.21289858547111004, "grad_norm": 1.2156719523133153, "learning_rate": 1.9826949119319467e-05, "loss": 0.6055973172187805, "step": 1332 }, { "epoch": 0.21305841924398625, "grad_norm": 1.424133559416207, "learning_rate": 1.9826459272541462e-05, "loss": 0.7336833477020264, "step": 1333 }, { "epoch": 0.21321825301686245, "grad_norm": 1.4283410031818355, "learning_rate": 1.9825968739518434e-05, "loss": 0.7671831846237183, "step": 1334 }, { "epoch": 0.21337808678973866, "grad_norm": 1.5009574722328967, "learning_rate": 1.982547752028464e-05, "loss": 0.7161750793457031, "step": 1335 }, { "epoch": 0.2135379205626149, "grad_norm": 1.6015772443275524, "learning_rate": 1.9824985614874386e-05, "loss": 0.7644939422607422, "step": 1336 }, { "epoch": 0.2136977543354911, "grad_norm": 1.2468326032090868, "learning_rate": 1.982449302332202e-05, "loss": 0.5946794152259827, "step": 1337 }, { "epoch": 0.2138575881083673, "grad_norm": 1.3828844367575561, "learning_rate": 1.9823999745661947e-05, "loss": 0.7119749784469604, "step": 1338 }, { "epoch": 0.2140174218812435, "grad_norm": 1.6109712701405543, "learning_rate": 1.9823505781928614e-05, "loss": 0.7094067335128784, "step": 1339 }, { "epoch": 0.2141772556541197, "grad_norm": 1.1186736137922157, "learning_rate": 1.9823011132156516e-05, "loss": 0.5945286750793457, "step": 1340 }, { "epoch": 0.21433708942699592, "grad_norm": 1.3273667599429628, "learning_rate": 1.9822515796380204e-05, "loss": 0.7252005338668823, "step": 1341 }, { "epoch": 0.21449692319987212, "grad_norm": 1.5493755607117905, "learning_rate": 1.9822019774634263e-05, "loss": 0.6503769755363464, "step": 1342 }, { "epoch": 0.21465675697274833, "grad_norm": 1.4945347689594253, "learning_rate": 1.982152306695334e-05, "loss": 0.8383272886276245, "step": 1343 }, { "epoch": 0.21481659074562456, "grad_norm": 1.3517629811379557, "learning_rate": 1.9821025673372115e-05, "loss": 0.5533391237258911, "step": 1344 }, { "epoch": 0.21497642451850077, "grad_norm": 1.23973317121528, "learning_rate": 1.982052759392533e-05, "loss": 0.6199306845664978, "step": 1345 }, { "epoch": 0.21513625829137697, "grad_norm": 1.2085365288602143, "learning_rate": 1.9820028828647772e-05, "loss": 0.6535028219223022, "step": 1346 }, { "epoch": 0.21529609206425318, "grad_norm": 1.2082006600418302, "learning_rate": 1.9819529377574265e-05, "loss": 0.6253442764282227, "step": 1347 }, { "epoch": 0.2154559258371294, "grad_norm": 1.4556367213121353, "learning_rate": 1.9819029240739697e-05, "loss": 0.6708614826202393, "step": 1348 }, { "epoch": 0.2156157596100056, "grad_norm": 1.1628001829602048, "learning_rate": 1.9818528418178987e-05, "loss": 0.5892277956008911, "step": 1349 }, { "epoch": 0.2157755933828818, "grad_norm": 1.3475614230966495, "learning_rate": 1.981802690992712e-05, "loss": 0.6867453455924988, "step": 1350 }, { "epoch": 0.215935427155758, "grad_norm": 1.4256926970208539, "learning_rate": 1.9817524716019112e-05, "loss": 0.6828614473342896, "step": 1351 }, { "epoch": 0.2160952609286342, "grad_norm": 1.5628436679604976, "learning_rate": 1.981702183649004e-05, "loss": 0.6696404218673706, "step": 1352 }, { "epoch": 0.21625509470151044, "grad_norm": 1.3197608590144596, "learning_rate": 1.981651827137502e-05, "loss": 0.668402910232544, "step": 1353 }, { "epoch": 0.21641492847438665, "grad_norm": 1.3911873060386077, "learning_rate": 1.9816014020709217e-05, "loss": 0.6281450390815735, "step": 1354 }, { "epoch": 0.21657476224726285, "grad_norm": 1.3088860302915848, "learning_rate": 1.9815509084527852e-05, "loss": 0.7604254484176636, "step": 1355 }, { "epoch": 0.21673459602013906, "grad_norm": 1.4091525638933027, "learning_rate": 1.9815003462866186e-05, "loss": 0.6012227535247803, "step": 1356 }, { "epoch": 0.21689442979301526, "grad_norm": 1.9310123082554476, "learning_rate": 1.9814497155759527e-05, "loss": 0.7080192565917969, "step": 1357 }, { "epoch": 0.21705426356589147, "grad_norm": 1.40404754757062, "learning_rate": 1.9813990163243235e-05, "loss": 0.6913294792175293, "step": 1358 }, { "epoch": 0.21721409733876768, "grad_norm": 1.3645516445249657, "learning_rate": 1.9813482485352718e-05, "loss": 0.7439720630645752, "step": 1359 }, { "epoch": 0.21737393111164388, "grad_norm": 1.3670934659432832, "learning_rate": 1.9812974122123432e-05, "loss": 0.6457796096801758, "step": 1360 }, { "epoch": 0.2175337648845201, "grad_norm": 1.3010097950921702, "learning_rate": 1.9812465073590874e-05, "loss": 0.7400883436203003, "step": 1361 }, { "epoch": 0.21769359865739632, "grad_norm": 1.7121904700852717, "learning_rate": 1.9811955339790597e-05, "loss": 0.7822026014328003, "step": 1362 }, { "epoch": 0.21785343243027253, "grad_norm": 1.4877515949706255, "learning_rate": 1.98114449207582e-05, "loss": 0.8554891347885132, "step": 1363 }, { "epoch": 0.21801326620314873, "grad_norm": 1.3153072802956753, "learning_rate": 1.9810933816529326e-05, "loss": 0.7372326850891113, "step": 1364 }, { "epoch": 0.21817309997602494, "grad_norm": 1.5387451313914413, "learning_rate": 1.981042202713967e-05, "loss": 0.8676271438598633, "step": 1365 }, { "epoch": 0.21833293374890114, "grad_norm": 1.1990652064994656, "learning_rate": 1.980990955262498e-05, "loss": 0.7093222141265869, "step": 1366 }, { "epoch": 0.21849276752177735, "grad_norm": 1.6488420181142107, "learning_rate": 1.980939639302103e-05, "loss": 0.8306069374084473, "step": 1367 }, { "epoch": 0.21865260129465355, "grad_norm": 1.1809417594625193, "learning_rate": 1.9808882548363675e-05, "loss": 0.6525511741638184, "step": 1368 }, { "epoch": 0.21881243506752976, "grad_norm": 1.168642074037725, "learning_rate": 1.9808368018688787e-05, "loss": 0.7065588235855103, "step": 1369 }, { "epoch": 0.21897226884040596, "grad_norm": 1.3977664662150373, "learning_rate": 1.9807852804032306e-05, "loss": 0.7997630834579468, "step": 1370 }, { "epoch": 0.2191321026132822, "grad_norm": 1.4129855376716296, "learning_rate": 1.980733690443021e-05, "loss": 0.717704176902771, "step": 1371 }, { "epoch": 0.2192919363861584, "grad_norm": 1.248272249576425, "learning_rate": 1.9806820319918526e-05, "loss": 0.6906048059463501, "step": 1372 }, { "epoch": 0.2194517701590346, "grad_norm": 1.1968536881858167, "learning_rate": 1.9806303050533334e-05, "loss": 0.700929582118988, "step": 1373 }, { "epoch": 0.21961160393191081, "grad_norm": 1.41204143339676, "learning_rate": 1.980578509631076e-05, "loss": 0.7202860713005066, "step": 1374 }, { "epoch": 0.21977143770478702, "grad_norm": 1.312655769126824, "learning_rate": 1.980526645728697e-05, "loss": 0.6344460844993591, "step": 1375 }, { "epoch": 0.21993127147766323, "grad_norm": 1.2737088862884898, "learning_rate": 1.9804747133498185e-05, "loss": 0.6529449820518494, "step": 1376 }, { "epoch": 0.22009110525053943, "grad_norm": 1.213417690849843, "learning_rate": 1.9804227124980677e-05, "loss": 0.6059368848800659, "step": 1377 }, { "epoch": 0.22025093902341564, "grad_norm": 1.220259028490108, "learning_rate": 1.980370643177076e-05, "loss": 0.7258337736129761, "step": 1378 }, { "epoch": 0.22041077279629184, "grad_norm": 1.41654145218461, "learning_rate": 1.9803185053904793e-05, "loss": 0.7047377824783325, "step": 1379 }, { "epoch": 0.22057060656916808, "grad_norm": 1.4308242683206074, "learning_rate": 1.9802662991419192e-05, "loss": 0.6176800727844238, "step": 1380 }, { "epoch": 0.22073044034204428, "grad_norm": 1.301013432673452, "learning_rate": 1.9802140244350415e-05, "loss": 0.7061741948127747, "step": 1381 }, { "epoch": 0.2208902741149205, "grad_norm": 1.5344821178351415, "learning_rate": 1.9801616812734968e-05, "loss": 0.68290776014328, "step": 1382 }, { "epoch": 0.2210501078877967, "grad_norm": 1.7315641090428355, "learning_rate": 1.9801092696609407e-05, "loss": 0.8434194326400757, "step": 1383 }, { "epoch": 0.2212099416606729, "grad_norm": 1.3695237282106123, "learning_rate": 1.9800567896010335e-05, "loss": 0.7708989977836609, "step": 1384 }, { "epoch": 0.2213697754335491, "grad_norm": 1.4067012461180373, "learning_rate": 1.9800042410974398e-05, "loss": 0.7915180325508118, "step": 1385 }, { "epoch": 0.2215296092064253, "grad_norm": 1.5280402800022042, "learning_rate": 1.9799516241538295e-05, "loss": 0.6937187910079956, "step": 1386 }, { "epoch": 0.22168944297930152, "grad_norm": 1.362895156035701, "learning_rate": 1.9798989387738776e-05, "loss": 0.7566078901290894, "step": 1387 }, { "epoch": 0.22184927675217775, "grad_norm": 1.587502216840986, "learning_rate": 1.979846184961263e-05, "loss": 0.8235206604003906, "step": 1388 }, { "epoch": 0.22200911052505395, "grad_norm": 1.5076499669355088, "learning_rate": 1.9797933627196702e-05, "loss": 0.7972155213356018, "step": 1389 }, { "epoch": 0.22216894429793016, "grad_norm": 1.337319444110225, "learning_rate": 1.979740472052788e-05, "loss": 0.6400808095932007, "step": 1390 }, { "epoch": 0.22232877807080637, "grad_norm": 1.2784971410589168, "learning_rate": 1.97968751296431e-05, "loss": 0.7010924816131592, "step": 1391 }, { "epoch": 0.22248861184368257, "grad_norm": 1.4239128739783526, "learning_rate": 1.9796344854579346e-05, "loss": 0.6878113746643066, "step": 1392 }, { "epoch": 0.22264844561655878, "grad_norm": 1.3366737273233298, "learning_rate": 1.9795813895373653e-05, "loss": 0.5702048540115356, "step": 1393 }, { "epoch": 0.22280827938943498, "grad_norm": 1.2175560422866942, "learning_rate": 1.97952822520631e-05, "loss": 0.6681894063949585, "step": 1394 }, { "epoch": 0.2229681131623112, "grad_norm": 1.3625871083245262, "learning_rate": 1.9794749924684814e-05, "loss": 0.6245666742324829, "step": 1395 }, { "epoch": 0.2231279469351874, "grad_norm": 1.364496313013051, "learning_rate": 1.9794216913275973e-05, "loss": 0.6381571292877197, "step": 1396 }, { "epoch": 0.22328778070806363, "grad_norm": 1.3350681530018997, "learning_rate": 1.97936832178738e-05, "loss": 0.7915291786193848, "step": 1397 }, { "epoch": 0.22344761448093983, "grad_norm": 1.4248715805166172, "learning_rate": 1.9793148838515567e-05, "loss": 0.708747386932373, "step": 1398 }, { "epoch": 0.22360744825381604, "grad_norm": 1.397262693537426, "learning_rate": 1.979261377523859e-05, "loss": 0.691439688205719, "step": 1399 }, { "epoch": 0.22376728202669224, "grad_norm": 1.3939936258428733, "learning_rate": 1.979207802808024e-05, "loss": 0.7179487943649292, "step": 1400 }, { "epoch": 0.22392711579956845, "grad_norm": 1.5211684787945234, "learning_rate": 1.9791541597077926e-05, "loss": 0.811821699142456, "step": 1401 }, { "epoch": 0.22408694957244465, "grad_norm": 1.4378905442964318, "learning_rate": 1.979100448226912e-05, "loss": 0.7620618343353271, "step": 1402 }, { "epoch": 0.22424678334532086, "grad_norm": 1.4796187697631344, "learning_rate": 1.9790466683691324e-05, "loss": 0.7416234612464905, "step": 1403 }, { "epoch": 0.22440661711819707, "grad_norm": 1.459846286378065, "learning_rate": 1.97899282013821e-05, "loss": 0.6941238641738892, "step": 1404 }, { "epoch": 0.22456645089107327, "grad_norm": 1.380991867947981, "learning_rate": 1.978938903537905e-05, "loss": 0.6968086957931519, "step": 1405 }, { "epoch": 0.2247262846639495, "grad_norm": 1.3961744553762176, "learning_rate": 1.978884918571983e-05, "loss": 0.7770566940307617, "step": 1406 }, { "epoch": 0.2248861184368257, "grad_norm": 1.4904237040282262, "learning_rate": 1.9788308652442137e-05, "loss": 0.6526232361793518, "step": 1407 }, { "epoch": 0.22504595220970192, "grad_norm": 1.3861834618213051, "learning_rate": 1.978776743558373e-05, "loss": 0.7074083089828491, "step": 1408 }, { "epoch": 0.22520578598257812, "grad_norm": 1.2839420642322097, "learning_rate": 1.9787225535182397e-05, "loss": 0.7248469591140747, "step": 1409 }, { "epoch": 0.22536561975545433, "grad_norm": 1.0844064402944795, "learning_rate": 1.9786682951275983e-05, "loss": 0.6136362552642822, "step": 1410 }, { "epoch": 0.22552545352833053, "grad_norm": 1.4019846355284806, "learning_rate": 1.978613968390238e-05, "loss": 0.5342234373092651, "step": 1411 }, { "epoch": 0.22568528730120674, "grad_norm": 1.3704101034761869, "learning_rate": 1.9785595733099537e-05, "loss": 0.6830645799636841, "step": 1412 }, { "epoch": 0.22584512107408294, "grad_norm": 1.5304023876337545, "learning_rate": 1.978505109890543e-05, "loss": 0.7615850567817688, "step": 1413 }, { "epoch": 0.22600495484695915, "grad_norm": 1.401770185909509, "learning_rate": 1.9784505781358096e-05, "loss": 0.7548065185546875, "step": 1414 }, { "epoch": 0.22616478861983538, "grad_norm": 1.4928800894331482, "learning_rate": 1.9783959780495623e-05, "loss": 0.7315211296081543, "step": 1415 }, { "epoch": 0.2263246223927116, "grad_norm": 1.3840246447564406, "learning_rate": 1.978341309635614e-05, "loss": 0.735817551612854, "step": 1416 }, { "epoch": 0.2264844561655878, "grad_norm": 1.2107479083417032, "learning_rate": 1.9782865728977826e-05, "loss": 0.7010113596916199, "step": 1417 }, { "epoch": 0.226644289938464, "grad_norm": 1.3477598186533917, "learning_rate": 1.9782317678398905e-05, "loss": 0.7730410695075989, "step": 1418 }, { "epoch": 0.2268041237113402, "grad_norm": 1.405610784691655, "learning_rate": 1.978176894465765e-05, "loss": 0.6698378324508667, "step": 1419 }, { "epoch": 0.2269639574842164, "grad_norm": 1.4876459048398256, "learning_rate": 1.9781219527792387e-05, "loss": 0.7644287943840027, "step": 1420 }, { "epoch": 0.22712379125709262, "grad_norm": 1.503528686894615, "learning_rate": 1.9780669427841482e-05, "loss": 0.8123547434806824, "step": 1421 }, { "epoch": 0.22728362502996882, "grad_norm": 1.8521548509029737, "learning_rate": 1.9780118644843353e-05, "loss": 0.7187423706054688, "step": 1422 }, { "epoch": 0.22744345880284503, "grad_norm": 1.3678218829228541, "learning_rate": 1.9779567178836464e-05, "loss": 0.7010666728019714, "step": 1423 }, { "epoch": 0.22760329257572126, "grad_norm": 1.3726320408570447, "learning_rate": 1.9779015029859327e-05, "loss": 0.7575660943984985, "step": 1424 }, { "epoch": 0.22776312634859747, "grad_norm": 1.4235858490960374, "learning_rate": 1.977846219795051e-05, "loss": 0.765648603439331, "step": 1425 }, { "epoch": 0.22792296012147367, "grad_norm": 1.2936712636054999, "learning_rate": 1.9777908683148607e-05, "loss": 0.6524199843406677, "step": 1426 }, { "epoch": 0.22808279389434988, "grad_norm": 1.3430322197161293, "learning_rate": 1.977735448549228e-05, "loss": 0.7845069169998169, "step": 1427 }, { "epoch": 0.22824262766722608, "grad_norm": 1.3060170675654532, "learning_rate": 1.9776799605020236e-05, "loss": 0.7372259497642517, "step": 1428 }, { "epoch": 0.2284024614401023, "grad_norm": 1.448598393575071, "learning_rate": 1.9776244041771222e-05, "loss": 0.8302007913589478, "step": 1429 }, { "epoch": 0.2285622952129785, "grad_norm": 1.4707253821243487, "learning_rate": 1.977568779578404e-05, "loss": 0.772087812423706, "step": 1430 }, { "epoch": 0.2287221289858547, "grad_norm": 1.2482767470842777, "learning_rate": 1.977513086709753e-05, "loss": 0.7554785013198853, "step": 1431 }, { "epoch": 0.22888196275873093, "grad_norm": 1.698866065950161, "learning_rate": 1.977457325575059e-05, "loss": 0.5602648258209229, "step": 1432 }, { "epoch": 0.22904179653160714, "grad_norm": 1.409738151136477, "learning_rate": 1.977401496178216e-05, "loss": 0.7753527760505676, "step": 1433 }, { "epoch": 0.22920163030448334, "grad_norm": 1.4738241330638406, "learning_rate": 1.977345598523123e-05, "loss": 0.8351249098777771, "step": 1434 }, { "epoch": 0.22936146407735955, "grad_norm": 1.198863774099769, "learning_rate": 1.977289632613684e-05, "loss": 0.5526580810546875, "step": 1435 }, { "epoch": 0.22952129785023576, "grad_norm": 1.2762332453253413, "learning_rate": 1.977233598453807e-05, "loss": 0.7613028287887573, "step": 1436 }, { "epoch": 0.22968113162311196, "grad_norm": 1.205754102528273, "learning_rate": 1.9771774960474056e-05, "loss": 0.608036994934082, "step": 1437 }, { "epoch": 0.22984096539598817, "grad_norm": 1.363275806399939, "learning_rate": 1.977121325398397e-05, "loss": 0.6119382381439209, "step": 1438 }, { "epoch": 0.23000079916886437, "grad_norm": 1.4539636950509613, "learning_rate": 1.977065086510705e-05, "loss": 0.7501673698425293, "step": 1439 }, { "epoch": 0.23016063294174058, "grad_norm": 1.4269153922345632, "learning_rate": 1.977008779388257e-05, "loss": 0.7205017805099487, "step": 1440 }, { "epoch": 0.2303204667146168, "grad_norm": 1.4626061393481085, "learning_rate": 1.9769524040349844e-05, "loss": 0.7004523277282715, "step": 1441 }, { "epoch": 0.23048030048749302, "grad_norm": 1.3199234203747987, "learning_rate": 1.9768959604548253e-05, "loss": 0.6921421885490417, "step": 1442 }, { "epoch": 0.23064013426036922, "grad_norm": 1.273003944226971, "learning_rate": 1.9768394486517208e-05, "loss": 0.7366555333137512, "step": 1443 }, { "epoch": 0.23079996803324543, "grad_norm": 1.3957103511289972, "learning_rate": 1.9767828686296178e-05, "loss": 0.6983602046966553, "step": 1444 }, { "epoch": 0.23095980180612163, "grad_norm": 1.3593919239226162, "learning_rate": 1.9767262203924672e-05, "loss": 0.7562505006790161, "step": 1445 }, { "epoch": 0.23111963557899784, "grad_norm": 1.4485278857245416, "learning_rate": 1.976669503944226e-05, "loss": 0.6996701955795288, "step": 1446 }, { "epoch": 0.23127946935187405, "grad_norm": 1.1650123432529653, "learning_rate": 1.9766127192888543e-05, "loss": 0.5562084317207336, "step": 1447 }, { "epoch": 0.23143930312475025, "grad_norm": 1.4063623450372604, "learning_rate": 1.976555866430318e-05, "loss": 0.7308400273323059, "step": 1448 }, { "epoch": 0.23159913689762646, "grad_norm": 1.219542379269848, "learning_rate": 1.9764989453725874e-05, "loss": 0.6313849687576294, "step": 1449 }, { "epoch": 0.2317589706705027, "grad_norm": 1.3610271412498003, "learning_rate": 1.9764419561196382e-05, "loss": 0.5207926034927368, "step": 1450 }, { "epoch": 0.2319188044433789, "grad_norm": 1.4699902015966686, "learning_rate": 1.9763848986754495e-05, "loss": 0.7856417894363403, "step": 1451 }, { "epoch": 0.2320786382162551, "grad_norm": 1.316155633410428, "learning_rate": 1.9763277730440066e-05, "loss": 0.5867819786071777, "step": 1452 }, { "epoch": 0.2322384719891313, "grad_norm": 1.5754583391122283, "learning_rate": 1.9762705792292986e-05, "loss": 0.6897380948066711, "step": 1453 }, { "epoch": 0.2323983057620075, "grad_norm": 1.2481378575940187, "learning_rate": 1.97621331723532e-05, "loss": 0.7864158153533936, "step": 1454 }, { "epoch": 0.23255813953488372, "grad_norm": 1.4640119338083672, "learning_rate": 1.9761559870660693e-05, "loss": 0.7826555967330933, "step": 1455 }, { "epoch": 0.23271797330775992, "grad_norm": 1.5470321208482811, "learning_rate": 1.9760985887255504e-05, "loss": 0.7396694421768188, "step": 1456 }, { "epoch": 0.23287780708063613, "grad_norm": 1.4671400984737828, "learning_rate": 1.9760411222177726e-05, "loss": 0.6486523151397705, "step": 1457 }, { "epoch": 0.23303764085351233, "grad_norm": 1.2810708145864138, "learning_rate": 1.9759835875467483e-05, "loss": 0.6117856502532959, "step": 1458 }, { "epoch": 0.23319747462638857, "grad_norm": 1.3659663290561574, "learning_rate": 1.975925984716496e-05, "loss": 0.6987011432647705, "step": 1459 }, { "epoch": 0.23335730839926477, "grad_norm": 1.4258094127811731, "learning_rate": 1.9758683137310374e-05, "loss": 0.7038440704345703, "step": 1460 }, { "epoch": 0.23351714217214098, "grad_norm": 1.4544874827763847, "learning_rate": 1.9758105745944014e-05, "loss": 0.7783602476119995, "step": 1461 }, { "epoch": 0.23367697594501718, "grad_norm": 1.4049035313834806, "learning_rate": 1.9757527673106196e-05, "loss": 0.652577817440033, "step": 1462 }, { "epoch": 0.2338368097178934, "grad_norm": 1.5565544096219925, "learning_rate": 1.9756948918837294e-05, "loss": 0.8217934370040894, "step": 1463 }, { "epoch": 0.2339966434907696, "grad_norm": 1.2352597607007423, "learning_rate": 1.9756369483177722e-05, "loss": 0.6369031667709351, "step": 1464 }, { "epoch": 0.2341564772636458, "grad_norm": 1.418416064050854, "learning_rate": 1.9755789366167947e-05, "loss": 0.6844276785850525, "step": 1465 }, { "epoch": 0.234316311036522, "grad_norm": 1.2205412513976046, "learning_rate": 1.9755208567848484e-05, "loss": 0.6462321281433105, "step": 1466 }, { "epoch": 0.2344761448093982, "grad_norm": 1.2654438956029272, "learning_rate": 1.9754627088259894e-05, "loss": 0.6700056195259094, "step": 1467 }, { "epoch": 0.23463597858227445, "grad_norm": 1.4187909401534982, "learning_rate": 1.9754044927442782e-05, "loss": 0.6843726634979248, "step": 1468 }, { "epoch": 0.23479581235515065, "grad_norm": 1.6964633162715859, "learning_rate": 1.975346208543781e-05, "loss": 0.7429804801940918, "step": 1469 }, { "epoch": 0.23495564612802686, "grad_norm": 1.178089863601815, "learning_rate": 1.9752878562285676e-05, "loss": 0.6438348293304443, "step": 1470 }, { "epoch": 0.23511547990090306, "grad_norm": 1.6949369729321642, "learning_rate": 1.9752294358027133e-05, "loss": 0.751465916633606, "step": 1471 }, { "epoch": 0.23527531367377927, "grad_norm": 1.4903084037374623, "learning_rate": 1.9751709472702983e-05, "loss": 0.6774715185165405, "step": 1472 }, { "epoch": 0.23543514744665547, "grad_norm": 1.225603923184818, "learning_rate": 1.9751123906354067e-05, "loss": 0.5574661493301392, "step": 1473 }, { "epoch": 0.23559498121953168, "grad_norm": 1.4835952574356712, "learning_rate": 1.975053765902128e-05, "loss": 0.6940405368804932, "step": 1474 }, { "epoch": 0.23575481499240789, "grad_norm": 1.3847934620343936, "learning_rate": 1.9749950730745565e-05, "loss": 0.714057207107544, "step": 1475 }, { "epoch": 0.2359146487652841, "grad_norm": 1.2999961836413818, "learning_rate": 1.9749363121567913e-05, "loss": 0.7151186466217041, "step": 1476 }, { "epoch": 0.23607448253816032, "grad_norm": 1.46741752881831, "learning_rate": 1.974877483152936e-05, "loss": 0.746310293674469, "step": 1477 }, { "epoch": 0.23623431631103653, "grad_norm": 1.532290452473716, "learning_rate": 1.974818586067098e-05, "loss": 0.7298638224601746, "step": 1478 }, { "epoch": 0.23639415008391274, "grad_norm": 1.1496430132916005, "learning_rate": 1.974759620903392e-05, "loss": 0.6862781047821045, "step": 1479 }, { "epoch": 0.23655398385678894, "grad_norm": 1.2662531718730943, "learning_rate": 1.974700587665935e-05, "loss": 0.6673654317855835, "step": 1480 }, { "epoch": 0.23671381762966515, "grad_norm": 1.4828436786885049, "learning_rate": 1.97464148635885e-05, "loss": 0.7758495211601257, "step": 1481 }, { "epoch": 0.23687365140254135, "grad_norm": 1.444528643904843, "learning_rate": 1.9745823169862645e-05, "loss": 0.6963504552841187, "step": 1482 }, { "epoch": 0.23703348517541756, "grad_norm": 1.499814199881076, "learning_rate": 1.97452307955231e-05, "loss": 0.7819448709487915, "step": 1483 }, { "epoch": 0.23719331894829376, "grad_norm": 1.419096210014126, "learning_rate": 1.9744637740611242e-05, "loss": 0.814679741859436, "step": 1484 }, { "epoch": 0.23735315272117, "grad_norm": 1.212359982406446, "learning_rate": 1.9744044005168486e-05, "loss": 0.7085659503936768, "step": 1485 }, { "epoch": 0.2375129864940462, "grad_norm": 1.4367345407250804, "learning_rate": 1.9743449589236293e-05, "loss": 0.7697277069091797, "step": 1486 }, { "epoch": 0.2376728202669224, "grad_norm": 1.2467028422872326, "learning_rate": 1.9742854492856178e-05, "loss": 0.5868037939071655, "step": 1487 }, { "epoch": 0.2378326540397986, "grad_norm": 1.1822045697193702, "learning_rate": 1.97422587160697e-05, "loss": 0.6250841617584229, "step": 1488 }, { "epoch": 0.23799248781267482, "grad_norm": 1.4317641567907173, "learning_rate": 1.9741662258918467e-05, "loss": 0.6798715591430664, "step": 1489 }, { "epoch": 0.23815232158555102, "grad_norm": 1.4403009703530638, "learning_rate": 1.974106512144413e-05, "loss": 0.7897815704345703, "step": 1490 }, { "epoch": 0.23831215535842723, "grad_norm": 1.2539356448646983, "learning_rate": 1.9740467303688394e-05, "loss": 0.5161728858947754, "step": 1491 }, { "epoch": 0.23847198913130344, "grad_norm": 1.5111596146600492, "learning_rate": 1.973986880569301e-05, "loss": 0.8051695227622986, "step": 1492 }, { "epoch": 0.23863182290417964, "grad_norm": 1.235311281172091, "learning_rate": 1.9739269627499766e-05, "loss": 0.6701176166534424, "step": 1493 }, { "epoch": 0.23879165667705587, "grad_norm": 1.1925100683681684, "learning_rate": 1.9738669769150517e-05, "loss": 0.6484909057617188, "step": 1494 }, { "epoch": 0.23895149044993208, "grad_norm": 1.5489716639053452, "learning_rate": 1.973806923068715e-05, "loss": 0.6468074321746826, "step": 1495 }, { "epoch": 0.23911132422280829, "grad_norm": 1.2513878733413633, "learning_rate": 1.9737468012151607e-05, "loss": 0.6344358921051025, "step": 1496 }, { "epoch": 0.2392711579956845, "grad_norm": 1.1133737451090744, "learning_rate": 1.9736866113585872e-05, "loss": 0.5740172863006592, "step": 1497 }, { "epoch": 0.2394309917685607, "grad_norm": 1.3129355189261531, "learning_rate": 1.9736263535031985e-05, "loss": 0.6262648701667786, "step": 1498 }, { "epoch": 0.2395908255414369, "grad_norm": 1.5256074577338865, "learning_rate": 1.973566027653202e-05, "loss": 0.767176628112793, "step": 1499 }, { "epoch": 0.2397506593143131, "grad_norm": 1.315589919549915, "learning_rate": 1.9735056338128112e-05, "loss": 0.6243873834609985, "step": 1500 }, { "epoch": 0.2399104930871893, "grad_norm": 1.7234535810248444, "learning_rate": 1.9734451719862434e-05, "loss": 0.7948981523513794, "step": 1501 }, { "epoch": 0.24007032686006552, "grad_norm": 1.4194383057787123, "learning_rate": 1.9733846421777213e-05, "loss": 0.7267893552780151, "step": 1502 }, { "epoch": 0.24023016063294175, "grad_norm": 1.3761851254390354, "learning_rate": 1.973324044391472e-05, "loss": 0.7906004190444946, "step": 1503 }, { "epoch": 0.24038999440581796, "grad_norm": 1.4959271599107575, "learning_rate": 1.973263378631728e-05, "loss": 0.8793166279792786, "step": 1504 }, { "epoch": 0.24054982817869416, "grad_norm": 1.4079062259403383, "learning_rate": 1.9732026449027247e-05, "loss": 0.7288076877593994, "step": 1505 }, { "epoch": 0.24070966195157037, "grad_norm": 1.3510978053989549, "learning_rate": 1.9731418432087046e-05, "loss": 0.7646291255950928, "step": 1506 }, { "epoch": 0.24086949572444658, "grad_norm": 1.2505610996900232, "learning_rate": 1.9730809735539134e-05, "loss": 0.8066853880882263, "step": 1507 }, { "epoch": 0.24102932949732278, "grad_norm": 1.1316246650131387, "learning_rate": 1.9730200359426027e-05, "loss": 0.566561222076416, "step": 1508 }, { "epoch": 0.241189163270199, "grad_norm": 1.4731677858829058, "learning_rate": 1.9729590303790275e-05, "loss": 0.7987436056137085, "step": 1509 }, { "epoch": 0.2413489970430752, "grad_norm": 1.360285298409976, "learning_rate": 1.9728979568674478e-05, "loss": 0.7303330898284912, "step": 1510 }, { "epoch": 0.2415088308159514, "grad_norm": 1.3648364954012835, "learning_rate": 1.97283681541213e-05, "loss": 0.8333615064620972, "step": 1511 }, { "epoch": 0.24166866458882763, "grad_norm": 1.2736281327194023, "learning_rate": 1.9727756060173428e-05, "loss": 0.7218985557556152, "step": 1512 }, { "epoch": 0.24182849836170384, "grad_norm": 1.536937092677792, "learning_rate": 1.9727143286873617e-05, "loss": 0.5926717519760132, "step": 1513 }, { "epoch": 0.24198833213458004, "grad_norm": 1.429891361742943, "learning_rate": 1.9726529834264655e-05, "loss": 0.7787671089172363, "step": 1514 }, { "epoch": 0.24214816590745625, "grad_norm": 1.297876779717012, "learning_rate": 1.9725915702389388e-05, "loss": 0.6760900020599365, "step": 1515 }, { "epoch": 0.24230799968033245, "grad_norm": 1.4579997016486241, "learning_rate": 1.9725300891290702e-05, "loss": 0.6839381456375122, "step": 1516 }, { "epoch": 0.24246783345320866, "grad_norm": 1.2993390094183304, "learning_rate": 1.9724685401011533e-05, "loss": 0.7553150057792664, "step": 1517 }, { "epoch": 0.24262766722608486, "grad_norm": 1.4983324626498362, "learning_rate": 1.9724069231594866e-05, "loss": 0.8267725706100464, "step": 1518 }, { "epoch": 0.24278750099896107, "grad_norm": 1.38786778479545, "learning_rate": 1.9723452383083736e-05, "loss": 0.7053337097167969, "step": 1519 }, { "epoch": 0.24294733477183728, "grad_norm": 1.3829996567939076, "learning_rate": 1.972283485552121e-05, "loss": 0.6373224258422852, "step": 1520 }, { "epoch": 0.2431071685447135, "grad_norm": 1.552825722861561, "learning_rate": 1.9722216648950426e-05, "loss": 0.7589160799980164, "step": 1521 }, { "epoch": 0.24326700231758971, "grad_norm": 1.139738426353205, "learning_rate": 1.9721597763414552e-05, "loss": 0.6699864268302917, "step": 1522 }, { "epoch": 0.24342683609046592, "grad_norm": 1.3158405254419319, "learning_rate": 1.9720978198956807e-05, "loss": 0.6331802606582642, "step": 1523 }, { "epoch": 0.24358666986334213, "grad_norm": 1.247141514959625, "learning_rate": 1.9720357955620465e-05, "loss": 0.7600693106651306, "step": 1524 }, { "epoch": 0.24374650363621833, "grad_norm": 1.3345753860259943, "learning_rate": 1.9719737033448835e-05, "loss": 0.7265281677246094, "step": 1525 }, { "epoch": 0.24390633740909454, "grad_norm": 1.2605441490523392, "learning_rate": 1.9719115432485282e-05, "loss": 0.7328777313232422, "step": 1526 }, { "epoch": 0.24406617118197074, "grad_norm": 1.296277522845149, "learning_rate": 1.971849315277322e-05, "loss": 0.8103433847427368, "step": 1527 }, { "epoch": 0.24422600495484695, "grad_norm": 1.342128231333001, "learning_rate": 1.9717870194356105e-05, "loss": 0.6944591999053955, "step": 1528 }, { "epoch": 0.24438583872772318, "grad_norm": 1.0781199621114508, "learning_rate": 1.9717246557277437e-05, "loss": 0.6089047193527222, "step": 1529 }, { "epoch": 0.2445456725005994, "grad_norm": 1.398157925416954, "learning_rate": 1.971662224158078e-05, "loss": 0.6234419941902161, "step": 1530 }, { "epoch": 0.2447055062734756, "grad_norm": 1.2996456236978626, "learning_rate": 1.971599724730972e-05, "loss": 0.6591331958770752, "step": 1531 }, { "epoch": 0.2448653400463518, "grad_norm": 1.286529103754645, "learning_rate": 1.9715371574507915e-05, "loss": 0.6790498495101929, "step": 1532 }, { "epoch": 0.245025173819228, "grad_norm": 1.4059011580044305, "learning_rate": 1.9714745223219056e-05, "loss": 0.6558626890182495, "step": 1533 }, { "epoch": 0.2451850075921042, "grad_norm": 1.3143077762016417, "learning_rate": 1.9714118193486883e-05, "loss": 0.767539381980896, "step": 1534 }, { "epoch": 0.24534484136498041, "grad_norm": 1.408503054066658, "learning_rate": 1.9713490485355194e-05, "loss": 0.7004050016403198, "step": 1535 }, { "epoch": 0.24550467513785662, "grad_norm": 1.203000966776458, "learning_rate": 1.971286209886781e-05, "loss": 0.6651173830032349, "step": 1536 }, { "epoch": 0.24566450891073283, "grad_norm": 1.307811940699873, "learning_rate": 1.9712233034068635e-05, "loss": 0.654621958732605, "step": 1537 }, { "epoch": 0.24582434268360906, "grad_norm": 1.4484816173879136, "learning_rate": 1.9711603291001585e-05, "loss": 0.7956559658050537, "step": 1538 }, { "epoch": 0.24598417645648527, "grad_norm": 1.6610017572269986, "learning_rate": 1.9710972869710647e-05, "loss": 0.8360828757286072, "step": 1539 }, { "epoch": 0.24614401022936147, "grad_norm": 1.3026862899605127, "learning_rate": 1.9710341770239845e-05, "loss": 0.6559914350509644, "step": 1540 }, { "epoch": 0.24630384400223768, "grad_norm": 1.4360757090035159, "learning_rate": 1.970970999263325e-05, "loss": 0.6766985654830933, "step": 1541 }, { "epoch": 0.24646367777511388, "grad_norm": 1.2658555184939033, "learning_rate": 1.970907753693499e-05, "loss": 0.8297581672668457, "step": 1542 }, { "epoch": 0.2466235115479901, "grad_norm": 1.4607296242904388, "learning_rate": 1.9708444403189224e-05, "loss": 0.7198971509933472, "step": 1543 }, { "epoch": 0.2467833453208663, "grad_norm": 1.2500641848441427, "learning_rate": 1.9707810591440178e-05, "loss": 0.7011252641677856, "step": 1544 }, { "epoch": 0.2469431790937425, "grad_norm": 1.4527351215147994, "learning_rate": 1.9707176101732107e-05, "loss": 0.6545218229293823, "step": 1545 }, { "epoch": 0.2471030128666187, "grad_norm": 1.1227416302494904, "learning_rate": 1.9706540934109328e-05, "loss": 0.5012578964233398, "step": 1546 }, { "epoch": 0.24726284663949494, "grad_norm": 1.2571921189376065, "learning_rate": 1.9705905088616195e-05, "loss": 0.7132161855697632, "step": 1547 }, { "epoch": 0.24742268041237114, "grad_norm": 1.1746277590848522, "learning_rate": 1.9705268565297112e-05, "loss": 0.6761974096298218, "step": 1548 }, { "epoch": 0.24758251418524735, "grad_norm": 1.2959311294464329, "learning_rate": 1.9704631364196536e-05, "loss": 0.6574461460113525, "step": 1549 }, { "epoch": 0.24774234795812355, "grad_norm": 1.501273091133035, "learning_rate": 1.9703993485358963e-05, "loss": 0.822643518447876, "step": 1550 }, { "epoch": 0.24790218173099976, "grad_norm": 1.3478543756979444, "learning_rate": 1.9703354928828943e-05, "loss": 0.675794780254364, "step": 1551 }, { "epoch": 0.24806201550387597, "grad_norm": 1.3226537122036714, "learning_rate": 1.9702715694651067e-05, "loss": 0.5968230962753296, "step": 1552 }, { "epoch": 0.24822184927675217, "grad_norm": 1.329375764882718, "learning_rate": 1.9702075782869982e-05, "loss": 0.7713773250579834, "step": 1553 }, { "epoch": 0.24838168304962838, "grad_norm": 1.2845045285498105, "learning_rate": 1.970143519353037e-05, "loss": 0.6205570697784424, "step": 1554 }, { "epoch": 0.24854151682250458, "grad_norm": 1.6116468508094746, "learning_rate": 1.9700793926676972e-05, "loss": 0.9462058544158936, "step": 1555 }, { "epoch": 0.24870135059538082, "grad_norm": 1.1874348565934483, "learning_rate": 1.9700151982354573e-05, "loss": 0.6761001348495483, "step": 1556 }, { "epoch": 0.24886118436825702, "grad_norm": 1.2591260647822702, "learning_rate": 1.9699509360608002e-05, "loss": 0.6595338582992554, "step": 1557 }, { "epoch": 0.24902101814113323, "grad_norm": 1.4402328384005365, "learning_rate": 1.969886606148214e-05, "loss": 0.6294066905975342, "step": 1558 }, { "epoch": 0.24918085191400943, "grad_norm": 1.2841411100259212, "learning_rate": 1.969822208502191e-05, "loss": 0.760164201259613, "step": 1559 }, { "epoch": 0.24934068568688564, "grad_norm": 1.3533186782494073, "learning_rate": 1.969757743127228e-05, "loss": 0.7323276400566101, "step": 1560 }, { "epoch": 0.24950051945976184, "grad_norm": 1.353847283564294, "learning_rate": 1.969693210027828e-05, "loss": 0.6517594456672668, "step": 1561 }, { "epoch": 0.24966035323263805, "grad_norm": 1.4851343430085815, "learning_rate": 1.9696286092084975e-05, "loss": 0.7845602631568909, "step": 1562 }, { "epoch": 0.24982018700551425, "grad_norm": 1.2609470777329008, "learning_rate": 1.9695639406737478e-05, "loss": 0.759488046169281, "step": 1563 }, { "epoch": 0.24998002077839046, "grad_norm": 1.3625200083820967, "learning_rate": 1.969499204428095e-05, "loss": 0.7268551588058472, "step": 1564 }, { "epoch": 0.2501398545512667, "grad_norm": 1.3278290289188803, "learning_rate": 1.9694344004760602e-05, "loss": 0.700222373008728, "step": 1565 }, { "epoch": 0.2502996883241429, "grad_norm": 1.2659846279042712, "learning_rate": 1.9693695288221694e-05, "loss": 0.6777544617652893, "step": 1566 }, { "epoch": 0.2504595220970191, "grad_norm": 1.16399883669983, "learning_rate": 1.9693045894709524e-05, "loss": 0.6549234390258789, "step": 1567 }, { "epoch": 0.2506193558698953, "grad_norm": 1.2869430473616283, "learning_rate": 1.969239582426945e-05, "loss": 0.7561033964157104, "step": 1568 }, { "epoch": 0.2507791896427715, "grad_norm": 1.2659552046759732, "learning_rate": 1.969174507694686e-05, "loss": 0.7113642692565918, "step": 1569 }, { "epoch": 0.2509390234156477, "grad_norm": 1.604325763937548, "learning_rate": 1.969109365278721e-05, "loss": 0.5812934041023254, "step": 1570 }, { "epoch": 0.2510988571885239, "grad_norm": 1.3812309892047938, "learning_rate": 1.9690441551835994e-05, "loss": 0.645480751991272, "step": 1571 }, { "epoch": 0.25125869096140013, "grad_norm": 1.477609862086931, "learning_rate": 1.968978877413875e-05, "loss": 0.7515904307365417, "step": 1572 }, { "epoch": 0.25141852473427634, "grad_norm": 1.2699708257027682, "learning_rate": 1.9689135319741056e-05, "loss": 0.8324103355407715, "step": 1573 }, { "epoch": 0.25157835850715254, "grad_norm": 1.443815357188874, "learning_rate": 1.968848118868856e-05, "loss": 0.6741470098495483, "step": 1574 }, { "epoch": 0.25173819228002875, "grad_norm": 1.3846253538315265, "learning_rate": 1.968782638102694e-05, "loss": 0.6454814672470093, "step": 1575 }, { "epoch": 0.25189802605290496, "grad_norm": 1.3804163596349786, "learning_rate": 1.968717089680192e-05, "loss": 0.7931051254272461, "step": 1576 }, { "epoch": 0.25205785982578116, "grad_norm": 1.514268050216139, "learning_rate": 1.9686514736059285e-05, "loss": 0.6720920205116272, "step": 1577 }, { "epoch": 0.2522176935986574, "grad_norm": 1.323983908344112, "learning_rate": 1.9685857898844855e-05, "loss": 0.6611518263816833, "step": 1578 }, { "epoch": 0.2523775273715336, "grad_norm": 1.5825214488105825, "learning_rate": 1.96852003852045e-05, "loss": 0.6363953948020935, "step": 1579 }, { "epoch": 0.25253736114440983, "grad_norm": 1.4009993367533429, "learning_rate": 1.9684542195184142e-05, "loss": 0.6886722445487976, "step": 1580 }, { "epoch": 0.25269719491728604, "grad_norm": 1.3749622346228592, "learning_rate": 1.9683883328829744e-05, "loss": 0.6678167581558228, "step": 1581 }, { "epoch": 0.25285702869016224, "grad_norm": 1.5917519711395849, "learning_rate": 1.968322378618732e-05, "loss": 0.7219483852386475, "step": 1582 }, { "epoch": 0.25301686246303845, "grad_norm": 1.4249440202166967, "learning_rate": 1.9682563567302928e-05, "loss": 0.6508514881134033, "step": 1583 }, { "epoch": 0.25317669623591466, "grad_norm": 1.2721639177555002, "learning_rate": 1.9681902672222675e-05, "loss": 0.8733874559402466, "step": 1584 }, { "epoch": 0.25333653000879086, "grad_norm": 1.3790631835079878, "learning_rate": 1.968124110099272e-05, "loss": 0.77411949634552, "step": 1585 }, { "epoch": 0.25349636378166707, "grad_norm": 1.5419111584904113, "learning_rate": 1.9680578853659265e-05, "loss": 0.6930632591247559, "step": 1586 }, { "epoch": 0.2536561975545433, "grad_norm": 1.4846390285886992, "learning_rate": 1.9679915930268553e-05, "loss": 0.6258266568183899, "step": 1587 }, { "epoch": 0.2538160313274195, "grad_norm": 1.456262749819701, "learning_rate": 1.9679252330866883e-05, "loss": 0.8018940091133118, "step": 1588 }, { "epoch": 0.2539758651002957, "grad_norm": 1.5247950520092568, "learning_rate": 1.96785880555006e-05, "loss": 0.6687588691711426, "step": 1589 }, { "epoch": 0.2541356988731719, "grad_norm": 1.3085040230945921, "learning_rate": 1.967792310421609e-05, "loss": 0.6524564027786255, "step": 1590 }, { "epoch": 0.2542955326460481, "grad_norm": 1.5946928156233098, "learning_rate": 1.9677257477059802e-05, "loss": 0.9201748967170715, "step": 1591 }, { "epoch": 0.2544553664189243, "grad_norm": 1.395417502136074, "learning_rate": 1.967659117407821e-05, "loss": 0.7082047462463379, "step": 1592 }, { "epoch": 0.2546152001918005, "grad_norm": 1.2644872545354362, "learning_rate": 1.9675924195317847e-05, "loss": 0.6076304316520691, "step": 1593 }, { "epoch": 0.2547750339646767, "grad_norm": 1.269956665671109, "learning_rate": 1.9675256540825296e-05, "loss": 0.6120024919509888, "step": 1594 }, { "epoch": 0.254934867737553, "grad_norm": 1.2942396715843034, "learning_rate": 1.9674588210647184e-05, "loss": 0.7616837024688721, "step": 1595 }, { "epoch": 0.2550947015104292, "grad_norm": 1.3114708522674652, "learning_rate": 1.967391920483018e-05, "loss": 0.7411924600601196, "step": 1596 }, { "epoch": 0.2552545352833054, "grad_norm": 1.4565315169824347, "learning_rate": 1.9673249523421015e-05, "loss": 0.7987037897109985, "step": 1597 }, { "epoch": 0.2554143690561816, "grad_norm": 1.4622745300487108, "learning_rate": 1.9672579166466444e-05, "loss": 0.6989274621009827, "step": 1598 }, { "epoch": 0.2555742028290578, "grad_norm": 1.4260134691092616, "learning_rate": 1.9671908134013293e-05, "loss": 0.7098203897476196, "step": 1599 }, { "epoch": 0.255734036601934, "grad_norm": 1.2271413900199062, "learning_rate": 1.967123642610842e-05, "loss": 0.7331365346908569, "step": 1600 }, { "epoch": 0.2558938703748102, "grad_norm": 1.4438747761560093, "learning_rate": 1.9670564042798738e-05, "loss": 0.6981710195541382, "step": 1601 }, { "epoch": 0.2560537041476864, "grad_norm": 1.3837884374742588, "learning_rate": 1.9669890984131195e-05, "loss": 0.6234681606292725, "step": 1602 }, { "epoch": 0.2562135379205626, "grad_norm": 1.314936714129418, "learning_rate": 1.9669217250152805e-05, "loss": 0.6964513063430786, "step": 1603 }, { "epoch": 0.2563733716934388, "grad_norm": 1.2672987610825097, "learning_rate": 1.9668542840910615e-05, "loss": 0.5839202404022217, "step": 1604 }, { "epoch": 0.25653320546631503, "grad_norm": 1.225008039301722, "learning_rate": 1.9667867756451723e-05, "loss": 0.6441745162010193, "step": 1605 }, { "epoch": 0.25669303923919123, "grad_norm": 1.2725025961026504, "learning_rate": 1.9667191996823273e-05, "loss": 0.6470555067062378, "step": 1606 }, { "epoch": 0.25685287301206744, "grad_norm": 1.3175100954968044, "learning_rate": 1.9666515562072463e-05, "loss": 0.7032459378242493, "step": 1607 }, { "epoch": 0.25701270678494365, "grad_norm": 1.534191382060693, "learning_rate": 1.9665838452246528e-05, "loss": 0.8317437171936035, "step": 1608 }, { "epoch": 0.25717254055781985, "grad_norm": 1.2733309679726346, "learning_rate": 1.9665160667392756e-05, "loss": 0.5756364464759827, "step": 1609 }, { "epoch": 0.25733237433069606, "grad_norm": 1.2555286220477904, "learning_rate": 1.9664482207558483e-05, "loss": 0.7088730335235596, "step": 1610 }, { "epoch": 0.25749220810357226, "grad_norm": 1.8748861868981979, "learning_rate": 1.966380307279109e-05, "loss": 0.731549084186554, "step": 1611 }, { "epoch": 0.25765204187644847, "grad_norm": 1.3178522918242987, "learning_rate": 1.9663123263138003e-05, "loss": 0.6999553442001343, "step": 1612 }, { "epoch": 0.25781187564932473, "grad_norm": 1.3975008286227015, "learning_rate": 1.9662442778646697e-05, "loss": 0.6326725482940674, "step": 1613 }, { "epoch": 0.25797170942220093, "grad_norm": 1.247617413857823, "learning_rate": 1.96617616193647e-05, "loss": 0.7044098377227783, "step": 1614 }, { "epoch": 0.25813154319507714, "grad_norm": 1.5917024633517816, "learning_rate": 1.9661079785339577e-05, "loss": 0.6394310593605042, "step": 1615 }, { "epoch": 0.25829137696795335, "grad_norm": 1.551040783892071, "learning_rate": 1.9660397276618942e-05, "loss": 0.7173164486885071, "step": 1616 }, { "epoch": 0.25845121074082955, "grad_norm": 1.7149516231394815, "learning_rate": 1.9659714093250466e-05, "loss": 0.6946667432785034, "step": 1617 }, { "epoch": 0.25861104451370576, "grad_norm": 1.3807905149330726, "learning_rate": 1.9659030235281858e-05, "loss": 0.7541089057922363, "step": 1618 }, { "epoch": 0.25877087828658196, "grad_norm": 1.514077181539667, "learning_rate": 1.9658345702760876e-05, "loss": 0.5994028449058533, "step": 1619 }, { "epoch": 0.25893071205945817, "grad_norm": 1.2933365041034377, "learning_rate": 1.965766049573532e-05, "loss": 0.586869478225708, "step": 1620 }, { "epoch": 0.2590905458323344, "grad_norm": 1.4989148064881044, "learning_rate": 1.965697461425305e-05, "loss": 0.7351793050765991, "step": 1621 }, { "epoch": 0.2592503796052106, "grad_norm": 1.3334920994221682, "learning_rate": 1.9656288058361963e-05, "loss": 0.7333091497421265, "step": 1622 }, { "epoch": 0.2594102133780868, "grad_norm": 1.5741793487457008, "learning_rate": 1.9655600828110003e-05, "loss": 0.7011978626251221, "step": 1623 }, { "epoch": 0.259570047150963, "grad_norm": 1.5556591216490796, "learning_rate": 1.965491292354517e-05, "loss": 0.5750409364700317, "step": 1624 }, { "epoch": 0.2597298809238392, "grad_norm": 1.174407268526578, "learning_rate": 1.96542243447155e-05, "loss": 0.6376796960830688, "step": 1625 }, { "epoch": 0.2598897146967154, "grad_norm": 1.65229188953694, "learning_rate": 1.9653535091669077e-05, "loss": 0.7129979133605957, "step": 1626 }, { "epoch": 0.2600495484695916, "grad_norm": 1.2519928375943183, "learning_rate": 1.9652845164454044e-05, "loss": 0.7000892162322998, "step": 1627 }, { "epoch": 0.2602093822424678, "grad_norm": 1.379018010715368, "learning_rate": 1.9652154563118578e-05, "loss": 0.7106137275695801, "step": 1628 }, { "epoch": 0.260369216015344, "grad_norm": 1.466002278284771, "learning_rate": 1.965146328771091e-05, "loss": 0.7312677502632141, "step": 1629 }, { "epoch": 0.2605290497882202, "grad_norm": 1.3428784031656618, "learning_rate": 1.9650771338279317e-05, "loss": 0.662976861000061, "step": 1630 }, { "epoch": 0.2606888835610965, "grad_norm": 1.3069463358110842, "learning_rate": 1.965007871487212e-05, "loss": 0.7840933799743652, "step": 1631 }, { "epoch": 0.2608487173339727, "grad_norm": 1.7377068756026308, "learning_rate": 1.9649385417537692e-05, "loss": 0.5997519493103027, "step": 1632 }, { "epoch": 0.2610085511068489, "grad_norm": 1.6684395816414705, "learning_rate": 1.9648691446324448e-05, "loss": 0.7751398682594299, "step": 1633 }, { "epoch": 0.2611683848797251, "grad_norm": 1.2100324467326837, "learning_rate": 1.9647996801280854e-05, "loss": 0.5927606821060181, "step": 1634 }, { "epoch": 0.2613282186526013, "grad_norm": 1.3577184446732846, "learning_rate": 1.964730148245542e-05, "loss": 0.6446875333786011, "step": 1635 }, { "epoch": 0.2614880524254775, "grad_norm": 1.148901830965533, "learning_rate": 1.9646605489896707e-05, "loss": 0.7261097431182861, "step": 1636 }, { "epoch": 0.2616478861983537, "grad_norm": 1.227957841881305, "learning_rate": 1.964590882365332e-05, "loss": 0.6126832365989685, "step": 1637 }, { "epoch": 0.2618077199712299, "grad_norm": 1.174398348382907, "learning_rate": 1.9645211483773913e-05, "loss": 0.5395327806472778, "step": 1638 }, { "epoch": 0.26196755374410613, "grad_norm": 1.295882847731849, "learning_rate": 1.964451347030718e-05, "loss": 0.6139845848083496, "step": 1639 }, { "epoch": 0.26212738751698234, "grad_norm": 1.3532403041522538, "learning_rate": 1.964381478330187e-05, "loss": 0.645352840423584, "step": 1640 }, { "epoch": 0.26228722128985854, "grad_norm": 1.56096796336008, "learning_rate": 1.964311542280678e-05, "loss": 0.7135288715362549, "step": 1641 }, { "epoch": 0.26244705506273475, "grad_norm": 1.2217372977289087, "learning_rate": 1.9642415388870754e-05, "loss": 0.5582029223442078, "step": 1642 }, { "epoch": 0.26260688883561095, "grad_norm": 1.3674941193674164, "learning_rate": 1.964171468154267e-05, "loss": 0.7233625054359436, "step": 1643 }, { "epoch": 0.26276672260848716, "grad_norm": 1.5426783704302394, "learning_rate": 1.964101330087147e-05, "loss": 0.8595319986343384, "step": 1644 }, { "epoch": 0.26292655638136336, "grad_norm": 1.3631054757185443, "learning_rate": 1.9640311246906136e-05, "loss": 0.6204942464828491, "step": 1645 }, { "epoch": 0.26308639015423957, "grad_norm": 1.3328442967486986, "learning_rate": 1.963960851969569e-05, "loss": 0.6368395090103149, "step": 1646 }, { "epoch": 0.2632462239271158, "grad_norm": 1.535275352196398, "learning_rate": 1.9638905119289215e-05, "loss": 0.7410032749176025, "step": 1647 }, { "epoch": 0.26340605769999204, "grad_norm": 1.2361292552511316, "learning_rate": 1.9638201045735833e-05, "loss": 0.5779759883880615, "step": 1648 }, { "epoch": 0.26356589147286824, "grad_norm": 1.4159587817347479, "learning_rate": 1.9637496299084713e-05, "loss": 0.6503702402114868, "step": 1649 }, { "epoch": 0.26372572524574445, "grad_norm": 1.5684134680674737, "learning_rate": 1.9636790879385066e-05, "loss": 0.6921496391296387, "step": 1650 }, { "epoch": 0.26388555901862065, "grad_norm": 1.191725148886986, "learning_rate": 1.9636084786686172e-05, "loss": 0.6913032531738281, "step": 1651 }, { "epoch": 0.26404539279149686, "grad_norm": 1.4144668299914052, "learning_rate": 1.9635378021037325e-05, "loss": 0.6610362529754639, "step": 1652 }, { "epoch": 0.26420522656437306, "grad_norm": 1.3052050268342994, "learning_rate": 1.963467058248789e-05, "loss": 0.5618107914924622, "step": 1653 }, { "epoch": 0.26436506033724927, "grad_norm": 1.1891153769166865, "learning_rate": 1.9633962471087275e-05, "loss": 0.7532593607902527, "step": 1654 }, { "epoch": 0.2645248941101255, "grad_norm": 1.4726746117445946, "learning_rate": 1.9633253686884928e-05, "loss": 0.755352258682251, "step": 1655 }, { "epoch": 0.2646847278830017, "grad_norm": 1.245457904034842, "learning_rate": 1.963254422993035e-05, "loss": 0.5610772371292114, "step": 1656 }, { "epoch": 0.2648445616558779, "grad_norm": 1.6256862365466955, "learning_rate": 1.9631834100273082e-05, "loss": 0.7309484481811523, "step": 1657 }, { "epoch": 0.2650043954287541, "grad_norm": 1.1245862329801724, "learning_rate": 1.9631123297962723e-05, "loss": 0.6001459360122681, "step": 1658 }, { "epoch": 0.2651642292016303, "grad_norm": 1.2875296591886245, "learning_rate": 1.9630411823048912e-05, "loss": 0.8333244323730469, "step": 1659 }, { "epoch": 0.2653240629745065, "grad_norm": 1.3492178168120899, "learning_rate": 1.9629699675581332e-05, "loss": 0.8023163080215454, "step": 1660 }, { "epoch": 0.2654838967473827, "grad_norm": 1.2350871060611692, "learning_rate": 1.962898685560972e-05, "loss": 0.593690037727356, "step": 1661 }, { "epoch": 0.2656437305202589, "grad_norm": 2.357943410387117, "learning_rate": 1.9628273363183858e-05, "loss": 0.6380280256271362, "step": 1662 }, { "epoch": 0.2658035642931351, "grad_norm": 1.3261850844869905, "learning_rate": 1.962755919835357e-05, "loss": 0.5973507165908813, "step": 1663 }, { "epoch": 0.2659633980660113, "grad_norm": 1.0999081757593656, "learning_rate": 1.9626844361168733e-05, "loss": 0.5228163003921509, "step": 1664 }, { "epoch": 0.26612323183888753, "grad_norm": 1.184178847053964, "learning_rate": 1.962612885167927e-05, "loss": 0.6211236715316772, "step": 1665 }, { "epoch": 0.2662830656117638, "grad_norm": 1.4042974200425764, "learning_rate": 1.9625412669935146e-05, "loss": 0.8085689544677734, "step": 1666 }, { "epoch": 0.26644289938464, "grad_norm": 1.4075741941515187, "learning_rate": 1.9624695815986383e-05, "loss": 0.6266421675682068, "step": 1667 }, { "epoch": 0.2666027331575162, "grad_norm": 1.4565426077116577, "learning_rate": 1.9623978289883034e-05, "loss": 0.6725727319717407, "step": 1668 }, { "epoch": 0.2667625669303924, "grad_norm": 1.489423538833034, "learning_rate": 1.9623260091675216e-05, "loss": 0.6690022945404053, "step": 1669 }, { "epoch": 0.2669224007032686, "grad_norm": 1.2597839546679543, "learning_rate": 1.962254122141308e-05, "loss": 0.666772723197937, "step": 1670 }, { "epoch": 0.2670822344761448, "grad_norm": 1.445022758060651, "learning_rate": 1.9621821679146836e-05, "loss": 0.6236975193023682, "step": 1671 }, { "epoch": 0.267242068249021, "grad_norm": 1.3367179263788809, "learning_rate": 1.962110146492673e-05, "loss": 0.8341723680496216, "step": 1672 }, { "epoch": 0.26740190202189723, "grad_norm": 1.2597444118190106, "learning_rate": 1.962038057880306e-05, "loss": 0.5359981060028076, "step": 1673 }, { "epoch": 0.26756173579477344, "grad_norm": 1.2391303821715014, "learning_rate": 1.961965902082617e-05, "loss": 0.674939751625061, "step": 1674 }, { "epoch": 0.26772156956764964, "grad_norm": 1.6253921917514187, "learning_rate": 1.961893679104645e-05, "loss": 0.8418779969215393, "step": 1675 }, { "epoch": 0.26788140334052585, "grad_norm": 1.541113709788851, "learning_rate": 1.961821388951434e-05, "loss": 0.8186821341514587, "step": 1676 }, { "epoch": 0.26804123711340205, "grad_norm": 1.4132288967421682, "learning_rate": 1.961749031628032e-05, "loss": 0.5913512706756592, "step": 1677 }, { "epoch": 0.26820107088627826, "grad_norm": 1.353871784150506, "learning_rate": 1.9616766071394932e-05, "loss": 0.6854456663131714, "step": 1678 }, { "epoch": 0.26836090465915446, "grad_norm": 1.160993944301492, "learning_rate": 1.9616041154908745e-05, "loss": 0.5227072238922119, "step": 1679 }, { "epoch": 0.26852073843203067, "grad_norm": 1.3669011049704318, "learning_rate": 1.9615315566872388e-05, "loss": 0.7746537327766418, "step": 1680 }, { "epoch": 0.2686805722049069, "grad_norm": 1.2947031776828093, "learning_rate": 1.9614589307336533e-05, "loss": 0.6722708940505981, "step": 1681 }, { "epoch": 0.2688404059777831, "grad_norm": 1.4255228418315427, "learning_rate": 1.9613862376351902e-05, "loss": 0.6843346357345581, "step": 1682 }, { "epoch": 0.2690002397506593, "grad_norm": 1.3294054322047122, "learning_rate": 1.9613134773969257e-05, "loss": 0.7153303623199463, "step": 1683 }, { "epoch": 0.26916007352353555, "grad_norm": 1.3105593871508774, "learning_rate": 1.9612406500239415e-05, "loss": 0.7340210676193237, "step": 1684 }, { "epoch": 0.26931990729641175, "grad_norm": 1.4635161548263578, "learning_rate": 1.9611677555213235e-05, "loss": 0.8326092958450317, "step": 1685 }, { "epoch": 0.26947974106928796, "grad_norm": 1.4261396764229295, "learning_rate": 1.9610947938941623e-05, "loss": 0.7315328121185303, "step": 1686 }, { "epoch": 0.26963957484216416, "grad_norm": 1.3729363076492935, "learning_rate": 1.961021765147553e-05, "loss": 0.6234256625175476, "step": 1687 }, { "epoch": 0.26979940861504037, "grad_norm": 1.6396496470160835, "learning_rate": 1.9609486692865962e-05, "loss": 0.658631443977356, "step": 1688 }, { "epoch": 0.2699592423879166, "grad_norm": 1.7233857493017386, "learning_rate": 1.9608755063163963e-05, "loss": 0.8039625883102417, "step": 1689 }, { "epoch": 0.2701190761607928, "grad_norm": 1.283748724240632, "learning_rate": 1.9608022762420634e-05, "loss": 0.5473562479019165, "step": 1690 }, { "epoch": 0.270278909933669, "grad_norm": 1.3089495980097712, "learning_rate": 1.9607289790687104e-05, "loss": 0.6612862944602966, "step": 1691 }, { "epoch": 0.2704387437065452, "grad_norm": 1.1832789010899056, "learning_rate": 1.9606556148014572e-05, "loss": 0.6112768054008484, "step": 1692 }, { "epoch": 0.2705985774794214, "grad_norm": 1.5406115868016597, "learning_rate": 1.960582183445427e-05, "loss": 0.7427144646644592, "step": 1693 }, { "epoch": 0.2707584112522976, "grad_norm": 1.5971551129649249, "learning_rate": 1.9605086850057476e-05, "loss": 0.845478355884552, "step": 1694 }, { "epoch": 0.2709182450251738, "grad_norm": 1.3871874834922633, "learning_rate": 1.9604351194875523e-05, "loss": 0.6666054725646973, "step": 1695 }, { "epoch": 0.27107807879805, "grad_norm": 1.5934312793605792, "learning_rate": 1.9603614868959786e-05, "loss": 0.8560802936553955, "step": 1696 }, { "epoch": 0.2712379125709262, "grad_norm": 1.3827917371513256, "learning_rate": 1.9602877872361684e-05, "loss": 0.7584503293037415, "step": 1697 }, { "epoch": 0.2713977463438024, "grad_norm": 1.2857467589218379, "learning_rate": 1.960214020513269e-05, "loss": 0.638446033000946, "step": 1698 }, { "epoch": 0.27155758011667863, "grad_norm": 1.2866870225049147, "learning_rate": 1.9601401867324324e-05, "loss": 0.6865583062171936, "step": 1699 }, { "epoch": 0.27171741388955484, "grad_norm": 1.4623782401168826, "learning_rate": 1.960066285898814e-05, "loss": 0.8091509342193604, "step": 1700 }, { "epoch": 0.2718772476624311, "grad_norm": 1.6127271215154002, "learning_rate": 1.9599923180175747e-05, "loss": 0.7302850484848022, "step": 1701 }, { "epoch": 0.2720370814353073, "grad_norm": 1.2860726540384584, "learning_rate": 1.9599182830938814e-05, "loss": 0.6036243438720703, "step": 1702 }, { "epoch": 0.2721969152081835, "grad_norm": 1.2618386613694534, "learning_rate": 1.9598441811329032e-05, "loss": 0.7101293206214905, "step": 1703 }, { "epoch": 0.2723567489810597, "grad_norm": 1.3414701871372037, "learning_rate": 1.959770012139816e-05, "loss": 0.699309766292572, "step": 1704 }, { "epoch": 0.2725165827539359, "grad_norm": 1.219004851176218, "learning_rate": 1.9596957761197984e-05, "loss": 0.6157147288322449, "step": 1705 }, { "epoch": 0.2726764165268121, "grad_norm": 1.184537993463572, "learning_rate": 1.9596214730780357e-05, "loss": 0.6198773384094238, "step": 1706 }, { "epoch": 0.27283625029968833, "grad_norm": 1.2656841859752643, "learning_rate": 1.9595471030197165e-05, "loss": 0.5893261432647705, "step": 1707 }, { "epoch": 0.27299608407256454, "grad_norm": 1.279848942470761, "learning_rate": 1.9594726659500353e-05, "loss": 0.7415104508399963, "step": 1708 }, { "epoch": 0.27315591784544074, "grad_norm": 1.1660839138965795, "learning_rate": 1.9593981618741895e-05, "loss": 0.6659318208694458, "step": 1709 }, { "epoch": 0.27331575161831695, "grad_norm": 1.5727233358640387, "learning_rate": 1.959323590797383e-05, "loss": 0.6537609100341797, "step": 1710 }, { "epoch": 0.27347558539119315, "grad_norm": 1.1558739758087664, "learning_rate": 1.959248952724823e-05, "loss": 0.6140319108963013, "step": 1711 }, { "epoch": 0.27363541916406936, "grad_norm": 1.4607415457358368, "learning_rate": 1.9591742476617222e-05, "loss": 0.7112525701522827, "step": 1712 }, { "epoch": 0.27379525293694557, "grad_norm": 1.3884851269322527, "learning_rate": 1.959099475613298e-05, "loss": 0.7675604820251465, "step": 1713 }, { "epoch": 0.27395508670982177, "grad_norm": 1.30009604154473, "learning_rate": 1.959024636584772e-05, "loss": 0.5273449420928955, "step": 1714 }, { "epoch": 0.274114920482698, "grad_norm": 1.2008948011239158, "learning_rate": 1.95894973058137e-05, "loss": 0.6763789653778076, "step": 1715 }, { "epoch": 0.2742747542555742, "grad_norm": 1.7146215252003751, "learning_rate": 1.9588747576083245e-05, "loss": 0.7397695779800415, "step": 1716 }, { "epoch": 0.2744345880284504, "grad_norm": 1.3273706101940843, "learning_rate": 1.9587997176708703e-05, "loss": 0.6068428754806519, "step": 1717 }, { "epoch": 0.2745944218013266, "grad_norm": 1.4034201306597092, "learning_rate": 1.9587246107742483e-05, "loss": 0.6676723957061768, "step": 1718 }, { "epoch": 0.27475425557420285, "grad_norm": 1.3159959626126323, "learning_rate": 1.958649436923704e-05, "loss": 0.7274281978607178, "step": 1719 }, { "epoch": 0.27491408934707906, "grad_norm": 1.3079605544471993, "learning_rate": 1.9585741961244867e-05, "loss": 0.5320653915405273, "step": 1720 }, { "epoch": 0.27507392311995527, "grad_norm": 1.3131324970388005, "learning_rate": 1.958498888381851e-05, "loss": 0.8284043073654175, "step": 1721 }, { "epoch": 0.27523375689283147, "grad_norm": 1.5000452311130583, "learning_rate": 1.9584235137010568e-05, "loss": 0.8166561126708984, "step": 1722 }, { "epoch": 0.2753935906657077, "grad_norm": 1.3706017568074278, "learning_rate": 1.9583480720873673e-05, "loss": 0.8273227214813232, "step": 1723 }, { "epoch": 0.2755534244385839, "grad_norm": 1.5227385342602149, "learning_rate": 1.9582725635460512e-05, "loss": 0.8048650026321411, "step": 1724 }, { "epoch": 0.2757132582114601, "grad_norm": 1.362832268408294, "learning_rate": 1.958196988082382e-05, "loss": 0.8278433084487915, "step": 1725 }, { "epoch": 0.2758730919843363, "grad_norm": 1.3083690124598595, "learning_rate": 1.9581213457016376e-05, "loss": 0.8221151828765869, "step": 1726 }, { "epoch": 0.2760329257572125, "grad_norm": 1.4880535789108462, "learning_rate": 1.9580456364091003e-05, "loss": 0.6924199461936951, "step": 1727 }, { "epoch": 0.2761927595300887, "grad_norm": 1.252238578024794, "learning_rate": 1.9579698602100573e-05, "loss": 0.5813178420066833, "step": 1728 }, { "epoch": 0.2763525933029649, "grad_norm": 1.198848125342411, "learning_rate": 1.9578940171098012e-05, "loss": 0.6269388794898987, "step": 1729 }, { "epoch": 0.2765124270758411, "grad_norm": 1.2941729507241828, "learning_rate": 1.957818107113628e-05, "loss": 0.6284687519073486, "step": 1730 }, { "epoch": 0.2766722608487173, "grad_norm": 1.2519042750486273, "learning_rate": 1.9577421302268394e-05, "loss": 0.6489766836166382, "step": 1731 }, { "epoch": 0.2768320946215935, "grad_norm": 1.3597775688955491, "learning_rate": 1.957666086454741e-05, "loss": 0.7331215739250183, "step": 1732 }, { "epoch": 0.27699192839446973, "grad_norm": 1.354381145565243, "learning_rate": 1.9575899758026434e-05, "loss": 0.6497584581375122, "step": 1733 }, { "epoch": 0.27715176216734594, "grad_norm": 1.3462960782497397, "learning_rate": 1.957513798275862e-05, "loss": 0.7447936534881592, "step": 1734 }, { "epoch": 0.27731159594022214, "grad_norm": 1.5305798041365664, "learning_rate": 1.957437553879717e-05, "loss": 0.6761071681976318, "step": 1735 }, { "epoch": 0.2774714297130984, "grad_norm": 1.3539119009004912, "learning_rate": 1.9573612426195332e-05, "loss": 0.7770014405250549, "step": 1736 }, { "epoch": 0.2776312634859746, "grad_norm": 1.4362242240915883, "learning_rate": 1.9572848645006392e-05, "loss": 0.7385944724082947, "step": 1737 }, { "epoch": 0.2777910972588508, "grad_norm": 1.309824881317241, "learning_rate": 1.9572084195283695e-05, "loss": 0.6837446689605713, "step": 1738 }, { "epoch": 0.277950931031727, "grad_norm": 1.3439577007926107, "learning_rate": 1.957131907708062e-05, "loss": 0.582611083984375, "step": 1739 }, { "epoch": 0.2781107648046032, "grad_norm": 1.0671863606624878, "learning_rate": 1.9570553290450616e-05, "loss": 0.6181079149246216, "step": 1740 }, { "epoch": 0.27827059857747943, "grad_norm": 1.666508533827743, "learning_rate": 1.9569786835447148e-05, "loss": 0.7590829730033875, "step": 1741 }, { "epoch": 0.27843043235035564, "grad_norm": 1.4939415734514567, "learning_rate": 1.9569019712123744e-05, "loss": 0.7225114107131958, "step": 1742 }, { "epoch": 0.27859026612323184, "grad_norm": 1.45546940360152, "learning_rate": 1.9568251920533984e-05, "loss": 0.6541002988815308, "step": 1743 }, { "epoch": 0.27875009989610805, "grad_norm": 1.630132460228098, "learning_rate": 1.956748346073149e-05, "loss": 0.7156921029090881, "step": 1744 }, { "epoch": 0.27890993366898426, "grad_norm": 1.2650056381330803, "learning_rate": 1.9566714332769912e-05, "loss": 0.6616955995559692, "step": 1745 }, { "epoch": 0.27906976744186046, "grad_norm": 1.4737689193823753, "learning_rate": 1.956594453670298e-05, "loss": 0.7994748950004578, "step": 1746 }, { "epoch": 0.27922960121473667, "grad_norm": 1.1490863750389586, "learning_rate": 1.9565174072584448e-05, "loss": 0.6439602375030518, "step": 1747 }, { "epoch": 0.2793894349876129, "grad_norm": 1.3062458793141862, "learning_rate": 1.9564402940468122e-05, "loss": 0.5621670484542847, "step": 1748 }, { "epoch": 0.2795492687604891, "grad_norm": 1.410858227014757, "learning_rate": 1.9563631140407856e-05, "loss": 0.7604212760925293, "step": 1749 }, { "epoch": 0.2797091025333653, "grad_norm": 1.2234366628851117, "learning_rate": 1.9562858672457546e-05, "loss": 0.653132438659668, "step": 1750 }, { "epoch": 0.2798689363062415, "grad_norm": 1.4196498996977478, "learning_rate": 1.9562085536671147e-05, "loss": 0.5712625980377197, "step": 1751 }, { "epoch": 0.2800287700791177, "grad_norm": 1.6983554973675046, "learning_rate": 1.956131173310264e-05, "loss": 0.703083336353302, "step": 1752 }, { "epoch": 0.2801886038519939, "grad_norm": 1.421290008010928, "learning_rate": 1.9560537261806078e-05, "loss": 0.7451972961425781, "step": 1753 }, { "epoch": 0.28034843762487016, "grad_norm": 1.3569943792236931, "learning_rate": 1.9559762122835537e-05, "loss": 0.7720047235488892, "step": 1754 }, { "epoch": 0.28050827139774637, "grad_norm": 1.4048824429117766, "learning_rate": 1.9558986316245157e-05, "loss": 0.7219556570053101, "step": 1755 }, { "epoch": 0.2806681051706226, "grad_norm": 1.1425598278498172, "learning_rate": 1.955820984208911e-05, "loss": 0.6201022863388062, "step": 1756 }, { "epoch": 0.2808279389434988, "grad_norm": 1.260908473068997, "learning_rate": 1.955743270042163e-05, "loss": 0.7149145603179932, "step": 1757 }, { "epoch": 0.280987772716375, "grad_norm": 1.2886626470211275, "learning_rate": 1.9556654891296984e-05, "loss": 0.6557884216308594, "step": 1758 }, { "epoch": 0.2811476064892512, "grad_norm": 1.302857272430924, "learning_rate": 1.9555876414769498e-05, "loss": 0.6366511583328247, "step": 1759 }, { "epoch": 0.2813074402621274, "grad_norm": 1.3839086646184697, "learning_rate": 1.955509727089353e-05, "loss": 0.7340471744537354, "step": 1760 }, { "epoch": 0.2814672740350036, "grad_norm": 1.3701542655144419, "learning_rate": 1.9554317459723497e-05, "loss": 0.6337078809738159, "step": 1761 }, { "epoch": 0.2816271078078798, "grad_norm": 1.4266283608992452, "learning_rate": 1.955353698131386e-05, "loss": 0.7792196273803711, "step": 1762 }, { "epoch": 0.281786941580756, "grad_norm": 1.460043121152538, "learning_rate": 1.955275583571912e-05, "loss": 0.7564200162887573, "step": 1763 }, { "epoch": 0.2819467753536322, "grad_norm": 1.4579735398582867, "learning_rate": 1.9551974022993828e-05, "loss": 0.8418802618980408, "step": 1764 }, { "epoch": 0.2821066091265084, "grad_norm": 1.4237141415279067, "learning_rate": 1.9551191543192595e-05, "loss": 0.7078307271003723, "step": 1765 }, { "epoch": 0.28226644289938463, "grad_norm": 1.4030732435740223, "learning_rate": 1.9550408396370054e-05, "loss": 0.6138899326324463, "step": 1766 }, { "epoch": 0.28242627667226083, "grad_norm": 1.7077499814999353, "learning_rate": 1.9549624582580905e-05, "loss": 0.6245138645172119, "step": 1767 }, { "epoch": 0.28258611044513704, "grad_norm": 1.298907525548504, "learning_rate": 1.9548840101879877e-05, "loss": 0.6795047521591187, "step": 1768 }, { "epoch": 0.28274594421801325, "grad_norm": 1.504118297046421, "learning_rate": 1.9548054954321767e-05, "loss": 0.679441511631012, "step": 1769 }, { "epoch": 0.28290577799088945, "grad_norm": 1.1803160465624363, "learning_rate": 1.9547269139961404e-05, "loss": 0.6264669895172119, "step": 1770 }, { "epoch": 0.28306561176376566, "grad_norm": 1.3090919057854509, "learning_rate": 1.954648265885366e-05, "loss": 0.5919781923294067, "step": 1771 }, { "epoch": 0.2832254455366419, "grad_norm": 1.332215559436551, "learning_rate": 1.9545695511053468e-05, "loss": 0.6656942367553711, "step": 1772 }, { "epoch": 0.2833852793095181, "grad_norm": 1.4351584031143987, "learning_rate": 1.9544907696615792e-05, "loss": 0.7625677585601807, "step": 1773 }, { "epoch": 0.28354511308239433, "grad_norm": 1.5049974218504465, "learning_rate": 1.9544119215595655e-05, "loss": 0.6489413976669312, "step": 1774 }, { "epoch": 0.28370494685527053, "grad_norm": 1.2600928067827086, "learning_rate": 1.954333006804812e-05, "loss": 0.7035982608795166, "step": 1775 }, { "epoch": 0.28386478062814674, "grad_norm": 1.1957632018945221, "learning_rate": 1.9542540254028302e-05, "loss": 0.5922408103942871, "step": 1776 }, { "epoch": 0.28402461440102295, "grad_norm": 1.2787242646172734, "learning_rate": 1.9541749773591356e-05, "loss": 0.6625111103057861, "step": 1777 }, { "epoch": 0.28418444817389915, "grad_norm": 1.2311393339896688, "learning_rate": 1.9540958626792482e-05, "loss": 0.7213752269744873, "step": 1778 }, { "epoch": 0.28434428194677536, "grad_norm": 1.1506161539977038, "learning_rate": 1.954016681368694e-05, "loss": 0.7290177941322327, "step": 1779 }, { "epoch": 0.28450411571965156, "grad_norm": 1.6226586329328163, "learning_rate": 1.953937433433002e-05, "loss": 0.8377779722213745, "step": 1780 }, { "epoch": 0.28466394949252777, "grad_norm": 1.179345606832616, "learning_rate": 1.9538581188777072e-05, "loss": 0.6949566602706909, "step": 1781 }, { "epoch": 0.284823783265404, "grad_norm": 1.4050434725498202, "learning_rate": 1.9537787377083478e-05, "loss": 0.6224508285522461, "step": 1782 }, { "epoch": 0.2849836170382802, "grad_norm": 1.08702712580338, "learning_rate": 1.953699289930468e-05, "loss": 0.5167127251625061, "step": 1783 }, { "epoch": 0.2851434508111564, "grad_norm": 1.2162308664314876, "learning_rate": 1.9536197755496163e-05, "loss": 0.6748517751693726, "step": 1784 }, { "epoch": 0.2853032845840326, "grad_norm": 1.1977633710869775, "learning_rate": 1.953540194571346e-05, "loss": 0.6313768625259399, "step": 1785 }, { "epoch": 0.2854631183569088, "grad_norm": 1.3280911772987523, "learning_rate": 1.953460547001214e-05, "loss": 0.7224056720733643, "step": 1786 }, { "epoch": 0.285622952129785, "grad_norm": 1.1961359387706663, "learning_rate": 1.9533808328447828e-05, "loss": 0.7963544726371765, "step": 1787 }, { "epoch": 0.2857827859026612, "grad_norm": 1.5551483054137658, "learning_rate": 1.9533010521076195e-05, "loss": 0.7881929874420166, "step": 1788 }, { "epoch": 0.28594261967553747, "grad_norm": 1.2308584735469934, "learning_rate": 1.9532212047952953e-05, "loss": 0.5629903078079224, "step": 1789 }, { "epoch": 0.2861024534484137, "grad_norm": 1.5109508412616486, "learning_rate": 1.9531412909133873e-05, "loss": 0.6999092102050781, "step": 1790 }, { "epoch": 0.2862622872212899, "grad_norm": 1.1500754682741008, "learning_rate": 1.9530613104674757e-05, "loss": 0.6391503810882568, "step": 1791 }, { "epoch": 0.2864221209941661, "grad_norm": 1.1616238972205, "learning_rate": 1.9529812634631464e-05, "loss": 0.6037361025810242, "step": 1792 }, { "epoch": 0.2865819547670423, "grad_norm": 1.2516540258756808, "learning_rate": 1.9529011499059892e-05, "loss": 0.7537471055984497, "step": 1793 }, { "epoch": 0.2867417885399185, "grad_norm": 1.4692079260644209, "learning_rate": 1.9528209698015998e-05, "loss": 0.6820262670516968, "step": 1794 }, { "epoch": 0.2869016223127947, "grad_norm": 1.636933192796097, "learning_rate": 1.9527407231555763e-05, "loss": 0.6966941356658936, "step": 1795 }, { "epoch": 0.2870614560856709, "grad_norm": 1.127151065127273, "learning_rate": 1.952660409973524e-05, "loss": 0.6072083711624146, "step": 1796 }, { "epoch": 0.2872212898585471, "grad_norm": 1.424287021950837, "learning_rate": 1.952580030261052e-05, "loss": 0.650800883769989, "step": 1797 }, { "epoch": 0.2873811236314233, "grad_norm": 1.461547419285023, "learning_rate": 1.952499584023772e-05, "loss": 0.701573371887207, "step": 1798 }, { "epoch": 0.2875409574042995, "grad_norm": 1.290279952873007, "learning_rate": 1.952419071267304e-05, "loss": 0.7691897749900818, "step": 1799 }, { "epoch": 0.28770079117717573, "grad_norm": 1.3422448656728743, "learning_rate": 1.9523384919972692e-05, "loss": 0.5908514261245728, "step": 1800 }, { "epoch": 0.28786062495005194, "grad_norm": 1.306878398232105, "learning_rate": 1.9522578462192962e-05, "loss": 0.6140164136886597, "step": 1801 }, { "epoch": 0.28802045872292814, "grad_norm": 1.4548321630931074, "learning_rate": 1.9521771339390158e-05, "loss": 0.6340742111206055, "step": 1802 }, { "epoch": 0.28818029249580435, "grad_norm": 1.309942212012868, "learning_rate": 1.9520963551620656e-05, "loss": 0.6374989748001099, "step": 1803 }, { "epoch": 0.28834012626868055, "grad_norm": 1.556969344045787, "learning_rate": 1.952015509894087e-05, "loss": 0.8751993179321289, "step": 1804 }, { "epoch": 0.28849996004155676, "grad_norm": 1.4228117986320192, "learning_rate": 1.951934598140725e-05, "loss": 0.863433837890625, "step": 1805 }, { "epoch": 0.28865979381443296, "grad_norm": 1.3097766181062063, "learning_rate": 1.9518536199076315e-05, "loss": 0.6849066019058228, "step": 1806 }, { "epoch": 0.2888196275873092, "grad_norm": 1.4090146130961252, "learning_rate": 1.9517725752004605e-05, "loss": 0.6013676524162292, "step": 1807 }, { "epoch": 0.28897946136018543, "grad_norm": 1.4386288402793241, "learning_rate": 1.9516914640248723e-05, "loss": 0.6338294744491577, "step": 1808 }, { "epoch": 0.28913929513306164, "grad_norm": 1.323842513035754, "learning_rate": 1.9516102863865315e-05, "loss": 0.4566560387611389, "step": 1809 }, { "epoch": 0.28929912890593784, "grad_norm": 1.308260332449868, "learning_rate": 1.9515290422911074e-05, "loss": 0.6704435348510742, "step": 1810 }, { "epoch": 0.28945896267881405, "grad_norm": 1.6508127124398948, "learning_rate": 1.9514477317442736e-05, "loss": 0.670565128326416, "step": 1811 }, { "epoch": 0.28961879645169025, "grad_norm": 1.4505109751428493, "learning_rate": 1.9513663547517084e-05, "loss": 0.7277899980545044, "step": 1812 }, { "epoch": 0.28977863022456646, "grad_norm": 1.4254712023089753, "learning_rate": 1.9512849113190952e-05, "loss": 0.6750303506851196, "step": 1813 }, { "epoch": 0.28993846399744266, "grad_norm": 1.2088475451470817, "learning_rate": 1.9512034014521216e-05, "loss": 0.5213642120361328, "step": 1814 }, { "epoch": 0.29009829777031887, "grad_norm": 4.1219674626261025, "learning_rate": 1.9511218251564797e-05, "loss": 0.7581863403320312, "step": 1815 }, { "epoch": 0.2902581315431951, "grad_norm": 1.2214685640469904, "learning_rate": 1.951040182437867e-05, "loss": 0.5709383487701416, "step": 1816 }, { "epoch": 0.2904179653160713, "grad_norm": 1.268188328241926, "learning_rate": 1.950958473301985e-05, "loss": 0.653376042842865, "step": 1817 }, { "epoch": 0.2905777990889475, "grad_norm": 1.5109048542612453, "learning_rate": 1.9508766977545395e-05, "loss": 0.7004082202911377, "step": 1818 }, { "epoch": 0.2907376328618237, "grad_norm": 1.309485030498757, "learning_rate": 1.950794855801242e-05, "loss": 0.7100225687026978, "step": 1819 }, { "epoch": 0.2908974666346999, "grad_norm": 1.5284141679517262, "learning_rate": 1.9507129474478076e-05, "loss": 0.5557228326797485, "step": 1820 }, { "epoch": 0.2910573004075761, "grad_norm": 1.5070328050298978, "learning_rate": 1.950630972699957e-05, "loss": 0.7521004676818848, "step": 1821 }, { "epoch": 0.2912171341804523, "grad_norm": 1.4205229512792983, "learning_rate": 1.9505489315634144e-05, "loss": 0.7444958686828613, "step": 1822 }, { "epoch": 0.2913769679533285, "grad_norm": 1.4072507294413215, "learning_rate": 1.9504668240439097e-05, "loss": 0.7411206960678101, "step": 1823 }, { "epoch": 0.2915368017262047, "grad_norm": 1.4971706305163577, "learning_rate": 1.950384650147177e-05, "loss": 0.6899110078811646, "step": 1824 }, { "epoch": 0.291696635499081, "grad_norm": 1.3919437389618388, "learning_rate": 1.950302409878955e-05, "loss": 0.6088758707046509, "step": 1825 }, { "epoch": 0.2918564692719572, "grad_norm": 1.4955664228005456, "learning_rate": 1.9502201032449867e-05, "loss": 0.6791123151779175, "step": 1826 }, { "epoch": 0.2920163030448334, "grad_norm": 1.3982342232672105, "learning_rate": 1.9501377302510204e-05, "loss": 0.7344762086868286, "step": 1827 }, { "epoch": 0.2921761368177096, "grad_norm": 0.9988396257126179, "learning_rate": 1.950055290902809e-05, "loss": 0.512244462966919, "step": 1828 }, { "epoch": 0.2923359705905858, "grad_norm": 1.3765529567024855, "learning_rate": 1.9499727852061094e-05, "loss": 0.828218936920166, "step": 1829 }, { "epoch": 0.292495804363462, "grad_norm": 1.181892997713389, "learning_rate": 1.9498902131666836e-05, "loss": 0.6462881565093994, "step": 1830 }, { "epoch": 0.2926556381363382, "grad_norm": 1.1923014564244359, "learning_rate": 1.949807574790298e-05, "loss": 0.63648921251297, "step": 1831 }, { "epoch": 0.2928154719092144, "grad_norm": 1.1642144064105062, "learning_rate": 1.949724870082724e-05, "loss": 0.619259238243103, "step": 1832 }, { "epoch": 0.2929753056820906, "grad_norm": 1.3129049481850585, "learning_rate": 1.9496420990497374e-05, "loss": 0.6138759851455688, "step": 1833 }, { "epoch": 0.29313513945496683, "grad_norm": 1.337644332559258, "learning_rate": 1.9495592616971187e-05, "loss": 0.613968014717102, "step": 1834 }, { "epoch": 0.29329497322784304, "grad_norm": 1.3128093944311396, "learning_rate": 1.9494763580306525e-05, "loss": 0.6950427293777466, "step": 1835 }, { "epoch": 0.29345480700071924, "grad_norm": 1.3480457975394156, "learning_rate": 1.949393388056129e-05, "loss": 0.7157516479492188, "step": 1836 }, { "epoch": 0.29361464077359545, "grad_norm": 1.368799559285294, "learning_rate": 1.9493103517793422e-05, "loss": 0.5928688049316406, "step": 1837 }, { "epoch": 0.29377447454647165, "grad_norm": 1.420677734877478, "learning_rate": 1.949227249206091e-05, "loss": 0.8103125095367432, "step": 1838 }, { "epoch": 0.29393430831934786, "grad_norm": 1.3962799926350782, "learning_rate": 1.9491440803421796e-05, "loss": 0.7429225444793701, "step": 1839 }, { "epoch": 0.29409414209222406, "grad_norm": 1.383933850112301, "learning_rate": 1.9490608451934156e-05, "loss": 0.602365255355835, "step": 1840 }, { "epoch": 0.29425397586510027, "grad_norm": 1.352933857884403, "learning_rate": 1.9489775437656126e-05, "loss": 0.74156254529953, "step": 1841 }, { "epoch": 0.29441380963797653, "grad_norm": 1.3463246959579258, "learning_rate": 1.9488941760645867e-05, "loss": 0.6792944669723511, "step": 1842 }, { "epoch": 0.29457364341085274, "grad_norm": 1.3234024521633247, "learning_rate": 1.9488107420961612e-05, "loss": 0.5533865690231323, "step": 1843 }, { "epoch": 0.29473347718372894, "grad_norm": 1.6565281446008755, "learning_rate": 1.9487272418661625e-05, "loss": 0.7153151035308838, "step": 1844 }, { "epoch": 0.29489331095660515, "grad_norm": 1.1368909507014986, "learning_rate": 1.9486436753804217e-05, "loss": 0.5894697308540344, "step": 1845 }, { "epoch": 0.29505314472948135, "grad_norm": 1.3139133863352699, "learning_rate": 1.9485600426447755e-05, "loss": 0.7484534382820129, "step": 1846 }, { "epoch": 0.29521297850235756, "grad_norm": 1.3052096110243032, "learning_rate": 1.9484763436650637e-05, "loss": 0.6697699427604675, "step": 1847 }, { "epoch": 0.29537281227523376, "grad_norm": 1.4023610213011826, "learning_rate": 1.9483925784471317e-05, "loss": 0.6351062655448914, "step": 1848 }, { "epoch": 0.29553264604810997, "grad_norm": 1.5151194894371318, "learning_rate": 1.9483087469968297e-05, "loss": 0.6866735816001892, "step": 1849 }, { "epoch": 0.2956924798209862, "grad_norm": 1.2984026424180153, "learning_rate": 1.9482248493200125e-05, "loss": 0.7104238271713257, "step": 1850 }, { "epoch": 0.2958523135938624, "grad_norm": 1.3538617576415795, "learning_rate": 1.948140885422538e-05, "loss": 0.762339174747467, "step": 1851 }, { "epoch": 0.2960121473667386, "grad_norm": 1.110887823419646, "learning_rate": 1.948056855310271e-05, "loss": 0.6939602494239807, "step": 1852 }, { "epoch": 0.2961719811396148, "grad_norm": 1.3836467657828113, "learning_rate": 1.9479727589890797e-05, "loss": 0.637076735496521, "step": 1853 }, { "epoch": 0.296331814912491, "grad_norm": 1.2157785740116114, "learning_rate": 1.9478885964648368e-05, "loss": 0.6752113103866577, "step": 1854 }, { "epoch": 0.2964916486853672, "grad_norm": 1.4612724772833545, "learning_rate": 1.94780436774342e-05, "loss": 0.6209604740142822, "step": 1855 }, { "epoch": 0.2966514824582434, "grad_norm": 1.3094227539252463, "learning_rate": 1.9477200728307118e-05, "loss": 0.7744503021240234, "step": 1856 }, { "epoch": 0.2968113162311196, "grad_norm": 1.277816826023472, "learning_rate": 1.9476357117325984e-05, "loss": 0.6097621917724609, "step": 1857 }, { "epoch": 0.2969711500039958, "grad_norm": 1.3896854778805807, "learning_rate": 1.947551284454972e-05, "loss": 0.5889319181442261, "step": 1858 }, { "epoch": 0.297130983776872, "grad_norm": 1.2677306978127492, "learning_rate": 1.9474667910037282e-05, "loss": 0.7025108337402344, "step": 1859 }, { "epoch": 0.2972908175497483, "grad_norm": 1.2092749411529584, "learning_rate": 1.9473822313847682e-05, "loss": 0.5977394580841064, "step": 1860 }, { "epoch": 0.2974506513226245, "grad_norm": 1.386663554451007, "learning_rate": 1.9472976056039973e-05, "loss": 0.6916562914848328, "step": 1861 }, { "epoch": 0.2976104850955007, "grad_norm": 1.2783555756003064, "learning_rate": 1.9472129136673247e-05, "loss": 0.7871260643005371, "step": 1862 }, { "epoch": 0.2977703188683769, "grad_norm": 1.2358779820357315, "learning_rate": 1.947128155580666e-05, "loss": 0.6613223552703857, "step": 1863 }, { "epoch": 0.2979301526412531, "grad_norm": 1.679026717925079, "learning_rate": 1.9470433313499395e-05, "loss": 0.8294663429260254, "step": 1864 }, { "epoch": 0.2980899864141293, "grad_norm": 1.1377276861386103, "learning_rate": 1.9469584409810698e-05, "loss": 0.6664983034133911, "step": 1865 }, { "epoch": 0.2982498201870055, "grad_norm": 1.4891020833977555, "learning_rate": 1.946873484479985e-05, "loss": 0.7021251320838928, "step": 1866 }, { "epoch": 0.2984096539598817, "grad_norm": 1.497385430930107, "learning_rate": 1.946788461852618e-05, "loss": 0.6771948337554932, "step": 1867 }, { "epoch": 0.29856948773275793, "grad_norm": 1.3997861834839265, "learning_rate": 1.9467033731049067e-05, "loss": 0.719413697719574, "step": 1868 }, { "epoch": 0.29872932150563414, "grad_norm": 1.268216247328872, "learning_rate": 1.9466182182427933e-05, "loss": 0.6188986301422119, "step": 1869 }, { "epoch": 0.29888915527851034, "grad_norm": 1.33150606595723, "learning_rate": 1.946532997272225e-05, "loss": 0.6581360101699829, "step": 1870 }, { "epoch": 0.29904898905138655, "grad_norm": 1.7556655108951382, "learning_rate": 1.9464477101991525e-05, "loss": 0.7741925716400146, "step": 1871 }, { "epoch": 0.29920882282426275, "grad_norm": 1.4580189039721383, "learning_rate": 1.946362357029533e-05, "loss": 0.6435893177986145, "step": 1872 }, { "epoch": 0.29936865659713896, "grad_norm": 1.2430122303009736, "learning_rate": 1.9462769377693266e-05, "loss": 0.6853729486465454, "step": 1873 }, { "epoch": 0.29952849037001517, "grad_norm": 1.3120296650152272, "learning_rate": 1.946191452424499e-05, "loss": 0.6750870943069458, "step": 1874 }, { "epoch": 0.29968832414289137, "grad_norm": 1.4318996420036674, "learning_rate": 1.94610590100102e-05, "loss": 0.7226662635803223, "step": 1875 }, { "epoch": 0.2998481579157676, "grad_norm": 1.396722356275221, "learning_rate": 1.9460202835048643e-05, "loss": 0.6151450276374817, "step": 1876 }, { "epoch": 0.30000799168864384, "grad_norm": 1.319655323144409, "learning_rate": 1.9459345999420112e-05, "loss": 0.686818540096283, "step": 1877 }, { "epoch": 0.30016782546152004, "grad_norm": 1.513727064321595, "learning_rate": 1.945848850318444e-05, "loss": 0.6663897633552551, "step": 1878 }, { "epoch": 0.30032765923439625, "grad_norm": 1.3606270832863576, "learning_rate": 1.945763034640152e-05, "loss": 0.690800666809082, "step": 1879 }, { "epoch": 0.30048749300727245, "grad_norm": 1.732122988632467, "learning_rate": 1.9456771529131278e-05, "loss": 0.8697131276130676, "step": 1880 }, { "epoch": 0.30064732678014866, "grad_norm": 1.416855361637994, "learning_rate": 1.945591205143369e-05, "loss": 0.7360158562660217, "step": 1881 }, { "epoch": 0.30080716055302487, "grad_norm": 1.5993100020673237, "learning_rate": 1.945505191336878e-05, "loss": 0.7548385858535767, "step": 1882 }, { "epoch": 0.30096699432590107, "grad_norm": 1.5378467578841692, "learning_rate": 1.9454191114996618e-05, "loss": 0.6665276288986206, "step": 1883 }, { "epoch": 0.3011268280987773, "grad_norm": 1.2868149709387908, "learning_rate": 1.9453329656377315e-05, "loss": 0.8228793144226074, "step": 1884 }, { "epoch": 0.3012866618716535, "grad_norm": 1.117860969638515, "learning_rate": 1.945246753757104e-05, "loss": 0.5263769626617432, "step": 1885 }, { "epoch": 0.3014464956445297, "grad_norm": 1.3345317572191409, "learning_rate": 1.945160475863799e-05, "loss": 0.7479335069656372, "step": 1886 }, { "epoch": 0.3016063294174059, "grad_norm": 1.318940260014817, "learning_rate": 1.945074131963843e-05, "loss": 0.665996789932251, "step": 1887 }, { "epoch": 0.3017661631902821, "grad_norm": 1.4194254845841978, "learning_rate": 1.944987722063265e-05, "loss": 0.6841144561767578, "step": 1888 }, { "epoch": 0.3019259969631583, "grad_norm": 1.239725283159523, "learning_rate": 1.9449012461681e-05, "loss": 0.6652299761772156, "step": 1889 }, { "epoch": 0.3020858307360345, "grad_norm": 1.3086728864658739, "learning_rate": 1.944814704284387e-05, "loss": 0.6096293330192566, "step": 1890 }, { "epoch": 0.3022456645089107, "grad_norm": 1.5032196631188002, "learning_rate": 1.9447280964181698e-05, "loss": 0.772672176361084, "step": 1891 }, { "epoch": 0.3024054982817869, "grad_norm": 1.3228478460861626, "learning_rate": 1.944641422575497e-05, "loss": 0.5830060839653015, "step": 1892 }, { "epoch": 0.3025653320546631, "grad_norm": 1.1914556501168603, "learning_rate": 1.9445546827624215e-05, "loss": 0.6920336484909058, "step": 1893 }, { "epoch": 0.30272516582753933, "grad_norm": 1.4312517308318429, "learning_rate": 1.9444678769850008e-05, "loss": 0.6438971757888794, "step": 1894 }, { "epoch": 0.3028849996004156, "grad_norm": 1.3216476230949195, "learning_rate": 1.9443810052492972e-05, "loss": 0.574951171875, "step": 1895 }, { "epoch": 0.3030448333732918, "grad_norm": 1.3032397922691772, "learning_rate": 1.9442940675613773e-05, "loss": 0.7357450723648071, "step": 1896 }, { "epoch": 0.303204667146168, "grad_norm": 1.323590490567085, "learning_rate": 1.9442070639273125e-05, "loss": 0.8629430532455444, "step": 1897 }, { "epoch": 0.3033645009190442, "grad_norm": 1.4724463438860786, "learning_rate": 1.9441199943531792e-05, "loss": 0.5301831960678101, "step": 1898 }, { "epoch": 0.3035243346919204, "grad_norm": 1.3575708525945547, "learning_rate": 1.944032858845058e-05, "loss": 0.7310376167297363, "step": 1899 }, { "epoch": 0.3036841684647966, "grad_norm": 1.491425985769271, "learning_rate": 1.9439456574090338e-05, "loss": 0.7451033592224121, "step": 1900 }, { "epoch": 0.30384400223767283, "grad_norm": 1.4533688915962077, "learning_rate": 1.943858390051197e-05, "loss": 0.6700550317764282, "step": 1901 }, { "epoch": 0.30400383601054903, "grad_norm": 1.2557091680938381, "learning_rate": 1.9437710567776413e-05, "loss": 0.511677086353302, "step": 1902 }, { "epoch": 0.30416366978342524, "grad_norm": 3.7553256922513794, "learning_rate": 1.943683657594466e-05, "loss": 0.7365038394927979, "step": 1903 }, { "epoch": 0.30432350355630144, "grad_norm": 1.2993879886973934, "learning_rate": 1.9435961925077748e-05, "loss": 0.6558183431625366, "step": 1904 }, { "epoch": 0.30448333732917765, "grad_norm": 1.4111824199247303, "learning_rate": 1.9435086615236762e-05, "loss": 0.6859403848648071, "step": 1905 }, { "epoch": 0.30464317110205386, "grad_norm": 1.4546496082010305, "learning_rate": 1.943421064648283e-05, "loss": 0.7302327156066895, "step": 1906 }, { "epoch": 0.30480300487493006, "grad_norm": 1.5009318887483432, "learning_rate": 1.943333401887712e-05, "loss": 0.787503719329834, "step": 1907 }, { "epoch": 0.30496283864780627, "grad_norm": 1.3767909021134166, "learning_rate": 1.9432456732480862e-05, "loss": 0.6668105125427246, "step": 1908 }, { "epoch": 0.3051226724206825, "grad_norm": 1.3311114864573845, "learning_rate": 1.9431578787355317e-05, "loss": 0.7136852741241455, "step": 1909 }, { "epoch": 0.3052825061935587, "grad_norm": 1.342319048861463, "learning_rate": 1.9430700183561798e-05, "loss": 0.7214742302894592, "step": 1910 }, { "epoch": 0.3054423399664349, "grad_norm": 1.3985960845319814, "learning_rate": 1.9429820921161665e-05, "loss": 0.7363846302032471, "step": 1911 }, { "epoch": 0.3056021737393111, "grad_norm": 1.281222862208744, "learning_rate": 1.9428941000216324e-05, "loss": 0.7219333052635193, "step": 1912 }, { "epoch": 0.30576200751218735, "grad_norm": 1.3467001458484278, "learning_rate": 1.942806042078722e-05, "loss": 0.7374323010444641, "step": 1913 }, { "epoch": 0.30592184128506356, "grad_norm": 1.428417133893915, "learning_rate": 1.9427179182935852e-05, "loss": 0.6078013181686401, "step": 1914 }, { "epoch": 0.30608167505793976, "grad_norm": 1.21023457894632, "learning_rate": 1.942629728672377e-05, "loss": 0.6039586067199707, "step": 1915 }, { "epoch": 0.30624150883081597, "grad_norm": 1.669410103612175, "learning_rate": 1.942541473221255e-05, "loss": 0.8745598196983337, "step": 1916 }, { "epoch": 0.3064013426036922, "grad_norm": 1.4146407848361615, "learning_rate": 1.9424531519463834e-05, "loss": 0.6337844133377075, "step": 1917 }, { "epoch": 0.3065611763765684, "grad_norm": 1.472858315408877, "learning_rate": 1.94236476485393e-05, "loss": 0.7043319940567017, "step": 1918 }, { "epoch": 0.3067210101494446, "grad_norm": 1.3021686260516832, "learning_rate": 1.9422763119500677e-05, "loss": 0.7228454351425171, "step": 1919 }, { "epoch": 0.3068808439223208, "grad_norm": 1.1953432323058117, "learning_rate": 1.9421877932409736e-05, "loss": 0.6499645709991455, "step": 1920 }, { "epoch": 0.307040677695197, "grad_norm": 1.5283454128922378, "learning_rate": 1.9420992087328295e-05, "loss": 0.858070433139801, "step": 1921 }, { "epoch": 0.3072005114680732, "grad_norm": 1.2950366180965522, "learning_rate": 1.9420105584318216e-05, "loss": 0.8279030323028564, "step": 1922 }, { "epoch": 0.3073603452409494, "grad_norm": 1.6199865366898052, "learning_rate": 1.9419218423441414e-05, "loss": 0.6141207218170166, "step": 1923 }, { "epoch": 0.3075201790138256, "grad_norm": 1.429924038745628, "learning_rate": 1.9418330604759842e-05, "loss": 0.6620126962661743, "step": 1924 }, { "epoch": 0.3076800127867018, "grad_norm": 1.3598438924007588, "learning_rate": 1.9417442128335503e-05, "loss": 0.660314679145813, "step": 1925 }, { "epoch": 0.307839846559578, "grad_norm": 1.3541392978452245, "learning_rate": 1.9416552994230443e-05, "loss": 0.6714390516281128, "step": 1926 }, { "epoch": 0.30799968033245423, "grad_norm": 1.2061450591394338, "learning_rate": 1.9415663202506757e-05, "loss": 0.6096206903457642, "step": 1927 }, { "epoch": 0.30815951410533043, "grad_norm": 1.2781556382710992, "learning_rate": 1.941477275322659e-05, "loss": 0.7026059627532959, "step": 1928 }, { "epoch": 0.30831934787820664, "grad_norm": 1.3532841933774302, "learning_rate": 1.9413881646452122e-05, "loss": 0.7474328279495239, "step": 1929 }, { "epoch": 0.3084791816510829, "grad_norm": 1.257837409365775, "learning_rate": 1.9412989882245586e-05, "loss": 0.5754653811454773, "step": 1930 }, { "epoch": 0.3086390154239591, "grad_norm": 1.5467435445913131, "learning_rate": 1.9412097460669258e-05, "loss": 0.5581716299057007, "step": 1931 }, { "epoch": 0.3087988491968353, "grad_norm": 1.1165990304918747, "learning_rate": 1.9411204381785467e-05, "loss": 0.6414940357208252, "step": 1932 }, { "epoch": 0.3089586829697115, "grad_norm": 1.3272210798939532, "learning_rate": 1.9410310645656577e-05, "loss": 0.68830406665802, "step": 1933 }, { "epoch": 0.3091185167425877, "grad_norm": 1.2817429927739497, "learning_rate": 1.9409416252345004e-05, "loss": 0.6880050897598267, "step": 1934 }, { "epoch": 0.30927835051546393, "grad_norm": 1.50631257658822, "learning_rate": 1.940852120191321e-05, "loss": 0.723797082901001, "step": 1935 }, { "epoch": 0.30943818428834013, "grad_norm": 1.4259322400278054, "learning_rate": 1.9407625494423704e-05, "loss": 0.7240141034126282, "step": 1936 }, { "epoch": 0.30959801806121634, "grad_norm": 1.2490212072073241, "learning_rate": 1.940672912993904e-05, "loss": 0.5662398338317871, "step": 1937 }, { "epoch": 0.30975785183409255, "grad_norm": 1.4586278647784818, "learning_rate": 1.9405832108521808e-05, "loss": 0.7155436277389526, "step": 1938 }, { "epoch": 0.30991768560696875, "grad_norm": 1.1919009357168788, "learning_rate": 1.9404934430234665e-05, "loss": 0.6051626801490784, "step": 1939 }, { "epoch": 0.31007751937984496, "grad_norm": 1.3358063364349428, "learning_rate": 1.9404036095140293e-05, "loss": 0.7296582460403442, "step": 1940 }, { "epoch": 0.31023735315272116, "grad_norm": 1.4596964148232194, "learning_rate": 1.9403137103301436e-05, "loss": 0.795889675617218, "step": 1941 }, { "epoch": 0.31039718692559737, "grad_norm": 1.1919320463967054, "learning_rate": 1.940223745478087e-05, "loss": 0.5719799995422363, "step": 1942 }, { "epoch": 0.3105570206984736, "grad_norm": 1.398176722658398, "learning_rate": 1.9401337149641423e-05, "loss": 0.6255544424057007, "step": 1943 }, { "epoch": 0.3107168544713498, "grad_norm": 1.233877752389252, "learning_rate": 1.9400436187945972e-05, "loss": 0.4468693733215332, "step": 1944 }, { "epoch": 0.310876688244226, "grad_norm": 1.4221653551437958, "learning_rate": 1.939953456975744e-05, "loss": 0.6261292695999146, "step": 1945 }, { "epoch": 0.3110365220171022, "grad_norm": 1.3558975923668295, "learning_rate": 1.9398632295138786e-05, "loss": 0.6463280320167542, "step": 1946 }, { "epoch": 0.3111963557899784, "grad_norm": 1.3747259535459897, "learning_rate": 1.9397729364153025e-05, "loss": 0.7591673135757446, "step": 1947 }, { "epoch": 0.31135618956285466, "grad_norm": 1.498297861318522, "learning_rate": 1.9396825776863215e-05, "loss": 0.6897374391555786, "step": 1948 }, { "epoch": 0.31151602333573086, "grad_norm": 1.3908929480551293, "learning_rate": 1.9395921533332455e-05, "loss": 0.6538255214691162, "step": 1949 }, { "epoch": 0.31167585710860707, "grad_norm": 1.3642278515636022, "learning_rate": 1.93950166336239e-05, "loss": 0.6838874816894531, "step": 1950 }, { "epoch": 0.3118356908814833, "grad_norm": 1.2218851124931864, "learning_rate": 1.9394111077800747e-05, "loss": 0.529714822769165, "step": 1951 }, { "epoch": 0.3119955246543595, "grad_norm": 1.217613635679235, "learning_rate": 1.9393204865926227e-05, "loss": 0.6173565983772278, "step": 1952 }, { "epoch": 0.3121553584272357, "grad_norm": 1.1365913599257764, "learning_rate": 1.9392297998063637e-05, "loss": 0.49539339542388916, "step": 1953 }, { "epoch": 0.3123151922001119, "grad_norm": 1.4976378186202812, "learning_rate": 1.93913904742763e-05, "loss": 0.6277658939361572, "step": 1954 }, { "epoch": 0.3124750259729881, "grad_norm": 1.6320398867185906, "learning_rate": 1.93904822946276e-05, "loss": 0.6633662581443787, "step": 1955 }, { "epoch": 0.3126348597458643, "grad_norm": 1.403578433173158, "learning_rate": 1.9389573459180963e-05, "loss": 0.7277640700340271, "step": 1956 }, { "epoch": 0.3127946935187405, "grad_norm": 1.2213520910234759, "learning_rate": 1.938866396799985e-05, "loss": 0.5648549795150757, "step": 1957 }, { "epoch": 0.3129545272916167, "grad_norm": 1.4050721862292221, "learning_rate": 1.938775382114779e-05, "loss": 0.7341117858886719, "step": 1958 }, { "epoch": 0.3131143610644929, "grad_norm": 1.09499172995835, "learning_rate": 1.938684301868833e-05, "loss": 0.6017086505889893, "step": 1959 }, { "epoch": 0.3132741948373691, "grad_norm": 1.2900199200554539, "learning_rate": 1.9385931560685086e-05, "loss": 0.7748286724090576, "step": 1960 }, { "epoch": 0.31343402861024533, "grad_norm": 1.3777178696458983, "learning_rate": 1.9385019447201707e-05, "loss": 0.9706350564956665, "step": 1961 }, { "epoch": 0.31359386238312154, "grad_norm": 1.1397477575568524, "learning_rate": 1.9384106678301896e-05, "loss": 0.7614817023277283, "step": 1962 }, { "epoch": 0.31375369615599774, "grad_norm": 1.580489848002324, "learning_rate": 1.9383193254049398e-05, "loss": 0.6629382371902466, "step": 1963 }, { "epoch": 0.31391352992887395, "grad_norm": 1.2658762991900605, "learning_rate": 1.9382279174508e-05, "loss": 0.5654825568199158, "step": 1964 }, { "epoch": 0.31407336370175015, "grad_norm": 1.3675698801716984, "learning_rate": 1.9381364439741534e-05, "loss": 0.677720844745636, "step": 1965 }, { "epoch": 0.3142331974746264, "grad_norm": 1.1021941614771353, "learning_rate": 1.9380449049813888e-05, "loss": 0.5543438196182251, "step": 1966 }, { "epoch": 0.3143930312475026, "grad_norm": 1.4544795214308952, "learning_rate": 1.9379533004788992e-05, "loss": 0.7234451174736023, "step": 1967 }, { "epoch": 0.3145528650203788, "grad_norm": 1.443303768592814, "learning_rate": 1.937861630473081e-05, "loss": 0.6223294734954834, "step": 1968 }, { "epoch": 0.31471269879325503, "grad_norm": 1.2628039037736394, "learning_rate": 1.937769894970337e-05, "loss": 0.7442903518676758, "step": 1969 }, { "epoch": 0.31487253256613124, "grad_norm": 1.6321933764089585, "learning_rate": 1.937678093977073e-05, "loss": 0.7113708257675171, "step": 1970 }, { "epoch": 0.31503236633900744, "grad_norm": 1.3042807885947485, "learning_rate": 1.937586227499701e-05, "loss": 0.5940143465995789, "step": 1971 }, { "epoch": 0.31519220011188365, "grad_norm": 1.433603072936976, "learning_rate": 1.9374942955446356e-05, "loss": 0.6453345417976379, "step": 1972 }, { "epoch": 0.31535203388475985, "grad_norm": 1.497649268534, "learning_rate": 1.9374022981182974e-05, "loss": 0.7584973573684692, "step": 1973 }, { "epoch": 0.31551186765763606, "grad_norm": 1.2081653420772418, "learning_rate": 1.937310235227111e-05, "loss": 0.5852333307266235, "step": 1974 }, { "epoch": 0.31567170143051226, "grad_norm": 1.2562133585153472, "learning_rate": 1.9372181068775062e-05, "loss": 0.6259508728981018, "step": 1975 }, { "epoch": 0.31583153520338847, "grad_norm": 1.2015953193345756, "learning_rate": 1.937125913075917e-05, "loss": 0.6014357805252075, "step": 1976 }, { "epoch": 0.3159913689762647, "grad_norm": 1.1488573037879444, "learning_rate": 1.937033653828781e-05, "loss": 0.5744330883026123, "step": 1977 }, { "epoch": 0.3161512027491409, "grad_norm": 1.3601694664818023, "learning_rate": 1.936941329142542e-05, "loss": 0.7008112072944641, "step": 1978 }, { "epoch": 0.3163110365220171, "grad_norm": 1.4000743036735055, "learning_rate": 1.936848939023647e-05, "loss": 0.6868554353713989, "step": 1979 }, { "epoch": 0.3164708702948933, "grad_norm": 1.2799607878395238, "learning_rate": 1.9367564834785493e-05, "loss": 0.6849342584609985, "step": 1980 }, { "epoch": 0.3166307040677695, "grad_norm": 1.2920829803197467, "learning_rate": 1.9366639625137046e-05, "loss": 0.7803016901016235, "step": 1981 }, { "epoch": 0.3167905378406457, "grad_norm": 1.389381533364044, "learning_rate": 1.9365713761355746e-05, "loss": 0.6831706762313843, "step": 1982 }, { "epoch": 0.31695037161352196, "grad_norm": 1.2130953164894793, "learning_rate": 1.9364787243506257e-05, "loss": 0.6244046092033386, "step": 1983 }, { "epoch": 0.31711020538639817, "grad_norm": 1.4234272820969878, "learning_rate": 1.936386007165327e-05, "loss": 0.6174769997596741, "step": 1984 }, { "epoch": 0.3172700391592744, "grad_norm": 1.3258723442885663, "learning_rate": 1.9362932245861553e-05, "loss": 0.6408554315567017, "step": 1985 }, { "epoch": 0.3174298729321506, "grad_norm": 1.1824814091142635, "learning_rate": 1.936200376619589e-05, "loss": 0.5586062669754028, "step": 1986 }, { "epoch": 0.3175897067050268, "grad_norm": 1.1616265370247325, "learning_rate": 1.9361074632721125e-05, "loss": 0.6619579792022705, "step": 1987 }, { "epoch": 0.317749540477903, "grad_norm": 1.3800692549383131, "learning_rate": 1.936014484550215e-05, "loss": 0.832611620426178, "step": 1988 }, { "epoch": 0.3179093742507792, "grad_norm": 1.3811791705199346, "learning_rate": 1.9359214404603892e-05, "loss": 0.7597346305847168, "step": 1989 }, { "epoch": 0.3180692080236554, "grad_norm": 1.2489903768192936, "learning_rate": 1.9358283310091333e-05, "loss": 0.7085386514663696, "step": 1990 }, { "epoch": 0.3182290417965316, "grad_norm": 1.3389668977641904, "learning_rate": 1.9357351562029496e-05, "loss": 0.6874441504478455, "step": 1991 }, { "epoch": 0.3183888755694078, "grad_norm": 1.2946409136853103, "learning_rate": 1.935641916048345e-05, "loss": 0.6245360374450684, "step": 1992 }, { "epoch": 0.318548709342284, "grad_norm": 1.4460676710644444, "learning_rate": 1.9355486105518316e-05, "loss": 0.7459972500801086, "step": 1993 }, { "epoch": 0.3187085431151602, "grad_norm": 1.2772716468311043, "learning_rate": 1.935455239719925e-05, "loss": 0.6707639694213867, "step": 1994 }, { "epoch": 0.31886837688803643, "grad_norm": 1.5822373739947644, "learning_rate": 1.935361803559146e-05, "loss": 0.601954460144043, "step": 1995 }, { "epoch": 0.31902821066091264, "grad_norm": 1.3901529752273623, "learning_rate": 1.93526830207602e-05, "loss": 0.704692542552948, "step": 1996 }, { "epoch": 0.31918804443378884, "grad_norm": 1.2906795097413117, "learning_rate": 1.9351747352770766e-05, "loss": 0.6999996900558472, "step": 1997 }, { "epoch": 0.31934787820666505, "grad_norm": 1.3254038054875583, "learning_rate": 1.9350811031688503e-05, "loss": 0.5595599412918091, "step": 1998 }, { "epoch": 0.31950771197954125, "grad_norm": 1.5338592364074115, "learning_rate": 1.93498740575788e-05, "loss": 0.7228975296020508, "step": 1999 }, { "epoch": 0.31966754575241746, "grad_norm": 1.2619081481776444, "learning_rate": 1.934893643050709e-05, "loss": 0.8417708277702332, "step": 2000 }, { "epoch": 0.3198273795252937, "grad_norm": 1.565272650782086, "learning_rate": 1.934799815053886e-05, "loss": 0.664306640625, "step": 2001 }, { "epoch": 0.3199872132981699, "grad_norm": 1.2572716951291378, "learning_rate": 1.934705921773963e-05, "loss": 0.5479151010513306, "step": 2002 }, { "epoch": 0.32014704707104613, "grad_norm": 1.2888287943013905, "learning_rate": 1.934611963217497e-05, "loss": 0.584537923336029, "step": 2003 }, { "epoch": 0.32030688084392234, "grad_norm": 1.2619254258722088, "learning_rate": 1.9345179393910502e-05, "loss": 0.6029731035232544, "step": 2004 }, { "epoch": 0.32046671461679854, "grad_norm": 1.3611210275877457, "learning_rate": 1.934423850301189e-05, "loss": 0.711074709892273, "step": 2005 }, { "epoch": 0.32062654838967475, "grad_norm": 1.3409156009972971, "learning_rate": 1.9343296959544836e-05, "loss": 0.6655420064926147, "step": 2006 }, { "epoch": 0.32078638216255095, "grad_norm": 1.328153776243198, "learning_rate": 1.9342354763575103e-05, "loss": 0.666632354259491, "step": 2007 }, { "epoch": 0.32094621593542716, "grad_norm": 1.379777109913041, "learning_rate": 1.9341411915168482e-05, "loss": 0.8328494429588318, "step": 2008 }, { "epoch": 0.32110604970830336, "grad_norm": 1.4517574361944738, "learning_rate": 1.9340468414390827e-05, "loss": 0.7580541372299194, "step": 2009 }, { "epoch": 0.32126588348117957, "grad_norm": 1.256973893789304, "learning_rate": 1.9339524261308017e-05, "loss": 0.6753842830657959, "step": 2010 }, { "epoch": 0.3214257172540558, "grad_norm": 1.4476536020005357, "learning_rate": 1.9338579455986e-05, "loss": 0.651616096496582, "step": 2011 }, { "epoch": 0.321585551026932, "grad_norm": 1.201182280574299, "learning_rate": 1.933763399849075e-05, "loss": 0.7200304269790649, "step": 2012 }, { "epoch": 0.3217453847998082, "grad_norm": 1.3507888903062473, "learning_rate": 1.9336687888888296e-05, "loss": 0.7925429344177246, "step": 2013 }, { "epoch": 0.3219052185726844, "grad_norm": 1.1380806566771116, "learning_rate": 1.9335741127244712e-05, "loss": 0.5892415046691895, "step": 2014 }, { "epoch": 0.3220650523455606, "grad_norm": 1.5493625512776963, "learning_rate": 1.933479371362612e-05, "loss": 0.6716031432151794, "step": 2015 }, { "epoch": 0.3222248861184368, "grad_norm": 1.157512311633323, "learning_rate": 1.9333845648098682e-05, "loss": 0.6185641288757324, "step": 2016 }, { "epoch": 0.322384719891313, "grad_norm": 1.5278081952927363, "learning_rate": 1.9332896930728603e-05, "loss": 0.7979305982589722, "step": 2017 }, { "epoch": 0.32254455366418927, "grad_norm": 1.2864753988170983, "learning_rate": 1.933194756158214e-05, "loss": 0.8242633938789368, "step": 2018 }, { "epoch": 0.3227043874370655, "grad_norm": 1.2323356395250529, "learning_rate": 1.93309975407256e-05, "loss": 0.6905983686447144, "step": 2019 }, { "epoch": 0.3228642212099417, "grad_norm": 1.4376097255891551, "learning_rate": 1.9330046868225317e-05, "loss": 0.735590934753418, "step": 2020 }, { "epoch": 0.3230240549828179, "grad_norm": 1.215839730806677, "learning_rate": 1.9329095544147695e-05, "loss": 0.6779923439025879, "step": 2021 }, { "epoch": 0.3231838887556941, "grad_norm": 1.4457825486566465, "learning_rate": 1.9328143568559166e-05, "loss": 0.7493510842323303, "step": 2022 }, { "epoch": 0.3233437225285703, "grad_norm": 1.47932422261152, "learning_rate": 1.9327190941526208e-05, "loss": 0.746444821357727, "step": 2023 }, { "epoch": 0.3235035563014465, "grad_norm": 1.513068069321817, "learning_rate": 1.9326237663115355e-05, "loss": 0.773535966873169, "step": 2024 }, { "epoch": 0.3236633900743227, "grad_norm": 1.2194557763464058, "learning_rate": 1.9325283733393177e-05, "loss": 0.5884173512458801, "step": 2025 }, { "epoch": 0.3238232238471989, "grad_norm": 1.4284870176871014, "learning_rate": 1.9324329152426296e-05, "loss": 0.6873111724853516, "step": 2026 }, { "epoch": 0.3239830576200751, "grad_norm": 1.3375614567046203, "learning_rate": 1.932337392028138e-05, "loss": 0.7211938500404358, "step": 2027 }, { "epoch": 0.3241428913929513, "grad_norm": 1.4051997469569835, "learning_rate": 1.932241803702513e-05, "loss": 0.49940383434295654, "step": 2028 }, { "epoch": 0.32430272516582753, "grad_norm": 1.521991659139806, "learning_rate": 1.9321461502724305e-05, "loss": 0.7472026348114014, "step": 2029 }, { "epoch": 0.32446255893870374, "grad_norm": 1.5035761534471468, "learning_rate": 1.932050431744571e-05, "loss": 0.7686707377433777, "step": 2030 }, { "epoch": 0.32462239271157994, "grad_norm": 1.2463676971122088, "learning_rate": 1.9319546481256185e-05, "loss": 0.6900930404663086, "step": 2031 }, { "epoch": 0.32478222648445615, "grad_norm": 6.227918774185052, "learning_rate": 1.9318587994222628e-05, "loss": 0.6302396655082703, "step": 2032 }, { "epoch": 0.32494206025733235, "grad_norm": 1.5788818257851456, "learning_rate": 1.9317628856411973e-05, "loss": 0.794941782951355, "step": 2033 }, { "epoch": 0.32510189403020856, "grad_norm": 1.2720579101607778, "learning_rate": 1.93166690678912e-05, "loss": 0.6165955066680908, "step": 2034 }, { "epoch": 0.32526172780308477, "grad_norm": 1.3996265133486416, "learning_rate": 1.9315708628727346e-05, "loss": 0.7178764343261719, "step": 2035 }, { "epoch": 0.325421561575961, "grad_norm": 1.3767808308996918, "learning_rate": 1.9314747538987476e-05, "loss": 0.7133528590202332, "step": 2036 }, { "epoch": 0.32558139534883723, "grad_norm": 1.0710715018227577, "learning_rate": 1.9313785798738714e-05, "loss": 0.5676624178886414, "step": 2037 }, { "epoch": 0.32574122912171344, "grad_norm": 1.5397855493993216, "learning_rate": 1.931282340804822e-05, "loss": 0.7171624898910522, "step": 2038 }, { "epoch": 0.32590106289458964, "grad_norm": 1.1528548981822002, "learning_rate": 1.931186036698321e-05, "loss": 0.5560579895973206, "step": 2039 }, { "epoch": 0.32606089666746585, "grad_norm": 1.3021323145093588, "learning_rate": 1.9310896675610935e-05, "loss": 0.7790721654891968, "step": 2040 }, { "epoch": 0.32622073044034205, "grad_norm": 1.4407543835470789, "learning_rate": 1.9309932333998698e-05, "loss": 0.559990406036377, "step": 2041 }, { "epoch": 0.32638056421321826, "grad_norm": 1.4001250368358504, "learning_rate": 1.930896734221384e-05, "loss": 0.7634480595588684, "step": 2042 }, { "epoch": 0.32654039798609447, "grad_norm": 1.3370625033121766, "learning_rate": 1.930800170032376e-05, "loss": 0.788512110710144, "step": 2043 }, { "epoch": 0.32670023175897067, "grad_norm": 1.213617039105458, "learning_rate": 1.930703540839589e-05, "loss": 0.6017355918884277, "step": 2044 }, { "epoch": 0.3268600655318469, "grad_norm": 1.4728321015629149, "learning_rate": 1.9306068466497718e-05, "loss": 0.6031537055969238, "step": 2045 }, { "epoch": 0.3270198993047231, "grad_norm": 1.3193738743072785, "learning_rate": 1.9305100874696765e-05, "loss": 0.7643611431121826, "step": 2046 }, { "epoch": 0.3271797330775993, "grad_norm": 1.3537479570143414, "learning_rate": 1.9304132633060605e-05, "loss": 0.7710871696472168, "step": 2047 }, { "epoch": 0.3273395668504755, "grad_norm": 1.457852014934657, "learning_rate": 1.9303163741656862e-05, "loss": 0.6538043022155762, "step": 2048 }, { "epoch": 0.3274994006233517, "grad_norm": 1.2329699715063331, "learning_rate": 1.9302194200553194e-05, "loss": 0.5544213056564331, "step": 2049 }, { "epoch": 0.3276592343962279, "grad_norm": 1.4196224125992103, "learning_rate": 1.9301224009817315e-05, "loss": 0.6827383041381836, "step": 2050 }, { "epoch": 0.3278190681691041, "grad_norm": 1.2702812963296786, "learning_rate": 1.9300253169516975e-05, "loss": 0.545218825340271, "step": 2051 }, { "epoch": 0.3279789019419803, "grad_norm": 1.4201721748574088, "learning_rate": 1.929928167971998e-05, "loss": 0.6018340587615967, "step": 2052 }, { "epoch": 0.3281387357148565, "grad_norm": 1.4330714076451518, "learning_rate": 1.9298309540494172e-05, "loss": 0.8607896566390991, "step": 2053 }, { "epoch": 0.3282985694877328, "grad_norm": 1.2799787474799316, "learning_rate": 1.929733675190744e-05, "loss": 0.6718511581420898, "step": 2054 }, { "epoch": 0.328458403260609, "grad_norm": 1.3245814947408736, "learning_rate": 1.929636331402772e-05, "loss": 0.6179430484771729, "step": 2055 }, { "epoch": 0.3286182370334852, "grad_norm": 4.614347760426029, "learning_rate": 1.9295389226922998e-05, "loss": 0.6982767581939697, "step": 2056 }, { "epoch": 0.3287780708063614, "grad_norm": 1.7151093671293323, "learning_rate": 1.9294414490661298e-05, "loss": 0.8444027900695801, "step": 2057 }, { "epoch": 0.3289379045792376, "grad_norm": 1.5238475809155092, "learning_rate": 1.9293439105310692e-05, "loss": 0.6123001575469971, "step": 2058 }, { "epoch": 0.3290977383521138, "grad_norm": 1.5970981570432876, "learning_rate": 1.9292463070939295e-05, "loss": 0.6807155609130859, "step": 2059 }, { "epoch": 0.32925757212499, "grad_norm": 1.4214731111001513, "learning_rate": 1.9291486387615275e-05, "loss": 0.6175172328948975, "step": 2060 }, { "epoch": 0.3294174058978662, "grad_norm": 1.5786375081337962, "learning_rate": 1.929050905540684e-05, "loss": 0.72853684425354, "step": 2061 }, { "epoch": 0.32957723967074243, "grad_norm": 1.6618291785379806, "learning_rate": 1.9289531074382236e-05, "loss": 0.6667797565460205, "step": 2062 }, { "epoch": 0.32973707344361863, "grad_norm": 1.3928467862540008, "learning_rate": 1.9288552444609768e-05, "loss": 0.7732189297676086, "step": 2063 }, { "epoch": 0.32989690721649484, "grad_norm": 4.309586665787044, "learning_rate": 1.9287573166157782e-05, "loss": 0.5544748306274414, "step": 2064 }, { "epoch": 0.33005674098937104, "grad_norm": 1.4156703583281072, "learning_rate": 1.928659323909466e-05, "loss": 0.7927010655403137, "step": 2065 }, { "epoch": 0.33021657476224725, "grad_norm": 1.4430785427733983, "learning_rate": 1.9285612663488843e-05, "loss": 0.6232773065567017, "step": 2066 }, { "epoch": 0.33037640853512346, "grad_norm": 1.4594945975786389, "learning_rate": 1.9284631439408804e-05, "loss": 0.8209255933761597, "step": 2067 }, { "epoch": 0.33053624230799966, "grad_norm": 1.3218355517460822, "learning_rate": 1.928364956692308e-05, "loss": 0.6354833841323853, "step": 2068 }, { "epoch": 0.33069607608087587, "grad_norm": 1.534351920989753, "learning_rate": 1.928266704610023e-05, "loss": 0.7799805402755737, "step": 2069 }, { "epoch": 0.3308559098537521, "grad_norm": 1.2867151852048209, "learning_rate": 1.9281683877008873e-05, "loss": 0.677147388458252, "step": 2070 }, { "epoch": 0.33101574362662833, "grad_norm": 1.4240706891727741, "learning_rate": 1.9280700059717673e-05, "loss": 0.7214182019233704, "step": 2071 }, { "epoch": 0.33117557739950454, "grad_norm": 1.3171364890629889, "learning_rate": 1.9279715594295333e-05, "loss": 0.5954206585884094, "step": 2072 }, { "epoch": 0.33133541117238074, "grad_norm": 1.1629086557831496, "learning_rate": 1.9278730480810606e-05, "loss": 0.7429690957069397, "step": 2073 }, { "epoch": 0.33149524494525695, "grad_norm": 1.1760466303551553, "learning_rate": 1.9277744719332292e-05, "loss": 0.6554604768753052, "step": 2074 }, { "epoch": 0.33165507871813316, "grad_norm": 1.2062292181717447, "learning_rate": 1.927675830992923e-05, "loss": 0.6150014400482178, "step": 2075 }, { "epoch": 0.33181491249100936, "grad_norm": 1.2861874409337584, "learning_rate": 1.92757712526703e-05, "loss": 0.7295516133308411, "step": 2076 }, { "epoch": 0.33197474626388557, "grad_norm": 2.5062030869020604, "learning_rate": 1.9274783547624446e-05, "loss": 0.8371297121047974, "step": 2077 }, { "epoch": 0.3321345800367618, "grad_norm": 1.2272207140615128, "learning_rate": 1.9273795194860642e-05, "loss": 0.7680200338363647, "step": 2078 }, { "epoch": 0.332294413809638, "grad_norm": 1.5882282723186858, "learning_rate": 1.927280619444791e-05, "loss": 0.8393151164054871, "step": 2079 }, { "epoch": 0.3324542475825142, "grad_norm": 1.179009782190668, "learning_rate": 1.927181654645532e-05, "loss": 0.7088927030563354, "step": 2080 }, { "epoch": 0.3326140813553904, "grad_norm": 1.2548230969032554, "learning_rate": 1.9270826250951986e-05, "loss": 0.6035297513008118, "step": 2081 }, { "epoch": 0.3327739151282666, "grad_norm": 1.383198936614805, "learning_rate": 1.926983530800706e-05, "loss": 0.7479138374328613, "step": 2082 }, { "epoch": 0.3329337489011428, "grad_norm": 1.2504723529315345, "learning_rate": 1.9268843717689754e-05, "loss": 0.6685523986816406, "step": 2083 }, { "epoch": 0.333093582674019, "grad_norm": 1.2824898054302798, "learning_rate": 1.9267851480069314e-05, "loss": 0.7847437858581543, "step": 2084 }, { "epoch": 0.3332534164468952, "grad_norm": 1.3797187944558242, "learning_rate": 1.9266858595215038e-05, "loss": 0.6170862913131714, "step": 2085 }, { "epoch": 0.3334132502197714, "grad_norm": 1.37292797901858, "learning_rate": 1.926586506319626e-05, "loss": 0.7669895887374878, "step": 2086 }, { "epoch": 0.3335730839926476, "grad_norm": 1.4889808424538635, "learning_rate": 1.9264870884082362e-05, "loss": 0.8779116272926331, "step": 2087 }, { "epoch": 0.33373291776552383, "grad_norm": 1.4667203836448406, "learning_rate": 1.9263876057942784e-05, "loss": 0.7066929340362549, "step": 2088 }, { "epoch": 0.3338927515384001, "grad_norm": 1.1969881739302193, "learning_rate": 1.9262880584846994e-05, "loss": 0.7973390817642212, "step": 2089 }, { "epoch": 0.3340525853112763, "grad_norm": 1.1338758605072827, "learning_rate": 1.9261884464864515e-05, "loss": 0.6967628002166748, "step": 2090 }, { "epoch": 0.3342124190841525, "grad_norm": 1.30879709470664, "learning_rate": 1.9260887698064912e-05, "loss": 0.6753462553024292, "step": 2091 }, { "epoch": 0.3343722528570287, "grad_norm": 1.048560811418181, "learning_rate": 1.9259890284517792e-05, "loss": 0.6854492425918579, "step": 2092 }, { "epoch": 0.3345320866299049, "grad_norm": 1.1722916156701886, "learning_rate": 1.9258892224292814e-05, "loss": 0.5777957439422607, "step": 2093 }, { "epoch": 0.3346919204027811, "grad_norm": 1.1538240014333503, "learning_rate": 1.9257893517459686e-05, "loss": 0.4830266237258911, "step": 2094 }, { "epoch": 0.3348517541756573, "grad_norm": 1.1421519699690243, "learning_rate": 1.9256894164088143e-05, "loss": 0.682544469833374, "step": 2095 }, { "epoch": 0.33501158794853353, "grad_norm": 1.3539723264045047, "learning_rate": 1.925589416424798e-05, "loss": 0.7676233053207397, "step": 2096 }, { "epoch": 0.33517142172140973, "grad_norm": 1.4425634508155736, "learning_rate": 1.9254893518009035e-05, "loss": 0.668459415435791, "step": 2097 }, { "epoch": 0.33533125549428594, "grad_norm": 1.2793409559057243, "learning_rate": 1.925389222544119e-05, "loss": 0.6230002641677856, "step": 2098 }, { "epoch": 0.33549108926716215, "grad_norm": 1.3501493932216007, "learning_rate": 1.9252890286614366e-05, "loss": 0.7195125818252563, "step": 2099 }, { "epoch": 0.33565092304003835, "grad_norm": 1.4821674156805078, "learning_rate": 1.925188770159854e-05, "loss": 0.7344171404838562, "step": 2100 }, { "epoch": 0.33581075681291456, "grad_norm": 1.261558139182491, "learning_rate": 1.925088447046373e-05, "loss": 0.572076678276062, "step": 2101 }, { "epoch": 0.33597059058579076, "grad_norm": 1.5952528013886047, "learning_rate": 1.9249880593279998e-05, "loss": 0.6377012729644775, "step": 2102 }, { "epoch": 0.33613042435866697, "grad_norm": 1.3951611623560576, "learning_rate": 1.9248876070117447e-05, "loss": 0.7013492584228516, "step": 2103 }, { "epoch": 0.3362902581315432, "grad_norm": 1.3111493494017774, "learning_rate": 1.9247870901046233e-05, "loss": 0.6741582155227661, "step": 2104 }, { "epoch": 0.3364500919044194, "grad_norm": 1.332969184003824, "learning_rate": 1.924686508613655e-05, "loss": 0.7019170522689819, "step": 2105 }, { "epoch": 0.3366099256772956, "grad_norm": 1.4096697073293742, "learning_rate": 1.9245858625458645e-05, "loss": 0.7580071091651917, "step": 2106 }, { "epoch": 0.33676975945017185, "grad_norm": 1.4281876093629606, "learning_rate": 1.9244851519082802e-05, "loss": 0.6906900405883789, "step": 2107 }, { "epoch": 0.33692959322304805, "grad_norm": 1.8169277519937947, "learning_rate": 1.9243843767079354e-05, "loss": 0.7233393788337708, "step": 2108 }, { "epoch": 0.33708942699592426, "grad_norm": 1.6014113663952456, "learning_rate": 1.9242835369518683e-05, "loss": 0.7124673128128052, "step": 2109 }, { "epoch": 0.33724926076880046, "grad_norm": 1.2694922273590412, "learning_rate": 1.9241826326471208e-05, "loss": 0.6579681038856506, "step": 2110 }, { "epoch": 0.33740909454167667, "grad_norm": 1.2154247446539832, "learning_rate": 1.92408166380074e-05, "loss": 0.5584207773208618, "step": 2111 }, { "epoch": 0.3375689283145529, "grad_norm": 1.3340673798048275, "learning_rate": 1.9239806304197766e-05, "loss": 0.744728684425354, "step": 2112 }, { "epoch": 0.3377287620874291, "grad_norm": 1.5322556097645346, "learning_rate": 1.9238795325112867e-05, "loss": 0.7226409912109375, "step": 2113 }, { "epoch": 0.3378885958603053, "grad_norm": 1.1248841190949213, "learning_rate": 1.923778370082331e-05, "loss": 0.5105258226394653, "step": 2114 }, { "epoch": 0.3380484296331815, "grad_norm": 1.316563712921779, "learning_rate": 1.9236771431399744e-05, "loss": 0.7199316620826721, "step": 2115 }, { "epoch": 0.3382082634060577, "grad_norm": 1.5022677377345897, "learning_rate": 1.9235758516912852e-05, "loss": 0.7405848503112793, "step": 2116 }, { "epoch": 0.3383680971789339, "grad_norm": 1.46084764130018, "learning_rate": 1.9234744957433383e-05, "loss": 0.714366614818573, "step": 2117 }, { "epoch": 0.3385279309518101, "grad_norm": 1.5270508335266257, "learning_rate": 1.9233730753032115e-05, "loss": 0.7714265584945679, "step": 2118 }, { "epoch": 0.3386877647246863, "grad_norm": 1.198997977314793, "learning_rate": 1.923271590377988e-05, "loss": 0.5426170229911804, "step": 2119 }, { "epoch": 0.3388475984975625, "grad_norm": 1.4958328867331654, "learning_rate": 1.9231700409747547e-05, "loss": 0.7213335633277893, "step": 2120 }, { "epoch": 0.3390074322704387, "grad_norm": 1.4219427626293517, "learning_rate": 1.9230684271006038e-05, "loss": 0.7407135963439941, "step": 2121 }, { "epoch": 0.33916726604331493, "grad_norm": 1.2981219716027685, "learning_rate": 1.9229667487626317e-05, "loss": 0.7727035284042358, "step": 2122 }, { "epoch": 0.33932709981619114, "grad_norm": 1.4416489097674172, "learning_rate": 1.9228650059679388e-05, "loss": 0.6784064769744873, "step": 2123 }, { "epoch": 0.3394869335890674, "grad_norm": 1.2872806053776344, "learning_rate": 1.9227631987236307e-05, "loss": 0.5930296182632446, "step": 2124 }, { "epoch": 0.3396467673619436, "grad_norm": 1.517970817374122, "learning_rate": 1.9226613270368176e-05, "loss": 0.7374839782714844, "step": 2125 }, { "epoch": 0.3398066011348198, "grad_norm": 1.2332305023583001, "learning_rate": 1.9225593909146133e-05, "loss": 0.7358907461166382, "step": 2126 }, { "epoch": 0.339966434907696, "grad_norm": 1.2985983306121485, "learning_rate": 1.9224573903641374e-05, "loss": 0.6117913722991943, "step": 2127 }, { "epoch": 0.3401262686805722, "grad_norm": 1.4353025455567674, "learning_rate": 1.9223553253925124e-05, "loss": 0.6887273788452148, "step": 2128 }, { "epoch": 0.3402861024534484, "grad_norm": 1.3579495445108296, "learning_rate": 1.9222531960068663e-05, "loss": 0.5838130712509155, "step": 2129 }, { "epoch": 0.34044593622632463, "grad_norm": 1.5070281630029787, "learning_rate": 1.9221510022143325e-05, "loss": 0.6950957775115967, "step": 2130 }, { "epoch": 0.34060576999920084, "grad_norm": 1.4533488092264022, "learning_rate": 1.9220487440220463e-05, "loss": 0.874958872795105, "step": 2131 }, { "epoch": 0.34076560377207704, "grad_norm": 1.1926491204629357, "learning_rate": 1.92194642143715e-05, "loss": 0.6058677434921265, "step": 2132 }, { "epoch": 0.34092543754495325, "grad_norm": 1.2873497367379658, "learning_rate": 1.9218440344667893e-05, "loss": 0.5679577589035034, "step": 2133 }, { "epoch": 0.34108527131782945, "grad_norm": 1.1877543503020116, "learning_rate": 1.9217415831181143e-05, "loss": 0.691325843334198, "step": 2134 }, { "epoch": 0.34124510509070566, "grad_norm": 1.2960113385674916, "learning_rate": 1.9216390673982802e-05, "loss": 0.7360744476318359, "step": 2135 }, { "epoch": 0.34140493886358186, "grad_norm": 1.5513109934041314, "learning_rate": 1.921536487314446e-05, "loss": 0.6861453056335449, "step": 2136 }, { "epoch": 0.34156477263645807, "grad_norm": 1.2096825998267369, "learning_rate": 1.9214338428737758e-05, "loss": 0.6040647029876709, "step": 2137 }, { "epoch": 0.3417246064093343, "grad_norm": 1.3159836837684846, "learning_rate": 1.9213311340834377e-05, "loss": 0.6479071974754333, "step": 2138 }, { "epoch": 0.3418844401822105, "grad_norm": 1.2277053998557819, "learning_rate": 1.921228360950605e-05, "loss": 0.6403425931930542, "step": 2139 }, { "epoch": 0.3420442739550867, "grad_norm": 1.2606323094361136, "learning_rate": 1.921125523482454e-05, "loss": 0.5943559408187866, "step": 2140 }, { "epoch": 0.3422041077279629, "grad_norm": 1.310950436878692, "learning_rate": 1.9210226216861677e-05, "loss": 0.5414480566978455, "step": 2141 }, { "epoch": 0.34236394150083915, "grad_norm": 1.297264221778862, "learning_rate": 1.9209196555689316e-05, "loss": 0.6546425819396973, "step": 2142 }, { "epoch": 0.34252377527371536, "grad_norm": 1.1741840323458965, "learning_rate": 1.9208166251379366e-05, "loss": 0.5844888091087341, "step": 2143 }, { "epoch": 0.34268360904659156, "grad_norm": 1.4349656648928202, "learning_rate": 1.9207135304003784e-05, "loss": 0.8152811527252197, "step": 2144 }, { "epoch": 0.34284344281946777, "grad_norm": 1.3188790979319, "learning_rate": 1.9206103713634562e-05, "loss": 0.5289534330368042, "step": 2145 }, { "epoch": 0.343003276592344, "grad_norm": 1.271520146833905, "learning_rate": 1.9205071480343745e-05, "loss": 0.6145447492599487, "step": 2146 }, { "epoch": 0.3431631103652202, "grad_norm": 1.190306533974546, "learning_rate": 1.9204038604203423e-05, "loss": 0.6565712094306946, "step": 2147 }, { "epoch": 0.3433229441380964, "grad_norm": 1.3690941273428452, "learning_rate": 1.9203005085285724e-05, "loss": 0.6650857925415039, "step": 2148 }, { "epoch": 0.3434827779109726, "grad_norm": 1.7196130731492039, "learning_rate": 1.920197092366283e-05, "loss": 0.8203243017196655, "step": 2149 }, { "epoch": 0.3436426116838488, "grad_norm": 1.5076412650088085, "learning_rate": 1.920093611940696e-05, "loss": 0.8555629253387451, "step": 2150 }, { "epoch": 0.343802445456725, "grad_norm": 1.485532692888131, "learning_rate": 1.919990067259038e-05, "loss": 0.7605935335159302, "step": 2151 }, { "epoch": 0.3439622792296012, "grad_norm": 1.2741313738479505, "learning_rate": 1.9198864583285405e-05, "loss": 0.6650635004043579, "step": 2152 }, { "epoch": 0.3441221130024774, "grad_norm": 1.8697748024309784, "learning_rate": 1.919782785156439e-05, "loss": 0.725955069065094, "step": 2153 }, { "epoch": 0.3442819467753536, "grad_norm": 1.7057315287725887, "learning_rate": 1.9196790477499738e-05, "loss": 0.7051912546157837, "step": 2154 }, { "epoch": 0.3444417805482298, "grad_norm": 1.3087722526518826, "learning_rate": 1.9195752461163892e-05, "loss": 0.5346652865409851, "step": 2155 }, { "epoch": 0.34460161432110603, "grad_norm": 1.2131045158120295, "learning_rate": 1.919471380262935e-05, "loss": 0.529830813407898, "step": 2156 }, { "epoch": 0.34476144809398224, "grad_norm": 1.3378168192070843, "learning_rate": 1.9193674501968643e-05, "loss": 0.7154967784881592, "step": 2157 }, { "epoch": 0.34492128186685844, "grad_norm": 1.4589791960493876, "learning_rate": 1.919263455925435e-05, "loss": 0.7295655012130737, "step": 2158 }, { "epoch": 0.3450811156397347, "grad_norm": 1.6767372633579314, "learning_rate": 1.9191593974559102e-05, "loss": 0.8775319457054138, "step": 2159 }, { "epoch": 0.3452409494126109, "grad_norm": 1.5399894037393231, "learning_rate": 1.919055274795557e-05, "loss": 0.7624016404151917, "step": 2160 }, { "epoch": 0.3454007831854871, "grad_norm": 1.3965670280963478, "learning_rate": 1.9189510879516465e-05, "loss": 0.7064412832260132, "step": 2161 }, { "epoch": 0.3455606169583633, "grad_norm": 1.5755890419138319, "learning_rate": 1.918846836931455e-05, "loss": 0.7670370936393738, "step": 2162 }, { "epoch": 0.3457204507312395, "grad_norm": 1.2496283728118145, "learning_rate": 1.918742521742263e-05, "loss": 0.6749917268753052, "step": 2163 }, { "epoch": 0.34588028450411573, "grad_norm": 1.3911440420570547, "learning_rate": 1.9186381423913557e-05, "loss": 0.6406770944595337, "step": 2164 }, { "epoch": 0.34604011827699194, "grad_norm": 1.320550617599467, "learning_rate": 1.9185336988860224e-05, "loss": 0.519544243812561, "step": 2165 }, { "epoch": 0.34619995204986814, "grad_norm": 1.3482066839599325, "learning_rate": 1.918429191233557e-05, "loss": 0.5005266070365906, "step": 2166 }, { "epoch": 0.34635978582274435, "grad_norm": 1.3855108946604033, "learning_rate": 1.9183246194412583e-05, "loss": 0.6141878366470337, "step": 2167 }, { "epoch": 0.34651961959562055, "grad_norm": 1.4040529035774518, "learning_rate": 1.9182199835164282e-05, "loss": 0.7237972021102905, "step": 2168 }, { "epoch": 0.34667945336849676, "grad_norm": 1.163837210853369, "learning_rate": 1.9181152834663753e-05, "loss": 0.5295809507369995, "step": 2169 }, { "epoch": 0.34683928714137297, "grad_norm": 1.239161685031187, "learning_rate": 1.9180105192984107e-05, "loss": 0.6249514222145081, "step": 2170 }, { "epoch": 0.34699912091424917, "grad_norm": 1.6985247708904436, "learning_rate": 1.9179056910198515e-05, "loss": 0.6453648805618286, "step": 2171 }, { "epoch": 0.3471589546871254, "grad_norm": 1.2662094524648537, "learning_rate": 1.917800798638018e-05, "loss": 0.5743041038513184, "step": 2172 }, { "epoch": 0.3473187884600016, "grad_norm": 1.5927473189953434, "learning_rate": 1.9176958421602357e-05, "loss": 0.6658318042755127, "step": 2173 }, { "epoch": 0.3474786222328778, "grad_norm": 1.365931351986767, "learning_rate": 1.917590821593834e-05, "loss": 0.5837766528129578, "step": 2174 }, { "epoch": 0.347638456005754, "grad_norm": 1.3625857940374133, "learning_rate": 1.9174857369461474e-05, "loss": 0.6203511953353882, "step": 2175 }, { "epoch": 0.3477982897786302, "grad_norm": 1.7380929366283373, "learning_rate": 1.917380588224515e-05, "loss": 0.7477306127548218, "step": 2176 }, { "epoch": 0.34795812355150646, "grad_norm": 1.17346703472901, "learning_rate": 1.9172753754362795e-05, "loss": 0.752670168876648, "step": 2177 }, { "epoch": 0.34811795732438267, "grad_norm": 1.2823214480610563, "learning_rate": 1.917170098588789e-05, "loss": 0.7856263518333435, "step": 2178 }, { "epoch": 0.34827779109725887, "grad_norm": 1.4704618487809598, "learning_rate": 1.9170647576893955e-05, "loss": 0.6855887174606323, "step": 2179 }, { "epoch": 0.3484376248701351, "grad_norm": 1.4368258190968388, "learning_rate": 1.9169593527454556e-05, "loss": 0.5751274228096008, "step": 2180 }, { "epoch": 0.3485974586430113, "grad_norm": 1.2372665481699272, "learning_rate": 1.91685388376433e-05, "loss": 0.6764986515045166, "step": 2181 }, { "epoch": 0.3487572924158875, "grad_norm": 1.4523899192379128, "learning_rate": 1.916748350753385e-05, "loss": 0.8863280415534973, "step": 2182 }, { "epoch": 0.3489171261887637, "grad_norm": 1.5136289368176432, "learning_rate": 1.9166427537199903e-05, "loss": 0.8502447605133057, "step": 2183 }, { "epoch": 0.3490769599616399, "grad_norm": 1.4227689358327698, "learning_rate": 1.9165370926715206e-05, "loss": 0.6722351312637329, "step": 2184 }, { "epoch": 0.3492367937345161, "grad_norm": 1.3801821361346174, "learning_rate": 1.9164313676153545e-05, "loss": 0.6934195756912231, "step": 2185 }, { "epoch": 0.3493966275073923, "grad_norm": 1.421561032704299, "learning_rate": 1.9163255785588757e-05, "loss": 0.7017782330513, "step": 2186 }, { "epoch": 0.3495564612802685, "grad_norm": 1.4658196621734028, "learning_rate": 1.9162197255094722e-05, "loss": 0.7083251476287842, "step": 2187 }, { "epoch": 0.3497162950531447, "grad_norm": 1.887578608634586, "learning_rate": 1.9161138084745364e-05, "loss": 0.8458925485610962, "step": 2188 }, { "epoch": 0.3498761288260209, "grad_norm": 1.1581799986241494, "learning_rate": 1.9160078274614654e-05, "loss": 0.6457761526107788, "step": 2189 }, { "epoch": 0.35003596259889713, "grad_norm": 1.24710462557268, "learning_rate": 1.91590178247766e-05, "loss": 0.5765552520751953, "step": 2190 }, { "epoch": 0.35019579637177334, "grad_norm": 1.218281154501071, "learning_rate": 1.915795673530526e-05, "loss": 0.7291146516799927, "step": 2191 }, { "epoch": 0.35035563014464954, "grad_norm": 1.1637300789491882, "learning_rate": 1.915689500627474e-05, "loss": 0.7477453351020813, "step": 2192 }, { "epoch": 0.35051546391752575, "grad_norm": 1.7805533845619048, "learning_rate": 1.915583263775919e-05, "loss": 0.6426257491111755, "step": 2193 }, { "epoch": 0.35067529769040195, "grad_norm": 1.264667734879758, "learning_rate": 1.91547696298328e-05, "loss": 0.6602900624275208, "step": 2194 }, { "epoch": 0.3508351314632782, "grad_norm": 1.4252869812737796, "learning_rate": 1.91537059825698e-05, "loss": 0.6597764492034912, "step": 2195 }, { "epoch": 0.3509949652361544, "grad_norm": 1.4156373955586057, "learning_rate": 1.9152641696044482e-05, "loss": 0.6870718598365784, "step": 2196 }, { "epoch": 0.3511547990090306, "grad_norm": 1.2784801515713768, "learning_rate": 1.9151576770331162e-05, "loss": 0.6660459041595459, "step": 2197 }, { "epoch": 0.35131463278190683, "grad_norm": 1.3446341557975576, "learning_rate": 1.9150511205504216e-05, "loss": 0.5892510414123535, "step": 2198 }, { "epoch": 0.35147446655478304, "grad_norm": 1.1683658988193113, "learning_rate": 1.914944500163806e-05, "loss": 0.6674843430519104, "step": 2199 }, { "epoch": 0.35163430032765924, "grad_norm": 1.383326303810383, "learning_rate": 1.9148378158807156e-05, "loss": 0.6681661009788513, "step": 2200 }, { "epoch": 0.35179413410053545, "grad_norm": 1.2673405148563288, "learning_rate": 1.9147310677086e-05, "loss": 0.7103513479232788, "step": 2201 }, { "epoch": 0.35195396787341166, "grad_norm": 1.3990678608546927, "learning_rate": 1.9146242556549145e-05, "loss": 0.6854860782623291, "step": 2202 }, { "epoch": 0.35211380164628786, "grad_norm": 1.3449643813487475, "learning_rate": 1.9145173797271192e-05, "loss": 0.6426149010658264, "step": 2203 }, { "epoch": 0.35227363541916407, "grad_norm": 1.4226502410678483, "learning_rate": 1.9144104399326767e-05, "loss": 0.7394700050354004, "step": 2204 }, { "epoch": 0.35243346919204027, "grad_norm": 1.346739531757487, "learning_rate": 1.9143034362790563e-05, "loss": 0.6883282661437988, "step": 2205 }, { "epoch": 0.3525933029649165, "grad_norm": 1.2747824980514997, "learning_rate": 1.9141963687737305e-05, "loss": 0.6239020824432373, "step": 2206 }, { "epoch": 0.3527531367377927, "grad_norm": 1.195875644578937, "learning_rate": 1.914089237424176e-05, "loss": 0.6721460223197937, "step": 2207 }, { "epoch": 0.3529129705106689, "grad_norm": 1.3759212318557723, "learning_rate": 1.9139820422378753e-05, "loss": 0.7208749055862427, "step": 2208 }, { "epoch": 0.3530728042835451, "grad_norm": 1.3930086938493813, "learning_rate": 1.9138747832223137e-05, "loss": 0.6378113031387329, "step": 2209 }, { "epoch": 0.3532326380564213, "grad_norm": 1.3402919871943972, "learning_rate": 1.9137674603849826e-05, "loss": 0.6245076060295105, "step": 2210 }, { "epoch": 0.3533924718292975, "grad_norm": 1.2573745122289643, "learning_rate": 1.913660073733376e-05, "loss": 0.754725456237793, "step": 2211 }, { "epoch": 0.35355230560217377, "grad_norm": 1.5558278125313867, "learning_rate": 1.9135526232749947e-05, "loss": 0.5586130619049072, "step": 2212 }, { "epoch": 0.35371213937504997, "grad_norm": 1.5700746434427648, "learning_rate": 1.9134451090173417e-05, "loss": 0.7768895626068115, "step": 2213 }, { "epoch": 0.3538719731479262, "grad_norm": 1.2692268868516954, "learning_rate": 1.9133375309679257e-05, "loss": 0.6994771957397461, "step": 2214 }, { "epoch": 0.3540318069208024, "grad_norm": 1.4376904896721066, "learning_rate": 1.9132298891342597e-05, "loss": 0.8592725396156311, "step": 2215 }, { "epoch": 0.3541916406936786, "grad_norm": 1.4317348913916543, "learning_rate": 1.9131221835238608e-05, "loss": 0.8378867506980896, "step": 2216 }, { "epoch": 0.3543514744665548, "grad_norm": 1.2061353559963923, "learning_rate": 1.913014414144251e-05, "loss": 0.685500979423523, "step": 2217 }, { "epoch": 0.354511308239431, "grad_norm": 1.3766989732557222, "learning_rate": 1.912906581002956e-05, "loss": 0.5736105442047119, "step": 2218 }, { "epoch": 0.3546711420123072, "grad_norm": 1.2227542394429658, "learning_rate": 1.9127986841075076e-05, "loss": 0.6609224677085876, "step": 2219 }, { "epoch": 0.3548309757851834, "grad_norm": 1.3107797257915212, "learning_rate": 1.91269072346544e-05, "loss": 0.5643155574798584, "step": 2220 }, { "epoch": 0.3549908095580596, "grad_norm": 1.384293918113257, "learning_rate": 1.9125826990842923e-05, "loss": 0.7821148037910461, "step": 2221 }, { "epoch": 0.3551506433309358, "grad_norm": 1.420591441726726, "learning_rate": 1.91247461097161e-05, "loss": 0.7247564196586609, "step": 2222 }, { "epoch": 0.35531047710381203, "grad_norm": 1.4898205190878355, "learning_rate": 1.9123664591349407e-05, "loss": 0.791448712348938, "step": 2223 }, { "epoch": 0.35547031087668823, "grad_norm": 1.19900800917689, "learning_rate": 1.9122582435818377e-05, "loss": 0.578758955001831, "step": 2224 }, { "epoch": 0.35563014464956444, "grad_norm": 1.4844865789691442, "learning_rate": 1.912149964319858e-05, "loss": 0.7867324352264404, "step": 2225 }, { "epoch": 0.35578997842244064, "grad_norm": 1.3574573578496385, "learning_rate": 1.912041621356563e-05, "loss": 0.7163368463516235, "step": 2226 }, { "epoch": 0.35594981219531685, "grad_norm": 1.1650019541412304, "learning_rate": 1.9119332146995205e-05, "loss": 0.5776684284210205, "step": 2227 }, { "epoch": 0.35610964596819306, "grad_norm": 1.2676871938063698, "learning_rate": 1.9118247443562997e-05, "loss": 0.7581896781921387, "step": 2228 }, { "epoch": 0.35626947974106926, "grad_norm": 1.2719395695223767, "learning_rate": 1.911716210334477e-05, "loss": 0.6760993003845215, "step": 2229 }, { "epoch": 0.3564293135139455, "grad_norm": 1.4727175740327114, "learning_rate": 1.911607612641631e-05, "loss": 0.6401838064193726, "step": 2230 }, { "epoch": 0.35658914728682173, "grad_norm": 1.5978980138004721, "learning_rate": 1.9114989512853464e-05, "loss": 0.7380865812301636, "step": 2231 }, { "epoch": 0.35674898105969793, "grad_norm": 1.2351799078255776, "learning_rate": 1.9113902262732113e-05, "loss": 0.6600319147109985, "step": 2232 }, { "epoch": 0.35690881483257414, "grad_norm": 1.260816702204998, "learning_rate": 1.911281437612819e-05, "loss": 0.5905752778053284, "step": 2233 }, { "epoch": 0.35706864860545035, "grad_norm": 1.304109306319736, "learning_rate": 1.9111725853117673e-05, "loss": 0.8088815212249756, "step": 2234 }, { "epoch": 0.35722848237832655, "grad_norm": 1.534913263354202, "learning_rate": 1.9110636693776574e-05, "loss": 0.8027616739273071, "step": 2235 }, { "epoch": 0.35738831615120276, "grad_norm": 1.378695970274749, "learning_rate": 1.9109546898180957e-05, "loss": 0.7719030976295471, "step": 2236 }, { "epoch": 0.35754814992407896, "grad_norm": 1.2718227683695893, "learning_rate": 1.910845646640693e-05, "loss": 0.6129354238510132, "step": 2237 }, { "epoch": 0.35770798369695517, "grad_norm": 1.580701714347399, "learning_rate": 1.9107365398530645e-05, "loss": 0.587748646736145, "step": 2238 }, { "epoch": 0.3578678174698314, "grad_norm": 1.3842266436206139, "learning_rate": 1.91062736946283e-05, "loss": 0.8017829656600952, "step": 2239 }, { "epoch": 0.3580276512427076, "grad_norm": 1.1251086340556948, "learning_rate": 1.9105181354776134e-05, "loss": 0.5311495065689087, "step": 2240 }, { "epoch": 0.3581874850155838, "grad_norm": 1.374052648203255, "learning_rate": 1.910408837905043e-05, "loss": 0.7161800265312195, "step": 2241 }, { "epoch": 0.35834731878846, "grad_norm": 1.3346459670216408, "learning_rate": 1.9102994767527525e-05, "loss": 0.6798179149627686, "step": 2242 }, { "epoch": 0.3585071525613362, "grad_norm": 1.3442078593029276, "learning_rate": 1.9101900520283785e-05, "loss": 0.6485310792922974, "step": 2243 }, { "epoch": 0.3586669863342124, "grad_norm": 1.1207403823461928, "learning_rate": 1.910080563739563e-05, "loss": 0.5853524804115295, "step": 2244 }, { "epoch": 0.3588268201070886, "grad_norm": 1.2590931126532747, "learning_rate": 1.9099710118939526e-05, "loss": 0.614122748374939, "step": 2245 }, { "epoch": 0.3589866538799648, "grad_norm": 1.2828539849287879, "learning_rate": 1.9098613964991978e-05, "loss": 0.5590721368789673, "step": 2246 }, { "epoch": 0.359146487652841, "grad_norm": 1.327574512881504, "learning_rate": 1.9097517175629535e-05, "loss": 0.6087897419929504, "step": 2247 }, { "epoch": 0.3593063214257173, "grad_norm": 1.228567789889193, "learning_rate": 1.90964197509288e-05, "loss": 0.7089048027992249, "step": 2248 }, { "epoch": 0.3594661551985935, "grad_norm": 1.3724679997324547, "learning_rate": 1.909532169096641e-05, "loss": 0.6361855864524841, "step": 2249 }, { "epoch": 0.3596259889714697, "grad_norm": 1.4059674217922116, "learning_rate": 1.9094222995819042e-05, "loss": 0.7667871713638306, "step": 2250 }, { "epoch": 0.3597858227443459, "grad_norm": 1.2093138108518375, "learning_rate": 1.9093123665563434e-05, "loss": 0.7474459409713745, "step": 2251 }, { "epoch": 0.3599456565172221, "grad_norm": 1.4331999121420138, "learning_rate": 1.909202370027636e-05, "loss": 0.5469540357589722, "step": 2252 }, { "epoch": 0.3601054902900983, "grad_norm": 1.642548735136108, "learning_rate": 1.909092310003463e-05, "loss": 0.6732547283172607, "step": 2253 }, { "epoch": 0.3602653240629745, "grad_norm": 1.2908203819657267, "learning_rate": 1.9089821864915113e-05, "loss": 0.730177640914917, "step": 2254 }, { "epoch": 0.3604251578358507, "grad_norm": 1.309955698884273, "learning_rate": 1.908871999499471e-05, "loss": 0.6622527837753296, "step": 2255 }, { "epoch": 0.3605849916087269, "grad_norm": 1.4506291781988412, "learning_rate": 1.9087617490350375e-05, "loss": 0.7465305328369141, "step": 2256 }, { "epoch": 0.36074482538160313, "grad_norm": 1.439864072194541, "learning_rate": 1.9086514351059103e-05, "loss": 0.4852221608161926, "step": 2257 }, { "epoch": 0.36090465915447933, "grad_norm": 1.2995792203587009, "learning_rate": 1.9085410577197933e-05, "loss": 0.6201282739639282, "step": 2258 }, { "epoch": 0.36106449292735554, "grad_norm": 1.2739396152899671, "learning_rate": 1.908430616884395e-05, "loss": 0.5550696849822998, "step": 2259 }, { "epoch": 0.36122432670023175, "grad_norm": 1.3505453716143012, "learning_rate": 1.9083201126074278e-05, "loss": 0.7602344751358032, "step": 2260 }, { "epoch": 0.36138416047310795, "grad_norm": 1.2644819893627448, "learning_rate": 1.908209544896609e-05, "loss": 0.8537826538085938, "step": 2261 }, { "epoch": 0.36154399424598416, "grad_norm": 1.5465170304746598, "learning_rate": 1.908098913759661e-05, "loss": 0.7691806554794312, "step": 2262 }, { "epoch": 0.36170382801886036, "grad_norm": 1.3649347462699482, "learning_rate": 1.907988219204309e-05, "loss": 0.5716327428817749, "step": 2263 }, { "epoch": 0.36186366179173657, "grad_norm": 1.5446602073088853, "learning_rate": 1.907877461238284e-05, "loss": 0.747254490852356, "step": 2264 }, { "epoch": 0.36202349556461283, "grad_norm": 1.6120943925211932, "learning_rate": 1.9077666398693203e-05, "loss": 0.6439672112464905, "step": 2265 }, { "epoch": 0.36218332933748904, "grad_norm": 1.6532816323149786, "learning_rate": 1.9076557551051577e-05, "loss": 0.8232126235961914, "step": 2266 }, { "epoch": 0.36234316311036524, "grad_norm": 2.7065668339300433, "learning_rate": 1.9075448069535406e-05, "loss": 0.6724094152450562, "step": 2267 }, { "epoch": 0.36250299688324145, "grad_norm": 1.2456313452403254, "learning_rate": 1.907433795422216e-05, "loss": 0.5593839883804321, "step": 2268 }, { "epoch": 0.36266283065611765, "grad_norm": 1.2793599976783834, "learning_rate": 1.9073227205189377e-05, "loss": 0.622748851776123, "step": 2269 }, { "epoch": 0.36282266442899386, "grad_norm": 1.3179295508773496, "learning_rate": 1.907211582251462e-05, "loss": 0.646733283996582, "step": 2270 }, { "epoch": 0.36298249820187006, "grad_norm": 1.672412430444255, "learning_rate": 1.9071003806275513e-05, "loss": 0.6711282730102539, "step": 2271 }, { "epoch": 0.36314233197474627, "grad_norm": 1.6541158265420692, "learning_rate": 1.90698911565497e-05, "loss": 0.6606044769287109, "step": 2272 }, { "epoch": 0.3633021657476225, "grad_norm": 1.2153412986247627, "learning_rate": 1.90687778734149e-05, "loss": 0.5932390689849854, "step": 2273 }, { "epoch": 0.3634619995204987, "grad_norm": 1.4392526088546387, "learning_rate": 1.906766395694885e-05, "loss": 0.7151979207992554, "step": 2274 }, { "epoch": 0.3636218332933749, "grad_norm": 1.7992793966680611, "learning_rate": 1.906654940722935e-05, "loss": 0.7585502862930298, "step": 2275 }, { "epoch": 0.3637816670662511, "grad_norm": 1.4276793832880579, "learning_rate": 1.906543422433423e-05, "loss": 0.7055114507675171, "step": 2276 }, { "epoch": 0.3639415008391273, "grad_norm": 1.1711589378554341, "learning_rate": 1.9064318408341376e-05, "loss": 0.6525132656097412, "step": 2277 }, { "epoch": 0.3641013346120035, "grad_norm": 1.552141631221825, "learning_rate": 1.906320195932871e-05, "loss": 0.7379230260848999, "step": 2278 }, { "epoch": 0.3642611683848797, "grad_norm": 1.1107944056886874, "learning_rate": 1.9062084877374197e-05, "loss": 0.6288002729415894, "step": 2279 }, { "epoch": 0.3644210021577559, "grad_norm": 1.1664224115225614, "learning_rate": 1.9060967162555854e-05, "loss": 0.6604692339897156, "step": 2280 }, { "epoch": 0.3645808359306321, "grad_norm": 1.4219229883787479, "learning_rate": 1.9059848814951742e-05, "loss": 0.7200974225997925, "step": 2281 }, { "epoch": 0.3647406697035083, "grad_norm": 1.2961547383310783, "learning_rate": 1.905872983463995e-05, "loss": 0.586728572845459, "step": 2282 }, { "epoch": 0.3649005034763846, "grad_norm": 1.2392360520790637, "learning_rate": 1.9057610221698635e-05, "loss": 0.6320621371269226, "step": 2283 }, { "epoch": 0.3650603372492608, "grad_norm": 1.326368248281584, "learning_rate": 1.9056489976205986e-05, "loss": 0.5916095972061157, "step": 2284 }, { "epoch": 0.365220171022137, "grad_norm": 1.5065927144450595, "learning_rate": 1.9055369098240236e-05, "loss": 0.9199308753013611, "step": 2285 }, { "epoch": 0.3653800047950132, "grad_norm": 1.259195854333253, "learning_rate": 1.9054247587879656e-05, "loss": 0.6158570051193237, "step": 2286 }, { "epoch": 0.3655398385678894, "grad_norm": 1.3190212423573806, "learning_rate": 1.9053125445202574e-05, "loss": 0.7343533039093018, "step": 2287 }, { "epoch": 0.3656996723407656, "grad_norm": 1.4859925407975678, "learning_rate": 1.905200267028736e-05, "loss": 0.6763941049575806, "step": 2288 }, { "epoch": 0.3658595061136418, "grad_norm": 1.460462166628265, "learning_rate": 1.9050879263212422e-05, "loss": 0.807426393032074, "step": 2289 }, { "epoch": 0.366019339886518, "grad_norm": 1.214034814777802, "learning_rate": 1.904975522405621e-05, "loss": 0.7341610193252563, "step": 2290 }, { "epoch": 0.36617917365939423, "grad_norm": 1.5423240920785692, "learning_rate": 1.904863055289723e-05, "loss": 0.5988118648529053, "step": 2291 }, { "epoch": 0.36633900743227044, "grad_norm": 1.271948579811331, "learning_rate": 1.904750524981402e-05, "loss": 0.666029155254364, "step": 2292 }, { "epoch": 0.36649884120514664, "grad_norm": 1.3285773553415599, "learning_rate": 1.904637931488517e-05, "loss": 0.7065035104751587, "step": 2293 }, { "epoch": 0.36665867497802285, "grad_norm": 1.4858638611586399, "learning_rate": 1.9045252748189306e-05, "loss": 0.6691937446594238, "step": 2294 }, { "epoch": 0.36681850875089905, "grad_norm": 1.4457590391504243, "learning_rate": 1.9044125549805113e-05, "loss": 0.6454895734786987, "step": 2295 }, { "epoch": 0.36697834252377526, "grad_norm": 1.6188200897394212, "learning_rate": 1.90429977198113e-05, "loss": 0.6886719465255737, "step": 2296 }, { "epoch": 0.36713817629665146, "grad_norm": 1.350054639404368, "learning_rate": 1.9041869258286637e-05, "loss": 0.6574082374572754, "step": 2297 }, { "epoch": 0.36729801006952767, "grad_norm": 1.6920383496681304, "learning_rate": 1.9040740165309933e-05, "loss": 0.7125925421714783, "step": 2298 }, { "epoch": 0.3674578438424039, "grad_norm": 1.523060219310861, "learning_rate": 1.9039610440960037e-05, "loss": 0.7222104072570801, "step": 2299 }, { "epoch": 0.36761767761528014, "grad_norm": 1.313242155448583, "learning_rate": 1.9038480085315845e-05, "loss": 0.6746781468391418, "step": 2300 }, { "epoch": 0.36777751138815634, "grad_norm": 1.2337856767418733, "learning_rate": 1.9037349098456296e-05, "loss": 0.6049522161483765, "step": 2301 }, { "epoch": 0.36793734516103255, "grad_norm": 1.4432965348092481, "learning_rate": 1.9036217480460377e-05, "loss": 0.8619695901870728, "step": 2302 }, { "epoch": 0.36809717893390875, "grad_norm": 1.5256818579641003, "learning_rate": 1.9035085231407113e-05, "loss": 0.6794500350952148, "step": 2303 }, { "epoch": 0.36825701270678496, "grad_norm": 1.3483048496990993, "learning_rate": 1.903395235137558e-05, "loss": 0.7094830274581909, "step": 2304 }, { "epoch": 0.36841684647966116, "grad_norm": 1.2774943411859414, "learning_rate": 1.9032818840444892e-05, "loss": 0.6534925103187561, "step": 2305 }, { "epoch": 0.36857668025253737, "grad_norm": 1.223948616802673, "learning_rate": 1.9031684698694207e-05, "loss": 0.7298251986503601, "step": 2306 }, { "epoch": 0.3687365140254136, "grad_norm": 1.7948758449030071, "learning_rate": 1.9030549926202732e-05, "loss": 0.6548950672149658, "step": 2307 }, { "epoch": 0.3688963477982898, "grad_norm": 3.228412216904126, "learning_rate": 1.902941452304972e-05, "loss": 0.6911270022392273, "step": 2308 }, { "epoch": 0.369056181571166, "grad_norm": 1.3900962319496726, "learning_rate": 1.9028278489314456e-05, "loss": 0.8524800539016724, "step": 2309 }, { "epoch": 0.3692160153440422, "grad_norm": 1.3927992968621128, "learning_rate": 1.902714182507628e-05, "loss": 0.7086869478225708, "step": 2310 }, { "epoch": 0.3693758491169184, "grad_norm": 1.2629465189191378, "learning_rate": 1.902600453041457e-05, "loss": 0.6852195262908936, "step": 2311 }, { "epoch": 0.3695356828897946, "grad_norm": 1.2264572002060323, "learning_rate": 1.902486660540875e-05, "loss": 0.5391331911087036, "step": 2312 }, { "epoch": 0.3696955166626708, "grad_norm": 1.3240184598540954, "learning_rate": 1.9023728050138298e-05, "loss": 0.6464699506759644, "step": 2313 }, { "epoch": 0.369855350435547, "grad_norm": 1.2864306108155983, "learning_rate": 1.902258886468272e-05, "loss": 0.6460436582565308, "step": 2314 }, { "epoch": 0.3700151842084232, "grad_norm": 1.62278914143426, "learning_rate": 1.9021449049121566e-05, "loss": 0.7671197652816772, "step": 2315 }, { "epoch": 0.3701750179812994, "grad_norm": 1.3410920404091276, "learning_rate": 1.902030860353445e-05, "loss": 0.7515243291854858, "step": 2316 }, { "epoch": 0.37033485175417563, "grad_norm": 1.3536717001962868, "learning_rate": 1.901916752800101e-05, "loss": 0.8246750235557556, "step": 2317 }, { "epoch": 0.3704946855270519, "grad_norm": 1.3231588541265555, "learning_rate": 1.901802582260093e-05, "loss": 0.6614014506340027, "step": 2318 }, { "epoch": 0.3706545192999281, "grad_norm": 1.4778249815411955, "learning_rate": 1.901688348741395e-05, "loss": 0.7345165610313416, "step": 2319 }, { "epoch": 0.3708143530728043, "grad_norm": 1.278169336349388, "learning_rate": 1.9015740522519844e-05, "loss": 0.6189603209495544, "step": 2320 }, { "epoch": 0.3709741868456805, "grad_norm": 1.256002248074907, "learning_rate": 1.9014596927998433e-05, "loss": 0.6829824447631836, "step": 2321 }, { "epoch": 0.3711340206185567, "grad_norm": 1.2298746918679304, "learning_rate": 1.901345270392958e-05, "loss": 0.6096198558807373, "step": 2322 }, { "epoch": 0.3712938543914329, "grad_norm": 1.2828034103701864, "learning_rate": 1.9012307850393198e-05, "loss": 0.5410192608833313, "step": 2323 }, { "epoch": 0.3714536881643091, "grad_norm": 1.2334833978918194, "learning_rate": 1.901116236746923e-05, "loss": 0.5808621644973755, "step": 2324 }, { "epoch": 0.37161352193718533, "grad_norm": 3.2969480871757146, "learning_rate": 1.9010016255237685e-05, "loss": 0.7163689732551575, "step": 2325 }, { "epoch": 0.37177335571006154, "grad_norm": 1.3062994086037782, "learning_rate": 1.9008869513778597e-05, "loss": 0.6753708720207214, "step": 2326 }, { "epoch": 0.37193318948293774, "grad_norm": 1.3756202018664792, "learning_rate": 1.9007722143172046e-05, "loss": 0.5696530938148499, "step": 2327 }, { "epoch": 0.37209302325581395, "grad_norm": 1.4176155131444528, "learning_rate": 1.9006574143498167e-05, "loss": 0.6965281367301941, "step": 2328 }, { "epoch": 0.37225285702869015, "grad_norm": 1.3069644113810464, "learning_rate": 1.900542551483713e-05, "loss": 0.5799552202224731, "step": 2329 }, { "epoch": 0.37241269080156636, "grad_norm": 1.599910363344181, "learning_rate": 1.900427625726915e-05, "loss": 0.8022103309631348, "step": 2330 }, { "epoch": 0.37257252457444257, "grad_norm": 1.2995578889222923, "learning_rate": 1.9003126370874493e-05, "loss": 0.689765214920044, "step": 2331 }, { "epoch": 0.37273235834731877, "grad_norm": 1.5711379979562683, "learning_rate": 1.900197585573345e-05, "loss": 0.6918299198150635, "step": 2332 }, { "epoch": 0.372892192120195, "grad_norm": 1.2621539059938778, "learning_rate": 1.9000824711926383e-05, "loss": 0.6494290828704834, "step": 2333 }, { "epoch": 0.3730520258930712, "grad_norm": 1.4429397930271113, "learning_rate": 1.899967293953368e-05, "loss": 0.6864122152328491, "step": 2334 }, { "epoch": 0.3732118596659474, "grad_norm": 1.9323133139588526, "learning_rate": 1.899852053863577e-05, "loss": 0.693235456943512, "step": 2335 }, { "epoch": 0.37337169343882365, "grad_norm": 1.5659788893702264, "learning_rate": 1.8997367509313137e-05, "loss": 0.7093889117240906, "step": 2336 }, { "epoch": 0.37353152721169985, "grad_norm": 1.4035134126463373, "learning_rate": 1.8996213851646304e-05, "loss": 0.6221547722816467, "step": 2337 }, { "epoch": 0.37369136098457606, "grad_norm": 1.2420100512859678, "learning_rate": 1.8995059565715838e-05, "loss": 0.6674536466598511, "step": 2338 }, { "epoch": 0.37385119475745227, "grad_norm": 1.5320473270147095, "learning_rate": 1.8993904651602357e-05, "loss": 0.7583402395248413, "step": 2339 }, { "epoch": 0.37401102853032847, "grad_norm": 1.3453118547206937, "learning_rate": 1.8992749109386505e-05, "loss": 0.6328785419464111, "step": 2340 }, { "epoch": 0.3741708623032047, "grad_norm": 1.6963619752380146, "learning_rate": 1.899159293914899e-05, "loss": 0.7282474637031555, "step": 2341 }, { "epoch": 0.3743306960760809, "grad_norm": 1.3527506693653346, "learning_rate": 1.899043614097055e-05, "loss": 0.7564928531646729, "step": 2342 }, { "epoch": 0.3744905298489571, "grad_norm": 1.483964839472238, "learning_rate": 1.898927871493197e-05, "loss": 0.7425534725189209, "step": 2343 }, { "epoch": 0.3746503636218333, "grad_norm": 1.381794742742492, "learning_rate": 1.8988120661114088e-05, "loss": 0.6950216293334961, "step": 2344 }, { "epoch": 0.3748101973947095, "grad_norm": 1.3990237199874151, "learning_rate": 1.898696197959777e-05, "loss": 0.634365439414978, "step": 2345 }, { "epoch": 0.3749700311675857, "grad_norm": 1.6914838119358317, "learning_rate": 1.898580267046394e-05, "loss": 0.7068453431129456, "step": 2346 }, { "epoch": 0.3751298649404619, "grad_norm": 1.502564640798543, "learning_rate": 1.8984642733793556e-05, "loss": 0.7195779085159302, "step": 2347 }, { "epoch": 0.3752896987133381, "grad_norm": 1.1889894441615152, "learning_rate": 1.8983482169667628e-05, "loss": 0.5610717535018921, "step": 2348 }, { "epoch": 0.3754495324862143, "grad_norm": 1.3584700722664569, "learning_rate": 1.8982320978167204e-05, "loss": 0.7410106658935547, "step": 2349 }, { "epoch": 0.3756093662590905, "grad_norm": 1.3868833357221841, "learning_rate": 1.8981159159373374e-05, "loss": 0.7751725912094116, "step": 2350 }, { "epoch": 0.37576920003196673, "grad_norm": 1.3178797586715836, "learning_rate": 1.897999671336728e-05, "loss": 0.7639886140823364, "step": 2351 }, { "epoch": 0.37592903380484294, "grad_norm": 1.2803900634248926, "learning_rate": 1.89788336402301e-05, "loss": 0.6325713396072388, "step": 2352 }, { "epoch": 0.3760888675777192, "grad_norm": 1.2604974276581102, "learning_rate": 1.8977669940043058e-05, "loss": 0.6082665920257568, "step": 2353 }, { "epoch": 0.3762487013505954, "grad_norm": 1.3161545930075578, "learning_rate": 1.8976505612887434e-05, "loss": 0.6989084482192993, "step": 2354 }, { "epoch": 0.3764085351234716, "grad_norm": 1.1767925141075906, "learning_rate": 1.8975340658844524e-05, "loss": 0.7501645088195801, "step": 2355 }, { "epoch": 0.3765683688963478, "grad_norm": 1.4035739838949481, "learning_rate": 1.8974175077995692e-05, "loss": 0.6870884895324707, "step": 2356 }, { "epoch": 0.376728202669224, "grad_norm": 1.3233314327513521, "learning_rate": 1.8973008870422337e-05, "loss": 0.7261620759963989, "step": 2357 }, { "epoch": 0.3768880364421002, "grad_norm": 1.290902936583235, "learning_rate": 1.8971842036205907e-05, "loss": 0.5015931129455566, "step": 2358 }, { "epoch": 0.37704787021497643, "grad_norm": 1.2187828629771997, "learning_rate": 1.8970674575427886e-05, "loss": 0.7966753244400024, "step": 2359 }, { "epoch": 0.37720770398785264, "grad_norm": 1.2818737313560526, "learning_rate": 1.8969506488169802e-05, "loss": 0.5779162049293518, "step": 2360 }, { "epoch": 0.37736753776072884, "grad_norm": 1.1574690300289725, "learning_rate": 1.8968337774513236e-05, "loss": 0.62501460313797, "step": 2361 }, { "epoch": 0.37752737153360505, "grad_norm": 1.3200253636819586, "learning_rate": 1.8967168434539804e-05, "loss": 0.7241790294647217, "step": 2362 }, { "epoch": 0.37768720530648126, "grad_norm": 1.2999417529447475, "learning_rate": 1.8965998468331166e-05, "loss": 0.8491934537887573, "step": 2363 }, { "epoch": 0.37784703907935746, "grad_norm": 1.3851147256516887, "learning_rate": 1.896482787596903e-05, "loss": 0.7283545732498169, "step": 2364 }, { "epoch": 0.37800687285223367, "grad_norm": 1.352422627546802, "learning_rate": 1.8963656657535155e-05, "loss": 0.5698156356811523, "step": 2365 }, { "epoch": 0.37816670662510987, "grad_norm": 1.4069069526330233, "learning_rate": 1.896248481311132e-05, "loss": 0.7129380106925964, "step": 2366 }, { "epoch": 0.3783265403979861, "grad_norm": 1.248041322019358, "learning_rate": 1.8961312342779374e-05, "loss": 0.6312893629074097, "step": 2367 }, { "epoch": 0.3784863741708623, "grad_norm": 1.4601544165882803, "learning_rate": 1.896013924662119e-05, "loss": 0.5489940643310547, "step": 2368 }, { "epoch": 0.3786462079437385, "grad_norm": 1.4090385200292612, "learning_rate": 1.8958965524718696e-05, "loss": 0.7013911008834839, "step": 2369 }, { "epoch": 0.3788060417166147, "grad_norm": 1.4547076936070056, "learning_rate": 1.895779117715386e-05, "loss": 0.5934585928916931, "step": 2370 }, { "epoch": 0.37896587548949096, "grad_norm": 1.4557801897831244, "learning_rate": 1.89566162040087e-05, "loss": 0.8138035535812378, "step": 2371 }, { "epoch": 0.37912570926236716, "grad_norm": 1.2703914837710706, "learning_rate": 1.8955440605365263e-05, "loss": 0.5913794040679932, "step": 2372 }, { "epoch": 0.37928554303524337, "grad_norm": 1.18579969261621, "learning_rate": 1.8954264381305655e-05, "loss": 0.7592180967330933, "step": 2373 }, { "epoch": 0.37944537680811957, "grad_norm": 1.6783905641181744, "learning_rate": 1.8953087531912016e-05, "loss": 0.5866315960884094, "step": 2374 }, { "epoch": 0.3796052105809958, "grad_norm": 1.4249177496270335, "learning_rate": 1.895191005726653e-05, "loss": 0.7475906610488892, "step": 2375 }, { "epoch": 0.379765044353872, "grad_norm": 1.3576750680824172, "learning_rate": 1.8950731957451437e-05, "loss": 0.7017526030540466, "step": 2376 }, { "epoch": 0.3799248781267482, "grad_norm": 1.9558334067569259, "learning_rate": 1.8949553232549007e-05, "loss": 0.8012783527374268, "step": 2377 }, { "epoch": 0.3800847118996244, "grad_norm": 1.382800071057167, "learning_rate": 1.894837388264155e-05, "loss": 0.6443845629692078, "step": 2378 }, { "epoch": 0.3802445456725006, "grad_norm": 1.223616698688841, "learning_rate": 1.8947193907811442e-05, "loss": 0.5673719644546509, "step": 2379 }, { "epoch": 0.3804043794453768, "grad_norm": 1.67825651407276, "learning_rate": 1.8946013308141076e-05, "loss": 0.6483337879180908, "step": 2380 }, { "epoch": 0.380564213218253, "grad_norm": 1.7300254436504379, "learning_rate": 1.894483208371291e-05, "loss": 0.7897880673408508, "step": 2381 }, { "epoch": 0.3807240469911292, "grad_norm": 1.285599493787591, "learning_rate": 1.8943650234609427e-05, "loss": 0.7265780568122864, "step": 2382 }, { "epoch": 0.3808838807640054, "grad_norm": 1.3715999168174309, "learning_rate": 1.8942467760913173e-05, "loss": 0.6617293357849121, "step": 2383 }, { "epoch": 0.38104371453688163, "grad_norm": 1.394571537310316, "learning_rate": 1.894128466270672e-05, "loss": 0.7675162553787231, "step": 2384 }, { "epoch": 0.38120354830975783, "grad_norm": 1.4957953662767156, "learning_rate": 1.8940100940072698e-05, "loss": 0.7235264778137207, "step": 2385 }, { "epoch": 0.38136338208263404, "grad_norm": 1.562495764499615, "learning_rate": 1.8938916593093767e-05, "loss": 0.7154811024665833, "step": 2386 }, { "epoch": 0.38152321585551024, "grad_norm": 1.2920807827886578, "learning_rate": 1.893773162185264e-05, "loss": 0.6837974786758423, "step": 2387 }, { "epoch": 0.38168304962838645, "grad_norm": 1.2758272184147008, "learning_rate": 1.8936546026432076e-05, "loss": 0.6335198283195496, "step": 2388 }, { "epoch": 0.3818428834012627, "grad_norm": 1.431022837899817, "learning_rate": 1.8935359806914868e-05, "loss": 0.6070530414581299, "step": 2389 }, { "epoch": 0.3820027171741389, "grad_norm": 1.424797139688011, "learning_rate": 1.893417296338386e-05, "loss": 0.6973010301589966, "step": 2390 }, { "epoch": 0.3821625509470151, "grad_norm": 1.4308402122691664, "learning_rate": 1.8932985495921933e-05, "loss": 0.6314265727996826, "step": 2391 }, { "epoch": 0.38232238471989133, "grad_norm": 1.3929927003573948, "learning_rate": 1.893179740461202e-05, "loss": 0.7634855508804321, "step": 2392 }, { "epoch": 0.38248221849276753, "grad_norm": 1.38768574080021, "learning_rate": 1.8930608689537084e-05, "loss": 0.7030370831489563, "step": 2393 }, { "epoch": 0.38264205226564374, "grad_norm": 1.4677723770054645, "learning_rate": 1.8929419350780153e-05, "loss": 0.7541444301605225, "step": 2394 }, { "epoch": 0.38280188603851995, "grad_norm": 1.3074019751935728, "learning_rate": 1.892822938842428e-05, "loss": 0.7498692274093628, "step": 2395 }, { "epoch": 0.38296171981139615, "grad_norm": 1.4415756987709827, "learning_rate": 1.892703880255257e-05, "loss": 0.7347438335418701, "step": 2396 }, { "epoch": 0.38312155358427236, "grad_norm": 1.3969526779605301, "learning_rate": 1.8925847593248167e-05, "loss": 0.6433572173118591, "step": 2397 }, { "epoch": 0.38328138735714856, "grad_norm": 1.5010834691441521, "learning_rate": 1.892465576059426e-05, "loss": 0.7101916074752808, "step": 2398 }, { "epoch": 0.38344122113002477, "grad_norm": 1.5677994691997013, "learning_rate": 1.8923463304674087e-05, "loss": 0.6105614304542542, "step": 2399 }, { "epoch": 0.383601054902901, "grad_norm": 1.15537500517134, "learning_rate": 1.892227022557092e-05, "loss": 0.6372073888778687, "step": 2400 }, { "epoch": 0.3837608886757772, "grad_norm": 1.5180144436805976, "learning_rate": 1.892107652336808e-05, "loss": 0.6248462200164795, "step": 2401 }, { "epoch": 0.3839207224486534, "grad_norm": 1.3136596203634388, "learning_rate": 1.8919882198148937e-05, "loss": 0.8565120697021484, "step": 2402 }, { "epoch": 0.3840805562215296, "grad_norm": 1.3900865382180196, "learning_rate": 1.891868724999689e-05, "loss": 0.6853058338165283, "step": 2403 }, { "epoch": 0.3842403899944058, "grad_norm": 1.1085300104638243, "learning_rate": 1.8917491678995392e-05, "loss": 0.6835023164749146, "step": 2404 }, { "epoch": 0.384400223767282, "grad_norm": 1.330006915840754, "learning_rate": 1.891629548522794e-05, "loss": 0.6986216902732849, "step": 2405 }, { "epoch": 0.38456005754015826, "grad_norm": 1.426777480706275, "learning_rate": 1.891509866877807e-05, "loss": 0.6399407386779785, "step": 2406 }, { "epoch": 0.38471989131303447, "grad_norm": 1.5441731197947823, "learning_rate": 1.8913901229729367e-05, "loss": 0.5537971258163452, "step": 2407 }, { "epoch": 0.3848797250859107, "grad_norm": 1.2869435251360033, "learning_rate": 1.891270316816545e-05, "loss": 0.5446213483810425, "step": 2408 }, { "epoch": 0.3850395588587869, "grad_norm": 1.218053794045791, "learning_rate": 1.8911504484169997e-05, "loss": 0.6251507997512817, "step": 2409 }, { "epoch": 0.3851993926316631, "grad_norm": 1.2639202665875668, "learning_rate": 1.8910305177826708e-05, "loss": 0.6549391746520996, "step": 2410 }, { "epoch": 0.3853592264045393, "grad_norm": 1.4078632457534084, "learning_rate": 1.8909105249219345e-05, "loss": 0.6212690472602844, "step": 2411 }, { "epoch": 0.3855190601774155, "grad_norm": 1.115953752155601, "learning_rate": 1.8907904698431706e-05, "loss": 0.6680158376693726, "step": 2412 }, { "epoch": 0.3856788939502917, "grad_norm": 1.3508385491517176, "learning_rate": 1.890670352554763e-05, "loss": 0.5676329731941223, "step": 2413 }, { "epoch": 0.3858387277231679, "grad_norm": 2.546867652346029, "learning_rate": 1.8905501730651006e-05, "loss": 0.6895579099655151, "step": 2414 }, { "epoch": 0.3859985614960441, "grad_norm": 1.17541794693443, "learning_rate": 1.8904299313825766e-05, "loss": 0.551306962966919, "step": 2415 }, { "epoch": 0.3861583952689203, "grad_norm": 1.4962097801461163, "learning_rate": 1.890309627515588e-05, "loss": 0.7870636582374573, "step": 2416 }, { "epoch": 0.3863182290417965, "grad_norm": 1.1106154209169936, "learning_rate": 1.8901892614725357e-05, "loss": 0.5952318906784058, "step": 2417 }, { "epoch": 0.38647806281467273, "grad_norm": 1.5205720571595995, "learning_rate": 1.8900688332618265e-05, "loss": 0.6260591745376587, "step": 2418 }, { "epoch": 0.38663789658754893, "grad_norm": 1.1714487408895857, "learning_rate": 1.8899483428918705e-05, "loss": 0.6012989282608032, "step": 2419 }, { "epoch": 0.38679773036042514, "grad_norm": 1.0565386399487362, "learning_rate": 1.889827790371082e-05, "loss": 0.5123088955879211, "step": 2420 }, { "epoch": 0.38695756413330135, "grad_norm": 1.6782002709894803, "learning_rate": 1.8897071757078804e-05, "loss": 0.789835512638092, "step": 2421 }, { "epoch": 0.38711739790617755, "grad_norm": 1.3146969112467868, "learning_rate": 1.8895864989106886e-05, "loss": 0.7375814914703369, "step": 2422 }, { "epoch": 0.38727723167905376, "grad_norm": 1.2327318746934945, "learning_rate": 1.8894657599879344e-05, "loss": 0.5609710216522217, "step": 2423 }, { "epoch": 0.38743706545193, "grad_norm": 1.4340685560450874, "learning_rate": 1.88934495894805e-05, "loss": 0.8033417463302612, "step": 2424 }, { "epoch": 0.3875968992248062, "grad_norm": 1.5456315397679836, "learning_rate": 1.8892240957994715e-05, "loss": 0.7824690341949463, "step": 2425 }, { "epoch": 0.38775673299768243, "grad_norm": 1.6007971250969986, "learning_rate": 1.8891031705506394e-05, "loss": 0.8706427812576294, "step": 2426 }, { "epoch": 0.38791656677055864, "grad_norm": 1.4153547730799647, "learning_rate": 1.8889821832099988e-05, "loss": 0.6542589664459229, "step": 2427 }, { "epoch": 0.38807640054343484, "grad_norm": 1.2662618438069952, "learning_rate": 1.8888611337859993e-05, "loss": 0.5297260284423828, "step": 2428 }, { "epoch": 0.38823623431631105, "grad_norm": 1.235952052809652, "learning_rate": 1.8887400222870945e-05, "loss": 0.6769542694091797, "step": 2429 }, { "epoch": 0.38839606808918725, "grad_norm": 1.5051834497690855, "learning_rate": 1.888618848721742e-05, "loss": 0.7321336269378662, "step": 2430 }, { "epoch": 0.38855590186206346, "grad_norm": 1.312052692919082, "learning_rate": 1.8884976130984044e-05, "loss": 0.7581310272216797, "step": 2431 }, { "epoch": 0.38871573563493966, "grad_norm": 1.208909870939084, "learning_rate": 1.8883763154255483e-05, "loss": 0.6914512515068054, "step": 2432 }, { "epoch": 0.38887556940781587, "grad_norm": 1.242205892671285, "learning_rate": 1.888254955711645e-05, "loss": 0.6383819580078125, "step": 2433 }, { "epoch": 0.3890354031806921, "grad_norm": 1.1855285053985036, "learning_rate": 1.888133533965169e-05, "loss": 0.7127296924591064, "step": 2434 }, { "epoch": 0.3891952369535683, "grad_norm": 1.454027498495591, "learning_rate": 1.8880120501946013e-05, "loss": 0.7782776355743408, "step": 2435 }, { "epoch": 0.3893550707264445, "grad_norm": 1.2038532133937743, "learning_rate": 1.8878905044084247e-05, "loss": 0.6778348684310913, "step": 2436 }, { "epoch": 0.3895149044993207, "grad_norm": 1.2358586662348932, "learning_rate": 1.8877688966151276e-05, "loss": 0.678850531578064, "step": 2437 }, { "epoch": 0.3896747382721969, "grad_norm": 1.2310882404958317, "learning_rate": 1.8876472268232036e-05, "loss": 0.6906700134277344, "step": 2438 }, { "epoch": 0.3898345720450731, "grad_norm": 1.3910758762096997, "learning_rate": 1.887525495041149e-05, "loss": 0.5561615228652954, "step": 2439 }, { "epoch": 0.3899944058179493, "grad_norm": 1.262886108466452, "learning_rate": 1.887403701277465e-05, "loss": 0.5631967782974243, "step": 2440 }, { "epoch": 0.39015423959082557, "grad_norm": 1.370657963278164, "learning_rate": 1.8872818455406575e-05, "loss": 0.6397109031677246, "step": 2441 }, { "epoch": 0.3903140733637018, "grad_norm": 1.3407764057167944, "learning_rate": 1.887159927839237e-05, "loss": 0.5128443837165833, "step": 2442 }, { "epoch": 0.390473907136578, "grad_norm": 1.2077634064677218, "learning_rate": 1.8870379481817163e-05, "loss": 0.6194829940795898, "step": 2443 }, { "epoch": 0.3906337409094542, "grad_norm": 1.6070536718391837, "learning_rate": 1.8869159065766155e-05, "loss": 0.6471958160400391, "step": 2444 }, { "epoch": 0.3907935746823304, "grad_norm": 1.4278804748910605, "learning_rate": 1.886793803032457e-05, "loss": 0.6264729499816895, "step": 2445 }, { "epoch": 0.3909534084552066, "grad_norm": 1.490587514151806, "learning_rate": 1.886671637557768e-05, "loss": 0.750165581703186, "step": 2446 }, { "epoch": 0.3911132422280828, "grad_norm": 1.485257823293616, "learning_rate": 1.88654941016108e-05, "loss": 0.6951596736907959, "step": 2447 }, { "epoch": 0.391273076000959, "grad_norm": 1.783302191960869, "learning_rate": 1.8864271208509292e-05, "loss": 0.6799538135528564, "step": 2448 }, { "epoch": 0.3914329097738352, "grad_norm": 1.4058380287905372, "learning_rate": 1.886304769635856e-05, "loss": 0.6947135925292969, "step": 2449 }, { "epoch": 0.3915927435467114, "grad_norm": 1.2990860837261917, "learning_rate": 1.8861823565244045e-05, "loss": 0.7395917177200317, "step": 2450 }, { "epoch": 0.3917525773195876, "grad_norm": 1.343366826772679, "learning_rate": 1.8860598815251243e-05, "loss": 0.6555167436599731, "step": 2451 }, { "epoch": 0.39191241109246383, "grad_norm": 1.564948229368321, "learning_rate": 1.8859373446465677e-05, "loss": 0.701756477355957, "step": 2452 }, { "epoch": 0.39207224486534004, "grad_norm": 1.349252125770698, "learning_rate": 1.8858147458972927e-05, "loss": 0.6478126049041748, "step": 2453 }, { "epoch": 0.39223207863821624, "grad_norm": 1.4308308764274074, "learning_rate": 1.885692085285861e-05, "loss": 0.6520286798477173, "step": 2454 }, { "epoch": 0.39239191241109245, "grad_norm": 1.1443917442791585, "learning_rate": 1.8855693628208393e-05, "loss": 0.6852548122406006, "step": 2455 }, { "epoch": 0.39255174618396865, "grad_norm": 1.4557037870487255, "learning_rate": 1.8854465785107976e-05, "loss": 0.5840084552764893, "step": 2456 }, { "epoch": 0.39271157995684486, "grad_norm": 3.089160152368177, "learning_rate": 1.885323732364311e-05, "loss": 0.6286720037460327, "step": 2457 }, { "epoch": 0.39287141372972106, "grad_norm": 1.186794600481349, "learning_rate": 1.8852008243899583e-05, "loss": 0.7039046287536621, "step": 2458 }, { "epoch": 0.3930312475025973, "grad_norm": 1.2323814720520614, "learning_rate": 1.8850778545963237e-05, "loss": 0.6885911226272583, "step": 2459 }, { "epoch": 0.39319108127547353, "grad_norm": 1.4664472568682543, "learning_rate": 1.884954822991994e-05, "loss": 0.7402110695838928, "step": 2460 }, { "epoch": 0.39335091504834974, "grad_norm": 1.256666410825022, "learning_rate": 1.8848317295855615e-05, "loss": 0.7018570899963379, "step": 2461 }, { "epoch": 0.39351074882122594, "grad_norm": 1.4458892926328117, "learning_rate": 1.884708574385623e-05, "loss": 0.6412159204483032, "step": 2462 }, { "epoch": 0.39367058259410215, "grad_norm": 1.2834927062628052, "learning_rate": 1.884585357400779e-05, "loss": 0.6426022052764893, "step": 2463 }, { "epoch": 0.39383041636697835, "grad_norm": 1.3316072959436314, "learning_rate": 1.884462078639635e-05, "loss": 0.6236803531646729, "step": 2464 }, { "epoch": 0.39399025013985456, "grad_norm": 1.2419514932693512, "learning_rate": 1.8843387381107995e-05, "loss": 0.635899543762207, "step": 2465 }, { "epoch": 0.39415008391273076, "grad_norm": 1.2413743199989606, "learning_rate": 1.8842153358228867e-05, "loss": 0.7009532451629639, "step": 2466 }, { "epoch": 0.39430991768560697, "grad_norm": 1.2678072240263927, "learning_rate": 1.8840918717845146e-05, "loss": 0.7032531499862671, "step": 2467 }, { "epoch": 0.3944697514584832, "grad_norm": 1.423848589352717, "learning_rate": 1.8839683460043054e-05, "loss": 0.7599601745605469, "step": 2468 }, { "epoch": 0.3946295852313594, "grad_norm": 1.163266893868482, "learning_rate": 1.883844758490885e-05, "loss": 0.6869351863861084, "step": 2469 }, { "epoch": 0.3947894190042356, "grad_norm": 1.2502097119175013, "learning_rate": 1.883721109252886e-05, "loss": 0.6392126083374023, "step": 2470 }, { "epoch": 0.3949492527771118, "grad_norm": 1.5516747083705993, "learning_rate": 1.8835973982989423e-05, "loss": 0.7324562072753906, "step": 2471 }, { "epoch": 0.395109086549988, "grad_norm": 1.1500754628846697, "learning_rate": 1.8834736256376934e-05, "loss": 0.6511144638061523, "step": 2472 }, { "epoch": 0.3952689203228642, "grad_norm": 1.4162149032357312, "learning_rate": 1.883349791277784e-05, "loss": 0.5472415685653687, "step": 2473 }, { "epoch": 0.3954287540957404, "grad_norm": 1.4639847222218327, "learning_rate": 1.883225895227861e-05, "loss": 0.6889536380767822, "step": 2474 }, { "epoch": 0.3955885878686166, "grad_norm": 1.3388780708753942, "learning_rate": 1.8831019374965784e-05, "loss": 0.6248984932899475, "step": 2475 }, { "epoch": 0.3957484216414928, "grad_norm": 1.6225472068918543, "learning_rate": 1.8829779180925915e-05, "loss": 0.6368595361709595, "step": 2476 }, { "epoch": 0.3959082554143691, "grad_norm": 1.1828412367737784, "learning_rate": 1.8828538370245625e-05, "loss": 0.7078070044517517, "step": 2477 }, { "epoch": 0.3960680891872453, "grad_norm": 1.410940356009364, "learning_rate": 1.8827296943011563e-05, "loss": 0.7656192779541016, "step": 2478 }, { "epoch": 0.3962279229601215, "grad_norm": 1.336777553865056, "learning_rate": 1.882605489931042e-05, "loss": 0.7151859998703003, "step": 2479 }, { "epoch": 0.3963877567329977, "grad_norm": 1.4466173030657343, "learning_rate": 1.8824812239228948e-05, "loss": 0.6537841558456421, "step": 2480 }, { "epoch": 0.3965475905058739, "grad_norm": 1.337421238877777, "learning_rate": 1.8823568962853925e-05, "loss": 0.6705565452575684, "step": 2481 }, { "epoch": 0.3967074242787501, "grad_norm": 1.1839628083304454, "learning_rate": 1.8822325070272174e-05, "loss": 0.6165805459022522, "step": 2482 }, { "epoch": 0.3968672580516263, "grad_norm": 1.4773074500330152, "learning_rate": 1.8821080561570562e-05, "loss": 0.5927593111991882, "step": 2483 }, { "epoch": 0.3970270918245025, "grad_norm": 1.4470981687161824, "learning_rate": 1.881983543683601e-05, "loss": 0.678826093673706, "step": 2484 }, { "epoch": 0.3971869255973787, "grad_norm": 1.3349284130811303, "learning_rate": 1.881858969615547e-05, "loss": 0.6547293663024902, "step": 2485 }, { "epoch": 0.39734675937025493, "grad_norm": 1.5820448190798875, "learning_rate": 1.8817343339615934e-05, "loss": 0.7982410192489624, "step": 2486 }, { "epoch": 0.39750659314313114, "grad_norm": 1.3346120859396406, "learning_rate": 1.8816096367304447e-05, "loss": 0.7250164747238159, "step": 2487 }, { "epoch": 0.39766642691600734, "grad_norm": 1.2517533518593795, "learning_rate": 1.8814848779308095e-05, "loss": 0.5848666429519653, "step": 2488 }, { "epoch": 0.39782626068888355, "grad_norm": 1.3466996535557765, "learning_rate": 1.8813600575714005e-05, "loss": 0.6787430047988892, "step": 2489 }, { "epoch": 0.39798609446175975, "grad_norm": 1.5237265244953173, "learning_rate": 1.8812351756609347e-05, "loss": 0.6465681195259094, "step": 2490 }, { "epoch": 0.39814592823463596, "grad_norm": 1.2748072238558674, "learning_rate": 1.881110232208133e-05, "loss": 0.5713223218917847, "step": 2491 }, { "epoch": 0.39830576200751217, "grad_norm": 1.2717433172481603, "learning_rate": 1.8809852272217212e-05, "loss": 0.6044856905937195, "step": 2492 }, { "epoch": 0.39846559578038837, "grad_norm": 1.4687071590576593, "learning_rate": 1.8808601607104293e-05, "loss": 0.7384705543518066, "step": 2493 }, { "epoch": 0.39862542955326463, "grad_norm": 1.5037189573727878, "learning_rate": 1.8807350326829918e-05, "loss": 0.7671334147453308, "step": 2494 }, { "epoch": 0.39878526332614084, "grad_norm": 1.2841000981991393, "learning_rate": 1.8806098431481467e-05, "loss": 0.5499131083488464, "step": 2495 }, { "epoch": 0.39894509709901704, "grad_norm": 1.2764653199771236, "learning_rate": 1.880484592114637e-05, "loss": 0.5303882360458374, "step": 2496 }, { "epoch": 0.39910493087189325, "grad_norm": 1.3573826692803093, "learning_rate": 1.88035927959121e-05, "loss": 0.6949981451034546, "step": 2497 }, { "epoch": 0.39926476464476945, "grad_norm": 1.2261121253026488, "learning_rate": 1.8802339055866164e-05, "loss": 0.6819010376930237, "step": 2498 }, { "epoch": 0.39942459841764566, "grad_norm": 1.267203650573772, "learning_rate": 1.8801084701096128e-05, "loss": 0.6188281774520874, "step": 2499 }, { "epoch": 0.39958443219052187, "grad_norm": 1.477787187430802, "learning_rate": 1.8799829731689586e-05, "loss": 0.6531288027763367, "step": 2500 }, { "epoch": 0.39974426596339807, "grad_norm": 1.2558840575511323, "learning_rate": 1.879857414773418e-05, "loss": 0.5383626818656921, "step": 2501 }, { "epoch": 0.3999040997362743, "grad_norm": 1.5602541265196705, "learning_rate": 1.8797317949317597e-05, "loss": 0.7250930070877075, "step": 2502 }, { "epoch": 0.4000639335091505, "grad_norm": 1.3363891123728038, "learning_rate": 1.8796061136527567e-05, "loss": 0.7773958444595337, "step": 2503 }, { "epoch": 0.4002237672820267, "grad_norm": 1.5161404090722266, "learning_rate": 1.879480370945186e-05, "loss": 0.7777020931243896, "step": 2504 }, { "epoch": 0.4003836010549029, "grad_norm": 2.0931703647094095, "learning_rate": 1.8793545668178287e-05, "loss": 0.599022626876831, "step": 2505 }, { "epoch": 0.4005434348277791, "grad_norm": 1.3161651177022367, "learning_rate": 1.879228701279471e-05, "loss": 0.6054858565330505, "step": 2506 }, { "epoch": 0.4007032686006553, "grad_norm": 1.1826956054481568, "learning_rate": 1.879102774338903e-05, "loss": 0.48367738723754883, "step": 2507 }, { "epoch": 0.4008631023735315, "grad_norm": 1.3506456820580912, "learning_rate": 1.8789767860049185e-05, "loss": 0.6375066041946411, "step": 2508 }, { "epoch": 0.4010229361464077, "grad_norm": 1.4905759885240082, "learning_rate": 1.878850736286316e-05, "loss": 0.6613455414772034, "step": 2509 }, { "epoch": 0.4011827699192839, "grad_norm": 1.3020646253961776, "learning_rate": 1.878724625191899e-05, "loss": 0.6601828932762146, "step": 2510 }, { "epoch": 0.4013426036921601, "grad_norm": 1.4383033944240375, "learning_rate": 1.8785984527304744e-05, "loss": 0.7754969000816345, "step": 2511 }, { "epoch": 0.4015024374650364, "grad_norm": 1.4519909294204556, "learning_rate": 1.8784722189108535e-05, "loss": 0.7394834756851196, "step": 2512 }, { "epoch": 0.4016622712379126, "grad_norm": 1.2864048151575114, "learning_rate": 1.878345923741852e-05, "loss": 0.5359139442443848, "step": 2513 }, { "epoch": 0.4018221050107888, "grad_norm": 1.356159978911461, "learning_rate": 1.87821956723229e-05, "loss": 0.6523616909980774, "step": 2514 }, { "epoch": 0.401981938783665, "grad_norm": 1.2409884288160593, "learning_rate": 1.878093149390992e-05, "loss": 0.6595865488052368, "step": 2515 }, { "epoch": 0.4021417725565412, "grad_norm": 1.6731895718417538, "learning_rate": 1.8779666702267857e-05, "loss": 0.7271475791931152, "step": 2516 }, { "epoch": 0.4023016063294174, "grad_norm": 1.1954344768300635, "learning_rate": 1.877840129748505e-05, "loss": 0.6353638172149658, "step": 2517 }, { "epoch": 0.4024614401022936, "grad_norm": 1.4138001882743734, "learning_rate": 1.8777135279649868e-05, "loss": 0.8530781269073486, "step": 2518 }, { "epoch": 0.4026212738751698, "grad_norm": 1.2584329039450248, "learning_rate": 1.877586864885072e-05, "loss": 0.6328309774398804, "step": 2519 }, { "epoch": 0.40278110764804603, "grad_norm": 1.2141457318181053, "learning_rate": 1.8774601405176073e-05, "loss": 0.6148073673248291, "step": 2520 }, { "epoch": 0.40294094142092224, "grad_norm": 1.2551462046627277, "learning_rate": 1.8773333548714413e-05, "loss": 0.6466900110244751, "step": 2521 }, { "epoch": 0.40310077519379844, "grad_norm": 1.1168099433047634, "learning_rate": 1.8772065079554294e-05, "loss": 0.6105293035507202, "step": 2522 }, { "epoch": 0.40326060896667465, "grad_norm": 2.3624534854481802, "learning_rate": 1.8770795997784296e-05, "loss": 0.6406100988388062, "step": 2523 }, { "epoch": 0.40342044273955086, "grad_norm": 1.490845366588663, "learning_rate": 1.8769526303493045e-05, "loss": 0.7287620902061462, "step": 2524 }, { "epoch": 0.40358027651242706, "grad_norm": 1.1188616257415602, "learning_rate": 1.8768255996769224e-05, "loss": 0.5413584113121033, "step": 2525 }, { "epoch": 0.40374011028530327, "grad_norm": 1.3696087907462406, "learning_rate": 1.8766985077701528e-05, "loss": 0.5382312536239624, "step": 2526 }, { "epoch": 0.40389994405817947, "grad_norm": 1.3060112444567429, "learning_rate": 1.8765713546378733e-05, "loss": 0.6882139444351196, "step": 2527 }, { "epoch": 0.4040597778310557, "grad_norm": 1.1676797482181926, "learning_rate": 1.8764441402889624e-05, "loss": 0.5961217880249023, "step": 2528 }, { "epoch": 0.4042196116039319, "grad_norm": 1.2701179194895784, "learning_rate": 1.8763168647323045e-05, "loss": 0.6321712732315063, "step": 2529 }, { "epoch": 0.40437944537680814, "grad_norm": 1.4191156924081736, "learning_rate": 1.876189527976789e-05, "loss": 0.627864420413971, "step": 2530 }, { "epoch": 0.40453927914968435, "grad_norm": 1.283226480856006, "learning_rate": 1.8760621300313077e-05, "loss": 0.6318312883377075, "step": 2531 }, { "epoch": 0.40469911292256056, "grad_norm": 1.151608783014521, "learning_rate": 1.875934670904758e-05, "loss": 0.6208364963531494, "step": 2532 }, { "epoch": 0.40485894669543676, "grad_norm": 1.5812475122451253, "learning_rate": 1.875807150606041e-05, "loss": 0.6964436173439026, "step": 2533 }, { "epoch": 0.40501878046831297, "grad_norm": 1.326464999807376, "learning_rate": 1.8756795691440624e-05, "loss": 0.7361592054367065, "step": 2534 }, { "epoch": 0.40517861424118917, "grad_norm": 1.2817290896883948, "learning_rate": 1.875551926527732e-05, "loss": 0.5912302136421204, "step": 2535 }, { "epoch": 0.4053384480140654, "grad_norm": 1.4754619621262512, "learning_rate": 1.8754242227659643e-05, "loss": 0.6169337630271912, "step": 2536 }, { "epoch": 0.4054982817869416, "grad_norm": 1.3075007961196483, "learning_rate": 1.8752964578676768e-05, "loss": 0.6893788576126099, "step": 2537 }, { "epoch": 0.4056581155598178, "grad_norm": 1.204604732497709, "learning_rate": 1.8751686318417927e-05, "loss": 0.6234867572784424, "step": 2538 }, { "epoch": 0.405817949332694, "grad_norm": 1.31716357515748, "learning_rate": 1.875040744697239e-05, "loss": 0.6303800344467163, "step": 2539 }, { "epoch": 0.4059777831055702, "grad_norm": 1.2496287999129172, "learning_rate": 1.874912796442947e-05, "loss": 0.568096399307251, "step": 2540 }, { "epoch": 0.4061376168784464, "grad_norm": 1.4034424137868862, "learning_rate": 1.8747847870878516e-05, "loss": 0.5732982158660889, "step": 2541 }, { "epoch": 0.4062974506513226, "grad_norm": 1.7260398949858777, "learning_rate": 1.874656716640893e-05, "loss": 0.6276465654373169, "step": 2542 }, { "epoch": 0.4064572844241988, "grad_norm": 1.549051997824, "learning_rate": 1.8745285851110148e-05, "loss": 0.5513802170753479, "step": 2543 }, { "epoch": 0.406617118197075, "grad_norm": 1.184821105205603, "learning_rate": 1.874400392507165e-05, "loss": 0.5467043519020081, "step": 2544 }, { "epoch": 0.40677695196995123, "grad_norm": 1.3361682515386182, "learning_rate": 1.8742721388382973e-05, "loss": 0.6476399898529053, "step": 2545 }, { "epoch": 0.40693678574282743, "grad_norm": 1.6082048175542332, "learning_rate": 1.8741438241133675e-05, "loss": 0.9062840938568115, "step": 2546 }, { "epoch": 0.4070966195157037, "grad_norm": 1.1610299120769314, "learning_rate": 1.874015448341337e-05, "loss": 0.6802487373352051, "step": 2547 }, { "epoch": 0.4072564532885799, "grad_norm": 1.1451740104823074, "learning_rate": 1.873887011531171e-05, "loss": 0.6131240129470825, "step": 2548 }, { "epoch": 0.4074162870614561, "grad_norm": 1.4404183300401066, "learning_rate": 1.873758513691839e-05, "loss": 0.7927498817443848, "step": 2549 }, { "epoch": 0.4075761208343323, "grad_norm": 1.4046412624242035, "learning_rate": 1.873629954832315e-05, "loss": 0.7408485412597656, "step": 2550 }, { "epoch": 0.4077359546072085, "grad_norm": 1.5338969688582427, "learning_rate": 1.8735013349615768e-05, "loss": 0.7097939252853394, "step": 2551 }, { "epoch": 0.4078957883800847, "grad_norm": 1.4480631352303857, "learning_rate": 1.8733726540886073e-05, "loss": 0.6516549587249756, "step": 2552 }, { "epoch": 0.40805562215296093, "grad_norm": 1.520521906713431, "learning_rate": 1.8732439122223924e-05, "loss": 0.7071236371994019, "step": 2553 }, { "epoch": 0.40821545592583713, "grad_norm": 1.4913644347551727, "learning_rate": 1.8731151093719236e-05, "loss": 0.5747165083885193, "step": 2554 }, { "epoch": 0.40837528969871334, "grad_norm": 1.1359488121197163, "learning_rate": 1.8729862455461956e-05, "loss": 0.4499322474002838, "step": 2555 }, { "epoch": 0.40853512347158955, "grad_norm": 1.3037037117983448, "learning_rate": 1.872857320754208e-05, "loss": 0.679273247718811, "step": 2556 }, { "epoch": 0.40869495724446575, "grad_norm": 1.3853083857464756, "learning_rate": 1.8727283350049647e-05, "loss": 0.6623892784118652, "step": 2557 }, { "epoch": 0.40885479101734196, "grad_norm": 1.3216371539625318, "learning_rate": 1.8725992883074733e-05, "loss": 0.6087357997894287, "step": 2558 }, { "epoch": 0.40901462479021816, "grad_norm": 1.4142919304847297, "learning_rate": 1.8724701806707453e-05, "loss": 0.7736249566078186, "step": 2559 }, { "epoch": 0.40917445856309437, "grad_norm": 1.3613334256580247, "learning_rate": 1.8723410121037984e-05, "loss": 0.6786304116249084, "step": 2560 }, { "epoch": 0.4093342923359706, "grad_norm": 1.4484712681538918, "learning_rate": 1.872211782615653e-05, "loss": 0.6326806545257568, "step": 2561 }, { "epoch": 0.4094941261088468, "grad_norm": 1.6156592786827713, "learning_rate": 1.8720824922153332e-05, "loss": 0.7305386066436768, "step": 2562 }, { "epoch": 0.409653959881723, "grad_norm": 1.5315321820010313, "learning_rate": 1.871953140911869e-05, "loss": 0.6931586265563965, "step": 2563 }, { "epoch": 0.4098137936545992, "grad_norm": 1.3339205606934355, "learning_rate": 1.8718237287142933e-05, "loss": 0.6039432287216187, "step": 2564 }, { "epoch": 0.40997362742747545, "grad_norm": 1.3164297762032438, "learning_rate": 1.8716942556316442e-05, "loss": 0.6585820913314819, "step": 2565 }, { "epoch": 0.41013346120035166, "grad_norm": 1.3144792743992384, "learning_rate": 1.8715647216729632e-05, "loss": 0.7298041582107544, "step": 2566 }, { "epoch": 0.41029329497322786, "grad_norm": 1.3392974243330273, "learning_rate": 1.871435126847297e-05, "loss": 0.5700669884681702, "step": 2567 }, { "epoch": 0.41045312874610407, "grad_norm": 1.3030019521438314, "learning_rate": 1.8713054711636956e-05, "loss": 0.5755072832107544, "step": 2568 }, { "epoch": 0.4106129625189803, "grad_norm": 1.3003959901870343, "learning_rate": 1.8711757546312143e-05, "loss": 0.659722626209259, "step": 2569 }, { "epoch": 0.4107727962918565, "grad_norm": 1.367554549483079, "learning_rate": 1.871045977258911e-05, "loss": 0.5791096091270447, "step": 2570 }, { "epoch": 0.4109326300647327, "grad_norm": 1.3328904967309922, "learning_rate": 1.87091613905585e-05, "loss": 0.6319575309753418, "step": 2571 }, { "epoch": 0.4110924638376089, "grad_norm": 1.4007597126847522, "learning_rate": 1.870786240031098e-05, "loss": 0.7091948390007019, "step": 2572 }, { "epoch": 0.4112522976104851, "grad_norm": 1.2514749268523508, "learning_rate": 1.870656280193727e-05, "loss": 0.7648112177848816, "step": 2573 }, { "epoch": 0.4114121313833613, "grad_norm": 0.9808023563034459, "learning_rate": 1.870526259552813e-05, "loss": 0.4583331346511841, "step": 2574 }, { "epoch": 0.4115719651562375, "grad_norm": 1.2143843964604724, "learning_rate": 1.870396178117436e-05, "loss": 0.6987504363059998, "step": 2575 }, { "epoch": 0.4117317989291137, "grad_norm": 1.2287675476313058, "learning_rate": 1.8702660358966803e-05, "loss": 0.543438196182251, "step": 2576 }, { "epoch": 0.4118916327019899, "grad_norm": 1.2225967136815958, "learning_rate": 1.870135832899635e-05, "loss": 0.5291761159896851, "step": 2577 }, { "epoch": 0.4120514664748661, "grad_norm": 1.2216457621910095, "learning_rate": 1.8700055691353927e-05, "loss": 0.4831300377845764, "step": 2578 }, { "epoch": 0.41221130024774233, "grad_norm": 1.3615349403284, "learning_rate": 1.869875244613051e-05, "loss": 0.5436007976531982, "step": 2579 }, { "epoch": 0.41237113402061853, "grad_norm": 1.2463354116869925, "learning_rate": 1.8697448593417103e-05, "loss": 0.5898248553276062, "step": 2580 }, { "epoch": 0.41253096779349474, "grad_norm": 1.373749634426656, "learning_rate": 1.8696144133304774e-05, "loss": 0.7410004138946533, "step": 2581 }, { "epoch": 0.412690801566371, "grad_norm": 1.4397029734893974, "learning_rate": 1.869483906588461e-05, "loss": 0.9046136736869812, "step": 2582 }, { "epoch": 0.4128506353392472, "grad_norm": 1.230916543258048, "learning_rate": 1.8693533391247764e-05, "loss": 0.6853865385055542, "step": 2583 }, { "epoch": 0.4130104691121234, "grad_norm": 1.497337215516754, "learning_rate": 1.8692227109485416e-05, "loss": 0.7473920583724976, "step": 2584 }, { "epoch": 0.4131703028849996, "grad_norm": 1.455038727555084, "learning_rate": 1.8690920220688785e-05, "loss": 0.6136566996574402, "step": 2585 }, { "epoch": 0.4133301366578758, "grad_norm": 1.10259318407957, "learning_rate": 1.868961272494915e-05, "loss": 0.5488603115081787, "step": 2586 }, { "epoch": 0.41348997043075203, "grad_norm": 1.5860225603571025, "learning_rate": 1.8688304622357817e-05, "loss": 0.788642168045044, "step": 2587 }, { "epoch": 0.41364980420362824, "grad_norm": 1.8449251577279238, "learning_rate": 1.8686995913006137e-05, "loss": 0.7016304731369019, "step": 2588 }, { "epoch": 0.41380963797650444, "grad_norm": 1.5409891078370872, "learning_rate": 1.868568659698551e-05, "loss": 0.7064251899719238, "step": 2589 }, { "epoch": 0.41396947174938065, "grad_norm": 1.2274749145727122, "learning_rate": 1.868437667438737e-05, "loss": 0.6810289621353149, "step": 2590 }, { "epoch": 0.41412930552225685, "grad_norm": 1.2173102571761036, "learning_rate": 1.86830661453032e-05, "loss": 0.7732363939285278, "step": 2591 }, { "epoch": 0.41428913929513306, "grad_norm": 2.0661249800303434, "learning_rate": 1.868175500982452e-05, "loss": 0.7011655569076538, "step": 2592 }, { "epoch": 0.41444897306800926, "grad_norm": 1.4044030104602678, "learning_rate": 1.8680443268042898e-05, "loss": 0.6386412382125854, "step": 2593 }, { "epoch": 0.41460880684088547, "grad_norm": 1.3602284270216387, "learning_rate": 1.8679130920049943e-05, "loss": 0.6522265076637268, "step": 2594 }, { "epoch": 0.4147686406137617, "grad_norm": 1.2080037139675515, "learning_rate": 1.8677817965937297e-05, "loss": 0.5329179763793945, "step": 2595 }, { "epoch": 0.4149284743866379, "grad_norm": 1.53484158502707, "learning_rate": 1.867650440579666e-05, "loss": 0.6144271492958069, "step": 2596 }, { "epoch": 0.4150883081595141, "grad_norm": 1.5546025613373666, "learning_rate": 1.8675190239719766e-05, "loss": 0.41412514448165894, "step": 2597 }, { "epoch": 0.4152481419323903, "grad_norm": 1.235431435595209, "learning_rate": 1.8673875467798383e-05, "loss": 0.60771644115448, "step": 2598 }, { "epoch": 0.4154079757052665, "grad_norm": 2.2735947026080603, "learning_rate": 1.867256009012434e-05, "loss": 0.7305999994277954, "step": 2599 }, { "epoch": 0.41556780947814276, "grad_norm": 1.235756798980995, "learning_rate": 1.8671244106789498e-05, "loss": 0.474692165851593, "step": 2600 }, { "epoch": 0.41572764325101896, "grad_norm": 1.3227974470603094, "learning_rate": 1.8669927517885752e-05, "loss": 0.5973420739173889, "step": 2601 }, { "epoch": 0.41588747702389517, "grad_norm": 1.3166239420227732, "learning_rate": 1.8668610323505054e-05, "loss": 0.6436511278152466, "step": 2602 }, { "epoch": 0.4160473107967714, "grad_norm": 1.4740804698540886, "learning_rate": 1.8667292523739392e-05, "loss": 0.820503294467926, "step": 2603 }, { "epoch": 0.4162071445696476, "grad_norm": 1.3044028601118325, "learning_rate": 1.8665974118680794e-05, "loss": 0.5831207036972046, "step": 2604 }, { "epoch": 0.4163669783425238, "grad_norm": 1.4269740953980679, "learning_rate": 1.8664655108421333e-05, "loss": 0.6760775446891785, "step": 2605 }, { "epoch": 0.4165268121154, "grad_norm": 1.3371370542470644, "learning_rate": 1.866333549305313e-05, "loss": 0.7132297158241272, "step": 2606 }, { "epoch": 0.4166866458882762, "grad_norm": 1.2912101196651327, "learning_rate": 1.866201527266834e-05, "loss": 0.6463411450386047, "step": 2607 }, { "epoch": 0.4168464796611524, "grad_norm": 1.6490625556999685, "learning_rate": 1.866069444735915e-05, "loss": 0.6352095603942871, "step": 2608 }, { "epoch": 0.4170063134340286, "grad_norm": 1.13954861548915, "learning_rate": 1.8659373017217817e-05, "loss": 0.671646773815155, "step": 2609 }, { "epoch": 0.4171661472069048, "grad_norm": 1.2463650949325271, "learning_rate": 1.8658050982336618e-05, "loss": 0.5267366766929626, "step": 2610 }, { "epoch": 0.417325980979781, "grad_norm": 1.3813132229130265, "learning_rate": 1.8656728342807885e-05, "loss": 0.6257288455963135, "step": 2611 }, { "epoch": 0.4174858147526572, "grad_norm": 1.3287960717665106, "learning_rate": 1.865540509872398e-05, "loss": 0.7207344770431519, "step": 2612 }, { "epoch": 0.41764564852553343, "grad_norm": 1.5058585881843058, "learning_rate": 1.865408125017732e-05, "loss": 0.7606980800628662, "step": 2613 }, { "epoch": 0.41780548229840964, "grad_norm": 1.468749946326349, "learning_rate": 1.8652756797260348e-05, "loss": 0.7014983296394348, "step": 2614 }, { "epoch": 0.41796531607128584, "grad_norm": 1.299584196100411, "learning_rate": 1.8651431740065565e-05, "loss": 0.6995244026184082, "step": 2615 }, { "epoch": 0.41812514984416205, "grad_norm": 1.311847264592749, "learning_rate": 1.8650106078685515e-05, "loss": 0.5342460870742798, "step": 2616 }, { "epoch": 0.41828498361703825, "grad_norm": 1.3763969233048428, "learning_rate": 1.8648779813212767e-05, "loss": 0.6033475399017334, "step": 2617 }, { "epoch": 0.4184448173899145, "grad_norm": 1.5482177994719593, "learning_rate": 1.8647452943739945e-05, "loss": 0.7306591272354126, "step": 2618 }, { "epoch": 0.4186046511627907, "grad_norm": 1.4400126205230777, "learning_rate": 1.8646125470359716e-05, "loss": 0.7683197259902954, "step": 2619 }, { "epoch": 0.4187644849356669, "grad_norm": 1.5194477669379525, "learning_rate": 1.8644797393164786e-05, "loss": 0.7666222453117371, "step": 2620 }, { "epoch": 0.41892431870854313, "grad_norm": 1.4212832038411705, "learning_rate": 1.8643468712247902e-05, "loss": 0.6763629913330078, "step": 2621 }, { "epoch": 0.41908415248141934, "grad_norm": 1.5457671164941005, "learning_rate": 1.8642139427701852e-05, "loss": 0.744233250617981, "step": 2622 }, { "epoch": 0.41924398625429554, "grad_norm": 1.4284614769419837, "learning_rate": 1.864080953961947e-05, "loss": 0.6751522421836853, "step": 2623 }, { "epoch": 0.41940382002717175, "grad_norm": 1.4657566693867325, "learning_rate": 1.8639479048093632e-05, "loss": 0.8166995644569397, "step": 2624 }, { "epoch": 0.41956365380004795, "grad_norm": 1.7639987851780077, "learning_rate": 1.863814795321725e-05, "loss": 0.6867600083351135, "step": 2625 }, { "epoch": 0.41972348757292416, "grad_norm": 1.3880764468119742, "learning_rate": 1.8636816255083296e-05, "loss": 0.7666686773300171, "step": 2626 }, { "epoch": 0.41988332134580036, "grad_norm": 1.499818493306542, "learning_rate": 1.8635483953784755e-05, "loss": 0.635777473449707, "step": 2627 }, { "epoch": 0.42004315511867657, "grad_norm": 1.0568925473316226, "learning_rate": 1.863415104941468e-05, "loss": 0.6570395231246948, "step": 2628 }, { "epoch": 0.4202029888915528, "grad_norm": 1.284136862469683, "learning_rate": 1.863281754206615e-05, "loss": 0.6087621450424194, "step": 2629 }, { "epoch": 0.420362822664429, "grad_norm": 1.3008045214302233, "learning_rate": 1.8631483431832297e-05, "loss": 0.8099372386932373, "step": 2630 }, { "epoch": 0.4205226564373052, "grad_norm": 1.4441462637385905, "learning_rate": 1.8630148718806292e-05, "loss": 0.8868034482002258, "step": 2631 }, { "epoch": 0.4206824902101814, "grad_norm": 1.0989256522124609, "learning_rate": 1.8628813403081342e-05, "loss": 0.6445072889328003, "step": 2632 }, { "epoch": 0.4208423239830576, "grad_norm": 1.2405312356753488, "learning_rate": 1.8627477484750706e-05, "loss": 0.6612555980682373, "step": 2633 }, { "epoch": 0.4210021577559338, "grad_norm": 1.2526428441706496, "learning_rate": 1.8626140963907672e-05, "loss": 0.5852108597755432, "step": 2634 }, { "epoch": 0.42116199152881006, "grad_norm": 1.667699163622191, "learning_rate": 1.8624803840645587e-05, "loss": 0.7309861183166504, "step": 2635 }, { "epoch": 0.42132182530168627, "grad_norm": 1.3965904392257462, "learning_rate": 1.862346611505782e-05, "loss": 0.6741830706596375, "step": 2636 }, { "epoch": 0.4214816590745625, "grad_norm": 3.4839212281382546, "learning_rate": 1.8622127787237808e-05, "loss": 0.570526123046875, "step": 2637 }, { "epoch": 0.4216414928474387, "grad_norm": 1.3943950465013317, "learning_rate": 1.8620788857279e-05, "loss": 0.6865749955177307, "step": 2638 }, { "epoch": 0.4218013266203149, "grad_norm": 1.4756382788464721, "learning_rate": 1.861944932527491e-05, "loss": 0.5892429351806641, "step": 2639 }, { "epoch": 0.4219611603931911, "grad_norm": 1.3780970547279674, "learning_rate": 1.8618109191319086e-05, "loss": 0.5450118780136108, "step": 2640 }, { "epoch": 0.4221209941660673, "grad_norm": 1.290954745735008, "learning_rate": 1.8616768455505117e-05, "loss": 0.5516404509544373, "step": 2641 }, { "epoch": 0.4222808279389435, "grad_norm": 1.2339241935683858, "learning_rate": 1.8615427117926633e-05, "loss": 0.5819623470306396, "step": 2642 }, { "epoch": 0.4224406617118197, "grad_norm": 1.5245793139582604, "learning_rate": 1.8614085178677312e-05, "loss": 0.7443658113479614, "step": 2643 }, { "epoch": 0.4226004954846959, "grad_norm": 1.2537420204263738, "learning_rate": 1.8612742637850867e-05, "loss": 0.6300304532051086, "step": 2644 }, { "epoch": 0.4227603292575721, "grad_norm": 1.3948116800910608, "learning_rate": 1.8611399495541057e-05, "loss": 0.5512635111808777, "step": 2645 }, { "epoch": 0.4229201630304483, "grad_norm": 1.1834576396581178, "learning_rate": 1.8610055751841687e-05, "loss": 0.6206268072128296, "step": 2646 }, { "epoch": 0.42307999680332453, "grad_norm": 1.2409524918761912, "learning_rate": 1.8608711406846595e-05, "loss": 0.5366986393928528, "step": 2647 }, { "epoch": 0.42323983057620074, "grad_norm": 1.3297950190734418, "learning_rate": 1.8607366460649664e-05, "loss": 0.5894198417663574, "step": 2648 }, { "epoch": 0.42339966434907694, "grad_norm": 1.4511395173073873, "learning_rate": 1.860602091334482e-05, "loss": 0.770255982875824, "step": 2649 }, { "epoch": 0.42355949812195315, "grad_norm": 1.713811614024875, "learning_rate": 1.860467476502604e-05, "loss": 0.8404580950737, "step": 2650 }, { "epoch": 0.42371933189482935, "grad_norm": 1.3542744995614207, "learning_rate": 1.860332801578732e-05, "loss": 0.7051761150360107, "step": 2651 }, { "epoch": 0.42387916566770556, "grad_norm": 1.5979516283193123, "learning_rate": 1.8601980665722724e-05, "loss": 0.6905285120010376, "step": 2652 }, { "epoch": 0.4240389994405818, "grad_norm": 1.579876997729684, "learning_rate": 1.860063271492634e-05, "loss": 0.6362490653991699, "step": 2653 }, { "epoch": 0.424198833213458, "grad_norm": 1.3253451095168745, "learning_rate": 1.8599284163492307e-05, "loss": 0.7156815528869629, "step": 2654 }, { "epoch": 0.42435866698633423, "grad_norm": 1.5815048544027561, "learning_rate": 1.8597935011514802e-05, "loss": 0.7116405963897705, "step": 2655 }, { "epoch": 0.42451850075921044, "grad_norm": 1.5595149182630235, "learning_rate": 1.8596585259088046e-05, "loss": 0.8220904469490051, "step": 2656 }, { "epoch": 0.42467833453208664, "grad_norm": 1.206337043115669, "learning_rate": 1.8595234906306298e-05, "loss": 0.5348503589630127, "step": 2657 }, { "epoch": 0.42483816830496285, "grad_norm": 1.475251249288414, "learning_rate": 1.8593883953263867e-05, "loss": 0.6601158380508423, "step": 2658 }, { "epoch": 0.42499800207783905, "grad_norm": 1.5137647286072655, "learning_rate": 1.8592532400055094e-05, "loss": 0.6405354738235474, "step": 2659 }, { "epoch": 0.42515783585071526, "grad_norm": 1.2731925372053723, "learning_rate": 1.8591180246774367e-05, "loss": 0.5983975529670715, "step": 2660 }, { "epoch": 0.42531766962359147, "grad_norm": 1.444334552729937, "learning_rate": 1.8589827493516117e-05, "loss": 0.6375229358673096, "step": 2661 }, { "epoch": 0.42547750339646767, "grad_norm": 1.2650651663814558, "learning_rate": 1.8588474140374815e-05, "loss": 0.7199240326881409, "step": 2662 }, { "epoch": 0.4256373371693439, "grad_norm": 1.3481772125408054, "learning_rate": 1.8587120187444974e-05, "loss": 0.5506976842880249, "step": 2663 }, { "epoch": 0.4257971709422201, "grad_norm": 1.5088262560541765, "learning_rate": 1.858576563482115e-05, "loss": 0.6145796775817871, "step": 2664 }, { "epoch": 0.4259570047150963, "grad_norm": 1.253996121685459, "learning_rate": 1.8584410482597942e-05, "loss": 0.7721471786499023, "step": 2665 }, { "epoch": 0.4261168384879725, "grad_norm": 1.2081620375536384, "learning_rate": 1.8583054730869987e-05, "loss": 0.782804012298584, "step": 2666 }, { "epoch": 0.4262766722608487, "grad_norm": 1.2987341957974061, "learning_rate": 1.8581698379731965e-05, "loss": 0.8365254402160645, "step": 2667 }, { "epoch": 0.4264365060337249, "grad_norm": 1.2717105374401225, "learning_rate": 1.85803414292786e-05, "loss": 0.7107741832733154, "step": 2668 }, { "epoch": 0.4265963398066011, "grad_norm": 1.5682693582327958, "learning_rate": 1.8578983879604654e-05, "loss": 0.7148358821868896, "step": 2669 }, { "epoch": 0.4267561735794773, "grad_norm": 1.2972368415541693, "learning_rate": 1.857762573080494e-05, "loss": 0.5813679695129395, "step": 2670 }, { "epoch": 0.4269160073523536, "grad_norm": 1.341901173518789, "learning_rate": 1.85762669829743e-05, "loss": 0.6123851537704468, "step": 2671 }, { "epoch": 0.4270758411252298, "grad_norm": 1.449411589613324, "learning_rate": 1.8574907636207626e-05, "loss": 0.6664736270904541, "step": 2672 }, { "epoch": 0.427235674898106, "grad_norm": 1.2121881058344715, "learning_rate": 1.8573547690599846e-05, "loss": 0.5455510020256042, "step": 2673 }, { "epoch": 0.4273955086709822, "grad_norm": 1.4161861813085037, "learning_rate": 1.8572187146245943e-05, "loss": 0.7377834320068359, "step": 2674 }, { "epoch": 0.4275553424438584, "grad_norm": 1.1809105209337496, "learning_rate": 1.857082600324093e-05, "loss": 0.7194732427597046, "step": 2675 }, { "epoch": 0.4277151762167346, "grad_norm": 1.0771469396995075, "learning_rate": 1.8569464261679855e-05, "loss": 0.4365018606185913, "step": 2676 }, { "epoch": 0.4278750099896108, "grad_norm": 1.443785320583416, "learning_rate": 1.856810192165783e-05, "loss": 0.6594066619873047, "step": 2677 }, { "epoch": 0.428034843762487, "grad_norm": 1.2769924280878286, "learning_rate": 1.8566738983269984e-05, "loss": 0.7938088774681091, "step": 2678 }, { "epoch": 0.4281946775353632, "grad_norm": 1.0980022764815969, "learning_rate": 1.856537544661151e-05, "loss": 0.5815978050231934, "step": 2679 }, { "epoch": 0.4283545113082394, "grad_norm": 1.1870636129057943, "learning_rate": 1.856401131177763e-05, "loss": 0.5222591757774353, "step": 2680 }, { "epoch": 0.42851434508111563, "grad_norm": 1.2549551824879779, "learning_rate": 1.8562646578863607e-05, "loss": 0.764096200466156, "step": 2681 }, { "epoch": 0.42867417885399184, "grad_norm": 1.3389716616090255, "learning_rate": 1.8561281247964748e-05, "loss": 0.5149767994880676, "step": 2682 }, { "epoch": 0.42883401262686804, "grad_norm": 1.0857307837281196, "learning_rate": 1.8559915319176407e-05, "loss": 0.5169332027435303, "step": 2683 }, { "epoch": 0.42899384639974425, "grad_norm": 1.4568416850348997, "learning_rate": 1.8558548792593977e-05, "loss": 0.7676863074302673, "step": 2684 }, { "epoch": 0.42915368017262046, "grad_norm": 1.1632853731645163, "learning_rate": 1.8557181668312885e-05, "loss": 0.5853965282440186, "step": 2685 }, { "epoch": 0.42931351394549666, "grad_norm": 1.3651365984197148, "learning_rate": 1.8555813946428613e-05, "loss": 0.7672019004821777, "step": 2686 }, { "epoch": 0.42947334771837287, "grad_norm": 1.2001638306481697, "learning_rate": 1.855444562703667e-05, "loss": 0.49810171127319336, "step": 2687 }, { "epoch": 0.4296331814912491, "grad_norm": 1.4917414664398527, "learning_rate": 1.8553076710232625e-05, "loss": 0.677632212638855, "step": 2688 }, { "epoch": 0.42979301526412533, "grad_norm": 1.2697119687346066, "learning_rate": 1.855170719611207e-05, "loss": 0.7509523630142212, "step": 2689 }, { "epoch": 0.42995284903700154, "grad_norm": 1.3008205275433629, "learning_rate": 1.855033708477065e-05, "loss": 0.5905642509460449, "step": 2690 }, { "epoch": 0.43011268280987774, "grad_norm": 1.2788282807167841, "learning_rate": 1.8548966376304048e-05, "loss": 0.7288973331451416, "step": 2691 }, { "epoch": 0.43027251658275395, "grad_norm": 1.312251059569966, "learning_rate": 1.854759507080799e-05, "loss": 0.6654991507530212, "step": 2692 }, { "epoch": 0.43043235035563016, "grad_norm": 1.3403658685146094, "learning_rate": 1.854622316837824e-05, "loss": 0.556742787361145, "step": 2693 }, { "epoch": 0.43059218412850636, "grad_norm": 1.1688886871253978, "learning_rate": 1.854485066911061e-05, "loss": 0.5167092084884644, "step": 2694 }, { "epoch": 0.43075201790138257, "grad_norm": 1.304896305439634, "learning_rate": 1.854347757310095e-05, "loss": 0.6970034837722778, "step": 2695 }, { "epoch": 0.4309118516742588, "grad_norm": 1.4750597283587594, "learning_rate": 1.8542103880445156e-05, "loss": 0.7243872284889221, "step": 2696 }, { "epoch": 0.431071685447135, "grad_norm": 1.4301319280769824, "learning_rate": 1.8540729591239154e-05, "loss": 0.6656793355941772, "step": 2697 }, { "epoch": 0.4312315192200112, "grad_norm": 1.2619309197476563, "learning_rate": 1.8539354705578924e-05, "loss": 0.5107887983322144, "step": 2698 }, { "epoch": 0.4313913529928874, "grad_norm": 1.239374643512034, "learning_rate": 1.8537979223560482e-05, "loss": 0.6609721183776855, "step": 2699 }, { "epoch": 0.4315511867657636, "grad_norm": 1.238977436001884, "learning_rate": 1.8536603145279892e-05, "loss": 0.6854214668273926, "step": 2700 }, { "epoch": 0.4317110205386398, "grad_norm": 1.5732915232098463, "learning_rate": 1.8535226470833245e-05, "loss": 0.6976163983345032, "step": 2701 }, { "epoch": 0.431870854311516, "grad_norm": 1.2599761018164088, "learning_rate": 1.8533849200316688e-05, "loss": 0.6729791164398193, "step": 2702 }, { "epoch": 0.4320306880843922, "grad_norm": 1.3592300405358644, "learning_rate": 1.8532471333826408e-05, "loss": 0.6665728092193604, "step": 2703 }, { "epoch": 0.4321905218572684, "grad_norm": 1.1488625868479092, "learning_rate": 1.8531092871458624e-05, "loss": 0.5156057476997375, "step": 2704 }, { "epoch": 0.4323503556301446, "grad_norm": 1.1360091531809862, "learning_rate": 1.8529713813309603e-05, "loss": 0.510320782661438, "step": 2705 }, { "epoch": 0.4325101894030209, "grad_norm": 1.5686364857435715, "learning_rate": 1.8528334159475663e-05, "loss": 0.621070384979248, "step": 2706 }, { "epoch": 0.4326700231758971, "grad_norm": 1.2822654209881084, "learning_rate": 1.8526953910053143e-05, "loss": 0.629065215587616, "step": 2707 }, { "epoch": 0.4328298569487733, "grad_norm": 1.2694153386186622, "learning_rate": 1.8525573065138437e-05, "loss": 0.5646466016769409, "step": 2708 }, { "epoch": 0.4329896907216495, "grad_norm": 1.4681381847717354, "learning_rate": 1.8524191624827985e-05, "loss": 0.7112997770309448, "step": 2709 }, { "epoch": 0.4331495244945257, "grad_norm": 1.2616834156851249, "learning_rate": 1.8522809589218256e-05, "loss": 0.6119600534439087, "step": 2710 }, { "epoch": 0.4333093582674019, "grad_norm": 1.4180290905455317, "learning_rate": 1.8521426958405765e-05, "loss": 0.6050465106964111, "step": 2711 }, { "epoch": 0.4334691920402781, "grad_norm": 1.5232304281846774, "learning_rate": 1.8520043732487076e-05, "loss": 0.6822681427001953, "step": 2712 }, { "epoch": 0.4336290258131543, "grad_norm": 1.4793929180291836, "learning_rate": 1.8518659911558786e-05, "loss": 0.5466115474700928, "step": 2713 }, { "epoch": 0.43378885958603053, "grad_norm": 1.2564160357465335, "learning_rate": 1.8517275495717533e-05, "loss": 0.5680779218673706, "step": 2714 }, { "epoch": 0.43394869335890673, "grad_norm": 1.086973369900906, "learning_rate": 1.8515890485060003e-05, "loss": 0.6113004088401794, "step": 2715 }, { "epoch": 0.43410852713178294, "grad_norm": 1.5709763990037033, "learning_rate": 1.851450487968292e-05, "loss": 0.6402972340583801, "step": 2716 }, { "epoch": 0.43426836090465915, "grad_norm": 1.1606308415494244, "learning_rate": 1.851311867968305e-05, "loss": 0.5250452756881714, "step": 2717 }, { "epoch": 0.43442819467753535, "grad_norm": 1.7093127167761593, "learning_rate": 1.8511731885157195e-05, "loss": 0.7079339623451233, "step": 2718 }, { "epoch": 0.43458802845041156, "grad_norm": 1.3897288318919594, "learning_rate": 1.851034449620221e-05, "loss": 0.5923683643341064, "step": 2719 }, { "epoch": 0.43474786222328776, "grad_norm": 1.3912416250781754, "learning_rate": 1.8508956512914985e-05, "loss": 0.5753535032272339, "step": 2720 }, { "epoch": 0.43490769599616397, "grad_norm": 1.3660216202227329, "learning_rate": 1.850756793539245e-05, "loss": 0.7995709180831909, "step": 2721 }, { "epoch": 0.4350675297690402, "grad_norm": 1.555492143052812, "learning_rate": 1.850617876373158e-05, "loss": 0.734774112701416, "step": 2722 }, { "epoch": 0.43522736354191643, "grad_norm": 1.4187416844110405, "learning_rate": 1.8504788998029387e-05, "loss": 0.6282511353492737, "step": 2723 }, { "epoch": 0.43538719731479264, "grad_norm": 1.2991001049047641, "learning_rate": 1.850339863838293e-05, "loss": 0.7101029753684998, "step": 2724 }, { "epoch": 0.43554703108766885, "grad_norm": 1.2349607435094057, "learning_rate": 1.8502007684889305e-05, "loss": 0.4902058243751526, "step": 2725 }, { "epoch": 0.43570686486054505, "grad_norm": 1.3911281295034563, "learning_rate": 1.850061613764565e-05, "loss": 0.6887025833129883, "step": 2726 }, { "epoch": 0.43586669863342126, "grad_norm": 1.3720799381503344, "learning_rate": 1.8499223996749148e-05, "loss": 0.6519949436187744, "step": 2727 }, { "epoch": 0.43602653240629746, "grad_norm": 1.3947325252705876, "learning_rate": 1.8497831262297023e-05, "loss": 0.6462594270706177, "step": 2728 }, { "epoch": 0.43618636617917367, "grad_norm": 1.6206424269657662, "learning_rate": 1.8496437934386536e-05, "loss": 0.6737157106399536, "step": 2729 }, { "epoch": 0.4363461999520499, "grad_norm": 1.4375431206142988, "learning_rate": 1.8495044013114993e-05, "loss": 0.7685256004333496, "step": 2730 }, { "epoch": 0.4365060337249261, "grad_norm": 1.606093076796401, "learning_rate": 1.8493649498579736e-05, "loss": 0.6408920288085938, "step": 2731 }, { "epoch": 0.4366658674978023, "grad_norm": 1.143481435204188, "learning_rate": 1.8492254390878163e-05, "loss": 0.6485114097595215, "step": 2732 }, { "epoch": 0.4368257012706785, "grad_norm": 1.4154515424944105, "learning_rate": 1.8490858690107694e-05, "loss": 0.7204115390777588, "step": 2733 }, { "epoch": 0.4369855350435547, "grad_norm": 1.3431033907836756, "learning_rate": 1.84894623963658e-05, "loss": 0.5298323631286621, "step": 2734 }, { "epoch": 0.4371453688164309, "grad_norm": 1.23511607836628, "learning_rate": 1.848806550975e-05, "loss": 0.5476313829421997, "step": 2735 }, { "epoch": 0.4373052025893071, "grad_norm": 1.4186356373608031, "learning_rate": 1.8486668030357843e-05, "loss": 0.7225269079208374, "step": 2736 }, { "epoch": 0.4374650363621833, "grad_norm": 1.3623238340589274, "learning_rate": 1.8485269958286926e-05, "loss": 0.6731442809104919, "step": 2737 }, { "epoch": 0.4376248701350595, "grad_norm": 1.297298993792628, "learning_rate": 1.8483871293634883e-05, "loss": 0.7559491991996765, "step": 2738 }, { "epoch": 0.4377847039079357, "grad_norm": 1.338477808610587, "learning_rate": 1.8482472036499393e-05, "loss": 0.7615846395492554, "step": 2739 }, { "epoch": 0.43794453768081193, "grad_norm": 1.3574947434927291, "learning_rate": 1.8481072186978174e-05, "loss": 0.7183574438095093, "step": 2740 }, { "epoch": 0.4381043714536882, "grad_norm": 1.250905408897344, "learning_rate": 1.847967174516899e-05, "loss": 0.7152528762817383, "step": 2741 }, { "epoch": 0.4382642052265644, "grad_norm": 1.3029883485250713, "learning_rate": 1.8478270711169642e-05, "loss": 0.6538732051849365, "step": 2742 }, { "epoch": 0.4384240389994406, "grad_norm": 1.3560020836776105, "learning_rate": 1.8476869085077966e-05, "loss": 0.6755460500717163, "step": 2743 }, { "epoch": 0.4385838727723168, "grad_norm": 1.3990351390309235, "learning_rate": 1.847546686699186e-05, "loss": 0.54799884557724, "step": 2744 }, { "epoch": 0.438743706545193, "grad_norm": 1.528000081674506, "learning_rate": 1.8474064057009235e-05, "loss": 0.6817601919174194, "step": 2745 }, { "epoch": 0.4389035403180692, "grad_norm": 1.3561257696508033, "learning_rate": 1.847266065522807e-05, "loss": 0.7231380343437195, "step": 2746 }, { "epoch": 0.4390633740909454, "grad_norm": 1.4966368854555432, "learning_rate": 1.8471256661746367e-05, "loss": 0.6772811412811279, "step": 2747 }, { "epoch": 0.43922320786382163, "grad_norm": 1.445110882319774, "learning_rate": 1.846985207666218e-05, "loss": 0.640465497970581, "step": 2748 }, { "epoch": 0.43938304163669784, "grad_norm": 1.3435776433558957, "learning_rate": 1.8468446900073598e-05, "loss": 0.688041627407074, "step": 2749 }, { "epoch": 0.43954287540957404, "grad_norm": 1.2331825181751292, "learning_rate": 1.8467041132078752e-05, "loss": 0.6846007108688354, "step": 2750 }, { "epoch": 0.43970270918245025, "grad_norm": 1.4047480907706122, "learning_rate": 1.8465634772775823e-05, "loss": 0.6596185564994812, "step": 2751 }, { "epoch": 0.43986254295532645, "grad_norm": 1.3394445471315874, "learning_rate": 1.8464227822263016e-05, "loss": 0.6513063311576843, "step": 2752 }, { "epoch": 0.44002237672820266, "grad_norm": 1.4335653901332475, "learning_rate": 1.8462820280638594e-05, "loss": 0.7028379440307617, "step": 2753 }, { "epoch": 0.44018221050107886, "grad_norm": 1.3952278526515274, "learning_rate": 1.8461412148000853e-05, "loss": 0.617393970489502, "step": 2754 }, { "epoch": 0.44034204427395507, "grad_norm": 1.3866206433107913, "learning_rate": 1.846000342444813e-05, "loss": 0.6756799817085266, "step": 2755 }, { "epoch": 0.4405018780468313, "grad_norm": 1.2740139824284626, "learning_rate": 1.845859411007881e-05, "loss": 0.7021000385284424, "step": 2756 }, { "epoch": 0.4406617118197075, "grad_norm": 1.3686711148001451, "learning_rate": 1.845718420499131e-05, "loss": 0.7424697279930115, "step": 2757 }, { "epoch": 0.4408215455925837, "grad_norm": 1.6664706721400167, "learning_rate": 1.8455773709284095e-05, "loss": 0.7609999179840088, "step": 2758 }, { "epoch": 0.44098137936545995, "grad_norm": 1.2951066780284195, "learning_rate": 1.8454362623055667e-05, "loss": 0.6163060665130615, "step": 2759 }, { "epoch": 0.44114121313833615, "grad_norm": 1.2309896015047361, "learning_rate": 1.8452950946404577e-05, "loss": 0.5609992742538452, "step": 2760 }, { "epoch": 0.44130104691121236, "grad_norm": 1.3229133115285407, "learning_rate": 1.8451538679429405e-05, "loss": 0.7605381011962891, "step": 2761 }, { "epoch": 0.44146088068408856, "grad_norm": 1.2370134549276368, "learning_rate": 1.845012582222878e-05, "loss": 0.5433685779571533, "step": 2762 }, { "epoch": 0.44162071445696477, "grad_norm": 1.3314273764903077, "learning_rate": 1.844871237490137e-05, "loss": 0.6451576948165894, "step": 2763 }, { "epoch": 0.441780548229841, "grad_norm": 1.357261110247926, "learning_rate": 1.8447298337545892e-05, "loss": 0.6793545484542847, "step": 2764 }, { "epoch": 0.4419403820027172, "grad_norm": 1.4240919582891356, "learning_rate": 1.844588371026109e-05, "loss": 0.6260217428207397, "step": 2765 }, { "epoch": 0.4421002157755934, "grad_norm": 1.227586710360139, "learning_rate": 1.844446849314576e-05, "loss": 0.6825739145278931, "step": 2766 }, { "epoch": 0.4422600495484696, "grad_norm": 1.4343606983861044, "learning_rate": 1.8443052686298733e-05, "loss": 0.59722900390625, "step": 2767 }, { "epoch": 0.4424198833213458, "grad_norm": 1.4007125552665356, "learning_rate": 1.8441636289818887e-05, "loss": 0.714505672454834, "step": 2768 }, { "epoch": 0.442579717094222, "grad_norm": 1.3846441569325294, "learning_rate": 1.8440219303805136e-05, "loss": 0.6841034293174744, "step": 2769 }, { "epoch": 0.4427395508670982, "grad_norm": 1.61531210422101, "learning_rate": 1.8438801728356438e-05, "loss": 0.7382904291152954, "step": 2770 }, { "epoch": 0.4428993846399744, "grad_norm": 1.2005903068331252, "learning_rate": 1.843738356357179e-05, "loss": 0.6454112529754639, "step": 2771 }, { "epoch": 0.4430592184128506, "grad_norm": 1.333143496162026, "learning_rate": 1.8435964809550238e-05, "loss": 0.7349854707717896, "step": 2772 }, { "epoch": 0.4432190521857268, "grad_norm": 1.4204691656877722, "learning_rate": 1.8434545466390853e-05, "loss": 0.6155507564544678, "step": 2773 }, { "epoch": 0.44337888595860303, "grad_norm": 1.3882650006189199, "learning_rate": 1.8433125534192763e-05, "loss": 0.8168923258781433, "step": 2774 }, { "epoch": 0.44353871973147924, "grad_norm": 1.295614493422566, "learning_rate": 1.843170501305513e-05, "loss": 0.5273088216781616, "step": 2775 }, { "epoch": 0.4436985535043555, "grad_norm": 1.2304306299817223, "learning_rate": 1.843028390307716e-05, "loss": 0.5317558646202087, "step": 2776 }, { "epoch": 0.4438583872772317, "grad_norm": 1.4480725778114596, "learning_rate": 1.842886220435809e-05, "loss": 0.7330682277679443, "step": 2777 }, { "epoch": 0.4440182210501079, "grad_norm": 1.5852559600762188, "learning_rate": 1.8427439916997217e-05, "loss": 0.6935468316078186, "step": 2778 }, { "epoch": 0.4441780548229841, "grad_norm": 1.4134605864147578, "learning_rate": 1.8426017041093864e-05, "loss": 0.5892280340194702, "step": 2779 }, { "epoch": 0.4443378885958603, "grad_norm": 1.3682301714132115, "learning_rate": 1.84245935767474e-05, "loss": 0.6675612330436707, "step": 2780 }, { "epoch": 0.4444977223687365, "grad_norm": 1.2381211801383984, "learning_rate": 1.8423169524057233e-05, "loss": 0.6161730289459229, "step": 2781 }, { "epoch": 0.44465755614161273, "grad_norm": 1.3242082261046366, "learning_rate": 1.8421744883122813e-05, "loss": 0.6925511360168457, "step": 2782 }, { "epoch": 0.44481738991448894, "grad_norm": 1.4052887108130534, "learning_rate": 1.8420319654043638e-05, "loss": 0.7333444952964783, "step": 2783 }, { "epoch": 0.44497722368736514, "grad_norm": 1.6054104177114528, "learning_rate": 1.8418893836919233e-05, "loss": 0.7072803974151611, "step": 2784 }, { "epoch": 0.44513705746024135, "grad_norm": 1.676167257329856, "learning_rate": 1.8417467431849178e-05, "loss": 0.7403231263160706, "step": 2785 }, { "epoch": 0.44529689123311755, "grad_norm": 1.2115353061560399, "learning_rate": 1.841604043893309e-05, "loss": 0.6226142644882202, "step": 2786 }, { "epoch": 0.44545672500599376, "grad_norm": 1.2602733104230657, "learning_rate": 1.8414612858270616e-05, "loss": 0.5404184460639954, "step": 2787 }, { "epoch": 0.44561655877886996, "grad_norm": 1.2380510095173696, "learning_rate": 1.841318468996146e-05, "loss": 0.6730576753616333, "step": 2788 }, { "epoch": 0.44577639255174617, "grad_norm": 1.1479022526314306, "learning_rate": 1.8411755934105355e-05, "loss": 0.6104799509048462, "step": 2789 }, { "epoch": 0.4459362263246224, "grad_norm": 1.318424650038679, "learning_rate": 1.8410326590802086e-05, "loss": 0.6218891143798828, "step": 2790 }, { "epoch": 0.4460960600974986, "grad_norm": 1.5269645727326664, "learning_rate": 1.8408896660151473e-05, "loss": 0.6993046998977661, "step": 2791 }, { "epoch": 0.4462558938703748, "grad_norm": 2.146596820780386, "learning_rate": 1.8407466142253373e-05, "loss": 0.5991084575653076, "step": 2792 }, { "epoch": 0.446415727643251, "grad_norm": 1.218706126961346, "learning_rate": 1.840603503720769e-05, "loss": 0.7511904239654541, "step": 2793 }, { "epoch": 0.44657556141612725, "grad_norm": 1.3212461832971578, "learning_rate": 1.840460334511437e-05, "loss": 0.7064381837844849, "step": 2794 }, { "epoch": 0.44673539518900346, "grad_norm": 2.2082289436465694, "learning_rate": 1.8403171066073392e-05, "loss": 0.763009250164032, "step": 2795 }, { "epoch": 0.44689522896187966, "grad_norm": 1.350386303881398, "learning_rate": 1.8401738200184786e-05, "loss": 0.5992553234100342, "step": 2796 }, { "epoch": 0.44705506273475587, "grad_norm": 1.4745763201616076, "learning_rate": 1.8400304747548618e-05, "loss": 0.8035294413566589, "step": 2797 }, { "epoch": 0.4472148965076321, "grad_norm": 1.095881860093201, "learning_rate": 1.8398870708264995e-05, "loss": 0.5529322624206543, "step": 2798 }, { "epoch": 0.4473747302805083, "grad_norm": 1.6364709606554995, "learning_rate": 1.839743608243406e-05, "loss": 0.7133969068527222, "step": 2799 }, { "epoch": 0.4475345640533845, "grad_norm": 1.178462146353912, "learning_rate": 1.8396000870156006e-05, "loss": 0.5420316457748413, "step": 2800 }, { "epoch": 0.4476943978262607, "grad_norm": 1.4015318056861996, "learning_rate": 1.839456507153107e-05, "loss": 0.6392829418182373, "step": 2801 }, { "epoch": 0.4478542315991369, "grad_norm": 1.3489269174892164, "learning_rate": 1.839312868665951e-05, "loss": 0.7436625957489014, "step": 2802 }, { "epoch": 0.4480140653720131, "grad_norm": 1.3874550065549245, "learning_rate": 1.8391691715641648e-05, "loss": 0.8017918467521667, "step": 2803 }, { "epoch": 0.4481738991448893, "grad_norm": 1.2977105230748573, "learning_rate": 1.839025415857783e-05, "loss": 0.5740799903869629, "step": 2804 }, { "epoch": 0.4483337329177655, "grad_norm": 1.3273491468924163, "learning_rate": 1.8388816015568455e-05, "loss": 0.6616719365119934, "step": 2805 }, { "epoch": 0.4484935666906417, "grad_norm": 1.4588145873527718, "learning_rate": 1.8387377286713958e-05, "loss": 0.5958390235900879, "step": 2806 }, { "epoch": 0.4486534004635179, "grad_norm": 1.279068507333091, "learning_rate": 1.838593797211481e-05, "loss": 0.6808652877807617, "step": 2807 }, { "epoch": 0.44881323423639413, "grad_norm": 1.4909747567817628, "learning_rate": 1.8384498071871533e-05, "loss": 0.7231125831604004, "step": 2808 }, { "epoch": 0.44897306800927034, "grad_norm": 1.4375493813505245, "learning_rate": 1.838305758608468e-05, "loss": 0.6947187185287476, "step": 2809 }, { "epoch": 0.44913290178214654, "grad_norm": 1.3264291960555479, "learning_rate": 1.8381616514854853e-05, "loss": 0.6365419030189514, "step": 2810 }, { "epoch": 0.44929273555502275, "grad_norm": 1.1170768670432245, "learning_rate": 1.8380174858282686e-05, "loss": 0.5046124458312988, "step": 2811 }, { "epoch": 0.449452569327899, "grad_norm": 1.3415911967232566, "learning_rate": 1.837873261646887e-05, "loss": 0.5996062755584717, "step": 2812 }, { "epoch": 0.4496124031007752, "grad_norm": 1.5548787527404546, "learning_rate": 1.8377289789514115e-05, "loss": 0.7398148775100708, "step": 2813 }, { "epoch": 0.4497722368736514, "grad_norm": 1.3425207255788585, "learning_rate": 1.8375846377519183e-05, "loss": 0.6416780948638916, "step": 2814 }, { "epoch": 0.4499320706465276, "grad_norm": 1.3512210954134443, "learning_rate": 1.8374402380584887e-05, "loss": 0.641364574432373, "step": 2815 }, { "epoch": 0.45009190441940383, "grad_norm": 1.509498607287578, "learning_rate": 1.837295779881206e-05, "loss": 0.7375928163528442, "step": 2816 }, { "epoch": 0.45025173819228004, "grad_norm": 1.5630160184256345, "learning_rate": 1.8371512632301592e-05, "loss": 0.6184826493263245, "step": 2817 }, { "epoch": 0.45041157196515624, "grad_norm": 1.3583422964730152, "learning_rate": 1.837006688115441e-05, "loss": 0.7800716161727905, "step": 2818 }, { "epoch": 0.45057140573803245, "grad_norm": 1.3901727418901624, "learning_rate": 1.8368620545471468e-05, "loss": 0.6823962926864624, "step": 2819 }, { "epoch": 0.45073123951090865, "grad_norm": 1.2234838376503572, "learning_rate": 1.836717362535379e-05, "loss": 0.6141701936721802, "step": 2820 }, { "epoch": 0.45089107328378486, "grad_norm": 1.4661449076863649, "learning_rate": 1.8365726120902414e-05, "loss": 0.728952944278717, "step": 2821 }, { "epoch": 0.45105090705666107, "grad_norm": 1.2542377153965951, "learning_rate": 1.836427803221843e-05, "loss": 0.7058284282684326, "step": 2822 }, { "epoch": 0.45121074082953727, "grad_norm": 1.277476996063144, "learning_rate": 1.836282935940297e-05, "loss": 0.5963454246520996, "step": 2823 }, { "epoch": 0.4513705746024135, "grad_norm": 1.3359039122419993, "learning_rate": 1.83613801025572e-05, "loss": 0.7327374219894409, "step": 2824 }, { "epoch": 0.4515304083752897, "grad_norm": 1.389027280829791, "learning_rate": 1.835993026178233e-05, "loss": 0.585428774356842, "step": 2825 }, { "epoch": 0.4516902421481659, "grad_norm": 1.3669946103514778, "learning_rate": 1.835847983717962e-05, "loss": 0.6675020456314087, "step": 2826 }, { "epoch": 0.4518500759210421, "grad_norm": 1.2890937821744848, "learning_rate": 1.8357028828850356e-05, "loss": 0.4997371435165405, "step": 2827 }, { "epoch": 0.4520099096939183, "grad_norm": 1.2434851493887054, "learning_rate": 1.835557723689587e-05, "loss": 0.6266213655471802, "step": 2828 }, { "epoch": 0.45216974346679456, "grad_norm": 1.2203703818100107, "learning_rate": 1.835412506141754e-05, "loss": 0.5715497732162476, "step": 2829 }, { "epoch": 0.45232957723967077, "grad_norm": 1.2900342348806313, "learning_rate": 1.8352672302516775e-05, "loss": 0.6802390217781067, "step": 2830 }, { "epoch": 0.45248941101254697, "grad_norm": 1.328331809810264, "learning_rate": 1.835121896029504e-05, "loss": 0.642387330532074, "step": 2831 }, { "epoch": 0.4526492447854232, "grad_norm": 1.4230129925000554, "learning_rate": 1.8349765034853826e-05, "loss": 0.6931133270263672, "step": 2832 }, { "epoch": 0.4528090785582994, "grad_norm": 1.553451571705802, "learning_rate": 1.8348310526294665e-05, "loss": 0.6796694993972778, "step": 2833 }, { "epoch": 0.4529689123311756, "grad_norm": 1.4535378871037834, "learning_rate": 1.8346855434719144e-05, "loss": 0.6302200555801392, "step": 2834 }, { "epoch": 0.4531287461040518, "grad_norm": 1.4727623902400433, "learning_rate": 1.8345399760228878e-05, "loss": 0.6329060196876526, "step": 2835 }, { "epoch": 0.453288579876928, "grad_norm": 1.3352701911318894, "learning_rate": 1.834394350292552e-05, "loss": 0.6860841512680054, "step": 2836 }, { "epoch": 0.4534484136498042, "grad_norm": 1.4583421270671553, "learning_rate": 1.834248666291078e-05, "loss": 0.775091290473938, "step": 2837 }, { "epoch": 0.4536082474226804, "grad_norm": 1.430637563960444, "learning_rate": 1.8341029240286393e-05, "loss": 0.6983340978622437, "step": 2838 }, { "epoch": 0.4537680811955566, "grad_norm": 1.2535841819002507, "learning_rate": 1.8339571235154138e-05, "loss": 0.5347910523414612, "step": 2839 }, { "epoch": 0.4539279149684328, "grad_norm": 1.4260830345029103, "learning_rate": 1.833811264761584e-05, "loss": 0.661279559135437, "step": 2840 }, { "epoch": 0.454087748741309, "grad_norm": 1.2615768013938553, "learning_rate": 1.8336653477773364e-05, "loss": 0.5267983078956604, "step": 2841 }, { "epoch": 0.45424758251418523, "grad_norm": 1.70670099368009, "learning_rate": 1.833519372572861e-05, "loss": 0.7430722713470459, "step": 2842 }, { "epoch": 0.45440741628706144, "grad_norm": 2.01343246072569, "learning_rate": 1.8333733391583522e-05, "loss": 0.6976137161254883, "step": 2843 }, { "epoch": 0.45456725005993764, "grad_norm": 1.1006261474994836, "learning_rate": 1.8332272475440085e-05, "loss": 0.49765652418136597, "step": 2844 }, { "epoch": 0.45472708383281385, "grad_norm": 1.3117429387330484, "learning_rate": 1.8330810977400325e-05, "loss": 0.6957669258117676, "step": 2845 }, { "epoch": 0.45488691760569006, "grad_norm": 1.3967101607064691, "learning_rate": 1.8329348897566305e-05, "loss": 0.6148779392242432, "step": 2846 }, { "epoch": 0.4550467513785663, "grad_norm": 1.2427908636421554, "learning_rate": 1.8327886236040137e-05, "loss": 0.6018548011779785, "step": 2847 }, { "epoch": 0.4552065851514425, "grad_norm": 1.3767825714111976, "learning_rate": 1.8326422992923962e-05, "loss": 0.865698516368866, "step": 2848 }, { "epoch": 0.4553664189243187, "grad_norm": 1.4690699904330917, "learning_rate": 1.8324959168319968e-05, "loss": 0.6835354566574097, "step": 2849 }, { "epoch": 0.45552625269719493, "grad_norm": 1.5022246253520832, "learning_rate": 1.832349476233039e-05, "loss": 0.6499279737472534, "step": 2850 }, { "epoch": 0.45568608647007114, "grad_norm": 1.8574840662163452, "learning_rate": 1.832202977505749e-05, "loss": 0.7784950733184814, "step": 2851 }, { "epoch": 0.45584592024294734, "grad_norm": 1.207230199774369, "learning_rate": 1.832056420660358e-05, "loss": 0.6324540376663208, "step": 2852 }, { "epoch": 0.45600575401582355, "grad_norm": 1.3861259815089728, "learning_rate": 1.8319098057071013e-05, "loss": 0.6023910045623779, "step": 2853 }, { "epoch": 0.45616558778869976, "grad_norm": 1.39691403010959, "learning_rate": 1.8317631326562172e-05, "loss": 0.769321620464325, "step": 2854 }, { "epoch": 0.45632542156157596, "grad_norm": 1.3935332158175076, "learning_rate": 1.8316164015179495e-05, "loss": 0.6534066200256348, "step": 2855 }, { "epoch": 0.45648525533445217, "grad_norm": 1.2809865729782774, "learning_rate": 1.8314696123025456e-05, "loss": 0.6712850332260132, "step": 2856 }, { "epoch": 0.4566450891073284, "grad_norm": 1.0912986682521557, "learning_rate": 1.8313227650202558e-05, "loss": 0.6758321523666382, "step": 2857 }, { "epoch": 0.4568049228802046, "grad_norm": 1.7929198497146788, "learning_rate": 1.831175859681336e-05, "loss": 0.8168136477470398, "step": 2858 }, { "epoch": 0.4569647566530808, "grad_norm": 1.4209832954136896, "learning_rate": 1.8310288962960455e-05, "loss": 0.8760515451431274, "step": 2859 }, { "epoch": 0.457124590425957, "grad_norm": 1.8241086038651881, "learning_rate": 1.830881874874648e-05, "loss": 0.6644118428230286, "step": 2860 }, { "epoch": 0.4572844241988332, "grad_norm": 1.30606712680271, "learning_rate": 1.8307347954274103e-05, "loss": 0.6644967198371887, "step": 2861 }, { "epoch": 0.4574442579717094, "grad_norm": 1.3422129143177255, "learning_rate": 1.8305876579646043e-05, "loss": 0.7329211235046387, "step": 2862 }, { "epoch": 0.4576040917445856, "grad_norm": 1.1252413810307613, "learning_rate": 1.8304404624965054e-05, "loss": 0.5605420470237732, "step": 2863 }, { "epoch": 0.45776392551746187, "grad_norm": 1.196688727078003, "learning_rate": 1.830293209033393e-05, "loss": 0.6472973823547363, "step": 2864 }, { "epoch": 0.4579237592903381, "grad_norm": 1.3349922406152965, "learning_rate": 1.8301458975855518e-05, "loss": 0.5948977470397949, "step": 2865 }, { "epoch": 0.4580835930632143, "grad_norm": 1.454025890783899, "learning_rate": 1.829998528163268e-05, "loss": 0.6984856128692627, "step": 2866 }, { "epoch": 0.4582434268360905, "grad_norm": 1.3835072854346693, "learning_rate": 1.8298511007768347e-05, "loss": 0.6452276110649109, "step": 2867 }, { "epoch": 0.4584032606089667, "grad_norm": 1.34306225992123, "learning_rate": 1.829703615436547e-05, "loss": 0.5712462663650513, "step": 2868 }, { "epoch": 0.4585630943818429, "grad_norm": 1.4821701283571063, "learning_rate": 1.8295560721527048e-05, "loss": 0.8254045844078064, "step": 2869 }, { "epoch": 0.4587229281547191, "grad_norm": 1.2822222961334453, "learning_rate": 1.8294084709356118e-05, "loss": 0.6331624984741211, "step": 2870 }, { "epoch": 0.4588827619275953, "grad_norm": 1.2753866532041758, "learning_rate": 1.8292608117955765e-05, "loss": 0.47429144382476807, "step": 2871 }, { "epoch": 0.4590425957004715, "grad_norm": 1.2988043162456047, "learning_rate": 1.8291130947429107e-05, "loss": 0.6699603796005249, "step": 2872 }, { "epoch": 0.4592024294733477, "grad_norm": 1.1183640808639326, "learning_rate": 1.8289653197879304e-05, "loss": 0.39495909214019775, "step": 2873 }, { "epoch": 0.4593622632462239, "grad_norm": 1.560643670546573, "learning_rate": 1.8288174869409553e-05, "loss": 0.6077760457992554, "step": 2874 }, { "epoch": 0.45952209701910013, "grad_norm": 1.4085679940778952, "learning_rate": 1.82866959621231e-05, "loss": 0.6662619113922119, "step": 2875 }, { "epoch": 0.45968193079197633, "grad_norm": 1.4788060495657755, "learning_rate": 1.8285216476123223e-05, "loss": 0.7454976439476013, "step": 2876 }, { "epoch": 0.45984176456485254, "grad_norm": 1.2904214430415695, "learning_rate": 1.8283736411513245e-05, "loss": 0.6706621646881104, "step": 2877 }, { "epoch": 0.46000159833772875, "grad_norm": 1.5293771051534768, "learning_rate": 1.8282255768396536e-05, "loss": 0.5787959098815918, "step": 2878 }, { "epoch": 0.46016143211060495, "grad_norm": 1.1991269610522473, "learning_rate": 1.8280774546876487e-05, "loss": 0.5141779184341431, "step": 2879 }, { "epoch": 0.46032126588348116, "grad_norm": 1.4698696934309121, "learning_rate": 1.827929274705655e-05, "loss": 0.6305357217788696, "step": 2880 }, { "epoch": 0.46048109965635736, "grad_norm": 1.2124038114953912, "learning_rate": 1.8277810369040204e-05, "loss": 0.6752347350120544, "step": 2881 }, { "epoch": 0.4606409334292336, "grad_norm": 1.343536389249507, "learning_rate": 1.8276327412930974e-05, "loss": 0.5746601223945618, "step": 2882 }, { "epoch": 0.46080076720210983, "grad_norm": 1.6109534205245395, "learning_rate": 1.8274843878832425e-05, "loss": 0.7540355920791626, "step": 2883 }, { "epoch": 0.46096060097498603, "grad_norm": 1.387985982458214, "learning_rate": 1.827335976684816e-05, "loss": 0.690057635307312, "step": 2884 }, { "epoch": 0.46112043474786224, "grad_norm": 1.4056381718549125, "learning_rate": 1.827187507708183e-05, "loss": 0.7701202630996704, "step": 2885 }, { "epoch": 0.46128026852073845, "grad_norm": 1.4541927263660392, "learning_rate": 1.8270389809637114e-05, "loss": 0.720339834690094, "step": 2886 }, { "epoch": 0.46144010229361465, "grad_norm": 1.5013995409856413, "learning_rate": 1.8268903964617738e-05, "loss": 0.7138417959213257, "step": 2887 }, { "epoch": 0.46159993606649086, "grad_norm": 1.4288158687661126, "learning_rate": 1.826741754212747e-05, "loss": 0.7293871641159058, "step": 2888 }, { "epoch": 0.46175976983936706, "grad_norm": 1.4939108338169271, "learning_rate": 1.8265930542270116e-05, "loss": 0.8006014823913574, "step": 2889 }, { "epoch": 0.46191960361224327, "grad_norm": 1.614131040483236, "learning_rate": 1.8264442965149526e-05, "loss": 0.712306022644043, "step": 2890 }, { "epoch": 0.4620794373851195, "grad_norm": 1.3063586592868015, "learning_rate": 1.8262954810869586e-05, "loss": 0.6720699071884155, "step": 2891 }, { "epoch": 0.4622392711579957, "grad_norm": 1.4053370650432297, "learning_rate": 1.8261466079534217e-05, "loss": 0.6714296340942383, "step": 2892 }, { "epoch": 0.4623991049308719, "grad_norm": 1.2614217758438961, "learning_rate": 1.825997677124739e-05, "loss": 0.6074554920196533, "step": 2893 }, { "epoch": 0.4625589387037481, "grad_norm": 1.2304808715132303, "learning_rate": 1.825848688611312e-05, "loss": 0.6235300302505493, "step": 2894 }, { "epoch": 0.4627187724766243, "grad_norm": 1.2114188471791179, "learning_rate": 1.8256996424235446e-05, "loss": 0.6357066631317139, "step": 2895 }, { "epoch": 0.4628786062495005, "grad_norm": 1.2602157958214444, "learning_rate": 1.825550538571846e-05, "loss": 0.7125087976455688, "step": 2896 }, { "epoch": 0.4630384400223767, "grad_norm": 1.1979707479096806, "learning_rate": 1.8254013770666294e-05, "loss": 0.5860527753829956, "step": 2897 }, { "epoch": 0.4631982737952529, "grad_norm": 1.3872276334299742, "learning_rate": 1.8252521579183117e-05, "loss": 0.5431051254272461, "step": 2898 }, { "epoch": 0.4633581075681291, "grad_norm": 1.3506833326812056, "learning_rate": 1.825102881137313e-05, "loss": 0.5579657554626465, "step": 2899 }, { "epoch": 0.4635179413410054, "grad_norm": 1.4845226471173862, "learning_rate": 1.8249535467340587e-05, "loss": 0.7741929292678833, "step": 2900 }, { "epoch": 0.4636777751138816, "grad_norm": 1.2851222776215405, "learning_rate": 1.8248041547189787e-05, "loss": 0.6389281749725342, "step": 2901 }, { "epoch": 0.4638376088867578, "grad_norm": 1.407498243145404, "learning_rate": 1.8246547051025045e-05, "loss": 0.7024012207984924, "step": 2902 }, { "epoch": 0.463997442659634, "grad_norm": 1.4130967395355651, "learning_rate": 1.824505197895074e-05, "loss": 0.6651328802108765, "step": 2903 }, { "epoch": 0.4641572764325102, "grad_norm": 1.2577262780806782, "learning_rate": 1.8243556331071283e-05, "loss": 0.5178192257881165, "step": 2904 }, { "epoch": 0.4643171102053864, "grad_norm": 1.1271706677183033, "learning_rate": 1.824206010749112e-05, "loss": 0.6014556884765625, "step": 2905 }, { "epoch": 0.4644769439782626, "grad_norm": 1.3857838382509118, "learning_rate": 1.8240563308314746e-05, "loss": 0.6371148824691772, "step": 2906 }, { "epoch": 0.4646367777511388, "grad_norm": 1.581980446322637, "learning_rate": 1.823906593364669e-05, "loss": 0.6439225077629089, "step": 2907 }, { "epoch": 0.464796611524015, "grad_norm": 1.2196326767438332, "learning_rate": 1.8237567983591527e-05, "loss": 0.7003176212310791, "step": 2908 }, { "epoch": 0.46495644529689123, "grad_norm": 1.2519898412300032, "learning_rate": 1.8236069458253866e-05, "loss": 0.5392706394195557, "step": 2909 }, { "epoch": 0.46511627906976744, "grad_norm": 1.4465541226111553, "learning_rate": 1.8234570357738356e-05, "loss": 0.7595521211624146, "step": 2910 }, { "epoch": 0.46527611284264364, "grad_norm": 1.4560325637307727, "learning_rate": 1.8233070682149688e-05, "loss": 0.6948658227920532, "step": 2911 }, { "epoch": 0.46543594661551985, "grad_norm": 1.3194642377694152, "learning_rate": 1.8231570431592602e-05, "loss": 0.6138565540313721, "step": 2912 }, { "epoch": 0.46559578038839605, "grad_norm": 1.2447320547298235, "learning_rate": 1.8230069606171863e-05, "loss": 0.573826253414154, "step": 2913 }, { "epoch": 0.46575561416127226, "grad_norm": 1.1720748354512858, "learning_rate": 1.8228568205992286e-05, "loss": 0.5468051433563232, "step": 2914 }, { "epoch": 0.46591544793414846, "grad_norm": 1.415255347624638, "learning_rate": 1.8227066231158724e-05, "loss": 0.6857677698135376, "step": 2915 }, { "epoch": 0.46607528170702467, "grad_norm": 2.288415547058174, "learning_rate": 1.822556368177607e-05, "loss": 0.6056547164916992, "step": 2916 }, { "epoch": 0.46623511547990093, "grad_norm": 1.1797460260739856, "learning_rate": 1.8224060557949253e-05, "loss": 0.6546908020973206, "step": 2917 }, { "epoch": 0.46639494925277714, "grad_norm": 1.507048245978505, "learning_rate": 1.822255685978325e-05, "loss": 0.7218306064605713, "step": 2918 }, { "epoch": 0.46655478302565334, "grad_norm": 1.292214743032567, "learning_rate": 1.822105258738307e-05, "loss": 0.7118898630142212, "step": 2919 }, { "epoch": 0.46671461679852955, "grad_norm": 1.143147539221026, "learning_rate": 1.821954774085377e-05, "loss": 0.5244104266166687, "step": 2920 }, { "epoch": 0.46687445057140575, "grad_norm": 1.3339578512882637, "learning_rate": 1.821804232030044e-05, "loss": 0.7919752597808838, "step": 2921 }, { "epoch": 0.46703428434428196, "grad_norm": 1.2714031854234602, "learning_rate": 1.8216536325828217e-05, "loss": 0.6962050199508667, "step": 2922 }, { "epoch": 0.46719411811715816, "grad_norm": 1.4016574495372514, "learning_rate": 1.821502975754227e-05, "loss": 0.6861224174499512, "step": 2923 }, { "epoch": 0.46735395189003437, "grad_norm": 1.2069288361981418, "learning_rate": 1.8213522615547812e-05, "loss": 0.4920305013656616, "step": 2924 }, { "epoch": 0.4675137856629106, "grad_norm": 1.3643069518064923, "learning_rate": 1.8212014899950104e-05, "loss": 0.5548691749572754, "step": 2925 }, { "epoch": 0.4676736194357868, "grad_norm": 2.7910142827113926, "learning_rate": 1.821050661085443e-05, "loss": 0.5725923180580139, "step": 2926 }, { "epoch": 0.467833453208663, "grad_norm": 1.23469402966525, "learning_rate": 1.820899774836613e-05, "loss": 0.5492453575134277, "step": 2927 }, { "epoch": 0.4679932869815392, "grad_norm": 1.3937754781439666, "learning_rate": 1.820748831259058e-05, "loss": 0.6750328540802002, "step": 2928 }, { "epoch": 0.4681531207544154, "grad_norm": 1.2840872361344997, "learning_rate": 1.820597830363318e-05, "loss": 0.4877922236919403, "step": 2929 }, { "epoch": 0.4683129545272916, "grad_norm": 1.460956327546741, "learning_rate": 1.8204467721599397e-05, "loss": 0.6425420641899109, "step": 2930 }, { "epoch": 0.4684727883001678, "grad_norm": 1.6347787346745706, "learning_rate": 1.820295656659472e-05, "loss": 0.8286110162734985, "step": 2931 }, { "epoch": 0.468632622073044, "grad_norm": 1.431870171943297, "learning_rate": 1.8201444838724684e-05, "loss": 0.7522428631782532, "step": 2932 }, { "epoch": 0.4687924558459202, "grad_norm": 1.3976922913990422, "learning_rate": 1.8199932538094864e-05, "loss": 0.6117022633552551, "step": 2933 }, { "epoch": 0.4689522896187964, "grad_norm": 1.3585197408989593, "learning_rate": 1.819841966481087e-05, "loss": 0.557388424873352, "step": 2934 }, { "epoch": 0.4691121233916727, "grad_norm": 1.468283876013903, "learning_rate": 1.819690621897835e-05, "loss": 0.6341254711151123, "step": 2935 }, { "epoch": 0.4692719571645489, "grad_norm": 1.4399183052827256, "learning_rate": 1.8195392200703014e-05, "loss": 0.6142661571502686, "step": 2936 }, { "epoch": 0.4694317909374251, "grad_norm": 1.3947806572753452, "learning_rate": 1.8193877610090584e-05, "loss": 0.6454945802688599, "step": 2937 }, { "epoch": 0.4695916247103013, "grad_norm": 1.2769019250424365, "learning_rate": 1.8192362447246835e-05, "loss": 0.512948215007782, "step": 2938 }, { "epoch": 0.4697514584831775, "grad_norm": 1.7050197557533424, "learning_rate": 1.8190846712277582e-05, "loss": 0.6729454398155212, "step": 2939 }, { "epoch": 0.4699112922560537, "grad_norm": 1.523538474105628, "learning_rate": 1.8189330405288678e-05, "loss": 0.7248960733413696, "step": 2940 }, { "epoch": 0.4700711260289299, "grad_norm": 1.3954538476098488, "learning_rate": 1.8187813526386017e-05, "loss": 0.6037353277206421, "step": 2941 }, { "epoch": 0.4702309598018061, "grad_norm": 1.2349529121372766, "learning_rate": 1.8186296075675534e-05, "loss": 0.6073585748672485, "step": 2942 }, { "epoch": 0.47039079357468233, "grad_norm": 1.3837808576762882, "learning_rate": 1.81847780532632e-05, "loss": 0.6883077621459961, "step": 2943 }, { "epoch": 0.47055062734755854, "grad_norm": 1.2918895710328262, "learning_rate": 1.8183259459255027e-05, "loss": 0.6086082458496094, "step": 2944 }, { "epoch": 0.47071046112043474, "grad_norm": 1.1485013483059887, "learning_rate": 1.8181740293757072e-05, "loss": 0.6191303730010986, "step": 2945 }, { "epoch": 0.47087029489331095, "grad_norm": 1.3528085584897633, "learning_rate": 1.818022055687543e-05, "loss": 0.6677380800247192, "step": 2946 }, { "epoch": 0.47103012866618715, "grad_norm": 1.0636469338147785, "learning_rate": 1.8178700248716225e-05, "loss": 0.5556996464729309, "step": 2947 }, { "epoch": 0.47118996243906336, "grad_norm": 1.3404685749648344, "learning_rate": 1.8177179369385632e-05, "loss": 0.7558667659759521, "step": 2948 }, { "epoch": 0.47134979621193956, "grad_norm": 1.2886476355515715, "learning_rate": 1.8175657918989872e-05, "loss": 0.5154839158058167, "step": 2949 }, { "epoch": 0.47150962998481577, "grad_norm": 1.329801352467588, "learning_rate": 1.8174135897635192e-05, "loss": 0.6438555717468262, "step": 2950 }, { "epoch": 0.471669463757692, "grad_norm": 1.3180458784000126, "learning_rate": 1.8172613305427885e-05, "loss": 0.474861741065979, "step": 2951 }, { "epoch": 0.4718292975305682, "grad_norm": 1.4998643576206137, "learning_rate": 1.8171090142474285e-05, "loss": 0.7370665073394775, "step": 2952 }, { "epoch": 0.47198913130344444, "grad_norm": 1.2941214544067858, "learning_rate": 1.8169566408880764e-05, "loss": 0.7458305358886719, "step": 2953 }, { "epoch": 0.47214896507632065, "grad_norm": 1.3804424687873627, "learning_rate": 1.816804210475373e-05, "loss": 0.6851722598075867, "step": 2954 }, { "epoch": 0.47230879884919685, "grad_norm": 1.4560793228637772, "learning_rate": 1.8166517230199638e-05, "loss": 0.8682346940040588, "step": 2955 }, { "epoch": 0.47246863262207306, "grad_norm": 1.330118013866809, "learning_rate": 1.8164991785324983e-05, "loss": 0.6019588112831116, "step": 2956 }, { "epoch": 0.47262846639494926, "grad_norm": 1.7488685639878518, "learning_rate": 1.8163465770236293e-05, "loss": 0.6637696027755737, "step": 2957 }, { "epoch": 0.47278830016782547, "grad_norm": 1.1775892255289846, "learning_rate": 1.8161939185040137e-05, "loss": 0.5490168333053589, "step": 2958 }, { "epoch": 0.4729481339407017, "grad_norm": 1.3244082156740344, "learning_rate": 1.816041202984313e-05, "loss": 0.7065246105194092, "step": 2959 }, { "epoch": 0.4731079677135779, "grad_norm": 1.3900541337589947, "learning_rate": 1.8158884304751927e-05, "loss": 0.7304121255874634, "step": 2960 }, { "epoch": 0.4732678014864541, "grad_norm": 1.5871075625345716, "learning_rate": 1.815735600987321e-05, "loss": 0.8045570850372314, "step": 2961 }, { "epoch": 0.4734276352593303, "grad_norm": 1.2502894364557333, "learning_rate": 1.8155827145313717e-05, "loss": 0.6227906346321106, "step": 2962 }, { "epoch": 0.4735874690322065, "grad_norm": 1.460011303630869, "learning_rate": 1.8154297711180215e-05, "loss": 0.7326947450637817, "step": 2963 }, { "epoch": 0.4737473028050827, "grad_norm": 1.3699049489496626, "learning_rate": 1.8152767707579513e-05, "loss": 0.621147632598877, "step": 2964 }, { "epoch": 0.4739071365779589, "grad_norm": 1.2669028091081573, "learning_rate": 1.8151237134618463e-05, "loss": 0.6759653091430664, "step": 2965 }, { "epoch": 0.4740669703508351, "grad_norm": 1.329157289814597, "learning_rate": 1.8149705992403954e-05, "loss": 0.5586148500442505, "step": 2966 }, { "epoch": 0.4742268041237113, "grad_norm": 1.5693676444901319, "learning_rate": 1.814817428104292e-05, "loss": 0.6282596588134766, "step": 2967 }, { "epoch": 0.4743866378965875, "grad_norm": 1.3871106512336129, "learning_rate": 1.814664200064232e-05, "loss": 0.7592473030090332, "step": 2968 }, { "epoch": 0.47454647166946373, "grad_norm": 1.3324667753717632, "learning_rate": 1.8145109151309176e-05, "loss": 0.650623083114624, "step": 2969 }, { "epoch": 0.47470630544234, "grad_norm": 1.5090691973653787, "learning_rate": 1.8143575733150528e-05, "loss": 0.7538669109344482, "step": 2970 }, { "epoch": 0.4748661392152162, "grad_norm": 1.3987678795978848, "learning_rate": 1.8142041746273464e-05, "loss": 0.6142212152481079, "step": 2971 }, { "epoch": 0.4750259729880924, "grad_norm": 1.230192172450817, "learning_rate": 1.8140507190785118e-05, "loss": 0.6808756589889526, "step": 2972 }, { "epoch": 0.4751858067609686, "grad_norm": 1.5705220565049989, "learning_rate": 1.813897206679265e-05, "loss": 0.7305218577384949, "step": 2973 }, { "epoch": 0.4753456405338448, "grad_norm": 1.1618626989779488, "learning_rate": 1.813743637440328e-05, "loss": 0.6419669389724731, "step": 2974 }, { "epoch": 0.475505474306721, "grad_norm": 1.3111425270061134, "learning_rate": 1.8135900113724243e-05, "loss": 0.6384248733520508, "step": 2975 }, { "epoch": 0.4756653080795972, "grad_norm": 1.420096573947869, "learning_rate": 1.813436328486283e-05, "loss": 0.6746721863746643, "step": 2976 }, { "epoch": 0.47582514185247343, "grad_norm": 1.6177698687721689, "learning_rate": 1.8132825887926372e-05, "loss": 0.7832936644554138, "step": 2977 }, { "epoch": 0.47598497562534964, "grad_norm": 1.2054838762208746, "learning_rate": 1.813128792302223e-05, "loss": 0.5848304033279419, "step": 2978 }, { "epoch": 0.47614480939822584, "grad_norm": 1.5420070993671848, "learning_rate": 1.812974939025781e-05, "loss": 0.7924578189849854, "step": 2979 }, { "epoch": 0.47630464317110205, "grad_norm": 1.2957769628713367, "learning_rate": 1.812821028974056e-05, "loss": 0.6300419569015503, "step": 2980 }, { "epoch": 0.47646447694397825, "grad_norm": 1.3056759966739424, "learning_rate": 1.8126670621577966e-05, "loss": 0.6113443374633789, "step": 2981 }, { "epoch": 0.47662431071685446, "grad_norm": 1.5018949903308685, "learning_rate": 1.812513038587755e-05, "loss": 0.6820118427276611, "step": 2982 }, { "epoch": 0.47678414448973067, "grad_norm": 1.3207738500811683, "learning_rate": 1.8123589582746877e-05, "loss": 0.7006584405899048, "step": 2983 }, { "epoch": 0.47694397826260687, "grad_norm": 1.5010051773563116, "learning_rate": 1.8122048212293556e-05, "loss": 0.6938729286193848, "step": 2984 }, { "epoch": 0.4771038120354831, "grad_norm": 1.2126607315595241, "learning_rate": 1.8120506274625226e-05, "loss": 0.7320007085800171, "step": 2985 }, { "epoch": 0.4772636458083593, "grad_norm": 1.3039005189622905, "learning_rate": 1.811896376984957e-05, "loss": 0.5475800037384033, "step": 2986 }, { "epoch": 0.4774234795812355, "grad_norm": 1.4255379331566127, "learning_rate": 1.8117420698074318e-05, "loss": 0.7049278020858765, "step": 2987 }, { "epoch": 0.47758331335411175, "grad_norm": 1.955895649568119, "learning_rate": 1.8115877059407222e-05, "loss": 0.6483992338180542, "step": 2988 }, { "epoch": 0.47774314712698795, "grad_norm": 1.5214108305503486, "learning_rate": 1.811433285395609e-05, "loss": 0.7060750722885132, "step": 2989 }, { "epoch": 0.47790298089986416, "grad_norm": 1.2852186486229487, "learning_rate": 1.8112788081828764e-05, "loss": 0.6814541816711426, "step": 2990 }, { "epoch": 0.47806281467274037, "grad_norm": 1.4482567498237664, "learning_rate": 1.8111242743133127e-05, "loss": 0.6228811740875244, "step": 2991 }, { "epoch": 0.47822264844561657, "grad_norm": 1.380612240816776, "learning_rate": 1.8109696837977097e-05, "loss": 0.6801897287368774, "step": 2992 }, { "epoch": 0.4783824822184928, "grad_norm": 1.3874895543819012, "learning_rate": 1.8108150366468636e-05, "loss": 0.618004560470581, "step": 2993 }, { "epoch": 0.478542315991369, "grad_norm": 1.5644874505925497, "learning_rate": 1.8106603328715742e-05, "loss": 0.6704860925674438, "step": 2994 }, { "epoch": 0.4787021497642452, "grad_norm": 1.2801558970927063, "learning_rate": 1.8105055724826453e-05, "loss": 0.5657632350921631, "step": 2995 }, { "epoch": 0.4788619835371214, "grad_norm": 1.23084701735192, "learning_rate": 1.8103507554908858e-05, "loss": 0.6363087296485901, "step": 2996 }, { "epoch": 0.4790218173099976, "grad_norm": 1.3555963446516606, "learning_rate": 1.8101958819071067e-05, "loss": 0.7367660999298096, "step": 2997 }, { "epoch": 0.4791816510828738, "grad_norm": 1.4281909859476867, "learning_rate": 1.8100409517421238e-05, "loss": 0.7129071950912476, "step": 2998 }, { "epoch": 0.47934148485575, "grad_norm": 1.1452519776016832, "learning_rate": 1.8098859650067573e-05, "loss": 0.6447185277938843, "step": 2999 }, { "epoch": 0.4795013186286262, "grad_norm": 1.3966576435658875, "learning_rate": 1.8097309217118307e-05, "loss": 0.6226822137832642, "step": 3000 }, { "epoch": 0.4796611524015024, "grad_norm": 1.453631208385148, "learning_rate": 1.8095758218681717e-05, "loss": 0.7575054168701172, "step": 3001 }, { "epoch": 0.4798209861743786, "grad_norm": 1.3879066707927596, "learning_rate": 1.8094206654866115e-05, "loss": 0.701685905456543, "step": 3002 }, { "epoch": 0.47998081994725483, "grad_norm": 1.657986946850372, "learning_rate": 1.8092654525779867e-05, "loss": 0.8679551482200623, "step": 3003 }, { "epoch": 0.48014065372013104, "grad_norm": 1.4068462363777, "learning_rate": 1.8091101831531356e-05, "loss": 0.5558755397796631, "step": 3004 }, { "epoch": 0.4803004874930073, "grad_norm": 1.4568655454216566, "learning_rate": 1.8089548572229026e-05, "loss": 0.7233043313026428, "step": 3005 }, { "epoch": 0.4804603212658835, "grad_norm": 1.3401463416061945, "learning_rate": 1.808799474798135e-05, "loss": 0.6397933959960938, "step": 3006 }, { "epoch": 0.4806201550387597, "grad_norm": 1.425998042629066, "learning_rate": 1.8086440358896834e-05, "loss": 0.7623498439788818, "step": 3007 }, { "epoch": 0.4807799888116359, "grad_norm": 1.404542242487801, "learning_rate": 1.808488540508404e-05, "loss": 0.6196281313896179, "step": 3008 }, { "epoch": 0.4809398225845121, "grad_norm": 1.2934162325332645, "learning_rate": 1.8083329886651554e-05, "loss": 0.7511473894119263, "step": 3009 }, { "epoch": 0.48109965635738833, "grad_norm": 1.0438131244868634, "learning_rate": 1.8081773803708015e-05, "loss": 0.6718053817749023, "step": 3010 }, { "epoch": 0.48125949013026453, "grad_norm": 1.2531847912038658, "learning_rate": 1.8080217156362086e-05, "loss": 0.5918296575546265, "step": 3011 }, { "epoch": 0.48141932390314074, "grad_norm": 1.417335078130562, "learning_rate": 1.807865994472248e-05, "loss": 0.6865646839141846, "step": 3012 }, { "epoch": 0.48157915767601694, "grad_norm": 1.2456472345482938, "learning_rate": 1.8077102168897955e-05, "loss": 0.7080925107002258, "step": 3013 }, { "epoch": 0.48173899144889315, "grad_norm": 1.3265665238412023, "learning_rate": 1.807554382899729e-05, "loss": 0.6520585417747498, "step": 3014 }, { "epoch": 0.48189882522176936, "grad_norm": 1.2779521302441286, "learning_rate": 1.807398492512932e-05, "loss": 0.7094342708587646, "step": 3015 }, { "epoch": 0.48205865899464556, "grad_norm": 1.4581574395852568, "learning_rate": 1.8072425457402913e-05, "loss": 0.6269960403442383, "step": 3016 }, { "epoch": 0.48221849276752177, "grad_norm": 1.5547712413751733, "learning_rate": 1.807086542592697e-05, "loss": 0.850969672203064, "step": 3017 }, { "epoch": 0.482378326540398, "grad_norm": 1.341000550407222, "learning_rate": 1.806930483081045e-05, "loss": 0.6116987466812134, "step": 3018 }, { "epoch": 0.4825381603132742, "grad_norm": 1.4412402086245453, "learning_rate": 1.806774367216233e-05, "loss": 0.7337480783462524, "step": 3019 }, { "epoch": 0.4826979940861504, "grad_norm": 1.4845985304824305, "learning_rate": 1.8066181950091634e-05, "loss": 0.6201764941215515, "step": 3020 }, { "epoch": 0.4828578278590266, "grad_norm": 1.2886631013183498, "learning_rate": 1.8064619664707437e-05, "loss": 0.5533766746520996, "step": 3021 }, { "epoch": 0.4830176616319028, "grad_norm": 1.2224394787473238, "learning_rate": 1.806305681611884e-05, "loss": 0.6233221292495728, "step": 3022 }, { "epoch": 0.48317749540477906, "grad_norm": 1.5067889080384662, "learning_rate": 1.806149340443498e-05, "loss": 0.6988272666931152, "step": 3023 }, { "epoch": 0.48333732917765526, "grad_norm": 1.344995245968969, "learning_rate": 1.8059929429765045e-05, "loss": 0.764191746711731, "step": 3024 }, { "epoch": 0.48349716295053147, "grad_norm": 1.439705313434884, "learning_rate": 1.805836489221826e-05, "loss": 0.6099340915679932, "step": 3025 }, { "epoch": 0.4836569967234077, "grad_norm": 1.3640496830435427, "learning_rate": 1.8056799791903886e-05, "loss": 0.5658941268920898, "step": 3026 }, { "epoch": 0.4838168304962839, "grad_norm": 1.3844863434608246, "learning_rate": 1.8055234128931218e-05, "loss": 0.7224754095077515, "step": 3027 }, { "epoch": 0.4839766642691601, "grad_norm": 1.3124280951084264, "learning_rate": 1.8053667903409603e-05, "loss": 0.6307432055473328, "step": 3028 }, { "epoch": 0.4841364980420363, "grad_norm": 1.6017367261988613, "learning_rate": 1.8052101115448418e-05, "loss": 0.7087247371673584, "step": 3029 }, { "epoch": 0.4842963318149125, "grad_norm": 1.3900639991166126, "learning_rate": 1.8050533765157082e-05, "loss": 0.5788217782974243, "step": 3030 }, { "epoch": 0.4844561655877887, "grad_norm": 1.2078160775597908, "learning_rate": 1.8048965852645053e-05, "loss": 0.5901039838790894, "step": 3031 }, { "epoch": 0.4846159993606649, "grad_norm": 1.2743000918376597, "learning_rate": 1.8047397378021827e-05, "loss": 0.6692395210266113, "step": 3032 }, { "epoch": 0.4847758331335411, "grad_norm": 1.1241984842374453, "learning_rate": 1.8045828341396947e-05, "loss": 0.7166885137557983, "step": 3033 }, { "epoch": 0.4849356669064173, "grad_norm": 1.6144954814124766, "learning_rate": 1.804425874287998e-05, "loss": 0.5699695944786072, "step": 3034 }, { "epoch": 0.4850955006792935, "grad_norm": 1.2831586181997088, "learning_rate": 1.804268858258055e-05, "loss": 0.6436895132064819, "step": 3035 }, { "epoch": 0.48525533445216973, "grad_norm": 1.1866785202767445, "learning_rate": 1.8041117860608303e-05, "loss": 0.7043083310127258, "step": 3036 }, { "epoch": 0.48541516822504593, "grad_norm": 1.300375373361587, "learning_rate": 1.8039546577072937e-05, "loss": 0.7037966251373291, "step": 3037 }, { "epoch": 0.48557500199792214, "grad_norm": 1.2117810238149096, "learning_rate": 1.8037974732084188e-05, "loss": 0.6155203580856323, "step": 3038 }, { "epoch": 0.48573483577079835, "grad_norm": 1.1379822842981393, "learning_rate": 1.803640232575182e-05, "loss": 0.5761622190475464, "step": 3039 }, { "epoch": 0.48589466954367455, "grad_norm": 1.3042064946608973, "learning_rate": 1.8034829358185654e-05, "loss": 0.5500296354293823, "step": 3040 }, { "epoch": 0.4860545033165508, "grad_norm": 1.2510808346596665, "learning_rate": 1.8033255829495532e-05, "loss": 0.5339052081108093, "step": 3041 }, { "epoch": 0.486214337089427, "grad_norm": 1.439450980416591, "learning_rate": 1.8031681739791346e-05, "loss": 0.7414443492889404, "step": 3042 }, { "epoch": 0.4863741708623032, "grad_norm": 1.3836351748083995, "learning_rate": 1.803010708918303e-05, "loss": 0.6935863494873047, "step": 3043 }, { "epoch": 0.48653400463517943, "grad_norm": 1.2991964701432686, "learning_rate": 1.8028531877780545e-05, "loss": 0.6151530742645264, "step": 3044 }, { "epoch": 0.48669383840805563, "grad_norm": 1.4559459289767416, "learning_rate": 1.80269561056939e-05, "loss": 0.6222383975982666, "step": 3045 }, { "epoch": 0.48685367218093184, "grad_norm": 1.433631450820031, "learning_rate": 1.8025379773033145e-05, "loss": 0.541060209274292, "step": 3046 }, { "epoch": 0.48701350595380805, "grad_norm": 1.3808108889762158, "learning_rate": 1.802380287990836e-05, "loss": 0.6078846454620361, "step": 3047 }, { "epoch": 0.48717333972668425, "grad_norm": 1.424301351122609, "learning_rate": 1.8022225426429677e-05, "loss": 0.699860692024231, "step": 3048 }, { "epoch": 0.48733317349956046, "grad_norm": 1.4931358516891884, "learning_rate": 1.802064741270725e-05, "loss": 0.6633355617523193, "step": 3049 }, { "epoch": 0.48749300727243666, "grad_norm": 1.2382421839387525, "learning_rate": 1.801906883885129e-05, "loss": 0.7264290452003479, "step": 3050 }, { "epoch": 0.48765284104531287, "grad_norm": 1.5710535872851201, "learning_rate": 1.8017489704972035e-05, "loss": 0.7092193961143494, "step": 3051 }, { "epoch": 0.4878126748181891, "grad_norm": 1.312914860295833, "learning_rate": 1.8015910011179766e-05, "loss": 0.7525522708892822, "step": 3052 }, { "epoch": 0.4879725085910653, "grad_norm": 1.4999459072634647, "learning_rate": 1.801432975758481e-05, "loss": 0.6802791357040405, "step": 3053 }, { "epoch": 0.4881323423639415, "grad_norm": 1.1759369459714737, "learning_rate": 1.8012748944297516e-05, "loss": 0.6180126667022705, "step": 3054 }, { "epoch": 0.4882921761368177, "grad_norm": 1.3790355280690472, "learning_rate": 1.8011167571428285e-05, "loss": 0.6351402401924133, "step": 3055 }, { "epoch": 0.4884520099096939, "grad_norm": 1.3425938894872413, "learning_rate": 1.800958563908756e-05, "loss": 0.5875847339630127, "step": 3056 }, { "epoch": 0.4886118436825701, "grad_norm": 1.2747797880245078, "learning_rate": 1.8008003147385812e-05, "loss": 0.6647064089775085, "step": 3057 }, { "epoch": 0.48877167745544636, "grad_norm": 1.3007109188351553, "learning_rate": 1.8006420096433565e-05, "loss": 0.6284028887748718, "step": 3058 }, { "epoch": 0.48893151122832257, "grad_norm": 1.3377959086029003, "learning_rate": 1.800483648634136e-05, "loss": 0.6780111789703369, "step": 3059 }, { "epoch": 0.4890913450011988, "grad_norm": 1.4377839920612119, "learning_rate": 1.8003252317219798e-05, "loss": 0.8918596506118774, "step": 3060 }, { "epoch": 0.489251178774075, "grad_norm": 1.2569925121112493, "learning_rate": 1.8001667589179517e-05, "loss": 0.5432535409927368, "step": 3061 }, { "epoch": 0.4894110125469512, "grad_norm": 1.3162325231425447, "learning_rate": 1.800008230233118e-05, "loss": 0.6502131819725037, "step": 3062 }, { "epoch": 0.4895708463198274, "grad_norm": 1.5627266582203292, "learning_rate": 1.79984964567855e-05, "loss": 0.6676664352416992, "step": 3063 }, { "epoch": 0.4897306800927036, "grad_norm": 1.3433173245742438, "learning_rate": 1.7996910052653233e-05, "loss": 0.840561032295227, "step": 3064 }, { "epoch": 0.4898905138655798, "grad_norm": 1.411104961652811, "learning_rate": 1.799532309004516e-05, "loss": 0.7013871669769287, "step": 3065 }, { "epoch": 0.490050347638456, "grad_norm": 1.5301831689783236, "learning_rate": 1.7993735569072112e-05, "loss": 0.611169695854187, "step": 3066 }, { "epoch": 0.4902101814113322, "grad_norm": 1.5872054913145754, "learning_rate": 1.7992147489844956e-05, "loss": 0.6218129396438599, "step": 3067 }, { "epoch": 0.4903700151842084, "grad_norm": 1.2447503542832894, "learning_rate": 1.7990558852474597e-05, "loss": 0.7565284371376038, "step": 3068 }, { "epoch": 0.4905298489570846, "grad_norm": 1.3322038634307887, "learning_rate": 1.798896965707198e-05, "loss": 0.5534245371818542, "step": 3069 }, { "epoch": 0.49068968272996083, "grad_norm": 1.1375744984173262, "learning_rate": 1.798737990374809e-05, "loss": 0.5428298115730286, "step": 3070 }, { "epoch": 0.49084951650283704, "grad_norm": 1.2600957941193014, "learning_rate": 1.798578959261395e-05, "loss": 0.5621017217636108, "step": 3071 }, { "epoch": 0.49100935027571324, "grad_norm": 1.3653860761124548, "learning_rate": 1.7984198723780618e-05, "loss": 0.5968067646026611, "step": 3072 }, { "epoch": 0.49116918404858945, "grad_norm": 1.400090934898666, "learning_rate": 1.79826072973592e-05, "loss": 0.7173087000846863, "step": 3073 }, { "epoch": 0.49132901782146565, "grad_norm": 1.2902842910638388, "learning_rate": 1.798101531346083e-05, "loss": 0.6239790916442871, "step": 3074 }, { "epoch": 0.49148885159434186, "grad_norm": 1.5304775469453868, "learning_rate": 1.7979422772196685e-05, "loss": 0.6887032985687256, "step": 3075 }, { "epoch": 0.4916486853672181, "grad_norm": 1.5419901824057907, "learning_rate": 1.7977829673677997e-05, "loss": 0.7603594660758972, "step": 3076 }, { "epoch": 0.4918085191400943, "grad_norm": 1.2544150833527783, "learning_rate": 1.7976236018016005e-05, "loss": 0.5694543719291687, "step": 3077 }, { "epoch": 0.49196835291297053, "grad_norm": 1.6125970201142417, "learning_rate": 1.797464180532201e-05, "loss": 0.5909101963043213, "step": 3078 }, { "epoch": 0.49212818668584674, "grad_norm": 1.5927282511661978, "learning_rate": 1.7973047035707352e-05, "loss": 0.691271185874939, "step": 3079 }, { "epoch": 0.49228802045872294, "grad_norm": 1.1980138204263078, "learning_rate": 1.7971451709283396e-05, "loss": 0.6149266958236694, "step": 3080 }, { "epoch": 0.49244785423159915, "grad_norm": 1.2528085677574645, "learning_rate": 1.7969855826161556e-05, "loss": 0.6757845878601074, "step": 3081 }, { "epoch": 0.49260768800447535, "grad_norm": 1.459466050344153, "learning_rate": 1.796825938645329e-05, "loss": 0.6832835674285889, "step": 3082 }, { "epoch": 0.49276752177735156, "grad_norm": 1.6249484039503854, "learning_rate": 1.7966662390270076e-05, "loss": 0.8154357671737671, "step": 3083 }, { "epoch": 0.49292735555022776, "grad_norm": 1.3358767466419916, "learning_rate": 1.7965064837723452e-05, "loss": 0.6429100036621094, "step": 3084 }, { "epoch": 0.49308718932310397, "grad_norm": 1.1143094260287465, "learning_rate": 1.7963466728924982e-05, "loss": 0.5880461931228638, "step": 3085 }, { "epoch": 0.4932470230959802, "grad_norm": 1.5297850549250809, "learning_rate": 1.796186806398627e-05, "loss": 0.6106619238853455, "step": 3086 }, { "epoch": 0.4934068568688564, "grad_norm": 1.2501600531975465, "learning_rate": 1.7960268843018964e-05, "loss": 0.6297531127929688, "step": 3087 }, { "epoch": 0.4935666906417326, "grad_norm": 1.44871379217384, "learning_rate": 1.7958669066134747e-05, "loss": 0.6694366335868835, "step": 3088 }, { "epoch": 0.4937265244146088, "grad_norm": 1.3116044125432278, "learning_rate": 1.795706873344534e-05, "loss": 0.6714637279510498, "step": 3089 }, { "epoch": 0.493886358187485, "grad_norm": 1.20821979587185, "learning_rate": 1.795546784506251e-05, "loss": 0.5167112350463867, "step": 3090 }, { "epoch": 0.4940461919603612, "grad_norm": 1.3829796558526959, "learning_rate": 1.795386640109805e-05, "loss": 0.6298086047172546, "step": 3091 }, { "epoch": 0.4942060257332374, "grad_norm": 1.524960906554025, "learning_rate": 1.7952264401663805e-05, "loss": 0.7314967513084412, "step": 3092 }, { "epoch": 0.4943658595061136, "grad_norm": 1.3815020538972829, "learning_rate": 1.7950661846871653e-05, "loss": 0.7451049089431763, "step": 3093 }, { "epoch": 0.4945256932789899, "grad_norm": 1.4580215271466912, "learning_rate": 1.7949058736833502e-05, "loss": 0.6351803541183472, "step": 3094 }, { "epoch": 0.4946855270518661, "grad_norm": 1.3092367171289276, "learning_rate": 1.7947455071661316e-05, "loss": 0.7520921230316162, "step": 3095 }, { "epoch": 0.4948453608247423, "grad_norm": 1.2804401661436684, "learning_rate": 1.794585085146709e-05, "loss": 0.6672300100326538, "step": 3096 }, { "epoch": 0.4950051945976185, "grad_norm": 1.363175636198508, "learning_rate": 1.7944246076362852e-05, "loss": 0.7335870265960693, "step": 3097 }, { "epoch": 0.4951650283704947, "grad_norm": 1.1941853744192468, "learning_rate": 1.794264074646068e-05, "loss": 0.5854717493057251, "step": 3098 }, { "epoch": 0.4953248621433709, "grad_norm": 1.4798670549455983, "learning_rate": 1.7941034861872676e-05, "loss": 0.6028521060943604, "step": 3099 }, { "epoch": 0.4954846959162471, "grad_norm": 1.3238054411042455, "learning_rate": 1.7939428422710996e-05, "loss": 0.6387102007865906, "step": 3100 }, { "epoch": 0.4956445296891233, "grad_norm": 1.5040813120704444, "learning_rate": 1.7937821429087826e-05, "loss": 0.6800696849822998, "step": 3101 }, { "epoch": 0.4958043634619995, "grad_norm": 1.498931035501388, "learning_rate": 1.7936213881115393e-05, "loss": 0.7759741544723511, "step": 3102 }, { "epoch": 0.4959641972348757, "grad_norm": 1.2669373885511457, "learning_rate": 1.793460577890596e-05, "loss": 0.7159613966941833, "step": 3103 }, { "epoch": 0.49612403100775193, "grad_norm": 1.1645175434915283, "learning_rate": 1.7932997122571835e-05, "loss": 0.6040558815002441, "step": 3104 }, { "epoch": 0.49628386478062814, "grad_norm": 1.5667043763448223, "learning_rate": 1.793138791222536e-05, "loss": 0.7005022764205933, "step": 3105 }, { "epoch": 0.49644369855350434, "grad_norm": 1.528085554268168, "learning_rate": 1.7929778147978915e-05, "loss": 0.7402414083480835, "step": 3106 }, { "epoch": 0.49660353232638055, "grad_norm": 1.2724185982913176, "learning_rate": 1.7928167829944917e-05, "loss": 0.5450716614723206, "step": 3107 }, { "epoch": 0.49676336609925675, "grad_norm": 1.1620106165608095, "learning_rate": 1.7926556958235836e-05, "loss": 0.5975571274757385, "step": 3108 }, { "epoch": 0.49692319987213296, "grad_norm": 1.266569731893554, "learning_rate": 1.7924945532964157e-05, "loss": 0.591755747795105, "step": 3109 }, { "epoch": 0.49708303364500916, "grad_norm": 1.3056155968510958, "learning_rate": 1.7923333554242425e-05, "loss": 0.673771858215332, "step": 3110 }, { "epoch": 0.4972428674178854, "grad_norm": 1.3844544586485092, "learning_rate": 1.7921721022183206e-05, "loss": 0.5381490588188171, "step": 3111 }, { "epoch": 0.49740270119076163, "grad_norm": 1.4040299886082643, "learning_rate": 1.7920107936899125e-05, "loss": 0.668859601020813, "step": 3112 }, { "epoch": 0.49756253496363784, "grad_norm": 1.4022602331744383, "learning_rate": 1.7918494298502825e-05, "loss": 0.7958034873008728, "step": 3113 }, { "epoch": 0.49772236873651404, "grad_norm": 1.317465184550144, "learning_rate": 1.7916880107107e-05, "loss": 0.6872917413711548, "step": 3114 }, { "epoch": 0.49788220250939025, "grad_norm": 1.362314872523862, "learning_rate": 1.791526536282438e-05, "loss": 0.7131896615028381, "step": 3115 }, { "epoch": 0.49804203628226645, "grad_norm": 1.225391671549833, "learning_rate": 1.791365006576773e-05, "loss": 0.6083654165267944, "step": 3116 }, { "epoch": 0.49820187005514266, "grad_norm": 1.2876442583796126, "learning_rate": 1.791203421604986e-05, "loss": 0.70420902967453, "step": 3117 }, { "epoch": 0.49836170382801886, "grad_norm": 1.448755560687142, "learning_rate": 1.7910417813783615e-05, "loss": 0.5796658992767334, "step": 3118 }, { "epoch": 0.49852153760089507, "grad_norm": 1.4944484365579591, "learning_rate": 1.790880085908188e-05, "loss": 0.6715267896652222, "step": 3119 }, { "epoch": 0.4986813713737713, "grad_norm": 1.2875722387426776, "learning_rate": 1.790718335205757e-05, "loss": 0.6453369855880737, "step": 3120 }, { "epoch": 0.4988412051466475, "grad_norm": 1.590244651701148, "learning_rate": 1.7905565292823658e-05, "loss": 0.7419759035110474, "step": 3121 }, { "epoch": 0.4990010389195237, "grad_norm": 1.3688033834128714, "learning_rate": 1.790394668149313e-05, "loss": 0.7290701866149902, "step": 3122 }, { "epoch": 0.4991608726923999, "grad_norm": 1.4300589249118392, "learning_rate": 1.7902327518179036e-05, "loss": 0.8674570322036743, "step": 3123 }, { "epoch": 0.4993207064652761, "grad_norm": 1.2367328822370804, "learning_rate": 1.7900707802994443e-05, "loss": 0.5934872031211853, "step": 3124 }, { "epoch": 0.4994805402381523, "grad_norm": 1.52772131320229, "learning_rate": 1.7899087536052472e-05, "loss": 0.4813838303089142, "step": 3125 }, { "epoch": 0.4996403740110285, "grad_norm": 1.4537249487291506, "learning_rate": 1.7897466717466278e-05, "loss": 0.65118408203125, "step": 3126 }, { "epoch": 0.4998002077839047, "grad_norm": 1.474168621325983, "learning_rate": 1.7895845347349047e-05, "loss": 0.6354297399520874, "step": 3127 }, { "epoch": 0.4999600415567809, "grad_norm": 1.2690224407481623, "learning_rate": 1.7894223425814015e-05, "loss": 0.6105283498764038, "step": 3128 }, { "epoch": 0.5001198753296572, "grad_norm": 1.4286850538163725, "learning_rate": 1.7892600952974447e-05, "loss": 0.7158613204956055, "step": 3129 }, { "epoch": 0.5002797091025334, "grad_norm": 1.1262930709533314, "learning_rate": 1.7890977928943655e-05, "loss": 0.530447781085968, "step": 3130 }, { "epoch": 0.5004395428754096, "grad_norm": 1.4885596698023271, "learning_rate": 1.7889354353834986e-05, "loss": 0.6341517567634583, "step": 3131 }, { "epoch": 0.5005993766482858, "grad_norm": 1.2745911726688182, "learning_rate": 1.788773022776182e-05, "loss": 0.7449532747268677, "step": 3132 }, { "epoch": 0.500759210421162, "grad_norm": 1.3421976964031297, "learning_rate": 1.788610555083758e-05, "loss": 0.7581501007080078, "step": 3133 }, { "epoch": 0.5009190441940382, "grad_norm": 1.1946127166190683, "learning_rate": 1.788448032317573e-05, "loss": 0.5268166661262512, "step": 3134 }, { "epoch": 0.5010788779669144, "grad_norm": 2.5644531602806384, "learning_rate": 1.7882854544889775e-05, "loss": 0.7192783951759338, "step": 3135 }, { "epoch": 0.5012387117397906, "grad_norm": 1.2751276964228906, "learning_rate": 1.7881228216093244e-05, "loss": 0.5352597236633301, "step": 3136 }, { "epoch": 0.5013985455126668, "grad_norm": 1.3866988077006106, "learning_rate": 1.787960133689972e-05, "loss": 0.6092105507850647, "step": 3137 }, { "epoch": 0.501558379285543, "grad_norm": 1.2963662883854363, "learning_rate": 1.7877973907422815e-05, "loss": 0.6370062828063965, "step": 3138 }, { "epoch": 0.5017182130584192, "grad_norm": 1.3465902233940632, "learning_rate": 1.787634592777619e-05, "loss": 0.6271853446960449, "step": 3139 }, { "epoch": 0.5018780468312954, "grad_norm": 1.3713424866695596, "learning_rate": 1.787471739807353e-05, "loss": 0.6636099815368652, "step": 3140 }, { "epoch": 0.5020378806041716, "grad_norm": 1.4790872407540223, "learning_rate": 1.787308831842857e-05, "loss": 0.736928403377533, "step": 3141 }, { "epoch": 0.5021977143770479, "grad_norm": 1.3488690243365005, "learning_rate": 1.7871458688955076e-05, "loss": 0.5229798555374146, "step": 3142 }, { "epoch": 0.5023575481499241, "grad_norm": 1.2268140095566311, "learning_rate": 1.7869828509766853e-05, "loss": 0.6221684217453003, "step": 3143 }, { "epoch": 0.5025173819228003, "grad_norm": 1.3155934233317208, "learning_rate": 1.7868197780977755e-05, "loss": 0.7546975612640381, "step": 3144 }, { "epoch": 0.5026772156956765, "grad_norm": 1.4431956947607611, "learning_rate": 1.786656650270166e-05, "loss": 0.7034422159194946, "step": 3145 }, { "epoch": 0.5028370494685527, "grad_norm": 1.5044985420590873, "learning_rate": 1.7864934675052494e-05, "loss": 0.6167211532592773, "step": 3146 }, { "epoch": 0.5029968832414289, "grad_norm": 1.1334271928157538, "learning_rate": 1.7863302298144218e-05, "loss": 0.47788500785827637, "step": 3147 }, { "epoch": 0.5031567170143051, "grad_norm": 1.427689030231786, "learning_rate": 1.7861669372090827e-05, "loss": 0.7448767423629761, "step": 3148 }, { "epoch": 0.5033165507871813, "grad_norm": 1.3045241712966773, "learning_rate": 1.7860035897006364e-05, "loss": 0.7636187076568604, "step": 3149 }, { "epoch": 0.5034763845600575, "grad_norm": 1.6827939960752278, "learning_rate": 1.7858401873004903e-05, "loss": 0.5818511247634888, "step": 3150 }, { "epoch": 0.5036362183329337, "grad_norm": 1.53707921826588, "learning_rate": 1.7856767300200556e-05, "loss": 0.7382160425186157, "step": 3151 }, { "epoch": 0.5037960521058099, "grad_norm": 1.88486199779671, "learning_rate": 1.785513217870748e-05, "loss": 0.6827539801597595, "step": 3152 }, { "epoch": 0.5039558858786861, "grad_norm": 1.6711093270517772, "learning_rate": 1.7853496508639866e-05, "loss": 0.6533536911010742, "step": 3153 }, { "epoch": 0.5041157196515623, "grad_norm": 1.3396191309469667, "learning_rate": 1.785186029011194e-05, "loss": 0.6997132301330566, "step": 3154 }, { "epoch": 0.5042755534244386, "grad_norm": 1.442263891062799, "learning_rate": 1.7850223523237974e-05, "loss": 0.7430293560028076, "step": 3155 }, { "epoch": 0.5044353871973148, "grad_norm": 1.2930028082559788, "learning_rate": 1.7848586208132267e-05, "loss": 0.7414283752441406, "step": 3156 }, { "epoch": 0.504595220970191, "grad_norm": 1.5373499501985688, "learning_rate": 1.784694834490917e-05, "loss": 0.6578136682510376, "step": 3157 }, { "epoch": 0.5047550547430673, "grad_norm": 1.3999558368239273, "learning_rate": 1.7845309933683067e-05, "loss": 0.6938832402229309, "step": 3158 }, { "epoch": 0.5049148885159435, "grad_norm": 1.29573243308192, "learning_rate": 1.7843670974568372e-05, "loss": 0.683109700679779, "step": 3159 }, { "epoch": 0.5050747222888197, "grad_norm": 1.5014169775361899, "learning_rate": 1.7842031467679543e-05, "loss": 0.7053946256637573, "step": 3160 }, { "epoch": 0.5052345560616959, "grad_norm": 1.2326913150991825, "learning_rate": 1.7840391413131086e-05, "loss": 0.5223598480224609, "step": 3161 }, { "epoch": 0.5053943898345721, "grad_norm": 1.381538978305615, "learning_rate": 1.783875081103753e-05, "loss": 0.6588343977928162, "step": 3162 }, { "epoch": 0.5055542236074483, "grad_norm": 1.4040338745888359, "learning_rate": 1.783710966151345e-05, "loss": 0.5939790606498718, "step": 3163 }, { "epoch": 0.5057140573803245, "grad_norm": 1.2895804094438559, "learning_rate": 1.7835467964673462e-05, "loss": 0.7445304989814758, "step": 3164 }, { "epoch": 0.5058738911532007, "grad_norm": 1.4556307032765805, "learning_rate": 1.783382572063221e-05, "loss": 0.6945794820785522, "step": 3165 }, { "epoch": 0.5060337249260769, "grad_norm": 1.467359383615599, "learning_rate": 1.783218292950439e-05, "loss": 0.6750667691230774, "step": 3166 }, { "epoch": 0.5061935586989531, "grad_norm": 1.3140672623324372, "learning_rate": 1.7830539591404717e-05, "loss": 0.652665376663208, "step": 3167 }, { "epoch": 0.5063533924718293, "grad_norm": 1.2422081469512638, "learning_rate": 1.782889570644797e-05, "loss": 0.5336638689041138, "step": 3168 }, { "epoch": 0.5065132262447055, "grad_norm": 1.2812465246111788, "learning_rate": 1.7827251274748944e-05, "loss": 0.5119317770004272, "step": 3169 }, { "epoch": 0.5066730600175817, "grad_norm": 1.4631512624679541, "learning_rate": 1.782560629642248e-05, "loss": 0.6360000371932983, "step": 3170 }, { "epoch": 0.5068328937904579, "grad_norm": 1.3019399712988682, "learning_rate": 1.7823960771583462e-05, "loss": 0.5664699077606201, "step": 3171 }, { "epoch": 0.5069927275633341, "grad_norm": 1.19959900601981, "learning_rate": 1.7822314700346802e-05, "loss": 0.520512580871582, "step": 3172 }, { "epoch": 0.5071525613362103, "grad_norm": 1.4375948592130277, "learning_rate": 1.7820668082827462e-05, "loss": 0.7747718095779419, "step": 3173 }, { "epoch": 0.5073123951090865, "grad_norm": 1.5250891591760514, "learning_rate": 1.781902091914043e-05, "loss": 0.6954092383384705, "step": 3174 }, { "epoch": 0.5074722288819628, "grad_norm": 1.430888591394074, "learning_rate": 1.781737320940074e-05, "loss": 0.571139931678772, "step": 3175 }, { "epoch": 0.507632062654839, "grad_norm": 1.191495569247351, "learning_rate": 1.781572495372346e-05, "loss": 0.5377719402313232, "step": 3176 }, { "epoch": 0.5077918964277152, "grad_norm": 1.5864377571750012, "learning_rate": 1.7814076152223703e-05, "loss": 0.7789922952651978, "step": 3177 }, { "epoch": 0.5079517302005914, "grad_norm": 1.5598617890160007, "learning_rate": 1.7812426805016617e-05, "loss": 0.8091435432434082, "step": 3178 }, { "epoch": 0.5081115639734676, "grad_norm": 1.4455719927742803, "learning_rate": 1.781077691221738e-05, "loss": 0.6660465002059937, "step": 3179 }, { "epoch": 0.5082713977463438, "grad_norm": 1.1992566899420438, "learning_rate": 1.780912647394122e-05, "loss": 0.5830725431442261, "step": 3180 }, { "epoch": 0.50843123151922, "grad_norm": 1.1982468812977856, "learning_rate": 1.780747549030339e-05, "loss": 0.5834197402000427, "step": 3181 }, { "epoch": 0.5085910652920962, "grad_norm": 1.3619547490267276, "learning_rate": 1.7805823961419198e-05, "loss": 0.5017741918563843, "step": 3182 }, { "epoch": 0.5087508990649724, "grad_norm": 1.678838143813201, "learning_rate": 1.7804171887403976e-05, "loss": 0.6586090326309204, "step": 3183 }, { "epoch": 0.5089107328378486, "grad_norm": 1.3058423331915012, "learning_rate": 1.78025192683731e-05, "loss": 0.5410352349281311, "step": 3184 }, { "epoch": 0.5090705666107248, "grad_norm": 1.5168276175065991, "learning_rate": 1.7800866104441986e-05, "loss": 0.6717417240142822, "step": 3185 }, { "epoch": 0.509230400383601, "grad_norm": 1.3563157615832304, "learning_rate": 1.7799212395726078e-05, "loss": 0.7121155261993408, "step": 3186 }, { "epoch": 0.5093902341564772, "grad_norm": 1.3137512795004302, "learning_rate": 1.779755814234087e-05, "loss": 0.6834805011749268, "step": 3187 }, { "epoch": 0.5095500679293534, "grad_norm": 1.1895454269878876, "learning_rate": 1.7795903344401894e-05, "loss": 0.5937683582305908, "step": 3188 }, { "epoch": 0.5097099017022296, "grad_norm": 1.5016310653881677, "learning_rate": 1.7794248002024705e-05, "loss": 0.6812021136283875, "step": 3189 }, { "epoch": 0.509869735475106, "grad_norm": 1.5555801384520787, "learning_rate": 1.7792592115324913e-05, "loss": 0.6981450319290161, "step": 3190 }, { "epoch": 0.5100295692479822, "grad_norm": 1.7478242477633807, "learning_rate": 1.7790935684418154e-05, "loss": 0.6011456847190857, "step": 3191 }, { "epoch": 0.5101894030208584, "grad_norm": 1.2466067007328183, "learning_rate": 1.7789278709420115e-05, "loss": 0.5536404252052307, "step": 3192 }, { "epoch": 0.5103492367937346, "grad_norm": 1.2888576154582565, "learning_rate": 1.7787621190446508e-05, "loss": 0.6185088157653809, "step": 3193 }, { "epoch": 0.5105090705666108, "grad_norm": 1.3148405720846124, "learning_rate": 1.7785963127613084e-05, "loss": 0.45622551441192627, "step": 3194 }, { "epoch": 0.510668904339487, "grad_norm": 1.4321196672403989, "learning_rate": 1.7784304521035647e-05, "loss": 0.7003923654556274, "step": 3195 }, { "epoch": 0.5108287381123632, "grad_norm": 1.1822852773515693, "learning_rate": 1.778264537083002e-05, "loss": 0.6465214490890503, "step": 3196 }, { "epoch": 0.5109885718852394, "grad_norm": 1.333987832762814, "learning_rate": 1.7780985677112074e-05, "loss": 0.7026268243789673, "step": 3197 }, { "epoch": 0.5111484056581156, "grad_norm": 1.3776184138969956, "learning_rate": 1.7779325439997718e-05, "loss": 0.647014856338501, "step": 3198 }, { "epoch": 0.5113082394309918, "grad_norm": 1.354872885619151, "learning_rate": 1.7777664659602895e-05, "loss": 0.6576491594314575, "step": 3199 }, { "epoch": 0.511468073203868, "grad_norm": 1.4547810323346746, "learning_rate": 1.7776003336043587e-05, "loss": 0.7452266216278076, "step": 3200 }, { "epoch": 0.5116279069767442, "grad_norm": 1.3516475080063548, "learning_rate": 1.777434146943582e-05, "loss": 0.7905550003051758, "step": 3201 }, { "epoch": 0.5117877407496204, "grad_norm": 1.7066697510673194, "learning_rate": 1.777267905989565e-05, "loss": 0.6065611839294434, "step": 3202 }, { "epoch": 0.5119475745224966, "grad_norm": 1.3029590275904208, "learning_rate": 1.777101610753917e-05, "loss": 0.5375442504882812, "step": 3203 }, { "epoch": 0.5121074082953728, "grad_norm": 1.5501762179216327, "learning_rate": 1.776935261248252e-05, "loss": 0.6167640686035156, "step": 3204 }, { "epoch": 0.512267242068249, "grad_norm": 1.3689863161333933, "learning_rate": 1.776768857484187e-05, "loss": 0.6838306784629822, "step": 3205 }, { "epoch": 0.5124270758411252, "grad_norm": 1.2088167416005147, "learning_rate": 1.776602399473343e-05, "loss": 0.583583414554596, "step": 3206 }, { "epoch": 0.5125869096140014, "grad_norm": 1.245508589222423, "learning_rate": 1.7764358872273452e-05, "loss": 0.6497099995613098, "step": 3207 }, { "epoch": 0.5127467433868776, "grad_norm": 1.2416913413713861, "learning_rate": 1.776269320757822e-05, "loss": 0.5672459006309509, "step": 3208 }, { "epoch": 0.5129065771597539, "grad_norm": 1.3152051933109588, "learning_rate": 1.7761027000764056e-05, "loss": 0.6592400074005127, "step": 3209 }, { "epoch": 0.5130664109326301, "grad_norm": 1.4041773882230035, "learning_rate": 1.7759360251947323e-05, "loss": 0.6737270355224609, "step": 3210 }, { "epoch": 0.5132262447055063, "grad_norm": 1.5027783598076323, "learning_rate": 1.7757692961244423e-05, "loss": 0.6559303998947144, "step": 3211 }, { "epoch": 0.5133860784783825, "grad_norm": 1.4185639822943754, "learning_rate": 1.7756025128771796e-05, "loss": 0.7576043605804443, "step": 3212 }, { "epoch": 0.5135459122512587, "grad_norm": 1.1936827051617764, "learning_rate": 1.775435675464591e-05, "loss": 0.633793830871582, "step": 3213 }, { "epoch": 0.5137057460241349, "grad_norm": 1.4484548039946499, "learning_rate": 1.7752687838983283e-05, "loss": 0.577100396156311, "step": 3214 }, { "epoch": 0.5138655797970111, "grad_norm": 1.4868524946369739, "learning_rate": 1.7751018381900463e-05, "loss": 0.7048342227935791, "step": 3215 }, { "epoch": 0.5140254135698873, "grad_norm": 1.1911004555601281, "learning_rate": 1.7749348383514043e-05, "loss": 0.5569266080856323, "step": 3216 }, { "epoch": 0.5141852473427635, "grad_norm": 1.3245537336815216, "learning_rate": 1.7747677843940647e-05, "loss": 0.6399272680282593, "step": 3217 }, { "epoch": 0.5143450811156397, "grad_norm": 1.459915565763467, "learning_rate": 1.7746006763296943e-05, "loss": 0.6081969738006592, "step": 3218 }, { "epoch": 0.5145049148885159, "grad_norm": 1.4612888485709565, "learning_rate": 1.7744335141699628e-05, "loss": 0.5492688417434692, "step": 3219 }, { "epoch": 0.5146647486613921, "grad_norm": 1.3484362917940846, "learning_rate": 1.7742662979265444e-05, "loss": 0.6020501852035522, "step": 3220 }, { "epoch": 0.5148245824342683, "grad_norm": 1.4614589494871424, "learning_rate": 1.7740990276111173e-05, "loss": 0.5829300880432129, "step": 3221 }, { "epoch": 0.5149844162071445, "grad_norm": 1.1713067408985465, "learning_rate": 1.7739317032353627e-05, "loss": 0.49546176195144653, "step": 3222 }, { "epoch": 0.5151442499800207, "grad_norm": 1.1785519610786972, "learning_rate": 1.7737643248109657e-05, "loss": 0.6345555186271667, "step": 3223 }, { "epoch": 0.5153040837528969, "grad_norm": 1.1328827591287938, "learning_rate": 1.773596892349616e-05, "loss": 0.5520418286323547, "step": 3224 }, { "epoch": 0.5154639175257731, "grad_norm": 1.3547782468317564, "learning_rate": 1.7734294058630063e-05, "loss": 0.8342475295066833, "step": 3225 }, { "epoch": 0.5156237512986495, "grad_norm": 1.3260805451649889, "learning_rate": 1.7732618653628328e-05, "loss": 0.6185263991355896, "step": 3226 }, { "epoch": 0.5157835850715257, "grad_norm": 1.2774888965303037, "learning_rate": 1.7730942708607965e-05, "loss": 0.6080664992332458, "step": 3227 }, { "epoch": 0.5159434188444019, "grad_norm": 1.362651426587485, "learning_rate": 1.7729266223686013e-05, "loss": 0.6986156702041626, "step": 3228 }, { "epoch": 0.5161032526172781, "grad_norm": 1.6072543315138557, "learning_rate": 1.7727589198979555e-05, "loss": 0.8076416254043579, "step": 3229 }, { "epoch": 0.5162630863901543, "grad_norm": 1.272298337171942, "learning_rate": 1.7725911634605703e-05, "loss": 0.6714143753051758, "step": 3230 }, { "epoch": 0.5164229201630305, "grad_norm": 1.322506758302793, "learning_rate": 1.7724233530681618e-05, "loss": 0.626609206199646, "step": 3231 }, { "epoch": 0.5165827539359067, "grad_norm": 1.4405162513490088, "learning_rate": 1.7722554887324488e-05, "loss": 0.7428673505783081, "step": 3232 }, { "epoch": 0.5167425877087829, "grad_norm": 1.449180935559793, "learning_rate": 1.7720875704651547e-05, "loss": 0.7130461931228638, "step": 3233 }, { "epoch": 0.5169024214816591, "grad_norm": 1.6639174286368055, "learning_rate": 1.771919598278006e-05, "loss": 0.6429048776626587, "step": 3234 }, { "epoch": 0.5170622552545353, "grad_norm": 1.3847495135791723, "learning_rate": 1.7717515721827338e-05, "loss": 0.6487271785736084, "step": 3235 }, { "epoch": 0.5172220890274115, "grad_norm": 1.63759557998442, "learning_rate": 1.7715834921910718e-05, "loss": 0.6311441659927368, "step": 3236 }, { "epoch": 0.5173819228002877, "grad_norm": 1.29168567099197, "learning_rate": 1.7714153583147583e-05, "loss": 0.5462524890899658, "step": 3237 }, { "epoch": 0.5175417565731639, "grad_norm": 1.232974445482713, "learning_rate": 1.7712471705655353e-05, "loss": 0.6371603608131409, "step": 3238 }, { "epoch": 0.5177015903460401, "grad_norm": 1.6468652912835935, "learning_rate": 1.7710789289551487e-05, "loss": 0.6159452199935913, "step": 3239 }, { "epoch": 0.5178614241189163, "grad_norm": 1.5357882480341247, "learning_rate": 1.7709106334953476e-05, "loss": 0.6512657403945923, "step": 3240 }, { "epoch": 0.5180212578917925, "grad_norm": 1.2786933485687624, "learning_rate": 1.7707422841978854e-05, "loss": 0.5051828026771545, "step": 3241 }, { "epoch": 0.5181810916646687, "grad_norm": 1.5073611746976099, "learning_rate": 1.770573881074518e-05, "loss": 0.6157774925231934, "step": 3242 }, { "epoch": 0.518340925437545, "grad_norm": 1.4456151196684477, "learning_rate": 1.7704054241370075e-05, "loss": 0.6846711039543152, "step": 3243 }, { "epoch": 0.5185007592104212, "grad_norm": 1.2974495967668125, "learning_rate": 1.7702369133971176e-05, "loss": 0.6475973725318909, "step": 3244 }, { "epoch": 0.5186605929832974, "grad_norm": 1.3183395831388558, "learning_rate": 1.7700683488666163e-05, "loss": 0.5515133142471313, "step": 3245 }, { "epoch": 0.5188204267561736, "grad_norm": 1.6098240556690073, "learning_rate": 1.7698997305572763e-05, "loss": 0.7055119872093201, "step": 3246 }, { "epoch": 0.5189802605290498, "grad_norm": 1.3077743552530645, "learning_rate": 1.7697310584808726e-05, "loss": 0.701392412185669, "step": 3247 }, { "epoch": 0.519140094301926, "grad_norm": 1.3077680129916973, "learning_rate": 1.7695623326491848e-05, "loss": 0.5657117366790771, "step": 3248 }, { "epoch": 0.5192999280748022, "grad_norm": 1.4368913341837615, "learning_rate": 1.7693935530739965e-05, "loss": 0.7945680618286133, "step": 3249 }, { "epoch": 0.5194597618476784, "grad_norm": 1.2681699325855122, "learning_rate": 1.7692247197670942e-05, "loss": 0.5998186469078064, "step": 3250 }, { "epoch": 0.5196195956205546, "grad_norm": 1.3972272170113151, "learning_rate": 1.7690558327402688e-05, "loss": 0.7524669170379639, "step": 3251 }, { "epoch": 0.5197794293934308, "grad_norm": 1.6043769526069824, "learning_rate": 1.768886892005315e-05, "loss": 0.613519549369812, "step": 3252 }, { "epoch": 0.519939263166307, "grad_norm": 1.550695288860151, "learning_rate": 1.7687178975740304e-05, "loss": 0.7307167053222656, "step": 3253 }, { "epoch": 0.5200990969391832, "grad_norm": 1.447639233789607, "learning_rate": 1.7685488494582178e-05, "loss": 0.6741597652435303, "step": 3254 }, { "epoch": 0.5202589307120594, "grad_norm": 1.4103047630006582, "learning_rate": 1.768379747669682e-05, "loss": 0.7266644239425659, "step": 3255 }, { "epoch": 0.5204187644849356, "grad_norm": 1.3281985636288276, "learning_rate": 1.7682105922202332e-05, "loss": 0.6262527108192444, "step": 3256 }, { "epoch": 0.5205785982578118, "grad_norm": 1.2661819817146458, "learning_rate": 1.7680413831216846e-05, "loss": 0.6470575332641602, "step": 3257 }, { "epoch": 0.520738432030688, "grad_norm": 1.489105373873125, "learning_rate": 1.7678721203858528e-05, "loss": 0.7582579851150513, "step": 3258 }, { "epoch": 0.5208982658035642, "grad_norm": 1.3620215068194674, "learning_rate": 1.767702804024558e-05, "loss": 0.5745124816894531, "step": 3259 }, { "epoch": 0.5210580995764404, "grad_norm": 1.272903679531772, "learning_rate": 1.7675334340496264e-05, "loss": 0.8358359336853027, "step": 3260 }, { "epoch": 0.5212179333493168, "grad_norm": 1.2934234918657044, "learning_rate": 1.7673640104728843e-05, "loss": 0.5052933096885681, "step": 3261 }, { "epoch": 0.521377767122193, "grad_norm": 2.299908002810897, "learning_rate": 1.7671945333061648e-05, "loss": 0.5665205717086792, "step": 3262 }, { "epoch": 0.5215376008950692, "grad_norm": 1.489269941042231, "learning_rate": 1.767025002561303e-05, "loss": 0.7217590808868408, "step": 3263 }, { "epoch": 0.5216974346679454, "grad_norm": 1.4178567025418316, "learning_rate": 1.7668554182501386e-05, "loss": 0.6951470375061035, "step": 3264 }, { "epoch": 0.5218572684408216, "grad_norm": 1.427324711968295, "learning_rate": 1.7666857803845147e-05, "loss": 0.6010559797286987, "step": 3265 }, { "epoch": 0.5220171022136978, "grad_norm": 1.1786524501583175, "learning_rate": 1.7665160889762783e-05, "loss": 0.6751329898834229, "step": 3266 }, { "epoch": 0.522176935986574, "grad_norm": 1.6818384881223343, "learning_rate": 1.7663463440372795e-05, "loss": 0.7021758556365967, "step": 3267 }, { "epoch": 0.5223367697594502, "grad_norm": 1.3339857615204829, "learning_rate": 1.7661765455793737e-05, "loss": 0.6408154368400574, "step": 3268 }, { "epoch": 0.5224966035323264, "grad_norm": 1.3332122054563253, "learning_rate": 1.7660066936144185e-05, "loss": 0.6504801511764526, "step": 3269 }, { "epoch": 0.5226564373052026, "grad_norm": 1.3916989347401838, "learning_rate": 1.7658367881542757e-05, "loss": 0.5902675986289978, "step": 3270 }, { "epoch": 0.5228162710780788, "grad_norm": 1.3241632770340408, "learning_rate": 1.7656668292108103e-05, "loss": 0.7941468358039856, "step": 3271 }, { "epoch": 0.522976104850955, "grad_norm": 1.4593121568583427, "learning_rate": 1.765496816795893e-05, "loss": 0.8145009875297546, "step": 3272 }, { "epoch": 0.5231359386238312, "grad_norm": 1.2908298472992323, "learning_rate": 1.7653267509213957e-05, "loss": 0.5945425629615784, "step": 3273 }, { "epoch": 0.5232957723967074, "grad_norm": 1.4250304272672796, "learning_rate": 1.7651566315991956e-05, "loss": 0.6065115332603455, "step": 3274 }, { "epoch": 0.5234556061695836, "grad_norm": 1.2574046017184664, "learning_rate": 1.7649864588411735e-05, "loss": 0.5951169729232788, "step": 3275 }, { "epoch": 0.5236154399424598, "grad_norm": 1.56604846741523, "learning_rate": 1.764816232659213e-05, "loss": 0.8135693073272705, "step": 3276 }, { "epoch": 0.523775273715336, "grad_norm": 1.2966748634411633, "learning_rate": 1.764645953065203e-05, "loss": 0.7698768377304077, "step": 3277 }, { "epoch": 0.5239351074882123, "grad_norm": 1.3175238922458872, "learning_rate": 1.7644756200710343e-05, "loss": 0.5965021252632141, "step": 3278 }, { "epoch": 0.5240949412610885, "grad_norm": 1.4480147654738944, "learning_rate": 1.764305233688603e-05, "loss": 0.6528807878494263, "step": 3279 }, { "epoch": 0.5242547750339647, "grad_norm": 1.2298338208707973, "learning_rate": 1.764134793929808e-05, "loss": 0.5905325412750244, "step": 3280 }, { "epoch": 0.5244146088068409, "grad_norm": 1.7112221191887411, "learning_rate": 1.7639643008065523e-05, "loss": 0.73473060131073, "step": 3281 }, { "epoch": 0.5245744425797171, "grad_norm": 1.1910652480420212, "learning_rate": 1.763793754330742e-05, "loss": 0.5569298267364502, "step": 3282 }, { "epoch": 0.5247342763525933, "grad_norm": 1.4444893477498928, "learning_rate": 1.7636231545142887e-05, "loss": 0.6367961764335632, "step": 3283 }, { "epoch": 0.5248941101254695, "grad_norm": 1.4540683031236725, "learning_rate": 1.763452501369105e-05, "loss": 0.684553861618042, "step": 3284 }, { "epoch": 0.5250539438983457, "grad_norm": 1.426873163495008, "learning_rate": 1.7632817949071098e-05, "loss": 0.7482527494430542, "step": 3285 }, { "epoch": 0.5252137776712219, "grad_norm": 1.2687923249280464, "learning_rate": 1.7631110351402245e-05, "loss": 0.610958456993103, "step": 3286 }, { "epoch": 0.5253736114440981, "grad_norm": 1.0404327613635465, "learning_rate": 1.762940222080374e-05, "loss": 0.42904213070869446, "step": 3287 }, { "epoch": 0.5255334452169743, "grad_norm": 1.210335484712458, "learning_rate": 1.762769355739487e-05, "loss": 0.5877847075462341, "step": 3288 }, { "epoch": 0.5256932789898505, "grad_norm": 1.1885601418557896, "learning_rate": 1.762598436129497e-05, "loss": 0.6796174049377441, "step": 3289 }, { "epoch": 0.5258531127627267, "grad_norm": 1.3547877223728424, "learning_rate": 1.7624274632623398e-05, "loss": 0.7612389326095581, "step": 3290 }, { "epoch": 0.5260129465356029, "grad_norm": 1.183764131947902, "learning_rate": 1.762256437149956e-05, "loss": 0.7209995985031128, "step": 3291 }, { "epoch": 0.5261727803084791, "grad_norm": 1.3637191939006614, "learning_rate": 1.762085357804289e-05, "loss": 0.7271910905838013, "step": 3292 }, { "epoch": 0.5263326140813553, "grad_norm": 1.479971542753672, "learning_rate": 1.7619142252372867e-05, "loss": 0.6946596503257751, "step": 3293 }, { "epoch": 0.5264924478542315, "grad_norm": 1.4162045466903288, "learning_rate": 1.7617430394609005e-05, "loss": 0.642956554889679, "step": 3294 }, { "epoch": 0.5266522816271078, "grad_norm": 1.2495657044422004, "learning_rate": 1.7615718004870847e-05, "loss": 0.6830593347549438, "step": 3295 }, { "epoch": 0.5268121153999841, "grad_norm": 1.3915878961931767, "learning_rate": 1.761400508327799e-05, "loss": 0.6635298728942871, "step": 3296 }, { "epoch": 0.5269719491728603, "grad_norm": 1.5513716130903126, "learning_rate": 1.761229162995005e-05, "loss": 0.669110894203186, "step": 3297 }, { "epoch": 0.5271317829457365, "grad_norm": 1.3734248098059971, "learning_rate": 1.7610577645006693e-05, "loss": 0.7726343870162964, "step": 3298 }, { "epoch": 0.5272916167186127, "grad_norm": 1.1602432533479712, "learning_rate": 1.7608863128567616e-05, "loss": 0.5453744530677795, "step": 3299 }, { "epoch": 0.5274514504914889, "grad_norm": 1.371509172921011, "learning_rate": 1.7607148080752558e-05, "loss": 0.6692403554916382, "step": 3300 }, { "epoch": 0.5276112842643651, "grad_norm": 1.2243084882417052, "learning_rate": 1.7605432501681287e-05, "loss": 0.6950448751449585, "step": 3301 }, { "epoch": 0.5277711180372413, "grad_norm": 1.4054440461657485, "learning_rate": 1.7603716391473616e-05, "loss": 0.6826663017272949, "step": 3302 }, { "epoch": 0.5279309518101175, "grad_norm": 1.3181877837143319, "learning_rate": 1.7601999750249393e-05, "loss": 0.6931796073913574, "step": 3303 }, { "epoch": 0.5280907855829937, "grad_norm": 1.5670421654696245, "learning_rate": 1.7600282578128497e-05, "loss": 0.7244482636451721, "step": 3304 }, { "epoch": 0.5282506193558699, "grad_norm": 1.3619599290912425, "learning_rate": 1.7598564875230853e-05, "loss": 0.6238716840744019, "step": 3305 }, { "epoch": 0.5284104531287461, "grad_norm": 1.3446856606238486, "learning_rate": 1.759684664167642e-05, "loss": 0.6578810811042786, "step": 3306 }, { "epoch": 0.5285702869016223, "grad_norm": 1.6432251852318585, "learning_rate": 1.759512787758519e-05, "loss": 0.5966111421585083, "step": 3307 }, { "epoch": 0.5287301206744985, "grad_norm": 1.6185036907768704, "learning_rate": 1.75934085830772e-05, "loss": 0.7427636384963989, "step": 3308 }, { "epoch": 0.5288899544473747, "grad_norm": 1.2822148741852384, "learning_rate": 1.7591688758272517e-05, "loss": 0.5910412073135376, "step": 3309 }, { "epoch": 0.529049788220251, "grad_norm": 1.3101254980478012, "learning_rate": 1.7589968403291243e-05, "loss": 0.6487846374511719, "step": 3310 }, { "epoch": 0.5292096219931272, "grad_norm": 1.2984147414933178, "learning_rate": 1.7588247518253528e-05, "loss": 0.546601414680481, "step": 3311 }, { "epoch": 0.5293694557660034, "grad_norm": 1.413477987751073, "learning_rate": 1.7586526103279557e-05, "loss": 0.7769111394882202, "step": 3312 }, { "epoch": 0.5295292895388796, "grad_norm": 1.7661158051568162, "learning_rate": 1.7584804158489533e-05, "loss": 0.8681553602218628, "step": 3313 }, { "epoch": 0.5296891233117558, "grad_norm": 1.1598356957393114, "learning_rate": 1.758308168400372e-05, "loss": 0.6436805725097656, "step": 3314 }, { "epoch": 0.529848957084632, "grad_norm": 1.3059097543241165, "learning_rate": 1.758135867994241e-05, "loss": 0.5731608867645264, "step": 3315 }, { "epoch": 0.5300087908575082, "grad_norm": 1.4436504245189778, "learning_rate": 1.7579635146425928e-05, "loss": 0.5824544429779053, "step": 3316 }, { "epoch": 0.5301686246303844, "grad_norm": 1.4212257907563661, "learning_rate": 1.757791108357464e-05, "loss": 0.7957141399383545, "step": 3317 }, { "epoch": 0.5303284584032606, "grad_norm": 1.2353009298221298, "learning_rate": 1.7576186491508948e-05, "loss": 0.6245888471603394, "step": 3318 }, { "epoch": 0.5304882921761368, "grad_norm": 1.5003641718519474, "learning_rate": 1.7574461370349296e-05, "loss": 0.6394588351249695, "step": 3319 }, { "epoch": 0.530648125949013, "grad_norm": 1.2231696700835435, "learning_rate": 1.7572735720216158e-05, "loss": 0.5519759654998779, "step": 3320 }, { "epoch": 0.5308079597218892, "grad_norm": 1.4291970494927355, "learning_rate": 1.757100954123004e-05, "loss": 0.6601712703704834, "step": 3321 }, { "epoch": 0.5309677934947654, "grad_norm": 1.4653465928484912, "learning_rate": 1.7569282833511505e-05, "loss": 0.6489498615264893, "step": 3322 }, { "epoch": 0.5311276272676416, "grad_norm": 2.0215978287509015, "learning_rate": 1.756755559718113e-05, "loss": 0.562728762626648, "step": 3323 }, { "epoch": 0.5312874610405178, "grad_norm": 1.2793504992737572, "learning_rate": 1.7565827832359542e-05, "loss": 0.5716842412948608, "step": 3324 }, { "epoch": 0.531447294813394, "grad_norm": 1.3229810063218619, "learning_rate": 1.7564099539167405e-05, "loss": 0.7035822868347168, "step": 3325 }, { "epoch": 0.5316071285862702, "grad_norm": 1.4929647497047378, "learning_rate": 1.7562370717725414e-05, "loss": 0.7277442216873169, "step": 3326 }, { "epoch": 0.5317669623591464, "grad_norm": 1.3691284155412824, "learning_rate": 1.7560641368154307e-05, "loss": 0.6963745355606079, "step": 3327 }, { "epoch": 0.5319267961320227, "grad_norm": 1.3571935805991286, "learning_rate": 1.7558911490574848e-05, "loss": 0.6908401250839233, "step": 3328 }, { "epoch": 0.5320866299048989, "grad_norm": 1.2408054268401418, "learning_rate": 1.755718108510785e-05, "loss": 0.7480169534683228, "step": 3329 }, { "epoch": 0.5322464636777751, "grad_norm": 1.1981745803035002, "learning_rate": 1.7555450151874164e-05, "loss": 0.6062700748443604, "step": 3330 }, { "epoch": 0.5324062974506514, "grad_norm": 1.4800494271478133, "learning_rate": 1.7553718690994662e-05, "loss": 0.6734371781349182, "step": 3331 }, { "epoch": 0.5325661312235276, "grad_norm": 1.2407583757126064, "learning_rate": 1.755198670259027e-05, "loss": 0.644174337387085, "step": 3332 }, { "epoch": 0.5327259649964038, "grad_norm": 1.2877228260709173, "learning_rate": 1.7550254186781944e-05, "loss": 0.7751634120941162, "step": 3333 }, { "epoch": 0.53288579876928, "grad_norm": 1.263139207587124, "learning_rate": 1.754852114369067e-05, "loss": 0.7172766923904419, "step": 3334 }, { "epoch": 0.5330456325421562, "grad_norm": 1.0945023502403224, "learning_rate": 1.7546787573437484e-05, "loss": 0.4920363426208496, "step": 3335 }, { "epoch": 0.5332054663150324, "grad_norm": 1.1479365149031404, "learning_rate": 1.7545053476143455e-05, "loss": 0.6753175854682922, "step": 3336 }, { "epoch": 0.5333653000879086, "grad_norm": 1.580442775269292, "learning_rate": 1.7543318851929676e-05, "loss": 0.6575832366943359, "step": 3337 }, { "epoch": 0.5335251338607848, "grad_norm": 1.237458215887583, "learning_rate": 1.7541583700917296e-05, "loss": 0.6568933129310608, "step": 3338 }, { "epoch": 0.533684967633661, "grad_norm": 1.3968808638147499, "learning_rate": 1.753984802322749e-05, "loss": 0.7037683129310608, "step": 3339 }, { "epoch": 0.5338448014065372, "grad_norm": 1.4472497784168104, "learning_rate": 1.7538111818981467e-05, "loss": 0.8107818365097046, "step": 3340 }, { "epoch": 0.5340046351794134, "grad_norm": 1.199086318622139, "learning_rate": 1.753637508830048e-05, "loss": 0.4793219566345215, "step": 3341 }, { "epoch": 0.5341644689522896, "grad_norm": 1.3720068059579142, "learning_rate": 1.753463783130582e-05, "loss": 0.7971971035003662, "step": 3342 }, { "epoch": 0.5343243027251658, "grad_norm": 1.478961739548071, "learning_rate": 1.7532900048118806e-05, "loss": 0.7936440110206604, "step": 3343 }, { "epoch": 0.534484136498042, "grad_norm": 1.1887075178394362, "learning_rate": 1.75311617388608e-05, "loss": 0.6231493949890137, "step": 3344 }, { "epoch": 0.5346439702709183, "grad_norm": 1.1207191662856868, "learning_rate": 1.75294229036532e-05, "loss": 0.4952911138534546, "step": 3345 }, { "epoch": 0.5348038040437945, "grad_norm": 1.4019157564964686, "learning_rate": 1.7527683542617436e-05, "loss": 0.751960039138794, "step": 3346 }, { "epoch": 0.5349636378166707, "grad_norm": 1.3991488106262777, "learning_rate": 1.7525943655874987e-05, "loss": 0.6539026498794556, "step": 3347 }, { "epoch": 0.5351234715895469, "grad_norm": 1.3646008742270415, "learning_rate": 1.7524203243547355e-05, "loss": 0.6049796342849731, "step": 3348 }, { "epoch": 0.5352833053624231, "grad_norm": 1.3029472792108274, "learning_rate": 1.7522462305756085e-05, "loss": 0.6725252866744995, "step": 3349 }, { "epoch": 0.5354431391352993, "grad_norm": 1.2876772287676834, "learning_rate": 1.752072084262276e-05, "loss": 0.7403813600540161, "step": 3350 }, { "epoch": 0.5356029729081755, "grad_norm": 1.4067061207221228, "learning_rate": 1.751897885426899e-05, "loss": 0.7315483093261719, "step": 3351 }, { "epoch": 0.5357628066810517, "grad_norm": 1.2299800140895676, "learning_rate": 1.751723634081644e-05, "loss": 0.574153482913971, "step": 3352 }, { "epoch": 0.5359226404539279, "grad_norm": 1.4203135858158866, "learning_rate": 1.7515493302386794e-05, "loss": 0.6617644429206848, "step": 3353 }, { "epoch": 0.5360824742268041, "grad_norm": 1.417130155198386, "learning_rate": 1.7513749739101782e-05, "loss": 0.6722239255905151, "step": 3354 }, { "epoch": 0.5362423079996803, "grad_norm": 1.5294486791951336, "learning_rate": 1.7512005651083166e-05, "loss": 0.6003472805023193, "step": 3355 }, { "epoch": 0.5364021417725565, "grad_norm": 1.2233849606619516, "learning_rate": 1.7510261038452752e-05, "loss": 0.6062363982200623, "step": 3356 }, { "epoch": 0.5365619755454327, "grad_norm": 6.010743372191197, "learning_rate": 1.750851590133237e-05, "loss": 0.6991024017333984, "step": 3357 }, { "epoch": 0.5367218093183089, "grad_norm": 1.464121254349576, "learning_rate": 1.7506770239843902e-05, "loss": 0.7477253079414368, "step": 3358 }, { "epoch": 0.5368816430911851, "grad_norm": 1.197912948448875, "learning_rate": 1.7505024054109256e-05, "loss": 0.6052764058113098, "step": 3359 }, { "epoch": 0.5370414768640613, "grad_norm": 1.3426901887269995, "learning_rate": 1.7503277344250374e-05, "loss": 0.542432963848114, "step": 3360 }, { "epoch": 0.5372013106369375, "grad_norm": 1.664969297687115, "learning_rate": 1.7501530110389247e-05, "loss": 0.6719183921813965, "step": 3361 }, { "epoch": 0.5373611444098138, "grad_norm": 1.529691632092692, "learning_rate": 1.7499782352647892e-05, "loss": 0.6198438405990601, "step": 3362 }, { "epoch": 0.53752097818269, "grad_norm": 1.3384051910204835, "learning_rate": 1.7498034071148366e-05, "loss": 0.6036723852157593, "step": 3363 }, { "epoch": 0.5376808119555662, "grad_norm": 1.3681487351225858, "learning_rate": 1.749628526601277e-05, "loss": 0.5731211304664612, "step": 3364 }, { "epoch": 0.5378406457284424, "grad_norm": 1.4859620712106734, "learning_rate": 1.7494535937363223e-05, "loss": 0.8325076103210449, "step": 3365 }, { "epoch": 0.5380004795013186, "grad_norm": 1.4376450159412244, "learning_rate": 1.74927860853219e-05, "loss": 0.7335789799690247, "step": 3366 }, { "epoch": 0.5381603132741949, "grad_norm": 1.3181604511045315, "learning_rate": 1.7491035710011e-05, "loss": 0.6077995300292969, "step": 3367 }, { "epoch": 0.5383201470470711, "grad_norm": 1.5013218183266233, "learning_rate": 1.7489284811552768e-05, "loss": 0.65059894323349, "step": 3368 }, { "epoch": 0.5384799808199473, "grad_norm": 1.2798148744255384, "learning_rate": 1.7487533390069475e-05, "loss": 0.5000290870666504, "step": 3369 }, { "epoch": 0.5386398145928235, "grad_norm": 1.4854968854450565, "learning_rate": 1.7485781445683437e-05, "loss": 0.5750426054000854, "step": 3370 }, { "epoch": 0.5387996483656997, "grad_norm": 1.1761666645014384, "learning_rate": 1.7484028978517e-05, "loss": 0.668907880783081, "step": 3371 }, { "epoch": 0.5389594821385759, "grad_norm": 1.3858989917626876, "learning_rate": 1.7482275988692562e-05, "loss": 0.6698299646377563, "step": 3372 }, { "epoch": 0.5391193159114521, "grad_norm": 1.2122134417698016, "learning_rate": 1.748052247633253e-05, "loss": 0.581870436668396, "step": 3373 }, { "epoch": 0.5392791496843283, "grad_norm": 1.8477862325121248, "learning_rate": 1.747876844155937e-05, "loss": 0.6086711287498474, "step": 3374 }, { "epoch": 0.5394389834572045, "grad_norm": 1.5295588486647447, "learning_rate": 1.7477013884495576e-05, "loss": 0.6948727369308472, "step": 3375 }, { "epoch": 0.5395988172300807, "grad_norm": 1.2706848891754488, "learning_rate": 1.7475258805263683e-05, "loss": 0.6293987035751343, "step": 3376 }, { "epoch": 0.539758651002957, "grad_norm": 1.2561327428309437, "learning_rate": 1.747350320398626e-05, "loss": 0.5806186199188232, "step": 3377 }, { "epoch": 0.5399184847758332, "grad_norm": 1.3209844643036004, "learning_rate": 1.747174708078591e-05, "loss": 0.7810990810394287, "step": 3378 }, { "epoch": 0.5400783185487094, "grad_norm": 1.3894528233108032, "learning_rate": 1.746999043578527e-05, "loss": 0.67838054895401, "step": 3379 }, { "epoch": 0.5402381523215856, "grad_norm": 1.2422236373611033, "learning_rate": 1.7468233269107026e-05, "loss": 0.6119016408920288, "step": 3380 }, { "epoch": 0.5403979860944618, "grad_norm": 1.314478334775138, "learning_rate": 1.746647558087389e-05, "loss": 0.6675276756286621, "step": 3381 }, { "epoch": 0.540557819867338, "grad_norm": 1.27001969563391, "learning_rate": 1.7464717371208607e-05, "loss": 0.5633958578109741, "step": 3382 }, { "epoch": 0.5407176536402142, "grad_norm": 1.2956696631320026, "learning_rate": 1.7462958640233967e-05, "loss": 0.7714853286743164, "step": 3383 }, { "epoch": 0.5408774874130904, "grad_norm": 1.3023698565971098, "learning_rate": 1.74611993880728e-05, "loss": 0.6280545592308044, "step": 3384 }, { "epoch": 0.5410373211859666, "grad_norm": 1.2917450538258466, "learning_rate": 1.7459439614847956e-05, "loss": 0.6293779015541077, "step": 3385 }, { "epoch": 0.5411971549588428, "grad_norm": 1.5864542866951408, "learning_rate": 1.7457679320682337e-05, "loss": 0.7802628874778748, "step": 3386 }, { "epoch": 0.541356988731719, "grad_norm": 2.21122903017392, "learning_rate": 1.7455918505698876e-05, "loss": 0.6146061420440674, "step": 3387 }, { "epoch": 0.5415168225045952, "grad_norm": 1.292499550291871, "learning_rate": 1.7454157170020538e-05, "loss": 0.7111290693283081, "step": 3388 }, { "epoch": 0.5416766562774714, "grad_norm": 1.3397784563243105, "learning_rate": 1.7452395313770333e-05, "loss": 0.6446300745010376, "step": 3389 }, { "epoch": 0.5418364900503476, "grad_norm": 1.286789900828431, "learning_rate": 1.7450632937071302e-05, "loss": 0.7198811769485474, "step": 3390 }, { "epoch": 0.5419963238232238, "grad_norm": 1.3478193696056893, "learning_rate": 1.744887004004652e-05, "loss": 0.6046252846717834, "step": 3391 }, { "epoch": 0.5421561575961, "grad_norm": 1.3732083651546427, "learning_rate": 1.7447106622819105e-05, "loss": 0.6406614780426025, "step": 3392 }, { "epoch": 0.5423159913689762, "grad_norm": 1.0928194758009422, "learning_rate": 1.7445342685512206e-05, "loss": 0.5384916067123413, "step": 3393 }, { "epoch": 0.5424758251418524, "grad_norm": 1.267022352992181, "learning_rate": 1.7443578228249013e-05, "loss": 0.695517897605896, "step": 3394 }, { "epoch": 0.5426356589147286, "grad_norm": 1.2909958655296134, "learning_rate": 1.744181325115274e-05, "loss": 0.6857254505157471, "step": 3395 }, { "epoch": 0.5427954926876049, "grad_norm": 1.3936483273861349, "learning_rate": 1.744004775434666e-05, "loss": 0.725933313369751, "step": 3396 }, { "epoch": 0.5429553264604811, "grad_norm": 1.4186796721883126, "learning_rate": 1.743828173795406e-05, "loss": 0.6600837111473083, "step": 3397 }, { "epoch": 0.5431151602333573, "grad_norm": 1.174719572927995, "learning_rate": 1.7436515202098276e-05, "loss": 0.6589547395706177, "step": 3398 }, { "epoch": 0.5432749940062335, "grad_norm": 1.3551240503233322, "learning_rate": 1.7434748146902675e-05, "loss": 0.6393237709999084, "step": 3399 }, { "epoch": 0.5434348277791097, "grad_norm": 1.5692908475517386, "learning_rate": 1.7432980572490663e-05, "loss": 0.5526387691497803, "step": 3400 }, { "epoch": 0.5435946615519859, "grad_norm": 1.452836334165317, "learning_rate": 1.743121247898568e-05, "loss": 0.7952708601951599, "step": 3401 }, { "epoch": 0.5437544953248622, "grad_norm": 1.3713306776976668, "learning_rate": 1.7429443866511206e-05, "loss": 0.5972472429275513, "step": 3402 }, { "epoch": 0.5439143290977384, "grad_norm": 1.6093084374357574, "learning_rate": 1.742767473519075e-05, "loss": 0.747690737247467, "step": 3403 }, { "epoch": 0.5440741628706146, "grad_norm": 1.2714765677678694, "learning_rate": 1.7425905085147864e-05, "loss": 0.7437419295310974, "step": 3404 }, { "epoch": 0.5442339966434908, "grad_norm": 1.548453610348173, "learning_rate": 1.7424134916506133e-05, "loss": 0.804347038269043, "step": 3405 }, { "epoch": 0.544393830416367, "grad_norm": 1.48476945694663, "learning_rate": 1.7422364229389183e-05, "loss": 0.5838221311569214, "step": 3406 }, { "epoch": 0.5445536641892432, "grad_norm": 1.3418993003712698, "learning_rate": 1.7420593023920673e-05, "loss": 0.504860520362854, "step": 3407 }, { "epoch": 0.5447134979621194, "grad_norm": 1.1965654920184563, "learning_rate": 1.741882130022429e-05, "loss": 0.5379858016967773, "step": 3408 }, { "epoch": 0.5448733317349956, "grad_norm": 1.1881574203388792, "learning_rate": 1.7417049058423772e-05, "loss": 0.5829296112060547, "step": 3409 }, { "epoch": 0.5450331655078718, "grad_norm": 1.4509807564315778, "learning_rate": 1.7415276298642885e-05, "loss": 0.8727485537528992, "step": 3410 }, { "epoch": 0.545192999280748, "grad_norm": 1.3509423863635275, "learning_rate": 1.741350302100543e-05, "loss": 0.691536545753479, "step": 3411 }, { "epoch": 0.5453528330536243, "grad_norm": 1.426492616432482, "learning_rate": 1.741172922563525e-05, "loss": 0.7383873462677002, "step": 3412 }, { "epoch": 0.5455126668265005, "grad_norm": 1.4752075584936128, "learning_rate": 1.7409954912656213e-05, "loss": 0.6902316808700562, "step": 3413 }, { "epoch": 0.5456725005993767, "grad_norm": 1.1558016953260544, "learning_rate": 1.740818008219224e-05, "loss": 0.5410501956939697, "step": 3414 }, { "epoch": 0.5458323343722529, "grad_norm": 1.2829867342536585, "learning_rate": 1.7406404734367273e-05, "loss": 0.6669406294822693, "step": 3415 }, { "epoch": 0.5459921681451291, "grad_norm": 1.4930789838636922, "learning_rate": 1.74046288693053e-05, "loss": 0.716016948223114, "step": 3416 }, { "epoch": 0.5461520019180053, "grad_norm": 1.212001015534178, "learning_rate": 1.7402852487130336e-05, "loss": 0.658932089805603, "step": 3417 }, { "epoch": 0.5463118356908815, "grad_norm": 1.3985858613864268, "learning_rate": 1.740107558796644e-05, "loss": 0.6697168350219727, "step": 3418 }, { "epoch": 0.5464716694637577, "grad_norm": 1.2976218185201593, "learning_rate": 1.7399298171937706e-05, "loss": 0.6561987400054932, "step": 3419 }, { "epoch": 0.5466315032366339, "grad_norm": 1.4702571729977143, "learning_rate": 1.739752023916826e-05, "loss": 0.7027947902679443, "step": 3420 }, { "epoch": 0.5467913370095101, "grad_norm": 1.052365438223427, "learning_rate": 1.7395741789782267e-05, "loss": 0.5118316411972046, "step": 3421 }, { "epoch": 0.5469511707823863, "grad_norm": 1.2898076121185416, "learning_rate": 1.7393962823903926e-05, "loss": 0.49246740341186523, "step": 3422 }, { "epoch": 0.5471110045552625, "grad_norm": 1.3502846205046841, "learning_rate": 1.739218334165748e-05, "loss": 0.7547677755355835, "step": 3423 }, { "epoch": 0.5472708383281387, "grad_norm": 1.6089256864057508, "learning_rate": 1.7390403343167196e-05, "loss": 0.674443244934082, "step": 3424 }, { "epoch": 0.5474306721010149, "grad_norm": 1.5418823178599064, "learning_rate": 1.738862282855738e-05, "loss": 0.6398800611495972, "step": 3425 }, { "epoch": 0.5475905058738911, "grad_norm": 1.1942459592157262, "learning_rate": 1.7386841797952385e-05, "loss": 0.5644793510437012, "step": 3426 }, { "epoch": 0.5477503396467673, "grad_norm": 1.3247656299667219, "learning_rate": 1.738506025147659e-05, "loss": 0.5351995229721069, "step": 3427 }, { "epoch": 0.5479101734196435, "grad_norm": 1.5946912076139828, "learning_rate": 1.7383278189254404e-05, "loss": 0.6356407403945923, "step": 3428 }, { "epoch": 0.5480700071925197, "grad_norm": 1.8106104256659799, "learning_rate": 1.7381495611410288e-05, "loss": 0.7876839637756348, "step": 3429 }, { "epoch": 0.548229840965396, "grad_norm": 1.4480288195615008, "learning_rate": 1.737971251806873e-05, "loss": 0.7970958948135376, "step": 3430 }, { "epoch": 0.5483896747382722, "grad_norm": 1.472946758830749, "learning_rate": 1.737792890935425e-05, "loss": 0.6418031454086304, "step": 3431 }, { "epoch": 0.5485495085111484, "grad_norm": 1.3196599148995796, "learning_rate": 1.7376144785391417e-05, "loss": 0.7742650508880615, "step": 3432 }, { "epoch": 0.5487093422840246, "grad_norm": 1.3054140204707112, "learning_rate": 1.7374360146304818e-05, "loss": 0.6273850202560425, "step": 3433 }, { "epoch": 0.5488691760569008, "grad_norm": 1.3138520474420317, "learning_rate": 1.7372574992219093e-05, "loss": 0.7114772796630859, "step": 3434 }, { "epoch": 0.549029009829777, "grad_norm": 1.4881813674965498, "learning_rate": 1.7370789323258906e-05, "loss": 0.6522894501686096, "step": 3435 }, { "epoch": 0.5491888436026532, "grad_norm": 1.3611941988787504, "learning_rate": 1.7369003139548967e-05, "loss": 0.5566262006759644, "step": 3436 }, { "epoch": 0.5493486773755295, "grad_norm": 1.316177971195993, "learning_rate": 1.7367216441214017e-05, "loss": 0.6014896035194397, "step": 3437 }, { "epoch": 0.5495085111484057, "grad_norm": 1.602045187108803, "learning_rate": 1.7365429228378827e-05, "loss": 0.6998291015625, "step": 3438 }, { "epoch": 0.5496683449212819, "grad_norm": 1.305642764618903, "learning_rate": 1.7363641501168212e-05, "loss": 0.6735547780990601, "step": 3439 }, { "epoch": 0.5498281786941581, "grad_norm": 1.317886506287957, "learning_rate": 1.7361853259707026e-05, "loss": 0.6518927812576294, "step": 3440 }, { "epoch": 0.5499880124670343, "grad_norm": 1.1557259795617782, "learning_rate": 1.736006450412014e-05, "loss": 0.476670503616333, "step": 3441 }, { "epoch": 0.5501478462399105, "grad_norm": 1.2569278682821943, "learning_rate": 1.7358275234532487e-05, "loss": 0.6387946605682373, "step": 3442 }, { "epoch": 0.5503076800127867, "grad_norm": 1.3584321413621867, "learning_rate": 1.7356485451069023e-05, "loss": 0.6964125633239746, "step": 3443 }, { "epoch": 0.5504675137856629, "grad_norm": 1.3430704195009744, "learning_rate": 1.7354695153854734e-05, "loss": 0.6153122782707214, "step": 3444 }, { "epoch": 0.5506273475585391, "grad_norm": 1.2946503376402858, "learning_rate": 1.7352904343014647e-05, "loss": 0.6061907410621643, "step": 3445 }, { "epoch": 0.5507871813314154, "grad_norm": 1.6836581611587582, "learning_rate": 1.735111301867383e-05, "loss": 0.6930907964706421, "step": 3446 }, { "epoch": 0.5509470151042916, "grad_norm": 1.1153651702537901, "learning_rate": 1.7349321180957382e-05, "loss": 0.5682157874107361, "step": 3447 }, { "epoch": 0.5511068488771678, "grad_norm": 1.2682623202644594, "learning_rate": 1.734752882999044e-05, "loss": 0.592563271522522, "step": 3448 }, { "epoch": 0.551266682650044, "grad_norm": 1.4323057268832395, "learning_rate": 1.7345735965898173e-05, "loss": 0.752490758895874, "step": 3449 }, { "epoch": 0.5514265164229202, "grad_norm": 1.1907489419889525, "learning_rate": 1.734394258880579e-05, "loss": 0.6154223680496216, "step": 3450 }, { "epoch": 0.5515863501957964, "grad_norm": 1.468960494129003, "learning_rate": 1.7342148698838532e-05, "loss": 0.5817596912384033, "step": 3451 }, { "epoch": 0.5517461839686726, "grad_norm": 1.3696429875680611, "learning_rate": 1.734035429612168e-05, "loss": 0.6606818437576294, "step": 3452 }, { "epoch": 0.5519060177415488, "grad_norm": 1.1540399583518441, "learning_rate": 1.7338559380780546e-05, "loss": 0.603647768497467, "step": 3453 }, { "epoch": 0.552065851514425, "grad_norm": 1.3522088083524348, "learning_rate": 1.7336763952940484e-05, "loss": 0.5471448302268982, "step": 3454 }, { "epoch": 0.5522256852873012, "grad_norm": 1.3454013384601442, "learning_rate": 1.733496801272688e-05, "loss": 0.6743029952049255, "step": 3455 }, { "epoch": 0.5523855190601774, "grad_norm": 1.3004266563237523, "learning_rate": 1.7333171560265148e-05, "loss": 0.5831489562988281, "step": 3456 }, { "epoch": 0.5525453528330536, "grad_norm": 1.3348161023540237, "learning_rate": 1.733137459568076e-05, "loss": 0.710056483745575, "step": 3457 }, { "epoch": 0.5527051866059298, "grad_norm": 1.2113740281366014, "learning_rate": 1.73295771190992e-05, "loss": 0.6228444576263428, "step": 3458 }, { "epoch": 0.552865020378806, "grad_norm": 1.326512452032384, "learning_rate": 1.7327779130645995e-05, "loss": 0.8248697519302368, "step": 3459 }, { "epoch": 0.5530248541516822, "grad_norm": 1.6775982621244807, "learning_rate": 1.7325980630446723e-05, "loss": 0.5318511128425598, "step": 3460 }, { "epoch": 0.5531846879245584, "grad_norm": 1.461945349436733, "learning_rate": 1.7324181618626973e-05, "loss": 0.7420292496681213, "step": 3461 }, { "epoch": 0.5533445216974346, "grad_norm": 1.2774091114558919, "learning_rate": 1.7322382095312382e-05, "loss": 0.6993167400360107, "step": 3462 }, { "epoch": 0.5535043554703108, "grad_norm": 1.4128067537665676, "learning_rate": 1.732058206062863e-05, "loss": 0.5338654518127441, "step": 3463 }, { "epoch": 0.553664189243187, "grad_norm": 1.5019637868870341, "learning_rate": 1.7318781514701416e-05, "loss": 0.7249306440353394, "step": 3464 }, { "epoch": 0.5538240230160633, "grad_norm": 1.2801421162380413, "learning_rate": 1.731698045765649e-05, "loss": 0.5482423305511475, "step": 3465 }, { "epoch": 0.5539838567889395, "grad_norm": 1.4009065607927793, "learning_rate": 1.731517888961963e-05, "loss": 0.7466360330581665, "step": 3466 }, { "epoch": 0.5541436905618157, "grad_norm": 1.2996576355438216, "learning_rate": 1.7313376810716654e-05, "loss": 0.6409533619880676, "step": 3467 }, { "epoch": 0.5543035243346919, "grad_norm": 1.212527765930779, "learning_rate": 1.7311574221073405e-05, "loss": 0.6362326145172119, "step": 3468 }, { "epoch": 0.5544633581075681, "grad_norm": 1.2416791376806955, "learning_rate": 1.7309771120815778e-05, "loss": 0.7277450561523438, "step": 3469 }, { "epoch": 0.5546231918804443, "grad_norm": 1.4515325534939256, "learning_rate": 1.7307967510069686e-05, "loss": 0.6530172824859619, "step": 3470 }, { "epoch": 0.5547830256533205, "grad_norm": 1.1893906312788276, "learning_rate": 1.7306163388961096e-05, "loss": 0.4833327531814575, "step": 3471 }, { "epoch": 0.5549428594261968, "grad_norm": 1.2976928794750917, "learning_rate": 1.7304358757615997e-05, "loss": 0.6254940032958984, "step": 3472 }, { "epoch": 0.555102693199073, "grad_norm": 1.336461485683396, "learning_rate": 1.7302553616160417e-05, "loss": 0.7139802575111389, "step": 3473 }, { "epoch": 0.5552625269719492, "grad_norm": 1.2489861795777364, "learning_rate": 1.730074796472042e-05, "loss": 0.6195944547653198, "step": 3474 }, { "epoch": 0.5554223607448254, "grad_norm": 1.160990516783744, "learning_rate": 1.729894180342211e-05, "loss": 0.619806170463562, "step": 3475 }, { "epoch": 0.5555821945177016, "grad_norm": 1.6528802868638124, "learning_rate": 1.7297135132391626e-05, "loss": 0.6997839212417603, "step": 3476 }, { "epoch": 0.5557420282905778, "grad_norm": 1.490180830940789, "learning_rate": 1.729532795175513e-05, "loss": 0.5673412084579468, "step": 3477 }, { "epoch": 0.555901862063454, "grad_norm": 1.2970531678260737, "learning_rate": 1.7293520261638832e-05, "loss": 0.5244846940040588, "step": 3478 }, { "epoch": 0.5560616958363302, "grad_norm": 1.3818717126959967, "learning_rate": 1.7291712062168978e-05, "loss": 0.6393638253211975, "step": 3479 }, { "epoch": 0.5562215296092065, "grad_norm": 1.278149934507257, "learning_rate": 1.728990335347185e-05, "loss": 0.5987988710403442, "step": 3480 }, { "epoch": 0.5563813633820827, "grad_norm": 1.2947745823993726, "learning_rate": 1.728809413567375e-05, "loss": 0.7066690325737, "step": 3481 }, { "epoch": 0.5565411971549589, "grad_norm": 1.2026959598124167, "learning_rate": 1.7286284408901036e-05, "loss": 0.5921949148178101, "step": 3482 }, { "epoch": 0.5567010309278351, "grad_norm": 1.478710517426958, "learning_rate": 1.728447417328009e-05, "loss": 0.501231849193573, "step": 3483 }, { "epoch": 0.5568608647007113, "grad_norm": 1.2496260909694503, "learning_rate": 1.7282663428937334e-05, "loss": 0.6041455268859863, "step": 3484 }, { "epoch": 0.5570206984735875, "grad_norm": 1.3940321811968694, "learning_rate": 1.7280852175999224e-05, "loss": 0.7643565535545349, "step": 3485 }, { "epoch": 0.5571805322464637, "grad_norm": 1.5627215735597055, "learning_rate": 1.7279040414592247e-05, "loss": 0.6332203149795532, "step": 3486 }, { "epoch": 0.5573403660193399, "grad_norm": 1.121481559435728, "learning_rate": 1.727722814484294e-05, "loss": 0.6162508130073547, "step": 3487 }, { "epoch": 0.5575001997922161, "grad_norm": 1.394875851310007, "learning_rate": 1.7275415366877855e-05, "loss": 0.6296569108963013, "step": 3488 }, { "epoch": 0.5576600335650923, "grad_norm": 1.2127643262649392, "learning_rate": 1.727360208082359e-05, "loss": 0.6339658498764038, "step": 3489 }, { "epoch": 0.5578198673379685, "grad_norm": 1.2164420392127004, "learning_rate": 1.727178828680679e-05, "loss": 0.5209901332855225, "step": 3490 }, { "epoch": 0.5579797011108447, "grad_norm": 1.3203304920392838, "learning_rate": 1.7269973984954114e-05, "loss": 0.5034038424491882, "step": 3491 }, { "epoch": 0.5581395348837209, "grad_norm": 1.4131636062249502, "learning_rate": 1.7268159175392266e-05, "loss": 0.6378763318061829, "step": 3492 }, { "epoch": 0.5582993686565971, "grad_norm": 1.2432044454681925, "learning_rate": 1.726634385824799e-05, "loss": 0.8096505403518677, "step": 3493 }, { "epoch": 0.5584592024294733, "grad_norm": 1.2300508163860822, "learning_rate": 1.726452803364806e-05, "loss": 0.7073753476142883, "step": 3494 }, { "epoch": 0.5586190362023495, "grad_norm": 1.3036331389919849, "learning_rate": 1.726271170171929e-05, "loss": 0.604549765586853, "step": 3495 }, { "epoch": 0.5587788699752257, "grad_norm": 1.2950000381241722, "learning_rate": 1.726089486258852e-05, "loss": 0.5369107723236084, "step": 3496 }, { "epoch": 0.558938703748102, "grad_norm": 1.5052100433110962, "learning_rate": 1.725907751638263e-05, "loss": 0.7494378685951233, "step": 3497 }, { "epoch": 0.5590985375209782, "grad_norm": 1.2293071824222632, "learning_rate": 1.725725966322855e-05, "loss": 0.572180449962616, "step": 3498 }, { "epoch": 0.5592583712938544, "grad_norm": 1.2671848676830035, "learning_rate": 1.725544130325322e-05, "loss": 0.6301899552345276, "step": 3499 }, { "epoch": 0.5594182050667306, "grad_norm": 1.3702392152897993, "learning_rate": 1.725362243658363e-05, "loss": 0.6318742036819458, "step": 3500 }, { "epoch": 0.5595780388396068, "grad_norm": 1.2367630345602034, "learning_rate": 1.7251803063346807e-05, "loss": 0.5695955753326416, "step": 3501 }, { "epoch": 0.559737872612483, "grad_norm": 1.4101526845526338, "learning_rate": 1.7249983183669807e-05, "loss": 0.7587012648582458, "step": 3502 }, { "epoch": 0.5598977063853592, "grad_norm": 1.4672109281133998, "learning_rate": 1.7248162797679727e-05, "loss": 0.7743348479270935, "step": 3503 }, { "epoch": 0.5600575401582354, "grad_norm": 1.3752315249332738, "learning_rate": 1.724634190550369e-05, "loss": 0.8007620573043823, "step": 3504 }, { "epoch": 0.5602173739311116, "grad_norm": 1.3259146438844793, "learning_rate": 1.7244520507268864e-05, "loss": 0.6278820037841797, "step": 3505 }, { "epoch": 0.5603772077039878, "grad_norm": 1.4474441443620991, "learning_rate": 1.724269860310245e-05, "loss": 0.6128769516944885, "step": 3506 }, { "epoch": 0.560537041476864, "grad_norm": 1.3503425809644114, "learning_rate": 1.7240876193131685e-05, "loss": 0.770089864730835, "step": 3507 }, { "epoch": 0.5606968752497403, "grad_norm": 1.3110504613594687, "learning_rate": 1.7239053277483833e-05, "loss": 0.7533187866210938, "step": 3508 }, { "epoch": 0.5608567090226165, "grad_norm": 1.3915462851097151, "learning_rate": 1.7237229856286207e-05, "loss": 0.8145678043365479, "step": 3509 }, { "epoch": 0.5610165427954927, "grad_norm": 1.6012221928931891, "learning_rate": 1.7235405929666146e-05, "loss": 0.9298213124275208, "step": 3510 }, { "epoch": 0.5611763765683689, "grad_norm": 1.3270890945607086, "learning_rate": 1.7233581497751026e-05, "loss": 0.722296953201294, "step": 3511 }, { "epoch": 0.5613362103412451, "grad_norm": 1.606767513263266, "learning_rate": 1.7231756560668258e-05, "loss": 0.6904152631759644, "step": 3512 }, { "epoch": 0.5614960441141214, "grad_norm": 1.41590363222745, "learning_rate": 1.722993111854529e-05, "loss": 0.8377268314361572, "step": 3513 }, { "epoch": 0.5616558778869976, "grad_norm": 1.217162036610024, "learning_rate": 1.7228105171509605e-05, "loss": 0.46983811259269714, "step": 3514 }, { "epoch": 0.5618157116598738, "grad_norm": 1.4399567853916921, "learning_rate": 1.722627871968872e-05, "loss": 0.5877717733383179, "step": 3515 }, { "epoch": 0.56197554543275, "grad_norm": 1.2743494360250123, "learning_rate": 1.7224451763210187e-05, "loss": 0.5679811239242554, "step": 3516 }, { "epoch": 0.5621353792056262, "grad_norm": 1.5278252587751695, "learning_rate": 1.72226243022016e-05, "loss": 0.6549334526062012, "step": 3517 }, { "epoch": 0.5622952129785024, "grad_norm": 1.2735649612267563, "learning_rate": 1.722079633679057e-05, "loss": 0.6163715124130249, "step": 3518 }, { "epoch": 0.5624550467513786, "grad_norm": 1.4762053774857207, "learning_rate": 1.7218967867104767e-05, "loss": 0.6142867207527161, "step": 3519 }, { "epoch": 0.5626148805242548, "grad_norm": 1.7236289462533299, "learning_rate": 1.7217138893271882e-05, "loss": 0.676325261592865, "step": 3520 }, { "epoch": 0.562774714297131, "grad_norm": 1.187494510963724, "learning_rate": 1.721530941541964e-05, "loss": 0.5851874351501465, "step": 3521 }, { "epoch": 0.5629345480700072, "grad_norm": 1.5810264322795897, "learning_rate": 1.7213479433675807e-05, "loss": 0.6229936480522156, "step": 3522 }, { "epoch": 0.5630943818428834, "grad_norm": 1.3339610197275482, "learning_rate": 1.721164894816819e-05, "loss": 0.6591496467590332, "step": 3523 }, { "epoch": 0.5632542156157596, "grad_norm": 1.2214428079920712, "learning_rate": 1.7209817959024613e-05, "loss": 0.5710176229476929, "step": 3524 }, { "epoch": 0.5634140493886358, "grad_norm": 1.0832177047951015, "learning_rate": 1.7207986466372953e-05, "loss": 0.5450361371040344, "step": 3525 }, { "epoch": 0.563573883161512, "grad_norm": 1.344194525449029, "learning_rate": 1.7206154470341108e-05, "loss": 0.7329450845718384, "step": 3526 }, { "epoch": 0.5637337169343882, "grad_norm": 1.501471259246436, "learning_rate": 1.7204321971057024e-05, "loss": 0.6032882928848267, "step": 3527 }, { "epoch": 0.5638935507072644, "grad_norm": 1.2521805479731414, "learning_rate": 1.7202488968648675e-05, "loss": 0.5574590563774109, "step": 3528 }, { "epoch": 0.5640533844801406, "grad_norm": 1.1246214739027858, "learning_rate": 1.720065546324407e-05, "loss": 0.6257256269454956, "step": 3529 }, { "epoch": 0.5642132182530168, "grad_norm": 1.7957053330992674, "learning_rate": 1.7198821454971255e-05, "loss": 0.6482585668563843, "step": 3530 }, { "epoch": 0.564373052025893, "grad_norm": 1.5412894648285929, "learning_rate": 1.719698694395831e-05, "loss": 0.7343719601631165, "step": 3531 }, { "epoch": 0.5645328857987693, "grad_norm": 1.2739507532680905, "learning_rate": 1.7195151930333352e-05, "loss": 0.7121622562408447, "step": 3532 }, { "epoch": 0.5646927195716455, "grad_norm": 1.2829064796474883, "learning_rate": 1.7193316414224533e-05, "loss": 0.695358395576477, "step": 3533 }, { "epoch": 0.5648525533445217, "grad_norm": 1.2467771365649867, "learning_rate": 1.719148039576004e-05, "loss": 0.6340655088424683, "step": 3534 }, { "epoch": 0.5650123871173979, "grad_norm": 1.1976599875488272, "learning_rate": 1.7189643875068088e-05, "loss": 0.7568054795265198, "step": 3535 }, { "epoch": 0.5651722208902741, "grad_norm": 1.3609784075194147, "learning_rate": 1.7187806852276936e-05, "loss": 0.6209052801132202, "step": 3536 }, { "epoch": 0.5653320546631503, "grad_norm": 1.3983172886356734, "learning_rate": 1.7185969327514874e-05, "loss": 0.6143126487731934, "step": 3537 }, { "epoch": 0.5654918884360265, "grad_norm": 1.205834272123039, "learning_rate": 1.7184131300910232e-05, "loss": 0.6806745529174805, "step": 3538 }, { "epoch": 0.5656517222089027, "grad_norm": 1.3790689035826011, "learning_rate": 1.718229277259137e-05, "loss": 0.6447205543518066, "step": 3539 }, { "epoch": 0.5658115559817789, "grad_norm": 1.1965622909479021, "learning_rate": 1.718045374268668e-05, "loss": 0.6303297877311707, "step": 3540 }, { "epoch": 0.5659713897546551, "grad_norm": 1.9564756599380682, "learning_rate": 1.71786142113246e-05, "loss": 0.6868810653686523, "step": 3541 }, { "epoch": 0.5661312235275313, "grad_norm": 1.4459575080729739, "learning_rate": 1.717677417863359e-05, "loss": 0.6690500974655151, "step": 3542 }, { "epoch": 0.5662910573004076, "grad_norm": 1.3169468135445124, "learning_rate": 1.7174933644742154e-05, "loss": 0.667747437953949, "step": 3543 }, { "epoch": 0.5664508910732838, "grad_norm": 1.3423502312444748, "learning_rate": 1.7173092609778837e-05, "loss": 0.6058038473129272, "step": 3544 }, { "epoch": 0.56661072484616, "grad_norm": 1.2514854975268508, "learning_rate": 1.7171251073872195e-05, "loss": 0.5661507844924927, "step": 3545 }, { "epoch": 0.5667705586190362, "grad_norm": 1.4678640898759059, "learning_rate": 1.716940903715084e-05, "loss": 0.5260874032974243, "step": 3546 }, { "epoch": 0.5669303923919125, "grad_norm": 1.1956156686225865, "learning_rate": 1.7167566499743417e-05, "loss": 0.6527509093284607, "step": 3547 }, { "epoch": 0.5670902261647887, "grad_norm": 1.1593806195156364, "learning_rate": 1.7165723461778602e-05, "loss": 0.5594768524169922, "step": 3548 }, { "epoch": 0.5672500599376649, "grad_norm": 1.29657462595587, "learning_rate": 1.7163879923385103e-05, "loss": 0.6460009813308716, "step": 3549 }, { "epoch": 0.5674098937105411, "grad_norm": 1.1855591016749185, "learning_rate": 1.7162035884691665e-05, "loss": 0.5713326334953308, "step": 3550 }, { "epoch": 0.5675697274834173, "grad_norm": 1.3184778569961664, "learning_rate": 1.7160191345827073e-05, "loss": 0.5434755086898804, "step": 3551 }, { "epoch": 0.5677295612562935, "grad_norm": 1.377027459922168, "learning_rate": 1.715834630692014e-05, "loss": 0.6068323850631714, "step": 3552 }, { "epoch": 0.5678893950291697, "grad_norm": 1.345906064089921, "learning_rate": 1.715650076809972e-05, "loss": 0.6783758401870728, "step": 3553 }, { "epoch": 0.5680492288020459, "grad_norm": 1.1469640598657855, "learning_rate": 1.7154654729494696e-05, "loss": 0.5402476787567139, "step": 3554 }, { "epoch": 0.5682090625749221, "grad_norm": 1.1680841414389442, "learning_rate": 1.7152808191233993e-05, "loss": 0.5588859915733337, "step": 3555 }, { "epoch": 0.5683688963477983, "grad_norm": 1.4673732582533443, "learning_rate": 1.7150961153446563e-05, "loss": 0.6867421865463257, "step": 3556 }, { "epoch": 0.5685287301206745, "grad_norm": 1.5627251721173652, "learning_rate": 1.7149113616261396e-05, "loss": 0.8048520088195801, "step": 3557 }, { "epoch": 0.5686885638935507, "grad_norm": 1.232759486341354, "learning_rate": 1.7147265579807522e-05, "loss": 0.7140878438949585, "step": 3558 }, { "epoch": 0.5688483976664269, "grad_norm": 1.5247202450258122, "learning_rate": 1.714541704421399e-05, "loss": 0.6195781230926514, "step": 3559 }, { "epoch": 0.5690082314393031, "grad_norm": 1.3757044244071832, "learning_rate": 1.7143568009609913e-05, "loss": 0.7514217495918274, "step": 3560 }, { "epoch": 0.5691680652121793, "grad_norm": 1.3602615830184188, "learning_rate": 1.7141718476124407e-05, "loss": 0.6192144155502319, "step": 3561 }, { "epoch": 0.5693278989850555, "grad_norm": 1.4787249588562137, "learning_rate": 1.713986844388664e-05, "loss": 0.7337640523910522, "step": 3562 }, { "epoch": 0.5694877327579317, "grad_norm": 1.464380216844388, "learning_rate": 1.7138017913025813e-05, "loss": 0.7519044876098633, "step": 3563 }, { "epoch": 0.569647566530808, "grad_norm": 1.220555778047901, "learning_rate": 1.7136166883671165e-05, "loss": 0.6701940298080444, "step": 3564 }, { "epoch": 0.5698074003036842, "grad_norm": 1.3683543877090907, "learning_rate": 1.7134315355951956e-05, "loss": 0.7068596482276917, "step": 3565 }, { "epoch": 0.5699672340765604, "grad_norm": 1.3870777046157052, "learning_rate": 1.7132463329997495e-05, "loss": 0.5631163120269775, "step": 3566 }, { "epoch": 0.5701270678494366, "grad_norm": 1.1427690549781775, "learning_rate": 1.7130610805937123e-05, "loss": 0.6403575539588928, "step": 3567 }, { "epoch": 0.5702869016223128, "grad_norm": 1.4619676677423736, "learning_rate": 1.712875778390021e-05, "loss": 0.6558884382247925, "step": 3568 }, { "epoch": 0.570446735395189, "grad_norm": 1.4085491562657473, "learning_rate": 1.7126904264016166e-05, "loss": 0.6242915987968445, "step": 3569 }, { "epoch": 0.5706065691680652, "grad_norm": 1.0576020492127276, "learning_rate": 1.7125050246414434e-05, "loss": 0.5915728807449341, "step": 3570 }, { "epoch": 0.5707664029409414, "grad_norm": 1.2006257372339448, "learning_rate": 1.712319573122449e-05, "loss": 0.6212459206581116, "step": 3571 }, { "epoch": 0.5709262367138176, "grad_norm": 1.3985423822503757, "learning_rate": 1.712134071857585e-05, "loss": 0.7135747075080872, "step": 3572 }, { "epoch": 0.5710860704866938, "grad_norm": 1.410996974908346, "learning_rate": 1.7119485208598062e-05, "loss": 0.5933932662010193, "step": 3573 }, { "epoch": 0.57124590425957, "grad_norm": 1.4125377438893925, "learning_rate": 1.7117629201420707e-05, "loss": 0.6546995043754578, "step": 3574 }, { "epoch": 0.5714057380324462, "grad_norm": 1.250305489005912, "learning_rate": 1.71157726971734e-05, "loss": 0.5835333466529846, "step": 3575 }, { "epoch": 0.5715655718053224, "grad_norm": 1.4193291206326548, "learning_rate": 1.7113915695985797e-05, "loss": 0.8018687963485718, "step": 3576 }, { "epoch": 0.5717254055781986, "grad_norm": 1.4953493274727745, "learning_rate": 1.711205819798758e-05, "loss": 0.8597303628921509, "step": 3577 }, { "epoch": 0.5718852393510749, "grad_norm": 1.4892787237170695, "learning_rate": 1.7110200203308467e-05, "loss": 0.6415429711341858, "step": 3578 }, { "epoch": 0.5720450731239511, "grad_norm": 1.336101730335348, "learning_rate": 1.7108341712078226e-05, "loss": 0.659905195236206, "step": 3579 }, { "epoch": 0.5722049068968273, "grad_norm": 1.2769663422244153, "learning_rate": 1.7106482724426636e-05, "loss": 0.6155505776405334, "step": 3580 }, { "epoch": 0.5723647406697036, "grad_norm": 1.4484327039940852, "learning_rate": 1.7104623240483525e-05, "loss": 0.70982825756073, "step": 3581 }, { "epoch": 0.5725245744425798, "grad_norm": 1.4028631853154292, "learning_rate": 1.7102763260378755e-05, "loss": 0.6709097623825073, "step": 3582 }, { "epoch": 0.572684408215456, "grad_norm": 1.3052788838358913, "learning_rate": 1.710090278424222e-05, "loss": 0.6451097130775452, "step": 3583 }, { "epoch": 0.5728442419883322, "grad_norm": 1.4878982238902407, "learning_rate": 1.7099041812203846e-05, "loss": 0.5887759923934937, "step": 3584 }, { "epoch": 0.5730040757612084, "grad_norm": 1.3957635381394822, "learning_rate": 1.70971803443936e-05, "loss": 0.6072577238082886, "step": 3585 }, { "epoch": 0.5731639095340846, "grad_norm": 1.3280113757253924, "learning_rate": 1.709531838094148e-05, "loss": 0.5525989532470703, "step": 3586 }, { "epoch": 0.5733237433069608, "grad_norm": 1.3059143010332503, "learning_rate": 1.7093455921977516e-05, "loss": 0.5093385577201843, "step": 3587 }, { "epoch": 0.573483577079837, "grad_norm": 1.3695819384546908, "learning_rate": 1.709159296763178e-05, "loss": 0.5828169584274292, "step": 3588 }, { "epoch": 0.5736434108527132, "grad_norm": 1.2697110722982399, "learning_rate": 1.708972951803437e-05, "loss": 0.6114600896835327, "step": 3589 }, { "epoch": 0.5738032446255894, "grad_norm": 1.2717658404709429, "learning_rate": 1.7087865573315423e-05, "loss": 0.6245534420013428, "step": 3590 }, { "epoch": 0.5739630783984656, "grad_norm": 1.3411168085480507, "learning_rate": 1.7086001133605114e-05, "loss": 0.5193400382995605, "step": 3591 }, { "epoch": 0.5741229121713418, "grad_norm": 1.4430129823654325, "learning_rate": 1.7084136199033647e-05, "loss": 0.6994925737380981, "step": 3592 }, { "epoch": 0.574282745944218, "grad_norm": 1.3190805769560758, "learning_rate": 1.708227076973126e-05, "loss": 0.5250186324119568, "step": 3593 }, { "epoch": 0.5744425797170942, "grad_norm": 1.2872010012711892, "learning_rate": 1.708040484582823e-05, "loss": 0.5393325090408325, "step": 3594 }, { "epoch": 0.5746024134899704, "grad_norm": 1.3148439697928063, "learning_rate": 1.707853842745487e-05, "loss": 0.6398894786834717, "step": 3595 }, { "epoch": 0.5747622472628466, "grad_norm": 1.448118124840514, "learning_rate": 1.7076671514741515e-05, "loss": 0.5384362936019897, "step": 3596 }, { "epoch": 0.5749220810357228, "grad_norm": 1.2984017336203115, "learning_rate": 1.707480410781855e-05, "loss": 0.7813922762870789, "step": 3597 }, { "epoch": 0.575081914808599, "grad_norm": 1.4734417632938868, "learning_rate": 1.7072936206816388e-05, "loss": 0.6656261682510376, "step": 3598 }, { "epoch": 0.5752417485814753, "grad_norm": 1.3493499142055532, "learning_rate": 1.7071067811865477e-05, "loss": 0.589159369468689, "step": 3599 }, { "epoch": 0.5754015823543515, "grad_norm": 1.43150588061388, "learning_rate": 1.7069198923096296e-05, "loss": 0.7014951705932617, "step": 3600 }, { "epoch": 0.5755614161272277, "grad_norm": 1.123327779843371, "learning_rate": 1.7067329540639366e-05, "loss": 0.5382117033004761, "step": 3601 }, { "epoch": 0.5757212499001039, "grad_norm": 1.5588060004967803, "learning_rate": 1.706545966462523e-05, "loss": 0.6557246446609497, "step": 3602 }, { "epoch": 0.5758810836729801, "grad_norm": 1.7891862196570911, "learning_rate": 1.7063589295184484e-05, "loss": 0.5490176677703857, "step": 3603 }, { "epoch": 0.5760409174458563, "grad_norm": 1.3309450981463995, "learning_rate": 1.706171843244774e-05, "loss": 0.7157633304595947, "step": 3604 }, { "epoch": 0.5762007512187325, "grad_norm": 1.297558489514046, "learning_rate": 1.7059847076545653e-05, "loss": 0.4763781428337097, "step": 3605 }, { "epoch": 0.5763605849916087, "grad_norm": 1.3925653235141522, "learning_rate": 1.7057975227608917e-05, "loss": 0.6423946619033813, "step": 3606 }, { "epoch": 0.5765204187644849, "grad_norm": 1.4233190950640133, "learning_rate": 1.705610288576825e-05, "loss": 0.712710440158844, "step": 3607 }, { "epoch": 0.5766802525373611, "grad_norm": 1.369424097097141, "learning_rate": 1.705423005115442e-05, "loss": 0.599120020866394, "step": 3608 }, { "epoch": 0.5768400863102373, "grad_norm": 1.5009287543137315, "learning_rate": 1.7052356723898205e-05, "loss": 0.6210820078849792, "step": 3609 }, { "epoch": 0.5769999200831135, "grad_norm": 1.3657044817865753, "learning_rate": 1.7050482904130435e-05, "loss": 0.620414137840271, "step": 3610 }, { "epoch": 0.5771597538559897, "grad_norm": 1.4225263991032355, "learning_rate": 1.7048608591981976e-05, "loss": 0.6105018854141235, "step": 3611 }, { "epoch": 0.5773195876288659, "grad_norm": 1.6801656261674969, "learning_rate": 1.7046733787583724e-05, "loss": 0.7198355197906494, "step": 3612 }, { "epoch": 0.5774794214017422, "grad_norm": 1.3840009844690595, "learning_rate": 1.7044858491066604e-05, "loss": 0.756309986114502, "step": 3613 }, { "epoch": 0.5776392551746184, "grad_norm": 1.3754292234236012, "learning_rate": 1.7042982702561578e-05, "loss": 0.7626680135726929, "step": 3614 }, { "epoch": 0.5777990889474947, "grad_norm": 1.2712103320400827, "learning_rate": 1.704110642219965e-05, "loss": 0.6259851455688477, "step": 3615 }, { "epoch": 0.5779589227203709, "grad_norm": 1.1962551742429965, "learning_rate": 1.7039229650111848e-05, "loss": 0.6498050689697266, "step": 3616 }, { "epoch": 0.5781187564932471, "grad_norm": 1.3386744184860364, "learning_rate": 1.7037352386429247e-05, "loss": 0.74101722240448, "step": 3617 }, { "epoch": 0.5782785902661233, "grad_norm": 1.1457743309441466, "learning_rate": 1.703547463128294e-05, "loss": 0.5641334652900696, "step": 3618 }, { "epoch": 0.5784384240389995, "grad_norm": 1.4632775231940052, "learning_rate": 1.7033596384804067e-05, "loss": 0.7129007577896118, "step": 3619 }, { "epoch": 0.5785982578118757, "grad_norm": 1.1436721810793407, "learning_rate": 1.70317176471238e-05, "loss": 0.59371018409729, "step": 3620 }, { "epoch": 0.5787580915847519, "grad_norm": 1.067015141907346, "learning_rate": 1.7029838418373335e-05, "loss": 0.7348324060440063, "step": 3621 }, { "epoch": 0.5789179253576281, "grad_norm": 1.2950194328276134, "learning_rate": 1.702795869868392e-05, "loss": 0.6611829996109009, "step": 3622 }, { "epoch": 0.5790777591305043, "grad_norm": 1.2070866029233607, "learning_rate": 1.7026078488186822e-05, "loss": 0.5469619631767273, "step": 3623 }, { "epoch": 0.5792375929033805, "grad_norm": 1.4422903372595839, "learning_rate": 1.7024197787013352e-05, "loss": 0.5683704614639282, "step": 3624 }, { "epoch": 0.5793974266762567, "grad_norm": 1.436885952593961, "learning_rate": 1.702231659529485e-05, "loss": 0.6172204613685608, "step": 3625 }, { "epoch": 0.5795572604491329, "grad_norm": 1.4234290511046739, "learning_rate": 1.702043491316269e-05, "loss": 0.6562497615814209, "step": 3626 }, { "epoch": 0.5797170942220091, "grad_norm": 1.4789784148435414, "learning_rate": 1.701855274074828e-05, "loss": 0.649880588054657, "step": 3627 }, { "epoch": 0.5798769279948853, "grad_norm": 1.3556889758003559, "learning_rate": 1.7016670078183076e-05, "loss": 0.6739754676818848, "step": 3628 }, { "epoch": 0.5800367617677615, "grad_norm": 1.5277678045698913, "learning_rate": 1.7014786925598543e-05, "loss": 0.8340903520584106, "step": 3629 }, { "epoch": 0.5801965955406377, "grad_norm": 1.4129691890382645, "learning_rate": 1.70129032831262e-05, "loss": 0.5830846428871155, "step": 3630 }, { "epoch": 0.5803564293135139, "grad_norm": 1.8771255731745284, "learning_rate": 1.701101915089759e-05, "loss": 0.6793071031570435, "step": 3631 }, { "epoch": 0.5805162630863901, "grad_norm": 1.6152373990440874, "learning_rate": 1.70091345290443e-05, "loss": 0.6112523674964905, "step": 3632 }, { "epoch": 0.5806760968592664, "grad_norm": 1.5023376923062353, "learning_rate": 1.7007249417697943e-05, "loss": 0.7810583114624023, "step": 3633 }, { "epoch": 0.5808359306321426, "grad_norm": 1.2061625784852277, "learning_rate": 1.7005363816990166e-05, "loss": 0.553376317024231, "step": 3634 }, { "epoch": 0.5809957644050188, "grad_norm": 1.2192540543538146, "learning_rate": 1.7003477727052656e-05, "loss": 0.622454047203064, "step": 3635 }, { "epoch": 0.581155598177895, "grad_norm": 1.3209424894891677, "learning_rate": 1.7001591148017126e-05, "loss": 0.7209550738334656, "step": 3636 }, { "epoch": 0.5813154319507712, "grad_norm": 1.5144434124510744, "learning_rate": 1.6999704080015337e-05, "loss": 0.7045693397521973, "step": 3637 }, { "epoch": 0.5814752657236474, "grad_norm": 1.3629591050878074, "learning_rate": 1.6997816523179064e-05, "loss": 0.6616601347923279, "step": 3638 }, { "epoch": 0.5816350994965236, "grad_norm": 1.405371175002587, "learning_rate": 1.6995928477640135e-05, "loss": 0.6713130474090576, "step": 3639 }, { "epoch": 0.5817949332693998, "grad_norm": 1.3547014172939311, "learning_rate": 1.69940399435304e-05, "loss": 0.5950970649719238, "step": 3640 }, { "epoch": 0.581954767042276, "grad_norm": 1.2363808510933885, "learning_rate": 1.6992150920981752e-05, "loss": 0.598843514919281, "step": 3641 }, { "epoch": 0.5821146008151522, "grad_norm": 1.4361339768722938, "learning_rate": 1.699026141012611e-05, "loss": 0.6399247646331787, "step": 3642 }, { "epoch": 0.5822744345880284, "grad_norm": 1.3475694217861471, "learning_rate": 1.698837141109543e-05, "loss": 0.5721813440322876, "step": 3643 }, { "epoch": 0.5824342683609046, "grad_norm": 1.2515809364006962, "learning_rate": 1.6986480924021706e-05, "loss": 0.6746468544006348, "step": 3644 }, { "epoch": 0.5825941021337808, "grad_norm": 1.4617520028309794, "learning_rate": 1.698458994903696e-05, "loss": 0.7461503744125366, "step": 3645 }, { "epoch": 0.582753935906657, "grad_norm": 1.5093106893894084, "learning_rate": 1.6982698486273257e-05, "loss": 0.6902386546134949, "step": 3646 }, { "epoch": 0.5829137696795332, "grad_norm": 1.386265122836792, "learning_rate": 1.6980806535862683e-05, "loss": 0.5717121958732605, "step": 3647 }, { "epoch": 0.5830736034524094, "grad_norm": 1.2188411464697573, "learning_rate": 1.6978914097937367e-05, "loss": 0.6207612752914429, "step": 3648 }, { "epoch": 0.5832334372252858, "grad_norm": 1.4909567503841312, "learning_rate": 1.697702117262947e-05, "loss": 0.6948279142379761, "step": 3649 }, { "epoch": 0.583393270998162, "grad_norm": 1.1389168921200234, "learning_rate": 1.697512776007119e-05, "loss": 0.567595362663269, "step": 3650 }, { "epoch": 0.5835531047710382, "grad_norm": 1.371283000080715, "learning_rate": 1.6973233860394755e-05, "loss": 0.7202455401420593, "step": 3651 }, { "epoch": 0.5837129385439144, "grad_norm": 1.3604592973694347, "learning_rate": 1.6971339473732427e-05, "loss": 0.5557639598846436, "step": 3652 }, { "epoch": 0.5838727723167906, "grad_norm": 1.509264929337603, "learning_rate": 1.6969444600216502e-05, "loss": 0.46707314252853394, "step": 3653 }, { "epoch": 0.5840326060896668, "grad_norm": 1.3338010090114516, "learning_rate": 1.6967549239979315e-05, "loss": 0.6762194633483887, "step": 3654 }, { "epoch": 0.584192439862543, "grad_norm": 1.3358675977357262, "learning_rate": 1.6965653393153226e-05, "loss": 0.668000340461731, "step": 3655 }, { "epoch": 0.5843522736354192, "grad_norm": 1.4988963349776445, "learning_rate": 1.696375705987064e-05, "loss": 0.6737450957298279, "step": 3656 }, { "epoch": 0.5845121074082954, "grad_norm": 1.183026564986887, "learning_rate": 1.6961860240263987e-05, "loss": 0.5460862517356873, "step": 3657 }, { "epoch": 0.5846719411811716, "grad_norm": 1.5756856855023993, "learning_rate": 1.6959962934465734e-05, "loss": 0.7111004590988159, "step": 3658 }, { "epoch": 0.5848317749540478, "grad_norm": 1.2423774315175058, "learning_rate": 1.6958065142608385e-05, "loss": 0.5908740758895874, "step": 3659 }, { "epoch": 0.584991608726924, "grad_norm": 1.3613819003961607, "learning_rate": 1.6956166864824474e-05, "loss": 0.6487448215484619, "step": 3660 }, { "epoch": 0.5851514424998002, "grad_norm": 1.3252052881185525, "learning_rate": 1.6954268101246564e-05, "loss": 0.8479584455490112, "step": 3661 }, { "epoch": 0.5853112762726764, "grad_norm": 1.360621227072115, "learning_rate": 1.6952368852007263e-05, "loss": 0.6157948970794678, "step": 3662 }, { "epoch": 0.5854711100455526, "grad_norm": 1.169879884113769, "learning_rate": 1.695046911723921e-05, "loss": 0.4874064326286316, "step": 3663 }, { "epoch": 0.5856309438184288, "grad_norm": 1.4821652641577594, "learning_rate": 1.6948568897075076e-05, "loss": 0.6513482332229614, "step": 3664 }, { "epoch": 0.585790777591305, "grad_norm": 1.3560312784162518, "learning_rate": 1.6946668191647554e-05, "loss": 0.6418244242668152, "step": 3665 }, { "epoch": 0.5859506113641813, "grad_norm": 1.5095032176092944, "learning_rate": 1.69447670010894e-05, "loss": 0.576100766658783, "step": 3666 }, { "epoch": 0.5861104451370575, "grad_norm": 1.4376578898455983, "learning_rate": 1.6942865325533374e-05, "loss": 0.6765683889389038, "step": 3667 }, { "epoch": 0.5862702789099337, "grad_norm": 1.3640222915601368, "learning_rate": 1.6940963165112286e-05, "loss": 0.5745466947555542, "step": 3668 }, { "epoch": 0.5864301126828099, "grad_norm": 1.286928179542614, "learning_rate": 1.693906051995898e-05, "loss": 0.5985152721405029, "step": 3669 }, { "epoch": 0.5865899464556861, "grad_norm": 1.291719307907608, "learning_rate": 1.6937157390206324e-05, "loss": 0.6206957101821899, "step": 3670 }, { "epoch": 0.5867497802285623, "grad_norm": 1.2329228081657966, "learning_rate": 1.6935253775987227e-05, "loss": 0.5275678634643555, "step": 3671 }, { "epoch": 0.5869096140014385, "grad_norm": 1.2115716920459925, "learning_rate": 1.693334967743463e-05, "loss": 0.4945167303085327, "step": 3672 }, { "epoch": 0.5870694477743147, "grad_norm": 1.4988409792132105, "learning_rate": 1.6931445094681517e-05, "loss": 0.6576538681983948, "step": 3673 }, { "epoch": 0.5872292815471909, "grad_norm": 1.3232854471793951, "learning_rate": 1.6929540027860883e-05, "loss": 0.6734195947647095, "step": 3674 }, { "epoch": 0.5873891153200671, "grad_norm": 1.6085178970531033, "learning_rate": 1.692763447710578e-05, "loss": 0.7049174308776855, "step": 3675 }, { "epoch": 0.5875489490929433, "grad_norm": 1.381766075577995, "learning_rate": 1.6925728442549287e-05, "loss": 0.6080617904663086, "step": 3676 }, { "epoch": 0.5877087828658195, "grad_norm": 1.371910349289599, "learning_rate": 1.6923821924324507e-05, "loss": 0.5589151978492737, "step": 3677 }, { "epoch": 0.5878686166386957, "grad_norm": 1.2418826398190885, "learning_rate": 1.692191492256459e-05, "loss": 0.6586312055587769, "step": 3678 }, { "epoch": 0.5880284504115719, "grad_norm": 1.5132958652599324, "learning_rate": 1.6920007437402715e-05, "loss": 0.5626885890960693, "step": 3679 }, { "epoch": 0.5881882841844481, "grad_norm": 1.825195134869163, "learning_rate": 1.691809946897209e-05, "loss": 0.7562592029571533, "step": 3680 }, { "epoch": 0.5883481179573243, "grad_norm": 1.2195855113892162, "learning_rate": 1.6916191017405962e-05, "loss": 0.5482134222984314, "step": 3681 }, { "epoch": 0.5885079517302005, "grad_norm": 1.2126152436267357, "learning_rate": 1.6914282082837616e-05, "loss": 0.5689587593078613, "step": 3682 }, { "epoch": 0.5886677855030767, "grad_norm": 1.3005426399376254, "learning_rate": 1.6912372665400353e-05, "loss": 0.5259156823158264, "step": 3683 }, { "epoch": 0.5888276192759531, "grad_norm": 1.6683550922419141, "learning_rate": 1.6910462765227535e-05, "loss": 0.7855716347694397, "step": 3684 }, { "epoch": 0.5889874530488293, "grad_norm": 1.212259183889731, "learning_rate": 1.6908552382452524e-05, "loss": 0.6169172525405884, "step": 3685 }, { "epoch": 0.5891472868217055, "grad_norm": 1.2971456270493125, "learning_rate": 1.6906641517208754e-05, "loss": 0.6360615491867065, "step": 3686 }, { "epoch": 0.5893071205945817, "grad_norm": 1.449810179418359, "learning_rate": 1.690473016962966e-05, "loss": 0.6760947704315186, "step": 3687 }, { "epoch": 0.5894669543674579, "grad_norm": 1.3245158222650226, "learning_rate": 1.6902818339848734e-05, "loss": 0.5705865621566772, "step": 3688 }, { "epoch": 0.5896267881403341, "grad_norm": 1.363682725765496, "learning_rate": 1.690090602799948e-05, "loss": 0.6439546346664429, "step": 3689 }, { "epoch": 0.5897866219132103, "grad_norm": 1.249670055207669, "learning_rate": 1.689899323421545e-05, "loss": 0.596930980682373, "step": 3690 }, { "epoch": 0.5899464556860865, "grad_norm": 1.193721197263404, "learning_rate": 1.6897079958630236e-05, "loss": 0.5118149518966675, "step": 3691 }, { "epoch": 0.5901062894589627, "grad_norm": 1.4037462676581858, "learning_rate": 1.6895166201377446e-05, "loss": 0.6551092863082886, "step": 3692 }, { "epoch": 0.5902661232318389, "grad_norm": 1.3396959479350992, "learning_rate": 1.6893251962590727e-05, "loss": 0.567879319190979, "step": 3693 }, { "epoch": 0.5904259570047151, "grad_norm": 1.333851096926733, "learning_rate": 1.689133724240377e-05, "loss": 0.700168251991272, "step": 3694 }, { "epoch": 0.5905857907775913, "grad_norm": 2.514904806579266, "learning_rate": 1.688942204095029e-05, "loss": 0.6097227931022644, "step": 3695 }, { "epoch": 0.5907456245504675, "grad_norm": 1.2931919606652784, "learning_rate": 1.6887506358364038e-05, "loss": 0.5631687045097351, "step": 3696 }, { "epoch": 0.5909054583233437, "grad_norm": 1.4287275713312342, "learning_rate": 1.6885590194778796e-05, "loss": 0.7895026803016663, "step": 3697 }, { "epoch": 0.5910652920962199, "grad_norm": 1.1184755744080055, "learning_rate": 1.6883673550328387e-05, "loss": 0.5496994256973267, "step": 3698 }, { "epoch": 0.5912251258690961, "grad_norm": 1.45336615698679, "learning_rate": 1.6881756425146658e-05, "loss": 0.688342809677124, "step": 3699 }, { "epoch": 0.5913849596419724, "grad_norm": 1.3377771250100283, "learning_rate": 1.687983881936749e-05, "loss": 0.5336135625839233, "step": 3700 }, { "epoch": 0.5915447934148486, "grad_norm": 1.2901884345812091, "learning_rate": 1.6877920733124815e-05, "loss": 0.6034704446792603, "step": 3701 }, { "epoch": 0.5917046271877248, "grad_norm": 1.2899146033998035, "learning_rate": 1.6876002166552578e-05, "loss": 0.6307896375656128, "step": 3702 }, { "epoch": 0.591864460960601, "grad_norm": 1.525749943117184, "learning_rate": 1.6874083119784762e-05, "loss": 0.6824404001235962, "step": 3703 }, { "epoch": 0.5920242947334772, "grad_norm": 1.2807913917176377, "learning_rate": 1.6872163592955385e-05, "loss": 0.628807783126831, "step": 3704 }, { "epoch": 0.5921841285063534, "grad_norm": 1.3188731873133126, "learning_rate": 1.687024358619851e-05, "loss": 0.6203593611717224, "step": 3705 }, { "epoch": 0.5923439622792296, "grad_norm": 1.378202442534907, "learning_rate": 1.6868323099648214e-05, "loss": 0.703155517578125, "step": 3706 }, { "epoch": 0.5925037960521058, "grad_norm": 1.3831664678639777, "learning_rate": 1.6866402133438623e-05, "loss": 0.5668352842330933, "step": 3707 }, { "epoch": 0.592663629824982, "grad_norm": 1.2843769808213898, "learning_rate": 1.6864480687703884e-05, "loss": 0.44095245003700256, "step": 3708 }, { "epoch": 0.5928234635978582, "grad_norm": 1.196765865747268, "learning_rate": 1.6862558762578194e-05, "loss": 0.7219269275665283, "step": 3709 }, { "epoch": 0.5929832973707344, "grad_norm": 1.3281129985553701, "learning_rate": 1.6860636358195767e-05, "loss": 0.5928473472595215, "step": 3710 }, { "epoch": 0.5931431311436106, "grad_norm": 1.7971936985638757, "learning_rate": 1.6858713474690854e-05, "loss": 0.787200391292572, "step": 3711 }, { "epoch": 0.5933029649164868, "grad_norm": 1.430890977600209, "learning_rate": 1.6856790112197744e-05, "loss": 0.7581091523170471, "step": 3712 }, { "epoch": 0.593462798689363, "grad_norm": 1.4608636901483278, "learning_rate": 1.685486627085076e-05, "loss": 0.6211638450622559, "step": 3713 }, { "epoch": 0.5936226324622392, "grad_norm": 1.6354394178944356, "learning_rate": 1.685294195078426e-05, "loss": 0.7084892392158508, "step": 3714 }, { "epoch": 0.5937824662351154, "grad_norm": 1.4448149661228336, "learning_rate": 1.685101715213262e-05, "loss": 0.6585123538970947, "step": 3715 }, { "epoch": 0.5939423000079916, "grad_norm": 1.3515837840426848, "learning_rate": 1.6849091875030276e-05, "loss": 0.6287245750427246, "step": 3716 }, { "epoch": 0.5941021337808678, "grad_norm": 1.5469243093242275, "learning_rate": 1.684716611961167e-05, "loss": 0.6766758561134338, "step": 3717 }, { "epoch": 0.594261967553744, "grad_norm": 1.4278507158890927, "learning_rate": 1.6845239886011295e-05, "loss": 0.7190407514572144, "step": 3718 }, { "epoch": 0.5944218013266204, "grad_norm": 1.47819171132996, "learning_rate": 1.6843313174363674e-05, "loss": 0.7322973012924194, "step": 3719 }, { "epoch": 0.5945816350994966, "grad_norm": 1.5989816115842324, "learning_rate": 1.684138598480336e-05, "loss": 0.5515525341033936, "step": 3720 }, { "epoch": 0.5947414688723728, "grad_norm": 1.3502248190335666, "learning_rate": 1.6839458317464942e-05, "loss": 0.609562873840332, "step": 3721 }, { "epoch": 0.594901302645249, "grad_norm": 1.162661899598484, "learning_rate": 1.6837530172483038e-05, "loss": 0.5587354898452759, "step": 3722 }, { "epoch": 0.5950611364181252, "grad_norm": 1.25994314426437, "learning_rate": 1.6835601549992307e-05, "loss": 0.5733625888824463, "step": 3723 }, { "epoch": 0.5952209701910014, "grad_norm": 1.2407204974066084, "learning_rate": 1.6833672450127435e-05, "loss": 0.7004930973052979, "step": 3724 }, { "epoch": 0.5953808039638776, "grad_norm": 1.286607427196428, "learning_rate": 1.6831742873023142e-05, "loss": 0.6143877506256104, "step": 3725 }, { "epoch": 0.5955406377367538, "grad_norm": 1.5439506798413902, "learning_rate": 1.6829812818814182e-05, "loss": 0.6494096517562866, "step": 3726 }, { "epoch": 0.59570047150963, "grad_norm": 1.2533464408464332, "learning_rate": 1.6827882287635353e-05, "loss": 0.6725373864173889, "step": 3727 }, { "epoch": 0.5958603052825062, "grad_norm": 1.1992369801549279, "learning_rate": 1.6825951279621463e-05, "loss": 0.6774092316627502, "step": 3728 }, { "epoch": 0.5960201390553824, "grad_norm": 1.2406728257499364, "learning_rate": 1.682401979490738e-05, "loss": 0.582321286201477, "step": 3729 }, { "epoch": 0.5961799728282586, "grad_norm": 1.3280096724046129, "learning_rate": 1.6822087833627977e-05, "loss": 0.6745223999023438, "step": 3730 }, { "epoch": 0.5963398066011348, "grad_norm": 1.3189231552334193, "learning_rate": 1.6820155395918188e-05, "loss": 0.5688159465789795, "step": 3731 }, { "epoch": 0.596499640374011, "grad_norm": 1.271733000934162, "learning_rate": 1.681822248191296e-05, "loss": 0.6445189714431763, "step": 3732 }, { "epoch": 0.5966594741468872, "grad_norm": 1.2994327005064035, "learning_rate": 1.6816289091747285e-05, "loss": 0.6794494986534119, "step": 3733 }, { "epoch": 0.5968193079197635, "grad_norm": 1.3674988893504103, "learning_rate": 1.6814355225556186e-05, "loss": 0.6223195791244507, "step": 3734 }, { "epoch": 0.5969791416926397, "grad_norm": 1.7210193458633878, "learning_rate": 1.681242088347471e-05, "loss": 0.6487654447555542, "step": 3735 }, { "epoch": 0.5971389754655159, "grad_norm": 1.1744377867722822, "learning_rate": 1.681048606563795e-05, "loss": 0.45184439420700073, "step": 3736 }, { "epoch": 0.5972988092383921, "grad_norm": 1.4083813781021777, "learning_rate": 1.6808550772181025e-05, "loss": 0.7845635414123535, "step": 3737 }, { "epoch": 0.5974586430112683, "grad_norm": 1.2603941396657345, "learning_rate": 1.680661500323909e-05, "loss": 0.6443489193916321, "step": 3738 }, { "epoch": 0.5976184767841445, "grad_norm": 1.6596166000501407, "learning_rate": 1.6804678758947333e-05, "loss": 0.6309236288070679, "step": 3739 }, { "epoch": 0.5977783105570207, "grad_norm": 1.3137424791167978, "learning_rate": 1.6802742039440974e-05, "loss": 0.5729854702949524, "step": 3740 }, { "epoch": 0.5979381443298969, "grad_norm": 1.2730776986911856, "learning_rate": 1.6800804844855264e-05, "loss": 0.6938658952713013, "step": 3741 }, { "epoch": 0.5980979781027731, "grad_norm": 1.1186943458718925, "learning_rate": 1.6798867175325492e-05, "loss": 0.675932765007019, "step": 3742 }, { "epoch": 0.5982578118756493, "grad_norm": 1.362664040106989, "learning_rate": 1.6796929030986977e-05, "loss": 0.6675488948822021, "step": 3743 }, { "epoch": 0.5984176456485255, "grad_norm": 1.2707495723549194, "learning_rate": 1.679499041197507e-05, "loss": 0.6772403717041016, "step": 3744 }, { "epoch": 0.5985774794214017, "grad_norm": 1.5142898252073065, "learning_rate": 1.6793051318425163e-05, "loss": 0.6200945377349854, "step": 3745 }, { "epoch": 0.5987373131942779, "grad_norm": 1.3755266552909837, "learning_rate": 1.6791111750472675e-05, "loss": 0.6830241680145264, "step": 3746 }, { "epoch": 0.5988971469671541, "grad_norm": 1.3082536119723926, "learning_rate": 1.6789171708253052e-05, "loss": 0.6384872198104858, "step": 3747 }, { "epoch": 0.5990569807400303, "grad_norm": 1.3741571110825257, "learning_rate": 1.6787231191901788e-05, "loss": 0.6847600936889648, "step": 3748 }, { "epoch": 0.5992168145129065, "grad_norm": 2.5718466299844813, "learning_rate": 1.6785290201554393e-05, "loss": 0.7105522155761719, "step": 3749 }, { "epoch": 0.5993766482857827, "grad_norm": 1.6207097824060466, "learning_rate": 1.6783348737346426e-05, "loss": 0.6761798858642578, "step": 3750 }, { "epoch": 0.599536482058659, "grad_norm": 1.5341901383701342, "learning_rate": 1.6781406799413468e-05, "loss": 0.753601610660553, "step": 3751 }, { "epoch": 0.5996963158315352, "grad_norm": 1.1807646085423757, "learning_rate": 1.6779464387891138e-05, "loss": 0.6838303804397583, "step": 3752 }, { "epoch": 0.5998561496044114, "grad_norm": 1.2312702937022646, "learning_rate": 1.677752150291509e-05, "loss": 0.5909677147865295, "step": 3753 }, { "epoch": 0.6000159833772877, "grad_norm": 1.4174733701575792, "learning_rate": 1.6775578144621e-05, "loss": 0.7807401418685913, "step": 3754 }, { "epoch": 0.6001758171501639, "grad_norm": 1.3722171718477085, "learning_rate": 1.6773634313144595e-05, "loss": 0.6960315704345703, "step": 3755 }, { "epoch": 0.6003356509230401, "grad_norm": 1.619062675152664, "learning_rate": 1.677169000862162e-05, "loss": 0.6827824115753174, "step": 3756 }, { "epoch": 0.6004954846959163, "grad_norm": 1.204586437069132, "learning_rate": 1.676974523118786e-05, "loss": 0.7344492673873901, "step": 3757 }, { "epoch": 0.6006553184687925, "grad_norm": 1.3496787924230078, "learning_rate": 1.676779998097913e-05, "loss": 0.7078438997268677, "step": 3758 }, { "epoch": 0.6008151522416687, "grad_norm": 1.3281109970330414, "learning_rate": 1.676585425813128e-05, "loss": 0.5310760736465454, "step": 3759 }, { "epoch": 0.6009749860145449, "grad_norm": 1.4224426250876387, "learning_rate": 1.6763908062780194e-05, "loss": 0.7241216897964478, "step": 3760 }, { "epoch": 0.6011348197874211, "grad_norm": 1.7140599295986194, "learning_rate": 1.6761961395061784e-05, "loss": 0.8693035840988159, "step": 3761 }, { "epoch": 0.6012946535602973, "grad_norm": 1.1708258840446046, "learning_rate": 1.6760014255112003e-05, "loss": 0.5267365574836731, "step": 3762 }, { "epoch": 0.6014544873331735, "grad_norm": 1.2930375879331855, "learning_rate": 1.675806664306683e-05, "loss": 0.5453076362609863, "step": 3763 }, { "epoch": 0.6016143211060497, "grad_norm": 1.233771739493079, "learning_rate": 1.6756118559062273e-05, "loss": 0.5413586497306824, "step": 3764 }, { "epoch": 0.6017741548789259, "grad_norm": 1.4838730584368993, "learning_rate": 1.675417000323439e-05, "loss": 0.767230749130249, "step": 3765 }, { "epoch": 0.6019339886518021, "grad_norm": 1.476110785473587, "learning_rate": 1.6752220975719254e-05, "loss": 0.783753514289856, "step": 3766 }, { "epoch": 0.6020938224246783, "grad_norm": 1.2219878796994774, "learning_rate": 1.675027147665298e-05, "loss": 0.753368616104126, "step": 3767 }, { "epoch": 0.6022536561975546, "grad_norm": 1.2065445143984639, "learning_rate": 1.6748321506171718e-05, "loss": 0.6432809829711914, "step": 3768 }, { "epoch": 0.6024134899704308, "grad_norm": 1.416984694011405, "learning_rate": 1.674637106441164e-05, "loss": 0.6199855208396912, "step": 3769 }, { "epoch": 0.602573323743307, "grad_norm": 1.2504783898193128, "learning_rate": 1.6744420151508966e-05, "loss": 0.7174891233444214, "step": 3770 }, { "epoch": 0.6027331575161832, "grad_norm": 1.5054764057730856, "learning_rate": 1.6742468767599936e-05, "loss": 0.675035834312439, "step": 3771 }, { "epoch": 0.6028929912890594, "grad_norm": 1.1917146416857227, "learning_rate": 1.6740516912820824e-05, "loss": 0.5725709795951843, "step": 3772 }, { "epoch": 0.6030528250619356, "grad_norm": 1.3012482348690684, "learning_rate": 1.673856458730795e-05, "loss": 0.7212632298469543, "step": 3773 }, { "epoch": 0.6032126588348118, "grad_norm": 1.5439231222117953, "learning_rate": 1.673661179119765e-05, "loss": 0.6880491971969604, "step": 3774 }, { "epoch": 0.603372492607688, "grad_norm": 1.6148692995709948, "learning_rate": 1.6734658524626298e-05, "loss": 0.6667518019676208, "step": 3775 }, { "epoch": 0.6035323263805642, "grad_norm": 1.2686179968205762, "learning_rate": 1.673270478773031e-05, "loss": 0.517200231552124, "step": 3776 }, { "epoch": 0.6036921601534404, "grad_norm": 1.4124196113770286, "learning_rate": 1.6730750580646133e-05, "loss": 0.4878992736339569, "step": 3777 }, { "epoch": 0.6038519939263166, "grad_norm": 1.4454156429963045, "learning_rate": 1.672879590351023e-05, "loss": 0.6562104821205139, "step": 3778 }, { "epoch": 0.6040118276991928, "grad_norm": 1.2634096055195059, "learning_rate": 1.6726840756459108e-05, "loss": 0.547622799873352, "step": 3779 }, { "epoch": 0.604171661472069, "grad_norm": 1.176994769724668, "learning_rate": 1.6724885139629318e-05, "loss": 0.7241042256355286, "step": 3780 }, { "epoch": 0.6043314952449452, "grad_norm": 1.1879099288055808, "learning_rate": 1.672292905315743e-05, "loss": 0.6696019172668457, "step": 3781 }, { "epoch": 0.6044913290178214, "grad_norm": 1.6879739088945072, "learning_rate": 1.6720972497180047e-05, "loss": 0.7607624530792236, "step": 3782 }, { "epoch": 0.6046511627906976, "grad_norm": 1.229347870004841, "learning_rate": 1.671901547183381e-05, "loss": 0.5755773782730103, "step": 3783 }, { "epoch": 0.6048109965635738, "grad_norm": 1.3243020581373282, "learning_rate": 1.6717057977255388e-05, "loss": 0.6851277947425842, "step": 3784 }, { "epoch": 0.60497083033645, "grad_norm": 1.3332015053815762, "learning_rate": 1.6715100013581487e-05, "loss": 0.585755467414856, "step": 3785 }, { "epoch": 0.6051306641093263, "grad_norm": 1.4467490845555775, "learning_rate": 1.6713141580948846e-05, "loss": 0.6114035844802856, "step": 3786 }, { "epoch": 0.6052904978822025, "grad_norm": 1.3271006124860592, "learning_rate": 1.6711182679494232e-05, "loss": 0.6501030921936035, "step": 3787 }, { "epoch": 0.6054503316550787, "grad_norm": 1.2303433461249411, "learning_rate": 1.6709223309354453e-05, "loss": 0.6817241907119751, "step": 3788 }, { "epoch": 0.6056101654279549, "grad_norm": 1.2068880552595944, "learning_rate": 1.670726347066634e-05, "loss": 0.6296716928482056, "step": 3789 }, { "epoch": 0.6057699992008312, "grad_norm": 1.5215892773761763, "learning_rate": 1.670530316356676e-05, "loss": 0.6385477185249329, "step": 3790 }, { "epoch": 0.6059298329737074, "grad_norm": 1.5805633355220303, "learning_rate": 1.670334238819262e-05, "loss": 0.5688613057136536, "step": 3791 }, { "epoch": 0.6060896667465836, "grad_norm": 1.3807257783976363, "learning_rate": 1.6701381144680848e-05, "loss": 0.7081964612007141, "step": 3792 }, { "epoch": 0.6062495005194598, "grad_norm": 1.4130573530081987, "learning_rate": 1.6699419433168407e-05, "loss": 0.5906977653503418, "step": 3793 }, { "epoch": 0.606409334292336, "grad_norm": 1.3257951377392299, "learning_rate": 1.6697457253792304e-05, "loss": 0.5990844368934631, "step": 3794 }, { "epoch": 0.6065691680652122, "grad_norm": 1.3291687088945794, "learning_rate": 1.6695494606689568e-05, "loss": 0.6848385334014893, "step": 3795 }, { "epoch": 0.6067290018380884, "grad_norm": 1.4145190879806457, "learning_rate": 1.669353149199726e-05, "loss": 0.5783364772796631, "step": 3796 }, { "epoch": 0.6068888356109646, "grad_norm": 1.319718721249917, "learning_rate": 1.6691567909852483e-05, "loss": 0.6574451923370361, "step": 3797 }, { "epoch": 0.6070486693838408, "grad_norm": 1.3543503127843022, "learning_rate": 1.6689603860392366e-05, "loss": 0.5611411333084106, "step": 3798 }, { "epoch": 0.607208503156717, "grad_norm": 1.3482126881362597, "learning_rate": 1.6687639343754064e-05, "loss": 0.6407443284988403, "step": 3799 }, { "epoch": 0.6073683369295932, "grad_norm": 1.1668484258125325, "learning_rate": 1.6685674360074777e-05, "loss": 0.596869945526123, "step": 3800 }, { "epoch": 0.6075281707024694, "grad_norm": 1.5196023520142303, "learning_rate": 1.6683708909491733e-05, "loss": 0.5993518829345703, "step": 3801 }, { "epoch": 0.6076880044753457, "grad_norm": 1.0908282136222125, "learning_rate": 1.6681742992142188e-05, "loss": 0.5122400522232056, "step": 3802 }, { "epoch": 0.6078478382482219, "grad_norm": 1.348016526487344, "learning_rate": 1.6679776608163442e-05, "loss": 0.6519409418106079, "step": 3803 }, { "epoch": 0.6080076720210981, "grad_norm": 1.3240824655119692, "learning_rate": 1.6677809757692814e-05, "loss": 0.5764514207839966, "step": 3804 }, { "epoch": 0.6081675057939743, "grad_norm": 1.4186082687923156, "learning_rate": 1.667584244086766e-05, "loss": 0.5857013463973999, "step": 3805 }, { "epoch": 0.6083273395668505, "grad_norm": 1.2367593420241085, "learning_rate": 1.6673874657825382e-05, "loss": 0.6708394289016724, "step": 3806 }, { "epoch": 0.6084871733397267, "grad_norm": 1.4307345766281843, "learning_rate": 1.6671906408703394e-05, "loss": 0.5878826379776001, "step": 3807 }, { "epoch": 0.6086470071126029, "grad_norm": 1.5936202560871897, "learning_rate": 1.6669937693639145e-05, "loss": 0.734362006187439, "step": 3808 }, { "epoch": 0.6088068408854791, "grad_norm": 1.149630486223339, "learning_rate": 1.6667968512770137e-05, "loss": 0.644942581653595, "step": 3809 }, { "epoch": 0.6089666746583553, "grad_norm": 1.5369866924238051, "learning_rate": 1.6665998866233887e-05, "loss": 0.6890354156494141, "step": 3810 }, { "epoch": 0.6091265084312315, "grad_norm": 1.226724682262707, "learning_rate": 1.666402875416794e-05, "loss": 0.5285967588424683, "step": 3811 }, { "epoch": 0.6092863422041077, "grad_norm": 1.250311443445165, "learning_rate": 1.666205817670989e-05, "loss": 0.6096465587615967, "step": 3812 }, { "epoch": 0.6094461759769839, "grad_norm": 1.3426161125098957, "learning_rate": 1.666008713399735e-05, "loss": 0.6470063924789429, "step": 3813 }, { "epoch": 0.6096060097498601, "grad_norm": 1.2210060839232395, "learning_rate": 1.6658115626167975e-05, "loss": 0.5127000212669373, "step": 3814 }, { "epoch": 0.6097658435227363, "grad_norm": 1.3491853251878623, "learning_rate": 1.6656143653359446e-05, "loss": 0.7210760712623596, "step": 3815 }, { "epoch": 0.6099256772956125, "grad_norm": 1.582384500625693, "learning_rate": 1.6654171215709477e-05, "loss": 0.76228928565979, "step": 3816 }, { "epoch": 0.6100855110684887, "grad_norm": 1.3119935772336404, "learning_rate": 1.6652198313355818e-05, "loss": 0.6093920469284058, "step": 3817 }, { "epoch": 0.610245344841365, "grad_norm": 1.1858639911075641, "learning_rate": 1.665022494643625e-05, "loss": 0.5687311887741089, "step": 3818 }, { "epoch": 0.6104051786142412, "grad_norm": 1.2505623834136248, "learning_rate": 1.6648251115088586e-05, "loss": 0.638434648513794, "step": 3819 }, { "epoch": 0.6105650123871174, "grad_norm": 1.1526026108415834, "learning_rate": 1.664627681945067e-05, "loss": 0.6570332646369934, "step": 3820 }, { "epoch": 0.6107248461599936, "grad_norm": 1.4129075004479807, "learning_rate": 1.6644302059660377e-05, "loss": 0.571868896484375, "step": 3821 }, { "epoch": 0.6108846799328698, "grad_norm": 1.215323861130641, "learning_rate": 1.6642326835855622e-05, "loss": 0.7022189497947693, "step": 3822 }, { "epoch": 0.611044513705746, "grad_norm": 1.4597085818204723, "learning_rate": 1.664035114817435e-05, "loss": 0.6421900391578674, "step": 3823 }, { "epoch": 0.6112043474786222, "grad_norm": 1.4359418030716236, "learning_rate": 1.6638374996754527e-05, "loss": 0.6077640652656555, "step": 3824 }, { "epoch": 0.6113641812514985, "grad_norm": 1.2478346557524873, "learning_rate": 1.6636398381734168e-05, "loss": 0.6439847946166992, "step": 3825 }, { "epoch": 0.6115240150243747, "grad_norm": 1.200795460593323, "learning_rate": 1.663442130325131e-05, "loss": 0.5980304479598999, "step": 3826 }, { "epoch": 0.6116838487972509, "grad_norm": 1.336814240076125, "learning_rate": 1.6632443761444027e-05, "loss": 0.7316504120826721, "step": 3827 }, { "epoch": 0.6118436825701271, "grad_norm": 1.2160668300825328, "learning_rate": 1.663046575645042e-05, "loss": 0.5920047760009766, "step": 3828 }, { "epoch": 0.6120035163430033, "grad_norm": 1.1240473405520484, "learning_rate": 1.6628487288408626e-05, "loss": 0.5072997212409973, "step": 3829 }, { "epoch": 0.6121633501158795, "grad_norm": 1.2090429324911824, "learning_rate": 1.6626508357456817e-05, "loss": 0.5752838253974915, "step": 3830 }, { "epoch": 0.6123231838887557, "grad_norm": 1.4037899258298012, "learning_rate": 1.6624528963733196e-05, "loss": 0.5121587514877319, "step": 3831 }, { "epoch": 0.6124830176616319, "grad_norm": 1.2863202524145503, "learning_rate": 1.662254910737599e-05, "loss": 0.6197426319122314, "step": 3832 }, { "epoch": 0.6126428514345081, "grad_norm": 1.655370680311032, "learning_rate": 1.6620568788523476e-05, "loss": 0.7231720089912415, "step": 3833 }, { "epoch": 0.6128026852073843, "grad_norm": 1.4639852533289432, "learning_rate": 1.661858800731394e-05, "loss": 0.8146006464958191, "step": 3834 }, { "epoch": 0.6129625189802606, "grad_norm": 1.2891625044918786, "learning_rate": 1.661660676388572e-05, "loss": 0.6136653423309326, "step": 3835 }, { "epoch": 0.6131223527531368, "grad_norm": 1.2861160052545844, "learning_rate": 1.661462505837718e-05, "loss": 0.4442022442817688, "step": 3836 }, { "epoch": 0.613282186526013, "grad_norm": 1.4808777090430851, "learning_rate": 1.661264289092671e-05, "loss": 0.6358233094215393, "step": 3837 }, { "epoch": 0.6134420202988892, "grad_norm": 1.2279950036896743, "learning_rate": 1.661066026167274e-05, "loss": 0.7330678701400757, "step": 3838 }, { "epoch": 0.6136018540717654, "grad_norm": 1.3329300911081066, "learning_rate": 1.6608677170753734e-05, "loss": 0.6647059917449951, "step": 3839 }, { "epoch": 0.6137616878446416, "grad_norm": 1.3389899484975785, "learning_rate": 1.6606693618308175e-05, "loss": 0.7092499136924744, "step": 3840 }, { "epoch": 0.6139215216175178, "grad_norm": 1.4189760526847837, "learning_rate": 1.6604709604474595e-05, "loss": 0.7394460439682007, "step": 3841 }, { "epoch": 0.614081355390394, "grad_norm": 1.3737227996332968, "learning_rate": 1.660272512939155e-05, "loss": 0.5576767921447754, "step": 3842 }, { "epoch": 0.6142411891632702, "grad_norm": 1.39621198432331, "learning_rate": 1.6600740193197625e-05, "loss": 0.5647201538085938, "step": 3843 }, { "epoch": 0.6144010229361464, "grad_norm": 1.174430781475388, "learning_rate": 1.6598754796031442e-05, "loss": 0.573197603225708, "step": 3844 }, { "epoch": 0.6145608567090226, "grad_norm": 1.4249591296244541, "learning_rate": 1.6596768938031654e-05, "loss": 0.522570013999939, "step": 3845 }, { "epoch": 0.6147206904818988, "grad_norm": 1.1890350760175048, "learning_rate": 1.659478261933695e-05, "loss": 0.5108821392059326, "step": 3846 }, { "epoch": 0.614880524254775, "grad_norm": 1.4295177463893938, "learning_rate": 1.659279584008604e-05, "loss": 0.6870665550231934, "step": 3847 }, { "epoch": 0.6150403580276512, "grad_norm": 1.4639428866332465, "learning_rate": 1.659080860041768e-05, "loss": 0.758454442024231, "step": 3848 }, { "epoch": 0.6152001918005274, "grad_norm": 1.4620503988819058, "learning_rate": 1.658882090047065e-05, "loss": 0.6254711151123047, "step": 3849 }, { "epoch": 0.6153600255734036, "grad_norm": 1.4438192706018644, "learning_rate": 1.6586832740383763e-05, "loss": 0.7769038677215576, "step": 3850 }, { "epoch": 0.6155198593462798, "grad_norm": 1.3928807897998021, "learning_rate": 1.6584844120295868e-05, "loss": 0.6896807551383972, "step": 3851 }, { "epoch": 0.615679693119156, "grad_norm": 1.470597795538774, "learning_rate": 1.6582855040345836e-05, "loss": 0.7165013551712036, "step": 3852 }, { "epoch": 0.6158395268920323, "grad_norm": 1.3789609372190024, "learning_rate": 1.6580865500672586e-05, "loss": 0.5850635766983032, "step": 3853 }, { "epoch": 0.6159993606649085, "grad_norm": 1.2831051275652645, "learning_rate": 1.6578875501415058e-05, "loss": 0.6492139101028442, "step": 3854 }, { "epoch": 0.6161591944377847, "grad_norm": 1.2962722850894983, "learning_rate": 1.6576885042712222e-05, "loss": 0.5909055471420288, "step": 3855 }, { "epoch": 0.6163190282106609, "grad_norm": 1.4644487740838679, "learning_rate": 1.6574894124703087e-05, "loss": 0.6547709703445435, "step": 3856 }, { "epoch": 0.6164788619835371, "grad_norm": 1.3580610925313272, "learning_rate": 1.6572902747526692e-05, "loss": 0.7291073799133301, "step": 3857 }, { "epoch": 0.6166386957564133, "grad_norm": 1.4133215272970527, "learning_rate": 1.6570910911322113e-05, "loss": 0.8350358605384827, "step": 3858 }, { "epoch": 0.6167985295292895, "grad_norm": 1.4622756953088845, "learning_rate": 1.6568918616228442e-05, "loss": 0.7269452810287476, "step": 3859 }, { "epoch": 0.6169583633021658, "grad_norm": 1.2145930468263808, "learning_rate": 1.656692586238482e-05, "loss": 0.6536587476730347, "step": 3860 }, { "epoch": 0.617118197075042, "grad_norm": 1.3946495026309054, "learning_rate": 1.6564932649930415e-05, "loss": 0.7160556316375732, "step": 3861 }, { "epoch": 0.6172780308479182, "grad_norm": 1.2796230881697381, "learning_rate": 1.6562938979004425e-05, "loss": 0.5529543161392212, "step": 3862 }, { "epoch": 0.6174378646207944, "grad_norm": 1.3458473178955617, "learning_rate": 1.6560944849746078e-05, "loss": 0.7229206562042236, "step": 3863 }, { "epoch": 0.6175976983936706, "grad_norm": 1.2907510265903852, "learning_rate": 1.6558950262294643e-05, "loss": 0.6945812702178955, "step": 3864 }, { "epoch": 0.6177575321665468, "grad_norm": 1.905331943815508, "learning_rate": 1.6556955216789407e-05, "loss": 0.5383328199386597, "step": 3865 }, { "epoch": 0.617917365939423, "grad_norm": 1.7813725876619062, "learning_rate": 1.65549597133697e-05, "loss": 0.6058598756790161, "step": 3866 }, { "epoch": 0.6180771997122992, "grad_norm": 1.1896814604021821, "learning_rate": 1.655296375217488e-05, "loss": 0.5221691131591797, "step": 3867 }, { "epoch": 0.6182370334851754, "grad_norm": 1.4257935416185583, "learning_rate": 1.6550967333344346e-05, "loss": 0.6778889894485474, "step": 3868 }, { "epoch": 0.6183968672580517, "grad_norm": 1.259548568756002, "learning_rate": 1.654897045701751e-05, "loss": 0.6419001817703247, "step": 3869 }, { "epoch": 0.6185567010309279, "grad_norm": 1.2898707592282588, "learning_rate": 1.6546973123333834e-05, "loss": 0.5491964817047119, "step": 3870 }, { "epoch": 0.6187165348038041, "grad_norm": 1.4673844325617598, "learning_rate": 1.65449753324328e-05, "loss": 0.7620891332626343, "step": 3871 }, { "epoch": 0.6188763685766803, "grad_norm": 1.192789005439843, "learning_rate": 1.654297708445393e-05, "loss": 0.624184250831604, "step": 3872 }, { "epoch": 0.6190362023495565, "grad_norm": 1.2294674210580168, "learning_rate": 1.654097837953677e-05, "loss": 0.6824431419372559, "step": 3873 }, { "epoch": 0.6191960361224327, "grad_norm": 1.2940957072902541, "learning_rate": 1.653897921782091e-05, "loss": 0.6912523508071899, "step": 3874 }, { "epoch": 0.6193558698953089, "grad_norm": 1.3037601126739649, "learning_rate": 1.6536979599445956e-05, "loss": 0.7970359325408936, "step": 3875 }, { "epoch": 0.6195157036681851, "grad_norm": 1.158931097290756, "learning_rate": 1.6534979524551556e-05, "loss": 0.5978498458862305, "step": 3876 }, { "epoch": 0.6196755374410613, "grad_norm": 1.1510336968677393, "learning_rate": 1.6532978993277393e-05, "loss": 0.4901534914970398, "step": 3877 }, { "epoch": 0.6198353712139375, "grad_norm": 1.3647833292237819, "learning_rate": 1.6530978005763176e-05, "loss": 0.7960721254348755, "step": 3878 }, { "epoch": 0.6199952049868137, "grad_norm": 1.512957476677514, "learning_rate": 1.6528976562148643e-05, "loss": 0.6161859631538391, "step": 3879 }, { "epoch": 0.6201550387596899, "grad_norm": 1.3164894731204275, "learning_rate": 1.6526974662573568e-05, "loss": 0.7159808874130249, "step": 3880 }, { "epoch": 0.6203148725325661, "grad_norm": 1.200159424888714, "learning_rate": 1.652497230717776e-05, "loss": 0.5720070600509644, "step": 3881 }, { "epoch": 0.6204747063054423, "grad_norm": 1.3716882066704539, "learning_rate": 1.6522969496101057e-05, "loss": 0.6445578336715698, "step": 3882 }, { "epoch": 0.6206345400783185, "grad_norm": 1.3513676406372752, "learning_rate": 1.6520966229483323e-05, "loss": 0.7300676703453064, "step": 3883 }, { "epoch": 0.6207943738511947, "grad_norm": 1.4048183475611873, "learning_rate": 1.651896250746446e-05, "loss": 0.6362603902816772, "step": 3884 }, { "epoch": 0.6209542076240709, "grad_norm": 1.4965785541946437, "learning_rate": 1.6516958330184407e-05, "loss": 0.6084654331207275, "step": 3885 }, { "epoch": 0.6211140413969471, "grad_norm": 1.2114276094777179, "learning_rate": 1.651495369778312e-05, "loss": 0.5354351997375488, "step": 3886 }, { "epoch": 0.6212738751698234, "grad_norm": 1.1936242369290802, "learning_rate": 1.6512948610400606e-05, "loss": 0.6561100482940674, "step": 3887 }, { "epoch": 0.6214337089426996, "grad_norm": 1.1278578984147878, "learning_rate": 1.6510943068176885e-05, "loss": 0.483451247215271, "step": 3888 }, { "epoch": 0.6215935427155758, "grad_norm": 1.2902088252203914, "learning_rate": 1.6508937071252017e-05, "loss": 0.760006844997406, "step": 3889 }, { "epoch": 0.621753376488452, "grad_norm": 1.5498720415145293, "learning_rate": 1.6506930619766094e-05, "loss": 0.6354636549949646, "step": 3890 }, { "epoch": 0.6219132102613282, "grad_norm": 1.3098840764984916, "learning_rate": 1.6504923713859244e-05, "loss": 0.7785017490386963, "step": 3891 }, { "epoch": 0.6220730440342044, "grad_norm": 1.3060671557322103, "learning_rate": 1.650291635367162e-05, "loss": 0.5643576979637146, "step": 3892 }, { "epoch": 0.6222328778070806, "grad_norm": 1.018109476448254, "learning_rate": 1.6500908539343403e-05, "loss": 0.5747264623641968, "step": 3893 }, { "epoch": 0.6223927115799568, "grad_norm": 1.3804266217276788, "learning_rate": 1.649890027101482e-05, "loss": 0.7095305323600769, "step": 3894 }, { "epoch": 0.6225525453528331, "grad_norm": 1.3639829191501305, "learning_rate": 1.6496891548826118e-05, "loss": 0.6227153539657593, "step": 3895 }, { "epoch": 0.6227123791257093, "grad_norm": 1.252747616515381, "learning_rate": 1.649488237291758e-05, "loss": 0.7283186316490173, "step": 3896 }, { "epoch": 0.6228722128985855, "grad_norm": 1.0915868092221561, "learning_rate": 1.6492872743429518e-05, "loss": 0.6985980272293091, "step": 3897 }, { "epoch": 0.6230320466714617, "grad_norm": 1.3795780074041097, "learning_rate": 1.6490862660502277e-05, "loss": 0.7360734939575195, "step": 3898 }, { "epoch": 0.6231918804443379, "grad_norm": 1.1588044782270248, "learning_rate": 1.6488852124276236e-05, "loss": 0.6567797660827637, "step": 3899 }, { "epoch": 0.6233517142172141, "grad_norm": 1.2436042170117922, "learning_rate": 1.6486841134891804e-05, "loss": 0.6353428363800049, "step": 3900 }, { "epoch": 0.6235115479900903, "grad_norm": 1.1839985472425365, "learning_rate": 1.6484829692489422e-05, "loss": 0.4875122308731079, "step": 3901 }, { "epoch": 0.6236713817629665, "grad_norm": 1.4189748124749926, "learning_rate": 1.648281779720956e-05, "loss": 0.6910936832427979, "step": 3902 }, { "epoch": 0.6238312155358428, "grad_norm": 1.321253434023515, "learning_rate": 1.648080544919272e-05, "loss": 0.6205313205718994, "step": 3903 }, { "epoch": 0.623991049308719, "grad_norm": 1.3556510677175746, "learning_rate": 1.6478792648579437e-05, "loss": 0.6280686855316162, "step": 3904 }, { "epoch": 0.6241508830815952, "grad_norm": 1.5195114836893202, "learning_rate": 1.6476779395510282e-05, "loss": 0.6298943758010864, "step": 3905 }, { "epoch": 0.6243107168544714, "grad_norm": 1.9317861308091853, "learning_rate": 1.647476569012585e-05, "loss": 0.7369906902313232, "step": 3906 }, { "epoch": 0.6244705506273476, "grad_norm": 1.3270599547881141, "learning_rate": 1.6472751532566777e-05, "loss": 0.6009336709976196, "step": 3907 }, { "epoch": 0.6246303844002238, "grad_norm": 1.2003105325534105, "learning_rate": 1.6470736922973716e-05, "loss": 0.7454385161399841, "step": 3908 }, { "epoch": 0.6247902181731, "grad_norm": 1.8295037185432523, "learning_rate": 1.6468721861487366e-05, "loss": 0.558091402053833, "step": 3909 }, { "epoch": 0.6249500519459762, "grad_norm": 1.1898134006100456, "learning_rate": 1.646670634824845e-05, "loss": 0.6099045276641846, "step": 3910 }, { "epoch": 0.6251098857188524, "grad_norm": 1.2369180564567934, "learning_rate": 1.6464690383397725e-05, "loss": 0.6764594912528992, "step": 3911 }, { "epoch": 0.6252697194917286, "grad_norm": 1.2086305121530987, "learning_rate": 1.6462673967075978e-05, "loss": 0.6685458421707153, "step": 3912 }, { "epoch": 0.6254295532646048, "grad_norm": 1.3756875665995516, "learning_rate": 1.6460657099424028e-05, "loss": 0.8048404455184937, "step": 3913 }, { "epoch": 0.625589387037481, "grad_norm": 1.419650479324874, "learning_rate": 1.6458639780582727e-05, "loss": 0.7133411169052124, "step": 3914 }, { "epoch": 0.6257492208103572, "grad_norm": 1.3186424377732167, "learning_rate": 1.645662201069296e-05, "loss": 0.6210123896598816, "step": 3915 }, { "epoch": 0.6259090545832334, "grad_norm": 1.3029692236306492, "learning_rate": 1.645460378989563e-05, "loss": 0.6839264631271362, "step": 3916 }, { "epoch": 0.6260688883561096, "grad_norm": 1.2498707706894936, "learning_rate": 1.6452585118331693e-05, "loss": 0.6276473999023438, "step": 3917 }, { "epoch": 0.6262287221289858, "grad_norm": 1.3726916822910658, "learning_rate": 1.6450565996142126e-05, "loss": 0.6038564443588257, "step": 3918 }, { "epoch": 0.626388555901862, "grad_norm": 1.3981685398983041, "learning_rate": 1.644854642346793e-05, "loss": 0.7610451579093933, "step": 3919 }, { "epoch": 0.6265483896747382, "grad_norm": 1.1559445205484125, "learning_rate": 1.644652640045015e-05, "loss": 0.5812621116638184, "step": 3920 }, { "epoch": 0.6267082234476145, "grad_norm": 1.2725783899742849, "learning_rate": 1.644450592722986e-05, "loss": 0.6792014241218567, "step": 3921 }, { "epoch": 0.6268680572204907, "grad_norm": 1.3477942747743756, "learning_rate": 1.6442485003948156e-05, "loss": 0.7280830144882202, "step": 3922 }, { "epoch": 0.6270278909933669, "grad_norm": 1.333373080124916, "learning_rate": 1.6440463630746176e-05, "loss": 0.7336154580116272, "step": 3923 }, { "epoch": 0.6271877247662431, "grad_norm": 1.2878732342625825, "learning_rate": 1.6438441807765083e-05, "loss": 0.6237192749977112, "step": 3924 }, { "epoch": 0.6273475585391193, "grad_norm": 1.2562588012824984, "learning_rate": 1.6436419535146078e-05, "loss": 0.6311440467834473, "step": 3925 }, { "epoch": 0.6275073923119955, "grad_norm": 1.1889268254980543, "learning_rate": 1.6434396813030384e-05, "loss": 0.6888768672943115, "step": 3926 }, { "epoch": 0.6276672260848717, "grad_norm": 1.294080599014309, "learning_rate": 1.6432373641559266e-05, "loss": 0.6041181087493896, "step": 3927 }, { "epoch": 0.6278270598577479, "grad_norm": 1.0809144802474098, "learning_rate": 1.643035002087401e-05, "loss": 0.471838116645813, "step": 3928 }, { "epoch": 0.6279868936306241, "grad_norm": 1.5475162229143826, "learning_rate": 1.642832595111594e-05, "loss": 0.5739034414291382, "step": 3929 }, { "epoch": 0.6281467274035003, "grad_norm": 1.3986593041731756, "learning_rate": 1.6426301432426418e-05, "loss": 0.7082586288452148, "step": 3930 }, { "epoch": 0.6283065611763766, "grad_norm": 1.241169879603667, "learning_rate": 1.6424276464946816e-05, "loss": 0.5908615589141846, "step": 3931 }, { "epoch": 0.6284663949492528, "grad_norm": 1.2178953131153043, "learning_rate": 1.642225104881856e-05, "loss": 0.5467467308044434, "step": 3932 }, { "epoch": 0.628626228722129, "grad_norm": 1.1723063906763391, "learning_rate": 1.642022518418309e-05, "loss": 0.43145203590393066, "step": 3933 }, { "epoch": 0.6287860624950052, "grad_norm": 1.34933817793539, "learning_rate": 1.6418198871181897e-05, "loss": 0.7393187284469604, "step": 3934 }, { "epoch": 0.6289458962678814, "grad_norm": 1.1216298367803545, "learning_rate": 1.641617210995648e-05, "loss": 0.5506287813186646, "step": 3935 }, { "epoch": 0.6291057300407576, "grad_norm": 1.4220923114585753, "learning_rate": 1.6414144900648384e-05, "loss": 0.6665002107620239, "step": 3936 }, { "epoch": 0.6292655638136339, "grad_norm": 1.3875874422390395, "learning_rate": 1.6412117243399185e-05, "loss": 0.7186019420623779, "step": 3937 }, { "epoch": 0.6294253975865101, "grad_norm": 1.1871651080569414, "learning_rate": 1.6410089138350486e-05, "loss": 0.5654632449150085, "step": 3938 }, { "epoch": 0.6295852313593863, "grad_norm": 1.6177708563166224, "learning_rate": 1.640806058564392e-05, "loss": 0.7628018856048584, "step": 3939 }, { "epoch": 0.6297450651322625, "grad_norm": 1.6127010059480051, "learning_rate": 1.640603158542116e-05, "loss": 0.7092567682266235, "step": 3940 }, { "epoch": 0.6299048989051387, "grad_norm": 1.1701773312954344, "learning_rate": 1.64040021378239e-05, "loss": 0.5999500155448914, "step": 3941 }, { "epoch": 0.6300647326780149, "grad_norm": 1.4729455524486088, "learning_rate": 1.6401972242993864e-05, "loss": 0.7112864851951599, "step": 3942 }, { "epoch": 0.6302245664508911, "grad_norm": 1.4160133157101824, "learning_rate": 1.6399941901072823e-05, "loss": 0.5724179744720459, "step": 3943 }, { "epoch": 0.6303844002237673, "grad_norm": 1.4635771188482682, "learning_rate": 1.6397911112202566e-05, "loss": 0.6738380193710327, "step": 3944 }, { "epoch": 0.6305442339966435, "grad_norm": 1.3191744849065334, "learning_rate": 1.639587987652491e-05, "loss": 0.6597954034805298, "step": 3945 }, { "epoch": 0.6307040677695197, "grad_norm": 1.7263945028324479, "learning_rate": 1.6393848194181714e-05, "loss": 0.7090487480163574, "step": 3946 }, { "epoch": 0.6308639015423959, "grad_norm": 1.1642111499843903, "learning_rate": 1.6391816065314865e-05, "loss": 0.5380268096923828, "step": 3947 }, { "epoch": 0.6310237353152721, "grad_norm": 1.4168041666716138, "learning_rate": 1.6389783490066277e-05, "loss": 0.6341896653175354, "step": 3948 }, { "epoch": 0.6311835690881483, "grad_norm": 1.3982194771603313, "learning_rate": 1.63877504685779e-05, "loss": 0.5978249311447144, "step": 3949 }, { "epoch": 0.6313434028610245, "grad_norm": 1.2309175307666604, "learning_rate": 1.638571700099171e-05, "loss": 0.6887931823730469, "step": 3950 }, { "epoch": 0.6315032366339007, "grad_norm": 1.2398842167681274, "learning_rate": 1.638368308744972e-05, "loss": 0.5831772089004517, "step": 3951 }, { "epoch": 0.6316630704067769, "grad_norm": 1.265471157092776, "learning_rate": 1.638164872809397e-05, "loss": 0.6379671096801758, "step": 3952 }, { "epoch": 0.6318229041796531, "grad_norm": 1.37762293267975, "learning_rate": 1.637961392306653e-05, "loss": 0.5381253957748413, "step": 3953 }, { "epoch": 0.6319827379525293, "grad_norm": 1.4408874382907508, "learning_rate": 1.637757867250951e-05, "loss": 0.6685711145401001, "step": 3954 }, { "epoch": 0.6321425717254056, "grad_norm": 1.279282834558749, "learning_rate": 1.6375542976565038e-05, "loss": 0.5439578294754028, "step": 3955 }, { "epoch": 0.6323024054982818, "grad_norm": 1.214517903891423, "learning_rate": 1.6373506835375285e-05, "loss": 0.5555940270423889, "step": 3956 }, { "epoch": 0.632462239271158, "grad_norm": 1.2906893468153164, "learning_rate": 1.637147024908244e-05, "loss": 0.5850158333778381, "step": 3957 }, { "epoch": 0.6326220730440342, "grad_norm": 1.395449176480106, "learning_rate": 1.6369433217828742e-05, "loss": 0.6662431955337524, "step": 3958 }, { "epoch": 0.6327819068169104, "grad_norm": 1.2930009720690827, "learning_rate": 1.6367395741756444e-05, "loss": 0.6133049130439758, "step": 3959 }, { "epoch": 0.6329417405897866, "grad_norm": 1.9113539217461695, "learning_rate": 1.6365357821007836e-05, "loss": 0.6035110354423523, "step": 3960 }, { "epoch": 0.6331015743626628, "grad_norm": 1.2005888587444482, "learning_rate": 1.636331945572524e-05, "loss": 0.6447256803512573, "step": 3961 }, { "epoch": 0.633261408135539, "grad_norm": 1.3836918325547412, "learning_rate": 1.6361280646051006e-05, "loss": 0.7048743367195129, "step": 3962 }, { "epoch": 0.6334212419084152, "grad_norm": 1.4590751530387103, "learning_rate": 1.635924139212752e-05, "loss": 0.7548015117645264, "step": 3963 }, { "epoch": 0.6335810756812914, "grad_norm": 1.2370006975762908, "learning_rate": 1.6357201694097194e-05, "loss": 0.6190758347511292, "step": 3964 }, { "epoch": 0.6337409094541676, "grad_norm": 1.133369917807797, "learning_rate": 1.6355161552102474e-05, "loss": 0.6382733583450317, "step": 3965 }, { "epoch": 0.6339007432270439, "grad_norm": 1.334896573905161, "learning_rate": 1.6353120966285837e-05, "loss": 0.6434460878372192, "step": 3966 }, { "epoch": 0.6340605769999201, "grad_norm": 1.405521204145093, "learning_rate": 1.6351079936789792e-05, "loss": 0.7141927480697632, "step": 3967 }, { "epoch": 0.6342204107727963, "grad_norm": 1.2429638779576573, "learning_rate": 1.6349038463756874e-05, "loss": 0.6426455974578857, "step": 3968 }, { "epoch": 0.6343802445456725, "grad_norm": 1.544590955879579, "learning_rate": 1.6346996547329658e-05, "loss": 0.5923434495925903, "step": 3969 }, { "epoch": 0.6345400783185488, "grad_norm": 1.4001472861640336, "learning_rate": 1.6344954187650735e-05, "loss": 0.5050376653671265, "step": 3970 }, { "epoch": 0.634699912091425, "grad_norm": 1.3902329539254732, "learning_rate": 1.6342911384862744e-05, "loss": 0.6963850259780884, "step": 3971 }, { "epoch": 0.6348597458643012, "grad_norm": 1.6053698050060083, "learning_rate": 1.634086813910834e-05, "loss": 0.7690558433532715, "step": 3972 }, { "epoch": 0.6350195796371774, "grad_norm": 1.3415125297242179, "learning_rate": 1.6338824450530224e-05, "loss": 0.6303586959838867, "step": 3973 }, { "epoch": 0.6351794134100536, "grad_norm": 1.2467192790887283, "learning_rate": 1.6336780319271114e-05, "loss": 0.5640480518341064, "step": 3974 }, { "epoch": 0.6353392471829298, "grad_norm": 1.6487849495356774, "learning_rate": 1.6334735745473765e-05, "loss": 0.676852285861969, "step": 3975 }, { "epoch": 0.635499080955806, "grad_norm": 1.29364649520069, "learning_rate": 1.6332690729280968e-05, "loss": 0.5873080492019653, "step": 3976 }, { "epoch": 0.6356589147286822, "grad_norm": 1.3639960691907307, "learning_rate": 1.6330645270835535e-05, "loss": 0.7187033891677856, "step": 3977 }, { "epoch": 0.6358187485015584, "grad_norm": 1.1666165621883906, "learning_rate": 1.6328599370280313e-05, "loss": 0.6293737292289734, "step": 3978 }, { "epoch": 0.6359785822744346, "grad_norm": 1.379108199201283, "learning_rate": 1.6326553027758186e-05, "loss": 0.5437077283859253, "step": 3979 }, { "epoch": 0.6361384160473108, "grad_norm": 1.3040401154086878, "learning_rate": 1.6324506243412057e-05, "loss": 0.6974930763244629, "step": 3980 }, { "epoch": 0.636298249820187, "grad_norm": 1.233359968718075, "learning_rate": 1.632245901738487e-05, "loss": 0.6286579966545105, "step": 3981 }, { "epoch": 0.6364580835930632, "grad_norm": 1.4533339237465381, "learning_rate": 1.6320411349819597e-05, "loss": 0.7916463613510132, "step": 3982 }, { "epoch": 0.6366179173659394, "grad_norm": 1.500231257007404, "learning_rate": 1.6318363240859234e-05, "loss": 0.6239026784896851, "step": 3983 }, { "epoch": 0.6367777511388156, "grad_norm": 1.1483935486258494, "learning_rate": 1.6316314690646816e-05, "loss": 0.5503276586532593, "step": 3984 }, { "epoch": 0.6369375849116918, "grad_norm": 1.1107131262234267, "learning_rate": 1.6314265699325412e-05, "loss": 0.6538434028625488, "step": 3985 }, { "epoch": 0.637097418684568, "grad_norm": 1.3892824030291093, "learning_rate": 1.6312216267038107e-05, "loss": 0.6829801797866821, "step": 3986 }, { "epoch": 0.6372572524574442, "grad_norm": 1.3419876731760334, "learning_rate": 1.6310166393928036e-05, "loss": 0.6708869934082031, "step": 3987 }, { "epoch": 0.6374170862303205, "grad_norm": 1.4812245108418332, "learning_rate": 1.6308116080138344e-05, "loss": 0.5081787705421448, "step": 3988 }, { "epoch": 0.6375769200031967, "grad_norm": 1.7217358977323947, "learning_rate": 1.630606532581223e-05, "loss": 0.7032453417778015, "step": 3989 }, { "epoch": 0.6377367537760729, "grad_norm": 1.0618443334479097, "learning_rate": 1.63040141310929e-05, "loss": 0.5885857343673706, "step": 3990 }, { "epoch": 0.6378965875489491, "grad_norm": 1.3010066137730056, "learning_rate": 1.630196249612361e-05, "loss": 0.5192384123802185, "step": 3991 }, { "epoch": 0.6380564213218253, "grad_norm": 1.2456911202407273, "learning_rate": 1.6299910421047637e-05, "loss": 0.5713367462158203, "step": 3992 }, { "epoch": 0.6382162550947015, "grad_norm": 1.502013857035483, "learning_rate": 1.629785790600829e-05, "loss": 0.6224677562713623, "step": 3993 }, { "epoch": 0.6383760888675777, "grad_norm": 1.4428993409394026, "learning_rate": 1.6295804951148907e-05, "loss": 0.640007495880127, "step": 3994 }, { "epoch": 0.6385359226404539, "grad_norm": 1.2402528276822222, "learning_rate": 1.629375155661286e-05, "loss": 0.5655752420425415, "step": 3995 }, { "epoch": 0.6386957564133301, "grad_norm": 1.4680211725556354, "learning_rate": 1.6291697722543553e-05, "loss": 0.7026726603507996, "step": 3996 }, { "epoch": 0.6388555901862063, "grad_norm": 1.2935067474307633, "learning_rate": 1.628964344908442e-05, "loss": 0.6350305080413818, "step": 3997 }, { "epoch": 0.6390154239590825, "grad_norm": 1.1986831314425122, "learning_rate": 1.6287588736378918e-05, "loss": 0.4845331311225891, "step": 3998 }, { "epoch": 0.6391752577319587, "grad_norm": 1.3380463353501781, "learning_rate": 1.6285533584570544e-05, "loss": 0.6810818910598755, "step": 3999 }, { "epoch": 0.6393350915048349, "grad_norm": 1.5179450469817668, "learning_rate": 1.6283477993802823e-05, "loss": 0.5182068347930908, "step": 4000 }, { "epoch": 0.6394949252777112, "grad_norm": 1.2979923557983295, "learning_rate": 1.6281421964219312e-05, "loss": 0.721962571144104, "step": 4001 }, { "epoch": 0.6396547590505874, "grad_norm": 1.156086266196128, "learning_rate": 1.6279365495963594e-05, "loss": 0.6571844816207886, "step": 4002 }, { "epoch": 0.6398145928234636, "grad_norm": 1.3534226618314589, "learning_rate": 1.627730858917929e-05, "loss": 0.5787698030471802, "step": 4003 }, { "epoch": 0.6399744265963399, "grad_norm": 1.2292234428493396, "learning_rate": 1.627525124401004e-05, "loss": 0.5018789768218994, "step": 4004 }, { "epoch": 0.6401342603692161, "grad_norm": 1.5869675907179002, "learning_rate": 1.6273193460599525e-05, "loss": 0.6552964448928833, "step": 4005 }, { "epoch": 0.6402940941420923, "grad_norm": 1.2831678121069003, "learning_rate": 1.6271135239091448e-05, "loss": 0.61402827501297, "step": 4006 }, { "epoch": 0.6404539279149685, "grad_norm": 1.3388970645367175, "learning_rate": 1.626907657962956e-05, "loss": 0.6678546667098999, "step": 4007 }, { "epoch": 0.6406137616878447, "grad_norm": 1.2888277859892896, "learning_rate": 1.626701748235762e-05, "loss": 0.5678218603134155, "step": 4008 }, { "epoch": 0.6407735954607209, "grad_norm": 1.5239947551979165, "learning_rate": 1.6264957947419436e-05, "loss": 0.6538276672363281, "step": 4009 }, { "epoch": 0.6409334292335971, "grad_norm": 1.496338037668747, "learning_rate": 1.626289797495883e-05, "loss": 0.538192629814148, "step": 4010 }, { "epoch": 0.6410932630064733, "grad_norm": 1.6078303257006037, "learning_rate": 1.626083756511967e-05, "loss": 0.6929163932800293, "step": 4011 }, { "epoch": 0.6412530967793495, "grad_norm": 1.468354109465249, "learning_rate": 1.6258776718045847e-05, "loss": 0.729622483253479, "step": 4012 }, { "epoch": 0.6414129305522257, "grad_norm": 1.2796123261927597, "learning_rate": 1.6256715433881274e-05, "loss": 0.5617029666900635, "step": 4013 }, { "epoch": 0.6415727643251019, "grad_norm": 1.7361847443727563, "learning_rate": 1.625465371276992e-05, "loss": 0.7626732587814331, "step": 4014 }, { "epoch": 0.6417325980979781, "grad_norm": 1.3519938580968756, "learning_rate": 1.6252591554855755e-05, "loss": 0.6505881547927856, "step": 4015 }, { "epoch": 0.6418924318708543, "grad_norm": 1.4164467369550928, "learning_rate": 1.6250528960282797e-05, "loss": 0.6920989155769348, "step": 4016 }, { "epoch": 0.6420522656437305, "grad_norm": 1.4638983580092095, "learning_rate": 1.624846592919509e-05, "loss": 0.6589099168777466, "step": 4017 }, { "epoch": 0.6422120994166067, "grad_norm": 1.164209581019254, "learning_rate": 1.624640246173671e-05, "loss": 0.5686025619506836, "step": 4018 }, { "epoch": 0.6423719331894829, "grad_norm": 1.5961715715569378, "learning_rate": 1.6244338558051764e-05, "loss": 0.6094500422477722, "step": 4019 }, { "epoch": 0.6425317669623591, "grad_norm": 1.4998526105948258, "learning_rate": 1.6242274218284383e-05, "loss": 0.8575407266616821, "step": 4020 }, { "epoch": 0.6426916007352353, "grad_norm": 1.2351539876480304, "learning_rate": 1.6240209442578735e-05, "loss": 0.6153572797775269, "step": 4021 }, { "epoch": 0.6428514345081116, "grad_norm": 2.184538253323166, "learning_rate": 1.623814423107902e-05, "loss": 0.5915082097053528, "step": 4022 }, { "epoch": 0.6430112682809878, "grad_norm": 1.1374219765903946, "learning_rate": 1.623607858392946e-05, "loss": 0.5360047817230225, "step": 4023 }, { "epoch": 0.643171102053864, "grad_norm": 1.2990369323236686, "learning_rate": 1.6234012501274315e-05, "loss": 0.7225958108901978, "step": 4024 }, { "epoch": 0.6433309358267402, "grad_norm": 1.27333373420032, "learning_rate": 1.623194598325787e-05, "loss": 0.5818144083023071, "step": 4025 }, { "epoch": 0.6434907695996164, "grad_norm": 1.4616449793813857, "learning_rate": 1.6229879030024446e-05, "loss": 0.6469637155532837, "step": 4026 }, { "epoch": 0.6436506033724926, "grad_norm": 1.4498898911445295, "learning_rate": 1.6227811641718392e-05, "loss": 0.5497878789901733, "step": 4027 }, { "epoch": 0.6438104371453688, "grad_norm": 1.297979244659227, "learning_rate": 1.622574381848409e-05, "loss": 0.6535791754722595, "step": 4028 }, { "epoch": 0.643970270918245, "grad_norm": 1.0916018064867898, "learning_rate": 1.6223675560465943e-05, "loss": 0.5842829942703247, "step": 4029 }, { "epoch": 0.6441301046911212, "grad_norm": 1.3094132116817592, "learning_rate": 1.6221606867808396e-05, "loss": 0.5698750019073486, "step": 4030 }, { "epoch": 0.6442899384639974, "grad_norm": 1.2079530797903784, "learning_rate": 1.6219537740655913e-05, "loss": 0.5534424781799316, "step": 4031 }, { "epoch": 0.6444497722368736, "grad_norm": 1.4395207471981433, "learning_rate": 1.6217468179153e-05, "loss": 0.6417440176010132, "step": 4032 }, { "epoch": 0.6446096060097498, "grad_norm": 1.4823758357541732, "learning_rate": 1.6215398183444184e-05, "loss": 0.6232008934020996, "step": 4033 }, { "epoch": 0.644769439782626, "grad_norm": 1.3562680303044334, "learning_rate": 1.621332775367403e-05, "loss": 0.66157466173172, "step": 4034 }, { "epoch": 0.6449292735555022, "grad_norm": 1.2831433527434613, "learning_rate": 1.621125688998713e-05, "loss": 0.6033233404159546, "step": 4035 }, { "epoch": 0.6450891073283785, "grad_norm": 1.280311619172041, "learning_rate": 1.6209185592528098e-05, "loss": 0.6675077676773071, "step": 4036 }, { "epoch": 0.6452489411012547, "grad_norm": 1.239709863435401, "learning_rate": 1.6207113861441596e-05, "loss": 0.634209156036377, "step": 4037 }, { "epoch": 0.645408774874131, "grad_norm": 1.2756918221331883, "learning_rate": 1.6205041696872302e-05, "loss": 0.6499675512313843, "step": 4038 }, { "epoch": 0.6455686086470072, "grad_norm": 1.340122377920253, "learning_rate": 1.6202969098964926e-05, "loss": 0.6548628807067871, "step": 4039 }, { "epoch": 0.6457284424198834, "grad_norm": 1.203318733269997, "learning_rate": 1.6200896067864214e-05, "loss": 0.6522499322891235, "step": 4040 }, { "epoch": 0.6458882761927596, "grad_norm": 1.5649820022719236, "learning_rate": 1.619882260371494e-05, "loss": 0.7791466116905212, "step": 4041 }, { "epoch": 0.6460481099656358, "grad_norm": 1.34494648710562, "learning_rate": 1.6196748706661903e-05, "loss": 0.5471866726875305, "step": 4042 }, { "epoch": 0.646207943738512, "grad_norm": 1.3534022669455281, "learning_rate": 1.6194674376849944e-05, "loss": 0.8154988288879395, "step": 4043 }, { "epoch": 0.6463677775113882, "grad_norm": 1.3028625348018559, "learning_rate": 1.619259961442392e-05, "loss": 0.5693488717079163, "step": 4044 }, { "epoch": 0.6465276112842644, "grad_norm": 1.6010972942985053, "learning_rate": 1.6190524419528726e-05, "loss": 0.6741828322410583, "step": 4045 }, { "epoch": 0.6466874450571406, "grad_norm": 1.3041792817775018, "learning_rate": 1.618844879230929e-05, "loss": 0.665753960609436, "step": 4046 }, { "epoch": 0.6468472788300168, "grad_norm": 1.423370032503338, "learning_rate": 1.618637273291056e-05, "loss": 0.6796152591705322, "step": 4047 }, { "epoch": 0.647007112602893, "grad_norm": 1.2733727945003859, "learning_rate": 1.6184296241477525e-05, "loss": 0.6399661898612976, "step": 4048 }, { "epoch": 0.6471669463757692, "grad_norm": 1.3618443505792779, "learning_rate": 1.6182219318155205e-05, "loss": 0.5877674221992493, "step": 4049 }, { "epoch": 0.6473267801486454, "grad_norm": 1.6993966605395419, "learning_rate": 1.6180141963088635e-05, "loss": 0.7920374274253845, "step": 4050 }, { "epoch": 0.6474866139215216, "grad_norm": 1.2289225820824814, "learning_rate": 1.6178064176422897e-05, "loss": 0.6195340156555176, "step": 4051 }, { "epoch": 0.6476464476943978, "grad_norm": 1.2522063958249032, "learning_rate": 1.6175985958303093e-05, "loss": 0.6714541912078857, "step": 4052 }, { "epoch": 0.647806281467274, "grad_norm": 1.4471439889587825, "learning_rate": 1.6173907308874356e-05, "loss": 0.7603486776351929, "step": 4053 }, { "epoch": 0.6479661152401502, "grad_norm": 1.441352663110014, "learning_rate": 1.6171828228281857e-05, "loss": 0.6618666648864746, "step": 4054 }, { "epoch": 0.6481259490130264, "grad_norm": 1.3680520075613698, "learning_rate": 1.616974871667079e-05, "loss": 0.6791979074478149, "step": 4055 }, { "epoch": 0.6482857827859027, "grad_norm": 1.5134187159390744, "learning_rate": 1.616766877418638e-05, "loss": 0.6590292453765869, "step": 4056 }, { "epoch": 0.6484456165587789, "grad_norm": 1.4539691091096278, "learning_rate": 1.616558840097388e-05, "loss": 0.6922234296798706, "step": 4057 }, { "epoch": 0.6486054503316551, "grad_norm": 1.4994983706848575, "learning_rate": 1.616350759717858e-05, "loss": 0.6928768754005432, "step": 4058 }, { "epoch": 0.6487652841045313, "grad_norm": 1.3702220640465823, "learning_rate": 1.6161426362945797e-05, "loss": 0.6951601505279541, "step": 4059 }, { "epoch": 0.6489251178774075, "grad_norm": 1.3000370109387636, "learning_rate": 1.6159344698420874e-05, "loss": 0.5017772912979126, "step": 4060 }, { "epoch": 0.6490849516502837, "grad_norm": 1.5265079979692255, "learning_rate": 1.6157262603749186e-05, "loss": 0.7322577238082886, "step": 4061 }, { "epoch": 0.6492447854231599, "grad_norm": 1.455037581928888, "learning_rate": 1.615518007907614e-05, "loss": 0.6356512308120728, "step": 4062 }, { "epoch": 0.6494046191960361, "grad_norm": 1.3213428924552688, "learning_rate": 1.6153097124547174e-05, "loss": 0.553195059299469, "step": 4063 }, { "epoch": 0.6495644529689123, "grad_norm": 1.2538072377035272, "learning_rate": 1.6151013740307752e-05, "loss": 0.637031078338623, "step": 4064 }, { "epoch": 0.6497242867417885, "grad_norm": 1.3731623274388771, "learning_rate": 1.614892992650337e-05, "loss": 0.7375234961509705, "step": 4065 }, { "epoch": 0.6498841205146647, "grad_norm": 1.2962788152101374, "learning_rate": 1.614684568327956e-05, "loss": 0.6481646299362183, "step": 4066 }, { "epoch": 0.6500439542875409, "grad_norm": 1.0710789492017894, "learning_rate": 1.6144761010781867e-05, "loss": 0.5679949522018433, "step": 4067 }, { "epoch": 0.6502037880604171, "grad_norm": 1.2910132748205734, "learning_rate": 1.614267590915589e-05, "loss": 0.6785615682601929, "step": 4068 }, { "epoch": 0.6503636218332933, "grad_norm": 1.2360335748982023, "learning_rate": 1.6140590378547236e-05, "loss": 0.4941745400428772, "step": 4069 }, { "epoch": 0.6505234556061695, "grad_norm": 1.3339058769473264, "learning_rate": 1.6138504419101554e-05, "loss": 0.6458284854888916, "step": 4070 }, { "epoch": 0.6506832893790457, "grad_norm": 1.5632819977337167, "learning_rate": 1.613641803096452e-05, "loss": 0.5796504020690918, "step": 4071 }, { "epoch": 0.650843123151922, "grad_norm": 1.1845370484885467, "learning_rate": 1.6134331214281836e-05, "loss": 0.7174280881881714, "step": 4072 }, { "epoch": 0.6510029569247983, "grad_norm": 1.3903659738792564, "learning_rate": 1.6132243969199248e-05, "loss": 0.7175403833389282, "step": 4073 }, { "epoch": 0.6511627906976745, "grad_norm": 1.2191374850915144, "learning_rate": 1.613015629586251e-05, "loss": 0.5390135645866394, "step": 4074 }, { "epoch": 0.6513226244705507, "grad_norm": 1.2738944712552762, "learning_rate": 1.6128068194417424e-05, "loss": 0.484072744846344, "step": 4075 }, { "epoch": 0.6514824582434269, "grad_norm": 1.2899164121082498, "learning_rate": 1.6125979665009816e-05, "loss": 0.6694885492324829, "step": 4076 }, { "epoch": 0.6516422920163031, "grad_norm": 1.494077748476111, "learning_rate": 1.6123890707785538e-05, "loss": 0.7922767400741577, "step": 4077 }, { "epoch": 0.6518021257891793, "grad_norm": 1.2861560046858087, "learning_rate": 1.612180132289048e-05, "loss": 0.6950058937072754, "step": 4078 }, { "epoch": 0.6519619595620555, "grad_norm": 1.3099219417020904, "learning_rate": 1.6119711510470553e-05, "loss": 0.5392857789993286, "step": 4079 }, { "epoch": 0.6521217933349317, "grad_norm": 1.8428637011216067, "learning_rate": 1.6117621270671703e-05, "loss": 0.6086214780807495, "step": 4080 }, { "epoch": 0.6522816271078079, "grad_norm": 1.3347652556062986, "learning_rate": 1.6115530603639907e-05, "loss": 0.6950347423553467, "step": 4081 }, { "epoch": 0.6524414608806841, "grad_norm": 1.308846195136672, "learning_rate": 1.611343950952117e-05, "loss": 0.6206123232841492, "step": 4082 }, { "epoch": 0.6526012946535603, "grad_norm": 1.5085382646700374, "learning_rate": 1.6111347988461525e-05, "loss": 0.6953732967376709, "step": 4083 }, { "epoch": 0.6527611284264365, "grad_norm": 1.125333830904512, "learning_rate": 1.6109256040607032e-05, "loss": 0.5230149030685425, "step": 4084 }, { "epoch": 0.6529209621993127, "grad_norm": 1.3690542062841433, "learning_rate": 1.6107163666103792e-05, "loss": 0.5061869621276855, "step": 4085 }, { "epoch": 0.6530807959721889, "grad_norm": 1.1075141606990788, "learning_rate": 1.6105070865097928e-05, "loss": 0.579002857208252, "step": 4086 }, { "epoch": 0.6532406297450651, "grad_norm": 1.2205814464790101, "learning_rate": 1.6102977637735587e-05, "loss": 0.6440199613571167, "step": 4087 }, { "epoch": 0.6534004635179413, "grad_norm": 1.2969377429871383, "learning_rate": 1.6100883984162964e-05, "loss": 0.661546528339386, "step": 4088 }, { "epoch": 0.6535602972908175, "grad_norm": 1.389286557588916, "learning_rate": 1.6098789904526266e-05, "loss": 0.653472900390625, "step": 4089 }, { "epoch": 0.6537201310636938, "grad_norm": 1.3648360455022328, "learning_rate": 1.6096695398971737e-05, "loss": 0.5962165594100952, "step": 4090 }, { "epoch": 0.65387996483657, "grad_norm": 1.3622316405803971, "learning_rate": 1.6094600467645643e-05, "loss": 0.7687486410140991, "step": 4091 }, { "epoch": 0.6540397986094462, "grad_norm": 1.2813361297199852, "learning_rate": 1.60925051106943e-05, "loss": 0.6723577976226807, "step": 4092 }, { "epoch": 0.6541996323823224, "grad_norm": 1.1972952371543597, "learning_rate": 1.609040932826403e-05, "loss": 0.5664939284324646, "step": 4093 }, { "epoch": 0.6543594661551986, "grad_norm": 1.2088660628163574, "learning_rate": 1.6088313120501196e-05, "loss": 0.6590124368667603, "step": 4094 }, { "epoch": 0.6545192999280748, "grad_norm": 2.521704999374809, "learning_rate": 1.6086216487552197e-05, "loss": 0.8331780433654785, "step": 4095 }, { "epoch": 0.654679133700951, "grad_norm": 1.4544450826228001, "learning_rate": 1.6084119429563445e-05, "loss": 0.5834358930587769, "step": 4096 }, { "epoch": 0.6548389674738272, "grad_norm": 1.2512558379944863, "learning_rate": 1.6082021946681393e-05, "loss": 0.6168406009674072, "step": 4097 }, { "epoch": 0.6549988012467034, "grad_norm": 1.4643199558167583, "learning_rate": 1.607992403905253e-05, "loss": 0.6862438321113586, "step": 4098 }, { "epoch": 0.6551586350195796, "grad_norm": 2.227401795762236, "learning_rate": 1.607782570682336e-05, "loss": 0.6114934682846069, "step": 4099 }, { "epoch": 0.6553184687924558, "grad_norm": 1.4838508209796446, "learning_rate": 1.607572695014042e-05, "loss": 0.5611847639083862, "step": 4100 }, { "epoch": 0.655478302565332, "grad_norm": 1.5807822689066504, "learning_rate": 1.6073627769150286e-05, "loss": 0.6069873571395874, "step": 4101 }, { "epoch": 0.6556381363382082, "grad_norm": 1.4727942917824777, "learning_rate": 1.607152816399955e-05, "loss": 0.7112135291099548, "step": 4102 }, { "epoch": 0.6557979701110844, "grad_norm": 1.391187133123597, "learning_rate": 1.6069428134834854e-05, "loss": 0.591815710067749, "step": 4103 }, { "epoch": 0.6559578038839606, "grad_norm": 1.3331334400229633, "learning_rate": 1.606732768180284e-05, "loss": 0.801296591758728, "step": 4104 }, { "epoch": 0.6561176376568368, "grad_norm": 1.483551659836981, "learning_rate": 1.6065226805050208e-05, "loss": 0.6389809846878052, "step": 4105 }, { "epoch": 0.656277471429713, "grad_norm": 1.4513635921115606, "learning_rate": 1.606312550472367e-05, "loss": 0.6841708421707153, "step": 4106 }, { "epoch": 0.6564373052025894, "grad_norm": 1.446128409146222, "learning_rate": 1.606102378096998e-05, "loss": 0.6987607479095459, "step": 4107 }, { "epoch": 0.6565971389754656, "grad_norm": 1.154265750573974, "learning_rate": 1.6058921633935904e-05, "loss": 0.5518394708633423, "step": 4108 }, { "epoch": 0.6567569727483418, "grad_norm": 1.3868959770286033, "learning_rate": 1.605681906376826e-05, "loss": 0.7368997931480408, "step": 4109 }, { "epoch": 0.656916806521218, "grad_norm": 1.0764815493790911, "learning_rate": 1.6054716070613875e-05, "loss": 0.48941317200660706, "step": 4110 }, { "epoch": 0.6570766402940942, "grad_norm": 1.2401064849490935, "learning_rate": 1.605261265461962e-05, "loss": 0.6569296717643738, "step": 4111 }, { "epoch": 0.6572364740669704, "grad_norm": 1.1151978455834841, "learning_rate": 1.6050508815932388e-05, "loss": 0.5143236517906189, "step": 4112 }, { "epoch": 0.6573963078398466, "grad_norm": 1.3022485773141703, "learning_rate": 1.6048404554699107e-05, "loss": 0.6037192344665527, "step": 4113 }, { "epoch": 0.6575561416127228, "grad_norm": 1.481982274519974, "learning_rate": 1.6046299871066728e-05, "loss": 0.5973422527313232, "step": 4114 }, { "epoch": 0.657715975385599, "grad_norm": 1.3508263497505246, "learning_rate": 1.6044194765182234e-05, "loss": 0.6369591951370239, "step": 4115 }, { "epoch": 0.6578758091584752, "grad_norm": 1.3212134141489489, "learning_rate": 1.6042089237192637e-05, "loss": 0.6618697643280029, "step": 4116 }, { "epoch": 0.6580356429313514, "grad_norm": 1.2291460196171362, "learning_rate": 1.6039983287244982e-05, "loss": 0.4953151345252991, "step": 4117 }, { "epoch": 0.6581954767042276, "grad_norm": 1.2326359315683841, "learning_rate": 1.603787691548634e-05, "loss": 0.6004452705383301, "step": 4118 }, { "epoch": 0.6583553104771038, "grad_norm": 1.1722838459612295, "learning_rate": 1.603577012206382e-05, "loss": 0.6668853759765625, "step": 4119 }, { "epoch": 0.65851514424998, "grad_norm": 1.272196662102921, "learning_rate": 1.603366290712454e-05, "loss": 0.5903853178024292, "step": 4120 }, { "epoch": 0.6586749780228562, "grad_norm": 1.371331347939265, "learning_rate": 1.6031555270815665e-05, "loss": 0.6042834520339966, "step": 4121 }, { "epoch": 0.6588348117957324, "grad_norm": 1.3236576601613297, "learning_rate": 1.602944721328439e-05, "loss": 0.5605133771896362, "step": 4122 }, { "epoch": 0.6589946455686087, "grad_norm": 1.3643608838038797, "learning_rate": 1.6027338734677932e-05, "loss": 0.6747474670410156, "step": 4123 }, { "epoch": 0.6591544793414849, "grad_norm": 1.3010757372046993, "learning_rate": 1.6025229835143535e-05, "loss": 0.6506164073944092, "step": 4124 }, { "epoch": 0.6593143131143611, "grad_norm": 1.4819880719829384, "learning_rate": 1.6023120514828483e-05, "loss": 0.6610153913497925, "step": 4125 }, { "epoch": 0.6594741468872373, "grad_norm": 1.3427738136086624, "learning_rate": 1.602101077388008e-05, "loss": 0.6867820024490356, "step": 4126 }, { "epoch": 0.6596339806601135, "grad_norm": 1.4577942283776528, "learning_rate": 1.6018900612445665e-05, "loss": 0.6138587594032288, "step": 4127 }, { "epoch": 0.6597938144329897, "grad_norm": 1.4301390591008587, "learning_rate": 1.6016790030672603e-05, "loss": 0.539503812789917, "step": 4128 }, { "epoch": 0.6599536482058659, "grad_norm": 1.4275726758025482, "learning_rate": 1.6014679028708288e-05, "loss": 0.6023383140563965, "step": 4129 }, { "epoch": 0.6601134819787421, "grad_norm": 1.5354501834786574, "learning_rate": 1.6012567606700145e-05, "loss": 0.5648449063301086, "step": 4130 }, { "epoch": 0.6602733157516183, "grad_norm": 1.4130311633913997, "learning_rate": 1.601045576479563e-05, "loss": 0.7257393598556519, "step": 4131 }, { "epoch": 0.6604331495244945, "grad_norm": 1.1104245290126316, "learning_rate": 1.6008343503142227e-05, "loss": 0.6632633209228516, "step": 4132 }, { "epoch": 0.6605929832973707, "grad_norm": 1.293221195544576, "learning_rate": 1.600623082188745e-05, "loss": 0.635735809803009, "step": 4133 }, { "epoch": 0.6607528170702469, "grad_norm": 1.5191597016997898, "learning_rate": 1.6004117721178835e-05, "loss": 0.7548034191131592, "step": 4134 }, { "epoch": 0.6609126508431231, "grad_norm": 1.4414046979043404, "learning_rate": 1.600200420116396e-05, "loss": 0.7167015075683594, "step": 4135 }, { "epoch": 0.6610724846159993, "grad_norm": 1.1940658314376529, "learning_rate": 1.5999890261990423e-05, "loss": 0.5739086866378784, "step": 4136 }, { "epoch": 0.6612323183888755, "grad_norm": 1.543725219441704, "learning_rate": 1.5997775903805852e-05, "loss": 0.7971364259719849, "step": 4137 }, { "epoch": 0.6613921521617517, "grad_norm": 1.2616957143805352, "learning_rate": 1.599566112675791e-05, "loss": 0.4921188950538635, "step": 4138 }, { "epoch": 0.6615519859346279, "grad_norm": 1.640178516071422, "learning_rate": 1.5993545930994287e-05, "loss": 0.6230031251907349, "step": 4139 }, { "epoch": 0.6617118197075041, "grad_norm": 1.4427617190676374, "learning_rate": 1.5991430316662694e-05, "loss": 0.5824704170227051, "step": 4140 }, { "epoch": 0.6618716534803804, "grad_norm": 1.2208483153034848, "learning_rate": 1.5989314283910883e-05, "loss": 0.5986719131469727, "step": 4141 }, { "epoch": 0.6620314872532567, "grad_norm": 1.3907180648322681, "learning_rate": 1.598719783288663e-05, "loss": 0.7126566171646118, "step": 4142 }, { "epoch": 0.6621913210261329, "grad_norm": 1.3178711670563534, "learning_rate": 1.598508096373774e-05, "loss": 0.5258876085281372, "step": 4143 }, { "epoch": 0.6623511547990091, "grad_norm": 1.1720699791777134, "learning_rate": 1.5982963676612044e-05, "loss": 0.5861738920211792, "step": 4144 }, { "epoch": 0.6625109885718853, "grad_norm": 1.4340812433925312, "learning_rate": 1.598084597165741e-05, "loss": 0.587811291217804, "step": 4145 }, { "epoch": 0.6626708223447615, "grad_norm": 1.2539234401747845, "learning_rate": 1.5978727849021734e-05, "loss": 0.5371910929679871, "step": 4146 }, { "epoch": 0.6628306561176377, "grad_norm": 1.2208346227601707, "learning_rate": 1.597660930885293e-05, "loss": 0.6162006855010986, "step": 4147 }, { "epoch": 0.6629904898905139, "grad_norm": 1.2462926249178625, "learning_rate": 1.5974490351298953e-05, "loss": 0.5901426076889038, "step": 4148 }, { "epoch": 0.6631503236633901, "grad_norm": 1.22398611067013, "learning_rate": 1.5972370976507782e-05, "loss": 0.5306718945503235, "step": 4149 }, { "epoch": 0.6633101574362663, "grad_norm": 1.507566779673609, "learning_rate": 1.597025118462743e-05, "loss": 0.6885899305343628, "step": 4150 }, { "epoch": 0.6634699912091425, "grad_norm": 1.1471375189663653, "learning_rate": 1.596813097580594e-05, "loss": 0.5835040807723999, "step": 4151 }, { "epoch": 0.6636298249820187, "grad_norm": 1.5676359891290919, "learning_rate": 1.596601035019137e-05, "loss": 0.8720357418060303, "step": 4152 }, { "epoch": 0.6637896587548949, "grad_norm": 1.3520454708688718, "learning_rate": 1.596388930793182e-05, "loss": 0.6498568058013916, "step": 4153 }, { "epoch": 0.6639494925277711, "grad_norm": 2.3504271552094154, "learning_rate": 1.5961767849175415e-05, "loss": 0.6353517174720764, "step": 4154 }, { "epoch": 0.6641093263006473, "grad_norm": 1.5312349200116333, "learning_rate": 1.5959645974070314e-05, "loss": 0.5929120779037476, "step": 4155 }, { "epoch": 0.6642691600735235, "grad_norm": 1.166814177280126, "learning_rate": 1.59575236827647e-05, "loss": 0.6476637721061707, "step": 4156 }, { "epoch": 0.6644289938463998, "grad_norm": 1.4106937797561838, "learning_rate": 1.5955400975406784e-05, "loss": 0.6149477362632751, "step": 4157 }, { "epoch": 0.664588827619276, "grad_norm": 1.647813741038824, "learning_rate": 1.5953277852144814e-05, "loss": 0.6171588897705078, "step": 4158 }, { "epoch": 0.6647486613921522, "grad_norm": 1.377555612110673, "learning_rate": 1.5951154313127055e-05, "loss": 0.7400329113006592, "step": 4159 }, { "epoch": 0.6649084951650284, "grad_norm": 1.3003378693801326, "learning_rate": 1.594903035850181e-05, "loss": 0.7015589475631714, "step": 4160 }, { "epoch": 0.6650683289379046, "grad_norm": 1.288151634714153, "learning_rate": 1.594690598841741e-05, "loss": 0.6126841306686401, "step": 4161 }, { "epoch": 0.6652281627107808, "grad_norm": 1.4173841838072132, "learning_rate": 1.594478120302221e-05, "loss": 0.677605152130127, "step": 4162 }, { "epoch": 0.665387996483657, "grad_norm": 1.4748340787214322, "learning_rate": 1.5942656002464597e-05, "loss": 0.72026526927948, "step": 4163 }, { "epoch": 0.6655478302565332, "grad_norm": 1.092156053820711, "learning_rate": 1.5940530386892995e-05, "loss": 0.5137232542037964, "step": 4164 }, { "epoch": 0.6657076640294094, "grad_norm": 1.382766069341082, "learning_rate": 1.5938404356455843e-05, "loss": 0.7280555367469788, "step": 4165 }, { "epoch": 0.6658674978022856, "grad_norm": 1.4213548737361374, "learning_rate": 1.5936277911301612e-05, "loss": 0.7343007922172546, "step": 4166 }, { "epoch": 0.6660273315751618, "grad_norm": 1.155148397716261, "learning_rate": 1.5934151051578814e-05, "loss": 0.4924073815345764, "step": 4167 }, { "epoch": 0.666187165348038, "grad_norm": 1.3081842566734703, "learning_rate": 1.5932023777435976e-05, "loss": 0.6629015803337097, "step": 4168 }, { "epoch": 0.6663469991209142, "grad_norm": 1.2911581252105462, "learning_rate": 1.5929896089021662e-05, "loss": 0.6442384719848633, "step": 4169 }, { "epoch": 0.6665068328937904, "grad_norm": 1.146887368285653, "learning_rate": 1.592776798648446e-05, "loss": 0.589584469795227, "step": 4170 }, { "epoch": 0.6666666666666666, "grad_norm": 1.336597261408029, "learning_rate": 1.5925639469972987e-05, "loss": 0.6320989727973938, "step": 4171 }, { "epoch": 0.6668265004395428, "grad_norm": 1.6351578686773178, "learning_rate": 1.59235105396359e-05, "loss": 0.7193100452423096, "step": 4172 }, { "epoch": 0.666986334212419, "grad_norm": 1.1774837849365742, "learning_rate": 1.5921381195621867e-05, "loss": 0.5269722938537598, "step": 4173 }, { "epoch": 0.6671461679852952, "grad_norm": 1.1736461011591581, "learning_rate": 1.5919251438079594e-05, "loss": 0.5535143613815308, "step": 4174 }, { "epoch": 0.6673060017581715, "grad_norm": 1.40382409363759, "learning_rate": 1.591712126715782e-05, "loss": 0.6627599000930786, "step": 4175 }, { "epoch": 0.6674658355310477, "grad_norm": 1.3744618760563472, "learning_rate": 1.591499068300531e-05, "loss": 0.7007232904434204, "step": 4176 }, { "epoch": 0.667625669303924, "grad_norm": 1.2662966432171043, "learning_rate": 1.591285968577085e-05, "loss": 0.6091697216033936, "step": 4177 }, { "epoch": 0.6677855030768002, "grad_norm": 1.1516783641640138, "learning_rate": 1.5910728275603264e-05, "loss": 0.5758118629455566, "step": 4178 }, { "epoch": 0.6679453368496764, "grad_norm": 1.5035198437493225, "learning_rate": 1.5908596452651403e-05, "loss": 0.5655703544616699, "step": 4179 }, { "epoch": 0.6681051706225526, "grad_norm": 1.387042840000999, "learning_rate": 1.590646421706415e-05, "loss": 0.5608800649642944, "step": 4180 }, { "epoch": 0.6682650043954288, "grad_norm": 1.2533251741318903, "learning_rate": 1.5904331568990402e-05, "loss": 0.6349537372589111, "step": 4181 }, { "epoch": 0.668424838168305, "grad_norm": 1.374734418488206, "learning_rate": 1.5902198508579106e-05, "loss": 0.7341790199279785, "step": 4182 }, { "epoch": 0.6685846719411812, "grad_norm": 1.3170797329396442, "learning_rate": 1.5900065035979223e-05, "loss": 0.6237406730651855, "step": 4183 }, { "epoch": 0.6687445057140574, "grad_norm": 1.2056324618571093, "learning_rate": 1.589793115133975e-05, "loss": 0.6302779912948608, "step": 4184 }, { "epoch": 0.6689043394869336, "grad_norm": 1.3867429085943004, "learning_rate": 1.5895796854809703e-05, "loss": 0.6415943503379822, "step": 4185 }, { "epoch": 0.6690641732598098, "grad_norm": 1.3486912386438408, "learning_rate": 1.5893662146538143e-05, "loss": 0.6098153591156006, "step": 4186 }, { "epoch": 0.669224007032686, "grad_norm": 1.2181064543228814, "learning_rate": 1.589152702667414e-05, "loss": 0.6991367936134338, "step": 4187 }, { "epoch": 0.6693838408055622, "grad_norm": 1.4792703560295362, "learning_rate": 1.5889391495366812e-05, "loss": 0.7749631404876709, "step": 4188 }, { "epoch": 0.6695436745784384, "grad_norm": 1.2805989163732008, "learning_rate": 1.5887255552765295e-05, "loss": 0.6405370235443115, "step": 4189 }, { "epoch": 0.6697035083513146, "grad_norm": 1.3255039701805686, "learning_rate": 1.5885119199018753e-05, "loss": 0.7046881318092346, "step": 4190 }, { "epoch": 0.6698633421241909, "grad_norm": 1.2571940441152163, "learning_rate": 1.588298243427638e-05, "loss": 0.6760947704315186, "step": 4191 }, { "epoch": 0.6700231758970671, "grad_norm": 1.2421742236188673, "learning_rate": 1.5880845258687408e-05, "loss": 0.5724951028823853, "step": 4192 }, { "epoch": 0.6701830096699433, "grad_norm": 1.2683361356647187, "learning_rate": 1.5878707672401083e-05, "loss": 0.6619630455970764, "step": 4193 }, { "epoch": 0.6703428434428195, "grad_norm": 1.2014556234414682, "learning_rate": 1.5876569675566687e-05, "loss": 0.4539029598236084, "step": 4194 }, { "epoch": 0.6705026772156957, "grad_norm": 1.1547256616981294, "learning_rate": 1.5874431268333532e-05, "loss": 0.5834054946899414, "step": 4195 }, { "epoch": 0.6706625109885719, "grad_norm": 1.3756521803931565, "learning_rate": 1.5872292450850957e-05, "loss": 0.577940821647644, "step": 4196 }, { "epoch": 0.6708223447614481, "grad_norm": 1.1502815202739345, "learning_rate": 1.5870153223268324e-05, "loss": 0.5909671783447266, "step": 4197 }, { "epoch": 0.6709821785343243, "grad_norm": 1.2271229845839686, "learning_rate": 1.5868013585735037e-05, "loss": 0.6329652070999146, "step": 4198 }, { "epoch": 0.6711420123072005, "grad_norm": 1.148032248966878, "learning_rate": 1.5865873538400517e-05, "loss": 0.6191864013671875, "step": 4199 }, { "epoch": 0.6713018460800767, "grad_norm": 1.3157739646913633, "learning_rate": 1.5863733081414214e-05, "loss": 0.6290455460548401, "step": 4200 }, { "epoch": 0.6714616798529529, "grad_norm": 1.2272879008135782, "learning_rate": 1.5861592214925617e-05, "loss": 0.6495800614356995, "step": 4201 }, { "epoch": 0.6716215136258291, "grad_norm": 1.2658038527614468, "learning_rate": 1.5859450939084232e-05, "loss": 0.7095520496368408, "step": 4202 }, { "epoch": 0.6717813473987053, "grad_norm": 1.1507748952306889, "learning_rate": 1.58573092540396e-05, "loss": 0.5505092144012451, "step": 4203 }, { "epoch": 0.6719411811715815, "grad_norm": 1.4958873926476859, "learning_rate": 1.5855167159941288e-05, "loss": 0.7423372268676758, "step": 4204 }, { "epoch": 0.6721010149444577, "grad_norm": 1.1270074876212073, "learning_rate": 1.585302465693889e-05, "loss": 0.5620957016944885, "step": 4205 }, { "epoch": 0.6722608487173339, "grad_norm": 1.1490950679882819, "learning_rate": 1.585088174518203e-05, "loss": 0.5105109214782715, "step": 4206 }, { "epoch": 0.6724206824902101, "grad_norm": 1.3702286134417248, "learning_rate": 1.5848738424820366e-05, "loss": 0.7470766305923462, "step": 4207 }, { "epoch": 0.6725805162630863, "grad_norm": 1.3860780681683247, "learning_rate": 1.584659469600358e-05, "loss": 0.7360766530036926, "step": 4208 }, { "epoch": 0.6727403500359626, "grad_norm": 1.1845515727079918, "learning_rate": 1.584445055888138e-05, "loss": 0.5585224628448486, "step": 4209 }, { "epoch": 0.6729001838088388, "grad_norm": 1.1774996628563548, "learning_rate": 1.58423060136035e-05, "loss": 0.6173491477966309, "step": 4210 }, { "epoch": 0.673060017581715, "grad_norm": 1.2687275494603345, "learning_rate": 1.5840161060319718e-05, "loss": 0.6907753944396973, "step": 4211 }, { "epoch": 0.6732198513545912, "grad_norm": 0.9725158516678932, "learning_rate": 1.5838015699179824e-05, "loss": 0.44428551197052, "step": 4212 }, { "epoch": 0.6733796851274675, "grad_norm": 1.181335169700735, "learning_rate": 1.583586993033364e-05, "loss": 0.44335150718688965, "step": 4213 }, { "epoch": 0.6735395189003437, "grad_norm": 1.3618373817561464, "learning_rate": 1.5833723753931026e-05, "loss": 0.6496821641921997, "step": 4214 }, { "epoch": 0.6736993526732199, "grad_norm": 1.5157922292795585, "learning_rate": 1.5831577170121858e-05, "loss": 0.6795716285705566, "step": 4215 }, { "epoch": 0.6738591864460961, "grad_norm": 1.3767643297751337, "learning_rate": 1.5829430179056043e-05, "loss": 0.7137739062309265, "step": 4216 }, { "epoch": 0.6740190202189723, "grad_norm": 1.2125793696136224, "learning_rate": 1.5827282780883527e-05, "loss": 0.6424379348754883, "step": 4217 }, { "epoch": 0.6741788539918485, "grad_norm": 1.3018005689642493, "learning_rate": 1.5825134975754272e-05, "loss": 0.5942584872245789, "step": 4218 }, { "epoch": 0.6743386877647247, "grad_norm": 1.1777350763894745, "learning_rate": 1.5822986763818275e-05, "loss": 0.6703618764877319, "step": 4219 }, { "epoch": 0.6744985215376009, "grad_norm": 1.31652902378165, "learning_rate": 1.5820838145225557e-05, "loss": 0.6388677358627319, "step": 4220 }, { "epoch": 0.6746583553104771, "grad_norm": 1.3272187788097811, "learning_rate": 1.581868912012617e-05, "loss": 0.5798271894454956, "step": 4221 }, { "epoch": 0.6748181890833533, "grad_norm": 1.397438951120357, "learning_rate": 1.5816539688670197e-05, "loss": 0.5876830220222473, "step": 4222 }, { "epoch": 0.6749780228562295, "grad_norm": 1.3105319303320044, "learning_rate": 1.5814389851007746e-05, "loss": 0.674291729927063, "step": 4223 }, { "epoch": 0.6751378566291057, "grad_norm": 1.2567645526451432, "learning_rate": 1.5812239607288956e-05, "loss": 0.5640747547149658, "step": 4224 }, { "epoch": 0.675297690401982, "grad_norm": 1.3120453030570773, "learning_rate": 1.581008895766399e-05, "loss": 0.5576555728912354, "step": 4225 }, { "epoch": 0.6754575241748582, "grad_norm": 1.2183358764972794, "learning_rate": 1.5807937902283032e-05, "loss": 0.5738482475280762, "step": 4226 }, { "epoch": 0.6756173579477344, "grad_norm": 1.2327915525058957, "learning_rate": 1.580578644129632e-05, "loss": 0.6863011121749878, "step": 4227 }, { "epoch": 0.6757771917206106, "grad_norm": 1.254550954179258, "learning_rate": 1.5803634574854097e-05, "loss": 0.6948147416114807, "step": 4228 }, { "epoch": 0.6759370254934868, "grad_norm": 1.2470575256824827, "learning_rate": 1.5801482303106642e-05, "loss": 0.6160929203033447, "step": 4229 }, { "epoch": 0.676096859266363, "grad_norm": 1.323494159162957, "learning_rate": 1.5799329626204264e-05, "loss": 0.5838388204574585, "step": 4230 }, { "epoch": 0.6762566930392392, "grad_norm": 1.2912642075728076, "learning_rate": 1.5797176544297296e-05, "loss": 0.5841827988624573, "step": 4231 }, { "epoch": 0.6764165268121154, "grad_norm": 1.2940758630267977, "learning_rate": 1.5795023057536105e-05, "loss": 0.6544861793518066, "step": 4232 }, { "epoch": 0.6765763605849916, "grad_norm": 1.3889571038818063, "learning_rate": 1.579286916607108e-05, "loss": 0.657036542892456, "step": 4233 }, { "epoch": 0.6767361943578678, "grad_norm": 1.3313977220222821, "learning_rate": 1.579071487005264e-05, "loss": 0.6231729984283447, "step": 4234 }, { "epoch": 0.676896028130744, "grad_norm": 1.2694952460102595, "learning_rate": 1.5788560169631237e-05, "loss": 0.6735128164291382, "step": 4235 }, { "epoch": 0.6770558619036202, "grad_norm": 1.3123617358022956, "learning_rate": 1.578640506495734e-05, "loss": 0.5257964134216309, "step": 4236 }, { "epoch": 0.6772156956764964, "grad_norm": 1.3249010988587528, "learning_rate": 1.5784249556181462e-05, "loss": 0.6838672161102295, "step": 4237 }, { "epoch": 0.6773755294493726, "grad_norm": 1.4645999004967392, "learning_rate": 1.578209364345413e-05, "loss": 0.6792104244232178, "step": 4238 }, { "epoch": 0.6775353632222488, "grad_norm": 1.3873646332974683, "learning_rate": 1.5779937326925918e-05, "loss": 0.6528584957122803, "step": 4239 }, { "epoch": 0.677695196995125, "grad_norm": 1.450591038502056, "learning_rate": 1.57777806067474e-05, "loss": 0.5877615809440613, "step": 4240 }, { "epoch": 0.6778550307680012, "grad_norm": 1.3917786289066802, "learning_rate": 1.5775623483069203e-05, "loss": 0.701919436454773, "step": 4241 }, { "epoch": 0.6780148645408774, "grad_norm": 1.2249090533469045, "learning_rate": 1.5773465956041966e-05, "loss": 0.5801149010658264, "step": 4242 }, { "epoch": 0.6781746983137537, "grad_norm": 1.15252277004187, "learning_rate": 1.5771308025816373e-05, "loss": 0.6002312898635864, "step": 4243 }, { "epoch": 0.6783345320866299, "grad_norm": 1.4351393087760131, "learning_rate": 1.5769149692543115e-05, "loss": 0.7814815640449524, "step": 4244 }, { "epoch": 0.6784943658595061, "grad_norm": 1.2493164661454514, "learning_rate": 1.5766990956372926e-05, "loss": 0.7084025740623474, "step": 4245 }, { "epoch": 0.6786541996323823, "grad_norm": 1.2263427325362417, "learning_rate": 1.576483181745657e-05, "loss": 0.6235140562057495, "step": 4246 }, { "epoch": 0.6788140334052585, "grad_norm": 1.1843367606190491, "learning_rate": 1.5762672275944826e-05, "loss": 0.5141526460647583, "step": 4247 }, { "epoch": 0.6789738671781348, "grad_norm": 1.3338623722067593, "learning_rate": 1.576051233198851e-05, "loss": 0.6528427004814148, "step": 4248 }, { "epoch": 0.679133700951011, "grad_norm": 1.1390572149684992, "learning_rate": 1.5758351985738473e-05, "loss": 0.5337481498718262, "step": 4249 }, { "epoch": 0.6792935347238872, "grad_norm": 1.3871549582300218, "learning_rate": 1.575619123734558e-05, "loss": 0.6200884580612183, "step": 4250 }, { "epoch": 0.6794533684967634, "grad_norm": 1.4587019282434737, "learning_rate": 1.5754030086960726e-05, "loss": 0.6583009958267212, "step": 4251 }, { "epoch": 0.6796132022696396, "grad_norm": 1.109522369926079, "learning_rate": 1.5751868534734844e-05, "loss": 0.6127034425735474, "step": 4252 }, { "epoch": 0.6797730360425158, "grad_norm": 1.3253484541294847, "learning_rate": 1.5749706580818883e-05, "loss": 0.535248339176178, "step": 4253 }, { "epoch": 0.679932869815392, "grad_norm": 1.2835589700658228, "learning_rate": 1.5747544225363834e-05, "loss": 0.7188451886177063, "step": 4254 }, { "epoch": 0.6800927035882682, "grad_norm": 1.2353956464788218, "learning_rate": 1.57453814685207e-05, "loss": 0.6473987698554993, "step": 4255 }, { "epoch": 0.6802525373611444, "grad_norm": 1.2033037723439701, "learning_rate": 1.574321831044053e-05, "loss": 0.6917853355407715, "step": 4256 }, { "epoch": 0.6804123711340206, "grad_norm": 1.4085915892152852, "learning_rate": 1.5741054751274383e-05, "loss": 0.6737395524978638, "step": 4257 }, { "epoch": 0.6805722049068968, "grad_norm": 1.3429377924372181, "learning_rate": 1.573889079117336e-05, "loss": 0.6678279042243958, "step": 4258 }, { "epoch": 0.680732038679773, "grad_norm": 1.2884124167820008, "learning_rate": 1.573672643028858e-05, "loss": 0.662420392036438, "step": 4259 }, { "epoch": 0.6808918724526493, "grad_norm": 1.3574557886090415, "learning_rate": 1.5734561668771198e-05, "loss": 0.7534719705581665, "step": 4260 }, { "epoch": 0.6810517062255255, "grad_norm": 1.356158513290811, "learning_rate": 1.573239650677239e-05, "loss": 0.6370917558670044, "step": 4261 }, { "epoch": 0.6812115399984017, "grad_norm": 1.5070985255313243, "learning_rate": 1.5730230944443362e-05, "loss": 0.715965986251831, "step": 4262 }, { "epoch": 0.6813713737712779, "grad_norm": 1.2920092236402803, "learning_rate": 1.5728064981935352e-05, "loss": 0.7181332111358643, "step": 4263 }, { "epoch": 0.6815312075441541, "grad_norm": 1.1867146182650123, "learning_rate": 1.5725898619399625e-05, "loss": 0.654338538646698, "step": 4264 }, { "epoch": 0.6816910413170303, "grad_norm": 1.3201305356375295, "learning_rate": 1.5723731856987466e-05, "loss": 0.589614748954773, "step": 4265 }, { "epoch": 0.6818508750899065, "grad_norm": 1.148083591331243, "learning_rate": 1.57215646948502e-05, "loss": 0.6890592575073242, "step": 4266 }, { "epoch": 0.6820107088627827, "grad_norm": 1.189945583863712, "learning_rate": 1.5719397133139172e-05, "loss": 0.4721214771270752, "step": 4267 }, { "epoch": 0.6821705426356589, "grad_norm": 1.2612540765896487, "learning_rate": 1.5717229172005756e-05, "loss": 0.6660376787185669, "step": 4268 }, { "epoch": 0.6823303764085351, "grad_norm": 1.374279947026279, "learning_rate": 1.5715060811601354e-05, "loss": 0.6590025424957275, "step": 4269 }, { "epoch": 0.6824902101814113, "grad_norm": 1.2489578450170435, "learning_rate": 1.57128920520774e-05, "loss": 0.5263628959655762, "step": 4270 }, { "epoch": 0.6826500439542875, "grad_norm": 1.4904173221108208, "learning_rate": 1.571072289358535e-05, "loss": 0.6157386302947998, "step": 4271 }, { "epoch": 0.6828098777271637, "grad_norm": 1.3159026078940959, "learning_rate": 1.570855333627669e-05, "loss": 0.6070156693458557, "step": 4272 }, { "epoch": 0.6829697115000399, "grad_norm": 1.2837721665030886, "learning_rate": 1.5706383380302936e-05, "loss": 0.5606510639190674, "step": 4273 }, { "epoch": 0.6831295452729161, "grad_norm": 1.5454359726799465, "learning_rate": 1.5704213025815623e-05, "loss": 0.5686109066009521, "step": 4274 }, { "epoch": 0.6832893790457923, "grad_norm": 1.262369932251246, "learning_rate": 1.570204227296633e-05, "loss": 0.5848813056945801, "step": 4275 }, { "epoch": 0.6834492128186686, "grad_norm": 1.2328968780140444, "learning_rate": 1.5699871121906655e-05, "loss": 0.6612439155578613, "step": 4276 }, { "epoch": 0.6836090465915448, "grad_norm": 1.2928938828678256, "learning_rate": 1.5697699572788213e-05, "loss": 0.5880473852157593, "step": 4277 }, { "epoch": 0.683768880364421, "grad_norm": 1.477836148681659, "learning_rate": 1.569552762576267e-05, "loss": 0.6603419780731201, "step": 4278 }, { "epoch": 0.6839287141372972, "grad_norm": 1.2107689021630366, "learning_rate": 1.5693355280981698e-05, "loss": 0.4984707236289978, "step": 4279 }, { "epoch": 0.6840885479101734, "grad_norm": 1.212556215702773, "learning_rate": 1.5691182538597006e-05, "loss": 0.5749428868293762, "step": 4280 }, { "epoch": 0.6842483816830496, "grad_norm": 1.3728246595459848, "learning_rate": 1.5689009398760336e-05, "loss": 0.6212165951728821, "step": 4281 }, { "epoch": 0.6844082154559258, "grad_norm": 1.4854465833643695, "learning_rate": 1.5686835861623455e-05, "loss": 0.6657037734985352, "step": 4282 }, { "epoch": 0.6845680492288021, "grad_norm": 1.2480478498533296, "learning_rate": 1.5684661927338144e-05, "loss": 0.5770517587661743, "step": 4283 }, { "epoch": 0.6847278830016783, "grad_norm": 1.3935134177899429, "learning_rate": 1.568248759605623e-05, "loss": 0.6620165109634399, "step": 4284 }, { "epoch": 0.6848877167745545, "grad_norm": 1.4341041164896038, "learning_rate": 1.568031286792956e-05, "loss": 0.6530041694641113, "step": 4285 }, { "epoch": 0.6850475505474307, "grad_norm": 1.2807159646698236, "learning_rate": 1.567813774311001e-05, "loss": 0.7145953178405762, "step": 4286 }, { "epoch": 0.6852073843203069, "grad_norm": 1.2375228650959493, "learning_rate": 1.5675962221749478e-05, "loss": 0.536206841468811, "step": 4287 }, { "epoch": 0.6853672180931831, "grad_norm": 1.34412461172051, "learning_rate": 1.5673786303999902e-05, "loss": 0.5162456035614014, "step": 4288 }, { "epoch": 0.6855270518660593, "grad_norm": 1.4528437197795891, "learning_rate": 1.5671609990013236e-05, "loss": 0.8231456279754639, "step": 4289 }, { "epoch": 0.6856868856389355, "grad_norm": 1.2650884361439483, "learning_rate": 1.5669433279941468e-05, "loss": 0.6717678904533386, "step": 4290 }, { "epoch": 0.6858467194118117, "grad_norm": 1.3138853675891033, "learning_rate": 1.5667256173936608e-05, "loss": 0.5814077854156494, "step": 4291 }, { "epoch": 0.686006553184688, "grad_norm": 1.22497942854221, "learning_rate": 1.5665078672150703e-05, "loss": 0.6185094714164734, "step": 4292 }, { "epoch": 0.6861663869575642, "grad_norm": 1.1758591685053088, "learning_rate": 1.5662900774735814e-05, "loss": 0.566455602645874, "step": 4293 }, { "epoch": 0.6863262207304404, "grad_norm": 1.290294600042456, "learning_rate": 1.5660722481844048e-05, "loss": 0.5730017423629761, "step": 4294 }, { "epoch": 0.6864860545033166, "grad_norm": 1.4355947779155678, "learning_rate": 1.565854379362752e-05, "loss": 0.6155259013175964, "step": 4295 }, { "epoch": 0.6866458882761928, "grad_norm": 1.2753640336761203, "learning_rate": 1.5656364710238386e-05, "loss": 0.6218005418777466, "step": 4296 }, { "epoch": 0.686805722049069, "grad_norm": 1.42479607651317, "learning_rate": 1.5654185231828824e-05, "loss": 0.5968587398529053, "step": 4297 }, { "epoch": 0.6869655558219452, "grad_norm": 1.1366656836800126, "learning_rate": 1.5652005358551044e-05, "loss": 0.5296273827552795, "step": 4298 }, { "epoch": 0.6871253895948214, "grad_norm": 1.2985957916504947, "learning_rate": 1.5649825090557277e-05, "loss": 0.6057494282722473, "step": 4299 }, { "epoch": 0.6872852233676976, "grad_norm": 1.4470074997970832, "learning_rate": 1.5647644427999788e-05, "loss": 0.646307110786438, "step": 4300 }, { "epoch": 0.6874450571405738, "grad_norm": 1.2606642486211772, "learning_rate": 1.5645463371030866e-05, "loss": 0.5769599676132202, "step": 4301 }, { "epoch": 0.68760489091345, "grad_norm": 1.057169603782747, "learning_rate": 1.564328191980283e-05, "loss": 0.45767444372177124, "step": 4302 }, { "epoch": 0.6877647246863262, "grad_norm": 1.4543110816467895, "learning_rate": 1.5641100074468018e-05, "loss": 0.6805859804153442, "step": 4303 }, { "epoch": 0.6879245584592024, "grad_norm": 1.2759380058347243, "learning_rate": 1.563891783517881e-05, "loss": 0.6021884679794312, "step": 4304 }, { "epoch": 0.6880843922320786, "grad_norm": 1.4615988947762095, "learning_rate": 1.5636735202087602e-05, "loss": 0.6917456388473511, "step": 4305 }, { "epoch": 0.6882442260049548, "grad_norm": 1.4331326684482024, "learning_rate": 1.563455217534682e-05, "loss": 0.5073557496070862, "step": 4306 }, { "epoch": 0.688404059777831, "grad_norm": 1.2325548466866914, "learning_rate": 1.5632368755108926e-05, "loss": 0.6205089092254639, "step": 4307 }, { "epoch": 0.6885638935507072, "grad_norm": 1.411969283562265, "learning_rate": 1.5630184941526396e-05, "loss": 0.676753044128418, "step": 4308 }, { "epoch": 0.6887237273235834, "grad_norm": 1.1986361979036488, "learning_rate": 1.5628000734751737e-05, "loss": 0.47656774520874023, "step": 4309 }, { "epoch": 0.6888835610964597, "grad_norm": 1.507409175790454, "learning_rate": 1.5625816134937494e-05, "loss": 0.6754735708236694, "step": 4310 }, { "epoch": 0.6890433948693359, "grad_norm": 1.3089181423984493, "learning_rate": 1.5623631142236228e-05, "loss": 0.5785632133483887, "step": 4311 }, { "epoch": 0.6892032286422121, "grad_norm": 1.1713285502705022, "learning_rate": 1.5621445756800525e-05, "loss": 0.4888584017753601, "step": 4312 }, { "epoch": 0.6893630624150883, "grad_norm": 1.1966153905328702, "learning_rate": 1.561925997878302e-05, "loss": 0.6239172220230103, "step": 4313 }, { "epoch": 0.6895228961879645, "grad_norm": 1.3965253760112302, "learning_rate": 1.5617073808336348e-05, "loss": 0.5342088937759399, "step": 4314 }, { "epoch": 0.6896827299608407, "grad_norm": 1.2941367974583031, "learning_rate": 1.5614887245613185e-05, "loss": 0.5609861612319946, "step": 4315 }, { "epoch": 0.6898425637337169, "grad_norm": 1.2296104982052793, "learning_rate": 1.561270029076623e-05, "loss": 0.657428503036499, "step": 4316 }, { "epoch": 0.6900023975065931, "grad_norm": 1.1197571411184146, "learning_rate": 1.561051294394822e-05, "loss": 0.590399980545044, "step": 4317 }, { "epoch": 0.6901622312794694, "grad_norm": 1.6473533354043748, "learning_rate": 1.560832520531191e-05, "loss": 0.7028690576553345, "step": 4318 }, { "epoch": 0.6903220650523456, "grad_norm": 1.577593459954699, "learning_rate": 1.5606137075010077e-05, "loss": 0.6324266791343689, "step": 4319 }, { "epoch": 0.6904818988252218, "grad_norm": 1.2277317784415787, "learning_rate": 1.5603948553195537e-05, "loss": 0.7810221910476685, "step": 4320 }, { "epoch": 0.690641732598098, "grad_norm": 1.345689924104906, "learning_rate": 1.560175964002113e-05, "loss": 0.5814578533172607, "step": 4321 }, { "epoch": 0.6908015663709742, "grad_norm": 1.3977272750309813, "learning_rate": 1.559957033563972e-05, "loss": 0.6780418157577515, "step": 4322 }, { "epoch": 0.6909614001438504, "grad_norm": 1.3476199273661826, "learning_rate": 1.55973806402042e-05, "loss": 0.5512795448303223, "step": 4323 }, { "epoch": 0.6911212339167266, "grad_norm": 1.3501135214676485, "learning_rate": 1.559519055386749e-05, "loss": 0.7218500375747681, "step": 4324 }, { "epoch": 0.6912810676896028, "grad_norm": 1.3804180157622026, "learning_rate": 1.5593000076782542e-05, "loss": 0.6850324869155884, "step": 4325 }, { "epoch": 0.691440901462479, "grad_norm": 1.3154594431668272, "learning_rate": 1.559080920910233e-05, "loss": 0.5292237401008606, "step": 4326 }, { "epoch": 0.6916007352353553, "grad_norm": 1.4259979573716053, "learning_rate": 1.5588617950979846e-05, "loss": 0.6316179037094116, "step": 4327 }, { "epoch": 0.6917605690082315, "grad_norm": 1.187087996023202, "learning_rate": 1.5586426302568132e-05, "loss": 0.4788965582847595, "step": 4328 }, { "epoch": 0.6919204027811077, "grad_norm": 1.3488934223612827, "learning_rate": 1.5584234264020243e-05, "loss": 0.6495994329452515, "step": 4329 }, { "epoch": 0.6920802365539839, "grad_norm": 1.3887877446083994, "learning_rate": 1.558204183548926e-05, "loss": 0.8204209804534912, "step": 4330 }, { "epoch": 0.6922400703268601, "grad_norm": 1.652938995082815, "learning_rate": 1.5579849017128297e-05, "loss": 0.5634127855300903, "step": 4331 }, { "epoch": 0.6923999040997363, "grad_norm": 1.3527245405706732, "learning_rate": 1.557765580909049e-05, "loss": 0.6030218005180359, "step": 4332 }, { "epoch": 0.6925597378726125, "grad_norm": 1.4896207587592272, "learning_rate": 1.5575462211529006e-05, "loss": 0.5791009068489075, "step": 4333 }, { "epoch": 0.6927195716454887, "grad_norm": 1.407502718950911, "learning_rate": 1.5573268224597042e-05, "loss": 0.5841989517211914, "step": 4334 }, { "epoch": 0.6928794054183649, "grad_norm": 1.342159171251052, "learning_rate": 1.5571073848447808e-05, "loss": 0.6068426370620728, "step": 4335 }, { "epoch": 0.6930392391912411, "grad_norm": 1.4653901975364503, "learning_rate": 1.5568879083234562e-05, "loss": 0.7039973735809326, "step": 4336 }, { "epoch": 0.6931990729641173, "grad_norm": 1.450999132416352, "learning_rate": 1.5566683929110573e-05, "loss": 0.7472468614578247, "step": 4337 }, { "epoch": 0.6933589067369935, "grad_norm": 1.315161424702277, "learning_rate": 1.5564488386229145e-05, "loss": 0.7569712400436401, "step": 4338 }, { "epoch": 0.6935187405098697, "grad_norm": 1.3465015252889518, "learning_rate": 1.5562292454743607e-05, "loss": 0.7333941459655762, "step": 4339 }, { "epoch": 0.6936785742827459, "grad_norm": 1.4703931623222646, "learning_rate": 1.5560096134807316e-05, "loss": 0.5352751016616821, "step": 4340 }, { "epoch": 0.6938384080556221, "grad_norm": 1.2952500007512338, "learning_rate": 1.555789942657365e-05, "loss": 0.5975357294082642, "step": 4341 }, { "epoch": 0.6939982418284983, "grad_norm": 1.3849161370239265, "learning_rate": 1.5555702330196024e-05, "loss": 0.6680762767791748, "step": 4342 }, { "epoch": 0.6941580756013745, "grad_norm": 1.2631260748856419, "learning_rate": 1.5553504845827874e-05, "loss": 0.5898943543434143, "step": 4343 }, { "epoch": 0.6943179093742508, "grad_norm": 1.2621825218878704, "learning_rate": 1.5551306973622663e-05, "loss": 0.4895029664039612, "step": 4344 }, { "epoch": 0.694477743147127, "grad_norm": 1.1072074490348454, "learning_rate": 1.5549108713733887e-05, "loss": 0.5438809990882874, "step": 4345 }, { "epoch": 0.6946375769200032, "grad_norm": 1.6069041262564356, "learning_rate": 1.5546910066315062e-05, "loss": 0.8113988637924194, "step": 4346 }, { "epoch": 0.6947974106928794, "grad_norm": 0.994771795647336, "learning_rate": 1.5544711031519736e-05, "loss": 0.5576146841049194, "step": 4347 }, { "epoch": 0.6949572444657556, "grad_norm": 1.2521975523709565, "learning_rate": 1.5542511609501472e-05, "loss": 0.5587736368179321, "step": 4348 }, { "epoch": 0.6951170782386318, "grad_norm": 1.364810906425113, "learning_rate": 1.5540311800413885e-05, "loss": 0.74081951379776, "step": 4349 }, { "epoch": 0.695276912011508, "grad_norm": 1.5778093918503588, "learning_rate": 1.5538111604410588e-05, "loss": 0.6039489507675171, "step": 4350 }, { "epoch": 0.6954367457843842, "grad_norm": 1.3268473514511983, "learning_rate": 1.5535911021645244e-05, "loss": 0.5944502353668213, "step": 4351 }, { "epoch": 0.6955965795572604, "grad_norm": 1.495743522719976, "learning_rate": 1.5533710052271532e-05, "loss": 0.7075603008270264, "step": 4352 }, { "epoch": 0.6957564133301366, "grad_norm": 1.3287189233666845, "learning_rate": 1.5531508696443158e-05, "loss": 0.6814631223678589, "step": 4353 }, { "epoch": 0.6959162471030129, "grad_norm": 1.4544140147513214, "learning_rate": 1.5529306954313858e-05, "loss": 0.5405410528182983, "step": 4354 }, { "epoch": 0.6960760808758891, "grad_norm": 1.2155604267075686, "learning_rate": 1.5527104826037393e-05, "loss": 0.6304495334625244, "step": 4355 }, { "epoch": 0.6962359146487653, "grad_norm": 1.2533319510544567, "learning_rate": 1.5524902311767552e-05, "loss": 0.5591188669204712, "step": 4356 }, { "epoch": 0.6963957484216415, "grad_norm": 1.1622258991898644, "learning_rate": 1.552269941165815e-05, "loss": 0.4889660179615021, "step": 4357 }, { "epoch": 0.6965555821945177, "grad_norm": 1.7056110862306222, "learning_rate": 1.5520496125863035e-05, "loss": 0.7451059222221375, "step": 4358 }, { "epoch": 0.696715415967394, "grad_norm": 1.5778245984959458, "learning_rate": 1.551829245453607e-05, "loss": 0.7189105749130249, "step": 4359 }, { "epoch": 0.6968752497402702, "grad_norm": 1.3745673988151732, "learning_rate": 1.5516088397831154e-05, "loss": 0.5207632780075073, "step": 4360 }, { "epoch": 0.6970350835131464, "grad_norm": 1.152606466012107, "learning_rate": 1.5513883955902208e-05, "loss": 0.6301389932632446, "step": 4361 }, { "epoch": 0.6971949172860226, "grad_norm": 1.2159376867169776, "learning_rate": 1.551167912890319e-05, "loss": 0.683461606502533, "step": 4362 }, { "epoch": 0.6973547510588988, "grad_norm": 1.4510200557290835, "learning_rate": 1.550947391698807e-05, "loss": 0.6328255534172058, "step": 4363 }, { "epoch": 0.697514584831775, "grad_norm": 1.295165698410092, "learning_rate": 1.5507268320310854e-05, "loss": 0.5391401648521423, "step": 4364 }, { "epoch": 0.6976744186046512, "grad_norm": 1.0298356476661856, "learning_rate": 1.5505062339025575e-05, "loss": 0.47483494877815247, "step": 4365 }, { "epoch": 0.6978342523775274, "grad_norm": 1.309801435236502, "learning_rate": 1.5502855973286285e-05, "loss": 0.8341698050498962, "step": 4366 }, { "epoch": 0.6979940861504036, "grad_norm": 2.560254905634445, "learning_rate": 1.5500649223247076e-05, "loss": 0.7134487628936768, "step": 4367 }, { "epoch": 0.6981539199232798, "grad_norm": 1.4148192177210397, "learning_rate": 1.5498442089062057e-05, "loss": 0.6526241302490234, "step": 4368 }, { "epoch": 0.698313753696156, "grad_norm": 1.3735920724201889, "learning_rate": 1.5496234570885365e-05, "loss": 0.6709903478622437, "step": 4369 }, { "epoch": 0.6984735874690322, "grad_norm": 1.2396693992722791, "learning_rate": 1.549402666887117e-05, "loss": 0.5549858808517456, "step": 4370 }, { "epoch": 0.6986334212419084, "grad_norm": 1.3271475384025488, "learning_rate": 1.549181838317366e-05, "loss": 0.5442149639129639, "step": 4371 }, { "epoch": 0.6987932550147846, "grad_norm": 1.2989502274441718, "learning_rate": 1.548960971394705e-05, "loss": 0.6269391775131226, "step": 4372 }, { "epoch": 0.6989530887876608, "grad_norm": 1.2768210956193615, "learning_rate": 1.5487400661345592e-05, "loss": 0.6862353682518005, "step": 4373 }, { "epoch": 0.699112922560537, "grad_norm": 1.3623602653700577, "learning_rate": 1.5485191225523556e-05, "loss": 0.6935453414916992, "step": 4374 }, { "epoch": 0.6992727563334132, "grad_norm": 1.4260162853710403, "learning_rate": 1.548298140663524e-05, "loss": 0.6307376623153687, "step": 4375 }, { "epoch": 0.6994325901062894, "grad_norm": 1.3470179888106397, "learning_rate": 1.5480771204834976e-05, "loss": 0.6694780588150024, "step": 4376 }, { "epoch": 0.6995924238791656, "grad_norm": 1.555410437802373, "learning_rate": 1.5478560620277106e-05, "loss": 0.497622549533844, "step": 4377 }, { "epoch": 0.6997522576520419, "grad_norm": 1.2264619221164461, "learning_rate": 1.547634965311602e-05, "loss": 0.5716407299041748, "step": 4378 }, { "epoch": 0.6999120914249181, "grad_norm": 1.120502588284187, "learning_rate": 1.5474138303506117e-05, "loss": 0.5949844121932983, "step": 4379 }, { "epoch": 0.7000719251977943, "grad_norm": 1.4497245272897636, "learning_rate": 1.5471926571601832e-05, "loss": 0.5760568380355835, "step": 4380 }, { "epoch": 0.7002317589706705, "grad_norm": 1.4144716375533872, "learning_rate": 1.5469714457557627e-05, "loss": 0.7532598972320557, "step": 4381 }, { "epoch": 0.7003915927435467, "grad_norm": 1.4593302413279277, "learning_rate": 1.5467501961527983e-05, "loss": 0.5405452251434326, "step": 4382 }, { "epoch": 0.7005514265164229, "grad_norm": 1.5100152836361547, "learning_rate": 1.546528908366742e-05, "loss": 0.5899060964584351, "step": 4383 }, { "epoch": 0.7007112602892991, "grad_norm": 1.374168592138032, "learning_rate": 1.546307582413047e-05, "loss": 0.5614064931869507, "step": 4384 }, { "epoch": 0.7008710940621753, "grad_norm": 1.4652693342989356, "learning_rate": 1.5460862183071703e-05, "loss": 0.6164522171020508, "step": 4385 }, { "epoch": 0.7010309278350515, "grad_norm": 1.378708231438225, "learning_rate": 1.545864816064571e-05, "loss": 0.5098041892051697, "step": 4386 }, { "epoch": 0.7011907616079277, "grad_norm": 1.3941626215303775, "learning_rate": 1.5456433757007115e-05, "loss": 0.5052299499511719, "step": 4387 }, { "epoch": 0.7013505953808039, "grad_norm": 1.1871904875690882, "learning_rate": 1.5454218972310558e-05, "loss": 0.6013035774230957, "step": 4388 }, { "epoch": 0.7015104291536802, "grad_norm": 1.4803667302958459, "learning_rate": 1.5452003806710715e-05, "loss": 0.6595964431762695, "step": 4389 }, { "epoch": 0.7016702629265564, "grad_norm": 1.1528838770916372, "learning_rate": 1.5449788260362285e-05, "loss": 0.5392903089523315, "step": 4390 }, { "epoch": 0.7018300966994326, "grad_norm": 1.477884692415671, "learning_rate": 1.5447572333419996e-05, "loss": 0.7124717235565186, "step": 4391 }, { "epoch": 0.7019899304723088, "grad_norm": 1.3274412703280016, "learning_rate": 1.5445356026038595e-05, "loss": 0.551252007484436, "step": 4392 }, { "epoch": 0.702149764245185, "grad_norm": 1.3010438346865638, "learning_rate": 1.544313933837287e-05, "loss": 0.6914262771606445, "step": 4393 }, { "epoch": 0.7023095980180613, "grad_norm": 1.1474911623690045, "learning_rate": 1.5440922270577618e-05, "loss": 0.5054196119308472, "step": 4394 }, { "epoch": 0.7024694317909375, "grad_norm": 1.3876774189290932, "learning_rate": 1.5438704822807672e-05, "loss": 0.539471447467804, "step": 4395 }, { "epoch": 0.7026292655638137, "grad_norm": 1.3316603511513274, "learning_rate": 1.543648699521789e-05, "loss": 0.5308753848075867, "step": 4396 }, { "epoch": 0.7027890993366899, "grad_norm": 1.4114543833581636, "learning_rate": 1.5434268787963165e-05, "loss": 0.43718254566192627, "step": 4397 }, { "epoch": 0.7029489331095661, "grad_norm": 1.4410940423239111, "learning_rate": 1.54320502011984e-05, "loss": 0.5253105759620667, "step": 4398 }, { "epoch": 0.7031087668824423, "grad_norm": 1.2906097835236512, "learning_rate": 1.542983123507854e-05, "loss": 0.5879933834075928, "step": 4399 }, { "epoch": 0.7032686006553185, "grad_norm": 1.2803649910378014, "learning_rate": 1.5427611889758544e-05, "loss": 0.6787407994270325, "step": 4400 }, { "epoch": 0.7034284344281947, "grad_norm": 1.3882775861692769, "learning_rate": 1.542539216539341e-05, "loss": 0.6526713371276855, "step": 4401 }, { "epoch": 0.7035882682010709, "grad_norm": 1.4011390589391122, "learning_rate": 1.542317206213815e-05, "loss": 0.6611223220825195, "step": 4402 }, { "epoch": 0.7037481019739471, "grad_norm": 1.4188364486354585, "learning_rate": 1.5420951580147807e-05, "loss": 0.7498570680618286, "step": 4403 }, { "epoch": 0.7039079357468233, "grad_norm": 1.3954274902858421, "learning_rate": 1.5418730719577454e-05, "loss": 0.6839203238487244, "step": 4404 }, { "epoch": 0.7040677695196995, "grad_norm": 1.8367912769812256, "learning_rate": 1.541650948058219e-05, "loss": 0.53312087059021, "step": 4405 }, { "epoch": 0.7042276032925757, "grad_norm": 1.3151289292841264, "learning_rate": 1.5414287863317136e-05, "loss": 0.5052888989448547, "step": 4406 }, { "epoch": 0.7043874370654519, "grad_norm": 1.3315158196737038, "learning_rate": 1.541206586793744e-05, "loss": 0.4815618097782135, "step": 4407 }, { "epoch": 0.7045472708383281, "grad_norm": 1.4376179006060437, "learning_rate": 1.5409843494598286e-05, "loss": 0.6066290140151978, "step": 4408 }, { "epoch": 0.7047071046112043, "grad_norm": 1.40946609493479, "learning_rate": 1.5407620743454868e-05, "loss": 0.5919980406761169, "step": 4409 }, { "epoch": 0.7048669383840805, "grad_norm": 1.5240147921500093, "learning_rate": 1.540539761466242e-05, "loss": 0.7211695909500122, "step": 4410 }, { "epoch": 0.7050267721569567, "grad_norm": 1.6379083043639886, "learning_rate": 1.5403174108376196e-05, "loss": 0.6497873067855835, "step": 4411 }, { "epoch": 0.705186605929833, "grad_norm": 1.3859902350624942, "learning_rate": 1.5400950224751475e-05, "loss": 0.5983560085296631, "step": 4412 }, { "epoch": 0.7053464397027092, "grad_norm": 1.2750356323305454, "learning_rate": 1.539872596394357e-05, "loss": 0.5550751090049744, "step": 4413 }, { "epoch": 0.7055062734755854, "grad_norm": 1.1642605269350015, "learning_rate": 1.539650132610781e-05, "loss": 0.5861295461654663, "step": 4414 }, { "epoch": 0.7056661072484616, "grad_norm": 1.244401401168155, "learning_rate": 1.5394276311399564e-05, "loss": 0.6569851636886597, "step": 4415 }, { "epoch": 0.7058259410213378, "grad_norm": 1.364307779086289, "learning_rate": 1.539205091997421e-05, "loss": 0.5336368083953857, "step": 4416 }, { "epoch": 0.705985774794214, "grad_norm": 1.319239332346677, "learning_rate": 1.538982515198717e-05, "loss": 0.5130893588066101, "step": 4417 }, { "epoch": 0.7061456085670902, "grad_norm": 1.2468279313889519, "learning_rate": 1.5387599007593873e-05, "loss": 0.5555154085159302, "step": 4418 }, { "epoch": 0.7063054423399664, "grad_norm": 1.6046148460501333, "learning_rate": 1.5385372486949795e-05, "loss": 0.6235542893409729, "step": 4419 }, { "epoch": 0.7064652761128426, "grad_norm": 1.3766550742707002, "learning_rate": 1.5383145590210428e-05, "loss": 0.5791348218917847, "step": 4420 }, { "epoch": 0.7066251098857188, "grad_norm": 1.3426547985799377, "learning_rate": 1.5380918317531283e-05, "loss": 0.6608951091766357, "step": 4421 }, { "epoch": 0.706784943658595, "grad_norm": 1.2912234573457857, "learning_rate": 1.537869066906791e-05, "loss": 0.6589933633804321, "step": 4422 }, { "epoch": 0.7069447774314712, "grad_norm": 1.2574079035434493, "learning_rate": 1.537646264497588e-05, "loss": 0.6096375584602356, "step": 4423 }, { "epoch": 0.7071046112043475, "grad_norm": 1.496925281563457, "learning_rate": 1.5374234245410787e-05, "loss": 0.5722351670265198, "step": 4424 }, { "epoch": 0.7072644449772237, "grad_norm": 1.4321382733302355, "learning_rate": 1.537200547052826e-05, "loss": 0.5572649240493774, "step": 4425 }, { "epoch": 0.7074242787500999, "grad_norm": 1.2616458639122607, "learning_rate": 1.5369776320483946e-05, "loss": 0.6124293804168701, "step": 4426 }, { "epoch": 0.7075841125229761, "grad_norm": 1.6050730373081803, "learning_rate": 1.5367546795433517e-05, "loss": 0.6334308385848999, "step": 4427 }, { "epoch": 0.7077439462958524, "grad_norm": 1.1965667078497226, "learning_rate": 1.536531689553268e-05, "loss": 0.634414553642273, "step": 4428 }, { "epoch": 0.7079037800687286, "grad_norm": 1.8705424003071953, "learning_rate": 1.5363086620937164e-05, "loss": 0.5493576526641846, "step": 4429 }, { "epoch": 0.7080636138416048, "grad_norm": 1.2513142177417294, "learning_rate": 1.5360855971802717e-05, "loss": 0.560366153717041, "step": 4430 }, { "epoch": 0.708223447614481, "grad_norm": 1.3500738816797075, "learning_rate": 1.535862494828513e-05, "loss": 0.8231971263885498, "step": 4431 }, { "epoch": 0.7083832813873572, "grad_norm": 1.3157225867650564, "learning_rate": 1.53563935505402e-05, "loss": 0.697882354259491, "step": 4432 }, { "epoch": 0.7085431151602334, "grad_norm": 1.4395546851525007, "learning_rate": 1.535416177872376e-05, "loss": 0.6732091903686523, "step": 4433 }, { "epoch": 0.7087029489331096, "grad_norm": 1.3697828057606918, "learning_rate": 1.535192963299168e-05, "loss": 0.6534626483917236, "step": 4434 }, { "epoch": 0.7088627827059858, "grad_norm": 1.2266555011661926, "learning_rate": 1.5349697113499835e-05, "loss": 0.7049668431282043, "step": 4435 }, { "epoch": 0.709022616478862, "grad_norm": 1.3021826569981212, "learning_rate": 1.534746422040414e-05, "loss": 0.5635584592819214, "step": 4436 }, { "epoch": 0.7091824502517382, "grad_norm": 1.2438407996395957, "learning_rate": 1.534523095386053e-05, "loss": 0.6934196352958679, "step": 4437 }, { "epoch": 0.7093422840246144, "grad_norm": 1.2761280103217238, "learning_rate": 1.5342997314024967e-05, "loss": 0.7011739015579224, "step": 4438 }, { "epoch": 0.7095021177974906, "grad_norm": 1.3268422452327566, "learning_rate": 1.534076330105345e-05, "loss": 0.603807806968689, "step": 4439 }, { "epoch": 0.7096619515703668, "grad_norm": 1.3550883603435426, "learning_rate": 1.533852891510198e-05, "loss": 0.6234951019287109, "step": 4440 }, { "epoch": 0.709821785343243, "grad_norm": 1.4220543638875194, "learning_rate": 1.5336294156326613e-05, "loss": 0.7576762437820435, "step": 4441 }, { "epoch": 0.7099816191161192, "grad_norm": 1.3053603762144803, "learning_rate": 1.5334059024883406e-05, "loss": 0.6285529136657715, "step": 4442 }, { "epoch": 0.7101414528889954, "grad_norm": 1.3268535106447779, "learning_rate": 1.5331823520928462e-05, "loss": 0.5926616191864014, "step": 4443 }, { "epoch": 0.7103012866618716, "grad_norm": 1.3425072292113043, "learning_rate": 1.5329587644617888e-05, "loss": 0.7007814645767212, "step": 4444 }, { "epoch": 0.7104611204347479, "grad_norm": 1.5032690865151037, "learning_rate": 1.532735139610784e-05, "loss": 0.587043046951294, "step": 4445 }, { "epoch": 0.7106209542076241, "grad_norm": 1.2696242018264945, "learning_rate": 1.532511477555449e-05, "loss": 0.5790164470672607, "step": 4446 }, { "epoch": 0.7107807879805003, "grad_norm": 1.2746237369250182, "learning_rate": 1.5322877783114027e-05, "loss": 0.6695201396942139, "step": 4447 }, { "epoch": 0.7109406217533765, "grad_norm": 1.3921982223161076, "learning_rate": 1.5320640418942682e-05, "loss": 0.5640432238578796, "step": 4448 }, { "epoch": 0.7111004555262527, "grad_norm": 1.2827967961518416, "learning_rate": 1.5318402683196705e-05, "loss": 0.620405912399292, "step": 4449 }, { "epoch": 0.7112602892991289, "grad_norm": 1.3334660751510754, "learning_rate": 1.5316164576032368e-05, "loss": 0.679888904094696, "step": 4450 }, { "epoch": 0.7114201230720051, "grad_norm": 1.234298514511276, "learning_rate": 1.5313926097605972e-05, "loss": 0.5163414478302002, "step": 4451 }, { "epoch": 0.7115799568448813, "grad_norm": 1.2637505666106565, "learning_rate": 1.5311687248073845e-05, "loss": 0.701107382774353, "step": 4452 }, { "epoch": 0.7117397906177575, "grad_norm": 1.3537068642262584, "learning_rate": 1.5309448027592343e-05, "loss": 0.6872352361679077, "step": 4453 }, { "epoch": 0.7118996243906337, "grad_norm": 1.4486181866828385, "learning_rate": 1.5307208436317842e-05, "loss": 0.6221134662628174, "step": 4454 }, { "epoch": 0.7120594581635099, "grad_norm": 1.331449952156857, "learning_rate": 1.530496847440675e-05, "loss": 0.7233865261077881, "step": 4455 }, { "epoch": 0.7122192919363861, "grad_norm": 7.181022736465513, "learning_rate": 1.5302728142015494e-05, "loss": 0.5837693810462952, "step": 4456 }, { "epoch": 0.7123791257092623, "grad_norm": 1.3340755040388614, "learning_rate": 1.5300487439300534e-05, "loss": 0.6025350093841553, "step": 4457 }, { "epoch": 0.7125389594821385, "grad_norm": 1.3203825111443646, "learning_rate": 1.5298246366418353e-05, "loss": 0.7546097040176392, "step": 4458 }, { "epoch": 0.7126987932550148, "grad_norm": 1.1306189708323877, "learning_rate": 1.529600492352546e-05, "loss": 0.5051243305206299, "step": 4459 }, { "epoch": 0.712858627027891, "grad_norm": 1.349518355605583, "learning_rate": 1.5293763110778382e-05, "loss": 0.7500331401824951, "step": 4460 }, { "epoch": 0.7130184608007673, "grad_norm": 1.247452864180401, "learning_rate": 1.5291520928333693e-05, "loss": 0.6402784585952759, "step": 4461 }, { "epoch": 0.7131782945736435, "grad_norm": 1.4413488387417352, "learning_rate": 1.5289278376347966e-05, "loss": 0.6283614635467529, "step": 4462 }, { "epoch": 0.7133381283465197, "grad_norm": 1.2000171884863258, "learning_rate": 1.528703545497782e-05, "loss": 0.7055370807647705, "step": 4463 }, { "epoch": 0.7134979621193959, "grad_norm": 1.2371312393100025, "learning_rate": 1.528479216437989e-05, "loss": 0.780030369758606, "step": 4464 }, { "epoch": 0.7136577958922721, "grad_norm": 1.1492740484287465, "learning_rate": 1.5282548504710842e-05, "loss": 0.5775876641273499, "step": 4465 }, { "epoch": 0.7138176296651483, "grad_norm": 1.2876617819698968, "learning_rate": 1.528030447612736e-05, "loss": 0.6106696128845215, "step": 4466 }, { "epoch": 0.7139774634380245, "grad_norm": 1.2047962559335759, "learning_rate": 1.5278060078786166e-05, "loss": 0.5223783254623413, "step": 4467 }, { "epoch": 0.7141372972109007, "grad_norm": 1.2157700235464604, "learning_rate": 1.5275815312843994e-05, "loss": 0.6224316358566284, "step": 4468 }, { "epoch": 0.7142971309837769, "grad_norm": 1.2404722657594123, "learning_rate": 1.5273570178457618e-05, "loss": 0.7228315472602844, "step": 4469 }, { "epoch": 0.7144569647566531, "grad_norm": 1.652651430797409, "learning_rate": 1.5271324675783824e-05, "loss": 0.7216466665267944, "step": 4470 }, { "epoch": 0.7146167985295293, "grad_norm": 1.5987918955206484, "learning_rate": 1.526907880497943e-05, "loss": 0.6743906736373901, "step": 4471 }, { "epoch": 0.7147766323024055, "grad_norm": 1.164627262242454, "learning_rate": 1.5266832566201283e-05, "loss": 0.547457218170166, "step": 4472 }, { "epoch": 0.7149364660752817, "grad_norm": 1.4116732814179551, "learning_rate": 1.526458595960625e-05, "loss": 0.6524714231491089, "step": 4473 }, { "epoch": 0.7150962998481579, "grad_norm": 1.3024983673346004, "learning_rate": 1.5262338985351227e-05, "loss": 0.6484537124633789, "step": 4474 }, { "epoch": 0.7152561336210341, "grad_norm": 1.515086844001251, "learning_rate": 1.5260091643593135e-05, "loss": 0.6425312757492065, "step": 4475 }, { "epoch": 0.7154159673939103, "grad_norm": 1.2928233265688123, "learning_rate": 1.5257843934488924e-05, "loss": 0.6566544771194458, "step": 4476 }, { "epoch": 0.7155758011667865, "grad_norm": 1.437795868862639, "learning_rate": 1.5255595858195555e-05, "loss": 0.6582940220832825, "step": 4477 }, { "epoch": 0.7157356349396627, "grad_norm": 1.6670835111159648, "learning_rate": 1.5253347414870035e-05, "loss": 0.6672756671905518, "step": 4478 }, { "epoch": 0.715895468712539, "grad_norm": 1.5105061758341642, "learning_rate": 1.5251098604669387e-05, "loss": 0.5819997787475586, "step": 4479 }, { "epoch": 0.7160553024854152, "grad_norm": 1.2975419015804566, "learning_rate": 1.5248849427750654e-05, "loss": 0.6935105323791504, "step": 4480 }, { "epoch": 0.7162151362582914, "grad_norm": 1.3664299199634669, "learning_rate": 1.5246599884270919e-05, "loss": 0.6522039771080017, "step": 4481 }, { "epoch": 0.7163749700311676, "grad_norm": 1.5919738526394036, "learning_rate": 1.5244349974387276e-05, "loss": 0.907382071018219, "step": 4482 }, { "epoch": 0.7165348038040438, "grad_norm": 1.8199425413894996, "learning_rate": 1.5242099698256851e-05, "loss": 0.8395380973815918, "step": 4483 }, { "epoch": 0.71669463757692, "grad_norm": 1.3173892855212244, "learning_rate": 1.5239849056036795e-05, "loss": 0.5945100784301758, "step": 4484 }, { "epoch": 0.7168544713497962, "grad_norm": 1.3157871841684115, "learning_rate": 1.523759804788429e-05, "loss": 0.6848471760749817, "step": 4485 }, { "epoch": 0.7170143051226724, "grad_norm": 1.1940910510799643, "learning_rate": 1.5235346673956532e-05, "loss": 0.7497868537902832, "step": 4486 }, { "epoch": 0.7171741388955486, "grad_norm": 1.4691087945251817, "learning_rate": 1.5233094934410755e-05, "loss": 0.6729689240455627, "step": 4487 }, { "epoch": 0.7173339726684248, "grad_norm": 1.1776573368298033, "learning_rate": 1.5230842829404204e-05, "loss": 0.6609368324279785, "step": 4488 }, { "epoch": 0.717493806441301, "grad_norm": 1.4454749711863712, "learning_rate": 1.522859035909417e-05, "loss": 0.6264455318450928, "step": 4489 }, { "epoch": 0.7176536402141772, "grad_norm": 1.2350978056367576, "learning_rate": 1.5226337523637945e-05, "loss": 0.5792102813720703, "step": 4490 }, { "epoch": 0.7178134739870534, "grad_norm": 1.4566482593904913, "learning_rate": 1.5224084323192866e-05, "loss": 0.7849896550178528, "step": 4491 }, { "epoch": 0.7179733077599296, "grad_norm": 1.3049463543282027, "learning_rate": 1.5221830757916287e-05, "loss": 0.5567662715911865, "step": 4492 }, { "epoch": 0.7181331415328058, "grad_norm": 1.3791232443838264, "learning_rate": 1.5219576827965594e-05, "loss": 0.7115607857704163, "step": 4493 }, { "epoch": 0.718292975305682, "grad_norm": 1.473866024394651, "learning_rate": 1.5217322533498182e-05, "loss": 0.646552562713623, "step": 4494 }, { "epoch": 0.7184528090785584, "grad_norm": 1.21360870346978, "learning_rate": 1.5215067874671494e-05, "loss": 0.48284125328063965, "step": 4495 }, { "epoch": 0.7186126428514346, "grad_norm": 1.1346784743733667, "learning_rate": 1.5212812851642984e-05, "loss": 0.6016870141029358, "step": 4496 }, { "epoch": 0.7187724766243108, "grad_norm": 1.4086189057823395, "learning_rate": 1.5210557464570133e-05, "loss": 0.68163001537323, "step": 4497 }, { "epoch": 0.718932310397187, "grad_norm": 1.3375203389653618, "learning_rate": 1.5208301713610448e-05, "loss": 0.6514198780059814, "step": 4498 }, { "epoch": 0.7190921441700632, "grad_norm": 1.2768232120847567, "learning_rate": 1.5206045598921462e-05, "loss": 0.6009154319763184, "step": 4499 }, { "epoch": 0.7192519779429394, "grad_norm": 1.1045497625125122, "learning_rate": 1.5203789120660743e-05, "loss": 0.5714518427848816, "step": 4500 }, { "epoch": 0.7194118117158156, "grad_norm": 1.2649762939498634, "learning_rate": 1.5201532278985869e-05, "loss": 0.5751551389694214, "step": 4501 }, { "epoch": 0.7195716454886918, "grad_norm": 1.1879024713497943, "learning_rate": 1.5199275074054444e-05, "loss": 0.5830031037330627, "step": 4502 }, { "epoch": 0.719731479261568, "grad_norm": 1.2123862696079335, "learning_rate": 1.5197017506024113e-05, "loss": 0.6290950775146484, "step": 4503 }, { "epoch": 0.7198913130344442, "grad_norm": 1.3413414763052713, "learning_rate": 1.5194759575052533e-05, "loss": 0.8088889718055725, "step": 4504 }, { "epoch": 0.7200511468073204, "grad_norm": 1.3582863517065225, "learning_rate": 1.5192501281297392e-05, "loss": 0.6538453102111816, "step": 4505 }, { "epoch": 0.7202109805801966, "grad_norm": 1.6662422377471855, "learning_rate": 1.5190242624916396e-05, "loss": 0.6185595989227295, "step": 4506 }, { "epoch": 0.7203708143530728, "grad_norm": 1.372357024460066, "learning_rate": 1.5187983606067284e-05, "loss": 0.7068312764167786, "step": 4507 }, { "epoch": 0.720530648125949, "grad_norm": 1.2571758998199236, "learning_rate": 1.5185724224907818e-05, "loss": 0.6001952886581421, "step": 4508 }, { "epoch": 0.7206904818988252, "grad_norm": 1.2572175385438153, "learning_rate": 1.5183464481595786e-05, "loss": 0.5298947095870972, "step": 4509 }, { "epoch": 0.7208503156717014, "grad_norm": 1.2979359647592603, "learning_rate": 1.5181204376289e-05, "loss": 0.7260022163391113, "step": 4510 }, { "epoch": 0.7210101494445776, "grad_norm": 1.1671908104085635, "learning_rate": 1.5178943909145298e-05, "loss": 0.5894995331764221, "step": 4511 }, { "epoch": 0.7211699832174538, "grad_norm": 1.2772374551526782, "learning_rate": 1.5176683080322544e-05, "loss": 0.7053165435791016, "step": 4512 }, { "epoch": 0.72132981699033, "grad_norm": 1.4036351322485765, "learning_rate": 1.5174421889978623e-05, "loss": 0.49832218885421753, "step": 4513 }, { "epoch": 0.7214896507632063, "grad_norm": 1.3159510947947368, "learning_rate": 1.517216033827145e-05, "loss": 0.7266497611999512, "step": 4514 }, { "epoch": 0.7216494845360825, "grad_norm": 1.2678108434022815, "learning_rate": 1.5169898425358965e-05, "loss": 0.6354665756225586, "step": 4515 }, { "epoch": 0.7218093183089587, "grad_norm": 1.3055696774827277, "learning_rate": 1.516763615139913e-05, "loss": 0.6666424870491028, "step": 4516 }, { "epoch": 0.7219691520818349, "grad_norm": 1.4891699969405885, "learning_rate": 1.5165373516549934e-05, "loss": 0.6279739141464233, "step": 4517 }, { "epoch": 0.7221289858547111, "grad_norm": 1.3385461022539265, "learning_rate": 1.5163110520969396e-05, "loss": 0.7160278558731079, "step": 4518 }, { "epoch": 0.7222888196275873, "grad_norm": 1.4125060102269649, "learning_rate": 1.5160847164815548e-05, "loss": 0.6852515935897827, "step": 4519 }, { "epoch": 0.7224486534004635, "grad_norm": 1.2939677284678068, "learning_rate": 1.5158583448246462e-05, "loss": 0.5903686285018921, "step": 4520 }, { "epoch": 0.7226084871733397, "grad_norm": 1.194421646497477, "learning_rate": 1.5156319371420227e-05, "loss": 0.4288252592086792, "step": 4521 }, { "epoch": 0.7227683209462159, "grad_norm": 1.4893236337380873, "learning_rate": 1.515405493449495e-05, "loss": 0.618808388710022, "step": 4522 }, { "epoch": 0.7229281547190921, "grad_norm": 1.3710085817118525, "learning_rate": 1.5151790137628784e-05, "loss": 0.4998818039894104, "step": 4523 }, { "epoch": 0.7230879884919683, "grad_norm": 1.324978973125263, "learning_rate": 1.5149524980979882e-05, "loss": 0.667953610420227, "step": 4524 }, { "epoch": 0.7232478222648445, "grad_norm": 1.1415983795641882, "learning_rate": 1.514725946470644e-05, "loss": 0.6082590222358704, "step": 4525 }, { "epoch": 0.7234076560377207, "grad_norm": 1.1884308183744954, "learning_rate": 1.5144993588966676e-05, "loss": 0.5730463266372681, "step": 4526 }, { "epoch": 0.7235674898105969, "grad_norm": 1.4660977086633327, "learning_rate": 1.514272735391883e-05, "loss": 0.7463085651397705, "step": 4527 }, { "epoch": 0.7237273235834731, "grad_norm": 1.167940727796462, "learning_rate": 1.5140460759721163e-05, "loss": 0.7060977220535278, "step": 4528 }, { "epoch": 0.7238871573563493, "grad_norm": 1.503040046289746, "learning_rate": 1.513819380653197e-05, "loss": 0.7429288625717163, "step": 4529 }, { "epoch": 0.7240469911292257, "grad_norm": 1.1398077766723653, "learning_rate": 1.5135926494509567e-05, "loss": 0.5491526126861572, "step": 4530 }, { "epoch": 0.7242068249021019, "grad_norm": 1.4790135731069791, "learning_rate": 1.5133658823812292e-05, "loss": 0.6507843732833862, "step": 4531 }, { "epoch": 0.7243666586749781, "grad_norm": 1.208912254562926, "learning_rate": 1.5131390794598514e-05, "loss": 0.6692906618118286, "step": 4532 }, { "epoch": 0.7245264924478543, "grad_norm": 2.0166405687191493, "learning_rate": 1.5129122407026627e-05, "loss": 0.523192286491394, "step": 4533 }, { "epoch": 0.7246863262207305, "grad_norm": 1.4132447889215936, "learning_rate": 1.5126853661255036e-05, "loss": 0.6660757064819336, "step": 4534 }, { "epoch": 0.7248461599936067, "grad_norm": 1.2914796177096743, "learning_rate": 1.5124584557442197e-05, "loss": 0.5249812602996826, "step": 4535 }, { "epoch": 0.7250059937664829, "grad_norm": 1.288113278254904, "learning_rate": 1.5122315095746563e-05, "loss": 0.6773860454559326, "step": 4536 }, { "epoch": 0.7251658275393591, "grad_norm": 1.3364226192879964, "learning_rate": 1.5120045276326636e-05, "loss": 0.6288610696792603, "step": 4537 }, { "epoch": 0.7253256613122353, "grad_norm": 1.4060024763411143, "learning_rate": 1.5117775099340923e-05, "loss": 0.7927291393280029, "step": 4538 }, { "epoch": 0.7254854950851115, "grad_norm": 1.3261110370203497, "learning_rate": 1.511550456494797e-05, "loss": 0.701248049736023, "step": 4539 }, { "epoch": 0.7256453288579877, "grad_norm": 1.1878351503735203, "learning_rate": 1.511323367330634e-05, "loss": 0.5871274471282959, "step": 4540 }, { "epoch": 0.7258051626308639, "grad_norm": 1.1444495650542261, "learning_rate": 1.5110962424574629e-05, "loss": 0.6070659756660461, "step": 4541 }, { "epoch": 0.7259649964037401, "grad_norm": 1.3316032951992238, "learning_rate": 1.510869081891145e-05, "loss": 0.5678802728652954, "step": 4542 }, { "epoch": 0.7261248301766163, "grad_norm": 1.3713235625147187, "learning_rate": 1.5106418856475444e-05, "loss": 0.5580282211303711, "step": 4543 }, { "epoch": 0.7262846639494925, "grad_norm": 1.102498252209115, "learning_rate": 1.5104146537425275e-05, "loss": 0.7446409463882446, "step": 4544 }, { "epoch": 0.7264444977223687, "grad_norm": 1.205528328359566, "learning_rate": 1.5101873861919636e-05, "loss": 0.5291237831115723, "step": 4545 }, { "epoch": 0.726604331495245, "grad_norm": 1.4071410971914768, "learning_rate": 1.5099600830117242e-05, "loss": 0.5980884432792664, "step": 4546 }, { "epoch": 0.7267641652681212, "grad_norm": 1.3527786132648056, "learning_rate": 1.5097327442176837e-05, "loss": 0.6024569869041443, "step": 4547 }, { "epoch": 0.7269239990409974, "grad_norm": 1.11531864001909, "learning_rate": 1.5095053698257175e-05, "loss": 0.5594196319580078, "step": 4548 }, { "epoch": 0.7270838328138736, "grad_norm": 1.2815611003471576, "learning_rate": 1.509277959851706e-05, "loss": 0.5406759977340698, "step": 4549 }, { "epoch": 0.7272436665867498, "grad_norm": 1.2363508459174386, "learning_rate": 1.5090505143115301e-05, "loss": 0.5601124167442322, "step": 4550 }, { "epoch": 0.727403500359626, "grad_norm": 1.3689269120138832, "learning_rate": 1.5088230332210736e-05, "loss": 0.6920906901359558, "step": 4551 }, { "epoch": 0.7275633341325022, "grad_norm": 1.2678782626134137, "learning_rate": 1.5085955165962234e-05, "loss": 0.7117822170257568, "step": 4552 }, { "epoch": 0.7277231679053784, "grad_norm": 1.129167661263539, "learning_rate": 1.5083679644528681e-05, "loss": 0.571985125541687, "step": 4553 }, { "epoch": 0.7278830016782546, "grad_norm": 1.321010333355946, "learning_rate": 1.5081403768068991e-05, "loss": 0.552426815032959, "step": 4554 }, { "epoch": 0.7280428354511308, "grad_norm": 1.229681923026406, "learning_rate": 1.5079127536742107e-05, "loss": 0.5024241209030151, "step": 4555 }, { "epoch": 0.728202669224007, "grad_norm": 1.2370030255504625, "learning_rate": 1.507685095070699e-05, "loss": 0.7121915817260742, "step": 4556 }, { "epoch": 0.7283625029968832, "grad_norm": 1.1715722820004995, "learning_rate": 1.5074574010122627e-05, "loss": 0.5602481365203857, "step": 4557 }, { "epoch": 0.7285223367697594, "grad_norm": 1.5460560748642362, "learning_rate": 1.5072296715148036e-05, "loss": 0.7047481536865234, "step": 4558 }, { "epoch": 0.7286821705426356, "grad_norm": 1.444312797711541, "learning_rate": 1.507001906594225e-05, "loss": 0.6379858255386353, "step": 4559 }, { "epoch": 0.7288420043155118, "grad_norm": 1.3041782867008767, "learning_rate": 1.5067741062664338e-05, "loss": 0.531816840171814, "step": 4560 }, { "epoch": 0.729001838088388, "grad_norm": 1.3501501603644714, "learning_rate": 1.5065462705473384e-05, "loss": 0.6007355451583862, "step": 4561 }, { "epoch": 0.7291616718612642, "grad_norm": 1.5019043952749618, "learning_rate": 1.50631839945285e-05, "loss": 0.6304600238800049, "step": 4562 }, { "epoch": 0.7293215056341404, "grad_norm": 1.5564759626342268, "learning_rate": 1.5060904929988824e-05, "loss": 0.6437847018241882, "step": 4563 }, { "epoch": 0.7294813394070166, "grad_norm": 1.2358648778380759, "learning_rate": 1.5058625512013519e-05, "loss": 0.49528250098228455, "step": 4564 }, { "epoch": 0.729641173179893, "grad_norm": 1.3588973099343618, "learning_rate": 1.5056345740761771e-05, "loss": 0.5614286661148071, "step": 4565 }, { "epoch": 0.7298010069527692, "grad_norm": 1.2504488291741502, "learning_rate": 1.5054065616392787e-05, "loss": 0.6130121946334839, "step": 4566 }, { "epoch": 0.7299608407256454, "grad_norm": 1.460059504792857, "learning_rate": 1.505178513906581e-05, "loss": 0.5742789506912231, "step": 4567 }, { "epoch": 0.7301206744985216, "grad_norm": 1.2113327914849132, "learning_rate": 1.5049504308940094e-05, "loss": 0.5552239418029785, "step": 4568 }, { "epoch": 0.7302805082713978, "grad_norm": 1.286768104571664, "learning_rate": 1.5047223126174926e-05, "loss": 0.565248966217041, "step": 4569 }, { "epoch": 0.730440342044274, "grad_norm": 1.2004668691139593, "learning_rate": 1.5044941590929618e-05, "loss": 0.8326219320297241, "step": 4570 }, { "epoch": 0.7306001758171502, "grad_norm": 1.2305426832175634, "learning_rate": 1.5042659703363502e-05, "loss": 0.6213837265968323, "step": 4571 }, { "epoch": 0.7307600095900264, "grad_norm": 1.3487101925261318, "learning_rate": 1.5040377463635938e-05, "loss": 0.6348291039466858, "step": 4572 }, { "epoch": 0.7309198433629026, "grad_norm": 1.2179436839534818, "learning_rate": 1.5038094871906308e-05, "loss": 0.5425381660461426, "step": 4573 }, { "epoch": 0.7310796771357788, "grad_norm": 1.3593064658693463, "learning_rate": 1.5035811928334023e-05, "loss": 0.705018162727356, "step": 4574 }, { "epoch": 0.731239510908655, "grad_norm": 1.331708830296371, "learning_rate": 1.5033528633078514e-05, "loss": 0.5904170274734497, "step": 4575 }, { "epoch": 0.7313993446815312, "grad_norm": 1.236608466234813, "learning_rate": 1.5031244986299234e-05, "loss": 0.6708549857139587, "step": 4576 }, { "epoch": 0.7315591784544074, "grad_norm": 1.191993416902346, "learning_rate": 1.5028960988155674e-05, "loss": 0.4815007448196411, "step": 4577 }, { "epoch": 0.7317190122272836, "grad_norm": 1.1058949083421588, "learning_rate": 1.502667663880733e-05, "loss": 0.5410969257354736, "step": 4578 }, { "epoch": 0.7318788460001598, "grad_norm": 1.308825883420259, "learning_rate": 1.5024391938413743e-05, "loss": 0.6272529363632202, "step": 4579 }, { "epoch": 0.732038679773036, "grad_norm": 1.4060731251787757, "learning_rate": 1.5022106887134463e-05, "loss": 0.5570111274719238, "step": 4580 }, { "epoch": 0.7321985135459123, "grad_norm": 1.3979295999030326, "learning_rate": 1.5019821485129066e-05, "loss": 0.8259683847427368, "step": 4581 }, { "epoch": 0.7323583473187885, "grad_norm": 1.336895061015654, "learning_rate": 1.5017535732557167e-05, "loss": 0.7726737260818481, "step": 4582 }, { "epoch": 0.7325181810916647, "grad_norm": 1.469552044983765, "learning_rate": 1.5015249629578384e-05, "loss": 0.6445845365524292, "step": 4583 }, { "epoch": 0.7326780148645409, "grad_norm": 1.3573461132832885, "learning_rate": 1.5012963176352375e-05, "loss": 0.7027872800827026, "step": 4584 }, { "epoch": 0.7328378486374171, "grad_norm": 1.3067680581568217, "learning_rate": 1.501067637303882e-05, "loss": 0.7406896948814392, "step": 4585 }, { "epoch": 0.7329976824102933, "grad_norm": 1.2720228891177103, "learning_rate": 1.5008389219797415e-05, "loss": 0.6459194421768188, "step": 4586 }, { "epoch": 0.7331575161831695, "grad_norm": 1.302589664604361, "learning_rate": 1.5006101716787896e-05, "loss": 0.7818728685379028, "step": 4587 }, { "epoch": 0.7333173499560457, "grad_norm": 1.1885705645457996, "learning_rate": 1.500381386417e-05, "loss": 0.6194977760314941, "step": 4588 }, { "epoch": 0.7334771837289219, "grad_norm": 1.2035543212351214, "learning_rate": 1.5001525662103517e-05, "loss": 0.580812931060791, "step": 4589 }, { "epoch": 0.7336370175017981, "grad_norm": 1.4563448598370539, "learning_rate": 1.4999237110748241e-05, "loss": 0.6165831089019775, "step": 4590 }, { "epoch": 0.7337968512746743, "grad_norm": 1.368571779757883, "learning_rate": 1.4996948210263994e-05, "loss": 0.6518210768699646, "step": 4591 }, { "epoch": 0.7339566850475505, "grad_norm": 1.4006630772795026, "learning_rate": 1.499465896081063e-05, "loss": 0.6500741243362427, "step": 4592 }, { "epoch": 0.7341165188204267, "grad_norm": 1.2442561429963601, "learning_rate": 1.4992369362548019e-05, "loss": 0.4574137330055237, "step": 4593 }, { "epoch": 0.7342763525933029, "grad_norm": 1.4849824698339404, "learning_rate": 1.4990079415636057e-05, "loss": 0.6984858512878418, "step": 4594 }, { "epoch": 0.7344361863661791, "grad_norm": 1.2466201425301864, "learning_rate": 1.4987789120234666e-05, "loss": 0.5608768463134766, "step": 4595 }, { "epoch": 0.7345960201390553, "grad_norm": 1.4931959407097561, "learning_rate": 1.4985498476503794e-05, "loss": 0.6332792043685913, "step": 4596 }, { "epoch": 0.7347558539119315, "grad_norm": 1.2555875448167295, "learning_rate": 1.4983207484603412e-05, "loss": 0.6468278169631958, "step": 4597 }, { "epoch": 0.7349156876848078, "grad_norm": 1.3187668003046367, "learning_rate": 1.4980916144693512e-05, "loss": 0.6090507507324219, "step": 4598 }, { "epoch": 0.735075521457684, "grad_norm": 1.2339782282339347, "learning_rate": 1.4978624456934114e-05, "loss": 0.6301372051239014, "step": 4599 }, { "epoch": 0.7352353552305603, "grad_norm": 1.3451116089276784, "learning_rate": 1.4976332421485265e-05, "loss": 0.5698485374450684, "step": 4600 }, { "epoch": 0.7353951890034365, "grad_norm": 1.430840400442559, "learning_rate": 1.4974040038507024e-05, "loss": 0.6695650815963745, "step": 4601 }, { "epoch": 0.7355550227763127, "grad_norm": 1.1502792297120719, "learning_rate": 1.4971747308159495e-05, "loss": 0.5510814785957336, "step": 4602 }, { "epoch": 0.7357148565491889, "grad_norm": 1.4153658260582238, "learning_rate": 1.4969454230602782e-05, "loss": 0.6758742332458496, "step": 4603 }, { "epoch": 0.7358746903220651, "grad_norm": 1.3140947557002418, "learning_rate": 1.4967160805997034e-05, "loss": 0.6764964461326599, "step": 4604 }, { "epoch": 0.7360345240949413, "grad_norm": 1.2741177261327568, "learning_rate": 1.4964867034502411e-05, "loss": 0.5999242663383484, "step": 4605 }, { "epoch": 0.7361943578678175, "grad_norm": 1.3118166204311867, "learning_rate": 1.4962572916279103e-05, "loss": 0.5276737213134766, "step": 4606 }, { "epoch": 0.7363541916406937, "grad_norm": 1.3920115905959196, "learning_rate": 1.4960278451487327e-05, "loss": 0.7444452047348022, "step": 4607 }, { "epoch": 0.7365140254135699, "grad_norm": 1.368171310750878, "learning_rate": 1.4957983640287312e-05, "loss": 0.6612803936004639, "step": 4608 }, { "epoch": 0.7366738591864461, "grad_norm": 1.1564396228856477, "learning_rate": 1.4955688482839326e-05, "loss": 0.5872864723205566, "step": 4609 }, { "epoch": 0.7368336929593223, "grad_norm": 1.0252272741582453, "learning_rate": 1.4953392979303652e-05, "loss": 0.4699232280254364, "step": 4610 }, { "epoch": 0.7369935267321985, "grad_norm": 1.4939702765362783, "learning_rate": 1.4951097129840601e-05, "loss": 0.7494204044342041, "step": 4611 }, { "epoch": 0.7371533605050747, "grad_norm": 1.121232368127494, "learning_rate": 1.4948800934610508e-05, "loss": 0.548137903213501, "step": 4612 }, { "epoch": 0.737313194277951, "grad_norm": 1.2448287446160342, "learning_rate": 1.4946504393773729e-05, "loss": 0.6403963565826416, "step": 4613 }, { "epoch": 0.7374730280508272, "grad_norm": 1.2946287276297173, "learning_rate": 1.4944207507490647e-05, "loss": 0.6814355850219727, "step": 4614 }, { "epoch": 0.7376328618237034, "grad_norm": 1.6645945275263148, "learning_rate": 1.4941910275921668e-05, "loss": 0.7201135754585266, "step": 4615 }, { "epoch": 0.7377926955965796, "grad_norm": 1.3272240499689623, "learning_rate": 1.4939612699227224e-05, "loss": 0.6786856651306152, "step": 4616 }, { "epoch": 0.7379525293694558, "grad_norm": 1.3411771905226817, "learning_rate": 1.4937314777567769e-05, "loss": 0.6625065207481384, "step": 4617 }, { "epoch": 0.738112363142332, "grad_norm": 1.1991496332677698, "learning_rate": 1.4935016511103779e-05, "loss": 0.5383620858192444, "step": 4618 }, { "epoch": 0.7382721969152082, "grad_norm": 1.1280493234779223, "learning_rate": 1.4932717899995758e-05, "loss": 0.5359270572662354, "step": 4619 }, { "epoch": 0.7384320306880844, "grad_norm": 1.160487405128389, "learning_rate": 1.4930418944404234e-05, "loss": 0.5154749751091003, "step": 4620 }, { "epoch": 0.7385918644609606, "grad_norm": 1.383420811913617, "learning_rate": 1.492811964448976e-05, "loss": 0.6196349859237671, "step": 4621 }, { "epoch": 0.7387516982338368, "grad_norm": 1.2469936968825863, "learning_rate": 1.4925820000412907e-05, "loss": 0.6422646045684814, "step": 4622 }, { "epoch": 0.738911532006713, "grad_norm": 1.3679397383351246, "learning_rate": 1.4923520012334275e-05, "loss": 0.6349624395370483, "step": 4623 }, { "epoch": 0.7390713657795892, "grad_norm": 1.35180965749126, "learning_rate": 1.4921219680414491e-05, "loss": 0.6315587759017944, "step": 4624 }, { "epoch": 0.7392311995524654, "grad_norm": 1.1732355358452269, "learning_rate": 1.4918919004814196e-05, "loss": 0.5214226245880127, "step": 4625 }, { "epoch": 0.7393910333253416, "grad_norm": 1.2940358547085589, "learning_rate": 1.4916617985694064e-05, "loss": 0.6251899600028992, "step": 4626 }, { "epoch": 0.7395508670982178, "grad_norm": 1.4265982801106516, "learning_rate": 1.4914316623214788e-05, "loss": 0.5508317351341248, "step": 4627 }, { "epoch": 0.739710700871094, "grad_norm": 1.1986308652023414, "learning_rate": 1.4912014917537092e-05, "loss": 0.4746837615966797, "step": 4628 }, { "epoch": 0.7398705346439702, "grad_norm": 1.1738900450411132, "learning_rate": 1.4909712868821711e-05, "loss": 0.5910916328430176, "step": 4629 }, { "epoch": 0.7400303684168464, "grad_norm": 1.3516577863979642, "learning_rate": 1.4907410477229421e-05, "loss": 0.6114342212677002, "step": 4630 }, { "epoch": 0.7401902021897226, "grad_norm": 1.2016782293112516, "learning_rate": 1.4905107742921005e-05, "loss": 0.5774136185646057, "step": 4631 }, { "epoch": 0.7403500359625989, "grad_norm": 1.3040231126804445, "learning_rate": 1.4902804666057285e-05, "loss": 0.7417988777160645, "step": 4632 }, { "epoch": 0.7405098697354751, "grad_norm": 1.1689754659570462, "learning_rate": 1.490050124679909e-05, "loss": 0.6760393381118774, "step": 4633 }, { "epoch": 0.7406697035083513, "grad_norm": 1.1951430300048136, "learning_rate": 1.4898197485307293e-05, "loss": 0.5685526132583618, "step": 4634 }, { "epoch": 0.7408295372812275, "grad_norm": 1.4603364499908362, "learning_rate": 1.4895893381742774e-05, "loss": 0.6890367269515991, "step": 4635 }, { "epoch": 0.7409893710541038, "grad_norm": 1.2174631109808487, "learning_rate": 1.4893588936266448e-05, "loss": 0.5934386253356934, "step": 4636 }, { "epoch": 0.74114920482698, "grad_norm": 1.6174062397144626, "learning_rate": 1.4891284149039242e-05, "loss": 0.6742205023765564, "step": 4637 }, { "epoch": 0.7413090385998562, "grad_norm": 1.6025058772752705, "learning_rate": 1.4888979020222122e-05, "loss": 0.6838244795799255, "step": 4638 }, { "epoch": 0.7414688723727324, "grad_norm": 1.4426784264541583, "learning_rate": 1.4886673549976065e-05, "loss": 0.7163301706314087, "step": 4639 }, { "epoch": 0.7416287061456086, "grad_norm": 1.4119478071173248, "learning_rate": 1.488436773846208e-05, "loss": 0.4905848503112793, "step": 4640 }, { "epoch": 0.7417885399184848, "grad_norm": 1.1451333146263465, "learning_rate": 1.4882061585841196e-05, "loss": 0.6078686118125916, "step": 4641 }, { "epoch": 0.741948373691361, "grad_norm": 1.4520040925137, "learning_rate": 1.4879755092274464e-05, "loss": 0.6279661059379578, "step": 4642 }, { "epoch": 0.7421082074642372, "grad_norm": 1.4833883619411223, "learning_rate": 1.4877448257922966e-05, "loss": 0.7401487827301025, "step": 4643 }, { "epoch": 0.7422680412371134, "grad_norm": 1.392056842272476, "learning_rate": 1.48751410829478e-05, "loss": 0.5965585112571716, "step": 4644 }, { "epoch": 0.7424278750099896, "grad_norm": 1.1999326231163918, "learning_rate": 1.4872833567510087e-05, "loss": 0.6176682710647583, "step": 4645 }, { "epoch": 0.7425877087828658, "grad_norm": 1.2888297816164465, "learning_rate": 1.4870525711770984e-05, "loss": 0.6468385457992554, "step": 4646 }, { "epoch": 0.742747542555742, "grad_norm": 1.4946303964971317, "learning_rate": 1.4868217515891657e-05, "loss": 0.6997361183166504, "step": 4647 }, { "epoch": 0.7429073763286183, "grad_norm": 1.2646632489441987, "learning_rate": 1.4865908980033306e-05, "loss": 0.5922179222106934, "step": 4648 }, { "epoch": 0.7430672101014945, "grad_norm": 1.2933235364679745, "learning_rate": 1.486360010435715e-05, "loss": 0.5418304204940796, "step": 4649 }, { "epoch": 0.7432270438743707, "grad_norm": 1.2395191167151671, "learning_rate": 1.4861290889024428e-05, "loss": 0.6699364185333252, "step": 4650 }, { "epoch": 0.7433868776472469, "grad_norm": 1.2536798768969288, "learning_rate": 1.4858981334196418e-05, "loss": 0.6129927635192871, "step": 4651 }, { "epoch": 0.7435467114201231, "grad_norm": 1.3781803915519129, "learning_rate": 1.48566714400344e-05, "loss": 0.5799526572227478, "step": 4652 }, { "epoch": 0.7437065451929993, "grad_norm": 1.1718521622534899, "learning_rate": 1.4854361206699696e-05, "loss": 0.4867413640022278, "step": 4653 }, { "epoch": 0.7438663789658755, "grad_norm": 1.5101090901829082, "learning_rate": 1.4852050634353641e-05, "loss": 0.5962605476379395, "step": 4654 }, { "epoch": 0.7440262127387517, "grad_norm": 1.3477763658254314, "learning_rate": 1.4849739723157597e-05, "loss": 0.6777647137641907, "step": 4655 }, { "epoch": 0.7441860465116279, "grad_norm": 1.3980720379107316, "learning_rate": 1.4847428473272954e-05, "loss": 0.7486779689788818, "step": 4656 }, { "epoch": 0.7443458802845041, "grad_norm": 1.2842459336058067, "learning_rate": 1.4845116884861115e-05, "loss": 0.6682512760162354, "step": 4657 }, { "epoch": 0.7445057140573803, "grad_norm": 1.386810651047145, "learning_rate": 1.4842804958083519e-05, "loss": 0.5561637878417969, "step": 4658 }, { "epoch": 0.7446655478302565, "grad_norm": 1.2439244353921606, "learning_rate": 1.484049269310162e-05, "loss": 0.5963892936706543, "step": 4659 }, { "epoch": 0.7448253816031327, "grad_norm": 1.3229199968944312, "learning_rate": 1.4838180090076897e-05, "loss": 0.6897656321525574, "step": 4660 }, { "epoch": 0.7449852153760089, "grad_norm": 1.2219565066120976, "learning_rate": 1.4835867149170857e-05, "loss": 0.6469798684120178, "step": 4661 }, { "epoch": 0.7451450491488851, "grad_norm": 1.443850121699617, "learning_rate": 1.4833553870545026e-05, "loss": 0.6456371545791626, "step": 4662 }, { "epoch": 0.7453048829217613, "grad_norm": 1.2653960130120816, "learning_rate": 1.4831240254360957e-05, "loss": 0.655066967010498, "step": 4663 }, { "epoch": 0.7454647166946375, "grad_norm": 1.2616569627139256, "learning_rate": 1.4828926300780221e-05, "loss": 0.5722195506095886, "step": 4664 }, { "epoch": 0.7456245504675137, "grad_norm": 1.354932041504804, "learning_rate": 1.482661200996442e-05, "loss": 0.565483808517456, "step": 4665 }, { "epoch": 0.74578438424039, "grad_norm": 1.3384347682975426, "learning_rate": 1.4824297382075175e-05, "loss": 0.6050620079040527, "step": 4666 }, { "epoch": 0.7459442180132662, "grad_norm": 1.4563618536219691, "learning_rate": 1.4821982417274128e-05, "loss": 0.6746129989624023, "step": 4667 }, { "epoch": 0.7461040517861424, "grad_norm": 1.329515684131771, "learning_rate": 1.4819667115722954e-05, "loss": 0.6754982471466064, "step": 4668 }, { "epoch": 0.7462638855590186, "grad_norm": 1.2499915489868707, "learning_rate": 1.481735147758334e-05, "loss": 0.6828760504722595, "step": 4669 }, { "epoch": 0.7464237193318948, "grad_norm": 1.2144026885004184, "learning_rate": 1.4815035503017003e-05, "loss": 0.6754061579704285, "step": 4670 }, { "epoch": 0.7465835531047711, "grad_norm": 1.110579547680476, "learning_rate": 1.4812719192185686e-05, "loss": 0.5341143608093262, "step": 4671 }, { "epoch": 0.7467433868776473, "grad_norm": 1.4312838566428967, "learning_rate": 1.4810402545251148e-05, "loss": 0.6678611040115356, "step": 4672 }, { "epoch": 0.7469032206505235, "grad_norm": 1.2648729023345988, "learning_rate": 1.4808085562375176e-05, "loss": 0.5878967046737671, "step": 4673 }, { "epoch": 0.7470630544233997, "grad_norm": 1.3046977283094225, "learning_rate": 1.4805768243719582e-05, "loss": 0.6268357038497925, "step": 4674 }, { "epoch": 0.7472228881962759, "grad_norm": 1.2567939568326956, "learning_rate": 1.4803450589446198e-05, "loss": 0.6085100173950195, "step": 4675 }, { "epoch": 0.7473827219691521, "grad_norm": 1.1786612477167842, "learning_rate": 1.4801132599716883e-05, "loss": 0.5739302635192871, "step": 4676 }, { "epoch": 0.7475425557420283, "grad_norm": 1.5920931591883745, "learning_rate": 1.4798814274693511e-05, "loss": 0.6827660799026489, "step": 4677 }, { "epoch": 0.7477023895149045, "grad_norm": 1.109380782091643, "learning_rate": 1.4796495614537991e-05, "loss": 0.5500270128250122, "step": 4678 }, { "epoch": 0.7478622232877807, "grad_norm": 1.4098312729195022, "learning_rate": 1.4794176619412249e-05, "loss": 0.8362960815429688, "step": 4679 }, { "epoch": 0.7480220570606569, "grad_norm": 1.0527364678150077, "learning_rate": 1.4791857289478233e-05, "loss": 0.4538796544075012, "step": 4680 }, { "epoch": 0.7481818908335331, "grad_norm": 1.5641567219883286, "learning_rate": 1.478953762489792e-05, "loss": 0.7135646343231201, "step": 4681 }, { "epoch": 0.7483417246064094, "grad_norm": 1.386965633042483, "learning_rate": 1.4787217625833306e-05, "loss": 0.6802133321762085, "step": 4682 }, { "epoch": 0.7485015583792856, "grad_norm": 1.3541437037254624, "learning_rate": 1.4784897292446412e-05, "loss": 0.6120431423187256, "step": 4683 }, { "epoch": 0.7486613921521618, "grad_norm": 1.5929961602718725, "learning_rate": 1.478257662489928e-05, "loss": 0.7508255243301392, "step": 4684 }, { "epoch": 0.748821225925038, "grad_norm": 1.2914889050022424, "learning_rate": 1.4780255623353977e-05, "loss": 0.6352211833000183, "step": 4685 }, { "epoch": 0.7489810596979142, "grad_norm": 1.451810181107973, "learning_rate": 1.4777934287972597e-05, "loss": 0.6820038557052612, "step": 4686 }, { "epoch": 0.7491408934707904, "grad_norm": 1.5042741655870677, "learning_rate": 1.477561261891725e-05, "loss": 0.6062050461769104, "step": 4687 }, { "epoch": 0.7493007272436666, "grad_norm": 1.4687318522445894, "learning_rate": 1.4773290616350076e-05, "loss": 0.8918746709823608, "step": 4688 }, { "epoch": 0.7494605610165428, "grad_norm": 1.484656130542062, "learning_rate": 1.4770968280433233e-05, "loss": 0.7810240983963013, "step": 4689 }, { "epoch": 0.749620394789419, "grad_norm": 1.3353469026063542, "learning_rate": 1.4768645611328907e-05, "loss": 0.6354573965072632, "step": 4690 }, { "epoch": 0.7497802285622952, "grad_norm": 1.3219390418903656, "learning_rate": 1.4766322609199304e-05, "loss": 0.5579347610473633, "step": 4691 }, { "epoch": 0.7499400623351714, "grad_norm": 1.4862441166591296, "learning_rate": 1.476399927420665e-05, "loss": 0.6683955192565918, "step": 4692 }, { "epoch": 0.7500998961080476, "grad_norm": 1.232558851554173, "learning_rate": 1.4761675606513208e-05, "loss": 0.5423543453216553, "step": 4693 }, { "epoch": 0.7502597298809238, "grad_norm": 1.2205669949733866, "learning_rate": 1.4759351606281248e-05, "loss": 0.666401207447052, "step": 4694 }, { "epoch": 0.7504195636538, "grad_norm": 1.0958543185912126, "learning_rate": 1.475702727367307e-05, "loss": 0.5265550017356873, "step": 4695 }, { "epoch": 0.7505793974266762, "grad_norm": 1.2700875997793863, "learning_rate": 1.4754702608851e-05, "loss": 0.6598383784294128, "step": 4696 }, { "epoch": 0.7507392311995524, "grad_norm": 1.4197747552899678, "learning_rate": 1.475237761197738e-05, "loss": 0.5612776279449463, "step": 4697 }, { "epoch": 0.7508990649724286, "grad_norm": 1.5018817292262094, "learning_rate": 1.4750052283214585e-05, "loss": 0.6780356764793396, "step": 4698 }, { "epoch": 0.7510588987453048, "grad_norm": 1.2762749480071405, "learning_rate": 1.4747726622724998e-05, "loss": 0.7178009748458862, "step": 4699 }, { "epoch": 0.751218732518181, "grad_norm": 1.313637612957301, "learning_rate": 1.4745400630671047e-05, "loss": 0.5818676948547363, "step": 4700 }, { "epoch": 0.7513785662910573, "grad_norm": 1.2768710453290444, "learning_rate": 1.4743074307215169e-05, "loss": 0.5777993202209473, "step": 4701 }, { "epoch": 0.7515384000639335, "grad_norm": 4.038447778025495, "learning_rate": 1.4740747652519818e-05, "loss": 0.6566411852836609, "step": 4702 }, { "epoch": 0.7516982338368097, "grad_norm": 1.3494464529942645, "learning_rate": 1.4738420666747486e-05, "loss": 0.6129517555236816, "step": 4703 }, { "epoch": 0.7518580676096859, "grad_norm": 1.2974027867251008, "learning_rate": 1.4736093350060676e-05, "loss": 0.5357733964920044, "step": 4704 }, { "epoch": 0.7520179013825621, "grad_norm": 1.1304592968178775, "learning_rate": 1.473376570262193e-05, "loss": 0.6609746217727661, "step": 4705 }, { "epoch": 0.7521777351554384, "grad_norm": 1.31651629839985, "learning_rate": 1.4731437724593791e-05, "loss": 0.6901361346244812, "step": 4706 }, { "epoch": 0.7523375689283146, "grad_norm": 1.3515619943529174, "learning_rate": 1.4729109416138843e-05, "loss": 0.7087622284889221, "step": 4707 }, { "epoch": 0.7524974027011908, "grad_norm": 1.4432890147427364, "learning_rate": 1.4726780777419685e-05, "loss": 0.6781965494155884, "step": 4708 }, { "epoch": 0.752657236474067, "grad_norm": 1.5999433003798464, "learning_rate": 1.4724451808598938e-05, "loss": 0.7092052698135376, "step": 4709 }, { "epoch": 0.7528170702469432, "grad_norm": 1.372825151941591, "learning_rate": 1.4722122509839256e-05, "loss": 0.6617897748947144, "step": 4710 }, { "epoch": 0.7529769040198194, "grad_norm": 1.706294080833636, "learning_rate": 1.4719792881303305e-05, "loss": 0.638319730758667, "step": 4711 }, { "epoch": 0.7531367377926956, "grad_norm": 1.2201095040877536, "learning_rate": 1.4717462923153778e-05, "loss": 0.6654485464096069, "step": 4712 }, { "epoch": 0.7532965715655718, "grad_norm": 1.2284278688643973, "learning_rate": 1.471513263555339e-05, "loss": 0.5967960357666016, "step": 4713 }, { "epoch": 0.753456405338448, "grad_norm": 1.2066725692359415, "learning_rate": 1.471280201866488e-05, "loss": 0.7219761610031128, "step": 4714 }, { "epoch": 0.7536162391113242, "grad_norm": 1.366886581813106, "learning_rate": 1.4710471072651014e-05, "loss": 0.5076046586036682, "step": 4715 }, { "epoch": 0.7537760728842005, "grad_norm": 1.1959243768340575, "learning_rate": 1.470813979767457e-05, "loss": 0.5075229406356812, "step": 4716 }, { "epoch": 0.7539359066570767, "grad_norm": 1.0488076497238092, "learning_rate": 1.4705808193898363e-05, "loss": 0.4519113004207611, "step": 4717 }, { "epoch": 0.7540957404299529, "grad_norm": 1.377154652770036, "learning_rate": 1.4703476261485224e-05, "loss": 0.5970788598060608, "step": 4718 }, { "epoch": 0.7542555742028291, "grad_norm": 1.3648332177216997, "learning_rate": 1.4701144000597996e-05, "loss": 0.7117664813995361, "step": 4719 }, { "epoch": 0.7544154079757053, "grad_norm": 1.4752678347332777, "learning_rate": 1.4698811411399568e-05, "loss": 0.6118054389953613, "step": 4720 }, { "epoch": 0.7545752417485815, "grad_norm": 1.2708016451337423, "learning_rate": 1.4696478494052834e-05, "loss": 0.6341784596443176, "step": 4721 }, { "epoch": 0.7547350755214577, "grad_norm": 1.4164975736795375, "learning_rate": 1.4694145248720719e-05, "loss": 0.805222749710083, "step": 4722 }, { "epoch": 0.7548949092943339, "grad_norm": 1.323729531902444, "learning_rate": 1.469181167556617e-05, "loss": 0.5930195450782776, "step": 4723 }, { "epoch": 0.7550547430672101, "grad_norm": 1.4017377598377037, "learning_rate": 1.468947777475215e-05, "loss": 0.6587049961090088, "step": 4724 }, { "epoch": 0.7552145768400863, "grad_norm": 1.293631741717334, "learning_rate": 1.4687143546441654e-05, "loss": 0.6041900515556335, "step": 4725 }, { "epoch": 0.7553744106129625, "grad_norm": 1.129999778674182, "learning_rate": 1.4684808990797693e-05, "loss": 0.5265984535217285, "step": 4726 }, { "epoch": 0.7555342443858387, "grad_norm": 1.2141662225269392, "learning_rate": 1.4682474107983314e-05, "loss": 0.48537716269493103, "step": 4727 }, { "epoch": 0.7556940781587149, "grad_norm": 1.436968527509995, "learning_rate": 1.4680138898161564e-05, "loss": 0.6651159524917603, "step": 4728 }, { "epoch": 0.7558539119315911, "grad_norm": 1.3801322850902702, "learning_rate": 1.467780336149553e-05, "loss": 0.6429954767227173, "step": 4729 }, { "epoch": 0.7560137457044673, "grad_norm": 1.447569044401504, "learning_rate": 1.4675467498148321e-05, "loss": 0.5666571855545044, "step": 4730 }, { "epoch": 0.7561735794773435, "grad_norm": 1.180262591192821, "learning_rate": 1.4673131308283064e-05, "loss": 0.5867894887924194, "step": 4731 }, { "epoch": 0.7563334132502197, "grad_norm": 1.2716429490313543, "learning_rate": 1.467079479206291e-05, "loss": 0.6570907831192017, "step": 4732 }, { "epoch": 0.756493247023096, "grad_norm": 1.2523861148562605, "learning_rate": 1.4668457949651034e-05, "loss": 0.6324016451835632, "step": 4733 }, { "epoch": 0.7566530807959722, "grad_norm": 1.3026352928421827, "learning_rate": 1.4666120781210628e-05, "loss": 0.639248788356781, "step": 4734 }, { "epoch": 0.7568129145688484, "grad_norm": 1.3352868796994675, "learning_rate": 1.4663783286904918e-05, "loss": 0.648536741733551, "step": 4735 }, { "epoch": 0.7569727483417246, "grad_norm": 1.232857182547504, "learning_rate": 1.466144546689714e-05, "loss": 0.7306492328643799, "step": 4736 }, { "epoch": 0.7571325821146008, "grad_norm": 1.3546534931088465, "learning_rate": 1.4659107321350565e-05, "loss": 0.5953157544136047, "step": 4737 }, { "epoch": 0.757292415887477, "grad_norm": 1.2952048494037438, "learning_rate": 1.4656768850428476e-05, "loss": 0.5911526679992676, "step": 4738 }, { "epoch": 0.7574522496603532, "grad_norm": 1.437068431772542, "learning_rate": 1.4654430054294184e-05, "loss": 0.6448156237602234, "step": 4739 }, { "epoch": 0.7576120834332294, "grad_norm": 1.4034756429450832, "learning_rate": 1.4652090933111026e-05, "loss": 0.6501081585884094, "step": 4740 }, { "epoch": 0.7577719172061057, "grad_norm": 1.1165779381224943, "learning_rate": 1.4649751487042357e-05, "loss": 0.5031300783157349, "step": 4741 }, { "epoch": 0.7579317509789819, "grad_norm": 1.4877470715516519, "learning_rate": 1.4647411716251555e-05, "loss": 0.678864061832428, "step": 4742 }, { "epoch": 0.7580915847518581, "grad_norm": 1.7353265014035406, "learning_rate": 1.4645071620902019e-05, "loss": 0.7321866154670715, "step": 4743 }, { "epoch": 0.7582514185247343, "grad_norm": 1.1195267341520954, "learning_rate": 1.4642731201157172e-05, "loss": 0.5288077592849731, "step": 4744 }, { "epoch": 0.7584112522976105, "grad_norm": 1.190034588697298, "learning_rate": 1.4640390457180467e-05, "loss": 0.5650748014450073, "step": 4745 }, { "epoch": 0.7585710860704867, "grad_norm": 1.1518673637122763, "learning_rate": 1.4638049389135367e-05, "loss": 0.5694210529327393, "step": 4746 }, { "epoch": 0.7587309198433629, "grad_norm": 1.3678111113775788, "learning_rate": 1.4635707997185367e-05, "loss": 0.6610310077667236, "step": 4747 }, { "epoch": 0.7588907536162391, "grad_norm": 1.4551458754924895, "learning_rate": 1.463336628149398e-05, "loss": 0.7439156770706177, "step": 4748 }, { "epoch": 0.7590505873891153, "grad_norm": 1.269148760559421, "learning_rate": 1.4631024242224743e-05, "loss": 0.7210274934768677, "step": 4749 }, { "epoch": 0.7592104211619916, "grad_norm": 1.4020934696167093, "learning_rate": 1.462868187954122e-05, "loss": 0.7679996490478516, "step": 4750 }, { "epoch": 0.7593702549348678, "grad_norm": 1.245519778635249, "learning_rate": 1.4626339193606986e-05, "loss": 0.6651276350021362, "step": 4751 }, { "epoch": 0.759530088707744, "grad_norm": 1.3440540938650336, "learning_rate": 1.4623996184585654e-05, "loss": 0.5892314910888672, "step": 4752 }, { "epoch": 0.7596899224806202, "grad_norm": 1.1673888877900709, "learning_rate": 1.4621652852640847e-05, "loss": 0.4788740277290344, "step": 4753 }, { "epoch": 0.7598497562534964, "grad_norm": 1.478775268307908, "learning_rate": 1.4619309197936211e-05, "loss": 0.7352867126464844, "step": 4754 }, { "epoch": 0.7600095900263726, "grad_norm": 1.7946408685074677, "learning_rate": 1.4616965220635427e-05, "loss": 0.6948201656341553, "step": 4755 }, { "epoch": 0.7601694237992488, "grad_norm": 1.4897296342547985, "learning_rate": 1.4614620920902181e-05, "loss": 0.6054712533950806, "step": 4756 }, { "epoch": 0.760329257572125, "grad_norm": 1.3204891766467781, "learning_rate": 1.4612276298900199e-05, "loss": 0.6233370304107666, "step": 4757 }, { "epoch": 0.7604890913450012, "grad_norm": 1.1939959487370466, "learning_rate": 1.4609931354793215e-05, "loss": 0.611056387424469, "step": 4758 }, { "epoch": 0.7606489251178774, "grad_norm": 1.4735423462294936, "learning_rate": 1.4607586088744994e-05, "loss": 0.7565651535987854, "step": 4759 }, { "epoch": 0.7608087588907536, "grad_norm": 1.3038772563172094, "learning_rate": 1.4605240500919322e-05, "loss": 0.7845966815948486, "step": 4760 }, { "epoch": 0.7609685926636298, "grad_norm": 1.1750299888273366, "learning_rate": 1.4602894591480006e-05, "loss": 0.6359193325042725, "step": 4761 }, { "epoch": 0.761128426436506, "grad_norm": 1.209716231400917, "learning_rate": 1.4600548360590875e-05, "loss": 0.6350536942481995, "step": 4762 }, { "epoch": 0.7612882602093822, "grad_norm": 1.4577589063739458, "learning_rate": 1.4598201808415781e-05, "loss": 0.7150576114654541, "step": 4763 }, { "epoch": 0.7614480939822584, "grad_norm": 1.2573260709244323, "learning_rate": 1.4595854935118602e-05, "loss": 0.545888364315033, "step": 4764 }, { "epoch": 0.7616079277551346, "grad_norm": 1.417038693767017, "learning_rate": 1.4593507740863233e-05, "loss": 0.6258035898208618, "step": 4765 }, { "epoch": 0.7617677615280108, "grad_norm": 1.2201897903589496, "learning_rate": 1.4591160225813592e-05, "loss": 0.6323337554931641, "step": 4766 }, { "epoch": 0.761927595300887, "grad_norm": 1.1305980209943847, "learning_rate": 1.4588812390133624e-05, "loss": 0.613849937915802, "step": 4767 }, { "epoch": 0.7620874290737633, "grad_norm": 1.1488317961513328, "learning_rate": 1.458646423398729e-05, "loss": 0.5582201480865479, "step": 4768 }, { "epoch": 0.7622472628466395, "grad_norm": 1.374279466486125, "learning_rate": 1.4584115757538581e-05, "loss": 0.5587971210479736, "step": 4769 }, { "epoch": 0.7624070966195157, "grad_norm": 1.3997602460424023, "learning_rate": 1.4581766960951506e-05, "loss": 0.43397456407546997, "step": 4770 }, { "epoch": 0.7625669303923919, "grad_norm": 1.272576700065844, "learning_rate": 1.4579417844390094e-05, "loss": 0.4586319327354431, "step": 4771 }, { "epoch": 0.7627267641652681, "grad_norm": 1.2442865410527664, "learning_rate": 1.45770684080184e-05, "loss": 0.5624072551727295, "step": 4772 }, { "epoch": 0.7628865979381443, "grad_norm": 1.4685267991048976, "learning_rate": 1.4574718652000501e-05, "loss": 0.5292882919311523, "step": 4773 }, { "epoch": 0.7630464317110205, "grad_norm": 1.4093585824425676, "learning_rate": 1.4572368576500496e-05, "loss": 0.6239069700241089, "step": 4774 }, { "epoch": 0.7632062654838967, "grad_norm": 1.1908544826400156, "learning_rate": 1.4570018181682504e-05, "loss": 0.5301902294158936, "step": 4775 }, { "epoch": 0.7633660992567729, "grad_norm": 1.1546277492181027, "learning_rate": 1.456766746771067e-05, "loss": 0.5753846168518066, "step": 4776 }, { "epoch": 0.7635259330296492, "grad_norm": 1.3042333196742566, "learning_rate": 1.4565316434749157e-05, "loss": 0.5487949848175049, "step": 4777 }, { "epoch": 0.7636857668025254, "grad_norm": 2.146053058980507, "learning_rate": 1.4562965082962154e-05, "loss": 0.7524385452270508, "step": 4778 }, { "epoch": 0.7638456005754016, "grad_norm": 1.2550357913581507, "learning_rate": 1.4560613412513873e-05, "loss": 0.726093053817749, "step": 4779 }, { "epoch": 0.7640054343482778, "grad_norm": 1.372730432779656, "learning_rate": 1.4558261423568545e-05, "loss": 0.6554067730903625, "step": 4780 }, { "epoch": 0.764165268121154, "grad_norm": 1.148689273628233, "learning_rate": 1.4555909116290422e-05, "loss": 0.6165506839752197, "step": 4781 }, { "epoch": 0.7643251018940302, "grad_norm": 1.3735742704244107, "learning_rate": 1.4553556490843787e-05, "loss": 0.6523311138153076, "step": 4782 }, { "epoch": 0.7644849356669065, "grad_norm": 1.4287661679801147, "learning_rate": 1.4551203547392933e-05, "loss": 0.670332670211792, "step": 4783 }, { "epoch": 0.7646447694397827, "grad_norm": 1.0204012906361009, "learning_rate": 1.4548850286102185e-05, "loss": 0.5043455362319946, "step": 4784 }, { "epoch": 0.7648046032126589, "grad_norm": 1.303272355762763, "learning_rate": 1.4546496707135886e-05, "loss": 0.6358623504638672, "step": 4785 }, { "epoch": 0.7649644369855351, "grad_norm": 1.2156237065683155, "learning_rate": 1.4544142810658396e-05, "loss": 0.5713671445846558, "step": 4786 }, { "epoch": 0.7651242707584113, "grad_norm": 1.2125012635919867, "learning_rate": 1.4541788596834111e-05, "loss": 0.5468385219573975, "step": 4787 }, { "epoch": 0.7652841045312875, "grad_norm": 1.391666179970169, "learning_rate": 1.4539434065827436e-05, "loss": 0.7005865573883057, "step": 4788 }, { "epoch": 0.7654439383041637, "grad_norm": 1.2645042728723674, "learning_rate": 1.4537079217802804e-05, "loss": 0.5458500385284424, "step": 4789 }, { "epoch": 0.7656037720770399, "grad_norm": 1.4614787328480883, "learning_rate": 1.4534724052924669e-05, "loss": 0.589938759803772, "step": 4790 }, { "epoch": 0.7657636058499161, "grad_norm": 1.428033206700583, "learning_rate": 1.4532368571357508e-05, "loss": 0.5510170459747314, "step": 4791 }, { "epoch": 0.7659234396227923, "grad_norm": 1.2052865659896017, "learning_rate": 1.4530012773265823e-05, "loss": 0.5710453391075134, "step": 4792 }, { "epoch": 0.7660832733956685, "grad_norm": 1.2699021290493202, "learning_rate": 1.4527656658814127e-05, "loss": 0.5566431879997253, "step": 4793 }, { "epoch": 0.7662431071685447, "grad_norm": 1.0967454102261103, "learning_rate": 1.4525300228166967e-05, "loss": 0.6152739524841309, "step": 4794 }, { "epoch": 0.7664029409414209, "grad_norm": 1.4805697217910851, "learning_rate": 1.4522943481488907e-05, "loss": 0.6614764332771301, "step": 4795 }, { "epoch": 0.7665627747142971, "grad_norm": 1.265917887595751, "learning_rate": 1.4520586418944535e-05, "loss": 0.546400785446167, "step": 4796 }, { "epoch": 0.7667226084871733, "grad_norm": 1.424729860579508, "learning_rate": 1.4518229040698461e-05, "loss": 0.7478639483451843, "step": 4797 }, { "epoch": 0.7668824422600495, "grad_norm": 1.470293628967213, "learning_rate": 1.4515871346915312e-05, "loss": 0.6672810316085815, "step": 4798 }, { "epoch": 0.7670422760329257, "grad_norm": 1.2636197479102382, "learning_rate": 1.4513513337759739e-05, "loss": 0.5780946612358093, "step": 4799 }, { "epoch": 0.767202109805802, "grad_norm": 1.2659443494665186, "learning_rate": 1.4511155013396427e-05, "loss": 0.6819579601287842, "step": 4800 }, { "epoch": 0.7673619435786782, "grad_norm": 1.5348266580450367, "learning_rate": 1.4508796373990064e-05, "loss": 0.7367122173309326, "step": 4801 }, { "epoch": 0.7675217773515544, "grad_norm": 1.6980711458188837, "learning_rate": 1.4506437419705373e-05, "loss": 0.5899783372879028, "step": 4802 }, { "epoch": 0.7676816111244306, "grad_norm": 1.1877519974426445, "learning_rate": 1.4504078150707092e-05, "loss": 0.7429318428039551, "step": 4803 }, { "epoch": 0.7678414448973068, "grad_norm": 1.3243215556707562, "learning_rate": 1.4501718567159988e-05, "loss": 0.6441528797149658, "step": 4804 }, { "epoch": 0.768001278670183, "grad_norm": 1.4104837156270023, "learning_rate": 1.4499358669228842e-05, "loss": 0.5700668692588806, "step": 4805 }, { "epoch": 0.7681611124430592, "grad_norm": 1.2277057871788617, "learning_rate": 1.4496998457078463e-05, "loss": 0.6405792236328125, "step": 4806 }, { "epoch": 0.7683209462159354, "grad_norm": 1.346178931295497, "learning_rate": 1.4494637930873683e-05, "loss": 0.6914252638816833, "step": 4807 }, { "epoch": 0.7684807799888116, "grad_norm": 1.4681303775992773, "learning_rate": 1.4492277090779346e-05, "loss": 0.6028960943222046, "step": 4808 }, { "epoch": 0.7686406137616878, "grad_norm": 1.4708674641262551, "learning_rate": 1.4489915936960325e-05, "loss": 0.6732345819473267, "step": 4809 }, { "epoch": 0.768800447534564, "grad_norm": 1.3951553928323006, "learning_rate": 1.4487554469581521e-05, "loss": 0.6707208156585693, "step": 4810 }, { "epoch": 0.7689602813074402, "grad_norm": 1.2572179132981856, "learning_rate": 1.448519268880785e-05, "loss": 0.751924991607666, "step": 4811 }, { "epoch": 0.7691201150803165, "grad_norm": 1.3299567074869558, "learning_rate": 1.4482830594804245e-05, "loss": 0.6084319353103638, "step": 4812 }, { "epoch": 0.7692799488531927, "grad_norm": 1.4213293973772907, "learning_rate": 1.4480468187735666e-05, "loss": 0.6873186230659485, "step": 4813 }, { "epoch": 0.7694397826260689, "grad_norm": 1.3950600967125373, "learning_rate": 1.4478105467767101e-05, "loss": 0.5808796882629395, "step": 4814 }, { "epoch": 0.7695996163989451, "grad_norm": 1.2926047605218673, "learning_rate": 1.447574243506355e-05, "loss": 0.7147466540336609, "step": 4815 }, { "epoch": 0.7697594501718213, "grad_norm": 1.3970712336650968, "learning_rate": 1.4473379089790038e-05, "loss": 0.6206856966018677, "step": 4816 }, { "epoch": 0.7699192839446976, "grad_norm": 1.6389938511654127, "learning_rate": 1.4471015432111619e-05, "loss": 0.6876699328422546, "step": 4817 }, { "epoch": 0.7700791177175738, "grad_norm": 1.4127974652043709, "learning_rate": 1.4468651462193353e-05, "loss": 0.5826501250267029, "step": 4818 }, { "epoch": 0.77023895149045, "grad_norm": 1.2980844623078454, "learning_rate": 1.4466287180200336e-05, "loss": 0.6555550694465637, "step": 4819 }, { "epoch": 0.7703987852633262, "grad_norm": 1.4416234703453548, "learning_rate": 1.446392258629768e-05, "loss": 0.5541114211082458, "step": 4820 }, { "epoch": 0.7705586190362024, "grad_norm": 1.4670122123900178, "learning_rate": 1.4461557680650526e-05, "loss": 0.662563681602478, "step": 4821 }, { "epoch": 0.7707184528090786, "grad_norm": 1.421619360168182, "learning_rate": 1.4459192463424023e-05, "loss": 0.5796164870262146, "step": 4822 }, { "epoch": 0.7708782865819548, "grad_norm": 1.3250035012571544, "learning_rate": 1.445682693478335e-05, "loss": 0.5638300180435181, "step": 4823 }, { "epoch": 0.771038120354831, "grad_norm": 1.0713657784314508, "learning_rate": 1.4454461094893713e-05, "loss": 0.5267537832260132, "step": 4824 }, { "epoch": 0.7711979541277072, "grad_norm": 1.4000103485475535, "learning_rate": 1.4452094943920325e-05, "loss": 0.6025740504264832, "step": 4825 }, { "epoch": 0.7713577879005834, "grad_norm": 1.1102904051278424, "learning_rate": 1.444972848202844e-05, "loss": 0.6027092337608337, "step": 4826 }, { "epoch": 0.7715176216734596, "grad_norm": 1.1939151911896773, "learning_rate": 1.4447361709383312e-05, "loss": 0.6188386082649231, "step": 4827 }, { "epoch": 0.7716774554463358, "grad_norm": 1.33790148353094, "learning_rate": 1.4444994626150238e-05, "loss": 0.630632758140564, "step": 4828 }, { "epoch": 0.771837289219212, "grad_norm": 1.3223354681947963, "learning_rate": 1.444262723249452e-05, "loss": 0.6385332345962524, "step": 4829 }, { "epoch": 0.7719971229920882, "grad_norm": 1.613097204127976, "learning_rate": 1.4440259528581491e-05, "loss": 0.7240095734596252, "step": 4830 }, { "epoch": 0.7721569567649644, "grad_norm": 1.3607104832891255, "learning_rate": 1.4437891514576503e-05, "loss": 0.6386394500732422, "step": 4831 }, { "epoch": 0.7723167905378406, "grad_norm": 1.3577453737946552, "learning_rate": 1.4435523190644931e-05, "loss": 0.6249613761901855, "step": 4832 }, { "epoch": 0.7724766243107168, "grad_norm": 1.2415491586505816, "learning_rate": 1.4433154556952168e-05, "loss": 0.6296877861022949, "step": 4833 }, { "epoch": 0.772636458083593, "grad_norm": 1.24703569418831, "learning_rate": 1.4430785613663632e-05, "loss": 0.5811753273010254, "step": 4834 }, { "epoch": 0.7727962918564693, "grad_norm": 1.4605697810986882, "learning_rate": 1.4428416360944758e-05, "loss": 0.652590274810791, "step": 4835 }, { "epoch": 0.7729561256293455, "grad_norm": 1.2921607458604862, "learning_rate": 1.4426046798961015e-05, "loss": 0.7037408351898193, "step": 4836 }, { "epoch": 0.7731159594022217, "grad_norm": 1.4415078079920387, "learning_rate": 1.4423676927877876e-05, "loss": 0.5736767649650574, "step": 4837 }, { "epoch": 0.7732757931750979, "grad_norm": 1.3626134287960836, "learning_rate": 1.4421306747860851e-05, "loss": 0.5981347560882568, "step": 4838 }, { "epoch": 0.7734356269479741, "grad_norm": 1.1276864771871113, "learning_rate": 1.4418936259075458e-05, "loss": 0.600222110748291, "step": 4839 }, { "epoch": 0.7735954607208503, "grad_norm": 1.1861649245320127, "learning_rate": 1.4416565461687247e-05, "loss": 0.6099202036857605, "step": 4840 }, { "epoch": 0.7737552944937265, "grad_norm": 1.1823259399748887, "learning_rate": 1.4414194355861787e-05, "loss": 0.642790675163269, "step": 4841 }, { "epoch": 0.7739151282666027, "grad_norm": 1.2422105950822793, "learning_rate": 1.4411822941764668e-05, "loss": 0.5455999374389648, "step": 4842 }, { "epoch": 0.7740749620394789, "grad_norm": 1.120073787640773, "learning_rate": 1.4409451219561502e-05, "loss": 0.5911128520965576, "step": 4843 }, { "epoch": 0.7742347958123551, "grad_norm": 1.2998614207430663, "learning_rate": 1.4407079189417918e-05, "loss": 0.6385369896888733, "step": 4844 }, { "epoch": 0.7743946295852313, "grad_norm": 1.3668512464690186, "learning_rate": 1.440470685149957e-05, "loss": 0.6723437309265137, "step": 4845 }, { "epoch": 0.7745544633581075, "grad_norm": 1.2291947583789453, "learning_rate": 1.4402334205972137e-05, "loss": 0.5486667156219482, "step": 4846 }, { "epoch": 0.7747142971309838, "grad_norm": 1.5366123429359457, "learning_rate": 1.4399961253001316e-05, "loss": 0.7916553020477295, "step": 4847 }, { "epoch": 0.77487413090386, "grad_norm": 1.2829329574836725, "learning_rate": 1.4397587992752825e-05, "loss": 0.5941190719604492, "step": 4848 }, { "epoch": 0.7750339646767362, "grad_norm": 1.1905184699613405, "learning_rate": 1.4395214425392402e-05, "loss": 0.6379973888397217, "step": 4849 }, { "epoch": 0.7751937984496124, "grad_norm": 1.4169524404881977, "learning_rate": 1.4392840551085805e-05, "loss": 0.7485802173614502, "step": 4850 }, { "epoch": 0.7753536322224887, "grad_norm": 1.2807521454678248, "learning_rate": 1.4390466369998825e-05, "loss": 0.6279617547988892, "step": 4851 }, { "epoch": 0.7755134659953649, "grad_norm": 1.1054588308706486, "learning_rate": 1.4388091882297266e-05, "loss": 0.6425818204879761, "step": 4852 }, { "epoch": 0.7756732997682411, "grad_norm": 1.2924216696104662, "learning_rate": 1.438571708814695e-05, "loss": 0.6025904417037964, "step": 4853 }, { "epoch": 0.7758331335411173, "grad_norm": 1.2281746908351951, "learning_rate": 1.4383341987713726e-05, "loss": 0.641374945640564, "step": 4854 }, { "epoch": 0.7759929673139935, "grad_norm": 1.1890401083285167, "learning_rate": 1.4380966581163458e-05, "loss": 0.6312657594680786, "step": 4855 }, { "epoch": 0.7761528010868697, "grad_norm": 1.2972624368471566, "learning_rate": 1.4378590868662043e-05, "loss": 0.539751410484314, "step": 4856 }, { "epoch": 0.7763126348597459, "grad_norm": 1.331694273284297, "learning_rate": 1.4376214850375388e-05, "loss": 0.7636700868606567, "step": 4857 }, { "epoch": 0.7764724686326221, "grad_norm": 1.2172620508976688, "learning_rate": 1.4373838526469425e-05, "loss": 0.6069842576980591, "step": 4858 }, { "epoch": 0.7766323024054983, "grad_norm": 1.3792010177876175, "learning_rate": 1.4371461897110112e-05, "loss": 0.5840214490890503, "step": 4859 }, { "epoch": 0.7767921361783745, "grad_norm": 1.4447323276814044, "learning_rate": 1.436908496246342e-05, "loss": 0.6126359701156616, "step": 4860 }, { "epoch": 0.7769519699512507, "grad_norm": 1.3145069132666833, "learning_rate": 1.436670772269535e-05, "loss": 0.5353069305419922, "step": 4861 }, { "epoch": 0.7771118037241269, "grad_norm": 1.3261763716637434, "learning_rate": 1.4364330177971916e-05, "loss": 0.5388559103012085, "step": 4862 }, { "epoch": 0.7772716374970031, "grad_norm": 1.5029281217840893, "learning_rate": 1.4361952328459161e-05, "loss": 0.6051865220069885, "step": 4863 }, { "epoch": 0.7774314712698793, "grad_norm": 1.1505675394321713, "learning_rate": 1.4359574174323146e-05, "loss": 0.5482573509216309, "step": 4864 }, { "epoch": 0.7775913050427555, "grad_norm": 1.3792809136265993, "learning_rate": 1.4357195715729945e-05, "loss": 0.6252598762512207, "step": 4865 }, { "epoch": 0.7777511388156317, "grad_norm": 1.3273189199970532, "learning_rate": 1.435481695284567e-05, "loss": 0.5606298446655273, "step": 4866 }, { "epoch": 0.7779109725885079, "grad_norm": 2.5688785684414532, "learning_rate": 1.4352437885836441e-05, "loss": 0.5360206365585327, "step": 4867 }, { "epoch": 0.7780708063613841, "grad_norm": 1.3542860806596084, "learning_rate": 1.4350058514868403e-05, "loss": 0.6808372735977173, "step": 4868 }, { "epoch": 0.7782306401342604, "grad_norm": 1.4989739840976557, "learning_rate": 1.4347678840107726e-05, "loss": 0.617203414440155, "step": 4869 }, { "epoch": 0.7783904739071366, "grad_norm": 1.1975525858565261, "learning_rate": 1.4345298861720594e-05, "loss": 0.5570329427719116, "step": 4870 }, { "epoch": 0.7785503076800128, "grad_norm": 1.4247554279648833, "learning_rate": 1.4342918579873222e-05, "loss": 0.6984419822692871, "step": 4871 }, { "epoch": 0.778710141452889, "grad_norm": 1.499683799327027, "learning_rate": 1.4340537994731836e-05, "loss": 0.6015500426292419, "step": 4872 }, { "epoch": 0.7788699752257652, "grad_norm": 1.3012005022729873, "learning_rate": 1.4338157106462689e-05, "loss": 0.6591644287109375, "step": 4873 }, { "epoch": 0.7790298089986414, "grad_norm": 1.4719938722071764, "learning_rate": 1.4335775915232051e-05, "loss": 0.6905032396316528, "step": 4874 }, { "epoch": 0.7791896427715176, "grad_norm": 1.1747964655113017, "learning_rate": 1.4333394421206221e-05, "loss": 0.7001169919967651, "step": 4875 }, { "epoch": 0.7793494765443938, "grad_norm": 1.32622374897376, "learning_rate": 1.4331012624551512e-05, "loss": 0.708076536655426, "step": 4876 }, { "epoch": 0.77950931031727, "grad_norm": 1.24446735804222, "learning_rate": 1.4328630525434257e-05, "loss": 0.6016237735748291, "step": 4877 }, { "epoch": 0.7796691440901462, "grad_norm": 1.4435729725318762, "learning_rate": 1.4326248124020819e-05, "loss": 0.6265310049057007, "step": 4878 }, { "epoch": 0.7798289778630224, "grad_norm": 1.619356204560118, "learning_rate": 1.432386542047757e-05, "loss": 0.5748027563095093, "step": 4879 }, { "epoch": 0.7799888116358986, "grad_norm": 1.1896363260541487, "learning_rate": 1.4321482414970913e-05, "loss": 0.6099512577056885, "step": 4880 }, { "epoch": 0.7801486454087748, "grad_norm": 1.3199140269493705, "learning_rate": 1.4319099107667271e-05, "loss": 0.6276883482933044, "step": 4881 }, { "epoch": 0.7803084791816511, "grad_norm": 1.350258739273747, "learning_rate": 1.4316715498733083e-05, "loss": 0.7013257741928101, "step": 4882 }, { "epoch": 0.7804683129545273, "grad_norm": 1.3624058663559997, "learning_rate": 1.4314331588334814e-05, "loss": 0.6956276893615723, "step": 4883 }, { "epoch": 0.7806281467274035, "grad_norm": 1.272738074148238, "learning_rate": 1.4311947376638942e-05, "loss": 0.5213027000427246, "step": 4884 }, { "epoch": 0.7807879805002798, "grad_norm": 1.1487054410153386, "learning_rate": 1.430956286381198e-05, "loss": 0.5611001253128052, "step": 4885 }, { "epoch": 0.780947814273156, "grad_norm": 1.208273499668402, "learning_rate": 1.4307178050020447e-05, "loss": 0.6339571475982666, "step": 4886 }, { "epoch": 0.7811076480460322, "grad_norm": 1.4901383683305833, "learning_rate": 1.4304792935430893e-05, "loss": 0.5748132467269897, "step": 4887 }, { "epoch": 0.7812674818189084, "grad_norm": 1.2450979815029466, "learning_rate": 1.4302407520209887e-05, "loss": 0.6028537750244141, "step": 4888 }, { "epoch": 0.7814273155917846, "grad_norm": 1.3513195791941568, "learning_rate": 1.4300021804524015e-05, "loss": 0.6176056861877441, "step": 4889 }, { "epoch": 0.7815871493646608, "grad_norm": 1.4035468959490096, "learning_rate": 1.4297635788539887e-05, "loss": 0.6322267055511475, "step": 4890 }, { "epoch": 0.781746983137537, "grad_norm": 1.3834261286018665, "learning_rate": 1.429524947242414e-05, "loss": 0.7933801412582397, "step": 4891 }, { "epoch": 0.7819068169104132, "grad_norm": 1.7886413219864472, "learning_rate": 1.4292862856343418e-05, "loss": 0.5840978026390076, "step": 4892 }, { "epoch": 0.7820666506832894, "grad_norm": 1.3851666810241545, "learning_rate": 1.42904759404644e-05, "loss": 0.6166620850563049, "step": 4893 }, { "epoch": 0.7822264844561656, "grad_norm": 1.3448854865413262, "learning_rate": 1.4288088724953773e-05, "loss": 0.5833755135536194, "step": 4894 }, { "epoch": 0.7823863182290418, "grad_norm": 1.277651571879306, "learning_rate": 1.4285701209978257e-05, "loss": 0.5195285081863403, "step": 4895 }, { "epoch": 0.782546152001918, "grad_norm": 1.400838327362106, "learning_rate": 1.4283313395704587e-05, "loss": 0.6961756944656372, "step": 4896 }, { "epoch": 0.7827059857747942, "grad_norm": 1.5392786830707093, "learning_rate": 1.4280925282299516e-05, "loss": 0.7054192423820496, "step": 4897 }, { "epoch": 0.7828658195476704, "grad_norm": 1.1401807805028896, "learning_rate": 1.4278536869929826e-05, "loss": 0.5393038392066956, "step": 4898 }, { "epoch": 0.7830256533205466, "grad_norm": 1.3768971819689675, "learning_rate": 1.4276148158762313e-05, "loss": 0.6740785837173462, "step": 4899 }, { "epoch": 0.7831854870934228, "grad_norm": 1.3260219101013229, "learning_rate": 1.4273759148963793e-05, "loss": 0.5363426208496094, "step": 4900 }, { "epoch": 0.783345320866299, "grad_norm": 1.4796841997341181, "learning_rate": 1.4271369840701112e-05, "loss": 0.585832953453064, "step": 4901 }, { "epoch": 0.7835051546391752, "grad_norm": 1.2489417177458328, "learning_rate": 1.4268980234141127e-05, "loss": 0.4999465048313141, "step": 4902 }, { "epoch": 0.7836649884120515, "grad_norm": 1.1428587916882569, "learning_rate": 1.4266590329450725e-05, "loss": 0.5525690317153931, "step": 4903 }, { "epoch": 0.7838248221849277, "grad_norm": 1.5082728413024844, "learning_rate": 1.4264200126796798e-05, "loss": 0.6721052527427673, "step": 4904 }, { "epoch": 0.7839846559578039, "grad_norm": 1.3146794146791692, "learning_rate": 1.4261809626346278e-05, "loss": 0.649782121181488, "step": 4905 }, { "epoch": 0.7841444897306801, "grad_norm": 1.262237811216173, "learning_rate": 1.4259418828266104e-05, "loss": 0.5418446063995361, "step": 4906 }, { "epoch": 0.7843043235035563, "grad_norm": 1.3818129672914572, "learning_rate": 1.4257027732723247e-05, "loss": 0.6662693023681641, "step": 4907 }, { "epoch": 0.7844641572764325, "grad_norm": 1.2520978823301532, "learning_rate": 1.4254636339884687e-05, "loss": 0.6435825824737549, "step": 4908 }, { "epoch": 0.7846239910493087, "grad_norm": 1.148305184415434, "learning_rate": 1.4252244649917431e-05, "loss": 0.6094300746917725, "step": 4909 }, { "epoch": 0.7847838248221849, "grad_norm": 1.1845676582416456, "learning_rate": 1.4249852662988508e-05, "loss": 0.43373748660087585, "step": 4910 }, { "epoch": 0.7849436585950611, "grad_norm": 1.2306669700943436, "learning_rate": 1.4247460379264966e-05, "loss": 0.688266396522522, "step": 4911 }, { "epoch": 0.7851034923679373, "grad_norm": 1.4752186955256952, "learning_rate": 1.4245067798913871e-05, "loss": 0.6948059797286987, "step": 4912 }, { "epoch": 0.7852633261408135, "grad_norm": 1.3461588201552028, "learning_rate": 1.4242674922102317e-05, "loss": 0.6511755585670471, "step": 4913 }, { "epoch": 0.7854231599136897, "grad_norm": 1.2632709660498616, "learning_rate": 1.424028174899741e-05, "loss": 0.5347593426704407, "step": 4914 }, { "epoch": 0.7855829936865659, "grad_norm": 1.6855057121838417, "learning_rate": 1.4237888279766284e-05, "loss": 0.7610535621643066, "step": 4915 }, { "epoch": 0.7857428274594421, "grad_norm": 1.3995622183414658, "learning_rate": 1.4235494514576083e-05, "loss": 0.6888278126716614, "step": 4916 }, { "epoch": 0.7859026612323183, "grad_norm": 1.4294261043337513, "learning_rate": 1.423310045359399e-05, "loss": 0.6665587425231934, "step": 4917 }, { "epoch": 0.7860624950051947, "grad_norm": 1.3151769712110024, "learning_rate": 1.4230706096987187e-05, "loss": 0.7309359312057495, "step": 4918 }, { "epoch": 0.7862223287780709, "grad_norm": 1.3674739894833159, "learning_rate": 1.4228311444922896e-05, "loss": 0.7242302894592285, "step": 4919 }, { "epoch": 0.7863821625509471, "grad_norm": 1.4680453123078854, "learning_rate": 1.4225916497568343e-05, "loss": 0.7206882834434509, "step": 4920 }, { "epoch": 0.7865419963238233, "grad_norm": 1.248403789408361, "learning_rate": 1.4223521255090793e-05, "loss": 0.6044673919677734, "step": 4921 }, { "epoch": 0.7867018300966995, "grad_norm": 1.3835160283216714, "learning_rate": 1.4221125717657511e-05, "loss": 0.6718044877052307, "step": 4922 }, { "epoch": 0.7868616638695757, "grad_norm": 1.2630104568986213, "learning_rate": 1.4218729885435797e-05, "loss": 0.6460379362106323, "step": 4923 }, { "epoch": 0.7870214976424519, "grad_norm": 1.288429109202317, "learning_rate": 1.421633375859297e-05, "loss": 0.6039177179336548, "step": 4924 }, { "epoch": 0.7871813314153281, "grad_norm": 1.3623433337775177, "learning_rate": 1.4213937337296364e-05, "loss": 0.692755937576294, "step": 4925 }, { "epoch": 0.7873411651882043, "grad_norm": 1.147642171082576, "learning_rate": 1.4211540621713336e-05, "loss": 0.48874080181121826, "step": 4926 }, { "epoch": 0.7875009989610805, "grad_norm": 1.3029656434111405, "learning_rate": 1.4209143612011268e-05, "loss": 0.5561176538467407, "step": 4927 }, { "epoch": 0.7876608327339567, "grad_norm": 1.219042534685195, "learning_rate": 1.4206746308357553e-05, "loss": 0.6122641563415527, "step": 4928 }, { "epoch": 0.7878206665068329, "grad_norm": 1.2449074610219164, "learning_rate": 1.4204348710919611e-05, "loss": 0.6269429922103882, "step": 4929 }, { "epoch": 0.7879805002797091, "grad_norm": 1.0946619724641842, "learning_rate": 1.4201950819864884e-05, "loss": 0.5532495975494385, "step": 4930 }, { "epoch": 0.7881403340525853, "grad_norm": 1.488006415785248, "learning_rate": 1.4199552635360834e-05, "loss": 0.7274075746536255, "step": 4931 }, { "epoch": 0.7883001678254615, "grad_norm": 1.1783212285108524, "learning_rate": 1.419715415757494e-05, "loss": 0.5632067918777466, "step": 4932 }, { "epoch": 0.7884600015983377, "grad_norm": 1.3276875586737678, "learning_rate": 1.4194755386674703e-05, "loss": 0.5464792251586914, "step": 4933 }, { "epoch": 0.7886198353712139, "grad_norm": 1.300027722319677, "learning_rate": 1.4192356322827642e-05, "loss": 0.736358642578125, "step": 4934 }, { "epoch": 0.7887796691440901, "grad_norm": 1.6664842277005236, "learning_rate": 1.4189956966201304e-05, "loss": 0.6344258785247803, "step": 4935 }, { "epoch": 0.7889395029169664, "grad_norm": 1.2491142076096586, "learning_rate": 1.4187557316963248e-05, "loss": 0.5630756616592407, "step": 4936 }, { "epoch": 0.7890993366898426, "grad_norm": 1.3141064337375299, "learning_rate": 1.4185157375281058e-05, "loss": 0.6693710088729858, "step": 4937 }, { "epoch": 0.7892591704627188, "grad_norm": 1.3065533533298084, "learning_rate": 1.4182757141322336e-05, "loss": 0.6381696462631226, "step": 4938 }, { "epoch": 0.789419004235595, "grad_norm": 1.378603774137319, "learning_rate": 1.4180356615254711e-05, "loss": 0.6179401874542236, "step": 4939 }, { "epoch": 0.7895788380084712, "grad_norm": 1.4749029554782047, "learning_rate": 1.417795579724582e-05, "loss": 0.6390187740325928, "step": 4940 }, { "epoch": 0.7897386717813474, "grad_norm": 1.4348824562895437, "learning_rate": 1.417555468746333e-05, "loss": 0.6847834587097168, "step": 4941 }, { "epoch": 0.7898985055542236, "grad_norm": 1.3763057114279669, "learning_rate": 1.417315328607493e-05, "loss": 0.6544370651245117, "step": 4942 }, { "epoch": 0.7900583393270998, "grad_norm": 1.2887459566652064, "learning_rate": 1.4170751593248321e-05, "loss": 0.519904613494873, "step": 4943 }, { "epoch": 0.790218173099976, "grad_norm": 1.4599381609007336, "learning_rate": 1.416834960915123e-05, "loss": 0.5838486552238464, "step": 4944 }, { "epoch": 0.7903780068728522, "grad_norm": 1.2428006717762863, "learning_rate": 1.4165947333951404e-05, "loss": 0.63609778881073, "step": 4945 }, { "epoch": 0.7905378406457284, "grad_norm": 1.4647890994185135, "learning_rate": 1.4163544767816608e-05, "loss": 0.6959666013717651, "step": 4946 }, { "epoch": 0.7906976744186046, "grad_norm": 1.2555719497753866, "learning_rate": 1.4161141910914632e-05, "loss": 0.5126733183860779, "step": 4947 }, { "epoch": 0.7908575081914808, "grad_norm": 1.234322880756226, "learning_rate": 1.4158738763413273e-05, "loss": 0.569364070892334, "step": 4948 }, { "epoch": 0.791017341964357, "grad_norm": 1.4594576436514308, "learning_rate": 1.415633532548037e-05, "loss": 0.6226584911346436, "step": 4949 }, { "epoch": 0.7911771757372332, "grad_norm": 1.459551315850822, "learning_rate": 1.4153931597283765e-05, "loss": 0.5816949605941772, "step": 4950 }, { "epoch": 0.7913370095101094, "grad_norm": 1.1834177767160245, "learning_rate": 1.4151527578991328e-05, "loss": 0.5574387311935425, "step": 4951 }, { "epoch": 0.7914968432829856, "grad_norm": 1.285377603867415, "learning_rate": 1.4149123270770945e-05, "loss": 0.5433369874954224, "step": 4952 }, { "epoch": 0.791656677055862, "grad_norm": 1.5943350225461335, "learning_rate": 1.4146718672790527e-05, "loss": 0.6338667869567871, "step": 4953 }, { "epoch": 0.7918165108287382, "grad_norm": 1.486442289604412, "learning_rate": 1.4144313785218e-05, "loss": 0.6167620420455933, "step": 4954 }, { "epoch": 0.7919763446016144, "grad_norm": 1.4583907833856407, "learning_rate": 1.4141908608221313e-05, "loss": 0.671259880065918, "step": 4955 }, { "epoch": 0.7921361783744906, "grad_norm": 1.3239068129590894, "learning_rate": 1.4139503141968437e-05, "loss": 0.5960241556167603, "step": 4956 }, { "epoch": 0.7922960121473668, "grad_norm": 1.1905369383381847, "learning_rate": 1.413709738662736e-05, "loss": 0.6989983320236206, "step": 4957 }, { "epoch": 0.792455845920243, "grad_norm": 1.2902212930160086, "learning_rate": 1.4134691342366086e-05, "loss": 0.5579081773757935, "step": 4958 }, { "epoch": 0.7926156796931192, "grad_norm": 1.2572501807685852, "learning_rate": 1.4132285009352657e-05, "loss": 0.6287431716918945, "step": 4959 }, { "epoch": 0.7927755134659954, "grad_norm": 1.4193733253742884, "learning_rate": 1.4129878387755112e-05, "loss": 0.5666210651397705, "step": 4960 }, { "epoch": 0.7929353472388716, "grad_norm": 1.5186253190631682, "learning_rate": 1.4127471477741526e-05, "loss": 0.5808209180831909, "step": 4961 }, { "epoch": 0.7930951810117478, "grad_norm": 1.1148059751937083, "learning_rate": 1.4125064279479988e-05, "loss": 0.5115493535995483, "step": 4962 }, { "epoch": 0.793255014784624, "grad_norm": 1.4129907472271999, "learning_rate": 1.4122656793138605e-05, "loss": 0.5540025234222412, "step": 4963 }, { "epoch": 0.7934148485575002, "grad_norm": 1.3236062933518067, "learning_rate": 1.4120249018885512e-05, "loss": 0.5234696269035339, "step": 4964 }, { "epoch": 0.7935746823303764, "grad_norm": 1.263285785592308, "learning_rate": 1.411784095688886e-05, "loss": 0.5580117702484131, "step": 4965 }, { "epoch": 0.7937345161032526, "grad_norm": 1.4344428538740934, "learning_rate": 1.4115432607316814e-05, "loss": 0.7063555717468262, "step": 4966 }, { "epoch": 0.7938943498761288, "grad_norm": 1.3882813727111727, "learning_rate": 1.411302397033757e-05, "loss": 0.6051555871963501, "step": 4967 }, { "epoch": 0.794054183649005, "grad_norm": 1.1654293412973655, "learning_rate": 1.4110615046119334e-05, "loss": 0.398216187953949, "step": 4968 }, { "epoch": 0.7942140174218812, "grad_norm": 1.3772899480593457, "learning_rate": 1.410820583483034e-05, "loss": 0.733210563659668, "step": 4969 }, { "epoch": 0.7943738511947575, "grad_norm": 1.2333870723120013, "learning_rate": 1.4105796336638836e-05, "loss": 0.6280531883239746, "step": 4970 }, { "epoch": 0.7945336849676337, "grad_norm": 1.2790909813352147, "learning_rate": 1.4103386551713094e-05, "loss": 0.5355285406112671, "step": 4971 }, { "epoch": 0.7946935187405099, "grad_norm": 1.2994520573102146, "learning_rate": 1.4100976480221408e-05, "loss": 0.6333147287368774, "step": 4972 }, { "epoch": 0.7948533525133861, "grad_norm": 1.4612239094716872, "learning_rate": 1.4098566122332087e-05, "loss": 0.6798658967018127, "step": 4973 }, { "epoch": 0.7950131862862623, "grad_norm": 1.2401503256390223, "learning_rate": 1.4096155478213458e-05, "loss": 0.6569694876670837, "step": 4974 }, { "epoch": 0.7951730200591385, "grad_norm": 1.3591555691581674, "learning_rate": 1.4093744548033878e-05, "loss": 0.6085880994796753, "step": 4975 }, { "epoch": 0.7953328538320147, "grad_norm": 1.18150056905807, "learning_rate": 1.4091333331961711e-05, "loss": 0.5914332270622253, "step": 4976 }, { "epoch": 0.7954926876048909, "grad_norm": 1.3837594238320727, "learning_rate": 1.4088921830165356e-05, "loss": 0.6417977213859558, "step": 4977 }, { "epoch": 0.7956525213777671, "grad_norm": 1.1534294107562215, "learning_rate": 1.4086510042813215e-05, "loss": 0.5625646114349365, "step": 4978 }, { "epoch": 0.7958123551506433, "grad_norm": 1.3483443873330525, "learning_rate": 1.4084097970073725e-05, "loss": 0.5847265720367432, "step": 4979 }, { "epoch": 0.7959721889235195, "grad_norm": 1.3827906792714166, "learning_rate": 1.4081685612115334e-05, "loss": 0.5992920398712158, "step": 4980 }, { "epoch": 0.7961320226963957, "grad_norm": 1.4208881072462225, "learning_rate": 1.4079272969106512e-05, "loss": 0.6752853393554688, "step": 4981 }, { "epoch": 0.7962918564692719, "grad_norm": 1.3314124052615492, "learning_rate": 1.4076860041215752e-05, "loss": 0.5947104692459106, "step": 4982 }, { "epoch": 0.7964516902421481, "grad_norm": 1.3559980145693942, "learning_rate": 1.407444682861156e-05, "loss": 0.5623762607574463, "step": 4983 }, { "epoch": 0.7966115240150243, "grad_norm": 1.363081852491016, "learning_rate": 1.4072033331462472e-05, "loss": 0.6731761693954468, "step": 4984 }, { "epoch": 0.7967713577879005, "grad_norm": 1.236860166926019, "learning_rate": 1.4069619549937034e-05, "loss": 0.5691913366317749, "step": 4985 }, { "epoch": 0.7969311915607767, "grad_norm": 1.461725587011813, "learning_rate": 1.4067205484203816e-05, "loss": 0.5607779026031494, "step": 4986 }, { "epoch": 0.797091025333653, "grad_norm": 1.2358757527439106, "learning_rate": 1.406479113443141e-05, "loss": 0.5948630571365356, "step": 4987 }, { "epoch": 0.7972508591065293, "grad_norm": 1.2915280340058797, "learning_rate": 1.4062376500788423e-05, "loss": 0.6282084584236145, "step": 4988 }, { "epoch": 0.7974106928794055, "grad_norm": 1.3501864421164995, "learning_rate": 1.4059961583443486e-05, "loss": 0.7256371974945068, "step": 4989 }, { "epoch": 0.7975705266522817, "grad_norm": 1.496271186044476, "learning_rate": 1.4057546382565248e-05, "loss": 0.7966669201850891, "step": 4990 }, { "epoch": 0.7977303604251579, "grad_norm": 1.2112418516358345, "learning_rate": 1.4055130898322379e-05, "loss": 0.6061804890632629, "step": 4991 }, { "epoch": 0.7978901941980341, "grad_norm": 1.6778892240746677, "learning_rate": 1.4052715130883564e-05, "loss": 0.6530884504318237, "step": 4992 }, { "epoch": 0.7980500279709103, "grad_norm": 1.3831525056677616, "learning_rate": 1.4050299080417515e-05, "loss": 0.6597341299057007, "step": 4993 }, { "epoch": 0.7982098617437865, "grad_norm": 1.4200153956863248, "learning_rate": 1.4047882747092962e-05, "loss": 0.6615191102027893, "step": 4994 }, { "epoch": 0.7983696955166627, "grad_norm": 1.3106021916988373, "learning_rate": 1.4045466131078652e-05, "loss": 0.5959436297416687, "step": 4995 }, { "epoch": 0.7985295292895389, "grad_norm": 1.2543658129711688, "learning_rate": 1.404304923254335e-05, "loss": 0.5877714157104492, "step": 4996 }, { "epoch": 0.7986893630624151, "grad_norm": 1.4740245304976705, "learning_rate": 1.4040632051655845e-05, "loss": 0.7753177881240845, "step": 4997 }, { "epoch": 0.7988491968352913, "grad_norm": 1.4799901294334687, "learning_rate": 1.4038214588584947e-05, "loss": 0.5317165851593018, "step": 4998 }, { "epoch": 0.7990090306081675, "grad_norm": 1.252441725295981, "learning_rate": 1.403579684349948e-05, "loss": 0.6335391998291016, "step": 4999 }, { "epoch": 0.7991688643810437, "grad_norm": 1.41527212279567, "learning_rate": 1.4033378816568288e-05, "loss": 0.6137556433677673, "step": 5000 }, { "epoch": 0.7993286981539199, "grad_norm": 1.209782170237796, "learning_rate": 1.4030960507960243e-05, "loss": 0.55289626121521, "step": 5001 }, { "epoch": 0.7994885319267961, "grad_norm": 1.3326992696861562, "learning_rate": 1.4028541917844231e-05, "loss": 0.6142851114273071, "step": 5002 }, { "epoch": 0.7996483656996723, "grad_norm": 1.1353361344452788, "learning_rate": 1.4026123046389156e-05, "loss": 0.6765009760856628, "step": 5003 }, { "epoch": 0.7998081994725486, "grad_norm": 1.2569715166527138, "learning_rate": 1.402370389376394e-05, "loss": 0.6598221063613892, "step": 5004 }, { "epoch": 0.7999680332454248, "grad_norm": 1.259074703834384, "learning_rate": 1.4021284460137533e-05, "loss": 0.7617565989494324, "step": 5005 }, { "epoch": 0.800127867018301, "grad_norm": 1.216963542531056, "learning_rate": 1.40188647456789e-05, "loss": 0.5649770498275757, "step": 5006 }, { "epoch": 0.8002877007911772, "grad_norm": 1.4236967895052295, "learning_rate": 1.4016444750557022e-05, "loss": 0.6524965167045593, "step": 5007 }, { "epoch": 0.8004475345640534, "grad_norm": 1.212173122308322, "learning_rate": 1.4014024474940902e-05, "loss": 0.5435446500778198, "step": 5008 }, { "epoch": 0.8006073683369296, "grad_norm": 1.4581413898468218, "learning_rate": 1.4011603918999568e-05, "loss": 0.6190527677536011, "step": 5009 }, { "epoch": 0.8007672021098058, "grad_norm": 1.287228445926606, "learning_rate": 1.4009183082902055e-05, "loss": 0.6125253438949585, "step": 5010 }, { "epoch": 0.800927035882682, "grad_norm": 1.5359405726443562, "learning_rate": 1.4006761966817437e-05, "loss": 0.6486818790435791, "step": 5011 }, { "epoch": 0.8010868696555582, "grad_norm": 1.2678543717032975, "learning_rate": 1.4004340570914789e-05, "loss": 0.4937480390071869, "step": 5012 }, { "epoch": 0.8012467034284344, "grad_norm": 1.327524250229038, "learning_rate": 1.4001918895363216e-05, "loss": 0.7257390022277832, "step": 5013 }, { "epoch": 0.8014065372013106, "grad_norm": 1.1794228233013446, "learning_rate": 1.3999496940331834e-05, "loss": 0.45087265968322754, "step": 5014 }, { "epoch": 0.8015663709741868, "grad_norm": 1.3106614416298135, "learning_rate": 1.3997074705989789e-05, "loss": 0.6813511848449707, "step": 5015 }, { "epoch": 0.801726204747063, "grad_norm": 1.2079336914231913, "learning_rate": 1.3994652192506237e-05, "loss": 0.7998569011688232, "step": 5016 }, { "epoch": 0.8018860385199392, "grad_norm": 1.2657711740510973, "learning_rate": 1.3992229400050362e-05, "loss": 0.641459047794342, "step": 5017 }, { "epoch": 0.8020458722928154, "grad_norm": 1.4980567974888985, "learning_rate": 1.398980632879136e-05, "loss": 0.47972211241722107, "step": 5018 }, { "epoch": 0.8022057060656916, "grad_norm": 1.5710714344565713, "learning_rate": 1.3987382978898452e-05, "loss": 0.7387580275535583, "step": 5019 }, { "epoch": 0.8023655398385678, "grad_norm": 1.4615298299093538, "learning_rate": 1.3984959350540872e-05, "loss": 0.5496025085449219, "step": 5020 }, { "epoch": 0.802525373611444, "grad_norm": 1.5058588412345815, "learning_rate": 1.3982535443887883e-05, "loss": 0.6438199281692505, "step": 5021 }, { "epoch": 0.8026852073843203, "grad_norm": 1.259868182430134, "learning_rate": 1.398011125910876e-05, "loss": 0.7608089447021484, "step": 5022 }, { "epoch": 0.8028450411571966, "grad_norm": 1.2137907591081618, "learning_rate": 1.3977686796372796e-05, "loss": 0.6262649297714233, "step": 5023 }, { "epoch": 0.8030048749300728, "grad_norm": 1.338300311290368, "learning_rate": 1.3975262055849315e-05, "loss": 0.6193884611129761, "step": 5024 }, { "epoch": 0.803164708702949, "grad_norm": 1.3942166509423704, "learning_rate": 1.3972837037707643e-05, "loss": 0.5235604047775269, "step": 5025 }, { "epoch": 0.8033245424758252, "grad_norm": 1.411123969839521, "learning_rate": 1.397041174211714e-05, "loss": 0.6105351448059082, "step": 5026 }, { "epoch": 0.8034843762487014, "grad_norm": 1.3043785536875137, "learning_rate": 1.396798616924718e-05, "loss": 0.5155941843986511, "step": 5027 }, { "epoch": 0.8036442100215776, "grad_norm": 1.1870629286448562, "learning_rate": 1.3965560319267155e-05, "loss": 0.6631498336791992, "step": 5028 }, { "epoch": 0.8038040437944538, "grad_norm": 1.3647117208189956, "learning_rate": 1.396313419234648e-05, "loss": 0.5027610063552856, "step": 5029 }, { "epoch": 0.80396387756733, "grad_norm": 1.554025074782424, "learning_rate": 1.396070778865458e-05, "loss": 0.7171527147293091, "step": 5030 }, { "epoch": 0.8041237113402062, "grad_norm": 1.338334190425921, "learning_rate": 1.3958281108360914e-05, "loss": 0.5521304607391357, "step": 5031 }, { "epoch": 0.8042835451130824, "grad_norm": 1.3958988497306823, "learning_rate": 1.3955854151634951e-05, "loss": 0.7145735025405884, "step": 5032 }, { "epoch": 0.8044433788859586, "grad_norm": 1.4166727355243318, "learning_rate": 1.395342691864618e-05, "loss": 0.6349798440933228, "step": 5033 }, { "epoch": 0.8046032126588348, "grad_norm": 1.1687812712503742, "learning_rate": 1.395099940956411e-05, "loss": 0.611377477645874, "step": 5034 }, { "epoch": 0.804763046431711, "grad_norm": 1.3839800087590373, "learning_rate": 1.3948571624558271e-05, "loss": 0.6797593235969543, "step": 5035 }, { "epoch": 0.8049228802045872, "grad_norm": 1.5678463598559347, "learning_rate": 1.394614356379821e-05, "loss": 0.5557727217674255, "step": 5036 }, { "epoch": 0.8050827139774634, "grad_norm": 1.3850954798450417, "learning_rate": 1.3943715227453494e-05, "loss": 0.5945366621017456, "step": 5037 }, { "epoch": 0.8052425477503397, "grad_norm": 1.2589411728770266, "learning_rate": 1.3941286615693709e-05, "loss": 0.5486554503440857, "step": 5038 }, { "epoch": 0.8054023815232159, "grad_norm": 1.4495330360324976, "learning_rate": 1.3938857728688464e-05, "loss": 0.8065548539161682, "step": 5039 }, { "epoch": 0.8055622152960921, "grad_norm": 1.314093371688359, "learning_rate": 1.3936428566607378e-05, "loss": 0.5810977220535278, "step": 5040 }, { "epoch": 0.8057220490689683, "grad_norm": 1.2112788505510275, "learning_rate": 1.3933999129620101e-05, "loss": 0.5941017866134644, "step": 5041 }, { "epoch": 0.8058818828418445, "grad_norm": 1.2407604962540106, "learning_rate": 1.3931569417896289e-05, "loss": 0.5241172313690186, "step": 5042 }, { "epoch": 0.8060417166147207, "grad_norm": 1.302810739205179, "learning_rate": 1.3929139431605633e-05, "loss": 0.7724856734275818, "step": 5043 }, { "epoch": 0.8062015503875969, "grad_norm": 1.3085592433464819, "learning_rate": 1.3926709170917831e-05, "loss": 0.5298165082931519, "step": 5044 }, { "epoch": 0.8063613841604731, "grad_norm": 1.2599918945767363, "learning_rate": 1.3924278636002601e-05, "loss": 0.6465991139411926, "step": 5045 }, { "epoch": 0.8065212179333493, "grad_norm": 1.3776642666777035, "learning_rate": 1.3921847827029688e-05, "loss": 0.7054758667945862, "step": 5046 }, { "epoch": 0.8066810517062255, "grad_norm": 1.5944306463890907, "learning_rate": 1.3919416744168846e-05, "loss": 0.7375050783157349, "step": 5047 }, { "epoch": 0.8068408854791017, "grad_norm": 1.2918594155071097, "learning_rate": 1.3916985387589857e-05, "loss": 0.6355932950973511, "step": 5048 }, { "epoch": 0.8070007192519779, "grad_norm": 1.140788774247762, "learning_rate": 1.3914553757462515e-05, "loss": 0.6344894170761108, "step": 5049 }, { "epoch": 0.8071605530248541, "grad_norm": 1.3512224643924284, "learning_rate": 1.3912121853956638e-05, "loss": 0.5745749473571777, "step": 5050 }, { "epoch": 0.8073203867977303, "grad_norm": 1.1956459937077681, "learning_rate": 1.3909689677242062e-05, "loss": 0.5858901739120483, "step": 5051 }, { "epoch": 0.8074802205706065, "grad_norm": 1.5866525381171055, "learning_rate": 1.3907257227488642e-05, "loss": 0.6440722942352295, "step": 5052 }, { "epoch": 0.8076400543434827, "grad_norm": 1.4575319378296263, "learning_rate": 1.3904824504866251e-05, "loss": 0.6065603494644165, "step": 5053 }, { "epoch": 0.8077998881163589, "grad_norm": 1.4961196413625915, "learning_rate": 1.3902391509544783e-05, "loss": 0.637650728225708, "step": 5054 }, { "epoch": 0.8079597218892351, "grad_norm": 1.2204081165488418, "learning_rate": 1.3899958241694146e-05, "loss": 0.7233501672744751, "step": 5055 }, { "epoch": 0.8081195556621114, "grad_norm": 1.362804020713859, "learning_rate": 1.3897524701484275e-05, "loss": 0.5757704973220825, "step": 5056 }, { "epoch": 0.8082793894349876, "grad_norm": 1.160464739071661, "learning_rate": 1.3895090889085117e-05, "loss": 0.5323075652122498, "step": 5057 }, { "epoch": 0.8084392232078638, "grad_norm": 1.3702402370216789, "learning_rate": 1.3892656804666642e-05, "loss": 0.7348502278327942, "step": 5058 }, { "epoch": 0.8085990569807401, "grad_norm": 1.193376074528316, "learning_rate": 1.3890222448398839e-05, "loss": 0.5667406916618347, "step": 5059 }, { "epoch": 0.8087588907536163, "grad_norm": 1.4312628124871067, "learning_rate": 1.3887787820451709e-05, "loss": 0.8011060357093811, "step": 5060 }, { "epoch": 0.8089187245264925, "grad_norm": 1.393853596676985, "learning_rate": 1.3885352920995288e-05, "loss": 0.5592403411865234, "step": 5061 }, { "epoch": 0.8090785582993687, "grad_norm": 1.4379598287774051, "learning_rate": 1.3882917750199609e-05, "loss": 0.8864428997039795, "step": 5062 }, { "epoch": 0.8092383920722449, "grad_norm": 1.3524076518453605, "learning_rate": 1.3880482308234747e-05, "loss": 0.6669004559516907, "step": 5063 }, { "epoch": 0.8093982258451211, "grad_norm": 1.4470033431650706, "learning_rate": 1.3878046595270778e-05, "loss": 0.6468205451965332, "step": 5064 }, { "epoch": 0.8095580596179973, "grad_norm": 1.437106414855805, "learning_rate": 1.3875610611477803e-05, "loss": 0.6866181492805481, "step": 5065 }, { "epoch": 0.8097178933908735, "grad_norm": 1.4166535261082769, "learning_rate": 1.3873174357025948e-05, "loss": 0.5410751104354858, "step": 5066 }, { "epoch": 0.8098777271637497, "grad_norm": 1.4878781013990803, "learning_rate": 1.3870737832085344e-05, "loss": 0.6920919418334961, "step": 5067 }, { "epoch": 0.8100375609366259, "grad_norm": 1.2273612187531036, "learning_rate": 1.3868301036826157e-05, "loss": 0.6782543659210205, "step": 5068 }, { "epoch": 0.8101973947095021, "grad_norm": 3.7876052557727498, "learning_rate": 1.386586397141856e-05, "loss": 0.6847901344299316, "step": 5069 }, { "epoch": 0.8103572284823783, "grad_norm": 1.0811453644668831, "learning_rate": 1.3863426636032747e-05, "loss": 0.5189089775085449, "step": 5070 }, { "epoch": 0.8105170622552546, "grad_norm": 1.2151640923028773, "learning_rate": 1.3860989030838938e-05, "loss": 0.5530447363853455, "step": 5071 }, { "epoch": 0.8106768960281308, "grad_norm": 1.3687840312092834, "learning_rate": 1.3858551156007364e-05, "loss": 0.6567407846450806, "step": 5072 }, { "epoch": 0.810836729801007, "grad_norm": 1.7813380478508354, "learning_rate": 1.385611301170828e-05, "loss": 0.57038813829422, "step": 5073 }, { "epoch": 0.8109965635738832, "grad_norm": 1.2513636011679126, "learning_rate": 1.3853674598111954e-05, "loss": 0.5127645134925842, "step": 5074 }, { "epoch": 0.8111563973467594, "grad_norm": 1.4836167946971026, "learning_rate": 1.3851235915388678e-05, "loss": 0.5830118656158447, "step": 5075 }, { "epoch": 0.8113162311196356, "grad_norm": 1.405289981655609, "learning_rate": 1.3848796963708764e-05, "loss": 0.5070237517356873, "step": 5076 }, { "epoch": 0.8114760648925118, "grad_norm": 1.5305116480480212, "learning_rate": 1.3846357743242532e-05, "loss": 0.6645373106002808, "step": 5077 }, { "epoch": 0.811635898665388, "grad_norm": 4.069151915655895, "learning_rate": 1.3843918254160334e-05, "loss": 0.6680278778076172, "step": 5078 }, { "epoch": 0.8117957324382642, "grad_norm": 1.507582893226469, "learning_rate": 1.3841478496632531e-05, "loss": 0.6705403327941895, "step": 5079 }, { "epoch": 0.8119555662111404, "grad_norm": 1.3346926920228936, "learning_rate": 1.3839038470829511e-05, "loss": 0.6101296544075012, "step": 5080 }, { "epoch": 0.8121153999840166, "grad_norm": 1.3721962334736277, "learning_rate": 1.3836598176921678e-05, "loss": 0.7928368449211121, "step": 5081 }, { "epoch": 0.8122752337568928, "grad_norm": 1.2122861772087823, "learning_rate": 1.383415761507945e-05, "loss": 0.6254251003265381, "step": 5082 }, { "epoch": 0.812435067529769, "grad_norm": 1.2547182644440742, "learning_rate": 1.3831716785473268e-05, "loss": 0.6327080726623535, "step": 5083 }, { "epoch": 0.8125949013026452, "grad_norm": 1.3824445375126573, "learning_rate": 1.382927568827359e-05, "loss": 0.6421014070510864, "step": 5084 }, { "epoch": 0.8127547350755214, "grad_norm": 1.3619792530896608, "learning_rate": 1.3826834323650899e-05, "loss": 0.7347122430801392, "step": 5085 }, { "epoch": 0.8129145688483976, "grad_norm": 1.4857256994360375, "learning_rate": 1.3824392691775685e-05, "loss": 0.7836568355560303, "step": 5086 }, { "epoch": 0.8130744026212738, "grad_norm": 1.3177927234142655, "learning_rate": 1.3821950792818464e-05, "loss": 0.6253644824028015, "step": 5087 }, { "epoch": 0.81323423639415, "grad_norm": 1.2242405305754902, "learning_rate": 1.3819508626949771e-05, "loss": 0.5833884477615356, "step": 5088 }, { "epoch": 0.8133940701670263, "grad_norm": 1.425859606293202, "learning_rate": 1.3817066194340157e-05, "loss": 0.5750654935836792, "step": 5089 }, { "epoch": 0.8135539039399025, "grad_norm": 1.3635017223832027, "learning_rate": 1.3814623495160195e-05, "loss": 0.6166927218437195, "step": 5090 }, { "epoch": 0.8137137377127787, "grad_norm": 1.3544499862205737, "learning_rate": 1.381218052958047e-05, "loss": 0.5826759338378906, "step": 5091 }, { "epoch": 0.8138735714856549, "grad_norm": 1.2353009755750612, "learning_rate": 1.3809737297771596e-05, "loss": 0.5841273665428162, "step": 5092 }, { "epoch": 0.8140334052585311, "grad_norm": 1.3558515937997588, "learning_rate": 1.3807293799904195e-05, "loss": 0.5991075038909912, "step": 5093 }, { "epoch": 0.8141932390314074, "grad_norm": 1.5658814724697048, "learning_rate": 1.3804850036148914e-05, "loss": 0.6252979040145874, "step": 5094 }, { "epoch": 0.8143530728042836, "grad_norm": 1.2748102486158963, "learning_rate": 1.3802406006676417e-05, "loss": 0.4994977116584778, "step": 5095 }, { "epoch": 0.8145129065771598, "grad_norm": 1.1439804574946, "learning_rate": 1.3799961711657385e-05, "loss": 0.5453445911407471, "step": 5096 }, { "epoch": 0.814672740350036, "grad_norm": 1.4075183314315407, "learning_rate": 1.379751715126252e-05, "loss": 0.6710410118103027, "step": 5097 }, { "epoch": 0.8148325741229122, "grad_norm": 1.2677925040978313, "learning_rate": 1.3795072325662545e-05, "loss": 0.54156494140625, "step": 5098 }, { "epoch": 0.8149924078957884, "grad_norm": 1.3415575168863894, "learning_rate": 1.3792627235028188e-05, "loss": 0.6779597997665405, "step": 5099 }, { "epoch": 0.8151522416686646, "grad_norm": 1.315028024892969, "learning_rate": 1.3790181879530215e-05, "loss": 0.6317875385284424, "step": 5100 }, { "epoch": 0.8153120754415408, "grad_norm": 1.1401050044742536, "learning_rate": 1.3787736259339396e-05, "loss": 0.534170389175415, "step": 5101 }, { "epoch": 0.815471909214417, "grad_norm": 1.5777879254516471, "learning_rate": 1.3785290374626527e-05, "loss": 0.6364122629165649, "step": 5102 }, { "epoch": 0.8156317429872932, "grad_norm": 1.4250909293093883, "learning_rate": 1.3782844225562419e-05, "loss": 0.7662627696990967, "step": 5103 }, { "epoch": 0.8157915767601694, "grad_norm": 1.2609949761747319, "learning_rate": 1.3780397812317903e-05, "loss": 0.7464167475700378, "step": 5104 }, { "epoch": 0.8159514105330457, "grad_norm": 1.2597816086416167, "learning_rate": 1.3777951135063825e-05, "loss": 0.5865564942359924, "step": 5105 }, { "epoch": 0.8161112443059219, "grad_norm": 1.2613947760397872, "learning_rate": 1.3775504193971055e-05, "loss": 0.7013859748840332, "step": 5106 }, { "epoch": 0.8162710780787981, "grad_norm": 1.3809745558112267, "learning_rate": 1.377305698921048e-05, "loss": 0.5479708909988403, "step": 5107 }, { "epoch": 0.8164309118516743, "grad_norm": 1.3225266895684153, "learning_rate": 1.3770609520953002e-05, "loss": 0.620661735534668, "step": 5108 }, { "epoch": 0.8165907456245505, "grad_norm": 1.2630160838519486, "learning_rate": 1.3768161789369541e-05, "loss": 0.5480834245681763, "step": 5109 }, { "epoch": 0.8167505793974267, "grad_norm": 1.408003139617333, "learning_rate": 1.3765713794631044e-05, "loss": 0.7032011151313782, "step": 5110 }, { "epoch": 0.8169104131703029, "grad_norm": 1.0724004405628775, "learning_rate": 1.3763265536908461e-05, "loss": 0.587054431438446, "step": 5111 }, { "epoch": 0.8170702469431791, "grad_norm": 1.3663004820182583, "learning_rate": 1.3760817016372778e-05, "loss": 0.6877001523971558, "step": 5112 }, { "epoch": 0.8172300807160553, "grad_norm": 1.120036017628526, "learning_rate": 1.375836823319499e-05, "loss": 0.47000211477279663, "step": 5113 }, { "epoch": 0.8173899144889315, "grad_norm": 1.4894806419430366, "learning_rate": 1.3755919187546107e-05, "loss": 0.7329801321029663, "step": 5114 }, { "epoch": 0.8175497482618077, "grad_norm": 1.216066625535931, "learning_rate": 1.3753469879597168e-05, "loss": 0.5460418462753296, "step": 5115 }, { "epoch": 0.8177095820346839, "grad_norm": 1.0189377434781728, "learning_rate": 1.3751020309519216e-05, "loss": 0.44803380966186523, "step": 5116 }, { "epoch": 0.8178694158075601, "grad_norm": 1.2426977221206172, "learning_rate": 1.374857047748333e-05, "loss": 0.6354285478591919, "step": 5117 }, { "epoch": 0.8180292495804363, "grad_norm": 1.2773685785832463, "learning_rate": 1.3746120383660589e-05, "loss": 0.6281205415725708, "step": 5118 }, { "epoch": 0.8181890833533125, "grad_norm": 1.548906718437124, "learning_rate": 1.3743670028222101e-05, "loss": 0.6397698521614075, "step": 5119 }, { "epoch": 0.8183489171261887, "grad_norm": 1.3132351810077738, "learning_rate": 1.3741219411338993e-05, "loss": 0.7094461917877197, "step": 5120 }, { "epoch": 0.8185087508990649, "grad_norm": 1.2407698803105243, "learning_rate": 1.3738768533182402e-05, "loss": 0.6095378398895264, "step": 5121 }, { "epoch": 0.8186685846719411, "grad_norm": 1.3189276453861787, "learning_rate": 1.3736317393923495e-05, "loss": 0.6200131773948669, "step": 5122 }, { "epoch": 0.8188284184448174, "grad_norm": 1.2748171411888518, "learning_rate": 1.3733865993733448e-05, "loss": 0.6123586893081665, "step": 5123 }, { "epoch": 0.8189882522176936, "grad_norm": 1.465138537444353, "learning_rate": 1.3731414332783457e-05, "loss": 0.5128803253173828, "step": 5124 }, { "epoch": 0.8191480859905698, "grad_norm": 1.4927648550883545, "learning_rate": 1.372896241124474e-05, "loss": 0.6517112851142883, "step": 5125 }, { "epoch": 0.819307919763446, "grad_norm": 1.2490868549581937, "learning_rate": 1.3726510229288526e-05, "loss": 0.6873751878738403, "step": 5126 }, { "epoch": 0.8194677535363222, "grad_norm": 1.551584626572213, "learning_rate": 1.3724057787086073e-05, "loss": 0.7477236986160278, "step": 5127 }, { "epoch": 0.8196275873091984, "grad_norm": 1.2962229268805368, "learning_rate": 1.3721605084808647e-05, "loss": 0.6979618668556213, "step": 5128 }, { "epoch": 0.8197874210820747, "grad_norm": 1.6532810029741258, "learning_rate": 1.3719152122627532e-05, "loss": 0.7186895608901978, "step": 5129 }, { "epoch": 0.8199472548549509, "grad_norm": 1.443797271766842, "learning_rate": 1.3716698900714045e-05, "loss": 0.6730536222457886, "step": 5130 }, { "epoch": 0.8201070886278271, "grad_norm": 1.5268580815470751, "learning_rate": 1.3714245419239496e-05, "loss": 0.4880656898021698, "step": 5131 }, { "epoch": 0.8202669224007033, "grad_norm": 1.4410853538457096, "learning_rate": 1.3711791678375242e-05, "loss": 0.6173999309539795, "step": 5132 }, { "epoch": 0.8204267561735795, "grad_norm": 1.4219742843254075, "learning_rate": 1.3709337678292636e-05, "loss": 0.8510851860046387, "step": 5133 }, { "epoch": 0.8205865899464557, "grad_norm": 1.32011481232745, "learning_rate": 1.3706883419163056e-05, "loss": 0.5690229535102844, "step": 5134 }, { "epoch": 0.8207464237193319, "grad_norm": 1.1831141023636025, "learning_rate": 1.37044289011579e-05, "loss": 0.5394867658615112, "step": 5135 }, { "epoch": 0.8209062574922081, "grad_norm": 1.5292534980498738, "learning_rate": 1.3701974124448582e-05, "loss": 0.6443682909011841, "step": 5136 }, { "epoch": 0.8210660912650843, "grad_norm": 1.2141454944960446, "learning_rate": 1.3699519089206539e-05, "loss": 0.6770744323730469, "step": 5137 }, { "epoch": 0.8212259250379605, "grad_norm": 1.3900128023645295, "learning_rate": 1.3697063795603216e-05, "loss": 0.6686275005340576, "step": 5138 }, { "epoch": 0.8213857588108368, "grad_norm": 1.3355571051914374, "learning_rate": 1.3694608243810088e-05, "loss": 0.6481631398200989, "step": 5139 }, { "epoch": 0.821545592583713, "grad_norm": 1.3348000006841623, "learning_rate": 1.3692152433998638e-05, "loss": 0.6672359108924866, "step": 5140 }, { "epoch": 0.8217054263565892, "grad_norm": 1.4967187367297106, "learning_rate": 1.3689696366340368e-05, "loss": 0.5938963294029236, "step": 5141 }, { "epoch": 0.8218652601294654, "grad_norm": 1.2442231933612433, "learning_rate": 1.3687240041006811e-05, "loss": 0.4717784523963928, "step": 5142 }, { "epoch": 0.8220250939023416, "grad_norm": 1.2753264499308747, "learning_rate": 1.36847834581695e-05, "loss": 0.6880795955657959, "step": 5143 }, { "epoch": 0.8221849276752178, "grad_norm": 1.28268738187564, "learning_rate": 1.3682326617999992e-05, "loss": 0.5100322961807251, "step": 5144 }, { "epoch": 0.822344761448094, "grad_norm": 1.4163365092083229, "learning_rate": 1.3679869520669873e-05, "loss": 0.640540361404419, "step": 5145 }, { "epoch": 0.8225045952209702, "grad_norm": 1.4320498540539135, "learning_rate": 1.367741216635073e-05, "loss": 0.6397087574005127, "step": 5146 }, { "epoch": 0.8226644289938464, "grad_norm": 1.1634437810171443, "learning_rate": 1.3674954555214182e-05, "loss": 0.4696749746799469, "step": 5147 }, { "epoch": 0.8228242627667226, "grad_norm": 1.496529620095221, "learning_rate": 1.3672496687431852e-05, "loss": 0.6437994241714478, "step": 5148 }, { "epoch": 0.8229840965395988, "grad_norm": 1.2416670540725965, "learning_rate": 1.3670038563175398e-05, "loss": 0.6588903665542603, "step": 5149 }, { "epoch": 0.823143930312475, "grad_norm": 1.2255036336250127, "learning_rate": 1.3667580182616483e-05, "loss": 0.529093861579895, "step": 5150 }, { "epoch": 0.8233037640853512, "grad_norm": 1.2795463317972724, "learning_rate": 1.3665121545926785e-05, "loss": 0.69538414478302, "step": 5151 }, { "epoch": 0.8234635978582274, "grad_norm": 1.44880054070363, "learning_rate": 1.3662662653278016e-05, "loss": 0.6904400587081909, "step": 5152 }, { "epoch": 0.8236234316311036, "grad_norm": 1.3912235725897697, "learning_rate": 1.3660203504841894e-05, "loss": 0.5693045854568481, "step": 5153 }, { "epoch": 0.8237832654039798, "grad_norm": 1.5596431897226837, "learning_rate": 1.3657744100790153e-05, "loss": 0.702299177646637, "step": 5154 }, { "epoch": 0.823943099176856, "grad_norm": 0.9991783668322066, "learning_rate": 1.3655284441294554e-05, "loss": 0.5383789539337158, "step": 5155 }, { "epoch": 0.8241029329497322, "grad_norm": 1.2779045258012707, "learning_rate": 1.3652824526526867e-05, "loss": 0.5780709981918335, "step": 5156 }, { "epoch": 0.8242627667226085, "grad_norm": 1.2772319255736146, "learning_rate": 1.3650364356658885e-05, "loss": 0.684495747089386, "step": 5157 }, { "epoch": 0.8244226004954847, "grad_norm": 1.2109299454863895, "learning_rate": 1.3647903931862416e-05, "loss": 0.6513657569885254, "step": 5158 }, { "epoch": 0.8245824342683609, "grad_norm": 1.0072252580148353, "learning_rate": 1.3645443252309295e-05, "loss": 0.549970269203186, "step": 5159 }, { "epoch": 0.8247422680412371, "grad_norm": 1.442293240379323, "learning_rate": 1.3642982318171359e-05, "loss": 0.5649592876434326, "step": 5160 }, { "epoch": 0.8249021018141133, "grad_norm": 1.3217541770505945, "learning_rate": 1.3640521129620468e-05, "loss": 0.5715926885604858, "step": 5161 }, { "epoch": 0.8250619355869895, "grad_norm": 1.2149918441984595, "learning_rate": 1.3638059686828514e-05, "loss": 0.6094440221786499, "step": 5162 }, { "epoch": 0.8252217693598657, "grad_norm": 1.1424362551843992, "learning_rate": 1.3635597989967386e-05, "loss": 0.6681540012359619, "step": 5163 }, { "epoch": 0.825381603132742, "grad_norm": 1.302813265632261, "learning_rate": 1.3633136039209008e-05, "loss": 0.6851502060890198, "step": 5164 }, { "epoch": 0.8255414369056182, "grad_norm": 1.5336254950504509, "learning_rate": 1.3630673834725305e-05, "loss": 0.6440604329109192, "step": 5165 }, { "epoch": 0.8257012706784944, "grad_norm": 1.3676044505147789, "learning_rate": 1.3628211376688235e-05, "loss": 0.6266462802886963, "step": 5166 }, { "epoch": 0.8258611044513706, "grad_norm": 1.420970063225516, "learning_rate": 1.3625748665269765e-05, "loss": 0.5798813104629517, "step": 5167 }, { "epoch": 0.8260209382242468, "grad_norm": 1.2509681993670883, "learning_rate": 1.3623285700641882e-05, "loss": 0.57429438829422, "step": 5168 }, { "epoch": 0.826180771997123, "grad_norm": 1.3944486956220852, "learning_rate": 1.3620822482976593e-05, "loss": 0.7654855251312256, "step": 5169 }, { "epoch": 0.8263406057699992, "grad_norm": 1.2853843201079946, "learning_rate": 1.3618359012445914e-05, "loss": 0.6274380683898926, "step": 5170 }, { "epoch": 0.8265004395428754, "grad_norm": 1.4116523694126608, "learning_rate": 1.3615895289221893e-05, "loss": 0.6298719644546509, "step": 5171 }, { "epoch": 0.8266602733157516, "grad_norm": 1.3999597582558758, "learning_rate": 1.3613431313476584e-05, "loss": 0.7042937278747559, "step": 5172 }, { "epoch": 0.8268201070886279, "grad_norm": 1.2343250414271543, "learning_rate": 1.3610967085382058e-05, "loss": 0.6830975413322449, "step": 5173 }, { "epoch": 0.8269799408615041, "grad_norm": 1.395498455574833, "learning_rate": 1.3608502605110418e-05, "loss": 0.5305124521255493, "step": 5174 }, { "epoch": 0.8271397746343803, "grad_norm": 1.2497593280830512, "learning_rate": 1.360603787283377e-05, "loss": 0.6764016151428223, "step": 5175 }, { "epoch": 0.8272996084072565, "grad_norm": 1.081732524648015, "learning_rate": 1.3603572888724237e-05, "loss": 0.527100682258606, "step": 5176 }, { "epoch": 0.8274594421801327, "grad_norm": 1.5116053347995655, "learning_rate": 1.360110765295397e-05, "loss": 0.6483573913574219, "step": 5177 }, { "epoch": 0.8276192759530089, "grad_norm": 1.2493517879124099, "learning_rate": 1.359864216569513e-05, "loss": 0.6313205361366272, "step": 5178 }, { "epoch": 0.8277791097258851, "grad_norm": 1.1687031352186197, "learning_rate": 1.3596176427119902e-05, "loss": 0.5625530481338501, "step": 5179 }, { "epoch": 0.8279389434987613, "grad_norm": 1.2598432213629465, "learning_rate": 1.3593710437400481e-05, "loss": 0.54328852891922, "step": 5180 }, { "epoch": 0.8280987772716375, "grad_norm": 1.3288809983470187, "learning_rate": 1.3591244196709082e-05, "loss": 0.6893113255500793, "step": 5181 }, { "epoch": 0.8282586110445137, "grad_norm": 1.3928599923785763, "learning_rate": 1.3588777705217941e-05, "loss": 0.6150970458984375, "step": 5182 }, { "epoch": 0.8284184448173899, "grad_norm": 1.2321808456295094, "learning_rate": 1.3586310963099305e-05, "loss": 0.5986258387565613, "step": 5183 }, { "epoch": 0.8285782785902661, "grad_norm": 1.1832870475235846, "learning_rate": 1.358384397052545e-05, "loss": 0.7140519618988037, "step": 5184 }, { "epoch": 0.8287381123631423, "grad_norm": 1.433690421912113, "learning_rate": 1.3581376727668654e-05, "loss": 0.7626566886901855, "step": 5185 }, { "epoch": 0.8288979461360185, "grad_norm": 1.4163107116582272, "learning_rate": 1.3578909234701225e-05, "loss": 0.6683658957481384, "step": 5186 }, { "epoch": 0.8290577799088947, "grad_norm": 1.2737479420175888, "learning_rate": 1.3576441491795484e-05, "loss": 0.5760270953178406, "step": 5187 }, { "epoch": 0.8292176136817709, "grad_norm": 1.3141731844599185, "learning_rate": 1.3573973499123766e-05, "loss": 0.6068624258041382, "step": 5188 }, { "epoch": 0.8293774474546471, "grad_norm": 1.2430364501509608, "learning_rate": 1.3571505256858432e-05, "loss": 0.5189492702484131, "step": 5189 }, { "epoch": 0.8295372812275233, "grad_norm": 1.267160906013922, "learning_rate": 1.356903676517185e-05, "loss": 0.563370943069458, "step": 5190 }, { "epoch": 0.8296971150003996, "grad_norm": 1.622199415931343, "learning_rate": 1.3566568024236415e-05, "loss": 0.7139052152633667, "step": 5191 }, { "epoch": 0.8298569487732758, "grad_norm": 1.373256330145948, "learning_rate": 1.3564099034224532e-05, "loss": 0.5666943788528442, "step": 5192 }, { "epoch": 0.830016782546152, "grad_norm": 1.4585839889492136, "learning_rate": 1.3561629795308626e-05, "loss": 0.526841402053833, "step": 5193 }, { "epoch": 0.8301766163190282, "grad_norm": 1.2767271992218365, "learning_rate": 1.3559160307661146e-05, "loss": 0.5743646025657654, "step": 5194 }, { "epoch": 0.8303364500919044, "grad_norm": 1.250643605210235, "learning_rate": 1.3556690571454545e-05, "loss": 0.5811505913734436, "step": 5195 }, { "epoch": 0.8304962838647806, "grad_norm": 1.6386759141384146, "learning_rate": 1.3554220586861303e-05, "loss": 0.6694384813308716, "step": 5196 }, { "epoch": 0.8306561176376568, "grad_norm": 1.241458261514802, "learning_rate": 1.3551750354053918e-05, "loss": 0.6715579032897949, "step": 5197 }, { "epoch": 0.830815951410533, "grad_norm": 1.3865642312185784, "learning_rate": 1.3549279873204897e-05, "loss": 0.6480803489685059, "step": 5198 }, { "epoch": 0.8309757851834092, "grad_norm": 1.2557456232573163, "learning_rate": 1.3546809144486775e-05, "loss": 0.5725555419921875, "step": 5199 }, { "epoch": 0.8311356189562855, "grad_norm": 1.437150943978619, "learning_rate": 1.3544338168072093e-05, "loss": 0.5914914608001709, "step": 5200 }, { "epoch": 0.8312954527291617, "grad_norm": 1.084234239321416, "learning_rate": 1.354186694413342e-05, "loss": 0.6843799948692322, "step": 5201 }, { "epoch": 0.8314552865020379, "grad_norm": 1.7186724982636084, "learning_rate": 1.3539395472843335e-05, "loss": 0.5946600437164307, "step": 5202 }, { "epoch": 0.8316151202749141, "grad_norm": 1.4307296461790784, "learning_rate": 1.3536923754374438e-05, "loss": 0.6653231382369995, "step": 5203 }, { "epoch": 0.8317749540477903, "grad_norm": 1.3309835822680867, "learning_rate": 1.3534451788899348e-05, "loss": 0.6088337898254395, "step": 5204 }, { "epoch": 0.8319347878206665, "grad_norm": 1.1864668823127604, "learning_rate": 1.353197957659069e-05, "loss": 0.6380215883255005, "step": 5205 }, { "epoch": 0.8320946215935427, "grad_norm": 1.176923288786159, "learning_rate": 1.3529507117621123e-05, "loss": 0.500883162021637, "step": 5206 }, { "epoch": 0.832254455366419, "grad_norm": 1.4543249842780561, "learning_rate": 1.352703441216331e-05, "loss": 0.6155608892440796, "step": 5207 }, { "epoch": 0.8324142891392952, "grad_norm": 1.2305215383571937, "learning_rate": 1.3524561460389938e-05, "loss": 0.539679765701294, "step": 5208 }, { "epoch": 0.8325741229121714, "grad_norm": 1.2320234267511807, "learning_rate": 1.3522088262473709e-05, "loss": 0.697105884552002, "step": 5209 }, { "epoch": 0.8327339566850476, "grad_norm": 1.3311814835665396, "learning_rate": 1.3519614818587338e-05, "loss": 0.5454414486885071, "step": 5210 }, { "epoch": 0.8328937904579238, "grad_norm": 1.3158212084373024, "learning_rate": 1.3517141128903568e-05, "loss": 0.6672284603118896, "step": 5211 }, { "epoch": 0.8330536242308, "grad_norm": 1.6101763318656097, "learning_rate": 1.3514667193595153e-05, "loss": 0.6275649070739746, "step": 5212 }, { "epoch": 0.8332134580036762, "grad_norm": 1.2326566793272926, "learning_rate": 1.3512193012834855e-05, "loss": 0.5623679161071777, "step": 5213 }, { "epoch": 0.8333732917765524, "grad_norm": 1.5642849013860023, "learning_rate": 1.3509718586795472e-05, "loss": 0.5896749496459961, "step": 5214 }, { "epoch": 0.8335331255494286, "grad_norm": 1.3579814639169623, "learning_rate": 1.3507243915649803e-05, "loss": 0.6647505760192871, "step": 5215 }, { "epoch": 0.8336929593223048, "grad_norm": 1.1173606134914893, "learning_rate": 1.3504768999570676e-05, "loss": 0.5052149295806885, "step": 5216 }, { "epoch": 0.833852793095181, "grad_norm": 1.1960013502431226, "learning_rate": 1.3502293838730922e-05, "loss": 0.5289310216903687, "step": 5217 }, { "epoch": 0.8340126268680572, "grad_norm": 1.2026843747714249, "learning_rate": 1.3499818433303407e-05, "loss": 0.5602246522903442, "step": 5218 }, { "epoch": 0.8341724606409334, "grad_norm": 1.1900208772272665, "learning_rate": 1.3497342783460999e-05, "loss": 0.6300835609436035, "step": 5219 }, { "epoch": 0.8343322944138096, "grad_norm": 1.108892518810438, "learning_rate": 1.3494866889376584e-05, "loss": 0.6072870492935181, "step": 5220 }, { "epoch": 0.8344921281866858, "grad_norm": 1.5853280551941686, "learning_rate": 1.3492390751223076e-05, "loss": 0.6312961578369141, "step": 5221 }, { "epoch": 0.834651961959562, "grad_norm": 1.4777044657387932, "learning_rate": 1.3489914369173402e-05, "loss": 0.5977847576141357, "step": 5222 }, { "epoch": 0.8348117957324382, "grad_norm": 1.380221180659787, "learning_rate": 1.3487437743400498e-05, "loss": 0.606317400932312, "step": 5223 }, { "epoch": 0.8349716295053144, "grad_norm": 1.5626555379875602, "learning_rate": 1.3484960874077326e-05, "loss": 0.6078959107398987, "step": 5224 }, { "epoch": 0.8351314632781907, "grad_norm": 1.359527770020005, "learning_rate": 1.3482483761376861e-05, "loss": 0.5739365816116333, "step": 5225 }, { "epoch": 0.8352912970510669, "grad_norm": 1.5556652701749507, "learning_rate": 1.3480006405472096e-05, "loss": 0.7021522521972656, "step": 5226 }, { "epoch": 0.8354511308239431, "grad_norm": 1.3306278337262236, "learning_rate": 1.347752880653604e-05, "loss": 0.5531756281852722, "step": 5227 }, { "epoch": 0.8356109645968193, "grad_norm": 1.2312149567747928, "learning_rate": 1.347505096474172e-05, "loss": 0.5625537037849426, "step": 5228 }, { "epoch": 0.8357707983696955, "grad_norm": 1.2469513341827791, "learning_rate": 1.347257288026218e-05, "loss": 0.5995520353317261, "step": 5229 }, { "epoch": 0.8359306321425717, "grad_norm": 1.383456605624576, "learning_rate": 1.3470094553270478e-05, "loss": 0.5990534424781799, "step": 5230 }, { "epoch": 0.8360904659154479, "grad_norm": 1.426970290523113, "learning_rate": 1.3467615983939695e-05, "loss": 0.7677577137947083, "step": 5231 }, { "epoch": 0.8362502996883241, "grad_norm": 1.402105937965117, "learning_rate": 1.3465137172442925e-05, "loss": 0.527538537979126, "step": 5232 }, { "epoch": 0.8364101334612003, "grad_norm": 1.390974003161581, "learning_rate": 1.3462658118953278e-05, "loss": 0.6186273097991943, "step": 5233 }, { "epoch": 0.8365699672340765, "grad_norm": 1.5682321724269648, "learning_rate": 1.3460178823643886e-05, "loss": 0.6163508296012878, "step": 5234 }, { "epoch": 0.8367298010069528, "grad_norm": 1.5408433879976833, "learning_rate": 1.3457699286687887e-05, "loss": 0.5626715421676636, "step": 5235 }, { "epoch": 0.836889634779829, "grad_norm": 1.3427740270087085, "learning_rate": 1.345521950825845e-05, "loss": 0.5304426550865173, "step": 5236 }, { "epoch": 0.8370494685527052, "grad_norm": 1.545932706995443, "learning_rate": 1.345273948852875e-05, "loss": 0.7066601514816284, "step": 5237 }, { "epoch": 0.8372093023255814, "grad_norm": 1.1823661545692807, "learning_rate": 1.3450259227671985e-05, "loss": 0.5598517656326294, "step": 5238 }, { "epoch": 0.8373691360984576, "grad_norm": 1.5839886240763934, "learning_rate": 1.3447778725861367e-05, "loss": 0.6031191349029541, "step": 5239 }, { "epoch": 0.8375289698713339, "grad_norm": 1.2663734342473252, "learning_rate": 1.3445297983270123e-05, "loss": 0.5169142484664917, "step": 5240 }, { "epoch": 0.8376888036442101, "grad_norm": 1.2809416856748173, "learning_rate": 1.3442817000071499e-05, "loss": 0.5178461074829102, "step": 5241 }, { "epoch": 0.8378486374170863, "grad_norm": 1.3316081167089053, "learning_rate": 1.3440335776438765e-05, "loss": 0.5928995609283447, "step": 5242 }, { "epoch": 0.8380084711899625, "grad_norm": 1.6101639508928243, "learning_rate": 1.3437854312545193e-05, "loss": 0.5182960629463196, "step": 5243 }, { "epoch": 0.8381683049628387, "grad_norm": 1.373408584046842, "learning_rate": 1.3435372608564086e-05, "loss": 0.6784090995788574, "step": 5244 }, { "epoch": 0.8383281387357149, "grad_norm": 1.3728722966399938, "learning_rate": 1.3432890664668752e-05, "loss": 0.5822213888168335, "step": 5245 }, { "epoch": 0.8384879725085911, "grad_norm": 1.2112753106690675, "learning_rate": 1.3430408481032524e-05, "loss": 0.6800785064697266, "step": 5246 }, { "epoch": 0.8386478062814673, "grad_norm": 1.534011825409798, "learning_rate": 1.3427926057828749e-05, "loss": 0.6107461452484131, "step": 5247 }, { "epoch": 0.8388076400543435, "grad_norm": 1.2620437463672414, "learning_rate": 1.3425443395230789e-05, "loss": 0.5743020176887512, "step": 5248 }, { "epoch": 0.8389674738272197, "grad_norm": 1.2669289072797687, "learning_rate": 1.3422960493412025e-05, "loss": 0.5165868401527405, "step": 5249 }, { "epoch": 0.8391273076000959, "grad_norm": 1.3395631974331292, "learning_rate": 1.3420477352545855e-05, "loss": 0.4802916944026947, "step": 5250 }, { "epoch": 0.8392871413729721, "grad_norm": 1.2958619867363907, "learning_rate": 1.341799397280569e-05, "loss": 0.6569322347640991, "step": 5251 }, { "epoch": 0.8394469751458483, "grad_norm": 1.6312281773879658, "learning_rate": 1.3415510354364964e-05, "loss": 0.7777566313743591, "step": 5252 }, { "epoch": 0.8396068089187245, "grad_norm": 1.3441716338582232, "learning_rate": 1.3413026497397123e-05, "loss": 0.6184448003768921, "step": 5253 }, { "epoch": 0.8397666426916007, "grad_norm": 1.4685392800484707, "learning_rate": 1.341054240207563e-05, "loss": 0.6628401279449463, "step": 5254 }, { "epoch": 0.8399264764644769, "grad_norm": 1.3548097214103307, "learning_rate": 1.3408058068573966e-05, "loss": 0.7073814868927002, "step": 5255 }, { "epoch": 0.8400863102373531, "grad_norm": 1.2248372048969374, "learning_rate": 1.3405573497065627e-05, "loss": 0.5435088276863098, "step": 5256 }, { "epoch": 0.8402461440102293, "grad_norm": 1.485370907867063, "learning_rate": 1.3403088687724127e-05, "loss": 0.6071780323982239, "step": 5257 }, { "epoch": 0.8404059777831056, "grad_norm": 1.4090430554388305, "learning_rate": 1.3400603640722997e-05, "loss": 0.74148029088974, "step": 5258 }, { "epoch": 0.8405658115559818, "grad_norm": 1.270655669293593, "learning_rate": 1.3398118356235783e-05, "loss": 0.574088454246521, "step": 5259 }, { "epoch": 0.840725645328858, "grad_norm": 1.3212312299937392, "learning_rate": 1.339563283443605e-05, "loss": 0.6912474036216736, "step": 5260 }, { "epoch": 0.8408854791017342, "grad_norm": 1.3915656183920773, "learning_rate": 1.3393147075497374e-05, "loss": 0.7395029067993164, "step": 5261 }, { "epoch": 0.8410453128746104, "grad_norm": 1.3511573210423935, "learning_rate": 1.3390661079593357e-05, "loss": 0.6427739858627319, "step": 5262 }, { "epoch": 0.8412051466474866, "grad_norm": 1.18722085013642, "learning_rate": 1.338817484689761e-05, "loss": 0.48385196924209595, "step": 5263 }, { "epoch": 0.8413649804203628, "grad_norm": 1.1680453686418748, "learning_rate": 1.338568837758376e-05, "loss": 0.5733391642570496, "step": 5264 }, { "epoch": 0.841524814193239, "grad_norm": 1.1419005134256834, "learning_rate": 1.3383201671825458e-05, "loss": 0.5369728803634644, "step": 5265 }, { "epoch": 0.8416846479661152, "grad_norm": 1.5235450220886544, "learning_rate": 1.3380714729796364e-05, "loss": 0.798134982585907, "step": 5266 }, { "epoch": 0.8418444817389914, "grad_norm": 1.2979013191531013, "learning_rate": 1.3378227551670155e-05, "loss": 0.5436777472496033, "step": 5267 }, { "epoch": 0.8420043155118676, "grad_norm": 1.2355971783764423, "learning_rate": 1.3375740137620532e-05, "loss": 0.616769552230835, "step": 5268 }, { "epoch": 0.8421641492847438, "grad_norm": 1.495854652399127, "learning_rate": 1.3373252487821202e-05, "loss": 0.787708044052124, "step": 5269 }, { "epoch": 0.8423239830576201, "grad_norm": 1.4556740358297542, "learning_rate": 1.3370764602445898e-05, "loss": 0.6457935571670532, "step": 5270 }, { "epoch": 0.8424838168304963, "grad_norm": 1.2332730513579417, "learning_rate": 1.3368276481668362e-05, "loss": 0.7199015617370605, "step": 5271 }, { "epoch": 0.8426436506033725, "grad_norm": 1.3078265213350626, "learning_rate": 1.3365788125662353e-05, "loss": 0.6377049684524536, "step": 5272 }, { "epoch": 0.8428034843762487, "grad_norm": 1.25555476358209, "learning_rate": 1.3363299534601657e-05, "loss": 0.5700642466545105, "step": 5273 }, { "epoch": 0.842963318149125, "grad_norm": 1.37775803740987, "learning_rate": 1.3360810708660061e-05, "loss": 0.7596384286880493, "step": 5274 }, { "epoch": 0.8431231519220012, "grad_norm": 1.3074076703865216, "learning_rate": 1.335832164801138e-05, "loss": 0.4852626919746399, "step": 5275 }, { "epoch": 0.8432829856948774, "grad_norm": 1.408875258981757, "learning_rate": 1.335583235282944e-05, "loss": 0.6240389347076416, "step": 5276 }, { "epoch": 0.8434428194677536, "grad_norm": 1.4952581389856021, "learning_rate": 1.3353342823288084e-05, "loss": 0.62565678358078, "step": 5277 }, { "epoch": 0.8436026532406298, "grad_norm": 1.6561789895932117, "learning_rate": 1.3350853059561172e-05, "loss": 0.6783734560012817, "step": 5278 }, { "epoch": 0.843762487013506, "grad_norm": 1.2977939149947395, "learning_rate": 1.334836306182258e-05, "loss": 0.5258541107177734, "step": 5279 }, { "epoch": 0.8439223207863822, "grad_norm": 1.4822982094678343, "learning_rate": 1.3345872830246203e-05, "loss": 0.62286376953125, "step": 5280 }, { "epoch": 0.8440821545592584, "grad_norm": 1.493488293123342, "learning_rate": 1.3343382365005946e-05, "loss": 0.6521073579788208, "step": 5281 }, { "epoch": 0.8442419883321346, "grad_norm": 1.401315444891808, "learning_rate": 1.3340891666275735e-05, "loss": 0.7524775266647339, "step": 5282 }, { "epoch": 0.8444018221050108, "grad_norm": 1.1940890273299842, "learning_rate": 1.3338400734229515e-05, "loss": 0.6765052080154419, "step": 5283 }, { "epoch": 0.844561655877887, "grad_norm": 1.3696251401700796, "learning_rate": 1.3335909569041239e-05, "loss": 0.6502403020858765, "step": 5284 }, { "epoch": 0.8447214896507632, "grad_norm": 1.636357931409139, "learning_rate": 1.3333418170884887e-05, "loss": 0.6282363533973694, "step": 5285 }, { "epoch": 0.8448813234236394, "grad_norm": 1.2975110589398369, "learning_rate": 1.3330926539934446e-05, "loss": 0.7314305305480957, "step": 5286 }, { "epoch": 0.8450411571965156, "grad_norm": 1.260092702420468, "learning_rate": 1.332843467636392e-05, "loss": 0.6244537830352783, "step": 5287 }, { "epoch": 0.8452009909693918, "grad_norm": 1.5012382559337487, "learning_rate": 1.3325942580347335e-05, "loss": 0.7584145069122314, "step": 5288 }, { "epoch": 0.845360824742268, "grad_norm": 1.160591176216184, "learning_rate": 1.3323450252058732e-05, "loss": 0.45951443910598755, "step": 5289 }, { "epoch": 0.8455206585151442, "grad_norm": 1.5206601879231307, "learning_rate": 1.3320957691672162e-05, "loss": 0.6276482939720154, "step": 5290 }, { "epoch": 0.8456804922880204, "grad_norm": 1.3543397433661535, "learning_rate": 1.33184648993617e-05, "loss": 0.6695002913475037, "step": 5291 }, { "epoch": 0.8458403260608967, "grad_norm": 1.4085069094244402, "learning_rate": 1.331597187530143e-05, "loss": 0.5350031852722168, "step": 5292 }, { "epoch": 0.8460001598337729, "grad_norm": 1.2298197458532802, "learning_rate": 1.3313478619665462e-05, "loss": 0.6759746074676514, "step": 5293 }, { "epoch": 0.8461599936066491, "grad_norm": 1.3443630817975794, "learning_rate": 1.331098513262791e-05, "loss": 0.6613054275512695, "step": 5294 }, { "epoch": 0.8463198273795253, "grad_norm": 1.393445374330195, "learning_rate": 1.3308491414362914e-05, "loss": 0.6747864484786987, "step": 5295 }, { "epoch": 0.8464796611524015, "grad_norm": 1.184071412889101, "learning_rate": 1.3305997465044627e-05, "loss": 0.6046521663665771, "step": 5296 }, { "epoch": 0.8466394949252777, "grad_norm": 1.2405401670163256, "learning_rate": 1.3303503284847214e-05, "loss": 0.6083239316940308, "step": 5297 }, { "epoch": 0.8467993286981539, "grad_norm": 1.209870406333781, "learning_rate": 1.3301008873944863e-05, "loss": 0.5728416442871094, "step": 5298 }, { "epoch": 0.8469591624710301, "grad_norm": 1.485228961875269, "learning_rate": 1.329851423251177e-05, "loss": 0.6565395593643188, "step": 5299 }, { "epoch": 0.8471189962439063, "grad_norm": 1.2890728678670889, "learning_rate": 1.3296019360722158e-05, "loss": 0.6902430653572083, "step": 5300 }, { "epoch": 0.8472788300167825, "grad_norm": 1.2901392832114091, "learning_rate": 1.3293524258750257e-05, "loss": 0.5944949388504028, "step": 5301 }, { "epoch": 0.8474386637896587, "grad_norm": 1.480704039254505, "learning_rate": 1.3291028926770313e-05, "loss": 0.7783628702163696, "step": 5302 }, { "epoch": 0.8475984975625349, "grad_norm": 1.3299441740376352, "learning_rate": 1.3288533364956597e-05, "loss": 0.584749698638916, "step": 5303 }, { "epoch": 0.8477583313354111, "grad_norm": 1.1755894334122485, "learning_rate": 1.3286037573483388e-05, "loss": 0.6616492867469788, "step": 5304 }, { "epoch": 0.8479181651082874, "grad_norm": 1.4525211371059525, "learning_rate": 1.3283541552524983e-05, "loss": 0.6174157857894897, "step": 5305 }, { "epoch": 0.8480779988811636, "grad_norm": 1.1276172317556081, "learning_rate": 1.3281045302255693e-05, "loss": 0.6107800006866455, "step": 5306 }, { "epoch": 0.8482378326540398, "grad_norm": 1.3610731056329164, "learning_rate": 1.3278548822849853e-05, "loss": 0.6582437753677368, "step": 5307 }, { "epoch": 0.848397666426916, "grad_norm": 1.2115979147586324, "learning_rate": 1.3276052114481804e-05, "loss": 0.6221089363098145, "step": 5308 }, { "epoch": 0.8485575001997923, "grad_norm": 2.1788983188400386, "learning_rate": 1.3273555177325903e-05, "loss": 0.7328435182571411, "step": 5309 }, { "epoch": 0.8487173339726685, "grad_norm": 1.6112161473944404, "learning_rate": 1.3271058011556537e-05, "loss": 0.8032696843147278, "step": 5310 }, { "epoch": 0.8488771677455447, "grad_norm": 1.2617783547047678, "learning_rate": 1.3268560617348093e-05, "loss": 0.6039928197860718, "step": 5311 }, { "epoch": 0.8490370015184209, "grad_norm": 1.4105407795045681, "learning_rate": 1.3266062994874977e-05, "loss": 0.7912133932113647, "step": 5312 }, { "epoch": 0.8491968352912971, "grad_norm": 1.2518352136525568, "learning_rate": 1.3263565144311623e-05, "loss": 0.5800407528877258, "step": 5313 }, { "epoch": 0.8493566690641733, "grad_norm": 1.276037938086151, "learning_rate": 1.3261067065832467e-05, "loss": 0.5362832546234131, "step": 5314 }, { "epoch": 0.8495165028370495, "grad_norm": 1.495563603822476, "learning_rate": 1.3258568759611965e-05, "loss": 0.6433330774307251, "step": 5315 }, { "epoch": 0.8496763366099257, "grad_norm": 1.318246093462512, "learning_rate": 1.3256070225824592e-05, "loss": 0.575840950012207, "step": 5316 }, { "epoch": 0.8498361703828019, "grad_norm": 1.2223636070650328, "learning_rate": 1.3253571464644836e-05, "loss": 0.5129414796829224, "step": 5317 }, { "epoch": 0.8499960041556781, "grad_norm": 1.3698708558817312, "learning_rate": 1.3251072476247205e-05, "loss": 0.5376874208450317, "step": 5318 }, { "epoch": 0.8501558379285543, "grad_norm": 1.1174254448336411, "learning_rate": 1.324857326080621e-05, "loss": 0.5680692195892334, "step": 5319 }, { "epoch": 0.8503156717014305, "grad_norm": 1.2787232637923163, "learning_rate": 1.3246073818496398e-05, "loss": 0.6520422697067261, "step": 5320 }, { "epoch": 0.8504755054743067, "grad_norm": 1.5890288643353456, "learning_rate": 1.3243574149492311e-05, "loss": 0.7851667404174805, "step": 5321 }, { "epoch": 0.8506353392471829, "grad_norm": 1.1873069525368996, "learning_rate": 1.3241074253968526e-05, "loss": 0.5100349187850952, "step": 5322 }, { "epoch": 0.8507951730200591, "grad_norm": 1.2351881941287939, "learning_rate": 1.3238574132099626e-05, "loss": 0.6990522146224976, "step": 5323 }, { "epoch": 0.8509550067929353, "grad_norm": 1.3913530633353668, "learning_rate": 1.3236073784060205e-05, "loss": 0.5556619763374329, "step": 5324 }, { "epoch": 0.8511148405658115, "grad_norm": 1.2878177059384164, "learning_rate": 1.3233573210024883e-05, "loss": 0.5793506503105164, "step": 5325 }, { "epoch": 0.8512746743386878, "grad_norm": 1.4554801396011767, "learning_rate": 1.3231072410168286e-05, "loss": 0.6511654853820801, "step": 5326 }, { "epoch": 0.851434508111564, "grad_norm": 1.3532120305498785, "learning_rate": 1.322857138466507e-05, "loss": 0.5527067184448242, "step": 5327 }, { "epoch": 0.8515943418844402, "grad_norm": 1.2769130611621857, "learning_rate": 1.322607013368989e-05, "loss": 0.6274892091751099, "step": 5328 }, { "epoch": 0.8517541756573164, "grad_norm": 1.1700656193145114, "learning_rate": 1.3223568657417425e-05, "loss": 0.5943458080291748, "step": 5329 }, { "epoch": 0.8519140094301926, "grad_norm": 1.5102974454799079, "learning_rate": 1.3221066956022376e-05, "loss": 0.7182157635688782, "step": 5330 }, { "epoch": 0.8520738432030688, "grad_norm": 1.265401810377812, "learning_rate": 1.321856502967944e-05, "loss": 0.5532413721084595, "step": 5331 }, { "epoch": 0.852233676975945, "grad_norm": 1.2955263685594525, "learning_rate": 1.3216062878563355e-05, "loss": 0.47895947098731995, "step": 5332 }, { "epoch": 0.8523935107488212, "grad_norm": 1.40794775169232, "learning_rate": 1.3213560502848858e-05, "loss": 0.7313356995582581, "step": 5333 }, { "epoch": 0.8525533445216974, "grad_norm": 1.2865493850292797, "learning_rate": 1.3211057902710707e-05, "loss": 0.7317941784858704, "step": 5334 }, { "epoch": 0.8527131782945736, "grad_norm": 1.0940584158447275, "learning_rate": 1.3208555078323674e-05, "loss": 0.4654095768928528, "step": 5335 }, { "epoch": 0.8528730120674498, "grad_norm": 1.2098872215043446, "learning_rate": 1.3206052029862546e-05, "loss": 0.6310949921607971, "step": 5336 }, { "epoch": 0.853032845840326, "grad_norm": 1.4520609504607045, "learning_rate": 1.320354875750213e-05, "loss": 0.857041597366333, "step": 5337 }, { "epoch": 0.8531926796132022, "grad_norm": 1.4673809131458972, "learning_rate": 1.320104526141724e-05, "loss": 0.5790360569953918, "step": 5338 }, { "epoch": 0.8533525133860784, "grad_norm": 1.3024324745092148, "learning_rate": 1.3198541541782718e-05, "loss": 0.5051633715629578, "step": 5339 }, { "epoch": 0.8535123471589546, "grad_norm": 1.2991192923135009, "learning_rate": 1.3196037598773413e-05, "loss": 0.6570705771446228, "step": 5340 }, { "epoch": 0.853672180931831, "grad_norm": 1.2056692255658064, "learning_rate": 1.3193533432564187e-05, "loss": 0.6097627282142639, "step": 5341 }, { "epoch": 0.8538320147047072, "grad_norm": 1.0857107835304527, "learning_rate": 1.3191029043329925e-05, "loss": 0.5760360956192017, "step": 5342 }, { "epoch": 0.8539918484775834, "grad_norm": 1.2230505343339932, "learning_rate": 1.318852443124553e-05, "loss": 0.5062584280967712, "step": 5343 }, { "epoch": 0.8541516822504596, "grad_norm": 1.1982277893005366, "learning_rate": 1.3186019596485906e-05, "loss": 0.49731698632240295, "step": 5344 }, { "epoch": 0.8543115160233358, "grad_norm": 1.3442916209426126, "learning_rate": 1.318351453922599e-05, "loss": 0.6965175867080688, "step": 5345 }, { "epoch": 0.854471349796212, "grad_norm": 1.323286630205813, "learning_rate": 1.318100925964072e-05, "loss": 0.753090500831604, "step": 5346 }, { "epoch": 0.8546311835690882, "grad_norm": 1.5168385345407764, "learning_rate": 1.3178503757905058e-05, "loss": 0.5956652164459229, "step": 5347 }, { "epoch": 0.8547910173419644, "grad_norm": 1.195990209072237, "learning_rate": 1.317599803419398e-05, "loss": 0.5841108560562134, "step": 5348 }, { "epoch": 0.8549508511148406, "grad_norm": 1.181380514348335, "learning_rate": 1.3173492088682478e-05, "loss": 0.4680408239364624, "step": 5349 }, { "epoch": 0.8551106848877168, "grad_norm": 1.586806705864642, "learning_rate": 1.3170985921545559e-05, "loss": 0.6673239469528198, "step": 5350 }, { "epoch": 0.855270518660593, "grad_norm": 1.6369050010213857, "learning_rate": 1.3168479532958237e-05, "loss": 0.6348151564598083, "step": 5351 }, { "epoch": 0.8554303524334692, "grad_norm": 1.3324813799425175, "learning_rate": 1.3165972923095556e-05, "loss": 0.7105616927146912, "step": 5352 }, { "epoch": 0.8555901862063454, "grad_norm": 1.1477395536422983, "learning_rate": 1.3163466092132567e-05, "loss": 0.516139030456543, "step": 5353 }, { "epoch": 0.8557500199792216, "grad_norm": 1.5857166406818928, "learning_rate": 1.3160959040244344e-05, "loss": 0.5637469291687012, "step": 5354 }, { "epoch": 0.8559098537520978, "grad_norm": 1.1652830003570025, "learning_rate": 1.3158451767605962e-05, "loss": 0.5248317718505859, "step": 5355 }, { "epoch": 0.856069687524974, "grad_norm": 1.3330350658149814, "learning_rate": 1.3155944274392523e-05, "loss": 0.5435433387756348, "step": 5356 }, { "epoch": 0.8562295212978502, "grad_norm": 1.3819643333837475, "learning_rate": 1.3153436560779142e-05, "loss": 0.6988463401794434, "step": 5357 }, { "epoch": 0.8563893550707264, "grad_norm": 1.2909901956658703, "learning_rate": 1.315092862694095e-05, "loss": 0.605410099029541, "step": 5358 }, { "epoch": 0.8565491888436026, "grad_norm": 1.2003636045110035, "learning_rate": 1.3148420473053089e-05, "loss": 0.6115964651107788, "step": 5359 }, { "epoch": 0.8567090226164789, "grad_norm": 1.1856268847368903, "learning_rate": 1.3145912099290725e-05, "loss": 0.5948947072029114, "step": 5360 }, { "epoch": 0.8568688563893551, "grad_norm": 1.2848942444147422, "learning_rate": 1.3143403505829026e-05, "loss": 0.5831837058067322, "step": 5361 }, { "epoch": 0.8570286901622313, "grad_norm": 1.2135007637170037, "learning_rate": 1.314089469284319e-05, "loss": 0.6180374622344971, "step": 5362 }, { "epoch": 0.8571885239351075, "grad_norm": 1.4120871303227187, "learning_rate": 1.313838566050842e-05, "loss": 0.5348055362701416, "step": 5363 }, { "epoch": 0.8573483577079837, "grad_norm": 1.2282635075799626, "learning_rate": 1.3135876408999938e-05, "loss": 0.5952938795089722, "step": 5364 }, { "epoch": 0.8575081914808599, "grad_norm": 1.2153265385627738, "learning_rate": 1.3133366938492985e-05, "loss": 0.713904619216919, "step": 5365 }, { "epoch": 0.8576680252537361, "grad_norm": 1.1684061262140628, "learning_rate": 1.3130857249162811e-05, "loss": 0.45957911014556885, "step": 5366 }, { "epoch": 0.8578278590266123, "grad_norm": 1.0634455897551625, "learning_rate": 1.3128347341184684e-05, "loss": 0.5993289351463318, "step": 5367 }, { "epoch": 0.8579876927994885, "grad_norm": 1.5143879180359388, "learning_rate": 1.3125837214733882e-05, "loss": 0.7158688306808472, "step": 5368 }, { "epoch": 0.8581475265723647, "grad_norm": 1.4719511107875267, "learning_rate": 1.3123326869985715e-05, "loss": 0.7157689929008484, "step": 5369 }, { "epoch": 0.8583073603452409, "grad_norm": 1.2966171573830116, "learning_rate": 1.3120816307115485e-05, "loss": 0.4555686414241791, "step": 5370 }, { "epoch": 0.8584671941181171, "grad_norm": 1.0827297249976027, "learning_rate": 1.3118305526298528e-05, "loss": 0.6049482226371765, "step": 5371 }, { "epoch": 0.8586270278909933, "grad_norm": 1.2062721835122818, "learning_rate": 1.3115794527710182e-05, "loss": 0.5885344743728638, "step": 5372 }, { "epoch": 0.8587868616638695, "grad_norm": 1.1823824583426157, "learning_rate": 1.3113283311525811e-05, "loss": 0.5255751609802246, "step": 5373 }, { "epoch": 0.8589466954367457, "grad_norm": 1.4268309832445756, "learning_rate": 1.3110771877920789e-05, "loss": 0.514045238494873, "step": 5374 }, { "epoch": 0.8591065292096219, "grad_norm": 1.3242702732368636, "learning_rate": 1.3108260227070504e-05, "loss": 0.6113396286964417, "step": 5375 }, { "epoch": 0.8592663629824983, "grad_norm": 1.3128786780929174, "learning_rate": 1.310574835915036e-05, "loss": 0.5384293794631958, "step": 5376 }, { "epoch": 0.8594261967553745, "grad_norm": 1.3478349523684223, "learning_rate": 1.3103236274335783e-05, "loss": 0.7237014770507812, "step": 5377 }, { "epoch": 0.8595860305282507, "grad_norm": 1.1442212558155718, "learning_rate": 1.3100723972802198e-05, "loss": 0.5507619380950928, "step": 5378 }, { "epoch": 0.8597458643011269, "grad_norm": 1.163979915622548, "learning_rate": 1.3098211454725063e-05, "loss": 0.6383029222488403, "step": 5379 }, { "epoch": 0.8599056980740031, "grad_norm": 1.3322560464291018, "learning_rate": 1.3095698720279839e-05, "loss": 0.5645560026168823, "step": 5380 }, { "epoch": 0.8600655318468793, "grad_norm": 1.2311949277978527, "learning_rate": 1.3093185769642011e-05, "loss": 0.6299991607666016, "step": 5381 }, { "epoch": 0.8602253656197555, "grad_norm": 1.294490228476261, "learning_rate": 1.3090672602987073e-05, "loss": 0.5794308185577393, "step": 5382 }, { "epoch": 0.8603851993926317, "grad_norm": 1.3545138461009234, "learning_rate": 1.308815922049053e-05, "loss": 0.5975264310836792, "step": 5383 }, { "epoch": 0.8605450331655079, "grad_norm": 1.3543347127986403, "learning_rate": 1.3085645622327919e-05, "loss": 0.5372536778450012, "step": 5384 }, { "epoch": 0.8607048669383841, "grad_norm": 1.3327022205845394, "learning_rate": 1.3083131808674767e-05, "loss": 0.6296106576919556, "step": 5385 }, { "epoch": 0.8608647007112603, "grad_norm": 1.2149912266694336, "learning_rate": 1.3080617779706643e-05, "loss": 0.5276777744293213, "step": 5386 }, { "epoch": 0.8610245344841365, "grad_norm": 1.4056774268845018, "learning_rate": 1.307810353559911e-05, "loss": 0.6262065172195435, "step": 5387 }, { "epoch": 0.8611843682570127, "grad_norm": 1.3774543611836683, "learning_rate": 1.3075589076527752e-05, "loss": 0.6602093577384949, "step": 5388 }, { "epoch": 0.8613442020298889, "grad_norm": 1.5034981922614334, "learning_rate": 1.307307440266818e-05, "loss": 0.6020474433898926, "step": 5389 }, { "epoch": 0.8615040358027651, "grad_norm": 1.4590179418297433, "learning_rate": 1.3070559514195997e-05, "loss": 0.6127220392227173, "step": 5390 }, { "epoch": 0.8616638695756413, "grad_norm": 1.1181260988158759, "learning_rate": 1.3068044411286844e-05, "loss": 0.5359649658203125, "step": 5391 }, { "epoch": 0.8618237033485175, "grad_norm": 1.4424668673080034, "learning_rate": 1.3065529094116363e-05, "loss": 0.6392215490341187, "step": 5392 }, { "epoch": 0.8619835371213938, "grad_norm": 1.2866848419544958, "learning_rate": 1.3063013562860211e-05, "loss": 0.6206873655319214, "step": 5393 }, { "epoch": 0.86214337089427, "grad_norm": 1.207508637708333, "learning_rate": 1.3060497817694074e-05, "loss": 0.4888225793838501, "step": 5394 }, { "epoch": 0.8623032046671462, "grad_norm": 1.4384055227293677, "learning_rate": 1.3057981858793633e-05, "loss": 0.6058005094528198, "step": 5395 }, { "epoch": 0.8624630384400224, "grad_norm": 1.2445071830505035, "learning_rate": 1.3055465686334598e-05, "loss": 0.5231959819793701, "step": 5396 }, { "epoch": 0.8626228722128986, "grad_norm": 1.4915040866718434, "learning_rate": 1.3052949300492689e-05, "loss": 0.5996530055999756, "step": 5397 }, { "epoch": 0.8627827059857748, "grad_norm": 1.4133064999108793, "learning_rate": 1.3050432701443642e-05, "loss": 0.7228800058364868, "step": 5398 }, { "epoch": 0.862942539758651, "grad_norm": 1.527211380256249, "learning_rate": 1.3047915889363206e-05, "loss": 0.6040893793106079, "step": 5399 }, { "epoch": 0.8631023735315272, "grad_norm": 1.4115230804497718, "learning_rate": 1.3045398864427145e-05, "loss": 0.710062563419342, "step": 5400 }, { "epoch": 0.8632622073044034, "grad_norm": 1.543420567509727, "learning_rate": 1.3042881626811243e-05, "loss": 0.562633216381073, "step": 5401 }, { "epoch": 0.8634220410772796, "grad_norm": 1.308733821945186, "learning_rate": 1.304036417669129e-05, "loss": 0.5455915927886963, "step": 5402 }, { "epoch": 0.8635818748501558, "grad_norm": 1.2912232635804726, "learning_rate": 1.3037846514243097e-05, "loss": 0.7070979475975037, "step": 5403 }, { "epoch": 0.863741708623032, "grad_norm": 1.348603542500223, "learning_rate": 1.3035328639642495e-05, "loss": 0.6041513085365295, "step": 5404 }, { "epoch": 0.8639015423959082, "grad_norm": 1.274960685856229, "learning_rate": 1.3032810553065315e-05, "loss": 0.47157022356987, "step": 5405 }, { "epoch": 0.8640613761687844, "grad_norm": 1.3628394361427134, "learning_rate": 1.3030292254687418e-05, "loss": 0.5766534805297852, "step": 5406 }, { "epoch": 0.8642212099416606, "grad_norm": 1.3766350141501216, "learning_rate": 1.3027773744684669e-05, "loss": 0.6744813919067383, "step": 5407 }, { "epoch": 0.8643810437145368, "grad_norm": 1.3538644496287826, "learning_rate": 1.3025255023232952e-05, "loss": 0.7011481523513794, "step": 5408 }, { "epoch": 0.864540877487413, "grad_norm": 1.2648767192556154, "learning_rate": 1.3022736090508166e-05, "loss": 0.5287983417510986, "step": 5409 }, { "epoch": 0.8647007112602892, "grad_norm": 1.441330369500544, "learning_rate": 1.3020216946686224e-05, "loss": 0.681311845779419, "step": 5410 }, { "epoch": 0.8648605450331656, "grad_norm": 1.2927399807524915, "learning_rate": 1.3017697591943055e-05, "loss": 0.6303231716156006, "step": 5411 }, { "epoch": 0.8650203788060418, "grad_norm": 1.3775532334919502, "learning_rate": 1.3015178026454601e-05, "loss": 0.6177355647087097, "step": 5412 }, { "epoch": 0.865180212578918, "grad_norm": 1.2514293772905918, "learning_rate": 1.3012658250396817e-05, "loss": 0.6655751466751099, "step": 5413 }, { "epoch": 0.8653400463517942, "grad_norm": 1.3917651586610527, "learning_rate": 1.3010138263945682e-05, "loss": 0.5442310571670532, "step": 5414 }, { "epoch": 0.8654998801246704, "grad_norm": 1.3073686589122628, "learning_rate": 1.3007618067277177e-05, "loss": 0.609693169593811, "step": 5415 }, { "epoch": 0.8656597138975466, "grad_norm": 1.5022878966745954, "learning_rate": 1.3005097660567308e-05, "loss": 0.5554018020629883, "step": 5416 }, { "epoch": 0.8658195476704228, "grad_norm": 1.2303021181386435, "learning_rate": 1.3002577043992086e-05, "loss": 0.6085816621780396, "step": 5417 }, { "epoch": 0.865979381443299, "grad_norm": 1.3735651584960464, "learning_rate": 1.3000056217727546e-05, "loss": 0.6925711631774902, "step": 5418 }, { "epoch": 0.8661392152161752, "grad_norm": 1.280726131388599, "learning_rate": 1.2997535181949732e-05, "loss": 0.6692644357681274, "step": 5419 }, { "epoch": 0.8662990489890514, "grad_norm": 1.7556121479941258, "learning_rate": 1.2995013936834703e-05, "loss": 0.5083062648773193, "step": 5420 }, { "epoch": 0.8664588827619276, "grad_norm": 1.2164929250848597, "learning_rate": 1.2992492482558539e-05, "loss": 0.4996945261955261, "step": 5421 }, { "epoch": 0.8666187165348038, "grad_norm": 1.3401953776363948, "learning_rate": 1.2989970819297322e-05, "loss": 0.5217706561088562, "step": 5422 }, { "epoch": 0.86677855030768, "grad_norm": 1.7300773022332836, "learning_rate": 1.298744894722716e-05, "loss": 0.6275396347045898, "step": 5423 }, { "epoch": 0.8669383840805562, "grad_norm": 1.2860009808013104, "learning_rate": 1.2984926866524173e-05, "loss": 0.8648803234100342, "step": 5424 }, { "epoch": 0.8670982178534324, "grad_norm": 1.2459175612201283, "learning_rate": 1.2982404577364491e-05, "loss": 0.5561904907226562, "step": 5425 }, { "epoch": 0.8672580516263086, "grad_norm": 1.1504766603287886, "learning_rate": 1.2979882079924264e-05, "loss": 0.5504764318466187, "step": 5426 }, { "epoch": 0.8674178853991849, "grad_norm": 1.2539359691686058, "learning_rate": 1.2977359374379652e-05, "loss": 0.63856440782547, "step": 5427 }, { "epoch": 0.8675777191720611, "grad_norm": 1.2976774728560276, "learning_rate": 1.2974836460906834e-05, "loss": 0.5635458827018738, "step": 5428 }, { "epoch": 0.8677375529449373, "grad_norm": 1.400408373823321, "learning_rate": 1.2972313339682003e-05, "loss": 0.7133876085281372, "step": 5429 }, { "epoch": 0.8678973867178135, "grad_norm": 1.3782658434744184, "learning_rate": 1.2969790010881358e-05, "loss": 0.5333417654037476, "step": 5430 }, { "epoch": 0.8680572204906897, "grad_norm": 1.292429445656729, "learning_rate": 1.2967266474681127e-05, "loss": 0.6435680985450745, "step": 5431 }, { "epoch": 0.8682170542635659, "grad_norm": 1.2582810766896524, "learning_rate": 1.2964742731257541e-05, "loss": 0.601866602897644, "step": 5432 }, { "epoch": 0.8683768880364421, "grad_norm": 1.38759113849029, "learning_rate": 1.2962218780786846e-05, "loss": 0.6207876205444336, "step": 5433 }, { "epoch": 0.8685367218093183, "grad_norm": 1.3349842894684063, "learning_rate": 1.2959694623445313e-05, "loss": 0.6392444372177124, "step": 5434 }, { "epoch": 0.8686965555821945, "grad_norm": 1.2734290497701093, "learning_rate": 1.2957170259409215e-05, "loss": 0.7064521312713623, "step": 5435 }, { "epoch": 0.8688563893550707, "grad_norm": 1.3850997706302792, "learning_rate": 1.2954645688854848e-05, "loss": 0.5928981304168701, "step": 5436 }, { "epoch": 0.8690162231279469, "grad_norm": 1.2666963961896498, "learning_rate": 1.2952120911958516e-05, "loss": 0.5293391942977905, "step": 5437 }, { "epoch": 0.8691760569008231, "grad_norm": 1.5083626781768227, "learning_rate": 1.2949595928896544e-05, "loss": 0.683403730392456, "step": 5438 }, { "epoch": 0.8693358906736993, "grad_norm": 1.3516587003189466, "learning_rate": 1.2947070739845264e-05, "loss": 0.6419414281845093, "step": 5439 }, { "epoch": 0.8694957244465755, "grad_norm": 1.2919384145239183, "learning_rate": 1.2944545344981028e-05, "loss": 0.6239831447601318, "step": 5440 }, { "epoch": 0.8696555582194517, "grad_norm": 1.318382589446074, "learning_rate": 1.29420197444802e-05, "loss": 0.5964939594268799, "step": 5441 }, { "epoch": 0.8698153919923279, "grad_norm": 1.4606162952085848, "learning_rate": 1.2939493938519156e-05, "loss": 0.5942639112472534, "step": 5442 }, { "epoch": 0.8699752257652041, "grad_norm": 1.1766669041275446, "learning_rate": 1.2936967927274297e-05, "loss": 0.541946530342102, "step": 5443 }, { "epoch": 0.8701350595380803, "grad_norm": 1.24841961334068, "learning_rate": 1.2934441710922028e-05, "loss": 0.610543966293335, "step": 5444 }, { "epoch": 0.8702948933109566, "grad_norm": 1.4173899721554706, "learning_rate": 1.2931915289638766e-05, "loss": 0.6125832796096802, "step": 5445 }, { "epoch": 0.8704547270838329, "grad_norm": 1.4737687601274012, "learning_rate": 1.2929388663600953e-05, "loss": 0.6954208612442017, "step": 5446 }, { "epoch": 0.8706145608567091, "grad_norm": 1.3349316947251972, "learning_rate": 1.2926861832985036e-05, "loss": 0.6482112407684326, "step": 5447 }, { "epoch": 0.8707743946295853, "grad_norm": 1.300839736220841, "learning_rate": 1.2924334797967482e-05, "loss": 0.62181556224823, "step": 5448 }, { "epoch": 0.8709342284024615, "grad_norm": 1.168204257083016, "learning_rate": 1.2921807558724768e-05, "loss": 0.5384478569030762, "step": 5449 }, { "epoch": 0.8710940621753377, "grad_norm": 1.3130941853863423, "learning_rate": 1.291928011543339e-05, "loss": 0.6403297185897827, "step": 5450 }, { "epoch": 0.8712538959482139, "grad_norm": 1.388838730101994, "learning_rate": 1.2916752468269856e-05, "loss": 0.6323016881942749, "step": 5451 }, { "epoch": 0.8714137297210901, "grad_norm": 1.3364318537098623, "learning_rate": 1.291422461741068e-05, "loss": 0.72786545753479, "step": 5452 }, { "epoch": 0.8715735634939663, "grad_norm": 1.308742289016431, "learning_rate": 1.291169656303241e-05, "loss": 0.4821236729621887, "step": 5453 }, { "epoch": 0.8717333972668425, "grad_norm": 1.3564179845257383, "learning_rate": 1.290916830531159e-05, "loss": 0.6198195219039917, "step": 5454 }, { "epoch": 0.8718932310397187, "grad_norm": 1.2688156439534928, "learning_rate": 1.2906639844424785e-05, "loss": 0.6002181768417358, "step": 5455 }, { "epoch": 0.8720530648125949, "grad_norm": 1.428492369812389, "learning_rate": 1.2904111180548573e-05, "loss": 0.6680974960327148, "step": 5456 }, { "epoch": 0.8722128985854711, "grad_norm": 1.441072118723525, "learning_rate": 1.2901582313859549e-05, "loss": 0.5638870000839233, "step": 5457 }, { "epoch": 0.8723727323583473, "grad_norm": 1.6114720326345744, "learning_rate": 1.2899053244534319e-05, "loss": 0.9018843173980713, "step": 5458 }, { "epoch": 0.8725325661312235, "grad_norm": 1.0991957819778682, "learning_rate": 1.2896523972749503e-05, "loss": 0.5585343241691589, "step": 5459 }, { "epoch": 0.8726923999040997, "grad_norm": 1.2312875909347247, "learning_rate": 1.289399449868174e-05, "loss": 0.5699743628501892, "step": 5460 }, { "epoch": 0.872852233676976, "grad_norm": 1.2410634545818706, "learning_rate": 1.2891464822507676e-05, "loss": 0.5854558944702148, "step": 5461 }, { "epoch": 0.8730120674498522, "grad_norm": 1.5211078169875243, "learning_rate": 1.2888934944403973e-05, "loss": 0.6825758218765259, "step": 5462 }, { "epoch": 0.8731719012227284, "grad_norm": 1.3336129055882024, "learning_rate": 1.2886404864547315e-05, "loss": 0.5846972465515137, "step": 5463 }, { "epoch": 0.8733317349956046, "grad_norm": 1.2441196839395359, "learning_rate": 1.2883874583114389e-05, "loss": 0.616561233997345, "step": 5464 }, { "epoch": 0.8734915687684808, "grad_norm": 1.2525333998555337, "learning_rate": 1.2881344100281901e-05, "loss": 0.6144976019859314, "step": 5465 }, { "epoch": 0.873651402541357, "grad_norm": 1.2748302931972566, "learning_rate": 1.2878813416226574e-05, "loss": 0.5382413864135742, "step": 5466 }, { "epoch": 0.8738112363142332, "grad_norm": 1.1968117522971808, "learning_rate": 1.287628253112514e-05, "loss": 0.585057258605957, "step": 5467 }, { "epoch": 0.8739710700871094, "grad_norm": 1.4435821620208222, "learning_rate": 1.2873751445154345e-05, "loss": 0.7511310577392578, "step": 5468 }, { "epoch": 0.8741309038599856, "grad_norm": 1.135678734031215, "learning_rate": 1.2871220158490953e-05, "loss": 0.517582893371582, "step": 5469 }, { "epoch": 0.8742907376328618, "grad_norm": 1.254155204881382, "learning_rate": 1.2868688671311742e-05, "loss": 0.6252093315124512, "step": 5470 }, { "epoch": 0.874450571405738, "grad_norm": 1.3483404244597559, "learning_rate": 1.2866156983793501e-05, "loss": 0.6788018345832825, "step": 5471 }, { "epoch": 0.8746104051786142, "grad_norm": 2.6229716232772518, "learning_rate": 1.2863625096113028e-05, "loss": 0.655983567237854, "step": 5472 }, { "epoch": 0.8747702389514904, "grad_norm": 1.079347246277332, "learning_rate": 1.2861093008447152e-05, "loss": 0.5792156457901001, "step": 5473 }, { "epoch": 0.8749300727243666, "grad_norm": 1.4505462823976512, "learning_rate": 1.2858560720972694e-05, "loss": 0.4782540500164032, "step": 5474 }, { "epoch": 0.8750899064972428, "grad_norm": 1.4222751687070134, "learning_rate": 1.2856028233866512e-05, "loss": 0.6178659200668335, "step": 5475 }, { "epoch": 0.875249740270119, "grad_norm": 1.4555373362839812, "learning_rate": 1.2853495547305456e-05, "loss": 0.5156598687171936, "step": 5476 }, { "epoch": 0.8754095740429952, "grad_norm": 1.2708801102576583, "learning_rate": 1.2850962661466401e-05, "loss": 0.5832034349441528, "step": 5477 }, { "epoch": 0.8755694078158714, "grad_norm": 1.4450905846948312, "learning_rate": 1.284842957652624e-05, "loss": 0.626712441444397, "step": 5478 }, { "epoch": 0.8757292415887477, "grad_norm": 1.400027595956841, "learning_rate": 1.284589629266187e-05, "loss": 0.5975473523139954, "step": 5479 }, { "epoch": 0.8758890753616239, "grad_norm": 1.0701241067474772, "learning_rate": 1.2843362810050209e-05, "loss": 0.5585854053497314, "step": 5480 }, { "epoch": 0.8760489091345001, "grad_norm": 1.2973856726830024, "learning_rate": 1.2840829128868184e-05, "loss": 0.6451972723007202, "step": 5481 }, { "epoch": 0.8762087429073764, "grad_norm": 1.3708480283321947, "learning_rate": 1.2838295249292738e-05, "loss": 0.5613892674446106, "step": 5482 }, { "epoch": 0.8763685766802526, "grad_norm": 1.3586371847398966, "learning_rate": 1.2835761171500833e-05, "loss": 0.624251127243042, "step": 5483 }, { "epoch": 0.8765284104531288, "grad_norm": 1.2419807044391893, "learning_rate": 1.2833226895669437e-05, "loss": 0.599536657333374, "step": 5484 }, { "epoch": 0.876688244226005, "grad_norm": 1.2978335617946073, "learning_rate": 1.2830692421975535e-05, "loss": 0.6286454200744629, "step": 5485 }, { "epoch": 0.8768480779988812, "grad_norm": 1.334799577999357, "learning_rate": 1.2828157750596125e-05, "loss": 0.6694650053977966, "step": 5486 }, { "epoch": 0.8770079117717574, "grad_norm": 1.7330631823179075, "learning_rate": 1.2825622881708218e-05, "loss": 0.6343778967857361, "step": 5487 }, { "epoch": 0.8771677455446336, "grad_norm": 1.2250372943147543, "learning_rate": 1.2823087815488844e-05, "loss": 0.5278599858283997, "step": 5488 }, { "epoch": 0.8773275793175098, "grad_norm": 1.1335804300330257, "learning_rate": 1.2820552552115038e-05, "loss": 0.5663872957229614, "step": 5489 }, { "epoch": 0.877487413090386, "grad_norm": 1.3624512228824923, "learning_rate": 1.281801709176386e-05, "loss": 0.53324294090271, "step": 5490 }, { "epoch": 0.8776472468632622, "grad_norm": 1.1651625521433544, "learning_rate": 1.281548143461237e-05, "loss": 0.45442116260528564, "step": 5491 }, { "epoch": 0.8778070806361384, "grad_norm": 1.1436593465594413, "learning_rate": 1.2812945580837658e-05, "loss": 0.6130696535110474, "step": 5492 }, { "epoch": 0.8779669144090146, "grad_norm": 1.365113807365821, "learning_rate": 1.2810409530616817e-05, "loss": 0.5902388095855713, "step": 5493 }, { "epoch": 0.8781267481818908, "grad_norm": 1.4460972652118524, "learning_rate": 1.2807873284126949e-05, "loss": 0.6558208465576172, "step": 5494 }, { "epoch": 0.878286581954767, "grad_norm": 1.3752102041555332, "learning_rate": 1.2805336841545181e-05, "loss": 0.6177711486816406, "step": 5495 }, { "epoch": 0.8784464157276433, "grad_norm": 1.372421008432739, "learning_rate": 1.2802800203048653e-05, "loss": 0.5379990935325623, "step": 5496 }, { "epoch": 0.8786062495005195, "grad_norm": 1.2880913770011173, "learning_rate": 1.2800263368814507e-05, "loss": 0.7426531910896301, "step": 5497 }, { "epoch": 0.8787660832733957, "grad_norm": 1.452465837042803, "learning_rate": 1.2797726339019911e-05, "loss": 0.5748164057731628, "step": 5498 }, { "epoch": 0.8789259170462719, "grad_norm": 1.3931855232593457, "learning_rate": 1.2795189113842042e-05, "loss": 0.5959827899932861, "step": 5499 }, { "epoch": 0.8790857508191481, "grad_norm": 1.341274293318591, "learning_rate": 1.2792651693458091e-05, "loss": 0.6771944165229797, "step": 5500 }, { "epoch": 0.8792455845920243, "grad_norm": 1.4594691677190008, "learning_rate": 1.279011407804526e-05, "loss": 0.5914610624313354, "step": 5501 }, { "epoch": 0.8794054183649005, "grad_norm": 1.447941871558332, "learning_rate": 1.2787576267780768e-05, "loss": 0.6165250539779663, "step": 5502 }, { "epoch": 0.8795652521377767, "grad_norm": 1.4860769776129699, "learning_rate": 1.2785038262841851e-05, "loss": 0.5391095876693726, "step": 5503 }, { "epoch": 0.8797250859106529, "grad_norm": 1.3362758516447726, "learning_rate": 1.2782500063405747e-05, "loss": 0.5625762939453125, "step": 5504 }, { "epoch": 0.8798849196835291, "grad_norm": 1.4534383128734023, "learning_rate": 1.2779961669649723e-05, "loss": 0.9073194265365601, "step": 5505 }, { "epoch": 0.8800447534564053, "grad_norm": 1.2927981327048264, "learning_rate": 1.2777423081751043e-05, "loss": 0.5551429390907288, "step": 5506 }, { "epoch": 0.8802045872292815, "grad_norm": 1.4546770047808797, "learning_rate": 1.2774884299887e-05, "loss": 0.7679097652435303, "step": 5507 }, { "epoch": 0.8803644210021577, "grad_norm": 1.473084345455981, "learning_rate": 1.277234532423489e-05, "loss": 0.5452132225036621, "step": 5508 }, { "epoch": 0.8805242547750339, "grad_norm": 1.322980030138996, "learning_rate": 1.2769806154972024e-05, "loss": 0.5290013551712036, "step": 5509 }, { "epoch": 0.8806840885479101, "grad_norm": 1.1397540658538199, "learning_rate": 1.2767266792275734e-05, "loss": 0.5953032374382019, "step": 5510 }, { "epoch": 0.8808439223207863, "grad_norm": 1.2925361607214145, "learning_rate": 1.2764727236323355e-05, "loss": 0.6293697357177734, "step": 5511 }, { "epoch": 0.8810037560936625, "grad_norm": 1.4320843800387173, "learning_rate": 1.2762187487292241e-05, "loss": 0.5034792423248291, "step": 5512 }, { "epoch": 0.8811635898665388, "grad_norm": 1.1974759881362143, "learning_rate": 1.2759647545359765e-05, "loss": 0.5350876450538635, "step": 5513 }, { "epoch": 0.881323423639415, "grad_norm": 1.3956630374744325, "learning_rate": 1.27571074107033e-05, "loss": 0.7563232183456421, "step": 5514 }, { "epoch": 0.8814832574122912, "grad_norm": 1.218090974270028, "learning_rate": 1.2754567083500247e-05, "loss": 0.6839864253997803, "step": 5515 }, { "epoch": 0.8816430911851674, "grad_norm": 2.4110988551924524, "learning_rate": 1.2752026563928006e-05, "loss": 0.6780809760093689, "step": 5516 }, { "epoch": 0.8818029249580437, "grad_norm": 1.4458198352294247, "learning_rate": 1.2749485852164003e-05, "loss": 0.7326167821884155, "step": 5517 }, { "epoch": 0.8819627587309199, "grad_norm": 1.3388609826550812, "learning_rate": 1.2746944948385673e-05, "loss": 0.6738269329071045, "step": 5518 }, { "epoch": 0.8821225925037961, "grad_norm": 1.547903880747485, "learning_rate": 1.2744403852770456e-05, "loss": 0.7001863121986389, "step": 5519 }, { "epoch": 0.8822824262766723, "grad_norm": 1.451336728806808, "learning_rate": 1.2741862565495825e-05, "loss": 0.7693822383880615, "step": 5520 }, { "epoch": 0.8824422600495485, "grad_norm": 1.1449871376085976, "learning_rate": 1.2739321086739244e-05, "loss": 0.5350653529167175, "step": 5521 }, { "epoch": 0.8826020938224247, "grad_norm": 1.1480653598213306, "learning_rate": 1.2736779416678205e-05, "loss": 0.5229732990264893, "step": 5522 }, { "epoch": 0.8827619275953009, "grad_norm": 1.281222428743678, "learning_rate": 1.273423755549021e-05, "loss": 0.6051980257034302, "step": 5523 }, { "epoch": 0.8829217613681771, "grad_norm": 1.27857005141748, "learning_rate": 1.2731695503352773e-05, "loss": 0.5911521911621094, "step": 5524 }, { "epoch": 0.8830815951410533, "grad_norm": 1.1528988940750744, "learning_rate": 1.272915326044342e-05, "loss": 0.7009958624839783, "step": 5525 }, { "epoch": 0.8832414289139295, "grad_norm": 1.3111524981337968, "learning_rate": 1.2726610826939696e-05, "loss": 0.5292192697525024, "step": 5526 }, { "epoch": 0.8834012626868057, "grad_norm": 1.2112053105664906, "learning_rate": 1.2724068203019155e-05, "loss": 0.6225825548171997, "step": 5527 }, { "epoch": 0.883561096459682, "grad_norm": 1.1714918925133897, "learning_rate": 1.2721525388859362e-05, "loss": 0.6354460120201111, "step": 5528 }, { "epoch": 0.8837209302325582, "grad_norm": 1.583517994888606, "learning_rate": 1.2718982384637897e-05, "loss": 0.7310903072357178, "step": 5529 }, { "epoch": 0.8838807640054344, "grad_norm": 1.0688523496235265, "learning_rate": 1.271643919053236e-05, "loss": 0.5905700922012329, "step": 5530 }, { "epoch": 0.8840405977783106, "grad_norm": 1.3929722066495083, "learning_rate": 1.271389580672035e-05, "loss": 0.642207682132721, "step": 5531 }, { "epoch": 0.8842004315511868, "grad_norm": 1.575104416045813, "learning_rate": 1.2711352233379497e-05, "loss": 0.6419017314910889, "step": 5532 }, { "epoch": 0.884360265324063, "grad_norm": 1.2527261175231774, "learning_rate": 1.2708808470687433e-05, "loss": 0.6250254511833191, "step": 5533 }, { "epoch": 0.8845200990969392, "grad_norm": 1.4681600625274966, "learning_rate": 1.2706264518821797e-05, "loss": 0.7086988687515259, "step": 5534 }, { "epoch": 0.8846799328698154, "grad_norm": 1.23059957258588, "learning_rate": 1.2703720377960265e-05, "loss": 0.5447704195976257, "step": 5535 }, { "epoch": 0.8848397666426916, "grad_norm": 1.5778274053274393, "learning_rate": 1.2701176048280497e-05, "loss": 0.7511508464813232, "step": 5536 }, { "epoch": 0.8849996004155678, "grad_norm": 1.3092156449620496, "learning_rate": 1.2698631529960186e-05, "loss": 0.4974208474159241, "step": 5537 }, { "epoch": 0.885159434188444, "grad_norm": 1.4098660730392991, "learning_rate": 1.2696086823177032e-05, "loss": 0.5651538372039795, "step": 5538 }, { "epoch": 0.8853192679613202, "grad_norm": 1.4723056283610507, "learning_rate": 1.2693541928108747e-05, "loss": 0.5443428158760071, "step": 5539 }, { "epoch": 0.8854791017341964, "grad_norm": 1.388212645238037, "learning_rate": 1.269099684493306e-05, "loss": 0.5740501880645752, "step": 5540 }, { "epoch": 0.8856389355070726, "grad_norm": 1.2873749657783011, "learning_rate": 1.2688451573827703e-05, "loss": 0.6159616112709045, "step": 5541 }, { "epoch": 0.8857987692799488, "grad_norm": 1.30037621403214, "learning_rate": 1.2685906114970436e-05, "loss": 0.6553865075111389, "step": 5542 }, { "epoch": 0.885958603052825, "grad_norm": 1.7250225834383208, "learning_rate": 1.2683360468539026e-05, "loss": 0.7273486852645874, "step": 5543 }, { "epoch": 0.8861184368257012, "grad_norm": 1.4220894057694686, "learning_rate": 1.2680814634711244e-05, "loss": 0.6462528109550476, "step": 5544 }, { "epoch": 0.8862782705985774, "grad_norm": 1.3878635057604298, "learning_rate": 1.2678268613664892e-05, "loss": 0.6074379086494446, "step": 5545 }, { "epoch": 0.8864381043714537, "grad_norm": 1.3487348152396899, "learning_rate": 1.2675722405577766e-05, "loss": 0.5460327863693237, "step": 5546 }, { "epoch": 0.8865979381443299, "grad_norm": 1.2867109005608035, "learning_rate": 1.267317601062769e-05, "loss": 0.4924429655075073, "step": 5547 }, { "epoch": 0.8867577719172061, "grad_norm": 1.3597760375695942, "learning_rate": 1.2670629428992494e-05, "loss": 0.6402369737625122, "step": 5548 }, { "epoch": 0.8869176056900823, "grad_norm": 1.3079612366912239, "learning_rate": 1.266808266085002e-05, "loss": 0.7166587114334106, "step": 5549 }, { "epoch": 0.8870774394629585, "grad_norm": 1.3840249047907232, "learning_rate": 1.2665535706378127e-05, "loss": 0.707802414894104, "step": 5550 }, { "epoch": 0.8872372732358347, "grad_norm": 1.4320520391184355, "learning_rate": 1.2662988565754686e-05, "loss": 0.5823968648910522, "step": 5551 }, { "epoch": 0.887397107008711, "grad_norm": 1.5709860577577406, "learning_rate": 1.2660441239157576e-05, "loss": 0.6865856647491455, "step": 5552 }, { "epoch": 0.8875569407815872, "grad_norm": 1.2401255851430042, "learning_rate": 1.2657893726764696e-05, "loss": 0.5389535427093506, "step": 5553 }, { "epoch": 0.8877167745544634, "grad_norm": 1.8163806396676396, "learning_rate": 1.2655346028753961e-05, "loss": 0.5805940628051758, "step": 5554 }, { "epoch": 0.8878766083273396, "grad_norm": 1.5696876036135965, "learning_rate": 1.2652798145303286e-05, "loss": 0.6632301211357117, "step": 5555 }, { "epoch": 0.8880364421002158, "grad_norm": 1.496389807448529, "learning_rate": 1.2650250076590606e-05, "loss": 0.5687971711158752, "step": 5556 }, { "epoch": 0.888196275873092, "grad_norm": 1.3163106667346556, "learning_rate": 1.2647701822793872e-05, "loss": 0.6209065914154053, "step": 5557 }, { "epoch": 0.8883561096459682, "grad_norm": 1.0919518500515584, "learning_rate": 1.2645153384091044e-05, "loss": 0.5452450513839722, "step": 5558 }, { "epoch": 0.8885159434188444, "grad_norm": 1.329072738474116, "learning_rate": 1.2642604760660097e-05, "loss": 0.7052738666534424, "step": 5559 }, { "epoch": 0.8886757771917206, "grad_norm": 1.4899791918880974, "learning_rate": 1.2640055952679018e-05, "loss": 0.558429479598999, "step": 5560 }, { "epoch": 0.8888356109645968, "grad_norm": 1.0546515051696677, "learning_rate": 1.26375069603258e-05, "loss": 0.5511955618858337, "step": 5561 }, { "epoch": 0.888995444737473, "grad_norm": 1.697572906167799, "learning_rate": 1.2634957783778466e-05, "loss": 0.5529689192771912, "step": 5562 }, { "epoch": 0.8891552785103493, "grad_norm": 1.4152768064047752, "learning_rate": 1.2632408423215033e-05, "loss": 0.6610737442970276, "step": 5563 }, { "epoch": 0.8893151122832255, "grad_norm": 1.0678324920012803, "learning_rate": 1.2629858878813546e-05, "loss": 0.5126513242721558, "step": 5564 }, { "epoch": 0.8894749460561017, "grad_norm": 1.1759113051822636, "learning_rate": 1.2627309150752052e-05, "loss": 0.4743994474411011, "step": 5565 }, { "epoch": 0.8896347798289779, "grad_norm": 1.42168654878945, "learning_rate": 1.2624759239208616e-05, "loss": 0.6242191791534424, "step": 5566 }, { "epoch": 0.8897946136018541, "grad_norm": 1.2527496251729426, "learning_rate": 1.2622209144361313e-05, "loss": 0.5178669691085815, "step": 5567 }, { "epoch": 0.8899544473747303, "grad_norm": 1.3468318683685234, "learning_rate": 1.2619658866388233e-05, "loss": 0.7309325933456421, "step": 5568 }, { "epoch": 0.8901142811476065, "grad_norm": 1.4799481546736981, "learning_rate": 1.2617108405467484e-05, "loss": 0.6485530138015747, "step": 5569 }, { "epoch": 0.8902741149204827, "grad_norm": 2.2674928752459276, "learning_rate": 1.2614557761777174e-05, "loss": 0.6407302618026733, "step": 5570 }, { "epoch": 0.8904339486933589, "grad_norm": 1.362889012695916, "learning_rate": 1.2612006935495434e-05, "loss": 0.5450193881988525, "step": 5571 }, { "epoch": 0.8905937824662351, "grad_norm": 1.347037967522294, "learning_rate": 1.2609455926800402e-05, "loss": 0.624599277973175, "step": 5572 }, { "epoch": 0.8907536162391113, "grad_norm": 1.3198762865157354, "learning_rate": 1.2606904735870236e-05, "loss": 0.6257376670837402, "step": 5573 }, { "epoch": 0.8909134500119875, "grad_norm": 1.4422140315595826, "learning_rate": 1.2604353362883101e-05, "loss": 0.7107729911804199, "step": 5574 }, { "epoch": 0.8910732837848637, "grad_norm": 1.3557979674427862, "learning_rate": 1.2601801808017173e-05, "loss": 0.6248384714126587, "step": 5575 }, { "epoch": 0.8912331175577399, "grad_norm": 1.3224578961171947, "learning_rate": 1.2599250071450646e-05, "loss": 0.7080901861190796, "step": 5576 }, { "epoch": 0.8913929513306161, "grad_norm": 1.431510290339472, "learning_rate": 1.2596698153361722e-05, "loss": 0.6904700994491577, "step": 5577 }, { "epoch": 0.8915527851034923, "grad_norm": 1.342836441522318, "learning_rate": 1.259414605392862e-05, "loss": 0.6449389457702637, "step": 5578 }, { "epoch": 0.8917126188763685, "grad_norm": 1.126218478203888, "learning_rate": 1.2591593773329569e-05, "loss": 0.5822738409042358, "step": 5579 }, { "epoch": 0.8918724526492448, "grad_norm": 1.2629466676267809, "learning_rate": 1.2589041311742809e-05, "loss": 0.7423664927482605, "step": 5580 }, { "epoch": 0.892032286422121, "grad_norm": 1.223853710594869, "learning_rate": 1.25864886693466e-05, "loss": 0.6153348684310913, "step": 5581 }, { "epoch": 0.8921921201949972, "grad_norm": 1.254571061838046, "learning_rate": 1.2583935846319206e-05, "loss": 0.6800212860107422, "step": 5582 }, { "epoch": 0.8923519539678734, "grad_norm": 1.2480471742429973, "learning_rate": 1.2581382842838903e-05, "loss": 0.6339824795722961, "step": 5583 }, { "epoch": 0.8925117877407496, "grad_norm": 1.3987439536174162, "learning_rate": 1.2578829659083992e-05, "loss": 0.8237475752830505, "step": 5584 }, { "epoch": 0.8926716215136258, "grad_norm": 1.5439203505969867, "learning_rate": 1.2576276295232773e-05, "loss": 0.7311341166496277, "step": 5585 }, { "epoch": 0.892831455286502, "grad_norm": 1.336252769303824, "learning_rate": 1.2573722751463566e-05, "loss": 0.6077253222465515, "step": 5586 }, { "epoch": 0.8929912890593783, "grad_norm": 1.5781149989878136, "learning_rate": 1.2571169027954702e-05, "loss": 0.6912637948989868, "step": 5587 }, { "epoch": 0.8931511228322545, "grad_norm": 1.224556953239743, "learning_rate": 1.256861512488452e-05, "loss": 0.6290299892425537, "step": 5588 }, { "epoch": 0.8933109566051307, "grad_norm": 1.5744292515089566, "learning_rate": 1.2566061042431382e-05, "loss": 0.6634250283241272, "step": 5589 }, { "epoch": 0.8934707903780069, "grad_norm": 1.2588027027258437, "learning_rate": 1.2563506780773648e-05, "loss": 0.44771987199783325, "step": 5590 }, { "epoch": 0.8936306241508831, "grad_norm": 1.1760328167893577, "learning_rate": 1.2560952340089708e-05, "loss": 0.5435657501220703, "step": 5591 }, { "epoch": 0.8937904579237593, "grad_norm": 1.392793336577513, "learning_rate": 1.2558397720557945e-05, "loss": 0.6466647386550903, "step": 5592 }, { "epoch": 0.8939502916966355, "grad_norm": 1.2406280636772387, "learning_rate": 1.255584292235677e-05, "loss": 0.6693401336669922, "step": 5593 }, { "epoch": 0.8941101254695117, "grad_norm": 1.4323845022023096, "learning_rate": 1.2553287945664601e-05, "loss": 0.6785883903503418, "step": 5594 }, { "epoch": 0.894269959242388, "grad_norm": 1.3110755460974965, "learning_rate": 1.2550732790659868e-05, "loss": 0.603744626045227, "step": 5595 }, { "epoch": 0.8944297930152642, "grad_norm": 1.1936784248772554, "learning_rate": 1.2548177457521016e-05, "loss": 0.6464182138442993, "step": 5596 }, { "epoch": 0.8945896267881404, "grad_norm": 1.6369986585677914, "learning_rate": 1.2545621946426497e-05, "loss": 0.83008873462677, "step": 5597 }, { "epoch": 0.8947494605610166, "grad_norm": 1.2187061810254107, "learning_rate": 1.254306625755478e-05, "loss": 0.6309959888458252, "step": 5598 }, { "epoch": 0.8949092943338928, "grad_norm": 1.3110347753906264, "learning_rate": 1.2540510391084345e-05, "loss": 0.6842869520187378, "step": 5599 }, { "epoch": 0.895069128106769, "grad_norm": 1.3941094175204272, "learning_rate": 1.2537954347193684e-05, "loss": 0.6885541677474976, "step": 5600 }, { "epoch": 0.8952289618796452, "grad_norm": 1.3383667072571055, "learning_rate": 1.2535398126061304e-05, "loss": 0.7115696668624878, "step": 5601 }, { "epoch": 0.8953887956525214, "grad_norm": 1.1289284215907887, "learning_rate": 1.2532841727865722e-05, "loss": 0.5720961689949036, "step": 5602 }, { "epoch": 0.8955486294253976, "grad_norm": 1.2137996372160065, "learning_rate": 1.2530285152785467e-05, "loss": 0.5731387138366699, "step": 5603 }, { "epoch": 0.8957084631982738, "grad_norm": 1.4466108471330077, "learning_rate": 1.2527728400999082e-05, "loss": 0.7186115980148315, "step": 5604 }, { "epoch": 0.89586829697115, "grad_norm": 1.1440710015151425, "learning_rate": 1.2525171472685117e-05, "loss": 0.6536203622817993, "step": 5605 }, { "epoch": 0.8960281307440262, "grad_norm": 1.109861931213502, "learning_rate": 1.2522614368022147e-05, "loss": 0.5593175888061523, "step": 5606 }, { "epoch": 0.8961879645169024, "grad_norm": 1.489437289477893, "learning_rate": 1.2520057087188748e-05, "loss": 0.7035115957260132, "step": 5607 }, { "epoch": 0.8963477982897786, "grad_norm": 1.2565979163295125, "learning_rate": 1.2517499630363506e-05, "loss": 0.6505922079086304, "step": 5608 }, { "epoch": 0.8965076320626548, "grad_norm": 1.3284337079363981, "learning_rate": 1.2514941997725031e-05, "loss": 0.7460681796073914, "step": 5609 }, { "epoch": 0.896667465835531, "grad_norm": 1.3059592696943683, "learning_rate": 1.2512384189451936e-05, "loss": 0.6341826319694519, "step": 5610 }, { "epoch": 0.8968272996084072, "grad_norm": 1.4928248402640247, "learning_rate": 1.2509826205722852e-05, "loss": 0.6365426778793335, "step": 5611 }, { "epoch": 0.8969871333812834, "grad_norm": 1.3013974167636158, "learning_rate": 1.2507268046716414e-05, "loss": 0.582517147064209, "step": 5612 }, { "epoch": 0.8971469671541596, "grad_norm": 1.3731574504714448, "learning_rate": 1.250470971261128e-05, "loss": 0.6723521947860718, "step": 5613 }, { "epoch": 0.8973068009270359, "grad_norm": 1.5878386878470394, "learning_rate": 1.2502151203586115e-05, "loss": 0.6951518654823303, "step": 5614 }, { "epoch": 0.8974666346999121, "grad_norm": 1.34266492570696, "learning_rate": 1.2499592519819593e-05, "loss": 0.7673007249832153, "step": 5615 }, { "epoch": 0.8976264684727883, "grad_norm": 1.2919193559302538, "learning_rate": 1.2497033661490405e-05, "loss": 0.678473949432373, "step": 5616 }, { "epoch": 0.8977863022456645, "grad_norm": 1.1259339426673411, "learning_rate": 1.249447462877725e-05, "loss": 0.5673378705978394, "step": 5617 }, { "epoch": 0.8979461360185407, "grad_norm": 1.3761342102518843, "learning_rate": 1.249191542185885e-05, "loss": 0.6092519164085388, "step": 5618 }, { "epoch": 0.8981059697914169, "grad_norm": 1.3514113362797262, "learning_rate": 1.2489356040913923e-05, "loss": 0.6817176938056946, "step": 5619 }, { "epoch": 0.8982658035642931, "grad_norm": 1.2284763349521088, "learning_rate": 1.2486796486121207e-05, "loss": 0.5880755186080933, "step": 5620 }, { "epoch": 0.8984256373371693, "grad_norm": 1.28910989847405, "learning_rate": 1.2484236757659455e-05, "loss": 0.7243233919143677, "step": 5621 }, { "epoch": 0.8985854711100455, "grad_norm": 1.2163406894415847, "learning_rate": 1.2481676855707427e-05, "loss": 0.7036280632019043, "step": 5622 }, { "epoch": 0.8987453048829218, "grad_norm": 1.1549064888669787, "learning_rate": 1.24791167804439e-05, "loss": 0.6243782639503479, "step": 5623 }, { "epoch": 0.898905138655798, "grad_norm": 1.4116841169233256, "learning_rate": 1.2476556532047664e-05, "loss": 0.6735097169876099, "step": 5624 }, { "epoch": 0.8990649724286742, "grad_norm": 1.1797579573484138, "learning_rate": 1.2473996110697508e-05, "loss": 0.4629819691181183, "step": 5625 }, { "epoch": 0.8992248062015504, "grad_norm": 1.1453540432505063, "learning_rate": 1.2471435516572251e-05, "loss": 0.5803484320640564, "step": 5626 }, { "epoch": 0.8993846399744266, "grad_norm": 1.483560057048863, "learning_rate": 1.2468874749850715e-05, "loss": 0.6261792182922363, "step": 5627 }, { "epoch": 0.8995444737473028, "grad_norm": 1.253687849887175, "learning_rate": 1.2466313810711729e-05, "loss": 0.552505612373352, "step": 5628 }, { "epoch": 0.899704307520179, "grad_norm": 1.314229347765401, "learning_rate": 1.2463752699334148e-05, "loss": 0.5407536029815674, "step": 5629 }, { "epoch": 0.8998641412930553, "grad_norm": 1.2456285401045575, "learning_rate": 1.2461191415896823e-05, "loss": 0.6561392545700073, "step": 5630 }, { "epoch": 0.9000239750659315, "grad_norm": 1.4814255106767038, "learning_rate": 1.2458629960578633e-05, "loss": 0.6983702182769775, "step": 5631 }, { "epoch": 0.9001838088388077, "grad_norm": 1.2303935939941057, "learning_rate": 1.245606833355845e-05, "loss": 0.7233296632766724, "step": 5632 }, { "epoch": 0.9003436426116839, "grad_norm": 1.246550780372517, "learning_rate": 1.2453506535015178e-05, "loss": 0.5277285575866699, "step": 5633 }, { "epoch": 0.9005034763845601, "grad_norm": 1.135668514321019, "learning_rate": 1.2450944565127723e-05, "loss": 0.5682007074356079, "step": 5634 }, { "epoch": 0.9006633101574363, "grad_norm": 1.4445029755686036, "learning_rate": 1.2448382424075e-05, "loss": 0.7176386117935181, "step": 5635 }, { "epoch": 0.9008231439303125, "grad_norm": 1.2730643234988086, "learning_rate": 1.2445820112035945e-05, "loss": 0.7526347637176514, "step": 5636 }, { "epoch": 0.9009829777031887, "grad_norm": 1.5155828038091537, "learning_rate": 1.2443257629189496e-05, "loss": 0.7106977701187134, "step": 5637 }, { "epoch": 0.9011428114760649, "grad_norm": 1.4473364876055663, "learning_rate": 1.244069497571461e-05, "loss": 0.7309247851371765, "step": 5638 }, { "epoch": 0.9013026452489411, "grad_norm": 1.3114474139689567, "learning_rate": 1.2438132151790254e-05, "loss": 0.6906461715698242, "step": 5639 }, { "epoch": 0.9014624790218173, "grad_norm": 1.1939915162021175, "learning_rate": 1.2435569157595405e-05, "loss": 0.5282504558563232, "step": 5640 }, { "epoch": 0.9016223127946935, "grad_norm": 1.2216600782121176, "learning_rate": 1.2433005993309055e-05, "loss": 0.6578055620193481, "step": 5641 }, { "epoch": 0.9017821465675697, "grad_norm": 1.2665929687343833, "learning_rate": 1.2430442659110203e-05, "loss": 0.5980396866798401, "step": 5642 }, { "epoch": 0.9019419803404459, "grad_norm": 1.2727721923735864, "learning_rate": 1.2427879155177867e-05, "loss": 0.6437100172042847, "step": 5643 }, { "epoch": 0.9021018141133221, "grad_norm": 1.445718676495393, "learning_rate": 1.2425315481691071e-05, "loss": 0.582126259803772, "step": 5644 }, { "epoch": 0.9022616478861983, "grad_norm": 1.2932660788272237, "learning_rate": 1.2422751638828854e-05, "loss": 0.6386773586273193, "step": 5645 }, { "epoch": 0.9024214816590745, "grad_norm": 1.2644468854045086, "learning_rate": 1.2420187626770269e-05, "loss": 0.6228309869766235, "step": 5646 }, { "epoch": 0.9025813154319507, "grad_norm": 1.4916755109882278, "learning_rate": 1.2417623445694367e-05, "loss": 0.5483219623565674, "step": 5647 }, { "epoch": 0.902741149204827, "grad_norm": 1.4215260083034214, "learning_rate": 1.2415059095780233e-05, "loss": 0.6278390884399414, "step": 5648 }, { "epoch": 0.9029009829777032, "grad_norm": 1.2264374725947667, "learning_rate": 1.2412494577206943e-05, "loss": 0.6430962085723877, "step": 5649 }, { "epoch": 0.9030608167505794, "grad_norm": 1.3577723217730182, "learning_rate": 1.24099298901536e-05, "loss": 0.6741734743118286, "step": 5650 }, { "epoch": 0.9032206505234556, "grad_norm": 1.4667149614303232, "learning_rate": 1.2407365034799312e-05, "loss": 0.6809106469154358, "step": 5651 }, { "epoch": 0.9033804842963318, "grad_norm": 1.3794832308146312, "learning_rate": 1.2404800011323198e-05, "loss": 0.6649830937385559, "step": 5652 }, { "epoch": 0.903540318069208, "grad_norm": 1.2305280223061106, "learning_rate": 1.2402234819904387e-05, "loss": 0.5738644599914551, "step": 5653 }, { "epoch": 0.9037001518420842, "grad_norm": 1.4180644885550406, "learning_rate": 1.2399669460722028e-05, "loss": 0.5817791819572449, "step": 5654 }, { "epoch": 0.9038599856149604, "grad_norm": 1.6156308615684498, "learning_rate": 1.2397103933955272e-05, "loss": 0.5363345146179199, "step": 5655 }, { "epoch": 0.9040198193878366, "grad_norm": 1.1564381916737216, "learning_rate": 1.2394538239783291e-05, "loss": 0.5525607466697693, "step": 5656 }, { "epoch": 0.9041796531607128, "grad_norm": 1.2011527021666721, "learning_rate": 1.2391972378385262e-05, "loss": 0.5001590251922607, "step": 5657 }, { "epoch": 0.9043394869335891, "grad_norm": 1.5600850492318377, "learning_rate": 1.2389406349940376e-05, "loss": 0.5903452634811401, "step": 5658 }, { "epoch": 0.9044993207064653, "grad_norm": 1.3203376395485307, "learning_rate": 1.2386840154627832e-05, "loss": 0.5288195013999939, "step": 5659 }, { "epoch": 0.9046591544793415, "grad_norm": 1.170966339346435, "learning_rate": 1.238427379262685e-05, "loss": 0.5192838907241821, "step": 5660 }, { "epoch": 0.9048189882522177, "grad_norm": 1.1144246877920827, "learning_rate": 1.2381707264116651e-05, "loss": 0.5818649530410767, "step": 5661 }, { "epoch": 0.9049788220250939, "grad_norm": 1.3551846667600151, "learning_rate": 1.237914056927647e-05, "loss": 0.6045767664909363, "step": 5662 }, { "epoch": 0.9051386557979701, "grad_norm": 1.2536233218013597, "learning_rate": 1.2376573708285565e-05, "loss": 0.6081132292747498, "step": 5663 }, { "epoch": 0.9052984895708464, "grad_norm": 1.2422140979257383, "learning_rate": 1.2374006681323188e-05, "loss": 0.5517585277557373, "step": 5664 }, { "epoch": 0.9054583233437226, "grad_norm": 1.631088087013147, "learning_rate": 1.2371439488568612e-05, "loss": 0.677880048751831, "step": 5665 }, { "epoch": 0.9056181571165988, "grad_norm": 1.1970142008588327, "learning_rate": 1.2368872130201129e-05, "loss": 0.68315589427948, "step": 5666 }, { "epoch": 0.905777990889475, "grad_norm": 1.6049316479217115, "learning_rate": 1.2366304606400021e-05, "loss": 0.5874018669128418, "step": 5667 }, { "epoch": 0.9059378246623512, "grad_norm": 1.2966387905219552, "learning_rate": 1.2363736917344605e-05, "loss": 0.5038365721702576, "step": 5668 }, { "epoch": 0.9060976584352274, "grad_norm": 1.3288201236865238, "learning_rate": 1.2361169063214194e-05, "loss": 0.5172576904296875, "step": 5669 }, { "epoch": 0.9062574922081036, "grad_norm": 1.4453443366619556, "learning_rate": 1.2358601044188121e-05, "loss": 0.665566623210907, "step": 5670 }, { "epoch": 0.9064173259809798, "grad_norm": 1.3621124111753997, "learning_rate": 1.2356032860445725e-05, "loss": 0.511590838432312, "step": 5671 }, { "epoch": 0.906577159753856, "grad_norm": 1.4166853103391601, "learning_rate": 1.235346451216636e-05, "loss": 0.5481657981872559, "step": 5672 }, { "epoch": 0.9067369935267322, "grad_norm": 1.3744934875701913, "learning_rate": 1.2350895999529388e-05, "loss": 0.6893190145492554, "step": 5673 }, { "epoch": 0.9068968272996084, "grad_norm": 1.5263652778162642, "learning_rate": 1.2348327322714188e-05, "loss": 0.6966344118118286, "step": 5674 }, { "epoch": 0.9070566610724846, "grad_norm": 1.5841528475179516, "learning_rate": 1.234575848190015e-05, "loss": 0.6614197492599487, "step": 5675 }, { "epoch": 0.9072164948453608, "grad_norm": 1.1135193203322509, "learning_rate": 1.2343189477266666e-05, "loss": 0.5565313696861267, "step": 5676 }, { "epoch": 0.907376328618237, "grad_norm": 1.2209987065550478, "learning_rate": 1.2340620308993148e-05, "loss": 0.6588678359985352, "step": 5677 }, { "epoch": 0.9075361623911132, "grad_norm": 1.4988643257184153, "learning_rate": 1.233805097725902e-05, "loss": 0.6816084384918213, "step": 5678 }, { "epoch": 0.9076959961639894, "grad_norm": 1.5076290194072037, "learning_rate": 1.2335481482243711e-05, "loss": 0.5309306979179382, "step": 5679 }, { "epoch": 0.9078558299368656, "grad_norm": 1.5463126340706554, "learning_rate": 1.2332911824126673e-05, "loss": 0.6332380771636963, "step": 5680 }, { "epoch": 0.9080156637097418, "grad_norm": 1.4756632322447325, "learning_rate": 1.2330342003087351e-05, "loss": 0.5310546159744263, "step": 5681 }, { "epoch": 0.908175497482618, "grad_norm": 1.306190362434288, "learning_rate": 1.2327772019305223e-05, "loss": 0.7003606557846069, "step": 5682 }, { "epoch": 0.9083353312554943, "grad_norm": 1.1660440679460442, "learning_rate": 1.232520187295976e-05, "loss": 0.6025727987289429, "step": 5683 }, { "epoch": 0.9084951650283705, "grad_norm": 1.4032460092845658, "learning_rate": 1.2322631564230453e-05, "loss": 0.5741279721260071, "step": 5684 }, { "epoch": 0.9086549988012467, "grad_norm": 1.3991695802213584, "learning_rate": 1.2320061093296809e-05, "loss": 0.6711398363113403, "step": 5685 }, { "epoch": 0.9088148325741229, "grad_norm": 1.2760460034219303, "learning_rate": 1.2317490460338335e-05, "loss": 0.6582142114639282, "step": 5686 }, { "epoch": 0.9089746663469991, "grad_norm": 1.29174851422309, "learning_rate": 1.2314919665534552e-05, "loss": 0.6775949001312256, "step": 5687 }, { "epoch": 0.9091345001198753, "grad_norm": 1.372953573050529, "learning_rate": 1.2312348709065006e-05, "loss": 0.6557648777961731, "step": 5688 }, { "epoch": 0.9092943338927515, "grad_norm": 1.252089241200589, "learning_rate": 1.2309777591109229e-05, "loss": 0.4625803232192993, "step": 5689 }, { "epoch": 0.9094541676656277, "grad_norm": 1.3871105452511674, "learning_rate": 1.2307206311846793e-05, "loss": 0.7295341491699219, "step": 5690 }, { "epoch": 0.9096140014385039, "grad_norm": 1.312770141444087, "learning_rate": 1.2304634871457255e-05, "loss": 0.6347035765647888, "step": 5691 }, { "epoch": 0.9097738352113801, "grad_norm": 1.350859314575166, "learning_rate": 1.2302063270120205e-05, "loss": 0.6198697090148926, "step": 5692 }, { "epoch": 0.9099336689842564, "grad_norm": 1.4219631046967245, "learning_rate": 1.2299491508015229e-05, "loss": 0.660214900970459, "step": 5693 }, { "epoch": 0.9100935027571326, "grad_norm": 1.2967608211179062, "learning_rate": 1.229691958532193e-05, "loss": 0.596172571182251, "step": 5694 }, { "epoch": 0.9102533365300088, "grad_norm": 2.0550205974823834, "learning_rate": 1.2294347502219926e-05, "loss": 0.6682045459747314, "step": 5695 }, { "epoch": 0.910413170302885, "grad_norm": 1.426915838105338, "learning_rate": 1.2291775258888835e-05, "loss": 0.5640143156051636, "step": 5696 }, { "epoch": 0.9105730040757612, "grad_norm": 1.1920998079244711, "learning_rate": 1.22892028555083e-05, "loss": 0.6163358688354492, "step": 5697 }, { "epoch": 0.9107328378486375, "grad_norm": 1.3387864271809626, "learning_rate": 1.2286630292257967e-05, "loss": 0.6819164156913757, "step": 5698 }, { "epoch": 0.9108926716215137, "grad_norm": 1.6941574269299013, "learning_rate": 1.2284057569317492e-05, "loss": 0.6853189468383789, "step": 5699 }, { "epoch": 0.9110525053943899, "grad_norm": 1.3927990181712646, "learning_rate": 1.2281484686866548e-05, "loss": 0.6251574754714966, "step": 5700 }, { "epoch": 0.9112123391672661, "grad_norm": 1.245315823903268, "learning_rate": 1.227891164508481e-05, "loss": 0.5799438953399658, "step": 5701 }, { "epoch": 0.9113721729401423, "grad_norm": 1.4718121745967772, "learning_rate": 1.2276338444151979e-05, "loss": 0.7573649883270264, "step": 5702 }, { "epoch": 0.9115320067130185, "grad_norm": 1.2570363035345984, "learning_rate": 1.227376508424775e-05, "loss": 0.6419779062271118, "step": 5703 }, { "epoch": 0.9116918404858947, "grad_norm": 1.2934085392422041, "learning_rate": 1.2271191565551843e-05, "loss": 0.5244773626327515, "step": 5704 }, { "epoch": 0.9118516742587709, "grad_norm": 1.2034454797764307, "learning_rate": 1.2268617888243983e-05, "loss": 0.5746219158172607, "step": 5705 }, { "epoch": 0.9120115080316471, "grad_norm": 1.4470022388905392, "learning_rate": 1.2266044052503904e-05, "loss": 0.5727697610855103, "step": 5706 }, { "epoch": 0.9121713418045233, "grad_norm": 1.5024623379652562, "learning_rate": 1.2263470058511355e-05, "loss": 0.6283658146858215, "step": 5707 }, { "epoch": 0.9123311755773995, "grad_norm": 1.3500148901555298, "learning_rate": 1.2260895906446096e-05, "loss": 0.6868413686752319, "step": 5708 }, { "epoch": 0.9124910093502757, "grad_norm": 1.3031925340478765, "learning_rate": 1.225832159648789e-05, "loss": 0.4974064230918884, "step": 5709 }, { "epoch": 0.9126508431231519, "grad_norm": 1.4603517716521468, "learning_rate": 1.2255747128816527e-05, "loss": 0.5888029932975769, "step": 5710 }, { "epoch": 0.9128106768960281, "grad_norm": 1.2121278382719523, "learning_rate": 1.2253172503611792e-05, "loss": 0.6044439077377319, "step": 5711 }, { "epoch": 0.9129705106689043, "grad_norm": 1.944303383841719, "learning_rate": 1.225059772105349e-05, "loss": 0.6705271601676941, "step": 5712 }, { "epoch": 0.9131303444417805, "grad_norm": 1.2710764436867723, "learning_rate": 1.2248022781321434e-05, "loss": 0.5621042251586914, "step": 5713 }, { "epoch": 0.9132901782146567, "grad_norm": 1.2636039715866045, "learning_rate": 1.2245447684595448e-05, "loss": 0.5314953327178955, "step": 5714 }, { "epoch": 0.913450011987533, "grad_norm": 1.2429394949706083, "learning_rate": 1.2242872431055373e-05, "loss": 0.5395864844322205, "step": 5715 }, { "epoch": 0.9136098457604092, "grad_norm": 1.2516187367627831, "learning_rate": 1.2240297020881047e-05, "loss": 0.5461233854293823, "step": 5716 }, { "epoch": 0.9137696795332854, "grad_norm": 1.3249136699118567, "learning_rate": 1.2237721454252334e-05, "loss": 0.6781212091445923, "step": 5717 }, { "epoch": 0.9139295133061616, "grad_norm": 1.4751815079084256, "learning_rate": 1.2235145731349101e-05, "loss": 0.6444834470748901, "step": 5718 }, { "epoch": 0.9140893470790378, "grad_norm": 1.365966901655322, "learning_rate": 1.2232569852351224e-05, "loss": 0.5974019765853882, "step": 5719 }, { "epoch": 0.914249180851914, "grad_norm": 1.3999815379651348, "learning_rate": 1.22299938174386e-05, "loss": 0.5791054964065552, "step": 5720 }, { "epoch": 0.9144090146247902, "grad_norm": 1.4341977696544894, "learning_rate": 1.222741762679112e-05, "loss": 0.5631091594696045, "step": 5721 }, { "epoch": 0.9145688483976664, "grad_norm": 1.5083148280520635, "learning_rate": 1.2224841280588706e-05, "loss": 0.5766149759292603, "step": 5722 }, { "epoch": 0.9147286821705426, "grad_norm": 1.2135227305923968, "learning_rate": 1.2222264779011275e-05, "loss": 0.5237556099891663, "step": 5723 }, { "epoch": 0.9148885159434188, "grad_norm": 1.629941459339206, "learning_rate": 1.2219688122238762e-05, "loss": 0.6769445538520813, "step": 5724 }, { "epoch": 0.915048349716295, "grad_norm": 1.5551425478668395, "learning_rate": 1.2217111310451115e-05, "loss": 0.5606169700622559, "step": 5725 }, { "epoch": 0.9152081834891712, "grad_norm": 1.2562337674059136, "learning_rate": 1.2214534343828284e-05, "loss": 0.6496095657348633, "step": 5726 }, { "epoch": 0.9153680172620474, "grad_norm": 1.3586035186559688, "learning_rate": 1.221195722255024e-05, "loss": 0.5429080128669739, "step": 5727 }, { "epoch": 0.9155278510349237, "grad_norm": 1.1933839783811468, "learning_rate": 1.2209379946796958e-05, "loss": 0.5536816120147705, "step": 5728 }, { "epoch": 0.9156876848077999, "grad_norm": 1.4522513320270538, "learning_rate": 1.2206802516748421e-05, "loss": 0.7106075286865234, "step": 5729 }, { "epoch": 0.9158475185806761, "grad_norm": 1.3478228676054311, "learning_rate": 1.2204224932584637e-05, "loss": 0.6593894958496094, "step": 5730 }, { "epoch": 0.9160073523535524, "grad_norm": 1.488231952281667, "learning_rate": 1.220164719448561e-05, "loss": 0.6618581414222717, "step": 5731 }, { "epoch": 0.9161671861264286, "grad_norm": 1.2732057660151785, "learning_rate": 1.2199069302631358e-05, "loss": 0.5862797498703003, "step": 5732 }, { "epoch": 0.9163270198993048, "grad_norm": 3.3904946015096544, "learning_rate": 1.2196491257201915e-05, "loss": 0.6277145743370056, "step": 5733 }, { "epoch": 0.916486853672181, "grad_norm": 1.414016639689044, "learning_rate": 1.219391305837732e-05, "loss": 0.6288735866546631, "step": 5734 }, { "epoch": 0.9166466874450572, "grad_norm": 1.2638913067891315, "learning_rate": 1.219133470633763e-05, "loss": 0.5921930074691772, "step": 5735 }, { "epoch": 0.9168065212179334, "grad_norm": 1.2601049023067603, "learning_rate": 1.2188756201262903e-05, "loss": 0.5169473886489868, "step": 5736 }, { "epoch": 0.9169663549908096, "grad_norm": 1.3349871495346997, "learning_rate": 1.2186177543333217e-05, "loss": 0.5586423277854919, "step": 5737 }, { "epoch": 0.9171261887636858, "grad_norm": 1.5220600021601491, "learning_rate": 1.2183598732728652e-05, "loss": 0.7672559022903442, "step": 5738 }, { "epoch": 0.917286022536562, "grad_norm": 1.2156869077241739, "learning_rate": 1.2181019769629306e-05, "loss": 0.7718764543533325, "step": 5739 }, { "epoch": 0.9174458563094382, "grad_norm": 1.4875176214575239, "learning_rate": 1.2178440654215285e-05, "loss": 0.7713667750358582, "step": 5740 }, { "epoch": 0.9176056900823144, "grad_norm": 2.1167074474471668, "learning_rate": 1.2175861386666701e-05, "loss": 0.6041840314865112, "step": 5741 }, { "epoch": 0.9177655238551906, "grad_norm": 2.107278921310092, "learning_rate": 1.2173281967163684e-05, "loss": 0.5994327068328857, "step": 5742 }, { "epoch": 0.9179253576280668, "grad_norm": 1.464792199479061, "learning_rate": 1.2170702395886371e-05, "loss": 0.577503502368927, "step": 5743 }, { "epoch": 0.918085191400943, "grad_norm": 1.3348348523638107, "learning_rate": 1.216812267301491e-05, "loss": 0.6057101488113403, "step": 5744 }, { "epoch": 0.9182450251738192, "grad_norm": 1.335785267146449, "learning_rate": 1.216554279872946e-05, "loss": 0.48884332180023193, "step": 5745 }, { "epoch": 0.9184048589466954, "grad_norm": 1.3276791121429004, "learning_rate": 1.2162962773210194e-05, "loss": 0.5532597303390503, "step": 5746 }, { "epoch": 0.9185646927195716, "grad_norm": 1.4435442074716338, "learning_rate": 1.2160382596637286e-05, "loss": 0.6289737224578857, "step": 5747 }, { "epoch": 0.9187245264924478, "grad_norm": 1.367216418579804, "learning_rate": 1.2157802269190926e-05, "loss": 0.6971465349197388, "step": 5748 }, { "epoch": 0.918884360265324, "grad_norm": 1.3705148792111774, "learning_rate": 1.2155221791051321e-05, "loss": 0.6417139768600464, "step": 5749 }, { "epoch": 0.9190441940382003, "grad_norm": 1.4880111854765237, "learning_rate": 1.2152641162398678e-05, "loss": 0.673902153968811, "step": 5750 }, { "epoch": 0.9192040278110765, "grad_norm": 1.2626191390936103, "learning_rate": 1.2150060383413217e-05, "loss": 0.5498597621917725, "step": 5751 }, { "epoch": 0.9193638615839527, "grad_norm": 1.4014970940329365, "learning_rate": 1.2147479454275177e-05, "loss": 0.5100183486938477, "step": 5752 }, { "epoch": 0.9195236953568289, "grad_norm": 1.6038398147260806, "learning_rate": 1.2144898375164791e-05, "loss": 0.6073414087295532, "step": 5753 }, { "epoch": 0.9196835291297051, "grad_norm": 1.830256037845731, "learning_rate": 1.2142317146262324e-05, "loss": 0.7729055881500244, "step": 5754 }, { "epoch": 0.9198433629025813, "grad_norm": 1.19403734519237, "learning_rate": 1.2139735767748033e-05, "loss": 0.5547149181365967, "step": 5755 }, { "epoch": 0.9200031966754575, "grad_norm": 1.3481255621617176, "learning_rate": 1.2137154239802194e-05, "loss": 0.620542049407959, "step": 5756 }, { "epoch": 0.9201630304483337, "grad_norm": 1.147010791863931, "learning_rate": 1.2134572562605092e-05, "loss": 0.5556861758232117, "step": 5757 }, { "epoch": 0.9203228642212099, "grad_norm": 1.4696434448794604, "learning_rate": 1.2131990736337018e-05, "loss": 0.7767782211303711, "step": 5758 }, { "epoch": 0.9204826979940861, "grad_norm": 1.341105276821871, "learning_rate": 1.2129408761178285e-05, "loss": 0.671923041343689, "step": 5759 }, { "epoch": 0.9206425317669623, "grad_norm": 1.462254228210962, "learning_rate": 1.2126826637309204e-05, "loss": 0.639689564704895, "step": 5760 }, { "epoch": 0.9208023655398385, "grad_norm": 1.4212234886144985, "learning_rate": 1.2124244364910103e-05, "loss": 0.6634880304336548, "step": 5761 }, { "epoch": 0.9209621993127147, "grad_norm": 1.2501433432781182, "learning_rate": 1.2121661944161319e-05, "loss": 0.5903768539428711, "step": 5762 }, { "epoch": 0.9211220330855909, "grad_norm": 1.350279028412702, "learning_rate": 1.2119079375243195e-05, "loss": 0.5399070382118225, "step": 5763 }, { "epoch": 0.9212818668584672, "grad_norm": 1.3564679521962064, "learning_rate": 1.2116496658336095e-05, "loss": 0.623650074005127, "step": 5764 }, { "epoch": 0.9214417006313435, "grad_norm": 1.256335907681859, "learning_rate": 1.2113913793620383e-05, "loss": 0.6849527359008789, "step": 5765 }, { "epoch": 0.9216015344042197, "grad_norm": 1.2255890618442096, "learning_rate": 1.2111330781276434e-05, "loss": 0.5131531953811646, "step": 5766 }, { "epoch": 0.9217613681770959, "grad_norm": 1.2604420956848432, "learning_rate": 1.2108747621484645e-05, "loss": 0.6148468255996704, "step": 5767 }, { "epoch": 0.9219212019499721, "grad_norm": 1.3339100682866987, "learning_rate": 1.2106164314425406e-05, "loss": 0.6187783479690552, "step": 5768 }, { "epoch": 0.9220810357228483, "grad_norm": 1.4505011403391102, "learning_rate": 1.2103580860279134e-05, "loss": 0.6050705909729004, "step": 5769 }, { "epoch": 0.9222408694957245, "grad_norm": 1.5500703145161439, "learning_rate": 1.210099725922624e-05, "loss": 0.6144278049468994, "step": 5770 }, { "epoch": 0.9224007032686007, "grad_norm": 1.2349072727671215, "learning_rate": 1.209841351144716e-05, "loss": 0.586979866027832, "step": 5771 }, { "epoch": 0.9225605370414769, "grad_norm": 1.3123038935709654, "learning_rate": 1.2095829617122332e-05, "loss": 0.7444127798080444, "step": 5772 }, { "epoch": 0.9227203708143531, "grad_norm": 1.5304597173482182, "learning_rate": 1.20932455764322e-05, "loss": 0.7303922176361084, "step": 5773 }, { "epoch": 0.9228802045872293, "grad_norm": 1.1081190356771213, "learning_rate": 1.2090661389557235e-05, "loss": 0.5118374824523926, "step": 5774 }, { "epoch": 0.9230400383601055, "grad_norm": 1.4117605278385639, "learning_rate": 1.2088077056677901e-05, "loss": 0.6147026419639587, "step": 5775 }, { "epoch": 0.9231998721329817, "grad_norm": 1.3174230887437728, "learning_rate": 1.2085492577974678e-05, "loss": 0.5127162933349609, "step": 5776 }, { "epoch": 0.9233597059058579, "grad_norm": 1.1459760526059346, "learning_rate": 1.2082907953628061e-05, "loss": 0.5952365398406982, "step": 5777 }, { "epoch": 0.9235195396787341, "grad_norm": 1.2543825478991957, "learning_rate": 1.2080323183818545e-05, "loss": 0.6675406694412231, "step": 5778 }, { "epoch": 0.9236793734516103, "grad_norm": 1.2871075130628875, "learning_rate": 1.2077738268726646e-05, "loss": 0.5598384141921997, "step": 5779 }, { "epoch": 0.9238392072244865, "grad_norm": 1.3138159592223695, "learning_rate": 1.2075153208532883e-05, "loss": 0.6457749605178833, "step": 5780 }, { "epoch": 0.9239990409973627, "grad_norm": 1.280342844496368, "learning_rate": 1.2072568003417791e-05, "loss": 0.5705680847167969, "step": 5781 }, { "epoch": 0.924158874770239, "grad_norm": 1.3119676484494032, "learning_rate": 1.206998265356191e-05, "loss": 0.58549565076828, "step": 5782 }, { "epoch": 0.9243187085431152, "grad_norm": 1.3072820542021926, "learning_rate": 1.2067397159145782e-05, "loss": 0.8137636184692383, "step": 5783 }, { "epoch": 0.9244785423159914, "grad_norm": 1.342490669629791, "learning_rate": 1.2064811520349987e-05, "loss": 0.5819892883300781, "step": 5784 }, { "epoch": 0.9246383760888676, "grad_norm": 1.172006095609674, "learning_rate": 1.206222573735508e-05, "loss": 0.6518192291259766, "step": 5785 }, { "epoch": 0.9247982098617438, "grad_norm": 1.2268470889027066, "learning_rate": 1.2059639810341654e-05, "loss": 0.5228515863418579, "step": 5786 }, { "epoch": 0.92495804363462, "grad_norm": 1.2906966853024286, "learning_rate": 1.2057053739490297e-05, "loss": 0.5554070472717285, "step": 5787 }, { "epoch": 0.9251178774074962, "grad_norm": 1.7469322888341225, "learning_rate": 1.205446752498161e-05, "loss": 0.6919069290161133, "step": 5788 }, { "epoch": 0.9252777111803724, "grad_norm": 1.3003601677312653, "learning_rate": 1.2051881166996207e-05, "loss": 0.6121323108673096, "step": 5789 }, { "epoch": 0.9254375449532486, "grad_norm": 1.3215504396167919, "learning_rate": 1.2049294665714707e-05, "loss": 0.6675450801849365, "step": 5790 }, { "epoch": 0.9255973787261248, "grad_norm": 1.6076695599780826, "learning_rate": 1.2046708021317745e-05, "loss": 0.5913563966751099, "step": 5791 }, { "epoch": 0.925757212499001, "grad_norm": 1.2668645239874483, "learning_rate": 1.2044121233985961e-05, "loss": 0.6720888614654541, "step": 5792 }, { "epoch": 0.9259170462718772, "grad_norm": 1.215099688393655, "learning_rate": 1.2041534303900008e-05, "loss": 0.7164757251739502, "step": 5793 }, { "epoch": 0.9260768800447534, "grad_norm": 1.2704839052074828, "learning_rate": 1.203894723124055e-05, "loss": 0.4831886887550354, "step": 5794 }, { "epoch": 0.9262367138176296, "grad_norm": 1.302972445110248, "learning_rate": 1.2036360016188256e-05, "loss": 0.7415568828582764, "step": 5795 }, { "epoch": 0.9263965475905058, "grad_norm": 1.476987111173824, "learning_rate": 1.2033772658923813e-05, "loss": 0.6420531272888184, "step": 5796 }, { "epoch": 0.926556381363382, "grad_norm": 1.332520635375973, "learning_rate": 1.2031185159627906e-05, "loss": 0.5384381413459778, "step": 5797 }, { "epoch": 0.9267162151362582, "grad_norm": 1.5573544018711796, "learning_rate": 1.2028597518481238e-05, "loss": 0.6940643191337585, "step": 5798 }, { "epoch": 0.9268760489091346, "grad_norm": 1.399179384059544, "learning_rate": 1.2026009735664523e-05, "loss": 0.5049216747283936, "step": 5799 }, { "epoch": 0.9270358826820108, "grad_norm": 1.597906012699582, "learning_rate": 1.2023421811358483e-05, "loss": 0.6989311575889587, "step": 5800 }, { "epoch": 0.927195716454887, "grad_norm": 1.4029682769889962, "learning_rate": 1.202083374574385e-05, "loss": 0.680176854133606, "step": 5801 }, { "epoch": 0.9273555502277632, "grad_norm": 1.331158698886335, "learning_rate": 1.201824553900136e-05, "loss": 0.630774974822998, "step": 5802 }, { "epoch": 0.9275153840006394, "grad_norm": 1.2976009111160525, "learning_rate": 1.201565719131177e-05, "loss": 0.6333297491073608, "step": 5803 }, { "epoch": 0.9276752177735156, "grad_norm": 1.2156209360788028, "learning_rate": 1.2013068702855838e-05, "loss": 0.603979766368866, "step": 5804 }, { "epoch": 0.9278350515463918, "grad_norm": 1.2281005584082705, "learning_rate": 1.2010480073814334e-05, "loss": 0.5444759130477905, "step": 5805 }, { "epoch": 0.927994885319268, "grad_norm": 1.3520171596962594, "learning_rate": 1.2007891304368045e-05, "loss": 0.5550092458724976, "step": 5806 }, { "epoch": 0.9281547190921442, "grad_norm": 1.2137634827340014, "learning_rate": 1.2005302394697755e-05, "loss": 0.6592360734939575, "step": 5807 }, { "epoch": 0.9283145528650204, "grad_norm": 1.1296678628472694, "learning_rate": 1.2002713344984265e-05, "loss": 0.4871264100074768, "step": 5808 }, { "epoch": 0.9284743866378966, "grad_norm": 1.774017133370852, "learning_rate": 1.2000124155408389e-05, "loss": 0.6177405714988708, "step": 5809 }, { "epoch": 0.9286342204107728, "grad_norm": 1.5574246511475243, "learning_rate": 1.199753482615094e-05, "loss": 0.5274467468261719, "step": 5810 }, { "epoch": 0.928794054183649, "grad_norm": 1.309541830292051, "learning_rate": 1.1994945357392752e-05, "loss": 0.6853556036949158, "step": 5811 }, { "epoch": 0.9289538879565252, "grad_norm": 1.3193250645149222, "learning_rate": 1.1992355749314665e-05, "loss": 0.5595247745513916, "step": 5812 }, { "epoch": 0.9291137217294014, "grad_norm": 1.1504967789985086, "learning_rate": 1.1989766002097527e-05, "loss": 0.6020303964614868, "step": 5813 }, { "epoch": 0.9292735555022776, "grad_norm": 1.2898947551165507, "learning_rate": 1.1987176115922197e-05, "loss": 0.5984443426132202, "step": 5814 }, { "epoch": 0.9294333892751538, "grad_norm": 1.5453319274296136, "learning_rate": 1.1984586090969542e-05, "loss": 0.6604444980621338, "step": 5815 }, { "epoch": 0.92959322304803, "grad_norm": 1.4176234920161819, "learning_rate": 1.1981995927420443e-05, "loss": 0.6783539056777954, "step": 5816 }, { "epoch": 0.9297530568209063, "grad_norm": 1.5055282556403502, "learning_rate": 1.1979405625455782e-05, "loss": 0.7290794253349304, "step": 5817 }, { "epoch": 0.9299128905937825, "grad_norm": 1.388805077788124, "learning_rate": 1.1976815185256464e-05, "loss": 0.4608747959136963, "step": 5818 }, { "epoch": 0.9300727243666587, "grad_norm": 1.4715179482444007, "learning_rate": 1.1974224607003394e-05, "loss": 0.6455098986625671, "step": 5819 }, { "epoch": 0.9302325581395349, "grad_norm": 1.3696186680397135, "learning_rate": 1.1971633890877483e-05, "loss": 0.5772239565849304, "step": 5820 }, { "epoch": 0.9303923919124111, "grad_norm": 1.1704979370261535, "learning_rate": 1.1969043037059666e-05, "loss": 0.7324573993682861, "step": 5821 }, { "epoch": 0.9305522256852873, "grad_norm": 1.3201388745738079, "learning_rate": 1.1966452045730872e-05, "loss": 0.537531316280365, "step": 5822 }, { "epoch": 0.9307120594581635, "grad_norm": 1.205646398356836, "learning_rate": 1.196386091707205e-05, "loss": 0.6342490315437317, "step": 5823 }, { "epoch": 0.9308718932310397, "grad_norm": 1.4601640502221314, "learning_rate": 1.1961269651264155e-05, "loss": 0.655066728591919, "step": 5824 }, { "epoch": 0.9310317270039159, "grad_norm": 1.4936571811576709, "learning_rate": 1.1958678248488151e-05, "loss": 0.6452807188034058, "step": 5825 }, { "epoch": 0.9311915607767921, "grad_norm": 1.1096876906841098, "learning_rate": 1.1956086708925017e-05, "loss": 0.510741114616394, "step": 5826 }, { "epoch": 0.9313513945496683, "grad_norm": 1.3588394104086803, "learning_rate": 1.1953495032755726e-05, "loss": 0.580338716506958, "step": 5827 }, { "epoch": 0.9315112283225445, "grad_norm": 1.3292109663186613, "learning_rate": 1.1950903220161286e-05, "loss": 0.7359927296638489, "step": 5828 }, { "epoch": 0.9316710620954207, "grad_norm": 1.3932705671973753, "learning_rate": 1.1948311271322689e-05, "loss": 0.6215418577194214, "step": 5829 }, { "epoch": 0.9318308958682969, "grad_norm": 1.5546082740508003, "learning_rate": 1.1945719186420947e-05, "loss": 0.7741892337799072, "step": 5830 }, { "epoch": 0.9319907296411731, "grad_norm": 1.5019922194938538, "learning_rate": 1.1943126965637091e-05, "loss": 0.6034442186355591, "step": 5831 }, { "epoch": 0.9321505634140493, "grad_norm": 1.3333105753437224, "learning_rate": 1.1940534609152143e-05, "loss": 0.5399907827377319, "step": 5832 }, { "epoch": 0.9323103971869255, "grad_norm": 1.6957380408301201, "learning_rate": 1.1937942117147152e-05, "loss": 0.6327553987503052, "step": 5833 }, { "epoch": 0.9324702309598019, "grad_norm": 1.2697919754834037, "learning_rate": 1.1935349489803161e-05, "loss": 0.602412760257721, "step": 5834 }, { "epoch": 0.9326300647326781, "grad_norm": 1.6212298329972887, "learning_rate": 1.1932756727301235e-05, "loss": 0.6930487155914307, "step": 5835 }, { "epoch": 0.9327898985055543, "grad_norm": 1.4369909835676644, "learning_rate": 1.1930163829822442e-05, "loss": 0.5464872121810913, "step": 5836 }, { "epoch": 0.9329497322784305, "grad_norm": 1.4360443803412837, "learning_rate": 1.1927570797547863e-05, "loss": 0.8064497709274292, "step": 5837 }, { "epoch": 0.9331095660513067, "grad_norm": 1.7978526249091182, "learning_rate": 1.1924977630658583e-05, "loss": 0.6260437369346619, "step": 5838 }, { "epoch": 0.9332693998241829, "grad_norm": 1.340745371337042, "learning_rate": 1.19223843293357e-05, "loss": 0.6909759044647217, "step": 5839 }, { "epoch": 0.9334292335970591, "grad_norm": 1.6808844174382447, "learning_rate": 1.191979089376032e-05, "loss": 0.6819518804550171, "step": 5840 }, { "epoch": 0.9335890673699353, "grad_norm": 1.3121146961343841, "learning_rate": 1.1917197324113561e-05, "loss": 0.5448915958404541, "step": 5841 }, { "epoch": 0.9337489011428115, "grad_norm": 1.3085786748947414, "learning_rate": 1.1914603620576548e-05, "loss": 0.608354389667511, "step": 5842 }, { "epoch": 0.9339087349156877, "grad_norm": 1.4053279730626604, "learning_rate": 1.1912009783330418e-05, "loss": 0.5493678450584412, "step": 5843 }, { "epoch": 0.9340685686885639, "grad_norm": 1.4129464450546099, "learning_rate": 1.1909415812556313e-05, "loss": 0.6773055791854858, "step": 5844 }, { "epoch": 0.9342284024614401, "grad_norm": 1.414937564263228, "learning_rate": 1.1906821708435387e-05, "loss": 0.6259249448776245, "step": 5845 }, { "epoch": 0.9343882362343163, "grad_norm": 2.0721804111506312, "learning_rate": 1.1904227471148806e-05, "loss": 0.5758013129234314, "step": 5846 }, { "epoch": 0.9345480700071925, "grad_norm": 1.3280898973387252, "learning_rate": 1.1901633100877736e-05, "loss": 0.5774093866348267, "step": 5847 }, { "epoch": 0.9347079037800687, "grad_norm": 1.2440220819056769, "learning_rate": 1.1899038597803365e-05, "loss": 0.5205601453781128, "step": 5848 }, { "epoch": 0.9348677375529449, "grad_norm": 1.2378401569773396, "learning_rate": 1.189644396210688e-05, "loss": 0.5894827842712402, "step": 5849 }, { "epoch": 0.9350275713258211, "grad_norm": 1.2310057089276822, "learning_rate": 1.1893849193969485e-05, "loss": 0.5567631721496582, "step": 5850 }, { "epoch": 0.9351874050986974, "grad_norm": 1.2400258664006065, "learning_rate": 1.1891254293572387e-05, "loss": 0.5000861287117004, "step": 5851 }, { "epoch": 0.9353472388715736, "grad_norm": 1.526726709025458, "learning_rate": 1.1888659261096803e-05, "loss": 0.6016656160354614, "step": 5852 }, { "epoch": 0.9355070726444498, "grad_norm": 1.4192048322343476, "learning_rate": 1.1886064096723962e-05, "loss": 0.6303359270095825, "step": 5853 }, { "epoch": 0.935666906417326, "grad_norm": 1.2455345576278845, "learning_rate": 1.1883468800635101e-05, "loss": 0.4587346315383911, "step": 5854 }, { "epoch": 0.9358267401902022, "grad_norm": 1.3194107980245493, "learning_rate": 1.1880873373011466e-05, "loss": 0.55268394947052, "step": 5855 }, { "epoch": 0.9359865739630784, "grad_norm": 1.457768712698571, "learning_rate": 1.1878277814034315e-05, "loss": 0.6503033638000488, "step": 5856 }, { "epoch": 0.9361464077359546, "grad_norm": 1.305535914591399, "learning_rate": 1.1875682123884909e-05, "loss": 0.519148051738739, "step": 5857 }, { "epoch": 0.9363062415088308, "grad_norm": 1.2176549143525752, "learning_rate": 1.1873086302744525e-05, "loss": 0.6059423685073853, "step": 5858 }, { "epoch": 0.936466075281707, "grad_norm": 1.565971466589819, "learning_rate": 1.1870490350794443e-05, "loss": 0.6640642881393433, "step": 5859 }, { "epoch": 0.9366259090545832, "grad_norm": 1.3584849625341364, "learning_rate": 1.186789426821596e-05, "loss": 0.574019193649292, "step": 5860 }, { "epoch": 0.9367857428274594, "grad_norm": 1.282227861853463, "learning_rate": 1.1865298055190372e-05, "loss": 0.6993629932403564, "step": 5861 }, { "epoch": 0.9369455766003356, "grad_norm": 1.1378332762760515, "learning_rate": 1.1862701711898992e-05, "loss": 0.5429670810699463, "step": 5862 }, { "epoch": 0.9371054103732118, "grad_norm": 1.351567387753437, "learning_rate": 1.1860105238523137e-05, "loss": 0.6812785863876343, "step": 5863 }, { "epoch": 0.937265244146088, "grad_norm": 1.1531851199578083, "learning_rate": 1.1857508635244137e-05, "loss": 0.5771324038505554, "step": 5864 }, { "epoch": 0.9374250779189642, "grad_norm": 1.4278008144067578, "learning_rate": 1.1854911902243326e-05, "loss": 0.5582773685455322, "step": 5865 }, { "epoch": 0.9375849116918404, "grad_norm": 1.3564899674374262, "learning_rate": 1.185231503970206e-05, "loss": 0.5192153453826904, "step": 5866 }, { "epoch": 0.9377447454647166, "grad_norm": 1.0880301818821791, "learning_rate": 1.1849718047801686e-05, "loss": 0.4878807067871094, "step": 5867 }, { "epoch": 0.9379045792375929, "grad_norm": 1.3477560876353272, "learning_rate": 1.1847120926723572e-05, "loss": 0.6024993658065796, "step": 5868 }, { "epoch": 0.9380644130104692, "grad_norm": 1.386375108656648, "learning_rate": 1.1844523676649091e-05, "loss": 0.6715720891952515, "step": 5869 }, { "epoch": 0.9382242467833454, "grad_norm": 1.1865218178591164, "learning_rate": 1.1841926297759627e-05, "loss": 0.4598170518875122, "step": 5870 }, { "epoch": 0.9383840805562216, "grad_norm": 1.5130413867744994, "learning_rate": 1.1839328790236571e-05, "loss": 0.5848408937454224, "step": 5871 }, { "epoch": 0.9385439143290978, "grad_norm": 1.2263112437417216, "learning_rate": 1.1836731154261321e-05, "loss": 0.6248555779457092, "step": 5872 }, { "epoch": 0.938703748101974, "grad_norm": 1.1608120200998315, "learning_rate": 1.183413339001529e-05, "loss": 0.5082179307937622, "step": 5873 }, { "epoch": 0.9388635818748502, "grad_norm": 1.5664284556222805, "learning_rate": 1.1831535497679891e-05, "loss": 0.8451670408248901, "step": 5874 }, { "epoch": 0.9390234156477264, "grad_norm": 1.3348347267194158, "learning_rate": 1.1828937477436562e-05, "loss": 0.572292685508728, "step": 5875 }, { "epoch": 0.9391832494206026, "grad_norm": 1.6618422603126752, "learning_rate": 1.1826339329466733e-05, "loss": 0.5568072199821472, "step": 5876 }, { "epoch": 0.9393430831934788, "grad_norm": 1.425890646711099, "learning_rate": 1.1823741053951849e-05, "loss": 0.6753082275390625, "step": 5877 }, { "epoch": 0.939502916966355, "grad_norm": 1.235665226254972, "learning_rate": 1.1821142651073367e-05, "loss": 0.5843183994293213, "step": 5878 }, { "epoch": 0.9396627507392312, "grad_norm": 1.2571819360774743, "learning_rate": 1.1818544121012747e-05, "loss": 0.5468315482139587, "step": 5879 }, { "epoch": 0.9398225845121074, "grad_norm": 1.3849588043215915, "learning_rate": 1.1815945463951463e-05, "loss": 0.7682713270187378, "step": 5880 }, { "epoch": 0.9399824182849836, "grad_norm": 1.370149901512849, "learning_rate": 1.1813346680070997e-05, "loss": 0.6390241384506226, "step": 5881 }, { "epoch": 0.9401422520578598, "grad_norm": 1.9079315873159264, "learning_rate": 1.1810747769552837e-05, "loss": 0.6692512035369873, "step": 5882 }, { "epoch": 0.940302085830736, "grad_norm": 1.342265059647045, "learning_rate": 1.1808148732578482e-05, "loss": 0.6181855797767639, "step": 5883 }, { "epoch": 0.9404619196036123, "grad_norm": 1.3857340940759806, "learning_rate": 1.1805549569329438e-05, "loss": 0.5255059003829956, "step": 5884 }, { "epoch": 0.9406217533764885, "grad_norm": 1.402099413000872, "learning_rate": 1.1802950279987227e-05, "loss": 0.6485419869422913, "step": 5885 }, { "epoch": 0.9407815871493647, "grad_norm": 1.342143090552619, "learning_rate": 1.1800350864733367e-05, "loss": 0.5329440236091614, "step": 5886 }, { "epoch": 0.9409414209222409, "grad_norm": 1.3389343584064992, "learning_rate": 1.1797751323749396e-05, "loss": 0.6030607223510742, "step": 5887 }, { "epoch": 0.9411012546951171, "grad_norm": 1.2838330791544201, "learning_rate": 1.1795151657216856e-05, "loss": 0.7079654335975647, "step": 5888 }, { "epoch": 0.9412610884679933, "grad_norm": 1.4940518555987543, "learning_rate": 1.1792551865317299e-05, "loss": 0.5942585468292236, "step": 5889 }, { "epoch": 0.9414209222408695, "grad_norm": 1.368250095644129, "learning_rate": 1.1789951948232285e-05, "loss": 0.6844786405563354, "step": 5890 }, { "epoch": 0.9415807560137457, "grad_norm": 1.3249783418391834, "learning_rate": 1.1787351906143383e-05, "loss": 0.5339337587356567, "step": 5891 }, { "epoch": 0.9417405897866219, "grad_norm": 1.2989617335937613, "learning_rate": 1.1784751739232169e-05, "loss": 0.5815314650535583, "step": 5892 }, { "epoch": 0.9419004235594981, "grad_norm": 1.1880084844220276, "learning_rate": 1.1782151447680233e-05, "loss": 0.6213313341140747, "step": 5893 }, { "epoch": 0.9420602573323743, "grad_norm": 1.3226490870937686, "learning_rate": 1.1779551031669163e-05, "loss": 0.645629346370697, "step": 5894 }, { "epoch": 0.9422200911052505, "grad_norm": 1.2856983057093256, "learning_rate": 1.1776950491380573e-05, "loss": 0.6937369108200073, "step": 5895 }, { "epoch": 0.9423799248781267, "grad_norm": 1.4087830172215725, "learning_rate": 1.1774349826996067e-05, "loss": 0.5909979343414307, "step": 5896 }, { "epoch": 0.9425397586510029, "grad_norm": 1.3721288751467893, "learning_rate": 1.177174903869727e-05, "loss": 0.7808830142021179, "step": 5897 }, { "epoch": 0.9426995924238791, "grad_norm": 1.397324541210836, "learning_rate": 1.1769148126665812e-05, "loss": 0.6393038630485535, "step": 5898 }, { "epoch": 0.9428594261967553, "grad_norm": 1.233523916532376, "learning_rate": 1.1766547091083328e-05, "loss": 0.5757086873054504, "step": 5899 }, { "epoch": 0.9430192599696315, "grad_norm": 1.5343293527356805, "learning_rate": 1.1763945932131473e-05, "loss": 0.6362632513046265, "step": 5900 }, { "epoch": 0.9431790937425077, "grad_norm": 1.2252230851787658, "learning_rate": 1.1761344649991893e-05, "loss": 0.6308131217956543, "step": 5901 }, { "epoch": 0.943338927515384, "grad_norm": 1.1901438987410071, "learning_rate": 1.1758743244846258e-05, "loss": 0.5659297704696655, "step": 5902 }, { "epoch": 0.9434987612882602, "grad_norm": 1.3775661744401482, "learning_rate": 1.1756141716876241e-05, "loss": 0.5561391115188599, "step": 5903 }, { "epoch": 0.9436585950611364, "grad_norm": 1.6132931454460777, "learning_rate": 1.1753540066263518e-05, "loss": 0.849047064781189, "step": 5904 }, { "epoch": 0.9438184288340127, "grad_norm": 1.201912697873459, "learning_rate": 1.1750938293189789e-05, "loss": 0.5904800891876221, "step": 5905 }, { "epoch": 0.9439782626068889, "grad_norm": 1.214356979245962, "learning_rate": 1.1748336397836741e-05, "loss": 0.5534099340438843, "step": 5906 }, { "epoch": 0.9441380963797651, "grad_norm": 1.4081477714992736, "learning_rate": 1.1745734380386091e-05, "loss": 0.7067924737930298, "step": 5907 }, { "epoch": 0.9442979301526413, "grad_norm": 1.4281378524219162, "learning_rate": 1.1743132241019551e-05, "loss": 0.7372764348983765, "step": 5908 }, { "epoch": 0.9444577639255175, "grad_norm": 1.1225073847187292, "learning_rate": 1.1740529979918842e-05, "loss": 0.5596131086349487, "step": 5909 }, { "epoch": 0.9446175976983937, "grad_norm": 1.391476194179619, "learning_rate": 1.1737927597265701e-05, "loss": 0.650795578956604, "step": 5910 }, { "epoch": 0.9447774314712699, "grad_norm": 1.0465294395879439, "learning_rate": 1.1735325093241866e-05, "loss": 0.5285925269126892, "step": 5911 }, { "epoch": 0.9449372652441461, "grad_norm": 1.2599477965704462, "learning_rate": 1.1732722468029089e-05, "loss": 0.5024603009223938, "step": 5912 }, { "epoch": 0.9450970990170223, "grad_norm": 1.2958177259753807, "learning_rate": 1.1730119721809128e-05, "loss": 0.64310222864151, "step": 5913 }, { "epoch": 0.9452569327898985, "grad_norm": 1.290511121680201, "learning_rate": 1.1727516854763747e-05, "loss": 0.6279834508895874, "step": 5914 }, { "epoch": 0.9454167665627747, "grad_norm": 1.3718617265446214, "learning_rate": 1.1724913867074725e-05, "loss": 0.7226791381835938, "step": 5915 }, { "epoch": 0.9455766003356509, "grad_norm": 1.3976894363592238, "learning_rate": 1.172231075892384e-05, "loss": 0.6807360649108887, "step": 5916 }, { "epoch": 0.9457364341085271, "grad_norm": 1.5080980287497718, "learning_rate": 1.1719707530492892e-05, "loss": 0.4498242437839508, "step": 5917 }, { "epoch": 0.9458962678814034, "grad_norm": 1.3480597682181972, "learning_rate": 1.1717104181963674e-05, "loss": 0.6675551533699036, "step": 5918 }, { "epoch": 0.9460561016542796, "grad_norm": 1.1848760304765193, "learning_rate": 1.1714500713517998e-05, "loss": 0.5829702019691467, "step": 5919 }, { "epoch": 0.9462159354271558, "grad_norm": 1.3953514014820372, "learning_rate": 1.1711897125337678e-05, "loss": 0.6377483606338501, "step": 5920 }, { "epoch": 0.946375769200032, "grad_norm": 1.2720009096546754, "learning_rate": 1.1709293417604542e-05, "loss": 0.5367203950881958, "step": 5921 }, { "epoch": 0.9465356029729082, "grad_norm": 1.5843345833927005, "learning_rate": 1.1706689590500424e-05, "loss": 0.6623709797859192, "step": 5922 }, { "epoch": 0.9466954367457844, "grad_norm": 1.559590837029595, "learning_rate": 1.1704085644207163e-05, "loss": 0.5691074132919312, "step": 5923 }, { "epoch": 0.9468552705186606, "grad_norm": 1.3637428632481514, "learning_rate": 1.1701481578906612e-05, "loss": 0.729006290435791, "step": 5924 }, { "epoch": 0.9470151042915368, "grad_norm": 1.2865026273834987, "learning_rate": 1.169887739478063e-05, "loss": 0.5350432395935059, "step": 5925 }, { "epoch": 0.947174938064413, "grad_norm": 1.3282793830149493, "learning_rate": 1.1696273092011081e-05, "loss": 0.5896941423416138, "step": 5926 }, { "epoch": 0.9473347718372892, "grad_norm": 1.2977941255560033, "learning_rate": 1.1693668670779847e-05, "loss": 0.6445302367210388, "step": 5927 }, { "epoch": 0.9474946056101654, "grad_norm": 1.5297301268677665, "learning_rate": 1.1691064131268801e-05, "loss": 0.7108518481254578, "step": 5928 }, { "epoch": 0.9476544393830416, "grad_norm": 1.3729285828764832, "learning_rate": 1.1688459473659846e-05, "loss": 0.6975048780441284, "step": 5929 }, { "epoch": 0.9478142731559178, "grad_norm": 1.1318446576006058, "learning_rate": 1.1685854698134876e-05, "loss": 0.6071489453315735, "step": 5930 }, { "epoch": 0.947974106928794, "grad_norm": 1.2065251692470216, "learning_rate": 1.1683249804875797e-05, "loss": 0.4949285686016083, "step": 5931 }, { "epoch": 0.9481339407016702, "grad_norm": 1.3288513330373204, "learning_rate": 1.1680644794064531e-05, "loss": 0.6684936285018921, "step": 5932 }, { "epoch": 0.9482937744745464, "grad_norm": 1.4097100863219958, "learning_rate": 1.1678039665882997e-05, "loss": 0.7705379128456116, "step": 5933 }, { "epoch": 0.9484536082474226, "grad_norm": 1.5350974181087378, "learning_rate": 1.1675434420513132e-05, "loss": 0.7788862586021423, "step": 5934 }, { "epoch": 0.9486134420202988, "grad_norm": 1.244624993104443, "learning_rate": 1.1672829058136876e-05, "loss": 0.5599597692489624, "step": 5935 }, { "epoch": 0.948773275793175, "grad_norm": 1.3799253647781378, "learning_rate": 1.167022357893618e-05, "loss": 0.6463817954063416, "step": 5936 }, { "epoch": 0.9489331095660513, "grad_norm": 1.6049438334131874, "learning_rate": 1.1667617983092998e-05, "loss": 0.6322836875915527, "step": 5937 }, { "epoch": 0.9490929433389275, "grad_norm": 1.4137657645495192, "learning_rate": 1.1665012270789298e-05, "loss": 0.6532701253890991, "step": 5938 }, { "epoch": 0.9492527771118037, "grad_norm": 1.418577279049566, "learning_rate": 1.1662406442207055e-05, "loss": 0.5634000301361084, "step": 5939 }, { "epoch": 0.94941261088468, "grad_norm": 1.1878694978922524, "learning_rate": 1.1659800497528249e-05, "loss": 0.7232908010482788, "step": 5940 }, { "epoch": 0.9495724446575562, "grad_norm": 1.1270689399577072, "learning_rate": 1.1657194436934867e-05, "loss": 0.528523862361908, "step": 5941 }, { "epoch": 0.9497322784304324, "grad_norm": 1.2923046954924247, "learning_rate": 1.1654588260608913e-05, "loss": 0.5120457410812378, "step": 5942 }, { "epoch": 0.9498921122033086, "grad_norm": 1.2300356364045686, "learning_rate": 1.1651981968732387e-05, "loss": 0.623394787311554, "step": 5943 }, { "epoch": 0.9500519459761848, "grad_norm": 1.3447041501137411, "learning_rate": 1.1649375561487304e-05, "loss": 0.6490577459335327, "step": 5944 }, { "epoch": 0.950211779749061, "grad_norm": 1.0650734389126923, "learning_rate": 1.1646769039055692e-05, "loss": 0.54957115650177, "step": 5945 }, { "epoch": 0.9503716135219372, "grad_norm": 1.3596763446179319, "learning_rate": 1.1644162401619576e-05, "loss": 0.5741373896598816, "step": 5946 }, { "epoch": 0.9505314472948134, "grad_norm": 1.3801439807919746, "learning_rate": 1.1641555649360998e-05, "loss": 0.5644515752792358, "step": 5947 }, { "epoch": 0.9506912810676896, "grad_norm": 1.453756231865924, "learning_rate": 1.1638948782462e-05, "loss": 0.5960034132003784, "step": 5948 }, { "epoch": 0.9508511148405658, "grad_norm": 1.2927907515171826, "learning_rate": 1.1636341801104642e-05, "loss": 0.5777009725570679, "step": 5949 }, { "epoch": 0.951010948613442, "grad_norm": 1.4119325363238082, "learning_rate": 1.163373470547098e-05, "loss": 0.62435382604599, "step": 5950 }, { "epoch": 0.9511707823863182, "grad_norm": 1.413922497366089, "learning_rate": 1.1631127495743088e-05, "loss": 0.538602888584137, "step": 5951 }, { "epoch": 0.9513306161591945, "grad_norm": 1.5508671629275022, "learning_rate": 1.1628520172103045e-05, "loss": 0.7330840826034546, "step": 5952 }, { "epoch": 0.9514904499320707, "grad_norm": 1.4021292621732238, "learning_rate": 1.1625912734732932e-05, "loss": 0.7075098752975464, "step": 5953 }, { "epoch": 0.9516502837049469, "grad_norm": 1.2080453943149032, "learning_rate": 1.1623305183814848e-05, "loss": 0.5941776037216187, "step": 5954 }, { "epoch": 0.9518101174778231, "grad_norm": 1.1557666577059658, "learning_rate": 1.1620697519530894e-05, "loss": 0.6245297193527222, "step": 5955 }, { "epoch": 0.9519699512506993, "grad_norm": 1.2510879449459542, "learning_rate": 1.1618089742063181e-05, "loss": 0.6726826429367065, "step": 5956 }, { "epoch": 0.9521297850235755, "grad_norm": 1.413119884079022, "learning_rate": 1.1615481851593825e-05, "loss": 0.5778071880340576, "step": 5957 }, { "epoch": 0.9522896187964517, "grad_norm": 1.528310260034917, "learning_rate": 1.1612873848304953e-05, "loss": 0.6031308174133301, "step": 5958 }, { "epoch": 0.9524494525693279, "grad_norm": 1.423556823937758, "learning_rate": 1.1610265732378699e-05, "loss": 0.6474714875221252, "step": 5959 }, { "epoch": 0.9526092863422041, "grad_norm": 1.3080446386516478, "learning_rate": 1.1607657503997202e-05, "loss": 0.5828068852424622, "step": 5960 }, { "epoch": 0.9527691201150803, "grad_norm": 1.0530194264579453, "learning_rate": 1.1605049163342615e-05, "loss": 0.5506933927536011, "step": 5961 }, { "epoch": 0.9529289538879565, "grad_norm": 1.6450248495630868, "learning_rate": 1.1602440710597094e-05, "loss": 0.7077896595001221, "step": 5962 }, { "epoch": 0.9530887876608327, "grad_norm": 1.3092936480833721, "learning_rate": 1.1599832145942802e-05, "loss": 0.5127038359642029, "step": 5963 }, { "epoch": 0.9532486214337089, "grad_norm": 1.3685414127206448, "learning_rate": 1.1597223469561914e-05, "loss": 0.6761722564697266, "step": 5964 }, { "epoch": 0.9534084552065851, "grad_norm": 1.2040588346427008, "learning_rate": 1.159461468163661e-05, "loss": 0.6395457983016968, "step": 5965 }, { "epoch": 0.9535682889794613, "grad_norm": 1.577274054286279, "learning_rate": 1.1592005782349079e-05, "loss": 0.7136825919151306, "step": 5966 }, { "epoch": 0.9537281227523375, "grad_norm": 1.2937869642945756, "learning_rate": 1.1589396771881518e-05, "loss": 0.5430644750595093, "step": 5967 }, { "epoch": 0.9538879565252137, "grad_norm": 1.363195572960885, "learning_rate": 1.1586787650416129e-05, "loss": 0.6397583484649658, "step": 5968 }, { "epoch": 0.95404779029809, "grad_norm": 1.5175616176664344, "learning_rate": 1.1584178418135126e-05, "loss": 0.579498291015625, "step": 5969 }, { "epoch": 0.9542076240709662, "grad_norm": 1.1823584883271419, "learning_rate": 1.1581569075220727e-05, "loss": 0.5231395959854126, "step": 5970 }, { "epoch": 0.9543674578438424, "grad_norm": 1.2759661150312396, "learning_rate": 1.1578959621855159e-05, "loss": 0.5794535279273987, "step": 5971 }, { "epoch": 0.9545272916167186, "grad_norm": 1.2971847886333803, "learning_rate": 1.1576350058220659e-05, "loss": 0.5867379903793335, "step": 5972 }, { "epoch": 0.9546871253895948, "grad_norm": 1.1825006856420672, "learning_rate": 1.1573740384499466e-05, "loss": 0.49864739179611206, "step": 5973 }, { "epoch": 0.954846959162471, "grad_norm": 1.3370896221222273, "learning_rate": 1.157113060087383e-05, "loss": 0.6952742338180542, "step": 5974 }, { "epoch": 0.9550067929353473, "grad_norm": 1.4163855389275715, "learning_rate": 1.1568520707526017e-05, "loss": 0.6339406967163086, "step": 5975 }, { "epoch": 0.9551666267082235, "grad_norm": 1.208184703107075, "learning_rate": 1.1565910704638284e-05, "loss": 0.5395375490188599, "step": 5976 }, { "epoch": 0.9553264604810997, "grad_norm": 1.3378574611042162, "learning_rate": 1.156330059239291e-05, "loss": 0.5746455192565918, "step": 5977 }, { "epoch": 0.9554862942539759, "grad_norm": 1.1752886186517626, "learning_rate": 1.1560690370972169e-05, "loss": 0.5522438287734985, "step": 5978 }, { "epoch": 0.9556461280268521, "grad_norm": 1.3166792053036713, "learning_rate": 1.1558080040558359e-05, "loss": 0.6414716243743896, "step": 5979 }, { "epoch": 0.9558059617997283, "grad_norm": 1.1525822033161228, "learning_rate": 1.1555469601333768e-05, "loss": 0.4945201873779297, "step": 5980 }, { "epoch": 0.9559657955726045, "grad_norm": 1.3378979581619568, "learning_rate": 1.1552859053480707e-05, "loss": 0.7877768278121948, "step": 5981 }, { "epoch": 0.9561256293454807, "grad_norm": 1.1516945510311893, "learning_rate": 1.155024839718148e-05, "loss": 0.5451884269714355, "step": 5982 }, { "epoch": 0.9562854631183569, "grad_norm": 1.4000728430068163, "learning_rate": 1.154763763261841e-05, "loss": 0.5793730616569519, "step": 5983 }, { "epoch": 0.9564452968912331, "grad_norm": 1.1681921276231304, "learning_rate": 1.1545026759973823e-05, "loss": 0.5467561483383179, "step": 5984 }, { "epoch": 0.9566051306641093, "grad_norm": 1.4200701929387844, "learning_rate": 1.1542415779430054e-05, "loss": 0.6179354190826416, "step": 5985 }, { "epoch": 0.9567649644369856, "grad_norm": 1.2574942675066016, "learning_rate": 1.1539804691169442e-05, "loss": 0.61527419090271, "step": 5986 }, { "epoch": 0.9569247982098618, "grad_norm": 1.3397793939680518, "learning_rate": 1.1537193495374342e-05, "loss": 0.5523898005485535, "step": 5987 }, { "epoch": 0.957084631982738, "grad_norm": 1.347929405392826, "learning_rate": 1.1534582192227104e-05, "loss": 0.6020944118499756, "step": 5988 }, { "epoch": 0.9572444657556142, "grad_norm": 1.4140191614331201, "learning_rate": 1.1531970781910093e-05, "loss": 0.6472889184951782, "step": 5989 }, { "epoch": 0.9574042995284904, "grad_norm": 1.4548663480488022, "learning_rate": 1.1529359264605684e-05, "loss": 0.6524408459663391, "step": 5990 }, { "epoch": 0.9575641333013666, "grad_norm": 1.431779697626823, "learning_rate": 1.1526747640496256e-05, "loss": 0.637367308139801, "step": 5991 }, { "epoch": 0.9577239670742428, "grad_norm": 1.1508659641442869, "learning_rate": 1.1524135909764191e-05, "loss": 0.6957162618637085, "step": 5992 }, { "epoch": 0.957883800847119, "grad_norm": 1.4612373890482064, "learning_rate": 1.1521524072591888e-05, "loss": 0.5838685035705566, "step": 5993 }, { "epoch": 0.9580436346199952, "grad_norm": 1.4713002083387978, "learning_rate": 1.1518912129161744e-05, "loss": 0.6272329688072205, "step": 5994 }, { "epoch": 0.9582034683928714, "grad_norm": 1.1936506980933772, "learning_rate": 1.1516300079656168e-05, "loss": 0.5077688097953796, "step": 5995 }, { "epoch": 0.9583633021657476, "grad_norm": 1.3884065893015118, "learning_rate": 1.1513687924257584e-05, "loss": 0.5693469047546387, "step": 5996 }, { "epoch": 0.9585231359386238, "grad_norm": 1.411038746862577, "learning_rate": 1.151107566314841e-05, "loss": 0.7134711742401123, "step": 5997 }, { "epoch": 0.9586829697115, "grad_norm": 1.5145693899658437, "learning_rate": 1.1508463296511075e-05, "loss": 0.5946648120880127, "step": 5998 }, { "epoch": 0.9588428034843762, "grad_norm": 1.2728797834206107, "learning_rate": 1.150585082452802e-05, "loss": 0.5447986721992493, "step": 5999 }, { "epoch": 0.9590026372572524, "grad_norm": 1.4134374491619814, "learning_rate": 1.1503238247381687e-05, "loss": 0.5229873657226562, "step": 6000 }, { "epoch": 0.9591624710301286, "grad_norm": 1.2584742000529676, "learning_rate": 1.1500625565254537e-05, "loss": 0.6133459806442261, "step": 6001 }, { "epoch": 0.9593223048030048, "grad_norm": 1.2961285881240818, "learning_rate": 1.1498012778329024e-05, "loss": 0.5454378128051758, "step": 6002 }, { "epoch": 0.959482138575881, "grad_norm": 1.3925339371731666, "learning_rate": 1.149539988678762e-05, "loss": 0.6715185642242432, "step": 6003 }, { "epoch": 0.9596419723487573, "grad_norm": 1.3830150744673686, "learning_rate": 1.1492786890812793e-05, "loss": 0.679840087890625, "step": 6004 }, { "epoch": 0.9598018061216335, "grad_norm": 1.3158501517043468, "learning_rate": 1.149017379058703e-05, "loss": 0.5230504274368286, "step": 6005 }, { "epoch": 0.9599616398945097, "grad_norm": 1.2923755971218536, "learning_rate": 1.1487560586292825e-05, "loss": 0.6432143449783325, "step": 6006 }, { "epoch": 0.9601214736673859, "grad_norm": 1.4515262136054685, "learning_rate": 1.1484947278112673e-05, "loss": 0.6951642036437988, "step": 6007 }, { "epoch": 0.9602813074402621, "grad_norm": 1.4443473083979474, "learning_rate": 1.1482333866229069e-05, "loss": 0.6475076675415039, "step": 6008 }, { "epoch": 0.9604411412131383, "grad_norm": 1.2885523696958405, "learning_rate": 1.1479720350824534e-05, "loss": 0.7975236177444458, "step": 6009 }, { "epoch": 0.9606009749860146, "grad_norm": 1.3291408384981978, "learning_rate": 1.1477106732081585e-05, "loss": 0.703743040561676, "step": 6010 }, { "epoch": 0.9607608087588908, "grad_norm": 1.242889978370974, "learning_rate": 1.1474493010182746e-05, "loss": 0.5453802943229675, "step": 6011 }, { "epoch": 0.960920642531767, "grad_norm": 1.2779328738075415, "learning_rate": 1.1471879185310548e-05, "loss": 0.6163164973258972, "step": 6012 }, { "epoch": 0.9610804763046432, "grad_norm": 1.5137376313471311, "learning_rate": 1.1469265257647537e-05, "loss": 0.5524724721908569, "step": 6013 }, { "epoch": 0.9612403100775194, "grad_norm": 1.512303505562083, "learning_rate": 1.1466651227376258e-05, "loss": 0.627368688583374, "step": 6014 }, { "epoch": 0.9614001438503956, "grad_norm": 1.23564999603565, "learning_rate": 1.1464037094679261e-05, "loss": 0.6259746551513672, "step": 6015 }, { "epoch": 0.9615599776232718, "grad_norm": 1.2695924020944052, "learning_rate": 1.1461422859739116e-05, "loss": 0.6236852407455444, "step": 6016 }, { "epoch": 0.961719811396148, "grad_norm": 1.5082239009141574, "learning_rate": 1.1458808522738385e-05, "loss": 0.6028301119804382, "step": 6017 }, { "epoch": 0.9618796451690242, "grad_norm": 1.348732789526483, "learning_rate": 1.1456194083859649e-05, "loss": 0.7352421283721924, "step": 6018 }, { "epoch": 0.9620394789419004, "grad_norm": 1.2260225752831917, "learning_rate": 1.145357954328549e-05, "loss": 0.5305724143981934, "step": 6019 }, { "epoch": 0.9621993127147767, "grad_norm": 1.2008378726039175, "learning_rate": 1.1450964901198491e-05, "loss": 0.5789428353309631, "step": 6020 }, { "epoch": 0.9623591464876529, "grad_norm": 1.386267392980329, "learning_rate": 1.144835015778126e-05, "loss": 0.8460035920143127, "step": 6021 }, { "epoch": 0.9625189802605291, "grad_norm": 1.3129074454798304, "learning_rate": 1.1445735313216396e-05, "loss": 0.6558945775032043, "step": 6022 }, { "epoch": 0.9626788140334053, "grad_norm": 1.180739790101227, "learning_rate": 1.1443120367686511e-05, "loss": 0.5133249759674072, "step": 6023 }, { "epoch": 0.9628386478062815, "grad_norm": 1.3396312497868488, "learning_rate": 1.1440505321374223e-05, "loss": 0.613249659538269, "step": 6024 }, { "epoch": 0.9629984815791577, "grad_norm": 5.644608100630854, "learning_rate": 1.1437890174462157e-05, "loss": 0.5177863240242004, "step": 6025 }, { "epoch": 0.9631583153520339, "grad_norm": 1.501957347644664, "learning_rate": 1.143527492713295e-05, "loss": 0.7263655066490173, "step": 6026 }, { "epoch": 0.9633181491249101, "grad_norm": 1.5141570034444174, "learning_rate": 1.1432659579569234e-05, "loss": 0.6216902136802673, "step": 6027 }, { "epoch": 0.9634779828977863, "grad_norm": 1.4220677506613524, "learning_rate": 1.1430044131953663e-05, "loss": 0.7023002505302429, "step": 6028 }, { "epoch": 0.9636378166706625, "grad_norm": 1.337805454003064, "learning_rate": 1.142742858446889e-05, "loss": 0.57371985912323, "step": 6029 }, { "epoch": 0.9637976504435387, "grad_norm": 1.3623961706605972, "learning_rate": 1.142481293729757e-05, "loss": 0.7003968954086304, "step": 6030 }, { "epoch": 0.9639574842164149, "grad_norm": 1.181799467761961, "learning_rate": 1.1422197190622375e-05, "loss": 0.5362557172775269, "step": 6031 }, { "epoch": 0.9641173179892911, "grad_norm": 1.162168783440318, "learning_rate": 1.1419581344625976e-05, "loss": 0.5487873554229736, "step": 6032 }, { "epoch": 0.9642771517621673, "grad_norm": 1.2171268448787949, "learning_rate": 1.141696539949106e-05, "loss": 0.662723183631897, "step": 6033 }, { "epoch": 0.9644369855350435, "grad_norm": 1.485082393175989, "learning_rate": 1.1414349355400307e-05, "loss": 0.6154780387878418, "step": 6034 }, { "epoch": 0.9645968193079197, "grad_norm": 1.2756078226028043, "learning_rate": 1.1411733212536418e-05, "loss": 0.505618155002594, "step": 6035 }, { "epoch": 0.964756653080796, "grad_norm": 1.1911648276237023, "learning_rate": 1.1409116971082097e-05, "loss": 0.57064288854599, "step": 6036 }, { "epoch": 0.9649164868536722, "grad_norm": 1.298250446194726, "learning_rate": 1.1406500631220048e-05, "loss": 0.5884732007980347, "step": 6037 }, { "epoch": 0.9650763206265484, "grad_norm": 1.3609745745267288, "learning_rate": 1.1403884193132993e-05, "loss": 0.6430528163909912, "step": 6038 }, { "epoch": 0.9652361543994246, "grad_norm": 1.2427644096914008, "learning_rate": 1.1401267657003647e-05, "loss": 0.6002496480941772, "step": 6039 }, { "epoch": 0.9653959881723008, "grad_norm": 1.0727777174316302, "learning_rate": 1.1398651023014745e-05, "loss": 0.5173817276954651, "step": 6040 }, { "epoch": 0.965555821945177, "grad_norm": 1.36558196240828, "learning_rate": 1.139603429134902e-05, "loss": 0.5996820330619812, "step": 6041 }, { "epoch": 0.9657156557180532, "grad_norm": 1.3835947101755746, "learning_rate": 1.1393417462189218e-05, "loss": 0.5949986577033997, "step": 6042 }, { "epoch": 0.9658754894909294, "grad_norm": 1.2781228691738464, "learning_rate": 1.139080053571809e-05, "loss": 0.6216187477111816, "step": 6043 }, { "epoch": 0.9660353232638056, "grad_norm": 1.3306977544292355, "learning_rate": 1.1388183512118387e-05, "loss": 0.6863290667533875, "step": 6044 }, { "epoch": 0.9661951570366818, "grad_norm": 1.0981812159183235, "learning_rate": 1.1385566391572878e-05, "loss": 0.41948533058166504, "step": 6045 }, { "epoch": 0.9663549908095581, "grad_norm": 1.5426843287957215, "learning_rate": 1.1382949174264333e-05, "loss": 0.6330230236053467, "step": 6046 }, { "epoch": 0.9665148245824343, "grad_norm": 1.3586015662440127, "learning_rate": 1.1380331860375527e-05, "loss": 0.6840299367904663, "step": 6047 }, { "epoch": 0.9666746583553105, "grad_norm": 1.9587716893633538, "learning_rate": 1.1377714450089249e-05, "loss": 0.6386526823043823, "step": 6048 }, { "epoch": 0.9668344921281867, "grad_norm": 1.2633249231269061, "learning_rate": 1.1375096943588279e-05, "loss": 0.6337591409683228, "step": 6049 }, { "epoch": 0.9669943259010629, "grad_norm": 1.1886495972394584, "learning_rate": 1.1372479341055427e-05, "loss": 0.6371749639511108, "step": 6050 }, { "epoch": 0.9671541596739391, "grad_norm": 1.2012642748462525, "learning_rate": 1.136986164267349e-05, "loss": 0.5576621294021606, "step": 6051 }, { "epoch": 0.9673139934468153, "grad_norm": 1.4835716884320276, "learning_rate": 1.1367243848625277e-05, "loss": 0.6197466850280762, "step": 6052 }, { "epoch": 0.9674738272196916, "grad_norm": 1.0638859229630715, "learning_rate": 1.1364625959093613e-05, "loss": 0.4326575994491577, "step": 6053 }, { "epoch": 0.9676336609925678, "grad_norm": 1.3251181519242452, "learning_rate": 1.1362007974261312e-05, "loss": 0.6262164115905762, "step": 6054 }, { "epoch": 0.967793494765444, "grad_norm": 1.5586784951419559, "learning_rate": 1.1359389894311214e-05, "loss": 0.5118482112884521, "step": 6055 }, { "epoch": 0.9679533285383202, "grad_norm": 1.3605693950498605, "learning_rate": 1.1356771719426151e-05, "loss": 0.683159351348877, "step": 6056 }, { "epoch": 0.9681131623111964, "grad_norm": 1.4175860382124017, "learning_rate": 1.135415344978897e-05, "loss": 0.5581544637680054, "step": 6057 }, { "epoch": 0.9682729960840726, "grad_norm": 1.4720167259503896, "learning_rate": 1.1351535085582522e-05, "loss": 0.8084542751312256, "step": 6058 }, { "epoch": 0.9684328298569488, "grad_norm": 1.3310199552753963, "learning_rate": 1.1348916626989659e-05, "loss": 0.6475985050201416, "step": 6059 }, { "epoch": 0.968592663629825, "grad_norm": 1.2783082065828624, "learning_rate": 1.1346298074193249e-05, "loss": 0.6282879114151001, "step": 6060 }, { "epoch": 0.9687524974027012, "grad_norm": 1.4416310861289734, "learning_rate": 1.1343679427376164e-05, "loss": 0.6347553730010986, "step": 6061 }, { "epoch": 0.9689123311755774, "grad_norm": 1.3591442331680883, "learning_rate": 1.1341060686721277e-05, "loss": 0.6610962152481079, "step": 6062 }, { "epoch": 0.9690721649484536, "grad_norm": 1.3125910906420621, "learning_rate": 1.1338441852411475e-05, "loss": 0.6923863887786865, "step": 6063 }, { "epoch": 0.9692319987213298, "grad_norm": 2.6628703292463887, "learning_rate": 1.1335822924629643e-05, "loss": 0.4422416090965271, "step": 6064 }, { "epoch": 0.969391832494206, "grad_norm": 1.4485541123194094, "learning_rate": 1.1333203903558685e-05, "loss": 0.6086099743843079, "step": 6065 }, { "epoch": 0.9695516662670822, "grad_norm": 1.480820215094996, "learning_rate": 1.1330584789381499e-05, "loss": 0.7073396444320679, "step": 6066 }, { "epoch": 0.9697115000399584, "grad_norm": 1.0766527740146927, "learning_rate": 1.1327965582280995e-05, "loss": 0.5729517340660095, "step": 6067 }, { "epoch": 0.9698713338128346, "grad_norm": 1.1734751632319387, "learning_rate": 1.1325346282440091e-05, "loss": 0.4348292350769043, "step": 6068 }, { "epoch": 0.9700311675857108, "grad_norm": 1.400301125718058, "learning_rate": 1.1322726890041708e-05, "loss": 0.6418450474739075, "step": 6069 }, { "epoch": 0.970191001358587, "grad_norm": 1.3256396610411656, "learning_rate": 1.1320107405268776e-05, "loss": 0.6717269420623779, "step": 6070 }, { "epoch": 0.9703508351314633, "grad_norm": 1.3915470658687377, "learning_rate": 1.1317487828304233e-05, "loss": 0.6545566320419312, "step": 6071 }, { "epoch": 0.9705106689043395, "grad_norm": 1.330870660683047, "learning_rate": 1.1314868159331014e-05, "loss": 0.5196321606636047, "step": 6072 }, { "epoch": 0.9706705026772157, "grad_norm": 1.2790740489838124, "learning_rate": 1.1312248398532073e-05, "loss": 0.668144702911377, "step": 6073 }, { "epoch": 0.9708303364500919, "grad_norm": 1.4304799848177059, "learning_rate": 1.1309628546090362e-05, "loss": 0.5743748545646667, "step": 6074 }, { "epoch": 0.9709901702229681, "grad_norm": 1.3862029301819168, "learning_rate": 1.1307008602188845e-05, "loss": 0.7941693067550659, "step": 6075 }, { "epoch": 0.9711500039958443, "grad_norm": 1.4116633524093, "learning_rate": 1.1304388567010489e-05, "loss": 0.6749783754348755, "step": 6076 }, { "epoch": 0.9713098377687205, "grad_norm": 1.3887140202539983, "learning_rate": 1.1301768440738266e-05, "loss": 0.6660711765289307, "step": 6077 }, { "epoch": 0.9714696715415967, "grad_norm": 1.5050482446340345, "learning_rate": 1.1299148223555156e-05, "loss": 0.6873001456260681, "step": 6078 }, { "epoch": 0.9716295053144729, "grad_norm": 1.536062920140798, "learning_rate": 1.1296527915644149e-05, "loss": 0.6533854007720947, "step": 6079 }, { "epoch": 0.9717893390873491, "grad_norm": 1.2417588839529337, "learning_rate": 1.1293907517188236e-05, "loss": 0.624266505241394, "step": 6080 }, { "epoch": 0.9719491728602254, "grad_norm": 1.2331600612272933, "learning_rate": 1.1291287028370414e-05, "loss": 0.5766973495483398, "step": 6081 }, { "epoch": 0.9721090066331016, "grad_norm": 1.5264087395334935, "learning_rate": 1.1288666449373695e-05, "loss": 0.6589053869247437, "step": 6082 }, { "epoch": 0.9722688404059778, "grad_norm": 1.3449839680980684, "learning_rate": 1.1286045780381085e-05, "loss": 0.6427566409111023, "step": 6083 }, { "epoch": 0.972428674178854, "grad_norm": 1.2621671506014396, "learning_rate": 1.1283425021575602e-05, "loss": 0.5369752049446106, "step": 6084 }, { "epoch": 0.9725885079517302, "grad_norm": 1.3191065537587705, "learning_rate": 1.1280804173140273e-05, "loss": 0.8130648732185364, "step": 6085 }, { "epoch": 0.9727483417246064, "grad_norm": 1.4938689070822178, "learning_rate": 1.1278183235258131e-05, "loss": 0.6052528619766235, "step": 6086 }, { "epoch": 0.9729081754974827, "grad_norm": 1.3141781221305069, "learning_rate": 1.127556220811221e-05, "loss": 0.49090635776519775, "step": 6087 }, { "epoch": 0.9730680092703589, "grad_norm": 1.3984494721784544, "learning_rate": 1.1272941091885552e-05, "loss": 0.6793513298034668, "step": 6088 }, { "epoch": 0.9732278430432351, "grad_norm": 1.4149793614799893, "learning_rate": 1.1270319886761208e-05, "loss": 0.7401770353317261, "step": 6089 }, { "epoch": 0.9733876768161113, "grad_norm": 1.434796205729256, "learning_rate": 1.1267698592922236e-05, "loss": 0.6115731000900269, "step": 6090 }, { "epoch": 0.9735475105889875, "grad_norm": 1.6392444273689615, "learning_rate": 1.1265077210551693e-05, "loss": 0.7025928497314453, "step": 6091 }, { "epoch": 0.9737073443618637, "grad_norm": 1.3075296514413817, "learning_rate": 1.1262455739832652e-05, "loss": 0.5920161008834839, "step": 6092 }, { "epoch": 0.9738671781347399, "grad_norm": 1.2889829513790374, "learning_rate": 1.1259834180948184e-05, "loss": 0.5292770862579346, "step": 6093 }, { "epoch": 0.9740270119076161, "grad_norm": 1.1745570071584743, "learning_rate": 1.125721253408137e-05, "loss": 0.46474194526672363, "step": 6094 }, { "epoch": 0.9741868456804923, "grad_norm": 1.1459017770262905, "learning_rate": 1.1254590799415294e-05, "loss": 0.560329794883728, "step": 6095 }, { "epoch": 0.9743466794533685, "grad_norm": 1.494151453016033, "learning_rate": 1.1251968977133052e-05, "loss": 0.5591961741447449, "step": 6096 }, { "epoch": 0.9745065132262447, "grad_norm": 1.1463532058451114, "learning_rate": 1.1249347067417747e-05, "loss": 0.52561354637146, "step": 6097 }, { "epoch": 0.9746663469991209, "grad_norm": 1.2824361549256855, "learning_rate": 1.1246725070452476e-05, "loss": 0.7219730019569397, "step": 6098 }, { "epoch": 0.9748261807719971, "grad_norm": 1.2779310063445606, "learning_rate": 1.124410298642035e-05, "loss": 0.5450264811515808, "step": 6099 }, { "epoch": 0.9749860145448733, "grad_norm": 1.2454727575286417, "learning_rate": 1.1241480815504493e-05, "loss": 0.4821789860725403, "step": 6100 }, { "epoch": 0.9751458483177495, "grad_norm": 1.3452237916192968, "learning_rate": 1.1238858557888022e-05, "loss": 0.5352726578712463, "step": 6101 }, { "epoch": 0.9753056820906257, "grad_norm": 1.2069119146223357, "learning_rate": 1.1236236213754071e-05, "loss": 0.5984119176864624, "step": 6102 }, { "epoch": 0.9754655158635019, "grad_norm": 1.5756199502886388, "learning_rate": 1.123361378328577e-05, "loss": 0.6118030548095703, "step": 6103 }, { "epoch": 0.9756253496363781, "grad_norm": 1.3165157757460693, "learning_rate": 1.123099126666626e-05, "loss": 0.5953850746154785, "step": 6104 }, { "epoch": 0.9757851834092544, "grad_norm": 1.317762958438866, "learning_rate": 1.1228368664078694e-05, "loss": 0.6745131611824036, "step": 6105 }, { "epoch": 0.9759450171821306, "grad_norm": 1.7652809117143775, "learning_rate": 1.1225745975706222e-05, "loss": 0.6627421975135803, "step": 6106 }, { "epoch": 0.9761048509550068, "grad_norm": 1.1580698117932755, "learning_rate": 1.1223123201732002e-05, "loss": 0.4832579791545868, "step": 6107 }, { "epoch": 0.976264684727883, "grad_norm": 1.483054647513499, "learning_rate": 1.1220500342339205e-05, "loss": 0.6158186197280884, "step": 6108 }, { "epoch": 0.9764245185007592, "grad_norm": 1.1633980996617943, "learning_rate": 1.1217877397710992e-05, "loss": 0.508309006690979, "step": 6109 }, { "epoch": 0.9765843522736354, "grad_norm": 1.3726776413123762, "learning_rate": 1.121525436803055e-05, "loss": 0.630800724029541, "step": 6110 }, { "epoch": 0.9767441860465116, "grad_norm": 1.5213827338442902, "learning_rate": 1.1212631253481055e-05, "loss": 0.5854274034500122, "step": 6111 }, { "epoch": 0.9769040198193878, "grad_norm": 1.4672818064263344, "learning_rate": 1.1210008054245702e-05, "loss": 0.5676014423370361, "step": 6112 }, { "epoch": 0.977063853592264, "grad_norm": 1.2137839925088496, "learning_rate": 1.1207384770507681e-05, "loss": 0.5910195112228394, "step": 6113 }, { "epoch": 0.9772236873651402, "grad_norm": 1.7301830831740603, "learning_rate": 1.1204761402450195e-05, "loss": 0.6464545130729675, "step": 6114 }, { "epoch": 0.9773835211380164, "grad_norm": 1.353828780492841, "learning_rate": 1.1202137950256452e-05, "loss": 0.589186429977417, "step": 6115 }, { "epoch": 0.9775433549108927, "grad_norm": 1.2540060673594677, "learning_rate": 1.119951441410966e-05, "loss": 0.5772068500518799, "step": 6116 }, { "epoch": 0.9777031886837689, "grad_norm": 1.3638960958917994, "learning_rate": 1.1196890794193042e-05, "loss": 0.6079007387161255, "step": 6117 }, { "epoch": 0.9778630224566451, "grad_norm": 1.2816144544746035, "learning_rate": 1.1194267090689824e-05, "loss": 0.6748499274253845, "step": 6118 }, { "epoch": 0.9780228562295213, "grad_norm": 1.4783302929296525, "learning_rate": 1.1191643303783229e-05, "loss": 0.732580840587616, "step": 6119 }, { "epoch": 0.9781826900023975, "grad_norm": 1.2561654813558536, "learning_rate": 1.1189019433656498e-05, "loss": 0.5313559770584106, "step": 6120 }, { "epoch": 0.9783425237752738, "grad_norm": 1.3286970002032477, "learning_rate": 1.1186395480492871e-05, "loss": 0.7313680648803711, "step": 6121 }, { "epoch": 0.97850235754815, "grad_norm": 1.5991701110897294, "learning_rate": 1.1183771444475598e-05, "loss": 0.528096616268158, "step": 6122 }, { "epoch": 0.9786621913210262, "grad_norm": 1.497640400368788, "learning_rate": 1.1181147325787927e-05, "loss": 0.8093063235282898, "step": 6123 }, { "epoch": 0.9788220250939024, "grad_norm": 1.3020249436766875, "learning_rate": 1.1178523124613125e-05, "loss": 0.6361547112464905, "step": 6124 }, { "epoch": 0.9789818588667786, "grad_norm": 1.2400117638585861, "learning_rate": 1.1175898841134449e-05, "loss": 0.6114391088485718, "step": 6125 }, { "epoch": 0.9791416926396548, "grad_norm": 1.1797896766124862, "learning_rate": 1.1173274475535173e-05, "loss": 0.6216713190078735, "step": 6126 }, { "epoch": 0.979301526412531, "grad_norm": 1.333644638136488, "learning_rate": 1.1170650027998577e-05, "loss": 0.7012845873832703, "step": 6127 }, { "epoch": 0.9794613601854072, "grad_norm": 1.1948393706718685, "learning_rate": 1.1168025498707938e-05, "loss": 0.541235625743866, "step": 6128 }, { "epoch": 0.9796211939582834, "grad_norm": 1.4864896863859083, "learning_rate": 1.1165400887846547e-05, "loss": 0.5863776803016663, "step": 6129 }, { "epoch": 0.9797810277311596, "grad_norm": 1.328129439550735, "learning_rate": 1.1162776195597697e-05, "loss": 0.7499690651893616, "step": 6130 }, { "epoch": 0.9799408615040358, "grad_norm": 1.2779298227458469, "learning_rate": 1.1160151422144683e-05, "loss": 0.6026915311813354, "step": 6131 }, { "epoch": 0.980100695276912, "grad_norm": 1.3423277859522196, "learning_rate": 1.1157526567670816e-05, "loss": 0.6956555843353271, "step": 6132 }, { "epoch": 0.9802605290497882, "grad_norm": 1.5424154172990951, "learning_rate": 1.1154901632359401e-05, "loss": 0.6817967891693115, "step": 6133 }, { "epoch": 0.9804203628226644, "grad_norm": 1.2879300247618242, "learning_rate": 1.1152276616393757e-05, "loss": 0.6567111015319824, "step": 6134 }, { "epoch": 0.9805801965955406, "grad_norm": 1.206980238546946, "learning_rate": 1.1149651519957208e-05, "loss": 0.5577418804168701, "step": 6135 }, { "epoch": 0.9807400303684168, "grad_norm": 1.2432909329268318, "learning_rate": 1.1147026343233074e-05, "loss": 0.6075853109359741, "step": 6136 }, { "epoch": 0.980899864141293, "grad_norm": 1.2923740917600877, "learning_rate": 1.1144401086404697e-05, "loss": 0.5568246841430664, "step": 6137 }, { "epoch": 0.9810596979141692, "grad_norm": 1.2346905160383936, "learning_rate": 1.1141775749655411e-05, "loss": 0.5222175121307373, "step": 6138 }, { "epoch": 0.9812195316870455, "grad_norm": 1.427280423901355, "learning_rate": 1.1139150333168563e-05, "loss": 0.7572745680809021, "step": 6139 }, { "epoch": 0.9813793654599217, "grad_norm": 1.5287736000391674, "learning_rate": 1.1136524837127498e-05, "loss": 0.688421368598938, "step": 6140 }, { "epoch": 0.9815391992327979, "grad_norm": 1.250418641635423, "learning_rate": 1.1133899261715573e-05, "loss": 0.550967276096344, "step": 6141 }, { "epoch": 0.9816990330056741, "grad_norm": 1.3099500844233467, "learning_rate": 1.1131273607116153e-05, "loss": 0.5323122143745422, "step": 6142 }, { "epoch": 0.9818588667785503, "grad_norm": 1.309048646893076, "learning_rate": 1.1128647873512597e-05, "loss": 0.6402919292449951, "step": 6143 }, { "epoch": 0.9820187005514265, "grad_norm": 1.3192685019375558, "learning_rate": 1.1126022061088284e-05, "loss": 0.669939398765564, "step": 6144 }, { "epoch": 0.9821785343243027, "grad_norm": 1.2894671022172832, "learning_rate": 1.1123396170026589e-05, "loss": 0.6512681841850281, "step": 6145 }, { "epoch": 0.9823383680971789, "grad_norm": 1.285545423759136, "learning_rate": 1.1120770200510891e-05, "loss": 0.5661736726760864, "step": 6146 }, { "epoch": 0.9824982018700551, "grad_norm": 1.4185473262873276, "learning_rate": 1.1118144152724584e-05, "loss": 0.5553266406059265, "step": 6147 }, { "epoch": 0.9826580356429313, "grad_norm": 1.2485960898562605, "learning_rate": 1.111551802685106e-05, "loss": 0.6280678510665894, "step": 6148 }, { "epoch": 0.9828178694158075, "grad_norm": 1.4532236816065023, "learning_rate": 1.1112891823073718e-05, "loss": 0.6659491062164307, "step": 6149 }, { "epoch": 0.9829777031886837, "grad_norm": 1.4690174746424434, "learning_rate": 1.1110265541575962e-05, "loss": 0.5490680932998657, "step": 6150 }, { "epoch": 0.98313753696156, "grad_norm": 1.4711879834079085, "learning_rate": 1.1107639182541203e-05, "loss": 0.5121414065361023, "step": 6151 }, { "epoch": 0.9832973707344362, "grad_norm": 1.4079469085072853, "learning_rate": 1.1105012746152857e-05, "loss": 0.740694522857666, "step": 6152 }, { "epoch": 0.9834572045073124, "grad_norm": 1.1947113848135622, "learning_rate": 1.1102386232594342e-05, "loss": 0.5845601558685303, "step": 6153 }, { "epoch": 0.9836170382801886, "grad_norm": 1.546884174359904, "learning_rate": 1.109975964204909e-05, "loss": 0.7099211812019348, "step": 6154 }, { "epoch": 0.9837768720530649, "grad_norm": 1.2430801398654507, "learning_rate": 1.1097132974700528e-05, "loss": 0.5894614458084106, "step": 6155 }, { "epoch": 0.9839367058259411, "grad_norm": 1.417629906955135, "learning_rate": 1.1094506230732093e-05, "loss": 0.5702745914459229, "step": 6156 }, { "epoch": 0.9840965395988173, "grad_norm": 1.302950378100396, "learning_rate": 1.1091879410327233e-05, "loss": 0.5538139343261719, "step": 6157 }, { "epoch": 0.9842563733716935, "grad_norm": 1.2513957238262932, "learning_rate": 1.1089252513669388e-05, "loss": 0.6268938183784485, "step": 6158 }, { "epoch": 0.9844162071445697, "grad_norm": 1.3179735053240391, "learning_rate": 1.108662554094202e-05, "loss": 0.5594606995582581, "step": 6159 }, { "epoch": 0.9845760409174459, "grad_norm": 1.470593789018095, "learning_rate": 1.1083998492328579e-05, "loss": 0.6304662227630615, "step": 6160 }, { "epoch": 0.9847358746903221, "grad_norm": 1.5196698201153394, "learning_rate": 1.1081371368012532e-05, "loss": 0.5596119165420532, "step": 6161 }, { "epoch": 0.9848957084631983, "grad_norm": 1.5906983978529585, "learning_rate": 1.1078744168177351e-05, "loss": 0.6387139558792114, "step": 6162 }, { "epoch": 0.9850555422360745, "grad_norm": 1.2245907501324442, "learning_rate": 1.1076116893006505e-05, "loss": 0.582470178604126, "step": 6163 }, { "epoch": 0.9852153760089507, "grad_norm": 1.4630773143559224, "learning_rate": 1.107348954268348e-05, "loss": 0.731170117855072, "step": 6164 }, { "epoch": 0.9853752097818269, "grad_norm": 1.6966192688989616, "learning_rate": 1.1070862117391752e-05, "loss": 0.8156148195266724, "step": 6165 }, { "epoch": 0.9855350435547031, "grad_norm": 1.4872552450651793, "learning_rate": 1.106823461731482e-05, "loss": 0.5808255076408386, "step": 6166 }, { "epoch": 0.9856948773275793, "grad_norm": 1.5383287442575144, "learning_rate": 1.1065607042636173e-05, "loss": 0.6723901033401489, "step": 6167 }, { "epoch": 0.9858547111004555, "grad_norm": 1.1978074307894058, "learning_rate": 1.1062979393539315e-05, "loss": 0.6203774809837341, "step": 6168 }, { "epoch": 0.9860145448733317, "grad_norm": 1.503701911719011, "learning_rate": 1.106035167020775e-05, "loss": 0.7372831702232361, "step": 6169 }, { "epoch": 0.9861743786462079, "grad_norm": 1.4106970159550103, "learning_rate": 1.1057723872824987e-05, "loss": 0.6166013479232788, "step": 6170 }, { "epoch": 0.9863342124190841, "grad_norm": 1.3945817698977523, "learning_rate": 1.105509600157455e-05, "loss": 0.6773691773414612, "step": 6171 }, { "epoch": 0.9864940461919603, "grad_norm": 1.436402371341598, "learning_rate": 1.1052468056639954e-05, "loss": 0.6801777482032776, "step": 6172 }, { "epoch": 0.9866538799648366, "grad_norm": 1.4375138300942358, "learning_rate": 1.1049840038204721e-05, "loss": 0.6594091653823853, "step": 6173 }, { "epoch": 0.9868137137377128, "grad_norm": 1.2325348289799738, "learning_rate": 1.1047211946452392e-05, "loss": 0.5246376991271973, "step": 6174 }, { "epoch": 0.986973547510589, "grad_norm": 1.364438895692652, "learning_rate": 1.1044583781566495e-05, "loss": 0.5568077564239502, "step": 6175 }, { "epoch": 0.9871333812834652, "grad_norm": 1.372174882917183, "learning_rate": 1.1041955543730578e-05, "loss": 0.618757963180542, "step": 6176 }, { "epoch": 0.9872932150563414, "grad_norm": 1.409658343413917, "learning_rate": 1.1039327233128187e-05, "loss": 0.8551143407821655, "step": 6177 }, { "epoch": 0.9874530488292176, "grad_norm": 1.3193845658376386, "learning_rate": 1.1036698849942868e-05, "loss": 0.6553105115890503, "step": 6178 }, { "epoch": 0.9876128826020938, "grad_norm": 1.628997050006407, "learning_rate": 1.1034070394358187e-05, "loss": 0.6569492816925049, "step": 6179 }, { "epoch": 0.98777271637497, "grad_norm": 1.3504089225359441, "learning_rate": 1.10314418665577e-05, "loss": 0.7503750920295715, "step": 6180 }, { "epoch": 0.9879325501478462, "grad_norm": 1.6061072766183897, "learning_rate": 1.1028813266724975e-05, "loss": 0.7331322431564331, "step": 6181 }, { "epoch": 0.9880923839207224, "grad_norm": 1.8362727152055125, "learning_rate": 1.1026184595043587e-05, "loss": 0.754949688911438, "step": 6182 }, { "epoch": 0.9882522176935986, "grad_norm": 1.2843136205901584, "learning_rate": 1.1023555851697108e-05, "loss": 0.5564025640487671, "step": 6183 }, { "epoch": 0.9884120514664748, "grad_norm": 1.4896223825097956, "learning_rate": 1.1020927036869122e-05, "loss": 0.6452885866165161, "step": 6184 }, { "epoch": 0.988571885239351, "grad_norm": 1.5741133763415958, "learning_rate": 1.1018298150743217e-05, "loss": 0.7014914751052856, "step": 6185 }, { "epoch": 0.9887317190122272, "grad_norm": 1.588766461830902, "learning_rate": 1.1015669193502987e-05, "loss": 0.679856538772583, "step": 6186 }, { "epoch": 0.9888915527851035, "grad_norm": 1.2310717974619572, "learning_rate": 1.1013040165332024e-05, "loss": 0.538355827331543, "step": 6187 }, { "epoch": 0.9890513865579798, "grad_norm": 1.5482496373617183, "learning_rate": 1.1010411066413935e-05, "loss": 0.7355282306671143, "step": 6188 }, { "epoch": 0.989211220330856, "grad_norm": 1.2478499059818657, "learning_rate": 1.1007781896932325e-05, "loss": 0.7397192716598511, "step": 6189 }, { "epoch": 0.9893710541037322, "grad_norm": 1.2204602114177592, "learning_rate": 1.1005152657070806e-05, "loss": 0.5060815811157227, "step": 6190 }, { "epoch": 0.9895308878766084, "grad_norm": 1.3491643881772988, "learning_rate": 1.1002523347012995e-05, "loss": 0.5811724066734314, "step": 6191 }, { "epoch": 0.9896907216494846, "grad_norm": 1.3016212896535635, "learning_rate": 1.099989396694251e-05, "loss": 0.5541568398475647, "step": 6192 }, { "epoch": 0.9898505554223608, "grad_norm": 1.3851856909879945, "learning_rate": 1.0997264517042982e-05, "loss": 0.6308664083480835, "step": 6193 }, { "epoch": 0.990010389195237, "grad_norm": 1.56448891386208, "learning_rate": 1.0994634997498043e-05, "loss": 0.678461492061615, "step": 6194 }, { "epoch": 0.9901702229681132, "grad_norm": 1.3662261549016943, "learning_rate": 1.0992005408491325e-05, "loss": 0.6190327405929565, "step": 6195 }, { "epoch": 0.9903300567409894, "grad_norm": 1.3169639682598728, "learning_rate": 1.0989375750206472e-05, "loss": 0.6531450748443604, "step": 6196 }, { "epoch": 0.9904898905138656, "grad_norm": 1.2703554292846098, "learning_rate": 1.0986746022827133e-05, "loss": 0.7139682769775391, "step": 6197 }, { "epoch": 0.9906497242867418, "grad_norm": 1.3080068909561353, "learning_rate": 1.0984116226536952e-05, "loss": 0.766829252243042, "step": 6198 }, { "epoch": 0.990809558059618, "grad_norm": 1.2941671363521723, "learning_rate": 1.098148636151959e-05, "loss": 0.5706355571746826, "step": 6199 }, { "epoch": 0.9909693918324942, "grad_norm": 1.4543737913847548, "learning_rate": 1.0978856427958701e-05, "loss": 0.5575131773948669, "step": 6200 }, { "epoch": 0.9911292256053704, "grad_norm": 1.5119063090723546, "learning_rate": 1.0976226426037961e-05, "loss": 0.8028132915496826, "step": 6201 }, { "epoch": 0.9912890593782466, "grad_norm": 1.3858742201475205, "learning_rate": 1.0973596355941029e-05, "loss": 0.5615634918212891, "step": 6202 }, { "epoch": 0.9914488931511228, "grad_norm": 1.3947363716581263, "learning_rate": 1.097096621785159e-05, "loss": 0.7033660411834717, "step": 6203 }, { "epoch": 0.991608726923999, "grad_norm": 1.2920863003173795, "learning_rate": 1.0968336011953315e-05, "loss": 0.6950758695602417, "step": 6204 }, { "epoch": 0.9917685606968752, "grad_norm": 1.5595810074925511, "learning_rate": 1.0965705738429889e-05, "loss": 0.7957023978233337, "step": 6205 }, { "epoch": 0.9919283944697515, "grad_norm": 1.4191745734163719, "learning_rate": 1.0963075397465005e-05, "loss": 0.7456878423690796, "step": 6206 }, { "epoch": 0.9920882282426277, "grad_norm": 1.1874173868885465, "learning_rate": 1.0960444989242355e-05, "loss": 0.6221553087234497, "step": 6207 }, { "epoch": 0.9922480620155039, "grad_norm": 1.162570957117429, "learning_rate": 1.0957814513945637e-05, "loss": 0.5017471313476562, "step": 6208 }, { "epoch": 0.9924078957883801, "grad_norm": 1.2757862361956245, "learning_rate": 1.0955183971758555e-05, "loss": 0.5571649074554443, "step": 6209 }, { "epoch": 0.9925677295612563, "grad_norm": 1.3017098712416495, "learning_rate": 1.0952553362864816e-05, "loss": 0.6479425430297852, "step": 6210 }, { "epoch": 0.9927275633341325, "grad_norm": 1.1753426161540679, "learning_rate": 1.0949922687448132e-05, "loss": 0.6551430225372314, "step": 6211 }, { "epoch": 0.9928873971070087, "grad_norm": 1.1981971558583977, "learning_rate": 1.0947291945692219e-05, "loss": 0.6132849454879761, "step": 6212 }, { "epoch": 0.9930472308798849, "grad_norm": 1.3705581869775885, "learning_rate": 1.0944661137780804e-05, "loss": 0.5596290230751038, "step": 6213 }, { "epoch": 0.9932070646527611, "grad_norm": 1.4832587579155583, "learning_rate": 1.0942030263897607e-05, "loss": 0.5348691940307617, "step": 6214 }, { "epoch": 0.9933668984256373, "grad_norm": 1.12860785518181, "learning_rate": 1.0939399324226358e-05, "loss": 0.6838970184326172, "step": 6215 }, { "epoch": 0.9935267321985135, "grad_norm": 1.1221558337662874, "learning_rate": 1.0936768318950801e-05, "loss": 0.5796762704849243, "step": 6216 }, { "epoch": 0.9936865659713897, "grad_norm": 1.3861623992005445, "learning_rate": 1.0934137248254666e-05, "loss": 0.3829338252544403, "step": 6217 }, { "epoch": 0.9938463997442659, "grad_norm": 1.210583589353039, "learning_rate": 1.0931506112321709e-05, "loss": 0.607399582862854, "step": 6218 }, { "epoch": 0.9940062335171421, "grad_norm": 1.3471327064898373, "learning_rate": 1.0928874911335669e-05, "loss": 0.6238508224487305, "step": 6219 }, { "epoch": 0.9941660672900183, "grad_norm": 1.4417175957374724, "learning_rate": 1.0926243645480302e-05, "loss": 0.659553587436676, "step": 6220 }, { "epoch": 0.9943259010628945, "grad_norm": 1.673561549599672, "learning_rate": 1.0923612314939368e-05, "loss": 0.6390873193740845, "step": 6221 }, { "epoch": 0.9944857348357709, "grad_norm": 1.2794618902144497, "learning_rate": 1.092098091989663e-05, "loss": 0.627718448638916, "step": 6222 }, { "epoch": 0.9946455686086471, "grad_norm": 1.3907095231184334, "learning_rate": 1.0918349460535858e-05, "loss": 0.6687479019165039, "step": 6223 }, { "epoch": 0.9948054023815233, "grad_norm": 1.3500875439934024, "learning_rate": 1.0915717937040817e-05, "loss": 0.5907371044158936, "step": 6224 }, { "epoch": 0.9949652361543995, "grad_norm": 1.260118630403952, "learning_rate": 1.0913086349595286e-05, "loss": 0.45643019676208496, "step": 6225 }, { "epoch": 0.9951250699272757, "grad_norm": 1.3056335241663781, "learning_rate": 1.0910454698383048e-05, "loss": 0.5236520767211914, "step": 6226 }, { "epoch": 0.9952849037001519, "grad_norm": 1.3877216469947202, "learning_rate": 1.0907822983587888e-05, "loss": 0.5267136693000793, "step": 6227 }, { "epoch": 0.9954447374730281, "grad_norm": 1.2141204130943972, "learning_rate": 1.0905191205393593e-05, "loss": 0.535110592842102, "step": 6228 }, { "epoch": 0.9956045712459043, "grad_norm": 1.2857485078939148, "learning_rate": 1.090255936398396e-05, "loss": 0.5634952783584595, "step": 6229 }, { "epoch": 0.9957644050187805, "grad_norm": 1.3530267212263276, "learning_rate": 1.0899927459542783e-05, "loss": 0.7044204473495483, "step": 6230 }, { "epoch": 0.9959242387916567, "grad_norm": 1.5964005242014399, "learning_rate": 1.089729549225387e-05, "loss": 0.6786022186279297, "step": 6231 }, { "epoch": 0.9960840725645329, "grad_norm": 1.2218586743857167, "learning_rate": 1.0894663462301023e-05, "loss": 0.6217758655548096, "step": 6232 }, { "epoch": 0.9962439063374091, "grad_norm": 1.509836082210471, "learning_rate": 1.089203136986806e-05, "loss": 0.6022987365722656, "step": 6233 }, { "epoch": 0.9964037401102853, "grad_norm": 1.288894884302835, "learning_rate": 1.088939921513879e-05, "loss": 0.49564915895462036, "step": 6234 }, { "epoch": 0.9965635738831615, "grad_norm": 1.4227644116918656, "learning_rate": 1.0886766998297039e-05, "loss": 0.665274977684021, "step": 6235 }, { "epoch": 0.9967234076560377, "grad_norm": 1.3486942549607561, "learning_rate": 1.0884134719526629e-05, "loss": 0.6678875088691711, "step": 6236 }, { "epoch": 0.9968832414289139, "grad_norm": 1.2834416582573456, "learning_rate": 1.0881502379011389e-05, "loss": 0.6742025017738342, "step": 6237 }, { "epoch": 0.9970430752017901, "grad_norm": 1.3026897341021972, "learning_rate": 1.0878869976935154e-05, "loss": 0.6463541388511658, "step": 6238 }, { "epoch": 0.9972029089746663, "grad_norm": 2.1459382075323514, "learning_rate": 1.087623751348176e-05, "loss": 0.6933959126472473, "step": 6239 }, { "epoch": 0.9973627427475426, "grad_norm": 1.5034799260226794, "learning_rate": 1.087360498883505e-05, "loss": 0.5981336832046509, "step": 6240 }, { "epoch": 0.9975225765204188, "grad_norm": 1.23063377501357, "learning_rate": 1.087097240317887e-05, "loss": 0.5483222007751465, "step": 6241 }, { "epoch": 0.997682410293295, "grad_norm": 1.3639864695303063, "learning_rate": 1.0868339756697065e-05, "loss": 0.5667845606803894, "step": 6242 }, { "epoch": 0.9978422440661712, "grad_norm": 1.5349774552644382, "learning_rate": 1.0865707049573502e-05, "loss": 0.6226719617843628, "step": 6243 }, { "epoch": 0.9980020778390474, "grad_norm": 1.3426970636360738, "learning_rate": 1.0863074281992028e-05, "loss": 0.5090694427490234, "step": 6244 }, { "epoch": 0.9981619116119236, "grad_norm": 1.4170041218010572, "learning_rate": 1.086044145413651e-05, "loss": 0.6752749681472778, "step": 6245 }, { "epoch": 0.9983217453847998, "grad_norm": 1.3557858926172637, "learning_rate": 1.085780856619082e-05, "loss": 0.6313378810882568, "step": 6246 }, { "epoch": 0.998481579157676, "grad_norm": 1.384303280387971, "learning_rate": 1.0855175618338823e-05, "loss": 0.6164373159408569, "step": 6247 }, { "epoch": 0.9986414129305522, "grad_norm": 1.3987835299625089, "learning_rate": 1.0852542610764399e-05, "loss": 0.6689578294754028, "step": 6248 }, { "epoch": 0.9988012467034284, "grad_norm": 1.1732323133111582, "learning_rate": 1.0849909543651423e-05, "loss": 0.5117040872573853, "step": 6249 }, { "epoch": 0.9989610804763046, "grad_norm": 1.4707638890470491, "learning_rate": 1.0847276417183786e-05, "loss": 0.6505225300788879, "step": 6250 }, { "epoch": 0.9991209142491808, "grad_norm": 1.4003980711947714, "learning_rate": 1.084464323154537e-05, "loss": 0.5925883054733276, "step": 6251 }, { "epoch": 0.999280748022057, "grad_norm": 1.5581404704555324, "learning_rate": 1.084200998692007e-05, "loss": 0.5874848365783691, "step": 6252 }, { "epoch": 0.9994405817949332, "grad_norm": 1.3359469829049875, "learning_rate": 1.0839376683491784e-05, "loss": 0.5531243085861206, "step": 6253 }, { "epoch": 0.9996004155678094, "grad_norm": 1.3901742607723595, "learning_rate": 1.083674332144441e-05, "loss": 0.6753431558609009, "step": 6254 }, { "epoch": 0.9997602493406856, "grad_norm": 1.6512236858956704, "learning_rate": 1.0834109900961851e-05, "loss": 0.7253162264823914, "step": 6255 }, { "epoch": 0.9999200831135618, "grad_norm": 1.1871272954158603, "learning_rate": 1.0831476422228022e-05, "loss": 0.4651700556278229, "step": 6256 }, { "epoch": 1.0, "grad_norm": 1.9466201211831171, "learning_rate": 1.0828842885426828e-05, "loss": 0.5753037929534912, "step": 6257 }, { "epoch": 1.0001598337728763, "grad_norm": 1.2757657470060533, "learning_rate": 1.0826209290742195e-05, "loss": 0.4912870526313782, "step": 6258 }, { "epoch": 1.0003196675457524, "grad_norm": 1.1496747927006221, "learning_rate": 1.0823575638358035e-05, "loss": 0.4938851296901703, "step": 6259 }, { "epoch": 1.0004795013186287, "grad_norm": 1.6898441800502166, "learning_rate": 1.082094192845828e-05, "loss": 0.6045573353767395, "step": 6260 }, { "epoch": 1.0006393350915048, "grad_norm": 0.9633512839369148, "learning_rate": 1.0818308161226853e-05, "loss": 0.4539474844932556, "step": 6261 }, { "epoch": 1.0007991688643811, "grad_norm": 1.3200538367984043, "learning_rate": 1.081567433684769e-05, "loss": 0.5650149583816528, "step": 6262 }, { "epoch": 1.0009590026372572, "grad_norm": 1.2832204103773368, "learning_rate": 1.0813040455504728e-05, "loss": 0.5698882341384888, "step": 6263 }, { "epoch": 1.0011188364101336, "grad_norm": 1.2134137259901707, "learning_rate": 1.0810406517381909e-05, "loss": 0.5688674449920654, "step": 6264 }, { "epoch": 1.0012786701830096, "grad_norm": 1.267823736319411, "learning_rate": 1.0807772522663173e-05, "loss": 0.5293726921081543, "step": 6265 }, { "epoch": 1.001438503955886, "grad_norm": 1.4298980551852079, "learning_rate": 1.0805138471532474e-05, "loss": 0.6167427897453308, "step": 6266 }, { "epoch": 1.001598337728762, "grad_norm": 1.093904445584348, "learning_rate": 1.0802504364173763e-05, "loss": 0.6688896417617798, "step": 6267 }, { "epoch": 1.0017581715016384, "grad_norm": 1.436407736360304, "learning_rate": 1.0799870200770997e-05, "loss": 0.5598130226135254, "step": 6268 }, { "epoch": 1.0019180052745145, "grad_norm": 1.3218043533359145, "learning_rate": 1.0797235981508136e-05, "loss": 0.5103017091751099, "step": 6269 }, { "epoch": 1.0020778390473908, "grad_norm": 1.4938048154869132, "learning_rate": 1.0794601706569146e-05, "loss": 0.5302343368530273, "step": 6270 }, { "epoch": 1.0022376728202669, "grad_norm": 1.4808417208418219, "learning_rate": 1.0791967376137993e-05, "loss": 0.6087052822113037, "step": 6271 }, { "epoch": 1.0023975065931432, "grad_norm": 1.1889783801119056, "learning_rate": 1.0789332990398652e-05, "loss": 0.4640199542045593, "step": 6272 }, { "epoch": 1.0025573403660193, "grad_norm": 1.219079449103205, "learning_rate": 1.0786698549535095e-05, "loss": 0.5433422327041626, "step": 6273 }, { "epoch": 1.0027171741388956, "grad_norm": 1.3696228030773858, "learning_rate": 1.0784064053731307e-05, "loss": 0.6085524559020996, "step": 6274 }, { "epoch": 1.0028770079117717, "grad_norm": 1.4366186142246369, "learning_rate": 1.0781429503171266e-05, "loss": 0.5752010941505432, "step": 6275 }, { "epoch": 1.003036841684648, "grad_norm": 1.4136915113826907, "learning_rate": 1.0778794898038967e-05, "loss": 0.5850238800048828, "step": 6276 }, { "epoch": 1.0031966754575241, "grad_norm": 1.413270693701913, "learning_rate": 1.0776160238518395e-05, "loss": 0.6114829778671265, "step": 6277 }, { "epoch": 1.0033565092304004, "grad_norm": 1.426625942212582, "learning_rate": 1.0773525524793551e-05, "loss": 0.5650456547737122, "step": 6278 }, { "epoch": 1.0035163430032765, "grad_norm": 1.4852727241549317, "learning_rate": 1.0770890757048429e-05, "loss": 0.5119244456291199, "step": 6279 }, { "epoch": 1.0036761767761528, "grad_norm": 1.4434970916567373, "learning_rate": 1.0768255935467034e-05, "loss": 0.5487409830093384, "step": 6280 }, { "epoch": 1.003836010549029, "grad_norm": 1.4711937618589508, "learning_rate": 1.0765621060233372e-05, "loss": 0.5887235999107361, "step": 6281 }, { "epoch": 1.0039958443219053, "grad_norm": 1.2582575758915049, "learning_rate": 1.0762986131531454e-05, "loss": 0.6003811359405518, "step": 6282 }, { "epoch": 1.0041556780947813, "grad_norm": 1.3450692684874654, "learning_rate": 1.0760351149545295e-05, "loss": 0.5538383722305298, "step": 6283 }, { "epoch": 1.0043155118676577, "grad_norm": 1.657408648580902, "learning_rate": 1.0757716114458909e-05, "loss": 0.7717470526695251, "step": 6284 }, { "epoch": 1.0044753456405338, "grad_norm": 1.3624858875174537, "learning_rate": 1.075508102645632e-05, "loss": 0.5727131962776184, "step": 6285 }, { "epoch": 1.00463517941341, "grad_norm": 1.3969994150025997, "learning_rate": 1.0752445885721557e-05, "loss": 0.5579670667648315, "step": 6286 }, { "epoch": 1.0047950131862862, "grad_norm": 1.3033937140175256, "learning_rate": 1.074981069243864e-05, "loss": 0.5643517374992371, "step": 6287 }, { "epoch": 1.0049548469591625, "grad_norm": 1.3425434183276355, "learning_rate": 1.0747175446791612e-05, "loss": 0.5524148344993591, "step": 6288 }, { "epoch": 1.0051146807320386, "grad_norm": 1.2768161091541486, "learning_rate": 1.07445401489645e-05, "loss": 0.5179991126060486, "step": 6289 }, { "epoch": 1.005274514504915, "grad_norm": 1.4756727550844921, "learning_rate": 1.074190479914135e-05, "loss": 0.543083131313324, "step": 6290 }, { "epoch": 1.005434348277791, "grad_norm": 1.3126211273425148, "learning_rate": 1.07392693975062e-05, "loss": 0.5047417283058167, "step": 6291 }, { "epoch": 1.0055941820506673, "grad_norm": 1.3491982455229565, "learning_rate": 1.0736633944243104e-05, "loss": 0.602623701095581, "step": 6292 }, { "epoch": 1.0057540158235436, "grad_norm": 1.5726059536239976, "learning_rate": 1.0733998439536108e-05, "loss": 0.540052592754364, "step": 6293 }, { "epoch": 1.0059138495964197, "grad_norm": 1.3380081005664974, "learning_rate": 1.0731362883569266e-05, "loss": 0.520073413848877, "step": 6294 }, { "epoch": 1.006073683369296, "grad_norm": 1.7009772220738586, "learning_rate": 1.0728727276526637e-05, "loss": 0.7350368499755859, "step": 6295 }, { "epoch": 1.0062335171421721, "grad_norm": 1.3896013948584371, "learning_rate": 1.072609161859228e-05, "loss": 0.6566444635391235, "step": 6296 }, { "epoch": 1.0063933509150484, "grad_norm": 1.255621223712628, "learning_rate": 1.0723455909950267e-05, "loss": 0.6026461720466614, "step": 6297 }, { "epoch": 1.0065531846879245, "grad_norm": 1.4765731986616604, "learning_rate": 1.072082015078466e-05, "loss": 0.609266996383667, "step": 6298 }, { "epoch": 1.0067130184608009, "grad_norm": 2.1736021122494487, "learning_rate": 1.0718184341279534e-05, "loss": 0.5184500217437744, "step": 6299 }, { "epoch": 1.006872852233677, "grad_norm": 1.4634993624267822, "learning_rate": 1.0715548481618965e-05, "loss": 0.5323348045349121, "step": 6300 }, { "epoch": 1.0070326860065533, "grad_norm": 1.2922265658916114, "learning_rate": 1.071291257198703e-05, "loss": 0.5434277057647705, "step": 6301 }, { "epoch": 1.0071925197794294, "grad_norm": 1.63618383888463, "learning_rate": 1.0710276612567811e-05, "loss": 0.5289901494979858, "step": 6302 }, { "epoch": 1.0073523535523057, "grad_norm": 1.2140412016644806, "learning_rate": 1.0707640603545397e-05, "loss": 0.5450276136398315, "step": 6303 }, { "epoch": 1.0075121873251818, "grad_norm": 1.3376475626895834, "learning_rate": 1.0705004545103875e-05, "loss": 0.4668545722961426, "step": 6304 }, { "epoch": 1.007672021098058, "grad_norm": 1.2512748786737522, "learning_rate": 1.0702368437427337e-05, "loss": 0.4882703423500061, "step": 6305 }, { "epoch": 1.0078318548709342, "grad_norm": 1.5800578608530507, "learning_rate": 1.0699732280699881e-05, "loss": 0.659075140953064, "step": 6306 }, { "epoch": 1.0079916886438105, "grad_norm": 1.278556791486217, "learning_rate": 1.069709607510561e-05, "loss": 0.5423593521118164, "step": 6307 }, { "epoch": 1.0081515224166866, "grad_norm": 1.5009718467016813, "learning_rate": 1.0694459820828626e-05, "loss": 0.48475804924964905, "step": 6308 }, { "epoch": 1.008311356189563, "grad_norm": 1.3129551325912425, "learning_rate": 1.0691823518053031e-05, "loss": 0.6325443983078003, "step": 6309 }, { "epoch": 1.008471189962439, "grad_norm": 1.1571720395895817, "learning_rate": 1.0689187166962937e-05, "loss": 0.5321831107139587, "step": 6310 }, { "epoch": 1.0086310237353153, "grad_norm": 1.9893915149833021, "learning_rate": 1.0686550767742461e-05, "loss": 0.6958268880844116, "step": 6311 }, { "epoch": 1.0087908575081914, "grad_norm": 1.1908724027439626, "learning_rate": 1.0683914320575717e-05, "loss": 0.5142714977264404, "step": 6312 }, { "epoch": 1.0089506912810677, "grad_norm": 1.10530785688127, "learning_rate": 1.0681277825646823e-05, "loss": 0.48843908309936523, "step": 6313 }, { "epoch": 1.0091105250539438, "grad_norm": 1.281010220259775, "learning_rate": 1.0678641283139907e-05, "loss": 0.6191312074661255, "step": 6314 }, { "epoch": 1.0092703588268201, "grad_norm": 1.5795071705869212, "learning_rate": 1.0676004693239091e-05, "loss": 0.5080801844596863, "step": 6315 }, { "epoch": 1.0094301925996962, "grad_norm": 1.3516512422065066, "learning_rate": 1.0673368056128509e-05, "loss": 0.4511057138442993, "step": 6316 }, { "epoch": 1.0095900263725726, "grad_norm": 1.4202310590749703, "learning_rate": 1.0670731371992294e-05, "loss": 0.5082317590713501, "step": 6317 }, { "epoch": 1.0097498601454487, "grad_norm": 1.2097654610760498, "learning_rate": 1.066809464101458e-05, "loss": 0.5435053706169128, "step": 6318 }, { "epoch": 1.009909693918325, "grad_norm": 1.4487854058688465, "learning_rate": 1.066545786337951e-05, "loss": 0.6195658445358276, "step": 6319 }, { "epoch": 1.010069527691201, "grad_norm": 1.3754858995940453, "learning_rate": 1.0662821039271225e-05, "loss": 0.5642558336257935, "step": 6320 }, { "epoch": 1.0102293614640774, "grad_norm": 1.5018780091225374, "learning_rate": 1.0660184168873871e-05, "loss": 0.49540799856185913, "step": 6321 }, { "epoch": 1.0103891952369535, "grad_norm": 1.6715523929489986, "learning_rate": 1.0657547252371601e-05, "loss": 0.6720250844955444, "step": 6322 }, { "epoch": 1.0105490290098298, "grad_norm": 1.2400096225564228, "learning_rate": 1.0654910289948564e-05, "loss": 0.5183400511741638, "step": 6323 }, { "epoch": 1.0107088627827059, "grad_norm": 1.336954749896376, "learning_rate": 1.065227328178892e-05, "loss": 0.6588212251663208, "step": 6324 }, { "epoch": 1.0108686965555822, "grad_norm": 1.483431871492949, "learning_rate": 1.0649636228076824e-05, "loss": 0.5362921953201294, "step": 6325 }, { "epoch": 1.0110285303284583, "grad_norm": 1.3615200940231034, "learning_rate": 1.0646999128996435e-05, "loss": 0.5878104567527771, "step": 6326 }, { "epoch": 1.0111883641013346, "grad_norm": 1.393323100920226, "learning_rate": 1.0644361984731932e-05, "loss": 0.5997945070266724, "step": 6327 }, { "epoch": 1.011348197874211, "grad_norm": 1.419441953612203, "learning_rate": 1.0641724795467474e-05, "loss": 0.5889352560043335, "step": 6328 }, { "epoch": 1.011508031647087, "grad_norm": 1.41747587560168, "learning_rate": 1.0639087561387236e-05, "loss": 0.590257465839386, "step": 6329 }, { "epoch": 1.0116678654199633, "grad_norm": 1.4053091832751852, "learning_rate": 1.063645028267539e-05, "loss": 0.5758427977561951, "step": 6330 }, { "epoch": 1.0118276991928394, "grad_norm": 1.3455379099276972, "learning_rate": 1.0633812959516116e-05, "loss": 0.6904070377349854, "step": 6331 }, { "epoch": 1.0119875329657158, "grad_norm": 1.4445980439261132, "learning_rate": 1.0631175592093598e-05, "loss": 0.6517212390899658, "step": 6332 }, { "epoch": 1.0121473667385918, "grad_norm": 1.5116428687146053, "learning_rate": 1.0628538180592014e-05, "loss": 0.5864771008491516, "step": 6333 }, { "epoch": 1.0123072005114682, "grad_norm": 1.7436836103786537, "learning_rate": 1.062590072519556e-05, "loss": 0.5634647607803345, "step": 6334 }, { "epoch": 1.0124670342843443, "grad_norm": 1.1983620013956602, "learning_rate": 1.062326322608842e-05, "loss": 0.47786515951156616, "step": 6335 }, { "epoch": 1.0126268680572206, "grad_norm": 1.478144687413207, "learning_rate": 1.0620625683454786e-05, "loss": 0.7093689441680908, "step": 6336 }, { "epoch": 1.0127867018300967, "grad_norm": 1.3441791031477044, "learning_rate": 1.061798809747886e-05, "loss": 0.5453428030014038, "step": 6337 }, { "epoch": 1.012946535602973, "grad_norm": 1.356060222132062, "learning_rate": 1.061535046834484e-05, "loss": 0.5101192593574524, "step": 6338 }, { "epoch": 1.013106369375849, "grad_norm": 1.4258431991952891, "learning_rate": 1.0612712796236927e-05, "loss": 0.4526989459991455, "step": 6339 }, { "epoch": 1.0132662031487254, "grad_norm": 1.655927128305627, "learning_rate": 1.0610075081339332e-05, "loss": 0.5463411808013916, "step": 6340 }, { "epoch": 1.0134260369216015, "grad_norm": 1.646585833494924, "learning_rate": 1.0607437323836256e-05, "loss": 0.5276801586151123, "step": 6341 }, { "epoch": 1.0135858706944778, "grad_norm": 1.5414666890436264, "learning_rate": 1.0604799523911915e-05, "loss": 0.6738910675048828, "step": 6342 }, { "epoch": 1.013745704467354, "grad_norm": 1.2227656932360942, "learning_rate": 1.0602161681750522e-05, "loss": 0.4346553087234497, "step": 6343 }, { "epoch": 1.0139055382402302, "grad_norm": 1.254859542532985, "learning_rate": 1.0599523797536298e-05, "loss": 0.4374135732650757, "step": 6344 }, { "epoch": 1.0140653720131063, "grad_norm": 1.5025947611190278, "learning_rate": 1.0596885871453457e-05, "loss": 0.47171902656555176, "step": 6345 }, { "epoch": 1.0142252057859826, "grad_norm": 1.6090737900430718, "learning_rate": 1.0594247903686225e-05, "loss": 0.5450018644332886, "step": 6346 }, { "epoch": 1.0143850395588587, "grad_norm": 1.4777323826221358, "learning_rate": 1.0591609894418835e-05, "loss": 0.601615309715271, "step": 6347 }, { "epoch": 1.014544873331735, "grad_norm": 1.41458866448883, "learning_rate": 1.0588971843835507e-05, "loss": 0.6403154730796814, "step": 6348 }, { "epoch": 1.0147047071046111, "grad_norm": 1.2628597959898256, "learning_rate": 1.0586333752120479e-05, "loss": 0.4495726525783539, "step": 6349 }, { "epoch": 1.0148645408774875, "grad_norm": 1.5580717005997913, "learning_rate": 1.0583695619457983e-05, "loss": 0.5963699817657471, "step": 6350 }, { "epoch": 1.0150243746503635, "grad_norm": 1.67578025162197, "learning_rate": 1.0581057446032254e-05, "loss": 0.6362361907958984, "step": 6351 }, { "epoch": 1.0151842084232399, "grad_norm": 1.528174389244368, "learning_rate": 1.0578419232027541e-05, "loss": 0.5789356231689453, "step": 6352 }, { "epoch": 1.015344042196116, "grad_norm": 1.5255756846454382, "learning_rate": 1.057578097762808e-05, "loss": 0.6540530920028687, "step": 6353 }, { "epoch": 1.0155038759689923, "grad_norm": 1.4409618773369595, "learning_rate": 1.0573142683018123e-05, "loss": 0.7037988305091858, "step": 6354 }, { "epoch": 1.0156637097418684, "grad_norm": 1.549491101117447, "learning_rate": 1.0570504348381911e-05, "loss": 0.49347037076950073, "step": 6355 }, { "epoch": 1.0158235435147447, "grad_norm": 1.3864753364399047, "learning_rate": 1.0567865973903704e-05, "loss": 0.6401690244674683, "step": 6356 }, { "epoch": 1.0159833772876208, "grad_norm": 1.5111962645345065, "learning_rate": 1.0565227559767755e-05, "loss": 0.45891982316970825, "step": 6357 }, { "epoch": 1.016143211060497, "grad_norm": 1.6353243212935236, "learning_rate": 1.0562589106158317e-05, "loss": 0.5089789032936096, "step": 6358 }, { "epoch": 1.0163030448333732, "grad_norm": 1.3388189243684514, "learning_rate": 1.0559950613259655e-05, "loss": 0.5741634368896484, "step": 6359 }, { "epoch": 1.0164628786062495, "grad_norm": 1.6204429064661887, "learning_rate": 1.055731208125603e-05, "loss": 0.5820925831794739, "step": 6360 }, { "epoch": 1.0166227123791256, "grad_norm": 1.4611860232974638, "learning_rate": 1.055467351033171e-05, "loss": 0.5553723573684692, "step": 6361 }, { "epoch": 1.016782546152002, "grad_norm": 1.3574897106022425, "learning_rate": 1.055203490067096e-05, "loss": 0.6914899349212646, "step": 6362 }, { "epoch": 1.016942379924878, "grad_norm": 1.3079716226049687, "learning_rate": 1.0549396252458053e-05, "loss": 0.5181155204772949, "step": 6363 }, { "epoch": 1.0171022136977543, "grad_norm": 1.5276931459144134, "learning_rate": 1.0546757565877263e-05, "loss": 0.56253981590271, "step": 6364 }, { "epoch": 1.0172620474706306, "grad_norm": 1.3915015713942327, "learning_rate": 1.0544118841112865e-05, "loss": 0.6168774366378784, "step": 6365 }, { "epoch": 1.0174218812435067, "grad_norm": 1.4203615297367655, "learning_rate": 1.0541480078349137e-05, "loss": 0.6632140874862671, "step": 6366 }, { "epoch": 1.017581715016383, "grad_norm": 1.8944617026935309, "learning_rate": 1.053884127777037e-05, "loss": 0.5073263645172119, "step": 6367 }, { "epoch": 1.0177415487892592, "grad_norm": 1.4763230092668103, "learning_rate": 1.0536202439560834e-05, "loss": 0.528716504573822, "step": 6368 }, { "epoch": 1.0179013825621355, "grad_norm": 2.291373039902806, "learning_rate": 1.0533563563904828e-05, "loss": 0.5985743999481201, "step": 6369 }, { "epoch": 1.0180612163350116, "grad_norm": 1.200981813706042, "learning_rate": 1.0530924650986633e-05, "loss": 0.5049870014190674, "step": 6370 }, { "epoch": 1.0182210501078879, "grad_norm": 1.380280620011788, "learning_rate": 1.052828570099055e-05, "loss": 0.4654086232185364, "step": 6371 }, { "epoch": 1.018380883880764, "grad_norm": 1.3414561358331483, "learning_rate": 1.052564671410087e-05, "loss": 0.6327410936355591, "step": 6372 }, { "epoch": 1.0185407176536403, "grad_norm": 1.358689621206948, "learning_rate": 1.0523007690501885e-05, "loss": 0.442621648311615, "step": 6373 }, { "epoch": 1.0187005514265164, "grad_norm": 2.1371339114834633, "learning_rate": 1.0520368630377904e-05, "loss": 0.5109648108482361, "step": 6374 }, { "epoch": 1.0188603851993927, "grad_norm": 1.4994577097621336, "learning_rate": 1.0517729533913224e-05, "loss": 0.708330512046814, "step": 6375 }, { "epoch": 1.0190202189722688, "grad_norm": 1.4302246026742227, "learning_rate": 1.0515090401292152e-05, "loss": 0.6287922263145447, "step": 6376 }, { "epoch": 1.0191800527451451, "grad_norm": 1.362922677650096, "learning_rate": 1.0512451232698994e-05, "loss": 0.6076197624206543, "step": 6377 }, { "epoch": 1.0193398865180212, "grad_norm": 2.673047875053158, "learning_rate": 1.0509812028318063e-05, "loss": 0.5311601758003235, "step": 6378 }, { "epoch": 1.0194997202908975, "grad_norm": 1.4375660451741075, "learning_rate": 1.050717278833367e-05, "loss": 0.5774986743927002, "step": 6379 }, { "epoch": 1.0196595540637736, "grad_norm": 1.4453824155194985, "learning_rate": 1.0504533512930132e-05, "loss": 0.5374851822853088, "step": 6380 }, { "epoch": 1.01981938783665, "grad_norm": 1.563675336905256, "learning_rate": 1.0501894202291763e-05, "loss": 0.6122904419898987, "step": 6381 }, { "epoch": 1.019979221609526, "grad_norm": 1.4378891097219066, "learning_rate": 1.0499254856602885e-05, "loss": 0.5672837495803833, "step": 6382 }, { "epoch": 1.0201390553824023, "grad_norm": 1.304792113807637, "learning_rate": 1.049661547604782e-05, "loss": 0.5178512334823608, "step": 6383 }, { "epoch": 1.0202988891552784, "grad_norm": 1.4344034067151026, "learning_rate": 1.0493976060810895e-05, "loss": 0.5892807245254517, "step": 6384 }, { "epoch": 1.0204587229281548, "grad_norm": 1.45205143286735, "learning_rate": 1.0491336611076435e-05, "loss": 0.5185755491256714, "step": 6385 }, { "epoch": 1.0206185567010309, "grad_norm": 1.3683525816550373, "learning_rate": 1.048869712702877e-05, "loss": 0.5992652773857117, "step": 6386 }, { "epoch": 1.0207783904739072, "grad_norm": 1.5511191448597421, "learning_rate": 1.0486057608852236e-05, "loss": 0.526658833026886, "step": 6387 }, { "epoch": 1.0209382242467833, "grad_norm": 1.592199919890858, "learning_rate": 1.0483418056731161e-05, "loss": 0.55305016040802, "step": 6388 }, { "epoch": 1.0210980580196596, "grad_norm": 1.4443849350716784, "learning_rate": 1.0480778470849885e-05, "loss": 0.5388757586479187, "step": 6389 }, { "epoch": 1.0212578917925357, "grad_norm": 1.2027551191357744, "learning_rate": 1.0478138851392749e-05, "loss": 0.5368421077728271, "step": 6390 }, { "epoch": 1.021417725565412, "grad_norm": 2.3178282599734823, "learning_rate": 1.0475499198544096e-05, "loss": 0.5289212465286255, "step": 6391 }, { "epoch": 1.021577559338288, "grad_norm": 1.3922773764493557, "learning_rate": 1.0472859512488264e-05, "loss": 0.5050119757652283, "step": 6392 }, { "epoch": 1.0217373931111644, "grad_norm": 1.436025097613825, "learning_rate": 1.0470219793409606e-05, "loss": 0.46351760625839233, "step": 6393 }, { "epoch": 1.0218972268840405, "grad_norm": 1.26110226898612, "learning_rate": 1.0467580041492464e-05, "loss": 0.4313037693500519, "step": 6394 }, { "epoch": 1.0220570606569168, "grad_norm": 1.6246167152732383, "learning_rate": 1.0464940256921195e-05, "loss": 0.5031249523162842, "step": 6395 }, { "epoch": 1.022216894429793, "grad_norm": 1.5295601513546342, "learning_rate": 1.0462300439880144e-05, "loss": 0.5245861411094666, "step": 6396 }, { "epoch": 1.0223767282026692, "grad_norm": 1.4957834344020855, "learning_rate": 1.0459660590553678e-05, "loss": 0.5206825733184814, "step": 6397 }, { "epoch": 1.0225365619755453, "grad_norm": 1.4823795554821615, "learning_rate": 1.0457020709126144e-05, "loss": 0.5058096647262573, "step": 6398 }, { "epoch": 1.0226963957484216, "grad_norm": 1.3445074761228597, "learning_rate": 1.0454380795781909e-05, "loss": 0.5396617650985718, "step": 6399 }, { "epoch": 1.022856229521298, "grad_norm": 1.404069484820586, "learning_rate": 1.0451740850705333e-05, "loss": 0.48094069957733154, "step": 6400 }, { "epoch": 1.023016063294174, "grad_norm": 1.6211966236036135, "learning_rate": 1.0449100874080777e-05, "loss": 0.5799289345741272, "step": 6401 }, { "epoch": 1.0231758970670504, "grad_norm": 1.4384574954418865, "learning_rate": 1.0446460866092613e-05, "loss": 0.502835214138031, "step": 6402 }, { "epoch": 1.0233357308399265, "grad_norm": 1.247734122697945, "learning_rate": 1.0443820826925208e-05, "loss": 0.5494664311408997, "step": 6403 }, { "epoch": 1.0234955646128028, "grad_norm": 1.3004873865459436, "learning_rate": 1.0441180756762931e-05, "loss": 0.5926706790924072, "step": 6404 }, { "epoch": 1.0236553983856789, "grad_norm": 1.4977966428657508, "learning_rate": 1.0438540655790156e-05, "loss": 0.5129671096801758, "step": 6405 }, { "epoch": 1.0238152321585552, "grad_norm": 1.3143151350276001, "learning_rate": 1.043590052419126e-05, "loss": 0.5640916228294373, "step": 6406 }, { "epoch": 1.0239750659314313, "grad_norm": 1.4060808396619782, "learning_rate": 1.0433260362150618e-05, "loss": 0.5948761701583862, "step": 6407 }, { "epoch": 1.0241348997043076, "grad_norm": 1.5926088659825604, "learning_rate": 1.0430620169852613e-05, "loss": 0.5131572484970093, "step": 6408 }, { "epoch": 1.0242947334771837, "grad_norm": 1.4007212178946145, "learning_rate": 1.0427979947481624e-05, "loss": 0.5267329216003418, "step": 6409 }, { "epoch": 1.02445456725006, "grad_norm": 1.333920237422151, "learning_rate": 1.0425339695222032e-05, "loss": 0.5830267071723938, "step": 6410 }, { "epoch": 1.024614401022936, "grad_norm": 1.203715589276608, "learning_rate": 1.0422699413258229e-05, "loss": 0.5405546426773071, "step": 6411 }, { "epoch": 1.0247742347958124, "grad_norm": 1.4486028078418292, "learning_rate": 1.0420059101774598e-05, "loss": 0.6140168905258179, "step": 6412 }, { "epoch": 1.0249340685686885, "grad_norm": 1.2456119823046505, "learning_rate": 1.0417418760955532e-05, "loss": 0.574141800403595, "step": 6413 }, { "epoch": 1.0250939023415648, "grad_norm": 1.3342075947277299, "learning_rate": 1.0414778390985423e-05, "loss": 0.5690782070159912, "step": 6414 }, { "epoch": 1.025253736114441, "grad_norm": 1.297405525032008, "learning_rate": 1.0412137992048662e-05, "loss": 0.4473280906677246, "step": 6415 }, { "epoch": 1.0254135698873172, "grad_norm": 1.5277834023692076, "learning_rate": 1.0409497564329644e-05, "loss": 0.48757630586624146, "step": 6416 }, { "epoch": 1.0255734036601933, "grad_norm": 1.5542143820782068, "learning_rate": 1.0406857108012773e-05, "loss": 0.6236777305603027, "step": 6417 }, { "epoch": 1.0257332374330697, "grad_norm": 1.4360300847973484, "learning_rate": 1.0404216623282448e-05, "loss": 0.7458382844924927, "step": 6418 }, { "epoch": 1.0258930712059458, "grad_norm": 1.2058652398380092, "learning_rate": 1.0401576110323068e-05, "loss": 0.35117948055267334, "step": 6419 }, { "epoch": 1.026052904978822, "grad_norm": 1.3225254148794183, "learning_rate": 1.039893556931904e-05, "loss": 0.4979146718978882, "step": 6420 }, { "epoch": 1.0262127387516982, "grad_norm": 1.5268968886961563, "learning_rate": 1.0396295000454766e-05, "loss": 0.6358253955841064, "step": 6421 }, { "epoch": 1.0263725725245745, "grad_norm": 1.6201236980737805, "learning_rate": 1.0393654403914657e-05, "loss": 0.6547834873199463, "step": 6422 }, { "epoch": 1.0265324062974506, "grad_norm": 1.4489892431754041, "learning_rate": 1.0391013779883123e-05, "loss": 0.5196316242218018, "step": 6423 }, { "epoch": 1.026692240070327, "grad_norm": 1.3961442699177427, "learning_rate": 1.0388373128544573e-05, "loss": 0.6231788396835327, "step": 6424 }, { "epoch": 1.026852073843203, "grad_norm": 1.1719969799047967, "learning_rate": 1.0385732450083425e-05, "loss": 0.5547080636024475, "step": 6425 }, { "epoch": 1.0270119076160793, "grad_norm": 1.4956453039380677, "learning_rate": 1.038309174468409e-05, "loss": 0.5325032472610474, "step": 6426 }, { "epoch": 1.0271717413889554, "grad_norm": 1.2144684163267747, "learning_rate": 1.038045101253099e-05, "loss": 0.5203404426574707, "step": 6427 }, { "epoch": 1.0273315751618317, "grad_norm": 1.40348512880361, "learning_rate": 1.0377810253808542e-05, "loss": 0.5281050205230713, "step": 6428 }, { "epoch": 1.0274914089347078, "grad_norm": 1.4034900401809582, "learning_rate": 1.0375169468701167e-05, "loss": 0.5796730518341064, "step": 6429 }, { "epoch": 1.0276512427075841, "grad_norm": 1.3678724708815786, "learning_rate": 1.0372528657393288e-05, "loss": 0.49215319752693176, "step": 6430 }, { "epoch": 1.0278110764804602, "grad_norm": 1.388398802462625, "learning_rate": 1.036988782006933e-05, "loss": 0.4467505216598511, "step": 6431 }, { "epoch": 1.0279709102533365, "grad_norm": 1.5165434738315666, "learning_rate": 1.036724695691372e-05, "loss": 0.6597033739089966, "step": 6432 }, { "epoch": 1.0281307440262126, "grad_norm": 1.7403373123591108, "learning_rate": 1.0364606068110889e-05, "loss": 0.7276226282119751, "step": 6433 }, { "epoch": 1.028290577799089, "grad_norm": 1.3363525936089096, "learning_rate": 1.0361965153845262e-05, "loss": 0.6069903373718262, "step": 6434 }, { "epoch": 1.0284504115719653, "grad_norm": 1.8874769659860628, "learning_rate": 1.0359324214301277e-05, "loss": 0.5523284077644348, "step": 6435 }, { "epoch": 1.0286102453448414, "grad_norm": 1.516677545206969, "learning_rate": 1.035668324966336e-05, "loss": 0.61708664894104, "step": 6436 }, { "epoch": 1.0287700791177177, "grad_norm": 1.28698084621469, "learning_rate": 1.0354042260115954e-05, "loss": 0.5881301164627075, "step": 6437 }, { "epoch": 1.0289299128905938, "grad_norm": 1.4890389192515492, "learning_rate": 1.0351401245843493e-05, "loss": 0.5450636148452759, "step": 6438 }, { "epoch": 1.02908974666347, "grad_norm": 1.4344056339898767, "learning_rate": 1.034876020703042e-05, "loss": 0.5646841526031494, "step": 6439 }, { "epoch": 1.0292495804363462, "grad_norm": 1.4242755455335796, "learning_rate": 1.0346119143861168e-05, "loss": 0.5395259857177734, "step": 6440 }, { "epoch": 1.0294094142092225, "grad_norm": 1.1643510382614382, "learning_rate": 1.0343478056520186e-05, "loss": 0.42465460300445557, "step": 6441 }, { "epoch": 1.0295692479820986, "grad_norm": 1.3971148563397322, "learning_rate": 1.0340836945191917e-05, "loss": 0.5800672173500061, "step": 6442 }, { "epoch": 1.029729081754975, "grad_norm": 1.424308724778952, "learning_rate": 1.0338195810060804e-05, "loss": 0.6461203098297119, "step": 6443 }, { "epoch": 1.029888915527851, "grad_norm": 1.4114969377307662, "learning_rate": 1.0335554651311296e-05, "loss": 0.5770639181137085, "step": 6444 }, { "epoch": 1.0300487493007273, "grad_norm": 1.395188611965321, "learning_rate": 1.0332913469127847e-05, "loss": 0.6321660876274109, "step": 6445 }, { "epoch": 1.0302085830736034, "grad_norm": 1.5199839540146929, "learning_rate": 1.0330272263694899e-05, "loss": 0.5948467254638672, "step": 6446 }, { "epoch": 1.0303684168464797, "grad_norm": 1.2954555034416488, "learning_rate": 1.032763103519691e-05, "loss": 0.5309035778045654, "step": 6447 }, { "epoch": 1.0305282506193558, "grad_norm": 1.7351186322852103, "learning_rate": 1.0324989783818333e-05, "loss": 0.5752701759338379, "step": 6448 }, { "epoch": 1.0306880843922321, "grad_norm": 1.852180941584885, "learning_rate": 1.0322348509743622e-05, "loss": 0.6442651152610779, "step": 6449 }, { "epoch": 1.0308479181651082, "grad_norm": 1.5512770826175415, "learning_rate": 1.031970721315724e-05, "loss": 0.5240070223808289, "step": 6450 }, { "epoch": 1.0310077519379846, "grad_norm": 1.388330560747777, "learning_rate": 1.0317065894243639e-05, "loss": 0.4909105896949768, "step": 6451 }, { "epoch": 1.0311675857108606, "grad_norm": 1.334412776419587, "learning_rate": 1.0314424553187282e-05, "loss": 0.622077465057373, "step": 6452 }, { "epoch": 1.031327419483737, "grad_norm": 1.319239244595523, "learning_rate": 1.031178319017263e-05, "loss": 0.45211291313171387, "step": 6453 }, { "epoch": 1.031487253256613, "grad_norm": 1.2873239906044391, "learning_rate": 1.0309141805384146e-05, "loss": 0.6165117025375366, "step": 6454 }, { "epoch": 1.0316470870294894, "grad_norm": 1.4411613610079304, "learning_rate": 1.0306500399006298e-05, "loss": 0.4406382441520691, "step": 6455 }, { "epoch": 1.0318069208023655, "grad_norm": 1.3389462538173373, "learning_rate": 1.0303858971223549e-05, "loss": 0.5599458813667297, "step": 6456 }, { "epoch": 1.0319667545752418, "grad_norm": 1.2631580345182203, "learning_rate": 1.0301217522220368e-05, "loss": 0.4962192475795746, "step": 6457 }, { "epoch": 1.0321265883481179, "grad_norm": 1.5782556848118414, "learning_rate": 1.0298576052181227e-05, "loss": 0.7360862493515015, "step": 6458 }, { "epoch": 1.0322864221209942, "grad_norm": 1.4009137718340836, "learning_rate": 1.0295934561290593e-05, "loss": 0.5366819500923157, "step": 6459 }, { "epoch": 1.0324462558938703, "grad_norm": 1.560042094376845, "learning_rate": 1.029329304973294e-05, "loss": 0.6277663111686707, "step": 6460 }, { "epoch": 1.0326060896667466, "grad_norm": 1.491388970058048, "learning_rate": 1.0290651517692745e-05, "loss": 0.5241970419883728, "step": 6461 }, { "epoch": 1.0327659234396227, "grad_norm": 1.6117504641460558, "learning_rate": 1.0288009965354475e-05, "loss": 0.7012661695480347, "step": 6462 }, { "epoch": 1.032925757212499, "grad_norm": 1.7740943575665105, "learning_rate": 1.0285368392902615e-05, "loss": 0.5527114868164062, "step": 6463 }, { "epoch": 1.0330855909853751, "grad_norm": 1.4598819832028007, "learning_rate": 1.0282726800521638e-05, "loss": 0.6792351603507996, "step": 6464 }, { "epoch": 1.0332454247582514, "grad_norm": 1.4189931912871783, "learning_rate": 1.0280085188396027e-05, "loss": 0.44044798612594604, "step": 6465 }, { "epoch": 1.0334052585311275, "grad_norm": 1.4452471273434042, "learning_rate": 1.0277443556710258e-05, "loss": 0.6028352975845337, "step": 6466 }, { "epoch": 1.0335650923040038, "grad_norm": 1.3844906528412422, "learning_rate": 1.0274801905648816e-05, "loss": 0.5900224447250366, "step": 6467 }, { "epoch": 1.03372492607688, "grad_norm": 1.5215425478365943, "learning_rate": 1.0272160235396189e-05, "loss": 0.46497392654418945, "step": 6468 }, { "epoch": 1.0338847598497563, "grad_norm": 1.5701748174955834, "learning_rate": 1.0269518546136854e-05, "loss": 0.597348690032959, "step": 6469 }, { "epoch": 1.0340445936226326, "grad_norm": 1.482843223214662, "learning_rate": 1.0266876838055304e-05, "loss": 0.44794386625289917, "step": 6470 }, { "epoch": 1.0342044273955087, "grad_norm": 1.4297555855764514, "learning_rate": 1.0264235111336023e-05, "loss": 0.5081660151481628, "step": 6471 }, { "epoch": 1.034364261168385, "grad_norm": 1.5374875572260591, "learning_rate": 1.0261593366163496e-05, "loss": 0.5237970948219299, "step": 6472 }, { "epoch": 1.034524094941261, "grad_norm": 1.5797649891249004, "learning_rate": 1.025895160272222e-05, "loss": 0.5938165187835693, "step": 6473 }, { "epoch": 1.0346839287141374, "grad_norm": 1.4166686267311275, "learning_rate": 1.0256309821196682e-05, "loss": 0.5683537125587463, "step": 6474 }, { "epoch": 1.0348437624870135, "grad_norm": 1.464127901450752, "learning_rate": 1.025366802177138e-05, "loss": 0.5677320957183838, "step": 6475 }, { "epoch": 1.0350035962598898, "grad_norm": 1.3320957236343076, "learning_rate": 1.0251026204630799e-05, "loss": 0.5450940132141113, "step": 6476 }, { "epoch": 1.035163430032766, "grad_norm": 1.4293031726933592, "learning_rate": 1.024838436995944e-05, "loss": 0.5359078645706177, "step": 6477 }, { "epoch": 1.0353232638056422, "grad_norm": 1.338977266356727, "learning_rate": 1.0245742517941802e-05, "loss": 0.6202707290649414, "step": 6478 }, { "epoch": 1.0354830975785183, "grad_norm": 1.457682654163346, "learning_rate": 1.0243100648762374e-05, "loss": 0.46268725395202637, "step": 6479 }, { "epoch": 1.0356429313513946, "grad_norm": 1.4568502425001086, "learning_rate": 1.0240458762605665e-05, "loss": 0.5740042924880981, "step": 6480 }, { "epoch": 1.0358027651242707, "grad_norm": 1.6024995244253073, "learning_rate": 1.0237816859656167e-05, "loss": 0.6764950752258301, "step": 6481 }, { "epoch": 1.035962598897147, "grad_norm": 1.3404535579687331, "learning_rate": 1.0235174940098385e-05, "loss": 0.5287992358207703, "step": 6482 }, { "epoch": 1.0361224326700231, "grad_norm": 1.3525163974450713, "learning_rate": 1.0232533004116822e-05, "loss": 0.5525130033493042, "step": 6483 }, { "epoch": 1.0362822664428994, "grad_norm": 1.5902817416128192, "learning_rate": 1.0229891051895978e-05, "loss": 0.5199778079986572, "step": 6484 }, { "epoch": 1.0364421002157755, "grad_norm": 1.498370927037378, "learning_rate": 1.0227249083620358e-05, "loss": 0.5587289333343506, "step": 6485 }, { "epoch": 1.0366019339886519, "grad_norm": 1.4175155566287607, "learning_rate": 1.0224607099474468e-05, "loss": 0.5089398622512817, "step": 6486 }, { "epoch": 1.036761767761528, "grad_norm": 1.2828489693001126, "learning_rate": 1.0221965099642817e-05, "loss": 0.6208990812301636, "step": 6487 }, { "epoch": 1.0369216015344043, "grad_norm": 1.4493433800321482, "learning_rate": 1.0219323084309914e-05, "loss": 0.5252114534378052, "step": 6488 }, { "epoch": 1.0370814353072804, "grad_norm": 1.3156231192312282, "learning_rate": 1.0216681053660262e-05, "loss": 0.46230313181877136, "step": 6489 }, { "epoch": 1.0372412690801567, "grad_norm": 1.6201842570299565, "learning_rate": 1.0214039007878378e-05, "loss": 0.6490373611450195, "step": 6490 }, { "epoch": 1.0374011028530328, "grad_norm": 1.4799118356880834, "learning_rate": 1.0211396947148766e-05, "loss": 0.5472490787506104, "step": 6491 }, { "epoch": 1.037560936625909, "grad_norm": 1.2467286053432254, "learning_rate": 1.0208754871655943e-05, "loss": 0.6307540535926819, "step": 6492 }, { "epoch": 1.0377207703987852, "grad_norm": 1.517850598008346, "learning_rate": 1.0206112781584426e-05, "loss": 0.5891799926757812, "step": 6493 }, { "epoch": 1.0378806041716615, "grad_norm": 1.1652982494563024, "learning_rate": 1.0203470677118717e-05, "loss": 0.4222262501716614, "step": 6494 }, { "epoch": 1.0380404379445376, "grad_norm": 2.801592993020371, "learning_rate": 1.0200828558443342e-05, "loss": 0.5632500052452087, "step": 6495 }, { "epoch": 1.038200271717414, "grad_norm": 1.9698453600148398, "learning_rate": 1.0198186425742812e-05, "loss": 0.5657407641410828, "step": 6496 }, { "epoch": 1.03836010549029, "grad_norm": 1.402133439513952, "learning_rate": 1.0195544279201646e-05, "loss": 0.44687914848327637, "step": 6497 }, { "epoch": 1.0385199392631663, "grad_norm": 1.3359404128948065, "learning_rate": 1.0192902119004364e-05, "loss": 0.5450968146324158, "step": 6498 }, { "epoch": 1.0386797730360424, "grad_norm": 1.3357761149312148, "learning_rate": 1.0190259945335479e-05, "loss": 0.5056923031806946, "step": 6499 }, { "epoch": 1.0388396068089187, "grad_norm": 1.3627205850954502, "learning_rate": 1.018761775837952e-05, "loss": 0.6910403966903687, "step": 6500 }, { "epoch": 1.0389994405817948, "grad_norm": 1.7037100727987327, "learning_rate": 1.0184975558321e-05, "loss": 0.5867180824279785, "step": 6501 }, { "epoch": 1.0391592743546711, "grad_norm": 1.4123625171933707, "learning_rate": 1.0182333345344444e-05, "loss": 0.4859459400177002, "step": 6502 }, { "epoch": 1.0393191081275472, "grad_norm": 1.5298710305396297, "learning_rate": 1.0179691119634373e-05, "loss": 0.7067680358886719, "step": 6503 }, { "epoch": 1.0394789419004236, "grad_norm": 1.319389865412256, "learning_rate": 1.0177048881375316e-05, "loss": 0.46508967876434326, "step": 6504 }, { "epoch": 1.0396387756732999, "grad_norm": 1.3157943493652247, "learning_rate": 1.0174406630751792e-05, "loss": 0.522930920124054, "step": 6505 }, { "epoch": 1.039798609446176, "grad_norm": 1.4412871811516752, "learning_rate": 1.0171764367948325e-05, "loss": 0.5991635322570801, "step": 6506 }, { "epoch": 1.0399584432190523, "grad_norm": 1.6122237248097022, "learning_rate": 1.0169122093149449e-05, "loss": 0.6509220600128174, "step": 6507 }, { "epoch": 1.0401182769919284, "grad_norm": 1.4988377069843892, "learning_rate": 1.0166479806539686e-05, "loss": 0.4992656707763672, "step": 6508 }, { "epoch": 1.0402781107648047, "grad_norm": 1.5430205393442389, "learning_rate": 1.0163837508303562e-05, "loss": 0.5806722044944763, "step": 6509 }, { "epoch": 1.0404379445376808, "grad_norm": 1.537380765509137, "learning_rate": 1.0161195198625611e-05, "loss": 0.576096773147583, "step": 6510 }, { "epoch": 1.040597778310557, "grad_norm": 1.5607716034547277, "learning_rate": 1.0158552877690358e-05, "loss": 0.6197652816772461, "step": 6511 }, { "epoch": 1.0407576120834332, "grad_norm": 1.3220564725404353, "learning_rate": 1.0155910545682334e-05, "loss": 0.5623790621757507, "step": 6512 }, { "epoch": 1.0409174458563095, "grad_norm": 1.3561368681027988, "learning_rate": 1.0153268202786073e-05, "loss": 0.5136899948120117, "step": 6513 }, { "epoch": 1.0410772796291856, "grad_norm": 1.14755662976322, "learning_rate": 1.0150625849186104e-05, "loss": 0.5516858100891113, "step": 6514 }, { "epoch": 1.041237113402062, "grad_norm": 1.6038406865159907, "learning_rate": 1.0147983485066963e-05, "loss": 0.5445330142974854, "step": 6515 }, { "epoch": 1.041396947174938, "grad_norm": 1.3445663643870258, "learning_rate": 1.0145341110613177e-05, "loss": 0.45373472571372986, "step": 6516 }, { "epoch": 1.0415567809478143, "grad_norm": 1.5626837312062434, "learning_rate": 1.0142698726009286e-05, "loss": 0.6151089668273926, "step": 6517 }, { "epoch": 1.0417166147206904, "grad_norm": 1.3790017201639886, "learning_rate": 1.0140056331439822e-05, "loss": 0.4872816801071167, "step": 6518 }, { "epoch": 1.0418764484935668, "grad_norm": 1.6439368834302086, "learning_rate": 1.0137413927089322e-05, "loss": 0.5961742997169495, "step": 6519 }, { "epoch": 1.0420362822664428, "grad_norm": 1.3280351468854366, "learning_rate": 1.0134771513142321e-05, "loss": 0.600476086139679, "step": 6520 }, { "epoch": 1.0421961160393192, "grad_norm": 1.4313247364987944, "learning_rate": 1.0132129089783353e-05, "loss": 0.5396819114685059, "step": 6521 }, { "epoch": 1.0423559498121953, "grad_norm": 1.549424223844832, "learning_rate": 1.0129486657196963e-05, "loss": 0.5973873138427734, "step": 6522 }, { "epoch": 1.0425157835850716, "grad_norm": 1.3029018410134483, "learning_rate": 1.012684421556768e-05, "loss": 0.6088537573814392, "step": 6523 }, { "epoch": 1.0426756173579477, "grad_norm": 1.5415054612199355, "learning_rate": 1.0124201765080053e-05, "loss": 0.6968547105789185, "step": 6524 }, { "epoch": 1.042835451130824, "grad_norm": 1.403401140669459, "learning_rate": 1.0121559305918613e-05, "loss": 0.7843728065490723, "step": 6525 }, { "epoch": 1.0429952849037, "grad_norm": 1.2860770386634295, "learning_rate": 1.01189168382679e-05, "loss": 0.5617129802703857, "step": 6526 }, { "epoch": 1.0431551186765764, "grad_norm": 1.257923871093895, "learning_rate": 1.0116274362312462e-05, "loss": 0.4633273780345917, "step": 6527 }, { "epoch": 1.0433149524494525, "grad_norm": 1.2852348877989888, "learning_rate": 1.0113631878236832e-05, "loss": 0.4169425964355469, "step": 6528 }, { "epoch": 1.0434747862223288, "grad_norm": 1.215749789807049, "learning_rate": 1.0110989386225555e-05, "loss": 0.5586339235305786, "step": 6529 }, { "epoch": 1.043634619995205, "grad_norm": 1.510924315862817, "learning_rate": 1.0108346886463178e-05, "loss": 0.6033746004104614, "step": 6530 }, { "epoch": 1.0437944537680812, "grad_norm": 1.9370482167522776, "learning_rate": 1.0105704379134234e-05, "loss": 0.6032218933105469, "step": 6531 }, { "epoch": 1.0439542875409573, "grad_norm": 1.2045854901017725, "learning_rate": 1.0103061864423276e-05, "loss": 0.5882943272590637, "step": 6532 }, { "epoch": 1.0441141213138336, "grad_norm": 1.5295906448099086, "learning_rate": 1.0100419342514838e-05, "loss": 0.5739771723747253, "step": 6533 }, { "epoch": 1.0442739550867097, "grad_norm": 1.2692818644092188, "learning_rate": 1.0097776813593473e-05, "loss": 0.569589376449585, "step": 6534 }, { "epoch": 1.044433788859586, "grad_norm": 1.3889451345196229, "learning_rate": 1.0095134277843721e-05, "loss": 0.5848093032836914, "step": 6535 }, { "epoch": 1.0445936226324621, "grad_norm": 1.405166196621058, "learning_rate": 1.009249173545013e-05, "loss": 0.5814974308013916, "step": 6536 }, { "epoch": 1.0447534564053385, "grad_norm": 1.351016226184542, "learning_rate": 1.0089849186597245e-05, "loss": 0.5261274576187134, "step": 6537 }, { "epoch": 1.0449132901782145, "grad_norm": 1.2685806163266486, "learning_rate": 1.008720663146961e-05, "loss": 0.4947417080402374, "step": 6538 }, { "epoch": 1.0450731239510909, "grad_norm": 1.569138559118462, "learning_rate": 1.0084564070251777e-05, "loss": 0.565587043762207, "step": 6539 }, { "epoch": 1.0452329577239672, "grad_norm": 1.5965866898049919, "learning_rate": 1.0081921503128287e-05, "loss": 0.6310840845108032, "step": 6540 }, { "epoch": 1.0453927914968433, "grad_norm": 1.271720965650934, "learning_rate": 1.0079278930283689e-05, "loss": 0.45774343609809875, "step": 6541 }, { "epoch": 1.0455526252697196, "grad_norm": 1.330231431065916, "learning_rate": 1.0076636351902536e-05, "loss": 0.5459262132644653, "step": 6542 }, { "epoch": 1.0457124590425957, "grad_norm": 1.5699062686470253, "learning_rate": 1.0073993768169368e-05, "loss": 0.6697443127632141, "step": 6543 }, { "epoch": 1.045872292815472, "grad_norm": 1.4360692216067248, "learning_rate": 1.007135117926874e-05, "loss": 0.5307611227035522, "step": 6544 }, { "epoch": 1.046032126588348, "grad_norm": 1.4092714775977626, "learning_rate": 1.00687085853852e-05, "loss": 0.5004967451095581, "step": 6545 }, { "epoch": 1.0461919603612244, "grad_norm": 1.2851248281306977, "learning_rate": 1.0066065986703292e-05, "loss": 0.5759201049804688, "step": 6546 }, { "epoch": 1.0463517941341005, "grad_norm": 1.412416220413785, "learning_rate": 1.0063423383407575e-05, "loss": 0.4306069314479828, "step": 6547 }, { "epoch": 1.0465116279069768, "grad_norm": 1.6036733108287657, "learning_rate": 1.006078077568259e-05, "loss": 0.5197870135307312, "step": 6548 }, { "epoch": 1.046671461679853, "grad_norm": 1.5901452834504783, "learning_rate": 1.0058138163712897e-05, "loss": 0.6399592161178589, "step": 6549 }, { "epoch": 1.0468312954527292, "grad_norm": 1.269368409442484, "learning_rate": 1.0055495547683039e-05, "loss": 0.5963201522827148, "step": 6550 }, { "epoch": 1.0469911292256053, "grad_norm": 1.417066045757582, "learning_rate": 1.0052852927777566e-05, "loss": 0.5549772381782532, "step": 6551 }, { "epoch": 1.0471509629984816, "grad_norm": 1.4189649061971585, "learning_rate": 1.0050210304181034e-05, "loss": 0.4796800911426544, "step": 6552 }, { "epoch": 1.0473107967713577, "grad_norm": 1.521899147580592, "learning_rate": 1.0047567677077992e-05, "loss": 0.6916744709014893, "step": 6553 }, { "epoch": 1.047470630544234, "grad_norm": 1.3426168034903645, "learning_rate": 1.0044925046652994e-05, "loss": 0.47619354724884033, "step": 6554 }, { "epoch": 1.0476304643171102, "grad_norm": 1.5015922540104039, "learning_rate": 1.0042282413090587e-05, "loss": 0.44915705919265747, "step": 6555 }, { "epoch": 1.0477902980899865, "grad_norm": 1.4249939137744347, "learning_rate": 1.003963977657533e-05, "loss": 0.5507164001464844, "step": 6556 }, { "epoch": 1.0479501318628626, "grad_norm": 1.2912046571848868, "learning_rate": 1.0036997137291771e-05, "loss": 0.47640109062194824, "step": 6557 }, { "epoch": 1.0481099656357389, "grad_norm": 1.364163543137376, "learning_rate": 1.003435449542446e-05, "loss": 0.5933245420455933, "step": 6558 }, { "epoch": 1.048269799408615, "grad_norm": 1.6512508019836762, "learning_rate": 1.0031711851157956e-05, "loss": 0.6782962083816528, "step": 6559 }, { "epoch": 1.0484296331814913, "grad_norm": 1.3938099532296875, "learning_rate": 1.002906920467681e-05, "loss": 0.43960675597190857, "step": 6560 }, { "epoch": 1.0485894669543674, "grad_norm": 1.5440979348313504, "learning_rate": 1.002642655616557e-05, "loss": 0.6508559584617615, "step": 6561 }, { "epoch": 1.0487493007272437, "grad_norm": 1.516069321196862, "learning_rate": 1.00237839058088e-05, "loss": 0.5630464553833008, "step": 6562 }, { "epoch": 1.0489091345001198, "grad_norm": 1.4051980955061423, "learning_rate": 1.002114125379104e-05, "loss": 0.5495562553405762, "step": 6563 }, { "epoch": 1.0490689682729961, "grad_norm": 1.593703794032608, "learning_rate": 1.0018498600296853e-05, "loss": 0.576612114906311, "step": 6564 }, { "epoch": 1.0492288020458722, "grad_norm": 1.209618052434533, "learning_rate": 1.0015855945510789e-05, "loss": 0.6056021451950073, "step": 6565 }, { "epoch": 1.0493886358187485, "grad_norm": 1.1079478401477427, "learning_rate": 1.00132132896174e-05, "loss": 0.4628031849861145, "step": 6566 }, { "epoch": 1.0495484695916246, "grad_norm": 1.4483320484267284, "learning_rate": 1.0010570632801244e-05, "loss": 0.6597611308097839, "step": 6567 }, { "epoch": 1.049708303364501, "grad_norm": 1.4178509642894639, "learning_rate": 1.0007927975246871e-05, "loss": 0.5360419750213623, "step": 6568 }, { "epoch": 1.049868137137377, "grad_norm": 1.5341368880959187, "learning_rate": 1.0005285317138837e-05, "loss": 0.7144953608512878, "step": 6569 }, { "epoch": 1.0500279709102533, "grad_norm": 1.4197665422823968, "learning_rate": 1.0002642658661696e-05, "loss": 0.5230358839035034, "step": 6570 }, { "epoch": 1.0501878046831294, "grad_norm": 1.5284952814058481, "learning_rate": 1e-05, "loss": 0.5846437215805054, "step": 6571 }, { "epoch": 1.0503476384560058, "grad_norm": 1.5499654630399495, "learning_rate": 9.997357341338307e-06, "loss": 0.6656991243362427, "step": 6572 }, { "epoch": 1.0505074722288819, "grad_norm": 1.3235422465188347, "learning_rate": 9.994714682861165e-06, "loss": 0.5348371267318726, "step": 6573 }, { "epoch": 1.0506673060017582, "grad_norm": 1.3726190470031767, "learning_rate": 9.992072024753132e-06, "loss": 0.6217748522758484, "step": 6574 }, { "epoch": 1.0508271397746345, "grad_norm": 1.3596712208631587, "learning_rate": 9.989429367198758e-06, "loss": 0.44493377208709717, "step": 6575 }, { "epoch": 1.0509869735475106, "grad_norm": 1.4313152373125497, "learning_rate": 9.986786710382603e-06, "loss": 0.5484309196472168, "step": 6576 }, { "epoch": 1.051146807320387, "grad_norm": 1.279146846196857, "learning_rate": 9.984144054489215e-06, "loss": 0.5387803316116333, "step": 6577 }, { "epoch": 1.051306641093263, "grad_norm": 1.465053961882284, "learning_rate": 9.981501399703152e-06, "loss": 0.5107772350311279, "step": 6578 }, { "epoch": 1.0514664748661393, "grad_norm": 1.3981983037051615, "learning_rate": 9.978858746208964e-06, "loss": 0.478282630443573, "step": 6579 }, { "epoch": 1.0516263086390154, "grad_norm": 1.6597142761573305, "learning_rate": 9.976216094191205e-06, "loss": 0.635331392288208, "step": 6580 }, { "epoch": 1.0517861424118917, "grad_norm": 1.3091521470215843, "learning_rate": 9.973573443834432e-06, "loss": 0.6006420850753784, "step": 6581 }, { "epoch": 1.0519459761847678, "grad_norm": 1.49428562528619, "learning_rate": 9.970930795323192e-06, "loss": 0.5480517148971558, "step": 6582 }, { "epoch": 1.0521058099576441, "grad_norm": 1.4085915076720057, "learning_rate": 9.968288148842044e-06, "loss": 0.5854076743125916, "step": 6583 }, { "epoch": 1.0522656437305202, "grad_norm": 1.4945374491605334, "learning_rate": 9.96564550457554e-06, "loss": 0.5123482346534729, "step": 6584 }, { "epoch": 1.0524254775033965, "grad_norm": 1.4148216957292907, "learning_rate": 9.96300286270823e-06, "loss": 0.5511122941970825, "step": 6585 }, { "epoch": 1.0525853112762726, "grad_norm": 1.4692742450183982, "learning_rate": 9.960360223424672e-06, "loss": 0.5078294277191162, "step": 6586 }, { "epoch": 1.052745145049149, "grad_norm": 1.2276048910807353, "learning_rate": 9.957717586909415e-06, "loss": 0.5635130405426025, "step": 6587 }, { "epoch": 1.052904978822025, "grad_norm": 1.5243717618529375, "learning_rate": 9.95507495334701e-06, "loss": 0.5894197225570679, "step": 6588 }, { "epoch": 1.0530648125949014, "grad_norm": 1.4208110184927678, "learning_rate": 9.952432322922011e-06, "loss": 0.6134940385818481, "step": 6589 }, { "epoch": 1.0532246463677775, "grad_norm": 1.2837059276728051, "learning_rate": 9.94978969581897e-06, "loss": 0.48325347900390625, "step": 6590 }, { "epoch": 1.0533844801406538, "grad_norm": 1.4446556161613806, "learning_rate": 9.947147072222437e-06, "loss": 0.5490453243255615, "step": 6591 }, { "epoch": 1.0535443139135299, "grad_norm": 1.5669077952501227, "learning_rate": 9.944504452316965e-06, "loss": 0.524918794631958, "step": 6592 }, { "epoch": 1.0537041476864062, "grad_norm": 1.6654418457527274, "learning_rate": 9.941861836287107e-06, "loss": 0.5815738439559937, "step": 6593 }, { "epoch": 1.0538639814592823, "grad_norm": 1.2096362017170574, "learning_rate": 9.939219224317413e-06, "loss": 0.5217607021331787, "step": 6594 }, { "epoch": 1.0540238152321586, "grad_norm": 1.3551466425896366, "learning_rate": 9.936576616592428e-06, "loss": 0.5602644085884094, "step": 6595 }, { "epoch": 1.0541836490050347, "grad_norm": 1.505293184290515, "learning_rate": 9.93393401329671e-06, "loss": 0.5570661425590515, "step": 6596 }, { "epoch": 1.054343482777911, "grad_norm": 1.4335505032161244, "learning_rate": 9.931291414614804e-06, "loss": 0.5948861837387085, "step": 6597 }, { "epoch": 1.054503316550787, "grad_norm": 1.500800519079461, "learning_rate": 9.928648820731264e-06, "loss": 0.6169844269752502, "step": 6598 }, { "epoch": 1.0546631503236634, "grad_norm": 1.3616130843086975, "learning_rate": 9.926006231830635e-06, "loss": 0.5826058387756348, "step": 6599 }, { "epoch": 1.0548229840965395, "grad_norm": 1.4263518654703662, "learning_rate": 9.923363648097469e-06, "loss": 0.5936836004257202, "step": 6600 }, { "epoch": 1.0549828178694158, "grad_norm": 1.3740732225471504, "learning_rate": 9.920721069716314e-06, "loss": 0.51890629529953, "step": 6601 }, { "epoch": 1.055142651642292, "grad_norm": 1.5462415977707817, "learning_rate": 9.918078496871715e-06, "loss": 0.6215975284576416, "step": 6602 }, { "epoch": 1.0553024854151682, "grad_norm": 1.583879064861957, "learning_rate": 9.915435929748224e-06, "loss": 0.6465654373168945, "step": 6603 }, { "epoch": 1.0554623191880443, "grad_norm": 1.6656161816247808, "learning_rate": 9.91279336853039e-06, "loss": 0.6195710301399231, "step": 6604 }, { "epoch": 1.0556221529609207, "grad_norm": 1.4787175875656546, "learning_rate": 9.910150813402756e-06, "loss": 0.5981056094169617, "step": 6605 }, { "epoch": 1.0557819867337968, "grad_norm": 1.3583681658470035, "learning_rate": 9.907508264549872e-06, "loss": 0.6013818383216858, "step": 6606 }, { "epoch": 1.055941820506673, "grad_norm": 1.5414494006262658, "learning_rate": 9.90486572215628e-06, "loss": 0.5800068378448486, "step": 6607 }, { "epoch": 1.0561016542795492, "grad_norm": 1.3359507528046364, "learning_rate": 9.902223186406528e-06, "loss": 0.5749721527099609, "step": 6608 }, { "epoch": 1.0562614880524255, "grad_norm": 1.5018434475904896, "learning_rate": 9.899580657485164e-06, "loss": 0.6495611667633057, "step": 6609 }, { "epoch": 1.0564213218253018, "grad_norm": 1.2883767415410272, "learning_rate": 9.896938135576728e-06, "loss": 0.47000885009765625, "step": 6610 }, { "epoch": 1.056581155598178, "grad_norm": 1.2983516952189706, "learning_rate": 9.894295620865768e-06, "loss": 0.49709582328796387, "step": 6611 }, { "epoch": 1.0567409893710542, "grad_norm": 1.3343618279370772, "learning_rate": 9.891653113536825e-06, "loss": 0.6256960034370422, "step": 6612 }, { "epoch": 1.0569008231439303, "grad_norm": 1.2227764839084951, "learning_rate": 9.889010613774446e-06, "loss": 0.5406695008277893, "step": 6613 }, { "epoch": 1.0570606569168066, "grad_norm": 1.5820677898889925, "learning_rate": 9.886368121763171e-06, "loss": 0.5578418970108032, "step": 6614 }, { "epoch": 1.0572204906896827, "grad_norm": 1.7231740196432643, "learning_rate": 9.883725637687543e-06, "loss": 0.6486356258392334, "step": 6615 }, { "epoch": 1.057380324462559, "grad_norm": 1.255339791236429, "learning_rate": 9.881083161732103e-06, "loss": 0.4710511565208435, "step": 6616 }, { "epoch": 1.0575401582354351, "grad_norm": 1.9341656398801161, "learning_rate": 9.878440694081392e-06, "loss": 0.729265034198761, "step": 6617 }, { "epoch": 1.0576999920083114, "grad_norm": 1.4213032418641556, "learning_rate": 9.875798234919952e-06, "loss": 0.6186976432800293, "step": 6618 }, { "epoch": 1.0578598257811875, "grad_norm": 1.2621649105609127, "learning_rate": 9.87315578443232e-06, "loss": 0.5346857309341431, "step": 6619 }, { "epoch": 1.0580196595540639, "grad_norm": 1.3395530133030993, "learning_rate": 9.870513342803042e-06, "loss": 0.556438148021698, "step": 6620 }, { "epoch": 1.05817949332694, "grad_norm": 1.667099977996518, "learning_rate": 9.867870910216652e-06, "loss": 0.5430442094802856, "step": 6621 }, { "epoch": 1.0583393270998163, "grad_norm": 1.4260248220134821, "learning_rate": 9.865228486857684e-06, "loss": 0.6862497329711914, "step": 6622 }, { "epoch": 1.0584991608726924, "grad_norm": 2.371976210600911, "learning_rate": 9.86258607291068e-06, "loss": 0.5676170587539673, "step": 6623 }, { "epoch": 1.0586589946455687, "grad_norm": 1.3896863688959717, "learning_rate": 9.85994366856018e-06, "loss": 0.5668574571609497, "step": 6624 }, { "epoch": 1.0588188284184448, "grad_norm": 1.2966003515890385, "learning_rate": 9.857301273990715e-06, "loss": 0.4617302417755127, "step": 6625 }, { "epoch": 1.058978662191321, "grad_norm": 1.3345416401975532, "learning_rate": 9.854658889386825e-06, "loss": 0.583139181137085, "step": 6626 }, { "epoch": 1.0591384959641972, "grad_norm": 1.266582004074787, "learning_rate": 9.85201651493304e-06, "loss": 0.4543754756450653, "step": 6627 }, { "epoch": 1.0592983297370735, "grad_norm": 1.4434152870634467, "learning_rate": 9.8493741508139e-06, "loss": 0.5345305800437927, "step": 6628 }, { "epoch": 1.0594581635099496, "grad_norm": 1.4279808708886932, "learning_rate": 9.84673179721393e-06, "loss": 0.521716833114624, "step": 6629 }, { "epoch": 1.059617997282826, "grad_norm": 1.3678031315275203, "learning_rate": 9.844089454317667e-06, "loss": 0.5371131896972656, "step": 6630 }, { "epoch": 1.059777831055702, "grad_norm": 1.3479068364636142, "learning_rate": 9.841447122309646e-06, "loss": 0.63995361328125, "step": 6631 }, { "epoch": 1.0599376648285783, "grad_norm": 1.5384946302752593, "learning_rate": 9.838804801374392e-06, "loss": 0.47759711742401123, "step": 6632 }, { "epoch": 1.0600974986014544, "grad_norm": 1.6548281013037753, "learning_rate": 9.836162491696441e-06, "loss": 0.5739649534225464, "step": 6633 }, { "epoch": 1.0602573323743307, "grad_norm": 1.5116943692413811, "learning_rate": 9.833520193460317e-06, "loss": 0.5879784226417542, "step": 6634 }, { "epoch": 1.0604171661472068, "grad_norm": 1.3273037879048597, "learning_rate": 9.830877906850554e-06, "loss": 0.4854174852371216, "step": 6635 }, { "epoch": 1.0605769999200831, "grad_norm": 1.3885459798229212, "learning_rate": 9.828235632051678e-06, "loss": 0.6047710180282593, "step": 6636 }, { "epoch": 1.0607368336929592, "grad_norm": 1.4557148923621441, "learning_rate": 9.825593369248212e-06, "loss": 0.5667049288749695, "step": 6637 }, { "epoch": 1.0608966674658356, "grad_norm": 1.7397665851811093, "learning_rate": 9.822951118624689e-06, "loss": 0.6787606477737427, "step": 6638 }, { "epoch": 1.0610565012387116, "grad_norm": 1.4589463743559803, "learning_rate": 9.820308880365628e-06, "loss": 0.6611208319664001, "step": 6639 }, { "epoch": 1.061216335011588, "grad_norm": 1.5659025237500361, "learning_rate": 9.81766665465556e-06, "loss": 0.5543179512023926, "step": 6640 }, { "epoch": 1.061376168784464, "grad_norm": 1.3058726406389873, "learning_rate": 9.815024441679004e-06, "loss": 0.5952911972999573, "step": 6641 }, { "epoch": 1.0615360025573404, "grad_norm": 1.5629400237733575, "learning_rate": 9.812382241620482e-06, "loss": 0.7866867780685425, "step": 6642 }, { "epoch": 1.0616958363302165, "grad_norm": 1.3879844355785005, "learning_rate": 9.809740054664521e-06, "loss": 0.6675695180892944, "step": 6643 }, { "epoch": 1.0618556701030928, "grad_norm": 1.331556278782578, "learning_rate": 9.80709788099564e-06, "loss": 0.46663573384284973, "step": 6644 }, { "epoch": 1.062015503875969, "grad_norm": 1.4830503051432806, "learning_rate": 9.804455720798354e-06, "loss": 0.39842459559440613, "step": 6645 }, { "epoch": 1.0621753376488452, "grad_norm": 1.365639318013061, "learning_rate": 9.80181357425719e-06, "loss": 0.4784027934074402, "step": 6646 }, { "epoch": 1.0623351714217215, "grad_norm": 1.5221432013040816, "learning_rate": 9.79917144155666e-06, "loss": 0.4473561942577362, "step": 6647 }, { "epoch": 1.0624950051945976, "grad_norm": 1.4385062106166338, "learning_rate": 9.796529322881285e-06, "loss": 0.5028929710388184, "step": 6648 }, { "epoch": 1.062654838967474, "grad_norm": 1.2517037773116853, "learning_rate": 9.79388721841558e-06, "loss": 0.5176906585693359, "step": 6649 }, { "epoch": 1.06281467274035, "grad_norm": 1.82680626799319, "learning_rate": 9.791245128344059e-06, "loss": 0.7056145668029785, "step": 6650 }, { "epoch": 1.0629745065132263, "grad_norm": 1.3205837575153023, "learning_rate": 9.788603052851237e-06, "loss": 0.5062751173973083, "step": 6651 }, { "epoch": 1.0631343402861024, "grad_norm": 1.4541139539708399, "learning_rate": 9.785960992121625e-06, "loss": 0.5692266225814819, "step": 6652 }, { "epoch": 1.0632941740589787, "grad_norm": 1.70730633472303, "learning_rate": 9.78331894633974e-06, "loss": 0.5997258424758911, "step": 6653 }, { "epoch": 1.0634540078318548, "grad_norm": 1.5269359935129723, "learning_rate": 9.78067691569009e-06, "loss": 0.5157448649406433, "step": 6654 }, { "epoch": 1.0636138416047312, "grad_norm": 1.7051318333420071, "learning_rate": 9.778034900357185e-06, "loss": 0.6594017744064331, "step": 6655 }, { "epoch": 1.0637736753776073, "grad_norm": 1.3562817149527298, "learning_rate": 9.775392900525534e-06, "loss": 0.45147204399108887, "step": 6656 }, { "epoch": 1.0639335091504836, "grad_norm": 1.5432856865753537, "learning_rate": 9.772750916379647e-06, "loss": 0.7093039751052856, "step": 6657 }, { "epoch": 1.0640933429233597, "grad_norm": 1.3832699516523175, "learning_rate": 9.770108948104029e-06, "loss": 0.4743332862854004, "step": 6658 }, { "epoch": 1.064253176696236, "grad_norm": 1.5742915155127053, "learning_rate": 9.767466995883183e-06, "loss": 0.5844760537147522, "step": 6659 }, { "epoch": 1.064413010469112, "grad_norm": 1.3977465032360297, "learning_rate": 9.76482505990162e-06, "loss": 0.7005687952041626, "step": 6660 }, { "epoch": 1.0645728442419884, "grad_norm": 1.6185154148866665, "learning_rate": 9.762183140343837e-06, "loss": 0.5817183256149292, "step": 6661 }, { "epoch": 1.0647326780148645, "grad_norm": 2.0397782575377996, "learning_rate": 9.759541237394335e-06, "loss": 0.631308913230896, "step": 6662 }, { "epoch": 1.0648925117877408, "grad_norm": 1.5773057696758883, "learning_rate": 9.756899351237625e-06, "loss": 0.5951026678085327, "step": 6663 }, { "epoch": 1.065052345560617, "grad_norm": 1.4611118974260804, "learning_rate": 9.754257482058198e-06, "loss": 0.5559049248695374, "step": 6664 }, { "epoch": 1.0652121793334932, "grad_norm": 1.6463885702978935, "learning_rate": 9.751615630040561e-06, "loss": 0.5100135207176208, "step": 6665 }, { "epoch": 1.0653720131063693, "grad_norm": 1.5563753835045622, "learning_rate": 9.748973795369205e-06, "loss": 0.49460387229919434, "step": 6666 }, { "epoch": 1.0655318468792456, "grad_norm": 1.293796815660374, "learning_rate": 9.746331978228623e-06, "loss": 0.4991031289100647, "step": 6667 }, { "epoch": 1.0656916806521217, "grad_norm": 1.4170192265527914, "learning_rate": 9.74369017880332e-06, "loss": 0.5123440027236938, "step": 6668 }, { "epoch": 1.065851514424998, "grad_norm": 1.6670641503955212, "learning_rate": 9.741048397277783e-06, "loss": 0.7519813776016235, "step": 6669 }, { "epoch": 1.0660113481978741, "grad_norm": 1.6654268370440726, "learning_rate": 9.738406633836507e-06, "loss": 0.6688218116760254, "step": 6670 }, { "epoch": 1.0661711819707504, "grad_norm": 1.5695116616550504, "learning_rate": 9.73576488866398e-06, "loss": 0.5800172686576843, "step": 6671 }, { "epoch": 1.0663310157436265, "grad_norm": 1.3130115482428202, "learning_rate": 9.733123161944698e-06, "loss": 0.5464235544204712, "step": 6672 }, { "epoch": 1.0664908495165029, "grad_norm": 1.540319945786908, "learning_rate": 9.73048145386315e-06, "loss": 0.5719445943832397, "step": 6673 }, { "epoch": 1.066650683289379, "grad_norm": 1.2578968029632451, "learning_rate": 9.727839764603814e-06, "loss": 0.4387248158454895, "step": 6674 }, { "epoch": 1.0668105170622553, "grad_norm": 1.5342389225474362, "learning_rate": 9.725198094351186e-06, "loss": 0.547410249710083, "step": 6675 }, { "epoch": 1.0669703508351314, "grad_norm": 1.473824041346862, "learning_rate": 9.722556443289743e-06, "loss": 0.5832228660583496, "step": 6676 }, { "epoch": 1.0671301846080077, "grad_norm": 1.5834545311771564, "learning_rate": 9.719914811603978e-06, "loss": 0.6149832606315613, "step": 6677 }, { "epoch": 1.0672900183808838, "grad_norm": 1.4387317539807456, "learning_rate": 9.717273199478367e-06, "loss": 0.5782458186149597, "step": 6678 }, { "epoch": 1.06744985215376, "grad_norm": 1.6226809532674198, "learning_rate": 9.71463160709739e-06, "loss": 0.46297693252563477, "step": 6679 }, { "epoch": 1.0676096859266364, "grad_norm": 1.5294822775597499, "learning_rate": 9.71199003464553e-06, "loss": 0.5156428813934326, "step": 6680 }, { "epoch": 1.0677695196995125, "grad_norm": 1.3986339512844361, "learning_rate": 9.709348482307262e-06, "loss": 0.49242448806762695, "step": 6681 }, { "epoch": 1.0679293534723886, "grad_norm": 1.3956357386492324, "learning_rate": 9.706706950267059e-06, "loss": 0.4932924211025238, "step": 6682 }, { "epoch": 1.068089187245265, "grad_norm": 1.5347551275983033, "learning_rate": 9.704065438709409e-06, "loss": 0.6351436376571655, "step": 6683 }, { "epoch": 1.0682490210181412, "grad_norm": 1.4789595424794404, "learning_rate": 9.701423947818775e-06, "loss": 0.5968376994132996, "step": 6684 }, { "epoch": 1.0684088547910173, "grad_norm": 1.427997836499069, "learning_rate": 9.698782477779634e-06, "loss": 0.6452726125717163, "step": 6685 }, { "epoch": 1.0685686885638936, "grad_norm": 1.4341457592080529, "learning_rate": 9.696141028776454e-06, "loss": 0.49140095710754395, "step": 6686 }, { "epoch": 1.0687285223367697, "grad_norm": 1.2144344279071235, "learning_rate": 9.693499600993705e-06, "loss": 0.5065633058547974, "step": 6687 }, { "epoch": 1.068888356109646, "grad_norm": 1.4972862439996253, "learning_rate": 9.690858194615856e-06, "loss": 0.5576115846633911, "step": 6688 }, { "epoch": 1.0690481898825221, "grad_norm": 1.4946922443430415, "learning_rate": 9.688216809827373e-06, "loss": 0.5418235063552856, "step": 6689 }, { "epoch": 1.0692080236553985, "grad_norm": 1.2120873993520476, "learning_rate": 9.685575446812723e-06, "loss": 0.5823848247528076, "step": 6690 }, { "epoch": 1.0693678574282746, "grad_norm": 1.7363592855561094, "learning_rate": 9.682934105756363e-06, "loss": 0.5630930662155151, "step": 6691 }, { "epoch": 1.0695276912011509, "grad_norm": 1.6659637688180535, "learning_rate": 9.680292786842763e-06, "loss": 0.5502485632896423, "step": 6692 }, { "epoch": 1.069687524974027, "grad_norm": 1.571464225369376, "learning_rate": 9.67765149025638e-06, "loss": 0.5302930474281311, "step": 6693 }, { "epoch": 1.0698473587469033, "grad_norm": 1.3303815407350286, "learning_rate": 9.67501021618167e-06, "loss": 0.6816383004188538, "step": 6694 }, { "epoch": 1.0700071925197794, "grad_norm": 1.4322060067113325, "learning_rate": 9.672368964803094e-06, "loss": 0.5453072786331177, "step": 6695 }, { "epoch": 1.0701670262926557, "grad_norm": 1.7301864490949739, "learning_rate": 9.669727736305103e-06, "loss": 0.6236070394515991, "step": 6696 }, { "epoch": 1.0703268600655318, "grad_norm": 1.5246584065343398, "learning_rate": 9.667086530872158e-06, "loss": 0.5402451753616333, "step": 6697 }, { "epoch": 1.070486693838408, "grad_norm": 1.5126629777484952, "learning_rate": 9.664445348688705e-06, "loss": 0.5557718276977539, "step": 6698 }, { "epoch": 1.0706465276112842, "grad_norm": 1.6862456806940191, "learning_rate": 9.6618041899392e-06, "loss": 0.547545313835144, "step": 6699 }, { "epoch": 1.0708063613841605, "grad_norm": 1.799998247094947, "learning_rate": 9.659163054808088e-06, "loss": 0.6090624332427979, "step": 6700 }, { "epoch": 1.0709661951570366, "grad_norm": 1.3444535755198264, "learning_rate": 9.656521943479817e-06, "loss": 0.607769250869751, "step": 6701 }, { "epoch": 1.071126028929913, "grad_norm": 1.6395241259570978, "learning_rate": 9.653880856138832e-06, "loss": 0.6442394256591797, "step": 6702 }, { "epoch": 1.071285862702789, "grad_norm": 1.273048460203421, "learning_rate": 9.651239792969584e-06, "loss": 0.5246762037277222, "step": 6703 }, { "epoch": 1.0714456964756653, "grad_norm": 1.3036390281520558, "learning_rate": 9.648598754156507e-06, "loss": 0.5816433429718018, "step": 6704 }, { "epoch": 1.0716055302485414, "grad_norm": 1.5606724341056917, "learning_rate": 9.645957739884048e-06, "loss": 0.5132675170898438, "step": 6705 }, { "epoch": 1.0717653640214178, "grad_norm": 1.5635248867703944, "learning_rate": 9.64331675033664e-06, "loss": 0.5067886114120483, "step": 6706 }, { "epoch": 1.0719251977942938, "grad_norm": 1.259583903180412, "learning_rate": 9.640675785698726e-06, "loss": 0.6316165924072266, "step": 6707 }, { "epoch": 1.0720850315671702, "grad_norm": 1.4765283696207694, "learning_rate": 9.63803484615474e-06, "loss": 0.5844793319702148, "step": 6708 }, { "epoch": 1.0722448653400463, "grad_norm": 1.4525992899579419, "learning_rate": 9.635393931889113e-06, "loss": 0.5526914596557617, "step": 6709 }, { "epoch": 1.0724046991129226, "grad_norm": 1.4331445482876763, "learning_rate": 9.63275304308628e-06, "loss": 0.5949366092681885, "step": 6710 }, { "epoch": 1.0725645328857987, "grad_norm": 1.3361055187794544, "learning_rate": 9.630112179930671e-06, "loss": 0.4963393211364746, "step": 6711 }, { "epoch": 1.072724366658675, "grad_norm": 1.568280900241038, "learning_rate": 9.627471342606714e-06, "loss": 0.6493759155273438, "step": 6712 }, { "epoch": 1.072884200431551, "grad_norm": 1.4159057649381481, "learning_rate": 9.624830531298836e-06, "loss": 0.6105741262435913, "step": 6713 }, { "epoch": 1.0730440342044274, "grad_norm": 1.3150197978455718, "learning_rate": 9.622189746191461e-06, "loss": 0.4945265054702759, "step": 6714 }, { "epoch": 1.0732038679773037, "grad_norm": 1.8158878162467078, "learning_rate": 9.619548987469014e-06, "loss": 0.7196142673492432, "step": 6715 }, { "epoch": 1.0733637017501798, "grad_norm": 1.5461308002573655, "learning_rate": 9.616908255315911e-06, "loss": 0.6645936965942383, "step": 6716 }, { "epoch": 1.073523535523056, "grad_norm": 1.3762946647320662, "learning_rate": 9.61426754991658e-06, "loss": 0.6484030485153198, "step": 6717 }, { "epoch": 1.0736833692959322, "grad_norm": 1.3426765418658528, "learning_rate": 9.61162687145543e-06, "loss": 0.6087830066680908, "step": 6718 }, { "epoch": 1.0738432030688085, "grad_norm": 1.4291847086980982, "learning_rate": 9.608986220116882e-06, "loss": 0.6307642459869385, "step": 6719 }, { "epoch": 1.0740030368416846, "grad_norm": 1.1697809203821283, "learning_rate": 9.606345596085345e-06, "loss": 0.6213811039924622, "step": 6720 }, { "epoch": 1.074162870614561, "grad_norm": 1.3182004676935568, "learning_rate": 9.603704999545238e-06, "loss": 0.6425110101699829, "step": 6721 }, { "epoch": 1.074322704387437, "grad_norm": 1.5125829002711766, "learning_rate": 9.601064430680961e-06, "loss": 0.6318727731704712, "step": 6722 }, { "epoch": 1.0744825381603134, "grad_norm": 1.8302010638835313, "learning_rate": 9.598423889676934e-06, "loss": 0.5457540154457092, "step": 6723 }, { "epoch": 1.0746423719331895, "grad_norm": 1.5170721270757168, "learning_rate": 9.595783376717552e-06, "loss": 0.6367179155349731, "step": 6724 }, { "epoch": 1.0748022057060658, "grad_norm": 1.4116021885424694, "learning_rate": 9.593142891987227e-06, "loss": 0.544453501701355, "step": 6725 }, { "epoch": 1.0749620394789419, "grad_norm": 1.437453891471697, "learning_rate": 9.590502435670356e-06, "loss": 0.5838703513145447, "step": 6726 }, { "epoch": 1.0751218732518182, "grad_norm": 1.2937137455979124, "learning_rate": 9.587862007951343e-06, "loss": 0.4642685651779175, "step": 6727 }, { "epoch": 1.0752817070246943, "grad_norm": 1.3902983782223544, "learning_rate": 9.585221609014582e-06, "loss": 0.6270649433135986, "step": 6728 }, { "epoch": 1.0754415407975706, "grad_norm": 1.2840138215428685, "learning_rate": 9.582581239044472e-06, "loss": 0.5222623348236084, "step": 6729 }, { "epoch": 1.0756013745704467, "grad_norm": 1.4980108737477507, "learning_rate": 9.579940898225407e-06, "loss": 0.4993949234485626, "step": 6730 }, { "epoch": 1.075761208343323, "grad_norm": 1.5556085036180551, "learning_rate": 9.577300586741775e-06, "loss": 0.5813767313957214, "step": 6731 }, { "epoch": 1.075921042116199, "grad_norm": 1.5488636076495041, "learning_rate": 9.574660304777973e-06, "loss": 0.5753439664840698, "step": 6732 }, { "epoch": 1.0760808758890754, "grad_norm": 1.7164400829215907, "learning_rate": 9.572020052518381e-06, "loss": 0.6723475456237793, "step": 6733 }, { "epoch": 1.0762407096619515, "grad_norm": 1.5785115572599453, "learning_rate": 9.569379830147392e-06, "loss": 0.6491718292236328, "step": 6734 }, { "epoch": 1.0764005434348278, "grad_norm": 1.5319829444385518, "learning_rate": 9.566739637849383e-06, "loss": 0.5818576812744141, "step": 6735 }, { "epoch": 1.076560377207704, "grad_norm": 1.4543710921668491, "learning_rate": 9.564099475808743e-06, "loss": 0.5460178256034851, "step": 6736 }, { "epoch": 1.0767202109805802, "grad_norm": 1.3812183580398443, "learning_rate": 9.561459344209849e-06, "loss": 0.5222189426422119, "step": 6737 }, { "epoch": 1.0768800447534563, "grad_norm": 1.4446645193829244, "learning_rate": 9.558819243237074e-06, "loss": 0.5927121639251709, "step": 6738 }, { "epoch": 1.0770398785263327, "grad_norm": 1.4953778099442372, "learning_rate": 9.556179173074797e-06, "loss": 0.5471785068511963, "step": 6739 }, { "epoch": 1.0771997122992087, "grad_norm": 1.8264212594239153, "learning_rate": 9.55353913390739e-06, "loss": 0.5205825567245483, "step": 6740 }, { "epoch": 1.077359546072085, "grad_norm": 1.4248147067108987, "learning_rate": 9.550899125919226e-06, "loss": 0.4676799774169922, "step": 6741 }, { "epoch": 1.0775193798449612, "grad_norm": 1.3440751194917164, "learning_rate": 9.548259149294669e-06, "loss": 0.581186830997467, "step": 6742 }, { "epoch": 1.0776792136178375, "grad_norm": 1.424677500446777, "learning_rate": 9.545619204218093e-06, "loss": 0.6089063286781311, "step": 6743 }, { "epoch": 1.0778390473907136, "grad_norm": 1.455870095822494, "learning_rate": 9.542979290873856e-06, "loss": 0.5923644304275513, "step": 6744 }, { "epoch": 1.0779988811635899, "grad_norm": 1.463712507610216, "learning_rate": 9.540339409446325e-06, "loss": 0.5178384780883789, "step": 6745 }, { "epoch": 1.078158714936466, "grad_norm": 1.4577683171932467, "learning_rate": 9.537699560119855e-06, "loss": 0.5661959648132324, "step": 6746 }, { "epoch": 1.0783185487093423, "grad_norm": 1.3513792159981584, "learning_rate": 9.53505974307881e-06, "loss": 0.5408230423927307, "step": 6747 }, { "epoch": 1.0784783824822184, "grad_norm": 1.4096253759390445, "learning_rate": 9.532419958507537e-06, "loss": 0.5512698888778687, "step": 6748 }, { "epoch": 1.0786382162550947, "grad_norm": 1.2999858312732693, "learning_rate": 9.529780206590397e-06, "loss": 0.47214120626449585, "step": 6749 }, { "epoch": 1.078798050027971, "grad_norm": 1.3415588809207573, "learning_rate": 9.527140487511739e-06, "loss": 0.567977786064148, "step": 6750 }, { "epoch": 1.0789578838008471, "grad_norm": 1.4822180810956112, "learning_rate": 9.524500801455907e-06, "loss": 0.5485587120056152, "step": 6751 }, { "epoch": 1.0791177175737232, "grad_norm": 1.5723742827711282, "learning_rate": 9.521861148607253e-06, "loss": 0.5186686515808105, "step": 6752 }, { "epoch": 1.0792775513465995, "grad_norm": 1.4444516819915847, "learning_rate": 9.519221529150116e-06, "loss": 0.5344697833061218, "step": 6753 }, { "epoch": 1.0794373851194758, "grad_norm": 1.8448593760063323, "learning_rate": 9.516581943268842e-06, "loss": 0.5953794121742249, "step": 6754 }, { "epoch": 1.079597218892352, "grad_norm": 1.3702312569387565, "learning_rate": 9.513942391147767e-06, "loss": 0.5132841467857361, "step": 6755 }, { "epoch": 1.0797570526652283, "grad_norm": 1.6936946030042266, "learning_rate": 9.511302872971233e-06, "loss": 0.5063244700431824, "step": 6756 }, { "epoch": 1.0799168864381044, "grad_norm": 1.6096003849827285, "learning_rate": 9.50866338892357e-06, "loss": 0.6586185693740845, "step": 6757 }, { "epoch": 1.0800767202109807, "grad_norm": 1.2651552738656289, "learning_rate": 9.506023939189108e-06, "loss": 0.512138307094574, "step": 6758 }, { "epoch": 1.0802365539838568, "grad_norm": 1.5089903512386031, "learning_rate": 9.503384523952183e-06, "loss": 0.4875413775444031, "step": 6759 }, { "epoch": 1.080396387756733, "grad_norm": 1.4326074268298534, "learning_rate": 9.500745143397118e-06, "loss": 0.5397362112998962, "step": 6760 }, { "epoch": 1.0805562215296092, "grad_norm": 1.5702974037640545, "learning_rate": 9.498105797708242e-06, "loss": 0.531434178352356, "step": 6761 }, { "epoch": 1.0807160553024855, "grad_norm": 1.6103385191200401, "learning_rate": 9.49546648706987e-06, "loss": 0.5059848427772522, "step": 6762 }, { "epoch": 1.0808758890753616, "grad_norm": 1.4895890859801069, "learning_rate": 9.49282721166633e-06, "loss": 0.60770183801651, "step": 6763 }, { "epoch": 1.081035722848238, "grad_norm": 1.6117081390038956, "learning_rate": 9.490187971681938e-06, "loss": 0.5547304153442383, "step": 6764 }, { "epoch": 1.081195556621114, "grad_norm": 1.4533010014057617, "learning_rate": 9.487548767301007e-06, "loss": 0.4928451478481293, "step": 6765 }, { "epoch": 1.0813553903939903, "grad_norm": 1.2823803713156883, "learning_rate": 9.48490959870785e-06, "loss": 0.4911082983016968, "step": 6766 }, { "epoch": 1.0815152241668664, "grad_norm": 1.533195387165885, "learning_rate": 9.482270466086778e-06, "loss": 0.5153052806854248, "step": 6767 }, { "epoch": 1.0816750579397427, "grad_norm": 1.6921972783443753, "learning_rate": 9.479631369622098e-06, "loss": 0.5668256878852844, "step": 6768 }, { "epoch": 1.0818348917126188, "grad_norm": 1.5256019307593287, "learning_rate": 9.476992309498116e-06, "loss": 0.5499261617660522, "step": 6769 }, { "epoch": 1.0819947254854951, "grad_norm": 1.4376670808351315, "learning_rate": 9.474353285899134e-06, "loss": 0.6084034442901611, "step": 6770 }, { "epoch": 1.0821545592583712, "grad_norm": 1.6165579959069978, "learning_rate": 9.471714299009452e-06, "loss": 0.604066789150238, "step": 6771 }, { "epoch": 1.0823143930312475, "grad_norm": 1.3812364979394438, "learning_rate": 9.469075349013369e-06, "loss": 0.5450782775878906, "step": 6772 }, { "epoch": 1.0824742268041236, "grad_norm": 1.577344003139259, "learning_rate": 9.466436436095175e-06, "loss": 0.48142194747924805, "step": 6773 }, { "epoch": 1.082634060577, "grad_norm": 1.2625316877301023, "learning_rate": 9.46379756043917e-06, "loss": 0.4454079568386078, "step": 6774 }, { "epoch": 1.082793894349876, "grad_norm": 1.4075287252175643, "learning_rate": 9.461158722229636e-06, "loss": 0.6429606676101685, "step": 6775 }, { "epoch": 1.0829537281227524, "grad_norm": 1.4097307031738961, "learning_rate": 9.458519921650864e-06, "loss": 0.5050264596939087, "step": 6776 }, { "epoch": 1.0831135618956285, "grad_norm": 1.4948682368078923, "learning_rate": 9.455881158887138e-06, "loss": 0.4362289309501648, "step": 6777 }, { "epoch": 1.0832733956685048, "grad_norm": 1.5517777673584718, "learning_rate": 9.453242434122742e-06, "loss": 0.4131558835506439, "step": 6778 }, { "epoch": 1.0834332294413809, "grad_norm": 1.3638358921827716, "learning_rate": 9.450603747541952e-06, "loss": 0.5344109535217285, "step": 6779 }, { "epoch": 1.0835930632142572, "grad_norm": 1.7304097973365673, "learning_rate": 9.447965099329044e-06, "loss": 0.6873455047607422, "step": 6780 }, { "epoch": 1.0837528969871333, "grad_norm": 1.668677929037952, "learning_rate": 9.445326489668295e-06, "loss": 0.5493911504745483, "step": 6781 }, { "epoch": 1.0839127307600096, "grad_norm": 1.481698688382538, "learning_rate": 9.442687918743971e-06, "loss": 0.5557693243026733, "step": 6782 }, { "epoch": 1.0840725645328857, "grad_norm": 2.041279381054377, "learning_rate": 9.440049386740345e-06, "loss": 0.4948604702949524, "step": 6783 }, { "epoch": 1.084232398305762, "grad_norm": 1.73502292965956, "learning_rate": 9.437410893841685e-06, "loss": 0.573172926902771, "step": 6784 }, { "epoch": 1.0843922320786383, "grad_norm": 1.2489677113214988, "learning_rate": 9.434772440232247e-06, "loss": 0.5676592588424683, "step": 6785 }, { "epoch": 1.0845520658515144, "grad_norm": 1.2762447650950477, "learning_rate": 9.432134026096298e-06, "loss": 0.49028605222702026, "step": 6786 }, { "epoch": 1.0847118996243905, "grad_norm": 1.3811315295115962, "learning_rate": 9.42949565161809e-06, "loss": 0.531734049320221, "step": 6787 }, { "epoch": 1.0848717333972668, "grad_norm": 1.5220815825569753, "learning_rate": 9.42685731698188e-06, "loss": 0.5554561614990234, "step": 6788 }, { "epoch": 1.0850315671701432, "grad_norm": 1.5858776207353678, "learning_rate": 9.424219022371923e-06, "loss": 0.6023854613304138, "step": 6789 }, { "epoch": 1.0851914009430192, "grad_norm": 1.439395286918438, "learning_rate": 9.42158076797246e-06, "loss": 0.7516758441925049, "step": 6790 }, { "epoch": 1.0853512347158956, "grad_norm": 1.682453550359615, "learning_rate": 9.418942553967747e-06, "loss": 0.5409791469573975, "step": 6791 }, { "epoch": 1.0855110684887717, "grad_norm": 1.4070469667175591, "learning_rate": 9.416304380542019e-06, "loss": 0.618402361869812, "step": 6792 }, { "epoch": 1.085670902261648, "grad_norm": 1.5111924793742029, "learning_rate": 9.413666247879525e-06, "loss": 0.60523921251297, "step": 6793 }, { "epoch": 1.085830736034524, "grad_norm": 1.5764193112613596, "learning_rate": 9.411028156164495e-06, "loss": 0.5121446251869202, "step": 6794 }, { "epoch": 1.0859905698074004, "grad_norm": 1.1878291999057191, "learning_rate": 9.408390105581168e-06, "loss": 0.5063347816467285, "step": 6795 }, { "epoch": 1.0861504035802765, "grad_norm": 1.547203732322117, "learning_rate": 9.405752096313776e-06, "loss": 0.5989782810211182, "step": 6796 }, { "epoch": 1.0863102373531528, "grad_norm": 1.4960277445096422, "learning_rate": 9.403114128546545e-06, "loss": 0.5971978306770325, "step": 6797 }, { "epoch": 1.086470071126029, "grad_norm": 1.8513627662058463, "learning_rate": 9.400476202463707e-06, "loss": 0.5548718571662903, "step": 6798 }, { "epoch": 1.0866299048989052, "grad_norm": 1.5703412353542108, "learning_rate": 9.39783831824948e-06, "loss": 0.6471606492996216, "step": 6799 }, { "epoch": 1.0867897386717813, "grad_norm": 1.6251145750289329, "learning_rate": 9.39520047608809e-06, "loss": 0.6151238679885864, "step": 6800 }, { "epoch": 1.0869495724446576, "grad_norm": 1.3554383036333684, "learning_rate": 9.392562676163749e-06, "loss": 0.6023054122924805, "step": 6801 }, { "epoch": 1.0871094062175337, "grad_norm": 1.4671358470267166, "learning_rate": 9.389924918660671e-06, "loss": 0.571556568145752, "step": 6802 }, { "epoch": 1.08726923999041, "grad_norm": 1.3312230221830734, "learning_rate": 9.387287203763071e-06, "loss": 0.5313056707382202, "step": 6803 }, { "epoch": 1.0874290737632861, "grad_norm": 1.3166124729059632, "learning_rate": 9.384649531655161e-06, "loss": 0.4301716685295105, "step": 6804 }, { "epoch": 1.0875889075361624, "grad_norm": 1.6590465014972904, "learning_rate": 9.38201190252114e-06, "loss": 0.5036095380783081, "step": 6805 }, { "epoch": 1.0877487413090385, "grad_norm": 1.5827488295706948, "learning_rate": 9.379374316545216e-06, "loss": 0.5523896813392639, "step": 6806 }, { "epoch": 1.0879085750819149, "grad_norm": 1.4867193600718052, "learning_rate": 9.376736773911583e-06, "loss": 0.49121540784835815, "step": 6807 }, { "epoch": 1.088068408854791, "grad_norm": 1.679771771801812, "learning_rate": 9.374099274804444e-06, "loss": 0.4213669002056122, "step": 6808 }, { "epoch": 1.0882282426276673, "grad_norm": 1.6048205503856696, "learning_rate": 9.371461819407987e-06, "loss": 0.49105212092399597, "step": 6809 }, { "epoch": 1.0883880764005434, "grad_norm": 1.3892670207826987, "learning_rate": 9.368824407906405e-06, "loss": 0.5568563938140869, "step": 6810 }, { "epoch": 1.0885479101734197, "grad_norm": 1.1760862224544455, "learning_rate": 9.366187040483887e-06, "loss": 0.5659752488136292, "step": 6811 }, { "epoch": 1.0887077439462958, "grad_norm": 1.5235250278995105, "learning_rate": 9.363549717324612e-06, "loss": 0.737686276435852, "step": 6812 }, { "epoch": 1.088867577719172, "grad_norm": 1.3623237282971734, "learning_rate": 9.360912438612767e-06, "loss": 0.48579955101013184, "step": 6813 }, { "epoch": 1.0890274114920482, "grad_norm": 1.88099145839248, "learning_rate": 9.35827520453253e-06, "loss": 0.6586970090866089, "step": 6814 }, { "epoch": 1.0891872452649245, "grad_norm": 1.484560885882492, "learning_rate": 9.35563801526807e-06, "loss": 0.6967804431915283, "step": 6815 }, { "epoch": 1.0893470790378006, "grad_norm": 1.6198973737719589, "learning_rate": 9.353000871003566e-06, "loss": 0.6636141538619995, "step": 6816 }, { "epoch": 1.089506912810677, "grad_norm": 1.4649803260014436, "learning_rate": 9.350363771923181e-06, "loss": 0.5567563772201538, "step": 6817 }, { "epoch": 1.089666746583553, "grad_norm": 1.4940185077858665, "learning_rate": 9.347726718211087e-06, "loss": 0.6217758655548096, "step": 6818 }, { "epoch": 1.0898265803564293, "grad_norm": 1.4318538127770766, "learning_rate": 9.345089710051441e-06, "loss": 0.6245477199554443, "step": 6819 }, { "epoch": 1.0899864141293056, "grad_norm": 1.618314123980978, "learning_rate": 9.342452747628406e-06, "loss": 0.6618965268135071, "step": 6820 }, { "epoch": 1.0901462479021817, "grad_norm": 1.3914380592341329, "learning_rate": 9.339815831126134e-06, "loss": 0.5669334530830383, "step": 6821 }, { "epoch": 1.0903060816750578, "grad_norm": 1.6333308867129224, "learning_rate": 9.337178960728781e-06, "loss": 0.5879642963409424, "step": 6822 }, { "epoch": 1.0904659154479341, "grad_norm": 1.464339611303524, "learning_rate": 9.334542136620492e-06, "loss": 0.5171379446983337, "step": 6823 }, { "epoch": 1.0906257492208105, "grad_norm": 1.3284741575630743, "learning_rate": 9.331905358985423e-06, "loss": 0.5236141085624695, "step": 6824 }, { "epoch": 1.0907855829936866, "grad_norm": 1.5596550546519634, "learning_rate": 9.329268628007708e-06, "loss": 0.7096089124679565, "step": 6825 }, { "epoch": 1.0909454167665629, "grad_norm": 1.6936031327336563, "learning_rate": 9.326631943871493e-06, "loss": 0.6962524652481079, "step": 6826 }, { "epoch": 1.091105250539439, "grad_norm": 1.5566621827561462, "learning_rate": 9.323995306760909e-06, "loss": 0.607443630695343, "step": 6827 }, { "epoch": 1.0912650843123153, "grad_norm": 1.4802564352712722, "learning_rate": 9.321358716860096e-06, "loss": 0.5429033637046814, "step": 6828 }, { "epoch": 1.0914249180851914, "grad_norm": 1.3185177777925294, "learning_rate": 9.31872217435318e-06, "loss": 0.6181161403656006, "step": 6829 }, { "epoch": 1.0915847518580677, "grad_norm": 1.3447935518068286, "learning_rate": 9.316085679424286e-06, "loss": 0.45950400829315186, "step": 6830 }, { "epoch": 1.0917445856309438, "grad_norm": 1.263730209319787, "learning_rate": 9.313449232257542e-06, "loss": 0.5390876531600952, "step": 6831 }, { "epoch": 1.09190441940382, "grad_norm": 1.2317142709394426, "learning_rate": 9.310812833037065e-06, "loss": 0.4408085346221924, "step": 6832 }, { "epoch": 1.0920642531766962, "grad_norm": 1.1974715934192037, "learning_rate": 9.308176481946974e-06, "loss": 0.47606295347213745, "step": 6833 }, { "epoch": 1.0922240869495725, "grad_norm": 1.4374375239139197, "learning_rate": 9.305540179171377e-06, "loss": 0.526387095451355, "step": 6834 }, { "epoch": 1.0923839207224486, "grad_norm": 1.2818958578695878, "learning_rate": 9.302903924894391e-06, "loss": 0.4240305423736572, "step": 6835 }, { "epoch": 1.092543754495325, "grad_norm": 1.4686800722460378, "learning_rate": 9.30026771930012e-06, "loss": 0.4857035279273987, "step": 6836 }, { "epoch": 1.092703588268201, "grad_norm": 1.2328053802779315, "learning_rate": 9.297631562572667e-06, "loss": 0.4814143776893616, "step": 6837 }, { "epoch": 1.0928634220410773, "grad_norm": 1.3312355641167675, "learning_rate": 9.29499545489613e-06, "loss": 0.47374725341796875, "step": 6838 }, { "epoch": 1.0930232558139534, "grad_norm": 1.4785401784274863, "learning_rate": 9.292359396454608e-06, "loss": 0.5361547470092773, "step": 6839 }, { "epoch": 1.0931830895868297, "grad_norm": 1.6712265468385317, "learning_rate": 9.289723387432194e-06, "loss": 0.5572801232337952, "step": 6840 }, { "epoch": 1.0933429233597058, "grad_norm": 1.3747612511409935, "learning_rate": 9.287087428012974e-06, "loss": 0.5976380705833435, "step": 6841 }, { "epoch": 1.0935027571325822, "grad_norm": 1.4058119600620407, "learning_rate": 9.28445151838104e-06, "loss": 0.492611289024353, "step": 6842 }, { "epoch": 1.0936625909054583, "grad_norm": 2.332047347307027, "learning_rate": 9.281815658720465e-06, "loss": 0.487851619720459, "step": 6843 }, { "epoch": 1.0938224246783346, "grad_norm": 1.4784213823537184, "learning_rate": 9.279179849215341e-06, "loss": 0.625766932964325, "step": 6844 }, { "epoch": 1.0939822584512107, "grad_norm": 1.4949789936139997, "learning_rate": 9.276544090049733e-06, "loss": 0.5869770050048828, "step": 6845 }, { "epoch": 1.094142092224087, "grad_norm": 1.3607925724370178, "learning_rate": 9.27390838140772e-06, "loss": 0.5977144241333008, "step": 6846 }, { "epoch": 1.094301925996963, "grad_norm": 1.4973751846703742, "learning_rate": 9.271272723473365e-06, "loss": 0.5077997446060181, "step": 6847 }, { "epoch": 1.0944617597698394, "grad_norm": 1.3905468747071013, "learning_rate": 9.268637116430737e-06, "loss": 0.5729416012763977, "step": 6848 }, { "epoch": 1.0946215935427155, "grad_norm": 1.3519542595424376, "learning_rate": 9.266001560463895e-06, "loss": 0.5209523439407349, "step": 6849 }, { "epoch": 1.0947814273155918, "grad_norm": 1.2756710121857644, "learning_rate": 9.2633660557569e-06, "loss": 0.45967888832092285, "step": 6850 }, { "epoch": 1.094941261088468, "grad_norm": 1.5516157721850876, "learning_rate": 9.260730602493803e-06, "loss": 0.8008229732513428, "step": 6851 }, { "epoch": 1.0951010948613442, "grad_norm": 1.369027200470797, "learning_rate": 9.258095200858654e-06, "loss": 0.43503257632255554, "step": 6852 }, { "epoch": 1.0952609286342203, "grad_norm": 1.6452385116675097, "learning_rate": 9.255459851035502e-06, "loss": 0.45490461587905884, "step": 6853 }, { "epoch": 1.0954207624070966, "grad_norm": 1.6135217487803288, "learning_rate": 9.252824553208391e-06, "loss": 0.47209298610687256, "step": 6854 }, { "epoch": 1.095580596179973, "grad_norm": 1.21676438493894, "learning_rate": 9.250189307561361e-06, "loss": 0.37056851387023926, "step": 6855 }, { "epoch": 1.095740429952849, "grad_norm": 1.6391190548474006, "learning_rate": 9.247554114278446e-06, "loss": 0.6424787640571594, "step": 6856 }, { "epoch": 1.0959002637257251, "grad_norm": 1.553737571763185, "learning_rate": 9.244918973543682e-06, "loss": 0.5882565975189209, "step": 6857 }, { "epoch": 1.0960600974986014, "grad_norm": 1.594320468463686, "learning_rate": 9.242283885541096e-06, "loss": 0.597813606262207, "step": 6858 }, { "epoch": 1.0962199312714778, "grad_norm": 1.5557794486318628, "learning_rate": 9.23964885045471e-06, "loss": 0.6285666227340698, "step": 6859 }, { "epoch": 1.0963797650443539, "grad_norm": 1.4539838663978861, "learning_rate": 9.237013868468551e-06, "loss": 0.6264330148696899, "step": 6860 }, { "epoch": 1.0965395988172302, "grad_norm": 1.5426085106541623, "learning_rate": 9.234378939766632e-06, "loss": 0.4866963028907776, "step": 6861 }, { "epoch": 1.0966994325901063, "grad_norm": 1.4991403481446053, "learning_rate": 9.23174406453297e-06, "loss": 0.4726215600967407, "step": 6862 }, { "epoch": 1.0968592663629826, "grad_norm": 1.5637437470995676, "learning_rate": 9.229109242951573e-06, "loss": 0.4960961937904358, "step": 6863 }, { "epoch": 1.0970191001358587, "grad_norm": 1.406092618417641, "learning_rate": 9.22647447520645e-06, "loss": 0.6189308166503906, "step": 6864 }, { "epoch": 1.097178933908735, "grad_norm": 1.2796518036542615, "learning_rate": 9.223839761481606e-06, "loss": 0.42050185799598694, "step": 6865 }, { "epoch": 1.097338767681611, "grad_norm": 1.518720212326589, "learning_rate": 9.221205101961036e-06, "loss": 0.5500880479812622, "step": 6866 }, { "epoch": 1.0974986014544874, "grad_norm": 1.6000248766488765, "learning_rate": 9.218570496828733e-06, "loss": 0.696035623550415, "step": 6867 }, { "epoch": 1.0976584352273635, "grad_norm": 1.4545987107297425, "learning_rate": 9.215935946268697e-06, "loss": 0.6256739497184753, "step": 6868 }, { "epoch": 1.0978182690002398, "grad_norm": 1.4802116377149777, "learning_rate": 9.213301450464906e-06, "loss": 0.3412449359893799, "step": 6869 }, { "epoch": 1.097978102773116, "grad_norm": 1.5340261630665901, "learning_rate": 9.210667009601353e-06, "loss": 0.5507053732872009, "step": 6870 }, { "epoch": 1.0981379365459922, "grad_norm": 1.615652191063521, "learning_rate": 9.208032623862009e-06, "loss": 0.48820507526397705, "step": 6871 }, { "epoch": 1.0982977703188683, "grad_norm": 1.5308115892448446, "learning_rate": 9.205398293430857e-06, "loss": 0.7185014486312866, "step": 6872 }, { "epoch": 1.0984576040917446, "grad_norm": 1.445357294848398, "learning_rate": 9.202764018491867e-06, "loss": 0.3942740857601166, "step": 6873 }, { "epoch": 1.0986174378646207, "grad_norm": 1.4932339446641703, "learning_rate": 9.200129799229004e-06, "loss": 0.559212863445282, "step": 6874 }, { "epoch": 1.098777271637497, "grad_norm": 1.6181525668123633, "learning_rate": 9.19749563582624e-06, "loss": 0.6567010879516602, "step": 6875 }, { "epoch": 1.0989371054103731, "grad_norm": 1.3631872601191088, "learning_rate": 9.194861528467527e-06, "loss": 0.4909363090991974, "step": 6876 }, { "epoch": 1.0990969391832495, "grad_norm": 1.341890493041515, "learning_rate": 9.19222747733683e-06, "loss": 0.5253735780715942, "step": 6877 }, { "epoch": 1.0992567729561256, "grad_norm": 1.9091308075791693, "learning_rate": 9.189593482618098e-06, "loss": 0.5884335041046143, "step": 6878 }, { "epoch": 1.0994166067290019, "grad_norm": 1.5535853296361712, "learning_rate": 9.186959544495275e-06, "loss": 0.4887621998786926, "step": 6879 }, { "epoch": 1.099576440501878, "grad_norm": 2.1228105427157082, "learning_rate": 9.184325663152314e-06, "loss": 0.5608883500099182, "step": 6880 }, { "epoch": 1.0997362742747543, "grad_norm": 1.3477872937918591, "learning_rate": 9.18169183877315e-06, "loss": 0.6144328117370605, "step": 6881 }, { "epoch": 1.0998961080476304, "grad_norm": 1.780036067612354, "learning_rate": 9.179058071541726e-06, "loss": 0.6567690372467041, "step": 6882 }, { "epoch": 1.1000559418205067, "grad_norm": 1.2981039772385543, "learning_rate": 9.176424361641965e-06, "loss": 0.39897626638412476, "step": 6883 }, { "epoch": 1.1002157755933828, "grad_norm": 1.4417371346878223, "learning_rate": 9.173790709257807e-06, "loss": 0.5257717370986938, "step": 6884 }, { "epoch": 1.100375609366259, "grad_norm": 1.4108758374477337, "learning_rate": 9.171157114573172e-06, "loss": 0.48786017298698425, "step": 6885 }, { "epoch": 1.1005354431391352, "grad_norm": 1.536849778407506, "learning_rate": 9.16852357777198e-06, "loss": 0.6449027061462402, "step": 6886 }, { "epoch": 1.1006952769120115, "grad_norm": 1.4653763773930732, "learning_rate": 9.165890099038149e-06, "loss": 0.5330281257629395, "step": 6887 }, { "epoch": 1.1008551106848876, "grad_norm": 1.5919671328232035, "learning_rate": 9.163256678555594e-06, "loss": 0.528901994228363, "step": 6888 }, { "epoch": 1.101014944457764, "grad_norm": 1.776630499872249, "learning_rate": 9.160623316508218e-06, "loss": 0.6347792148590088, "step": 6889 }, { "epoch": 1.1011747782306402, "grad_norm": 1.4432559425397566, "learning_rate": 9.157990013079932e-06, "loss": 0.550991952419281, "step": 6890 }, { "epoch": 1.1013346120035163, "grad_norm": 1.3801486657635786, "learning_rate": 9.155356768454633e-06, "loss": 0.4947432279586792, "step": 6891 }, { "epoch": 1.1014944457763924, "grad_norm": 1.3588172595409715, "learning_rate": 9.152723582816218e-06, "loss": 0.6476958990097046, "step": 6892 }, { "epoch": 1.1016542795492688, "grad_norm": 1.5849697129060685, "learning_rate": 9.15009045634858e-06, "loss": 0.5409548878669739, "step": 6893 }, { "epoch": 1.101814113322145, "grad_norm": 1.417783321166952, "learning_rate": 9.147457389235606e-06, "loss": 0.6802971959114075, "step": 6894 }, { "epoch": 1.1019739470950212, "grad_norm": 1.3283874735984158, "learning_rate": 9.14482438166118e-06, "loss": 0.44448721408843994, "step": 6895 }, { "epoch": 1.1021337808678975, "grad_norm": 1.6618548260323767, "learning_rate": 9.142191433809183e-06, "loss": 0.6144168376922607, "step": 6896 }, { "epoch": 1.1022936146407736, "grad_norm": 1.7457633566081898, "learning_rate": 9.139558545863493e-06, "loss": 0.6007346510887146, "step": 6897 }, { "epoch": 1.10245344841365, "grad_norm": 1.503566863024754, "learning_rate": 9.136925718007975e-06, "loss": 0.500857949256897, "step": 6898 }, { "epoch": 1.102613282186526, "grad_norm": 1.3558734339148242, "learning_rate": 9.134292950426503e-06, "loss": 0.48347997665405273, "step": 6899 }, { "epoch": 1.1027731159594023, "grad_norm": 1.6720421088579598, "learning_rate": 9.131660243302938e-06, "loss": 0.5095036029815674, "step": 6900 }, { "epoch": 1.1029329497322784, "grad_norm": 1.3879948518979937, "learning_rate": 9.129027596821137e-06, "loss": 0.6061928272247314, "step": 6901 }, { "epoch": 1.1030927835051547, "grad_norm": 1.7937174869209618, "learning_rate": 9.126395011164956e-06, "loss": 0.6651574373245239, "step": 6902 }, { "epoch": 1.1032526172780308, "grad_norm": 1.827621162076628, "learning_rate": 9.123762486518242e-06, "loss": 0.7357287406921387, "step": 6903 }, { "epoch": 1.1034124510509071, "grad_norm": 1.4736046819382824, "learning_rate": 9.121130023064846e-06, "loss": 0.6535474061965942, "step": 6904 }, { "epoch": 1.1035722848237832, "grad_norm": 1.3749760780657903, "learning_rate": 9.118497620988611e-06, "loss": 0.49839961528778076, "step": 6905 }, { "epoch": 1.1037321185966595, "grad_norm": 1.15583359260475, "learning_rate": 9.11586528047337e-06, "loss": 0.4807654619216919, "step": 6906 }, { "epoch": 1.1038919523695356, "grad_norm": 1.3412481125157343, "learning_rate": 9.113233001702963e-06, "loss": 0.4365371763706207, "step": 6907 }, { "epoch": 1.104051786142412, "grad_norm": 1.4686901658699771, "learning_rate": 9.110600784861214e-06, "loss": 0.5175493955612183, "step": 6908 }, { "epoch": 1.104211619915288, "grad_norm": 1.5082720844017898, "learning_rate": 9.107968630131944e-06, "loss": 0.5672093033790588, "step": 6909 }, { "epoch": 1.1043714536881644, "grad_norm": 1.4208558014239814, "learning_rate": 9.10533653769898e-06, "loss": 0.6163899898529053, "step": 6910 }, { "epoch": 1.1045312874610405, "grad_norm": 1.7880939839815229, "learning_rate": 9.102704507746134e-06, "loss": 0.572303056716919, "step": 6911 }, { "epoch": 1.1046911212339168, "grad_norm": 1.4868923805962029, "learning_rate": 9.10007254045722e-06, "loss": 0.5085476636886597, "step": 6912 }, { "epoch": 1.1048509550067929, "grad_norm": 1.3406279062724729, "learning_rate": 9.097440636016044e-06, "loss": 0.7091158628463745, "step": 6913 }, { "epoch": 1.1050107887796692, "grad_norm": 1.3425461925794917, "learning_rate": 9.09480879460641e-06, "loss": 0.5646786689758301, "step": 6914 }, { "epoch": 1.1051706225525453, "grad_norm": 1.3776533916482847, "learning_rate": 9.092177016412117e-06, "loss": 0.5831238627433777, "step": 6915 }, { "epoch": 1.1053304563254216, "grad_norm": 1.6363000564692733, "learning_rate": 9.089545301616953e-06, "loss": 0.6696015000343323, "step": 6916 }, { "epoch": 1.1054902900982977, "grad_norm": 1.4916318082422697, "learning_rate": 9.086913650404716e-06, "loss": 0.6203591823577881, "step": 6917 }, { "epoch": 1.105650123871174, "grad_norm": 1.4739736754465946, "learning_rate": 9.084282062959186e-06, "loss": 0.5545026063919067, "step": 6918 }, { "epoch": 1.10580995764405, "grad_norm": 1.740147466729981, "learning_rate": 9.081650539464147e-06, "loss": 0.6996351480484009, "step": 6919 }, { "epoch": 1.1059697914169264, "grad_norm": 1.2672581079758867, "learning_rate": 9.079019080103371e-06, "loss": 0.4492906928062439, "step": 6920 }, { "epoch": 1.1061296251898025, "grad_norm": 1.2968413589585313, "learning_rate": 9.076387685060635e-06, "loss": 0.5402016639709473, "step": 6921 }, { "epoch": 1.1062894589626788, "grad_norm": 1.314311587706899, "learning_rate": 9.073756354519703e-06, "loss": 0.5714753866195679, "step": 6922 }, { "epoch": 1.106449292735555, "grad_norm": 1.4798765751254859, "learning_rate": 9.071125088664335e-06, "loss": 0.6150268316268921, "step": 6923 }, { "epoch": 1.1066091265084312, "grad_norm": 1.3956693446609774, "learning_rate": 9.068493887678293e-06, "loss": 0.6424398422241211, "step": 6924 }, { "epoch": 1.1067689602813076, "grad_norm": 1.180650180350561, "learning_rate": 9.065862751745334e-06, "loss": 0.45503923296928406, "step": 6925 }, { "epoch": 1.1069287940541837, "grad_norm": 1.2254356638347432, "learning_rate": 9.0632316810492e-06, "loss": 0.5186116695404053, "step": 6926 }, { "epoch": 1.1070886278270597, "grad_norm": 1.4877559758474408, "learning_rate": 9.060600675773644e-06, "loss": 0.5710767507553101, "step": 6927 }, { "epoch": 1.107248461599936, "grad_norm": 1.4423512129493121, "learning_rate": 9.057969736102396e-06, "loss": 0.520049512386322, "step": 6928 }, { "epoch": 1.1074082953728124, "grad_norm": 1.4329958471204798, "learning_rate": 9.0553388622192e-06, "loss": 0.553437352180481, "step": 6929 }, { "epoch": 1.1075681291456885, "grad_norm": 1.4644828222691872, "learning_rate": 9.052708054307784e-06, "loss": 0.6673924922943115, "step": 6930 }, { "epoch": 1.1077279629185648, "grad_norm": 1.3566031036802884, "learning_rate": 9.05007731255187e-06, "loss": 0.5695430040359497, "step": 6931 }, { "epoch": 1.1078877966914409, "grad_norm": 1.547650549942775, "learning_rate": 9.047446637135188e-06, "loss": 0.6778731942176819, "step": 6932 }, { "epoch": 1.1080476304643172, "grad_norm": 1.43396627199047, "learning_rate": 9.044816028241448e-06, "loss": 0.4013252854347229, "step": 6933 }, { "epoch": 1.1082074642371933, "grad_norm": 1.545171727055924, "learning_rate": 9.042185486054366e-06, "loss": 0.6124609708786011, "step": 6934 }, { "epoch": 1.1083672980100696, "grad_norm": 1.7693503695935926, "learning_rate": 9.039555010757646e-06, "loss": 0.516233503818512, "step": 6935 }, { "epoch": 1.1085271317829457, "grad_norm": 1.3218278843885154, "learning_rate": 9.036924602534999e-06, "loss": 0.44741660356521606, "step": 6936 }, { "epoch": 1.108686965555822, "grad_norm": 1.5447705484438379, "learning_rate": 9.034294261570116e-06, "loss": 0.5198692679405212, "step": 6937 }, { "epoch": 1.1088467993286981, "grad_norm": 1.1627345628690176, "learning_rate": 9.03166398804669e-06, "loss": 0.573670506477356, "step": 6938 }, { "epoch": 1.1090066331015744, "grad_norm": 1.5264770616686782, "learning_rate": 9.029033782148416e-06, "loss": 0.662861704826355, "step": 6939 }, { "epoch": 1.1091664668744505, "grad_norm": 1.625116529514325, "learning_rate": 9.026403644058973e-06, "loss": 0.5766681432723999, "step": 6940 }, { "epoch": 1.1093263006473268, "grad_norm": 1.5947456392683723, "learning_rate": 9.023773573962044e-06, "loss": 0.7557865381240845, "step": 6941 }, { "epoch": 1.109486134420203, "grad_norm": 1.485675632028536, "learning_rate": 9.0211435720413e-06, "loss": 0.5887001752853394, "step": 6942 }, { "epoch": 1.1096459681930793, "grad_norm": 1.494470222386609, "learning_rate": 9.018513638480412e-06, "loss": 0.5836377739906311, "step": 6943 }, { "epoch": 1.1098058019659554, "grad_norm": 1.4246791601005035, "learning_rate": 9.01588377346305e-06, "loss": 0.5095988512039185, "step": 6944 }, { "epoch": 1.1099656357388317, "grad_norm": 1.6493964791077955, "learning_rate": 9.01325397717287e-06, "loss": 0.6392241716384888, "step": 6945 }, { "epoch": 1.1101254695117078, "grad_norm": 1.4535116352122037, "learning_rate": 9.010624249793528e-06, "loss": 0.5999190211296082, "step": 6946 }, { "epoch": 1.110285303284584, "grad_norm": 1.4478051420920737, "learning_rate": 9.007994591508677e-06, "loss": 0.48718053102493286, "step": 6947 }, { "epoch": 1.1104451370574602, "grad_norm": 1.577114984508699, "learning_rate": 9.005365002501959e-06, "loss": 0.7238324880599976, "step": 6948 }, { "epoch": 1.1106049708303365, "grad_norm": 1.659429626613193, "learning_rate": 9.002735482957021e-06, "loss": 0.6259098649024963, "step": 6949 }, { "epoch": 1.1107648046032126, "grad_norm": 1.564821466037596, "learning_rate": 9.000106033057492e-06, "loss": 0.5354912281036377, "step": 6950 }, { "epoch": 1.110924638376089, "grad_norm": 1.483751917186506, "learning_rate": 8.997476652987009e-06, "loss": 0.5092486143112183, "step": 6951 }, { "epoch": 1.111084472148965, "grad_norm": 1.4388789956630847, "learning_rate": 8.994847342929198e-06, "loss": 0.5846987366676331, "step": 6952 }, { "epoch": 1.1112443059218413, "grad_norm": 1.5252027955333494, "learning_rate": 8.992218103067676e-06, "loss": 0.6440713405609131, "step": 6953 }, { "epoch": 1.1114041396947174, "grad_norm": 1.5610163148108167, "learning_rate": 8.989588933586067e-06, "loss": 0.4951455593109131, "step": 6954 }, { "epoch": 1.1115639734675937, "grad_norm": 1.3273141752039357, "learning_rate": 8.986959834667977e-06, "loss": 0.4995042681694031, "step": 6955 }, { "epoch": 1.1117238072404698, "grad_norm": 1.4233304145303032, "learning_rate": 8.984330806497017e-06, "loss": 0.6744459867477417, "step": 6956 }, { "epoch": 1.1118836410133461, "grad_norm": 1.5298273461936827, "learning_rate": 8.981701849256787e-06, "loss": 0.5761762857437134, "step": 6957 }, { "epoch": 1.1120434747862222, "grad_norm": 1.4617130564865677, "learning_rate": 8.979072963130881e-06, "loss": 0.6261324882507324, "step": 6958 }, { "epoch": 1.1122033085590985, "grad_norm": 1.5793949040626163, "learning_rate": 8.976444148302897e-06, "loss": 0.5813355445861816, "step": 6959 }, { "epoch": 1.1123631423319749, "grad_norm": 1.6721400006152798, "learning_rate": 8.973815404956418e-06, "loss": 0.5943373441696167, "step": 6960 }, { "epoch": 1.112522976104851, "grad_norm": 1.351436955411808, "learning_rate": 8.97118673327503e-06, "loss": 0.5429818034172058, "step": 6961 }, { "epoch": 1.112682809877727, "grad_norm": 1.4989815971054217, "learning_rate": 8.968558133442303e-06, "loss": 0.5703737735748291, "step": 6962 }, { "epoch": 1.1128426436506034, "grad_norm": 1.387490102760597, "learning_rate": 8.965929605641813e-06, "loss": 0.6207147836685181, "step": 6963 }, { "epoch": 1.1130024774234797, "grad_norm": 1.4750948034146127, "learning_rate": 8.96330115005713e-06, "loss": 0.46202552318573, "step": 6964 }, { "epoch": 1.1131623111963558, "grad_norm": 1.2742037172475693, "learning_rate": 8.960672766871816e-06, "loss": 0.4252091348171234, "step": 6965 }, { "epoch": 1.113322144969232, "grad_norm": 1.4775888228911183, "learning_rate": 8.958044456269422e-06, "loss": 0.522503137588501, "step": 6966 }, { "epoch": 1.1134819787421082, "grad_norm": 1.5386214529184605, "learning_rate": 8.955416218433506e-06, "loss": 0.5899875164031982, "step": 6967 }, { "epoch": 1.1136418125149845, "grad_norm": 1.2696581694556206, "learning_rate": 8.952788053547611e-06, "loss": 0.5623082518577576, "step": 6968 }, { "epoch": 1.1138016462878606, "grad_norm": 1.6854653274123848, "learning_rate": 8.950159961795282e-06, "loss": 0.5231536626815796, "step": 6969 }, { "epoch": 1.113961480060737, "grad_norm": 1.4186677364640885, "learning_rate": 8.94753194336005e-06, "loss": 0.5339443683624268, "step": 6970 }, { "epoch": 1.114121313833613, "grad_norm": 1.39334332519116, "learning_rate": 8.944903998425453e-06, "loss": 0.4861244559288025, "step": 6971 }, { "epoch": 1.1142811476064893, "grad_norm": 1.5301338575977228, "learning_rate": 8.942276127175014e-06, "loss": 0.5802389979362488, "step": 6972 }, { "epoch": 1.1144409813793654, "grad_norm": 1.7322497155192105, "learning_rate": 8.939648329792252e-06, "loss": 0.6279224157333374, "step": 6973 }, { "epoch": 1.1146008151522417, "grad_norm": 1.6165395717217785, "learning_rate": 8.937020606460688e-06, "loss": 0.6686193943023682, "step": 6974 }, { "epoch": 1.1147606489251178, "grad_norm": 1.2556308994060124, "learning_rate": 8.93439295736383e-06, "loss": 0.4897461235523224, "step": 6975 }, { "epoch": 1.1149204826979942, "grad_norm": 1.4736197407747127, "learning_rate": 8.931765382685185e-06, "loss": 0.6115464568138123, "step": 6976 }, { "epoch": 1.1150803164708702, "grad_norm": 1.5388775237459755, "learning_rate": 8.92913788260825e-06, "loss": 0.5953955054283142, "step": 6977 }, { "epoch": 1.1152401502437466, "grad_norm": 1.3521443602271486, "learning_rate": 8.926510457316524e-06, "loss": 0.5423276424407959, "step": 6978 }, { "epoch": 1.1153999840166227, "grad_norm": 1.4196225809097855, "learning_rate": 8.923883106993498e-06, "loss": 0.6242902278900146, "step": 6979 }, { "epoch": 1.115559817789499, "grad_norm": 1.663204921181708, "learning_rate": 8.921255831822652e-06, "loss": 0.7315624952316284, "step": 6980 }, { "epoch": 1.115719651562375, "grad_norm": 1.3642224556637157, "learning_rate": 8.918628631987472e-06, "loss": 0.5195304155349731, "step": 6981 }, { "epoch": 1.1158794853352514, "grad_norm": 1.4782937390377693, "learning_rate": 8.916001507671425e-06, "loss": 0.5571188926696777, "step": 6982 }, { "epoch": 1.1160393191081275, "grad_norm": 1.6178491561683728, "learning_rate": 8.913374459057982e-06, "loss": 0.5473554730415344, "step": 6983 }, { "epoch": 1.1161991528810038, "grad_norm": 1.4069383041616026, "learning_rate": 8.910747486330612e-06, "loss": 0.5064950585365295, "step": 6984 }, { "epoch": 1.11635898665388, "grad_norm": 1.4147077665243282, "learning_rate": 8.908120589672767e-06, "loss": 0.4217233955860138, "step": 6985 }, { "epoch": 1.1165188204267562, "grad_norm": 1.3745319759884929, "learning_rate": 8.905493769267907e-06, "loss": 0.511336624622345, "step": 6986 }, { "epoch": 1.1166786541996323, "grad_norm": 1.5897474145340684, "learning_rate": 8.902867025299475e-06, "loss": 0.48383161425590515, "step": 6987 }, { "epoch": 1.1168384879725086, "grad_norm": 1.5541754427501746, "learning_rate": 8.900240357950912e-06, "loss": 0.5428926348686218, "step": 6988 }, { "epoch": 1.1169983217453847, "grad_norm": 1.5221408437476558, "learning_rate": 8.89761376740566e-06, "loss": 0.7189246416091919, "step": 6989 }, { "epoch": 1.117158155518261, "grad_norm": 1.5942604218583079, "learning_rate": 8.894987253847146e-06, "loss": 0.5123748183250427, "step": 6990 }, { "epoch": 1.1173179892911371, "grad_norm": 1.2880836971033098, "learning_rate": 8.8923608174588e-06, "loss": 0.48558348417282104, "step": 6991 }, { "epoch": 1.1174778230640134, "grad_norm": 1.2841493044848047, "learning_rate": 8.88973445842404e-06, "loss": 0.4955028295516968, "step": 6992 }, { "epoch": 1.1176376568368895, "grad_norm": 1.4371150412866516, "learning_rate": 8.887108176926286e-06, "loss": 0.44654935598373413, "step": 6993 }, { "epoch": 1.1177974906097659, "grad_norm": 1.442957487205621, "learning_rate": 8.884481973148944e-06, "loss": 0.6078547239303589, "step": 6994 }, { "epoch": 1.117957324382642, "grad_norm": 1.5996974512207454, "learning_rate": 8.881855847275418e-06, "loss": 0.6370154023170471, "step": 6995 }, { "epoch": 1.1181171581555183, "grad_norm": 1.3826333164905937, "learning_rate": 8.879229799489112e-06, "loss": 0.6529675722122192, "step": 6996 }, { "epoch": 1.1182769919283944, "grad_norm": 1.21610363639413, "learning_rate": 8.876603829973416e-06, "loss": 0.4612423777580261, "step": 6997 }, { "epoch": 1.1184368257012707, "grad_norm": 1.3921129566001016, "learning_rate": 8.87397793891172e-06, "loss": 0.5190247893333435, "step": 6998 }, { "epoch": 1.118596659474147, "grad_norm": 1.3758608198413356, "learning_rate": 8.871352126487406e-06, "loss": 0.5437788963317871, "step": 6999 }, { "epoch": 1.118756493247023, "grad_norm": 1.5643232664467843, "learning_rate": 8.868726392883852e-06, "loss": 0.546127200126648, "step": 7000 }, { "epoch": 1.1189163270198994, "grad_norm": 1.5118002176318335, "learning_rate": 8.866100738284432e-06, "loss": 0.6654967665672302, "step": 7001 }, { "epoch": 1.1190761607927755, "grad_norm": 1.4563900198823498, "learning_rate": 8.863475162872507e-06, "loss": 0.5545699596405029, "step": 7002 }, { "epoch": 1.1192359945656518, "grad_norm": 1.4118296435503306, "learning_rate": 8.860849666831439e-06, "loss": 0.5185892581939697, "step": 7003 }, { "epoch": 1.119395828338528, "grad_norm": 1.70215247225354, "learning_rate": 8.858224250344589e-06, "loss": 0.546446681022644, "step": 7004 }, { "epoch": 1.1195556621114042, "grad_norm": 1.3710820610837677, "learning_rate": 8.855598913595302e-06, "loss": 0.4595677852630615, "step": 7005 }, { "epoch": 1.1197154958842803, "grad_norm": 1.5769664075824914, "learning_rate": 8.852973656766926e-06, "loss": 0.5081753134727478, "step": 7006 }, { "epoch": 1.1198753296571566, "grad_norm": 1.4411214603382765, "learning_rate": 8.850348480042794e-06, "loss": 0.6036607027053833, "step": 7007 }, { "epoch": 1.1200351634300327, "grad_norm": 1.499743628658379, "learning_rate": 8.847723383606244e-06, "loss": 0.5278029441833496, "step": 7008 }, { "epoch": 1.120194997202909, "grad_norm": 1.4186671258205252, "learning_rate": 8.845098367640602e-06, "loss": 0.5607222318649292, "step": 7009 }, { "epoch": 1.1203548309757851, "grad_norm": 1.241302762498644, "learning_rate": 8.842473432329187e-06, "loss": 0.48081833124160767, "step": 7010 }, { "epoch": 1.1205146647486615, "grad_norm": 1.7926023896462833, "learning_rate": 8.83984857785532e-06, "loss": 0.6354970335960388, "step": 7011 }, { "epoch": 1.1206744985215376, "grad_norm": 1.3597985263722174, "learning_rate": 8.837223804402306e-06, "loss": 0.5781649947166443, "step": 7012 }, { "epoch": 1.1208343322944139, "grad_norm": 1.4224957822631932, "learning_rate": 8.834599112153456e-06, "loss": 0.47619444131851196, "step": 7013 }, { "epoch": 1.12099416606729, "grad_norm": 1.2762704670722211, "learning_rate": 8.831974501292064e-06, "loss": 0.5842509269714355, "step": 7014 }, { "epoch": 1.1211539998401663, "grad_norm": 1.6944678146704812, "learning_rate": 8.829349972001425e-06, "loss": 0.6447188854217529, "step": 7015 }, { "epoch": 1.1213138336130424, "grad_norm": 1.4333225742261309, "learning_rate": 8.826725524464828e-06, "loss": 0.565058708190918, "step": 7016 }, { "epoch": 1.1214736673859187, "grad_norm": 1.7354945887986175, "learning_rate": 8.824101158865553e-06, "loss": 0.6474004983901978, "step": 7017 }, { "epoch": 1.1216335011587948, "grad_norm": 1.4863773538261045, "learning_rate": 8.82147687538688e-06, "loss": 0.6298815011978149, "step": 7018 }, { "epoch": 1.121793334931671, "grad_norm": 1.6441399788970754, "learning_rate": 8.818852674212075e-06, "loss": 0.45134034752845764, "step": 7019 }, { "epoch": 1.1219531687045472, "grad_norm": 1.5317819840211173, "learning_rate": 8.816228555524409e-06, "loss": 0.4784253239631653, "step": 7020 }, { "epoch": 1.1221130024774235, "grad_norm": 7.2863260869170405, "learning_rate": 8.813604519507135e-06, "loss": 0.5127506256103516, "step": 7021 }, { "epoch": 1.1222728362502996, "grad_norm": 1.274279233854751, "learning_rate": 8.810980566343507e-06, "loss": 0.5192617774009705, "step": 7022 }, { "epoch": 1.122432670023176, "grad_norm": 1.9055199197719508, "learning_rate": 8.808356696216773e-06, "loss": 0.700403094291687, "step": 7023 }, { "epoch": 1.122592503796052, "grad_norm": 1.5327769534814109, "learning_rate": 8.805732909310181e-06, "loss": 0.649676501750946, "step": 7024 }, { "epoch": 1.1227523375689283, "grad_norm": 1.4475313282061615, "learning_rate": 8.803109205806958e-06, "loss": 0.5888369083404541, "step": 7025 }, { "epoch": 1.1229121713418044, "grad_norm": 1.3243656618943882, "learning_rate": 8.800485585890342e-06, "loss": 0.47365623712539673, "step": 7026 }, { "epoch": 1.1230720051146807, "grad_norm": 1.4459238449016216, "learning_rate": 8.79786204974355e-06, "loss": 0.4832538962364197, "step": 7027 }, { "epoch": 1.1232318388875568, "grad_norm": 1.4874904578399313, "learning_rate": 8.795238597549807e-06, "loss": 0.4793521761894226, "step": 7028 }, { "epoch": 1.1233916726604332, "grad_norm": 1.6985634278016013, "learning_rate": 8.792615229492322e-06, "loss": 0.6534872055053711, "step": 7029 }, { "epoch": 1.1235515064333093, "grad_norm": 1.4528821229513982, "learning_rate": 8.789991945754301e-06, "loss": 0.6402232050895691, "step": 7030 }, { "epoch": 1.1237113402061856, "grad_norm": 1.7777156383017745, "learning_rate": 8.787368746518946e-06, "loss": 0.7381212115287781, "step": 7031 }, { "epoch": 1.1238711739790617, "grad_norm": 1.5110354109496318, "learning_rate": 8.784745631969453e-06, "loss": 0.5516455173492432, "step": 7032 }, { "epoch": 1.124031007751938, "grad_norm": 1.6757625351430787, "learning_rate": 8.78212260228901e-06, "loss": 0.6699756383895874, "step": 7033 }, { "epoch": 1.1241908415248143, "grad_norm": 1.5566956762518511, "learning_rate": 8.779499657660799e-06, "loss": 0.5160881280899048, "step": 7034 }, { "epoch": 1.1243506752976904, "grad_norm": 1.205040292241996, "learning_rate": 8.776876798268e-06, "loss": 0.5283499956130981, "step": 7035 }, { "epoch": 1.1245105090705667, "grad_norm": 1.423173600785991, "learning_rate": 8.774254024293782e-06, "loss": 0.6394257545471191, "step": 7036 }, { "epoch": 1.1246703428434428, "grad_norm": 1.612393524675709, "learning_rate": 8.771631335921309e-06, "loss": 0.5467624664306641, "step": 7037 }, { "epoch": 1.1248301766163191, "grad_norm": 1.5057193527519257, "learning_rate": 8.769008733333743e-06, "loss": 0.5819821357727051, "step": 7038 }, { "epoch": 1.1249900103891952, "grad_norm": 1.3626652569444258, "learning_rate": 8.766386216714235e-06, "loss": 0.4882606565952301, "step": 7039 }, { "epoch": 1.1251498441620715, "grad_norm": 1.236596311381482, "learning_rate": 8.763763786245936e-06, "loss": 0.5903192162513733, "step": 7040 }, { "epoch": 1.1253096779349476, "grad_norm": 1.6681842805615392, "learning_rate": 8.761141442111981e-06, "loss": 0.5555704832077026, "step": 7041 }, { "epoch": 1.125469511707824, "grad_norm": 1.3921095048072545, "learning_rate": 8.758519184495512e-06, "loss": 0.5514618158340454, "step": 7042 }, { "epoch": 1.1256293454807, "grad_norm": 1.4066750097238074, "learning_rate": 8.755897013579651e-06, "loss": 0.5239886045455933, "step": 7043 }, { "epoch": 1.1257891792535764, "grad_norm": 1.3051363124050839, "learning_rate": 8.753274929547528e-06, "loss": 0.5497645139694214, "step": 7044 }, { "epoch": 1.1259490130264524, "grad_norm": 2.0967717509783026, "learning_rate": 8.750652932582255e-06, "loss": 0.46198850870132446, "step": 7045 }, { "epoch": 1.1261088467993288, "grad_norm": 1.4188308520255164, "learning_rate": 8.748031022866947e-06, "loss": 0.49518609046936035, "step": 7046 }, { "epoch": 1.1262686805722049, "grad_norm": 1.3675122174107772, "learning_rate": 8.745409200584707e-06, "loss": 0.5291221737861633, "step": 7047 }, { "epoch": 1.1264285143450812, "grad_norm": 1.4308343645399837, "learning_rate": 8.742787465918634e-06, "loss": 0.4711887836456299, "step": 7048 }, { "epoch": 1.1265883481179573, "grad_norm": 1.6560000868802478, "learning_rate": 8.740165819051818e-06, "loss": 0.5233123898506165, "step": 7049 }, { "epoch": 1.1267481818908336, "grad_norm": 1.5096717856931596, "learning_rate": 8.737544260167352e-06, "loss": 0.6356722116470337, "step": 7050 }, { "epoch": 1.1269080156637097, "grad_norm": 1.3465289900085335, "learning_rate": 8.73492278944831e-06, "loss": 0.43920570611953735, "step": 7051 }, { "epoch": 1.127067849436586, "grad_norm": 1.4660957255570926, "learning_rate": 8.732301407077767e-06, "loss": 0.513185977935791, "step": 7052 }, { "epoch": 1.127227683209462, "grad_norm": 1.530738575047237, "learning_rate": 8.729680113238794e-06, "loss": 0.4445130228996277, "step": 7053 }, { "epoch": 1.1273875169823384, "grad_norm": 1.4080299275877146, "learning_rate": 8.72705890811445e-06, "loss": 0.6024655699729919, "step": 7054 }, { "epoch": 1.1275473507552145, "grad_norm": 1.8099855277186523, "learning_rate": 8.724437791887794e-06, "loss": 0.5569756031036377, "step": 7055 }, { "epoch": 1.1277071845280908, "grad_norm": 1.5788402872090581, "learning_rate": 8.72181676474187e-06, "loss": 0.713215708732605, "step": 7056 }, { "epoch": 1.127867018300967, "grad_norm": 1.5560990409793722, "learning_rate": 8.719195826859728e-06, "loss": 0.5277035236358643, "step": 7057 }, { "epoch": 1.1280268520738432, "grad_norm": 2.203246633207685, "learning_rate": 8.716574978424403e-06, "loss": 0.5709280967712402, "step": 7058 }, { "epoch": 1.1281866858467193, "grad_norm": 1.441638992900086, "learning_rate": 8.71395421961892e-06, "loss": 0.5887954235076904, "step": 7059 }, { "epoch": 1.1283465196195956, "grad_norm": 1.8705738625528756, "learning_rate": 8.711333550626311e-06, "loss": 0.6298611164093018, "step": 7060 }, { "epoch": 1.1285063533924717, "grad_norm": 1.3582274615630443, "learning_rate": 8.708712971629588e-06, "loss": 0.5358578562736511, "step": 7061 }, { "epoch": 1.128666187165348, "grad_norm": 1.544013371090376, "learning_rate": 8.70609248281177e-06, "loss": 0.6869460344314575, "step": 7062 }, { "epoch": 1.1288260209382242, "grad_norm": 1.359071384426067, "learning_rate": 8.703472084355853e-06, "loss": 0.5943823456764221, "step": 7063 }, { "epoch": 1.1289858547111005, "grad_norm": 1.4233713119317188, "learning_rate": 8.700851776444843e-06, "loss": 0.5300551056861877, "step": 7064 }, { "epoch": 1.1291456884839768, "grad_norm": 1.4990388943959985, "learning_rate": 8.698231559261736e-06, "loss": 0.6596075296401978, "step": 7065 }, { "epoch": 1.1293055222568529, "grad_norm": 1.5793909905478942, "learning_rate": 8.695611432989514e-06, "loss": 0.5996339321136475, "step": 7066 }, { "epoch": 1.129465356029729, "grad_norm": 1.7307543089989788, "learning_rate": 8.692991397811157e-06, "loss": 0.582058846950531, "step": 7067 }, { "epoch": 1.1296251898026053, "grad_norm": 1.4913737318077005, "learning_rate": 8.69037145390964e-06, "loss": 0.46390974521636963, "step": 7068 }, { "epoch": 1.1297850235754816, "grad_norm": 1.7063290739501904, "learning_rate": 8.687751601467929e-06, "loss": 0.5926409959793091, "step": 7069 }, { "epoch": 1.1299448573483577, "grad_norm": 1.4247957576068873, "learning_rate": 8.68513184066899e-06, "loss": 0.611376166343689, "step": 7070 }, { "epoch": 1.1301046911212338, "grad_norm": 1.4904749161875515, "learning_rate": 8.68251217169577e-06, "loss": 0.47795045375823975, "step": 7071 }, { "epoch": 1.1302645248941101, "grad_norm": 1.3801271182473795, "learning_rate": 8.679892594731225e-06, "loss": 0.5762813687324524, "step": 7072 }, { "epoch": 1.1304243586669864, "grad_norm": 1.685176034211136, "learning_rate": 8.677273109958295e-06, "loss": 0.5206659436225891, "step": 7073 }, { "epoch": 1.1305841924398625, "grad_norm": 1.583073892785731, "learning_rate": 8.67465371755991e-06, "loss": 0.6556714177131653, "step": 7074 }, { "epoch": 1.1307440262127388, "grad_norm": 1.382850415018363, "learning_rate": 8.672034417719008e-06, "loss": 0.5874379873275757, "step": 7075 }, { "epoch": 1.130903859985615, "grad_norm": 1.6807276334694876, "learning_rate": 8.669415210618503e-06, "loss": 0.6575337648391724, "step": 7076 }, { "epoch": 1.1310636937584913, "grad_norm": 1.5919273294091185, "learning_rate": 8.666796096441319e-06, "loss": 0.4611610770225525, "step": 7077 }, { "epoch": 1.1312235275313673, "grad_norm": 1.6403502447747227, "learning_rate": 8.66417707537036e-06, "loss": 0.6903669834136963, "step": 7078 }, { "epoch": 1.1313833613042437, "grad_norm": 1.4862310973586825, "learning_rate": 8.66155814758853e-06, "loss": 0.5606266260147095, "step": 7079 }, { "epoch": 1.1315431950771198, "grad_norm": 1.8019085404145432, "learning_rate": 8.658939313278728e-06, "loss": 0.6456751227378845, "step": 7080 }, { "epoch": 1.131703028849996, "grad_norm": 1.4421701656107704, "learning_rate": 8.656320572623839e-06, "loss": 0.5736662745475769, "step": 7081 }, { "epoch": 1.1318628626228722, "grad_norm": 1.4235465647622056, "learning_rate": 8.653701925806755e-06, "loss": 0.529071569442749, "step": 7082 }, { "epoch": 1.1320226963957485, "grad_norm": 1.5367617523319974, "learning_rate": 8.651083373010345e-06, "loss": 0.576985776424408, "step": 7083 }, { "epoch": 1.1321825301686246, "grad_norm": 1.464735585362549, "learning_rate": 8.64846491441748e-06, "loss": 0.6292473673820496, "step": 7084 }, { "epoch": 1.132342363941501, "grad_norm": 1.5162302852240883, "learning_rate": 8.64584655021103e-06, "loss": 0.5455754995346069, "step": 7085 }, { "epoch": 1.132502197714377, "grad_norm": 1.548778530675591, "learning_rate": 8.64322828057385e-06, "loss": 0.5510830879211426, "step": 7086 }, { "epoch": 1.1326620314872533, "grad_norm": 1.4332994492058169, "learning_rate": 8.640610105688787e-06, "loss": 0.5452100038528442, "step": 7087 }, { "epoch": 1.1328218652601294, "grad_norm": 1.4209471341586841, "learning_rate": 8.63799202573869e-06, "loss": 0.5394142866134644, "step": 7088 }, { "epoch": 1.1329816990330057, "grad_norm": 1.563696622756284, "learning_rate": 8.63537404090639e-06, "loss": 0.47055232524871826, "step": 7089 }, { "epoch": 1.1331415328058818, "grad_norm": 1.394368432375396, "learning_rate": 8.632756151374724e-06, "loss": 0.6267711520195007, "step": 7090 }, { "epoch": 1.1333013665787581, "grad_norm": 1.4024177614759838, "learning_rate": 8.630138357326512e-06, "loss": 0.5592917203903198, "step": 7091 }, { "epoch": 1.1334612003516342, "grad_norm": 1.167081342048999, "learning_rate": 8.627520658944575e-06, "loss": 0.5186982154846191, "step": 7092 }, { "epoch": 1.1336210341245105, "grad_norm": 1.3665413619602262, "learning_rate": 8.624903056411723e-06, "loss": 0.604210615158081, "step": 7093 }, { "epoch": 1.1337808678973866, "grad_norm": 1.5033757765150166, "learning_rate": 8.622285549910755e-06, "loss": 0.619408130645752, "step": 7094 }, { "epoch": 1.133940701670263, "grad_norm": 1.606410033950478, "learning_rate": 8.619668139624475e-06, "loss": 0.6302100419998169, "step": 7095 }, { "epoch": 1.134100535443139, "grad_norm": 1.443697755128788, "learning_rate": 8.617050825735668e-06, "loss": 0.5419831275939941, "step": 7096 }, { "epoch": 1.1342603692160154, "grad_norm": 1.3544338499270847, "learning_rate": 8.614433608427126e-06, "loss": 0.6448668241500854, "step": 7097 }, { "epoch": 1.1344202029888915, "grad_norm": 1.4799791922738514, "learning_rate": 8.611816487881616e-06, "loss": 0.5588672757148743, "step": 7098 }, { "epoch": 1.1345800367617678, "grad_norm": 1.6236390833256842, "learning_rate": 8.609199464281917e-06, "loss": 0.5249131321907043, "step": 7099 }, { "epoch": 1.134739870534644, "grad_norm": 1.6448871810805297, "learning_rate": 8.606582537810789e-06, "loss": 0.584337055683136, "step": 7100 }, { "epoch": 1.1348997043075202, "grad_norm": 1.5314531806198874, "learning_rate": 8.603965708650984e-06, "loss": 0.5724263787269592, "step": 7101 }, { "epoch": 1.1350595380803963, "grad_norm": 1.57037390085002, "learning_rate": 8.601348976985262e-06, "loss": 0.6734278202056885, "step": 7102 }, { "epoch": 1.1352193718532726, "grad_norm": 1.1971752735464243, "learning_rate": 8.598732342996358e-06, "loss": 0.5009361505508423, "step": 7103 }, { "epoch": 1.135379205626149, "grad_norm": 1.6514487539315332, "learning_rate": 8.596115806867009e-06, "loss": 0.6721314191818237, "step": 7104 }, { "epoch": 1.135539039399025, "grad_norm": 1.4952500038508845, "learning_rate": 8.593499368779952e-06, "loss": 0.6311193108558655, "step": 7105 }, { "epoch": 1.135698873171901, "grad_norm": 1.4099031108083468, "learning_rate": 8.590883028917903e-06, "loss": 0.5690436363220215, "step": 7106 }, { "epoch": 1.1358587069447774, "grad_norm": 1.5458668412504966, "learning_rate": 8.588266787463582e-06, "loss": 0.5425005555152893, "step": 7107 }, { "epoch": 1.1360185407176537, "grad_norm": 1.40048954305846, "learning_rate": 8.585650644599696e-06, "loss": 0.5350207090377808, "step": 7108 }, { "epoch": 1.1361783744905298, "grad_norm": 1.478371583185496, "learning_rate": 8.583034600508943e-06, "loss": 0.6590995788574219, "step": 7109 }, { "epoch": 1.1363382082634061, "grad_norm": 1.3146717674365025, "learning_rate": 8.580418655374027e-06, "loss": 0.5073596835136414, "step": 7110 }, { "epoch": 1.1364980420362822, "grad_norm": 1.6604067378662053, "learning_rate": 8.577802809377628e-06, "loss": 0.6789146661758423, "step": 7111 }, { "epoch": 1.1366578758091586, "grad_norm": 1.545827723965539, "learning_rate": 8.575187062702432e-06, "loss": 0.6511619091033936, "step": 7112 }, { "epoch": 1.1368177095820347, "grad_norm": 1.740344985062286, "learning_rate": 8.572571415531112e-06, "loss": 0.5610610246658325, "step": 7113 }, { "epoch": 1.136977543354911, "grad_norm": 1.5415567184988543, "learning_rate": 8.569955868046338e-06, "loss": 0.6819281578063965, "step": 7114 }, { "epoch": 1.137137377127787, "grad_norm": 1.5930450626243347, "learning_rate": 8.567340420430768e-06, "loss": 0.6441494226455688, "step": 7115 }, { "epoch": 1.1372972109006634, "grad_norm": 1.4827338682807079, "learning_rate": 8.564725072867054e-06, "loss": 0.5897976160049438, "step": 7116 }, { "epoch": 1.1374570446735395, "grad_norm": 1.3618555437431223, "learning_rate": 8.562109825537847e-06, "loss": 0.4889926612377167, "step": 7117 }, { "epoch": 1.1376168784464158, "grad_norm": 1.276697704643845, "learning_rate": 8.55949467862578e-06, "loss": 0.4900668263435364, "step": 7118 }, { "epoch": 1.1377767122192919, "grad_norm": 1.4338822972991516, "learning_rate": 8.556879632313494e-06, "loss": 0.5306631326675415, "step": 7119 }, { "epoch": 1.1379365459921682, "grad_norm": 1.3920581665096894, "learning_rate": 8.554264686783608e-06, "loss": 0.5188461542129517, "step": 7120 }, { "epoch": 1.1380963797650443, "grad_norm": 1.4112790837832647, "learning_rate": 8.551649842218744e-06, "loss": 0.47933775186538696, "step": 7121 }, { "epoch": 1.1382562135379206, "grad_norm": 1.3573236293004944, "learning_rate": 8.549035098801514e-06, "loss": 0.4466302990913391, "step": 7122 }, { "epoch": 1.1384160473107967, "grad_norm": 1.397061500079551, "learning_rate": 8.546420456714517e-06, "loss": 0.5942728519439697, "step": 7123 }, { "epoch": 1.138575881083673, "grad_norm": 1.6047999663179167, "learning_rate": 8.543805916140353e-06, "loss": 0.6860746145248413, "step": 7124 }, { "epoch": 1.1387357148565491, "grad_norm": 1.387375300885233, "learning_rate": 8.541191477261616e-06, "loss": 0.5651633739471436, "step": 7125 }, { "epoch": 1.1388955486294254, "grad_norm": 1.44259796318772, "learning_rate": 8.538577140260884e-06, "loss": 0.5584770441055298, "step": 7126 }, { "epoch": 1.1390553824023015, "grad_norm": 1.385850636874443, "learning_rate": 8.535962905320739e-06, "loss": 0.49160653352737427, "step": 7127 }, { "epoch": 1.1392152161751778, "grad_norm": 1.4502647117172918, "learning_rate": 8.533348772623744e-06, "loss": 0.4355015158653259, "step": 7128 }, { "epoch": 1.139375049948054, "grad_norm": 1.7401167099218853, "learning_rate": 8.530734742352464e-06, "loss": 0.6082775592803955, "step": 7129 }, { "epoch": 1.1395348837209303, "grad_norm": 1.2219471839108418, "learning_rate": 8.528120814689454e-06, "loss": 0.4862367510795593, "step": 7130 }, { "epoch": 1.1396947174938064, "grad_norm": 1.4665117768395657, "learning_rate": 8.525506989817257e-06, "loss": 0.3748445510864258, "step": 7131 }, { "epoch": 1.1398545512666827, "grad_norm": 1.5981067070480997, "learning_rate": 8.522893267918419e-06, "loss": 0.6025470495223999, "step": 7132 }, { "epoch": 1.1400143850395588, "grad_norm": 1.278762048807374, "learning_rate": 8.520279649175468e-06, "loss": 0.6103473901748657, "step": 7133 }, { "epoch": 1.140174218812435, "grad_norm": 1.4234884773778358, "learning_rate": 8.517666133770933e-06, "loss": 0.6334319114685059, "step": 7134 }, { "epoch": 1.1403340525853114, "grad_norm": 1.6773478573012854, "learning_rate": 8.515052721887332e-06, "loss": 0.643554151058197, "step": 7135 }, { "epoch": 1.1404938863581875, "grad_norm": 1.4905914411396686, "learning_rate": 8.512439413707177e-06, "loss": 0.562089204788208, "step": 7136 }, { "epoch": 1.1406537201310636, "grad_norm": 1.5825877656333278, "learning_rate": 8.50982620941297e-06, "loss": 0.5175899267196655, "step": 7137 }, { "epoch": 1.14081355390394, "grad_norm": 1.40175370058605, "learning_rate": 8.507213109187208e-06, "loss": 0.4936741590499878, "step": 7138 }, { "epoch": 1.1409733876768162, "grad_norm": 1.3789875170075923, "learning_rate": 8.504600113212387e-06, "loss": 0.5607438087463379, "step": 7139 }, { "epoch": 1.1411332214496923, "grad_norm": 1.5231240361246516, "learning_rate": 8.501987221670979e-06, "loss": 0.5751928687095642, "step": 7140 }, { "epoch": 1.1412930552225684, "grad_norm": 1.4877685996612844, "learning_rate": 8.499374434745468e-06, "loss": 0.5776112675666809, "step": 7141 }, { "epoch": 1.1414528889954447, "grad_norm": 1.534013296947943, "learning_rate": 8.496761752618314e-06, "loss": 0.7074211835861206, "step": 7142 }, { "epoch": 1.141612722768321, "grad_norm": 1.390215828408012, "learning_rate": 8.494149175471987e-06, "loss": 0.5543332695960999, "step": 7143 }, { "epoch": 1.1417725565411971, "grad_norm": 1.5368977337092062, "learning_rate": 8.491536703488927e-06, "loss": 0.621841311454773, "step": 7144 }, { "epoch": 1.1419323903140735, "grad_norm": 1.5453719994441808, "learning_rate": 8.488924336851595e-06, "loss": 0.5720770359039307, "step": 7145 }, { "epoch": 1.1420922240869495, "grad_norm": 1.3009983310067288, "learning_rate": 8.486312075742416e-06, "loss": 0.5300396680831909, "step": 7146 }, { "epoch": 1.1422520578598259, "grad_norm": 1.3250946603204106, "learning_rate": 8.48369992034383e-06, "loss": 0.4758545756340027, "step": 7147 }, { "epoch": 1.142411891632702, "grad_norm": 1.6741765637560084, "learning_rate": 8.481087870838258e-06, "loss": 0.5534765720367432, "step": 7148 }, { "epoch": 1.1425717254055783, "grad_norm": 1.929501856938476, "learning_rate": 8.478475927408116e-06, "loss": 0.5040035843849182, "step": 7149 }, { "epoch": 1.1427315591784544, "grad_norm": 1.550187088348639, "learning_rate": 8.475864090235812e-06, "loss": 0.6000162363052368, "step": 7150 }, { "epoch": 1.1428913929513307, "grad_norm": 1.3013637437730226, "learning_rate": 8.473252359503747e-06, "loss": 0.5422171950340271, "step": 7151 }, { "epoch": 1.1430512267242068, "grad_norm": 1.562860265995754, "learning_rate": 8.47064073539432e-06, "loss": 0.7031309008598328, "step": 7152 }, { "epoch": 1.143211060497083, "grad_norm": 1.6187440648927558, "learning_rate": 8.468029218089908e-06, "loss": 0.6176165342330933, "step": 7153 }, { "epoch": 1.1433708942699592, "grad_norm": 1.2525675261126847, "learning_rate": 8.465417807772899e-06, "loss": 0.5252389907836914, "step": 7154 }, { "epoch": 1.1435307280428355, "grad_norm": 1.4046472620281116, "learning_rate": 8.46280650462566e-06, "loss": 0.5928475856781006, "step": 7155 }, { "epoch": 1.1436905618157116, "grad_norm": 1.410698589837221, "learning_rate": 8.46019530883056e-06, "loss": 0.5318143963813782, "step": 7156 }, { "epoch": 1.143850395588588, "grad_norm": 1.4683022008810689, "learning_rate": 8.457584220569949e-06, "loss": 0.6310890316963196, "step": 7157 }, { "epoch": 1.144010229361464, "grad_norm": 2.210660646718102, "learning_rate": 8.454973240026178e-06, "loss": 0.4838901460170746, "step": 7158 }, { "epoch": 1.1441700631343403, "grad_norm": 1.299618298314119, "learning_rate": 8.452362367381593e-06, "loss": 0.6407632827758789, "step": 7159 }, { "epoch": 1.1443298969072164, "grad_norm": 1.5157287247811617, "learning_rate": 8.449751602818523e-06, "loss": 0.5042482614517212, "step": 7160 }, { "epoch": 1.1444897306800927, "grad_norm": 1.4096577073347882, "learning_rate": 8.447140946519298e-06, "loss": 0.530754804611206, "step": 7161 }, { "epoch": 1.1446495644529688, "grad_norm": 2.088127155426947, "learning_rate": 8.444530398666233e-06, "loss": 0.50494384765625, "step": 7162 }, { "epoch": 1.1448093982258452, "grad_norm": 1.657478919621565, "learning_rate": 8.441919959441646e-06, "loss": 0.7094967365264893, "step": 7163 }, { "epoch": 1.1449692319987212, "grad_norm": 1.5425307645508037, "learning_rate": 8.439309629027831e-06, "loss": 0.5112444162368774, "step": 7164 }, { "epoch": 1.1451290657715976, "grad_norm": 1.4502268557902382, "learning_rate": 8.436699407607095e-06, "loss": 0.6348606944084167, "step": 7165 }, { "epoch": 1.1452888995444737, "grad_norm": 1.6287135425117945, "learning_rate": 8.434089295361718e-06, "loss": 0.7320420742034912, "step": 7166 }, { "epoch": 1.14544873331735, "grad_norm": 1.5126936369573543, "learning_rate": 8.431479292473986e-06, "loss": 0.5711327195167542, "step": 7167 }, { "epoch": 1.145608567090226, "grad_norm": 1.8868908058980751, "learning_rate": 8.42886939912617e-06, "loss": 0.623599648475647, "step": 7168 }, { "epoch": 1.1457684008631024, "grad_norm": 1.3350981511465572, "learning_rate": 8.426259615500539e-06, "loss": 0.4972377121448517, "step": 7169 }, { "epoch": 1.1459282346359787, "grad_norm": 1.3901251993606765, "learning_rate": 8.423649941779345e-06, "loss": 0.5680341124534607, "step": 7170 }, { "epoch": 1.1460880684088548, "grad_norm": 1.4046421730387604, "learning_rate": 8.421040378144844e-06, "loss": 0.5866043567657471, "step": 7171 }, { "epoch": 1.146247902181731, "grad_norm": 1.590611359191274, "learning_rate": 8.418430924779277e-06, "loss": 0.4768396019935608, "step": 7172 }, { "epoch": 1.1464077359546072, "grad_norm": 1.5657602183894825, "learning_rate": 8.415821581864878e-06, "loss": 0.5440312027931213, "step": 7173 }, { "epoch": 1.1465675697274835, "grad_norm": 1.5914530248098733, "learning_rate": 8.413212349583874e-06, "loss": 0.5203585624694824, "step": 7174 }, { "epoch": 1.1467274035003596, "grad_norm": 1.4335947965449416, "learning_rate": 8.410603228118484e-06, "loss": 0.5445636510848999, "step": 7175 }, { "epoch": 1.1468872372732357, "grad_norm": 1.4388825135619614, "learning_rate": 8.407994217650923e-06, "loss": 0.5232955813407898, "step": 7176 }, { "epoch": 1.147047071046112, "grad_norm": 1.9306096143178462, "learning_rate": 8.405385318363391e-06, "loss": 0.38779497146606445, "step": 7177 }, { "epoch": 1.1472069048189883, "grad_norm": 1.3719479915226667, "learning_rate": 8.40277653043809e-06, "loss": 0.540931224822998, "step": 7178 }, { "epoch": 1.1473667385918644, "grad_norm": 1.6307747183825583, "learning_rate": 8.400167854057203e-06, "loss": 0.7573689222335815, "step": 7179 }, { "epoch": 1.1475265723647408, "grad_norm": 1.4264934751966343, "learning_rate": 8.39755928940291e-06, "loss": 0.49906694889068604, "step": 7180 }, { "epoch": 1.1476864061376169, "grad_norm": 1.513468776241739, "learning_rate": 8.39495083665739e-06, "loss": 0.562023937702179, "step": 7181 }, { "epoch": 1.1478462399104932, "grad_norm": 1.5560452370631586, "learning_rate": 8.392342496002801e-06, "loss": 0.49129417538642883, "step": 7182 }, { "epoch": 1.1480060736833693, "grad_norm": 1.5455549818784848, "learning_rate": 8.389734267621307e-06, "loss": 0.6108235120773315, "step": 7183 }, { "epoch": 1.1481659074562456, "grad_norm": 1.4726456159084826, "learning_rate": 8.387126151695048e-06, "loss": 0.655495285987854, "step": 7184 }, { "epoch": 1.1483257412291217, "grad_norm": 1.511514722815009, "learning_rate": 8.384518148406176e-06, "loss": 0.6003879904747009, "step": 7185 }, { "epoch": 1.148485575001998, "grad_norm": 1.4075669133560507, "learning_rate": 8.38191025793682e-06, "loss": 0.5195223093032837, "step": 7186 }, { "epoch": 1.148645408774874, "grad_norm": 1.682002207987716, "learning_rate": 8.379302480469109e-06, "loss": 0.7353746891021729, "step": 7187 }, { "epoch": 1.1488052425477504, "grad_norm": 1.344600664948278, "learning_rate": 8.376694816185153e-06, "loss": 0.508053183555603, "step": 7188 }, { "epoch": 1.1489650763206265, "grad_norm": 1.3049120110549506, "learning_rate": 8.374087265267071e-06, "loss": 0.4594283699989319, "step": 7189 }, { "epoch": 1.1491249100935028, "grad_norm": 1.8131567877926573, "learning_rate": 8.371479827896959e-06, "loss": 0.5939557552337646, "step": 7190 }, { "epoch": 1.149284743866379, "grad_norm": 1.6977481586977832, "learning_rate": 8.368872504256915e-06, "loss": 0.6682805418968201, "step": 7191 }, { "epoch": 1.1494445776392552, "grad_norm": 4.031380492195382, "learning_rate": 8.366265294529022e-06, "loss": 0.49526247382164, "step": 7192 }, { "epoch": 1.1496044114121313, "grad_norm": 1.3621009860786855, "learning_rate": 8.363658198895362e-06, "loss": 0.4685707688331604, "step": 7193 }, { "epoch": 1.1497642451850076, "grad_norm": 1.5038553383293272, "learning_rate": 8.361051217538001e-06, "loss": 0.5925250053405762, "step": 7194 }, { "epoch": 1.1499240789578837, "grad_norm": 1.8012274861457696, "learning_rate": 8.358444350639004e-06, "loss": 0.5888189673423767, "step": 7195 }, { "epoch": 1.15008391273076, "grad_norm": 1.5521572039254141, "learning_rate": 8.355837598380425e-06, "loss": 0.5641075372695923, "step": 7196 }, { "epoch": 1.1502437465036361, "grad_norm": 1.5573981590277328, "learning_rate": 8.35323096094431e-06, "loss": 0.663723349571228, "step": 7197 }, { "epoch": 1.1504035802765125, "grad_norm": 1.487886444298498, "learning_rate": 8.350624438512697e-06, "loss": 0.468771755695343, "step": 7198 }, { "epoch": 1.1505634140493886, "grad_norm": 1.5931561834213797, "learning_rate": 8.348018031267618e-06, "loss": 0.568286120891571, "step": 7199 }, { "epoch": 1.1507232478222649, "grad_norm": 1.3821689799410708, "learning_rate": 8.345411739391094e-06, "loss": 0.5655402541160583, "step": 7200 }, { "epoch": 1.150883081595141, "grad_norm": 1.4510075246775922, "learning_rate": 8.342805563065138e-06, "loss": 0.6398258805274963, "step": 7201 }, { "epoch": 1.1510429153680173, "grad_norm": 1.4723026986702532, "learning_rate": 8.340199502471756e-06, "loss": 0.45497798919677734, "step": 7202 }, { "epoch": 1.1512027491408934, "grad_norm": 1.3673022590769108, "learning_rate": 8.33759355779295e-06, "loss": 0.5287931561470032, "step": 7203 }, { "epoch": 1.1513625829137697, "grad_norm": 1.464415551104166, "learning_rate": 8.334987729210702e-06, "loss": 0.609112560749054, "step": 7204 }, { "epoch": 1.151522416686646, "grad_norm": 1.5481774447746677, "learning_rate": 8.332382016907002e-06, "loss": 0.6106127500534058, "step": 7205 }, { "epoch": 1.151682250459522, "grad_norm": 1.6129763047665366, "learning_rate": 8.329776421063822e-06, "loss": 0.6119823455810547, "step": 7206 }, { "epoch": 1.1518420842323982, "grad_norm": 1.3332181894625472, "learning_rate": 8.327170941863124e-06, "loss": 0.4630020558834076, "step": 7207 }, { "epoch": 1.1520019180052745, "grad_norm": 1.302919783793104, "learning_rate": 8.32456557948687e-06, "loss": 0.5459526777267456, "step": 7208 }, { "epoch": 1.1521617517781508, "grad_norm": 1.7363012657966244, "learning_rate": 8.321960334117006e-06, "loss": 0.6660997867584229, "step": 7209 }, { "epoch": 1.152321585551027, "grad_norm": 1.495893363342421, "learning_rate": 8.319355205935474e-06, "loss": 0.6023397445678711, "step": 7210 }, { "epoch": 1.152481419323903, "grad_norm": 1.4282115150177532, "learning_rate": 8.316750195124207e-06, "loss": 0.5616303086280823, "step": 7211 }, { "epoch": 1.1526412530967793, "grad_norm": 1.4113274388273378, "learning_rate": 8.314145301865128e-06, "loss": 0.5226938128471375, "step": 7212 }, { "epoch": 1.1528010868696557, "grad_norm": 1.6132327995525166, "learning_rate": 8.311540526340157e-06, "loss": 0.5794750452041626, "step": 7213 }, { "epoch": 1.1529609206425318, "grad_norm": 1.6090699872870413, "learning_rate": 8.3089358687312e-06, "loss": 0.5557301044464111, "step": 7214 }, { "epoch": 1.153120754415408, "grad_norm": 1.4764083721091275, "learning_rate": 8.306331329220157e-06, "loss": 0.5830644965171814, "step": 7215 }, { "epoch": 1.1532805881882842, "grad_norm": 1.4520085092953867, "learning_rate": 8.30372690798892e-06, "loss": 0.6268092393875122, "step": 7216 }, { "epoch": 1.1534404219611605, "grad_norm": 1.3932573872690803, "learning_rate": 8.301122605219372e-06, "loss": 0.5340946912765503, "step": 7217 }, { "epoch": 1.1536002557340366, "grad_norm": 1.3996179632311319, "learning_rate": 8.298518421093391e-06, "loss": 0.5637528896331787, "step": 7218 }, { "epoch": 1.153760089506913, "grad_norm": 1.7258445044179176, "learning_rate": 8.29591435579284e-06, "loss": 0.6250327825546265, "step": 7219 }, { "epoch": 1.153919923279789, "grad_norm": 1.602856246736893, "learning_rate": 8.29331040949958e-06, "loss": 0.4552883505821228, "step": 7220 }, { "epoch": 1.1540797570526653, "grad_norm": 1.552238360486734, "learning_rate": 8.290706582395463e-06, "loss": 0.542935311794281, "step": 7221 }, { "epoch": 1.1542395908255414, "grad_norm": 1.5958726447408682, "learning_rate": 8.288102874662327e-06, "loss": 0.5030564069747925, "step": 7222 }, { "epoch": 1.1543994245984177, "grad_norm": 1.6507270693902296, "learning_rate": 8.285499286482009e-06, "loss": 0.6517465114593506, "step": 7223 }, { "epoch": 1.1545592583712938, "grad_norm": 1.6020688895862683, "learning_rate": 8.282895818036327e-06, "loss": 0.5728309154510498, "step": 7224 }, { "epoch": 1.1547190921441701, "grad_norm": 1.5105199654038204, "learning_rate": 8.280292469507109e-06, "loss": 0.45630401372909546, "step": 7225 }, { "epoch": 1.1548789259170462, "grad_norm": 1.5226133828142985, "learning_rate": 8.277689241076158e-06, "loss": 0.47449803352355957, "step": 7226 }, { "epoch": 1.1550387596899225, "grad_norm": 1.5715485378160523, "learning_rate": 8.275086132925277e-06, "loss": 0.6516823768615723, "step": 7227 }, { "epoch": 1.1551985934627986, "grad_norm": 1.7453575305440505, "learning_rate": 8.272483145236255e-06, "loss": 0.5359281301498413, "step": 7228 }, { "epoch": 1.155358427235675, "grad_norm": 1.5800410628673158, "learning_rate": 8.269880278190876e-06, "loss": 0.5772585272789001, "step": 7229 }, { "epoch": 1.155518261008551, "grad_norm": 1.5348651602777201, "learning_rate": 8.267277531970913e-06, "loss": 0.518791139125824, "step": 7230 }, { "epoch": 1.1556780947814274, "grad_norm": 1.4941984327536066, "learning_rate": 8.264674906758136e-06, "loss": 0.5171495676040649, "step": 7231 }, { "epoch": 1.1558379285543035, "grad_norm": 1.6852766751847104, "learning_rate": 8.262072402734302e-06, "loss": 0.6541048288345337, "step": 7232 }, { "epoch": 1.1559977623271798, "grad_norm": 1.6450717619528912, "learning_rate": 8.259470020081161e-06, "loss": 0.6161694526672363, "step": 7233 }, { "epoch": 1.1561575961000559, "grad_norm": 1.5320532591466793, "learning_rate": 8.256867758980452e-06, "loss": 0.6643911004066467, "step": 7234 }, { "epoch": 1.1563174298729322, "grad_norm": 1.6683141422871541, "learning_rate": 8.25426561961391e-06, "loss": 0.5276433229446411, "step": 7235 }, { "epoch": 1.1564772636458083, "grad_norm": 1.7596559075314424, "learning_rate": 8.25166360216326e-06, "loss": 0.5818904638290405, "step": 7236 }, { "epoch": 1.1566370974186846, "grad_norm": 1.7311978488047075, "learning_rate": 8.249061706810216e-06, "loss": 0.5539177060127258, "step": 7237 }, { "epoch": 1.1567969311915607, "grad_norm": 1.8577225728005455, "learning_rate": 8.246459933736484e-06, "loss": 0.5890508890151978, "step": 7238 }, { "epoch": 1.156956764964437, "grad_norm": 1.3258071099283646, "learning_rate": 8.243858283123764e-06, "loss": 0.48593059182167053, "step": 7239 }, { "epoch": 1.1571165987373133, "grad_norm": 1.6735904957540682, "learning_rate": 8.241256755153745e-06, "loss": 0.4963245987892151, "step": 7240 }, { "epoch": 1.1572764325101894, "grad_norm": 1.6471533584582208, "learning_rate": 8.23865535000811e-06, "loss": 0.484824001789093, "step": 7241 }, { "epoch": 1.1574362662830655, "grad_norm": 1.4123002981299355, "learning_rate": 8.236054067868533e-06, "loss": 0.5844607949256897, "step": 7242 }, { "epoch": 1.1575961000559418, "grad_norm": 1.914928850980046, "learning_rate": 8.233452908916677e-06, "loss": 0.6551589965820312, "step": 7243 }, { "epoch": 1.1577559338288181, "grad_norm": 1.4035088495656851, "learning_rate": 8.23085187333419e-06, "loss": 0.5640982389450073, "step": 7244 }, { "epoch": 1.1579157676016942, "grad_norm": 1.4639244184251576, "learning_rate": 8.22825096130273e-06, "loss": 0.547490119934082, "step": 7245 }, { "epoch": 1.1580756013745703, "grad_norm": 1.7608273208582126, "learning_rate": 8.225650173003935e-06, "loss": 0.6207427978515625, "step": 7246 }, { "epoch": 1.1582354351474466, "grad_norm": 1.369015217975016, "learning_rate": 8.223049508619429e-06, "loss": 0.5797837972640991, "step": 7247 }, { "epoch": 1.158395268920323, "grad_norm": 1.599682414833342, "learning_rate": 8.220448968330838e-06, "loss": 0.5399640798568726, "step": 7248 }, { "epoch": 1.158555102693199, "grad_norm": 1.6604579259446834, "learning_rate": 8.21784855231977e-06, "loss": 0.6409103870391846, "step": 7249 }, { "epoch": 1.1587149364660754, "grad_norm": 1.3241401975436777, "learning_rate": 8.215248260767833e-06, "loss": 0.4367135167121887, "step": 7250 }, { "epoch": 1.1588747702389515, "grad_norm": 1.4104809294257186, "learning_rate": 8.21264809385662e-06, "loss": 0.4325036108493805, "step": 7251 }, { "epoch": 1.1590346040118278, "grad_norm": 1.673592562037629, "learning_rate": 8.210048051767717e-06, "loss": 0.554999828338623, "step": 7252 }, { "epoch": 1.1591944377847039, "grad_norm": 1.656244550948564, "learning_rate": 8.207448134682703e-06, "loss": 0.748532772064209, "step": 7253 }, { "epoch": 1.1593542715575802, "grad_norm": 1.8938656284206437, "learning_rate": 8.204848342783146e-06, "loss": 0.6502972841262817, "step": 7254 }, { "epoch": 1.1595141053304563, "grad_norm": 1.3852905883842743, "learning_rate": 8.202248676250606e-06, "loss": 0.5447432994842529, "step": 7255 }, { "epoch": 1.1596739391033326, "grad_norm": 1.5365251885794213, "learning_rate": 8.199649135266634e-06, "loss": 0.5911996364593506, "step": 7256 }, { "epoch": 1.1598337728762087, "grad_norm": 3.127183840408259, "learning_rate": 8.197049720012778e-06, "loss": 0.6015231609344482, "step": 7257 }, { "epoch": 1.159993606649085, "grad_norm": 1.54424189538425, "learning_rate": 8.194450430670566e-06, "loss": 0.5064313411712646, "step": 7258 }, { "epoch": 1.1601534404219611, "grad_norm": 1.5867196826014223, "learning_rate": 8.191851267421522e-06, "loss": 0.6806852221488953, "step": 7259 }, { "epoch": 1.1603132741948374, "grad_norm": 1.4646613130070405, "learning_rate": 8.189252230447168e-06, "loss": 0.5148774981498718, "step": 7260 }, { "epoch": 1.1604731079677135, "grad_norm": 2.2644488408099988, "learning_rate": 8.186653319929007e-06, "loss": 0.5400238037109375, "step": 7261 }, { "epoch": 1.1606329417405898, "grad_norm": 1.5034552750651748, "learning_rate": 8.18405453604854e-06, "loss": 0.6884785890579224, "step": 7262 }, { "epoch": 1.160792775513466, "grad_norm": 1.4935866449094932, "learning_rate": 8.181455878987256e-06, "loss": 0.4909595549106598, "step": 7263 }, { "epoch": 1.1609526092863423, "grad_norm": 1.3644577635304678, "learning_rate": 8.178857348926633e-06, "loss": 0.4813914895057678, "step": 7264 }, { "epoch": 1.1611124430592183, "grad_norm": 1.41083191956377, "learning_rate": 8.176258946048151e-06, "loss": 0.6125097274780273, "step": 7265 }, { "epoch": 1.1612722768320947, "grad_norm": 1.3456081263033215, "learning_rate": 8.173660670533268e-06, "loss": 0.5924351811408997, "step": 7266 }, { "epoch": 1.1614321106049708, "grad_norm": 1.4967053338834748, "learning_rate": 8.171062522563438e-06, "loss": 0.568679928779602, "step": 7267 }, { "epoch": 1.161591944377847, "grad_norm": 1.4947304876644432, "learning_rate": 8.168464502320107e-06, "loss": 0.5713006258010864, "step": 7268 }, { "epoch": 1.1617517781507232, "grad_norm": 1.3219799916580273, "learning_rate": 8.165866609984713e-06, "loss": 0.5075399279594421, "step": 7269 }, { "epoch": 1.1619116119235995, "grad_norm": 1.4471883067430824, "learning_rate": 8.163268845738684e-06, "loss": 0.6719478964805603, "step": 7270 }, { "epoch": 1.1620714456964756, "grad_norm": 1.4656660495885145, "learning_rate": 8.160671209763432e-06, "loss": 0.7064154744148254, "step": 7271 }, { "epoch": 1.162231279469352, "grad_norm": 1.5717826110622433, "learning_rate": 8.158073702240376e-06, "loss": 0.5222383737564087, "step": 7272 }, { "epoch": 1.162391113242228, "grad_norm": 1.6600367613205773, "learning_rate": 8.155476323350912e-06, "loss": 0.522311806678772, "step": 7273 }, { "epoch": 1.1625509470151043, "grad_norm": 1.5530473529083415, "learning_rate": 8.15287907327643e-06, "loss": 0.467812180519104, "step": 7274 }, { "epoch": 1.1627107807879806, "grad_norm": 1.6307540729039578, "learning_rate": 8.150281952198317e-06, "loss": 0.5190755724906921, "step": 7275 }, { "epoch": 1.1628706145608567, "grad_norm": 1.615289012712682, "learning_rate": 8.147684960297942e-06, "loss": 0.6270943284034729, "step": 7276 }, { "epoch": 1.1630304483337328, "grad_norm": 1.7322158994677803, "learning_rate": 8.145088097756675e-06, "loss": 0.6176137924194336, "step": 7277 }, { "epoch": 1.1631902821066091, "grad_norm": 1.3446633678575646, "learning_rate": 8.142491364755866e-06, "loss": 0.5033122897148132, "step": 7278 }, { "epoch": 1.1633501158794854, "grad_norm": 1.4429123664010393, "learning_rate": 8.139894761476868e-06, "loss": 0.5270724296569824, "step": 7279 }, { "epoch": 1.1635099496523615, "grad_norm": 1.9423440498222098, "learning_rate": 8.137298288101013e-06, "loss": 0.5408906936645508, "step": 7280 }, { "epoch": 1.1636697834252376, "grad_norm": 1.7642918111026917, "learning_rate": 8.134701944809631e-06, "loss": 0.6771782636642456, "step": 7281 }, { "epoch": 1.163829617198114, "grad_norm": 1.587953663647726, "learning_rate": 8.132105731784045e-06, "loss": 0.6811703443527222, "step": 7282 }, { "epoch": 1.1639894509709903, "grad_norm": 1.4211399787501213, "learning_rate": 8.129509649205558e-06, "loss": 0.5982741713523865, "step": 7283 }, { "epoch": 1.1641492847438664, "grad_norm": 1.5380301642873389, "learning_rate": 8.126913697255475e-06, "loss": 0.5794953107833862, "step": 7284 }, { "epoch": 1.1643091185167427, "grad_norm": 1.6218198726829014, "learning_rate": 8.124317876115091e-06, "loss": 0.5700290203094482, "step": 7285 }, { "epoch": 1.1644689522896188, "grad_norm": 1.6010797551225742, "learning_rate": 8.121722185965688e-06, "loss": 0.5487325191497803, "step": 7286 }, { "epoch": 1.164628786062495, "grad_norm": 1.5060383838913889, "learning_rate": 8.119126626988535e-06, "loss": 0.6674705743789673, "step": 7287 }, { "epoch": 1.1647886198353712, "grad_norm": 1.468843628948894, "learning_rate": 8.116531199364904e-06, "loss": 0.5310187339782715, "step": 7288 }, { "epoch": 1.1649484536082475, "grad_norm": 1.368551991537094, "learning_rate": 8.113935903276041e-06, "loss": 0.4243816137313843, "step": 7289 }, { "epoch": 1.1651082873811236, "grad_norm": 1.581754456106474, "learning_rate": 8.111340738903202e-06, "loss": 0.5535536408424377, "step": 7290 }, { "epoch": 1.165268121154, "grad_norm": 1.3876310302235115, "learning_rate": 8.108745706427616e-06, "loss": 0.4358382225036621, "step": 7291 }, { "epoch": 1.165427954926876, "grad_norm": 1.5358309393294234, "learning_rate": 8.106150806030517e-06, "loss": 0.454578161239624, "step": 7292 }, { "epoch": 1.1655877886997523, "grad_norm": 1.2400218810191947, "learning_rate": 8.103556037893123e-06, "loss": 0.44608616828918457, "step": 7293 }, { "epoch": 1.1657476224726284, "grad_norm": 1.670132919270953, "learning_rate": 8.100961402196637e-06, "loss": 0.6149812936782837, "step": 7294 }, { "epoch": 1.1659074562455047, "grad_norm": 1.424053128430266, "learning_rate": 8.098366899122266e-06, "loss": 0.5984516143798828, "step": 7295 }, { "epoch": 1.1660672900183808, "grad_norm": 1.3035573552673028, "learning_rate": 8.095772528851199e-06, "loss": 0.44927626848220825, "step": 7296 }, { "epoch": 1.1662271237912571, "grad_norm": 1.4564840781760915, "learning_rate": 8.093178291564616e-06, "loss": 0.5430619120597839, "step": 7297 }, { "epoch": 1.1663869575641332, "grad_norm": 1.5821459145980428, "learning_rate": 8.09058418744369e-06, "loss": 0.48096317052841187, "step": 7298 }, { "epoch": 1.1665467913370096, "grad_norm": 1.45825795066094, "learning_rate": 8.087990216669585e-06, "loss": 0.6055275797843933, "step": 7299 }, { "epoch": 1.1667066251098857, "grad_norm": 1.6048359874052085, "learning_rate": 8.085396379423457e-06, "loss": 0.663149893283844, "step": 7300 }, { "epoch": 1.166866458882762, "grad_norm": 1.4632515663975363, "learning_rate": 8.082802675886442e-06, "loss": 0.40551114082336426, "step": 7301 }, { "epoch": 1.167026292655638, "grad_norm": 1.4187212821481996, "learning_rate": 8.080209106239686e-06, "loss": 0.612644612789154, "step": 7302 }, { "epoch": 1.1671861264285144, "grad_norm": 1.4039849041629235, "learning_rate": 8.077615670664306e-06, "loss": 0.5667723417282104, "step": 7303 }, { "epoch": 1.1673459602013905, "grad_norm": 1.6264126247140966, "learning_rate": 8.075022369341419e-06, "loss": 0.6117035746574402, "step": 7304 }, { "epoch": 1.1675057939742668, "grad_norm": 1.3476395882828625, "learning_rate": 8.072429202452138e-06, "loss": 0.4453077018260956, "step": 7305 }, { "epoch": 1.1676656277471429, "grad_norm": 1.6033252153574162, "learning_rate": 8.069836170177556e-06, "loss": 0.535423994064331, "step": 7306 }, { "epoch": 1.1678254615200192, "grad_norm": 1.5796038057590458, "learning_rate": 8.067243272698766e-06, "loss": 0.5018707513809204, "step": 7307 }, { "epoch": 1.1679852952928953, "grad_norm": 1.791529841149209, "learning_rate": 8.06465051019684e-06, "loss": 0.5899826288223267, "step": 7308 }, { "epoch": 1.1681451290657716, "grad_norm": 1.5849555087472322, "learning_rate": 8.062057882852852e-06, "loss": 0.5572405457496643, "step": 7309 }, { "epoch": 1.1683049628386477, "grad_norm": 1.8566660735959297, "learning_rate": 8.059465390847858e-06, "loss": 0.5384791493415833, "step": 7310 }, { "epoch": 1.168464796611524, "grad_norm": 1.3475993419537078, "learning_rate": 8.056873034362914e-06, "loss": 0.45944640040397644, "step": 7311 }, { "epoch": 1.1686246303844001, "grad_norm": 1.5748213551267192, "learning_rate": 8.054280813579054e-06, "loss": 0.5447981953620911, "step": 7312 }, { "epoch": 1.1687844641572764, "grad_norm": 1.7224977520780045, "learning_rate": 8.051688728677316e-06, "loss": 0.5061823129653931, "step": 7313 }, { "epoch": 1.1689442979301528, "grad_norm": 1.6112429871057086, "learning_rate": 8.04909677983872e-06, "loss": 0.6173773407936096, "step": 7314 }, { "epoch": 1.1691041317030288, "grad_norm": 1.8351234695351186, "learning_rate": 8.046504967244276e-06, "loss": 0.47529613971710205, "step": 7315 }, { "epoch": 1.169263965475905, "grad_norm": 1.873271537594234, "learning_rate": 8.043913291074988e-06, "loss": 0.7229189872741699, "step": 7316 }, { "epoch": 1.1694237992487813, "grad_norm": 1.8051916924143168, "learning_rate": 8.041321751511852e-06, "loss": 0.6035562753677368, "step": 7317 }, { "epoch": 1.1695836330216576, "grad_norm": 1.3996459901943756, "learning_rate": 8.038730348735847e-06, "loss": 0.5154941082000732, "step": 7318 }, { "epoch": 1.1697434667945337, "grad_norm": 1.412782271075975, "learning_rate": 8.036139082927954e-06, "loss": 0.4879932999610901, "step": 7319 }, { "epoch": 1.16990330056741, "grad_norm": 1.5299760869717232, "learning_rate": 8.033547954269132e-06, "loss": 0.46068689227104187, "step": 7320 }, { "epoch": 1.170063134340286, "grad_norm": 1.5871673510574116, "learning_rate": 8.030956962940339e-06, "loss": 0.534247875213623, "step": 7321 }, { "epoch": 1.1702229681131624, "grad_norm": 1.7964849162746241, "learning_rate": 8.02836610912252e-06, "loss": 0.6472686529159546, "step": 7322 }, { "epoch": 1.1703828018860385, "grad_norm": 1.502262746892989, "learning_rate": 8.025775392996611e-06, "loss": 0.6949285864830017, "step": 7323 }, { "epoch": 1.1705426356589148, "grad_norm": 1.434455229905647, "learning_rate": 8.02318481474354e-06, "loss": 0.6751980781555176, "step": 7324 }, { "epoch": 1.170702469431791, "grad_norm": 1.4560917274693628, "learning_rate": 8.020594374544218e-06, "loss": 0.5795940160751343, "step": 7325 }, { "epoch": 1.1708623032046672, "grad_norm": 1.5289003860617745, "learning_rate": 8.018004072579559e-06, "loss": 0.4721909761428833, "step": 7326 }, { "epoch": 1.1710221369775433, "grad_norm": 1.4372371617602895, "learning_rate": 8.01541390903046e-06, "loss": 0.5210643410682678, "step": 7327 }, { "epoch": 1.1711819707504196, "grad_norm": 1.6018234451204667, "learning_rate": 8.012823884077805e-06, "loss": 0.48504549264907837, "step": 7328 }, { "epoch": 1.1713418045232957, "grad_norm": 1.3810950711284307, "learning_rate": 8.010233997902475e-06, "loss": 0.5422866344451904, "step": 7329 }, { "epoch": 1.171501638296172, "grad_norm": 2.1797212271326787, "learning_rate": 8.007644250685336e-06, "loss": 0.5134543180465698, "step": 7330 }, { "epoch": 1.1716614720690481, "grad_norm": 1.7188560797264745, "learning_rate": 8.00505464260725e-06, "loss": 0.5841692686080933, "step": 7331 }, { "epoch": 1.1718213058419245, "grad_norm": 1.3709823619394148, "learning_rate": 8.002465173849063e-06, "loss": 0.5788718461990356, "step": 7332 }, { "epoch": 1.1719811396148005, "grad_norm": 1.5115449867650748, "learning_rate": 7.999875844591615e-06, "loss": 0.49408525228500366, "step": 7333 }, { "epoch": 1.1721409733876769, "grad_norm": 1.7148918394800736, "learning_rate": 7.997286655015737e-06, "loss": 0.5187755823135376, "step": 7334 }, { "epoch": 1.172300807160553, "grad_norm": 1.6027746652202175, "learning_rate": 7.994697605302248e-06, "loss": 0.5083965063095093, "step": 7335 }, { "epoch": 1.1724606409334293, "grad_norm": 1.7460052021189187, "learning_rate": 7.992108695631957e-06, "loss": 0.57094806432724, "step": 7336 }, { "epoch": 1.1726204747063054, "grad_norm": 1.3137373302405306, "learning_rate": 7.989519926185668e-06, "loss": 0.5952683091163635, "step": 7337 }, { "epoch": 1.1727803084791817, "grad_norm": 1.6061720587908346, "learning_rate": 7.986931297144165e-06, "loss": 0.39877066016197205, "step": 7338 }, { "epoch": 1.1729401422520578, "grad_norm": 1.462476306575838, "learning_rate": 7.984342808688234e-06, "loss": 0.5843749046325684, "step": 7339 }, { "epoch": 1.173099976024934, "grad_norm": 1.7057584094803564, "learning_rate": 7.981754460998643e-06, "loss": 0.548493504524231, "step": 7340 }, { "epoch": 1.1732598097978102, "grad_norm": 1.3950466457254493, "learning_rate": 7.979166254256155e-06, "loss": 0.5057229995727539, "step": 7341 }, { "epoch": 1.1734196435706865, "grad_norm": 1.5761298370896395, "learning_rate": 7.976578188641519e-06, "loss": 0.557745099067688, "step": 7342 }, { "epoch": 1.1735794773435626, "grad_norm": 1.7061024806468685, "learning_rate": 7.97399026433548e-06, "loss": 0.4649260640144348, "step": 7343 }, { "epoch": 1.173739311116439, "grad_norm": 1.9389964601745726, "learning_rate": 7.971402481518767e-06, "loss": 0.44566041231155396, "step": 7344 }, { "epoch": 1.173899144889315, "grad_norm": 1.5006963402179228, "learning_rate": 7.968814840372098e-06, "loss": 0.5627783536911011, "step": 7345 }, { "epoch": 1.1740589786621913, "grad_norm": 1.9521411557134232, "learning_rate": 7.966227341076189e-06, "loss": 0.5858116149902344, "step": 7346 }, { "epoch": 1.1742188124350674, "grad_norm": 1.6875044961232004, "learning_rate": 7.963639983811744e-06, "loss": 0.5970457792282104, "step": 7347 }, { "epoch": 1.1743786462079437, "grad_norm": 1.5657722750651901, "learning_rate": 7.96105276875945e-06, "loss": 0.5443781614303589, "step": 7348 }, { "epoch": 1.17453847998082, "grad_norm": 1.5213032540994638, "learning_rate": 7.958465696099993e-06, "loss": 0.4936804175376892, "step": 7349 }, { "epoch": 1.1746983137536962, "grad_norm": 1.4691153588127448, "learning_rate": 7.955878766014042e-06, "loss": 0.5569518804550171, "step": 7350 }, { "epoch": 1.1748581475265722, "grad_norm": 1.410837178448003, "learning_rate": 7.953291978682258e-06, "loss": 0.6291813254356384, "step": 7351 }, { "epoch": 1.1750179812994486, "grad_norm": 1.5282259347982383, "learning_rate": 7.950705334285297e-06, "loss": 0.6078333258628845, "step": 7352 }, { "epoch": 1.1751778150723249, "grad_norm": 1.6395015882394544, "learning_rate": 7.948118833003797e-06, "loss": 0.5080648064613342, "step": 7353 }, { "epoch": 1.175337648845201, "grad_norm": 1.9002250214349157, "learning_rate": 7.945532475018394e-06, "loss": 0.5333665609359741, "step": 7354 }, { "epoch": 1.1754974826180773, "grad_norm": 1.5366205000932136, "learning_rate": 7.942946260509705e-06, "loss": 0.5500712990760803, "step": 7355 }, { "epoch": 1.1756573163909534, "grad_norm": 1.2410159817895177, "learning_rate": 7.94036018965835e-06, "loss": 0.4254213273525238, "step": 7356 }, { "epoch": 1.1758171501638297, "grad_norm": 1.3746104580712635, "learning_rate": 7.937774262644923e-06, "loss": 0.5780167579650879, "step": 7357 }, { "epoch": 1.1759769839367058, "grad_norm": 1.5286897184138564, "learning_rate": 7.935188479650018e-06, "loss": 0.5310103297233582, "step": 7358 }, { "epoch": 1.1761368177095821, "grad_norm": 1.441315265495074, "learning_rate": 7.93260284085422e-06, "loss": 0.5801793336868286, "step": 7359 }, { "epoch": 1.1762966514824582, "grad_norm": 1.373709846673719, "learning_rate": 7.930017346438097e-06, "loss": 0.4355388879776001, "step": 7360 }, { "epoch": 1.1764564852553345, "grad_norm": 1.6080185353388416, "learning_rate": 7.927431996582212e-06, "loss": 0.6336184740066528, "step": 7361 }, { "epoch": 1.1766163190282106, "grad_norm": 1.4163874161298156, "learning_rate": 7.924846791467118e-06, "loss": 0.5207183957099915, "step": 7362 }, { "epoch": 1.176776152801087, "grad_norm": 1.3083777608395863, "learning_rate": 7.92226173127336e-06, "loss": 0.4957257807254791, "step": 7363 }, { "epoch": 1.176935986573963, "grad_norm": 1.4605084070336232, "learning_rate": 7.91967681618146e-06, "loss": 0.7008999586105347, "step": 7364 }, { "epoch": 1.1770958203468393, "grad_norm": 1.649546467790259, "learning_rate": 7.917092046371944e-06, "loss": 0.5199427604675293, "step": 7365 }, { "epoch": 1.1772556541197154, "grad_norm": 1.4090725432982312, "learning_rate": 7.914507422025324e-06, "loss": 0.5320851802825928, "step": 7366 }, { "epoch": 1.1774154878925918, "grad_norm": 1.5242813414295575, "learning_rate": 7.911922943322102e-06, "loss": 0.6139962673187256, "step": 7367 }, { "epoch": 1.1775753216654679, "grad_norm": 1.3829924756846834, "learning_rate": 7.909338610442767e-06, "loss": 0.4946209192276001, "step": 7368 }, { "epoch": 1.1777351554383442, "grad_norm": 1.7952159351191124, "learning_rate": 7.906754423567802e-06, "loss": 0.4927729070186615, "step": 7369 }, { "epoch": 1.1778949892112203, "grad_norm": 1.4581306183117042, "learning_rate": 7.904170382877672e-06, "loss": 0.5871695876121521, "step": 7370 }, { "epoch": 1.1780548229840966, "grad_norm": 2.0493254655564237, "learning_rate": 7.901586488552843e-06, "loss": 0.514066219329834, "step": 7371 }, { "epoch": 1.1782146567569727, "grad_norm": 1.3915601827414028, "learning_rate": 7.899002740773764e-06, "loss": 0.44359925389289856, "step": 7372 }, { "epoch": 1.178374490529849, "grad_norm": 1.641548916888059, "learning_rate": 7.89641913972087e-06, "loss": 0.6208963990211487, "step": 7373 }, { "epoch": 1.178534324302725, "grad_norm": 1.3605155868060574, "learning_rate": 7.893835685574596e-06, "loss": 0.5327468514442444, "step": 7374 }, { "epoch": 1.1786941580756014, "grad_norm": 1.4777413607923418, "learning_rate": 7.891252378515359e-06, "loss": 0.5177986025810242, "step": 7375 }, { "epoch": 1.1788539918484775, "grad_norm": 1.3535214539612161, "learning_rate": 7.888669218723568e-06, "loss": 0.504790186882019, "step": 7376 }, { "epoch": 1.1790138256213538, "grad_norm": 1.489948613775027, "learning_rate": 7.88608620637962e-06, "loss": 0.6055376529693604, "step": 7377 }, { "epoch": 1.17917365939423, "grad_norm": 1.297507471313112, "learning_rate": 7.883503341663909e-06, "loss": 0.46077877283096313, "step": 7378 }, { "epoch": 1.1793334931671062, "grad_norm": 1.7634477006141325, "learning_rate": 7.88092062475681e-06, "loss": 0.5855928063392639, "step": 7379 }, { "epoch": 1.1794933269399823, "grad_norm": 1.6077257914258734, "learning_rate": 7.878338055838686e-06, "loss": 0.5450344681739807, "step": 7380 }, { "epoch": 1.1796531607128586, "grad_norm": 1.564192729290008, "learning_rate": 7.875755635089902e-06, "loss": 0.548877477645874, "step": 7381 }, { "epoch": 1.1798129944857347, "grad_norm": 1.5559011850419002, "learning_rate": 7.8731733626908e-06, "loss": 0.5231500267982483, "step": 7382 }, { "epoch": 1.179972828258611, "grad_norm": 1.476579796216474, "learning_rate": 7.870591238821718e-06, "loss": 0.48651114106178284, "step": 7383 }, { "epoch": 1.1801326620314874, "grad_norm": 1.3668541695469647, "learning_rate": 7.868009263662984e-06, "loss": 0.4566457271575928, "step": 7384 }, { "epoch": 1.1802924958043635, "grad_norm": 1.624407499407979, "learning_rate": 7.865427437394908e-06, "loss": 0.5793279409408569, "step": 7385 }, { "epoch": 1.1804523295772396, "grad_norm": 1.7483454752269187, "learning_rate": 7.862845760197806e-06, "loss": 0.6069604158401489, "step": 7386 }, { "epoch": 1.1806121633501159, "grad_norm": 1.6867582556467644, "learning_rate": 7.860264232251968e-06, "loss": 0.5897400975227356, "step": 7387 }, { "epoch": 1.1807719971229922, "grad_norm": 1.396443309259728, "learning_rate": 7.857682853737676e-06, "loss": 0.5678689479827881, "step": 7388 }, { "epoch": 1.1809318308958683, "grad_norm": 1.720782855261056, "learning_rate": 7.855101624835209e-06, "loss": 0.48084893822669983, "step": 7389 }, { "epoch": 1.1810916646687446, "grad_norm": 1.4375550528049705, "learning_rate": 7.852520545724827e-06, "loss": 0.593062162399292, "step": 7390 }, { "epoch": 1.1812514984416207, "grad_norm": 1.4605905886263473, "learning_rate": 7.849939616586786e-06, "loss": 0.542899489402771, "step": 7391 }, { "epoch": 1.181411332214497, "grad_norm": 1.7804290632635835, "learning_rate": 7.847358837601324e-06, "loss": 0.4871029853820801, "step": 7392 }, { "epoch": 1.181571165987373, "grad_norm": 1.6909192922511769, "learning_rate": 7.844778208948682e-06, "loss": 0.6886481046676636, "step": 7393 }, { "epoch": 1.1817309997602494, "grad_norm": 1.6141396304076852, "learning_rate": 7.842197730809077e-06, "loss": 0.5175740718841553, "step": 7394 }, { "epoch": 1.1818908335331255, "grad_norm": 1.520332391698365, "learning_rate": 7.839617403362717e-06, "loss": 0.47518390417099, "step": 7395 }, { "epoch": 1.1820506673060018, "grad_norm": 1.410814593852688, "learning_rate": 7.83703722678981e-06, "loss": 0.43135762214660645, "step": 7396 }, { "epoch": 1.182210501078878, "grad_norm": 1.4787890045420098, "learning_rate": 7.83445720127054e-06, "loss": 0.4943627417087555, "step": 7397 }, { "epoch": 1.1823703348517542, "grad_norm": 1.6739766926427067, "learning_rate": 7.831877326985093e-06, "loss": 0.5210771560668945, "step": 7398 }, { "epoch": 1.1825301686246303, "grad_norm": 1.5705937361833495, "learning_rate": 7.829297604113632e-06, "loss": 0.5533506870269775, "step": 7399 }, { "epoch": 1.1826900023975067, "grad_norm": 1.459467730357028, "learning_rate": 7.82671803283632e-06, "loss": 0.599153995513916, "step": 7400 }, { "epoch": 1.1828498361703828, "grad_norm": 1.26167620609765, "learning_rate": 7.824138613333305e-06, "loss": 0.48713380098342896, "step": 7401 }, { "epoch": 1.183009669943259, "grad_norm": 1.523282068489271, "learning_rate": 7.821559345784722e-06, "loss": 0.5455526113510132, "step": 7402 }, { "epoch": 1.1831695037161352, "grad_norm": 1.479911248011227, "learning_rate": 7.8189802303707e-06, "loss": 0.48696720600128174, "step": 7403 }, { "epoch": 1.1833293374890115, "grad_norm": 1.4737829493987262, "learning_rate": 7.816401267271351e-06, "loss": 0.5296944379806519, "step": 7404 }, { "epoch": 1.1834891712618876, "grad_norm": 1.5816158798152185, "learning_rate": 7.813822456666785e-06, "loss": 0.46220076084136963, "step": 7405 }, { "epoch": 1.183649005034764, "grad_norm": 1.3649888322329826, "learning_rate": 7.811243798737099e-06, "loss": 0.5749208927154541, "step": 7406 }, { "epoch": 1.18380883880764, "grad_norm": 1.39752376472818, "learning_rate": 7.80866529366237e-06, "loss": 0.6037627458572388, "step": 7407 }, { "epoch": 1.1839686725805163, "grad_norm": 1.3791086966154391, "learning_rate": 7.806086941622682e-06, "loss": 0.5296956300735474, "step": 7408 }, { "epoch": 1.1841285063533924, "grad_norm": 1.7639864824928502, "learning_rate": 7.80350874279809e-06, "loss": 0.7303761839866638, "step": 7409 }, { "epoch": 1.1842883401262687, "grad_norm": 1.5582888246880189, "learning_rate": 7.800930697368645e-06, "loss": 0.578309178352356, "step": 7410 }, { "epoch": 1.1844481738991448, "grad_norm": 1.5574962138925783, "learning_rate": 7.798352805514396e-06, "loss": 0.5182209014892578, "step": 7411 }, { "epoch": 1.1846080076720211, "grad_norm": 1.4338301875754818, "learning_rate": 7.795775067415365e-06, "loss": 0.638072669506073, "step": 7412 }, { "epoch": 1.1847678414448972, "grad_norm": 1.4930926408399574, "learning_rate": 7.79319748325158e-06, "loss": 0.6386547684669495, "step": 7413 }, { "epoch": 1.1849276752177735, "grad_norm": 1.485464550429449, "learning_rate": 7.790620053203045e-06, "loss": 0.5755152106285095, "step": 7414 }, { "epoch": 1.1850875089906496, "grad_norm": 1.8553177784340529, "learning_rate": 7.788042777449762e-06, "loss": 0.6528584361076355, "step": 7415 }, { "epoch": 1.185247342763526, "grad_norm": 1.4822905905818957, "learning_rate": 7.78546565617172e-06, "loss": 0.5926730632781982, "step": 7416 }, { "epoch": 1.185407176536402, "grad_norm": 1.450654782782787, "learning_rate": 7.782888689548887e-06, "loss": 0.5334916114807129, "step": 7417 }, { "epoch": 1.1855670103092784, "grad_norm": 1.6716194795496209, "learning_rate": 7.78031187776124e-06, "loss": 0.6806395649909973, "step": 7418 }, { "epoch": 1.1857268440821547, "grad_norm": 1.6472990955716251, "learning_rate": 7.777735220988728e-06, "loss": 0.426584929227829, "step": 7419 }, { "epoch": 1.1858866778550308, "grad_norm": 1.5866192503931862, "learning_rate": 7.7751587194113e-06, "loss": 0.5722876787185669, "step": 7420 }, { "epoch": 1.1860465116279069, "grad_norm": 1.6252390459394241, "learning_rate": 7.772582373208884e-06, "loss": 0.4803217053413391, "step": 7421 }, { "epoch": 1.1862063454007832, "grad_norm": 1.555514062224257, "learning_rate": 7.770006182561407e-06, "loss": 0.4965220093727112, "step": 7422 }, { "epoch": 1.1863661791736595, "grad_norm": 1.5028732190860723, "learning_rate": 7.767430147648781e-06, "loss": 0.4347045123577118, "step": 7423 }, { "epoch": 1.1865260129465356, "grad_norm": 1.6017630866336883, "learning_rate": 7.764854268650904e-06, "loss": 0.5821396112442017, "step": 7424 }, { "epoch": 1.186685846719412, "grad_norm": 1.5374830181681791, "learning_rate": 7.762278545747666e-06, "loss": 0.5427632927894592, "step": 7425 }, { "epoch": 1.186845680492288, "grad_norm": 1.5848342602291445, "learning_rate": 7.759702979118955e-06, "loss": 0.6561800241470337, "step": 7426 }, { "epoch": 1.1870055142651643, "grad_norm": 1.4603654548373222, "learning_rate": 7.757127568944629e-06, "loss": 0.44627314805984497, "step": 7427 }, { "epoch": 1.1871653480380404, "grad_norm": 1.5637642759853867, "learning_rate": 7.754552315404552e-06, "loss": 0.6641892790794373, "step": 7428 }, { "epoch": 1.1873251818109167, "grad_norm": 1.5559402089836705, "learning_rate": 7.751977218678569e-06, "loss": 0.5711202621459961, "step": 7429 }, { "epoch": 1.1874850155837928, "grad_norm": 1.493820442509018, "learning_rate": 7.749402278946513e-06, "loss": 0.6618490219116211, "step": 7430 }, { "epoch": 1.1876448493566691, "grad_norm": 1.775187943874195, "learning_rate": 7.746827496388211e-06, "loss": 0.6405639052391052, "step": 7431 }, { "epoch": 1.1878046831295452, "grad_norm": 1.8381882650038384, "learning_rate": 7.744252871183474e-06, "loss": 0.7093803286552429, "step": 7432 }, { "epoch": 1.1879645169024216, "grad_norm": 1.3200056559713393, "learning_rate": 7.741678403512112e-06, "loss": 0.5582314729690552, "step": 7433 }, { "epoch": 1.1881243506752976, "grad_norm": 1.6259391354336488, "learning_rate": 7.739104093553908e-06, "loss": 0.428094744682312, "step": 7434 }, { "epoch": 1.188284184448174, "grad_norm": 1.4298588950086548, "learning_rate": 7.736529941488648e-06, "loss": 0.5056161284446716, "step": 7435 }, { "epoch": 1.18844401822105, "grad_norm": 3.018630888253577, "learning_rate": 7.733955947496099e-06, "loss": 0.5315395593643188, "step": 7436 }, { "epoch": 1.1886038519939264, "grad_norm": 1.884442908094302, "learning_rate": 7.731382111756019e-06, "loss": 0.626392662525177, "step": 7437 }, { "epoch": 1.1887636857668025, "grad_norm": 1.4301628565043247, "learning_rate": 7.728808434448159e-06, "loss": 0.6532267332077026, "step": 7438 }, { "epoch": 1.1889235195396788, "grad_norm": 1.3666796919139381, "learning_rate": 7.726234915752251e-06, "loss": 0.5907344818115234, "step": 7439 }, { "epoch": 1.1890833533125549, "grad_norm": 1.5756996572680906, "learning_rate": 7.723661555848025e-06, "loss": 0.47395607829093933, "step": 7440 }, { "epoch": 1.1892431870854312, "grad_norm": 1.7351816465384637, "learning_rate": 7.721088354915192e-06, "loss": 0.6254748106002808, "step": 7441 }, { "epoch": 1.1894030208583073, "grad_norm": 1.7234281720596252, "learning_rate": 7.71851531313346e-06, "loss": 0.5458738803863525, "step": 7442 }, { "epoch": 1.1895628546311836, "grad_norm": 1.6771834589617507, "learning_rate": 7.715942430682515e-06, "loss": 0.5963952541351318, "step": 7443 }, { "epoch": 1.1897226884040597, "grad_norm": 1.421772792502996, "learning_rate": 7.713369707742038e-06, "loss": 0.5121476650238037, "step": 7444 }, { "epoch": 1.189882522176936, "grad_norm": 1.6241209627487667, "learning_rate": 7.7107971444917e-06, "loss": 0.6678352355957031, "step": 7445 }, { "epoch": 1.1900423559498121, "grad_norm": 1.717723124071245, "learning_rate": 7.708224741111166e-06, "loss": 0.6400055885314941, "step": 7446 }, { "epoch": 1.1902021897226884, "grad_norm": 1.826835106710121, "learning_rate": 7.705652497780076e-06, "loss": 0.5037791728973389, "step": 7447 }, { "epoch": 1.1903620234955645, "grad_norm": 1.4201912807668575, "learning_rate": 7.70308041467807e-06, "loss": 0.5571761131286621, "step": 7448 }, { "epoch": 1.1905218572684408, "grad_norm": 1.547834982276456, "learning_rate": 7.700508491984771e-06, "loss": 0.5730429887771606, "step": 7449 }, { "epoch": 1.190681691041317, "grad_norm": 1.6226714219684621, "learning_rate": 7.697936729879796e-06, "loss": 0.595868706703186, "step": 7450 }, { "epoch": 1.1908415248141933, "grad_norm": 1.3820685984255463, "learning_rate": 7.695365128542747e-06, "loss": 0.4512190818786621, "step": 7451 }, { "epoch": 1.1910013585870693, "grad_norm": 1.480673587327573, "learning_rate": 7.69279368815321e-06, "loss": 0.5213109850883484, "step": 7452 }, { "epoch": 1.1911611923599457, "grad_norm": 1.6705591377475189, "learning_rate": 7.690222408890773e-06, "loss": 0.6849743127822876, "step": 7453 }, { "epoch": 1.191321026132822, "grad_norm": 1.455454697309774, "learning_rate": 7.687651290935e-06, "loss": 0.6971851587295532, "step": 7454 }, { "epoch": 1.191480859905698, "grad_norm": 1.5078084449062084, "learning_rate": 7.685080334465451e-06, "loss": 0.5404572486877441, "step": 7455 }, { "epoch": 1.1916406936785742, "grad_norm": 1.5367207561850889, "learning_rate": 7.68250953966167e-06, "loss": 0.5454411506652832, "step": 7456 }, { "epoch": 1.1918005274514505, "grad_norm": 1.3792425116508318, "learning_rate": 7.679938906703194e-06, "loss": 0.5386468172073364, "step": 7457 }, { "epoch": 1.1919603612243268, "grad_norm": 1.3741169597107437, "learning_rate": 7.677368435769548e-06, "loss": 0.543350875377655, "step": 7458 }, { "epoch": 1.192120194997203, "grad_norm": 1.5691192487479564, "learning_rate": 7.674798127040242e-06, "loss": 0.6697763204574585, "step": 7459 }, { "epoch": 1.1922800287700792, "grad_norm": 1.6099555820238887, "learning_rate": 7.672227980694782e-06, "loss": 0.681533694267273, "step": 7460 }, { "epoch": 1.1924398625429553, "grad_norm": 1.6337856777649378, "learning_rate": 7.66965799691265e-06, "loss": 0.611697793006897, "step": 7461 }, { "epoch": 1.1925996963158316, "grad_norm": 1.6000674743294385, "learning_rate": 7.667088175873332e-06, "loss": 0.627169132232666, "step": 7462 }, { "epoch": 1.1927595300887077, "grad_norm": 1.3893786411551488, "learning_rate": 7.66451851775629e-06, "loss": 0.49258583784103394, "step": 7463 }, { "epoch": 1.192919363861584, "grad_norm": 1.4124548792667464, "learning_rate": 7.661949022740984e-06, "loss": 0.5477480888366699, "step": 7464 }, { "epoch": 1.1930791976344601, "grad_norm": 1.6212693910636982, "learning_rate": 7.659379691006852e-06, "loss": 0.6753759384155273, "step": 7465 }, { "epoch": 1.1932390314073364, "grad_norm": 1.5464550175433662, "learning_rate": 7.656810522733337e-06, "loss": 0.42450180649757385, "step": 7466 }, { "epoch": 1.1933988651802125, "grad_norm": 1.6655304602729475, "learning_rate": 7.654241518099851e-06, "loss": 0.42388296127319336, "step": 7467 }, { "epoch": 1.1935586989530889, "grad_norm": 1.7464100251241688, "learning_rate": 7.65167267728581e-06, "loss": 0.5375616550445557, "step": 7468 }, { "epoch": 1.193718532725965, "grad_norm": 1.6136519720826856, "learning_rate": 7.64910400047061e-06, "loss": 0.525499701499939, "step": 7469 }, { "epoch": 1.1938783664988413, "grad_norm": 1.4089265706336511, "learning_rate": 7.646535487833641e-06, "loss": 0.5409830212593079, "step": 7470 }, { "epoch": 1.1940382002717174, "grad_norm": 1.7599397174715368, "learning_rate": 7.643967139554277e-06, "loss": 0.5037971138954163, "step": 7471 }, { "epoch": 1.1941980340445937, "grad_norm": 1.5617510939064327, "learning_rate": 7.641398955811882e-06, "loss": 0.6655211448669434, "step": 7472 }, { "epoch": 1.1943578678174698, "grad_norm": 1.5465470144096192, "learning_rate": 7.63883093678581e-06, "loss": 0.6683152318000793, "step": 7473 }, { "epoch": 1.194517701590346, "grad_norm": 1.459261768255048, "learning_rate": 7.636263082655397e-06, "loss": 0.5208406448364258, "step": 7474 }, { "epoch": 1.1946775353632222, "grad_norm": 1.6234215368120868, "learning_rate": 7.633695393599982e-06, "loss": 0.5754454731941223, "step": 7475 }, { "epoch": 1.1948373691360985, "grad_norm": 1.4572824396126434, "learning_rate": 7.631127869798875e-06, "loss": 0.5559873580932617, "step": 7476 }, { "epoch": 1.1949972029089746, "grad_norm": 1.5022635248182712, "learning_rate": 7.6285605114313885e-06, "loss": 0.47123026847839355, "step": 7477 }, { "epoch": 1.195157036681851, "grad_norm": 1.3052734474176733, "learning_rate": 7.625993318676814e-06, "loss": 0.4902564287185669, "step": 7478 }, { "epoch": 1.195316870454727, "grad_norm": 1.2595006026696278, "learning_rate": 7.62342629171444e-06, "loss": 0.37777048349380493, "step": 7479 }, { "epoch": 1.1954767042276033, "grad_norm": 1.670097351678774, "learning_rate": 7.620859430723533e-06, "loss": 0.5718543529510498, "step": 7480 }, { "epoch": 1.1956365380004794, "grad_norm": 1.4016534664256113, "learning_rate": 7.618292735883354e-06, "loss": 0.41683241724967957, "step": 7481 }, { "epoch": 1.1957963717733557, "grad_norm": 1.3798770241933853, "learning_rate": 7.615726207373155e-06, "loss": 0.5693812966346741, "step": 7482 }, { "epoch": 1.1959562055462318, "grad_norm": 1.4698982231161553, "learning_rate": 7.613159845372171e-06, "loss": 0.6289976835250854, "step": 7483 }, { "epoch": 1.1961160393191081, "grad_norm": 1.1927172107632262, "learning_rate": 7.61059365005963e-06, "loss": 0.49569424986839294, "step": 7484 }, { "epoch": 1.1962758730919842, "grad_norm": 1.3379341236875695, "learning_rate": 7.60802762161474e-06, "loss": 0.5920602083206177, "step": 7485 }, { "epoch": 1.1964357068648606, "grad_norm": 1.521913227030781, "learning_rate": 7.605461760216711e-06, "loss": 0.6018475294113159, "step": 7486 }, { "epoch": 1.1965955406377367, "grad_norm": 1.5484417654389537, "learning_rate": 7.60289606604473e-06, "loss": 0.5970799922943115, "step": 7487 }, { "epoch": 1.196755374410613, "grad_norm": 1.640085311836972, "learning_rate": 7.600330539277976e-06, "loss": 0.6070147752761841, "step": 7488 }, { "epoch": 1.1969152081834893, "grad_norm": 1.5099644218423105, "learning_rate": 7.5977651800956154e-06, "loss": 0.5896891355514526, "step": 7489 }, { "epoch": 1.1970750419563654, "grad_norm": 1.2631436797995064, "learning_rate": 7.5951999886768065e-06, "loss": 0.4115150570869446, "step": 7490 }, { "epoch": 1.1972348757292415, "grad_norm": 1.820853516763818, "learning_rate": 7.5926349652006905e-06, "loss": 0.5583604574203491, "step": 7491 }, { "epoch": 1.1973947095021178, "grad_norm": 1.5631752984484153, "learning_rate": 7.590070109846401e-06, "loss": 0.6680166125297546, "step": 7492 }, { "epoch": 1.197554543274994, "grad_norm": 1.4542050071228407, "learning_rate": 7.58750542279306e-06, "loss": 0.4897170662879944, "step": 7493 }, { "epoch": 1.1977143770478702, "grad_norm": 1.4921638895262654, "learning_rate": 7.5849409042197705e-06, "loss": 0.4376574158668518, "step": 7494 }, { "epoch": 1.1978742108207463, "grad_norm": 1.3649914196856143, "learning_rate": 7.582376554305636e-06, "loss": 0.528891384601593, "step": 7495 }, { "epoch": 1.1980340445936226, "grad_norm": 1.395359158431465, "learning_rate": 7.579812373229735e-06, "loss": 0.4421691298484802, "step": 7496 }, { "epoch": 1.198193878366499, "grad_norm": 1.5932118143368588, "learning_rate": 7.577248361171148e-06, "loss": 0.4822341203689575, "step": 7497 }, { "epoch": 1.198353712139375, "grad_norm": 1.476661351959998, "learning_rate": 7.57468451830893e-06, "loss": 0.484605610370636, "step": 7498 }, { "epoch": 1.1985135459122513, "grad_norm": 1.4579559730660436, "learning_rate": 7.572120844822136e-06, "loss": 0.582007110118866, "step": 7499 }, { "epoch": 1.1986733796851274, "grad_norm": 1.5949273599066585, "learning_rate": 7.5695573408898015e-06, "loss": 0.6277973651885986, "step": 7500 }, { "epoch": 1.1988332134580038, "grad_norm": 1.3354366028265447, "learning_rate": 7.5669940066909495e-06, "loss": 0.611360490322113, "step": 7501 }, { "epoch": 1.1989930472308798, "grad_norm": 1.5330634157192036, "learning_rate": 7.5644308424045995e-06, "loss": 0.6707090139389038, "step": 7502 }, { "epoch": 1.1991528810037562, "grad_norm": 1.36735225185002, "learning_rate": 7.56186784820975e-06, "loss": 0.5737726092338562, "step": 7503 }, { "epoch": 1.1993127147766323, "grad_norm": 1.544468504013815, "learning_rate": 7.559305024285395e-06, "loss": 0.5348800420761108, "step": 7504 }, { "epoch": 1.1994725485495086, "grad_norm": 1.2599195504421405, "learning_rate": 7.556742370810506e-06, "loss": 0.3680647015571594, "step": 7505 }, { "epoch": 1.1996323823223847, "grad_norm": 2.089073301547395, "learning_rate": 7.554179887964056e-06, "loss": 0.6034774780273438, "step": 7506 }, { "epoch": 1.199792216095261, "grad_norm": 1.5535725238633302, "learning_rate": 7.551617575925001e-06, "loss": 0.6098570227622986, "step": 7507 }, { "epoch": 1.199952049868137, "grad_norm": 1.4408966036449644, "learning_rate": 7.54905543487228e-06, "loss": 0.6137884855270386, "step": 7508 }, { "epoch": 1.2001118836410134, "grad_norm": 1.4927066713380723, "learning_rate": 7.546493464984823e-06, "loss": 0.6392697095870972, "step": 7509 }, { "epoch": 1.2002717174138895, "grad_norm": 2.014456651666012, "learning_rate": 7.5439316664415525e-06, "loss": 0.5937390327453613, "step": 7510 }, { "epoch": 1.2004315511867658, "grad_norm": 1.330583459909807, "learning_rate": 7.5413700394213715e-06, "loss": 0.43830233812332153, "step": 7511 }, { "epoch": 1.200591384959642, "grad_norm": 1.5322011561234081, "learning_rate": 7.53880858410318e-06, "loss": 0.646658182144165, "step": 7512 }, { "epoch": 1.2007512187325182, "grad_norm": 1.531880577968968, "learning_rate": 7.536247300665854e-06, "loss": 0.6794069409370422, "step": 7513 }, { "epoch": 1.2009110525053943, "grad_norm": 1.3688909935902214, "learning_rate": 7.533686189288272e-06, "loss": 0.5221127271652222, "step": 7514 }, { "epoch": 1.2010708862782706, "grad_norm": 1.4613097969749922, "learning_rate": 7.531125250149289e-06, "loss": 0.4954005479812622, "step": 7515 }, { "epoch": 1.2012307200511467, "grad_norm": 1.3235100384104919, "learning_rate": 7.528564483427749e-06, "loss": 0.5052607655525208, "step": 7516 }, { "epoch": 1.201390553824023, "grad_norm": 1.4672095515883579, "learning_rate": 7.526003889302494e-06, "loss": 0.5210402011871338, "step": 7517 }, { "epoch": 1.2015503875968991, "grad_norm": 1.6073811420990365, "learning_rate": 7.5234434679523395e-06, "loss": 0.5636147260665894, "step": 7518 }, { "epoch": 1.2017102213697755, "grad_norm": 1.5348795819660128, "learning_rate": 7.520883219556101e-06, "loss": 0.4482288956642151, "step": 7519 }, { "epoch": 1.2018700551426515, "grad_norm": 1.3762830281693386, "learning_rate": 7.518323144292574e-06, "loss": 0.5639643669128418, "step": 7520 }, { "epoch": 1.2020298889155279, "grad_norm": 1.669690763589899, "learning_rate": 7.51576324234055e-06, "loss": 0.5914486646652222, "step": 7521 }, { "epoch": 1.202189722688404, "grad_norm": 1.541306107605808, "learning_rate": 7.5132035138788e-06, "loss": 0.39427024126052856, "step": 7522 }, { "epoch": 1.2023495564612803, "grad_norm": 1.6240122892239246, "learning_rate": 7.5106439590860836e-06, "loss": 0.5343765616416931, "step": 7523 }, { "epoch": 1.2025093902341566, "grad_norm": 1.6760051822190838, "learning_rate": 7.508084578141156e-06, "loss": 0.6142600774765015, "step": 7524 }, { "epoch": 1.2026692240070327, "grad_norm": 1.6172996703616598, "learning_rate": 7.5055253712227495e-06, "loss": 0.5227649211883545, "step": 7525 }, { "epoch": 1.2028290577799088, "grad_norm": 1.8485240380097399, "learning_rate": 7.502966338509597e-06, "loss": 0.5700494647026062, "step": 7526 }, { "epoch": 1.202988891552785, "grad_norm": 1.5952639890026268, "learning_rate": 7.50040748018041e-06, "loss": 0.45818445086479187, "step": 7527 }, { "epoch": 1.2031487253256614, "grad_norm": 1.6130583390819508, "learning_rate": 7.4978487964138866e-06, "loss": 0.557120144367218, "step": 7528 }, { "epoch": 1.2033085590985375, "grad_norm": 1.5895028076027364, "learning_rate": 7.495290287388721e-06, "loss": 0.6540331840515137, "step": 7529 }, { "epoch": 1.2034683928714136, "grad_norm": 1.6199945227619599, "learning_rate": 7.492731953283587e-06, "loss": 0.5199617147445679, "step": 7530 }, { "epoch": 1.20362822664429, "grad_norm": 1.5601084461419115, "learning_rate": 7.490173794277151e-06, "loss": 0.5019410252571106, "step": 7531 }, { "epoch": 1.2037880604171662, "grad_norm": 1.6229171105756708, "learning_rate": 7.487615810548066e-06, "loss": 0.620373547077179, "step": 7532 }, { "epoch": 1.2039478941900423, "grad_norm": 1.537336714576546, "learning_rate": 7.485058002274971e-06, "loss": 0.5567753314971924, "step": 7533 }, { "epoch": 1.2041077279629187, "grad_norm": 1.2493661152965323, "learning_rate": 7.482500369636496e-06, "loss": 0.4767771363258362, "step": 7534 }, { "epoch": 1.2042675617357947, "grad_norm": 1.511876453919157, "learning_rate": 7.479942912811255e-06, "loss": 0.5773197412490845, "step": 7535 }, { "epoch": 1.204427395508671, "grad_norm": 1.4937802206293669, "learning_rate": 7.4773856319778545e-06, "loss": 0.5978499054908752, "step": 7536 }, { "epoch": 1.2045872292815472, "grad_norm": 1.3723787486088777, "learning_rate": 7.474828527314884e-06, "loss": 0.5086036324501038, "step": 7537 }, { "epoch": 1.2047470630544235, "grad_norm": 1.7369038235083953, "learning_rate": 7.472271599000922e-06, "loss": 0.5230990648269653, "step": 7538 }, { "epoch": 1.2049068968272996, "grad_norm": 1.5273128836005616, "learning_rate": 7.469714847214537e-06, "loss": 0.6169846653938293, "step": 7539 }, { "epoch": 1.2050667306001759, "grad_norm": 1.6314867973359135, "learning_rate": 7.467158272134281e-06, "loss": 0.49529725313186646, "step": 7540 }, { "epoch": 1.205226564373052, "grad_norm": 1.5991598593478336, "learning_rate": 7.4646018739387e-06, "loss": 0.5320773124694824, "step": 7541 }, { "epoch": 1.2053863981459283, "grad_norm": 1.3992804032301658, "learning_rate": 7.46204565280632e-06, "loss": 0.5010770559310913, "step": 7542 }, { "epoch": 1.2055462319188044, "grad_norm": 1.6674986083997736, "learning_rate": 7.4594896089156595e-06, "loss": 0.4873163402080536, "step": 7543 }, { "epoch": 1.2057060656916807, "grad_norm": 1.4861515798515372, "learning_rate": 7.456933742445227e-06, "loss": 0.6911630630493164, "step": 7544 }, { "epoch": 1.2058658994645568, "grad_norm": 1.594617579004568, "learning_rate": 7.454378053573507e-06, "loss": 0.5943602323532104, "step": 7545 }, { "epoch": 1.2060257332374331, "grad_norm": 1.5682089591846062, "learning_rate": 7.4518225424789855e-06, "loss": 0.5750924348831177, "step": 7546 }, { "epoch": 1.2061855670103092, "grad_norm": 1.5146716868779788, "learning_rate": 7.4492672093401345e-06, "loss": 0.5330572128295898, "step": 7547 }, { "epoch": 1.2063454007831855, "grad_norm": 1.6003481267930941, "learning_rate": 7.4467120543354e-06, "loss": 0.5375057458877563, "step": 7548 }, { "epoch": 1.2065052345560616, "grad_norm": 1.6105479080475038, "learning_rate": 7.4441570776432324e-06, "loss": 0.5504614114761353, "step": 7549 }, { "epoch": 1.206665068328938, "grad_norm": 1.5643918359272861, "learning_rate": 7.4416022794420575e-06, "loss": 0.49813374876976013, "step": 7550 }, { "epoch": 1.206824902101814, "grad_norm": 1.361600473895621, "learning_rate": 7.439047659910297e-06, "loss": 0.39372608065605164, "step": 7551 }, { "epoch": 1.2069847358746904, "grad_norm": 1.2620523834802015, "learning_rate": 7.436493219226355e-06, "loss": 0.5643614530563354, "step": 7552 }, { "epoch": 1.2071445696475664, "grad_norm": 1.262676869534666, "learning_rate": 7.4339389575686215e-06, "loss": 0.537462592124939, "step": 7553 }, { "epoch": 1.2073044034204428, "grad_norm": 1.2688911565899024, "learning_rate": 7.431384875115482e-06, "loss": 0.5086826086044312, "step": 7554 }, { "epoch": 1.2074642371933189, "grad_norm": 1.5254903266913833, "learning_rate": 7.4288309720453e-06, "loss": 0.54320228099823, "step": 7555 }, { "epoch": 1.2076240709661952, "grad_norm": 1.6470655221810493, "learning_rate": 7.426277248536435e-06, "loss": 0.5954844951629639, "step": 7556 }, { "epoch": 1.2077839047390713, "grad_norm": 1.9134900080189876, "learning_rate": 7.423723704767229e-06, "loss": 0.610529899597168, "step": 7557 }, { "epoch": 1.2079437385119476, "grad_norm": 1.923742129294712, "learning_rate": 7.421170340916011e-06, "loss": 0.7089823484420776, "step": 7558 }, { "epoch": 1.208103572284824, "grad_norm": 1.5497663995403195, "learning_rate": 7.4186171571611e-06, "loss": 0.6313220262527466, "step": 7559 }, { "epoch": 1.2082634060577, "grad_norm": 1.525388428731249, "learning_rate": 7.4160641536808e-06, "loss": 0.6218715906143188, "step": 7560 }, { "epoch": 1.208423239830576, "grad_norm": 1.4708940742407868, "learning_rate": 7.413511330653404e-06, "loss": 0.6842027306556702, "step": 7561 }, { "epoch": 1.2085830736034524, "grad_norm": 1.6390934390785838, "learning_rate": 7.410958688257193e-06, "loss": 0.454065203666687, "step": 7562 }, { "epoch": 1.2087429073763287, "grad_norm": 1.5273447537716633, "learning_rate": 7.408406226670436e-06, "loss": 0.514539361000061, "step": 7563 }, { "epoch": 1.2089027411492048, "grad_norm": 1.4583591189585083, "learning_rate": 7.4058539460713855e-06, "loss": 0.5124197006225586, "step": 7564 }, { "epoch": 1.209062574922081, "grad_norm": 1.4046287373170496, "learning_rate": 7.4033018466382826e-06, "loss": 0.49588024616241455, "step": 7565 }, { "epoch": 1.2092224086949572, "grad_norm": 1.53233698383628, "learning_rate": 7.400749928549356e-06, "loss": 0.6437125205993652, "step": 7566 }, { "epoch": 1.2093822424678335, "grad_norm": 1.5390208275001445, "learning_rate": 7.398198191982828e-06, "loss": 0.5199313163757324, "step": 7567 }, { "epoch": 1.2095420762407096, "grad_norm": 1.7738799169433008, "learning_rate": 7.3956466371169e-06, "loss": 0.5679307579994202, "step": 7568 }, { "epoch": 1.209701910013586, "grad_norm": 1.6159151981692688, "learning_rate": 7.393095264129764e-06, "loss": 0.5870725512504578, "step": 7569 }, { "epoch": 1.209861743786462, "grad_norm": 1.8175378406388665, "learning_rate": 7.390544073199597e-06, "loss": 0.5360488891601562, "step": 7570 }, { "epoch": 1.2100215775593384, "grad_norm": 1.4767391007546018, "learning_rate": 7.387993064504569e-06, "loss": 0.5432642698287964, "step": 7571 }, { "epoch": 1.2101814113322145, "grad_norm": 2.0409184142758274, "learning_rate": 7.3854422382228295e-06, "loss": 0.5732139348983765, "step": 7572 }, { "epoch": 1.2103412451050908, "grad_norm": 1.772191262928012, "learning_rate": 7.3828915945325185e-06, "loss": 0.502078652381897, "step": 7573 }, { "epoch": 1.2105010788779669, "grad_norm": 1.5628508554042224, "learning_rate": 7.380341133611768e-06, "loss": 0.5549585819244385, "step": 7574 }, { "epoch": 1.2106609126508432, "grad_norm": 1.4711597762761175, "learning_rate": 7.377790855638689e-06, "loss": 0.5662423968315125, "step": 7575 }, { "epoch": 1.2108207464237193, "grad_norm": 1.6992066063081193, "learning_rate": 7.3752407607913886e-06, "loss": 0.5498642921447754, "step": 7576 }, { "epoch": 1.2109805801965956, "grad_norm": 1.8806917476725211, "learning_rate": 7.372690849247951e-06, "loss": 0.5778143405914307, "step": 7577 }, { "epoch": 1.2111404139694717, "grad_norm": 1.452251508655523, "learning_rate": 7.3701411211864586e-06, "loss": 0.5670945644378662, "step": 7578 }, { "epoch": 1.211300247742348, "grad_norm": 1.4275234760336548, "learning_rate": 7.36759157678497e-06, "loss": 0.6075602173805237, "step": 7579 }, { "epoch": 1.211460081515224, "grad_norm": 1.4076835675567039, "learning_rate": 7.3650422162215385e-06, "loss": 0.5515503883361816, "step": 7580 }, { "epoch": 1.2116199152881004, "grad_norm": 1.6829573540026894, "learning_rate": 7.3624930396742036e-06, "loss": 0.46840956807136536, "step": 7581 }, { "epoch": 1.2117797490609765, "grad_norm": 1.445749407847178, "learning_rate": 7.359944047320988e-06, "loss": 0.5762146711349487, "step": 7582 }, { "epoch": 1.2119395828338528, "grad_norm": 1.5032322321653473, "learning_rate": 7.357395239339909e-06, "loss": 0.4527233839035034, "step": 7583 }, { "epoch": 1.212099416606729, "grad_norm": 1.4108351908510497, "learning_rate": 7.354846615908959e-06, "loss": 0.5689112544059753, "step": 7584 }, { "epoch": 1.2122592503796052, "grad_norm": 1.3824679828634203, "learning_rate": 7.352298177206132e-06, "loss": 0.45118358731269836, "step": 7585 }, { "epoch": 1.2124190841524813, "grad_norm": 1.5913328112830685, "learning_rate": 7.349749923409395e-06, "loss": 0.7118686437606812, "step": 7586 }, { "epoch": 1.2125789179253577, "grad_norm": 1.384006824966046, "learning_rate": 7.3472018546967175e-06, "loss": 0.5380777716636658, "step": 7587 }, { "epoch": 1.2127387516982338, "grad_norm": 1.3599325165076386, "learning_rate": 7.3446539712460395e-06, "loss": 0.5458544492721558, "step": 7588 }, { "epoch": 1.21289858547111, "grad_norm": 1.6967972645545761, "learning_rate": 7.342106273235303e-06, "loss": 0.7504158020019531, "step": 7589 }, { "epoch": 1.2130584192439862, "grad_norm": 1.9919920183982096, "learning_rate": 7.3395587608424246e-06, "loss": 0.615189790725708, "step": 7590 }, { "epoch": 1.2132182530168625, "grad_norm": 1.6645257040717938, "learning_rate": 7.337011434245318e-06, "loss": 0.6870241165161133, "step": 7591 }, { "epoch": 1.2133780867897386, "grad_norm": 1.6638428339721412, "learning_rate": 7.3344642936218744e-06, "loss": 0.5168560147285461, "step": 7592 }, { "epoch": 1.213537920562615, "grad_norm": 1.3613506496727321, "learning_rate": 7.331917339149983e-06, "loss": 0.4912811517715454, "step": 7593 }, { "epoch": 1.2136977543354912, "grad_norm": 1.4233588166078281, "learning_rate": 7.32937057100751e-06, "loss": 0.49034392833709717, "step": 7594 }, { "epoch": 1.2138575881083673, "grad_norm": 1.481320264259634, "learning_rate": 7.326823989372311e-06, "loss": 0.5973552465438843, "step": 7595 }, { "epoch": 1.2140174218812434, "grad_norm": 1.512843046211242, "learning_rate": 7.324277594422237e-06, "loss": 0.5453785061836243, "step": 7596 }, { "epoch": 1.2141772556541197, "grad_norm": 1.6130482280857945, "learning_rate": 7.3217313863351115e-06, "loss": 0.5430806279182434, "step": 7597 }, { "epoch": 1.214337089426996, "grad_norm": 1.519938148007931, "learning_rate": 7.319185365288758e-06, "loss": 0.4252171516418457, "step": 7598 }, { "epoch": 1.2144969231998721, "grad_norm": 1.7840533891249792, "learning_rate": 7.316639531460978e-06, "loss": 0.7374281883239746, "step": 7599 }, { "epoch": 1.2146567569727482, "grad_norm": 1.7370552352908892, "learning_rate": 7.314093885029567e-06, "loss": 0.6124124526977539, "step": 7600 }, { "epoch": 1.2148165907456245, "grad_norm": 1.3211314674426473, "learning_rate": 7.311548426172301e-06, "loss": 0.5008434653282166, "step": 7601 }, { "epoch": 1.2149764245185009, "grad_norm": 1.70488186517916, "learning_rate": 7.309003155066947e-06, "loss": 0.6141591668128967, "step": 7602 }, { "epoch": 1.215136258291377, "grad_norm": 1.6266904616482754, "learning_rate": 7.306458071891258e-06, "loss": 0.535993218421936, "step": 7603 }, { "epoch": 1.2152960920642533, "grad_norm": 1.791288411297017, "learning_rate": 7.303913176822972e-06, "loss": 0.5957592725753784, "step": 7604 }, { "epoch": 1.2154559258371294, "grad_norm": 1.338874864730558, "learning_rate": 7.301368470039818e-06, "loss": 0.5724884867668152, "step": 7605 }, { "epoch": 1.2156157596100057, "grad_norm": 1.6640262643307302, "learning_rate": 7.298823951719505e-06, "loss": 0.46090251207351685, "step": 7606 }, { "epoch": 1.2157755933828818, "grad_norm": 1.6097705232893793, "learning_rate": 7.296279622039737e-06, "loss": 0.6242786049842834, "step": 7607 }, { "epoch": 1.215935427155758, "grad_norm": 1.5210164247344349, "learning_rate": 7.293735481178201e-06, "loss": 0.6516928672790527, "step": 7608 }, { "epoch": 1.2160952609286342, "grad_norm": 1.5611352340882225, "learning_rate": 7.291191529312571e-06, "loss": 0.5927517414093018, "step": 7609 }, { "epoch": 1.2162550947015105, "grad_norm": 1.4250180709743612, "learning_rate": 7.288647766620505e-06, "loss": 0.5110941529273987, "step": 7610 }, { "epoch": 1.2164149284743866, "grad_norm": 1.581695765057104, "learning_rate": 7.286104193279652e-06, "loss": 0.6182066202163696, "step": 7611 }, { "epoch": 1.216574762247263, "grad_norm": 1.5244853096899538, "learning_rate": 7.283560809467645e-06, "loss": 0.6621817350387573, "step": 7612 }, { "epoch": 1.216734596020139, "grad_norm": 1.2342121315422658, "learning_rate": 7.281017615362106e-06, "loss": 0.5827094316482544, "step": 7613 }, { "epoch": 1.2168944297930153, "grad_norm": 1.491955053031959, "learning_rate": 7.2784746111406415e-06, "loss": 0.5309097170829773, "step": 7614 }, { "epoch": 1.2170542635658914, "grad_norm": 1.8187218098597575, "learning_rate": 7.275931796980849e-06, "loss": 0.6316421031951904, "step": 7615 }, { "epoch": 1.2172140973387677, "grad_norm": 1.535683967730615, "learning_rate": 7.2733891730603055e-06, "loss": 0.5588853359222412, "step": 7616 }, { "epoch": 1.2173739311116438, "grad_norm": 1.8550880169807697, "learning_rate": 7.27084673955658e-06, "loss": 0.6637346744537354, "step": 7617 }, { "epoch": 1.2175337648845201, "grad_norm": 1.375184410138156, "learning_rate": 7.26830449664723e-06, "loss": 0.4941907525062561, "step": 7618 }, { "epoch": 1.2176935986573962, "grad_norm": 1.5193393415253027, "learning_rate": 7.2657624445097915e-06, "loss": 0.5004606246948242, "step": 7619 }, { "epoch": 1.2178534324302726, "grad_norm": 1.5054880421698915, "learning_rate": 7.263220583321798e-06, "loss": 0.5570226907730103, "step": 7620 }, { "epoch": 1.2180132662031486, "grad_norm": 1.352789579483241, "learning_rate": 7.2606789132607615e-06, "loss": 0.6220054626464844, "step": 7621 }, { "epoch": 1.218173099976025, "grad_norm": 1.4666630341016396, "learning_rate": 7.25813743450418e-06, "loss": 0.6075013875961304, "step": 7622 }, { "epoch": 1.218332933748901, "grad_norm": 1.4015239601244027, "learning_rate": 7.255596147229547e-06, "loss": 0.5423913598060608, "step": 7623 }, { "epoch": 1.2184927675217774, "grad_norm": 1.5596406470427007, "learning_rate": 7.253055051614332e-06, "loss": 0.5784666538238525, "step": 7624 }, { "epoch": 1.2186526012946535, "grad_norm": 1.2362358458195764, "learning_rate": 7.250514147836001e-06, "loss": 0.4369009733200073, "step": 7625 }, { "epoch": 1.2188124350675298, "grad_norm": 1.167588106694786, "learning_rate": 7.247973436071996e-06, "loss": 0.5565727949142456, "step": 7626 }, { "epoch": 1.2189722688404059, "grad_norm": 1.5825490735882262, "learning_rate": 7.245432916499755e-06, "loss": 0.5306486487388611, "step": 7627 }, { "epoch": 1.2191321026132822, "grad_norm": 1.4921352042604774, "learning_rate": 7.242892589296699e-06, "loss": 0.43080735206604004, "step": 7628 }, { "epoch": 1.2192919363861585, "grad_norm": 1.5671043549264516, "learning_rate": 7.240352454640238e-06, "loss": 0.5076327919960022, "step": 7629 }, { "epoch": 1.2194517701590346, "grad_norm": 1.599249114802152, "learning_rate": 7.237812512707758e-06, "loss": 0.5625234246253967, "step": 7630 }, { "epoch": 1.2196116039319107, "grad_norm": 1.4234504421236367, "learning_rate": 7.2352727636766484e-06, "loss": 0.5782353281974792, "step": 7631 }, { "epoch": 1.219771437704787, "grad_norm": 1.6657051943436096, "learning_rate": 7.232733207724268e-06, "loss": 0.47604674100875854, "step": 7632 }, { "epoch": 1.2199312714776633, "grad_norm": 1.8037440869572872, "learning_rate": 7.230193845027978e-06, "loss": 0.5300564169883728, "step": 7633 }, { "epoch": 1.2200911052505394, "grad_norm": 1.4150131272760722, "learning_rate": 7.2276546757651124e-06, "loss": 0.5988212823867798, "step": 7634 }, { "epoch": 1.2202509390234155, "grad_norm": 1.83462133669072, "learning_rate": 7.225115700113002e-06, "loss": 0.6514164209365845, "step": 7635 }, { "epoch": 1.2204107727962918, "grad_norm": 1.4107298419356091, "learning_rate": 7.2225769182489595e-06, "loss": 0.5819602012634277, "step": 7636 }, { "epoch": 1.2205706065691682, "grad_norm": 1.6196253398712153, "learning_rate": 7.22003833035028e-06, "loss": 0.5623183846473694, "step": 7637 }, { "epoch": 1.2207304403420443, "grad_norm": 1.5747796899879785, "learning_rate": 7.217499936594254e-06, "loss": 0.5130534768104553, "step": 7638 }, { "epoch": 1.2208902741149206, "grad_norm": 1.4994943348725351, "learning_rate": 7.214961737158151e-06, "loss": 0.5736808776855469, "step": 7639 }, { "epoch": 1.2210501078877967, "grad_norm": 1.6239944657853491, "learning_rate": 7.212423732219234e-06, "loss": 0.6328707933425903, "step": 7640 }, { "epoch": 1.221209941660673, "grad_norm": 1.2692426092824778, "learning_rate": 7.209885921954742e-06, "loss": 0.4895264208316803, "step": 7641 }, { "epoch": 1.221369775433549, "grad_norm": 1.6191919828548016, "learning_rate": 7.207348306541915e-06, "loss": 0.4492838382720947, "step": 7642 }, { "epoch": 1.2215296092064254, "grad_norm": 1.5326973951879723, "learning_rate": 7.204810886157964e-06, "loss": 0.5750548839569092, "step": 7643 }, { "epoch": 1.2216894429793015, "grad_norm": 1.534530800924903, "learning_rate": 7.202273660980094e-06, "loss": 0.5373774766921997, "step": 7644 }, { "epoch": 1.2218492767521778, "grad_norm": 1.5853160973025668, "learning_rate": 7.199736631185499e-06, "loss": 0.6473690271377563, "step": 7645 }, { "epoch": 1.222009110525054, "grad_norm": 1.4096911044591414, "learning_rate": 7.197199796951351e-06, "loss": 0.49204516410827637, "step": 7646 }, { "epoch": 1.2221689442979302, "grad_norm": 1.5716352965786158, "learning_rate": 7.19466315845482e-06, "loss": 0.5970599055290222, "step": 7647 }, { "epoch": 1.2223287780708063, "grad_norm": 1.4597461560121485, "learning_rate": 7.1921267158730535e-06, "loss": 0.6046522855758667, "step": 7648 }, { "epoch": 1.2224886118436826, "grad_norm": 1.3637844389826748, "learning_rate": 7.189590469383186e-06, "loss": 0.49763768911361694, "step": 7649 }, { "epoch": 1.2226484456165587, "grad_norm": 1.3733260058330343, "learning_rate": 7.1870544191623425e-06, "loss": 0.5457462072372437, "step": 7650 }, { "epoch": 1.222808279389435, "grad_norm": 1.3760577167764694, "learning_rate": 7.18451856538763e-06, "loss": 0.49071088433265686, "step": 7651 }, { "epoch": 1.2229681131623111, "grad_norm": 1.6762026311400955, "learning_rate": 7.181982908236143e-06, "loss": 0.5525273084640503, "step": 7652 }, { "epoch": 1.2231279469351874, "grad_norm": 1.4913975187018236, "learning_rate": 7.179447447884964e-06, "loss": 0.5453462600708008, "step": 7653 }, { "epoch": 1.2232877807080635, "grad_norm": 1.5182758756608632, "learning_rate": 7.17691218451116e-06, "loss": 0.530279278755188, "step": 7654 }, { "epoch": 1.2234476144809399, "grad_norm": 1.5104668096036744, "learning_rate": 7.174377118291786e-06, "loss": 0.5337703227996826, "step": 7655 }, { "epoch": 1.223607448253816, "grad_norm": 1.413281774145449, "learning_rate": 7.171842249403879e-06, "loss": 0.6103769540786743, "step": 7656 }, { "epoch": 1.2237672820266923, "grad_norm": 1.6726466457089317, "learning_rate": 7.169307578024469e-06, "loss": 0.6174967288970947, "step": 7657 }, { "epoch": 1.2239271157995684, "grad_norm": 1.3793371571876967, "learning_rate": 7.166773104330567e-06, "loss": 0.5331202745437622, "step": 7658 }, { "epoch": 1.2240869495724447, "grad_norm": 1.8917217233231776, "learning_rate": 7.164238828499168e-06, "loss": 0.6689407825469971, "step": 7659 }, { "epoch": 1.2242467833453208, "grad_norm": 1.6012217662306267, "learning_rate": 7.161704750707263e-06, "loss": 0.5875412225723267, "step": 7660 }, { "epoch": 1.224406617118197, "grad_norm": 1.722780728039431, "learning_rate": 7.159170871131819e-06, "loss": 0.6298071146011353, "step": 7661 }, { "epoch": 1.2245664508910732, "grad_norm": 1.6950525674306072, "learning_rate": 7.156637189949795e-06, "loss": 0.604820966720581, "step": 7662 }, { "epoch": 1.2247262846639495, "grad_norm": 1.6619513859432447, "learning_rate": 7.154103707338134e-06, "loss": 0.6907042860984802, "step": 7663 }, { "epoch": 1.2248861184368258, "grad_norm": 1.3715463586417322, "learning_rate": 7.1515704234737636e-06, "loss": 0.45223963260650635, "step": 7664 }, { "epoch": 1.225045952209702, "grad_norm": 1.728553849823156, "learning_rate": 7.149037338533603e-06, "loss": 0.5950882434844971, "step": 7665 }, { "epoch": 1.225205785982578, "grad_norm": 1.6528530833571604, "learning_rate": 7.146504452694547e-06, "loss": 0.6468973159790039, "step": 7666 }, { "epoch": 1.2253656197554543, "grad_norm": 1.7097006721526107, "learning_rate": 7.14397176613349e-06, "loss": 0.5648181438446045, "step": 7667 }, { "epoch": 1.2255254535283306, "grad_norm": 1.5380987512357482, "learning_rate": 7.141439279027305e-06, "loss": 0.4295417368412018, "step": 7668 }, { "epoch": 1.2256852873012067, "grad_norm": 1.440292445107205, "learning_rate": 7.13890699155285e-06, "loss": 0.5074783563613892, "step": 7669 }, { "epoch": 1.2258451210740828, "grad_norm": 1.6053747306561261, "learning_rate": 7.136374903886973e-06, "loss": 0.5931074619293213, "step": 7670 }, { "epoch": 1.2260049548469591, "grad_norm": 1.3563211562407174, "learning_rate": 7.133843016206502e-06, "loss": 0.4429509937763214, "step": 7671 }, { "epoch": 1.2261647886198355, "grad_norm": 1.5001825207852497, "learning_rate": 7.131311328688261e-06, "loss": 0.660563588142395, "step": 7672 }, { "epoch": 1.2263246223927116, "grad_norm": 1.7953874558748997, "learning_rate": 7.12877984150905e-06, "loss": 0.536648154258728, "step": 7673 }, { "epoch": 1.2264844561655879, "grad_norm": 1.5733366137511908, "learning_rate": 7.1262485548456584e-06, "loss": 0.552632212638855, "step": 7674 }, { "epoch": 1.226644289938464, "grad_norm": 1.9044847715228828, "learning_rate": 7.123717468874864e-06, "loss": 0.5620244741439819, "step": 7675 }, { "epoch": 1.2268041237113403, "grad_norm": 1.6327090096422752, "learning_rate": 7.121186583773428e-06, "loss": 0.6145051121711731, "step": 7676 }, { "epoch": 1.2269639574842164, "grad_norm": 1.6478837541533762, "learning_rate": 7.118655899718101e-06, "loss": 0.5491584539413452, "step": 7677 }, { "epoch": 1.2271237912570927, "grad_norm": 1.517090330918323, "learning_rate": 7.116125416885613e-06, "loss": 0.5801423192024231, "step": 7678 }, { "epoch": 1.2272836250299688, "grad_norm": 1.5858914119860206, "learning_rate": 7.113595135452689e-06, "loss": 0.47642624378204346, "step": 7679 }, { "epoch": 1.227443458802845, "grad_norm": 1.653228402789459, "learning_rate": 7.11106505559603e-06, "loss": 0.6683584451675415, "step": 7680 }, { "epoch": 1.2276032925757212, "grad_norm": 1.5227894208641959, "learning_rate": 7.108535177492328e-06, "loss": 0.5745207071304321, "step": 7681 }, { "epoch": 1.2277631263485975, "grad_norm": 1.7280941901090474, "learning_rate": 7.106005501318265e-06, "loss": 0.6556668281555176, "step": 7682 }, { "epoch": 1.2279229601214736, "grad_norm": 2.8602939574488695, "learning_rate": 7.1034760272505e-06, "loss": 0.6134908199310303, "step": 7683 }, { "epoch": 1.22808279389435, "grad_norm": 1.3284371275650557, "learning_rate": 7.1009467554656855e-06, "loss": 0.44880831241607666, "step": 7684 }, { "epoch": 1.228242627667226, "grad_norm": 1.7609431228108228, "learning_rate": 7.098417686140454e-06, "loss": 0.6016713380813599, "step": 7685 }, { "epoch": 1.2284024614401023, "grad_norm": 1.535967547550383, "learning_rate": 7.095888819451428e-06, "loss": 0.5315043926239014, "step": 7686 }, { "epoch": 1.2285622952129784, "grad_norm": 1.8750634420757315, "learning_rate": 7.093360155575218e-06, "loss": 0.528501033782959, "step": 7687 }, { "epoch": 1.2287221289858548, "grad_norm": 1.6673362211840437, "learning_rate": 7.090831694688412e-06, "loss": 0.6854931116104126, "step": 7688 }, { "epoch": 1.2288819627587309, "grad_norm": 1.4190660785715492, "learning_rate": 7.088303436967591e-06, "loss": 0.5010057687759399, "step": 7689 }, { "epoch": 1.2290417965316072, "grad_norm": 1.6463603701264298, "learning_rate": 7.0857753825893205e-06, "loss": 0.567308783531189, "step": 7690 }, { "epoch": 1.2292016303044833, "grad_norm": 1.6109444638018597, "learning_rate": 7.0832475317301474e-06, "loss": 0.6384091377258301, "step": 7691 }, { "epoch": 1.2293614640773596, "grad_norm": 1.481807105227474, "learning_rate": 7.080719884566613e-06, "loss": 0.585903525352478, "step": 7692 }, { "epoch": 1.2295212978502357, "grad_norm": 1.4963534899226751, "learning_rate": 7.078192441275236e-06, "loss": 0.535627007484436, "step": 7693 }, { "epoch": 1.229681131623112, "grad_norm": 1.5937057106812995, "learning_rate": 7.07566520203252e-06, "loss": 0.6168299913406372, "step": 7694 }, { "epoch": 1.229840965395988, "grad_norm": 1.4687506784737445, "learning_rate": 7.073138167014968e-06, "loss": 0.6595383882522583, "step": 7695 }, { "epoch": 1.2300007991688644, "grad_norm": 1.347617038995425, "learning_rate": 7.070611336399049e-06, "loss": 0.4657202959060669, "step": 7696 }, { "epoch": 1.2301606329417405, "grad_norm": 1.5926238461479205, "learning_rate": 7.068084710361236e-06, "loss": 0.48908358812332153, "step": 7697 }, { "epoch": 1.2303204667146168, "grad_norm": 1.6546478610427025, "learning_rate": 7.065558289077974e-06, "loss": 0.6449419260025024, "step": 7698 }, { "epoch": 1.2304803004874931, "grad_norm": 1.5771788725137303, "learning_rate": 7.063032072725704e-06, "loss": 0.5989426374435425, "step": 7699 }, { "epoch": 1.2306401342603692, "grad_norm": 1.4660603880364949, "learning_rate": 7.0605060614808465e-06, "loss": 0.627829909324646, "step": 7700 }, { "epoch": 1.2307999680332453, "grad_norm": 1.49344057605861, "learning_rate": 7.0579802555198056e-06, "loss": 0.5335097312927246, "step": 7701 }, { "epoch": 1.2309598018061216, "grad_norm": 1.6860180747079823, "learning_rate": 7.055454655018979e-06, "loss": 0.5317088961601257, "step": 7702 }, { "epoch": 1.231119635578998, "grad_norm": 1.5073116652045426, "learning_rate": 7.052929260154742e-06, "loss": 0.4625016450881958, "step": 7703 }, { "epoch": 1.231279469351874, "grad_norm": 1.4750925832919646, "learning_rate": 7.050404071103461e-06, "loss": 0.49508193135261536, "step": 7704 }, { "epoch": 1.2314393031247501, "grad_norm": 1.5882252510096604, "learning_rate": 7.047879088041487e-06, "loss": 0.5348101258277893, "step": 7705 }, { "epoch": 1.2315991368976265, "grad_norm": 1.5838479703638235, "learning_rate": 7.045354311145151e-06, "loss": 0.6254622936248779, "step": 7706 }, { "epoch": 1.2317589706705028, "grad_norm": 1.6175924999008784, "learning_rate": 7.0428297405907865e-06, "loss": 0.7055380344390869, "step": 7707 }, { "epoch": 1.2319188044433789, "grad_norm": 1.5034332190944906, "learning_rate": 7.04030537655469e-06, "loss": 0.4893184304237366, "step": 7708 }, { "epoch": 1.2320786382162552, "grad_norm": 1.4938227136332434, "learning_rate": 7.037781219213155e-06, "loss": 0.5791112184524536, "step": 7709 }, { "epoch": 1.2322384719891313, "grad_norm": 1.4385380494395392, "learning_rate": 7.035257268742464e-06, "loss": 0.5601178407669067, "step": 7710 }, { "epoch": 1.2323983057620076, "grad_norm": 1.6569194202540443, "learning_rate": 7.032733525318876e-06, "loss": 0.6800211668014526, "step": 7711 }, { "epoch": 1.2325581395348837, "grad_norm": 1.4778104223694941, "learning_rate": 7.030209989118644e-06, "loss": 0.5371111631393433, "step": 7712 }, { "epoch": 1.23271797330776, "grad_norm": 1.71398971598231, "learning_rate": 7.027686660318001e-06, "loss": 0.5330827236175537, "step": 7713 }, { "epoch": 1.232877807080636, "grad_norm": 1.6232710866976428, "learning_rate": 7.025163539093167e-06, "loss": 0.6001682281494141, "step": 7714 }, { "epoch": 1.2330376408535124, "grad_norm": 1.3602918312719539, "learning_rate": 7.0226406256203515e-06, "loss": 0.5529849529266357, "step": 7715 }, { "epoch": 1.2331974746263885, "grad_norm": 1.6751804758490487, "learning_rate": 7.02011792007574e-06, "loss": 0.5774843096733093, "step": 7716 }, { "epoch": 1.2333573083992648, "grad_norm": 1.6713813203870593, "learning_rate": 7.017595422635513e-06, "loss": 0.5293564796447754, "step": 7717 }, { "epoch": 1.233517142172141, "grad_norm": 1.4111056865040517, "learning_rate": 7.015073133475829e-06, "loss": 0.5205264687538147, "step": 7718 }, { "epoch": 1.2336769759450172, "grad_norm": 1.5370855925991933, "learning_rate": 7.0125510527728425e-06, "loss": 0.45380401611328125, "step": 7719 }, { "epoch": 1.2338368097178933, "grad_norm": 1.6535539346346138, "learning_rate": 7.010029180702681e-06, "loss": 0.5349187850952148, "step": 7720 }, { "epoch": 1.2339966434907697, "grad_norm": 1.559867238575187, "learning_rate": 7.007507517441465e-06, "loss": 0.5535628199577332, "step": 7721 }, { "epoch": 1.2341564772636457, "grad_norm": 1.3797627082730703, "learning_rate": 7.0049860631653e-06, "loss": 0.5303788185119629, "step": 7722 }, { "epoch": 1.234316311036522, "grad_norm": 1.7668660864740295, "learning_rate": 7.002464818050272e-06, "loss": 0.6367151737213135, "step": 7723 }, { "epoch": 1.2344761448093982, "grad_norm": 1.4572929656268172, "learning_rate": 6.9999437822724595e-06, "loss": 0.5308901071548462, "step": 7724 }, { "epoch": 1.2346359785822745, "grad_norm": 1.7109063686016812, "learning_rate": 6.997422956007918e-06, "loss": 0.5318984985351562, "step": 7725 }, { "epoch": 1.2347958123551506, "grad_norm": 1.6253813500457832, "learning_rate": 6.994902339432693e-06, "loss": 0.5544580221176147, "step": 7726 }, { "epoch": 1.2349556461280269, "grad_norm": 1.5963060984543012, "learning_rate": 6.9923819327228235e-06, "loss": 0.5621427893638611, "step": 7727 }, { "epoch": 1.235115479900903, "grad_norm": 1.4364471967642067, "learning_rate": 6.989861736054318e-06, "loss": 0.4593636095523834, "step": 7728 }, { "epoch": 1.2352753136737793, "grad_norm": 1.6676014672141153, "learning_rate": 6.987341749603183e-06, "loss": 0.6261745691299438, "step": 7729 }, { "epoch": 1.2354351474466554, "grad_norm": 1.4980578701214504, "learning_rate": 6.984821973545404e-06, "loss": 0.48003149032592773, "step": 7730 }, { "epoch": 1.2355949812195317, "grad_norm": 1.4876128321846616, "learning_rate": 6.982302408056948e-06, "loss": 0.5047560334205627, "step": 7731 }, { "epoch": 1.2357548149924078, "grad_norm": 1.4778500246486288, "learning_rate": 6.97978305331378e-06, "loss": 0.5024114847183228, "step": 7732 }, { "epoch": 1.2359146487652841, "grad_norm": 1.6028247282686492, "learning_rate": 6.977263909491837e-06, "loss": 0.5369435548782349, "step": 7733 }, { "epoch": 1.2360744825381604, "grad_norm": 1.4915503001980817, "learning_rate": 6.974744976767052e-06, "loss": 0.5643706321716309, "step": 7734 }, { "epoch": 1.2362343163110365, "grad_norm": 1.6738258869700255, "learning_rate": 6.972226255315333e-06, "loss": 0.583870530128479, "step": 7735 }, { "epoch": 1.2363941500839126, "grad_norm": 1.6842504954457773, "learning_rate": 6.969707745312584e-06, "loss": 0.5848442316055298, "step": 7736 }, { "epoch": 1.236553983856789, "grad_norm": 1.2689140368387521, "learning_rate": 6.9671894469346865e-06, "loss": 0.5135802030563354, "step": 7737 }, { "epoch": 1.2367138176296653, "grad_norm": 1.489708307520437, "learning_rate": 6.964671360357507e-06, "loss": 0.47957801818847656, "step": 7738 }, { "epoch": 1.2368736514025414, "grad_norm": 1.6921446522036292, "learning_rate": 6.962153485756904e-06, "loss": 0.5681663751602173, "step": 7739 }, { "epoch": 1.2370334851754174, "grad_norm": 1.4795084374292484, "learning_rate": 6.9596358233087125e-06, "loss": 0.5450879335403442, "step": 7740 }, { "epoch": 1.2371933189482938, "grad_norm": 1.728134691515005, "learning_rate": 6.9571183731887625e-06, "loss": 0.5415456891059875, "step": 7741 }, { "epoch": 1.23735315272117, "grad_norm": 1.8403274515585908, "learning_rate": 6.954601135572858e-06, "loss": 0.5416322946548462, "step": 7742 }, { "epoch": 1.2375129864940462, "grad_norm": 1.609712868648575, "learning_rate": 6.952084110636799e-06, "loss": 0.5400592684745789, "step": 7743 }, { "epoch": 1.2376728202669225, "grad_norm": 1.4394266047758493, "learning_rate": 6.9495672985563635e-06, "loss": 0.6020739078521729, "step": 7744 }, { "epoch": 1.2378326540397986, "grad_norm": 1.402530889129717, "learning_rate": 6.947050699507314e-06, "loss": 0.48622989654541016, "step": 7745 }, { "epoch": 1.237992487812675, "grad_norm": 1.5222233533647638, "learning_rate": 6.944534313665401e-06, "loss": 0.5696423053741455, "step": 7746 }, { "epoch": 1.238152321585551, "grad_norm": 1.604624543871764, "learning_rate": 6.942018141206368e-06, "loss": 0.5830937623977661, "step": 7747 }, { "epoch": 1.2383121553584273, "grad_norm": 1.498773226635939, "learning_rate": 6.939502182305926e-06, "loss": 0.5209978818893433, "step": 7748 }, { "epoch": 1.2384719891313034, "grad_norm": 1.6987164257046887, "learning_rate": 6.936986437139788e-06, "loss": 0.4836476445198059, "step": 7749 }, { "epoch": 1.2386318229041797, "grad_norm": 1.243048912428681, "learning_rate": 6.9344709058836376e-06, "loss": 0.4012201428413391, "step": 7750 }, { "epoch": 1.2387916566770558, "grad_norm": 1.3214847529074816, "learning_rate": 6.931955588713157e-06, "loss": 0.47302210330963135, "step": 7751 }, { "epoch": 1.2389514904499321, "grad_norm": 1.3434692115524633, "learning_rate": 6.929440485804004e-06, "loss": 0.5853114128112793, "step": 7752 }, { "epoch": 1.2391113242228082, "grad_norm": 1.6358547195731614, "learning_rate": 6.926925597331824e-06, "loss": 0.5496172904968262, "step": 7753 }, { "epoch": 1.2392711579956845, "grad_norm": 1.70605154909689, "learning_rate": 6.924410923472249e-06, "loss": 0.7425107955932617, "step": 7754 }, { "epoch": 1.2394309917685606, "grad_norm": 1.4993035165975879, "learning_rate": 6.921896464400893e-06, "loss": 0.6536481380462646, "step": 7755 }, { "epoch": 1.239590825541437, "grad_norm": 1.3895325092499857, "learning_rate": 6.91938222029336e-06, "loss": 0.5428048968315125, "step": 7756 }, { "epoch": 1.239750659314313, "grad_norm": 1.3570494344967565, "learning_rate": 6.9168681913252345e-06, "loss": 0.45846158266067505, "step": 7757 }, { "epoch": 1.2399104930871894, "grad_norm": 1.4474649034600826, "learning_rate": 6.914354377672085e-06, "loss": 0.6259112358093262, "step": 7758 }, { "epoch": 1.2400703268600655, "grad_norm": 1.3239041469551145, "learning_rate": 6.911840779509471e-06, "loss": 0.44147568941116333, "step": 7759 }, { "epoch": 1.2402301606329418, "grad_norm": 1.4459478315446812, "learning_rate": 6.90932739701293e-06, "loss": 0.42925330996513367, "step": 7760 }, { "epoch": 1.2403899944058179, "grad_norm": 1.3239031110955815, "learning_rate": 6.906814230357992e-06, "loss": 0.5542987585067749, "step": 7761 }, { "epoch": 1.2405498281786942, "grad_norm": 1.553815162067413, "learning_rate": 6.904301279720163e-06, "loss": 0.4819144308567047, "step": 7762 }, { "epoch": 1.2407096619515703, "grad_norm": 1.8292777311303934, "learning_rate": 6.901788545274941e-06, "loss": 0.5908544063568115, "step": 7763 }, { "epoch": 1.2408694957244466, "grad_norm": 1.3719058911259745, "learning_rate": 6.899276027197807e-06, "loss": 0.5014402866363525, "step": 7764 }, { "epoch": 1.2410293294973227, "grad_norm": 1.314085489239051, "learning_rate": 6.896763725664224e-06, "loss": 0.5062321424484253, "step": 7765 }, { "epoch": 1.241189163270199, "grad_norm": 1.574124143543284, "learning_rate": 6.894251640849641e-06, "loss": 0.5300987958908081, "step": 7766 }, { "epoch": 1.241348997043075, "grad_norm": 1.835681037840587, "learning_rate": 6.891739772929499e-06, "loss": 0.5203226208686829, "step": 7767 }, { "epoch": 1.2415088308159514, "grad_norm": 1.4984973798668482, "learning_rate": 6.889228122079212e-06, "loss": 0.6089410781860352, "step": 7768 }, { "epoch": 1.2416686645888277, "grad_norm": 1.54360039056269, "learning_rate": 6.886716688474191e-06, "loss": 0.5571167469024658, "step": 7769 }, { "epoch": 1.2418284983617038, "grad_norm": 1.5800447287194843, "learning_rate": 6.8842054722898196e-06, "loss": 0.5796821713447571, "step": 7770 }, { "epoch": 1.24198833213458, "grad_norm": 1.494899336295816, "learning_rate": 6.8816944737014766e-06, "loss": 0.5662140846252441, "step": 7771 }, { "epoch": 1.2421481659074562, "grad_norm": 1.5166974996124216, "learning_rate": 6.879183692884519e-06, "loss": 0.6625704765319824, "step": 7772 }, { "epoch": 1.2423079996803326, "grad_norm": 1.5014425666880893, "learning_rate": 6.876673130014289e-06, "loss": 0.5259057283401489, "step": 7773 }, { "epoch": 1.2424678334532087, "grad_norm": 1.7927040411391717, "learning_rate": 6.8741627852661185e-06, "loss": 0.5960171222686768, "step": 7774 }, { "epoch": 1.2426276672260848, "grad_norm": 2.2030507707734115, "learning_rate": 6.87165265881532e-06, "loss": 0.479902982711792, "step": 7775 }, { "epoch": 1.242787500998961, "grad_norm": 1.3631233038795596, "learning_rate": 6.869142750837192e-06, "loss": 0.5460839867591858, "step": 7776 }, { "epoch": 1.2429473347718374, "grad_norm": 1.8356551687564011, "learning_rate": 6.866633061507015e-06, "loss": 0.5850244760513306, "step": 7777 }, { "epoch": 1.2431071685447135, "grad_norm": 1.4446571345020713, "learning_rate": 6.8641235910000624e-06, "loss": 0.5448895692825317, "step": 7778 }, { "epoch": 1.2432670023175898, "grad_norm": 1.9041668223619919, "learning_rate": 6.861614339491585e-06, "loss": 0.603630542755127, "step": 7779 }, { "epoch": 1.243426836090466, "grad_norm": 1.5712550432186418, "learning_rate": 6.859105307156814e-06, "loss": 0.46196821331977844, "step": 7780 }, { "epoch": 1.2435866698633422, "grad_norm": 1.4350618004028517, "learning_rate": 6.856596494170979e-06, "loss": 0.47465479373931885, "step": 7781 }, { "epoch": 1.2437465036362183, "grad_norm": 1.6080383564527194, "learning_rate": 6.854087900709281e-06, "loss": 0.6959006190299988, "step": 7782 }, { "epoch": 1.2439063374090946, "grad_norm": 1.4356695444191854, "learning_rate": 6.8515795269469145e-06, "loss": 0.522574782371521, "step": 7783 }, { "epoch": 1.2440661711819707, "grad_norm": 1.4540430086115117, "learning_rate": 6.849071373059053e-06, "loss": 0.48854365944862366, "step": 7784 }, { "epoch": 1.244226004954847, "grad_norm": 1.6535281216571234, "learning_rate": 6.846563439220861e-06, "loss": 0.43027418851852417, "step": 7785 }, { "epoch": 1.2443858387277231, "grad_norm": 1.4798443599738198, "learning_rate": 6.844055725607479e-06, "loss": 0.5577890872955322, "step": 7786 }, { "epoch": 1.2445456725005994, "grad_norm": 1.486301448856266, "learning_rate": 6.841548232394041e-06, "loss": 0.4787280559539795, "step": 7787 }, { "epoch": 1.2447055062734755, "grad_norm": 1.3594415560030442, "learning_rate": 6.8390409597556585e-06, "loss": 0.39126190543174744, "step": 7788 }, { "epoch": 1.2448653400463519, "grad_norm": 1.4232479622352483, "learning_rate": 6.836533907867433e-06, "loss": 0.4766320586204529, "step": 7789 }, { "epoch": 1.245025173819228, "grad_norm": 1.727281791833807, "learning_rate": 6.8340270769044445e-06, "loss": 0.542421817779541, "step": 7790 }, { "epoch": 1.2451850075921043, "grad_norm": 1.535141508436876, "learning_rate": 6.831520467041766e-06, "loss": 0.6810787916183472, "step": 7791 }, { "epoch": 1.2453448413649804, "grad_norm": 1.901685143924343, "learning_rate": 6.829014078454446e-06, "loss": 0.6847857236862183, "step": 7792 }, { "epoch": 1.2455046751378567, "grad_norm": 1.3406603427896964, "learning_rate": 6.826507911317524e-06, "loss": 0.4582267105579376, "step": 7793 }, { "epoch": 1.2456645089107328, "grad_norm": 1.3760228910198624, "learning_rate": 6.824001965806022e-06, "loss": 0.4809477925300598, "step": 7794 }, { "epoch": 1.245824342683609, "grad_norm": 1.5556070966843312, "learning_rate": 6.8214962420949435e-06, "loss": 0.475738525390625, "step": 7795 }, { "epoch": 1.2459841764564852, "grad_norm": 1.5542175219826369, "learning_rate": 6.8189907403592836e-06, "loss": 0.5873737335205078, "step": 7796 }, { "epoch": 1.2461440102293615, "grad_norm": 2.958035480610365, "learning_rate": 6.816485460774013e-06, "loss": 0.5393846035003662, "step": 7797 }, { "epoch": 1.2463038440022376, "grad_norm": 1.3556038893690328, "learning_rate": 6.813980403514095e-06, "loss": 0.4551510810852051, "step": 7798 }, { "epoch": 1.246463677775114, "grad_norm": 1.490787952753402, "learning_rate": 6.811475568754473e-06, "loss": 0.5547915697097778, "step": 7799 }, { "epoch": 1.24662351154799, "grad_norm": 1.4939113588145287, "learning_rate": 6.8089709566700755e-06, "loss": 0.5441245436668396, "step": 7800 }, { "epoch": 1.2467833453208663, "grad_norm": 1.1942176338697896, "learning_rate": 6.806466567435817e-06, "loss": 0.4703832268714905, "step": 7801 }, { "epoch": 1.2469431790937424, "grad_norm": 1.3800078126870259, "learning_rate": 6.803962401226591e-06, "loss": 0.5136173963546753, "step": 7802 }, { "epoch": 1.2471030128666187, "grad_norm": 1.4750255776564796, "learning_rate": 6.801458458217286e-06, "loss": 0.5801041126251221, "step": 7803 }, { "epoch": 1.247262846639495, "grad_norm": 1.7797657775012203, "learning_rate": 6.7989547385827624e-06, "loss": 0.45378464460372925, "step": 7804 }, { "epoch": 1.2474226804123711, "grad_norm": 1.6408923960149964, "learning_rate": 6.796451242497876e-06, "loss": 0.6382018327713013, "step": 7805 }, { "epoch": 1.2475825141852472, "grad_norm": 1.8304424661478813, "learning_rate": 6.793947970137455e-06, "loss": 0.6241470575332642, "step": 7806 }, { "epoch": 1.2477423479581236, "grad_norm": 1.3030409009212995, "learning_rate": 6.791444921676327e-06, "loss": 0.642693281173706, "step": 7807 }, { "epoch": 1.2479021817309999, "grad_norm": 1.6652901406036646, "learning_rate": 6.788942097289293e-06, "loss": 0.5768294334411621, "step": 7808 }, { "epoch": 1.248062015503876, "grad_norm": 1.56105051113439, "learning_rate": 6.786439497151142e-06, "loss": 0.5903553366661072, "step": 7809 }, { "epoch": 1.248221849276752, "grad_norm": 1.6486610717766734, "learning_rate": 6.783937121436645e-06, "loss": 0.5866459608078003, "step": 7810 }, { "epoch": 1.2483816830496284, "grad_norm": 1.4694077909822896, "learning_rate": 6.781434970320562e-06, "loss": 0.5129170417785645, "step": 7811 }, { "epoch": 1.2485415168225047, "grad_norm": 1.4489476281383613, "learning_rate": 6.7789330439776295e-06, "loss": 0.5382174849510193, "step": 7812 }, { "epoch": 1.2487013505953808, "grad_norm": 1.4015649049157324, "learning_rate": 6.776431342582577e-06, "loss": 0.5159107446670532, "step": 7813 }, { "epoch": 1.248861184368257, "grad_norm": 1.4915553390887402, "learning_rate": 6.7739298663101125e-06, "loss": 0.5732749104499817, "step": 7814 }, { "epoch": 1.2490210181411332, "grad_norm": 1.4795872973843889, "learning_rate": 6.771428615334934e-06, "loss": 0.4897368848323822, "step": 7815 }, { "epoch": 1.2491808519140095, "grad_norm": 2.052252944839289, "learning_rate": 6.768927589831715e-06, "loss": 0.6155787110328674, "step": 7816 }, { "epoch": 1.2493406856868856, "grad_norm": 1.8692890464658423, "learning_rate": 6.76642678997512e-06, "loss": 0.7142373323440552, "step": 7817 }, { "epoch": 1.249500519459762, "grad_norm": 1.6846680890610148, "learning_rate": 6.763926215939798e-06, "loss": 0.6400509476661682, "step": 7818 }, { "epoch": 1.249660353232638, "grad_norm": 1.6405740744108972, "learning_rate": 6.7614258679003775e-06, "loss": 0.6411906480789185, "step": 7819 }, { "epoch": 1.2498201870055143, "grad_norm": 1.8002963438200315, "learning_rate": 6.758925746031475e-06, "loss": 0.5111743807792664, "step": 7820 }, { "epoch": 1.2499800207783904, "grad_norm": 1.6179418566048667, "learning_rate": 6.756425850507691e-06, "loss": 0.6918296813964844, "step": 7821 }, { "epoch": 1.2501398545512667, "grad_norm": 1.624568575550165, "learning_rate": 6.7539261815036075e-06, "loss": 0.5652267336845398, "step": 7822 }, { "epoch": 1.2502996883241428, "grad_norm": 1.4643577536284333, "learning_rate": 6.7514267391937935e-06, "loss": 0.5689760446548462, "step": 7823 }, { "epoch": 1.2504595220970192, "grad_norm": 1.6283230892549316, "learning_rate": 6.7489275237528e-06, "loss": 0.49226436018943787, "step": 7824 }, { "epoch": 1.2506193558698953, "grad_norm": 1.362214245078059, "learning_rate": 6.746428535355167e-06, "loss": 0.5463060140609741, "step": 7825 }, { "epoch": 1.2507791896427716, "grad_norm": 1.6999073127976465, "learning_rate": 6.743929774175411e-06, "loss": 0.5710226893424988, "step": 7826 }, { "epoch": 1.2509390234156477, "grad_norm": 1.7078298687178108, "learning_rate": 6.7414312403880345e-06, "loss": 0.5580348372459412, "step": 7827 }, { "epoch": 1.251098857188524, "grad_norm": 1.3942283357642713, "learning_rate": 6.738932934167535e-06, "loss": 0.5203668475151062, "step": 7828 }, { "epoch": 1.2512586909614, "grad_norm": 1.4962765506642048, "learning_rate": 6.736434855688379e-06, "loss": 0.5847538709640503, "step": 7829 }, { "epoch": 1.2514185247342764, "grad_norm": 1.3669736057241473, "learning_rate": 6.733937005125023e-06, "loss": 0.43871480226516724, "step": 7830 }, { "epoch": 1.2515783585071525, "grad_norm": 1.5009500530171571, "learning_rate": 6.731439382651912e-06, "loss": 0.5844247937202454, "step": 7831 }, { "epoch": 1.2517381922800288, "grad_norm": 1.7045175726254596, "learning_rate": 6.728941988443466e-06, "loss": 0.6246105432510376, "step": 7832 }, { "epoch": 1.251898026052905, "grad_norm": 1.244469053080032, "learning_rate": 6.726444822674098e-06, "loss": 0.4194139242172241, "step": 7833 }, { "epoch": 1.2520578598257812, "grad_norm": 1.3465139869453773, "learning_rate": 6.7239478855182004e-06, "loss": 0.5279592275619507, "step": 7834 }, { "epoch": 1.2522176935986575, "grad_norm": 1.6484579086119315, "learning_rate": 6.721451177150151e-06, "loss": 0.5986981391906738, "step": 7835 }, { "epoch": 1.2523775273715336, "grad_norm": 1.6403878219130428, "learning_rate": 6.718954697744309e-06, "loss": 0.5981085896492004, "step": 7836 }, { "epoch": 1.2525373611444097, "grad_norm": 1.7971527667149199, "learning_rate": 6.716458447475021e-06, "loss": 0.6156624555587769, "step": 7837 }, { "epoch": 1.252697194917286, "grad_norm": 1.564498798541859, "learning_rate": 6.713962426516616e-06, "loss": 0.4748890697956085, "step": 7838 }, { "epoch": 1.2528570286901624, "grad_norm": 1.4732505688336381, "learning_rate": 6.711466635043405e-06, "loss": 0.6444867849349976, "step": 7839 }, { "epoch": 1.2530168624630384, "grad_norm": 1.3423517991487222, "learning_rate": 6.70897107322969e-06, "loss": 0.4439120590686798, "step": 7840 }, { "epoch": 1.2531766962359145, "grad_norm": 1.515266627099159, "learning_rate": 6.706475741249747e-06, "loss": 0.5683071613311768, "step": 7841 }, { "epoch": 1.2533365300087909, "grad_norm": 1.8162072937403317, "learning_rate": 6.703980639277847e-06, "loss": 0.5434002876281738, "step": 7842 }, { "epoch": 1.2534963637816672, "grad_norm": 1.3441009001305078, "learning_rate": 6.701485767488235e-06, "loss": 0.5676743984222412, "step": 7843 }, { "epoch": 1.2536561975545433, "grad_norm": 1.600255249497976, "learning_rate": 6.6989911260551435e-06, "loss": 0.6425946950912476, "step": 7844 }, { "epoch": 1.2538160313274194, "grad_norm": 1.6285418400728393, "learning_rate": 6.696496715152791e-06, "loss": 0.43638065457344055, "step": 7845 }, { "epoch": 1.2539758651002957, "grad_norm": 1.6466802726397554, "learning_rate": 6.694002534955378e-06, "loss": 0.7032850980758667, "step": 7846 }, { "epoch": 1.254135698873172, "grad_norm": 1.618446477506744, "learning_rate": 6.691508585637085e-06, "loss": 0.5553780794143677, "step": 7847 }, { "epoch": 1.254295532646048, "grad_norm": 1.283665220379969, "learning_rate": 6.68901486737209e-06, "loss": 0.5662215948104858, "step": 7848 }, { "epoch": 1.2544553664189242, "grad_norm": 1.4312965113491487, "learning_rate": 6.6865213803345385e-06, "loss": 0.6742970943450928, "step": 7849 }, { "epoch": 1.2546152001918005, "grad_norm": 1.5073185377537555, "learning_rate": 6.684028124698569e-06, "loss": 0.5182023048400879, "step": 7850 }, { "epoch": 1.2547750339646768, "grad_norm": 1.741284383385937, "learning_rate": 6.681535100638303e-06, "loss": 0.5159282088279724, "step": 7851 }, { "epoch": 1.254934867737553, "grad_norm": 1.4957399895788364, "learning_rate": 6.67904230832784e-06, "loss": 0.6555846929550171, "step": 7852 }, { "epoch": 1.2550947015104292, "grad_norm": 1.414208175956026, "learning_rate": 6.676549747941272e-06, "loss": 0.5499376654624939, "step": 7853 }, { "epoch": 1.2552545352833053, "grad_norm": 1.4998006867826286, "learning_rate": 6.674057419652666e-06, "loss": 0.5107200741767883, "step": 7854 }, { "epoch": 1.2554143690561816, "grad_norm": 1.5124496097725166, "learning_rate": 6.671565323636083e-06, "loss": 0.5878357887268066, "step": 7855 }, { "epoch": 1.2555742028290577, "grad_norm": 1.4165728586026711, "learning_rate": 6.669073460065558e-06, "loss": 0.48377323150634766, "step": 7856 }, { "epoch": 1.255734036601934, "grad_norm": 1.4175272551648486, "learning_rate": 6.666581829115116e-06, "loss": 0.436811625957489, "step": 7857 }, { "epoch": 1.2558938703748102, "grad_norm": 1.2726298082307457, "learning_rate": 6.664090430958763e-06, "loss": 0.514069139957428, "step": 7858 }, { "epoch": 1.2560537041476865, "grad_norm": 1.6297601822263863, "learning_rate": 6.661599265770488e-06, "loss": 0.5357897877693176, "step": 7859 }, { "epoch": 1.2562135379205626, "grad_norm": 1.450734892793032, "learning_rate": 6.659108333724269e-06, "loss": 0.6040557622909546, "step": 7860 }, { "epoch": 1.2563733716934389, "grad_norm": 1.400379392435984, "learning_rate": 6.656617634994058e-06, "loss": 0.5838828086853027, "step": 7861 }, { "epoch": 1.256533205466315, "grad_norm": 1.601157549729022, "learning_rate": 6.654127169753801e-06, "loss": 0.5538219213485718, "step": 7862 }, { "epoch": 1.2566930392391913, "grad_norm": 1.4598584363726148, "learning_rate": 6.651636938177421e-06, "loss": 0.5031029582023621, "step": 7863 }, { "epoch": 1.2568528730120674, "grad_norm": 1.2380767107616646, "learning_rate": 6.649146940438831e-06, "loss": 0.5740798711776733, "step": 7864 }, { "epoch": 1.2570127067849437, "grad_norm": 1.4943341109828672, "learning_rate": 6.64665717671192e-06, "loss": 0.6325730085372925, "step": 7865 }, { "epoch": 1.2571725405578198, "grad_norm": 1.518850287259997, "learning_rate": 6.644167647170563e-06, "loss": 0.5958225727081299, "step": 7866 }, { "epoch": 1.2573323743306961, "grad_norm": 1.4132041799398631, "learning_rate": 6.641678351988619e-06, "loss": 0.5833989381790161, "step": 7867 }, { "epoch": 1.2574922081035722, "grad_norm": 1.4144166366566289, "learning_rate": 6.639189291339939e-06, "loss": 0.5619264245033264, "step": 7868 }, { "epoch": 1.2576520418764485, "grad_norm": 1.6086438181510896, "learning_rate": 6.6367004653983425e-06, "loss": 0.6248238682746887, "step": 7869 }, { "epoch": 1.2578118756493248, "grad_norm": 1.7750680027125298, "learning_rate": 6.634211874337646e-06, "loss": 0.5680729746818542, "step": 7870 }, { "epoch": 1.257971709422201, "grad_norm": 1.6536984175228664, "learning_rate": 6.63172351833164e-06, "loss": 0.6480715274810791, "step": 7871 }, { "epoch": 1.258131543195077, "grad_norm": 1.388768615826149, "learning_rate": 6.629235397554105e-06, "loss": 0.5651225447654724, "step": 7872 }, { "epoch": 1.2582913769679533, "grad_norm": 1.4413763872969316, "learning_rate": 6.6267475121788e-06, "loss": 0.5021264553070068, "step": 7873 }, { "epoch": 1.2584512107408297, "grad_norm": 1.4144483689582685, "learning_rate": 6.62425986237947e-06, "loss": 0.47641491889953613, "step": 7874 }, { "epoch": 1.2586110445137058, "grad_norm": 1.8009144685499532, "learning_rate": 6.621772448329846e-06, "loss": 0.4516116678714752, "step": 7875 }, { "epoch": 1.2587708782865819, "grad_norm": 1.446534128077429, "learning_rate": 6.619285270203638e-06, "loss": 0.5897932052612305, "step": 7876 }, { "epoch": 1.2589307120594582, "grad_norm": 1.9652626129268618, "learning_rate": 6.616798328174545e-06, "loss": 0.5949275493621826, "step": 7877 }, { "epoch": 1.2590905458323345, "grad_norm": 1.6265609897501643, "learning_rate": 6.614311622416241e-06, "loss": 0.5139784812927246, "step": 7878 }, { "epoch": 1.2592503796052106, "grad_norm": 1.4135050256944344, "learning_rate": 6.611825153102394e-06, "loss": 0.5474699139595032, "step": 7879 }, { "epoch": 1.2594102133780867, "grad_norm": 1.5546373492742471, "learning_rate": 6.609338920406647e-06, "loss": 0.593502402305603, "step": 7880 }, { "epoch": 1.259570047150963, "grad_norm": 1.4408104850556394, "learning_rate": 6.606852924502628e-06, "loss": 0.3973785638809204, "step": 7881 }, { "epoch": 1.2597298809238393, "grad_norm": 1.7024598101670014, "learning_rate": 6.604367165563955e-06, "loss": 0.5674788951873779, "step": 7882 }, { "epoch": 1.2598897146967154, "grad_norm": 1.7442430485102578, "learning_rate": 6.601881643764221e-06, "loss": 0.6525126099586487, "step": 7883 }, { "epoch": 1.2600495484695915, "grad_norm": 1.6636184719036384, "learning_rate": 6.5993963592770085e-06, "loss": 0.6101952791213989, "step": 7884 }, { "epoch": 1.2602093822424678, "grad_norm": 1.5161463224212701, "learning_rate": 6.596911312275877e-06, "loss": 0.5895152688026428, "step": 7885 }, { "epoch": 1.2603692160153441, "grad_norm": 1.4766617689481316, "learning_rate": 6.594426502934378e-06, "loss": 0.5499473810195923, "step": 7886 }, { "epoch": 1.2605290497882202, "grad_norm": 1.3342969886976461, "learning_rate": 6.591941931426036e-06, "loss": 0.6239385604858398, "step": 7887 }, { "epoch": 1.2606888835610965, "grad_norm": 1.7230538518532397, "learning_rate": 6.5894575979243726e-06, "loss": 0.597612738609314, "step": 7888 }, { "epoch": 1.2608487173339726, "grad_norm": 1.8432780103035968, "learning_rate": 6.586973502602878e-06, "loss": 0.5530426502227783, "step": 7889 }, { "epoch": 1.261008551106849, "grad_norm": 1.442769038126428, "learning_rate": 6.584489645635037e-06, "loss": 0.5327799320220947, "step": 7890 }, { "epoch": 1.261168384879725, "grad_norm": 1.4577427726907488, "learning_rate": 6.58200602719431e-06, "loss": 0.6485732793807983, "step": 7891 }, { "epoch": 1.2613282186526014, "grad_norm": 1.3325152350306009, "learning_rate": 6.579522647454148e-06, "loss": 0.5563591122627258, "step": 7892 }, { "epoch": 1.2614880524254775, "grad_norm": 1.7400830014860962, "learning_rate": 6.577039506587979e-06, "loss": 0.46388840675354004, "step": 7893 }, { "epoch": 1.2616478861983538, "grad_norm": 1.4663092589000328, "learning_rate": 6.574556604769213e-06, "loss": 0.5237041711807251, "step": 7894 }, { "epoch": 1.2618077199712299, "grad_norm": 1.6525413521744312, "learning_rate": 6.572073942171255e-06, "loss": 0.5632222890853882, "step": 7895 }, { "epoch": 1.2619675537441062, "grad_norm": 1.614469346162356, "learning_rate": 6.569591518967477e-06, "loss": 0.5089647173881531, "step": 7896 }, { "epoch": 1.2621273875169823, "grad_norm": 1.6299569515425092, "learning_rate": 6.56710933533125e-06, "loss": 0.5415385961532593, "step": 7897 }, { "epoch": 1.2622872212898586, "grad_norm": 1.3253672342984257, "learning_rate": 6.564627391435916e-06, "loss": 0.36431413888931274, "step": 7898 }, { "epoch": 1.2624470550627347, "grad_norm": 1.495503227801267, "learning_rate": 6.562145687454808e-06, "loss": 0.5420665740966797, "step": 7899 }, { "epoch": 1.262606888835611, "grad_norm": 1.6772927603666106, "learning_rate": 6.559664223561239e-06, "loss": 0.5825533866882324, "step": 7900 }, { "epoch": 1.262766722608487, "grad_norm": 1.3749910362181221, "learning_rate": 6.557182999928502e-06, "loss": 0.459908127784729, "step": 7901 }, { "epoch": 1.2629265563813634, "grad_norm": 1.5740493432124134, "learning_rate": 6.554702016729882e-06, "loss": 0.500017523765564, "step": 7902 }, { "epoch": 1.2630863901542395, "grad_norm": 1.6995763210706705, "learning_rate": 6.552221274138639e-06, "loss": 0.5642962455749512, "step": 7903 }, { "epoch": 1.2632462239271158, "grad_norm": 1.4123660119917807, "learning_rate": 6.5497407723280205e-06, "loss": 0.46532824635505676, "step": 7904 }, { "epoch": 1.2634060576999921, "grad_norm": 1.4362847861914998, "learning_rate": 6.547260511471254e-06, "loss": 0.45656728744506836, "step": 7905 }, { "epoch": 1.2635658914728682, "grad_norm": 1.5752526956747919, "learning_rate": 6.5447804917415545e-06, "loss": 0.6188507080078125, "step": 7906 }, { "epoch": 1.2637257252457443, "grad_norm": 1.6613000797842519, "learning_rate": 6.542300713312113e-06, "loss": 0.5359997749328613, "step": 7907 }, { "epoch": 1.2638855590186207, "grad_norm": 1.5928400296878744, "learning_rate": 6.539821176356118e-06, "loss": 0.5987764000892639, "step": 7908 }, { "epoch": 1.264045392791497, "grad_norm": 1.6728773370138341, "learning_rate": 6.537341881046721e-06, "loss": 0.5390343070030212, "step": 7909 }, { "epoch": 1.264205226564373, "grad_norm": 1.449551976225507, "learning_rate": 6.534862827557077e-06, "loss": 0.5233792662620544, "step": 7910 }, { "epoch": 1.2643650603372492, "grad_norm": 1.5635166375305845, "learning_rate": 6.532384016060306e-06, "loss": 0.43315449357032776, "step": 7911 }, { "epoch": 1.2645248941101255, "grad_norm": 1.8107891312614592, "learning_rate": 6.5299054467295245e-06, "loss": 0.6574208736419678, "step": 7912 }, { "epoch": 1.2646847278830018, "grad_norm": 1.7155850755186846, "learning_rate": 6.527427119737823e-06, "loss": 0.6398597955703735, "step": 7913 }, { "epoch": 1.2648445616558779, "grad_norm": 1.5064725091865374, "learning_rate": 6.524949035258283e-06, "loss": 0.49329617619514465, "step": 7914 }, { "epoch": 1.265004395428754, "grad_norm": 1.4344894402373685, "learning_rate": 6.522471193463964e-06, "loss": 0.4922488033771515, "step": 7915 }, { "epoch": 1.2651642292016303, "grad_norm": 1.5618759314736474, "learning_rate": 6.519993594527907e-06, "loss": 0.5207262635231018, "step": 7916 }, { "epoch": 1.2653240629745066, "grad_norm": 1.9738051081664545, "learning_rate": 6.5175162386231415e-06, "loss": 0.6782094240188599, "step": 7917 }, { "epoch": 1.2654838967473827, "grad_norm": 1.520067610033075, "learning_rate": 6.515039125922675e-06, "loss": 0.6467439532279968, "step": 7918 }, { "epoch": 1.2656437305202588, "grad_norm": 1.4968520593419603, "learning_rate": 6.512562256599504e-06, "loss": 0.6941714286804199, "step": 7919 }, { "epoch": 1.2658035642931351, "grad_norm": 1.8108218177717437, "learning_rate": 6.5100856308265994e-06, "loss": 0.7062669992446899, "step": 7920 }, { "epoch": 1.2659633980660114, "grad_norm": 1.6099018911665637, "learning_rate": 6.507609248776925e-06, "loss": 0.5389896035194397, "step": 7921 }, { "epoch": 1.2661232318388875, "grad_norm": 1.2627593940996042, "learning_rate": 6.50513311062342e-06, "loss": 0.49966445565223694, "step": 7922 }, { "epoch": 1.2662830656117638, "grad_norm": 1.5679567557895766, "learning_rate": 6.502657216539008e-06, "loss": 0.4663267731666565, "step": 7923 }, { "epoch": 1.26644289938464, "grad_norm": 1.2863079056087638, "learning_rate": 6.5001815666965985e-06, "loss": 0.4764796495437622, "step": 7924 }, { "epoch": 1.2666027331575163, "grad_norm": 1.3174147684614699, "learning_rate": 6.49770616126908e-06, "loss": 0.43637603521347046, "step": 7925 }, { "epoch": 1.2667625669303924, "grad_norm": 1.7988581757351012, "learning_rate": 6.4952310004293295e-06, "loss": 0.6054308414459229, "step": 7926 }, { "epoch": 1.2669224007032687, "grad_norm": 1.4885131660628574, "learning_rate": 6.492756084350196e-06, "loss": 0.5080664157867432, "step": 7927 }, { "epoch": 1.2670822344761448, "grad_norm": 1.4947274034327689, "learning_rate": 6.490281413204528e-06, "loss": 0.4730428457260132, "step": 7928 }, { "epoch": 1.267242068249021, "grad_norm": 1.586713396485563, "learning_rate": 6.487806987165145e-06, "loss": 0.4661802649497986, "step": 7929 }, { "epoch": 1.2674019020218972, "grad_norm": 1.613391408044041, "learning_rate": 6.4853328064048514e-06, "loss": 0.4672924280166626, "step": 7930 }, { "epoch": 1.2675617357947735, "grad_norm": 1.4953107243998263, "learning_rate": 6.482858871096433e-06, "loss": 0.47693201899528503, "step": 7931 }, { "epoch": 1.2677215695676496, "grad_norm": 1.6624601111320643, "learning_rate": 6.480385181412663e-06, "loss": 0.5466756820678711, "step": 7932 }, { "epoch": 1.267881403340526, "grad_norm": 1.526509790301639, "learning_rate": 6.4779117375262946e-06, "loss": 0.5823109149932861, "step": 7933 }, { "epoch": 1.268041237113402, "grad_norm": 1.622218460566639, "learning_rate": 6.475438539610066e-06, "loss": 0.5821273922920227, "step": 7934 }, { "epoch": 1.2682010708862783, "grad_norm": 1.5144019746398936, "learning_rate": 6.472965587836693e-06, "loss": 0.4613415598869324, "step": 7935 }, { "epoch": 1.2683609046591544, "grad_norm": 1.1520810183330796, "learning_rate": 6.470492882378881e-06, "loss": 0.41467297077178955, "step": 7936 }, { "epoch": 1.2685207384320307, "grad_norm": 1.5693443522907808, "learning_rate": 6.468020423409313e-06, "loss": 0.6929247975349426, "step": 7937 }, { "epoch": 1.2686805722049068, "grad_norm": 1.6109188794617884, "learning_rate": 6.465548211100656e-06, "loss": 0.5223497152328491, "step": 7938 }, { "epoch": 1.2688404059777831, "grad_norm": 1.6531266888205778, "learning_rate": 6.4630762456255644e-06, "loss": 0.6592357158660889, "step": 7939 }, { "epoch": 1.2690002397506592, "grad_norm": 1.7364859922271088, "learning_rate": 6.460604527156667e-06, "loss": 0.6343169212341309, "step": 7940 }, { "epoch": 1.2691600735235355, "grad_norm": 1.6898531204323917, "learning_rate": 6.458133055866584e-06, "loss": 0.5526716709136963, "step": 7941 }, { "epoch": 1.2693199072964116, "grad_norm": 1.6144429344287008, "learning_rate": 6.455661831927909e-06, "loss": 0.5690292716026306, "step": 7942 }, { "epoch": 1.269479741069288, "grad_norm": 1.6563323997082473, "learning_rate": 6.45319085551323e-06, "loss": 0.48724785447120667, "step": 7943 }, { "epoch": 1.2696395748421643, "grad_norm": 1.4593584121159933, "learning_rate": 6.450720126795107e-06, "loss": 0.4886612296104431, "step": 7944 }, { "epoch": 1.2697994086150404, "grad_norm": 1.5037638458986937, "learning_rate": 6.448249645946087e-06, "loss": 0.42278000712394714, "step": 7945 }, { "epoch": 1.2699592423879165, "grad_norm": 1.516096012227075, "learning_rate": 6.4457794131387e-06, "loss": 0.5213704109191895, "step": 7946 }, { "epoch": 1.2701190761607928, "grad_norm": 1.5461716082044832, "learning_rate": 6.443309428545457e-06, "loss": 0.5232374668121338, "step": 7947 }, { "epoch": 1.270278909933669, "grad_norm": 1.6111620547928707, "learning_rate": 6.440839692338854e-06, "loss": 0.5698679089546204, "step": 7948 }, { "epoch": 1.2704387437065452, "grad_norm": 1.8462646747961935, "learning_rate": 6.438370204691373e-06, "loss": 0.6249377727508545, "step": 7949 }, { "epoch": 1.2705985774794213, "grad_norm": 1.687619363264717, "learning_rate": 6.435900965775468e-06, "loss": 0.6651387214660645, "step": 7950 }, { "epoch": 1.2707584112522976, "grad_norm": 1.3204996637559117, "learning_rate": 6.433431975763586e-06, "loss": 0.3997674286365509, "step": 7951 }, { "epoch": 1.270918245025174, "grad_norm": 1.4206912617650986, "learning_rate": 6.430963234828151e-06, "loss": 0.6954163908958435, "step": 7952 }, { "epoch": 1.27107807879805, "grad_norm": 1.7094532700419258, "learning_rate": 6.428494743141569e-06, "loss": 0.6126468777656555, "step": 7953 }, { "epoch": 1.271237912570926, "grad_norm": 1.6564992459519468, "learning_rate": 6.426026500876234e-06, "loss": 0.7040113806724548, "step": 7954 }, { "epoch": 1.2713977463438024, "grad_norm": 1.522145396071082, "learning_rate": 6.4235585082045174e-06, "loss": 0.5446598529815674, "step": 7955 }, { "epoch": 1.2715575801166787, "grad_norm": 1.584510156145853, "learning_rate": 6.421090765298776e-06, "loss": 0.6316312551498413, "step": 7956 }, { "epoch": 1.2717174138895548, "grad_norm": 1.50139031835555, "learning_rate": 6.418623272331349e-06, "loss": 0.5560241937637329, "step": 7957 }, { "epoch": 1.2718772476624312, "grad_norm": 1.6443850530782385, "learning_rate": 6.416156029474554e-06, "loss": 0.5150659084320068, "step": 7958 }, { "epoch": 1.2720370814353072, "grad_norm": 1.523924002351284, "learning_rate": 6.4136890369006964e-06, "loss": 0.6056660413742065, "step": 7959 }, { "epoch": 1.2721969152081836, "grad_norm": 1.6962114245274782, "learning_rate": 6.4112222947820625e-06, "loss": 0.5290431380271912, "step": 7960 }, { "epoch": 1.2723567489810597, "grad_norm": 1.670735853387136, "learning_rate": 6.408755803290922e-06, "loss": 0.5722850561141968, "step": 7961 }, { "epoch": 1.272516582753936, "grad_norm": 1.6421880014104548, "learning_rate": 6.406289562599522e-06, "loss": 0.5154088735580444, "step": 7962 }, { "epoch": 1.272676416526812, "grad_norm": 1.8670643133610758, "learning_rate": 6.403823572880101e-06, "loss": 0.5943710803985596, "step": 7963 }, { "epoch": 1.2728362502996884, "grad_norm": 1.6790200677379468, "learning_rate": 6.401357834304873e-06, "loss": 0.6280190944671631, "step": 7964 }, { "epoch": 1.2729960840725645, "grad_norm": 1.5099922563430854, "learning_rate": 6.398892347046033e-06, "loss": 0.5331478714942932, "step": 7965 }, { "epoch": 1.2731559178454408, "grad_norm": 1.591000701587242, "learning_rate": 6.396427111275768e-06, "loss": 0.48368674516677856, "step": 7966 }, { "epoch": 1.273315751618317, "grad_norm": 1.7387791945691653, "learning_rate": 6.393962127166233e-06, "loss": 0.6447509527206421, "step": 7967 }, { "epoch": 1.2734755853911932, "grad_norm": 1.6423321829678423, "learning_rate": 6.3914973948895805e-06, "loss": 0.7382107973098755, "step": 7968 }, { "epoch": 1.2736354191640693, "grad_norm": 1.3959431937963962, "learning_rate": 6.389032914617941e-06, "loss": 0.613420307636261, "step": 7969 }, { "epoch": 1.2737952529369456, "grad_norm": 1.7768471934047385, "learning_rate": 6.386568686523418e-06, "loss": 0.5659608244895935, "step": 7970 }, { "epoch": 1.2739550867098217, "grad_norm": 1.8402313216198067, "learning_rate": 6.38410471077811e-06, "loss": 0.5319145321846008, "step": 7971 }, { "epoch": 1.274114920482698, "grad_norm": 1.6990396488821016, "learning_rate": 6.381640987554089e-06, "loss": 0.5296288728713989, "step": 7972 }, { "epoch": 1.2742747542555741, "grad_norm": 1.5690040167261177, "learning_rate": 6.3791775170234115e-06, "loss": 0.5924127697944641, "step": 7973 }, { "epoch": 1.2744345880284504, "grad_norm": 1.4414762814626914, "learning_rate": 6.376714299358122e-06, "loss": 0.6664594411849976, "step": 7974 }, { "epoch": 1.2745944218013265, "grad_norm": 1.5102926664812535, "learning_rate": 6.374251334730237e-06, "loss": 0.6381997466087341, "step": 7975 }, { "epoch": 1.2747542555742029, "grad_norm": 1.6388985333166382, "learning_rate": 6.371788623311769e-06, "loss": 0.5494097471237183, "step": 7976 }, { "epoch": 1.274914089347079, "grad_norm": 1.5544984130733548, "learning_rate": 6.369326165274697e-06, "loss": 0.6738015413284302, "step": 7977 }, { "epoch": 1.2750739231199553, "grad_norm": 1.3840558037587505, "learning_rate": 6.366863960790996e-06, "loss": 0.5470755100250244, "step": 7978 }, { "epoch": 1.2752337568928316, "grad_norm": 1.3951009540107404, "learning_rate": 6.364402010032617e-06, "loss": 0.5139439105987549, "step": 7979 }, { "epoch": 1.2753935906657077, "grad_norm": 1.4509275925612444, "learning_rate": 6.361940313171489e-06, "loss": 0.5828248262405396, "step": 7980 }, { "epoch": 1.2755534244385838, "grad_norm": 1.6348509903692738, "learning_rate": 6.359478870379534e-06, "loss": 0.5743487477302551, "step": 7981 }, { "epoch": 1.27571325821146, "grad_norm": 1.4215180607839475, "learning_rate": 6.357017681828647e-06, "loss": 0.6251858472824097, "step": 7982 }, { "epoch": 1.2758730919843364, "grad_norm": 1.4808780553159897, "learning_rate": 6.354556747690711e-06, "loss": 0.5820167660713196, "step": 7983 }, { "epoch": 1.2760329257572125, "grad_norm": 1.2975954919939712, "learning_rate": 6.3520960681375855e-06, "loss": 0.49235183000564575, "step": 7984 }, { "epoch": 1.2761927595300886, "grad_norm": 1.5118847842256813, "learning_rate": 6.34963564334112e-06, "loss": 0.6027452349662781, "step": 7985 }, { "epoch": 1.276352593302965, "grad_norm": 1.7448939827717416, "learning_rate": 6.347175473473139e-06, "loss": 0.5277247428894043, "step": 7986 }, { "epoch": 1.2765124270758412, "grad_norm": 1.4449136615887088, "learning_rate": 6.34471555870545e-06, "loss": 0.5208892226219177, "step": 7987 }, { "epoch": 1.2766722608487173, "grad_norm": 1.3455243857972239, "learning_rate": 6.342255899209848e-06, "loss": 0.5792890787124634, "step": 7988 }, { "epoch": 1.2768320946215934, "grad_norm": 1.4175014121878826, "learning_rate": 6.339796495158109e-06, "loss": 0.4914495050907135, "step": 7989 }, { "epoch": 1.2769919283944697, "grad_norm": 1.7393388781772987, "learning_rate": 6.337337346721984e-06, "loss": 0.7402383089065552, "step": 7990 }, { "epoch": 1.277151762167346, "grad_norm": 1.603081181251959, "learning_rate": 6.334878454073216e-06, "loss": 0.6019895076751709, "step": 7991 }, { "epoch": 1.2773115959402221, "grad_norm": 1.675566564577356, "learning_rate": 6.3324198173835195e-06, "loss": 0.5399723052978516, "step": 7992 }, { "epoch": 1.2774714297130985, "grad_norm": 1.604572765981362, "learning_rate": 6.329961436824603e-06, "loss": 0.544275164604187, "step": 7993 }, { "epoch": 1.2776312634859746, "grad_norm": 1.4546784700076956, "learning_rate": 6.327503312568148e-06, "loss": 0.5327082872390747, "step": 7994 }, { "epoch": 1.2777910972588509, "grad_norm": 1.4619793660085696, "learning_rate": 6.325045444785821e-06, "loss": 0.5806588530540466, "step": 7995 }, { "epoch": 1.277950931031727, "grad_norm": 1.3373952760212693, "learning_rate": 6.3225878336492716e-06, "loss": 0.5215616226196289, "step": 7996 }, { "epoch": 1.2781107648046033, "grad_norm": 1.5310214885930915, "learning_rate": 6.3201304793301285e-06, "loss": 0.6368540525436401, "step": 7997 }, { "epoch": 1.2782705985774794, "grad_norm": 1.3907541117503588, "learning_rate": 6.317673382000009e-06, "loss": 0.5405285954475403, "step": 7998 }, { "epoch": 1.2784304323503557, "grad_norm": 1.5455309359471556, "learning_rate": 6.315216541830505e-06, "loss": 0.4917758107185364, "step": 7999 }, { "epoch": 1.2785902661232318, "grad_norm": 1.4440133282532799, "learning_rate": 6.312759958993193e-06, "loss": 0.5084921717643738, "step": 8000 }, { "epoch": 1.278750099896108, "grad_norm": 1.4667841882975396, "learning_rate": 6.310303633659635e-06, "loss": 0.4469248652458191, "step": 8001 }, { "epoch": 1.2789099336689842, "grad_norm": 1.4327209803955445, "learning_rate": 6.307847566001366e-06, "loss": 0.7139637470245361, "step": 8002 }, { "epoch": 1.2790697674418605, "grad_norm": 1.3218512595090535, "learning_rate": 6.305391756189917e-06, "loss": 0.5951534509658813, "step": 8003 }, { "epoch": 1.2792296012147366, "grad_norm": 1.4994153635583554, "learning_rate": 6.302936204396786e-06, "loss": 0.5448731184005737, "step": 8004 }, { "epoch": 1.279389434987613, "grad_norm": 1.3145374576520577, "learning_rate": 6.300480910793465e-06, "loss": 0.5470937490463257, "step": 8005 }, { "epoch": 1.279549268760489, "grad_norm": 1.3919428054079213, "learning_rate": 6.29802587555142e-06, "loss": 0.563208818435669, "step": 8006 }, { "epoch": 1.2797091025333653, "grad_norm": 1.4761920140992852, "learning_rate": 6.2955710988421e-06, "loss": 0.5377202033996582, "step": 8007 }, { "epoch": 1.2798689363062414, "grad_norm": 1.3438267350737285, "learning_rate": 6.293116580836947e-06, "loss": 0.43046027421951294, "step": 8008 }, { "epoch": 1.2800287700791178, "grad_norm": 1.727934640650393, "learning_rate": 6.290662321707367e-06, "loss": 0.5891985893249512, "step": 8009 }, { "epoch": 1.2801886038519938, "grad_norm": 1.5742535808966525, "learning_rate": 6.288208321624758e-06, "loss": 0.5263097882270813, "step": 8010 }, { "epoch": 1.2803484376248702, "grad_norm": 1.8136587336179293, "learning_rate": 6.2857545807605035e-06, "loss": 0.7099065780639648, "step": 8011 }, { "epoch": 1.2805082713977463, "grad_norm": 1.6269198668004843, "learning_rate": 6.283301099285959e-06, "loss": 0.7603579163551331, "step": 8012 }, { "epoch": 1.2806681051706226, "grad_norm": 1.398526654361862, "learning_rate": 6.280847877372469e-06, "loss": 0.5367675423622131, "step": 8013 }, { "epoch": 1.280827938943499, "grad_norm": 1.2134274963512255, "learning_rate": 6.278394915191356e-06, "loss": 0.28260141611099243, "step": 8014 }, { "epoch": 1.280987772716375, "grad_norm": 1.6671075228979522, "learning_rate": 6.2759422129139305e-06, "loss": 0.5662581920623779, "step": 8015 }, { "epoch": 1.281147606489251, "grad_norm": 1.9490221125372666, "learning_rate": 6.2734897707114764e-06, "loss": 0.5237032175064087, "step": 8016 }, { "epoch": 1.2813074402621274, "grad_norm": 1.5166607077586978, "learning_rate": 6.271037588755262e-06, "loss": 0.4727543592453003, "step": 8017 }, { "epoch": 1.2814672740350037, "grad_norm": 1.413090012485727, "learning_rate": 6.268585667216546e-06, "loss": 0.45352762937545776, "step": 8018 }, { "epoch": 1.2816271078078798, "grad_norm": 1.570097201685549, "learning_rate": 6.266134006266554e-06, "loss": 0.6373348236083984, "step": 8019 }, { "epoch": 1.281786941580756, "grad_norm": 1.4095267602399832, "learning_rate": 6.263682606076508e-06, "loss": 0.585620105266571, "step": 8020 }, { "epoch": 1.2819467753536322, "grad_norm": 1.6025628819545443, "learning_rate": 6.261231466817599e-06, "loss": 0.5502781867980957, "step": 8021 }, { "epoch": 1.2821066091265085, "grad_norm": 1.5333210412173484, "learning_rate": 6.258780588661012e-06, "loss": 0.6474114060401917, "step": 8022 }, { "epoch": 1.2822664428993846, "grad_norm": 1.3525539648336369, "learning_rate": 6.256329971777903e-06, "loss": 0.5436431765556335, "step": 8023 }, { "epoch": 1.2824262766722607, "grad_norm": 1.3638646739518379, "learning_rate": 6.2538796163394155e-06, "loss": 0.5169064998626709, "step": 8024 }, { "epoch": 1.282586110445137, "grad_norm": 1.455046590835655, "learning_rate": 6.251429522516676e-06, "loss": 0.5916675925254822, "step": 8025 }, { "epoch": 1.2827459442180134, "grad_norm": 1.4355308958805544, "learning_rate": 6.248979690480786e-06, "loss": 0.7212206721305847, "step": 8026 }, { "epoch": 1.2829057779908895, "grad_norm": 1.4649394219149443, "learning_rate": 6.246530120402833e-06, "loss": 0.44629427790641785, "step": 8027 }, { "epoch": 1.2830656117637655, "grad_norm": 1.3546211990170935, "learning_rate": 6.244080812453892e-06, "loss": 0.4298112690448761, "step": 8028 }, { "epoch": 1.2832254455366419, "grad_norm": 1.4819445768608948, "learning_rate": 6.2416317668050116e-06, "loss": 0.5656790733337402, "step": 8029 }, { "epoch": 1.2833852793095182, "grad_norm": 1.6420983340461224, "learning_rate": 6.239182983627222e-06, "loss": 0.670921266078949, "step": 8030 }, { "epoch": 1.2835451130823943, "grad_norm": 1.5710295294897838, "learning_rate": 6.236734463091541e-06, "loss": 0.5564830899238586, "step": 8031 }, { "epoch": 1.2837049468552706, "grad_norm": 1.4396860214041292, "learning_rate": 6.23428620536896e-06, "loss": 0.38214221596717834, "step": 8032 }, { "epoch": 1.2838647806281467, "grad_norm": 1.562215348308135, "learning_rate": 6.231838210630462e-06, "loss": 0.6550655364990234, "step": 8033 }, { "epoch": 1.284024614401023, "grad_norm": 1.3431055277146655, "learning_rate": 6.229390479047002e-06, "loss": 0.4762301743030548, "step": 8034 }, { "epoch": 1.284184448173899, "grad_norm": 1.740477833209502, "learning_rate": 6.226943010789523e-06, "loss": 0.6504449844360352, "step": 8035 }, { "epoch": 1.2843442819467754, "grad_norm": 1.7959212788284415, "learning_rate": 6.224495806028947e-06, "loss": 0.6113705635070801, "step": 8036 }, { "epoch": 1.2845041157196515, "grad_norm": 1.4304322589874259, "learning_rate": 6.2220488649361765e-06, "loss": 0.5082318782806396, "step": 8037 }, { "epoch": 1.2846639494925278, "grad_norm": 1.6540972637530686, "learning_rate": 6.219602187682101e-06, "loss": 0.5396710634231567, "step": 8038 }, { "epoch": 1.284823783265404, "grad_norm": 1.4444818221938769, "learning_rate": 6.217155774437583e-06, "loss": 0.561895489692688, "step": 8039 }, { "epoch": 1.2849836170382802, "grad_norm": 1.2227428572011874, "learning_rate": 6.214709625373476e-06, "loss": 0.3607715368270874, "step": 8040 }, { "epoch": 1.2851434508111563, "grad_norm": 1.567244514242592, "learning_rate": 6.212263740660606e-06, "loss": 0.4273146390914917, "step": 8041 }, { "epoch": 1.2853032845840326, "grad_norm": 1.4850415079358346, "learning_rate": 6.209818120469788e-06, "loss": 0.41595616936683655, "step": 8042 }, { "epoch": 1.2854631183569087, "grad_norm": 1.2929274796457666, "learning_rate": 6.207372764971816e-06, "loss": 0.45795583724975586, "step": 8043 }, { "epoch": 1.285622952129785, "grad_norm": 1.550341584253878, "learning_rate": 6.204927674337461e-06, "loss": 0.5554547309875488, "step": 8044 }, { "epoch": 1.2857827859026612, "grad_norm": 1.479298411154459, "learning_rate": 6.202482848737484e-06, "loss": 0.5909790396690369, "step": 8045 }, { "epoch": 1.2859426196755375, "grad_norm": 1.3827042873982245, "learning_rate": 6.200038288342618e-06, "loss": 0.6014459729194641, "step": 8046 }, { "epoch": 1.2861024534484136, "grad_norm": 1.6431988253313623, "learning_rate": 6.197593993323583e-06, "loss": 0.5239287614822388, "step": 8047 }, { "epoch": 1.2862622872212899, "grad_norm": 1.35280094489854, "learning_rate": 6.195149963851088e-06, "loss": 0.5771858096122742, "step": 8048 }, { "epoch": 1.2864221209941662, "grad_norm": 1.897596283460946, "learning_rate": 6.192706200095806e-06, "loss": 0.6295803785324097, "step": 8049 }, { "epoch": 1.2865819547670423, "grad_norm": 1.5880742687839058, "learning_rate": 6.190262702228407e-06, "loss": 0.509490430355072, "step": 8050 }, { "epoch": 1.2867417885399184, "grad_norm": 1.649478600182146, "learning_rate": 6.187819470419531e-06, "loss": 0.5663611888885498, "step": 8051 }, { "epoch": 1.2869016223127947, "grad_norm": 1.4218182123364393, "learning_rate": 6.185376504839807e-06, "loss": 0.6473878622055054, "step": 8052 }, { "epoch": 1.287061456085671, "grad_norm": 1.494284395119585, "learning_rate": 6.182933805659846e-06, "loss": 0.5055724382400513, "step": 8053 }, { "epoch": 1.2872212898585471, "grad_norm": 1.6547491081211898, "learning_rate": 6.18049137305023e-06, "loss": 0.5257925391197205, "step": 8054 }, { "epoch": 1.2873811236314232, "grad_norm": 1.4376679826901007, "learning_rate": 6.178049207181538e-06, "loss": 0.4507802128791809, "step": 8055 }, { "epoch": 1.2875409574042995, "grad_norm": 2.6592262146563925, "learning_rate": 6.175607308224316e-06, "loss": 0.5646128058433533, "step": 8056 }, { "epoch": 1.2877007911771758, "grad_norm": 1.6839243396300103, "learning_rate": 6.173165676349103e-06, "loss": 0.5661014318466187, "step": 8057 }, { "epoch": 1.287860624950052, "grad_norm": 1.3767189354416824, "learning_rate": 6.17072431172641e-06, "loss": 0.42107445001602173, "step": 8058 }, { "epoch": 1.288020458722928, "grad_norm": 1.6212227480919432, "learning_rate": 6.1682832145267336e-06, "loss": 0.5175114274024963, "step": 8059 }, { "epoch": 1.2881802924958043, "grad_norm": 1.3883965732024781, "learning_rate": 6.165842384920553e-06, "loss": 0.47044217586517334, "step": 8060 }, { "epoch": 1.2883401262686807, "grad_norm": 1.5870631975224154, "learning_rate": 6.163401823078324e-06, "loss": 0.5140398144721985, "step": 8061 }, { "epoch": 1.2884999600415568, "grad_norm": 1.5198946764191152, "learning_rate": 6.1609615291704904e-06, "loss": 0.5458634495735168, "step": 8062 }, { "epoch": 1.2886597938144329, "grad_norm": 1.2960157352574424, "learning_rate": 6.158521503367472e-06, "loss": 0.47231417894363403, "step": 8063 }, { "epoch": 1.2888196275873092, "grad_norm": 1.4001836061290989, "learning_rate": 6.156081745839673e-06, "loss": 0.5546870827674866, "step": 8064 }, { "epoch": 1.2889794613601855, "grad_norm": 1.4939832187011135, "learning_rate": 6.153642256757475e-06, "loss": 0.43833258748054504, "step": 8065 }, { "epoch": 1.2891392951330616, "grad_norm": 1.6492172912092025, "learning_rate": 6.151203036291242e-06, "loss": 0.4890522360801697, "step": 8066 }, { "epoch": 1.289299128905938, "grad_norm": 1.4761985901196746, "learning_rate": 6.148764084611325e-06, "loss": 0.5460132360458374, "step": 8067 }, { "epoch": 1.289458962678814, "grad_norm": 1.7526299684330018, "learning_rate": 6.146325401888047e-06, "loss": 0.6583930253982544, "step": 8068 }, { "epoch": 1.2896187964516903, "grad_norm": 1.451299798567449, "learning_rate": 6.143886988291719e-06, "loss": 0.7152923345565796, "step": 8069 }, { "epoch": 1.2897786302245664, "grad_norm": 1.6800036296194838, "learning_rate": 6.141448843992637e-06, "loss": 0.5352169871330261, "step": 8070 }, { "epoch": 1.2899384639974427, "grad_norm": 1.8023190987521918, "learning_rate": 6.139010969161062e-06, "loss": 0.5552511215209961, "step": 8071 }, { "epoch": 1.2900982977703188, "grad_norm": 1.6710156891266, "learning_rate": 6.136573363967254e-06, "loss": 0.5757999420166016, "step": 8072 }, { "epoch": 1.2902581315431951, "grad_norm": 1.5922376918723666, "learning_rate": 6.134136028581446e-06, "loss": 0.6978703737258911, "step": 8073 }, { "epoch": 1.2904179653160712, "grad_norm": 1.4244320467213913, "learning_rate": 6.1316989631738466e-06, "loss": 0.5033497214317322, "step": 8074 }, { "epoch": 1.2905777990889475, "grad_norm": 1.5006442556430961, "learning_rate": 6.1292621679146605e-06, "loss": 0.447189062833786, "step": 8075 }, { "epoch": 1.2907376328618236, "grad_norm": 1.8449151855482724, "learning_rate": 6.1268256429740565e-06, "loss": 0.6583157181739807, "step": 8076 }, { "epoch": 1.2908974666347, "grad_norm": 1.7879286410374164, "learning_rate": 6.1243893885221994e-06, "loss": 0.5545786619186401, "step": 8077 }, { "epoch": 1.291057300407576, "grad_norm": 1.4956806879431452, "learning_rate": 6.121953404729224e-06, "loss": 0.49355679750442505, "step": 8078 }, { "epoch": 1.2912171341804524, "grad_norm": 1.7467032104105338, "learning_rate": 6.119517691765256e-06, "loss": 0.6224706768989563, "step": 8079 }, { "epoch": 1.2913769679533285, "grad_norm": 1.4283400779947477, "learning_rate": 6.1170822498003925e-06, "loss": 0.589089572429657, "step": 8080 }, { "epoch": 1.2915368017262048, "grad_norm": 1.4366210428468744, "learning_rate": 6.114647079004716e-06, "loss": 0.5990719795227051, "step": 8081 }, { "epoch": 1.2916966354990809, "grad_norm": 1.4808607240824905, "learning_rate": 6.112212179548294e-06, "loss": 0.4499439597129822, "step": 8082 }, { "epoch": 1.2918564692719572, "grad_norm": 1.534568036768315, "learning_rate": 6.109777551601166e-06, "loss": 0.5593299865722656, "step": 8083 }, { "epoch": 1.2920163030448335, "grad_norm": 1.5612333106097305, "learning_rate": 6.107343195333362e-06, "loss": 0.520916223526001, "step": 8084 }, { "epoch": 1.2921761368177096, "grad_norm": 1.4918192482858106, "learning_rate": 6.104909110914886e-06, "loss": 0.634995698928833, "step": 8085 }, { "epoch": 1.2923359705905857, "grad_norm": 1.5807650885730886, "learning_rate": 6.102475298515729e-06, "loss": 0.503252387046814, "step": 8086 }, { "epoch": 1.292495804363462, "grad_norm": 1.6018287064820151, "learning_rate": 6.1000417583058595e-06, "loss": 0.5533050894737244, "step": 8087 }, { "epoch": 1.2926556381363383, "grad_norm": 1.663141649447397, "learning_rate": 6.09760849045522e-06, "loss": 0.6261181831359863, "step": 8088 }, { "epoch": 1.2928154719092144, "grad_norm": 1.3601908672843928, "learning_rate": 6.095175495133749e-06, "loss": 0.6120771765708923, "step": 8089 }, { "epoch": 1.2929753056820905, "grad_norm": 1.4369063451291828, "learning_rate": 6.0927427725113596e-06, "loss": 0.5082550048828125, "step": 8090 }, { "epoch": 1.2931351394549668, "grad_norm": 1.4980387306281098, "learning_rate": 6.090310322757938e-06, "loss": 0.4599694013595581, "step": 8091 }, { "epoch": 1.2932949732278431, "grad_norm": 1.8128314039353512, "learning_rate": 6.087878146043365e-06, "loss": 0.5161524415016174, "step": 8092 }, { "epoch": 1.2934548070007192, "grad_norm": 1.7615652367685986, "learning_rate": 6.085446242537489e-06, "loss": 0.7733516693115234, "step": 8093 }, { "epoch": 1.2936146407735953, "grad_norm": 1.6126419119852846, "learning_rate": 6.083014612410146e-06, "loss": 0.43313711881637573, "step": 8094 }, { "epoch": 1.2937744745464717, "grad_norm": 1.3686200666680794, "learning_rate": 6.080583255831158e-06, "loss": 0.6516197323799133, "step": 8095 }, { "epoch": 1.293934308319348, "grad_norm": 1.627327526158361, "learning_rate": 6.0781521729703154e-06, "loss": 0.49760764837265015, "step": 8096 }, { "epoch": 1.294094142092224, "grad_norm": 1.5742920610120277, "learning_rate": 6.0757213639974e-06, "loss": 0.502845823764801, "step": 8097 }, { "epoch": 1.2942539758651002, "grad_norm": 3.510794457614713, "learning_rate": 6.07329082908217e-06, "loss": 0.5362266898155212, "step": 8098 }, { "epoch": 1.2944138096379765, "grad_norm": 1.7291088446839853, "learning_rate": 6.070860568394367e-06, "loss": 0.6062776446342468, "step": 8099 }, { "epoch": 1.2945736434108528, "grad_norm": 1.438516861804704, "learning_rate": 6.068430582103713e-06, "loss": 0.5084391832351685, "step": 8100 }, { "epoch": 1.2947334771837289, "grad_norm": 1.4006325806929687, "learning_rate": 6.066000870379902e-06, "loss": 0.5504420399665833, "step": 8101 }, { "epoch": 1.2948933109566052, "grad_norm": 1.2995507984807135, "learning_rate": 6.063571433392625e-06, "loss": 0.5672681331634521, "step": 8102 }, { "epoch": 1.2950531447294813, "grad_norm": 1.4412744379792837, "learning_rate": 6.0611422713115396e-06, "loss": 0.5524300336837769, "step": 8103 }, { "epoch": 1.2952129785023576, "grad_norm": 1.4417701962043015, "learning_rate": 6.058713384306294e-06, "loss": 0.5693039894104004, "step": 8104 }, { "epoch": 1.2953728122752337, "grad_norm": 1.5007562169477546, "learning_rate": 6.056284772546509e-06, "loss": 0.5534891486167908, "step": 8105 }, { "epoch": 1.29553264604811, "grad_norm": 1.569423562223834, "learning_rate": 6.053856436201795e-06, "loss": 0.613814115524292, "step": 8106 }, { "epoch": 1.2956924798209861, "grad_norm": 1.8238566831121097, "learning_rate": 6.051428375441735e-06, "loss": 0.4790637791156769, "step": 8107 }, { "epoch": 1.2958523135938624, "grad_norm": 1.5172691984447586, "learning_rate": 6.0490005904358915e-06, "loss": 0.5695732831954956, "step": 8108 }, { "epoch": 1.2960121473667385, "grad_norm": 1.7123669736429754, "learning_rate": 6.046573081353822e-06, "loss": 0.5573859214782715, "step": 8109 }, { "epoch": 1.2961719811396148, "grad_norm": 1.4090721828479666, "learning_rate": 6.0441458483650515e-06, "loss": 0.5464191436767578, "step": 8110 }, { "epoch": 1.296331814912491, "grad_norm": 1.4345929333698075, "learning_rate": 6.041718891639088e-06, "loss": 0.49695920944213867, "step": 8111 }, { "epoch": 1.2964916486853673, "grad_norm": 1.4641312631793555, "learning_rate": 6.039292211345422e-06, "loss": 0.4801523685455322, "step": 8112 }, { "epoch": 1.2966514824582434, "grad_norm": 1.5676010375783755, "learning_rate": 6.036865807653525e-06, "loss": 0.5238218903541565, "step": 8113 }, { "epoch": 1.2968113162311197, "grad_norm": 1.4729925041176744, "learning_rate": 6.034439680732848e-06, "loss": 0.5250793695449829, "step": 8114 }, { "epoch": 1.2969711500039958, "grad_norm": 1.5235696506734346, "learning_rate": 6.032013830752823e-06, "loss": 0.5522066950798035, "step": 8115 }, { "epoch": 1.297130983776872, "grad_norm": 1.6080560178761385, "learning_rate": 6.0295882578828615e-06, "loss": 0.49322766065597534, "step": 8116 }, { "epoch": 1.2972908175497482, "grad_norm": 1.5889149477094444, "learning_rate": 6.02716296229236e-06, "loss": 0.44944506883621216, "step": 8117 }, { "epoch": 1.2974506513226245, "grad_norm": 1.5151364917594599, "learning_rate": 6.0247379441506885e-06, "loss": 0.672188401222229, "step": 8118 }, { "epoch": 1.2976104850955008, "grad_norm": 1.6773420939920418, "learning_rate": 6.0223132036272055e-06, "loss": 0.6179914474487305, "step": 8119 }, { "epoch": 1.297770318868377, "grad_norm": 1.7013248709594753, "learning_rate": 6.019888740891242e-06, "loss": 0.6583150625228882, "step": 8120 }, { "epoch": 1.297930152641253, "grad_norm": 1.525256003931747, "learning_rate": 6.01746455611212e-06, "loss": 0.5084543228149414, "step": 8121 }, { "epoch": 1.2980899864141293, "grad_norm": 1.6887194717891332, "learning_rate": 6.015040649459132e-06, "loss": 0.5563707947731018, "step": 8122 }, { "epoch": 1.2982498201870056, "grad_norm": 1.5372823010473096, "learning_rate": 6.012617021101552e-06, "loss": 0.6740450263023376, "step": 8123 }, { "epoch": 1.2984096539598817, "grad_norm": 1.6223829817890527, "learning_rate": 6.010193671208645e-06, "loss": 0.48743936419487, "step": 8124 }, { "epoch": 1.2985694877327578, "grad_norm": 1.3490971681798327, "learning_rate": 6.007770599949643e-06, "loss": 0.4432450532913208, "step": 8125 }, { "epoch": 1.2987293215056341, "grad_norm": 1.7992234973284276, "learning_rate": 6.005347807493766e-06, "loss": 0.6674023866653442, "step": 8126 }, { "epoch": 1.2988891552785105, "grad_norm": 1.51296517495639, "learning_rate": 6.0029252940102154e-06, "loss": 0.5705724954605103, "step": 8127 }, { "epoch": 1.2990489890513865, "grad_norm": 1.768074331058835, "learning_rate": 6.000503059668166e-06, "loss": 0.5758515000343323, "step": 8128 }, { "epoch": 1.2992088228242626, "grad_norm": 1.5575457977492644, "learning_rate": 5.998081104636786e-06, "loss": 0.6374872922897339, "step": 8129 }, { "epoch": 1.299368656597139, "grad_norm": 1.5195621729261597, "learning_rate": 5.9956594290852125e-06, "loss": 0.4060780107975006, "step": 8130 }, { "epoch": 1.2995284903700153, "grad_norm": 1.5266239580621448, "learning_rate": 5.9932380331825626e-06, "loss": 0.5912911891937256, "step": 8131 }, { "epoch": 1.2996883241428914, "grad_norm": 1.6002722616990555, "learning_rate": 5.990816917097944e-06, "loss": 0.6094276905059814, "step": 8132 }, { "epoch": 1.2998481579157675, "grad_norm": 1.5271939132218035, "learning_rate": 5.988396081000435e-06, "loss": 0.5213682651519775, "step": 8133 }, { "epoch": 1.3000079916886438, "grad_norm": 1.569810439024373, "learning_rate": 5.9859755250591e-06, "loss": 0.6073293685913086, "step": 8134 }, { "epoch": 1.30016782546152, "grad_norm": 1.7261107495281462, "learning_rate": 5.983555249442981e-06, "loss": 0.5593897104263306, "step": 8135 }, { "epoch": 1.3003276592343962, "grad_norm": 1.5994473103192153, "learning_rate": 5.981135254321103e-06, "loss": 0.5724679231643677, "step": 8136 }, { "epoch": 1.3004874930072725, "grad_norm": 1.6461414843120785, "learning_rate": 5.97871553986247e-06, "loss": 0.5432723164558411, "step": 8137 }, { "epoch": 1.3006473267801486, "grad_norm": 1.7629568592604448, "learning_rate": 5.976296106236061e-06, "loss": 0.679223895072937, "step": 8138 }, { "epoch": 1.300807160553025, "grad_norm": 1.6710609052145762, "learning_rate": 5.973876953610849e-06, "loss": 0.5425156950950623, "step": 8139 }, { "epoch": 1.300966994325901, "grad_norm": 1.6669311186851827, "learning_rate": 5.9714580821557724e-06, "loss": 0.5630455017089844, "step": 8140 }, { "epoch": 1.3011268280987773, "grad_norm": 1.414990910964615, "learning_rate": 5.9690394920397585e-06, "loss": 0.5943421721458435, "step": 8141 }, { "epoch": 1.3012866618716534, "grad_norm": 1.5168142143731953, "learning_rate": 5.966621183431714e-06, "loss": 0.5325920581817627, "step": 8142 }, { "epoch": 1.3014464956445297, "grad_norm": 1.4291690891939584, "learning_rate": 5.964203156500526e-06, "loss": 0.5559473633766174, "step": 8143 }, { "epoch": 1.3016063294174058, "grad_norm": 1.7822984724367232, "learning_rate": 5.96178541141506e-06, "loss": 0.6421388387680054, "step": 8144 }, { "epoch": 1.3017661631902822, "grad_norm": 1.494410993109855, "learning_rate": 5.959367948344159e-06, "loss": 0.5769762992858887, "step": 8145 }, { "epoch": 1.3019259969631582, "grad_norm": 1.4829858373384175, "learning_rate": 5.9569507674566555e-06, "loss": 0.5821161270141602, "step": 8146 }, { "epoch": 1.3020858307360346, "grad_norm": 1.4794128109052103, "learning_rate": 5.954533868921352e-06, "loss": 0.5377570390701294, "step": 8147 }, { "epoch": 1.3022456645089107, "grad_norm": 1.4178764613100632, "learning_rate": 5.952117252907037e-06, "loss": 0.684959352016449, "step": 8148 }, { "epoch": 1.302405498281787, "grad_norm": 1.4849881247586318, "learning_rate": 5.949700919582485e-06, "loss": 0.5056724548339844, "step": 8149 }, { "epoch": 1.302565332054663, "grad_norm": 1.7362498207398982, "learning_rate": 5.947284869116436e-06, "loss": 0.6199703216552734, "step": 8150 }, { "epoch": 1.3027251658275394, "grad_norm": 1.6382748319659612, "learning_rate": 5.944869101677624e-06, "loss": 0.520351767539978, "step": 8151 }, { "epoch": 1.3028849996004155, "grad_norm": 1.4821159561377875, "learning_rate": 5.942453617434756e-06, "loss": 0.672935426235199, "step": 8152 }, { "epoch": 1.3030448333732918, "grad_norm": 1.517916591793839, "learning_rate": 5.940038416556516e-06, "loss": 0.6540665030479431, "step": 8153 }, { "epoch": 1.3032046671461681, "grad_norm": 1.4588719728024377, "learning_rate": 5.93762349921158e-06, "loss": 0.5930243730545044, "step": 8154 }, { "epoch": 1.3033645009190442, "grad_norm": 1.4645769423539519, "learning_rate": 5.935208865568593e-06, "loss": 0.5628129839897156, "step": 8155 }, { "epoch": 1.3035243346919203, "grad_norm": 1.5281158232930232, "learning_rate": 5.932794515796187e-06, "loss": 0.5513327717781067, "step": 8156 }, { "epoch": 1.3036841684647966, "grad_norm": 1.3316613611182693, "learning_rate": 5.930380450062968e-06, "loss": 0.5214831233024597, "step": 8157 }, { "epoch": 1.303844002237673, "grad_norm": 1.6064339058072183, "learning_rate": 5.92796666853753e-06, "loss": 0.6563845872879028, "step": 8158 }, { "epoch": 1.304003836010549, "grad_norm": 1.793160588403156, "learning_rate": 5.925553171388443e-06, "loss": 0.5435060858726501, "step": 8159 }, { "epoch": 1.3041636697834251, "grad_norm": 1.4491262242099483, "learning_rate": 5.923139958784251e-06, "loss": 0.6170575618743896, "step": 8160 }, { "epoch": 1.3043235035563014, "grad_norm": 1.4502789770957183, "learning_rate": 5.920727030893492e-06, "loss": 0.5211206078529358, "step": 8161 }, { "epoch": 1.3044833373291778, "grad_norm": 1.387956943020574, "learning_rate": 5.9183143878846695e-06, "loss": 0.5852195024490356, "step": 8162 }, { "epoch": 1.3046431711020539, "grad_norm": 1.4948002262189823, "learning_rate": 5.91590202992628e-06, "loss": 0.5458582043647766, "step": 8163 }, { "epoch": 1.30480300487493, "grad_norm": 1.7056943136608067, "learning_rate": 5.913489957186789e-06, "loss": 0.6047766208648682, "step": 8164 }, { "epoch": 1.3049628386478063, "grad_norm": 1.4966295692736458, "learning_rate": 5.911078169834648e-06, "loss": 0.6113866567611694, "step": 8165 }, { "epoch": 1.3051226724206826, "grad_norm": 1.3126631355830616, "learning_rate": 5.908666668038292e-06, "loss": 0.46477848291397095, "step": 8166 }, { "epoch": 1.3052825061935587, "grad_norm": 1.386925062625069, "learning_rate": 5.906255451966127e-06, "loss": 0.5178128480911255, "step": 8167 }, { "epoch": 1.3054423399664348, "grad_norm": 1.3789417888262936, "learning_rate": 5.903844521786541e-06, "loss": 0.600423276424408, "step": 8168 }, { "epoch": 1.305602173739311, "grad_norm": 1.6959183598600724, "learning_rate": 5.901433877667915e-06, "loss": 0.555129885673523, "step": 8169 }, { "epoch": 1.3057620075121874, "grad_norm": 1.5632552272848261, "learning_rate": 5.899023519778592e-06, "loss": 0.45018863677978516, "step": 8170 }, { "epoch": 1.3059218412850635, "grad_norm": 1.386415066731393, "learning_rate": 5.896613448286905e-06, "loss": 0.4897199869155884, "step": 8171 }, { "epoch": 1.3060816750579398, "grad_norm": 1.5264394385949465, "learning_rate": 5.894203663361167e-06, "loss": 0.4833226203918457, "step": 8172 }, { "epoch": 1.306241508830816, "grad_norm": 1.4300697132114353, "learning_rate": 5.891794165169663e-06, "loss": 0.5602149963378906, "step": 8173 }, { "epoch": 1.3064013426036922, "grad_norm": 1.7964788958598945, "learning_rate": 5.88938495388067e-06, "loss": 0.5680267810821533, "step": 8174 }, { "epoch": 1.3065611763765683, "grad_norm": 1.583082757922582, "learning_rate": 5.886976029662433e-06, "loss": 0.6490883231163025, "step": 8175 }, { "epoch": 1.3067210101494446, "grad_norm": 1.5908025702685618, "learning_rate": 5.8845673926831895e-06, "loss": 0.6825476884841919, "step": 8176 }, { "epoch": 1.3068808439223207, "grad_norm": 1.607455030515601, "learning_rate": 5.882159043111142e-06, "loss": 0.6460356712341309, "step": 8177 }, { "epoch": 1.307040677695197, "grad_norm": 1.7215844253845223, "learning_rate": 5.879750981114489e-06, "loss": 0.5818843245506287, "step": 8178 }, { "epoch": 1.3072005114680731, "grad_norm": 1.5155375807401963, "learning_rate": 5.877343206861397e-06, "loss": 0.5566049814224243, "step": 8179 }, { "epoch": 1.3073603452409495, "grad_norm": 1.4628397597875926, "learning_rate": 5.874935720520015e-06, "loss": 0.5746850967407227, "step": 8180 }, { "epoch": 1.3075201790138256, "grad_norm": 1.3830362663193319, "learning_rate": 5.872528522258478e-06, "loss": 0.5813469886779785, "step": 8181 }, { "epoch": 1.3076800127867019, "grad_norm": 1.5348315047313381, "learning_rate": 5.87012161224489e-06, "loss": 0.589766800403595, "step": 8182 }, { "epoch": 1.307839846559578, "grad_norm": 1.4140203718486366, "learning_rate": 5.867714990647348e-06, "loss": 0.46170955896377563, "step": 8183 }, { "epoch": 1.3079996803324543, "grad_norm": 1.6171279312940123, "learning_rate": 5.865308657633914e-06, "loss": 0.6106960773468018, "step": 8184 }, { "epoch": 1.3081595141053304, "grad_norm": 1.5821328113953064, "learning_rate": 5.862902613372645e-06, "loss": 0.5556931495666504, "step": 8185 }, { "epoch": 1.3083193478782067, "grad_norm": 1.66825033761507, "learning_rate": 5.860496858031569e-06, "loss": 0.5432741641998291, "step": 8186 }, { "epoch": 1.3084791816510828, "grad_norm": 1.4229272051781854, "learning_rate": 5.858091391778691e-06, "loss": 0.5560572147369385, "step": 8187 }, { "epoch": 1.308639015423959, "grad_norm": 1.5009919971963075, "learning_rate": 5.855686214782002e-06, "loss": 0.5319786071777344, "step": 8188 }, { "epoch": 1.3087988491968354, "grad_norm": 1.4542603490435058, "learning_rate": 5.853281327209475e-06, "loss": 0.6467190384864807, "step": 8189 }, { "epoch": 1.3089586829697115, "grad_norm": 1.5848621903942528, "learning_rate": 5.850876729229055e-06, "loss": 0.5485496520996094, "step": 8190 }, { "epoch": 1.3091185167425876, "grad_norm": 1.5481032080773627, "learning_rate": 5.8484724210086726e-06, "loss": 0.5021206140518188, "step": 8191 }, { "epoch": 1.309278350515464, "grad_norm": 1.5408055742037607, "learning_rate": 5.846068402716234e-06, "loss": 0.6060940027236938, "step": 8192 }, { "epoch": 1.3094381842883402, "grad_norm": 1.2551529597137334, "learning_rate": 5.843664674519631e-06, "loss": 0.600982666015625, "step": 8193 }, { "epoch": 1.3095980180612163, "grad_norm": 1.5028507530796358, "learning_rate": 5.841261236586729e-06, "loss": 0.6582956314086914, "step": 8194 }, { "epoch": 1.3097578518340924, "grad_norm": 1.4585077869945808, "learning_rate": 5.838858089085373e-06, "loss": 0.46367329359054565, "step": 8195 }, { "epoch": 1.3099176856069688, "grad_norm": 1.6254707249318383, "learning_rate": 5.8364552321833955e-06, "loss": 0.6699297428131104, "step": 8196 }, { "epoch": 1.310077519379845, "grad_norm": 1.7490187287177943, "learning_rate": 5.8340526660486e-06, "loss": 0.6448116898536682, "step": 8197 }, { "epoch": 1.3102373531527212, "grad_norm": 1.2671160322208612, "learning_rate": 5.831650390848771e-06, "loss": 0.4866679310798645, "step": 8198 }, { "epoch": 1.3103971869255973, "grad_norm": 1.5203935562394248, "learning_rate": 5.82924840675168e-06, "loss": 0.5049277544021606, "step": 8199 }, { "epoch": 1.3105570206984736, "grad_norm": 1.3821501359400423, "learning_rate": 5.826846713925074e-06, "loss": 0.5544209480285645, "step": 8200 }, { "epoch": 1.31071685447135, "grad_norm": 1.7028696741199392, "learning_rate": 5.824445312536673e-06, "loss": 0.6066251397132874, "step": 8201 }, { "epoch": 1.310876688244226, "grad_norm": 1.3639527640091829, "learning_rate": 5.822044202754187e-06, "loss": 0.5094658136367798, "step": 8202 }, { "epoch": 1.311036522017102, "grad_norm": 1.5281555328571468, "learning_rate": 5.819643384745292e-06, "loss": 0.5407668352127075, "step": 8203 }, { "epoch": 1.3111963557899784, "grad_norm": 1.44556799429131, "learning_rate": 5.817242858677666e-06, "loss": 0.626220166683197, "step": 8204 }, { "epoch": 1.3113561895628547, "grad_norm": 1.675080447217429, "learning_rate": 5.814842624718946e-06, "loss": 0.5220004320144653, "step": 8205 }, { "epoch": 1.3115160233357308, "grad_norm": 1.3836632588014133, "learning_rate": 5.812442683036757e-06, "loss": 0.5765817165374756, "step": 8206 }, { "epoch": 1.3116758571086071, "grad_norm": 1.8673287392718052, "learning_rate": 5.810043033798702e-06, "loss": 0.525654137134552, "step": 8207 }, { "epoch": 1.3118356908814832, "grad_norm": 1.5680910437209223, "learning_rate": 5.80764367717236e-06, "loss": 0.5557483434677124, "step": 8208 }, { "epoch": 1.3119955246543595, "grad_norm": 2.750183471653673, "learning_rate": 5.805244613325299e-06, "loss": 0.5505327582359314, "step": 8209 }, { "epoch": 1.3121553584272356, "grad_norm": 1.579896703467051, "learning_rate": 5.80284584242506e-06, "loss": 0.6597088575363159, "step": 8210 }, { "epoch": 1.312315192200112, "grad_norm": 1.4596562663567265, "learning_rate": 5.800447364639167e-06, "loss": 0.5856539011001587, "step": 8211 }, { "epoch": 1.312475025972988, "grad_norm": 1.4377942928967644, "learning_rate": 5.7980491801351165e-06, "loss": 0.6382673978805542, "step": 8212 }, { "epoch": 1.3126348597458644, "grad_norm": 1.2896218122797196, "learning_rate": 5.795651289080389e-06, "loss": 0.47848618030548096, "step": 8213 }, { "epoch": 1.3127946935187405, "grad_norm": 1.4907392661196457, "learning_rate": 5.793253691642451e-06, "loss": 0.588350772857666, "step": 8214 }, { "epoch": 1.3129545272916168, "grad_norm": 1.5775307198691635, "learning_rate": 5.790856387988737e-06, "loss": 0.5090786218643188, "step": 8215 }, { "epoch": 1.3131143610644929, "grad_norm": 1.4910594172861065, "learning_rate": 5.7884593782866685e-06, "loss": 0.573388397693634, "step": 8216 }, { "epoch": 1.3132741948373692, "grad_norm": 1.5249760594111352, "learning_rate": 5.786062662703641e-06, "loss": 0.6561048030853271, "step": 8217 }, { "epoch": 1.3134340286102453, "grad_norm": 1.2948284537724215, "learning_rate": 5.783666241407031e-06, "loss": 0.495551735162735, "step": 8218 }, { "epoch": 1.3135938623831216, "grad_norm": 1.5762049202018642, "learning_rate": 5.781270114564203e-06, "loss": 0.5861350893974304, "step": 8219 }, { "epoch": 1.3137536961559977, "grad_norm": 1.371092718555462, "learning_rate": 5.778874282342492e-06, "loss": 0.5718671679496765, "step": 8220 }, { "epoch": 1.313913529928874, "grad_norm": 1.5379849451687306, "learning_rate": 5.776478744909213e-06, "loss": 0.5988597869873047, "step": 8221 }, { "epoch": 1.31407336370175, "grad_norm": 1.3666593978903003, "learning_rate": 5.77408350243166e-06, "loss": 0.4945909380912781, "step": 8222 }, { "epoch": 1.3142331974746264, "grad_norm": 1.5091601636741774, "learning_rate": 5.771688555077108e-06, "loss": 0.6208739280700684, "step": 8223 }, { "epoch": 1.3143930312475027, "grad_norm": 1.6819422096075514, "learning_rate": 5.769293903012816e-06, "loss": 0.5161570310592651, "step": 8224 }, { "epoch": 1.3145528650203788, "grad_norm": 1.6875481970393218, "learning_rate": 5.766899546406017e-06, "loss": 0.6096788644790649, "step": 8225 }, { "epoch": 1.314712698793255, "grad_norm": 1.6881521152245187, "learning_rate": 5.764505485423921e-06, "loss": 0.5448685884475708, "step": 8226 }, { "epoch": 1.3148725325661312, "grad_norm": 1.4851170197430172, "learning_rate": 5.7621117202337205e-06, "loss": 0.6592206954956055, "step": 8227 }, { "epoch": 1.3150323663390076, "grad_norm": 1.520536985229188, "learning_rate": 5.759718251002588e-06, "loss": 0.6994221210479736, "step": 8228 }, { "epoch": 1.3151922001118836, "grad_norm": 1.4150640340296443, "learning_rate": 5.757325077897683e-06, "loss": 0.5047873854637146, "step": 8229 }, { "epoch": 1.3153520338847597, "grad_norm": 1.6710764882202243, "learning_rate": 5.754932201086129e-06, "loss": 0.5898416042327881, "step": 8230 }, { "epoch": 1.315511867657636, "grad_norm": 1.5798456311150626, "learning_rate": 5.752539620735036e-06, "loss": 0.592679500579834, "step": 8231 }, { "epoch": 1.3156717014305124, "grad_norm": 1.9717066622452657, "learning_rate": 5.7501473370114945e-06, "loss": 0.5964464545249939, "step": 8232 }, { "epoch": 1.3158315352033885, "grad_norm": 1.5546234095789055, "learning_rate": 5.747755350082569e-06, "loss": 0.5404235124588013, "step": 8233 }, { "epoch": 1.3159913689762646, "grad_norm": 1.7023001880272313, "learning_rate": 5.745363660115315e-06, "loss": 0.5550072193145752, "step": 8234 }, { "epoch": 1.3161512027491409, "grad_norm": 1.329092399343752, "learning_rate": 5.742972267276756e-06, "loss": 0.5828782320022583, "step": 8235 }, { "epoch": 1.3163110365220172, "grad_norm": 1.6058471386188342, "learning_rate": 5.7405811717338974e-06, "loss": 0.563485860824585, "step": 8236 }, { "epoch": 1.3164708702948933, "grad_norm": 1.487269672540393, "learning_rate": 5.738190373653727e-06, "loss": 0.5332260727882385, "step": 8237 }, { "epoch": 1.3166307040677694, "grad_norm": 1.5746345101999237, "learning_rate": 5.735799873203202e-06, "loss": 0.5353889465332031, "step": 8238 }, { "epoch": 1.3167905378406457, "grad_norm": 1.6563460350037158, "learning_rate": 5.7334096705492794e-06, "loss": 0.5286833047866821, "step": 8239 }, { "epoch": 1.316950371613522, "grad_norm": 1.3944128231728454, "learning_rate": 5.731019765858874e-06, "loss": 0.4678359031677246, "step": 8240 }, { "epoch": 1.3171102053863981, "grad_norm": 1.4925238586591703, "learning_rate": 5.728630159298891e-06, "loss": 0.621925950050354, "step": 8241 }, { "epoch": 1.3172700391592744, "grad_norm": 1.6755259122195827, "learning_rate": 5.726240851036207e-06, "loss": 0.539188802242279, "step": 8242 }, { "epoch": 1.3174298729321505, "grad_norm": 1.5033664996666183, "learning_rate": 5.723851841237691e-06, "loss": 0.5999862551689148, "step": 8243 }, { "epoch": 1.3175897067050268, "grad_norm": 1.6587682854774315, "learning_rate": 5.721463130070178e-06, "loss": 0.5214466452598572, "step": 8244 }, { "epoch": 1.317749540477903, "grad_norm": 1.6378449885014352, "learning_rate": 5.719074717700488e-06, "loss": 0.6051288843154907, "step": 8245 }, { "epoch": 1.3179093742507793, "grad_norm": 1.3464838964517778, "learning_rate": 5.71668660429542e-06, "loss": 0.48365870118141174, "step": 8246 }, { "epoch": 1.3180692080236553, "grad_norm": 1.7366522246451437, "learning_rate": 5.7142987900217464e-06, "loss": 0.6076208353042603, "step": 8247 }, { "epoch": 1.3182290417965317, "grad_norm": 1.5463439882355545, "learning_rate": 5.711911275046227e-06, "loss": 0.5273228883743286, "step": 8248 }, { "epoch": 1.3183888755694078, "grad_norm": 1.5683968031721935, "learning_rate": 5.709524059535603e-06, "loss": 0.49535971879959106, "step": 8249 }, { "epoch": 1.318548709342284, "grad_norm": 1.4799862487863444, "learning_rate": 5.707137143656582e-06, "loss": 0.5161316990852356, "step": 8250 }, { "epoch": 1.3187085431151602, "grad_norm": 1.410697993283718, "learning_rate": 5.704750527575863e-06, "loss": 0.44986748695373535, "step": 8251 }, { "epoch": 1.3188683768880365, "grad_norm": 1.3055859048233345, "learning_rate": 5.702364211460114e-06, "loss": 0.490278959274292, "step": 8252 }, { "epoch": 1.3190282106609126, "grad_norm": 1.732515566321407, "learning_rate": 5.699978195475986e-06, "loss": 0.5370404720306396, "step": 8253 }, { "epoch": 1.319188044433789, "grad_norm": 1.6409654902917996, "learning_rate": 5.697592479790116e-06, "loss": 0.5100312232971191, "step": 8254 }, { "epoch": 1.319347878206665, "grad_norm": 1.7228352329174839, "learning_rate": 5.695207064569111e-06, "loss": 0.526006817817688, "step": 8255 }, { "epoch": 1.3195077119795413, "grad_norm": 1.6000946876990643, "learning_rate": 5.692821949979557e-06, "loss": 0.5684828758239746, "step": 8256 }, { "epoch": 1.3196675457524174, "grad_norm": 1.5393991299500214, "learning_rate": 5.6904371361880225e-06, "loss": 0.637182354927063, "step": 8257 }, { "epoch": 1.3198273795252937, "grad_norm": 1.5795754593922693, "learning_rate": 5.688052623361059e-06, "loss": 0.5923646092414856, "step": 8258 }, { "epoch": 1.31998721329817, "grad_norm": 1.3083550817706742, "learning_rate": 5.6856684116651906e-06, "loss": 0.4692627787590027, "step": 8259 }, { "epoch": 1.3201470470710461, "grad_norm": 1.6438808375635654, "learning_rate": 5.68328450126692e-06, "loss": 0.5760710835456848, "step": 8260 }, { "epoch": 1.3203068808439222, "grad_norm": 1.7349537435116513, "learning_rate": 5.680900892332733e-06, "loss": 0.5660613179206848, "step": 8261 }, { "epoch": 1.3204667146167985, "grad_norm": 1.4005683521859966, "learning_rate": 5.678517585029088e-06, "loss": 0.5762643814086914, "step": 8262 }, { "epoch": 1.3206265483896749, "grad_norm": 1.642075664728474, "learning_rate": 5.676134579522433e-06, "loss": 0.5909575819969177, "step": 8263 }, { "epoch": 1.320786382162551, "grad_norm": 1.6271923237461348, "learning_rate": 5.673751875979187e-06, "loss": 0.5629652738571167, "step": 8264 }, { "epoch": 1.320946215935427, "grad_norm": 1.3544866904698325, "learning_rate": 5.671369474565747e-06, "loss": 0.5222570896148682, "step": 8265 }, { "epoch": 1.3211060497083034, "grad_norm": 1.3874144478686314, "learning_rate": 5.668987375448496e-06, "loss": 0.5000051856040955, "step": 8266 }, { "epoch": 1.3212658834811797, "grad_norm": 1.5981551962794023, "learning_rate": 5.666605578793782e-06, "loss": 0.43974772095680237, "step": 8267 }, { "epoch": 1.3214257172540558, "grad_norm": 1.6674613665330056, "learning_rate": 5.664224084767948e-06, "loss": 0.7444421052932739, "step": 8268 }, { "epoch": 1.3215855510269319, "grad_norm": 1.6179328920614005, "learning_rate": 5.661842893537313e-06, "loss": 0.653728723526001, "step": 8269 }, { "epoch": 1.3217453847998082, "grad_norm": 1.5304683075337264, "learning_rate": 5.659462005268166e-06, "loss": 0.5747655034065247, "step": 8270 }, { "epoch": 1.3219052185726845, "grad_norm": 1.4127700732370583, "learning_rate": 5.65708142012678e-06, "loss": 0.5507832765579224, "step": 8271 }, { "epoch": 1.3220650523455606, "grad_norm": 1.283206922680177, "learning_rate": 5.654701138279408e-06, "loss": 0.5800594091415405, "step": 8272 }, { "epoch": 1.3222248861184367, "grad_norm": 1.4524682792349823, "learning_rate": 5.652321159892276e-06, "loss": 0.5112971663475037, "step": 8273 }, { "epoch": 1.322384719891313, "grad_norm": 1.6115522991323745, "learning_rate": 5.649941485131598e-06, "loss": 0.6015325784683228, "step": 8274 }, { "epoch": 1.3225445536641893, "grad_norm": 1.4788570716090044, "learning_rate": 5.647562114163562e-06, "loss": 0.5420348644256592, "step": 8275 }, { "epoch": 1.3227043874370654, "grad_norm": 1.3594990815149002, "learning_rate": 5.645183047154334e-06, "loss": 0.5700510144233704, "step": 8276 }, { "epoch": 1.3228642212099417, "grad_norm": 1.485520155493522, "learning_rate": 5.6428042842700556e-06, "loss": 0.6034226417541504, "step": 8277 }, { "epoch": 1.3230240549828178, "grad_norm": 1.3809414824946837, "learning_rate": 5.6404258256768575e-06, "loss": 0.4271804392337799, "step": 8278 }, { "epoch": 1.3231838887556941, "grad_norm": 1.6586163809327592, "learning_rate": 5.6380476715408406e-06, "loss": 0.7169289588928223, "step": 8279 }, { "epoch": 1.3233437225285702, "grad_norm": 1.6544586721502288, "learning_rate": 5.635669822028086e-06, "loss": 0.43537187576293945, "step": 8280 }, { "epoch": 1.3235035563014466, "grad_norm": 1.2046263189371647, "learning_rate": 5.633292277304654e-06, "loss": 0.4338614344596863, "step": 8281 }, { "epoch": 1.3236633900743227, "grad_norm": 1.4283489195708354, "learning_rate": 5.630915037536579e-06, "loss": 0.5291532278060913, "step": 8282 }, { "epoch": 1.323823223847199, "grad_norm": 1.3176928749797712, "learning_rate": 5.628538102889891e-06, "loss": 0.5784658789634705, "step": 8283 }, { "epoch": 1.323983057620075, "grad_norm": 1.6238714369083265, "learning_rate": 5.626161473530578e-06, "loss": 0.5649932026863098, "step": 8284 }, { "epoch": 1.3241428913929514, "grad_norm": 1.6122325610762949, "learning_rate": 5.623785149624619e-06, "loss": 0.5913768410682678, "step": 8285 }, { "epoch": 1.3243027251658275, "grad_norm": 1.3157393769013228, "learning_rate": 5.621409131337964e-06, "loss": 0.4551526606082916, "step": 8286 }, { "epoch": 1.3244625589387038, "grad_norm": 1.4992245519853347, "learning_rate": 5.619033418836545e-06, "loss": 0.6106743812561035, "step": 8287 }, { "epoch": 1.3246223927115799, "grad_norm": 1.7376491396750735, "learning_rate": 5.616658012286276e-06, "loss": 0.44851887226104736, "step": 8288 }, { "epoch": 1.3247822264844562, "grad_norm": 1.3141908925498307, "learning_rate": 5.614282911853052e-06, "loss": 0.5113207101821899, "step": 8289 }, { "epoch": 1.3249420602573323, "grad_norm": 1.6276727687818155, "learning_rate": 5.611908117702736e-06, "loss": 0.5992264151573181, "step": 8290 }, { "epoch": 1.3251018940302086, "grad_norm": 1.3374197210130065, "learning_rate": 5.609533630001176e-06, "loss": 0.41525161266326904, "step": 8291 }, { "epoch": 1.3252617278030847, "grad_norm": 1.759142447455563, "learning_rate": 5.607159448914195e-06, "loss": 0.5436744093894958, "step": 8292 }, { "epoch": 1.325421561575961, "grad_norm": 1.5346132767909872, "learning_rate": 5.604785574607603e-06, "loss": 0.6779544949531555, "step": 8293 }, { "epoch": 1.3255813953488373, "grad_norm": 1.789685893221027, "learning_rate": 5.60241200724718e-06, "loss": 0.6905895471572876, "step": 8294 }, { "epoch": 1.3257412291217134, "grad_norm": 1.551015711611212, "learning_rate": 5.600038746998689e-06, "loss": 0.4640052914619446, "step": 8295 }, { "epoch": 1.3259010628945895, "grad_norm": 1.6943881940422298, "learning_rate": 5.5976657940278665e-06, "loss": 0.5533930063247681, "step": 8296 }, { "epoch": 1.3260608966674658, "grad_norm": 1.4572022221790404, "learning_rate": 5.595293148500431e-06, "loss": 0.49226847290992737, "step": 8297 }, { "epoch": 1.3262207304403422, "grad_norm": 1.5887977278994556, "learning_rate": 5.5929208105820854e-06, "loss": 0.6410150527954102, "step": 8298 }, { "epoch": 1.3263805642132183, "grad_norm": 1.9031444613799082, "learning_rate": 5.590548780438502e-06, "loss": 0.5752484798431396, "step": 8299 }, { "epoch": 1.3265403979860944, "grad_norm": 1.522646472571582, "learning_rate": 5.588177058235334e-06, "loss": 0.5886540412902832, "step": 8300 }, { "epoch": 1.3267002317589707, "grad_norm": 1.999291215847726, "learning_rate": 5.585805644138216e-06, "loss": 0.6245059967041016, "step": 8301 }, { "epoch": 1.326860065531847, "grad_norm": 1.5520767161069162, "learning_rate": 5.5834345383127545e-06, "loss": 0.5477369427680969, "step": 8302 }, { "epoch": 1.327019899304723, "grad_norm": 1.515873879954347, "learning_rate": 5.581063740924546e-06, "loss": 0.5673725605010986, "step": 8303 }, { "epoch": 1.3271797330775992, "grad_norm": 1.6859947000121058, "learning_rate": 5.578693252139156e-06, "loss": 0.5223235487937927, "step": 8304 }, { "epoch": 1.3273395668504755, "grad_norm": 1.4310751696897819, "learning_rate": 5.57632307212213e-06, "loss": 0.44606778025627136, "step": 8305 }, { "epoch": 1.3274994006233518, "grad_norm": 1.6439745621577668, "learning_rate": 5.573953201038988e-06, "loss": 0.4888993203639984, "step": 8306 }, { "epoch": 1.327659234396228, "grad_norm": 1.5832463455226289, "learning_rate": 5.571583639055243e-06, "loss": 0.6331450939178467, "step": 8307 }, { "epoch": 1.327819068169104, "grad_norm": 1.5935497366333036, "learning_rate": 5.569214386336373e-06, "loss": 0.5506129264831543, "step": 8308 }, { "epoch": 1.3279789019419803, "grad_norm": 1.885867557455777, "learning_rate": 5.566845443047833e-06, "loss": 0.6261000037193298, "step": 8309 }, { "epoch": 1.3281387357148566, "grad_norm": 1.6475755483022871, "learning_rate": 5.564476809355072e-06, "loss": 0.5504906177520752, "step": 8310 }, { "epoch": 1.3282985694877327, "grad_norm": 1.7160974944442982, "learning_rate": 5.5621084854234985e-06, "loss": 0.5963953733444214, "step": 8311 }, { "epoch": 1.328458403260609, "grad_norm": 1.434748408495278, "learning_rate": 5.559740471418509e-06, "loss": 0.6231112480163574, "step": 8312 }, { "epoch": 1.3286182370334851, "grad_norm": 1.667429399220962, "learning_rate": 5.557372767505481e-06, "loss": 0.5024057626724243, "step": 8313 }, { "epoch": 1.3287780708063615, "grad_norm": 1.5783774258581016, "learning_rate": 5.555005373849765e-06, "loss": 0.46099206805229187, "step": 8314 }, { "epoch": 1.3289379045792375, "grad_norm": 1.7903439140670565, "learning_rate": 5.55263829061669e-06, "loss": 0.5514940023422241, "step": 8315 }, { "epoch": 1.3290977383521139, "grad_norm": 1.75902194509914, "learning_rate": 5.550271517971566e-06, "loss": 0.4996083378791809, "step": 8316 }, { "epoch": 1.32925757212499, "grad_norm": 1.4111581754049503, "learning_rate": 5.547905056079674e-06, "loss": 0.4135870337486267, "step": 8317 }, { "epoch": 1.3294174058978663, "grad_norm": 1.5187522689084454, "learning_rate": 5.5455389051062904e-06, "loss": 0.5187137126922607, "step": 8318 }, { "epoch": 1.3295772396707424, "grad_norm": 1.5242626402250548, "learning_rate": 5.543173065216651e-06, "loss": 0.4966793358325958, "step": 8319 }, { "epoch": 1.3297370734436187, "grad_norm": 1.5562208073927526, "learning_rate": 5.540807536575981e-06, "loss": 0.5510865449905396, "step": 8320 }, { "epoch": 1.3298969072164948, "grad_norm": 1.580440956303095, "learning_rate": 5.538442319349475e-06, "loss": 0.5195344686508179, "step": 8321 }, { "epoch": 1.330056740989371, "grad_norm": 1.573541246596268, "learning_rate": 5.53607741370232e-06, "loss": 0.4143067002296448, "step": 8322 }, { "epoch": 1.3302165747622472, "grad_norm": 1.5742926891587168, "learning_rate": 5.533712819799667e-06, "loss": 0.6342053413391113, "step": 8323 }, { "epoch": 1.3303764085351235, "grad_norm": 1.631478772432523, "learning_rate": 5.5313485378066524e-06, "loss": 0.524894654750824, "step": 8324 }, { "epoch": 1.3305362423079996, "grad_norm": 1.4504350881324977, "learning_rate": 5.528984567888389e-06, "loss": 0.49792855978012085, "step": 8325 }, { "epoch": 1.330696076080876, "grad_norm": 1.7299406227182328, "learning_rate": 5.526620910209963e-06, "loss": 0.6291788220405579, "step": 8326 }, { "epoch": 1.330855909853752, "grad_norm": 1.6854509416359569, "learning_rate": 5.524257564936454e-06, "loss": 0.5774040222167969, "step": 8327 }, { "epoch": 1.3310157436266283, "grad_norm": 1.4932622382268685, "learning_rate": 5.521894532232903e-06, "loss": 0.5290566086769104, "step": 8328 }, { "epoch": 1.3311755773995047, "grad_norm": 1.6236566727488295, "learning_rate": 5.519531812264334e-06, "loss": 0.4643417298793793, "step": 8329 }, { "epoch": 1.3313354111723807, "grad_norm": 1.367394593977875, "learning_rate": 5.517169405195759e-06, "loss": 0.5443735122680664, "step": 8330 }, { "epoch": 1.3314952449452568, "grad_norm": 1.5216302495775458, "learning_rate": 5.514807311192154e-06, "loss": 0.5297644138336182, "step": 8331 }, { "epoch": 1.3316550787181332, "grad_norm": 1.456005724280809, "learning_rate": 5.512445530418478e-06, "loss": 0.6144634485244751, "step": 8332 }, { "epoch": 1.3318149124910095, "grad_norm": 1.5089232744491647, "learning_rate": 5.510084063039674e-06, "loss": 0.5716589093208313, "step": 8333 }, { "epoch": 1.3319747462638856, "grad_norm": 1.6756991461905424, "learning_rate": 5.507722909220658e-06, "loss": 0.5687721967697144, "step": 8334 }, { "epoch": 1.3321345800367617, "grad_norm": 1.5857103148770428, "learning_rate": 5.505362069126321e-06, "loss": 0.46318894624710083, "step": 8335 }, { "epoch": 1.332294413809638, "grad_norm": 1.570951394332571, "learning_rate": 5.50300154292154e-06, "loss": 0.535705029964447, "step": 8336 }, { "epoch": 1.3324542475825143, "grad_norm": 1.4103318800690334, "learning_rate": 5.500641330771159e-06, "loss": 0.5984888076782227, "step": 8337 }, { "epoch": 1.3326140813553904, "grad_norm": 1.5510027454189679, "learning_rate": 5.498281432840014e-06, "loss": 0.5439463257789612, "step": 8338 }, { "epoch": 1.3327739151282665, "grad_norm": 1.8123726670231428, "learning_rate": 5.495921849292911e-06, "loss": 0.610889732837677, "step": 8339 }, { "epoch": 1.3329337489011428, "grad_norm": 1.704326757962566, "learning_rate": 5.4935625802946315e-06, "loss": 0.6894976496696472, "step": 8340 }, { "epoch": 1.3330935826740191, "grad_norm": 1.824739020117956, "learning_rate": 5.491203626009938e-06, "loss": 0.58765709400177, "step": 8341 }, { "epoch": 1.3332534164468952, "grad_norm": 1.6898726258704102, "learning_rate": 5.488844986603576e-06, "loss": 0.5958683490753174, "step": 8342 }, { "epoch": 1.3334132502197713, "grad_norm": 1.4954080515780126, "learning_rate": 5.486486662240263e-06, "loss": 0.411568820476532, "step": 8343 }, { "epoch": 1.3335730839926476, "grad_norm": 1.430721971527759, "learning_rate": 5.484128653084695e-06, "loss": 0.5521371364593506, "step": 8344 }, { "epoch": 1.333732917765524, "grad_norm": 1.411918549349799, "learning_rate": 5.481770959301547e-06, "loss": 0.5628967881202698, "step": 8345 }, { "epoch": 1.3338927515384, "grad_norm": 1.832412203699029, "learning_rate": 5.4794135810554676e-06, "loss": 0.5799576044082642, "step": 8346 }, { "epoch": 1.3340525853112764, "grad_norm": 1.7279284303838125, "learning_rate": 5.477056518511096e-06, "loss": 0.5658389925956726, "step": 8347 }, { "epoch": 1.3342124190841524, "grad_norm": 1.6159398735802228, "learning_rate": 5.474699771833038e-06, "loss": 0.5564031004905701, "step": 8348 }, { "epoch": 1.3343722528570288, "grad_norm": 1.6943589324184571, "learning_rate": 5.472343341185875e-06, "loss": 0.583571195602417, "step": 8349 }, { "epoch": 1.3345320866299049, "grad_norm": 1.7623953200683935, "learning_rate": 5.469987226734181e-06, "loss": 0.5065886974334717, "step": 8350 }, { "epoch": 1.3346919204027812, "grad_norm": 1.5754755058844607, "learning_rate": 5.467631428642494e-06, "loss": 0.717795729637146, "step": 8351 }, { "epoch": 1.3348517541756573, "grad_norm": 1.5467975866248387, "learning_rate": 5.46527594707533e-06, "loss": 0.5944696068763733, "step": 8352 }, { "epoch": 1.3350115879485336, "grad_norm": 1.2832409056853487, "learning_rate": 5.462920782197198e-06, "loss": 0.4746004045009613, "step": 8353 }, { "epoch": 1.3351714217214097, "grad_norm": 1.7306327530608352, "learning_rate": 5.460565934172567e-06, "loss": 0.7798213958740234, "step": 8354 }, { "epoch": 1.335331255494286, "grad_norm": 1.7388303788037935, "learning_rate": 5.458211403165893e-06, "loss": 0.627055287361145, "step": 8355 }, { "epoch": 1.335491089267162, "grad_norm": 1.5076347516259931, "learning_rate": 5.4558571893416035e-06, "loss": 0.6353896856307983, "step": 8356 }, { "epoch": 1.3356509230400384, "grad_norm": 1.4802530225687696, "learning_rate": 5.453503292864118e-06, "loss": 0.5254738926887512, "step": 8357 }, { "epoch": 1.3358107568129145, "grad_norm": 1.4507434092163367, "learning_rate": 5.451149713897817e-06, "loss": 0.558953046798706, "step": 8358 }, { "epoch": 1.3359705905857908, "grad_norm": 1.2825936546639114, "learning_rate": 5.44879645260707e-06, "loss": 0.48663508892059326, "step": 8359 }, { "epoch": 1.336130424358667, "grad_norm": 1.6067457211075844, "learning_rate": 5.446443509156218e-06, "loss": 0.47315284609794617, "step": 8360 }, { "epoch": 1.3362902581315432, "grad_norm": 1.5184106072683805, "learning_rate": 5.444090883709578e-06, "loss": 0.4240816831588745, "step": 8361 }, { "epoch": 1.3364500919044193, "grad_norm": 1.7287249979813022, "learning_rate": 5.4417385764314575e-06, "loss": 0.63267982006073, "step": 8362 }, { "epoch": 1.3366099256772956, "grad_norm": 1.510212975835052, "learning_rate": 5.4393865874861305e-06, "loss": 0.48205944895744324, "step": 8363 }, { "epoch": 1.336769759450172, "grad_norm": 1.3851364663051189, "learning_rate": 5.4370349170378515e-06, "loss": 0.5754109621047974, "step": 8364 }, { "epoch": 1.336929593223048, "grad_norm": 1.5017492928856346, "learning_rate": 5.43468356525085e-06, "loss": 0.48200520873069763, "step": 8365 }, { "epoch": 1.3370894269959241, "grad_norm": 1.5085904999591666, "learning_rate": 5.432332532289335e-06, "loss": 0.5981442928314209, "step": 8366 }, { "epoch": 1.3372492607688005, "grad_norm": 1.4690591631051004, "learning_rate": 5.4299818183175006e-06, "loss": 0.41644832491874695, "step": 8367 }, { "epoch": 1.3374090945416768, "grad_norm": 1.577223415532251, "learning_rate": 5.4276314234995085e-06, "loss": 0.5063574314117432, "step": 8368 }, { "epoch": 1.3375689283145529, "grad_norm": 1.6073903025123977, "learning_rate": 5.4252813479995e-06, "loss": 0.6363928318023682, "step": 8369 }, { "epoch": 1.337728762087429, "grad_norm": 1.6161203506494923, "learning_rate": 5.422931591981602e-06, "loss": 0.6073437929153442, "step": 8370 }, { "epoch": 1.3378885958603053, "grad_norm": 1.4874228063949295, "learning_rate": 5.420582155609906e-06, "loss": 0.49832087755203247, "step": 8371 }, { "epoch": 1.3380484296331816, "grad_norm": 1.9511010724770583, "learning_rate": 5.418233039048495e-06, "loss": 0.48629945516586304, "step": 8372 }, { "epoch": 1.3382082634060577, "grad_norm": 1.4672449344325995, "learning_rate": 5.41588424246142e-06, "loss": 0.5458687543869019, "step": 8373 }, { "epoch": 1.3383680971789338, "grad_norm": 1.348035144587453, "learning_rate": 5.413535766012712e-06, "loss": 0.5196806192398071, "step": 8374 }, { "epoch": 1.33852793095181, "grad_norm": 1.6282576234950552, "learning_rate": 5.411187609866381e-06, "loss": 0.4867172837257385, "step": 8375 }, { "epoch": 1.3386877647246864, "grad_norm": 1.7153074494126033, "learning_rate": 5.408839774186409e-06, "loss": 0.6156926155090332, "step": 8376 }, { "epoch": 1.3388475984975625, "grad_norm": 1.7215949942590048, "learning_rate": 5.406492259136771e-06, "loss": 0.6482675075531006, "step": 8377 }, { "epoch": 1.3390074322704386, "grad_norm": 1.6205704435236845, "learning_rate": 5.404145064881401e-06, "loss": 0.6390836238861084, "step": 8378 }, { "epoch": 1.339167266043315, "grad_norm": 1.3688757554696658, "learning_rate": 5.401798191584222e-06, "loss": 0.4506390690803528, "step": 8379 }, { "epoch": 1.3393270998161912, "grad_norm": 1.73139401756476, "learning_rate": 5.39945163940913e-06, "loss": 0.5927061438560486, "step": 8380 }, { "epoch": 1.3394869335890673, "grad_norm": 1.745043946560122, "learning_rate": 5.397105408519996e-06, "loss": 0.5299296379089355, "step": 8381 }, { "epoch": 1.3396467673619437, "grad_norm": 1.5216339088232027, "learning_rate": 5.394759499080679e-06, "loss": 0.6195117235183716, "step": 8382 }, { "epoch": 1.3398066011348198, "grad_norm": 1.4654418363514212, "learning_rate": 5.392413911255009e-06, "loss": 0.5481676459312439, "step": 8383 }, { "epoch": 1.339966434907696, "grad_norm": 1.5590579116599146, "learning_rate": 5.3900686452067895e-06, "loss": 0.6323949694633484, "step": 8384 }, { "epoch": 1.3401262686805722, "grad_norm": 1.3655712620505687, "learning_rate": 5.387723701099804e-06, "loss": 0.4594362676143646, "step": 8385 }, { "epoch": 1.3402861024534485, "grad_norm": 1.716201001449873, "learning_rate": 5.3853790790978215e-06, "loss": 0.6177096366882324, "step": 8386 }, { "epoch": 1.3404459362263246, "grad_norm": 1.5195814066022102, "learning_rate": 5.38303477936458e-06, "loss": 0.4553108215332031, "step": 8387 }, { "epoch": 1.340605769999201, "grad_norm": 1.404874888403692, "learning_rate": 5.380690802063794e-06, "loss": 0.5099785327911377, "step": 8388 }, { "epoch": 1.340765603772077, "grad_norm": 1.401466731737766, "learning_rate": 5.378347147359158e-06, "loss": 0.5230342745780945, "step": 8389 }, { "epoch": 1.3409254375449533, "grad_norm": 1.7512221533088643, "learning_rate": 5.376003815414349e-06, "loss": 0.6876116991043091, "step": 8390 }, { "epoch": 1.3410852713178294, "grad_norm": 1.6193302106934309, "learning_rate": 5.373660806393012e-06, "loss": 0.47197622060775757, "step": 8391 }, { "epoch": 1.3412451050907057, "grad_norm": 1.5994133688032774, "learning_rate": 5.37131812045878e-06, "loss": 0.5752505660057068, "step": 8392 }, { "epoch": 1.3414049388635818, "grad_norm": 1.4727949020689848, "learning_rate": 5.368975757775257e-06, "loss": 0.5607613921165466, "step": 8393 }, { "epoch": 1.3415647726364581, "grad_norm": 1.490915827106969, "learning_rate": 5.366633718506023e-06, "loss": 0.5659021139144897, "step": 8394 }, { "epoch": 1.3417246064093342, "grad_norm": 1.790697411886158, "learning_rate": 5.364292002814637e-06, "loss": 0.6169384121894836, "step": 8395 }, { "epoch": 1.3418844401822105, "grad_norm": 1.4514661890106098, "learning_rate": 5.361950610864635e-06, "loss": 0.5710971355438232, "step": 8396 }, { "epoch": 1.3420442739550866, "grad_norm": 1.5263391326901579, "learning_rate": 5.359609542819535e-06, "loss": 0.5233010053634644, "step": 8397 }, { "epoch": 1.342204107727963, "grad_norm": 1.7204287929575541, "learning_rate": 5.357268798842829e-06, "loss": 0.6233644485473633, "step": 8398 }, { "epoch": 1.3423639415008393, "grad_norm": 1.3625294144424087, "learning_rate": 5.3549283790979855e-06, "loss": 0.4837447702884674, "step": 8399 }, { "epoch": 1.3425237752737154, "grad_norm": 1.517364474254668, "learning_rate": 5.35258828374845e-06, "loss": 0.596089243888855, "step": 8400 }, { "epoch": 1.3426836090465915, "grad_norm": 1.5399356295746567, "learning_rate": 5.350248512957643e-06, "loss": 0.4696950316429138, "step": 8401 }, { "epoch": 1.3428434428194678, "grad_norm": 1.7650937335737127, "learning_rate": 5.347909066888974e-06, "loss": 0.5396099090576172, "step": 8402 }, { "epoch": 1.343003276592344, "grad_norm": 1.578839999555509, "learning_rate": 5.345569945705817e-06, "loss": 0.7417463660240173, "step": 8403 }, { "epoch": 1.3431631103652202, "grad_norm": 1.4575178187328397, "learning_rate": 5.343231149571528e-06, "loss": 0.4894796907901764, "step": 8404 }, { "epoch": 1.3433229441380963, "grad_norm": 1.788543713349716, "learning_rate": 5.340892678649439e-06, "loss": 0.5993387699127197, "step": 8405 }, { "epoch": 1.3434827779109726, "grad_norm": 1.4247852617517132, "learning_rate": 5.338554533102862e-06, "loss": 0.5722305178642273, "step": 8406 }, { "epoch": 1.343642611683849, "grad_norm": 1.4209962574505848, "learning_rate": 5.336216713095087e-06, "loss": 0.4454649090766907, "step": 8407 }, { "epoch": 1.343802445456725, "grad_norm": 1.6875901876689696, "learning_rate": 5.333879218789378e-06, "loss": 0.5588880777359009, "step": 8408 }, { "epoch": 1.343962279229601, "grad_norm": 1.8107215913967722, "learning_rate": 5.331542050348969e-06, "loss": 0.577934741973877, "step": 8409 }, { "epoch": 1.3441221130024774, "grad_norm": 1.4916202407179355, "learning_rate": 5.329205207937092e-06, "loss": 0.5085403919219971, "step": 8410 }, { "epoch": 1.3442819467753537, "grad_norm": 1.4200063525582627, "learning_rate": 5.326868691716934e-06, "loss": 0.5242087244987488, "step": 8411 }, { "epoch": 1.3444417805482298, "grad_norm": 1.6486598622816515, "learning_rate": 5.324532501851678e-06, "loss": 0.6031379699707031, "step": 8412 }, { "epoch": 1.344601614321106, "grad_norm": 1.6438754683518253, "learning_rate": 5.322196638504471e-06, "loss": 0.4636077880859375, "step": 8413 }, { "epoch": 1.3447614480939822, "grad_norm": 1.3612965893859685, "learning_rate": 5.31986110183844e-06, "loss": 0.573548436164856, "step": 8414 }, { "epoch": 1.3449212818668586, "grad_norm": 1.3892408910863376, "learning_rate": 5.317525892016693e-06, "loss": 0.44326338171958923, "step": 8415 }, { "epoch": 1.3450811156397346, "grad_norm": 1.4657366323803793, "learning_rate": 5.315191009202305e-06, "loss": 0.5865455269813538, "step": 8416 }, { "epoch": 1.345240949412611, "grad_norm": 1.6012392875525676, "learning_rate": 5.312856453558348e-06, "loss": 0.5065334439277649, "step": 8417 }, { "epoch": 1.345400783185487, "grad_norm": 1.8465140553580206, "learning_rate": 5.3105222252478536e-06, "loss": 0.5951703190803528, "step": 8418 }, { "epoch": 1.3455606169583634, "grad_norm": 1.5919974733113413, "learning_rate": 5.308188324433835e-06, "loss": 0.5733886361122131, "step": 8419 }, { "epoch": 1.3457204507312395, "grad_norm": 1.750115394300093, "learning_rate": 5.3058547512792805e-06, "loss": 0.6360588073730469, "step": 8420 }, { "epoch": 1.3458802845041158, "grad_norm": 1.483015700121319, "learning_rate": 5.303521505947167e-06, "loss": 0.5088040828704834, "step": 8421 }, { "epoch": 1.3460401182769919, "grad_norm": 1.4782129325039168, "learning_rate": 5.301188588600434e-06, "loss": 0.5739944577217102, "step": 8422 }, { "epoch": 1.3461999520498682, "grad_norm": 1.5173219027298355, "learning_rate": 5.298855999402008e-06, "loss": 0.7092975974082947, "step": 8423 }, { "epoch": 1.3463597858227443, "grad_norm": 1.3484790131202846, "learning_rate": 5.296523738514785e-06, "loss": 0.5270177125930786, "step": 8424 }, { "epoch": 1.3465196195956206, "grad_norm": 1.711775541201873, "learning_rate": 5.294191806101638e-06, "loss": 0.4510439932346344, "step": 8425 }, { "epoch": 1.3466794533684967, "grad_norm": 1.553729046012368, "learning_rate": 5.291860202325432e-06, "loss": 0.5406936407089233, "step": 8426 }, { "epoch": 1.346839287141373, "grad_norm": 1.6774407838296554, "learning_rate": 5.2895289273489915e-06, "loss": 0.5582761168479919, "step": 8427 }, { "epoch": 1.3469991209142491, "grad_norm": 1.6738594855231905, "learning_rate": 5.287197981335124e-06, "loss": 0.6225506067276001, "step": 8428 }, { "epoch": 1.3471589546871254, "grad_norm": 1.4853805670415754, "learning_rate": 5.284867364446613e-06, "loss": 0.563236653804779, "step": 8429 }, { "epoch": 1.3473187884600015, "grad_norm": 1.4859348858938923, "learning_rate": 5.282537076846226e-06, "loss": 0.5610422492027283, "step": 8430 }, { "epoch": 1.3474786222328778, "grad_norm": 1.57629578783335, "learning_rate": 5.280207118696695e-06, "loss": 0.5346118807792664, "step": 8431 }, { "epoch": 1.347638456005754, "grad_norm": 1.4637976137428468, "learning_rate": 5.277877490160744e-06, "loss": 0.5909982919692993, "step": 8432 }, { "epoch": 1.3477982897786303, "grad_norm": 1.5970449083626395, "learning_rate": 5.275548191401063e-06, "loss": 0.5129443407058716, "step": 8433 }, { "epoch": 1.3479581235515066, "grad_norm": 1.5553157694455162, "learning_rate": 5.2732192225803195e-06, "loss": 0.5928526520729065, "step": 8434 }, { "epoch": 1.3481179573243827, "grad_norm": 1.5569368798214762, "learning_rate": 5.270890583861159e-06, "loss": 0.7077252864837646, "step": 8435 }, { "epoch": 1.3482777910972588, "grad_norm": 1.3503887047883527, "learning_rate": 5.26856227540621e-06, "loss": 0.5736608505249023, "step": 8436 }, { "epoch": 1.348437624870135, "grad_norm": 1.7214200825027703, "learning_rate": 5.266234297378075e-06, "loss": 0.5308428406715393, "step": 8437 }, { "epoch": 1.3485974586430114, "grad_norm": 1.5733831424069473, "learning_rate": 5.263906649939325e-06, "loss": 0.6333117485046387, "step": 8438 }, { "epoch": 1.3487572924158875, "grad_norm": 1.6841198735423237, "learning_rate": 5.26157933325252e-06, "loss": 0.627062976360321, "step": 8439 }, { "epoch": 1.3489171261887636, "grad_norm": 1.5943370423107421, "learning_rate": 5.259252347480184e-06, "loss": 0.5195136666297913, "step": 8440 }, { "epoch": 1.34907695996164, "grad_norm": 2.1047346912971756, "learning_rate": 5.256925692784835e-06, "loss": 0.718560516834259, "step": 8441 }, { "epoch": 1.3492367937345162, "grad_norm": 1.416800107615722, "learning_rate": 5.254599369328954e-06, "loss": 0.4966932237148285, "step": 8442 }, { "epoch": 1.3493966275073923, "grad_norm": 1.4525662945809958, "learning_rate": 5.252273377275004e-06, "loss": 0.46817493438720703, "step": 8443 }, { "epoch": 1.3495564612802684, "grad_norm": 1.49119811287778, "learning_rate": 5.2499477167854225e-06, "loss": 0.7344421148300171, "step": 8444 }, { "epoch": 1.3497162950531447, "grad_norm": 1.3727183665134834, "learning_rate": 5.247622388022624e-06, "loss": 0.5841885805130005, "step": 8445 }, { "epoch": 1.349876128826021, "grad_norm": 1.5062877758493853, "learning_rate": 5.245297391149005e-06, "loss": 0.42877650260925293, "step": 8446 }, { "epoch": 1.3500359625988971, "grad_norm": 1.3275493751384115, "learning_rate": 5.242972726326934e-06, "loss": 0.425706148147583, "step": 8447 }, { "epoch": 1.3501957963717732, "grad_norm": 1.4836844326259608, "learning_rate": 5.240648393718758e-06, "loss": 0.5607819557189941, "step": 8448 }, { "epoch": 1.3503556301446495, "grad_norm": 1.559875044224167, "learning_rate": 5.238324393486793e-06, "loss": 0.6361261010169983, "step": 8449 }, { "epoch": 1.3505154639175259, "grad_norm": 1.179706159480889, "learning_rate": 5.236000725793347e-06, "loss": 0.39283573627471924, "step": 8450 }, { "epoch": 1.350675297690402, "grad_norm": 1.7823352339612424, "learning_rate": 5.233677390800698e-06, "loss": 0.8346724510192871, "step": 8451 }, { "epoch": 1.3508351314632783, "grad_norm": 1.5608492343568268, "learning_rate": 5.231354388671095e-06, "loss": 0.523604691028595, "step": 8452 }, { "epoch": 1.3509949652361544, "grad_norm": 1.4734002932406836, "learning_rate": 5.229031719566768e-06, "loss": 0.5508921146392822, "step": 8453 }, { "epoch": 1.3511547990090307, "grad_norm": 1.8338215140439478, "learning_rate": 5.2267093836499285e-06, "loss": 0.46721094846725464, "step": 8454 }, { "epoch": 1.3513146327819068, "grad_norm": 2.085146441850766, "learning_rate": 5.22438738108275e-06, "loss": 0.6866000890731812, "step": 8455 }, { "epoch": 1.351474466554783, "grad_norm": 1.7908712208877686, "learning_rate": 5.2220657120274046e-06, "loss": 0.6710063219070435, "step": 8456 }, { "epoch": 1.3516343003276592, "grad_norm": 1.6338423282306178, "learning_rate": 5.2197443766460255e-06, "loss": 0.5975269079208374, "step": 8457 }, { "epoch": 1.3517941341005355, "grad_norm": 1.6620033913084942, "learning_rate": 5.217423375100725e-06, "loss": 0.5170085430145264, "step": 8458 }, { "epoch": 1.3519539678734116, "grad_norm": 1.5270363137045153, "learning_rate": 5.2151027075535945e-06, "loss": 0.5244710445404053, "step": 8459 }, { "epoch": 1.352113801646288, "grad_norm": 1.4524963906004924, "learning_rate": 5.212782374166696e-06, "loss": 0.5682483911514282, "step": 8460 }, { "epoch": 1.352273635419164, "grad_norm": 1.5455341472490316, "learning_rate": 5.210462375102082e-06, "loss": 0.5388128757476807, "step": 8461 }, { "epoch": 1.3524334691920403, "grad_norm": 1.779234121720944, "learning_rate": 5.208142710521771e-06, "loss": 0.5406386852264404, "step": 8462 }, { "epoch": 1.3525933029649164, "grad_norm": 1.92204341263158, "learning_rate": 5.205823380587755e-06, "loss": 0.575678288936615, "step": 8463 }, { "epoch": 1.3527531367377927, "grad_norm": 1.4782705431079153, "learning_rate": 5.2035043854620145e-06, "loss": 0.5567048788070679, "step": 8464 }, { "epoch": 1.3529129705106688, "grad_norm": 1.6898831501634684, "learning_rate": 5.201185725306492e-06, "loss": 0.5067652463912964, "step": 8465 }, { "epoch": 1.3530728042835451, "grad_norm": 1.4958449915215324, "learning_rate": 5.198867400283123e-06, "loss": 0.5125701427459717, "step": 8466 }, { "epoch": 1.3532326380564212, "grad_norm": 1.6198479213553834, "learning_rate": 5.196549410553806e-06, "loss": 0.5210260152816772, "step": 8467 }, { "epoch": 1.3533924718292976, "grad_norm": 1.4038801273418187, "learning_rate": 5.194231756280423e-06, "loss": 0.5688865184783936, "step": 8468 }, { "epoch": 1.3535523056021739, "grad_norm": 1.9533153507321952, "learning_rate": 5.191914437624826e-06, "loss": 0.5914533138275146, "step": 8469 }, { "epoch": 1.35371213937505, "grad_norm": 1.7207856523955218, "learning_rate": 5.189597454748851e-06, "loss": 0.6003538370132446, "step": 8470 }, { "epoch": 1.353871973147926, "grad_norm": 1.2781650996994536, "learning_rate": 5.187280807814314e-06, "loss": 0.43448692560195923, "step": 8471 }, { "epoch": 1.3540318069208024, "grad_norm": 1.2734145595963695, "learning_rate": 5.184964496982998e-06, "loss": 0.464897096157074, "step": 8472 }, { "epoch": 1.3541916406936787, "grad_norm": 1.7150652613568802, "learning_rate": 5.182648522416664e-06, "loss": 0.5513982176780701, "step": 8473 }, { "epoch": 1.3543514744665548, "grad_norm": 1.440018932825547, "learning_rate": 5.1803328842770505e-06, "loss": 0.6227215528488159, "step": 8474 }, { "epoch": 1.354511308239431, "grad_norm": 1.513629260421395, "learning_rate": 5.1780175827258715e-06, "loss": 0.5350911021232605, "step": 8475 }, { "epoch": 1.3546711420123072, "grad_norm": 1.3290468433215563, "learning_rate": 5.175702617924827e-06, "loss": 0.44067203998565674, "step": 8476 }, { "epoch": 1.3548309757851835, "grad_norm": 1.3222260050724726, "learning_rate": 5.173387990035583e-06, "loss": 0.5020850896835327, "step": 8477 }, { "epoch": 1.3549908095580596, "grad_norm": 1.4198046691228785, "learning_rate": 5.171073699219782e-06, "loss": 0.3870032727718353, "step": 8478 }, { "epoch": 1.3551506433309357, "grad_norm": 1.516261743757551, "learning_rate": 5.168759745639047e-06, "loss": 0.6137616038322449, "step": 8479 }, { "epoch": 1.355310477103812, "grad_norm": 1.400826947803709, "learning_rate": 5.166446129454974e-06, "loss": 0.49273258447647095, "step": 8480 }, { "epoch": 1.3554703108766883, "grad_norm": 1.3683606787960174, "learning_rate": 5.164132850829144e-06, "loss": 0.6073681116104126, "step": 8481 }, { "epoch": 1.3556301446495644, "grad_norm": 1.559080608193183, "learning_rate": 5.161819909923106e-06, "loss": 0.6108312606811523, "step": 8482 }, { "epoch": 1.3557899784224405, "grad_norm": 1.512081424570931, "learning_rate": 5.1595073068983855e-06, "loss": 0.5901045203208923, "step": 8483 }, { "epoch": 1.3559498121953169, "grad_norm": 1.6517468701376885, "learning_rate": 5.1571950419164825e-06, "loss": 0.5228683948516846, "step": 8484 }, { "epoch": 1.3561096459681932, "grad_norm": 1.6055088638115003, "learning_rate": 5.1548831151388865e-06, "loss": 0.4814212918281555, "step": 8485 }, { "epoch": 1.3562694797410693, "grad_norm": 1.3434785648675378, "learning_rate": 5.152571526727051e-06, "loss": 0.5870217680931091, "step": 8486 }, { "epoch": 1.3564293135139456, "grad_norm": 1.3263438860965302, "learning_rate": 5.150260276842407e-06, "loss": 0.5037766695022583, "step": 8487 }, { "epoch": 1.3565891472868217, "grad_norm": 1.4090519244449695, "learning_rate": 5.1479493656463655e-06, "loss": 0.6220186948776245, "step": 8488 }, { "epoch": 1.356748981059698, "grad_norm": 1.4483638537386618, "learning_rate": 5.145638793300307e-06, "loss": 0.4880046546459198, "step": 8489 }, { "epoch": 1.356908814832574, "grad_norm": 1.5884026406648815, "learning_rate": 5.143328559965599e-06, "loss": 0.5761159658432007, "step": 8490 }, { "epoch": 1.3570686486054504, "grad_norm": 1.7328344987216922, "learning_rate": 5.141018665803583e-06, "loss": 0.5022734999656677, "step": 8491 }, { "epoch": 1.3572284823783265, "grad_norm": 1.460072936837356, "learning_rate": 5.138709110975572e-06, "loss": 0.5053284168243408, "step": 8492 }, { "epoch": 1.3573883161512028, "grad_norm": 1.6351882549386272, "learning_rate": 5.136399895642854e-06, "loss": 0.4096653461456299, "step": 8493 }, { "epoch": 1.357548149924079, "grad_norm": 1.5710541656420511, "learning_rate": 5.134091019966699e-06, "loss": 0.5564148426055908, "step": 8494 }, { "epoch": 1.3577079836969552, "grad_norm": 1.4773445736766082, "learning_rate": 5.131782484108343e-06, "loss": 0.5041317343711853, "step": 8495 }, { "epoch": 1.3578678174698313, "grad_norm": 1.610120641757988, "learning_rate": 5.129474288229018e-06, "loss": 0.5360857844352722, "step": 8496 }, { "epoch": 1.3580276512427076, "grad_norm": 1.5925448234853834, "learning_rate": 5.127166432489915e-06, "loss": 0.4946759343147278, "step": 8497 }, { "epoch": 1.3581874850155837, "grad_norm": 1.6541237881257207, "learning_rate": 5.124858917052206e-06, "loss": 0.5652698278427124, "step": 8498 }, { "epoch": 1.35834731878846, "grad_norm": 1.5173956774740442, "learning_rate": 5.1225517420770355e-06, "loss": 0.6020951271057129, "step": 8499 }, { "epoch": 1.3585071525613361, "grad_norm": 1.3896388474590267, "learning_rate": 5.120244907725536e-06, "loss": 0.4560049772262573, "step": 8500 }, { "epoch": 1.3586669863342125, "grad_norm": 1.5906067839428037, "learning_rate": 5.117938414158807e-06, "loss": 0.5734023451805115, "step": 8501 }, { "epoch": 1.3588268201070886, "grad_norm": 1.5780841402570367, "learning_rate": 5.1156322615379225e-06, "loss": 0.4918396472930908, "step": 8502 }, { "epoch": 1.3589866538799649, "grad_norm": 1.450966763358204, "learning_rate": 5.113326450023939e-06, "loss": 0.4827936291694641, "step": 8503 }, { "epoch": 1.359146487652841, "grad_norm": 1.4620975517023034, "learning_rate": 5.11102097977788e-06, "loss": 0.4364798367023468, "step": 8504 }, { "epoch": 1.3593063214257173, "grad_norm": 1.5299357392808888, "learning_rate": 5.108715850960759e-06, "loss": 0.5071976184844971, "step": 8505 }, { "epoch": 1.3594661551985934, "grad_norm": 1.2275717276622256, "learning_rate": 5.1064110637335576e-06, "loss": 0.45174598693847656, "step": 8506 }, { "epoch": 1.3596259889714697, "grad_norm": 1.8138122093130271, "learning_rate": 5.1041066182572296e-06, "loss": 0.4956236779689789, "step": 8507 }, { "epoch": 1.359785822744346, "grad_norm": 1.678638701509435, "learning_rate": 5.101802514692713e-06, "loss": 0.5456442832946777, "step": 8508 }, { "epoch": 1.359945656517222, "grad_norm": 1.5828483142096637, "learning_rate": 5.099498753200911e-06, "loss": 0.4958608150482178, "step": 8509 }, { "epoch": 1.3601054902900982, "grad_norm": 1.400951017311062, "learning_rate": 5.097195333942717e-06, "loss": 0.43479329347610474, "step": 8510 }, { "epoch": 1.3602653240629745, "grad_norm": 1.5496041711158093, "learning_rate": 5.094892257078996e-06, "loss": 0.43168890476226807, "step": 8511 }, { "epoch": 1.3604251578358508, "grad_norm": 1.3186922542565442, "learning_rate": 5.0925895227705815e-06, "loss": 0.4386729598045349, "step": 8512 }, { "epoch": 1.360584991608727, "grad_norm": 1.6152115489509828, "learning_rate": 5.0902871311782905e-06, "loss": 0.548802375793457, "step": 8513 }, { "epoch": 1.360744825381603, "grad_norm": 1.6603684777462973, "learning_rate": 5.0879850824629096e-06, "loss": 0.6072713136672974, "step": 8514 }, { "epoch": 1.3609046591544793, "grad_norm": 1.7204588822064466, "learning_rate": 5.085683376785212e-06, "loss": 0.6038908362388611, "step": 8515 }, { "epoch": 1.3610644929273557, "grad_norm": 1.4215632487939238, "learning_rate": 5.083382014305939e-06, "loss": 0.5314810872077942, "step": 8516 }, { "epoch": 1.3612243267002317, "grad_norm": 1.7043748643182335, "learning_rate": 5.081080995185808e-06, "loss": 0.6299760341644287, "step": 8517 }, { "epoch": 1.3613841604731078, "grad_norm": 1.6182012490468998, "learning_rate": 5.078780319585513e-06, "loss": 0.5247477889060974, "step": 8518 }, { "epoch": 1.3615439942459842, "grad_norm": 1.5187571253614587, "learning_rate": 5.076479987665725e-06, "loss": 0.5131157636642456, "step": 8519 }, { "epoch": 1.3617038280188605, "grad_norm": 1.5153646322366607, "learning_rate": 5.074179999587094e-06, "loss": 0.5995645523071289, "step": 8520 }, { "epoch": 1.3618636617917366, "grad_norm": 1.596169879502951, "learning_rate": 5.071880355510244e-06, "loss": 0.5718592405319214, "step": 8521 }, { "epoch": 1.3620234955646129, "grad_norm": 1.4099488494616053, "learning_rate": 5.069581055595769e-06, "loss": 0.44840434193611145, "step": 8522 }, { "epoch": 1.362183329337489, "grad_norm": 1.471399168970184, "learning_rate": 5.0672821000042475e-06, "loss": 0.5001904964447021, "step": 8523 }, { "epoch": 1.3623431631103653, "grad_norm": 1.8059508401445852, "learning_rate": 5.064983488896224e-06, "loss": 0.7130011916160583, "step": 8524 }, { "epoch": 1.3625029968832414, "grad_norm": 1.6011257327021935, "learning_rate": 5.0626852224322365e-06, "loss": 0.49719786643981934, "step": 8525 }, { "epoch": 1.3626628306561177, "grad_norm": 1.6857790043308123, "learning_rate": 5.060387300772781e-06, "loss": 0.76246178150177, "step": 8526 }, { "epoch": 1.3628226644289938, "grad_norm": 1.9164009396260568, "learning_rate": 5.0580897240783365e-06, "loss": 0.4410945177078247, "step": 8527 }, { "epoch": 1.3629824982018701, "grad_norm": 1.61884192087858, "learning_rate": 5.055792492509355e-06, "loss": 0.6086844205856323, "step": 8528 }, { "epoch": 1.3631423319747462, "grad_norm": 1.4258685050516406, "learning_rate": 5.053495606226271e-06, "loss": 0.6113013029098511, "step": 8529 }, { "epoch": 1.3633021657476225, "grad_norm": 1.417915320707245, "learning_rate": 5.0511990653894915e-06, "loss": 0.4944773018360138, "step": 8530 }, { "epoch": 1.3634619995204986, "grad_norm": 1.6605601402724715, "learning_rate": 5.0489028701593986e-06, "loss": 0.4870023727416992, "step": 8531 }, { "epoch": 1.363621833293375, "grad_norm": 1.4929763860560785, "learning_rate": 5.046607020696349e-06, "loss": 0.5922088623046875, "step": 8532 }, { "epoch": 1.363781667066251, "grad_norm": 1.6495940479912665, "learning_rate": 5.044311517160677e-06, "loss": 0.6182898283004761, "step": 8533 }, { "epoch": 1.3639415008391274, "grad_norm": 1.5815301380326818, "learning_rate": 5.042016359712688e-06, "loss": 0.5448101758956909, "step": 8534 }, { "epoch": 1.3641013346120034, "grad_norm": 1.6147726576372807, "learning_rate": 5.039721548512676e-06, "loss": 0.4879504442214966, "step": 8535 }, { "epoch": 1.3642611683848798, "grad_norm": 1.5737830979252294, "learning_rate": 5.037427083720898e-06, "loss": 0.6397386193275452, "step": 8536 }, { "epoch": 1.3644210021577559, "grad_norm": 1.3539889882968066, "learning_rate": 5.0351329654975915e-06, "loss": 0.45551443099975586, "step": 8537 }, { "epoch": 1.3645808359306322, "grad_norm": 1.452056938795584, "learning_rate": 5.03283919400297e-06, "loss": 0.54993736743927, "step": 8538 }, { "epoch": 1.3647406697035083, "grad_norm": 1.5801240971083819, "learning_rate": 5.0305457693972185e-06, "loss": 0.4696599841117859, "step": 8539 }, { "epoch": 1.3649005034763846, "grad_norm": 1.2559971833358334, "learning_rate": 5.0282526918405085e-06, "loss": 0.47968119382858276, "step": 8540 }, { "epoch": 1.3650603372492607, "grad_norm": 1.720669490014647, "learning_rate": 5.025959961492976e-06, "loss": 0.5765263438224792, "step": 8541 }, { "epoch": 1.365220171022137, "grad_norm": 1.5102949475815022, "learning_rate": 5.023667578514739e-06, "loss": 0.6814298629760742, "step": 8542 }, { "epoch": 1.3653800047950133, "grad_norm": 1.5648635642966318, "learning_rate": 5.02137554306589e-06, "loss": 0.4369284510612488, "step": 8543 }, { "epoch": 1.3655398385678894, "grad_norm": 1.3465057869865198, "learning_rate": 5.01908385530649e-06, "loss": 0.5115721821784973, "step": 8544 }, { "epoch": 1.3656996723407655, "grad_norm": 1.6215194481165334, "learning_rate": 5.016792515396591e-06, "loss": 0.49034029245376587, "step": 8545 }, { "epoch": 1.3658595061136418, "grad_norm": 1.7208839017396136, "learning_rate": 5.014501523496209e-06, "loss": 0.6448919773101807, "step": 8546 }, { "epoch": 1.3660193398865181, "grad_norm": 1.5642022862685976, "learning_rate": 5.012210879765339e-06, "loss": 0.5613330602645874, "step": 8547 }, { "epoch": 1.3661791736593942, "grad_norm": 1.6368153347437435, "learning_rate": 5.009920584363946e-06, "loss": 0.6638606786727905, "step": 8548 }, { "epoch": 1.3663390074322703, "grad_norm": 1.7289127943651605, "learning_rate": 5.007630637451981e-06, "loss": 0.5536227822303772, "step": 8549 }, { "epoch": 1.3664988412051466, "grad_norm": 1.4150550829540995, "learning_rate": 5.005341039189368e-06, "loss": 0.4066195487976074, "step": 8550 }, { "epoch": 1.366658674978023, "grad_norm": 1.7385014221798534, "learning_rate": 5.003051789736005e-06, "loss": 0.5678462982177734, "step": 8551 }, { "epoch": 1.366818508750899, "grad_norm": 1.5556778971995935, "learning_rate": 5.00076288925176e-06, "loss": 0.5614588260650635, "step": 8552 }, { "epoch": 1.3669783425237751, "grad_norm": 1.5070393794037007, "learning_rate": 4.998474337896485e-06, "loss": 0.5582751631736755, "step": 8553 }, { "epoch": 1.3671381762966515, "grad_norm": 1.5651398359262854, "learning_rate": 4.996186135829998e-06, "loss": 0.631232738494873, "step": 8554 }, { "epoch": 1.3672980100695278, "grad_norm": 1.6225059605463248, "learning_rate": 4.9938982832121085e-06, "loss": 0.5866076946258545, "step": 8555 }, { "epoch": 1.3674578438424039, "grad_norm": 1.6459056676716357, "learning_rate": 4.991610780202587e-06, "loss": 0.5937917232513428, "step": 8556 }, { "epoch": 1.3676176776152802, "grad_norm": 1.409730785130059, "learning_rate": 4.989323626961186e-06, "loss": 0.4773433208465576, "step": 8557 }, { "epoch": 1.3677775113881563, "grad_norm": 1.4952386921476335, "learning_rate": 4.98703682364763e-06, "loss": 0.5796149969100952, "step": 8558 }, { "epoch": 1.3679373451610326, "grad_norm": 1.4500795675558331, "learning_rate": 4.9847503704216186e-06, "loss": 0.4770885407924652, "step": 8559 }, { "epoch": 1.3680971789339087, "grad_norm": 1.6155406878732563, "learning_rate": 4.982464267442837e-06, "loss": 0.6190887689590454, "step": 8560 }, { "epoch": 1.368257012706785, "grad_norm": 1.5720582063241917, "learning_rate": 4.980178514870936e-06, "loss": 0.49763432145118713, "step": 8561 }, { "epoch": 1.368416846479661, "grad_norm": 1.8104297450579474, "learning_rate": 4.977893112865543e-06, "loss": 0.5524584054946899, "step": 8562 }, { "epoch": 1.3685766802525374, "grad_norm": 1.6107509444950403, "learning_rate": 4.9756080615862575e-06, "loss": 0.610517680644989, "step": 8563 }, { "epoch": 1.3687365140254135, "grad_norm": 1.6199492342956598, "learning_rate": 4.97332336119267e-06, "loss": 0.5778976678848267, "step": 8564 }, { "epoch": 1.3688963477982898, "grad_norm": 1.8961116879644284, "learning_rate": 4.971039011844331e-06, "loss": 0.5685814023017883, "step": 8565 }, { "epoch": 1.369056181571166, "grad_norm": 1.6529369704832872, "learning_rate": 4.968755013700769e-06, "loss": 0.5963383913040161, "step": 8566 }, { "epoch": 1.3692160153440422, "grad_norm": 1.5601833015971929, "learning_rate": 4.966471366921493e-06, "loss": 0.5142366886138916, "step": 8567 }, { "epoch": 1.3693758491169183, "grad_norm": 1.531078420355885, "learning_rate": 4.964188071665981e-06, "loss": 0.552703320980072, "step": 8568 }, { "epoch": 1.3695356828897947, "grad_norm": 1.6663637085929373, "learning_rate": 4.961905128093694e-06, "loss": 0.530907392501831, "step": 8569 }, { "epoch": 1.3696955166626708, "grad_norm": 1.6864107483602977, "learning_rate": 4.959622536364063e-06, "loss": 0.5696729421615601, "step": 8570 }, { "epoch": 1.369855350435547, "grad_norm": 1.5657735777464632, "learning_rate": 4.9573402966365e-06, "loss": 0.5229390859603882, "step": 8571 }, { "epoch": 1.3700151842084232, "grad_norm": 1.4507595367670798, "learning_rate": 4.955058409070385e-06, "loss": 0.4919363558292389, "step": 8572 }, { "epoch": 1.3701750179812995, "grad_norm": 1.3971113107552695, "learning_rate": 4.952776873825077e-06, "loss": 0.4543207287788391, "step": 8573 }, { "epoch": 1.3703348517541756, "grad_norm": 1.3872258775933328, "learning_rate": 4.950495691059908e-06, "loss": 0.5006150007247925, "step": 8574 }, { "epoch": 1.370494685527052, "grad_norm": 1.6491596256596799, "learning_rate": 4.948214860934193e-06, "loss": 0.5782968997955322, "step": 8575 }, { "epoch": 1.370654519299928, "grad_norm": 1.5853679778066903, "learning_rate": 4.945934383607215e-06, "loss": 0.5557270646095276, "step": 8576 }, { "epoch": 1.3708143530728043, "grad_norm": 1.6695784022030187, "learning_rate": 4.943654259238233e-06, "loss": 0.48609036207199097, "step": 8577 }, { "epoch": 1.3709741868456806, "grad_norm": 1.5418867011186497, "learning_rate": 4.941374487986481e-06, "loss": 0.634465217590332, "step": 8578 }, { "epoch": 1.3711340206185567, "grad_norm": 1.5015190449472957, "learning_rate": 4.9390950700111766e-06, "loss": 0.4703825116157532, "step": 8579 }, { "epoch": 1.3712938543914328, "grad_norm": 1.5534030766037372, "learning_rate": 4.936816005471502e-06, "loss": 0.580722987651825, "step": 8580 }, { "epoch": 1.3714536881643091, "grad_norm": 1.4195432036927138, "learning_rate": 4.934537294526619e-06, "loss": 0.490252286195755, "step": 8581 }, { "epoch": 1.3716135219371854, "grad_norm": 1.837395406425251, "learning_rate": 4.932258937335666e-06, "loss": 0.5958240628242493, "step": 8582 }, { "epoch": 1.3717733557100615, "grad_norm": 1.5657782441292156, "learning_rate": 4.929980934057751e-06, "loss": 0.5343270301818848, "step": 8583 }, { "epoch": 1.3719331894829376, "grad_norm": 1.5279761479246723, "learning_rate": 4.927703284851968e-06, "loss": 0.4597705006599426, "step": 8584 }, { "epoch": 1.372093023255814, "grad_norm": 1.5797516149325201, "learning_rate": 4.925425989877377e-06, "loss": 0.4319329857826233, "step": 8585 }, { "epoch": 1.3722528570286903, "grad_norm": 1.6287408611410092, "learning_rate": 4.9231490492930165e-06, "loss": 0.49927425384521484, "step": 8586 }, { "epoch": 1.3724126908015664, "grad_norm": 1.4704834351927278, "learning_rate": 4.9208724632579e-06, "loss": 0.5993574857711792, "step": 8587 }, { "epoch": 1.3725725245744425, "grad_norm": 2.0975926967628564, "learning_rate": 4.918596231931012e-06, "loss": 0.5562351942062378, "step": 8588 }, { "epoch": 1.3727323583473188, "grad_norm": 1.5517552885256565, "learning_rate": 4.916320355471325e-06, "loss": 0.5601184368133545, "step": 8589 }, { "epoch": 1.372892192120195, "grad_norm": 1.4696919768917964, "learning_rate": 4.914044834037768e-06, "loss": 0.5255942344665527, "step": 8590 }, { "epoch": 1.3730520258930712, "grad_norm": 1.5758419538291726, "learning_rate": 4.911769667789266e-06, "loss": 0.5213897228240967, "step": 8591 }, { "epoch": 1.3732118596659473, "grad_norm": 1.624093335977181, "learning_rate": 4.909494856884702e-06, "loss": 0.5545916557312012, "step": 8592 }, { "epoch": 1.3733716934388236, "grad_norm": 1.7234717621077558, "learning_rate": 4.907220401482939e-06, "loss": 0.49511411786079407, "step": 8593 }, { "epoch": 1.3735315272117, "grad_norm": 1.5838738590860884, "learning_rate": 4.904946301742824e-06, "loss": 0.5091565847396851, "step": 8594 }, { "epoch": 1.373691360984576, "grad_norm": 1.7328189503316387, "learning_rate": 4.902672557823167e-06, "loss": 0.4969369173049927, "step": 8595 }, { "epoch": 1.3738511947574523, "grad_norm": 1.741153066129906, "learning_rate": 4.90039916988276e-06, "loss": 0.6275471448898315, "step": 8596 }, { "epoch": 1.3740110285303284, "grad_norm": 1.5152113699856387, "learning_rate": 4.898126138080368e-06, "loss": 0.5540667772293091, "step": 8597 }, { "epoch": 1.3741708623032047, "grad_norm": 1.6002096450649856, "learning_rate": 4.895853462574725e-06, "loss": 0.5824332237243652, "step": 8598 }, { "epoch": 1.3743306960760808, "grad_norm": 1.6385573210748565, "learning_rate": 4.89358114352456e-06, "loss": 0.6010724306106567, "step": 8599 }, { "epoch": 1.3744905298489571, "grad_norm": 1.5110930152603985, "learning_rate": 4.8913091810885525e-06, "loss": 0.5955989956855774, "step": 8600 }, { "epoch": 1.3746503636218332, "grad_norm": 1.6309053293056233, "learning_rate": 4.889037575425375e-06, "loss": 0.5689385533332825, "step": 8601 }, { "epoch": 1.3748101973947096, "grad_norm": 1.2296723206854803, "learning_rate": 4.886766326693664e-06, "loss": 0.36516404151916504, "step": 8602 }, { "epoch": 1.3749700311675856, "grad_norm": 1.563134452154056, "learning_rate": 4.884495435052033e-06, "loss": 0.5636796951293945, "step": 8603 }, { "epoch": 1.375129864940462, "grad_norm": 1.504584773423174, "learning_rate": 4.88222490065908e-06, "loss": 0.7120661735534668, "step": 8604 }, { "epoch": 1.375289698713338, "grad_norm": 1.6717110122730447, "learning_rate": 4.87995472367337e-06, "loss": 0.6001126766204834, "step": 8605 }, { "epoch": 1.3754495324862144, "grad_norm": 1.3766880968720265, "learning_rate": 4.877684904253441e-06, "loss": 0.5451280474662781, "step": 8606 }, { "epoch": 1.3756093662590905, "grad_norm": 2.1890567874974867, "learning_rate": 4.87541544255781e-06, "loss": 0.4977063536643982, "step": 8607 }, { "epoch": 1.3757692000319668, "grad_norm": 1.411172877817821, "learning_rate": 4.873146338744965e-06, "loss": 0.4831295907497406, "step": 8608 }, { "epoch": 1.3759290338048429, "grad_norm": 1.6327740245534748, "learning_rate": 4.870877592973378e-06, "loss": 0.5549312829971313, "step": 8609 }, { "epoch": 1.3760888675777192, "grad_norm": 1.6160562045806688, "learning_rate": 4.868609205401486e-06, "loss": 0.5864373445510864, "step": 8610 }, { "epoch": 1.3762487013505953, "grad_norm": 1.8447712927907403, "learning_rate": 4.866341176187709e-06, "loss": 0.6237846612930298, "step": 8611 }, { "epoch": 1.3764085351234716, "grad_norm": 1.4322768163044453, "learning_rate": 4.864073505490437e-06, "loss": 0.5259350538253784, "step": 8612 }, { "epoch": 1.376568368896348, "grad_norm": 1.6274184854883225, "learning_rate": 4.8618061934680294e-06, "loss": 0.6549063920974731, "step": 8613 }, { "epoch": 1.376728202669224, "grad_norm": 1.4978999450816564, "learning_rate": 4.859539240278838e-06, "loss": 0.49474209547042847, "step": 8614 }, { "epoch": 1.3768880364421001, "grad_norm": 1.7231245088878573, "learning_rate": 4.857272646081172e-06, "loss": 0.5386344194412231, "step": 8615 }, { "epoch": 1.3770478702149764, "grad_norm": 1.5877222072385482, "learning_rate": 4.855006411033326e-06, "loss": 0.4870685935020447, "step": 8616 }, { "epoch": 1.3772077039878527, "grad_norm": 1.6213429849476988, "learning_rate": 4.852740535293563e-06, "loss": 0.643791675567627, "step": 8617 }, { "epoch": 1.3773675377607288, "grad_norm": 1.3716291434722558, "learning_rate": 4.85047501902012e-06, "loss": 0.40962937474250793, "step": 8618 }, { "epoch": 1.377527371533605, "grad_norm": 1.3807337434521794, "learning_rate": 4.848209862371221e-06, "loss": 0.46156781911849976, "step": 8619 }, { "epoch": 1.3776872053064813, "grad_norm": 1.5526230704690562, "learning_rate": 4.845945065505052e-06, "loss": 0.502046525478363, "step": 8620 }, { "epoch": 1.3778470390793576, "grad_norm": 1.5574386929726636, "learning_rate": 4.843680628579779e-06, "loss": 0.43440407514572144, "step": 8621 }, { "epoch": 1.3780068728522337, "grad_norm": 1.6729377916417334, "learning_rate": 4.841416551753542e-06, "loss": 0.6208820343017578, "step": 8622 }, { "epoch": 1.3781667066251098, "grad_norm": 1.4954803624961677, "learning_rate": 4.839152835184453e-06, "loss": 0.6229643821716309, "step": 8623 }, { "epoch": 1.378326540397986, "grad_norm": 1.3791363929770972, "learning_rate": 4.836889479030607e-06, "loss": 0.49654483795166016, "step": 8624 }, { "epoch": 1.3784863741708624, "grad_norm": 1.559431847429691, "learning_rate": 4.834626483450069e-06, "loss": 0.49702584743499756, "step": 8625 }, { "epoch": 1.3786462079437385, "grad_norm": 1.5647830774357114, "learning_rate": 4.8323638486008765e-06, "loss": 0.5595237612724304, "step": 8626 }, { "epoch": 1.3788060417166146, "grad_norm": 1.3244439648056088, "learning_rate": 4.8301015746410385e-06, "loss": 0.45082324743270874, "step": 8627 }, { "epoch": 1.378965875489491, "grad_norm": 1.7236226052626722, "learning_rate": 4.827839661728554e-06, "loss": 0.5033605694770813, "step": 8628 }, { "epoch": 1.3791257092623672, "grad_norm": 1.6367521490080243, "learning_rate": 4.825578110021381e-06, "loss": 0.5219974517822266, "step": 8629 }, { "epoch": 1.3792855430352433, "grad_norm": 1.5128614799921147, "learning_rate": 4.823316919677458e-06, "loss": 0.5587990283966064, "step": 8630 }, { "epoch": 1.3794453768081196, "grad_norm": 1.4926588579100986, "learning_rate": 4.821056090854703e-06, "loss": 0.5211626887321472, "step": 8631 }, { "epoch": 1.3796052105809957, "grad_norm": 1.702258793760162, "learning_rate": 4.818795623711002e-06, "loss": 0.607245922088623, "step": 8632 }, { "epoch": 1.379765044353872, "grad_norm": 1.3353642055408093, "learning_rate": 4.8165355184042124e-06, "loss": 0.5077553987503052, "step": 8633 }, { "epoch": 1.3799248781267481, "grad_norm": 1.7097616892464955, "learning_rate": 4.814275775092182e-06, "loss": 0.5835088491439819, "step": 8634 }, { "epoch": 1.3800847118996245, "grad_norm": 1.4934682301570705, "learning_rate": 4.812016393932718e-06, "loss": 0.5450131297111511, "step": 8635 }, { "epoch": 1.3802445456725005, "grad_norm": 1.4189844148238888, "learning_rate": 4.809757375083608e-06, "loss": 0.45682385563850403, "step": 8636 }, { "epoch": 1.3804043794453769, "grad_norm": 1.4914603231373722, "learning_rate": 4.807498718702612e-06, "loss": 0.5757524967193604, "step": 8637 }, { "epoch": 1.380564213218253, "grad_norm": 1.5071208363629331, "learning_rate": 4.805240424947466e-06, "loss": 0.5380747318267822, "step": 8638 }, { "epoch": 1.3807240469911293, "grad_norm": 1.550720372859908, "learning_rate": 4.802982493975887e-06, "loss": 0.5928875207901001, "step": 8639 }, { "epoch": 1.3808838807640054, "grad_norm": 1.5306020763118307, "learning_rate": 4.800724925945557e-06, "loss": 0.6517137289047241, "step": 8640 }, { "epoch": 1.3810437145368817, "grad_norm": 1.7420709027714172, "learning_rate": 4.798467721014137e-06, "loss": 0.6331058740615845, "step": 8641 }, { "epoch": 1.3812035483097578, "grad_norm": 1.4891027737367226, "learning_rate": 4.796210879339258e-06, "loss": 0.5374213457107544, "step": 8642 }, { "epoch": 1.381363382082634, "grad_norm": 1.5792110509323878, "learning_rate": 4.793954401078538e-06, "loss": 0.5321353673934937, "step": 8643 }, { "epoch": 1.3815232158555102, "grad_norm": 1.4829246177987363, "learning_rate": 4.7916982863895565e-06, "loss": 0.5468493700027466, "step": 8644 }, { "epoch": 1.3816830496283865, "grad_norm": 1.8971172466883746, "learning_rate": 4.7894425354298735e-06, "loss": 0.710847020149231, "step": 8645 }, { "epoch": 1.3818428834012626, "grad_norm": 1.7587370396068855, "learning_rate": 4.787187148357022e-06, "loss": 0.5226351618766785, "step": 8646 }, { "epoch": 1.382002717174139, "grad_norm": 1.448056767946259, "learning_rate": 4.784932125328507e-06, "loss": 0.5109236240386963, "step": 8647 }, { "epoch": 1.3821625509470152, "grad_norm": 1.552602265164796, "learning_rate": 4.78267746650182e-06, "loss": 0.5897952914237976, "step": 8648 }, { "epoch": 1.3823223847198913, "grad_norm": 1.5190855900176143, "learning_rate": 4.780423172034412e-06, "loss": 0.48506176471710205, "step": 8649 }, { "epoch": 1.3824822184927674, "grad_norm": 1.651172597775327, "learning_rate": 4.7781692420837126e-06, "loss": 0.6706602573394775, "step": 8650 }, { "epoch": 1.3826420522656437, "grad_norm": 1.4407304974467472, "learning_rate": 4.775915676807136e-06, "loss": 0.4563499987125397, "step": 8651 }, { "epoch": 1.38280188603852, "grad_norm": 1.8113729691322231, "learning_rate": 4.773662476362059e-06, "loss": 0.4574302136898041, "step": 8652 }, { "epoch": 1.3829617198113962, "grad_norm": 1.3934805821224379, "learning_rate": 4.771409640905833e-06, "loss": 0.4775577783584595, "step": 8653 }, { "epoch": 1.3831215535842722, "grad_norm": 1.4333595589110169, "learning_rate": 4.7691571705957965e-06, "loss": 0.517769992351532, "step": 8654 }, { "epoch": 1.3832813873571486, "grad_norm": 1.447157904055722, "learning_rate": 4.766905065589249e-06, "loss": 0.5844255685806274, "step": 8655 }, { "epoch": 1.3834412211300249, "grad_norm": 1.4584742022528, "learning_rate": 4.764653326043471e-06, "loss": 0.5607476234436035, "step": 8656 }, { "epoch": 1.383601054902901, "grad_norm": 1.7182597871760235, "learning_rate": 4.7624019521157104e-06, "loss": 0.5182936191558838, "step": 8657 }, { "epoch": 1.383760888675777, "grad_norm": 1.4661506637248953, "learning_rate": 4.760150943963205e-06, "loss": 0.5406389236450195, "step": 8658 }, { "epoch": 1.3839207224486534, "grad_norm": 1.3579871924685456, "learning_rate": 4.757900301743153e-06, "loss": 0.4840368628501892, "step": 8659 }, { "epoch": 1.3840805562215297, "grad_norm": 1.3755011024589492, "learning_rate": 4.755650025612728e-06, "loss": 0.5836711525917053, "step": 8660 }, { "epoch": 1.3842403899944058, "grad_norm": 1.432427960124358, "learning_rate": 4.753400115729085e-06, "loss": 0.47628557682037354, "step": 8661 }, { "epoch": 1.384400223767282, "grad_norm": 1.434002916525515, "learning_rate": 4.7511505722493455e-06, "loss": 0.6116941571235657, "step": 8662 }, { "epoch": 1.3845600575401582, "grad_norm": 1.7687941817061819, "learning_rate": 4.748901395330616e-06, "loss": 0.5380780696868896, "step": 8663 }, { "epoch": 1.3847198913130345, "grad_norm": 1.782342978665901, "learning_rate": 4.746652585129967e-06, "loss": 0.657001256942749, "step": 8664 }, { "epoch": 1.3848797250859106, "grad_norm": 1.472314743812137, "learning_rate": 4.744404141804449e-06, "loss": 0.4747353196144104, "step": 8665 }, { "epoch": 1.385039558858787, "grad_norm": 1.578546537900723, "learning_rate": 4.742156065511085e-06, "loss": 0.43183955550193787, "step": 8666 }, { "epoch": 1.385199392631663, "grad_norm": 1.583875694258241, "learning_rate": 4.739908356406866e-06, "loss": 0.533087968826294, "step": 8667 }, { "epoch": 1.3853592264045393, "grad_norm": 1.512150860926507, "learning_rate": 4.737661014648775e-06, "loss": 0.6045172214508057, "step": 8668 }, { "epoch": 1.3855190601774154, "grad_norm": 1.4962970083076914, "learning_rate": 4.735414040393753e-06, "loss": 0.48197197914123535, "step": 8669 }, { "epoch": 1.3856788939502918, "grad_norm": 1.6322190744144207, "learning_rate": 4.733167433798719e-06, "loss": 0.5270397663116455, "step": 8670 }, { "epoch": 1.3858387277231679, "grad_norm": 1.634377670917293, "learning_rate": 4.7309211950205725e-06, "loss": 0.5303877592086792, "step": 8671 }, { "epoch": 1.3859985614960442, "grad_norm": 1.5134119492351796, "learning_rate": 4.728675324216181e-06, "loss": 0.663742184638977, "step": 8672 }, { "epoch": 1.3861583952689203, "grad_norm": 2.247045511920326, "learning_rate": 4.726429821542383e-06, "loss": 0.618415117263794, "step": 8673 }, { "epoch": 1.3863182290417966, "grad_norm": 1.6307040027083297, "learning_rate": 4.724184687156006e-06, "loss": 0.5301131010055542, "step": 8674 }, { "epoch": 1.3864780628146727, "grad_norm": 1.4840790848322796, "learning_rate": 4.721939921213838e-06, "loss": 0.5718185901641846, "step": 8675 }, { "epoch": 1.386637896587549, "grad_norm": 1.4495845464710797, "learning_rate": 4.719695523872643e-06, "loss": 0.5225207209587097, "step": 8676 }, { "epoch": 1.386797730360425, "grad_norm": 1.3714439222196835, "learning_rate": 4.71745149528916e-06, "loss": 0.5207434892654419, "step": 8677 }, { "epoch": 1.3869575641333014, "grad_norm": 1.6478081322763747, "learning_rate": 4.715207835620111e-06, "loss": 0.4833638668060303, "step": 8678 }, { "epoch": 1.3871173979061775, "grad_norm": 1.7329820111819454, "learning_rate": 4.712964545022183e-06, "loss": 0.6249951720237732, "step": 8679 }, { "epoch": 1.3872772316790538, "grad_norm": 1.6792445104759497, "learning_rate": 4.710721623652038e-06, "loss": 0.5257835984230042, "step": 8680 }, { "epoch": 1.38743706545193, "grad_norm": 1.8385606008250672, "learning_rate": 4.708479071666313e-06, "loss": 0.574974775314331, "step": 8681 }, { "epoch": 1.3875968992248062, "grad_norm": 1.5958528743761957, "learning_rate": 4.706236889221618e-06, "loss": 0.49745357036590576, "step": 8682 }, { "epoch": 1.3877567329976825, "grad_norm": 1.5210677122118268, "learning_rate": 4.703995076474545e-06, "loss": 0.6423959732055664, "step": 8683 }, { "epoch": 1.3879165667705586, "grad_norm": 1.4879426813919345, "learning_rate": 4.70175363358165e-06, "loss": 0.6525258421897888, "step": 8684 }, { "epoch": 1.3880764005434347, "grad_norm": 1.477724920237517, "learning_rate": 4.699512560699471e-06, "loss": 0.5829994678497314, "step": 8685 }, { "epoch": 1.388236234316311, "grad_norm": 1.5310579724928022, "learning_rate": 4.697271857984512e-06, "loss": 0.5574259161949158, "step": 8686 }, { "epoch": 1.3883960680891874, "grad_norm": 1.5204097821482307, "learning_rate": 4.695031525593254e-06, "loss": 0.6297938823699951, "step": 8687 }, { "epoch": 1.3885559018620635, "grad_norm": 1.5412556751360025, "learning_rate": 4.6927915636821616e-06, "loss": 0.5367587208747864, "step": 8688 }, { "epoch": 1.3887157356349396, "grad_norm": 1.5800853486620847, "learning_rate": 4.690551972407662e-06, "loss": 0.6503163576126099, "step": 8689 }, { "epoch": 1.3888755694078159, "grad_norm": 1.4958305347494267, "learning_rate": 4.688312751926156e-06, "loss": 0.5181715488433838, "step": 8690 }, { "epoch": 1.3890354031806922, "grad_norm": 1.4370929050874706, "learning_rate": 4.6860739023940316e-06, "loss": 0.4386117458343506, "step": 8691 }, { "epoch": 1.3891952369535683, "grad_norm": 1.5234344284830879, "learning_rate": 4.683835423967633e-06, "loss": 0.4902040362358093, "step": 8692 }, { "epoch": 1.3893550707264444, "grad_norm": 1.5027144391206062, "learning_rate": 4.681597316803296e-06, "loss": 0.46907368302345276, "step": 8693 }, { "epoch": 1.3895149044993207, "grad_norm": 1.4838205958550905, "learning_rate": 4.679359581057318e-06, "loss": 0.5834633111953735, "step": 8694 }, { "epoch": 1.389674738272197, "grad_norm": 1.52187454623563, "learning_rate": 4.677122216885974e-06, "loss": 0.5493132472038269, "step": 8695 }, { "epoch": 1.389834572045073, "grad_norm": 1.5043951929715698, "learning_rate": 4.6748852244455144e-06, "loss": 0.6343116164207458, "step": 8696 }, { "epoch": 1.3899944058179492, "grad_norm": 1.504572127032879, "learning_rate": 4.672648603892158e-06, "loss": 0.4578433036804199, "step": 8697 }, { "epoch": 1.3901542395908255, "grad_norm": 1.4346685589227852, "learning_rate": 4.670412355382112e-06, "loss": 0.5325433611869812, "step": 8698 }, { "epoch": 1.3903140733637018, "grad_norm": 1.5110442060988072, "learning_rate": 4.6681764790715425e-06, "loss": 0.6988544464111328, "step": 8699 }, { "epoch": 1.390473907136578, "grad_norm": 2.553562204823301, "learning_rate": 4.665940975116596e-06, "loss": 0.6642129421234131, "step": 8700 }, { "epoch": 1.3906337409094542, "grad_norm": 1.5927953628841787, "learning_rate": 4.663705843673392e-06, "loss": 0.56160968542099, "step": 8701 }, { "epoch": 1.3907935746823303, "grad_norm": 1.7946512599603441, "learning_rate": 4.661471084898019e-06, "loss": 0.6079453229904175, "step": 8702 }, { "epoch": 1.3909534084552067, "grad_norm": 1.7642344124743055, "learning_rate": 4.659236698946554e-06, "loss": 0.5787031650543213, "step": 8703 }, { "epoch": 1.3911132422280827, "grad_norm": 1.733289696159151, "learning_rate": 4.657002685975035e-06, "loss": 0.5833927392959595, "step": 8704 }, { "epoch": 1.391273076000959, "grad_norm": 1.813646356379989, "learning_rate": 4.6547690461394755e-06, "loss": 0.5730789303779602, "step": 8705 }, { "epoch": 1.3914329097738352, "grad_norm": 1.624868631517873, "learning_rate": 4.652535779595863e-06, "loss": 0.6086177825927734, "step": 8706 }, { "epoch": 1.3915927435467115, "grad_norm": 1.490263376576371, "learning_rate": 4.650302886500168e-06, "loss": 0.4871917963027954, "step": 8707 }, { "epoch": 1.3917525773195876, "grad_norm": 1.7606535074498153, "learning_rate": 4.648070367008324e-06, "loss": 0.7701693177223206, "step": 8708 }, { "epoch": 1.3919124110924639, "grad_norm": 1.4973564818175882, "learning_rate": 4.645838221276242e-06, "loss": 0.6333106756210327, "step": 8709 }, { "epoch": 1.39207224486534, "grad_norm": 1.6490124263898092, "learning_rate": 4.643606449459803e-06, "loss": 0.5835021734237671, "step": 8710 }, { "epoch": 1.3922320786382163, "grad_norm": 1.5241512435720004, "learning_rate": 4.641375051714875e-06, "loss": 0.5104420781135559, "step": 8711 }, { "epoch": 1.3923919124110924, "grad_norm": 1.738889019561314, "learning_rate": 4.639144028197282e-06, "loss": 0.5141983032226562, "step": 8712 }, { "epoch": 1.3925517461839687, "grad_norm": 1.4297392615040747, "learning_rate": 4.636913379062839e-06, "loss": 0.6276365518569946, "step": 8713 }, { "epoch": 1.3927115799568448, "grad_norm": 1.7795607175887056, "learning_rate": 4.634683104467321e-06, "loss": 0.5684667825698853, "step": 8714 }, { "epoch": 1.3928714137297211, "grad_norm": 1.5723475037467933, "learning_rate": 4.632453204566486e-06, "loss": 0.5493981838226318, "step": 8715 }, { "epoch": 1.3930312475025972, "grad_norm": 1.5708881962186412, "learning_rate": 4.630223679516059e-06, "loss": 0.5415298342704773, "step": 8716 }, { "epoch": 1.3931910812754735, "grad_norm": 1.6025441159079057, "learning_rate": 4.627994529471741e-06, "loss": 0.5261739492416382, "step": 8717 }, { "epoch": 1.3933509150483498, "grad_norm": 1.5615913320000465, "learning_rate": 4.625765754589213e-06, "loss": 0.45603710412979126, "step": 8718 }, { "epoch": 1.393510748821226, "grad_norm": 1.9327779862053667, "learning_rate": 4.623537355024123e-06, "loss": 0.5965808033943176, "step": 8719 }, { "epoch": 1.393670582594102, "grad_norm": 1.643162897755875, "learning_rate": 4.6213093309320936e-06, "loss": 0.6681044101715088, "step": 8720 }, { "epoch": 1.3938304163669784, "grad_norm": 1.6530938052364228, "learning_rate": 4.619081682468718e-06, "loss": 0.6020519733428955, "step": 8721 }, { "epoch": 1.3939902501398547, "grad_norm": 1.4828759601151988, "learning_rate": 4.616854409789574e-06, "loss": 0.521634042263031, "step": 8722 }, { "epoch": 1.3941500839127308, "grad_norm": 1.6279521992541819, "learning_rate": 4.614627513050205e-06, "loss": 0.514446496963501, "step": 8723 }, { "epoch": 1.3943099176856069, "grad_norm": 1.6188587222358182, "learning_rate": 4.612400992406129e-06, "loss": 0.575199544429779, "step": 8724 }, { "epoch": 1.3944697514584832, "grad_norm": 1.774053123729704, "learning_rate": 4.6101748480128365e-06, "loss": 0.5596234798431396, "step": 8725 }, { "epoch": 1.3946295852313595, "grad_norm": 1.6680034963409474, "learning_rate": 4.607949080025791e-06, "loss": 0.6061753034591675, "step": 8726 }, { "epoch": 1.3947894190042356, "grad_norm": 1.920792756650683, "learning_rate": 4.60572368860044e-06, "loss": 0.5093473792076111, "step": 8727 }, { "epoch": 1.3949492527771117, "grad_norm": 1.403624526336255, "learning_rate": 4.603498673892193e-06, "loss": 0.4285199046134949, "step": 8728 }, { "epoch": 1.395109086549988, "grad_norm": 1.3692412895201143, "learning_rate": 4.601274036056436e-06, "loss": 0.4022537171840668, "step": 8729 }, { "epoch": 1.3952689203228643, "grad_norm": 1.5709586445733184, "learning_rate": 4.599049775248527e-06, "loss": 0.6344477534294128, "step": 8730 }, { "epoch": 1.3954287540957404, "grad_norm": 1.528582086178284, "learning_rate": 4.596825891623808e-06, "loss": 0.6152888536453247, "step": 8731 }, { "epoch": 1.3955885878686165, "grad_norm": 1.671461805689978, "learning_rate": 4.594602385337581e-06, "loss": 0.6220510005950928, "step": 8732 }, { "epoch": 1.3957484216414928, "grad_norm": 1.423923521601548, "learning_rate": 4.592379256545133e-06, "loss": 0.5309878587722778, "step": 8733 }, { "epoch": 1.3959082554143691, "grad_norm": 1.9040156845999876, "learning_rate": 4.590156505401716e-06, "loss": 0.5475083589553833, "step": 8734 }, { "epoch": 1.3960680891872452, "grad_norm": 1.377547179655317, "learning_rate": 4.5879341320625605e-06, "loss": 0.5384013652801514, "step": 8735 }, { "epoch": 1.3962279229601215, "grad_norm": 1.4664459723135312, "learning_rate": 4.585712136682869e-06, "loss": 0.45866185426712036, "step": 8736 }, { "epoch": 1.3963877567329976, "grad_norm": 1.653496317787797, "learning_rate": 4.583490519417811e-06, "loss": 0.6315751075744629, "step": 8737 }, { "epoch": 1.396547590505874, "grad_norm": 1.5430418858644293, "learning_rate": 4.5812692804225475e-06, "loss": 0.6080507040023804, "step": 8738 }, { "epoch": 1.39670742427875, "grad_norm": 1.5479599659901773, "learning_rate": 4.5790484198521966e-06, "loss": 0.41625940799713135, "step": 8739 }, { "epoch": 1.3968672580516264, "grad_norm": 1.5250599013777428, "learning_rate": 4.576827937861856e-06, "loss": 0.5194335579872131, "step": 8740 }, { "epoch": 1.3970270918245025, "grad_norm": 1.69846873850092, "learning_rate": 4.574607834606592e-06, "loss": 0.5621129870414734, "step": 8741 }, { "epoch": 1.3971869255973788, "grad_norm": 1.4377852786907903, "learning_rate": 4.572388110241457e-06, "loss": 0.4481002390384674, "step": 8742 }, { "epoch": 1.3973467593702549, "grad_norm": 1.4636629053841508, "learning_rate": 4.570168764921463e-06, "loss": 0.5431377291679382, "step": 8743 }, { "epoch": 1.3975065931431312, "grad_norm": 1.5218631562334457, "learning_rate": 4.567949798801603e-06, "loss": 0.6521410942077637, "step": 8744 }, { "epoch": 1.3976664269160073, "grad_norm": 4.040323038737812, "learning_rate": 4.565731212036841e-06, "loss": 0.6178537011146545, "step": 8745 }, { "epoch": 1.3978262606888836, "grad_norm": 1.7636329320741082, "learning_rate": 4.563513004782112e-06, "loss": 0.5517334938049316, "step": 8746 }, { "epoch": 1.3979860944617597, "grad_norm": 1.4942655027514946, "learning_rate": 4.5612951771923345e-06, "loss": 0.4848858118057251, "step": 8747 }, { "epoch": 1.398145928234636, "grad_norm": 1.485786948303758, "learning_rate": 4.55907772942239e-06, "loss": 0.5951975584030151, "step": 8748 }, { "epoch": 1.398305762007512, "grad_norm": 1.6084735434611732, "learning_rate": 4.556860661627137e-06, "loss": 0.6189110279083252, "step": 8749 }, { "epoch": 1.3984655957803884, "grad_norm": 1.4277136509383548, "learning_rate": 4.554643973961406e-06, "loss": 0.4808441996574402, "step": 8750 }, { "epoch": 1.3986254295532645, "grad_norm": 1.7104833771843926, "learning_rate": 4.5524276665800075e-06, "loss": 0.49109625816345215, "step": 8751 }, { "epoch": 1.3987852633261408, "grad_norm": 1.794144904483263, "learning_rate": 4.550211739637715e-06, "loss": 0.6581229567527771, "step": 8752 }, { "epoch": 1.3989450970990172, "grad_norm": 1.3570825639400308, "learning_rate": 4.547996193289286e-06, "loss": 0.4348772168159485, "step": 8753 }, { "epoch": 1.3991049308718932, "grad_norm": 1.5642224886882818, "learning_rate": 4.545781027689445e-06, "loss": 0.4701193571090698, "step": 8754 }, { "epoch": 1.3992647646447693, "grad_norm": 1.288190317818061, "learning_rate": 4.54356624299289e-06, "loss": 0.46256017684936523, "step": 8755 }, { "epoch": 1.3994245984176457, "grad_norm": 1.5648900542774826, "learning_rate": 4.54135183935429e-06, "loss": 0.4684874415397644, "step": 8756 }, { "epoch": 1.399584432190522, "grad_norm": 1.767175224192662, "learning_rate": 4.539137816928299e-06, "loss": 0.7041058540344238, "step": 8757 }, { "epoch": 1.399744265963398, "grad_norm": 1.4938321977589102, "learning_rate": 4.5369241758695325e-06, "loss": 0.5617564916610718, "step": 8758 }, { "epoch": 1.3999040997362742, "grad_norm": 1.7031213983136224, "learning_rate": 4.534710916332584e-06, "loss": 0.611793041229248, "step": 8759 }, { "epoch": 1.4000639335091505, "grad_norm": 1.6135650010418037, "learning_rate": 4.532498038472019e-06, "loss": 0.5457779765129089, "step": 8760 }, { "epoch": 1.4002237672820268, "grad_norm": 1.4889022356250776, "learning_rate": 4.530285542442374e-06, "loss": 0.5243356227874756, "step": 8761 }, { "epoch": 1.400383601054903, "grad_norm": 1.7037898631951047, "learning_rate": 4.528073428398169e-06, "loss": 0.42784255743026733, "step": 8762 }, { "epoch": 1.400543434827779, "grad_norm": 1.6699290361830177, "learning_rate": 4.525861696493886e-06, "loss": 0.6741397380828857, "step": 8763 }, { "epoch": 1.4007032686006553, "grad_norm": 1.5393956954231998, "learning_rate": 4.523650346883984e-06, "loss": 0.5682824850082397, "step": 8764 }, { "epoch": 1.4008631023735316, "grad_norm": 1.653652737842693, "learning_rate": 4.521439379722897e-06, "loss": 0.5340059995651245, "step": 8765 }, { "epoch": 1.4010229361464077, "grad_norm": 1.7094236758614718, "learning_rate": 4.5192287951650285e-06, "loss": 0.5895373821258545, "step": 8766 }, { "epoch": 1.4011827699192838, "grad_norm": 1.510606503800763, "learning_rate": 4.517018593364761e-06, "loss": 0.5234675407409668, "step": 8767 }, { "epoch": 1.4013426036921601, "grad_norm": 1.4525390354242391, "learning_rate": 4.514808774476448e-06, "loss": 0.5605525970458984, "step": 8768 }, { "epoch": 1.4015024374650364, "grad_norm": 1.8493843032951416, "learning_rate": 4.512599338654413e-06, "loss": 0.5477744936943054, "step": 8769 }, { "epoch": 1.4016622712379125, "grad_norm": 1.5712105798878921, "learning_rate": 4.510390286052953e-06, "loss": 0.5745337605476379, "step": 8770 }, { "epoch": 1.4018221050107889, "grad_norm": 1.4247812782198293, "learning_rate": 4.508181616826343e-06, "loss": 0.4082583785057068, "step": 8771 }, { "epoch": 1.401981938783665, "grad_norm": 2.3067657622578195, "learning_rate": 4.505973331128831e-06, "loss": 0.6246391534805298, "step": 8772 }, { "epoch": 1.4021417725565413, "grad_norm": 1.7217524335473284, "learning_rate": 4.503765429114635e-06, "loss": 0.4584830403327942, "step": 8773 }, { "epoch": 1.4023016063294174, "grad_norm": 1.4819817780494098, "learning_rate": 4.501557910937945e-06, "loss": 0.6012344360351562, "step": 8774 }, { "epoch": 1.4024614401022937, "grad_norm": 1.6852591728082345, "learning_rate": 4.4993507767529265e-06, "loss": 0.6039371490478516, "step": 8775 }, { "epoch": 1.4026212738751698, "grad_norm": 1.8294451613220322, "learning_rate": 4.497144026713714e-06, "loss": 0.550154447555542, "step": 8776 }, { "epoch": 1.402781107648046, "grad_norm": 1.570579673798425, "learning_rate": 4.494937660974429e-06, "loss": 0.5490906238555908, "step": 8777 }, { "epoch": 1.4029409414209222, "grad_norm": 1.7788521922938678, "learning_rate": 4.49273167968915e-06, "loss": 0.5075191259384155, "step": 8778 }, { "epoch": 1.4031007751937985, "grad_norm": 1.4072415402410399, "learning_rate": 4.490526083011935e-06, "loss": 0.49042844772338867, "step": 8779 }, { "epoch": 1.4032606089666746, "grad_norm": 1.327657877606711, "learning_rate": 4.488320871096815e-06, "loss": 0.45475876331329346, "step": 8780 }, { "epoch": 1.403420442739551, "grad_norm": 1.5373970216763337, "learning_rate": 4.486116044097793e-06, "loss": 0.6027973890304565, "step": 8781 }, { "epoch": 1.403580276512427, "grad_norm": 1.7113594557114473, "learning_rate": 4.48391160216885e-06, "loss": 0.6071265935897827, "step": 8782 }, { "epoch": 1.4037401102853033, "grad_norm": 1.6208803532711504, "learning_rate": 4.481707545463935e-06, "loss": 0.5364232063293457, "step": 8783 }, { "epoch": 1.4038999440581794, "grad_norm": 1.654886466971323, "learning_rate": 4.47950387413697e-06, "loss": 0.7292917370796204, "step": 8784 }, { "epoch": 1.4040597778310557, "grad_norm": 1.5290004627786034, "learning_rate": 4.477300588341849e-06, "loss": 0.4828561544418335, "step": 8785 }, { "epoch": 1.4042196116039318, "grad_norm": 1.4309553808141364, "learning_rate": 4.47509768823245e-06, "loss": 0.4642384648323059, "step": 8786 }, { "epoch": 1.4043794453768081, "grad_norm": 1.5038167585503313, "learning_rate": 4.47289517396261e-06, "loss": 0.4185413122177124, "step": 8787 }, { "epoch": 1.4045392791496845, "grad_norm": 1.4823707981719632, "learning_rate": 4.470693045686146e-06, "loss": 0.4778400659561157, "step": 8788 }, { "epoch": 1.4046991129225606, "grad_norm": 1.6003155111091185, "learning_rate": 4.468491303556847e-06, "loss": 0.5254011154174805, "step": 8789 }, { "epoch": 1.4048589466954367, "grad_norm": 1.6853820615870916, "learning_rate": 4.46628994772847e-06, "loss": 0.5172042846679688, "step": 8790 }, { "epoch": 1.405018780468313, "grad_norm": 2.042280199118846, "learning_rate": 4.464088978354753e-06, "loss": 0.5723779201507568, "step": 8791 }, { "epoch": 1.4051786142411893, "grad_norm": 2.123695834311624, "learning_rate": 4.461888395589411e-06, "loss": 0.5634088516235352, "step": 8792 }, { "epoch": 1.4053384480140654, "grad_norm": 1.5777215438955985, "learning_rate": 4.459688199586117e-06, "loss": 0.6225572228431702, "step": 8793 }, { "epoch": 1.4054982817869415, "grad_norm": 1.615119657185744, "learning_rate": 4.4574883904985286e-06, "loss": 0.6206678152084351, "step": 8794 }, { "epoch": 1.4056581155598178, "grad_norm": 1.5132204234772644, "learning_rate": 4.45528896848027e-06, "loss": 0.48245173692703247, "step": 8795 }, { "epoch": 1.405817949332694, "grad_norm": 1.4412328441663753, "learning_rate": 4.453089933684938e-06, "loss": 0.45901763439178467, "step": 8796 }, { "epoch": 1.4059777831055702, "grad_norm": 1.3734497937548975, "learning_rate": 4.450891286266113e-06, "loss": 0.5609253644943237, "step": 8797 }, { "epoch": 1.4061376168784463, "grad_norm": 1.6077747937497373, "learning_rate": 4.448693026377337e-06, "loss": 0.6437606811523438, "step": 8798 }, { "epoch": 1.4062974506513226, "grad_norm": 1.9097374681825068, "learning_rate": 4.4464951541721295e-06, "loss": 0.5396495461463928, "step": 8799 }, { "epoch": 1.406457284424199, "grad_norm": 1.8481875562960073, "learning_rate": 4.444297669803981e-06, "loss": 0.568897008895874, "step": 8800 }, { "epoch": 1.406617118197075, "grad_norm": 1.3651903654274131, "learning_rate": 4.442100573426352e-06, "loss": 0.53602135181427, "step": 8801 }, { "epoch": 1.4067769519699511, "grad_norm": 1.8781810195215394, "learning_rate": 4.4399038651926885e-06, "loss": 0.5529435276985168, "step": 8802 }, { "epoch": 1.4069367857428274, "grad_norm": 1.6572347579476208, "learning_rate": 4.437707545256396e-06, "loss": 0.5689260959625244, "step": 8803 }, { "epoch": 1.4070966195157038, "grad_norm": 1.840936652726751, "learning_rate": 4.435511613770857e-06, "loss": 0.5700998306274414, "step": 8804 }, { "epoch": 1.4072564532885798, "grad_norm": 1.7130852808115573, "learning_rate": 4.433316070889427e-06, "loss": 0.5420861840248108, "step": 8805 }, { "epoch": 1.4074162870614562, "grad_norm": 1.5052075413755497, "learning_rate": 4.43112091676544e-06, "loss": 0.714911162853241, "step": 8806 }, { "epoch": 1.4075761208343323, "grad_norm": 1.5527801946170556, "learning_rate": 4.428926151552194e-06, "loss": 0.5389871597290039, "step": 8807 }, { "epoch": 1.4077359546072086, "grad_norm": 1.5205589439580107, "learning_rate": 4.426731775402965e-06, "loss": 0.6276928186416626, "step": 8808 }, { "epoch": 1.4078957883800847, "grad_norm": 1.6452753350502511, "learning_rate": 4.424537788470999e-06, "loss": 0.7049641013145447, "step": 8809 }, { "epoch": 1.408055622152961, "grad_norm": 1.7627953481769991, "learning_rate": 4.422344190909512e-06, "loss": 0.632911205291748, "step": 8810 }, { "epoch": 1.408215455925837, "grad_norm": 1.4474563852212965, "learning_rate": 4.420150982871703e-06, "loss": 0.5527975559234619, "step": 8811 }, { "epoch": 1.4083752896987134, "grad_norm": 1.7761072576989068, "learning_rate": 4.41795816451074e-06, "loss": 0.5535116791725159, "step": 8812 }, { "epoch": 1.4085351234715895, "grad_norm": 1.4314979729964752, "learning_rate": 4.415765735979758e-06, "loss": 0.42532578110694885, "step": 8813 }, { "epoch": 1.4086949572444658, "grad_norm": 1.6304128057473126, "learning_rate": 4.413573697431868e-06, "loss": 0.5396193861961365, "step": 8814 }, { "epoch": 1.408854791017342, "grad_norm": 1.7579547811432303, "learning_rate": 4.411382049020156e-06, "loss": 0.507027268409729, "step": 8815 }, { "epoch": 1.4090146247902182, "grad_norm": 1.3402691879335655, "learning_rate": 4.409190790897675e-06, "loss": 0.5096155405044556, "step": 8816 }, { "epoch": 1.4091744585630943, "grad_norm": 1.473649651132706, "learning_rate": 4.40699992321746e-06, "loss": 0.5361068248748779, "step": 8817 }, { "epoch": 1.4093342923359706, "grad_norm": 1.5985214192939767, "learning_rate": 4.404809446132511e-06, "loss": 0.47385373711586, "step": 8818 }, { "epoch": 1.4094941261088467, "grad_norm": 1.3759396011722898, "learning_rate": 4.402619359795803e-06, "loss": 0.48657214641571045, "step": 8819 }, { "epoch": 1.409653959881723, "grad_norm": 1.4931904708050903, "learning_rate": 4.400429664360281e-06, "loss": 0.4626445174217224, "step": 8820 }, { "epoch": 1.4098137936545991, "grad_norm": 1.2669274653264047, "learning_rate": 4.398240359978871e-06, "loss": 0.41957366466522217, "step": 8821 }, { "epoch": 1.4099736274274755, "grad_norm": 1.388122952741803, "learning_rate": 4.396051446804466e-06, "loss": 0.4591323733329773, "step": 8822 }, { "epoch": 1.4101334612003518, "grad_norm": 1.5871617121519281, "learning_rate": 4.3938629249899276e-06, "loss": 0.5773614048957825, "step": 8823 }, { "epoch": 1.4102932949732279, "grad_norm": 1.496603156674968, "learning_rate": 4.391674794688097e-06, "loss": 0.5537420511245728, "step": 8824 }, { "epoch": 1.410453128746104, "grad_norm": 1.3738802353096948, "learning_rate": 4.389487056051781e-06, "loss": 0.5654962062835693, "step": 8825 }, { "epoch": 1.4106129625189803, "grad_norm": 2.1345580572476304, "learning_rate": 4.387299709233772e-06, "loss": 0.4618731737136841, "step": 8826 }, { "epoch": 1.4107727962918566, "grad_norm": 1.5296394451239315, "learning_rate": 4.385112754386821e-06, "loss": 0.552125096321106, "step": 8827 }, { "epoch": 1.4109326300647327, "grad_norm": 1.2639480255088893, "learning_rate": 4.382926191663659e-06, "loss": 0.5016537308692932, "step": 8828 }, { "epoch": 1.4110924638376088, "grad_norm": 1.8406799240007374, "learning_rate": 4.3807400212169875e-06, "loss": 0.6090797185897827, "step": 8829 }, { "epoch": 1.411252297610485, "grad_norm": 1.5161248019335565, "learning_rate": 4.378554243199475e-06, "loss": 0.549418568611145, "step": 8830 }, { "epoch": 1.4114121313833614, "grad_norm": 1.931564630972812, "learning_rate": 4.376368857763774e-06, "loss": 0.6182624101638794, "step": 8831 }, { "epoch": 1.4115719651562375, "grad_norm": 1.4087480503894805, "learning_rate": 4.374183865062508e-06, "loss": 0.5389872789382935, "step": 8832 }, { "epoch": 1.4117317989291136, "grad_norm": 1.3558361479717047, "learning_rate": 4.3719992652482645e-06, "loss": 0.5548558235168457, "step": 8833 }, { "epoch": 1.41189163270199, "grad_norm": 1.5401405540759334, "learning_rate": 4.36981505847361e-06, "loss": 0.5222291946411133, "step": 8834 }, { "epoch": 1.4120514664748662, "grad_norm": 1.4530270838818093, "learning_rate": 4.367631244891075e-06, "loss": 0.48205333948135376, "step": 8835 }, { "epoch": 1.4122113002477423, "grad_norm": 1.4637161379778605, "learning_rate": 4.36544782465318e-06, "loss": 0.4010556638240814, "step": 8836 }, { "epoch": 1.4123711340206184, "grad_norm": 1.5838343043945018, "learning_rate": 4.3632647979124e-06, "loss": 0.601683497428894, "step": 8837 }, { "epoch": 1.4125309677934947, "grad_norm": 1.6735795251651222, "learning_rate": 4.361082164821193e-06, "loss": 0.5621413588523865, "step": 8838 }, { "epoch": 1.412690801566371, "grad_norm": 1.8110736042381625, "learning_rate": 4.358899925531985e-06, "loss": 0.6405220031738281, "step": 8839 }, { "epoch": 1.4128506353392472, "grad_norm": 1.7550132408493835, "learning_rate": 4.356718080197172e-06, "loss": 0.5360561609268188, "step": 8840 }, { "epoch": 1.4130104691121235, "grad_norm": 1.650399389048549, "learning_rate": 4.354536628969135e-06, "loss": 0.5489741563796997, "step": 8841 }, { "epoch": 1.4131703028849996, "grad_norm": 1.5811993610509258, "learning_rate": 4.352355572000213e-06, "loss": 0.4668614864349365, "step": 8842 }, { "epoch": 1.4133301366578759, "grad_norm": 1.5772395599513853, "learning_rate": 4.3501749094427244e-06, "loss": 0.5906590819358826, "step": 8843 }, { "epoch": 1.413489970430752, "grad_norm": 1.7702876188597017, "learning_rate": 4.3479946414489606e-06, "loss": 0.6070581674575806, "step": 8844 }, { "epoch": 1.4136498042036283, "grad_norm": 1.523760248444977, "learning_rate": 4.345814768171177e-06, "loss": 0.46659305691719055, "step": 8845 }, { "epoch": 1.4138096379765044, "grad_norm": 1.386243768925838, "learning_rate": 4.343635289761617e-06, "loss": 0.425180047750473, "step": 8846 }, { "epoch": 1.4139694717493807, "grad_norm": 1.8022048850446433, "learning_rate": 4.341456206372485e-06, "loss": 0.6079328060150146, "step": 8847 }, { "epoch": 1.4141293055222568, "grad_norm": 1.5583160452448268, "learning_rate": 4.339277518155959e-06, "loss": 0.5381304025650024, "step": 8848 }, { "epoch": 1.4142891392951331, "grad_norm": 1.7596046429179897, "learning_rate": 4.3370992252641875e-06, "loss": 0.6701634526252747, "step": 8849 }, { "epoch": 1.4144489730680092, "grad_norm": 1.7881974849707196, "learning_rate": 4.334921327849302e-06, "loss": 0.49660414457321167, "step": 8850 }, { "epoch": 1.4146088068408855, "grad_norm": 1.7313973712511752, "learning_rate": 4.332743826063392e-06, "loss": 0.5076868534088135, "step": 8851 }, { "epoch": 1.4147686406137616, "grad_norm": 1.3992145677722898, "learning_rate": 4.330566720058534e-06, "loss": 0.4708552360534668, "step": 8852 }, { "epoch": 1.414928474386638, "grad_norm": 1.4564537006363145, "learning_rate": 4.328390009986765e-06, "loss": 0.48069992661476135, "step": 8853 }, { "epoch": 1.415088308159514, "grad_norm": 1.6030814911605176, "learning_rate": 4.3262136960001e-06, "loss": 0.6374919414520264, "step": 8854 }, { "epoch": 1.4152481419323903, "grad_norm": 1.6030970170616334, "learning_rate": 4.324037778250519e-06, "loss": 0.6333277821540833, "step": 8855 }, { "epoch": 1.4154079757052664, "grad_norm": 1.707914402600243, "learning_rate": 4.3218622568899914e-06, "loss": 0.44183671474456787, "step": 8856 }, { "epoch": 1.4155678094781428, "grad_norm": 1.6850885900965318, "learning_rate": 4.319687132070442e-06, "loss": 0.6286724805831909, "step": 8857 }, { "epoch": 1.415727643251019, "grad_norm": 1.6820585627738334, "learning_rate": 4.3175124039437725e-06, "loss": 0.7013682126998901, "step": 8858 }, { "epoch": 1.4158874770238952, "grad_norm": 1.6198152422537333, "learning_rate": 4.31533807266186e-06, "loss": 0.5231615900993347, "step": 8859 }, { "epoch": 1.4160473107967713, "grad_norm": 1.5352929669462576, "learning_rate": 4.3131641383765474e-06, "loss": 0.5716443061828613, "step": 8860 }, { "epoch": 1.4162071445696476, "grad_norm": 1.6864743771058, "learning_rate": 4.310990601239663e-06, "loss": 0.607368528842926, "step": 8861 }, { "epoch": 1.416366978342524, "grad_norm": 1.6465331724697878, "learning_rate": 4.308817461402994e-06, "loss": 0.5285034775733948, "step": 8862 }, { "epoch": 1.4165268121154, "grad_norm": 1.6260733732513217, "learning_rate": 4.306644719018307e-06, "loss": 0.5603325963020325, "step": 8863 }, { "epoch": 1.416686645888276, "grad_norm": 1.568357255257431, "learning_rate": 4.304472374237333e-06, "loss": 0.580291748046875, "step": 8864 }, { "epoch": 1.4168464796611524, "grad_norm": 1.291210733061916, "learning_rate": 4.302300427211787e-06, "loss": 0.5103487968444824, "step": 8865 }, { "epoch": 1.4170063134340287, "grad_norm": 1.4488386451357513, "learning_rate": 4.30012887809335e-06, "loss": 0.4524868130683899, "step": 8866 }, { "epoch": 1.4171661472069048, "grad_norm": 1.7266959977065908, "learning_rate": 4.297957727033673e-06, "loss": 0.6411375403404236, "step": 8867 }, { "epoch": 1.417325980979781, "grad_norm": 1.785563551001042, "learning_rate": 4.295786974184381e-06, "loss": 0.5945384502410889, "step": 8868 }, { "epoch": 1.4174858147526572, "grad_norm": 1.8135311247945218, "learning_rate": 4.293616619697069e-06, "loss": 0.5125095844268799, "step": 8869 }, { "epoch": 1.4176456485255335, "grad_norm": 1.463329869617896, "learning_rate": 4.291446663723314e-06, "loss": 0.5843525528907776, "step": 8870 }, { "epoch": 1.4178054822984096, "grad_norm": 1.730653634816504, "learning_rate": 4.289277106414651e-06, "loss": 0.609208881855011, "step": 8871 }, { "epoch": 1.4179653160712857, "grad_norm": 1.4866115873853536, "learning_rate": 4.287107947922601e-06, "loss": 0.5291969180107117, "step": 8872 }, { "epoch": 1.418125149844162, "grad_norm": 1.3795490822897851, "learning_rate": 4.284939188398647e-06, "loss": 0.4615419805049896, "step": 8873 }, { "epoch": 1.4182849836170384, "grad_norm": 1.4632595607389138, "learning_rate": 4.282770827994247e-06, "loss": 0.5171020030975342, "step": 8874 }, { "epoch": 1.4184448173899145, "grad_norm": 1.6053041961348689, "learning_rate": 4.280602866860828e-06, "loss": 0.4605368375778198, "step": 8875 }, { "epoch": 1.4186046511627908, "grad_norm": 1.4680294078135043, "learning_rate": 4.2784353051498e-06, "loss": 0.5814339518547058, "step": 8876 }, { "epoch": 1.4187644849356669, "grad_norm": 1.4118649366239375, "learning_rate": 4.2762681430125354e-06, "loss": 0.47453099489212036, "step": 8877 }, { "epoch": 1.4189243187085432, "grad_norm": 1.5928464751851126, "learning_rate": 4.274101380600379e-06, "loss": 0.5874607563018799, "step": 8878 }, { "epoch": 1.4190841524814193, "grad_norm": 1.4958502533649656, "learning_rate": 4.271935018064651e-06, "loss": 0.6025033593177795, "step": 8879 }, { "epoch": 1.4192439862542956, "grad_norm": 1.6540633522715178, "learning_rate": 4.26976905555664e-06, "loss": 0.5122642517089844, "step": 8880 }, { "epoch": 1.4194038200271717, "grad_norm": 1.37579136895311, "learning_rate": 4.267603493227614e-06, "loss": 0.6537661552429199, "step": 8881 }, { "epoch": 1.419563653800048, "grad_norm": 1.725131949064942, "learning_rate": 4.265438331228806e-06, "loss": 0.6188651323318481, "step": 8882 }, { "epoch": 1.419723487572924, "grad_norm": 1.7395210014525895, "learning_rate": 4.263273569711423e-06, "loss": 0.6579760313034058, "step": 8883 }, { "epoch": 1.4198833213458004, "grad_norm": 1.640771280064068, "learning_rate": 4.261109208826641e-06, "loss": 0.4597015380859375, "step": 8884 }, { "epoch": 1.4200431551186765, "grad_norm": 1.5176797619183733, "learning_rate": 4.2589452487256165e-06, "loss": 0.47388705611228943, "step": 8885 }, { "epoch": 1.4202029888915528, "grad_norm": 1.3940456194464785, "learning_rate": 4.256781689559472e-06, "loss": 0.48922795057296753, "step": 8886 }, { "epoch": 1.420362822664429, "grad_norm": 1.4842136246489206, "learning_rate": 4.254618531479301e-06, "loss": 0.5197547674179077, "step": 8887 }, { "epoch": 1.4205226564373052, "grad_norm": 1.4325860707855491, "learning_rate": 4.252455774636172e-06, "loss": 0.5331559181213379, "step": 8888 }, { "epoch": 1.4206824902101813, "grad_norm": 1.5897479029291925, "learning_rate": 4.250293419181118e-06, "loss": 0.6038933396339417, "step": 8889 }, { "epoch": 1.4208423239830577, "grad_norm": 1.5026475330787108, "learning_rate": 4.248131465265162e-06, "loss": 0.6305426359176636, "step": 8890 }, { "epoch": 1.4210021577559337, "grad_norm": 1.6350454512498136, "learning_rate": 4.245969913039276e-06, "loss": 0.5766302347183228, "step": 8891 }, { "epoch": 1.42116199152881, "grad_norm": 1.588183134301617, "learning_rate": 4.243808762654425e-06, "loss": 0.5123973488807678, "step": 8892 }, { "epoch": 1.4213218253016864, "grad_norm": 1.3977154106458354, "learning_rate": 4.241648014261529e-06, "loss": 0.44047772884368896, "step": 8893 }, { "epoch": 1.4214816590745625, "grad_norm": 1.4734112151451475, "learning_rate": 4.23948766801149e-06, "loss": 0.4216310381889343, "step": 8894 }, { "epoch": 1.4216414928474386, "grad_norm": 1.8848310684571892, "learning_rate": 4.237327724055174e-06, "loss": 0.6157472133636475, "step": 8895 }, { "epoch": 1.4218013266203149, "grad_norm": 1.4404689304758196, "learning_rate": 4.235168182543433e-06, "loss": 0.5918449759483337, "step": 8896 }, { "epoch": 1.4219611603931912, "grad_norm": 1.5162658863607392, "learning_rate": 4.233009043627077e-06, "loss": 0.5255346298217773, "step": 8897 }, { "epoch": 1.4221209941660673, "grad_norm": 1.6336459216534887, "learning_rate": 4.2308503074568894e-06, "loss": 0.53822922706604, "step": 8898 }, { "epoch": 1.4222808279389434, "grad_norm": 1.4843658157645963, "learning_rate": 4.2286919741836306e-06, "loss": 0.519048810005188, "step": 8899 }, { "epoch": 1.4224406617118197, "grad_norm": 1.3978947967239703, "learning_rate": 4.226534043958034e-06, "loss": 0.4934576451778412, "step": 8900 }, { "epoch": 1.422600495484696, "grad_norm": 1.745223385084935, "learning_rate": 4.224376516930801e-06, "loss": 0.5581388473510742, "step": 8901 }, { "epoch": 1.4227603292575721, "grad_norm": 1.7509255496123604, "learning_rate": 4.2222193932526025e-06, "loss": 0.3899250626564026, "step": 8902 }, { "epoch": 1.4229201630304482, "grad_norm": 1.4701544041970254, "learning_rate": 4.220062673074086e-06, "loss": 0.5324234962463379, "step": 8903 }, { "epoch": 1.4230799968033245, "grad_norm": 1.4715974210932594, "learning_rate": 4.217906356545867e-06, "loss": 0.5647696852684021, "step": 8904 }, { "epoch": 1.4232398305762008, "grad_norm": 1.3719097421842261, "learning_rate": 4.215750443818539e-06, "loss": 0.46222448348999023, "step": 8905 }, { "epoch": 1.423399664349077, "grad_norm": 1.5157296432732033, "learning_rate": 4.213594935042663e-06, "loss": 0.549852728843689, "step": 8906 }, { "epoch": 1.423559498121953, "grad_norm": 1.7995527048828859, "learning_rate": 4.211439830368771e-06, "loss": 0.5215109586715698, "step": 8907 }, { "epoch": 1.4237193318948294, "grad_norm": 1.6232107017584236, "learning_rate": 4.209285129947367e-06, "loss": 0.41718462109565735, "step": 8908 }, { "epoch": 1.4238791656677057, "grad_norm": 1.5231958152937015, "learning_rate": 4.207130833928924e-06, "loss": 0.5676957368850708, "step": 8909 }, { "epoch": 1.4240389994405818, "grad_norm": 1.812688479921924, "learning_rate": 4.204976942463899e-06, "loss": 0.5985423922538757, "step": 8910 }, { "epoch": 1.424198833213458, "grad_norm": 1.6167473139384922, "learning_rate": 4.202823455702703e-06, "loss": 0.570427656173706, "step": 8911 }, { "epoch": 1.4243586669863342, "grad_norm": 1.52484988624914, "learning_rate": 4.200670373795735e-06, "loss": 0.6830257177352905, "step": 8912 }, { "epoch": 1.4245185007592105, "grad_norm": 1.8413329979394408, "learning_rate": 4.198517696893358e-06, "loss": 0.6271561980247498, "step": 8913 }, { "epoch": 1.4246783345320866, "grad_norm": 1.5974593083931425, "learning_rate": 4.196365425145902e-06, "loss": 0.58101487159729, "step": 8914 }, { "epoch": 1.424838168304963, "grad_norm": 1.716301511366818, "learning_rate": 4.19421355870368e-06, "loss": 0.5431299209594727, "step": 8915 }, { "epoch": 1.424998002077839, "grad_norm": 1.7334064070733908, "learning_rate": 4.1920620977169695e-06, "loss": 0.5831018686294556, "step": 8916 }, { "epoch": 1.4251578358507153, "grad_norm": 1.7390705185954776, "learning_rate": 4.1899110423360175e-06, "loss": 0.5736610293388367, "step": 8917 }, { "epoch": 1.4253176696235914, "grad_norm": 1.5228773229910773, "learning_rate": 4.187760392711049e-06, "loss": 0.5831470489501953, "step": 8918 }, { "epoch": 1.4254775033964677, "grad_norm": 1.694645920759342, "learning_rate": 4.185610148992253e-06, "loss": 0.4828738570213318, "step": 8919 }, { "epoch": 1.4256373371693438, "grad_norm": 1.5932934320531749, "learning_rate": 4.183460311329804e-06, "loss": 0.4978991150856018, "step": 8920 }, { "epoch": 1.4257971709422201, "grad_norm": 1.861422738045684, "learning_rate": 4.181310879873831e-06, "loss": 0.6390178799629211, "step": 8921 }, { "epoch": 1.4259570047150962, "grad_norm": 1.6743206109226594, "learning_rate": 4.179161854774447e-06, "loss": 0.5713216066360474, "step": 8922 }, { "epoch": 1.4261168384879725, "grad_norm": 1.761769044841478, "learning_rate": 4.17701323618173e-06, "loss": 0.5848338603973389, "step": 8923 }, { "epoch": 1.4262766722608486, "grad_norm": 1.5419488652000912, "learning_rate": 4.17486502424573e-06, "loss": 0.5923873782157898, "step": 8924 }, { "epoch": 1.426436506033725, "grad_norm": 1.3944111241002368, "learning_rate": 4.172717219116475e-06, "loss": 0.5653753280639648, "step": 8925 }, { "epoch": 1.426596339806601, "grad_norm": 1.42555645840901, "learning_rate": 4.1705698209439595e-06, "loss": 0.49367618560791016, "step": 8926 }, { "epoch": 1.4267561735794774, "grad_norm": 1.3673660644349248, "learning_rate": 4.168422829878148e-06, "loss": 0.5504447817802429, "step": 8927 }, { "epoch": 1.4269160073523537, "grad_norm": 1.6334614341369502, "learning_rate": 4.166276246068976e-06, "loss": 0.5436683893203735, "step": 8928 }, { "epoch": 1.4270758411252298, "grad_norm": 1.6379389503193036, "learning_rate": 4.164130069666361e-06, "loss": 0.5932444334030151, "step": 8929 }, { "epoch": 1.4272356748981059, "grad_norm": 1.5939815495726952, "learning_rate": 4.161984300820181e-06, "loss": 0.5317251086235046, "step": 8930 }, { "epoch": 1.4273955086709822, "grad_norm": 1.7477929835193216, "learning_rate": 4.159838939680283e-06, "loss": 0.7217018008232117, "step": 8931 }, { "epoch": 1.4275553424438585, "grad_norm": 1.7771913587496793, "learning_rate": 4.1576939863965e-06, "loss": 0.6473537683486938, "step": 8932 }, { "epoch": 1.4277151762167346, "grad_norm": 1.2986151004209083, "learning_rate": 4.155549441118625e-06, "loss": 0.6141012907028198, "step": 8933 }, { "epoch": 1.4278750099896107, "grad_norm": 1.6944846116484813, "learning_rate": 4.153405303996422e-06, "loss": 0.5386379957199097, "step": 8934 }, { "epoch": 1.428034843762487, "grad_norm": 1.5163725681423825, "learning_rate": 4.151261575179635e-06, "loss": 0.5210392475128174, "step": 8935 }, { "epoch": 1.4281946775353633, "grad_norm": 1.5383742805795706, "learning_rate": 4.149118254817972e-06, "loss": 0.5035746097564697, "step": 8936 }, { "epoch": 1.4283545113082394, "grad_norm": 1.8874400507665292, "learning_rate": 4.146975343061116e-06, "loss": 0.7008488178253174, "step": 8937 }, { "epoch": 1.4285143450811155, "grad_norm": 1.5041225632548636, "learning_rate": 4.144832840058719e-06, "loss": 0.4369146227836609, "step": 8938 }, { "epoch": 1.4286741788539918, "grad_norm": 1.4509176678727695, "learning_rate": 4.1426907459604015e-06, "loss": 0.5409090518951416, "step": 8939 }, { "epoch": 1.4288340126268682, "grad_norm": 1.4858995388907363, "learning_rate": 4.14054906091577e-06, "loss": 0.5345048904418945, "step": 8940 }, { "epoch": 1.4289938463997442, "grad_norm": 1.5098875535543816, "learning_rate": 4.138407785074385e-06, "loss": 0.57447350025177, "step": 8941 }, { "epoch": 1.4291536801726203, "grad_norm": 1.6400415787868128, "learning_rate": 4.136266918585787e-06, "loss": 0.5261490345001221, "step": 8942 }, { "epoch": 1.4293135139454967, "grad_norm": 1.5691450762093822, "learning_rate": 4.134126461599488e-06, "loss": 0.4709171652793884, "step": 8943 }, { "epoch": 1.429473347718373, "grad_norm": 1.4870634410538448, "learning_rate": 4.131986414264964e-06, "loss": 0.5000861287117004, "step": 8944 }, { "epoch": 1.429633181491249, "grad_norm": 1.5069729271163301, "learning_rate": 4.129846776731677e-06, "loss": 0.5302438139915466, "step": 8945 }, { "epoch": 1.4297930152641254, "grad_norm": 1.4124322451541982, "learning_rate": 4.127707549149048e-06, "loss": 0.4983415901660919, "step": 8946 }, { "epoch": 1.4299528490370015, "grad_norm": 1.596758171618606, "learning_rate": 4.125568731666473e-06, "loss": 0.6334347724914551, "step": 8947 }, { "epoch": 1.4301126828098778, "grad_norm": 1.4749092273821132, "learning_rate": 4.123430324433314e-06, "loss": 0.5100482702255249, "step": 8948 }, { "epoch": 1.430272516582754, "grad_norm": 1.4904328959942992, "learning_rate": 4.121292327598919e-06, "loss": 0.493796169757843, "step": 8949 }, { "epoch": 1.4304323503556302, "grad_norm": 1.4351502099418376, "learning_rate": 4.1191547413125945e-06, "loss": 0.39002060890197754, "step": 8950 }, { "epoch": 1.4305921841285063, "grad_norm": 1.8594473098353996, "learning_rate": 4.1170175657236175e-06, "loss": 0.5441782474517822, "step": 8951 }, { "epoch": 1.4307520179013826, "grad_norm": 1.5184696138105294, "learning_rate": 4.114880800981248e-06, "loss": 0.5269293785095215, "step": 8952 }, { "epoch": 1.4309118516742587, "grad_norm": 1.605272821048912, "learning_rate": 4.112744447234708e-06, "loss": 0.6509151458740234, "step": 8953 }, { "epoch": 1.431071685447135, "grad_norm": 1.599162848284165, "learning_rate": 4.110608504633186e-06, "loss": 0.5488713979721069, "step": 8954 }, { "epoch": 1.4312315192200111, "grad_norm": 1.6416117828828996, "learning_rate": 4.1084729733258585e-06, "loss": 0.6663200855255127, "step": 8955 }, { "epoch": 1.4313913529928874, "grad_norm": 1.501371505610752, "learning_rate": 4.10633785346186e-06, "loss": 0.4761373996734619, "step": 8956 }, { "epoch": 1.4315511867657635, "grad_norm": 1.6048932177849806, "learning_rate": 4.104203145190299e-06, "loss": 0.49403107166290283, "step": 8957 }, { "epoch": 1.4317110205386399, "grad_norm": 1.4241088995454176, "learning_rate": 4.102068848660254e-06, "loss": 0.44121426343917847, "step": 8958 }, { "epoch": 1.431870854311516, "grad_norm": 1.3699710570714128, "learning_rate": 4.0999349640207766e-06, "loss": 0.5548380613327026, "step": 8959 }, { "epoch": 1.4320306880843923, "grad_norm": 1.603753007683083, "learning_rate": 4.0978014914208945e-06, "loss": 0.4690110981464386, "step": 8960 }, { "epoch": 1.4321905218572684, "grad_norm": 1.5882610501942442, "learning_rate": 4.095668431009599e-06, "loss": 0.4977976381778717, "step": 8961 }, { "epoch": 1.4323503556301447, "grad_norm": 1.397600458930942, "learning_rate": 4.093535782935856e-06, "loss": 0.6096994876861572, "step": 8962 }, { "epoch": 1.432510189403021, "grad_norm": 1.7269908520527728, "learning_rate": 4.091403547348597e-06, "loss": 0.4897992014884949, "step": 8963 }, { "epoch": 1.432670023175897, "grad_norm": 1.3076731760108808, "learning_rate": 4.089271724396739e-06, "loss": 0.4921388030052185, "step": 8964 }, { "epoch": 1.4328298569487732, "grad_norm": 1.5858386638906719, "learning_rate": 4.087140314229154e-06, "loss": 0.54243403673172, "step": 8965 }, { "epoch": 1.4329896907216495, "grad_norm": 1.4142582229057308, "learning_rate": 4.085009316994697e-06, "loss": 0.5270304083824158, "step": 8966 }, { "epoch": 1.4331495244945258, "grad_norm": 1.6643263857643462, "learning_rate": 4.082878732842185e-06, "loss": 0.5089865922927856, "step": 8967 }, { "epoch": 1.433309358267402, "grad_norm": 1.7238451105077353, "learning_rate": 4.080748561920409e-06, "loss": 0.5287598371505737, "step": 8968 }, { "epoch": 1.433469192040278, "grad_norm": 1.567362692051125, "learning_rate": 4.078618804378139e-06, "loss": 0.6315202713012695, "step": 8969 }, { "epoch": 1.4336290258131543, "grad_norm": 1.533903248364898, "learning_rate": 4.076489460364106e-06, "loss": 0.5226958394050598, "step": 8970 }, { "epoch": 1.4337888595860306, "grad_norm": 1.5813662155113999, "learning_rate": 4.074360530027013e-06, "loss": 0.5389729738235474, "step": 8971 }, { "epoch": 1.4339486933589067, "grad_norm": 1.8704636697759547, "learning_rate": 4.072232013515544e-06, "loss": 0.6620226502418518, "step": 8972 }, { "epoch": 1.4341085271317828, "grad_norm": 1.569459886675561, "learning_rate": 4.070103910978341e-06, "loss": 0.5559988021850586, "step": 8973 }, { "epoch": 1.4342683609046591, "grad_norm": 1.9087531090984182, "learning_rate": 4.067976222564024e-06, "loss": 0.5618147850036621, "step": 8974 }, { "epoch": 1.4344281946775355, "grad_norm": 1.5084836711911858, "learning_rate": 4.065848948421187e-06, "loss": 0.45607447624206543, "step": 8975 }, { "epoch": 1.4345880284504116, "grad_norm": 3.603725660399116, "learning_rate": 4.063722088698388e-06, "loss": 0.5796927809715271, "step": 8976 }, { "epoch": 1.4347478622232877, "grad_norm": 1.4561955919989595, "learning_rate": 4.061595643544162e-06, "loss": 0.5142892003059387, "step": 8977 }, { "epoch": 1.434907695996164, "grad_norm": 1.5787355078357115, "learning_rate": 4.059469613107005e-06, "loss": 0.5990782380104065, "step": 8978 }, { "epoch": 1.4350675297690403, "grad_norm": 1.7412205185576861, "learning_rate": 4.057343997535402e-06, "loss": 0.5349290370941162, "step": 8979 }, { "epoch": 1.4352273635419164, "grad_norm": 1.5696545356290985, "learning_rate": 4.0552187969777925e-06, "loss": 0.4981725811958313, "step": 8980 }, { "epoch": 1.4353871973147927, "grad_norm": 1.8079677177465296, "learning_rate": 4.053094011582593e-06, "loss": 0.5275477170944214, "step": 8981 }, { "epoch": 1.4355470310876688, "grad_norm": 1.550289271012162, "learning_rate": 4.050969641498194e-06, "loss": 0.4348074495792389, "step": 8982 }, { "epoch": 1.435706864860545, "grad_norm": 1.625198707495657, "learning_rate": 4.048845686872945e-06, "loss": 0.5093809366226196, "step": 8983 }, { "epoch": 1.4358666986334212, "grad_norm": 1.687556727776559, "learning_rate": 4.046722147855188e-06, "loss": 0.6438405513763428, "step": 8984 }, { "epoch": 1.4360265324062975, "grad_norm": 1.6084849058745718, "learning_rate": 4.044599024593216e-06, "loss": 0.4876861572265625, "step": 8985 }, { "epoch": 1.4361863661791736, "grad_norm": 1.5420188425406895, "learning_rate": 4.0424763172353034e-06, "loss": 0.6090084314346313, "step": 8986 }, { "epoch": 1.43634619995205, "grad_norm": 1.6678431598765393, "learning_rate": 4.0403540259296905e-06, "loss": 0.6087539196014404, "step": 8987 }, { "epoch": 1.436506033724926, "grad_norm": 1.637354150239202, "learning_rate": 4.038232150824587e-06, "loss": 0.5419595837593079, "step": 8988 }, { "epoch": 1.4366658674978023, "grad_norm": 1.7355697566029127, "learning_rate": 4.036110692068186e-06, "loss": 0.6317082047462463, "step": 8989 }, { "epoch": 1.4368257012706784, "grad_norm": 1.8097034549795785, "learning_rate": 4.033989649808638e-06, "loss": 0.5736058354377747, "step": 8990 }, { "epoch": 1.4369855350435548, "grad_norm": 1.444288678684277, "learning_rate": 4.031869024194064e-06, "loss": 0.48866018652915955, "step": 8991 }, { "epoch": 1.4371453688164308, "grad_norm": 1.6057318298213772, "learning_rate": 4.02974881537257e-06, "loss": 0.5041208863258362, "step": 8992 }, { "epoch": 1.4373052025893072, "grad_norm": 1.4335401756141384, "learning_rate": 4.027629023492217e-06, "loss": 0.35740235447883606, "step": 8993 }, { "epoch": 1.4374650363621833, "grad_norm": 1.5189890475715688, "learning_rate": 4.0255096487010495e-06, "loss": 0.500372052192688, "step": 8994 }, { "epoch": 1.4376248701350596, "grad_norm": 1.7134508950823502, "learning_rate": 4.023390691147074e-06, "loss": 0.7431068420410156, "step": 8995 }, { "epoch": 1.4377847039079357, "grad_norm": 1.504166274097537, "learning_rate": 4.021272150978271e-06, "loss": 0.5547544956207275, "step": 8996 }, { "epoch": 1.437944537680812, "grad_norm": 1.4079786973460775, "learning_rate": 4.019154028342592e-06, "loss": 0.4606959819793701, "step": 8997 }, { "epoch": 1.4381043714536883, "grad_norm": 1.8073430216814252, "learning_rate": 4.017036323387956e-06, "loss": 0.5081362128257751, "step": 8998 }, { "epoch": 1.4382642052265644, "grad_norm": 1.5250217294094328, "learning_rate": 4.014919036262264e-06, "loss": 0.5021368265151978, "step": 8999 }, { "epoch": 1.4384240389994405, "grad_norm": 1.6674124715332557, "learning_rate": 4.0128021671133735e-06, "loss": 0.6567145586013794, "step": 9000 }, { "epoch": 1.4385838727723168, "grad_norm": 1.5828720789684814, "learning_rate": 4.0106857160891196e-06, "loss": 0.504544198513031, "step": 9001 }, { "epoch": 1.4387437065451931, "grad_norm": 1.5114394195361234, "learning_rate": 4.00856968333731e-06, "loss": 0.5591524839401245, "step": 9002 }, { "epoch": 1.4389035403180692, "grad_norm": 1.8628754449450813, "learning_rate": 4.006454069005716e-06, "loss": 0.6654448509216309, "step": 9003 }, { "epoch": 1.4390633740909453, "grad_norm": 1.4571730392899682, "learning_rate": 4.00433887324209e-06, "loss": 0.5328543186187744, "step": 9004 }, { "epoch": 1.4392232078638216, "grad_norm": 1.5055297194813257, "learning_rate": 4.002224096194151e-06, "loss": 0.5327373147010803, "step": 9005 }, { "epoch": 1.439383041636698, "grad_norm": 1.5918401560983062, "learning_rate": 4.000109738009582e-06, "loss": 0.5431762337684631, "step": 9006 }, { "epoch": 1.439542875409574, "grad_norm": 1.7194453629216309, "learning_rate": 3.997995798836046e-06, "loss": 0.6488674879074097, "step": 9007 }, { "epoch": 1.4397027091824501, "grad_norm": 1.612382969155144, "learning_rate": 3.995882278821167e-06, "loss": 0.5772650241851807, "step": 9008 }, { "epoch": 1.4398625429553265, "grad_norm": 1.683582066419245, "learning_rate": 3.993769178112555e-06, "loss": 0.5435291528701782, "step": 9009 }, { "epoch": 1.4400223767282028, "grad_norm": 1.4461559901736214, "learning_rate": 3.9916564968577776e-06, "loss": 0.39088886976242065, "step": 9010 }, { "epoch": 1.4401822105010789, "grad_norm": 1.577043420333433, "learning_rate": 3.989544235204371e-06, "loss": 0.7239865064620972, "step": 9011 }, { "epoch": 1.440342044273955, "grad_norm": 1.5320020429431733, "learning_rate": 3.987432393299858e-06, "loss": 0.6261614561080933, "step": 9012 }, { "epoch": 1.4405018780468313, "grad_norm": 1.7386260668102096, "learning_rate": 3.985320971291714e-06, "loss": 0.6486514806747437, "step": 9013 }, { "epoch": 1.4406617118197076, "grad_norm": 1.4922212054108288, "learning_rate": 3.9832099693274e-06, "loss": 0.552239716053009, "step": 9014 }, { "epoch": 1.4408215455925837, "grad_norm": 2.3884001830109347, "learning_rate": 3.981099387554337e-06, "loss": 0.432481050491333, "step": 9015 }, { "epoch": 1.44098137936546, "grad_norm": 1.3849613606729563, "learning_rate": 3.978989226119921e-06, "loss": 0.5097609758377075, "step": 9016 }, { "epoch": 1.441141213138336, "grad_norm": 1.5701341713938615, "learning_rate": 3.97687948517152e-06, "loss": 0.5338222980499268, "step": 9017 }, { "epoch": 1.4413010469112124, "grad_norm": 1.4850013655865044, "learning_rate": 3.974770164856464e-06, "loss": 0.5582101345062256, "step": 9018 }, { "epoch": 1.4414608806840885, "grad_norm": 1.7933846150928607, "learning_rate": 3.9726612653220695e-06, "loss": 0.696650505065918, "step": 9019 }, { "epoch": 1.4416207144569648, "grad_norm": 1.4938644706013886, "learning_rate": 3.970552786715611e-06, "loss": 0.5272747874259949, "step": 9020 }, { "epoch": 1.441780548229841, "grad_norm": 1.402286475775448, "learning_rate": 3.968444729184336e-06, "loss": 0.5316349267959595, "step": 9021 }, { "epoch": 1.4419403820027172, "grad_norm": 1.8015614224701475, "learning_rate": 3.966337092875465e-06, "loss": 0.5612523555755615, "step": 9022 }, { "epoch": 1.4421002157755933, "grad_norm": 1.5444689797999598, "learning_rate": 3.964229877936184e-06, "loss": 0.5348038673400879, "step": 9023 }, { "epoch": 1.4422600495484696, "grad_norm": 1.846543076641445, "learning_rate": 3.96212308451366e-06, "loss": 0.5434828996658325, "step": 9024 }, { "epoch": 1.4424198833213457, "grad_norm": 1.670792816204242, "learning_rate": 3.960016712755021e-06, "loss": 0.4753909707069397, "step": 9025 }, { "epoch": 1.442579717094222, "grad_norm": 1.6498618816141217, "learning_rate": 3.957910762807368e-06, "loss": 0.5926543474197388, "step": 9026 }, { "epoch": 1.4427395508670982, "grad_norm": 1.522410778806012, "learning_rate": 3.95580523481777e-06, "loss": 0.5108441710472107, "step": 9027 }, { "epoch": 1.4428993846399745, "grad_norm": 1.5006249245053027, "learning_rate": 3.9537001289332766e-06, "loss": 0.5828627347946167, "step": 9028 }, { "epoch": 1.4430592184128506, "grad_norm": 1.6147324722003795, "learning_rate": 3.9515954453008965e-06, "loss": 0.5708590745925903, "step": 9029 }, { "epoch": 1.4432190521857269, "grad_norm": 1.4947771072076497, "learning_rate": 3.949491184067614e-06, "loss": 0.5383603572845459, "step": 9030 }, { "epoch": 1.443378885958603, "grad_norm": 1.5826595557531042, "learning_rate": 3.947387345380381e-06, "loss": 0.5610009431838989, "step": 9031 }, { "epoch": 1.4435387197314793, "grad_norm": 1.7021850473840796, "learning_rate": 3.945283929386128e-06, "loss": 0.5788978338241577, "step": 9032 }, { "epoch": 1.4436985535043556, "grad_norm": 1.7224679165478443, "learning_rate": 3.9431809362317415e-06, "loss": 0.5757737159729004, "step": 9033 }, { "epoch": 1.4438583872772317, "grad_norm": 1.5626196451456538, "learning_rate": 3.941078366064096e-06, "loss": 0.5624681711196899, "step": 9034 }, { "epoch": 1.4440182210501078, "grad_norm": 1.7275696476316904, "learning_rate": 3.938976219030025e-06, "loss": 0.6166102290153503, "step": 9035 }, { "epoch": 1.4441780548229841, "grad_norm": 1.586130751989342, "learning_rate": 3.9368744952763325e-06, "loss": 0.6168357133865356, "step": 9036 }, { "epoch": 1.4443378885958604, "grad_norm": 1.5433070427663862, "learning_rate": 3.934773194949797e-06, "loss": 0.6842340230941772, "step": 9037 }, { "epoch": 1.4444977223687365, "grad_norm": 1.4705976755165386, "learning_rate": 3.9326723181971615e-06, "loss": 0.514285683631897, "step": 9038 }, { "epoch": 1.4446575561416126, "grad_norm": 1.769388724891951, "learning_rate": 3.930571865165151e-06, "loss": 0.5704057216644287, "step": 9039 }, { "epoch": 1.444817389914489, "grad_norm": 1.398378854265851, "learning_rate": 3.92847183600045e-06, "loss": 0.5054221153259277, "step": 9040 }, { "epoch": 1.4449772236873653, "grad_norm": 1.5283904656021217, "learning_rate": 3.9263722308497185e-06, "loss": 0.5967762470245361, "step": 9041 }, { "epoch": 1.4451370574602413, "grad_norm": 1.61453980987736, "learning_rate": 3.9242730498595816e-06, "loss": 0.5818119049072266, "step": 9042 }, { "epoch": 1.4452968912331174, "grad_norm": 1.465834531654996, "learning_rate": 3.922174293176644e-06, "loss": 0.5218936204910278, "step": 9043 }, { "epoch": 1.4454567250059938, "grad_norm": 1.6418081324279137, "learning_rate": 3.920075960947472e-06, "loss": 0.5697156190872192, "step": 9044 }, { "epoch": 1.44561655877887, "grad_norm": 1.360455409352527, "learning_rate": 3.9179780533186075e-06, "loss": 0.44449251890182495, "step": 9045 }, { "epoch": 1.4457763925517462, "grad_norm": 1.5474855776990024, "learning_rate": 3.91588057043656e-06, "loss": 0.44827258586883545, "step": 9046 }, { "epoch": 1.4459362263246223, "grad_norm": 1.6815198287096276, "learning_rate": 3.913783512447806e-06, "loss": 0.6502708196640015, "step": 9047 }, { "epoch": 1.4460960600974986, "grad_norm": 1.5284772158877735, "learning_rate": 3.911686879498805e-06, "loss": 0.4148547649383545, "step": 9048 }, { "epoch": 1.446255893870375, "grad_norm": 1.392277620196111, "learning_rate": 3.909590671735975e-06, "loss": 0.4626953601837158, "step": 9049 }, { "epoch": 1.446415727643251, "grad_norm": 1.5690826992994271, "learning_rate": 3.907494889305705e-06, "loss": 0.6044490933418274, "step": 9050 }, { "epoch": 1.4465755614161273, "grad_norm": 1.482231029857069, "learning_rate": 3.90539953235436e-06, "loss": 0.49808287620544434, "step": 9051 }, { "epoch": 1.4467353951890034, "grad_norm": 1.5948263717755555, "learning_rate": 3.9033046010282685e-06, "loss": 0.6735255122184753, "step": 9052 }, { "epoch": 1.4468952289618797, "grad_norm": 1.4848904753463819, "learning_rate": 3.901210095473734e-06, "loss": 0.44794952869415283, "step": 9053 }, { "epoch": 1.4470550627347558, "grad_norm": 1.691921484510593, "learning_rate": 3.899116015837035e-06, "loss": 0.5310022830963135, "step": 9054 }, { "epoch": 1.4472148965076321, "grad_norm": 1.502669234346762, "learning_rate": 3.897022362264412e-06, "loss": 0.5983728170394897, "step": 9055 }, { "epoch": 1.4473747302805082, "grad_norm": 1.3685849214675918, "learning_rate": 3.8949291349020755e-06, "loss": 0.5938260555267334, "step": 9056 }, { "epoch": 1.4475345640533845, "grad_norm": 1.4653903143346525, "learning_rate": 3.892836333896208e-06, "loss": 0.4713546335697174, "step": 9057 }, { "epoch": 1.4476943978262606, "grad_norm": 1.695705699996515, "learning_rate": 3.89074395939297e-06, "loss": 0.6216237545013428, "step": 9058 }, { "epoch": 1.447854231599137, "grad_norm": 1.9395542770427872, "learning_rate": 3.888652011538479e-06, "loss": 0.5608693361282349, "step": 9059 }, { "epoch": 1.448014065372013, "grad_norm": 1.4957474739087628, "learning_rate": 3.886560490478834e-06, "loss": 0.48800769448280334, "step": 9060 }, { "epoch": 1.4481738991448894, "grad_norm": 1.325637261482766, "learning_rate": 3.884469396360097e-06, "loss": 0.5567079186439514, "step": 9061 }, { "epoch": 1.4483337329177655, "grad_norm": 1.76746745940352, "learning_rate": 3.882378729328298e-06, "loss": 0.44374674558639526, "step": 9062 }, { "epoch": 1.4484935666906418, "grad_norm": 1.3873775717395471, "learning_rate": 3.880288489529449e-06, "loss": 0.46449339389801025, "step": 9063 }, { "epoch": 1.4486534004635179, "grad_norm": 1.779312264758904, "learning_rate": 3.878198677109524e-06, "loss": 0.6106983423233032, "step": 9064 }, { "epoch": 1.4488132342363942, "grad_norm": 1.8506744314275947, "learning_rate": 3.876109292214465e-06, "loss": 0.6998655796051025, "step": 9065 }, { "epoch": 1.4489730680092703, "grad_norm": 1.8287723627183505, "learning_rate": 3.874020334990189e-06, "loss": 0.4722272455692291, "step": 9066 }, { "epoch": 1.4491329017821466, "grad_norm": 1.5514371658776254, "learning_rate": 3.8719318055825785e-06, "loss": 0.5095228552818298, "step": 9067 }, { "epoch": 1.4492927355550227, "grad_norm": 1.2898291114071199, "learning_rate": 3.869843704137494e-06, "loss": 0.47065478563308716, "step": 9068 }, { "epoch": 1.449452569327899, "grad_norm": 1.541700487321337, "learning_rate": 3.867756030800759e-06, "loss": 0.5204727053642273, "step": 9069 }, { "epoch": 1.449612403100775, "grad_norm": 1.4268353530959321, "learning_rate": 3.865668785718168e-06, "loss": 0.47645869851112366, "step": 9070 }, { "epoch": 1.4497722368736514, "grad_norm": 1.593710401508319, "learning_rate": 3.863581969035488e-06, "loss": 0.5814148187637329, "step": 9071 }, { "epoch": 1.4499320706465277, "grad_norm": 2.000812522955409, "learning_rate": 3.86149558089845e-06, "loss": 0.6431397199630737, "step": 9072 }, { "epoch": 1.4500919044194038, "grad_norm": 1.8034333617771339, "learning_rate": 3.859409621452765e-06, "loss": 0.6229636073112488, "step": 9073 }, { "epoch": 1.45025173819228, "grad_norm": 1.3896590795570942, "learning_rate": 3.857324090844113e-06, "loss": 0.5642144083976746, "step": 9074 }, { "epoch": 1.4504115719651562, "grad_norm": 1.983650954311753, "learning_rate": 3.855238989218133e-06, "loss": 0.6998302340507507, "step": 9075 }, { "epoch": 1.4505714057380326, "grad_norm": 1.446436760694804, "learning_rate": 3.853154316720444e-06, "loss": 0.4800335168838501, "step": 9076 }, { "epoch": 1.4507312395109087, "grad_norm": 1.514900967733505, "learning_rate": 3.851070073496629e-06, "loss": 0.6351631879806519, "step": 9077 }, { "epoch": 1.4508910732837847, "grad_norm": 1.6988031449800574, "learning_rate": 3.84898625969225e-06, "loss": 0.4783133268356323, "step": 9078 }, { "epoch": 1.451050907056661, "grad_norm": 1.4752004056936163, "learning_rate": 3.84690287545283e-06, "loss": 0.5026137232780457, "step": 9079 }, { "epoch": 1.4512107408295374, "grad_norm": 1.7310844660160847, "learning_rate": 3.844819920923863e-06, "loss": 0.7401034832000732, "step": 9080 }, { "epoch": 1.4513705746024135, "grad_norm": 1.7249272288899111, "learning_rate": 3.842737396250819e-06, "loss": 0.528956949710846, "step": 9081 }, { "epoch": 1.4515304083752896, "grad_norm": 1.4478864669168716, "learning_rate": 3.840655301579129e-06, "loss": 0.5411261320114136, "step": 9082 }, { "epoch": 1.4516902421481659, "grad_norm": 1.4449606018395884, "learning_rate": 3.838573637054206e-06, "loss": 0.42290788888931274, "step": 9083 }, { "epoch": 1.4518500759210422, "grad_norm": 1.632421812944409, "learning_rate": 3.836492402821421e-06, "loss": 0.524179995059967, "step": 9084 }, { "epoch": 1.4520099096939183, "grad_norm": 1.7719291862745086, "learning_rate": 3.834411599026122e-06, "loss": 0.5792071223258972, "step": 9085 }, { "epoch": 1.4521697434667946, "grad_norm": 1.5736932911415116, "learning_rate": 3.832331225813626e-06, "loss": 0.5088571310043335, "step": 9086 }, { "epoch": 1.4523295772396707, "grad_norm": 1.4465006210469202, "learning_rate": 3.830251283329211e-06, "loss": 0.5760445594787598, "step": 9087 }, { "epoch": 1.452489411012547, "grad_norm": 1.5210978015585572, "learning_rate": 3.828171771718145e-06, "loss": 0.6111711859703064, "step": 9088 }, { "epoch": 1.4526492447854231, "grad_norm": 1.5360601983157511, "learning_rate": 3.826092691125647e-06, "loss": 0.6337592005729675, "step": 9089 }, { "epoch": 1.4528090785582994, "grad_norm": 1.3538480699988553, "learning_rate": 3.824014041696913e-06, "loss": 0.40705761313438416, "step": 9090 }, { "epoch": 1.4529689123311755, "grad_norm": 1.6220890416790656, "learning_rate": 3.821935823577106e-06, "loss": 0.5417648553848267, "step": 9091 }, { "epoch": 1.4531287461040518, "grad_norm": 1.6859774217290597, "learning_rate": 3.819858036911364e-06, "loss": 0.5166669487953186, "step": 9092 }, { "epoch": 1.453288579876928, "grad_norm": 1.5902232778059806, "learning_rate": 3.817780681844796e-06, "loss": 0.4819057583808899, "step": 9093 }, { "epoch": 1.4534484136498043, "grad_norm": 1.533137354237907, "learning_rate": 3.815703758522473e-06, "loss": 0.5180306434631348, "step": 9094 }, { "epoch": 1.4536082474226804, "grad_norm": 1.7081614154839344, "learning_rate": 3.8136272670894413e-06, "loss": 0.5694319009780884, "step": 9095 }, { "epoch": 1.4537680811955567, "grad_norm": 1.5450957864275607, "learning_rate": 3.8115512076907158e-06, "loss": 0.5084225535392761, "step": 9096 }, { "epoch": 1.4539279149684328, "grad_norm": 1.6828294466389013, "learning_rate": 3.8094755804712756e-06, "loss": 0.6052921414375305, "step": 9097 }, { "epoch": 1.454087748741309, "grad_norm": 1.3570275896163273, "learning_rate": 3.8074003855760834e-06, "loss": 0.4296293258666992, "step": 9098 }, { "epoch": 1.4542475825141852, "grad_norm": 1.5421962047647781, "learning_rate": 3.80532562315006e-06, "loss": 0.5135965943336487, "step": 9099 }, { "epoch": 1.4544074162870615, "grad_norm": 1.5947932819860697, "learning_rate": 3.8032512933380994e-06, "loss": 0.6305787563323975, "step": 9100 }, { "epoch": 1.4545672500599376, "grad_norm": 1.6321716580102252, "learning_rate": 3.801177396285065e-06, "loss": 0.5495818853378296, "step": 9101 }, { "epoch": 1.454727083832814, "grad_norm": 1.582224111895044, "learning_rate": 3.799103932135787e-06, "loss": 0.4514853358268738, "step": 9102 }, { "epoch": 1.45488691760569, "grad_norm": 1.4777829539535927, "learning_rate": 3.797030901035077e-06, "loss": 0.5138155221939087, "step": 9103 }, { "epoch": 1.4550467513785663, "grad_norm": 1.469221521733398, "learning_rate": 3.7949583031277026e-06, "loss": 0.552308976650238, "step": 9104 }, { "epoch": 1.4552065851514424, "grad_norm": 1.551079328356842, "learning_rate": 3.792886138558408e-06, "loss": 0.5385980606079102, "step": 9105 }, { "epoch": 1.4553664189243187, "grad_norm": 1.4918684073245887, "learning_rate": 3.790814407471902e-06, "loss": 0.5476539134979248, "step": 9106 }, { "epoch": 1.455526252697195, "grad_norm": 1.463340956833895, "learning_rate": 3.7887431100128746e-06, "loss": 0.6686993837356567, "step": 9107 }, { "epoch": 1.4556860864700711, "grad_norm": 1.4944489725191386, "learning_rate": 3.786672246325973e-06, "loss": 0.5811367034912109, "step": 9108 }, { "epoch": 1.4558459202429472, "grad_norm": 1.372500355732217, "learning_rate": 3.7846018165558196e-06, "loss": 0.46691811084747314, "step": 9109 }, { "epoch": 1.4560057540158236, "grad_norm": 1.4811674389265852, "learning_rate": 3.7825318208470063e-06, "loss": 0.5074244737625122, "step": 9110 }, { "epoch": 1.4561655877886999, "grad_norm": 1.6822380765346547, "learning_rate": 3.780462259344091e-06, "loss": 0.6335894465446472, "step": 9111 }, { "epoch": 1.456325421561576, "grad_norm": 1.2691242421680844, "learning_rate": 3.7783931321916067e-06, "loss": 0.46761012077331543, "step": 9112 }, { "epoch": 1.456485255334452, "grad_norm": 1.8457524385052169, "learning_rate": 3.776324439534058e-06, "loss": 0.5787124633789062, "step": 9113 }, { "epoch": 1.4566450891073284, "grad_norm": 1.6851254000212774, "learning_rate": 3.774256181515912e-06, "loss": 0.4015949070453644, "step": 9114 }, { "epoch": 1.4568049228802047, "grad_norm": 1.547394007832002, "learning_rate": 3.7721883582816075e-06, "loss": 0.5301440954208374, "step": 9115 }, { "epoch": 1.4569647566530808, "grad_norm": 1.6467113749111486, "learning_rate": 3.770120969975556e-06, "loss": 0.5057294368743896, "step": 9116 }, { "epoch": 1.4571245904259569, "grad_norm": 1.5290362780985116, "learning_rate": 3.7680540167421296e-06, "loss": 0.4635998606681824, "step": 9117 }, { "epoch": 1.4572844241988332, "grad_norm": 1.5963443366832386, "learning_rate": 3.7659874987256885e-06, "loss": 0.506074070930481, "step": 9118 }, { "epoch": 1.4574442579717095, "grad_norm": 1.4456033915207633, "learning_rate": 3.763921416070543e-06, "loss": 0.5588400363922119, "step": 9119 }, { "epoch": 1.4576040917445856, "grad_norm": 1.4799252254387711, "learning_rate": 3.7618557689209836e-06, "loss": 0.612355649471283, "step": 9120 }, { "epoch": 1.457763925517462, "grad_norm": 1.5471960866686287, "learning_rate": 3.7597905574212646e-06, "loss": 0.6308202743530273, "step": 9121 }, { "epoch": 1.457923759290338, "grad_norm": 1.3653497389629157, "learning_rate": 3.7577257817156178e-06, "loss": 0.4432867467403412, "step": 9122 }, { "epoch": 1.4580835930632143, "grad_norm": 1.5805318209580752, "learning_rate": 3.7556614419482387e-06, "loss": 0.4174045920372009, "step": 9123 }, { "epoch": 1.4582434268360904, "grad_norm": 1.8106242625772524, "learning_rate": 3.753597538263292e-06, "loss": 0.5290886163711548, "step": 9124 }, { "epoch": 1.4584032606089667, "grad_norm": 1.5036731129358614, "learning_rate": 3.751534070804913e-06, "loss": 0.5711835622787476, "step": 9125 }, { "epoch": 1.4585630943818428, "grad_norm": 1.5546801413215954, "learning_rate": 3.749471039717204e-06, "loss": 0.5453414916992188, "step": 9126 }, { "epoch": 1.4587229281547192, "grad_norm": 1.559752644975213, "learning_rate": 3.7474084451442484e-06, "loss": 0.5868632793426514, "step": 9127 }, { "epoch": 1.4588827619275953, "grad_norm": 1.5416893301568404, "learning_rate": 3.745346287230085e-06, "loss": 0.5062645673751831, "step": 9128 }, { "epoch": 1.4590425957004716, "grad_norm": 1.5964947172721418, "learning_rate": 3.7432845661187277e-06, "loss": 0.5789105296134949, "step": 9129 }, { "epoch": 1.4592024294733477, "grad_norm": 1.5251340692429476, "learning_rate": 3.7412232819541605e-06, "loss": 0.46243467926979065, "step": 9130 }, { "epoch": 1.459362263246224, "grad_norm": 1.5851686956856927, "learning_rate": 3.7391624348803324e-06, "loss": 0.48492908477783203, "step": 9131 }, { "epoch": 1.4595220970191, "grad_norm": 1.6410011630996053, "learning_rate": 3.7371020250411694e-06, "loss": 0.830456018447876, "step": 9132 }, { "epoch": 1.4596819307919764, "grad_norm": 1.6738445066006087, "learning_rate": 3.7350420525805664e-06, "loss": 0.5807607769966125, "step": 9133 }, { "epoch": 1.4598417645648525, "grad_norm": 1.68364547526098, "learning_rate": 3.7329825176423806e-06, "loss": 0.532829761505127, "step": 9134 }, { "epoch": 1.4600015983377288, "grad_norm": 1.4810450879148391, "learning_rate": 3.7309234203704426e-06, "loss": 0.5632206797599792, "step": 9135 }, { "epoch": 1.460161432110605, "grad_norm": 1.7973325691000774, "learning_rate": 3.7288647609085505e-06, "loss": 0.5111981630325317, "step": 9136 }, { "epoch": 1.4603212658834812, "grad_norm": 1.5866774029705635, "learning_rate": 3.7268065394004795e-06, "loss": 0.5011125802993774, "step": 9137 }, { "epoch": 1.4604810996563573, "grad_norm": 1.6152149087279017, "learning_rate": 3.724748755989965e-06, "loss": 0.52337646484375, "step": 9138 }, { "epoch": 1.4606409334292336, "grad_norm": 1.5345165112813646, "learning_rate": 3.7226914108207156e-06, "loss": 0.4705204367637634, "step": 9139 }, { "epoch": 1.4608007672021097, "grad_norm": 1.8685649390638785, "learning_rate": 3.720634504036409e-06, "loss": 0.5969603061676025, "step": 9140 }, { "epoch": 1.460960600974986, "grad_norm": 1.519632086542557, "learning_rate": 3.718578035780689e-06, "loss": 0.5420143008232117, "step": 9141 }, { "epoch": 1.4611204347478624, "grad_norm": 1.5720459804634808, "learning_rate": 3.7165220061971773e-06, "loss": 0.7077935934066772, "step": 9142 }, { "epoch": 1.4612802685207384, "grad_norm": 1.3220131706988956, "learning_rate": 3.7144664154294584e-06, "loss": 0.3667290210723877, "step": 9143 }, { "epoch": 1.4614401022936145, "grad_norm": 1.2836269697078833, "learning_rate": 3.7124112636210874e-06, "loss": 0.5050696730613708, "step": 9144 }, { "epoch": 1.4615999360664909, "grad_norm": 1.499081527437951, "learning_rate": 3.710356550915587e-06, "loss": 0.4729034900665283, "step": 9145 }, { "epoch": 1.4617597698393672, "grad_norm": 1.4947477016243857, "learning_rate": 3.7083022774564494e-06, "loss": 0.636919379234314, "step": 9146 }, { "epoch": 1.4619196036122433, "grad_norm": 1.642218432665353, "learning_rate": 3.7062484433871426e-06, "loss": 0.5152787566184998, "step": 9147 }, { "epoch": 1.4620794373851194, "grad_norm": 1.3018481754729174, "learning_rate": 3.704195048851098e-06, "loss": 0.49003326892852783, "step": 9148 }, { "epoch": 1.4622392711579957, "grad_norm": 1.5910261550107951, "learning_rate": 3.702142093991716e-06, "loss": 0.6104025840759277, "step": 9149 }, { "epoch": 1.462399104930872, "grad_norm": 1.5538295288300183, "learning_rate": 3.7000895789523683e-06, "loss": 0.5725445747375488, "step": 9150 }, { "epoch": 1.462558938703748, "grad_norm": 1.3688602065853577, "learning_rate": 3.6980375038763905e-06, "loss": 0.4622572660446167, "step": 9151 }, { "epoch": 1.4627187724766242, "grad_norm": 1.5629736571861026, "learning_rate": 3.695985868907098e-06, "loss": 0.46126675605773926, "step": 9152 }, { "epoch": 1.4628786062495005, "grad_norm": 1.900141894831713, "learning_rate": 3.6939346741877712e-06, "loss": 0.5226876139640808, "step": 9153 }, { "epoch": 1.4630384400223768, "grad_norm": 1.6051702887385815, "learning_rate": 3.6918839198616552e-06, "loss": 0.6197510957717896, "step": 9154 }, { "epoch": 1.463198273795253, "grad_norm": 1.1887894572433677, "learning_rate": 3.6898336060719677e-06, "loss": 0.4164469540119171, "step": 9155 }, { "epoch": 1.463358107568129, "grad_norm": 1.9177807933146942, "learning_rate": 3.6877837329618927e-06, "loss": 0.824222207069397, "step": 9156 }, { "epoch": 1.4635179413410053, "grad_norm": 1.3902978024031785, "learning_rate": 3.6857343006745915e-06, "loss": 0.4311221241950989, "step": 9157 }, { "epoch": 1.4636777751138816, "grad_norm": 1.768111682642874, "learning_rate": 3.6836853093531856e-06, "loss": 0.6180658340454102, "step": 9158 }, { "epoch": 1.4638376088867577, "grad_norm": 1.7450700956281215, "learning_rate": 3.681636759140771e-06, "loss": 0.693423867225647, "step": 9159 }, { "epoch": 1.463997442659634, "grad_norm": 1.8163459769590606, "learning_rate": 3.67958865018041e-06, "loss": 0.549777626991272, "step": 9160 }, { "epoch": 1.4641572764325101, "grad_norm": 1.4655317607627538, "learning_rate": 3.6775409826151307e-06, "loss": 0.613083004951477, "step": 9161 }, { "epoch": 1.4643171102053865, "grad_norm": 1.6645843610519255, "learning_rate": 3.6754937565879446e-06, "loss": 0.5827823281288147, "step": 9162 }, { "epoch": 1.4644769439782626, "grad_norm": 1.7164232312001186, "learning_rate": 3.6734469722418177e-06, "loss": 0.610771894454956, "step": 9163 }, { "epoch": 1.4646367777511389, "grad_norm": 1.422078528588381, "learning_rate": 3.6714006297196893e-06, "loss": 0.5314786434173584, "step": 9164 }, { "epoch": 1.464796611524015, "grad_norm": 1.8543475056793521, "learning_rate": 3.6693547291644705e-06, "loss": 0.55128014087677, "step": 9165 }, { "epoch": 1.4649564452968913, "grad_norm": 1.9089399498962558, "learning_rate": 3.667309270719035e-06, "loss": 0.5839862823486328, "step": 9166 }, { "epoch": 1.4651162790697674, "grad_norm": 1.4985459384111313, "learning_rate": 3.6652642545262374e-06, "loss": 0.5097885131835938, "step": 9167 }, { "epoch": 1.4652761128426437, "grad_norm": 1.5385174015354675, "learning_rate": 3.663219680728891e-06, "loss": 0.4994193911552429, "step": 9168 }, { "epoch": 1.4654359466155198, "grad_norm": 1.537739340902411, "learning_rate": 3.661175549469782e-06, "loss": 0.5490689873695374, "step": 9169 }, { "epoch": 1.465595780388396, "grad_norm": 1.6158126125790058, "learning_rate": 3.6591318608916625e-06, "loss": 0.5886790156364441, "step": 9170 }, { "epoch": 1.4657556141612722, "grad_norm": 1.7964775757026465, "learning_rate": 3.657088615137261e-06, "loss": 0.5072516798973083, "step": 9171 }, { "epoch": 1.4659154479341485, "grad_norm": 2.897250653184548, "learning_rate": 3.655045812349266e-06, "loss": 0.6683365106582642, "step": 9172 }, { "epoch": 1.4660752817070246, "grad_norm": 1.4159472039186367, "learning_rate": 3.653003452670345e-06, "loss": 0.44491708278656006, "step": 9173 }, { "epoch": 1.466235115479901, "grad_norm": 1.5671488004696676, "learning_rate": 3.650961536243126e-06, "loss": 0.5460540056228638, "step": 9174 }, { "epoch": 1.466394949252777, "grad_norm": 1.5466087785948293, "learning_rate": 3.6489200632102094e-06, "loss": 0.6044284701347351, "step": 9175 }, { "epoch": 1.4665547830256533, "grad_norm": 1.4084827680169247, "learning_rate": 3.6468790337141613e-06, "loss": 0.4644070267677307, "step": 9176 }, { "epoch": 1.4667146167985297, "grad_norm": 1.7006753878595147, "learning_rate": 3.6448384478975263e-06, "loss": 0.5289619565010071, "step": 9177 }, { "epoch": 1.4668744505714058, "grad_norm": 1.6020411609527887, "learning_rate": 3.6427983059028094e-06, "loss": 0.5358582735061646, "step": 9178 }, { "epoch": 1.4670342843442818, "grad_norm": 1.3916412452395404, "learning_rate": 3.6407586078724844e-06, "loss": 0.47431838512420654, "step": 9179 }, { "epoch": 1.4671941181171582, "grad_norm": 1.742959247261711, "learning_rate": 3.6387193539489994e-06, "loss": 0.5953951478004456, "step": 9180 }, { "epoch": 1.4673539518900345, "grad_norm": 1.857010446006159, "learning_rate": 3.636680544274763e-06, "loss": 0.6040143966674805, "step": 9181 }, { "epoch": 1.4675137856629106, "grad_norm": 1.5428764253282166, "learning_rate": 3.634642178992167e-06, "loss": 0.5716331005096436, "step": 9182 }, { "epoch": 1.4676736194357867, "grad_norm": 1.524182536832579, "learning_rate": 3.632604258243558e-06, "loss": 0.5654137134552002, "step": 9183 }, { "epoch": 1.467833453208663, "grad_norm": 1.8668008108181033, "learning_rate": 3.63056678217126e-06, "loss": 0.506776750087738, "step": 9184 }, { "epoch": 1.4679932869815393, "grad_norm": 2.389317062429162, "learning_rate": 3.6285297509175575e-06, "loss": 0.48911425471305847, "step": 9185 }, { "epoch": 1.4681531207544154, "grad_norm": 1.5997021210912798, "learning_rate": 3.626493164624717e-06, "loss": 0.6274285316467285, "step": 9186 }, { "epoch": 1.4683129545272915, "grad_norm": 1.401227997843009, "learning_rate": 3.624457023434964e-06, "loss": 0.4321872591972351, "step": 9187 }, { "epoch": 1.4684727883001678, "grad_norm": 1.4065704378758903, "learning_rate": 3.622421327490494e-06, "loss": 0.6623330116271973, "step": 9188 }, { "epoch": 1.4686326220730441, "grad_norm": 1.6809519810892892, "learning_rate": 3.620386076933473e-06, "loss": 0.6481292247772217, "step": 9189 }, { "epoch": 1.4687924558459202, "grad_norm": 1.5484153567313692, "learning_rate": 3.618351271906032e-06, "loss": 0.521474301815033, "step": 9190 }, { "epoch": 1.4689522896187963, "grad_norm": 1.5555416082291091, "learning_rate": 3.616316912550283e-06, "loss": 0.49608364701271057, "step": 9191 }, { "epoch": 1.4691121233916726, "grad_norm": 1.5968895411486015, "learning_rate": 3.6142829990082896e-06, "loss": 0.6171221733093262, "step": 9192 }, { "epoch": 1.469271957164549, "grad_norm": 1.396112840402736, "learning_rate": 3.6122495314221008e-06, "loss": 0.44193437695503235, "step": 9193 }, { "epoch": 1.469431790937425, "grad_norm": 1.6318231146259092, "learning_rate": 3.610216509933724e-06, "loss": 0.529923677444458, "step": 9194 }, { "epoch": 1.4695916247103014, "grad_norm": 1.6826077125290424, "learning_rate": 3.608183934685138e-06, "loss": 0.7282793521881104, "step": 9195 }, { "epoch": 1.4697514584831775, "grad_norm": 1.4471123522239229, "learning_rate": 3.6061518058182854e-06, "loss": 0.5174570679664612, "step": 9196 }, { "epoch": 1.4699112922560538, "grad_norm": 1.682537655395628, "learning_rate": 3.604120123475092e-06, "loss": 0.5762633085250854, "step": 9197 }, { "epoch": 1.4700711260289299, "grad_norm": 1.3949516391906376, "learning_rate": 3.602088887797439e-06, "loss": 0.6455390453338623, "step": 9198 }, { "epoch": 1.4702309598018062, "grad_norm": 1.4980101808279191, "learning_rate": 3.6000580989271804e-06, "loss": 0.5344818830490112, "step": 9199 }, { "epoch": 1.4703907935746823, "grad_norm": 1.2766910655137407, "learning_rate": 3.5980277570061362e-06, "loss": 0.4326847195625305, "step": 9200 }, { "epoch": 1.4705506273475586, "grad_norm": 1.7172888244015647, "learning_rate": 3.595997862176105e-06, "loss": 0.4835526943206787, "step": 9201 }, { "epoch": 1.4707104611204347, "grad_norm": 1.5021385418957967, "learning_rate": 3.5939684145788435e-06, "loss": 0.5582339763641357, "step": 9202 }, { "epoch": 1.470870294893311, "grad_norm": 1.5866315835252052, "learning_rate": 3.5919394143560815e-06, "loss": 0.5307984352111816, "step": 9203 }, { "epoch": 1.471030128666187, "grad_norm": 1.4988480097821337, "learning_rate": 3.589910861649518e-06, "loss": 0.486097514629364, "step": 9204 }, { "epoch": 1.4711899624390634, "grad_norm": 1.4607546774571283, "learning_rate": 3.587882756600816e-06, "loss": 0.5408907532691956, "step": 9205 }, { "epoch": 1.4713497962119395, "grad_norm": 1.5024806725449606, "learning_rate": 3.585855099351617e-06, "loss": 0.5648825168609619, "step": 9206 }, { "epoch": 1.4715096299848158, "grad_norm": 1.8415573203669933, "learning_rate": 3.583827890043524e-06, "loss": 0.6198593378067017, "step": 9207 }, { "epoch": 1.471669463757692, "grad_norm": 1.6694686049432204, "learning_rate": 3.581801128818109e-06, "loss": 0.6187496185302734, "step": 9208 }, { "epoch": 1.4718292975305682, "grad_norm": 1.422659181060125, "learning_rate": 3.5797748158169123e-06, "loss": 0.5196118950843811, "step": 9209 }, { "epoch": 1.4719891313034443, "grad_norm": 1.6336986066817305, "learning_rate": 3.5777489511814432e-06, "loss": 0.5908429622650146, "step": 9210 }, { "epoch": 1.4721489650763206, "grad_norm": 1.6175463810942632, "learning_rate": 3.575723535053186e-06, "loss": 0.6341820359230042, "step": 9211 }, { "epoch": 1.472308798849197, "grad_norm": 1.649322735399763, "learning_rate": 3.5736985675735838e-06, "loss": 0.5941982269287109, "step": 9212 }, { "epoch": 1.472468632622073, "grad_norm": 1.341672563916944, "learning_rate": 3.5716740488840584e-06, "loss": 0.5062247514724731, "step": 9213 }, { "epoch": 1.4726284663949492, "grad_norm": 1.6150008791754977, "learning_rate": 3.569649979125992e-06, "loss": 0.625419557094574, "step": 9214 }, { "epoch": 1.4727883001678255, "grad_norm": 1.7666673190490614, "learning_rate": 3.567626358440738e-06, "loss": 0.4553009867668152, "step": 9215 }, { "epoch": 1.4729481339407018, "grad_norm": 1.495647911043382, "learning_rate": 3.565603186969616e-06, "loss": 0.552290141582489, "step": 9216 }, { "epoch": 1.4731079677135779, "grad_norm": 1.5017656581571255, "learning_rate": 3.563580464853924e-06, "loss": 0.5842717885971069, "step": 9217 }, { "epoch": 1.473267801486454, "grad_norm": 1.7498957945676743, "learning_rate": 3.5615581922349185e-06, "loss": 0.5657209753990173, "step": 9218 }, { "epoch": 1.4734276352593303, "grad_norm": 1.4562863823069947, "learning_rate": 3.5595363692538275e-06, "loss": 0.5227541923522949, "step": 9219 }, { "epoch": 1.4735874690322066, "grad_norm": 1.403065388423989, "learning_rate": 3.557514996051844e-06, "loss": 0.40822285413742065, "step": 9220 }, { "epoch": 1.4737473028050827, "grad_norm": 1.4604888777778635, "learning_rate": 3.5554940727701416e-06, "loss": 0.5402770638465881, "step": 9221 }, { "epoch": 1.4739071365779588, "grad_norm": 1.4767088803626491, "learning_rate": 3.5534735995498506e-06, "loss": 0.5827832818031311, "step": 9222 }, { "epoch": 1.4740669703508351, "grad_norm": 1.6741402543676367, "learning_rate": 3.551453576532072e-06, "loss": 0.6737596392631531, "step": 9223 }, { "epoch": 1.4742268041237114, "grad_norm": 1.680601060415411, "learning_rate": 3.549434003857879e-06, "loss": 0.549298882484436, "step": 9224 }, { "epoch": 1.4743866378965875, "grad_norm": 1.4931750883685266, "learning_rate": 3.5474148816683075e-06, "loss": 0.603236198425293, "step": 9225 }, { "epoch": 1.4745464716694636, "grad_norm": 2.1747588994616844, "learning_rate": 3.5453962101043727e-06, "loss": 0.4982025921344757, "step": 9226 }, { "epoch": 1.47470630544234, "grad_norm": 1.5750520851517207, "learning_rate": 3.5433779893070477e-06, "loss": 0.49451786279678345, "step": 9227 }, { "epoch": 1.4748661392152163, "grad_norm": 1.5404441416563832, "learning_rate": 3.541360219417277e-06, "loss": 0.6359374523162842, "step": 9228 }, { "epoch": 1.4750259729880923, "grad_norm": 1.7084645910572085, "learning_rate": 3.5393429005759772e-06, "loss": 0.46602460741996765, "step": 9229 }, { "epoch": 1.4751858067609687, "grad_norm": 1.5190432680799286, "learning_rate": 3.5373260329240244e-06, "loss": 0.5874679088592529, "step": 9230 }, { "epoch": 1.4753456405338448, "grad_norm": 1.185166194053754, "learning_rate": 3.535309616602278e-06, "loss": 0.4510204792022705, "step": 9231 }, { "epoch": 1.475505474306721, "grad_norm": 1.6490893619384157, "learning_rate": 3.533293651751549e-06, "loss": 0.4326029419898987, "step": 9232 }, { "epoch": 1.4756653080795972, "grad_norm": 1.5998554510361227, "learning_rate": 3.531278138512635e-06, "loss": 0.5457872152328491, "step": 9233 }, { "epoch": 1.4758251418524735, "grad_norm": 1.4131776584966125, "learning_rate": 3.5292630770262847e-06, "loss": 0.5537921190261841, "step": 9234 }, { "epoch": 1.4759849756253496, "grad_norm": 1.4389599138550593, "learning_rate": 3.527248467433223e-06, "loss": 0.4192574918270111, "step": 9235 }, { "epoch": 1.476144809398226, "grad_norm": 1.8791913922071757, "learning_rate": 3.5252343098741494e-06, "loss": 0.5489155054092407, "step": 9236 }, { "epoch": 1.476304643171102, "grad_norm": 1.6301299310889652, "learning_rate": 3.52322060448972e-06, "loss": 0.5319665670394897, "step": 9237 }, { "epoch": 1.4764644769439783, "grad_norm": 1.4738792615269174, "learning_rate": 3.5212073514205658e-06, "loss": 0.47718051075935364, "step": 9238 }, { "epoch": 1.4766243107168544, "grad_norm": 1.7276970345829206, "learning_rate": 3.519194550807287e-06, "loss": 0.6367802023887634, "step": 9239 }, { "epoch": 1.4767841444897307, "grad_norm": 1.3512707871743261, "learning_rate": 3.5171822027904435e-06, "loss": 0.4674925208091736, "step": 9240 }, { "epoch": 1.4769439782626068, "grad_norm": 1.3788292557603785, "learning_rate": 3.5151703075105815e-06, "loss": 0.6130577325820923, "step": 9241 }, { "epoch": 1.4771038120354831, "grad_norm": 1.4892214981239529, "learning_rate": 3.513158865108198e-06, "loss": 0.5470978021621704, "step": 9242 }, { "epoch": 1.4772636458083592, "grad_norm": 1.3619171106797967, "learning_rate": 3.5111478757237662e-06, "loss": 0.5329196453094482, "step": 9243 }, { "epoch": 1.4774234795812355, "grad_norm": 1.6324800593592033, "learning_rate": 3.5091373394977267e-06, "loss": 0.47739773988723755, "step": 9244 }, { "epoch": 1.4775833133541116, "grad_norm": 1.4881969736164102, "learning_rate": 3.507127256570485e-06, "loss": 0.6093394756317139, "step": 9245 }, { "epoch": 1.477743147126988, "grad_norm": 1.480837762116355, "learning_rate": 3.5051176270824237e-06, "loss": 0.558617353439331, "step": 9246 }, { "epoch": 1.4779029808998643, "grad_norm": 1.7462227397359116, "learning_rate": 3.5031084511738855e-06, "loss": 0.4670792818069458, "step": 9247 }, { "epoch": 1.4780628146727404, "grad_norm": 1.600022338741682, "learning_rate": 3.5010997289851845e-06, "loss": 0.5774232149124146, "step": 9248 }, { "epoch": 1.4782226484456165, "grad_norm": 1.6715525400324287, "learning_rate": 3.4990914606565996e-06, "loss": 0.4835874140262604, "step": 9249 }, { "epoch": 1.4783824822184928, "grad_norm": 1.5683256035068858, "learning_rate": 3.4970836463283863e-06, "loss": 0.4712406396865845, "step": 9250 }, { "epoch": 1.478542315991369, "grad_norm": 1.52102286833466, "learning_rate": 3.4950762861407616e-06, "loss": 0.6785476207733154, "step": 9251 }, { "epoch": 1.4787021497642452, "grad_norm": 1.6045055955638714, "learning_rate": 3.4930693802339077e-06, "loss": 0.5206823945045471, "step": 9252 }, { "epoch": 1.4788619835371213, "grad_norm": 1.5981860167517985, "learning_rate": 3.491062928747987e-06, "loss": 0.5013755559921265, "step": 9253 }, { "epoch": 1.4790218173099976, "grad_norm": 1.685849167741601, "learning_rate": 3.4890569318231203e-06, "loss": 0.5279414653778076, "step": 9254 }, { "epoch": 1.479181651082874, "grad_norm": 1.557813115110673, "learning_rate": 3.487051389599395e-06, "loss": 0.5315398573875427, "step": 9255 }, { "epoch": 1.47934148485575, "grad_norm": 1.4294534109195984, "learning_rate": 3.485046302216878e-06, "loss": 0.5025812983512878, "step": 9256 }, { "epoch": 1.479501318628626, "grad_norm": 1.6032775470265153, "learning_rate": 3.4830416698155943e-06, "loss": 0.389961302280426, "step": 9257 }, { "epoch": 1.4796611524015024, "grad_norm": 1.4000936852583452, "learning_rate": 3.4810374925355407e-06, "loss": 0.5656421184539795, "step": 9258 }, { "epoch": 1.4798209861743787, "grad_norm": 1.6113228726118045, "learning_rate": 3.479033770516681e-06, "loss": 0.6298533082008362, "step": 9259 }, { "epoch": 1.4799808199472548, "grad_norm": 1.7625240679151357, "learning_rate": 3.4770305038989448e-06, "loss": 0.5208033323287964, "step": 9260 }, { "epoch": 1.480140653720131, "grad_norm": 1.8514381197322003, "learning_rate": 3.4750276928222406e-06, "loss": 0.6571847796440125, "step": 9261 }, { "epoch": 1.4803004874930072, "grad_norm": 1.4374945669569748, "learning_rate": 3.473025337426433e-06, "loss": 0.4882717430591583, "step": 9262 }, { "epoch": 1.4804603212658836, "grad_norm": 1.6757565320758072, "learning_rate": 3.471023437851362e-06, "loss": 0.5381587147712708, "step": 9263 }, { "epoch": 1.4806201550387597, "grad_norm": 1.7406728794563402, "learning_rate": 3.4690219942368263e-06, "loss": 0.4242461621761322, "step": 9264 }, { "epoch": 1.480779988811636, "grad_norm": 1.3260548582953677, "learning_rate": 3.4670210067226074e-06, "loss": 0.35982564091682434, "step": 9265 }, { "epoch": 1.480939822584512, "grad_norm": 1.3109598852064186, "learning_rate": 3.465020475448445e-06, "loss": 0.5720490217208862, "step": 9266 }, { "epoch": 1.4810996563573884, "grad_norm": 1.5956617910897464, "learning_rate": 3.463020400554049e-06, "loss": 0.5829548835754395, "step": 9267 }, { "epoch": 1.4812594901302645, "grad_norm": 1.77653205050638, "learning_rate": 3.4610207821790964e-06, "loss": 0.5751408338546753, "step": 9268 }, { "epoch": 1.4814193239031408, "grad_norm": 1.5527227149068799, "learning_rate": 3.459021620463231e-06, "loss": 0.46224015951156616, "step": 9269 }, { "epoch": 1.481579157676017, "grad_norm": 1.6978348535413184, "learning_rate": 3.457022915546073e-06, "loss": 0.6569905877113342, "step": 9270 }, { "epoch": 1.4817389914488932, "grad_norm": 1.6302650730417387, "learning_rate": 3.4550246675672027e-06, "loss": 0.5100111961364746, "step": 9271 }, { "epoch": 1.4818988252217693, "grad_norm": 1.6213905448857435, "learning_rate": 3.4530268766661667e-06, "loss": 0.6135138869285583, "step": 9272 }, { "epoch": 1.4820586589946456, "grad_norm": 1.6221493369519338, "learning_rate": 3.4510295429824903e-06, "loss": 0.6106469631195068, "step": 9273 }, { "epoch": 1.4822184927675217, "grad_norm": 1.8084575503121976, "learning_rate": 3.449032666655656e-06, "loss": 0.6106611490249634, "step": 9274 }, { "epoch": 1.482378326540398, "grad_norm": 1.585359272257845, "learning_rate": 3.4470362478251173e-06, "loss": 0.38503146171569824, "step": 9275 }, { "epoch": 1.4825381603132741, "grad_norm": 1.5099317864829962, "learning_rate": 3.4450402866303012e-06, "loss": 0.45107677578926086, "step": 9276 }, { "epoch": 1.4826979940861504, "grad_norm": 1.4400855674503843, "learning_rate": 3.443044783210597e-06, "loss": 0.6446613073348999, "step": 9277 }, { "epoch": 1.4828578278590265, "grad_norm": 1.7040308379321816, "learning_rate": 3.441049737705363e-06, "loss": 0.5666744709014893, "step": 9278 }, { "epoch": 1.4830176616319029, "grad_norm": 1.3144703835621512, "learning_rate": 3.439055150253926e-06, "loss": 0.5028718709945679, "step": 9279 }, { "epoch": 1.483177495404779, "grad_norm": 1.5486289966388849, "learning_rate": 3.437061020995577e-06, "loss": 0.6365467309951782, "step": 9280 }, { "epoch": 1.4833373291776553, "grad_norm": 1.7529693396254378, "learning_rate": 3.435067350069586e-06, "loss": 0.5267403721809387, "step": 9281 }, { "epoch": 1.4834971629505316, "grad_norm": 1.5510299916857835, "learning_rate": 3.433074137615182e-06, "loss": 0.6297178268432617, "step": 9282 }, { "epoch": 1.4836569967234077, "grad_norm": 1.5633768726836075, "learning_rate": 3.4310813837715627e-06, "loss": 0.6134960651397705, "step": 9283 }, { "epoch": 1.4838168304962838, "grad_norm": 1.4800294737456412, "learning_rate": 3.4290890886778904e-06, "loss": 0.5571074485778809, "step": 9284 }, { "epoch": 1.48397666426916, "grad_norm": 1.661615958928155, "learning_rate": 3.4270972524733093e-06, "loss": 0.6915296316146851, "step": 9285 }, { "epoch": 1.4841364980420364, "grad_norm": 1.5992486387863951, "learning_rate": 3.425105875296916e-06, "loss": 0.5406966209411621, "step": 9286 }, { "epoch": 1.4842963318149125, "grad_norm": 1.4868210655566718, "learning_rate": 3.423114957287783e-06, "loss": 0.5340057611465454, "step": 9287 }, { "epoch": 1.4844561655877886, "grad_norm": 1.2942026318030224, "learning_rate": 3.4211244985849477e-06, "loss": 0.5130948424339294, "step": 9288 }, { "epoch": 1.484615999360665, "grad_norm": 1.5179732138393993, "learning_rate": 3.4191344993274154e-06, "loss": 0.4509444534778595, "step": 9289 }, { "epoch": 1.4847758331335412, "grad_norm": 1.4942040448348743, "learning_rate": 3.4171449596541652e-06, "loss": 0.664542019367218, "step": 9290 }, { "epoch": 1.4849356669064173, "grad_norm": 1.7006816049646227, "learning_rate": 3.4151558797041373e-06, "loss": 0.4923350512981415, "step": 9291 }, { "epoch": 1.4850955006792934, "grad_norm": 1.6619560894434269, "learning_rate": 3.4131672596162378e-06, "loss": 0.6366985440254211, "step": 9292 }, { "epoch": 1.4852553344521697, "grad_norm": 1.6437064191137871, "learning_rate": 3.4111790995293513e-06, "loss": 0.5699148178100586, "step": 9293 }, { "epoch": 1.485415168225046, "grad_norm": 1.6773943880856101, "learning_rate": 3.409191399582322e-06, "loss": 0.5580539703369141, "step": 9294 }, { "epoch": 1.4855750019979221, "grad_norm": 1.5113914891998703, "learning_rate": 3.4072041599139605e-06, "loss": 0.4789101779460907, "step": 9295 }, { "epoch": 1.4857348357707982, "grad_norm": 1.4893611238338254, "learning_rate": 3.4052173806630527e-06, "loss": 0.40242117643356323, "step": 9296 }, { "epoch": 1.4858946695436746, "grad_norm": 1.8025054436570365, "learning_rate": 3.4032310619683473e-06, "loss": 0.5129149556159973, "step": 9297 }, { "epoch": 1.4860545033165509, "grad_norm": 1.6401610413090433, "learning_rate": 3.4012452039685617e-06, "loss": 0.5755941867828369, "step": 9298 }, { "epoch": 1.486214337089427, "grad_norm": 1.5971896528333853, "learning_rate": 3.3992598068023764e-06, "loss": 0.5772330164909363, "step": 9299 }, { "epoch": 1.4863741708623033, "grad_norm": 1.410740903922059, "learning_rate": 3.3972748706084523e-06, "loss": 0.5438854694366455, "step": 9300 }, { "epoch": 1.4865340046351794, "grad_norm": 1.4635792220478316, "learning_rate": 3.3952903955254056e-06, "loss": 0.4618375301361084, "step": 9301 }, { "epoch": 1.4866938384080557, "grad_norm": 1.4583157655061523, "learning_rate": 3.3933063816918264e-06, "loss": 0.4702354669570923, "step": 9302 }, { "epoch": 1.4868536721809318, "grad_norm": 1.5142037630069356, "learning_rate": 3.391322829246272e-06, "loss": 0.5979188680648804, "step": 9303 }, { "epoch": 1.487013505953808, "grad_norm": 1.6124993153912375, "learning_rate": 3.3893397383272608e-06, "loss": 0.49694305658340454, "step": 9304 }, { "epoch": 1.4871733397266842, "grad_norm": 1.5695160616303336, "learning_rate": 3.3873571090732926e-06, "loss": 0.5146796107292175, "step": 9305 }, { "epoch": 1.4873331734995605, "grad_norm": 1.5626224606332966, "learning_rate": 3.3853749416228245e-06, "loss": 0.5529825687408447, "step": 9306 }, { "epoch": 1.4874930072724366, "grad_norm": 1.7786632035577996, "learning_rate": 3.383393236114283e-06, "loss": 0.5328420996665955, "step": 9307 }, { "epoch": 1.487652841045313, "grad_norm": 1.6736239741588446, "learning_rate": 3.3814119926860643e-06, "loss": 0.5581368207931519, "step": 9308 }, { "epoch": 1.487812674818189, "grad_norm": 1.75813875060899, "learning_rate": 3.379431211476528e-06, "loss": 0.6080427169799805, "step": 9309 }, { "epoch": 1.4879725085910653, "grad_norm": 1.539698685228749, "learning_rate": 3.37745089262401e-06, "loss": 0.5626752376556396, "step": 9310 }, { "epoch": 1.4881323423639414, "grad_norm": 1.5217518425991485, "learning_rate": 3.3754710362668074e-06, "loss": 0.504898190498352, "step": 9311 }, { "epoch": 1.4882921761368177, "grad_norm": 1.40305784774764, "learning_rate": 3.3734916425431865e-06, "loss": 0.42838114500045776, "step": 9312 }, { "epoch": 1.4884520099096938, "grad_norm": 1.3931129178858188, "learning_rate": 3.371512711591376e-06, "loss": 0.651026725769043, "step": 9313 }, { "epoch": 1.4886118436825702, "grad_norm": 1.72313908739234, "learning_rate": 3.3695342435495816e-06, "loss": 0.5419785976409912, "step": 9314 }, { "epoch": 1.4887716774554463, "grad_norm": 1.6624418014155715, "learning_rate": 3.3675562385559747e-06, "loss": 0.5056774020195007, "step": 9315 }, { "epoch": 1.4889315112283226, "grad_norm": 1.6677576896404307, "learning_rate": 3.3655786967486914e-06, "loss": 0.5732419490814209, "step": 9316 }, { "epoch": 1.4890913450011989, "grad_norm": 1.3349341194175093, "learning_rate": 3.3636016182658337e-06, "loss": 0.47816312313079834, "step": 9317 }, { "epoch": 1.489251178774075, "grad_norm": 1.8509416691099763, "learning_rate": 3.3616250032454755e-06, "loss": 0.5906393527984619, "step": 9318 }, { "epoch": 1.489411012546951, "grad_norm": 1.4882081671419074, "learning_rate": 3.359648851825652e-06, "loss": 0.5471899509429932, "step": 9319 }, { "epoch": 1.4895708463198274, "grad_norm": 1.45280695616728, "learning_rate": 3.357673164144378e-06, "loss": 0.5349911451339722, "step": 9320 }, { "epoch": 1.4897306800927037, "grad_norm": 1.588061033868943, "learning_rate": 3.355697940339625e-06, "loss": 0.5567198991775513, "step": 9321 }, { "epoch": 1.4898905138655798, "grad_norm": 1.5242822100036937, "learning_rate": 3.353723180549335e-06, "loss": 0.4318966567516327, "step": 9322 }, { "epoch": 1.490050347638456, "grad_norm": 1.7115311247185854, "learning_rate": 3.351748884911419e-06, "loss": 0.6862127184867859, "step": 9323 }, { "epoch": 1.4902101814113322, "grad_norm": 1.478984708210311, "learning_rate": 3.349775053563752e-06, "loss": 0.5394424200057983, "step": 9324 }, { "epoch": 1.4903700151842085, "grad_norm": 1.5112650911890544, "learning_rate": 3.347801686644184e-06, "loss": 0.5340825319290161, "step": 9325 }, { "epoch": 1.4905298489570846, "grad_norm": 1.892628949252997, "learning_rate": 3.345828784290527e-06, "loss": 0.5686153173446655, "step": 9326 }, { "epoch": 1.4906896827299607, "grad_norm": 1.5010388927712774, "learning_rate": 3.3438563466405595e-06, "loss": 0.4955086410045624, "step": 9327 }, { "epoch": 1.490849516502837, "grad_norm": 1.919671943596027, "learning_rate": 3.3418843738320274e-06, "loss": 0.746638834476471, "step": 9328 }, { "epoch": 1.4910093502757134, "grad_norm": 1.8121782269529256, "learning_rate": 3.3399128660026523e-06, "loss": 0.6430130004882812, "step": 9329 }, { "epoch": 1.4911691840485894, "grad_norm": 1.5323247976611005, "learning_rate": 3.337941823290114e-06, "loss": 0.5426996350288391, "step": 9330 }, { "epoch": 1.4913290178214655, "grad_norm": 1.41217551684769, "learning_rate": 3.335971245832064e-06, "loss": 0.41576188802719116, "step": 9331 }, { "epoch": 1.4914888515943419, "grad_norm": 1.4247676963560385, "learning_rate": 3.3340011337661192e-06, "loss": 0.6007596850395203, "step": 9332 }, { "epoch": 1.4916486853672182, "grad_norm": 1.4958104773187377, "learning_rate": 3.3320314872298644e-06, "loss": 0.48736438155174255, "step": 9333 }, { "epoch": 1.4918085191400943, "grad_norm": 1.7279565798783953, "learning_rate": 3.3300623063608517e-06, "loss": 0.7025443315505981, "step": 9334 }, { "epoch": 1.4919683529129706, "grad_norm": 2.0880880999391476, "learning_rate": 3.3280935912966094e-06, "loss": 0.4624779224395752, "step": 9335 }, { "epoch": 1.4921281866858467, "grad_norm": 1.552919826274772, "learning_rate": 3.3261253421746197e-06, "loss": 0.5068686604499817, "step": 9336 }, { "epoch": 1.492288020458723, "grad_norm": 1.5612346511750264, "learning_rate": 3.3241575591323385e-06, "loss": 0.5844495296478271, "step": 9337 }, { "epoch": 1.492447854231599, "grad_norm": 1.5277305879797407, "learning_rate": 3.3221902423071893e-06, "loss": 0.525305986404419, "step": 9338 }, { "epoch": 1.4926076880044754, "grad_norm": 1.827073288906965, "learning_rate": 3.3202233918365588e-06, "loss": 0.6196680068969727, "step": 9339 }, { "epoch": 1.4927675217773515, "grad_norm": 1.5592481499271809, "learning_rate": 3.3182570078578115e-06, "loss": 0.5602206587791443, "step": 9340 }, { "epoch": 1.4929273555502278, "grad_norm": 1.4308296260145246, "learning_rate": 3.31629109050827e-06, "loss": 0.6007410287857056, "step": 9341 }, { "epoch": 1.493087189323104, "grad_norm": 1.4590850570527036, "learning_rate": 3.3143256399252265e-06, "loss": 0.5030762553215027, "step": 9342 }, { "epoch": 1.4932470230959802, "grad_norm": 1.7181150221752841, "learning_rate": 3.312360656245941e-06, "loss": 0.4212239384651184, "step": 9343 }, { "epoch": 1.4934068568688563, "grad_norm": 1.5674104520349614, "learning_rate": 3.310396139607638e-06, "loss": 0.5586491227149963, "step": 9344 }, { "epoch": 1.4935666906417326, "grad_norm": 1.4840162743643601, "learning_rate": 3.3084320901475177e-06, "loss": 0.5337942838668823, "step": 9345 }, { "epoch": 1.4937265244146087, "grad_norm": 1.841523743366398, "learning_rate": 3.3064685080027416e-06, "loss": 0.7070010304450989, "step": 9346 }, { "epoch": 1.493886358187485, "grad_norm": 1.7786854294309864, "learning_rate": 3.3045053933104366e-06, "loss": 0.5987684726715088, "step": 9347 }, { "epoch": 1.4940461919603611, "grad_norm": 1.6659300917194106, "learning_rate": 3.3025427462076975e-06, "loss": 0.6579699516296387, "step": 9348 }, { "epoch": 1.4942060257332375, "grad_norm": 1.5003051579296467, "learning_rate": 3.300580566831596e-06, "loss": 0.5794988870620728, "step": 9349 }, { "epoch": 1.4943658595061136, "grad_norm": 1.6999526614324754, "learning_rate": 3.298618855319159e-06, "loss": 0.486320823431015, "step": 9350 }, { "epoch": 1.4945256932789899, "grad_norm": 1.8787571545674737, "learning_rate": 3.296657611807387e-06, "loss": 0.47200679779052734, "step": 9351 }, { "epoch": 1.4946855270518662, "grad_norm": 1.7938938281045753, "learning_rate": 3.2946968364332454e-06, "loss": 0.5674417018890381, "step": 9352 }, { "epoch": 1.4948453608247423, "grad_norm": 1.6542372235784313, "learning_rate": 3.2927365293336633e-06, "loss": 0.4403594434261322, "step": 9353 }, { "epoch": 1.4950051945976184, "grad_norm": 1.6406776973677408, "learning_rate": 3.2907766906455474e-06, "loss": 0.6461660861968994, "step": 9354 }, { "epoch": 1.4951650283704947, "grad_norm": 1.6113119709610777, "learning_rate": 3.288817320505767e-06, "loss": 0.49161654710769653, "step": 9355 }, { "epoch": 1.495324862143371, "grad_norm": 1.5679374270908384, "learning_rate": 3.2868584190511564e-06, "loss": 0.5898159742355347, "step": 9356 }, { "epoch": 1.495484695916247, "grad_norm": 1.7701551982290027, "learning_rate": 3.284899986418516e-06, "loss": 0.5539439916610718, "step": 9357 }, { "epoch": 1.4956445296891232, "grad_norm": 1.394979656996682, "learning_rate": 3.2829420227446175e-06, "loss": 0.5146788954734802, "step": 9358 }, { "epoch": 1.4958043634619995, "grad_norm": 1.6556923147402536, "learning_rate": 3.2809845281661936e-06, "loss": 0.47604241967201233, "step": 9359 }, { "epoch": 1.4959641972348758, "grad_norm": 1.604450805608936, "learning_rate": 3.2790275028199557e-06, "loss": 0.6151266098022461, "step": 9360 }, { "epoch": 1.496124031007752, "grad_norm": 1.619108170064138, "learning_rate": 3.277070946842573e-06, "loss": 0.5841996073722839, "step": 9361 }, { "epoch": 1.496283864780628, "grad_norm": 1.5754248086114455, "learning_rate": 3.275114860370684e-06, "loss": 0.6254881620407104, "step": 9362 }, { "epoch": 1.4964436985535043, "grad_norm": 1.5495668177405475, "learning_rate": 3.273159243540891e-06, "loss": 0.47195670008659363, "step": 9363 }, { "epoch": 1.4966035323263807, "grad_norm": 1.71395778230786, "learning_rate": 3.271204096489775e-06, "loss": 0.6202989816665649, "step": 9364 }, { "epoch": 1.4967633660992568, "grad_norm": 1.7548005973019438, "learning_rate": 3.2692494193538715e-06, "loss": 0.5013644099235535, "step": 9365 }, { "epoch": 1.4969231998721328, "grad_norm": 1.7218272541599884, "learning_rate": 3.26729521226969e-06, "loss": 0.41096681356430054, "step": 9366 }, { "epoch": 1.4970830336450092, "grad_norm": 2.0593640257587666, "learning_rate": 3.2653414753737047e-06, "loss": 0.609500527381897, "step": 9367 }, { "epoch": 1.4972428674178855, "grad_norm": 1.950057603706429, "learning_rate": 3.263388208802354e-06, "loss": 0.575768232345581, "step": 9368 }, { "epoch": 1.4974027011907616, "grad_norm": 1.5389392447515882, "learning_rate": 3.2614354126920542e-06, "loss": 0.47566553950309753, "step": 9369 }, { "epoch": 1.497562534963638, "grad_norm": 1.5224001319228098, "learning_rate": 3.2594830871791784e-06, "loss": 0.6381305456161499, "step": 9370 }, { "epoch": 1.497722368736514, "grad_norm": 1.7363641423392269, "learning_rate": 3.2575312324000697e-06, "loss": 0.6817363500595093, "step": 9371 }, { "epoch": 1.4978822025093903, "grad_norm": 1.2955359288068444, "learning_rate": 3.2555798484910385e-06, "loss": 0.39702925086021423, "step": 9372 }, { "epoch": 1.4980420362822664, "grad_norm": 1.600734431039872, "learning_rate": 3.2536289355883598e-06, "loss": 0.5087637305259705, "step": 9373 }, { "epoch": 1.4982018700551427, "grad_norm": 1.4283081485140807, "learning_rate": 3.251678493828282e-06, "loss": 0.43898746371269226, "step": 9374 }, { "epoch": 1.4983617038280188, "grad_norm": 1.4988352520818273, "learning_rate": 3.249728523347019e-06, "loss": 0.6561957001686096, "step": 9375 }, { "epoch": 1.4985215376008951, "grad_norm": 1.8181352597668479, "learning_rate": 3.247779024280747e-06, "loss": 0.5126268267631531, "step": 9376 }, { "epoch": 1.4986813713737712, "grad_norm": 1.680091232053672, "learning_rate": 3.245829996765614e-06, "loss": 0.5090748071670532, "step": 9377 }, { "epoch": 1.4988412051466475, "grad_norm": 1.443140583939243, "learning_rate": 3.2438814409377273e-06, "loss": 0.542649507522583, "step": 9378 }, { "epoch": 1.4990010389195236, "grad_norm": 1.5114349166146994, "learning_rate": 3.2419333569331746e-06, "loss": 0.5135243535041809, "step": 9379 }, { "epoch": 1.4991608726924, "grad_norm": 1.6205971407805129, "learning_rate": 3.239985744888e-06, "loss": 0.5078682899475098, "step": 9380 }, { "epoch": 1.499320706465276, "grad_norm": 1.8551657148578955, "learning_rate": 3.2380386049382186e-06, "loss": 0.4897499084472656, "step": 9381 }, { "epoch": 1.4994805402381524, "grad_norm": 1.6185202798050953, "learning_rate": 3.23609193721981e-06, "loss": 0.3998814523220062, "step": 9382 }, { "epoch": 1.4996403740110285, "grad_norm": 1.5620969039528425, "learning_rate": 3.2341457418687207e-06, "loss": 0.4846160411834717, "step": 9383 }, { "epoch": 1.4998002077839048, "grad_norm": 1.7107670058957059, "learning_rate": 3.2322000190208725e-06, "loss": 0.4829525947570801, "step": 9384 }, { "epoch": 1.4999600415567809, "grad_norm": 1.4532126387657718, "learning_rate": 3.2302547688121433e-06, "loss": 0.5471037030220032, "step": 9385 }, { "epoch": 1.5001198753296572, "grad_norm": 1.4904006703451869, "learning_rate": 3.228309991378383e-06, "loss": 0.5519253611564636, "step": 9386 }, { "epoch": 1.5002797091025335, "grad_norm": 1.5912185093020914, "learning_rate": 3.2263656868554092e-06, "loss": 0.5649279356002808, "step": 9387 }, { "epoch": 1.5004395428754096, "grad_norm": 1.8775810803643838, "learning_rate": 3.2244218553790017e-06, "loss": 0.6571544408798218, "step": 9388 }, { "epoch": 1.5005993766482857, "grad_norm": 1.5456646114358261, "learning_rate": 3.2224784970849154e-06, "loss": 0.5869636535644531, "step": 9389 }, { "epoch": 1.500759210421162, "grad_norm": 1.7149811410841203, "learning_rate": 3.220535612108866e-06, "loss": 0.4772164821624756, "step": 9390 }, { "epoch": 1.5009190441940383, "grad_norm": 1.9989859101118668, "learning_rate": 3.218593200586537e-06, "loss": 0.4979817867279053, "step": 9391 }, { "epoch": 1.5010788779669144, "grad_norm": 1.9897672688026848, "learning_rate": 3.216651262653576e-06, "loss": 0.5379056930541992, "step": 9392 }, { "epoch": 1.5012387117397905, "grad_norm": 1.9351817283704789, "learning_rate": 3.2147097984456055e-06, "loss": 0.6005163192749023, "step": 9393 }, { "epoch": 1.5013985455126668, "grad_norm": 1.7771276802395632, "learning_rate": 3.2127688080982134e-06, "loss": 0.5724639892578125, "step": 9394 }, { "epoch": 1.5015583792855431, "grad_norm": 1.7842621910376482, "learning_rate": 3.2108282917469467e-06, "loss": 0.6961157917976379, "step": 9395 }, { "epoch": 1.5017182130584192, "grad_norm": 1.392777438722017, "learning_rate": 3.2088882495273266e-06, "loss": 0.5180342197418213, "step": 9396 }, { "epoch": 1.5018780468312953, "grad_norm": 1.554752410603981, "learning_rate": 3.2069486815748373e-06, "loss": 0.6423953175544739, "step": 9397 }, { "epoch": 1.5020378806041716, "grad_norm": 1.6340916025166754, "learning_rate": 3.2050095880249276e-06, "loss": 0.5706799626350403, "step": 9398 }, { "epoch": 1.502197714377048, "grad_norm": 1.651689757350893, "learning_rate": 3.2030709690130247e-06, "loss": 0.5381927490234375, "step": 9399 }, { "epoch": 1.502357548149924, "grad_norm": 1.3790036667964651, "learning_rate": 3.201132824674511e-06, "loss": 0.4568990468978882, "step": 9400 }, { "epoch": 1.5025173819228002, "grad_norm": 1.523318748066772, "learning_rate": 3.19919515514474e-06, "loss": 0.537975549697876, "step": 9401 }, { "epoch": 1.5026772156956765, "grad_norm": 1.4670431901181396, "learning_rate": 3.1972579605590314e-06, "loss": 0.4653753638267517, "step": 9402 }, { "epoch": 1.5028370494685528, "grad_norm": 1.6048056608730268, "learning_rate": 3.195321241052668e-06, "loss": 0.5619020462036133, "step": 9403 }, { "epoch": 1.5029968832414289, "grad_norm": 1.5896954920726587, "learning_rate": 3.193384996760911e-06, "loss": 0.571338415145874, "step": 9404 }, { "epoch": 1.503156717014305, "grad_norm": 1.717486717047038, "learning_rate": 3.1914492278189767e-06, "loss": 0.628800630569458, "step": 9405 }, { "epoch": 1.5033165507871813, "grad_norm": 1.52681645121382, "learning_rate": 3.189513934362053e-06, "loss": 0.5307391881942749, "step": 9406 }, { "epoch": 1.5034763845600576, "grad_norm": 1.5686103016024564, "learning_rate": 3.187579116525291e-06, "loss": 0.5427173376083374, "step": 9407 }, { "epoch": 1.5036362183329337, "grad_norm": 1.4836209563966407, "learning_rate": 3.1856447744438178e-06, "loss": 0.5540120005607605, "step": 9408 }, { "epoch": 1.5037960521058098, "grad_norm": 1.65195689507238, "learning_rate": 3.1837109082527164e-06, "loss": 0.3950416147708893, "step": 9409 }, { "epoch": 1.5039558858786861, "grad_norm": 1.740802140220347, "learning_rate": 3.1817775180870425e-06, "loss": 0.5257077217102051, "step": 9410 }, { "epoch": 1.5041157196515624, "grad_norm": 1.477315631919255, "learning_rate": 3.1798446040818175e-06, "loss": 0.5999870300292969, "step": 9411 }, { "epoch": 1.5042755534244385, "grad_norm": 1.4206156735669362, "learning_rate": 3.177912166372025e-06, "loss": 0.49903568625450134, "step": 9412 }, { "epoch": 1.5044353871973148, "grad_norm": 1.3592469158612623, "learning_rate": 3.1759802050926225e-06, "loss": 0.5211212635040283, "step": 9413 }, { "epoch": 1.5045952209701912, "grad_norm": 2.2527478408478467, "learning_rate": 3.174048720378536e-06, "loss": 0.6287298202514648, "step": 9414 }, { "epoch": 1.5047550547430673, "grad_norm": 1.6346021620121236, "learning_rate": 3.1721177123646498e-06, "loss": 0.5791555643081665, "step": 9415 }, { "epoch": 1.5049148885159433, "grad_norm": 1.9706369995602553, "learning_rate": 3.1701871811858176e-06, "loss": 0.5607268810272217, "step": 9416 }, { "epoch": 1.5050747222888197, "grad_norm": 1.4562857075331046, "learning_rate": 3.1682571269768627e-06, "loss": 0.4818130433559418, "step": 9417 }, { "epoch": 1.505234556061696, "grad_norm": 1.8930532297196891, "learning_rate": 3.1663275498725676e-06, "loss": 0.48363322019577026, "step": 9418 }, { "epoch": 1.505394389834572, "grad_norm": 1.5402964070010892, "learning_rate": 3.164398450007695e-06, "loss": 0.616682767868042, "step": 9419 }, { "epoch": 1.5055542236074482, "grad_norm": 1.53761314241033, "learning_rate": 3.1624698275169642e-06, "loss": 0.5169065594673157, "step": 9420 }, { "epoch": 1.5057140573803245, "grad_norm": 1.5984154408438722, "learning_rate": 3.160541682535062e-06, "loss": 0.5095165371894836, "step": 9421 }, { "epoch": 1.5058738911532008, "grad_norm": 1.5783507583235943, "learning_rate": 3.1586140151966428e-06, "loss": 0.6895903944969177, "step": 9422 }, { "epoch": 1.506033724926077, "grad_norm": 1.6118299031397305, "learning_rate": 3.156686825636326e-06, "loss": 0.5307161211967468, "step": 9423 }, { "epoch": 1.506193558698953, "grad_norm": 1.7091779711114958, "learning_rate": 3.154760113988705e-06, "loss": 0.4447166323661804, "step": 9424 }, { "epoch": 1.5063533924718293, "grad_norm": 1.5636156697017856, "learning_rate": 3.152833880388333e-06, "loss": 0.47035348415374756, "step": 9425 }, { "epoch": 1.5065132262447056, "grad_norm": 1.4128290109042043, "learning_rate": 3.1509081249697295e-06, "loss": 0.5029007196426392, "step": 9426 }, { "epoch": 1.5066730600175817, "grad_norm": 1.6859990377615928, "learning_rate": 3.14898284786738e-06, "loss": 0.6109482645988464, "step": 9427 }, { "epoch": 1.5068328937904578, "grad_norm": 1.482890877370989, "learning_rate": 3.1470580492157445e-06, "loss": 0.4916493892669678, "step": 9428 }, { "epoch": 1.5069927275633341, "grad_norm": 1.4996291259372447, "learning_rate": 3.145133729149242e-06, "loss": 0.476385235786438, "step": 9429 }, { "epoch": 1.5071525613362105, "grad_norm": 1.5473179753210353, "learning_rate": 3.143209887802261e-06, "loss": 0.630534291267395, "step": 9430 }, { "epoch": 1.5073123951090865, "grad_norm": 1.7351284435652288, "learning_rate": 3.141286525309153e-06, "loss": 0.5066613554954529, "step": 9431 }, { "epoch": 1.5074722288819626, "grad_norm": 1.4501285657192995, "learning_rate": 3.1393636418042384e-06, "loss": 0.4140323996543884, "step": 9432 }, { "epoch": 1.507632062654839, "grad_norm": 1.6559289868552305, "learning_rate": 3.1374412374218065e-06, "loss": 0.6193994283676147, "step": 9433 }, { "epoch": 1.5077918964277153, "grad_norm": 1.7131242530813702, "learning_rate": 3.1355193122961135e-06, "loss": 0.5999544858932495, "step": 9434 }, { "epoch": 1.5079517302005914, "grad_norm": 1.8776049800815928, "learning_rate": 3.1335978665613788e-06, "loss": 0.5450184345245361, "step": 9435 }, { "epoch": 1.5081115639734675, "grad_norm": 1.8397281850563956, "learning_rate": 3.1316769003517877e-06, "loss": 0.5164183974266052, "step": 9436 }, { "epoch": 1.5082713977463438, "grad_norm": 1.6431141818230108, "learning_rate": 3.1297564138014946e-06, "loss": 0.5676782131195068, "step": 9437 }, { "epoch": 1.50843123151922, "grad_norm": 1.6861734197486138, "learning_rate": 3.1278364070446145e-06, "loss": 0.5544934272766113, "step": 9438 }, { "epoch": 1.5085910652920962, "grad_norm": 1.5452765666306607, "learning_rate": 3.1259168802152428e-06, "loss": 0.4673013687133789, "step": 9439 }, { "epoch": 1.5087508990649723, "grad_norm": 1.588461745118205, "learning_rate": 3.1239978334474275e-06, "loss": 0.49282848834991455, "step": 9440 }, { "epoch": 1.5089107328378486, "grad_norm": 1.7067823514605844, "learning_rate": 3.122079266875189e-06, "loss": 0.49168285727500916, "step": 9441 }, { "epoch": 1.509070566610725, "grad_norm": 2.0429871908607637, "learning_rate": 3.1201611806325083e-06, "loss": 0.6559103727340698, "step": 9442 }, { "epoch": 1.509230400383601, "grad_norm": 1.6592253262845262, "learning_rate": 3.118243574853346e-06, "loss": 0.437414288520813, "step": 9443 }, { "epoch": 1.509390234156477, "grad_norm": 1.6552545389403839, "learning_rate": 3.1163264496716173e-06, "loss": 0.6152058839797974, "step": 9444 }, { "epoch": 1.5095500679293534, "grad_norm": 1.52610975103607, "learning_rate": 3.114409805221207e-06, "loss": 0.562281608581543, "step": 9445 }, { "epoch": 1.5097099017022297, "grad_norm": 1.8134446333668954, "learning_rate": 3.1124936416359663e-06, "loss": 0.6812435984611511, "step": 9446 }, { "epoch": 1.5098697354751058, "grad_norm": 1.6342755349691884, "learning_rate": 3.1105779590497108e-06, "loss": 0.5689482092857361, "step": 9447 }, { "epoch": 1.5100295692479822, "grad_norm": 1.4885999740264946, "learning_rate": 3.1086627575962315e-06, "loss": 0.5263763070106506, "step": 9448 }, { "epoch": 1.5101894030208585, "grad_norm": 1.7199526898223365, "learning_rate": 3.1067480374092752e-06, "loss": 0.565433919429779, "step": 9449 }, { "epoch": 1.5103492367937346, "grad_norm": 1.3881287605364412, "learning_rate": 3.10483379862256e-06, "loss": 0.401424765586853, "step": 9450 }, { "epoch": 1.5105090705666107, "grad_norm": 1.7842835307403762, "learning_rate": 3.102920041369769e-06, "loss": 0.6041573882102966, "step": 9451 }, { "epoch": 1.510668904339487, "grad_norm": 1.5871535600587268, "learning_rate": 3.1010067657845498e-06, "loss": 0.5892050266265869, "step": 9452 }, { "epoch": 1.5108287381123633, "grad_norm": 1.3085731092340618, "learning_rate": 3.0990939720005208e-06, "loss": 0.5039918422698975, "step": 9453 }, { "epoch": 1.5109885718852394, "grad_norm": 1.4164915434661673, "learning_rate": 3.0971816601512692e-06, "loss": 0.4806802272796631, "step": 9454 }, { "epoch": 1.5111484056581155, "grad_norm": 1.7163751435586196, "learning_rate": 3.09526983037034e-06, "loss": 0.5806406736373901, "step": 9455 }, { "epoch": 1.5113082394309918, "grad_norm": 1.6228108552044616, "learning_rate": 3.093358482791249e-06, "loss": 0.4669550955295563, "step": 9456 }, { "epoch": 1.5114680732038681, "grad_norm": 1.6144572892170694, "learning_rate": 3.091447617547474e-06, "loss": 0.482939749956131, "step": 9457 }, { "epoch": 1.5116279069767442, "grad_norm": 1.6268448670414863, "learning_rate": 3.0895372347724695e-06, "loss": 0.5543088912963867, "step": 9458 }, { "epoch": 1.5117877407496203, "grad_norm": 1.5373949423268534, "learning_rate": 3.087627334599648e-06, "loss": 0.5415307283401489, "step": 9459 }, { "epoch": 1.5119475745224966, "grad_norm": 1.7237568882468954, "learning_rate": 3.08571791716239e-06, "loss": 0.6216019988059998, "step": 9460 }, { "epoch": 1.512107408295373, "grad_norm": 1.5661092358512059, "learning_rate": 3.0838089825940397e-06, "loss": 0.47669318318367004, "step": 9461 }, { "epoch": 1.512267242068249, "grad_norm": 1.607033116824089, "learning_rate": 3.0819005310279106e-06, "loss": 0.4846702218055725, "step": 9462 }, { "epoch": 1.5124270758411251, "grad_norm": 1.8033936953550822, "learning_rate": 3.079992562597287e-06, "loss": 0.6091265082359314, "step": 9463 }, { "epoch": 1.5125869096140014, "grad_norm": 1.6738458051963248, "learning_rate": 3.078085077435411e-06, "loss": 0.6186692714691162, "step": 9464 }, { "epoch": 1.5127467433868778, "grad_norm": 1.5610678011537125, "learning_rate": 3.0761780756754957e-06, "loss": 0.5320129990577698, "step": 9465 }, { "epoch": 1.5129065771597539, "grad_norm": 1.6207966917621897, "learning_rate": 3.0742715574507185e-06, "loss": 0.46505922079086304, "step": 9466 }, { "epoch": 1.51306641093263, "grad_norm": 1.406150396206221, "learning_rate": 3.072365522894221e-06, "loss": 0.47991943359375, "step": 9467 }, { "epoch": 1.5132262447055063, "grad_norm": 1.6081942345824591, "learning_rate": 3.0704599721391202e-06, "loss": 0.5744757652282715, "step": 9468 }, { "epoch": 1.5133860784783826, "grad_norm": 1.5388404146360013, "learning_rate": 3.0685549053184903e-06, "loss": 0.5605979561805725, "step": 9469 }, { "epoch": 1.5135459122512587, "grad_norm": 1.3507697866638784, "learning_rate": 3.066650322565373e-06, "loss": 0.41712960600852966, "step": 9470 }, { "epoch": 1.5137057460241348, "grad_norm": 1.4809234534334397, "learning_rate": 3.064746224012776e-06, "loss": 0.4955517053604126, "step": 9471 }, { "epoch": 1.513865579797011, "grad_norm": 2.7653191352341993, "learning_rate": 3.0628426097936805e-06, "loss": 0.4837337136268616, "step": 9472 }, { "epoch": 1.5140254135698874, "grad_norm": 1.7206074831668898, "learning_rate": 3.060939480041022e-06, "loss": 0.5392398834228516, "step": 9473 }, { "epoch": 1.5141852473427635, "grad_norm": 1.3593845446040396, "learning_rate": 3.0590368348877132e-06, "loss": 0.48747310042381287, "step": 9474 }, { "epoch": 1.5143450811156396, "grad_norm": 1.44138097186067, "learning_rate": 3.0571346744666276e-06, "loss": 0.48563259840011597, "step": 9475 }, { "epoch": 1.514504914888516, "grad_norm": 1.4778013234176812, "learning_rate": 3.0552329989106032e-06, "loss": 0.463765025138855, "step": 9476 }, { "epoch": 1.5146647486613922, "grad_norm": 1.6171496691567568, "learning_rate": 3.0533318083524435e-06, "loss": 0.5057806372642517, "step": 9477 }, { "epoch": 1.5148245824342683, "grad_norm": 1.8557017409427046, "learning_rate": 3.051431102924928e-06, "loss": 0.47415876388549805, "step": 9478 }, { "epoch": 1.5149844162071444, "grad_norm": 1.585910475826275, "learning_rate": 3.0495308827607916e-06, "loss": 0.49680617451667786, "step": 9479 }, { "epoch": 1.5151442499800207, "grad_norm": 1.7192895205493197, "learning_rate": 3.047631147992739e-06, "loss": 0.5168200731277466, "step": 9480 }, { "epoch": 1.515304083752897, "grad_norm": 1.6412218687162519, "learning_rate": 3.045731898753441e-06, "loss": 0.6490811109542847, "step": 9481 }, { "epoch": 1.5154639175257731, "grad_norm": 1.516131058051271, "learning_rate": 3.0438331351755302e-06, "loss": 0.6309469938278198, "step": 9482 }, { "epoch": 1.5156237512986495, "grad_norm": 1.8098458902664343, "learning_rate": 3.0419348573916174e-06, "loss": 0.5545985102653503, "step": 9483 }, { "epoch": 1.5157835850715258, "grad_norm": 1.5012725729346932, "learning_rate": 3.040037065534268e-06, "loss": 0.514324963092804, "step": 9484 }, { "epoch": 1.5159434188444019, "grad_norm": 1.6627546075630637, "learning_rate": 3.0381397597360162e-06, "loss": 0.49566856026649475, "step": 9485 }, { "epoch": 1.516103252617278, "grad_norm": 1.7940200626520901, "learning_rate": 3.0362429401293637e-06, "loss": 0.7112557888031006, "step": 9486 }, { "epoch": 1.5162630863901543, "grad_norm": 1.5566766549579134, "learning_rate": 3.0343466068467752e-06, "loss": 0.5343169569969177, "step": 9487 }, { "epoch": 1.5164229201630306, "grad_norm": 1.694236286148139, "learning_rate": 3.0324507600206887e-06, "loss": 0.5820807814598083, "step": 9488 }, { "epoch": 1.5165827539359067, "grad_norm": 1.6475610280064603, "learning_rate": 3.030555399783501e-06, "loss": 0.5146968960762024, "step": 9489 }, { "epoch": 1.5167425877087828, "grad_norm": 1.5296665424869116, "learning_rate": 3.028660526267578e-06, "loss": 0.5868176817893982, "step": 9490 }, { "epoch": 1.516902421481659, "grad_norm": 1.5625059121155505, "learning_rate": 3.026766139605247e-06, "loss": 0.4239385724067688, "step": 9491 }, { "epoch": 1.5170622552545354, "grad_norm": 2.007312726827369, "learning_rate": 3.024872239928812e-06, "loss": 0.4968220591545105, "step": 9492 }, { "epoch": 1.5172220890274115, "grad_norm": 1.839658305860808, "learning_rate": 3.0229788273705287e-06, "loss": 0.6157189607620239, "step": 9493 }, { "epoch": 1.5173819228002876, "grad_norm": 1.5257037864999168, "learning_rate": 3.0210859020626348e-06, "loss": 0.591408908367157, "step": 9494 }, { "epoch": 1.517541756573164, "grad_norm": 1.4788305856445596, "learning_rate": 3.01919346413732e-06, "loss": 0.6103380918502808, "step": 9495 }, { "epoch": 1.5177015903460402, "grad_norm": 1.7430977719670817, "learning_rate": 3.017301513726747e-06, "loss": 0.6100859642028809, "step": 9496 }, { "epoch": 1.5178614241189163, "grad_norm": 1.5173497348374112, "learning_rate": 3.015410050963039e-06, "loss": 0.44463247060775757, "step": 9497 }, { "epoch": 1.5180212578917924, "grad_norm": 1.5290865256393698, "learning_rate": 3.013519075978295e-06, "loss": 0.5169979929924011, "step": 9498 }, { "epoch": 1.5181810916646687, "grad_norm": 1.604350008252826, "learning_rate": 3.0116285889045728e-06, "loss": 0.5675691962242126, "step": 9499 }, { "epoch": 1.518340925437545, "grad_norm": 1.7054530862524802, "learning_rate": 3.009738589873895e-06, "loss": 0.44112715125083923, "step": 9500 }, { "epoch": 1.5185007592104212, "grad_norm": 1.8020766212703476, "learning_rate": 3.0078490790182536e-06, "loss": 0.6164102554321289, "step": 9501 }, { "epoch": 1.5186605929832973, "grad_norm": 1.5427922443824023, "learning_rate": 3.005960056469601e-06, "loss": 0.5524947047233582, "step": 9502 }, { "epoch": 1.5188204267561736, "grad_norm": 1.9083893409354415, "learning_rate": 3.0040715223598683e-06, "loss": 0.45052897930145264, "step": 9503 }, { "epoch": 1.5189802605290499, "grad_norm": 1.569539935283262, "learning_rate": 3.00218347682094e-06, "loss": 0.5568376183509827, "step": 9504 }, { "epoch": 1.519140094301926, "grad_norm": 1.7983675240831114, "learning_rate": 3.0002959199846695e-06, "loss": 0.6597073078155518, "step": 9505 }, { "epoch": 1.519299928074802, "grad_norm": 1.6443504700758982, "learning_rate": 2.9984088519828737e-06, "loss": 0.4442852735519409, "step": 9506 }, { "epoch": 1.5194597618476784, "grad_norm": 2.0221053718265685, "learning_rate": 2.9965222729473474e-06, "loss": 0.6010611057281494, "step": 9507 }, { "epoch": 1.5196195956205547, "grad_norm": 1.7992168517190528, "learning_rate": 2.9946361830098368e-06, "loss": 0.510576605796814, "step": 9508 }, { "epoch": 1.5197794293934308, "grad_norm": 1.5134783921538804, "learning_rate": 2.992750582302061e-06, "loss": 0.4058746099472046, "step": 9509 }, { "epoch": 1.519939263166307, "grad_norm": 1.589769547094719, "learning_rate": 2.990865470955704e-06, "loss": 0.6352638006210327, "step": 9510 }, { "epoch": 1.5200990969391832, "grad_norm": 1.677182661850853, "learning_rate": 2.988980849102411e-06, "loss": 0.505445122718811, "step": 9511 }, { "epoch": 1.5202589307120595, "grad_norm": 1.6111336750456882, "learning_rate": 2.987096716873804e-06, "loss": 0.4707990884780884, "step": 9512 }, { "epoch": 1.5204187644849356, "grad_norm": 1.6710366127531961, "learning_rate": 2.9852130744014585e-06, "loss": 0.643027663230896, "step": 9513 }, { "epoch": 1.5205785982578117, "grad_norm": 1.3376247061121762, "learning_rate": 2.9833299218169276e-06, "loss": 0.4601552486419678, "step": 9514 }, { "epoch": 1.520738432030688, "grad_norm": 1.7146239470318847, "learning_rate": 2.9814472592517195e-06, "loss": 0.4847397208213806, "step": 9515 }, { "epoch": 1.5208982658035644, "grad_norm": 1.6155656627477837, "learning_rate": 2.979565086837315e-06, "loss": 0.5677550435066223, "step": 9516 }, { "epoch": 1.5210580995764404, "grad_norm": 1.5638877255721846, "learning_rate": 2.9776834047051526e-06, "loss": 0.6198139786720276, "step": 9517 }, { "epoch": 1.5212179333493168, "grad_norm": 1.6229693550967736, "learning_rate": 2.9758022129866506e-06, "loss": 0.4906888008117676, "step": 9518 }, { "epoch": 1.521377767122193, "grad_norm": 1.5486300938385196, "learning_rate": 2.9739215118131802e-06, "loss": 0.4163253903388977, "step": 9519 }, { "epoch": 1.5215376008950692, "grad_norm": 1.697174338843058, "learning_rate": 2.972041301316084e-06, "loss": 0.6263730525970459, "step": 9520 }, { "epoch": 1.5216974346679453, "grad_norm": 1.3622182979419848, "learning_rate": 2.970161581626665e-06, "loss": 0.4993811547756195, "step": 9521 }, { "epoch": 1.5218572684408216, "grad_norm": 1.528158081076418, "learning_rate": 2.968282352876204e-06, "loss": 0.45737528800964355, "step": 9522 }, { "epoch": 1.522017102213698, "grad_norm": 1.712526226332152, "learning_rate": 2.966403615195934e-06, "loss": 0.5829906463623047, "step": 9523 }, { "epoch": 1.522176935986574, "grad_norm": 1.4969637237668965, "learning_rate": 2.9645253687170615e-06, "loss": 0.5380685925483704, "step": 9524 }, { "epoch": 1.52233676975945, "grad_norm": 1.4795087927461255, "learning_rate": 2.962647613570757e-06, "loss": 0.5198794603347778, "step": 9525 }, { "epoch": 1.5224966035323264, "grad_norm": 1.5898196903359347, "learning_rate": 2.9607703498881513e-06, "loss": 0.7154092788696289, "step": 9526 }, { "epoch": 1.5226564373052027, "grad_norm": 2.215384810903062, "learning_rate": 2.9588935778003526e-06, "loss": 0.6369082927703857, "step": 9527 }, { "epoch": 1.5228162710780788, "grad_norm": 1.6359109324054653, "learning_rate": 2.9570172974384257e-06, "loss": 0.5073233246803284, "step": 9528 }, { "epoch": 1.522976104850955, "grad_norm": 1.5232384053893122, "learning_rate": 2.9551415089334025e-06, "loss": 0.4822440445423126, "step": 9529 }, { "epoch": 1.5231359386238312, "grad_norm": 1.644345185212782, "learning_rate": 2.9532662124162824e-06, "loss": 0.47312402725219727, "step": 9530 }, { "epoch": 1.5232957723967075, "grad_norm": 1.6599181868299306, "learning_rate": 2.951391408018025e-06, "loss": 0.4626138210296631, "step": 9531 }, { "epoch": 1.5234556061695836, "grad_norm": 1.8373334946202704, "learning_rate": 2.949517095869567e-06, "loss": 0.5982219576835632, "step": 9532 }, { "epoch": 1.5236154399424597, "grad_norm": 1.5445295501379623, "learning_rate": 2.947643276101797e-06, "loss": 0.5208657383918762, "step": 9533 }, { "epoch": 1.523775273715336, "grad_norm": 1.4377765402882574, "learning_rate": 2.9457699488455825e-06, "loss": 0.5037835836410522, "step": 9534 }, { "epoch": 1.5239351074882124, "grad_norm": 1.4825378738935435, "learning_rate": 2.9438971142317484e-06, "loss": 0.4328197240829468, "step": 9535 }, { "epoch": 1.5240949412610885, "grad_norm": 1.6941763648977484, "learning_rate": 2.942024772391081e-06, "loss": 0.6252152919769287, "step": 9536 }, { "epoch": 1.5242547750339646, "grad_norm": 1.7538840057182423, "learning_rate": 2.9401529234543445e-06, "loss": 0.47804486751556396, "step": 9537 }, { "epoch": 1.5244146088068409, "grad_norm": 1.5417802835222625, "learning_rate": 2.9382815675522613e-06, "loss": 0.47735917568206787, "step": 9538 }, { "epoch": 1.5245744425797172, "grad_norm": 1.6242791302554789, "learning_rate": 2.936410704815519e-06, "loss": 0.5539560317993164, "step": 9539 }, { "epoch": 1.5247342763525933, "grad_norm": 1.4739952965775482, "learning_rate": 2.9345403353747713e-06, "loss": 0.5268497467041016, "step": 9540 }, { "epoch": 1.5248941101254694, "grad_norm": 1.7898473273075124, "learning_rate": 2.932670459360637e-06, "loss": 0.5837205052375793, "step": 9541 }, { "epoch": 1.5250539438983457, "grad_norm": 1.6746894782862825, "learning_rate": 2.9308010769037054e-06, "loss": 0.5096185207366943, "step": 9542 }, { "epoch": 1.525213777671222, "grad_norm": 1.7388593903544183, "learning_rate": 2.9289321881345257e-06, "loss": 0.6643701791763306, "step": 9543 }, { "epoch": 1.525373611444098, "grad_norm": 1.5801822071761904, "learning_rate": 2.9270637931836145e-06, "loss": 0.4565950632095337, "step": 9544 }, { "epoch": 1.5255334452169742, "grad_norm": 1.6507802671471985, "learning_rate": 2.925195892181454e-06, "loss": 0.4808942675590515, "step": 9545 }, { "epoch": 1.5256932789898505, "grad_norm": 1.606026646409403, "learning_rate": 2.9233284852584877e-06, "loss": 0.5280964374542236, "step": 9546 }, { "epoch": 1.5258531127627268, "grad_norm": 1.6514268488265513, "learning_rate": 2.9214615725451354e-06, "loss": 0.5958093404769897, "step": 9547 }, { "epoch": 1.526012946535603, "grad_norm": 1.55050885785082, "learning_rate": 2.9195951541717736e-06, "loss": 0.5145132541656494, "step": 9548 }, { "epoch": 1.526172780308479, "grad_norm": 1.7010670917750412, "learning_rate": 2.9177292302687445e-06, "loss": 0.5286173224449158, "step": 9549 }, { "epoch": 1.5263326140813553, "grad_norm": 1.9296982365920776, "learning_rate": 2.91586380096636e-06, "loss": 0.5428380966186523, "step": 9550 }, { "epoch": 1.5264924478542317, "grad_norm": 2.010278659186838, "learning_rate": 2.913998866394889e-06, "loss": 0.6276131868362427, "step": 9551 }, { "epoch": 1.5266522816271078, "grad_norm": 1.426490518325099, "learning_rate": 2.91213442668458e-06, "loss": 0.4860638678073883, "step": 9552 }, { "epoch": 1.526812115399984, "grad_norm": 1.5468597354237161, "learning_rate": 2.9102704819656356e-06, "loss": 0.4918076992034912, "step": 9553 }, { "epoch": 1.5269719491728604, "grad_norm": 1.962023389399838, "learning_rate": 2.9084070323682235e-06, "loss": 0.6736950874328613, "step": 9554 }, { "epoch": 1.5271317829457365, "grad_norm": 1.8026566765829786, "learning_rate": 2.9065440780224862e-06, "loss": 0.6480022668838501, "step": 9555 }, { "epoch": 1.5272916167186126, "grad_norm": 1.7155478196182545, "learning_rate": 2.9046816190585213e-06, "loss": 0.6943134665489197, "step": 9556 }, { "epoch": 1.527451450491489, "grad_norm": 1.745037608316055, "learning_rate": 2.9028196556064013e-06, "loss": 0.6828793287277222, "step": 9557 }, { "epoch": 1.5276112842643652, "grad_norm": 1.57763689946462, "learning_rate": 2.900958187796156e-06, "loss": 0.3510299324989319, "step": 9558 }, { "epoch": 1.5277711180372413, "grad_norm": 1.5765536985523338, "learning_rate": 2.8990972157577833e-06, "loss": 0.4770370423793793, "step": 9559 }, { "epoch": 1.5279309518101174, "grad_norm": 1.4631845834484603, "learning_rate": 2.8972367396212486e-06, "loss": 0.47044992446899414, "step": 9560 }, { "epoch": 1.5280907855829937, "grad_norm": 1.669413730739435, "learning_rate": 2.895376759516476e-06, "loss": 0.7396685481071472, "step": 9561 }, { "epoch": 1.52825061935587, "grad_norm": 1.4515419416263773, "learning_rate": 2.8935172755733663e-06, "loss": 0.49649810791015625, "step": 9562 }, { "epoch": 1.5284104531287461, "grad_norm": 1.497595690469564, "learning_rate": 2.891658287921778e-06, "loss": 0.545720100402832, "step": 9563 }, { "epoch": 1.5285702869016222, "grad_norm": 1.7522677892471856, "learning_rate": 2.8897997966915337e-06, "loss": 0.5907623767852783, "step": 9564 }, { "epoch": 1.5287301206744985, "grad_norm": 1.3076310750903153, "learning_rate": 2.887941802012426e-06, "loss": 0.5579885840415955, "step": 9565 }, { "epoch": 1.5288899544473749, "grad_norm": 1.524808469445889, "learning_rate": 2.886084304014206e-06, "loss": 0.5404189825057983, "step": 9566 }, { "epoch": 1.529049788220251, "grad_norm": 1.4616703568539224, "learning_rate": 2.884227302826601e-06, "loss": 0.4881689250469208, "step": 9567 }, { "epoch": 1.529209621993127, "grad_norm": 1.7245074352398915, "learning_rate": 2.882370798579296e-06, "loss": 0.5996699929237366, "step": 9568 }, { "epoch": 1.5293694557660034, "grad_norm": 1.5529565230378246, "learning_rate": 2.8805147914019406e-06, "loss": 0.4426923990249634, "step": 9569 }, { "epoch": 1.5295292895388797, "grad_norm": 1.637862098982188, "learning_rate": 2.878659281424149e-06, "loss": 0.590720534324646, "step": 9570 }, { "epoch": 1.5296891233117558, "grad_norm": 1.6687092397980774, "learning_rate": 2.8768042687755116e-06, "loss": 0.566561222076416, "step": 9571 }, { "epoch": 1.5298489570846319, "grad_norm": 1.6408058822782032, "learning_rate": 2.8749497535855697e-06, "loss": 0.5593313574790955, "step": 9572 }, { "epoch": 1.5300087908575082, "grad_norm": 1.5888721088872855, "learning_rate": 2.8730957359838385e-06, "loss": 0.6625986695289612, "step": 9573 }, { "epoch": 1.5301686246303845, "grad_norm": 1.5865218295025245, "learning_rate": 2.8712422160997923e-06, "loss": 0.4164241552352905, "step": 9574 }, { "epoch": 1.5303284584032606, "grad_norm": 1.5382099293337645, "learning_rate": 2.8693891940628803e-06, "loss": 0.4974974989891052, "step": 9575 }, { "epoch": 1.5304882921761367, "grad_norm": 1.3774858240519285, "learning_rate": 2.867536670002504e-06, "loss": 0.5147977471351624, "step": 9576 }, { "epoch": 1.530648125949013, "grad_norm": 1.7633366019908008, "learning_rate": 2.8656846440480447e-06, "loss": 0.6513627171516418, "step": 9577 }, { "epoch": 1.5308079597218893, "grad_norm": 1.6750804473428966, "learning_rate": 2.863833116328838e-06, "loss": 0.5828598141670227, "step": 9578 }, { "epoch": 1.5309677934947654, "grad_norm": 1.694256330891792, "learning_rate": 2.8619820869741877e-06, "loss": 0.5978292226791382, "step": 9579 }, { "epoch": 1.5311276272676415, "grad_norm": 1.5382819249589752, "learning_rate": 2.8601315561133623e-06, "loss": 0.5328376293182373, "step": 9580 }, { "epoch": 1.5312874610405178, "grad_norm": 1.5702201831777995, "learning_rate": 2.8582815238755947e-06, "loss": 0.511788010597229, "step": 9581 }, { "epoch": 1.5314472948133941, "grad_norm": 1.592022553104305, "learning_rate": 2.8564319903900893e-06, "loss": 0.5141571164131165, "step": 9582 }, { "epoch": 1.5316071285862702, "grad_norm": 1.651219569584711, "learning_rate": 2.8545829557860083e-06, "loss": 0.5370025634765625, "step": 9583 }, { "epoch": 1.5317669623591463, "grad_norm": 1.8486063854252572, "learning_rate": 2.8527344201924833e-06, "loss": 0.5731949806213379, "step": 9584 }, { "epoch": 1.5319267961320227, "grad_norm": 1.562380548900745, "learning_rate": 2.8508863837386046e-06, "loss": 0.557113766670227, "step": 9585 }, { "epoch": 1.532086629904899, "grad_norm": 1.5789712007969243, "learning_rate": 2.8490388465534393e-06, "loss": 0.522729754447937, "step": 9586 }, { "epoch": 1.532246463677775, "grad_norm": 1.575878385099839, "learning_rate": 2.8471918087660087e-06, "loss": 0.511550784111023, "step": 9587 }, { "epoch": 1.5324062974506514, "grad_norm": 1.6674747516981645, "learning_rate": 2.8453452705053053e-06, "loss": 0.6407668590545654, "step": 9588 }, { "epoch": 1.5325661312235277, "grad_norm": 1.6348469726869235, "learning_rate": 2.843499231900283e-06, "loss": 0.5648893117904663, "step": 9589 }, { "epoch": 1.5327259649964038, "grad_norm": 1.7506584320563765, "learning_rate": 2.841653693079861e-06, "loss": 0.5486952662467957, "step": 9590 }, { "epoch": 1.5328857987692799, "grad_norm": 1.651783379764598, "learning_rate": 2.839808654172931e-06, "loss": 0.5108557343482971, "step": 9591 }, { "epoch": 1.5330456325421562, "grad_norm": 2.0367126732304666, "learning_rate": 2.837964115308339e-06, "loss": 0.5770916938781738, "step": 9592 }, { "epoch": 1.5332054663150325, "grad_norm": 1.624544852272208, "learning_rate": 2.8361200766149034e-06, "loss": 0.5238710641860962, "step": 9593 }, { "epoch": 1.5333653000879086, "grad_norm": 1.5633699275014856, "learning_rate": 2.834276538221401e-06, "loss": 0.5282467603683472, "step": 9594 }, { "epoch": 1.5335251338607847, "grad_norm": 1.5861497896401564, "learning_rate": 2.832433500256585e-06, "loss": 0.5962750911712646, "step": 9595 }, { "epoch": 1.533684967633661, "grad_norm": 1.666870032899254, "learning_rate": 2.83059096284916e-06, "loss": 0.45709124207496643, "step": 9596 }, { "epoch": 1.5338448014065373, "grad_norm": 1.6515466679273392, "learning_rate": 2.828748926127808e-06, "loss": 0.5776263475418091, "step": 9597 }, { "epoch": 1.5340046351794134, "grad_norm": 1.5538505661561721, "learning_rate": 2.8269073902211674e-06, "loss": 0.558265745639801, "step": 9598 }, { "epoch": 1.5341644689522895, "grad_norm": 1.6461079676857913, "learning_rate": 2.825066355257845e-06, "loss": 0.4795677661895752, "step": 9599 }, { "epoch": 1.5343243027251658, "grad_norm": 1.671490735922062, "learning_rate": 2.823225821366409e-06, "loss": 0.6584498882293701, "step": 9600 }, { "epoch": 1.5344841364980422, "grad_norm": 1.6994532404946876, "learning_rate": 2.8213857886754015e-06, "loss": 0.4898855686187744, "step": 9601 }, { "epoch": 1.5346439702709183, "grad_norm": 1.432002969947378, "learning_rate": 2.8195462573133216e-06, "loss": 0.5624327659606934, "step": 9602 }, { "epoch": 1.5348038040437944, "grad_norm": 2.2671673587733125, "learning_rate": 2.8177072274086347e-06, "loss": 0.5749320983886719, "step": 9603 }, { "epoch": 1.5349636378166707, "grad_norm": 1.9209757413087698, "learning_rate": 2.815868699089772e-06, "loss": 0.6240302324295044, "step": 9604 }, { "epoch": 1.535123471589547, "grad_norm": 1.5688000242142521, "learning_rate": 2.8140306724851275e-06, "loss": 0.6196238398551941, "step": 9605 }, { "epoch": 1.535283305362423, "grad_norm": 1.85217483740187, "learning_rate": 2.812193147723068e-06, "loss": 0.5903134346008301, "step": 9606 }, { "epoch": 1.5354431391352992, "grad_norm": 1.6114557475230002, "learning_rate": 2.810356124931918e-06, "loss": 0.6105761528015137, "step": 9607 }, { "epoch": 1.5356029729081755, "grad_norm": 1.526178427135123, "learning_rate": 2.8085196042399664e-06, "loss": 0.48821255564689636, "step": 9608 }, { "epoch": 1.5357628066810518, "grad_norm": 2.1338974500641053, "learning_rate": 2.8066835857754703e-06, "loss": 0.5326516628265381, "step": 9609 }, { "epoch": 1.535922640453928, "grad_norm": 1.5548272926415805, "learning_rate": 2.804848069666648e-06, "loss": 0.5811814069747925, "step": 9610 }, { "epoch": 1.536082474226804, "grad_norm": 1.540545881349926, "learning_rate": 2.8030130560416914e-06, "loss": 0.5538506507873535, "step": 9611 }, { "epoch": 1.5362423079996803, "grad_norm": 1.8654097594542836, "learning_rate": 2.8011785450287487e-06, "loss": 0.6058106422424316, "step": 9612 }, { "epoch": 1.5364021417725566, "grad_norm": 1.721408020207231, "learning_rate": 2.7993445367559348e-06, "loss": 0.735142707824707, "step": 9613 }, { "epoch": 1.5365619755454327, "grad_norm": 1.5336033107818372, "learning_rate": 2.7975110313513275e-06, "loss": 0.46272239089012146, "step": 9614 }, { "epoch": 1.5367218093183088, "grad_norm": 1.6048809489521998, "learning_rate": 2.7956780289429786e-06, "loss": 0.5516318082809448, "step": 9615 }, { "epoch": 1.5368816430911851, "grad_norm": 1.6767829766179423, "learning_rate": 2.793845529658892e-06, "loss": 0.5908757448196411, "step": 9616 }, { "epoch": 1.5370414768640615, "grad_norm": 1.3743677312974525, "learning_rate": 2.7920135336270504e-06, "loss": 0.5129276514053345, "step": 9617 }, { "epoch": 1.5372013106369375, "grad_norm": 1.6522113349388143, "learning_rate": 2.790182040975389e-06, "loss": 0.5757995247840881, "step": 9618 }, { "epoch": 1.5373611444098136, "grad_norm": 1.5610003663443217, "learning_rate": 2.7883510518318137e-06, "loss": 0.5881301164627075, "step": 9619 }, { "epoch": 1.53752097818269, "grad_norm": 2.0930773642794716, "learning_rate": 2.78652056632419e-06, "loss": 0.6417011022567749, "step": 9620 }, { "epoch": 1.5376808119555663, "grad_norm": 1.5142744797765977, "learning_rate": 2.7846905845803606e-06, "loss": 0.5189215540885925, "step": 9621 }, { "epoch": 1.5378406457284424, "grad_norm": 1.5113440664111983, "learning_rate": 2.7828611067281208e-06, "loss": 0.4498721957206726, "step": 9622 }, { "epoch": 1.5380004795013185, "grad_norm": 1.5464385311505024, "learning_rate": 2.781032132895236e-06, "loss": 0.4799695611000061, "step": 9623 }, { "epoch": 1.538160313274195, "grad_norm": 1.6205840850478777, "learning_rate": 2.7792036632094334e-06, "loss": 0.4120226502418518, "step": 9624 }, { "epoch": 1.538320147047071, "grad_norm": 1.7615021406252653, "learning_rate": 2.7773756977984045e-06, "loss": 0.6046385765075684, "step": 9625 }, { "epoch": 1.5384799808199472, "grad_norm": 1.5816388888449058, "learning_rate": 2.7755482367898133e-06, "loss": 0.5677501559257507, "step": 9626 }, { "epoch": 1.5386398145928235, "grad_norm": 1.4527920400050884, "learning_rate": 2.7737212803112824e-06, "loss": 0.4412141740322113, "step": 9627 }, { "epoch": 1.5387996483656998, "grad_norm": 1.4190417605915286, "learning_rate": 2.7718948284903977e-06, "loss": 0.37289175391197205, "step": 9628 }, { "epoch": 1.538959482138576, "grad_norm": 1.6585991173701886, "learning_rate": 2.770068881454714e-06, "loss": 0.5828545093536377, "step": 9629 }, { "epoch": 1.539119315911452, "grad_norm": 1.5554275314146302, "learning_rate": 2.7682434393317436e-06, "loss": 0.5930207967758179, "step": 9630 }, { "epoch": 1.5392791496843283, "grad_norm": 1.4897111971481778, "learning_rate": 2.7664185022489763e-06, "loss": 0.5259422659873962, "step": 9631 }, { "epoch": 1.5394389834572046, "grad_norm": 1.6892822528711025, "learning_rate": 2.764594070333857e-06, "loss": 0.5503689050674438, "step": 9632 }, { "epoch": 1.5395988172300807, "grad_norm": 1.6738374874322897, "learning_rate": 2.7627701437137954e-06, "loss": 0.552128255367279, "step": 9633 }, { "epoch": 1.5397586510029568, "grad_norm": 1.826670326065825, "learning_rate": 2.760946722516168e-06, "loss": 0.5162266492843628, "step": 9634 }, { "epoch": 1.5399184847758332, "grad_norm": 1.547578864483629, "learning_rate": 2.7591238068683156e-06, "loss": 0.47616803646087646, "step": 9635 }, { "epoch": 1.5400783185487095, "grad_norm": 1.6957876979992803, "learning_rate": 2.7573013968975502e-06, "loss": 0.6264957785606384, "step": 9636 }, { "epoch": 1.5402381523215856, "grad_norm": 1.5211687379446084, "learning_rate": 2.755479492731138e-06, "loss": 0.5882010459899902, "step": 9637 }, { "epoch": 1.5403979860944617, "grad_norm": 1.5432280217565268, "learning_rate": 2.753658094496314e-06, "loss": 0.5594900846481323, "step": 9638 }, { "epoch": 1.540557819867338, "grad_norm": 1.343078017542507, "learning_rate": 2.751837202320279e-06, "loss": 0.4680440127849579, "step": 9639 }, { "epoch": 1.5407176536402143, "grad_norm": 1.6539433897947906, "learning_rate": 2.7500168163301933e-06, "loss": 0.5033473968505859, "step": 9640 }, { "epoch": 1.5408774874130904, "grad_norm": 1.6526821260964657, "learning_rate": 2.7481969366531947e-06, "loss": 0.81939697265625, "step": 9641 }, { "epoch": 1.5410373211859665, "grad_norm": 1.7261178536344004, "learning_rate": 2.7463775634163714e-06, "loss": 0.5824074745178223, "step": 9642 }, { "epoch": 1.5411971549588428, "grad_norm": 1.8264675110801707, "learning_rate": 2.744558696746784e-06, "loss": 0.42601585388183594, "step": 9643 }, { "epoch": 1.5413569887317191, "grad_norm": 1.4597848092603756, "learning_rate": 2.7427403367714545e-06, "loss": 0.5333881974220276, "step": 9644 }, { "epoch": 1.5415168225045952, "grad_norm": 1.7102519076452933, "learning_rate": 2.7409224836173687e-06, "loss": 0.5625545978546143, "step": 9645 }, { "epoch": 1.5416766562774713, "grad_norm": 1.7245438707339336, "learning_rate": 2.739105137411484e-06, "loss": 0.7150006294250488, "step": 9646 }, { "epoch": 1.5418364900503476, "grad_norm": 1.7818944368817486, "learning_rate": 2.737288298280715e-06, "loss": 0.5689037442207336, "step": 9647 }, { "epoch": 1.541996323823224, "grad_norm": 1.9533678297249835, "learning_rate": 2.7354719663519423e-06, "loss": 0.6667337417602539, "step": 9648 }, { "epoch": 1.5421561575961, "grad_norm": 1.4351058586716545, "learning_rate": 2.7336561417520113e-06, "loss": 0.4969440698623657, "step": 9649 }, { "epoch": 1.5423159913689761, "grad_norm": 1.4887584165308654, "learning_rate": 2.7318408246077365e-06, "loss": 0.4973409175872803, "step": 9650 }, { "epoch": 1.5424758251418524, "grad_norm": 1.513717884223935, "learning_rate": 2.730026015045891e-06, "loss": 0.56651771068573, "step": 9651 }, { "epoch": 1.5426356589147288, "grad_norm": 1.5530333133496155, "learning_rate": 2.728211713193215e-06, "loss": 0.5930197238922119, "step": 9652 }, { "epoch": 1.5427954926876049, "grad_norm": 1.5740737193948404, "learning_rate": 2.7263979191764124e-06, "loss": 0.5527825355529785, "step": 9653 }, { "epoch": 1.542955326460481, "grad_norm": 1.9489396396443903, "learning_rate": 2.724584633122149e-06, "loss": 0.6757889986038208, "step": 9654 }, { "epoch": 1.5431151602333573, "grad_norm": 1.5898291791391783, "learning_rate": 2.7227718551570624e-06, "loss": 0.4330282509326935, "step": 9655 }, { "epoch": 1.5432749940062336, "grad_norm": 1.6912537665007144, "learning_rate": 2.720959585407752e-06, "loss": 0.5010424256324768, "step": 9656 }, { "epoch": 1.5434348277791097, "grad_norm": 1.7306648515002099, "learning_rate": 2.7191478240007786e-06, "loss": 0.5593842267990112, "step": 9657 }, { "epoch": 1.5435946615519858, "grad_norm": 1.6754715476450195, "learning_rate": 2.7173365710626677e-06, "loss": 0.4633329510688782, "step": 9658 }, { "epoch": 1.5437544953248623, "grad_norm": 1.6175032815934178, "learning_rate": 2.7155258267199123e-06, "loss": 0.47595539689064026, "step": 9659 }, { "epoch": 1.5439143290977384, "grad_norm": 1.6884442463473195, "learning_rate": 2.7137155910989655e-06, "loss": 0.6224728226661682, "step": 9660 }, { "epoch": 1.5440741628706145, "grad_norm": 1.4484734767351888, "learning_rate": 2.711905864326252e-06, "loss": 0.515338659286499, "step": 9661 }, { "epoch": 1.5442339966434908, "grad_norm": 1.3855864148089345, "learning_rate": 2.7100966465281543e-06, "loss": 0.45108598470687866, "step": 9662 }, { "epoch": 1.5443938304163671, "grad_norm": 1.6401669751994261, "learning_rate": 2.7082879378310233e-06, "loss": 0.518696665763855, "step": 9663 }, { "epoch": 1.5445536641892432, "grad_norm": 1.602511464388245, "learning_rate": 2.7064797383611675e-06, "loss": 0.5133255124092102, "step": 9664 }, { "epoch": 1.5447134979621193, "grad_norm": 1.5820405609448758, "learning_rate": 2.704672048244873e-06, "loss": 0.48663443326950073, "step": 9665 }, { "epoch": 1.5448733317349956, "grad_norm": 1.5724060426048634, "learning_rate": 2.702864867608379e-06, "loss": 0.45011621713638306, "step": 9666 }, { "epoch": 1.545033165507872, "grad_norm": 1.583860747822151, "learning_rate": 2.7010581965778914e-06, "loss": 0.5359795093536377, "step": 9667 }, { "epoch": 1.545192999280748, "grad_norm": 1.8211523800077416, "learning_rate": 2.6992520352795826e-06, "loss": 0.46472328901290894, "step": 9668 }, { "epoch": 1.5453528330536241, "grad_norm": 1.5450849866326979, "learning_rate": 2.697446383839586e-06, "loss": 0.4972310960292816, "step": 9669 }, { "epoch": 1.5455126668265005, "grad_norm": 1.3957007866921907, "learning_rate": 2.6956412423840074e-06, "loss": 0.6598198413848877, "step": 9670 }, { "epoch": 1.5456725005993768, "grad_norm": 1.3559027989704835, "learning_rate": 2.693836611038908e-06, "loss": 0.42254477739334106, "step": 9671 }, { "epoch": 1.5458323343722529, "grad_norm": 1.562725071686383, "learning_rate": 2.6920324899303173e-06, "loss": 0.4172053039073944, "step": 9672 }, { "epoch": 1.545992168145129, "grad_norm": 1.5868336780821257, "learning_rate": 2.690228879184229e-06, "loss": 0.5927122235298157, "step": 9673 }, { "epoch": 1.5461520019180053, "grad_norm": 1.507291649694805, "learning_rate": 2.688425778926598e-06, "loss": 0.5323529243469238, "step": 9674 }, { "epoch": 1.5463118356908816, "grad_norm": 1.6106509028442826, "learning_rate": 2.686623189283348e-06, "loss": 0.4781864881515503, "step": 9675 }, { "epoch": 1.5464716694637577, "grad_norm": 1.6439553260507358, "learning_rate": 2.68482111038037e-06, "loss": 0.6052689552307129, "step": 9676 }, { "epoch": 1.5466315032366338, "grad_norm": 1.8389938198163593, "learning_rate": 2.683019542343511e-06, "loss": 0.563148558139801, "step": 9677 }, { "epoch": 1.54679133700951, "grad_norm": 1.7782892146749547, "learning_rate": 2.6812184852985858e-06, "loss": 0.5896855592727661, "step": 9678 }, { "epoch": 1.5469511707823864, "grad_norm": 1.5610850744800837, "learning_rate": 2.679417939371375e-06, "loss": 0.47119107842445374, "step": 9679 }, { "epoch": 1.5471110045552625, "grad_norm": 1.3551175200582084, "learning_rate": 2.677617904687618e-06, "loss": 0.41402900218963623, "step": 9680 }, { "epoch": 1.5472708383281386, "grad_norm": 1.8026340365562779, "learning_rate": 2.675818381373031e-06, "loss": 0.7931550741195679, "step": 9681 }, { "epoch": 1.547430672101015, "grad_norm": 1.6857486975674303, "learning_rate": 2.674019369553281e-06, "loss": 0.5756524801254272, "step": 9682 }, { "epoch": 1.5475905058738912, "grad_norm": 1.8727219833584483, "learning_rate": 2.6722208693540054e-06, "loss": 0.5501387119293213, "step": 9683 }, { "epoch": 1.5477503396467673, "grad_norm": 1.5933466752082315, "learning_rate": 2.6704228809008016e-06, "loss": 0.5106390714645386, "step": 9684 }, { "epoch": 1.5479101734196434, "grad_norm": 1.606506623654861, "learning_rate": 2.6686254043192416e-06, "loss": 0.4781963527202606, "step": 9685 }, { "epoch": 1.5480700071925197, "grad_norm": 1.5401402862879086, "learning_rate": 2.666828439734852e-06, "loss": 0.525061845779419, "step": 9686 }, { "epoch": 1.548229840965396, "grad_norm": 1.7624346573182612, "learning_rate": 2.6650319872731258e-06, "loss": 0.5922737121582031, "step": 9687 }, { "epoch": 1.5483896747382722, "grad_norm": 1.6816785696179823, "learning_rate": 2.66323604705952e-06, "loss": 0.6705523133277893, "step": 9688 }, { "epoch": 1.5485495085111483, "grad_norm": 1.5755364928483366, "learning_rate": 2.6614406192194553e-06, "loss": 0.530379056930542, "step": 9689 }, { "epoch": 1.5487093422840246, "grad_norm": 1.7318249221969826, "learning_rate": 2.659645703878323e-06, "loss": 0.6466739177703857, "step": 9690 }, { "epoch": 1.5488691760569009, "grad_norm": 1.3151249011000044, "learning_rate": 2.6578513011614714e-06, "loss": 0.49325117468833923, "step": 9691 }, { "epoch": 1.549029009829777, "grad_norm": 1.5939620697501322, "learning_rate": 2.6560574111942137e-06, "loss": 0.5157634615898132, "step": 9692 }, { "epoch": 1.549188843602653, "grad_norm": 1.5081906009397446, "learning_rate": 2.654264034101831e-06, "loss": 0.47597092390060425, "step": 9693 }, { "epoch": 1.5493486773755296, "grad_norm": 1.6366871937759293, "learning_rate": 2.652471170009562e-06, "loss": 0.5631994009017944, "step": 9694 }, { "epoch": 1.5495085111484057, "grad_norm": 1.5857050843127163, "learning_rate": 2.650678819042617e-06, "loss": 0.4879603981971741, "step": 9695 }, { "epoch": 1.5496683449212818, "grad_norm": 1.5778775198608772, "learning_rate": 2.6488869813261698e-06, "loss": 0.5860913395881653, "step": 9696 }, { "epoch": 1.5498281786941581, "grad_norm": 1.2128716080164557, "learning_rate": 2.6470956569853546e-06, "loss": 0.40529555082321167, "step": 9697 }, { "epoch": 1.5499880124670344, "grad_norm": 1.8494157995891245, "learning_rate": 2.6453048461452703e-06, "loss": 0.5716162323951721, "step": 9698 }, { "epoch": 1.5501478462399105, "grad_norm": 1.6505084185539993, "learning_rate": 2.6435145489309777e-06, "loss": 0.622512936592102, "step": 9699 }, { "epoch": 1.5503076800127866, "grad_norm": 1.7300260417000388, "learning_rate": 2.6417247654675114e-06, "loss": 0.6307628154754639, "step": 9700 }, { "epoch": 1.550467513785663, "grad_norm": 1.6251584475867773, "learning_rate": 2.6399354958798597e-06, "loss": 0.4953765869140625, "step": 9701 }, { "epoch": 1.5506273475585393, "grad_norm": 1.3557385296067561, "learning_rate": 2.6381467402929793e-06, "loss": 0.42522311210632324, "step": 9702 }, { "epoch": 1.5507871813314154, "grad_norm": 1.7892925519099414, "learning_rate": 2.6363584988317905e-06, "loss": 0.5980936288833618, "step": 9703 }, { "epoch": 1.5509470151042914, "grad_norm": 1.7001059935803564, "learning_rate": 2.6345707716211734e-06, "loss": 0.5219393968582153, "step": 9704 }, { "epoch": 1.5511068488771678, "grad_norm": 1.6754278690868283, "learning_rate": 2.632783558785985e-06, "loss": 0.5117363929748535, "step": 9705 }, { "epoch": 1.551266682650044, "grad_norm": 1.4352055312367156, "learning_rate": 2.6309968604510327e-06, "loss": 0.5564277768135071, "step": 9706 }, { "epoch": 1.5514265164229202, "grad_norm": 1.544602908612708, "learning_rate": 2.6292106767410953e-06, "loss": 0.5317907929420471, "step": 9707 }, { "epoch": 1.5515863501957963, "grad_norm": 1.5593038527641019, "learning_rate": 2.6274250077809117e-06, "loss": 0.48723796010017395, "step": 9708 }, { "epoch": 1.5517461839686726, "grad_norm": 1.6388865382591808, "learning_rate": 2.6256398536951844e-06, "loss": 0.5915758609771729, "step": 9709 }, { "epoch": 1.551906017741549, "grad_norm": 1.508753837628252, "learning_rate": 2.6238552146085885e-06, "loss": 0.5570458173751831, "step": 9710 }, { "epoch": 1.552065851514425, "grad_norm": 1.6548938605486863, "learning_rate": 2.6220710906457523e-06, "loss": 0.5220267176628113, "step": 9711 }, { "epoch": 1.552225685287301, "grad_norm": 1.69994629205776, "learning_rate": 2.620287481931275e-06, "loss": 0.6151893734931946, "step": 9712 }, { "epoch": 1.5523855190601774, "grad_norm": 1.8915513466393583, "learning_rate": 2.618504388589713e-06, "loss": 0.5398567318916321, "step": 9713 }, { "epoch": 1.5525453528330537, "grad_norm": 1.9426390919249599, "learning_rate": 2.616721810745595e-06, "loss": 0.6486589908599854, "step": 9714 }, { "epoch": 1.5527051866059298, "grad_norm": 1.7081435306179173, "learning_rate": 2.6149397485234116e-06, "loss": 0.5335697531700134, "step": 9715 }, { "epoch": 1.552865020378806, "grad_norm": 1.8684383860433593, "learning_rate": 2.6131582020476143e-06, "loss": 0.6599448919296265, "step": 9716 }, { "epoch": 1.5530248541516822, "grad_norm": 1.4203111337409484, "learning_rate": 2.6113771714426196e-06, "loss": 0.3914029002189636, "step": 9717 }, { "epoch": 1.5531846879245585, "grad_norm": 1.6259206017741998, "learning_rate": 2.6095966568328067e-06, "loss": 0.5332126617431641, "step": 9718 }, { "epoch": 1.5533445216974346, "grad_norm": 1.7174351419833918, "learning_rate": 2.60781665834252e-06, "loss": 0.6165268421173096, "step": 9719 }, { "epoch": 1.5535043554703107, "grad_norm": 1.6209576366307035, "learning_rate": 2.6060371760960725e-06, "loss": 0.6142551898956299, "step": 9720 }, { "epoch": 1.553664189243187, "grad_norm": 1.5600398165340335, "learning_rate": 2.604258210217735e-06, "loss": 0.5364009141921997, "step": 9721 }, { "epoch": 1.5538240230160634, "grad_norm": 1.362711005440994, "learning_rate": 2.602479760831743e-06, "loss": 0.4585179090499878, "step": 9722 }, { "epoch": 1.5539838567889395, "grad_norm": 1.9763360136638262, "learning_rate": 2.6007018280622974e-06, "loss": 0.7176387906074524, "step": 9723 }, { "epoch": 1.5541436905618156, "grad_norm": 1.7021039407625647, "learning_rate": 2.598924412033561e-06, "loss": 0.6584632992744446, "step": 9724 }, { "epoch": 1.5543035243346919, "grad_norm": 1.5945997546278534, "learning_rate": 2.5971475128696666e-06, "loss": 0.4456116855144501, "step": 9725 }, { "epoch": 1.5544633581075682, "grad_norm": 1.5304593567510387, "learning_rate": 2.595371130694705e-06, "loss": 0.5440500974655151, "step": 9726 }, { "epoch": 1.5546231918804443, "grad_norm": 1.6476905339920953, "learning_rate": 2.59359526563273e-06, "loss": 0.6580438613891602, "step": 9727 }, { "epoch": 1.5547830256533204, "grad_norm": 1.5552215535101963, "learning_rate": 2.591819917807762e-06, "loss": 0.5149173736572266, "step": 9728 }, { "epoch": 1.554942859426197, "grad_norm": 1.741184672639852, "learning_rate": 2.5900450873437878e-06, "loss": 0.594433069229126, "step": 9729 }, { "epoch": 1.555102693199073, "grad_norm": 1.3617407193883215, "learning_rate": 2.588270774364755e-06, "loss": 0.5209945440292358, "step": 9730 }, { "epoch": 1.555262526971949, "grad_norm": 1.4051638462468585, "learning_rate": 2.5864969789945738e-06, "loss": 0.4818266034126282, "step": 9731 }, { "epoch": 1.5554223607448254, "grad_norm": 1.5725309594007904, "learning_rate": 2.584723701357119e-06, "loss": 0.5117568969726562, "step": 9732 }, { "epoch": 1.5555821945177017, "grad_norm": 1.6535472037269938, "learning_rate": 2.5829509415762288e-06, "loss": 0.5615389347076416, "step": 9733 }, { "epoch": 1.5557420282905778, "grad_norm": 1.3912561036515751, "learning_rate": 2.5811786997757093e-06, "loss": 0.4298620820045471, "step": 9734 }, { "epoch": 1.555901862063454, "grad_norm": 1.4781487118625538, "learning_rate": 2.579406976079328e-06, "loss": 0.5798707604408264, "step": 9735 }, { "epoch": 1.5560616958363302, "grad_norm": 1.66793603208634, "learning_rate": 2.5776357706108167e-06, "loss": 0.46423977613449097, "step": 9736 }, { "epoch": 1.5562215296092066, "grad_norm": 1.4466538885383686, "learning_rate": 2.575865083493868e-06, "loss": 0.6043253540992737, "step": 9737 }, { "epoch": 1.5563813633820827, "grad_norm": 1.3770191049213854, "learning_rate": 2.5740949148521397e-06, "loss": 0.5307304859161377, "step": 9738 }, { "epoch": 1.5565411971549588, "grad_norm": 1.966340522941134, "learning_rate": 2.5723252648092524e-06, "loss": 0.5225646495819092, "step": 9739 }, { "epoch": 1.556701030927835, "grad_norm": 1.540311952997935, "learning_rate": 2.5705561334887973e-06, "loss": 0.5383353233337402, "step": 9740 }, { "epoch": 1.5568608647007114, "grad_norm": 1.7149592108237526, "learning_rate": 2.568787521014322e-06, "loss": 0.5889219045639038, "step": 9741 }, { "epoch": 1.5570206984735875, "grad_norm": 1.6495008804810125, "learning_rate": 2.5670194275093396e-06, "loss": 0.5315497517585754, "step": 9742 }, { "epoch": 1.5571805322464636, "grad_norm": 1.691418566746911, "learning_rate": 2.5652518530973257e-06, "loss": 0.5784677267074585, "step": 9743 }, { "epoch": 1.55734036601934, "grad_norm": 1.601226465228231, "learning_rate": 2.5634847979017253e-06, "loss": 0.4618656635284424, "step": 9744 }, { "epoch": 1.5575001997922162, "grad_norm": 1.6526725076914572, "learning_rate": 2.5617182620459413e-06, "loss": 0.49234768748283386, "step": 9745 }, { "epoch": 1.5576600335650923, "grad_norm": 1.4142293155763637, "learning_rate": 2.559952245653343e-06, "loss": 0.44040897488594055, "step": 9746 }, { "epoch": 1.5578198673379684, "grad_norm": 1.6350170349344615, "learning_rate": 2.558186748847262e-06, "loss": 0.5451531410217285, "step": 9747 }, { "epoch": 1.5579797011108447, "grad_norm": 1.9885382029358634, "learning_rate": 2.556421771750991e-06, "loss": 0.6634653806686401, "step": 9748 }, { "epoch": 1.558139534883721, "grad_norm": 1.675104448238087, "learning_rate": 2.5546573144877964e-06, "loss": 0.56264328956604, "step": 9749 }, { "epoch": 1.5582993686565971, "grad_norm": 1.5737559732336437, "learning_rate": 2.552893377180897e-06, "loss": 0.47833091020584106, "step": 9750 }, { "epoch": 1.5584592024294732, "grad_norm": 1.7472798034053196, "learning_rate": 2.5511299599534833e-06, "loss": 0.5566176772117615, "step": 9751 }, { "epoch": 1.5586190362023495, "grad_norm": 1.7543460994798907, "learning_rate": 2.5493670629287024e-06, "loss": 0.6000379323959351, "step": 9752 }, { "epoch": 1.5587788699752259, "grad_norm": 1.6050593631866221, "learning_rate": 2.547604686229669e-06, "loss": 0.6770547032356262, "step": 9753 }, { "epoch": 1.558938703748102, "grad_norm": 1.9670125463135064, "learning_rate": 2.545842829979461e-06, "loss": 0.7207045555114746, "step": 9754 }, { "epoch": 1.559098537520978, "grad_norm": 1.667488438265132, "learning_rate": 2.544081494301126e-06, "loss": 0.6202195882797241, "step": 9755 }, { "epoch": 1.5592583712938544, "grad_norm": 1.6453142921047557, "learning_rate": 2.5423206793176657e-06, "loss": 0.59214186668396, "step": 9756 }, { "epoch": 1.5594182050667307, "grad_norm": 2.052070818001943, "learning_rate": 2.5405603851520475e-06, "loss": 0.6189810037612915, "step": 9757 }, { "epoch": 1.5595780388396068, "grad_norm": 1.517791315152663, "learning_rate": 2.538800611927206e-06, "loss": 0.4830976724624634, "step": 9758 }, { "epoch": 1.5597378726124829, "grad_norm": 1.4989621403235407, "learning_rate": 2.5370413597660335e-06, "loss": 0.4733561873435974, "step": 9759 }, { "epoch": 1.5598977063853592, "grad_norm": 1.316702793362563, "learning_rate": 2.5352826287913966e-06, "loss": 0.5767710208892822, "step": 9760 }, { "epoch": 1.5600575401582355, "grad_norm": 1.6970482186509988, "learning_rate": 2.5335244191261153e-06, "loss": 0.5671259164810181, "step": 9761 }, { "epoch": 1.5602173739311116, "grad_norm": 1.6230003080735276, "learning_rate": 2.531766730892977e-06, "loss": 0.7147419452667236, "step": 9762 }, { "epoch": 1.5603772077039877, "grad_norm": 1.7355398111149176, "learning_rate": 2.5300095642147295e-06, "loss": 0.530617356300354, "step": 9763 }, { "epoch": 1.560537041476864, "grad_norm": 1.654065349021735, "learning_rate": 2.5282529192140927e-06, "loss": 0.6443653106689453, "step": 9764 }, { "epoch": 1.5606968752497403, "grad_norm": 1.5069871343553332, "learning_rate": 2.526496796013742e-06, "loss": 0.4781092405319214, "step": 9765 }, { "epoch": 1.5608567090226164, "grad_norm": 1.5618696870678352, "learning_rate": 2.5247411947363176e-06, "loss": 0.5353907346725464, "step": 9766 }, { "epoch": 1.5610165427954927, "grad_norm": 1.4093080242152876, "learning_rate": 2.5229861155044254e-06, "loss": 0.3815672993659973, "step": 9767 }, { "epoch": 1.561176376568369, "grad_norm": 1.4551703787435921, "learning_rate": 2.521231558440632e-06, "loss": 0.5923004746437073, "step": 9768 }, { "epoch": 1.5613362103412451, "grad_norm": 1.7317737543940528, "learning_rate": 2.519477523667473e-06, "loss": 0.6903790235519409, "step": 9769 }, { "epoch": 1.5614960441141212, "grad_norm": 1.5673913354704776, "learning_rate": 2.517724011307443e-06, "loss": 0.5191742777824402, "step": 9770 }, { "epoch": 1.5616558778869976, "grad_norm": 1.8445257767547016, "learning_rate": 2.515971021483e-06, "loss": 0.735234260559082, "step": 9771 }, { "epoch": 1.5618157116598739, "grad_norm": 1.593503602537295, "learning_rate": 2.5142185543165665e-06, "loss": 0.4850813150405884, "step": 9772 }, { "epoch": 1.56197554543275, "grad_norm": 1.5764380175250856, "learning_rate": 2.512466609930527e-06, "loss": 0.489785373210907, "step": 9773 }, { "epoch": 1.562135379205626, "grad_norm": 1.6581989874737673, "learning_rate": 2.5107151884472325e-06, "loss": 0.468691885471344, "step": 9774 }, { "epoch": 1.5622952129785024, "grad_norm": 1.404861290711843, "learning_rate": 2.508964289989e-06, "loss": 0.4288971424102783, "step": 9775 }, { "epoch": 1.5624550467513787, "grad_norm": 1.4494732771133374, "learning_rate": 2.5072139146781017e-06, "loss": 0.5321271419525146, "step": 9776 }, { "epoch": 1.5626148805242548, "grad_norm": 1.4916954086726706, "learning_rate": 2.505464062636779e-06, "loss": 0.5288589596748352, "step": 9777 }, { "epoch": 1.5627747142971309, "grad_norm": 1.5721928158515006, "learning_rate": 2.5037147339872315e-06, "loss": 0.49954766035079956, "step": 9778 }, { "epoch": 1.5629345480700072, "grad_norm": 1.5873828218945496, "learning_rate": 2.5019659288516328e-06, "loss": 0.590944766998291, "step": 9779 }, { "epoch": 1.5630943818428835, "grad_norm": 1.6267131241720458, "learning_rate": 2.50021764735211e-06, "loss": 0.5858583450317383, "step": 9780 }, { "epoch": 1.5632542156157596, "grad_norm": 1.5942029180207793, "learning_rate": 2.498469889610757e-06, "loss": 0.39193621277809143, "step": 9781 }, { "epoch": 1.5634140493886357, "grad_norm": 1.3761829972017687, "learning_rate": 2.4967226557496304e-06, "loss": 0.5291779637336731, "step": 9782 }, { "epoch": 1.563573883161512, "grad_norm": 1.6059972506619757, "learning_rate": 2.4949759458907484e-06, "loss": 0.6307076215744019, "step": 9783 }, { "epoch": 1.5637337169343883, "grad_norm": 1.381145367847783, "learning_rate": 2.4932297601561006e-06, "loss": 0.6474952697753906, "step": 9784 }, { "epoch": 1.5638935507072644, "grad_norm": 1.5661687925335732, "learning_rate": 2.4914840986676313e-06, "loss": 0.5187932252883911, "step": 9785 }, { "epoch": 1.5640533844801405, "grad_norm": 1.3834523873741544, "learning_rate": 2.489738961547252e-06, "loss": 0.5181581377983093, "step": 9786 }, { "epoch": 1.5642132182530168, "grad_norm": 1.7059654567595437, "learning_rate": 2.487994348916837e-06, "loss": 0.5750164985656738, "step": 9787 }, { "epoch": 1.5643730520258932, "grad_norm": 1.7563348229931535, "learning_rate": 2.48625026089822e-06, "loss": 0.6160472631454468, "step": 9788 }, { "epoch": 1.5645328857987693, "grad_norm": 1.408188699113013, "learning_rate": 2.4845066976132084e-06, "loss": 0.5341079235076904, "step": 9789 }, { "epoch": 1.5646927195716454, "grad_norm": 1.4290783153260902, "learning_rate": 2.4827636591835637e-06, "loss": 0.49709564447402954, "step": 9790 }, { "epoch": 1.5648525533445217, "grad_norm": 1.5067709196163628, "learning_rate": 2.481021145731012e-06, "loss": 0.4869798719882965, "step": 9791 }, { "epoch": 1.565012387117398, "grad_norm": 1.774377996325256, "learning_rate": 2.4792791573772433e-06, "loss": 0.6414484977722168, "step": 9792 }, { "epoch": 1.565172220890274, "grad_norm": 1.6892003915284384, "learning_rate": 2.477537694243917e-06, "loss": 0.5619974136352539, "step": 9793 }, { "epoch": 1.5653320546631502, "grad_norm": 1.439364834640131, "learning_rate": 2.4757967564526465e-06, "loss": 0.4635585844516754, "step": 9794 }, { "epoch": 1.5654918884360265, "grad_norm": 1.6544383304748744, "learning_rate": 2.474056344125012e-06, "loss": 0.6481947898864746, "step": 9795 }, { "epoch": 1.5656517222089028, "grad_norm": 1.7244382169144734, "learning_rate": 2.472316457382563e-06, "loss": 0.5892337560653687, "step": 9796 }, { "epoch": 1.565811555981779, "grad_norm": 1.581003561755694, "learning_rate": 2.4705770963468024e-06, "loss": 0.5327459573745728, "step": 9797 }, { "epoch": 1.565971389754655, "grad_norm": 1.8962329567039569, "learning_rate": 2.4688382611392003e-06, "loss": 0.5738956928253174, "step": 9798 }, { "epoch": 1.5661312235275313, "grad_norm": 1.5231581988186664, "learning_rate": 2.4670999518811955e-06, "loss": 0.5272986888885498, "step": 9799 }, { "epoch": 1.5662910573004076, "grad_norm": 1.7558904626439695, "learning_rate": 2.465362168694182e-06, "loss": 0.5274810791015625, "step": 9800 }, { "epoch": 1.5664508910732837, "grad_norm": 1.4328548848375946, "learning_rate": 2.4636249116995216e-06, "loss": 0.4531596601009369, "step": 9801 }, { "epoch": 1.56661072484616, "grad_norm": 1.4248879753396817, "learning_rate": 2.4618881810185368e-06, "loss": 0.6018617153167725, "step": 9802 }, { "epoch": 1.5667705586190364, "grad_norm": 1.5529439325266277, "learning_rate": 2.4601519767725136e-06, "loss": 0.6476302742958069, "step": 9803 }, { "epoch": 1.5669303923919125, "grad_norm": 1.5388924449435406, "learning_rate": 2.4584162990827055e-06, "loss": 0.532662570476532, "step": 9804 }, { "epoch": 1.5670902261647885, "grad_norm": 1.4101430131136006, "learning_rate": 2.4566811480703257e-06, "loss": 0.5313551425933838, "step": 9805 }, { "epoch": 1.5672500599376649, "grad_norm": 1.6407602058094486, "learning_rate": 2.4549465238565497e-06, "loss": 0.5209341049194336, "step": 9806 }, { "epoch": 1.5674098937105412, "grad_norm": 1.458109129905276, "learning_rate": 2.4532124265625155e-06, "loss": 0.45618271827697754, "step": 9807 }, { "epoch": 1.5675697274834173, "grad_norm": 1.636808159704876, "learning_rate": 2.451478856309332e-06, "loss": 0.503579318523407, "step": 9808 }, { "epoch": 1.5677295612562934, "grad_norm": 1.5040922972443438, "learning_rate": 2.4497458132180607e-06, "loss": 0.6258663535118103, "step": 9809 }, { "epoch": 1.5678893950291697, "grad_norm": 1.5928838189641954, "learning_rate": 2.4480132974097327e-06, "loss": 0.5032214522361755, "step": 9810 }, { "epoch": 1.568049228802046, "grad_norm": 1.5652699896308235, "learning_rate": 2.446281309005342e-06, "loss": 0.5689252614974976, "step": 9811 }, { "epoch": 1.568209062574922, "grad_norm": 1.3690506740537127, "learning_rate": 2.444549848125839e-06, "loss": 0.3711196184158325, "step": 9812 }, { "epoch": 1.5683688963477982, "grad_norm": 1.68351441663121, "learning_rate": 2.442818914892151e-06, "loss": 0.6343058347702026, "step": 9813 }, { "epoch": 1.5685287301206745, "grad_norm": 1.6373816893467492, "learning_rate": 2.441088509425156e-06, "loss": 0.6492196321487427, "step": 9814 }, { "epoch": 1.5686885638935508, "grad_norm": 1.3318397761837246, "learning_rate": 2.439358631845696e-06, "loss": 0.3939080834388733, "step": 9815 }, { "epoch": 1.568848397666427, "grad_norm": 1.465476207064975, "learning_rate": 2.437629282274587e-06, "loss": 0.5262280106544495, "step": 9816 }, { "epoch": 1.569008231439303, "grad_norm": 1.7208053830376746, "learning_rate": 2.4359004608325964e-06, "loss": 0.5093204975128174, "step": 9817 }, { "epoch": 1.5691680652121793, "grad_norm": 1.6596347105951021, "learning_rate": 2.434172167640457e-06, "loss": 0.5150315761566162, "step": 9818 }, { "epoch": 1.5693278989850556, "grad_norm": 1.3970805746960369, "learning_rate": 2.432444402818871e-06, "loss": 0.42951709032058716, "step": 9819 }, { "epoch": 1.5694877327579317, "grad_norm": 1.4219086326705095, "learning_rate": 2.4307171664884975e-06, "loss": 0.5088808536529541, "step": 9820 }, { "epoch": 1.5696475665308078, "grad_norm": 1.5494575211938506, "learning_rate": 2.428990458769961e-06, "loss": 0.5070794224739075, "step": 9821 }, { "epoch": 1.5698074003036842, "grad_norm": 1.6544654296172336, "learning_rate": 2.4272642797838474e-06, "loss": 0.5821264386177063, "step": 9822 }, { "epoch": 1.5699672340765605, "grad_norm": 1.4411767508645124, "learning_rate": 2.4255386296507057e-06, "loss": 0.475799560546875, "step": 9823 }, { "epoch": 1.5701270678494366, "grad_norm": 1.9080395295350452, "learning_rate": 2.423813508491052e-06, "loss": 0.5532374382019043, "step": 9824 }, { "epoch": 1.5702869016223127, "grad_norm": 1.8127175274241898, "learning_rate": 2.4220889164253635e-06, "loss": 0.4884907305240631, "step": 9825 }, { "epoch": 1.570446735395189, "grad_norm": 1.427008343866114, "learning_rate": 2.4203648535740766e-06, "loss": 0.5153571963310242, "step": 9826 }, { "epoch": 1.5706065691680653, "grad_norm": 1.6390695693460575, "learning_rate": 2.418641320057592e-06, "loss": 0.5556497573852539, "step": 9827 }, { "epoch": 1.5707664029409414, "grad_norm": 1.627112978101131, "learning_rate": 2.4169183159962816e-06, "loss": 0.5992696285247803, "step": 9828 }, { "epoch": 1.5709262367138175, "grad_norm": 1.642190078027036, "learning_rate": 2.4151958415104706e-06, "loss": 0.505528450012207, "step": 9829 }, { "epoch": 1.5710860704866938, "grad_norm": 1.3587430402233571, "learning_rate": 2.41347389672045e-06, "loss": 0.5013592839241028, "step": 9830 }, { "epoch": 1.5712459042595701, "grad_norm": 1.4459005711441746, "learning_rate": 2.4117524817464734e-06, "loss": 0.4815264344215393, "step": 9831 }, { "epoch": 1.5714057380324462, "grad_norm": 1.5454643730040387, "learning_rate": 2.4100315967087574e-06, "loss": 0.540825366973877, "step": 9832 }, { "epoch": 1.5715655718053223, "grad_norm": 1.9808916490651398, "learning_rate": 2.408311241727487e-06, "loss": 0.5498743057250977, "step": 9833 }, { "epoch": 1.5717254055781986, "grad_norm": 1.4481855481339465, "learning_rate": 2.4065914169228044e-06, "loss": 0.5849940776824951, "step": 9834 }, { "epoch": 1.571885239351075, "grad_norm": 1.953825976553706, "learning_rate": 2.404872122414811e-06, "loss": 0.630723237991333, "step": 9835 }, { "epoch": 1.572045073123951, "grad_norm": 1.7558514107933603, "learning_rate": 2.4031533583235832e-06, "loss": 0.6643801927566528, "step": 9836 }, { "epoch": 1.5722049068968273, "grad_norm": 1.759999301849795, "learning_rate": 2.4014351247691503e-06, "loss": 0.5076370239257812, "step": 9837 }, { "epoch": 1.5723647406697037, "grad_norm": 1.6771188805928443, "learning_rate": 2.3997174218715046e-06, "loss": 0.5765306949615479, "step": 9838 }, { "epoch": 1.5725245744425798, "grad_norm": 1.3907704005071104, "learning_rate": 2.3980002497506106e-06, "loss": 0.5943168997764587, "step": 9839 }, { "epoch": 1.5726844082154559, "grad_norm": 1.5117775201144252, "learning_rate": 2.3962836085263853e-06, "loss": 0.513181209564209, "step": 9840 }, { "epoch": 1.5728442419883322, "grad_norm": 1.5465730027847229, "learning_rate": 2.394567498318715e-06, "loss": 0.4509522020816803, "step": 9841 }, { "epoch": 1.5730040757612085, "grad_norm": 1.6444705022737454, "learning_rate": 2.392851919247442e-06, "loss": 0.6456652879714966, "step": 9842 }, { "epoch": 1.5731639095340846, "grad_norm": 1.7042902547718632, "learning_rate": 2.3911368714323835e-06, "loss": 0.5504949688911438, "step": 9843 }, { "epoch": 1.5733237433069607, "grad_norm": 1.4230420331289426, "learning_rate": 2.3894223549933083e-06, "loss": 0.5499420166015625, "step": 9844 }, { "epoch": 1.573483577079837, "grad_norm": 1.3388889252265435, "learning_rate": 2.3877083700499513e-06, "loss": 0.569530725479126, "step": 9845 }, { "epoch": 1.5736434108527133, "grad_norm": 1.2966180636661542, "learning_rate": 2.385994916722014e-06, "loss": 0.46228551864624023, "step": 9846 }, { "epoch": 1.5738032446255894, "grad_norm": 1.7104472525091114, "learning_rate": 2.384281995129153e-06, "loss": 0.5198898911476135, "step": 9847 }, { "epoch": 1.5739630783984655, "grad_norm": 1.8710670247501, "learning_rate": 2.3825696053909976e-06, "loss": 0.5670278072357178, "step": 9848 }, { "epoch": 1.5741229121713418, "grad_norm": 1.4339478818379296, "learning_rate": 2.3808577476271344e-06, "loss": 0.4565263092517853, "step": 9849 }, { "epoch": 1.5742827459442181, "grad_norm": 1.7175909158027238, "learning_rate": 2.3791464219571125e-06, "loss": 0.5818936228752136, "step": 9850 }, { "epoch": 1.5744425797170942, "grad_norm": 1.5458061711474482, "learning_rate": 2.377435628500444e-06, "loss": 0.5786035656929016, "step": 9851 }, { "epoch": 1.5746024134899703, "grad_norm": 1.8306473261158183, "learning_rate": 2.3757253673766025e-06, "loss": 0.5956201553344727, "step": 9852 }, { "epoch": 1.5747622472628466, "grad_norm": 1.651881350470946, "learning_rate": 2.3740156387050328e-06, "loss": 0.4784625768661499, "step": 9853 }, { "epoch": 1.574922081035723, "grad_norm": 1.4779762438995354, "learning_rate": 2.372306442605132e-06, "loss": 0.6088222861289978, "step": 9854 }, { "epoch": 1.575081914808599, "grad_norm": 1.6254006838937651, "learning_rate": 2.370597779196263e-06, "loss": 0.564643383026123, "step": 9855 }, { "epoch": 1.5752417485814751, "grad_norm": 1.612816893914232, "learning_rate": 2.368889648597759e-06, "loss": 0.6219758987426758, "step": 9856 }, { "epoch": 1.5754015823543515, "grad_norm": 1.711058358410336, "learning_rate": 2.367182050928901e-06, "loss": 0.6410151720046997, "step": 9857 }, { "epoch": 1.5755614161272278, "grad_norm": 1.5099178035340535, "learning_rate": 2.36547498630895e-06, "loss": 0.5270994305610657, "step": 9858 }, { "epoch": 1.5757212499001039, "grad_norm": 1.670059232839313, "learning_rate": 2.363768454857117e-06, "loss": 0.6522354483604431, "step": 9859 }, { "epoch": 1.57588108367298, "grad_norm": 1.519250634287248, "learning_rate": 2.362062456692581e-06, "loss": 0.5349702835083008, "step": 9860 }, { "epoch": 1.5760409174458563, "grad_norm": 1.6819495647020897, "learning_rate": 2.3603569919344826e-06, "loss": 0.5713312029838562, "step": 9861 }, { "epoch": 1.5762007512187326, "grad_norm": 1.8894792213706806, "learning_rate": 2.3586520607019224e-06, "loss": 0.5804653763771057, "step": 9862 }, { "epoch": 1.5763605849916087, "grad_norm": 1.84089321813333, "learning_rate": 2.356947663113972e-06, "loss": 0.6209655404090881, "step": 9863 }, { "epoch": 1.5765204187644848, "grad_norm": 1.4734638942774232, "learning_rate": 2.355243799289659e-06, "loss": 0.5813326239585876, "step": 9864 }, { "epoch": 1.576680252537361, "grad_norm": 1.369228768697641, "learning_rate": 2.3535404693479736e-06, "loss": 0.451303094625473, "step": 9865 }, { "epoch": 1.5768400863102374, "grad_norm": 2.072018882894456, "learning_rate": 2.351837673407872e-06, "loss": 0.5716655254364014, "step": 9866 }, { "epoch": 1.5769999200831135, "grad_norm": 1.524809722333142, "learning_rate": 2.350135411588267e-06, "loss": 0.541372537612915, "step": 9867 }, { "epoch": 1.5771597538559896, "grad_norm": 1.3684028938221107, "learning_rate": 2.3484336840080456e-06, "loss": 0.47829586267471313, "step": 9868 }, { "epoch": 1.577319587628866, "grad_norm": 1.4639876154628981, "learning_rate": 2.346732490786047e-06, "loss": 0.5493701100349426, "step": 9869 }, { "epoch": 1.5774794214017422, "grad_norm": 1.3209880448338485, "learning_rate": 2.3450318320410746e-06, "loss": 0.5369817614555359, "step": 9870 }, { "epoch": 1.5776392551746183, "grad_norm": 1.6820509159969153, "learning_rate": 2.3433317078918973e-06, "loss": 0.472625195980072, "step": 9871 }, { "epoch": 1.5777990889474947, "grad_norm": 1.4250971723964756, "learning_rate": 2.341632118457248e-06, "loss": 0.45480233430862427, "step": 9872 }, { "epoch": 1.577958922720371, "grad_norm": 2.0360391247030547, "learning_rate": 2.3399330638558192e-06, "loss": 0.6012803912162781, "step": 9873 }, { "epoch": 1.578118756493247, "grad_norm": 1.7380543678653557, "learning_rate": 2.338234544206266e-06, "loss": 0.472146213054657, "step": 9874 }, { "epoch": 1.5782785902661232, "grad_norm": 1.7434918887118256, "learning_rate": 2.336536559627204e-06, "loss": 0.5425880551338196, "step": 9875 }, { "epoch": 1.5784384240389995, "grad_norm": 1.6056281624139337, "learning_rate": 2.3348391102372216e-06, "loss": 0.47452598810195923, "step": 9876 }, { "epoch": 1.5785982578118758, "grad_norm": 1.609115858615104, "learning_rate": 2.333142196154854e-06, "loss": 0.5467472076416016, "step": 9877 }, { "epoch": 1.5787580915847519, "grad_norm": 1.4183544874151757, "learning_rate": 2.3314458174986164e-06, "loss": 0.5029877424240112, "step": 9878 }, { "epoch": 1.578917925357628, "grad_norm": 1.758053668163697, "learning_rate": 2.3297499743869735e-06, "loss": 0.48038071393966675, "step": 9879 }, { "epoch": 1.5790777591305043, "grad_norm": 1.5841547092379957, "learning_rate": 2.3280546669383565e-06, "loss": 0.4840976595878601, "step": 9880 }, { "epoch": 1.5792375929033806, "grad_norm": 1.986380268825613, "learning_rate": 2.326359895271161e-06, "loss": 0.48867613077163696, "step": 9881 }, { "epoch": 1.5793974266762567, "grad_norm": 1.73639106541815, "learning_rate": 2.32466565950374e-06, "loss": 0.5791463851928711, "step": 9882 }, { "epoch": 1.5795572604491328, "grad_norm": 2.0294763053559466, "learning_rate": 2.3229719597544197e-06, "loss": 0.5643439292907715, "step": 9883 }, { "epoch": 1.5797170942220091, "grad_norm": 1.508472454725887, "learning_rate": 2.3212787961414772e-06, "loss": 0.4526140093803406, "step": 9884 }, { "epoch": 1.5798769279948854, "grad_norm": 1.4965473139896022, "learning_rate": 2.319586168783159e-06, "loss": 0.5540248155593872, "step": 9885 }, { "epoch": 1.5800367617677615, "grad_norm": 1.6331079430867834, "learning_rate": 2.317894077797672e-06, "loss": 0.5772206783294678, "step": 9886 }, { "epoch": 1.5801965955406376, "grad_norm": 1.5512211342420426, "learning_rate": 2.3162025233031814e-06, "loss": 0.5840702652931213, "step": 9887 }, { "epoch": 1.580356429313514, "grad_norm": 1.5364521460951628, "learning_rate": 2.3145115054178267e-06, "loss": 0.48377710580825806, "step": 9888 }, { "epoch": 1.5805162630863903, "grad_norm": 1.357188508481744, "learning_rate": 2.312821024259699e-06, "loss": 0.5126134157180786, "step": 9889 }, { "epoch": 1.5806760968592664, "grad_norm": 1.6747151301955654, "learning_rate": 2.3111310799468557e-06, "loss": 0.6334781050682068, "step": 9890 }, { "epoch": 1.5808359306321424, "grad_norm": 1.622999302785105, "learning_rate": 2.3094416725973135e-06, "loss": 0.5120100975036621, "step": 9891 }, { "epoch": 1.5809957644050188, "grad_norm": 1.7443923657665885, "learning_rate": 2.3077528023290607e-06, "loss": 0.585554838180542, "step": 9892 }, { "epoch": 1.581155598177895, "grad_norm": 1.4536999557854349, "learning_rate": 2.3060644692600387e-06, "loss": 0.4055953323841095, "step": 9893 }, { "epoch": 1.5813154319507712, "grad_norm": 1.5966842345491317, "learning_rate": 2.3043766735081553e-06, "loss": 0.4888641834259033, "step": 9894 }, { "epoch": 1.5814752657236473, "grad_norm": 1.7420661831308628, "learning_rate": 2.302689415191276e-06, "loss": 0.505752444267273, "step": 9895 }, { "epoch": 1.5816350994965236, "grad_norm": 1.8183750371364067, "learning_rate": 2.30100269442724e-06, "loss": 0.5409855246543884, "step": 9896 }, { "epoch": 1.5817949332694, "grad_norm": 1.5326879936884161, "learning_rate": 2.2993165113338356e-06, "loss": 0.49891600012779236, "step": 9897 }, { "epoch": 1.581954767042276, "grad_norm": 1.5399398444763688, "learning_rate": 2.297630866028826e-06, "loss": 0.5854300260543823, "step": 9898 }, { "epoch": 1.582114600815152, "grad_norm": 1.5393195612802286, "learning_rate": 2.295945758629927e-06, "loss": 0.3388775587081909, "step": 9899 }, { "epoch": 1.5822744345880284, "grad_norm": 1.53155901179514, "learning_rate": 2.2942611892548205e-06, "loss": 0.46227848529815674, "step": 9900 }, { "epoch": 1.5824342683609047, "grad_norm": 1.7556665703880365, "learning_rate": 2.292577158021152e-06, "loss": 0.4498790502548218, "step": 9901 }, { "epoch": 1.5825941021337808, "grad_norm": 1.4725932007785214, "learning_rate": 2.290893665046525e-06, "loss": 0.551295816898346, "step": 9902 }, { "epoch": 1.582753935906657, "grad_norm": 1.6283257078935387, "learning_rate": 2.289210710448514e-06, "loss": 0.45309117436408997, "step": 9903 }, { "epoch": 1.5829137696795332, "grad_norm": 1.5271357852937448, "learning_rate": 2.2875282943446476e-06, "loss": 0.5885732769966125, "step": 9904 }, { "epoch": 1.5830736034524096, "grad_norm": 1.7118183201501216, "learning_rate": 2.2858464168524197e-06, "loss": 0.609372079372406, "step": 9905 }, { "epoch": 1.5832334372252856, "grad_norm": 1.5434163809442116, "learning_rate": 2.2841650780892843e-06, "loss": 0.5450705289840698, "step": 9906 }, { "epoch": 1.583393270998162, "grad_norm": 1.6133295992570873, "learning_rate": 2.2824842781726665e-06, "loss": 0.5017862319946289, "step": 9907 }, { "epoch": 1.5835531047710383, "grad_norm": 1.4728454760952228, "learning_rate": 2.2808040172199417e-06, "loss": 0.5053921341896057, "step": 9908 }, { "epoch": 1.5837129385439144, "grad_norm": 1.5770561675283254, "learning_rate": 2.279124295348457e-06, "loss": 0.5677047967910767, "step": 9909 }, { "epoch": 1.5838727723167905, "grad_norm": 1.9121691669278154, "learning_rate": 2.277445112675516e-06, "loss": 0.6627697944641113, "step": 9910 }, { "epoch": 1.5840326060896668, "grad_norm": 1.8740229525811236, "learning_rate": 2.2757664693183845e-06, "loss": 0.7590290307998657, "step": 9911 }, { "epoch": 1.584192439862543, "grad_norm": 1.5581789261558878, "learning_rate": 2.274088365394299e-06, "loss": 0.5287714600563049, "step": 9912 }, { "epoch": 1.5843522736354192, "grad_norm": 1.4492164594074983, "learning_rate": 2.2724108010204492e-06, "loss": 0.5402956604957581, "step": 9913 }, { "epoch": 1.5845121074082953, "grad_norm": 1.746063036710926, "learning_rate": 2.2707337763139903e-06, "loss": 0.5889508128166199, "step": 9914 }, { "epoch": 1.5846719411811716, "grad_norm": 1.6291401533180514, "learning_rate": 2.269057291392037e-06, "loss": 0.5536448359489441, "step": 9915 }, { "epoch": 1.584831774954048, "grad_norm": 1.7623197594591413, "learning_rate": 2.2673813463716742e-06, "loss": 0.613148033618927, "step": 9916 }, { "epoch": 1.584991608726924, "grad_norm": 1.7392980826894477, "learning_rate": 2.265705941369939e-06, "loss": 0.4962416887283325, "step": 9917 }, { "epoch": 1.5851514424998001, "grad_norm": 1.4319798289612564, "learning_rate": 2.264031076503841e-06, "loss": 0.5472603440284729, "step": 9918 }, { "epoch": 1.5853112762726764, "grad_norm": 1.4591862144770122, "learning_rate": 2.2623567518903435e-06, "loss": 0.4956310987472534, "step": 9919 }, { "epoch": 1.5854711100455527, "grad_norm": 1.516739919076256, "learning_rate": 2.260682967646377e-06, "loss": 0.6767820119857788, "step": 9920 }, { "epoch": 1.5856309438184288, "grad_norm": 1.5995218399770916, "learning_rate": 2.2590097238888274e-06, "loss": 0.48680227994918823, "step": 9921 }, { "epoch": 1.585790777591305, "grad_norm": 1.39825205475371, "learning_rate": 2.2573370207345567e-06, "loss": 0.3698890507221222, "step": 9922 }, { "epoch": 1.5859506113641813, "grad_norm": 1.722761341713901, "learning_rate": 2.255664858300375e-06, "loss": 0.6864877939224243, "step": 9923 }, { "epoch": 1.5861104451370576, "grad_norm": 1.569485846701277, "learning_rate": 2.2539932367030614e-06, "loss": 0.7232716679573059, "step": 9924 }, { "epoch": 1.5862702789099337, "grad_norm": 1.4604362741950356, "learning_rate": 2.2523221560593566e-06, "loss": 0.53702312707901, "step": 9925 }, { "epoch": 1.5864301126828098, "grad_norm": 1.7903680742592645, "learning_rate": 2.2506516164859595e-06, "loss": 0.7772867679595947, "step": 9926 }, { "epoch": 1.586589946455686, "grad_norm": 1.8114652691195994, "learning_rate": 2.2489816180995395e-06, "loss": 0.558021605014801, "step": 9927 }, { "epoch": 1.5867497802285624, "grad_norm": 1.4150384876634114, "learning_rate": 2.2473121610167213e-06, "loss": 0.5527351498603821, "step": 9928 }, { "epoch": 1.5869096140014385, "grad_norm": 1.5112767444531465, "learning_rate": 2.2456432453540943e-06, "loss": 0.6407359838485718, "step": 9929 }, { "epoch": 1.5870694477743146, "grad_norm": 1.408145316313659, "learning_rate": 2.2439748712282093e-06, "loss": 0.5955184698104858, "step": 9930 }, { "epoch": 1.587229281547191, "grad_norm": 1.5289410327498942, "learning_rate": 2.2423070387555767e-06, "loss": 0.5566043853759766, "step": 9931 }, { "epoch": 1.5873891153200672, "grad_norm": 1.6482098951612305, "learning_rate": 2.240639748052678e-06, "loss": 0.4776776134967804, "step": 9932 }, { "epoch": 1.5875489490929433, "grad_norm": 1.5244954385458067, "learning_rate": 2.2389729992359466e-06, "loss": 0.47507941722869873, "step": 9933 }, { "epoch": 1.5877087828658194, "grad_norm": 1.4934419488039836, "learning_rate": 2.237306792421784e-06, "loss": 0.5040829181671143, "step": 9934 }, { "epoch": 1.5878686166386957, "grad_norm": 1.4879737589805768, "learning_rate": 2.235641127726549e-06, "loss": 0.5068920254707336, "step": 9935 }, { "epoch": 1.588028450411572, "grad_norm": 1.6354188200540445, "learning_rate": 2.233976005266567e-06, "loss": 0.47696155309677124, "step": 9936 }, { "epoch": 1.5881882841844481, "grad_norm": 1.6547988782871916, "learning_rate": 2.23231142515813e-06, "loss": 0.6847702264785767, "step": 9937 }, { "epoch": 1.5883481179573242, "grad_norm": 1.5830902698049363, "learning_rate": 2.2306473875174807e-06, "loss": 0.5551929473876953, "step": 9938 }, { "epoch": 1.5885079517302005, "grad_norm": 1.5803632913567927, "learning_rate": 2.228983892460831e-06, "loss": 0.5813168287277222, "step": 9939 }, { "epoch": 1.5886677855030769, "grad_norm": 1.600872763017467, "learning_rate": 2.227320940104354e-06, "loss": 0.46715933084487915, "step": 9940 }, { "epoch": 1.588827619275953, "grad_norm": 1.816630849526605, "learning_rate": 2.2256585305641797e-06, "loss": 0.5430653095245361, "step": 9941 }, { "epoch": 1.5889874530488293, "grad_norm": 1.6292010732147262, "learning_rate": 2.2239966639564125e-06, "loss": 0.5580797791481018, "step": 9942 }, { "epoch": 1.5891472868217056, "grad_norm": 1.4501914906615094, "learning_rate": 2.222335340397107e-06, "loss": 0.5501713752746582, "step": 9943 }, { "epoch": 1.5893071205945817, "grad_norm": 1.7724993379379215, "learning_rate": 2.220674560002285e-06, "loss": 0.5492790341377258, "step": 9944 }, { "epoch": 1.5894669543674578, "grad_norm": 1.8204430775815248, "learning_rate": 2.21901432288793e-06, "loss": 0.4892231523990631, "step": 9945 }, { "epoch": 1.589626788140334, "grad_norm": 1.5867450113821124, "learning_rate": 2.217354629169982e-06, "loss": 0.4809390604496002, "step": 9946 }, { "epoch": 1.5897866219132104, "grad_norm": 1.7318562251023408, "learning_rate": 2.215695478964357e-06, "loss": 0.5613152980804443, "step": 9947 }, { "epoch": 1.5899464556860865, "grad_norm": 1.606294261311185, "learning_rate": 2.2140368723869177e-06, "loss": 0.4473770260810852, "step": 9948 }, { "epoch": 1.5901062894589626, "grad_norm": 1.4302704542204896, "learning_rate": 2.2123788095534982e-06, "loss": 0.5731881260871887, "step": 9949 }, { "epoch": 1.590266123231839, "grad_norm": 1.5996529575852454, "learning_rate": 2.2107212905798904e-06, "loss": 0.4888538122177124, "step": 9950 }, { "epoch": 1.5904259570047152, "grad_norm": 1.6246471855275981, "learning_rate": 2.209064315581847e-06, "loss": 0.4667966067790985, "step": 9951 }, { "epoch": 1.5905857907775913, "grad_norm": 1.6406617716635106, "learning_rate": 2.2074078846750912e-06, "loss": 0.7352591753005981, "step": 9952 }, { "epoch": 1.5907456245504674, "grad_norm": 1.7386174237166645, "learning_rate": 2.2057519979752983e-06, "loss": 0.5716424584388733, "step": 9953 }, { "epoch": 1.5909054583233437, "grad_norm": 2.0537773801451547, "learning_rate": 2.204096655598111e-06, "loss": 0.5854446291923523, "step": 9954 }, { "epoch": 1.59106529209622, "grad_norm": 1.5477868424688055, "learning_rate": 2.20244185765913e-06, "loss": 0.6018255949020386, "step": 9955 }, { "epoch": 1.5912251258690961, "grad_norm": 1.5499017505634276, "learning_rate": 2.200787604273921e-06, "loss": 0.4925858676433563, "step": 9956 }, { "epoch": 1.5913849596419722, "grad_norm": 1.7915117218405918, "learning_rate": 2.1991338955580156e-06, "loss": 0.7190935611724854, "step": 9957 }, { "epoch": 1.5915447934148486, "grad_norm": 1.61150281873331, "learning_rate": 2.1974807316269007e-06, "loss": 0.5523911714553833, "step": 9958 }, { "epoch": 1.5917046271877249, "grad_norm": 1.5731030239510508, "learning_rate": 2.195828112596026e-06, "loss": 0.43881791830062866, "step": 9959 }, { "epoch": 1.591864460960601, "grad_norm": 1.5739979875802024, "learning_rate": 2.194176038580805e-06, "loss": 0.6501871943473816, "step": 9960 }, { "epoch": 1.592024294733477, "grad_norm": 1.564771481313719, "learning_rate": 2.192524509696611e-06, "loss": 0.6209310293197632, "step": 9961 }, { "epoch": 1.5921841285063534, "grad_norm": 1.788695245809227, "learning_rate": 2.1908735260587845e-06, "loss": 0.5754613280296326, "step": 9962 }, { "epoch": 1.5923439622792297, "grad_norm": 1.673222503665273, "learning_rate": 2.189223087782624e-06, "loss": 0.4370097219944, "step": 9963 }, { "epoch": 1.5925037960521058, "grad_norm": 1.5356889905353435, "learning_rate": 2.1875731949833866e-06, "loss": 0.5290974378585815, "step": 9964 }, { "epoch": 1.5926636298249819, "grad_norm": 1.5065458317455553, "learning_rate": 2.185923847776299e-06, "loss": 0.48601818084716797, "step": 9965 }, { "epoch": 1.5928234635978582, "grad_norm": 1.4859122448383708, "learning_rate": 2.1842750462765405e-06, "loss": 0.4996616244316101, "step": 9966 }, { "epoch": 1.5929832973707345, "grad_norm": 1.5743923182501738, "learning_rate": 2.182626790599265e-06, "loss": 0.6199072003364563, "step": 9967 }, { "epoch": 1.5931431311436106, "grad_norm": 1.6417115205702142, "learning_rate": 2.1809790808595753e-06, "loss": 0.5629196166992188, "step": 9968 }, { "epoch": 1.5933029649164867, "grad_norm": 1.7627406640363728, "learning_rate": 2.1793319171725437e-06, "loss": 0.5452508926391602, "step": 9969 }, { "epoch": 1.593462798689363, "grad_norm": 1.80001988671017, "learning_rate": 2.1776852996531993e-06, "loss": 0.5912020206451416, "step": 9970 }, { "epoch": 1.5936226324622393, "grad_norm": 1.6845993201533187, "learning_rate": 2.176039228416541e-06, "loss": 0.5168696641921997, "step": 9971 }, { "epoch": 1.5937824662351154, "grad_norm": 1.6770863391159117, "learning_rate": 2.174393703577522e-06, "loss": 0.6298799514770508, "step": 9972 }, { "epoch": 1.5939423000079915, "grad_norm": 1.5093387748133102, "learning_rate": 2.1727487252510593e-06, "loss": 0.42928969860076904, "step": 9973 }, { "epoch": 1.5941021337808678, "grad_norm": 1.607942114976693, "learning_rate": 2.171104293552033e-06, "loss": 0.5527352094650269, "step": 9974 }, { "epoch": 1.5942619675537442, "grad_norm": 1.5458749491798807, "learning_rate": 2.169460408595283e-06, "loss": 0.4985925555229187, "step": 9975 }, { "epoch": 1.5944218013266203, "grad_norm": 1.759984291053369, "learning_rate": 2.1678170704956126e-06, "loss": 0.5316755771636963, "step": 9976 }, { "epoch": 1.5945816350994966, "grad_norm": 1.5863437898878598, "learning_rate": 2.16617427936779e-06, "loss": 0.5450595617294312, "step": 9977 }, { "epoch": 1.594741468872373, "grad_norm": 2.1476831283120426, "learning_rate": 2.164532035326541e-06, "loss": 0.5630190968513489, "step": 9978 }, { "epoch": 1.594901302645249, "grad_norm": 1.7373073933223977, "learning_rate": 2.1628903384865518e-06, "loss": 0.538614809513092, "step": 9979 }, { "epoch": 1.595061136418125, "grad_norm": 1.6594566854387074, "learning_rate": 2.161249188962474e-06, "loss": 0.5398836135864258, "step": 9980 }, { "epoch": 1.5952209701910014, "grad_norm": 1.794200467834626, "learning_rate": 2.1596085868689165e-06, "loss": 0.7057284712791443, "step": 9981 }, { "epoch": 1.5953808039638777, "grad_norm": 1.5787251346781046, "learning_rate": 2.157968532320459e-06, "loss": 0.5320615768432617, "step": 9982 }, { "epoch": 1.5955406377367538, "grad_norm": 1.7638008929662041, "learning_rate": 2.1563290254316337e-06, "loss": 0.5634223222732544, "step": 9983 }, { "epoch": 1.59570047150963, "grad_norm": 1.478393282690545, "learning_rate": 2.154690066316939e-06, "loss": 0.4365915060043335, "step": 9984 }, { "epoch": 1.5958603052825062, "grad_norm": 1.6413548996790448, "learning_rate": 2.1530516550908297e-06, "loss": 0.5246485471725464, "step": 9985 }, { "epoch": 1.5960201390553825, "grad_norm": 1.5571484110806735, "learning_rate": 2.1514137918677336e-06, "loss": 0.5487879514694214, "step": 9986 }, { "epoch": 1.5961799728282586, "grad_norm": 1.4845281844692475, "learning_rate": 2.149776476762029e-06, "loss": 0.429535448551178, "step": 9987 }, { "epoch": 1.5963398066011347, "grad_norm": 1.621479563210325, "learning_rate": 2.1481397098880617e-06, "loss": 0.5195856690406799, "step": 9988 }, { "epoch": 1.596499640374011, "grad_norm": 1.338076608809697, "learning_rate": 2.146503491360137e-06, "loss": 0.49095451831817627, "step": 9989 }, { "epoch": 1.5966594741468874, "grad_norm": 1.427220353983415, "learning_rate": 2.144867821292519e-06, "loss": 0.42611461877822876, "step": 9990 }, { "epoch": 1.5968193079197635, "grad_norm": 1.4605194462995257, "learning_rate": 2.1432326997994447e-06, "loss": 0.7029627561569214, "step": 9991 }, { "epoch": 1.5969791416926395, "grad_norm": 1.6757166901538898, "learning_rate": 2.1415981269951004e-06, "loss": 0.5205379724502563, "step": 9992 }, { "epoch": 1.5971389754655159, "grad_norm": 1.706669243902259, "learning_rate": 2.139964102993639e-06, "loss": 0.7105975151062012, "step": 9993 }, { "epoch": 1.5972988092383922, "grad_norm": 1.5441441484220801, "learning_rate": 2.1383306279091765e-06, "loss": 0.48831242322921753, "step": 9994 }, { "epoch": 1.5974586430112683, "grad_norm": 1.3911409018957934, "learning_rate": 2.1366977018557856e-06, "loss": 0.5912483930587769, "step": 9995 }, { "epoch": 1.5976184767841444, "grad_norm": 1.4875845139584496, "learning_rate": 2.135065324947506e-06, "loss": 0.5200954675674438, "step": 9996 }, { "epoch": 1.5977783105570207, "grad_norm": 1.726805595621569, "learning_rate": 2.1334334972983407e-06, "loss": 0.540779709815979, "step": 9997 }, { "epoch": 1.597938144329897, "grad_norm": 1.4703477210342872, "learning_rate": 2.1318022190222475e-06, "loss": 0.5332176089286804, "step": 9998 }, { "epoch": 1.598097978102773, "grad_norm": 1.4968593521057076, "learning_rate": 2.1301714902331484e-06, "loss": 0.577379584312439, "step": 9999 }, { "epoch": 1.5982578118756492, "grad_norm": 1.6352936865073633, "learning_rate": 2.128541311044927e-06, "loss": 0.6521949172019958, "step": 10000 }, { "epoch": 1.5984176456485255, "grad_norm": 1.5363980615555435, "learning_rate": 2.126911681571433e-06, "loss": 0.5112537145614624, "step": 10001 }, { "epoch": 1.5985774794214018, "grad_norm": 1.5880910046239298, "learning_rate": 2.1252826019264716e-06, "loss": 0.7074633836746216, "step": 10002 }, { "epoch": 1.598737313194278, "grad_norm": 1.4658107203320092, "learning_rate": 2.123654072223812e-06, "loss": 0.4550326466560364, "step": 10003 }, { "epoch": 1.598897146967154, "grad_norm": 1.7884688112532074, "learning_rate": 2.122026092577185e-06, "loss": 0.5413475632667542, "step": 10004 }, { "epoch": 1.5990569807400303, "grad_norm": 1.6681168405149112, "learning_rate": 2.1203986631002815e-06, "loss": 0.5563832521438599, "step": 10005 }, { "epoch": 1.5992168145129066, "grad_norm": 1.7315246879209596, "learning_rate": 2.118771783906758e-06, "loss": 0.5855154991149902, "step": 10006 }, { "epoch": 1.5993766482857827, "grad_norm": 1.7740256290654952, "learning_rate": 2.117145455110229e-06, "loss": 0.6048499345779419, "step": 10007 }, { "epoch": 1.5995364820586588, "grad_norm": 1.7793720468213798, "learning_rate": 2.1155196768242716e-06, "loss": 0.5455775260925293, "step": 10008 }, { "epoch": 1.5996963158315352, "grad_norm": 1.6687948635592125, "learning_rate": 2.113894449162425e-06, "loss": 0.5067030191421509, "step": 10009 }, { "epoch": 1.5998561496044115, "grad_norm": 1.5763341445050845, "learning_rate": 2.1122697722381845e-06, "loss": 0.5104923248291016, "step": 10010 }, { "epoch": 1.6000159833772876, "grad_norm": 1.445224703500917, "learning_rate": 2.110645646165018e-06, "loss": 0.7374412417411804, "step": 10011 }, { "epoch": 1.6001758171501639, "grad_norm": 1.6210730767205275, "learning_rate": 2.109022071056347e-06, "loss": 0.5767982006072998, "step": 10012 }, { "epoch": 1.6003356509230402, "grad_norm": 1.7172157871876539, "learning_rate": 2.107399047025556e-06, "loss": 0.621279776096344, "step": 10013 }, { "epoch": 1.6004954846959163, "grad_norm": 1.5541721104218036, "learning_rate": 2.1057765741859883e-06, "loss": 0.428450345993042, "step": 10014 }, { "epoch": 1.6006553184687924, "grad_norm": 1.3929448693439594, "learning_rate": 2.104154652650953e-06, "loss": 0.5371203422546387, "step": 10015 }, { "epoch": 1.6008151522416687, "grad_norm": 1.6048728657910172, "learning_rate": 2.102533282533724e-06, "loss": 0.6725348234176636, "step": 10016 }, { "epoch": 1.600974986014545, "grad_norm": 1.6973569213841924, "learning_rate": 2.100912463947529e-06, "loss": 0.634293794631958, "step": 10017 }, { "epoch": 1.6011348197874211, "grad_norm": 1.3652508882851173, "learning_rate": 2.099292197005558e-06, "loss": 0.38660919666290283, "step": 10018 }, { "epoch": 1.6012946535602972, "grad_norm": 1.5130671392169244, "learning_rate": 2.0976724818209683e-06, "loss": 0.6387341022491455, "step": 10019 }, { "epoch": 1.6014544873331735, "grad_norm": 1.8178537298810784, "learning_rate": 2.09605331850687e-06, "loss": 0.55818772315979, "step": 10020 }, { "epoch": 1.6016143211060498, "grad_norm": 1.4213451755186965, "learning_rate": 2.094434707176345e-06, "loss": 0.477280855178833, "step": 10021 }, { "epoch": 1.601774154878926, "grad_norm": 1.5375017365174237, "learning_rate": 2.0928166479424296e-06, "loss": 0.6004598140716553, "step": 10022 }, { "epoch": 1.601933988651802, "grad_norm": 1.5385988954704097, "learning_rate": 2.091199140918123e-06, "loss": 0.5904756188392639, "step": 10023 }, { "epoch": 1.6020938224246783, "grad_norm": 1.557840853604519, "learning_rate": 2.0895821862163867e-06, "loss": 0.49680203199386597, "step": 10024 }, { "epoch": 1.6022536561975547, "grad_norm": 1.9107355757444986, "learning_rate": 2.08796578395014e-06, "loss": 0.5462909936904907, "step": 10025 }, { "epoch": 1.6024134899704308, "grad_norm": 1.809246899322772, "learning_rate": 2.086349934232271e-06, "loss": 0.48124074935913086, "step": 10026 }, { "epoch": 1.6025733237433069, "grad_norm": 1.661708854897144, "learning_rate": 2.0847346371756237e-06, "loss": 0.4947127103805542, "step": 10027 }, { "epoch": 1.6027331575161832, "grad_norm": 1.4538300420847363, "learning_rate": 2.083119892893004e-06, "loss": 0.34582269191741943, "step": 10028 }, { "epoch": 1.6028929912890595, "grad_norm": 1.4144549460157525, "learning_rate": 2.0815057014971797e-06, "loss": 0.46911153197288513, "step": 10029 }, { "epoch": 1.6030528250619356, "grad_norm": 1.5300122577799282, "learning_rate": 2.0798920631008777e-06, "loss": 0.5669840574264526, "step": 10030 }, { "epoch": 1.6032126588348117, "grad_norm": 1.536351067862326, "learning_rate": 2.0782789778167955e-06, "loss": 0.5453836917877197, "step": 10031 }, { "epoch": 1.603372492607688, "grad_norm": 1.5102489203792278, "learning_rate": 2.0766664457575804e-06, "loss": 0.5119953155517578, "step": 10032 }, { "epoch": 1.6035323263805643, "grad_norm": 1.5286742807427485, "learning_rate": 2.0750544670358476e-06, "loss": 0.5375887751579285, "step": 10033 }, { "epoch": 1.6036921601534404, "grad_norm": 1.4381710902956406, "learning_rate": 2.0734430417641682e-06, "loss": 0.6133570671081543, "step": 10034 }, { "epoch": 1.6038519939263165, "grad_norm": 1.5767475328745704, "learning_rate": 2.0718321700550815e-06, "loss": 0.5318002700805664, "step": 10035 }, { "epoch": 1.6040118276991928, "grad_norm": 1.640941006224689, "learning_rate": 2.070221852021087e-06, "loss": 0.6514794230461121, "step": 10036 }, { "epoch": 1.6041716614720691, "grad_norm": 1.854862366726647, "learning_rate": 2.068612087774643e-06, "loss": 0.6904427409172058, "step": 10037 }, { "epoch": 1.6043314952449452, "grad_norm": 1.5882666789092819, "learning_rate": 2.067002877428167e-06, "loss": 0.542610228061676, "step": 10038 }, { "epoch": 1.6044913290178213, "grad_norm": 1.4850723654730993, "learning_rate": 2.0653942210940425e-06, "loss": 0.5643032789230347, "step": 10039 }, { "epoch": 1.6046511627906976, "grad_norm": 1.6096147799518032, "learning_rate": 2.0637861188846085e-06, "loss": 0.5670229196548462, "step": 10040 }, { "epoch": 1.604810996563574, "grad_norm": 1.5862016467717444, "learning_rate": 2.0621785709121765e-06, "loss": 0.5887889862060547, "step": 10041 }, { "epoch": 1.60497083033645, "grad_norm": 1.8238575266160306, "learning_rate": 2.0605715772890067e-06, "loss": 0.7007367014884949, "step": 10042 }, { "epoch": 1.6051306641093261, "grad_norm": 1.7564556443069241, "learning_rate": 2.058965138127327e-06, "loss": 0.5227072238922119, "step": 10043 }, { "epoch": 1.6052904978822025, "grad_norm": 1.9810247177883316, "learning_rate": 2.057359253539326e-06, "loss": 0.4795789122581482, "step": 10044 }, { "epoch": 1.6054503316550788, "grad_norm": 1.8419559430879828, "learning_rate": 2.0557539236371493e-06, "loss": 0.45498114824295044, "step": 10045 }, { "epoch": 1.6056101654279549, "grad_norm": 1.6193435337842, "learning_rate": 2.0541491485329124e-06, "loss": 0.461276650428772, "step": 10046 }, { "epoch": 1.6057699992008312, "grad_norm": 1.5789290870371542, "learning_rate": 2.0525449283386855e-06, "loss": 0.5633128881454468, "step": 10047 }, { "epoch": 1.6059298329737075, "grad_norm": 1.8669380763976295, "learning_rate": 2.0509412631665017e-06, "loss": 0.4962506890296936, "step": 10048 }, { "epoch": 1.6060896667465836, "grad_norm": 1.4244179205035246, "learning_rate": 2.049338153128353e-06, "loss": 0.46681317687034607, "step": 10049 }, { "epoch": 1.6062495005194597, "grad_norm": 1.59844259660741, "learning_rate": 2.047735598336198e-06, "loss": 0.5576410293579102, "step": 10050 }, { "epoch": 1.606409334292336, "grad_norm": 1.6276510681393708, "learning_rate": 2.0461335989019528e-06, "loss": 0.59267258644104, "step": 10051 }, { "epoch": 1.6065691680652123, "grad_norm": 1.9016977850610115, "learning_rate": 2.0445321549374953e-06, "loss": 0.6577394604682922, "step": 10052 }, { "epoch": 1.6067290018380884, "grad_norm": 1.6797998311750837, "learning_rate": 2.042931266554663e-06, "loss": 0.6400651931762695, "step": 10053 }, { "epoch": 1.6068888356109645, "grad_norm": 1.6949060897726749, "learning_rate": 2.0413309338652553e-06, "loss": 0.6569147109985352, "step": 10054 }, { "epoch": 1.6070486693838408, "grad_norm": 1.5889194188457332, "learning_rate": 2.0397311569810396e-06, "loss": 0.5065227150917053, "step": 10055 }, { "epoch": 1.6072085031567171, "grad_norm": 1.8869277970787803, "learning_rate": 2.0381319360137307e-06, "loss": 0.6305719017982483, "step": 10056 }, { "epoch": 1.6073683369295932, "grad_norm": 1.6527143789911356, "learning_rate": 2.036533271075021e-06, "loss": 0.7032585740089417, "step": 10057 }, { "epoch": 1.6075281707024693, "grad_norm": 1.5170404354302662, "learning_rate": 2.03493516227655e-06, "loss": 0.5533180832862854, "step": 10058 }, { "epoch": 1.6076880044753457, "grad_norm": 1.7691503660870902, "learning_rate": 2.033337609729925e-06, "loss": 0.5289099216461182, "step": 10059 }, { "epoch": 1.607847838248222, "grad_norm": 1.532705658311091, "learning_rate": 2.031740613546712e-06, "loss": 0.5813469886779785, "step": 10060 }, { "epoch": 1.608007672021098, "grad_norm": 1.45857107350472, "learning_rate": 2.030144173838443e-06, "loss": 0.6151766777038574, "step": 10061 }, { "epoch": 1.6081675057939742, "grad_norm": 1.7175799794753672, "learning_rate": 2.0285482907166066e-06, "loss": 0.5917597413063049, "step": 10062 }, { "epoch": 1.6083273395668505, "grad_norm": 1.600257174884864, "learning_rate": 2.0269529642926523e-06, "loss": 0.739354133605957, "step": 10063 }, { "epoch": 1.6084871733397268, "grad_norm": 1.56080501134921, "learning_rate": 2.02535819467799e-06, "loss": 0.5484195947647095, "step": 10064 }, { "epoch": 1.608647007112603, "grad_norm": 1.420250689956654, "learning_rate": 2.0237639819839983e-06, "loss": 0.5653278231620789, "step": 10065 }, { "epoch": 1.608806840885479, "grad_norm": 2.47940495548045, "learning_rate": 2.022170326322008e-06, "loss": 0.5425577163696289, "step": 10066 }, { "epoch": 1.6089666746583553, "grad_norm": 1.780697965047556, "learning_rate": 2.0205772278033153e-06, "loss": 0.6723408102989197, "step": 10067 }, { "epoch": 1.6091265084312316, "grad_norm": 1.6119584290766795, "learning_rate": 2.018984686539176e-06, "loss": 0.6194473505020142, "step": 10068 }, { "epoch": 1.6092863422041077, "grad_norm": 1.5473078663377515, "learning_rate": 2.0173927026408045e-06, "loss": 0.5446032285690308, "step": 10069 }, { "epoch": 1.6094461759769838, "grad_norm": 1.6768973779074339, "learning_rate": 2.0158012762193856e-06, "loss": 0.6110153198242188, "step": 10070 }, { "epoch": 1.6096060097498601, "grad_norm": 1.624530569255925, "learning_rate": 2.0142104073860546e-06, "loss": 0.523569643497467, "step": 10071 }, { "epoch": 1.6097658435227364, "grad_norm": 1.40167405824831, "learning_rate": 2.0126200962519136e-06, "loss": 0.5746004581451416, "step": 10072 }, { "epoch": 1.6099256772956125, "grad_norm": 1.477457522013502, "learning_rate": 2.011030342928024e-06, "loss": 0.498557448387146, "step": 10073 }, { "epoch": 1.6100855110684886, "grad_norm": 1.5056615223263787, "learning_rate": 2.009441147525406e-06, "loss": 0.5626899003982544, "step": 10074 }, { "epoch": 1.610245344841365, "grad_norm": 1.6041119238284978, "learning_rate": 2.007852510155047e-06, "loss": 0.6117614507675171, "step": 10075 }, { "epoch": 1.6104051786142413, "grad_norm": 1.4584378624594307, "learning_rate": 2.00626443092789e-06, "loss": 0.5808890461921692, "step": 10076 }, { "epoch": 1.6105650123871174, "grad_norm": 1.7200560321736826, "learning_rate": 2.0046769099548425e-06, "loss": 0.7267376780509949, "step": 10077 }, { "epoch": 1.6107248461599935, "grad_norm": 1.7795418563810743, "learning_rate": 2.00308994734677e-06, "loss": 0.5831611156463623, "step": 10078 }, { "epoch": 1.6108846799328698, "grad_norm": 1.8442209932411247, "learning_rate": 2.0015035432144993e-06, "loss": 0.5434012413024902, "step": 10079 }, { "epoch": 1.611044513705746, "grad_norm": 1.667106564602639, "learning_rate": 1.9999176976688217e-06, "loss": 0.646760880947113, "step": 10080 }, { "epoch": 1.6112043474786222, "grad_norm": 1.8228884814327975, "learning_rate": 1.998332410820486e-06, "loss": 0.5674434900283813, "step": 10081 }, { "epoch": 1.6113641812514985, "grad_norm": 1.6846944375328947, "learning_rate": 1.9967476827802023e-06, "loss": 0.5244075059890747, "step": 10082 }, { "epoch": 1.6115240150243748, "grad_norm": 1.5424074222201014, "learning_rate": 1.995163513658643e-06, "loss": 0.5739215612411499, "step": 10083 }, { "epoch": 1.611683848797251, "grad_norm": 1.7525581012206242, "learning_rate": 1.993579903566439e-06, "loss": 0.6266718506813049, "step": 10084 }, { "epoch": 1.611843682570127, "grad_norm": 1.7401749462859877, "learning_rate": 1.9919968526141876e-06, "loss": 0.4952477812767029, "step": 10085 }, { "epoch": 1.6120035163430033, "grad_norm": 1.9405794358019495, "learning_rate": 1.9904143609124417e-06, "loss": 0.5413658022880554, "step": 10086 }, { "epoch": 1.6121633501158796, "grad_norm": 1.5546772073113269, "learning_rate": 1.9888324285717166e-06, "loss": 0.5164011120796204, "step": 10087 }, { "epoch": 1.6123231838887557, "grad_norm": 1.6448019363174644, "learning_rate": 1.9872510557024893e-06, "loss": 0.5491341352462769, "step": 10088 }, { "epoch": 1.6124830176616318, "grad_norm": 1.626789851155203, "learning_rate": 1.985670242415194e-06, "loss": 0.5794100761413574, "step": 10089 }, { "epoch": 1.6126428514345081, "grad_norm": 1.688336787383664, "learning_rate": 1.984089988820235e-06, "loss": 0.5932912826538086, "step": 10090 }, { "epoch": 1.6128026852073845, "grad_norm": 1.5026403532436634, "learning_rate": 1.982510295027967e-06, "loss": 0.5139159560203552, "step": 10091 }, { "epoch": 1.6129625189802606, "grad_norm": 1.6190593368875823, "learning_rate": 1.980931161148714e-06, "loss": 0.5424656867980957, "step": 10092 }, { "epoch": 1.6131223527531366, "grad_norm": 1.594725526275076, "learning_rate": 1.979352587292754e-06, "loss": 0.48551493883132935, "step": 10093 }, { "epoch": 1.613282186526013, "grad_norm": 1.768930090967354, "learning_rate": 1.9777745735703267e-06, "loss": 0.5522955060005188, "step": 10094 }, { "epoch": 1.6134420202988893, "grad_norm": 1.7756959487938062, "learning_rate": 1.9761971200916417e-06, "loss": 0.62049800157547, "step": 10095 }, { "epoch": 1.6136018540717654, "grad_norm": 1.555467225043011, "learning_rate": 1.9746202269668568e-06, "loss": 0.43974635004997253, "step": 10096 }, { "epoch": 1.6137616878446415, "grad_norm": 1.6079848035562785, "learning_rate": 1.9730438943061014e-06, "loss": 0.5768338441848755, "step": 10097 }, { "epoch": 1.6139215216175178, "grad_norm": 1.6443265489127816, "learning_rate": 1.971468122219459e-06, "loss": 0.5338613986968994, "step": 10098 }, { "epoch": 1.614081355390394, "grad_norm": 1.570196396364752, "learning_rate": 1.9698929108169716e-06, "loss": 0.4900963306427002, "step": 10099 }, { "epoch": 1.6142411891632702, "grad_norm": 1.6263778844986465, "learning_rate": 1.968318260208654e-06, "loss": 0.6776007413864136, "step": 10100 }, { "epoch": 1.6144010229361463, "grad_norm": 1.4664607929887932, "learning_rate": 1.96674417050447e-06, "loss": 0.5607292652130127, "step": 10101 }, { "epoch": 1.6145608567090226, "grad_norm": 1.6984134448729158, "learning_rate": 1.96517064181435e-06, "loss": 0.502434253692627, "step": 10102 }, { "epoch": 1.614720690481899, "grad_norm": 1.4620310236282623, "learning_rate": 1.9635976742481823e-06, "loss": 0.4936913251876831, "step": 10103 }, { "epoch": 1.614880524254775, "grad_norm": 1.5696420173405796, "learning_rate": 1.9620252679158136e-06, "loss": 0.7136695384979248, "step": 10104 }, { "epoch": 1.6150403580276511, "grad_norm": 1.7225503285411972, "learning_rate": 1.9604534229270635e-06, "loss": 0.5473582744598389, "step": 10105 }, { "epoch": 1.6152001918005274, "grad_norm": 1.2014055286615255, "learning_rate": 1.958882139391699e-06, "loss": 0.3266558051109314, "step": 10106 }, { "epoch": 1.6153600255734037, "grad_norm": 1.680546771917355, "learning_rate": 1.957311417419455e-06, "loss": 0.5328406095504761, "step": 10107 }, { "epoch": 1.6155198593462798, "grad_norm": 1.7554970025144394, "learning_rate": 1.955741257120023e-06, "loss": 0.5750744342803955, "step": 10108 }, { "epoch": 1.615679693119156, "grad_norm": 1.4083725869143426, "learning_rate": 1.954171658603056e-06, "loss": 0.5188736915588379, "step": 10109 }, { "epoch": 1.6158395268920323, "grad_norm": 1.7222264521817243, "learning_rate": 1.9526026219781734e-06, "loss": 0.5288621187210083, "step": 10110 }, { "epoch": 1.6159993606649086, "grad_norm": 1.5701423195693913, "learning_rate": 1.951034147354951e-06, "loss": 0.5096771121025085, "step": 10111 }, { "epoch": 1.6161591944377847, "grad_norm": 1.790988165940947, "learning_rate": 1.9494662348429226e-06, "loss": 0.5705466270446777, "step": 10112 }, { "epoch": 1.6163190282106608, "grad_norm": 1.6359212403163692, "learning_rate": 1.9478988845515847e-06, "loss": 0.4737274646759033, "step": 10113 }, { "epoch": 1.616478861983537, "grad_norm": 1.568920163966828, "learning_rate": 1.9463320965903996e-06, "loss": 0.5351972579956055, "step": 10114 }, { "epoch": 1.6166386957564134, "grad_norm": 1.6928761470418183, "learning_rate": 1.9447658710687854e-06, "loss": 0.6284874081611633, "step": 10115 }, { "epoch": 1.6167985295292895, "grad_norm": 1.3873800765076518, "learning_rate": 1.9432002080961164e-06, "loss": 0.3828611969947815, "step": 10116 }, { "epoch": 1.6169583633021658, "grad_norm": 1.9379472325654783, "learning_rate": 1.941635107781741e-06, "loss": 0.557814359664917, "step": 10117 }, { "epoch": 1.6171181970750421, "grad_norm": 1.393483984102156, "learning_rate": 1.9400705702349564e-06, "loss": 0.5854359865188599, "step": 10118 }, { "epoch": 1.6172780308479182, "grad_norm": 1.4490019004428683, "learning_rate": 1.9385065955650206e-06, "loss": 0.39864665269851685, "step": 10119 }, { "epoch": 1.6174378646207943, "grad_norm": 1.5611653090876771, "learning_rate": 1.936943183881164e-06, "loss": 0.49863070249557495, "step": 10120 }, { "epoch": 1.6175976983936706, "grad_norm": 1.5841224890827412, "learning_rate": 1.9353803352925635e-06, "loss": 0.5367902517318726, "step": 10121 }, { "epoch": 1.617757532166547, "grad_norm": 1.8345059883348307, "learning_rate": 1.9338180499083668e-06, "loss": 0.6330183744430542, "step": 10122 }, { "epoch": 1.617917365939423, "grad_norm": 1.5199613450844285, "learning_rate": 1.932256327837675e-06, "loss": 0.4967653155326843, "step": 10123 }, { "epoch": 1.6180771997122991, "grad_norm": 1.4325632147787366, "learning_rate": 1.9306951691895526e-06, "loss": 0.5305376648902893, "step": 10124 }, { "epoch": 1.6182370334851754, "grad_norm": 1.43205326329787, "learning_rate": 1.9291345740730305e-06, "loss": 0.5130458474159241, "step": 10125 }, { "epoch": 1.6183968672580518, "grad_norm": 1.2458687378330733, "learning_rate": 1.9275745425970915e-06, "loss": 0.32175779342651367, "step": 10126 }, { "epoch": 1.6185567010309279, "grad_norm": 1.5617679713891377, "learning_rate": 1.926015074870683e-06, "loss": 0.5064348578453064, "step": 10127 }, { "epoch": 1.618716534803804, "grad_norm": 1.476009428910808, "learning_rate": 1.924456171002711e-06, "loss": 0.47950077056884766, "step": 10128 }, { "epoch": 1.6188763685766803, "grad_norm": 1.5033661021229412, "learning_rate": 1.9228978311020473e-06, "loss": 0.4530235528945923, "step": 10129 }, { "epoch": 1.6190362023495566, "grad_norm": 1.7194772788148287, "learning_rate": 1.9213400552775196e-06, "loss": 0.5475796461105347, "step": 10130 }, { "epoch": 1.6191960361224327, "grad_norm": 1.6955092283398339, "learning_rate": 1.9197828436379174e-06, "loss": 0.5732267498970032, "step": 10131 }, { "epoch": 1.6193558698953088, "grad_norm": 1.635995001334701, "learning_rate": 1.918226196291991e-06, "loss": 0.4620676338672638, "step": 10132 }, { "epoch": 1.619515703668185, "grad_norm": 1.504095704184664, "learning_rate": 1.9166701133484466e-06, "loss": 0.4596824049949646, "step": 10133 }, { "epoch": 1.6196755374410614, "grad_norm": 1.5607068093332153, "learning_rate": 1.915114594915963e-06, "loss": 0.4751288890838623, "step": 10134 }, { "epoch": 1.6198353712139375, "grad_norm": 1.6170692987869426, "learning_rate": 1.913559641103169e-06, "loss": 0.5101333856582642, "step": 10135 }, { "epoch": 1.6199952049868136, "grad_norm": 1.4527661757126318, "learning_rate": 1.912005252018654e-06, "loss": 0.49908506870269775, "step": 10136 }, { "epoch": 1.62015503875969, "grad_norm": 1.4409174000198968, "learning_rate": 1.910451427770975e-06, "loss": 0.5101750493049622, "step": 10137 }, { "epoch": 1.6203148725325662, "grad_norm": 1.4842156465761012, "learning_rate": 1.9088981684686457e-06, "loss": 0.5041088461875916, "step": 10138 }, { "epoch": 1.6204747063054423, "grad_norm": 1.6458942864521529, "learning_rate": 1.9073454742201357e-06, "loss": 0.4994598925113678, "step": 10139 }, { "epoch": 1.6206345400783184, "grad_norm": 1.6703557392391815, "learning_rate": 1.9057933451338851e-06, "loss": 0.5612555742263794, "step": 10140 }, { "epoch": 1.6207943738511947, "grad_norm": 1.6376596470581901, "learning_rate": 1.9042417813182868e-06, "loss": 0.5091708302497864, "step": 10141 }, { "epoch": 1.620954207624071, "grad_norm": 1.5273352566328762, "learning_rate": 1.9026907828816965e-06, "loss": 0.5640340447425842, "step": 10142 }, { "epoch": 1.6211140413969471, "grad_norm": 1.7402758573825645, "learning_rate": 1.9011403499324277e-06, "loss": 0.5074774026870728, "step": 10143 }, { "epoch": 1.6212738751698232, "grad_norm": 1.6847312426369083, "learning_rate": 1.8995904825787626e-06, "loss": 0.5280896425247192, "step": 10144 }, { "epoch": 1.6214337089426996, "grad_norm": 1.4598364166718867, "learning_rate": 1.8980411809289357e-06, "loss": 0.49593448638916016, "step": 10145 }, { "epoch": 1.6215935427155759, "grad_norm": 1.6166219319742918, "learning_rate": 1.8964924450911437e-06, "loss": 0.5054309368133545, "step": 10146 }, { "epoch": 1.621753376488452, "grad_norm": 1.4508969387449702, "learning_rate": 1.894944275173547e-06, "loss": 0.540873646736145, "step": 10147 }, { "epoch": 1.621913210261328, "grad_norm": 1.4723602929823443, "learning_rate": 1.89339667128426e-06, "loss": 0.47353655099868774, "step": 10148 }, { "epoch": 1.6220730440342044, "grad_norm": 1.6345615172890335, "learning_rate": 1.8918496335313664e-06, "loss": 0.5602090358734131, "step": 10149 }, { "epoch": 1.6222328778070807, "grad_norm": 1.7014115926293771, "learning_rate": 1.890303162022905e-06, "loss": 0.4996708035469055, "step": 10150 }, { "epoch": 1.6223927115799568, "grad_norm": 1.465688860758307, "learning_rate": 1.8887572568668755e-06, "loss": 0.5375049114227295, "step": 10151 }, { "epoch": 1.622552545352833, "grad_norm": 1.4413564547315654, "learning_rate": 1.8872119181712378e-06, "loss": 0.5084266662597656, "step": 10152 }, { "epoch": 1.6227123791257094, "grad_norm": 1.6484130157092385, "learning_rate": 1.8856671460439113e-06, "loss": 0.585877001285553, "step": 10153 }, { "epoch": 1.6228722128985855, "grad_norm": 2.030261615702116, "learning_rate": 1.884122940592782e-06, "loss": 0.7319950461387634, "step": 10154 }, { "epoch": 1.6230320466714616, "grad_norm": 1.6325740389344607, "learning_rate": 1.8825793019256878e-06, "loss": 0.5592414140701294, "step": 10155 }, { "epoch": 1.623191880444338, "grad_norm": 1.6866059541380105, "learning_rate": 1.8810362301504303e-06, "loss": 0.5194234848022461, "step": 10156 }, { "epoch": 1.6233517142172142, "grad_norm": 1.8126243595558789, "learning_rate": 1.8794937253747758e-06, "loss": 0.4531247615814209, "step": 10157 }, { "epoch": 1.6235115479900903, "grad_norm": 1.4984280080064014, "learning_rate": 1.8779517877064468e-06, "loss": 0.6169701814651489, "step": 10158 }, { "epoch": 1.6236713817629664, "grad_norm": 1.62888628822682, "learning_rate": 1.8764104172531217e-06, "loss": 0.6157268285751343, "step": 10159 }, { "epoch": 1.6238312155358428, "grad_norm": 1.4975228093388844, "learning_rate": 1.874869614122452e-06, "loss": 0.5547229051589966, "step": 10160 }, { "epoch": 1.623991049308719, "grad_norm": 1.4064656933448614, "learning_rate": 1.8733293784220375e-06, "loss": 0.4243704378604889, "step": 10161 }, { "epoch": 1.6241508830815952, "grad_norm": 1.524709836673358, "learning_rate": 1.871789710259443e-06, "loss": 0.5685103535652161, "step": 10162 }, { "epoch": 1.6243107168544713, "grad_norm": 1.6662845922356615, "learning_rate": 1.8702506097421912e-06, "loss": 0.685461163520813, "step": 10163 }, { "epoch": 1.6244705506273476, "grad_norm": 1.5804790962582356, "learning_rate": 1.8687120769777733e-06, "loss": 0.5024242401123047, "step": 10164 }, { "epoch": 1.624630384400224, "grad_norm": 1.403587561871375, "learning_rate": 1.8671741120736309e-06, "loss": 0.44890689849853516, "step": 10165 }, { "epoch": 1.6247902181731, "grad_norm": 1.2568489393953777, "learning_rate": 1.8656367151371713e-06, "loss": 0.43439167737960815, "step": 10166 }, { "epoch": 1.624950051945976, "grad_norm": 1.5722655200371936, "learning_rate": 1.864099886275761e-06, "loss": 0.5025551319122314, "step": 10167 }, { "epoch": 1.6251098857188524, "grad_norm": 1.6329266089837011, "learning_rate": 1.8625636255967227e-06, "loss": 0.4690953493118286, "step": 10168 }, { "epoch": 1.6252697194917287, "grad_norm": 1.5442298213464924, "learning_rate": 1.8610279332073489e-06, "loss": 0.5059592127799988, "step": 10169 }, { "epoch": 1.6254295532646048, "grad_norm": 1.7729923474960452, "learning_rate": 1.8594928092148857e-06, "loss": 0.530040979385376, "step": 10170 }, { "epoch": 1.625589387037481, "grad_norm": 1.581650031020066, "learning_rate": 1.8579582537265395e-06, "loss": 0.5647079944610596, "step": 10171 }, { "epoch": 1.6257492208103572, "grad_norm": 1.4733730154171794, "learning_rate": 1.856424266849478e-06, "loss": 0.5575881600379944, "step": 10172 }, { "epoch": 1.6259090545832335, "grad_norm": 1.7012120066365153, "learning_rate": 1.8548908486908268e-06, "loss": 0.5451078414916992, "step": 10173 }, { "epoch": 1.6260688883561096, "grad_norm": 1.6771622114582236, "learning_rate": 1.8533579993576812e-06, "loss": 0.6469219923019409, "step": 10174 }, { "epoch": 1.6262287221289857, "grad_norm": 1.5200655003753956, "learning_rate": 1.8518257189570853e-06, "loss": 0.6156253814697266, "step": 10175 }, { "epoch": 1.626388555901862, "grad_norm": 1.8842780014973437, "learning_rate": 1.8502940075960463e-06, "loss": 0.5191751718521118, "step": 10176 }, { "epoch": 1.6265483896747384, "grad_norm": 1.5380696595846128, "learning_rate": 1.8487628653815393e-06, "loss": 0.6554334163665771, "step": 10177 }, { "epoch": 1.6267082234476145, "grad_norm": 1.3896545503544087, "learning_rate": 1.8472322924204878e-06, "loss": 0.5635707378387451, "step": 10178 }, { "epoch": 1.6268680572204905, "grad_norm": 1.9373524534496587, "learning_rate": 1.8457022888197873e-06, "loss": 0.5110602378845215, "step": 10179 }, { "epoch": 1.6270278909933669, "grad_norm": 1.4791168525161011, "learning_rate": 1.8441728546862848e-06, "loss": 0.49751514196395874, "step": 10180 }, { "epoch": 1.6271877247662432, "grad_norm": 1.541595495716213, "learning_rate": 1.8426439901267912e-06, "loss": 0.4827856421470642, "step": 10181 }, { "epoch": 1.6273475585391193, "grad_norm": 1.9231345871743042, "learning_rate": 1.8411156952480768e-06, "loss": 0.7463400959968567, "step": 10182 }, { "epoch": 1.6275073923119954, "grad_norm": 1.6238694319493279, "learning_rate": 1.8395879701568687e-06, "loss": 0.5731443762779236, "step": 10183 }, { "epoch": 1.6276672260848717, "grad_norm": 1.7858102740837538, "learning_rate": 1.8380608149598634e-06, "loss": 0.6231145858764648, "step": 10184 }, { "epoch": 1.627827059857748, "grad_norm": 1.8916364283202283, "learning_rate": 1.8365342297637102e-06, "loss": 0.6243208050727844, "step": 10185 }, { "epoch": 1.627986893630624, "grad_norm": 1.4244691286831348, "learning_rate": 1.8350082146750204e-06, "loss": 0.5253198742866516, "step": 10186 }, { "epoch": 1.6281467274035002, "grad_norm": 1.6654905454284519, "learning_rate": 1.8334827698003644e-06, "loss": 0.5398026704788208, "step": 10187 }, { "epoch": 1.6283065611763767, "grad_norm": 1.530294159759788, "learning_rate": 1.8319578952462713e-06, "loss": 0.5349127650260925, "step": 10188 }, { "epoch": 1.6284663949492528, "grad_norm": 1.6246044999709652, "learning_rate": 1.8304335911192394e-06, "loss": 0.44535383582115173, "step": 10189 }, { "epoch": 1.628626228722129, "grad_norm": 1.6992429743578001, "learning_rate": 1.828909857525717e-06, "loss": 0.5699901580810547, "step": 10190 }, { "epoch": 1.6287860624950052, "grad_norm": 1.7519641557617038, "learning_rate": 1.8273866945721175e-06, "loss": 0.6591240167617798, "step": 10191 }, { "epoch": 1.6289458962678816, "grad_norm": 1.4184780161083137, "learning_rate": 1.8258641023648082e-06, "loss": 0.5022834539413452, "step": 10192 }, { "epoch": 1.6291057300407576, "grad_norm": 1.6506639237673468, "learning_rate": 1.8243420810101287e-06, "loss": 0.5347367525100708, "step": 10193 }, { "epoch": 1.6292655638136337, "grad_norm": 1.8211533349210927, "learning_rate": 1.8228206306143691e-06, "loss": 0.6977889537811279, "step": 10194 }, { "epoch": 1.62942539758651, "grad_norm": 2.035924476777122, "learning_rate": 1.8212997512837815e-06, "loss": 0.5542057752609253, "step": 10195 }, { "epoch": 1.6295852313593864, "grad_norm": 1.583894743868464, "learning_rate": 1.8197794431245753e-06, "loss": 0.5826839208602905, "step": 10196 }, { "epoch": 1.6297450651322625, "grad_norm": 1.4655692742266069, "learning_rate": 1.8182597062429297e-06, "loss": 0.5670202374458313, "step": 10197 }, { "epoch": 1.6299048989051386, "grad_norm": 1.4705704350929634, "learning_rate": 1.8167405407449723e-06, "loss": 0.5591129064559937, "step": 10198 }, { "epoch": 1.6300647326780149, "grad_norm": 1.390157897962689, "learning_rate": 1.8152219467368016e-06, "loss": 0.5455940961837769, "step": 10199 }, { "epoch": 1.6302245664508912, "grad_norm": 1.7221897214760997, "learning_rate": 1.813703924324468e-06, "loss": 0.6115128993988037, "step": 10200 }, { "epoch": 1.6303844002237673, "grad_norm": 1.289219392156643, "learning_rate": 1.8121864736139838e-06, "loss": 0.4665408134460449, "step": 10201 }, { "epoch": 1.6305442339966434, "grad_norm": 1.3340888464003149, "learning_rate": 1.810669594711324e-06, "loss": 0.5325117111206055, "step": 10202 }, { "epoch": 1.6307040677695197, "grad_norm": 1.546485090707862, "learning_rate": 1.8091532877224194e-06, "loss": 0.4254032373428345, "step": 10203 }, { "epoch": 1.630863901542396, "grad_norm": 1.7366981299557385, "learning_rate": 1.8076375527531676e-06, "loss": 0.6236056089401245, "step": 10204 }, { "epoch": 1.6310237353152721, "grad_norm": 1.7271404201655813, "learning_rate": 1.8061223899094194e-06, "loss": 0.4944586753845215, "step": 10205 }, { "epoch": 1.6311835690881482, "grad_norm": 1.821928987031399, "learning_rate": 1.8046077992969902e-06, "loss": 0.5305480360984802, "step": 10206 }, { "epoch": 1.6313434028610245, "grad_norm": 1.759354922936042, "learning_rate": 1.8030937810216486e-06, "loss": 0.7194198369979858, "step": 10207 }, { "epoch": 1.6315032366339008, "grad_norm": 1.4208630981836368, "learning_rate": 1.8015803351891348e-06, "loss": 0.47884601354599, "step": 10208 }, { "epoch": 1.631663070406777, "grad_norm": 1.620400527639246, "learning_rate": 1.8000674619051406e-06, "loss": 0.522301435470581, "step": 10209 }, { "epoch": 1.631822904179653, "grad_norm": 1.6386737700385319, "learning_rate": 1.7985551612753182e-06, "loss": 0.5348146557807922, "step": 10210 }, { "epoch": 1.6319827379525293, "grad_norm": 1.4812682276259537, "learning_rate": 1.7970434334052822e-06, "loss": 0.5892417430877686, "step": 10211 }, { "epoch": 1.6321425717254057, "grad_norm": 1.5271007062355164, "learning_rate": 1.7955322784006036e-06, "loss": 0.5726929903030396, "step": 10212 }, { "epoch": 1.6323024054982818, "grad_norm": 1.5882969646299947, "learning_rate": 1.7940216963668212e-06, "loss": 0.5521404147148132, "step": 10213 }, { "epoch": 1.6324622392711579, "grad_norm": 1.377794923672214, "learning_rate": 1.7925116874094262e-06, "loss": 0.5646074414253235, "step": 10214 }, { "epoch": 1.6326220730440342, "grad_norm": 1.6802473099237878, "learning_rate": 1.7910022516338721e-06, "loss": 0.584923267364502, "step": 10215 }, { "epoch": 1.6327819068169105, "grad_norm": 1.5390490462407884, "learning_rate": 1.78949338914557e-06, "loss": 0.5737888813018799, "step": 10216 }, { "epoch": 1.6329417405897866, "grad_norm": 1.6950763016913015, "learning_rate": 1.7879851000498983e-06, "loss": 0.6870735287666321, "step": 10217 }, { "epoch": 1.6331015743626627, "grad_norm": 1.8097797836012741, "learning_rate": 1.7864773844521865e-06, "loss": 0.44647759199142456, "step": 10218 }, { "epoch": 1.633261408135539, "grad_norm": 1.7846114718136885, "learning_rate": 1.784970242457732e-06, "loss": 0.5543832778930664, "step": 10219 }, { "epoch": 1.6334212419084153, "grad_norm": 1.6999477245019647, "learning_rate": 1.7834636741717858e-06, "loss": 0.4928279519081116, "step": 10220 }, { "epoch": 1.6335810756812914, "grad_norm": 1.6436116400281908, "learning_rate": 1.781957679699562e-06, "loss": 0.43958306312561035, "step": 10221 }, { "epoch": 1.6337409094541675, "grad_norm": 1.8000387390453405, "learning_rate": 1.7804522591462338e-06, "loss": 0.5695357322692871, "step": 10222 }, { "epoch": 1.633900743227044, "grad_norm": 1.6193473431765812, "learning_rate": 1.7789474126169314e-06, "loss": 0.5107421278953552, "step": 10223 }, { "epoch": 1.6340605769999201, "grad_norm": 1.5750384193513256, "learning_rate": 1.7774431402167535e-06, "loss": 0.5701265335083008, "step": 10224 }, { "epoch": 1.6342204107727962, "grad_norm": 1.6150730170062682, "learning_rate": 1.7759394420507503e-06, "loss": 0.5733885765075684, "step": 10225 }, { "epoch": 1.6343802445456725, "grad_norm": 1.5439581559353772, "learning_rate": 1.7744363182239343e-06, "loss": 0.49886149168014526, "step": 10226 }, { "epoch": 1.6345400783185489, "grad_norm": 1.8316497860907976, "learning_rate": 1.7729337688412772e-06, "loss": 0.5059786438941956, "step": 10227 }, { "epoch": 1.634699912091425, "grad_norm": 1.7004446770719448, "learning_rate": 1.7714317940077152e-06, "loss": 0.5028568506240845, "step": 10228 }, { "epoch": 1.634859745864301, "grad_norm": 1.5817079153090627, "learning_rate": 1.76993039382814e-06, "loss": 0.5951967239379883, "step": 10229 }, { "epoch": 1.6350195796371774, "grad_norm": 1.6527229598418696, "learning_rate": 1.7684295684074014e-06, "loss": 0.6329824924468994, "step": 10230 }, { "epoch": 1.6351794134100537, "grad_norm": 1.7439059504532424, "learning_rate": 1.7669293178503145e-06, "loss": 0.4889298975467682, "step": 10231 }, { "epoch": 1.6353392471829298, "grad_norm": 1.5827010029474957, "learning_rate": 1.7654296422616479e-06, "loss": 0.4876392185688019, "step": 10232 }, { "epoch": 1.6354990809558059, "grad_norm": 1.5677918588151722, "learning_rate": 1.7639305417461383e-06, "loss": 0.605463981628418, "step": 10233 }, { "epoch": 1.6356589147286822, "grad_norm": 1.5093864403226935, "learning_rate": 1.7624320164084763e-06, "loss": 0.5548660755157471, "step": 10234 }, { "epoch": 1.6358187485015585, "grad_norm": 1.462654767846557, "learning_rate": 1.7609340663533115e-06, "loss": 0.4955105781555176, "step": 10235 }, { "epoch": 1.6359785822744346, "grad_norm": 1.6792827121662182, "learning_rate": 1.7594366916852546e-06, "loss": 0.5177518129348755, "step": 10236 }, { "epoch": 1.6361384160473107, "grad_norm": 1.6609871439790922, "learning_rate": 1.757939892508882e-06, "loss": 0.5387799739837646, "step": 10237 }, { "epoch": 1.636298249820187, "grad_norm": 1.5733264154999609, "learning_rate": 1.7564436689287179e-06, "loss": 0.5111050605773926, "step": 10238 }, { "epoch": 1.6364580835930633, "grad_norm": 1.776467043515205, "learning_rate": 1.7549480210492608e-06, "loss": 0.5937821865081787, "step": 10239 }, { "epoch": 1.6366179173659394, "grad_norm": 1.6493893810684124, "learning_rate": 1.7534529489749563e-06, "loss": 0.5596042275428772, "step": 10240 }, { "epoch": 1.6367777511388155, "grad_norm": 1.4839917408759786, "learning_rate": 1.7519584528102173e-06, "loss": 0.4600415825843811, "step": 10241 }, { "epoch": 1.6369375849116918, "grad_norm": 1.5884894763945907, "learning_rate": 1.7504645326594106e-06, "loss": 0.5564258098602295, "step": 10242 }, { "epoch": 1.6370974186845682, "grad_norm": 1.6866372710247988, "learning_rate": 1.7489711886268713e-06, "loss": 0.5101696252822876, "step": 10243 }, { "epoch": 1.6372572524574442, "grad_norm": 1.6734133544841088, "learning_rate": 1.7474784208168871e-06, "loss": 0.5854451656341553, "step": 10244 }, { "epoch": 1.6374170862303203, "grad_norm": 1.9256605199100651, "learning_rate": 1.745986229333707e-06, "loss": 0.5750274658203125, "step": 10245 }, { "epoch": 1.6375769200031967, "grad_norm": 1.5683008687683793, "learning_rate": 1.7444946142815412e-06, "loss": 0.5672642588615417, "step": 10246 }, { "epoch": 1.637736753776073, "grad_norm": 1.6954915830978126, "learning_rate": 1.7430035757645546e-06, "loss": 0.5639285445213318, "step": 10247 }, { "epoch": 1.637896587548949, "grad_norm": 1.3703261054086497, "learning_rate": 1.7415131138868825e-06, "loss": 0.4181864261627197, "step": 10248 }, { "epoch": 1.6380564213218252, "grad_norm": 1.9414058567216907, "learning_rate": 1.7400232287526108e-06, "loss": 0.6369352340698242, "step": 10249 }, { "epoch": 1.6382162550947015, "grad_norm": 1.7192378237731136, "learning_rate": 1.738533920465788e-06, "loss": 0.5139350891113281, "step": 10250 }, { "epoch": 1.6383760888675778, "grad_norm": 1.5831335902046804, "learning_rate": 1.7370451891304208e-06, "loss": 0.5888731479644775, "step": 10251 }, { "epoch": 1.638535922640454, "grad_norm": 1.546193668739386, "learning_rate": 1.735557034850477e-06, "loss": 0.5504014492034912, "step": 10252 }, { "epoch": 1.63869575641333, "grad_norm": 1.5199987562195285, "learning_rate": 1.7340694577298856e-06, "loss": 0.6368851661682129, "step": 10253 }, { "epoch": 1.6388555901862063, "grad_norm": 1.6958776770382062, "learning_rate": 1.7325824578725337e-06, "loss": 0.5809284448623657, "step": 10254 }, { "epoch": 1.6390154239590826, "grad_norm": 1.5786138809079746, "learning_rate": 1.731096035382267e-06, "loss": 0.6388271450996399, "step": 10255 }, { "epoch": 1.6391752577319587, "grad_norm": 1.5442483078771991, "learning_rate": 1.7296101903628903e-06, "loss": 0.5820679068565369, "step": 10256 }, { "epoch": 1.6393350915048348, "grad_norm": 1.6733695698217146, "learning_rate": 1.7281249229181719e-06, "loss": 0.5508993864059448, "step": 10257 }, { "epoch": 1.6394949252777113, "grad_norm": 1.7291576553391508, "learning_rate": 1.7266402331518394e-06, "loss": 0.6882840394973755, "step": 10258 }, { "epoch": 1.6396547590505874, "grad_norm": 2.171981569195934, "learning_rate": 1.725156121167576e-06, "loss": 0.6457099914550781, "step": 10259 }, { "epoch": 1.6398145928234635, "grad_norm": 1.6660982215430997, "learning_rate": 1.7236725870690274e-06, "loss": 0.638207197189331, "step": 10260 }, { "epoch": 1.6399744265963399, "grad_norm": 1.4994621757361553, "learning_rate": 1.7221896309597985e-06, "loss": 0.5575367212295532, "step": 10261 }, { "epoch": 1.6401342603692162, "grad_norm": 1.658565710758125, "learning_rate": 1.720707252943451e-06, "loss": 0.4833422005176544, "step": 10262 }, { "epoch": 1.6402940941420923, "grad_norm": 1.5447547287486754, "learning_rate": 1.7192254531235132e-06, "loss": 0.4660557508468628, "step": 10263 }, { "epoch": 1.6404539279149684, "grad_norm": 1.486122861901313, "learning_rate": 1.7177442316034664e-06, "loss": 0.6176058053970337, "step": 10264 }, { "epoch": 1.6406137616878447, "grad_norm": 1.703385118138266, "learning_rate": 1.7162635884867551e-06, "loss": 0.5260400772094727, "step": 10265 }, { "epoch": 1.640773595460721, "grad_norm": 1.7902469582094782, "learning_rate": 1.7147835238767807e-06, "loss": 0.5212682485580444, "step": 10266 }, { "epoch": 1.640933429233597, "grad_norm": 1.530891078428288, "learning_rate": 1.7133040378769039e-06, "loss": 0.5726618766784668, "step": 10267 }, { "epoch": 1.6410932630064732, "grad_norm": 1.4672137271711647, "learning_rate": 1.711825130590451e-06, "loss": 0.3623868227005005, "step": 10268 }, { "epoch": 1.6412530967793495, "grad_norm": 1.6720506436490499, "learning_rate": 1.7103468021207015e-06, "loss": 0.6179578304290771, "step": 10269 }, { "epoch": 1.6414129305522258, "grad_norm": 1.8852409916726944, "learning_rate": 1.7088690525708973e-06, "loss": 0.5316182971000671, "step": 10270 }, { "epoch": 1.641572764325102, "grad_norm": 1.409840733440586, "learning_rate": 1.7073918820442358e-06, "loss": 0.5135617256164551, "step": 10271 }, { "epoch": 1.641732598097978, "grad_norm": 1.4671676074168916, "learning_rate": 1.7059152906438836e-06, "loss": 0.6354539394378662, "step": 10272 }, { "epoch": 1.6418924318708543, "grad_norm": 1.7159252023915246, "learning_rate": 1.7044392784729568e-06, "loss": 0.5132986307144165, "step": 10273 }, { "epoch": 1.6420522656437306, "grad_norm": 1.6977263469204291, "learning_rate": 1.7029638456345353e-06, "loss": 0.5710635185241699, "step": 10274 }, { "epoch": 1.6422120994166067, "grad_norm": 1.696024347813116, "learning_rate": 1.701488992231658e-06, "loss": 0.47216320037841797, "step": 10275 }, { "epoch": 1.6423719331894828, "grad_norm": 1.7628290722413424, "learning_rate": 1.70001471836732e-06, "loss": 0.5978369116783142, "step": 10276 }, { "epoch": 1.6425317669623591, "grad_norm": 1.532184951028728, "learning_rate": 1.6985410241444844e-06, "loss": 0.5608137845993042, "step": 10277 }, { "epoch": 1.6426916007352355, "grad_norm": 1.3588278859284377, "learning_rate": 1.6970679096660692e-06, "loss": 0.46539390087127686, "step": 10278 }, { "epoch": 1.6428514345081116, "grad_norm": 1.5877821673654577, "learning_rate": 1.6955953750349485e-06, "loss": 0.5057376623153687, "step": 10279 }, { "epoch": 1.6430112682809876, "grad_norm": 2.037134663281991, "learning_rate": 1.6941234203539614e-06, "loss": 0.5882127285003662, "step": 10280 }, { "epoch": 1.643171102053864, "grad_norm": 1.6342253570104641, "learning_rate": 1.692652045725901e-06, "loss": 0.44389039278030396, "step": 10281 }, { "epoch": 1.6433309358267403, "grad_norm": 1.395537425033439, "learning_rate": 1.691181251253523e-06, "loss": 0.5360576510429382, "step": 10282 }, { "epoch": 1.6434907695996164, "grad_norm": 1.6260035586836672, "learning_rate": 1.6897110370395452e-06, "loss": 0.5960166454315186, "step": 10283 }, { "epoch": 1.6436506033724925, "grad_norm": 1.6099298431813205, "learning_rate": 1.688241403186641e-06, "loss": 0.6183191537857056, "step": 10284 }, { "epoch": 1.6438104371453688, "grad_norm": 1.5548289192428935, "learning_rate": 1.6867723497974454e-06, "loss": 0.4306783676147461, "step": 10285 }, { "epoch": 1.643970270918245, "grad_norm": 1.5292204602158153, "learning_rate": 1.6853038769745466e-06, "loss": 0.5042925477027893, "step": 10286 }, { "epoch": 1.6441301046911212, "grad_norm": 1.6877766588071281, "learning_rate": 1.6838359848205055e-06, "loss": 0.5936790704727173, "step": 10287 }, { "epoch": 1.6442899384639973, "grad_norm": 1.6546066392085592, "learning_rate": 1.6823686734378298e-06, "loss": 0.6176592111587524, "step": 10288 }, { "epoch": 1.6444497722368736, "grad_norm": 1.5939078986733326, "learning_rate": 1.680901942928992e-06, "loss": 0.4246254861354828, "step": 10289 }, { "epoch": 1.64460960600975, "grad_norm": 1.74674763184029, "learning_rate": 1.6794357933964234e-06, "loss": 0.5271140933036804, "step": 10290 }, { "epoch": 1.644769439782626, "grad_norm": 1.5918323945356978, "learning_rate": 1.6779702249425122e-06, "loss": 0.7086026072502136, "step": 10291 }, { "epoch": 1.6449292735555021, "grad_norm": 1.5376396142341386, "learning_rate": 1.6765052376696134e-06, "loss": 0.5685639381408691, "step": 10292 }, { "epoch": 1.6450891073283787, "grad_norm": 1.6591613278739081, "learning_rate": 1.6750408316800337e-06, "loss": 0.5999614000320435, "step": 10293 }, { "epoch": 1.6452489411012547, "grad_norm": 1.5303733087285307, "learning_rate": 1.6735770070760427e-06, "loss": 0.4835222363471985, "step": 10294 }, { "epoch": 1.6454087748741308, "grad_norm": 1.542392320118025, "learning_rate": 1.6721137639598684e-06, "loss": 0.5494489669799805, "step": 10295 }, { "epoch": 1.6455686086470072, "grad_norm": 1.7127809259513878, "learning_rate": 1.670651102433697e-06, "loss": 0.5996339321136475, "step": 10296 }, { "epoch": 1.6457284424198835, "grad_norm": 1.5991956638933393, "learning_rate": 1.6691890225996754e-06, "loss": 0.592807412147522, "step": 10297 }, { "epoch": 1.6458882761927596, "grad_norm": 1.4576795220651257, "learning_rate": 1.6677275245599155e-06, "loss": 0.545479416847229, "step": 10298 }, { "epoch": 1.6460481099656357, "grad_norm": 1.5831045377579596, "learning_rate": 1.6662666084164791e-06, "loss": 0.5228767395019531, "step": 10299 }, { "epoch": 1.646207943738512, "grad_norm": 1.482294655367396, "learning_rate": 1.664806274271391e-06, "loss": 0.4551173448562622, "step": 10300 }, { "epoch": 1.6463677775113883, "grad_norm": 1.6518979844129122, "learning_rate": 1.6633465222266377e-06, "loss": 0.5703585743904114, "step": 10301 }, { "epoch": 1.6465276112842644, "grad_norm": 1.545134203893012, "learning_rate": 1.6618873523841582e-06, "loss": 0.5136078596115112, "step": 10302 }, { "epoch": 1.6466874450571405, "grad_norm": 1.4867334292393293, "learning_rate": 1.6604287648458627e-06, "loss": 0.5631700754165649, "step": 10303 }, { "epoch": 1.6468472788300168, "grad_norm": 1.3152582851075199, "learning_rate": 1.6589707597136096e-06, "loss": 0.3806648552417755, "step": 10304 }, { "epoch": 1.6470071126028931, "grad_norm": 1.5728886282445922, "learning_rate": 1.6575133370892226e-06, "loss": 0.4794714152812958, "step": 10305 }, { "epoch": 1.6471669463757692, "grad_norm": 1.8229068276698526, "learning_rate": 1.6560564970744798e-06, "loss": 0.6635535955429077, "step": 10306 }, { "epoch": 1.6473267801486453, "grad_norm": 1.459276972979647, "learning_rate": 1.6546002397711247e-06, "loss": 0.5214073657989502, "step": 10307 }, { "epoch": 1.6474866139215216, "grad_norm": 1.582570636400161, "learning_rate": 1.6531445652808576e-06, "loss": 0.5098367929458618, "step": 10308 }, { "epoch": 1.647646447694398, "grad_norm": 1.8173534099482282, "learning_rate": 1.6516894737053358e-06, "loss": 0.48858582973480225, "step": 10309 }, { "epoch": 1.647806281467274, "grad_norm": 1.4390628505888492, "learning_rate": 1.650234965146179e-06, "loss": 0.5280856490135193, "step": 10310 }, { "epoch": 1.6479661152401501, "grad_norm": 1.4562502304073064, "learning_rate": 1.648781039704962e-06, "loss": 0.4567068815231323, "step": 10311 }, { "epoch": 1.6481259490130264, "grad_norm": 1.503977750283582, "learning_rate": 1.6473276974832254e-06, "loss": 0.553852915763855, "step": 10312 }, { "epoch": 1.6482857827859028, "grad_norm": 1.6005484065094318, "learning_rate": 1.6458749385824647e-06, "loss": 0.6192032098770142, "step": 10313 }, { "epoch": 1.6484456165587789, "grad_norm": 1.6298526117985022, "learning_rate": 1.6444227631041342e-06, "loss": 0.546985387802124, "step": 10314 }, { "epoch": 1.648605450331655, "grad_norm": 1.5513544617378963, "learning_rate": 1.6429711711496499e-06, "loss": 0.5026179552078247, "step": 10315 }, { "epoch": 1.6487652841045313, "grad_norm": 1.3746946960279163, "learning_rate": 1.6415201628203835e-06, "loss": 0.5773435831069946, "step": 10316 }, { "epoch": 1.6489251178774076, "grad_norm": 1.6812645240694393, "learning_rate": 1.640069738217669e-06, "loss": 0.3923805356025696, "step": 10317 }, { "epoch": 1.6490849516502837, "grad_norm": 1.6409552695837168, "learning_rate": 1.6386198974428025e-06, "loss": 0.517410397529602, "step": 10318 }, { "epoch": 1.6492447854231598, "grad_norm": 1.5353485757246046, "learning_rate": 1.6371706405970333e-06, "loss": 0.5187458992004395, "step": 10319 }, { "epoch": 1.649404619196036, "grad_norm": 1.6558484907908253, "learning_rate": 1.6357219677815717e-06, "loss": 0.4955265522003174, "step": 10320 }, { "epoch": 1.6495644529689124, "grad_norm": 1.6095979486942438, "learning_rate": 1.6342738790975864e-06, "loss": 0.49840444326400757, "step": 10321 }, { "epoch": 1.6497242867417885, "grad_norm": 1.6150602641524712, "learning_rate": 1.6328263746462103e-06, "loss": 0.48751401901245117, "step": 10322 }, { "epoch": 1.6498841205146646, "grad_norm": 1.6304952636549124, "learning_rate": 1.6313794545285312e-06, "loss": 0.582468569278717, "step": 10323 }, { "epoch": 1.650043954287541, "grad_norm": 1.4471403010721282, "learning_rate": 1.6299331188455959e-06, "loss": 0.5982876420021057, "step": 10324 }, { "epoch": 1.6502037880604172, "grad_norm": 1.5929908080131923, "learning_rate": 1.6284873676984115e-06, "loss": 0.4558939039707184, "step": 10325 }, { "epoch": 1.6503636218332933, "grad_norm": 1.6887887123545329, "learning_rate": 1.6270422011879417e-06, "loss": 0.6460006833076477, "step": 10326 }, { "epoch": 1.6505234556061694, "grad_norm": 1.514799018794092, "learning_rate": 1.6255976194151168e-06, "loss": 0.5516673922538757, "step": 10327 }, { "epoch": 1.6506832893790457, "grad_norm": 1.582683342756914, "learning_rate": 1.6241536224808186e-06, "loss": 0.5014259219169617, "step": 10328 }, { "epoch": 1.650843123151922, "grad_norm": 1.6816604858577708, "learning_rate": 1.6227102104858904e-06, "loss": 0.6152321100234985, "step": 10329 }, { "epoch": 1.6510029569247981, "grad_norm": 1.434139776852234, "learning_rate": 1.6212673835311354e-06, "loss": 0.42281967401504517, "step": 10330 }, { "epoch": 1.6511627906976745, "grad_norm": 2.3802075506716776, "learning_rate": 1.6198251417173138e-06, "loss": 0.6513347029685974, "step": 10331 }, { "epoch": 1.6513226244705508, "grad_norm": 1.8373559655491254, "learning_rate": 1.6183834851451496e-06, "loss": 0.5405888557434082, "step": 10332 }, { "epoch": 1.6514824582434269, "grad_norm": 1.3910901410788121, "learning_rate": 1.6169424139153234e-06, "loss": 0.4220278859138489, "step": 10333 }, { "epoch": 1.651642292016303, "grad_norm": 1.4879574809927631, "learning_rate": 1.6155019281284712e-06, "loss": 0.5492222309112549, "step": 10334 }, { "epoch": 1.6518021257891793, "grad_norm": 1.426770732292456, "learning_rate": 1.6140620278851915e-06, "loss": 0.572598934173584, "step": 10335 }, { "epoch": 1.6519619595620556, "grad_norm": 1.3813904548121865, "learning_rate": 1.612622713286045e-06, "loss": 0.4481303095817566, "step": 10336 }, { "epoch": 1.6521217933349317, "grad_norm": 1.5459454796363736, "learning_rate": 1.6111839844315447e-06, "loss": 0.6176250576972961, "step": 10337 }, { "epoch": 1.6522816271078078, "grad_norm": 1.4806062257883343, "learning_rate": 1.6097458414221711e-06, "loss": 0.5397316813468933, "step": 10338 }, { "epoch": 1.652441460880684, "grad_norm": 1.4292464448416191, "learning_rate": 1.6083082843583552e-06, "loss": 0.5078722238540649, "step": 10339 }, { "epoch": 1.6526012946535604, "grad_norm": 1.759758324255394, "learning_rate": 1.6068713133404922e-06, "loss": 0.6403666138648987, "step": 10340 }, { "epoch": 1.6527611284264365, "grad_norm": 1.7567382561783218, "learning_rate": 1.605434928468933e-06, "loss": 0.6140843033790588, "step": 10341 }, { "epoch": 1.6529209621993126, "grad_norm": 2.419335019752772, "learning_rate": 1.6039991298439927e-06, "loss": 0.5486142635345459, "step": 10342 }, { "epoch": 1.653080795972189, "grad_norm": 1.349146143457573, "learning_rate": 1.6025639175659412e-06, "loss": 0.4860588014125824, "step": 10343 }, { "epoch": 1.6532406297450652, "grad_norm": 1.5004129480570352, "learning_rate": 1.6011292917350097e-06, "loss": 0.5716277360916138, "step": 10344 }, { "epoch": 1.6534004635179413, "grad_norm": 1.418083904937703, "learning_rate": 1.5996952524513843e-06, "loss": 0.4846087694168091, "step": 10345 }, { "epoch": 1.6535602972908174, "grad_norm": 1.386448061382809, "learning_rate": 1.598261799815214e-06, "loss": 0.45672792196273804, "step": 10346 }, { "epoch": 1.6537201310636938, "grad_norm": 1.4839281872591823, "learning_rate": 1.5968289339266084e-06, "loss": 0.4636354446411133, "step": 10347 }, { "epoch": 1.65387996483657, "grad_norm": 1.4887268754477387, "learning_rate": 1.5953966548856326e-06, "loss": 0.5137008428573608, "step": 10348 }, { "epoch": 1.6540397986094462, "grad_norm": 1.4953377879065157, "learning_rate": 1.5939649627923126e-06, "loss": 0.5640698671340942, "step": 10349 }, { "epoch": 1.6541996323823223, "grad_norm": 1.4179319301852384, "learning_rate": 1.5925338577466287e-06, "loss": 0.41534703969955444, "step": 10350 }, { "epoch": 1.6543594661551986, "grad_norm": 1.8152823023630944, "learning_rate": 1.59110333984853e-06, "loss": 0.49632060527801514, "step": 10351 }, { "epoch": 1.654519299928075, "grad_norm": 1.6201110557185066, "learning_rate": 1.589673409197916e-06, "loss": 0.5074421763420105, "step": 10352 }, { "epoch": 1.654679133700951, "grad_norm": 1.8448004955487827, "learning_rate": 1.5882440658946474e-06, "loss": 0.55217444896698, "step": 10353 }, { "epoch": 1.654838967473827, "grad_norm": 1.577774471253038, "learning_rate": 1.5868153100385464e-06, "loss": 0.5607395172119141, "step": 10354 }, { "epoch": 1.6549988012467034, "grad_norm": 1.7172511039042018, "learning_rate": 1.5853871417293876e-06, "loss": 0.5075214505195618, "step": 10355 }, { "epoch": 1.6551586350195797, "grad_norm": 1.513467413657453, "learning_rate": 1.5839595610669145e-06, "loss": 0.5621088147163391, "step": 10356 }, { "epoch": 1.6553184687924558, "grad_norm": 1.6637988398482784, "learning_rate": 1.5825325681508209e-06, "loss": 0.5653643608093262, "step": 10357 }, { "epoch": 1.655478302565332, "grad_norm": 1.7177075053652193, "learning_rate": 1.5811061630807667e-06, "loss": 0.6640826463699341, "step": 10358 }, { "epoch": 1.6556381363382082, "grad_norm": 1.320568109331954, "learning_rate": 1.5796803459563648e-06, "loss": 0.40096133947372437, "step": 10359 }, { "epoch": 1.6557979701110845, "grad_norm": 1.5394603107214442, "learning_rate": 1.5782551168771886e-06, "loss": 0.4519657492637634, "step": 10360 }, { "epoch": 1.6559578038839606, "grad_norm": 1.7649638814951916, "learning_rate": 1.5768304759427688e-06, "loss": 0.5566846132278442, "step": 10361 }, { "epoch": 1.6561176376568367, "grad_norm": 1.7162258654832192, "learning_rate": 1.5754064232526023e-06, "loss": 0.5581387281417847, "step": 10362 }, { "epoch": 1.656277471429713, "grad_norm": 1.8415301507241881, "learning_rate": 1.5739829589061384e-06, "loss": 0.4449031352996826, "step": 10363 }, { "epoch": 1.6564373052025894, "grad_norm": 1.5238387669916644, "learning_rate": 1.5725600830027854e-06, "loss": 0.649450421333313, "step": 10364 }, { "epoch": 1.6565971389754655, "grad_norm": 1.6368666821284052, "learning_rate": 1.5711377956419115e-06, "loss": 0.5194652080535889, "step": 10365 }, { "epoch": 1.6567569727483418, "grad_norm": 1.4977625189827732, "learning_rate": 1.5697160969228442e-06, "loss": 0.5898244380950928, "step": 10366 }, { "epoch": 1.656916806521218, "grad_norm": 1.586180672180733, "learning_rate": 1.5682949869448715e-06, "loss": 0.5428628921508789, "step": 10367 }, { "epoch": 1.6570766402940942, "grad_norm": 1.7748798872118063, "learning_rate": 1.56687446580724e-06, "loss": 0.6341651678085327, "step": 10368 }, { "epoch": 1.6572364740669703, "grad_norm": 1.75949585999509, "learning_rate": 1.5654545336091497e-06, "loss": 0.5324609875679016, "step": 10369 }, { "epoch": 1.6573963078398466, "grad_norm": 1.339111527579912, "learning_rate": 1.5640351904497653e-06, "loss": 0.4914647042751312, "step": 10370 }, { "epoch": 1.657556141612723, "grad_norm": 1.3702950355831194, "learning_rate": 1.5626164364282103e-06, "loss": 0.4239102005958557, "step": 10371 }, { "epoch": 1.657715975385599, "grad_norm": 1.5026173307777182, "learning_rate": 1.5611982716435648e-06, "loss": 0.6512537002563477, "step": 10372 }, { "epoch": 1.657875809158475, "grad_norm": 1.5464308928872808, "learning_rate": 1.5597806961948681e-06, "loss": 0.4268460273742676, "step": 10373 }, { "epoch": 1.6580356429313514, "grad_norm": 1.736771624990034, "learning_rate": 1.5583637101811177e-06, "loss": 0.4901607632637024, "step": 10374 }, { "epoch": 1.6581954767042277, "grad_norm": 1.6049603323887247, "learning_rate": 1.5569473137012692e-06, "loss": 0.5347591638565063, "step": 10375 }, { "epoch": 1.6583553104771038, "grad_norm": 1.6744897038856854, "learning_rate": 1.5555315068542432e-06, "loss": 0.5574221611022949, "step": 10376 }, { "epoch": 1.65851514424998, "grad_norm": 1.3832600063432348, "learning_rate": 1.5541162897389107e-06, "loss": 0.46593451499938965, "step": 10377 }, { "epoch": 1.6586749780228562, "grad_norm": 1.9046019567552401, "learning_rate": 1.5527016624541103e-06, "loss": 0.5310299396514893, "step": 10378 }, { "epoch": 1.6588348117957326, "grad_norm": 1.6570141562419034, "learning_rate": 1.5512876250986308e-06, "loss": 0.4720522463321686, "step": 10379 }, { "epoch": 1.6589946455686087, "grad_norm": 1.6071246880242582, "learning_rate": 1.5498741777712235e-06, "loss": 0.6190582513809204, "step": 10380 }, { "epoch": 1.6591544793414847, "grad_norm": 1.7179340640128893, "learning_rate": 1.5484613205705978e-06, "loss": 0.6240072250366211, "step": 10381 }, { "epoch": 1.659314313114361, "grad_norm": 1.455527780308928, "learning_rate": 1.5470490535954264e-06, "loss": 0.5692073106765747, "step": 10382 }, { "epoch": 1.6594741468872374, "grad_norm": 1.3153814897679472, "learning_rate": 1.5456373769443345e-06, "loss": 0.4468618333339691, "step": 10383 }, { "epoch": 1.6596339806601135, "grad_norm": 1.3577103902807295, "learning_rate": 1.544226290715909e-06, "loss": 0.4503334164619446, "step": 10384 }, { "epoch": 1.6597938144329896, "grad_norm": 1.6003306026799191, "learning_rate": 1.542815795008692e-06, "loss": 0.5324071645736694, "step": 10385 }, { "epoch": 1.6599536482058659, "grad_norm": 1.7904578042746622, "learning_rate": 1.5414058899211926e-06, "loss": 0.6001888513565063, "step": 10386 }, { "epoch": 1.6601134819787422, "grad_norm": 1.6387428676602052, "learning_rate": 1.539996575551872e-06, "loss": 0.5741066932678223, "step": 10387 }, { "epoch": 1.6602733157516183, "grad_norm": 1.8602756812701324, "learning_rate": 1.5385878519991514e-06, "loss": 0.605177640914917, "step": 10388 }, { "epoch": 1.6604331495244944, "grad_norm": 1.6199135893475882, "learning_rate": 1.5371797193614113e-06, "loss": 0.4666547179222107, "step": 10389 }, { "epoch": 1.6605929832973707, "grad_norm": 1.5555257120652672, "learning_rate": 1.535772177736986e-06, "loss": 0.5840760469436646, "step": 10390 }, { "epoch": 1.660752817070247, "grad_norm": 1.4512580967597029, "learning_rate": 1.5343652272241815e-06, "loss": 0.49632084369659424, "step": 10391 }, { "epoch": 1.6609126508431231, "grad_norm": 1.6213430831804385, "learning_rate": 1.5329588679212493e-06, "loss": 0.5721763372421265, "step": 10392 }, { "epoch": 1.6610724846159992, "grad_norm": 1.7341525585409154, "learning_rate": 1.531553099926405e-06, "loss": 0.5706750154495239, "step": 10393 }, { "epoch": 1.6612323183888755, "grad_norm": 1.5578514987835153, "learning_rate": 1.5301479233378235e-06, "loss": 0.5798880457878113, "step": 10394 }, { "epoch": 1.6613921521617518, "grad_norm": 1.6387163244738292, "learning_rate": 1.5287433382536342e-06, "loss": 0.6655653715133667, "step": 10395 }, { "epoch": 1.661551985934628, "grad_norm": 1.6101498048702787, "learning_rate": 1.527339344771933e-06, "loss": 0.48181936144828796, "step": 10396 }, { "epoch": 1.661711819707504, "grad_norm": 1.513782529304722, "learning_rate": 1.525935942990765e-06, "loss": 0.5010199546813965, "step": 10397 }, { "epoch": 1.6618716534803804, "grad_norm": 1.6069882160354423, "learning_rate": 1.5245331330081436e-06, "loss": 0.5822975635528564, "step": 10398 }, { "epoch": 1.6620314872532567, "grad_norm": 1.3686544025951806, "learning_rate": 1.523130914922034e-06, "loss": 0.4281354546546936, "step": 10399 }, { "epoch": 1.6621913210261328, "grad_norm": 2.125978960814683, "learning_rate": 1.5217292888303603e-06, "loss": 0.7744104862213135, "step": 10400 }, { "epoch": 1.662351154799009, "grad_norm": 1.466206081968375, "learning_rate": 1.5203282548310106e-06, "loss": 0.49208706617355347, "step": 10401 }, { "epoch": 1.6625109885718854, "grad_norm": 1.4656809988362334, "learning_rate": 1.5189278130218265e-06, "loss": 0.41099268198013306, "step": 10402 }, { "epoch": 1.6626708223447615, "grad_norm": 1.5876753871404012, "learning_rate": 1.5175279635006102e-06, "loss": 0.473871111869812, "step": 10403 }, { "epoch": 1.6628306561176376, "grad_norm": 1.751410915902842, "learning_rate": 1.5161287063651209e-06, "loss": 0.5219014883041382, "step": 10404 }, { "epoch": 1.662990489890514, "grad_norm": 1.4863593900140055, "learning_rate": 1.514730041713076e-06, "loss": 0.5429205298423767, "step": 10405 }, { "epoch": 1.6631503236633902, "grad_norm": 1.4375847935070052, "learning_rate": 1.513331969642159e-06, "loss": 0.4660384953022003, "step": 10406 }, { "epoch": 1.6633101574362663, "grad_norm": 1.9261479164249442, "learning_rate": 1.5119344902500022e-06, "loss": 0.6050187349319458, "step": 10407 }, { "epoch": 1.6634699912091424, "grad_norm": 1.445545240870345, "learning_rate": 1.5105376036342023e-06, "loss": 0.6095868349075317, "step": 10408 }, { "epoch": 1.6636298249820187, "grad_norm": 1.492015014591466, "learning_rate": 1.5091413098923125e-06, "loss": 0.5589683055877686, "step": 10409 }, { "epoch": 1.663789658754895, "grad_norm": 1.730384636410036, "learning_rate": 1.5077456091218412e-06, "loss": 0.649179220199585, "step": 10410 }, { "epoch": 1.6639494925277711, "grad_norm": 1.7721270997250753, "learning_rate": 1.5063505014202651e-06, "loss": 0.6188567280769348, "step": 10411 }, { "epoch": 1.6641093263006472, "grad_norm": 1.5359471139777763, "learning_rate": 1.5049559868850116e-06, "loss": 0.4666043221950531, "step": 10412 }, { "epoch": 1.6642691600735235, "grad_norm": 1.486059455105017, "learning_rate": 1.5035620656134674e-06, "loss": 0.5294106006622314, "step": 10413 }, { "epoch": 1.6644289938463999, "grad_norm": 1.599807203068883, "learning_rate": 1.502168737702978e-06, "loss": 0.49447745084762573, "step": 10414 }, { "epoch": 1.664588827619276, "grad_norm": 1.3479191753894884, "learning_rate": 1.5007760032508534e-06, "loss": 0.3983916640281677, "step": 10415 }, { "epoch": 1.664748661392152, "grad_norm": 1.49367924811468, "learning_rate": 1.4993838623543534e-06, "loss": 0.5490773320198059, "step": 10416 }, { "epoch": 1.6649084951650284, "grad_norm": 1.5642373954804618, "learning_rate": 1.497992315110698e-06, "loss": 0.5159741640090942, "step": 10417 }, { "epoch": 1.6650683289379047, "grad_norm": 1.7776774106834572, "learning_rate": 1.4966013616170728e-06, "loss": 0.6348685026168823, "step": 10418 }, { "epoch": 1.6652281627107808, "grad_norm": 1.5921569018252058, "learning_rate": 1.4952110019706156e-06, "loss": 0.5433463454246521, "step": 10419 }, { "epoch": 1.6653879964836569, "grad_norm": 1.5061154728844637, "learning_rate": 1.4938212362684213e-06, "loss": 0.5132262110710144, "step": 10420 }, { "epoch": 1.6655478302565332, "grad_norm": 1.6276400274859708, "learning_rate": 1.4924320646075508e-06, "loss": 0.5505768656730652, "step": 10421 }, { "epoch": 1.6657076640294095, "grad_norm": 1.6074786153093736, "learning_rate": 1.4910434870850166e-06, "loss": 0.5145381689071655, "step": 10422 }, { "epoch": 1.6658674978022856, "grad_norm": 1.6191023045362412, "learning_rate": 1.489655503797791e-06, "loss": 0.6065109968185425, "step": 10423 }, { "epoch": 1.6660273315751617, "grad_norm": 1.5623704758548016, "learning_rate": 1.488268114842808e-06, "loss": 0.5335051417350769, "step": 10424 }, { "epoch": 1.666187165348038, "grad_norm": 1.6341739354324534, "learning_rate": 1.4868813203169541e-06, "loss": 0.6344558000564575, "step": 10425 }, { "epoch": 1.6663469991209143, "grad_norm": 1.5763145281579125, "learning_rate": 1.485495120317083e-06, "loss": 0.579367995262146, "step": 10426 }, { "epoch": 1.6665068328937904, "grad_norm": 1.7021679668007839, "learning_rate": 1.4841095149399998e-06, "loss": 0.5073459148406982, "step": 10427 }, { "epoch": 1.6666666666666665, "grad_norm": 1.8275271949504264, "learning_rate": 1.48272450428247e-06, "loss": 0.5499062538146973, "step": 10428 }, { "epoch": 1.6668265004395428, "grad_norm": 1.5297628964091021, "learning_rate": 1.4813400884412189e-06, "loss": 0.5916191339492798, "step": 10429 }, { "epoch": 1.6669863342124192, "grad_norm": 1.3868342957020228, "learning_rate": 1.4799562675129253e-06, "loss": 0.5967046618461609, "step": 10430 }, { "epoch": 1.6671461679852952, "grad_norm": 1.5205430024798157, "learning_rate": 1.4785730415942356e-06, "loss": 0.5150541663169861, "step": 10431 }, { "epoch": 1.6673060017581713, "grad_norm": 1.6706668770325772, "learning_rate": 1.477190410781747e-06, "loss": 0.541822612285614, "step": 10432 }, { "epoch": 1.6674658355310477, "grad_norm": 1.4613687595356981, "learning_rate": 1.4758083751720187e-06, "loss": 0.544575572013855, "step": 10433 }, { "epoch": 1.667625669303924, "grad_norm": 1.6570606708085918, "learning_rate": 1.4744269348615624e-06, "loss": 0.5511144399642944, "step": 10434 }, { "epoch": 1.6677855030768, "grad_norm": 2.324766987207282, "learning_rate": 1.4730460899468601e-06, "loss": 0.4918724298477173, "step": 10435 }, { "epoch": 1.6679453368496764, "grad_norm": 1.6457546083061727, "learning_rate": 1.4716658405243411e-06, "loss": 0.5647929906845093, "step": 10436 }, { "epoch": 1.6681051706225527, "grad_norm": 1.6113609070839892, "learning_rate": 1.4702861866903962e-06, "loss": 0.5659569501876831, "step": 10437 }, { "epoch": 1.6682650043954288, "grad_norm": 1.7398034250644347, "learning_rate": 1.468907128541378e-06, "loss": 0.5242612361907959, "step": 10438 }, { "epoch": 1.668424838168305, "grad_norm": 1.5610620879582464, "learning_rate": 1.4675286661735955e-06, "loss": 0.6245757341384888, "step": 10439 }, { "epoch": 1.6685846719411812, "grad_norm": 3.090058935041092, "learning_rate": 1.4661507996833114e-06, "loss": 0.6124241352081299, "step": 10440 }, { "epoch": 1.6687445057140575, "grad_norm": 1.6004715887966872, "learning_rate": 1.4647735291667564e-06, "loss": 0.484146386384964, "step": 10441 }, { "epoch": 1.6689043394869336, "grad_norm": 1.5918695609342017, "learning_rate": 1.4633968547201104e-06, "loss": 0.554603099822998, "step": 10442 }, { "epoch": 1.6690641732598097, "grad_norm": 1.3857117688599465, "learning_rate": 1.4620207764395177e-06, "loss": 0.44484224915504456, "step": 10443 }, { "epoch": 1.669224007032686, "grad_norm": 1.6230512212131918, "learning_rate": 1.460645294421078e-06, "loss": 0.6306934356689453, "step": 10444 }, { "epoch": 1.6693838408055623, "grad_norm": 1.610271005630062, "learning_rate": 1.4592704087608467e-06, "loss": 0.4960832893848419, "step": 10445 }, { "epoch": 1.6695436745784384, "grad_norm": 1.6819721312484146, "learning_rate": 1.4578961195548469e-06, "loss": 0.6135746240615845, "step": 10446 }, { "epoch": 1.6697035083513145, "grad_norm": 1.8084150049634513, "learning_rate": 1.4565224268990507e-06, "loss": 0.5931421518325806, "step": 10447 }, { "epoch": 1.6698633421241909, "grad_norm": 1.8284486650533236, "learning_rate": 1.4551493308893927e-06, "loss": 0.5423244833946228, "step": 10448 }, { "epoch": 1.6700231758970672, "grad_norm": 1.8075728946505312, "learning_rate": 1.4537768316217614e-06, "loss": 0.47124865651130676, "step": 10449 }, { "epoch": 1.6701830096699433, "grad_norm": 1.7333625969694022, "learning_rate": 1.452404929192014e-06, "loss": 0.6785787343978882, "step": 10450 }, { "epoch": 1.6703428434428194, "grad_norm": 1.523765625667307, "learning_rate": 1.4510336236959554e-06, "loss": 0.4683673083782196, "step": 10451 }, { "epoch": 1.6705026772156957, "grad_norm": 1.7927648679692954, "learning_rate": 1.449662915229354e-06, "loss": 0.6086465120315552, "step": 10452 }, { "epoch": 1.670662510988572, "grad_norm": 1.7467187201963603, "learning_rate": 1.448292803887934e-06, "loss": 0.5968157052993774, "step": 10453 }, { "epoch": 1.670822344761448, "grad_norm": 1.603014777955207, "learning_rate": 1.4469232897673769e-06, "loss": 0.5432277917861938, "step": 10454 }, { "epoch": 1.6709821785343242, "grad_norm": 1.6260630744666469, "learning_rate": 1.4455543729633291e-06, "loss": 0.540361762046814, "step": 10455 }, { "epoch": 1.6711420123072005, "grad_norm": 1.449089308091235, "learning_rate": 1.4441860535713902e-06, "loss": 0.47704339027404785, "step": 10456 }, { "epoch": 1.6713018460800768, "grad_norm": 1.791892798144833, "learning_rate": 1.4428183316871158e-06, "loss": 0.4735126495361328, "step": 10457 }, { "epoch": 1.671461679852953, "grad_norm": 1.7483083652133793, "learning_rate": 1.4414512074060261e-06, "loss": 0.6555209159851074, "step": 10458 }, { "epoch": 1.671621513625829, "grad_norm": 1.665048118043635, "learning_rate": 1.4400846808235946e-06, "loss": 0.6091260313987732, "step": 10459 }, { "epoch": 1.6717813473987053, "grad_norm": 1.624033689331309, "learning_rate": 1.438718752035253e-06, "loss": 0.49586135149002075, "step": 10460 }, { "epoch": 1.6719411811715816, "grad_norm": 1.6075372675644093, "learning_rate": 1.437353421136396e-06, "loss": 0.5994814038276672, "step": 10461 }, { "epoch": 1.6721010149444577, "grad_norm": 1.813749565513171, "learning_rate": 1.4359886882223728e-06, "loss": 0.5939407348632812, "step": 10462 }, { "epoch": 1.6722608487173338, "grad_norm": 1.493990298491701, "learning_rate": 1.4346245533884917e-06, "loss": 0.486408531665802, "step": 10463 }, { "epoch": 1.6724206824902101, "grad_norm": 1.7586711004556885, "learning_rate": 1.4332610167300142e-06, "loss": 0.5253610610961914, "step": 10464 }, { "epoch": 1.6725805162630865, "grad_norm": 1.511761607018625, "learning_rate": 1.4318980783421732e-06, "loss": 0.6597140431404114, "step": 10465 }, { "epoch": 1.6727403500359626, "grad_norm": 1.7830103465637162, "learning_rate": 1.4305357383201458e-06, "loss": 0.600877046585083, "step": 10466 }, { "epoch": 1.6729001838088386, "grad_norm": 1.6237894825947998, "learning_rate": 1.4291739967590746e-06, "loss": 0.6061272621154785, "step": 10467 }, { "epoch": 1.673060017581715, "grad_norm": 1.4930312492077547, "learning_rate": 1.427812853754058e-06, "loss": 0.5938606262207031, "step": 10468 }, { "epoch": 1.6732198513545913, "grad_norm": 1.7019428794290454, "learning_rate": 1.4264523094001525e-06, "loss": 0.4367292821407318, "step": 10469 }, { "epoch": 1.6733796851274674, "grad_norm": 1.7743576474475065, "learning_rate": 1.4250923637923776e-06, "loss": 0.5386922359466553, "step": 10470 }, { "epoch": 1.6735395189003437, "grad_norm": 1.5007014136144607, "learning_rate": 1.4237330170257035e-06, "loss": 0.5150753259658813, "step": 10471 }, { "epoch": 1.67369935267322, "grad_norm": 1.5417368900567299, "learning_rate": 1.4223742691950637e-06, "loss": 0.6518837213516235, "step": 10472 }, { "epoch": 1.673859186446096, "grad_norm": 1.4795066063051787, "learning_rate": 1.4210161203953487e-06, "loss": 0.5073148012161255, "step": 10473 }, { "epoch": 1.6740190202189722, "grad_norm": 1.814921211379266, "learning_rate": 1.4196585707214027e-06, "loss": 0.6947974562644958, "step": 10474 }, { "epoch": 1.6741788539918485, "grad_norm": 1.7523871636086739, "learning_rate": 1.4183016202680378e-06, "loss": 0.643968939781189, "step": 10475 }, { "epoch": 1.6743386877647248, "grad_norm": 1.5279827936488375, "learning_rate": 1.4169452691300167e-06, "loss": 0.4560643434524536, "step": 10476 }, { "epoch": 1.674498521537601, "grad_norm": 1.422230111270504, "learning_rate": 1.4155895174020584e-06, "loss": 0.5028533339500427, "step": 10477 }, { "epoch": 1.674658355310477, "grad_norm": 1.539222233272894, "learning_rate": 1.4142343651788504e-06, "loss": 0.4688475728034973, "step": 10478 }, { "epoch": 1.6748181890833533, "grad_norm": 1.4818519830267542, "learning_rate": 1.4128798125550258e-06, "loss": 0.42511481046676636, "step": 10479 }, { "epoch": 1.6749780228562297, "grad_norm": 1.5758225659358465, "learning_rate": 1.4115258596251858e-06, "loss": 0.5871715545654297, "step": 10480 }, { "epoch": 1.6751378566291057, "grad_norm": 1.7598774328307256, "learning_rate": 1.4101725064838856e-06, "loss": 0.532053530216217, "step": 10481 }, { "epoch": 1.6752976904019818, "grad_norm": 1.9142611808080503, "learning_rate": 1.4088197532256353e-06, "loss": 0.7269004583358765, "step": 10482 }, { "epoch": 1.6754575241748582, "grad_norm": 1.8291672188846595, "learning_rate": 1.4074675999449095e-06, "loss": 0.4842722415924072, "step": 10483 }, { "epoch": 1.6756173579477345, "grad_norm": 1.6766991509149891, "learning_rate": 1.4061160467361345e-06, "loss": 0.6343002319335938, "step": 10484 }, { "epoch": 1.6757771917206106, "grad_norm": 1.5297780255216191, "learning_rate": 1.4047650936937018e-06, "loss": 0.4127277433872223, "step": 10485 }, { "epoch": 1.6759370254934867, "grad_norm": 1.5326630084140827, "learning_rate": 1.4034147409119547e-06, "loss": 0.4423428475856781, "step": 10486 }, { "epoch": 1.676096859266363, "grad_norm": 1.6434356344513459, "learning_rate": 1.4020649884851988e-06, "loss": 0.45896315574645996, "step": 10487 }, { "epoch": 1.6762566930392393, "grad_norm": 2.2468452437781004, "learning_rate": 1.4007158365076944e-06, "loss": 0.5797538757324219, "step": 10488 }, { "epoch": 1.6764165268121154, "grad_norm": 1.7746033371122634, "learning_rate": 1.3993672850736596e-06, "loss": 0.5278917551040649, "step": 10489 }, { "epoch": 1.6765763605849915, "grad_norm": 1.5009846642049232, "learning_rate": 1.3980193342772773e-06, "loss": 0.5235538482666016, "step": 10490 }, { "epoch": 1.6767361943578678, "grad_norm": 1.6633511697607974, "learning_rate": 1.3966719842126808e-06, "loss": 0.570606529712677, "step": 10491 }, { "epoch": 1.6768960281307441, "grad_norm": 1.5415650128828509, "learning_rate": 1.3953252349739653e-06, "loss": 0.4991043508052826, "step": 10492 }, { "epoch": 1.6770558619036202, "grad_norm": 1.5232157684515266, "learning_rate": 1.3939790866551817e-06, "loss": 0.4704033136367798, "step": 10493 }, { "epoch": 1.6772156956764963, "grad_norm": 1.494622568290325, "learning_rate": 1.3926335393503388e-06, "loss": 0.4839684069156647, "step": 10494 }, { "epoch": 1.6773755294493726, "grad_norm": 1.468695973445724, "learning_rate": 1.3912885931534093e-06, "loss": 0.4565015733242035, "step": 10495 }, { "epoch": 1.677535363222249, "grad_norm": 1.683974689357882, "learning_rate": 1.3899442481583158e-06, "loss": 0.5870994329452515, "step": 10496 }, { "epoch": 1.677695196995125, "grad_norm": 1.9508850732050529, "learning_rate": 1.3886005044589424e-06, "loss": 0.49027737975120544, "step": 10497 }, { "epoch": 1.6778550307680011, "grad_norm": 1.615439574142571, "learning_rate": 1.3872573621491358e-06, "loss": 0.5532083511352539, "step": 10498 }, { "epoch": 1.6780148645408774, "grad_norm": 1.9174345742495553, "learning_rate": 1.3859148213226903e-06, "loss": 0.589738130569458, "step": 10499 }, { "epoch": 1.6781746983137538, "grad_norm": 1.4853215544973855, "learning_rate": 1.3845728820733684e-06, "loss": 0.5025169849395752, "step": 10500 }, { "epoch": 1.6783345320866299, "grad_norm": 1.573158436635516, "learning_rate": 1.3832315444948862e-06, "loss": 0.6070291996002197, "step": 10501 }, { "epoch": 1.678494365859506, "grad_norm": 1.4976725496241972, "learning_rate": 1.3818908086809168e-06, "loss": 0.5327143669128418, "step": 10502 }, { "epoch": 1.6786541996323823, "grad_norm": 1.636102150676162, "learning_rate": 1.380550674725093e-06, "loss": 0.5419990420341492, "step": 10503 }, { "epoch": 1.6788140334052586, "grad_norm": 1.5184464008668335, "learning_rate": 1.3792111427210008e-06, "loss": 0.6849899888038635, "step": 10504 }, { "epoch": 1.6789738671781347, "grad_norm": 2.0012791460954, "learning_rate": 1.3778722127621958e-06, "loss": 0.6292468309402466, "step": 10505 }, { "epoch": 1.679133700951011, "grad_norm": 1.6405352186145665, "learning_rate": 1.3765338849421795e-06, "loss": 0.47932928800582886, "step": 10506 }, { "epoch": 1.6792935347238873, "grad_norm": 1.3594310321595167, "learning_rate": 1.3751961593544171e-06, "loss": 0.4676722288131714, "step": 10507 }, { "epoch": 1.6794533684967634, "grad_norm": 1.5302282447120379, "learning_rate": 1.37385903609233e-06, "loss": 0.41876518726348877, "step": 10508 }, { "epoch": 1.6796132022696395, "grad_norm": 1.6444004087148125, "learning_rate": 1.372522515249297e-06, "loss": 0.6091490983963013, "step": 10509 }, { "epoch": 1.6797730360425158, "grad_norm": 1.6672557520076152, "learning_rate": 1.3711865969186589e-06, "loss": 0.6527727246284485, "step": 10510 }, { "epoch": 1.6799328698153921, "grad_norm": 1.4645366813171958, "learning_rate": 1.3698512811937103e-06, "loss": 0.5061067342758179, "step": 10511 }, { "epoch": 1.6800927035882682, "grad_norm": 1.7001108651770998, "learning_rate": 1.3685165681677048e-06, "loss": 0.6145594120025635, "step": 10512 }, { "epoch": 1.6802525373611443, "grad_norm": 1.4507180262378918, "learning_rate": 1.367182457933851e-06, "loss": 0.637611985206604, "step": 10513 }, { "epoch": 1.6804123711340206, "grad_norm": 2.1610846274578916, "learning_rate": 1.3658489505853246e-06, "loss": 0.6485246419906616, "step": 10514 }, { "epoch": 1.680572204906897, "grad_norm": 1.7393690362611711, "learning_rate": 1.3645160462152495e-06, "loss": 0.49720633029937744, "step": 10515 }, { "epoch": 1.680732038679773, "grad_norm": 1.7833190737134164, "learning_rate": 1.3631837449167108e-06, "loss": 0.5895638465881348, "step": 10516 }, { "epoch": 1.6808918724526491, "grad_norm": 1.5636960535356998, "learning_rate": 1.36185204678275e-06, "loss": 0.6018344759941101, "step": 10517 }, { "epoch": 1.6810517062255255, "grad_norm": 1.6690650774668512, "learning_rate": 1.3605209519063723e-06, "loss": 0.6001436710357666, "step": 10518 }, { "epoch": 1.6812115399984018, "grad_norm": 1.5930618180570209, "learning_rate": 1.359190460380533e-06, "loss": 0.5620791912078857, "step": 10519 }, { "epoch": 1.6813713737712779, "grad_norm": 1.5998570325281465, "learning_rate": 1.3578605722981519e-06, "loss": 0.5114865303039551, "step": 10520 }, { "epoch": 1.681531207544154, "grad_norm": 1.6944392847245582, "learning_rate": 1.3565312877521019e-06, "loss": 0.5670911073684692, "step": 10521 }, { "epoch": 1.6816910413170303, "grad_norm": 1.5770990560955636, "learning_rate": 1.3552026068352176e-06, "loss": 0.5339769721031189, "step": 10522 }, { "epoch": 1.6818508750899066, "grad_norm": 1.6497839917535597, "learning_rate": 1.3538745296402867e-06, "loss": 0.5871946215629578, "step": 10523 }, { "epoch": 1.6820107088627827, "grad_norm": 1.7294044791649588, "learning_rate": 1.3525470562600563e-06, "loss": 0.5270009636878967, "step": 10524 }, { "epoch": 1.6821705426356588, "grad_norm": 1.4787427487140434, "learning_rate": 1.3512201867872354e-06, "loss": 0.5153957605361938, "step": 10525 }, { "epoch": 1.682330376408535, "grad_norm": 1.7547264026408658, "learning_rate": 1.349893921314489e-06, "loss": 0.477194607257843, "step": 10526 }, { "epoch": 1.6824902101814114, "grad_norm": 1.6551936081740117, "learning_rate": 1.3485682599344351e-06, "loss": 0.46270179748535156, "step": 10527 }, { "epoch": 1.6826500439542875, "grad_norm": 1.8239607875394837, "learning_rate": 1.347243202739653e-06, "loss": 0.5574489831924438, "step": 10528 }, { "epoch": 1.6828098777271636, "grad_norm": 1.6157498896106959, "learning_rate": 1.3459187498226844e-06, "loss": 0.5310214757919312, "step": 10529 }, { "epoch": 1.68296971150004, "grad_norm": 1.5820954775925795, "learning_rate": 1.3445949012760218e-06, "loss": 0.5537222623825073, "step": 10530 }, { "epoch": 1.6831295452729162, "grad_norm": 1.7058496091760331, "learning_rate": 1.3432716571921178e-06, "loss": 0.5247000455856323, "step": 10531 }, { "epoch": 1.6832893790457923, "grad_norm": 1.6202337633120778, "learning_rate": 1.3419490176633831e-06, "loss": 0.5657893419265747, "step": 10532 }, { "epoch": 1.6834492128186684, "grad_norm": 1.4539732997459374, "learning_rate": 1.340626982782185e-06, "loss": 0.4644683003425598, "step": 10533 }, { "epoch": 1.6836090465915448, "grad_norm": 1.4980057249896033, "learning_rate": 1.3393055526408527e-06, "loss": 0.5610225200653076, "step": 10534 }, { "epoch": 1.683768880364421, "grad_norm": 1.61623947251686, "learning_rate": 1.3379847273316682e-06, "loss": 0.4261540472507477, "step": 10535 }, { "epoch": 1.6839287141372972, "grad_norm": 1.8699420552317207, "learning_rate": 1.3366645069468742e-06, "loss": 0.5569952726364136, "step": 10536 }, { "epoch": 1.6840885479101733, "grad_norm": 1.5325947551904546, "learning_rate": 1.3353448915786692e-06, "loss": 0.5928554534912109, "step": 10537 }, { "epoch": 1.6842483816830496, "grad_norm": 1.7462575527455844, "learning_rate": 1.3340258813192086e-06, "loss": 0.6037962436676025, "step": 10538 }, { "epoch": 1.684408215455926, "grad_norm": 1.5616406667265577, "learning_rate": 1.3327074762606096e-06, "loss": 0.5757277011871338, "step": 10539 }, { "epoch": 1.684568049228802, "grad_norm": 1.7512138150050787, "learning_rate": 1.3313896764949486e-06, "loss": 0.5484060049057007, "step": 10540 }, { "epoch": 1.6847278830016783, "grad_norm": 1.4141812318405191, "learning_rate": 1.3300724821142509e-06, "loss": 0.5223333835601807, "step": 10541 }, { "epoch": 1.6848877167745546, "grad_norm": 1.462678234710973, "learning_rate": 1.328755893210507e-06, "loss": 0.5110280513763428, "step": 10542 }, { "epoch": 1.6850475505474307, "grad_norm": 1.4528364526449364, "learning_rate": 1.3274399098756597e-06, "loss": 0.5324446558952332, "step": 10543 }, { "epoch": 1.6852073843203068, "grad_norm": 1.7665486103644483, "learning_rate": 1.3261245322016169e-06, "loss": 0.573121190071106, "step": 10544 }, { "epoch": 1.6853672180931831, "grad_norm": 1.4829023566066053, "learning_rate": 1.324809760280238e-06, "loss": 0.3784528970718384, "step": 10545 }, { "epoch": 1.6855270518660594, "grad_norm": 1.4240358064752645, "learning_rate": 1.323495594203341e-06, "loss": 0.4877997636795044, "step": 10546 }, { "epoch": 1.6856868856389355, "grad_norm": 1.9624041917780757, "learning_rate": 1.3221820340627044e-06, "loss": 0.5049762725830078, "step": 10547 }, { "epoch": 1.6858467194118116, "grad_norm": 1.5069225763021066, "learning_rate": 1.32086907995006e-06, "loss": 0.536200761795044, "step": 10548 }, { "epoch": 1.686006553184688, "grad_norm": 1.5813512571504231, "learning_rate": 1.3195567319571035e-06, "loss": 0.6061148047447205, "step": 10549 }, { "epoch": 1.6861663869575643, "grad_norm": 1.435953548882683, "learning_rate": 1.3182449901754824e-06, "loss": 0.46212029457092285, "step": 10550 }, { "epoch": 1.6863262207304404, "grad_norm": 1.730535251234569, "learning_rate": 1.3169338546968047e-06, "loss": 0.5672433376312256, "step": 10551 }, { "epoch": 1.6864860545033165, "grad_norm": 1.4125304498296678, "learning_rate": 1.315623325612635e-06, "loss": 0.4980164170265198, "step": 10552 }, { "epoch": 1.6866458882761928, "grad_norm": 1.5208018079912387, "learning_rate": 1.3143134030144932e-06, "loss": 0.5250811576843262, "step": 10553 }, { "epoch": 1.686805722049069, "grad_norm": 1.5529872245854222, "learning_rate": 1.3130040869938654e-06, "loss": 0.5009274482727051, "step": 10554 }, { "epoch": 1.6869655558219452, "grad_norm": 1.4882704243750193, "learning_rate": 1.311695377642187e-06, "loss": 0.5639708042144775, "step": 10555 }, { "epoch": 1.6871253895948213, "grad_norm": 1.8295026618118997, "learning_rate": 1.3103872750508528e-06, "loss": 0.7018471956253052, "step": 10556 }, { "epoch": 1.6872852233676976, "grad_norm": 1.5749641994832646, "learning_rate": 1.3090797793112176e-06, "loss": 0.5507833361625671, "step": 10557 }, { "epoch": 1.687445057140574, "grad_norm": 1.715295284672051, "learning_rate": 1.307772890514588e-06, "loss": 0.48853614926338196, "step": 10558 }, { "epoch": 1.68760489091345, "grad_norm": 2.3042826709517237, "learning_rate": 1.3064666087522359e-06, "loss": 0.6676586270332336, "step": 10559 }, { "epoch": 1.687764724686326, "grad_norm": 1.618821945610639, "learning_rate": 1.3051609341153891e-06, "loss": 0.5922711491584778, "step": 10560 }, { "epoch": 1.6879245584592024, "grad_norm": 1.9225271847404775, "learning_rate": 1.3038558666952306e-06, "loss": 0.6278746128082275, "step": 10561 }, { "epoch": 1.6880843922320787, "grad_norm": 1.4635034247889018, "learning_rate": 1.3025514065828992e-06, "loss": 0.5908493399620056, "step": 10562 }, { "epoch": 1.6882442260049548, "grad_norm": 1.5512066326035712, "learning_rate": 1.3012475538694935e-06, "loss": 0.6352735757827759, "step": 10563 }, { "epoch": 1.688404059777831, "grad_norm": 1.7321556989324054, "learning_rate": 1.299944308646074e-06, "loss": 0.5138424634933472, "step": 10564 }, { "epoch": 1.6885638935507072, "grad_norm": 1.509137350713586, "learning_rate": 1.298641671003651e-06, "loss": 0.4647759199142456, "step": 10565 }, { "epoch": 1.6887237273235836, "grad_norm": 1.626433913550625, "learning_rate": 1.2973396410331984e-06, "loss": 0.5361315608024597, "step": 10566 }, { "epoch": 1.6888835610964597, "grad_norm": 1.541730175180269, "learning_rate": 1.2960382188256438e-06, "loss": 0.49547165632247925, "step": 10567 }, { "epoch": 1.6890433948693357, "grad_norm": 1.406999658584044, "learning_rate": 1.294737404471872e-06, "loss": 0.5238907337188721, "step": 10568 }, { "epoch": 1.689203228642212, "grad_norm": 1.7333124607271577, "learning_rate": 1.2934371980627313e-06, "loss": 0.6406506299972534, "step": 10569 }, { "epoch": 1.6893630624150884, "grad_norm": 1.5816538056541998, "learning_rate": 1.2921375996890228e-06, "loss": 0.45357030630111694, "step": 10570 }, { "epoch": 1.6895228961879645, "grad_norm": 1.735197453831731, "learning_rate": 1.2908386094415049e-06, "loss": 0.5735644698143005, "step": 10571 }, { "epoch": 1.6896827299608406, "grad_norm": 1.7651467847259927, "learning_rate": 1.2895402274108936e-06, "loss": 0.5930265188217163, "step": 10572 }, { "epoch": 1.6898425637337169, "grad_norm": 1.7120027254788168, "learning_rate": 1.2882424536878623e-06, "loss": 0.6378717422485352, "step": 10573 }, { "epoch": 1.6900023975065932, "grad_norm": 1.761378888664458, "learning_rate": 1.2869452883630462e-06, "loss": 0.5253302454948425, "step": 10574 }, { "epoch": 1.6901622312794693, "grad_norm": 1.6139957470493265, "learning_rate": 1.285648731527034e-06, "loss": 0.43105369806289673, "step": 10575 }, { "epoch": 1.6903220650523456, "grad_norm": 1.5285319513577378, "learning_rate": 1.2843527832703717e-06, "loss": 0.6089801788330078, "step": 10576 }, { "epoch": 1.690481898825222, "grad_norm": 1.5527647773821576, "learning_rate": 1.2830574436835619e-06, "loss": 0.5534847974777222, "step": 10577 }, { "epoch": 1.690641732598098, "grad_norm": 1.4692383559355604, "learning_rate": 1.2817627128570686e-06, "loss": 0.5290014743804932, "step": 10578 }, { "epoch": 1.6908015663709741, "grad_norm": 1.7343138824437974, "learning_rate": 1.280468590881312e-06, "loss": 0.519874632358551, "step": 10579 }, { "epoch": 1.6909614001438504, "grad_norm": 1.44985895367557, "learning_rate": 1.2791750778466694e-06, "loss": 0.40443646907806396, "step": 10580 }, { "epoch": 1.6911212339167268, "grad_norm": 1.3065947936672997, "learning_rate": 1.2778821738434732e-06, "loss": 0.49005430936813354, "step": 10581 }, { "epoch": 1.6912810676896028, "grad_norm": 1.2052194727526842, "learning_rate": 1.2765898789620157e-06, "loss": 0.438750684261322, "step": 10582 }, { "epoch": 1.691440901462479, "grad_norm": 1.6721900017548619, "learning_rate": 1.2752981932925446e-06, "loss": 0.6192876100540161, "step": 10583 }, { "epoch": 1.6916007352353553, "grad_norm": 1.499519994071648, "learning_rate": 1.27400711692527e-06, "loss": 0.47601696848869324, "step": 10584 }, { "epoch": 1.6917605690082316, "grad_norm": 1.5938894896577147, "learning_rate": 1.2727166499503551e-06, "loss": 0.6962298154830933, "step": 10585 }, { "epoch": 1.6919204027811077, "grad_norm": 1.785307544972434, "learning_rate": 1.2714267924579204e-06, "loss": 0.6060482263565063, "step": 10586 }, { "epoch": 1.6920802365539838, "grad_norm": 1.4753091577257664, "learning_rate": 1.2701375445380459e-06, "loss": 0.5178749561309814, "step": 10587 }, { "epoch": 1.69224007032686, "grad_norm": 1.5952548936905984, "learning_rate": 1.2688489062807651e-06, "loss": 0.5480805039405823, "step": 10588 }, { "epoch": 1.6923999040997364, "grad_norm": 1.4888752062858182, "learning_rate": 1.2675608777760772e-06, "loss": 0.437267005443573, "step": 10589 }, { "epoch": 1.6925597378726125, "grad_norm": 1.656474537957399, "learning_rate": 1.2662734591139302e-06, "loss": 0.486117959022522, "step": 10590 }, { "epoch": 1.6927195716454886, "grad_norm": 2.0101594432600267, "learning_rate": 1.2649866503842334e-06, "loss": 0.5973026156425476, "step": 10591 }, { "epoch": 1.692879405418365, "grad_norm": 1.7520147250580955, "learning_rate": 1.2637004516768514e-06, "loss": 0.6946648955345154, "step": 10592 }, { "epoch": 1.6930392391912412, "grad_norm": 1.685990128517561, "learning_rate": 1.2624148630816114e-06, "loss": 0.5336284637451172, "step": 10593 }, { "epoch": 1.6931990729641173, "grad_norm": 1.6985739482745694, "learning_rate": 1.2611298846882925e-06, "loss": 0.5171808004379272, "step": 10594 }, { "epoch": 1.6933589067369934, "grad_norm": 1.358041211725239, "learning_rate": 1.2598455165866319e-06, "loss": 0.41480061411857605, "step": 10595 }, { "epoch": 1.6935187405098697, "grad_norm": 1.726384894339621, "learning_rate": 1.2585617588663268e-06, "loss": 0.5692511796951294, "step": 10596 }, { "epoch": 1.693678574282746, "grad_norm": 1.5377316080045367, "learning_rate": 1.2572786116170278e-06, "loss": 0.4482381343841553, "step": 10597 }, { "epoch": 1.6938384080556221, "grad_norm": 1.5858891290418113, "learning_rate": 1.2559960749283473e-06, "loss": 0.5650061368942261, "step": 10598 }, { "epoch": 1.6939982418284982, "grad_norm": 1.4804618813043098, "learning_rate": 1.254714148889855e-06, "loss": 0.5964576005935669, "step": 10599 }, { "epoch": 1.6941580756013745, "grad_norm": 1.418171239654169, "learning_rate": 1.253432833591074e-06, "loss": 0.516737699508667, "step": 10600 }, { "epoch": 1.6943179093742509, "grad_norm": 2.0073696135420525, "learning_rate": 1.2521521291214878e-06, "loss": 0.7096840143203735, "step": 10601 }, { "epoch": 1.694477743147127, "grad_norm": 1.4831284989097457, "learning_rate": 1.250872035570534e-06, "loss": 0.4877499043941498, "step": 10602 }, { "epoch": 1.694637576920003, "grad_norm": 1.538397662443638, "learning_rate": 1.24959255302761e-06, "loss": 0.5044761300086975, "step": 10603 }, { "epoch": 1.6947974106928794, "grad_norm": 1.6319479008089637, "learning_rate": 1.248313681582074e-06, "loss": 0.5074189901351929, "step": 10604 }, { "epoch": 1.6949572444657557, "grad_norm": 2.005137244787084, "learning_rate": 1.247035421323235e-06, "loss": 0.5940184593200684, "step": 10605 }, { "epoch": 1.6951170782386318, "grad_norm": 1.708252758017495, "learning_rate": 1.2457577723403613e-06, "loss": 0.5474018454551697, "step": 10606 }, { "epoch": 1.6952769120115079, "grad_norm": 2.556938228383693, "learning_rate": 1.2444807347226795e-06, "loss": 0.6030746698379517, "step": 10607 }, { "epoch": 1.6954367457843842, "grad_norm": 1.5380308071478632, "learning_rate": 1.243204308559377e-06, "loss": 0.5070499181747437, "step": 10608 }, { "epoch": 1.6955965795572605, "grad_norm": 1.434344322191661, "learning_rate": 1.2419284939395915e-06, "loss": 0.5124794840812683, "step": 10609 }, { "epoch": 1.6957564133301366, "grad_norm": 1.626055265698345, "learning_rate": 1.2406532909524227e-06, "loss": 0.5163165330886841, "step": 10610 }, { "epoch": 1.695916247103013, "grad_norm": 1.5679651228575, "learning_rate": 1.2393786996869262e-06, "loss": 0.6583986282348633, "step": 10611 }, { "epoch": 1.6960760808758892, "grad_norm": 1.584841790100659, "learning_rate": 1.2381047202321116e-06, "loss": 0.6385906934738159, "step": 10612 }, { "epoch": 1.6962359146487653, "grad_norm": 1.677352704344957, "learning_rate": 1.2368313526769538e-06, "loss": 0.5790104866027832, "step": 10613 }, { "epoch": 1.6963957484216414, "grad_norm": 1.7357889633214194, "learning_rate": 1.235558597110379e-06, "loss": 0.6084122657775879, "step": 10614 }, { "epoch": 1.6965555821945177, "grad_norm": 1.7327964583945665, "learning_rate": 1.2342864536212718e-06, "loss": 0.5785447359085083, "step": 10615 }, { "epoch": 1.696715415967394, "grad_norm": 1.551573862526929, "learning_rate": 1.2330149222984732e-06, "loss": 0.5430873036384583, "step": 10616 }, { "epoch": 1.6968752497402702, "grad_norm": 2.0614205218632167, "learning_rate": 1.2317440032307803e-06, "loss": 0.6398272514343262, "step": 10617 }, { "epoch": 1.6970350835131462, "grad_norm": 1.643109478417389, "learning_rate": 1.230473696506953e-06, "loss": 0.5382096767425537, "step": 10618 }, { "epoch": 1.6971949172860226, "grad_norm": 1.5802265832840867, "learning_rate": 1.229204002215706e-06, "loss": 0.5524853467941284, "step": 10619 }, { "epoch": 1.6973547510588989, "grad_norm": 1.7082990418654243, "learning_rate": 1.2279349204457091e-06, "loss": 0.5521124601364136, "step": 10620 }, { "epoch": 1.697514584831775, "grad_norm": 1.6385744091506917, "learning_rate": 1.2266664512855897e-06, "loss": 0.4893878102302551, "step": 10621 }, { "epoch": 1.697674418604651, "grad_norm": 1.7227054810928968, "learning_rate": 1.22539859482393e-06, "loss": 0.4816773235797882, "step": 10622 }, { "epoch": 1.6978342523775274, "grad_norm": 1.56743506200784, "learning_rate": 1.2241313511492791e-06, "loss": 0.45445460081100464, "step": 10623 }, { "epoch": 1.6979940861504037, "grad_norm": 1.9997883876254783, "learning_rate": 1.2228647203501343e-06, "loss": 0.6293249726295471, "step": 10624 }, { "epoch": 1.6981539199232798, "grad_norm": 1.60164894445825, "learning_rate": 1.2215987025149512e-06, "loss": 0.6085978150367737, "step": 10625 }, { "epoch": 1.698313753696156, "grad_norm": 1.8298754519778486, "learning_rate": 1.2203332977321448e-06, "loss": 0.4395727515220642, "step": 10626 }, { "epoch": 1.6984735874690322, "grad_norm": 1.7219327303214553, "learning_rate": 1.2190685060900843e-06, "loss": 0.5019946098327637, "step": 10627 }, { "epoch": 1.6986334212419085, "grad_norm": 1.6198212687066658, "learning_rate": 1.2178043276771024e-06, "loss": 0.5448391437530518, "step": 10628 }, { "epoch": 1.6987932550147846, "grad_norm": 1.782913763961265, "learning_rate": 1.216540762581483e-06, "loss": 0.5828598737716675, "step": 10629 }, { "epoch": 1.6989530887876607, "grad_norm": 1.5430845693403623, "learning_rate": 1.2152778108914687e-06, "loss": 0.5971240997314453, "step": 10630 }, { "epoch": 1.699112922560537, "grad_norm": 1.6124663723316792, "learning_rate": 1.2140154726952592e-06, "loss": 0.5077476501464844, "step": 10631 }, { "epoch": 1.6992727563334133, "grad_norm": 1.581027327348733, "learning_rate": 1.2127537480810102e-06, "loss": 0.5133002400398254, "step": 10632 }, { "epoch": 1.6994325901062894, "grad_norm": 1.7917242221433454, "learning_rate": 1.2114926371368408e-06, "loss": 0.604295015335083, "step": 10633 }, { "epoch": 1.6995924238791655, "grad_norm": 1.961519853572399, "learning_rate": 1.21023213995082e-06, "loss": 0.510637104511261, "step": 10634 }, { "epoch": 1.6997522576520419, "grad_norm": 1.65450202445768, "learning_rate": 1.2089722566109752e-06, "loss": 0.4885289669036865, "step": 10635 }, { "epoch": 1.6999120914249182, "grad_norm": 1.4914961998239538, "learning_rate": 1.207712987205294e-06, "loss": 0.6047306656837463, "step": 10636 }, { "epoch": 1.7000719251977943, "grad_norm": 1.980418535839116, "learning_rate": 1.2064543318217158e-06, "loss": 0.5897840261459351, "step": 10637 }, { "epoch": 1.7002317589706704, "grad_norm": 1.5346590887843776, "learning_rate": 1.205196290548143e-06, "loss": 0.4089905023574829, "step": 10638 }, { "epoch": 1.7003915927435467, "grad_norm": 1.469001809721604, "learning_rate": 1.2039388634724359e-06, "loss": 0.4183275103569031, "step": 10639 }, { "epoch": 1.700551426516423, "grad_norm": 1.6489597609387345, "learning_rate": 1.2026820506824056e-06, "loss": 0.495195209980011, "step": 10640 }, { "epoch": 1.700711260289299, "grad_norm": 1.7421964742354832, "learning_rate": 1.201425852265824e-06, "loss": 0.5674498081207275, "step": 10641 }, { "epoch": 1.7008710940621752, "grad_norm": 1.6139826353180846, "learning_rate": 1.2001702683104165e-06, "loss": 0.47056204080581665, "step": 10642 }, { "epoch": 1.7010309278350515, "grad_norm": 1.9468216666684783, "learning_rate": 1.1989152989038745e-06, "loss": 0.5446524620056152, "step": 10643 }, { "epoch": 1.7011907616079278, "grad_norm": 1.5816948127017894, "learning_rate": 1.197660944133836e-06, "loss": 0.5661102533340454, "step": 10644 }, { "epoch": 1.701350595380804, "grad_norm": 1.83726776285906, "learning_rate": 1.1964072040879038e-06, "loss": 0.6184830665588379, "step": 10645 }, { "epoch": 1.7015104291536802, "grad_norm": 1.5268877487802064, "learning_rate": 1.195154078853632e-06, "loss": 0.4961792528629303, "step": 10646 }, { "epoch": 1.7016702629265565, "grad_norm": 1.3918080887046316, "learning_rate": 1.193901568518534e-06, "loss": 0.4361955225467682, "step": 10647 }, { "epoch": 1.7018300966994326, "grad_norm": 1.811832528573389, "learning_rate": 1.1926496731700833e-06, "loss": 0.6320064067840576, "step": 10648 }, { "epoch": 1.7019899304723087, "grad_norm": 1.3994375684577849, "learning_rate": 1.1913983928957073e-06, "loss": 0.49590712785720825, "step": 10649 }, { "epoch": 1.702149764245185, "grad_norm": 1.4811069509386305, "learning_rate": 1.190147727782791e-06, "loss": 0.5397229194641113, "step": 10650 }, { "epoch": 1.7023095980180614, "grad_norm": 1.621323502932938, "learning_rate": 1.1888976779186745e-06, "loss": 0.5747251510620117, "step": 10651 }, { "epoch": 1.7024694317909375, "grad_norm": 1.615034784552511, "learning_rate": 1.1876482433906567e-06, "loss": 0.6734502911567688, "step": 10652 }, { "epoch": 1.7026292655638136, "grad_norm": 1.6911311240061533, "learning_rate": 1.1863994242859967e-06, "loss": 0.4671321511268616, "step": 10653 }, { "epoch": 1.7027890993366899, "grad_norm": 1.7714083513328664, "learning_rate": 1.1851512206919069e-06, "loss": 0.524478554725647, "step": 10654 }, { "epoch": 1.7029489331095662, "grad_norm": 1.794832872717324, "learning_rate": 1.1839036326955556e-06, "loss": 0.6633227467536926, "step": 10655 }, { "epoch": 1.7031087668824423, "grad_norm": 1.7225546579694007, "learning_rate": 1.1826566603840683e-06, "loss": 0.48765939474105835, "step": 10656 }, { "epoch": 1.7032686006553184, "grad_norm": 1.6566768197476571, "learning_rate": 1.1814103038445347e-06, "loss": 0.5420845746994019, "step": 10657 }, { "epoch": 1.7034284344281947, "grad_norm": 1.6037289293297141, "learning_rate": 1.1801645631639903e-06, "loss": 0.5734120011329651, "step": 10658 }, { "epoch": 1.703588268201071, "grad_norm": 1.4320199546762853, "learning_rate": 1.1789194384294377e-06, "loss": 0.5158215165138245, "step": 10659 }, { "epoch": 1.703748101973947, "grad_norm": 1.4765835094623576, "learning_rate": 1.17767492972783e-06, "loss": 0.6004142761230469, "step": 10660 }, { "epoch": 1.7039079357468232, "grad_norm": 1.4587283485460378, "learning_rate": 1.1764310371460786e-06, "loss": 0.6553179025650024, "step": 10661 }, { "epoch": 1.7040677695196995, "grad_norm": 1.49969735741369, "learning_rate": 1.1751877607710516e-06, "loss": 0.49607381224632263, "step": 10662 }, { "epoch": 1.7042276032925758, "grad_norm": 1.5417453844372673, "learning_rate": 1.1739451006895797e-06, "loss": 0.5842528343200684, "step": 10663 }, { "epoch": 1.704387437065452, "grad_norm": 1.5659988635564914, "learning_rate": 1.1727030569884412e-06, "loss": 0.5401959419250488, "step": 10664 }, { "epoch": 1.704547270838328, "grad_norm": 1.5414041335722064, "learning_rate": 1.1714616297543792e-06, "loss": 0.527958869934082, "step": 10665 }, { "epoch": 1.7047071046112043, "grad_norm": 1.581585119590012, "learning_rate": 1.170220819074087e-06, "loss": 0.5202369093894958, "step": 10666 }, { "epoch": 1.7048669383840807, "grad_norm": 1.4169214169070798, "learning_rate": 1.1689806250342196e-06, "loss": 0.594704806804657, "step": 10667 }, { "epoch": 1.7050267721569567, "grad_norm": 1.452711232320383, "learning_rate": 1.1677410477213913e-06, "loss": 0.5964446067810059, "step": 10668 }, { "epoch": 1.7051866059298328, "grad_norm": 1.891438466450554, "learning_rate": 1.1665020872221655e-06, "loss": 0.5848090648651123, "step": 10669 }, { "epoch": 1.7053464397027092, "grad_norm": 1.4996098554822725, "learning_rate": 1.1652637436230685e-06, "loss": 0.45467859506607056, "step": 10670 }, { "epoch": 1.7055062734755855, "grad_norm": 1.681100910358101, "learning_rate": 1.164026017010581e-06, "loss": 0.538124144077301, "step": 10671 }, { "epoch": 1.7056661072484616, "grad_norm": 1.7739427293484753, "learning_rate": 1.162788907471143e-06, "loss": 0.555193305015564, "step": 10672 }, { "epoch": 1.7058259410213377, "grad_norm": 1.5656539079180414, "learning_rate": 1.1615524150911483e-06, "loss": 0.4372193217277527, "step": 10673 }, { "epoch": 1.705985774794214, "grad_norm": 1.6588879069303597, "learning_rate": 1.1603165399569504e-06, "loss": 0.4425429105758667, "step": 10674 }, { "epoch": 1.7061456085670903, "grad_norm": 1.7841798037856218, "learning_rate": 1.159081282154858e-06, "loss": 0.6246517300605774, "step": 10675 }, { "epoch": 1.7063054423399664, "grad_norm": 1.838471166231768, "learning_rate": 1.1578466417711355e-06, "loss": 0.5820005536079407, "step": 10676 }, { "epoch": 1.7064652761128425, "grad_norm": 1.639391083963358, "learning_rate": 1.156612618892008e-06, "loss": 0.543907880783081, "step": 10677 }, { "epoch": 1.7066251098857188, "grad_norm": 1.7480013591315118, "learning_rate": 1.1553792136036523e-06, "loss": 0.6019354462623596, "step": 10678 }, { "epoch": 1.7067849436585951, "grad_norm": 1.5751105973582062, "learning_rate": 1.1541464259922098e-06, "loss": 0.5577656030654907, "step": 10679 }, { "epoch": 1.7069447774314712, "grad_norm": 1.5549684408011273, "learning_rate": 1.1529142561437722e-06, "loss": 0.5463272929191589, "step": 10680 }, { "epoch": 1.7071046112043475, "grad_norm": 1.5114326545130918, "learning_rate": 1.1516827041443878e-06, "loss": 0.5612043738365173, "step": 10681 }, { "epoch": 1.7072644449772238, "grad_norm": 1.659035257803255, "learning_rate": 1.1504517700800632e-06, "loss": 0.6080908179283142, "step": 10682 }, { "epoch": 1.7074242787501, "grad_norm": 1.6068515505225067, "learning_rate": 1.1492214540367674e-06, "loss": 0.6083531379699707, "step": 10683 }, { "epoch": 1.707584112522976, "grad_norm": 1.7942552259870241, "learning_rate": 1.1479917561004173e-06, "loss": 0.48990654945373535, "step": 10684 }, { "epoch": 1.7077439462958524, "grad_norm": 1.8328353767289884, "learning_rate": 1.1467626763568917e-06, "loss": 0.6716176271438599, "step": 10685 }, { "epoch": 1.7079037800687287, "grad_norm": 1.6062131728031106, "learning_rate": 1.1455342148920234e-06, "loss": 0.6882582902908325, "step": 10686 }, { "epoch": 1.7080636138416048, "grad_norm": 1.356295064037104, "learning_rate": 1.1443063717916081e-06, "loss": 0.46299487352371216, "step": 10687 }, { "epoch": 1.7082234476144809, "grad_norm": 1.3877312509259616, "learning_rate": 1.1430791471413905e-06, "loss": 0.5208110213279724, "step": 10688 }, { "epoch": 1.7083832813873572, "grad_norm": 1.5872325345386455, "learning_rate": 1.1418525410270764e-06, "loss": 0.5117796659469604, "step": 10689 }, { "epoch": 1.7085431151602335, "grad_norm": 1.4590157554997538, "learning_rate": 1.1406265535343285e-06, "loss": 0.4703870713710785, "step": 10690 }, { "epoch": 1.7087029489331096, "grad_norm": 2.0944250250668324, "learning_rate": 1.1394011847487618e-06, "loss": 0.6464509963989258, "step": 10691 }, { "epoch": 1.7088627827059857, "grad_norm": 1.565270083839635, "learning_rate": 1.1381764347559566e-06, "loss": 0.580032467842102, "step": 10692 }, { "epoch": 1.709022616478862, "grad_norm": 1.422409948380422, "learning_rate": 1.1369523036414432e-06, "loss": 0.48502904176712036, "step": 10693 }, { "epoch": 1.7091824502517383, "grad_norm": 1.879264731236797, "learning_rate": 1.13572879149071e-06, "loss": 0.5858665704727173, "step": 10694 }, { "epoch": 1.7093422840246144, "grad_norm": 1.6549768090069983, "learning_rate": 1.1345058983892033e-06, "loss": 0.5553524494171143, "step": 10695 }, { "epoch": 1.7095021177974905, "grad_norm": 1.577833957470601, "learning_rate": 1.1332836244223244e-06, "loss": 0.5507740378379822, "step": 10696 }, { "epoch": 1.7096619515703668, "grad_norm": 1.4387099231821527, "learning_rate": 1.1320619696754342e-06, "loss": 0.609412431716919, "step": 10697 }, { "epoch": 1.7098217853432431, "grad_norm": 1.7903812949019957, "learning_rate": 1.1308409342338466e-06, "loss": 0.5150107145309448, "step": 10698 }, { "epoch": 1.7099816191161192, "grad_norm": 1.5722830559527203, "learning_rate": 1.1296205181828378e-06, "loss": 0.5109155178070068, "step": 10699 }, { "epoch": 1.7101414528889953, "grad_norm": 1.598890446787152, "learning_rate": 1.1284007216076365e-06, "loss": 0.5327416658401489, "step": 10700 }, { "epoch": 1.7103012866618716, "grad_norm": 1.6130365573586505, "learning_rate": 1.1271815445934264e-06, "loss": 0.49229830503463745, "step": 10701 }, { "epoch": 1.710461120434748, "grad_norm": 1.5088273636246785, "learning_rate": 1.1259629872253508e-06, "loss": 0.5006793737411499, "step": 10702 }, { "epoch": 1.710620954207624, "grad_norm": 1.5794217842168956, "learning_rate": 1.1247450495885126e-06, "loss": 0.5432054996490479, "step": 10703 }, { "epoch": 1.7107807879805002, "grad_norm": 1.5375159077245337, "learning_rate": 1.123527731767966e-06, "loss": 0.6333029270172119, "step": 10704 }, { "epoch": 1.7109406217533765, "grad_norm": 1.737290332127148, "learning_rate": 1.1223110338487242e-06, "loss": 0.5871235132217407, "step": 10705 }, { "epoch": 1.7111004555262528, "grad_norm": 1.6285725603001255, "learning_rate": 1.1210949559157559e-06, "loss": 0.6525806188583374, "step": 10706 }, { "epoch": 1.7112602892991289, "grad_norm": 1.3759343507932864, "learning_rate": 1.1198794980539908e-06, "loss": 0.5085077285766602, "step": 10707 }, { "epoch": 1.711420123072005, "grad_norm": 1.9533825875197808, "learning_rate": 1.1186646603483098e-06, "loss": 0.6010133028030396, "step": 10708 }, { "epoch": 1.7115799568448813, "grad_norm": 1.6115114909933177, "learning_rate": 1.1174504428835542e-06, "loss": 0.6009531021118164, "step": 10709 }, { "epoch": 1.7117397906177576, "grad_norm": 1.4888375800901381, "learning_rate": 1.1162368457445194e-06, "loss": 0.47188931703567505, "step": 10710 }, { "epoch": 1.7118996243906337, "grad_norm": 1.8002891214505785, "learning_rate": 1.1150238690159577e-06, "loss": 0.6094710230827332, "step": 10711 }, { "epoch": 1.7120594581635098, "grad_norm": 1.6280683580869675, "learning_rate": 1.113811512782582e-06, "loss": 0.602217435836792, "step": 10712 }, { "epoch": 1.7122192919363861, "grad_norm": 1.535966541918856, "learning_rate": 1.1125997771290587e-06, "loss": 0.5231067538261414, "step": 10713 }, { "epoch": 1.7123791257092624, "grad_norm": 1.6222062003704438, "learning_rate": 1.1113886621400082e-06, "loss": 0.4740141034126282, "step": 10714 }, { "epoch": 1.7125389594821385, "grad_norm": 1.6024151550260703, "learning_rate": 1.1101781679000135e-06, "loss": 0.5087425708770752, "step": 10715 }, { "epoch": 1.7126987932550148, "grad_norm": 1.4962608518690952, "learning_rate": 1.1089682944936075e-06, "loss": 0.5042041540145874, "step": 10716 }, { "epoch": 1.7128586270278912, "grad_norm": 1.6173428345959617, "learning_rate": 1.1077590420052885e-06, "loss": 0.6492035388946533, "step": 10717 }, { "epoch": 1.7130184608007673, "grad_norm": 2.001867181565067, "learning_rate": 1.1065504105195012e-06, "loss": 0.675833523273468, "step": 10718 }, { "epoch": 1.7131782945736433, "grad_norm": 1.7906212048222627, "learning_rate": 1.105342400120657e-06, "loss": 0.6018422842025757, "step": 10719 }, { "epoch": 1.7133381283465197, "grad_norm": 1.6726225791331788, "learning_rate": 1.1041350108931159e-06, "loss": 0.611146867275238, "step": 10720 }, { "epoch": 1.713497962119396, "grad_norm": 1.6298636893358462, "learning_rate": 1.1029282429211975e-06, "loss": 0.4853513836860657, "step": 10721 }, { "epoch": 1.713657795892272, "grad_norm": 1.7611879545463276, "learning_rate": 1.1017220962891818e-06, "loss": 0.6511566638946533, "step": 10722 }, { "epoch": 1.7138176296651482, "grad_norm": 1.7918851383397474, "learning_rate": 1.1005165710812982e-06, "loss": 0.5075435638427734, "step": 10723 }, { "epoch": 1.7139774634380245, "grad_norm": 1.4972600753057768, "learning_rate": 1.0993116673817373e-06, "loss": 0.6032909154891968, "step": 10724 }, { "epoch": 1.7141372972109008, "grad_norm": 1.807826081177968, "learning_rate": 1.098107385274646e-06, "loss": 0.58414226770401, "step": 10725 }, { "epoch": 1.714297130983777, "grad_norm": 1.2884856782289051, "learning_rate": 1.0969037248441238e-06, "loss": 0.46896910667419434, "step": 10726 }, { "epoch": 1.714456964756653, "grad_norm": 1.615219163550463, "learning_rate": 1.0957006861742348e-06, "loss": 0.4969627857208252, "step": 10727 }, { "epoch": 1.7146167985295293, "grad_norm": 1.7453899668762658, "learning_rate": 1.0944982693489937e-06, "loss": 0.6956040859222412, "step": 10728 }, { "epoch": 1.7147766323024056, "grad_norm": 1.7403034763102054, "learning_rate": 1.0932964744523712e-06, "loss": 0.6278672218322754, "step": 10729 }, { "epoch": 1.7149364660752817, "grad_norm": 1.730080163582205, "learning_rate": 1.0920953015682978e-06, "loss": 0.5280584096908569, "step": 10730 }, { "epoch": 1.7150962998481578, "grad_norm": 2.2408726230913176, "learning_rate": 1.0908947507806567e-06, "loss": 0.46233052015304565, "step": 10731 }, { "epoch": 1.7152561336210341, "grad_norm": 1.637761496834804, "learning_rate": 1.089694822173295e-06, "loss": 0.41358983516693115, "step": 10732 }, { "epoch": 1.7154159673939104, "grad_norm": 1.4759892277268871, "learning_rate": 1.088495515830007e-06, "loss": 0.4250969886779785, "step": 10733 }, { "epoch": 1.7155758011667865, "grad_norm": 1.7315147938362072, "learning_rate": 1.08729683183455e-06, "loss": 0.5888481140136719, "step": 10734 }, { "epoch": 1.7157356349396626, "grad_norm": 1.5861927545094407, "learning_rate": 1.0860987702706339e-06, "loss": 0.6187912225723267, "step": 10735 }, { "epoch": 1.715895468712539, "grad_norm": 1.5123745194284026, "learning_rate": 1.0849013312219303e-06, "loss": 0.46734750270843506, "step": 10736 }, { "epoch": 1.7160553024854153, "grad_norm": 1.5837898184991857, "learning_rate": 1.0837045147720627e-06, "loss": 0.49115219712257385, "step": 10737 }, { "epoch": 1.7162151362582914, "grad_norm": 1.5754580606352855, "learning_rate": 1.0825083210046106e-06, "loss": 0.5631377100944519, "step": 10738 }, { "epoch": 1.7163749700311675, "grad_norm": 1.6314550181483287, "learning_rate": 1.081312750003114e-06, "loss": 0.5627621412277222, "step": 10739 }, { "epoch": 1.7165348038040438, "grad_norm": 1.680517608528804, "learning_rate": 1.0801178018510683e-06, "loss": 0.4635542631149292, "step": 10740 }, { "epoch": 1.71669463757692, "grad_norm": 1.6315751341730618, "learning_rate": 1.07892347663192e-06, "loss": 0.48274701833724976, "step": 10741 }, { "epoch": 1.7168544713497962, "grad_norm": 1.676925970158359, "learning_rate": 1.0777297744290816e-06, "loss": 0.5935547947883606, "step": 10742 }, { "epoch": 1.7170143051226723, "grad_norm": 1.5093653515998515, "learning_rate": 1.076536695325916e-06, "loss": 0.5510863661766052, "step": 10743 }, { "epoch": 1.7171741388955486, "grad_norm": 1.4758582152675168, "learning_rate": 1.0753442394057412e-06, "loss": 0.486913800239563, "step": 10744 }, { "epoch": 1.717333972668425, "grad_norm": 1.4862807792826627, "learning_rate": 1.0741524067518361e-06, "loss": 0.6010862588882446, "step": 10745 }, { "epoch": 1.717493806441301, "grad_norm": 1.3959434016136427, "learning_rate": 1.0729611974474309e-06, "loss": 0.3734135627746582, "step": 10746 }, { "epoch": 1.717653640214177, "grad_norm": 1.4556971702603962, "learning_rate": 1.071770611575721e-06, "loss": 0.48303163051605225, "step": 10747 }, { "epoch": 1.7178134739870534, "grad_norm": 1.9064835618004836, "learning_rate": 1.0705806492198478e-06, "loss": 0.749979555606842, "step": 10748 }, { "epoch": 1.7179733077599297, "grad_norm": 1.4810503371541046, "learning_rate": 1.0693913104629172e-06, "loss": 0.4843032956123352, "step": 10749 }, { "epoch": 1.7181331415328058, "grad_norm": 1.5275894032168673, "learning_rate": 1.068202595387985e-06, "loss": 0.4852558970451355, "step": 10750 }, { "epoch": 1.718292975305682, "grad_norm": 1.528754326370279, "learning_rate": 1.067014504078071e-06, "loss": 0.5130787491798401, "step": 10751 }, { "epoch": 1.7184528090785585, "grad_norm": 1.861672305798466, "learning_rate": 1.0658270366161439e-06, "loss": 0.525499165058136, "step": 10752 }, { "epoch": 1.7186126428514346, "grad_norm": 1.6981994836751004, "learning_rate": 1.0646401930851346e-06, "loss": 0.5634398460388184, "step": 10753 }, { "epoch": 1.7187724766243107, "grad_norm": 1.5047689988946336, "learning_rate": 1.0634539735679272e-06, "loss": 0.47708380222320557, "step": 10754 }, { "epoch": 1.718932310397187, "grad_norm": 1.549728703104357, "learning_rate": 1.0622683781473596e-06, "loss": 0.5606973171234131, "step": 10755 }, { "epoch": 1.7190921441700633, "grad_norm": 2.0383101592297588, "learning_rate": 1.061083406906236e-06, "loss": 0.6074250936508179, "step": 10756 }, { "epoch": 1.7192519779429394, "grad_norm": 1.3963588368789022, "learning_rate": 1.0598990599273061e-06, "loss": 0.45823782682418823, "step": 10757 }, { "epoch": 1.7194118117158155, "grad_norm": 1.7000563104043789, "learning_rate": 1.05871533729328e-06, "loss": 0.5156980752944946, "step": 10758 }, { "epoch": 1.7195716454886918, "grad_norm": 1.773552922507439, "learning_rate": 1.0575322390868291e-06, "loss": 0.5561760663986206, "step": 10759 }, { "epoch": 1.719731479261568, "grad_norm": 1.5115024954255551, "learning_rate": 1.056349765390574e-06, "loss": 0.43646639585494995, "step": 10760 }, { "epoch": 1.7198913130344442, "grad_norm": 1.4517225652409818, "learning_rate": 1.055167916287092e-06, "loss": 0.49403196573257446, "step": 10761 }, { "epoch": 1.7200511468073203, "grad_norm": 1.55136294110423, "learning_rate": 1.0539866918589237e-06, "loss": 0.5540101528167725, "step": 10762 }, { "epoch": 1.7202109805801966, "grad_norm": 1.4937839148480951, "learning_rate": 1.0528060921885607e-06, "loss": 0.3801548480987549, "step": 10763 }, { "epoch": 1.720370814353073, "grad_norm": 1.722239748341604, "learning_rate": 1.0516261173584497e-06, "loss": 0.5428276062011719, "step": 10764 }, { "epoch": 1.720530648125949, "grad_norm": 1.7831867103210988, "learning_rate": 1.0504467674509977e-06, "loss": 0.5159189701080322, "step": 10765 }, { "epoch": 1.7206904818988251, "grad_norm": 1.4865765917764098, "learning_rate": 1.0492680425485635e-06, "loss": 0.5770276188850403, "step": 10766 }, { "epoch": 1.7208503156717014, "grad_norm": 1.454066725225813, "learning_rate": 1.04808994273347e-06, "loss": 0.3910374045372009, "step": 10767 }, { "epoch": 1.7210101494445778, "grad_norm": 1.7006831806854898, "learning_rate": 1.0469124680879883e-06, "loss": 0.5885015726089478, "step": 10768 }, { "epoch": 1.7211699832174538, "grad_norm": 1.6499037328645039, "learning_rate": 1.0457356186943502e-06, "loss": 0.5770096778869629, "step": 10769 }, { "epoch": 1.72132981699033, "grad_norm": 1.9202811820028738, "learning_rate": 1.0445593946347388e-06, "loss": 0.4813843071460724, "step": 10770 }, { "epoch": 1.7214896507632063, "grad_norm": 1.7878593964560034, "learning_rate": 1.043383795991304e-06, "loss": 0.4622882008552551, "step": 10771 }, { "epoch": 1.7216494845360826, "grad_norm": 1.5104155795193772, "learning_rate": 1.0422088228461414e-06, "loss": 0.6113046407699585, "step": 10772 }, { "epoch": 1.7218093183089587, "grad_norm": 1.6149064507710273, "learning_rate": 1.0410344752813074e-06, "loss": 0.5075700283050537, "step": 10773 }, { "epoch": 1.7219691520818348, "grad_norm": 1.803045833447068, "learning_rate": 1.0398607533788152e-06, "loss": 0.4651968479156494, "step": 10774 }, { "epoch": 1.722128985854711, "grad_norm": 1.6607272539107396, "learning_rate": 1.0386876572206306e-06, "loss": 0.5048092007637024, "step": 10775 }, { "epoch": 1.7222888196275874, "grad_norm": 1.611470733212507, "learning_rate": 1.0375151868886825e-06, "loss": 0.5034884810447693, "step": 10776 }, { "epoch": 1.7224486534004635, "grad_norm": 1.4269199665230303, "learning_rate": 1.0363433424648494e-06, "loss": 0.5122438073158264, "step": 10777 }, { "epoch": 1.7226084871733396, "grad_norm": 1.7834440388826496, "learning_rate": 1.0351721240309687e-06, "loss": 0.5013185143470764, "step": 10778 }, { "epoch": 1.722768320946216, "grad_norm": 1.5949545612272316, "learning_rate": 1.0340015316688358e-06, "loss": 0.6047855615615845, "step": 10779 }, { "epoch": 1.7229281547190922, "grad_norm": 1.5098587912080133, "learning_rate": 1.0328315654602005e-06, "loss": 0.5261352062225342, "step": 10780 }, { "epoch": 1.7230879884919683, "grad_norm": 1.5283441195166758, "learning_rate": 1.0316622254867658e-06, "loss": 0.5027168393135071, "step": 10781 }, { "epoch": 1.7232478222648444, "grad_norm": 1.6266441522417479, "learning_rate": 1.0304935118301994e-06, "loss": 0.6937054395675659, "step": 10782 }, { "epoch": 1.7234076560377207, "grad_norm": 1.5061081418623916, "learning_rate": 1.0293254245721184e-06, "loss": 0.4733249545097351, "step": 10783 }, { "epoch": 1.723567489810597, "grad_norm": 1.676814224447194, "learning_rate": 1.0281579637940953e-06, "loss": 0.5819909572601318, "step": 10784 }, { "epoch": 1.7237273235834731, "grad_norm": 1.524386713402804, "learning_rate": 1.0269911295776625e-06, "loss": 0.523523211479187, "step": 10785 }, { "epoch": 1.7238871573563492, "grad_norm": 1.9479615694037866, "learning_rate": 1.0258249220043093e-06, "loss": 0.5627880096435547, "step": 10786 }, { "epoch": 1.7240469911292258, "grad_norm": 1.6584873027728837, "learning_rate": 1.0246593411554796e-06, "loss": 0.5298303365707397, "step": 10787 }, { "epoch": 1.7242068249021019, "grad_norm": 1.6691235325229172, "learning_rate": 1.0234943871125714e-06, "loss": 0.5483832955360413, "step": 10788 }, { "epoch": 1.724366658674978, "grad_norm": 1.3058263764663858, "learning_rate": 1.0223300599569419e-06, "loss": 0.40287965536117554, "step": 10789 }, { "epoch": 1.7245264924478543, "grad_norm": 1.6205488285977423, "learning_rate": 1.0211663597699018e-06, "loss": 0.5114500522613525, "step": 10790 }, { "epoch": 1.7246863262207306, "grad_norm": 1.6811751961121402, "learning_rate": 1.0200032866327224e-06, "loss": 0.5090068578720093, "step": 10791 }, { "epoch": 1.7248461599936067, "grad_norm": 1.5659978534724566, "learning_rate": 1.0188408406266282e-06, "loss": 0.4807063043117523, "step": 10792 }, { "epoch": 1.7250059937664828, "grad_norm": 1.757478788792961, "learning_rate": 1.0176790218328003e-06, "loss": 0.5447444915771484, "step": 10793 }, { "epoch": 1.725165827539359, "grad_norm": 1.572000568535273, "learning_rate": 1.0165178303323753e-06, "loss": 0.5249073505401611, "step": 10794 }, { "epoch": 1.7253256613122354, "grad_norm": 1.4896770905395962, "learning_rate": 1.0153572662064448e-06, "loss": 0.5001021027565002, "step": 10795 }, { "epoch": 1.7254854950851115, "grad_norm": 1.6858246367513792, "learning_rate": 1.0141973295360619e-06, "loss": 0.5485326051712036, "step": 10796 }, { "epoch": 1.7256453288579876, "grad_norm": 1.5616883689741503, "learning_rate": 1.0130380204022317e-06, "loss": 0.7821493148803711, "step": 10797 }, { "epoch": 1.725805162630864, "grad_norm": 1.5458900579523398, "learning_rate": 1.011879338885916e-06, "loss": 0.5137550830841064, "step": 10798 }, { "epoch": 1.7259649964037402, "grad_norm": 1.810942291934588, "learning_rate": 1.0107212850680304e-06, "loss": 0.6030220985412598, "step": 10799 }, { "epoch": 1.7261248301766163, "grad_norm": 1.571321773905048, "learning_rate": 1.009563859029451e-06, "loss": 0.501389741897583, "step": 10800 }, { "epoch": 1.7262846639494924, "grad_norm": 1.8842426138437236, "learning_rate": 1.008407060851011e-06, "loss": 0.7862114310264587, "step": 10801 }, { "epoch": 1.7264444977223687, "grad_norm": 1.7340799333664256, "learning_rate": 1.0072508906134948e-06, "loss": 0.5656253099441528, "step": 10802 }, { "epoch": 1.726604331495245, "grad_norm": 1.577158569972415, "learning_rate": 1.0060953483976454e-06, "loss": 0.4988686442375183, "step": 10803 }, { "epoch": 1.7267641652681212, "grad_norm": 1.6787509307958322, "learning_rate": 1.0049404342841618e-06, "loss": 0.5573312044143677, "step": 10804 }, { "epoch": 1.7269239990409972, "grad_norm": 1.7246940056305726, "learning_rate": 1.003786148353697e-06, "loss": 0.5062643885612488, "step": 10805 }, { "epoch": 1.7270838328138736, "grad_norm": 1.6137782023415317, "learning_rate": 1.0026324906868667e-06, "loss": 0.55361008644104, "step": 10806 }, { "epoch": 1.7272436665867499, "grad_norm": 1.4499595451101424, "learning_rate": 1.0014794613642354e-06, "loss": 0.49853479862213135, "step": 10807 }, { "epoch": 1.727403500359626, "grad_norm": 2.179325062705282, "learning_rate": 1.0003270604663262e-06, "loss": 0.5031856298446655, "step": 10808 }, { "epoch": 1.727563334132502, "grad_norm": 1.4042157125942716, "learning_rate": 9.991752880736205e-07, "loss": 0.49996817111968994, "step": 10809 }, { "epoch": 1.7277231679053784, "grad_norm": 1.7381820468135483, "learning_rate": 9.980241442665506e-07, "loss": 0.5970619916915894, "step": 10810 }, { "epoch": 1.7278830016782547, "grad_norm": 1.87558657465095, "learning_rate": 9.968736291255122e-07, "loss": 0.560768723487854, "step": 10811 }, { "epoch": 1.7280428354511308, "grad_norm": 1.4240129078596804, "learning_rate": 9.95723742730852e-07, "loss": 0.5518798232078552, "step": 10812 }, { "epoch": 1.728202669224007, "grad_norm": 1.3742854642369329, "learning_rate": 9.945744851628735e-07, "loss": 0.4413491487503052, "step": 10813 }, { "epoch": 1.7283625029968832, "grad_norm": 1.7376868021832086, "learning_rate": 9.934258565018351e-07, "loss": 0.48096734285354614, "step": 10814 }, { "epoch": 1.7285223367697595, "grad_norm": 1.3874806630112095, "learning_rate": 9.922778568279566e-07, "loss": 0.5743018984794617, "step": 10815 }, { "epoch": 1.7286821705426356, "grad_norm": 1.557746838428021, "learning_rate": 9.911304862214078e-07, "loss": 0.49645912647247314, "step": 10816 }, { "epoch": 1.7288420043155117, "grad_norm": 1.4447552315599328, "learning_rate": 9.899837447623184e-07, "loss": 0.5040930509567261, "step": 10817 }, { "epoch": 1.729001838088388, "grad_norm": 1.664716690163483, "learning_rate": 9.888376325307713e-07, "loss": 0.5263099074363708, "step": 10818 }, { "epoch": 1.7291616718612643, "grad_norm": 9.271179303534993, "learning_rate": 9.87692149606806e-07, "loss": 0.622406005859375, "step": 10819 }, { "epoch": 1.7293215056341404, "grad_norm": 1.482227409520077, "learning_rate": 9.865472960704192e-07, "loss": 0.4958440661430359, "step": 10820 }, { "epoch": 1.7294813394070165, "grad_norm": 1.4799302188212653, "learning_rate": 9.854030720015673e-07, "loss": 0.567787766456604, "step": 10821 }, { "epoch": 1.729641173179893, "grad_norm": 1.5319134342483434, "learning_rate": 9.842594774801573e-07, "loss": 0.5809293389320374, "step": 10822 }, { "epoch": 1.7298010069527692, "grad_norm": 1.408862129136634, "learning_rate": 9.831165125860508e-07, "loss": 0.448742538690567, "step": 10823 }, { "epoch": 1.7299608407256453, "grad_norm": 1.4506596512004635, "learning_rate": 9.81974177399072e-07, "loss": 0.5784907937049866, "step": 10824 }, { "epoch": 1.7301206744985216, "grad_norm": 2.0474454350951876, "learning_rate": 9.808324719989925e-07, "loss": 0.7052767276763916, "step": 10825 }, { "epoch": 1.730280508271398, "grad_norm": 1.625277076658792, "learning_rate": 9.796913964655507e-07, "loss": 0.5511055588722229, "step": 10826 }, { "epoch": 1.730440342044274, "grad_norm": 1.6085418654143064, "learning_rate": 9.78550950878433e-07, "loss": 0.5962631702423096, "step": 10827 }, { "epoch": 1.73060017581715, "grad_norm": 1.5758454390950059, "learning_rate": 9.774111353172844e-07, "loss": 0.5634177923202515, "step": 10828 }, { "epoch": 1.7307600095900264, "grad_norm": 2.0009304021622762, "learning_rate": 9.762719498617047e-07, "loss": 0.5543822050094604, "step": 10829 }, { "epoch": 1.7309198433629027, "grad_norm": 1.8815133379357636, "learning_rate": 9.75133394591249e-07, "loss": 0.6197389960289001, "step": 10830 }, { "epoch": 1.7310796771357788, "grad_norm": 1.5687092671919254, "learning_rate": 9.739954695854337e-07, "loss": 0.5901443958282471, "step": 10831 }, { "epoch": 1.731239510908655, "grad_norm": 1.6125132237426831, "learning_rate": 9.728581749237254e-07, "loss": 0.49741798639297485, "step": 10832 }, { "epoch": 1.7313993446815312, "grad_norm": 1.5649665330938272, "learning_rate": 9.717215106855493e-07, "loss": 0.5865083932876587, "step": 10833 }, { "epoch": 1.7315591784544075, "grad_norm": 1.2990614259588529, "learning_rate": 9.705854769502832e-07, "loss": 0.36213478446006775, "step": 10834 }, { "epoch": 1.7317190122272836, "grad_norm": 1.5302107931554734, "learning_rate": 9.69450073797269e-07, "loss": 0.4822666049003601, "step": 10835 }, { "epoch": 1.7318788460001597, "grad_norm": 1.5581537211021712, "learning_rate": 9.683153013057954e-07, "loss": 0.5931825637817383, "step": 10836 }, { "epoch": 1.732038679773036, "grad_norm": 1.5177531883118986, "learning_rate": 9.671811595551128e-07, "loss": 0.5953713655471802, "step": 10837 }, { "epoch": 1.7321985135459124, "grad_norm": 1.6816034390003027, "learning_rate": 9.660476486244242e-07, "loss": 0.5692151784896851, "step": 10838 }, { "epoch": 1.7323583473187885, "grad_norm": 1.426050922264205, "learning_rate": 9.649147685928884e-07, "loss": 0.5127902626991272, "step": 10839 }, { "epoch": 1.7325181810916646, "grad_norm": 1.5910627727823254, "learning_rate": 9.637825195396233e-07, "loss": 0.6138443946838379, "step": 10840 }, { "epoch": 1.7326780148645409, "grad_norm": 1.5430207596989551, "learning_rate": 9.626509015437035e-07, "loss": 0.5446391105651855, "step": 10841 }, { "epoch": 1.7328378486374172, "grad_norm": 1.5749934080480479, "learning_rate": 9.615199146841558e-07, "loss": 0.4601327180862427, "step": 10842 }, { "epoch": 1.7329976824102933, "grad_norm": 1.4646269795165423, "learning_rate": 9.603895590399648e-07, "loss": 0.5443432927131653, "step": 10843 }, { "epoch": 1.7331575161831694, "grad_norm": 1.4814902168558335, "learning_rate": 9.592598346900683e-07, "loss": 0.5337763428688049, "step": 10844 }, { "epoch": 1.7333173499560457, "grad_norm": 2.0007315912407053, "learning_rate": 9.581307417133612e-07, "loss": 0.5438526272773743, "step": 10845 }, { "epoch": 1.733477183728922, "grad_norm": 1.7296322140935119, "learning_rate": 9.570022801887002e-07, "loss": 0.5680325031280518, "step": 10846 }, { "epoch": 1.733637017501798, "grad_norm": 1.4786090173019488, "learning_rate": 9.558744501948903e-07, "loss": 0.4637483060359955, "step": 10847 }, { "epoch": 1.7337968512746742, "grad_norm": 1.662496244898716, "learning_rate": 9.54747251810696e-07, "loss": 0.500856876373291, "step": 10848 }, { "epoch": 1.7339566850475505, "grad_norm": 1.8729680228714187, "learning_rate": 9.536206851148334e-07, "loss": 0.5758404731750488, "step": 10849 }, { "epoch": 1.7341165188204268, "grad_norm": 1.8973684466357752, "learning_rate": 9.524947501859827e-07, "loss": 0.7312759160995483, "step": 10850 }, { "epoch": 1.734276352593303, "grad_norm": 1.5586592599431117, "learning_rate": 9.513694471027735e-07, "loss": 0.5707650184631348, "step": 10851 }, { "epoch": 1.734436186366179, "grad_norm": 1.672454298366871, "learning_rate": 9.502447759437916e-07, "loss": 0.6016563773155212, "step": 10852 }, { "epoch": 1.7345960201390553, "grad_norm": 1.6227528482549949, "learning_rate": 9.491207367875821e-07, "loss": 0.5569148063659668, "step": 10853 }, { "epoch": 1.7347558539119317, "grad_norm": 2.171154061389092, "learning_rate": 9.4799732971264e-07, "loss": 0.5414056777954102, "step": 10854 }, { "epoch": 1.7349156876848078, "grad_norm": 1.8891632486889371, "learning_rate": 9.468745547974256e-07, "loss": 0.5435540676116943, "step": 10855 }, { "epoch": 1.7350755214576838, "grad_norm": 1.4899037371256383, "learning_rate": 9.457524121203465e-07, "loss": 0.45760220289230347, "step": 10856 }, { "epoch": 1.7352353552305604, "grad_norm": 1.7360139913954529, "learning_rate": 9.446309017597699e-07, "loss": 0.7639318704605103, "step": 10857 }, { "epoch": 1.7353951890034365, "grad_norm": 1.6938749056951645, "learning_rate": 9.435100237940176e-07, "loss": 0.5916565656661987, "step": 10858 }, { "epoch": 1.7355550227763126, "grad_norm": 1.6440840516244704, "learning_rate": 9.423897783013658e-07, "loss": 0.4732884168624878, "step": 10859 }, { "epoch": 1.735714856549189, "grad_norm": 1.7830841501406862, "learning_rate": 9.412701653600498e-07, "loss": 0.4740869402885437, "step": 10860 }, { "epoch": 1.7358746903220652, "grad_norm": 1.947348510145012, "learning_rate": 9.401511850482625e-07, "loss": 0.5686638355255127, "step": 10861 }, { "epoch": 1.7360345240949413, "grad_norm": 1.8452598586022801, "learning_rate": 9.390328374441482e-07, "loss": 0.5561156868934631, "step": 10862 }, { "epoch": 1.7361943578678174, "grad_norm": 1.4773091514394145, "learning_rate": 9.379151226258054e-07, "loss": 0.4564332067966461, "step": 10863 }, { "epoch": 1.7363541916406937, "grad_norm": 1.6599717734613788, "learning_rate": 9.367980406712928e-07, "loss": 0.6117100715637207, "step": 10864 }, { "epoch": 1.73651402541357, "grad_norm": 1.4555575315509253, "learning_rate": 9.356815916586248e-07, "loss": 0.5025779008865356, "step": 10865 }, { "epoch": 1.7366738591864461, "grad_norm": 1.5592737973637507, "learning_rate": 9.3456577566577e-07, "loss": 0.6453218460083008, "step": 10866 }, { "epoch": 1.7368336929593222, "grad_norm": 1.6536723023406052, "learning_rate": 9.334505927706516e-07, "loss": 0.5592124462127686, "step": 10867 }, { "epoch": 1.7369935267321985, "grad_norm": 1.6964986579345005, "learning_rate": 9.323360430511508e-07, "loss": 0.4482690095901489, "step": 10868 }, { "epoch": 1.7371533605050749, "grad_norm": 1.455470228943382, "learning_rate": 9.312221265851018e-07, "loss": 0.4726385772228241, "step": 10869 }, { "epoch": 1.737313194277951, "grad_norm": 1.5515517565784362, "learning_rate": 9.301088434503003e-07, "loss": 0.4857487976551056, "step": 10870 }, { "epoch": 1.737473028050827, "grad_norm": 1.6890680704858132, "learning_rate": 9.289961937244918e-07, "loss": 0.49317413568496704, "step": 10871 }, { "epoch": 1.7376328618237034, "grad_norm": 1.5526308857764068, "learning_rate": 9.278841774853808e-07, "loss": 0.47093677520751953, "step": 10872 }, { "epoch": 1.7377926955965797, "grad_norm": 1.5510882760218934, "learning_rate": 9.267727948106253e-07, "loss": 0.5519117712974548, "step": 10873 }, { "epoch": 1.7379525293694558, "grad_norm": 1.5656310359679517, "learning_rate": 9.256620457778398e-07, "loss": 0.6290377378463745, "step": 10874 }, { "epoch": 1.7381123631423319, "grad_norm": 1.3333081569104326, "learning_rate": 9.245519304645978e-07, "loss": 0.5061862468719482, "step": 10875 }, { "epoch": 1.7382721969152082, "grad_norm": 1.663683151415774, "learning_rate": 9.234424489484239e-07, "loss": 0.5582026243209839, "step": 10876 }, { "epoch": 1.7384320306880845, "grad_norm": 1.773080975543022, "learning_rate": 9.223336013068019e-07, "loss": 0.56968092918396, "step": 10877 }, { "epoch": 1.7385918644609606, "grad_norm": 1.1893945538788655, "learning_rate": 9.212253876171651e-07, "loss": 0.4634009599685669, "step": 10878 }, { "epoch": 1.7387516982338367, "grad_norm": 1.5412423278986704, "learning_rate": 9.201178079569118e-07, "loss": 0.5090875625610352, "step": 10879 }, { "epoch": 1.738911532006713, "grad_norm": 1.6162078092073053, "learning_rate": 9.190108624033922e-07, "loss": 0.5074114203453064, "step": 10880 }, { "epoch": 1.7390713657795893, "grad_norm": 1.4796353781747837, "learning_rate": 9.179045510339091e-07, "loss": 0.4601026177406311, "step": 10881 }, { "epoch": 1.7392311995524654, "grad_norm": 1.6375422265967856, "learning_rate": 9.167988739257239e-07, "loss": 0.5625913739204407, "step": 10882 }, { "epoch": 1.7393910333253415, "grad_norm": 1.5644529351582235, "learning_rate": 9.156938311560526e-07, "loss": 0.5925164818763733, "step": 10883 }, { "epoch": 1.7395508670982178, "grad_norm": 1.5926664525120684, "learning_rate": 9.145894228020669e-07, "loss": 0.5885647535324097, "step": 10884 }, { "epoch": 1.7397107008710941, "grad_norm": 1.7659120213848196, "learning_rate": 9.134856489408972e-07, "loss": 0.5164503455162048, "step": 10885 }, { "epoch": 1.7398705346439702, "grad_norm": 1.74973844337935, "learning_rate": 9.123825096496264e-07, "loss": 0.5683653354644775, "step": 10886 }, { "epoch": 1.7400303684168463, "grad_norm": 1.5196391174144848, "learning_rate": 9.112800050052927e-07, "loss": 0.46144717931747437, "step": 10887 }, { "epoch": 1.7401902021897226, "grad_norm": 1.949280519022645, "learning_rate": 9.101781350848915e-07, "loss": 0.7171487808227539, "step": 10888 }, { "epoch": 1.740350035962599, "grad_norm": 1.8043122892461756, "learning_rate": 9.090768999653721e-07, "loss": 0.6437563896179199, "step": 10889 }, { "epoch": 1.740509869735475, "grad_norm": 1.5745213207658049, "learning_rate": 9.079762997236441e-07, "loss": 0.484480619430542, "step": 10890 }, { "epoch": 1.7406697035083512, "grad_norm": 1.5705135686854734, "learning_rate": 9.068763344365683e-07, "loss": 0.5929468870162964, "step": 10891 }, { "epoch": 1.7408295372812275, "grad_norm": 1.6762135280033696, "learning_rate": 9.057770041809611e-07, "loss": 0.5469515323638916, "step": 10892 }, { "epoch": 1.7409893710541038, "grad_norm": 1.621858246921459, "learning_rate": 9.046783090335943e-07, "loss": 0.4622039496898651, "step": 10893 }, { "epoch": 1.7411492048269799, "grad_norm": 1.8272058603669585, "learning_rate": 9.03580249071202e-07, "loss": 0.6135718822479248, "step": 10894 }, { "epoch": 1.7413090385998562, "grad_norm": 1.6517169489939594, "learning_rate": 9.024828243704653e-07, "loss": 0.5104167461395264, "step": 10895 }, { "epoch": 1.7414688723727325, "grad_norm": 1.5766044896531803, "learning_rate": 9.01386035008025e-07, "loss": 0.45107829570770264, "step": 10896 }, { "epoch": 1.7416287061456086, "grad_norm": 1.5354521641049366, "learning_rate": 9.002898810604766e-07, "loss": 0.5132862329483032, "step": 10897 }, { "epoch": 1.7417885399184847, "grad_norm": 1.574678272022027, "learning_rate": 8.991943626043709e-07, "loss": 0.5827409625053406, "step": 10898 }, { "epoch": 1.741948373691361, "grad_norm": 1.4598695339346408, "learning_rate": 8.980994797162157e-07, "loss": 0.5488575100898743, "step": 10899 }, { "epoch": 1.7421082074642373, "grad_norm": 2.1223619782982, "learning_rate": 8.970052324724765e-07, "loss": 0.6066893339157104, "step": 10900 }, { "epoch": 1.7422680412371134, "grad_norm": 1.8646651609832139, "learning_rate": 8.959116209495688e-07, "loss": 0.7181365489959717, "step": 10901 }, { "epoch": 1.7424278750099895, "grad_norm": 1.8032036046025555, "learning_rate": 8.948186452238683e-07, "loss": 0.5670311450958252, "step": 10902 }, { "epoch": 1.7425877087828658, "grad_norm": 1.2822559663722735, "learning_rate": 8.937263053717016e-07, "loss": 0.536555290222168, "step": 10903 }, { "epoch": 1.7427475425557422, "grad_norm": 1.6490447764884997, "learning_rate": 8.926346014693554e-07, "loss": 0.47246450185775757, "step": 10904 }, { "epoch": 1.7429073763286183, "grad_norm": 1.5166843847060425, "learning_rate": 8.915435335930711e-07, "loss": 0.47210586071014404, "step": 10905 }, { "epoch": 1.7430672101014943, "grad_norm": 1.6616226926600866, "learning_rate": 8.904531018190454e-07, "loss": 0.42112475633621216, "step": 10906 }, { "epoch": 1.7432270438743707, "grad_norm": 1.6169850037229634, "learning_rate": 8.893633062234285e-07, "loss": 0.5712890028953552, "step": 10907 }, { "epoch": 1.743386877647247, "grad_norm": 1.4553149088043638, "learning_rate": 8.882741468823287e-07, "loss": 0.4371181130409241, "step": 10908 }, { "epoch": 1.743546711420123, "grad_norm": 1.7409634293465532, "learning_rate": 8.871856238718079e-07, "loss": 0.598023533821106, "step": 10909 }, { "epoch": 1.7437065451929992, "grad_norm": 1.468892163924723, "learning_rate": 8.860977372678869e-07, "loss": 0.43914157152175903, "step": 10910 }, { "epoch": 1.7438663789658755, "grad_norm": 1.4978005704048458, "learning_rate": 8.850104871465381e-07, "loss": 0.5773483514785767, "step": 10911 }, { "epoch": 1.7440262127387518, "grad_norm": 1.540076568563185, "learning_rate": 8.839238735836931e-07, "loss": 0.5218847393989563, "step": 10912 }, { "epoch": 1.744186046511628, "grad_norm": 1.5597164802556294, "learning_rate": 8.828378966552331e-07, "loss": 0.41558727622032166, "step": 10913 }, { "epoch": 1.744345880284504, "grad_norm": 1.6670984203368875, "learning_rate": 8.817525564370033e-07, "loss": 0.5341259241104126, "step": 10914 }, { "epoch": 1.7445057140573803, "grad_norm": 1.4876769644590404, "learning_rate": 8.806678530047985e-07, "loss": 0.5143769979476929, "step": 10915 }, { "epoch": 1.7446655478302566, "grad_norm": 1.4929247003056925, "learning_rate": 8.795837864343703e-07, "loss": 0.39755016565322876, "step": 10916 }, { "epoch": 1.7448253816031327, "grad_norm": 1.4353834319915244, "learning_rate": 8.785003568014261e-07, "loss": 0.5033533573150635, "step": 10917 }, { "epoch": 1.7449852153760088, "grad_norm": 1.6494893396460535, "learning_rate": 8.774175641816274e-07, "loss": 0.5025274753570557, "step": 10918 }, { "epoch": 1.7451450491488851, "grad_norm": 1.5113443289411725, "learning_rate": 8.763354086505937e-07, "loss": 0.5314639210700989, "step": 10919 }, { "epoch": 1.7453048829217614, "grad_norm": 1.6072149061686258, "learning_rate": 8.752538902839003e-07, "loss": 0.4966238737106323, "step": 10920 }, { "epoch": 1.7454647166946375, "grad_norm": 1.6556851169847435, "learning_rate": 8.741730091570766e-07, "loss": 0.5594134330749512, "step": 10921 }, { "epoch": 1.7456245504675136, "grad_norm": 1.806241097776881, "learning_rate": 8.730927653456045e-07, "loss": 0.6068899631500244, "step": 10922 }, { "epoch": 1.74578438424039, "grad_norm": 1.3630631035469172, "learning_rate": 8.720131589249281e-07, "loss": 0.5043426156044006, "step": 10923 }, { "epoch": 1.7459442180132663, "grad_norm": 1.998554328844468, "learning_rate": 8.709341899704382e-07, "loss": 0.655711829662323, "step": 10924 }, { "epoch": 1.7461040517861424, "grad_norm": 1.5372004201870173, "learning_rate": 8.698558585574923e-07, "loss": 0.6621568202972412, "step": 10925 }, { "epoch": 1.7462638855590185, "grad_norm": 1.7458908348333604, "learning_rate": 8.687781647613935e-07, "loss": 0.4842317998409271, "step": 10926 }, { "epoch": 1.7464237193318948, "grad_norm": 1.6416197820989014, "learning_rate": 8.67701108657405e-07, "loss": 0.4107435941696167, "step": 10927 }, { "epoch": 1.746583553104771, "grad_norm": 1.455641193253338, "learning_rate": 8.666246903207432e-07, "loss": 0.5834017992019653, "step": 10928 }, { "epoch": 1.7467433868776472, "grad_norm": 1.502936574291568, "learning_rate": 8.655489098265846e-07, "loss": 0.5527569651603699, "step": 10929 }, { "epoch": 1.7469032206505235, "grad_norm": 1.5553677121855376, "learning_rate": 8.644737672500547e-07, "loss": 0.6278710961341858, "step": 10930 }, { "epoch": 1.7470630544233998, "grad_norm": 1.6267197364098946, "learning_rate": 8.633992626662402e-07, "loss": 0.45141249895095825, "step": 10931 }, { "epoch": 1.747222888196276, "grad_norm": 1.5912697820269472, "learning_rate": 8.623253961501777e-07, "loss": 0.5957207083702087, "step": 10932 }, { "epoch": 1.747382721969152, "grad_norm": 1.522778253271287, "learning_rate": 8.612521677768637e-07, "loss": 0.4515582323074341, "step": 10933 }, { "epoch": 1.7475425557420283, "grad_norm": 1.5384083930972883, "learning_rate": 8.601795776212496e-07, "loss": 0.48346471786499023, "step": 10934 }, { "epoch": 1.7477023895149046, "grad_norm": 1.6661504129833031, "learning_rate": 8.591076257582409e-07, "loss": 0.5518491268157959, "step": 10935 }, { "epoch": 1.7478622232877807, "grad_norm": 1.6257546372353888, "learning_rate": 8.580363122626989e-07, "loss": 0.5785597562789917, "step": 10936 }, { "epoch": 1.7480220570606568, "grad_norm": 1.5419205578165953, "learning_rate": 8.569656372094393e-07, "loss": 0.5286142826080322, "step": 10937 }, { "epoch": 1.7481818908335331, "grad_norm": 1.5185248292419702, "learning_rate": 8.558956006732333e-07, "loss": 0.5747909545898438, "step": 10938 }, { "epoch": 1.7483417246064095, "grad_norm": 1.7937919718246118, "learning_rate": 8.548262027288101e-07, "loss": 0.6434822678565979, "step": 10939 }, { "epoch": 1.7485015583792856, "grad_norm": 1.7435108543364264, "learning_rate": 8.537574434508544e-07, "loss": 0.4881359338760376, "step": 10940 }, { "epoch": 1.7486613921521617, "grad_norm": 1.6788108565223017, "learning_rate": 8.52689322914002e-07, "loss": 0.5488835573196411, "step": 10941 }, { "epoch": 1.748821225925038, "grad_norm": 1.493499338629791, "learning_rate": 8.516218411928489e-07, "loss": 0.6450031399726868, "step": 10942 }, { "epoch": 1.7489810596979143, "grad_norm": 1.5386711607843058, "learning_rate": 8.505549983619399e-07, "loss": 0.6147278547286987, "step": 10943 }, { "epoch": 1.7491408934707904, "grad_norm": 1.641802105219778, "learning_rate": 8.494887944957841e-07, "loss": 0.5990848541259766, "step": 10944 }, { "epoch": 1.7493007272436665, "grad_norm": 1.5674813560701102, "learning_rate": 8.4842322966884e-07, "loss": 0.5823279619216919, "step": 10945 }, { "epoch": 1.7494605610165428, "grad_norm": 1.7227340717690691, "learning_rate": 8.473583039555222e-07, "loss": 0.6325843334197998, "step": 10946 }, { "epoch": 1.749620394789419, "grad_norm": 1.5841868628709972, "learning_rate": 8.462940174302026e-07, "loss": 0.5246336460113525, "step": 10947 }, { "epoch": 1.7497802285622952, "grad_norm": 1.605383155297877, "learning_rate": 8.452303701672038e-07, "loss": 0.5363393425941467, "step": 10948 }, { "epoch": 1.7499400623351713, "grad_norm": 1.3730316632557342, "learning_rate": 8.441673622408108e-07, "loss": 0.3926754891872406, "step": 10949 }, { "epoch": 1.7500998961080476, "grad_norm": 2.014666335833051, "learning_rate": 8.4310499372526e-07, "loss": 0.6092414259910583, "step": 10950 }, { "epoch": 1.750259729880924, "grad_norm": 1.5611443242843288, "learning_rate": 8.420432646947419e-07, "loss": 0.4888366460800171, "step": 10951 }, { "epoch": 1.7504195636538, "grad_norm": 1.8239658499714426, "learning_rate": 8.409821752234048e-07, "loss": 0.5514611005783081, "step": 10952 }, { "epoch": 1.7505793974266761, "grad_norm": 1.900967034334783, "learning_rate": 8.399217253853487e-07, "loss": 0.5956814885139465, "step": 10953 }, { "epoch": 1.7507392311995524, "grad_norm": 1.7195263461094015, "learning_rate": 8.388619152546363e-07, "loss": 0.5850330591201782, "step": 10954 }, { "epoch": 1.7508990649724288, "grad_norm": 1.6704914740899797, "learning_rate": 8.378027449052784e-07, "loss": 0.5311477184295654, "step": 10955 }, { "epoch": 1.7510588987453048, "grad_norm": 1.5778324291944483, "learning_rate": 8.367442144112447e-07, "loss": 0.4788947105407715, "step": 10956 }, { "epoch": 1.751218732518181, "grad_norm": 1.8273315258379557, "learning_rate": 8.356863238464563e-07, "loss": 0.5606995820999146, "step": 10957 }, { "epoch": 1.7513785662910573, "grad_norm": 1.4380515875993465, "learning_rate": 8.346290732847973e-07, "loss": 0.5560935735702515, "step": 10958 }, { "epoch": 1.7515384000639336, "grad_norm": 1.5826291566613342, "learning_rate": 8.335724628000974e-07, "loss": 0.6768242120742798, "step": 10959 }, { "epoch": 1.7516982338368097, "grad_norm": 1.5250604611178866, "learning_rate": 8.325164924661511e-07, "loss": 0.5460206270217896, "step": 10960 }, { "epoch": 1.7518580676096858, "grad_norm": 1.7071361905908091, "learning_rate": 8.314611623567003e-07, "loss": 0.6129022836685181, "step": 10961 }, { "epoch": 1.752017901382562, "grad_norm": 1.6668711672716332, "learning_rate": 8.304064725454475e-07, "loss": 0.5307058691978455, "step": 10962 }, { "epoch": 1.7521777351554384, "grad_norm": 1.6701485823613027, "learning_rate": 8.293524231060468e-07, "loss": 0.7116280794143677, "step": 10963 }, { "epoch": 1.7523375689283145, "grad_norm": 1.7612195554290622, "learning_rate": 8.282990141121105e-07, "loss": 0.5881098508834839, "step": 10964 }, { "epoch": 1.7524974027011908, "grad_norm": 1.6178477386987316, "learning_rate": 8.272462456372055e-07, "loss": 0.49493715167045593, "step": 10965 }, { "epoch": 1.7526572364740671, "grad_norm": 1.8404472455489655, "learning_rate": 8.261941177548516e-07, "loss": 0.5725172758102417, "step": 10966 }, { "epoch": 1.7528170702469432, "grad_norm": 1.5479232261395113, "learning_rate": 8.251426305385268e-07, "loss": 0.5220824480056763, "step": 10967 }, { "epoch": 1.7529769040198193, "grad_norm": 1.91095750656715, "learning_rate": 8.240917840616614e-07, "loss": 0.5143238306045532, "step": 10968 }, { "epoch": 1.7531367377926956, "grad_norm": 1.611641453762621, "learning_rate": 8.230415783976464e-07, "loss": 0.5001630783081055, "step": 10969 }, { "epoch": 1.753296571565572, "grad_norm": 1.4003823514028886, "learning_rate": 8.219920136198222e-07, "loss": 0.43783140182495117, "step": 10970 }, { "epoch": 1.753456405338448, "grad_norm": 1.7463387958317615, "learning_rate": 8.20943089801487e-07, "loss": 0.512962818145752, "step": 10971 }, { "epoch": 1.7536162391113241, "grad_norm": 1.4975935834608716, "learning_rate": 8.19894807015893e-07, "loss": 0.44589224457740784, "step": 10972 }, { "epoch": 1.7537760728842005, "grad_norm": 1.6273842783407628, "learning_rate": 8.188471653362485e-07, "loss": 0.5360490083694458, "step": 10973 }, { "epoch": 1.7539359066570768, "grad_norm": 1.852167202483588, "learning_rate": 8.178001648357192e-07, "loss": 0.49121278524398804, "step": 10974 }, { "epoch": 1.7540957404299529, "grad_norm": 1.6413597320343636, "learning_rate": 8.167538055874224e-07, "loss": 0.5682281255722046, "step": 10975 }, { "epoch": 1.754255574202829, "grad_norm": 1.4840330463139195, "learning_rate": 8.15708087664433e-07, "loss": 0.5319364070892334, "step": 10976 }, { "epoch": 1.7544154079757053, "grad_norm": 1.5856753611898453, "learning_rate": 8.146630111397768e-07, "loss": 0.5851664543151855, "step": 10977 }, { "epoch": 1.7545752417485816, "grad_norm": 1.4829437854393952, "learning_rate": 8.136185760864435e-07, "loss": 0.5801061391830444, "step": 10978 }, { "epoch": 1.7547350755214577, "grad_norm": 1.6188984744673638, "learning_rate": 8.125747825773689e-07, "loss": 0.49259066581726074, "step": 10979 }, { "epoch": 1.7548949092943338, "grad_norm": 1.6642875618478112, "learning_rate": 8.115316306854503e-07, "loss": 0.5096973776817322, "step": 10980 }, { "epoch": 1.75505474306721, "grad_norm": 1.4954701771902321, "learning_rate": 8.104891204835363e-07, "loss": 0.6666842103004456, "step": 10981 }, { "epoch": 1.7552145768400864, "grad_norm": 1.505575286523012, "learning_rate": 8.094472520444329e-07, "loss": 0.4498388171195984, "step": 10982 }, { "epoch": 1.7553744106129625, "grad_norm": 1.6236474459930805, "learning_rate": 8.084060254408988e-07, "loss": 0.6104274988174438, "step": 10983 }, { "epoch": 1.7555342443858386, "grad_norm": 1.882284188933682, "learning_rate": 8.07365440745651e-07, "loss": 0.73320472240448, "step": 10984 }, { "epoch": 1.755694078158715, "grad_norm": 1.6428586371519045, "learning_rate": 8.063254980313617e-07, "loss": 0.5050705075263977, "step": 10985 }, { "epoch": 1.7558539119315912, "grad_norm": 1.5821276710541003, "learning_rate": 8.052861973706538e-07, "loss": 0.5346537828445435, "step": 10986 }, { "epoch": 1.7560137457044673, "grad_norm": 1.6469641322362594, "learning_rate": 8.042475388361104e-07, "loss": 0.5925960540771484, "step": 10987 }, { "epoch": 1.7561735794773434, "grad_norm": 1.7466536512659114, "learning_rate": 8.032095225002646e-07, "loss": 0.6447955965995789, "step": 10988 }, { "epoch": 1.7563334132502197, "grad_norm": 1.7104372503624117, "learning_rate": 8.021721484356126e-07, "loss": 0.6300138235092163, "step": 10989 }, { "epoch": 1.756493247023096, "grad_norm": 1.7461949364861753, "learning_rate": 8.01135416714598e-07, "loss": 0.5432393550872803, "step": 10990 }, { "epoch": 1.7566530807959722, "grad_norm": 1.7474252700051167, "learning_rate": 8.000993274096225e-07, "loss": 0.6394834518432617, "step": 10991 }, { "epoch": 1.7568129145688482, "grad_norm": 1.6164637659053673, "learning_rate": 7.990638805930429e-07, "loss": 0.5600780248641968, "step": 10992 }, { "epoch": 1.7569727483417246, "grad_norm": 1.6053364601521638, "learning_rate": 7.980290763371723e-07, "loss": 0.5206900835037231, "step": 10993 }, { "epoch": 1.7571325821146009, "grad_norm": 1.4360369719950135, "learning_rate": 7.969949147142775e-07, "loss": 0.5045517086982727, "step": 10994 }, { "epoch": 1.757292415887477, "grad_norm": 1.384217830164979, "learning_rate": 7.959613957965794e-07, "loss": 0.45643025636672974, "step": 10995 }, { "epoch": 1.757452249660353, "grad_norm": 1.4929942227331237, "learning_rate": 7.949285196562573e-07, "loss": 0.4574297070503235, "step": 10996 }, { "epoch": 1.7576120834332294, "grad_norm": 1.616596923598292, "learning_rate": 7.938962863654398e-07, "loss": 0.5490545630455017, "step": 10997 }, { "epoch": 1.7577719172061057, "grad_norm": 1.4896409029810964, "learning_rate": 7.928646959962194e-07, "loss": 0.4817555546760559, "step": 10998 }, { "epoch": 1.7579317509789818, "grad_norm": 1.9661870829489934, "learning_rate": 7.91833748620634e-07, "loss": 0.6145161390304565, "step": 10999 }, { "epoch": 1.7580915847518581, "grad_norm": 1.5651335672747402, "learning_rate": 7.90803444310686e-07, "loss": 0.6732021570205688, "step": 11000 }, { "epoch": 1.7582514185247344, "grad_norm": 1.5763766129476973, "learning_rate": 7.897737831383257e-07, "loss": 0.5492489337921143, "step": 11001 }, { "epoch": 1.7584112522976105, "grad_norm": 1.6060598192004465, "learning_rate": 7.887447651754598e-07, "loss": 0.5447673797607422, "step": 11002 }, { "epoch": 1.7585710860704866, "grad_norm": 1.8692638175043028, "learning_rate": 7.877163904939522e-07, "loss": 0.5510507822036743, "step": 11003 }, { "epoch": 1.758730919843363, "grad_norm": 1.675781559209301, "learning_rate": 7.866886591656231e-07, "loss": 0.6141624450683594, "step": 11004 }, { "epoch": 1.7588907536162393, "grad_norm": 1.7218217463186583, "learning_rate": 7.856615712622429e-07, "loss": 0.49058297276496887, "step": 11005 }, { "epoch": 1.7590505873891153, "grad_norm": 1.508288092905046, "learning_rate": 7.846351268555408e-07, "loss": 0.5660715103149414, "step": 11006 }, { "epoch": 1.7592104211619914, "grad_norm": 1.5482226056336865, "learning_rate": 7.836093260171995e-07, "loss": 0.5311812162399292, "step": 11007 }, { "epoch": 1.7593702549348678, "grad_norm": 1.379004836251451, "learning_rate": 7.825841688188585e-07, "loss": 0.5943892002105713, "step": 11008 }, { "epoch": 1.759530088707744, "grad_norm": 1.5894646473843652, "learning_rate": 7.815596553321092e-07, "loss": 0.5096476078033447, "step": 11009 }, { "epoch": 1.7596899224806202, "grad_norm": 1.6552776675829564, "learning_rate": 7.805357856285023e-07, "loss": 0.511347234249115, "step": 11010 }, { "epoch": 1.7598497562534963, "grad_norm": 1.6770305493112185, "learning_rate": 7.795125597795406e-07, "loss": 0.5743509531021118, "step": 11011 }, { "epoch": 1.7600095900263726, "grad_norm": 1.486331074112905, "learning_rate": 7.784899778566791e-07, "loss": 0.5547603964805603, "step": 11012 }, { "epoch": 1.760169423799249, "grad_norm": 1.8534586156447546, "learning_rate": 7.774680399313361e-07, "loss": 0.6644996404647827, "step": 11013 }, { "epoch": 1.760329257572125, "grad_norm": 1.7667445817455665, "learning_rate": 7.764467460748792e-07, "loss": 0.49310243129730225, "step": 11014 }, { "epoch": 1.760489091345001, "grad_norm": 1.6474682053062746, "learning_rate": 7.754260963586302e-07, "loss": 0.5564897656440735, "step": 11015 }, { "epoch": 1.7606489251178774, "grad_norm": 1.6637999945337607, "learning_rate": 7.744060908538686e-07, "loss": 0.5261936783790588, "step": 11016 }, { "epoch": 1.7608087588907537, "grad_norm": 1.5966314562016173, "learning_rate": 7.733867296318254e-07, "loss": 0.5091630816459656, "step": 11017 }, { "epoch": 1.7609685926636298, "grad_norm": 1.5995832193655553, "learning_rate": 7.723680127636946e-07, "loss": 0.559660792350769, "step": 11018 }, { "epoch": 1.761128426436506, "grad_norm": 1.5676734009984623, "learning_rate": 7.713499403206138e-07, "loss": 0.581584632396698, "step": 11019 }, { "epoch": 1.7612882602093822, "grad_norm": 1.5209126784649931, "learning_rate": 7.70332512373686e-07, "loss": 0.6102597713470459, "step": 11020 }, { "epoch": 1.7614480939822585, "grad_norm": 1.6461228661413412, "learning_rate": 7.693157289939645e-07, "loss": 0.5249513983726501, "step": 11021 }, { "epoch": 1.7616079277551346, "grad_norm": 1.760145166238332, "learning_rate": 7.682995902524537e-07, "loss": 0.6016073226928711, "step": 11022 }, { "epoch": 1.7617677615280107, "grad_norm": 1.69388692152052, "learning_rate": 7.672840962201222e-07, "loss": 0.6649349927902222, "step": 11023 }, { "epoch": 1.761927595300887, "grad_norm": 1.6666439638991395, "learning_rate": 7.662692469678856e-07, "loss": 0.4865339994430542, "step": 11024 }, { "epoch": 1.7620874290737634, "grad_norm": 1.623623827001076, "learning_rate": 7.652550425666194e-07, "loss": 0.6229232549667358, "step": 11025 }, { "epoch": 1.7622472628466395, "grad_norm": 1.5987321365992586, "learning_rate": 7.642414830871492e-07, "loss": 0.5521672964096069, "step": 11026 }, { "epoch": 1.7624070966195156, "grad_norm": 1.584316843697838, "learning_rate": 7.632285686002594e-07, "loss": 0.575424313545227, "step": 11027 }, { "epoch": 1.7625669303923919, "grad_norm": 1.4695571072449682, "learning_rate": 7.622162991766901e-07, "loss": 0.4615921974182129, "step": 11028 }, { "epoch": 1.7627267641652682, "grad_norm": 1.8894457298877416, "learning_rate": 7.612046748871327e-07, "loss": 0.5182311534881592, "step": 11029 }, { "epoch": 1.7628865979381443, "grad_norm": 1.5608736925703706, "learning_rate": 7.601936958022371e-07, "loss": 0.5056703090667725, "step": 11030 }, { "epoch": 1.7630464317110204, "grad_norm": 1.6061986970515492, "learning_rate": 7.591833619926047e-07, "loss": 0.5701878070831299, "step": 11031 }, { "epoch": 1.7632062654838967, "grad_norm": 1.43713987489277, "learning_rate": 7.581736735287926e-07, "loss": 0.6154395341873169, "step": 11032 }, { "epoch": 1.763366099256773, "grad_norm": 1.392811899895372, "learning_rate": 7.571646304813185e-07, "loss": 0.5207943916320801, "step": 11033 }, { "epoch": 1.763525933029649, "grad_norm": 1.6429301111090666, "learning_rate": 7.561562329206463e-07, "loss": 0.6134345531463623, "step": 11034 }, { "epoch": 1.7636857668025254, "grad_norm": 1.545402230019788, "learning_rate": 7.551484809172005e-07, "loss": 0.5703251361846924, "step": 11035 }, { "epoch": 1.7638456005754017, "grad_norm": 1.626883916540069, "learning_rate": 7.541413745413584e-07, "loss": 0.5228244066238403, "step": 11036 }, { "epoch": 1.7640054343482778, "grad_norm": 1.8456240450577217, "learning_rate": 7.531349138634514e-07, "loss": 0.57691890001297, "step": 11037 }, { "epoch": 1.764165268121154, "grad_norm": 1.617807443452798, "learning_rate": 7.5212909895377e-07, "loss": 0.4566380977630615, "step": 11038 }, { "epoch": 1.7643251018940302, "grad_norm": 1.7446668375229144, "learning_rate": 7.511239298825557e-07, "loss": 0.5691932439804077, "step": 11039 }, { "epoch": 1.7644849356669066, "grad_norm": 1.7309088083266655, "learning_rate": 7.501194067200046e-07, "loss": 0.5205217003822327, "step": 11040 }, { "epoch": 1.7646447694397827, "grad_norm": 1.5854692281477347, "learning_rate": 7.491155295362706e-07, "loss": 0.4402621388435364, "step": 11041 }, { "epoch": 1.7648046032126588, "grad_norm": 1.6272446871459105, "learning_rate": 7.4811229840146e-07, "loss": 0.5239832401275635, "step": 11042 }, { "epoch": 1.764964436985535, "grad_norm": 1.7095302752004815, "learning_rate": 7.471097133856353e-07, "loss": 0.6066610813140869, "step": 11043 }, { "epoch": 1.7651242707584114, "grad_norm": 1.4889309652171445, "learning_rate": 7.461077745588141e-07, "loss": 0.5197179317474365, "step": 11044 }, { "epoch": 1.7652841045312875, "grad_norm": 1.5444692685525125, "learning_rate": 7.451064819909682e-07, "loss": 0.5255920886993408, "step": 11045 }, { "epoch": 1.7654439383041636, "grad_norm": 1.9583447264394847, "learning_rate": 7.441058357520225e-07, "loss": 0.6349282264709473, "step": 11046 }, { "epoch": 1.76560377207704, "grad_norm": 1.6070835066476505, "learning_rate": 7.431058359118593e-07, "loss": 0.4019221067428589, "step": 11047 }, { "epoch": 1.7657636058499162, "grad_norm": 1.5780074590114381, "learning_rate": 7.421064825403168e-07, "loss": 0.43898820877075195, "step": 11048 }, { "epoch": 1.7659234396227923, "grad_norm": 1.548933856040881, "learning_rate": 7.41107775707185e-07, "loss": 0.44519299268722534, "step": 11049 }, { "epoch": 1.7660832733956684, "grad_norm": 1.332590246951724, "learning_rate": 7.401097154822101e-07, "loss": 0.41143637895584106, "step": 11050 }, { "epoch": 1.7662431071685447, "grad_norm": 1.525865684677225, "learning_rate": 7.391123019350932e-07, "loss": 0.5353708863258362, "step": 11051 }, { "epoch": 1.766402940941421, "grad_norm": 1.62916714257993, "learning_rate": 7.381155351354874e-07, "loss": 0.499099999666214, "step": 11052 }, { "epoch": 1.7665627747142971, "grad_norm": 1.3559709973239185, "learning_rate": 7.371194151530082e-07, "loss": 0.463628888130188, "step": 11053 }, { "epoch": 1.7667226084871732, "grad_norm": 1.6691739367094456, "learning_rate": 7.361239420572186e-07, "loss": 0.6491482853889465, "step": 11054 }, { "epoch": 1.7668824422600495, "grad_norm": 1.6366338112423893, "learning_rate": 7.351291159176399e-07, "loss": 0.3634786605834961, "step": 11055 }, { "epoch": 1.7670422760329259, "grad_norm": 1.567071470508101, "learning_rate": 7.341349368037443e-07, "loss": 0.4421323537826538, "step": 11056 }, { "epoch": 1.767202109805802, "grad_norm": 1.571925022725759, "learning_rate": 7.33141404784965e-07, "loss": 0.5402402877807617, "step": 11057 }, { "epoch": 1.767361943578678, "grad_norm": 1.7305426250538378, "learning_rate": 7.321485199306866e-07, "loss": 0.49014124274253845, "step": 11058 }, { "epoch": 1.7675217773515544, "grad_norm": 1.6741561414595523, "learning_rate": 7.311562823102469e-07, "loss": 0.5209465026855469, "step": 11059 }, { "epoch": 1.7676816111244307, "grad_norm": 1.4561899279958692, "learning_rate": 7.301646919929395e-07, "loss": 0.5123685598373413, "step": 11060 }, { "epoch": 1.7678414448973068, "grad_norm": 1.4362123974689631, "learning_rate": 7.291737490480166e-07, "loss": 0.5527291297912598, "step": 11061 }, { "epoch": 1.7680012786701829, "grad_norm": 1.6239520873640145, "learning_rate": 7.281834535446786e-07, "loss": 0.5196134448051453, "step": 11062 }, { "epoch": 1.7681611124430592, "grad_norm": 1.44774898923887, "learning_rate": 7.271938055520888e-07, "loss": 0.5531063079833984, "step": 11063 }, { "epoch": 1.7683209462159355, "grad_norm": 1.7181545041828987, "learning_rate": 7.262048051393578e-07, "loss": 0.4805620312690735, "step": 11064 }, { "epoch": 1.7684807799888116, "grad_norm": 1.7354167084054428, "learning_rate": 7.252164523755545e-07, "loss": 0.6759759187698364, "step": 11065 }, { "epoch": 1.7686406137616877, "grad_norm": 1.4777282688468776, "learning_rate": 7.242287473297005e-07, "loss": 0.5685122013092041, "step": 11066 }, { "epoch": 1.768800447534564, "grad_norm": 1.6873488152271439, "learning_rate": 7.232416900707739e-07, "loss": 0.5359913110733032, "step": 11067 }, { "epoch": 1.7689602813074403, "grad_norm": 1.5696834087465787, "learning_rate": 7.222552806677097e-07, "loss": 0.4689738154411316, "step": 11068 }, { "epoch": 1.7691201150803164, "grad_norm": 1.5387453465193068, "learning_rate": 7.212695191893937e-07, "loss": 0.5001983642578125, "step": 11069 }, { "epoch": 1.7692799488531927, "grad_norm": 1.9620628172141295, "learning_rate": 7.202844057046687e-07, "loss": 0.5886990427970886, "step": 11070 }, { "epoch": 1.769439782626069, "grad_norm": 1.4561287210360894, "learning_rate": 7.192999402823286e-07, "loss": 0.47845199704170227, "step": 11071 }, { "epoch": 1.7695996163989451, "grad_norm": 1.49340051103579, "learning_rate": 7.183161229911284e-07, "loss": 0.5018801689147949, "step": 11072 }, { "epoch": 1.7697594501718212, "grad_norm": 1.6812370134070738, "learning_rate": 7.173329538997731e-07, "loss": 0.59233158826828, "step": 11073 }, { "epoch": 1.7699192839446976, "grad_norm": 1.5362735598537056, "learning_rate": 7.163504330769244e-07, "loss": 0.6400560140609741, "step": 11074 }, { "epoch": 1.7700791177175739, "grad_norm": 1.7477432857214148, "learning_rate": 7.153685605911964e-07, "loss": 0.7447972297668457, "step": 11075 }, { "epoch": 1.77023895149045, "grad_norm": 1.4041223648874575, "learning_rate": 7.143873365111597e-07, "loss": 0.5610885620117188, "step": 11076 }, { "epoch": 1.770398785263326, "grad_norm": 1.7629437476359189, "learning_rate": 7.13406760905343e-07, "loss": 0.6415668725967407, "step": 11077 }, { "epoch": 1.7705586190362024, "grad_norm": 1.7032666750928964, "learning_rate": 7.124268338422224e-07, "loss": 0.5189284682273865, "step": 11078 }, { "epoch": 1.7707184528090787, "grad_norm": 1.8951691542836173, "learning_rate": 7.114475553902344e-07, "loss": 0.5489590764045715, "step": 11079 }, { "epoch": 1.7708782865819548, "grad_norm": 1.5502781261022658, "learning_rate": 7.104689256177655e-07, "loss": 0.4777529835700989, "step": 11080 }, { "epoch": 1.7710381203548309, "grad_norm": 1.473786011064994, "learning_rate": 7.094909445931642e-07, "loss": 0.44206368923187256, "step": 11081 }, { "epoch": 1.7711979541277072, "grad_norm": 1.9537666102588338, "learning_rate": 7.085136123847247e-07, "loss": 0.44309288263320923, "step": 11082 }, { "epoch": 1.7713577879005835, "grad_norm": 1.6951762738314702, "learning_rate": 7.075369290607049e-07, "loss": 0.5295517444610596, "step": 11083 }, { "epoch": 1.7715176216734596, "grad_norm": 2.0267352417843343, "learning_rate": 7.06560894689311e-07, "loss": 0.6270670890808105, "step": 11084 }, { "epoch": 1.7716774554463357, "grad_norm": 1.5903825763777524, "learning_rate": 7.055855093387043e-07, "loss": 0.5852839946746826, "step": 11085 }, { "epoch": 1.771837289219212, "grad_norm": 1.5028966437972313, "learning_rate": 7.046107730770024e-07, "loss": 0.5038557052612305, "step": 11086 }, { "epoch": 1.7719971229920883, "grad_norm": 1.4958168254960498, "learning_rate": 7.036366859722798e-07, "loss": 0.5006071925163269, "step": 11087 }, { "epoch": 1.7721569567649644, "grad_norm": 1.9319255060642628, "learning_rate": 7.026632480925621e-07, "loss": 0.5024041533470154, "step": 11088 }, { "epoch": 1.7723167905378405, "grad_norm": 1.538048727924872, "learning_rate": 7.016904595058316e-07, "loss": 0.5220665335655212, "step": 11089 }, { "epoch": 1.7724766243107168, "grad_norm": 1.7324903981427224, "learning_rate": 7.007183202800217e-07, "loss": 0.42929965257644653, "step": 11090 }, { "epoch": 1.7726364580835932, "grad_norm": 1.577794443880552, "learning_rate": 6.997468304830247e-07, "loss": 0.4998845160007477, "step": 11091 }, { "epoch": 1.7727962918564693, "grad_norm": 1.7008736555981012, "learning_rate": 6.987759901826863e-07, "loss": 0.5802136063575745, "step": 11092 }, { "epoch": 1.7729561256293453, "grad_norm": 1.7308172226442065, "learning_rate": 6.978057994468068e-07, "loss": 0.5777194499969482, "step": 11093 }, { "epoch": 1.7731159594022217, "grad_norm": 1.7498332073849363, "learning_rate": 6.96836258343141e-07, "loss": 0.48256054520606995, "step": 11094 }, { "epoch": 1.773275793175098, "grad_norm": 1.4042094568688188, "learning_rate": 6.958673669393967e-07, "loss": 0.4720343351364136, "step": 11095 }, { "epoch": 1.773435626947974, "grad_norm": 1.5987287098401244, "learning_rate": 6.948991253032378e-07, "loss": 0.5157099962234497, "step": 11096 }, { "epoch": 1.7735954607208502, "grad_norm": 1.7596175427503133, "learning_rate": 6.939315335022856e-07, "loss": 0.521568775177002, "step": 11097 }, { "epoch": 1.7737552944937265, "grad_norm": 1.818471686096725, "learning_rate": 6.929645916041117e-07, "loss": 0.5985018014907837, "step": 11098 }, { "epoch": 1.7739151282666028, "grad_norm": 1.7554342256345243, "learning_rate": 6.919982996762431e-07, "loss": 0.5089133381843567, "step": 11099 }, { "epoch": 1.774074962039479, "grad_norm": 1.565187069343559, "learning_rate": 6.910326577861615e-07, "loss": 0.48538774251937866, "step": 11100 }, { "epoch": 1.774234795812355, "grad_norm": 1.7825498055268267, "learning_rate": 6.900676660013061e-07, "loss": 0.5467455983161926, "step": 11101 }, { "epoch": 1.7743946295852313, "grad_norm": 1.5748194672879117, "learning_rate": 6.891033243890666e-07, "loss": 0.6228891611099243, "step": 11102 }, { "epoch": 1.7745544633581076, "grad_norm": 1.3557116938257319, "learning_rate": 6.881396330167911e-07, "loss": 0.5342666506767273, "step": 11103 }, { "epoch": 1.7747142971309837, "grad_norm": 1.4365526308035264, "learning_rate": 6.871765919517803e-07, "loss": 0.48563867807388306, "step": 11104 }, { "epoch": 1.77487413090386, "grad_norm": 1.667043770614451, "learning_rate": 6.862142012612882e-07, "loss": 0.4946119487285614, "step": 11105 }, { "epoch": 1.7750339646767364, "grad_norm": 1.8040842825317254, "learning_rate": 6.852524610125244e-07, "loss": 0.6194978952407837, "step": 11106 }, { "epoch": 1.7751937984496124, "grad_norm": 1.5163791599562602, "learning_rate": 6.842913712726551e-07, "loss": 0.4643968343734741, "step": 11107 }, { "epoch": 1.7753536322224885, "grad_norm": 1.5001716117093444, "learning_rate": 6.833309321087989e-07, "loss": 0.5205370187759399, "step": 11108 }, { "epoch": 1.7755134659953649, "grad_norm": 1.5772581887350623, "learning_rate": 6.823711435880298e-07, "loss": 0.45026659965515137, "step": 11109 }, { "epoch": 1.7756732997682412, "grad_norm": 1.6147092622722725, "learning_rate": 6.814120057773743e-07, "loss": 0.5148471593856812, "step": 11110 }, { "epoch": 1.7758331335411173, "grad_norm": 1.521923687455923, "learning_rate": 6.804535187438155e-07, "loss": 0.4631982445716858, "step": 11111 }, { "epoch": 1.7759929673139934, "grad_norm": 1.5233671346703634, "learning_rate": 6.79495682554292e-07, "loss": 0.5412097573280334, "step": 11112 }, { "epoch": 1.7761528010868697, "grad_norm": 1.3799704437667355, "learning_rate": 6.785384972756959e-07, "loss": 0.49559342861175537, "step": 11113 }, { "epoch": 1.776312634859746, "grad_norm": 1.8098634967640188, "learning_rate": 6.775819629748737e-07, "loss": 0.4988465905189514, "step": 11114 }, { "epoch": 1.776472468632622, "grad_norm": 1.4834988686622659, "learning_rate": 6.766260797186242e-07, "loss": 0.5112268924713135, "step": 11115 }, { "epoch": 1.7766323024054982, "grad_norm": 1.6645410851298603, "learning_rate": 6.756708475737028e-07, "loss": 0.5628745555877686, "step": 11116 }, { "epoch": 1.7767921361783745, "grad_norm": 1.74903158489217, "learning_rate": 6.74716266606823e-07, "loss": 0.48587191104888916, "step": 11117 }, { "epoch": 1.7769519699512508, "grad_norm": 1.7201177423579423, "learning_rate": 6.73762336884648e-07, "loss": 0.5375162959098816, "step": 11118 }, { "epoch": 1.777111803724127, "grad_norm": 1.776612475137727, "learning_rate": 6.728090584737956e-07, "loss": 0.5579453110694885, "step": 11119 }, { "epoch": 1.777271637497003, "grad_norm": 1.7741511696153247, "learning_rate": 6.718564314408382e-07, "loss": 0.603939414024353, "step": 11120 }, { "epoch": 1.7774314712698793, "grad_norm": 1.4610913138789932, "learning_rate": 6.709044558523047e-07, "loss": 0.4411853551864624, "step": 11121 }, { "epoch": 1.7775913050427556, "grad_norm": 1.5936947151347511, "learning_rate": 6.699531317746821e-07, "loss": 0.6161457300186157, "step": 11122 }, { "epoch": 1.7777511388156317, "grad_norm": 1.6726843250330226, "learning_rate": 6.690024592744027e-07, "loss": 0.6894478797912598, "step": 11123 }, { "epoch": 1.7779109725885078, "grad_norm": 1.4961893435911389, "learning_rate": 6.680524384178599e-07, "loss": 0.4749608337879181, "step": 11124 }, { "epoch": 1.7780708063613841, "grad_norm": 1.4386763265630802, "learning_rate": 6.671030692713986e-07, "loss": 0.5571208596229553, "step": 11125 }, { "epoch": 1.7782306401342605, "grad_norm": 1.7615175060593211, "learning_rate": 6.661543519013203e-07, "loss": 0.615463137626648, "step": 11126 }, { "epoch": 1.7783904739071366, "grad_norm": 1.7493859286331936, "learning_rate": 6.652062863738795e-07, "loss": 0.6269299983978271, "step": 11127 }, { "epoch": 1.7785503076800127, "grad_norm": 1.505643725433946, "learning_rate": 6.642588727552868e-07, "loss": 0.6064685583114624, "step": 11128 }, { "epoch": 1.778710141452889, "grad_norm": 1.5511239188800303, "learning_rate": 6.633121111117058e-07, "loss": 0.5555300712585449, "step": 11129 }, { "epoch": 1.7788699752257653, "grad_norm": 1.4749592694821385, "learning_rate": 6.623660015092537e-07, "loss": 0.5765143632888794, "step": 11130 }, { "epoch": 1.7790298089986414, "grad_norm": 1.6394815722264064, "learning_rate": 6.614205440140042e-07, "loss": 0.6120021939277649, "step": 11131 }, { "epoch": 1.7791896427715175, "grad_norm": 1.5048522444551578, "learning_rate": 6.604757386919857e-07, "loss": 0.5923857688903809, "step": 11132 }, { "epoch": 1.7793494765443938, "grad_norm": 1.5966421463514993, "learning_rate": 6.595315856091788e-07, "loss": 0.5794708132743835, "step": 11133 }, { "epoch": 1.77950931031727, "grad_norm": 1.6066519811443012, "learning_rate": 6.585880848315207e-07, "loss": 0.540828287601471, "step": 11134 }, { "epoch": 1.7796691440901462, "grad_norm": 1.7598263892503112, "learning_rate": 6.576452364248997e-07, "loss": 0.6356269121170044, "step": 11135 }, { "epoch": 1.7798289778630223, "grad_norm": 1.755402615046281, "learning_rate": 6.567030404551656e-07, "loss": 0.5818914175033569, "step": 11136 }, { "epoch": 1.7799888116358986, "grad_norm": 1.7012003796884876, "learning_rate": 6.557614969881143e-07, "loss": 0.6417635679244995, "step": 11137 }, { "epoch": 1.780148645408775, "grad_norm": 1.5818878426028404, "learning_rate": 6.548206060895013e-07, "loss": 0.3902742266654968, "step": 11138 }, { "epoch": 1.780308479181651, "grad_norm": 1.5054105206940271, "learning_rate": 6.538803678250338e-07, "loss": 0.5814162492752075, "step": 11139 }, { "epoch": 1.7804683129545273, "grad_norm": 1.4815342156890672, "learning_rate": 6.52940782260375e-07, "loss": 0.5653876662254333, "step": 11140 }, { "epoch": 1.7806281467274037, "grad_norm": 1.636942895930094, "learning_rate": 6.520018494611424e-07, "loss": 0.4700622260570526, "step": 11141 }, { "epoch": 1.7807879805002798, "grad_norm": 1.646823484219095, "learning_rate": 6.510635694929102e-07, "loss": 0.6330710649490356, "step": 11142 }, { "epoch": 1.7809478142731558, "grad_norm": 2.403851820471698, "learning_rate": 6.501259424212025e-07, "loss": 0.6464201211929321, "step": 11143 }, { "epoch": 1.7811076480460322, "grad_norm": 1.5196901884343708, "learning_rate": 6.491889683114994e-07, "loss": 0.4794738292694092, "step": 11144 }, { "epoch": 1.7812674818189085, "grad_norm": 1.5271174797165405, "learning_rate": 6.482526472292361e-07, "loss": 0.5451595187187195, "step": 11145 }, { "epoch": 1.7814273155917846, "grad_norm": 1.8725672825113842, "learning_rate": 6.473169792398015e-07, "loss": 0.6125525236129761, "step": 11146 }, { "epoch": 1.7815871493646607, "grad_norm": 2.1855209857936546, "learning_rate": 6.463819644085412e-07, "loss": 0.5317429304122925, "step": 11147 }, { "epoch": 1.781746983137537, "grad_norm": 1.9171612028471523, "learning_rate": 6.454476028007517e-07, "loss": 0.48982375860214233, "step": 11148 }, { "epoch": 1.7819068169104133, "grad_norm": 1.680072584974234, "learning_rate": 6.445138944816853e-07, "loss": 0.6212883591651917, "step": 11149 }, { "epoch": 1.7820666506832894, "grad_norm": 1.65776688962292, "learning_rate": 6.435808395165488e-07, "loss": 0.4949655532836914, "step": 11150 }, { "epoch": 1.7822264844561655, "grad_norm": 1.6125391850685014, "learning_rate": 6.426484379705056e-07, "loss": 0.4900941550731659, "step": 11151 }, { "epoch": 1.7823863182290418, "grad_norm": 1.4760275660633415, "learning_rate": 6.417166899086691e-07, "loss": 0.5042057633399963, "step": 11152 }, { "epoch": 1.7825461520019181, "grad_norm": 1.6385726229942854, "learning_rate": 6.407855953961106e-07, "loss": 0.5582051277160645, "step": 11153 }, { "epoch": 1.7827059857747942, "grad_norm": 1.4968341537795922, "learning_rate": 6.398551544978537e-07, "loss": 0.45643067359924316, "step": 11154 }, { "epoch": 1.7828658195476703, "grad_norm": 1.5585181699326855, "learning_rate": 6.389253672788754e-07, "loss": 0.519586980342865, "step": 11155 }, { "epoch": 1.7830256533205466, "grad_norm": 1.635455013631954, "learning_rate": 6.379962338041124e-07, "loss": 0.6045963764190674, "step": 11156 }, { "epoch": 1.783185487093423, "grad_norm": 1.5630634659827944, "learning_rate": 6.370677541384506e-07, "loss": 0.5849831104278564, "step": 11157 }, { "epoch": 1.783345320866299, "grad_norm": 1.6472058847197941, "learning_rate": 6.361399283467306e-07, "loss": 0.5396624207496643, "step": 11158 }, { "epoch": 1.7835051546391751, "grad_norm": 1.9391862157912962, "learning_rate": 6.352127564937493e-07, "loss": 0.5863741636276245, "step": 11159 }, { "epoch": 1.7836649884120515, "grad_norm": 1.6197833704853624, "learning_rate": 6.34286238644255e-07, "loss": 0.4398317039012909, "step": 11160 }, { "epoch": 1.7838248221849278, "grad_norm": 1.5578498432537546, "learning_rate": 6.333603748629558e-07, "loss": 0.5698070526123047, "step": 11161 }, { "epoch": 1.7839846559578039, "grad_norm": 1.6050553068211093, "learning_rate": 6.3243516521451e-07, "loss": 0.5937540531158447, "step": 11162 }, { "epoch": 1.78414448973068, "grad_norm": 1.7311360757478573, "learning_rate": 6.315106097635304e-07, "loss": 0.45083290338516235, "step": 11163 }, { "epoch": 1.7843043235035563, "grad_norm": 1.669640408834352, "learning_rate": 6.305867085745843e-07, "loss": 0.4362644553184509, "step": 11164 }, { "epoch": 1.7844641572764326, "grad_norm": 1.6064059790254617, "learning_rate": 6.296634617121944e-07, "loss": 0.5527745485305786, "step": 11165 }, { "epoch": 1.7846239910493087, "grad_norm": 1.469970558385619, "learning_rate": 6.287408692408348e-07, "loss": 0.51808762550354, "step": 11166 }, { "epoch": 1.7847838248221848, "grad_norm": 1.5061946298528102, "learning_rate": 6.278189312249395e-07, "loss": 0.4224925637245178, "step": 11167 }, { "epoch": 1.784943658595061, "grad_norm": 1.6355594162018314, "learning_rate": 6.268976477288912e-07, "loss": 0.5827100872993469, "step": 11168 }, { "epoch": 1.7851034923679374, "grad_norm": 1.956983266528325, "learning_rate": 6.2597701881703e-07, "loss": 0.5749928951263428, "step": 11169 }, { "epoch": 1.7852633261408135, "grad_norm": 1.6486368431142422, "learning_rate": 6.250570445536475e-07, "loss": 0.5385233759880066, "step": 11170 }, { "epoch": 1.7854231599136896, "grad_norm": 1.884830223846563, "learning_rate": 6.241377250029934e-07, "loss": 0.6334813833236694, "step": 11171 }, { "epoch": 1.785582993686566, "grad_norm": 1.7659768841613728, "learning_rate": 6.232190602292709e-07, "loss": 0.5496194958686829, "step": 11172 }, { "epoch": 1.7857428274594422, "grad_norm": 1.7849482694835317, "learning_rate": 6.223010502966331e-07, "loss": 0.5680419206619263, "step": 11173 }, { "epoch": 1.7859026612323183, "grad_norm": 1.5053053331257469, "learning_rate": 6.213836952691921e-07, "loss": 0.40670889616012573, "step": 11174 }, { "epoch": 1.7860624950051947, "grad_norm": 1.547200232534258, "learning_rate": 6.204669952110121e-07, "loss": 0.5154871344566345, "step": 11175 }, { "epoch": 1.786222328778071, "grad_norm": 1.5390288203885636, "learning_rate": 6.19550950186113e-07, "loss": 0.5314877033233643, "step": 11176 }, { "epoch": 1.786382162550947, "grad_norm": 1.4708064813430375, "learning_rate": 6.186355602584693e-07, "loss": 0.5672147274017334, "step": 11177 }, { "epoch": 1.7865419963238232, "grad_norm": 1.5691703583270928, "learning_rate": 6.177208254920064e-07, "loss": 0.5736850500106812, "step": 11178 }, { "epoch": 1.7867018300966995, "grad_norm": 1.907364883778568, "learning_rate": 6.168067459506066e-07, "loss": 0.5491394400596619, "step": 11179 }, { "epoch": 1.7868616638695758, "grad_norm": 1.7003027808471975, "learning_rate": 6.158933216981045e-07, "loss": 0.5512908697128296, "step": 11180 }, { "epoch": 1.7870214976424519, "grad_norm": 1.5712312143881406, "learning_rate": 6.149805527982921e-07, "loss": 0.5217008590698242, "step": 11181 }, { "epoch": 1.787181331415328, "grad_norm": 1.7362346130599369, "learning_rate": 6.140684393149155e-07, "loss": 0.4826219081878662, "step": 11182 }, { "epoch": 1.7873411651882043, "grad_norm": 1.4175858681446138, "learning_rate": 6.131569813116723e-07, "loss": 0.4496598243713379, "step": 11183 }, { "epoch": 1.7875009989610806, "grad_norm": 1.621556907480115, "learning_rate": 6.122461788522149e-07, "loss": 0.5408107042312622, "step": 11184 }, { "epoch": 1.7876608327339567, "grad_norm": 1.614581781556851, "learning_rate": 6.113360320001493e-07, "loss": 0.45584774017333984, "step": 11185 }, { "epoch": 1.7878206665068328, "grad_norm": 1.6408024531791607, "learning_rate": 6.104265408190401e-07, "loss": 0.6473221182823181, "step": 11186 }, { "epoch": 1.7879805002797091, "grad_norm": 1.8948352224141063, "learning_rate": 6.095177053724011e-07, "loss": 0.6042296886444092, "step": 11187 }, { "epoch": 1.7881403340525854, "grad_norm": 1.5997381284901486, "learning_rate": 6.086095257237023e-07, "loss": 0.5282765626907349, "step": 11188 }, { "epoch": 1.7883001678254615, "grad_norm": 1.6896996292732775, "learning_rate": 6.077020019363678e-07, "loss": 0.5566398501396179, "step": 11189 }, { "epoch": 1.7884600015983376, "grad_norm": 1.5920430061605402, "learning_rate": 6.067951340737732e-07, "loss": 0.4997178018093109, "step": 11190 }, { "epoch": 1.788619835371214, "grad_norm": 1.7231299948852525, "learning_rate": 6.058889221992559e-07, "loss": 0.5946904420852661, "step": 11191 }, { "epoch": 1.7887796691440903, "grad_norm": 1.512650291222144, "learning_rate": 6.049833663760995e-07, "loss": 0.487037718296051, "step": 11192 }, { "epoch": 1.7889395029169664, "grad_norm": 1.6846484677504834, "learning_rate": 6.040784666675459e-07, "loss": 0.6321935653686523, "step": 11193 }, { "epoch": 1.7890993366898424, "grad_norm": 1.5453551672436607, "learning_rate": 6.031742231367887e-07, "loss": 0.45694100856781006, "step": 11194 }, { "epoch": 1.7892591704627188, "grad_norm": 1.6853937137299022, "learning_rate": 6.022706358469776e-07, "loss": 0.6254055500030518, "step": 11195 }, { "epoch": 1.789419004235595, "grad_norm": 2.1639610582223403, "learning_rate": 6.013677048612165e-07, "loss": 0.5944439172744751, "step": 11196 }, { "epoch": 1.7895788380084712, "grad_norm": 1.5190462612449938, "learning_rate": 6.004654302425638e-07, "loss": 0.4897313714027405, "step": 11197 }, { "epoch": 1.7897386717813473, "grad_norm": 1.7161093006938613, "learning_rate": 5.995638120540292e-07, "loss": 0.5063251852989197, "step": 11198 }, { "epoch": 1.7898985055542236, "grad_norm": 1.594014920084273, "learning_rate": 5.986628503585778e-07, "loss": 0.44427555799484253, "step": 11199 }, { "epoch": 1.7900583393271, "grad_norm": 1.6300217187397807, "learning_rate": 5.977625452191315e-07, "loss": 0.4355146586894989, "step": 11200 }, { "epoch": 1.790218173099976, "grad_norm": 1.559764928254059, "learning_rate": 5.968628966985645e-07, "loss": 0.4955686926841736, "step": 11201 }, { "epoch": 1.790378006872852, "grad_norm": 1.5948644238291454, "learning_rate": 5.959639048597054e-07, "loss": 0.4325081706047058, "step": 11202 }, { "epoch": 1.7905378406457284, "grad_norm": 1.4543550080897631, "learning_rate": 5.950655697653363e-07, "loss": 0.4560660123825073, "step": 11203 }, { "epoch": 1.7906976744186047, "grad_norm": 1.3792731034922359, "learning_rate": 5.941678914781923e-07, "loss": 0.5400413870811462, "step": 11204 }, { "epoch": 1.7908575081914808, "grad_norm": 1.6167050155447895, "learning_rate": 5.932708700609635e-07, "loss": 0.586326003074646, "step": 11205 }, { "epoch": 1.791017341964357, "grad_norm": 1.5273538084294407, "learning_rate": 5.923745055762975e-07, "loss": 0.40254664421081543, "step": 11206 }, { "epoch": 1.7911771757372332, "grad_norm": 1.8542804889310018, "learning_rate": 5.91478798086792e-07, "loss": 0.5717087984085083, "step": 11207 }, { "epoch": 1.7913370095101095, "grad_norm": 1.7293281585139189, "learning_rate": 5.905837476549992e-07, "loss": 0.5076231956481934, "step": 11208 }, { "epoch": 1.7914968432829856, "grad_norm": 1.8202806565168608, "learning_rate": 5.896893543434279e-07, "loss": 0.49250268936157227, "step": 11209 }, { "epoch": 1.791656677055862, "grad_norm": 1.5891509236675818, "learning_rate": 5.88795618214536e-07, "loss": 0.45718514919281006, "step": 11210 }, { "epoch": 1.7918165108287383, "grad_norm": 1.7875353432841579, "learning_rate": 5.879025393307436e-07, "loss": 0.6971331834793091, "step": 11211 }, { "epoch": 1.7919763446016144, "grad_norm": 1.6734058515245136, "learning_rate": 5.870101177544174e-07, "loss": 0.5949270725250244, "step": 11212 }, { "epoch": 1.7921361783744905, "grad_norm": 1.386918494260235, "learning_rate": 5.861183535478809e-07, "loss": 0.5272997617721558, "step": 11213 }, { "epoch": 1.7922960121473668, "grad_norm": 2.0210351867751988, "learning_rate": 5.85227246773411e-07, "loss": 0.6624028086662292, "step": 11214 }, { "epoch": 1.792455845920243, "grad_norm": 1.5422402221389393, "learning_rate": 5.843367974932424e-07, "loss": 0.45971864461898804, "step": 11215 }, { "epoch": 1.7926156796931192, "grad_norm": 1.7209899634561765, "learning_rate": 5.834470057695585e-07, "loss": 0.5737494230270386, "step": 11216 }, { "epoch": 1.7927755134659953, "grad_norm": 1.5271666877693704, "learning_rate": 5.825578716645008e-07, "loss": 0.5191776752471924, "step": 11217 }, { "epoch": 1.7929353472388716, "grad_norm": 1.5244530482409917, "learning_rate": 5.816693952401609e-07, "loss": 0.4978950023651123, "step": 11218 }, { "epoch": 1.793095181011748, "grad_norm": 1.5672276512476149, "learning_rate": 5.807815765585878e-07, "loss": 0.7117917537689209, "step": 11219 }, { "epoch": 1.793255014784624, "grad_norm": 1.7294308232942912, "learning_rate": 5.798944156817842e-07, "loss": 0.5413233041763306, "step": 11220 }, { "epoch": 1.7934148485575, "grad_norm": 1.6351788856625142, "learning_rate": 5.790079126717063e-07, "loss": 0.5499802231788635, "step": 11221 }, { "epoch": 1.7935746823303764, "grad_norm": 1.3455745618587032, "learning_rate": 5.781220675902654e-07, "loss": 0.500529408454895, "step": 11222 }, { "epoch": 1.7937345161032527, "grad_norm": 1.57474915434128, "learning_rate": 5.772368804993234e-07, "loss": 0.6150583028793335, "step": 11223 }, { "epoch": 1.7938943498761288, "grad_norm": 1.5070543889417622, "learning_rate": 5.763523514607005e-07, "loss": 0.5424101948738098, "step": 11224 }, { "epoch": 1.794054183649005, "grad_norm": 1.6071518788322823, "learning_rate": 5.754684805361665e-07, "loss": 0.597252607345581, "step": 11225 }, { "epoch": 1.7942140174218812, "grad_norm": 1.6893102501926545, "learning_rate": 5.745852677874519e-07, "loss": 0.644176721572876, "step": 11226 }, { "epoch": 1.7943738511947576, "grad_norm": 1.3521055740491492, "learning_rate": 5.737027132762341e-07, "loss": 0.5035462975502014, "step": 11227 }, { "epoch": 1.7945336849676337, "grad_norm": 1.4710201079497751, "learning_rate": 5.728208170641481e-07, "loss": 0.5408308506011963, "step": 11228 }, { "epoch": 1.7946935187405098, "grad_norm": 1.502819480643193, "learning_rate": 5.719395792127813e-07, "loss": 0.4124152660369873, "step": 11229 }, { "epoch": 1.794853352513386, "grad_norm": 1.665015443472107, "learning_rate": 5.710589997836791e-07, "loss": 0.40411049127578735, "step": 11230 }, { "epoch": 1.7950131862862624, "grad_norm": 1.5544809769957406, "learning_rate": 5.701790788383355e-07, "loss": 0.5123003721237183, "step": 11231 }, { "epoch": 1.7951730200591385, "grad_norm": 1.7716790132335107, "learning_rate": 5.692998164382036e-07, "loss": 0.5763553977012634, "step": 11232 }, { "epoch": 1.7953328538320146, "grad_norm": 1.703575449424239, "learning_rate": 5.684212126446853e-07, "loss": 0.48957550525665283, "step": 11233 }, { "epoch": 1.795492687604891, "grad_norm": 1.461228197227325, "learning_rate": 5.675432675191395e-07, "loss": 0.6526942849159241, "step": 11234 }, { "epoch": 1.7956525213777672, "grad_norm": 1.8698883578396759, "learning_rate": 5.666659811228803e-07, "loss": 0.5949618816375732, "step": 11235 }, { "epoch": 1.7958123551506433, "grad_norm": 1.8431754889279166, "learning_rate": 5.657893535171744e-07, "loss": 0.58609938621521, "step": 11236 }, { "epoch": 1.7959721889235194, "grad_norm": 1.6697634374852528, "learning_rate": 5.649133847632405e-07, "loss": 0.553336501121521, "step": 11237 }, { "epoch": 1.7961320226963957, "grad_norm": 1.4490623106116698, "learning_rate": 5.640380749222541e-07, "loss": 0.6336069107055664, "step": 11238 }, { "epoch": 1.796291856469272, "grad_norm": 1.521749788822184, "learning_rate": 5.63163424055343e-07, "loss": 0.5377898216247559, "step": 11239 }, { "epoch": 1.7964516902421481, "grad_norm": 1.619252877999134, "learning_rate": 5.622894322235895e-07, "loss": 0.6229879856109619, "step": 11240 }, { "epoch": 1.7966115240150242, "grad_norm": 1.8893243535122037, "learning_rate": 5.614160994880336e-07, "loss": 0.5533014535903931, "step": 11241 }, { "epoch": 1.7967713577879005, "grad_norm": 1.5819731827444257, "learning_rate": 5.605434259096621e-07, "loss": 0.512263298034668, "step": 11242 }, { "epoch": 1.7969311915607769, "grad_norm": 1.8520868873684069, "learning_rate": 5.596714115494217e-07, "loss": 0.4657937288284302, "step": 11243 }, { "epoch": 1.797091025333653, "grad_norm": 1.3772931976642533, "learning_rate": 5.588000564682083e-07, "loss": 0.525922954082489, "step": 11244 }, { "epoch": 1.7972508591065293, "grad_norm": 1.643489646774688, "learning_rate": 5.579293607268755e-07, "loss": 0.6375048160552979, "step": 11245 }, { "epoch": 1.7974106928794056, "grad_norm": 1.4466402468662023, "learning_rate": 5.570593243862298e-07, "loss": 0.5396299958229065, "step": 11246 }, { "epoch": 1.7975705266522817, "grad_norm": 1.735373141606908, "learning_rate": 5.56189947507032e-07, "loss": 0.5697156190872192, "step": 11247 }, { "epoch": 1.7977303604251578, "grad_norm": 1.6801865130894245, "learning_rate": 5.553212301499955e-07, "loss": 0.5475493669509888, "step": 11248 }, { "epoch": 1.797890194198034, "grad_norm": 1.7123036066343937, "learning_rate": 5.544531723757862e-07, "loss": 0.46853992342948914, "step": 11249 }, { "epoch": 1.7980500279709104, "grad_norm": 1.520881138735597, "learning_rate": 5.535857742450312e-07, "loss": 0.4621898829936981, "step": 11250 }, { "epoch": 1.7982098617437865, "grad_norm": 2.13596898277481, "learning_rate": 5.527190358183032e-07, "loss": 0.6830334663391113, "step": 11251 }, { "epoch": 1.7983696955166626, "grad_norm": 1.4942087942018074, "learning_rate": 5.518529571561326e-07, "loss": 0.4464251399040222, "step": 11252 }, { "epoch": 1.798529529289539, "grad_norm": 1.4346689885166828, "learning_rate": 5.509875383190033e-07, "loss": 0.4135960340499878, "step": 11253 }, { "epoch": 1.7986893630624152, "grad_norm": 1.6096807728856415, "learning_rate": 5.501227793673524e-07, "loss": 0.5250100493431091, "step": 11254 }, { "epoch": 1.7988491968352913, "grad_norm": 1.8034470445343806, "learning_rate": 5.49258680361573e-07, "loss": 0.554625391960144, "step": 11255 }, { "epoch": 1.7990090306081674, "grad_norm": 1.6864943406832877, "learning_rate": 5.483952413620109e-07, "loss": 0.5212705135345459, "step": 11256 }, { "epoch": 1.7991688643810437, "grad_norm": 1.804308178011986, "learning_rate": 5.475324624289647e-07, "loss": 0.5817710161209106, "step": 11257 }, { "epoch": 1.79932869815392, "grad_norm": 1.6942673590609745, "learning_rate": 5.466703436226873e-07, "loss": 0.6241973638534546, "step": 11258 }, { "epoch": 1.7994885319267961, "grad_norm": 1.8016977596397505, "learning_rate": 5.458088850033849e-07, "loss": 0.6572762727737427, "step": 11259 }, { "epoch": 1.7996483656996722, "grad_norm": 1.6978245674544645, "learning_rate": 5.449480866312218e-07, "loss": 0.703282356262207, "step": 11260 }, { "epoch": 1.7998081994725486, "grad_norm": 1.5059318582051398, "learning_rate": 5.44087948566312e-07, "loss": 0.43663492798805237, "step": 11261 }, { "epoch": 1.7999680332454249, "grad_norm": 1.6516051083302963, "learning_rate": 5.432284708687241e-07, "loss": 0.4955545663833618, "step": 11262 }, { "epoch": 1.800127867018301, "grad_norm": 1.6760058186095483, "learning_rate": 5.423696535984813e-07, "loss": 0.5187241435050964, "step": 11263 }, { "epoch": 1.800287700791177, "grad_norm": 1.6754623353728002, "learning_rate": 5.415114968155588e-07, "loss": 0.5074775218963623, "step": 11264 }, { "epoch": 1.8004475345640534, "grad_norm": 1.6856366261659042, "learning_rate": 5.406540005798899e-07, "loss": 0.4486734867095947, "step": 11265 }, { "epoch": 1.8006073683369297, "grad_norm": 1.5653093005828482, "learning_rate": 5.397971649513589e-07, "loss": 0.5061945915222168, "step": 11266 }, { "epoch": 1.8007672021098058, "grad_norm": 1.5439583684664189, "learning_rate": 5.389409899898013e-07, "loss": 0.5331987142562866, "step": 11267 }, { "epoch": 1.8009270358826819, "grad_norm": 1.370410005128218, "learning_rate": 5.380854757550125e-07, "loss": 0.4321933686733246, "step": 11268 }, { "epoch": 1.8010868696555582, "grad_norm": 1.7557449319251677, "learning_rate": 5.372306223067347e-07, "loss": 0.5806416273117065, "step": 11269 }, { "epoch": 1.8012467034284345, "grad_norm": 1.4064340543184537, "learning_rate": 5.363764297046714e-07, "loss": 0.5922657251358032, "step": 11270 }, { "epoch": 1.8014065372013106, "grad_norm": 1.6039311157642857, "learning_rate": 5.355228980084759e-07, "loss": 0.5651657581329346, "step": 11271 }, { "epoch": 1.8015663709741867, "grad_norm": 1.5915107461766684, "learning_rate": 5.34670027277755e-07, "loss": 0.5517882704734802, "step": 11272 }, { "epoch": 1.801726204747063, "grad_norm": 1.5720994730765117, "learning_rate": 5.338178175720699e-07, "loss": 0.5887646675109863, "step": 11273 }, { "epoch": 1.8018860385199393, "grad_norm": 1.5755705525804307, "learning_rate": 5.329662689509351e-07, "loss": 0.500206470489502, "step": 11274 }, { "epoch": 1.8020458722928154, "grad_norm": 1.6977375814003484, "learning_rate": 5.321153814738222e-07, "loss": 0.4679844379425049, "step": 11275 }, { "epoch": 1.8022057060656915, "grad_norm": 1.4703898667989808, "learning_rate": 5.312651552001536e-07, "loss": 0.5666332244873047, "step": 11276 }, { "epoch": 1.8023655398385678, "grad_norm": 1.8048645565521266, "learning_rate": 5.304155901893049e-07, "loss": 0.5295791625976562, "step": 11277 }, { "epoch": 1.8025253736114442, "grad_norm": 1.3353515135081273, "learning_rate": 5.295666865006055e-07, "loss": 0.5061853528022766, "step": 11278 }, { "epoch": 1.8026852073843203, "grad_norm": 1.378497252581226, "learning_rate": 5.287184441933435e-07, "loss": 0.4368169903755188, "step": 11279 }, { "epoch": 1.8028450411571966, "grad_norm": 1.4644343771586754, "learning_rate": 5.278708633267549e-07, "loss": 0.5483599901199341, "step": 11280 }, { "epoch": 1.8030048749300729, "grad_norm": 1.5937894710400686, "learning_rate": 5.2702394396003e-07, "loss": 0.5224674344062805, "step": 11281 }, { "epoch": 1.803164708702949, "grad_norm": 1.5769245348516474, "learning_rate": 5.261776861523182e-07, "loss": 0.5603746175765991, "step": 11282 }, { "epoch": 1.803324542475825, "grad_norm": 1.6944363525338164, "learning_rate": 5.253320899627179e-07, "loss": 0.6386314630508423, "step": 11283 }, { "epoch": 1.8034843762487014, "grad_norm": 1.8329631224337424, "learning_rate": 5.244871554502806e-07, "loss": 0.4662055969238281, "step": 11284 }, { "epoch": 1.8036442100215777, "grad_norm": 1.8003427510633603, "learning_rate": 5.23642882674017e-07, "loss": 0.4832000732421875, "step": 11285 }, { "epoch": 1.8038040437944538, "grad_norm": 1.496902002434094, "learning_rate": 5.227992716928853e-07, "loss": 0.5091870427131653, "step": 11286 }, { "epoch": 1.80396387756733, "grad_norm": 1.6004777746248788, "learning_rate": 5.21956322565802e-07, "loss": 0.5277557969093323, "step": 11287 }, { "epoch": 1.8041237113402062, "grad_norm": 1.6913234744141277, "learning_rate": 5.211140353516353e-07, "loss": 0.4941632151603699, "step": 11288 }, { "epoch": 1.8042835451130825, "grad_norm": 1.545802850415069, "learning_rate": 5.202724101092049e-07, "loss": 0.5543164014816284, "step": 11289 }, { "epoch": 1.8044433788859586, "grad_norm": 1.5288076691367674, "learning_rate": 5.194314468972905e-07, "loss": 0.5417066812515259, "step": 11290 }, { "epoch": 1.8046032126588347, "grad_norm": 1.5895762538691, "learning_rate": 5.185911457746207e-07, "loss": 0.42321932315826416, "step": 11291 }, { "epoch": 1.804763046431711, "grad_norm": 1.7539061045321636, "learning_rate": 5.177515067998795e-07, "loss": 0.6132612228393555, "step": 11292 }, { "epoch": 1.8049228802045874, "grad_norm": 1.432363320012687, "learning_rate": 5.169125300317024e-07, "loss": 0.39675793051719666, "step": 11293 }, { "epoch": 1.8050827139774634, "grad_norm": 1.5473632094530148, "learning_rate": 5.160742155286824e-07, "loss": 0.554604172706604, "step": 11294 }, { "epoch": 1.8052425477503395, "grad_norm": 1.5620690867135636, "learning_rate": 5.15236563349365e-07, "loss": 0.5495213270187378, "step": 11295 }, { "epoch": 1.8054023815232159, "grad_norm": 1.7997821944642862, "learning_rate": 5.143995735522478e-07, "loss": 0.6976812481880188, "step": 11296 }, { "epoch": 1.8055622152960922, "grad_norm": 1.637352671807646, "learning_rate": 5.135632461957841e-07, "loss": 0.534028172492981, "step": 11297 }, { "epoch": 1.8057220490689683, "grad_norm": 1.6826655231525378, "learning_rate": 5.127275813383769e-07, "loss": 0.5858889222145081, "step": 11298 }, { "epoch": 1.8058818828418444, "grad_norm": 1.4940038228859895, "learning_rate": 5.1189257903839e-07, "loss": 0.5327097773551941, "step": 11299 }, { "epoch": 1.8060417166147207, "grad_norm": 1.8445086589548, "learning_rate": 5.110582393541353e-07, "loss": 0.5265674591064453, "step": 11300 }, { "epoch": 1.806201550387597, "grad_norm": 1.474190702803114, "learning_rate": 5.102245623438784e-07, "loss": 0.5059071183204651, "step": 11301 }, { "epoch": 1.806361384160473, "grad_norm": 1.4954251434117214, "learning_rate": 5.09391548065844e-07, "loss": 0.46030572056770325, "step": 11302 }, { "epoch": 1.8065212179333492, "grad_norm": 1.7004075791588158, "learning_rate": 5.085591965782055e-07, "loss": 0.5925561785697937, "step": 11303 }, { "epoch": 1.8066810517062255, "grad_norm": 1.6308254616054199, "learning_rate": 5.077275079390897e-07, "loss": 0.5990835428237915, "step": 11304 }, { "epoch": 1.8068408854791018, "grad_norm": 1.6750758167190565, "learning_rate": 5.068964822065802e-07, "loss": 0.6922260522842407, "step": 11305 }, { "epoch": 1.807000719251978, "grad_norm": 1.5090681162143886, "learning_rate": 5.060661194387129e-07, "loss": 0.5467685461044312, "step": 11306 }, { "epoch": 1.807160553024854, "grad_norm": 1.4417606930914082, "learning_rate": 5.052364196934779e-07, "loss": 0.5170353651046753, "step": 11307 }, { "epoch": 1.8073203867977303, "grad_norm": 1.514999142843172, "learning_rate": 5.044073830288176e-07, "loss": 0.5882208347320557, "step": 11308 }, { "epoch": 1.8074802205706066, "grad_norm": 1.6878282702364855, "learning_rate": 5.035790095026271e-07, "loss": 0.5677100419998169, "step": 11309 }, { "epoch": 1.8076400543434827, "grad_norm": 1.6462252395516435, "learning_rate": 5.027512991727612e-07, "loss": 0.662102460861206, "step": 11310 }, { "epoch": 1.8077998881163588, "grad_norm": 1.7136603408798698, "learning_rate": 5.019242520970214e-07, "loss": 0.5634609460830688, "step": 11311 }, { "epoch": 1.8079597218892351, "grad_norm": 1.8599474691980764, "learning_rate": 5.010978683331669e-07, "loss": 0.5350023508071899, "step": 11312 }, { "epoch": 1.8081195556621115, "grad_norm": 1.5494561182140494, "learning_rate": 5.002721479389072e-07, "loss": 0.5480669736862183, "step": 11313 }, { "epoch": 1.8082793894349876, "grad_norm": 1.4996775553206447, "learning_rate": 4.994470909719118e-07, "loss": 0.5268793106079102, "step": 11314 }, { "epoch": 1.8084392232078637, "grad_norm": 1.9622211570302956, "learning_rate": 4.986226974897967e-07, "loss": 0.5812004804611206, "step": 11315 }, { "epoch": 1.8085990569807402, "grad_norm": 1.517502418828406, "learning_rate": 4.977989675501349e-07, "loss": 0.4844397306442261, "step": 11316 }, { "epoch": 1.8087588907536163, "grad_norm": 1.6256555377020216, "learning_rate": 4.969759012104536e-07, "loss": 0.5050321817398071, "step": 11317 }, { "epoch": 1.8089187245264924, "grad_norm": 1.7024901662523433, "learning_rate": 4.961534985282312e-07, "loss": 0.6360176205635071, "step": 11318 }, { "epoch": 1.8090785582993687, "grad_norm": 1.607268463019725, "learning_rate": 4.953317595609042e-07, "loss": 0.633895754814148, "step": 11319 }, { "epoch": 1.809238392072245, "grad_norm": 1.6360723024619646, "learning_rate": 4.945106843658576e-07, "loss": 0.5144848823547363, "step": 11320 }, { "epoch": 1.809398225845121, "grad_norm": 1.7112531122735029, "learning_rate": 4.936902730004323e-07, "loss": 0.48726651072502136, "step": 11321 }, { "epoch": 1.8095580596179972, "grad_norm": 1.4707675123982709, "learning_rate": 4.928705255219246e-07, "loss": 0.5528602600097656, "step": 11322 }, { "epoch": 1.8097178933908735, "grad_norm": 1.69971602022626, "learning_rate": 4.920514419875821e-07, "loss": 0.444627970457077, "step": 11323 }, { "epoch": 1.8098777271637498, "grad_norm": 1.5514791962347518, "learning_rate": 4.912330224546057e-07, "loss": 0.47591546177864075, "step": 11324 }, { "epoch": 1.810037560936626, "grad_norm": 1.6507882923584942, "learning_rate": 4.904152669801521e-07, "loss": 0.4394541382789612, "step": 11325 }, { "epoch": 1.810197394709502, "grad_norm": 1.7067942470540842, "learning_rate": 4.895981756213308e-07, "loss": 0.6258956789970398, "step": 11326 }, { "epoch": 1.8103572284823783, "grad_norm": 1.536823255594741, "learning_rate": 4.887817484352031e-07, "loss": 0.5626389980316162, "step": 11327 }, { "epoch": 1.8105170622552547, "grad_norm": 1.8325546739656497, "learning_rate": 4.879659854787855e-07, "loss": 0.6004582643508911, "step": 11328 }, { "epoch": 1.8106768960281308, "grad_norm": 1.564033364443968, "learning_rate": 4.871508868090491e-07, "loss": 0.6058835983276367, "step": 11329 }, { "epoch": 1.8108367298010069, "grad_norm": 1.4157902804244713, "learning_rate": 4.863364524829173e-07, "loss": 0.5636794567108154, "step": 11330 }, { "epoch": 1.8109965635738832, "grad_norm": 1.357193521191464, "learning_rate": 4.855226825572667e-07, "loss": 0.5377250909805298, "step": 11331 }, { "epoch": 1.8111563973467595, "grad_norm": 1.8613141447774955, "learning_rate": 4.847095770889287e-07, "loss": 0.5604251623153687, "step": 11332 }, { "epoch": 1.8113162311196356, "grad_norm": 1.6184954368703446, "learning_rate": 4.838971361346856e-07, "loss": 0.5698130130767822, "step": 11333 }, { "epoch": 1.8114760648925117, "grad_norm": 1.7081215271349999, "learning_rate": 4.830853597512797e-07, "loss": 0.6797983646392822, "step": 11334 }, { "epoch": 1.811635898665388, "grad_norm": 1.7650824449147908, "learning_rate": 4.82274247995399e-07, "loss": 0.49862736463546753, "step": 11335 }, { "epoch": 1.8117957324382643, "grad_norm": 1.626237992077885, "learning_rate": 4.814638009236894e-07, "loss": 0.5999681949615479, "step": 11336 }, { "epoch": 1.8119555662111404, "grad_norm": 1.6606441855852454, "learning_rate": 4.806540185927499e-07, "loss": 0.6231723427772522, "step": 11337 }, { "epoch": 1.8121153999840165, "grad_norm": 1.5249387619720605, "learning_rate": 4.798449010591322e-07, "loss": 0.5081929564476013, "step": 11338 }, { "epoch": 1.8122752337568928, "grad_norm": 1.7800225679293495, "learning_rate": 4.79036448379343e-07, "loss": 0.6651811003684998, "step": 11339 }, { "epoch": 1.8124350675297691, "grad_norm": 1.733515657506561, "learning_rate": 4.78228660609843e-07, "loss": 0.5540943145751953, "step": 11340 }, { "epoch": 1.8125949013026452, "grad_norm": 1.6238364105768168, "learning_rate": 4.774215378070413e-07, "loss": 0.5653183460235596, "step": 11341 }, { "epoch": 1.8127547350755213, "grad_norm": 1.5286001355693664, "learning_rate": 4.766150800273095e-07, "loss": 0.4779297709465027, "step": 11342 }, { "epoch": 1.8129145688483976, "grad_norm": 1.614362735558569, "learning_rate": 4.7580928732696375e-07, "loss": 0.48906004428863525, "step": 11343 }, { "epoch": 1.813074402621274, "grad_norm": 1.6095565633446607, "learning_rate": 4.750041597622801e-07, "loss": 0.49540165066719055, "step": 11344 }, { "epoch": 1.81323423639415, "grad_norm": 1.5170561113163117, "learning_rate": 4.7419969738948466e-07, "loss": 0.5651788711547852, "step": 11345 }, { "epoch": 1.8133940701670261, "grad_norm": 1.6002335726391785, "learning_rate": 4.7339590026475925e-07, "loss": 0.52085280418396, "step": 11346 }, { "epoch": 1.8135539039399025, "grad_norm": 1.6848812866853968, "learning_rate": 4.725927684442366e-07, "loss": 0.5827236771583557, "step": 11347 }, { "epoch": 1.8137137377127788, "grad_norm": 1.7263224292799957, "learning_rate": 4.717903019840053e-07, "loss": 0.5506642460823059, "step": 11348 }, { "epoch": 1.8138735714856549, "grad_norm": 1.735084951107783, "learning_rate": 4.709885009401083e-07, "loss": 0.49420449137687683, "step": 11349 }, { "epoch": 1.814033405258531, "grad_norm": 1.818841829995756, "learning_rate": 4.701873653685385e-07, "loss": 0.5256140828132629, "step": 11350 }, { "epoch": 1.8141932390314075, "grad_norm": 1.780716119682571, "learning_rate": 4.693868953252445e-07, "loss": 0.7027173638343811, "step": 11351 }, { "epoch": 1.8143530728042836, "grad_norm": 1.925400766187087, "learning_rate": 4.6858709086612943e-07, "loss": 0.6779196262359619, "step": 11352 }, { "epoch": 1.8145129065771597, "grad_norm": 1.3197327581885505, "learning_rate": 4.677879520470474e-07, "loss": 0.4197908937931061, "step": 11353 }, { "epoch": 1.814672740350036, "grad_norm": 1.763599979040973, "learning_rate": 4.6698947892380943e-07, "loss": 0.5744268894195557, "step": 11354 }, { "epoch": 1.8148325741229123, "grad_norm": 1.7443502067191747, "learning_rate": 4.661916715521764e-07, "loss": 0.6885097026824951, "step": 11355 }, { "epoch": 1.8149924078957884, "grad_norm": 1.8127067146817426, "learning_rate": 4.653945299878648e-07, "loss": 0.5230522155761719, "step": 11356 }, { "epoch": 1.8151522416686645, "grad_norm": 1.971901552210049, "learning_rate": 4.6459805428654246e-07, "loss": 0.5788441896438599, "step": 11357 }, { "epoch": 1.8153120754415408, "grad_norm": 1.6530076392608326, "learning_rate": 4.6380224450383595e-07, "loss": 0.5401731729507446, "step": 11358 }, { "epoch": 1.8154719092144171, "grad_norm": 1.7512480234784473, "learning_rate": 4.6300710069531964e-07, "loss": 0.558532178401947, "step": 11359 }, { "epoch": 1.8156317429872932, "grad_norm": 1.9046615291259048, "learning_rate": 4.622126229165247e-07, "loss": 0.5292921662330627, "step": 11360 }, { "epoch": 1.8157915767601693, "grad_norm": 2.0221700520860066, "learning_rate": 4.6141881122293117e-07, "loss": 0.5591013431549072, "step": 11361 }, { "epoch": 1.8159514105330457, "grad_norm": 1.4381342303772013, "learning_rate": 4.606256656699815e-07, "loss": 0.5187528729438782, "step": 11362 }, { "epoch": 1.816111244305922, "grad_norm": 1.5569923413377085, "learning_rate": 4.598331863130612e-07, "loss": 0.4510003328323364, "step": 11363 }, { "epoch": 1.816271078078798, "grad_norm": 1.9953712676360154, "learning_rate": 4.5904137320751716e-07, "loss": 0.739066481590271, "step": 11364 }, { "epoch": 1.8164309118516742, "grad_norm": 1.6633175623683216, "learning_rate": 4.582502264086464e-07, "loss": 0.47025901079177856, "step": 11365 }, { "epoch": 1.8165907456245505, "grad_norm": 1.721149911131206, "learning_rate": 4.5745974597169894e-07, "loss": 0.45879048109054565, "step": 11366 }, { "epoch": 1.8167505793974268, "grad_norm": 1.704269560569586, "learning_rate": 4.566699319518808e-07, "loss": 0.46215176582336426, "step": 11367 }, { "epoch": 1.8169104131703029, "grad_norm": 1.6220304677438009, "learning_rate": 4.558807844043467e-07, "loss": 0.4826759696006775, "step": 11368 }, { "epoch": 1.817070246943179, "grad_norm": 1.5879640728792437, "learning_rate": 4.550923033842103e-07, "loss": 0.537514328956604, "step": 11369 }, { "epoch": 1.8172300807160553, "grad_norm": 1.5090653813365609, "learning_rate": 4.5430448894653646e-07, "loss": 0.3586037755012512, "step": 11370 }, { "epoch": 1.8173899144889316, "grad_norm": 1.570240189326066, "learning_rate": 4.535173411463423e-07, "loss": 0.5415767431259155, "step": 11371 }, { "epoch": 1.8175497482618077, "grad_norm": 1.8697913592699207, "learning_rate": 4.5273086003860046e-07, "loss": 0.4888320565223694, "step": 11372 }, { "epoch": 1.8177095820346838, "grad_norm": 1.4958547675328886, "learning_rate": 4.519450456782337e-07, "loss": 0.5267441272735596, "step": 11373 }, { "epoch": 1.8178694158075601, "grad_norm": 1.8241882102042595, "learning_rate": 4.5115989812012264e-07, "loss": 0.6340227127075195, "step": 11374 }, { "epoch": 1.8180292495804364, "grad_norm": 1.8500643702697828, "learning_rate": 4.503754174191e-07, "loss": 0.553464412689209, "step": 11375 }, { "epoch": 1.8181890833533125, "grad_norm": 1.7254895866802273, "learning_rate": 4.495916036299486e-07, "loss": 0.4464873671531677, "step": 11376 }, { "epoch": 1.8183489171261886, "grad_norm": 1.6372932903241029, "learning_rate": 4.48808456807408e-07, "loss": 0.5796486139297485, "step": 11377 }, { "epoch": 1.818508750899065, "grad_norm": 1.763449311070064, "learning_rate": 4.480259770061712e-07, "loss": 0.6048157215118408, "step": 11378 }, { "epoch": 1.8186685846719413, "grad_norm": 1.4449844020998652, "learning_rate": 4.472441642808845e-07, "loss": 0.43797942996025085, "step": 11379 }, { "epoch": 1.8188284184448174, "grad_norm": 1.5371326958030422, "learning_rate": 4.4646301868614516e-07, "loss": 0.46607792377471924, "step": 11380 }, { "epoch": 1.8189882522176934, "grad_norm": 1.6703176473476538, "learning_rate": 4.456825402765053e-07, "loss": 0.5712785720825195, "step": 11381 }, { "epoch": 1.8191480859905698, "grad_norm": 1.2533311067626725, "learning_rate": 4.4490272910647227e-07, "loss": 0.4422059655189514, "step": 11382 }, { "epoch": 1.819307919763446, "grad_norm": 1.6627337580728414, "learning_rate": 4.441235852305037e-07, "loss": 0.5612835884094238, "step": 11383 }, { "epoch": 1.8194677535363222, "grad_norm": 1.5166421216982904, "learning_rate": 4.4334510870301495e-07, "loss": 0.5710576772689819, "step": 11384 }, { "epoch": 1.8196275873091983, "grad_norm": 1.6411932674348786, "learning_rate": 4.4256729957837143e-07, "loss": 0.5558493733406067, "step": 11385 }, { "epoch": 1.8197874210820748, "grad_norm": 1.4893971214439305, "learning_rate": 4.417901579108896e-07, "loss": 0.6452356576919556, "step": 11386 }, { "epoch": 1.819947254854951, "grad_norm": 1.700384711612542, "learning_rate": 4.410136837548462e-07, "loss": 0.5130923390388489, "step": 11387 }, { "epoch": 1.820107088627827, "grad_norm": 1.5445411543839958, "learning_rate": 4.402378771644633e-07, "loss": 0.4529874324798584, "step": 11388 }, { "epoch": 1.8202669224007033, "grad_norm": 1.5926488909621053, "learning_rate": 4.3946273819392314e-07, "loss": 0.43067699670791626, "step": 11389 }, { "epoch": 1.8204267561735796, "grad_norm": 1.5771092271219602, "learning_rate": 4.386882668973591e-07, "loss": 0.4660922884941101, "step": 11390 }, { "epoch": 1.8205865899464557, "grad_norm": 1.4911105849933703, "learning_rate": 4.379144633288568e-07, "loss": 0.5787100195884705, "step": 11391 }, { "epoch": 1.8207464237193318, "grad_norm": 1.7167865421659287, "learning_rate": 4.371413275424541e-07, "loss": 0.5733020901679993, "step": 11392 }, { "epoch": 1.8209062574922081, "grad_norm": 1.4911473513676294, "learning_rate": 4.363688595921467e-07, "loss": 0.5309166312217712, "step": 11393 }, { "epoch": 1.8210660912650845, "grad_norm": 1.4096223102817829, "learning_rate": 4.3559705953188035e-07, "loss": 0.5391974449157715, "step": 11394 }, { "epoch": 1.8212259250379605, "grad_norm": 1.507224516505797, "learning_rate": 4.348259274155542e-07, "loss": 0.47207802534103394, "step": 11395 }, { "epoch": 1.8213857588108366, "grad_norm": 1.6592601906505673, "learning_rate": 4.3405546329702175e-07, "loss": 0.6165401935577393, "step": 11396 }, { "epoch": 1.821545592583713, "grad_norm": 1.6553909753175455, "learning_rate": 4.332856672300889e-07, "loss": 0.6574395298957825, "step": 11397 }, { "epoch": 1.8217054263565893, "grad_norm": 1.5995905907567354, "learning_rate": 4.32516539268516e-07, "loss": 0.5061655640602112, "step": 11398 }, { "epoch": 1.8218652601294654, "grad_norm": 1.7064102326933979, "learning_rate": 4.3174807946601673e-07, "loss": 0.5726593732833862, "step": 11399 }, { "epoch": 1.8220250939023415, "grad_norm": 3.388762195498476, "learning_rate": 4.30980287876257e-07, "loss": 0.6281687617301941, "step": 11400 }, { "epoch": 1.8221849276752178, "grad_norm": 1.6731892151532755, "learning_rate": 4.302131645528551e-07, "loss": 0.6046589612960815, "step": 11401 }, { "epoch": 1.822344761448094, "grad_norm": 1.5603558235293782, "learning_rate": 4.2944670954938816e-07, "loss": 0.5116190910339355, "step": 11402 }, { "epoch": 1.8225045952209702, "grad_norm": 1.6590651057280783, "learning_rate": 4.286809229193778e-07, "loss": 0.5973318815231323, "step": 11403 }, { "epoch": 1.8226644289938463, "grad_norm": 1.4249905794718614, "learning_rate": 4.2791580471630787e-07, "loss": 0.41867300868034363, "step": 11404 }, { "epoch": 1.8228242627667226, "grad_norm": 1.5283257378015407, "learning_rate": 4.2715135499361014e-07, "loss": 0.5887005925178528, "step": 11405 }, { "epoch": 1.822984096539599, "grad_norm": 1.7659244889328318, "learning_rate": 4.263875738046719e-07, "loss": 0.6064635515213013, "step": 11406 }, { "epoch": 1.823143930312475, "grad_norm": 1.7589611655949693, "learning_rate": 4.256244612028293e-07, "loss": 0.565163254737854, "step": 11407 }, { "epoch": 1.823303764085351, "grad_norm": 1.8738075448424383, "learning_rate": 4.248620172413798e-07, "loss": 0.5561754107475281, "step": 11408 }, { "epoch": 1.8234635978582274, "grad_norm": 1.628893006848998, "learning_rate": 4.2410024197356735e-07, "loss": 0.556196391582489, "step": 11409 }, { "epoch": 1.8236234316311037, "grad_norm": 1.5592959616176414, "learning_rate": 4.2333913545259285e-07, "loss": 0.6142030954360962, "step": 11410 }, { "epoch": 1.8237832654039798, "grad_norm": 1.7062458554785234, "learning_rate": 4.225786977316093e-07, "loss": 0.6762536764144897, "step": 11411 }, { "epoch": 1.823943099176856, "grad_norm": 1.5432427204609342, "learning_rate": 4.218189288637209e-07, "loss": 0.48064282536506653, "step": 11412 }, { "epoch": 1.8241029329497322, "grad_norm": 1.8619257371834144, "learning_rate": 4.210598289019896e-07, "loss": 0.5518168807029724, "step": 11413 }, { "epoch": 1.8242627667226086, "grad_norm": 1.6250383754477893, "learning_rate": 4.2030139789942745e-07, "loss": 0.6855350732803345, "step": 11414 }, { "epoch": 1.8244226004954847, "grad_norm": 1.5363868086218961, "learning_rate": 4.1954363590899993e-07, "loss": 0.49707138538360596, "step": 11415 }, { "epoch": 1.8245824342683608, "grad_norm": 1.624062428342588, "learning_rate": 4.18786542983628e-07, "loss": 0.5718851089477539, "step": 11416 }, { "epoch": 1.824742268041237, "grad_norm": 1.8582162842874328, "learning_rate": 4.1803011917618153e-07, "loss": 0.6941666603088379, "step": 11417 }, { "epoch": 1.8249021018141134, "grad_norm": 1.9345850706340593, "learning_rate": 4.1727436453948945e-07, "loss": 0.5772736072540283, "step": 11418 }, { "epoch": 1.8250619355869895, "grad_norm": 1.6409132975189578, "learning_rate": 4.165192791263295e-07, "loss": 0.5440274477005005, "step": 11419 }, { "epoch": 1.8252217693598656, "grad_norm": 1.6087070576949931, "learning_rate": 4.157648629894351e-07, "loss": 0.6431235074996948, "step": 11420 }, { "epoch": 1.8253816031327421, "grad_norm": 1.7791340610021205, "learning_rate": 4.150111161814896e-07, "loss": 0.4767165184020996, "step": 11421 }, { "epoch": 1.8255414369056182, "grad_norm": 1.5209201834845585, "learning_rate": 4.1425803875513425e-07, "loss": 0.5100509524345398, "step": 11422 }, { "epoch": 1.8257012706784943, "grad_norm": 1.3270932856525268, "learning_rate": 4.1350563076296146e-07, "loss": 0.6468886137008667, "step": 11423 }, { "epoch": 1.8258611044513706, "grad_norm": 1.5474459811930517, "learning_rate": 4.1275389225751694e-07, "loss": 0.4590504765510559, "step": 11424 }, { "epoch": 1.826020938224247, "grad_norm": 1.6021809578470296, "learning_rate": 4.120028232912976e-07, "loss": 0.48795849084854126, "step": 11425 }, { "epoch": 1.826180771997123, "grad_norm": 1.440807555141693, "learning_rate": 4.1125242391675703e-07, "loss": 0.6054941415786743, "step": 11426 }, { "epoch": 1.8263406057699991, "grad_norm": 1.6839037530882186, "learning_rate": 4.1050269418629887e-07, "loss": 0.48512905836105347, "step": 11427 }, { "epoch": 1.8265004395428754, "grad_norm": 1.6972669731928034, "learning_rate": 4.097536341522834e-07, "loss": 0.5695463418960571, "step": 11428 }, { "epoch": 1.8266602733157518, "grad_norm": 1.7524951562565454, "learning_rate": 4.090052438670211e-07, "loss": 0.6288204193115234, "step": 11429 }, { "epoch": 1.8268201070886279, "grad_norm": 1.6199643045813743, "learning_rate": 4.082575233827779e-07, "loss": 0.635772168636322, "step": 11430 }, { "epoch": 1.826979940861504, "grad_norm": 1.7924872656822113, "learning_rate": 4.07510472751772e-07, "loss": 0.5421109795570374, "step": 11431 }, { "epoch": 1.8271397746343803, "grad_norm": 1.7575938960042548, "learning_rate": 4.067640920261717e-07, "loss": 0.6615093350410461, "step": 11432 }, { "epoch": 1.8272996084072566, "grad_norm": 1.848957903494938, "learning_rate": 4.060183812581053e-07, "loss": 0.6349393129348755, "step": 11433 }, { "epoch": 1.8274594421801327, "grad_norm": 1.4754820321189286, "learning_rate": 4.0527334049964893e-07, "loss": 0.47925466299057007, "step": 11434 }, { "epoch": 1.8276192759530088, "grad_norm": 1.5785044660448013, "learning_rate": 4.045289698028343e-07, "loss": 0.6078773736953735, "step": 11435 }, { "epoch": 1.827779109725885, "grad_norm": 1.8850133077610545, "learning_rate": 4.037852692196453e-07, "loss": 0.4745340943336487, "step": 11436 }, { "epoch": 1.8279389434987614, "grad_norm": 1.678807422323671, "learning_rate": 4.0304223880201834e-07, "loss": 0.42438966035842896, "step": 11437 }, { "epoch": 1.8280987772716375, "grad_norm": 1.5071501038655186, "learning_rate": 4.0229987860184616e-07, "loss": 0.5018874406814575, "step": 11438 }, { "epoch": 1.8282586110445136, "grad_norm": 1.7189317260137125, "learning_rate": 4.0155818867097074e-07, "loss": 0.6386710405349731, "step": 11439 }, { "epoch": 1.82841844481739, "grad_norm": 1.5267213355501905, "learning_rate": 4.008171690611895e-07, "loss": 0.4958449602127075, "step": 11440 }, { "epoch": 1.8285782785902662, "grad_norm": 1.9164956684639072, "learning_rate": 4.000768198242522e-07, "loss": 0.6388765573501587, "step": 11441 }, { "epoch": 1.8287381123631423, "grad_norm": 1.7639056378586533, "learning_rate": 3.9933714101186294e-07, "loss": 0.6728850603103638, "step": 11442 }, { "epoch": 1.8288979461360184, "grad_norm": 1.5178624719077198, "learning_rate": 3.985981326756794e-07, "loss": 0.6660566329956055, "step": 11443 }, { "epoch": 1.8290577799088947, "grad_norm": 1.3534279839523844, "learning_rate": 3.978597948673091e-07, "loss": 0.4840899407863617, "step": 11444 }, { "epoch": 1.829217613681771, "grad_norm": 1.5212304217855839, "learning_rate": 3.971221276383164e-07, "loss": 0.4903098940849304, "step": 11445 }, { "epoch": 1.8293774474546471, "grad_norm": 1.9417894553082544, "learning_rate": 3.963851310402167e-07, "loss": 0.5319479703903198, "step": 11446 }, { "epoch": 1.8295372812275232, "grad_norm": 1.6441841085272997, "learning_rate": 3.956488051244789e-07, "loss": 0.5310065746307373, "step": 11447 }, { "epoch": 1.8296971150003996, "grad_norm": 1.5466056662460472, "learning_rate": 3.949131499425263e-07, "loss": 0.4688873589038849, "step": 11448 }, { "epoch": 1.8298569487732759, "grad_norm": 1.6879772463952731, "learning_rate": 3.941781655457333e-07, "loss": 0.486481249332428, "step": 11449 }, { "epoch": 1.830016782546152, "grad_norm": 1.3291513107423671, "learning_rate": 3.9344385198543e-07, "loss": 0.4280623495578766, "step": 11450 }, { "epoch": 1.830176616319028, "grad_norm": 1.7058133087823557, "learning_rate": 3.927102093128976e-07, "loss": 0.7047106027603149, "step": 11451 }, { "epoch": 1.8303364500919044, "grad_norm": 1.590988413841877, "learning_rate": 3.9197723757936956e-07, "loss": 0.5804053544998169, "step": 11452 }, { "epoch": 1.8304962838647807, "grad_norm": 1.9374832531272577, "learning_rate": 3.9124493683603715e-07, "loss": 0.5693638324737549, "step": 11453 }, { "epoch": 1.8306561176376568, "grad_norm": 2.2030609228132967, "learning_rate": 3.9051330713403947e-07, "loss": 0.541659951210022, "step": 11454 }, { "epoch": 1.8308159514105329, "grad_norm": 1.6541028115065843, "learning_rate": 3.897823485244723e-07, "loss": 0.47980594635009766, "step": 11455 }, { "epoch": 1.8309757851834092, "grad_norm": 1.9701899425051113, "learning_rate": 3.8905206105838033e-07, "loss": 0.5130308270454407, "step": 11456 }, { "epoch": 1.8311356189562855, "grad_norm": 1.7666178816923477, "learning_rate": 3.883224447867673e-07, "loss": 0.5824164152145386, "step": 11457 }, { "epoch": 1.8312954527291616, "grad_norm": 1.580478490781968, "learning_rate": 3.875934997605868e-07, "loss": 0.5383594036102295, "step": 11458 }, { "epoch": 1.831455286502038, "grad_norm": 1.5597976726458491, "learning_rate": 3.868652260307437e-07, "loss": 0.5069484114646912, "step": 11459 }, { "epoch": 1.8316151202749142, "grad_norm": 1.5501662625865984, "learning_rate": 3.861376236481007e-07, "loss": 0.4586668014526367, "step": 11460 }, { "epoch": 1.8317749540477903, "grad_norm": 1.1522635372403947, "learning_rate": 3.8541069266346707e-07, "loss": 0.36873963475227356, "step": 11461 }, { "epoch": 1.8319347878206664, "grad_norm": 1.5260978003099381, "learning_rate": 3.846844331276123e-07, "loss": 0.5756825804710388, "step": 11462 }, { "epoch": 1.8320946215935427, "grad_norm": 1.5243075937359403, "learning_rate": 3.839588450912568e-07, "loss": 0.5646279454231262, "step": 11463 }, { "epoch": 1.832254455366419, "grad_norm": 3.1113004262840684, "learning_rate": 3.8323392860507013e-07, "loss": 0.7155604958534241, "step": 11464 }, { "epoch": 1.8324142891392952, "grad_norm": 1.4729486620739145, "learning_rate": 3.8250968371967955e-07, "loss": 0.4809530973434448, "step": 11465 }, { "epoch": 1.8325741229121713, "grad_norm": 1.523332757618388, "learning_rate": 3.817861104856635e-07, "loss": 0.6695763468742371, "step": 11466 }, { "epoch": 1.8327339566850476, "grad_norm": 1.598633834301006, "learning_rate": 3.810632089535526e-07, "loss": 0.5069296360015869, "step": 11467 }, { "epoch": 1.8328937904579239, "grad_norm": 1.6066630644999458, "learning_rate": 3.8034097917383327e-07, "loss": 0.597920298576355, "step": 11468 }, { "epoch": 1.8330536242308, "grad_norm": 1.582383687317003, "learning_rate": 3.7961942119694286e-07, "loss": 0.5816713571548462, "step": 11469 }, { "epoch": 1.833213458003676, "grad_norm": 1.8235062374452466, "learning_rate": 3.7889853507327324e-07, "loss": 0.4445144534111023, "step": 11470 }, { "epoch": 1.8333732917765524, "grad_norm": 1.5958712856199078, "learning_rate": 3.781783208531653e-07, "loss": 0.5019996762275696, "step": 11471 }, { "epoch": 1.8335331255494287, "grad_norm": 1.671296390763486, "learning_rate": 3.7745877858692105e-07, "loss": 0.5059608221054077, "step": 11472 }, { "epoch": 1.8336929593223048, "grad_norm": 1.3530630609650094, "learning_rate": 3.76739908324788e-07, "loss": 0.4917871356010437, "step": 11473 }, { "epoch": 1.833852793095181, "grad_norm": 1.559700024840991, "learning_rate": 3.7602171011696944e-07, "loss": 0.6664754152297974, "step": 11474 }, { "epoch": 1.8340126268680572, "grad_norm": 1.6843798972378874, "learning_rate": 3.753041840136218e-07, "loss": 0.5401942729949951, "step": 11475 }, { "epoch": 1.8341724606409335, "grad_norm": 1.3254662936634567, "learning_rate": 3.7458733006485506e-07, "loss": 0.4878714978694916, "step": 11476 }, { "epoch": 1.8343322944138096, "grad_norm": 1.5681950346566256, "learning_rate": 3.7387114832073245e-07, "loss": 0.4970496594905853, "step": 11477 }, { "epoch": 1.8344921281866857, "grad_norm": 1.9163922650784497, "learning_rate": 3.731556388312685e-07, "loss": 0.5219542980194092, "step": 11478 }, { "epoch": 1.834651961959562, "grad_norm": 1.6265926444453465, "learning_rate": 3.7244080164643205e-07, "loss": 0.5263201594352722, "step": 11479 }, { "epoch": 1.8348117957324384, "grad_norm": 1.6820498704923037, "learning_rate": 3.717266368161454e-07, "loss": 0.5548219680786133, "step": 11480 }, { "epoch": 1.8349716295053144, "grad_norm": 1.7669124411330719, "learning_rate": 3.710131443902809e-07, "loss": 0.6486920118331909, "step": 11481 }, { "epoch": 1.8351314632781905, "grad_norm": 1.7642305247403993, "learning_rate": 3.703003244186687e-07, "loss": 0.4806050658226013, "step": 11482 }, { "epoch": 1.8352912970510669, "grad_norm": 1.7698553826006012, "learning_rate": 3.6958817695109006e-07, "loss": 0.4379774332046509, "step": 11483 }, { "epoch": 1.8354511308239432, "grad_norm": 1.5124457900427903, "learning_rate": 3.6887670203727745e-07, "loss": 0.4709058105945587, "step": 11484 }, { "epoch": 1.8356109645968193, "grad_norm": 1.5746436673481878, "learning_rate": 3.6816589972691885e-07, "loss": 0.5124592781066895, "step": 11485 }, { "epoch": 1.8357707983696954, "grad_norm": 1.642141541820628, "learning_rate": 3.674557700696524e-07, "loss": 0.4730309844017029, "step": 11486 }, { "epoch": 1.8359306321425717, "grad_norm": 1.490482373855968, "learning_rate": 3.667463131150728e-07, "loss": 0.6844156980514526, "step": 11487 }, { "epoch": 1.836090465915448, "grad_norm": 1.739237568032024, "learning_rate": 3.66037528912726e-07, "loss": 0.5021640062332153, "step": 11488 }, { "epoch": 1.836250299688324, "grad_norm": 1.7330640956346584, "learning_rate": 3.6532941751211025e-07, "loss": 0.5938087701797485, "step": 11489 }, { "epoch": 1.8364101334612002, "grad_norm": 1.525471100455186, "learning_rate": 3.6462197896267705e-07, "loss": 0.48745760321617126, "step": 11490 }, { "epoch": 1.8365699672340765, "grad_norm": 1.4266884674341338, "learning_rate": 3.6391521331383126e-07, "loss": 0.654880702495575, "step": 11491 }, { "epoch": 1.8367298010069528, "grad_norm": 1.5934300413173326, "learning_rate": 3.632091206149324e-07, "loss": 0.6519694328308105, "step": 11492 }, { "epoch": 1.836889634779829, "grad_norm": 1.7363780897270464, "learning_rate": 3.625037009152899e-07, "loss": 0.5376772880554199, "step": 11493 }, { "epoch": 1.8370494685527052, "grad_norm": 1.6536846790170947, "learning_rate": 3.6179895426416976e-07, "loss": 0.5265637040138245, "step": 11494 }, { "epoch": 1.8372093023255816, "grad_norm": 1.6866106946220172, "learning_rate": 3.6109488071078725e-07, "loss": 0.5595582723617554, "step": 11495 }, { "epoch": 1.8373691360984576, "grad_norm": 1.809850912449252, "learning_rate": 3.603914803043118e-07, "loss": 0.7143377065658569, "step": 11496 }, { "epoch": 1.8375289698713337, "grad_norm": 1.5560038149735835, "learning_rate": 3.5968875309386866e-07, "loss": 0.5427930355072021, "step": 11497 }, { "epoch": 1.83768880364421, "grad_norm": 1.6663774072108954, "learning_rate": 3.589866991285318e-07, "loss": 0.5127396583557129, "step": 11498 }, { "epoch": 1.8378486374170864, "grad_norm": 1.5508633479176759, "learning_rate": 3.5828531845733206e-07, "loss": 0.480294406414032, "step": 11499 }, { "epoch": 1.8380084711899625, "grad_norm": 1.4057331239758837, "learning_rate": 3.57584611129248e-07, "loss": 0.5357092618942261, "step": 11500 }, { "epoch": 1.8381683049628386, "grad_norm": 1.7558560425575132, "learning_rate": 3.5688457719321725e-07, "loss": 0.513672947883606, "step": 11501 }, { "epoch": 1.8383281387357149, "grad_norm": 1.7140986605852926, "learning_rate": 3.5618521669812945e-07, "loss": 0.6428737640380859, "step": 11502 }, { "epoch": 1.8384879725085912, "grad_norm": 1.4632408682808562, "learning_rate": 3.5548652969282226e-07, "loss": 0.6092033982276917, "step": 11503 }, { "epoch": 1.8386478062814673, "grad_norm": 1.630790860890396, "learning_rate": 3.5478851622609e-07, "loss": 0.7674250602722168, "step": 11504 }, { "epoch": 1.8388076400543434, "grad_norm": 1.4943113288343797, "learning_rate": 3.540911763466814e-07, "loss": 0.5735720992088318, "step": 11505 }, { "epoch": 1.8389674738272197, "grad_norm": 1.6539572325187444, "learning_rate": 3.533945101032921e-07, "loss": 0.5100709199905396, "step": 11506 }, { "epoch": 1.839127307600096, "grad_norm": 1.4720569390348968, "learning_rate": 3.526985175445796e-07, "loss": 0.5748502016067505, "step": 11507 }, { "epoch": 1.8392871413729721, "grad_norm": 1.694158676545876, "learning_rate": 3.5200319871914747e-07, "loss": 0.4921499490737915, "step": 11508 }, { "epoch": 1.8394469751458482, "grad_norm": 1.8558198698915285, "learning_rate": 3.513085536755534e-07, "loss": 0.5763266086578369, "step": 11509 }, { "epoch": 1.8396068089187245, "grad_norm": 1.6820742264173276, "learning_rate": 3.5061458246231083e-07, "loss": 0.6025835275650024, "step": 11510 }, { "epoch": 1.8397666426916008, "grad_norm": 1.65394206464233, "learning_rate": 3.49921285127881e-07, "loss": 0.4932616949081421, "step": 11511 }, { "epoch": 1.839926476464477, "grad_norm": 2.4207347296261665, "learning_rate": 3.4922866172068525e-07, "loss": 0.5132564902305603, "step": 11512 }, { "epoch": 1.840086310237353, "grad_norm": 1.4343637965289489, "learning_rate": 3.4853671228909256e-07, "loss": 0.48559117317199707, "step": 11513 }, { "epoch": 1.8402461440102293, "grad_norm": 1.7026275402061235, "learning_rate": 3.4784543688142545e-07, "loss": 0.46227967739105225, "step": 11514 }, { "epoch": 1.8404059777831057, "grad_norm": 1.5694977779057122, "learning_rate": 3.4715483554595974e-07, "loss": 0.5466436147689819, "step": 11515 }, { "epoch": 1.8405658115559818, "grad_norm": 1.5557161441104166, "learning_rate": 3.464649083309257e-07, "loss": 0.6852654218673706, "step": 11516 }, { "epoch": 1.8407256453288579, "grad_norm": 1.5421941763104072, "learning_rate": 3.4577565528450485e-07, "loss": 0.42915114760398865, "step": 11517 }, { "epoch": 1.8408854791017342, "grad_norm": 1.7856853887967608, "learning_rate": 3.450870764548331e-07, "loss": 0.5830402374267578, "step": 11518 }, { "epoch": 1.8410453128746105, "grad_norm": 1.574816378383211, "learning_rate": 3.443991718899986e-07, "loss": 0.5485647320747375, "step": 11519 }, { "epoch": 1.8412051466474866, "grad_norm": 1.4545281643484143, "learning_rate": 3.437119416380386e-07, "loss": 0.4626867175102234, "step": 11520 }, { "epoch": 1.8413649804203627, "grad_norm": 1.7149389864799702, "learning_rate": 3.430253857469501e-07, "loss": 0.5353021621704102, "step": 11521 }, { "epoch": 1.841524814193239, "grad_norm": 1.3053407162139345, "learning_rate": 3.423395042646804e-07, "loss": 0.401439905166626, "step": 11522 }, { "epoch": 1.8416846479661153, "grad_norm": 1.683053999272638, "learning_rate": 3.416542972391268e-07, "loss": 0.5351619124412537, "step": 11523 }, { "epoch": 1.8418444817389914, "grad_norm": 1.5347910878139832, "learning_rate": 3.409697647181431e-07, "loss": 0.5197975039482117, "step": 11524 }, { "epoch": 1.8420043155118675, "grad_norm": 1.8443294957959067, "learning_rate": 3.4028590674953454e-07, "loss": 0.5465313196182251, "step": 11525 }, { "epoch": 1.8421641492847438, "grad_norm": 1.9017139653355253, "learning_rate": 3.396027233810584e-07, "loss": 0.5309143662452698, "step": 11526 }, { "epoch": 1.8423239830576201, "grad_norm": 1.7306030466950995, "learning_rate": 3.3892021466042646e-07, "loss": 0.6801023483276367, "step": 11527 }, { "epoch": 1.8424838168304962, "grad_norm": 1.3679170721317657, "learning_rate": 3.382383806353029e-07, "loss": 0.5295004844665527, "step": 11528 }, { "epoch": 1.8426436506033725, "grad_norm": 1.8045797447846232, "learning_rate": 3.3755722135330405e-07, "loss": 0.5643969774246216, "step": 11529 }, { "epoch": 1.8428034843762489, "grad_norm": 1.374723443140982, "learning_rate": 3.368767368619996e-07, "loss": 0.4332596957683563, "step": 11530 }, { "epoch": 1.842963318149125, "grad_norm": 1.9143186497518132, "learning_rate": 3.361969272089116e-07, "loss": 0.5679933428764343, "step": 11531 }, { "epoch": 1.843123151922001, "grad_norm": 1.6280558459560992, "learning_rate": 3.3551779244151763e-07, "loss": 0.5632666349411011, "step": 11532 }, { "epoch": 1.8432829856948774, "grad_norm": 1.6199732437879255, "learning_rate": 3.348393326072441e-07, "loss": 0.5957480669021606, "step": 11533 }, { "epoch": 1.8434428194677537, "grad_norm": 1.7123767780437085, "learning_rate": 3.341615477534743e-07, "loss": 0.5065373182296753, "step": 11534 }, { "epoch": 1.8436026532406298, "grad_norm": 1.9980888300432504, "learning_rate": 3.3348443792753813e-07, "loss": 0.5736751556396484, "step": 11535 }, { "epoch": 1.8437624870135059, "grad_norm": 1.3888094810385, "learning_rate": 3.328080031767278e-07, "loss": 0.4291689395904541, "step": 11536 }, { "epoch": 1.8439223207863822, "grad_norm": 1.7675423869127826, "learning_rate": 3.321322435482799e-07, "loss": 0.6029181480407715, "step": 11537 }, { "epoch": 1.8440821545592585, "grad_norm": 1.5333892975647654, "learning_rate": 3.314571590893889e-07, "loss": 0.5048942565917969, "step": 11538 }, { "epoch": 1.8442419883321346, "grad_norm": 1.7717387967002665, "learning_rate": 3.3078274984719825e-07, "loss": 0.6634488105773926, "step": 11539 }, { "epoch": 1.8444018221050107, "grad_norm": 1.5136436187567008, "learning_rate": 3.3010901586880694e-07, "loss": 0.4683600068092346, "step": 11540 }, { "epoch": 1.844561655877887, "grad_norm": 1.693869325268842, "learning_rate": 3.294359572012673e-07, "loss": 0.5663427114486694, "step": 11541 }, { "epoch": 1.8447214896507633, "grad_norm": 1.667992049132995, "learning_rate": 3.2876357389158066e-07, "loss": 0.5758408308029175, "step": 11542 }, { "epoch": 1.8448813234236394, "grad_norm": 1.5644851131941049, "learning_rate": 3.2809186598670827e-07, "loss": 0.5624865293502808, "step": 11543 }, { "epoch": 1.8450411571965155, "grad_norm": 1.474827785490564, "learning_rate": 3.27420833533556e-07, "loss": 0.47347205877304077, "step": 11544 }, { "epoch": 1.8452009909693918, "grad_norm": 1.385560342022676, "learning_rate": 3.267504765789886e-07, "loss": 0.44109588861465454, "step": 11545 }, { "epoch": 1.8453608247422681, "grad_norm": 1.715793901028297, "learning_rate": 3.260807951698186e-07, "loss": 0.6061971783638, "step": 11546 }, { "epoch": 1.8455206585151442, "grad_norm": 1.5302148299891547, "learning_rate": 3.254117893528186e-07, "loss": 0.4700303077697754, "step": 11547 }, { "epoch": 1.8456804922880203, "grad_norm": 1.6437268925855248, "learning_rate": 3.247434591747056e-07, "loss": 0.5173795223236084, "step": 11548 }, { "epoch": 1.8458403260608967, "grad_norm": 1.9893347628909195, "learning_rate": 3.240758046821557e-07, "loss": 0.4641486406326294, "step": 11549 }, { "epoch": 1.846000159833773, "grad_norm": 1.7363685215541713, "learning_rate": 3.234088259217938e-07, "loss": 0.47779950499534607, "step": 11550 }, { "epoch": 1.846159993606649, "grad_norm": 1.5917770047099755, "learning_rate": 3.2274252294020147e-07, "loss": 0.41331326961517334, "step": 11551 }, { "epoch": 1.8463198273795252, "grad_norm": 1.629389358295878, "learning_rate": 3.2207689578390935e-07, "loss": 0.45009660720825195, "step": 11552 }, { "epoch": 1.8464796611524015, "grad_norm": 1.6161820986284932, "learning_rate": 3.214119444994035e-07, "loss": 0.49676182866096497, "step": 11553 }, { "epoch": 1.8466394949252778, "grad_norm": 1.853081022402764, "learning_rate": 3.2074766913312015e-07, "loss": 0.5821343064308167, "step": 11554 }, { "epoch": 1.8467993286981539, "grad_norm": 1.5461574952006167, "learning_rate": 3.2008406973145e-07, "loss": 0.5067360997200012, "step": 11555 }, { "epoch": 1.84695916247103, "grad_norm": 1.7712107545104032, "learning_rate": 3.194211463407393e-07, "loss": 0.5536761283874512, "step": 11556 }, { "epoch": 1.8471189962439063, "grad_norm": 1.5023209304608574, "learning_rate": 3.1875889900728094e-07, "loss": 0.6029508113861084, "step": 11557 }, { "epoch": 1.8472788300167826, "grad_norm": 1.3893713804034338, "learning_rate": 3.180973277773258e-07, "loss": 0.4892123341560364, "step": 11558 }, { "epoch": 1.8474386637896587, "grad_norm": 1.8257306191462617, "learning_rate": 3.174364326970758e-07, "loss": 0.509345293045044, "step": 11559 }, { "epoch": 1.8475984975625348, "grad_norm": 1.4159481418071163, "learning_rate": 3.16776213812684e-07, "loss": 0.43615758419036865, "step": 11560 }, { "epoch": 1.8477583313354111, "grad_norm": 1.479308494872726, "learning_rate": 3.16116671170259e-07, "loss": 0.5552588701248169, "step": 11561 }, { "epoch": 1.8479181651082874, "grad_norm": 1.64395861638744, "learning_rate": 3.154578048158596e-07, "loss": 0.6493726372718811, "step": 11562 }, { "epoch": 1.8480779988811635, "grad_norm": 1.8915632640717481, "learning_rate": 3.147996147955012e-07, "loss": 0.5432388782501221, "step": 11563 }, { "epoch": 1.8482378326540398, "grad_norm": 1.4946315066275428, "learning_rate": 3.141421011551471e-07, "loss": 0.5086888074874878, "step": 11564 }, { "epoch": 1.8483976664269162, "grad_norm": 1.5376882290906364, "learning_rate": 3.13485263940716e-07, "loss": 0.46747326850891113, "step": 11565 }, { "epoch": 1.8485575001997923, "grad_norm": 1.5429895358622112, "learning_rate": 3.1282910319808014e-07, "loss": 0.5719510316848755, "step": 11566 }, { "epoch": 1.8487173339726684, "grad_norm": 1.5956872362695966, "learning_rate": 3.1217361897306395e-07, "loss": 0.5748345851898193, "step": 11567 }, { "epoch": 1.8488771677455447, "grad_norm": 1.4767094747041738, "learning_rate": 3.11518811311442e-07, "loss": 0.7508350610733032, "step": 11568 }, { "epoch": 1.849037001518421, "grad_norm": 1.5672409436769208, "learning_rate": 3.1086468025894544e-07, "loss": 0.5079987049102783, "step": 11569 }, { "epoch": 1.849196835291297, "grad_norm": 1.3836134875128436, "learning_rate": 3.102112258612544e-07, "loss": 0.4088474214076996, "step": 11570 }, { "epoch": 1.8493566690641732, "grad_norm": 1.7669767608934759, "learning_rate": 3.095584481640068e-07, "loss": 0.5530222654342651, "step": 11571 }, { "epoch": 1.8495165028370495, "grad_norm": 1.4905832003597546, "learning_rate": 3.0890634721278846e-07, "loss": 0.4343894124031067, "step": 11572 }, { "epoch": 1.8496763366099258, "grad_norm": 2.179496762499672, "learning_rate": 3.0825492305313955e-07, "loss": 0.43688321113586426, "step": 11573 }, { "epoch": 1.849836170382802, "grad_norm": 1.6879321748774607, "learning_rate": 3.076041757305548e-07, "loss": 0.7386433482170105, "step": 11574 }, { "epoch": 1.849996004155678, "grad_norm": 1.3314785481145366, "learning_rate": 3.0695410529047676e-07, "loss": 0.4631774127483368, "step": 11575 }, { "epoch": 1.8501558379285543, "grad_norm": 1.7452535769623978, "learning_rate": 3.063047117783091e-07, "loss": 0.6754639148712158, "step": 11576 }, { "epoch": 1.8503156717014306, "grad_norm": 1.7468182990696755, "learning_rate": 3.056559952393989e-07, "loss": 0.673799455165863, "step": 11577 }, { "epoch": 1.8504755054743067, "grad_norm": 1.9429663322940678, "learning_rate": 3.050079557190522e-07, "loss": 0.5941497087478638, "step": 11578 }, { "epoch": 1.8506353392471828, "grad_norm": 1.440614839215227, "learning_rate": 3.04360593262526e-07, "loss": 0.5536012649536133, "step": 11579 }, { "epoch": 1.8507951730200591, "grad_norm": 1.7021491209458015, "learning_rate": 3.037139079150264e-07, "loss": 0.5690608024597168, "step": 11580 }, { "epoch": 1.8509550067929355, "grad_norm": 1.4372488782849318, "learning_rate": 3.030678997217207e-07, "loss": 0.462577223777771, "step": 11581 }, { "epoch": 1.8511148405658115, "grad_norm": 1.5055013468359204, "learning_rate": 3.0242256872772044e-07, "loss": 0.6411162614822388, "step": 11582 }, { "epoch": 1.8512746743386876, "grad_norm": 1.9329821380634535, "learning_rate": 3.0177791497809414e-07, "loss": 0.5233805775642395, "step": 11583 }, { "epoch": 1.851434508111564, "grad_norm": 1.8653740138904853, "learning_rate": 3.0113393851786354e-07, "loss": 0.6660106182098389, "step": 11584 }, { "epoch": 1.8515943418844403, "grad_norm": 1.6015669642018462, "learning_rate": 3.0049063939199817e-07, "loss": 0.4419056177139282, "step": 11585 }, { "epoch": 1.8517541756573164, "grad_norm": 1.7532590749750125, "learning_rate": 2.998480176454277e-07, "loss": 0.6181367039680481, "step": 11586 }, { "epoch": 1.8519140094301925, "grad_norm": 1.6438955350886781, "learning_rate": 2.9920607332302844e-07, "loss": 0.5104075074195862, "step": 11587 }, { "epoch": 1.8520738432030688, "grad_norm": 2.2175374037663906, "learning_rate": 2.9856480646963224e-07, "loss": 0.5207963585853577, "step": 11588 }, { "epoch": 1.852233676975945, "grad_norm": 1.6375243836530138, "learning_rate": 2.9792421713002227e-07, "loss": 0.5687987804412842, "step": 11589 }, { "epoch": 1.8523935107488212, "grad_norm": 1.6749542431519782, "learning_rate": 2.972843053489338e-07, "loss": 0.4663729965686798, "step": 11590 }, { "epoch": 1.8525533445216973, "grad_norm": 1.6352338181099542, "learning_rate": 2.966450711710589e-07, "loss": 0.4987262785434723, "step": 11591 }, { "epoch": 1.8527131782945736, "grad_norm": 1.533451808314222, "learning_rate": 2.960065146410385e-07, "loss": 0.4994059205055237, "step": 11592 }, { "epoch": 1.85287301206745, "grad_norm": 1.5745743600100397, "learning_rate": 2.9536863580346576e-07, "loss": 0.5679838061332703, "step": 11593 }, { "epoch": 1.853032845840326, "grad_norm": 1.4761632821073463, "learning_rate": 2.9473143470288955e-07, "loss": 0.5458551645278931, "step": 11594 }, { "epoch": 1.853192679613202, "grad_norm": 1.5410387609528051, "learning_rate": 2.9409491138380655e-07, "loss": 0.616358757019043, "step": 11595 }, { "epoch": 1.8533525133860784, "grad_norm": 1.7696142962914096, "learning_rate": 2.934590658906733e-07, "loss": 0.636901319026947, "step": 11596 }, { "epoch": 1.8535123471589547, "grad_norm": 1.7091358273864905, "learning_rate": 2.928238982678933e-07, "loss": 0.6990540027618408, "step": 11597 }, { "epoch": 1.8536721809318308, "grad_norm": 1.5888232256800892, "learning_rate": 2.9218940855982424e-07, "loss": 0.4978574514389038, "step": 11598 }, { "epoch": 1.8538320147047072, "grad_norm": 1.653635831024394, "learning_rate": 2.915555968107764e-07, "loss": 0.6348320245742798, "step": 11599 }, { "epoch": 1.8539918484775835, "grad_norm": 1.912008460167133, "learning_rate": 2.909224630650143e-07, "loss": 0.5747847557067871, "step": 11600 }, { "epoch": 1.8541516822504596, "grad_norm": 1.3557467308897686, "learning_rate": 2.902900073667525e-07, "loss": 0.420706570148468, "step": 11601 }, { "epoch": 1.8543115160233357, "grad_norm": 1.4887477646438843, "learning_rate": 2.89658229760158e-07, "loss": 0.5796672105789185, "step": 11602 }, { "epoch": 1.854471349796212, "grad_norm": 1.6272017055541679, "learning_rate": 2.8902713028935546e-07, "loss": 0.6462651491165161, "step": 11603 }, { "epoch": 1.8546311835690883, "grad_norm": 1.5908240076837254, "learning_rate": 2.883967089984163e-07, "loss": 0.46575313806533813, "step": 11604 }, { "epoch": 1.8547910173419644, "grad_norm": 1.866028446778401, "learning_rate": 2.8776696593136756e-07, "loss": 0.6079913377761841, "step": 11605 }, { "epoch": 1.8549508511148405, "grad_norm": 1.8467142523043507, "learning_rate": 2.871379011321884e-07, "loss": 0.572178840637207, "step": 11606 }, { "epoch": 1.8551106848877168, "grad_norm": 1.430120751870022, "learning_rate": 2.865095146448105e-07, "loss": 0.4451873004436493, "step": 11607 }, { "epoch": 1.8552705186605931, "grad_norm": 1.8573718339058198, "learning_rate": 2.858818065131175e-07, "loss": 0.5774765014648438, "step": 11608 }, { "epoch": 1.8554303524334692, "grad_norm": 1.4837886801402957, "learning_rate": 2.852547767809466e-07, "loss": 0.4674873650074005, "step": 11609 }, { "epoch": 1.8555901862063453, "grad_norm": 1.484791515417544, "learning_rate": 2.8462842549208615e-07, "loss": 0.519965648651123, "step": 11610 }, { "epoch": 1.8557500199792216, "grad_norm": 1.0780020165786879, "learning_rate": 2.840027526902811e-07, "loss": 0.43349534273147583, "step": 11611 }, { "epoch": 1.855909853752098, "grad_norm": 1.7252435902641279, "learning_rate": 2.833777584192243e-07, "loss": 0.5132009983062744, "step": 11612 }, { "epoch": 1.856069687524974, "grad_norm": 1.4188639021768201, "learning_rate": 2.827534427225631e-07, "loss": 0.4853322505950928, "step": 11613 }, { "epoch": 1.8562295212978501, "grad_norm": 1.706506427498291, "learning_rate": 2.82129805643897e-07, "loss": 0.5713815093040466, "step": 11614 }, { "epoch": 1.8563893550707264, "grad_norm": 1.6691784274268844, "learning_rate": 2.8150684722678125e-07, "loss": 0.5589531660079956, "step": 11615 }, { "epoch": 1.8565491888436028, "grad_norm": 1.5423439961009, "learning_rate": 2.808845675147187e-07, "loss": 0.4523683190345764, "step": 11616 }, { "epoch": 1.8567090226164789, "grad_norm": 1.614907697767359, "learning_rate": 2.80262966551168e-07, "loss": 0.5055345892906189, "step": 11617 }, { "epoch": 1.856868856389355, "grad_norm": 1.6764970675992905, "learning_rate": 2.796420443795389e-07, "loss": 0.493256151676178, "step": 11618 }, { "epoch": 1.8570286901622313, "grad_norm": 1.7323344306882194, "learning_rate": 2.7902180104319443e-07, "loss": 0.6503986120223999, "step": 11619 }, { "epoch": 1.8571885239351076, "grad_norm": 1.3112876197974759, "learning_rate": 2.784022365854511e-07, "loss": 0.5598292350769043, "step": 11620 }, { "epoch": 1.8573483577079837, "grad_norm": 1.921847210592965, "learning_rate": 2.7778335104957644e-07, "loss": 0.5419297814369202, "step": 11621 }, { "epoch": 1.8575081914808598, "grad_norm": 1.412562084107678, "learning_rate": 2.771651444787904e-07, "loss": 0.5204268097877502, "step": 11622 }, { "epoch": 1.857668025253736, "grad_norm": 1.6256640415443757, "learning_rate": 2.765476169162673e-07, "loss": 0.6087778806686401, "step": 11623 }, { "epoch": 1.8578278590266124, "grad_norm": 1.4964481640213145, "learning_rate": 2.759307684051349e-07, "loss": 0.48114317655563354, "step": 11624 }, { "epoch": 1.8579876927994885, "grad_norm": 1.6099625313587638, "learning_rate": 2.753145989884676e-07, "loss": 0.591123104095459, "step": 11625 }, { "epoch": 1.8581475265723646, "grad_norm": 1.7238087560145945, "learning_rate": 2.746991087092998e-07, "loss": 0.5332733392715454, "step": 11626 }, { "epoch": 1.858307360345241, "grad_norm": 1.5752397153837416, "learning_rate": 2.7408429761061393e-07, "loss": 0.5930784344673157, "step": 11627 }, { "epoch": 1.8584671941181172, "grad_norm": 1.4826417970627086, "learning_rate": 2.7347016573534667e-07, "loss": 0.46751129627227783, "step": 11628 }, { "epoch": 1.8586270278909933, "grad_norm": 1.3063258923602448, "learning_rate": 2.7285671312638484e-07, "loss": 0.49060359597206116, "step": 11629 }, { "epoch": 1.8587868616638694, "grad_norm": 1.9003130339806678, "learning_rate": 2.7224393982657303e-07, "loss": 0.5930604934692383, "step": 11630 }, { "epoch": 1.8589466954367457, "grad_norm": 1.304959103963351, "learning_rate": 2.7163184587870375e-07, "loss": 0.47770726680755615, "step": 11631 }, { "epoch": 1.859106529209622, "grad_norm": 1.6159702377806116, "learning_rate": 2.710204313255227e-07, "loss": 0.6031678915023804, "step": 11632 }, { "epoch": 1.8592663629824981, "grad_norm": 1.4666243145183542, "learning_rate": 2.7040969620973023e-07, "loss": 0.49972015619277954, "step": 11633 }, { "epoch": 1.8594261967553745, "grad_norm": 1.4155230829728689, "learning_rate": 2.697996405739756e-07, "loss": 0.519936203956604, "step": 11634 }, { "epoch": 1.8595860305282508, "grad_norm": 1.6019817272523955, "learning_rate": 2.691902644608657e-07, "loss": 0.5346684455871582, "step": 11635 }, { "epoch": 1.8597458643011269, "grad_norm": 1.6734243913158053, "learning_rate": 2.685815679129555e-07, "loss": 0.5538710951805115, "step": 11636 }, { "epoch": 1.859905698074003, "grad_norm": 1.869585444858859, "learning_rate": 2.6797355097275545e-07, "loss": 0.6429612040519714, "step": 11637 }, { "epoch": 1.8600655318468793, "grad_norm": 1.5819509184271883, "learning_rate": 2.67366213682726e-07, "loss": 0.6352698802947998, "step": 11638 }, { "epoch": 1.8602253656197556, "grad_norm": 1.5883980788109764, "learning_rate": 2.6675955608528094e-07, "loss": 0.5306761860847473, "step": 11639 }, { "epoch": 1.8603851993926317, "grad_norm": 1.5822904230465578, "learning_rate": 2.661535782227898e-07, "loss": 0.6659852266311646, "step": 11640 }, { "epoch": 1.8605450331655078, "grad_norm": 1.646402445588972, "learning_rate": 2.6554828013756974e-07, "loss": 0.5966663956642151, "step": 11641 }, { "epoch": 1.860704866938384, "grad_norm": 1.5130928886663457, "learning_rate": 2.649436618718915e-07, "loss": 0.5574854612350464, "step": 11642 }, { "epoch": 1.8608647007112604, "grad_norm": 1.5557925728918416, "learning_rate": 2.643397234679823e-07, "loss": 0.43388891220092773, "step": 11643 }, { "epoch": 1.8610245344841365, "grad_norm": 1.4512932464480304, "learning_rate": 2.637364649680185e-07, "loss": 0.47648268938064575, "step": 11644 }, { "epoch": 1.8611843682570126, "grad_norm": 1.9502798195784035, "learning_rate": 2.631338864141275e-07, "loss": 0.6590250730514526, "step": 11645 }, { "epoch": 1.861344202029889, "grad_norm": 1.6438071592800063, "learning_rate": 2.625319878483934e-07, "loss": 0.6300480365753174, "step": 11646 }, { "epoch": 1.8615040358027652, "grad_norm": 1.6193928789681955, "learning_rate": 2.6193076931285035e-07, "loss": 0.6354087591171265, "step": 11647 }, { "epoch": 1.8616638695756413, "grad_norm": 1.2939529138962074, "learning_rate": 2.6133023084948475e-07, "loss": 0.4445405602455139, "step": 11648 }, { "epoch": 1.8618237033485174, "grad_norm": 1.5148242937000587, "learning_rate": 2.6073037250023416e-07, "loss": 0.5135184526443481, "step": 11649 }, { "epoch": 1.8619835371213938, "grad_norm": 1.628927550691385, "learning_rate": 2.6013119430699505e-07, "loss": 0.5705742835998535, "step": 11650 }, { "epoch": 1.86214337089427, "grad_norm": 1.957600286796366, "learning_rate": 2.5953269631160847e-07, "loss": 0.6073355078697205, "step": 11651 }, { "epoch": 1.8623032046671462, "grad_norm": 1.4850122305448865, "learning_rate": 2.5893487855587317e-07, "loss": 0.5772671103477478, "step": 11652 }, { "epoch": 1.8624630384400223, "grad_norm": 1.5757173230246992, "learning_rate": 2.5833774108153685e-07, "loss": 0.543378472328186, "step": 11653 }, { "epoch": 1.8626228722128986, "grad_norm": 1.693773313911243, "learning_rate": 2.577412839303017e-07, "loss": 0.5580058097839355, "step": 11654 }, { "epoch": 1.862782705985775, "grad_norm": 1.6303333819062888, "learning_rate": 2.5714550714382445e-07, "loss": 0.5560406446456909, "step": 11655 }, { "epoch": 1.862942539758651, "grad_norm": 1.828265304567365, "learning_rate": 2.565504107637096e-07, "loss": 0.5234975814819336, "step": 11656 }, { "epoch": 1.863102373531527, "grad_norm": 1.5955003700708768, "learning_rate": 2.559559948315182e-07, "loss": 0.484516441822052, "step": 11657 }, { "epoch": 1.8632622073044034, "grad_norm": 1.6452090367519752, "learning_rate": 2.5536225938876056e-07, "loss": 0.5055956840515137, "step": 11658 }, { "epoch": 1.8634220410772797, "grad_norm": 1.646235630077513, "learning_rate": 2.547692044769012e-07, "loss": 0.5476646423339844, "step": 11659 }, { "epoch": 1.8635818748501558, "grad_norm": 1.5455836264063552, "learning_rate": 2.5417683013735926e-07, "loss": 0.4642972946166992, "step": 11660 }, { "epoch": 1.863741708623032, "grad_norm": 1.5177572731094096, "learning_rate": 2.535851364115016e-07, "loss": 0.5790832042694092, "step": 11661 }, { "epoch": 1.8639015423959082, "grad_norm": 1.5873306968543435, "learning_rate": 2.5299412334064967e-07, "loss": 0.5959316492080688, "step": 11662 }, { "epoch": 1.8640613761687845, "grad_norm": 1.5726969227114262, "learning_rate": 2.5240379096608035e-07, "loss": 0.5087288022041321, "step": 11663 }, { "epoch": 1.8642212099416606, "grad_norm": 1.6691160981447735, "learning_rate": 2.518141393290185e-07, "loss": 0.5373305082321167, "step": 11664 }, { "epoch": 1.8643810437145367, "grad_norm": 1.9444658850113716, "learning_rate": 2.512251684706435e-07, "loss": 0.5593289732933044, "step": 11665 }, { "epoch": 1.864540877487413, "grad_norm": 1.840678112072637, "learning_rate": 2.5063687843208784e-07, "loss": 0.6399029493331909, "step": 11666 }, { "epoch": 1.8647007112602894, "grad_norm": 1.6521050068037728, "learning_rate": 2.500492692544354e-07, "loss": 0.6248288154602051, "step": 11667 }, { "epoch": 1.8648605450331655, "grad_norm": 1.5237230237680337, "learning_rate": 2.4946234097872225e-07, "loss": 0.4845709204673767, "step": 11668 }, { "epoch": 1.8650203788060418, "grad_norm": 1.9557465019557267, "learning_rate": 2.488760936459356e-07, "loss": 0.5860578417778015, "step": 11669 }, { "epoch": 1.865180212578918, "grad_norm": 1.6220720862577862, "learning_rate": 2.4829052729702043e-07, "loss": 0.5649778842926025, "step": 11670 }, { "epoch": 1.8653400463517942, "grad_norm": 1.5058000919825765, "learning_rate": 2.477056419728685e-07, "loss": 0.4589757025241852, "step": 11671 }, { "epoch": 1.8654998801246703, "grad_norm": 1.426529713368467, "learning_rate": 2.4712143771432605e-07, "loss": 0.5060652494430542, "step": 11672 }, { "epoch": 1.8656597138975466, "grad_norm": 1.6748941457813493, "learning_rate": 2.465379145621927e-07, "loss": 0.5885642766952515, "step": 11673 }, { "epoch": 1.865819547670423, "grad_norm": 1.7916212986856137, "learning_rate": 2.4595507255721796e-07, "loss": 0.6234800815582275, "step": 11674 }, { "epoch": 1.865979381443299, "grad_norm": 1.7441394301023534, "learning_rate": 2.453729117401082e-07, "loss": 0.5619468092918396, "step": 11675 }, { "epoch": 1.866139215216175, "grad_norm": 1.4446923042538924, "learning_rate": 2.447914321515177e-07, "loss": 0.45990216732025146, "step": 11676 }, { "epoch": 1.8662990489890514, "grad_norm": 1.9252315788482979, "learning_rate": 2.442106338320549e-07, "loss": 0.6230083703994751, "step": 11677 }, { "epoch": 1.8664588827619277, "grad_norm": 1.552753017338506, "learning_rate": 2.436305168222808e-07, "loss": 0.6225795745849609, "step": 11678 }, { "epoch": 1.8666187165348038, "grad_norm": 1.5314272984452262, "learning_rate": 2.430510811627096e-07, "loss": 0.5817972421646118, "step": 11679 }, { "epoch": 1.86677855030768, "grad_norm": 1.6113418453414594, "learning_rate": 2.424723268938056e-07, "loss": 0.5434170365333557, "step": 11680 }, { "epoch": 1.8669383840805562, "grad_norm": 1.6979499490147782, "learning_rate": 2.4189425405598875e-07, "loss": 0.6684901714324951, "step": 11681 }, { "epoch": 1.8670982178534326, "grad_norm": 1.7167388705535083, "learning_rate": 2.4131686268962674e-07, "loss": 0.5588634014129639, "step": 11682 }, { "epoch": 1.8672580516263086, "grad_norm": 1.3804704098532075, "learning_rate": 2.4074015283504504e-07, "loss": 0.5632696151733398, "step": 11683 }, { "epoch": 1.8674178853991847, "grad_norm": 1.5827747465175441, "learning_rate": 2.4016412453251816e-07, "loss": 0.4217183589935303, "step": 11684 }, { "epoch": 1.867577719172061, "grad_norm": 1.6167762276769821, "learning_rate": 2.3958877782227497e-07, "loss": 0.5640485286712646, "step": 11685 }, { "epoch": 1.8677375529449374, "grad_norm": 1.9254035577473483, "learning_rate": 2.390141127444945e-07, "loss": 0.6307954788208008, "step": 11686 }, { "epoch": 1.8678973867178135, "grad_norm": 1.5087793549082171, "learning_rate": 2.3844012933930906e-07, "loss": 0.5802831649780273, "step": 11687 }, { "epoch": 1.8680572204906896, "grad_norm": 1.5656380370606744, "learning_rate": 2.3786682764680437e-07, "loss": 0.53814697265625, "step": 11688 }, { "epoch": 1.8682170542635659, "grad_norm": 1.9629895540563322, "learning_rate": 2.3729420770701617e-07, "loss": 0.5596843957901001, "step": 11689 }, { "epoch": 1.8683768880364422, "grad_norm": 1.5302374723451095, "learning_rate": 2.3672226955993694e-07, "loss": 0.5551441311836243, "step": 11690 }, { "epoch": 1.8685367218093183, "grad_norm": 1.9239222823244113, "learning_rate": 2.3615101324550694e-07, "loss": 0.6457874178886414, "step": 11691 }, { "epoch": 1.8686965555821944, "grad_norm": 1.8580529367634635, "learning_rate": 2.3558043880362092e-07, "loss": 0.6196830868721008, "step": 11692 }, { "epoch": 1.8688563893550707, "grad_norm": 2.0543879267164393, "learning_rate": 2.350105462741259e-07, "loss": 0.5679560899734497, "step": 11693 }, { "epoch": 1.869016223127947, "grad_norm": 1.8188234493977147, "learning_rate": 2.3444133569682116e-07, "loss": 0.4978258013725281, "step": 11694 }, { "epoch": 1.8691760569008231, "grad_norm": 1.6730651881455727, "learning_rate": 2.3387280711145933e-07, "loss": 0.5201503038406372, "step": 11695 }, { "epoch": 1.8693358906736992, "grad_norm": 1.838090915625051, "learning_rate": 2.333049605577431e-07, "loss": 0.6695824861526489, "step": 11696 }, { "epoch": 1.8694957244465755, "grad_norm": 1.4670370678875524, "learning_rate": 2.3273779607532855e-07, "loss": 0.49854469299316406, "step": 11697 }, { "epoch": 1.8696555582194518, "grad_norm": 1.761912422578042, "learning_rate": 2.3217131370382505e-07, "loss": 0.6101384162902832, "step": 11698 }, { "epoch": 1.869815391992328, "grad_norm": 1.6372698572981754, "learning_rate": 2.3160551348279438e-07, "loss": 0.5446912050247192, "step": 11699 }, { "epoch": 1.869975225765204, "grad_norm": 1.55750218499352, "learning_rate": 2.310403954517504e-07, "loss": 0.6226977109909058, "step": 11700 }, { "epoch": 1.8701350595380803, "grad_norm": 1.8101243994111786, "learning_rate": 2.3047595965015712e-07, "loss": 0.6122357845306396, "step": 11701 }, { "epoch": 1.8702948933109567, "grad_norm": 1.7247692488703137, "learning_rate": 2.2991220611743191e-07, "loss": 0.5745365619659424, "step": 11702 }, { "epoch": 1.8704547270838328, "grad_norm": 1.697641634499031, "learning_rate": 2.2934913489294886e-07, "loss": 0.599217414855957, "step": 11703 }, { "epoch": 1.870614560856709, "grad_norm": 1.598884958680627, "learning_rate": 2.2878674601602867e-07, "loss": 0.5158504843711853, "step": 11704 }, { "epoch": 1.8707743946295854, "grad_norm": 1.5778631551392552, "learning_rate": 2.2822503952594666e-07, "loss": 0.6180298328399658, "step": 11705 }, { "epoch": 1.8709342284024615, "grad_norm": 1.4938380215954046, "learning_rate": 2.276640154619314e-07, "loss": 0.4318326711654663, "step": 11706 }, { "epoch": 1.8710940621753376, "grad_norm": 1.4655781959835523, "learning_rate": 2.2710367386316156e-07, "loss": 0.5399731397628784, "step": 11707 }, { "epoch": 1.871253895948214, "grad_norm": 1.5853175100248187, "learning_rate": 2.265440147687703e-07, "loss": 0.5012566447257996, "step": 11708 }, { "epoch": 1.8714137297210902, "grad_norm": 1.5285323442613863, "learning_rate": 2.2598503821784078e-07, "loss": 0.5949101448059082, "step": 11709 }, { "epoch": 1.8715735634939663, "grad_norm": 1.8548186453484714, "learning_rate": 2.2542674424941292e-07, "loss": 0.5431573390960693, "step": 11710 }, { "epoch": 1.8717333972668424, "grad_norm": 1.9415947423296105, "learning_rate": 2.2486913290247324e-07, "loss": 0.6035314798355103, "step": 11711 }, { "epoch": 1.8718932310397187, "grad_norm": 1.646432461115375, "learning_rate": 2.24312204215964e-07, "loss": 0.5156893730163574, "step": 11712 }, { "epoch": 1.872053064812595, "grad_norm": 1.5597305086988709, "learning_rate": 2.2375595822877848e-07, "loss": 0.4543306231498718, "step": 11713 }, { "epoch": 1.8722128985854711, "grad_norm": 1.646531717382142, "learning_rate": 2.2320039497976453e-07, "loss": 0.4564175605773926, "step": 11714 }, { "epoch": 1.8723727323583472, "grad_norm": 1.4876676187972364, "learning_rate": 2.2264551450772e-07, "loss": 0.4845849275588989, "step": 11715 }, { "epoch": 1.8725325661312235, "grad_norm": 1.862317409973459, "learning_rate": 2.2209131685139607e-07, "loss": 0.5034329891204834, "step": 11716 }, { "epoch": 1.8726923999040999, "grad_norm": 1.479097531653375, "learning_rate": 2.2153780204949516e-07, "loss": 0.5174927711486816, "step": 11717 }, { "epoch": 1.872852233676976, "grad_norm": 1.8829949956364895, "learning_rate": 2.20984970140673e-07, "loss": 0.5611863136291504, "step": 11718 }, { "epoch": 1.873012067449852, "grad_norm": 1.633169272551062, "learning_rate": 2.204328211635376e-07, "loss": 0.4909396171569824, "step": 11719 }, { "epoch": 1.8731719012227284, "grad_norm": 1.7080256591347645, "learning_rate": 2.1988135515665033e-07, "loss": 0.6118088960647583, "step": 11720 }, { "epoch": 1.8733317349956047, "grad_norm": 1.3961498933606926, "learning_rate": 2.193305721585215e-07, "loss": 0.4958662986755371, "step": 11721 }, { "epoch": 1.8734915687684808, "grad_norm": 1.8388258063174703, "learning_rate": 2.187804722076159e-07, "loss": 0.6185926198959351, "step": 11722 }, { "epoch": 1.8736514025413569, "grad_norm": 1.6343292231276176, "learning_rate": 2.1823105534235166e-07, "loss": 0.4358557164669037, "step": 11723 }, { "epoch": 1.8738112363142332, "grad_norm": 1.6587278734518374, "learning_rate": 2.1768232160109815e-07, "loss": 0.5419970154762268, "step": 11724 }, { "epoch": 1.8739710700871095, "grad_norm": 1.8700084834706836, "learning_rate": 2.1713427102217688e-07, "loss": 0.5342355966567993, "step": 11725 }, { "epoch": 1.8741309038599856, "grad_norm": 1.7499979743832534, "learning_rate": 2.165869036438617e-07, "loss": 0.5544564723968506, "step": 11726 }, { "epoch": 1.8742907376328617, "grad_norm": 1.6826683708481862, "learning_rate": 2.160402195043776e-07, "loss": 0.6605402231216431, "step": 11727 }, { "epoch": 1.874450571405738, "grad_norm": 1.6344441736839637, "learning_rate": 2.15494218641904e-07, "loss": 0.49363642930984497, "step": 11728 }, { "epoch": 1.8746104051786143, "grad_norm": 1.4893559079614662, "learning_rate": 2.1494890109457378e-07, "loss": 0.7038760185241699, "step": 11729 }, { "epoch": 1.8747702389514904, "grad_norm": 1.5393086965763403, "learning_rate": 2.1440426690046646e-07, "loss": 0.4492301642894745, "step": 11730 }, { "epoch": 1.8749300727243665, "grad_norm": 1.400376518412894, "learning_rate": 2.1386031609761937e-07, "loss": 0.5006120800971985, "step": 11731 }, { "epoch": 1.8750899064972428, "grad_norm": 1.643467413511283, "learning_rate": 2.1331704872401993e-07, "loss": 0.6247484683990479, "step": 11732 }, { "epoch": 1.8752497402701191, "grad_norm": 1.676243011323742, "learning_rate": 2.1277446481760555e-07, "loss": 0.5867783427238464, "step": 11733 }, { "epoch": 1.8754095740429952, "grad_norm": 1.5863453773999314, "learning_rate": 2.122325644162726e-07, "loss": 0.5434942245483398, "step": 11734 }, { "epoch": 1.8755694078158713, "grad_norm": 1.7274206270235375, "learning_rate": 2.1169134755786302e-07, "loss": 0.6013495922088623, "step": 11735 }, { "epoch": 1.8757292415887477, "grad_norm": 1.7773281563051784, "learning_rate": 2.1115081428017324e-07, "loss": 0.7192317247390747, "step": 11736 }, { "epoch": 1.875889075361624, "grad_norm": 1.6389834737221995, "learning_rate": 2.106109646209542e-07, "loss": 0.5092378854751587, "step": 11737 }, { "epoch": 1.8760489091345, "grad_norm": 1.2851177292035696, "learning_rate": 2.100717986179035e-07, "loss": 0.460530161857605, "step": 11738 }, { "epoch": 1.8762087429073764, "grad_norm": 1.396832152393618, "learning_rate": 2.095333163086777e-07, "loss": 0.48771363496780396, "step": 11739 }, { "epoch": 1.8763685766802527, "grad_norm": 1.6008514265057945, "learning_rate": 2.0899551773088223e-07, "loss": 0.5998085141181946, "step": 11740 }, { "epoch": 1.8765284104531288, "grad_norm": 1.3835414978442024, "learning_rate": 2.0845840292207374e-07, "loss": 0.452018141746521, "step": 11741 }, { "epoch": 1.8766882442260049, "grad_norm": 1.61159581462588, "learning_rate": 2.0792197191976227e-07, "loss": 0.5129947066307068, "step": 11742 }, { "epoch": 1.8768480779988812, "grad_norm": 1.5759944669928663, "learning_rate": 2.0738622476141e-07, "loss": 0.5346769094467163, "step": 11743 }, { "epoch": 1.8770079117717575, "grad_norm": 1.347774055609758, "learning_rate": 2.0685116148443485e-07, "loss": 0.6070989966392517, "step": 11744 }, { "epoch": 1.8771677455446336, "grad_norm": 1.553349417863331, "learning_rate": 2.0631678212620133e-07, "loss": 0.44680309295654297, "step": 11745 }, { "epoch": 1.8773275793175097, "grad_norm": 1.4901088974653771, "learning_rate": 2.0578308672402736e-07, "loss": 0.620084822177887, "step": 11746 }, { "epoch": 1.877487413090386, "grad_norm": 1.4656658266555216, "learning_rate": 2.052500753151876e-07, "loss": 0.5163232088088989, "step": 11747 }, { "epoch": 1.8776472468632623, "grad_norm": 1.6300730715675034, "learning_rate": 2.0471774793690225e-07, "loss": 0.6446651816368103, "step": 11748 }, { "epoch": 1.8778070806361384, "grad_norm": 1.531315776804148, "learning_rate": 2.0418610462634935e-07, "loss": 0.5798295140266418, "step": 11749 }, { "epoch": 1.8779669144090145, "grad_norm": 1.81375448785484, "learning_rate": 2.036551454206559e-07, "loss": 0.6475037336349487, "step": 11750 }, { "epoch": 1.8781267481818908, "grad_norm": 1.4696488223178417, "learning_rate": 2.031248703569033e-07, "loss": 0.4264892339706421, "step": 11751 }, { "epoch": 1.8782865819547672, "grad_norm": 1.6539528867587567, "learning_rate": 2.025952794721231e-07, "loss": 0.5079970955848694, "step": 11752 }, { "epoch": 1.8784464157276433, "grad_norm": 1.4783006402396093, "learning_rate": 2.02066372803299e-07, "loss": 0.6060564517974854, "step": 11753 }, { "epoch": 1.8786062495005194, "grad_norm": 1.6338151731829424, "learning_rate": 2.0153815038737146e-07, "loss": 0.4828426241874695, "step": 11754 }, { "epoch": 1.8787660832733957, "grad_norm": 1.6159645827396905, "learning_rate": 2.0101061226122654e-07, "loss": 0.5267009735107422, "step": 11755 }, { "epoch": 1.878925917046272, "grad_norm": 2.1290095836243528, "learning_rate": 2.0048375846170697e-07, "loss": 0.572303056716919, "step": 11756 }, { "epoch": 1.879085750819148, "grad_norm": 1.7428989737826281, "learning_rate": 1.9995758902560558e-07, "loss": 0.5490133166313171, "step": 11757 }, { "epoch": 1.8792455845920242, "grad_norm": 1.318511013635319, "learning_rate": 1.994321039896685e-07, "loss": 0.46857333183288574, "step": 11758 }, { "epoch": 1.8794054183649005, "grad_norm": 1.7410733871165003, "learning_rate": 1.9890730339059417e-07, "loss": 0.570580780506134, "step": 11759 }, { "epoch": 1.8795652521377768, "grad_norm": 1.6827948991407806, "learning_rate": 1.9838318726503325e-07, "loss": 0.493958055973053, "step": 11760 }, { "epoch": 1.879725085910653, "grad_norm": 1.8656874636383665, "learning_rate": 1.9785975564958648e-07, "loss": 0.6942718029022217, "step": 11761 }, { "epoch": 1.879884919683529, "grad_norm": 1.86851019946976, "learning_rate": 1.9733700858080905e-07, "loss": 0.637484610080719, "step": 11762 }, { "epoch": 1.8800447534564053, "grad_norm": 1.573290591887045, "learning_rate": 1.9681494609520735e-07, "loss": 0.5575948357582092, "step": 11763 }, { "epoch": 1.8802045872292816, "grad_norm": 1.835496015507902, "learning_rate": 1.9629356822924218e-07, "loss": 0.6324820518493652, "step": 11764 }, { "epoch": 1.8803644210021577, "grad_norm": 1.7140692166089642, "learning_rate": 1.957728750193233e-07, "loss": 0.5799132585525513, "step": 11765 }, { "epoch": 1.8805242547750338, "grad_norm": 1.7515105681015208, "learning_rate": 1.9525286650181495e-07, "loss": 0.5122474431991577, "step": 11766 }, { "epoch": 1.8806840885479101, "grad_norm": 1.3811938775213315, "learning_rate": 1.9473354271303258e-07, "loss": 0.5067346096038818, "step": 11767 }, { "epoch": 1.8808439223207865, "grad_norm": 1.8220400325789918, "learning_rate": 1.9421490368924155e-07, "loss": 0.48723143339157104, "step": 11768 }, { "epoch": 1.8810037560936625, "grad_norm": 1.594238753937612, "learning_rate": 1.9369694946666517e-07, "loss": 0.5749088525772095, "step": 11769 }, { "epoch": 1.8811635898665386, "grad_norm": 1.7288378189837879, "learning_rate": 1.9317968008147448e-07, "loss": 0.5408685207366943, "step": 11770 }, { "epoch": 1.881323423639415, "grad_norm": 1.7268359653130958, "learning_rate": 1.926630955697917e-07, "loss": 0.5867221355438232, "step": 11771 }, { "epoch": 1.8814832574122913, "grad_norm": 1.707641740255705, "learning_rate": 1.921471959676957e-07, "loss": 0.5696075558662415, "step": 11772 }, { "epoch": 1.8816430911851674, "grad_norm": 1.5623582173552772, "learning_rate": 1.916319813112144e-07, "loss": 0.647699773311615, "step": 11773 }, { "epoch": 1.8818029249580437, "grad_norm": 1.560318339109435, "learning_rate": 1.911174516363279e-07, "loss": 0.5269108414649963, "step": 11774 }, { "epoch": 1.88196275873092, "grad_norm": 1.6336429783505182, "learning_rate": 1.9060360697896964e-07, "loss": 0.45908164978027344, "step": 11775 }, { "epoch": 1.882122592503796, "grad_norm": 1.4165171752230055, "learning_rate": 1.9009044737502536e-07, "loss": 0.5202181339263916, "step": 11776 }, { "epoch": 1.8822824262766722, "grad_norm": 1.4830684070628473, "learning_rate": 1.8957797286033087e-07, "loss": 0.47334998846054077, "step": 11777 }, { "epoch": 1.8824422600495485, "grad_norm": 1.432028725066165, "learning_rate": 1.890661834706764e-07, "loss": 0.5500159859657288, "step": 11778 }, { "epoch": 1.8826020938224248, "grad_norm": 1.4986852047783112, "learning_rate": 1.8855507924180337e-07, "loss": 0.45116275548934937, "step": 11779 }, { "epoch": 1.882761927595301, "grad_norm": 1.6952733779534508, "learning_rate": 1.8804466020940658e-07, "loss": 0.5505088567733765, "step": 11780 }, { "epoch": 1.882921761368177, "grad_norm": 1.5539815226499494, "learning_rate": 1.8753492640912974e-07, "loss": 0.44767412543296814, "step": 11781 }, { "epoch": 1.8830815951410533, "grad_norm": 1.4238458568464887, "learning_rate": 1.87025877876571e-07, "loss": 0.6037129759788513, "step": 11782 }, { "epoch": 1.8832414289139296, "grad_norm": 1.7643945353018988, "learning_rate": 1.8651751464728197e-07, "loss": 0.667791485786438, "step": 11783 }, { "epoch": 1.8834012626868057, "grad_norm": 1.5301442852540208, "learning_rate": 1.860098367567653e-07, "loss": 0.7472195029258728, "step": 11784 }, { "epoch": 1.8835610964596818, "grad_norm": 1.664917775103163, "learning_rate": 1.8550284424047494e-07, "loss": 0.615414559841156, "step": 11785 }, { "epoch": 1.8837209302325582, "grad_norm": 1.5393686023406614, "learning_rate": 1.8499653713381693e-07, "loss": 0.5948026180267334, "step": 11786 }, { "epoch": 1.8838807640054345, "grad_norm": 1.5886122678872343, "learning_rate": 1.844909154721497e-07, "loss": 0.6980924606323242, "step": 11787 }, { "epoch": 1.8840405977783106, "grad_norm": 1.9294837673035332, "learning_rate": 1.8398597929078278e-07, "loss": 0.6950677633285522, "step": 11788 }, { "epoch": 1.8842004315511867, "grad_norm": 1.658636421898342, "learning_rate": 1.8348172862498238e-07, "loss": 0.6216822862625122, "step": 11789 }, { "epoch": 1.884360265324063, "grad_norm": 1.7030022819270034, "learning_rate": 1.8297816350996256e-07, "loss": 0.4370077848434448, "step": 11790 }, { "epoch": 1.8845200990969393, "grad_norm": 1.4010544871150348, "learning_rate": 1.8247528398088966e-07, "loss": 0.5179740190505981, "step": 11791 }, { "epoch": 1.8846799328698154, "grad_norm": 1.8078875123624711, "learning_rate": 1.8197309007288223e-07, "loss": 0.6515787839889526, "step": 11792 }, { "epoch": 1.8848397666426915, "grad_norm": 1.7207408612091661, "learning_rate": 1.8147158182101333e-07, "loss": 0.5663474798202515, "step": 11793 }, { "epoch": 1.8849996004155678, "grad_norm": 1.6368175768851445, "learning_rate": 1.8097075926030605e-07, "loss": 0.6230869293212891, "step": 11794 }, { "epoch": 1.8851594341884441, "grad_norm": 1.6205743996730304, "learning_rate": 1.8047062242573576e-07, "loss": 0.5270172953605652, "step": 11795 }, { "epoch": 1.8853192679613202, "grad_norm": 1.721242339218015, "learning_rate": 1.799711713522312e-07, "loss": 0.4469614028930664, "step": 11796 }, { "epoch": 1.8854791017341963, "grad_norm": 1.616353208982142, "learning_rate": 1.7947240607466998e-07, "loss": 0.6487927436828613, "step": 11797 }, { "epoch": 1.8856389355070726, "grad_norm": 1.4706720893641327, "learning_rate": 1.789743266278865e-07, "loss": 0.558261513710022, "step": 11798 }, { "epoch": 1.885798769279949, "grad_norm": 1.6275559676961868, "learning_rate": 1.7847693304666404e-07, "loss": 0.49867135286331177, "step": 11799 }, { "epoch": 1.885958603052825, "grad_norm": 1.55968328228159, "learning_rate": 1.779802253657381e-07, "loss": 0.5042752027511597, "step": 11800 }, { "epoch": 1.8861184368257011, "grad_norm": 1.594531285190057, "learning_rate": 1.7748420361979878e-07, "loss": 0.5372895002365112, "step": 11801 }, { "epoch": 1.8862782705985774, "grad_norm": 1.505449377281213, "learning_rate": 1.769888678434839e-07, "loss": 0.5904831886291504, "step": 11802 }, { "epoch": 1.8864381043714538, "grad_norm": 1.5636226686563455, "learning_rate": 1.764942180713869e-07, "loss": 0.5984663963317871, "step": 11803 }, { "epoch": 1.8865979381443299, "grad_norm": 1.7452629669143398, "learning_rate": 1.7600025433805456e-07, "loss": 0.6132832169532776, "step": 11804 }, { "epoch": 1.886757771917206, "grad_norm": 1.801408543907619, "learning_rate": 1.7550697667798156e-07, "loss": 0.60589200258255, "step": 11805 }, { "epoch": 1.8869176056900823, "grad_norm": 1.5066951469261263, "learning_rate": 1.7501438512561697e-07, "loss": 0.5781439542770386, "step": 11806 }, { "epoch": 1.8870774394629586, "grad_norm": 1.6792676319441928, "learning_rate": 1.7452247971535995e-07, "loss": 0.5842869877815247, "step": 11807 }, { "epoch": 1.8872372732358347, "grad_norm": 1.6272848400061302, "learning_rate": 1.740312604815664e-07, "loss": 0.5932550430297852, "step": 11808 }, { "epoch": 1.887397107008711, "grad_norm": 1.6264789207976675, "learning_rate": 1.735407274585399e-07, "loss": 0.5500960350036621, "step": 11809 }, { "epoch": 1.8875569407815873, "grad_norm": 1.5561162963832618, "learning_rate": 1.7305088068053755e-07, "loss": 0.5640204548835754, "step": 11810 }, { "epoch": 1.8877167745544634, "grad_norm": 1.712160537896321, "learning_rate": 1.7256172018176864e-07, "loss": 0.6195937395095825, "step": 11811 }, { "epoch": 1.8878766083273395, "grad_norm": 1.610248388536384, "learning_rate": 1.7207324599639363e-07, "loss": 0.437818706035614, "step": 11812 }, { "epoch": 1.8880364421002158, "grad_norm": 1.5236480537007997, "learning_rate": 1.7158545815852746e-07, "loss": 0.502767026424408, "step": 11813 }, { "epoch": 1.8881962758730921, "grad_norm": 1.6597637722971976, "learning_rate": 1.7109835670223397e-07, "loss": 0.6320642232894897, "step": 11814 }, { "epoch": 1.8883561096459682, "grad_norm": 1.5893269327938957, "learning_rate": 1.7061194166153262e-07, "loss": 0.6344752907752991, "step": 11815 }, { "epoch": 1.8885159434188443, "grad_norm": 1.5175002667226163, "learning_rate": 1.7012621307038958e-07, "loss": 0.4568040370941162, "step": 11816 }, { "epoch": 1.8886757771917206, "grad_norm": 1.4344016915512334, "learning_rate": 1.6964117096272882e-07, "loss": 0.5340006351470947, "step": 11817 }, { "epoch": 1.888835610964597, "grad_norm": 1.725302033538871, "learning_rate": 1.6915681537242435e-07, "loss": 0.593335747718811, "step": 11818 }, { "epoch": 1.888995444737473, "grad_norm": 1.7428616358302285, "learning_rate": 1.6867314633330023e-07, "loss": 0.5605826377868652, "step": 11819 }, { "epoch": 1.8891552785103491, "grad_norm": 1.4950652972359937, "learning_rate": 1.68190163879135e-07, "loss": 0.44489529728889465, "step": 11820 }, { "epoch": 1.8893151122832255, "grad_norm": 1.4487081458044115, "learning_rate": 1.6770786804365725e-07, "loss": 0.5179280042648315, "step": 11821 }, { "epoch": 1.8894749460561018, "grad_norm": 1.8692666612101816, "learning_rate": 1.6722625886055112e-07, "loss": 0.7506572604179382, "step": 11822 }, { "epoch": 1.8896347798289779, "grad_norm": 1.7553384320267955, "learning_rate": 1.6674533636344858e-07, "loss": 0.5713014602661133, "step": 11823 }, { "epoch": 1.889794613601854, "grad_norm": 1.5960558822584343, "learning_rate": 1.6626510058593724e-07, "loss": 0.5013240575790405, "step": 11824 }, { "epoch": 1.8899544473747303, "grad_norm": 1.7997248237888768, "learning_rate": 1.6578555156155363e-07, "loss": 0.5479022264480591, "step": 11825 }, { "epoch": 1.8901142811476066, "grad_norm": 1.7212617144172184, "learning_rate": 1.653066893237887e-07, "loss": 0.5076754689216614, "step": 11826 }, { "epoch": 1.8902741149204827, "grad_norm": 1.6952869343933668, "learning_rate": 1.6482851390608235e-07, "loss": 0.556308925151825, "step": 11827 }, { "epoch": 1.8904339486933588, "grad_norm": 1.7077957199981089, "learning_rate": 1.6435102534183124e-07, "loss": 0.5589950084686279, "step": 11828 }, { "epoch": 1.890593782466235, "grad_norm": 1.7900758373120067, "learning_rate": 1.6387422366438087e-07, "loss": 0.6160366535186768, "step": 11829 }, { "epoch": 1.8907536162391114, "grad_norm": 1.6944797307755313, "learning_rate": 1.6339810890702912e-07, "loss": 0.5600295066833496, "step": 11830 }, { "epoch": 1.8909134500119875, "grad_norm": 1.6433036381922121, "learning_rate": 1.62922681103026e-07, "loss": 0.5892133116722107, "step": 11831 }, { "epoch": 1.8910732837848636, "grad_norm": 1.730085190822329, "learning_rate": 1.6244794028557276e-07, "loss": 0.5654069185256958, "step": 11832 }, { "epoch": 1.89123311755774, "grad_norm": 1.807139490920018, "learning_rate": 1.6197388648782507e-07, "loss": 0.6635748744010925, "step": 11833 }, { "epoch": 1.8913929513306162, "grad_norm": 2.137836510179982, "learning_rate": 1.6150051974288872e-07, "loss": 0.4951855540275574, "step": 11834 }, { "epoch": 1.8915527851034923, "grad_norm": 1.531306864938243, "learning_rate": 1.6102784008382278e-07, "loss": 0.5617873668670654, "step": 11835 }, { "epoch": 1.8917126188763684, "grad_norm": 1.842980544082262, "learning_rate": 1.605558475436353e-07, "loss": 0.6185364723205566, "step": 11836 }, { "epoch": 1.8918724526492448, "grad_norm": 1.7160299261876795, "learning_rate": 1.6008454215529102e-07, "loss": 0.4863170385360718, "step": 11837 }, { "epoch": 1.892032286422121, "grad_norm": 1.6720616227942444, "learning_rate": 1.596139239517025e-07, "loss": 0.5865427255630493, "step": 11838 }, { "epoch": 1.8921921201949972, "grad_norm": 1.4527853722816333, "learning_rate": 1.591439929657368e-07, "loss": 0.502261221408844, "step": 11839 }, { "epoch": 1.8923519539678733, "grad_norm": 1.6609016897330728, "learning_rate": 1.586747492302121e-07, "loss": 0.4635302424430847, "step": 11840 }, { "epoch": 1.8925117877407496, "grad_norm": 1.49648598835915, "learning_rate": 1.5820619277789885e-07, "loss": 0.47059524059295654, "step": 11841 }, { "epoch": 1.892671621513626, "grad_norm": 1.5838107436465527, "learning_rate": 1.5773832364151977e-07, "loss": 0.49722814559936523, "step": 11842 }, { "epoch": 1.892831455286502, "grad_norm": 1.5347684633681489, "learning_rate": 1.5727114185374758e-07, "loss": 0.4451521635055542, "step": 11843 }, { "epoch": 1.8929912890593783, "grad_norm": 1.941236816090059, "learning_rate": 1.5680464744721068e-07, "loss": 0.604358971118927, "step": 11844 }, { "epoch": 1.8931511228322546, "grad_norm": 1.6740118705200824, "learning_rate": 1.5633884045448523e-07, "loss": 0.5497524738311768, "step": 11845 }, { "epoch": 1.8933109566051307, "grad_norm": 1.5915135151377926, "learning_rate": 1.5587372090810405e-07, "loss": 0.5234866142272949, "step": 11846 }, { "epoch": 1.8934707903780068, "grad_norm": 1.8019372683141903, "learning_rate": 1.5540928884054674e-07, "loss": 0.6375406980514526, "step": 11847 }, { "epoch": 1.8936306241508831, "grad_norm": 1.714101136350965, "learning_rate": 1.5494554428425068e-07, "loss": 0.603113055229187, "step": 11848 }, { "epoch": 1.8937904579237594, "grad_norm": 1.5941060970841916, "learning_rate": 1.5448248727159888e-07, "loss": 0.4869426190853119, "step": 11849 }, { "epoch": 1.8939502916966355, "grad_norm": 1.570459858651432, "learning_rate": 1.540201178349321e-07, "loss": 0.6502795815467834, "step": 11850 }, { "epoch": 1.8941101254695116, "grad_norm": 1.6647263716233769, "learning_rate": 1.5355843600653896e-07, "loss": 0.5713077783584595, "step": 11851 }, { "epoch": 1.894269959242388, "grad_norm": 1.6168355548953517, "learning_rate": 1.530974418186626e-07, "loss": 0.6135174036026001, "step": 11852 }, { "epoch": 1.8944297930152643, "grad_norm": 1.5606292768988161, "learning_rate": 1.5263713530349723e-07, "loss": 0.5058351755142212, "step": 11853 }, { "epoch": 1.8945896267881404, "grad_norm": 1.6596516298197233, "learning_rate": 1.5217751649318823e-07, "loss": 0.4271872341632843, "step": 11854 }, { "epoch": 1.8947494605610165, "grad_norm": 1.494674974551492, "learning_rate": 1.517185854198344e-07, "loss": 0.551502525806427, "step": 11855 }, { "epoch": 1.8949092943338928, "grad_norm": 1.933157595123837, "learning_rate": 1.5126034211548568e-07, "loss": 0.583289623260498, "step": 11856 }, { "epoch": 1.895069128106769, "grad_norm": 1.3709492975920978, "learning_rate": 1.508027866121442e-07, "loss": 0.4758763313293457, "step": 11857 }, { "epoch": 1.8952289618796452, "grad_norm": 1.34294126281649, "learning_rate": 1.5034591894176443e-07, "loss": 0.4939395487308502, "step": 11858 }, { "epoch": 1.8953887956525213, "grad_norm": 1.684324745945199, "learning_rate": 1.4988973913625082e-07, "loss": 0.6163440942764282, "step": 11859 }, { "epoch": 1.8955486294253976, "grad_norm": 1.63891252098693, "learning_rate": 1.4943424722746348e-07, "loss": 0.6051404476165771, "step": 11860 }, { "epoch": 1.895708463198274, "grad_norm": 1.7284627755984492, "learning_rate": 1.4897944324721026e-07, "loss": 0.7299162745475769, "step": 11861 }, { "epoch": 1.89586829697115, "grad_norm": 1.6763537785367686, "learning_rate": 1.4852532722725577e-07, "loss": 0.5826866626739502, "step": 11862 }, { "epoch": 1.896028130744026, "grad_norm": 1.640899833280934, "learning_rate": 1.480718991993102e-07, "loss": 0.5332809686660767, "step": 11863 }, { "epoch": 1.8961879645169024, "grad_norm": 3.0534283823613344, "learning_rate": 1.4761915919504266e-07, "loss": 0.5513292551040649, "step": 11864 }, { "epoch": 1.8963477982897787, "grad_norm": 1.65844604680597, "learning_rate": 1.4716710724606898e-07, "loss": 0.5665723085403442, "step": 11865 }, { "epoch": 1.8965076320626548, "grad_norm": 1.9343300198419928, "learning_rate": 1.4671574338396056e-07, "loss": 0.6595134735107422, "step": 11866 }, { "epoch": 1.896667465835531, "grad_norm": 1.6780998557234086, "learning_rate": 1.4626506764023663e-07, "loss": 0.5247307419776917, "step": 11867 }, { "epoch": 1.8968272996084072, "grad_norm": 1.5454590760780975, "learning_rate": 1.4581508004637313e-07, "loss": 0.5986106395721436, "step": 11868 }, { "epoch": 1.8969871333812836, "grad_norm": 1.7590146645259674, "learning_rate": 1.4536578063379492e-07, "loss": 0.5279767513275146, "step": 11869 }, { "epoch": 1.8971469671541596, "grad_norm": 1.9060196217793484, "learning_rate": 1.4491716943387802e-07, "loss": 0.5431862473487854, "step": 11870 }, { "epoch": 1.8973068009270357, "grad_norm": 1.5017668454891553, "learning_rate": 1.4446924647795402e-07, "loss": 0.5373382568359375, "step": 11871 }, { "epoch": 1.897466634699912, "grad_norm": 1.5455438079661552, "learning_rate": 1.4402201179730235e-07, "loss": 0.5893524885177612, "step": 11872 }, { "epoch": 1.8976264684727884, "grad_norm": 1.5553100560206616, "learning_rate": 1.4357546542315803e-07, "loss": 0.5681529641151428, "step": 11873 }, { "epoch": 1.8977863022456645, "grad_norm": 1.7534360166388625, "learning_rate": 1.43129607386705e-07, "loss": 0.6289689540863037, "step": 11874 }, { "epoch": 1.8979461360185406, "grad_norm": 1.6450179643789409, "learning_rate": 1.4268443771908058e-07, "loss": 0.6019229888916016, "step": 11875 }, { "epoch": 1.8981059697914169, "grad_norm": 1.6026243655544719, "learning_rate": 1.4223995645137434e-07, "loss": 0.4586245119571686, "step": 11876 }, { "epoch": 1.8982658035642932, "grad_norm": 1.6444884347427553, "learning_rate": 1.41796163614627e-07, "loss": 0.49869856238365173, "step": 11877 }, { "epoch": 1.8984256373371693, "grad_norm": 1.6073191080405924, "learning_rate": 1.4135305923983157e-07, "loss": 0.5550327301025391, "step": 11878 }, { "epoch": 1.8985854711100454, "grad_norm": 1.658608469164944, "learning_rate": 1.4091064335793324e-07, "loss": 0.4978049397468567, "step": 11879 }, { "epoch": 1.898745304882922, "grad_norm": 1.5750682121463924, "learning_rate": 1.4046891599982847e-07, "loss": 0.547324538230896, "step": 11880 }, { "epoch": 1.898905138655798, "grad_norm": 1.669719211868793, "learning_rate": 1.4002787719636367e-07, "loss": 0.5137282013893127, "step": 11881 }, { "epoch": 1.8990649724286741, "grad_norm": 1.5543235501863637, "learning_rate": 1.395875269783431e-07, "loss": 0.49003541469573975, "step": 11882 }, { "epoch": 1.8992248062015504, "grad_norm": 1.777976527809464, "learning_rate": 1.391478653765177e-07, "loss": 0.5750352144241333, "step": 11883 }, { "epoch": 1.8993846399744267, "grad_norm": 1.5827094796099412, "learning_rate": 1.387088924215918e-07, "loss": 0.5726999044418335, "step": 11884 }, { "epoch": 1.8995444737473028, "grad_norm": 1.549686656398282, "learning_rate": 1.3827060814422199e-07, "loss": 0.6601599454879761, "step": 11885 }, { "epoch": 1.899704307520179, "grad_norm": 1.6493757703769394, "learning_rate": 1.3783301257501602e-07, "loss": 0.6346186399459839, "step": 11886 }, { "epoch": 1.8998641412930553, "grad_norm": 1.7087668156529183, "learning_rate": 1.373961057445339e-07, "loss": 0.5563250780105591, "step": 11887 }, { "epoch": 1.9000239750659316, "grad_norm": 1.5021313632541853, "learning_rate": 1.3695988768328895e-07, "loss": 0.533602237701416, "step": 11888 }, { "epoch": 1.9001838088388077, "grad_norm": 1.5342305905721867, "learning_rate": 1.365243584217435e-07, "loss": 0.6261991262435913, "step": 11889 }, { "epoch": 1.9003436426116838, "grad_norm": 1.5360251117571693, "learning_rate": 1.3608951799031433e-07, "loss": 0.44969967007637024, "step": 11890 }, { "epoch": 1.90050347638456, "grad_norm": 1.7263117152949334, "learning_rate": 1.3565536641936827e-07, "loss": 0.5839389562606812, "step": 11891 }, { "epoch": 1.9006633101574364, "grad_norm": 1.6791131079720198, "learning_rate": 1.352219037392266e-07, "loss": 0.4729350507259369, "step": 11892 }, { "epoch": 1.9008231439303125, "grad_norm": 1.572204679543948, "learning_rate": 1.3478912998015958e-07, "loss": 0.48772376775741577, "step": 11893 }, { "epoch": 1.9009829777031886, "grad_norm": 1.4707262020367298, "learning_rate": 1.343570451723908e-07, "loss": 0.46306312084198, "step": 11894 }, { "epoch": 1.901142811476065, "grad_norm": 1.4461579644350866, "learning_rate": 1.3392564934609496e-07, "loss": 0.5066636800765991, "step": 11895 }, { "epoch": 1.9013026452489412, "grad_norm": 1.4923963726174785, "learning_rate": 1.3349494253139916e-07, "loss": 0.5610203146934509, "step": 11896 }, { "epoch": 1.9014624790218173, "grad_norm": 1.7961309173505593, "learning_rate": 1.3306492475838372e-07, "loss": 0.5313864946365356, "step": 11897 }, { "epoch": 1.9016223127946934, "grad_norm": 1.6661213842552878, "learning_rate": 1.326355960570791e-07, "loss": 0.5986831784248352, "step": 11898 }, { "epoch": 1.9017821465675697, "grad_norm": 1.8014486427695713, "learning_rate": 1.3220695645746684e-07, "loss": 0.4844016432762146, "step": 11899 }, { "epoch": 1.901941980340446, "grad_norm": 2.090374838752443, "learning_rate": 1.31779005989483e-07, "loss": 0.5103912353515625, "step": 11900 }, { "epoch": 1.9021018141133221, "grad_norm": 1.3472217608077275, "learning_rate": 1.313517446830137e-07, "loss": 0.5056514739990234, "step": 11901 }, { "epoch": 1.9022616478861982, "grad_norm": 1.6899033854385996, "learning_rate": 1.3092517256789838e-07, "loss": 0.41707974672317505, "step": 11902 }, { "epoch": 1.9024214816590745, "grad_norm": 1.9396282100583238, "learning_rate": 1.3049928967392435e-07, "loss": 0.6257559657096863, "step": 11903 }, { "epoch": 1.9025813154319509, "grad_norm": 1.610828568468926, "learning_rate": 1.3007409603083775e-07, "loss": 0.47983554005622864, "step": 11904 }, { "epoch": 1.902741149204827, "grad_norm": 1.507013951238703, "learning_rate": 1.2964959166832935e-07, "loss": 0.519982635974884, "step": 11905 }, { "epoch": 1.902900982977703, "grad_norm": 1.4868517722693695, "learning_rate": 1.292257766160465e-07, "loss": 0.5083847045898438, "step": 11906 }, { "epoch": 1.9030608167505794, "grad_norm": 1.5278308566754275, "learning_rate": 1.2880265090358668e-07, "loss": 0.39720654487609863, "step": 11907 }, { "epoch": 1.9032206505234557, "grad_norm": 1.897849982428962, "learning_rate": 1.2838021456049954e-07, "loss": 0.5635582208633423, "step": 11908 }, { "epoch": 1.9033804842963318, "grad_norm": 1.4285132802464064, "learning_rate": 1.2795846761628705e-07, "loss": 0.5824943780899048, "step": 11909 }, { "epoch": 1.9035403180692079, "grad_norm": 1.7157600020438852, "learning_rate": 1.2753741010040121e-07, "loss": 0.5147334337234497, "step": 11910 }, { "epoch": 1.9037001518420842, "grad_norm": 1.407097860953682, "learning_rate": 1.271170420422474e-07, "loss": 0.5807624459266663, "step": 11911 }, { "epoch": 1.9038599856149605, "grad_norm": 1.5493253330801036, "learning_rate": 1.2669736347118434e-07, "loss": 0.5027338266372681, "step": 11912 }, { "epoch": 1.9040198193878366, "grad_norm": 1.9799900897189464, "learning_rate": 1.2627837441651968e-07, "loss": 0.43574559688568115, "step": 11913 }, { "epoch": 1.9041796531607127, "grad_norm": 1.5260757409965942, "learning_rate": 1.258600749075134e-07, "loss": 0.4911038875579834, "step": 11914 }, { "epoch": 1.9043394869335892, "grad_norm": 1.6193804362502975, "learning_rate": 1.2544246497337986e-07, "loss": 0.5265994071960449, "step": 11915 }, { "epoch": 1.9044993207064653, "grad_norm": 1.6882834935669881, "learning_rate": 1.2502554464328021e-07, "loss": 0.5936429500579834, "step": 11916 }, { "epoch": 1.9046591544793414, "grad_norm": 1.7219637131811416, "learning_rate": 1.2460931394633446e-07, "loss": 0.6237019300460815, "step": 11917 }, { "epoch": 1.9048189882522177, "grad_norm": 1.7943196410096631, "learning_rate": 1.241937729116094e-07, "loss": 0.5776271224021912, "step": 11918 }, { "epoch": 1.904978822025094, "grad_norm": 1.5060479755516725, "learning_rate": 1.23778921568124e-07, "loss": 0.5074374079704285, "step": 11919 }, { "epoch": 1.9051386557979701, "grad_norm": 1.3330986855709996, "learning_rate": 1.2336475994485063e-07, "loss": 0.34631624817848206, "step": 11920 }, { "epoch": 1.9052984895708462, "grad_norm": 1.4515781109688575, "learning_rate": 1.229512880707129e-07, "loss": 0.4620387554168701, "step": 11921 }, { "epoch": 1.9054583233437226, "grad_norm": 1.5107525904127104, "learning_rate": 1.225385059745865e-07, "loss": 0.462411105632782, "step": 11922 }, { "epoch": 1.9056181571165989, "grad_norm": 1.7007333228999575, "learning_rate": 1.2212641368529842e-07, "loss": 0.492867648601532, "step": 11923 }, { "epoch": 1.905777990889475, "grad_norm": 1.4744095696011867, "learning_rate": 1.2171501123162787e-07, "loss": 0.4921988248825073, "step": 11924 }, { "epoch": 1.905937824662351, "grad_norm": 1.494663280213168, "learning_rate": 1.213042986423052e-07, "loss": 0.5501363277435303, "step": 11925 }, { "epoch": 1.9060976584352274, "grad_norm": 1.7044774933577609, "learning_rate": 1.2089427594601189e-07, "loss": 0.509583055973053, "step": 11926 }, { "epoch": 1.9062574922081037, "grad_norm": 1.7956336736832765, "learning_rate": 1.2048494317138615e-07, "loss": 0.651076078414917, "step": 11927 }, { "epoch": 1.9064173259809798, "grad_norm": 1.6322018886787595, "learning_rate": 1.200763003470118e-07, "loss": 0.6041280031204224, "step": 11928 }, { "epoch": 1.9065771597538559, "grad_norm": 1.515993206957869, "learning_rate": 1.196683475014271e-07, "loss": 0.5544252395629883, "step": 11929 }, { "epoch": 1.9067369935267322, "grad_norm": 1.6945897730336876, "learning_rate": 1.1926108466312257e-07, "loss": 0.6993905305862427, "step": 11930 }, { "epoch": 1.9068968272996085, "grad_norm": 1.7096770397627008, "learning_rate": 1.1885451186053886e-07, "loss": 0.6526846885681152, "step": 11931 }, { "epoch": 1.9070566610724846, "grad_norm": 1.470907153908265, "learning_rate": 1.1844862912207211e-07, "loss": 0.4887063503265381, "step": 11932 }, { "epoch": 1.9072164948453607, "grad_norm": 1.5121360562626716, "learning_rate": 1.180434364760652e-07, "loss": 0.4432041049003601, "step": 11933 }, { "epoch": 1.907376328618237, "grad_norm": 1.6656233465447534, "learning_rate": 1.176389339508166e-07, "loss": 0.5349349975585938, "step": 11934 }, { "epoch": 1.9075361623911133, "grad_norm": 1.5732438594117504, "learning_rate": 1.1723512157457373e-07, "loss": 0.43078508973121643, "step": 11935 }, { "epoch": 1.9076959961639894, "grad_norm": 1.575688402053615, "learning_rate": 1.1683199937553957e-07, "loss": 0.591829776763916, "step": 11936 }, { "epoch": 1.9078558299368655, "grad_norm": 1.4242317944687206, "learning_rate": 1.1642956738186606e-07, "loss": 0.4543202221393585, "step": 11937 }, { "epoch": 1.9080156637097418, "grad_norm": 1.7251213617731627, "learning_rate": 1.1602782562165737e-07, "loss": 0.6025422215461731, "step": 11938 }, { "epoch": 1.9081754974826182, "grad_norm": 1.5757230866577263, "learning_rate": 1.1562677412296996e-07, "loss": 0.5974912047386169, "step": 11939 }, { "epoch": 1.9083353312554943, "grad_norm": 1.6124067279802288, "learning_rate": 1.1522641291381142e-07, "loss": 0.5353227853775024, "step": 11940 }, { "epoch": 1.9084951650283704, "grad_norm": 1.8949635551338675, "learning_rate": 1.1482674202214162e-07, "loss": 0.7593573331832886, "step": 11941 }, { "epoch": 1.9086549988012467, "grad_norm": 1.5084075907215087, "learning_rate": 1.1442776147587153e-07, "loss": 0.5077010989189148, "step": 11942 }, { "epoch": 1.908814832574123, "grad_norm": 1.4355152300022898, "learning_rate": 1.1402947130286557e-07, "loss": 0.5564575791358948, "step": 11943 }, { "epoch": 1.908974666346999, "grad_norm": 1.450699667399007, "learning_rate": 1.1363187153093925e-07, "loss": 0.5957154035568237, "step": 11944 }, { "epoch": 1.9091345001198752, "grad_norm": 1.4282603927473285, "learning_rate": 1.1323496218785923e-07, "loss": 0.5083997845649719, "step": 11945 }, { "epoch": 1.9092943338927515, "grad_norm": 1.4842548402889537, "learning_rate": 1.1283874330134337e-07, "loss": 0.4532254934310913, "step": 11946 }, { "epoch": 1.9094541676656278, "grad_norm": 1.5449074225726813, "learning_rate": 1.1244321489906285e-07, "loss": 0.47353819012641907, "step": 11947 }, { "epoch": 1.909614001438504, "grad_norm": 1.4705477353558014, "learning_rate": 1.1204837700864002e-07, "loss": 0.5352537035942078, "step": 11948 }, { "epoch": 1.90977383521138, "grad_norm": 1.5606666296091083, "learning_rate": 1.1165422965764838e-07, "loss": 0.6334578394889832, "step": 11949 }, { "epoch": 1.9099336689842565, "grad_norm": 1.5101139891356734, "learning_rate": 1.112607728736137e-07, "loss": 0.5178500413894653, "step": 11950 }, { "epoch": 1.9100935027571326, "grad_norm": 1.6591919413806788, "learning_rate": 1.108680066840151e-07, "loss": 0.5260983109474182, "step": 11951 }, { "epoch": 1.9102533365300087, "grad_norm": 1.6633040393647414, "learning_rate": 1.1047593111628175e-07, "loss": 0.5167267322540283, "step": 11952 }, { "epoch": 1.910413170302885, "grad_norm": 1.5063796128263203, "learning_rate": 1.1008454619779285e-07, "loss": 0.4465522766113281, "step": 11953 }, { "epoch": 1.9105730040757614, "grad_norm": 1.5955544391656604, "learning_rate": 1.0969385195588322e-07, "loss": 0.5947216153144836, "step": 11954 }, { "epoch": 1.9107328378486375, "grad_norm": 1.661064355172883, "learning_rate": 1.0930384841783548e-07, "loss": 0.48389267921447754, "step": 11955 }, { "epoch": 1.9108926716215135, "grad_norm": 1.6065273159762181, "learning_rate": 1.0891453561088894e-07, "loss": 0.4767611622810364, "step": 11956 }, { "epoch": 1.9110525053943899, "grad_norm": 1.9312416413208895, "learning_rate": 1.0852591356222964e-07, "loss": 0.4907689094543457, "step": 11957 }, { "epoch": 1.9112123391672662, "grad_norm": 1.7355296474722566, "learning_rate": 1.0813798229899919e-07, "loss": 0.4155345559120178, "step": 11958 }, { "epoch": 1.9113721729401423, "grad_norm": 1.6841570425567298, "learning_rate": 1.0775074184828815e-07, "loss": 0.5019080638885498, "step": 11959 }, { "epoch": 1.9115320067130184, "grad_norm": 1.4445179793822465, "learning_rate": 1.0736419223713934e-07, "loss": 0.44493159651756287, "step": 11960 }, { "epoch": 1.9116918404858947, "grad_norm": 1.8876293561895645, "learning_rate": 1.0697833349255004e-07, "loss": 0.5960392951965332, "step": 11961 }, { "epoch": 1.911851674258771, "grad_norm": 1.7871851781555264, "learning_rate": 1.0659316564146538e-07, "loss": 0.5614144802093506, "step": 11962 }, { "epoch": 1.912011508031647, "grad_norm": 1.706123850226728, "learning_rate": 1.0620868871078493e-07, "loss": 0.5600910186767578, "step": 11963 }, { "epoch": 1.9121713418045232, "grad_norm": 1.6338645915829861, "learning_rate": 1.0582490272736057e-07, "loss": 0.40762859582901, "step": 11964 }, { "epoch": 1.9123311755773995, "grad_norm": 1.670087581297665, "learning_rate": 1.0544180771799084e-07, "loss": 0.6084213256835938, "step": 11965 }, { "epoch": 1.9124910093502758, "grad_norm": 1.3903054859483246, "learning_rate": 1.0505940370943434e-07, "loss": 0.3628988265991211, "step": 11966 }, { "epoch": 1.912650843123152, "grad_norm": 1.4932308203699114, "learning_rate": 1.0467769072839307e-07, "loss": 0.49820441007614136, "step": 11967 }, { "epoch": 1.912810676896028, "grad_norm": 1.750538464709227, "learning_rate": 1.0429666880152677e-07, "loss": 0.4659186899662018, "step": 11968 }, { "epoch": 1.9129705106689043, "grad_norm": 1.3411996257333603, "learning_rate": 1.039163379554442e-07, "loss": 0.5457553863525391, "step": 11969 }, { "epoch": 1.9131303444417807, "grad_norm": 1.7096616007103254, "learning_rate": 1.035366982167052e-07, "loss": 0.4639958143234253, "step": 11970 }, { "epoch": 1.9132901782146567, "grad_norm": 1.705053300980817, "learning_rate": 1.0315774961182412e-07, "loss": 0.5171099305152893, "step": 11971 }, { "epoch": 1.9134500119875328, "grad_norm": 1.615363610036031, "learning_rate": 1.0277949216726313e-07, "loss": 0.5340614318847656, "step": 11972 }, { "epoch": 1.9136098457604092, "grad_norm": 1.503530747610248, "learning_rate": 1.0240192590944109e-07, "loss": 0.520169734954834, "step": 11973 }, { "epoch": 1.9137696795332855, "grad_norm": 1.7407476927188705, "learning_rate": 1.020250508647247e-07, "loss": 0.5412936806678772, "step": 11974 }, { "epoch": 1.9139295133061616, "grad_norm": 1.6234410993184616, "learning_rate": 1.0164886705943178e-07, "loss": 0.558767557144165, "step": 11975 }, { "epoch": 1.9140893470790377, "grad_norm": 1.4895717174267757, "learning_rate": 1.0127337451983688e-07, "loss": 0.5216996669769287, "step": 11976 }, { "epoch": 1.914249180851914, "grad_norm": 1.5987342237558544, "learning_rate": 1.0089857327216013e-07, "loss": 0.6306538581848145, "step": 11977 }, { "epoch": 1.9144090146247903, "grad_norm": 1.8250693711548625, "learning_rate": 1.0052446334257948e-07, "loss": 0.5987571477890015, "step": 11978 }, { "epoch": 1.9145688483976664, "grad_norm": 1.6608467743943531, "learning_rate": 1.0015104475721848e-07, "loss": 0.6061285734176636, "step": 11979 }, { "epoch": 1.9147286821705425, "grad_norm": 1.6750456408977945, "learning_rate": 9.977831754215628e-08, "loss": 0.6334404945373535, "step": 11980 }, { "epoch": 1.9148885159434188, "grad_norm": 1.8303029988896673, "learning_rate": 9.940628172342426e-08, "loss": 0.4821617007255554, "step": 11981 }, { "epoch": 1.9150483497162951, "grad_norm": 1.3871485500081648, "learning_rate": 9.903493732700164e-08, "loss": 0.4293556809425354, "step": 11982 }, { "epoch": 1.9152081834891712, "grad_norm": 1.7706764057692013, "learning_rate": 9.866428437882324e-08, "loss": 0.6315349340438843, "step": 11983 }, { "epoch": 1.9153680172620473, "grad_norm": 1.5965607660762797, "learning_rate": 9.829432290477503e-08, "loss": 0.5673647522926331, "step": 11984 }, { "epoch": 1.9155278510349238, "grad_norm": 1.4860577398424288, "learning_rate": 9.792505293069188e-08, "loss": 0.5801830887794495, "step": 11985 }, { "epoch": 1.9156876848078, "grad_norm": 1.6238590943139801, "learning_rate": 9.755647448236316e-08, "loss": 0.5266848802566528, "step": 11986 }, { "epoch": 1.915847518580676, "grad_norm": 1.4426137232384293, "learning_rate": 9.71885875855294e-08, "loss": 0.540579617023468, "step": 11987 }, { "epoch": 1.9160073523535524, "grad_norm": 1.256642836380045, "learning_rate": 9.682139226588228e-08, "loss": 0.43710482120513916, "step": 11988 }, { "epoch": 1.9161671861264287, "grad_norm": 1.5995236836654527, "learning_rate": 9.64548885490646e-08, "loss": 0.5629392862319946, "step": 11989 }, { "epoch": 1.9163270198993048, "grad_norm": 1.6851220964738893, "learning_rate": 9.608907646067256e-08, "loss": 0.49385014176368713, "step": 11990 }, { "epoch": 1.9164868536721809, "grad_norm": 1.5683896978464387, "learning_rate": 9.572395602625351e-08, "loss": 0.5002522468566895, "step": 11991 }, { "epoch": 1.9166466874450572, "grad_norm": 1.813967349893473, "learning_rate": 9.535952727130593e-08, "loss": 0.5171666741371155, "step": 11992 }, { "epoch": 1.9168065212179335, "grad_norm": 1.6875707604718795, "learning_rate": 9.499579022128059e-08, "loss": 0.5445467233657837, "step": 11993 }, { "epoch": 1.9169663549908096, "grad_norm": 1.5922765126992995, "learning_rate": 9.463274490157714e-08, "loss": 0.5554229617118835, "step": 11994 }, { "epoch": 1.9171261887636857, "grad_norm": 1.488596130838392, "learning_rate": 9.4270391337552e-08, "loss": 0.49037837982177734, "step": 11995 }, { "epoch": 1.917286022536562, "grad_norm": 1.6508249030122435, "learning_rate": 9.390872955451047e-08, "loss": 0.4741933345794678, "step": 11996 }, { "epoch": 1.9174458563094383, "grad_norm": 1.739911648619451, "learning_rate": 9.354775957770901e-08, "loss": 0.5872199535369873, "step": 11997 }, { "epoch": 1.9176056900823144, "grad_norm": 1.4624984053345365, "learning_rate": 9.318748143235634e-08, "loss": 0.48704248666763306, "step": 11998 }, { "epoch": 1.9177655238551905, "grad_norm": 1.6700374369868811, "learning_rate": 9.282789514361235e-08, "loss": 0.498282253742218, "step": 11999 }, { "epoch": 1.9179253576280668, "grad_norm": 1.5777879152430185, "learning_rate": 9.246900073659137e-08, "loss": 0.5452195405960083, "step": 12000 }, { "epoch": 1.9180851914009431, "grad_norm": 2.1266088039187143, "learning_rate": 9.21107982363556e-08, "loss": 0.5583117008209229, "step": 12001 }, { "epoch": 1.9182450251738192, "grad_norm": 1.6698858897183564, "learning_rate": 9.175328766792169e-08, "loss": 0.43320977687835693, "step": 12002 }, { "epoch": 1.9184048589466953, "grad_norm": 1.5596314852060216, "learning_rate": 9.13964690562552e-08, "loss": 0.5533802509307861, "step": 12003 }, { "epoch": 1.9185646927195716, "grad_norm": 1.8074829213829662, "learning_rate": 9.104034242627735e-08, "loss": 0.46282297372817993, "step": 12004 }, { "epoch": 1.918724526492448, "grad_norm": 1.59203290170585, "learning_rate": 9.06849078028571e-08, "loss": 0.5898982286453247, "step": 12005 }, { "epoch": 1.918884360265324, "grad_norm": 1.591372653404827, "learning_rate": 9.033016521081794e-08, "loss": 0.46598324179649353, "step": 12006 }, { "epoch": 1.9190441940382001, "grad_norm": 1.631882724249111, "learning_rate": 8.997611467493228e-08, "loss": 0.5369702577590942, "step": 12007 }, { "epoch": 1.9192040278110765, "grad_norm": 1.4018526396964395, "learning_rate": 8.96227562199281e-08, "loss": 0.4379007816314697, "step": 12008 }, { "epoch": 1.9193638615839528, "grad_norm": 1.4790906869684879, "learning_rate": 8.927008987048014e-08, "loss": 0.5432361364364624, "step": 12009 }, { "epoch": 1.9195236953568289, "grad_norm": 1.5012461344818357, "learning_rate": 8.891811565121866e-08, "loss": 0.7016783952713013, "step": 12010 }, { "epoch": 1.919683529129705, "grad_norm": 1.5423487410309813, "learning_rate": 8.856683358672402e-08, "loss": 0.5797202587127686, "step": 12011 }, { "epoch": 1.9198433629025813, "grad_norm": 1.5362903208792509, "learning_rate": 8.821624370152882e-08, "loss": 0.5387768745422363, "step": 12012 }, { "epoch": 1.9200031966754576, "grad_norm": 1.4427661167719021, "learning_rate": 8.786634602011679e-08, "loss": 0.4258165955543518, "step": 12013 }, { "epoch": 1.9201630304483337, "grad_norm": 1.7051760935952345, "learning_rate": 8.751714056692284e-08, "loss": 0.5304577350616455, "step": 12014 }, { "epoch": 1.9203228642212098, "grad_norm": 1.6437573318035184, "learning_rate": 8.716862736633635e-08, "loss": 0.5221409797668457, "step": 12015 }, { "epoch": 1.920482697994086, "grad_norm": 1.5933946831018828, "learning_rate": 8.682080644269452e-08, "loss": 0.6442786455154419, "step": 12016 }, { "epoch": 1.9206425317669624, "grad_norm": 1.5811854623070702, "learning_rate": 8.647367782028793e-08, "loss": 0.5141106843948364, "step": 12017 }, { "epoch": 1.9208023655398385, "grad_norm": 1.3029918220412595, "learning_rate": 8.612724152335939e-08, "loss": 0.4200745224952698, "step": 12018 }, { "epoch": 1.9209621993127146, "grad_norm": 2.040733730901921, "learning_rate": 8.578149757610176e-08, "loss": 0.5358608961105347, "step": 12019 }, { "epoch": 1.921122033085591, "grad_norm": 1.5663598005017867, "learning_rate": 8.543644600266244e-08, "loss": 0.5181002616882324, "step": 12020 }, { "epoch": 1.9212818668584672, "grad_norm": 1.4929062029385864, "learning_rate": 8.509208682713654e-08, "loss": 0.5011206269264221, "step": 12021 }, { "epoch": 1.9214417006313433, "grad_norm": 1.599914620251659, "learning_rate": 8.474842007357487e-08, "loss": 0.49723491072654724, "step": 12022 }, { "epoch": 1.9216015344042197, "grad_norm": 1.522957477880982, "learning_rate": 8.440544576597598e-08, "loss": 0.4599706530570984, "step": 12023 }, { "epoch": 1.921761368177096, "grad_norm": 1.5349905823104901, "learning_rate": 8.406316392829295e-08, "loss": 0.541891872882843, "step": 12024 }, { "epoch": 1.921921201949972, "grad_norm": 1.6161640765373706, "learning_rate": 8.372157458442886e-08, "loss": 0.5708714723587036, "step": 12025 }, { "epoch": 1.9220810357228482, "grad_norm": 1.7098064144123237, "learning_rate": 8.338067775824132e-08, "loss": 0.5095354318618774, "step": 12026 }, { "epoch": 1.9222408694957245, "grad_norm": 1.6929271264963561, "learning_rate": 8.30404734735346e-08, "loss": 0.6543536186218262, "step": 12027 }, { "epoch": 1.9224007032686008, "grad_norm": 1.6228669754121614, "learning_rate": 8.270096175406972e-08, "loss": 0.6292240023612976, "step": 12028 }, { "epoch": 1.922560537041477, "grad_norm": 1.772351864283807, "learning_rate": 8.236214262355435e-08, "loss": 0.5937933921813965, "step": 12029 }, { "epoch": 1.922720370814353, "grad_norm": 1.6609005233986085, "learning_rate": 8.202401610565181e-08, "loss": 0.45298317074775696, "step": 12030 }, { "epoch": 1.9228802045872293, "grad_norm": 1.5759856596880513, "learning_rate": 8.168658222397652e-08, "loss": 0.5950736999511719, "step": 12031 }, { "epoch": 1.9230400383601056, "grad_norm": 1.7590791151483534, "learning_rate": 8.134984100209187e-08, "loss": 0.4811248183250427, "step": 12032 }, { "epoch": 1.9231998721329817, "grad_norm": 1.357416099595878, "learning_rate": 8.101379246351681e-08, "loss": 0.4455181360244751, "step": 12033 }, { "epoch": 1.9233597059058578, "grad_norm": 1.5072033929037059, "learning_rate": 8.067843663171704e-08, "loss": 0.5097839832305908, "step": 12034 }, { "epoch": 1.9235195396787341, "grad_norm": 1.4326626951418258, "learning_rate": 8.034377353011603e-08, "loss": 0.5463922023773193, "step": 12035 }, { "epoch": 1.9236793734516104, "grad_norm": 1.6630250249703995, "learning_rate": 8.000980318208174e-08, "loss": 0.5473964214324951, "step": 12036 }, { "epoch": 1.9238392072244865, "grad_norm": 1.7315721684034024, "learning_rate": 7.967652561094109e-08, "loss": 0.4134838581085205, "step": 12037 }, { "epoch": 1.9239990409973626, "grad_norm": 1.5014979596495954, "learning_rate": 7.934394083996544e-08, "loss": 0.41426748037338257, "step": 12038 }, { "epoch": 1.924158874770239, "grad_norm": 1.7887546187165, "learning_rate": 7.901204889238401e-08, "loss": 0.5975738167762756, "step": 12039 }, { "epoch": 1.9243187085431153, "grad_norm": 3.68480294170697, "learning_rate": 7.86808497913738e-08, "loss": 0.5900000333786011, "step": 12040 }, { "epoch": 1.9244785423159914, "grad_norm": 1.5887590460964647, "learning_rate": 7.835034356006521e-08, "loss": 0.47422656416893005, "step": 12041 }, { "epoch": 1.9246383760888675, "grad_norm": 1.4332418279539232, "learning_rate": 7.802053022153866e-08, "loss": 0.4407183527946472, "step": 12042 }, { "epoch": 1.9247982098617438, "grad_norm": 1.5114966924523883, "learning_rate": 7.769140979882905e-08, "loss": 0.5659181475639343, "step": 12043 }, { "epoch": 1.92495804363462, "grad_norm": 1.5679239680446273, "learning_rate": 7.736298231491801e-08, "loss": 0.7359447479248047, "step": 12044 }, { "epoch": 1.9251178774074962, "grad_norm": 1.5565290754601326, "learning_rate": 7.703524779274385e-08, "loss": 0.4326173961162567, "step": 12045 }, { "epoch": 1.9252777111803723, "grad_norm": 1.7298144832176294, "learning_rate": 7.670820625519382e-08, "loss": 0.5334122180938721, "step": 12046 }, { "epoch": 1.9254375449532486, "grad_norm": 1.6740554977390938, "learning_rate": 7.638185772510854e-08, "loss": 0.6407809257507324, "step": 12047 }, { "epoch": 1.925597378726125, "grad_norm": 1.5552283342480877, "learning_rate": 7.605620222527643e-08, "loss": 0.5013688206672668, "step": 12048 }, { "epoch": 1.925757212499001, "grad_norm": 1.7066241518764722, "learning_rate": 7.573123977844265e-08, "loss": 0.53419429063797, "step": 12049 }, { "epoch": 1.925917046271877, "grad_norm": 1.5374787701340793, "learning_rate": 7.540697040730017e-08, "loss": 0.49488770961761475, "step": 12050 }, { "epoch": 1.9260768800447534, "grad_norm": 1.6231014054070692, "learning_rate": 7.508339413449528e-08, "loss": 0.5341830253601074, "step": 12051 }, { "epoch": 1.9262367138176297, "grad_norm": 1.6342461362596161, "learning_rate": 7.476051098262438e-08, "loss": 0.48774254322052, "step": 12052 }, { "epoch": 1.9263965475905058, "grad_norm": 1.577487776568835, "learning_rate": 7.443832097423831e-08, "loss": 0.43215304613113403, "step": 12053 }, { "epoch": 1.926556381363382, "grad_norm": 1.9432201745821271, "learning_rate": 7.411682413183574e-08, "loss": 0.5358591079711914, "step": 12054 }, { "epoch": 1.9267162151362582, "grad_norm": 1.4535514188431007, "learning_rate": 7.379602047786982e-08, "loss": 0.5011817812919617, "step": 12055 }, { "epoch": 1.9268760489091346, "grad_norm": 1.7190282592936548, "learning_rate": 7.347591003474486e-08, "loss": 0.5840008854866028, "step": 12056 }, { "epoch": 1.9270358826820106, "grad_norm": 1.6925675094447319, "learning_rate": 7.31564928248163e-08, "loss": 0.605417013168335, "step": 12057 }, { "epoch": 1.927195716454887, "grad_norm": 1.7874015997657058, "learning_rate": 7.283776887038963e-08, "loss": 0.6862378120422363, "step": 12058 }, { "epoch": 1.9273555502277633, "grad_norm": 1.6723852627139004, "learning_rate": 7.251973819372371e-08, "loss": 0.5387396812438965, "step": 12059 }, { "epoch": 1.9275153840006394, "grad_norm": 2.018836379958812, "learning_rate": 7.220240081702967e-08, "loss": 0.5578607320785522, "step": 12060 }, { "epoch": 1.9276752177735155, "grad_norm": 1.5999209311168878, "learning_rate": 7.188575676246867e-08, "loss": 0.49599403142929077, "step": 12061 }, { "epoch": 1.9278350515463918, "grad_norm": 1.5207180378727476, "learning_rate": 7.156980605215413e-08, "loss": 0.6633018851280212, "step": 12062 }, { "epoch": 1.927994885319268, "grad_norm": 1.520349852409454, "learning_rate": 7.125454870815063e-08, "loss": 0.5355340242385864, "step": 12063 }, { "epoch": 1.9281547190921442, "grad_norm": 1.788254700088863, "learning_rate": 7.093998475247388e-08, "loss": 0.3375478982925415, "step": 12064 }, { "epoch": 1.9283145528650203, "grad_norm": 1.5010205027212549, "learning_rate": 7.062611420709519e-08, "loss": 0.43130624294281006, "step": 12065 }, { "epoch": 1.9284743866378966, "grad_norm": 1.704251452973095, "learning_rate": 7.03129370939304e-08, "loss": 0.4612215459346771, "step": 12066 }, { "epoch": 1.928634220410773, "grad_norm": 1.6555422348307152, "learning_rate": 7.000045343485306e-08, "loss": 0.5811477303504944, "step": 12067 }, { "epoch": 1.928794054183649, "grad_norm": 1.3950407819473196, "learning_rate": 6.968866325168466e-08, "loss": 0.4709266424179077, "step": 12068 }, { "epoch": 1.9289538879565251, "grad_norm": 1.8614557309861186, "learning_rate": 6.937756656619887e-08, "loss": 0.5351258516311646, "step": 12069 }, { "epoch": 1.9291137217294014, "grad_norm": 1.7485085249025416, "learning_rate": 6.906716340012387e-08, "loss": 0.6503747701644897, "step": 12070 }, { "epoch": 1.9292735555022777, "grad_norm": 1.5206928952408147, "learning_rate": 6.875745377513565e-08, "loss": 0.6083857417106628, "step": 12071 }, { "epoch": 1.9294333892751538, "grad_norm": 1.6027691330678209, "learning_rate": 6.844843771286358e-08, "loss": 0.5405822992324829, "step": 12072 }, { "epoch": 1.92959322304803, "grad_norm": 1.6659693218735987, "learning_rate": 6.814011523488706e-08, "loss": 0.6090596318244934, "step": 12073 }, { "epoch": 1.9297530568209063, "grad_norm": 1.8322991849221923, "learning_rate": 6.783248636273998e-08, "loss": 0.5435606837272644, "step": 12074 }, { "epoch": 1.9299128905937826, "grad_norm": 1.1983362804689244, "learning_rate": 6.752555111790515e-08, "loss": 0.40261775255203247, "step": 12075 }, { "epoch": 1.9300727243666587, "grad_norm": 1.3334109979661202, "learning_rate": 6.721930952181876e-08, "loss": 0.4491538405418396, "step": 12076 }, { "epoch": 1.9302325581395348, "grad_norm": 1.6888064860286587, "learning_rate": 6.691376159586594e-08, "loss": 0.5837558507919312, "step": 12077 }, { "epoch": 1.930392391912411, "grad_norm": 1.67788021839703, "learning_rate": 6.660890736138625e-08, "loss": 0.5991024971008301, "step": 12078 }, { "epoch": 1.9305522256852874, "grad_norm": 1.6381779497846587, "learning_rate": 6.630474683966936e-08, "loss": 0.5343583226203918, "step": 12079 }, { "epoch": 1.9307120594581635, "grad_norm": 1.585187520270058, "learning_rate": 6.600128005195717e-08, "loss": 0.48456668853759766, "step": 12080 }, { "epoch": 1.9308718932310396, "grad_norm": 1.5960199380358389, "learning_rate": 6.569850701944159e-08, "loss": 0.6180224418640137, "step": 12081 }, { "epoch": 1.931031727003916, "grad_norm": 1.892412206452398, "learning_rate": 6.539642776326904e-08, "loss": 0.6440466642379761, "step": 12082 }, { "epoch": 1.9311915607767922, "grad_norm": 1.7897639597503165, "learning_rate": 6.509504230453379e-08, "loss": 0.5607866644859314, "step": 12083 }, { "epoch": 1.9313513945496683, "grad_norm": 1.4573365468514337, "learning_rate": 6.479435066428452e-08, "loss": 0.5520632863044739, "step": 12084 }, { "epoch": 1.9315112283225444, "grad_norm": 1.665410297917667, "learning_rate": 6.449435286352002e-08, "loss": 0.6351944208145142, "step": 12085 }, { "epoch": 1.9316710620954207, "grad_norm": 1.408392036251471, "learning_rate": 6.419504892319128e-08, "loss": 0.4932403564453125, "step": 12086 }, { "epoch": 1.931830895868297, "grad_norm": 1.671421646595948, "learning_rate": 6.389643886420161e-08, "loss": 0.6056285500526428, "step": 12087 }, { "epoch": 1.9319907296411731, "grad_norm": 1.3174482198005983, "learning_rate": 6.359852270740319e-08, "loss": 0.5172312259674072, "step": 12088 }, { "epoch": 1.9321505634140492, "grad_norm": 1.610153583443341, "learning_rate": 6.330130047360272e-08, "loss": 0.4175141751766205, "step": 12089 }, { "epoch": 1.9323103971869255, "grad_norm": 1.6648020995680322, "learning_rate": 6.300477218355583e-08, "loss": 0.5795738697052002, "step": 12090 }, { "epoch": 1.9324702309598019, "grad_norm": 1.745744813166309, "learning_rate": 6.270893785797261e-08, "loss": 0.5950785875320435, "step": 12091 }, { "epoch": 1.932630064732678, "grad_norm": 1.6117325986868658, "learning_rate": 6.241379751751209e-08, "loss": 0.5677987337112427, "step": 12092 }, { "epoch": 1.9327898985055543, "grad_norm": 1.6055346565370856, "learning_rate": 6.211935118278556e-08, "loss": 0.5862283110618591, "step": 12093 }, { "epoch": 1.9329497322784306, "grad_norm": 1.6953254561156796, "learning_rate": 6.182559887435657e-08, "loss": 0.7147966623306274, "step": 12094 }, { "epoch": 1.9331095660513067, "grad_norm": 1.6133663593914718, "learning_rate": 6.153254061273983e-08, "loss": 0.4882371723651886, "step": 12095 }, { "epoch": 1.9332693998241828, "grad_norm": 1.5481370532569438, "learning_rate": 6.124017641840119e-08, "loss": 0.435092568397522, "step": 12096 }, { "epoch": 1.933429233597059, "grad_norm": 1.5517840220260768, "learning_rate": 6.094850631175985e-08, "loss": 0.5897095203399658, "step": 12097 }, { "epoch": 1.9335890673699354, "grad_norm": 1.851834436519679, "learning_rate": 6.065753031318178e-08, "loss": 0.6047646999359131, "step": 12098 }, { "epoch": 1.9337489011428115, "grad_norm": 1.5081135956266498, "learning_rate": 6.036724844299069e-08, "loss": 0.4846755564212799, "step": 12099 }, { "epoch": 1.9339087349156876, "grad_norm": 1.634026737870543, "learning_rate": 6.007766072145704e-08, "loss": 0.4913809597492218, "step": 12100 }, { "epoch": 1.934068568688564, "grad_norm": 1.8818962431413946, "learning_rate": 5.978876716880577e-08, "loss": 0.3047824203968048, "step": 12101 }, { "epoch": 1.9342284024614402, "grad_norm": 1.5566196124498315, "learning_rate": 5.950056780521185e-08, "loss": 0.41185086965560913, "step": 12102 }, { "epoch": 1.9343882362343163, "grad_norm": 1.5992348146488107, "learning_rate": 5.92130626508014e-08, "loss": 0.5855931043624878, "step": 12103 }, { "epoch": 1.9345480700071924, "grad_norm": 1.6369322485056075, "learning_rate": 5.8926251725652807e-08, "loss": 0.546413004398346, "step": 12104 }, { "epoch": 1.9347079037800687, "grad_norm": 1.4820748718008896, "learning_rate": 5.864013504979782e-08, "loss": 0.49044471979141235, "step": 12105 }, { "epoch": 1.934867737552945, "grad_norm": 1.7745279734813566, "learning_rate": 5.835471264321602e-08, "loss": 0.4531008303165436, "step": 12106 }, { "epoch": 1.9350275713258211, "grad_norm": 1.7745657615060189, "learning_rate": 5.806998452584034e-08, "loss": 0.5038710236549377, "step": 12107 }, { "epoch": 1.9351874050986972, "grad_norm": 1.515048169791552, "learning_rate": 5.7785950717554885e-08, "loss": 0.5110545754432678, "step": 12108 }, { "epoch": 1.9353472388715736, "grad_norm": 1.517109810530763, "learning_rate": 5.7502611238198225e-08, "loss": 0.4534268379211426, "step": 12109 }, { "epoch": 1.9355070726444499, "grad_norm": 1.4658929786828199, "learning_rate": 5.721996610755454e-08, "loss": 0.497688889503479, "step": 12110 }, { "epoch": 1.935666906417326, "grad_norm": 2.205424607797432, "learning_rate": 5.693801534536359e-08, "loss": 0.4409187138080597, "step": 12111 }, { "epoch": 1.935826740190202, "grad_norm": 1.5324317644321848, "learning_rate": 5.6656758971317395e-08, "loss": 0.5622597932815552, "step": 12112 }, { "epoch": 1.9359865739630784, "grad_norm": 1.6855564798937608, "learning_rate": 5.637619700505581e-08, "loss": 0.5988489389419556, "step": 12113 }, { "epoch": 1.9361464077359547, "grad_norm": 1.5375761464205115, "learning_rate": 5.609632946617316e-08, "loss": 0.4390106797218323, "step": 12114 }, { "epoch": 1.9363062415088308, "grad_norm": 1.580507009253424, "learning_rate": 5.581715637421492e-08, "loss": 0.6270577311515808, "step": 12115 }, { "epoch": 1.9364660752817069, "grad_norm": 1.7056836530737942, "learning_rate": 5.5538677748677716e-08, "loss": 0.5304179191589355, "step": 12116 }, { "epoch": 1.9366259090545832, "grad_norm": 1.4142023950951859, "learning_rate": 5.5260893609008215e-08, "loss": 0.5091609954833984, "step": 12117 }, { "epoch": 1.9367857428274595, "grad_norm": 1.8882835428504015, "learning_rate": 5.498380397460645e-08, "loss": 0.5462489128112793, "step": 12118 }, { "epoch": 1.9369455766003356, "grad_norm": 1.655514883428793, "learning_rate": 5.4707408864823616e-08, "loss": 0.5808787941932678, "step": 12119 }, { "epoch": 1.9371054103732117, "grad_norm": 1.559592631689509, "learning_rate": 5.443170829896205e-08, "loss": 0.5808051824569702, "step": 12120 }, { "epoch": 1.937265244146088, "grad_norm": 1.5545136714555694, "learning_rate": 5.4156702296275235e-08, "loss": 0.5433141589164734, "step": 12121 }, { "epoch": 1.9374250779189643, "grad_norm": 1.648834254112516, "learning_rate": 5.3882390875968914e-08, "loss": 0.5297873020172119, "step": 12122 }, { "epoch": 1.9375849116918404, "grad_norm": 1.5782048477271895, "learning_rate": 5.360877405720111e-08, "loss": 0.6517232656478882, "step": 12123 }, { "epoch": 1.9377447454647165, "grad_norm": 1.5956506913103843, "learning_rate": 5.3335851859077634e-08, "loss": 0.5184236168861389, "step": 12124 }, { "epoch": 1.9379045792375929, "grad_norm": 1.6776998539745114, "learning_rate": 5.3063624300662144e-08, "loss": 0.517846405506134, "step": 12125 }, { "epoch": 1.9380644130104692, "grad_norm": 1.7314647102270173, "learning_rate": 5.279209140096275e-08, "loss": 0.6530203223228455, "step": 12126 }, { "epoch": 1.9382242467833453, "grad_norm": 1.382419449130054, "learning_rate": 5.2521253178944295e-08, "loss": 0.4218794107437134, "step": 12127 }, { "epoch": 1.9383840805562216, "grad_norm": 1.7671341733478025, "learning_rate": 5.225110965351943e-08, "loss": 0.5245077610015869, "step": 12128 }, { "epoch": 1.938543914329098, "grad_norm": 1.5809691684203537, "learning_rate": 5.198166084355527e-08, "loss": 0.49266481399536133, "step": 12129 }, { "epoch": 1.938703748101974, "grad_norm": 1.561949326494827, "learning_rate": 5.1712906767869e-08, "loss": 0.5307486057281494, "step": 12130 }, { "epoch": 1.93886358187485, "grad_norm": 1.49047296519876, "learning_rate": 5.144484744523004e-08, "loss": 0.42548108100891113, "step": 12131 }, { "epoch": 1.9390234156477264, "grad_norm": 1.5858095533693926, "learning_rate": 5.1177482894357866e-08, "loss": 0.5734071731567383, "step": 12132 }, { "epoch": 1.9391832494206027, "grad_norm": 1.5235616815205104, "learning_rate": 5.09108131339231e-08, "loss": 0.45852744579315186, "step": 12133 }, { "epoch": 1.9393430831934788, "grad_norm": 1.5206819530729814, "learning_rate": 5.064483818255195e-08, "loss": 0.5167903900146484, "step": 12134 }, { "epoch": 1.939502916966355, "grad_norm": 1.55806797828812, "learning_rate": 5.0379558058817336e-08, "loss": 0.4421262741088867, "step": 12135 }, { "epoch": 1.9396627507392312, "grad_norm": 1.6392451371987808, "learning_rate": 5.011497278124444e-08, "loss": 0.7258098125457764, "step": 12136 }, { "epoch": 1.9398225845121075, "grad_norm": 1.5714282706068952, "learning_rate": 4.985108236831404e-08, "loss": 0.5288404226303101, "step": 12137 }, { "epoch": 1.9399824182849836, "grad_norm": 1.3723572654197942, "learning_rate": 4.9587886838451396e-08, "loss": 0.3882333040237427, "step": 12138 }, { "epoch": 1.9401422520578597, "grad_norm": 1.6086250962402635, "learning_rate": 4.932538621004068e-08, "loss": 0.5202689170837402, "step": 12139 }, { "epoch": 1.940302085830736, "grad_norm": 1.5756018323401693, "learning_rate": 4.906358050141169e-08, "loss": 0.5975855588912964, "step": 12140 }, { "epoch": 1.9404619196036124, "grad_norm": 1.6710539351687546, "learning_rate": 4.880246973084979e-08, "loss": 0.552039623260498, "step": 12141 }, { "epoch": 1.9406217533764885, "grad_norm": 1.5810078471904014, "learning_rate": 4.8542053916588175e-08, "loss": 0.626418948173523, "step": 12142 }, { "epoch": 1.9407815871493646, "grad_norm": 1.5238298029170414, "learning_rate": 4.8282333076815623e-08, "loss": 0.47845882177352905, "step": 12143 }, { "epoch": 1.9409414209222409, "grad_norm": 1.6429854939821418, "learning_rate": 4.8023307229666526e-08, "loss": 0.5071046352386475, "step": 12144 }, { "epoch": 1.9411012546951172, "grad_norm": 1.6958896370863, "learning_rate": 4.776497639323419e-08, "loss": 0.6015338897705078, "step": 12145 }, { "epoch": 1.9412610884679933, "grad_norm": 1.7544495234900332, "learning_rate": 4.7507340585558614e-08, "loss": 0.5731173157691956, "step": 12146 }, { "epoch": 1.9414209222408694, "grad_norm": 1.5073772454832397, "learning_rate": 4.7250399824629867e-08, "loss": 0.4693383574485779, "step": 12147 }, { "epoch": 1.9415807560137457, "grad_norm": 1.8053742862127304, "learning_rate": 4.6994154128394697e-08, "loss": 0.562889814376831, "step": 12148 }, { "epoch": 1.941740589786622, "grad_norm": 1.5481352280011473, "learning_rate": 4.673860351474657e-08, "loss": 0.4390438199043274, "step": 12149 }, { "epoch": 1.941900423559498, "grad_norm": 1.5616609106983803, "learning_rate": 4.648374800153232e-08, "loss": 0.5349648594856262, "step": 12150 }, { "epoch": 1.9420602573323742, "grad_norm": 1.6275753652206382, "learning_rate": 4.622958760655105e-08, "loss": 0.503429114818573, "step": 12151 }, { "epoch": 1.9422200911052505, "grad_norm": 1.3884296346687195, "learning_rate": 4.5976122347551886e-08, "loss": 0.5114603042602539, "step": 12152 }, { "epoch": 1.9423799248781268, "grad_norm": 1.6012065590890316, "learning_rate": 4.572335224223512e-08, "loss": 0.4783940017223358, "step": 12153 }, { "epoch": 1.942539758651003, "grad_norm": 1.3360046219736292, "learning_rate": 4.5471277308255515e-08, "loss": 0.4686707854270935, "step": 12154 }, { "epoch": 1.942699592423879, "grad_norm": 1.3164198288320712, "learning_rate": 4.521989756321565e-08, "loss": 0.4707894027233124, "step": 12155 }, { "epoch": 1.9428594261967553, "grad_norm": 1.659313044661318, "learning_rate": 4.4969213024670386e-08, "loss": 0.5438857078552246, "step": 12156 }, { "epoch": 1.9430192599696317, "grad_norm": 4.253190211389161, "learning_rate": 4.471922371012793e-08, "loss": 0.5390538573265076, "step": 12157 }, { "epoch": 1.9431790937425077, "grad_norm": 2.349860385130721, "learning_rate": 4.446992963704544e-08, "loss": 0.5111932158470154, "step": 12158 }, { "epoch": 1.9433389275153838, "grad_norm": 1.595002152059479, "learning_rate": 4.422133082283453e-08, "loss": 0.49374687671661377, "step": 12159 }, { "epoch": 1.9434987612882602, "grad_norm": 1.762038142037927, "learning_rate": 4.397342728485465e-08, "loss": 0.5595703721046448, "step": 12160 }, { "epoch": 1.9436585950611365, "grad_norm": 1.7798740908462474, "learning_rate": 4.372621904041863e-08, "loss": 0.5134013891220093, "step": 12161 }, { "epoch": 1.9438184288340126, "grad_norm": 1.7621312411687904, "learning_rate": 4.3479706106791534e-08, "loss": 0.5085751414299011, "step": 12162 }, { "epoch": 1.9439782626068889, "grad_norm": 1.3418566831235808, "learning_rate": 4.323388850118848e-08, "loss": 0.43374359607696533, "step": 12163 }, { "epoch": 1.9441380963797652, "grad_norm": 1.792439622536975, "learning_rate": 4.298876624077686e-08, "loss": 0.502911388874054, "step": 12164 }, { "epoch": 1.9442979301526413, "grad_norm": 1.748092478403682, "learning_rate": 4.274433934267519e-08, "loss": 0.6412630677223206, "step": 12165 }, { "epoch": 1.9444577639255174, "grad_norm": 1.5981989780014514, "learning_rate": 4.250060782395204e-08, "loss": 0.4686000943183899, "step": 12166 }, { "epoch": 1.9446175976983937, "grad_norm": 1.8936467325941795, "learning_rate": 4.225757170163047e-08, "loss": 0.6010094881057739, "step": 12167 }, { "epoch": 1.94477743147127, "grad_norm": 1.6646067998305278, "learning_rate": 4.2015230992682456e-08, "loss": 0.47085919976234436, "step": 12168 }, { "epoch": 1.9449372652441461, "grad_norm": 1.604722683035063, "learning_rate": 4.177358571403223e-08, "loss": 0.5234862565994263, "step": 12169 }, { "epoch": 1.9450970990170222, "grad_norm": 1.6334212792506178, "learning_rate": 4.1532635882556296e-08, "loss": 0.5217456817626953, "step": 12170 }, { "epoch": 1.9452569327898985, "grad_norm": 1.6712706663599755, "learning_rate": 4.129238151508008e-08, "loss": 0.6005227565765381, "step": 12171 }, { "epoch": 1.9454167665627748, "grad_norm": 1.6876844177892738, "learning_rate": 4.1052822628382397e-08, "loss": 0.4650591015815735, "step": 12172 }, { "epoch": 1.945576600335651, "grad_norm": 1.7052597682149522, "learning_rate": 4.08139592391954e-08, "loss": 0.613932728767395, "step": 12173 }, { "epoch": 1.945736434108527, "grad_norm": 1.4549820078436895, "learning_rate": 4.0575791364197984e-08, "loss": 0.5307148694992065, "step": 12174 }, { "epoch": 1.9458962678814034, "grad_norm": 1.7985831405841566, "learning_rate": 4.0338319020023494e-08, "loss": 0.6528873443603516, "step": 12175 }, { "epoch": 1.9460561016542797, "grad_norm": 1.6806238160101923, "learning_rate": 4.010154222325757e-08, "loss": 0.47505247592926025, "step": 12176 }, { "epoch": 1.9462159354271558, "grad_norm": 1.516336217762106, "learning_rate": 3.9865460990432536e-08, "loss": 0.5415410995483398, "step": 12177 }, { "epoch": 1.9463757692000319, "grad_norm": 1.5590957373796666, "learning_rate": 3.9630075338039645e-08, "loss": 0.4133426249027252, "step": 12178 }, { "epoch": 1.9465356029729082, "grad_norm": 1.6491368417335164, "learning_rate": 3.939538528251463e-08, "loss": 0.553971529006958, "step": 12179 }, { "epoch": 1.9466954367457845, "grad_norm": 1.740255878642387, "learning_rate": 3.9161390840248836e-08, "loss": 0.6235342025756836, "step": 12180 }, { "epoch": 1.9468552705186606, "grad_norm": 1.703246929971027, "learning_rate": 3.892809202758252e-08, "loss": 0.6267732977867126, "step": 12181 }, { "epoch": 1.9470151042915367, "grad_norm": 1.4650618888152231, "learning_rate": 3.8695488860808204e-08, "loss": 0.41952598094940186, "step": 12182 }, { "epoch": 1.947174938064413, "grad_norm": 1.7577549756502968, "learning_rate": 3.8463581356171784e-08, "loss": 0.5804256200790405, "step": 12183 }, { "epoch": 1.9473347718372893, "grad_norm": 1.6608646540877203, "learning_rate": 3.8232369529866975e-08, "loss": 0.5280672311782837, "step": 12184 }, { "epoch": 1.9474946056101654, "grad_norm": 1.6742130616816957, "learning_rate": 3.800185339804308e-08, "loss": 0.575232207775116, "step": 12185 }, { "epoch": 1.9476544393830415, "grad_norm": 1.666951971084533, "learning_rate": 3.7772032976796104e-08, "loss": 0.4603397846221924, "step": 12186 }, { "epoch": 1.9478142731559178, "grad_norm": 1.5384007340792314, "learning_rate": 3.754290828217655e-08, "loss": 0.46594494581222534, "step": 12187 }, { "epoch": 1.9479741069287941, "grad_norm": 1.69568866129193, "learning_rate": 3.731447933018717e-08, "loss": 0.5166486501693726, "step": 12188 }, { "epoch": 1.9481339407016702, "grad_norm": 1.5344522006383507, "learning_rate": 3.708674613677854e-08, "loss": 0.614970326423645, "step": 12189 }, { "epoch": 1.9482937744745463, "grad_norm": 1.433318323846822, "learning_rate": 3.685970871785571e-08, "loss": 0.5024029016494751, "step": 12190 }, { "epoch": 1.9484536082474226, "grad_norm": 1.440835242925804, "learning_rate": 3.663336708927379e-08, "loss": 0.6408637762069702, "step": 12191 }, { "epoch": 1.948613442020299, "grad_norm": 1.5917758059517828, "learning_rate": 3.6407721266841224e-08, "loss": 0.6065382361412048, "step": 12192 }, { "epoch": 1.948773275793175, "grad_norm": 1.788451581413296, "learning_rate": 3.618277126631431e-08, "loss": 0.5839437246322632, "step": 12193 }, { "epoch": 1.9489331095660511, "grad_norm": 1.9738392456956046, "learning_rate": 3.595851710340381e-08, "loss": 0.6774149537086487, "step": 12194 }, { "epoch": 1.9490929433389275, "grad_norm": 1.3854107121933468, "learning_rate": 3.5734958793769426e-08, "loss": 0.5188066363334656, "step": 12195 }, { "epoch": 1.9492527771118038, "grad_norm": 1.46011625163761, "learning_rate": 3.5512096353026436e-08, "loss": 0.53067547082901, "step": 12196 }, { "epoch": 1.9494126108846799, "grad_norm": 1.6705879027966257, "learning_rate": 3.5289929796735735e-08, "loss": 0.5019640326499939, "step": 12197 }, { "epoch": 1.9495724446575562, "grad_norm": 1.769496790961854, "learning_rate": 3.50684591404149e-08, "loss": 0.5803593397140503, "step": 12198 }, { "epoch": 1.9497322784304325, "grad_norm": 1.796859788187229, "learning_rate": 3.484768439953046e-08, "loss": 0.4352472722530365, "step": 12199 }, { "epoch": 1.9498921122033086, "grad_norm": 1.5720358108888797, "learning_rate": 3.462760558949896e-08, "loss": 0.45452964305877686, "step": 12200 }, { "epoch": 1.9500519459761847, "grad_norm": 1.3624085070838765, "learning_rate": 3.4408222725691445e-08, "loss": 0.5636862516403198, "step": 12201 }, { "epoch": 1.950211779749061, "grad_norm": 1.5530562423668905, "learning_rate": 3.4189535823426765e-08, "loss": 0.606377124786377, "step": 12202 }, { "epoch": 1.9503716135219373, "grad_norm": 1.492198578766523, "learning_rate": 3.397154489798049e-08, "loss": 0.5194715261459351, "step": 12203 }, { "epoch": 1.9505314472948134, "grad_norm": 1.6826078445150303, "learning_rate": 3.375424996457377e-08, "loss": 0.614956259727478, "step": 12204 }, { "epoch": 1.9506912810676895, "grad_norm": 1.5737399276437856, "learning_rate": 3.353765103838225e-08, "loss": 0.5867055058479309, "step": 12205 }, { "epoch": 1.9508511148405658, "grad_norm": 2.1115531783663335, "learning_rate": 3.3321748134532706e-08, "loss": 0.7153719663619995, "step": 12206 }, { "epoch": 1.9510109486134422, "grad_norm": 1.6223849275440005, "learning_rate": 3.310654126810309e-08, "loss": 0.5147994756698608, "step": 12207 }, { "epoch": 1.9511707823863182, "grad_norm": 1.482413135008279, "learning_rate": 3.289203045412359e-08, "loss": 0.5683645009994507, "step": 12208 }, { "epoch": 1.9513306161591943, "grad_norm": 1.3591764135790008, "learning_rate": 3.267821570757335e-08, "loss": 0.5050653219223022, "step": 12209 }, { "epoch": 1.9514904499320707, "grad_norm": 1.3689333988041439, "learning_rate": 3.2465097043383745e-08, "loss": 0.49572253227233887, "step": 12210 }, { "epoch": 1.951650283704947, "grad_norm": 1.5922571237238183, "learning_rate": 3.225267447644065e-08, "loss": 0.48833906650543213, "step": 12211 }, { "epoch": 1.951810117477823, "grad_norm": 1.4675652852579384, "learning_rate": 3.204094802157775e-08, "loss": 0.502596378326416, "step": 12212 }, { "epoch": 1.9519699512506992, "grad_norm": 1.7708012733004708, "learning_rate": 3.1829917693581015e-08, "loss": 0.5880696773529053, "step": 12213 }, { "epoch": 1.9521297850235755, "grad_norm": 1.6520273081928831, "learning_rate": 3.1619583507188635e-08, "loss": 0.5332896709442139, "step": 12214 }, { "epoch": 1.9522896187964518, "grad_norm": 1.7964744469895095, "learning_rate": 3.1409945477088866e-08, "loss": 0.5804124474525452, "step": 12215 }, { "epoch": 1.952449452569328, "grad_norm": 1.9214613051328207, "learning_rate": 3.120100361792222e-08, "loss": 0.3908388614654541, "step": 12216 }, { "epoch": 1.952609286342204, "grad_norm": 1.4378680526817937, "learning_rate": 3.099275794428036e-08, "loss": 0.5457268357276917, "step": 12217 }, { "epoch": 1.9527691201150803, "grad_norm": 1.7748186381084166, "learning_rate": 3.078520847070721e-08, "loss": 0.5664020776748657, "step": 12218 }, { "epoch": 1.9529289538879566, "grad_norm": 1.6776880792675246, "learning_rate": 3.057835521169783e-08, "loss": 0.5071251392364502, "step": 12219 }, { "epoch": 1.9530887876608327, "grad_norm": 1.4475868050578986, "learning_rate": 3.0372198181696236e-08, "loss": 0.48787593841552734, "step": 12220 }, { "epoch": 1.9532486214337088, "grad_norm": 1.3877705128308873, "learning_rate": 3.016673739509979e-08, "loss": 0.4353089928627014, "step": 12221 }, { "epoch": 1.9534084552065851, "grad_norm": 1.6781150128012168, "learning_rate": 2.996197286625924e-08, "loss": 0.4899625778198242, "step": 12222 }, { "epoch": 1.9535682889794614, "grad_norm": 1.6156929310731207, "learning_rate": 2.9757904609472032e-08, "loss": 0.5599778890609741, "step": 12223 }, { "epoch": 1.9537281227523375, "grad_norm": 1.7162150172114738, "learning_rate": 2.955453263899122e-08, "loss": 0.5796374082565308, "step": 12224 }, { "epoch": 1.9538879565252136, "grad_norm": 1.4510651937011652, "learning_rate": 2.9351856969019877e-08, "loss": 0.4407954514026642, "step": 12225 }, { "epoch": 1.95404779029809, "grad_norm": 1.8659177205592368, "learning_rate": 2.914987761371113e-08, "loss": 0.6104694604873657, "step": 12226 }, { "epoch": 1.9542076240709663, "grad_norm": 1.419299345181246, "learning_rate": 2.8948594587170366e-08, "loss": 0.34381842613220215, "step": 12227 }, { "epoch": 1.9543674578438424, "grad_norm": 1.4722880172350181, "learning_rate": 2.8748007903456332e-08, "loss": 0.4558316469192505, "step": 12228 }, { "epoch": 1.9545272916167185, "grad_norm": 1.6501869604161927, "learning_rate": 2.8548117576574496e-08, "loss": 0.5537675619125366, "step": 12229 }, { "epoch": 1.9546871253895948, "grad_norm": 1.436780592264762, "learning_rate": 2.8348923620487024e-08, "loss": 0.5958801507949829, "step": 12230 }, { "epoch": 1.954846959162471, "grad_norm": 1.6621776371360621, "learning_rate": 2.8150426049102785e-08, "loss": 0.5184671878814697, "step": 12231 }, { "epoch": 1.9550067929353472, "grad_norm": 1.483605334289909, "learning_rate": 2.795262487628625e-08, "loss": 0.46274420619010925, "step": 12232 }, { "epoch": 1.9551666267082235, "grad_norm": 1.6176687127622509, "learning_rate": 2.775552011584859e-08, "loss": 0.6098757982254028, "step": 12233 }, { "epoch": 1.9553264604810998, "grad_norm": 1.483064513604442, "learning_rate": 2.755911178155768e-08, "loss": 0.5092133283615112, "step": 12234 }, { "epoch": 1.955486294253976, "grad_norm": 1.5182410329867722, "learning_rate": 2.7363399887128105e-08, "loss": 0.5256597995758057, "step": 12235 }, { "epoch": 1.955646128026852, "grad_norm": 1.5082024392212041, "learning_rate": 2.7168384446227825e-08, "loss": 0.5163619518280029, "step": 12236 }, { "epoch": 1.9558059617997283, "grad_norm": 1.4707922519274184, "learning_rate": 2.697406547247705e-08, "loss": 0.5381500720977783, "step": 12237 }, { "epoch": 1.9559657955726046, "grad_norm": 1.5055258716130349, "learning_rate": 2.6780442979446042e-08, "loss": 0.4634343087673187, "step": 12238 }, { "epoch": 1.9561256293454807, "grad_norm": 1.4772879706819337, "learning_rate": 2.6587516980656204e-08, "loss": 0.5071972608566284, "step": 12239 }, { "epoch": 1.9562854631183568, "grad_norm": 1.7027922535130826, "learning_rate": 2.63952874895812e-08, "loss": 0.4300289750099182, "step": 12240 }, { "epoch": 1.9564452968912331, "grad_norm": 1.5360836918939624, "learning_rate": 2.6203754519644743e-08, "loss": 0.4257431924343109, "step": 12241 }, { "epoch": 1.9566051306641095, "grad_norm": 1.6851455792687262, "learning_rate": 2.6012918084223904e-08, "loss": 0.5681049823760986, "step": 12242 }, { "epoch": 1.9567649644369856, "grad_norm": 1.544152539456607, "learning_rate": 2.5822778196645804e-08, "loss": 0.5387139320373535, "step": 12243 }, { "epoch": 1.9569247982098616, "grad_norm": 1.5986477315976386, "learning_rate": 2.563333487018871e-08, "loss": 0.5523008704185486, "step": 12244 }, { "epoch": 1.957084631982738, "grad_norm": 1.9581634213394645, "learning_rate": 2.5444588118083148e-08, "loss": 0.7758046388626099, "step": 12245 }, { "epoch": 1.9572444657556143, "grad_norm": 2.04657194697768, "learning_rate": 2.5256537953509684e-08, "loss": 0.519057035446167, "step": 12246 }, { "epoch": 1.9574042995284904, "grad_norm": 1.5146884002702372, "learning_rate": 2.5069184389602264e-08, "loss": 0.5402083396911621, "step": 12247 }, { "epoch": 1.9575641333013665, "grad_norm": 1.7026212739132385, "learning_rate": 2.488252743944486e-08, "loss": 0.5382499694824219, "step": 12248 }, { "epoch": 1.9577239670742428, "grad_norm": 1.7322803702178413, "learning_rate": 2.469656711607149e-08, "loss": 0.48809701204299927, "step": 12249 }, { "epoch": 1.957883800847119, "grad_norm": 1.7561825432236267, "learning_rate": 2.4511303432470658e-08, "loss": 0.4923025071620941, "step": 12250 }, { "epoch": 1.9580436346199952, "grad_norm": 1.7342106551731966, "learning_rate": 2.4326736401579788e-08, "loss": 0.7689316272735596, "step": 12251 }, { "epoch": 1.9582034683928713, "grad_norm": 1.5946103030597958, "learning_rate": 2.414286603628857e-08, "loss": 0.5375931262969971, "step": 12252 }, { "epoch": 1.9583633021657476, "grad_norm": 1.550522046248976, "learning_rate": 2.3959692349437846e-08, "loss": 0.538823127746582, "step": 12253 }, { "epoch": 1.958523135938624, "grad_norm": 1.843261683102397, "learning_rate": 2.3777215353819606e-08, "loss": 0.6034183502197266, "step": 12254 }, { "epoch": 1.9586829697115, "grad_norm": 1.4741068863201023, "learning_rate": 2.3595435062176986e-08, "loss": 0.5081524848937988, "step": 12255 }, { "epoch": 1.9588428034843761, "grad_norm": 1.8168592043873035, "learning_rate": 2.341435148720539e-08, "loss": 0.6355805397033691, "step": 12256 }, { "epoch": 1.9590026372572524, "grad_norm": 1.8193797020828117, "learning_rate": 2.3233964641551367e-08, "loss": 0.5467796921730042, "step": 12257 }, { "epoch": 1.9591624710301287, "grad_norm": 1.3437791778289, "learning_rate": 2.3054274537812615e-08, "loss": 0.6420918703079224, "step": 12258 }, { "epoch": 1.9593223048030048, "grad_norm": 1.4799646939233544, "learning_rate": 2.287528118853688e-08, "loss": 0.5234168171882629, "step": 12259 }, { "epoch": 1.959482138575881, "grad_norm": 1.5566664415686466, "learning_rate": 2.269698460622527e-08, "loss": 0.5049488544464111, "step": 12260 }, { "epoch": 1.9596419723487573, "grad_norm": 1.494891292727761, "learning_rate": 2.2519384803328937e-08, "loss": 0.46619662642478943, "step": 12261 }, { "epoch": 1.9598018061216336, "grad_norm": 1.586455149944088, "learning_rate": 2.2342481792252402e-08, "loss": 0.5137885808944702, "step": 12262 }, { "epoch": 1.9599616398945097, "grad_norm": 1.7166623400934717, "learning_rate": 2.2166275585348007e-08, "loss": 0.48363566398620605, "step": 12263 }, { "epoch": 1.9601214736673858, "grad_norm": 1.5626673278011387, "learning_rate": 2.1990766194921465e-08, "loss": 0.5854774713516235, "step": 12264 }, { "epoch": 1.960281307440262, "grad_norm": 1.7930278888421933, "learning_rate": 2.1815953633230747e-08, "loss": 0.6666311621665955, "step": 12265 }, { "epoch": 1.9604411412131384, "grad_norm": 1.297310928016054, "learning_rate": 2.1641837912483865e-08, "loss": 0.4233434796333313, "step": 12266 }, { "epoch": 1.9606009749860145, "grad_norm": 1.6477932365386085, "learning_rate": 2.1468419044839984e-08, "loss": 0.5545492172241211, "step": 12267 }, { "epoch": 1.9607608087588908, "grad_norm": 1.7680268497779656, "learning_rate": 2.1295697042410523e-08, "loss": 0.4922400712966919, "step": 12268 }, { "epoch": 1.9609206425317671, "grad_norm": 1.802003214355005, "learning_rate": 2.112367191725695e-08, "loss": 0.5526094436645508, "step": 12269 }, { "epoch": 1.9610804763046432, "grad_norm": 1.677053222434708, "learning_rate": 2.0952343681395203e-08, "loss": 0.6332912445068359, "step": 12270 }, { "epoch": 1.9612403100775193, "grad_norm": 1.4063814491886861, "learning_rate": 2.078171234678683e-08, "loss": 0.5809681415557861, "step": 12271 }, { "epoch": 1.9614001438503956, "grad_norm": 1.6513230888834645, "learning_rate": 2.0611777925351183e-08, "loss": 0.46721166372299194, "step": 12272 }, { "epoch": 1.961559977623272, "grad_norm": 1.9257586948222771, "learning_rate": 2.0442540428953216e-08, "loss": 0.5084739327430725, "step": 12273 }, { "epoch": 1.961719811396148, "grad_norm": 1.6154398119947988, "learning_rate": 2.0273999869414585e-08, "loss": 0.5350823998451233, "step": 12274 }, { "epoch": 1.9618796451690241, "grad_norm": 1.743111881614186, "learning_rate": 2.010615625850365e-08, "loss": 0.4942701458930969, "step": 12275 }, { "epoch": 1.9620394789419004, "grad_norm": 1.6038141863747886, "learning_rate": 1.993900960794215e-08, "loss": 0.5572925806045532, "step": 12276 }, { "epoch": 1.9621993127147768, "grad_norm": 1.5437734037275088, "learning_rate": 1.977255992940408e-08, "loss": 0.48352158069610596, "step": 12277 }, { "epoch": 1.9623591464876529, "grad_norm": 1.4631437783041756, "learning_rate": 1.960680723451347e-08, "loss": 0.4309770464897156, "step": 12278 }, { "epoch": 1.962518980260529, "grad_norm": 1.454339184151927, "learning_rate": 1.94417515348444e-08, "loss": 0.47094208002090454, "step": 12279 }, { "epoch": 1.9626788140334053, "grad_norm": 2.199661647940679, "learning_rate": 1.9277392841925425e-08, "loss": 0.8092626333236694, "step": 12280 }, { "epoch": 1.9628386478062816, "grad_norm": 1.4726164053717004, "learning_rate": 1.9113731167234028e-08, "loss": 0.5378192663192749, "step": 12281 }, { "epoch": 1.9629984815791577, "grad_norm": 1.4707467877484925, "learning_rate": 1.8950766522199958e-08, "loss": 0.5370407700538635, "step": 12282 }, { "epoch": 1.9631583153520338, "grad_norm": 1.6862415844460676, "learning_rate": 1.8788498918204112e-08, "loss": 0.5583850741386414, "step": 12283 }, { "epoch": 1.96331814912491, "grad_norm": 1.445261275853586, "learning_rate": 1.8626928366578534e-08, "loss": 0.44608885049819946, "step": 12284 }, { "epoch": 1.9634779828977864, "grad_norm": 1.7982099758734198, "learning_rate": 1.846605487860642e-08, "loss": 0.5091733336448669, "step": 12285 }, { "epoch": 1.9636378166706625, "grad_norm": 1.548657245264626, "learning_rate": 1.830587846552323e-08, "loss": 0.515340268611908, "step": 12286 }, { "epoch": 1.9637976504435386, "grad_norm": 1.3383888410725937, "learning_rate": 1.814639913851557e-08, "loss": 0.3932730555534363, "step": 12287 }, { "epoch": 1.963957484216415, "grad_norm": 1.636506829699662, "learning_rate": 1.7987616908720086e-08, "loss": 0.5634206533432007, "step": 12288 }, { "epoch": 1.9641173179892912, "grad_norm": 1.3941542721305993, "learning_rate": 1.7829531787225686e-08, "loss": 0.4984392523765564, "step": 12289 }, { "epoch": 1.9642771517621673, "grad_norm": 1.7648728310859874, "learning_rate": 1.7672143785072428e-08, "loss": 0.5460554361343384, "step": 12290 }, { "epoch": 1.9644369855350434, "grad_norm": 1.6367894335832296, "learning_rate": 1.7515452913250407e-08, "loss": 0.5276421308517456, "step": 12291 }, { "epoch": 1.9645968193079197, "grad_norm": 1.6276322590805596, "learning_rate": 1.735945918270532e-08, "loss": 0.5051048398017883, "step": 12292 }, { "epoch": 1.964756653080796, "grad_norm": 1.6128337446654333, "learning_rate": 1.7204162604329555e-08, "loss": 0.5567077398300171, "step": 12293 }, { "epoch": 1.9649164868536722, "grad_norm": 1.6719480390088612, "learning_rate": 1.7049563188968888e-08, "loss": 0.6373506188392639, "step": 12294 }, { "epoch": 1.9650763206265482, "grad_norm": 1.5222874538269782, "learning_rate": 1.6895660947419125e-08, "loss": 0.5327243804931641, "step": 12295 }, { "epoch": 1.9652361543994246, "grad_norm": 1.517423401394264, "learning_rate": 1.6742455890428333e-08, "loss": 0.5747560262680054, "step": 12296 }, { "epoch": 1.9653959881723009, "grad_norm": 1.4650308508771777, "learning_rate": 1.6589948028696846e-08, "loss": 0.6200428605079651, "step": 12297 }, { "epoch": 1.965555821945177, "grad_norm": 1.5589995928497011, "learning_rate": 1.643813737287503e-08, "loss": 0.4633129835128784, "step": 12298 }, { "epoch": 1.965715655718053, "grad_norm": 1.7089377512765043, "learning_rate": 1.6287023933564407e-08, "loss": 0.621448278427124, "step": 12299 }, { "epoch": 1.9658754894909294, "grad_norm": 1.9242667111778406, "learning_rate": 1.6136607721317642e-08, "loss": 0.6486620903015137, "step": 12300 }, { "epoch": 1.9660353232638057, "grad_norm": 1.4845027295314985, "learning_rate": 1.5986888746640783e-08, "loss": 0.4730743169784546, "step": 12301 }, { "epoch": 1.9661951570366818, "grad_norm": 1.6364655109712174, "learning_rate": 1.5837867019987685e-08, "loss": 0.5917767286300659, "step": 12302 }, { "epoch": 1.9663549908095581, "grad_norm": 1.7160287681595858, "learning_rate": 1.56895425517678e-08, "loss": 0.5496455430984497, "step": 12303 }, { "epoch": 1.9665148245824344, "grad_norm": 1.646256434332892, "learning_rate": 1.5541915352337288e-08, "loss": 0.6830860376358032, "step": 12304 }, { "epoch": 1.9666746583553105, "grad_norm": 1.5003103955665527, "learning_rate": 1.53949854320079e-08, "loss": 0.554728627204895, "step": 12305 }, { "epoch": 1.9668344921281866, "grad_norm": 1.320482680536208, "learning_rate": 1.5248752801039214e-08, "loss": 0.4297519028186798, "step": 12306 }, { "epoch": 1.966994325901063, "grad_norm": 1.3503596381594112, "learning_rate": 1.510321746964416e-08, "loss": 0.5594606399536133, "step": 12307 }, { "epoch": 1.9671541596739393, "grad_norm": 1.7274446126704548, "learning_rate": 1.4958379447986838e-08, "loss": 0.507901668548584, "step": 12308 }, { "epoch": 1.9673139934468153, "grad_norm": 1.7409221860271817, "learning_rate": 1.4814238746181376e-08, "loss": 0.5395246148109436, "step": 12309 }, { "epoch": 1.9674738272196914, "grad_norm": 2.066361209941004, "learning_rate": 1.4670795374295277e-08, "loss": 0.6250448822975159, "step": 12310 }, { "epoch": 1.9676336609925678, "grad_norm": 1.5437125866936527, "learning_rate": 1.4528049342344974e-08, "loss": 0.4295463562011719, "step": 12311 }, { "epoch": 1.967793494765444, "grad_norm": 1.7928291860801833, "learning_rate": 1.438600066029916e-08, "loss": 0.5259555578231812, "step": 12312 }, { "epoch": 1.9679533285383202, "grad_norm": 1.4063676341437699, "learning_rate": 1.4244649338078787e-08, "loss": 0.4977126717567444, "step": 12313 }, { "epoch": 1.9681131623111963, "grad_norm": 1.4579615681468427, "learning_rate": 1.4103995385555958e-08, "loss": 0.49858275055885315, "step": 12314 }, { "epoch": 1.9682729960840726, "grad_norm": 1.517163974016951, "learning_rate": 1.3964038812551706e-08, "loss": 0.49352896213531494, "step": 12315 }, { "epoch": 1.968432829856949, "grad_norm": 1.5074214731929316, "learning_rate": 1.3824779628841545e-08, "loss": 0.605060338973999, "step": 12316 }, { "epoch": 1.968592663629825, "grad_norm": 1.6095511254427135, "learning_rate": 1.3686217844149919e-08, "loss": 0.5638681650161743, "step": 12317 }, { "epoch": 1.968752497402701, "grad_norm": 1.559007263388537, "learning_rate": 1.354835346815353e-08, "loss": 0.5855332612991333, "step": 12318 }, { "epoch": 1.9689123311755774, "grad_norm": 1.8167654162010918, "learning_rate": 1.3411186510481345e-08, "loss": 0.4779585599899292, "step": 12319 }, { "epoch": 1.9690721649484537, "grad_norm": 1.5255045209865152, "learning_rate": 1.3274716980711255e-08, "loss": 0.47281551361083984, "step": 12320 }, { "epoch": 1.9692319987213298, "grad_norm": 1.4332061562465275, "learning_rate": 1.3138944888374527e-08, "loss": 0.5055099129676819, "step": 12321 }, { "epoch": 1.969391832494206, "grad_norm": 1.6063933343402093, "learning_rate": 1.3003870242953575e-08, "loss": 0.5359878540039062, "step": 12322 }, { "epoch": 1.9695516662670822, "grad_norm": 1.7598914941059025, "learning_rate": 1.2869493053880855e-08, "loss": 0.6020963191986084, "step": 12323 }, { "epoch": 1.9697115000399585, "grad_norm": 1.5324408318281673, "learning_rate": 1.273581333054108e-08, "loss": 0.5375796556472778, "step": 12324 }, { "epoch": 1.9698713338128346, "grad_norm": 1.5158266689642454, "learning_rate": 1.2602831082269008e-08, "loss": 0.5596848726272583, "step": 12325 }, { "epoch": 1.9700311675857107, "grad_norm": 1.6413231551500698, "learning_rate": 1.2470546318351651e-08, "loss": 0.4820249080657959, "step": 12326 }, { "epoch": 1.970191001358587, "grad_norm": 1.7243624211168362, "learning_rate": 1.2338959048029398e-08, "loss": 0.55936598777771, "step": 12327 }, { "epoch": 1.9703508351314634, "grad_norm": 1.4392549109525534, "learning_rate": 1.2208069280490454e-08, "loss": 0.39767056703567505, "step": 12328 }, { "epoch": 1.9705106689043395, "grad_norm": 1.768590664925517, "learning_rate": 1.2077877024875284e-08, "loss": 0.5225070714950562, "step": 12329 }, { "epoch": 1.9706705026772156, "grad_norm": 1.6399667044791078, "learning_rate": 1.1948382290276616e-08, "loss": 0.5074524283409119, "step": 12330 }, { "epoch": 1.9708303364500919, "grad_norm": 1.6129963460452392, "learning_rate": 1.1819585085737217e-08, "loss": 0.5607888698577881, "step": 12331 }, { "epoch": 1.9709901702229682, "grad_norm": 1.7426302997124978, "learning_rate": 1.1691485420253224e-08, "loss": 0.5800796151161194, "step": 12332 }, { "epoch": 1.9711500039958443, "grad_norm": 1.5750187328659095, "learning_rate": 1.1564083302769702e-08, "loss": 0.5630687475204468, "step": 12333 }, { "epoch": 1.9713098377687204, "grad_norm": 1.5221471596541118, "learning_rate": 1.143737874218398e-08, "loss": 0.5711066722869873, "step": 12334 }, { "epoch": 1.9714696715415967, "grad_norm": 1.7067947400935495, "learning_rate": 1.1311371747344535e-08, "loss": 0.5778764486312866, "step": 12335 }, { "epoch": 1.971629505314473, "grad_norm": 1.5339160246776273, "learning_rate": 1.1186062327050995e-08, "loss": 0.6254835724830627, "step": 12336 }, { "epoch": 1.971789339087349, "grad_norm": 1.5777230470421475, "learning_rate": 1.1061450490055248e-08, "loss": 0.46612706780433655, "step": 12337 }, { "epoch": 1.9719491728602254, "grad_norm": 1.5433766592879539, "learning_rate": 1.0937536245060332e-08, "loss": 0.6242371797561646, "step": 12338 }, { "epoch": 1.9721090066331017, "grad_norm": 1.7324134335848538, "learning_rate": 1.0814319600718216e-08, "loss": 0.5258943438529968, "step": 12339 }, { "epoch": 1.9722688404059778, "grad_norm": 1.4898346264325852, "learning_rate": 1.0691800565635346e-08, "loss": 0.4000421464443207, "step": 12340 }, { "epoch": 1.972428674178854, "grad_norm": 1.2903476277981107, "learning_rate": 1.0569979148367105e-08, "loss": 0.3682510554790497, "step": 12341 }, { "epoch": 1.9725885079517302, "grad_norm": 2.0630776758954434, "learning_rate": 1.0448855357422238e-08, "loss": 0.45060840249061584, "step": 12342 }, { "epoch": 1.9727483417246066, "grad_norm": 1.6505668244478153, "learning_rate": 1.0328429201258427e-08, "loss": 0.5129498839378357, "step": 12343 }, { "epoch": 1.9729081754974827, "grad_norm": 1.5472022323645087, "learning_rate": 1.0208700688286722e-08, "loss": 0.39428192377090454, "step": 12344 }, { "epoch": 1.9730680092703587, "grad_norm": 1.759001820650785, "learning_rate": 1.00896698268671e-08, "loss": 0.6042333841323853, "step": 12345 }, { "epoch": 1.973227843043235, "grad_norm": 1.4949976379808325, "learning_rate": 9.971336625312911e-09, "loss": 0.5315554141998291, "step": 12346 }, { "epoch": 1.9733876768161114, "grad_norm": 1.712677519635571, "learning_rate": 9.853701091888656e-09, "loss": 0.6302799582481384, "step": 12347 }, { "epoch": 1.9735475105889875, "grad_norm": 1.681597849086003, "learning_rate": 9.736763234809987e-09, "loss": 0.575430154800415, "step": 12348 }, { "epoch": 1.9737073443618636, "grad_norm": 1.7066621122047816, "learning_rate": 9.620523062242592e-09, "loss": 0.5314037799835205, "step": 12349 }, { "epoch": 1.9738671781347399, "grad_norm": 1.6648211784251639, "learning_rate": 9.504980582304423e-09, "loss": 0.39728599786758423, "step": 12350 }, { "epoch": 1.9740270119076162, "grad_norm": 1.8720039574323715, "learning_rate": 9.390135803064582e-09, "loss": 0.6158103346824646, "step": 12351 }, { "epoch": 1.9741868456804923, "grad_norm": 1.8151233289370177, "learning_rate": 9.275988732543318e-09, "loss": 0.582158088684082, "step": 12352 }, { "epoch": 1.9743466794533684, "grad_norm": 1.4742685988359565, "learning_rate": 9.162539378712032e-09, "loss": 0.53511643409729, "step": 12353 }, { "epoch": 1.9745065132262447, "grad_norm": 1.620073451550835, "learning_rate": 9.049787749494387e-09, "loss": 0.6475135087966919, "step": 12354 }, { "epoch": 1.974666346999121, "grad_norm": 1.6170427703328616, "learning_rate": 8.937733852764086e-09, "loss": 0.6293531656265259, "step": 12355 }, { "epoch": 1.9748261807719971, "grad_norm": 1.5026165164014724, "learning_rate": 8.826377696345978e-09, "loss": 0.4062901437282562, "step": 12356 }, { "epoch": 1.9749860145448732, "grad_norm": 1.4474642505450912, "learning_rate": 8.715719288018287e-09, "loss": 0.43798935413360596, "step": 12357 }, { "epoch": 1.9751458483177495, "grad_norm": 1.7867059567030454, "learning_rate": 8.605758635507056e-09, "loss": 0.5661337971687317, "step": 12358 }, { "epoch": 1.9753056820906258, "grad_norm": 1.5804231531608635, "learning_rate": 8.496495746493916e-09, "loss": 0.5502060651779175, "step": 12359 }, { "epoch": 1.975465515863502, "grad_norm": 1.5571429490778326, "learning_rate": 8.38793062860721e-09, "loss": 0.5443919897079468, "step": 12360 }, { "epoch": 1.975625349636378, "grad_norm": 1.5607381575349875, "learning_rate": 8.280063289429762e-09, "loss": 0.48413240909576416, "step": 12361 }, { "epoch": 1.9757851834092544, "grad_norm": 1.8445404085453252, "learning_rate": 8.172893736494437e-09, "loss": 0.538400411605835, "step": 12362 }, { "epoch": 1.9759450171821307, "grad_norm": 2.005626502205795, "learning_rate": 8.066421977286355e-09, "loss": 0.581278920173645, "step": 12363 }, { "epoch": 1.9761048509550068, "grad_norm": 1.373661771444283, "learning_rate": 7.96064801923957e-09, "loss": 0.4733688533306122, "step": 12364 }, { "epoch": 1.9762646847278829, "grad_norm": 1.642071649893839, "learning_rate": 7.855571869741507e-09, "loss": 0.5583418607711792, "step": 12365 }, { "epoch": 1.9764245185007592, "grad_norm": 2.0704301383225023, "learning_rate": 7.751193536131851e-09, "loss": 0.5137656927108765, "step": 12366 }, { "epoch": 1.9765843522736355, "grad_norm": 1.4186449181363447, "learning_rate": 7.647513025698105e-09, "loss": 0.6393486261367798, "step": 12367 }, { "epoch": 1.9767441860465116, "grad_norm": 1.627953153753927, "learning_rate": 7.544530345682255e-09, "loss": 0.47092050313949585, "step": 12368 }, { "epoch": 1.9769040198193877, "grad_norm": 1.9423842762324839, "learning_rate": 7.442245503275214e-09, "loss": 0.5837101936340332, "step": 12369 }, { "epoch": 1.977063853592264, "grad_norm": 2.5715651181291594, "learning_rate": 7.340658505620158e-09, "loss": 0.5676426291465759, "step": 12370 }, { "epoch": 1.9772236873651403, "grad_norm": 1.815601473899564, "learning_rate": 7.239769359811411e-09, "loss": 0.5902494192123413, "step": 12371 }, { "epoch": 1.9773835211380164, "grad_norm": 1.655066659212048, "learning_rate": 7.139578072896669e-09, "loss": 0.5364367961883545, "step": 12372 }, { "epoch": 1.9775433549108927, "grad_norm": 1.519560735168396, "learning_rate": 7.040084651871448e-09, "loss": 0.5165821313858032, "step": 12373 }, { "epoch": 1.977703188683769, "grad_norm": 2.1659749786418603, "learning_rate": 6.941289103683524e-09, "loss": 0.7180498242378235, "step": 12374 }, { "epoch": 1.9778630224566451, "grad_norm": 1.7865983730112112, "learning_rate": 6.843191435232932e-09, "loss": 0.6007394790649414, "step": 12375 }, { "epoch": 1.9780228562295212, "grad_norm": 1.7491247925508906, "learning_rate": 6.745791653370859e-09, "loss": 0.5472184419631958, "step": 12376 }, { "epoch": 1.9781826900023975, "grad_norm": 1.7725534858958631, "learning_rate": 6.649089764899641e-09, "loss": 0.5990079641342163, "step": 12377 }, { "epoch": 1.9783425237752739, "grad_norm": 1.5571370698472633, "learning_rate": 6.553085776570544e-09, "loss": 0.5527386665344238, "step": 12378 }, { "epoch": 1.97850235754815, "grad_norm": 1.6250867277674588, "learning_rate": 6.457779695090427e-09, "loss": 0.6611921191215515, "step": 12379 }, { "epoch": 1.978662191321026, "grad_norm": 1.5965993402800358, "learning_rate": 6.363171527115075e-09, "loss": 0.491191565990448, "step": 12380 }, { "epoch": 1.9788220250939024, "grad_norm": 1.528386337692942, "learning_rate": 6.269261279250316e-09, "loss": 0.4356619119644165, "step": 12381 }, { "epoch": 1.9789818588667787, "grad_norm": 1.604155040984395, "learning_rate": 6.176048958054237e-09, "loss": 0.6026917695999146, "step": 12382 }, { "epoch": 1.9791416926396548, "grad_norm": 1.6229417866914633, "learning_rate": 6.083534570038296e-09, "loss": 0.48082613945007324, "step": 12383 }, { "epoch": 1.9793015264125309, "grad_norm": 1.3858042954689593, "learning_rate": 5.991718121661771e-09, "loss": 0.4881294369697571, "step": 12384 }, { "epoch": 1.9794613601854072, "grad_norm": 1.6779253533322935, "learning_rate": 5.9005996193373105e-09, "loss": 0.5804660320281982, "step": 12385 }, { "epoch": 1.9796211939582835, "grad_norm": 1.853283938496512, "learning_rate": 5.810179069428712e-09, "loss": 0.5086131691932678, "step": 12386 }, { "epoch": 1.9797810277311596, "grad_norm": 1.6005642621200729, "learning_rate": 5.7204564782498136e-09, "loss": 0.5249725580215454, "step": 12387 }, { "epoch": 1.9799408615040357, "grad_norm": 1.6645744627918475, "learning_rate": 5.6314318520678254e-09, "loss": 0.6475673913955688, "step": 12388 }, { "epoch": 1.980100695276912, "grad_norm": 1.7201543208004804, "learning_rate": 5.543105197097776e-09, "loss": 0.5930697917938232, "step": 12389 }, { "epoch": 1.9802605290497883, "grad_norm": 1.6683503067764143, "learning_rate": 5.4554765195102835e-09, "loss": 0.6010464429855347, "step": 12390 }, { "epoch": 1.9804203628226644, "grad_norm": 1.6073868096386825, "learning_rate": 5.368545825423787e-09, "loss": 0.5300157070159912, "step": 12391 }, { "epoch": 1.9805801965955405, "grad_norm": 1.5409863667553174, "learning_rate": 5.282313120910099e-09, "loss": 0.48321521282196045, "step": 12392 }, { "epoch": 1.9807400303684168, "grad_norm": 1.338782984393064, "learning_rate": 5.1967784119899555e-09, "loss": 0.5277583599090576, "step": 12393 }, { "epoch": 1.9808998641412932, "grad_norm": 1.600129388250269, "learning_rate": 5.111941704638579e-09, "loss": 0.5479016304016113, "step": 12394 }, { "epoch": 1.9810596979141692, "grad_norm": 1.3342699493851222, "learning_rate": 5.027803004779008e-09, "loss": 0.4969749450683594, "step": 12395 }, { "epoch": 1.9812195316870453, "grad_norm": 1.8437959349061677, "learning_rate": 4.944362318287655e-09, "loss": 0.5528033971786499, "step": 12396 }, { "epoch": 1.9813793654599217, "grad_norm": 2.004511904452004, "learning_rate": 4.861619650993188e-09, "loss": 0.5353970527648926, "step": 12397 }, { "epoch": 1.981539199232798, "grad_norm": 1.5252830512049782, "learning_rate": 4.779575008672099e-09, "loss": 0.601473331451416, "step": 12398 }, { "epoch": 1.981699033005674, "grad_norm": 1.6693562509602853, "learning_rate": 4.6982283970542495e-09, "loss": 0.6091170907020569, "step": 12399 }, { "epoch": 1.9818588667785502, "grad_norm": 1.5003501391266645, "learning_rate": 4.61757982182176e-09, "loss": 0.5029444694519043, "step": 12400 }, { "epoch": 1.9820187005514265, "grad_norm": 1.549168379569144, "learning_rate": 4.537629288606793e-09, "loss": 0.5127397179603577, "step": 12401 }, { "epoch": 1.9821785343243028, "grad_norm": 1.543704594752971, "learning_rate": 4.458376802992659e-09, "loss": 0.5658143758773804, "step": 12402 }, { "epoch": 1.982338368097179, "grad_norm": 1.525915382420257, "learning_rate": 4.37982237051271e-09, "loss": 0.4499701261520386, "step": 12403 }, { "epoch": 1.982498201870055, "grad_norm": 1.8604441794833795, "learning_rate": 4.3019659966547775e-09, "loss": 0.6166338920593262, "step": 12404 }, { "epoch": 1.9826580356429313, "grad_norm": 1.7629729554295956, "learning_rate": 4.224807686854515e-09, "loss": 0.4177905321121216, "step": 12405 }, { "epoch": 1.9828178694158076, "grad_norm": 1.507064960494224, "learning_rate": 4.148347446500944e-09, "loss": 0.49596768617630005, "step": 12406 }, { "epoch": 1.9829777031886837, "grad_norm": 1.5926577810714915, "learning_rate": 4.0725852809342384e-09, "loss": 0.5715577602386475, "step": 12407 }, { "epoch": 1.98313753696156, "grad_norm": 1.339547261648256, "learning_rate": 3.99752119544572e-09, "loss": 0.4126224219799042, "step": 12408 }, { "epoch": 1.9832973707344363, "grad_norm": 1.7480504561763475, "learning_rate": 3.923155195276751e-09, "loss": 0.6018511056900024, "step": 12409 }, { "epoch": 1.9834572045073124, "grad_norm": 1.419283372939755, "learning_rate": 3.8494872856198465e-09, "loss": 0.5541458129882812, "step": 12410 }, { "epoch": 1.9836170382801885, "grad_norm": 1.427355163898441, "learning_rate": 3.776517471621999e-09, "loss": 0.4802596867084503, "step": 12411 }, { "epoch": 1.9837768720530649, "grad_norm": 1.3202954023447162, "learning_rate": 3.704245758376912e-09, "loss": 0.5144997239112854, "step": 12412 }, { "epoch": 1.9839367058259412, "grad_norm": 1.9348356360710797, "learning_rate": 3.63267215093388e-09, "loss": 0.5497217178344727, "step": 12413 }, { "epoch": 1.9840965395988173, "grad_norm": 1.7040775934788617, "learning_rate": 3.561796654290017e-09, "loss": 0.5093096494674683, "step": 12414 }, { "epoch": 1.9842563733716934, "grad_norm": 1.9579595868032356, "learning_rate": 3.4916192733958074e-09, "loss": 0.4308452010154724, "step": 12415 }, { "epoch": 1.9844162071445697, "grad_norm": 1.462791359363964, "learning_rate": 3.4221400131506654e-09, "loss": 0.4303581714630127, "step": 12416 }, { "epoch": 1.984576040917446, "grad_norm": 1.7439954243171745, "learning_rate": 3.353358878408486e-09, "loss": 0.5786176919937134, "step": 12417 }, { "epoch": 1.984735874690322, "grad_norm": 1.7957767168547933, "learning_rate": 3.2852758739720937e-09, "loss": 0.6114327311515808, "step": 12418 }, { "epoch": 1.9848957084631982, "grad_norm": 1.4983708001850728, "learning_rate": 3.217891004596574e-09, "loss": 0.544309675693512, "step": 12419 }, { "epoch": 1.9850555422360745, "grad_norm": 1.8045888372191692, "learning_rate": 3.151204274985942e-09, "loss": 0.5487816333770752, "step": 12420 }, { "epoch": 1.9852153760089508, "grad_norm": 1.594664956512923, "learning_rate": 3.085215689799803e-09, "loss": 0.4949299693107605, "step": 12421 }, { "epoch": 1.985375209781827, "grad_norm": 1.4067830057266466, "learning_rate": 3.0199252536455836e-09, "loss": 0.4800992012023926, "step": 12422 }, { "epoch": 1.985535043554703, "grad_norm": 1.6659127499915825, "learning_rate": 2.9553329710829694e-09, "loss": 0.6576957702636719, "step": 12423 }, { "epoch": 1.9856948773275793, "grad_norm": 1.4736770111735988, "learning_rate": 2.8914388466227962e-09, "loss": 0.6854820251464844, "step": 12424 }, { "epoch": 1.9858547111004556, "grad_norm": 1.668035140847486, "learning_rate": 2.8282428847270503e-09, "loss": 0.4797218441963196, "step": 12425 }, { "epoch": 1.9860145448733317, "grad_norm": 1.3853817327528122, "learning_rate": 2.765745089808869e-09, "loss": 0.4584825932979584, "step": 12426 }, { "epoch": 1.9861743786462078, "grad_norm": 1.6184176604923703, "learning_rate": 2.7039454662336484e-09, "loss": 0.5600043535232544, "step": 12427 }, { "epoch": 1.9863342124190841, "grad_norm": 1.5046096505976079, "learning_rate": 2.6428440183168257e-09, "loss": 0.5107675790786743, "step": 12428 }, { "epoch": 1.9864940461919605, "grad_norm": 1.7336613939864058, "learning_rate": 2.5824407503260984e-09, "loss": 0.5657918453216553, "step": 12429 }, { "epoch": 1.9866538799648366, "grad_norm": 1.334116996615797, "learning_rate": 2.5227356664792036e-09, "loss": 0.5029573440551758, "step": 12430 }, { "epoch": 1.9868137137377126, "grad_norm": 1.6560347368317034, "learning_rate": 2.4637287709450284e-09, "loss": 0.575141429901123, "step": 12431 }, { "epoch": 1.986973547510589, "grad_norm": 1.585499720781744, "learning_rate": 2.4054200678458317e-09, "loss": 0.6330524682998657, "step": 12432 }, { "epoch": 1.9871333812834653, "grad_norm": 1.5350446966329927, "learning_rate": 2.3478095612528007e-09, "loss": 0.5336360335350037, "step": 12433 }, { "epoch": 1.9872932150563414, "grad_norm": 1.71731558288778, "learning_rate": 2.2908972551893837e-09, "loss": 0.504814624786377, "step": 12434 }, { "epoch": 1.9874530488292175, "grad_norm": 1.6635983624419777, "learning_rate": 2.2346831536312895e-09, "loss": 0.516512393951416, "step": 12435 }, { "epoch": 1.9876128826020938, "grad_norm": 1.431538522703741, "learning_rate": 2.1791672605020463e-09, "loss": 0.3985704481601715, "step": 12436 }, { "epoch": 1.98777271637497, "grad_norm": 1.460078477948692, "learning_rate": 2.1243495796807734e-09, "loss": 0.5723185539245605, "step": 12437 }, { "epoch": 1.9879325501478462, "grad_norm": 1.5602781330007545, "learning_rate": 2.0702301149944095e-09, "loss": 0.5695540904998779, "step": 12438 }, { "epoch": 1.9880923839207223, "grad_norm": 1.7107186935879648, "learning_rate": 2.0168088702221533e-09, "loss": 0.5337891578674316, "step": 12439 }, { "epoch": 1.9882522176935986, "grad_norm": 1.5622019784119185, "learning_rate": 1.964085849096575e-09, "loss": 0.5385375022888184, "step": 12440 }, { "epoch": 1.988412051466475, "grad_norm": 1.4839230471109872, "learning_rate": 1.9120610552980646e-09, "loss": 0.5099863409996033, "step": 12441 }, { "epoch": 1.988571885239351, "grad_norm": 1.411134410340893, "learning_rate": 1.8607344924614912e-09, "loss": 0.46772515773773193, "step": 12442 }, { "epoch": 1.9887317190122271, "grad_norm": 1.6495607553846061, "learning_rate": 1.810106164169545e-09, "loss": 0.5132375955581665, "step": 12443 }, { "epoch": 1.9888915527851037, "grad_norm": 1.6887707194981474, "learning_rate": 1.7601760739582862e-09, "loss": 0.6039189100265503, "step": 12444 }, { "epoch": 1.9890513865579798, "grad_norm": 1.6179753586403978, "learning_rate": 1.7109442253149256e-09, "loss": 0.40856826305389404, "step": 12445 }, { "epoch": 1.9892112203308558, "grad_norm": 1.6370410194923999, "learning_rate": 1.6624106216789338e-09, "loss": 0.5140349268913269, "step": 12446 }, { "epoch": 1.9893710541037322, "grad_norm": 1.6437450126606772, "learning_rate": 1.6145752664376014e-09, "loss": 0.5734895467758179, "step": 12447 }, { "epoch": 1.9895308878766085, "grad_norm": 1.6084977464624781, "learning_rate": 1.5674381629326996e-09, "loss": 0.5622732639312744, "step": 12448 }, { "epoch": 1.9896907216494846, "grad_norm": 1.545064365654314, "learning_rate": 1.5209993144549295e-09, "loss": 0.5148879289627075, "step": 12449 }, { "epoch": 1.9898505554223607, "grad_norm": 1.6959485455507657, "learning_rate": 1.4752587242494731e-09, "loss": 0.4946858882904053, "step": 12450 }, { "epoch": 1.990010389195237, "grad_norm": 1.7055107741041255, "learning_rate": 1.4302163955093317e-09, "loss": 0.5063602924346924, "step": 12451 }, { "epoch": 1.9901702229681133, "grad_norm": 1.6199433694392045, "learning_rate": 1.3858723313808775e-09, "loss": 0.5618041157722473, "step": 12452 }, { "epoch": 1.9903300567409894, "grad_norm": 1.4590050828814103, "learning_rate": 1.3422265349594121e-09, "loss": 0.5122268795967102, "step": 12453 }, { "epoch": 1.9904898905138655, "grad_norm": 1.6089485179811192, "learning_rate": 1.2992790092947183e-09, "loss": 0.4732988476753235, "step": 12454 }, { "epoch": 1.9906497242867418, "grad_norm": 1.6535266966914424, "learning_rate": 1.2570297573855084e-09, "loss": 0.5438869595527649, "step": 12455 }, { "epoch": 1.9908095580596181, "grad_norm": 1.7023531840341506, "learning_rate": 1.215478782181645e-09, "loss": 0.5692481398582458, "step": 12456 }, { "epoch": 1.9909693918324942, "grad_norm": 1.4727445540181674, "learning_rate": 1.1746260865863613e-09, "loss": 0.5473777651786804, "step": 12457 }, { "epoch": 1.9911292256053703, "grad_norm": 1.6110907898214784, "learning_rate": 1.13447167345071e-09, "loss": 0.4584997594356537, "step": 12458 }, { "epoch": 1.9912890593782466, "grad_norm": 1.473030876406639, "learning_rate": 1.0950155455802247e-09, "loss": 0.5036872625350952, "step": 12459 }, { "epoch": 1.991448893151123, "grad_norm": 1.6579366613719588, "learning_rate": 1.0562577057304791e-09, "loss": 0.6306886672973633, "step": 12460 }, { "epoch": 1.991608726923999, "grad_norm": 1.7664133397683748, "learning_rate": 1.0181981566081967e-09, "loss": 0.7218204140663147, "step": 12461 }, { "epoch": 1.9917685606968751, "grad_norm": 1.7581887936230747, "learning_rate": 9.808369008701412e-10, "loss": 0.6649379730224609, "step": 12462 }, { "epoch": 1.9919283944697515, "grad_norm": 1.7174602051677361, "learning_rate": 9.441739411264471e-10, "loss": 0.5966506004333496, "step": 12463 }, { "epoch": 1.9920882282426278, "grad_norm": 1.8025163143459406, "learning_rate": 9.08209279938399e-10, "loss": 0.5844554901123047, "step": 12464 }, { "epoch": 1.9922480620155039, "grad_norm": 1.7854579126194048, "learning_rate": 8.72942919816211e-10, "loss": 0.554591178894043, "step": 12465 }, { "epoch": 1.99240789578838, "grad_norm": 1.6082228897472408, "learning_rate": 8.383748632223576e-10, "loss": 0.4953199625015259, "step": 12466 }, { "epoch": 1.9925677295612563, "grad_norm": 1.8037860326406951, "learning_rate": 8.045051125726844e-10, "loss": 0.5133070945739746, "step": 12467 }, { "epoch": 1.9927275633341326, "grad_norm": 1.5653179894317004, "learning_rate": 7.713336702319662e-10, "loss": 0.4655790328979492, "step": 12468 }, { "epoch": 1.9928873971070087, "grad_norm": 1.523231245533525, "learning_rate": 7.388605385150183e-10, "loss": 0.5547038316726685, "step": 12469 }, { "epoch": 1.9930472308798848, "grad_norm": 1.662710207778997, "learning_rate": 7.070857196922465e-10, "loss": 0.4684177041053772, "step": 12470 }, { "epoch": 1.993207064652761, "grad_norm": 1.1565696443604547, "learning_rate": 6.760092159818766e-10, "loss": 0.36084407567977905, "step": 12471 }, { "epoch": 1.9933668984256374, "grad_norm": 1.5911948591259517, "learning_rate": 6.456310295532841e-10, "loss": 0.5282919406890869, "step": 12472 }, { "epoch": 1.9935267321985135, "grad_norm": 1.8057377172565343, "learning_rate": 6.159511625281056e-10, "loss": 0.5428091883659363, "step": 12473 }, { "epoch": 1.9936865659713896, "grad_norm": 1.845134379070901, "learning_rate": 5.869696169802374e-10, "loss": 0.6177743673324585, "step": 12474 }, { "epoch": 1.993846399744266, "grad_norm": 1.4942283643236725, "learning_rate": 5.586863949325061e-10, "loss": 0.579785943031311, "step": 12475 }, { "epoch": 1.9940062335171422, "grad_norm": 1.6425465867817215, "learning_rate": 5.311014983599982e-10, "loss": 0.6279420852661133, "step": 12476 }, { "epoch": 1.9941660672900183, "grad_norm": 1.6346588915913833, "learning_rate": 5.042149291900611e-10, "loss": 0.4718194603919983, "step": 12477 }, { "epoch": 1.9943259010628944, "grad_norm": 1.3350781401284835, "learning_rate": 4.780266893000817e-10, "loss": 0.44801968336105347, "step": 12478 }, { "epoch": 1.994485734835771, "grad_norm": 1.792983718661265, "learning_rate": 4.5253678051970786e-10, "loss": 0.5776640772819519, "step": 12479 }, { "epoch": 1.994645568608647, "grad_norm": 1.622367940088739, "learning_rate": 4.2774520462640635e-10, "loss": 0.510468602180481, "step": 12480 }, { "epoch": 1.9948054023815232, "grad_norm": 1.5104805253565101, "learning_rate": 4.0365196335434564e-10, "loss": 0.5294353365898132, "step": 12481 }, { "epoch": 1.9949652361543995, "grad_norm": 1.8125159675853622, "learning_rate": 3.8025705838551366e-10, "loss": 0.5298052430152893, "step": 12482 }, { "epoch": 1.9951250699272758, "grad_norm": 1.6685002272555942, "learning_rate": 3.575604913530484e-10, "loss": 0.6012872457504272, "step": 12483 }, { "epoch": 1.9952849037001519, "grad_norm": 1.595508647093864, "learning_rate": 3.3556226384123813e-10, "loss": 0.6005699634552002, "step": 12484 }, { "epoch": 1.995444737473028, "grad_norm": 1.926889015627625, "learning_rate": 3.14262377388852e-10, "loss": 0.54365074634552, "step": 12485 }, { "epoch": 1.9956045712459043, "grad_norm": 1.7053980986212924, "learning_rate": 2.936608334802582e-10, "loss": 0.5375258922576904, "step": 12486 }, { "epoch": 1.9957644050187806, "grad_norm": 1.5277019893089065, "learning_rate": 2.737576335576364e-10, "loss": 0.6135390996932983, "step": 12487 }, { "epoch": 1.9959242387916567, "grad_norm": 1.745479188537707, "learning_rate": 2.545527790076552e-10, "loss": 0.523230791091919, "step": 12488 }, { "epoch": 1.9960840725645328, "grad_norm": 2.1726271328286084, "learning_rate": 2.360462711736844e-10, "loss": 0.5370484590530396, "step": 12489 }, { "epoch": 1.9962439063374091, "grad_norm": 1.7271852731905084, "learning_rate": 2.1823811134802364e-10, "loss": 0.647923469543457, "step": 12490 }, { "epoch": 1.9964037401102854, "grad_norm": 1.7622471844343564, "learning_rate": 2.011283007730125e-10, "loss": 0.554527997970581, "step": 12491 }, { "epoch": 1.9965635738831615, "grad_norm": 1.7089212667689624, "learning_rate": 1.8471684064436113e-10, "loss": 0.6458269357681274, "step": 12492 }, { "epoch": 1.9967234076560376, "grad_norm": 1.8434096810869034, "learning_rate": 1.6900373210892996e-10, "loss": 0.5794588923454285, "step": 12493 }, { "epoch": 1.996883241428914, "grad_norm": 1.9199964467628305, "learning_rate": 1.5398897626250908e-10, "loss": 0.5359408259391785, "step": 12494 }, { "epoch": 1.9970430752017903, "grad_norm": 1.5358456182712477, "learning_rate": 1.3967257415425928e-10, "loss": 0.5102736949920654, "step": 12495 }, { "epoch": 1.9972029089746663, "grad_norm": 1.4789151634471691, "learning_rate": 1.2605452678449148e-10, "loss": 0.5053678750991821, "step": 12496 }, { "epoch": 1.9973627427475424, "grad_norm": 1.5168309796024277, "learning_rate": 1.1313483510466683e-10, "loss": 0.43702805042266846, "step": 12497 }, { "epoch": 1.9975225765204188, "grad_norm": 1.5171154863472585, "learning_rate": 1.0091350001517619e-10, "loss": 0.4658450484275818, "step": 12498 }, { "epoch": 1.997682410293295, "grad_norm": 1.6133910136627656, "learning_rate": 8.93905223708913e-11, "loss": 0.43360480666160583, "step": 12499 }, { "epoch": 1.9978422440661712, "grad_norm": 1.5250796475947586, "learning_rate": 7.856590297672384e-11, "loss": 0.459739089012146, "step": 12500 }, { "epoch": 1.9980020778390473, "grad_norm": 4.215845827807994, "learning_rate": 6.84396425887357e-11, "loss": 0.5257546901702881, "step": 12501 }, { "epoch": 1.9981619116119236, "grad_norm": 1.4148240596613726, "learning_rate": 5.901174191302871e-11, "loss": 0.5151155591011047, "step": 12502 }, { "epoch": 1.9983217453848, "grad_norm": 1.4083076255167604, "learning_rate": 5.028220160796515e-11, "loss": 0.5181376934051514, "step": 12503 }, { "epoch": 1.998481579157676, "grad_norm": 1.546225269023206, "learning_rate": 4.225102228416766e-11, "loss": 0.46027252078056335, "step": 12504 }, { "epoch": 1.998641412930552, "grad_norm": 1.6616819424640512, "learning_rate": 3.491820450229888e-11, "loss": 0.46053752303123474, "step": 12505 }, { "epoch": 1.9988012467034284, "grad_norm": 1.793807926119599, "learning_rate": 2.8283748774171616e-11, "loss": 0.538805365562439, "step": 12506 }, { "epoch": 1.9989610804763047, "grad_norm": 1.6311924595407457, "learning_rate": 2.2347655563859096e-11, "loss": 0.7029963135719299, "step": 12507 }, { "epoch": 1.9991209142491808, "grad_norm": 1.5469874673128674, "learning_rate": 1.710992528436428e-11, "loss": 0.6692911982536316, "step": 12508 }, { "epoch": 1.999280748022057, "grad_norm": 1.5905275212290564, "learning_rate": 1.2570558303171e-11, "loss": 0.528274416923523, "step": 12509 }, { "epoch": 1.9994405817949332, "grad_norm": 1.7232718140633463, "learning_rate": 8.729554935582585e-12, "loss": 0.6044978499412537, "step": 12510 }, { "epoch": 1.9996004155678095, "grad_norm": 1.6024591220141842, "learning_rate": 5.586915451383235e-12, "loss": 0.6520248651504517, "step": 12511 }, { "epoch": 1.9997602493406856, "grad_norm": 1.6183203926826915, "learning_rate": 3.142640069286884e-12, "loss": 0.4837193787097931, "step": 12512 }, { "epoch": 1.9999200831135617, "grad_norm": 1.6071404028650613, "learning_rate": 1.396728960267879e-12, "loss": 0.5730553865432739, "step": 12513 }, { "epoch": 2.0, "grad_norm": 1.9895397135256827, "learning_rate": 3.4918224645075214e-13, "loss": 0.6090686321258545, "step": 12514 }, { "epoch": 2.0, "step": 12514, "total_flos": 3840146698174464.0, "train_loss": 0.6134283295598144, "train_runtime": 32820.6163, "train_samples_per_second": 1.525, "train_steps_per_second": 0.381 } ], "logging_steps": 1, "max_steps": 12514, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3840146698174464.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }