{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 990, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0020222446916076846, "grad_norm": 3.467946767807007, "learning_rate": 0.0, "loss": 3.4468, "mean_token_accuracy": 0.4403058011084795, "num_tokens": 69017.0, "step": 1 }, { "epoch": 0.004044489383215369, "grad_norm": 3.8678574562072754, "learning_rate": 8.88888888888889e-06, "loss": 3.0508, "mean_token_accuracy": 0.470831586048007, "num_tokens": 150583.0, "step": 2 }, { "epoch": 0.006066734074823054, "grad_norm": 3.7103006839752197, "learning_rate": 1.777777777777778e-05, "loss": 3.285, "mean_token_accuracy": 0.45825996436178684, "num_tokens": 221144.0, "step": 3 }, { "epoch": 0.008088978766430738, "grad_norm": 3.591843605041504, "learning_rate": 2.6666666666666667e-05, "loss": 3.4107, "mean_token_accuracy": 0.44140205159783363, "num_tokens": 287737.0, "step": 4 }, { "epoch": 0.010111223458038422, "grad_norm": 3.940007209777832, "learning_rate": 3.555555555555556e-05, "loss": 3.0975, "mean_token_accuracy": 0.4831150006502867, "num_tokens": 362591.0, "step": 5 }, { "epoch": 0.012133468149646108, "grad_norm": 3.8853604793548584, "learning_rate": 4.4444444444444447e-05, "loss": 3.1354, "mean_token_accuracy": 0.48446146585047245, "num_tokens": 432327.0, "step": 6 }, { "epoch": 0.014155712841253791, "grad_norm": 3.9134953022003174, "learning_rate": 5.333333333333333e-05, "loss": 3.215, "mean_token_accuracy": 0.47610872238874435, "num_tokens": 506671.0, "step": 7 }, { "epoch": 0.016177957532861477, "grad_norm": 4.14130973815918, "learning_rate": 6.222222222222222e-05, "loss": 3.0424, "mean_token_accuracy": 0.47477637231349945, "num_tokens": 577418.0, "step": 8 }, { "epoch": 0.01820020222446916, "grad_norm": 4.15872859954834, "learning_rate": 7.111111111111112e-05, "loss": 3.0563, "mean_token_accuracy": 0.49431027099490166, "num_tokens": 640014.0, "step": 9 }, { "epoch": 0.020222446916076844, "grad_norm": 3.9895355701446533, "learning_rate": 8e-05, "loss": 2.6808, "mean_token_accuracy": 0.5322843790054321, "num_tokens": 704272.0, "step": 10 }, { "epoch": 0.022244691607684528, "grad_norm": 4.202198028564453, "learning_rate": 8.888888888888889e-05, "loss": 2.6339, "mean_token_accuracy": 0.5354921519756317, "num_tokens": 771361.0, "step": 11 }, { "epoch": 0.024266936299292215, "grad_norm": 4.070754051208496, "learning_rate": 9.777777777777778e-05, "loss": 2.2029, "mean_token_accuracy": 0.5880691334605217, "num_tokens": 846229.0, "step": 12 }, { "epoch": 0.0262891809908999, "grad_norm": 3.6637940406799316, "learning_rate": 0.00010666666666666667, "loss": 1.7795, "mean_token_accuracy": 0.6244243904948235, "num_tokens": 927862.0, "step": 13 }, { "epoch": 0.028311425682507583, "grad_norm": 3.9786410331726074, "learning_rate": 0.00011555555555555555, "loss": 1.9043, "mean_token_accuracy": 0.6317372992634773, "num_tokens": 988396.0, "step": 14 }, { "epoch": 0.030333670374115267, "grad_norm": 3.229816198348999, "learning_rate": 0.00012444444444444444, "loss": 1.63, "mean_token_accuracy": 0.65444141253829, "num_tokens": 1047670.0, "step": 15 }, { "epoch": 0.032355915065722954, "grad_norm": 2.8272366523742676, "learning_rate": 0.00013333333333333334, "loss": 1.4858, "mean_token_accuracy": 0.6778117530047894, "num_tokens": 1113088.0, "step": 16 }, { "epoch": 0.034378159757330634, "grad_norm": 2.599519968032837, "learning_rate": 0.00014222222222222224, "loss": 1.323, "mean_token_accuracy": 0.688772302120924, "num_tokens": 1178886.0, "step": 17 }, { "epoch": 0.03640040444893832, "grad_norm": 2.801631212234497, "learning_rate": 0.0001511111111111111, "loss": 1.2173, "mean_token_accuracy": 0.7124413475394249, "num_tokens": 1248356.0, "step": 18 }, { "epoch": 0.03842264914054601, "grad_norm": 3.745363473892212, "learning_rate": 0.00016, "loss": 1.0959, "mean_token_accuracy": 0.7285233177244663, "num_tokens": 1324299.0, "step": 19 }, { "epoch": 0.04044489383215369, "grad_norm": 4.511194229125977, "learning_rate": 0.00016888888888888889, "loss": 1.1729, "mean_token_accuracy": 0.7189365439116955, "num_tokens": 1392035.0, "step": 20 }, { "epoch": 0.042467138523761376, "grad_norm": 4.869667053222656, "learning_rate": 0.00017777777777777779, "loss": 0.965, "mean_token_accuracy": 0.7327957898378372, "num_tokens": 1474776.0, "step": 21 }, { "epoch": 0.044489383215369056, "grad_norm": 3.513063430786133, "learning_rate": 0.0001866666666666667, "loss": 0.958, "mean_token_accuracy": 0.7463030181825161, "num_tokens": 1546445.0, "step": 22 }, { "epoch": 0.046511627906976744, "grad_norm": 2.169617176055908, "learning_rate": 0.00019555555555555556, "loss": 0.9572, "mean_token_accuracy": 0.748451080173254, "num_tokens": 1614331.0, "step": 23 }, { "epoch": 0.04853387259858443, "grad_norm": 1.2484831809997559, "learning_rate": 0.00020444444444444443, "loss": 0.8834, "mean_token_accuracy": 0.7673822268843651, "num_tokens": 1679566.0, "step": 24 }, { "epoch": 0.05055611729019211, "grad_norm": 1.0600098371505737, "learning_rate": 0.00021333333333333333, "loss": 0.8514, "mean_token_accuracy": 0.7709708698093891, "num_tokens": 1741770.0, "step": 25 }, { "epoch": 0.0525783619817998, "grad_norm": 1.095992922782898, "learning_rate": 0.00022222222222222223, "loss": 0.8617, "mean_token_accuracy": 0.7583519890904427, "num_tokens": 1806990.0, "step": 26 }, { "epoch": 0.054600606673407485, "grad_norm": 1.0006545782089233, "learning_rate": 0.0002311111111111111, "loss": 0.7725, "mean_token_accuracy": 0.7819164581596851, "num_tokens": 1872686.0, "step": 27 }, { "epoch": 0.056622851365015166, "grad_norm": 0.6671711802482605, "learning_rate": 0.00024, "loss": 0.6548, "mean_token_accuracy": 0.8015744872391224, "num_tokens": 1943614.0, "step": 28 }, { "epoch": 0.05864509605662285, "grad_norm": 0.47610151767730713, "learning_rate": 0.0002488888888888889, "loss": 0.6524, "mean_token_accuracy": 0.8063510619103909, "num_tokens": 2021034.0, "step": 29 }, { "epoch": 0.06066734074823053, "grad_norm": 0.5676872730255127, "learning_rate": 0.00025777777777777783, "loss": 0.7402, "mean_token_accuracy": 0.784897617995739, "num_tokens": 2087348.0, "step": 30 }, { "epoch": 0.06268958543983821, "grad_norm": 0.6818390488624573, "learning_rate": 0.0002666666666666667, "loss": 0.6894, "mean_token_accuracy": 0.8017890304327011, "num_tokens": 2154170.0, "step": 31 }, { "epoch": 0.06471183013144591, "grad_norm": 0.5972866415977478, "learning_rate": 0.0002755555555555556, "loss": 0.612, "mean_token_accuracy": 0.8184943534433842, "num_tokens": 2229392.0, "step": 32 }, { "epoch": 0.06673407482305359, "grad_norm": 0.4275088608264923, "learning_rate": 0.0002844444444444445, "loss": 0.5885, "mean_token_accuracy": 0.8229578360915184, "num_tokens": 2290048.0, "step": 33 }, { "epoch": 0.06875631951466127, "grad_norm": 0.3523823618888855, "learning_rate": 0.0002933333333333333, "loss": 0.5766, "mean_token_accuracy": 0.82804736495018, "num_tokens": 2360740.0, "step": 34 }, { "epoch": 0.07077856420626896, "grad_norm": 0.45881009101867676, "learning_rate": 0.0003022222222222222, "loss": 0.6217, "mean_token_accuracy": 0.8134612888097763, "num_tokens": 2419828.0, "step": 35 }, { "epoch": 0.07280080889787664, "grad_norm": 0.46817246079444885, "learning_rate": 0.0003111111111111111, "loss": 0.5311, "mean_token_accuracy": 0.8388609476387501, "num_tokens": 2483206.0, "step": 36 }, { "epoch": 0.07482305358948432, "grad_norm": 0.36155763268470764, "learning_rate": 0.00032, "loss": 0.5268, "mean_token_accuracy": 0.8369965106248856, "num_tokens": 2556908.0, "step": 37 }, { "epoch": 0.07684529828109202, "grad_norm": 0.36704790592193604, "learning_rate": 0.00032888888888888887, "loss": 0.5548, "mean_token_accuracy": 0.8294766061007977, "num_tokens": 2626172.0, "step": 38 }, { "epoch": 0.0788675429726997, "grad_norm": 0.3038175106048584, "learning_rate": 0.00033777777777777777, "loss": 0.5232, "mean_token_accuracy": 0.8495447933673859, "num_tokens": 2693541.0, "step": 39 }, { "epoch": 0.08088978766430738, "grad_norm": 0.30305811762809753, "learning_rate": 0.00034666666666666667, "loss": 0.4882, "mean_token_accuracy": 0.8428361192345619, "num_tokens": 2758471.0, "step": 40 }, { "epoch": 0.08291203235591507, "grad_norm": 0.33424293994903564, "learning_rate": 0.00035555555555555557, "loss": 0.508, "mean_token_accuracy": 0.8437883704900742, "num_tokens": 2826093.0, "step": 41 }, { "epoch": 0.08493427704752275, "grad_norm": 0.3217228651046753, "learning_rate": 0.00036444444444444447, "loss": 0.5045, "mean_token_accuracy": 0.8461326025426388, "num_tokens": 2893222.0, "step": 42 }, { "epoch": 0.08695652173913043, "grad_norm": 0.26564908027648926, "learning_rate": 0.0003733333333333334, "loss": 0.5068, "mean_token_accuracy": 0.8433473333716393, "num_tokens": 2956663.0, "step": 43 }, { "epoch": 0.08897876643073811, "grad_norm": 0.25354474782943726, "learning_rate": 0.0003822222222222223, "loss": 0.4609, "mean_token_accuracy": 0.8593583293259144, "num_tokens": 3020245.0, "step": 44 }, { "epoch": 0.0910010111223458, "grad_norm": 0.31298667192459106, "learning_rate": 0.0003911111111111111, "loss": 0.4884, "mean_token_accuracy": 0.8503717556595802, "num_tokens": 3091022.0, "step": 45 }, { "epoch": 0.09302325581395349, "grad_norm": 0.23926222324371338, "learning_rate": 0.0004, "loss": 0.4635, "mean_token_accuracy": 0.8578044883906841, "num_tokens": 3167731.0, "step": 46 }, { "epoch": 0.09504550050556117, "grad_norm": 0.23057548701763153, "learning_rate": 0.00039999957163192333, "loss": 0.4464, "mean_token_accuracy": 0.8583495616912842, "num_tokens": 3230183.0, "step": 47 }, { "epoch": 0.09706774519716886, "grad_norm": 0.22786663472652435, "learning_rate": 0.0003999982865297322, "loss": 0.4165, "mean_token_accuracy": 0.8637920096516609, "num_tokens": 3300798.0, "step": 48 }, { "epoch": 0.09908998988877654, "grad_norm": 0.27733081579208374, "learning_rate": 0.0003999961446995433, "loss": 0.4348, "mean_token_accuracy": 0.8584615886211395, "num_tokens": 3368808.0, "step": 49 }, { "epoch": 0.10111223458038422, "grad_norm": 0.2632873058319092, "learning_rate": 0.00039999314615155084, "loss": 0.4545, "mean_token_accuracy": 0.8571835160255432, "num_tokens": 3436471.0, "step": 50 }, { "epoch": 0.10313447927199192, "grad_norm": 0.20401886105537415, "learning_rate": 0.000399989290900027, "loss": 0.426, "mean_token_accuracy": 0.8630774058401585, "num_tokens": 3504251.0, "step": 51 }, { "epoch": 0.1051567239635996, "grad_norm": 0.2126135528087616, "learning_rate": 0.0003999845789633213, "loss": 0.4209, "mean_token_accuracy": 0.8644996210932732, "num_tokens": 3569455.0, "step": 52 }, { "epoch": 0.10717896865520728, "grad_norm": 0.20767471194267273, "learning_rate": 0.00039997901036386093, "loss": 0.4312, "mean_token_accuracy": 0.8648513294756413, "num_tokens": 3633701.0, "step": 53 }, { "epoch": 0.10920121334681497, "grad_norm": 0.19368676841259003, "learning_rate": 0.0003999725851281504, "loss": 0.4219, "mean_token_accuracy": 0.8675987049937248, "num_tokens": 3700579.0, "step": 54 }, { "epoch": 0.11122345803842265, "grad_norm": 0.19997400045394897, "learning_rate": 0.0003999653032867717, "loss": 0.4305, "mean_token_accuracy": 0.8599656298756599, "num_tokens": 3766515.0, "step": 55 }, { "epoch": 0.11324570273003033, "grad_norm": 0.19456814229488373, "learning_rate": 0.00039995716487438367, "loss": 0.4084, "mean_token_accuracy": 0.8680460080504417, "num_tokens": 3832179.0, "step": 56 }, { "epoch": 0.11526794742163801, "grad_norm": 0.19756172597408295, "learning_rate": 0.00039994816992972227, "loss": 0.4199, "mean_token_accuracy": 0.8612547963857651, "num_tokens": 3898904.0, "step": 57 }, { "epoch": 0.1172901921132457, "grad_norm": 0.1712576448917389, "learning_rate": 0.0003999383184956003, "loss": 0.36, "mean_token_accuracy": 0.879060622304678, "num_tokens": 3976416.0, "step": 58 }, { "epoch": 0.11931243680485339, "grad_norm": 0.20002008974552155, "learning_rate": 0.00039992761061890717, "loss": 0.4269, "mean_token_accuracy": 0.8589905127882957, "num_tokens": 4036526.0, "step": 59 }, { "epoch": 0.12133468149646107, "grad_norm": 0.1924401819705963, "learning_rate": 0.00039991604635060835, "loss": 0.4268, "mean_token_accuracy": 0.8678371347486973, "num_tokens": 4100376.0, "step": 60 }, { "epoch": 0.12335692618806876, "grad_norm": 0.17639940977096558, "learning_rate": 0.00039990362574574586, "loss": 0.3919, "mean_token_accuracy": 0.8658471070230007, "num_tokens": 4165704.0, "step": 61 }, { "epoch": 0.12537917087967643, "grad_norm": 0.1817377358675003, "learning_rate": 0.00039989034886343724, "loss": 0.3735, "mean_token_accuracy": 0.8759783655405045, "num_tokens": 4234412.0, "step": 62 }, { "epoch": 0.12740141557128412, "grad_norm": 0.18214447796344757, "learning_rate": 0.00039987621576687585, "loss": 0.3454, "mean_token_accuracy": 0.8825861141085625, "num_tokens": 4307593.0, "step": 63 }, { "epoch": 0.12942366026289182, "grad_norm": 0.18159601092338562, "learning_rate": 0.0003998612265233302, "loss": 0.3672, "mean_token_accuracy": 0.8755885139107704, "num_tokens": 4376630.0, "step": 64 }, { "epoch": 0.13144590495449948, "grad_norm": 0.17050184309482574, "learning_rate": 0.00039984538120414363, "loss": 0.3333, "mean_token_accuracy": 0.8833661302924156, "num_tokens": 4449580.0, "step": 65 }, { "epoch": 0.13346814964610718, "grad_norm": 0.20457544922828674, "learning_rate": 0.0003998286798847344, "loss": 0.4182, "mean_token_accuracy": 0.8619738966226578, "num_tokens": 4518076.0, "step": 66 }, { "epoch": 0.13549039433771487, "grad_norm": 0.196366086602211, "learning_rate": 0.00039981112264459486, "loss": 0.3386, "mean_token_accuracy": 0.8908565118908882, "num_tokens": 4581622.0, "step": 67 }, { "epoch": 0.13751263902932254, "grad_norm": 0.18182213604450226, "learning_rate": 0.00039979270956729115, "loss": 0.3999, "mean_token_accuracy": 0.8703116998076439, "num_tokens": 4646580.0, "step": 68 }, { "epoch": 0.13953488372093023, "grad_norm": 0.18271780014038086, "learning_rate": 0.0003997734407404631, "loss": 0.3504, "mean_token_accuracy": 0.8762697987258434, "num_tokens": 4716771.0, "step": 69 }, { "epoch": 0.14155712841253792, "grad_norm": 0.19590984284877777, "learning_rate": 0.0003997533162558233, "loss": 0.3753, "mean_token_accuracy": 0.8757792375981808, "num_tokens": 4789100.0, "step": 70 }, { "epoch": 0.1435793731041456, "grad_norm": 0.23697857558727264, "learning_rate": 0.00039973233620915733, "loss": 0.4225, "mean_token_accuracy": 0.8598962388932705, "num_tokens": 4851640.0, "step": 71 }, { "epoch": 0.14560161779575329, "grad_norm": 0.19626037776470184, "learning_rate": 0.0003997105007003228, "loss": 0.3572, "mean_token_accuracy": 0.8849809169769287, "num_tokens": 4916098.0, "step": 72 }, { "epoch": 0.14762386248736098, "grad_norm": 0.20964385569095612, "learning_rate": 0.00039968780983324893, "loss": 0.3507, "mean_token_accuracy": 0.8796872869133949, "num_tokens": 4979744.0, "step": 73 }, { "epoch": 0.14964610717896865, "grad_norm": 0.18054573237895966, "learning_rate": 0.00039966426371593607, "loss": 0.3683, "mean_token_accuracy": 0.8814638741314411, "num_tokens": 5050714.0, "step": 74 }, { "epoch": 0.15166835187057634, "grad_norm": 0.16331350803375244, "learning_rate": 0.0003996398624604556, "loss": 0.3406, "mean_token_accuracy": 0.8873084634542465, "num_tokens": 5130559.0, "step": 75 }, { "epoch": 0.15369059656218403, "grad_norm": 0.20746077597141266, "learning_rate": 0.0003996146061829487, "loss": 0.3762, "mean_token_accuracy": 0.8765941001474857, "num_tokens": 5199691.0, "step": 76 }, { "epoch": 0.1557128412537917, "grad_norm": 0.16679136455059052, "learning_rate": 0.0003995884950036263, "loss": 0.3691, "mean_token_accuracy": 0.8776806406676769, "num_tokens": 5277116.0, "step": 77 }, { "epoch": 0.1577350859453994, "grad_norm": 0.2159774750471115, "learning_rate": 0.00039956152904676835, "loss": 0.4017, "mean_token_accuracy": 0.8709179721772671, "num_tokens": 5343258.0, "step": 78 }, { "epoch": 0.1597573306370071, "grad_norm": 0.16525208950042725, "learning_rate": 0.00039953370844072333, "loss": 0.372, "mean_token_accuracy": 0.8759802021086216, "num_tokens": 5418084.0, "step": 79 }, { "epoch": 0.16177957532861476, "grad_norm": 0.18534427881240845, "learning_rate": 0.00039950503331790744, "loss": 0.4236, "mean_token_accuracy": 0.8610594123601913, "num_tokens": 5483557.0, "step": 80 }, { "epoch": 0.16380182002022245, "grad_norm": 0.17822565138339996, "learning_rate": 0.000399475503814804, "loss": 0.347, "mean_token_accuracy": 0.883899986743927, "num_tokens": 5559324.0, "step": 81 }, { "epoch": 0.16582406471183014, "grad_norm": 0.16568556427955627, "learning_rate": 0.00039944512007196307, "loss": 0.3046, "mean_token_accuracy": 0.8883480541408062, "num_tokens": 5646732.0, "step": 82 }, { "epoch": 0.1678463094034378, "grad_norm": 0.20850011706352234, "learning_rate": 0.0003994138822340004, "loss": 0.3727, "mean_token_accuracy": 0.8808489926159382, "num_tokens": 5709555.0, "step": 83 }, { "epoch": 0.1698685540950455, "grad_norm": 0.19419965147972107, "learning_rate": 0.00039938179044959714, "loss": 0.3667, "mean_token_accuracy": 0.8805488795042038, "num_tokens": 5779149.0, "step": 84 }, { "epoch": 0.1718907987866532, "grad_norm": 0.21039818227291107, "learning_rate": 0.0003993488448714986, "loss": 0.3912, "mean_token_accuracy": 0.8791179358959198, "num_tokens": 5850163.0, "step": 85 }, { "epoch": 0.17391304347826086, "grad_norm": 0.2167867124080658, "learning_rate": 0.00039931504565651424, "loss": 0.3571, "mean_token_accuracy": 0.8792387843132019, "num_tokens": 5916129.0, "step": 86 }, { "epoch": 0.17593528816986856, "grad_norm": 0.2154702991247177, "learning_rate": 0.0003992803929655162, "loss": 0.3868, "mean_token_accuracy": 0.8748185895383358, "num_tokens": 5979082.0, "step": 87 }, { "epoch": 0.17795753286147623, "grad_norm": 0.1713341772556305, "learning_rate": 0.00039924488696343915, "loss": 0.338, "mean_token_accuracy": 0.8834210820496082, "num_tokens": 6048831.0, "step": 88 }, { "epoch": 0.17997977755308392, "grad_norm": 0.20742323994636536, "learning_rate": 0.00039920852781927886, "loss": 0.3911, "mean_token_accuracy": 0.868148323148489, "num_tokens": 6114503.0, "step": 89 }, { "epoch": 0.1820020222446916, "grad_norm": 0.18235628306865692, "learning_rate": 0.0003991713157060922, "loss": 0.3169, "mean_token_accuracy": 0.8923499137163162, "num_tokens": 6184293.0, "step": 90 }, { "epoch": 0.18402426693629928, "grad_norm": 0.18693064153194427, "learning_rate": 0.00039913325080099545, "loss": 0.3678, "mean_token_accuracy": 0.8744825124740601, "num_tokens": 6252712.0, "step": 91 }, { "epoch": 0.18604651162790697, "grad_norm": 0.19899111986160278, "learning_rate": 0.0003990943332851641, "loss": 0.3497, "mean_token_accuracy": 0.8849819526076317, "num_tokens": 6313767.0, "step": 92 }, { "epoch": 0.18806875631951467, "grad_norm": 0.19068098068237305, "learning_rate": 0.0003990545633438318, "loss": 0.3492, "mean_token_accuracy": 0.8846092559397221, "num_tokens": 6382110.0, "step": 93 }, { "epoch": 0.19009100101112233, "grad_norm": 0.19140516221523285, "learning_rate": 0.0003990139411662892, "loss": 0.3434, "mean_token_accuracy": 0.8847804144024849, "num_tokens": 6445880.0, "step": 94 }, { "epoch": 0.19211324570273003, "grad_norm": 0.22566284239292145, "learning_rate": 0.00039897246694588364, "loss": 0.3726, "mean_token_accuracy": 0.8737127743661404, "num_tokens": 6512190.0, "step": 95 }, { "epoch": 0.19413549039433772, "grad_norm": 0.193269744515419, "learning_rate": 0.00039893014088001754, "loss": 0.3689, "mean_token_accuracy": 0.8768584616482258, "num_tokens": 6581328.0, "step": 96 }, { "epoch": 0.1961577350859454, "grad_norm": 0.19110015034675598, "learning_rate": 0.00039888696317014807, "loss": 0.3307, "mean_token_accuracy": 0.8812081180512905, "num_tokens": 6653124.0, "step": 97 }, { "epoch": 0.19817997977755308, "grad_norm": 0.18114197254180908, "learning_rate": 0.00039884293402178575, "loss": 0.3451, "mean_token_accuracy": 0.8798027820885181, "num_tokens": 6723465.0, "step": 98 }, { "epoch": 0.20020222446916078, "grad_norm": 0.19303397834300995, "learning_rate": 0.0003987980536444938, "loss": 0.334, "mean_token_accuracy": 0.8881032280623913, "num_tokens": 6801637.0, "step": 99 }, { "epoch": 0.20222446916076844, "grad_norm": 0.1839206963777542, "learning_rate": 0.0003987523222518868, "loss": 0.3344, "mean_token_accuracy": 0.8791452720761299, "num_tokens": 6879826.0, "step": 100 }, { "epoch": 0.20424671385237614, "grad_norm": 0.1716805100440979, "learning_rate": 0.0003987057400616299, "loss": 0.3494, "mean_token_accuracy": 0.8803286664187908, "num_tokens": 6958940.0, "step": 101 }, { "epoch": 0.20626895854398383, "grad_norm": 0.218710795044899, "learning_rate": 0.000398658307295438, "loss": 0.3696, "mean_token_accuracy": 0.8783976249396801, "num_tokens": 7019640.0, "step": 102 }, { "epoch": 0.2082912032355915, "grad_norm": 0.2176671177148819, "learning_rate": 0.0003986100241790741, "loss": 0.3778, "mean_token_accuracy": 0.8742088116705418, "num_tokens": 7083893.0, "step": 103 }, { "epoch": 0.2103134479271992, "grad_norm": 0.20480629801750183, "learning_rate": 0.0003985608909423487, "loss": 0.3644, "mean_token_accuracy": 0.8779697194695473, "num_tokens": 7146243.0, "step": 104 }, { "epoch": 0.2123356926188069, "grad_norm": 0.21523724496364594, "learning_rate": 0.0003985109078191187, "loss": 0.3384, "mean_token_accuracy": 0.8801298663020134, "num_tokens": 7211820.0, "step": 105 }, { "epoch": 0.21435793731041455, "grad_norm": 0.2035398781299591, "learning_rate": 0.00039846007504728593, "loss": 0.3553, "mean_token_accuracy": 0.8752279430627823, "num_tokens": 7280455.0, "step": 106 }, { "epoch": 0.21638018200202225, "grad_norm": 0.1565598100423813, "learning_rate": 0.00039840839286879636, "loss": 0.3034, "mean_token_accuracy": 0.8931353390216827, "num_tokens": 7357510.0, "step": 107 }, { "epoch": 0.21840242669362994, "grad_norm": 0.17082397639751434, "learning_rate": 0.00039835586152963884, "loss": 0.3135, "mean_token_accuracy": 0.883228026330471, "num_tokens": 7428821.0, "step": 108 }, { "epoch": 0.2204246713852376, "grad_norm": 0.18526601791381836, "learning_rate": 0.0003983024812798439, "loss": 0.3156, "mean_token_accuracy": 0.88564358279109, "num_tokens": 7494951.0, "step": 109 }, { "epoch": 0.2224469160768453, "grad_norm": 0.20190876722335815, "learning_rate": 0.0003982482523734827, "loss": 0.3393, "mean_token_accuracy": 0.8834404349327087, "num_tokens": 7558067.0, "step": 110 }, { "epoch": 0.224469160768453, "grad_norm": 0.1943565011024475, "learning_rate": 0.00039819317506866543, "loss": 0.3582, "mean_token_accuracy": 0.8790641874074936, "num_tokens": 7630543.0, "step": 111 }, { "epoch": 0.22649140546006066, "grad_norm": 0.2155260592699051, "learning_rate": 0.00039813724962754066, "loss": 0.3514, "mean_token_accuracy": 0.8799824342131615, "num_tokens": 7693798.0, "step": 112 }, { "epoch": 0.22851365015166836, "grad_norm": 0.17986060678958893, "learning_rate": 0.00039808047631629363, "loss": 0.3361, "mean_token_accuracy": 0.8870190940797329, "num_tokens": 7763267.0, "step": 113 }, { "epoch": 0.23053589484327602, "grad_norm": 0.18999366462230682, "learning_rate": 0.00039802285540514504, "loss": 0.325, "mean_token_accuracy": 0.8898543640971184, "num_tokens": 7834437.0, "step": 114 }, { "epoch": 0.23255813953488372, "grad_norm": 0.20567375421524048, "learning_rate": 0.0003979643871683501, "loss": 0.3734, "mean_token_accuracy": 0.8742238134145737, "num_tokens": 7896274.0, "step": 115 }, { "epoch": 0.2345803842264914, "grad_norm": 0.18579523265361786, "learning_rate": 0.000397905071884197, "loss": 0.3543, "mean_token_accuracy": 0.8827438056468964, "num_tokens": 7962304.0, "step": 116 }, { "epoch": 0.23660262891809908, "grad_norm": 0.1895459145307541, "learning_rate": 0.00039784490983500514, "loss": 0.2899, "mean_token_accuracy": 0.888210829347372, "num_tokens": 8037020.0, "step": 117 }, { "epoch": 0.23862487360970677, "grad_norm": 0.1934623420238495, "learning_rate": 0.0003977839013071248, "loss": 0.3172, "mean_token_accuracy": 0.8874295391142368, "num_tokens": 8106669.0, "step": 118 }, { "epoch": 0.24064711830131447, "grad_norm": 0.18337437510490417, "learning_rate": 0.0003977220465909348, "loss": 0.328, "mean_token_accuracy": 0.884034089744091, "num_tokens": 8174813.0, "step": 119 }, { "epoch": 0.24266936299292213, "grad_norm": 0.18985910713672638, "learning_rate": 0.00039765934598084176, "loss": 0.3396, "mean_token_accuracy": 0.8789964653551579, "num_tokens": 8247396.0, "step": 120 }, { "epoch": 0.24469160768452983, "grad_norm": 0.20584100484848022, "learning_rate": 0.0003975957997752783, "loss": 0.3537, "mean_token_accuracy": 0.8752495422959328, "num_tokens": 8310521.0, "step": 121 }, { "epoch": 0.24671385237613752, "grad_norm": 0.20211565494537354, "learning_rate": 0.00039753140827670163, "loss": 0.3607, "mean_token_accuracy": 0.877599012106657, "num_tokens": 8374419.0, "step": 122 }, { "epoch": 0.2487360970677452, "grad_norm": 0.21102474629878998, "learning_rate": 0.00039746617179159274, "loss": 0.3411, "mean_token_accuracy": 0.8837038949131966, "num_tokens": 8436270.0, "step": 123 }, { "epoch": 0.25075834175935285, "grad_norm": 0.22104637324810028, "learning_rate": 0.000397400090630454, "loss": 0.3467, "mean_token_accuracy": 0.8838667124509811, "num_tokens": 8496108.0, "step": 124 }, { "epoch": 0.2527805864509606, "grad_norm": 0.23561948537826538, "learning_rate": 0.0003973331651078084, "loss": 0.3933, "mean_token_accuracy": 0.864571388810873, "num_tokens": 8558449.0, "step": 125 }, { "epoch": 0.25480283114256824, "grad_norm": 0.17975358664989471, "learning_rate": 0.00039726539554219746, "loss": 0.3246, "mean_token_accuracy": 0.8921530395746231, "num_tokens": 8633747.0, "step": 126 }, { "epoch": 0.2568250758341759, "grad_norm": 0.19475312530994415, "learning_rate": 0.0003971967822561805, "loss": 0.359, "mean_token_accuracy": 0.878424908965826, "num_tokens": 8700730.0, "step": 127 }, { "epoch": 0.25884732052578363, "grad_norm": 0.23659245669841766, "learning_rate": 0.0003971273255763324, "loss": 0.3606, "mean_token_accuracy": 0.8830053992569447, "num_tokens": 8760014.0, "step": 128 }, { "epoch": 0.2608695652173913, "grad_norm": 0.19543145596981049, "learning_rate": 0.0003970570258332422, "loss": 0.3309, "mean_token_accuracy": 0.8853320479393005, "num_tokens": 8824736.0, "step": 129 }, { "epoch": 0.26289180990899896, "grad_norm": 0.2464882731437683, "learning_rate": 0.0003969858833615119, "loss": 0.3589, "mean_token_accuracy": 0.8793282993137836, "num_tokens": 8887323.0, "step": 130 }, { "epoch": 0.2649140546006067, "grad_norm": 0.16774067282676697, "learning_rate": 0.0003969138984997542, "loss": 0.3198, "mean_token_accuracy": 0.8886825554072857, "num_tokens": 8965857.0, "step": 131 }, { "epoch": 0.26693629929221435, "grad_norm": 0.19399577379226685, "learning_rate": 0.00039684107159059174, "loss": 0.3468, "mean_token_accuracy": 0.8808378390967846, "num_tokens": 9039028.0, "step": 132 }, { "epoch": 0.268958543983822, "grad_norm": 0.1961926966905594, "learning_rate": 0.00039676740298065467, "loss": 0.3501, "mean_token_accuracy": 0.8791337199509144, "num_tokens": 9108645.0, "step": 133 }, { "epoch": 0.27098078867542974, "grad_norm": 0.16180327534675598, "learning_rate": 0.00039669289302057955, "loss": 0.3291, "mean_token_accuracy": 0.889164712280035, "num_tokens": 9182295.0, "step": 134 }, { "epoch": 0.2730030333670374, "grad_norm": 0.18792307376861572, "learning_rate": 0.00039661754206500723, "loss": 0.305, "mean_token_accuracy": 0.890954252332449, "num_tokens": 9253798.0, "step": 135 }, { "epoch": 0.2750252780586451, "grad_norm": 0.2211407721042633, "learning_rate": 0.0003965413504725815, "loss": 0.3516, "mean_token_accuracy": 0.8829210363328457, "num_tokens": 9319632.0, "step": 136 }, { "epoch": 0.2770475227502528, "grad_norm": 0.15928597748279572, "learning_rate": 0.0003964643186059474, "loss": 0.3209, "mean_token_accuracy": 0.8902908116579056, "num_tokens": 9396460.0, "step": 137 }, { "epoch": 0.27906976744186046, "grad_norm": 0.25479844212532043, "learning_rate": 0.00039638644683174937, "loss": 0.3247, "mean_token_accuracy": 0.8880501836538315, "num_tokens": 9460466.0, "step": 138 }, { "epoch": 0.2810920121334681, "grad_norm": 0.17745117843151093, "learning_rate": 0.00039630773552062925, "loss": 0.3383, "mean_token_accuracy": 0.8863355927169323, "num_tokens": 9532155.0, "step": 139 }, { "epoch": 0.28311425682507585, "grad_norm": 0.22157195210456848, "learning_rate": 0.0003962281850472251, "loss": 0.3499, "mean_token_accuracy": 0.879049763083458, "num_tokens": 9590255.0, "step": 140 }, { "epoch": 0.2851365015166835, "grad_norm": 0.1807304471731186, "learning_rate": 0.0003961477957901689, "loss": 0.3065, "mean_token_accuracy": 0.8949154578149319, "num_tokens": 9667027.0, "step": 141 }, { "epoch": 0.2871587462082912, "grad_norm": 0.23244738578796387, "learning_rate": 0.00039606656813208504, "loss": 0.3608, "mean_token_accuracy": 0.8768214285373688, "num_tokens": 9723117.0, "step": 142 }, { "epoch": 0.2891809908998989, "grad_norm": 0.18404552340507507, "learning_rate": 0.0003959845024595883, "loss": 0.2972, "mean_token_accuracy": 0.8935975506901741, "num_tokens": 9792714.0, "step": 143 }, { "epoch": 0.29120323559150657, "grad_norm": 0.21092693507671356, "learning_rate": 0.00039590159916328224, "loss": 0.3552, "mean_token_accuracy": 0.8813748992979527, "num_tokens": 9846790.0, "step": 144 }, { "epoch": 0.29322548028311424, "grad_norm": 0.18293221294879913, "learning_rate": 0.00039581785863775705, "loss": 0.3497, "mean_token_accuracy": 0.8868285343050957, "num_tokens": 9920682.0, "step": 145 }, { "epoch": 0.29524772497472196, "grad_norm": 0.23161938786506653, "learning_rate": 0.00039573328128158803, "loss": 0.3671, "mean_token_accuracy": 0.8772343806922436, "num_tokens": 9989629.0, "step": 146 }, { "epoch": 0.2972699696663296, "grad_norm": 0.19797147810459137, "learning_rate": 0.0003956478674973333, "loss": 0.356, "mean_token_accuracy": 0.8782718777656555, "num_tokens": 10048794.0, "step": 147 }, { "epoch": 0.2992922143579373, "grad_norm": 0.18177340924739838, "learning_rate": 0.00039556161769153226, "loss": 0.3122, "mean_token_accuracy": 0.8886930793523788, "num_tokens": 10116701.0, "step": 148 }, { "epoch": 0.301314459049545, "grad_norm": 0.24357731640338898, "learning_rate": 0.0003954745322747034, "loss": 0.344, "mean_token_accuracy": 0.8848157115280628, "num_tokens": 10176439.0, "step": 149 }, { "epoch": 0.3033367037411527, "grad_norm": 0.18051762878894806, "learning_rate": 0.00039538661166134236, "loss": 0.3134, "mean_token_accuracy": 0.8913725949823856, "num_tokens": 10248461.0, "step": 150 }, { "epoch": 0.30535894843276035, "grad_norm": 0.20022518932819366, "learning_rate": 0.00039529785626992006, "loss": 0.3436, "mean_token_accuracy": 0.8848014548420906, "num_tokens": 10310254.0, "step": 151 }, { "epoch": 0.30738119312436807, "grad_norm": 0.23199647665023804, "learning_rate": 0.0003952082665228808, "loss": 0.3233, "mean_token_accuracy": 0.8871180489659309, "num_tokens": 10375248.0, "step": 152 }, { "epoch": 0.30940343781597573, "grad_norm": 0.18778662383556366, "learning_rate": 0.00039511784284663976, "loss": 0.3044, "mean_token_accuracy": 0.8951373845338821, "num_tokens": 10442606.0, "step": 153 }, { "epoch": 0.3114256825075834, "grad_norm": 0.17572450637817383, "learning_rate": 0.0003950265856715818, "loss": 0.3331, "mean_token_accuracy": 0.8889199234545231, "num_tokens": 10509923.0, "step": 154 }, { "epoch": 0.3134479271991911, "grad_norm": 0.16929855942726135, "learning_rate": 0.0003949344954320586, "loss": 0.348, "mean_token_accuracy": 0.8804797492921352, "num_tokens": 10579730.0, "step": 155 }, { "epoch": 0.3154701718907988, "grad_norm": 0.17655323445796967, "learning_rate": 0.0003948415725663871, "loss": 0.3293, "mean_token_accuracy": 0.883028332144022, "num_tokens": 10648731.0, "step": 156 }, { "epoch": 0.31749241658240646, "grad_norm": 0.1909574717283249, "learning_rate": 0.00039474781751684706, "loss": 0.3183, "mean_token_accuracy": 0.8886212892830372, "num_tokens": 10713689.0, "step": 157 }, { "epoch": 0.3195146612740142, "grad_norm": 0.17727530002593994, "learning_rate": 0.00039465323072967936, "loss": 0.3237, "mean_token_accuracy": 0.8898195438086987, "num_tokens": 10785736.0, "step": 158 }, { "epoch": 0.32153690596562184, "grad_norm": 0.18001440167427063, "learning_rate": 0.00039455781265508355, "loss": 0.332, "mean_token_accuracy": 0.8871553801000118, "num_tokens": 10856647.0, "step": 159 }, { "epoch": 0.3235591506572295, "grad_norm": 0.19728383421897888, "learning_rate": 0.0003944615637472158, "loss": 0.3621, "mean_token_accuracy": 0.8775678239762783, "num_tokens": 10918872.0, "step": 160 }, { "epoch": 0.32558139534883723, "grad_norm": 0.20538869500160217, "learning_rate": 0.00039436448446418683, "loss": 0.3633, "mean_token_accuracy": 0.8745956718921661, "num_tokens": 10981209.0, "step": 161 }, { "epoch": 0.3276036400404449, "grad_norm": 0.19733993709087372, "learning_rate": 0.00039426657526805937, "loss": 0.3201, "mean_token_accuracy": 0.8928566165268421, "num_tokens": 11047089.0, "step": 162 }, { "epoch": 0.32962588473205257, "grad_norm": 0.15281331539154053, "learning_rate": 0.0003941678366248468, "loss": 0.3003, "mean_token_accuracy": 0.8931796550750732, "num_tokens": 11122846.0, "step": 163 }, { "epoch": 0.3316481294236603, "grad_norm": 0.17711788415908813, "learning_rate": 0.00039406826900450977, "loss": 0.3127, "mean_token_accuracy": 0.892613273113966, "num_tokens": 11197993.0, "step": 164 }, { "epoch": 0.33367037411526795, "grad_norm": 0.2002251148223877, "learning_rate": 0.00039396787288095497, "loss": 0.3328, "mean_token_accuracy": 0.8890563920140266, "num_tokens": 11267855.0, "step": 165 }, { "epoch": 0.3356926188068756, "grad_norm": 0.16182006895542145, "learning_rate": 0.00039386664873203227, "loss": 0.3251, "mean_token_accuracy": 0.8839607983827591, "num_tokens": 11344330.0, "step": 166 }, { "epoch": 0.33771486349848334, "grad_norm": 0.16747458279132843, "learning_rate": 0.00039376459703953284, "loss": 0.3249, "mean_token_accuracy": 0.8876189365983009, "num_tokens": 11418350.0, "step": 167 }, { "epoch": 0.339737108190091, "grad_norm": 0.1826547235250473, "learning_rate": 0.0003936617182891864, "loss": 0.3291, "mean_token_accuracy": 0.8888828568160534, "num_tokens": 11485723.0, "step": 168 }, { "epoch": 0.3417593528816987, "grad_norm": 0.18488235771656036, "learning_rate": 0.0003935580129706593, "loss": 0.3097, "mean_token_accuracy": 0.8907660692930222, "num_tokens": 11551678.0, "step": 169 }, { "epoch": 0.3437815975733064, "grad_norm": 0.23008394241333008, "learning_rate": 0.00039345348157755213, "loss": 0.3533, "mean_token_accuracy": 0.8763989768922329, "num_tokens": 11609063.0, "step": 170 }, { "epoch": 0.34580384226491406, "grad_norm": 0.2060030996799469, "learning_rate": 0.0003933481246073973, "loss": 0.3399, "mean_token_accuracy": 0.8879686929285526, "num_tokens": 11673330.0, "step": 171 }, { "epoch": 0.34782608695652173, "grad_norm": 0.17570629715919495, "learning_rate": 0.0003932419425616565, "loss": 0.3454, "mean_token_accuracy": 0.8838200494647026, "num_tokens": 11740475.0, "step": 172 }, { "epoch": 0.34984833164812945, "grad_norm": 0.16710588335990906, "learning_rate": 0.0003931349359457187, "loss": 0.2969, "mean_token_accuracy": 0.899805661290884, "num_tokens": 11806954.0, "step": 173 }, { "epoch": 0.3518705763397371, "grad_norm": 0.20197796821594238, "learning_rate": 0.0003930271052688974, "loss": 0.3525, "mean_token_accuracy": 0.8779477626085281, "num_tokens": 11870286.0, "step": 174 }, { "epoch": 0.3538928210313448, "grad_norm": 0.17107857763767242, "learning_rate": 0.0003929184510444284, "loss": 0.3266, "mean_token_accuracy": 0.8888569958508015, "num_tokens": 11947117.0, "step": 175 }, { "epoch": 0.35591506572295245, "grad_norm": 0.17827239632606506, "learning_rate": 0.0003928089737894672, "loss": 0.3252, "mean_token_accuracy": 0.8897545039653778, "num_tokens": 12009582.0, "step": 176 }, { "epoch": 0.3579373104145602, "grad_norm": 0.22990773618221283, "learning_rate": 0.00039269867402508675, "loss": 0.3549, "mean_token_accuracy": 0.8815719597041607, "num_tokens": 12072827.0, "step": 177 }, { "epoch": 0.35995955510616784, "grad_norm": 0.19108358025550842, "learning_rate": 0.00039258755227627475, "loss": 0.3549, "mean_token_accuracy": 0.8812212906777859, "num_tokens": 12141736.0, "step": 178 }, { "epoch": 0.3619817997977755, "grad_norm": 0.19387130439281464, "learning_rate": 0.0003924756090719314, "loss": 0.3057, "mean_token_accuracy": 0.8937871865928173, "num_tokens": 12212850.0, "step": 179 }, { "epoch": 0.3640040444893832, "grad_norm": 0.19616757333278656, "learning_rate": 0.0003923628449448666, "loss": 0.3337, "mean_token_accuracy": 0.8879410326480865, "num_tokens": 12278676.0, "step": 180 }, { "epoch": 0.3660262891809909, "grad_norm": 0.19950613379478455, "learning_rate": 0.0003922492604317976, "loss": 0.333, "mean_token_accuracy": 0.8837904818356037, "num_tokens": 12344019.0, "step": 181 }, { "epoch": 0.36804853387259856, "grad_norm": 0.18320327997207642, "learning_rate": 0.0003921348560733464, "loss": 0.3379, "mean_token_accuracy": 0.8864001519978046, "num_tokens": 12414279.0, "step": 182 }, { "epoch": 0.3700707785642063, "grad_norm": 0.19148240983486176, "learning_rate": 0.0003920196324140371, "loss": 0.3438, "mean_token_accuracy": 0.8869296424090862, "num_tokens": 12481557.0, "step": 183 }, { "epoch": 0.37209302325581395, "grad_norm": 0.16867059469223022, "learning_rate": 0.00039190359000229364, "loss": 0.3347, "mean_token_accuracy": 0.8817239366471767, "num_tokens": 12552783.0, "step": 184 }, { "epoch": 0.3741152679474216, "grad_norm": 0.20269234478473663, "learning_rate": 0.0003917867293904365, "loss": 0.3599, "mean_token_accuracy": 0.8779093511402607, "num_tokens": 12611751.0, "step": 185 }, { "epoch": 0.37613751263902934, "grad_norm": 0.1963576078414917, "learning_rate": 0.0003916690511346809, "loss": 0.3219, "mean_token_accuracy": 0.8882619775831699, "num_tokens": 12674136.0, "step": 186 }, { "epoch": 0.378159757330637, "grad_norm": 0.1874200403690338, "learning_rate": 0.0003915505557951335, "loss": 0.2945, "mean_token_accuracy": 0.8926714062690735, "num_tokens": 12754627.0, "step": 187 }, { "epoch": 0.38018200202224467, "grad_norm": 0.21084272861480713, "learning_rate": 0.0003914312439357901, "loss": 0.3492, "mean_token_accuracy": 0.8815909698605537, "num_tokens": 12812878.0, "step": 188 }, { "epoch": 0.3822042467138524, "grad_norm": 0.21426641941070557, "learning_rate": 0.00039131111612453293, "loss": 0.3226, "mean_token_accuracy": 0.8860650397837162, "num_tokens": 12876950.0, "step": 189 }, { "epoch": 0.38422649140546006, "grad_norm": 0.1843956857919693, "learning_rate": 0.0003911901729331277, "loss": 0.3012, "mean_token_accuracy": 0.8955246210098267, "num_tokens": 12940008.0, "step": 190 }, { "epoch": 0.3862487360970677, "grad_norm": 0.16776444017887115, "learning_rate": 0.00039106841493722103, "loss": 0.2915, "mean_token_accuracy": 0.8939312994480133, "num_tokens": 13011277.0, "step": 191 }, { "epoch": 0.38827098078867545, "grad_norm": 0.21435709297657013, "learning_rate": 0.0003909458427163379, "loss": 0.3297, "mean_token_accuracy": 0.8883927799761295, "num_tokens": 13076795.0, "step": 192 }, { "epoch": 0.3902932254802831, "grad_norm": 0.18475346267223358, "learning_rate": 0.00039082245685387855, "loss": 0.3322, "mean_token_accuracy": 0.8888528421521187, "num_tokens": 13142952.0, "step": 193 }, { "epoch": 0.3923154701718908, "grad_norm": 0.19243639707565308, "learning_rate": 0.00039069825793711587, "loss": 0.3213, "mean_token_accuracy": 0.8921789862215519, "num_tokens": 13211022.0, "step": 194 }, { "epoch": 0.3943377148634985, "grad_norm": 0.1858910322189331, "learning_rate": 0.0003905732465571928, "loss": 0.3179, "mean_token_accuracy": 0.8920286670327187, "num_tokens": 13276701.0, "step": 195 }, { "epoch": 0.39635995955510617, "grad_norm": 0.20470379292964935, "learning_rate": 0.0003904474233091191, "loss": 0.3189, "mean_token_accuracy": 0.8954358175396919, "num_tokens": 13344684.0, "step": 196 }, { "epoch": 0.39838220424671383, "grad_norm": 0.18819299340248108, "learning_rate": 0.00039032078879176865, "loss": 0.3447, "mean_token_accuracy": 0.8849571086466312, "num_tokens": 13409885.0, "step": 197 }, { "epoch": 0.40040444893832156, "grad_norm": 0.17828333377838135, "learning_rate": 0.00039019334360787706, "loss": 0.324, "mean_token_accuracy": 0.8868827521800995, "num_tokens": 13473352.0, "step": 198 }, { "epoch": 0.4024266936299292, "grad_norm": 0.18609419465065002, "learning_rate": 0.0003900650883640381, "loss": 0.285, "mean_token_accuracy": 0.8940243273973465, "num_tokens": 13540264.0, "step": 199 }, { "epoch": 0.4044489383215369, "grad_norm": 0.18147540092468262, "learning_rate": 0.0003899360236707012, "loss": 0.3077, "mean_token_accuracy": 0.8837394788861275, "num_tokens": 13610806.0, "step": 200 }, { "epoch": 0.4064711830131446, "grad_norm": 0.19080513715744019, "learning_rate": 0.00038980615014216853, "loss": 0.3241, "mean_token_accuracy": 0.8904240913689137, "num_tokens": 13669371.0, "step": 201 }, { "epoch": 0.4084934277047523, "grad_norm": 0.16377419233322144, "learning_rate": 0.00038967546839659215, "loss": 0.3149, "mean_token_accuracy": 0.8902618512511253, "num_tokens": 13745941.0, "step": 202 }, { "epoch": 0.41051567239635994, "grad_norm": 0.19631735980510712, "learning_rate": 0.00038954397905597063, "loss": 0.3459, "mean_token_accuracy": 0.8863471113145351, "num_tokens": 13818760.0, "step": 203 }, { "epoch": 0.41253791708796766, "grad_norm": 0.18512631952762604, "learning_rate": 0.00038941168274614677, "loss": 0.3168, "mean_token_accuracy": 0.8905623555183411, "num_tokens": 13889651.0, "step": 204 }, { "epoch": 0.41456016177957533, "grad_norm": 0.21926718950271606, "learning_rate": 0.00038927858009680394, "loss": 0.3006, "mean_token_accuracy": 0.8961901552975178, "num_tokens": 13949554.0, "step": 205 }, { "epoch": 0.416582406471183, "grad_norm": 0.17943674325942993, "learning_rate": 0.0003891446717414635, "loss": 0.3066, "mean_token_accuracy": 0.8922952748835087, "num_tokens": 14021083.0, "step": 206 }, { "epoch": 0.4186046511627907, "grad_norm": 0.1913203001022339, "learning_rate": 0.0003890099583174819, "loss": 0.3209, "mean_token_accuracy": 0.8909911513328552, "num_tokens": 14093782.0, "step": 207 }, { "epoch": 0.4206268958543984, "grad_norm": 0.19808340072631836, "learning_rate": 0.0003888744404660472, "loss": 0.3338, "mean_token_accuracy": 0.884627778083086, "num_tokens": 14159998.0, "step": 208 }, { "epoch": 0.42264914054600605, "grad_norm": 0.17383399605751038, "learning_rate": 0.0003887381188321762, "loss": 0.3153, "mean_token_accuracy": 0.8930625729262829, "num_tokens": 14232551.0, "step": 209 }, { "epoch": 0.4246713852376138, "grad_norm": 0.17710869014263153, "learning_rate": 0.0003886009940647116, "loss": 0.3296, "mean_token_accuracy": 0.8819810189306736, "num_tokens": 14298797.0, "step": 210 }, { "epoch": 0.42669362992922144, "grad_norm": 0.1701733022928238, "learning_rate": 0.0003884630668163186, "loss": 0.3043, "mean_token_accuracy": 0.8916125111281872, "num_tokens": 14367264.0, "step": 211 }, { "epoch": 0.4287158746208291, "grad_norm": 0.16983942687511444, "learning_rate": 0.0003883243377434821, "loss": 0.336, "mean_token_accuracy": 0.8859187439084053, "num_tokens": 14434405.0, "step": 212 }, { "epoch": 0.43073811931243683, "grad_norm": 0.22955253720283508, "learning_rate": 0.0003881848075065032, "loss": 0.331, "mean_token_accuracy": 0.89054074883461, "num_tokens": 14504097.0, "step": 213 }, { "epoch": 0.4327603640040445, "grad_norm": 0.1674816757440567, "learning_rate": 0.0003880444767694963, "loss": 0.3292, "mean_token_accuracy": 0.8852434195578098, "num_tokens": 14576012.0, "step": 214 }, { "epoch": 0.43478260869565216, "grad_norm": 0.23137012124061584, "learning_rate": 0.00038790334620038606, "loss": 0.3293, "mean_token_accuracy": 0.8874834440648556, "num_tokens": 14641442.0, "step": 215 }, { "epoch": 0.4368048533872599, "grad_norm": 0.1810149997472763, "learning_rate": 0.00038776141647090375, "loss": 0.3359, "mean_token_accuracy": 0.8845292665064335, "num_tokens": 14701016.0, "step": 216 }, { "epoch": 0.43882709807886755, "grad_norm": 0.19873689115047455, "learning_rate": 0.00038761868825658465, "loss": 0.3275, "mean_token_accuracy": 0.8850444070994854, "num_tokens": 14762543.0, "step": 217 }, { "epoch": 0.4408493427704752, "grad_norm": 0.16571380198001862, "learning_rate": 0.00038747516223676447, "loss": 0.3097, "mean_token_accuracy": 0.8963964283466339, "num_tokens": 14837183.0, "step": 218 }, { "epoch": 0.44287158746208294, "grad_norm": 0.18150104582309723, "learning_rate": 0.00038733083909457607, "loss": 0.3066, "mean_token_accuracy": 0.891868706792593, "num_tokens": 14909675.0, "step": 219 }, { "epoch": 0.4448938321536906, "grad_norm": 0.2008552849292755, "learning_rate": 0.00038718571951694636, "loss": 0.3397, "mean_token_accuracy": 0.881518941372633, "num_tokens": 14974075.0, "step": 220 }, { "epoch": 0.44691607684529827, "grad_norm": 0.20857571065425873, "learning_rate": 0.00038703980419459323, "loss": 0.3251, "mean_token_accuracy": 0.8877891451120377, "num_tokens": 15044109.0, "step": 221 }, { "epoch": 0.448938321536906, "grad_norm": 0.1777462363243103, "learning_rate": 0.00038689309382202174, "loss": 0.3017, "mean_token_accuracy": 0.8944090716540813, "num_tokens": 15114045.0, "step": 222 }, { "epoch": 0.45096056622851366, "grad_norm": 0.16797004640102386, "learning_rate": 0.0003867455890975213, "loss": 0.2901, "mean_token_accuracy": 0.8903030268847942, "num_tokens": 15184412.0, "step": 223 }, { "epoch": 0.4529828109201213, "grad_norm": 0.26226508617401123, "learning_rate": 0.00038659729072316193, "loss": 0.356, "mean_token_accuracy": 0.8832045011222363, "num_tokens": 15245581.0, "step": 224 }, { "epoch": 0.455005055611729, "grad_norm": 0.16607579588890076, "learning_rate": 0.00038644819940479146, "loss": 0.3148, "mean_token_accuracy": 0.8910624943673611, "num_tokens": 15315013.0, "step": 225 }, { "epoch": 0.4570273003033367, "grad_norm": 0.15852072834968567, "learning_rate": 0.00038629831585203163, "loss": 0.2908, "mean_token_accuracy": 0.8945996090769768, "num_tokens": 15398701.0, "step": 226 }, { "epoch": 0.4590495449949444, "grad_norm": 0.17531050741672516, "learning_rate": 0.000386147640778275, "loss": 0.2748, "mean_token_accuracy": 0.9027018919587135, "num_tokens": 15490499.0, "step": 227 }, { "epoch": 0.46107178968655205, "grad_norm": 0.16767503321170807, "learning_rate": 0.00038599617490068134, "loss": 0.3044, "mean_token_accuracy": 0.8939338177442551, "num_tokens": 15556168.0, "step": 228 }, { "epoch": 0.46309403437815977, "grad_norm": 0.211036816239357, "learning_rate": 0.0003858439189401747, "loss": 0.3207, "mean_token_accuracy": 0.8899048455059528, "num_tokens": 15622005.0, "step": 229 }, { "epoch": 0.46511627906976744, "grad_norm": 0.16442608833312988, "learning_rate": 0.0003856908736214393, "loss": 0.3191, "mean_token_accuracy": 0.8901388570666313, "num_tokens": 15693753.0, "step": 230 }, { "epoch": 0.4671385237613751, "grad_norm": 0.15724638104438782, "learning_rate": 0.0003855370396729166, "loss": 0.2836, "mean_token_accuracy": 0.9016358070075512, "num_tokens": 15763494.0, "step": 231 }, { "epoch": 0.4691607684529828, "grad_norm": 0.18976381421089172, "learning_rate": 0.0003853824178268017, "loss": 0.3205, "mean_token_accuracy": 0.8904677703976631, "num_tokens": 15833863.0, "step": 232 }, { "epoch": 0.4711830131445905, "grad_norm": 0.14951825141906738, "learning_rate": 0.00038522700881903966, "loss": 0.2525, "mean_token_accuracy": 0.8994054794311523, "num_tokens": 15911573.0, "step": 233 }, { "epoch": 0.47320525783619816, "grad_norm": 0.19189335405826569, "learning_rate": 0.0003850708133893223, "loss": 0.3223, "mean_token_accuracy": 0.8889148533344269, "num_tokens": 15973006.0, "step": 234 }, { "epoch": 0.4752275025278059, "grad_norm": 0.15495674312114716, "learning_rate": 0.0003849138322810845, "loss": 0.3025, "mean_token_accuracy": 0.8922797180712223, "num_tokens": 16044921.0, "step": 235 }, { "epoch": 0.47724974721941354, "grad_norm": 0.1728491634130478, "learning_rate": 0.00038475606624150055, "loss": 0.3094, "mean_token_accuracy": 0.8931614607572556, "num_tokens": 16116096.0, "step": 236 }, { "epoch": 0.4792719919110212, "grad_norm": 0.1603267937898636, "learning_rate": 0.0003845975160214808, "loss": 0.3235, "mean_token_accuracy": 0.8852398991584778, "num_tokens": 16184529.0, "step": 237 }, { "epoch": 0.48129423660262893, "grad_norm": 0.16991828382015228, "learning_rate": 0.00038443818237566814, "loss": 0.2902, "mean_token_accuracy": 0.892944622784853, "num_tokens": 16253758.0, "step": 238 }, { "epoch": 0.4833164812942366, "grad_norm": 0.17524850368499756, "learning_rate": 0.0003842780660624343, "loss": 0.3227, "mean_token_accuracy": 0.8884528502821922, "num_tokens": 16320041.0, "step": 239 }, { "epoch": 0.48533872598584427, "grad_norm": 0.18329283595085144, "learning_rate": 0.00038411716784387596, "loss": 0.313, "mean_token_accuracy": 0.8975342884659767, "num_tokens": 16392051.0, "step": 240 }, { "epoch": 0.487360970677452, "grad_norm": 0.1628371924161911, "learning_rate": 0.00038395548848581165, "loss": 0.2817, "mean_token_accuracy": 0.9011796675622463, "num_tokens": 16462030.0, "step": 241 }, { "epoch": 0.48938321536905965, "grad_norm": 0.18072479963302612, "learning_rate": 0.0003837930287577778, "loss": 0.3041, "mean_token_accuracy": 0.8932337760925293, "num_tokens": 16532493.0, "step": 242 }, { "epoch": 0.4914054600606673, "grad_norm": 0.2059275507926941, "learning_rate": 0.000383629789433025, "loss": 0.3281, "mean_token_accuracy": 0.8870198056101799, "num_tokens": 16590133.0, "step": 243 }, { "epoch": 0.49342770475227504, "grad_norm": 0.19713951647281647, "learning_rate": 0.00038346577128851465, "loss": 0.3235, "mean_token_accuracy": 0.8893256969749928, "num_tokens": 16655566.0, "step": 244 }, { "epoch": 0.4954499494438827, "grad_norm": 0.16556710004806519, "learning_rate": 0.00038330097510491483, "loss": 0.3148, "mean_token_accuracy": 0.8895911388099194, "num_tokens": 16718728.0, "step": 245 }, { "epoch": 0.4974721941354904, "grad_norm": 0.1870684027671814, "learning_rate": 0.000383135401666597, "loss": 0.329, "mean_token_accuracy": 0.8862268440425396, "num_tokens": 16776165.0, "step": 246 }, { "epoch": 0.4994944388270981, "grad_norm": 0.18431027233600616, "learning_rate": 0.00038296905176163197, "loss": 0.3143, "mean_token_accuracy": 0.8902600333094597, "num_tokens": 16835743.0, "step": 247 }, { "epoch": 0.5015166835187057, "grad_norm": 0.18272148072719574, "learning_rate": 0.0003828019261817863, "loss": 0.3243, "mean_token_accuracy": 0.8864033743739128, "num_tokens": 16899775.0, "step": 248 }, { "epoch": 0.5035389282103134, "grad_norm": 0.1712082177400589, "learning_rate": 0.0003826340257225184, "loss": 0.324, "mean_token_accuracy": 0.8914847373962402, "num_tokens": 16972506.0, "step": 249 }, { "epoch": 0.5055611729019212, "grad_norm": 0.18500936031341553, "learning_rate": 0.00038246535118297497, "loss": 0.3006, "mean_token_accuracy": 0.8903259225189686, "num_tokens": 17036215.0, "step": 250 }, { "epoch": 0.5075834175935288, "grad_norm": 0.19614791870117188, "learning_rate": 0.00038229590336598694, "loss": 0.3176, "mean_token_accuracy": 0.8885915465652943, "num_tokens": 17099060.0, "step": 251 }, { "epoch": 0.5096056622851365, "grad_norm": 0.20587585866451263, "learning_rate": 0.0003821256830780658, "loss": 0.3252, "mean_token_accuracy": 0.8900357261300087, "num_tokens": 17160737.0, "step": 252 }, { "epoch": 0.5116279069767442, "grad_norm": 0.16274958848953247, "learning_rate": 0.0003819546911293999, "loss": 0.3065, "mean_token_accuracy": 0.8940119668841362, "num_tokens": 17228903.0, "step": 253 }, { "epoch": 0.5136501516683518, "grad_norm": 0.16572465002536774, "learning_rate": 0.0003817829283338501, "loss": 0.3011, "mean_token_accuracy": 0.8989259153604507, "num_tokens": 17309457.0, "step": 254 }, { "epoch": 0.5156723963599595, "grad_norm": 0.21092504262924194, "learning_rate": 0.0003816103955089464, "loss": 0.3645, "mean_token_accuracy": 0.8738524205982685, "num_tokens": 17371710.0, "step": 255 }, { "epoch": 0.5176946410515673, "grad_norm": 0.1776529848575592, "learning_rate": 0.0003814370934758839, "loss": 0.3413, "mean_token_accuracy": 0.8868374638259411, "num_tokens": 17445132.0, "step": 256 }, { "epoch": 0.5197168857431749, "grad_norm": 0.1718549132347107, "learning_rate": 0.0003812630230595188, "loss": 0.3472, "mean_token_accuracy": 0.8835309036076069, "num_tokens": 17511865.0, "step": 257 }, { "epoch": 0.5217391304347826, "grad_norm": 0.17998023331165314, "learning_rate": 0.0003810881850883645, "loss": 0.35, "mean_token_accuracy": 0.8810900300741196, "num_tokens": 17579299.0, "step": 258 }, { "epoch": 0.5237613751263903, "grad_norm": 0.15693029761314392, "learning_rate": 0.0003809125803945878, "loss": 0.2853, "mean_token_accuracy": 0.8982386291027069, "num_tokens": 17651748.0, "step": 259 }, { "epoch": 0.5257836198179979, "grad_norm": 0.18085962533950806, "learning_rate": 0.00038073620981400467, "loss": 0.2962, "mean_token_accuracy": 0.898784764111042, "num_tokens": 17716859.0, "step": 260 }, { "epoch": 0.5278058645096056, "grad_norm": 0.2137887328863144, "learning_rate": 0.00038055907418607654, "loss": 0.3485, "mean_token_accuracy": 0.8793986700475216, "num_tokens": 17776486.0, "step": 261 }, { "epoch": 0.5298281092012134, "grad_norm": 0.169187992811203, "learning_rate": 0.0003803811743539062, "loss": 0.3093, "mean_token_accuracy": 0.8887566514313221, "num_tokens": 17844621.0, "step": 262 }, { "epoch": 0.531850353892821, "grad_norm": 0.1435088813304901, "learning_rate": 0.0003802025111642338, "loss": 0.2623, "mean_token_accuracy": 0.9051036462187767, "num_tokens": 17930557.0, "step": 263 }, { "epoch": 0.5338725985844287, "grad_norm": 0.1761457622051239, "learning_rate": 0.00038002308546743256, "loss": 0.3008, "mean_token_accuracy": 0.8946518003940582, "num_tokens": 17999603.0, "step": 264 }, { "epoch": 0.5358948432760364, "grad_norm": 0.17682158946990967, "learning_rate": 0.0003798428981175053, "loss": 0.3043, "mean_token_accuracy": 0.8925192318856716, "num_tokens": 18071957.0, "step": 265 }, { "epoch": 0.537917087967644, "grad_norm": 0.18640998005867004, "learning_rate": 0.0003796619499720799, "loss": 0.3145, "mean_token_accuracy": 0.8919526562094688, "num_tokens": 18141501.0, "step": 266 }, { "epoch": 0.5399393326592518, "grad_norm": 0.1694413125514984, "learning_rate": 0.0003794802418924054, "loss": 0.299, "mean_token_accuracy": 0.8966234587132931, "num_tokens": 18215962.0, "step": 267 }, { "epoch": 0.5419615773508595, "grad_norm": 0.1720503866672516, "learning_rate": 0.00037929777474334756, "loss": 0.3269, "mean_token_accuracy": 0.8884270638227463, "num_tokens": 18282357.0, "step": 268 }, { "epoch": 0.5439838220424671, "grad_norm": 0.19224666059017181, "learning_rate": 0.0003791145493933855, "loss": 0.3477, "mean_token_accuracy": 0.8821601495146751, "num_tokens": 18347587.0, "step": 269 }, { "epoch": 0.5460060667340748, "grad_norm": 0.1664774864912033, "learning_rate": 0.0003789305667146069, "loss": 0.3009, "mean_token_accuracy": 0.8948215469717979, "num_tokens": 18415368.0, "step": 270 }, { "epoch": 0.5480283114256825, "grad_norm": 0.18322114646434784, "learning_rate": 0.0003787458275827039, "loss": 0.3195, "mean_token_accuracy": 0.8908861018717289, "num_tokens": 18482285.0, "step": 271 }, { "epoch": 0.5500505561172901, "grad_norm": 0.20983459055423737, "learning_rate": 0.00037856033287696943, "loss": 0.2945, "mean_token_accuracy": 0.8964951671659946, "num_tokens": 18540330.0, "step": 272 }, { "epoch": 0.5520728008088979, "grad_norm": 0.1815643161535263, "learning_rate": 0.00037837408348029235, "loss": 0.3159, "mean_token_accuracy": 0.8929238878190517, "num_tokens": 18604976.0, "step": 273 }, { "epoch": 0.5540950455005056, "grad_norm": 0.2073771208524704, "learning_rate": 0.00037818708027915376, "loss": 0.3244, "mean_token_accuracy": 0.8876978568732738, "num_tokens": 18672322.0, "step": 274 }, { "epoch": 0.5561172901921132, "grad_norm": 0.19434937834739685, "learning_rate": 0.00037799932416362266, "loss": 0.3111, "mean_token_accuracy": 0.8910202607512474, "num_tokens": 18735221.0, "step": 275 }, { "epoch": 0.5581395348837209, "grad_norm": 0.15825523436069489, "learning_rate": 0.00037781081602735145, "loss": 0.2758, "mean_token_accuracy": 0.8941913619637489, "num_tokens": 18815168.0, "step": 276 }, { "epoch": 0.5601617795753286, "grad_norm": 0.16185039281845093, "learning_rate": 0.00037762155676757196, "loss": 0.2978, "mean_token_accuracy": 0.89651133492589, "num_tokens": 18884062.0, "step": 277 }, { "epoch": 0.5621840242669363, "grad_norm": 0.18850262463092804, "learning_rate": 0.00037743154728509123, "loss": 0.3109, "mean_token_accuracy": 0.8866820931434631, "num_tokens": 18948236.0, "step": 278 }, { "epoch": 0.564206268958544, "grad_norm": 0.1736079454421997, "learning_rate": 0.00037724078848428707, "loss": 0.28, "mean_token_accuracy": 0.9002561867237091, "num_tokens": 19017663.0, "step": 279 }, { "epoch": 0.5662285136501517, "grad_norm": 0.15573325753211975, "learning_rate": 0.0003770492812731035, "loss": 0.3072, "mean_token_accuracy": 0.891198180615902, "num_tokens": 19089120.0, "step": 280 }, { "epoch": 0.5682507583417593, "grad_norm": 0.18526090681552887, "learning_rate": 0.0003768570265630471, "loss": 0.3305, "mean_token_accuracy": 0.8860407620668411, "num_tokens": 19154650.0, "step": 281 }, { "epoch": 0.570273003033367, "grad_norm": 0.1691296249628067, "learning_rate": 0.00037666402526918195, "loss": 0.3188, "mean_token_accuracy": 0.8919213153421879, "num_tokens": 19224445.0, "step": 282 }, { "epoch": 0.5722952477249748, "grad_norm": 0.17496982216835022, "learning_rate": 0.0003764702783101259, "loss": 0.3032, "mean_token_accuracy": 0.8902747184038162, "num_tokens": 19298006.0, "step": 283 }, { "epoch": 0.5743174924165824, "grad_norm": 0.14839443564414978, "learning_rate": 0.00037627578660804565, "loss": 0.2734, "mean_token_accuracy": 0.8967320993542671, "num_tokens": 19374661.0, "step": 284 }, { "epoch": 0.5763397371081901, "grad_norm": 0.1658451408147812, "learning_rate": 0.0003760805510886527, "loss": 0.2999, "mean_token_accuracy": 0.8904417157173157, "num_tokens": 19450524.0, "step": 285 }, { "epoch": 0.5783619817997978, "grad_norm": 0.19472143054008484, "learning_rate": 0.000375884572681199, "loss": 0.3083, "mean_token_accuracy": 0.8959350101649761, "num_tokens": 19516098.0, "step": 286 }, { "epoch": 0.5803842264914054, "grad_norm": 0.17645469307899475, "learning_rate": 0.0003756878523184721, "loss": 0.3232, "mean_token_accuracy": 0.8930424600839615, "num_tokens": 19586030.0, "step": 287 }, { "epoch": 0.5824064711830131, "grad_norm": 0.16119012236595154, "learning_rate": 0.0003754903909367912, "loss": 0.2305, "mean_token_accuracy": 0.9079996608197689, "num_tokens": 19652723.0, "step": 288 }, { "epoch": 0.5844287158746209, "grad_norm": 0.1650022268295288, "learning_rate": 0.00037529218947600254, "loss": 0.2913, "mean_token_accuracy": 0.8961706385016441, "num_tokens": 19721013.0, "step": 289 }, { "epoch": 0.5864509605662285, "grad_norm": 0.1751680225133896, "learning_rate": 0.00037509324887947465, "loss": 0.2996, "mean_token_accuracy": 0.8925143517553806, "num_tokens": 19785808.0, "step": 290 }, { "epoch": 0.5884732052578362, "grad_norm": 0.14274518191814423, "learning_rate": 0.0003748935700940942, "loss": 0.3009, "mean_token_accuracy": 0.8944595381617546, "num_tokens": 19864767.0, "step": 291 }, { "epoch": 0.5904954499494439, "grad_norm": 0.19173842668533325, "learning_rate": 0.00037469315407026154, "loss": 0.3189, "mean_token_accuracy": 0.8890005201101303, "num_tokens": 19934782.0, "step": 292 }, { "epoch": 0.5925176946410515, "grad_norm": 0.16339226067066193, "learning_rate": 0.0003744920017618856, "loss": 0.2984, "mean_token_accuracy": 0.891924075782299, "num_tokens": 20004350.0, "step": 293 }, { "epoch": 0.5945399393326593, "grad_norm": 0.1845332533121109, "learning_rate": 0.0003742901141263802, "loss": 0.3233, "mean_token_accuracy": 0.8917621746659279, "num_tokens": 20073462.0, "step": 294 }, { "epoch": 0.596562184024267, "grad_norm": 0.18664658069610596, "learning_rate": 0.00037408749212465895, "loss": 0.3168, "mean_token_accuracy": 0.8909800015389919, "num_tokens": 20135987.0, "step": 295 }, { "epoch": 0.5985844287158746, "grad_norm": 0.17890143394470215, "learning_rate": 0.0003738841367211304, "loss": 0.2679, "mean_token_accuracy": 0.9026052355766296, "num_tokens": 20206699.0, "step": 296 }, { "epoch": 0.6006066734074823, "grad_norm": 0.18279992043972015, "learning_rate": 0.0003736800488836944, "loss": 0.2929, "mean_token_accuracy": 0.8961853981018066, "num_tokens": 20272267.0, "step": 297 }, { "epoch": 0.60262891809909, "grad_norm": 0.2653316557407379, "learning_rate": 0.00037347522958373664, "loss": 0.286, "mean_token_accuracy": 0.8971174284815788, "num_tokens": 20343534.0, "step": 298 }, { "epoch": 0.6046511627906976, "grad_norm": 0.15699949860572815, "learning_rate": 0.00037326967979612425, "loss": 0.2861, "mean_token_accuracy": 0.9003230258822441, "num_tokens": 20420939.0, "step": 299 }, { "epoch": 0.6066734074823054, "grad_norm": 0.17641445994377136, "learning_rate": 0.0003730634004992013, "loss": 0.3051, "mean_token_accuracy": 0.8907876797020435, "num_tokens": 20488068.0, "step": 300 }, { "epoch": 0.6086956521739131, "grad_norm": 0.1636650264263153, "learning_rate": 0.0003728563926747842, "loss": 0.2928, "mean_token_accuracy": 0.8949981555342674, "num_tokens": 20560510.0, "step": 301 }, { "epoch": 0.6107178968655207, "grad_norm": 0.18622446060180664, "learning_rate": 0.0003726486573081567, "loss": 0.3156, "mean_token_accuracy": 0.8932462483644485, "num_tokens": 20627926.0, "step": 302 }, { "epoch": 0.6127401415571284, "grad_norm": 0.18102477490901947, "learning_rate": 0.00037244019538806546, "loss": 0.2859, "mean_token_accuracy": 0.897308062762022, "num_tokens": 20695635.0, "step": 303 }, { "epoch": 0.6147623862487361, "grad_norm": 0.19487911462783813, "learning_rate": 0.00037223100790671526, "loss": 0.3232, "mean_token_accuracy": 0.8873684406280518, "num_tokens": 20764073.0, "step": 304 }, { "epoch": 0.6167846309403437, "grad_norm": 0.16768330335617065, "learning_rate": 0.0003720210958597642, "loss": 0.2856, "mean_token_accuracy": 0.8974824510514736, "num_tokens": 20834156.0, "step": 305 }, { "epoch": 0.6188068756319515, "grad_norm": 0.17184442281723022, "learning_rate": 0.00037181046024631944, "loss": 0.3167, "mean_token_accuracy": 0.8905413933098316, "num_tokens": 20906046.0, "step": 306 }, { "epoch": 0.6208291203235592, "grad_norm": 0.17979033291339874, "learning_rate": 0.0003715991020689316, "loss": 0.3166, "mean_token_accuracy": 0.8910835459828377, "num_tokens": 20969038.0, "step": 307 }, { "epoch": 0.6228513650151668, "grad_norm": 0.16872760653495789, "learning_rate": 0.0003713870223335907, "loss": 0.3023, "mean_token_accuracy": 0.8999812118709087, "num_tokens": 21054878.0, "step": 308 }, { "epoch": 0.6248736097067745, "grad_norm": 0.17098355293273926, "learning_rate": 0.00037117422204972094, "loss": 0.2918, "mean_token_accuracy": 0.9006133303046227, "num_tokens": 21120211.0, "step": 309 }, { "epoch": 0.6268958543983822, "grad_norm": 0.19943217933177948, "learning_rate": 0.00037096070223017634, "loss": 0.2992, "mean_token_accuracy": 0.8970108516514301, "num_tokens": 21193385.0, "step": 310 }, { "epoch": 0.6289180990899899, "grad_norm": 0.19835074245929718, "learning_rate": 0.0003707464638912354, "loss": 0.2987, "mean_token_accuracy": 0.8971699252724648, "num_tokens": 21258335.0, "step": 311 }, { "epoch": 0.6309403437815976, "grad_norm": 0.1647316962480545, "learning_rate": 0.0003705315080525967, "loss": 0.2877, "mean_token_accuracy": 0.8915503136813641, "num_tokens": 21328815.0, "step": 312 }, { "epoch": 0.6329625884732053, "grad_norm": 0.18789348006248474, "learning_rate": 0.00037031583573737375, "loss": 0.2973, "mean_token_accuracy": 0.8956909030675888, "num_tokens": 21408498.0, "step": 313 }, { "epoch": 0.6349848331648129, "grad_norm": 0.23517835140228271, "learning_rate": 0.0003700994479720903, "loss": 0.3022, "mean_token_accuracy": 0.8944514766335487, "num_tokens": 21477506.0, "step": 314 }, { "epoch": 0.6370070778564206, "grad_norm": 0.1805562973022461, "learning_rate": 0.00036988234578667526, "loss": 0.313, "mean_token_accuracy": 0.892850112169981, "num_tokens": 21543808.0, "step": 315 }, { "epoch": 0.6390293225480284, "grad_norm": 0.2823885679244995, "learning_rate": 0.0003696645302144582, "loss": 0.3397, "mean_token_accuracy": 0.8829572051763535, "num_tokens": 21607431.0, "step": 316 }, { "epoch": 0.641051567239636, "grad_norm": 0.19618524610996246, "learning_rate": 0.00036944600229216375, "loss": 0.3164, "mean_token_accuracy": 0.8882573507726192, "num_tokens": 21675489.0, "step": 317 }, { "epoch": 0.6430738119312437, "grad_norm": 0.19782759249210358, "learning_rate": 0.00036922676305990753, "loss": 0.3211, "mean_token_accuracy": 0.8908263929188251, "num_tokens": 21739400.0, "step": 318 }, { "epoch": 0.6450960566228514, "grad_norm": 0.20694133639335632, "learning_rate": 0.00036900681356119043, "loss": 0.2927, "mean_token_accuracy": 0.8931123651564121, "num_tokens": 21807454.0, "step": 319 }, { "epoch": 0.647118301314459, "grad_norm": 0.16246715188026428, "learning_rate": 0.00036878615484289395, "loss": 0.3095, "mean_token_accuracy": 0.8925521671772003, "num_tokens": 21883534.0, "step": 320 }, { "epoch": 0.6491405460060667, "grad_norm": 0.1689622849225998, "learning_rate": 0.0003685647879552755, "loss": 0.3198, "mean_token_accuracy": 0.8910107761621475, "num_tokens": 21954057.0, "step": 321 }, { "epoch": 0.6511627906976745, "grad_norm": 0.21298348903656006, "learning_rate": 0.0003683427139519628, "loss": 0.3098, "mean_token_accuracy": 0.8946363367140293, "num_tokens": 22024559.0, "step": 322 }, { "epoch": 0.6531850353892821, "grad_norm": 0.20307037234306335, "learning_rate": 0.00036811993388994945, "loss": 0.3042, "mean_token_accuracy": 0.8996872641146183, "num_tokens": 22083005.0, "step": 323 }, { "epoch": 0.6552072800808898, "grad_norm": 0.19622348248958588, "learning_rate": 0.00036789644882958953, "loss": 0.3106, "mean_token_accuracy": 0.8917652256786823, "num_tokens": 22153882.0, "step": 324 }, { "epoch": 0.6572295247724975, "grad_norm": 0.2048502266407013, "learning_rate": 0.00036767225983459247, "loss": 0.3072, "mean_token_accuracy": 0.892122782766819, "num_tokens": 22223638.0, "step": 325 }, { "epoch": 0.6592517694641051, "grad_norm": 0.17371125519275665, "learning_rate": 0.00036744736797201855, "loss": 0.2818, "mean_token_accuracy": 0.9024628438055515, "num_tokens": 22287424.0, "step": 326 }, { "epoch": 0.6612740141557129, "grad_norm": 0.1815844625234604, "learning_rate": 0.0003672217743122732, "loss": 0.3064, "mean_token_accuracy": 0.8945932053029537, "num_tokens": 22349842.0, "step": 327 }, { "epoch": 0.6632962588473206, "grad_norm": 0.16366587579250336, "learning_rate": 0.00036699547992910227, "loss": 0.2836, "mean_token_accuracy": 0.8982814475893974, "num_tokens": 22436553.0, "step": 328 }, { "epoch": 0.6653185035389282, "grad_norm": 0.1992887556552887, "learning_rate": 0.00036676848589958663, "loss": 0.325, "mean_token_accuracy": 0.8879561647772789, "num_tokens": 22493823.0, "step": 329 }, { "epoch": 0.6673407482305359, "grad_norm": 0.17708779871463776, "learning_rate": 0.0003665407933041375, "loss": 0.3325, "mean_token_accuracy": 0.8939349353313446, "num_tokens": 22563840.0, "step": 330 }, { "epoch": 0.6693629929221436, "grad_norm": 0.2144147753715515, "learning_rate": 0.00036631240322649076, "loss": 0.3335, "mean_token_accuracy": 0.8810755871236324, "num_tokens": 22624256.0, "step": 331 }, { "epoch": 0.6713852376137512, "grad_norm": 0.16541875898838043, "learning_rate": 0.0003660833167537022, "loss": 0.3275, "mean_token_accuracy": 0.8926926329731941, "num_tokens": 22694170.0, "step": 332 }, { "epoch": 0.673407482305359, "grad_norm": 0.1698412150144577, "learning_rate": 0.00036585353497614224, "loss": 0.3066, "mean_token_accuracy": 0.8967249467968941, "num_tokens": 22768633.0, "step": 333 }, { "epoch": 0.6754297269969667, "grad_norm": 0.1821826696395874, "learning_rate": 0.00036562305898749054, "loss": 0.3208, "mean_token_accuracy": 0.886600024998188, "num_tokens": 22837600.0, "step": 334 }, { "epoch": 0.6774519716885743, "grad_norm": 0.1860353797674179, "learning_rate": 0.00036539188988473113, "loss": 0.3098, "mean_token_accuracy": 0.8903123624622822, "num_tokens": 22896567.0, "step": 335 }, { "epoch": 0.679474216380182, "grad_norm": 0.14535972476005554, "learning_rate": 0.0003651600287681469, "loss": 0.2686, "mean_token_accuracy": 0.9052710346877575, "num_tokens": 22973060.0, "step": 336 }, { "epoch": 0.6814964610717897, "grad_norm": 0.19355034828186035, "learning_rate": 0.0003649274767413145, "loss": 0.2877, "mean_token_accuracy": 0.8924892544746399, "num_tokens": 23043913.0, "step": 337 }, { "epoch": 0.6835187057633973, "grad_norm": 0.185837984085083, "learning_rate": 0.00036469423491109913, "loss": 0.2982, "mean_token_accuracy": 0.8957228772342205, "num_tokens": 23114457.0, "step": 338 }, { "epoch": 0.6855409504550051, "grad_norm": 0.18406859040260315, "learning_rate": 0.0003644603043876492, "loss": 0.3066, "mean_token_accuracy": 0.8950929716229439, "num_tokens": 23181146.0, "step": 339 }, { "epoch": 0.6875631951466128, "grad_norm": 0.19715051352977753, "learning_rate": 0.00036422568628439095, "loss": 0.307, "mean_token_accuracy": 0.8925964459776878, "num_tokens": 23245269.0, "step": 340 }, { "epoch": 0.6895854398382204, "grad_norm": 0.18601888418197632, "learning_rate": 0.0003639903817180233, "loss": 0.2756, "mean_token_accuracy": 0.8994149342179298, "num_tokens": 23321463.0, "step": 341 }, { "epoch": 0.6916076845298281, "grad_norm": 0.18005859851837158, "learning_rate": 0.0003637543918085127, "loss": 0.2958, "mean_token_accuracy": 0.8977576531469822, "num_tokens": 23385849.0, "step": 342 }, { "epoch": 0.6936299292214358, "grad_norm": 0.19051752984523773, "learning_rate": 0.00036351771767908727, "loss": 0.3074, "mean_token_accuracy": 0.8928764685988426, "num_tokens": 23456847.0, "step": 343 }, { "epoch": 0.6956521739130435, "grad_norm": 0.20482131838798523, "learning_rate": 0.0003632803604562319, "loss": 0.3029, "mean_token_accuracy": 0.8909181989729404, "num_tokens": 23526730.0, "step": 344 }, { "epoch": 0.6976744186046512, "grad_norm": 0.17358487844467163, "learning_rate": 0.00036304232126968295, "loss": 0.2844, "mean_token_accuracy": 0.898325003683567, "num_tokens": 23588681.0, "step": 345 }, { "epoch": 0.6996966632962589, "grad_norm": 0.1700018048286438, "learning_rate": 0.00036280360125242234, "loss": 0.2733, "mean_token_accuracy": 0.9010062254965305, "num_tokens": 23664445.0, "step": 346 }, { "epoch": 0.7017189079878665, "grad_norm": 0.193056121468544, "learning_rate": 0.0003625642015406727, "loss": 0.3102, "mean_token_accuracy": 0.8916714228689671, "num_tokens": 23738941.0, "step": 347 }, { "epoch": 0.7037411526794742, "grad_norm": 0.19169779121875763, "learning_rate": 0.0003623241232738919, "loss": 0.2957, "mean_token_accuracy": 0.8949874453246593, "num_tokens": 23801979.0, "step": 348 }, { "epoch": 0.7057633973710818, "grad_norm": 0.16655734181404114, "learning_rate": 0.00036208336759476704, "loss": 0.2937, "mean_token_accuracy": 0.896770391613245, "num_tokens": 23868193.0, "step": 349 }, { "epoch": 0.7077856420626896, "grad_norm": 0.15496356785297394, "learning_rate": 0.0003618419356492099, "loss": 0.2871, "mean_token_accuracy": 0.9015951566398144, "num_tokens": 23947204.0, "step": 350 }, { "epoch": 0.7098078867542973, "grad_norm": 0.160264790058136, "learning_rate": 0.00036159982858635105, "loss": 0.2825, "mean_token_accuracy": 0.9006201699376106, "num_tokens": 24021149.0, "step": 351 }, { "epoch": 0.7118301314459049, "grad_norm": 0.16146975755691528, "learning_rate": 0.00036135704755853407, "loss": 0.2757, "mean_token_accuracy": 0.9038827978074551, "num_tokens": 24092549.0, "step": 352 }, { "epoch": 0.7138523761375126, "grad_norm": 0.20805270969867706, "learning_rate": 0.0003611135937213106, "loss": 0.3267, "mean_token_accuracy": 0.8861317448318005, "num_tokens": 24157474.0, "step": 353 }, { "epoch": 0.7158746208291203, "grad_norm": 0.16421623528003693, "learning_rate": 0.0003608694682334345, "loss": 0.2935, "mean_token_accuracy": 0.8962382674217224, "num_tokens": 24230461.0, "step": 354 }, { "epoch": 0.717896865520728, "grad_norm": 0.1796526312828064, "learning_rate": 0.0003606246722568566, "loss": 0.2841, "mean_token_accuracy": 0.8999650441110134, "num_tokens": 24296781.0, "step": 355 }, { "epoch": 0.7199191102123357, "grad_norm": 0.18790611624717712, "learning_rate": 0.0003603792069567187, "loss": 0.3496, "mean_token_accuracy": 0.8827480934560299, "num_tokens": 24361770.0, "step": 356 }, { "epoch": 0.7219413549039434, "grad_norm": 0.16473916172981262, "learning_rate": 0.00036013307350134884, "loss": 0.314, "mean_token_accuracy": 0.8960560448467731, "num_tokens": 24432956.0, "step": 357 }, { "epoch": 0.723963599595551, "grad_norm": 0.17466352880001068, "learning_rate": 0.0003598862730622548, "loss": 0.3113, "mean_token_accuracy": 0.8914259672164917, "num_tokens": 24499417.0, "step": 358 }, { "epoch": 0.7259858442871587, "grad_norm": 0.17617358267307281, "learning_rate": 0.0003596388068141191, "loss": 0.2961, "mean_token_accuracy": 0.893797617405653, "num_tokens": 24567238.0, "step": 359 }, { "epoch": 0.7280080889787665, "grad_norm": 0.18195107579231262, "learning_rate": 0.0003593906759347934, "loss": 0.318, "mean_token_accuracy": 0.8848773874342442, "num_tokens": 24634769.0, "step": 360 }, { "epoch": 0.7300303336703741, "grad_norm": 0.16119951009750366, "learning_rate": 0.00035914188160529267, "loss": 0.2863, "mean_token_accuracy": 0.89824278652668, "num_tokens": 24701688.0, "step": 361 }, { "epoch": 0.7320525783619818, "grad_norm": 0.1530333161354065, "learning_rate": 0.00035889242500978966, "loss": 0.2737, "mean_token_accuracy": 0.901515819132328, "num_tokens": 24778487.0, "step": 362 }, { "epoch": 0.7340748230535895, "grad_norm": 0.172471821308136, "learning_rate": 0.0003586423073356092, "loss": 0.3, "mean_token_accuracy": 0.8986642919480801, "num_tokens": 24844779.0, "step": 363 }, { "epoch": 0.7360970677451971, "grad_norm": 0.1733032464981079, "learning_rate": 0.00035839152977322275, "loss": 0.2899, "mean_token_accuracy": 0.8977354988455772, "num_tokens": 24909088.0, "step": 364 }, { "epoch": 0.7381193124368048, "grad_norm": 0.16756588220596313, "learning_rate": 0.00035814009351624256, "loss": 0.2923, "mean_token_accuracy": 0.897175993770361, "num_tokens": 24978294.0, "step": 365 }, { "epoch": 0.7401415571284126, "grad_norm": 0.1823996752500534, "learning_rate": 0.00035788799976141605, "loss": 0.3227, "mean_token_accuracy": 0.8896390423178673, "num_tokens": 25043171.0, "step": 366 }, { "epoch": 0.7421638018200202, "grad_norm": 0.18004441261291504, "learning_rate": 0.0003576352497086201, "loss": 0.2954, "mean_token_accuracy": 0.8963689431548119, "num_tokens": 25113306.0, "step": 367 }, { "epoch": 0.7441860465116279, "grad_norm": 0.19010895490646362, "learning_rate": 0.0003573818445608552, "loss": 0.3013, "mean_token_accuracy": 0.8936556875705719, "num_tokens": 25178407.0, "step": 368 }, { "epoch": 0.7462082912032356, "grad_norm": 0.2009873390197754, "learning_rate": 0.0003571277855242401, "loss": 0.3204, "mean_token_accuracy": 0.8890100382268429, "num_tokens": 25236571.0, "step": 369 }, { "epoch": 0.7482305358948432, "grad_norm": 0.17589393258094788, "learning_rate": 0.00035687307380800556, "loss": 0.3046, "mean_token_accuracy": 0.8946997821331024, "num_tokens": 25298545.0, "step": 370 }, { "epoch": 0.750252780586451, "grad_norm": 0.1642550528049469, "learning_rate": 0.00035661771062448915, "loss": 0.2808, "mean_token_accuracy": 0.8977020867168903, "num_tokens": 25371496.0, "step": 371 }, { "epoch": 0.7522750252780587, "grad_norm": 0.178288072347641, "learning_rate": 0.00035636169718912894, "loss": 0.3122, "mean_token_accuracy": 0.8912137039005756, "num_tokens": 25434070.0, "step": 372 }, { "epoch": 0.7542972699696663, "grad_norm": 0.1830630898475647, "learning_rate": 0.0003561050347204581, "loss": 0.3156, "mean_token_accuracy": 0.8928086012601852, "num_tokens": 25499661.0, "step": 373 }, { "epoch": 0.756319514661274, "grad_norm": 0.15954959392547607, "learning_rate": 0.000355847724440099, "loss": 0.281, "mean_token_accuracy": 0.896581944078207, "num_tokens": 25577844.0, "step": 374 }, { "epoch": 0.7583417593528817, "grad_norm": 0.200165256857872, "learning_rate": 0.00035558976757275716, "loss": 0.3191, "mean_token_accuracy": 0.8899872414767742, "num_tokens": 25638524.0, "step": 375 }, { "epoch": 0.7603640040444893, "grad_norm": 0.1939467191696167, "learning_rate": 0.00035533116534621596, "loss": 0.3107, "mean_token_accuracy": 0.8947361186146736, "num_tokens": 25704939.0, "step": 376 }, { "epoch": 0.7623862487360971, "grad_norm": 0.16760645806789398, "learning_rate": 0.0003550719189913302, "loss": 0.2895, "mean_token_accuracy": 0.9010074771940708, "num_tokens": 25773040.0, "step": 377 }, { "epoch": 0.7644084934277048, "grad_norm": 0.17111922800540924, "learning_rate": 0.0003548120297420204, "loss": 0.2941, "mean_token_accuracy": 0.8943174667656422, "num_tokens": 25841353.0, "step": 378 }, { "epoch": 0.7664307381193124, "grad_norm": 0.19698713719844818, "learning_rate": 0.00035455149883526746, "loss": 0.3089, "mean_token_accuracy": 0.8988425992429256, "num_tokens": 25908894.0, "step": 379 }, { "epoch": 0.7684529828109201, "grad_norm": 0.19156275689601898, "learning_rate": 0.00035429032751110596, "loss": 0.2904, "mean_token_accuracy": 0.8982725702226162, "num_tokens": 25976883.0, "step": 380 }, { "epoch": 0.7704752275025278, "grad_norm": 0.17211389541625977, "learning_rate": 0.00035402851701261874, "loss": 0.2999, "mean_token_accuracy": 0.8920269943773746, "num_tokens": 26045757.0, "step": 381 }, { "epoch": 0.7724974721941354, "grad_norm": 0.17306530475616455, "learning_rate": 0.000353766068585931, "loss": 0.301, "mean_token_accuracy": 0.8918648697435856, "num_tokens": 26118719.0, "step": 382 }, { "epoch": 0.7745197168857432, "grad_norm": 0.17627696692943573, "learning_rate": 0.00035350298348020407, "loss": 0.2979, "mean_token_accuracy": 0.8935811407864094, "num_tokens": 26183890.0, "step": 383 }, { "epoch": 0.7765419615773509, "grad_norm": 0.16283521056175232, "learning_rate": 0.0003532392629476298, "loss": 0.2819, "mean_token_accuracy": 0.895574290305376, "num_tokens": 26254712.0, "step": 384 }, { "epoch": 0.7785642062689585, "grad_norm": 0.18045000731945038, "learning_rate": 0.00035297490824342436, "loss": 0.307, "mean_token_accuracy": 0.8899386301636696, "num_tokens": 26317196.0, "step": 385 }, { "epoch": 0.7805864509605662, "grad_norm": 0.15806086361408234, "learning_rate": 0.0003527099206258223, "loss": 0.289, "mean_token_accuracy": 0.8989690914750099, "num_tokens": 26385704.0, "step": 386 }, { "epoch": 0.782608695652174, "grad_norm": 0.17871202528476715, "learning_rate": 0.0003524443013560709, "loss": 0.2968, "mean_token_accuracy": 0.8961369805037975, "num_tokens": 26453865.0, "step": 387 }, { "epoch": 0.7846309403437816, "grad_norm": 0.17596516013145447, "learning_rate": 0.0003521780516984234, "loss": 0.2849, "mean_token_accuracy": 0.8956369571387768, "num_tokens": 26519337.0, "step": 388 }, { "epoch": 0.7866531850353893, "grad_norm": 0.1952444314956665, "learning_rate": 0.00035191117292013394, "loss": 0.3073, "mean_token_accuracy": 0.8928476311266422, "num_tokens": 26590979.0, "step": 389 }, { "epoch": 0.788675429726997, "grad_norm": 0.16196580231189728, "learning_rate": 0.00035164366629145073, "loss": 0.2858, "mean_token_accuracy": 0.8969371728599072, "num_tokens": 26662280.0, "step": 390 }, { "epoch": 0.7906976744186046, "grad_norm": 0.18022611737251282, "learning_rate": 0.0003513755330856104, "loss": 0.2996, "mean_token_accuracy": 0.8949360400438309, "num_tokens": 26735704.0, "step": 391 }, { "epoch": 0.7927199191102123, "grad_norm": 0.1670723408460617, "learning_rate": 0.000351106774578832, "loss": 0.3023, "mean_token_accuracy": 0.8980297967791557, "num_tokens": 26806733.0, "step": 392 }, { "epoch": 0.7947421638018201, "grad_norm": 0.16242116689682007, "learning_rate": 0.0003508373920503108, "loss": 0.2683, "mean_token_accuracy": 0.8985998295247555, "num_tokens": 26873233.0, "step": 393 }, { "epoch": 0.7967644084934277, "grad_norm": 0.15898491442203522, "learning_rate": 0.00035056738678221176, "loss": 0.2938, "mean_token_accuracy": 0.8989557921886444, "num_tokens": 26949546.0, "step": 394 }, { "epoch": 0.7987866531850354, "grad_norm": 0.1636972278356552, "learning_rate": 0.00035029676005966445, "loss": 0.2884, "mean_token_accuracy": 0.8981003984808922, "num_tokens": 27014513.0, "step": 395 }, { "epoch": 0.8008088978766431, "grad_norm": 0.1949148327112198, "learning_rate": 0.000350025513170756, "loss": 0.3172, "mean_token_accuracy": 0.8922760672867298, "num_tokens": 27076549.0, "step": 396 }, { "epoch": 0.8028311425682507, "grad_norm": 0.18752135336399078, "learning_rate": 0.0003497536474065254, "loss": 0.3197, "mean_token_accuracy": 0.8879435993731022, "num_tokens": 27143261.0, "step": 397 }, { "epoch": 0.8048533872598584, "grad_norm": 0.18382735550403595, "learning_rate": 0.0003494811640609572, "loss": 0.3165, "mean_token_accuracy": 0.8949453271925449, "num_tokens": 27208188.0, "step": 398 }, { "epoch": 0.8068756319514662, "grad_norm": 0.1782997101545334, "learning_rate": 0.0003492080644309756, "loss": 0.3018, "mean_token_accuracy": 0.8956249915063381, "num_tokens": 27279349.0, "step": 399 }, { "epoch": 0.8088978766430738, "grad_norm": 0.16625821590423584, "learning_rate": 0.0003489343498164378, "loss": 0.2909, "mean_token_accuracy": 0.8978218026459217, "num_tokens": 27349491.0, "step": 400 }, { "epoch": 0.8109201213346815, "grad_norm": 0.2034144103527069, "learning_rate": 0.0003486600215201284, "loss": 0.3205, "mean_token_accuracy": 0.8883098587393761, "num_tokens": 27425145.0, "step": 401 }, { "epoch": 0.8129423660262892, "grad_norm": 0.18235254287719727, "learning_rate": 0.0003483850808477527, "loss": 0.3142, "mean_token_accuracy": 0.8946905098855495, "num_tokens": 27493953.0, "step": 402 }, { "epoch": 0.8149646107178968, "grad_norm": 0.16972221434116364, "learning_rate": 0.00034810952910793085, "loss": 0.3183, "mean_token_accuracy": 0.886278223246336, "num_tokens": 27559794.0, "step": 403 }, { "epoch": 0.8169868554095046, "grad_norm": 0.17891989648342133, "learning_rate": 0.00034783336761219137, "loss": 0.2848, "mean_token_accuracy": 0.8995977118611336, "num_tokens": 27629989.0, "step": 404 }, { "epoch": 0.8190091001011123, "grad_norm": 0.1790463924407959, "learning_rate": 0.0003475565976749651, "loss": 0.3109, "mean_token_accuracy": 0.8868453428149223, "num_tokens": 27688846.0, "step": 405 }, { "epoch": 0.8210313447927199, "grad_norm": 0.1789504438638687, "learning_rate": 0.00034727922061357855, "loss": 0.3284, "mean_token_accuracy": 0.8879125751554966, "num_tokens": 27755235.0, "step": 406 }, { "epoch": 0.8230535894843276, "grad_norm": 0.19450780749320984, "learning_rate": 0.0003470012377482484, "loss": 0.3079, "mean_token_accuracy": 0.8906297236680984, "num_tokens": 27819736.0, "step": 407 }, { "epoch": 0.8250758341759353, "grad_norm": 0.21135565638542175, "learning_rate": 0.0003467226504020743, "loss": 0.3314, "mean_token_accuracy": 0.8855904154479504, "num_tokens": 27878648.0, "step": 408 }, { "epoch": 0.8270980788675429, "grad_norm": 0.1756933629512787, "learning_rate": 0.0003464434599010333, "loss": 0.3045, "mean_token_accuracy": 0.8893042095005512, "num_tokens": 27937967.0, "step": 409 }, { "epoch": 0.8291203235591507, "grad_norm": 0.1893833428621292, "learning_rate": 0.0003461636675739732, "loss": 0.3089, "mean_token_accuracy": 0.8921520821750164, "num_tokens": 28003500.0, "step": 410 }, { "epoch": 0.8311425682507584, "grad_norm": 0.19579611718654633, "learning_rate": 0.0003458832747526061, "loss": 0.2954, "mean_token_accuracy": 0.8962360806763172, "num_tokens": 28060691.0, "step": 411 }, { "epoch": 0.833164812942366, "grad_norm": 0.19954101741313934, "learning_rate": 0.0003456022827715025, "loss": 0.3057, "mean_token_accuracy": 0.8955631367862225, "num_tokens": 28119842.0, "step": 412 }, { "epoch": 0.8351870576339737, "grad_norm": 0.17535583674907684, "learning_rate": 0.0003453206929680844, "loss": 0.3181, "mean_token_accuracy": 0.8896914720535278, "num_tokens": 28189519.0, "step": 413 }, { "epoch": 0.8372093023255814, "grad_norm": 0.2034400850534439, "learning_rate": 0.0003450385066826195, "loss": 0.3132, "mean_token_accuracy": 0.8903135284781456, "num_tokens": 28256532.0, "step": 414 }, { "epoch": 0.839231547017189, "grad_norm": 0.18071752786636353, "learning_rate": 0.0003447557252582145, "loss": 0.3229, "mean_token_accuracy": 0.891409307718277, "num_tokens": 28320211.0, "step": 415 }, { "epoch": 0.8412537917087968, "grad_norm": 0.17119021713733673, "learning_rate": 0.00034447235004080853, "loss": 0.3096, "mean_token_accuracy": 0.8913502097129822, "num_tokens": 28384204.0, "step": 416 }, { "epoch": 0.8432760364004045, "grad_norm": 0.17320208251476288, "learning_rate": 0.0003441883823791671, "loss": 0.2935, "mean_token_accuracy": 0.8983162231743336, "num_tokens": 28454515.0, "step": 417 }, { "epoch": 0.8452982810920121, "grad_norm": 0.17323511838912964, "learning_rate": 0.0003439038236248757, "loss": 0.3053, "mean_token_accuracy": 0.8946337774395943, "num_tokens": 28524571.0, "step": 418 }, { "epoch": 0.8473205257836198, "grad_norm": 0.19488638639450073, "learning_rate": 0.00034361867513233303, "loss": 0.3131, "mean_token_accuracy": 0.8917714729905128, "num_tokens": 28583638.0, "step": 419 }, { "epoch": 0.8493427704752275, "grad_norm": 0.14881743490695953, "learning_rate": 0.00034333293825874464, "loss": 0.2561, "mean_token_accuracy": 0.9055963829159737, "num_tokens": 28668101.0, "step": 420 }, { "epoch": 0.8513650151668352, "grad_norm": 0.17198774218559265, "learning_rate": 0.0003430466143641168, "loss": 0.3071, "mean_token_accuracy": 0.8936148509383202, "num_tokens": 28739207.0, "step": 421 }, { "epoch": 0.8533872598584429, "grad_norm": 0.18449024856090546, "learning_rate": 0.00034275970481124977, "loss": 0.276, "mean_token_accuracy": 0.9006006754934788, "num_tokens": 28803993.0, "step": 422 }, { "epoch": 0.8554095045500506, "grad_norm": 0.1962573230266571, "learning_rate": 0.0003424722109657311, "loss": 0.3059, "mean_token_accuracy": 0.8941029235720634, "num_tokens": 28870609.0, "step": 423 }, { "epoch": 0.8574317492416582, "grad_norm": 0.1821158230304718, "learning_rate": 0.00034218413419592953, "loss": 0.2905, "mean_token_accuracy": 0.890890721231699, "num_tokens": 28943383.0, "step": 424 }, { "epoch": 0.8594539939332659, "grad_norm": 0.18370237946510315, "learning_rate": 0.00034189547587298836, "loss": 0.3166, "mean_token_accuracy": 0.8925870470702648, "num_tokens": 29009040.0, "step": 425 }, { "epoch": 0.8614762386248737, "grad_norm": 0.17396995425224304, "learning_rate": 0.00034160623737081885, "loss": 0.3011, "mean_token_accuracy": 0.8914640247821808, "num_tokens": 29074553.0, "step": 426 }, { "epoch": 0.8634984833164813, "grad_norm": 0.19026698172092438, "learning_rate": 0.00034131642006609365, "loss": 0.3249, "mean_token_accuracy": 0.8910115286707878, "num_tokens": 29147204.0, "step": 427 }, { "epoch": 0.865520728008089, "grad_norm": 0.1589595079421997, "learning_rate": 0.00034102602533824027, "loss": 0.2785, "mean_token_accuracy": 0.903257142752409, "num_tokens": 29218571.0, "step": 428 }, { "epoch": 0.8675429726996967, "grad_norm": 0.1674802154302597, "learning_rate": 0.00034073505456943463, "loss": 0.2977, "mean_token_accuracy": 0.8929527476429939, "num_tokens": 29287185.0, "step": 429 }, { "epoch": 0.8695652173913043, "grad_norm": 0.17129530012607574, "learning_rate": 0.0003404435091445945, "loss": 0.2769, "mean_token_accuracy": 0.8992316760122776, "num_tokens": 29355908.0, "step": 430 }, { "epoch": 0.871587462082912, "grad_norm": 0.1718977391719818, "learning_rate": 0.00034015139045137253, "loss": 0.3137, "mean_token_accuracy": 0.8935650922358036, "num_tokens": 29421396.0, "step": 431 }, { "epoch": 0.8736097067745198, "grad_norm": 0.17011679708957672, "learning_rate": 0.00033985869988015016, "loss": 0.2855, "mean_token_accuracy": 0.8953105248510838, "num_tokens": 29493294.0, "step": 432 }, { "epoch": 0.8756319514661274, "grad_norm": 0.1868988573551178, "learning_rate": 0.0003395654388240307, "loss": 0.3196, "mean_token_accuracy": 0.8894085213541985, "num_tokens": 29555484.0, "step": 433 }, { "epoch": 0.8776541961577351, "grad_norm": 0.15462960302829742, "learning_rate": 0.0003392716086788328, "loss": 0.2957, "mean_token_accuracy": 0.8983679711818695, "num_tokens": 29623656.0, "step": 434 }, { "epoch": 0.8796764408493428, "grad_norm": 0.16427457332611084, "learning_rate": 0.0003389772108430835, "loss": 0.2979, "mean_token_accuracy": 0.8941413648426533, "num_tokens": 29690023.0, "step": 435 }, { "epoch": 0.8816986855409504, "grad_norm": 0.1687782257795334, "learning_rate": 0.00033868224671801243, "loss": 0.2573, "mean_token_accuracy": 0.9024456590414047, "num_tokens": 29756579.0, "step": 436 }, { "epoch": 0.8837209302325582, "grad_norm": 0.1602339744567871, "learning_rate": 0.00033838671770754393, "loss": 0.2829, "mean_token_accuracy": 0.9009444527328014, "num_tokens": 29823974.0, "step": 437 }, { "epoch": 0.8857431749241659, "grad_norm": 0.17867590487003326, "learning_rate": 0.00033809062521829135, "loss": 0.3058, "mean_token_accuracy": 0.8952712267637253, "num_tokens": 29896076.0, "step": 438 }, { "epoch": 0.8877654196157735, "grad_norm": 0.20030587911605835, "learning_rate": 0.0003377939706595499, "loss": 0.3275, "mean_token_accuracy": 0.8882710337638855, "num_tokens": 29959878.0, "step": 439 }, { "epoch": 0.8897876643073812, "grad_norm": 0.18861141800880432, "learning_rate": 0.00033749675544329007, "loss": 0.2941, "mean_token_accuracy": 0.894235398620367, "num_tokens": 30029419.0, "step": 440 }, { "epoch": 0.8918099089989889, "grad_norm": 0.17503049969673157, "learning_rate": 0.0003371989809841508, "loss": 0.2796, "mean_token_accuracy": 0.8981444463133812, "num_tokens": 30099980.0, "step": 441 }, { "epoch": 0.8938321536905965, "grad_norm": 0.17344842851161957, "learning_rate": 0.00033690064869943304, "loss": 0.2806, "mean_token_accuracy": 0.9028143547475338, "num_tokens": 30160123.0, "step": 442 }, { "epoch": 0.8958543983822043, "grad_norm": 0.21486879885196686, "learning_rate": 0.00033660176000909256, "loss": 0.3017, "mean_token_accuracy": 0.8955220691859722, "num_tokens": 30221040.0, "step": 443 }, { "epoch": 0.897876643073812, "grad_norm": 0.16732099652290344, "learning_rate": 0.0003363023163357335, "loss": 0.3038, "mean_token_accuracy": 0.8961573019623756, "num_tokens": 30288318.0, "step": 444 }, { "epoch": 0.8998988877654196, "grad_norm": 0.17499873042106628, "learning_rate": 0.00033600231910460153, "loss": 0.2942, "mean_token_accuracy": 0.8975008726119995, "num_tokens": 30351020.0, "step": 445 }, { "epoch": 0.9019211324570273, "grad_norm": 0.18658067286014557, "learning_rate": 0.0003357017697435771, "loss": 0.2997, "mean_token_accuracy": 0.8956367336213589, "num_tokens": 30425559.0, "step": 446 }, { "epoch": 0.9039433771486349, "grad_norm": 0.19921845197677612, "learning_rate": 0.0003354006696831685, "loss": 0.321, "mean_token_accuracy": 0.8870183601975441, "num_tokens": 30487225.0, "step": 447 }, { "epoch": 0.9059656218402427, "grad_norm": 0.15201924741268158, "learning_rate": 0.00033509902035650527, "loss": 0.2805, "mean_token_accuracy": 0.8986309170722961, "num_tokens": 30566969.0, "step": 448 }, { "epoch": 0.9079878665318504, "grad_norm": 0.14417074620723724, "learning_rate": 0.00033479682319933124, "loss": 0.2746, "mean_token_accuracy": 0.9016837328672409, "num_tokens": 30657549.0, "step": 449 }, { "epoch": 0.910010111223458, "grad_norm": 0.20164437592029572, "learning_rate": 0.00033449407964999755, "loss": 0.307, "mean_token_accuracy": 0.8908158242702484, "num_tokens": 30719396.0, "step": 450 }, { "epoch": 0.9120323559150657, "grad_norm": 0.15949569642543793, "learning_rate": 0.0003341907911494562, "loss": 0.2813, "mean_token_accuracy": 0.8971740826964378, "num_tokens": 30796942.0, "step": 451 }, { "epoch": 0.9140546006066734, "grad_norm": 0.18862098455429077, "learning_rate": 0.0003338869591412529, "loss": 0.3339, "mean_token_accuracy": 0.8874437399208546, "num_tokens": 30858913.0, "step": 452 }, { "epoch": 0.916076845298281, "grad_norm": 0.19091889262199402, "learning_rate": 0.0003335825850715203, "loss": 0.3099, "mean_token_accuracy": 0.8915912732481956, "num_tokens": 30923946.0, "step": 453 }, { "epoch": 0.9180990899898888, "grad_norm": 0.17616930603981018, "learning_rate": 0.0003332776703889708, "loss": 0.302, "mean_token_accuracy": 0.8977428935468197, "num_tokens": 30991635.0, "step": 454 }, { "epoch": 0.9201213346814965, "grad_norm": 0.16347502171993256, "learning_rate": 0.00033297221654489026, "loss": 0.2968, "mean_token_accuracy": 0.8974283151328564, "num_tokens": 31065527.0, "step": 455 }, { "epoch": 0.9221435793731041, "grad_norm": 0.15494075417518616, "learning_rate": 0.0003326662249931307, "loss": 0.2745, "mean_token_accuracy": 0.9003672078251839, "num_tokens": 31139389.0, "step": 456 }, { "epoch": 0.9241658240647118, "grad_norm": 0.14488424360752106, "learning_rate": 0.0003323596971901032, "loss": 0.2315, "mean_token_accuracy": 0.9032083451747894, "num_tokens": 31211644.0, "step": 457 }, { "epoch": 0.9261880687563195, "grad_norm": 0.17343585193157196, "learning_rate": 0.0003320526345947716, "loss": 0.2834, "mean_token_accuracy": 0.8971737772226334, "num_tokens": 31281551.0, "step": 458 }, { "epoch": 0.9282103134479271, "grad_norm": 0.20809240639209747, "learning_rate": 0.0003317450386686447, "loss": 0.3392, "mean_token_accuracy": 0.8834185339510441, "num_tokens": 31339866.0, "step": 459 }, { "epoch": 0.9302325581395349, "grad_norm": 0.1745264083147049, "learning_rate": 0.00033143691087577016, "loss": 0.3135, "mean_token_accuracy": 0.8907811567187309, "num_tokens": 31397435.0, "step": 460 }, { "epoch": 0.9322548028311426, "grad_norm": 0.19855932891368866, "learning_rate": 0.00033112825268272693, "loss": 0.2874, "mean_token_accuracy": 0.9011034667491913, "num_tokens": 31477769.0, "step": 461 }, { "epoch": 0.9342770475227502, "grad_norm": 0.18550598621368408, "learning_rate": 0.0003308190655586185, "loss": 0.3026, "mean_token_accuracy": 0.8910555392503738, "num_tokens": 31543808.0, "step": 462 }, { "epoch": 0.9362992922143579, "grad_norm": 0.17249254882335663, "learning_rate": 0.000330509350975066, "loss": 0.2988, "mean_token_accuracy": 0.8944742307066917, "num_tokens": 31608876.0, "step": 463 }, { "epoch": 0.9383215369059656, "grad_norm": 0.15075324475765228, "learning_rate": 0.0003301991104062009, "loss": 0.272, "mean_token_accuracy": 0.90623002871871, "num_tokens": 31680601.0, "step": 464 }, { "epoch": 0.9403437815975733, "grad_norm": 0.18637825548648834, "learning_rate": 0.00032988834532865827, "loss": 0.3234, "mean_token_accuracy": 0.8885620683431625, "num_tokens": 31747402.0, "step": 465 }, { "epoch": 0.942366026289181, "grad_norm": 0.1554325670003891, "learning_rate": 0.0003295770572215697, "loss": 0.2836, "mean_token_accuracy": 0.9002716057002544, "num_tokens": 31818720.0, "step": 466 }, { "epoch": 0.9443882709807887, "grad_norm": 0.17428986728191376, "learning_rate": 0.00032926524756655615, "loss": 0.2917, "mean_token_accuracy": 0.8964979350566864, "num_tokens": 31891824.0, "step": 467 }, { "epoch": 0.9464105156723963, "grad_norm": 0.16667652130126953, "learning_rate": 0.000328952917847721, "loss": 0.2742, "mean_token_accuracy": 0.901694979518652, "num_tokens": 31967670.0, "step": 468 }, { "epoch": 0.948432760364004, "grad_norm": 0.17575259506702423, "learning_rate": 0.00032864006955164287, "loss": 0.3164, "mean_token_accuracy": 0.8907586932182312, "num_tokens": 32033261.0, "step": 469 }, { "epoch": 0.9504550050556118, "grad_norm": 0.17919106781482697, "learning_rate": 0.0003283267041673687, "loss": 0.303, "mean_token_accuracy": 0.8939293213188648, "num_tokens": 32096462.0, "step": 470 }, { "epoch": 0.9524772497472194, "grad_norm": 0.18951061367988586, "learning_rate": 0.0003280128231864066, "loss": 0.3249, "mean_token_accuracy": 0.8879147619009018, "num_tokens": 32157870.0, "step": 471 }, { "epoch": 0.9544994944388271, "grad_norm": 0.1526096761226654, "learning_rate": 0.0003276984281027186, "loss": 0.2505, "mean_token_accuracy": 0.9095052257180214, "num_tokens": 32236445.0, "step": 472 }, { "epoch": 0.9565217391304348, "grad_norm": 0.16995003819465637, "learning_rate": 0.00032738352041271395, "loss": 0.3174, "mean_token_accuracy": 0.8889270462095737, "num_tokens": 32304171.0, "step": 473 }, { "epoch": 0.9585439838220424, "grad_norm": 0.16517885029315948, "learning_rate": 0.0003270681016152414, "loss": 0.3144, "mean_token_accuracy": 0.8923964686691761, "num_tokens": 32372702.0, "step": 474 }, { "epoch": 0.9605662285136501, "grad_norm": 0.18384018540382385, "learning_rate": 0.00032675217321158264, "loss": 0.2903, "mean_token_accuracy": 0.8964046128094196, "num_tokens": 32442132.0, "step": 475 }, { "epoch": 0.9625884732052579, "grad_norm": 0.1601627767086029, "learning_rate": 0.0003264357367054449, "loss": 0.2766, "mean_token_accuracy": 0.9007900506258011, "num_tokens": 32514430.0, "step": 476 }, { "epoch": 0.9646107178968655, "grad_norm": 0.18358251452445984, "learning_rate": 0.00032611879360295345, "loss": 0.2927, "mean_token_accuracy": 0.8977400958538055, "num_tokens": 32579788.0, "step": 477 }, { "epoch": 0.9666329625884732, "grad_norm": 0.2047470211982727, "learning_rate": 0.0003258013454126452, "loss": 0.3131, "mean_token_accuracy": 0.8929316326975822, "num_tokens": 32642283.0, "step": 478 }, { "epoch": 0.9686552072800809, "grad_norm": 0.1662026345729828, "learning_rate": 0.0003254833936454609, "loss": 0.2841, "mean_token_accuracy": 0.8985595107078552, "num_tokens": 32709386.0, "step": 479 }, { "epoch": 0.9706774519716885, "grad_norm": 0.1934393048286438, "learning_rate": 0.00032516493981473826, "loss": 0.2869, "mean_token_accuracy": 0.8976165167987347, "num_tokens": 32778573.0, "step": 480 }, { "epoch": 0.9726996966632963, "grad_norm": 0.1651667058467865, "learning_rate": 0.0003248459854362044, "loss": 0.2993, "mean_token_accuracy": 0.893569964915514, "num_tokens": 32853785.0, "step": 481 }, { "epoch": 0.974721941354904, "grad_norm": 0.18779976665973663, "learning_rate": 0.00032452653202796915, "loss": 0.3223, "mean_token_accuracy": 0.8855483829975128, "num_tokens": 32917542.0, "step": 482 }, { "epoch": 0.9767441860465116, "grad_norm": 0.14583131670951843, "learning_rate": 0.00032420658111051746, "loss": 0.2772, "mean_token_accuracy": 0.8998262621462345, "num_tokens": 32987391.0, "step": 483 }, { "epoch": 0.9787664307381193, "grad_norm": 0.23910751938819885, "learning_rate": 0.00032388613420670213, "loss": 0.3257, "mean_token_accuracy": 0.8845948688685894, "num_tokens": 33053804.0, "step": 484 }, { "epoch": 0.980788675429727, "grad_norm": 0.1679566651582718, "learning_rate": 0.00032356519284173666, "loss": 0.2988, "mean_token_accuracy": 0.8954810760915279, "num_tokens": 33123281.0, "step": 485 }, { "epoch": 0.9828109201213346, "grad_norm": 0.17945775389671326, "learning_rate": 0.0003232437585431883, "loss": 0.3127, "mean_token_accuracy": 0.8931021988391876, "num_tokens": 33188358.0, "step": 486 }, { "epoch": 0.9848331648129424, "grad_norm": 0.18727077543735504, "learning_rate": 0.00032292183284097023, "loss": 0.3259, "mean_token_accuracy": 0.8901765421032906, "num_tokens": 33251289.0, "step": 487 }, { "epoch": 0.9868554095045501, "grad_norm": 0.1629391312599182, "learning_rate": 0.0003225994172673346, "loss": 0.3004, "mean_token_accuracy": 0.8926238007843494, "num_tokens": 33322968.0, "step": 488 }, { "epoch": 0.9888776541961577, "grad_norm": 0.1630707085132599, "learning_rate": 0.00032227651335686513, "loss": 0.2809, "mean_token_accuracy": 0.9002612978219986, "num_tokens": 33393350.0, "step": 489 }, { "epoch": 0.9908998988877654, "grad_norm": 0.17929117381572723, "learning_rate": 0.0003219531226464699, "loss": 0.3214, "mean_token_accuracy": 0.8894147910177708, "num_tokens": 33458431.0, "step": 490 }, { "epoch": 0.9929221435793731, "grad_norm": 0.1639278680086136, "learning_rate": 0.00032162924667537406, "loss": 0.2891, "mean_token_accuracy": 0.8945626839995384, "num_tokens": 33526451.0, "step": 491 }, { "epoch": 0.9949443882709808, "grad_norm": 0.1808111071586609, "learning_rate": 0.0003213048869851124, "loss": 0.2965, "mean_token_accuracy": 0.8966854028403759, "num_tokens": 33589564.0, "step": 492 }, { "epoch": 0.9969666329625885, "grad_norm": 0.1905975043773651, "learning_rate": 0.00032098004511952184, "loss": 0.3017, "mean_token_accuracy": 0.8935710862278938, "num_tokens": 33649359.0, "step": 493 }, { "epoch": 0.9989888776541962, "grad_norm": 0.17898094654083252, "learning_rate": 0.00032065472262473443, "loss": 0.3193, "mean_token_accuracy": 0.8906168565154076, "num_tokens": 33721593.0, "step": 494 }, { "epoch": 1.0, "grad_norm": 0.22628654539585114, "learning_rate": 0.00032032892104917, "loss": 0.3083, "mean_token_accuracy": 0.8914947211742401, "num_tokens": 33755641.0, "step": 495 }, { "epoch": 1.0020222446916076, "grad_norm": 0.13782188296318054, "learning_rate": 0.00032000264194352845, "loss": 0.2663, "mean_token_accuracy": 0.8996973298490047, "num_tokens": 33834819.0, "step": 496 }, { "epoch": 1.0040444893832154, "grad_norm": 0.17569021880626678, "learning_rate": 0.0003196758868607825, "loss": 0.2952, "mean_token_accuracy": 0.8985786736011505, "num_tokens": 33902435.0, "step": 497 }, { "epoch": 1.006066734074823, "grad_norm": 0.2067909836769104, "learning_rate": 0.0003193486573561705, "loss": 0.3225, "mean_token_accuracy": 0.8876040019094944, "num_tokens": 33965666.0, "step": 498 }, { "epoch": 1.0080889787664307, "grad_norm": 0.16878552734851837, "learning_rate": 0.0003190209549871888, "loss": 0.2942, "mean_token_accuracy": 0.8955768346786499, "num_tokens": 34032445.0, "step": 499 }, { "epoch": 1.0101112234580385, "grad_norm": 0.15274177491664886, "learning_rate": 0.00031869278131358455, "loss": 0.2427, "mean_token_accuracy": 0.9117574766278267, "num_tokens": 34114342.0, "step": 500 }, { "epoch": 1.012133468149646, "grad_norm": 0.22229406237602234, "learning_rate": 0.0003183641378973478, "loss": 0.2961, "mean_token_accuracy": 0.8931870721280575, "num_tokens": 34170031.0, "step": 501 }, { "epoch": 1.0141557128412537, "grad_norm": 0.17795279622077942, "learning_rate": 0.0003180350263027049, "loss": 0.2921, "mean_token_accuracy": 0.8974833749234676, "num_tokens": 34232994.0, "step": 502 }, { "epoch": 1.0161779575328616, "grad_norm": 0.1530430167913437, "learning_rate": 0.0003177054480961101, "loss": 0.2587, "mean_token_accuracy": 0.8979953937232494, "num_tokens": 34306018.0, "step": 503 }, { "epoch": 1.0182002022244692, "grad_norm": 0.17740803956985474, "learning_rate": 0.00031737540484623895, "loss": 0.3102, "mean_token_accuracy": 0.8884628489613533, "num_tokens": 34374661.0, "step": 504 }, { "epoch": 1.0202224469160768, "grad_norm": 0.177719384431839, "learning_rate": 0.00031704489812398013, "loss": 0.2953, "mean_token_accuracy": 0.8939866498112679, "num_tokens": 34438514.0, "step": 505 }, { "epoch": 1.0222446916076846, "grad_norm": 0.168897345662117, "learning_rate": 0.00031671392950242836, "loss": 0.269, "mean_token_accuracy": 0.9047276936471462, "num_tokens": 34505982.0, "step": 506 }, { "epoch": 1.0242669362992922, "grad_norm": 0.15597204864025116, "learning_rate": 0.0003163825005568769, "loss": 0.2585, "mean_token_accuracy": 0.9080711491405964, "num_tokens": 34578668.0, "step": 507 }, { "epoch": 1.0262891809908998, "grad_norm": 0.17869000136852264, "learning_rate": 0.00031605061286481013, "loss": 0.3069, "mean_token_accuracy": 0.8951312974095345, "num_tokens": 34649274.0, "step": 508 }, { "epoch": 1.0283114256825077, "grad_norm": 0.15539689362049103, "learning_rate": 0.0003157182680058955, "loss": 0.2495, "mean_token_accuracy": 0.9083127416670322, "num_tokens": 34727319.0, "step": 509 }, { "epoch": 1.0303336703741153, "grad_norm": 0.18144549429416656, "learning_rate": 0.00031538546756197693, "loss": 0.2856, "mean_token_accuracy": 0.9019791670143604, "num_tokens": 34797454.0, "step": 510 }, { "epoch": 1.0323559150657229, "grad_norm": 0.18584753572940826, "learning_rate": 0.0003150522131170663, "loss": 0.2954, "mean_token_accuracy": 0.8972033709287643, "num_tokens": 34864905.0, "step": 511 }, { "epoch": 1.0343781597573307, "grad_norm": 0.19840823113918304, "learning_rate": 0.0003147185062573365, "loss": 0.28, "mean_token_accuracy": 0.901741374284029, "num_tokens": 34928661.0, "step": 512 }, { "epoch": 1.0364004044489383, "grad_norm": 0.14095668494701385, "learning_rate": 0.00031438434857111405, "loss": 0.2666, "mean_token_accuracy": 0.9036082923412323, "num_tokens": 35002573.0, "step": 513 }, { "epoch": 1.038422649140546, "grad_norm": 0.13482429087162018, "learning_rate": 0.0003140497416488708, "loss": 0.2603, "mean_token_accuracy": 0.9059791043400764, "num_tokens": 35083602.0, "step": 514 }, { "epoch": 1.0404448938321538, "grad_norm": 0.20816905796527863, "learning_rate": 0.00031371468708321713, "loss": 0.3049, "mean_token_accuracy": 0.8949435539543629, "num_tokens": 35150470.0, "step": 515 }, { "epoch": 1.0424671385237614, "grad_norm": 0.17933416366577148, "learning_rate": 0.0003133791864688939, "loss": 0.2972, "mean_token_accuracy": 0.8948968909680843, "num_tokens": 35216813.0, "step": 516 }, { "epoch": 1.044489383215369, "grad_norm": 0.17087870836257935, "learning_rate": 0.00031304324140276496, "loss": 0.2891, "mean_token_accuracy": 0.8967925682663918, "num_tokens": 35287089.0, "step": 517 }, { "epoch": 1.0465116279069768, "grad_norm": 0.19874465465545654, "learning_rate": 0.0003127068534838098, "loss": 0.2864, "mean_token_accuracy": 0.8976041786372662, "num_tokens": 35348784.0, "step": 518 }, { "epoch": 1.0485338725985844, "grad_norm": 0.17467646300792694, "learning_rate": 0.0003123700243131155, "loss": 0.2742, "mean_token_accuracy": 0.9038321636617184, "num_tokens": 35430257.0, "step": 519 }, { "epoch": 1.050556117290192, "grad_norm": 0.20859748125076294, "learning_rate": 0.00031203275549386935, "loss": 0.29, "mean_token_accuracy": 0.8973617292940617, "num_tokens": 35492098.0, "step": 520 }, { "epoch": 1.0525783619817999, "grad_norm": 0.1560591757297516, "learning_rate": 0.00031169504863135157, "loss": 0.2593, "mean_token_accuracy": 0.9061496220529079, "num_tokens": 35578894.0, "step": 521 }, { "epoch": 1.0546006066734075, "grad_norm": 0.17322826385498047, "learning_rate": 0.0003113569053329268, "loss": 0.2656, "mean_token_accuracy": 0.9077408090233803, "num_tokens": 35658590.0, "step": 522 }, { "epoch": 1.056622851365015, "grad_norm": 0.16736696660518646, "learning_rate": 0.0003110183272080373, "loss": 0.2647, "mean_token_accuracy": 0.9043499119579792, "num_tokens": 35722339.0, "step": 523 }, { "epoch": 1.058645096056623, "grad_norm": 0.20183323323726654, "learning_rate": 0.00031067931586819473, "loss": 0.2937, "mean_token_accuracy": 0.8954190462827682, "num_tokens": 35782293.0, "step": 524 }, { "epoch": 1.0606673407482305, "grad_norm": 0.16886426508426666, "learning_rate": 0.000310339872926973, "loss": 0.2841, "mean_token_accuracy": 0.9006736651062965, "num_tokens": 35849795.0, "step": 525 }, { "epoch": 1.0626895854398382, "grad_norm": 0.16396957635879517, "learning_rate": 0.00031, "loss": 0.2747, "mean_token_accuracy": 0.9040698818862438, "num_tokens": 35926179.0, "step": 526 }, { "epoch": 1.064711830131446, "grad_norm": 0.17668411135673523, "learning_rate": 0.00030965969870495034, "loss": 0.293, "mean_token_accuracy": 0.8949432447552681, "num_tokens": 35992037.0, "step": 527 }, { "epoch": 1.0667340748230536, "grad_norm": 0.16346760094165802, "learning_rate": 0.0003093189706615375, "loss": 0.2524, "mean_token_accuracy": 0.9064350612461567, "num_tokens": 36060378.0, "step": 528 }, { "epoch": 1.0687563195146612, "grad_norm": 0.17525459825992584, "learning_rate": 0.000308977817491506, "loss": 0.2943, "mean_token_accuracy": 0.8935273364186287, "num_tokens": 36126013.0, "step": 529 }, { "epoch": 1.070778564206269, "grad_norm": 0.16501343250274658, "learning_rate": 0.00030863624081862415, "loss": 0.2789, "mean_token_accuracy": 0.8968185931444168, "num_tokens": 36196795.0, "step": 530 }, { "epoch": 1.0728008088978767, "grad_norm": 0.16026921570301056, "learning_rate": 0.0003082942422686754, "loss": 0.2671, "mean_token_accuracy": 0.9082406982779503, "num_tokens": 36275178.0, "step": 531 }, { "epoch": 1.0748230535894843, "grad_norm": 0.19023281335830688, "learning_rate": 0.0003079518234694519, "loss": 0.3116, "mean_token_accuracy": 0.8914121352136135, "num_tokens": 36338049.0, "step": 532 }, { "epoch": 1.076845298281092, "grad_norm": 0.18959233164787292, "learning_rate": 0.00030760898605074546, "loss": 0.2626, "mean_token_accuracy": 0.9018443673849106, "num_tokens": 36420122.0, "step": 533 }, { "epoch": 1.0788675429726997, "grad_norm": 0.18601641058921814, "learning_rate": 0.00030726573164434074, "loss": 0.2946, "mean_token_accuracy": 0.8955305181443691, "num_tokens": 36486673.0, "step": 534 }, { "epoch": 1.0808897876643073, "grad_norm": 0.17861206829547882, "learning_rate": 0.0003069220618840067, "loss": 0.2638, "mean_token_accuracy": 0.9000630341470242, "num_tokens": 36548189.0, "step": 535 }, { "epoch": 1.0829120323559152, "grad_norm": 0.16839022934436798, "learning_rate": 0.0003065779784054898, "loss": 0.2821, "mean_token_accuracy": 0.901892576366663, "num_tokens": 36619289.0, "step": 536 }, { "epoch": 1.0849342770475228, "grad_norm": 0.16797274351119995, "learning_rate": 0.0003062334828465052, "loss": 0.2722, "mean_token_accuracy": 0.901667632162571, "num_tokens": 36690144.0, "step": 537 }, { "epoch": 1.0869565217391304, "grad_norm": 0.1743130087852478, "learning_rate": 0.00030588857684672955, "loss": 0.2567, "mean_token_accuracy": 0.9072123803198338, "num_tokens": 36761617.0, "step": 538 }, { "epoch": 1.0889787664307382, "grad_norm": 0.1802840232849121, "learning_rate": 0.0003055432620477931, "loss": 0.2822, "mean_token_accuracy": 0.8998791016638279, "num_tokens": 36828873.0, "step": 539 }, { "epoch": 1.0910010111223458, "grad_norm": 0.19156496226787567, "learning_rate": 0.00030519754009327186, "loss": 0.3002, "mean_token_accuracy": 0.8940830379724503, "num_tokens": 36893847.0, "step": 540 }, { "epoch": 1.0930232558139534, "grad_norm": 0.18583235144615173, "learning_rate": 0.0003048514126286796, "loss": 0.2692, "mean_token_accuracy": 0.9024544768035412, "num_tokens": 36963240.0, "step": 541 }, { "epoch": 1.0950455005055613, "grad_norm": 0.17397500574588776, "learning_rate": 0.00030450488130146034, "loss": 0.2691, "mean_token_accuracy": 0.9022202827036381, "num_tokens": 37026381.0, "step": 542 }, { "epoch": 1.0970677451971689, "grad_norm": 0.24742691218852997, "learning_rate": 0.0003041579477609803, "loss": 0.3287, "mean_token_accuracy": 0.8853081800043583, "num_tokens": 37085095.0, "step": 543 }, { "epoch": 1.0990899898887765, "grad_norm": 0.16266337037086487, "learning_rate": 0.00030381061365852006, "loss": 0.2669, "mean_token_accuracy": 0.908314511179924, "num_tokens": 37156057.0, "step": 544 }, { "epoch": 1.1011122345803843, "grad_norm": 0.1805969476699829, "learning_rate": 0.00030346288064726676, "loss": 0.2762, "mean_token_accuracy": 0.9019368290901184, "num_tokens": 37218048.0, "step": 545 }, { "epoch": 1.103134479271992, "grad_norm": 0.2024918794631958, "learning_rate": 0.00030311475038230615, "loss": 0.2948, "mean_token_accuracy": 0.8978271037340164, "num_tokens": 37283475.0, "step": 546 }, { "epoch": 1.1051567239635995, "grad_norm": 0.16442124545574188, "learning_rate": 0.00030276622452061477, "loss": 0.2746, "mean_token_accuracy": 0.9010177366435528, "num_tokens": 37358871.0, "step": 547 }, { "epoch": 1.1071789686552074, "grad_norm": 0.17242524027824402, "learning_rate": 0.0003024173047210522, "loss": 0.2975, "mean_token_accuracy": 0.8940832912921906, "num_tokens": 37421863.0, "step": 548 }, { "epoch": 1.109201213346815, "grad_norm": 0.2123114913702011, "learning_rate": 0.00030206799264435294, "loss": 0.3084, "mean_token_accuracy": 0.8925547078251839, "num_tokens": 37486615.0, "step": 549 }, { "epoch": 1.1112234580384226, "grad_norm": 0.16941364109516144, "learning_rate": 0.00030171828995311845, "loss": 0.2997, "mean_token_accuracy": 0.8960695490241051, "num_tokens": 37556657.0, "step": 550 }, { "epoch": 1.1132457027300304, "grad_norm": 0.18581314384937286, "learning_rate": 0.0003013681983118096, "loss": 0.3056, "mean_token_accuracy": 0.8949491046369076, "num_tokens": 37623124.0, "step": 551 }, { "epoch": 1.115267947421638, "grad_norm": 0.17790380120277405, "learning_rate": 0.0003010177193867383, "loss": 0.2849, "mean_token_accuracy": 0.8990210555493832, "num_tokens": 37688876.0, "step": 552 }, { "epoch": 1.1172901921132457, "grad_norm": 0.17190231382846832, "learning_rate": 0.00030066685484606004, "loss": 0.2805, "mean_token_accuracy": 0.8991851061582565, "num_tokens": 37757188.0, "step": 553 }, { "epoch": 1.1193124368048535, "grad_norm": 0.17098551988601685, "learning_rate": 0.00030031560635976557, "loss": 0.2809, "mean_token_accuracy": 0.8985818810760975, "num_tokens": 37822088.0, "step": 554 }, { "epoch": 1.121334681496461, "grad_norm": 0.16426457464694977, "learning_rate": 0.0002999639755996731, "loss": 0.271, "mean_token_accuracy": 0.9015116766095161, "num_tokens": 37885778.0, "step": 555 }, { "epoch": 1.1233569261880687, "grad_norm": 0.16016022861003876, "learning_rate": 0.00029961196423942027, "loss": 0.2436, "mean_token_accuracy": 0.9075723215937614, "num_tokens": 37956105.0, "step": 556 }, { "epoch": 1.1253791708796763, "grad_norm": 0.17624878883361816, "learning_rate": 0.0002992595739544563, "loss": 0.2851, "mean_token_accuracy": 0.8980127796530724, "num_tokens": 38022057.0, "step": 557 }, { "epoch": 1.1274014155712841, "grad_norm": 0.2018936723470688, "learning_rate": 0.00029890680642203395, "loss": 0.2971, "mean_token_accuracy": 0.8927877955138683, "num_tokens": 38088320.0, "step": 558 }, { "epoch": 1.1294236602628918, "grad_norm": 0.19130869209766388, "learning_rate": 0.0002985536633212016, "loss": 0.2797, "mean_token_accuracy": 0.8997831009328365, "num_tokens": 38149395.0, "step": 559 }, { "epoch": 1.1314459049544996, "grad_norm": 0.19779284298419952, "learning_rate": 0.0002982001463327951, "loss": 0.3127, "mean_token_accuracy": 0.8897297792136669, "num_tokens": 38211779.0, "step": 560 }, { "epoch": 1.1334681496461072, "grad_norm": 0.1628047674894333, "learning_rate": 0.0002978462571394299, "loss": 0.2637, "mean_token_accuracy": 0.9051007218658924, "num_tokens": 38279919.0, "step": 561 }, { "epoch": 1.1354903943377148, "grad_norm": 0.1489226073026657, "learning_rate": 0.00029749199742549315, "loss": 0.2525, "mean_token_accuracy": 0.9131556376814842, "num_tokens": 38348885.0, "step": 562 }, { "epoch": 1.1375126390293224, "grad_norm": 0.16562367975711823, "learning_rate": 0.0002971373688771353, "loss": 0.2804, "mean_token_accuracy": 0.9060126468539238, "num_tokens": 38414361.0, "step": 563 }, { "epoch": 1.1395348837209303, "grad_norm": 0.18426918983459473, "learning_rate": 0.00029678237318226254, "loss": 0.3034, "mean_token_accuracy": 0.8923818841576576, "num_tokens": 38478031.0, "step": 564 }, { "epoch": 1.1415571284125379, "grad_norm": 0.18996812403202057, "learning_rate": 0.0002964270120305284, "loss": 0.3118, "mean_token_accuracy": 0.8920970819890499, "num_tokens": 38537650.0, "step": 565 }, { "epoch": 1.1435793731041457, "grad_norm": 0.1744386851787567, "learning_rate": 0.0002960712871133259, "loss": 0.3105, "mean_token_accuracy": 0.8955930359661579, "num_tokens": 38599799.0, "step": 566 }, { "epoch": 1.1456016177957533, "grad_norm": 0.1756746470928192, "learning_rate": 0.0002957152001237796, "loss": 0.2879, "mean_token_accuracy": 0.8998842090368271, "num_tokens": 38665696.0, "step": 567 }, { "epoch": 1.147623862487361, "grad_norm": 0.17731311917304993, "learning_rate": 0.00029535875275673706, "loss": 0.3028, "mean_token_accuracy": 0.896138958632946, "num_tokens": 38736012.0, "step": 568 }, { "epoch": 1.1496461071789685, "grad_norm": 0.16211020946502686, "learning_rate": 0.00029500194670876155, "loss": 0.2661, "mean_token_accuracy": 0.9007462747395039, "num_tokens": 38813042.0, "step": 569 }, { "epoch": 1.1516683518705764, "grad_norm": 0.16605907678604126, "learning_rate": 0.00029464478367812304, "loss": 0.2708, "mean_token_accuracy": 0.9033683091402054, "num_tokens": 38884323.0, "step": 570 }, { "epoch": 1.153690596562184, "grad_norm": 0.16346529126167297, "learning_rate": 0.0002942872653647911, "loss": 0.2787, "mean_token_accuracy": 0.8993464335799217, "num_tokens": 38954581.0, "step": 571 }, { "epoch": 1.1557128412537918, "grad_norm": 0.1715569943189621, "learning_rate": 0.0002939293934704259, "loss": 0.2876, "mean_token_accuracy": 0.899021927267313, "num_tokens": 39024859.0, "step": 572 }, { "epoch": 1.1577350859453994, "grad_norm": 0.1708040088415146, "learning_rate": 0.00029357116969837093, "loss": 0.2716, "mean_token_accuracy": 0.9040286540985107, "num_tokens": 39084032.0, "step": 573 }, { "epoch": 1.159757330637007, "grad_norm": 0.15547077357769012, "learning_rate": 0.00029321259575364406, "loss": 0.2876, "mean_token_accuracy": 0.9014556109905243, "num_tokens": 39158216.0, "step": 574 }, { "epoch": 1.1617795753286146, "grad_norm": 0.1835734099149704, "learning_rate": 0.0002928536733429302, "loss": 0.2904, "mean_token_accuracy": 0.8962517976760864, "num_tokens": 39219228.0, "step": 575 }, { "epoch": 1.1638018200202225, "grad_norm": 0.21164695918560028, "learning_rate": 0.00029249440417457274, "loss": 0.3095, "mean_token_accuracy": 0.8903193324804306, "num_tokens": 39279145.0, "step": 576 }, { "epoch": 1.16582406471183, "grad_norm": 0.16395002603530884, "learning_rate": 0.00029213478995856535, "loss": 0.2658, "mean_token_accuracy": 0.9063084498047829, "num_tokens": 39346035.0, "step": 577 }, { "epoch": 1.167846309403438, "grad_norm": 0.15447662770748138, "learning_rate": 0.0002917748324065443, "loss": 0.2609, "mean_token_accuracy": 0.9043813906610012, "num_tokens": 39419464.0, "step": 578 }, { "epoch": 1.1698685540950455, "grad_norm": 0.18628905713558197, "learning_rate": 0.0002914145332317798, "loss": 0.3079, "mean_token_accuracy": 0.892396155744791, "num_tokens": 39476986.0, "step": 579 }, { "epoch": 1.1718907987866531, "grad_norm": 0.15657448768615723, "learning_rate": 0.0002910538941491681, "loss": 0.2596, "mean_token_accuracy": 0.9103246405720711, "num_tokens": 39547007.0, "step": 580 }, { "epoch": 1.1739130434782608, "grad_norm": 0.16723878681659698, "learning_rate": 0.00029069291687522337, "loss": 0.2578, "mean_token_accuracy": 0.9113052189350128, "num_tokens": 39615140.0, "step": 581 }, { "epoch": 1.1759352881698686, "grad_norm": 0.21382521092891693, "learning_rate": 0.00029033160312806925, "loss": 0.2843, "mean_token_accuracy": 0.9006746262311935, "num_tokens": 39676629.0, "step": 582 }, { "epoch": 1.1779575328614762, "grad_norm": 0.17140787839889526, "learning_rate": 0.0002899699546274312, "loss": 0.2973, "mean_token_accuracy": 0.8942140191793442, "num_tokens": 39744182.0, "step": 583 }, { "epoch": 1.179979777553084, "grad_norm": 0.16415606439113617, "learning_rate": 0.0002896079730946277, "loss": 0.248, "mean_token_accuracy": 0.9046668969094753, "num_tokens": 39809087.0, "step": 584 }, { "epoch": 1.1820020222446916, "grad_norm": 0.15275758504867554, "learning_rate": 0.0002892456602525625, "loss": 0.2528, "mean_token_accuracy": 0.9055165685713291, "num_tokens": 39883376.0, "step": 585 }, { "epoch": 1.1840242669362993, "grad_norm": 0.1598130762577057, "learning_rate": 0.00028888301782571614, "loss": 0.2571, "mean_token_accuracy": 0.9055753275752068, "num_tokens": 39950688.0, "step": 586 }, { "epoch": 1.1860465116279069, "grad_norm": 0.16630232334136963, "learning_rate": 0.000288520047540138, "loss": 0.2857, "mean_token_accuracy": 0.9000633843243122, "num_tokens": 40015260.0, "step": 587 }, { "epoch": 1.1880687563195147, "grad_norm": 0.19941283762454987, "learning_rate": 0.00028815675112343794, "loss": 0.2954, "mean_token_accuracy": 0.8945838250219822, "num_tokens": 40079394.0, "step": 588 }, { "epoch": 1.1900910010111223, "grad_norm": 0.19106529653072357, "learning_rate": 0.00028779313030477793, "loss": 0.3112, "mean_token_accuracy": 0.8897448740899563, "num_tokens": 40144909.0, "step": 589 }, { "epoch": 1.1921132457027301, "grad_norm": 0.17041806876659393, "learning_rate": 0.0002874291868148642, "loss": 0.2819, "mean_token_accuracy": 0.8990175537765026, "num_tokens": 40217254.0, "step": 590 }, { "epoch": 1.1941354903943378, "grad_norm": 0.16470171511173248, "learning_rate": 0.0002870649223859386, "loss": 0.2773, "mean_token_accuracy": 0.9041831828653812, "num_tokens": 40280417.0, "step": 591 }, { "epoch": 1.1961577350859454, "grad_norm": 0.1665530502796173, "learning_rate": 0.00028670033875177053, "loss": 0.2663, "mean_token_accuracy": 0.9013455249369144, "num_tokens": 40350231.0, "step": 592 }, { "epoch": 1.198179979777553, "grad_norm": 0.19251202046871185, "learning_rate": 0.00028633543764764894, "loss": 0.3157, "mean_token_accuracy": 0.8875606693327427, "num_tokens": 40413686.0, "step": 593 }, { "epoch": 1.2002022244691608, "grad_norm": 0.17525707185268402, "learning_rate": 0.00028597022081037354, "loss": 0.2933, "mean_token_accuracy": 0.8971122018992901, "num_tokens": 40479649.0, "step": 594 }, { "epoch": 1.2022244691607684, "grad_norm": 0.19120153784751892, "learning_rate": 0.000285604689978247, "loss": 0.275, "mean_token_accuracy": 0.8998171053826809, "num_tokens": 40548513.0, "step": 595 }, { "epoch": 1.2042467138523762, "grad_norm": 0.15362586081027985, "learning_rate": 0.0002852388468910663, "loss": 0.2655, "mean_token_accuracy": 0.9043829254806042, "num_tokens": 40621501.0, "step": 596 }, { "epoch": 1.2062689585439839, "grad_norm": 0.1648460179567337, "learning_rate": 0.00028487269329011497, "loss": 0.2765, "mean_token_accuracy": 0.9020786061882973, "num_tokens": 40696483.0, "step": 597 }, { "epoch": 1.2082912032355915, "grad_norm": 0.1793263554573059, "learning_rate": 0.000284506230918154, "loss": 0.2914, "mean_token_accuracy": 0.8994336612522602, "num_tokens": 40765538.0, "step": 598 }, { "epoch": 1.210313447927199, "grad_norm": 0.17354300618171692, "learning_rate": 0.00028413946151941463, "loss": 0.2929, "mean_token_accuracy": 0.9005281217396259, "num_tokens": 40833551.0, "step": 599 }, { "epoch": 1.212335692618807, "grad_norm": 0.1781807243824005, "learning_rate": 0.00028377238683958885, "loss": 0.2849, "mean_token_accuracy": 0.8987740390002728, "num_tokens": 40895246.0, "step": 600 }, { "epoch": 1.2143579373104145, "grad_norm": 0.16701123118400574, "learning_rate": 0.0002834050086258221, "loss": 0.2607, "mean_token_accuracy": 0.9041876047849655, "num_tokens": 40964580.0, "step": 601 }, { "epoch": 1.2163801820020224, "grad_norm": 0.15654708445072174, "learning_rate": 0.00028303732862670417, "loss": 0.2702, "mean_token_accuracy": 0.9014758616685867, "num_tokens": 41039130.0, "step": 602 }, { "epoch": 1.21840242669363, "grad_norm": 0.18177339434623718, "learning_rate": 0.0002826693485922616, "loss": 0.2701, "mean_token_accuracy": 0.9032718986272812, "num_tokens": 41095473.0, "step": 603 }, { "epoch": 1.2204246713852376, "grad_norm": 0.16560594737529755, "learning_rate": 0.00028230107027394876, "loss": 0.2939, "mean_token_accuracy": 0.8934713453054428, "num_tokens": 41157491.0, "step": 604 }, { "epoch": 1.2224469160768452, "grad_norm": 0.18375754356384277, "learning_rate": 0.00028193249542463977, "loss": 0.2909, "mean_token_accuracy": 0.8953644298017025, "num_tokens": 41225218.0, "step": 605 }, { "epoch": 1.224469160768453, "grad_norm": 0.14936794340610504, "learning_rate": 0.0002815636257986204, "loss": 0.2539, "mean_token_accuracy": 0.9058601558208466, "num_tokens": 41307770.0, "step": 606 }, { "epoch": 1.2264914054600606, "grad_norm": 0.16326607763767242, "learning_rate": 0.00028119446315157896, "loss": 0.2507, "mean_token_accuracy": 0.9078186601400375, "num_tokens": 41371178.0, "step": 607 }, { "epoch": 1.2285136501516685, "grad_norm": 0.16785994172096252, "learning_rate": 0.0002808250092405989, "loss": 0.2589, "mean_token_accuracy": 0.9010850116610527, "num_tokens": 41444090.0, "step": 608 }, { "epoch": 1.230535894843276, "grad_norm": 0.17225563526153564, "learning_rate": 0.0002804552658241496, "loss": 0.2667, "mean_token_accuracy": 0.9027063623070717, "num_tokens": 41512243.0, "step": 609 }, { "epoch": 1.2325581395348837, "grad_norm": 0.16818945109844208, "learning_rate": 0.0002800852346620788, "loss": 0.2704, "mean_token_accuracy": 0.9012492336332798, "num_tokens": 41582048.0, "step": 610 }, { "epoch": 1.2345803842264913, "grad_norm": 0.1885753571987152, "learning_rate": 0.00027971491751560345, "loss": 0.2859, "mean_token_accuracy": 0.8967389948666096, "num_tokens": 41646351.0, "step": 611 }, { "epoch": 1.2366026289180991, "grad_norm": 0.15571804344654083, "learning_rate": 0.0002793443161473017, "loss": 0.2707, "mean_token_accuracy": 0.9040926285088062, "num_tokens": 41715042.0, "step": 612 }, { "epoch": 1.2386248736097067, "grad_norm": 0.1665385216474533, "learning_rate": 0.0002789734323211048, "loss": 0.2633, "mean_token_accuracy": 0.9024609327316284, "num_tokens": 41787021.0, "step": 613 }, { "epoch": 1.2406471183013146, "grad_norm": 0.17233288288116455, "learning_rate": 0.0002786022678022882, "loss": 0.3058, "mean_token_accuracy": 0.8898206166923046, "num_tokens": 41851767.0, "step": 614 }, { "epoch": 1.2426693629929222, "grad_norm": 0.1737981140613556, "learning_rate": 0.0002782308243574633, "loss": 0.2933, "mean_token_accuracy": 0.8971287794411182, "num_tokens": 41914797.0, "step": 615 }, { "epoch": 1.2446916076845298, "grad_norm": 0.16172519326210022, "learning_rate": 0.0002778591037545691, "loss": 0.2665, "mean_token_accuracy": 0.9057141467928886, "num_tokens": 41986868.0, "step": 616 }, { "epoch": 1.2467138523761374, "grad_norm": 0.15280866622924805, "learning_rate": 0.0002774871077628639, "loss": 0.2688, "mean_token_accuracy": 0.9038811773061752, "num_tokens": 42062995.0, "step": 617 }, { "epoch": 1.2487360970677452, "grad_norm": 0.17397160828113556, "learning_rate": 0.0002771148381529166, "loss": 0.2863, "mean_token_accuracy": 0.8941488154232502, "num_tokens": 42124939.0, "step": 618 }, { "epoch": 1.2507583417593529, "grad_norm": 0.1617380529642105, "learning_rate": 0.00027674229669659856, "loss": 0.2536, "mean_token_accuracy": 0.9045982100069523, "num_tokens": 42194011.0, "step": 619 }, { "epoch": 1.2527805864509607, "grad_norm": 0.15885986387729645, "learning_rate": 0.0002763694851670749, "loss": 0.2703, "mean_token_accuracy": 0.9061401709914207, "num_tokens": 42265919.0, "step": 620 }, { "epoch": 1.2548028311425683, "grad_norm": 0.16419966518878937, "learning_rate": 0.00027599640533879636, "loss": 0.2769, "mean_token_accuracy": 0.9034353755414486, "num_tokens": 42334638.0, "step": 621 }, { "epoch": 1.256825075834176, "grad_norm": 0.16629813611507416, "learning_rate": 0.0002756230589874905, "loss": 0.2687, "mean_token_accuracy": 0.9030461423099041, "num_tokens": 42404575.0, "step": 622 }, { "epoch": 1.2588473205257835, "grad_norm": 0.17728988826274872, "learning_rate": 0.00027524944789015366, "loss": 0.2751, "mean_token_accuracy": 0.9014569260179996, "num_tokens": 42475814.0, "step": 623 }, { "epoch": 1.2608695652173914, "grad_norm": 0.17427091300487518, "learning_rate": 0.00027487557382504195, "loss": 0.2657, "mean_token_accuracy": 0.9044037610292435, "num_tokens": 42543660.0, "step": 624 }, { "epoch": 1.262891809908999, "grad_norm": 0.1894424855709076, "learning_rate": 0.00027450143857166344, "loss": 0.2969, "mean_token_accuracy": 0.8965917490422726, "num_tokens": 42607124.0, "step": 625 }, { "epoch": 1.2649140546006068, "grad_norm": 0.15993963181972504, "learning_rate": 0.00027412704391076916, "loss": 0.2782, "mean_token_accuracy": 0.9031428508460522, "num_tokens": 42676066.0, "step": 626 }, { "epoch": 1.2669362992922144, "grad_norm": 0.17840322852134705, "learning_rate": 0.00027375239162434503, "loss": 0.2688, "mean_token_accuracy": 0.9015723317861557, "num_tokens": 42746212.0, "step": 627 }, { "epoch": 1.268958543983822, "grad_norm": 0.20184557139873505, "learning_rate": 0.00027337748349560276, "loss": 0.2963, "mean_token_accuracy": 0.8969193771481514, "num_tokens": 42803557.0, "step": 628 }, { "epoch": 1.2709807886754296, "grad_norm": 0.16635443270206451, "learning_rate": 0.0002730023213089724, "loss": 0.2884, "mean_token_accuracy": 0.8960177823901176, "num_tokens": 42866158.0, "step": 629 }, { "epoch": 1.2730030333670375, "grad_norm": 0.19960255920886993, "learning_rate": 0.0002726269068500926, "loss": 0.2841, "mean_token_accuracy": 0.8968143723905087, "num_tokens": 42927025.0, "step": 630 }, { "epoch": 1.275025278058645, "grad_norm": 0.1719711273908615, "learning_rate": 0.0002722512419058032, "loss": 0.2728, "mean_token_accuracy": 0.9018568396568298, "num_tokens": 43007744.0, "step": 631 }, { "epoch": 1.277047522750253, "grad_norm": 0.17668215930461884, "learning_rate": 0.00027187532826413607, "loss": 0.2683, "mean_token_accuracy": 0.9023380614817142, "num_tokens": 43071417.0, "step": 632 }, { "epoch": 1.2790697674418605, "grad_norm": 0.17645464837551117, "learning_rate": 0.00027149916771430677, "loss": 0.2787, "mean_token_accuracy": 0.9030827060341835, "num_tokens": 43143504.0, "step": 633 }, { "epoch": 1.2810920121334681, "grad_norm": 0.18298184871673584, "learning_rate": 0.00027112276204670617, "loss": 0.2886, "mean_token_accuracy": 0.8980408012866974, "num_tokens": 43219433.0, "step": 634 }, { "epoch": 1.2831142568250757, "grad_norm": 0.15996871888637543, "learning_rate": 0.00027074611305289147, "loss": 0.2622, "mean_token_accuracy": 0.902827687561512, "num_tokens": 43286472.0, "step": 635 }, { "epoch": 1.2851365015166836, "grad_norm": 0.1937190294265747, "learning_rate": 0.00027036922252557865, "loss": 0.2937, "mean_token_accuracy": 0.897728331387043, "num_tokens": 43346390.0, "step": 636 }, { "epoch": 1.2871587462082912, "grad_norm": 0.17584164440631866, "learning_rate": 0.00026999209225863263, "loss": 0.2896, "mean_token_accuracy": 0.897246178239584, "num_tokens": 43413853.0, "step": 637 }, { "epoch": 1.289180990899899, "grad_norm": 0.17733249068260193, "learning_rate": 0.0002696147240470598, "loss": 0.2882, "mean_token_accuracy": 0.8957457803189754, "num_tokens": 43478722.0, "step": 638 }, { "epoch": 1.2912032355915066, "grad_norm": 0.17890246212482452, "learning_rate": 0.0002692371196869992, "loss": 0.288, "mean_token_accuracy": 0.8960468098521233, "num_tokens": 43540378.0, "step": 639 }, { "epoch": 1.2932254802831142, "grad_norm": 0.15859632194042206, "learning_rate": 0.0002688592809757134, "loss": 0.2792, "mean_token_accuracy": 0.9036918766796589, "num_tokens": 43612284.0, "step": 640 }, { "epoch": 1.2952477249747218, "grad_norm": 0.16566091775894165, "learning_rate": 0.0002684812097115808, "loss": 0.2785, "mean_token_accuracy": 0.9012075029313564, "num_tokens": 43677352.0, "step": 641 }, { "epoch": 1.2972699696663297, "grad_norm": 0.17786841094493866, "learning_rate": 0.0002681029076940862, "loss": 0.2911, "mean_token_accuracy": 0.9009424708783627, "num_tokens": 43739163.0, "step": 642 }, { "epoch": 1.2992922143579373, "grad_norm": 0.15567278861999512, "learning_rate": 0.0002677243767238135, "loss": 0.2591, "mean_token_accuracy": 0.9091448336839676, "num_tokens": 43819970.0, "step": 643 }, { "epoch": 1.3013144590495451, "grad_norm": 0.20501317083835602, "learning_rate": 0.00026734561860243544, "loss": 0.3186, "mean_token_accuracy": 0.8898426368832588, "num_tokens": 43879943.0, "step": 644 }, { "epoch": 1.3033367037411527, "grad_norm": 0.18259315192699432, "learning_rate": 0.0002669666351327066, "loss": 0.2772, "mean_token_accuracy": 0.8982793055474758, "num_tokens": 43941000.0, "step": 645 }, { "epoch": 1.3053589484327603, "grad_norm": 0.18504492938518524, "learning_rate": 0.00026658742811845376, "loss": 0.2905, "mean_token_accuracy": 0.896319292485714, "num_tokens": 44000567.0, "step": 646 }, { "epoch": 1.307381193124368, "grad_norm": 0.17783911526203156, "learning_rate": 0.00026620799936456774, "loss": 0.2813, "mean_token_accuracy": 0.9009971134364605, "num_tokens": 44071352.0, "step": 647 }, { "epoch": 1.3094034378159758, "grad_norm": 0.21716438233852386, "learning_rate": 0.00026582835067699495, "loss": 0.2906, "mean_token_accuracy": 0.8958504274487495, "num_tokens": 44129790.0, "step": 648 }, { "epoch": 1.3114256825075834, "grad_norm": 0.1822315752506256, "learning_rate": 0.0002654484838627284, "loss": 0.2867, "mean_token_accuracy": 0.9037492237985134, "num_tokens": 44195417.0, "step": 649 }, { "epoch": 1.3134479271991912, "grad_norm": 0.15820986032485962, "learning_rate": 0.00026506840072979947, "loss": 0.2546, "mean_token_accuracy": 0.9098224155604839, "num_tokens": 44273153.0, "step": 650 }, { "epoch": 1.3154701718907988, "grad_norm": 0.1899651139974594, "learning_rate": 0.00026468810308726893, "loss": 0.28, "mean_token_accuracy": 0.8995106518268585, "num_tokens": 44349738.0, "step": 651 }, { "epoch": 1.3174924165824065, "grad_norm": 0.18798086047172546, "learning_rate": 0.00026430759274521877, "loss": 0.2964, "mean_token_accuracy": 0.8899718299508095, "num_tokens": 44415133.0, "step": 652 }, { "epoch": 1.319514661274014, "grad_norm": 0.13753436505794525, "learning_rate": 0.0002639268715147432, "loss": 0.2307, "mean_token_accuracy": 0.9101770743727684, "num_tokens": 44484697.0, "step": 653 }, { "epoch": 1.321536905965622, "grad_norm": 0.20119944214820862, "learning_rate": 0.00026354594120794016, "loss": 0.2926, "mean_token_accuracy": 0.897066742181778, "num_tokens": 44551987.0, "step": 654 }, { "epoch": 1.3235591506572295, "grad_norm": 0.18725383281707764, "learning_rate": 0.000263164803637903, "loss": 0.2742, "mean_token_accuracy": 0.9033515900373459, "num_tokens": 44617511.0, "step": 655 }, { "epoch": 1.3255813953488373, "grad_norm": 0.15222612023353577, "learning_rate": 0.0002627834606187112, "loss": 0.2518, "mean_token_accuracy": 0.9108999036252499, "num_tokens": 44698150.0, "step": 656 }, { "epoch": 1.327603640040445, "grad_norm": 0.16968220472335815, "learning_rate": 0.0002624019139654223, "loss": 0.2834, "mean_token_accuracy": 0.9003202244639397, "num_tokens": 44769993.0, "step": 657 }, { "epoch": 1.3296258847320526, "grad_norm": 0.1526424139738083, "learning_rate": 0.000262020165494063, "loss": 0.2493, "mean_token_accuracy": 0.9069892205297947, "num_tokens": 44848710.0, "step": 658 }, { "epoch": 1.3316481294236602, "grad_norm": 0.16174714267253876, "learning_rate": 0.00026163821702162074, "loss": 0.2581, "mean_token_accuracy": 0.9058538265526295, "num_tokens": 44932916.0, "step": 659 }, { "epoch": 1.333670374115268, "grad_norm": 0.18540237843990326, "learning_rate": 0.0002612560703660346, "loss": 0.2823, "mean_token_accuracy": 0.9005630798637867, "num_tokens": 44997865.0, "step": 660 }, { "epoch": 1.3356926188068756, "grad_norm": 0.145268976688385, "learning_rate": 0.0002608737273461872, "loss": 0.2402, "mean_token_accuracy": 0.9093809016048908, "num_tokens": 45074165.0, "step": 661 }, { "epoch": 1.3377148634984835, "grad_norm": 0.16983529925346375, "learning_rate": 0.0002604911897818957, "loss": 0.2763, "mean_token_accuracy": 0.9002145752310753, "num_tokens": 45140578.0, "step": 662 }, { "epoch": 1.339737108190091, "grad_norm": 0.18206650018692017, "learning_rate": 0.00026010845949390326, "loss": 0.271, "mean_token_accuracy": 0.9040128998458385, "num_tokens": 45206573.0, "step": 663 }, { "epoch": 1.3417593528816987, "grad_norm": 0.17423690855503082, "learning_rate": 0.00025972553830387027, "loss": 0.276, "mean_token_accuracy": 0.9035660028457642, "num_tokens": 45273772.0, "step": 664 }, { "epoch": 1.3437815975733063, "grad_norm": 0.17948757112026215, "learning_rate": 0.0002593424280343656, "loss": 0.3073, "mean_token_accuracy": 0.8898307755589485, "num_tokens": 45333260.0, "step": 665 }, { "epoch": 1.3458038422649141, "grad_norm": 0.1973046064376831, "learning_rate": 0.0002589591305088585, "loss": 0.298, "mean_token_accuracy": 0.8946604765951633, "num_tokens": 45397184.0, "step": 666 }, { "epoch": 1.3478260869565217, "grad_norm": 0.16013695299625397, "learning_rate": 0.0002585756475517092, "loss": 0.2698, "mean_token_accuracy": 0.905727930366993, "num_tokens": 45478638.0, "step": 667 }, { "epoch": 1.3498483316481296, "grad_norm": 0.1567625254392624, "learning_rate": 0.00025819198098816034, "loss": 0.2765, "mean_token_accuracy": 0.9000396579504013, "num_tokens": 45548715.0, "step": 668 }, { "epoch": 1.3518705763397372, "grad_norm": 0.16354252398014069, "learning_rate": 0.00025780813264432884, "loss": 0.2659, "mean_token_accuracy": 0.9028089232742786, "num_tokens": 45624018.0, "step": 669 }, { "epoch": 1.3538928210313448, "grad_norm": 0.19890683889389038, "learning_rate": 0.0002574241043471967, "loss": 0.3082, "mean_token_accuracy": 0.89163389056921, "num_tokens": 45692190.0, "step": 670 }, { "epoch": 1.3559150657229524, "grad_norm": 0.1480788290500641, "learning_rate": 0.0002570398979246023, "loss": 0.2605, "mean_token_accuracy": 0.905091181397438, "num_tokens": 45771127.0, "step": 671 }, { "epoch": 1.3579373104145602, "grad_norm": 0.17679338157176971, "learning_rate": 0.00025665551520523194, "loss": 0.2831, "mean_token_accuracy": 0.8965117931365967, "num_tokens": 45835910.0, "step": 672 }, { "epoch": 1.3599595551061678, "grad_norm": 0.17713719606399536, "learning_rate": 0.00025627095801861107, "loss": 0.2905, "mean_token_accuracy": 0.8971158005297184, "num_tokens": 45901225.0, "step": 673 }, { "epoch": 1.3619817997977754, "grad_norm": 0.17695656418800354, "learning_rate": 0.0002558862281950955, "loss": 0.3268, "mean_token_accuracy": 0.8890945613384247, "num_tokens": 45972893.0, "step": 674 }, { "epoch": 1.3640040444893833, "grad_norm": 0.15022985637187958, "learning_rate": 0.0002555013275658627, "loss": 0.28, "mean_token_accuracy": 0.9022598974406719, "num_tokens": 46053862.0, "step": 675 }, { "epoch": 1.366026289180991, "grad_norm": 0.16728746891021729, "learning_rate": 0.0002551162579629031, "loss": 0.2735, "mean_token_accuracy": 0.9004092961549759, "num_tokens": 46123535.0, "step": 676 }, { "epoch": 1.3680485338725985, "grad_norm": 0.17287185788154602, "learning_rate": 0.0002547310212190115, "loss": 0.2803, "mean_token_accuracy": 0.8980144336819649, "num_tokens": 46193498.0, "step": 677 }, { "epoch": 1.3700707785642063, "grad_norm": 0.184726744890213, "learning_rate": 0.0002543456191677781, "loss": 0.2927, "mean_token_accuracy": 0.8962498530745506, "num_tokens": 46261698.0, "step": 678 }, { "epoch": 1.372093023255814, "grad_norm": 0.15757699310779572, "learning_rate": 0.00025396005364357994, "loss": 0.2809, "mean_token_accuracy": 0.8978969343006611, "num_tokens": 46329372.0, "step": 679 }, { "epoch": 1.3741152679474216, "grad_norm": 0.18496832251548767, "learning_rate": 0.0002535743264815723, "loss": 0.2948, "mean_token_accuracy": 0.8964893855154514, "num_tokens": 46389989.0, "step": 680 }, { "epoch": 1.3761375126390294, "grad_norm": 0.19771555066108704, "learning_rate": 0.0002531884395176794, "loss": 0.3045, "mean_token_accuracy": 0.8947297558188438, "num_tokens": 46451529.0, "step": 681 }, { "epoch": 1.378159757330637, "grad_norm": 0.1643752008676529, "learning_rate": 0.0002528023945885866, "loss": 0.2691, "mean_token_accuracy": 0.9002487845718861, "num_tokens": 46518234.0, "step": 682 }, { "epoch": 1.3801820020222446, "grad_norm": 0.15709805488586426, "learning_rate": 0.00025241619353173056, "loss": 0.2517, "mean_token_accuracy": 0.9091945327818394, "num_tokens": 46590312.0, "step": 683 }, { "epoch": 1.3822042467138524, "grad_norm": 0.17834722995758057, "learning_rate": 0.00025202983818529154, "loss": 0.294, "mean_token_accuracy": 0.8986290767788887, "num_tokens": 46658404.0, "step": 684 }, { "epoch": 1.38422649140546, "grad_norm": 0.15814678370952606, "learning_rate": 0.00025164333038818384, "loss": 0.2708, "mean_token_accuracy": 0.9031675830483437, "num_tokens": 46724887.0, "step": 685 }, { "epoch": 1.3862487360970677, "grad_norm": 0.17998504638671875, "learning_rate": 0.0002512566719800475, "loss": 0.2856, "mean_token_accuracy": 0.89876314625144, "num_tokens": 46795038.0, "step": 686 }, { "epoch": 1.3882709807886755, "grad_norm": 0.17202328145503998, "learning_rate": 0.0002508698648012394, "loss": 0.2965, "mean_token_accuracy": 0.8947253711521626, "num_tokens": 46856174.0, "step": 687 }, { "epoch": 1.3902932254802831, "grad_norm": 0.16402584314346313, "learning_rate": 0.00025048291069282443, "loss": 0.2633, "mean_token_accuracy": 0.9063729159533978, "num_tokens": 46925752.0, "step": 688 }, { "epoch": 1.3923154701718907, "grad_norm": 0.19435186684131622, "learning_rate": 0.00025009581149656703, "loss": 0.2756, "mean_token_accuracy": 0.9030190780758858, "num_tokens": 46993260.0, "step": 689 }, { "epoch": 1.3943377148634986, "grad_norm": 0.18806155025959015, "learning_rate": 0.000249708569054922, "loss": 0.3033, "mean_token_accuracy": 0.896921843290329, "num_tokens": 47060294.0, "step": 690 }, { "epoch": 1.3963599595551062, "grad_norm": 0.19206839799880981, "learning_rate": 0.000249321185211026, "loss": 0.282, "mean_token_accuracy": 0.8990140780806541, "num_tokens": 47123248.0, "step": 691 }, { "epoch": 1.3983822042467138, "grad_norm": 0.16943977773189545, "learning_rate": 0.00024893366180868875, "loss": 0.2728, "mean_token_accuracy": 0.9020564220845699, "num_tokens": 47185179.0, "step": 692 }, { "epoch": 1.4004044489383216, "grad_norm": 0.1619652956724167, "learning_rate": 0.00024854600069238407, "loss": 0.2728, "mean_token_accuracy": 0.9024368785321712, "num_tokens": 47259239.0, "step": 693 }, { "epoch": 1.4024266936299292, "grad_norm": 0.17677046358585358, "learning_rate": 0.00024815820370724156, "loss": 0.2697, "mean_token_accuracy": 0.90378213301301, "num_tokens": 47322333.0, "step": 694 }, { "epoch": 1.4044489383215368, "grad_norm": 0.15612858533859253, "learning_rate": 0.0002477702726990372, "loss": 0.2826, "mean_token_accuracy": 0.9020431824028492, "num_tokens": 47391001.0, "step": 695 }, { "epoch": 1.4064711830131447, "grad_norm": 0.16640524566173553, "learning_rate": 0.000247382209514185, "loss": 0.2948, "mean_token_accuracy": 0.8942111246287823, "num_tokens": 47455737.0, "step": 696 }, { "epoch": 1.4084934277047523, "grad_norm": 0.16898459196090698, "learning_rate": 0.0002469940159997281, "loss": 0.2687, "mean_token_accuracy": 0.9056588634848595, "num_tokens": 47525615.0, "step": 697 }, { "epoch": 1.4105156723963599, "grad_norm": 0.18844769895076752, "learning_rate": 0.00024660569400332996, "loss": 0.2946, "mean_token_accuracy": 0.895747821778059, "num_tokens": 47592079.0, "step": 698 }, { "epoch": 1.4125379170879677, "grad_norm": 0.16074754297733307, "learning_rate": 0.00024621724537326545, "loss": 0.2831, "mean_token_accuracy": 0.9034741893410683, "num_tokens": 47667233.0, "step": 699 }, { "epoch": 1.4145601617795753, "grad_norm": 0.16710326075553894, "learning_rate": 0.00024582867195841227, "loss": 0.2863, "mean_token_accuracy": 0.9007730670273304, "num_tokens": 47743310.0, "step": 700 }, { "epoch": 1.416582406471183, "grad_norm": 0.18456129729747772, "learning_rate": 0.0002454399756082422, "loss": 0.2765, "mean_token_accuracy": 0.8989297412335873, "num_tokens": 47804656.0, "step": 701 }, { "epoch": 1.4186046511627908, "grad_norm": 0.14485791325569153, "learning_rate": 0.0002450511581728118, "loss": 0.2378, "mean_token_accuracy": 0.9135924205183983, "num_tokens": 47877505.0, "step": 702 }, { "epoch": 1.4206268958543984, "grad_norm": 0.16109082102775574, "learning_rate": 0.00024466222150275427, "loss": 0.2701, "mean_token_accuracy": 0.9057381004095078, "num_tokens": 47947797.0, "step": 703 }, { "epoch": 1.422649140546006, "grad_norm": 0.17397062480449677, "learning_rate": 0.00024427316744927015, "loss": 0.2748, "mean_token_accuracy": 0.9010849967598915, "num_tokens": 48013032.0, "step": 704 }, { "epoch": 1.4246713852376138, "grad_norm": 0.17228464782238007, "learning_rate": 0.0002438839978641188, "loss": 0.2902, "mean_token_accuracy": 0.8968134559690952, "num_tokens": 48077137.0, "step": 705 }, { "epoch": 1.4266936299292214, "grad_norm": 0.15708769857883453, "learning_rate": 0.00024349471459960933, "loss": 0.2639, "mean_token_accuracy": 0.9076020307838917, "num_tokens": 48148193.0, "step": 706 }, { "epoch": 1.428715874620829, "grad_norm": 0.16323234140872955, "learning_rate": 0.000243105319508592, "loss": 0.2767, "mean_token_accuracy": 0.9031167514622211, "num_tokens": 48216944.0, "step": 707 }, { "epoch": 1.4307381193124369, "grad_norm": 0.19718225300312042, "learning_rate": 0.00024271581444444936, "loss": 0.2857, "mean_token_accuracy": 0.8991989493370056, "num_tokens": 48289278.0, "step": 708 }, { "epoch": 1.4327603640040445, "grad_norm": 0.18652518093585968, "learning_rate": 0.0002423262012610874, "loss": 0.2761, "mean_token_accuracy": 0.8964316956698895, "num_tokens": 48356711.0, "step": 709 }, { "epoch": 1.434782608695652, "grad_norm": 0.15871575474739075, "learning_rate": 0.00024193648181292657, "loss": 0.2667, "mean_token_accuracy": 0.9004132300615311, "num_tokens": 48431698.0, "step": 710 }, { "epoch": 1.43680485338726, "grad_norm": 0.1658415049314499, "learning_rate": 0.00024154665795489324, "loss": 0.2923, "mean_token_accuracy": 0.8983742482960224, "num_tokens": 48499782.0, "step": 711 }, { "epoch": 1.4388270980788676, "grad_norm": 0.14790105819702148, "learning_rate": 0.00024115673154241082, "loss": 0.2752, "mean_token_accuracy": 0.9012794457376003, "num_tokens": 48575015.0, "step": 712 }, { "epoch": 1.4408493427704752, "grad_norm": 0.1578913778066635, "learning_rate": 0.00024076670443139056, "loss": 0.2717, "mean_token_accuracy": 0.9049608968198299, "num_tokens": 48645644.0, "step": 713 }, { "epoch": 1.442871587462083, "grad_norm": 0.14726778864860535, "learning_rate": 0.00024037657847822327, "loss": 0.2472, "mean_token_accuracy": 0.9099989496171474, "num_tokens": 48721939.0, "step": 714 }, { "epoch": 1.4448938321536906, "grad_norm": 0.1682555377483368, "learning_rate": 0.00023998635553977, "loss": 0.255, "mean_token_accuracy": 0.9088139645755291, "num_tokens": 48781700.0, "step": 715 }, { "epoch": 1.4469160768452982, "grad_norm": 0.1937257945537567, "learning_rate": 0.00023959603747335364, "loss": 0.2787, "mean_token_accuracy": 0.9022819362580776, "num_tokens": 48848209.0, "step": 716 }, { "epoch": 1.448938321536906, "grad_norm": 0.18163816630840302, "learning_rate": 0.0002392056261367497, "loss": 0.2603, "mean_token_accuracy": 0.9066541865468025, "num_tokens": 48908683.0, "step": 717 }, { "epoch": 1.4509605662285137, "grad_norm": 0.17626726627349854, "learning_rate": 0.00023881512338817763, "loss": 0.2719, "mean_token_accuracy": 0.9030824415385723, "num_tokens": 48971539.0, "step": 718 }, { "epoch": 1.4529828109201213, "grad_norm": 0.19325651228427887, "learning_rate": 0.00023842453108629207, "loss": 0.2825, "mean_token_accuracy": 0.9008334875106812, "num_tokens": 49036641.0, "step": 719 }, { "epoch": 1.4550050556117289, "grad_norm": 0.15112407505512238, "learning_rate": 0.00023803385109017375, "loss": 0.2491, "mean_token_accuracy": 0.908204834908247, "num_tokens": 49116609.0, "step": 720 }, { "epoch": 1.4570273003033367, "grad_norm": 0.1619442254304886, "learning_rate": 0.000237643085259321, "loss": 0.2674, "mean_token_accuracy": 0.9027148932218552, "num_tokens": 49184904.0, "step": 721 }, { "epoch": 1.4590495449949443, "grad_norm": 0.18082739412784576, "learning_rate": 0.00023725223545364036, "loss": 0.2897, "mean_token_accuracy": 0.8995592929422855, "num_tokens": 49242882.0, "step": 722 }, { "epoch": 1.4610717896865522, "grad_norm": 0.16797882318496704, "learning_rate": 0.00023686130353343842, "loss": 0.2752, "mean_token_accuracy": 0.9008001163601875, "num_tokens": 49314113.0, "step": 723 }, { "epoch": 1.4630940343781598, "grad_norm": 0.16804397106170654, "learning_rate": 0.00023647029135941247, "loss": 0.28, "mean_token_accuracy": 0.9004204832017422, "num_tokens": 49380492.0, "step": 724 }, { "epoch": 1.4651162790697674, "grad_norm": 0.189345121383667, "learning_rate": 0.00023607920079264164, "loss": 0.3136, "mean_token_accuracy": 0.8898900300264359, "num_tokens": 49442489.0, "step": 725 }, { "epoch": 1.467138523761375, "grad_norm": 0.1601288765668869, "learning_rate": 0.0002356880336945785, "loss": 0.2766, "mean_token_accuracy": 0.8993977271020412, "num_tokens": 49515310.0, "step": 726 }, { "epoch": 1.4691607684529828, "grad_norm": 0.16616767644882202, "learning_rate": 0.00023529679192703956, "loss": 0.2233, "mean_token_accuracy": 0.9060333073139191, "num_tokens": 49579141.0, "step": 727 }, { "epoch": 1.4711830131445904, "grad_norm": 0.17813973128795624, "learning_rate": 0.00023490547735219682, "loss": 0.2772, "mean_token_accuracy": 0.902538850903511, "num_tokens": 49651616.0, "step": 728 }, { "epoch": 1.4732052578361983, "grad_norm": 0.16227717697620392, "learning_rate": 0.0002345140918325689, "loss": 0.2725, "mean_token_accuracy": 0.9031726457178593, "num_tokens": 49723462.0, "step": 729 }, { "epoch": 1.4752275025278059, "grad_norm": 0.17003865540027618, "learning_rate": 0.00023412263723101214, "loss": 0.2961, "mean_token_accuracy": 0.8977791368961334, "num_tokens": 49787491.0, "step": 730 }, { "epoch": 1.4772497472194135, "grad_norm": 0.16923342645168304, "learning_rate": 0.0002337311154107115, "loss": 0.2787, "mean_token_accuracy": 0.9015961550176144, "num_tokens": 49854833.0, "step": 731 }, { "epoch": 1.479271991911021, "grad_norm": 0.1851927489042282, "learning_rate": 0.00023333952823517194, "loss": 0.2898, "mean_token_accuracy": 0.8972079865634441, "num_tokens": 49922341.0, "step": 732 }, { "epoch": 1.481294236602629, "grad_norm": 0.1822906881570816, "learning_rate": 0.0002329478775682095, "loss": 0.2829, "mean_token_accuracy": 0.900902509689331, "num_tokens": 49979729.0, "step": 733 }, { "epoch": 1.4833164812942365, "grad_norm": 0.1649109125137329, "learning_rate": 0.00023255616527394256, "loss": 0.2727, "mean_token_accuracy": 0.9016978107392788, "num_tokens": 50047775.0, "step": 734 }, { "epoch": 1.4853387259858444, "grad_norm": 0.1738775372505188, "learning_rate": 0.00023216439321678266, "loss": 0.281, "mean_token_accuracy": 0.9027018882334232, "num_tokens": 50118326.0, "step": 735 }, { "epoch": 1.487360970677452, "grad_norm": 0.1651855707168579, "learning_rate": 0.00023177256326142577, "loss": 0.2885, "mean_token_accuracy": 0.9000568836927414, "num_tokens": 50188336.0, "step": 736 }, { "epoch": 1.4893832153690596, "grad_norm": 0.17814993858337402, "learning_rate": 0.00023138067727284352, "loss": 0.2649, "mean_token_accuracy": 0.9053604751825333, "num_tokens": 50253602.0, "step": 737 }, { "epoch": 1.4914054600606672, "grad_norm": 0.18156695365905762, "learning_rate": 0.00023098873711627427, "loss": 0.2789, "mean_token_accuracy": 0.9026945792138577, "num_tokens": 50320254.0, "step": 738 }, { "epoch": 1.493427704752275, "grad_norm": 0.1529979407787323, "learning_rate": 0.00023059674465721402, "loss": 0.2575, "mean_token_accuracy": 0.9098235592246056, "num_tokens": 50394210.0, "step": 739 }, { "epoch": 1.4954499494438827, "grad_norm": 0.18546129763126373, "learning_rate": 0.000230204701761408, "loss": 0.2723, "mean_token_accuracy": 0.9047368690371513, "num_tokens": 50462482.0, "step": 740 }, { "epoch": 1.4974721941354905, "grad_norm": 0.17348864674568176, "learning_rate": 0.00022981261029484117, "loss": 0.2877, "mean_token_accuracy": 0.9010139890015125, "num_tokens": 50533752.0, "step": 741 }, { "epoch": 1.499494438827098, "grad_norm": 0.18445433676242828, "learning_rate": 0.00022942047212372996, "loss": 0.2889, "mean_token_accuracy": 0.8973320014774799, "num_tokens": 50595611.0, "step": 742 }, { "epoch": 1.5015166835187057, "grad_norm": 0.1771615445613861, "learning_rate": 0.00022902828911451284, "loss": 0.2869, "mean_token_accuracy": 0.9018849320709705, "num_tokens": 50660163.0, "step": 743 }, { "epoch": 1.5035389282103133, "grad_norm": 0.17673981189727783, "learning_rate": 0.00022863606313384193, "loss": 0.2745, "mean_token_accuracy": 0.9061728455126286, "num_tokens": 50735476.0, "step": 744 }, { "epoch": 1.5055611729019212, "grad_norm": 0.16728192567825317, "learning_rate": 0.00022824379604857376, "loss": 0.27, "mean_token_accuracy": 0.8988127410411835, "num_tokens": 50802788.0, "step": 745 }, { "epoch": 1.5075834175935288, "grad_norm": 0.15720367431640625, "learning_rate": 0.0002278514897257605, "loss": 0.2768, "mean_token_accuracy": 0.903729647397995, "num_tokens": 50871752.0, "step": 746 }, { "epoch": 1.5096056622851366, "grad_norm": 0.1581096202135086, "learning_rate": 0.00022745914603264114, "loss": 0.2782, "mean_token_accuracy": 0.9031247049570084, "num_tokens": 50946163.0, "step": 747 }, { "epoch": 1.5116279069767442, "grad_norm": 0.16542676091194153, "learning_rate": 0.00022706676683663239, "loss": 0.2615, "mean_token_accuracy": 0.9070020318031311, "num_tokens": 51020476.0, "step": 748 }, { "epoch": 1.5136501516683518, "grad_norm": 0.15188099443912506, "learning_rate": 0.00022667435400532013, "loss": 0.2683, "mean_token_accuracy": 0.9043072015047073, "num_tokens": 51099534.0, "step": 749 }, { "epoch": 1.5156723963599594, "grad_norm": 0.16521647572517395, "learning_rate": 0.00022628190940645023, "loss": 0.2762, "mean_token_accuracy": 0.9001554064452648, "num_tokens": 51160512.0, "step": 750 }, { "epoch": 1.5176946410515673, "grad_norm": 0.14251260459423065, "learning_rate": 0.00022588943490791974, "loss": 0.2354, "mean_token_accuracy": 0.9080785401165485, "num_tokens": 51240154.0, "step": 751 }, { "epoch": 1.5197168857431749, "grad_norm": 0.18312643468379974, "learning_rate": 0.00022549693237776812, "loss": 0.2882, "mean_token_accuracy": 0.896622322499752, "num_tokens": 51306825.0, "step": 752 }, { "epoch": 1.5217391304347827, "grad_norm": 0.1863006204366684, "learning_rate": 0.00022510440368416813, "loss": 0.2827, "mean_token_accuracy": 0.9015981592237949, "num_tokens": 51374019.0, "step": 753 }, { "epoch": 1.5237613751263903, "grad_norm": 0.2003999501466751, "learning_rate": 0.0002247118506954172, "loss": 0.2999, "mean_token_accuracy": 0.8948666267096996, "num_tokens": 51437280.0, "step": 754 }, { "epoch": 1.525783619817998, "grad_norm": 0.15196073055267334, "learning_rate": 0.00022431927527992822, "loss": 0.2457, "mean_token_accuracy": 0.9064719304442406, "num_tokens": 51516774.0, "step": 755 }, { "epoch": 1.5278058645096055, "grad_norm": 0.16066138446331024, "learning_rate": 0.00022392667930622105, "loss": 0.2567, "mean_token_accuracy": 0.9101277217268944, "num_tokens": 51587203.0, "step": 756 }, { "epoch": 1.5298281092012134, "grad_norm": 0.2019067108631134, "learning_rate": 0.0002235340646429131, "loss": 0.288, "mean_token_accuracy": 0.8997247666120529, "num_tokens": 51647601.0, "step": 757 }, { "epoch": 1.531850353892821, "grad_norm": 0.1630539447069168, "learning_rate": 0.00022314143315871107, "loss": 0.2839, "mean_token_accuracy": 0.9003589190542698, "num_tokens": 51709791.0, "step": 758 }, { "epoch": 1.5338725985844288, "grad_norm": 0.17768684029579163, "learning_rate": 0.0002227487867224014, "loss": 0.2953, "mean_token_accuracy": 0.8950943425297737, "num_tokens": 51775485.0, "step": 759 }, { "epoch": 1.5358948432760364, "grad_norm": 0.16720645129680634, "learning_rate": 0.000222356127202842, "loss": 0.268, "mean_token_accuracy": 0.9044617936015129, "num_tokens": 51840213.0, "step": 760 }, { "epoch": 1.537917087967644, "grad_norm": 0.18721389770507812, "learning_rate": 0.00022196345646895282, "loss": 0.3132, "mean_token_accuracy": 0.8925869949162006, "num_tokens": 51902000.0, "step": 761 }, { "epoch": 1.5399393326592516, "grad_norm": 0.16676832735538483, "learning_rate": 0.00022157077638970733, "loss": 0.2685, "mean_token_accuracy": 0.9057548753917217, "num_tokens": 51971547.0, "step": 762 }, { "epoch": 1.5419615773508595, "grad_norm": 0.17367734014987946, "learning_rate": 0.00022117808883412337, "loss": 0.2919, "mean_token_accuracy": 0.8966298326849937, "num_tokens": 52041743.0, "step": 763 }, { "epoch": 1.543983822042467, "grad_norm": 0.15831947326660156, "learning_rate": 0.0002207853956712544, "loss": 0.2713, "mean_token_accuracy": 0.9037296660244465, "num_tokens": 52114445.0, "step": 764 }, { "epoch": 1.546006066734075, "grad_norm": 0.1643955409526825, "learning_rate": 0.00022039269877018066, "loss": 0.2555, "mean_token_accuracy": 0.9053449369966984, "num_tokens": 52184749.0, "step": 765 }, { "epoch": 1.5480283114256825, "grad_norm": 0.19596439599990845, "learning_rate": 0.00022000000000000003, "loss": 0.2991, "mean_token_accuracy": 0.8983559235930443, "num_tokens": 52246858.0, "step": 766 }, { "epoch": 1.5500505561172901, "grad_norm": 0.17947359383106232, "learning_rate": 0.00021960730122981938, "loss": 0.3053, "mean_token_accuracy": 0.894125934690237, "num_tokens": 52311538.0, "step": 767 }, { "epoch": 1.5520728008088978, "grad_norm": 0.1566184163093567, "learning_rate": 0.00021921460432874565, "loss": 0.2471, "mean_token_accuracy": 0.9079805836081505, "num_tokens": 52377316.0, "step": 768 }, { "epoch": 1.5540950455005056, "grad_norm": 0.1782991886138916, "learning_rate": 0.0002188219111658767, "loss": 0.293, "mean_token_accuracy": 0.8960098177194595, "num_tokens": 52439738.0, "step": 769 }, { "epoch": 1.5561172901921132, "grad_norm": 0.1581069380044937, "learning_rate": 0.0002184292236102927, "loss": 0.2728, "mean_token_accuracy": 0.901589822024107, "num_tokens": 52511123.0, "step": 770 }, { "epoch": 1.558139534883721, "grad_norm": 0.16994433104991913, "learning_rate": 0.0002180365435310472, "loss": 0.2735, "mean_token_accuracy": 0.9033331945538521, "num_tokens": 52576097.0, "step": 771 }, { "epoch": 1.5601617795753286, "grad_norm": 0.1678851991891861, "learning_rate": 0.00021764387279715806, "loss": 0.2903, "mean_token_accuracy": 0.8981217853724957, "num_tokens": 52651544.0, "step": 772 }, { "epoch": 1.5621840242669363, "grad_norm": 0.19909563660621643, "learning_rate": 0.00021725121327759866, "loss": 0.2981, "mean_token_accuracy": 0.8940173611044884, "num_tokens": 52710252.0, "step": 773 }, { "epoch": 1.5642062689585439, "grad_norm": 0.15204082429409027, "learning_rate": 0.00021685856684128897, "loss": 0.2523, "mean_token_accuracy": 0.9075472876429558, "num_tokens": 52781084.0, "step": 774 }, { "epoch": 1.5662285136501517, "grad_norm": 0.19516132771968842, "learning_rate": 0.00021646593535708695, "loss": 0.2984, "mean_token_accuracy": 0.8923540487885475, "num_tokens": 52844889.0, "step": 775 }, { "epoch": 1.5682507583417593, "grad_norm": 0.16001375019550323, "learning_rate": 0.00021607332069377902, "loss": 0.2668, "mean_token_accuracy": 0.898894976824522, "num_tokens": 52910879.0, "step": 776 }, { "epoch": 1.5702730030333671, "grad_norm": 0.1823982298374176, "learning_rate": 0.00021568072472007185, "loss": 0.301, "mean_token_accuracy": 0.8939221948385239, "num_tokens": 52970597.0, "step": 777 }, { "epoch": 1.5722952477249748, "grad_norm": 0.17761389911174774, "learning_rate": 0.0002152881493045829, "loss": 0.2601, "mean_token_accuracy": 0.9084571748971939, "num_tokens": 53042768.0, "step": 778 }, { "epoch": 1.5743174924165824, "grad_norm": 0.17410063743591309, "learning_rate": 0.00021489559631583194, "loss": 0.272, "mean_token_accuracy": 0.8999650180339813, "num_tokens": 53103091.0, "step": 779 }, { "epoch": 1.57633973710819, "grad_norm": 0.15084944665431976, "learning_rate": 0.00021450306762223198, "loss": 0.2387, "mean_token_accuracy": 0.9114542976021767, "num_tokens": 53180173.0, "step": 780 }, { "epoch": 1.5783619817997978, "grad_norm": 0.18222583830356598, "learning_rate": 0.00021411056509208033, "loss": 0.2994, "mean_token_accuracy": 0.8931626752018929, "num_tokens": 53243670.0, "step": 781 }, { "epoch": 1.5803842264914054, "grad_norm": 0.19381971657276154, "learning_rate": 0.0002137180905935499, "loss": 0.3116, "mean_token_accuracy": 0.8938373290002346, "num_tokens": 53301834.0, "step": 782 }, { "epoch": 1.5824064711830133, "grad_norm": 0.157192200422287, "learning_rate": 0.00021332564599467997, "loss": 0.2654, "mean_token_accuracy": 0.9061449654400349, "num_tokens": 53368342.0, "step": 783 }, { "epoch": 1.5844287158746209, "grad_norm": 0.1753574013710022, "learning_rate": 0.00021293323316336774, "loss": 0.2793, "mean_token_accuracy": 0.901081707328558, "num_tokens": 53430874.0, "step": 784 }, { "epoch": 1.5864509605662285, "grad_norm": 0.17201204597949982, "learning_rate": 0.00021254085396735895, "loss": 0.2965, "mean_token_accuracy": 0.8940661884844303, "num_tokens": 53503559.0, "step": 785 }, { "epoch": 1.588473205257836, "grad_norm": 0.16792644560337067, "learning_rate": 0.00021214851027423953, "loss": 0.2853, "mean_token_accuracy": 0.8981418162584305, "num_tokens": 53570685.0, "step": 786 }, { "epoch": 1.590495449949444, "grad_norm": 0.1627027541399002, "learning_rate": 0.00021175620395142631, "loss": 0.2726, "mean_token_accuracy": 0.9035519734025002, "num_tokens": 53641626.0, "step": 787 }, { "epoch": 1.5925176946410515, "grad_norm": 0.1961667835712433, "learning_rate": 0.00021136393686615814, "loss": 0.2932, "mean_token_accuracy": 0.8953234739601612, "num_tokens": 53703211.0, "step": 788 }, { "epoch": 1.5945399393326594, "grad_norm": 0.16637316346168518, "learning_rate": 0.00021097171088548718, "loss": 0.2643, "mean_token_accuracy": 0.8997809141874313, "num_tokens": 53774031.0, "step": 789 }, { "epoch": 1.596562184024267, "grad_norm": 0.16356298327445984, "learning_rate": 0.0002105795278762701, "loss": 0.2812, "mean_token_accuracy": 0.9001871235668659, "num_tokens": 53842430.0, "step": 790 }, { "epoch": 1.5985844287158746, "grad_norm": 0.15379726886749268, "learning_rate": 0.00021018738970515885, "loss": 0.2753, "mean_token_accuracy": 0.9013938829302788, "num_tokens": 53918815.0, "step": 791 }, { "epoch": 1.6006066734074822, "grad_norm": 0.17770731449127197, "learning_rate": 0.000209795298238592, "loss": 0.2775, "mean_token_accuracy": 0.9020545892417431, "num_tokens": 53984486.0, "step": 792 }, { "epoch": 1.60262891809909, "grad_norm": 0.18510940670967102, "learning_rate": 0.00020940325534278596, "loss": 0.3084, "mean_token_accuracy": 0.8904885537922382, "num_tokens": 54049329.0, "step": 793 }, { "epoch": 1.6046511627906976, "grad_norm": 0.15585996210575104, "learning_rate": 0.00020901126288372574, "loss": 0.243, "mean_token_accuracy": 0.9050154872238636, "num_tokens": 54121377.0, "step": 794 }, { "epoch": 1.6066734074823055, "grad_norm": 0.2045961320400238, "learning_rate": 0.0002086193227271565, "loss": 0.3026, "mean_token_accuracy": 0.8903013169765472, "num_tokens": 54181133.0, "step": 795 }, { "epoch": 1.608695652173913, "grad_norm": 0.15141364932060242, "learning_rate": 0.00020822743673857424, "loss": 0.2622, "mean_token_accuracy": 0.90499372407794, "num_tokens": 54263363.0, "step": 796 }, { "epoch": 1.6107178968655207, "grad_norm": 0.15345874428749084, "learning_rate": 0.0002078356067832174, "loss": 0.2547, "mean_token_accuracy": 0.904791995882988, "num_tokens": 54334487.0, "step": 797 }, { "epoch": 1.6127401415571283, "grad_norm": 0.16000673174858093, "learning_rate": 0.00020744383472605745, "loss": 0.2731, "mean_token_accuracy": 0.9041004255414009, "num_tokens": 54403142.0, "step": 798 }, { "epoch": 1.6147623862487361, "grad_norm": 0.16879165172576904, "learning_rate": 0.0002070521224317905, "loss": 0.2736, "mean_token_accuracy": 0.9043679311871529, "num_tokens": 54467728.0, "step": 799 }, { "epoch": 1.6167846309403437, "grad_norm": 0.16487041115760803, "learning_rate": 0.00020666047176482816, "loss": 0.2929, "mean_token_accuracy": 0.8993552401661873, "num_tokens": 54537407.0, "step": 800 }, { "epoch": 1.6188068756319516, "grad_norm": 0.17032210528850555, "learning_rate": 0.00020626888458928858, "loss": 0.2799, "mean_token_accuracy": 0.8998575955629349, "num_tokens": 54599784.0, "step": 801 }, { "epoch": 1.6208291203235592, "grad_norm": 0.17334811389446259, "learning_rate": 0.00020587736276898798, "loss": 0.2606, "mean_token_accuracy": 0.901070773601532, "num_tokens": 54663420.0, "step": 802 }, { "epoch": 1.6228513650151668, "grad_norm": 0.1741548776626587, "learning_rate": 0.00020548590816743108, "loss": 0.2823, "mean_token_accuracy": 0.8988193459808826, "num_tokens": 54727696.0, "step": 803 }, { "epoch": 1.6248736097067744, "grad_norm": 0.1664174348115921, "learning_rate": 0.00020509452264780325, "loss": 0.2643, "mean_token_accuracy": 0.9047059267759323, "num_tokens": 54791606.0, "step": 804 }, { "epoch": 1.6268958543983822, "grad_norm": 0.14816100895404816, "learning_rate": 0.0002047032080729605, "loss": 0.2449, "mean_token_accuracy": 0.903932623565197, "num_tokens": 54865039.0, "step": 805 }, { "epoch": 1.6289180990899899, "grad_norm": 0.12588512897491455, "learning_rate": 0.00020431196630542152, "loss": 0.227, "mean_token_accuracy": 0.9160388633608818, "num_tokens": 54958620.0, "step": 806 }, { "epoch": 1.6309403437815977, "grad_norm": 0.17510341107845306, "learning_rate": 0.00020392079920735835, "loss": 0.2963, "mean_token_accuracy": 0.8962272480130196, "num_tokens": 55024008.0, "step": 807 }, { "epoch": 1.6329625884732053, "grad_norm": 0.1542372852563858, "learning_rate": 0.00020352970864058757, "loss": 0.2614, "mean_token_accuracy": 0.9044002443552017, "num_tokens": 55087163.0, "step": 808 }, { "epoch": 1.634984833164813, "grad_norm": 0.16116388142108917, "learning_rate": 0.00020313869646656162, "loss": 0.2721, "mean_token_accuracy": 0.9043215177953243, "num_tokens": 55154699.0, "step": 809 }, { "epoch": 1.6370070778564205, "grad_norm": 0.1448214203119278, "learning_rate": 0.0002027477645463597, "loss": 0.2598, "mean_token_accuracy": 0.9074460677802563, "num_tokens": 55224995.0, "step": 810 }, { "epoch": 1.6390293225480284, "grad_norm": 0.17575567960739136, "learning_rate": 0.00020235691474067912, "loss": 0.2647, "mean_token_accuracy": 0.9028755128383636, "num_tokens": 55291465.0, "step": 811 }, { "epoch": 1.641051567239636, "grad_norm": 0.1718558669090271, "learning_rate": 0.0002019661489098263, "loss": 0.2658, "mean_token_accuracy": 0.9058180525898933, "num_tokens": 55356793.0, "step": 812 }, { "epoch": 1.6430738119312438, "grad_norm": 0.16898474097251892, "learning_rate": 0.00020157546891370797, "loss": 0.2868, "mean_token_accuracy": 0.9008054211735725, "num_tokens": 55428748.0, "step": 813 }, { "epoch": 1.6450960566228514, "grad_norm": 0.1628302037715912, "learning_rate": 0.00020118487661182241, "loss": 0.2667, "mean_token_accuracy": 0.9067884795367718, "num_tokens": 55499100.0, "step": 814 }, { "epoch": 1.647118301314459, "grad_norm": 0.18391703069210052, "learning_rate": 0.00020079437386325032, "loss": 0.3138, "mean_token_accuracy": 0.8893741592764854, "num_tokens": 55570834.0, "step": 815 }, { "epoch": 1.6491405460060666, "grad_norm": 0.17336952686309814, "learning_rate": 0.00020040396252664642, "loss": 0.2778, "mean_token_accuracy": 0.9026199728250504, "num_tokens": 55640251.0, "step": 816 }, { "epoch": 1.6511627906976745, "grad_norm": 0.14611810445785522, "learning_rate": 0.00020001364446023002, "loss": 0.2433, "mean_token_accuracy": 0.9094121158123016, "num_tokens": 55722284.0, "step": 817 }, { "epoch": 1.653185035389282, "grad_norm": 0.15244677662849426, "learning_rate": 0.0001996234215217768, "loss": 0.2825, "mean_token_accuracy": 0.8958746008574963, "num_tokens": 55792901.0, "step": 818 }, { "epoch": 1.65520728008089, "grad_norm": 0.17220915853977203, "learning_rate": 0.00019923329556860954, "loss": 0.2887, "mean_token_accuracy": 0.8924598507583141, "num_tokens": 55854966.0, "step": 819 }, { "epoch": 1.6572295247724975, "grad_norm": 0.1598389595746994, "learning_rate": 0.00019884326845758925, "loss": 0.2718, "mean_token_accuracy": 0.9044957980513573, "num_tokens": 55929575.0, "step": 820 }, { "epoch": 1.6592517694641051, "grad_norm": 0.1721997857093811, "learning_rate": 0.0001984533420451068, "loss": 0.2944, "mean_token_accuracy": 0.8970884680747986, "num_tokens": 55997255.0, "step": 821 }, { "epoch": 1.6612740141557127, "grad_norm": 0.184437558054924, "learning_rate": 0.0001980635181870735, "loss": 0.2996, "mean_token_accuracy": 0.8938624709844589, "num_tokens": 56059407.0, "step": 822 }, { "epoch": 1.6632962588473206, "grad_norm": 0.17128629982471466, "learning_rate": 0.0001976737987389127, "loss": 0.2999, "mean_token_accuracy": 0.892108865082264, "num_tokens": 56124709.0, "step": 823 }, { "epoch": 1.6653185035389282, "grad_norm": 0.18039193749427795, "learning_rate": 0.00019728418555555068, "loss": 0.248, "mean_token_accuracy": 0.8995288237929344, "num_tokens": 56191016.0, "step": 824 }, { "epoch": 1.667340748230536, "grad_norm": 0.16719485819339752, "learning_rate": 0.00019689468049140802, "loss": 0.2673, "mean_token_accuracy": 0.9037236869335175, "num_tokens": 56259334.0, "step": 825 }, { "epoch": 1.6693629929221436, "grad_norm": 0.16847628355026245, "learning_rate": 0.00019650528540039077, "loss": 0.266, "mean_token_accuracy": 0.9061383940279484, "num_tokens": 56326477.0, "step": 826 }, { "epoch": 1.6713852376137512, "grad_norm": 0.17857936024665833, "learning_rate": 0.00019611600213588127, "loss": 0.3023, "mean_token_accuracy": 0.8900899365544319, "num_tokens": 56386327.0, "step": 827 }, { "epoch": 1.6734074823053589, "grad_norm": 0.18187786638736725, "learning_rate": 0.0001957268325507299, "loss": 0.3001, "mean_token_accuracy": 0.8927515000104904, "num_tokens": 56446400.0, "step": 828 }, { "epoch": 1.6754297269969667, "grad_norm": 0.15920601785182953, "learning_rate": 0.0001953377784972458, "loss": 0.2834, "mean_token_accuracy": 0.8984440118074417, "num_tokens": 56516627.0, "step": 829 }, { "epoch": 1.6774519716885743, "grad_norm": 0.16971920430660248, "learning_rate": 0.00019494884182718827, "loss": 0.2845, "mean_token_accuracy": 0.8991547487676144, "num_tokens": 56586404.0, "step": 830 }, { "epoch": 1.6794742163801821, "grad_norm": 0.16059236228466034, "learning_rate": 0.00019456002439175794, "loss": 0.2658, "mean_token_accuracy": 0.9038873426616192, "num_tokens": 56657253.0, "step": 831 }, { "epoch": 1.6814964610717897, "grad_norm": 0.16817672550678253, "learning_rate": 0.00019417132804158777, "loss": 0.2825, "mean_token_accuracy": 0.8981058970093727, "num_tokens": 56725926.0, "step": 832 }, { "epoch": 1.6835187057633973, "grad_norm": 0.15651072561740875, "learning_rate": 0.00019378275462673464, "loss": 0.2683, "mean_token_accuracy": 0.9055442661046982, "num_tokens": 56794928.0, "step": 833 }, { "epoch": 1.685540950455005, "grad_norm": 0.16662436723709106, "learning_rate": 0.00019339430599667009, "loss": 0.2795, "mean_token_accuracy": 0.9005163908004761, "num_tokens": 56861202.0, "step": 834 }, { "epoch": 1.6875631951466128, "grad_norm": 0.15520507097244263, "learning_rate": 0.0001930059840002719, "loss": 0.2789, "mean_token_accuracy": 0.9018525704741478, "num_tokens": 56940546.0, "step": 835 }, { "epoch": 1.6895854398382204, "grad_norm": 0.16705678403377533, "learning_rate": 0.00019261779048581498, "loss": 0.2817, "mean_token_accuracy": 0.9004562273621559, "num_tokens": 57010510.0, "step": 836 }, { "epoch": 1.6916076845298282, "grad_norm": 0.17928999662399292, "learning_rate": 0.00019222972730096281, "loss": 0.2898, "mean_token_accuracy": 0.8954050242900848, "num_tokens": 57076063.0, "step": 837 }, { "epoch": 1.6936299292214358, "grad_norm": 0.17176282405853271, "learning_rate": 0.00019184179629275842, "loss": 0.2784, "mean_token_accuracy": 0.9002024792134762, "num_tokens": 57139142.0, "step": 838 }, { "epoch": 1.6956521739130435, "grad_norm": 0.2011646181344986, "learning_rate": 0.00019145399930761592, "loss": 0.33, "mean_token_accuracy": 0.8854256272315979, "num_tokens": 57196478.0, "step": 839 }, { "epoch": 1.697674418604651, "grad_norm": 0.1873674988746643, "learning_rate": 0.00019106633819131132, "loss": 0.2956, "mean_token_accuracy": 0.8927418142557144, "num_tokens": 57257834.0, "step": 840 }, { "epoch": 1.699696663296259, "grad_norm": 0.15767039358615875, "learning_rate": 0.00019067881478897406, "loss": 0.2603, "mean_token_accuracy": 0.8997323326766491, "num_tokens": 57332979.0, "step": 841 }, { "epoch": 1.7017189079878665, "grad_norm": 0.1793752908706665, "learning_rate": 0.00019029143094507803, "loss": 0.2914, "mean_token_accuracy": 0.8960652127861977, "num_tokens": 57393796.0, "step": 842 }, { "epoch": 1.7037411526794743, "grad_norm": 0.17136353254318237, "learning_rate": 0.00018990418850343299, "loss": 0.2831, "mean_token_accuracy": 0.8985873088240623, "num_tokens": 57461020.0, "step": 843 }, { "epoch": 1.705763397371082, "grad_norm": 0.15511257946491241, "learning_rate": 0.0001895170893071756, "loss": 0.2648, "mean_token_accuracy": 0.9044994860887527, "num_tokens": 57534185.0, "step": 844 }, { "epoch": 1.7077856420626896, "grad_norm": 0.16191929578781128, "learning_rate": 0.00018913013519876066, "loss": 0.2689, "mean_token_accuracy": 0.9016621857881546, "num_tokens": 57602240.0, "step": 845 }, { "epoch": 1.7098078867542972, "grad_norm": 0.17678587138652802, "learning_rate": 0.00018874332801995257, "loss": 0.2791, "mean_token_accuracy": 0.9009885340929031, "num_tokens": 57665999.0, "step": 846 }, { "epoch": 1.7118301314459048, "grad_norm": 0.14890553057193756, "learning_rate": 0.0001883566696118162, "loss": 0.2469, "mean_token_accuracy": 0.9083396308124065, "num_tokens": 57745250.0, "step": 847 }, { "epoch": 1.7138523761375126, "grad_norm": 0.16399073600769043, "learning_rate": 0.00018797016181470856, "loss": 0.2699, "mean_token_accuracy": 0.9010614044964314, "num_tokens": 57820665.0, "step": 848 }, { "epoch": 1.7158746208291205, "grad_norm": 0.17773596942424774, "learning_rate": 0.00018758380646826943, "loss": 0.2801, "mean_token_accuracy": 0.9002369157969952, "num_tokens": 57882848.0, "step": 849 }, { "epoch": 1.717896865520728, "grad_norm": 0.17527812719345093, "learning_rate": 0.00018719760541141347, "loss": 0.285, "mean_token_accuracy": 0.8988419659435749, "num_tokens": 57956449.0, "step": 850 }, { "epoch": 1.7199191102123357, "grad_norm": 0.17075812816619873, "learning_rate": 0.00018681156048232063, "loss": 0.2797, "mean_token_accuracy": 0.8975733481347561, "num_tokens": 58019620.0, "step": 851 }, { "epoch": 1.7219413549039433, "grad_norm": 0.162892147898674, "learning_rate": 0.00018642567351842776, "loss": 0.3048, "mean_token_accuracy": 0.8936393298208714, "num_tokens": 58084770.0, "step": 852 }, { "epoch": 1.723963599595551, "grad_norm": 0.1569058746099472, "learning_rate": 0.0001860399463564201, "loss": 0.2779, "mean_token_accuracy": 0.90330421179533, "num_tokens": 58149930.0, "step": 853 }, { "epoch": 1.7259858442871587, "grad_norm": 0.15333376824855804, "learning_rate": 0.00018565438083222193, "loss": 0.2431, "mean_token_accuracy": 0.9056979790329933, "num_tokens": 58218764.0, "step": 854 }, { "epoch": 1.7280080889787666, "grad_norm": 0.18095627427101135, "learning_rate": 0.00018526897878098857, "loss": 0.2914, "mean_token_accuracy": 0.8964138776063919, "num_tokens": 58280108.0, "step": 855 }, { "epoch": 1.7300303336703742, "grad_norm": 0.17549115419387817, "learning_rate": 0.00018488374203709694, "loss": 0.2715, "mean_token_accuracy": 0.9019583091139793, "num_tokens": 58349603.0, "step": 856 }, { "epoch": 1.7320525783619818, "grad_norm": 0.14637798070907593, "learning_rate": 0.00018449867243413732, "loss": 0.2479, "mean_token_accuracy": 0.9110586978495121, "num_tokens": 58423158.0, "step": 857 }, { "epoch": 1.7340748230535894, "grad_norm": 0.18153415620326996, "learning_rate": 0.00018411377180490454, "loss": 0.2838, "mean_token_accuracy": 0.8981715328991413, "num_tokens": 58489878.0, "step": 858 }, { "epoch": 1.736097067745197, "grad_norm": 0.14081305265426636, "learning_rate": 0.00018372904198138895, "loss": 0.2421, "mean_token_accuracy": 0.9120564199984074, "num_tokens": 58567119.0, "step": 859 }, { "epoch": 1.7381193124368048, "grad_norm": 0.19423925876617432, "learning_rate": 0.0001833444847947681, "loss": 0.2827, "mean_token_accuracy": 0.8960412628948689, "num_tokens": 58629512.0, "step": 860 }, { "epoch": 1.7401415571284127, "grad_norm": 0.1835591346025467, "learning_rate": 0.00018296010207539775, "loss": 0.3066, "mean_token_accuracy": 0.8935861364006996, "num_tokens": 58692056.0, "step": 861 }, { "epoch": 1.7421638018200203, "grad_norm": 0.17017914354801178, "learning_rate": 0.00018257589565280337, "loss": 0.2839, "mean_token_accuracy": 0.8955631256103516, "num_tokens": 58757823.0, "step": 862 }, { "epoch": 1.744186046511628, "grad_norm": 0.17654229700565338, "learning_rate": 0.0001821918673556712, "loss": 0.2856, "mean_token_accuracy": 0.9003425352275372, "num_tokens": 58820185.0, "step": 863 }, { "epoch": 1.7462082912032355, "grad_norm": 0.18433596193790436, "learning_rate": 0.00018180801901183967, "loss": 0.276, "mean_token_accuracy": 0.9058196842670441, "num_tokens": 58888573.0, "step": 864 }, { "epoch": 1.7482305358948431, "grad_norm": 0.16942624747753143, "learning_rate": 0.0001814243524482909, "loss": 0.2676, "mean_token_accuracy": 0.9044988267123699, "num_tokens": 58953010.0, "step": 865 }, { "epoch": 1.750252780586451, "grad_norm": 0.1317698061466217, "learning_rate": 0.0001810408694911415, "loss": 0.2423, "mean_token_accuracy": 0.9077907241880894, "num_tokens": 59032037.0, "step": 866 }, { "epoch": 1.7522750252780588, "grad_norm": 0.17676536738872528, "learning_rate": 0.00018065757196563444, "loss": 0.2834, "mean_token_accuracy": 0.90084283426404, "num_tokens": 59102101.0, "step": 867 }, { "epoch": 1.7542972699696664, "grad_norm": 0.16460995376110077, "learning_rate": 0.00018027446169612983, "loss": 0.266, "mean_token_accuracy": 0.8994225487112999, "num_tokens": 59175507.0, "step": 868 }, { "epoch": 1.756319514661274, "grad_norm": 0.15954379737377167, "learning_rate": 0.0001798915405060968, "loss": 0.2613, "mean_token_accuracy": 0.9075300879776478, "num_tokens": 59241915.0, "step": 869 }, { "epoch": 1.7583417593528816, "grad_norm": 0.17243851721286774, "learning_rate": 0.00017950881021810435, "loss": 0.2653, "mean_token_accuracy": 0.9034992009401321, "num_tokens": 59305436.0, "step": 870 }, { "epoch": 1.7603640040444892, "grad_norm": 0.17760290205478668, "learning_rate": 0.00017912627265381285, "loss": 0.2885, "mean_token_accuracy": 0.8978960253298283, "num_tokens": 59370395.0, "step": 871 }, { "epoch": 1.762386248736097, "grad_norm": 0.17663156986236572, "learning_rate": 0.00017874392963396552, "loss": 0.2931, "mean_token_accuracy": 0.8978605940937996, "num_tokens": 59435634.0, "step": 872 }, { "epoch": 1.764408493427705, "grad_norm": 0.17674268782138824, "learning_rate": 0.00017836178297837938, "loss": 0.2717, "mean_token_accuracy": 0.9010186977684498, "num_tokens": 59500074.0, "step": 873 }, { "epoch": 1.7664307381193125, "grad_norm": 0.16433486342430115, "learning_rate": 0.0001779798345059371, "loss": 0.2598, "mean_token_accuracy": 0.9028237722814083, "num_tokens": 59574564.0, "step": 874 }, { "epoch": 1.7684529828109201, "grad_norm": 0.15955936908721924, "learning_rate": 0.0001775980860345778, "loss": 0.2677, "mean_token_accuracy": 0.9025723747909069, "num_tokens": 59650978.0, "step": 875 }, { "epoch": 1.7704752275025277, "grad_norm": 0.1750318855047226, "learning_rate": 0.00017721653938128888, "loss": 0.2866, "mean_token_accuracy": 0.8999117016792297, "num_tokens": 59714437.0, "step": 876 }, { "epoch": 1.7724974721941353, "grad_norm": 0.15407449007034302, "learning_rate": 0.00017683519636209707, "loss": 0.2586, "mean_token_accuracy": 0.9031764194369316, "num_tokens": 59795096.0, "step": 877 }, { "epoch": 1.7745197168857432, "grad_norm": 0.16260726749897003, "learning_rate": 0.00017645405879205983, "loss": 0.275, "mean_token_accuracy": 0.9040297567844391, "num_tokens": 59862394.0, "step": 878 }, { "epoch": 1.776541961577351, "grad_norm": 0.16649970412254333, "learning_rate": 0.0001760731284852568, "loss": 0.278, "mean_token_accuracy": 0.8974411375820637, "num_tokens": 59932031.0, "step": 879 }, { "epoch": 1.7785642062689586, "grad_norm": 0.1494332104921341, "learning_rate": 0.0001756924072547813, "loss": 0.2579, "mean_token_accuracy": 0.905670553445816, "num_tokens": 60011025.0, "step": 880 }, { "epoch": 1.7805864509605662, "grad_norm": 0.18167705833911896, "learning_rate": 0.00017531189691273106, "loss": 0.2776, "mean_token_accuracy": 0.8976808004081249, "num_tokens": 60068820.0, "step": 881 }, { "epoch": 1.7826086956521738, "grad_norm": 0.16186164319515228, "learning_rate": 0.00017493159927020054, "loss": 0.2811, "mean_token_accuracy": 0.9016175977885723, "num_tokens": 60130140.0, "step": 882 }, { "epoch": 1.7846309403437814, "grad_norm": 0.17380307614803314, "learning_rate": 0.0001745515161372716, "loss": 0.2945, "mean_token_accuracy": 0.8955324217677116, "num_tokens": 60193073.0, "step": 883 }, { "epoch": 1.7866531850353893, "grad_norm": 0.17945754528045654, "learning_rate": 0.00017417164932300502, "loss": 0.2722, "mean_token_accuracy": 0.8948768936097622, "num_tokens": 60255959.0, "step": 884 }, { "epoch": 1.7886754297269971, "grad_norm": 0.16201643645763397, "learning_rate": 0.00017379200063543225, "loss": 0.2761, "mean_token_accuracy": 0.8984379507601261, "num_tokens": 60331653.0, "step": 885 }, { "epoch": 1.7906976744186047, "grad_norm": 0.17004264891147614, "learning_rate": 0.00017341257188154625, "loss": 0.2785, "mean_token_accuracy": 0.902726124972105, "num_tokens": 60397891.0, "step": 886 }, { "epoch": 1.7927199191102123, "grad_norm": 0.17423401772975922, "learning_rate": 0.0001730333648672934, "loss": 0.2663, "mean_token_accuracy": 0.9040607661008835, "num_tokens": 60463271.0, "step": 887 }, { "epoch": 1.79474216380182, "grad_norm": 0.17113754153251648, "learning_rate": 0.00017265438139756455, "loss": 0.2754, "mean_token_accuracy": 0.901301734149456, "num_tokens": 60527757.0, "step": 888 }, { "epoch": 1.7967644084934276, "grad_norm": 0.1624325066804886, "learning_rate": 0.00017227562327618655, "loss": 0.264, "mean_token_accuracy": 0.8982259891927242, "num_tokens": 60590938.0, "step": 889 }, { "epoch": 1.7987866531850354, "grad_norm": 0.17364652454853058, "learning_rate": 0.00017189709230591376, "loss": 0.2768, "mean_token_accuracy": 0.8977219946682453, "num_tokens": 60666355.0, "step": 890 }, { "epoch": 1.8008088978766432, "grad_norm": 0.15866470336914062, "learning_rate": 0.00017151879028841935, "loss": 0.2556, "mean_token_accuracy": 0.9094675220549107, "num_tokens": 60734511.0, "step": 891 }, { "epoch": 1.8028311425682508, "grad_norm": 0.16959354281425476, "learning_rate": 0.0001711407190242867, "loss": 0.2827, "mean_token_accuracy": 0.9035419821739197, "num_tokens": 60800072.0, "step": 892 }, { "epoch": 1.8048533872598584, "grad_norm": 0.13593734800815582, "learning_rate": 0.00017076288031300086, "loss": 0.2233, "mean_token_accuracy": 0.9116230644285679, "num_tokens": 60877569.0, "step": 893 }, { "epoch": 1.806875631951466, "grad_norm": 0.159558966755867, "learning_rate": 0.00017038527595294016, "loss": 0.2713, "mean_token_accuracy": 0.9025290682911873, "num_tokens": 60946273.0, "step": 894 }, { "epoch": 1.8088978766430737, "grad_norm": 0.14993025362491608, "learning_rate": 0.00017000790774136744, "loss": 0.2563, "mean_token_accuracy": 0.906671367585659, "num_tokens": 61021490.0, "step": 895 }, { "epoch": 1.8109201213346815, "grad_norm": 0.16624176502227783, "learning_rate": 0.00016963077747442147, "loss": 0.285, "mean_token_accuracy": 0.9001613892614841, "num_tokens": 61087077.0, "step": 896 }, { "epoch": 1.8129423660262893, "grad_norm": 0.18598856031894684, "learning_rate": 0.00016925388694710857, "loss": 0.2816, "mean_token_accuracy": 0.8991341292858124, "num_tokens": 61155366.0, "step": 897 }, { "epoch": 1.814964610717897, "grad_norm": 0.15858127176761627, "learning_rate": 0.00016887723795329395, "loss": 0.259, "mean_token_accuracy": 0.9013683348894119, "num_tokens": 61227279.0, "step": 898 }, { "epoch": 1.8169868554095046, "grad_norm": 0.17217408120632172, "learning_rate": 0.00016850083228569327, "loss": 0.3001, "mean_token_accuracy": 0.8970577903091908, "num_tokens": 61294506.0, "step": 899 }, { "epoch": 1.8190091001011122, "grad_norm": 0.15035738050937653, "learning_rate": 0.00016812467173586395, "loss": 0.2645, "mean_token_accuracy": 0.9000033251941204, "num_tokens": 61365391.0, "step": 900 }, { "epoch": 1.8210313447927198, "grad_norm": 0.17095452547073364, "learning_rate": 0.0001677487580941968, "loss": 0.2723, "mean_token_accuracy": 0.9036833345890045, "num_tokens": 61430318.0, "step": 901 }, { "epoch": 1.8230535894843276, "grad_norm": 0.18995128571987152, "learning_rate": 0.00016737309314990742, "loss": 0.2963, "mean_token_accuracy": 0.897097785025835, "num_tokens": 61490667.0, "step": 902 }, { "epoch": 1.8250758341759354, "grad_norm": 0.15838812291622162, "learning_rate": 0.00016699767869102767, "loss": 0.2597, "mean_token_accuracy": 0.9020838551223278, "num_tokens": 61566103.0, "step": 903 }, { "epoch": 1.827098078867543, "grad_norm": 0.17972201108932495, "learning_rate": 0.00016662251650439725, "loss": 0.2853, "mean_token_accuracy": 0.899272233247757, "num_tokens": 61628595.0, "step": 904 }, { "epoch": 1.8291203235591507, "grad_norm": 0.1463383138179779, "learning_rate": 0.0001662476083756551, "loss": 0.26, "mean_token_accuracy": 0.9080706797540188, "num_tokens": 61703786.0, "step": 905 }, { "epoch": 1.8311425682507583, "grad_norm": 0.16255010664463043, "learning_rate": 0.00016587295608923088, "loss": 0.2805, "mean_token_accuracy": 0.9013442508876324, "num_tokens": 61776819.0, "step": 906 }, { "epoch": 1.8331648129423659, "grad_norm": 0.17431674897670746, "learning_rate": 0.0001654985614283366, "loss": 0.2877, "mean_token_accuracy": 0.9047906063497066, "num_tokens": 61846922.0, "step": 907 }, { "epoch": 1.8351870576339737, "grad_norm": 0.1731417030096054, "learning_rate": 0.00016512442617495804, "loss": 0.2809, "mean_token_accuracy": 0.8943095356225967, "num_tokens": 61913305.0, "step": 908 }, { "epoch": 1.8372093023255816, "grad_norm": 0.17473085224628448, "learning_rate": 0.00016475055210984641, "loss": 0.2765, "mean_token_accuracy": 0.9039146527647972, "num_tokens": 61974613.0, "step": 909 }, { "epoch": 1.8392315470171892, "grad_norm": 0.1697629690170288, "learning_rate": 0.00016437694101250952, "loss": 0.2672, "mean_token_accuracy": 0.9050569906830788, "num_tokens": 62042608.0, "step": 910 }, { "epoch": 1.8412537917087968, "grad_norm": 0.1614944189786911, "learning_rate": 0.00016400359466120366, "loss": 0.2737, "mean_token_accuracy": 0.9029634855687618, "num_tokens": 62112444.0, "step": 911 }, { "epoch": 1.8432760364004044, "grad_norm": 0.17687106132507324, "learning_rate": 0.00016363051483292513, "loss": 0.2648, "mean_token_accuracy": 0.9044081643223763, "num_tokens": 62181562.0, "step": 912 }, { "epoch": 1.845298281092012, "grad_norm": 0.1807907372713089, "learning_rate": 0.0001632577033034015, "loss": 0.274, "mean_token_accuracy": 0.9006009586155415, "num_tokens": 62245198.0, "step": 913 }, { "epoch": 1.8473205257836198, "grad_norm": 0.19810381531715393, "learning_rate": 0.00016288516184708346, "loss": 0.2893, "mean_token_accuracy": 0.8972717076539993, "num_tokens": 62308012.0, "step": 914 }, { "epoch": 1.8493427704752277, "grad_norm": 0.15699312090873718, "learning_rate": 0.00016251289223713616, "loss": 0.2744, "mean_token_accuracy": 0.9030490145087242, "num_tokens": 62379728.0, "step": 915 }, { "epoch": 1.8513650151668353, "grad_norm": 0.1709468960762024, "learning_rate": 0.000162140896245431, "loss": 0.2471, "mean_token_accuracy": 0.9071713648736477, "num_tokens": 62446402.0, "step": 916 }, { "epoch": 1.8533872598584429, "grad_norm": 0.152323380112648, "learning_rate": 0.00016176917564253679, "loss": 0.231, "mean_token_accuracy": 0.9163475334644318, "num_tokens": 62521000.0, "step": 917 }, { "epoch": 1.8554095045500505, "grad_norm": 0.1929645538330078, "learning_rate": 0.00016139773219771186, "loss": 0.2972, "mean_token_accuracy": 0.8963135108351707, "num_tokens": 62582288.0, "step": 918 }, { "epoch": 1.857431749241658, "grad_norm": 0.14357374608516693, "learning_rate": 0.00016102656767889522, "loss": 0.2525, "mean_token_accuracy": 0.905899915844202, "num_tokens": 62659214.0, "step": 919 }, { "epoch": 1.859453993933266, "grad_norm": 0.18147152662277222, "learning_rate": 0.00016065568385269834, "loss": 0.3062, "mean_token_accuracy": 0.891651626676321, "num_tokens": 62721172.0, "step": 920 }, { "epoch": 1.8614762386248738, "grad_norm": 0.15798717737197876, "learning_rate": 0.0001602850824843967, "loss": 0.2441, "mean_token_accuracy": 0.9044736139476299, "num_tokens": 62790693.0, "step": 921 }, { "epoch": 1.8634984833164814, "grad_norm": 0.20527228713035583, "learning_rate": 0.00015991476533792125, "loss": 0.2862, "mean_token_accuracy": 0.9020938500761986, "num_tokens": 62861978.0, "step": 922 }, { "epoch": 1.865520728008089, "grad_norm": 0.17211146652698517, "learning_rate": 0.00015954473417585042, "loss": 0.278, "mean_token_accuracy": 0.901647973805666, "num_tokens": 62928176.0, "step": 923 }, { "epoch": 1.8675429726996966, "grad_norm": 0.14656521379947662, "learning_rate": 0.00015917499075940116, "loss": 0.2436, "mean_token_accuracy": 0.9071595072746277, "num_tokens": 63008955.0, "step": 924 }, { "epoch": 1.8695652173913042, "grad_norm": 0.16858512163162231, "learning_rate": 0.000158805536848421, "loss": 0.2838, "mean_token_accuracy": 0.8976234942674637, "num_tokens": 63077352.0, "step": 925 }, { "epoch": 1.871587462082912, "grad_norm": 0.15110129117965698, "learning_rate": 0.00015843637420137965, "loss": 0.2491, "mean_token_accuracy": 0.9075470231473446, "num_tokens": 63155136.0, "step": 926 }, { "epoch": 1.8736097067745199, "grad_norm": 0.16917841136455536, "learning_rate": 0.00015806750457536016, "loss": 0.2777, "mean_token_accuracy": 0.9005607068538666, "num_tokens": 63228469.0, "step": 927 }, { "epoch": 1.8756319514661275, "grad_norm": 0.15289200842380524, "learning_rate": 0.00015769892972605125, "loss": 0.2535, "mean_token_accuracy": 0.9035063087940216, "num_tokens": 63299131.0, "step": 928 }, { "epoch": 1.877654196157735, "grad_norm": 0.16520720720291138, "learning_rate": 0.00015733065140773845, "loss": 0.2742, "mean_token_accuracy": 0.9034424312412739, "num_tokens": 63370295.0, "step": 929 }, { "epoch": 1.8796764408493427, "grad_norm": 0.15712064504623413, "learning_rate": 0.00015696267137329584, "loss": 0.2667, "mean_token_accuracy": 0.9040120244026184, "num_tokens": 63437736.0, "step": 930 }, { "epoch": 1.8816986855409503, "grad_norm": 0.1603911817073822, "learning_rate": 0.00015659499137417798, "loss": 0.2507, "mean_token_accuracy": 0.9087044671177864, "num_tokens": 63509676.0, "step": 931 }, { "epoch": 1.8837209302325582, "grad_norm": 0.16669879853725433, "learning_rate": 0.00015622761316041114, "loss": 0.278, "mean_token_accuracy": 0.8977540507912636, "num_tokens": 63576754.0, "step": 932 }, { "epoch": 1.885743174924166, "grad_norm": 0.17182767391204834, "learning_rate": 0.00015586053848058536, "loss": 0.2526, "mean_token_accuracy": 0.9016401395201683, "num_tokens": 63643843.0, "step": 933 }, { "epoch": 1.8877654196157736, "grad_norm": 0.17400912940502167, "learning_rate": 0.00015549376908184596, "loss": 0.282, "mean_token_accuracy": 0.8970470912754536, "num_tokens": 63712033.0, "step": 934 }, { "epoch": 1.8897876643073812, "grad_norm": 0.16362541913986206, "learning_rate": 0.00015512730670988508, "loss": 0.2794, "mean_token_accuracy": 0.9033955708146095, "num_tokens": 63783615.0, "step": 935 }, { "epoch": 1.8918099089989888, "grad_norm": 0.20413319766521454, "learning_rate": 0.00015476115310893374, "loss": 0.2973, "mean_token_accuracy": 0.8986438475549221, "num_tokens": 63837579.0, "step": 936 }, { "epoch": 1.8938321536905964, "grad_norm": 0.173280730843544, "learning_rate": 0.00015439531002175305, "loss": 0.2614, "mean_token_accuracy": 0.9053931087255478, "num_tokens": 63904296.0, "step": 937 }, { "epoch": 1.8958543983822043, "grad_norm": 0.16067558526992798, "learning_rate": 0.00015402977918962653, "loss": 0.2688, "mean_token_accuracy": 0.905962623655796, "num_tokens": 63982577.0, "step": 938 }, { "epoch": 1.897876643073812, "grad_norm": 0.18021517992019653, "learning_rate": 0.00015366456235235113, "loss": 0.2935, "mean_token_accuracy": 0.8951955139636993, "num_tokens": 64038048.0, "step": 939 }, { "epoch": 1.8998988877654197, "grad_norm": 0.14851278066635132, "learning_rate": 0.0001532996612482295, "loss": 0.2661, "mean_token_accuracy": 0.9066961444914341, "num_tokens": 64113768.0, "step": 940 }, { "epoch": 1.9019211324570273, "grad_norm": 0.17288359999656677, "learning_rate": 0.00015293507761406148, "loss": 0.271, "mean_token_accuracy": 0.9030660726130009, "num_tokens": 64178434.0, "step": 941 }, { "epoch": 1.903943377148635, "grad_norm": 0.16324573755264282, "learning_rate": 0.00015257081318513583, "loss": 0.274, "mean_token_accuracy": 0.9019493535161018, "num_tokens": 64249882.0, "step": 942 }, { "epoch": 1.9059656218402425, "grad_norm": 0.15509222447872162, "learning_rate": 0.0001522068696952221, "loss": 0.2354, "mean_token_accuracy": 0.9143304452300072, "num_tokens": 64322937.0, "step": 943 }, { "epoch": 1.9079878665318504, "grad_norm": 0.1547105610370636, "learning_rate": 0.00015184324887656208, "loss": 0.2553, "mean_token_accuracy": 0.9079734869301319, "num_tokens": 64393253.0, "step": 944 }, { "epoch": 1.910010111223458, "grad_norm": 0.15001994371414185, "learning_rate": 0.00015147995245986203, "loss": 0.2549, "mean_token_accuracy": 0.9065254330635071, "num_tokens": 64470294.0, "step": 945 }, { "epoch": 1.9120323559150658, "grad_norm": 0.17263031005859375, "learning_rate": 0.00015111698217428385, "loss": 0.2766, "mean_token_accuracy": 0.9019508697092533, "num_tokens": 64541359.0, "step": 946 }, { "epoch": 1.9140546006066734, "grad_norm": 0.19937334954738617, "learning_rate": 0.0001507543397474375, "loss": 0.2893, "mean_token_accuracy": 0.8960909508168697, "num_tokens": 64601687.0, "step": 947 }, { "epoch": 1.916076845298281, "grad_norm": 0.20299410820007324, "learning_rate": 0.00015039202690537233, "loss": 0.2875, "mean_token_accuracy": 0.8969489298760891, "num_tokens": 64662730.0, "step": 948 }, { "epoch": 1.9180990899898887, "grad_norm": 0.17673259973526, "learning_rate": 0.0001500300453725688, "loss": 0.285, "mean_token_accuracy": 0.8983747102320194, "num_tokens": 64726699.0, "step": 949 }, { "epoch": 1.9201213346814965, "grad_norm": 0.14203934371471405, "learning_rate": 0.00014966839687193074, "loss": 0.2413, "mean_token_accuracy": 0.9119373075664043, "num_tokens": 64804474.0, "step": 950 }, { "epoch": 1.922143579373104, "grad_norm": 0.18115116655826569, "learning_rate": 0.0001493070831247767, "loss": 0.2618, "mean_token_accuracy": 0.9050916060805321, "num_tokens": 64867023.0, "step": 951 }, { "epoch": 1.924165824064712, "grad_norm": 0.15658792853355408, "learning_rate": 0.00014894610585083196, "loss": 0.2539, "mean_token_accuracy": 0.9065564014017582, "num_tokens": 64933593.0, "step": 952 }, { "epoch": 1.9261880687563195, "grad_norm": 0.18066135048866272, "learning_rate": 0.00014858546676822023, "loss": 0.2731, "mean_token_accuracy": 0.9004339128732681, "num_tokens": 64997732.0, "step": 953 }, { "epoch": 1.9282103134479271, "grad_norm": 0.15237212181091309, "learning_rate": 0.0001482251675934557, "loss": 0.2476, "mean_token_accuracy": 0.9087250605225563, "num_tokens": 65080000.0, "step": 954 }, { "epoch": 1.9302325581395348, "grad_norm": 0.18067006766796112, "learning_rate": 0.00014786521004143467, "loss": 0.2712, "mean_token_accuracy": 0.9025260508060455, "num_tokens": 65148696.0, "step": 955 }, { "epoch": 1.9322548028311426, "grad_norm": 0.15837518870830536, "learning_rate": 0.00014750559582542736, "loss": 0.2606, "mean_token_accuracy": 0.9080248959362507, "num_tokens": 65223230.0, "step": 956 }, { "epoch": 1.9342770475227502, "grad_norm": 0.16518649458885193, "learning_rate": 0.00014714632665706985, "loss": 0.2539, "mean_token_accuracy": 0.9098630361258984, "num_tokens": 65292846.0, "step": 957 }, { "epoch": 1.936299292214358, "grad_norm": 0.18779224157333374, "learning_rate": 0.000146787404246356, "loss": 0.282, "mean_token_accuracy": 0.8994725160300732, "num_tokens": 65354948.0, "step": 958 }, { "epoch": 1.9383215369059656, "grad_norm": 0.16804009675979614, "learning_rate": 0.0001464288303016292, "loss": 0.2521, "mean_token_accuracy": 0.9077105298638344, "num_tokens": 65425082.0, "step": 959 }, { "epoch": 1.9403437815975733, "grad_norm": 0.17569729685783386, "learning_rate": 0.00014607060652957414, "loss": 0.2914, "mean_token_accuracy": 0.8924459666013718, "num_tokens": 65491402.0, "step": 960 }, { "epoch": 1.9423660262891809, "grad_norm": 0.14672434329986572, "learning_rate": 0.00014571273463520897, "loss": 0.2628, "mean_token_accuracy": 0.9076977856457233, "num_tokens": 65563535.0, "step": 961 }, { "epoch": 1.9443882709807887, "grad_norm": 0.1623447835445404, "learning_rate": 0.00014535521632187703, "loss": 0.2759, "mean_token_accuracy": 0.9029062166810036, "num_tokens": 65629601.0, "step": 962 }, { "epoch": 1.9464105156723963, "grad_norm": 0.1764685958623886, "learning_rate": 0.00014499805329123858, "loss": 0.3043, "mean_token_accuracy": 0.8929594941437244, "num_tokens": 65689192.0, "step": 963 }, { "epoch": 1.9484327603640041, "grad_norm": 0.1544012725353241, "learning_rate": 0.000144641247243263, "loss": 0.2664, "mean_token_accuracy": 0.9056011252105236, "num_tokens": 65761600.0, "step": 964 }, { "epoch": 1.9504550050556118, "grad_norm": 0.17178235948085785, "learning_rate": 0.00014428479987622055, "loss": 0.2688, "mean_token_accuracy": 0.9024265073239803, "num_tokens": 65824048.0, "step": 965 }, { "epoch": 1.9524772497472194, "grad_norm": 0.17977994680404663, "learning_rate": 0.00014392871288667415, "loss": 0.2762, "mean_token_accuracy": 0.9031669199466705, "num_tokens": 65889268.0, "step": 966 }, { "epoch": 1.954499494438827, "grad_norm": 0.15329943597316742, "learning_rate": 0.00014357298796947168, "loss": 0.2841, "mean_token_accuracy": 0.8999549075961113, "num_tokens": 65961165.0, "step": 967 }, { "epoch": 1.9565217391304348, "grad_norm": 0.17066965997219086, "learning_rate": 0.00014321762681773762, "loss": 0.2636, "mean_token_accuracy": 0.9027245566248894, "num_tokens": 66022951.0, "step": 968 }, { "epoch": 1.9585439838220424, "grad_norm": 0.1542961746454239, "learning_rate": 0.00014286263112286472, "loss": 0.2441, "mean_token_accuracy": 0.9134857915341854, "num_tokens": 66099844.0, "step": 969 }, { "epoch": 1.9605662285136503, "grad_norm": 0.17265184223651886, "learning_rate": 0.00014250800257450684, "loss": 0.2797, "mean_token_accuracy": 0.9043730795383453, "num_tokens": 66173153.0, "step": 970 }, { "epoch": 1.9625884732052579, "grad_norm": 0.1839493066072464, "learning_rate": 0.00014215374286057005, "loss": 0.2908, "mean_token_accuracy": 0.8951999023556709, "num_tokens": 66234689.0, "step": 971 }, { "epoch": 1.9646107178968655, "grad_norm": 0.15913142263889313, "learning_rate": 0.00014179985366720495, "loss": 0.2837, "mean_token_accuracy": 0.9044655375182629, "num_tokens": 66305941.0, "step": 972 }, { "epoch": 1.966632962588473, "grad_norm": 0.13867108523845673, "learning_rate": 0.0001414463366787984, "loss": 0.216, "mean_token_accuracy": 0.9169092550873756, "num_tokens": 66381037.0, "step": 973 }, { "epoch": 1.968655207280081, "grad_norm": 0.1802113801240921, "learning_rate": 0.00014109319357796606, "loss": 0.3038, "mean_token_accuracy": 0.893009040504694, "num_tokens": 66440797.0, "step": 974 }, { "epoch": 1.9706774519716885, "grad_norm": 0.17021583020687103, "learning_rate": 0.00014074042604554374, "loss": 0.2733, "mean_token_accuracy": 0.9027226865291595, "num_tokens": 66505699.0, "step": 975 }, { "epoch": 1.9726996966632964, "grad_norm": 0.19243739545345306, "learning_rate": 0.00014038803576057985, "loss": 0.3087, "mean_token_accuracy": 0.8891540095210075, "num_tokens": 66567155.0, "step": 976 }, { "epoch": 1.974721941354904, "grad_norm": 0.15427738428115845, "learning_rate": 0.00014003602440032693, "loss": 0.3055, "mean_token_accuracy": 0.8956649079918861, "num_tokens": 66644385.0, "step": 977 }, { "epoch": 1.9767441860465116, "grad_norm": 0.15167449414730072, "learning_rate": 0.00013968439364023442, "loss": 0.2612, "mean_token_accuracy": 0.9073714017868042, "num_tokens": 66714503.0, "step": 978 }, { "epoch": 1.9787664307381192, "grad_norm": 0.1547987014055252, "learning_rate": 0.00013933314515393995, "loss": 0.2721, "mean_token_accuracy": 0.9024667181074619, "num_tokens": 66779572.0, "step": 979 }, { "epoch": 1.980788675429727, "grad_norm": 0.14774559438228607, "learning_rate": 0.0001389822806132617, "loss": 0.2571, "mean_token_accuracy": 0.907380323857069, "num_tokens": 66855257.0, "step": 980 }, { "epoch": 1.9828109201213346, "grad_norm": 0.16043910384178162, "learning_rate": 0.00013863180168819048, "loss": 0.2523, "mean_token_accuracy": 0.9105048142373562, "num_tokens": 66919243.0, "step": 981 }, { "epoch": 1.9848331648129425, "grad_norm": 0.18005625903606415, "learning_rate": 0.0001382817100468816, "loss": 0.2914, "mean_token_accuracy": 0.8974611833691597, "num_tokens": 66987494.0, "step": 982 }, { "epoch": 1.98685540950455, "grad_norm": 0.1641789674758911, "learning_rate": 0.00013793200735564716, "loss": 0.2826, "mean_token_accuracy": 0.8971075974404812, "num_tokens": 67053248.0, "step": 983 }, { "epoch": 1.9888776541961577, "grad_norm": 0.15405279397964478, "learning_rate": 0.00013758269527894778, "loss": 0.2559, "mean_token_accuracy": 0.9002925455570221, "num_tokens": 67124156.0, "step": 984 }, { "epoch": 1.9908998988877653, "grad_norm": 0.15380239486694336, "learning_rate": 0.00013723377547938522, "loss": 0.2569, "mean_token_accuracy": 0.9101624749600887, "num_tokens": 67196079.0, "step": 985 }, { "epoch": 1.9929221435793731, "grad_norm": 0.16303087770938873, "learning_rate": 0.00013688524961769395, "loss": 0.2669, "mean_token_accuracy": 0.9014462493360043, "num_tokens": 67264659.0, "step": 986 }, { "epoch": 1.9949443882709808, "grad_norm": 0.17709141969680786, "learning_rate": 0.00013653711935273326, "loss": 0.2831, "mean_token_accuracy": 0.9000302441418171, "num_tokens": 67326567.0, "step": 987 }, { "epoch": 1.9969666329625886, "grad_norm": 0.16408245265483856, "learning_rate": 0.00013618938634147996, "loss": 0.2563, "mean_token_accuracy": 0.9081169851124287, "num_tokens": 67393706.0, "step": 988 }, { "epoch": 1.9989888776541962, "grad_norm": 0.1415863037109375, "learning_rate": 0.00013584205223901976, "loss": 0.2386, "mean_token_accuracy": 0.909894797950983, "num_tokens": 67472677.0, "step": 989 }, { "epoch": 2.0, "grad_norm": 0.19028227031230927, "learning_rate": 0.00013549511869853973, "loss": 0.2248, "mean_token_accuracy": 0.9155159220099449, "num_tokens": 67511282.0, "step": 990 } ], "logging_steps": 1, "max_steps": 1485, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8.243396605005267e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }