{ "best_global_step": 1038, "best_metric": 0.5740059, "best_model_checkpoint": "/mnt/gpfs/shenyujiong/output/qwen3-vl-8b-int-sft-merged-nv5592-third3000-full-3epoch/v0-20251226-140741/checkpoint-1038", "epoch": 3.0, "eval_steps": 500, "global_step": 1038, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002890173410404624, "grad_norm": 6.073309605336921, "learning_rate": 1.923076923076923e-08, "loss": 0.8852723240852356, "step": 1, "token_acc": 0.7513407750453963 }, { "epoch": 0.005780346820809248, "grad_norm": 5.632770918536085, "learning_rate": 3.846153846153846e-08, "loss": 0.8229959607124329, "step": 2, "token_acc": 0.7648557050426209 }, { "epoch": 0.008670520231213872, "grad_norm": 5.550843708913173, "learning_rate": 5.7692307692307695e-08, "loss": 0.8395601511001587, "step": 3, "token_acc": 0.7611515500814708 }, { "epoch": 0.011560693641618497, "grad_norm": 5.463688271600264, "learning_rate": 7.692307692307692e-08, "loss": 0.8262450695037842, "step": 4, "token_acc": 0.7617775757231346 }, { "epoch": 0.014450867052023121, "grad_norm": 5.208733348546384, "learning_rate": 9.615384615384616e-08, "loss": 0.7870609760284424, "step": 5, "token_acc": 0.7738227378472486 }, { "epoch": 0.017341040462427744, "grad_norm": 6.094089600000965, "learning_rate": 1.1538461538461539e-07, "loss": 0.890167236328125, "step": 6, "token_acc": 0.7463134620800402 }, { "epoch": 0.02023121387283237, "grad_norm": 5.511558073866942, "learning_rate": 1.346153846153846e-07, "loss": 0.8200665712356567, "step": 7, "token_acc": 0.7655801718674399 }, { "epoch": 0.023121387283236993, "grad_norm": 5.840135867020467, "learning_rate": 1.5384615384615385e-07, "loss": 0.8561823964118958, "step": 8, "token_acc": 0.7551989061787877 }, { "epoch": 0.02601156069364162, "grad_norm": 4.93074237263625, "learning_rate": 1.7307692307692305e-07, "loss": 0.7908620834350586, "step": 9, "token_acc": 0.7736331966727492 }, { "epoch": 0.028901734104046242, "grad_norm": 5.513250434452228, "learning_rate": 1.9230769230769231e-07, "loss": 0.8536443710327148, "step": 10, "token_acc": 0.7537275655775426 }, { "epoch": 0.031791907514450865, "grad_norm": 5.6890026898261254, "learning_rate": 2.1153846153846152e-07, "loss": 0.8860396146774292, "step": 11, "token_acc": 0.7444433233394834 }, { "epoch": 0.03468208092485549, "grad_norm": 5.204460891865508, "learning_rate": 2.3076923076923078e-07, "loss": 0.8523805141448975, "step": 12, "token_acc": 0.7516541745600307 }, { "epoch": 0.03757225433526012, "grad_norm": 5.727537830602335, "learning_rate": 2.5e-07, "loss": 0.8715107440948486, "step": 13, "token_acc": 0.7483992966857977 }, { "epoch": 0.04046242774566474, "grad_norm": 5.573759954820184, "learning_rate": 2.692307692307692e-07, "loss": 0.8587294220924377, "step": 14, "token_acc": 0.752293881658215 }, { "epoch": 0.04335260115606936, "grad_norm": 5.626217493866761, "learning_rate": 2.884615384615384e-07, "loss": 0.8353704810142517, "step": 15, "token_acc": 0.7603716874100415 }, { "epoch": 0.046242774566473986, "grad_norm": 5.780174641621012, "learning_rate": 3.076923076923077e-07, "loss": 0.8726707100868225, "step": 16, "token_acc": 0.750940308255944 }, { "epoch": 0.049132947976878616, "grad_norm": 4.3328681597964875, "learning_rate": 3.269230769230769e-07, "loss": 0.718013346195221, "step": 17, "token_acc": 0.7931623195891079 }, { "epoch": 0.05202312138728324, "grad_norm": 5.47302287757926, "learning_rate": 3.461538461538461e-07, "loss": 0.8578764200210571, "step": 18, "token_acc": 0.7521628365412952 }, { "epoch": 0.05491329479768786, "grad_norm": 5.003969625540578, "learning_rate": 3.6538461538461534e-07, "loss": 0.8133180737495422, "step": 19, "token_acc": 0.7619723575896223 }, { "epoch": 0.057803468208092484, "grad_norm": 5.6946171227062115, "learning_rate": 3.8461538461538463e-07, "loss": 0.8691498041152954, "step": 20, "token_acc": 0.7492446732183174 }, { "epoch": 0.06069364161849711, "grad_norm": 5.520197357593707, "learning_rate": 4.0384615384615386e-07, "loss": 0.907565712928772, "step": 21, "token_acc": 0.739601049536876 }, { "epoch": 0.06358381502890173, "grad_norm": 4.583439446754697, "learning_rate": 4.2307692307692304e-07, "loss": 0.8114128708839417, "step": 22, "token_acc": 0.7639052404881551 }, { "epoch": 0.06647398843930635, "grad_norm": 4.920313367321747, "learning_rate": 4.423076923076923e-07, "loss": 0.8422179222106934, "step": 23, "token_acc": 0.7567072154640894 }, { "epoch": 0.06936416184971098, "grad_norm": 5.263032949222765, "learning_rate": 4.6153846153846156e-07, "loss": 0.8715439438819885, "step": 24, "token_acc": 0.7464963254144591 }, { "epoch": 0.07225433526011561, "grad_norm": 4.870068302475069, "learning_rate": 4.807692307692307e-07, "loss": 0.8316457271575928, "step": 25, "token_acc": 0.7582423573346417 }, { "epoch": 0.07514450867052024, "grad_norm": 4.199216776916685, "learning_rate": 5e-07, "loss": 0.7344825267791748, "step": 26, "token_acc": 0.7839733369517283 }, { "epoch": 0.07803468208092486, "grad_norm": 4.588333481721223, "learning_rate": 5.192307692307692e-07, "loss": 0.8012775182723999, "step": 27, "token_acc": 0.767028959599571 }, { "epoch": 0.08092485549132948, "grad_norm": 4.194674553902997, "learning_rate": 5.384615384615384e-07, "loss": 0.712963879108429, "step": 28, "token_acc": 0.7923740483107238 }, { "epoch": 0.0838150289017341, "grad_norm": 4.053747357354017, "learning_rate": 5.576923076923077e-07, "loss": 0.7496437430381775, "step": 29, "token_acc": 0.7814042116577906 }, { "epoch": 0.08670520231213873, "grad_norm": 3.718069447981091, "learning_rate": 5.769230769230768e-07, "loss": 0.7818017601966858, "step": 30, "token_acc": 0.7700440596977877 }, { "epoch": 0.08959537572254335, "grad_norm": 3.420080175405301, "learning_rate": 5.961538461538461e-07, "loss": 0.7861907482147217, "step": 31, "token_acc": 0.7627405151738911 }, { "epoch": 0.09248554913294797, "grad_norm": 2.639800184791621, "learning_rate": 6.153846153846154e-07, "loss": 0.6684123277664185, "step": 32, "token_acc": 0.7977043354655295 }, { "epoch": 0.0953757225433526, "grad_norm": 2.9502697501210413, "learning_rate": 6.346153846153845e-07, "loss": 0.7446445226669312, "step": 33, "token_acc": 0.771793289625916 }, { "epoch": 0.09826589595375723, "grad_norm": 2.8110101894954345, "learning_rate": 6.538461538461538e-07, "loss": 0.7382901906967163, "step": 34, "token_acc": 0.7770551133606955 }, { "epoch": 0.10115606936416185, "grad_norm": 2.9797000830123226, "learning_rate": 6.730769230769231e-07, "loss": 0.7384837865829468, "step": 35, "token_acc": 0.7742859974561853 }, { "epoch": 0.10404624277456648, "grad_norm": 2.7709890477908177, "learning_rate": 6.923076923076922e-07, "loss": 0.7289628982543945, "step": 36, "token_acc": 0.7765123239561783 }, { "epoch": 0.1069364161849711, "grad_norm": 2.59015685758215, "learning_rate": 7.115384615384616e-07, "loss": 0.7290064096450806, "step": 37, "token_acc": 0.7784733624454149 }, { "epoch": 0.10982658959537572, "grad_norm": 2.8646835764259233, "learning_rate": 7.307692307692307e-07, "loss": 0.7594764828681946, "step": 38, "token_acc": 0.7671359481427088 }, { "epoch": 0.11271676300578035, "grad_norm": 2.349168631790223, "learning_rate": 7.5e-07, "loss": 0.72218257188797, "step": 39, "token_acc": 0.7804759091596026 }, { "epoch": 0.11560693641618497, "grad_norm": 2.511985129172397, "learning_rate": 7.692307692307693e-07, "loss": 0.7277328968048096, "step": 40, "token_acc": 0.7780124249072232 }, { "epoch": 0.11849710982658959, "grad_norm": 2.5792884120122235, "learning_rate": 7.884615384615384e-07, "loss": 0.7460165619850159, "step": 41, "token_acc": 0.7733394615523893 }, { "epoch": 0.12138728323699421, "grad_norm": 1.5451971118538999, "learning_rate": 8.076923076923077e-07, "loss": 0.7386133670806885, "step": 42, "token_acc": 0.772020024353944 }, { "epoch": 0.12427745664739884, "grad_norm": 1.3982437840218045, "learning_rate": 8.269230769230768e-07, "loss": 0.7192668914794922, "step": 43, "token_acc": 0.7744656594039339 }, { "epoch": 0.12716763005780346, "grad_norm": 1.4772019806138394, "learning_rate": 8.461538461538461e-07, "loss": 0.6977580189704895, "step": 44, "token_acc": 0.7803848372212253 }, { "epoch": 0.13005780346820808, "grad_norm": 1.426662829341362, "learning_rate": 8.653846153846154e-07, "loss": 0.6999402642250061, "step": 45, "token_acc": 0.778780228821366 }, { "epoch": 0.1329479768786127, "grad_norm": 1.4168889938692493, "learning_rate": 8.846153846153846e-07, "loss": 0.7392410635948181, "step": 46, "token_acc": 0.7691270558007607 }, { "epoch": 0.13583815028901733, "grad_norm": 1.4711907038839338, "learning_rate": 9.038461538461538e-07, "loss": 0.7351399660110474, "step": 47, "token_acc": 0.7670848343481196 }, { "epoch": 0.13872832369942195, "grad_norm": 1.2965845227191142, "learning_rate": 9.230769230769231e-07, "loss": 0.7003874778747559, "step": 48, "token_acc": 0.7787950748052811 }, { "epoch": 0.1416184971098266, "grad_norm": 1.292104981035939, "learning_rate": 9.423076923076923e-07, "loss": 0.7326341867446899, "step": 49, "token_acc": 0.7685059219819624 }, { "epoch": 0.14450867052023122, "grad_norm": 1.2291132980421766, "learning_rate": 9.615384615384615e-07, "loss": 0.6871765851974487, "step": 50, "token_acc": 0.7841781074662453 }, { "epoch": 0.14739884393063585, "grad_norm": 1.123170268506369, "learning_rate": 9.807692307692306e-07, "loss": 0.6960352659225464, "step": 51, "token_acc": 0.7801758979708864 }, { "epoch": 0.15028901734104047, "grad_norm": 1.00691295990528, "learning_rate": 1e-06, "loss": 0.6956222653388977, "step": 52, "token_acc": 0.7829201628190622 }, { "epoch": 0.1531791907514451, "grad_norm": 0.9370942178938112, "learning_rate": 9.999974620354198e-07, "loss": 0.6958713531494141, "step": 53, "token_acc": 0.7809317408675194 }, { "epoch": 0.15606936416184972, "grad_norm": 1.1057401423493767, "learning_rate": 9.999898481674446e-07, "loss": 0.7062472105026245, "step": 54, "token_acc": 0.7756643140884724 }, { "epoch": 0.15895953757225434, "grad_norm": 0.8619542832761329, "learning_rate": 9.999771584733693e-07, "loss": 0.6577130556106567, "step": 55, "token_acc": 0.7922278867707445 }, { "epoch": 0.16184971098265896, "grad_norm": 0.9166807116221914, "learning_rate": 9.999593930820181e-07, "loss": 0.6945655941963196, "step": 56, "token_acc": 0.77725851438142 }, { "epoch": 0.16473988439306358, "grad_norm": 0.939862155697591, "learning_rate": 9.999365521737421e-07, "loss": 0.6921431422233582, "step": 57, "token_acc": 0.7773106126184057 }, { "epoch": 0.1676300578034682, "grad_norm": 0.9756834016584089, "learning_rate": 9.999086359804195e-07, "loss": 0.7256878018379211, "step": 58, "token_acc": 0.7686141412007078 }, { "epoch": 0.17052023121387283, "grad_norm": 0.8557348808489443, "learning_rate": 9.99875644785451e-07, "loss": 0.6813135147094727, "step": 59, "token_acc": 0.7843321803650282 }, { "epoch": 0.17341040462427745, "grad_norm": 0.8266352802865822, "learning_rate": 9.998375789237592e-07, "loss": 0.6513127088546753, "step": 60, "token_acc": 0.7914724403689247 }, { "epoch": 0.17630057803468208, "grad_norm": 0.8497866635296994, "learning_rate": 9.99794438781783e-07, "loss": 0.6605720520019531, "step": 61, "token_acc": 0.78915683493063 }, { "epoch": 0.1791907514450867, "grad_norm": 0.8351298607584619, "learning_rate": 9.99746224797475e-07, "loss": 0.6266233325004578, "step": 62, "token_acc": 0.7963586246917163 }, { "epoch": 0.18208092485549132, "grad_norm": 0.9019491097127296, "learning_rate": 9.996929374602968e-07, "loss": 0.6673212647438049, "step": 63, "token_acc": 0.7844323603274962 }, { "epoch": 0.18497109826589594, "grad_norm": 0.8813921264261143, "learning_rate": 9.996345773112138e-07, "loss": 0.7036587595939636, "step": 64, "token_acc": 0.7740703997187025 }, { "epoch": 0.18786127167630057, "grad_norm": 0.8869002415681166, "learning_rate": 9.995711449426901e-07, "loss": 0.6981368064880371, "step": 65, "token_acc": 0.7753412151954072 }, { "epoch": 0.1907514450867052, "grad_norm": 0.7752119383387671, "learning_rate": 9.99502640998682e-07, "loss": 0.6600744724273682, "step": 66, "token_acc": 0.788013646851561 }, { "epoch": 0.1936416184971098, "grad_norm": 0.8616071421748983, "learning_rate": 9.99429066174632e-07, "loss": 0.6547806262969971, "step": 67, "token_acc": 0.7894853017554794 }, { "epoch": 0.19653179190751446, "grad_norm": 0.8018562843868764, "learning_rate": 9.993504212174613e-07, "loss": 0.6278072595596313, "step": 68, "token_acc": 0.7972202882855006 }, { "epoch": 0.1994219653179191, "grad_norm": 0.7473736558335493, "learning_rate": 9.992667069255618e-07, "loss": 0.6237850785255432, "step": 69, "token_acc": 0.7982735792533637 }, { "epoch": 0.2023121387283237, "grad_norm": 0.6999587458869299, "learning_rate": 9.991779241487899e-07, "loss": 0.6401976346969604, "step": 70, "token_acc": 0.7928364264997928 }, { "epoch": 0.20520231213872833, "grad_norm": 0.6924984079683673, "learning_rate": 9.990840737884554e-07, "loss": 0.6805769205093384, "step": 71, "token_acc": 0.7801177818172763 }, { "epoch": 0.20809248554913296, "grad_norm": 0.7111004746445246, "learning_rate": 9.989851567973138e-07, "loss": 0.697790801525116, "step": 72, "token_acc": 0.7760267430754537 }, { "epoch": 0.21098265895953758, "grad_norm": 0.6869871346194354, "learning_rate": 9.988811741795566e-07, "loss": 0.6186888217926025, "step": 73, "token_acc": 0.7994626021789282 }, { "epoch": 0.2138728323699422, "grad_norm": 0.6177183453130074, "learning_rate": 9.987721269908005e-07, "loss": 0.5868158340454102, "step": 74, "token_acc": 0.8114196656276566 }, { "epoch": 0.21676300578034682, "grad_norm": 0.6307801092890282, "learning_rate": 9.98658016338077e-07, "loss": 0.6723257303237915, "step": 75, "token_acc": 0.7827200467097494 }, { "epoch": 0.21965317919075145, "grad_norm": 0.6150476355618669, "learning_rate": 9.985388433798215e-07, "loss": 0.6530448198318481, "step": 76, "token_acc": 0.7907922080887895 }, { "epoch": 0.22254335260115607, "grad_norm": 0.5940300278296939, "learning_rate": 9.984146093258608e-07, "loss": 0.6855973601341248, "step": 77, "token_acc": 0.7784828714678302 }, { "epoch": 0.2254335260115607, "grad_norm": 0.9497443806056196, "learning_rate": 9.982853154374013e-07, "loss": 0.6745576858520508, "step": 78, "token_acc": 0.7854156213413614 }, { "epoch": 0.22832369942196531, "grad_norm": 0.6791196750467849, "learning_rate": 9.981509630270167e-07, "loss": 0.6383039951324463, "step": 79, "token_acc": 0.7940166430627679 }, { "epoch": 0.23121387283236994, "grad_norm": 0.6194193913683183, "learning_rate": 9.980115534586333e-07, "loss": 0.6046701669692993, "step": 80, "token_acc": 0.8031263032947291 }, { "epoch": 0.23410404624277456, "grad_norm": 0.584941512318404, "learning_rate": 9.978670881475172e-07, "loss": 0.6113057136535645, "step": 81, "token_acc": 0.8002890249696458 }, { "epoch": 0.23699421965317918, "grad_norm": 0.576070429321087, "learning_rate": 9.9771756856026e-07, "loss": 0.6508547067642212, "step": 82, "token_acc": 0.7917054316809551 }, { "epoch": 0.2398843930635838, "grad_norm": 0.5782915674069733, "learning_rate": 9.975629962147633e-07, "loss": 0.6592724323272705, "step": 83, "token_acc": 0.7841273280945267 }, { "epoch": 0.24277456647398843, "grad_norm": 0.5894596908351907, "learning_rate": 9.974033726802235e-07, "loss": 0.5925013422966003, "step": 84, "token_acc": 0.8060771521769962 }, { "epoch": 0.24566473988439305, "grad_norm": 0.5279159216055382, "learning_rate": 9.972386995771164e-07, "loss": 0.6444322466850281, "step": 85, "token_acc": 0.7914691943127962 }, { "epoch": 0.24855491329479767, "grad_norm": 0.5809453095781784, "learning_rate": 9.970689785771798e-07, "loss": 0.6508707404136658, "step": 86, "token_acc": 0.7889469472867465 }, { "epoch": 0.2514450867052023, "grad_norm": 0.6715617527059413, "learning_rate": 9.968942114033973e-07, "loss": 0.5962953567504883, "step": 87, "token_acc": 0.8063397578524576 }, { "epoch": 0.2543352601156069, "grad_norm": 0.6155392081496504, "learning_rate": 9.967143998299802e-07, "loss": 0.6590582132339478, "step": 88, "token_acc": 0.786015653473848 }, { "epoch": 0.25722543352601157, "grad_norm": 0.6351340196244468, "learning_rate": 9.965295456823507e-07, "loss": 0.6178431510925293, "step": 89, "token_acc": 0.799615789600598 }, { "epoch": 0.26011560693641617, "grad_norm": 0.6389337976646079, "learning_rate": 9.963396508371217e-07, "loss": 0.6065088510513306, "step": 90, "token_acc": 0.8027006050850137 }, { "epoch": 0.2630057803468208, "grad_norm": 0.5682640638544528, "learning_rate": 9.961447172220785e-07, "loss": 0.6684330105781555, "step": 91, "token_acc": 0.7839487407338119 }, { "epoch": 0.2658959537572254, "grad_norm": 0.6029647051880634, "learning_rate": 9.959447468161596e-07, "loss": 0.6358112096786499, "step": 92, "token_acc": 0.7908192833685276 }, { "epoch": 0.26878612716763006, "grad_norm": 0.5632656008285092, "learning_rate": 9.957397416494366e-07, "loss": 0.6601473093032837, "step": 93, "token_acc": 0.7853722190438847 }, { "epoch": 0.27167630057803466, "grad_norm": 0.6013944385740286, "learning_rate": 9.955297038030926e-07, "loss": 0.668410062789917, "step": 94, "token_acc": 0.7828623747800797 }, { "epoch": 0.2745664739884393, "grad_norm": 0.5541440784608198, "learning_rate": 9.95314635409402e-07, "loss": 0.6117832660675049, "step": 95, "token_acc": 0.7995787198241185 }, { "epoch": 0.2774566473988439, "grad_norm": 0.6314740935897156, "learning_rate": 9.95094538651709e-07, "loss": 0.6261177062988281, "step": 96, "token_acc": 0.7962018726778723 }, { "epoch": 0.28034682080924855, "grad_norm": 0.7158918907846333, "learning_rate": 9.948694157644042e-07, "loss": 0.6556503772735596, "step": 97, "token_acc": 0.7869902468442614 }, { "epoch": 0.2832369942196532, "grad_norm": 0.5701552977234003, "learning_rate": 9.946392690329036e-07, "loss": 0.6187049746513367, "step": 98, "token_acc": 0.8010530865652874 }, { "epoch": 0.2861271676300578, "grad_norm": 0.5860362253461248, "learning_rate": 9.944041007936244e-07, "loss": 0.5410789847373962, "step": 99, "token_acc": 0.8207894360088595 }, { "epoch": 0.28901734104046245, "grad_norm": 0.6303808407906236, "learning_rate": 9.941639134339606e-07, "loss": 0.5768465399742126, "step": 100, "token_acc": 0.8087328873195813 }, { "epoch": 0.29190751445086704, "grad_norm": 0.616425173315349, "learning_rate": 9.939187093922609e-07, "loss": 0.6295806169509888, "step": 101, "token_acc": 0.7958193257384945 }, { "epoch": 0.2947976878612717, "grad_norm": 0.5753993917901922, "learning_rate": 9.936684911578017e-07, "loss": 0.5983704328536987, "step": 102, "token_acc": 0.8031383517086323 }, { "epoch": 0.2976878612716763, "grad_norm": 0.6140080800303133, "learning_rate": 9.93413261270763e-07, "loss": 0.5729444026947021, "step": 103, "token_acc": 0.816418031517547 }, { "epoch": 0.30057803468208094, "grad_norm": 0.5607455073068854, "learning_rate": 9.931530223222026e-07, "loss": 0.5967170596122742, "step": 104, "token_acc": 0.803475704051983 }, { "epoch": 0.30346820809248554, "grad_norm": 0.5675327028480304, "learning_rate": 9.928877769540293e-07, "loss": 0.6241474151611328, "step": 105, "token_acc": 0.7967706129971308 }, { "epoch": 0.3063583815028902, "grad_norm": 0.6046538978438704, "learning_rate": 9.926175278589767e-07, "loss": 0.6553393602371216, "step": 106, "token_acc": 0.7874527013411549 }, { "epoch": 0.3092485549132948, "grad_norm": 0.5734166676914433, "learning_rate": 9.923422777805751e-07, "loss": 0.6570492386817932, "step": 107, "token_acc": 0.7870601190355553 }, { "epoch": 0.31213872832369943, "grad_norm": 0.6001726322335739, "learning_rate": 9.920620295131245e-07, "loss": 0.6794227361679077, "step": 108, "token_acc": 0.7787853169709925 }, { "epoch": 0.315028901734104, "grad_norm": 0.6099760009068769, "learning_rate": 9.917767859016654e-07, "loss": 0.615708589553833, "step": 109, "token_acc": 0.7985643236886592 }, { "epoch": 0.3179190751445087, "grad_norm": 0.5778662206360861, "learning_rate": 9.91486549841951e-07, "loss": 0.5809392929077148, "step": 110, "token_acc": 0.8094654316503208 }, { "epoch": 0.3208092485549133, "grad_norm": 0.5704401870141648, "learning_rate": 9.911913242804158e-07, "loss": 0.6263046264648438, "step": 111, "token_acc": 0.7955055464485222 }, { "epoch": 0.3236994219653179, "grad_norm": 0.613652119648305, "learning_rate": 9.908911122141486e-07, "loss": 0.5810531377792358, "step": 112, "token_acc": 0.8122967000471536 }, { "epoch": 0.3265895953757225, "grad_norm": 0.5754148794590288, "learning_rate": 9.905859166908594e-07, "loss": 0.6450198888778687, "step": 113, "token_acc": 0.787714712471994 }, { "epoch": 0.32947976878612717, "grad_norm": 0.8102498152797749, "learning_rate": 9.902757408088501e-07, "loss": 0.6492223739624023, "step": 114, "token_acc": 0.7880358603802299 }, { "epoch": 0.33236994219653176, "grad_norm": 0.525946407195948, "learning_rate": 9.899605877169824e-07, "loss": 0.5984295606613159, "step": 115, "token_acc": 0.8024764689756009 }, { "epoch": 0.3352601156069364, "grad_norm": 0.5751169418426346, "learning_rate": 9.896404606146455e-07, "loss": 0.6295244097709656, "step": 116, "token_acc": 0.7922646493276646 }, { "epoch": 0.33815028901734107, "grad_norm": 0.5079153092397871, "learning_rate": 9.893153627517248e-07, "loss": 0.5976470112800598, "step": 117, "token_acc": 0.8038826857227929 }, { "epoch": 0.34104046242774566, "grad_norm": 0.5841459704013869, "learning_rate": 9.889852974285672e-07, "loss": 0.6472890973091125, "step": 118, "token_acc": 0.789158388689134 }, { "epoch": 0.3439306358381503, "grad_norm": 0.6150844233030651, "learning_rate": 9.886502679959497e-07, "loss": 0.5413444638252258, "step": 119, "token_acc": 0.8222654666342334 }, { "epoch": 0.3468208092485549, "grad_norm": 0.5935208615034318, "learning_rate": 9.883102778550434e-07, "loss": 0.663335919380188, "step": 120, "token_acc": 0.7862711064419373 }, { "epoch": 0.34971098265895956, "grad_norm": 0.6268736075123943, "learning_rate": 9.879653304573797e-07, "loss": 0.6072404384613037, "step": 121, "token_acc": 0.8010549723328334 }, { "epoch": 0.35260115606936415, "grad_norm": 0.5583642618257684, "learning_rate": 9.876154293048163e-07, "loss": 0.6144070029258728, "step": 122, "token_acc": 0.796381277924315 }, { "epoch": 0.3554913294797688, "grad_norm": 0.5410450297039057, "learning_rate": 9.872605779494997e-07, "loss": 0.5954463481903076, "step": 123, "token_acc": 0.8055216585201416 }, { "epoch": 0.3583815028901734, "grad_norm": 0.6425891449290073, "learning_rate": 9.869007799938305e-07, "loss": 0.6611199378967285, "step": 124, "token_acc": 0.786190934231093 }, { "epoch": 0.36127167630057805, "grad_norm": 0.5146021782369569, "learning_rate": 9.865360390904269e-07, "loss": 0.6081857085227966, "step": 125, "token_acc": 0.8017568952922327 }, { "epoch": 0.36416184971098264, "grad_norm": 0.5766433781688939, "learning_rate": 9.86166358942087e-07, "loss": 0.609286904335022, "step": 126, "token_acc": 0.8002619382070126 }, { "epoch": 0.3670520231213873, "grad_norm": 0.5450128204125277, "learning_rate": 9.857917433017508e-07, "loss": 0.5991868376731873, "step": 127, "token_acc": 0.8008499444919779 }, { "epoch": 0.3699421965317919, "grad_norm": 0.5810734133360594, "learning_rate": 9.854121959724635e-07, "loss": 0.607757568359375, "step": 128, "token_acc": 0.7998384333607254 }, { "epoch": 0.37283236994219654, "grad_norm": 0.5770182474218292, "learning_rate": 9.85027720807336e-07, "loss": 0.5918303728103638, "step": 129, "token_acc": 0.8040288846142103 }, { "epoch": 0.37572254335260113, "grad_norm": 0.5360179518405197, "learning_rate": 9.846383217095051e-07, "loss": 0.646679162979126, "step": 130, "token_acc": 0.7929178624953734 }, { "epoch": 0.3786127167630058, "grad_norm": 0.5278251178995469, "learning_rate": 9.842440026320958e-07, "loss": 0.6081724166870117, "step": 131, "token_acc": 0.7979095393804223 }, { "epoch": 0.3815028901734104, "grad_norm": 0.5857831669587502, "learning_rate": 9.838447675781793e-07, "loss": 0.5776185989379883, "step": 132, "token_acc": 0.8089180214756997 }, { "epoch": 0.38439306358381503, "grad_norm": 0.49786698791997097, "learning_rate": 9.834406206007335e-07, "loss": 0.6665687561035156, "step": 133, "token_acc": 0.7817376207568673 }, { "epoch": 0.3872832369942196, "grad_norm": 0.5272403389699103, "learning_rate": 9.83031565802601e-07, "loss": 0.607385516166687, "step": 134, "token_acc": 0.8027202321406094 }, { "epoch": 0.3901734104046243, "grad_norm": 0.5881996711071641, "learning_rate": 9.826176073364482e-07, "loss": 0.6304242014884949, "step": 135, "token_acc": 0.7967265117890893 }, { "epoch": 0.3930635838150289, "grad_norm": 0.5540108888142588, "learning_rate": 9.821987494047228e-07, "loss": 0.6314468383789062, "step": 136, "token_acc": 0.7919692387557874 }, { "epoch": 0.3959537572254335, "grad_norm": 0.5722154073047628, "learning_rate": 9.817749962596114e-07, "loss": 0.602054238319397, "step": 137, "token_acc": 0.802066245506265 }, { "epoch": 0.3988439306358382, "grad_norm": 0.5596376441219622, "learning_rate": 9.813463522029957e-07, "loss": 0.640647292137146, "step": 138, "token_acc": 0.7918518615352437 }, { "epoch": 0.40173410404624277, "grad_norm": 0.5545182797573466, "learning_rate": 9.809128215864096e-07, "loss": 0.6066859364509583, "step": 139, "token_acc": 0.801196721208976 }, { "epoch": 0.4046242774566474, "grad_norm": 0.5784484895204948, "learning_rate": 9.804744088109941e-07, "loss": 0.5408949851989746, "step": 140, "token_acc": 0.8248328121430766 }, { "epoch": 0.407514450867052, "grad_norm": 0.5637555298781167, "learning_rate": 9.80031118327454e-07, "loss": 0.6107698678970337, "step": 141, "token_acc": 0.7982127620772081 }, { "epoch": 0.41040462427745666, "grad_norm": 0.603110232763829, "learning_rate": 9.795829546360113e-07, "loss": 0.5912826061248779, "step": 142, "token_acc": 0.8041540066906055 }, { "epoch": 0.41329479768786126, "grad_norm": 0.5873555056914542, "learning_rate": 9.791299222863602e-07, "loss": 0.6161830425262451, "step": 143, "token_acc": 0.799708864508567 }, { "epoch": 0.4161849710982659, "grad_norm": 0.6843944560990027, "learning_rate": 9.786720258776213e-07, "loss": 0.5474255681037903, "step": 144, "token_acc": 0.8186930860033726 }, { "epoch": 0.4190751445086705, "grad_norm": 0.51545250769897, "learning_rate": 9.782092700582936e-07, "loss": 0.6216602325439453, "step": 145, "token_acc": 0.7965911940150556 }, { "epoch": 0.42196531791907516, "grad_norm": 0.5937549088482647, "learning_rate": 9.77741659526209e-07, "loss": 0.6248494386672974, "step": 146, "token_acc": 0.7956684720442111 }, { "epoch": 0.42485549132947975, "grad_norm": 0.5399979093459059, "learning_rate": 9.77269199028483e-07, "loss": 0.6089432239532471, "step": 147, "token_acc": 0.796826403459652 }, { "epoch": 0.4277456647398844, "grad_norm": 0.5564248028198713, "learning_rate": 9.76791893361468e-07, "loss": 0.6312023401260376, "step": 148, "token_acc": 0.7918012705466769 }, { "epoch": 0.430635838150289, "grad_norm": 0.559936805840691, "learning_rate": 9.763097473707035e-07, "loss": 0.619454026222229, "step": 149, "token_acc": 0.7984878886834271 }, { "epoch": 0.43352601156069365, "grad_norm": 0.6044059322614584, "learning_rate": 9.758227659508668e-07, "loss": 0.5221510529518127, "step": 150, "token_acc": 0.8266117865021535 }, { "epoch": 0.43641618497109824, "grad_norm": 0.5692770162596946, "learning_rate": 9.753309540457248e-07, "loss": 0.6139217615127563, "step": 151, "token_acc": 0.7982664696096701 }, { "epoch": 0.4393063583815029, "grad_norm": 0.5330985388783729, "learning_rate": 9.748343166480822e-07, "loss": 0.6154735088348389, "step": 152, "token_acc": 0.7984871546515382 }, { "epoch": 0.4421965317919075, "grad_norm": 0.6065632918781179, "learning_rate": 9.743328587997314e-07, "loss": 0.5449005365371704, "step": 153, "token_acc": 0.8221805561096261 }, { "epoch": 0.44508670520231214, "grad_norm": 0.6274255114547471, "learning_rate": 9.738265855914012e-07, "loss": 0.6112866401672363, "step": 154, "token_acc": 0.7997394616484714 }, { "epoch": 0.4479768786127168, "grad_norm": 0.6000527996102515, "learning_rate": 9.733155021627057e-07, "loss": 0.6302502155303955, "step": 155, "token_acc": 0.7939255615270142 }, { "epoch": 0.4508670520231214, "grad_norm": 0.5716424963426585, "learning_rate": 9.727996137020916e-07, "loss": 0.5589959621429443, "step": 156, "token_acc": 0.8167590708119868 }, { "epoch": 0.45375722543352603, "grad_norm": 0.5793130145184638, "learning_rate": 9.722789254467854e-07, "loss": 0.5811511874198914, "step": 157, "token_acc": 0.8068220017796527 }, { "epoch": 0.45664739884393063, "grad_norm": 0.6447386736666927, "learning_rate": 9.717534426827404e-07, "loss": 0.6125731468200684, "step": 158, "token_acc": 0.7982601354147698 }, { "epoch": 0.4595375722543353, "grad_norm": 0.5583551050757221, "learning_rate": 9.712231707445831e-07, "loss": 0.5681207180023193, "step": 159, "token_acc": 0.812138891502776 }, { "epoch": 0.4624277456647399, "grad_norm": 0.6227411154474924, "learning_rate": 9.70688115015559e-07, "loss": 0.5606650114059448, "step": 160, "token_acc": 0.8128119485280195 }, { "epoch": 0.4653179190751445, "grad_norm": 0.5637826519102942, "learning_rate": 9.701482809274787e-07, "loss": 0.584591269493103, "step": 161, "token_acc": 0.809975090499813 }, { "epoch": 0.4682080924855491, "grad_norm": 0.5527836562945804, "learning_rate": 9.696036739606606e-07, "loss": 0.6178029775619507, "step": 162, "token_acc": 0.7982424352237725 }, { "epoch": 0.47109826589595377, "grad_norm": 0.5261451706415371, "learning_rate": 9.690542996438777e-07, "loss": 0.5772680640220642, "step": 163, "token_acc": 0.8055154702213526 }, { "epoch": 0.47398843930635837, "grad_norm": 0.598598068991984, "learning_rate": 9.685001635543005e-07, "loss": 0.5761500597000122, "step": 164, "token_acc": 0.8095295422689632 }, { "epoch": 0.476878612716763, "grad_norm": 0.5603114991623558, "learning_rate": 9.679412713174398e-07, "loss": 0.6070771217346191, "step": 165, "token_acc": 0.7988323213451658 }, { "epoch": 0.4797687861271676, "grad_norm": 0.5909619017551228, "learning_rate": 9.673776286070905e-07, "loss": 0.5829952955245972, "step": 166, "token_acc": 0.8056856359399237 }, { "epoch": 0.48265895953757226, "grad_norm": 0.7664205949048083, "learning_rate": 9.668092411452735e-07, "loss": 0.591526985168457, "step": 167, "token_acc": 0.805959940764539 }, { "epoch": 0.48554913294797686, "grad_norm": 0.5816382553386844, "learning_rate": 9.66236114702178e-07, "loss": 0.6718764901161194, "step": 168, "token_acc": 0.7819054715177417 }, { "epoch": 0.4884393063583815, "grad_norm": 0.5443192837285905, "learning_rate": 9.656582550961018e-07, "loss": 0.5771794319152832, "step": 169, "token_acc": 0.8120637180483624 }, { "epoch": 0.4913294797687861, "grad_norm": 0.5439506241087468, "learning_rate": 9.650756681933947e-07, "loss": 0.5797525644302368, "step": 170, "token_acc": 0.8072481275670452 }, { "epoch": 0.49421965317919075, "grad_norm": 0.5750701292908912, "learning_rate": 9.644883599083957e-07, "loss": 0.616324782371521, "step": 171, "token_acc": 0.7961593487416124 }, { "epoch": 0.49710982658959535, "grad_norm": 0.5292422990653295, "learning_rate": 9.638963362033756e-07, "loss": 0.6252388954162598, "step": 172, "token_acc": 0.7945571248522018 }, { "epoch": 0.5, "grad_norm": 0.519900156438812, "learning_rate": 9.632996030884748e-07, "loss": 0.6072378158569336, "step": 173, "token_acc": 0.7983872825711323 }, { "epoch": 0.5028901734104047, "grad_norm": 2.014285868322542, "learning_rate": 9.626981666216439e-07, "loss": 0.5167373418807983, "step": 174, "token_acc": 0.8304752994472689 }, { "epoch": 0.5057803468208093, "grad_norm": 0.6229356072638176, "learning_rate": 9.620920329085802e-07, "loss": 0.5613738894462585, "step": 175, "token_acc": 0.8164609282841512 }, { "epoch": 0.5086705202312138, "grad_norm": 0.6427491754173409, "learning_rate": 9.614812081026678e-07, "loss": 0.6089553236961365, "step": 176, "token_acc": 0.8013446815125724 }, { "epoch": 0.5115606936416185, "grad_norm": 0.4795382180524186, "learning_rate": 9.608656984049132e-07, "loss": 0.579177737236023, "step": 177, "token_acc": 0.806047379906923 }, { "epoch": 0.5144508670520231, "grad_norm": 0.5089663171794683, "learning_rate": 9.602455100638835e-07, "loss": 0.5813893675804138, "step": 178, "token_acc": 0.8087914556082915 }, { "epoch": 0.5173410404624278, "grad_norm": 0.6116010486180593, "learning_rate": 9.596206493756432e-07, "loss": 0.5549554824829102, "step": 179, "token_acc": 0.8173080502386111 }, { "epoch": 0.5202312138728323, "grad_norm": 0.4852226717563288, "learning_rate": 9.589911226836895e-07, "loss": 0.5808215737342834, "step": 180, "token_acc": 0.8052112098427888 }, { "epoch": 0.523121387283237, "grad_norm": 0.5270020853161572, "learning_rate": 9.583569363788879e-07, "loss": 0.6398844122886658, "step": 181, "token_acc": 0.7898708976833977 }, { "epoch": 0.5260115606936416, "grad_norm": 0.5073350335042175, "learning_rate": 9.577180968994081e-07, "loss": 0.6154753565788269, "step": 182, "token_acc": 0.7993068610377478 }, { "epoch": 0.5289017341040463, "grad_norm": 0.5631567506627345, "learning_rate": 9.57074610730658e-07, "loss": 0.5920361876487732, "step": 183, "token_acc": 0.8048126355828951 }, { "epoch": 0.5317919075144508, "grad_norm": 0.4995115799741094, "learning_rate": 9.56426484405218e-07, "loss": 0.5912809371948242, "step": 184, "token_acc": 0.8075411124942672 }, { "epoch": 0.5346820809248555, "grad_norm": 0.560250197890468, "learning_rate": 9.557737245027746e-07, "loss": 0.6125437021255493, "step": 185, "token_acc": 0.7972027972027972 }, { "epoch": 0.5375722543352601, "grad_norm": 0.5819218618969146, "learning_rate": 9.551163376500542e-07, "loss": 0.5732159614562988, "step": 186, "token_acc": 0.8115202124085258 }, { "epoch": 0.5404624277456648, "grad_norm": 0.6129732835255256, "learning_rate": 9.544543305207546e-07, "loss": 0.6079097986221313, "step": 187, "token_acc": 0.7997229197333102 }, { "epoch": 0.5433526011560693, "grad_norm": 0.5263001528585832, "learning_rate": 9.537877098354784e-07, "loss": 0.5925722718238831, "step": 188, "token_acc": 0.8029342210305924 }, { "epoch": 0.546242774566474, "grad_norm": 0.583594997315983, "learning_rate": 9.531164823616646e-07, "loss": 0.5865395069122314, "step": 189, "token_acc": 0.8063752604903651 }, { "epoch": 0.5491329479768786, "grad_norm": 0.5781895560822031, "learning_rate": 9.524406549135193e-07, "loss": 0.6117700338363647, "step": 190, "token_acc": 0.7980149336253496 }, { "epoch": 0.5520231213872833, "grad_norm": 0.4893230139872087, "learning_rate": 9.517602343519471e-07, "loss": 0.5652576684951782, "step": 191, "token_acc": 0.8107140229095636 }, { "epoch": 0.5549132947976878, "grad_norm": 0.5760419810427979, "learning_rate": 9.510752275844809e-07, "loss": 0.579891562461853, "step": 192, "token_acc": 0.805735200834105 }, { "epoch": 0.5578034682080925, "grad_norm": 0.5102671355626198, "learning_rate": 9.503856415652125e-07, "loss": 0.5964775681495667, "step": 193, "token_acc": 0.8034283288223744 }, { "epoch": 0.5606936416184971, "grad_norm": 0.4894002019430091, "learning_rate": 9.496914832947214e-07, "loss": 0.6064220666885376, "step": 194, "token_acc": 0.799232275930387 }, { "epoch": 0.5635838150289018, "grad_norm": 0.5939844831348525, "learning_rate": 9.489927598200043e-07, "loss": 0.6116449236869812, "step": 195, "token_acc": 0.797429447731885 }, { "epoch": 0.5664739884393064, "grad_norm": 0.4783949579372596, "learning_rate": 9.482894782344024e-07, "loss": 0.6082786321640015, "step": 196, "token_acc": 0.796939850416096 }, { "epoch": 0.569364161849711, "grad_norm": 0.5532830089434996, "learning_rate": 9.475816456775312e-07, "loss": 0.5998172760009766, "step": 197, "token_acc": 0.8034065270191963 }, { "epoch": 0.5722543352601156, "grad_norm": 0.5660410481873773, "learning_rate": 9.468692693352062e-07, "loss": 0.5715000629425049, "step": 198, "token_acc": 0.8105325892615268 }, { "epoch": 0.5751445086705202, "grad_norm": 0.5454360730485784, "learning_rate": 9.461523564393714e-07, "loss": 0.5121803283691406, "step": 199, "token_acc": 0.8285392705145792 }, { "epoch": 0.5780346820809249, "grad_norm": 0.5378535866046305, "learning_rate": 9.454309142680246e-07, "loss": 0.5945334434509277, "step": 200, "token_acc": 0.8058855053489177 }, { "epoch": 0.5809248554913294, "grad_norm": 0.569376306217556, "learning_rate": 9.447049501451447e-07, "loss": 0.5850614905357361, "step": 201, "token_acc": 0.8075420015918657 }, { "epoch": 0.5838150289017341, "grad_norm": 0.5596293780541032, "learning_rate": 9.439744714406166e-07, "loss": 0.5594047904014587, "step": 202, "token_acc": 0.8121667287250859 }, { "epoch": 0.5867052023121387, "grad_norm": 0.5138636330605458, "learning_rate": 9.432394855701568e-07, "loss": 0.5849941372871399, "step": 203, "token_acc": 0.8073615179939259 }, { "epoch": 0.5895953757225434, "grad_norm": 0.5804821715876541, "learning_rate": 9.424999999952374e-07, "loss": 0.5801274180412292, "step": 204, "token_acc": 0.8069783212978903 }, { "epoch": 0.5924855491329479, "grad_norm": 0.5724417549737069, "learning_rate": 9.417560222230114e-07, "loss": 0.549828827381134, "step": 205, "token_acc": 0.8177920383625401 }, { "epoch": 0.5953757225433526, "grad_norm": 0.5635873362301451, "learning_rate": 9.410075598062357e-07, "loss": 0.6004040241241455, "step": 206, "token_acc": 0.8004078427231751 }, { "epoch": 0.5982658959537572, "grad_norm": 0.5235901257461258, "learning_rate": 9.402546203431947e-07, "loss": 0.5270985960960388, "step": 207, "token_acc": 0.8231543624161074 }, { "epoch": 0.6011560693641619, "grad_norm": 0.5532559810628388, "learning_rate": 9.394972114776229e-07, "loss": 0.574277937412262, "step": 208, "token_acc": 0.8074010315538029 }, { "epoch": 0.6040462427745664, "grad_norm": 0.5812311718782175, "learning_rate": 9.387353408986282e-07, "loss": 0.595463216304779, "step": 209, "token_acc": 0.8024861291665605 }, { "epoch": 0.6069364161849711, "grad_norm": 0.5142938651985898, "learning_rate": 9.379690163406128e-07, "loss": 0.5852739214897156, "step": 210, "token_acc": 0.8058286827885552 }, { "epoch": 0.6098265895953757, "grad_norm": 0.5954842210532877, "learning_rate": 9.371982455831946e-07, "loss": 0.5914256572723389, "step": 211, "token_acc": 0.8022748583309552 }, { "epoch": 0.6127167630057804, "grad_norm": 0.5993748062356747, "learning_rate": 9.364230364511295e-07, "loss": 0.5815471410751343, "step": 212, "token_acc": 0.8078214734227942 }, { "epoch": 0.615606936416185, "grad_norm": 0.5946619701512068, "learning_rate": 9.356433968142305e-07, "loss": 0.5513661503791809, "step": 213, "token_acc": 0.8162251537633719 }, { "epoch": 0.6184971098265896, "grad_norm": 0.6203774782127278, "learning_rate": 9.34859334587289e-07, "loss": 0.5972813367843628, "step": 214, "token_acc": 0.8014712230836974 }, { "epoch": 0.6213872832369942, "grad_norm": 0.551145459721042, "learning_rate": 9.340708577299936e-07, "loss": 0.6008709669113159, "step": 215, "token_acc": 0.8010602678571429 }, { "epoch": 0.6242774566473989, "grad_norm": 0.5965436915708601, "learning_rate": 9.332779742468495e-07, "loss": 0.6075496673583984, "step": 216, "token_acc": 0.7974854091642866 }, { "epoch": 0.6271676300578035, "grad_norm": 0.5460165665763135, "learning_rate": 9.324806921870975e-07, "loss": 0.5693843364715576, "step": 217, "token_acc": 0.8103969870963759 }, { "epoch": 0.630057803468208, "grad_norm": 0.5966690969554563, "learning_rate": 9.316790196446323e-07, "loss": 0.5560802221298218, "step": 218, "token_acc": 0.8236988940183998 }, { "epoch": 0.6329479768786127, "grad_norm": 0.6560441235449157, "learning_rate": 9.308729647579199e-07, "loss": 0.5824184417724609, "step": 219, "token_acc": 0.8070714583452526 }, { "epoch": 0.6358381502890174, "grad_norm": 0.6006127755099283, "learning_rate": 9.30062535709915e-07, "loss": 0.6167861819267273, "step": 220, "token_acc": 0.796514221545372 }, { "epoch": 0.638728323699422, "grad_norm": 0.5570520813344141, "learning_rate": 9.292477407279789e-07, "loss": 0.6107242703437805, "step": 221, "token_acc": 0.7990834404515732 }, { "epoch": 0.6416184971098265, "grad_norm": 0.5419716560460497, "learning_rate": 9.284285880837946e-07, "loss": 0.5959486365318298, "step": 222, "token_acc": 0.8022954328356064 }, { "epoch": 0.6445086705202312, "grad_norm": 0.6657313771062484, "learning_rate": 9.276050860932837e-07, "loss": 0.5727354884147644, "step": 223, "token_acc": 0.8082750530162884 }, { "epoch": 0.6473988439306358, "grad_norm": 0.512607896262416, "learning_rate": 9.267772431165218e-07, "loss": 0.5810614228248596, "step": 224, "token_acc": 0.8100355584987692 }, { "epoch": 0.6502890173410405, "grad_norm": 0.5208342958049974, "learning_rate": 9.259450675576535e-07, "loss": 0.5924381017684937, "step": 225, "token_acc": 0.8029396939581946 }, { "epoch": 0.653179190751445, "grad_norm": 0.6880250488481687, "learning_rate": 9.251085678648071e-07, "loss": 0.6493653059005737, "step": 226, "token_acc": 0.7886282137800538 }, { "epoch": 0.6560693641618497, "grad_norm": 0.548308907840708, "learning_rate": 9.242677525300088e-07, "loss": 0.570950448513031, "step": 227, "token_acc": 0.810275809890639 }, { "epoch": 0.6589595375722543, "grad_norm": 0.5340467208226745, "learning_rate": 9.234226300890972e-07, "loss": 0.565179169178009, "step": 228, "token_acc": 0.8106098958194559 }, { "epoch": 0.661849710982659, "grad_norm": 0.5609587429682379, "learning_rate": 9.225732091216354e-07, "loss": 0.6229733824729919, "step": 229, "token_acc": 0.7947594792619757 }, { "epoch": 0.6647398843930635, "grad_norm": 0.640345970021987, "learning_rate": 9.217194982508247e-07, "loss": 0.556702196598053, "step": 230, "token_acc": 0.8141483516483516 }, { "epoch": 0.6676300578034682, "grad_norm": 0.551511374308891, "learning_rate": 9.208615061434166e-07, "loss": 0.6125736236572266, "step": 231, "token_acc": 0.7977603246777648 }, { "epoch": 0.6705202312138728, "grad_norm": 0.5163364555056573, "learning_rate": 9.199992415096259e-07, "loss": 0.5473246574401855, "step": 232, "token_acc": 0.8160722450845908 }, { "epoch": 0.6734104046242775, "grad_norm": 0.5669711665664704, "learning_rate": 9.191327131030406e-07, "loss": 0.543914794921875, "step": 233, "token_acc": 0.8196051836235239 }, { "epoch": 0.6763005780346821, "grad_norm": 0.5406802703932962, "learning_rate": 9.182619297205347e-07, "loss": 0.5660564303398132, "step": 234, "token_acc": 0.8103913761289696 }, { "epoch": 0.6791907514450867, "grad_norm": 0.556661118525528, "learning_rate": 9.173869002021775e-07, "loss": 0.6406779289245605, "step": 235, "token_acc": 0.7926350563544501 }, { "epoch": 0.6820809248554913, "grad_norm": 0.5201140983806046, "learning_rate": 9.165076334311445e-07, "loss": 0.6177135109901428, "step": 236, "token_acc": 0.7982128177119112 }, { "epoch": 0.684971098265896, "grad_norm": 0.5850116831250167, "learning_rate": 9.156241383336278e-07, "loss": 0.5401256680488586, "step": 237, "token_acc": 0.8215590591627244 }, { "epoch": 0.6878612716763006, "grad_norm": 0.6403194474900529, "learning_rate": 9.147364238787443e-07, "loss": 0.581301212310791, "step": 238, "token_acc": 0.8056872398548133 }, { "epoch": 0.6907514450867052, "grad_norm": 0.5674551611529516, "learning_rate": 9.138444990784453e-07, "loss": 0.6117105484008789, "step": 239, "token_acc": 0.7969433519630166 }, { "epoch": 0.6936416184971098, "grad_norm": 0.5668476584273359, "learning_rate": 9.12948372987425e-07, "loss": 0.6042872071266174, "step": 240, "token_acc": 0.8012008915710148 }, { "epoch": 0.6965317919075145, "grad_norm": 0.5372423597194518, "learning_rate": 9.120480547030285e-07, "loss": 0.5781703591346741, "step": 241, "token_acc": 0.8076352705410822 }, { "epoch": 0.6994219653179191, "grad_norm": 0.582884431687299, "learning_rate": 9.111435533651595e-07, "loss": 0.594234824180603, "step": 242, "token_acc": 0.8027408303103587 }, { "epoch": 0.7023121387283237, "grad_norm": 0.5468197379764062, "learning_rate": 9.102348781561875e-07, "loss": 0.537114143371582, "step": 243, "token_acc": 0.8224276312689462 }, { "epoch": 0.7052023121387283, "grad_norm": 0.5799094186562964, "learning_rate": 9.093220383008544e-07, "loss": 0.5844765901565552, "step": 244, "token_acc": 0.8037892679887568 }, { "epoch": 0.708092485549133, "grad_norm": 0.5735743433347377, "learning_rate": 9.084050430661813e-07, "loss": 0.6163278818130493, "step": 245, "token_acc": 0.7963933546643635 }, { "epoch": 0.7109826589595376, "grad_norm": 0.5675339701772788, "learning_rate": 9.074839017613736e-07, "loss": 0.5186026692390442, "step": 246, "token_acc": 0.8264138256627419 }, { "epoch": 0.7138728323699421, "grad_norm": 0.5682213760378196, "learning_rate": 9.065586237377274e-07, "loss": 0.5759379267692566, "step": 247, "token_acc": 0.8082834141978154 }, { "epoch": 0.7167630057803468, "grad_norm": 0.5222160620275426, "learning_rate": 9.056292183885341e-07, "loss": 0.5911962985992432, "step": 248, "token_acc": 0.803399969606123 }, { "epoch": 0.7196531791907514, "grad_norm": 0.5098026312902073, "learning_rate": 9.046956951489852e-07, "loss": 0.5775253772735596, "step": 249, "token_acc": 0.8074704886249294 }, { "epoch": 0.7225433526011561, "grad_norm": 0.524303335092293, "learning_rate": 9.037580634960763e-07, "loss": 0.5572794675827026, "step": 250, "token_acc": 0.8146691719232317 }, { "epoch": 0.7254335260115607, "grad_norm": 0.6033497475819745, "learning_rate": 9.028163329485112e-07, "loss": 0.5832095742225647, "step": 251, "token_acc": 0.8073202656110331 }, { "epoch": 0.7283236994219653, "grad_norm": 0.5556496694710653, "learning_rate": 9.018705130666049e-07, "loss": 0.5459315776824951, "step": 252, "token_acc": 0.8191452178897479 }, { "epoch": 0.7312138728323699, "grad_norm": 0.7747218495040153, "learning_rate": 9.009206134521868e-07, "loss": 0.5795873999595642, "step": 253, "token_acc": 0.8071730383987341 }, { "epoch": 0.7341040462427746, "grad_norm": 0.5652371374587928, "learning_rate": 8.999666437485034e-07, "loss": 0.5758365392684937, "step": 254, "token_acc": 0.811742473608758 }, { "epoch": 0.7369942196531792, "grad_norm": 0.5206182140440342, "learning_rate": 8.990086136401198e-07, "loss": 0.5303860306739807, "step": 255, "token_acc": 0.823020148188528 }, { "epoch": 0.7398843930635838, "grad_norm": 0.6450852115537637, "learning_rate": 8.980465328528218e-07, "loss": 0.5547192096710205, "step": 256, "token_acc": 0.8162106882834197 }, { "epoch": 0.7427745664739884, "grad_norm": 0.5196181500327283, "learning_rate": 8.970804111535175e-07, "loss": 0.5457019209861755, "step": 257, "token_acc": 0.8167301624082492 }, { "epoch": 0.7456647398843931, "grad_norm": 0.6356725122188899, "learning_rate": 8.961102583501375e-07, "loss": 0.5676227807998657, "step": 258, "token_acc": 0.8146457172245137 }, { "epoch": 0.7485549132947977, "grad_norm": 0.5766749980898508, "learning_rate": 8.951360842915355e-07, "loss": 0.5487492084503174, "step": 259, "token_acc": 0.8176302961517421 }, { "epoch": 0.7514450867052023, "grad_norm": 0.561193367543964, "learning_rate": 8.941578988673885e-07, "loss": 0.5508721470832825, "step": 260, "token_acc": 0.8148807459638577 }, { "epoch": 0.7543352601156069, "grad_norm": 1.1616614497713094, "learning_rate": 8.931757120080965e-07, "loss": 0.5649725794792175, "step": 261, "token_acc": 0.8123450235984954 }, { "epoch": 0.7572254335260116, "grad_norm": 0.6269083895254, "learning_rate": 8.921895336846812e-07, "loss": 0.5234044790267944, "step": 262, "token_acc": 0.826336871809926 }, { "epoch": 0.7601156069364162, "grad_norm": 0.5491932745407809, "learning_rate": 8.911993739086852e-07, "loss": 0.5335085391998291, "step": 263, "token_acc": 0.8243787856172078 }, { "epoch": 0.7630057803468208, "grad_norm": 0.6001894076535953, "learning_rate": 8.902052427320703e-07, "loss": 0.6009457111358643, "step": 264, "token_acc": 0.8005332320797702 }, { "epoch": 0.7658959537572254, "grad_norm": 0.6105633418239023, "learning_rate": 8.892071502471154e-07, "loss": 0.512947678565979, "step": 265, "token_acc": 0.8283333333333334 }, { "epoch": 0.7687861271676301, "grad_norm": 0.530310690982596, "learning_rate": 8.882051065863139e-07, "loss": 0.5578915476799011, "step": 266, "token_acc": 0.8134685584406639 }, { "epoch": 0.7716763005780347, "grad_norm": 0.6053842724913201, "learning_rate": 8.871991219222712e-07, "loss": 0.5307576656341553, "step": 267, "token_acc": 0.8237498632235475 }, { "epoch": 0.7745664739884393, "grad_norm": 0.5839374903786066, "learning_rate": 8.861892064676008e-07, "loss": 0.4724132716655731, "step": 268, "token_acc": 0.8406308417366578 }, { "epoch": 0.7774566473988439, "grad_norm": 0.5382380436884167, "learning_rate": 8.851753704748219e-07, "loss": 0.5864905118942261, "step": 269, "token_acc": 0.805320596148614 }, { "epoch": 0.7803468208092486, "grad_norm": 0.536612826265518, "learning_rate": 8.841576242362533e-07, "loss": 0.5369473695755005, "step": 270, "token_acc": 0.8202307927330842 }, { "epoch": 0.7832369942196532, "grad_norm": 0.48433135594375987, "learning_rate": 8.831359780839107e-07, "loss": 0.5745148062705994, "step": 271, "token_acc": 0.8114247865236928 }, { "epoch": 0.7861271676300579, "grad_norm": 0.565668286608129, "learning_rate": 8.821104423894014e-07, "loss": 0.5306930541992188, "step": 272, "token_acc": 0.8240810142731839 }, { "epoch": 0.7890173410404624, "grad_norm": 0.5347471169063638, "learning_rate": 8.810810275638182e-07, "loss": 0.5508551597595215, "step": 273, "token_acc": 0.8150747430289043 }, { "epoch": 0.791907514450867, "grad_norm": 0.5872611855148089, "learning_rate": 8.800477440576346e-07, "loss": 0.5582222938537598, "step": 274, "token_acc": 0.8141057178356111 }, { "epoch": 0.7947976878612717, "grad_norm": 0.5930933510081743, "learning_rate": 8.790106023605985e-07, "loss": 0.5265220403671265, "step": 275, "token_acc": 0.8236343698306786 }, { "epoch": 0.7976878612716763, "grad_norm": 0.5326943859900286, "learning_rate": 8.779696130016252e-07, "loss": 0.589282751083374, "step": 276, "token_acc": 0.8041843462366995 }, { "epoch": 0.8005780346820809, "grad_norm": 0.682574668475925, "learning_rate": 8.769247865486915e-07, "loss": 0.5634682178497314, "step": 277, "token_acc": 0.8131609072741031 }, { "epoch": 0.8034682080924855, "grad_norm": 0.6170926445265313, "learning_rate": 8.758761336087273e-07, "loss": 0.5282115340232849, "step": 278, "token_acc": 0.8240009668063165 }, { "epoch": 0.8063583815028902, "grad_norm": 0.5931538447313858, "learning_rate": 8.748236648275087e-07, "loss": 0.4907287061214447, "step": 279, "token_acc": 0.838809946714032 }, { "epoch": 0.8092485549132948, "grad_norm": 0.567206538957563, "learning_rate": 8.737673908895497e-07, "loss": 0.6097589731216431, "step": 280, "token_acc": 0.7990020422972478 }, { "epoch": 0.8121387283236994, "grad_norm": 0.5887119791348107, "learning_rate": 8.727073225179937e-07, "loss": 0.5625665187835693, "step": 281, "token_acc": 0.8113687537033379 }, { "epoch": 0.815028901734104, "grad_norm": 0.5836331757411469, "learning_rate": 8.716434704745046e-07, "loss": 0.513110339641571, "step": 282, "token_acc": 0.8275925912738822 }, { "epoch": 0.8179190751445087, "grad_norm": 0.6054924912257345, "learning_rate": 8.705758455591576e-07, "loss": 0.602730393409729, "step": 283, "token_acc": 0.8022713898227125 }, { "epoch": 0.8208092485549133, "grad_norm": 0.6236226833744741, "learning_rate": 8.695044586103295e-07, "loss": 0.5747796893119812, "step": 284, "token_acc": 0.8079837217906031 }, { "epoch": 0.8236994219653179, "grad_norm": 0.5865612629064065, "learning_rate": 8.684293205045889e-07, "loss": 0.6070411205291748, "step": 285, "token_acc": 0.7988344760774713 }, { "epoch": 0.8265895953757225, "grad_norm": 0.5503455006576133, "learning_rate": 8.673504421565856e-07, "loss": 0.5685064792633057, "step": 286, "token_acc": 0.8102210757057314 }, { "epoch": 0.8294797687861272, "grad_norm": 0.5972785565939337, "learning_rate": 8.662678345189396e-07, "loss": 0.46608567237854004, "step": 287, "token_acc": 0.8438823801959227 }, { "epoch": 0.8323699421965318, "grad_norm": 0.5201509566608107, "learning_rate": 8.651815085821302e-07, "loss": 0.5298614501953125, "step": 288, "token_acc": 0.8236416811984237 }, { "epoch": 0.8352601156069365, "grad_norm": 0.49819051940062725, "learning_rate": 8.640914753743847e-07, "loss": 0.5882748365402222, "step": 289, "token_acc": 0.8065492356638473 }, { "epoch": 0.838150289017341, "grad_norm": 0.6397626208223341, "learning_rate": 8.629977459615654e-07, "loss": 0.604642927646637, "step": 290, "token_acc": 0.798697597059869 }, { "epoch": 0.8410404624277457, "grad_norm": 0.5735121088769557, "learning_rate": 8.619003314470586e-07, "loss": 0.5657530426979065, "step": 291, "token_acc": 0.8134929241446619 }, { "epoch": 0.8439306358381503, "grad_norm": 0.6029592728755434, "learning_rate": 8.607992429716608e-07, "loss": 0.5807414054870605, "step": 292, "token_acc": 0.8062111084672681 }, { "epoch": 0.846820809248555, "grad_norm": 0.5204268288621456, "learning_rate": 8.596944917134666e-07, "loss": 0.5696761608123779, "step": 293, "token_acc": 0.8102849975611456 }, { "epoch": 0.8497109826589595, "grad_norm": 0.570216087116967, "learning_rate": 8.585860888877536e-07, "loss": 0.6144391298294067, "step": 294, "token_acc": 0.7976966055615415 }, { "epoch": 0.8526011560693642, "grad_norm": 0.525009085518107, "learning_rate": 8.574740457468708e-07, "loss": 0.5926086902618408, "step": 295, "token_acc": 0.8030848268880814 }, { "epoch": 0.8554913294797688, "grad_norm": 0.5397367841143723, "learning_rate": 8.563583735801223e-07, "loss": 0.5647125244140625, "step": 296, "token_acc": 0.8113542939673369 }, { "epoch": 0.8583815028901735, "grad_norm": 0.5453044997059636, "learning_rate": 8.55239083713654e-07, "loss": 0.5306450128555298, "step": 297, "token_acc": 0.8242952898276619 }, { "epoch": 0.861271676300578, "grad_norm": 0.49382426600759494, "learning_rate": 8.541161875103379e-07, "loss": 0.5655560493469238, "step": 298, "token_acc": 0.81170671232068 }, { "epoch": 0.8641618497109826, "grad_norm": 0.5609985492228051, "learning_rate": 8.529896963696576e-07, "loss": 0.5431415438652039, "step": 299, "token_acc": 0.8162933876284661 }, { "epoch": 0.8670520231213873, "grad_norm": 0.5476351474370762, "learning_rate": 8.51859621727591e-07, "loss": 0.5872442126274109, "step": 300, "token_acc": 0.8065929411453266 }, { "epoch": 0.869942196531792, "grad_norm": 0.5282221087597836, "learning_rate": 8.507259750564961e-07, "loss": 0.5451909899711609, "step": 301, "token_acc": 0.8188552557155108 }, { "epoch": 0.8728323699421965, "grad_norm": 0.503389270767867, "learning_rate": 8.495887678649932e-07, "loss": 0.5154858231544495, "step": 302, "token_acc": 0.8274329950559459 }, { "epoch": 0.8757225433526011, "grad_norm": 0.518940089504941, "learning_rate": 8.484480116978486e-07, "loss": 0.5244746208190918, "step": 303, "token_acc": 0.8264815952633637 }, { "epoch": 0.8786127167630058, "grad_norm": 0.573024895950047, "learning_rate": 8.473037181358573e-07, "loss": 0.592721700668335, "step": 304, "token_acc": 0.8035201013934049 }, { "epoch": 0.8815028901734104, "grad_norm": 0.5039735997055694, "learning_rate": 8.461558987957252e-07, "loss": 0.5656961798667908, "step": 305, "token_acc": 0.8130110070213994 }, { "epoch": 0.884393063583815, "grad_norm": 0.5476756827664239, "learning_rate": 8.45004565329952e-07, "loss": 0.5374190807342529, "step": 306, "token_acc": 0.820976424170279 }, { "epoch": 0.8872832369942196, "grad_norm": 0.5275746578408953, "learning_rate": 8.438497294267116e-07, "loss": 0.5982400178909302, "step": 307, "token_acc": 0.7999831918648626 }, { "epoch": 0.8901734104046243, "grad_norm": 0.532750300928086, "learning_rate": 8.426914028097347e-07, "loss": 0.584047794342041, "step": 308, "token_acc": 0.8066207177537092 }, { "epoch": 0.8930635838150289, "grad_norm": 0.5003914631256399, "learning_rate": 8.415295972381889e-07, "loss": 0.6089476346969604, "step": 309, "token_acc": 0.7978914509526754 }, { "epoch": 0.8959537572254336, "grad_norm": 0.6278624794022574, "learning_rate": 8.403643245065597e-07, "loss": 0.5697731375694275, "step": 310, "token_acc": 0.8108995234993658 }, { "epoch": 0.8988439306358381, "grad_norm": 0.6052633593556834, "learning_rate": 8.391955964445309e-07, "loss": 0.5913630723953247, "step": 311, "token_acc": 0.8023921969586315 }, { "epoch": 0.9017341040462428, "grad_norm": 0.5312386556419646, "learning_rate": 8.38023424916864e-07, "loss": 0.5818167924880981, "step": 312, "token_acc": 0.8053130715134147 }, { "epoch": 0.9046242774566474, "grad_norm": 0.5377630147019918, "learning_rate": 8.368478218232787e-07, "loss": 0.5994030237197876, "step": 313, "token_acc": 0.8010770419994847 }, { "epoch": 0.9075144508670521, "grad_norm": 0.6387143665462728, "learning_rate": 8.356687990983305e-07, "loss": 0.5747004747390747, "step": 314, "token_acc": 0.8103654791154791 }, { "epoch": 0.9104046242774566, "grad_norm": 0.5539012149779035, "learning_rate": 8.344863687112913e-07, "loss": 0.5109165906906128, "step": 315, "token_acc": 0.8275082819675849 }, { "epoch": 0.9132947976878613, "grad_norm": 0.5431996662851367, "learning_rate": 8.333005426660271e-07, "loss": 0.4984626770019531, "step": 316, "token_acc": 0.8326753471796506 }, { "epoch": 0.9161849710982659, "grad_norm": 0.5476844147731238, "learning_rate": 8.321113330008756e-07, "loss": 0.5582059025764465, "step": 317, "token_acc": 0.8131992060627932 }, { "epoch": 0.9190751445086706, "grad_norm": 0.5288904758826702, "learning_rate": 8.309187517885249e-07, "loss": 0.5965433120727539, "step": 318, "token_acc": 0.8015113167980331 }, { "epoch": 0.9219653179190751, "grad_norm": 0.5061439317002303, "learning_rate": 8.297228111358906e-07, "loss": 0.50608229637146, "step": 319, "token_acc": 0.8302445369795833 }, { "epoch": 0.9248554913294798, "grad_norm": 0.49043399117893216, "learning_rate": 8.285235231839927e-07, "loss": 0.5492719411849976, "step": 320, "token_acc": 0.8174581468830556 }, { "epoch": 0.9277456647398844, "grad_norm": 0.6174249587001943, "learning_rate": 8.273209001078324e-07, "loss": 0.553361177444458, "step": 321, "token_acc": 0.8119886458507264 }, { "epoch": 0.930635838150289, "grad_norm": 0.5616150428871276, "learning_rate": 8.261149541162691e-07, "loss": 0.6025636196136475, "step": 322, "token_acc": 0.8005087935801005 }, { "epoch": 0.9335260115606936, "grad_norm": 0.6478516612944865, "learning_rate": 8.249056974518954e-07, "loss": 0.5491775274276733, "step": 323, "token_acc": 0.8185532095041541 }, { "epoch": 0.9364161849710982, "grad_norm": 0.5031858383227522, "learning_rate": 8.236931423909138e-07, "loss": 0.6022853255271912, "step": 324, "token_acc": 0.8037384243419552 }, { "epoch": 0.9393063583815029, "grad_norm": 0.5752991697267287, "learning_rate": 8.224773012430114e-07, "loss": 0.5954960584640503, "step": 325, "token_acc": 0.8036680189317106 }, { "epoch": 0.9421965317919075, "grad_norm": 0.5295029516066992, "learning_rate": 8.212581863512353e-07, "loss": 0.5488483309745789, "step": 326, "token_acc": 0.8157750324575375 }, { "epoch": 0.9450867052023122, "grad_norm": 0.5368502799479243, "learning_rate": 8.20035810091867e-07, "loss": 0.5652696490287781, "step": 327, "token_acc": 0.8106361614705574 }, { "epoch": 0.9479768786127167, "grad_norm": 0.5847097314866032, "learning_rate": 8.188101848742974e-07, "loss": 0.544079065322876, "step": 328, "token_acc": 0.819971546427805 }, { "epoch": 0.9508670520231214, "grad_norm": 0.5255181020508993, "learning_rate": 8.175813231408999e-07, "loss": 0.4978986382484436, "step": 329, "token_acc": 0.8333199723062348 }, { "epoch": 0.953757225433526, "grad_norm": 0.5127048703010287, "learning_rate": 8.163492373669047e-07, "loss": 0.5805110931396484, "step": 330, "token_acc": 0.8056335113743647 }, { "epoch": 0.9566473988439307, "grad_norm": 0.652335019028349, "learning_rate": 8.15113940060272e-07, "loss": 0.5597442388534546, "step": 331, "token_acc": 0.8161630076551519 }, { "epoch": 0.9595375722543352, "grad_norm": 0.5947335075670345, "learning_rate": 8.13875443761565e-07, "loss": 0.5277099609375, "step": 332, "token_acc": 0.8274886297575488 }, { "epoch": 0.9624277456647399, "grad_norm": 0.5459606580402216, "learning_rate": 8.126337610438229e-07, "loss": 0.5635240077972412, "step": 333, "token_acc": 0.8108978939573075 }, { "epoch": 0.9653179190751445, "grad_norm": 0.5488564858287155, "learning_rate": 8.113889045124323e-07, "loss": 0.49523666501045227, "step": 334, "token_acc": 0.8329320341089853 }, { "epoch": 0.9682080924855492, "grad_norm": 0.5694023522198697, "learning_rate": 8.101408868050008e-07, "loss": 0.5316784381866455, "step": 335, "token_acc": 0.8213875427499967 }, { "epoch": 0.9710982658959537, "grad_norm": 0.5290670622343212, "learning_rate": 8.088897205912271e-07, "loss": 0.5768337249755859, "step": 336, "token_acc": 0.808409267610014 }, { "epoch": 0.9739884393063584, "grad_norm": 0.5630882737173935, "learning_rate": 8.076354185727734e-07, "loss": 0.5607028007507324, "step": 337, "token_acc": 0.8111738071422572 }, { "epoch": 0.976878612716763, "grad_norm": 0.5389758264031266, "learning_rate": 8.06377993483136e-07, "loss": 0.5800102949142456, "step": 338, "token_acc": 0.8064102564102564 }, { "epoch": 0.9797687861271677, "grad_norm": 0.6483925804091112, "learning_rate": 8.051174580875163e-07, "loss": 0.5936282873153687, "step": 339, "token_acc": 0.8033736003463585 }, { "epoch": 0.9826589595375722, "grad_norm": 0.5683588968241811, "learning_rate": 8.038538251826912e-07, "loss": 0.5602604150772095, "step": 340, "token_acc": 0.8103426182505487 }, { "epoch": 0.9855491329479769, "grad_norm": 0.4984007019353715, "learning_rate": 8.025871075968826e-07, "loss": 0.559136152267456, "step": 341, "token_acc": 0.8140824580290378 }, { "epoch": 0.9884393063583815, "grad_norm": 1.1899348194485317, "learning_rate": 8.013173181896282e-07, "loss": 0.5955883860588074, "step": 342, "token_acc": 0.8027926447988978 }, { "epoch": 0.9913294797687862, "grad_norm": 0.5388156404908695, "learning_rate": 8.0004446985165e-07, "loss": 0.5661012530326843, "step": 343, "token_acc": 0.8099668055056346 }, { "epoch": 0.9942196531791907, "grad_norm": 0.5412535831553995, "learning_rate": 7.987685755047242e-07, "loss": 0.6086287498474121, "step": 344, "token_acc": 0.7963722407145177 }, { "epoch": 0.9971098265895953, "grad_norm": 0.696761929081249, "learning_rate": 7.974896481015494e-07, "loss": 0.5823131799697876, "step": 345, "token_acc": 0.8073882514689755 }, { "epoch": 1.0, "grad_norm": 0.4953947640304795, "learning_rate": 7.962077006256153e-07, "loss": 0.5682995319366455, "step": 346, "token_acc": 0.8121095151492658 }, { "epoch": 1.0028901734104045, "grad_norm": 0.7111654355632505, "learning_rate": 7.94922746091071e-07, "loss": 0.6060156226158142, "step": 347, "token_acc": 0.8014354938608955 }, { "epoch": 1.0057803468208093, "grad_norm": 0.5507935056779134, "learning_rate": 7.93634797542593e-07, "loss": 0.5295247435569763, "step": 348, "token_acc": 0.8211228506318624 }, { "epoch": 1.0086705202312138, "grad_norm": 0.6189562361784823, "learning_rate": 7.923438680552525e-07, "loss": 0.5647916197776794, "step": 349, "token_acc": 0.8137873547100433 }, { "epoch": 1.0115606936416186, "grad_norm": 0.6801159002216328, "learning_rate": 7.910499707343828e-07, "loss": 0.590101420879364, "step": 350, "token_acc": 0.803803399890662 }, { "epoch": 1.0144508670520231, "grad_norm": 0.6049076830653918, "learning_rate": 7.897531187154458e-07, "loss": 0.5088500380516052, "step": 351, "token_acc": 0.8279876049759735 }, { "epoch": 1.0173410404624277, "grad_norm": 0.5654302790773965, "learning_rate": 7.884533251638999e-07, "loss": 0.5929542779922485, "step": 352, "token_acc": 0.8047063731856507 }, { "epoch": 1.0202312138728324, "grad_norm": 0.5880451344105353, "learning_rate": 7.87150603275065e-07, "loss": 0.5749261379241943, "step": 353, "token_acc": 0.8056116433808085 }, { "epoch": 1.023121387283237, "grad_norm": 0.5426830225682386, "learning_rate": 7.85844966273989e-07, "loss": 0.5945314168930054, "step": 354, "token_acc": 0.800486217737808 }, { "epoch": 1.0260115606936415, "grad_norm": 0.49678361176775165, "learning_rate": 7.845364274153139e-07, "loss": 0.4898013472557068, "step": 355, "token_acc": 0.8352619622320034 }, { "epoch": 1.0289017341040463, "grad_norm": 0.6954304853085829, "learning_rate": 7.832249999831406e-07, "loss": 0.5588274598121643, "step": 356, "token_acc": 0.8166684201080533 }, { "epoch": 1.0317919075144508, "grad_norm": 0.5310648615446059, "learning_rate": 7.819106972908949e-07, "loss": 0.5819897651672363, "step": 357, "token_acc": 0.8045070775826193 }, { "epoch": 1.0346820809248556, "grad_norm": 0.5923922817451516, "learning_rate": 7.805935326811912e-07, "loss": 0.5737313032150269, "step": 358, "token_acc": 0.8051378103467133 }, { "epoch": 1.0375722543352601, "grad_norm": 0.5178307979556245, "learning_rate": 7.79273519525698e-07, "loss": 0.5936248302459717, "step": 359, "token_acc": 0.8025767773866199 }, { "epoch": 1.0404624277456647, "grad_norm": 0.5286013733045867, "learning_rate": 7.779506712250022e-07, "loss": 0.5494135618209839, "step": 360, "token_acc": 0.8171926851655723 }, { "epoch": 1.0433526011560694, "grad_norm": 0.49585832807282065, "learning_rate": 7.766250012084722e-07, "loss": 0.5698336958885193, "step": 361, "token_acc": 0.8116101814090845 }, { "epoch": 1.046242774566474, "grad_norm": 0.6962712390013456, "learning_rate": 7.752965229341219e-07, "loss": 0.535956621170044, "step": 362, "token_acc": 0.822281059722762 }, { "epoch": 1.0491329479768785, "grad_norm": 0.5694059644679526, "learning_rate": 7.739652498884747e-07, "loss": 0.5675574541091919, "step": 363, "token_acc": 0.8093009931245225 }, { "epoch": 1.0520231213872833, "grad_norm": 0.5547323442483891, "learning_rate": 7.726311955864261e-07, "loss": 0.5611029863357544, "step": 364, "token_acc": 0.8125364888148433 }, { "epoch": 1.0549132947976878, "grad_norm": 0.5476729662614271, "learning_rate": 7.712943735711062e-07, "loss": 0.5374180674552917, "step": 365, "token_acc": 0.8212820320132261 }, { "epoch": 1.0578034682080926, "grad_norm": 0.5180731484879565, "learning_rate": 7.699547974137426e-07, "loss": 0.5433316230773926, "step": 366, "token_acc": 0.8200906177478174 }, { "epoch": 1.060693641618497, "grad_norm": 0.5798685069888638, "learning_rate": 7.686124807135228e-07, "loss": 0.5966153740882874, "step": 367, "token_acc": 0.8028633971139337 }, { "epoch": 1.0635838150289016, "grad_norm": 0.5594356403434023, "learning_rate": 7.672674370974558e-07, "loss": 0.5133764743804932, "step": 368, "token_acc": 0.8287475052817048 }, { "epoch": 1.0664739884393064, "grad_norm": 0.5414940672989453, "learning_rate": 7.659196802202338e-07, "loss": 0.5794786214828491, "step": 369, "token_acc": 0.8080960204454181 }, { "epoch": 1.069364161849711, "grad_norm": 0.5596146246622683, "learning_rate": 7.645692237640937e-07, "loss": 0.6179242134094238, "step": 370, "token_acc": 0.7978232829012561 }, { "epoch": 1.0722543352601157, "grad_norm": 0.5658616759599563, "learning_rate": 7.632160814386779e-07, "loss": 0.5489234924316406, "step": 371, "token_acc": 0.818960201793722 }, { "epoch": 1.0751445086705202, "grad_norm": 0.5583854062469837, "learning_rate": 7.618602669808957e-07, "loss": 0.5576378703117371, "step": 372, "token_acc": 0.8134194149383499 }, { "epoch": 1.0780346820809248, "grad_norm": 0.5709606663652054, "learning_rate": 7.605017941547835e-07, "loss": 0.5531469583511353, "step": 373, "token_acc": 0.8139197537682152 }, { "epoch": 1.0809248554913296, "grad_norm": 0.5401961587153568, "learning_rate": 7.591406767513648e-07, "loss": 0.5335639715194702, "step": 374, "token_acc": 0.8189074796640434 }, { "epoch": 1.083815028901734, "grad_norm": 0.5776452597256104, "learning_rate": 7.577769285885108e-07, "loss": 0.5792023539543152, "step": 375, "token_acc": 0.8059631052038535 }, { "epoch": 1.0867052023121386, "grad_norm": 0.6631103343737483, "learning_rate": 7.564105635107996e-07, "loss": 0.5358845591545105, "step": 376, "token_acc": 0.8186349045446866 }, { "epoch": 1.0895953757225434, "grad_norm": 0.49688934026931153, "learning_rate": 7.550415953893756e-07, "loss": 0.5017120242118835, "step": 377, "token_acc": 0.8296466328279073 }, { "epoch": 1.092485549132948, "grad_norm": 0.5499825048622536, "learning_rate": 7.536700381218097e-07, "loss": 0.5757490396499634, "step": 378, "token_acc": 0.8071212248675023 }, { "epoch": 1.0953757225433527, "grad_norm": 0.5724354451620394, "learning_rate": 7.522959056319564e-07, "loss": 0.5289810299873352, "step": 379, "token_acc": 0.8224057244166174 }, { "epoch": 1.0982658959537572, "grad_norm": 0.5295598164095123, "learning_rate": 7.509192118698145e-07, "loss": 0.5217394828796387, "step": 380, "token_acc": 0.8247749871572029 }, { "epoch": 1.1011560693641618, "grad_norm": 0.6732543146745934, "learning_rate": 7.49539970811384e-07, "loss": 0.5446665287017822, "step": 381, "token_acc": 0.8187780645617508 }, { "epoch": 1.1040462427745665, "grad_norm": 0.593141398734888, "learning_rate": 7.481581964585244e-07, "loss": 0.6174026131629944, "step": 382, "token_acc": 0.7958839535507607 }, { "epoch": 1.106936416184971, "grad_norm": 0.5915717748635032, "learning_rate": 7.467739028388133e-07, "loss": 0.5956196784973145, "step": 383, "token_acc": 0.8005577327975455 }, { "epoch": 1.1098265895953756, "grad_norm": 0.5486121690897104, "learning_rate": 7.453871040054037e-07, "loss": 0.602386474609375, "step": 384, "token_acc": 0.7985531236588805 }, { "epoch": 1.1127167630057804, "grad_norm": 0.6468015023115512, "learning_rate": 7.439978140368803e-07, "loss": 0.5264239311218262, "step": 385, "token_acc": 0.8247053516043534 }, { "epoch": 1.115606936416185, "grad_norm": 0.5396942599943407, "learning_rate": 7.426060470371185e-07, "loss": 0.5322436094284058, "step": 386, "token_acc": 0.8225644386194845 }, { "epoch": 1.1184971098265897, "grad_norm": 0.546318443194639, "learning_rate": 7.412118171351395e-07, "loss": 0.5636791586875916, "step": 387, "token_acc": 0.8132001591389744 }, { "epoch": 1.1213872832369942, "grad_norm": 0.5681580355518231, "learning_rate": 7.398151384849679e-07, "loss": 0.5519202351570129, "step": 388, "token_acc": 0.8136924046076314 }, { "epoch": 1.1242774566473988, "grad_norm": 0.5949989948835427, "learning_rate": 7.384160252654873e-07, "loss": 0.5511115789413452, "step": 389, "token_acc": 0.8144513354081949 }, { "epoch": 1.1271676300578035, "grad_norm": 0.4837423293992909, "learning_rate": 7.370144916802969e-07, "loss": 0.5643985867500305, "step": 390, "token_acc": 0.8112824957599688 }, { "epoch": 1.130057803468208, "grad_norm": 0.5611205998910804, "learning_rate": 7.356105519575671e-07, "loss": 0.5409538745880127, "step": 391, "token_acc": 0.8188429729320618 }, { "epoch": 1.1329479768786128, "grad_norm": 0.5181274015479428, "learning_rate": 7.342042203498951e-07, "loss": 0.5411881804466248, "step": 392, "token_acc": 0.8171947300974061 }, { "epoch": 1.1358381502890174, "grad_norm": 0.5497633972492808, "learning_rate": 7.327955111341601e-07, "loss": 0.5626124143600464, "step": 393, "token_acc": 0.8131716531422224 }, { "epoch": 1.138728323699422, "grad_norm": 0.569806645978514, "learning_rate": 7.313844386113783e-07, "loss": 0.533359169960022, "step": 394, "token_acc": 0.8227007051547947 }, { "epoch": 1.1416184971098267, "grad_norm": 0.5809695758427657, "learning_rate": 7.299710171065584e-07, "loss": 0.5428122282028198, "step": 395, "token_acc": 0.8167381946213591 }, { "epoch": 1.1445086705202312, "grad_norm": 0.5685994639717983, "learning_rate": 7.28555260968555e-07, "loss": 0.5661939382553101, "step": 396, "token_acc": 0.8107361575857062 }, { "epoch": 1.147398843930636, "grad_norm": 0.5687294924284086, "learning_rate": 7.271371845699241e-07, "loss": 0.4796743392944336, "step": 397, "token_acc": 0.8378044059980814 }, { "epoch": 1.1502890173410405, "grad_norm": 0.5570998116553988, "learning_rate": 7.257168023067759e-07, "loss": 0.5698948502540588, "step": 398, "token_acc": 0.8108394509164174 }, { "epoch": 1.153179190751445, "grad_norm": 0.5764653559793665, "learning_rate": 7.242941285986303e-07, "loss": 0.5216134190559387, "step": 399, "token_acc": 0.8264347873981053 }, { "epoch": 1.1560693641618498, "grad_norm": 0.5519714242613649, "learning_rate": 7.228691778882692e-07, "loss": 0.5965580940246582, "step": 400, "token_acc": 0.8008848328263255 }, { "epoch": 1.1589595375722543, "grad_norm": 0.5713833806622776, "learning_rate": 7.2144196464159e-07, "loss": 0.530504584312439, "step": 401, "token_acc": 0.8193537207392506 }, { "epoch": 1.1618497109826589, "grad_norm": 0.5112285942897958, "learning_rate": 7.200125033474598e-07, "loss": 0.5425513982772827, "step": 402, "token_acc": 0.8176038122905598 }, { "epoch": 1.1647398843930636, "grad_norm": 0.5891524284010872, "learning_rate": 7.185808085175668e-07, "loss": 0.5737115740776062, "step": 403, "token_acc": 0.811070949924867 }, { "epoch": 1.1676300578034682, "grad_norm": 0.8927491774092401, "learning_rate": 7.171468946862743e-07, "loss": 0.5100395083427429, "step": 404, "token_acc": 0.8297666772416578 }, { "epoch": 1.1705202312138727, "grad_norm": 0.6290027028336996, "learning_rate": 7.157107764104723e-07, "loss": 0.5254942178726196, "step": 405, "token_acc": 0.8239488461275081 }, { "epoch": 1.1734104046242775, "grad_norm": 0.5413566372730959, "learning_rate": 7.142724682694299e-07, "loss": 0.5764940977096558, "step": 406, "token_acc": 0.8086516073191842 }, { "epoch": 1.176300578034682, "grad_norm": 0.5581695811593094, "learning_rate": 7.128319848646477e-07, "loss": 0.5500423312187195, "step": 407, "token_acc": 0.8153743413040916 }, { "epoch": 1.1791907514450868, "grad_norm": 0.4681952163328979, "learning_rate": 7.113893408197091e-07, "loss": 0.5582858324050903, "step": 408, "token_acc": 0.8114563586911728 }, { "epoch": 1.1820809248554913, "grad_norm": 0.6826359609914151, "learning_rate": 7.099445507801323e-07, "loss": 0.49809369444847107, "step": 409, "token_acc": 0.8353448588307781 }, { "epoch": 1.1849710982658959, "grad_norm": 0.5090205197384219, "learning_rate": 7.084976294132207e-07, "loss": 0.6029922962188721, "step": 410, "token_acc": 0.7973656093105548 }, { "epoch": 1.1878612716763006, "grad_norm": 0.5269042882225241, "learning_rate": 7.070485914079151e-07, "loss": 0.5927149057388306, "step": 411, "token_acc": 0.8014037282759605 }, { "epoch": 1.1907514450867052, "grad_norm": 0.49950817881103576, "learning_rate": 7.055974514746445e-07, "loss": 0.5837708711624146, "step": 412, "token_acc": 0.8074309042384765 }, { "epoch": 1.19364161849711, "grad_norm": 0.5860116475494397, "learning_rate": 7.041442243451752e-07, "loss": 0.5210489630699158, "step": 413, "token_acc": 0.8244094424028096 }, { "epoch": 1.1965317919075145, "grad_norm": 0.5718657608384051, "learning_rate": 7.026889247724635e-07, "loss": 0.5820956230163574, "step": 414, "token_acc": 0.8042295599535557 }, { "epoch": 1.199421965317919, "grad_norm": 0.5054409513703455, "learning_rate": 7.012315675305045e-07, "loss": 0.5862281918525696, "step": 415, "token_acc": 0.8023793187527289 }, { "epoch": 1.2023121387283238, "grad_norm": 0.5766487774658408, "learning_rate": 6.997721674141822e-07, "loss": 0.520296037197113, "step": 416, "token_acc": 0.8252748600155311 }, { "epoch": 1.2052023121387283, "grad_norm": 0.537979220335716, "learning_rate": 6.983107392391202e-07, "loss": 0.5797343850135803, "step": 417, "token_acc": 0.80571660344046 }, { "epoch": 1.208092485549133, "grad_norm": 0.5396946740305607, "learning_rate": 6.9684729784153e-07, "loss": 0.6153110265731812, "step": 418, "token_acc": 0.7969049998485812 }, { "epoch": 1.2109826589595376, "grad_norm": 0.5642823581815699, "learning_rate": 6.953818580780613e-07, "loss": 0.5325438976287842, "step": 419, "token_acc": 0.8222246858832225 }, { "epoch": 1.2138728323699421, "grad_norm": 0.5535087521581403, "learning_rate": 6.939144348256511e-07, "loss": 0.5709867477416992, "step": 420, "token_acc": 0.8069591256176074 }, { "epoch": 1.216763005780347, "grad_norm": 0.572340555748076, "learning_rate": 6.924450429813723e-07, "loss": 0.5548975467681885, "step": 421, "token_acc": 0.8185377583894686 }, { "epoch": 1.2196531791907514, "grad_norm": 0.5155912490897337, "learning_rate": 6.909736974622826e-07, "loss": 0.5856627225875854, "step": 422, "token_acc": 0.8058833037013092 }, { "epoch": 1.222543352601156, "grad_norm": 0.5287358182605065, "learning_rate": 6.895004132052735e-07, "loss": 0.530200719833374, "step": 423, "token_acc": 0.822671307855992 }, { "epoch": 1.2254335260115607, "grad_norm": 0.5377464968526829, "learning_rate": 6.88025205166918e-07, "loss": 0.6028895974159241, "step": 424, "token_acc": 0.8013212984612038 }, { "epoch": 1.2283236994219653, "grad_norm": 0.5204405657753005, "learning_rate": 6.865480883233189e-07, "loss": 0.5590497851371765, "step": 425, "token_acc": 0.8117163218535146 }, { "epoch": 1.2312138728323698, "grad_norm": 0.45493496853760634, "learning_rate": 6.850690776699573e-07, "loss": 0.5726251602172852, "step": 426, "token_acc": 0.8084424978300127 }, { "epoch": 1.2341040462427746, "grad_norm": 0.6240376452291253, "learning_rate": 6.835881882215395e-07, "loss": 0.5343113541603088, "step": 427, "token_acc": 0.8196929353326794 }, { "epoch": 1.2369942196531791, "grad_norm": 0.5773298029457239, "learning_rate": 6.821054350118458e-07, "loss": 0.5317709445953369, "step": 428, "token_acc": 0.8196335435275461 }, { "epoch": 1.239884393063584, "grad_norm": 0.5477278016005382, "learning_rate": 6.806208330935766e-07, "loss": 0.5721542835235596, "step": 429, "token_acc": 0.8069397675429067 }, { "epoch": 1.2427745664739884, "grad_norm": 0.5954432022727356, "learning_rate": 6.791343975381999e-07, "loss": 0.59670090675354, "step": 430, "token_acc": 0.8028038691690053 }, { "epoch": 1.245664739884393, "grad_norm": 0.6299231511446614, "learning_rate": 6.776461434357993e-07, "loss": 0.5712985396385193, "step": 431, "token_acc": 0.8093430920755399 }, { "epoch": 1.2485549132947977, "grad_norm": 0.5405979300580379, "learning_rate": 6.761560858949192e-07, "loss": 0.5809611082077026, "step": 432, "token_acc": 0.8070006162733515 }, { "epoch": 1.2514450867052023, "grad_norm": 0.5516822339033575, "learning_rate": 6.746642400424131e-07, "loss": 0.5620344281196594, "step": 433, "token_acc": 0.8121798185065721 }, { "epoch": 1.254335260115607, "grad_norm": 0.5284837836987685, "learning_rate": 6.731706210232882e-07, "loss": 0.5855224132537842, "step": 434, "token_acc": 0.8044497743554139 }, { "epoch": 1.2572254335260116, "grad_norm": 0.5627730241670859, "learning_rate": 6.716752440005537e-07, "loss": 0.5670550465583801, "step": 435, "token_acc": 0.8096381386958137 }, { "epoch": 1.260115606936416, "grad_norm": 0.538509679886266, "learning_rate": 6.701781241550648e-07, "loss": 0.5526491403579712, "step": 436, "token_acc": 0.8155125315340866 }, { "epoch": 1.2630057803468209, "grad_norm": 0.4771561540026018, "learning_rate": 6.686792766853705e-07, "loss": 0.5505247712135315, "step": 437, "token_acc": 0.8138159537283621 }, { "epoch": 1.2658959537572254, "grad_norm": 0.5223829257694631, "learning_rate": 6.671787168075575e-07, "loss": 0.5447695255279541, "step": 438, "token_acc": 0.8178192464935741 }, { "epoch": 1.2687861271676302, "grad_norm": 0.5159364504277794, "learning_rate": 6.656764597550975e-07, "loss": 0.5982085466384888, "step": 439, "token_acc": 0.8001320834327017 }, { "epoch": 1.2716763005780347, "grad_norm": 0.5310637224775283, "learning_rate": 6.641725207786909e-07, "loss": 0.5778173208236694, "step": 440, "token_acc": 0.8066611125837846 }, { "epoch": 1.2745664739884393, "grad_norm": 0.56776340532874, "learning_rate": 6.626669151461133e-07, "loss": 0.5481947660446167, "step": 441, "token_acc": 0.8165455226676658 }, { "epoch": 1.2774566473988438, "grad_norm": 0.5289033874903101, "learning_rate": 6.611596581420599e-07, "loss": 0.5178524255752563, "step": 442, "token_acc": 0.8276837132314907 }, { "epoch": 1.2803468208092486, "grad_norm": 0.6054263277819003, "learning_rate": 6.596507650679899e-07, "loss": 0.5819660425186157, "step": 443, "token_acc": 0.8038088791803834 }, { "epoch": 1.2832369942196533, "grad_norm": 0.5487293303925478, "learning_rate": 6.581402512419723e-07, "loss": 0.5847280621528625, "step": 444, "token_acc": 0.80743134495099 }, { "epoch": 1.2861271676300579, "grad_norm": 0.5388475336099026, "learning_rate": 6.566281319985295e-07, "loss": 0.5863124132156372, "step": 445, "token_acc": 0.8067254504627854 }, { "epoch": 1.2890173410404624, "grad_norm": 0.5538452871257553, "learning_rate": 6.551144226884815e-07, "loss": 0.5669398307800293, "step": 446, "token_acc": 0.8087953975429001 }, { "epoch": 1.291907514450867, "grad_norm": 0.557772227891473, "learning_rate": 6.53599138678791e-07, "loss": 0.5209745764732361, "step": 447, "token_acc": 0.8239799595072235 }, { "epoch": 1.2947976878612717, "grad_norm": 0.6127169435529054, "learning_rate": 6.520822953524065e-07, "loss": 0.5106294751167297, "step": 448, "token_acc": 0.8277936680145971 }, { "epoch": 1.2976878612716762, "grad_norm": 0.5375147488907324, "learning_rate": 6.505639081081066e-07, "loss": 0.5071303844451904, "step": 449, "token_acc": 0.8268003446613994 }, { "epoch": 1.300578034682081, "grad_norm": 0.5553311529997369, "learning_rate": 6.490439923603435e-07, "loss": 0.5532734394073486, "step": 450, "token_acc": 0.8134406172882417 }, { "epoch": 1.3034682080924855, "grad_norm": 0.5998759397432016, "learning_rate": 6.475225635390863e-07, "loss": 0.5865392088890076, "step": 451, "token_acc": 0.8023424626486245 }, { "epoch": 1.30635838150289, "grad_norm": 0.5417420736704273, "learning_rate": 6.459996370896652e-07, "loss": 0.546296238899231, "step": 452, "token_acc": 0.8187062949013282 }, { "epoch": 1.3092485549132948, "grad_norm": 0.5655148261341275, "learning_rate": 6.444752284726135e-07, "loss": 0.5877007246017456, "step": 453, "token_acc": 0.8039364919354839 }, { "epoch": 1.3121387283236994, "grad_norm": 0.6144864679165839, "learning_rate": 6.429493531635114e-07, "loss": 0.5454727411270142, "step": 454, "token_acc": 0.8179015382597002 }, { "epoch": 1.3150289017341041, "grad_norm": 0.5513024274913209, "learning_rate": 6.414220266528291e-07, "loss": 0.553301215171814, "step": 455, "token_acc": 0.8119396930565884 }, { "epoch": 1.3179190751445087, "grad_norm": 0.5291432658218749, "learning_rate": 6.398932644457689e-07, "loss": 0.5474492311477661, "step": 456, "token_acc": 0.8148487159928808 }, { "epoch": 1.3208092485549132, "grad_norm": 0.5239384490420579, "learning_rate": 6.383630820621081e-07, "loss": 0.5769109725952148, "step": 457, "token_acc": 0.8075285980313913 }, { "epoch": 1.323699421965318, "grad_norm": 0.5372997474035569, "learning_rate": 6.368314950360415e-07, "loss": 0.5458542108535767, "step": 458, "token_acc": 0.818262614678899 }, { "epoch": 1.3265895953757225, "grad_norm": 0.5222784886904625, "learning_rate": 6.352985189160234e-07, "loss": 0.543486475944519, "step": 459, "token_acc": 0.8140883445049911 }, { "epoch": 1.3294797687861273, "grad_norm": 0.5656149822293426, "learning_rate": 6.337641692646106e-07, "loss": 0.5165099501609802, "step": 460, "token_acc": 0.8232782145649256 }, { "epoch": 1.3323699421965318, "grad_norm": 0.5339208409670375, "learning_rate": 6.322284616583026e-07, "loss": 0.568447470664978, "step": 461, "token_acc": 0.8107062348801407 }, { "epoch": 1.3352601156069364, "grad_norm": 0.534789315369846, "learning_rate": 6.306914116873862e-07, "loss": 0.5637167692184448, "step": 462, "token_acc": 0.8118799414154401 }, { "epoch": 1.3381502890173411, "grad_norm": 0.5013992587561265, "learning_rate": 6.291530349557749e-07, "loss": 0.6041359305381775, "step": 463, "token_acc": 0.8002847429734529 }, { "epoch": 1.3410404624277457, "grad_norm": 0.6327002649058038, "learning_rate": 6.27613347080851e-07, "loss": 0.5996913909912109, "step": 464, "token_acc": 0.8028000921266601 }, { "epoch": 1.3439306358381504, "grad_norm": 0.47925020942862323, "learning_rate": 6.260723636933076e-07, "loss": 0.5272285342216492, "step": 465, "token_acc": 0.8219443104776792 }, { "epoch": 1.346820809248555, "grad_norm": 0.5418997127974843, "learning_rate": 6.2453010043699e-07, "loss": 0.5982799530029297, "step": 466, "token_acc": 0.8018455748733745 }, { "epoch": 1.3497109826589595, "grad_norm": 0.511563505395346, "learning_rate": 6.22986572968736e-07, "loss": 0.5489825010299683, "step": 467, "token_acc": 0.8149126753184632 }, { "epoch": 1.352601156069364, "grad_norm": 0.6199984691110088, "learning_rate": 6.214417969582181e-07, "loss": 0.5509693622589111, "step": 468, "token_acc": 0.8135395589697864 }, { "epoch": 1.3554913294797688, "grad_norm": 0.9112236282410355, "learning_rate": 6.198957880877833e-07, "loss": 0.5764250755310059, "step": 469, "token_acc": 0.8059208967249633 }, { "epoch": 1.3583815028901733, "grad_norm": 0.5989342589849401, "learning_rate": 6.183485620522946e-07, "loss": 0.5593207478523254, "step": 470, "token_acc": 0.8130887081520711 }, { "epoch": 1.361271676300578, "grad_norm": 0.539630418011966, "learning_rate": 6.168001345589715e-07, "loss": 0.5798720121383667, "step": 471, "token_acc": 0.8067868478007105 }, { "epoch": 1.3641618497109826, "grad_norm": 0.5728505086100849, "learning_rate": 6.152505213272307e-07, "loss": 0.5105577707290649, "step": 472, "token_acc": 0.8268291947926711 }, { "epoch": 1.3670520231213872, "grad_norm": 0.5731864783632108, "learning_rate": 6.136997380885259e-07, "loss": 0.505968451499939, "step": 473, "token_acc": 0.8303852677489701 }, { "epoch": 1.369942196531792, "grad_norm": 0.5786843206230191, "learning_rate": 6.12147800586189e-07, "loss": 0.570541501045227, "step": 474, "token_acc": 0.8074693848475233 }, { "epoch": 1.3728323699421965, "grad_norm": 0.5300325283027945, "learning_rate": 6.105947245752696e-07, "loss": 0.5622447729110718, "step": 475, "token_acc": 0.8132824737156444 }, { "epoch": 1.3757225433526012, "grad_norm": 0.5128915878177316, "learning_rate": 6.090405258223756e-07, "loss": 0.5856798887252808, "step": 476, "token_acc": 0.8047204813663714 }, { "epoch": 1.3786127167630058, "grad_norm": 0.6515506714427548, "learning_rate": 6.074852201055121e-07, "loss": 0.5826733112335205, "step": 477, "token_acc": 0.8034700052323068 }, { "epoch": 1.3815028901734103, "grad_norm": 0.5411318320511171, "learning_rate": 6.059288232139225e-07, "loss": 0.5210794806480408, "step": 478, "token_acc": 0.8267671925390047 }, { "epoch": 1.384393063583815, "grad_norm": 0.5057150421228545, "learning_rate": 6.043713509479277e-07, "loss": 0.5771398544311523, "step": 479, "token_acc": 0.805000332629771 }, { "epoch": 1.3872832369942196, "grad_norm": 0.5683244072025584, "learning_rate": 6.028128191187653e-07, "loss": 0.5385507941246033, "step": 480, "token_acc": 0.8176837578528416 }, { "epoch": 1.3901734104046244, "grad_norm": 0.5074682411792649, "learning_rate": 6.012532435484297e-07, "loss": 0.5577852725982666, "step": 481, "token_acc": 0.8137183546223177 }, { "epoch": 1.393063583815029, "grad_norm": 0.5087445776495183, "learning_rate": 5.996926400695113e-07, "loss": 0.5707537531852722, "step": 482, "token_acc": 0.8081652461733929 }, { "epoch": 1.3959537572254335, "grad_norm": 0.6070774288583548, "learning_rate": 5.981310245250351e-07, "loss": 0.5291765928268433, "step": 483, "token_acc": 0.8198564644248993 }, { "epoch": 1.3988439306358382, "grad_norm": 0.6426245006677934, "learning_rate": 5.965684127683012e-07, "loss": 0.5093721151351929, "step": 484, "token_acc": 0.8283427901813247 }, { "epoch": 1.4017341040462428, "grad_norm": 0.5235719939982498, "learning_rate": 5.950048206627228e-07, "loss": 0.5404484272003174, "step": 485, "token_acc": 0.8198352412538783 }, { "epoch": 1.4046242774566475, "grad_norm": 0.5330826415435456, "learning_rate": 5.934402640816651e-07, "loss": 0.6019877195358276, "step": 486, "token_acc": 0.7991196347629723 }, { "epoch": 1.407514450867052, "grad_norm": 0.5402624006228682, "learning_rate": 5.918747589082852e-07, "loss": 0.512151300907135, "step": 487, "token_acc": 0.8255600510667488 }, { "epoch": 1.4104046242774566, "grad_norm": 0.6034074325578554, "learning_rate": 5.903083210353695e-07, "loss": 0.5242146253585815, "step": 488, "token_acc": 0.821293480679374 }, { "epoch": 1.4132947976878611, "grad_norm": 0.6270290133131012, "learning_rate": 5.887409663651736e-07, "loss": 0.5783629417419434, "step": 489, "token_acc": 0.8050973979809469 }, { "epoch": 1.416184971098266, "grad_norm": 0.5697754520754279, "learning_rate": 5.8717271080926e-07, "loss": 0.5560973882675171, "step": 490, "token_acc": 0.8151623266302166 }, { "epoch": 1.4190751445086704, "grad_norm": 0.5157305125572653, "learning_rate": 5.856035702883368e-07, "loss": 0.5741870999336243, "step": 491, "token_acc": 0.8082165363392618 }, { "epoch": 1.4219653179190752, "grad_norm": 0.552078767595136, "learning_rate": 5.840335607320963e-07, "loss": 0.5855275392532349, "step": 492, "token_acc": 0.8052095872614805 }, { "epoch": 1.4248554913294798, "grad_norm": 0.503224099727086, "learning_rate": 5.824626980790532e-07, "loss": 0.5036199688911438, "step": 493, "token_acc": 0.8295647769617597 }, { "epoch": 1.4277456647398843, "grad_norm": 0.535330314229148, "learning_rate": 5.808909982763825e-07, "loss": 0.5614448189735413, "step": 494, "token_acc": 0.8112195584194068 }, { "epoch": 1.430635838150289, "grad_norm": 0.5656493275743161, "learning_rate": 5.793184772797577e-07, "loss": 0.5648437142372131, "step": 495, "token_acc": 0.809333342296497 }, { "epoch": 1.4335260115606936, "grad_norm": 0.591964902056671, "learning_rate": 5.777451510531894e-07, "loss": 0.4516139626502991, "step": 496, "token_acc": 0.8457953488372093 }, { "epoch": 1.4364161849710984, "grad_norm": 0.5299075126510611, "learning_rate": 5.761710355688627e-07, "loss": 0.4779651165008545, "step": 497, "token_acc": 0.8387296285988187 }, { "epoch": 1.439306358381503, "grad_norm": 0.5231792243250346, "learning_rate": 5.745961468069749e-07, "loss": 0.5104596614837646, "step": 498, "token_acc": 0.8271942849713633 }, { "epoch": 1.4421965317919074, "grad_norm": 0.6000529888737813, "learning_rate": 5.730205007555733e-07, "loss": 0.6098222136497498, "step": 499, "token_acc": 0.797237394529817 }, { "epoch": 1.4450867052023122, "grad_norm": 0.5852668345047015, "learning_rate": 5.714441134103936e-07, "loss": 0.5637513995170593, "step": 500, "token_acc": 0.8103524746275665 }, { "epoch": 1.4450867052023122, "eval_loss": 0.5809597969055176, "eval_runtime": 69.4729, "eval_samples_per_second": 1.583, "eval_steps_per_second": 0.202, "eval_token_acc": 0.8065338513984092, "step": 500 }, { "epoch": 1.4479768786127167, "grad_norm": 0.4857410434739766, "learning_rate": 5.698670007746966e-07, "loss": 0.5209301710128784, "step": 501, "token_acc": 0.8231232032245636 }, { "epoch": 1.4508670520231215, "grad_norm": 0.4784985253670375, "learning_rate": 5.682891788591065e-07, "loss": 0.5571726560592651, "step": 502, "token_acc": 0.8108515538539766 }, { "epoch": 1.453757225433526, "grad_norm": 0.5367888787021339, "learning_rate": 5.66710663681448e-07, "loss": 0.49731090664863586, "step": 503, "token_acc": 0.8312890657633916 }, { "epoch": 1.4566473988439306, "grad_norm": 0.610496198886357, "learning_rate": 5.651314712665832e-07, "loss": 0.5665647387504578, "step": 504, "token_acc": 0.8079466209795078 }, { "epoch": 1.4595375722543353, "grad_norm": 1.0378355029945652, "learning_rate": 5.635516176462501e-07, "loss": 0.5903141498565674, "step": 505, "token_acc": 0.8006371133060007 }, { "epoch": 1.4624277456647399, "grad_norm": 0.5540207350664488, "learning_rate": 5.619711188588986e-07, "loss": 0.5362493991851807, "step": 506, "token_acc": 0.820343725019984 }, { "epoch": 1.4653179190751446, "grad_norm": 0.5144874632858891, "learning_rate": 5.603899909495283e-07, "loss": 0.5462620258331299, "step": 507, "token_acc": 0.8145612480715733 }, { "epoch": 1.4682080924855492, "grad_norm": 0.5592031195717259, "learning_rate": 5.58808249969526e-07, "loss": 0.5476292371749878, "step": 508, "token_acc": 0.8147672146736102 }, { "epoch": 1.4710982658959537, "grad_norm": 0.8823564949169135, "learning_rate": 5.57225911976502e-07, "loss": 0.5868964195251465, "step": 509, "token_acc": 0.804788821591468 }, { "epoch": 1.4739884393063583, "grad_norm": 0.5547092232918307, "learning_rate": 5.556429930341273e-07, "loss": 0.5038424134254456, "step": 510, "token_acc": 0.8334411359013724 }, { "epoch": 1.476878612716763, "grad_norm": 0.5799136969979296, "learning_rate": 5.540595092119708e-07, "loss": 0.5707584619522095, "step": 511, "token_acc": 0.810527226273487 }, { "epoch": 1.4797687861271676, "grad_norm": 0.5034147261225864, "learning_rate": 5.52475476585336e-07, "loss": 0.5583351850509644, "step": 512, "token_acc": 0.8103952305319614 }, { "epoch": 1.4826589595375723, "grad_norm": 0.5160609299204681, "learning_rate": 5.508909112350976e-07, "loss": 0.5299844145774841, "step": 513, "token_acc": 0.8211946274807083 }, { "epoch": 1.4855491329479769, "grad_norm": 0.48690505381618093, "learning_rate": 5.493058292475387e-07, "loss": 0.5815989375114441, "step": 514, "token_acc": 0.8052997388378583 }, { "epoch": 1.4884393063583814, "grad_norm": 0.5497798749732475, "learning_rate": 5.477202467141864e-07, "loss": 0.5317429900169373, "step": 515, "token_acc": 0.8238470637503765 }, { "epoch": 1.4913294797687862, "grad_norm": 0.6297718557593524, "learning_rate": 5.46134179731651e-07, "loss": 0.5170228481292725, "step": 516, "token_acc": 0.8249339191625676 }, { "epoch": 1.4942196531791907, "grad_norm": 0.5879194826209626, "learning_rate": 5.445476444014591e-07, "loss": 0.5530685186386108, "step": 517, "token_acc": 0.8124287116369134 }, { "epoch": 1.4971098265895955, "grad_norm": 0.7172467911918745, "learning_rate": 5.429606568298925e-07, "loss": 0.5767130851745605, "step": 518, "token_acc": 0.8076758697324558 }, { "epoch": 1.5, "grad_norm": 0.47225756227931015, "learning_rate": 5.413732331278248e-07, "loss": 0.5357682704925537, "step": 519, "token_acc": 0.818332255376673 }, { "epoch": 1.5028901734104045, "grad_norm": 0.5315785549808126, "learning_rate": 5.397853894105559e-07, "loss": 0.5103631019592285, "step": 520, "token_acc": 0.8272772712126261 }, { "epoch": 1.5057803468208093, "grad_norm": 0.5411201442197484, "learning_rate": 5.381971417976505e-07, "loss": 0.6071707606315613, "step": 521, "token_acc": 0.7980659432441779 }, { "epoch": 1.5086705202312138, "grad_norm": 0.48884895821181845, "learning_rate": 5.366085064127734e-07, "loss": 0.5692754983901978, "step": 522, "token_acc": 0.8067921134275868 }, { "epoch": 1.5115606936416186, "grad_norm": 0.5118700142105465, "learning_rate": 5.350194993835257e-07, "loss": 0.5697520971298218, "step": 523, "token_acc": 0.8085179483452373 }, { "epoch": 1.5144508670520231, "grad_norm": 0.5936738113687722, "learning_rate": 5.33430136841282e-07, "loss": 0.5466612577438354, "step": 524, "token_acc": 0.8143604233276328 }, { "epoch": 1.5173410404624277, "grad_norm": 0.48377394646569144, "learning_rate": 5.318404349210255e-07, "loss": 0.5685998201370239, "step": 525, "token_acc": 0.808461779914424 }, { "epoch": 1.5202312138728322, "grad_norm": 0.5190453952524928, "learning_rate": 5.302504097611846e-07, "loss": 0.5479923486709595, "step": 526, "token_acc": 0.8150076205934166 }, { "epoch": 1.523121387283237, "grad_norm": 0.6234920552697755, "learning_rate": 5.286600775034699e-07, "loss": 0.5165071487426758, "step": 527, "token_acc": 0.8261736549800983 }, { "epoch": 1.5260115606936417, "grad_norm": 0.5881560338514248, "learning_rate": 5.270694542927088e-07, "loss": 0.5723020434379578, "step": 528, "token_acc": 0.808951938948829 }, { "epoch": 1.5289017341040463, "grad_norm": 0.5975961668165296, "learning_rate": 5.254785562766829e-07, "loss": 0.5684691667556763, "step": 529, "token_acc": 0.8089175396185871 }, { "epoch": 1.5317919075144508, "grad_norm": 0.6478162796925766, "learning_rate": 5.238873996059637e-07, "loss": 0.49971041083335876, "step": 530, "token_acc": 0.8301960912691917 }, { "epoch": 1.5346820809248554, "grad_norm": 0.6430164741639133, "learning_rate": 5.222960004337476e-07, "loss": 0.539410412311554, "step": 531, "token_acc": 0.8203352152694456 }, { "epoch": 1.5375722543352601, "grad_norm": 0.5795319284660402, "learning_rate": 5.207043749156944e-07, "loss": 0.5065566897392273, "step": 532, "token_acc": 0.8278279073124954 }, { "epoch": 1.5404624277456649, "grad_norm": 0.5464317098167678, "learning_rate": 5.191125392097604e-07, "loss": 0.5445448160171509, "step": 533, "token_acc": 0.8166163521084138 }, { "epoch": 1.5433526011560694, "grad_norm": 0.5152523695934649, "learning_rate": 5.175205094760361e-07, "loss": 0.5751731991767883, "step": 534, "token_acc": 0.8060461344386376 }, { "epoch": 1.546242774566474, "grad_norm": 0.5393208162828292, "learning_rate": 5.159283018765819e-07, "loss": 0.5777266621589661, "step": 535, "token_acc": 0.8069097414119084 }, { "epoch": 1.5491329479768785, "grad_norm": 0.5264135658228388, "learning_rate": 5.143359325752638e-07, "loss": 0.555731475353241, "step": 536, "token_acc": 0.8131375804713217 }, { "epoch": 1.5520231213872833, "grad_norm": 0.553539191702997, "learning_rate": 5.127434177375893e-07, "loss": 0.5539097189903259, "step": 537, "token_acc": 0.812809830006887 }, { "epoch": 1.5549132947976878, "grad_norm": 0.8304938276922723, "learning_rate": 5.111507735305434e-07, "loss": 0.535222589969635, "step": 538, "token_acc": 0.8182165566153093 }, { "epoch": 1.5578034682080926, "grad_norm": 0.5723817981155602, "learning_rate": 5.095580161224244e-07, "loss": 0.5616499185562134, "step": 539, "token_acc": 0.8143216251104015 }, { "epoch": 1.560693641618497, "grad_norm": 0.5555286868999088, "learning_rate": 5.079651616826801e-07, "loss": 0.5724209547042847, "step": 540, "token_acc": 0.8077700594252842 }, { "epoch": 1.5635838150289016, "grad_norm": 0.576829331739999, "learning_rate": 5.063722263817427e-07, "loss": 0.5502010583877563, "step": 541, "token_acc": 0.8148729355841307 }, { "epoch": 1.5664739884393064, "grad_norm": 0.6980607962330599, "learning_rate": 5.047792263908659e-07, "loss": 0.5372669696807861, "step": 542, "token_acc": 0.8214026830309711 }, { "epoch": 1.569364161849711, "grad_norm": 0.5728162578490732, "learning_rate": 5.031861778819601e-07, "loss": 0.5055459141731262, "step": 543, "token_acc": 0.8276528811478554 }, { "epoch": 1.5722543352601157, "grad_norm": 0.588844313912188, "learning_rate": 5.015930970274277e-07, "loss": 0.5107961893081665, "step": 544, "token_acc": 0.8256070951933737 }, { "epoch": 1.5751445086705202, "grad_norm": 0.49402725355257393, "learning_rate": 5e-07, "loss": 0.5780792236328125, "step": 545, "token_acc": 0.80536919727071 }, { "epoch": 1.5780346820809248, "grad_norm": 0.5150482904703839, "learning_rate": 4.984069029725722e-07, "loss": 0.5730597972869873, "step": 546, "token_acc": 0.8071878326447399 }, { "epoch": 1.5809248554913293, "grad_norm": 0.6120632562818131, "learning_rate": 4.968138221180401e-07, "loss": 0.48976290225982666, "step": 547, "token_acc": 0.8329062019477191 }, { "epoch": 1.583815028901734, "grad_norm": 0.5693765551777754, "learning_rate": 4.95220773609134e-07, "loss": 0.5690828561782837, "step": 548, "token_acc": 0.8103057397715957 }, { "epoch": 1.5867052023121389, "grad_norm": 0.5356011166477922, "learning_rate": 4.936277736182573e-07, "loss": 0.5775788426399231, "step": 549, "token_acc": 0.8077360101658677 }, { "epoch": 1.5895953757225434, "grad_norm": 0.5425409660783537, "learning_rate": 4.9203483831732e-07, "loss": 0.5838006138801575, "step": 550, "token_acc": 0.8051269382791122 }, { "epoch": 1.592485549132948, "grad_norm": 0.531381332935958, "learning_rate": 4.904419838775755e-07, "loss": 0.528168797492981, "step": 551, "token_acc": 0.8208675592063154 }, { "epoch": 1.5953757225433525, "grad_norm": 0.5122923018471659, "learning_rate": 4.888492264694565e-07, "loss": 0.5490496158599854, "step": 552, "token_acc": 0.8156343068498415 }, { "epoch": 1.5982658959537572, "grad_norm": 0.5537803937619057, "learning_rate": 4.872565822624106e-07, "loss": 0.5283633470535278, "step": 553, "token_acc": 0.8213697374264063 }, { "epoch": 1.601156069364162, "grad_norm": 0.533996696099063, "learning_rate": 4.856640674247363e-07, "loss": 0.5403317213058472, "step": 554, "token_acc": 0.8186500168747891 }, { "epoch": 1.6040462427745665, "grad_norm": 0.6968861408661483, "learning_rate": 4.840716981234181e-07, "loss": 0.5232794880867004, "step": 555, "token_acc": 0.8258206662354464 }, { "epoch": 1.606936416184971, "grad_norm": 0.5457170981213912, "learning_rate": 4.82479490523964e-07, "loss": 0.5531569123268127, "step": 556, "token_acc": 0.8132714653155657 }, { "epoch": 1.6098265895953756, "grad_norm": 0.5611664995745906, "learning_rate": 4.808874607902397e-07, "loss": 0.580593466758728, "step": 557, "token_acc": 0.8061331347873197 }, { "epoch": 1.6127167630057804, "grad_norm": 0.49146546445526984, "learning_rate": 4.792956250843055e-07, "loss": 0.5263780355453491, "step": 558, "token_acc": 0.8212147967727204 }, { "epoch": 1.6156069364161851, "grad_norm": 0.5650532769234693, "learning_rate": 4.777039995662522e-07, "loss": 0.535209596157074, "step": 559, "token_acc": 0.8197443965795302 }, { "epoch": 1.6184971098265897, "grad_norm": 0.6028109251795714, "learning_rate": 4.7611260039403655e-07, "loss": 0.5842093825340271, "step": 560, "token_acc": 0.8079101659544867 }, { "epoch": 1.6213872832369942, "grad_norm": 0.59069578828569, "learning_rate": 4.7452144372331715e-07, "loss": 0.49987393617630005, "step": 561, "token_acc": 0.8312081956170992 }, { "epoch": 1.6242774566473988, "grad_norm": 0.5752034924536564, "learning_rate": 4.7293054570729126e-07, "loss": 0.5631648302078247, "step": 562, "token_acc": 0.8105449311754528 }, { "epoch": 1.6271676300578035, "grad_norm": 0.48011026987442956, "learning_rate": 4.7133992249653026e-07, "loss": 0.6020775437355042, "step": 563, "token_acc": 0.80207682093969 }, { "epoch": 1.630057803468208, "grad_norm": 0.6157896994330491, "learning_rate": 4.697495902388154e-07, "loss": 0.5418002009391785, "step": 564, "token_acc": 0.8178849600782141 }, { "epoch": 1.6329479768786128, "grad_norm": 0.5711847053504078, "learning_rate": 4.681595650789746e-07, "loss": 0.5428210496902466, "step": 565, "token_acc": 0.815186965701749 }, { "epoch": 1.6358381502890174, "grad_norm": 0.5202306815183112, "learning_rate": 4.6656986315871815e-07, "loss": 0.5333169102668762, "step": 566, "token_acc": 0.8192019018509085 }, { "epoch": 1.638728323699422, "grad_norm": 0.5862764371195341, "learning_rate": 4.649805006164743e-07, "loss": 0.5256876349449158, "step": 567, "token_acc": 0.8224795998947091 }, { "epoch": 1.6416184971098264, "grad_norm": 0.5972850501922398, "learning_rate": 4.6339149358722675e-07, "loss": 0.4838550388813019, "step": 568, "token_acc": 0.8348972296693477 }, { "epoch": 1.6445086705202312, "grad_norm": 0.5597928387418396, "learning_rate": 4.618028582023495e-07, "loss": 0.5284090042114258, "step": 569, "token_acc": 0.8216369128482156 }, { "epoch": 1.647398843930636, "grad_norm": 0.6008687154199086, "learning_rate": 4.6021461058944415e-07, "loss": 0.5147076845169067, "step": 570, "token_acc": 0.8275472384008092 }, { "epoch": 1.6502890173410405, "grad_norm": 0.6575913400532123, "learning_rate": 4.5862676687217526e-07, "loss": 0.5117477178573608, "step": 571, "token_acc": 0.8287706152259228 }, { "epoch": 1.653179190751445, "grad_norm": 0.5137586329958652, "learning_rate": 4.5703934317010727e-07, "loss": 0.5332241058349609, "step": 572, "token_acc": 0.8202151610509888 }, { "epoch": 1.6560693641618496, "grad_norm": 0.565500132263929, "learning_rate": 4.5545235559854105e-07, "loss": 0.5527046918869019, "step": 573, "token_acc": 0.8138320979141528 }, { "epoch": 1.6589595375722543, "grad_norm": 0.5302962565332909, "learning_rate": 4.5386582026834904e-07, "loss": 0.5092106461524963, "step": 574, "token_acc": 0.8281128993919504 }, { "epoch": 1.661849710982659, "grad_norm": 0.5821742123016643, "learning_rate": 4.5227975328581335e-07, "loss": 0.5064735412597656, "step": 575, "token_acc": 0.827575659879804 }, { "epoch": 1.6647398843930636, "grad_norm": 0.5963479290796274, "learning_rate": 4.5069417075246146e-07, "loss": 0.4928985834121704, "step": 576, "token_acc": 0.8335413266775463 }, { "epoch": 1.6676300578034682, "grad_norm": 0.6048528428075496, "learning_rate": 4.491090887649024e-07, "loss": 0.49480709433555603, "step": 577, "token_acc": 0.8347347057118005 }, { "epoch": 1.6705202312138727, "grad_norm": 0.6285946360216301, "learning_rate": 4.475245234146639e-07, "loss": 0.49079689383506775, "step": 578, "token_acc": 0.83443186255369 }, { "epoch": 1.6734104046242775, "grad_norm": 0.5603272652152215, "learning_rate": 4.459404907880292e-07, "loss": 0.5334948897361755, "step": 579, "token_acc": 0.8186869024041065 }, { "epoch": 1.6763005780346822, "grad_norm": 0.5366750310588114, "learning_rate": 4.443570069658727e-07, "loss": 0.5434994101524353, "step": 580, "token_acc": 0.816468327847366 }, { "epoch": 1.6791907514450868, "grad_norm": 0.5467060355475981, "learning_rate": 4.42774088023498e-07, "loss": 0.5757695436477661, "step": 581, "token_acc": 0.8080333034841515 }, { "epoch": 1.6820809248554913, "grad_norm": 0.6184966009398549, "learning_rate": 4.4119175003047407e-07, "loss": 0.5647035241127014, "step": 582, "token_acc": 0.8111076384093734 }, { "epoch": 1.6849710982658959, "grad_norm": 0.5185867079907565, "learning_rate": 4.396100090504717e-07, "loss": 0.5796575546264648, "step": 583, "token_acc": 0.8038202807075824 }, { "epoch": 1.6878612716763006, "grad_norm": 0.813643580955912, "learning_rate": 4.380288811411015e-07, "loss": 0.4743460416793823, "step": 584, "token_acc": 0.8386408207372227 }, { "epoch": 1.6907514450867052, "grad_norm": 0.5897820309260559, "learning_rate": 4.364483823537498e-07, "loss": 0.5133877992630005, "step": 585, "token_acc": 0.8280596690740123 }, { "epoch": 1.69364161849711, "grad_norm": 0.5045181308055782, "learning_rate": 4.3486852873341675e-07, "loss": 0.4322221279144287, "step": 586, "token_acc": 0.8542273580630543 }, { "epoch": 1.6965317919075145, "grad_norm": 0.5368324019397285, "learning_rate": 4.3328933631855195e-07, "loss": 0.5392330884933472, "step": 587, "token_acc": 0.8167310479753804 }, { "epoch": 1.699421965317919, "grad_norm": 0.6325523087901944, "learning_rate": 4.317108211408933e-07, "loss": 0.5353363752365112, "step": 588, "token_acc": 0.8181194354468216 }, { "epoch": 1.7023121387283235, "grad_norm": 0.5524128184191415, "learning_rate": 4.301329992253034e-07, "loss": 0.49616819620132446, "step": 589, "token_acc": 0.8328951746002753 }, { "epoch": 1.7052023121387283, "grad_norm": 0.5034001899067154, "learning_rate": 4.285558865896065e-07, "loss": 0.60711270570755, "step": 590, "token_acc": 0.79853336934882 }, { "epoch": 1.708092485549133, "grad_norm": 0.5374954529356852, "learning_rate": 4.2697949924442667e-07, "loss": 0.5293912291526794, "step": 591, "token_acc": 0.823666171683991 }, { "epoch": 1.7109826589595376, "grad_norm": 0.5635901606786159, "learning_rate": 4.2540385319302524e-07, "loss": 0.5353492498397827, "step": 592, "token_acc": 0.8201790482173709 }, { "epoch": 1.7138728323699421, "grad_norm": 0.5253802438717141, "learning_rate": 4.2382896443113723e-07, "loss": 0.5334903001785278, "step": 593, "token_acc": 0.818032814303156 }, { "epoch": 1.7167630057803467, "grad_norm": 0.4950360437778214, "learning_rate": 4.222548489468105e-07, "loss": 0.5341077446937561, "step": 594, "token_acc": 0.8223698601883738 }, { "epoch": 1.7196531791907514, "grad_norm": 0.5514023397940045, "learning_rate": 4.2068152272024233e-07, "loss": 0.5363609194755554, "step": 595, "token_acc": 0.8196168676738834 }, { "epoch": 1.7225433526011562, "grad_norm": 0.5623269464968738, "learning_rate": 4.1910900172361763e-07, "loss": 0.5504116415977478, "step": 596, "token_acc": 0.8151576025420944 }, { "epoch": 1.7254335260115607, "grad_norm": 0.5274551240137945, "learning_rate": 4.175373019209468e-07, "loss": 0.5549143552780151, "step": 597, "token_acc": 0.8107931600579981 }, { "epoch": 1.7283236994219653, "grad_norm": 0.5704477484512106, "learning_rate": 4.159664392679038e-07, "loss": 0.5494258403778076, "step": 598, "token_acc": 0.8168460618486246 }, { "epoch": 1.7312138728323698, "grad_norm": 0.6161778636830428, "learning_rate": 4.143964297116633e-07, "loss": 0.5577751994132996, "step": 599, "token_acc": 0.8121810843728358 }, { "epoch": 1.7341040462427746, "grad_norm": 0.6075742333688984, "learning_rate": 4.1282728919074005e-07, "loss": 0.5403814315795898, "step": 600, "token_acc": 0.821105101452986 }, { "epoch": 1.7369942196531793, "grad_norm": 0.6520533036933062, "learning_rate": 4.1125903363482634e-07, "loss": 0.47892940044403076, "step": 601, "token_acc": 0.8369930163846361 }, { "epoch": 1.739884393063584, "grad_norm": 0.5680876440782588, "learning_rate": 4.0969167896463046e-07, "loss": 0.5336910486221313, "step": 602, "token_acc": 0.8216713342322719 }, { "epoch": 1.7427745664739884, "grad_norm": 0.7080634828510891, "learning_rate": 4.0812524109171475e-07, "loss": 0.524694561958313, "step": 603, "token_acc": 0.8261413383364603 }, { "epoch": 1.745664739884393, "grad_norm": 0.528594204710658, "learning_rate": 4.0655973591833475e-07, "loss": 0.5086634755134583, "step": 604, "token_acc": 0.8286352131054758 }, { "epoch": 1.7485549132947977, "grad_norm": 0.6260551904964319, "learning_rate": 4.0499517933727727e-07, "loss": 0.48479533195495605, "step": 605, "token_acc": 0.8348625638530771 }, { "epoch": 1.7514450867052023, "grad_norm": 0.5425421161730628, "learning_rate": 4.034315872316987e-07, "loss": 0.5817371606826782, "step": 606, "token_acc": 0.8068743095851797 }, { "epoch": 1.754335260115607, "grad_norm": 0.5183265889747526, "learning_rate": 4.018689754749648e-07, "loss": 0.508335292339325, "step": 607, "token_acc": 0.8271757714886951 }, { "epoch": 1.7572254335260116, "grad_norm": 0.5542866259664111, "learning_rate": 4.0030735993048884e-07, "loss": 0.5586389899253845, "step": 608, "token_acc": 0.8166898202884842 }, { "epoch": 1.760115606936416, "grad_norm": 0.5411864859640132, "learning_rate": 3.987467564515703e-07, "loss": 0.4601624608039856, "step": 609, "token_acc": 0.84508010404543 }, { "epoch": 1.7630057803468207, "grad_norm": 0.524886018198833, "learning_rate": 3.971871808812347e-07, "loss": 0.6006595492362976, "step": 610, "token_acc": 0.8011782786885245 }, { "epoch": 1.7658959537572254, "grad_norm": 0.6317327126827325, "learning_rate": 3.956286490520724e-07, "loss": 0.509284496307373, "step": 611, "token_acc": 0.8325460029684483 }, { "epoch": 1.7687861271676302, "grad_norm": 0.5390581631300952, "learning_rate": 3.9407117678607756e-07, "loss": 0.4938768744468689, "step": 612, "token_acc": 0.8321855607688815 }, { "epoch": 1.7716763005780347, "grad_norm": 0.6560783619375582, "learning_rate": 3.9251477989448795e-07, "loss": 0.517693042755127, "step": 613, "token_acc": 0.8247808891627084 }, { "epoch": 1.7745664739884393, "grad_norm": 0.5602632255167417, "learning_rate": 3.909594741776246e-07, "loss": 0.5566587448120117, "step": 614, "token_acc": 0.812049268832398 }, { "epoch": 1.7774566473988438, "grad_norm": 0.5947561408697656, "learning_rate": 3.8940527542473033e-07, "loss": 0.5609596967697144, "step": 615, "token_acc": 0.8135071333264908 }, { "epoch": 1.7803468208092486, "grad_norm": 0.5666442289982523, "learning_rate": 3.8785219941381096e-07, "loss": 0.5130019187927246, "step": 616, "token_acc": 0.8260872845234054 }, { "epoch": 1.7832369942196533, "grad_norm": 0.5455613722107414, "learning_rate": 3.8630026191147405e-07, "loss": 0.5589362978935242, "step": 617, "token_acc": 0.812414640315063 }, { "epoch": 1.7861271676300579, "grad_norm": 0.550217294387885, "learning_rate": 3.8474947867276943e-07, "loss": 0.5442770719528198, "step": 618, "token_acc": 0.8159889681462442 }, { "epoch": 1.7890173410404624, "grad_norm": 0.6147473096977814, "learning_rate": 3.8319986544102843e-07, "loss": 0.5019974708557129, "step": 619, "token_acc": 0.8287660341354818 }, { "epoch": 1.791907514450867, "grad_norm": 0.5247209374319454, "learning_rate": 3.8165143794770536e-07, "loss": 0.5381553769111633, "step": 620, "token_acc": 0.8177024482109227 }, { "epoch": 1.7947976878612717, "grad_norm": 0.5828193451002669, "learning_rate": 3.8010421191221684e-07, "loss": 0.523591160774231, "step": 621, "token_acc": 0.8240329148286393 }, { "epoch": 1.7976878612716765, "grad_norm": 0.6015955817395803, "learning_rate": 3.78558203041782e-07, "loss": 0.539184033870697, "step": 622, "token_acc": 0.8198696606927818 }, { "epoch": 1.800578034682081, "grad_norm": 0.6008612726420935, "learning_rate": 3.7701342703126394e-07, "loss": 0.48327842354774475, "step": 623, "token_acc": 0.8381134839691216 }, { "epoch": 1.8034682080924855, "grad_norm": 0.6147376285603221, "learning_rate": 3.754698995630101e-07, "loss": 0.5317155122756958, "step": 624, "token_acc": 0.8217411222039681 }, { "epoch": 1.80635838150289, "grad_norm": 0.6052477258361706, "learning_rate": 3.7392763630669243e-07, "loss": 0.5276878476142883, "step": 625, "token_acc": 0.8253162139403252 }, { "epoch": 1.8092485549132948, "grad_norm": 0.6010435836572232, "learning_rate": 3.7238665291914906e-07, "loss": 0.5263775587081909, "step": 626, "token_acc": 0.8255283062505889 }, { "epoch": 1.8121387283236994, "grad_norm": 0.46459212133429395, "learning_rate": 3.7084696504422525e-07, "loss": 0.547301173210144, "step": 627, "token_acc": 0.8155224935354174 }, { "epoch": 1.8150289017341041, "grad_norm": 0.567681963556663, "learning_rate": 3.693085883126137e-07, "loss": 0.504138708114624, "step": 628, "token_acc": 0.8300083822296731 }, { "epoch": 1.8179190751445087, "grad_norm": 0.5584446222303159, "learning_rate": 3.6777153834169726e-07, "loss": 0.5485329031944275, "step": 629, "token_acc": 0.8132374537904492 }, { "epoch": 1.8208092485549132, "grad_norm": 0.5610791187838037, "learning_rate": 3.6623583073538965e-07, "loss": 0.5641239881515503, "step": 630, "token_acc": 0.8092657184953543 }, { "epoch": 1.8236994219653178, "grad_norm": 0.5571741993799751, "learning_rate": 3.647014810839766e-07, "loss": 0.5435695648193359, "step": 631, "token_acc": 0.8177736577401747 }, { "epoch": 1.8265895953757225, "grad_norm": 0.49451328689884416, "learning_rate": 3.6316850496395855e-07, "loss": 0.5079208612442017, "step": 632, "token_acc": 0.8277710403419788 }, { "epoch": 1.8294797687861273, "grad_norm": 0.731312278004029, "learning_rate": 3.6163691793789183e-07, "loss": 0.5612790584564209, "step": 633, "token_acc": 0.8145309625996321 }, { "epoch": 1.8323699421965318, "grad_norm": 0.5433070122384833, "learning_rate": 3.6010673555423116e-07, "loss": 0.5702831149101257, "step": 634, "token_acc": 0.8084171358992268 }, { "epoch": 1.8352601156069364, "grad_norm": 0.5731111882216399, "learning_rate": 3.585779733471709e-07, "loss": 0.5208647847175598, "step": 635, "token_acc": 0.8247836812568473 }, { "epoch": 1.838150289017341, "grad_norm": 0.5863236667781423, "learning_rate": 3.5705064683648855e-07, "loss": 0.5619288682937622, "step": 636, "token_acc": 0.8113308744654901 }, { "epoch": 1.8410404624277457, "grad_norm": 0.5914772914689451, "learning_rate": 3.555247715273867e-07, "loss": 0.49036872386932373, "step": 637, "token_acc": 0.8374078180826161 }, { "epoch": 1.8439306358381504, "grad_norm": 0.5295217861583622, "learning_rate": 3.5400036291033485e-07, "loss": 0.5192829966545105, "step": 638, "token_acc": 0.8258416465326863 }, { "epoch": 1.846820809248555, "grad_norm": 0.5366095434473555, "learning_rate": 3.5247743646091367e-07, "loss": 0.48854076862335205, "step": 639, "token_acc": 0.8355026160864565 }, { "epoch": 1.8497109826589595, "grad_norm": 0.552265227323895, "learning_rate": 3.509560076396567e-07, "loss": 0.5541850924491882, "step": 640, "token_acc": 0.8161763703067251 }, { "epoch": 1.852601156069364, "grad_norm": 0.5766930712255567, "learning_rate": 3.4943609189189345e-07, "loss": 0.49490103125572205, "step": 641, "token_acc": 0.8331491368709432 }, { "epoch": 1.8554913294797688, "grad_norm": 0.535142297976956, "learning_rate": 3.4791770464759347e-07, "loss": 0.4898555278778076, "step": 642, "token_acc": 0.8374039851247991 }, { "epoch": 1.8583815028901736, "grad_norm": 0.6183254820329128, "learning_rate": 3.4640086132120906e-07, "loss": 0.5269954800605774, "step": 643, "token_acc": 0.8234169800850853 }, { "epoch": 1.861271676300578, "grad_norm": 0.5689322137373185, "learning_rate": 3.4488557731151845e-07, "loss": 0.5776628851890564, "step": 644, "token_acc": 0.8088350364511105 }, { "epoch": 1.8641618497109826, "grad_norm": 0.6658391987358445, "learning_rate": 3.433718680014705e-07, "loss": 0.5674536228179932, "step": 645, "token_acc": 0.8111267784268523 }, { "epoch": 1.8670520231213872, "grad_norm": 0.5702895217250429, "learning_rate": 3.418597487580277e-07, "loss": 0.5942685008049011, "step": 646, "token_acc": 0.8022179198440608 }, { "epoch": 1.869942196531792, "grad_norm": 0.5309534408388851, "learning_rate": 3.4034923493201007e-07, "loss": 0.5299490690231323, "step": 647, "token_acc": 0.821584668833352 }, { "epoch": 1.8728323699421965, "grad_norm": 0.5410494679792496, "learning_rate": 3.388403418579401e-07, "loss": 0.606309175491333, "step": 648, "token_acc": 0.798714223159906 }, { "epoch": 1.8757225433526012, "grad_norm": 0.5885088182247251, "learning_rate": 3.3733308485388654e-07, "loss": 0.5152050256729126, "step": 649, "token_acc": 0.8267703435171321 }, { "epoch": 1.8786127167630058, "grad_norm": 0.5654387308838804, "learning_rate": 3.3582747922130903e-07, "loss": 0.5702789425849915, "step": 650, "token_acc": 0.8114149857200532 }, { "epoch": 1.8815028901734103, "grad_norm": 0.5850200396224108, "learning_rate": 3.343235402449025e-07, "loss": 0.5715373754501343, "step": 651, "token_acc": 0.809812202628705 }, { "epoch": 1.8843930635838149, "grad_norm": 0.556702805056612, "learning_rate": 3.3282128319244237e-07, "loss": 0.5341757535934448, "step": 652, "token_acc": 0.8190304033783219 }, { "epoch": 1.8872832369942196, "grad_norm": 0.5947101357097584, "learning_rate": 3.313207233146296e-07, "loss": 0.5120434165000916, "step": 653, "token_acc": 0.8284752116658459 }, { "epoch": 1.8901734104046244, "grad_norm": 0.582059481324802, "learning_rate": 3.2982187584493516e-07, "loss": 0.55910724401474, "step": 654, "token_acc": 0.8136601394849785 }, { "epoch": 1.893063583815029, "grad_norm": 0.5455003297751219, "learning_rate": 3.283247559994463e-07, "loss": 0.4808557629585266, "step": 655, "token_acc": 0.8359401998347231 }, { "epoch": 1.8959537572254335, "grad_norm": 0.5917330827702398, "learning_rate": 3.268293789767118e-07, "loss": 0.5275037288665771, "step": 656, "token_acc": 0.8203649654462709 }, { "epoch": 1.898843930635838, "grad_norm": 0.604537834207858, "learning_rate": 3.2533575995758694e-07, "loss": 0.536374568939209, "step": 657, "token_acc": 0.8204949969817257 }, { "epoch": 1.9017341040462428, "grad_norm": 0.4877298329861977, "learning_rate": 3.2384391410508066e-07, "loss": 0.5517327785491943, "step": 658, "token_acc": 0.8144875608045037 }, { "epoch": 1.9046242774566475, "grad_norm": 0.5138107466063505, "learning_rate": 3.223538565642009e-07, "loss": 0.5936318635940552, "step": 659, "token_acc": 0.8033954818487206 }, { "epoch": 1.907514450867052, "grad_norm": 0.6408117816293808, "learning_rate": 3.2086560246180016e-07, "loss": 0.5199168920516968, "step": 660, "token_acc": 0.823338105590611 }, { "epoch": 1.9104046242774566, "grad_norm": 0.6769271622378699, "learning_rate": 3.1937916690642355e-07, "loss": 0.5296117067337036, "step": 661, "token_acc": 0.8234518795819685 }, { "epoch": 1.9132947976878611, "grad_norm": 0.5205148500482691, "learning_rate": 3.178945649881543e-07, "loss": 0.4881097674369812, "step": 662, "token_acc": 0.8381457544657637 }, { "epoch": 1.916184971098266, "grad_norm": 0.533469943639252, "learning_rate": 3.1641181177846046e-07, "loss": 0.5646488666534424, "step": 663, "token_acc": 0.8092274601183008 }, { "epoch": 1.9190751445086707, "grad_norm": 0.5079029266136241, "learning_rate": 3.1493092233004277e-07, "loss": 0.565247654914856, "step": 664, "token_acc": 0.8091681448977687 }, { "epoch": 1.9219653179190752, "grad_norm": 0.5846146749149876, "learning_rate": 3.1345191167668106e-07, "loss": 0.46707916259765625, "step": 665, "token_acc": 0.8448507638926736 }, { "epoch": 1.9248554913294798, "grad_norm": 0.6115493897752081, "learning_rate": 3.119747948330821e-07, "loss": 0.49020782113075256, "step": 666, "token_acc": 0.8343801519151217 }, { "epoch": 1.9277456647398843, "grad_norm": 0.5665579491864339, "learning_rate": 3.1049958679472645e-07, "loss": 0.4773547649383545, "step": 667, "token_acc": 0.840464602970709 }, { "epoch": 1.930635838150289, "grad_norm": 0.5428950150023341, "learning_rate": 3.0902630253771725e-07, "loss": 0.5331814885139465, "step": 668, "token_acc": 0.8203493165709791 }, { "epoch": 1.9335260115606936, "grad_norm": 0.535673154611531, "learning_rate": 3.0755495701862785e-07, "loss": 0.5440840125083923, "step": 669, "token_acc": 0.8188541358240693 }, { "epoch": 1.9364161849710984, "grad_norm": 0.4836434667966126, "learning_rate": 3.06085565174349e-07, "loss": 0.5037864446640015, "step": 670, "token_acc": 0.8303648820337454 }, { "epoch": 1.939306358381503, "grad_norm": 0.6272828775317285, "learning_rate": 3.046181419219386e-07, "loss": 0.5913348197937012, "step": 671, "token_acc": 0.804053529366086 }, { "epoch": 1.9421965317919074, "grad_norm": 0.47821443556435045, "learning_rate": 3.031527021584701e-07, "loss": 0.5496195554733276, "step": 672, "token_acc": 0.8131932821607896 }, { "epoch": 1.9450867052023122, "grad_norm": 0.5368717641927174, "learning_rate": 3.0168926076087986e-07, "loss": 0.5248396396636963, "step": 673, "token_acc": 0.8238304421235078 }, { "epoch": 1.9479768786127167, "grad_norm": 0.5546004209488442, "learning_rate": 3.002278325858177e-07, "loss": 0.5503116846084595, "step": 674, "token_acc": 0.8154341018265293 }, { "epoch": 1.9508670520231215, "grad_norm": 0.5406553961850177, "learning_rate": 2.987684324694957e-07, "loss": 0.5093920230865479, "step": 675, "token_acc": 0.8285504848168147 }, { "epoch": 1.953757225433526, "grad_norm": 0.5070602927484339, "learning_rate": 2.9731107522753654e-07, "loss": 0.6153904795646667, "step": 676, "token_acc": 0.7934051997463538 }, { "epoch": 1.9566473988439306, "grad_norm": 0.6200327187024355, "learning_rate": 2.9585577565482484e-07, "loss": 0.49602842330932617, "step": 677, "token_acc": 0.8349261185482811 }, { "epoch": 1.9595375722543351, "grad_norm": 0.5432813085052021, "learning_rate": 2.944025485253557e-07, "loss": 0.5533842444419861, "step": 678, "token_acc": 0.8136697934557625 }, { "epoch": 1.9624277456647399, "grad_norm": 0.5655183170978749, "learning_rate": 2.929514085920848e-07, "loss": 0.5408231019973755, "step": 679, "token_acc": 0.8149668765846079 }, { "epoch": 1.9653179190751446, "grad_norm": 0.5348380476951098, "learning_rate": 2.915023705867793e-07, "loss": 0.5112613439559937, "step": 680, "token_acc": 0.8288466633304877 }, { "epoch": 1.9682080924855492, "grad_norm": 0.5587948082197168, "learning_rate": 2.900554492198677e-07, "loss": 0.5132273435592651, "step": 681, "token_acc": 0.8262983388869136 }, { "epoch": 1.9710982658959537, "grad_norm": 0.6468264753422917, "learning_rate": 2.886106591802908e-07, "loss": 0.49628451466560364, "step": 682, "token_acc": 0.8309623989848394 }, { "epoch": 1.9739884393063583, "grad_norm": 0.8088000703258003, "learning_rate": 2.871680151353523e-07, "loss": 0.566349983215332, "step": 683, "token_acc": 0.813486073930626 }, { "epoch": 1.976878612716763, "grad_norm": 0.5639785659667156, "learning_rate": 2.8572753173057e-07, "loss": 0.5700632333755493, "step": 684, "token_acc": 0.8086862859910506 }, { "epoch": 1.9797687861271678, "grad_norm": 0.5543121051930197, "learning_rate": 2.842892235895279e-07, "loss": 0.5271592140197754, "step": 685, "token_acc": 0.8250378942459045 }, { "epoch": 1.9826589595375723, "grad_norm": 0.5567574729556525, "learning_rate": 2.828531053137257e-07, "loss": 0.528691828250885, "step": 686, "token_acc": 0.8240472063720813 }, { "epoch": 1.9855491329479769, "grad_norm": 0.582442051806669, "learning_rate": 2.814191914824332e-07, "loss": 0.5287505388259888, "step": 687, "token_acc": 0.821006600414202 }, { "epoch": 1.9884393063583814, "grad_norm": 0.5452501250540314, "learning_rate": 2.799874966525403e-07, "loss": 0.5334792733192444, "step": 688, "token_acc": 0.8213241825401043 }, { "epoch": 1.9913294797687862, "grad_norm": 0.5482828728372189, "learning_rate": 2.785580353584099e-07, "loss": 0.5632658004760742, "step": 689, "token_acc": 0.8116547561426986 }, { "epoch": 1.9942196531791907, "grad_norm": 0.633529877080459, "learning_rate": 2.771308221117309e-07, "loss": 0.516349196434021, "step": 690, "token_acc": 0.8251189141964578 }, { "epoch": 1.9971098265895955, "grad_norm": 0.5330351124089759, "learning_rate": 2.757058714013697e-07, "loss": 0.5631735324859619, "step": 691, "token_acc": 0.8110226467289205 }, { "epoch": 2.0, "grad_norm": 0.5696997466472099, "learning_rate": 2.7428319769322415e-07, "loss": 0.5440479516983032, "step": 692, "token_acc": 0.8158318122461348 }, { "epoch": 2.0028901734104045, "grad_norm": 0.5585685445254689, "learning_rate": 2.7286281543007597e-07, "loss": 0.5391400456428528, "step": 693, "token_acc": 0.8175343274767459 }, { "epoch": 2.005780346820809, "grad_norm": 0.4706256621473158, "learning_rate": 2.714447390314449e-07, "loss": 0.5360602140426636, "step": 694, "token_acc": 0.8195729923051913 }, { "epoch": 2.008670520231214, "grad_norm": 0.4975918712102163, "learning_rate": 2.700289828934416e-07, "loss": 0.5223442316055298, "step": 695, "token_acc": 0.8266022386843656 }, { "epoch": 2.0115606936416186, "grad_norm": 0.6855664652178536, "learning_rate": 2.686155613886215e-07, "loss": 0.5413398146629333, "step": 696, "token_acc": 0.8206837181461728 }, { "epoch": 2.014450867052023, "grad_norm": 0.48324739879314504, "learning_rate": 2.672044888658399e-07, "loss": 0.5646222829818726, "step": 697, "token_acc": 0.8079876543209876 }, { "epoch": 2.0173410404624277, "grad_norm": 0.5416524165161476, "learning_rate": 2.65795779650105e-07, "loss": 0.5677503347396851, "step": 698, "token_acc": 0.8107366402887164 }, { "epoch": 2.020231213872832, "grad_norm": 0.5180032228711846, "learning_rate": 2.64389448042433e-07, "loss": 0.5446953773498535, "step": 699, "token_acc": 0.8148853386782998 }, { "epoch": 2.023121387283237, "grad_norm": 0.5242926098982621, "learning_rate": 2.6298550831970307e-07, "loss": 0.5251763463020325, "step": 700, "token_acc": 0.8224519443333264 }, { "epoch": 2.0260115606936417, "grad_norm": 0.52590432100961, "learning_rate": 2.615839747345127e-07, "loss": 0.5811551809310913, "step": 701, "token_acc": 0.8070368200019533 }, { "epoch": 2.0289017341040463, "grad_norm": 0.5346477392780163, "learning_rate": 2.6018486151503213e-07, "loss": 0.5263258218765259, "step": 702, "token_acc": 0.8226229312836096 }, { "epoch": 2.031791907514451, "grad_norm": 0.6702369614403866, "learning_rate": 2.5878818286486026e-07, "loss": 0.4835773706436157, "step": 703, "token_acc": 0.8352293317787196 }, { "epoch": 2.0346820809248554, "grad_norm": 0.5810005206971598, "learning_rate": 2.573939529628816e-07, "loss": 0.5316369533538818, "step": 704, "token_acc": 0.8213102951763859 }, { "epoch": 2.03757225433526, "grad_norm": 0.5814408850367526, "learning_rate": 2.560021859631196e-07, "loss": 0.531090259552002, "step": 705, "token_acc": 0.8247005161281525 }, { "epoch": 2.040462427745665, "grad_norm": 0.5620278975131617, "learning_rate": 2.5461289599459646e-07, "loss": 0.4695814847946167, "step": 706, "token_acc": 0.8385467145834584 }, { "epoch": 2.0433526011560694, "grad_norm": 0.5109837854766828, "learning_rate": 2.532260971611867e-07, "loss": 0.5594449043273926, "step": 707, "token_acc": 0.8109966953664819 }, { "epoch": 2.046242774566474, "grad_norm": 0.5657246379091214, "learning_rate": 2.5184180354147554e-07, "loss": 0.520602285861969, "step": 708, "token_acc": 0.8247487538513655 }, { "epoch": 2.0491329479768785, "grad_norm": 0.4918673470663886, "learning_rate": 2.5046002918861606e-07, "loss": 0.5579814910888672, "step": 709, "token_acc": 0.8135782994649099 }, { "epoch": 2.052023121387283, "grad_norm": 0.48477796977022586, "learning_rate": 2.490807881301855e-07, "loss": 0.5919597744941711, "step": 710, "token_acc": 0.8019583967529172 }, { "epoch": 2.054913294797688, "grad_norm": 0.6496075635378676, "learning_rate": 2.477040943680436e-07, "loss": 0.48429036140441895, "step": 711, "token_acc": 0.8355824403733149 }, { "epoch": 2.0578034682080926, "grad_norm": 0.5519540209458493, "learning_rate": 2.4632996187819034e-07, "loss": 0.506065309047699, "step": 712, "token_acc": 0.8278258846453057 }, { "epoch": 2.060693641618497, "grad_norm": 0.5287310217228682, "learning_rate": 2.4495840461062433e-07, "loss": 0.5793042778968811, "step": 713, "token_acc": 0.8061971483241775 }, { "epoch": 2.0635838150289016, "grad_norm": 0.5904419866749646, "learning_rate": 2.435894364892005e-07, "loss": 0.573466420173645, "step": 714, "token_acc": 0.8098105997674032 }, { "epoch": 2.066473988439306, "grad_norm": 0.6225416912989975, "learning_rate": 2.4222307141148906e-07, "loss": 0.48143109679222107, "step": 715, "token_acc": 0.836179983151357 }, { "epoch": 2.069364161849711, "grad_norm": 0.5109219477999456, "learning_rate": 2.4085932324863507e-07, "loss": 0.544453501701355, "step": 716, "token_acc": 0.8168550972356652 }, { "epoch": 2.0722543352601157, "grad_norm": 0.544868652560984, "learning_rate": 2.394982058452165e-07, "loss": 0.550638735294342, "step": 717, "token_acc": 0.813385770281816 }, { "epoch": 2.0751445086705202, "grad_norm": 0.5334855839219953, "learning_rate": 2.3813973301910427e-07, "loss": 0.484441876411438, "step": 718, "token_acc": 0.8346531540424537 }, { "epoch": 2.078034682080925, "grad_norm": 0.5494544655057828, "learning_rate": 2.3678391856132202e-07, "loss": 0.5680737495422363, "step": 719, "token_acc": 0.8124086743334372 }, { "epoch": 2.0809248554913293, "grad_norm": 0.6045748429466216, "learning_rate": 2.3543077623590635e-07, "loss": 0.5128438472747803, "step": 720, "token_acc": 0.8279022575462924 }, { "epoch": 2.0838150289017343, "grad_norm": 0.48256069429990633, "learning_rate": 2.3408031977976623e-07, "loss": 0.5861136317253113, "step": 721, "token_acc": 0.8029797322959706 }, { "epoch": 2.086705202312139, "grad_norm": 0.5653447327029175, "learning_rate": 2.3273256290254402e-07, "loss": 0.537794828414917, "step": 722, "token_acc": 0.8187106929644486 }, { "epoch": 2.0895953757225434, "grad_norm": 0.511608140122125, "learning_rate": 2.3138751928647727e-07, "loss": 0.5536022782325745, "step": 723, "token_acc": 0.8143630972354428 }, { "epoch": 2.092485549132948, "grad_norm": 0.6461334504435571, "learning_rate": 2.3004520258625737e-07, "loss": 0.547166645526886, "step": 724, "token_acc": 0.8144167909990558 }, { "epoch": 2.0953757225433525, "grad_norm": 0.5280363246093879, "learning_rate": 2.2870562642889392e-07, "loss": 0.5407837629318237, "step": 725, "token_acc": 0.81717697615801 }, { "epoch": 2.098265895953757, "grad_norm": 0.5895491785859862, "learning_rate": 2.2736880441357398e-07, "loss": 0.5352712273597717, "step": 726, "token_acc": 0.8206253892344479 }, { "epoch": 2.101156069364162, "grad_norm": 0.510490807616544, "learning_rate": 2.2603475011152517e-07, "loss": 0.5849488973617554, "step": 727, "token_acc": 0.8032212807794704 }, { "epoch": 2.1040462427745665, "grad_norm": 0.5074478903676131, "learning_rate": 2.247034770658781e-07, "loss": 0.5740774869918823, "step": 728, "token_acc": 0.8094154108581142 }, { "epoch": 2.106936416184971, "grad_norm": 0.49465264402350506, "learning_rate": 2.2337499879152772e-07, "loss": 0.5517815351486206, "step": 729, "token_acc": 0.8150811818935997 }, { "epoch": 2.1098265895953756, "grad_norm": 0.5409252325098711, "learning_rate": 2.2204932877499778e-07, "loss": 0.5680674314498901, "step": 730, "token_acc": 0.8076237225087722 }, { "epoch": 2.11271676300578, "grad_norm": 0.5667599272734437, "learning_rate": 2.2072648047430182e-07, "loss": 0.546800971031189, "step": 731, "token_acc": 0.8193202586524828 }, { "epoch": 2.115606936416185, "grad_norm": 0.5820288457006244, "learning_rate": 2.1940646731880885e-07, "loss": 0.5512528419494629, "step": 732, "token_acc": 0.8157494966528321 }, { "epoch": 2.1184971098265897, "grad_norm": 0.4949523232866875, "learning_rate": 2.180893027091052e-07, "loss": 0.5347863435745239, "step": 733, "token_acc": 0.8186724373395966 }, { "epoch": 2.121387283236994, "grad_norm": 0.5570654028702667, "learning_rate": 2.1677500001685946e-07, "loss": 0.5904409289360046, "step": 734, "token_acc": 0.80330335262698 }, { "epoch": 2.1242774566473988, "grad_norm": 0.5169029043729536, "learning_rate": 2.154635725846861e-07, "loss": 0.516341507434845, "step": 735, "token_acc": 0.8256773697978942 }, { "epoch": 2.1271676300578033, "grad_norm": 0.5202271523957221, "learning_rate": 2.1415503372601096e-07, "loss": 0.5516679286956787, "step": 736, "token_acc": 0.8166926940731877 }, { "epoch": 2.1300578034682083, "grad_norm": 0.5270674995884185, "learning_rate": 2.1284939672493506e-07, "loss": 0.5113083124160767, "step": 737, "token_acc": 0.8254448999891605 }, { "epoch": 2.132947976878613, "grad_norm": 0.5738812261029933, "learning_rate": 2.1154667483609994e-07, "loss": 0.5508044958114624, "step": 738, "token_acc": 0.8145577840874766 }, { "epoch": 2.1358381502890174, "grad_norm": 0.5552867531342636, "learning_rate": 2.1024688128455432e-07, "loss": 0.5606477856636047, "step": 739, "token_acc": 0.8107334996977912 }, { "epoch": 2.138728323699422, "grad_norm": 0.6511169378075016, "learning_rate": 2.0895002926561733e-07, "loss": 0.5715325474739075, "step": 740, "token_acc": 0.808644395970687 }, { "epoch": 2.1416184971098264, "grad_norm": 0.5104195470816412, "learning_rate": 2.0765613194474756e-07, "loss": 0.5317230224609375, "step": 741, "token_acc": 0.8196870394179812 }, { "epoch": 2.1445086705202314, "grad_norm": 0.5222197914536979, "learning_rate": 2.0636520245740708e-07, "loss": 0.581384003162384, "step": 742, "token_acc": 0.8044084027512044 }, { "epoch": 2.147398843930636, "grad_norm": 0.5216435736648604, "learning_rate": 2.0507725390892895e-07, "loss": 0.5070130825042725, "step": 743, "token_acc": 0.8285304030472848 }, { "epoch": 2.1502890173410405, "grad_norm": 0.5689993002879171, "learning_rate": 2.0379229937438475e-07, "loss": 0.5079813599586487, "step": 744, "token_acc": 0.8282544832726795 }, { "epoch": 2.153179190751445, "grad_norm": 0.5478897581085619, "learning_rate": 2.0251035189845045e-07, "loss": 0.5614432692527771, "step": 745, "token_acc": 0.8101714880561034 }, { "epoch": 2.1560693641618496, "grad_norm": 0.5625549603262265, "learning_rate": 2.012314244952758e-07, "loss": 0.46915191411972046, "step": 746, "token_acc": 0.8398674842185119 }, { "epoch": 2.1589595375722546, "grad_norm": 0.5888007906160326, "learning_rate": 1.9995553014834986e-07, "loss": 0.5621305704116821, "step": 747, "token_acc": 0.8091583390025296 }, { "epoch": 2.161849710982659, "grad_norm": 0.5611702979006163, "learning_rate": 1.9868268181037184e-07, "loss": 0.5150927901268005, "step": 748, "token_acc": 0.8226671153861205 }, { "epoch": 2.1647398843930636, "grad_norm": 0.5111806577194473, "learning_rate": 1.9741289240311754e-07, "loss": 0.5273150205612183, "step": 749, "token_acc": 0.822871650821089 }, { "epoch": 2.167630057803468, "grad_norm": 0.5196873584862519, "learning_rate": 1.9614617481730882e-07, "loss": 0.5140695571899414, "step": 750, "token_acc": 0.8273383116061258 }, { "epoch": 2.1705202312138727, "grad_norm": 0.5735974858092083, "learning_rate": 1.948825419124837e-07, "loss": 0.5572013854980469, "step": 751, "token_acc": 0.8135551173589466 }, { "epoch": 2.1734104046242773, "grad_norm": 0.5173068836847717, "learning_rate": 1.9362200651686406e-07, "loss": 0.4991053640842438, "step": 752, "token_acc": 0.8299385295624275 }, { "epoch": 2.1763005780346822, "grad_norm": 0.5835529062955169, "learning_rate": 1.9236458142722672e-07, "loss": 0.4967957139015198, "step": 753, "token_acc": 0.8307953955965303 }, { "epoch": 2.179190751445087, "grad_norm": 0.5877111733686488, "learning_rate": 1.9111027940877283e-07, "loss": 0.5488715767860413, "step": 754, "token_acc": 0.8119714508486775 }, { "epoch": 2.1820809248554913, "grad_norm": 0.5937906866706819, "learning_rate": 1.898591131949992e-07, "loss": 0.5290513038635254, "step": 755, "token_acc": 0.8182620202911337 }, { "epoch": 2.184971098265896, "grad_norm": 0.5973610860546952, "learning_rate": 1.8861109548756764e-07, "loss": 0.5482075810432434, "step": 756, "token_acc": 0.8168008865903214 }, { "epoch": 2.1878612716763004, "grad_norm": 0.6092890006866195, "learning_rate": 1.873662389561771e-07, "loss": 0.5488214492797852, "step": 757, "token_acc": 0.8205397467749234 }, { "epoch": 2.1907514450867054, "grad_norm": 0.5100060557982842, "learning_rate": 1.861245562384351e-07, "loss": 0.5582944750785828, "step": 758, "token_acc": 0.8142653999590552 }, { "epoch": 2.19364161849711, "grad_norm": 0.5534172002173429, "learning_rate": 1.8488605993972806e-07, "loss": 0.5284197926521301, "step": 759, "token_acc": 0.8226439546852772 }, { "epoch": 2.1965317919075145, "grad_norm": 0.5676418034969823, "learning_rate": 1.8365076263309542e-07, "loss": 0.5176257491111755, "step": 760, "token_acc": 0.8240463351308168 }, { "epoch": 2.199421965317919, "grad_norm": 0.5273849733875124, "learning_rate": 1.8241867685910007e-07, "loss": 0.5415469408035278, "step": 761, "token_acc": 0.8159108203203757 }, { "epoch": 2.2023121387283235, "grad_norm": 0.5675178250606417, "learning_rate": 1.8118981512570254e-07, "loss": 0.495791494846344, "step": 762, "token_acc": 0.833165862256412 }, { "epoch": 2.2052023121387285, "grad_norm": 0.5356879254901209, "learning_rate": 1.7996418990813293e-07, "loss": 0.5700979828834534, "step": 763, "token_acc": 0.8082553122201417 }, { "epoch": 2.208092485549133, "grad_norm": 0.5440506283017456, "learning_rate": 1.7874181364876462e-07, "loss": 0.5215957164764404, "step": 764, "token_acc": 0.8242129054849903 }, { "epoch": 2.2109826589595376, "grad_norm": 0.48724727796349754, "learning_rate": 1.7752269875698872e-07, "loss": 0.48275503516197205, "step": 765, "token_acc": 0.8372185670308444 }, { "epoch": 2.213872832369942, "grad_norm": 0.6530933074612743, "learning_rate": 1.763068576090862e-07, "loss": 0.5122123956680298, "step": 766, "token_acc": 0.8289117165401221 }, { "epoch": 2.2167630057803467, "grad_norm": 0.5132130783753541, "learning_rate": 1.750943025481046e-07, "loss": 0.5450626611709595, "step": 767, "token_acc": 0.8163703808809519 }, { "epoch": 2.2196531791907512, "grad_norm": 0.5763340107528144, "learning_rate": 1.73885045883731e-07, "loss": 0.5134228467941284, "step": 768, "token_acc": 0.8268736586467864 }, { "epoch": 2.222543352601156, "grad_norm": 0.5678033281126066, "learning_rate": 1.726790998921675e-07, "loss": 0.5369815826416016, "step": 769, "token_acc": 0.8197942785502621 }, { "epoch": 2.2254335260115607, "grad_norm": 0.5494081888054269, "learning_rate": 1.7147647681600735e-07, "loss": 0.583419144153595, "step": 770, "token_acc": 0.8045412637492227 }, { "epoch": 2.2283236994219653, "grad_norm": 0.5002570926978792, "learning_rate": 1.7027718886410948e-07, "loss": 0.5762687921524048, "step": 771, "token_acc": 0.8050788141720897 }, { "epoch": 2.23121387283237, "grad_norm": 0.5621625282852232, "learning_rate": 1.6908124821147517e-07, "loss": 0.5734193325042725, "step": 772, "token_acc": 0.8072726721307747 }, { "epoch": 2.2341040462427744, "grad_norm": 0.5805542620358577, "learning_rate": 1.6788866699912434e-07, "loss": 0.5245779156684875, "step": 773, "token_acc": 0.8224566435530849 }, { "epoch": 2.2369942196531793, "grad_norm": 0.5784351770858037, "learning_rate": 1.6669945733397288e-07, "loss": 0.5163431763648987, "step": 774, "token_acc": 0.8234030645429656 }, { "epoch": 2.239884393063584, "grad_norm": 0.5443607425066719, "learning_rate": 1.6551363128870866e-07, "loss": 0.48509231209754944, "step": 775, "token_acc": 0.8364400070660744 }, { "epoch": 2.2427745664739884, "grad_norm": 0.5838705468342498, "learning_rate": 1.643312009016694e-07, "loss": 0.5485388040542603, "step": 776, "token_acc": 0.814316289454411 }, { "epoch": 2.245664739884393, "grad_norm": 0.5113123373755981, "learning_rate": 1.631521781767214e-07, "loss": 0.5461674928665161, "step": 777, "token_acc": 0.8178670064564116 }, { "epoch": 2.2485549132947975, "grad_norm": 0.5316036267961789, "learning_rate": 1.6197657508313595e-07, "loss": 0.5362288951873779, "step": 778, "token_acc": 0.8175199117906136 }, { "epoch": 2.2514450867052025, "grad_norm": 0.6922569927006882, "learning_rate": 1.608044035554692e-07, "loss": 0.5441286563873291, "step": 779, "token_acc": 0.8158920316612874 }, { "epoch": 2.254335260115607, "grad_norm": 0.6638081905493092, "learning_rate": 1.5963567549344026e-07, "loss": 0.5481600761413574, "step": 780, "token_acc": 0.8147708894878706 }, { "epoch": 2.2572254335260116, "grad_norm": 0.5594541395187226, "learning_rate": 1.5847040276181113e-07, "loss": 0.5381879210472107, "step": 781, "token_acc": 0.8191574437700821 }, { "epoch": 2.260115606936416, "grad_norm": 0.6007103186375023, "learning_rate": 1.5730859719026535e-07, "loss": 0.537074863910675, "step": 782, "token_acc": 0.8190765218606167 }, { "epoch": 2.2630057803468207, "grad_norm": 0.5565956593496582, "learning_rate": 1.561502705732883e-07, "loss": 0.4965110719203949, "step": 783, "token_acc": 0.8309357060849598 }, { "epoch": 2.2658959537572256, "grad_norm": 0.5642893968640419, "learning_rate": 1.5499543467004812e-07, "loss": 0.5519629120826721, "step": 784, "token_acc": 0.8145803817619548 }, { "epoch": 2.26878612716763, "grad_norm": 0.6562655659982366, "learning_rate": 1.538441012042747e-07, "loss": 0.5342061519622803, "step": 785, "token_acc": 0.8214097726480007 }, { "epoch": 2.2716763005780347, "grad_norm": 0.5502255728162866, "learning_rate": 1.526962818641428e-07, "loss": 0.5008838176727295, "step": 786, "token_acc": 0.8290141252177352 }, { "epoch": 2.2745664739884393, "grad_norm": 0.5549954985905744, "learning_rate": 1.5155198830215144e-07, "loss": 0.4954628348350525, "step": 787, "token_acc": 0.8334000233928208 }, { "epoch": 2.277456647398844, "grad_norm": 0.6131059587737819, "learning_rate": 1.5041123213500673e-07, "loss": 0.5419051647186279, "step": 788, "token_acc": 0.8164740751406938 }, { "epoch": 2.2803468208092488, "grad_norm": 0.6247230822104177, "learning_rate": 1.4927402494350383e-07, "loss": 0.5040674805641174, "step": 789, "token_acc": 0.8298278970337606 }, { "epoch": 2.2832369942196533, "grad_norm": 0.5169557886712214, "learning_rate": 1.4814037827240894e-07, "loss": 0.4267565608024597, "step": 790, "token_acc": 0.85461239288595 }, { "epoch": 2.286127167630058, "grad_norm": 0.5453091300597913, "learning_rate": 1.4701030363034244e-07, "loss": 0.5594276189804077, "step": 791, "token_acc": 0.8131839426158908 }, { "epoch": 2.2890173410404624, "grad_norm": 0.5304410532256004, "learning_rate": 1.4588381248966185e-07, "loss": 0.5278592109680176, "step": 792, "token_acc": 0.8218627568498552 }, { "epoch": 2.291907514450867, "grad_norm": 0.6120665191114517, "learning_rate": 1.4476091628634597e-07, "loss": 0.575430691242218, "step": 793, "token_acc": 0.807088911218437 }, { "epoch": 2.294797687861272, "grad_norm": 0.5799839527530729, "learning_rate": 1.4364162641987776e-07, "loss": 0.5156550407409668, "step": 794, "token_acc": 0.8260783412329787 }, { "epoch": 2.2976878612716765, "grad_norm": 0.5602063299660717, "learning_rate": 1.425259542531293e-07, "loss": 0.5343849658966064, "step": 795, "token_acc": 0.8199821131979047 }, { "epoch": 2.300578034682081, "grad_norm": 0.4887450635971321, "learning_rate": 1.414139111122463e-07, "loss": 0.5308408141136169, "step": 796, "token_acc": 0.8229694371764182 }, { "epoch": 2.3034682080924855, "grad_norm": 0.4993867501606219, "learning_rate": 1.4030550828653354e-07, "loss": 0.5518777966499329, "step": 797, "token_acc": 0.8136998348383776 }, { "epoch": 2.30635838150289, "grad_norm": 0.5067023143157817, "learning_rate": 1.3920075702833918e-07, "loss": 0.5633761882781982, "step": 798, "token_acc": 0.8110373410357782 }, { "epoch": 2.3092485549132946, "grad_norm": 0.49845534995334795, "learning_rate": 1.380996685529413e-07, "loss": 0.5841176509857178, "step": 799, "token_acc": 0.8055892737380623 }, { "epoch": 2.3121387283236996, "grad_norm": 0.5671598446889555, "learning_rate": 1.370022540384347e-07, "loss": 0.5178837180137634, "step": 800, "token_acc": 0.8236206769170149 }, { "epoch": 2.315028901734104, "grad_norm": 0.4945445707298972, "learning_rate": 1.3590852462561536e-07, "loss": 0.5855327844619751, "step": 801, "token_acc": 0.8038555657047487 }, { "epoch": 2.3179190751445087, "grad_norm": 0.5806465370535545, "learning_rate": 1.3481849141786977e-07, "loss": 0.5570707321166992, "step": 802, "token_acc": 0.8127311126755344 }, { "epoch": 2.320809248554913, "grad_norm": 0.6159090128169195, "learning_rate": 1.337321654810605e-07, "loss": 0.510475754737854, "step": 803, "token_acc": 0.8252182347235694 }, { "epoch": 2.3236994219653178, "grad_norm": 0.5376860591208902, "learning_rate": 1.3264955784341436e-07, "loss": 0.5326089859008789, "step": 804, "token_acc": 0.8201670917441944 }, { "epoch": 2.3265895953757223, "grad_norm": 0.673299584166168, "learning_rate": 1.3157067949541108e-07, "loss": 0.58345627784729, "step": 805, "token_acc": 0.8029432260094861 }, { "epoch": 2.3294797687861273, "grad_norm": 0.5206280305901979, "learning_rate": 1.304955413896705e-07, "loss": 0.574557900428772, "step": 806, "token_acc": 0.8069745418082558 }, { "epoch": 2.332369942196532, "grad_norm": 0.5136292360134201, "learning_rate": 1.294241544408425e-07, "loss": 0.5320082902908325, "step": 807, "token_acc": 0.8200797060551261 }, { "epoch": 2.3352601156069364, "grad_norm": 0.6862994942563941, "learning_rate": 1.2835652952549535e-07, "loss": 0.506873607635498, "step": 808, "token_acc": 0.8275425473721735 }, { "epoch": 2.338150289017341, "grad_norm": 0.512551355029386, "learning_rate": 1.272926774820063e-07, "loss": 0.5066085457801819, "step": 809, "token_acc": 0.8297983521714544 }, { "epoch": 2.3410404624277454, "grad_norm": 0.5604007523428769, "learning_rate": 1.2623260911045032e-07, "loss": 0.5025891065597534, "step": 810, "token_acc": 0.829209325638134 }, { "epoch": 2.3439306358381504, "grad_norm": 0.5268748443036352, "learning_rate": 1.251763351724912e-07, "loss": 0.4720842242240906, "step": 811, "token_acc": 0.8390679336697509 }, { "epoch": 2.346820809248555, "grad_norm": 0.5272184591480457, "learning_rate": 1.241238663912727e-07, "loss": 0.5422724485397339, "step": 812, "token_acc": 0.8181165262000732 }, { "epoch": 2.3497109826589595, "grad_norm": 0.6478156561205365, "learning_rate": 1.2307521345130856e-07, "loss": 0.4997095465660095, "step": 813, "token_acc": 0.83579220127889 }, { "epoch": 2.352601156069364, "grad_norm": 0.5596818812581189, "learning_rate": 1.2203038699837482e-07, "loss": 0.5354875326156616, "step": 814, "token_acc": 0.8179522864334984 }, { "epoch": 2.3554913294797686, "grad_norm": 0.5092123540436737, "learning_rate": 1.2098939763940146e-07, "loss": 0.5460278987884521, "step": 815, "token_acc": 0.8163918561804444 }, { "epoch": 2.3583815028901736, "grad_norm": 0.5800331579268285, "learning_rate": 1.1995225594236535e-07, "loss": 0.5022585988044739, "step": 816, "token_acc": 0.8274375641464249 }, { "epoch": 2.361271676300578, "grad_norm": 0.5756167659083334, "learning_rate": 1.1891897243618183e-07, "loss": 0.5118639469146729, "step": 817, "token_acc": 0.8277416762854647 }, { "epoch": 2.3641618497109826, "grad_norm": 0.7044868964257237, "learning_rate": 1.1788955761059848e-07, "loss": 0.5586499571800232, "step": 818, "token_acc": 0.8113651781794964 }, { "epoch": 2.367052023121387, "grad_norm": 0.5795349651059425, "learning_rate": 1.168640219160893e-07, "loss": 0.46478456258773804, "step": 819, "token_acc": 0.8425433103736172 }, { "epoch": 2.3699421965317917, "grad_norm": 0.5417472517233258, "learning_rate": 1.1584237576374672e-07, "loss": 0.5370988845825195, "step": 820, "token_acc": 0.8190044958253051 }, { "epoch": 2.3728323699421967, "grad_norm": 0.5406033227296971, "learning_rate": 1.1482462952517819e-07, "loss": 0.5212105512619019, "step": 821, "token_acc": 0.8224046418092507 }, { "epoch": 2.3757225433526012, "grad_norm": 0.6158759615805948, "learning_rate": 1.1381079353239915e-07, "loss": 0.5457302331924438, "step": 822, "token_acc": 0.8143862498308296 }, { "epoch": 2.378612716763006, "grad_norm": 0.5823036775149597, "learning_rate": 1.1280087807772881e-07, "loss": 0.5847820043563843, "step": 823, "token_acc": 0.8055109662743706 }, { "epoch": 2.3815028901734103, "grad_norm": 0.5934874612721635, "learning_rate": 1.1179489341368614e-07, "loss": 0.527098536491394, "step": 824, "token_acc": 0.8198975500818406 }, { "epoch": 2.384393063583815, "grad_norm": 0.48776844524252105, "learning_rate": 1.1079284975288456e-07, "loss": 0.5120328068733215, "step": 825, "token_acc": 0.8243783599233836 }, { "epoch": 2.38728323699422, "grad_norm": 0.6146965565569307, "learning_rate": 1.097947572679298e-07, "loss": 0.5407025814056396, "step": 826, "token_acc": 0.8166508538899431 }, { "epoch": 2.3901734104046244, "grad_norm": 0.5334859468151563, "learning_rate": 1.0880062609131485e-07, "loss": 0.5002784729003906, "step": 827, "token_acc": 0.8304765759384802 }, { "epoch": 2.393063583815029, "grad_norm": 0.5390442828664261, "learning_rate": 1.0781046631531887e-07, "loss": 0.539802074432373, "step": 828, "token_acc": 0.8201954263661371 }, { "epoch": 2.3959537572254335, "grad_norm": 0.5913404588285502, "learning_rate": 1.0682428799190357e-07, "loss": 0.5389546155929565, "step": 829, "token_acc": 0.8186631949877636 }, { "epoch": 2.398843930635838, "grad_norm": 0.5442985144352179, "learning_rate": 1.0584210113261138e-07, "loss": 0.5016453862190247, "step": 830, "token_acc": 0.8323601673886272 }, { "epoch": 2.401734104046243, "grad_norm": 0.5335838263183578, "learning_rate": 1.0486391570846447e-07, "loss": 0.5271462202072144, "step": 831, "token_acc": 0.8242358536755963 }, { "epoch": 2.4046242774566475, "grad_norm": 0.49716550117440406, "learning_rate": 1.0388974164986247e-07, "loss": 0.55882728099823, "step": 832, "token_acc": 0.8099962892130277 }, { "epoch": 2.407514450867052, "grad_norm": 0.47857456778328644, "learning_rate": 1.0291958884648244e-07, "loss": 0.49896830320358276, "step": 833, "token_acc": 0.8291924229963124 }, { "epoch": 2.4104046242774566, "grad_norm": 0.5097765363216997, "learning_rate": 1.0195346714717812e-07, "loss": 0.5477476716041565, "step": 834, "token_acc": 0.8156213758444858 }, { "epoch": 2.413294797687861, "grad_norm": 0.5235000424585246, "learning_rate": 1.0099138635988024e-07, "loss": 0.5449202060699463, "step": 835, "token_acc": 0.8174131547081592 }, { "epoch": 2.416184971098266, "grad_norm": 0.5918110484158251, "learning_rate": 1.0003335625149667e-07, "loss": 0.47566699981689453, "step": 836, "token_acc": 0.8377055807323248 }, { "epoch": 2.4190751445086707, "grad_norm": 0.5851719068244339, "learning_rate": 9.907938654781306e-08, "loss": 0.5465905666351318, "step": 837, "token_acc": 0.8147972978299083 }, { "epoch": 2.421965317919075, "grad_norm": 0.5682204824677508, "learning_rate": 9.812948693339518e-08, "loss": 0.5738434791564941, "step": 838, "token_acc": 0.8094719444296344 }, { "epoch": 2.4248554913294798, "grad_norm": 0.49007877801128724, "learning_rate": 9.718366705148878e-08, "loss": 0.5543205738067627, "step": 839, "token_acc": 0.8132528289037656 }, { "epoch": 2.4277456647398843, "grad_norm": 0.5842704513292558, "learning_rate": 9.62419365039237e-08, "loss": 0.5389681458473206, "step": 840, "token_acc": 0.8200700065948241 }, { "epoch": 2.430635838150289, "grad_norm": 0.5770762126755756, "learning_rate": 9.530430485101477e-08, "loss": 0.5231157541275024, "step": 841, "token_acc": 0.8205874308194584 }, { "epoch": 2.433526011560694, "grad_norm": 0.7677432650260306, "learning_rate": 9.437078161146589e-08, "loss": 0.48806625604629517, "step": 842, "token_acc": 0.8331080698798665 }, { "epoch": 2.4364161849710984, "grad_norm": 0.644925234497109, "learning_rate": 9.344137626227266e-08, "loss": 0.5736875534057617, "step": 843, "token_acc": 0.8089128548407091 }, { "epoch": 2.439306358381503, "grad_norm": 0.7396158526047033, "learning_rate": 9.251609823862638e-08, "loss": 0.4797173738479614, "step": 844, "token_acc": 0.8373787499437789 }, { "epoch": 2.4421965317919074, "grad_norm": 0.5468960652000051, "learning_rate": 9.15949569338188e-08, "loss": 0.5192615985870361, "step": 845, "token_acc": 0.8244522788344224 }, { "epoch": 2.445086705202312, "grad_norm": 0.5315006428054552, "learning_rate": 9.067796169914549e-08, "loss": 0.5097811222076416, "step": 846, "token_acc": 0.827042571766035 }, { "epoch": 2.447976878612717, "grad_norm": 0.7439553982785114, "learning_rate": 8.976512184381246e-08, "loss": 0.49079883098602295, "step": 847, "token_acc": 0.8330292060799148 }, { "epoch": 2.4508670520231215, "grad_norm": 0.6047154396535889, "learning_rate": 8.885644663484049e-08, "loss": 0.5638853311538696, "step": 848, "token_acc": 0.8139317111350264 }, { "epoch": 2.453757225433526, "grad_norm": 0.5113685852977929, "learning_rate": 8.795194529697148e-08, "loss": 0.5080073475837708, "step": 849, "token_acc": 0.8294516082294987 }, { "epoch": 2.4566473988439306, "grad_norm": 0.5784270460360631, "learning_rate": 8.705162701257501e-08, "loss": 0.4831171929836273, "step": 850, "token_acc": 0.8367839034908794 }, { "epoch": 2.459537572254335, "grad_norm": 0.8859232576451248, "learning_rate": 8.615550092155477e-08, "loss": 0.49585288763046265, "step": 851, "token_acc": 0.8318051901511245 }, { "epoch": 2.4624277456647397, "grad_norm": 0.5397198676813016, "learning_rate": 8.526357612125573e-08, "loss": 0.5402971506118774, "step": 852, "token_acc": 0.8140772038815954 }, { "epoch": 2.4653179190751446, "grad_norm": 0.5962698285712602, "learning_rate": 8.437586166637206e-08, "loss": 0.4982019066810608, "step": 853, "token_acc": 0.8291487495756479 }, { "epoch": 2.468208092485549, "grad_norm": 0.639088875669763, "learning_rate": 8.349236656885544e-08, "loss": 0.5227348804473877, "step": 854, "token_acc": 0.8234732997252996 }, { "epoch": 2.4710982658959537, "grad_norm": 0.5125821343592164, "learning_rate": 8.261309979782255e-08, "loss": 0.5540283918380737, "step": 855, "token_acc": 0.8137015888618007 }, { "epoch": 2.4739884393063583, "grad_norm": 0.6336792834178986, "learning_rate": 8.173807027946528e-08, "loss": 0.5213714838027954, "step": 856, "token_acc": 0.8260184658469347 }, { "epoch": 2.476878612716763, "grad_norm": 0.741297514751174, "learning_rate": 8.086728689695921e-08, "loss": 0.4948037564754486, "step": 857, "token_acc": 0.8296993252484727 }, { "epoch": 2.479768786127168, "grad_norm": 0.5470631077862728, "learning_rate": 8.000075849037408e-08, "loss": 0.5469754934310913, "step": 858, "token_acc": 0.8164498833341608 }, { "epoch": 2.4826589595375723, "grad_norm": 0.4864695217391108, "learning_rate": 7.913849385658333e-08, "loss": 0.5522366762161255, "step": 859, "token_acc": 0.8114838802706048 }, { "epoch": 2.485549132947977, "grad_norm": 0.6284131013971183, "learning_rate": 7.828050174917527e-08, "loss": 0.5867525935173035, "step": 860, "token_acc": 0.8053583956414843 }, { "epoch": 2.4884393063583814, "grad_norm": 0.6601691347825654, "learning_rate": 7.742679087836462e-08, "loss": 0.4591352045536041, "step": 861, "token_acc": 0.8464259952598495 }, { "epoch": 2.491329479768786, "grad_norm": 0.5223754803762156, "learning_rate": 7.657736991090263e-08, "loss": 0.5479453206062317, "step": 862, "token_acc": 0.8136173830420323 }, { "epoch": 2.494219653179191, "grad_norm": 0.6063178523383044, "learning_rate": 7.573224746999107e-08, "loss": 0.4984654486179352, "step": 863, "token_acc": 0.8310789771475875 }, { "epoch": 2.4971098265895955, "grad_norm": 0.5664401315392263, "learning_rate": 7.4891432135193e-08, "loss": 0.5375936031341553, "step": 864, "token_acc": 0.8193700891772278 }, { "epoch": 2.5, "grad_norm": 0.5684032151067252, "learning_rate": 7.405493244234651e-08, "loss": 0.5382214188575745, "step": 865, "token_acc": 0.8159053497942387 }, { "epoch": 2.5028901734104045, "grad_norm": 1.6304188232278813, "learning_rate": 7.322275688347818e-08, "loss": 0.5420823097229004, "step": 866, "token_acc": 0.8175298965740142 }, { "epoch": 2.505780346820809, "grad_norm": 0.5256843006054661, "learning_rate": 7.239491390671631e-08, "loss": 0.5603017807006836, "step": 867, "token_acc": 0.8130635711477354 }, { "epoch": 2.508670520231214, "grad_norm": 0.5290906377318529, "learning_rate": 7.157141191620548e-08, "loss": 0.4974015951156616, "step": 868, "token_acc": 0.8317996586674097 }, { "epoch": 2.5115606936416186, "grad_norm": 0.5009279956947961, "learning_rate": 7.075225927202105e-08, "loss": 0.5346574187278748, "step": 869, "token_acc": 0.8163790337713909 }, { "epoch": 2.514450867052023, "grad_norm": 0.4774847145184863, "learning_rate": 6.993746429008496e-08, "loss": 0.5793315768241882, "step": 870, "token_acc": 0.8044435794476767 }, { "epoch": 2.5173410404624277, "grad_norm": 0.579794607346244, "learning_rate": 6.912703524208019e-08, "loss": 0.4764576852321625, "step": 871, "token_acc": 0.8377503092002259 }, { "epoch": 2.520231213872832, "grad_norm": 0.5013881127258889, "learning_rate": 6.832098035536759e-08, "loss": 0.525843620300293, "step": 872, "token_acc": 0.8231466097001345 }, { "epoch": 2.523121387283237, "grad_norm": 0.48167613678527704, "learning_rate": 6.751930781290238e-08, "loss": 0.5380637049674988, "step": 873, "token_acc": 0.8183076636731655 }, { "epoch": 2.5260115606936417, "grad_norm": 0.4540447849829041, "learning_rate": 6.672202575315044e-08, "loss": 0.49698758125305176, "step": 874, "token_acc": 0.831075612916876 }, { "epoch": 2.5289017341040463, "grad_norm": 0.6661593346201325, "learning_rate": 6.59291422700064e-08, "loss": 0.4850313663482666, "step": 875, "token_acc": 0.8362135876193946 }, { "epoch": 2.531791907514451, "grad_norm": 0.505051966727968, "learning_rate": 6.514066541271085e-08, "loss": 0.499431312084198, "step": 876, "token_acc": 0.831420351210136 }, { "epoch": 2.5346820809248554, "grad_norm": 0.5882259006732896, "learning_rate": 6.435660318576935e-08, "loss": 0.5504227876663208, "step": 877, "token_acc": 0.8158776668803223 }, { "epoch": 2.5375722543352603, "grad_norm": 0.5391399587353708, "learning_rate": 6.357696354887049e-08, "loss": 0.5507422685623169, "step": 878, "token_acc": 0.8168785222461945 }, { "epoch": 2.540462427745665, "grad_norm": 0.5480460384925314, "learning_rate": 6.28017544168053e-08, "loss": 0.5473015308380127, "step": 879, "token_acc": 0.8178865534976365 }, { "epoch": 2.5433526011560694, "grad_norm": 0.5389986372049553, "learning_rate": 6.20309836593873e-08, "loss": 0.5189315676689148, "step": 880, "token_acc": 0.8252666894202909 }, { "epoch": 2.546242774566474, "grad_norm": 0.5707417078989917, "learning_rate": 6.126465910137163e-08, "loss": 0.5234180092811584, "step": 881, "token_acc": 0.8232250912282323 }, { "epoch": 2.5491329479768785, "grad_norm": 0.5632951051957191, "learning_rate": 6.0502788522377e-08, "loss": 0.5196454524993896, "step": 882, "token_acc": 0.8240517651811349 }, { "epoch": 2.5520231213872835, "grad_norm": 0.5312909361373286, "learning_rate": 5.974537965680537e-08, "loss": 0.5485826134681702, "step": 883, "token_acc": 0.8127245781077416 }, { "epoch": 2.5549132947976876, "grad_norm": 0.6429627848350591, "learning_rate": 5.899244019376426e-08, "loss": 0.5010867714881897, "step": 884, "token_acc": 0.8311800993506927 }, { "epoch": 2.5578034682080926, "grad_norm": 0.5223405882575716, "learning_rate": 5.824397777698858e-08, "loss": 0.5297751426696777, "step": 885, "token_acc": 0.8206137655553849 }, { "epoch": 2.560693641618497, "grad_norm": 0.8020502475631341, "learning_rate": 5.7500000004762574e-08, "loss": 0.5593537092208862, "step": 886, "token_acc": 0.811829619947517 }, { "epoch": 2.5635838150289016, "grad_norm": 0.6258112537179114, "learning_rate": 5.676051442984325e-08, "loss": 0.5434359908103943, "step": 887, "token_acc": 0.8160674580340842 }, { "epoch": 2.5664739884393066, "grad_norm": 0.5482233640675082, "learning_rate": 5.602552855938325e-08, "loss": 0.5392587780952454, "step": 888, "token_acc": 0.8183432292939603 }, { "epoch": 2.5693641618497107, "grad_norm": 0.5339167311609386, "learning_rate": 5.529504985485528e-08, "loss": 0.5843528509140015, "step": 889, "token_acc": 0.8041726059349488 }, { "epoch": 2.5722543352601157, "grad_norm": 0.5526129075488465, "learning_rate": 5.456908573197544e-08, "loss": 0.4785343408584595, "step": 890, "token_acc": 0.8354585097240348 }, { "epoch": 2.5751445086705202, "grad_norm": 0.5932930782479724, "learning_rate": 5.384764356062865e-08, "loss": 0.501940131187439, "step": 891, "token_acc": 0.8283741560885075 }, { "epoch": 2.578034682080925, "grad_norm": 0.5946977220929661, "learning_rate": 5.313073066479379e-08, "loss": 0.5379625558853149, "step": 892, "token_acc": 0.8177655126778356 }, { "epoch": 2.5809248554913293, "grad_norm": 0.5663018542099373, "learning_rate": 5.2418354322468884e-08, "loss": 0.4645715057849884, "step": 893, "token_acc": 0.8437703660317277 }, { "epoch": 2.583815028901734, "grad_norm": 0.5603090911019164, "learning_rate": 5.1710521765597593e-08, "loss": 0.5438505411148071, "step": 894, "token_acc": 0.8167114037179182 }, { "epoch": 2.586705202312139, "grad_norm": 0.5650529942357706, "learning_rate": 5.100724017999575e-08, "loss": 0.537551760673523, "step": 895, "token_acc": 0.8162509350365383 }, { "epoch": 2.5895953757225434, "grad_norm": 0.5946617661686765, "learning_rate": 5.0308516705278525e-08, "loss": 0.5363532304763794, "step": 896, "token_acc": 0.8188319733413082 }, { "epoch": 2.592485549132948, "grad_norm": 0.529447543384607, "learning_rate": 4.961435843478751e-08, "loss": 0.547370195388794, "step": 897, "token_acc": 0.8166483874998265 }, { "epoch": 2.5953757225433525, "grad_norm": 0.5564539974665098, "learning_rate": 4.892477241551901e-08, "loss": 0.5567014813423157, "step": 898, "token_acc": 0.8142607154390945 }, { "epoch": 2.598265895953757, "grad_norm": 0.6758226853294469, "learning_rate": 4.8239765648052985e-08, "loss": 0.5622668862342834, "step": 899, "token_acc": 0.8094786656801085 }, { "epoch": 2.601156069364162, "grad_norm": 0.6030746534353, "learning_rate": 4.755934508648057e-08, "loss": 0.48511946201324463, "step": 900, "token_acc": 0.8383746553751593 }, { "epoch": 2.6040462427745665, "grad_norm": 0.5291224134313559, "learning_rate": 4.688351763833531e-08, "loss": 0.5561063289642334, "step": 901, "token_acc": 0.811450131453075 }, { "epoch": 2.606936416184971, "grad_norm": 0.5231587422483082, "learning_rate": 4.621229016452155e-08, "loss": 0.585370659828186, "step": 902, "token_acc": 0.8056932036025608 }, { "epoch": 2.6098265895953756, "grad_norm": 1.1223139233293984, "learning_rate": 4.554566947924537e-08, "loss": 0.5447970628738403, "step": 903, "token_acc": 0.8164786148920761 }, { "epoch": 2.61271676300578, "grad_norm": 0.5225735759201205, "learning_rate": 4.4883662349945784e-08, "loss": 0.5505392551422119, "step": 904, "token_acc": 0.8164482180639134 }, { "epoch": 2.615606936416185, "grad_norm": 0.54473619880049, "learning_rate": 4.422627549722519e-08, "loss": 0.5359902381896973, "step": 905, "token_acc": 0.820455104729094 }, { "epoch": 2.6184971098265897, "grad_norm": 0.7561505246031067, "learning_rate": 4.357351559478201e-08, "loss": 0.47267240285873413, "step": 906, "token_acc": 0.8387789854590445 }, { "epoch": 2.621387283236994, "grad_norm": 0.5548449336113677, "learning_rate": 4.2925389269341916e-08, "loss": 0.5412442684173584, "step": 907, "token_acc": 0.8155705621117785 }, { "epoch": 2.6242774566473988, "grad_norm": 0.7283156817419644, "learning_rate": 4.228190310059182e-08, "loss": 0.5299142599105835, "step": 908, "token_acc": 0.8230541763009774 }, { "epoch": 2.6271676300578033, "grad_norm": 0.5365454152037888, "learning_rate": 4.164306362111208e-08, "loss": 0.5737514495849609, "step": 909, "token_acc": 0.8103234930175004 }, { "epoch": 2.6300578034682083, "grad_norm": 0.5438553812892487, "learning_rate": 4.100887731631053e-08, "loss": 0.5420162677764893, "step": 910, "token_acc": 0.8180698387235383 }, { "epoch": 2.632947976878613, "grad_norm": 0.64070798422041, "learning_rate": 4.0379350624356766e-08, "loss": 0.5189142823219299, "step": 911, "token_acc": 0.8237202834249387 }, { "epoch": 2.6358381502890174, "grad_norm": 0.47802319033882207, "learning_rate": 3.975448993611652e-08, "loss": 0.5308249592781067, "step": 912, "token_acc": 0.8203262576745515 }, { "epoch": 2.638728323699422, "grad_norm": 0.5724668109330596, "learning_rate": 3.913430159508696e-08, "loss": 0.5157672166824341, "step": 913, "token_acc": 0.8241608973797213 }, { "epoch": 2.6416184971098264, "grad_norm": 0.5470703054848514, "learning_rate": 3.8518791897332204e-08, "loss": 0.5976561307907104, "step": 914, "token_acc": 0.8007923950822223 }, { "epoch": 2.6445086705202314, "grad_norm": 0.5294401571240512, "learning_rate": 3.790796709141975e-08, "loss": 0.5527437925338745, "step": 915, "token_acc": 0.8132948131146666 }, { "epoch": 2.647398843930636, "grad_norm": 0.6321676647074376, "learning_rate": 3.7301833378356073e-08, "loss": 0.4902818202972412, "step": 916, "token_acc": 0.8343280912033046 }, { "epoch": 2.6502890173410405, "grad_norm": 0.6734799143444675, "learning_rate": 3.67003969115251e-08, "loss": 0.5476257801055908, "step": 917, "token_acc": 0.8164087189044648 }, { "epoch": 2.653179190751445, "grad_norm": 0.4933080483096889, "learning_rate": 3.610366379662455e-08, "loss": 0.5034703612327576, "step": 918, "token_acc": 0.8296526697770866 }, { "epoch": 2.6560693641618496, "grad_norm": 0.5701973114157253, "learning_rate": 3.551164009160429e-08, "loss": 0.5260199904441833, "step": 919, "token_acc": 0.8228647844657014 }, { "epoch": 2.6589595375722546, "grad_norm": 0.4606917700933646, "learning_rate": 3.4924331806605314e-08, "loss": 0.5847440361976624, "step": 920, "token_acc": 0.8036149091590186 }, { "epoch": 2.661849710982659, "grad_norm": 0.5312291603560868, "learning_rate": 3.4341744903897963e-08, "loss": 0.5280716419219971, "step": 921, "token_acc": 0.8217670827512655 }, { "epoch": 2.6647398843930636, "grad_norm": 0.5137738686874723, "learning_rate": 3.376388529782215e-08, "loss": 0.5434746146202087, "step": 922, "token_acc": 0.8166855043797683 }, { "epoch": 2.667630057803468, "grad_norm": 0.5112438107405131, "learning_rate": 3.319075885472644e-08, "loss": 0.4704023599624634, "step": 923, "token_acc": 0.8407168549429551 }, { "epoch": 2.6705202312138727, "grad_norm": 0.5633980375468464, "learning_rate": 3.262237139290952e-08, "loss": 0.5437241792678833, "step": 924, "token_acc": 0.8174555734488506 }, { "epoch": 2.6734104046242777, "grad_norm": 0.4789519578675391, "learning_rate": 3.205872868256021e-08, "loss": 0.5591274499893188, "step": 925, "token_acc": 0.8126648310155333 }, { "epoch": 2.6763005780346822, "grad_norm": 0.545383577218125, "learning_rate": 3.149983644569948e-08, "loss": 0.4846089482307434, "step": 926, "token_acc": 0.8357118170559603 }, { "epoch": 2.679190751445087, "grad_norm": 0.5624813066511716, "learning_rate": 3.094570035612226e-08, "loss": 0.5257154703140259, "step": 927, "token_acc": 0.8209082215813688 }, { "epoch": 2.6820809248554913, "grad_norm": 0.5921212603993137, "learning_rate": 3.0396326039339507e-08, "loss": 0.5992392897605896, "step": 928, "token_acc": 0.7986864607734648 }, { "epoch": 2.684971098265896, "grad_norm": 0.5498631051018497, "learning_rate": 2.9851719072521487e-08, "loss": 0.5509431958198547, "step": 929, "token_acc": 0.8177149696899494 }, { "epoch": 2.687861271676301, "grad_norm": 0.5215571767600914, "learning_rate": 2.9311884984440873e-08, "loss": 0.561446487903595, "step": 930, "token_acc": 0.8129055922352012 }, { "epoch": 2.690751445086705, "grad_norm": 0.559786563643402, "learning_rate": 2.8776829255416967e-08, "loss": 0.5166699290275574, "step": 931, "token_acc": 0.8237840118657938 }, { "epoch": 2.69364161849711, "grad_norm": 0.5753952050911679, "learning_rate": 2.8246557317259723e-08, "loss": 0.5357648134231567, "step": 932, "token_acc": 0.8212208495005039 }, { "epoch": 2.6965317919075145, "grad_norm": 0.5636571499534591, "learning_rate": 2.7721074553214596e-08, "loss": 0.5390565395355225, "step": 933, "token_acc": 0.8159201695282208 }, { "epoch": 2.699421965317919, "grad_norm": 0.5407560890645442, "learning_rate": 2.7200386297908386e-08, "loss": 0.541710615158081, "step": 934, "token_acc": 0.8174959891247107 }, { "epoch": 2.7023121387283235, "grad_norm": 0.48421827585155863, "learning_rate": 2.6684497837294208e-08, "loss": 0.5409998297691345, "step": 935, "token_acc": 0.8210280803345742 }, { "epoch": 2.705202312138728, "grad_norm": 0.49710877088501176, "learning_rate": 2.6173414408598826e-08, "loss": 0.5135529637336731, "step": 936, "token_acc": 0.8251490888501849 }, { "epoch": 2.708092485549133, "grad_norm": 0.6329172467067579, "learning_rate": 2.5667141200268694e-08, "loss": 0.5547735691070557, "step": 937, "token_acc": 0.8145400135743814 }, { "epoch": 2.7109826589595376, "grad_norm": 0.5576557557006313, "learning_rate": 2.5165683351917765e-08, "loss": 0.5579146146774292, "step": 938, "token_acc": 0.8112171853454817 }, { "epoch": 2.713872832369942, "grad_norm": 0.5905103597710084, "learning_rate": 2.4669045954275046e-08, "loss": 0.5442934632301331, "step": 939, "token_acc": 0.818311620283537 }, { "epoch": 2.7167630057803467, "grad_norm": 0.6610701567101593, "learning_rate": 2.4177234049133023e-08, "loss": 0.49151283502578735, "step": 940, "token_acc": 0.8325153415650084 }, { "epoch": 2.7196531791907512, "grad_norm": 0.6214821823759014, "learning_rate": 2.369025262929658e-08, "loss": 0.5725831389427185, "step": 941, "token_acc": 0.8070232229912145 }, { "epoch": 2.722543352601156, "grad_norm": 0.5547499629666095, "learning_rate": 2.3208106638531842e-08, "loss": 0.5330009460449219, "step": 942, "token_acc": 0.8195172027623966 }, { "epoch": 2.7254335260115607, "grad_norm": 0.5521438894414953, "learning_rate": 2.2730800971516862e-08, "loss": 0.5747419595718384, "step": 943, "token_acc": 0.8086665948043549 }, { "epoch": 2.7283236994219653, "grad_norm": 0.6317779099057246, "learning_rate": 2.225834047379099e-08, "loss": 0.49804458022117615, "step": 944, "token_acc": 0.8307906934881418 }, { "epoch": 2.73121387283237, "grad_norm": 0.5560572315857666, "learning_rate": 2.1790729941706276e-08, "loss": 0.5384119153022766, "step": 945, "token_acc": 0.8186016301942814 }, { "epoch": 2.7341040462427744, "grad_norm": 0.5706315776877087, "learning_rate": 2.132797412237869e-08, "loss": 0.5331531167030334, "step": 946, "token_acc": 0.8183284045442989 }, { "epoch": 2.7369942196531793, "grad_norm": 0.5767818083804982, "learning_rate": 2.087007771363969e-08, "loss": 0.5555546879768372, "step": 947, "token_acc": 0.8130259084965389 }, { "epoch": 2.739884393063584, "grad_norm": 0.5074851398256462, "learning_rate": 2.041704536398875e-08, "loss": 0.5641285181045532, "step": 948, "token_acc": 0.8102424125823674 }, { "epoch": 2.7427745664739884, "grad_norm": 0.5656737111306388, "learning_rate": 1.9968881672545957e-08, "loss": 0.5804109573364258, "step": 949, "token_acc": 0.8069046557228511 }, { "epoch": 2.745664739884393, "grad_norm": 0.5396023274518039, "learning_rate": 1.9525591189005874e-08, "loss": 0.5026800632476807, "step": 950, "token_acc": 0.8291645642615152 }, { "epoch": 2.7485549132947975, "grad_norm": 0.5545085068594241, "learning_rate": 1.9087178413590476e-08, "loss": 0.5121109485626221, "step": 951, "token_acc": 0.829365647193499 }, { "epoch": 2.7514450867052025, "grad_norm": 0.5744534847489216, "learning_rate": 1.8653647797004236e-08, "loss": 0.5073999166488647, "step": 952, "token_acc": 0.8286528286528286 }, { "epoch": 2.754335260115607, "grad_norm": 0.5473570344774414, "learning_rate": 1.8225003740388545e-08, "loss": 0.5411463975906372, "step": 953, "token_acc": 0.8197644649257553 }, { "epoch": 2.7572254335260116, "grad_norm": 0.5960870996950273, "learning_rate": 1.7801250595277095e-08, "loss": 0.45802488923072815, "step": 954, "token_acc": 0.8439128432584406 }, { "epoch": 2.760115606936416, "grad_norm": 0.5872410848204962, "learning_rate": 1.738239266355185e-08, "loss": 0.5364171862602234, "step": 955, "token_acc": 0.8192522793328644 }, { "epoch": 2.7630057803468207, "grad_norm": 0.5452386927866908, "learning_rate": 1.6968434197399072e-08, "loss": 0.5837544202804565, "step": 956, "token_acc": 0.8051349532888352 }, { "epoch": 2.7658959537572256, "grad_norm": 0.5752700596867665, "learning_rate": 1.655937939926655e-08, "loss": 0.5129964351654053, "step": 957, "token_acc": 0.8282252791972994 }, { "epoch": 2.76878612716763, "grad_norm": 0.5428098765109344, "learning_rate": 1.6155232421820653e-08, "loss": 0.5746065378189087, "step": 958, "token_acc": 0.8089228223154 }, { "epoch": 2.7716763005780347, "grad_norm": 0.5949829280630812, "learning_rate": 1.5755997367904173e-08, "loss": 0.4916711747646332, "step": 959, "token_acc": 0.8342608068069589 }, { "epoch": 2.7745664739884393, "grad_norm": 0.5674429218313363, "learning_rate": 1.536167829049495e-08, "loss": 0.5395721197128296, "step": 960, "token_acc": 0.8203693073096058 }, { "epoch": 2.777456647398844, "grad_norm": 0.561452376268135, "learning_rate": 1.497227919266414e-08, "loss": 0.51889967918396, "step": 961, "token_acc": 0.8233378239163167 }, { "epoch": 2.7803468208092488, "grad_norm": 0.6257227381883494, "learning_rate": 1.4587804027536454e-08, "loss": 0.5111842155456543, "step": 962, "token_acc": 0.8274028303059359 }, { "epoch": 2.7832369942196533, "grad_norm": 0.5900526631508034, "learning_rate": 1.420825669824921e-08, "loss": 0.5204794406890869, "step": 963, "token_acc": 0.8234049795759579 }, { "epoch": 2.786127167630058, "grad_norm": 0.509902068102799, "learning_rate": 1.3833641057913015e-08, "loss": 0.47923728823661804, "step": 964, "token_acc": 0.8353080111030787 }, { "epoch": 2.7890173410404624, "grad_norm": 0.5460825106119277, "learning_rate": 1.346396090957297e-08, "loss": 0.520375669002533, "step": 965, "token_acc": 0.8276919599125914 }, { "epoch": 2.791907514450867, "grad_norm": 0.5432685057122655, "learning_rate": 1.309922000616942e-08, "loss": 0.5795409679412842, "step": 966, "token_acc": 0.8071895906398279 }, { "epoch": 2.794797687861272, "grad_norm": 0.5657536988747344, "learning_rate": 1.2739422050500436e-08, "loss": 0.5345174074172974, "step": 967, "token_acc": 0.8179120793316155 }, { "epoch": 2.7976878612716765, "grad_norm": 0.521811401090051, "learning_rate": 1.2384570695183782e-08, "loss": 0.5313125252723694, "step": 968, "token_acc": 0.8208080793990667 }, { "epoch": 2.800578034682081, "grad_norm": 0.5951506599748814, "learning_rate": 1.2034669542620223e-08, "loss": 0.5154579877853394, "step": 969, "token_acc": 0.8274639716414208 }, { "epoch": 2.8034682080924855, "grad_norm": 0.7493969316675455, "learning_rate": 1.168972214495667e-08, "loss": 0.4610113203525543, "step": 970, "token_acc": 0.8410565847986298 }, { "epoch": 2.80635838150289, "grad_norm": 0.6158144745722535, "learning_rate": 1.1349732004050205e-08, "loss": 0.5308967232704163, "step": 971, "token_acc": 0.823366838754401 }, { "epoch": 2.809248554913295, "grad_norm": 0.49701991004281837, "learning_rate": 1.101470257143261e-08, "loss": 0.5433156490325928, "step": 972, "token_acc": 0.8172732427363528 }, { "epoch": 2.812138728323699, "grad_norm": 0.614964929129747, "learning_rate": 1.0684637248275175e-08, "loss": 0.4856722056865692, "step": 973, "token_acc": 0.8371653570989119 }, { "epoch": 2.815028901734104, "grad_norm": 0.5531928817079772, "learning_rate": 1.0359539385354387e-08, "loss": 0.5472983121871948, "step": 974, "token_acc": 0.8166184194819147 }, { "epoch": 2.8179190751445087, "grad_norm": 0.6036213061429313, "learning_rate": 1.0039412283017523e-08, "loss": 0.5529719591140747, "step": 975, "token_acc": 0.8155163061650604 }, { "epoch": 2.820809248554913, "grad_norm": 0.5564254532918392, "learning_rate": 9.724259191149774e-09, "loss": 0.4628450572490692, "step": 976, "token_acc": 0.8427982220798462 }, { "epoch": 2.8236994219653178, "grad_norm": 0.5588830748507647, "learning_rate": 9.414083309140453e-09, "loss": 0.5567787289619446, "step": 977, "token_acc": 0.8121751346288926 }, { "epoch": 2.8265895953757223, "grad_norm": 0.5529058564154966, "learning_rate": 9.108887785851338e-09, "loss": 0.5580377578735352, "step": 978, "token_acc": 0.8109314422108472 }, { "epoch": 2.8294797687861273, "grad_norm": 0.61646098239251, "learning_rate": 8.808675719584158e-09, "loss": 0.5375653505325317, "step": 979, "token_acc": 0.8192844783892899 }, { "epoch": 2.832369942196532, "grad_norm": 0.5248181521879705, "learning_rate": 8.513450158049106e-09, "loss": 0.5359894037246704, "step": 980, "token_acc": 0.8180794693882546 }, { "epoch": 2.8352601156069364, "grad_norm": 0.530766621077344, "learning_rate": 8.22321409833443e-09, "loss": 0.5032058358192444, "step": 981, "token_acc": 0.8299942928720195 }, { "epoch": 2.838150289017341, "grad_norm": 0.5767728092897907, "learning_rate": 7.93797048687539e-09, "loss": 0.555617094039917, "step": 982, "token_acc": 0.8127699150828953 }, { "epoch": 2.8410404624277454, "grad_norm": 0.5275196163844481, "learning_rate": 7.657722219424789e-09, "loss": 0.5177302956581116, "step": 983, "token_acc": 0.8254756164272545 }, { "epoch": 2.8439306358381504, "grad_norm": 0.7188190918164308, "learning_rate": 7.382472141023221e-09, "loss": 0.5488888025283813, "step": 984, "token_acc": 0.8139118457300275 }, { "epoch": 2.846820809248555, "grad_norm": 0.5053524666497287, "learning_rate": 7.112223045970589e-09, "loss": 0.5309122800827026, "step": 985, "token_acc": 0.818977587114551 }, { "epoch": 2.8497109826589595, "grad_norm": 0.49254982998325725, "learning_rate": 6.8469776777973494e-09, "loss": 0.48389381170272827, "step": 986, "token_acc": 0.839111193678302 }, { "epoch": 2.852601156069364, "grad_norm": 0.5088843284530131, "learning_rate": 6.5867387292369295e-09, "loss": 0.5327301025390625, "step": 987, "token_acc": 0.8190361305134541 }, { "epoch": 2.8554913294797686, "grad_norm": 0.5579589460192081, "learning_rate": 6.331508842198296e-09, "loss": 0.46285098791122437, "step": 988, "token_acc": 0.8444943903023158 }, { "epoch": 2.8583815028901736, "grad_norm": 0.5480219063407678, "learning_rate": 6.081290607739042e-09, "loss": 0.4747048616409302, "step": 989, "token_acc": 0.8427808981834031 }, { "epoch": 2.861271676300578, "grad_norm": 0.7741942154519839, "learning_rate": 5.836086566039289e-09, "loss": 0.5887913703918457, "step": 990, "token_acc": 0.8049742371893245 }, { "epoch": 2.8641618497109826, "grad_norm": 0.5193852803751504, "learning_rate": 5.595899206375654e-09, "loss": 0.5110014081001282, "step": 991, "token_acc": 0.8288312763590261 }, { "epoch": 2.867052023121387, "grad_norm": 0.5341612707698237, "learning_rate": 5.360730967096272e-09, "loss": 0.5477676391601562, "step": 992, "token_acc": 0.8129789165141573 }, { "epoch": 2.8699421965317917, "grad_norm": 0.7306055692439172, "learning_rate": 5.130584235595703e-09, "loss": 0.5541284680366516, "step": 993, "token_acc": 0.8145775823594559 }, { "epoch": 2.8728323699421967, "grad_norm": 0.5713799415951762, "learning_rate": 4.9054613482910065e-09, "loss": 0.44801950454711914, "step": 994, "token_acc": 0.845931691583633 }, { "epoch": 2.8757225433526012, "grad_norm": 0.5839589911780936, "learning_rate": 4.685364590597929e-09, "loss": 0.5638971924781799, "step": 995, "token_acc": 0.8107071579171281 }, { "epoch": 2.878612716763006, "grad_norm": 0.5287376481818248, "learning_rate": 4.470296196907364e-09, "loss": 0.5595090389251709, "step": 996, "token_acc": 0.8104899471905078 }, { "epoch": 2.8815028901734103, "grad_norm": 0.5379724615788479, "learning_rate": 4.260258350563317e-09, "loss": 0.5029683709144592, "step": 997, "token_acc": 0.8288261472452321 }, { "epoch": 2.884393063583815, "grad_norm": 0.6018325527774611, "learning_rate": 4.055253183840257e-09, "loss": 0.5635591149330139, "step": 998, "token_acc": 0.8117199938369883 }, { "epoch": 2.88728323699422, "grad_norm": 0.5473646076466034, "learning_rate": 3.855282777921465e-09, "loss": 0.44404757022857666, "step": 999, "token_acc": 0.8481432594156987 }, { "epoch": 2.8901734104046244, "grad_norm": 0.607676333795665, "learning_rate": 3.660349162878329e-09, "loss": 0.5595177412033081, "step": 1000, "token_acc": 0.8098022742758105 }, { "epoch": 2.8901734104046244, "eval_loss": 0.5740217566490173, "eval_runtime": 69.5297, "eval_samples_per_second": 1.582, "eval_steps_per_second": 0.201, "eval_token_acc": 0.808306147135369, "step": 1000 }, { "epoch": 2.893063583815029, "grad_norm": 0.6028179153533768, "learning_rate": 3.4704543176491407e-09, "loss": 0.5201370716094971, "step": 1001, "token_acc": 0.8248979009505466 }, { "epoch": 2.8959537572254335, "grad_norm": 0.5618469428482809, "learning_rate": 3.285600170019609e-09, "loss": 0.4737909138202667, "step": 1002, "token_acc": 0.8380801687763713 }, { "epoch": 2.898843930635838, "grad_norm": 0.520670079505936, "learning_rate": 3.10578859660271e-09, "loss": 0.4949793815612793, "step": 1003, "token_acc": 0.8310451985643839 }, { "epoch": 2.901734104046243, "grad_norm": 0.5898385451823664, "learning_rate": 2.9310214228202014e-09, "loss": 0.5583693981170654, "step": 1004, "token_acc": 0.8109677906011918 }, { "epoch": 2.9046242774566475, "grad_norm": 0.5434063241260475, "learning_rate": 2.7613004228835836e-09, "loss": 0.5403155088424683, "step": 1005, "token_acc": 0.8173558831911802 }, { "epoch": 2.907514450867052, "grad_norm": 0.5472051803786162, "learning_rate": 2.59662731977639e-09, "loss": 0.5251212120056152, "step": 1006, "token_acc": 0.8263490698267074 }, { "epoch": 2.9104046242774566, "grad_norm": 0.49207250611822545, "learning_rate": 2.437003785236702e-09, "loss": 0.5539924502372742, "step": 1007, "token_acc": 0.8112695897164994 }, { "epoch": 2.913294797687861, "grad_norm": 0.5002736177395538, "learning_rate": 2.2824314397399404e-09, "loss": 0.5284777283668518, "step": 1008, "token_acc": 0.8207929017091751 }, { "epoch": 2.916184971098266, "grad_norm": 0.5322616545740584, "learning_rate": 2.132911852482766e-09, "loss": 0.5585949420928955, "step": 1009, "token_acc": 0.8104817895999946 }, { "epoch": 2.9190751445086707, "grad_norm": 0.5531944879626155, "learning_rate": 1.9884465413667063e-09, "loss": 0.5428365468978882, "step": 1010, "token_acc": 0.815299992762539 }, { "epoch": 2.921965317919075, "grad_norm": 0.5219295200504247, "learning_rate": 1.8490369729832755e-09, "loss": 0.5256614685058594, "step": 1011, "token_acc": 0.8222089510292981 }, { "epoch": 2.9248554913294798, "grad_norm": 0.5231759747194448, "learning_rate": 1.714684562598545e-09, "loss": 0.5462931990623474, "step": 1012, "token_acc": 0.8166555934189188 }, { "epoch": 2.9277456647398843, "grad_norm": 0.511178905264401, "learning_rate": 1.5853906741392086e-09, "loss": 0.48754703998565674, "step": 1013, "token_acc": 0.8340968562927913 }, { "epoch": 2.9306358381502893, "grad_norm": 0.49209363879670576, "learning_rate": 1.4611566201785386e-09, "loss": 0.6072345972061157, "step": 1014, "token_acc": 0.796086135633005 }, { "epoch": 2.9335260115606934, "grad_norm": 0.5468806874394325, "learning_rate": 1.3419836619229519e-09, "loss": 0.5350404381752014, "step": 1015, "token_acc": 0.8205611421851678 }, { "epoch": 2.9364161849710984, "grad_norm": 0.5545661554638134, "learning_rate": 1.227873009199465e-09, "loss": 0.48873502016067505, "step": 1016, "token_acc": 0.8335308101581073 }, { "epoch": 2.939306358381503, "grad_norm": 0.6117033520146128, "learning_rate": 1.1188258204433144e-09, "loss": 0.5223637819290161, "step": 1017, "token_acc": 0.8220580971784899 }, { "epoch": 2.9421965317919074, "grad_norm": 0.5990530756110558, "learning_rate": 1.0148432026860775e-09, "loss": 0.5375405550003052, "step": 1018, "token_acc": 0.8204211966851669 }, { "epoch": 2.9450867052023124, "grad_norm": 0.5179575810720268, "learning_rate": 9.159262115445709e-10, "loss": 0.5529065132141113, "step": 1019, "token_acc": 0.8146867269147271 }, { "epoch": 2.9479768786127165, "grad_norm": 0.4852204771957678, "learning_rate": 8.220758512100246e-10, "loss": 0.5473994016647339, "step": 1020, "token_acc": 0.8154385812017952 }, { "epoch": 2.9508670520231215, "grad_norm": 0.5869353604242789, "learning_rate": 7.332930744380905e-10, "loss": 0.5176626443862915, "step": 1021, "token_acc": 0.8273430939731791 }, { "epoch": 2.953757225433526, "grad_norm": 0.5602528809896415, "learning_rate": 6.49578782538851e-10, "loss": 0.5115993618965149, "step": 1022, "token_acc": 0.8288524482039359 }, { "epoch": 2.9566473988439306, "grad_norm": 0.5342085317349031, "learning_rate": 5.709338253679363e-10, "loss": 0.5524012446403503, "step": 1023, "token_acc": 0.8131655170976683 }, { "epoch": 2.959537572254335, "grad_norm": 0.5776521748726285, "learning_rate": 4.973590013178652e-10, "loss": 0.5437720417976379, "step": 1024, "token_acc": 0.8181899648876977 }, { "epoch": 2.9624277456647397, "grad_norm": 0.5915883065627155, "learning_rate": 4.288550573098293e-10, "loss": 0.5497083067893982, "step": 1025, "token_acc": 0.8166504174699635 }, { "epoch": 2.9653179190751446, "grad_norm": 0.519862153616305, "learning_rate": 3.6542268878608785e-10, "loss": 0.5397800207138062, "step": 1026, "token_acc": 0.8185784280824216 }, { "epoch": 2.968208092485549, "grad_norm": 0.6328021139986955, "learning_rate": 3.070625397031401e-10, "loss": 0.5588440299034119, "step": 1027, "token_acc": 0.8125476802049286 }, { "epoch": 2.9710982658959537, "grad_norm": 0.5575020860016229, "learning_rate": 2.537752025249529e-10, "loss": 0.5562065839767456, "step": 1028, "token_acc": 0.8104220354019687 }, { "epoch": 2.9739884393063583, "grad_norm": 0.5378061802083338, "learning_rate": 2.0556121821696527e-10, "loss": 0.5177541971206665, "step": 1029, "token_acc": 0.8242314812400594 }, { "epoch": 2.976878612716763, "grad_norm": 0.5832757184904683, "learning_rate": 1.6242107624070412e-10, "loss": 0.49845069646835327, "step": 1030, "token_acc": 0.8330388762567243 }, { "epoch": 2.979768786127168, "grad_norm": 0.7982615431706986, "learning_rate": 1.2435521454884358e-10, "loss": 0.5247231125831604, "step": 1031, "token_acc": 0.823871938586352 }, { "epoch": 2.9826589595375723, "grad_norm": 0.5127749961245016, "learning_rate": 9.136401958059759e-11, "loss": 0.5525383353233337, "step": 1032, "token_acc": 0.8136602187346615 }, { "epoch": 2.985549132947977, "grad_norm": 0.542665341113767, "learning_rate": 6.34478262578897e-11, "loss": 0.5264041423797607, "step": 1033, "token_acc": 0.8259248289322793 }, { "epoch": 2.9884393063583814, "grad_norm": 0.5981387552317852, "learning_rate": 4.0606917981966804e-11, "loss": 0.5639816522598267, "step": 1034, "token_acc": 0.811261064452967 }, { "epoch": 2.991329479768786, "grad_norm": 0.5182263398780822, "learning_rate": 2.2841526630512642e-11, "loss": 0.5699348449707031, "step": 1035, "token_acc": 0.8084916570295722 }, { "epoch": 2.994219653179191, "grad_norm": 0.48173987479445357, "learning_rate": 1.0151832555205242e-11, "loss": 0.5670179128646851, "step": 1036, "token_acc": 0.8119991095280499 }, { "epoch": 2.9971098265895955, "grad_norm": 0.5532608077856682, "learning_rate": 2.5379645800516215e-12, "loss": 0.5611600875854492, "step": 1037, "token_acc": 0.8147770004529734 }, { "epoch": 3.0, "grad_norm": 0.5148238785537761, "learning_rate": 0.0, "loss": 0.5508678555488586, "step": 1038, "token_acc": 0.8153577131547579 }, { "epoch": 3.0, "eval_loss": 0.5740059018135071, "eval_runtime": 69.9798, "eval_samples_per_second": 1.572, "eval_steps_per_second": 0.2, "eval_token_acc": 0.808306147135369, "step": 1038 } ], "logging_steps": 1, "max_steps": 1038, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1140072026079232.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }