| { |
| "best_global_step": 1038, |
| "best_metric": 0.5740059, |
| "best_model_checkpoint": "/mnt/gpfs/shenyujiong/output/qwen3-vl-8b-int-sft-merged-nv5592-third3000-full-3epoch/v0-20251226-140741/checkpoint-1038", |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 1038, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002890173410404624, |
| "grad_norm": 6.073309605336921, |
| "learning_rate": 1.923076923076923e-08, |
| "loss": 0.8852723240852356, |
| "step": 1, |
| "token_acc": 0.7513407750453963 |
| }, |
| { |
| "epoch": 0.005780346820809248, |
| "grad_norm": 5.632770918536085, |
| "learning_rate": 3.846153846153846e-08, |
| "loss": 0.8229959607124329, |
| "step": 2, |
| "token_acc": 0.7648557050426209 |
| }, |
| { |
| "epoch": 0.008670520231213872, |
| "grad_norm": 5.550843708913173, |
| "learning_rate": 5.7692307692307695e-08, |
| "loss": 0.8395601511001587, |
| "step": 3, |
| "token_acc": 0.7611515500814708 |
| }, |
| { |
| "epoch": 0.011560693641618497, |
| "grad_norm": 5.463688271600264, |
| "learning_rate": 7.692307692307692e-08, |
| "loss": 0.8262450695037842, |
| "step": 4, |
| "token_acc": 0.7617775757231346 |
| }, |
| { |
| "epoch": 0.014450867052023121, |
| "grad_norm": 5.208733348546384, |
| "learning_rate": 9.615384615384616e-08, |
| "loss": 0.7870609760284424, |
| "step": 5, |
| "token_acc": 0.7738227378472486 |
| }, |
| { |
| "epoch": 0.017341040462427744, |
| "grad_norm": 6.094089600000965, |
| "learning_rate": 1.1538461538461539e-07, |
| "loss": 0.890167236328125, |
| "step": 6, |
| "token_acc": 0.7463134620800402 |
| }, |
| { |
| "epoch": 0.02023121387283237, |
| "grad_norm": 5.511558073866942, |
| "learning_rate": 1.346153846153846e-07, |
| "loss": 0.8200665712356567, |
| "step": 7, |
| "token_acc": 0.7655801718674399 |
| }, |
| { |
| "epoch": 0.023121387283236993, |
| "grad_norm": 5.840135867020467, |
| "learning_rate": 1.5384615384615385e-07, |
| "loss": 0.8561823964118958, |
| "step": 8, |
| "token_acc": 0.7551989061787877 |
| }, |
| { |
| "epoch": 0.02601156069364162, |
| "grad_norm": 4.93074237263625, |
| "learning_rate": 1.7307692307692305e-07, |
| "loss": 0.7908620834350586, |
| "step": 9, |
| "token_acc": 0.7736331966727492 |
| }, |
| { |
| "epoch": 0.028901734104046242, |
| "grad_norm": 5.513250434452228, |
| "learning_rate": 1.9230769230769231e-07, |
| "loss": 0.8536443710327148, |
| "step": 10, |
| "token_acc": 0.7537275655775426 |
| }, |
| { |
| "epoch": 0.031791907514450865, |
| "grad_norm": 5.6890026898261254, |
| "learning_rate": 2.1153846153846152e-07, |
| "loss": 0.8860396146774292, |
| "step": 11, |
| "token_acc": 0.7444433233394834 |
| }, |
| { |
| "epoch": 0.03468208092485549, |
| "grad_norm": 5.204460891865508, |
| "learning_rate": 2.3076923076923078e-07, |
| "loss": 0.8523805141448975, |
| "step": 12, |
| "token_acc": 0.7516541745600307 |
| }, |
| { |
| "epoch": 0.03757225433526012, |
| "grad_norm": 5.727537830602335, |
| "learning_rate": 2.5e-07, |
| "loss": 0.8715107440948486, |
| "step": 13, |
| "token_acc": 0.7483992966857977 |
| }, |
| { |
| "epoch": 0.04046242774566474, |
| "grad_norm": 5.573759954820184, |
| "learning_rate": 2.692307692307692e-07, |
| "loss": 0.8587294220924377, |
| "step": 14, |
| "token_acc": 0.752293881658215 |
| }, |
| { |
| "epoch": 0.04335260115606936, |
| "grad_norm": 5.626217493866761, |
| "learning_rate": 2.884615384615384e-07, |
| "loss": 0.8353704810142517, |
| "step": 15, |
| "token_acc": 0.7603716874100415 |
| }, |
| { |
| "epoch": 0.046242774566473986, |
| "grad_norm": 5.780174641621012, |
| "learning_rate": 3.076923076923077e-07, |
| "loss": 0.8726707100868225, |
| "step": 16, |
| "token_acc": 0.750940308255944 |
| }, |
| { |
| "epoch": 0.049132947976878616, |
| "grad_norm": 4.3328681597964875, |
| "learning_rate": 3.269230769230769e-07, |
| "loss": 0.718013346195221, |
| "step": 17, |
| "token_acc": 0.7931623195891079 |
| }, |
| { |
| "epoch": 0.05202312138728324, |
| "grad_norm": 5.47302287757926, |
| "learning_rate": 3.461538461538461e-07, |
| "loss": 0.8578764200210571, |
| "step": 18, |
| "token_acc": 0.7521628365412952 |
| }, |
| { |
| "epoch": 0.05491329479768786, |
| "grad_norm": 5.003969625540578, |
| "learning_rate": 3.6538461538461534e-07, |
| "loss": 0.8133180737495422, |
| "step": 19, |
| "token_acc": 0.7619723575896223 |
| }, |
| { |
| "epoch": 0.057803468208092484, |
| "grad_norm": 5.6946171227062115, |
| "learning_rate": 3.8461538461538463e-07, |
| "loss": 0.8691498041152954, |
| "step": 20, |
| "token_acc": 0.7492446732183174 |
| }, |
| { |
| "epoch": 0.06069364161849711, |
| "grad_norm": 5.520197357593707, |
| "learning_rate": 4.0384615384615386e-07, |
| "loss": 0.907565712928772, |
| "step": 21, |
| "token_acc": 0.739601049536876 |
| }, |
| { |
| "epoch": 0.06358381502890173, |
| "grad_norm": 4.583439446754697, |
| "learning_rate": 4.2307692307692304e-07, |
| "loss": 0.8114128708839417, |
| "step": 22, |
| "token_acc": 0.7639052404881551 |
| }, |
| { |
| "epoch": 0.06647398843930635, |
| "grad_norm": 4.920313367321747, |
| "learning_rate": 4.423076923076923e-07, |
| "loss": 0.8422179222106934, |
| "step": 23, |
| "token_acc": 0.7567072154640894 |
| }, |
| { |
| "epoch": 0.06936416184971098, |
| "grad_norm": 5.263032949222765, |
| "learning_rate": 4.6153846153846156e-07, |
| "loss": 0.8715439438819885, |
| "step": 24, |
| "token_acc": 0.7464963254144591 |
| }, |
| { |
| "epoch": 0.07225433526011561, |
| "grad_norm": 4.870068302475069, |
| "learning_rate": 4.807692307692307e-07, |
| "loss": 0.8316457271575928, |
| "step": 25, |
| "token_acc": 0.7582423573346417 |
| }, |
| { |
| "epoch": 0.07514450867052024, |
| "grad_norm": 4.199216776916685, |
| "learning_rate": 5e-07, |
| "loss": 0.7344825267791748, |
| "step": 26, |
| "token_acc": 0.7839733369517283 |
| }, |
| { |
| "epoch": 0.07803468208092486, |
| "grad_norm": 4.588333481721223, |
| "learning_rate": 5.192307692307692e-07, |
| "loss": 0.8012775182723999, |
| "step": 27, |
| "token_acc": 0.767028959599571 |
| }, |
| { |
| "epoch": 0.08092485549132948, |
| "grad_norm": 4.194674553902997, |
| "learning_rate": 5.384615384615384e-07, |
| "loss": 0.712963879108429, |
| "step": 28, |
| "token_acc": 0.7923740483107238 |
| }, |
| { |
| "epoch": 0.0838150289017341, |
| "grad_norm": 4.053747357354017, |
| "learning_rate": 5.576923076923077e-07, |
| "loss": 0.7496437430381775, |
| "step": 29, |
| "token_acc": 0.7814042116577906 |
| }, |
| { |
| "epoch": 0.08670520231213873, |
| "grad_norm": 3.718069447981091, |
| "learning_rate": 5.769230769230768e-07, |
| "loss": 0.7818017601966858, |
| "step": 30, |
| "token_acc": 0.7700440596977877 |
| }, |
| { |
| "epoch": 0.08959537572254335, |
| "grad_norm": 3.420080175405301, |
| "learning_rate": 5.961538461538461e-07, |
| "loss": 0.7861907482147217, |
| "step": 31, |
| "token_acc": 0.7627405151738911 |
| }, |
| { |
| "epoch": 0.09248554913294797, |
| "grad_norm": 2.639800184791621, |
| "learning_rate": 6.153846153846154e-07, |
| "loss": 0.6684123277664185, |
| "step": 32, |
| "token_acc": 0.7977043354655295 |
| }, |
| { |
| "epoch": 0.0953757225433526, |
| "grad_norm": 2.9502697501210413, |
| "learning_rate": 6.346153846153845e-07, |
| "loss": 0.7446445226669312, |
| "step": 33, |
| "token_acc": 0.771793289625916 |
| }, |
| { |
| "epoch": 0.09826589595375723, |
| "grad_norm": 2.8110101894954345, |
| "learning_rate": 6.538461538461538e-07, |
| "loss": 0.7382901906967163, |
| "step": 34, |
| "token_acc": 0.7770551133606955 |
| }, |
| { |
| "epoch": 0.10115606936416185, |
| "grad_norm": 2.9797000830123226, |
| "learning_rate": 6.730769230769231e-07, |
| "loss": 0.7384837865829468, |
| "step": 35, |
| "token_acc": 0.7742859974561853 |
| }, |
| { |
| "epoch": 0.10404624277456648, |
| "grad_norm": 2.7709890477908177, |
| "learning_rate": 6.923076923076922e-07, |
| "loss": 0.7289628982543945, |
| "step": 36, |
| "token_acc": 0.7765123239561783 |
| }, |
| { |
| "epoch": 0.1069364161849711, |
| "grad_norm": 2.59015685758215, |
| "learning_rate": 7.115384615384616e-07, |
| "loss": 0.7290064096450806, |
| "step": 37, |
| "token_acc": 0.7784733624454149 |
| }, |
| { |
| "epoch": 0.10982658959537572, |
| "grad_norm": 2.8646835764259233, |
| "learning_rate": 7.307692307692307e-07, |
| "loss": 0.7594764828681946, |
| "step": 38, |
| "token_acc": 0.7671359481427088 |
| }, |
| { |
| "epoch": 0.11271676300578035, |
| "grad_norm": 2.349168631790223, |
| "learning_rate": 7.5e-07, |
| "loss": 0.72218257188797, |
| "step": 39, |
| "token_acc": 0.7804759091596026 |
| }, |
| { |
| "epoch": 0.11560693641618497, |
| "grad_norm": 2.511985129172397, |
| "learning_rate": 7.692307692307693e-07, |
| "loss": 0.7277328968048096, |
| "step": 40, |
| "token_acc": 0.7780124249072232 |
| }, |
| { |
| "epoch": 0.11849710982658959, |
| "grad_norm": 2.5792884120122235, |
| "learning_rate": 7.884615384615384e-07, |
| "loss": 0.7460165619850159, |
| "step": 41, |
| "token_acc": 0.7733394615523893 |
| }, |
| { |
| "epoch": 0.12138728323699421, |
| "grad_norm": 1.5451971118538999, |
| "learning_rate": 8.076923076923077e-07, |
| "loss": 0.7386133670806885, |
| "step": 42, |
| "token_acc": 0.772020024353944 |
| }, |
| { |
| "epoch": 0.12427745664739884, |
| "grad_norm": 1.3982437840218045, |
| "learning_rate": 8.269230769230768e-07, |
| "loss": 0.7192668914794922, |
| "step": 43, |
| "token_acc": 0.7744656594039339 |
| }, |
| { |
| "epoch": 0.12716763005780346, |
| "grad_norm": 1.4772019806138394, |
| "learning_rate": 8.461538461538461e-07, |
| "loss": 0.6977580189704895, |
| "step": 44, |
| "token_acc": 0.7803848372212253 |
| }, |
| { |
| "epoch": 0.13005780346820808, |
| "grad_norm": 1.426662829341362, |
| "learning_rate": 8.653846153846154e-07, |
| "loss": 0.6999402642250061, |
| "step": 45, |
| "token_acc": 0.778780228821366 |
| }, |
| { |
| "epoch": 0.1329479768786127, |
| "grad_norm": 1.4168889938692493, |
| "learning_rate": 8.846153846153846e-07, |
| "loss": 0.7392410635948181, |
| "step": 46, |
| "token_acc": 0.7691270558007607 |
| }, |
| { |
| "epoch": 0.13583815028901733, |
| "grad_norm": 1.4711907038839338, |
| "learning_rate": 9.038461538461538e-07, |
| "loss": 0.7351399660110474, |
| "step": 47, |
| "token_acc": 0.7670848343481196 |
| }, |
| { |
| "epoch": 0.13872832369942195, |
| "grad_norm": 1.2965845227191142, |
| "learning_rate": 9.230769230769231e-07, |
| "loss": 0.7003874778747559, |
| "step": 48, |
| "token_acc": 0.7787950748052811 |
| }, |
| { |
| "epoch": 0.1416184971098266, |
| "grad_norm": 1.292104981035939, |
| "learning_rate": 9.423076923076923e-07, |
| "loss": 0.7326341867446899, |
| "step": 49, |
| "token_acc": 0.7685059219819624 |
| }, |
| { |
| "epoch": 0.14450867052023122, |
| "grad_norm": 1.2291132980421766, |
| "learning_rate": 9.615384615384615e-07, |
| "loss": 0.6871765851974487, |
| "step": 50, |
| "token_acc": 0.7841781074662453 |
| }, |
| { |
| "epoch": 0.14739884393063585, |
| "grad_norm": 1.123170268506369, |
| "learning_rate": 9.807692307692306e-07, |
| "loss": 0.6960352659225464, |
| "step": 51, |
| "token_acc": 0.7801758979708864 |
| }, |
| { |
| "epoch": 0.15028901734104047, |
| "grad_norm": 1.00691295990528, |
| "learning_rate": 1e-06, |
| "loss": 0.6956222653388977, |
| "step": 52, |
| "token_acc": 0.7829201628190622 |
| }, |
| { |
| "epoch": 0.1531791907514451, |
| "grad_norm": 0.9370942178938112, |
| "learning_rate": 9.999974620354198e-07, |
| "loss": 0.6958713531494141, |
| "step": 53, |
| "token_acc": 0.7809317408675194 |
| }, |
| { |
| "epoch": 0.15606936416184972, |
| "grad_norm": 1.1057401423493767, |
| "learning_rate": 9.999898481674446e-07, |
| "loss": 0.7062472105026245, |
| "step": 54, |
| "token_acc": 0.7756643140884724 |
| }, |
| { |
| "epoch": 0.15895953757225434, |
| "grad_norm": 0.8619542832761329, |
| "learning_rate": 9.999771584733693e-07, |
| "loss": 0.6577130556106567, |
| "step": 55, |
| "token_acc": 0.7922278867707445 |
| }, |
| { |
| "epoch": 0.16184971098265896, |
| "grad_norm": 0.9166807116221914, |
| "learning_rate": 9.999593930820181e-07, |
| "loss": 0.6945655941963196, |
| "step": 56, |
| "token_acc": 0.77725851438142 |
| }, |
| { |
| "epoch": 0.16473988439306358, |
| "grad_norm": 0.939862155697591, |
| "learning_rate": 9.999365521737421e-07, |
| "loss": 0.6921431422233582, |
| "step": 57, |
| "token_acc": 0.7773106126184057 |
| }, |
| { |
| "epoch": 0.1676300578034682, |
| "grad_norm": 0.9756834016584089, |
| "learning_rate": 9.999086359804195e-07, |
| "loss": 0.7256878018379211, |
| "step": 58, |
| "token_acc": 0.7686141412007078 |
| }, |
| { |
| "epoch": 0.17052023121387283, |
| "grad_norm": 0.8557348808489443, |
| "learning_rate": 9.99875644785451e-07, |
| "loss": 0.6813135147094727, |
| "step": 59, |
| "token_acc": 0.7843321803650282 |
| }, |
| { |
| "epoch": 0.17341040462427745, |
| "grad_norm": 0.8266352802865822, |
| "learning_rate": 9.998375789237592e-07, |
| "loss": 0.6513127088546753, |
| "step": 60, |
| "token_acc": 0.7914724403689247 |
| }, |
| { |
| "epoch": 0.17630057803468208, |
| "grad_norm": 0.8497866635296994, |
| "learning_rate": 9.99794438781783e-07, |
| "loss": 0.6605720520019531, |
| "step": 61, |
| "token_acc": 0.78915683493063 |
| }, |
| { |
| "epoch": 0.1791907514450867, |
| "grad_norm": 0.8351298607584619, |
| "learning_rate": 9.99746224797475e-07, |
| "loss": 0.6266233325004578, |
| "step": 62, |
| "token_acc": 0.7963586246917163 |
| }, |
| { |
| "epoch": 0.18208092485549132, |
| "grad_norm": 0.9019491097127296, |
| "learning_rate": 9.996929374602968e-07, |
| "loss": 0.6673212647438049, |
| "step": 63, |
| "token_acc": 0.7844323603274962 |
| }, |
| { |
| "epoch": 0.18497109826589594, |
| "grad_norm": 0.8813921264261143, |
| "learning_rate": 9.996345773112138e-07, |
| "loss": 0.7036587595939636, |
| "step": 64, |
| "token_acc": 0.7740703997187025 |
| }, |
| { |
| "epoch": 0.18786127167630057, |
| "grad_norm": 0.8869002415681166, |
| "learning_rate": 9.995711449426901e-07, |
| "loss": 0.6981368064880371, |
| "step": 65, |
| "token_acc": 0.7753412151954072 |
| }, |
| { |
| "epoch": 0.1907514450867052, |
| "grad_norm": 0.7752119383387671, |
| "learning_rate": 9.99502640998682e-07, |
| "loss": 0.6600744724273682, |
| "step": 66, |
| "token_acc": 0.788013646851561 |
| }, |
| { |
| "epoch": 0.1936416184971098, |
| "grad_norm": 0.8616071421748983, |
| "learning_rate": 9.99429066174632e-07, |
| "loss": 0.6547806262969971, |
| "step": 67, |
| "token_acc": 0.7894853017554794 |
| }, |
| { |
| "epoch": 0.19653179190751446, |
| "grad_norm": 0.8018562843868764, |
| "learning_rate": 9.993504212174613e-07, |
| "loss": 0.6278072595596313, |
| "step": 68, |
| "token_acc": 0.7972202882855006 |
| }, |
| { |
| "epoch": 0.1994219653179191, |
| "grad_norm": 0.7473736558335493, |
| "learning_rate": 9.992667069255618e-07, |
| "loss": 0.6237850785255432, |
| "step": 69, |
| "token_acc": 0.7982735792533637 |
| }, |
| { |
| "epoch": 0.2023121387283237, |
| "grad_norm": 0.6999587458869299, |
| "learning_rate": 9.991779241487899e-07, |
| "loss": 0.6401976346969604, |
| "step": 70, |
| "token_acc": 0.7928364264997928 |
| }, |
| { |
| "epoch": 0.20520231213872833, |
| "grad_norm": 0.6924984079683673, |
| "learning_rate": 9.990840737884554e-07, |
| "loss": 0.6805769205093384, |
| "step": 71, |
| "token_acc": 0.7801177818172763 |
| }, |
| { |
| "epoch": 0.20809248554913296, |
| "grad_norm": 0.7111004746445246, |
| "learning_rate": 9.989851567973138e-07, |
| "loss": 0.697790801525116, |
| "step": 72, |
| "token_acc": 0.7760267430754537 |
| }, |
| { |
| "epoch": 0.21098265895953758, |
| "grad_norm": 0.6869871346194354, |
| "learning_rate": 9.988811741795566e-07, |
| "loss": 0.6186888217926025, |
| "step": 73, |
| "token_acc": 0.7994626021789282 |
| }, |
| { |
| "epoch": 0.2138728323699422, |
| "grad_norm": 0.6177183453130074, |
| "learning_rate": 9.987721269908005e-07, |
| "loss": 0.5868158340454102, |
| "step": 74, |
| "token_acc": 0.8114196656276566 |
| }, |
| { |
| "epoch": 0.21676300578034682, |
| "grad_norm": 0.6307801092890282, |
| "learning_rate": 9.98658016338077e-07, |
| "loss": 0.6723257303237915, |
| "step": 75, |
| "token_acc": 0.7827200467097494 |
| }, |
| { |
| "epoch": 0.21965317919075145, |
| "grad_norm": 0.6150476355618669, |
| "learning_rate": 9.985388433798215e-07, |
| "loss": 0.6530448198318481, |
| "step": 76, |
| "token_acc": 0.7907922080887895 |
| }, |
| { |
| "epoch": 0.22254335260115607, |
| "grad_norm": 0.5940300278296939, |
| "learning_rate": 9.984146093258608e-07, |
| "loss": 0.6855973601341248, |
| "step": 77, |
| "token_acc": 0.7784828714678302 |
| }, |
| { |
| "epoch": 0.2254335260115607, |
| "grad_norm": 0.9497443806056196, |
| "learning_rate": 9.982853154374013e-07, |
| "loss": 0.6745576858520508, |
| "step": 78, |
| "token_acc": 0.7854156213413614 |
| }, |
| { |
| "epoch": 0.22832369942196531, |
| "grad_norm": 0.6791196750467849, |
| "learning_rate": 9.981509630270167e-07, |
| "loss": 0.6383039951324463, |
| "step": 79, |
| "token_acc": 0.7940166430627679 |
| }, |
| { |
| "epoch": 0.23121387283236994, |
| "grad_norm": 0.6194193913683183, |
| "learning_rate": 9.980115534586333e-07, |
| "loss": 0.6046701669692993, |
| "step": 80, |
| "token_acc": 0.8031263032947291 |
| }, |
| { |
| "epoch": 0.23410404624277456, |
| "grad_norm": 0.584941512318404, |
| "learning_rate": 9.978670881475172e-07, |
| "loss": 0.6113057136535645, |
| "step": 81, |
| "token_acc": 0.8002890249696458 |
| }, |
| { |
| "epoch": 0.23699421965317918, |
| "grad_norm": 0.576070429321087, |
| "learning_rate": 9.9771756856026e-07, |
| "loss": 0.6508547067642212, |
| "step": 82, |
| "token_acc": 0.7917054316809551 |
| }, |
| { |
| "epoch": 0.2398843930635838, |
| "grad_norm": 0.5782915674069733, |
| "learning_rate": 9.975629962147633e-07, |
| "loss": 0.6592724323272705, |
| "step": 83, |
| "token_acc": 0.7841273280945267 |
| }, |
| { |
| "epoch": 0.24277456647398843, |
| "grad_norm": 0.5894596908351907, |
| "learning_rate": 9.974033726802235e-07, |
| "loss": 0.5925013422966003, |
| "step": 84, |
| "token_acc": 0.8060771521769962 |
| }, |
| { |
| "epoch": 0.24566473988439305, |
| "grad_norm": 0.5279159216055382, |
| "learning_rate": 9.972386995771164e-07, |
| "loss": 0.6444322466850281, |
| "step": 85, |
| "token_acc": 0.7914691943127962 |
| }, |
| { |
| "epoch": 0.24855491329479767, |
| "grad_norm": 0.5809453095781784, |
| "learning_rate": 9.970689785771798e-07, |
| "loss": 0.6508707404136658, |
| "step": 86, |
| "token_acc": 0.7889469472867465 |
| }, |
| { |
| "epoch": 0.2514450867052023, |
| "grad_norm": 0.6715617527059413, |
| "learning_rate": 9.968942114033973e-07, |
| "loss": 0.5962953567504883, |
| "step": 87, |
| "token_acc": 0.8063397578524576 |
| }, |
| { |
| "epoch": 0.2543352601156069, |
| "grad_norm": 0.6155392081496504, |
| "learning_rate": 9.967143998299802e-07, |
| "loss": 0.6590582132339478, |
| "step": 88, |
| "token_acc": 0.786015653473848 |
| }, |
| { |
| "epoch": 0.25722543352601157, |
| "grad_norm": 0.6351340196244468, |
| "learning_rate": 9.965295456823507e-07, |
| "loss": 0.6178431510925293, |
| "step": 89, |
| "token_acc": 0.799615789600598 |
| }, |
| { |
| "epoch": 0.26011560693641617, |
| "grad_norm": 0.6389337976646079, |
| "learning_rate": 9.963396508371217e-07, |
| "loss": 0.6065088510513306, |
| "step": 90, |
| "token_acc": 0.8027006050850137 |
| }, |
| { |
| "epoch": 0.2630057803468208, |
| "grad_norm": 0.5682640638544528, |
| "learning_rate": 9.961447172220785e-07, |
| "loss": 0.6684330105781555, |
| "step": 91, |
| "token_acc": 0.7839487407338119 |
| }, |
| { |
| "epoch": 0.2658959537572254, |
| "grad_norm": 0.6029647051880634, |
| "learning_rate": 9.959447468161596e-07, |
| "loss": 0.6358112096786499, |
| "step": 92, |
| "token_acc": 0.7908192833685276 |
| }, |
| { |
| "epoch": 0.26878612716763006, |
| "grad_norm": 0.5632656008285092, |
| "learning_rate": 9.957397416494366e-07, |
| "loss": 0.6601473093032837, |
| "step": 93, |
| "token_acc": 0.7853722190438847 |
| }, |
| { |
| "epoch": 0.27167630057803466, |
| "grad_norm": 0.6013944385740286, |
| "learning_rate": 9.955297038030926e-07, |
| "loss": 0.668410062789917, |
| "step": 94, |
| "token_acc": 0.7828623747800797 |
| }, |
| { |
| "epoch": 0.2745664739884393, |
| "grad_norm": 0.5541440784608198, |
| "learning_rate": 9.95314635409402e-07, |
| "loss": 0.6117832660675049, |
| "step": 95, |
| "token_acc": 0.7995787198241185 |
| }, |
| { |
| "epoch": 0.2774566473988439, |
| "grad_norm": 0.6314740935897156, |
| "learning_rate": 9.95094538651709e-07, |
| "loss": 0.6261177062988281, |
| "step": 96, |
| "token_acc": 0.7962018726778723 |
| }, |
| { |
| "epoch": 0.28034682080924855, |
| "grad_norm": 0.7158918907846333, |
| "learning_rate": 9.948694157644042e-07, |
| "loss": 0.6556503772735596, |
| "step": 97, |
| "token_acc": 0.7869902468442614 |
| }, |
| { |
| "epoch": 0.2832369942196532, |
| "grad_norm": 0.5701552977234003, |
| "learning_rate": 9.946392690329036e-07, |
| "loss": 0.6187049746513367, |
| "step": 98, |
| "token_acc": 0.8010530865652874 |
| }, |
| { |
| "epoch": 0.2861271676300578, |
| "grad_norm": 0.5860362253461248, |
| "learning_rate": 9.944041007936244e-07, |
| "loss": 0.5410789847373962, |
| "step": 99, |
| "token_acc": 0.8207894360088595 |
| }, |
| { |
| "epoch": 0.28901734104046245, |
| "grad_norm": 0.6303808407906236, |
| "learning_rate": 9.941639134339606e-07, |
| "loss": 0.5768465399742126, |
| "step": 100, |
| "token_acc": 0.8087328873195813 |
| }, |
| { |
| "epoch": 0.29190751445086704, |
| "grad_norm": 0.616425173315349, |
| "learning_rate": 9.939187093922609e-07, |
| "loss": 0.6295806169509888, |
| "step": 101, |
| "token_acc": 0.7958193257384945 |
| }, |
| { |
| "epoch": 0.2947976878612717, |
| "grad_norm": 0.5753993917901922, |
| "learning_rate": 9.936684911578017e-07, |
| "loss": 0.5983704328536987, |
| "step": 102, |
| "token_acc": 0.8031383517086323 |
| }, |
| { |
| "epoch": 0.2976878612716763, |
| "grad_norm": 0.6140080800303133, |
| "learning_rate": 9.93413261270763e-07, |
| "loss": 0.5729444026947021, |
| "step": 103, |
| "token_acc": 0.816418031517547 |
| }, |
| { |
| "epoch": 0.30057803468208094, |
| "grad_norm": 0.5607455073068854, |
| "learning_rate": 9.931530223222026e-07, |
| "loss": 0.5967170596122742, |
| "step": 104, |
| "token_acc": 0.803475704051983 |
| }, |
| { |
| "epoch": 0.30346820809248554, |
| "grad_norm": 0.5675327028480304, |
| "learning_rate": 9.928877769540293e-07, |
| "loss": 0.6241474151611328, |
| "step": 105, |
| "token_acc": 0.7967706129971308 |
| }, |
| { |
| "epoch": 0.3063583815028902, |
| "grad_norm": 0.6046538978438704, |
| "learning_rate": 9.926175278589767e-07, |
| "loss": 0.6553393602371216, |
| "step": 106, |
| "token_acc": 0.7874527013411549 |
| }, |
| { |
| "epoch": 0.3092485549132948, |
| "grad_norm": 0.5734166676914433, |
| "learning_rate": 9.923422777805751e-07, |
| "loss": 0.6570492386817932, |
| "step": 107, |
| "token_acc": 0.7870601190355553 |
| }, |
| { |
| "epoch": 0.31213872832369943, |
| "grad_norm": 0.6001726322335739, |
| "learning_rate": 9.920620295131245e-07, |
| "loss": 0.6794227361679077, |
| "step": 108, |
| "token_acc": 0.7787853169709925 |
| }, |
| { |
| "epoch": 0.315028901734104, |
| "grad_norm": 0.6099760009068769, |
| "learning_rate": 9.917767859016654e-07, |
| "loss": 0.615708589553833, |
| "step": 109, |
| "token_acc": 0.7985643236886592 |
| }, |
| { |
| "epoch": 0.3179190751445087, |
| "grad_norm": 0.5778662206360861, |
| "learning_rate": 9.91486549841951e-07, |
| "loss": 0.5809392929077148, |
| "step": 110, |
| "token_acc": 0.8094654316503208 |
| }, |
| { |
| "epoch": 0.3208092485549133, |
| "grad_norm": 0.5704401870141648, |
| "learning_rate": 9.911913242804158e-07, |
| "loss": 0.6263046264648438, |
| "step": 111, |
| "token_acc": 0.7955055464485222 |
| }, |
| { |
| "epoch": 0.3236994219653179, |
| "grad_norm": 0.613652119648305, |
| "learning_rate": 9.908911122141486e-07, |
| "loss": 0.5810531377792358, |
| "step": 112, |
| "token_acc": 0.8122967000471536 |
| }, |
| { |
| "epoch": 0.3265895953757225, |
| "grad_norm": 0.5754148794590288, |
| "learning_rate": 9.905859166908594e-07, |
| "loss": 0.6450198888778687, |
| "step": 113, |
| "token_acc": 0.787714712471994 |
| }, |
| { |
| "epoch": 0.32947976878612717, |
| "grad_norm": 0.8102498152797749, |
| "learning_rate": 9.902757408088501e-07, |
| "loss": 0.6492223739624023, |
| "step": 114, |
| "token_acc": 0.7880358603802299 |
| }, |
| { |
| "epoch": 0.33236994219653176, |
| "grad_norm": 0.525946407195948, |
| "learning_rate": 9.899605877169824e-07, |
| "loss": 0.5984295606613159, |
| "step": 115, |
| "token_acc": 0.8024764689756009 |
| }, |
| { |
| "epoch": 0.3352601156069364, |
| "grad_norm": 0.5751169418426346, |
| "learning_rate": 9.896404606146455e-07, |
| "loss": 0.6295244097709656, |
| "step": 116, |
| "token_acc": 0.7922646493276646 |
| }, |
| { |
| "epoch": 0.33815028901734107, |
| "grad_norm": 0.5079153092397871, |
| "learning_rate": 9.893153627517248e-07, |
| "loss": 0.5976470112800598, |
| "step": 117, |
| "token_acc": 0.8038826857227929 |
| }, |
| { |
| "epoch": 0.34104046242774566, |
| "grad_norm": 0.5841459704013869, |
| "learning_rate": 9.889852974285672e-07, |
| "loss": 0.6472890973091125, |
| "step": 118, |
| "token_acc": 0.789158388689134 |
| }, |
| { |
| "epoch": 0.3439306358381503, |
| "grad_norm": 0.6150844233030651, |
| "learning_rate": 9.886502679959497e-07, |
| "loss": 0.5413444638252258, |
| "step": 119, |
| "token_acc": 0.8222654666342334 |
| }, |
| { |
| "epoch": 0.3468208092485549, |
| "grad_norm": 0.5935208615034318, |
| "learning_rate": 9.883102778550434e-07, |
| "loss": 0.663335919380188, |
| "step": 120, |
| "token_acc": 0.7862711064419373 |
| }, |
| { |
| "epoch": 0.34971098265895956, |
| "grad_norm": 0.6268736075123943, |
| "learning_rate": 9.879653304573797e-07, |
| "loss": 0.6072404384613037, |
| "step": 121, |
| "token_acc": 0.8010549723328334 |
| }, |
| { |
| "epoch": 0.35260115606936415, |
| "grad_norm": 0.5583642618257684, |
| "learning_rate": 9.876154293048163e-07, |
| "loss": 0.6144070029258728, |
| "step": 122, |
| "token_acc": 0.796381277924315 |
| }, |
| { |
| "epoch": 0.3554913294797688, |
| "grad_norm": 0.5410450297039057, |
| "learning_rate": 9.872605779494997e-07, |
| "loss": 0.5954463481903076, |
| "step": 123, |
| "token_acc": 0.8055216585201416 |
| }, |
| { |
| "epoch": 0.3583815028901734, |
| "grad_norm": 0.6425891449290073, |
| "learning_rate": 9.869007799938305e-07, |
| "loss": 0.6611199378967285, |
| "step": 124, |
| "token_acc": 0.786190934231093 |
| }, |
| { |
| "epoch": 0.36127167630057805, |
| "grad_norm": 0.5146021782369569, |
| "learning_rate": 9.865360390904269e-07, |
| "loss": 0.6081857085227966, |
| "step": 125, |
| "token_acc": 0.8017568952922327 |
| }, |
| { |
| "epoch": 0.36416184971098264, |
| "grad_norm": 0.5766433781688939, |
| "learning_rate": 9.86166358942087e-07, |
| "loss": 0.609286904335022, |
| "step": 126, |
| "token_acc": 0.8002619382070126 |
| }, |
| { |
| "epoch": 0.3670520231213873, |
| "grad_norm": 0.5450128204125277, |
| "learning_rate": 9.857917433017508e-07, |
| "loss": 0.5991868376731873, |
| "step": 127, |
| "token_acc": 0.8008499444919779 |
| }, |
| { |
| "epoch": 0.3699421965317919, |
| "grad_norm": 0.5810734133360594, |
| "learning_rate": 9.854121959724635e-07, |
| "loss": 0.607757568359375, |
| "step": 128, |
| "token_acc": 0.7998384333607254 |
| }, |
| { |
| "epoch": 0.37283236994219654, |
| "grad_norm": 0.5770182474218292, |
| "learning_rate": 9.85027720807336e-07, |
| "loss": 0.5918303728103638, |
| "step": 129, |
| "token_acc": 0.8040288846142103 |
| }, |
| { |
| "epoch": 0.37572254335260113, |
| "grad_norm": 0.5360179518405197, |
| "learning_rate": 9.846383217095051e-07, |
| "loss": 0.646679162979126, |
| "step": 130, |
| "token_acc": 0.7929178624953734 |
| }, |
| { |
| "epoch": 0.3786127167630058, |
| "grad_norm": 0.5278251178995469, |
| "learning_rate": 9.842440026320958e-07, |
| "loss": 0.6081724166870117, |
| "step": 131, |
| "token_acc": 0.7979095393804223 |
| }, |
| { |
| "epoch": 0.3815028901734104, |
| "grad_norm": 0.5857831669587502, |
| "learning_rate": 9.838447675781793e-07, |
| "loss": 0.5776185989379883, |
| "step": 132, |
| "token_acc": 0.8089180214756997 |
| }, |
| { |
| "epoch": 0.38439306358381503, |
| "grad_norm": 0.49786698791997097, |
| "learning_rate": 9.834406206007335e-07, |
| "loss": 0.6665687561035156, |
| "step": 133, |
| "token_acc": 0.7817376207568673 |
| }, |
| { |
| "epoch": 0.3872832369942196, |
| "grad_norm": 0.5272403389699103, |
| "learning_rate": 9.83031565802601e-07, |
| "loss": 0.607385516166687, |
| "step": 134, |
| "token_acc": 0.8027202321406094 |
| }, |
| { |
| "epoch": 0.3901734104046243, |
| "grad_norm": 0.5881996711071641, |
| "learning_rate": 9.826176073364482e-07, |
| "loss": 0.6304242014884949, |
| "step": 135, |
| "token_acc": 0.7967265117890893 |
| }, |
| { |
| "epoch": 0.3930635838150289, |
| "grad_norm": 0.5540108888142588, |
| "learning_rate": 9.821987494047228e-07, |
| "loss": 0.6314468383789062, |
| "step": 136, |
| "token_acc": 0.7919692387557874 |
| }, |
| { |
| "epoch": 0.3959537572254335, |
| "grad_norm": 0.5722154073047628, |
| "learning_rate": 9.817749962596114e-07, |
| "loss": 0.602054238319397, |
| "step": 137, |
| "token_acc": 0.802066245506265 |
| }, |
| { |
| "epoch": 0.3988439306358382, |
| "grad_norm": 0.5596376441219622, |
| "learning_rate": 9.813463522029957e-07, |
| "loss": 0.640647292137146, |
| "step": 138, |
| "token_acc": 0.7918518615352437 |
| }, |
| { |
| "epoch": 0.40173410404624277, |
| "grad_norm": 0.5545182797573466, |
| "learning_rate": 9.809128215864096e-07, |
| "loss": 0.6066859364509583, |
| "step": 139, |
| "token_acc": 0.801196721208976 |
| }, |
| { |
| "epoch": 0.4046242774566474, |
| "grad_norm": 0.5784484895204948, |
| "learning_rate": 9.804744088109941e-07, |
| "loss": 0.5408949851989746, |
| "step": 140, |
| "token_acc": 0.8248328121430766 |
| }, |
| { |
| "epoch": 0.407514450867052, |
| "grad_norm": 0.5637555298781167, |
| "learning_rate": 9.80031118327454e-07, |
| "loss": 0.6107698678970337, |
| "step": 141, |
| "token_acc": 0.7982127620772081 |
| }, |
| { |
| "epoch": 0.41040462427745666, |
| "grad_norm": 0.603110232763829, |
| "learning_rate": 9.795829546360113e-07, |
| "loss": 0.5912826061248779, |
| "step": 142, |
| "token_acc": 0.8041540066906055 |
| }, |
| { |
| "epoch": 0.41329479768786126, |
| "grad_norm": 0.5873555056914542, |
| "learning_rate": 9.791299222863602e-07, |
| "loss": 0.6161830425262451, |
| "step": 143, |
| "token_acc": 0.799708864508567 |
| }, |
| { |
| "epoch": 0.4161849710982659, |
| "grad_norm": 0.6843944560990027, |
| "learning_rate": 9.786720258776213e-07, |
| "loss": 0.5474255681037903, |
| "step": 144, |
| "token_acc": 0.8186930860033726 |
| }, |
| { |
| "epoch": 0.4190751445086705, |
| "grad_norm": 0.51545250769897, |
| "learning_rate": 9.782092700582936e-07, |
| "loss": 0.6216602325439453, |
| "step": 145, |
| "token_acc": 0.7965911940150556 |
| }, |
| { |
| "epoch": 0.42196531791907516, |
| "grad_norm": 0.5937549088482647, |
| "learning_rate": 9.77741659526209e-07, |
| "loss": 0.6248494386672974, |
| "step": 146, |
| "token_acc": 0.7956684720442111 |
| }, |
| { |
| "epoch": 0.42485549132947975, |
| "grad_norm": 0.5399979093459059, |
| "learning_rate": 9.77269199028483e-07, |
| "loss": 0.6089432239532471, |
| "step": 147, |
| "token_acc": 0.796826403459652 |
| }, |
| { |
| "epoch": 0.4277456647398844, |
| "grad_norm": 0.5564248028198713, |
| "learning_rate": 9.76791893361468e-07, |
| "loss": 0.6312023401260376, |
| "step": 148, |
| "token_acc": 0.7918012705466769 |
| }, |
| { |
| "epoch": 0.430635838150289, |
| "grad_norm": 0.559936805840691, |
| "learning_rate": 9.763097473707035e-07, |
| "loss": 0.619454026222229, |
| "step": 149, |
| "token_acc": 0.7984878886834271 |
| }, |
| { |
| "epoch": 0.43352601156069365, |
| "grad_norm": 0.6044059322614584, |
| "learning_rate": 9.758227659508668e-07, |
| "loss": 0.5221510529518127, |
| "step": 150, |
| "token_acc": 0.8266117865021535 |
| }, |
| { |
| "epoch": 0.43641618497109824, |
| "grad_norm": 0.5692770162596946, |
| "learning_rate": 9.753309540457248e-07, |
| "loss": 0.6139217615127563, |
| "step": 151, |
| "token_acc": 0.7982664696096701 |
| }, |
| { |
| "epoch": 0.4393063583815029, |
| "grad_norm": 0.5330985388783729, |
| "learning_rate": 9.748343166480822e-07, |
| "loss": 0.6154735088348389, |
| "step": 152, |
| "token_acc": 0.7984871546515382 |
| }, |
| { |
| "epoch": 0.4421965317919075, |
| "grad_norm": 0.6065632918781179, |
| "learning_rate": 9.743328587997314e-07, |
| "loss": 0.5449005365371704, |
| "step": 153, |
| "token_acc": 0.8221805561096261 |
| }, |
| { |
| "epoch": 0.44508670520231214, |
| "grad_norm": 0.6274255114547471, |
| "learning_rate": 9.738265855914012e-07, |
| "loss": 0.6112866401672363, |
| "step": 154, |
| "token_acc": 0.7997394616484714 |
| }, |
| { |
| "epoch": 0.4479768786127168, |
| "grad_norm": 0.6000527996102515, |
| "learning_rate": 9.733155021627057e-07, |
| "loss": 0.6302502155303955, |
| "step": 155, |
| "token_acc": 0.7939255615270142 |
| }, |
| { |
| "epoch": 0.4508670520231214, |
| "grad_norm": 0.5716424963426585, |
| "learning_rate": 9.727996137020916e-07, |
| "loss": 0.5589959621429443, |
| "step": 156, |
| "token_acc": 0.8167590708119868 |
| }, |
| { |
| "epoch": 0.45375722543352603, |
| "grad_norm": 0.5793130145184638, |
| "learning_rate": 9.722789254467854e-07, |
| "loss": 0.5811511874198914, |
| "step": 157, |
| "token_acc": 0.8068220017796527 |
| }, |
| { |
| "epoch": 0.45664739884393063, |
| "grad_norm": 0.6447386736666927, |
| "learning_rate": 9.717534426827404e-07, |
| "loss": 0.6125731468200684, |
| "step": 158, |
| "token_acc": 0.7982601354147698 |
| }, |
| { |
| "epoch": 0.4595375722543353, |
| "grad_norm": 0.5583551050757221, |
| "learning_rate": 9.712231707445831e-07, |
| "loss": 0.5681207180023193, |
| "step": 159, |
| "token_acc": 0.812138891502776 |
| }, |
| { |
| "epoch": 0.4624277456647399, |
| "grad_norm": 0.6227411154474924, |
| "learning_rate": 9.70688115015559e-07, |
| "loss": 0.5606650114059448, |
| "step": 160, |
| "token_acc": 0.8128119485280195 |
| }, |
| { |
| "epoch": 0.4653179190751445, |
| "grad_norm": 0.5637826519102942, |
| "learning_rate": 9.701482809274787e-07, |
| "loss": 0.584591269493103, |
| "step": 161, |
| "token_acc": 0.809975090499813 |
| }, |
| { |
| "epoch": 0.4682080924855491, |
| "grad_norm": 0.5527836562945804, |
| "learning_rate": 9.696036739606606e-07, |
| "loss": 0.6178029775619507, |
| "step": 162, |
| "token_acc": 0.7982424352237725 |
| }, |
| { |
| "epoch": 0.47109826589595377, |
| "grad_norm": 0.5261451706415371, |
| "learning_rate": 9.690542996438777e-07, |
| "loss": 0.5772680640220642, |
| "step": 163, |
| "token_acc": 0.8055154702213526 |
| }, |
| { |
| "epoch": 0.47398843930635837, |
| "grad_norm": 0.598598068991984, |
| "learning_rate": 9.685001635543005e-07, |
| "loss": 0.5761500597000122, |
| "step": 164, |
| "token_acc": 0.8095295422689632 |
| }, |
| { |
| "epoch": 0.476878612716763, |
| "grad_norm": 0.5603114991623558, |
| "learning_rate": 9.679412713174398e-07, |
| "loss": 0.6070771217346191, |
| "step": 165, |
| "token_acc": 0.7988323213451658 |
| }, |
| { |
| "epoch": 0.4797687861271676, |
| "grad_norm": 0.5909619017551228, |
| "learning_rate": 9.673776286070905e-07, |
| "loss": 0.5829952955245972, |
| "step": 166, |
| "token_acc": 0.8056856359399237 |
| }, |
| { |
| "epoch": 0.48265895953757226, |
| "grad_norm": 0.7664205949048083, |
| "learning_rate": 9.668092411452735e-07, |
| "loss": 0.591526985168457, |
| "step": 167, |
| "token_acc": 0.805959940764539 |
| }, |
| { |
| "epoch": 0.48554913294797686, |
| "grad_norm": 0.5816382553386844, |
| "learning_rate": 9.66236114702178e-07, |
| "loss": 0.6718764901161194, |
| "step": 168, |
| "token_acc": 0.7819054715177417 |
| }, |
| { |
| "epoch": 0.4884393063583815, |
| "grad_norm": 0.5443192837285905, |
| "learning_rate": 9.656582550961018e-07, |
| "loss": 0.5771794319152832, |
| "step": 169, |
| "token_acc": 0.8120637180483624 |
| }, |
| { |
| "epoch": 0.4913294797687861, |
| "grad_norm": 0.5439506241087468, |
| "learning_rate": 9.650756681933947e-07, |
| "loss": 0.5797525644302368, |
| "step": 170, |
| "token_acc": 0.8072481275670452 |
| }, |
| { |
| "epoch": 0.49421965317919075, |
| "grad_norm": 0.5750701292908912, |
| "learning_rate": 9.644883599083957e-07, |
| "loss": 0.616324782371521, |
| "step": 171, |
| "token_acc": 0.7961593487416124 |
| }, |
| { |
| "epoch": 0.49710982658959535, |
| "grad_norm": 0.5292422990653295, |
| "learning_rate": 9.638963362033756e-07, |
| "loss": 0.6252388954162598, |
| "step": 172, |
| "token_acc": 0.7945571248522018 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.519900156438812, |
| "learning_rate": 9.632996030884748e-07, |
| "loss": 0.6072378158569336, |
| "step": 173, |
| "token_acc": 0.7983872825711323 |
| }, |
| { |
| "epoch": 0.5028901734104047, |
| "grad_norm": 2.014285868322542, |
| "learning_rate": 9.626981666216439e-07, |
| "loss": 0.5167373418807983, |
| "step": 174, |
| "token_acc": 0.8304752994472689 |
| }, |
| { |
| "epoch": 0.5057803468208093, |
| "grad_norm": 0.6229356072638176, |
| "learning_rate": 9.620920329085802e-07, |
| "loss": 0.5613738894462585, |
| "step": 175, |
| "token_acc": 0.8164609282841512 |
| }, |
| { |
| "epoch": 0.5086705202312138, |
| "grad_norm": 0.6427491754173409, |
| "learning_rate": 9.614812081026678e-07, |
| "loss": 0.6089553236961365, |
| "step": 176, |
| "token_acc": 0.8013446815125724 |
| }, |
| { |
| "epoch": 0.5115606936416185, |
| "grad_norm": 0.4795382180524186, |
| "learning_rate": 9.608656984049132e-07, |
| "loss": 0.579177737236023, |
| "step": 177, |
| "token_acc": 0.806047379906923 |
| }, |
| { |
| "epoch": 0.5144508670520231, |
| "grad_norm": 0.5089663171794683, |
| "learning_rate": 9.602455100638835e-07, |
| "loss": 0.5813893675804138, |
| "step": 178, |
| "token_acc": 0.8087914556082915 |
| }, |
| { |
| "epoch": 0.5173410404624278, |
| "grad_norm": 0.6116010486180593, |
| "learning_rate": 9.596206493756432e-07, |
| "loss": 0.5549554824829102, |
| "step": 179, |
| "token_acc": 0.8173080502386111 |
| }, |
| { |
| "epoch": 0.5202312138728323, |
| "grad_norm": 0.4852226717563288, |
| "learning_rate": 9.589911226836895e-07, |
| "loss": 0.5808215737342834, |
| "step": 180, |
| "token_acc": 0.8052112098427888 |
| }, |
| { |
| "epoch": 0.523121387283237, |
| "grad_norm": 0.5270020853161572, |
| "learning_rate": 9.583569363788879e-07, |
| "loss": 0.6398844122886658, |
| "step": 181, |
| "token_acc": 0.7898708976833977 |
| }, |
| { |
| "epoch": 0.5260115606936416, |
| "grad_norm": 0.5073350335042175, |
| "learning_rate": 9.577180968994081e-07, |
| "loss": 0.6154753565788269, |
| "step": 182, |
| "token_acc": 0.7993068610377478 |
| }, |
| { |
| "epoch": 0.5289017341040463, |
| "grad_norm": 0.5631567506627345, |
| "learning_rate": 9.57074610730658e-07, |
| "loss": 0.5920361876487732, |
| "step": 183, |
| "token_acc": 0.8048126355828951 |
| }, |
| { |
| "epoch": 0.5317919075144508, |
| "grad_norm": 0.4995115799741094, |
| "learning_rate": 9.56426484405218e-07, |
| "loss": 0.5912809371948242, |
| "step": 184, |
| "token_acc": 0.8075411124942672 |
| }, |
| { |
| "epoch": 0.5346820809248555, |
| "grad_norm": 0.560250197890468, |
| "learning_rate": 9.557737245027746e-07, |
| "loss": 0.6125437021255493, |
| "step": 185, |
| "token_acc": 0.7972027972027972 |
| }, |
| { |
| "epoch": 0.5375722543352601, |
| "grad_norm": 0.5819218618969146, |
| "learning_rate": 9.551163376500542e-07, |
| "loss": 0.5732159614562988, |
| "step": 186, |
| "token_acc": 0.8115202124085258 |
| }, |
| { |
| "epoch": 0.5404624277456648, |
| "grad_norm": 0.6129732835255256, |
| "learning_rate": 9.544543305207546e-07, |
| "loss": 0.6079097986221313, |
| "step": 187, |
| "token_acc": 0.7997229197333102 |
| }, |
| { |
| "epoch": 0.5433526011560693, |
| "grad_norm": 0.5263001528585832, |
| "learning_rate": 9.537877098354784e-07, |
| "loss": 0.5925722718238831, |
| "step": 188, |
| "token_acc": 0.8029342210305924 |
| }, |
| { |
| "epoch": 0.546242774566474, |
| "grad_norm": 0.583594997315983, |
| "learning_rate": 9.531164823616646e-07, |
| "loss": 0.5865395069122314, |
| "step": 189, |
| "token_acc": 0.8063752604903651 |
| }, |
| { |
| "epoch": 0.5491329479768786, |
| "grad_norm": 0.5781895560822031, |
| "learning_rate": 9.524406549135193e-07, |
| "loss": 0.6117700338363647, |
| "step": 190, |
| "token_acc": 0.7980149336253496 |
| }, |
| { |
| "epoch": 0.5520231213872833, |
| "grad_norm": 0.4893230139872087, |
| "learning_rate": 9.517602343519471e-07, |
| "loss": 0.5652576684951782, |
| "step": 191, |
| "token_acc": 0.8107140229095636 |
| }, |
| { |
| "epoch": 0.5549132947976878, |
| "grad_norm": 0.5760419810427979, |
| "learning_rate": 9.510752275844809e-07, |
| "loss": 0.579891562461853, |
| "step": 192, |
| "token_acc": 0.805735200834105 |
| }, |
| { |
| "epoch": 0.5578034682080925, |
| "grad_norm": 0.5102671355626198, |
| "learning_rate": 9.503856415652125e-07, |
| "loss": 0.5964775681495667, |
| "step": 193, |
| "token_acc": 0.8034283288223744 |
| }, |
| { |
| "epoch": 0.5606936416184971, |
| "grad_norm": 0.4894002019430091, |
| "learning_rate": 9.496914832947214e-07, |
| "loss": 0.6064220666885376, |
| "step": 194, |
| "token_acc": 0.799232275930387 |
| }, |
| { |
| "epoch": 0.5635838150289018, |
| "grad_norm": 0.5939844831348525, |
| "learning_rate": 9.489927598200043e-07, |
| "loss": 0.6116449236869812, |
| "step": 195, |
| "token_acc": 0.797429447731885 |
| }, |
| { |
| "epoch": 0.5664739884393064, |
| "grad_norm": 0.4783949579372596, |
| "learning_rate": 9.482894782344024e-07, |
| "loss": 0.6082786321640015, |
| "step": 196, |
| "token_acc": 0.796939850416096 |
| }, |
| { |
| "epoch": 0.569364161849711, |
| "grad_norm": 0.5532830089434996, |
| "learning_rate": 9.475816456775312e-07, |
| "loss": 0.5998172760009766, |
| "step": 197, |
| "token_acc": 0.8034065270191963 |
| }, |
| { |
| "epoch": 0.5722543352601156, |
| "grad_norm": 0.5660410481873773, |
| "learning_rate": 9.468692693352062e-07, |
| "loss": 0.5715000629425049, |
| "step": 198, |
| "token_acc": 0.8105325892615268 |
| }, |
| { |
| "epoch": 0.5751445086705202, |
| "grad_norm": 0.5454360730485784, |
| "learning_rate": 9.461523564393714e-07, |
| "loss": 0.5121803283691406, |
| "step": 199, |
| "token_acc": 0.8285392705145792 |
| }, |
| { |
| "epoch": 0.5780346820809249, |
| "grad_norm": 0.5378535866046305, |
| "learning_rate": 9.454309142680246e-07, |
| "loss": 0.5945334434509277, |
| "step": 200, |
| "token_acc": 0.8058855053489177 |
| }, |
| { |
| "epoch": 0.5809248554913294, |
| "grad_norm": 0.569376306217556, |
| "learning_rate": 9.447049501451447e-07, |
| "loss": 0.5850614905357361, |
| "step": 201, |
| "token_acc": 0.8075420015918657 |
| }, |
| { |
| "epoch": 0.5838150289017341, |
| "grad_norm": 0.5596293780541032, |
| "learning_rate": 9.439744714406166e-07, |
| "loss": 0.5594047904014587, |
| "step": 202, |
| "token_acc": 0.8121667287250859 |
| }, |
| { |
| "epoch": 0.5867052023121387, |
| "grad_norm": 0.5138636330605458, |
| "learning_rate": 9.432394855701568e-07, |
| "loss": 0.5849941372871399, |
| "step": 203, |
| "token_acc": 0.8073615179939259 |
| }, |
| { |
| "epoch": 0.5895953757225434, |
| "grad_norm": 0.5804821715876541, |
| "learning_rate": 9.424999999952374e-07, |
| "loss": 0.5801274180412292, |
| "step": 204, |
| "token_acc": 0.8069783212978903 |
| }, |
| { |
| "epoch": 0.5924855491329479, |
| "grad_norm": 0.5724417549737069, |
| "learning_rate": 9.417560222230114e-07, |
| "loss": 0.549828827381134, |
| "step": 205, |
| "token_acc": 0.8177920383625401 |
| }, |
| { |
| "epoch": 0.5953757225433526, |
| "grad_norm": 0.5635873362301451, |
| "learning_rate": 9.410075598062357e-07, |
| "loss": 0.6004040241241455, |
| "step": 206, |
| "token_acc": 0.8004078427231751 |
| }, |
| { |
| "epoch": 0.5982658959537572, |
| "grad_norm": 0.5235901257461258, |
| "learning_rate": 9.402546203431947e-07, |
| "loss": 0.5270985960960388, |
| "step": 207, |
| "token_acc": 0.8231543624161074 |
| }, |
| { |
| "epoch": 0.6011560693641619, |
| "grad_norm": 0.5532559810628388, |
| "learning_rate": 9.394972114776229e-07, |
| "loss": 0.574277937412262, |
| "step": 208, |
| "token_acc": 0.8074010315538029 |
| }, |
| { |
| "epoch": 0.6040462427745664, |
| "grad_norm": 0.5812311718782175, |
| "learning_rate": 9.387353408986282e-07, |
| "loss": 0.595463216304779, |
| "step": 209, |
| "token_acc": 0.8024861291665605 |
| }, |
| { |
| "epoch": 0.6069364161849711, |
| "grad_norm": 0.5142938651985898, |
| "learning_rate": 9.379690163406128e-07, |
| "loss": 0.5852739214897156, |
| "step": 210, |
| "token_acc": 0.8058286827885552 |
| }, |
| { |
| "epoch": 0.6098265895953757, |
| "grad_norm": 0.5954842210532877, |
| "learning_rate": 9.371982455831946e-07, |
| "loss": 0.5914256572723389, |
| "step": 211, |
| "token_acc": 0.8022748583309552 |
| }, |
| { |
| "epoch": 0.6127167630057804, |
| "grad_norm": 0.5993748062356747, |
| "learning_rate": 9.364230364511295e-07, |
| "loss": 0.5815471410751343, |
| "step": 212, |
| "token_acc": 0.8078214734227942 |
| }, |
| { |
| "epoch": 0.615606936416185, |
| "grad_norm": 0.5946619701512068, |
| "learning_rate": 9.356433968142305e-07, |
| "loss": 0.5513661503791809, |
| "step": 213, |
| "token_acc": 0.8162251537633719 |
| }, |
| { |
| "epoch": 0.6184971098265896, |
| "grad_norm": 0.6203774782127278, |
| "learning_rate": 9.34859334587289e-07, |
| "loss": 0.5972813367843628, |
| "step": 214, |
| "token_acc": 0.8014712230836974 |
| }, |
| { |
| "epoch": 0.6213872832369942, |
| "grad_norm": 0.551145459721042, |
| "learning_rate": 9.340708577299936e-07, |
| "loss": 0.6008709669113159, |
| "step": 215, |
| "token_acc": 0.8010602678571429 |
| }, |
| { |
| "epoch": 0.6242774566473989, |
| "grad_norm": 0.5965436915708601, |
| "learning_rate": 9.332779742468495e-07, |
| "loss": 0.6075496673583984, |
| "step": 216, |
| "token_acc": 0.7974854091642866 |
| }, |
| { |
| "epoch": 0.6271676300578035, |
| "grad_norm": 0.5460165665763135, |
| "learning_rate": 9.324806921870975e-07, |
| "loss": 0.5693843364715576, |
| "step": 217, |
| "token_acc": 0.8103969870963759 |
| }, |
| { |
| "epoch": 0.630057803468208, |
| "grad_norm": 0.5966690969554563, |
| "learning_rate": 9.316790196446323e-07, |
| "loss": 0.5560802221298218, |
| "step": 218, |
| "token_acc": 0.8236988940183998 |
| }, |
| { |
| "epoch": 0.6329479768786127, |
| "grad_norm": 0.6560441235449157, |
| "learning_rate": 9.308729647579199e-07, |
| "loss": 0.5824184417724609, |
| "step": 219, |
| "token_acc": 0.8070714583452526 |
| }, |
| { |
| "epoch": 0.6358381502890174, |
| "grad_norm": 0.6006127755099283, |
| "learning_rate": 9.30062535709915e-07, |
| "loss": 0.6167861819267273, |
| "step": 220, |
| "token_acc": 0.796514221545372 |
| }, |
| { |
| "epoch": 0.638728323699422, |
| "grad_norm": 0.5570520813344141, |
| "learning_rate": 9.292477407279789e-07, |
| "loss": 0.6107242703437805, |
| "step": 221, |
| "token_acc": 0.7990834404515732 |
| }, |
| { |
| "epoch": 0.6416184971098265, |
| "grad_norm": 0.5419716560460497, |
| "learning_rate": 9.284285880837946e-07, |
| "loss": 0.5959486365318298, |
| "step": 222, |
| "token_acc": 0.8022954328356064 |
| }, |
| { |
| "epoch": 0.6445086705202312, |
| "grad_norm": 0.6657313771062484, |
| "learning_rate": 9.276050860932837e-07, |
| "loss": 0.5727354884147644, |
| "step": 223, |
| "token_acc": 0.8082750530162884 |
| }, |
| { |
| "epoch": 0.6473988439306358, |
| "grad_norm": 0.512607896262416, |
| "learning_rate": 9.267772431165218e-07, |
| "loss": 0.5810614228248596, |
| "step": 224, |
| "token_acc": 0.8100355584987692 |
| }, |
| { |
| "epoch": 0.6502890173410405, |
| "grad_norm": 0.5208342958049974, |
| "learning_rate": 9.259450675576535e-07, |
| "loss": 0.5924381017684937, |
| "step": 225, |
| "token_acc": 0.8029396939581946 |
| }, |
| { |
| "epoch": 0.653179190751445, |
| "grad_norm": 0.6880250488481687, |
| "learning_rate": 9.251085678648071e-07, |
| "loss": 0.6493653059005737, |
| "step": 226, |
| "token_acc": 0.7886282137800538 |
| }, |
| { |
| "epoch": 0.6560693641618497, |
| "grad_norm": 0.548308907840708, |
| "learning_rate": 9.242677525300088e-07, |
| "loss": 0.570950448513031, |
| "step": 227, |
| "token_acc": 0.810275809890639 |
| }, |
| { |
| "epoch": 0.6589595375722543, |
| "grad_norm": 0.5340467208226745, |
| "learning_rate": 9.234226300890972e-07, |
| "loss": 0.565179169178009, |
| "step": 228, |
| "token_acc": 0.8106098958194559 |
| }, |
| { |
| "epoch": 0.661849710982659, |
| "grad_norm": 0.5609587429682379, |
| "learning_rate": 9.225732091216354e-07, |
| "loss": 0.6229733824729919, |
| "step": 229, |
| "token_acc": 0.7947594792619757 |
| }, |
| { |
| "epoch": 0.6647398843930635, |
| "grad_norm": 0.640345970021987, |
| "learning_rate": 9.217194982508247e-07, |
| "loss": 0.556702196598053, |
| "step": 230, |
| "token_acc": 0.8141483516483516 |
| }, |
| { |
| "epoch": 0.6676300578034682, |
| "grad_norm": 0.551511374308891, |
| "learning_rate": 9.208615061434166e-07, |
| "loss": 0.6125736236572266, |
| "step": 231, |
| "token_acc": 0.7977603246777648 |
| }, |
| { |
| "epoch": 0.6705202312138728, |
| "grad_norm": 0.5163364555056573, |
| "learning_rate": 9.199992415096259e-07, |
| "loss": 0.5473246574401855, |
| "step": 232, |
| "token_acc": 0.8160722450845908 |
| }, |
| { |
| "epoch": 0.6734104046242775, |
| "grad_norm": 0.5669711665664704, |
| "learning_rate": 9.191327131030406e-07, |
| "loss": 0.543914794921875, |
| "step": 233, |
| "token_acc": 0.8196051836235239 |
| }, |
| { |
| "epoch": 0.6763005780346821, |
| "grad_norm": 0.5406802703932962, |
| "learning_rate": 9.182619297205347e-07, |
| "loss": 0.5660564303398132, |
| "step": 234, |
| "token_acc": 0.8103913761289696 |
| }, |
| { |
| "epoch": 0.6791907514450867, |
| "grad_norm": 0.556661118525528, |
| "learning_rate": 9.173869002021775e-07, |
| "loss": 0.6406779289245605, |
| "step": 235, |
| "token_acc": 0.7926350563544501 |
| }, |
| { |
| "epoch": 0.6820809248554913, |
| "grad_norm": 0.5201140983806046, |
| "learning_rate": 9.165076334311445e-07, |
| "loss": 0.6177135109901428, |
| "step": 236, |
| "token_acc": 0.7982128177119112 |
| }, |
| { |
| "epoch": 0.684971098265896, |
| "grad_norm": 0.5850116831250167, |
| "learning_rate": 9.156241383336278e-07, |
| "loss": 0.5401256680488586, |
| "step": 237, |
| "token_acc": 0.8215590591627244 |
| }, |
| { |
| "epoch": 0.6878612716763006, |
| "grad_norm": 0.6403194474900529, |
| "learning_rate": 9.147364238787443e-07, |
| "loss": 0.581301212310791, |
| "step": 238, |
| "token_acc": 0.8056872398548133 |
| }, |
| { |
| "epoch": 0.6907514450867052, |
| "grad_norm": 0.5674551611529516, |
| "learning_rate": 9.138444990784453e-07, |
| "loss": 0.6117105484008789, |
| "step": 239, |
| "token_acc": 0.7969433519630166 |
| }, |
| { |
| "epoch": 0.6936416184971098, |
| "grad_norm": 0.5668476584273359, |
| "learning_rate": 9.12948372987425e-07, |
| "loss": 0.6042872071266174, |
| "step": 240, |
| "token_acc": 0.8012008915710148 |
| }, |
| { |
| "epoch": 0.6965317919075145, |
| "grad_norm": 0.5372423597194518, |
| "learning_rate": 9.120480547030285e-07, |
| "loss": 0.5781703591346741, |
| "step": 241, |
| "token_acc": 0.8076352705410822 |
| }, |
| { |
| "epoch": 0.6994219653179191, |
| "grad_norm": 0.582884431687299, |
| "learning_rate": 9.111435533651595e-07, |
| "loss": 0.594234824180603, |
| "step": 242, |
| "token_acc": 0.8027408303103587 |
| }, |
| { |
| "epoch": 0.7023121387283237, |
| "grad_norm": 0.5468197379764062, |
| "learning_rate": 9.102348781561875e-07, |
| "loss": 0.537114143371582, |
| "step": 243, |
| "token_acc": 0.8224276312689462 |
| }, |
| { |
| "epoch": 0.7052023121387283, |
| "grad_norm": 0.5799094186562964, |
| "learning_rate": 9.093220383008544e-07, |
| "loss": 0.5844765901565552, |
| "step": 244, |
| "token_acc": 0.8037892679887568 |
| }, |
| { |
| "epoch": 0.708092485549133, |
| "grad_norm": 0.5735743433347377, |
| "learning_rate": 9.084050430661813e-07, |
| "loss": 0.6163278818130493, |
| "step": 245, |
| "token_acc": 0.7963933546643635 |
| }, |
| { |
| "epoch": 0.7109826589595376, |
| "grad_norm": 0.5675339701772788, |
| "learning_rate": 9.074839017613736e-07, |
| "loss": 0.5186026692390442, |
| "step": 246, |
| "token_acc": 0.8264138256627419 |
| }, |
| { |
| "epoch": 0.7138728323699421, |
| "grad_norm": 0.5682213760378196, |
| "learning_rate": 9.065586237377274e-07, |
| "loss": 0.5759379267692566, |
| "step": 247, |
| "token_acc": 0.8082834141978154 |
| }, |
| { |
| "epoch": 0.7167630057803468, |
| "grad_norm": 0.5222160620275426, |
| "learning_rate": 9.056292183885341e-07, |
| "loss": 0.5911962985992432, |
| "step": 248, |
| "token_acc": 0.803399969606123 |
| }, |
| { |
| "epoch": 0.7196531791907514, |
| "grad_norm": 0.5098026312902073, |
| "learning_rate": 9.046956951489852e-07, |
| "loss": 0.5775253772735596, |
| "step": 249, |
| "token_acc": 0.8074704886249294 |
| }, |
| { |
| "epoch": 0.7225433526011561, |
| "grad_norm": 0.524303335092293, |
| "learning_rate": 9.037580634960763e-07, |
| "loss": 0.5572794675827026, |
| "step": 250, |
| "token_acc": 0.8146691719232317 |
| }, |
| { |
| "epoch": 0.7254335260115607, |
| "grad_norm": 0.6033497475819745, |
| "learning_rate": 9.028163329485112e-07, |
| "loss": 0.5832095742225647, |
| "step": 251, |
| "token_acc": 0.8073202656110331 |
| }, |
| { |
| "epoch": 0.7283236994219653, |
| "grad_norm": 0.5556496694710653, |
| "learning_rate": 9.018705130666049e-07, |
| "loss": 0.5459315776824951, |
| "step": 252, |
| "token_acc": 0.8191452178897479 |
| }, |
| { |
| "epoch": 0.7312138728323699, |
| "grad_norm": 0.7747218495040153, |
| "learning_rate": 9.009206134521868e-07, |
| "loss": 0.5795873999595642, |
| "step": 253, |
| "token_acc": 0.8071730383987341 |
| }, |
| { |
| "epoch": 0.7341040462427746, |
| "grad_norm": 0.5652371374587928, |
| "learning_rate": 8.999666437485034e-07, |
| "loss": 0.5758365392684937, |
| "step": 254, |
| "token_acc": 0.811742473608758 |
| }, |
| { |
| "epoch": 0.7369942196531792, |
| "grad_norm": 0.5206182140440342, |
| "learning_rate": 8.990086136401198e-07, |
| "loss": 0.5303860306739807, |
| "step": 255, |
| "token_acc": 0.823020148188528 |
| }, |
| { |
| "epoch": 0.7398843930635838, |
| "grad_norm": 0.6450852115537637, |
| "learning_rate": 8.980465328528218e-07, |
| "loss": 0.5547192096710205, |
| "step": 256, |
| "token_acc": 0.8162106882834197 |
| }, |
| { |
| "epoch": 0.7427745664739884, |
| "grad_norm": 0.5196181500327283, |
| "learning_rate": 8.970804111535175e-07, |
| "loss": 0.5457019209861755, |
| "step": 257, |
| "token_acc": 0.8167301624082492 |
| }, |
| { |
| "epoch": 0.7456647398843931, |
| "grad_norm": 0.6356725122188899, |
| "learning_rate": 8.961102583501375e-07, |
| "loss": 0.5676227807998657, |
| "step": 258, |
| "token_acc": 0.8146457172245137 |
| }, |
| { |
| "epoch": 0.7485549132947977, |
| "grad_norm": 0.5766749980898508, |
| "learning_rate": 8.951360842915355e-07, |
| "loss": 0.5487492084503174, |
| "step": 259, |
| "token_acc": 0.8176302961517421 |
| }, |
| { |
| "epoch": 0.7514450867052023, |
| "grad_norm": 0.561193367543964, |
| "learning_rate": 8.941578988673885e-07, |
| "loss": 0.5508721470832825, |
| "step": 260, |
| "token_acc": 0.8148807459638577 |
| }, |
| { |
| "epoch": 0.7543352601156069, |
| "grad_norm": 1.1616614497713094, |
| "learning_rate": 8.931757120080965e-07, |
| "loss": 0.5649725794792175, |
| "step": 261, |
| "token_acc": 0.8123450235984954 |
| }, |
| { |
| "epoch": 0.7572254335260116, |
| "grad_norm": 0.6269083895254, |
| "learning_rate": 8.921895336846812e-07, |
| "loss": 0.5234044790267944, |
| "step": 262, |
| "token_acc": 0.826336871809926 |
| }, |
| { |
| "epoch": 0.7601156069364162, |
| "grad_norm": 0.5491932745407809, |
| "learning_rate": 8.911993739086852e-07, |
| "loss": 0.5335085391998291, |
| "step": 263, |
| "token_acc": 0.8243787856172078 |
| }, |
| { |
| "epoch": 0.7630057803468208, |
| "grad_norm": 0.6001894076535953, |
| "learning_rate": 8.902052427320703e-07, |
| "loss": 0.6009457111358643, |
| "step": 264, |
| "token_acc": 0.8005332320797702 |
| }, |
| { |
| "epoch": 0.7658959537572254, |
| "grad_norm": 0.6105633418239023, |
| "learning_rate": 8.892071502471154e-07, |
| "loss": 0.512947678565979, |
| "step": 265, |
| "token_acc": 0.8283333333333334 |
| }, |
| { |
| "epoch": 0.7687861271676301, |
| "grad_norm": 0.530310690982596, |
| "learning_rate": 8.882051065863139e-07, |
| "loss": 0.5578915476799011, |
| "step": 266, |
| "token_acc": 0.8134685584406639 |
| }, |
| { |
| "epoch": 0.7716763005780347, |
| "grad_norm": 0.6053842724913201, |
| "learning_rate": 8.871991219222712e-07, |
| "loss": 0.5307576656341553, |
| "step": 267, |
| "token_acc": 0.8237498632235475 |
| }, |
| { |
| "epoch": 0.7745664739884393, |
| "grad_norm": 0.5839374903786066, |
| "learning_rate": 8.861892064676008e-07, |
| "loss": 0.4724132716655731, |
| "step": 268, |
| "token_acc": 0.8406308417366578 |
| }, |
| { |
| "epoch": 0.7774566473988439, |
| "grad_norm": 0.5382380436884167, |
| "learning_rate": 8.851753704748219e-07, |
| "loss": 0.5864905118942261, |
| "step": 269, |
| "token_acc": 0.805320596148614 |
| }, |
| { |
| "epoch": 0.7803468208092486, |
| "grad_norm": 0.536612826265518, |
| "learning_rate": 8.841576242362533e-07, |
| "loss": 0.5369473695755005, |
| "step": 270, |
| "token_acc": 0.8202307927330842 |
| }, |
| { |
| "epoch": 0.7832369942196532, |
| "grad_norm": 0.48433135594375987, |
| "learning_rate": 8.831359780839107e-07, |
| "loss": 0.5745148062705994, |
| "step": 271, |
| "token_acc": 0.8114247865236928 |
| }, |
| { |
| "epoch": 0.7861271676300579, |
| "grad_norm": 0.565668286608129, |
| "learning_rate": 8.821104423894014e-07, |
| "loss": 0.5306930541992188, |
| "step": 272, |
| "token_acc": 0.8240810142731839 |
| }, |
| { |
| "epoch": 0.7890173410404624, |
| "grad_norm": 0.5347471169063638, |
| "learning_rate": 8.810810275638182e-07, |
| "loss": 0.5508551597595215, |
| "step": 273, |
| "token_acc": 0.8150747430289043 |
| }, |
| { |
| "epoch": 0.791907514450867, |
| "grad_norm": 0.5872611855148089, |
| "learning_rate": 8.800477440576346e-07, |
| "loss": 0.5582222938537598, |
| "step": 274, |
| "token_acc": 0.8141057178356111 |
| }, |
| { |
| "epoch": 0.7947976878612717, |
| "grad_norm": 0.5930933510081743, |
| "learning_rate": 8.790106023605985e-07, |
| "loss": 0.5265220403671265, |
| "step": 275, |
| "token_acc": 0.8236343698306786 |
| }, |
| { |
| "epoch": 0.7976878612716763, |
| "grad_norm": 0.5326943859900286, |
| "learning_rate": 8.779696130016252e-07, |
| "loss": 0.589282751083374, |
| "step": 276, |
| "token_acc": 0.8041843462366995 |
| }, |
| { |
| "epoch": 0.8005780346820809, |
| "grad_norm": 0.682574668475925, |
| "learning_rate": 8.769247865486915e-07, |
| "loss": 0.5634682178497314, |
| "step": 277, |
| "token_acc": 0.8131609072741031 |
| }, |
| { |
| "epoch": 0.8034682080924855, |
| "grad_norm": 0.6170926445265313, |
| "learning_rate": 8.758761336087273e-07, |
| "loss": 0.5282115340232849, |
| "step": 278, |
| "token_acc": 0.8240009668063165 |
| }, |
| { |
| "epoch": 0.8063583815028902, |
| "grad_norm": 0.5931538447313858, |
| "learning_rate": 8.748236648275087e-07, |
| "loss": 0.4907287061214447, |
| "step": 279, |
| "token_acc": 0.838809946714032 |
| }, |
| { |
| "epoch": 0.8092485549132948, |
| "grad_norm": 0.567206538957563, |
| "learning_rate": 8.737673908895497e-07, |
| "loss": 0.6097589731216431, |
| "step": 280, |
| "token_acc": 0.7990020422972478 |
| }, |
| { |
| "epoch": 0.8121387283236994, |
| "grad_norm": 0.5887119791348107, |
| "learning_rate": 8.727073225179937e-07, |
| "loss": 0.5625665187835693, |
| "step": 281, |
| "token_acc": 0.8113687537033379 |
| }, |
| { |
| "epoch": 0.815028901734104, |
| "grad_norm": 0.5836331757411469, |
| "learning_rate": 8.716434704745046e-07, |
| "loss": 0.513110339641571, |
| "step": 282, |
| "token_acc": 0.8275925912738822 |
| }, |
| { |
| "epoch": 0.8179190751445087, |
| "grad_norm": 0.6054924912257345, |
| "learning_rate": 8.705758455591576e-07, |
| "loss": 0.602730393409729, |
| "step": 283, |
| "token_acc": 0.8022713898227125 |
| }, |
| { |
| "epoch": 0.8208092485549133, |
| "grad_norm": 0.6236226833744741, |
| "learning_rate": 8.695044586103295e-07, |
| "loss": 0.5747796893119812, |
| "step": 284, |
| "token_acc": 0.8079837217906031 |
| }, |
| { |
| "epoch": 0.8236994219653179, |
| "grad_norm": 0.5865612629064065, |
| "learning_rate": 8.684293205045889e-07, |
| "loss": 0.6070411205291748, |
| "step": 285, |
| "token_acc": 0.7988344760774713 |
| }, |
| { |
| "epoch": 0.8265895953757225, |
| "grad_norm": 0.5503455006576133, |
| "learning_rate": 8.673504421565856e-07, |
| "loss": 0.5685064792633057, |
| "step": 286, |
| "token_acc": 0.8102210757057314 |
| }, |
| { |
| "epoch": 0.8294797687861272, |
| "grad_norm": 0.5972785565939337, |
| "learning_rate": 8.662678345189396e-07, |
| "loss": 0.46608567237854004, |
| "step": 287, |
| "token_acc": 0.8438823801959227 |
| }, |
| { |
| "epoch": 0.8323699421965318, |
| "grad_norm": 0.5201509566608107, |
| "learning_rate": 8.651815085821302e-07, |
| "loss": 0.5298614501953125, |
| "step": 288, |
| "token_acc": 0.8236416811984237 |
| }, |
| { |
| "epoch": 0.8352601156069365, |
| "grad_norm": 0.49819051940062725, |
| "learning_rate": 8.640914753743847e-07, |
| "loss": 0.5882748365402222, |
| "step": 289, |
| "token_acc": 0.8065492356638473 |
| }, |
| { |
| "epoch": 0.838150289017341, |
| "grad_norm": 0.6397626208223341, |
| "learning_rate": 8.629977459615654e-07, |
| "loss": 0.604642927646637, |
| "step": 290, |
| "token_acc": 0.798697597059869 |
| }, |
| { |
| "epoch": 0.8410404624277457, |
| "grad_norm": 0.5735121088769557, |
| "learning_rate": 8.619003314470586e-07, |
| "loss": 0.5657530426979065, |
| "step": 291, |
| "token_acc": 0.8134929241446619 |
| }, |
| { |
| "epoch": 0.8439306358381503, |
| "grad_norm": 0.6029592728755434, |
| "learning_rate": 8.607992429716608e-07, |
| "loss": 0.5807414054870605, |
| "step": 292, |
| "token_acc": 0.8062111084672681 |
| }, |
| { |
| "epoch": 0.846820809248555, |
| "grad_norm": 0.5204268288621456, |
| "learning_rate": 8.596944917134666e-07, |
| "loss": 0.5696761608123779, |
| "step": 293, |
| "token_acc": 0.8102849975611456 |
| }, |
| { |
| "epoch": 0.8497109826589595, |
| "grad_norm": 0.570216087116967, |
| "learning_rate": 8.585860888877536e-07, |
| "loss": 0.6144391298294067, |
| "step": 294, |
| "token_acc": 0.7976966055615415 |
| }, |
| { |
| "epoch": 0.8526011560693642, |
| "grad_norm": 0.525009085518107, |
| "learning_rate": 8.574740457468708e-07, |
| "loss": 0.5926086902618408, |
| "step": 295, |
| "token_acc": 0.8030848268880814 |
| }, |
| { |
| "epoch": 0.8554913294797688, |
| "grad_norm": 0.5397367841143723, |
| "learning_rate": 8.563583735801223e-07, |
| "loss": 0.5647125244140625, |
| "step": 296, |
| "token_acc": 0.8113542939673369 |
| }, |
| { |
| "epoch": 0.8583815028901735, |
| "grad_norm": 0.5453044997059636, |
| "learning_rate": 8.55239083713654e-07, |
| "loss": 0.5306450128555298, |
| "step": 297, |
| "token_acc": 0.8242952898276619 |
| }, |
| { |
| "epoch": 0.861271676300578, |
| "grad_norm": 0.49382426600759494, |
| "learning_rate": 8.541161875103379e-07, |
| "loss": 0.5655560493469238, |
| "step": 298, |
| "token_acc": 0.81170671232068 |
| }, |
| { |
| "epoch": 0.8641618497109826, |
| "grad_norm": 0.5609985492228051, |
| "learning_rate": 8.529896963696576e-07, |
| "loss": 0.5431415438652039, |
| "step": 299, |
| "token_acc": 0.8162933876284661 |
| }, |
| { |
| "epoch": 0.8670520231213873, |
| "grad_norm": 0.5476351474370762, |
| "learning_rate": 8.51859621727591e-07, |
| "loss": 0.5872442126274109, |
| "step": 300, |
| "token_acc": 0.8065929411453266 |
| }, |
| { |
| "epoch": 0.869942196531792, |
| "grad_norm": 0.5282221087597836, |
| "learning_rate": 8.507259750564961e-07, |
| "loss": 0.5451909899711609, |
| "step": 301, |
| "token_acc": 0.8188552557155108 |
| }, |
| { |
| "epoch": 0.8728323699421965, |
| "grad_norm": 0.503389270767867, |
| "learning_rate": 8.495887678649932e-07, |
| "loss": 0.5154858231544495, |
| "step": 302, |
| "token_acc": 0.8274329950559459 |
| }, |
| { |
| "epoch": 0.8757225433526011, |
| "grad_norm": 0.518940089504941, |
| "learning_rate": 8.484480116978486e-07, |
| "loss": 0.5244746208190918, |
| "step": 303, |
| "token_acc": 0.8264815952633637 |
| }, |
| { |
| "epoch": 0.8786127167630058, |
| "grad_norm": 0.573024895950047, |
| "learning_rate": 8.473037181358573e-07, |
| "loss": 0.592721700668335, |
| "step": 304, |
| "token_acc": 0.8035201013934049 |
| }, |
| { |
| "epoch": 0.8815028901734104, |
| "grad_norm": 0.5039735997055694, |
| "learning_rate": 8.461558987957252e-07, |
| "loss": 0.5656961798667908, |
| "step": 305, |
| "token_acc": 0.8130110070213994 |
| }, |
| { |
| "epoch": 0.884393063583815, |
| "grad_norm": 0.5476756827664239, |
| "learning_rate": 8.45004565329952e-07, |
| "loss": 0.5374190807342529, |
| "step": 306, |
| "token_acc": 0.820976424170279 |
| }, |
| { |
| "epoch": 0.8872832369942196, |
| "grad_norm": 0.5275746578408953, |
| "learning_rate": 8.438497294267116e-07, |
| "loss": 0.5982400178909302, |
| "step": 307, |
| "token_acc": 0.7999831918648626 |
| }, |
| { |
| "epoch": 0.8901734104046243, |
| "grad_norm": 0.532750300928086, |
| "learning_rate": 8.426914028097347e-07, |
| "loss": 0.584047794342041, |
| "step": 308, |
| "token_acc": 0.8066207177537092 |
| }, |
| { |
| "epoch": 0.8930635838150289, |
| "grad_norm": 0.5003914631256399, |
| "learning_rate": 8.415295972381889e-07, |
| "loss": 0.6089476346969604, |
| "step": 309, |
| "token_acc": 0.7978914509526754 |
| }, |
| { |
| "epoch": 0.8959537572254336, |
| "grad_norm": 0.6278624794022574, |
| "learning_rate": 8.403643245065597e-07, |
| "loss": 0.5697731375694275, |
| "step": 310, |
| "token_acc": 0.8108995234993658 |
| }, |
| { |
| "epoch": 0.8988439306358381, |
| "grad_norm": 0.6052633593556834, |
| "learning_rate": 8.391955964445309e-07, |
| "loss": 0.5913630723953247, |
| "step": 311, |
| "token_acc": 0.8023921969586315 |
| }, |
| { |
| "epoch": 0.9017341040462428, |
| "grad_norm": 0.5312386556419646, |
| "learning_rate": 8.38023424916864e-07, |
| "loss": 0.5818167924880981, |
| "step": 312, |
| "token_acc": 0.8053130715134147 |
| }, |
| { |
| "epoch": 0.9046242774566474, |
| "grad_norm": 0.5377630147019918, |
| "learning_rate": 8.368478218232787e-07, |
| "loss": 0.5994030237197876, |
| "step": 313, |
| "token_acc": 0.8010770419994847 |
| }, |
| { |
| "epoch": 0.9075144508670521, |
| "grad_norm": 0.6387143665462728, |
| "learning_rate": 8.356687990983305e-07, |
| "loss": 0.5747004747390747, |
| "step": 314, |
| "token_acc": 0.8103654791154791 |
| }, |
| { |
| "epoch": 0.9104046242774566, |
| "grad_norm": 0.5539012149779035, |
| "learning_rate": 8.344863687112913e-07, |
| "loss": 0.5109165906906128, |
| "step": 315, |
| "token_acc": 0.8275082819675849 |
| }, |
| { |
| "epoch": 0.9132947976878613, |
| "grad_norm": 0.5431996662851367, |
| "learning_rate": 8.333005426660271e-07, |
| "loss": 0.4984626770019531, |
| "step": 316, |
| "token_acc": 0.8326753471796506 |
| }, |
| { |
| "epoch": 0.9161849710982659, |
| "grad_norm": 0.5476844147731238, |
| "learning_rate": 8.321113330008756e-07, |
| "loss": 0.5582059025764465, |
| "step": 317, |
| "token_acc": 0.8131992060627932 |
| }, |
| { |
| "epoch": 0.9190751445086706, |
| "grad_norm": 0.5288904758826702, |
| "learning_rate": 8.309187517885249e-07, |
| "loss": 0.5965433120727539, |
| "step": 318, |
| "token_acc": 0.8015113167980331 |
| }, |
| { |
| "epoch": 0.9219653179190751, |
| "grad_norm": 0.5061439317002303, |
| "learning_rate": 8.297228111358906e-07, |
| "loss": 0.50608229637146, |
| "step": 319, |
| "token_acc": 0.8302445369795833 |
| }, |
| { |
| "epoch": 0.9248554913294798, |
| "grad_norm": 0.49043399117893216, |
| "learning_rate": 8.285235231839927e-07, |
| "loss": 0.5492719411849976, |
| "step": 320, |
| "token_acc": 0.8174581468830556 |
| }, |
| { |
| "epoch": 0.9277456647398844, |
| "grad_norm": 0.6174249587001943, |
| "learning_rate": 8.273209001078324e-07, |
| "loss": 0.553361177444458, |
| "step": 321, |
| "token_acc": 0.8119886458507264 |
| }, |
| { |
| "epoch": 0.930635838150289, |
| "grad_norm": 0.5616150428871276, |
| "learning_rate": 8.261149541162691e-07, |
| "loss": 0.6025636196136475, |
| "step": 322, |
| "token_acc": 0.8005087935801005 |
| }, |
| { |
| "epoch": 0.9335260115606936, |
| "grad_norm": 0.6478516612944865, |
| "learning_rate": 8.249056974518954e-07, |
| "loss": 0.5491775274276733, |
| "step": 323, |
| "token_acc": 0.8185532095041541 |
| }, |
| { |
| "epoch": 0.9364161849710982, |
| "grad_norm": 0.5031858383227522, |
| "learning_rate": 8.236931423909138e-07, |
| "loss": 0.6022853255271912, |
| "step": 324, |
| "token_acc": 0.8037384243419552 |
| }, |
| { |
| "epoch": 0.9393063583815029, |
| "grad_norm": 0.5752991697267287, |
| "learning_rate": 8.224773012430114e-07, |
| "loss": 0.5954960584640503, |
| "step": 325, |
| "token_acc": 0.8036680189317106 |
| }, |
| { |
| "epoch": 0.9421965317919075, |
| "grad_norm": 0.5295029516066992, |
| "learning_rate": 8.212581863512353e-07, |
| "loss": 0.5488483309745789, |
| "step": 326, |
| "token_acc": 0.8157750324575375 |
| }, |
| { |
| "epoch": 0.9450867052023122, |
| "grad_norm": 0.5368502799479243, |
| "learning_rate": 8.20035810091867e-07, |
| "loss": 0.5652696490287781, |
| "step": 327, |
| "token_acc": 0.8106361614705574 |
| }, |
| { |
| "epoch": 0.9479768786127167, |
| "grad_norm": 0.5847097314866032, |
| "learning_rate": 8.188101848742974e-07, |
| "loss": 0.544079065322876, |
| "step": 328, |
| "token_acc": 0.819971546427805 |
| }, |
| { |
| "epoch": 0.9508670520231214, |
| "grad_norm": 0.5255181020508993, |
| "learning_rate": 8.175813231408999e-07, |
| "loss": 0.4978986382484436, |
| "step": 329, |
| "token_acc": 0.8333199723062348 |
| }, |
| { |
| "epoch": 0.953757225433526, |
| "grad_norm": 0.5127048703010287, |
| "learning_rate": 8.163492373669047e-07, |
| "loss": 0.5805110931396484, |
| "step": 330, |
| "token_acc": 0.8056335113743647 |
| }, |
| { |
| "epoch": 0.9566473988439307, |
| "grad_norm": 0.652335019028349, |
| "learning_rate": 8.15113940060272e-07, |
| "loss": 0.5597442388534546, |
| "step": 331, |
| "token_acc": 0.8161630076551519 |
| }, |
| { |
| "epoch": 0.9595375722543352, |
| "grad_norm": 0.5947335075670345, |
| "learning_rate": 8.13875443761565e-07, |
| "loss": 0.5277099609375, |
| "step": 332, |
| "token_acc": 0.8274886297575488 |
| }, |
| { |
| "epoch": 0.9624277456647399, |
| "grad_norm": 0.5459606580402216, |
| "learning_rate": 8.126337610438229e-07, |
| "loss": 0.5635240077972412, |
| "step": 333, |
| "token_acc": 0.8108978939573075 |
| }, |
| { |
| "epoch": 0.9653179190751445, |
| "grad_norm": 0.5488564858287155, |
| "learning_rate": 8.113889045124323e-07, |
| "loss": 0.49523666501045227, |
| "step": 334, |
| "token_acc": 0.8329320341089853 |
| }, |
| { |
| "epoch": 0.9682080924855492, |
| "grad_norm": 0.5694023522198697, |
| "learning_rate": 8.101408868050008e-07, |
| "loss": 0.5316784381866455, |
| "step": 335, |
| "token_acc": 0.8213875427499967 |
| }, |
| { |
| "epoch": 0.9710982658959537, |
| "grad_norm": 0.5290670622343212, |
| "learning_rate": 8.088897205912271e-07, |
| "loss": 0.5768337249755859, |
| "step": 336, |
| "token_acc": 0.808409267610014 |
| }, |
| { |
| "epoch": 0.9739884393063584, |
| "grad_norm": 0.5630882737173935, |
| "learning_rate": 8.076354185727734e-07, |
| "loss": 0.5607028007507324, |
| "step": 337, |
| "token_acc": 0.8111738071422572 |
| }, |
| { |
| "epoch": 0.976878612716763, |
| "grad_norm": 0.5389758264031266, |
| "learning_rate": 8.06377993483136e-07, |
| "loss": 0.5800102949142456, |
| "step": 338, |
| "token_acc": 0.8064102564102564 |
| }, |
| { |
| "epoch": 0.9797687861271677, |
| "grad_norm": 0.6483925804091112, |
| "learning_rate": 8.051174580875163e-07, |
| "loss": 0.5936282873153687, |
| "step": 339, |
| "token_acc": 0.8033736003463585 |
| }, |
| { |
| "epoch": 0.9826589595375722, |
| "grad_norm": 0.5683588968241811, |
| "learning_rate": 8.038538251826912e-07, |
| "loss": 0.5602604150772095, |
| "step": 340, |
| "token_acc": 0.8103426182505487 |
| }, |
| { |
| "epoch": 0.9855491329479769, |
| "grad_norm": 0.4984007019353715, |
| "learning_rate": 8.025871075968826e-07, |
| "loss": 0.559136152267456, |
| "step": 341, |
| "token_acc": 0.8140824580290378 |
| }, |
| { |
| "epoch": 0.9884393063583815, |
| "grad_norm": 1.1899348194485317, |
| "learning_rate": 8.013173181896282e-07, |
| "loss": 0.5955883860588074, |
| "step": 342, |
| "token_acc": 0.8027926447988978 |
| }, |
| { |
| "epoch": 0.9913294797687862, |
| "grad_norm": 0.5388156404908695, |
| "learning_rate": 8.0004446985165e-07, |
| "loss": 0.5661012530326843, |
| "step": 343, |
| "token_acc": 0.8099668055056346 |
| }, |
| { |
| "epoch": 0.9942196531791907, |
| "grad_norm": 0.5412535831553995, |
| "learning_rate": 7.987685755047242e-07, |
| "loss": 0.6086287498474121, |
| "step": 344, |
| "token_acc": 0.7963722407145177 |
| }, |
| { |
| "epoch": 0.9971098265895953, |
| "grad_norm": 0.696761929081249, |
| "learning_rate": 7.974896481015494e-07, |
| "loss": 0.5823131799697876, |
| "step": 345, |
| "token_acc": 0.8073882514689755 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.4953947640304795, |
| "learning_rate": 7.962077006256153e-07, |
| "loss": 0.5682995319366455, |
| "step": 346, |
| "token_acc": 0.8121095151492658 |
| }, |
| { |
| "epoch": 1.0028901734104045, |
| "grad_norm": 0.7111654355632505, |
| "learning_rate": 7.94922746091071e-07, |
| "loss": 0.6060156226158142, |
| "step": 347, |
| "token_acc": 0.8014354938608955 |
| }, |
| { |
| "epoch": 1.0057803468208093, |
| "grad_norm": 0.5507935056779134, |
| "learning_rate": 7.93634797542593e-07, |
| "loss": 0.5295247435569763, |
| "step": 348, |
| "token_acc": 0.8211228506318624 |
| }, |
| { |
| "epoch": 1.0086705202312138, |
| "grad_norm": 0.6189562361784823, |
| "learning_rate": 7.923438680552525e-07, |
| "loss": 0.5647916197776794, |
| "step": 349, |
| "token_acc": 0.8137873547100433 |
| }, |
| { |
| "epoch": 1.0115606936416186, |
| "grad_norm": 0.6801159002216328, |
| "learning_rate": 7.910499707343828e-07, |
| "loss": 0.590101420879364, |
| "step": 350, |
| "token_acc": 0.803803399890662 |
| }, |
| { |
| "epoch": 1.0144508670520231, |
| "grad_norm": 0.6049076830653918, |
| "learning_rate": 7.897531187154458e-07, |
| "loss": 0.5088500380516052, |
| "step": 351, |
| "token_acc": 0.8279876049759735 |
| }, |
| { |
| "epoch": 1.0173410404624277, |
| "grad_norm": 0.5654302790773965, |
| "learning_rate": 7.884533251638999e-07, |
| "loss": 0.5929542779922485, |
| "step": 352, |
| "token_acc": 0.8047063731856507 |
| }, |
| { |
| "epoch": 1.0202312138728324, |
| "grad_norm": 0.5880451344105353, |
| "learning_rate": 7.87150603275065e-07, |
| "loss": 0.5749261379241943, |
| "step": 353, |
| "token_acc": 0.8056116433808085 |
| }, |
| { |
| "epoch": 1.023121387283237, |
| "grad_norm": 0.5426830225682386, |
| "learning_rate": 7.85844966273989e-07, |
| "loss": 0.5945314168930054, |
| "step": 354, |
| "token_acc": 0.800486217737808 |
| }, |
| { |
| "epoch": 1.0260115606936415, |
| "grad_norm": 0.49678361176775165, |
| "learning_rate": 7.845364274153139e-07, |
| "loss": 0.4898013472557068, |
| "step": 355, |
| "token_acc": 0.8352619622320034 |
| }, |
| { |
| "epoch": 1.0289017341040463, |
| "grad_norm": 0.6954304853085829, |
| "learning_rate": 7.832249999831406e-07, |
| "loss": 0.5588274598121643, |
| "step": 356, |
| "token_acc": 0.8166684201080533 |
| }, |
| { |
| "epoch": 1.0317919075144508, |
| "grad_norm": 0.5310648615446059, |
| "learning_rate": 7.819106972908949e-07, |
| "loss": 0.5819897651672363, |
| "step": 357, |
| "token_acc": 0.8045070775826193 |
| }, |
| { |
| "epoch": 1.0346820809248556, |
| "grad_norm": 0.5923922817451516, |
| "learning_rate": 7.805935326811912e-07, |
| "loss": 0.5737313032150269, |
| "step": 358, |
| "token_acc": 0.8051378103467133 |
| }, |
| { |
| "epoch": 1.0375722543352601, |
| "grad_norm": 0.5178307979556245, |
| "learning_rate": 7.79273519525698e-07, |
| "loss": 0.5936248302459717, |
| "step": 359, |
| "token_acc": 0.8025767773866199 |
| }, |
| { |
| "epoch": 1.0404624277456647, |
| "grad_norm": 0.5286013733045867, |
| "learning_rate": 7.779506712250022e-07, |
| "loss": 0.5494135618209839, |
| "step": 360, |
| "token_acc": 0.8171926851655723 |
| }, |
| { |
| "epoch": 1.0433526011560694, |
| "grad_norm": 0.49585832807282065, |
| "learning_rate": 7.766250012084722e-07, |
| "loss": 0.5698336958885193, |
| "step": 361, |
| "token_acc": 0.8116101814090845 |
| }, |
| { |
| "epoch": 1.046242774566474, |
| "grad_norm": 0.6962712390013456, |
| "learning_rate": 7.752965229341219e-07, |
| "loss": 0.535956621170044, |
| "step": 362, |
| "token_acc": 0.822281059722762 |
| }, |
| { |
| "epoch": 1.0491329479768785, |
| "grad_norm": 0.5694059644679526, |
| "learning_rate": 7.739652498884747e-07, |
| "loss": 0.5675574541091919, |
| "step": 363, |
| "token_acc": 0.8093009931245225 |
| }, |
| { |
| "epoch": 1.0520231213872833, |
| "grad_norm": 0.5547323442483891, |
| "learning_rate": 7.726311955864261e-07, |
| "loss": 0.5611029863357544, |
| "step": 364, |
| "token_acc": 0.8125364888148433 |
| }, |
| { |
| "epoch": 1.0549132947976878, |
| "grad_norm": 0.5476729662614271, |
| "learning_rate": 7.712943735711062e-07, |
| "loss": 0.5374180674552917, |
| "step": 365, |
| "token_acc": 0.8212820320132261 |
| }, |
| { |
| "epoch": 1.0578034682080926, |
| "grad_norm": 0.5180731484879565, |
| "learning_rate": 7.699547974137426e-07, |
| "loss": 0.5433316230773926, |
| "step": 366, |
| "token_acc": 0.8200906177478174 |
| }, |
| { |
| "epoch": 1.060693641618497, |
| "grad_norm": 0.5798685069888638, |
| "learning_rate": 7.686124807135228e-07, |
| "loss": 0.5966153740882874, |
| "step": 367, |
| "token_acc": 0.8028633971139337 |
| }, |
| { |
| "epoch": 1.0635838150289016, |
| "grad_norm": 0.5594356403434023, |
| "learning_rate": 7.672674370974558e-07, |
| "loss": 0.5133764743804932, |
| "step": 368, |
| "token_acc": 0.8287475052817048 |
| }, |
| { |
| "epoch": 1.0664739884393064, |
| "grad_norm": 0.5414940672989453, |
| "learning_rate": 7.659196802202338e-07, |
| "loss": 0.5794786214828491, |
| "step": 369, |
| "token_acc": 0.8080960204454181 |
| }, |
| { |
| "epoch": 1.069364161849711, |
| "grad_norm": 0.5596146246622683, |
| "learning_rate": 7.645692237640937e-07, |
| "loss": 0.6179242134094238, |
| "step": 370, |
| "token_acc": 0.7978232829012561 |
| }, |
| { |
| "epoch": 1.0722543352601157, |
| "grad_norm": 0.5658616759599563, |
| "learning_rate": 7.632160814386779e-07, |
| "loss": 0.5489234924316406, |
| "step": 371, |
| "token_acc": 0.818960201793722 |
| }, |
| { |
| "epoch": 1.0751445086705202, |
| "grad_norm": 0.5583854062469837, |
| "learning_rate": 7.618602669808957e-07, |
| "loss": 0.5576378703117371, |
| "step": 372, |
| "token_acc": 0.8134194149383499 |
| }, |
| { |
| "epoch": 1.0780346820809248, |
| "grad_norm": 0.5709606663652054, |
| "learning_rate": 7.605017941547835e-07, |
| "loss": 0.5531469583511353, |
| "step": 373, |
| "token_acc": 0.8139197537682152 |
| }, |
| { |
| "epoch": 1.0809248554913296, |
| "grad_norm": 0.5401961587153568, |
| "learning_rate": 7.591406767513648e-07, |
| "loss": 0.5335639715194702, |
| "step": 374, |
| "token_acc": 0.8189074796640434 |
| }, |
| { |
| "epoch": 1.083815028901734, |
| "grad_norm": 0.5776452597256104, |
| "learning_rate": 7.577769285885108e-07, |
| "loss": 0.5792023539543152, |
| "step": 375, |
| "token_acc": 0.8059631052038535 |
| }, |
| { |
| "epoch": 1.0867052023121386, |
| "grad_norm": 0.6631103343737483, |
| "learning_rate": 7.564105635107996e-07, |
| "loss": 0.5358845591545105, |
| "step": 376, |
| "token_acc": 0.8186349045446866 |
| }, |
| { |
| "epoch": 1.0895953757225434, |
| "grad_norm": 0.49688934026931153, |
| "learning_rate": 7.550415953893756e-07, |
| "loss": 0.5017120242118835, |
| "step": 377, |
| "token_acc": 0.8296466328279073 |
| }, |
| { |
| "epoch": 1.092485549132948, |
| "grad_norm": 0.5499825048622536, |
| "learning_rate": 7.536700381218097e-07, |
| "loss": 0.5757490396499634, |
| "step": 378, |
| "token_acc": 0.8071212248675023 |
| }, |
| { |
| "epoch": 1.0953757225433527, |
| "grad_norm": 0.5724354451620394, |
| "learning_rate": 7.522959056319564e-07, |
| "loss": 0.5289810299873352, |
| "step": 379, |
| "token_acc": 0.8224057244166174 |
| }, |
| { |
| "epoch": 1.0982658959537572, |
| "grad_norm": 0.5295598164095123, |
| "learning_rate": 7.509192118698145e-07, |
| "loss": 0.5217394828796387, |
| "step": 380, |
| "token_acc": 0.8247749871572029 |
| }, |
| { |
| "epoch": 1.1011560693641618, |
| "grad_norm": 0.6732543146745934, |
| "learning_rate": 7.49539970811384e-07, |
| "loss": 0.5446665287017822, |
| "step": 381, |
| "token_acc": 0.8187780645617508 |
| }, |
| { |
| "epoch": 1.1040462427745665, |
| "grad_norm": 0.593141398734888, |
| "learning_rate": 7.481581964585244e-07, |
| "loss": 0.6174026131629944, |
| "step": 382, |
| "token_acc": 0.7958839535507607 |
| }, |
| { |
| "epoch": 1.106936416184971, |
| "grad_norm": 0.5915717748635032, |
| "learning_rate": 7.467739028388133e-07, |
| "loss": 0.5956196784973145, |
| "step": 383, |
| "token_acc": 0.8005577327975455 |
| }, |
| { |
| "epoch": 1.1098265895953756, |
| "grad_norm": 0.5486121690897104, |
| "learning_rate": 7.453871040054037e-07, |
| "loss": 0.602386474609375, |
| "step": 384, |
| "token_acc": 0.7985531236588805 |
| }, |
| { |
| "epoch": 1.1127167630057804, |
| "grad_norm": 0.6468015023115512, |
| "learning_rate": 7.439978140368803e-07, |
| "loss": 0.5264239311218262, |
| "step": 385, |
| "token_acc": 0.8247053516043534 |
| }, |
| { |
| "epoch": 1.115606936416185, |
| "grad_norm": 0.5396942599943407, |
| "learning_rate": 7.426060470371185e-07, |
| "loss": 0.5322436094284058, |
| "step": 386, |
| "token_acc": 0.8225644386194845 |
| }, |
| { |
| "epoch": 1.1184971098265897, |
| "grad_norm": 0.546318443194639, |
| "learning_rate": 7.412118171351395e-07, |
| "loss": 0.5636791586875916, |
| "step": 387, |
| "token_acc": 0.8132001591389744 |
| }, |
| { |
| "epoch": 1.1213872832369942, |
| "grad_norm": 0.5681580355518231, |
| "learning_rate": 7.398151384849679e-07, |
| "loss": 0.5519202351570129, |
| "step": 388, |
| "token_acc": 0.8136924046076314 |
| }, |
| { |
| "epoch": 1.1242774566473988, |
| "grad_norm": 0.5949989948835427, |
| "learning_rate": 7.384160252654873e-07, |
| "loss": 0.5511115789413452, |
| "step": 389, |
| "token_acc": 0.8144513354081949 |
| }, |
| { |
| "epoch": 1.1271676300578035, |
| "grad_norm": 0.4837423293992909, |
| "learning_rate": 7.370144916802969e-07, |
| "loss": 0.5643985867500305, |
| "step": 390, |
| "token_acc": 0.8112824957599688 |
| }, |
| { |
| "epoch": 1.130057803468208, |
| "grad_norm": 0.5611205998910804, |
| "learning_rate": 7.356105519575671e-07, |
| "loss": 0.5409538745880127, |
| "step": 391, |
| "token_acc": 0.8188429729320618 |
| }, |
| { |
| "epoch": 1.1329479768786128, |
| "grad_norm": 0.5181274015479428, |
| "learning_rate": 7.342042203498951e-07, |
| "loss": 0.5411881804466248, |
| "step": 392, |
| "token_acc": 0.8171947300974061 |
| }, |
| { |
| "epoch": 1.1358381502890174, |
| "grad_norm": 0.5497633972492808, |
| "learning_rate": 7.327955111341601e-07, |
| "loss": 0.5626124143600464, |
| "step": 393, |
| "token_acc": 0.8131716531422224 |
| }, |
| { |
| "epoch": 1.138728323699422, |
| "grad_norm": 0.569806645978514, |
| "learning_rate": 7.313844386113783e-07, |
| "loss": 0.533359169960022, |
| "step": 394, |
| "token_acc": 0.8227007051547947 |
| }, |
| { |
| "epoch": 1.1416184971098267, |
| "grad_norm": 0.5809695758427657, |
| "learning_rate": 7.299710171065584e-07, |
| "loss": 0.5428122282028198, |
| "step": 395, |
| "token_acc": 0.8167381946213591 |
| }, |
| { |
| "epoch": 1.1445086705202312, |
| "grad_norm": 0.5685994639717983, |
| "learning_rate": 7.28555260968555e-07, |
| "loss": 0.5661939382553101, |
| "step": 396, |
| "token_acc": 0.8107361575857062 |
| }, |
| { |
| "epoch": 1.147398843930636, |
| "grad_norm": 0.5687294924284086, |
| "learning_rate": 7.271371845699241e-07, |
| "loss": 0.4796743392944336, |
| "step": 397, |
| "token_acc": 0.8378044059980814 |
| }, |
| { |
| "epoch": 1.1502890173410405, |
| "grad_norm": 0.5570998116553988, |
| "learning_rate": 7.257168023067759e-07, |
| "loss": 0.5698948502540588, |
| "step": 398, |
| "token_acc": 0.8108394509164174 |
| }, |
| { |
| "epoch": 1.153179190751445, |
| "grad_norm": 0.5764653559793665, |
| "learning_rate": 7.242941285986303e-07, |
| "loss": 0.5216134190559387, |
| "step": 399, |
| "token_acc": 0.8264347873981053 |
| }, |
| { |
| "epoch": 1.1560693641618498, |
| "grad_norm": 0.5519714242613649, |
| "learning_rate": 7.228691778882692e-07, |
| "loss": 0.5965580940246582, |
| "step": 400, |
| "token_acc": 0.8008848328263255 |
| }, |
| { |
| "epoch": 1.1589595375722543, |
| "grad_norm": 0.5713833806622776, |
| "learning_rate": 7.2144196464159e-07, |
| "loss": 0.530504584312439, |
| "step": 401, |
| "token_acc": 0.8193537207392506 |
| }, |
| { |
| "epoch": 1.1618497109826589, |
| "grad_norm": 0.5112285942897958, |
| "learning_rate": 7.200125033474598e-07, |
| "loss": 0.5425513982772827, |
| "step": 402, |
| "token_acc": 0.8176038122905598 |
| }, |
| { |
| "epoch": 1.1647398843930636, |
| "grad_norm": 0.5891524284010872, |
| "learning_rate": 7.185808085175668e-07, |
| "loss": 0.5737115740776062, |
| "step": 403, |
| "token_acc": 0.811070949924867 |
| }, |
| { |
| "epoch": 1.1676300578034682, |
| "grad_norm": 0.8927491774092401, |
| "learning_rate": 7.171468946862743e-07, |
| "loss": 0.5100395083427429, |
| "step": 404, |
| "token_acc": 0.8297666772416578 |
| }, |
| { |
| "epoch": 1.1705202312138727, |
| "grad_norm": 0.6290027028336996, |
| "learning_rate": 7.157107764104723e-07, |
| "loss": 0.5254942178726196, |
| "step": 405, |
| "token_acc": 0.8239488461275081 |
| }, |
| { |
| "epoch": 1.1734104046242775, |
| "grad_norm": 0.5413566372730959, |
| "learning_rate": 7.142724682694299e-07, |
| "loss": 0.5764940977096558, |
| "step": 406, |
| "token_acc": 0.8086516073191842 |
| }, |
| { |
| "epoch": 1.176300578034682, |
| "grad_norm": 0.5581695811593094, |
| "learning_rate": 7.128319848646477e-07, |
| "loss": 0.5500423312187195, |
| "step": 407, |
| "token_acc": 0.8153743413040916 |
| }, |
| { |
| "epoch": 1.1791907514450868, |
| "grad_norm": 0.4681952163328979, |
| "learning_rate": 7.113893408197091e-07, |
| "loss": 0.5582858324050903, |
| "step": 408, |
| "token_acc": 0.8114563586911728 |
| }, |
| { |
| "epoch": 1.1820809248554913, |
| "grad_norm": 0.6826359609914151, |
| "learning_rate": 7.099445507801323e-07, |
| "loss": 0.49809369444847107, |
| "step": 409, |
| "token_acc": 0.8353448588307781 |
| }, |
| { |
| "epoch": 1.1849710982658959, |
| "grad_norm": 0.5090205197384219, |
| "learning_rate": 7.084976294132207e-07, |
| "loss": 0.6029922962188721, |
| "step": 410, |
| "token_acc": 0.7973656093105548 |
| }, |
| { |
| "epoch": 1.1878612716763006, |
| "grad_norm": 0.5269042882225241, |
| "learning_rate": 7.070485914079151e-07, |
| "loss": 0.5927149057388306, |
| "step": 411, |
| "token_acc": 0.8014037282759605 |
| }, |
| { |
| "epoch": 1.1907514450867052, |
| "grad_norm": 0.49950817881103576, |
| "learning_rate": 7.055974514746445e-07, |
| "loss": 0.5837708711624146, |
| "step": 412, |
| "token_acc": 0.8074309042384765 |
| }, |
| { |
| "epoch": 1.19364161849711, |
| "grad_norm": 0.5860116475494397, |
| "learning_rate": 7.041442243451752e-07, |
| "loss": 0.5210489630699158, |
| "step": 413, |
| "token_acc": 0.8244094424028096 |
| }, |
| { |
| "epoch": 1.1965317919075145, |
| "grad_norm": 0.5718657608384051, |
| "learning_rate": 7.026889247724635e-07, |
| "loss": 0.5820956230163574, |
| "step": 414, |
| "token_acc": 0.8042295599535557 |
| }, |
| { |
| "epoch": 1.199421965317919, |
| "grad_norm": 0.5054409513703455, |
| "learning_rate": 7.012315675305045e-07, |
| "loss": 0.5862281918525696, |
| "step": 415, |
| "token_acc": 0.8023793187527289 |
| }, |
| { |
| "epoch": 1.2023121387283238, |
| "grad_norm": 0.5766487774658408, |
| "learning_rate": 6.997721674141822e-07, |
| "loss": 0.520296037197113, |
| "step": 416, |
| "token_acc": 0.8252748600155311 |
| }, |
| { |
| "epoch": 1.2052023121387283, |
| "grad_norm": 0.537979220335716, |
| "learning_rate": 6.983107392391202e-07, |
| "loss": 0.5797343850135803, |
| "step": 417, |
| "token_acc": 0.80571660344046 |
| }, |
| { |
| "epoch": 1.208092485549133, |
| "grad_norm": 0.5396946740305607, |
| "learning_rate": 6.9684729784153e-07, |
| "loss": 0.6153110265731812, |
| "step": 418, |
| "token_acc": 0.7969049998485812 |
| }, |
| { |
| "epoch": 1.2109826589595376, |
| "grad_norm": 0.5642823581815699, |
| "learning_rate": 6.953818580780613e-07, |
| "loss": 0.5325438976287842, |
| "step": 419, |
| "token_acc": 0.8222246858832225 |
| }, |
| { |
| "epoch": 1.2138728323699421, |
| "grad_norm": 0.5535087521581403, |
| "learning_rate": 6.939144348256511e-07, |
| "loss": 0.5709867477416992, |
| "step": 420, |
| "token_acc": 0.8069591256176074 |
| }, |
| { |
| "epoch": 1.216763005780347, |
| "grad_norm": 0.572340555748076, |
| "learning_rate": 6.924450429813723e-07, |
| "loss": 0.5548975467681885, |
| "step": 421, |
| "token_acc": 0.8185377583894686 |
| }, |
| { |
| "epoch": 1.2196531791907514, |
| "grad_norm": 0.5155912490897337, |
| "learning_rate": 6.909736974622826e-07, |
| "loss": 0.5856627225875854, |
| "step": 422, |
| "token_acc": 0.8058833037013092 |
| }, |
| { |
| "epoch": 1.222543352601156, |
| "grad_norm": 0.5287358182605065, |
| "learning_rate": 6.895004132052735e-07, |
| "loss": 0.530200719833374, |
| "step": 423, |
| "token_acc": 0.822671307855992 |
| }, |
| { |
| "epoch": 1.2254335260115607, |
| "grad_norm": 0.5377464968526829, |
| "learning_rate": 6.88025205166918e-07, |
| "loss": 0.6028895974159241, |
| "step": 424, |
| "token_acc": 0.8013212984612038 |
| }, |
| { |
| "epoch": 1.2283236994219653, |
| "grad_norm": 0.5204405657753005, |
| "learning_rate": 6.865480883233189e-07, |
| "loss": 0.5590497851371765, |
| "step": 425, |
| "token_acc": 0.8117163218535146 |
| }, |
| { |
| "epoch": 1.2312138728323698, |
| "grad_norm": 0.45493496853760634, |
| "learning_rate": 6.850690776699573e-07, |
| "loss": 0.5726251602172852, |
| "step": 426, |
| "token_acc": 0.8084424978300127 |
| }, |
| { |
| "epoch": 1.2341040462427746, |
| "grad_norm": 0.6240376452291253, |
| "learning_rate": 6.835881882215395e-07, |
| "loss": 0.5343113541603088, |
| "step": 427, |
| "token_acc": 0.8196929353326794 |
| }, |
| { |
| "epoch": 1.2369942196531791, |
| "grad_norm": 0.5773298029457239, |
| "learning_rate": 6.821054350118458e-07, |
| "loss": 0.5317709445953369, |
| "step": 428, |
| "token_acc": 0.8196335435275461 |
| }, |
| { |
| "epoch": 1.239884393063584, |
| "grad_norm": 0.5477278016005382, |
| "learning_rate": 6.806208330935766e-07, |
| "loss": 0.5721542835235596, |
| "step": 429, |
| "token_acc": 0.8069397675429067 |
| }, |
| { |
| "epoch": 1.2427745664739884, |
| "grad_norm": 0.5954432022727356, |
| "learning_rate": 6.791343975381999e-07, |
| "loss": 0.59670090675354, |
| "step": 430, |
| "token_acc": 0.8028038691690053 |
| }, |
| { |
| "epoch": 1.245664739884393, |
| "grad_norm": 0.6299231511446614, |
| "learning_rate": 6.776461434357993e-07, |
| "loss": 0.5712985396385193, |
| "step": 431, |
| "token_acc": 0.8093430920755399 |
| }, |
| { |
| "epoch": 1.2485549132947977, |
| "grad_norm": 0.5405979300580379, |
| "learning_rate": 6.761560858949192e-07, |
| "loss": 0.5809611082077026, |
| "step": 432, |
| "token_acc": 0.8070006162733515 |
| }, |
| { |
| "epoch": 1.2514450867052023, |
| "grad_norm": 0.5516822339033575, |
| "learning_rate": 6.746642400424131e-07, |
| "loss": 0.5620344281196594, |
| "step": 433, |
| "token_acc": 0.8121798185065721 |
| }, |
| { |
| "epoch": 1.254335260115607, |
| "grad_norm": 0.5284837836987685, |
| "learning_rate": 6.731706210232882e-07, |
| "loss": 0.5855224132537842, |
| "step": 434, |
| "token_acc": 0.8044497743554139 |
| }, |
| { |
| "epoch": 1.2572254335260116, |
| "grad_norm": 0.5627730241670859, |
| "learning_rate": 6.716752440005537e-07, |
| "loss": 0.5670550465583801, |
| "step": 435, |
| "token_acc": 0.8096381386958137 |
| }, |
| { |
| "epoch": 1.260115606936416, |
| "grad_norm": 0.538509679886266, |
| "learning_rate": 6.701781241550648e-07, |
| "loss": 0.5526491403579712, |
| "step": 436, |
| "token_acc": 0.8155125315340866 |
| }, |
| { |
| "epoch": 1.2630057803468209, |
| "grad_norm": 0.4771561540026018, |
| "learning_rate": 6.686792766853705e-07, |
| "loss": 0.5505247712135315, |
| "step": 437, |
| "token_acc": 0.8138159537283621 |
| }, |
| { |
| "epoch": 1.2658959537572254, |
| "grad_norm": 0.5223829257694631, |
| "learning_rate": 6.671787168075575e-07, |
| "loss": 0.5447695255279541, |
| "step": 438, |
| "token_acc": 0.8178192464935741 |
| }, |
| { |
| "epoch": 1.2687861271676302, |
| "grad_norm": 0.5159364504277794, |
| "learning_rate": 6.656764597550975e-07, |
| "loss": 0.5982085466384888, |
| "step": 439, |
| "token_acc": 0.8001320834327017 |
| }, |
| { |
| "epoch": 1.2716763005780347, |
| "grad_norm": 0.5310637224775283, |
| "learning_rate": 6.641725207786909e-07, |
| "loss": 0.5778173208236694, |
| "step": 440, |
| "token_acc": 0.8066611125837846 |
| }, |
| { |
| "epoch": 1.2745664739884393, |
| "grad_norm": 0.56776340532874, |
| "learning_rate": 6.626669151461133e-07, |
| "loss": 0.5481947660446167, |
| "step": 441, |
| "token_acc": 0.8165455226676658 |
| }, |
| { |
| "epoch": 1.2774566473988438, |
| "grad_norm": 0.5289033874903101, |
| "learning_rate": 6.611596581420599e-07, |
| "loss": 0.5178524255752563, |
| "step": 442, |
| "token_acc": 0.8276837132314907 |
| }, |
| { |
| "epoch": 1.2803468208092486, |
| "grad_norm": 0.6054263277819003, |
| "learning_rate": 6.596507650679899e-07, |
| "loss": 0.5819660425186157, |
| "step": 443, |
| "token_acc": 0.8038088791803834 |
| }, |
| { |
| "epoch": 1.2832369942196533, |
| "grad_norm": 0.5487293303925478, |
| "learning_rate": 6.581402512419723e-07, |
| "loss": 0.5847280621528625, |
| "step": 444, |
| "token_acc": 0.80743134495099 |
| }, |
| { |
| "epoch": 1.2861271676300579, |
| "grad_norm": 0.5388475336099026, |
| "learning_rate": 6.566281319985295e-07, |
| "loss": 0.5863124132156372, |
| "step": 445, |
| "token_acc": 0.8067254504627854 |
| }, |
| { |
| "epoch": 1.2890173410404624, |
| "grad_norm": 0.5538452871257553, |
| "learning_rate": 6.551144226884815e-07, |
| "loss": 0.5669398307800293, |
| "step": 446, |
| "token_acc": 0.8087953975429001 |
| }, |
| { |
| "epoch": 1.291907514450867, |
| "grad_norm": 0.557772227891473, |
| "learning_rate": 6.53599138678791e-07, |
| "loss": 0.5209745764732361, |
| "step": 447, |
| "token_acc": 0.8239799595072235 |
| }, |
| { |
| "epoch": 1.2947976878612717, |
| "grad_norm": 0.6127169435529054, |
| "learning_rate": 6.520822953524065e-07, |
| "loss": 0.5106294751167297, |
| "step": 448, |
| "token_acc": 0.8277936680145971 |
| }, |
| { |
| "epoch": 1.2976878612716762, |
| "grad_norm": 0.5375147488907324, |
| "learning_rate": 6.505639081081066e-07, |
| "loss": 0.5071303844451904, |
| "step": 449, |
| "token_acc": 0.8268003446613994 |
| }, |
| { |
| "epoch": 1.300578034682081, |
| "grad_norm": 0.5553311529997369, |
| "learning_rate": 6.490439923603435e-07, |
| "loss": 0.5532734394073486, |
| "step": 450, |
| "token_acc": 0.8134406172882417 |
| }, |
| { |
| "epoch": 1.3034682080924855, |
| "grad_norm": 0.5998759397432016, |
| "learning_rate": 6.475225635390863e-07, |
| "loss": 0.5865392088890076, |
| "step": 451, |
| "token_acc": 0.8023424626486245 |
| }, |
| { |
| "epoch": 1.30635838150289, |
| "grad_norm": 0.5417420736704273, |
| "learning_rate": 6.459996370896652e-07, |
| "loss": 0.546296238899231, |
| "step": 452, |
| "token_acc": 0.8187062949013282 |
| }, |
| { |
| "epoch": 1.3092485549132948, |
| "grad_norm": 0.5655148261341275, |
| "learning_rate": 6.444752284726135e-07, |
| "loss": 0.5877007246017456, |
| "step": 453, |
| "token_acc": 0.8039364919354839 |
| }, |
| { |
| "epoch": 1.3121387283236994, |
| "grad_norm": 0.6144864679165839, |
| "learning_rate": 6.429493531635114e-07, |
| "loss": 0.5454727411270142, |
| "step": 454, |
| "token_acc": 0.8179015382597002 |
| }, |
| { |
| "epoch": 1.3150289017341041, |
| "grad_norm": 0.5513024274913209, |
| "learning_rate": 6.414220266528291e-07, |
| "loss": 0.553301215171814, |
| "step": 455, |
| "token_acc": 0.8119396930565884 |
| }, |
| { |
| "epoch": 1.3179190751445087, |
| "grad_norm": 0.5291432658218749, |
| "learning_rate": 6.398932644457689e-07, |
| "loss": 0.5474492311477661, |
| "step": 456, |
| "token_acc": 0.8148487159928808 |
| }, |
| { |
| "epoch": 1.3208092485549132, |
| "grad_norm": 0.5239384490420579, |
| "learning_rate": 6.383630820621081e-07, |
| "loss": 0.5769109725952148, |
| "step": 457, |
| "token_acc": 0.8075285980313913 |
| }, |
| { |
| "epoch": 1.323699421965318, |
| "grad_norm": 0.5372997474035569, |
| "learning_rate": 6.368314950360415e-07, |
| "loss": 0.5458542108535767, |
| "step": 458, |
| "token_acc": 0.818262614678899 |
| }, |
| { |
| "epoch": 1.3265895953757225, |
| "grad_norm": 0.5222784886904625, |
| "learning_rate": 6.352985189160234e-07, |
| "loss": 0.543486475944519, |
| "step": 459, |
| "token_acc": 0.8140883445049911 |
| }, |
| { |
| "epoch": 1.3294797687861273, |
| "grad_norm": 0.5656149822293426, |
| "learning_rate": 6.337641692646106e-07, |
| "loss": 0.5165099501609802, |
| "step": 460, |
| "token_acc": 0.8232782145649256 |
| }, |
| { |
| "epoch": 1.3323699421965318, |
| "grad_norm": 0.5339208409670375, |
| "learning_rate": 6.322284616583026e-07, |
| "loss": 0.568447470664978, |
| "step": 461, |
| "token_acc": 0.8107062348801407 |
| }, |
| { |
| "epoch": 1.3352601156069364, |
| "grad_norm": 0.534789315369846, |
| "learning_rate": 6.306914116873862e-07, |
| "loss": 0.5637167692184448, |
| "step": 462, |
| "token_acc": 0.8118799414154401 |
| }, |
| { |
| "epoch": 1.3381502890173411, |
| "grad_norm": 0.5013992587561265, |
| "learning_rate": 6.291530349557749e-07, |
| "loss": 0.6041359305381775, |
| "step": 463, |
| "token_acc": 0.8002847429734529 |
| }, |
| { |
| "epoch": 1.3410404624277457, |
| "grad_norm": 0.6327002649058038, |
| "learning_rate": 6.27613347080851e-07, |
| "loss": 0.5996913909912109, |
| "step": 464, |
| "token_acc": 0.8028000921266601 |
| }, |
| { |
| "epoch": 1.3439306358381504, |
| "grad_norm": 0.47925020942862323, |
| "learning_rate": 6.260723636933076e-07, |
| "loss": 0.5272285342216492, |
| "step": 465, |
| "token_acc": 0.8219443104776792 |
| }, |
| { |
| "epoch": 1.346820809248555, |
| "grad_norm": 0.5418997127974843, |
| "learning_rate": 6.2453010043699e-07, |
| "loss": 0.5982799530029297, |
| "step": 466, |
| "token_acc": 0.8018455748733745 |
| }, |
| { |
| "epoch": 1.3497109826589595, |
| "grad_norm": 0.511563505395346, |
| "learning_rate": 6.22986572968736e-07, |
| "loss": 0.5489825010299683, |
| "step": 467, |
| "token_acc": 0.8149126753184632 |
| }, |
| { |
| "epoch": 1.352601156069364, |
| "grad_norm": 0.6199984691110088, |
| "learning_rate": 6.214417969582181e-07, |
| "loss": 0.5509693622589111, |
| "step": 468, |
| "token_acc": 0.8135395589697864 |
| }, |
| { |
| "epoch": 1.3554913294797688, |
| "grad_norm": 0.9112236282410355, |
| "learning_rate": 6.198957880877833e-07, |
| "loss": 0.5764250755310059, |
| "step": 469, |
| "token_acc": 0.8059208967249633 |
| }, |
| { |
| "epoch": 1.3583815028901733, |
| "grad_norm": 0.5989342589849401, |
| "learning_rate": 6.183485620522946e-07, |
| "loss": 0.5593207478523254, |
| "step": 470, |
| "token_acc": 0.8130887081520711 |
| }, |
| { |
| "epoch": 1.361271676300578, |
| "grad_norm": 0.539630418011966, |
| "learning_rate": 6.168001345589715e-07, |
| "loss": 0.5798720121383667, |
| "step": 471, |
| "token_acc": 0.8067868478007105 |
| }, |
| { |
| "epoch": 1.3641618497109826, |
| "grad_norm": 0.5728505086100849, |
| "learning_rate": 6.152505213272307e-07, |
| "loss": 0.5105577707290649, |
| "step": 472, |
| "token_acc": 0.8268291947926711 |
| }, |
| { |
| "epoch": 1.3670520231213872, |
| "grad_norm": 0.5731864783632108, |
| "learning_rate": 6.136997380885259e-07, |
| "loss": 0.505968451499939, |
| "step": 473, |
| "token_acc": 0.8303852677489701 |
| }, |
| { |
| "epoch": 1.369942196531792, |
| "grad_norm": 0.5786843206230191, |
| "learning_rate": 6.12147800586189e-07, |
| "loss": 0.570541501045227, |
| "step": 474, |
| "token_acc": 0.8074693848475233 |
| }, |
| { |
| "epoch": 1.3728323699421965, |
| "grad_norm": 0.5300325283027945, |
| "learning_rate": 6.105947245752696e-07, |
| "loss": 0.5622447729110718, |
| "step": 475, |
| "token_acc": 0.8132824737156444 |
| }, |
| { |
| "epoch": 1.3757225433526012, |
| "grad_norm": 0.5128915878177316, |
| "learning_rate": 6.090405258223756e-07, |
| "loss": 0.5856798887252808, |
| "step": 476, |
| "token_acc": 0.8047204813663714 |
| }, |
| { |
| "epoch": 1.3786127167630058, |
| "grad_norm": 0.6515506714427548, |
| "learning_rate": 6.074852201055121e-07, |
| "loss": 0.5826733112335205, |
| "step": 477, |
| "token_acc": 0.8034700052323068 |
| }, |
| { |
| "epoch": 1.3815028901734103, |
| "grad_norm": 0.5411318320511171, |
| "learning_rate": 6.059288232139225e-07, |
| "loss": 0.5210794806480408, |
| "step": 478, |
| "token_acc": 0.8267671925390047 |
| }, |
| { |
| "epoch": 1.384393063583815, |
| "grad_norm": 0.5057150421228545, |
| "learning_rate": 6.043713509479277e-07, |
| "loss": 0.5771398544311523, |
| "step": 479, |
| "token_acc": 0.805000332629771 |
| }, |
| { |
| "epoch": 1.3872832369942196, |
| "grad_norm": 0.5683244072025584, |
| "learning_rate": 6.028128191187653e-07, |
| "loss": 0.5385507941246033, |
| "step": 480, |
| "token_acc": 0.8176837578528416 |
| }, |
| { |
| "epoch": 1.3901734104046244, |
| "grad_norm": 0.5074682411792649, |
| "learning_rate": 6.012532435484297e-07, |
| "loss": 0.5577852725982666, |
| "step": 481, |
| "token_acc": 0.8137183546223177 |
| }, |
| { |
| "epoch": 1.393063583815029, |
| "grad_norm": 0.5087445776495183, |
| "learning_rate": 5.996926400695113e-07, |
| "loss": 0.5707537531852722, |
| "step": 482, |
| "token_acc": 0.8081652461733929 |
| }, |
| { |
| "epoch": 1.3959537572254335, |
| "grad_norm": 0.6070774288583548, |
| "learning_rate": 5.981310245250351e-07, |
| "loss": 0.5291765928268433, |
| "step": 483, |
| "token_acc": 0.8198564644248993 |
| }, |
| { |
| "epoch": 1.3988439306358382, |
| "grad_norm": 0.6426245006677934, |
| "learning_rate": 5.965684127683012e-07, |
| "loss": 0.5093721151351929, |
| "step": 484, |
| "token_acc": 0.8283427901813247 |
| }, |
| { |
| "epoch": 1.4017341040462428, |
| "grad_norm": 0.5235719939982498, |
| "learning_rate": 5.950048206627228e-07, |
| "loss": 0.5404484272003174, |
| "step": 485, |
| "token_acc": 0.8198352412538783 |
| }, |
| { |
| "epoch": 1.4046242774566475, |
| "grad_norm": 0.5330826415435456, |
| "learning_rate": 5.934402640816651e-07, |
| "loss": 0.6019877195358276, |
| "step": 486, |
| "token_acc": 0.7991196347629723 |
| }, |
| { |
| "epoch": 1.407514450867052, |
| "grad_norm": 0.5402624006228682, |
| "learning_rate": 5.918747589082852e-07, |
| "loss": 0.512151300907135, |
| "step": 487, |
| "token_acc": 0.8255600510667488 |
| }, |
| { |
| "epoch": 1.4104046242774566, |
| "grad_norm": 0.6034074325578554, |
| "learning_rate": 5.903083210353695e-07, |
| "loss": 0.5242146253585815, |
| "step": 488, |
| "token_acc": 0.821293480679374 |
| }, |
| { |
| "epoch": 1.4132947976878611, |
| "grad_norm": 0.6270290133131012, |
| "learning_rate": 5.887409663651736e-07, |
| "loss": 0.5783629417419434, |
| "step": 489, |
| "token_acc": 0.8050973979809469 |
| }, |
| { |
| "epoch": 1.416184971098266, |
| "grad_norm": 0.5697754520754279, |
| "learning_rate": 5.8717271080926e-07, |
| "loss": 0.5560973882675171, |
| "step": 490, |
| "token_acc": 0.8151623266302166 |
| }, |
| { |
| "epoch": 1.4190751445086704, |
| "grad_norm": 0.5157305125572653, |
| "learning_rate": 5.856035702883368e-07, |
| "loss": 0.5741870999336243, |
| "step": 491, |
| "token_acc": 0.8082165363392618 |
| }, |
| { |
| "epoch": 1.4219653179190752, |
| "grad_norm": 0.552078767595136, |
| "learning_rate": 5.840335607320963e-07, |
| "loss": 0.5855275392532349, |
| "step": 492, |
| "token_acc": 0.8052095872614805 |
| }, |
| { |
| "epoch": 1.4248554913294798, |
| "grad_norm": 0.503224099727086, |
| "learning_rate": 5.824626980790532e-07, |
| "loss": 0.5036199688911438, |
| "step": 493, |
| "token_acc": 0.8295647769617597 |
| }, |
| { |
| "epoch": 1.4277456647398843, |
| "grad_norm": 0.535330314229148, |
| "learning_rate": 5.808909982763825e-07, |
| "loss": 0.5614448189735413, |
| "step": 494, |
| "token_acc": 0.8112195584194068 |
| }, |
| { |
| "epoch": 1.430635838150289, |
| "grad_norm": 0.5656493275743161, |
| "learning_rate": 5.793184772797577e-07, |
| "loss": 0.5648437142372131, |
| "step": 495, |
| "token_acc": 0.809333342296497 |
| }, |
| { |
| "epoch": 1.4335260115606936, |
| "grad_norm": 0.591964902056671, |
| "learning_rate": 5.777451510531894e-07, |
| "loss": 0.4516139626502991, |
| "step": 496, |
| "token_acc": 0.8457953488372093 |
| }, |
| { |
| "epoch": 1.4364161849710984, |
| "grad_norm": 0.5299075126510611, |
| "learning_rate": 5.761710355688627e-07, |
| "loss": 0.4779651165008545, |
| "step": 497, |
| "token_acc": 0.8387296285988187 |
| }, |
| { |
| "epoch": 1.439306358381503, |
| "grad_norm": 0.5231792243250346, |
| "learning_rate": 5.745961468069749e-07, |
| "loss": 0.5104596614837646, |
| "step": 498, |
| "token_acc": 0.8271942849713633 |
| }, |
| { |
| "epoch": 1.4421965317919074, |
| "grad_norm": 0.6000529888737813, |
| "learning_rate": 5.730205007555733e-07, |
| "loss": 0.6098222136497498, |
| "step": 499, |
| "token_acc": 0.797237394529817 |
| }, |
| { |
| "epoch": 1.4450867052023122, |
| "grad_norm": 0.5852668345047015, |
| "learning_rate": 5.714441134103936e-07, |
| "loss": 0.5637513995170593, |
| "step": 500, |
| "token_acc": 0.8103524746275665 |
| }, |
| { |
| "epoch": 1.4450867052023122, |
| "eval_loss": 0.5809597969055176, |
| "eval_runtime": 69.4729, |
| "eval_samples_per_second": 1.583, |
| "eval_steps_per_second": 0.202, |
| "eval_token_acc": 0.8065338513984092, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.4479768786127167, |
| "grad_norm": 0.4857410434739766, |
| "learning_rate": 5.698670007746966e-07, |
| "loss": 0.5209301710128784, |
| "step": 501, |
| "token_acc": 0.8231232032245636 |
| }, |
| { |
| "epoch": 1.4508670520231215, |
| "grad_norm": 0.4784985253670375, |
| "learning_rate": 5.682891788591065e-07, |
| "loss": 0.5571726560592651, |
| "step": 502, |
| "token_acc": 0.8108515538539766 |
| }, |
| { |
| "epoch": 1.453757225433526, |
| "grad_norm": 0.5367888787021339, |
| "learning_rate": 5.66710663681448e-07, |
| "loss": 0.49731090664863586, |
| "step": 503, |
| "token_acc": 0.8312890657633916 |
| }, |
| { |
| "epoch": 1.4566473988439306, |
| "grad_norm": 0.610496198886357, |
| "learning_rate": 5.651314712665832e-07, |
| "loss": 0.5665647387504578, |
| "step": 504, |
| "token_acc": 0.8079466209795078 |
| }, |
| { |
| "epoch": 1.4595375722543353, |
| "grad_norm": 1.0378355029945652, |
| "learning_rate": 5.635516176462501e-07, |
| "loss": 0.5903141498565674, |
| "step": 505, |
| "token_acc": 0.8006371133060007 |
| }, |
| { |
| "epoch": 1.4624277456647399, |
| "grad_norm": 0.5540207350664488, |
| "learning_rate": 5.619711188588986e-07, |
| "loss": 0.5362493991851807, |
| "step": 506, |
| "token_acc": 0.820343725019984 |
| }, |
| { |
| "epoch": 1.4653179190751446, |
| "grad_norm": 0.5144874632858891, |
| "learning_rate": 5.603899909495283e-07, |
| "loss": 0.5462620258331299, |
| "step": 507, |
| "token_acc": 0.8145612480715733 |
| }, |
| { |
| "epoch": 1.4682080924855492, |
| "grad_norm": 0.5592031195717259, |
| "learning_rate": 5.58808249969526e-07, |
| "loss": 0.5476292371749878, |
| "step": 508, |
| "token_acc": 0.8147672146736102 |
| }, |
| { |
| "epoch": 1.4710982658959537, |
| "grad_norm": 0.8823564949169135, |
| "learning_rate": 5.57225911976502e-07, |
| "loss": 0.5868964195251465, |
| "step": 509, |
| "token_acc": 0.804788821591468 |
| }, |
| { |
| "epoch": 1.4739884393063583, |
| "grad_norm": 0.5547092232918307, |
| "learning_rate": 5.556429930341273e-07, |
| "loss": 0.5038424134254456, |
| "step": 510, |
| "token_acc": 0.8334411359013724 |
| }, |
| { |
| "epoch": 1.476878612716763, |
| "grad_norm": 0.5799136969979296, |
| "learning_rate": 5.540595092119708e-07, |
| "loss": 0.5707584619522095, |
| "step": 511, |
| "token_acc": 0.810527226273487 |
| }, |
| { |
| "epoch": 1.4797687861271676, |
| "grad_norm": 0.5034147261225864, |
| "learning_rate": 5.52475476585336e-07, |
| "loss": 0.5583351850509644, |
| "step": 512, |
| "token_acc": 0.8103952305319614 |
| }, |
| { |
| "epoch": 1.4826589595375723, |
| "grad_norm": 0.5160609299204681, |
| "learning_rate": 5.508909112350976e-07, |
| "loss": 0.5299844145774841, |
| "step": 513, |
| "token_acc": 0.8211946274807083 |
| }, |
| { |
| "epoch": 1.4855491329479769, |
| "grad_norm": 0.48690505381618093, |
| "learning_rate": 5.493058292475387e-07, |
| "loss": 0.5815989375114441, |
| "step": 514, |
| "token_acc": 0.8052997388378583 |
| }, |
| { |
| "epoch": 1.4884393063583814, |
| "grad_norm": 0.5497798749732475, |
| "learning_rate": 5.477202467141864e-07, |
| "loss": 0.5317429900169373, |
| "step": 515, |
| "token_acc": 0.8238470637503765 |
| }, |
| { |
| "epoch": 1.4913294797687862, |
| "grad_norm": 0.6297718557593524, |
| "learning_rate": 5.46134179731651e-07, |
| "loss": 0.5170228481292725, |
| "step": 516, |
| "token_acc": 0.8249339191625676 |
| }, |
| { |
| "epoch": 1.4942196531791907, |
| "grad_norm": 0.5879194826209626, |
| "learning_rate": 5.445476444014591e-07, |
| "loss": 0.5530685186386108, |
| "step": 517, |
| "token_acc": 0.8124287116369134 |
| }, |
| { |
| "epoch": 1.4971098265895955, |
| "grad_norm": 0.7172467911918745, |
| "learning_rate": 5.429606568298925e-07, |
| "loss": 0.5767130851745605, |
| "step": 518, |
| "token_acc": 0.8076758697324558 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.47225756227931015, |
| "learning_rate": 5.413732331278248e-07, |
| "loss": 0.5357682704925537, |
| "step": 519, |
| "token_acc": 0.818332255376673 |
| }, |
| { |
| "epoch": 1.5028901734104045, |
| "grad_norm": 0.5315785549808126, |
| "learning_rate": 5.397853894105559e-07, |
| "loss": 0.5103631019592285, |
| "step": 520, |
| "token_acc": 0.8272772712126261 |
| }, |
| { |
| "epoch": 1.5057803468208093, |
| "grad_norm": 0.5411201442197484, |
| "learning_rate": 5.381971417976505e-07, |
| "loss": 0.6071707606315613, |
| "step": 521, |
| "token_acc": 0.7980659432441779 |
| }, |
| { |
| "epoch": 1.5086705202312138, |
| "grad_norm": 0.48884895821181845, |
| "learning_rate": 5.366085064127734e-07, |
| "loss": 0.5692754983901978, |
| "step": 522, |
| "token_acc": 0.8067921134275868 |
| }, |
| { |
| "epoch": 1.5115606936416186, |
| "grad_norm": 0.5118700142105465, |
| "learning_rate": 5.350194993835257e-07, |
| "loss": 0.5697520971298218, |
| "step": 523, |
| "token_acc": 0.8085179483452373 |
| }, |
| { |
| "epoch": 1.5144508670520231, |
| "grad_norm": 0.5936738113687722, |
| "learning_rate": 5.33430136841282e-07, |
| "loss": 0.5466612577438354, |
| "step": 524, |
| "token_acc": 0.8143604233276328 |
| }, |
| { |
| "epoch": 1.5173410404624277, |
| "grad_norm": 0.48377394646569144, |
| "learning_rate": 5.318404349210255e-07, |
| "loss": 0.5685998201370239, |
| "step": 525, |
| "token_acc": 0.808461779914424 |
| }, |
| { |
| "epoch": 1.5202312138728322, |
| "grad_norm": 0.5190453952524928, |
| "learning_rate": 5.302504097611846e-07, |
| "loss": 0.5479923486709595, |
| "step": 526, |
| "token_acc": 0.8150076205934166 |
| }, |
| { |
| "epoch": 1.523121387283237, |
| "grad_norm": 0.6234920552697755, |
| "learning_rate": 5.286600775034699e-07, |
| "loss": 0.5165071487426758, |
| "step": 527, |
| "token_acc": 0.8261736549800983 |
| }, |
| { |
| "epoch": 1.5260115606936417, |
| "grad_norm": 0.5881560338514248, |
| "learning_rate": 5.270694542927088e-07, |
| "loss": 0.5723020434379578, |
| "step": 528, |
| "token_acc": 0.808951938948829 |
| }, |
| { |
| "epoch": 1.5289017341040463, |
| "grad_norm": 0.5975961668165296, |
| "learning_rate": 5.254785562766829e-07, |
| "loss": 0.5684691667556763, |
| "step": 529, |
| "token_acc": 0.8089175396185871 |
| }, |
| { |
| "epoch": 1.5317919075144508, |
| "grad_norm": 0.6478162796925766, |
| "learning_rate": 5.238873996059637e-07, |
| "loss": 0.49971041083335876, |
| "step": 530, |
| "token_acc": 0.8301960912691917 |
| }, |
| { |
| "epoch": 1.5346820809248554, |
| "grad_norm": 0.6430164741639133, |
| "learning_rate": 5.222960004337476e-07, |
| "loss": 0.539410412311554, |
| "step": 531, |
| "token_acc": 0.8203352152694456 |
| }, |
| { |
| "epoch": 1.5375722543352601, |
| "grad_norm": 0.5795319284660402, |
| "learning_rate": 5.207043749156944e-07, |
| "loss": 0.5065566897392273, |
| "step": 532, |
| "token_acc": 0.8278279073124954 |
| }, |
| { |
| "epoch": 1.5404624277456649, |
| "grad_norm": 0.5464317098167678, |
| "learning_rate": 5.191125392097604e-07, |
| "loss": 0.5445448160171509, |
| "step": 533, |
| "token_acc": 0.8166163521084138 |
| }, |
| { |
| "epoch": 1.5433526011560694, |
| "grad_norm": 0.5152523695934649, |
| "learning_rate": 5.175205094760361e-07, |
| "loss": 0.5751731991767883, |
| "step": 534, |
| "token_acc": 0.8060461344386376 |
| }, |
| { |
| "epoch": 1.546242774566474, |
| "grad_norm": 0.5393208162828292, |
| "learning_rate": 5.159283018765819e-07, |
| "loss": 0.5777266621589661, |
| "step": 535, |
| "token_acc": 0.8069097414119084 |
| }, |
| { |
| "epoch": 1.5491329479768785, |
| "grad_norm": 0.5264135658228388, |
| "learning_rate": 5.143359325752638e-07, |
| "loss": 0.555731475353241, |
| "step": 536, |
| "token_acc": 0.8131375804713217 |
| }, |
| { |
| "epoch": 1.5520231213872833, |
| "grad_norm": 0.553539191702997, |
| "learning_rate": 5.127434177375893e-07, |
| "loss": 0.5539097189903259, |
| "step": 537, |
| "token_acc": 0.812809830006887 |
| }, |
| { |
| "epoch": 1.5549132947976878, |
| "grad_norm": 0.8304938276922723, |
| "learning_rate": 5.111507735305434e-07, |
| "loss": 0.535222589969635, |
| "step": 538, |
| "token_acc": 0.8182165566153093 |
| }, |
| { |
| "epoch": 1.5578034682080926, |
| "grad_norm": 0.5723817981155602, |
| "learning_rate": 5.095580161224244e-07, |
| "loss": 0.5616499185562134, |
| "step": 539, |
| "token_acc": 0.8143216251104015 |
| }, |
| { |
| "epoch": 1.560693641618497, |
| "grad_norm": 0.5555286868999088, |
| "learning_rate": 5.079651616826801e-07, |
| "loss": 0.5724209547042847, |
| "step": 540, |
| "token_acc": 0.8077700594252842 |
| }, |
| { |
| "epoch": 1.5635838150289016, |
| "grad_norm": 0.576829331739999, |
| "learning_rate": 5.063722263817427e-07, |
| "loss": 0.5502010583877563, |
| "step": 541, |
| "token_acc": 0.8148729355841307 |
| }, |
| { |
| "epoch": 1.5664739884393064, |
| "grad_norm": 0.6980607962330599, |
| "learning_rate": 5.047792263908659e-07, |
| "loss": 0.5372669696807861, |
| "step": 542, |
| "token_acc": 0.8214026830309711 |
| }, |
| { |
| "epoch": 1.569364161849711, |
| "grad_norm": 0.5728162578490732, |
| "learning_rate": 5.031861778819601e-07, |
| "loss": 0.5055459141731262, |
| "step": 543, |
| "token_acc": 0.8276528811478554 |
| }, |
| { |
| "epoch": 1.5722543352601157, |
| "grad_norm": 0.588844313912188, |
| "learning_rate": 5.015930970274277e-07, |
| "loss": 0.5107961893081665, |
| "step": 544, |
| "token_acc": 0.8256070951933737 |
| }, |
| { |
| "epoch": 1.5751445086705202, |
| "grad_norm": 0.49402725355257393, |
| "learning_rate": 5e-07, |
| "loss": 0.5780792236328125, |
| "step": 545, |
| "token_acc": 0.80536919727071 |
| }, |
| { |
| "epoch": 1.5780346820809248, |
| "grad_norm": 0.5150482904703839, |
| "learning_rate": 4.984069029725722e-07, |
| "loss": 0.5730597972869873, |
| "step": 546, |
| "token_acc": 0.8071878326447399 |
| }, |
| { |
| "epoch": 1.5809248554913293, |
| "grad_norm": 0.6120632562818131, |
| "learning_rate": 4.968138221180401e-07, |
| "loss": 0.48976290225982666, |
| "step": 547, |
| "token_acc": 0.8329062019477191 |
| }, |
| { |
| "epoch": 1.583815028901734, |
| "grad_norm": 0.5693765551777754, |
| "learning_rate": 4.95220773609134e-07, |
| "loss": 0.5690828561782837, |
| "step": 548, |
| "token_acc": 0.8103057397715957 |
| }, |
| { |
| "epoch": 1.5867052023121389, |
| "grad_norm": 0.5356011166477922, |
| "learning_rate": 4.936277736182573e-07, |
| "loss": 0.5775788426399231, |
| "step": 549, |
| "token_acc": 0.8077360101658677 |
| }, |
| { |
| "epoch": 1.5895953757225434, |
| "grad_norm": 0.5425409660783537, |
| "learning_rate": 4.9203483831732e-07, |
| "loss": 0.5838006138801575, |
| "step": 550, |
| "token_acc": 0.8051269382791122 |
| }, |
| { |
| "epoch": 1.592485549132948, |
| "grad_norm": 0.531381332935958, |
| "learning_rate": 4.904419838775755e-07, |
| "loss": 0.528168797492981, |
| "step": 551, |
| "token_acc": 0.8208675592063154 |
| }, |
| { |
| "epoch": 1.5953757225433525, |
| "grad_norm": 0.5122923018471659, |
| "learning_rate": 4.888492264694565e-07, |
| "loss": 0.5490496158599854, |
| "step": 552, |
| "token_acc": 0.8156343068498415 |
| }, |
| { |
| "epoch": 1.5982658959537572, |
| "grad_norm": 0.5537803937619057, |
| "learning_rate": 4.872565822624106e-07, |
| "loss": 0.5283633470535278, |
| "step": 553, |
| "token_acc": 0.8213697374264063 |
| }, |
| { |
| "epoch": 1.601156069364162, |
| "grad_norm": 0.533996696099063, |
| "learning_rate": 4.856640674247363e-07, |
| "loss": 0.5403317213058472, |
| "step": 554, |
| "token_acc": 0.8186500168747891 |
| }, |
| { |
| "epoch": 1.6040462427745665, |
| "grad_norm": 0.6968861408661483, |
| "learning_rate": 4.840716981234181e-07, |
| "loss": 0.5232794880867004, |
| "step": 555, |
| "token_acc": 0.8258206662354464 |
| }, |
| { |
| "epoch": 1.606936416184971, |
| "grad_norm": 0.5457170981213912, |
| "learning_rate": 4.82479490523964e-07, |
| "loss": 0.5531569123268127, |
| "step": 556, |
| "token_acc": 0.8132714653155657 |
| }, |
| { |
| "epoch": 1.6098265895953756, |
| "grad_norm": 0.5611664995745906, |
| "learning_rate": 4.808874607902397e-07, |
| "loss": 0.580593466758728, |
| "step": 557, |
| "token_acc": 0.8061331347873197 |
| }, |
| { |
| "epoch": 1.6127167630057804, |
| "grad_norm": 0.49146546445526984, |
| "learning_rate": 4.792956250843055e-07, |
| "loss": 0.5263780355453491, |
| "step": 558, |
| "token_acc": 0.8212147967727204 |
| }, |
| { |
| "epoch": 1.6156069364161851, |
| "grad_norm": 0.5650532769234693, |
| "learning_rate": 4.777039995662522e-07, |
| "loss": 0.535209596157074, |
| "step": 559, |
| "token_acc": 0.8197443965795302 |
| }, |
| { |
| "epoch": 1.6184971098265897, |
| "grad_norm": 0.6028109251795714, |
| "learning_rate": 4.7611260039403655e-07, |
| "loss": 0.5842093825340271, |
| "step": 560, |
| "token_acc": 0.8079101659544867 |
| }, |
| { |
| "epoch": 1.6213872832369942, |
| "grad_norm": 0.59069578828569, |
| "learning_rate": 4.7452144372331715e-07, |
| "loss": 0.49987393617630005, |
| "step": 561, |
| "token_acc": 0.8312081956170992 |
| }, |
| { |
| "epoch": 1.6242774566473988, |
| "grad_norm": 0.5752034924536564, |
| "learning_rate": 4.7293054570729126e-07, |
| "loss": 0.5631648302078247, |
| "step": 562, |
| "token_acc": 0.8105449311754528 |
| }, |
| { |
| "epoch": 1.6271676300578035, |
| "grad_norm": 0.48011026987442956, |
| "learning_rate": 4.7133992249653026e-07, |
| "loss": 0.6020775437355042, |
| "step": 563, |
| "token_acc": 0.80207682093969 |
| }, |
| { |
| "epoch": 1.630057803468208, |
| "grad_norm": 0.6157896994330491, |
| "learning_rate": 4.697495902388154e-07, |
| "loss": 0.5418002009391785, |
| "step": 564, |
| "token_acc": 0.8178849600782141 |
| }, |
| { |
| "epoch": 1.6329479768786128, |
| "grad_norm": 0.5711847053504078, |
| "learning_rate": 4.681595650789746e-07, |
| "loss": 0.5428210496902466, |
| "step": 565, |
| "token_acc": 0.815186965701749 |
| }, |
| { |
| "epoch": 1.6358381502890174, |
| "grad_norm": 0.5202306815183112, |
| "learning_rate": 4.6656986315871815e-07, |
| "loss": 0.5333169102668762, |
| "step": 566, |
| "token_acc": 0.8192019018509085 |
| }, |
| { |
| "epoch": 1.638728323699422, |
| "grad_norm": 0.5862764371195341, |
| "learning_rate": 4.649805006164743e-07, |
| "loss": 0.5256876349449158, |
| "step": 567, |
| "token_acc": 0.8224795998947091 |
| }, |
| { |
| "epoch": 1.6416184971098264, |
| "grad_norm": 0.5972850501922398, |
| "learning_rate": 4.6339149358722675e-07, |
| "loss": 0.4838550388813019, |
| "step": 568, |
| "token_acc": 0.8348972296693477 |
| }, |
| { |
| "epoch": 1.6445086705202312, |
| "grad_norm": 0.5597928387418396, |
| "learning_rate": 4.618028582023495e-07, |
| "loss": 0.5284090042114258, |
| "step": 569, |
| "token_acc": 0.8216369128482156 |
| }, |
| { |
| "epoch": 1.647398843930636, |
| "grad_norm": 0.6008687154199086, |
| "learning_rate": 4.6021461058944415e-07, |
| "loss": 0.5147076845169067, |
| "step": 570, |
| "token_acc": 0.8275472384008092 |
| }, |
| { |
| "epoch": 1.6502890173410405, |
| "grad_norm": 0.6575913400532123, |
| "learning_rate": 4.5862676687217526e-07, |
| "loss": 0.5117477178573608, |
| "step": 571, |
| "token_acc": 0.8287706152259228 |
| }, |
| { |
| "epoch": 1.653179190751445, |
| "grad_norm": 0.5137586329958652, |
| "learning_rate": 4.5703934317010727e-07, |
| "loss": 0.5332241058349609, |
| "step": 572, |
| "token_acc": 0.8202151610509888 |
| }, |
| { |
| "epoch": 1.6560693641618496, |
| "grad_norm": 0.565500132263929, |
| "learning_rate": 4.5545235559854105e-07, |
| "loss": 0.5527046918869019, |
| "step": 573, |
| "token_acc": 0.8138320979141528 |
| }, |
| { |
| "epoch": 1.6589595375722543, |
| "grad_norm": 0.5302962565332909, |
| "learning_rate": 4.5386582026834904e-07, |
| "loss": 0.5092106461524963, |
| "step": 574, |
| "token_acc": 0.8281128993919504 |
| }, |
| { |
| "epoch": 1.661849710982659, |
| "grad_norm": 0.5821742123016643, |
| "learning_rate": 4.5227975328581335e-07, |
| "loss": 0.5064735412597656, |
| "step": 575, |
| "token_acc": 0.827575659879804 |
| }, |
| { |
| "epoch": 1.6647398843930636, |
| "grad_norm": 0.5963479290796274, |
| "learning_rate": 4.5069417075246146e-07, |
| "loss": 0.4928985834121704, |
| "step": 576, |
| "token_acc": 0.8335413266775463 |
| }, |
| { |
| "epoch": 1.6676300578034682, |
| "grad_norm": 0.6048528428075496, |
| "learning_rate": 4.491090887649024e-07, |
| "loss": 0.49480709433555603, |
| "step": 577, |
| "token_acc": 0.8347347057118005 |
| }, |
| { |
| "epoch": 1.6705202312138727, |
| "grad_norm": 0.6285946360216301, |
| "learning_rate": 4.475245234146639e-07, |
| "loss": 0.49079689383506775, |
| "step": 578, |
| "token_acc": 0.83443186255369 |
| }, |
| { |
| "epoch": 1.6734104046242775, |
| "grad_norm": 0.5603272652152215, |
| "learning_rate": 4.459404907880292e-07, |
| "loss": 0.5334948897361755, |
| "step": 579, |
| "token_acc": 0.8186869024041065 |
| }, |
| { |
| "epoch": 1.6763005780346822, |
| "grad_norm": 0.5366750310588114, |
| "learning_rate": 4.443570069658727e-07, |
| "loss": 0.5434994101524353, |
| "step": 580, |
| "token_acc": 0.816468327847366 |
| }, |
| { |
| "epoch": 1.6791907514450868, |
| "grad_norm": 0.5467060355475981, |
| "learning_rate": 4.42774088023498e-07, |
| "loss": 0.5757695436477661, |
| "step": 581, |
| "token_acc": 0.8080333034841515 |
| }, |
| { |
| "epoch": 1.6820809248554913, |
| "grad_norm": 0.6184966009398549, |
| "learning_rate": 4.4119175003047407e-07, |
| "loss": 0.5647035241127014, |
| "step": 582, |
| "token_acc": 0.8111076384093734 |
| }, |
| { |
| "epoch": 1.6849710982658959, |
| "grad_norm": 0.5185867079907565, |
| "learning_rate": 4.396100090504717e-07, |
| "loss": 0.5796575546264648, |
| "step": 583, |
| "token_acc": 0.8038202807075824 |
| }, |
| { |
| "epoch": 1.6878612716763006, |
| "grad_norm": 0.813643580955912, |
| "learning_rate": 4.380288811411015e-07, |
| "loss": 0.4743460416793823, |
| "step": 584, |
| "token_acc": 0.8386408207372227 |
| }, |
| { |
| "epoch": 1.6907514450867052, |
| "grad_norm": 0.5897820309260559, |
| "learning_rate": 4.364483823537498e-07, |
| "loss": 0.5133877992630005, |
| "step": 585, |
| "token_acc": 0.8280596690740123 |
| }, |
| { |
| "epoch": 1.69364161849711, |
| "grad_norm": 0.5045181308055782, |
| "learning_rate": 4.3486852873341675e-07, |
| "loss": 0.4322221279144287, |
| "step": 586, |
| "token_acc": 0.8542273580630543 |
| }, |
| { |
| "epoch": 1.6965317919075145, |
| "grad_norm": 0.5368324019397285, |
| "learning_rate": 4.3328933631855195e-07, |
| "loss": 0.5392330884933472, |
| "step": 587, |
| "token_acc": 0.8167310479753804 |
| }, |
| { |
| "epoch": 1.699421965317919, |
| "grad_norm": 0.6325523087901944, |
| "learning_rate": 4.317108211408933e-07, |
| "loss": 0.5353363752365112, |
| "step": 588, |
| "token_acc": 0.8181194354468216 |
| }, |
| { |
| "epoch": 1.7023121387283235, |
| "grad_norm": 0.5524128184191415, |
| "learning_rate": 4.301329992253034e-07, |
| "loss": 0.49616819620132446, |
| "step": 589, |
| "token_acc": 0.8328951746002753 |
| }, |
| { |
| "epoch": 1.7052023121387283, |
| "grad_norm": 0.5034001899067154, |
| "learning_rate": 4.285558865896065e-07, |
| "loss": 0.60711270570755, |
| "step": 590, |
| "token_acc": 0.79853336934882 |
| }, |
| { |
| "epoch": 1.708092485549133, |
| "grad_norm": 0.5374954529356852, |
| "learning_rate": 4.2697949924442667e-07, |
| "loss": 0.5293912291526794, |
| "step": 591, |
| "token_acc": 0.823666171683991 |
| }, |
| { |
| "epoch": 1.7109826589595376, |
| "grad_norm": 0.5635901606786159, |
| "learning_rate": 4.2540385319302524e-07, |
| "loss": 0.5353492498397827, |
| "step": 592, |
| "token_acc": 0.8201790482173709 |
| }, |
| { |
| "epoch": 1.7138728323699421, |
| "grad_norm": 0.5253802438717141, |
| "learning_rate": 4.2382896443113723e-07, |
| "loss": 0.5334903001785278, |
| "step": 593, |
| "token_acc": 0.818032814303156 |
| }, |
| { |
| "epoch": 1.7167630057803467, |
| "grad_norm": 0.4950360437778214, |
| "learning_rate": 4.222548489468105e-07, |
| "loss": 0.5341077446937561, |
| "step": 594, |
| "token_acc": 0.8223698601883738 |
| }, |
| { |
| "epoch": 1.7196531791907514, |
| "grad_norm": 0.5514023397940045, |
| "learning_rate": 4.2068152272024233e-07, |
| "loss": 0.5363609194755554, |
| "step": 595, |
| "token_acc": 0.8196168676738834 |
| }, |
| { |
| "epoch": 1.7225433526011562, |
| "grad_norm": 0.5623269464968738, |
| "learning_rate": 4.1910900172361763e-07, |
| "loss": 0.5504116415977478, |
| "step": 596, |
| "token_acc": 0.8151576025420944 |
| }, |
| { |
| "epoch": 1.7254335260115607, |
| "grad_norm": 0.5274551240137945, |
| "learning_rate": 4.175373019209468e-07, |
| "loss": 0.5549143552780151, |
| "step": 597, |
| "token_acc": 0.8107931600579981 |
| }, |
| { |
| "epoch": 1.7283236994219653, |
| "grad_norm": 0.5704477484512106, |
| "learning_rate": 4.159664392679038e-07, |
| "loss": 0.5494258403778076, |
| "step": 598, |
| "token_acc": 0.8168460618486246 |
| }, |
| { |
| "epoch": 1.7312138728323698, |
| "grad_norm": 0.6161778636830428, |
| "learning_rate": 4.143964297116633e-07, |
| "loss": 0.5577751994132996, |
| "step": 599, |
| "token_acc": 0.8121810843728358 |
| }, |
| { |
| "epoch": 1.7341040462427746, |
| "grad_norm": 0.6075742333688984, |
| "learning_rate": 4.1282728919074005e-07, |
| "loss": 0.5403814315795898, |
| "step": 600, |
| "token_acc": 0.821105101452986 |
| }, |
| { |
| "epoch": 1.7369942196531793, |
| "grad_norm": 0.6520533036933062, |
| "learning_rate": 4.1125903363482634e-07, |
| "loss": 0.47892940044403076, |
| "step": 601, |
| "token_acc": 0.8369930163846361 |
| }, |
| { |
| "epoch": 1.739884393063584, |
| "grad_norm": 0.5680876440782588, |
| "learning_rate": 4.0969167896463046e-07, |
| "loss": 0.5336910486221313, |
| "step": 602, |
| "token_acc": 0.8216713342322719 |
| }, |
| { |
| "epoch": 1.7427745664739884, |
| "grad_norm": 0.7080634828510891, |
| "learning_rate": 4.0812524109171475e-07, |
| "loss": 0.524694561958313, |
| "step": 603, |
| "token_acc": 0.8261413383364603 |
| }, |
| { |
| "epoch": 1.745664739884393, |
| "grad_norm": 0.528594204710658, |
| "learning_rate": 4.0655973591833475e-07, |
| "loss": 0.5086634755134583, |
| "step": 604, |
| "token_acc": 0.8286352131054758 |
| }, |
| { |
| "epoch": 1.7485549132947977, |
| "grad_norm": 0.6260551904964319, |
| "learning_rate": 4.0499517933727727e-07, |
| "loss": 0.48479533195495605, |
| "step": 605, |
| "token_acc": 0.8348625638530771 |
| }, |
| { |
| "epoch": 1.7514450867052023, |
| "grad_norm": 0.5425421161730628, |
| "learning_rate": 4.034315872316987e-07, |
| "loss": 0.5817371606826782, |
| "step": 606, |
| "token_acc": 0.8068743095851797 |
| }, |
| { |
| "epoch": 1.754335260115607, |
| "grad_norm": 0.5183265889747526, |
| "learning_rate": 4.018689754749648e-07, |
| "loss": 0.508335292339325, |
| "step": 607, |
| "token_acc": 0.8271757714886951 |
| }, |
| { |
| "epoch": 1.7572254335260116, |
| "grad_norm": 0.5542866259664111, |
| "learning_rate": 4.0030735993048884e-07, |
| "loss": 0.5586389899253845, |
| "step": 608, |
| "token_acc": 0.8166898202884842 |
| }, |
| { |
| "epoch": 1.760115606936416, |
| "grad_norm": 0.5411864859640132, |
| "learning_rate": 3.987467564515703e-07, |
| "loss": 0.4601624608039856, |
| "step": 609, |
| "token_acc": 0.84508010404543 |
| }, |
| { |
| "epoch": 1.7630057803468207, |
| "grad_norm": 0.524886018198833, |
| "learning_rate": 3.971871808812347e-07, |
| "loss": 0.6006595492362976, |
| "step": 610, |
| "token_acc": 0.8011782786885245 |
| }, |
| { |
| "epoch": 1.7658959537572254, |
| "grad_norm": 0.6317327126827325, |
| "learning_rate": 3.956286490520724e-07, |
| "loss": 0.509284496307373, |
| "step": 611, |
| "token_acc": 0.8325460029684483 |
| }, |
| { |
| "epoch": 1.7687861271676302, |
| "grad_norm": 0.5390581631300952, |
| "learning_rate": 3.9407117678607756e-07, |
| "loss": 0.4938768744468689, |
| "step": 612, |
| "token_acc": 0.8321855607688815 |
| }, |
| { |
| "epoch": 1.7716763005780347, |
| "grad_norm": 0.6560783619375582, |
| "learning_rate": 3.9251477989448795e-07, |
| "loss": 0.517693042755127, |
| "step": 613, |
| "token_acc": 0.8247808891627084 |
| }, |
| { |
| "epoch": 1.7745664739884393, |
| "grad_norm": 0.5602632255167417, |
| "learning_rate": 3.909594741776246e-07, |
| "loss": 0.5566587448120117, |
| "step": 614, |
| "token_acc": 0.812049268832398 |
| }, |
| { |
| "epoch": 1.7774566473988438, |
| "grad_norm": 0.5947561408697656, |
| "learning_rate": 3.8940527542473033e-07, |
| "loss": 0.5609596967697144, |
| "step": 615, |
| "token_acc": 0.8135071333264908 |
| }, |
| { |
| "epoch": 1.7803468208092486, |
| "grad_norm": 0.5666442289982523, |
| "learning_rate": 3.8785219941381096e-07, |
| "loss": 0.5130019187927246, |
| "step": 616, |
| "token_acc": 0.8260872845234054 |
| }, |
| { |
| "epoch": 1.7832369942196533, |
| "grad_norm": 0.5455613722107414, |
| "learning_rate": 3.8630026191147405e-07, |
| "loss": 0.5589362978935242, |
| "step": 617, |
| "token_acc": 0.812414640315063 |
| }, |
| { |
| "epoch": 1.7861271676300579, |
| "grad_norm": 0.550217294387885, |
| "learning_rate": 3.8474947867276943e-07, |
| "loss": 0.5442770719528198, |
| "step": 618, |
| "token_acc": 0.8159889681462442 |
| }, |
| { |
| "epoch": 1.7890173410404624, |
| "grad_norm": 0.6147473096977814, |
| "learning_rate": 3.8319986544102843e-07, |
| "loss": 0.5019974708557129, |
| "step": 619, |
| "token_acc": 0.8287660341354818 |
| }, |
| { |
| "epoch": 1.791907514450867, |
| "grad_norm": 0.5247209374319454, |
| "learning_rate": 3.8165143794770536e-07, |
| "loss": 0.5381553769111633, |
| "step": 620, |
| "token_acc": 0.8177024482109227 |
| }, |
| { |
| "epoch": 1.7947976878612717, |
| "grad_norm": 0.5828193451002669, |
| "learning_rate": 3.8010421191221684e-07, |
| "loss": 0.523591160774231, |
| "step": 621, |
| "token_acc": 0.8240329148286393 |
| }, |
| { |
| "epoch": 1.7976878612716765, |
| "grad_norm": 0.6015955817395803, |
| "learning_rate": 3.78558203041782e-07, |
| "loss": 0.539184033870697, |
| "step": 622, |
| "token_acc": 0.8198696606927818 |
| }, |
| { |
| "epoch": 1.800578034682081, |
| "grad_norm": 0.6008612726420935, |
| "learning_rate": 3.7701342703126394e-07, |
| "loss": 0.48327842354774475, |
| "step": 623, |
| "token_acc": 0.8381134839691216 |
| }, |
| { |
| "epoch": 1.8034682080924855, |
| "grad_norm": 0.6147376285603221, |
| "learning_rate": 3.754698995630101e-07, |
| "loss": 0.5317155122756958, |
| "step": 624, |
| "token_acc": 0.8217411222039681 |
| }, |
| { |
| "epoch": 1.80635838150289, |
| "grad_norm": 0.6052477258361706, |
| "learning_rate": 3.7392763630669243e-07, |
| "loss": 0.5276878476142883, |
| "step": 625, |
| "token_acc": 0.8253162139403252 |
| }, |
| { |
| "epoch": 1.8092485549132948, |
| "grad_norm": 0.6010435836572232, |
| "learning_rate": 3.7238665291914906e-07, |
| "loss": 0.5263775587081909, |
| "step": 626, |
| "token_acc": 0.8255283062505889 |
| }, |
| { |
| "epoch": 1.8121387283236994, |
| "grad_norm": 0.46459212133429395, |
| "learning_rate": 3.7084696504422525e-07, |
| "loss": 0.547301173210144, |
| "step": 627, |
| "token_acc": 0.8155224935354174 |
| }, |
| { |
| "epoch": 1.8150289017341041, |
| "grad_norm": 0.567681963556663, |
| "learning_rate": 3.693085883126137e-07, |
| "loss": 0.504138708114624, |
| "step": 628, |
| "token_acc": 0.8300083822296731 |
| }, |
| { |
| "epoch": 1.8179190751445087, |
| "grad_norm": 0.5584446222303159, |
| "learning_rate": 3.6777153834169726e-07, |
| "loss": 0.5485329031944275, |
| "step": 629, |
| "token_acc": 0.8132374537904492 |
| }, |
| { |
| "epoch": 1.8208092485549132, |
| "grad_norm": 0.5610791187838037, |
| "learning_rate": 3.6623583073538965e-07, |
| "loss": 0.5641239881515503, |
| "step": 630, |
| "token_acc": 0.8092657184953543 |
| }, |
| { |
| "epoch": 1.8236994219653178, |
| "grad_norm": 0.5571741993799751, |
| "learning_rate": 3.647014810839766e-07, |
| "loss": 0.5435695648193359, |
| "step": 631, |
| "token_acc": 0.8177736577401747 |
| }, |
| { |
| "epoch": 1.8265895953757225, |
| "grad_norm": 0.49451328689884416, |
| "learning_rate": 3.6316850496395855e-07, |
| "loss": 0.5079208612442017, |
| "step": 632, |
| "token_acc": 0.8277710403419788 |
| }, |
| { |
| "epoch": 1.8294797687861273, |
| "grad_norm": 0.731312278004029, |
| "learning_rate": 3.6163691793789183e-07, |
| "loss": 0.5612790584564209, |
| "step": 633, |
| "token_acc": 0.8145309625996321 |
| }, |
| { |
| "epoch": 1.8323699421965318, |
| "grad_norm": 0.5433070122384833, |
| "learning_rate": 3.6010673555423116e-07, |
| "loss": 0.5702831149101257, |
| "step": 634, |
| "token_acc": 0.8084171358992268 |
| }, |
| { |
| "epoch": 1.8352601156069364, |
| "grad_norm": 0.5731111882216399, |
| "learning_rate": 3.585779733471709e-07, |
| "loss": 0.5208647847175598, |
| "step": 635, |
| "token_acc": 0.8247836812568473 |
| }, |
| { |
| "epoch": 1.838150289017341, |
| "grad_norm": 0.5863236667781423, |
| "learning_rate": 3.5705064683648855e-07, |
| "loss": 0.5619288682937622, |
| "step": 636, |
| "token_acc": 0.8113308744654901 |
| }, |
| { |
| "epoch": 1.8410404624277457, |
| "grad_norm": 0.5914772914689451, |
| "learning_rate": 3.555247715273867e-07, |
| "loss": 0.49036872386932373, |
| "step": 637, |
| "token_acc": 0.8374078180826161 |
| }, |
| { |
| "epoch": 1.8439306358381504, |
| "grad_norm": 0.5295217861583622, |
| "learning_rate": 3.5400036291033485e-07, |
| "loss": 0.5192829966545105, |
| "step": 638, |
| "token_acc": 0.8258416465326863 |
| }, |
| { |
| "epoch": 1.846820809248555, |
| "grad_norm": 0.5366095434473555, |
| "learning_rate": 3.5247743646091367e-07, |
| "loss": 0.48854076862335205, |
| "step": 639, |
| "token_acc": 0.8355026160864565 |
| }, |
| { |
| "epoch": 1.8497109826589595, |
| "grad_norm": 0.552265227323895, |
| "learning_rate": 3.509560076396567e-07, |
| "loss": 0.5541850924491882, |
| "step": 640, |
| "token_acc": 0.8161763703067251 |
| }, |
| { |
| "epoch": 1.852601156069364, |
| "grad_norm": 0.5766930712255567, |
| "learning_rate": 3.4943609189189345e-07, |
| "loss": 0.49490103125572205, |
| "step": 641, |
| "token_acc": 0.8331491368709432 |
| }, |
| { |
| "epoch": 1.8554913294797688, |
| "grad_norm": 0.535142297976956, |
| "learning_rate": 3.4791770464759347e-07, |
| "loss": 0.4898555278778076, |
| "step": 642, |
| "token_acc": 0.8374039851247991 |
| }, |
| { |
| "epoch": 1.8583815028901736, |
| "grad_norm": 0.6183254820329128, |
| "learning_rate": 3.4640086132120906e-07, |
| "loss": 0.5269954800605774, |
| "step": 643, |
| "token_acc": 0.8234169800850853 |
| }, |
| { |
| "epoch": 1.861271676300578, |
| "grad_norm": 0.5689322137373185, |
| "learning_rate": 3.4488557731151845e-07, |
| "loss": 0.5776628851890564, |
| "step": 644, |
| "token_acc": 0.8088350364511105 |
| }, |
| { |
| "epoch": 1.8641618497109826, |
| "grad_norm": 0.6658391987358445, |
| "learning_rate": 3.433718680014705e-07, |
| "loss": 0.5674536228179932, |
| "step": 645, |
| "token_acc": 0.8111267784268523 |
| }, |
| { |
| "epoch": 1.8670520231213872, |
| "grad_norm": 0.5702895217250429, |
| "learning_rate": 3.418597487580277e-07, |
| "loss": 0.5942685008049011, |
| "step": 646, |
| "token_acc": 0.8022179198440608 |
| }, |
| { |
| "epoch": 1.869942196531792, |
| "grad_norm": 0.5309534408388851, |
| "learning_rate": 3.4034923493201007e-07, |
| "loss": 0.5299490690231323, |
| "step": 647, |
| "token_acc": 0.821584668833352 |
| }, |
| { |
| "epoch": 1.8728323699421965, |
| "grad_norm": 0.5410494679792496, |
| "learning_rate": 3.388403418579401e-07, |
| "loss": 0.606309175491333, |
| "step": 648, |
| "token_acc": 0.798714223159906 |
| }, |
| { |
| "epoch": 1.8757225433526012, |
| "grad_norm": 0.5885088182247251, |
| "learning_rate": 3.3733308485388654e-07, |
| "loss": 0.5152050256729126, |
| "step": 649, |
| "token_acc": 0.8267703435171321 |
| }, |
| { |
| "epoch": 1.8786127167630058, |
| "grad_norm": 0.5654387308838804, |
| "learning_rate": 3.3582747922130903e-07, |
| "loss": 0.5702789425849915, |
| "step": 650, |
| "token_acc": 0.8114149857200532 |
| }, |
| { |
| "epoch": 1.8815028901734103, |
| "grad_norm": 0.5850200396224108, |
| "learning_rate": 3.343235402449025e-07, |
| "loss": 0.5715373754501343, |
| "step": 651, |
| "token_acc": 0.809812202628705 |
| }, |
| { |
| "epoch": 1.8843930635838149, |
| "grad_norm": 0.556702805056612, |
| "learning_rate": 3.3282128319244237e-07, |
| "loss": 0.5341757535934448, |
| "step": 652, |
| "token_acc": 0.8190304033783219 |
| }, |
| { |
| "epoch": 1.8872832369942196, |
| "grad_norm": 0.5947101357097584, |
| "learning_rate": 3.313207233146296e-07, |
| "loss": 0.5120434165000916, |
| "step": 653, |
| "token_acc": 0.8284752116658459 |
| }, |
| { |
| "epoch": 1.8901734104046244, |
| "grad_norm": 0.582059481324802, |
| "learning_rate": 3.2982187584493516e-07, |
| "loss": 0.55910724401474, |
| "step": 654, |
| "token_acc": 0.8136601394849785 |
| }, |
| { |
| "epoch": 1.893063583815029, |
| "grad_norm": 0.5455003297751219, |
| "learning_rate": 3.283247559994463e-07, |
| "loss": 0.4808557629585266, |
| "step": 655, |
| "token_acc": 0.8359401998347231 |
| }, |
| { |
| "epoch": 1.8959537572254335, |
| "grad_norm": 0.5917330827702398, |
| "learning_rate": 3.268293789767118e-07, |
| "loss": 0.5275037288665771, |
| "step": 656, |
| "token_acc": 0.8203649654462709 |
| }, |
| { |
| "epoch": 1.898843930635838, |
| "grad_norm": 0.604537834207858, |
| "learning_rate": 3.2533575995758694e-07, |
| "loss": 0.536374568939209, |
| "step": 657, |
| "token_acc": 0.8204949969817257 |
| }, |
| { |
| "epoch": 1.9017341040462428, |
| "grad_norm": 0.4877298329861977, |
| "learning_rate": 3.2384391410508066e-07, |
| "loss": 0.5517327785491943, |
| "step": 658, |
| "token_acc": 0.8144875608045037 |
| }, |
| { |
| "epoch": 1.9046242774566475, |
| "grad_norm": 0.5138107466063505, |
| "learning_rate": 3.223538565642009e-07, |
| "loss": 0.5936318635940552, |
| "step": 659, |
| "token_acc": 0.8033954818487206 |
| }, |
| { |
| "epoch": 1.907514450867052, |
| "grad_norm": 0.6408117816293808, |
| "learning_rate": 3.2086560246180016e-07, |
| "loss": 0.5199168920516968, |
| "step": 660, |
| "token_acc": 0.823338105590611 |
| }, |
| { |
| "epoch": 1.9104046242774566, |
| "grad_norm": 0.6769271622378699, |
| "learning_rate": 3.1937916690642355e-07, |
| "loss": 0.5296117067337036, |
| "step": 661, |
| "token_acc": 0.8234518795819685 |
| }, |
| { |
| "epoch": 1.9132947976878611, |
| "grad_norm": 0.5205148500482691, |
| "learning_rate": 3.178945649881543e-07, |
| "loss": 0.4881097674369812, |
| "step": 662, |
| "token_acc": 0.8381457544657637 |
| }, |
| { |
| "epoch": 1.916184971098266, |
| "grad_norm": 0.533469943639252, |
| "learning_rate": 3.1641181177846046e-07, |
| "loss": 0.5646488666534424, |
| "step": 663, |
| "token_acc": 0.8092274601183008 |
| }, |
| { |
| "epoch": 1.9190751445086707, |
| "grad_norm": 0.5079029266136241, |
| "learning_rate": 3.1493092233004277e-07, |
| "loss": 0.565247654914856, |
| "step": 664, |
| "token_acc": 0.8091681448977687 |
| }, |
| { |
| "epoch": 1.9219653179190752, |
| "grad_norm": 0.5846146749149876, |
| "learning_rate": 3.1345191167668106e-07, |
| "loss": 0.46707916259765625, |
| "step": 665, |
| "token_acc": 0.8448507638926736 |
| }, |
| { |
| "epoch": 1.9248554913294798, |
| "grad_norm": 0.6115493897752081, |
| "learning_rate": 3.119747948330821e-07, |
| "loss": 0.49020782113075256, |
| "step": 666, |
| "token_acc": 0.8343801519151217 |
| }, |
| { |
| "epoch": 1.9277456647398843, |
| "grad_norm": 0.5665579491864339, |
| "learning_rate": 3.1049958679472645e-07, |
| "loss": 0.4773547649383545, |
| "step": 667, |
| "token_acc": 0.840464602970709 |
| }, |
| { |
| "epoch": 1.930635838150289, |
| "grad_norm": 0.5428950150023341, |
| "learning_rate": 3.0902630253771725e-07, |
| "loss": 0.5331814885139465, |
| "step": 668, |
| "token_acc": 0.8203493165709791 |
| }, |
| { |
| "epoch": 1.9335260115606936, |
| "grad_norm": 0.535673154611531, |
| "learning_rate": 3.0755495701862785e-07, |
| "loss": 0.5440840125083923, |
| "step": 669, |
| "token_acc": 0.8188541358240693 |
| }, |
| { |
| "epoch": 1.9364161849710984, |
| "grad_norm": 0.4836434667966126, |
| "learning_rate": 3.06085565174349e-07, |
| "loss": 0.5037864446640015, |
| "step": 670, |
| "token_acc": 0.8303648820337454 |
| }, |
| { |
| "epoch": 1.939306358381503, |
| "grad_norm": 0.6272828775317285, |
| "learning_rate": 3.046181419219386e-07, |
| "loss": 0.5913348197937012, |
| "step": 671, |
| "token_acc": 0.804053529366086 |
| }, |
| { |
| "epoch": 1.9421965317919074, |
| "grad_norm": 0.47821443556435045, |
| "learning_rate": 3.031527021584701e-07, |
| "loss": 0.5496195554733276, |
| "step": 672, |
| "token_acc": 0.8131932821607896 |
| }, |
| { |
| "epoch": 1.9450867052023122, |
| "grad_norm": 0.5368717641927174, |
| "learning_rate": 3.0168926076087986e-07, |
| "loss": 0.5248396396636963, |
| "step": 673, |
| "token_acc": 0.8238304421235078 |
| }, |
| { |
| "epoch": 1.9479768786127167, |
| "grad_norm": 0.5546004209488442, |
| "learning_rate": 3.002278325858177e-07, |
| "loss": 0.5503116846084595, |
| "step": 674, |
| "token_acc": 0.8154341018265293 |
| }, |
| { |
| "epoch": 1.9508670520231215, |
| "grad_norm": 0.5406553961850177, |
| "learning_rate": 2.987684324694957e-07, |
| "loss": 0.5093920230865479, |
| "step": 675, |
| "token_acc": 0.8285504848168147 |
| }, |
| { |
| "epoch": 1.953757225433526, |
| "grad_norm": 0.5070602927484339, |
| "learning_rate": 2.9731107522753654e-07, |
| "loss": 0.6153904795646667, |
| "step": 676, |
| "token_acc": 0.7934051997463538 |
| }, |
| { |
| "epoch": 1.9566473988439306, |
| "grad_norm": 0.6200327187024355, |
| "learning_rate": 2.9585577565482484e-07, |
| "loss": 0.49602842330932617, |
| "step": 677, |
| "token_acc": 0.8349261185482811 |
| }, |
| { |
| "epoch": 1.9595375722543351, |
| "grad_norm": 0.5432813085052021, |
| "learning_rate": 2.944025485253557e-07, |
| "loss": 0.5533842444419861, |
| "step": 678, |
| "token_acc": 0.8136697934557625 |
| }, |
| { |
| "epoch": 1.9624277456647399, |
| "grad_norm": 0.5655183170978749, |
| "learning_rate": 2.929514085920848e-07, |
| "loss": 0.5408231019973755, |
| "step": 679, |
| "token_acc": 0.8149668765846079 |
| }, |
| { |
| "epoch": 1.9653179190751446, |
| "grad_norm": 0.5348380476951098, |
| "learning_rate": 2.915023705867793e-07, |
| "loss": 0.5112613439559937, |
| "step": 680, |
| "token_acc": 0.8288466633304877 |
| }, |
| { |
| "epoch": 1.9682080924855492, |
| "grad_norm": 0.5587948082197168, |
| "learning_rate": 2.900554492198677e-07, |
| "loss": 0.5132273435592651, |
| "step": 681, |
| "token_acc": 0.8262983388869136 |
| }, |
| { |
| "epoch": 1.9710982658959537, |
| "grad_norm": 0.6468264753422917, |
| "learning_rate": 2.886106591802908e-07, |
| "loss": 0.49628451466560364, |
| "step": 682, |
| "token_acc": 0.8309623989848394 |
| }, |
| { |
| "epoch": 1.9739884393063583, |
| "grad_norm": 0.8088000703258003, |
| "learning_rate": 2.871680151353523e-07, |
| "loss": 0.566349983215332, |
| "step": 683, |
| "token_acc": 0.813486073930626 |
| }, |
| { |
| "epoch": 1.976878612716763, |
| "grad_norm": 0.5639785659667156, |
| "learning_rate": 2.8572753173057e-07, |
| "loss": 0.5700632333755493, |
| "step": 684, |
| "token_acc": 0.8086862859910506 |
| }, |
| { |
| "epoch": 1.9797687861271678, |
| "grad_norm": 0.5543121051930197, |
| "learning_rate": 2.842892235895279e-07, |
| "loss": 0.5271592140197754, |
| "step": 685, |
| "token_acc": 0.8250378942459045 |
| }, |
| { |
| "epoch": 1.9826589595375723, |
| "grad_norm": 0.5567574729556525, |
| "learning_rate": 2.828531053137257e-07, |
| "loss": 0.528691828250885, |
| "step": 686, |
| "token_acc": 0.8240472063720813 |
| }, |
| { |
| "epoch": 1.9855491329479769, |
| "grad_norm": 0.582442051806669, |
| "learning_rate": 2.814191914824332e-07, |
| "loss": 0.5287505388259888, |
| "step": 687, |
| "token_acc": 0.821006600414202 |
| }, |
| { |
| "epoch": 1.9884393063583814, |
| "grad_norm": 0.5452501250540314, |
| "learning_rate": 2.799874966525403e-07, |
| "loss": 0.5334792733192444, |
| "step": 688, |
| "token_acc": 0.8213241825401043 |
| }, |
| { |
| "epoch": 1.9913294797687862, |
| "grad_norm": 0.5482828728372189, |
| "learning_rate": 2.785580353584099e-07, |
| "loss": 0.5632658004760742, |
| "step": 689, |
| "token_acc": 0.8116547561426986 |
| }, |
| { |
| "epoch": 1.9942196531791907, |
| "grad_norm": 0.633529877080459, |
| "learning_rate": 2.771308221117309e-07, |
| "loss": 0.516349196434021, |
| "step": 690, |
| "token_acc": 0.8251189141964578 |
| }, |
| { |
| "epoch": 1.9971098265895955, |
| "grad_norm": 0.5330351124089759, |
| "learning_rate": 2.757058714013697e-07, |
| "loss": 0.5631735324859619, |
| "step": 691, |
| "token_acc": 0.8110226467289205 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.5696997466472099, |
| "learning_rate": 2.7428319769322415e-07, |
| "loss": 0.5440479516983032, |
| "step": 692, |
| "token_acc": 0.8158318122461348 |
| }, |
| { |
| "epoch": 2.0028901734104045, |
| "grad_norm": 0.5585685445254689, |
| "learning_rate": 2.7286281543007597e-07, |
| "loss": 0.5391400456428528, |
| "step": 693, |
| "token_acc": 0.8175343274767459 |
| }, |
| { |
| "epoch": 2.005780346820809, |
| "grad_norm": 0.4706256621473158, |
| "learning_rate": 2.714447390314449e-07, |
| "loss": 0.5360602140426636, |
| "step": 694, |
| "token_acc": 0.8195729923051913 |
| }, |
| { |
| "epoch": 2.008670520231214, |
| "grad_norm": 0.4975918712102163, |
| "learning_rate": 2.700289828934416e-07, |
| "loss": 0.5223442316055298, |
| "step": 695, |
| "token_acc": 0.8266022386843656 |
| }, |
| { |
| "epoch": 2.0115606936416186, |
| "grad_norm": 0.6855664652178536, |
| "learning_rate": 2.686155613886215e-07, |
| "loss": 0.5413398146629333, |
| "step": 696, |
| "token_acc": 0.8206837181461728 |
| }, |
| { |
| "epoch": 2.014450867052023, |
| "grad_norm": 0.48324739879314504, |
| "learning_rate": 2.672044888658399e-07, |
| "loss": 0.5646222829818726, |
| "step": 697, |
| "token_acc": 0.8079876543209876 |
| }, |
| { |
| "epoch": 2.0173410404624277, |
| "grad_norm": 0.5416524165161476, |
| "learning_rate": 2.65795779650105e-07, |
| "loss": 0.5677503347396851, |
| "step": 698, |
| "token_acc": 0.8107366402887164 |
| }, |
| { |
| "epoch": 2.020231213872832, |
| "grad_norm": 0.5180032228711846, |
| "learning_rate": 2.64389448042433e-07, |
| "loss": 0.5446953773498535, |
| "step": 699, |
| "token_acc": 0.8148853386782998 |
| }, |
| { |
| "epoch": 2.023121387283237, |
| "grad_norm": 0.5242926098982621, |
| "learning_rate": 2.6298550831970307e-07, |
| "loss": 0.5251763463020325, |
| "step": 700, |
| "token_acc": 0.8224519443333264 |
| }, |
| { |
| "epoch": 2.0260115606936417, |
| "grad_norm": 0.52590432100961, |
| "learning_rate": 2.615839747345127e-07, |
| "loss": 0.5811551809310913, |
| "step": 701, |
| "token_acc": 0.8070368200019533 |
| }, |
| { |
| "epoch": 2.0289017341040463, |
| "grad_norm": 0.5346477392780163, |
| "learning_rate": 2.6018486151503213e-07, |
| "loss": 0.5263258218765259, |
| "step": 702, |
| "token_acc": 0.8226229312836096 |
| }, |
| { |
| "epoch": 2.031791907514451, |
| "grad_norm": 0.6702369614403866, |
| "learning_rate": 2.5878818286486026e-07, |
| "loss": 0.4835773706436157, |
| "step": 703, |
| "token_acc": 0.8352293317787196 |
| }, |
| { |
| "epoch": 2.0346820809248554, |
| "grad_norm": 0.5810005206971598, |
| "learning_rate": 2.573939529628816e-07, |
| "loss": 0.5316369533538818, |
| "step": 704, |
| "token_acc": 0.8213102951763859 |
| }, |
| { |
| "epoch": 2.03757225433526, |
| "grad_norm": 0.5814408850367526, |
| "learning_rate": 2.560021859631196e-07, |
| "loss": 0.531090259552002, |
| "step": 705, |
| "token_acc": 0.8247005161281525 |
| }, |
| { |
| "epoch": 2.040462427745665, |
| "grad_norm": 0.5620278975131617, |
| "learning_rate": 2.5461289599459646e-07, |
| "loss": 0.4695814847946167, |
| "step": 706, |
| "token_acc": 0.8385467145834584 |
| }, |
| { |
| "epoch": 2.0433526011560694, |
| "grad_norm": 0.5109837854766828, |
| "learning_rate": 2.532260971611867e-07, |
| "loss": 0.5594449043273926, |
| "step": 707, |
| "token_acc": 0.8109966953664819 |
| }, |
| { |
| "epoch": 2.046242774566474, |
| "grad_norm": 0.5657246379091214, |
| "learning_rate": 2.5184180354147554e-07, |
| "loss": 0.520602285861969, |
| "step": 708, |
| "token_acc": 0.8247487538513655 |
| }, |
| { |
| "epoch": 2.0491329479768785, |
| "grad_norm": 0.4918673470663886, |
| "learning_rate": 2.5046002918861606e-07, |
| "loss": 0.5579814910888672, |
| "step": 709, |
| "token_acc": 0.8135782994649099 |
| }, |
| { |
| "epoch": 2.052023121387283, |
| "grad_norm": 0.48477796977022586, |
| "learning_rate": 2.490807881301855e-07, |
| "loss": 0.5919597744941711, |
| "step": 710, |
| "token_acc": 0.8019583967529172 |
| }, |
| { |
| "epoch": 2.054913294797688, |
| "grad_norm": 0.6496075635378676, |
| "learning_rate": 2.477040943680436e-07, |
| "loss": 0.48429036140441895, |
| "step": 711, |
| "token_acc": 0.8355824403733149 |
| }, |
| { |
| "epoch": 2.0578034682080926, |
| "grad_norm": 0.5519540209458493, |
| "learning_rate": 2.4632996187819034e-07, |
| "loss": 0.506065309047699, |
| "step": 712, |
| "token_acc": 0.8278258846453057 |
| }, |
| { |
| "epoch": 2.060693641618497, |
| "grad_norm": 0.5287310217228682, |
| "learning_rate": 2.4495840461062433e-07, |
| "loss": 0.5793042778968811, |
| "step": 713, |
| "token_acc": 0.8061971483241775 |
| }, |
| { |
| "epoch": 2.0635838150289016, |
| "grad_norm": 0.5904419866749646, |
| "learning_rate": 2.435894364892005e-07, |
| "loss": 0.573466420173645, |
| "step": 714, |
| "token_acc": 0.8098105997674032 |
| }, |
| { |
| "epoch": 2.066473988439306, |
| "grad_norm": 0.6225416912989975, |
| "learning_rate": 2.4222307141148906e-07, |
| "loss": 0.48143109679222107, |
| "step": 715, |
| "token_acc": 0.836179983151357 |
| }, |
| { |
| "epoch": 2.069364161849711, |
| "grad_norm": 0.5109219477999456, |
| "learning_rate": 2.4085932324863507e-07, |
| "loss": 0.544453501701355, |
| "step": 716, |
| "token_acc": 0.8168550972356652 |
| }, |
| { |
| "epoch": 2.0722543352601157, |
| "grad_norm": 0.544868652560984, |
| "learning_rate": 2.394982058452165e-07, |
| "loss": 0.550638735294342, |
| "step": 717, |
| "token_acc": 0.813385770281816 |
| }, |
| { |
| "epoch": 2.0751445086705202, |
| "grad_norm": 0.5334855839219953, |
| "learning_rate": 2.3813973301910427e-07, |
| "loss": 0.484441876411438, |
| "step": 718, |
| "token_acc": 0.8346531540424537 |
| }, |
| { |
| "epoch": 2.078034682080925, |
| "grad_norm": 0.5494544655057828, |
| "learning_rate": 2.3678391856132202e-07, |
| "loss": 0.5680737495422363, |
| "step": 719, |
| "token_acc": 0.8124086743334372 |
| }, |
| { |
| "epoch": 2.0809248554913293, |
| "grad_norm": 0.6045748429466216, |
| "learning_rate": 2.3543077623590635e-07, |
| "loss": 0.5128438472747803, |
| "step": 720, |
| "token_acc": 0.8279022575462924 |
| }, |
| { |
| "epoch": 2.0838150289017343, |
| "grad_norm": 0.48256069429990633, |
| "learning_rate": 2.3408031977976623e-07, |
| "loss": 0.5861136317253113, |
| "step": 721, |
| "token_acc": 0.8029797322959706 |
| }, |
| { |
| "epoch": 2.086705202312139, |
| "grad_norm": 0.5653447327029175, |
| "learning_rate": 2.3273256290254402e-07, |
| "loss": 0.537794828414917, |
| "step": 722, |
| "token_acc": 0.8187106929644486 |
| }, |
| { |
| "epoch": 2.0895953757225434, |
| "grad_norm": 0.511608140122125, |
| "learning_rate": 2.3138751928647727e-07, |
| "loss": 0.5536022782325745, |
| "step": 723, |
| "token_acc": 0.8143630972354428 |
| }, |
| { |
| "epoch": 2.092485549132948, |
| "grad_norm": 0.6461334504435571, |
| "learning_rate": 2.3004520258625737e-07, |
| "loss": 0.547166645526886, |
| "step": 724, |
| "token_acc": 0.8144167909990558 |
| }, |
| { |
| "epoch": 2.0953757225433525, |
| "grad_norm": 0.5280363246093879, |
| "learning_rate": 2.2870562642889392e-07, |
| "loss": 0.5407837629318237, |
| "step": 725, |
| "token_acc": 0.81717697615801 |
| }, |
| { |
| "epoch": 2.098265895953757, |
| "grad_norm": 0.5895491785859862, |
| "learning_rate": 2.2736880441357398e-07, |
| "loss": 0.5352712273597717, |
| "step": 726, |
| "token_acc": 0.8206253892344479 |
| }, |
| { |
| "epoch": 2.101156069364162, |
| "grad_norm": 0.510490807616544, |
| "learning_rate": 2.2603475011152517e-07, |
| "loss": 0.5849488973617554, |
| "step": 727, |
| "token_acc": 0.8032212807794704 |
| }, |
| { |
| "epoch": 2.1040462427745665, |
| "grad_norm": 0.5074478903676131, |
| "learning_rate": 2.247034770658781e-07, |
| "loss": 0.5740774869918823, |
| "step": 728, |
| "token_acc": 0.8094154108581142 |
| }, |
| { |
| "epoch": 2.106936416184971, |
| "grad_norm": 0.49465264402350506, |
| "learning_rate": 2.2337499879152772e-07, |
| "loss": 0.5517815351486206, |
| "step": 729, |
| "token_acc": 0.8150811818935997 |
| }, |
| { |
| "epoch": 2.1098265895953756, |
| "grad_norm": 0.5409252325098711, |
| "learning_rate": 2.2204932877499778e-07, |
| "loss": 0.5680674314498901, |
| "step": 730, |
| "token_acc": 0.8076237225087722 |
| }, |
| { |
| "epoch": 2.11271676300578, |
| "grad_norm": 0.5667599272734437, |
| "learning_rate": 2.2072648047430182e-07, |
| "loss": 0.546800971031189, |
| "step": 731, |
| "token_acc": 0.8193202586524828 |
| }, |
| { |
| "epoch": 2.115606936416185, |
| "grad_norm": 0.5820288457006244, |
| "learning_rate": 2.1940646731880885e-07, |
| "loss": 0.5512528419494629, |
| "step": 732, |
| "token_acc": 0.8157494966528321 |
| }, |
| { |
| "epoch": 2.1184971098265897, |
| "grad_norm": 0.4949523232866875, |
| "learning_rate": 2.180893027091052e-07, |
| "loss": 0.5347863435745239, |
| "step": 733, |
| "token_acc": 0.8186724373395966 |
| }, |
| { |
| "epoch": 2.121387283236994, |
| "grad_norm": 0.5570654028702667, |
| "learning_rate": 2.1677500001685946e-07, |
| "loss": 0.5904409289360046, |
| "step": 734, |
| "token_acc": 0.80330335262698 |
| }, |
| { |
| "epoch": 2.1242774566473988, |
| "grad_norm": 0.5169029043729536, |
| "learning_rate": 2.154635725846861e-07, |
| "loss": 0.516341507434845, |
| "step": 735, |
| "token_acc": 0.8256773697978942 |
| }, |
| { |
| "epoch": 2.1271676300578033, |
| "grad_norm": 0.5202271523957221, |
| "learning_rate": 2.1415503372601096e-07, |
| "loss": 0.5516679286956787, |
| "step": 736, |
| "token_acc": 0.8166926940731877 |
| }, |
| { |
| "epoch": 2.1300578034682083, |
| "grad_norm": 0.5270674995884185, |
| "learning_rate": 2.1284939672493506e-07, |
| "loss": 0.5113083124160767, |
| "step": 737, |
| "token_acc": 0.8254448999891605 |
| }, |
| { |
| "epoch": 2.132947976878613, |
| "grad_norm": 0.5738812261029933, |
| "learning_rate": 2.1154667483609994e-07, |
| "loss": 0.5508044958114624, |
| "step": 738, |
| "token_acc": 0.8145577840874766 |
| }, |
| { |
| "epoch": 2.1358381502890174, |
| "grad_norm": 0.5552867531342636, |
| "learning_rate": 2.1024688128455432e-07, |
| "loss": 0.5606477856636047, |
| "step": 739, |
| "token_acc": 0.8107334996977912 |
| }, |
| { |
| "epoch": 2.138728323699422, |
| "grad_norm": 0.6511169378075016, |
| "learning_rate": 2.0895002926561733e-07, |
| "loss": 0.5715325474739075, |
| "step": 740, |
| "token_acc": 0.808644395970687 |
| }, |
| { |
| "epoch": 2.1416184971098264, |
| "grad_norm": 0.5104195470816412, |
| "learning_rate": 2.0765613194474756e-07, |
| "loss": 0.5317230224609375, |
| "step": 741, |
| "token_acc": 0.8196870394179812 |
| }, |
| { |
| "epoch": 2.1445086705202314, |
| "grad_norm": 0.5222197914536979, |
| "learning_rate": 2.0636520245740708e-07, |
| "loss": 0.581384003162384, |
| "step": 742, |
| "token_acc": 0.8044084027512044 |
| }, |
| { |
| "epoch": 2.147398843930636, |
| "grad_norm": 0.5216435736648604, |
| "learning_rate": 2.0507725390892895e-07, |
| "loss": 0.5070130825042725, |
| "step": 743, |
| "token_acc": 0.8285304030472848 |
| }, |
| { |
| "epoch": 2.1502890173410405, |
| "grad_norm": 0.5689993002879171, |
| "learning_rate": 2.0379229937438475e-07, |
| "loss": 0.5079813599586487, |
| "step": 744, |
| "token_acc": 0.8282544832726795 |
| }, |
| { |
| "epoch": 2.153179190751445, |
| "grad_norm": 0.5478897581085619, |
| "learning_rate": 2.0251035189845045e-07, |
| "loss": 0.5614432692527771, |
| "step": 745, |
| "token_acc": 0.8101714880561034 |
| }, |
| { |
| "epoch": 2.1560693641618496, |
| "grad_norm": 0.5625549603262265, |
| "learning_rate": 2.012314244952758e-07, |
| "loss": 0.46915191411972046, |
| "step": 746, |
| "token_acc": 0.8398674842185119 |
| }, |
| { |
| "epoch": 2.1589595375722546, |
| "grad_norm": 0.5888007906160326, |
| "learning_rate": 1.9995553014834986e-07, |
| "loss": 0.5621305704116821, |
| "step": 747, |
| "token_acc": 0.8091583390025296 |
| }, |
| { |
| "epoch": 2.161849710982659, |
| "grad_norm": 0.5611702979006163, |
| "learning_rate": 1.9868268181037184e-07, |
| "loss": 0.5150927901268005, |
| "step": 748, |
| "token_acc": 0.8226671153861205 |
| }, |
| { |
| "epoch": 2.1647398843930636, |
| "grad_norm": 0.5111806577194473, |
| "learning_rate": 1.9741289240311754e-07, |
| "loss": 0.5273150205612183, |
| "step": 749, |
| "token_acc": 0.822871650821089 |
| }, |
| { |
| "epoch": 2.167630057803468, |
| "grad_norm": 0.5196873584862519, |
| "learning_rate": 1.9614617481730882e-07, |
| "loss": 0.5140695571899414, |
| "step": 750, |
| "token_acc": 0.8273383116061258 |
| }, |
| { |
| "epoch": 2.1705202312138727, |
| "grad_norm": 0.5735974858092083, |
| "learning_rate": 1.948825419124837e-07, |
| "loss": 0.5572013854980469, |
| "step": 751, |
| "token_acc": 0.8135551173589466 |
| }, |
| { |
| "epoch": 2.1734104046242773, |
| "grad_norm": 0.5173068836847717, |
| "learning_rate": 1.9362200651686406e-07, |
| "loss": 0.4991053640842438, |
| "step": 752, |
| "token_acc": 0.8299385295624275 |
| }, |
| { |
| "epoch": 2.1763005780346822, |
| "grad_norm": 0.5835529062955169, |
| "learning_rate": 1.9236458142722672e-07, |
| "loss": 0.4967957139015198, |
| "step": 753, |
| "token_acc": 0.8307953955965303 |
| }, |
| { |
| "epoch": 2.179190751445087, |
| "grad_norm": 0.5877111733686488, |
| "learning_rate": 1.9111027940877283e-07, |
| "loss": 0.5488715767860413, |
| "step": 754, |
| "token_acc": 0.8119714508486775 |
| }, |
| { |
| "epoch": 2.1820809248554913, |
| "grad_norm": 0.5937906866706819, |
| "learning_rate": 1.898591131949992e-07, |
| "loss": 0.5290513038635254, |
| "step": 755, |
| "token_acc": 0.8182620202911337 |
| }, |
| { |
| "epoch": 2.184971098265896, |
| "grad_norm": 0.5973610860546952, |
| "learning_rate": 1.8861109548756764e-07, |
| "loss": 0.5482075810432434, |
| "step": 756, |
| "token_acc": 0.8168008865903214 |
| }, |
| { |
| "epoch": 2.1878612716763004, |
| "grad_norm": 0.6092890006866195, |
| "learning_rate": 1.873662389561771e-07, |
| "loss": 0.5488214492797852, |
| "step": 757, |
| "token_acc": 0.8205397467749234 |
| }, |
| { |
| "epoch": 2.1907514450867054, |
| "grad_norm": 0.5100060557982842, |
| "learning_rate": 1.861245562384351e-07, |
| "loss": 0.5582944750785828, |
| "step": 758, |
| "token_acc": 0.8142653999590552 |
| }, |
| { |
| "epoch": 2.19364161849711, |
| "grad_norm": 0.5534172002173429, |
| "learning_rate": 1.8488605993972806e-07, |
| "loss": 0.5284197926521301, |
| "step": 759, |
| "token_acc": 0.8226439546852772 |
| }, |
| { |
| "epoch": 2.1965317919075145, |
| "grad_norm": 0.5676418034969823, |
| "learning_rate": 1.8365076263309542e-07, |
| "loss": 0.5176257491111755, |
| "step": 760, |
| "token_acc": 0.8240463351308168 |
| }, |
| { |
| "epoch": 2.199421965317919, |
| "grad_norm": 0.5273849733875124, |
| "learning_rate": 1.8241867685910007e-07, |
| "loss": 0.5415469408035278, |
| "step": 761, |
| "token_acc": 0.8159108203203757 |
| }, |
| { |
| "epoch": 2.2023121387283235, |
| "grad_norm": 0.5675178250606417, |
| "learning_rate": 1.8118981512570254e-07, |
| "loss": 0.495791494846344, |
| "step": 762, |
| "token_acc": 0.833165862256412 |
| }, |
| { |
| "epoch": 2.2052023121387285, |
| "grad_norm": 0.5356879254901209, |
| "learning_rate": 1.7996418990813293e-07, |
| "loss": 0.5700979828834534, |
| "step": 763, |
| "token_acc": 0.8082553122201417 |
| }, |
| { |
| "epoch": 2.208092485549133, |
| "grad_norm": 0.5440506283017456, |
| "learning_rate": 1.7874181364876462e-07, |
| "loss": 0.5215957164764404, |
| "step": 764, |
| "token_acc": 0.8242129054849903 |
| }, |
| { |
| "epoch": 2.2109826589595376, |
| "grad_norm": 0.48724727796349754, |
| "learning_rate": 1.7752269875698872e-07, |
| "loss": 0.48275503516197205, |
| "step": 765, |
| "token_acc": 0.8372185670308444 |
| }, |
| { |
| "epoch": 2.213872832369942, |
| "grad_norm": 0.6530933074612743, |
| "learning_rate": 1.763068576090862e-07, |
| "loss": 0.5122123956680298, |
| "step": 766, |
| "token_acc": 0.8289117165401221 |
| }, |
| { |
| "epoch": 2.2167630057803467, |
| "grad_norm": 0.5132130783753541, |
| "learning_rate": 1.750943025481046e-07, |
| "loss": 0.5450626611709595, |
| "step": 767, |
| "token_acc": 0.8163703808809519 |
| }, |
| { |
| "epoch": 2.2196531791907512, |
| "grad_norm": 0.5763340107528144, |
| "learning_rate": 1.73885045883731e-07, |
| "loss": 0.5134228467941284, |
| "step": 768, |
| "token_acc": 0.8268736586467864 |
| }, |
| { |
| "epoch": 2.222543352601156, |
| "grad_norm": 0.5678033281126066, |
| "learning_rate": 1.726790998921675e-07, |
| "loss": 0.5369815826416016, |
| "step": 769, |
| "token_acc": 0.8197942785502621 |
| }, |
| { |
| "epoch": 2.2254335260115607, |
| "grad_norm": 0.5494081888054269, |
| "learning_rate": 1.7147647681600735e-07, |
| "loss": 0.583419144153595, |
| "step": 770, |
| "token_acc": 0.8045412637492227 |
| }, |
| { |
| "epoch": 2.2283236994219653, |
| "grad_norm": 0.5002570926978792, |
| "learning_rate": 1.7027718886410948e-07, |
| "loss": 0.5762687921524048, |
| "step": 771, |
| "token_acc": 0.8050788141720897 |
| }, |
| { |
| "epoch": 2.23121387283237, |
| "grad_norm": 0.5621625282852232, |
| "learning_rate": 1.6908124821147517e-07, |
| "loss": 0.5734193325042725, |
| "step": 772, |
| "token_acc": 0.8072726721307747 |
| }, |
| { |
| "epoch": 2.2341040462427744, |
| "grad_norm": 0.5805542620358577, |
| "learning_rate": 1.6788866699912434e-07, |
| "loss": 0.5245779156684875, |
| "step": 773, |
| "token_acc": 0.8224566435530849 |
| }, |
| { |
| "epoch": 2.2369942196531793, |
| "grad_norm": 0.5784351770858037, |
| "learning_rate": 1.6669945733397288e-07, |
| "loss": 0.5163431763648987, |
| "step": 774, |
| "token_acc": 0.8234030645429656 |
| }, |
| { |
| "epoch": 2.239884393063584, |
| "grad_norm": 0.5443607425066719, |
| "learning_rate": 1.6551363128870866e-07, |
| "loss": 0.48509231209754944, |
| "step": 775, |
| "token_acc": 0.8364400070660744 |
| }, |
| { |
| "epoch": 2.2427745664739884, |
| "grad_norm": 0.5838705468342498, |
| "learning_rate": 1.643312009016694e-07, |
| "loss": 0.5485388040542603, |
| "step": 776, |
| "token_acc": 0.814316289454411 |
| }, |
| { |
| "epoch": 2.245664739884393, |
| "grad_norm": 0.5113123373755981, |
| "learning_rate": 1.631521781767214e-07, |
| "loss": 0.5461674928665161, |
| "step": 777, |
| "token_acc": 0.8178670064564116 |
| }, |
| { |
| "epoch": 2.2485549132947975, |
| "grad_norm": 0.5316036267961789, |
| "learning_rate": 1.6197657508313595e-07, |
| "loss": 0.5362288951873779, |
| "step": 778, |
| "token_acc": 0.8175199117906136 |
| }, |
| { |
| "epoch": 2.2514450867052025, |
| "grad_norm": 0.6922569927006882, |
| "learning_rate": 1.608044035554692e-07, |
| "loss": 0.5441286563873291, |
| "step": 779, |
| "token_acc": 0.8158920316612874 |
| }, |
| { |
| "epoch": 2.254335260115607, |
| "grad_norm": 0.6638081905493092, |
| "learning_rate": 1.5963567549344026e-07, |
| "loss": 0.5481600761413574, |
| "step": 780, |
| "token_acc": 0.8147708894878706 |
| }, |
| { |
| "epoch": 2.2572254335260116, |
| "grad_norm": 0.5594541395187226, |
| "learning_rate": 1.5847040276181113e-07, |
| "loss": 0.5381879210472107, |
| "step": 781, |
| "token_acc": 0.8191574437700821 |
| }, |
| { |
| "epoch": 2.260115606936416, |
| "grad_norm": 0.6007103186375023, |
| "learning_rate": 1.5730859719026535e-07, |
| "loss": 0.537074863910675, |
| "step": 782, |
| "token_acc": 0.8190765218606167 |
| }, |
| { |
| "epoch": 2.2630057803468207, |
| "grad_norm": 0.5565956593496582, |
| "learning_rate": 1.561502705732883e-07, |
| "loss": 0.4965110719203949, |
| "step": 783, |
| "token_acc": 0.8309357060849598 |
| }, |
| { |
| "epoch": 2.2658959537572256, |
| "grad_norm": 0.5642893968640419, |
| "learning_rate": 1.5499543467004812e-07, |
| "loss": 0.5519629120826721, |
| "step": 784, |
| "token_acc": 0.8145803817619548 |
| }, |
| { |
| "epoch": 2.26878612716763, |
| "grad_norm": 0.6562655659982366, |
| "learning_rate": 1.538441012042747e-07, |
| "loss": 0.5342061519622803, |
| "step": 785, |
| "token_acc": 0.8214097726480007 |
| }, |
| { |
| "epoch": 2.2716763005780347, |
| "grad_norm": 0.5502255728162866, |
| "learning_rate": 1.526962818641428e-07, |
| "loss": 0.5008838176727295, |
| "step": 786, |
| "token_acc": 0.8290141252177352 |
| }, |
| { |
| "epoch": 2.2745664739884393, |
| "grad_norm": 0.5549954985905744, |
| "learning_rate": 1.5155198830215144e-07, |
| "loss": 0.4954628348350525, |
| "step": 787, |
| "token_acc": 0.8334000233928208 |
| }, |
| { |
| "epoch": 2.277456647398844, |
| "grad_norm": 0.6131059587737819, |
| "learning_rate": 1.5041123213500673e-07, |
| "loss": 0.5419051647186279, |
| "step": 788, |
| "token_acc": 0.8164740751406938 |
| }, |
| { |
| "epoch": 2.2803468208092488, |
| "grad_norm": 0.6247230822104177, |
| "learning_rate": 1.4927402494350383e-07, |
| "loss": 0.5040674805641174, |
| "step": 789, |
| "token_acc": 0.8298278970337606 |
| }, |
| { |
| "epoch": 2.2832369942196533, |
| "grad_norm": 0.5169557886712214, |
| "learning_rate": 1.4814037827240894e-07, |
| "loss": 0.4267565608024597, |
| "step": 790, |
| "token_acc": 0.85461239288595 |
| }, |
| { |
| "epoch": 2.286127167630058, |
| "grad_norm": 0.5453091300597913, |
| "learning_rate": 1.4701030363034244e-07, |
| "loss": 0.5594276189804077, |
| "step": 791, |
| "token_acc": 0.8131839426158908 |
| }, |
| { |
| "epoch": 2.2890173410404624, |
| "grad_norm": 0.5304410532256004, |
| "learning_rate": 1.4588381248966185e-07, |
| "loss": 0.5278592109680176, |
| "step": 792, |
| "token_acc": 0.8218627568498552 |
| }, |
| { |
| "epoch": 2.291907514450867, |
| "grad_norm": 0.6120665191114517, |
| "learning_rate": 1.4476091628634597e-07, |
| "loss": 0.575430691242218, |
| "step": 793, |
| "token_acc": 0.807088911218437 |
| }, |
| { |
| "epoch": 2.294797687861272, |
| "grad_norm": 0.5799839527530729, |
| "learning_rate": 1.4364162641987776e-07, |
| "loss": 0.5156550407409668, |
| "step": 794, |
| "token_acc": 0.8260783412329787 |
| }, |
| { |
| "epoch": 2.2976878612716765, |
| "grad_norm": 0.5602063299660717, |
| "learning_rate": 1.425259542531293e-07, |
| "loss": 0.5343849658966064, |
| "step": 795, |
| "token_acc": 0.8199821131979047 |
| }, |
| { |
| "epoch": 2.300578034682081, |
| "grad_norm": 0.4887450635971321, |
| "learning_rate": 1.414139111122463e-07, |
| "loss": 0.5308408141136169, |
| "step": 796, |
| "token_acc": 0.8229694371764182 |
| }, |
| { |
| "epoch": 2.3034682080924855, |
| "grad_norm": 0.4993867501606219, |
| "learning_rate": 1.4030550828653354e-07, |
| "loss": 0.5518777966499329, |
| "step": 797, |
| "token_acc": 0.8136998348383776 |
| }, |
| { |
| "epoch": 2.30635838150289, |
| "grad_norm": 0.5067023143157817, |
| "learning_rate": 1.3920075702833918e-07, |
| "loss": 0.5633761882781982, |
| "step": 798, |
| "token_acc": 0.8110373410357782 |
| }, |
| { |
| "epoch": 2.3092485549132946, |
| "grad_norm": 0.49845534995334795, |
| "learning_rate": 1.380996685529413e-07, |
| "loss": 0.5841176509857178, |
| "step": 799, |
| "token_acc": 0.8055892737380623 |
| }, |
| { |
| "epoch": 2.3121387283236996, |
| "grad_norm": 0.5671598446889555, |
| "learning_rate": 1.370022540384347e-07, |
| "loss": 0.5178837180137634, |
| "step": 800, |
| "token_acc": 0.8236206769170149 |
| }, |
| { |
| "epoch": 2.315028901734104, |
| "grad_norm": 0.4945445707298972, |
| "learning_rate": 1.3590852462561536e-07, |
| "loss": 0.5855327844619751, |
| "step": 801, |
| "token_acc": 0.8038555657047487 |
| }, |
| { |
| "epoch": 2.3179190751445087, |
| "grad_norm": 0.5806465370535545, |
| "learning_rate": 1.3481849141786977e-07, |
| "loss": 0.5570707321166992, |
| "step": 802, |
| "token_acc": 0.8127311126755344 |
| }, |
| { |
| "epoch": 2.320809248554913, |
| "grad_norm": 0.6159090128169195, |
| "learning_rate": 1.337321654810605e-07, |
| "loss": 0.510475754737854, |
| "step": 803, |
| "token_acc": 0.8252182347235694 |
| }, |
| { |
| "epoch": 2.3236994219653178, |
| "grad_norm": 0.5376860591208902, |
| "learning_rate": 1.3264955784341436e-07, |
| "loss": 0.5326089859008789, |
| "step": 804, |
| "token_acc": 0.8201670917441944 |
| }, |
| { |
| "epoch": 2.3265895953757223, |
| "grad_norm": 0.673299584166168, |
| "learning_rate": 1.3157067949541108e-07, |
| "loss": 0.58345627784729, |
| "step": 805, |
| "token_acc": 0.8029432260094861 |
| }, |
| { |
| "epoch": 2.3294797687861273, |
| "grad_norm": 0.5206280305901979, |
| "learning_rate": 1.304955413896705e-07, |
| "loss": 0.574557900428772, |
| "step": 806, |
| "token_acc": 0.8069745418082558 |
| }, |
| { |
| "epoch": 2.332369942196532, |
| "grad_norm": 0.5136292360134201, |
| "learning_rate": 1.294241544408425e-07, |
| "loss": 0.5320082902908325, |
| "step": 807, |
| "token_acc": 0.8200797060551261 |
| }, |
| { |
| "epoch": 2.3352601156069364, |
| "grad_norm": 0.6862994942563941, |
| "learning_rate": 1.2835652952549535e-07, |
| "loss": 0.506873607635498, |
| "step": 808, |
| "token_acc": 0.8275425473721735 |
| }, |
| { |
| "epoch": 2.338150289017341, |
| "grad_norm": 0.512551355029386, |
| "learning_rate": 1.272926774820063e-07, |
| "loss": 0.5066085457801819, |
| "step": 809, |
| "token_acc": 0.8297983521714544 |
| }, |
| { |
| "epoch": 2.3410404624277454, |
| "grad_norm": 0.5604007523428769, |
| "learning_rate": 1.2623260911045032e-07, |
| "loss": 0.5025891065597534, |
| "step": 810, |
| "token_acc": 0.829209325638134 |
| }, |
| { |
| "epoch": 2.3439306358381504, |
| "grad_norm": 0.5268748443036352, |
| "learning_rate": 1.251763351724912e-07, |
| "loss": 0.4720842242240906, |
| "step": 811, |
| "token_acc": 0.8390679336697509 |
| }, |
| { |
| "epoch": 2.346820809248555, |
| "grad_norm": 0.5272184591480457, |
| "learning_rate": 1.241238663912727e-07, |
| "loss": 0.5422724485397339, |
| "step": 812, |
| "token_acc": 0.8181165262000732 |
| }, |
| { |
| "epoch": 2.3497109826589595, |
| "grad_norm": 0.6478156561205365, |
| "learning_rate": 1.2307521345130856e-07, |
| "loss": 0.4997095465660095, |
| "step": 813, |
| "token_acc": 0.83579220127889 |
| }, |
| { |
| "epoch": 2.352601156069364, |
| "grad_norm": 0.5596818812581189, |
| "learning_rate": 1.2203038699837482e-07, |
| "loss": 0.5354875326156616, |
| "step": 814, |
| "token_acc": 0.8179522864334984 |
| }, |
| { |
| "epoch": 2.3554913294797686, |
| "grad_norm": 0.5092123540436737, |
| "learning_rate": 1.2098939763940146e-07, |
| "loss": 0.5460278987884521, |
| "step": 815, |
| "token_acc": 0.8163918561804444 |
| }, |
| { |
| "epoch": 2.3583815028901736, |
| "grad_norm": 0.5800331579268285, |
| "learning_rate": 1.1995225594236535e-07, |
| "loss": 0.5022585988044739, |
| "step": 816, |
| "token_acc": 0.8274375641464249 |
| }, |
| { |
| "epoch": 2.361271676300578, |
| "grad_norm": 0.5756167659083334, |
| "learning_rate": 1.1891897243618183e-07, |
| "loss": 0.5118639469146729, |
| "step": 817, |
| "token_acc": 0.8277416762854647 |
| }, |
| { |
| "epoch": 2.3641618497109826, |
| "grad_norm": 0.7044868964257237, |
| "learning_rate": 1.1788955761059848e-07, |
| "loss": 0.5586499571800232, |
| "step": 818, |
| "token_acc": 0.8113651781794964 |
| }, |
| { |
| "epoch": 2.367052023121387, |
| "grad_norm": 0.5795349651059425, |
| "learning_rate": 1.168640219160893e-07, |
| "loss": 0.46478456258773804, |
| "step": 819, |
| "token_acc": 0.8425433103736172 |
| }, |
| { |
| "epoch": 2.3699421965317917, |
| "grad_norm": 0.5417472517233258, |
| "learning_rate": 1.1584237576374672e-07, |
| "loss": 0.5370988845825195, |
| "step": 820, |
| "token_acc": 0.8190044958253051 |
| }, |
| { |
| "epoch": 2.3728323699421967, |
| "grad_norm": 0.5406033227296971, |
| "learning_rate": 1.1482462952517819e-07, |
| "loss": 0.5212105512619019, |
| "step": 821, |
| "token_acc": 0.8224046418092507 |
| }, |
| { |
| "epoch": 2.3757225433526012, |
| "grad_norm": 0.6158759615805948, |
| "learning_rate": 1.1381079353239915e-07, |
| "loss": 0.5457302331924438, |
| "step": 822, |
| "token_acc": 0.8143862498308296 |
| }, |
| { |
| "epoch": 2.378612716763006, |
| "grad_norm": 0.5823036775149597, |
| "learning_rate": 1.1280087807772881e-07, |
| "loss": 0.5847820043563843, |
| "step": 823, |
| "token_acc": 0.8055109662743706 |
| }, |
| { |
| "epoch": 2.3815028901734103, |
| "grad_norm": 0.5934874612721635, |
| "learning_rate": 1.1179489341368614e-07, |
| "loss": 0.527098536491394, |
| "step": 824, |
| "token_acc": 0.8198975500818406 |
| }, |
| { |
| "epoch": 2.384393063583815, |
| "grad_norm": 0.48776844524252105, |
| "learning_rate": 1.1079284975288456e-07, |
| "loss": 0.5120328068733215, |
| "step": 825, |
| "token_acc": 0.8243783599233836 |
| }, |
| { |
| "epoch": 2.38728323699422, |
| "grad_norm": 0.6146965565569307, |
| "learning_rate": 1.097947572679298e-07, |
| "loss": 0.5407025814056396, |
| "step": 826, |
| "token_acc": 0.8166508538899431 |
| }, |
| { |
| "epoch": 2.3901734104046244, |
| "grad_norm": 0.5334859468151563, |
| "learning_rate": 1.0880062609131485e-07, |
| "loss": 0.5002784729003906, |
| "step": 827, |
| "token_acc": 0.8304765759384802 |
| }, |
| { |
| "epoch": 2.393063583815029, |
| "grad_norm": 0.5390442828664261, |
| "learning_rate": 1.0781046631531887e-07, |
| "loss": 0.539802074432373, |
| "step": 828, |
| "token_acc": 0.8201954263661371 |
| }, |
| { |
| "epoch": 2.3959537572254335, |
| "grad_norm": 0.5913404588285502, |
| "learning_rate": 1.0682428799190357e-07, |
| "loss": 0.5389546155929565, |
| "step": 829, |
| "token_acc": 0.8186631949877636 |
| }, |
| { |
| "epoch": 2.398843930635838, |
| "grad_norm": 0.5442985144352179, |
| "learning_rate": 1.0584210113261138e-07, |
| "loss": 0.5016453862190247, |
| "step": 830, |
| "token_acc": 0.8323601673886272 |
| }, |
| { |
| "epoch": 2.401734104046243, |
| "grad_norm": 0.5335838263183578, |
| "learning_rate": 1.0486391570846447e-07, |
| "loss": 0.5271462202072144, |
| "step": 831, |
| "token_acc": 0.8242358536755963 |
| }, |
| { |
| "epoch": 2.4046242774566475, |
| "grad_norm": 0.49716550117440406, |
| "learning_rate": 1.0388974164986247e-07, |
| "loss": 0.55882728099823, |
| "step": 832, |
| "token_acc": 0.8099962892130277 |
| }, |
| { |
| "epoch": 2.407514450867052, |
| "grad_norm": 0.47857456778328644, |
| "learning_rate": 1.0291958884648244e-07, |
| "loss": 0.49896830320358276, |
| "step": 833, |
| "token_acc": 0.8291924229963124 |
| }, |
| { |
| "epoch": 2.4104046242774566, |
| "grad_norm": 0.5097765363216997, |
| "learning_rate": 1.0195346714717812e-07, |
| "loss": 0.5477476716041565, |
| "step": 834, |
| "token_acc": 0.8156213758444858 |
| }, |
| { |
| "epoch": 2.413294797687861, |
| "grad_norm": 0.5235000424585246, |
| "learning_rate": 1.0099138635988024e-07, |
| "loss": 0.5449202060699463, |
| "step": 835, |
| "token_acc": 0.8174131547081592 |
| }, |
| { |
| "epoch": 2.416184971098266, |
| "grad_norm": 0.5918110484158251, |
| "learning_rate": 1.0003335625149667e-07, |
| "loss": 0.47566699981689453, |
| "step": 836, |
| "token_acc": 0.8377055807323248 |
| }, |
| { |
| "epoch": 2.4190751445086707, |
| "grad_norm": 0.5851719068244339, |
| "learning_rate": 9.907938654781306e-08, |
| "loss": 0.5465905666351318, |
| "step": 837, |
| "token_acc": 0.8147972978299083 |
| }, |
| { |
| "epoch": 2.421965317919075, |
| "grad_norm": 0.5682204824677508, |
| "learning_rate": 9.812948693339518e-08, |
| "loss": 0.5738434791564941, |
| "step": 838, |
| "token_acc": 0.8094719444296344 |
| }, |
| { |
| "epoch": 2.4248554913294798, |
| "grad_norm": 0.49007877801128724, |
| "learning_rate": 9.718366705148878e-08, |
| "loss": 0.5543205738067627, |
| "step": 839, |
| "token_acc": 0.8132528289037656 |
| }, |
| { |
| "epoch": 2.4277456647398843, |
| "grad_norm": 0.5842704513292558, |
| "learning_rate": 9.62419365039237e-08, |
| "loss": 0.5389681458473206, |
| "step": 840, |
| "token_acc": 0.8200700065948241 |
| }, |
| { |
| "epoch": 2.430635838150289, |
| "grad_norm": 0.5770762126755756, |
| "learning_rate": 9.530430485101477e-08, |
| "loss": 0.5231157541275024, |
| "step": 841, |
| "token_acc": 0.8205874308194584 |
| }, |
| { |
| "epoch": 2.433526011560694, |
| "grad_norm": 0.7677432650260306, |
| "learning_rate": 9.437078161146589e-08, |
| "loss": 0.48806625604629517, |
| "step": 842, |
| "token_acc": 0.8331080698798665 |
| }, |
| { |
| "epoch": 2.4364161849710984, |
| "grad_norm": 0.644925234497109, |
| "learning_rate": 9.344137626227266e-08, |
| "loss": 0.5736875534057617, |
| "step": 843, |
| "token_acc": 0.8089128548407091 |
| }, |
| { |
| "epoch": 2.439306358381503, |
| "grad_norm": 0.7396158526047033, |
| "learning_rate": 9.251609823862638e-08, |
| "loss": 0.4797173738479614, |
| "step": 844, |
| "token_acc": 0.8373787499437789 |
| }, |
| { |
| "epoch": 2.4421965317919074, |
| "grad_norm": 0.5468960652000051, |
| "learning_rate": 9.15949569338188e-08, |
| "loss": 0.5192615985870361, |
| "step": 845, |
| "token_acc": 0.8244522788344224 |
| }, |
| { |
| "epoch": 2.445086705202312, |
| "grad_norm": 0.5315006428054552, |
| "learning_rate": 9.067796169914549e-08, |
| "loss": 0.5097811222076416, |
| "step": 846, |
| "token_acc": 0.827042571766035 |
| }, |
| { |
| "epoch": 2.447976878612717, |
| "grad_norm": 0.7439553982785114, |
| "learning_rate": 8.976512184381246e-08, |
| "loss": 0.49079883098602295, |
| "step": 847, |
| "token_acc": 0.8330292060799148 |
| }, |
| { |
| "epoch": 2.4508670520231215, |
| "grad_norm": 0.6047154396535889, |
| "learning_rate": 8.885644663484049e-08, |
| "loss": 0.5638853311538696, |
| "step": 848, |
| "token_acc": 0.8139317111350264 |
| }, |
| { |
| "epoch": 2.453757225433526, |
| "grad_norm": 0.5113685852977929, |
| "learning_rate": 8.795194529697148e-08, |
| "loss": 0.5080073475837708, |
| "step": 849, |
| "token_acc": 0.8294516082294987 |
| }, |
| { |
| "epoch": 2.4566473988439306, |
| "grad_norm": 0.5784270460360631, |
| "learning_rate": 8.705162701257501e-08, |
| "loss": 0.4831171929836273, |
| "step": 850, |
| "token_acc": 0.8367839034908794 |
| }, |
| { |
| "epoch": 2.459537572254335, |
| "grad_norm": 0.8859232576451248, |
| "learning_rate": 8.615550092155477e-08, |
| "loss": 0.49585288763046265, |
| "step": 851, |
| "token_acc": 0.8318051901511245 |
| }, |
| { |
| "epoch": 2.4624277456647397, |
| "grad_norm": 0.5397198676813016, |
| "learning_rate": 8.526357612125573e-08, |
| "loss": 0.5402971506118774, |
| "step": 852, |
| "token_acc": 0.8140772038815954 |
| }, |
| { |
| "epoch": 2.4653179190751446, |
| "grad_norm": 0.5962698285712602, |
| "learning_rate": 8.437586166637206e-08, |
| "loss": 0.4982019066810608, |
| "step": 853, |
| "token_acc": 0.8291487495756479 |
| }, |
| { |
| "epoch": 2.468208092485549, |
| "grad_norm": 0.639088875669763, |
| "learning_rate": 8.349236656885544e-08, |
| "loss": 0.5227348804473877, |
| "step": 854, |
| "token_acc": 0.8234732997252996 |
| }, |
| { |
| "epoch": 2.4710982658959537, |
| "grad_norm": 0.5125821343592164, |
| "learning_rate": 8.261309979782255e-08, |
| "loss": 0.5540283918380737, |
| "step": 855, |
| "token_acc": 0.8137015888618007 |
| }, |
| { |
| "epoch": 2.4739884393063583, |
| "grad_norm": 0.6336792834178986, |
| "learning_rate": 8.173807027946528e-08, |
| "loss": 0.5213714838027954, |
| "step": 856, |
| "token_acc": 0.8260184658469347 |
| }, |
| { |
| "epoch": 2.476878612716763, |
| "grad_norm": 0.741297514751174, |
| "learning_rate": 8.086728689695921e-08, |
| "loss": 0.4948037564754486, |
| "step": 857, |
| "token_acc": 0.8296993252484727 |
| }, |
| { |
| "epoch": 2.479768786127168, |
| "grad_norm": 0.5470631077862728, |
| "learning_rate": 8.000075849037408e-08, |
| "loss": 0.5469754934310913, |
| "step": 858, |
| "token_acc": 0.8164498833341608 |
| }, |
| { |
| "epoch": 2.4826589595375723, |
| "grad_norm": 0.4864695217391108, |
| "learning_rate": 7.913849385658333e-08, |
| "loss": 0.5522366762161255, |
| "step": 859, |
| "token_acc": 0.8114838802706048 |
| }, |
| { |
| "epoch": 2.485549132947977, |
| "grad_norm": 0.6284131013971183, |
| "learning_rate": 7.828050174917527e-08, |
| "loss": 0.5867525935173035, |
| "step": 860, |
| "token_acc": 0.8053583956414843 |
| }, |
| { |
| "epoch": 2.4884393063583814, |
| "grad_norm": 0.6601691347825654, |
| "learning_rate": 7.742679087836462e-08, |
| "loss": 0.4591352045536041, |
| "step": 861, |
| "token_acc": 0.8464259952598495 |
| }, |
| { |
| "epoch": 2.491329479768786, |
| "grad_norm": 0.5223754803762156, |
| "learning_rate": 7.657736991090263e-08, |
| "loss": 0.5479453206062317, |
| "step": 862, |
| "token_acc": 0.8136173830420323 |
| }, |
| { |
| "epoch": 2.494219653179191, |
| "grad_norm": 0.6063178523383044, |
| "learning_rate": 7.573224746999107e-08, |
| "loss": 0.4984654486179352, |
| "step": 863, |
| "token_acc": 0.8310789771475875 |
| }, |
| { |
| "epoch": 2.4971098265895955, |
| "grad_norm": 0.5664401315392263, |
| "learning_rate": 7.4891432135193e-08, |
| "loss": 0.5375936031341553, |
| "step": 864, |
| "token_acc": 0.8193700891772278 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.5684032151067252, |
| "learning_rate": 7.405493244234651e-08, |
| "loss": 0.5382214188575745, |
| "step": 865, |
| "token_acc": 0.8159053497942387 |
| }, |
| { |
| "epoch": 2.5028901734104045, |
| "grad_norm": 1.6304188232278813, |
| "learning_rate": 7.322275688347818e-08, |
| "loss": 0.5420823097229004, |
| "step": 866, |
| "token_acc": 0.8175298965740142 |
| }, |
| { |
| "epoch": 2.505780346820809, |
| "grad_norm": 0.5256843006054661, |
| "learning_rate": 7.239491390671631e-08, |
| "loss": 0.5603017807006836, |
| "step": 867, |
| "token_acc": 0.8130635711477354 |
| }, |
| { |
| "epoch": 2.508670520231214, |
| "grad_norm": 0.5290906377318529, |
| "learning_rate": 7.157141191620548e-08, |
| "loss": 0.4974015951156616, |
| "step": 868, |
| "token_acc": 0.8317996586674097 |
| }, |
| { |
| "epoch": 2.5115606936416186, |
| "grad_norm": 0.5009279956947961, |
| "learning_rate": 7.075225927202105e-08, |
| "loss": 0.5346574187278748, |
| "step": 869, |
| "token_acc": 0.8163790337713909 |
| }, |
| { |
| "epoch": 2.514450867052023, |
| "grad_norm": 0.4774847145184863, |
| "learning_rate": 6.993746429008496e-08, |
| "loss": 0.5793315768241882, |
| "step": 870, |
| "token_acc": 0.8044435794476767 |
| }, |
| { |
| "epoch": 2.5173410404624277, |
| "grad_norm": 0.579794607346244, |
| "learning_rate": 6.912703524208019e-08, |
| "loss": 0.4764576852321625, |
| "step": 871, |
| "token_acc": 0.8377503092002259 |
| }, |
| { |
| "epoch": 2.520231213872832, |
| "grad_norm": 0.5013881127258889, |
| "learning_rate": 6.832098035536759e-08, |
| "loss": 0.525843620300293, |
| "step": 872, |
| "token_acc": 0.8231466097001345 |
| }, |
| { |
| "epoch": 2.523121387283237, |
| "grad_norm": 0.48167613678527704, |
| "learning_rate": 6.751930781290238e-08, |
| "loss": 0.5380637049674988, |
| "step": 873, |
| "token_acc": 0.8183076636731655 |
| }, |
| { |
| "epoch": 2.5260115606936417, |
| "grad_norm": 0.4540447849829041, |
| "learning_rate": 6.672202575315044e-08, |
| "loss": 0.49698758125305176, |
| "step": 874, |
| "token_acc": 0.831075612916876 |
| }, |
| { |
| "epoch": 2.5289017341040463, |
| "grad_norm": 0.6661593346201325, |
| "learning_rate": 6.59291422700064e-08, |
| "loss": 0.4850313663482666, |
| "step": 875, |
| "token_acc": 0.8362135876193946 |
| }, |
| { |
| "epoch": 2.531791907514451, |
| "grad_norm": 0.505051966727968, |
| "learning_rate": 6.514066541271085e-08, |
| "loss": 0.499431312084198, |
| "step": 876, |
| "token_acc": 0.831420351210136 |
| }, |
| { |
| "epoch": 2.5346820809248554, |
| "grad_norm": 0.5882259006732896, |
| "learning_rate": 6.435660318576935e-08, |
| "loss": 0.5504227876663208, |
| "step": 877, |
| "token_acc": 0.8158776668803223 |
| }, |
| { |
| "epoch": 2.5375722543352603, |
| "grad_norm": 0.5391399587353708, |
| "learning_rate": 6.357696354887049e-08, |
| "loss": 0.5507422685623169, |
| "step": 878, |
| "token_acc": 0.8168785222461945 |
| }, |
| { |
| "epoch": 2.540462427745665, |
| "grad_norm": 0.5480460384925314, |
| "learning_rate": 6.28017544168053e-08, |
| "loss": 0.5473015308380127, |
| "step": 879, |
| "token_acc": 0.8178865534976365 |
| }, |
| { |
| "epoch": 2.5433526011560694, |
| "grad_norm": 0.5389986372049553, |
| "learning_rate": 6.20309836593873e-08, |
| "loss": 0.5189315676689148, |
| "step": 880, |
| "token_acc": 0.8252666894202909 |
| }, |
| { |
| "epoch": 2.546242774566474, |
| "grad_norm": 0.5707417078989917, |
| "learning_rate": 6.126465910137163e-08, |
| "loss": 0.5234180092811584, |
| "step": 881, |
| "token_acc": 0.8232250912282323 |
| }, |
| { |
| "epoch": 2.5491329479768785, |
| "grad_norm": 0.5632951051957191, |
| "learning_rate": 6.0502788522377e-08, |
| "loss": 0.5196454524993896, |
| "step": 882, |
| "token_acc": 0.8240517651811349 |
| }, |
| { |
| "epoch": 2.5520231213872835, |
| "grad_norm": 0.5312909361373286, |
| "learning_rate": 5.974537965680537e-08, |
| "loss": 0.5485826134681702, |
| "step": 883, |
| "token_acc": 0.8127245781077416 |
| }, |
| { |
| "epoch": 2.5549132947976876, |
| "grad_norm": 0.6429627848350591, |
| "learning_rate": 5.899244019376426e-08, |
| "loss": 0.5010867714881897, |
| "step": 884, |
| "token_acc": 0.8311800993506927 |
| }, |
| { |
| "epoch": 2.5578034682080926, |
| "grad_norm": 0.5223405882575716, |
| "learning_rate": 5.824397777698858e-08, |
| "loss": 0.5297751426696777, |
| "step": 885, |
| "token_acc": 0.8206137655553849 |
| }, |
| { |
| "epoch": 2.560693641618497, |
| "grad_norm": 0.8020502475631341, |
| "learning_rate": 5.7500000004762574e-08, |
| "loss": 0.5593537092208862, |
| "step": 886, |
| "token_acc": 0.811829619947517 |
| }, |
| { |
| "epoch": 2.5635838150289016, |
| "grad_norm": 0.6258112537179114, |
| "learning_rate": 5.676051442984325e-08, |
| "loss": 0.5434359908103943, |
| "step": 887, |
| "token_acc": 0.8160674580340842 |
| }, |
| { |
| "epoch": 2.5664739884393066, |
| "grad_norm": 0.5482233640675082, |
| "learning_rate": 5.602552855938325e-08, |
| "loss": 0.5392587780952454, |
| "step": 888, |
| "token_acc": 0.8183432292939603 |
| }, |
| { |
| "epoch": 2.5693641618497107, |
| "grad_norm": 0.5339167311609386, |
| "learning_rate": 5.529504985485528e-08, |
| "loss": 0.5843528509140015, |
| "step": 889, |
| "token_acc": 0.8041726059349488 |
| }, |
| { |
| "epoch": 2.5722543352601157, |
| "grad_norm": 0.5526129075488465, |
| "learning_rate": 5.456908573197544e-08, |
| "loss": 0.4785343408584595, |
| "step": 890, |
| "token_acc": 0.8354585097240348 |
| }, |
| { |
| "epoch": 2.5751445086705202, |
| "grad_norm": 0.5932930782479724, |
| "learning_rate": 5.384764356062865e-08, |
| "loss": 0.501940131187439, |
| "step": 891, |
| "token_acc": 0.8283741560885075 |
| }, |
| { |
| "epoch": 2.578034682080925, |
| "grad_norm": 0.5946977220929661, |
| "learning_rate": 5.313073066479379e-08, |
| "loss": 0.5379625558853149, |
| "step": 892, |
| "token_acc": 0.8177655126778356 |
| }, |
| { |
| "epoch": 2.5809248554913293, |
| "grad_norm": 0.5663018542099373, |
| "learning_rate": 5.2418354322468884e-08, |
| "loss": 0.4645715057849884, |
| "step": 893, |
| "token_acc": 0.8437703660317277 |
| }, |
| { |
| "epoch": 2.583815028901734, |
| "grad_norm": 0.5603090911019164, |
| "learning_rate": 5.1710521765597593e-08, |
| "loss": 0.5438505411148071, |
| "step": 894, |
| "token_acc": 0.8167114037179182 |
| }, |
| { |
| "epoch": 2.586705202312139, |
| "grad_norm": 0.5650529942357706, |
| "learning_rate": 5.100724017999575e-08, |
| "loss": 0.537551760673523, |
| "step": 895, |
| "token_acc": 0.8162509350365383 |
| }, |
| { |
| "epoch": 2.5895953757225434, |
| "grad_norm": 0.5946617661686765, |
| "learning_rate": 5.0308516705278525e-08, |
| "loss": 0.5363532304763794, |
| "step": 896, |
| "token_acc": 0.8188319733413082 |
| }, |
| { |
| "epoch": 2.592485549132948, |
| "grad_norm": 0.529447543384607, |
| "learning_rate": 4.961435843478751e-08, |
| "loss": 0.547370195388794, |
| "step": 897, |
| "token_acc": 0.8166483874998265 |
| }, |
| { |
| "epoch": 2.5953757225433525, |
| "grad_norm": 0.5564539974665098, |
| "learning_rate": 4.892477241551901e-08, |
| "loss": 0.5567014813423157, |
| "step": 898, |
| "token_acc": 0.8142607154390945 |
| }, |
| { |
| "epoch": 2.598265895953757, |
| "grad_norm": 0.6758226853294469, |
| "learning_rate": 4.8239765648052985e-08, |
| "loss": 0.5622668862342834, |
| "step": 899, |
| "token_acc": 0.8094786656801085 |
| }, |
| { |
| "epoch": 2.601156069364162, |
| "grad_norm": 0.6030746534353, |
| "learning_rate": 4.755934508648057e-08, |
| "loss": 0.48511946201324463, |
| "step": 900, |
| "token_acc": 0.8383746553751593 |
| }, |
| { |
| "epoch": 2.6040462427745665, |
| "grad_norm": 0.5291224134313559, |
| "learning_rate": 4.688351763833531e-08, |
| "loss": 0.5561063289642334, |
| "step": 901, |
| "token_acc": 0.811450131453075 |
| }, |
| { |
| "epoch": 2.606936416184971, |
| "grad_norm": 0.5231587422483082, |
| "learning_rate": 4.621229016452155e-08, |
| "loss": 0.585370659828186, |
| "step": 902, |
| "token_acc": 0.8056932036025608 |
| }, |
| { |
| "epoch": 2.6098265895953756, |
| "grad_norm": 1.1223139233293984, |
| "learning_rate": 4.554566947924537e-08, |
| "loss": 0.5447970628738403, |
| "step": 903, |
| "token_acc": 0.8164786148920761 |
| }, |
| { |
| "epoch": 2.61271676300578, |
| "grad_norm": 0.5225735759201205, |
| "learning_rate": 4.4883662349945784e-08, |
| "loss": 0.5505392551422119, |
| "step": 904, |
| "token_acc": 0.8164482180639134 |
| }, |
| { |
| "epoch": 2.615606936416185, |
| "grad_norm": 0.54473619880049, |
| "learning_rate": 4.422627549722519e-08, |
| "loss": 0.5359902381896973, |
| "step": 905, |
| "token_acc": 0.820455104729094 |
| }, |
| { |
| "epoch": 2.6184971098265897, |
| "grad_norm": 0.7561505246031067, |
| "learning_rate": 4.357351559478201e-08, |
| "loss": 0.47267240285873413, |
| "step": 906, |
| "token_acc": 0.8387789854590445 |
| }, |
| { |
| "epoch": 2.621387283236994, |
| "grad_norm": 0.5548449336113677, |
| "learning_rate": 4.2925389269341916e-08, |
| "loss": 0.5412442684173584, |
| "step": 907, |
| "token_acc": 0.8155705621117785 |
| }, |
| { |
| "epoch": 2.6242774566473988, |
| "grad_norm": 0.7283156817419644, |
| "learning_rate": 4.228190310059182e-08, |
| "loss": 0.5299142599105835, |
| "step": 908, |
| "token_acc": 0.8230541763009774 |
| }, |
| { |
| "epoch": 2.6271676300578033, |
| "grad_norm": 0.5365454152037888, |
| "learning_rate": 4.164306362111208e-08, |
| "loss": 0.5737514495849609, |
| "step": 909, |
| "token_acc": 0.8103234930175004 |
| }, |
| { |
| "epoch": 2.6300578034682083, |
| "grad_norm": 0.5438553812892487, |
| "learning_rate": 4.100887731631053e-08, |
| "loss": 0.5420162677764893, |
| "step": 910, |
| "token_acc": 0.8180698387235383 |
| }, |
| { |
| "epoch": 2.632947976878613, |
| "grad_norm": 0.64070798422041, |
| "learning_rate": 4.0379350624356766e-08, |
| "loss": 0.5189142823219299, |
| "step": 911, |
| "token_acc": 0.8237202834249387 |
| }, |
| { |
| "epoch": 2.6358381502890174, |
| "grad_norm": 0.47802319033882207, |
| "learning_rate": 3.975448993611652e-08, |
| "loss": 0.5308249592781067, |
| "step": 912, |
| "token_acc": 0.8203262576745515 |
| }, |
| { |
| "epoch": 2.638728323699422, |
| "grad_norm": 0.5724668109330596, |
| "learning_rate": 3.913430159508696e-08, |
| "loss": 0.5157672166824341, |
| "step": 913, |
| "token_acc": 0.8241608973797213 |
| }, |
| { |
| "epoch": 2.6416184971098264, |
| "grad_norm": 0.5470703054848514, |
| "learning_rate": 3.8518791897332204e-08, |
| "loss": 0.5976561307907104, |
| "step": 914, |
| "token_acc": 0.8007923950822223 |
| }, |
| { |
| "epoch": 2.6445086705202314, |
| "grad_norm": 0.5294401571240512, |
| "learning_rate": 3.790796709141975e-08, |
| "loss": 0.5527437925338745, |
| "step": 915, |
| "token_acc": 0.8132948131146666 |
| }, |
| { |
| "epoch": 2.647398843930636, |
| "grad_norm": 0.6321676647074376, |
| "learning_rate": 3.7301833378356073e-08, |
| "loss": 0.4902818202972412, |
| "step": 916, |
| "token_acc": 0.8343280912033046 |
| }, |
| { |
| "epoch": 2.6502890173410405, |
| "grad_norm": 0.6734799143444675, |
| "learning_rate": 3.67003969115251e-08, |
| "loss": 0.5476257801055908, |
| "step": 917, |
| "token_acc": 0.8164087189044648 |
| }, |
| { |
| "epoch": 2.653179190751445, |
| "grad_norm": 0.4933080483096889, |
| "learning_rate": 3.610366379662455e-08, |
| "loss": 0.5034703612327576, |
| "step": 918, |
| "token_acc": 0.8296526697770866 |
| }, |
| { |
| "epoch": 2.6560693641618496, |
| "grad_norm": 0.5701973114157253, |
| "learning_rate": 3.551164009160429e-08, |
| "loss": 0.5260199904441833, |
| "step": 919, |
| "token_acc": 0.8228647844657014 |
| }, |
| { |
| "epoch": 2.6589595375722546, |
| "grad_norm": 0.4606917700933646, |
| "learning_rate": 3.4924331806605314e-08, |
| "loss": 0.5847440361976624, |
| "step": 920, |
| "token_acc": 0.8036149091590186 |
| }, |
| { |
| "epoch": 2.661849710982659, |
| "grad_norm": 0.5312291603560868, |
| "learning_rate": 3.4341744903897963e-08, |
| "loss": 0.5280716419219971, |
| "step": 921, |
| "token_acc": 0.8217670827512655 |
| }, |
| { |
| "epoch": 2.6647398843930636, |
| "grad_norm": 0.5137738686874723, |
| "learning_rate": 3.376388529782215e-08, |
| "loss": 0.5434746146202087, |
| "step": 922, |
| "token_acc": 0.8166855043797683 |
| }, |
| { |
| "epoch": 2.667630057803468, |
| "grad_norm": 0.5112438107405131, |
| "learning_rate": 3.319075885472644e-08, |
| "loss": 0.4704023599624634, |
| "step": 923, |
| "token_acc": 0.8407168549429551 |
| }, |
| { |
| "epoch": 2.6705202312138727, |
| "grad_norm": 0.5633980375468464, |
| "learning_rate": 3.262237139290952e-08, |
| "loss": 0.5437241792678833, |
| "step": 924, |
| "token_acc": 0.8174555734488506 |
| }, |
| { |
| "epoch": 2.6734104046242777, |
| "grad_norm": 0.4789519578675391, |
| "learning_rate": 3.205872868256021e-08, |
| "loss": 0.5591274499893188, |
| "step": 925, |
| "token_acc": 0.8126648310155333 |
| }, |
| { |
| "epoch": 2.6763005780346822, |
| "grad_norm": 0.545383577218125, |
| "learning_rate": 3.149983644569948e-08, |
| "loss": 0.4846089482307434, |
| "step": 926, |
| "token_acc": 0.8357118170559603 |
| }, |
| { |
| "epoch": 2.679190751445087, |
| "grad_norm": 0.5624813066511716, |
| "learning_rate": 3.094570035612226e-08, |
| "loss": 0.5257154703140259, |
| "step": 927, |
| "token_acc": 0.8209082215813688 |
| }, |
| { |
| "epoch": 2.6820809248554913, |
| "grad_norm": 0.5921212603993137, |
| "learning_rate": 3.0396326039339507e-08, |
| "loss": 0.5992392897605896, |
| "step": 928, |
| "token_acc": 0.7986864607734648 |
| }, |
| { |
| "epoch": 2.684971098265896, |
| "grad_norm": 0.5498631051018497, |
| "learning_rate": 2.9851719072521487e-08, |
| "loss": 0.5509431958198547, |
| "step": 929, |
| "token_acc": 0.8177149696899494 |
| }, |
| { |
| "epoch": 2.687861271676301, |
| "grad_norm": 0.5215571767600914, |
| "learning_rate": 2.9311884984440873e-08, |
| "loss": 0.561446487903595, |
| "step": 930, |
| "token_acc": 0.8129055922352012 |
| }, |
| { |
| "epoch": 2.690751445086705, |
| "grad_norm": 0.559786563643402, |
| "learning_rate": 2.8776829255416967e-08, |
| "loss": 0.5166699290275574, |
| "step": 931, |
| "token_acc": 0.8237840118657938 |
| }, |
| { |
| "epoch": 2.69364161849711, |
| "grad_norm": 0.5753952050911679, |
| "learning_rate": 2.8246557317259723e-08, |
| "loss": 0.5357648134231567, |
| "step": 932, |
| "token_acc": 0.8212208495005039 |
| }, |
| { |
| "epoch": 2.6965317919075145, |
| "grad_norm": 0.5636571499534591, |
| "learning_rate": 2.7721074553214596e-08, |
| "loss": 0.5390565395355225, |
| "step": 933, |
| "token_acc": 0.8159201695282208 |
| }, |
| { |
| "epoch": 2.699421965317919, |
| "grad_norm": 0.5407560890645442, |
| "learning_rate": 2.7200386297908386e-08, |
| "loss": 0.541710615158081, |
| "step": 934, |
| "token_acc": 0.8174959891247107 |
| }, |
| { |
| "epoch": 2.7023121387283235, |
| "grad_norm": 0.48421827585155863, |
| "learning_rate": 2.6684497837294208e-08, |
| "loss": 0.5409998297691345, |
| "step": 935, |
| "token_acc": 0.8210280803345742 |
| }, |
| { |
| "epoch": 2.705202312138728, |
| "grad_norm": 0.49710877088501176, |
| "learning_rate": 2.6173414408598826e-08, |
| "loss": 0.5135529637336731, |
| "step": 936, |
| "token_acc": 0.8251490888501849 |
| }, |
| { |
| "epoch": 2.708092485549133, |
| "grad_norm": 0.6329172467067579, |
| "learning_rate": 2.5667141200268694e-08, |
| "loss": 0.5547735691070557, |
| "step": 937, |
| "token_acc": 0.8145400135743814 |
| }, |
| { |
| "epoch": 2.7109826589595376, |
| "grad_norm": 0.5576557557006313, |
| "learning_rate": 2.5165683351917765e-08, |
| "loss": 0.5579146146774292, |
| "step": 938, |
| "token_acc": 0.8112171853454817 |
| }, |
| { |
| "epoch": 2.713872832369942, |
| "grad_norm": 0.5905103597710084, |
| "learning_rate": 2.4669045954275046e-08, |
| "loss": 0.5442934632301331, |
| "step": 939, |
| "token_acc": 0.818311620283537 |
| }, |
| { |
| "epoch": 2.7167630057803467, |
| "grad_norm": 0.6610701567101593, |
| "learning_rate": 2.4177234049133023e-08, |
| "loss": 0.49151283502578735, |
| "step": 940, |
| "token_acc": 0.8325153415650084 |
| }, |
| { |
| "epoch": 2.7196531791907512, |
| "grad_norm": 0.6214821823759014, |
| "learning_rate": 2.369025262929658e-08, |
| "loss": 0.5725831389427185, |
| "step": 941, |
| "token_acc": 0.8070232229912145 |
| }, |
| { |
| "epoch": 2.722543352601156, |
| "grad_norm": 0.5547499629666095, |
| "learning_rate": 2.3208106638531842e-08, |
| "loss": 0.5330009460449219, |
| "step": 942, |
| "token_acc": 0.8195172027623966 |
| }, |
| { |
| "epoch": 2.7254335260115607, |
| "grad_norm": 0.5521438894414953, |
| "learning_rate": 2.2730800971516862e-08, |
| "loss": 0.5747419595718384, |
| "step": 943, |
| "token_acc": 0.8086665948043549 |
| }, |
| { |
| "epoch": 2.7283236994219653, |
| "grad_norm": 0.6317779099057246, |
| "learning_rate": 2.225834047379099e-08, |
| "loss": 0.49804458022117615, |
| "step": 944, |
| "token_acc": 0.8307906934881418 |
| }, |
| { |
| "epoch": 2.73121387283237, |
| "grad_norm": 0.5560572315857666, |
| "learning_rate": 2.1790729941706276e-08, |
| "loss": 0.5384119153022766, |
| "step": 945, |
| "token_acc": 0.8186016301942814 |
| }, |
| { |
| "epoch": 2.7341040462427744, |
| "grad_norm": 0.5706315776877087, |
| "learning_rate": 2.132797412237869e-08, |
| "loss": 0.5331531167030334, |
| "step": 946, |
| "token_acc": 0.8183284045442989 |
| }, |
| { |
| "epoch": 2.7369942196531793, |
| "grad_norm": 0.5767818083804982, |
| "learning_rate": 2.087007771363969e-08, |
| "loss": 0.5555546879768372, |
| "step": 947, |
| "token_acc": 0.8130259084965389 |
| }, |
| { |
| "epoch": 2.739884393063584, |
| "grad_norm": 0.5074851398256462, |
| "learning_rate": 2.041704536398875e-08, |
| "loss": 0.5641285181045532, |
| "step": 948, |
| "token_acc": 0.8102424125823674 |
| }, |
| { |
| "epoch": 2.7427745664739884, |
| "grad_norm": 0.5656737111306388, |
| "learning_rate": 1.9968881672545957e-08, |
| "loss": 0.5804109573364258, |
| "step": 949, |
| "token_acc": 0.8069046557228511 |
| }, |
| { |
| "epoch": 2.745664739884393, |
| "grad_norm": 0.5396023274518039, |
| "learning_rate": 1.9525591189005874e-08, |
| "loss": 0.5026800632476807, |
| "step": 950, |
| "token_acc": 0.8291645642615152 |
| }, |
| { |
| "epoch": 2.7485549132947975, |
| "grad_norm": 0.5545085068594241, |
| "learning_rate": 1.9087178413590476e-08, |
| "loss": 0.5121109485626221, |
| "step": 951, |
| "token_acc": 0.829365647193499 |
| }, |
| { |
| "epoch": 2.7514450867052025, |
| "grad_norm": 0.5744534847489216, |
| "learning_rate": 1.8653647797004236e-08, |
| "loss": 0.5073999166488647, |
| "step": 952, |
| "token_acc": 0.8286528286528286 |
| }, |
| { |
| "epoch": 2.754335260115607, |
| "grad_norm": 0.5473570344774414, |
| "learning_rate": 1.8225003740388545e-08, |
| "loss": 0.5411463975906372, |
| "step": 953, |
| "token_acc": 0.8197644649257553 |
| }, |
| { |
| "epoch": 2.7572254335260116, |
| "grad_norm": 0.5960870996950273, |
| "learning_rate": 1.7801250595277095e-08, |
| "loss": 0.45802488923072815, |
| "step": 954, |
| "token_acc": 0.8439128432584406 |
| }, |
| { |
| "epoch": 2.760115606936416, |
| "grad_norm": 0.5872410848204962, |
| "learning_rate": 1.738239266355185e-08, |
| "loss": 0.5364171862602234, |
| "step": 955, |
| "token_acc": 0.8192522793328644 |
| }, |
| { |
| "epoch": 2.7630057803468207, |
| "grad_norm": 0.5452386927866908, |
| "learning_rate": 1.6968434197399072e-08, |
| "loss": 0.5837544202804565, |
| "step": 956, |
| "token_acc": 0.8051349532888352 |
| }, |
| { |
| "epoch": 2.7658959537572256, |
| "grad_norm": 0.5752700596867665, |
| "learning_rate": 1.655937939926655e-08, |
| "loss": 0.5129964351654053, |
| "step": 957, |
| "token_acc": 0.8282252791972994 |
| }, |
| { |
| "epoch": 2.76878612716763, |
| "grad_norm": 0.5428098765109344, |
| "learning_rate": 1.6155232421820653e-08, |
| "loss": 0.5746065378189087, |
| "step": 958, |
| "token_acc": 0.8089228223154 |
| }, |
| { |
| "epoch": 2.7716763005780347, |
| "grad_norm": 0.5949829280630812, |
| "learning_rate": 1.5755997367904173e-08, |
| "loss": 0.4916711747646332, |
| "step": 959, |
| "token_acc": 0.8342608068069589 |
| }, |
| { |
| "epoch": 2.7745664739884393, |
| "grad_norm": 0.5674429218313363, |
| "learning_rate": 1.536167829049495e-08, |
| "loss": 0.5395721197128296, |
| "step": 960, |
| "token_acc": 0.8203693073096058 |
| }, |
| { |
| "epoch": 2.777456647398844, |
| "grad_norm": 0.561452376268135, |
| "learning_rate": 1.497227919266414e-08, |
| "loss": 0.51889967918396, |
| "step": 961, |
| "token_acc": 0.8233378239163167 |
| }, |
| { |
| "epoch": 2.7803468208092488, |
| "grad_norm": 0.6257227381883494, |
| "learning_rate": 1.4587804027536454e-08, |
| "loss": 0.5111842155456543, |
| "step": 962, |
| "token_acc": 0.8274028303059359 |
| }, |
| { |
| "epoch": 2.7832369942196533, |
| "grad_norm": 0.5900526631508034, |
| "learning_rate": 1.420825669824921e-08, |
| "loss": 0.5204794406890869, |
| "step": 963, |
| "token_acc": 0.8234049795759579 |
| }, |
| { |
| "epoch": 2.786127167630058, |
| "grad_norm": 0.509902068102799, |
| "learning_rate": 1.3833641057913015e-08, |
| "loss": 0.47923728823661804, |
| "step": 964, |
| "token_acc": 0.8353080111030787 |
| }, |
| { |
| "epoch": 2.7890173410404624, |
| "grad_norm": 0.5460825106119277, |
| "learning_rate": 1.346396090957297e-08, |
| "loss": 0.520375669002533, |
| "step": 965, |
| "token_acc": 0.8276919599125914 |
| }, |
| { |
| "epoch": 2.791907514450867, |
| "grad_norm": 0.5432685057122655, |
| "learning_rate": 1.309922000616942e-08, |
| "loss": 0.5795409679412842, |
| "step": 966, |
| "token_acc": 0.8071895906398279 |
| }, |
| { |
| "epoch": 2.794797687861272, |
| "grad_norm": 0.5657536988747344, |
| "learning_rate": 1.2739422050500436e-08, |
| "loss": 0.5345174074172974, |
| "step": 967, |
| "token_acc": 0.8179120793316155 |
| }, |
| { |
| "epoch": 2.7976878612716765, |
| "grad_norm": 0.521811401090051, |
| "learning_rate": 1.2384570695183782e-08, |
| "loss": 0.5313125252723694, |
| "step": 968, |
| "token_acc": 0.8208080793990667 |
| }, |
| { |
| "epoch": 2.800578034682081, |
| "grad_norm": 0.5951506599748814, |
| "learning_rate": 1.2034669542620223e-08, |
| "loss": 0.5154579877853394, |
| "step": 969, |
| "token_acc": 0.8274639716414208 |
| }, |
| { |
| "epoch": 2.8034682080924855, |
| "grad_norm": 0.7493969316675455, |
| "learning_rate": 1.168972214495667e-08, |
| "loss": 0.4610113203525543, |
| "step": 970, |
| "token_acc": 0.8410565847986298 |
| }, |
| { |
| "epoch": 2.80635838150289, |
| "grad_norm": 0.6158144745722535, |
| "learning_rate": 1.1349732004050205e-08, |
| "loss": 0.5308967232704163, |
| "step": 971, |
| "token_acc": 0.823366838754401 |
| }, |
| { |
| "epoch": 2.809248554913295, |
| "grad_norm": 0.49701991004281837, |
| "learning_rate": 1.101470257143261e-08, |
| "loss": 0.5433156490325928, |
| "step": 972, |
| "token_acc": 0.8172732427363528 |
| }, |
| { |
| "epoch": 2.812138728323699, |
| "grad_norm": 0.614964929129747, |
| "learning_rate": 1.0684637248275175e-08, |
| "loss": 0.4856722056865692, |
| "step": 973, |
| "token_acc": 0.8371653570989119 |
| }, |
| { |
| "epoch": 2.815028901734104, |
| "grad_norm": 0.5531928817079772, |
| "learning_rate": 1.0359539385354387e-08, |
| "loss": 0.5472983121871948, |
| "step": 974, |
| "token_acc": 0.8166184194819147 |
| }, |
| { |
| "epoch": 2.8179190751445087, |
| "grad_norm": 0.6036213061429313, |
| "learning_rate": 1.0039412283017523e-08, |
| "loss": 0.5529719591140747, |
| "step": 975, |
| "token_acc": 0.8155163061650604 |
| }, |
| { |
| "epoch": 2.820809248554913, |
| "grad_norm": 0.5564254532918392, |
| "learning_rate": 9.724259191149774e-09, |
| "loss": 0.4628450572490692, |
| "step": 976, |
| "token_acc": 0.8427982220798462 |
| }, |
| { |
| "epoch": 2.8236994219653178, |
| "grad_norm": 0.5588830748507647, |
| "learning_rate": 9.414083309140453e-09, |
| "loss": 0.5567787289619446, |
| "step": 977, |
| "token_acc": 0.8121751346288926 |
| }, |
| { |
| "epoch": 2.8265895953757223, |
| "grad_norm": 0.5529058564154966, |
| "learning_rate": 9.108887785851338e-09, |
| "loss": 0.5580377578735352, |
| "step": 978, |
| "token_acc": 0.8109314422108472 |
| }, |
| { |
| "epoch": 2.8294797687861273, |
| "grad_norm": 0.61646098239251, |
| "learning_rate": 8.808675719584158e-09, |
| "loss": 0.5375653505325317, |
| "step": 979, |
| "token_acc": 0.8192844783892899 |
| }, |
| { |
| "epoch": 2.832369942196532, |
| "grad_norm": 0.5248181521879705, |
| "learning_rate": 8.513450158049106e-09, |
| "loss": 0.5359894037246704, |
| "step": 980, |
| "token_acc": 0.8180794693882546 |
| }, |
| { |
| "epoch": 2.8352601156069364, |
| "grad_norm": 0.530766621077344, |
| "learning_rate": 8.22321409833443e-09, |
| "loss": 0.5032058358192444, |
| "step": 981, |
| "token_acc": 0.8299942928720195 |
| }, |
| { |
| "epoch": 2.838150289017341, |
| "grad_norm": 0.5767728092897907, |
| "learning_rate": 7.93797048687539e-09, |
| "loss": 0.555617094039917, |
| "step": 982, |
| "token_acc": 0.8127699150828953 |
| }, |
| { |
| "epoch": 2.8410404624277454, |
| "grad_norm": 0.5275196163844481, |
| "learning_rate": 7.657722219424789e-09, |
| "loss": 0.5177302956581116, |
| "step": 983, |
| "token_acc": 0.8254756164272545 |
| }, |
| { |
| "epoch": 2.8439306358381504, |
| "grad_norm": 0.7188190918164308, |
| "learning_rate": 7.382472141023221e-09, |
| "loss": 0.5488888025283813, |
| "step": 984, |
| "token_acc": 0.8139118457300275 |
| }, |
| { |
| "epoch": 2.846820809248555, |
| "grad_norm": 0.5053524666497287, |
| "learning_rate": 7.112223045970589e-09, |
| "loss": 0.5309122800827026, |
| "step": 985, |
| "token_acc": 0.818977587114551 |
| }, |
| { |
| "epoch": 2.8497109826589595, |
| "grad_norm": 0.49254982998325725, |
| "learning_rate": 6.8469776777973494e-09, |
| "loss": 0.48389381170272827, |
| "step": 986, |
| "token_acc": 0.839111193678302 |
| }, |
| { |
| "epoch": 2.852601156069364, |
| "grad_norm": 0.5088843284530131, |
| "learning_rate": 6.5867387292369295e-09, |
| "loss": 0.5327301025390625, |
| "step": 987, |
| "token_acc": 0.8190361305134541 |
| }, |
| { |
| "epoch": 2.8554913294797686, |
| "grad_norm": 0.5579589460192081, |
| "learning_rate": 6.331508842198296e-09, |
| "loss": 0.46285098791122437, |
| "step": 988, |
| "token_acc": 0.8444943903023158 |
| }, |
| { |
| "epoch": 2.8583815028901736, |
| "grad_norm": 0.5480219063407678, |
| "learning_rate": 6.081290607739042e-09, |
| "loss": 0.4747048616409302, |
| "step": 989, |
| "token_acc": 0.8427808981834031 |
| }, |
| { |
| "epoch": 2.861271676300578, |
| "grad_norm": 0.7741942154519839, |
| "learning_rate": 5.836086566039289e-09, |
| "loss": 0.5887913703918457, |
| "step": 990, |
| "token_acc": 0.8049742371893245 |
| }, |
| { |
| "epoch": 2.8641618497109826, |
| "grad_norm": 0.5193852803751504, |
| "learning_rate": 5.595899206375654e-09, |
| "loss": 0.5110014081001282, |
| "step": 991, |
| "token_acc": 0.8288312763590261 |
| }, |
| { |
| "epoch": 2.867052023121387, |
| "grad_norm": 0.5341612707698237, |
| "learning_rate": 5.360730967096272e-09, |
| "loss": 0.5477676391601562, |
| "step": 992, |
| "token_acc": 0.8129789165141573 |
| }, |
| { |
| "epoch": 2.8699421965317917, |
| "grad_norm": 0.7306055692439172, |
| "learning_rate": 5.130584235595703e-09, |
| "loss": 0.5541284680366516, |
| "step": 993, |
| "token_acc": 0.8145775823594559 |
| }, |
| { |
| "epoch": 2.8728323699421967, |
| "grad_norm": 0.5713799415951762, |
| "learning_rate": 4.9054613482910065e-09, |
| "loss": 0.44801950454711914, |
| "step": 994, |
| "token_acc": 0.845931691583633 |
| }, |
| { |
| "epoch": 2.8757225433526012, |
| "grad_norm": 0.5839589911780936, |
| "learning_rate": 4.685364590597929e-09, |
| "loss": 0.5638971924781799, |
| "step": 995, |
| "token_acc": 0.8107071579171281 |
| }, |
| { |
| "epoch": 2.878612716763006, |
| "grad_norm": 0.5287376481818248, |
| "learning_rate": 4.470296196907364e-09, |
| "loss": 0.5595090389251709, |
| "step": 996, |
| "token_acc": 0.8104899471905078 |
| }, |
| { |
| "epoch": 2.8815028901734103, |
| "grad_norm": 0.5379724615788479, |
| "learning_rate": 4.260258350563317e-09, |
| "loss": 0.5029683709144592, |
| "step": 997, |
| "token_acc": 0.8288261472452321 |
| }, |
| { |
| "epoch": 2.884393063583815, |
| "grad_norm": 0.6018325527774611, |
| "learning_rate": 4.055253183840257e-09, |
| "loss": 0.5635591149330139, |
| "step": 998, |
| "token_acc": 0.8117199938369883 |
| }, |
| { |
| "epoch": 2.88728323699422, |
| "grad_norm": 0.5473646076466034, |
| "learning_rate": 3.855282777921465e-09, |
| "loss": 0.44404757022857666, |
| "step": 999, |
| "token_acc": 0.8481432594156987 |
| }, |
| { |
| "epoch": 2.8901734104046244, |
| "grad_norm": 0.607676333795665, |
| "learning_rate": 3.660349162878329e-09, |
| "loss": 0.5595177412033081, |
| "step": 1000, |
| "token_acc": 0.8098022742758105 |
| }, |
| { |
| "epoch": 2.8901734104046244, |
| "eval_loss": 0.5740217566490173, |
| "eval_runtime": 69.5297, |
| "eval_samples_per_second": 1.582, |
| "eval_steps_per_second": 0.201, |
| "eval_token_acc": 0.808306147135369, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.893063583815029, |
| "grad_norm": 0.6028179153533768, |
| "learning_rate": 3.4704543176491407e-09, |
| "loss": 0.5201370716094971, |
| "step": 1001, |
| "token_acc": 0.8248979009505466 |
| }, |
| { |
| "epoch": 2.8959537572254335, |
| "grad_norm": 0.5618469428482809, |
| "learning_rate": 3.285600170019609e-09, |
| "loss": 0.4737909138202667, |
| "step": 1002, |
| "token_acc": 0.8380801687763713 |
| }, |
| { |
| "epoch": 2.898843930635838, |
| "grad_norm": 0.520670079505936, |
| "learning_rate": 3.10578859660271e-09, |
| "loss": 0.4949793815612793, |
| "step": 1003, |
| "token_acc": 0.8310451985643839 |
| }, |
| { |
| "epoch": 2.901734104046243, |
| "grad_norm": 0.5898385451823664, |
| "learning_rate": 2.9310214228202014e-09, |
| "loss": 0.5583693981170654, |
| "step": 1004, |
| "token_acc": 0.8109677906011918 |
| }, |
| { |
| "epoch": 2.9046242774566475, |
| "grad_norm": 0.5434063241260475, |
| "learning_rate": 2.7613004228835836e-09, |
| "loss": 0.5403155088424683, |
| "step": 1005, |
| "token_acc": 0.8173558831911802 |
| }, |
| { |
| "epoch": 2.907514450867052, |
| "grad_norm": 0.5472051803786162, |
| "learning_rate": 2.59662731977639e-09, |
| "loss": 0.5251212120056152, |
| "step": 1006, |
| "token_acc": 0.8263490698267074 |
| }, |
| { |
| "epoch": 2.9104046242774566, |
| "grad_norm": 0.49207250611822545, |
| "learning_rate": 2.437003785236702e-09, |
| "loss": 0.5539924502372742, |
| "step": 1007, |
| "token_acc": 0.8112695897164994 |
| }, |
| { |
| "epoch": 2.913294797687861, |
| "grad_norm": 0.5002736177395538, |
| "learning_rate": 2.2824314397399404e-09, |
| "loss": 0.5284777283668518, |
| "step": 1008, |
| "token_acc": 0.8207929017091751 |
| }, |
| { |
| "epoch": 2.916184971098266, |
| "grad_norm": 0.5322616545740584, |
| "learning_rate": 2.132911852482766e-09, |
| "loss": 0.5585949420928955, |
| "step": 1009, |
| "token_acc": 0.8104817895999946 |
| }, |
| { |
| "epoch": 2.9190751445086707, |
| "grad_norm": 0.5531944879626155, |
| "learning_rate": 1.9884465413667063e-09, |
| "loss": 0.5428365468978882, |
| "step": 1010, |
| "token_acc": 0.815299992762539 |
| }, |
| { |
| "epoch": 2.921965317919075, |
| "grad_norm": 0.5219295200504247, |
| "learning_rate": 1.8490369729832755e-09, |
| "loss": 0.5256614685058594, |
| "step": 1011, |
| "token_acc": 0.8222089510292981 |
| }, |
| { |
| "epoch": 2.9248554913294798, |
| "grad_norm": 0.5231759747194448, |
| "learning_rate": 1.714684562598545e-09, |
| "loss": 0.5462931990623474, |
| "step": 1012, |
| "token_acc": 0.8166555934189188 |
| }, |
| { |
| "epoch": 2.9277456647398843, |
| "grad_norm": 0.511178905264401, |
| "learning_rate": 1.5853906741392086e-09, |
| "loss": 0.48754703998565674, |
| "step": 1013, |
| "token_acc": 0.8340968562927913 |
| }, |
| { |
| "epoch": 2.9306358381502893, |
| "grad_norm": 0.49209363879670576, |
| "learning_rate": 1.4611566201785386e-09, |
| "loss": 0.6072345972061157, |
| "step": 1014, |
| "token_acc": 0.796086135633005 |
| }, |
| { |
| "epoch": 2.9335260115606934, |
| "grad_norm": 0.5468806874394325, |
| "learning_rate": 1.3419836619229519e-09, |
| "loss": 0.5350404381752014, |
| "step": 1015, |
| "token_acc": 0.8205611421851678 |
| }, |
| { |
| "epoch": 2.9364161849710984, |
| "grad_norm": 0.5545661554638134, |
| "learning_rate": 1.227873009199465e-09, |
| "loss": 0.48873502016067505, |
| "step": 1016, |
| "token_acc": 0.8335308101581073 |
| }, |
| { |
| "epoch": 2.939306358381503, |
| "grad_norm": 0.6117033520146128, |
| "learning_rate": 1.1188258204433144e-09, |
| "loss": 0.5223637819290161, |
| "step": 1017, |
| "token_acc": 0.8220580971784899 |
| }, |
| { |
| "epoch": 2.9421965317919074, |
| "grad_norm": 0.5990530756110558, |
| "learning_rate": 1.0148432026860775e-09, |
| "loss": 0.5375405550003052, |
| "step": 1018, |
| "token_acc": 0.8204211966851669 |
| }, |
| { |
| "epoch": 2.9450867052023124, |
| "grad_norm": 0.5179575810720268, |
| "learning_rate": 9.159262115445709e-10, |
| "loss": 0.5529065132141113, |
| "step": 1019, |
| "token_acc": 0.8146867269147271 |
| }, |
| { |
| "epoch": 2.9479768786127165, |
| "grad_norm": 0.4852204771957678, |
| "learning_rate": 8.220758512100246e-10, |
| "loss": 0.5473994016647339, |
| "step": 1020, |
| "token_acc": 0.8154385812017952 |
| }, |
| { |
| "epoch": 2.9508670520231215, |
| "grad_norm": 0.5869353604242789, |
| "learning_rate": 7.332930744380905e-10, |
| "loss": 0.5176626443862915, |
| "step": 1021, |
| "token_acc": 0.8273430939731791 |
| }, |
| { |
| "epoch": 2.953757225433526, |
| "grad_norm": 0.5602528809896415, |
| "learning_rate": 6.49578782538851e-10, |
| "loss": 0.5115993618965149, |
| "step": 1022, |
| "token_acc": 0.8288524482039359 |
| }, |
| { |
| "epoch": 2.9566473988439306, |
| "grad_norm": 0.5342085317349031, |
| "learning_rate": 5.709338253679363e-10, |
| "loss": 0.5524012446403503, |
| "step": 1023, |
| "token_acc": 0.8131655170976683 |
| }, |
| { |
| "epoch": 2.959537572254335, |
| "grad_norm": 0.5776521748726285, |
| "learning_rate": 4.973590013178652e-10, |
| "loss": 0.5437720417976379, |
| "step": 1024, |
| "token_acc": 0.8181899648876977 |
| }, |
| { |
| "epoch": 2.9624277456647397, |
| "grad_norm": 0.5915883065627155, |
| "learning_rate": 4.288550573098293e-10, |
| "loss": 0.5497083067893982, |
| "step": 1025, |
| "token_acc": 0.8166504174699635 |
| }, |
| { |
| "epoch": 2.9653179190751446, |
| "grad_norm": 0.519862153616305, |
| "learning_rate": 3.6542268878608785e-10, |
| "loss": 0.5397800207138062, |
| "step": 1026, |
| "token_acc": 0.8185784280824216 |
| }, |
| { |
| "epoch": 2.968208092485549, |
| "grad_norm": 0.6328021139986955, |
| "learning_rate": 3.070625397031401e-10, |
| "loss": 0.5588440299034119, |
| "step": 1027, |
| "token_acc": 0.8125476802049286 |
| }, |
| { |
| "epoch": 2.9710982658959537, |
| "grad_norm": 0.5575020860016229, |
| "learning_rate": 2.537752025249529e-10, |
| "loss": 0.5562065839767456, |
| "step": 1028, |
| "token_acc": 0.8104220354019687 |
| }, |
| { |
| "epoch": 2.9739884393063583, |
| "grad_norm": 0.5378061802083338, |
| "learning_rate": 2.0556121821696527e-10, |
| "loss": 0.5177541971206665, |
| "step": 1029, |
| "token_acc": 0.8242314812400594 |
| }, |
| { |
| "epoch": 2.976878612716763, |
| "grad_norm": 0.5832757184904683, |
| "learning_rate": 1.6242107624070412e-10, |
| "loss": 0.49845069646835327, |
| "step": 1030, |
| "token_acc": 0.8330388762567243 |
| }, |
| { |
| "epoch": 2.979768786127168, |
| "grad_norm": 0.7982615431706986, |
| "learning_rate": 1.2435521454884358e-10, |
| "loss": 0.5247231125831604, |
| "step": 1031, |
| "token_acc": 0.823871938586352 |
| }, |
| { |
| "epoch": 2.9826589595375723, |
| "grad_norm": 0.5127749961245016, |
| "learning_rate": 9.136401958059759e-11, |
| "loss": 0.5525383353233337, |
| "step": 1032, |
| "token_acc": 0.8136602187346615 |
| }, |
| { |
| "epoch": 2.985549132947977, |
| "grad_norm": 0.542665341113767, |
| "learning_rate": 6.34478262578897e-11, |
| "loss": 0.5264041423797607, |
| "step": 1033, |
| "token_acc": 0.8259248289322793 |
| }, |
| { |
| "epoch": 2.9884393063583814, |
| "grad_norm": 0.5981387552317852, |
| "learning_rate": 4.0606917981966804e-11, |
| "loss": 0.5639816522598267, |
| "step": 1034, |
| "token_acc": 0.811261064452967 |
| }, |
| { |
| "epoch": 2.991329479768786, |
| "grad_norm": 0.5182263398780822, |
| "learning_rate": 2.2841526630512642e-11, |
| "loss": 0.5699348449707031, |
| "step": 1035, |
| "token_acc": 0.8084916570295722 |
| }, |
| { |
| "epoch": 2.994219653179191, |
| "grad_norm": 0.48173987479445357, |
| "learning_rate": 1.0151832555205242e-11, |
| "loss": 0.5670179128646851, |
| "step": 1036, |
| "token_acc": 0.8119991095280499 |
| }, |
| { |
| "epoch": 2.9971098265895955, |
| "grad_norm": 0.5532608077856682, |
| "learning_rate": 2.5379645800516215e-12, |
| "loss": 0.5611600875854492, |
| "step": 1037, |
| "token_acc": 0.8147770004529734 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.5148238785537761, |
| "learning_rate": 0.0, |
| "loss": 0.5508678555488586, |
| "step": 1038, |
| "token_acc": 0.8153577131547579 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 0.5740059018135071, |
| "eval_runtime": 69.9798, |
| "eval_samples_per_second": 1.572, |
| "eval_steps_per_second": 0.2, |
| "eval_token_acc": 0.808306147135369, |
| "step": 1038 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1038, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1140072026079232.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|