| { | |
| "best_global_step": 2360, | |
| "best_metric": 0.71119624376297, | |
| "best_model_checkpoint": "/mnt/shared-storage-user/zhangchenhao/work/LLaMA-Factory-own/LLaMA-Factory/saves/SFT_StepCount_all_with_plus_without_point_reasoning_optimized_no_prompt_answer80_point2p5/checkpoint-2360", | |
| "epoch": 3.0, | |
| "eval_steps": 295, | |
| "global_step": 3537, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004244031830238726, | |
| "grad_norm": 297.75304987581995, | |
| "learning_rate": 5.649717514124295e-07, | |
| "loss": 5.8629, | |
| "num_input_tokens_seen": 4530624, | |
| "step": 5, | |
| "train_runtime": 653.1761, | |
| "train_tokens_per_second": 6936.298 | |
| }, | |
| { | |
| "epoch": 0.008488063660477453, | |
| "grad_norm": 131.78345996041816, | |
| "learning_rate": 1.2711864406779662e-06, | |
| "loss": 4.6976, | |
| "num_input_tokens_seen": 9034496, | |
| "step": 10, | |
| "train_runtime": 1273.69, | |
| "train_tokens_per_second": 7093.167 | |
| }, | |
| { | |
| "epoch": 0.01273209549071618, | |
| "grad_norm": 41.38402483800692, | |
| "learning_rate": 1.977401129943503e-06, | |
| "loss": 3.047, | |
| "num_input_tokens_seen": 13435712, | |
| "step": 15, | |
| "train_runtime": 1887.0646, | |
| "train_tokens_per_second": 7119.9 | |
| }, | |
| { | |
| "epoch": 0.016976127320954906, | |
| "grad_norm": 30.090022612502594, | |
| "learning_rate": 2.6836158192090396e-06, | |
| "loss": 2.1792, | |
| "num_input_tokens_seen": 17831104, | |
| "step": 20, | |
| "train_runtime": 2539.5416, | |
| "train_tokens_per_second": 7021.387 | |
| }, | |
| { | |
| "epoch": 0.021220159151193633, | |
| "grad_norm": 10.507378327830093, | |
| "learning_rate": 3.3898305084745763e-06, | |
| "loss": 1.806, | |
| "num_input_tokens_seen": 22489280, | |
| "step": 25, | |
| "train_runtime": 3223.2496, | |
| "train_tokens_per_second": 6977.207 | |
| }, | |
| { | |
| "epoch": 0.02546419098143236, | |
| "grad_norm": 7.849105701004419, | |
| "learning_rate": 4.096045197740113e-06, | |
| "loss": 1.5334, | |
| "num_input_tokens_seen": 26938368, | |
| "step": 30, | |
| "train_runtime": 3838.47, | |
| "train_tokens_per_second": 7017.996 | |
| }, | |
| { | |
| "epoch": 0.029708222811671087, | |
| "grad_norm": 7.864574931572695, | |
| "learning_rate": 4.80225988700565e-06, | |
| "loss": 1.377, | |
| "num_input_tokens_seen": 31539968, | |
| "step": 35, | |
| "train_runtime": 4503.6915, | |
| "train_tokens_per_second": 7003.137 | |
| }, | |
| { | |
| "epoch": 0.03395225464190981, | |
| "grad_norm": 7.3747833148860025, | |
| "learning_rate": 5.508474576271187e-06, | |
| "loss": 1.3089, | |
| "num_input_tokens_seen": 35979392, | |
| "step": 40, | |
| "train_runtime": 5147.1318, | |
| "train_tokens_per_second": 6990.183 | |
| }, | |
| { | |
| "epoch": 0.03819628647214854, | |
| "grad_norm": 8.361663447491122, | |
| "learning_rate": 6.214689265536724e-06, | |
| "loss": 1.2385, | |
| "num_input_tokens_seen": 40533440, | |
| "step": 45, | |
| "train_runtime": 5799.5796, | |
| "train_tokens_per_second": 6989.031 | |
| }, | |
| { | |
| "epoch": 0.042440318302387266, | |
| "grad_norm": 7.700383821460495, | |
| "learning_rate": 6.92090395480226e-06, | |
| "loss": 1.1772, | |
| "num_input_tokens_seen": 45172288, | |
| "step": 50, | |
| "train_runtime": 6470.1331, | |
| "train_tokens_per_second": 6981.663 | |
| }, | |
| { | |
| "epoch": 0.04668435013262599, | |
| "grad_norm": 7.153074169274688, | |
| "learning_rate": 7.627118644067798e-06, | |
| "loss": 1.1281, | |
| "num_input_tokens_seen": 49855040, | |
| "step": 55, | |
| "train_runtime": 7138.9239, | |
| "train_tokens_per_second": 6983.551 | |
| }, | |
| { | |
| "epoch": 0.05092838196286472, | |
| "grad_norm": 5.863677860447595, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 1.0659, | |
| "num_input_tokens_seen": 54260032, | |
| "step": 60, | |
| "train_runtime": 7733.863, | |
| "train_tokens_per_second": 7015.903 | |
| }, | |
| { | |
| "epoch": 0.05517241379310345, | |
| "grad_norm": 5.211482731988629, | |
| "learning_rate": 9.039548022598871e-06, | |
| "loss": 0.992, | |
| "num_input_tokens_seen": 58804672, | |
| "step": 65, | |
| "train_runtime": 8385.1953, | |
| "train_tokens_per_second": 7012.916 | |
| }, | |
| { | |
| "epoch": 0.059416445623342175, | |
| "grad_norm": 3.0025030899721137, | |
| "learning_rate": 9.745762711864407e-06, | |
| "loss": 0.9647, | |
| "num_input_tokens_seen": 63162880, | |
| "step": 70, | |
| "train_runtime": 9001.3827, | |
| "train_tokens_per_second": 7017.02 | |
| }, | |
| { | |
| "epoch": 0.0636604774535809, | |
| "grad_norm": 1.999806623127411, | |
| "learning_rate": 1.0451977401129943e-05, | |
| "loss": 0.9392, | |
| "num_input_tokens_seen": 67642560, | |
| "step": 75, | |
| "train_runtime": 9659.9049, | |
| "train_tokens_per_second": 7002.404 | |
| }, | |
| { | |
| "epoch": 0.06790450928381962, | |
| "grad_norm": 2.1507559480146172, | |
| "learning_rate": 1.115819209039548e-05, | |
| "loss": 0.9183, | |
| "num_input_tokens_seen": 72091264, | |
| "step": 80, | |
| "train_runtime": 10244.6396, | |
| "train_tokens_per_second": 7036.974 | |
| }, | |
| { | |
| "epoch": 0.07214854111405836, | |
| "grad_norm": 2.2495505770555297, | |
| "learning_rate": 1.1864406779661018e-05, | |
| "loss": 0.9072, | |
| "num_input_tokens_seen": 76507136, | |
| "step": 85, | |
| "train_runtime": 10853.5827, | |
| "train_tokens_per_second": 7049.021 | |
| }, | |
| { | |
| "epoch": 0.07639257294429708, | |
| "grad_norm": 2.121224741489407, | |
| "learning_rate": 1.2570621468926556e-05, | |
| "loss": 0.9108, | |
| "num_input_tokens_seen": 80870400, | |
| "step": 90, | |
| "train_runtime": 11472.3492, | |
| "train_tokens_per_second": 7049.158 | |
| }, | |
| { | |
| "epoch": 0.08063660477453581, | |
| "grad_norm": 2.588502611123473, | |
| "learning_rate": 1.3276836158192092e-05, | |
| "loss": 0.8924, | |
| "num_input_tokens_seen": 85329024, | |
| "step": 95, | |
| "train_runtime": 12124.6375, | |
| "train_tokens_per_second": 7037.656 | |
| }, | |
| { | |
| "epoch": 0.08488063660477453, | |
| "grad_norm": 2.168204787290024, | |
| "learning_rate": 1.3983050847457627e-05, | |
| "loss": 0.9035, | |
| "num_input_tokens_seen": 89666944, | |
| "step": 100, | |
| "train_runtime": 12721.9573, | |
| "train_tokens_per_second": 7048.203 | |
| }, | |
| { | |
| "epoch": 0.08912466843501327, | |
| "grad_norm": 2.039908089915845, | |
| "learning_rate": 1.4689265536723165e-05, | |
| "loss": 0.8925, | |
| "num_input_tokens_seen": 94089920, | |
| "step": 105, | |
| "train_runtime": 13381.4037, | |
| "train_tokens_per_second": 7031.394 | |
| }, | |
| { | |
| "epoch": 0.09336870026525199, | |
| "grad_norm": 120.4758045680371, | |
| "learning_rate": 1.5395480225988703e-05, | |
| "loss": 0.909, | |
| "num_input_tokens_seen": 98437760, | |
| "step": 110, | |
| "train_runtime": 14013.3414, | |
| "train_tokens_per_second": 7024.574 | |
| }, | |
| { | |
| "epoch": 0.09761273209549072, | |
| "grad_norm": 1.953705157702643, | |
| "learning_rate": 1.6101694915254237e-05, | |
| "loss": 0.9098, | |
| "num_input_tokens_seen": 103115456, | |
| "step": 115, | |
| "train_runtime": 14689.3018, | |
| "train_tokens_per_second": 7019.766 | |
| }, | |
| { | |
| "epoch": 0.10185676392572944, | |
| "grad_norm": 1.548682015852324, | |
| "learning_rate": 1.6807909604519774e-05, | |
| "loss": 0.8787, | |
| "num_input_tokens_seen": 107696768, | |
| "step": 120, | |
| "train_runtime": 15307.4003, | |
| "train_tokens_per_second": 7035.601 | |
| }, | |
| { | |
| "epoch": 0.10610079575596817, | |
| "grad_norm": 1.4155432758105517, | |
| "learning_rate": 1.7514124293785312e-05, | |
| "loss": 0.8971, | |
| "num_input_tokens_seen": 112245632, | |
| "step": 125, | |
| "train_runtime": 15954.3044, | |
| "train_tokens_per_second": 7035.445 | |
| }, | |
| { | |
| "epoch": 0.1103448275862069, | |
| "grad_norm": 1.5268658891716769, | |
| "learning_rate": 1.8220338983050846e-05, | |
| "loss": 0.8735, | |
| "num_input_tokens_seen": 116619392, | |
| "step": 130, | |
| "train_runtime": 16586.124, | |
| "train_tokens_per_second": 7031.142 | |
| }, | |
| { | |
| "epoch": 0.11458885941644563, | |
| "grad_norm": 1.4550371590475242, | |
| "learning_rate": 1.8926553672316387e-05, | |
| "loss": 0.8871, | |
| "num_input_tokens_seen": 121198656, | |
| "step": 135, | |
| "train_runtime": 17239.014, | |
| "train_tokens_per_second": 7030.487 | |
| }, | |
| { | |
| "epoch": 0.11883289124668435, | |
| "grad_norm": 1.4816395671279814, | |
| "learning_rate": 1.963276836158192e-05, | |
| "loss": 0.8735, | |
| "num_input_tokens_seen": 125766592, | |
| "step": 140, | |
| "train_runtime": 17856.4497, | |
| "train_tokens_per_second": 7043.203 | |
| }, | |
| { | |
| "epoch": 0.12307692307692308, | |
| "grad_norm": 1.8562529667931933, | |
| "learning_rate": 2.033898305084746e-05, | |
| "loss": 0.8841, | |
| "num_input_tokens_seen": 130417216, | |
| "step": 145, | |
| "train_runtime": 18488.9056, | |
| "train_tokens_per_second": 7053.809 | |
| }, | |
| { | |
| "epoch": 0.1273209549071618, | |
| "grad_norm": 1.3288711166284626, | |
| "learning_rate": 2.1045197740112996e-05, | |
| "loss": 0.8847, | |
| "num_input_tokens_seen": 134684288, | |
| "step": 150, | |
| "train_runtime": 19070.7007, | |
| "train_tokens_per_second": 7062.367 | |
| }, | |
| { | |
| "epoch": 0.13156498673740052, | |
| "grad_norm": 1.716843857648477, | |
| "learning_rate": 2.175141242937853e-05, | |
| "loss": 0.8699, | |
| "num_input_tokens_seen": 139319872, | |
| "step": 155, | |
| "train_runtime": 19757.791, | |
| "train_tokens_per_second": 7051.389 | |
| }, | |
| { | |
| "epoch": 0.13580901856763924, | |
| "grad_norm": 2.015691688957, | |
| "learning_rate": 2.245762711864407e-05, | |
| "loss": 0.8707, | |
| "num_input_tokens_seen": 143717952, | |
| "step": 160, | |
| "train_runtime": 20371.2146, | |
| "train_tokens_per_second": 7054.953 | |
| }, | |
| { | |
| "epoch": 0.140053050397878, | |
| "grad_norm": 1.787168466261934, | |
| "learning_rate": 2.3163841807909606e-05, | |
| "loss": 0.8796, | |
| "num_input_tokens_seen": 148245632, | |
| "step": 165, | |
| "train_runtime": 21001.0169, | |
| "train_tokens_per_second": 7058.974 | |
| }, | |
| { | |
| "epoch": 0.1442970822281167, | |
| "grad_norm": 4.713178832164353, | |
| "learning_rate": 2.3870056497175143e-05, | |
| "loss": 0.8977, | |
| "num_input_tokens_seen": 152687296, | |
| "step": 170, | |
| "train_runtime": 21617.8243, | |
| "train_tokens_per_second": 7063.028 | |
| }, | |
| { | |
| "epoch": 0.14854111405835543, | |
| "grad_norm": 1.4590858301256864, | |
| "learning_rate": 2.457627118644068e-05, | |
| "loss": 0.8959, | |
| "num_input_tokens_seen": 157314368, | |
| "step": 175, | |
| "train_runtime": 22299.4419, | |
| "train_tokens_per_second": 7054.633 | |
| }, | |
| { | |
| "epoch": 0.15278514588859415, | |
| "grad_norm": 1.4882611183761851, | |
| "learning_rate": 2.5282485875706215e-05, | |
| "loss": 0.8729, | |
| "num_input_tokens_seen": 161815808, | |
| "step": 180, | |
| "train_runtime": 22965.2954, | |
| "train_tokens_per_second": 7046.102 | |
| }, | |
| { | |
| "epoch": 0.1570291777188329, | |
| "grad_norm": 1.2937684487451329, | |
| "learning_rate": 2.5988700564971752e-05, | |
| "loss": 0.8801, | |
| "num_input_tokens_seen": 166361792, | |
| "step": 185, | |
| "train_runtime": 23622.1066, | |
| "train_tokens_per_second": 7042.631 | |
| }, | |
| { | |
| "epoch": 0.16127320954907162, | |
| "grad_norm": 1.326371860515381, | |
| "learning_rate": 2.669491525423729e-05, | |
| "loss": 0.8837, | |
| "num_input_tokens_seen": 170963072, | |
| "step": 190, | |
| "train_runtime": 24288.8241, | |
| "train_tokens_per_second": 7038.755 | |
| }, | |
| { | |
| "epoch": 0.16551724137931034, | |
| "grad_norm": 1.3881050133378776, | |
| "learning_rate": 2.7401129943502824e-05, | |
| "loss": 0.8848, | |
| "num_input_tokens_seen": 175401600, | |
| "step": 195, | |
| "train_runtime": 24906.6505, | |
| "train_tokens_per_second": 7042.36 | |
| }, | |
| { | |
| "epoch": 0.16976127320954906, | |
| "grad_norm": 1.5698900690401176, | |
| "learning_rate": 2.8107344632768362e-05, | |
| "loss": 0.8687, | |
| "num_input_tokens_seen": 179779200, | |
| "step": 200, | |
| "train_runtime": 25515.4439, | |
| "train_tokens_per_second": 7045.897 | |
| }, | |
| { | |
| "epoch": 0.1740053050397878, | |
| "grad_norm": 1.4897036851687533, | |
| "learning_rate": 2.88135593220339e-05, | |
| "loss": 0.8782, | |
| "num_input_tokens_seen": 184224512, | |
| "step": 205, | |
| "train_runtime": 26153.0151, | |
| "train_tokens_per_second": 7044.102 | |
| }, | |
| { | |
| "epoch": 0.17824933687002653, | |
| "grad_norm": 1.82238111934615, | |
| "learning_rate": 2.951977401129944e-05, | |
| "loss": 0.8838, | |
| "num_input_tokens_seen": 188802432, | |
| "step": 210, | |
| "train_runtime": 26835.6565, | |
| "train_tokens_per_second": 7035.506 | |
| }, | |
| { | |
| "epoch": 0.18249336870026525, | |
| "grad_norm": 1.653089989083378, | |
| "learning_rate": 3.022598870056497e-05, | |
| "loss": 0.8724, | |
| "num_input_tokens_seen": 193510336, | |
| "step": 215, | |
| "train_runtime": 27522.5514, | |
| "train_tokens_per_second": 7030.974 | |
| }, | |
| { | |
| "epoch": 0.18673740053050397, | |
| "grad_norm": 1.4627059373154376, | |
| "learning_rate": 3.093220338983051e-05, | |
| "loss": 0.8649, | |
| "num_input_tokens_seen": 197993536, | |
| "step": 220, | |
| "train_runtime": 28129.6066, | |
| "train_tokens_per_second": 7038.617 | |
| }, | |
| { | |
| "epoch": 0.1909814323607427, | |
| "grad_norm": 1.247186929390262, | |
| "learning_rate": 3.1638418079096046e-05, | |
| "loss": 0.8707, | |
| "num_input_tokens_seen": 202834048, | |
| "step": 225, | |
| "train_runtime": 28804.0632, | |
| "train_tokens_per_second": 7041.855 | |
| }, | |
| { | |
| "epoch": 0.19522546419098144, | |
| "grad_norm": 1.5240142785161233, | |
| "learning_rate": 3.234463276836158e-05, | |
| "loss": 0.8686, | |
| "num_input_tokens_seen": 207360320, | |
| "step": 230, | |
| "train_runtime": 29445.3467, | |
| "train_tokens_per_second": 7042.21 | |
| }, | |
| { | |
| "epoch": 0.19946949602122016, | |
| "grad_norm": 1.186249792586507, | |
| "learning_rate": 3.305084745762712e-05, | |
| "loss": 0.8591, | |
| "num_input_tokens_seen": 211972800, | |
| "step": 235, | |
| "train_runtime": 30125.9512, | |
| "train_tokens_per_second": 7036.219 | |
| }, | |
| { | |
| "epoch": 0.20371352785145888, | |
| "grad_norm": 1.5311530250979444, | |
| "learning_rate": 3.375706214689266e-05, | |
| "loss": 0.88, | |
| "num_input_tokens_seen": 216741248, | |
| "step": 240, | |
| "train_runtime": 30816.4459, | |
| "train_tokens_per_second": 7033.298 | |
| }, | |
| { | |
| "epoch": 0.2079575596816976, | |
| "grad_norm": 1.0601153174356541, | |
| "learning_rate": 3.446327683615819e-05, | |
| "loss": 0.8828, | |
| "num_input_tokens_seen": 221190080, | |
| "step": 245, | |
| "train_runtime": 31439.204, | |
| "train_tokens_per_second": 7035.486 | |
| }, | |
| { | |
| "epoch": 0.21220159151193635, | |
| "grad_norm": 1.75722517567588, | |
| "learning_rate": 3.516949152542373e-05, | |
| "loss": 0.8921, | |
| "num_input_tokens_seen": 225847168, | |
| "step": 250, | |
| "train_runtime": 32147.0921, | |
| "train_tokens_per_second": 7025.431 | |
| }, | |
| { | |
| "epoch": 0.21644562334217507, | |
| "grad_norm": 2.0274303550036263, | |
| "learning_rate": 3.587570621468927e-05, | |
| "loss": 0.9012, | |
| "num_input_tokens_seen": 230582208, | |
| "step": 255, | |
| "train_runtime": 32842.8641, | |
| "train_tokens_per_second": 7020.77 | |
| }, | |
| { | |
| "epoch": 0.2206896551724138, | |
| "grad_norm": 1.9253331362655286, | |
| "learning_rate": 3.6581920903954806e-05, | |
| "loss": 0.8999, | |
| "num_input_tokens_seen": 235274688, | |
| "step": 260, | |
| "train_runtime": 33539.0669, | |
| "train_tokens_per_second": 7014.944 | |
| }, | |
| { | |
| "epoch": 0.2249336870026525, | |
| "grad_norm": 2.2082649940934975, | |
| "learning_rate": 3.728813559322034e-05, | |
| "loss": 0.9029, | |
| "num_input_tokens_seen": 239864000, | |
| "step": 265, | |
| "train_runtime": 34221.492, | |
| "train_tokens_per_second": 7009.163 | |
| }, | |
| { | |
| "epoch": 0.22917771883289126, | |
| "grad_norm": 1.452691571968028, | |
| "learning_rate": 3.799435028248588e-05, | |
| "loss": 0.9046, | |
| "num_input_tokens_seen": 244225344, | |
| "step": 270, | |
| "train_runtime": 34836.4135, | |
| "train_tokens_per_second": 7010.634 | |
| }, | |
| { | |
| "epoch": 0.23342175066312998, | |
| "grad_norm": 2.730577739346072, | |
| "learning_rate": 3.8700564971751415e-05, | |
| "loss": 0.8874, | |
| "num_input_tokens_seen": 248726272, | |
| "step": 275, | |
| "train_runtime": 35499.0341, | |
| "train_tokens_per_second": 7006.565 | |
| }, | |
| { | |
| "epoch": 0.2376657824933687, | |
| "grad_norm": 1.093231645373866, | |
| "learning_rate": 3.940677966101695e-05, | |
| "loss": 0.8809, | |
| "num_input_tokens_seen": 253245952, | |
| "step": 280, | |
| "train_runtime": 36135.7808, | |
| "train_tokens_per_second": 7008.177 | |
| }, | |
| { | |
| "epoch": 0.24190981432360742, | |
| "grad_norm": 1.307461678503626, | |
| "learning_rate": 4.011299435028249e-05, | |
| "loss": 0.8598, | |
| "num_input_tokens_seen": 257684480, | |
| "step": 285, | |
| "train_runtime": 36728.042, | |
| "train_tokens_per_second": 7016.015 | |
| }, | |
| { | |
| "epoch": 0.24615384615384617, | |
| "grad_norm": 1.2991784389959953, | |
| "learning_rate": 4.0819209039548024e-05, | |
| "loss": 0.8981, | |
| "num_input_tokens_seen": 262108992, | |
| "step": 290, | |
| "train_runtime": 37341.918, | |
| "train_tokens_per_second": 7019.163 | |
| }, | |
| { | |
| "epoch": 0.25039787798408486, | |
| "grad_norm": 1.4781716766460902, | |
| "learning_rate": 4.152542372881356e-05, | |
| "loss": 0.9007, | |
| "num_input_tokens_seen": 266677504, | |
| "step": 295, | |
| "train_runtime": 38006.4439, | |
| "train_tokens_per_second": 7016.639 | |
| }, | |
| { | |
| "epoch": 0.25039787798408486, | |
| "eval_loss": 0.8760802745819092, | |
| "eval_runtime": 1055.1289, | |
| "eval_samples_per_second": 2.888, | |
| "eval_steps_per_second": 0.091, | |
| "num_input_tokens_seen": 266677504, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.2546419098143236, | |
| "grad_norm": 1.3728608285035016, | |
| "learning_rate": 4.22316384180791e-05, | |
| "loss": 0.8857, | |
| "num_input_tokens_seen": 271486592, | |
| "step": 300, | |
| "train_runtime": 39774.4771, | |
| "train_tokens_per_second": 6825.648 | |
| }, | |
| { | |
| "epoch": 0.25888594164456236, | |
| "grad_norm": 1.3128466580163847, | |
| "learning_rate": 4.2937853107344634e-05, | |
| "loss": 1.0976, | |
| "num_input_tokens_seen": 276035072, | |
| "step": 305, | |
| "train_runtime": 40413.7335, | |
| "train_tokens_per_second": 6830.229 | |
| }, | |
| { | |
| "epoch": 0.26312997347480105, | |
| "grad_norm": 1.9380613988266078, | |
| "learning_rate": 4.3644067796610175e-05, | |
| "loss": 0.8879, | |
| "num_input_tokens_seen": 280424512, | |
| "step": 310, | |
| "train_runtime": 41016.3815, | |
| "train_tokens_per_second": 6836.891 | |
| }, | |
| { | |
| "epoch": 0.2673740053050398, | |
| "grad_norm": 1.7436380468280226, | |
| "learning_rate": 4.435028248587571e-05, | |
| "loss": 0.894, | |
| "num_input_tokens_seen": 284818432, | |
| "step": 315, | |
| "train_runtime": 41649.8882, | |
| "train_tokens_per_second": 6838.396 | |
| }, | |
| { | |
| "epoch": 0.2716180371352785, | |
| "grad_norm": 1.81753757832679, | |
| "learning_rate": 4.505649717514124e-05, | |
| "loss": 0.8883, | |
| "num_input_tokens_seen": 289399424, | |
| "step": 320, | |
| "train_runtime": 42320.1609, | |
| "train_tokens_per_second": 6838.335 | |
| }, | |
| { | |
| "epoch": 0.27586206896551724, | |
| "grad_norm": 0.9404537896048348, | |
| "learning_rate": 4.5762711864406784e-05, | |
| "loss": 0.9124, | |
| "num_input_tokens_seen": 293917312, | |
| "step": 325, | |
| "train_runtime": 42970.7229, | |
| "train_tokens_per_second": 6839.943 | |
| }, | |
| { | |
| "epoch": 0.280106100795756, | |
| "grad_norm": 2.947005859311638, | |
| "learning_rate": 4.646892655367232e-05, | |
| "loss": 0.8892, | |
| "num_input_tokens_seen": 298366272, | |
| "step": 330, | |
| "train_runtime": 43553.8539, | |
| "train_tokens_per_second": 6850.514 | |
| }, | |
| { | |
| "epoch": 0.2843501326259947, | |
| "grad_norm": 1.6174994735745432, | |
| "learning_rate": 4.717514124293785e-05, | |
| "loss": 0.8881, | |
| "num_input_tokens_seen": 302869952, | |
| "step": 335, | |
| "train_runtime": 44211.0204, | |
| "train_tokens_per_second": 6850.553 | |
| }, | |
| { | |
| "epoch": 0.2885941644562334, | |
| "grad_norm": 1.7792646072660459, | |
| "learning_rate": 4.788135593220339e-05, | |
| "loss": 0.8803, | |
| "num_input_tokens_seen": 307426560, | |
| "step": 340, | |
| "train_runtime": 44880.4175, | |
| "train_tokens_per_second": 6849.904 | |
| }, | |
| { | |
| "epoch": 0.2928381962864722, | |
| "grad_norm": 1.0669701860722622, | |
| "learning_rate": 4.8587570621468934e-05, | |
| "loss": 0.897, | |
| "num_input_tokens_seen": 312164928, | |
| "step": 345, | |
| "train_runtime": 45578.8582, | |
| "train_tokens_per_second": 6848.897 | |
| }, | |
| { | |
| "epoch": 0.29708222811671087, | |
| "grad_norm": 1.2083166811125388, | |
| "learning_rate": 4.929378531073446e-05, | |
| "loss": 0.9033, | |
| "num_input_tokens_seen": 316741824, | |
| "step": 350, | |
| "train_runtime": 46190.9574, | |
| "train_tokens_per_second": 6857.226 | |
| }, | |
| { | |
| "epoch": 0.3013262599469496, | |
| "grad_norm": 2.667811908170323, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8758, | |
| "num_input_tokens_seen": 321123008, | |
| "step": 355, | |
| "train_runtime": 46809.8332, | |
| "train_tokens_per_second": 6860.161 | |
| }, | |
| { | |
| "epoch": 0.3055702917771883, | |
| "grad_norm": 1.5315598777999704, | |
| "learning_rate": 4.999969557829892e-05, | |
| "loss": 0.8975, | |
| "num_input_tokens_seen": 325636416, | |
| "step": 360, | |
| "train_runtime": 47466.7463, | |
| "train_tokens_per_second": 6860.306 | |
| }, | |
| { | |
| "epoch": 0.30981432360742706, | |
| "grad_norm": 1.0061504802501977, | |
| "learning_rate": 4.999878232060946e-05, | |
| "loss": 0.8919, | |
| "num_input_tokens_seen": 330217472, | |
| "step": 365, | |
| "train_runtime": 48143.739, | |
| "train_tokens_per_second": 6858.991 | |
| }, | |
| { | |
| "epoch": 0.3140583554376658, | |
| "grad_norm": 1.0691191893512106, | |
| "learning_rate": 4.999726024917288e-05, | |
| "loss": 0.8775, | |
| "num_input_tokens_seen": 334605888, | |
| "step": 370, | |
| "train_runtime": 48800.4766, | |
| "train_tokens_per_second": 6856.611 | |
| }, | |
| { | |
| "epoch": 0.3183023872679045, | |
| "grad_norm": 1.1646960467870506, | |
| "learning_rate": 4.99951294010573e-05, | |
| "loss": 0.8944, | |
| "num_input_tokens_seen": 339190016, | |
| "step": 375, | |
| "train_runtime": 49427.2381, | |
| "train_tokens_per_second": 6862.411 | |
| }, | |
| { | |
| "epoch": 0.32254641909814324, | |
| "grad_norm": 13.073420853243217, | |
| "learning_rate": 4.999238982815683e-05, | |
| "loss": 0.908, | |
| "num_input_tokens_seen": 343751808, | |
| "step": 380, | |
| "train_runtime": 50079.7525, | |
| "train_tokens_per_second": 6864.088 | |
| }, | |
| { | |
| "epoch": 0.32679045092838194, | |
| "grad_norm": 1.030515508528764, | |
| "learning_rate": 4.99890415971903e-05, | |
| "loss": 0.9207, | |
| "num_input_tokens_seen": 348170752, | |
| "step": 385, | |
| "train_runtime": 50717.2414, | |
| "train_tokens_per_second": 6864.939 | |
| }, | |
| { | |
| "epoch": 0.3310344827586207, | |
| "grad_norm": 2.462002678202629, | |
| "learning_rate": 4.9985084789699645e-05, | |
| "loss": 0.8857, | |
| "num_input_tokens_seen": 352658368, | |
| "step": 390, | |
| "train_runtime": 51341.856, | |
| "train_tokens_per_second": 6868.828 | |
| }, | |
| { | |
| "epoch": 0.33527851458885943, | |
| "grad_norm": 1.1178056520913622, | |
| "learning_rate": 4.998051950204792e-05, | |
| "loss": 0.8942, | |
| "num_input_tokens_seen": 357241472, | |
| "step": 395, | |
| "train_runtime": 51991.5243, | |
| "train_tokens_per_second": 6871.148 | |
| }, | |
| { | |
| "epoch": 0.3395225464190981, | |
| "grad_norm": 12.174478000037658, | |
| "learning_rate": 4.997534584541692e-05, | |
| "loss": 0.9033, | |
| "num_input_tokens_seen": 361706368, | |
| "step": 400, | |
| "train_runtime": 52622.4401, | |
| "train_tokens_per_second": 6873.615 | |
| }, | |
| { | |
| "epoch": 0.3437665782493369, | |
| "grad_norm": 1.3129250294672423, | |
| "learning_rate": 4.996956394580453e-05, | |
| "loss": 0.9534, | |
| "num_input_tokens_seen": 366301824, | |
| "step": 405, | |
| "train_runtime": 53287.8409, | |
| "train_tokens_per_second": 6874.023 | |
| }, | |
| { | |
| "epoch": 0.3480106100795756, | |
| "grad_norm": 1.2922663914137134, | |
| "learning_rate": 4.9963173944021604e-05, | |
| "loss": 0.907, | |
| "num_input_tokens_seen": 370813440, | |
| "step": 410, | |
| "train_runtime": 53932.276, | |
| "train_tokens_per_second": 6875.539 | |
| }, | |
| { | |
| "epoch": 0.3522546419098143, | |
| "grad_norm": 3.97037390684764, | |
| "learning_rate": 4.995617599568855e-05, | |
| "loss": 0.8908, | |
| "num_input_tokens_seen": 375343232, | |
| "step": 415, | |
| "train_runtime": 54587.718, | |
| "train_tokens_per_second": 6875.965 | |
| }, | |
| { | |
| "epoch": 0.35649867374005306, | |
| "grad_norm": 1.198328142782024, | |
| "learning_rate": 4.9948570271231553e-05, | |
| "loss": 0.8871, | |
| "num_input_tokens_seen": 379904000, | |
| "step": 420, | |
| "train_runtime": 55281.7215, | |
| "train_tokens_per_second": 6872.145 | |
| }, | |
| { | |
| "epoch": 0.36074270557029176, | |
| "grad_norm": 0.971040029995864, | |
| "learning_rate": 4.9940356955878436e-05, | |
| "loss": 0.883, | |
| "num_input_tokens_seen": 384479488, | |
| "step": 425, | |
| "train_runtime": 55946.6877, | |
| "train_tokens_per_second": 6872.248 | |
| }, | |
| { | |
| "epoch": 0.3649867374005305, | |
| "grad_norm": 1.1690338997234486, | |
| "learning_rate": 4.99315362496541e-05, | |
| "loss": 0.8915, | |
| "num_input_tokens_seen": 389250176, | |
| "step": 430, | |
| "train_runtime": 56652.1149, | |
| "train_tokens_per_second": 6870.885 | |
| }, | |
| { | |
| "epoch": 0.36923076923076925, | |
| "grad_norm": 1.33505203332503, | |
| "learning_rate": 4.9922108367375695e-05, | |
| "loss": 0.8924, | |
| "num_input_tokens_seen": 393810688, | |
| "step": 435, | |
| "train_runtime": 57300.4821, | |
| "train_tokens_per_second": 6872.729 | |
| }, | |
| { | |
| "epoch": 0.37347480106100794, | |
| "grad_norm": 1.049216620404627, | |
| "learning_rate": 4.991207353864739e-05, | |
| "loss": 0.8777, | |
| "num_input_tokens_seen": 398511168, | |
| "step": 440, | |
| "train_runtime": 57988.9682, | |
| "train_tokens_per_second": 6872.189 | |
| }, | |
| { | |
| "epoch": 0.3777188328912467, | |
| "grad_norm": 1.1473154938029155, | |
| "learning_rate": 4.9901432007854744e-05, | |
| "loss": 0.8633, | |
| "num_input_tokens_seen": 403089152, | |
| "step": 445, | |
| "train_runtime": 58633.6036, | |
| "train_tokens_per_second": 6874.712 | |
| }, | |
| { | |
| "epoch": 0.3819628647214854, | |
| "grad_norm": 1.4204892310138295, | |
| "learning_rate": 4.9890184034158794e-05, | |
| "loss": 0.8873, | |
| "num_input_tokens_seen": 407921792, | |
| "step": 450, | |
| "train_runtime": 59298.1247, | |
| "train_tokens_per_second": 6879.169 | |
| }, | |
| { | |
| "epoch": 0.38620689655172413, | |
| "grad_norm": 1.1936514251690153, | |
| "learning_rate": 4.987832989148973e-05, | |
| "loss": 0.8795, | |
| "num_input_tokens_seen": 412324096, | |
| "step": 455, | |
| "train_runtime": 59926.4345, | |
| "train_tokens_per_second": 6880.504 | |
| }, | |
| { | |
| "epoch": 0.3904509283819629, | |
| "grad_norm": 1.10287952418463, | |
| "learning_rate": 4.986586986854019e-05, | |
| "loss": 0.8606, | |
| "num_input_tokens_seen": 416742912, | |
| "step": 460, | |
| "train_runtime": 60568.403, | |
| "train_tokens_per_second": 6880.533 | |
| }, | |
| { | |
| "epoch": 0.3946949602122016, | |
| "grad_norm": 1.072213338502524, | |
| "learning_rate": 4.985280426875831e-05, | |
| "loss": 0.872, | |
| "num_input_tokens_seen": 421138880, | |
| "step": 465, | |
| "train_runtime": 61260.5252, | |
| "train_tokens_per_second": 6874.555 | |
| }, | |
| { | |
| "epoch": 0.3989389920424403, | |
| "grad_norm": 1.0219296197838135, | |
| "learning_rate": 4.983913341034026e-05, | |
| "loss": 0.8775, | |
| "num_input_tokens_seen": 425727936, | |
| "step": 470, | |
| "train_runtime": 61904.3467, | |
| "train_tokens_per_second": 6877.19 | |
| }, | |
| { | |
| "epoch": 0.40318302387267907, | |
| "grad_norm": 1.3843761799310907, | |
| "learning_rate": 4.98248576262225e-05, | |
| "loss": 0.8775, | |
| "num_input_tokens_seen": 430157696, | |
| "step": 475, | |
| "train_runtime": 62556.696, | |
| "train_tokens_per_second": 6876.285 | |
| }, | |
| { | |
| "epoch": 0.40742705570291776, | |
| "grad_norm": 1.1025487338096294, | |
| "learning_rate": 4.980997726407371e-05, | |
| "loss": 0.8504, | |
| "num_input_tokens_seen": 434654208, | |
| "step": 480, | |
| "train_runtime": 63190.0546, | |
| "train_tokens_per_second": 6878.522 | |
| }, | |
| { | |
| "epoch": 0.4116710875331565, | |
| "grad_norm": 1.2747087605024068, | |
| "learning_rate": 4.979449268628632e-05, | |
| "loss": 0.8666, | |
| "num_input_tokens_seen": 439274752, | |
| "step": 485, | |
| "train_runtime": 63846.4067, | |
| "train_tokens_per_second": 6880.18 | |
| }, | |
| { | |
| "epoch": 0.4159151193633952, | |
| "grad_norm": 1.1710609815467128, | |
| "learning_rate": 4.977840426996763e-05, | |
| "loss": 0.8805, | |
| "num_input_tokens_seen": 443719872, | |
| "step": 490, | |
| "train_runtime": 64497.93, | |
| "train_tokens_per_second": 6879.599 | |
| }, | |
| { | |
| "epoch": 0.42015915119363395, | |
| "grad_norm": 1.0097086187416695, | |
| "learning_rate": 4.97617124069307e-05, | |
| "loss": 0.8903, | |
| "num_input_tokens_seen": 448255296, | |
| "step": 495, | |
| "train_runtime": 65132.19, | |
| "train_tokens_per_second": 6882.239 | |
| }, | |
| { | |
| "epoch": 0.4244031830238727, | |
| "grad_norm": 37.867408485972554, | |
| "learning_rate": 4.974441750368476e-05, | |
| "loss": 0.8397, | |
| "num_input_tokens_seen": 452923520, | |
| "step": 500, | |
| "train_runtime": 65815.3494, | |
| "train_tokens_per_second": 6881.731 | |
| }, | |
| { | |
| "epoch": 0.4286472148541114, | |
| "grad_norm": 1.0330730902667171, | |
| "learning_rate": 4.97265199814253e-05, | |
| "loss": 0.8865, | |
| "num_input_tokens_seen": 457377280, | |
| "step": 505, | |
| "train_runtime": 66451.0204, | |
| "train_tokens_per_second": 6882.923 | |
| }, | |
| { | |
| "epoch": 0.43289124668435014, | |
| "grad_norm": 1.3051062489077976, | |
| "learning_rate": 4.9708020276023874e-05, | |
| "loss": 0.86, | |
| "num_input_tokens_seen": 461956224, | |
| "step": 510, | |
| "train_runtime": 67114.9924, | |
| "train_tokens_per_second": 6883.056 | |
| }, | |
| { | |
| "epoch": 0.43713527851458883, | |
| "grad_norm": 2.1187078081806012, | |
| "learning_rate": 4.968891883801742e-05, | |
| "loss": 0.8749, | |
| "num_input_tokens_seen": 466374976, | |
| "step": 515, | |
| "train_runtime": 67739.4567, | |
| "train_tokens_per_second": 6884.835 | |
| }, | |
| { | |
| "epoch": 0.4413793103448276, | |
| "grad_norm": 1.4438973622990432, | |
| "learning_rate": 4.966921613259731e-05, | |
| "loss": 0.871, | |
| "num_input_tokens_seen": 470742528, | |
| "step": 520, | |
| "train_runtime": 68365.462, | |
| "train_tokens_per_second": 6885.678 | |
| }, | |
| { | |
| "epoch": 0.44562334217506633, | |
| "grad_norm": 1.53355639196128, | |
| "learning_rate": 4.964891263959803e-05, | |
| "loss": 0.8369, | |
| "num_input_tokens_seen": 475324480, | |
| "step": 525, | |
| "train_runtime": 69025.3358, | |
| "train_tokens_per_second": 6886.232 | |
| }, | |
| { | |
| "epoch": 0.449867374005305, | |
| "grad_norm": 1.128289481595987, | |
| "learning_rate": 4.962800885348551e-05, | |
| "loss": 0.863, | |
| "num_input_tokens_seen": 479877312, | |
| "step": 530, | |
| "train_runtime": 69684.2916, | |
| "train_tokens_per_second": 6886.449 | |
| }, | |
| { | |
| "epoch": 0.45411140583554377, | |
| "grad_norm": 1.0503072430304274, | |
| "learning_rate": 4.960650528334502e-05, | |
| "loss": 0.8667, | |
| "num_input_tokens_seen": 484343232, | |
| "step": 535, | |
| "train_runtime": 70344.4784, | |
| "train_tokens_per_second": 6885.306 | |
| }, | |
| { | |
| "epoch": 0.4583554376657825, | |
| "grad_norm": 0.9545521304763791, | |
| "learning_rate": 4.958440245286884e-05, | |
| "loss": 0.8696, | |
| "num_input_tokens_seen": 488876416, | |
| "step": 540, | |
| "train_runtime": 70972.8212, | |
| "train_tokens_per_second": 6888.22 | |
| }, | |
| { | |
| "epoch": 0.4625994694960212, | |
| "grad_norm": 1.3295608584891012, | |
| "learning_rate": 4.956170090034346e-05, | |
| "loss": 0.8349, | |
| "num_input_tokens_seen": 493485888, | |
| "step": 545, | |
| "train_runtime": 71650.7674, | |
| "train_tokens_per_second": 6887.378 | |
| }, | |
| { | |
| "epoch": 0.46684350132625996, | |
| "grad_norm": 1.1735342027871698, | |
| "learning_rate": 4.953840117863652e-05, | |
| "loss": 0.8458, | |
| "num_input_tokens_seen": 498090432, | |
| "step": 550, | |
| "train_runtime": 72292.0675, | |
| "train_tokens_per_second": 6889.974 | |
| }, | |
| { | |
| "epoch": 0.47108753315649865, | |
| "grad_norm": 1.2695672366224662, | |
| "learning_rate": 4.951450385518328e-05, | |
| "loss": 0.8423, | |
| "num_input_tokens_seen": 502546368, | |
| "step": 555, | |
| "train_runtime": 72919.7187, | |
| "train_tokens_per_second": 6891.776 | |
| }, | |
| { | |
| "epoch": 0.4753315649867374, | |
| "grad_norm": 1.0194113412118773, | |
| "learning_rate": 4.9490009511972856e-05, | |
| "loss": 0.8536, | |
| "num_input_tokens_seen": 507353920, | |
| "step": 560, | |
| "train_runtime": 73610.9277, | |
| "train_tokens_per_second": 6892.372 | |
| }, | |
| { | |
| "epoch": 0.47957559681697615, | |
| "grad_norm": 1.0743184753428263, | |
| "learning_rate": 4.9464918745534e-05, | |
| "loss": 0.8325, | |
| "num_input_tokens_seen": 511882560, | |
| "step": 565, | |
| "train_runtime": 74223.3431, | |
| "train_tokens_per_second": 6896.517 | |
| }, | |
| { | |
| "epoch": 0.48381962864721484, | |
| "grad_norm": 1.1038161960566173, | |
| "learning_rate": 4.943923216692064e-05, | |
| "loss": 0.834, | |
| "num_input_tokens_seen": 516353792, | |
| "step": 570, | |
| "train_runtime": 74834.7805, | |
| "train_tokens_per_second": 6899.917 | |
| }, | |
| { | |
| "epoch": 0.4880636604774536, | |
| "grad_norm": 1.0619822713768314, | |
| "learning_rate": 4.941295040169692e-05, | |
| "loss": 0.8388, | |
| "num_input_tokens_seen": 520893376, | |
| "step": 575, | |
| "train_runtime": 75515.5945, | |
| "train_tokens_per_second": 6897.825 | |
| }, | |
| { | |
| "epoch": 0.49230769230769234, | |
| "grad_norm": 0.9968217355531681, | |
| "learning_rate": 4.938607408992201e-05, | |
| "loss": 0.8393, | |
| "num_input_tokens_seen": 525369600, | |
| "step": 580, | |
| "train_runtime": 76150.219, | |
| "train_tokens_per_second": 6899.121 | |
| }, | |
| { | |
| "epoch": 0.496551724137931, | |
| "grad_norm": 1.081156576705322, | |
| "learning_rate": 4.9358603886134516e-05, | |
| "loss": 0.8227, | |
| "num_input_tokens_seen": 529878080, | |
| "step": 585, | |
| "train_runtime": 76814.7653, | |
| "train_tokens_per_second": 6898.128 | |
| }, | |
| { | |
| "epoch": 0.5007957559681697, | |
| "grad_norm": 0.9811791489788025, | |
| "learning_rate": 4.9330540459336536e-05, | |
| "loss": 0.8409, | |
| "num_input_tokens_seen": 534499648, | |
| "step": 590, | |
| "train_runtime": 77463.1501, | |
| "train_tokens_per_second": 6900.051 | |
| }, | |
| { | |
| "epoch": 0.5007957559681697, | |
| "eval_loss": 0.8492689728736877, | |
| "eval_runtime": 1055.1977, | |
| "eval_samples_per_second": 2.888, | |
| "eval_steps_per_second": 0.091, | |
| "num_input_tokens_seen": 534499648, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.5050397877984085, | |
| "grad_norm": 1.0876099733444793, | |
| "learning_rate": 4.930188449297737e-05, | |
| "loss": 0.8384, | |
| "num_input_tokens_seen": 538899968, | |
| "step": 595, | |
| "train_runtime": 79158.0223, | |
| "train_tokens_per_second": 6807.901 | |
| }, | |
| { | |
| "epoch": 0.5092838196286472, | |
| "grad_norm": 0.999155054979559, | |
| "learning_rate": 4.927263668493683e-05, | |
| "loss": 0.8359, | |
| "num_input_tokens_seen": 543296704, | |
| "step": 600, | |
| "train_runtime": 79818.1806, | |
| "train_tokens_per_second": 6806.679 | |
| }, | |
| { | |
| "epoch": 0.5135278514588859, | |
| "grad_norm": 1.3228294516057693, | |
| "learning_rate": 4.924279774750835e-05, | |
| "loss": 0.8315, | |
| "num_input_tokens_seen": 548007296, | |
| "step": 605, | |
| "train_runtime": 80478.8962, | |
| "train_tokens_per_second": 6809.329 | |
| }, | |
| { | |
| "epoch": 0.5177718832891247, | |
| "grad_norm": 1.1178865175204313, | |
| "learning_rate": 4.9212368407381515e-05, | |
| "loss": 0.8577, | |
| "num_input_tokens_seen": 552534656, | |
| "step": 610, | |
| "train_runtime": 81169.77, | |
| "train_tokens_per_second": 6807.148 | |
| }, | |
| { | |
| "epoch": 0.5220159151193634, | |
| "grad_norm": 1.1717734488513787, | |
| "learning_rate": 4.9181349405624444e-05, | |
| "loss": 0.8449, | |
| "num_input_tokens_seen": 557040512, | |
| "step": 615, | |
| "train_runtime": 81818.0476, | |
| "train_tokens_per_second": 6808.284 | |
| }, | |
| { | |
| "epoch": 0.5262599469496021, | |
| "grad_norm": 1.281506485794031, | |
| "learning_rate": 4.9149741497665724e-05, | |
| "loss": 0.8236, | |
| "num_input_tokens_seen": 561632640, | |
| "step": 620, | |
| "train_runtime": 82448.1767, | |
| "train_tokens_per_second": 6811.947 | |
| }, | |
| { | |
| "epoch": 0.5305039787798409, | |
| "grad_norm": 1.2084706718767035, | |
| "learning_rate": 4.9117545453276016e-05, | |
| "loss": 0.8396, | |
| "num_input_tokens_seen": 566108032, | |
| "step": 625, | |
| "train_runtime": 83077.9364, | |
| "train_tokens_per_second": 6814.18 | |
| }, | |
| { | |
| "epoch": 0.5347480106100796, | |
| "grad_norm": 0.9983384620282137, | |
| "learning_rate": 4.908476205654926e-05, | |
| "loss": 0.8534, | |
| "num_input_tokens_seen": 570380992, | |
| "step": 630, | |
| "train_runtime": 83704.2336, | |
| "train_tokens_per_second": 6814.243 | |
| }, | |
| { | |
| "epoch": 0.5389920424403183, | |
| "grad_norm": 0.9073500421909143, | |
| "learning_rate": 4.905139210588367e-05, | |
| "loss": 0.8345, | |
| "num_input_tokens_seen": 574750656, | |
| "step": 635, | |
| "train_runtime": 84320.0888, | |
| "train_tokens_per_second": 6816.296 | |
| }, | |
| { | |
| "epoch": 0.543236074270557, | |
| "grad_norm": 0.8991742233452803, | |
| "learning_rate": 4.9017436413962214e-05, | |
| "loss": 0.8238, | |
| "num_input_tokens_seen": 579340672, | |
| "step": 640, | |
| "train_runtime": 84974.8594, | |
| "train_tokens_per_second": 6817.789 | |
| }, | |
| { | |
| "epoch": 0.5474801061007958, | |
| "grad_norm": 4.653712662762566, | |
| "learning_rate": 4.898289580773284e-05, | |
| "loss": 0.8246, | |
| "num_input_tokens_seen": 583953984, | |
| "step": 645, | |
| "train_runtime": 85620.5078, | |
| "train_tokens_per_second": 6820.258 | |
| }, | |
| { | |
| "epoch": 0.5517241379310345, | |
| "grad_norm": 1.2360522636755376, | |
| "learning_rate": 4.8947771128388375e-05, | |
| "loss": 0.8556, | |
| "num_input_tokens_seen": 588613760, | |
| "step": 650, | |
| "train_runtime": 86304.8212, | |
| "train_tokens_per_second": 6820.172 | |
| }, | |
| { | |
| "epoch": 0.5559681697612732, | |
| "grad_norm": 1.012520096736992, | |
| "learning_rate": 4.891206323134598e-05, | |
| "loss": 0.8536, | |
| "num_input_tokens_seen": 593383872, | |
| "step": 655, | |
| "train_runtime": 87061.5623, | |
| "train_tokens_per_second": 6815.681 | |
| }, | |
| { | |
| "epoch": 0.560212201591512, | |
| "grad_norm": 0.8267761442768032, | |
| "learning_rate": 4.887577298622635e-05, | |
| "loss": 0.8353, | |
| "num_input_tokens_seen": 597805376, | |
| "step": 660, | |
| "train_runtime": 87694.9854, | |
| "train_tokens_per_second": 6816.871 | |
| }, | |
| { | |
| "epoch": 0.5644562334217507, | |
| "grad_norm": 0.7725220551193656, | |
| "learning_rate": 4.883890127683255e-05, | |
| "loss": 0.8328, | |
| "num_input_tokens_seen": 602285312, | |
| "step": 665, | |
| "train_runtime": 88327.7078, | |
| "train_tokens_per_second": 6818.759 | |
| }, | |
| { | |
| "epoch": 0.5687002652519894, | |
| "grad_norm": 1.0161234053244246, | |
| "learning_rate": 4.8801449001128455e-05, | |
| "loss": 0.8292, | |
| "num_input_tokens_seen": 606832384, | |
| "step": 670, | |
| "train_runtime": 88954.121, | |
| "train_tokens_per_second": 6821.858 | |
| }, | |
| { | |
| "epoch": 0.5729442970822282, | |
| "grad_norm": 0.9710515463242312, | |
| "learning_rate": 4.87634170712169e-05, | |
| "loss": 0.8299, | |
| "num_input_tokens_seen": 611343936, | |
| "step": 675, | |
| "train_runtime": 89596.2203, | |
| "train_tokens_per_second": 6823.323 | |
| }, | |
| { | |
| "epoch": 0.5771883289124669, | |
| "grad_norm": 0.877400582973328, | |
| "learning_rate": 4.872480641331747e-05, | |
| "loss": 0.8233, | |
| "num_input_tokens_seen": 615967936, | |
| "step": 680, | |
| "train_runtime": 90249.7743, | |
| "train_tokens_per_second": 6825.147 | |
| }, | |
| { | |
| "epoch": 0.5814323607427055, | |
| "grad_norm": 1.088037474445821, | |
| "learning_rate": 4.868561796774394e-05, | |
| "loss": 0.834, | |
| "num_input_tokens_seen": 620611584, | |
| "step": 685, | |
| "train_runtime": 90936.5491, | |
| "train_tokens_per_second": 6824.666 | |
| }, | |
| { | |
| "epoch": 0.5856763925729443, | |
| "grad_norm": 1.1907419217852628, | |
| "learning_rate": 4.8645852688881355e-05, | |
| "loss": 0.8409, | |
| "num_input_tokens_seen": 624875392, | |
| "step": 690, | |
| "train_runtime": 91496.14, | |
| "train_tokens_per_second": 6829.527 | |
| }, | |
| { | |
| "epoch": 0.589920424403183, | |
| "grad_norm": 1.050185541897206, | |
| "learning_rate": 4.860551154516285e-05, | |
| "loss": 0.8312, | |
| "num_input_tokens_seen": 629393280, | |
| "step": 695, | |
| "train_runtime": 92108.2917, | |
| "train_tokens_per_second": 6833.188 | |
| }, | |
| { | |
| "epoch": 0.5941644562334217, | |
| "grad_norm": 1.010783386950393, | |
| "learning_rate": 4.856459551904597e-05, | |
| "loss": 0.8435, | |
| "num_input_tokens_seen": 633982080, | |
| "step": 700, | |
| "train_runtime": 92773.246, | |
| "train_tokens_per_second": 6833.674 | |
| }, | |
| { | |
| "epoch": 0.5984084880636604, | |
| "grad_norm": 0.8655815203324599, | |
| "learning_rate": 4.8523105606988835e-05, | |
| "loss": 0.8341, | |
| "num_input_tokens_seen": 638403328, | |
| "step": 705, | |
| "train_runtime": 93409.0451, | |
| "train_tokens_per_second": 6834.492 | |
| }, | |
| { | |
| "epoch": 0.6026525198938992, | |
| "grad_norm": 1.0223867573550975, | |
| "learning_rate": 4.84810428194258e-05, | |
| "loss": 0.8298, | |
| "num_input_tokens_seen": 643227008, | |
| "step": 710, | |
| "train_runtime": 94093.6295, | |
| "train_tokens_per_second": 6836.031 | |
| }, | |
| { | |
| "epoch": 0.6068965517241379, | |
| "grad_norm": 0.9038681896334841, | |
| "learning_rate": 4.8438408180742894e-05, | |
| "loss": 0.8236, | |
| "num_input_tokens_seen": 647670528, | |
| "step": 715, | |
| "train_runtime": 94754.1626, | |
| "train_tokens_per_second": 6835.273 | |
| }, | |
| { | |
| "epoch": 0.6111405835543766, | |
| "grad_norm": 0.8915747409475175, | |
| "learning_rate": 4.839520272925286e-05, | |
| "loss": 0.8321, | |
| "num_input_tokens_seen": 652249152, | |
| "step": 720, | |
| "train_runtime": 95432.8253, | |
| "train_tokens_per_second": 6834.642 | |
| }, | |
| { | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 0.97865694250563, | |
| "learning_rate": 4.835142751716986e-05, | |
| "loss": 0.8209, | |
| "num_input_tokens_seen": 656865472, | |
| "step": 725, | |
| "train_runtime": 96098.174, | |
| "train_tokens_per_second": 6835.359 | |
| }, | |
| { | |
| "epoch": 0.6196286472148541, | |
| "grad_norm": 1.059303689963251, | |
| "learning_rate": 4.8307083610583846e-05, | |
| "loss": 0.8402, | |
| "num_input_tokens_seen": 661394048, | |
| "step": 730, | |
| "train_runtime": 96740.313, | |
| "train_tokens_per_second": 6836.799 | |
| }, | |
| { | |
| "epoch": 0.6238726790450928, | |
| "grad_norm": 1.0997482317899427, | |
| "learning_rate": 4.8262172089434635e-05, | |
| "loss": 0.8172, | |
| "num_input_tokens_seen": 666086336, | |
| "step": 735, | |
| "train_runtime": 97449.6172, | |
| "train_tokens_per_second": 6835.187 | |
| }, | |
| { | |
| "epoch": 0.6281167108753316, | |
| "grad_norm": 1.0533663604386134, | |
| "learning_rate": 4.8216694047485554e-05, | |
| "loss": 0.8044, | |
| "num_input_tokens_seen": 670654912, | |
| "step": 740, | |
| "train_runtime": 98101.755, | |
| "train_tokens_per_second": 6836.319 | |
| }, | |
| { | |
| "epoch": 0.6323607427055703, | |
| "grad_norm": 0.9308590706085377, | |
| "learning_rate": 4.817065059229682e-05, | |
| "loss": 0.8285, | |
| "num_input_tokens_seen": 675148480, | |
| "step": 745, | |
| "train_runtime": 98698.5544, | |
| "train_tokens_per_second": 6840.51 | |
| }, | |
| { | |
| "epoch": 0.636604774535809, | |
| "grad_norm": 1.3308817219838736, | |
| "learning_rate": 4.812404284519861e-05, | |
| "loss": 0.8158, | |
| "num_input_tokens_seen": 679716288, | |
| "step": 750, | |
| "train_runtime": 99373.7407, | |
| "train_tokens_per_second": 6839.999 | |
| }, | |
| { | |
| "epoch": 0.6408488063660478, | |
| "grad_norm": 0.9405621214215828, | |
| "learning_rate": 4.8076871941263676e-05, | |
| "loss": 0.8149, | |
| "num_input_tokens_seen": 684227584, | |
| "step": 755, | |
| "train_runtime": 100016.4536, | |
| "train_tokens_per_second": 6841.15 | |
| }, | |
| { | |
| "epoch": 0.6450928381962865, | |
| "grad_norm": 0.8872116598330962, | |
| "learning_rate": 4.8029139029279785e-05, | |
| "loss": 0.826, | |
| "num_input_tokens_seen": 688566720, | |
| "step": 760, | |
| "train_runtime": 100660.3191, | |
| "train_tokens_per_second": 6840.498 | |
| }, | |
| { | |
| "epoch": 0.6493368700265252, | |
| "grad_norm": 0.9148948434386133, | |
| "learning_rate": 4.798084527172167e-05, | |
| "loss": 0.8166, | |
| "num_input_tokens_seen": 693048704, | |
| "step": 765, | |
| "train_runtime": 101289.7336, | |
| "train_tokens_per_second": 6842.24 | |
| }, | |
| { | |
| "epoch": 0.6535809018567639, | |
| "grad_norm": 1.0036481967491233, | |
| "learning_rate": 4.793199184472274e-05, | |
| "loss": 0.7923, | |
| "num_input_tokens_seen": 697787008, | |
| "step": 770, | |
| "train_runtime": 101976.2461, | |
| "train_tokens_per_second": 6842.643 | |
| }, | |
| { | |
| "epoch": 0.6578249336870027, | |
| "grad_norm": 1.2742858954042269, | |
| "learning_rate": 4.7882579938046485e-05, | |
| "loss": 0.8215, | |
| "num_input_tokens_seen": 702239936, | |
| "step": 775, | |
| "train_runtime": 102610.041, | |
| "train_tokens_per_second": 6843.774 | |
| }, | |
| { | |
| "epoch": 0.6620689655172414, | |
| "grad_norm": 0.8006816309417074, | |
| "learning_rate": 4.783261075505743e-05, | |
| "loss": 0.8246, | |
| "num_input_tokens_seen": 706860352, | |
| "step": 780, | |
| "train_runtime": 103308.8254, | |
| "train_tokens_per_second": 6842.207 | |
| }, | |
| { | |
| "epoch": 0.6663129973474801, | |
| "grad_norm": 0.965961260253803, | |
| "learning_rate": 4.7782085512691875e-05, | |
| "loss": 0.8038, | |
| "num_input_tokens_seen": 711270720, | |
| "step": 785, | |
| "train_runtime": 103950.3781, | |
| "train_tokens_per_second": 6842.406 | |
| }, | |
| { | |
| "epoch": 0.6705570291777189, | |
| "grad_norm": 1.0034852088994206, | |
| "learning_rate": 4.7731005441428233e-05, | |
| "loss": 0.8138, | |
| "num_input_tokens_seen": 715836544, | |
| "step": 790, | |
| "train_runtime": 104619.9901, | |
| "train_tokens_per_second": 6842.254 | |
| }, | |
| { | |
| "epoch": 0.6748010610079576, | |
| "grad_norm": 1.0147223046467029, | |
| "learning_rate": 4.767937178525709e-05, | |
| "loss": 0.8025, | |
| "num_input_tokens_seen": 720186176, | |
| "step": 795, | |
| "train_runtime": 105243.2657, | |
| "train_tokens_per_second": 6843.062 | |
| }, | |
| { | |
| "epoch": 0.6790450928381963, | |
| "grad_norm": 0.926457311442124, | |
| "learning_rate": 4.7627185801650856e-05, | |
| "loss": 0.7936, | |
| "num_input_tokens_seen": 724606016, | |
| "step": 800, | |
| "train_runtime": 105887.5277, | |
| "train_tokens_per_second": 6843.167 | |
| }, | |
| { | |
| "epoch": 0.683289124668435, | |
| "grad_norm": 0.8015225346610259, | |
| "learning_rate": 4.757444876153323e-05, | |
| "loss": 0.8111, | |
| "num_input_tokens_seen": 729128640, | |
| "step": 805, | |
| "train_runtime": 106518.7609, | |
| "train_tokens_per_second": 6845.072 | |
| }, | |
| { | |
| "epoch": 0.6875331564986737, | |
| "grad_norm": 0.9590554887765099, | |
| "learning_rate": 4.752116194924816e-05, | |
| "loss": 0.817, | |
| "num_input_tokens_seen": 733513856, | |
| "step": 810, | |
| "train_runtime": 107154.5457, | |
| "train_tokens_per_second": 6845.383 | |
| }, | |
| { | |
| "epoch": 0.6917771883289124, | |
| "grad_norm": 1.252406369714219, | |
| "learning_rate": 4.746732666252861e-05, | |
| "loss": 0.8036, | |
| "num_input_tokens_seen": 737837376, | |
| "step": 815, | |
| "train_runtime": 107786.6851, | |
| "train_tokens_per_second": 6845.348 | |
| }, | |
| { | |
| "epoch": 0.6960212201591512, | |
| "grad_norm": 1.1913694647387847, | |
| "learning_rate": 4.7412944212464935e-05, | |
| "loss": 0.8184, | |
| "num_input_tokens_seen": 742147072, | |
| "step": 820, | |
| "train_runtime": 108401.4826, | |
| "train_tokens_per_second": 6846.282 | |
| }, | |
| { | |
| "epoch": 0.7002652519893899, | |
| "grad_norm": 0.9763850021800689, | |
| "learning_rate": 4.7358015923472986e-05, | |
| "loss": 0.8044, | |
| "num_input_tokens_seen": 747045952, | |
| "step": 825, | |
| "train_runtime": 109094.785, | |
| "train_tokens_per_second": 6847.678 | |
| }, | |
| { | |
| "epoch": 0.7045092838196286, | |
| "grad_norm": 0.9052886472757133, | |
| "learning_rate": 4.730254313326181e-05, | |
| "loss": 0.8081, | |
| "num_input_tokens_seen": 751393984, | |
| "step": 830, | |
| "train_runtime": 109732.5528, | |
| "train_tokens_per_second": 6847.503 | |
| }, | |
| { | |
| "epoch": 0.7087533156498673, | |
| "grad_norm": 0.8281556672805458, | |
| "learning_rate": 4.724652719280111e-05, | |
| "loss": 0.7982, | |
| "num_input_tokens_seen": 756075328, | |
| "step": 835, | |
| "train_runtime": 110411.1601, | |
| "train_tokens_per_second": 6847.816 | |
| }, | |
| { | |
| "epoch": 0.7129973474801061, | |
| "grad_norm": 0.971171145956504, | |
| "learning_rate": 4.718996946628829e-05, | |
| "loss": 0.7825, | |
| "num_input_tokens_seen": 760528320, | |
| "step": 840, | |
| "train_runtime": 111055.3652, | |
| "train_tokens_per_second": 6848.191 | |
| }, | |
| { | |
| "epoch": 0.7172413793103448, | |
| "grad_norm": 0.9931373968227002, | |
| "learning_rate": 4.713287133111533e-05, | |
| "loss": 0.8096, | |
| "num_input_tokens_seen": 765244928, | |
| "step": 845, | |
| "train_runtime": 111727.8513, | |
| "train_tokens_per_second": 6849.187 | |
| }, | |
| { | |
| "epoch": 0.7214854111405835, | |
| "grad_norm": 1.2834237882216515, | |
| "learning_rate": 4.707523417783511e-05, | |
| "loss": 0.7948, | |
| "num_input_tokens_seen": 769642624, | |
| "step": 850, | |
| "train_runtime": 112389.6421, | |
| "train_tokens_per_second": 6847.985 | |
| }, | |
| { | |
| "epoch": 0.7257294429708223, | |
| "grad_norm": 0.8486465229852926, | |
| "learning_rate": 4.701705941012767e-05, | |
| "loss": 0.8044, | |
| "num_input_tokens_seen": 774147136, | |
| "step": 855, | |
| "train_runtime": 113005.1043, | |
| "train_tokens_per_second": 6850.55 | |
| }, | |
| { | |
| "epoch": 0.729973474801061, | |
| "grad_norm": 0.7791940973514704, | |
| "learning_rate": 4.6958348444765954e-05, | |
| "loss": 0.7998, | |
| "num_input_tokens_seen": 778752064, | |
| "step": 860, | |
| "train_runtime": 113685.5974, | |
| "train_tokens_per_second": 6850.05 | |
| }, | |
| { | |
| "epoch": 0.7342175066312997, | |
| "grad_norm": 1.1164402590095137, | |
| "learning_rate": 4.689910271158131e-05, | |
| "loss": 0.8177, | |
| "num_input_tokens_seen": 783091968, | |
| "step": 865, | |
| "train_runtime": 114295.4264, | |
| "train_tokens_per_second": 6851.472 | |
| }, | |
| { | |
| "epoch": 0.7384615384615385, | |
| "grad_norm": 0.9094005817671243, | |
| "learning_rate": 4.6839323653428693e-05, | |
| "loss": 0.8154, | |
| "num_input_tokens_seen": 787572544, | |
| "step": 870, | |
| "train_runtime": 114937.7188, | |
| "train_tokens_per_second": 6852.168 | |
| }, | |
| { | |
| "epoch": 0.7427055702917772, | |
| "grad_norm": 1.0703160970060077, | |
| "learning_rate": 4.677901272615149e-05, | |
| "loss": 0.8013, | |
| "num_input_tokens_seen": 791977152, | |
| "step": 875, | |
| "train_runtime": 115587.8811, | |
| "train_tokens_per_second": 6851.732 | |
| }, | |
| { | |
| "epoch": 0.7469496021220159, | |
| "grad_norm": 0.749724050960587, | |
| "learning_rate": 4.6718171398546136e-05, | |
| "loss": 0.7849, | |
| "num_input_tokens_seen": 796372864, | |
| "step": 880, | |
| "train_runtime": 116239.6888, | |
| "train_tokens_per_second": 6851.127 | |
| }, | |
| { | |
| "epoch": 0.7511936339522547, | |
| "grad_norm": 0.8931070149936695, | |
| "learning_rate": 4.6656801152326244e-05, | |
| "loss": 0.7947, | |
| "num_input_tokens_seen": 800903424, | |
| "step": 885, | |
| "train_runtime": 116882.2446, | |
| "train_tokens_per_second": 6852.225 | |
| }, | |
| { | |
| "epoch": 0.7511936339522547, | |
| "eval_loss": 0.8023512363433838, | |
| "eval_runtime": 1055.9576, | |
| "eval_samples_per_second": 2.886, | |
| "eval_steps_per_second": 0.091, | |
| "num_input_tokens_seen": 800903424, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.7554376657824934, | |
| "grad_norm": 0.8326667759990252, | |
| "learning_rate": 4.6594903482086605e-05, | |
| "loss": 0.7865, | |
| "num_input_tokens_seen": 805358784, | |
| "step": 890, | |
| "train_runtime": 118552.9366, | |
| "train_tokens_per_second": 6793.242 | |
| }, | |
| { | |
| "epoch": 0.7596816976127321, | |
| "grad_norm": 0.8171872300696933, | |
| "learning_rate": 4.653247989526675e-05, | |
| "loss": 0.811, | |
| "num_input_tokens_seen": 809917248, | |
| "step": 895, | |
| "train_runtime": 119241.0553, | |
| "train_tokens_per_second": 6792.268 | |
| }, | |
| { | |
| "epoch": 0.7639257294429708, | |
| "grad_norm": 0.9066917659881738, | |
| "learning_rate": 4.646953191211422e-05, | |
| "loss": 0.7936, | |
| "num_input_tokens_seen": 814265600, | |
| "step": 900, | |
| "train_runtime": 119851.6659, | |
| "train_tokens_per_second": 6793.945 | |
| }, | |
| { | |
| "epoch": 0.7681697612732096, | |
| "grad_norm": 0.9338261305365108, | |
| "learning_rate": 4.640606106564759e-05, | |
| "loss": 0.8104, | |
| "num_input_tokens_seen": 818678144, | |
| "step": 905, | |
| "train_runtime": 120471.0976, | |
| "train_tokens_per_second": 6795.639 | |
| }, | |
| { | |
| "epoch": 0.7724137931034483, | |
| "grad_norm": 0.8719603501589874, | |
| "learning_rate": 4.6342068901619115e-05, | |
| "loss": 0.7913, | |
| "num_input_tokens_seen": 823195328, | |
| "step": 910, | |
| "train_runtime": 121127.4303, | |
| "train_tokens_per_second": 6796.11 | |
| }, | |
| { | |
| "epoch": 0.776657824933687, | |
| "grad_norm": 1.0213034444078577, | |
| "learning_rate": 4.6277556978477063e-05, | |
| "loss": 0.8081, | |
| "num_input_tokens_seen": 827899840, | |
| "step": 915, | |
| "train_runtime": 121853.2081, | |
| "train_tokens_per_second": 6794.239 | |
| }, | |
| { | |
| "epoch": 0.7809018567639258, | |
| "grad_norm": 1.0336867967280456, | |
| "learning_rate": 4.6212526867327785e-05, | |
| "loss": 0.8118, | |
| "num_input_tokens_seen": 832394688, | |
| "step": 920, | |
| "train_runtime": 122480.5431, | |
| "train_tokens_per_second": 6796.138 | |
| }, | |
| { | |
| "epoch": 0.7851458885941645, | |
| "grad_norm": 1.0694313116089975, | |
| "learning_rate": 4.614698015189744e-05, | |
| "loss": 0.8067, | |
| "num_input_tokens_seen": 837091072, | |
| "step": 925, | |
| "train_runtime": 123144.408, | |
| "train_tokens_per_second": 6797.638 | |
| }, | |
| { | |
| "epoch": 0.7893899204244031, | |
| "grad_norm": 0.9268002608848666, | |
| "learning_rate": 4.6080918428493447e-05, | |
| "loss": 0.7948, | |
| "num_input_tokens_seen": 841442112, | |
| "step": 930, | |
| "train_runtime": 123780.7152, | |
| "train_tokens_per_second": 6797.845 | |
| }, | |
| { | |
| "epoch": 0.793633952254642, | |
| "grad_norm": 0.9823980696238214, | |
| "learning_rate": 4.601434330596557e-05, | |
| "loss": 0.7926, | |
| "num_input_tokens_seen": 845885504, | |
| "step": 935, | |
| "train_runtime": 124463.32, | |
| "train_tokens_per_second": 6796.263 | |
| }, | |
| { | |
| "epoch": 0.7978779840848806, | |
| "grad_norm": 1.0766192496117053, | |
| "learning_rate": 4.594725640566679e-05, | |
| "loss": 0.8019, | |
| "num_input_tokens_seen": 850419456, | |
| "step": 940, | |
| "train_runtime": 125130.694, | |
| "train_tokens_per_second": 6796.25 | |
| }, | |
| { | |
| "epoch": 0.8021220159151193, | |
| "grad_norm": 1.0293248834632993, | |
| "learning_rate": 4.5879659361413754e-05, | |
| "loss": 0.8065, | |
| "num_input_tokens_seen": 854885120, | |
| "step": 945, | |
| "train_runtime": 125769.8721, | |
| "train_tokens_per_second": 6797.217 | |
| }, | |
| { | |
| "epoch": 0.8063660477453581, | |
| "grad_norm": 1.0089948918046479, | |
| "learning_rate": 4.581155381944705e-05, | |
| "loss": 0.8084, | |
| "num_input_tokens_seen": 859518400, | |
| "step": 950, | |
| "train_runtime": 126468.1103, | |
| "train_tokens_per_second": 6796.325 | |
| }, | |
| { | |
| "epoch": 0.8106100795755968, | |
| "grad_norm": 0.9051441031845476, | |
| "learning_rate": 4.574294143839107e-05, | |
| "loss": 0.7832, | |
| "num_input_tokens_seen": 863890816, | |
| "step": 955, | |
| "train_runtime": 127085.4612, | |
| "train_tokens_per_second": 6797.716 | |
| }, | |
| { | |
| "epoch": 0.8148541114058355, | |
| "grad_norm": 0.8826069009760195, | |
| "learning_rate": 4.567382388921363e-05, | |
| "loss": 0.8055, | |
| "num_input_tokens_seen": 868430208, | |
| "step": 960, | |
| "train_runtime": 127753.0201, | |
| "train_tokens_per_second": 6797.727 | |
| }, | |
| { | |
| "epoch": 0.8190981432360742, | |
| "grad_norm": 0.9189235994594633, | |
| "learning_rate": 4.560420285518529e-05, | |
| "loss": 0.8076, | |
| "num_input_tokens_seen": 873261376, | |
| "step": 965, | |
| "train_runtime": 128445.48, | |
| "train_tokens_per_second": 6798.693 | |
| }, | |
| { | |
| "epoch": 0.823342175066313, | |
| "grad_norm": 0.6649154972034895, | |
| "learning_rate": 4.5534080031838336e-05, | |
| "loss": 0.8748, | |
| "num_input_tokens_seen": 877848320, | |
| "step": 970, | |
| "train_runtime": 129117.949, | |
| "train_tokens_per_second": 6798.809 | |
| }, | |
| { | |
| "epoch": 0.8275862068965517, | |
| "grad_norm": 0.7645668184363056, | |
| "learning_rate": 4.5463457126925493e-05, | |
| "loss": 0.7949, | |
| "num_input_tokens_seen": 882236288, | |
| "step": 975, | |
| "train_runtime": 129732.9954, | |
| "train_tokens_per_second": 6800.4 | |
| }, | |
| { | |
| "epoch": 0.8318302387267904, | |
| "grad_norm": 0.9335431624509639, | |
| "learning_rate": 4.539233586037836e-05, | |
| "loss": 0.7904, | |
| "num_input_tokens_seen": 886710592, | |
| "step": 980, | |
| "train_runtime": 130375.8011, | |
| "train_tokens_per_second": 6801.19 | |
| }, | |
| { | |
| "epoch": 0.8360742705570292, | |
| "grad_norm": 0.8454411533111347, | |
| "learning_rate": 4.532071796426549e-05, | |
| "loss": 0.7944, | |
| "num_input_tokens_seen": 891233088, | |
| "step": 985, | |
| "train_runtime": 131026.9147, | |
| "train_tokens_per_second": 6801.909 | |
| }, | |
| { | |
| "epoch": 0.8403183023872679, | |
| "grad_norm": 0.9294276165792987, | |
| "learning_rate": 4.5248605182750224e-05, | |
| "loss": 0.8052, | |
| "num_input_tokens_seen": 895797056, | |
| "step": 990, | |
| "train_runtime": 131628.0839, | |
| "train_tokens_per_second": 6805.516 | |
| }, | |
| { | |
| "epoch": 0.8445623342175066, | |
| "grad_norm": 1.0688757710466947, | |
| "learning_rate": 4.5175999272048205e-05, | |
| "loss": 0.7871, | |
| "num_input_tokens_seen": 900477248, | |
| "step": 995, | |
| "train_runtime": 132307.0252, | |
| "train_tokens_per_second": 6805.967 | |
| }, | |
| { | |
| "epoch": 0.8488063660477454, | |
| "grad_norm": 0.8274182681903438, | |
| "learning_rate": 4.510290200038463e-05, | |
| "loss": 0.8022, | |
| "num_input_tokens_seen": 905019392, | |
| "step": 1000, | |
| "train_runtime": 132955.1592, | |
| "train_tokens_per_second": 6806.952 | |
| }, | |
| { | |
| "epoch": 0.8530503978779841, | |
| "grad_norm": 1.0546997932680735, | |
| "learning_rate": 4.502931514795116e-05, | |
| "loss": 0.7817, | |
| "num_input_tokens_seen": 909356352, | |
| "step": 1005, | |
| "train_runtime": 133539.5693, | |
| "train_tokens_per_second": 6809.64 | |
| }, | |
| { | |
| "epoch": 0.8572944297082228, | |
| "grad_norm": 1.2588102790900502, | |
| "learning_rate": 4.495524050686257e-05, | |
| "loss": 0.788, | |
| "num_input_tokens_seen": 913771904, | |
| "step": 1010, | |
| "train_runtime": 134129.5941, | |
| "train_tokens_per_second": 6812.605 | |
| }, | |
| { | |
| "epoch": 0.8615384615384616, | |
| "grad_norm": 1.1408803201963393, | |
| "learning_rate": 4.488067988111313e-05, | |
| "loss": 0.8001, | |
| "num_input_tokens_seen": 918194944, | |
| "step": 1015, | |
| "train_runtime": 134742.2212, | |
| "train_tokens_per_second": 6814.456 | |
| }, | |
| { | |
| "epoch": 0.8657824933687003, | |
| "grad_norm": 1.1899291730699249, | |
| "learning_rate": 4.480563508653264e-05, | |
| "loss": 0.7955, | |
| "num_input_tokens_seen": 922666688, | |
| "step": 1020, | |
| "train_runtime": 135362.6791, | |
| "train_tokens_per_second": 6816.256 | |
| }, | |
| { | |
| "epoch": 0.870026525198939, | |
| "grad_norm": 0.8130281973510006, | |
| "learning_rate": 4.473010795074221e-05, | |
| "loss": 0.7979, | |
| "num_input_tokens_seen": 927156672, | |
| "step": 1025, | |
| "train_runtime": 136033.28, | |
| "train_tokens_per_second": 6815.661 | |
| }, | |
| { | |
| "epoch": 0.8742705570291777, | |
| "grad_norm": 0.7570121360764169, | |
| "learning_rate": 4.465410031310979e-05, | |
| "loss": 0.8073, | |
| "num_input_tokens_seen": 931890368, | |
| "step": 1030, | |
| "train_runtime": 136755.15, | |
| "train_tokens_per_second": 6814.298 | |
| }, | |
| { | |
| "epoch": 0.8785145888594165, | |
| "grad_norm": 0.9159403793732956, | |
| "learning_rate": 4.457761402470532e-05, | |
| "loss": 0.8, | |
| "num_input_tokens_seen": 936323264, | |
| "step": 1035, | |
| "train_runtime": 137383.018, | |
| "train_tokens_per_second": 6815.422 | |
| }, | |
| { | |
| "epoch": 0.8827586206896552, | |
| "grad_norm": 1.0377351854287133, | |
| "learning_rate": 4.450065094825567e-05, | |
| "loss": 0.801, | |
| "num_input_tokens_seen": 940907840, | |
| "step": 1040, | |
| "train_runtime": 138076.6951, | |
| "train_tokens_per_second": 6814.386 | |
| }, | |
| { | |
| "epoch": 0.8870026525198939, | |
| "grad_norm": 0.8502067435449593, | |
| "learning_rate": 4.442321295809932e-05, | |
| "loss": 0.7884, | |
| "num_input_tokens_seen": 945377920, | |
| "step": 1045, | |
| "train_runtime": 138693.9405, | |
| "train_tokens_per_second": 6816.289 | |
| }, | |
| { | |
| "epoch": 0.8912466843501327, | |
| "grad_norm": 1.0008412143003285, | |
| "learning_rate": 4.4345301940140625e-05, | |
| "loss": 0.794, | |
| "num_input_tokens_seen": 949653760, | |
| "step": 1050, | |
| "train_runtime": 139252.8781, | |
| "train_tokens_per_second": 6819.635 | |
| }, | |
| { | |
| "epoch": 0.8954907161803713, | |
| "grad_norm": 0.7701062680223436, | |
| "learning_rate": 4.426691979180395e-05, | |
| "loss": 0.7879, | |
| "num_input_tokens_seen": 953995840, | |
| "step": 1055, | |
| "train_runtime": 139848.6279, | |
| "train_tokens_per_second": 6821.632 | |
| }, | |
| { | |
| "epoch": 0.89973474801061, | |
| "grad_norm": 0.7657676098694198, | |
| "learning_rate": 4.4188068421987475e-05, | |
| "loss": 0.78, | |
| "num_input_tokens_seen": 958380160, | |
| "step": 1060, | |
| "train_runtime": 140471.8443, | |
| "train_tokens_per_second": 6822.578 | |
| }, | |
| { | |
| "epoch": 0.9039787798408488, | |
| "grad_norm": 0.9443174140002766, | |
| "learning_rate": 4.410874975101662e-05, | |
| "loss": 0.7975, | |
| "num_input_tokens_seen": 962938624, | |
| "step": 1065, | |
| "train_runtime": 141147.4618, | |
| "train_tokens_per_second": 6822.217 | |
| }, | |
| { | |
| "epoch": 0.9082228116710875, | |
| "grad_norm": 1.0109837282388179, | |
| "learning_rate": 4.402896571059738e-05, | |
| "loss": 0.7979, | |
| "num_input_tokens_seen": 967324608, | |
| "step": 1070, | |
| "train_runtime": 141768.6116, | |
| "train_tokens_per_second": 6823.264 | |
| }, | |
| { | |
| "epoch": 0.9124668435013262, | |
| "grad_norm": 0.8120979404556385, | |
| "learning_rate": 4.394871824376923e-05, | |
| "loss": 0.7889, | |
| "num_input_tokens_seen": 971853824, | |
| "step": 1075, | |
| "train_runtime": 142397.8849, | |
| "train_tokens_per_second": 6824.918 | |
| }, | |
| { | |
| "epoch": 0.916710875331565, | |
| "grad_norm": 0.7674620546551851, | |
| "learning_rate": 4.386800930485777e-05, | |
| "loss": 0.7872, | |
| "num_input_tokens_seen": 976342336, | |
| "step": 1080, | |
| "train_runtime": 143033.2803, | |
| "train_tokens_per_second": 6825.98 | |
| }, | |
| { | |
| "epoch": 0.9209549071618037, | |
| "grad_norm": 0.8642492390279295, | |
| "learning_rate": 4.378684085942722e-05, | |
| "loss": 0.7968, | |
| "num_input_tokens_seen": 980950016, | |
| "step": 1085, | |
| "train_runtime": 143727.3251, | |
| "train_tokens_per_second": 6825.077 | |
| }, | |
| { | |
| "epoch": 0.9251989389920424, | |
| "grad_norm": 0.7868533581684258, | |
| "learning_rate": 4.370521488423248e-05, | |
| "loss": 0.7723, | |
| "num_input_tokens_seen": 985579968, | |
| "step": 1090, | |
| "train_runtime": 144414.0117, | |
| "train_tokens_per_second": 6824.684 | |
| }, | |
| { | |
| "epoch": 0.9294429708222812, | |
| "grad_norm": 0.9784190030448764, | |
| "learning_rate": 4.3623133367171e-05, | |
| "loss": 0.7657, | |
| "num_input_tokens_seen": 990242240, | |
| "step": 1095, | |
| "train_runtime": 145104.4504, | |
| "train_tokens_per_second": 6824.341 | |
| }, | |
| { | |
| "epoch": 0.9336870026525199, | |
| "grad_norm": 0.8416491217730794, | |
| "learning_rate": 4.354059830723439e-05, | |
| "loss": 0.7762, | |
| "num_input_tokens_seen": 994700352, | |
| "step": 1100, | |
| "train_runtime": 145724.9371, | |
| "train_tokens_per_second": 6825.876 | |
| }, | |
| { | |
| "epoch": 0.9379310344827586, | |
| "grad_norm": 0.8128690749017204, | |
| "learning_rate": 4.34576117144597e-05, | |
| "loss": 0.7872, | |
| "num_input_tokens_seen": 999373568, | |
| "step": 1105, | |
| "train_runtime": 146429.9411, | |
| "train_tokens_per_second": 6824.926 | |
| }, | |
| { | |
| "epoch": 0.9421750663129973, | |
| "grad_norm": 0.8795752491352558, | |
| "learning_rate": 4.337417560988053e-05, | |
| "loss": 0.7907, | |
| "num_input_tokens_seen": 1003937216, | |
| "step": 1110, | |
| "train_runtime": 147068.6613, | |
| "train_tokens_per_second": 6826.316 | |
| }, | |
| { | |
| "epoch": 0.9464190981432361, | |
| "grad_norm": 1.0803855481863844, | |
| "learning_rate": 4.329029202547774e-05, | |
| "loss": 0.7802, | |
| "num_input_tokens_seen": 1008544768, | |
| "step": 1115, | |
| "train_runtime": 147756.5973, | |
| "train_tokens_per_second": 6825.717 | |
| }, | |
| { | |
| "epoch": 0.9506631299734748, | |
| "grad_norm": 0.9249709902148427, | |
| "learning_rate": 4.3205963004130016e-05, | |
| "loss": 0.7835, | |
| "num_input_tokens_seen": 1013050048, | |
| "step": 1120, | |
| "train_runtime": 148402.3493, | |
| "train_tokens_per_second": 6826.375 | |
| }, | |
| { | |
| "epoch": 0.9549071618037135, | |
| "grad_norm": 0.8556253260160931, | |
| "learning_rate": 4.3121190599564075e-05, | |
| "loss": 0.7797, | |
| "num_input_tokens_seen": 1017459840, | |
| "step": 1125, | |
| "train_runtime": 149027.5856, | |
| "train_tokens_per_second": 6827.326 | |
| }, | |
| { | |
| "epoch": 0.9591511936339523, | |
| "grad_norm": 1.0166848790525587, | |
| "learning_rate": 4.30359768763047e-05, | |
| "loss": 0.7676, | |
| "num_input_tokens_seen": 1022257088, | |
| "step": 1130, | |
| "train_runtime": 149712.8732, | |
| "train_tokens_per_second": 6828.117 | |
| }, | |
| { | |
| "epoch": 0.963395225464191, | |
| "grad_norm": 0.9097894747416901, | |
| "learning_rate": 4.2950323909624404e-05, | |
| "loss": 0.7736, | |
| "num_input_tokens_seen": 1026797248, | |
| "step": 1135, | |
| "train_runtime": 150374.5156, | |
| "train_tokens_per_second": 6828.266 | |
| }, | |
| { | |
| "epoch": 0.9676392572944297, | |
| "grad_norm": 0.7704218910218441, | |
| "learning_rate": 4.286423378549294e-05, | |
| "loss": 0.7899, | |
| "num_input_tokens_seen": 1031489344, | |
| "step": 1140, | |
| "train_runtime": 151058.329, | |
| "train_tokens_per_second": 6828.418 | |
| }, | |
| { | |
| "epoch": 0.9718832891246685, | |
| "grad_norm": 0.7852167446402514, | |
| "learning_rate": 4.2777708600526475e-05, | |
| "loss": 0.7825, | |
| "num_input_tokens_seen": 1035924096, | |
| "step": 1145, | |
| "train_runtime": 151677.6959, | |
| "train_tokens_per_second": 6829.772 | |
| }, | |
| { | |
| "epoch": 0.9761273209549072, | |
| "grad_norm": 0.8709541370781978, | |
| "learning_rate": 4.269075046193651e-05, | |
| "loss": 0.7853, | |
| "num_input_tokens_seen": 1040557120, | |
| "step": 1150, | |
| "train_runtime": 152331.7174, | |
| "train_tokens_per_second": 6830.863 | |
| }, | |
| { | |
| "epoch": 0.9803713527851459, | |
| "grad_norm": 0.7354772314266763, | |
| "learning_rate": 4.2603361487478635e-05, | |
| "loss": 0.7796, | |
| "num_input_tokens_seen": 1045138240, | |
| "step": 1155, | |
| "train_runtime": 153014.033, | |
| "train_tokens_per_second": 6830.342 | |
| }, | |
| { | |
| "epoch": 0.9846153846153847, | |
| "grad_norm": 0.8929842185397294, | |
| "learning_rate": 4.2515543805400845e-05, | |
| "loss": 0.7931, | |
| "num_input_tokens_seen": 1049637440, | |
| "step": 1160, | |
| "train_runtime": 153673.9541, | |
| "train_tokens_per_second": 6830.288 | |
| }, | |
| { | |
| "epoch": 0.9888594164456234, | |
| "grad_norm": 0.8843306924811901, | |
| "learning_rate": 4.2427299554391795e-05, | |
| "loss": 0.7818, | |
| "num_input_tokens_seen": 1054084800, | |
| "step": 1165, | |
| "train_runtime": 154301.843, | |
| "train_tokens_per_second": 6831.317 | |
| }, | |
| { | |
| "epoch": 0.993103448275862, | |
| "grad_norm": 0.7260294542114571, | |
| "learning_rate": 4.2338630883528694e-05, | |
| "loss": 0.7868, | |
| "num_input_tokens_seen": 1058576128, | |
| "step": 1170, | |
| "train_runtime": 154951.0655, | |
| "train_tokens_per_second": 6831.68 | |
| }, | |
| { | |
| "epoch": 0.9973474801061007, | |
| "grad_norm": 1.0310167390412928, | |
| "learning_rate": 4.224953995222495e-05, | |
| "loss": 0.7913, | |
| "num_input_tokens_seen": 1063234944, | |
| "step": 1175, | |
| "train_runtime": 155625.4902, | |
| "train_tokens_per_second": 6832.01 | |
| }, | |
| { | |
| "epoch": 1.0008488063660477, | |
| "grad_norm": 0.9546542980993455, | |
| "learning_rate": 4.2160028930177586e-05, | |
| "loss": 0.6435, | |
| "num_input_tokens_seen": 1066978304, | |
| "step": 1180, | |
| "train_runtime": 156195.3002, | |
| "train_tokens_per_second": 6831.053 | |
| }, | |
| { | |
| "epoch": 1.0008488063660477, | |
| "eval_loss": 0.7810727953910828, | |
| "eval_runtime": 1060.3774, | |
| "eval_samples_per_second": 2.874, | |
| "eval_steps_per_second": 0.091, | |
| "num_input_tokens_seen": 1066978304, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.0050928381962865, | |
| "grad_norm": 1.0559718016156323, | |
| "learning_rate": 4.207009999731441e-05, | |
| "loss": 0.724, | |
| "num_input_tokens_seen": 1071397376, | |
| "step": 1185, | |
| "train_runtime": 157954.8381, | |
| "train_tokens_per_second": 6782.935 | |
| }, | |
| { | |
| "epoch": 1.0093368700265253, | |
| "grad_norm": 0.8356946223535584, | |
| "learning_rate": 4.1979755343740936e-05, | |
| "loss": 0.7198, | |
| "num_input_tokens_seen": 1075925056, | |
| "step": 1190, | |
| "train_runtime": 158621.7481, | |
| "train_tokens_per_second": 6782.961 | |
| }, | |
| { | |
| "epoch": 1.0135809018567639, | |
| "grad_norm": 0.9532551565257256, | |
| "learning_rate": 4.188899716968699e-05, | |
| "loss": 0.7137, | |
| "num_input_tokens_seen": 1080308416, | |
| "step": 1195, | |
| "train_runtime": 159243.7038, | |
| "train_tokens_per_second": 6783.995 | |
| }, | |
| { | |
| "epoch": 1.0178249336870027, | |
| "grad_norm": 0.886163908743495, | |
| "learning_rate": 4.179782768545321e-05, | |
| "loss": 0.6985, | |
| "num_input_tokens_seen": 1084861632, | |
| "step": 1200, | |
| "train_runtime": 159917.1941, | |
| "train_tokens_per_second": 6783.896 | |
| }, | |
| { | |
| "epoch": 1.0220689655172415, | |
| "grad_norm": 0.8306981011679802, | |
| "learning_rate": 4.170624911135713e-05, | |
| "loss": 0.7246, | |
| "num_input_tokens_seen": 1089273600, | |
| "step": 1205, | |
| "train_runtime": 160556.71, | |
| "train_tokens_per_second": 6784.354 | |
| }, | |
| { | |
| "epoch": 1.02631299734748, | |
| "grad_norm": 0.9829055718597421, | |
| "learning_rate": 4.161426367767921e-05, | |
| "loss": 0.7219, | |
| "num_input_tokens_seen": 1093625472, | |
| "step": 1210, | |
| "train_runtime": 161182.2827, | |
| "train_tokens_per_second": 6785.023 | |
| }, | |
| { | |
| "epoch": 1.0305570291777189, | |
| "grad_norm": 0.9263411832733288, | |
| "learning_rate": 4.1521873624608396e-05, | |
| "loss": 0.7293, | |
| "num_input_tokens_seen": 1098036992, | |
| "step": 1215, | |
| "train_runtime": 161813.3681, | |
| "train_tokens_per_second": 6785.824 | |
| }, | |
| { | |
| "epoch": 1.0348010610079577, | |
| "grad_norm": 1.3238936418341176, | |
| "learning_rate": 4.1429081202187667e-05, | |
| "loss": 0.7214, | |
| "num_input_tokens_seen": 1102560128, | |
| "step": 1220, | |
| "train_runtime": 162471.5936, | |
| "train_tokens_per_second": 6786.172 | |
| }, | |
| { | |
| "epoch": 1.0390450928381962, | |
| "grad_norm": 0.822682923114858, | |
| "learning_rate": 4.1335888670259196e-05, | |
| "loss": 0.704, | |
| "num_input_tokens_seen": 1107129920, | |
| "step": 1225, | |
| "train_runtime": 163123.1296, | |
| "train_tokens_per_second": 6787.081 | |
| }, | |
| { | |
| "epoch": 1.043289124668435, | |
| "grad_norm": 0.9140492461674421, | |
| "learning_rate": 4.12422982984093e-05, | |
| "loss": 0.7199, | |
| "num_input_tokens_seen": 1111822080, | |
| "step": 1230, | |
| "train_runtime": 163825.6741, | |
| "train_tokens_per_second": 6786.617 | |
| }, | |
| { | |
| "epoch": 1.0475331564986738, | |
| "grad_norm": 0.8760621488535013, | |
| "learning_rate": 4.11483123659132e-05, | |
| "loss": 0.7055, | |
| "num_input_tokens_seen": 1116287808, | |
| "step": 1235, | |
| "train_runtime": 164439.6584, | |
| "train_tokens_per_second": 6788.434 | |
| }, | |
| { | |
| "epoch": 1.0517771883289124, | |
| "grad_norm": 0.8280642864840718, | |
| "learning_rate": 4.1053933161679494e-05, | |
| "loss": 0.7235, | |
| "num_input_tokens_seen": 1120773120, | |
| "step": 1240, | |
| "train_runtime": 165072.1322, | |
| "train_tokens_per_second": 6789.596 | |
| }, | |
| { | |
| "epoch": 1.0560212201591512, | |
| "grad_norm": 0.9389411578202482, | |
| "learning_rate": 4.095916298419441e-05, | |
| "loss": 0.7058, | |
| "num_input_tokens_seen": 1125450432, | |
| "step": 1245, | |
| "train_runtime": 165746.363, | |
| "train_tokens_per_second": 6790.197 | |
| }, | |
| { | |
| "epoch": 1.06026525198939, | |
| "grad_norm": 0.8674975290576171, | |
| "learning_rate": 4.0864004141465844e-05, | |
| "loss": 0.7144, | |
| "num_input_tokens_seen": 1129770880, | |
| "step": 1250, | |
| "train_runtime": 166381.7549, | |
| "train_tokens_per_second": 6790.233 | |
| }, | |
| { | |
| "epoch": 1.0645092838196286, | |
| "grad_norm": 0.8423637185173262, | |
| "learning_rate": 4.0768458950967135e-05, | |
| "loss": 0.6924, | |
| "num_input_tokens_seen": 1134325824, | |
| "step": 1255, | |
| "train_runtime": 167041.841, | |
| "train_tokens_per_second": 6790.669 | |
| }, | |
| { | |
| "epoch": 1.0687533156498674, | |
| "grad_norm": 0.8463731702794449, | |
| "learning_rate": 4.067252973958064e-05, | |
| "loss": 0.7022, | |
| "num_input_tokens_seen": 1138890880, | |
| "step": 1260, | |
| "train_runtime": 167698.4028, | |
| "train_tokens_per_second": 6791.304 | |
| }, | |
| { | |
| "epoch": 1.072997347480106, | |
| "grad_norm": 1.2814601777565326, | |
| "learning_rate": 4.0576218843541046e-05, | |
| "loss": 0.7244, | |
| "num_input_tokens_seen": 1143446080, | |
| "step": 1265, | |
| "train_runtime": 168389.1765, | |
| "train_tokens_per_second": 6790.496 | |
| }, | |
| { | |
| "epoch": 1.0772413793103448, | |
| "grad_norm": 0.7804384260432371, | |
| "learning_rate": 4.0479528608378515e-05, | |
| "loss": 0.7118, | |
| "num_input_tokens_seen": 1148022848, | |
| "step": 1270, | |
| "train_runtime": 169068.4692, | |
| "train_tokens_per_second": 6790.284 | |
| }, | |
| { | |
| "epoch": 1.0814854111405836, | |
| "grad_norm": 0.8440032711255191, | |
| "learning_rate": 4.0382461388861505e-05, | |
| "loss": 0.7069, | |
| "num_input_tokens_seen": 1152678080, | |
| "step": 1275, | |
| "train_runtime": 169749.9181, | |
| "train_tokens_per_second": 6790.449 | |
| }, | |
| { | |
| "epoch": 1.0857294429708222, | |
| "grad_norm": 1.0462601786941845, | |
| "learning_rate": 4.0285019548939464e-05, | |
| "loss": 0.7009, | |
| "num_input_tokens_seen": 1157385088, | |
| "step": 1280, | |
| "train_runtime": 170412.1829, | |
| "train_tokens_per_second": 6791.68 | |
| }, | |
| { | |
| "epoch": 1.089973474801061, | |
| "grad_norm": 0.7949896906145127, | |
| "learning_rate": 4.018720546168524e-05, | |
| "loss": 0.714, | |
| "num_input_tokens_seen": 1161843200, | |
| "step": 1285, | |
| "train_runtime": 171052.7364, | |
| "train_tokens_per_second": 6792.31 | |
| }, | |
| { | |
| "epoch": 1.0942175066312998, | |
| "grad_norm": 0.8723394657733861, | |
| "learning_rate": 4.008902150923731e-05, | |
| "loss": 0.7173, | |
| "num_input_tokens_seen": 1166275008, | |
| "step": 1290, | |
| "train_runtime": 171633.6694, | |
| "train_tokens_per_second": 6795.141 | |
| }, | |
| { | |
| "epoch": 1.0984615384615384, | |
| "grad_norm": 1.271199939246554, | |
| "learning_rate": 3.999047008274173e-05, | |
| "loss": 0.718, | |
| "num_input_tokens_seen": 1170805952, | |
| "step": 1295, | |
| "train_runtime": 172293.1957, | |
| "train_tokens_per_second": 6795.428 | |
| }, | |
| { | |
| "epoch": 1.1027055702917772, | |
| "grad_norm": 0.7123662125520419, | |
| "learning_rate": 3.989155358229394e-05, | |
| "loss": 0.7326, | |
| "num_input_tokens_seen": 1175398720, | |
| "step": 1300, | |
| "train_runtime": 172948.1235, | |
| "train_tokens_per_second": 6796.25 | |
| }, | |
| { | |
| "epoch": 1.106949602122016, | |
| "grad_norm": 0.838546216114602, | |
| "learning_rate": 3.979227441688028e-05, | |
| "loss": 0.7096, | |
| "num_input_tokens_seen": 1179790336, | |
| "step": 1305, | |
| "train_runtime": 173532.3842, | |
| "train_tokens_per_second": 6798.675 | |
| }, | |
| { | |
| "epoch": 1.1111936339522546, | |
| "grad_norm": 1.0578594741413954, | |
| "learning_rate": 3.969263500431935e-05, | |
| "loss": 0.736, | |
| "num_input_tokens_seen": 1184330304, | |
| "step": 1310, | |
| "train_runtime": 174200.2849, | |
| "train_tokens_per_second": 6798.67 | |
| }, | |
| { | |
| "epoch": 1.1154376657824934, | |
| "grad_norm": 1.0727364156307728, | |
| "learning_rate": 3.9592637771203114e-05, | |
| "loss": 0.7149, | |
| "num_input_tokens_seen": 1188880384, | |
| "step": 1315, | |
| "train_runtime": 174872.9271, | |
| "train_tokens_per_second": 6798.539 | |
| }, | |
| { | |
| "epoch": 1.1196816976127322, | |
| "grad_norm": 1.4213471650077618, | |
| "learning_rate": 3.949228515283777e-05, | |
| "loss": 0.7044, | |
| "num_input_tokens_seen": 1193170816, | |
| "step": 1320, | |
| "train_runtime": 175486.1393, | |
| "train_tokens_per_second": 6799.231 | |
| }, | |
| { | |
| "epoch": 1.1239257294429708, | |
| "grad_norm": 0.9528308642512087, | |
| "learning_rate": 3.9391579593184525e-05, | |
| "loss": 0.7046, | |
| "num_input_tokens_seen": 1197641344, | |
| "step": 1325, | |
| "train_runtime": 176147.7116, | |
| "train_tokens_per_second": 6799.074 | |
| }, | |
| { | |
| "epoch": 1.1281697612732096, | |
| "grad_norm": 0.8240250587692688, | |
| "learning_rate": 3.929052354479999e-05, | |
| "loss": 0.7073, | |
| "num_input_tokens_seen": 1202042432, | |
| "step": 1330, | |
| "train_runtime": 176809.4403, | |
| "train_tokens_per_second": 6798.52 | |
| }, | |
| { | |
| "epoch": 1.1324137931034484, | |
| "grad_norm": 0.9575820680101333, | |
| "learning_rate": 3.918911946877651e-05, | |
| "loss": 0.7123, | |
| "num_input_tokens_seen": 1206438080, | |
| "step": 1335, | |
| "train_runtime": 177399.569, | |
| "train_tokens_per_second": 6800.682 | |
| }, | |
| { | |
| "epoch": 1.136657824933687, | |
| "grad_norm": 1.0494568271450921, | |
| "learning_rate": 3.908736983468219e-05, | |
| "loss": 0.7037, | |
| "num_input_tokens_seen": 1211039616, | |
| "step": 1340, | |
| "train_runtime": 178060.7312, | |
| "train_tokens_per_second": 6801.273 | |
| }, | |
| { | |
| "epoch": 1.1409018567639257, | |
| "grad_norm": 1.3953016773438447, | |
| "learning_rate": 3.898527712050074e-05, | |
| "loss": 0.6992, | |
| "num_input_tokens_seen": 1215405568, | |
| "step": 1345, | |
| "train_runtime": 178700.2333, | |
| "train_tokens_per_second": 6801.365 | |
| }, | |
| { | |
| "epoch": 1.1451458885941646, | |
| "grad_norm": 0.9502009725403173, | |
| "learning_rate": 3.88828438125712e-05, | |
| "loss": 0.7273, | |
| "num_input_tokens_seen": 1220023168, | |
| "step": 1350, | |
| "train_runtime": 179354.7367, | |
| "train_tokens_per_second": 6802.291 | |
| }, | |
| { | |
| "epoch": 1.1493899204244031, | |
| "grad_norm": 1.1556987622554753, | |
| "learning_rate": 3.878007240552732e-05, | |
| "loss": 0.6946, | |
| "num_input_tokens_seen": 1224574464, | |
| "step": 1355, | |
| "train_runtime": 180001.4614, | |
| "train_tokens_per_second": 6803.136 | |
| }, | |
| { | |
| "epoch": 1.153633952254642, | |
| "grad_norm": 0.792466643427966, | |
| "learning_rate": 3.867696540223681e-05, | |
| "loss": 0.708, | |
| "num_input_tokens_seen": 1229115520, | |
| "step": 1360, | |
| "train_runtime": 180638.8559, | |
| "train_tokens_per_second": 6804.27 | |
| }, | |
| { | |
| "epoch": 1.1578779840848807, | |
| "grad_norm": 0.9099009827745455, | |
| "learning_rate": 3.8573525313740435e-05, | |
| "loss": 0.7198, | |
| "num_input_tokens_seen": 1233652160, | |
| "step": 1365, | |
| "train_runtime": 181281.2594, | |
| "train_tokens_per_second": 6805.183 | |
| }, | |
| { | |
| "epoch": 1.1621220159151193, | |
| "grad_norm": 0.8873710969043223, | |
| "learning_rate": 3.846975465919079e-05, | |
| "loss": 0.7047, | |
| "num_input_tokens_seen": 1238186112, | |
| "step": 1370, | |
| "train_runtime": 181914.6873, | |
| "train_tokens_per_second": 6806.411 | |
| }, | |
| { | |
| "epoch": 1.1663660477453581, | |
| "grad_norm": 1.0769227041949128, | |
| "learning_rate": 3.836565596579103e-05, | |
| "loss": 0.7363, | |
| "num_input_tokens_seen": 1242761728, | |
| "step": 1375, | |
| "train_runtime": 182587.2454, | |
| "train_tokens_per_second": 6806.399 | |
| }, | |
| { | |
| "epoch": 1.1706100795755967, | |
| "grad_norm": 0.9446612826256684, | |
| "learning_rate": 3.826123176873324e-05, | |
| "loss": 0.7001, | |
| "num_input_tokens_seen": 1247182656, | |
| "step": 1380, | |
| "train_runtime": 183248.8336, | |
| "train_tokens_per_second": 6805.951 | |
| }, | |
| { | |
| "epoch": 1.1748541114058355, | |
| "grad_norm": 0.7930410617767145, | |
| "learning_rate": 3.8156484611136774e-05, | |
| "loss": 0.7121, | |
| "num_input_tokens_seen": 1251653056, | |
| "step": 1385, | |
| "train_runtime": 183867.2318, | |
| "train_tokens_per_second": 6807.374 | |
| }, | |
| { | |
| "epoch": 1.1790981432360743, | |
| "grad_norm": 1.2141311456918997, | |
| "learning_rate": 3.805141704398626e-05, | |
| "loss": 0.7085, | |
| "num_input_tokens_seen": 1256043584, | |
| "step": 1390, | |
| "train_runtime": 184507.3826, | |
| "train_tokens_per_second": 6807.552 | |
| }, | |
| { | |
| "epoch": 1.1833421750663131, | |
| "grad_norm": 0.8727742652201835, | |
| "learning_rate": 3.794603162606949e-05, | |
| "loss": 0.7021, | |
| "num_input_tokens_seen": 1260434688, | |
| "step": 1395, | |
| "train_runtime": 185139.6011, | |
| "train_tokens_per_second": 6808.023 | |
| }, | |
| { | |
| "epoch": 1.1875862068965517, | |
| "grad_norm": 0.7037237894630392, | |
| "learning_rate": 3.784033092391513e-05, | |
| "loss": 0.732, | |
| "num_input_tokens_seen": 1264932736, | |
| "step": 1400, | |
| "train_runtime": 185754.0941, | |
| "train_tokens_per_second": 6809.717 | |
| }, | |
| { | |
| "epoch": 1.1918302387267905, | |
| "grad_norm": 0.8398911891859556, | |
| "learning_rate": 3.773431751173018e-05, | |
| "loss": 0.7254, | |
| "num_input_tokens_seen": 1269425664, | |
| "step": 1405, | |
| "train_runtime": 186452.0318, | |
| "train_tokens_per_second": 6808.323 | |
| }, | |
| { | |
| "epoch": 1.196074270557029, | |
| "grad_norm": 1.046439278032978, | |
| "learning_rate": 3.76279939713373e-05, | |
| "loss": 0.7034, | |
| "num_input_tokens_seen": 1273725056, | |
| "step": 1410, | |
| "train_runtime": 187095.6204, | |
| "train_tokens_per_second": 6807.883 | |
| }, | |
| { | |
| "epoch": 1.2003183023872679, | |
| "grad_norm": 0.7473279729847268, | |
| "learning_rate": 3.7521362892111945e-05, | |
| "loss": 0.7002, | |
| "num_input_tokens_seen": 1278142592, | |
| "step": 1415, | |
| "train_runtime": 187703.911, | |
| "train_tokens_per_second": 6809.355 | |
| }, | |
| { | |
| "epoch": 1.2045623342175067, | |
| "grad_norm": 1.0374275407130875, | |
| "learning_rate": 3.741442687091926e-05, | |
| "loss": 0.7204, | |
| "num_input_tokens_seen": 1282692032, | |
| "step": 1420, | |
| "train_runtime": 188345.3351, | |
| "train_tokens_per_second": 6810.32 | |
| }, | |
| { | |
| "epoch": 1.2088063660477453, | |
| "grad_norm": 0.7440268721036309, | |
| "learning_rate": 3.730718851205089e-05, | |
| "loss": 0.7114, | |
| "num_input_tokens_seen": 1287034560, | |
| "step": 1425, | |
| "train_runtime": 188944.1141, | |
| "train_tokens_per_second": 6811.721 | |
| }, | |
| { | |
| "epoch": 1.213050397877984, | |
| "grad_norm": 0.776479873397123, | |
| "learning_rate": 3.719965042716154e-05, | |
| "loss": 0.7081, | |
| "num_input_tokens_seen": 1291460416, | |
| "step": 1430, | |
| "train_runtime": 189556.4755, | |
| "train_tokens_per_second": 6813.064 | |
| }, | |
| { | |
| "epoch": 1.2172944297082229, | |
| "grad_norm": 0.7404149511150007, | |
| "learning_rate": 3.709181523520532e-05, | |
| "loss": 0.7022, | |
| "num_input_tokens_seen": 1296144576, | |
| "step": 1435, | |
| "train_runtime": 190275.4375, | |
| "train_tokens_per_second": 6811.938 | |
| }, | |
| { | |
| "epoch": 1.2215384615384615, | |
| "grad_norm": 0.7950074266734066, | |
| "learning_rate": 3.698368556237206e-05, | |
| "loss": 0.7245, | |
| "num_input_tokens_seen": 1300612352, | |
| "step": 1440, | |
| "train_runtime": 190917.0602, | |
| "train_tokens_per_second": 6812.447 | |
| }, | |
| { | |
| "epoch": 1.2257824933687003, | |
| "grad_norm": 0.8330406088802553, | |
| "learning_rate": 3.687526404202326e-05, | |
| "loss": 0.6876, | |
| "num_input_tokens_seen": 1305227776, | |
| "step": 1445, | |
| "train_runtime": 191618.6772, | |
| "train_tokens_per_second": 6811.59 | |
| }, | |
| { | |
| "epoch": 1.230026525198939, | |
| "grad_norm": 0.7986919467015979, | |
| "learning_rate": 3.6766553314628016e-05, | |
| "loss": 0.6882, | |
| "num_input_tokens_seen": 1309815616, | |
| "step": 1450, | |
| "train_runtime": 192268.2878, | |
| "train_tokens_per_second": 6812.437 | |
| }, | |
| { | |
| "epoch": 1.2342705570291777, | |
| "grad_norm": 0.8194811456779011, | |
| "learning_rate": 3.66575560276987e-05, | |
| "loss": 0.6992, | |
| "num_input_tokens_seen": 1314293440, | |
| "step": 1455, | |
| "train_runtime": 192911.152, | |
| "train_tokens_per_second": 6812.947 | |
| }, | |
| { | |
| "epoch": 1.2385145888594165, | |
| "grad_norm": 0.8920784814801206, | |
| "learning_rate": 3.654827483572647e-05, | |
| "loss": 0.7034, | |
| "num_input_tokens_seen": 1318678784, | |
| "step": 1460, | |
| "train_runtime": 193534.093, | |
| "train_tokens_per_second": 6813.677 | |
| }, | |
| { | |
| "epoch": 1.2427586206896553, | |
| "grad_norm": 0.7947592511270455, | |
| "learning_rate": 3.6438712400116626e-05, | |
| "loss": 0.7277, | |
| "num_input_tokens_seen": 1323485248, | |
| "step": 1465, | |
| "train_runtime": 194275.8066, | |
| "train_tokens_per_second": 6812.404 | |
| }, | |
| { | |
| "epoch": 1.2470026525198938, | |
| "grad_norm": 0.9177270797271538, | |
| "learning_rate": 3.6328871389123817e-05, | |
| "loss": 0.7177, | |
| "num_input_tokens_seen": 1327989184, | |
| "step": 1470, | |
| "train_runtime": 194920.646, | |
| "train_tokens_per_second": 6812.973 | |
| }, | |
| { | |
| "epoch": 1.2512466843501326, | |
| "grad_norm": 0.9095775155270419, | |
| "learning_rate": 3.6218754477787034e-05, | |
| "loss": 0.69, | |
| "num_input_tokens_seen": 1332484288, | |
| "step": 1475, | |
| "train_runtime": 195594.2663, | |
| "train_tokens_per_second": 6812.492 | |
| }, | |
| { | |
| "epoch": 1.2512466843501326, | |
| "eval_loss": 0.7701402306556702, | |
| "eval_runtime": 1058.7996, | |
| "eval_samples_per_second": 2.878, | |
| "eval_steps_per_second": 0.091, | |
| "num_input_tokens_seen": 1332484288, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 1.2554907161803714, | |
| "grad_norm": 1.0025427441776165, | |
| "learning_rate": 3.610836434786448e-05, | |
| "loss": 0.7226, | |
| "num_input_tokens_seen": 1336834944, | |
| "step": 1480, | |
| "train_runtime": 197298.3845, | |
| "train_tokens_per_second": 6775.701 | |
| }, | |
| { | |
| "epoch": 1.25973474801061, | |
| "grad_norm": 1.1051405618486907, | |
| "learning_rate": 3.599770368776824e-05, | |
| "loss": 0.717, | |
| "num_input_tokens_seen": 1341307904, | |
| "step": 1485, | |
| "train_runtime": 197946.5126, | |
| "train_tokens_per_second": 6776.113 | |
| }, | |
| { | |
| "epoch": 1.2639787798408488, | |
| "grad_norm": 0.7277747101019799, | |
| "learning_rate": 3.588677519249883e-05, | |
| "loss": 0.7129, | |
| "num_input_tokens_seen": 1345945600, | |
| "step": 1490, | |
| "train_runtime": 198583.5461, | |
| "train_tokens_per_second": 6777.73 | |
| }, | |
| { | |
| "epoch": 1.2682228116710874, | |
| "grad_norm": 1.0644077159168257, | |
| "learning_rate": 3.577558156357954e-05, | |
| "loss": 0.6964, | |
| "num_input_tokens_seen": 1350721856, | |
| "step": 1495, | |
| "train_runtime": 199281.1546, | |
| "train_tokens_per_second": 6777.971 | |
| }, | |
| { | |
| "epoch": 1.2724668435013262, | |
| "grad_norm": 0.7336344093486956, | |
| "learning_rate": 3.566412550899067e-05, | |
| "loss": 0.7085, | |
| "num_input_tokens_seen": 1355297856, | |
| "step": 1500, | |
| "train_runtime": 199944.5213, | |
| "train_tokens_per_second": 6778.37 | |
| }, | |
| { | |
| "epoch": 1.276710875331565, | |
| "grad_norm": 0.8133225375902521, | |
| "learning_rate": 3.5552409743103556e-05, | |
| "loss": 0.6867, | |
| "num_input_tokens_seen": 1359679104, | |
| "step": 1505, | |
| "train_runtime": 200577.4067, | |
| "train_tokens_per_second": 6778.825 | |
| }, | |
| { | |
| "epoch": 1.2809549071618038, | |
| "grad_norm": 0.697351763291778, | |
| "learning_rate": 3.5440436986614475e-05, | |
| "loss": 0.6982, | |
| "num_input_tokens_seen": 1364232960, | |
| "step": 1510, | |
| "train_runtime": 201233.0814, | |
| "train_tokens_per_second": 6779.367 | |
| }, | |
| { | |
| "epoch": 1.2851989389920424, | |
| "grad_norm": 0.7278903758794186, | |
| "learning_rate": 3.53282099664784e-05, | |
| "loss": 0.6908, | |
| "num_input_tokens_seen": 1368631360, | |
| "step": 1515, | |
| "train_runtime": 201880.3686, | |
| "train_tokens_per_second": 6779.418 | |
| }, | |
| { | |
| "epoch": 1.2894429708222812, | |
| "grad_norm": 0.8347309141959418, | |
| "learning_rate": 3.521573141584254e-05, | |
| "loss": 0.6901, | |
| "num_input_tokens_seen": 1373082240, | |
| "step": 1520, | |
| "train_runtime": 202488.6455, | |
| "train_tokens_per_second": 6781.033 | |
| }, | |
| { | |
| "epoch": 1.2936870026525198, | |
| "grad_norm": 0.8283488230820615, | |
| "learning_rate": 3.5103004073979854e-05, | |
| "loss": 0.6807, | |
| "num_input_tokens_seen": 1377433792, | |
| "step": 1525, | |
| "train_runtime": 203113.9684, | |
| "train_tokens_per_second": 6781.581 | |
| }, | |
| { | |
| "epoch": 1.2979310344827586, | |
| "grad_norm": 0.7419000730982289, | |
| "learning_rate": 3.499003068622226e-05, | |
| "loss": 0.6995, | |
| "num_input_tokens_seen": 1382143808, | |
| "step": 1530, | |
| "train_runtime": 203848.007, | |
| "train_tokens_per_second": 6780.266 | |
| }, | |
| { | |
| "epoch": 1.3021750663129974, | |
| "grad_norm": 0.8544362640648647, | |
| "learning_rate": 3.487681400389384e-05, | |
| "loss": 0.6932, | |
| "num_input_tokens_seen": 1386581504, | |
| "step": 1535, | |
| "train_runtime": 204454.9055, | |
| "train_tokens_per_second": 6781.845 | |
| }, | |
| { | |
| "epoch": 1.3064190981432362, | |
| "grad_norm": 0.7409589690489501, | |
| "learning_rate": 3.4763356784243784e-05, | |
| "loss": 0.6943, | |
| "num_input_tokens_seen": 1391187968, | |
| "step": 1540, | |
| "train_runtime": 205112.5105, | |
| "train_tokens_per_second": 6782.56 | |
| }, | |
| { | |
| "epoch": 1.3106631299734748, | |
| "grad_norm": 0.8626742457912966, | |
| "learning_rate": 3.4649661790379285e-05, | |
| "loss": 0.6894, | |
| "num_input_tokens_seen": 1395756992, | |
| "step": 1545, | |
| "train_runtime": 205751.9562, | |
| "train_tokens_per_second": 6783.688 | |
| }, | |
| { | |
| "epoch": 1.3149071618037136, | |
| "grad_norm": 0.9751075736447785, | |
| "learning_rate": 3.453573179119821e-05, | |
| "loss": 0.6919, | |
| "num_input_tokens_seen": 1400210880, | |
| "step": 1550, | |
| "train_runtime": 206360.6402, | |
| "train_tokens_per_second": 6785.261 | |
| }, | |
| { | |
| "epoch": 1.3191511936339522, | |
| "grad_norm": 0.839491541858112, | |
| "learning_rate": 3.4421569561321705e-05, | |
| "loss": 0.7069, | |
| "num_input_tokens_seen": 1404544896, | |
| "step": 1555, | |
| "train_runtime": 206988.7608, | |
| "train_tokens_per_second": 6785.609 | |
| }, | |
| { | |
| "epoch": 1.323395225464191, | |
| "grad_norm": 0.7686049267619298, | |
| "learning_rate": 3.4307177881026574e-05, | |
| "loss": 0.7102, | |
| "num_input_tokens_seen": 1409135360, | |
| "step": 1560, | |
| "train_runtime": 207645.0724, | |
| "train_tokens_per_second": 6786.269 | |
| }, | |
| { | |
| "epoch": 1.3276392572944298, | |
| "grad_norm": 0.7897081067547944, | |
| "learning_rate": 3.419255953617762e-05, | |
| "loss": 0.7095, | |
| "num_input_tokens_seen": 1413494272, | |
| "step": 1565, | |
| "train_runtime": 208306.3313, | |
| "train_tokens_per_second": 6785.652 | |
| }, | |
| { | |
| "epoch": 1.3318832891246684, | |
| "grad_norm": 0.9461779222467549, | |
| "learning_rate": 3.407771731815975e-05, | |
| "loss": 0.685, | |
| "num_input_tokens_seen": 1417846272, | |
| "step": 1570, | |
| "train_runtime": 208904.186, | |
| "train_tokens_per_second": 6787.065 | |
| }, | |
| { | |
| "epoch": 1.3361273209549072, | |
| "grad_norm": 0.8934996824542671, | |
| "learning_rate": 3.3962654023810056e-05, | |
| "loss": 0.6814, | |
| "num_input_tokens_seen": 1422340672, | |
| "step": 1575, | |
| "train_runtime": 209567.9077, | |
| "train_tokens_per_second": 6787.016 | |
| }, | |
| { | |
| "epoch": 1.340371352785146, | |
| "grad_norm": 0.8661495630419888, | |
| "learning_rate": 3.384737245534962e-05, | |
| "loss": 0.7181, | |
| "num_input_tokens_seen": 1426955904, | |
| "step": 1580, | |
| "train_runtime": 210237.9687, | |
| "train_tokens_per_second": 6787.337 | |
| }, | |
| { | |
| "epoch": 1.3446153846153845, | |
| "grad_norm": 0.8719974075076845, | |
| "learning_rate": 3.373187542031534e-05, | |
| "loss": 0.6959, | |
| "num_input_tokens_seen": 1431489088, | |
| "step": 1585, | |
| "train_runtime": 210873.0161, | |
| "train_tokens_per_second": 6788.394 | |
| }, | |
| { | |
| "epoch": 1.3488594164456233, | |
| "grad_norm": 1.1626368748489013, | |
| "learning_rate": 3.361616573149153e-05, | |
| "loss": 0.6832, | |
| "num_input_tokens_seen": 1435997504, | |
| "step": 1590, | |
| "train_runtime": 211532.587, | |
| "train_tokens_per_second": 6788.54 | |
| }, | |
| { | |
| "epoch": 1.3531034482758622, | |
| "grad_norm": 0.8538479857974732, | |
| "learning_rate": 3.350024620684142e-05, | |
| "loss": 0.7099, | |
| "num_input_tokens_seen": 1440263232, | |
| "step": 1595, | |
| "train_runtime": 212121.5231, | |
| "train_tokens_per_second": 6789.802 | |
| }, | |
| { | |
| "epoch": 1.3573474801061007, | |
| "grad_norm": 0.8056069140705914, | |
| "learning_rate": 3.338411966943852e-05, | |
| "loss": 0.69, | |
| "num_input_tokens_seen": 1444712192, | |
| "step": 1600, | |
| "train_runtime": 212749.667, | |
| "train_tokens_per_second": 6790.667 | |
| }, | |
| { | |
| "epoch": 1.3615915119363395, | |
| "grad_norm": 0.7748917218818391, | |
| "learning_rate": 3.326778894739787e-05, | |
| "loss": 0.7012, | |
| "num_input_tokens_seen": 1449143872, | |
| "step": 1605, | |
| "train_runtime": 213375.0723, | |
| "train_tokens_per_second": 6791.533 | |
| }, | |
| { | |
| "epoch": 1.3658355437665781, | |
| "grad_norm": 0.7420765913634999, | |
| "learning_rate": 3.3151256873807166e-05, | |
| "loss": 0.7044, | |
| "num_input_tokens_seen": 1453720384, | |
| "step": 1610, | |
| "train_runtime": 214016.224, | |
| "train_tokens_per_second": 6792.571 | |
| }, | |
| { | |
| "epoch": 1.370079575596817, | |
| "grad_norm": 0.8236976518371382, | |
| "learning_rate": 3.3034526286657784e-05, | |
| "loss": 0.6767, | |
| "num_input_tokens_seen": 1458310144, | |
| "step": 1615, | |
| "train_runtime": 214691.52, | |
| "train_tokens_per_second": 6792.584 | |
| }, | |
| { | |
| "epoch": 1.3743236074270557, | |
| "grad_norm": 1.234048869991443, | |
| "learning_rate": 3.291760002877563e-05, | |
| "loss": 0.694, | |
| "num_input_tokens_seen": 1462804672, | |
| "step": 1620, | |
| "train_runtime": 215323.1611, | |
| "train_tokens_per_second": 6793.531 | |
| }, | |
| { | |
| "epoch": 1.3785676392572945, | |
| "grad_norm": 1.1190238309471685, | |
| "learning_rate": 3.280048094775194e-05, | |
| "loss": 0.6912, | |
| "num_input_tokens_seen": 1467354688, | |
| "step": 1625, | |
| "train_runtime": 215957.7438, | |
| "train_tokens_per_second": 6794.638 | |
| }, | |
| { | |
| "epoch": 1.3828116710875331, | |
| "grad_norm": 0.7424932571208389, | |
| "learning_rate": 3.268317189587389e-05, | |
| "loss": 0.6772, | |
| "num_input_tokens_seen": 1471765312, | |
| "step": 1630, | |
| "train_runtime": 216588.9755, | |
| "train_tokens_per_second": 6795.2 | |
| }, | |
| { | |
| "epoch": 1.387055702917772, | |
| "grad_norm": 0.8756485744320969, | |
| "learning_rate": 3.256567573005519e-05, | |
| "loss": 0.7056, | |
| "num_input_tokens_seen": 1476461312, | |
| "step": 1635, | |
| "train_runtime": 217239.6368, | |
| "train_tokens_per_second": 6796.464 | |
| }, | |
| { | |
| "epoch": 1.3912997347480105, | |
| "grad_norm": 0.8539432184293334, | |
| "learning_rate": 3.2447995311766426e-05, | |
| "loss": 0.6921, | |
| "num_input_tokens_seen": 1481070080, | |
| "step": 1640, | |
| "train_runtime": 217902.9575, | |
| "train_tokens_per_second": 6796.925 | |
| }, | |
| { | |
| "epoch": 1.3955437665782493, | |
| "grad_norm": 0.9984531979139321, | |
| "learning_rate": 3.233013350696547e-05, | |
| "loss": 0.6788, | |
| "num_input_tokens_seen": 1485426304, | |
| "step": 1645, | |
| "train_runtime": 218524.3978, | |
| "train_tokens_per_second": 6797.531 | |
| }, | |
| { | |
| "epoch": 1.399787798408488, | |
| "grad_norm": 0.8306599513500448, | |
| "learning_rate": 3.22120931860276e-05, | |
| "loss": 0.7068, | |
| "num_input_tokens_seen": 1489976064, | |
| "step": 1650, | |
| "train_runtime": 219164.0872, | |
| "train_tokens_per_second": 6798.45 | |
| }, | |
| { | |
| "epoch": 1.404031830238727, | |
| "grad_norm": 0.8165110526157475, | |
| "learning_rate": 3.2093877223675657e-05, | |
| "loss": 0.7055, | |
| "num_input_tokens_seen": 1494425408, | |
| "step": 1655, | |
| "train_runtime": 219817.6974, | |
| "train_tokens_per_second": 6798.476 | |
| }, | |
| { | |
| "epoch": 1.4082758620689655, | |
| "grad_norm": 0.9181922831479999, | |
| "learning_rate": 3.197548849890997e-05, | |
| "loss": 0.6919, | |
| "num_input_tokens_seen": 1499022912, | |
| "step": 1660, | |
| "train_runtime": 220540.8581, | |
| "train_tokens_per_second": 6797.03 | |
| }, | |
| { | |
| "epoch": 1.4125198938992043, | |
| "grad_norm": 0.8451299000501948, | |
| "learning_rate": 3.1856929894938294e-05, | |
| "loss": 0.6851, | |
| "num_input_tokens_seen": 1503165184, | |
| "step": 1665, | |
| "train_runtime": 221107.0695, | |
| "train_tokens_per_second": 6798.359 | |
| }, | |
| { | |
| "epoch": 1.4167639257294429, | |
| "grad_norm": 0.9309089115614846, | |
| "learning_rate": 3.17382042991056e-05, | |
| "loss": 0.6779, | |
| "num_input_tokens_seen": 1507912704, | |
| "step": 1670, | |
| "train_runtime": 221826.3413, | |
| "train_tokens_per_second": 6797.717 | |
| }, | |
| { | |
| "epoch": 1.4210079575596817, | |
| "grad_norm": 0.897730430026796, | |
| "learning_rate": 3.16193146028237e-05, | |
| "loss": 0.6916, | |
| "num_input_tokens_seen": 1512406912, | |
| "step": 1675, | |
| "train_runtime": 222474.5449, | |
| "train_tokens_per_second": 6798.112 | |
| }, | |
| { | |
| "epoch": 1.4252519893899205, | |
| "grad_norm": 1.0914616744302021, | |
| "learning_rate": 3.1500263701500896e-05, | |
| "loss": 0.7087, | |
| "num_input_tokens_seen": 1516995328, | |
| "step": 1680, | |
| "train_runtime": 223170.4553, | |
| "train_tokens_per_second": 6797.474 | |
| }, | |
| { | |
| "epoch": 1.4294960212201593, | |
| "grad_norm": 0.7658163870719906, | |
| "learning_rate": 3.1381054494471405e-05, | |
| "loss": 0.703, | |
| "num_input_tokens_seen": 1521406976, | |
| "step": 1685, | |
| "train_runtime": 223818.5244, | |
| "train_tokens_per_second": 6797.502 | |
| }, | |
| { | |
| "epoch": 1.4337400530503979, | |
| "grad_norm": 0.7295564226365354, | |
| "learning_rate": 3.12616898849248e-05, | |
| "loss": 0.7035, | |
| "num_input_tokens_seen": 1526055168, | |
| "step": 1690, | |
| "train_runtime": 224524.94, | |
| "train_tokens_per_second": 6796.818 | |
| }, | |
| { | |
| "epoch": 1.4379840848806367, | |
| "grad_norm": 0.8439956680094854, | |
| "learning_rate": 3.1142172779835274e-05, | |
| "loss": 0.6746, | |
| "num_input_tokens_seen": 1530635200, | |
| "step": 1695, | |
| "train_runtime": 225153.441, | |
| "train_tokens_per_second": 6798.187 | |
| }, | |
| { | |
| "epoch": 1.4422281167108753, | |
| "grad_norm": 0.8397468802586634, | |
| "learning_rate": 3.1022506089890876e-05, | |
| "loss": 0.7068, | |
| "num_input_tokens_seen": 1535012288, | |
| "step": 1700, | |
| "train_runtime": 225781.6539, | |
| "train_tokens_per_second": 6798.658 | |
| }, | |
| { | |
| "epoch": 1.446472148541114, | |
| "grad_norm": 0.7474048463993876, | |
| "learning_rate": 3.0902692729422575e-05, | |
| "loss": 0.6865, | |
| "num_input_tokens_seen": 1539284736, | |
| "step": 1705, | |
| "train_runtime": 226344.0602, | |
| "train_tokens_per_second": 6800.641 | |
| }, | |
| { | |
| "epoch": 1.4507161803713529, | |
| "grad_norm": 0.9736770454301451, | |
| "learning_rate": 3.078273561633335e-05, | |
| "loss": 0.6763, | |
| "num_input_tokens_seen": 1543699904, | |
| "step": 1710, | |
| "train_runtime": 226957.3024, | |
| "train_tokens_per_second": 6801.719 | |
| }, | |
| { | |
| "epoch": 1.4549602122015914, | |
| "grad_norm": 1.1147200850975938, | |
| "learning_rate": 3.066263767202706e-05, | |
| "loss": 0.6914, | |
| "num_input_tokens_seen": 1548275328, | |
| "step": 1715, | |
| "train_runtime": 227614.0923, | |
| "train_tokens_per_second": 6802.195 | |
| }, | |
| { | |
| "epoch": 1.4592042440318302, | |
| "grad_norm": 0.9261552445682865, | |
| "learning_rate": 3.0542401821337346e-05, | |
| "loss": 0.6895, | |
| "num_input_tokens_seen": 1552716864, | |
| "step": 1720, | |
| "train_runtime": 228231.3018, | |
| "train_tokens_per_second": 6803.26 | |
| }, | |
| { | |
| "epoch": 1.463448275862069, | |
| "grad_norm": 0.7494164761692941, | |
| "learning_rate": 3.042203099245639e-05, | |
| "loss": 0.6871, | |
| "num_input_tokens_seen": 1557269760, | |
| "step": 1725, | |
| "train_runtime": 228914.9513, | |
| "train_tokens_per_second": 6802.831 | |
| }, | |
| { | |
| "epoch": 1.4676923076923076, | |
| "grad_norm": 0.7383133192878851, | |
| "learning_rate": 3.0301528116863592e-05, | |
| "loss": 0.6914, | |
| "num_input_tokens_seen": 1561556608, | |
| "step": 1730, | |
| "train_runtime": 229541.9414, | |
| "train_tokens_per_second": 6802.925 | |
| }, | |
| { | |
| "epoch": 1.4719363395225464, | |
| "grad_norm": 0.877542891400688, | |
| "learning_rate": 3.0180896129254182e-05, | |
| "loss": 0.6962, | |
| "num_input_tokens_seen": 1565974592, | |
| "step": 1735, | |
| "train_runtime": 230156.1279, | |
| "train_tokens_per_second": 6803.967 | |
| }, | |
| { | |
| "epoch": 1.4761803713527852, | |
| "grad_norm": 0.7394328578918072, | |
| "learning_rate": 3.006013796746774e-05, | |
| "loss": 0.6763, | |
| "num_input_tokens_seen": 1570370368, | |
| "step": 1740, | |
| "train_runtime": 230776.2675, | |
| "train_tokens_per_second": 6804.731 | |
| }, | |
| { | |
| "epoch": 1.4804244031830238, | |
| "grad_norm": 0.8032649294789167, | |
| "learning_rate": 2.993925657241668e-05, | |
| "loss": 0.6904, | |
| "num_input_tokens_seen": 1574874432, | |
| "step": 1745, | |
| "train_runtime": 231438.1989, | |
| "train_tokens_per_second": 6804.73 | |
| }, | |
| { | |
| "epoch": 1.4846684350132626, | |
| "grad_norm": 0.9191103442108757, | |
| "learning_rate": 2.9818254888014586e-05, | |
| "loss": 0.6809, | |
| "num_input_tokens_seen": 1579401664, | |
| "step": 1750, | |
| "train_runtime": 232077.6612, | |
| "train_tokens_per_second": 6805.488 | |
| }, | |
| { | |
| "epoch": 1.4889124668435012, | |
| "grad_norm": 0.86131262301876, | |
| "learning_rate": 2.9697135861104546e-05, | |
| "loss": 0.6976, | |
| "num_input_tokens_seen": 1584000064, | |
| "step": 1755, | |
| "train_runtime": 232725.1511, | |
| "train_tokens_per_second": 6806.312 | |
| }, | |
| { | |
| "epoch": 1.49315649867374, | |
| "grad_norm": 0.7493354049181269, | |
| "learning_rate": 2.9575902441387393e-05, | |
| "loss": 0.693, | |
| "num_input_tokens_seen": 1588529152, | |
| "step": 1760, | |
| "train_runtime": 233378.3571, | |
| "train_tokens_per_second": 6806.669 | |
| }, | |
| { | |
| "epoch": 1.4974005305039788, | |
| "grad_norm": 0.790057237962092, | |
| "learning_rate": 2.9454557581349818e-05, | |
| "loss": 0.6793, | |
| "num_input_tokens_seen": 1593390656, | |
| "step": 1765, | |
| "train_runtime": 234055.6771, | |
| "train_tokens_per_second": 6807.742 | |
| }, | |
| { | |
| "epoch": 1.5016445623342176, | |
| "grad_norm": 0.8848325086982859, | |
| "learning_rate": 2.933310423619252e-05, | |
| "loss": 0.6963, | |
| "num_input_tokens_seen": 1597966720, | |
| "step": 1770, | |
| "train_runtime": 234761.2586, | |
| "train_tokens_per_second": 6806.774 | |
| }, | |
| { | |
| "epoch": 1.5016445623342176, | |
| "eval_loss": 0.7488037943840027, | |
| "eval_runtime": 1056.893, | |
| "eval_samples_per_second": 2.883, | |
| "eval_steps_per_second": 0.091, | |
| "num_input_tokens_seen": 1597966720, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 1.5058885941644562, | |
| "grad_norm": 0.8733912053492838, | |
| "learning_rate": 2.9211545363758214e-05, | |
| "loss": 0.6861, | |
| "num_input_tokens_seen": 1602346944, | |
| "step": 1775, | |
| "train_runtime": 236413.8812, | |
| "train_tokens_per_second": 6777.719 | |
| }, | |
| { | |
| "epoch": 1.510132625994695, | |
| "grad_norm": 0.9213477294170037, | |
| "learning_rate": 2.9089883924459603e-05, | |
| "loss": 0.6802, | |
| "num_input_tokens_seen": 1606861888, | |
| "step": 1780, | |
| "train_runtime": 237090.594, | |
| "train_tokens_per_second": 6777.417 | |
| }, | |
| { | |
| "epoch": 1.5143766578249336, | |
| "grad_norm": 0.8533906385009806, | |
| "learning_rate": 2.8968122881207272e-05, | |
| "loss": 0.6926, | |
| "num_input_tokens_seen": 1611490176, | |
| "step": 1785, | |
| "train_runtime": 237783.8661, | |
| "train_tokens_per_second": 6777.122 | |
| }, | |
| { | |
| "epoch": 1.5186206896551724, | |
| "grad_norm": 0.700075097629672, | |
| "learning_rate": 2.884626519933753e-05, | |
| "loss": 0.6809, | |
| "num_input_tokens_seen": 1616104256, | |
| "step": 1790, | |
| "train_runtime": 238446.2141, | |
| "train_tokens_per_second": 6777.647 | |
| }, | |
| { | |
| "epoch": 1.5228647214854112, | |
| "grad_norm": 0.7529188790563152, | |
| "learning_rate": 2.872431384654021e-05, | |
| "loss": 0.6744, | |
| "num_input_tokens_seen": 1620585216, | |
| "step": 1795, | |
| "train_runtime": 239099.2559, | |
| "train_tokens_per_second": 6777.876 | |
| }, | |
| { | |
| "epoch": 1.52710875331565, | |
| "grad_norm": 0.9180471978156958, | |
| "learning_rate": 2.8602271792786355e-05, | |
| "loss": 0.6979, | |
| "num_input_tokens_seen": 1625263744, | |
| "step": 1800, | |
| "train_runtime": 239770.0872, | |
| "train_tokens_per_second": 6778.426 | |
| }, | |
| { | |
| "epoch": 1.5313527851458886, | |
| "grad_norm": 0.936376426158768, | |
| "learning_rate": 2.8480142010255956e-05, | |
| "loss": 0.6701, | |
| "num_input_tokens_seen": 1629558400, | |
| "step": 1805, | |
| "train_runtime": 240351.8577, | |
| "train_tokens_per_second": 6779.887 | |
| }, | |
| { | |
| "epoch": 1.5355968169761272, | |
| "grad_norm": 0.6813164585163979, | |
| "learning_rate": 2.835792747326549e-05, | |
| "loss": 0.6846, | |
| "num_input_tokens_seen": 1633885760, | |
| "step": 1810, | |
| "train_runtime": 240960.0435, | |
| "train_tokens_per_second": 6780.733 | |
| }, | |
| { | |
| "epoch": 1.539840848806366, | |
| "grad_norm": 0.9012352213500389, | |
| "learning_rate": 2.8235631158195542e-05, | |
| "loss": 0.6752, | |
| "num_input_tokens_seen": 1638485184, | |
| "step": 1815, | |
| "train_runtime": 241606.7877, | |
| "train_tokens_per_second": 6781.619 | |
| }, | |
| { | |
| "epoch": 1.5440848806366048, | |
| "grad_norm": 1.0501209838121786, | |
| "learning_rate": 2.8113256043418296e-05, | |
| "loss": 0.6786, | |
| "num_input_tokens_seen": 1643127424, | |
| "step": 1820, | |
| "train_runtime": 242328.0234, | |
| "train_tokens_per_second": 6780.592 | |
| }, | |
| { | |
| "epoch": 1.5483289124668436, | |
| "grad_norm": 0.7301282294890542, | |
| "learning_rate": 2.7990805109224994e-05, | |
| "loss": 0.7052, | |
| "num_input_tokens_seen": 1647584256, | |
| "step": 1825, | |
| "train_runtime": 242957.2381, | |
| "train_tokens_per_second": 6781.375 | |
| }, | |
| { | |
| "epoch": 1.5525729442970824, | |
| "grad_norm": 0.7180423134088882, | |
| "learning_rate": 2.786828133775337e-05, | |
| "loss": 0.6862, | |
| "num_input_tokens_seen": 1651979520, | |
| "step": 1830, | |
| "train_runtime": 243571.1176, | |
| "train_tokens_per_second": 6782.329 | |
| }, | |
| { | |
| "epoch": 1.556816976127321, | |
| "grad_norm": 1.0492539609549594, | |
| "learning_rate": 2.774568771291503e-05, | |
| "loss": 0.6832, | |
| "num_input_tokens_seen": 1656516672, | |
| "step": 1835, | |
| "train_runtime": 244227.2168, | |
| "train_tokens_per_second": 6782.687 | |
| }, | |
| { | |
| "epoch": 1.5610610079575595, | |
| "grad_norm": 1.1159044633198913, | |
| "learning_rate": 2.7623027220322757e-05, | |
| "loss": 0.6695, | |
| "num_input_tokens_seen": 1661151360, | |
| "step": 1840, | |
| "train_runtime": 244907.8876, | |
| "train_tokens_per_second": 6782.76 | |
| }, | |
| { | |
| "epoch": 1.5653050397877983, | |
| "grad_norm": 0.7897737642307381, | |
| "learning_rate": 2.75003028472178e-05, | |
| "loss": 0.6781, | |
| "num_input_tokens_seen": 1665702272, | |
| "step": 1845, | |
| "train_runtime": 245544.1767, | |
| "train_tokens_per_second": 6783.717 | |
| }, | |
| { | |
| "epoch": 1.5695490716180371, | |
| "grad_norm": 0.8021337355935967, | |
| "learning_rate": 2.737751758239717e-05, | |
| "loss": 0.6872, | |
| "num_input_tokens_seen": 1670142848, | |
| "step": 1850, | |
| "train_runtime": 246145.0482, | |
| "train_tokens_per_second": 6785.198 | |
| }, | |
| { | |
| "epoch": 1.573793103448276, | |
| "grad_norm": 0.9146806729788793, | |
| "learning_rate": 2.7254674416140796e-05, | |
| "loss": 0.6674, | |
| "num_input_tokens_seen": 1674686336, | |
| "step": 1855, | |
| "train_runtime": 246790.7905, | |
| "train_tokens_per_second": 6785.854 | |
| }, | |
| { | |
| "epoch": 1.5780371352785147, | |
| "grad_norm": 0.7370948567058712, | |
| "learning_rate": 2.7131776340138732e-05, | |
| "loss": 0.6835, | |
| "num_input_tokens_seen": 1679386880, | |
| "step": 1860, | |
| "train_runtime": 247484.4332, | |
| "train_tokens_per_second": 6785.828 | |
| }, | |
| { | |
| "epoch": 1.5822811671087533, | |
| "grad_norm": 0.8275204157097062, | |
| "learning_rate": 2.700882634741828e-05, | |
| "loss": 0.6633, | |
| "num_input_tokens_seen": 1683943488, | |
| "step": 1865, | |
| "train_runtime": 248132.5186, | |
| "train_tokens_per_second": 6786.468 | |
| }, | |
| { | |
| "epoch": 1.586525198938992, | |
| "grad_norm": 0.9040723037619556, | |
| "learning_rate": 2.688582743227112e-05, | |
| "loss": 0.6687, | |
| "num_input_tokens_seen": 1688602624, | |
| "step": 1870, | |
| "train_runtime": 248805.7127, | |
| "train_tokens_per_second": 6786.832 | |
| }, | |
| { | |
| "epoch": 1.5907692307692307, | |
| "grad_norm": 0.760985868201272, | |
| "learning_rate": 2.676278259018037e-05, | |
| "loss": 0.6978, | |
| "num_input_tokens_seen": 1693144960, | |
| "step": 1875, | |
| "train_runtime": 249443.0497, | |
| "train_tokens_per_second": 6787.701 | |
| }, | |
| { | |
| "epoch": 1.5950132625994695, | |
| "grad_norm": 0.6622361089518702, | |
| "learning_rate": 2.663969481774764e-05, | |
| "loss": 0.6809, | |
| "num_input_tokens_seen": 1697705216, | |
| "step": 1880, | |
| "train_runtime": 250088.7537, | |
| "train_tokens_per_second": 6788.411 | |
| }, | |
| { | |
| "epoch": 1.5992572944297083, | |
| "grad_norm": 0.9742697876211484, | |
| "learning_rate": 2.6516567112620057e-05, | |
| "loss": 0.6955, | |
| "num_input_tokens_seen": 1702328000, | |
| "step": 1885, | |
| "train_runtime": 250811.4006, | |
| "train_tokens_per_second": 6787.283 | |
| }, | |
| { | |
| "epoch": 1.603501326259947, | |
| "grad_norm": 0.8432142437312786, | |
| "learning_rate": 2.6393402473417257e-05, | |
| "loss": 0.6891, | |
| "num_input_tokens_seen": 1706848704, | |
| "step": 1890, | |
| "train_runtime": 251465.4682, | |
| "train_tokens_per_second": 6787.607 | |
| }, | |
| { | |
| "epoch": 1.6077453580901857, | |
| "grad_norm": 1.1461032434751868, | |
| "learning_rate": 2.627020389965835e-05, | |
| "loss": 0.6813, | |
| "num_input_tokens_seen": 1711334336, | |
| "step": 1895, | |
| "train_runtime": 252107.1822, | |
| "train_tokens_per_second": 6788.122 | |
| }, | |
| { | |
| "epoch": 1.6119893899204243, | |
| "grad_norm": 0.7667390099087915, | |
| "learning_rate": 2.61469743916889e-05, | |
| "loss": 0.6809, | |
| "num_input_tokens_seen": 1715695488, | |
| "step": 1900, | |
| "train_runtime": 252733.9504, | |
| "train_tokens_per_second": 6788.544 | |
| }, | |
| { | |
| "epoch": 1.616233421750663, | |
| "grad_norm": 1.319936113059114, | |
| "learning_rate": 2.6023716950607814e-05, | |
| "loss": 0.6773, | |
| "num_input_tokens_seen": 1720293184, | |
| "step": 1905, | |
| "train_runtime": 253411.1635, | |
| "train_tokens_per_second": 6788.545 | |
| }, | |
| { | |
| "epoch": 1.620477453580902, | |
| "grad_norm": 0.8696242756135805, | |
| "learning_rate": 2.590043457819428e-05, | |
| "loss": 0.6858, | |
| "num_input_tokens_seen": 1724818304, | |
| "step": 1910, | |
| "train_runtime": 254091.2449, | |
| "train_tokens_per_second": 6788.185 | |
| }, | |
| { | |
| "epoch": 1.6247214854111407, | |
| "grad_norm": 0.6984051732842884, | |
| "learning_rate": 2.5777130276834677e-05, | |
| "loss": 0.6558, | |
| "num_input_tokens_seen": 1729429824, | |
| "step": 1915, | |
| "train_runtime": 254736.195, | |
| "train_tokens_per_second": 6789.101 | |
| }, | |
| { | |
| "epoch": 1.6289655172413793, | |
| "grad_norm": 0.7277968832920597, | |
| "learning_rate": 2.56538070494494e-05, | |
| "loss": 0.6816, | |
| "num_input_tokens_seen": 1733884032, | |
| "step": 1920, | |
| "train_runtime": 255336.8051, | |
| "train_tokens_per_second": 6790.576 | |
| }, | |
| { | |
| "epoch": 1.633209549071618, | |
| "grad_norm": 0.9589871027299995, | |
| "learning_rate": 2.5530467899419792e-05, | |
| "loss": 0.6529, | |
| "num_input_tokens_seen": 1738507328, | |
| "step": 1925, | |
| "train_runtime": 256022.2293, | |
| "train_tokens_per_second": 6790.455 | |
| }, | |
| { | |
| "epoch": 1.6374535809018567, | |
| "grad_norm": 0.8788636504147925, | |
| "learning_rate": 2.5407115830514955e-05, | |
| "loss": 0.6613, | |
| "num_input_tokens_seen": 1743139584, | |
| "step": 1930, | |
| "train_runtime": 256649.7946, | |
| "train_tokens_per_second": 6791.899 | |
| }, | |
| { | |
| "epoch": 1.6416976127320955, | |
| "grad_norm": 0.751730530930981, | |
| "learning_rate": 2.5283753846818626e-05, | |
| "loss": 0.6688, | |
| "num_input_tokens_seen": 1747799104, | |
| "step": 1935, | |
| "train_runtime": 257356.5973, | |
| "train_tokens_per_second": 6791.351 | |
| }, | |
| { | |
| "epoch": 1.6459416445623343, | |
| "grad_norm": 1.0663719817746726, | |
| "learning_rate": 2.516038495265599e-05, | |
| "loss": 0.6806, | |
| "num_input_tokens_seen": 1752473536, | |
| "step": 1940, | |
| "train_runtime": 258018.0708, | |
| "train_tokens_per_second": 6792.057 | |
| }, | |
| { | |
| "epoch": 1.650185676392573, | |
| "grad_norm": 0.8682191978665272, | |
| "learning_rate": 2.503701215252056e-05, | |
| "loss": 0.6834, | |
| "num_input_tokens_seen": 1757236416, | |
| "step": 1945, | |
| "train_runtime": 258721.276, | |
| "train_tokens_per_second": 6792.006 | |
| }, | |
| { | |
| "epoch": 1.6544297082228117, | |
| "grad_norm": 0.7561744190987955, | |
| "learning_rate": 2.4913638451000926e-05, | |
| "loss": 0.6723, | |
| "num_input_tokens_seen": 1761509184, | |
| "step": 1950, | |
| "train_runtime": 259302.524, | |
| "train_tokens_per_second": 6793.259 | |
| }, | |
| { | |
| "epoch": 1.6586737400530502, | |
| "grad_norm": 0.9925925496141875, | |
| "learning_rate": 2.479026685270767e-05, | |
| "loss": 0.652, | |
| "num_input_tokens_seen": 1766119104, | |
| "step": 1955, | |
| "train_runtime": 259978.5543, | |
| "train_tokens_per_second": 6793.326 | |
| }, | |
| { | |
| "epoch": 1.662917771883289, | |
| "grad_norm": 0.6823275549351902, | |
| "learning_rate": 2.4666900362200124e-05, | |
| "loss": 0.6702, | |
| "num_input_tokens_seen": 1770634688, | |
| "step": 1960, | |
| "train_runtime": 260619.4472, | |
| "train_tokens_per_second": 6793.947 | |
| }, | |
| { | |
| "epoch": 1.6671618037135278, | |
| "grad_norm": 0.8309834963725057, | |
| "learning_rate": 2.4543541983913257e-05, | |
| "loss": 0.6498, | |
| "num_input_tokens_seen": 1775127616, | |
| "step": 1965, | |
| "train_runtime": 261263.6563, | |
| "train_tokens_per_second": 6794.392 | |
| }, | |
| { | |
| "epoch": 1.6714058355437666, | |
| "grad_norm": 0.916270148214916, | |
| "learning_rate": 2.4420194722084438e-05, | |
| "loss": 0.6637, | |
| "num_input_tokens_seen": 1779681280, | |
| "step": 1970, | |
| "train_runtime": 261910.4255, | |
| "train_tokens_per_second": 6795.0 | |
| }, | |
| { | |
| "epoch": 1.6756498673740055, | |
| "grad_norm": 0.8052511706103775, | |
| "learning_rate": 2.4296861580680348e-05, | |
| "loss": 0.6941, | |
| "num_input_tokens_seen": 1784311040, | |
| "step": 1975, | |
| "train_runtime": 262520.958, | |
| "train_tokens_per_second": 6796.833 | |
| }, | |
| { | |
| "epoch": 1.679893899204244, | |
| "grad_norm": 0.9680550812544997, | |
| "learning_rate": 2.4173545563323745e-05, | |
| "loss": 0.6812, | |
| "num_input_tokens_seen": 1788858240, | |
| "step": 1980, | |
| "train_runtime": 263164.4906, | |
| "train_tokens_per_second": 6797.491 | |
| }, | |
| { | |
| "epoch": 1.6841379310344826, | |
| "grad_norm": 0.854153010692225, | |
| "learning_rate": 2.4050249673220394e-05, | |
| "loss": 0.6798, | |
| "num_input_tokens_seen": 1793492672, | |
| "step": 1985, | |
| "train_runtime": 263823.0171, | |
| "train_tokens_per_second": 6798.09 | |
| }, | |
| { | |
| "epoch": 1.6883819628647214, | |
| "grad_norm": 0.9065907609733993, | |
| "learning_rate": 2.3926976913085848e-05, | |
| "loss": 0.6844, | |
| "num_input_tokens_seen": 1798141312, | |
| "step": 1990, | |
| "train_runtime": 264461.4917, | |
| "train_tokens_per_second": 6799.256 | |
| }, | |
| { | |
| "epoch": 1.6926259946949602, | |
| "grad_norm": 0.964561250811086, | |
| "learning_rate": 2.3803730285072366e-05, | |
| "loss": 0.6795, | |
| "num_input_tokens_seen": 1802590528, | |
| "step": 1995, | |
| "train_runtime": 265120.6177, | |
| "train_tokens_per_second": 6799.134 | |
| }, | |
| { | |
| "epoch": 1.696870026525199, | |
| "grad_norm": 0.9694821325807637, | |
| "learning_rate": 2.3680512790695818e-05, | |
| "loss": 0.6863, | |
| "num_input_tokens_seen": 1806991488, | |
| "step": 2000, | |
| "train_runtime": 265757.2174, | |
| "train_tokens_per_second": 6799.407 | |
| }, | |
| { | |
| "epoch": 1.7011140583554378, | |
| "grad_norm": 0.8677648954151779, | |
| "learning_rate": 2.3557327430762528e-05, | |
| "loss": 0.6698, | |
| "num_input_tokens_seen": 1811461056, | |
| "step": 2005, | |
| "train_runtime": 266396.2479, | |
| "train_tokens_per_second": 6799.875 | |
| }, | |
| { | |
| "epoch": 1.7053580901856764, | |
| "grad_norm": 0.7613369253339761, | |
| "learning_rate": 2.3434177205296257e-05, | |
| "loss": 0.6613, | |
| "num_input_tokens_seen": 1816142272, | |
| "step": 2010, | |
| "train_runtime": 267054.6771, | |
| "train_tokens_per_second": 6800.638 | |
| }, | |
| { | |
| "epoch": 1.709602122015915, | |
| "grad_norm": 1.0622307149303414, | |
| "learning_rate": 2.3311065113465083e-05, | |
| "loss": 0.6602, | |
| "num_input_tokens_seen": 1820555008, | |
| "step": 2015, | |
| "train_runtime": 267688.1492, | |
| "train_tokens_per_second": 6801.03 | |
| }, | |
| { | |
| "epoch": 1.7138461538461538, | |
| "grad_norm": 0.8964492847174279, | |
| "learning_rate": 2.3187994153508397e-05, | |
| "loss": 0.658, | |
| "num_input_tokens_seen": 1825223808, | |
| "step": 2020, | |
| "train_runtime": 268333.947, | |
| "train_tokens_per_second": 6802.061 | |
| }, | |
| { | |
| "epoch": 1.7180901856763926, | |
| "grad_norm": 1.177224036456473, | |
| "learning_rate": 2.3064967322663893e-05, | |
| "loss": 0.6932, | |
| "num_input_tokens_seen": 1829789568, | |
| "step": 2025, | |
| "train_runtime": 268969.5993, | |
| "train_tokens_per_second": 6802.961 | |
| }, | |
| { | |
| "epoch": 1.7223342175066314, | |
| "grad_norm": 1.0226100260638311, | |
| "learning_rate": 2.2941987617094527e-05, | |
| "loss": 0.6721, | |
| "num_input_tokens_seen": 1834277632, | |
| "step": 2030, | |
| "train_runtime": 269616.3957, | |
| "train_tokens_per_second": 6803.287 | |
| }, | |
| { | |
| "epoch": 1.72657824933687, | |
| "grad_norm": 0.776158652036907, | |
| "learning_rate": 2.2819058031815606e-05, | |
| "loss": 0.685, | |
| "num_input_tokens_seen": 1838997504, | |
| "step": 2035, | |
| "train_runtime": 270312.8678, | |
| "train_tokens_per_second": 6803.219 | |
| }, | |
| { | |
| "epoch": 1.7308222811671088, | |
| "grad_norm": 0.8255759535290057, | |
| "learning_rate": 2.26961815606218e-05, | |
| "loss": 0.6708, | |
| "num_input_tokens_seen": 1843497088, | |
| "step": 2040, | |
| "train_runtime": 270987.9158, | |
| "train_tokens_per_second": 6802.876 | |
| }, | |
| { | |
| "epoch": 1.7350663129973474, | |
| "grad_norm": 0.9846063905318818, | |
| "learning_rate": 2.2573361196014245e-05, | |
| "loss": 0.68, | |
| "num_input_tokens_seen": 1848067968, | |
| "step": 2045, | |
| "train_runtime": 271662.3336, | |
| "train_tokens_per_second": 6802.813 | |
| }, | |
| { | |
| "epoch": 1.7393103448275862, | |
| "grad_norm": 0.8104965013143679, | |
| "learning_rate": 2.2450599929127715e-05, | |
| "loss": 0.6681, | |
| "num_input_tokens_seen": 1852536512, | |
| "step": 2050, | |
| "train_runtime": 272308.8148, | |
| "train_tokens_per_second": 6803.072 | |
| }, | |
| { | |
| "epoch": 1.743554376657825, | |
| "grad_norm": 0.7512768919734117, | |
| "learning_rate": 2.2327900749657677e-05, | |
| "loss": 0.6608, | |
| "num_input_tokens_seen": 1856969408, | |
| "step": 2055, | |
| "train_runtime": 272924.5407, | |
| "train_tokens_per_second": 6803.966 | |
| }, | |
| { | |
| "epoch": 1.7477984084880638, | |
| "grad_norm": 0.938563336869354, | |
| "learning_rate": 2.2205266645787588e-05, | |
| "loss": 0.6436, | |
| "num_input_tokens_seen": 1861364032, | |
| "step": 2060, | |
| "train_runtime": 273525.8351, | |
| "train_tokens_per_second": 6805.076 | |
| }, | |
| { | |
| "epoch": 1.7520424403183024, | |
| "grad_norm": 0.9168142161151926, | |
| "learning_rate": 2.2082700604116046e-05, | |
| "loss": 0.6734, | |
| "num_input_tokens_seen": 1866079936, | |
| "step": 2065, | |
| "train_runtime": 274238.6936, | |
| "train_tokens_per_second": 6804.583 | |
| }, | |
| { | |
| "epoch": 1.7520424403183024, | |
| "eval_loss": 0.729947566986084, | |
| "eval_runtime": 1057.9673, | |
| "eval_samples_per_second": 2.88, | |
| "eval_steps_per_second": 0.091, | |
| "num_input_tokens_seen": 1866079936, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 1.7562864721485412, | |
| "grad_norm": 0.7781219629120125, | |
| "learning_rate": 2.1960205609584066e-05, | |
| "loss": 0.6555, | |
| "num_input_tokens_seen": 1870563904, | |
| "step": 2070, | |
| "train_runtime": 275928.5055, | |
| "train_tokens_per_second": 6779.162 | |
| }, | |
| { | |
| "epoch": 1.7605305039787797, | |
| "grad_norm": 0.8980636445872143, | |
| "learning_rate": 2.183778464540244e-05, | |
| "loss": 0.6756, | |
| "num_input_tokens_seen": 1874859840, | |
| "step": 2075, | |
| "train_runtime": 276546.0381, | |
| "train_tokens_per_second": 6779.558 | |
| }, | |
| { | |
| "epoch": 1.7647745358090186, | |
| "grad_norm": 0.8923077727556372, | |
| "learning_rate": 2.1715440692978994e-05, | |
| "loss": 0.6779, | |
| "num_input_tokens_seen": 1879558656, | |
| "step": 2080, | |
| "train_runtime": 277240.8007, | |
| "train_tokens_per_second": 6779.517 | |
| }, | |
| { | |
| "epoch": 1.7690185676392574, | |
| "grad_norm": 1.1974687941835145, | |
| "learning_rate": 2.159317673184608e-05, | |
| "loss": 0.6671, | |
| "num_input_tokens_seen": 1883979904, | |
| "step": 2085, | |
| "train_runtime": 277861.5498, | |
| "train_tokens_per_second": 6780.283 | |
| }, | |
| { | |
| "epoch": 1.7732625994694962, | |
| "grad_norm": 0.7953214594587166, | |
| "learning_rate": 2.1470995739587944e-05, | |
| "loss": 0.6731, | |
| "num_input_tokens_seen": 1888448384, | |
| "step": 2090, | |
| "train_runtime": 278500.3438, | |
| "train_tokens_per_second": 6780.776 | |
| }, | |
| { | |
| "epoch": 1.7775066312997347, | |
| "grad_norm": 0.7353368817381706, | |
| "learning_rate": 2.13489006917682e-05, | |
| "loss": 0.6567, | |
| "num_input_tokens_seen": 1892962880, | |
| "step": 2095, | |
| "train_runtime": 279180.0365, | |
| "train_tokens_per_second": 6780.438 | |
| }, | |
| { | |
| "epoch": 1.7817506631299733, | |
| "grad_norm": 0.7817976445897892, | |
| "learning_rate": 2.1226894561857447e-05, | |
| "loss": 0.6645, | |
| "num_input_tokens_seen": 1897595968, | |
| "step": 2100, | |
| "train_runtime": 279829.9966, | |
| "train_tokens_per_second": 6781.246 | |
| }, | |
| { | |
| "epoch": 1.7859946949602121, | |
| "grad_norm": 0.8309895762650132, | |
| "learning_rate": 2.1104980321160752e-05, | |
| "loss": 0.6734, | |
| "num_input_tokens_seen": 1902109888, | |
| "step": 2105, | |
| "train_runtime": 280489.1904, | |
| "train_tokens_per_second": 6781.402 | |
| }, | |
| { | |
| "epoch": 1.790238726790451, | |
| "grad_norm": 0.6339748804576945, | |
| "learning_rate": 2.0983160938745382e-05, | |
| "loss": 0.6526, | |
| "num_input_tokens_seen": 1906705216, | |
| "step": 2110, | |
| "train_runtime": 281135.8565, | |
| "train_tokens_per_second": 6782.149 | |
| }, | |
| { | |
| "epoch": 1.7944827586206897, | |
| "grad_norm": 0.8552208011108713, | |
| "learning_rate": 2.086143938136841e-05, | |
| "loss": 0.6563, | |
| "num_input_tokens_seen": 1911218304, | |
| "step": 2115, | |
| "train_runtime": 281788.5646, | |
| "train_tokens_per_second": 6782.455 | |
| }, | |
| { | |
| "epoch": 1.7987267904509285, | |
| "grad_norm": 0.9732618024212317, | |
| "learning_rate": 2.0739818613404513e-05, | |
| "loss": 0.6619, | |
| "num_input_tokens_seen": 1915723008, | |
| "step": 2120, | |
| "train_runtime": 282459.2265, | |
| "train_tokens_per_second": 6782.299 | |
| }, | |
| { | |
| "epoch": 1.8029708222811671, | |
| "grad_norm": 1.0431409571591543, | |
| "learning_rate": 2.06183015967738e-05, | |
| "loss": 0.6451, | |
| "num_input_tokens_seen": 1920464320, | |
| "step": 2125, | |
| "train_runtime": 283127.6969, | |
| "train_tokens_per_second": 6783.032 | |
| }, | |
| { | |
| "epoch": 1.8072148541114057, | |
| "grad_norm": 0.874966660194592, | |
| "learning_rate": 2.0496891290869595e-05, | |
| "loss": 0.6679, | |
| "num_input_tokens_seen": 1924942528, | |
| "step": 2130, | |
| "train_runtime": 283780.283, | |
| "train_tokens_per_second": 6783.214 | |
| }, | |
| { | |
| "epoch": 1.8114588859416445, | |
| "grad_norm": 0.7749558949940442, | |
| "learning_rate": 2.0375590652486482e-05, | |
| "loss": 0.6803, | |
| "num_input_tokens_seen": 1929745408, | |
| "step": 2135, | |
| "train_runtime": 284477.4061, | |
| "train_tokens_per_second": 6783.475 | |
| }, | |
| { | |
| "epoch": 1.8157029177718833, | |
| "grad_norm": 0.7228243017653365, | |
| "learning_rate": 2.025440263574817e-05, | |
| "loss": 0.6338, | |
| "num_input_tokens_seen": 1934284800, | |
| "step": 2140, | |
| "train_runtime": 285111.2012, | |
| "train_tokens_per_second": 6784.317 | |
| }, | |
| { | |
| "epoch": 1.819946949602122, | |
| "grad_norm": 0.7810955976714484, | |
| "learning_rate": 2.013333019203563e-05, | |
| "loss": 0.6532, | |
| "num_input_tokens_seen": 1938877184, | |
| "step": 2145, | |
| "train_runtime": 285771.2844, | |
| "train_tokens_per_second": 6784.717 | |
| }, | |
| { | |
| "epoch": 1.8241909814323607, | |
| "grad_norm": 0.8174083310669077, | |
| "learning_rate": 2.001237626991523e-05, | |
| "loss": 0.6511, | |
| "num_input_tokens_seen": 1943391872, | |
| "step": 2150, | |
| "train_runtime": 286412.497, | |
| "train_tokens_per_second": 6785.29 | |
| }, | |
| { | |
| "epoch": 1.8284350132625995, | |
| "grad_norm": 0.795144872361272, | |
| "learning_rate": 1.989154381506684e-05, | |
| "loss": 0.6598, | |
| "num_input_tokens_seen": 1947919808, | |
| "step": 2155, | |
| "train_runtime": 287052.841, | |
| "train_tokens_per_second": 6785.928 | |
| }, | |
| { | |
| "epoch": 1.832679045092838, | |
| "grad_norm": 0.7485422633125285, | |
| "learning_rate": 1.9770835770212198e-05, | |
| "loss": 0.6566, | |
| "num_input_tokens_seen": 1952470976, | |
| "step": 2160, | |
| "train_runtime": 287718.7456, | |
| "train_tokens_per_second": 6786.04 | |
| }, | |
| { | |
| "epoch": 1.8369230769230769, | |
| "grad_norm": 1.1308794437672134, | |
| "learning_rate": 1.9650255075043163e-05, | |
| "loss": 0.6559, | |
| "num_input_tokens_seen": 1957140480, | |
| "step": 2165, | |
| "train_runtime": 288394.0615, | |
| "train_tokens_per_second": 6786.341 | |
| }, | |
| { | |
| "epoch": 1.8411671087533157, | |
| "grad_norm": 0.8804300984321736, | |
| "learning_rate": 1.9529804666150157e-05, | |
| "loss": 0.6628, | |
| "num_input_tokens_seen": 1961650176, | |
| "step": 2170, | |
| "train_runtime": 289025.4848, | |
| "train_tokens_per_second": 6787.118 | |
| }, | |
| { | |
| "epoch": 1.8454111405835545, | |
| "grad_norm": 0.7255223951059633, | |
| "learning_rate": 1.940948747695066e-05, | |
| "loss": 0.6394, | |
| "num_input_tokens_seen": 1966166336, | |
| "step": 2175, | |
| "train_runtime": 289641.3116, | |
| "train_tokens_per_second": 6788.28 | |
| }, | |
| { | |
| "epoch": 1.849655172413793, | |
| "grad_norm": 0.7679331866159973, | |
| "learning_rate": 1.9289306437617734e-05, | |
| "loss": 0.6643, | |
| "num_input_tokens_seen": 1970829888, | |
| "step": 2180, | |
| "train_runtime": 290308.436, | |
| "train_tokens_per_second": 6788.745 | |
| }, | |
| { | |
| "epoch": 1.8538992042440319, | |
| "grad_norm": 1.142785592894065, | |
| "learning_rate": 1.916926447500871e-05, | |
| "loss": 0.6499, | |
| "num_input_tokens_seen": 1975190528, | |
| "step": 2185, | |
| "train_runtime": 290918.3543, | |
| "train_tokens_per_second": 6789.501 | |
| }, | |
| { | |
| "epoch": 1.8581432360742705, | |
| "grad_norm": 0.9065635652001467, | |
| "learning_rate": 1.904936451259384e-05, | |
| "loss": 0.6607, | |
| "num_input_tokens_seen": 1979864704, | |
| "step": 2190, | |
| "train_runtime": 291624.09, | |
| "train_tokens_per_second": 6789.099 | |
| }, | |
| { | |
| "epoch": 1.8623872679045093, | |
| "grad_norm": 2.2396906301840747, | |
| "learning_rate": 1.892960947038519e-05, | |
| "loss": 0.671, | |
| "num_input_tokens_seen": 1984425600, | |
| "step": 2195, | |
| "train_runtime": 292249.7481, | |
| "train_tokens_per_second": 6790.17 | |
| }, | |
| { | |
| "epoch": 1.866631299734748, | |
| "grad_norm": 0.821508733622114, | |
| "learning_rate": 1.8810002264865444e-05, | |
| "loss": 0.6556, | |
| "num_input_tokens_seen": 1988847360, | |
| "step": 2200, | |
| "train_runtime": 292879.9609, | |
| "train_tokens_per_second": 6790.657 | |
| }, | |
| { | |
| "epoch": 1.8708753315649869, | |
| "grad_norm": 0.7742604167377043, | |
| "learning_rate": 1.8690545808916908e-05, | |
| "loss": 0.6713, | |
| "num_input_tokens_seen": 1993346432, | |
| "step": 2205, | |
| "train_runtime": 293530.2207, | |
| "train_tokens_per_second": 6790.941 | |
| }, | |
| { | |
| "epoch": 1.8751193633952254, | |
| "grad_norm": 0.7780945672181276, | |
| "learning_rate": 1.8571243011750604e-05, | |
| "loss": 0.6511, | |
| "num_input_tokens_seen": 1997950144, | |
| "step": 2210, | |
| "train_runtime": 294160.5008, | |
| "train_tokens_per_second": 6792.041 | |
| }, | |
| { | |
| "epoch": 1.879363395225464, | |
| "grad_norm": 0.7883576009365799, | |
| "learning_rate": 1.8452096778835348e-05, | |
| "loss": 0.6611, | |
| "num_input_tokens_seen": 2002450688, | |
| "step": 2215, | |
| "train_runtime": 294809.6886, | |
| "train_tokens_per_second": 6792.35 | |
| }, | |
| { | |
| "epoch": 1.8836074270557028, | |
| "grad_norm": 1.3753307055301716, | |
| "learning_rate": 1.833311001182707e-05, | |
| "loss": 0.6566, | |
| "num_input_tokens_seen": 2006911360, | |
| "step": 2220, | |
| "train_runtime": 295455.3095, | |
| "train_tokens_per_second": 6792.605 | |
| }, | |
| { | |
| "epoch": 1.8878514588859416, | |
| "grad_norm": 0.7211010767842578, | |
| "learning_rate": 1.821428560849809e-05, | |
| "loss": 0.6493, | |
| "num_input_tokens_seen": 2011488384, | |
| "step": 2225, | |
| "train_runtime": 296132.4569, | |
| "train_tokens_per_second": 6792.529 | |
| }, | |
| { | |
| "epoch": 1.8920954907161804, | |
| "grad_norm": 0.7499760395080998, | |
| "learning_rate": 1.8095626462666548e-05, | |
| "loss": 0.6688, | |
| "num_input_tokens_seen": 2016013248, | |
| "step": 2230, | |
| "train_runtime": 296757.4534, | |
| "train_tokens_per_second": 6793.471 | |
| }, | |
| { | |
| "epoch": 1.8963395225464192, | |
| "grad_norm": 0.7281088629603852, | |
| "learning_rate": 1.797713546412598e-05, | |
| "loss": 0.6691, | |
| "num_input_tokens_seen": 2020582592, | |
| "step": 2235, | |
| "train_runtime": 297412.3677, | |
| "train_tokens_per_second": 6793.875 | |
| }, | |
| { | |
| "epoch": 1.9005835543766578, | |
| "grad_norm": 0.6418455823156107, | |
| "learning_rate": 1.78588154985749e-05, | |
| "loss": 0.6638, | |
| "num_input_tokens_seen": 2025185600, | |
| "step": 2240, | |
| "train_runtime": 298049.559, | |
| "train_tokens_per_second": 6794.795 | |
| }, | |
| { | |
| "epoch": 1.9048275862068964, | |
| "grad_norm": 0.8741945049532132, | |
| "learning_rate": 1.7740669447546513e-05, | |
| "loss": 0.6691, | |
| "num_input_tokens_seen": 2029829952, | |
| "step": 2245, | |
| "train_runtime": 298722.6372, | |
| "train_tokens_per_second": 6795.032 | |
| }, | |
| { | |
| "epoch": 1.9090716180371352, | |
| "grad_norm": 0.841478880460149, | |
| "learning_rate": 1.762270018833857e-05, | |
| "loss": 0.6789, | |
| "num_input_tokens_seen": 2034547456, | |
| "step": 2250, | |
| "train_runtime": 299449.1091, | |
| "train_tokens_per_second": 6794.301 | |
| }, | |
| { | |
| "epoch": 1.913315649867374, | |
| "grad_norm": 0.8723647428014106, | |
| "learning_rate": 1.7504910593943267e-05, | |
| "loss": 0.6579, | |
| "num_input_tokens_seen": 2039144640, | |
| "step": 2255, | |
| "train_runtime": 300085.9181, | |
| "train_tokens_per_second": 6795.203 | |
| }, | |
| { | |
| "epoch": 1.9175596816976128, | |
| "grad_norm": 0.7892235650709017, | |
| "learning_rate": 1.738730353297732e-05, | |
| "loss": 0.6824, | |
| "num_input_tokens_seen": 2043803392, | |
| "step": 2260, | |
| "train_runtime": 300788.7082, | |
| "train_tokens_per_second": 6794.814 | |
| }, | |
| { | |
| "epoch": 1.9218037135278516, | |
| "grad_norm": 0.7722394088543671, | |
| "learning_rate": 1.726988186961202e-05, | |
| "loss": 0.6557, | |
| "num_input_tokens_seen": 2048335872, | |
| "step": 2265, | |
| "train_runtime": 301420.1009, | |
| "train_tokens_per_second": 6795.618 | |
| }, | |
| { | |
| "epoch": 1.9260477453580902, | |
| "grad_norm": 0.922769500095772, | |
| "learning_rate": 1.7152648463503605e-05, | |
| "loss": 0.6614, | |
| "num_input_tokens_seen": 2053131840, | |
| "step": 2270, | |
| "train_runtime": 302141.3095, | |
| "train_tokens_per_second": 6795.27 | |
| }, | |
| { | |
| "epoch": 1.9302917771883288, | |
| "grad_norm": 0.7824828630386448, | |
| "learning_rate": 1.7035606169723488e-05, | |
| "loss": 0.6478, | |
| "num_input_tokens_seen": 2057792768, | |
| "step": 2275, | |
| "train_runtime": 302806.6263, | |
| "train_tokens_per_second": 6795.732 | |
| }, | |
| { | |
| "epoch": 1.9345358090185676, | |
| "grad_norm": 0.8688413702016398, | |
| "learning_rate": 1.69187578386888e-05, | |
| "loss": 0.6524, | |
| "num_input_tokens_seen": 2062355392, | |
| "step": 2280, | |
| "train_runtime": 303443.8425, | |
| "train_tokens_per_second": 6796.498 | |
| }, | |
| { | |
| "epoch": 1.9387798408488064, | |
| "grad_norm": 0.9595105345229777, | |
| "learning_rate": 1.6802106316092966e-05, | |
| "loss": 0.6603, | |
| "num_input_tokens_seen": 2066871424, | |
| "step": 2285, | |
| "train_runtime": 304131.5685, | |
| "train_tokens_per_second": 6795.978 | |
| }, | |
| { | |
| "epoch": 1.9430238726790452, | |
| "grad_norm": 0.8712664797281483, | |
| "learning_rate": 1.6685654442836373e-05, | |
| "loss": 0.6587, | |
| "num_input_tokens_seen": 2071492864, | |
| "step": 2290, | |
| "train_runtime": 304825.7013, | |
| "train_tokens_per_second": 6795.663 | |
| }, | |
| { | |
| "epoch": 1.9472679045092838, | |
| "grad_norm": 0.7732671706732043, | |
| "learning_rate": 1.656940505495722e-05, | |
| "loss": 0.6524, | |
| "num_input_tokens_seen": 2075990976, | |
| "step": 2295, | |
| "train_runtime": 305476.6188, | |
| "train_tokens_per_second": 6795.908 | |
| }, | |
| { | |
| "epoch": 1.9515119363395226, | |
| "grad_norm": 0.8870290841303411, | |
| "learning_rate": 1.645336098356242e-05, | |
| "loss": 0.6405, | |
| "num_input_tokens_seen": 2080441856, | |
| "step": 2300, | |
| "train_runtime": 306111.3026, | |
| "train_tokens_per_second": 6796.358 | |
| }, | |
| { | |
| "epoch": 1.9557559681697612, | |
| "grad_norm": 0.8458061170360918, | |
| "learning_rate": 1.633752505475864e-05, | |
| "loss": 0.6634, | |
| "num_input_tokens_seen": 2085189888, | |
| "step": 2305, | |
| "train_runtime": 306781.713, | |
| "train_tokens_per_second": 6796.982 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 1.3279259324289545, | |
| "learning_rate": 1.622190008958354e-05, | |
| "loss": 0.6473, | |
| "num_input_tokens_seen": 2089579008, | |
| "step": 2310, | |
| "train_runtime": 307390.5024, | |
| "train_tokens_per_second": 6797.8 | |
| }, | |
| { | |
| "epoch": 1.9642440318302388, | |
| "grad_norm": 0.9118221582118621, | |
| "learning_rate": 1.610648890393701e-05, | |
| "loss": 0.6665, | |
| "num_input_tokens_seen": 2094280256, | |
| "step": 2315, | |
| "train_runtime": 308079.9412, | |
| "train_tokens_per_second": 6797.847 | |
| }, | |
| { | |
| "epoch": 1.9684880636604776, | |
| "grad_norm": 0.8711258127478657, | |
| "learning_rate": 1.5991294308512595e-05, | |
| "loss": 0.6587, | |
| "num_input_tokens_seen": 2098990464, | |
| "step": 2320, | |
| "train_runtime": 308818.5174, | |
| "train_tokens_per_second": 6796.841 | |
| }, | |
| { | |
| "epoch": 1.9727320954907162, | |
| "grad_norm": 0.7184830235566304, | |
| "learning_rate": 1.5876319108729077e-05, | |
| "loss": 0.6661, | |
| "num_input_tokens_seen": 2103407872, | |
| "step": 2325, | |
| "train_runtime": 309463.3706, | |
| "train_tokens_per_second": 6796.953 | |
| }, | |
| { | |
| "epoch": 1.976976127320955, | |
| "grad_norm": 0.7747189164106947, | |
| "learning_rate": 1.5761566104662117e-05, | |
| "loss": 0.6518, | |
| "num_input_tokens_seen": 2107807168, | |
| "step": 2330, | |
| "train_runtime": 310069.0252, | |
| "train_tokens_per_second": 6797.864 | |
| }, | |
| { | |
| "epoch": 1.9812201591511935, | |
| "grad_norm": 0.878008091110606, | |
| "learning_rate": 1.5647038090976114e-05, | |
| "loss": 0.6593, | |
| "num_input_tokens_seen": 2112190016, | |
| "step": 2335, | |
| "train_runtime": 310683.7408, | |
| "train_tokens_per_second": 6798.521 | |
| }, | |
| { | |
| "epoch": 1.9854641909814323, | |
| "grad_norm": 0.7138571628786999, | |
| "learning_rate": 1.5532737856856062e-05, | |
| "loss": 0.6507, | |
| "num_input_tokens_seen": 2116487360, | |
| "step": 2340, | |
| "train_runtime": 311271.0869, | |
| "train_tokens_per_second": 6799.499 | |
| }, | |
| { | |
| "epoch": 1.9897082228116711, | |
| "grad_norm": 0.873326228404246, | |
| "learning_rate": 1.5418668185939715e-05, | |
| "loss": 0.6422, | |
| "num_input_tokens_seen": 2120920256, | |
| "step": 2345, | |
| "train_runtime": 311893.1375, | |
| "train_tokens_per_second": 6800.15 | |
| }, | |
| { | |
| "epoch": 1.99395225464191, | |
| "grad_norm": 0.6793405806925589, | |
| "learning_rate": 1.530483185624973e-05, | |
| "loss": 0.6492, | |
| "num_input_tokens_seen": 2125213056, | |
| "step": 2350, | |
| "train_runtime": 312486.8653, | |
| "train_tokens_per_second": 6800.968 | |
| }, | |
| { | |
| "epoch": 1.9981962864721485, | |
| "grad_norm": 0.7840816343012773, | |
| "learning_rate": 1.519123164012603e-05, | |
| "loss": 0.6551, | |
| "num_input_tokens_seen": 2129589248, | |
| "step": 2355, | |
| "train_runtime": 313085.9064, | |
| "train_tokens_per_second": 6801.933 | |
| }, | |
| { | |
| "epoch": 2.0016976127320953, | |
| "grad_norm": 0.7056893265010777, | |
| "learning_rate": 1.507787030415831e-05, | |
| "loss": 0.4932, | |
| "num_input_tokens_seen": 2133444224, | |
| "step": 2360, | |
| "train_runtime": 313640.7613, | |
| "train_tokens_per_second": 6802.191 | |
| }, | |
| { | |
| "epoch": 2.0016976127320953, | |
| "eval_loss": 0.71119624376297, | |
| "eval_runtime": 1056.465, | |
| "eval_samples_per_second": 2.884, | |
| "eval_steps_per_second": 0.091, | |
| "num_input_tokens_seen": 2133444224, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 2.005941644562334, | |
| "grad_norm": 0.8558740917942909, | |
| "learning_rate": 1.4964750609118614e-05, | |
| "loss": 0.5706, | |
| "num_input_tokens_seen": 2138178304, | |
| "step": 2365, | |
| "train_runtime": 315463.138, | |
| "train_tokens_per_second": 6777.902 | |
| }, | |
| { | |
| "epoch": 2.010185676392573, | |
| "grad_norm": 0.8240864942611333, | |
| "learning_rate": 1.4851875309894159e-05, | |
| "loss": 0.5672, | |
| "num_input_tokens_seen": 2142597568, | |
| "step": 2370, | |
| "train_runtime": 316074.4747, | |
| "train_tokens_per_second": 6778.774 | |
| }, | |
| { | |
| "epoch": 2.0144297082228118, | |
| "grad_norm": 0.9448165249761411, | |
| "learning_rate": 1.4739247155420183e-05, | |
| "loss": 0.5481, | |
| "num_input_tokens_seen": 2147211968, | |
| "step": 2375, | |
| "train_runtime": 316715.0106, | |
| "train_tokens_per_second": 6779.634 | |
| }, | |
| { | |
| "epoch": 2.0186737400530506, | |
| "grad_norm": 0.8633716944676363, | |
| "learning_rate": 1.4626868888613027e-05, | |
| "loss": 0.5397, | |
| "num_input_tokens_seen": 2151752896, | |
| "step": 2380, | |
| "train_runtime": 317376.0878, | |
| "train_tokens_per_second": 6779.82 | |
| }, | |
| { | |
| "epoch": 2.022917771883289, | |
| "grad_norm": 0.809745604875603, | |
| "learning_rate": 1.4514743246303359e-05, | |
| "loss": 0.5531, | |
| "num_input_tokens_seen": 2156288704, | |
| "step": 2385, | |
| "train_runtime": 318045.818, | |
| "train_tokens_per_second": 6779.805 | |
| }, | |
| { | |
| "epoch": 2.0271618037135277, | |
| "grad_norm": 0.8968293184086553, | |
| "learning_rate": 1.4402872959169461e-05, | |
| "loss": 0.5337, | |
| "num_input_tokens_seen": 2160913088, | |
| "step": 2390, | |
| "train_runtime": 318702.9313, | |
| "train_tokens_per_second": 6780.336 | |
| }, | |
| { | |
| "epoch": 2.0314058355437665, | |
| "grad_norm": 0.9550756849016048, | |
| "learning_rate": 1.4291260751670816e-05, | |
| "loss": 0.5366, | |
| "num_input_tokens_seen": 2165574976, | |
| "step": 2395, | |
| "train_runtime": 319403.5503, | |
| "train_tokens_per_second": 6780.059 | |
| }, | |
| { | |
| "epoch": 2.0356498673740053, | |
| "grad_norm": 1.207409378421572, | |
| "learning_rate": 1.4179909341981625e-05, | |
| "loss": 0.5345, | |
| "num_input_tokens_seen": 2170092736, | |
| "step": 2400, | |
| "train_runtime": 320033.1824, | |
| "train_tokens_per_second": 6780.837 | |
| }, | |
| { | |
| "epoch": 2.039893899204244, | |
| "grad_norm": 0.8757416767488818, | |
| "learning_rate": 1.4068821441924779e-05, | |
| "loss": 0.5715, | |
| "num_input_tokens_seen": 2174494400, | |
| "step": 2405, | |
| "train_runtime": 320671.8092, | |
| "train_tokens_per_second": 6781.059 | |
| }, | |
| { | |
| "epoch": 2.044137931034483, | |
| "grad_norm": 0.8349852718141944, | |
| "learning_rate": 1.3957999756905643e-05, | |
| "loss": 0.5607, | |
| "num_input_tokens_seen": 2178937728, | |
| "step": 2410, | |
| "train_runtime": 321299.9151, | |
| "train_tokens_per_second": 6781.632 | |
| }, | |
| { | |
| "epoch": 2.0483819628647213, | |
| "grad_norm": 0.8313527873747903, | |
| "learning_rate": 1.3847446985846297e-05, | |
| "loss": 0.5364, | |
| "num_input_tokens_seen": 2183459520, | |
| "step": 2415, | |
| "train_runtime": 321952.4508, | |
| "train_tokens_per_second": 6781.932 | |
| }, | |
| { | |
| "epoch": 2.05262599469496, | |
| "grad_norm": 1.5262840043879295, | |
| "learning_rate": 1.3737165821119752e-05, | |
| "loss": 0.5404, | |
| "num_input_tokens_seen": 2187827712, | |
| "step": 2420, | |
| "train_runtime": 322540.9554, | |
| "train_tokens_per_second": 6783.1 | |
| }, | |
| { | |
| "epoch": 2.056870026525199, | |
| "grad_norm": 2.1756261698457076, | |
| "learning_rate": 1.3627158948484391e-05, | |
| "loss": 0.5469, | |
| "num_input_tokens_seen": 2192377216, | |
| "step": 2425, | |
| "train_runtime": 323190.2135, | |
| "train_tokens_per_second": 6783.551 | |
| }, | |
| { | |
| "epoch": 2.0611140583554377, | |
| "grad_norm": 1.0913830108446643, | |
| "learning_rate": 1.351742904701856e-05, | |
| "loss": 0.5683, | |
| "num_input_tokens_seen": 2196995328, | |
| "step": 2430, | |
| "train_runtime": 323877.7351, | |
| "train_tokens_per_second": 6783.41 | |
| }, | |
| { | |
| "epoch": 2.0653580901856765, | |
| "grad_norm": 0.9641578470507003, | |
| "learning_rate": 1.3407978789055311e-05, | |
| "loss": 0.551, | |
| "num_input_tokens_seen": 2201593728, | |
| "step": 2435, | |
| "train_runtime": 324546.3078, | |
| "train_tokens_per_second": 6783.604 | |
| }, | |
| { | |
| "epoch": 2.0696021220159153, | |
| "grad_norm": 0.79070536279513, | |
| "learning_rate": 1.3298810840117348e-05, | |
| "loss": 0.5296, | |
| "num_input_tokens_seen": 2206299712, | |
| "step": 2440, | |
| "train_runtime": 325212.3944, | |
| "train_tokens_per_second": 6784.181 | |
| }, | |
| { | |
| "epoch": 2.0738461538461537, | |
| "grad_norm": 0.9950759732472904, | |
| "learning_rate": 1.3189927858852092e-05, | |
| "loss": 0.5623, | |
| "num_input_tokens_seen": 2210768256, | |
| "step": 2445, | |
| "train_runtime": 325832.427, | |
| "train_tokens_per_second": 6784.985 | |
| }, | |
| { | |
| "epoch": 2.0780901856763925, | |
| "grad_norm": 1.0145751132111058, | |
| "learning_rate": 1.3081332496966923e-05, | |
| "loss": 0.5454, | |
| "num_input_tokens_seen": 2215064064, | |
| "step": 2450, | |
| "train_runtime": 326434.2676, | |
| "train_tokens_per_second": 6785.636 | |
| }, | |
| { | |
| "epoch": 2.0823342175066313, | |
| "grad_norm": 1.2823230060644373, | |
| "learning_rate": 1.297302739916463e-05, | |
| "loss": 0.5435, | |
| "num_input_tokens_seen": 2219600896, | |
| "step": 2455, | |
| "train_runtime": 327106.8846, | |
| "train_tokens_per_second": 6785.552 | |
| }, | |
| { | |
| "epoch": 2.08657824933687, | |
| "grad_norm": 0.9419195367761739, | |
| "learning_rate": 1.2865015203078996e-05, | |
| "loss": 0.5445, | |
| "num_input_tokens_seen": 2224140416, | |
| "step": 2460, | |
| "train_runtime": 327852.8283, | |
| "train_tokens_per_second": 6783.96 | |
| }, | |
| { | |
| "epoch": 2.090822281167109, | |
| "grad_norm": 1.125875379008506, | |
| "learning_rate": 1.27572985392105e-05, | |
| "loss": 0.5443, | |
| "num_input_tokens_seen": 2228717248, | |
| "step": 2465, | |
| "train_runtime": 328535.4625, | |
| "train_tokens_per_second": 6783.795 | |
| }, | |
| { | |
| "epoch": 2.0950663129973477, | |
| "grad_norm": 1.1324409125579475, | |
| "learning_rate": 1.2649880030862393e-05, | |
| "loss": 0.5599, | |
| "num_input_tokens_seen": 2233320128, | |
| "step": 2470, | |
| "train_runtime": 329151.185, | |
| "train_tokens_per_second": 6785.089 | |
| }, | |
| { | |
| "epoch": 2.099310344827586, | |
| "grad_norm": 0.931872300199955, | |
| "learning_rate": 1.2542762294076631e-05, | |
| "loss": 0.5637, | |
| "num_input_tokens_seen": 2237752384, | |
| "step": 2475, | |
| "train_runtime": 329773.7972, | |
| "train_tokens_per_second": 6785.719 | |
| }, | |
| { | |
| "epoch": 2.103554376657825, | |
| "grad_norm": 0.8489286981286124, | |
| "learning_rate": 1.2435947937570355e-05, | |
| "loss": 0.5598, | |
| "num_input_tokens_seen": 2242141568, | |
| "step": 2480, | |
| "train_runtime": 330402.9852, | |
| "train_tokens_per_second": 6786.081 | |
| }, | |
| { | |
| "epoch": 2.1077984084880637, | |
| "grad_norm": 1.1131489144682933, | |
| "learning_rate": 1.2329439562672178e-05, | |
| "loss": 0.5418, | |
| "num_input_tokens_seen": 2246654592, | |
| "step": 2485, | |
| "train_runtime": 331056.83, | |
| "train_tokens_per_second": 6786.311 | |
| }, | |
| { | |
| "epoch": 2.1120424403183025, | |
| "grad_norm": 0.9322580519781613, | |
| "learning_rate": 1.2223239763258965e-05, | |
| "loss": 0.5505, | |
| "num_input_tokens_seen": 2251247168, | |
| "step": 2490, | |
| "train_runtime": 331724.102, | |
| "train_tokens_per_second": 6786.505 | |
| }, | |
| { | |
| "epoch": 2.1162864721485413, | |
| "grad_norm": 0.9283803102424425, | |
| "learning_rate": 1.2117351125692603e-05, | |
| "loss": 0.5568, | |
| "num_input_tokens_seen": 2255680768, | |
| "step": 2495, | |
| "train_runtime": 332394.4276, | |
| "train_tokens_per_second": 6786.157 | |
| }, | |
| { | |
| "epoch": 2.12053050397878, | |
| "grad_norm": 1.1047391998064584, | |
| "learning_rate": 1.2011776228757024e-05, | |
| "loss": 0.5505, | |
| "num_input_tokens_seen": 2260087168, | |
| "step": 2500, | |
| "train_runtime": 333031.3463, | |
| "train_tokens_per_second": 6786.41 | |
| }, | |
| { | |
| "epoch": 2.1247745358090184, | |
| "grad_norm": 0.975091099261222, | |
| "learning_rate": 1.1906517643595408e-05, | |
| "loss": 0.5573, | |
| "num_input_tokens_seen": 2264578560, | |
| "step": 2505, | |
| "train_runtime": 333699.72, | |
| "train_tokens_per_second": 6786.276 | |
| }, | |
| { | |
| "epoch": 2.1290185676392572, | |
| "grad_norm": 1.1511567847202058, | |
| "learning_rate": 1.180157793364756e-05, | |
| "loss": 0.5413, | |
| "num_input_tokens_seen": 2269041472, | |
| "step": 2510, | |
| "train_runtime": 334339.6624, | |
| "train_tokens_per_second": 6786.636 | |
| }, | |
| { | |
| "epoch": 2.133262599469496, | |
| "grad_norm": 1.0466460806645501, | |
| "learning_rate": 1.1696959654587474e-05, | |
| "loss": 0.5493, | |
| "num_input_tokens_seen": 2273598720, | |
| "step": 2515, | |
| "train_runtime": 334997.2489, | |
| "train_tokens_per_second": 6786.918 | |
| }, | |
| { | |
| "epoch": 2.137506631299735, | |
| "grad_norm": 1.0291472253443341, | |
| "learning_rate": 1.1592665354261118e-05, | |
| "loss": 0.5456, | |
| "num_input_tokens_seen": 2278146944, | |
| "step": 2520, | |
| "train_runtime": 335632.7848, | |
| "train_tokens_per_second": 6787.617 | |
| }, | |
| { | |
| "epoch": 2.1417506631299736, | |
| "grad_norm": 1.0091794838431885, | |
| "learning_rate": 1.1488697572624351e-05, | |
| "loss": 0.5668, | |
| "num_input_tokens_seen": 2282573568, | |
| "step": 2525, | |
| "train_runtime": 336253.6573, | |
| "train_tokens_per_second": 6788.249 | |
| }, | |
| { | |
| "epoch": 2.145994694960212, | |
| "grad_norm": 0.85992217862161, | |
| "learning_rate": 1.138505884168109e-05, | |
| "loss": 0.5308, | |
| "num_input_tokens_seen": 2287001600, | |
| "step": 2530, | |
| "train_runtime": 336902.3826, | |
| "train_tokens_per_second": 6788.321 | |
| }, | |
| { | |
| "epoch": 2.150238726790451, | |
| "grad_norm": 0.8608747847941026, | |
| "learning_rate": 1.1281751685421646e-05, | |
| "loss": 0.5605, | |
| "num_input_tokens_seen": 2291406080, | |
| "step": 2535, | |
| "train_runtime": 337530.1254, | |
| "train_tokens_per_second": 6788.745 | |
| }, | |
| { | |
| "epoch": 2.1544827586206896, | |
| "grad_norm": 1.0839910126914474, | |
| "learning_rate": 1.1178778619761209e-05, | |
| "loss": 0.5507, | |
| "num_input_tokens_seen": 2295897472, | |
| "step": 2540, | |
| "train_runtime": 338146.0422, | |
| "train_tokens_per_second": 6789.662 | |
| }, | |
| { | |
| "epoch": 2.1587267904509284, | |
| "grad_norm": 0.8806247078251732, | |
| "learning_rate": 1.1076142152478686e-05, | |
| "loss": 0.5449, | |
| "num_input_tokens_seen": 2300505152, | |
| "step": 2545, | |
| "train_runtime": 338818.075, | |
| "train_tokens_per_second": 6789.795 | |
| }, | |
| { | |
| "epoch": 2.162970822281167, | |
| "grad_norm": 1.1516459030706268, | |
| "learning_rate": 1.0973844783155474e-05, | |
| "loss": 0.5267, | |
| "num_input_tokens_seen": 2304838976, | |
| "step": 2550, | |
| "train_runtime": 339432.6849, | |
| "train_tokens_per_second": 6790.268 | |
| }, | |
| { | |
| "epoch": 2.167214854111406, | |
| "grad_norm": 0.8899542374126376, | |
| "learning_rate": 1.0871889003114743e-05, | |
| "loss": 0.5415, | |
| "num_input_tokens_seen": 2309598144, | |
| "step": 2555, | |
| "train_runtime": 340127.2354, | |
| "train_tokens_per_second": 6790.395 | |
| }, | |
| { | |
| "epoch": 2.1714588859416444, | |
| "grad_norm": 0.9275051879320549, | |
| "learning_rate": 1.0770277295360629e-05, | |
| "loss": 0.535, | |
| "num_input_tokens_seen": 2314332800, | |
| "step": 2560, | |
| "train_runtime": 340820.4477, | |
| "train_tokens_per_second": 6790.475 | |
| }, | |
| { | |
| "epoch": 2.175702917771883, | |
| "grad_norm": 1.0929509877136006, | |
| "learning_rate": 1.066901213451785e-05, | |
| "loss": 0.5407, | |
| "num_input_tokens_seen": 2318735744, | |
| "step": 2565, | |
| "train_runtime": 341455.108, | |
| "train_tokens_per_second": 6790.748 | |
| }, | |
| { | |
| "epoch": 2.179946949602122, | |
| "grad_norm": 1.2365680582016083, | |
| "learning_rate": 1.0568095986771414e-05, | |
| "loss": 0.5256, | |
| "num_input_tokens_seen": 2323017216, | |
| "step": 2570, | |
| "train_runtime": 342078.7972, | |
| "train_tokens_per_second": 6790.883 | |
| }, | |
| { | |
| "epoch": 2.184190981432361, | |
| "grad_norm": 0.9117201092121693, | |
| "learning_rate": 1.0467531309806547e-05, | |
| "loss": 0.5471, | |
| "num_input_tokens_seen": 2327511360, | |
| "step": 2575, | |
| "train_runtime": 342733.0181, | |
| "train_tokens_per_second": 6791.033 | |
| }, | |
| { | |
| "epoch": 2.1884350132625996, | |
| "grad_norm": 0.9389916789629429, | |
| "learning_rate": 1.0367320552748849e-05, | |
| "loss": 0.533, | |
| "num_input_tokens_seen": 2332033792, | |
| "step": 2580, | |
| "train_runtime": 343352.1052, | |
| "train_tokens_per_second": 6791.96 | |
| }, | |
| { | |
| "epoch": 2.1926790450928384, | |
| "grad_norm": 0.9268628668955959, | |
| "learning_rate": 1.0267466156104655e-05, | |
| "loss": 0.5493, | |
| "num_input_tokens_seen": 2336623744, | |
| "step": 2585, | |
| "train_runtime": 344011.0131, | |
| "train_tokens_per_second": 6792.293 | |
| }, | |
| { | |
| "epoch": 2.1969230769230768, | |
| "grad_norm": 0.9632958346283522, | |
| "learning_rate": 1.0167970551701586e-05, | |
| "loss": 0.5585, | |
| "num_input_tokens_seen": 2341059904, | |
| "step": 2590, | |
| "train_runtime": 344653.4787, | |
| "train_tokens_per_second": 6792.503 | |
| }, | |
| { | |
| "epoch": 2.2011671087533156, | |
| "grad_norm": 0.9858607973372651, | |
| "learning_rate": 1.0068836162629333e-05, | |
| "loss": 0.551, | |
| "num_input_tokens_seen": 2345544192, | |
| "step": 2595, | |
| "train_runtime": 345280.1341, | |
| "train_tokens_per_second": 6793.163 | |
| }, | |
| { | |
| "epoch": 2.2054111405835544, | |
| "grad_norm": 0.9219599125064627, | |
| "learning_rate": 9.970065403180648e-06, | |
| "loss": 0.5456, | |
| "num_input_tokens_seen": 2350091328, | |
| "step": 2600, | |
| "train_runtime": 345944.8214, | |
| "train_tokens_per_second": 6793.255 | |
| }, | |
| { | |
| "epoch": 2.209655172413793, | |
| "grad_norm": 1.0056863158147027, | |
| "learning_rate": 9.871660678792532e-06, | |
| "loss": 0.5573, | |
| "num_input_tokens_seen": 2354507008, | |
| "step": 2605, | |
| "train_runtime": 346586.1214, | |
| "train_tokens_per_second": 6793.426 | |
| }, | |
| { | |
| "epoch": 2.213899204244032, | |
| "grad_norm": 0.805746716680293, | |
| "learning_rate": 9.77362438598769e-06, | |
| "loss": 0.5316, | |
| "num_input_tokens_seen": 2358953152, | |
| "step": 2610, | |
| "train_runtime": 347186.5132, | |
| "train_tokens_per_second": 6794.484 | |
| }, | |
| { | |
| "epoch": 2.2181432360742708, | |
| "grad_norm": 1.0149168469719647, | |
| "learning_rate": 9.675958912316091e-06, | |
| "loss": 0.5582, | |
| "num_input_tokens_seen": 2363513408, | |
| "step": 2615, | |
| "train_runtime": 347848.8662, | |
| "train_tokens_per_second": 6794.656 | |
| }, | |
| { | |
| "epoch": 2.222387267904509, | |
| "grad_norm": 0.9339666893590254, | |
| "learning_rate": 9.578666636296946e-06, | |
| "loss": 0.5468, | |
| "num_input_tokens_seen": 2368091328, | |
| "step": 2620, | |
| "train_runtime": 348511.7283, | |
| "train_tokens_per_second": 6794.868 | |
| }, | |
| { | |
| "epoch": 2.226631299734748, | |
| "grad_norm": 0.8237811196852415, | |
| "learning_rate": 9.481749927360627e-06, | |
| "loss": 0.5219, | |
| "num_input_tokens_seen": 2372630144, | |
| "step": 2625, | |
| "train_runtime": 349198.393, | |
| "train_tokens_per_second": 6794.505 | |
| }, | |
| { | |
| "epoch": 2.2308753315649867, | |
| "grad_norm": 0.9312360048745897, | |
| "learning_rate": 9.385211145791126e-06, | |
| "loss": 0.5316, | |
| "num_input_tokens_seen": 2377249792, | |
| "step": 2630, | |
| "train_runtime": 349861.9157, | |
| "train_tokens_per_second": 6794.823 | |
| }, | |
| { | |
| "epoch": 2.2351193633952255, | |
| "grad_norm": 1.0941579207092957, | |
| "learning_rate": 9.289052642668416e-06, | |
| "loss": 0.5307, | |
| "num_input_tokens_seen": 2381752576, | |
| "step": 2635, | |
| "train_runtime": 350477.7588, | |
| "train_tokens_per_second": 6795.731 | |
| }, | |
| { | |
| "epoch": 2.2393633952254643, | |
| "grad_norm": 1.054157711767946, | |
| "learning_rate": 9.193276759811339e-06, | |
| "loss": 0.5608, | |
| "num_input_tokens_seen": 2386147072, | |
| "step": 2640, | |
| "train_runtime": 351111.8395, | |
| "train_tokens_per_second": 6795.974 | |
| }, | |
| { | |
| "epoch": 2.2436074270557027, | |
| "grad_norm": 0.8910120445576349, | |
| "learning_rate": 9.097885829720443e-06, | |
| "loss": 0.5412, | |
| "num_input_tokens_seen": 2390898880, | |
| "step": 2645, | |
| "train_runtime": 351810.8417, | |
| "train_tokens_per_second": 6795.978 | |
| }, | |
| { | |
| "epoch": 2.2478514588859415, | |
| "grad_norm": 1.2007055994992777, | |
| "learning_rate": 9.002882175521272e-06, | |
| "loss": 0.5399, | |
| "num_input_tokens_seen": 2395503232, | |
| "step": 2650, | |
| "train_runtime": 352435.7792, | |
| "train_tokens_per_second": 6796.992 | |
| }, | |
| { | |
| "epoch": 2.2520954907161803, | |
| "grad_norm": 1.1196939855484749, | |
| "learning_rate": 8.90826811090775e-06, | |
| "loss": 0.5167, | |
| "num_input_tokens_seen": 2399908928, | |
| "step": 2655, | |
| "train_runtime": 353060.1062, | |
| "train_tokens_per_second": 6797.451 | |
| }, | |
| { | |
| "epoch": 2.2520954907161803, | |
| "eval_loss": 0.7378480434417725, | |
| "eval_runtime": 1056.8523, | |
| "eval_samples_per_second": 2.883, | |
| "eval_steps_per_second": 0.091, | |
| "num_input_tokens_seen": 2399908928, | |
| "step": 2655 | |
| }, | |
| { | |
| "epoch": 2.256339522546419, | |
| "grad_norm": 1.2536339396950835, | |
| "learning_rate": 8.814045940085832e-06, | |
| "loss": 0.5282, | |
| "num_input_tokens_seen": 2404340864, | |
| "step": 2660, | |
| "train_runtime": 354734.293, | |
| "train_tokens_per_second": 6777.864 | |
| }, | |
| { | |
| "epoch": 2.260583554376658, | |
| "grad_norm": 1.0188614881940494, | |
| "learning_rate": 8.720217957717409e-06, | |
| "loss": 0.5746, | |
| "num_input_tokens_seen": 2408905408, | |
| "step": 2665, | |
| "train_runtime": 355379.4662, | |
| "train_tokens_per_second": 6778.403 | |
| }, | |
| { | |
| "epoch": 2.2648275862068967, | |
| "grad_norm": 0.8497257096281086, | |
| "learning_rate": 8.62678644886439e-06, | |
| "loss": 0.5434, | |
| "num_input_tokens_seen": 2413171200, | |
| "step": 2670, | |
| "train_runtime": 355989.2119, | |
| "train_tokens_per_second": 6778.776 | |
| }, | |
| { | |
| "epoch": 2.269071618037135, | |
| "grad_norm": 0.9253941979058675, | |
| "learning_rate": 8.533753688933093e-06, | |
| "loss": 0.5716, | |
| "num_input_tokens_seen": 2417806976, | |
| "step": 2675, | |
| "train_runtime": 356663.8352, | |
| "train_tokens_per_second": 6778.952 | |
| }, | |
| { | |
| "epoch": 2.273315649867374, | |
| "grad_norm": 0.7919650133709085, | |
| "learning_rate": 8.441121943618797e-06, | |
| "loss": 0.5217, | |
| "num_input_tokens_seen": 2422469504, | |
| "step": 2680, | |
| "train_runtime": 357398.8789, | |
| "train_tokens_per_second": 6778.056 | |
| }, | |
| { | |
| "epoch": 2.2775596816976127, | |
| "grad_norm": 0.9826186100423545, | |
| "learning_rate": 8.34889346885058e-06, | |
| "loss": 0.537, | |
| "num_input_tokens_seen": 2427001408, | |
| "step": 2685, | |
| "train_runtime": 358060.275, | |
| "train_tokens_per_second": 6778.192 | |
| }, | |
| { | |
| "epoch": 2.2818037135278515, | |
| "grad_norm": 0.9270721696870025, | |
| "learning_rate": 8.257070510736375e-06, | |
| "loss": 0.5473, | |
| "num_input_tokens_seen": 2431585984, | |
| "step": 2690, | |
| "train_runtime": 358720.1033, | |
| "train_tokens_per_second": 6778.505 | |
| }, | |
| { | |
| "epoch": 2.2860477453580903, | |
| "grad_norm": 1.0463399115668766, | |
| "learning_rate": 8.165655305508283e-06, | |
| "loss": 0.5199, | |
| "num_input_tokens_seen": 2436195008, | |
| "step": 2695, | |
| "train_runtime": 359371.4989, | |
| "train_tokens_per_second": 6779.043 | |
| }, | |
| { | |
| "epoch": 2.290291777188329, | |
| "grad_norm": 0.8433980949747677, | |
| "learning_rate": 8.074650079468061e-06, | |
| "loss": 0.5406, | |
| "num_input_tokens_seen": 2440643712, | |
| "step": 2700, | |
| "train_runtime": 360023.2487, | |
| "train_tokens_per_second": 6779.128 | |
| }, | |
| { | |
| "epoch": 2.2945358090185675, | |
| "grad_norm": 0.8344761722848487, | |
| "learning_rate": 7.984057048932994e-06, | |
| "loss": 0.523, | |
| "num_input_tokens_seen": 2445383360, | |
| "step": 2705, | |
| "train_runtime": 360768.2247, | |
| "train_tokens_per_second": 6778.267 | |
| }, | |
| { | |
| "epoch": 2.2987798408488063, | |
| "grad_norm": 0.9238013027939782, | |
| "learning_rate": 7.893878420181814e-06, | |
| "loss": 0.5394, | |
| "num_input_tokens_seen": 2449944832, | |
| "step": 2710, | |
| "train_runtime": 361432.627, | |
| "train_tokens_per_second": 6778.427 | |
| }, | |
| { | |
| "epoch": 2.303023872679045, | |
| "grad_norm": 0.830003880013244, | |
| "learning_rate": 7.80411638940107e-06, | |
| "loss": 0.5329, | |
| "num_input_tokens_seen": 2454313856, | |
| "step": 2715, | |
| "train_runtime": 362054.2575, | |
| "train_tokens_per_second": 6778.856 | |
| }, | |
| { | |
| "epoch": 2.307267904509284, | |
| "grad_norm": 1.2592294301355798, | |
| "learning_rate": 7.714773142631553e-06, | |
| "loss": 0.5287, | |
| "num_input_tokens_seen": 2458717888, | |
| "step": 2720, | |
| "train_runtime": 362669.4395, | |
| "train_tokens_per_second": 6779.501 | |
| }, | |
| { | |
| "epoch": 2.3115119363395227, | |
| "grad_norm": 1.04619682795218, | |
| "learning_rate": 7.625850855715125e-06, | |
| "loss": 0.5494, | |
| "num_input_tokens_seen": 2463239104, | |
| "step": 2725, | |
| "train_runtime": 363325.7794, | |
| "train_tokens_per_second": 6779.698 | |
| }, | |
| { | |
| "epoch": 2.3157559681697615, | |
| "grad_norm": 0.9229168180828327, | |
| "learning_rate": 7.53735169424169e-06, | |
| "loss": 0.5247, | |
| "num_input_tokens_seen": 2467903744, | |
| "step": 2730, | |
| "train_runtime": 364007.0501, | |
| "train_tokens_per_second": 6779.824 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 0.9124446948723857, | |
| "learning_rate": 7.449277813496469e-06, | |
| "loss": 0.5264, | |
| "num_input_tokens_seen": 2472315328, | |
| "step": 2735, | |
| "train_runtime": 364630.5034, | |
| "train_tokens_per_second": 6780.331 | |
| }, | |
| { | |
| "epoch": 2.3242440318302386, | |
| "grad_norm": 0.9029907706736199, | |
| "learning_rate": 7.361631358407511e-06, | |
| "loss": 0.5462, | |
| "num_input_tokens_seen": 2476917568, | |
| "step": 2740, | |
| "train_runtime": 365293.8544, | |
| "train_tokens_per_second": 6780.617 | |
| }, | |
| { | |
| "epoch": 2.3284880636604774, | |
| "grad_norm": 0.965769243054048, | |
| "learning_rate": 7.274414463493457e-06, | |
| "loss": 0.5276, | |
| "num_input_tokens_seen": 2481395968, | |
| "step": 2745, | |
| "train_runtime": 365941.4705, | |
| "train_tokens_per_second": 6780.855 | |
| }, | |
| { | |
| "epoch": 2.3327320954907163, | |
| "grad_norm": 1.0526371028734811, | |
| "learning_rate": 7.1876292528115425e-06, | |
| "loss": 0.524, | |
| "num_input_tokens_seen": 2485823424, | |
| "step": 2750, | |
| "train_runtime": 366578.7434, | |
| "train_tokens_per_second": 6781.144 | |
| }, | |
| { | |
| "epoch": 2.336976127320955, | |
| "grad_norm": 1.029915538876609, | |
| "learning_rate": 7.101277839905887e-06, | |
| "loss": 0.5337, | |
| "num_input_tokens_seen": 2490461696, | |
| "step": 2755, | |
| "train_runtime": 367244.9697, | |
| "train_tokens_per_second": 6781.473 | |
| }, | |
| { | |
| "epoch": 2.3412201591511934, | |
| "grad_norm": 0.8452644471628386, | |
| "learning_rate": 7.015362327756009e-06, | |
| "loss": 0.5565, | |
| "num_input_tokens_seen": 2494895104, | |
| "step": 2760, | |
| "train_runtime": 367940.8003, | |
| "train_tokens_per_second": 6780.697 | |
| }, | |
| { | |
| "epoch": 2.345464190981432, | |
| "grad_norm": 1.0284851305831375, | |
| "learning_rate": 6.92988480872562e-06, | |
| "loss": 0.5551, | |
| "num_input_tokens_seen": 2499453376, | |
| "step": 2765, | |
| "train_runtime": 368624.734, | |
| "train_tokens_per_second": 6780.482 | |
| }, | |
| { | |
| "epoch": 2.349708222811671, | |
| "grad_norm": 0.9746799945929299, | |
| "learning_rate": 6.844847364511667e-06, | |
| "loss": 0.5652, | |
| "num_input_tokens_seen": 2503898176, | |
| "step": 2770, | |
| "train_runtime": 369245.4874, | |
| "train_tokens_per_second": 6781.121 | |
| }, | |
| { | |
| "epoch": 2.35395225464191, | |
| "grad_norm": 1.0403632612371465, | |
| "learning_rate": 6.760252066093598e-06, | |
| "loss": 0.536, | |
| "num_input_tokens_seen": 2508404032, | |
| "step": 2775, | |
| "train_runtime": 369898.6243, | |
| "train_tokens_per_second": 6781.328 | |
| }, | |
| { | |
| "epoch": 2.3581962864721486, | |
| "grad_norm": 1.1324249124610484, | |
| "learning_rate": 6.676100973683019e-06, | |
| "loss": 0.5293, | |
| "num_input_tokens_seen": 2512983360, | |
| "step": 2780, | |
| "train_runtime": 370586.7059, | |
| "train_tokens_per_second": 6781.094 | |
| }, | |
| { | |
| "epoch": 2.3624403183023874, | |
| "grad_norm": 0.9136829751433924, | |
| "learning_rate": 6.592396136673396e-06, | |
| "loss": 0.5133, | |
| "num_input_tokens_seen": 2517545792, | |
| "step": 2785, | |
| "train_runtime": 371255.3251, | |
| "train_tokens_per_second": 6781.171 | |
| }, | |
| { | |
| "epoch": 2.3666843501326262, | |
| "grad_norm": 1.0337343562459835, | |
| "learning_rate": 6.509139593590263e-06, | |
| "loss": 0.5449, | |
| "num_input_tokens_seen": 2521995456, | |
| "step": 2790, | |
| "train_runtime": 371858.1677, | |
| "train_tokens_per_second": 6782.144 | |
| }, | |
| { | |
| "epoch": 2.3709283819628646, | |
| "grad_norm": 0.8876485451947176, | |
| "learning_rate": 6.426333372041482e-06, | |
| "loss": 0.5321, | |
| "num_input_tokens_seen": 2526578304, | |
| "step": 2795, | |
| "train_runtime": 372512.2967, | |
| "train_tokens_per_second": 6782.537 | |
| }, | |
| { | |
| "epoch": 2.3751724137931034, | |
| "grad_norm": 1.01633470136039, | |
| "learning_rate": 6.343979488667923e-06, | |
| "loss": 0.546, | |
| "num_input_tokens_seen": 2531181120, | |
| "step": 2800, | |
| "train_runtime": 373190.8361, | |
| "train_tokens_per_second": 6782.538 | |
| }, | |
| { | |
| "epoch": 2.379416445623342, | |
| "grad_norm": 1.17898509851511, | |
| "learning_rate": 6.2620799490943296e-06, | |
| "loss": 0.5339, | |
| "num_input_tokens_seen": 2535604096, | |
| "step": 2805, | |
| "train_runtime": 373804.9218, | |
| "train_tokens_per_second": 6783.228 | |
| }, | |
| { | |
| "epoch": 2.383660477453581, | |
| "grad_norm": 0.995446476535709, | |
| "learning_rate": 6.18063674788047e-06, | |
| "loss": 0.5294, | |
| "num_input_tokens_seen": 2539962496, | |
| "step": 2810, | |
| "train_runtime": 374428.8855, | |
| "train_tokens_per_second": 6783.565 | |
| }, | |
| { | |
| "epoch": 2.38790450928382, | |
| "grad_norm": 1.4781781798293818, | |
| "learning_rate": 6.099651868472578e-06, | |
| "loss": 0.5377, | |
| "num_input_tokens_seen": 2544523264, | |
| "step": 2815, | |
| "train_runtime": 375082.1614, | |
| "train_tokens_per_second": 6783.909 | |
| }, | |
| { | |
| "epoch": 2.392148541114058, | |
| "grad_norm": 0.9042911431359419, | |
| "learning_rate": 6.0191272831550296e-06, | |
| "loss": 0.5277, | |
| "num_input_tokens_seen": 2549001728, | |
| "step": 2820, | |
| "train_runtime": 375739.4269, | |
| "train_tokens_per_second": 6783.961 | |
| }, | |
| { | |
| "epoch": 2.396392572944297, | |
| "grad_norm": 0.9427728279286258, | |
| "learning_rate": 5.939064953002324e-06, | |
| "loss": 0.5286, | |
| "num_input_tokens_seen": 2553568448, | |
| "step": 2825, | |
| "train_runtime": 376350.7934, | |
| "train_tokens_per_second": 6785.075 | |
| }, | |
| { | |
| "epoch": 2.4006366047745358, | |
| "grad_norm": 1.140690055341589, | |
| "learning_rate": 5.859466827831325e-06, | |
| "loss": 0.5404, | |
| "num_input_tokens_seen": 2557913088, | |
| "step": 2830, | |
| "train_runtime": 376975.3041, | |
| "train_tokens_per_second": 6785.36 | |
| }, | |
| { | |
| "epoch": 2.4048806366047746, | |
| "grad_norm": 0.973992690753586, | |
| "learning_rate": 5.780334846153762e-06, | |
| "loss": 0.5361, | |
| "num_input_tokens_seen": 2562388224, | |
| "step": 2835, | |
| "train_runtime": 377604.1066, | |
| "train_tokens_per_second": 6785.912 | |
| }, | |
| { | |
| "epoch": 2.4091246684350134, | |
| "grad_norm": 0.8947292451598252, | |
| "learning_rate": 5.701670935129033e-06, | |
| "loss": 0.5458, | |
| "num_input_tokens_seen": 2566790976, | |
| "step": 2840, | |
| "train_runtime": 378241.5611, | |
| "train_tokens_per_second": 6786.116 | |
| }, | |
| { | |
| "epoch": 2.413368700265252, | |
| "grad_norm": 0.9334940824258273, | |
| "learning_rate": 5.623477010517269e-06, | |
| "loss": 0.5225, | |
| "num_input_tokens_seen": 2571270592, | |
| "step": 2845, | |
| "train_runtime": 378873.6154, | |
| "train_tokens_per_second": 6786.618 | |
| }, | |
| { | |
| "epoch": 2.4176127320954905, | |
| "grad_norm": 0.7746666166786925, | |
| "learning_rate": 5.545754976632672e-06, | |
| "loss": 0.534, | |
| "num_input_tokens_seen": 2575889152, | |
| "step": 2850, | |
| "train_runtime": 379547.952, | |
| "train_tokens_per_second": 6786.729 | |
| }, | |
| { | |
| "epoch": 2.4218567639257294, | |
| "grad_norm": 1.0182336381313688, | |
| "learning_rate": 5.468506726297149e-06, | |
| "loss": 0.5221, | |
| "num_input_tokens_seen": 2580260608, | |
| "step": 2855, | |
| "train_runtime": 380137.2732, | |
| "train_tokens_per_second": 6787.707 | |
| }, | |
| { | |
| "epoch": 2.426100795755968, | |
| "grad_norm": 0.9862563470365936, | |
| "learning_rate": 5.391734140794183e-06, | |
| "loss": 0.5398, | |
| "num_input_tokens_seen": 2585142720, | |
| "step": 2860, | |
| "train_runtime": 380839.0154, | |
| "train_tokens_per_second": 6788.02 | |
| }, | |
| { | |
| "epoch": 2.430344827586207, | |
| "grad_norm": 0.9041618526091733, | |
| "learning_rate": 5.3154390898230846e-06, | |
| "loss": 0.512, | |
| "num_input_tokens_seen": 2589755136, | |
| "step": 2865, | |
| "train_runtime": 381496.5415, | |
| "train_tokens_per_second": 6788.411 | |
| }, | |
| { | |
| "epoch": 2.4345888594164458, | |
| "grad_norm": 0.914359636860779, | |
| "learning_rate": 5.2396234314533665e-06, | |
| "loss": 0.5143, | |
| "num_input_tokens_seen": 2594081792, | |
| "step": 2870, | |
| "train_runtime": 382111.5001, | |
| "train_tokens_per_second": 6788.808 | |
| }, | |
| { | |
| "epoch": 2.438832891246684, | |
| "grad_norm": 1.033477002199706, | |
| "learning_rate": 5.16428901207959e-06, | |
| "loss": 0.5324, | |
| "num_input_tokens_seen": 2598687872, | |
| "step": 2875, | |
| "train_runtime": 382756.5754, | |
| "train_tokens_per_second": 6789.401 | |
| }, | |
| { | |
| "epoch": 2.443076923076923, | |
| "grad_norm": 0.9057174065952954, | |
| "learning_rate": 5.089437666376304e-06, | |
| "loss": 0.5263, | |
| "num_input_tokens_seen": 2603366144, | |
| "step": 2880, | |
| "train_runtime": 383439.9985, | |
| "train_tokens_per_second": 6789.501 | |
| }, | |
| { | |
| "epoch": 2.4473209549071617, | |
| "grad_norm": 0.8529850767813882, | |
| "learning_rate": 5.015071217253428e-06, | |
| "loss": 0.5113, | |
| "num_input_tokens_seen": 2608005440, | |
| "step": 2885, | |
| "train_runtime": 384090.9187, | |
| "train_tokens_per_second": 6790.073 | |
| }, | |
| { | |
| "epoch": 2.4515649867374005, | |
| "grad_norm": 1.3879570401538315, | |
| "learning_rate": 4.941191475811843e-06, | |
| "loss": 0.5222, | |
| "num_input_tokens_seen": 2612660480, | |
| "step": 2890, | |
| "train_runtime": 384747.1837, | |
| "train_tokens_per_second": 6790.59 | |
| }, | |
| { | |
| "epoch": 2.4558090185676393, | |
| "grad_norm": 1.2654753413045388, | |
| "learning_rate": 4.867800241299275e-06, | |
| "loss": 0.5128, | |
| "num_input_tokens_seen": 2617068928, | |
| "step": 2895, | |
| "train_runtime": 385371.9974, | |
| "train_tokens_per_second": 6791.02 | |
| }, | |
| { | |
| "epoch": 2.460053050397878, | |
| "grad_norm": 0.9035128336837777, | |
| "learning_rate": 4.794899301066477e-06, | |
| "loss": 0.529, | |
| "num_input_tokens_seen": 2621648384, | |
| "step": 2900, | |
| "train_runtime": 386085.7096, | |
| "train_tokens_per_second": 6790.327 | |
| }, | |
| { | |
| "epoch": 2.464297082228117, | |
| "grad_norm": 0.8567446173412482, | |
| "learning_rate": 4.72249043052371e-06, | |
| "loss": 0.5269, | |
| "num_input_tokens_seen": 2626115776, | |
| "step": 2905, | |
| "train_runtime": 386725.7007, | |
| "train_tokens_per_second": 6790.642 | |
| }, | |
| { | |
| "epoch": 2.4685411140583553, | |
| "grad_norm": 0.8912412665948739, | |
| "learning_rate": 4.650575393097498e-06, | |
| "loss": 0.5387, | |
| "num_input_tokens_seen": 2630631040, | |
| "step": 2910, | |
| "train_runtime": 387368.1112, | |
| "train_tokens_per_second": 6791.037 | |
| }, | |
| { | |
| "epoch": 2.472785145888594, | |
| "grad_norm": 1.1130459835714437, | |
| "learning_rate": 4.57915594018768e-06, | |
| "loss": 0.5337, | |
| "num_input_tokens_seen": 2635143296, | |
| "step": 2915, | |
| "train_runtime": 388043.1985, | |
| "train_tokens_per_second": 6790.85 | |
| }, | |
| { | |
| "epoch": 2.477029177718833, | |
| "grad_norm": 0.9430504436597011, | |
| "learning_rate": 4.508233811124765e-06, | |
| "loss": 0.5261, | |
| "num_input_tokens_seen": 2639559808, | |
| "step": 2920, | |
| "train_runtime": 388662.674, | |
| "train_tokens_per_second": 6791.39 | |
| }, | |
| { | |
| "epoch": 2.4812732095490717, | |
| "grad_norm": 0.9383789050215547, | |
| "learning_rate": 4.437810733127571e-06, | |
| "loss": 0.5464, | |
| "num_input_tokens_seen": 2644172096, | |
| "step": 2925, | |
| "train_runtime": 389299.3342, | |
| "train_tokens_per_second": 6792.131 | |
| }, | |
| { | |
| "epoch": 2.4855172413793105, | |
| "grad_norm": 0.9063379227339012, | |
| "learning_rate": 4.367888421261154e-06, | |
| "loss": 0.509, | |
| "num_input_tokens_seen": 2648731712, | |
| "step": 2930, | |
| "train_runtime": 389972.3983, | |
| "train_tokens_per_second": 6792.1 | |
| }, | |
| { | |
| "epoch": 2.489761273209549, | |
| "grad_norm": 0.8758741295078302, | |
| "learning_rate": 4.298468578395029e-06, | |
| "loss": 0.5072, | |
| "num_input_tokens_seen": 2653268800, | |
| "step": 2935, | |
| "train_runtime": 390605.8102, | |
| "train_tokens_per_second": 6792.702 | |
| }, | |
| { | |
| "epoch": 2.4940053050397877, | |
| "grad_norm": 1.0498774972803595, | |
| "learning_rate": 4.229552895161754e-06, | |
| "loss": 0.5399, | |
| "num_input_tokens_seen": 2657767616, | |
| "step": 2940, | |
| "train_runtime": 391245.0688, | |
| "train_tokens_per_second": 6793.102 | |
| }, | |
| { | |
| "epoch": 2.4982493368700265, | |
| "grad_norm": 0.9576568252690971, | |
| "learning_rate": 4.161143049915661e-06, | |
| "loss": 0.5167, | |
| "num_input_tokens_seen": 2662204992, | |
| "step": 2945, | |
| "train_runtime": 391840.1072, | |
| "train_tokens_per_second": 6794.111 | |
| }, | |
| { | |
| "epoch": 2.5024933687002653, | |
| "grad_norm": 1.297030102246004, | |
| "learning_rate": 4.093240708692098e-06, | |
| "loss": 0.5434, | |
| "num_input_tokens_seen": 2666829440, | |
| "step": 2950, | |
| "train_runtime": 392486.1885, | |
| "train_tokens_per_second": 6794.709 | |
| }, | |
| { | |
| "epoch": 2.5024933687002653, | |
| "eval_loss": 0.7277879118919373, | |
| "eval_runtime": 1062.7442, | |
| "eval_samples_per_second": 2.867, | |
| "eval_steps_per_second": 0.09, | |
| "num_input_tokens_seen": 2666829440, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 2.506737400530504, | |
| "grad_norm": 0.9337370037141564, | |
| "learning_rate": 4.025847525166737e-06, | |
| "loss": 0.521, | |
| "num_input_tokens_seen": 2671303040, | |
| "step": 2955, | |
| "train_runtime": 394192.7243, | |
| "train_tokens_per_second": 6776.642 | |
| }, | |
| { | |
| "epoch": 2.510981432360743, | |
| "grad_norm": 1.0142479442870918, | |
| "learning_rate": 3.958965140615395e-06, | |
| "loss": 0.5013, | |
| "num_input_tokens_seen": 2675854528, | |
| "step": 2960, | |
| "train_runtime": 394856.393, | |
| "train_tokens_per_second": 6776.779 | |
| }, | |
| { | |
| "epoch": 2.5152254641909817, | |
| "grad_norm": 0.9673554697201667, | |
| "learning_rate": 3.892595183874015e-06, | |
| "loss": 0.5138, | |
| "num_input_tokens_seen": 2680310976, | |
| "step": 2965, | |
| "train_runtime": 395478.0069, | |
| "train_tokens_per_second": 6777.396 | |
| }, | |
| { | |
| "epoch": 2.51946949602122, | |
| "grad_norm": 0.9773791433571442, | |
| "learning_rate": 3.826739271299004e-06, | |
| "loss": 0.5374, | |
| "num_input_tokens_seen": 2685063360, | |
| "step": 2970, | |
| "train_runtime": 396163.4888, | |
| "train_tokens_per_second": 6777.665 | |
| }, | |
| { | |
| "epoch": 2.523713527851459, | |
| "grad_norm": 0.8762686166416458, | |
| "learning_rate": 3.761399006727878e-06, | |
| "loss": 0.518, | |
| "num_input_tokens_seen": 2689742912, | |
| "step": 2975, | |
| "train_runtime": 396848.2908, | |
| "train_tokens_per_second": 6777.761 | |
| }, | |
| { | |
| "epoch": 2.5279575596816977, | |
| "grad_norm": 0.9814246027282987, | |
| "learning_rate": 3.696575981440198e-06, | |
| "loss": 0.5333, | |
| "num_input_tokens_seen": 2694003776, | |
| "step": 2980, | |
| "train_runtime": 397469.5249, | |
| "train_tokens_per_second": 6777.888 | |
| }, | |
| { | |
| "epoch": 2.5322015915119365, | |
| "grad_norm": 1.0494687013783062, | |
| "learning_rate": 3.632271774118812e-06, | |
| "loss": 0.5249, | |
| "num_input_tokens_seen": 2698482432, | |
| "step": 2985, | |
| "train_runtime": 398112.2116, | |
| "train_tokens_per_second": 6778.196 | |
| }, | |
| { | |
| "epoch": 2.536445623342175, | |
| "grad_norm": 0.9167136316488117, | |
| "learning_rate": 3.568487950811414e-06, | |
| "loss": 0.5179, | |
| "num_input_tokens_seen": 2703208384, | |
| "step": 2990, | |
| "train_runtime": 398770.7605, | |
| "train_tokens_per_second": 6778.853 | |
| }, | |
| { | |
| "epoch": 2.5406896551724136, | |
| "grad_norm": 1.0631373570087592, | |
| "learning_rate": 3.5052260648924056e-06, | |
| "loss": 0.5258, | |
| "num_input_tokens_seen": 2707736448, | |
| "step": 2995, | |
| "train_runtime": 399417.3719, | |
| "train_tokens_per_second": 6779.216 | |
| }, | |
| { | |
| "epoch": 2.5449336870026524, | |
| "grad_norm": 0.8646476638396003, | |
| "learning_rate": 3.442487657025059e-06, | |
| "loss": 0.5148, | |
| "num_input_tokens_seen": 2712052544, | |
| "step": 3000, | |
| "train_runtime": 400011.3913, | |
| "train_tokens_per_second": 6779.938 | |
| }, | |
| { | |
| "epoch": 2.5491777188328912, | |
| "grad_norm": 1.3814464306803582, | |
| "learning_rate": 3.380274255124008e-06, | |
| "loss": 0.5328, | |
| "num_input_tokens_seen": 2716590016, | |
| "step": 3005, | |
| "train_runtime": 400647.6096, | |
| "train_tokens_per_second": 6780.497 | |
| }, | |
| { | |
| "epoch": 2.55342175066313, | |
| "grad_norm": 0.9608139693650856, | |
| "learning_rate": 3.318587374318008e-06, | |
| "loss": 0.5221, | |
| "num_input_tokens_seen": 2721057600, | |
| "step": 3010, | |
| "train_runtime": 401293.542, | |
| "train_tokens_per_second": 6780.716 | |
| }, | |
| { | |
| "epoch": 2.557665782493369, | |
| "grad_norm": 1.052375691071485, | |
| "learning_rate": 3.257428516913094e-06, | |
| "loss": 0.5356, | |
| "num_input_tokens_seen": 2725470592, | |
| "step": 3015, | |
| "train_runtime": 401915.8909, | |
| "train_tokens_per_second": 6781.196 | |
| }, | |
| { | |
| "epoch": 2.5619098143236076, | |
| "grad_norm": 1.0422726914250204, | |
| "learning_rate": 3.1967991723559186e-06, | |
| "loss": 0.5357, | |
| "num_input_tokens_seen": 2729976320, | |
| "step": 3020, | |
| "train_runtime": 402550.4344, | |
| "train_tokens_per_second": 6781.7 | |
| }, | |
| { | |
| "epoch": 2.566153846153846, | |
| "grad_norm": 1.0115755929526138, | |
| "learning_rate": 3.1367008171975606e-06, | |
| "loss": 0.5292, | |
| "num_input_tokens_seen": 2734356096, | |
| "step": 3025, | |
| "train_runtime": 403147.2983, | |
| "train_tokens_per_second": 6782.524 | |
| }, | |
| { | |
| "epoch": 2.570397877984085, | |
| "grad_norm": 0.9326348646064712, | |
| "learning_rate": 3.0771349150574833e-06, | |
| "loss": 0.5032, | |
| "num_input_tokens_seen": 2738757568, | |
| "step": 3030, | |
| "train_runtime": 403750.7771, | |
| "train_tokens_per_second": 6783.287 | |
| }, | |
| { | |
| "epoch": 2.5746419098143236, | |
| "grad_norm": 0.8962193098952964, | |
| "learning_rate": 3.0181029165879505e-06, | |
| "loss": 0.5235, | |
| "num_input_tokens_seen": 2743195520, | |
| "step": 3035, | |
| "train_runtime": 404385.6448, | |
| "train_tokens_per_second": 6783.613 | |
| }, | |
| { | |
| "epoch": 2.5788859416445624, | |
| "grad_norm": 0.9643001215470395, | |
| "learning_rate": 2.959606259438677e-06, | |
| "loss": 0.5371, | |
| "num_input_tokens_seen": 2747720128, | |
| "step": 3040, | |
| "train_runtime": 405024.1548, | |
| "train_tokens_per_second": 6784.09 | |
| }, | |
| { | |
| "epoch": 2.583129973474801, | |
| "grad_norm": 1.083989853159668, | |
| "learning_rate": 2.9016463682218137e-06, | |
| "loss": 0.5045, | |
| "num_input_tokens_seen": 2752185344, | |
| "step": 3045, | |
| "train_runtime": 405663.7384, | |
| "train_tokens_per_second": 6784.401 | |
| }, | |
| { | |
| "epoch": 2.5873740053050396, | |
| "grad_norm": 1.0350757402442816, | |
| "learning_rate": 2.844224654477251e-06, | |
| "loss": 0.535, | |
| "num_input_tokens_seen": 2756955712, | |
| "step": 3050, | |
| "train_runtime": 406343.9367, | |
| "train_tokens_per_second": 6784.784 | |
| }, | |
| { | |
| "epoch": 2.5916180371352784, | |
| "grad_norm": 1.0876688931917096, | |
| "learning_rate": 2.787342516638253e-06, | |
| "loss": 0.5241, | |
| "num_input_tokens_seen": 2761469632, | |
| "step": 3055, | |
| "train_runtime": 406991.7195, | |
| "train_tokens_per_second": 6785.076 | |
| }, | |
| { | |
| "epoch": 2.595862068965517, | |
| "grad_norm": 1.187734783928453, | |
| "learning_rate": 2.7310013399973937e-06, | |
| "loss": 0.5083, | |
| "num_input_tokens_seen": 2766078848, | |
| "step": 3060, | |
| "train_runtime": 407630.5997, | |
| "train_tokens_per_second": 6785.749 | |
| }, | |
| { | |
| "epoch": 2.600106100795756, | |
| "grad_norm": 0.8033965202229059, | |
| "learning_rate": 2.6752024966728186e-06, | |
| "loss": 0.5078, | |
| "num_input_tokens_seen": 2770419968, | |
| "step": 3065, | |
| "train_runtime": 408227.7584, | |
| "train_tokens_per_second": 6786.457 | |
| }, | |
| { | |
| "epoch": 2.604350132625995, | |
| "grad_norm": 1.0703320175428703, | |
| "learning_rate": 2.6199473455748302e-06, | |
| "loss": 0.523, | |
| "num_input_tokens_seen": 2774944704, | |
| "step": 3070, | |
| "train_runtime": 408874.2335, | |
| "train_tokens_per_second": 6786.793 | |
| }, | |
| { | |
| "epoch": 2.6085941644562336, | |
| "grad_norm": 0.8853137013274479, | |
| "learning_rate": 2.5652372323727995e-06, | |
| "loss": 0.5344, | |
| "num_input_tokens_seen": 2779491968, | |
| "step": 3075, | |
| "train_runtime": 409530.6069, | |
| "train_tokens_per_second": 6787.019 | |
| }, | |
| { | |
| "epoch": 2.6128381962864724, | |
| "grad_norm": 0.9523960118476597, | |
| "learning_rate": 2.5110734894623845e-06, | |
| "loss": 0.5238, | |
| "num_input_tokens_seen": 2784101184, | |
| "step": 3080, | |
| "train_runtime": 410217.1802, | |
| "train_tokens_per_second": 6786.896 | |
| }, | |
| { | |
| "epoch": 2.6170822281167108, | |
| "grad_norm": 0.9141894377610371, | |
| "learning_rate": 2.457457435933083e-06, | |
| "loss": 0.5072, | |
| "num_input_tokens_seen": 2788760320, | |
| "step": 3085, | |
| "train_runtime": 410902.2723, | |
| "train_tokens_per_second": 6786.919 | |
| }, | |
| { | |
| "epoch": 2.6213262599469496, | |
| "grad_norm": 0.9601107951690412, | |
| "learning_rate": 2.404390377536117e-06, | |
| "loss": 0.5461, | |
| "num_input_tokens_seen": 2793482240, | |
| "step": 3090, | |
| "train_runtime": 411591.1251, | |
| "train_tokens_per_second": 6787.032 | |
| }, | |
| { | |
| "epoch": 2.6255702917771884, | |
| "grad_norm": 1.092143809724588, | |
| "learning_rate": 2.3518736066526106e-06, | |
| "loss": 0.5355, | |
| "num_input_tokens_seen": 2798052800, | |
| "step": 3095, | |
| "train_runtime": 412253.9967, | |
| "train_tokens_per_second": 6787.206 | |
| }, | |
| { | |
| "epoch": 2.629814323607427, | |
| "grad_norm": 0.9775173060346775, | |
| "learning_rate": 2.2999084022621575e-06, | |
| "loss": 0.5187, | |
| "num_input_tokens_seen": 2802623616, | |
| "step": 3100, | |
| "train_runtime": 412972.0833, | |
| "train_tokens_per_second": 6786.472 | |
| }, | |
| { | |
| "epoch": 2.6340583554376655, | |
| "grad_norm": 0.8882149505754918, | |
| "learning_rate": 2.2484960299116176e-06, | |
| "loss": 0.5234, | |
| "num_input_tokens_seen": 2807059072, | |
| "step": 3105, | |
| "train_runtime": 413616.3561, | |
| "train_tokens_per_second": 6786.625 | |
| }, | |
| { | |
| "epoch": 2.6383023872679043, | |
| "grad_norm": 1.04737202246014, | |
| "learning_rate": 2.1976377416843496e-06, | |
| "loss": 0.5337, | |
| "num_input_tokens_seen": 2811685952, | |
| "step": 3110, | |
| "train_runtime": 414302.3818, | |
| "train_tokens_per_second": 6786.555 | |
| }, | |
| { | |
| "epoch": 2.642546419098143, | |
| "grad_norm": 0.8525650275605402, | |
| "learning_rate": 2.1473347761696765e-06, | |
| "loss": 0.5122, | |
| "num_input_tokens_seen": 2816097472, | |
| "step": 3115, | |
| "train_runtime": 414912.2037, | |
| "train_tokens_per_second": 6787.213 | |
| }, | |
| { | |
| "epoch": 2.646790450928382, | |
| "grad_norm": 1.4897884354340427, | |
| "learning_rate": 2.097588358432745e-06, | |
| "loss": 0.5344, | |
| "num_input_tokens_seen": 2820541952, | |
| "step": 3120, | |
| "train_runtime": 415551.6146, | |
| "train_tokens_per_second": 6787.465 | |
| }, | |
| { | |
| "epoch": 2.6510344827586207, | |
| "grad_norm": 0.9315543634592929, | |
| "learning_rate": 2.048399699984685e-06, | |
| "loss": 0.5204, | |
| "num_input_tokens_seen": 2825211008, | |
| "step": 3125, | |
| "train_runtime": 416236.0998, | |
| "train_tokens_per_second": 6787.52 | |
| }, | |
| { | |
| "epoch": 2.6552785145888596, | |
| "grad_norm": 0.994893210880857, | |
| "learning_rate": 1.999769998753101e-06, | |
| "loss": 0.5092, | |
| "num_input_tokens_seen": 2829805440, | |
| "step": 3130, | |
| "train_runtime": 416937.1267, | |
| "train_tokens_per_second": 6787.128 | |
| }, | |
| { | |
| "epoch": 2.6595225464190984, | |
| "grad_norm": 0.9211547478732683, | |
| "learning_rate": 1.951700439052906e-06, | |
| "loss": 0.509, | |
| "num_input_tokens_seen": 2834319168, | |
| "step": 3135, | |
| "train_runtime": 417580.9275, | |
| "train_tokens_per_second": 6787.473 | |
| }, | |
| { | |
| "epoch": 2.6637665782493367, | |
| "grad_norm": 1.1092045966595683, | |
| "learning_rate": 1.9041921915574718e-06, | |
| "loss": 0.5213, | |
| "num_input_tokens_seen": 2838842944, | |
| "step": 3140, | |
| "train_runtime": 418214.8375, | |
| "train_tokens_per_second": 6788.001 | |
| }, | |
| { | |
| "epoch": 2.6680106100795755, | |
| "grad_norm": 0.8680731962765761, | |
| "learning_rate": 1.857246413270114e-06, | |
| "loss": 0.5303, | |
| "num_input_tokens_seen": 2843287936, | |
| "step": 3145, | |
| "train_runtime": 418837.8662, | |
| "train_tokens_per_second": 6788.517 | |
| }, | |
| { | |
| "epoch": 2.6722546419098143, | |
| "grad_norm": 1.370277456535701, | |
| "learning_rate": 1.810864247495933e-06, | |
| "loss": 0.5351, | |
| "num_input_tokens_seen": 2847823872, | |
| "step": 3150, | |
| "train_runtime": 419460.3925, | |
| "train_tokens_per_second": 6789.256 | |
| }, | |
| { | |
| "epoch": 2.676498673740053, | |
| "grad_norm": 1.0792319656449618, | |
| "learning_rate": 1.7650468238139484e-06, | |
| "loss": 0.5114, | |
| "num_input_tokens_seen": 2852148544, | |
| "step": 3155, | |
| "train_runtime": 420051.6697, | |
| "train_tokens_per_second": 6789.995 | |
| }, | |
| { | |
| "epoch": 2.680742705570292, | |
| "grad_norm": 0.8781496188483874, | |
| "learning_rate": 1.7197952580496086e-06, | |
| "loss": 0.5126, | |
| "num_input_tokens_seen": 2856524608, | |
| "step": 3160, | |
| "train_runtime": 420644.3052, | |
| "train_tokens_per_second": 6790.832 | |
| }, | |
| { | |
| "epoch": 2.6849867374005303, | |
| "grad_norm": 0.9491071552727024, | |
| "learning_rate": 1.6751106522476078e-06, | |
| "loss": 0.5242, | |
| "num_input_tokens_seen": 2861096576, | |
| "step": 3165, | |
| "train_runtime": 421277.8845, | |
| "train_tokens_per_second": 6791.471 | |
| }, | |
| { | |
| "epoch": 2.689230769230769, | |
| "grad_norm": 0.9895886874283716, | |
| "learning_rate": 1.6309940946450276e-06, | |
| "loss": 0.5422, | |
| "num_input_tokens_seen": 2865604864, | |
| "step": 3170, | |
| "train_runtime": 421930.8634, | |
| "train_tokens_per_second": 6791.646 | |
| }, | |
| { | |
| "epoch": 2.693474801061008, | |
| "grad_norm": 0.9535636472034763, | |
| "learning_rate": 1.5874466596448894e-06, | |
| "loss": 0.5138, | |
| "num_input_tokens_seen": 2870295232, | |
| "step": 3175, | |
| "train_runtime": 422607.9976, | |
| "train_tokens_per_second": 6791.862 | |
| }, | |
| { | |
| "epoch": 2.6977188328912467, | |
| "grad_norm": 1.2371458441648222, | |
| "learning_rate": 1.5444694077899112e-06, | |
| "loss": 0.5143, | |
| "num_input_tokens_seen": 2874723904, | |
| "step": 3180, | |
| "train_runtime": 423241.8919, | |
| "train_tokens_per_second": 6792.154 | |
| }, | |
| { | |
| "epoch": 2.7019628647214855, | |
| "grad_norm": 1.0270055431553666, | |
| "learning_rate": 1.5020633857367629e-06, | |
| "loss": 0.5097, | |
| "num_input_tokens_seen": 2879036160, | |
| "step": 3185, | |
| "train_runtime": 423847.9941, | |
| "train_tokens_per_second": 6792.615 | |
| }, | |
| { | |
| "epoch": 2.7062068965517243, | |
| "grad_norm": 1.0047782227717184, | |
| "learning_rate": 1.4602296262304998e-06, | |
| "loss": 0.531, | |
| "num_input_tokens_seen": 2883503104, | |
| "step": 3190, | |
| "train_runtime": 424512.874, | |
| "train_tokens_per_second": 6792.499 | |
| }, | |
| { | |
| "epoch": 2.710450928381963, | |
| "grad_norm": 0.7991655241640006, | |
| "learning_rate": 1.4189691480794659e-06, | |
| "loss": 0.5031, | |
| "num_input_tokens_seen": 2888068736, | |
| "step": 3195, | |
| "train_runtime": 425152.1322, | |
| "train_tokens_per_second": 6793.024 | |
| }, | |
| { | |
| "epoch": 2.7146949602122015, | |
| "grad_norm": 0.9290447093133773, | |
| "learning_rate": 1.3782829561304528e-06, | |
| "loss": 0.5214, | |
| "num_input_tokens_seen": 2892383104, | |
| "step": 3200, | |
| "train_runtime": 425736.2555, | |
| "train_tokens_per_second": 6793.838 | |
| }, | |
| { | |
| "epoch": 2.7189389920424403, | |
| "grad_norm": 0.9336749097922185, | |
| "learning_rate": 1.3381720412442484e-06, | |
| "loss": 0.5046, | |
| "num_input_tokens_seen": 2896798592, | |
| "step": 3205, | |
| "train_runtime": 426372.4313, | |
| "train_tokens_per_second": 6794.057 | |
| }, | |
| { | |
| "epoch": 2.723183023872679, | |
| "grad_norm": 0.923464487407943, | |
| "learning_rate": 1.2986373802714806e-06, | |
| "loss": 0.519, | |
| "num_input_tokens_seen": 2901355840, | |
| "step": 3210, | |
| "train_runtime": 427007.4668, | |
| "train_tokens_per_second": 6794.626 | |
| }, | |
| { | |
| "epoch": 2.727427055702918, | |
| "grad_norm": 1.054195900551347, | |
| "learning_rate": 1.259679936028857e-06, | |
| "loss": 0.5278, | |
| "num_input_tokens_seen": 2906018048, | |
| "step": 3215, | |
| "train_runtime": 427698.5588, | |
| "train_tokens_per_second": 6794.547 | |
| }, | |
| { | |
| "epoch": 2.7316710875331562, | |
| "grad_norm": 1.053479012440482, | |
| "learning_rate": 1.2213006572756868e-06, | |
| "loss": 0.5052, | |
| "num_input_tokens_seen": 2910317120, | |
| "step": 3220, | |
| "train_runtime": 428289.4292, | |
| "train_tokens_per_second": 6795.211 | |
| }, | |
| { | |
| "epoch": 2.735915119363395, | |
| "grad_norm": 0.9664035844106731, | |
| "learning_rate": 1.1835004786907994e-06, | |
| "loss": 0.5285, | |
| "num_input_tokens_seen": 2914766912, | |
| "step": 3225, | |
| "train_runtime": 428926.8091, | |
| "train_tokens_per_second": 6795.488 | |
| }, | |
| { | |
| "epoch": 2.740159151193634, | |
| "grad_norm": 0.9743299232561019, | |
| "learning_rate": 1.1462803208497658e-06, | |
| "loss": 0.5349, | |
| "num_input_tokens_seen": 2919116864, | |
| "step": 3230, | |
| "train_runtime": 429579.5988, | |
| "train_tokens_per_second": 6795.287 | |
| }, | |
| { | |
| "epoch": 2.7444031830238726, | |
| "grad_norm": 0.9853416108002045, | |
| "learning_rate": 1.1096410902024874e-06, | |
| "loss": 0.5281, | |
| "num_input_tokens_seen": 2923624768, | |
| "step": 3235, | |
| "train_runtime": 430191.282, | |
| "train_tokens_per_second": 6796.104 | |
| }, | |
| { | |
| "epoch": 2.7486472148541115, | |
| "grad_norm": 0.9605567975250936, | |
| "learning_rate": 1.073583679051124e-06, | |
| "loss": 0.5301, | |
| "num_input_tokens_seen": 2928050752, | |
| "step": 3240, | |
| "train_runtime": 430824.3873, | |
| "train_tokens_per_second": 6796.39 | |
| }, | |
| { | |
| "epoch": 2.7528912466843503, | |
| "grad_norm": 1.1024226803397792, | |
| "learning_rate": 1.0381089655283394e-06, | |
| "loss": 0.5316, | |
| "num_input_tokens_seen": 2932468416, | |
| "step": 3245, | |
| "train_runtime": 431459.9194, | |
| "train_tokens_per_second": 6796.618 | |
| }, | |
| { | |
| "epoch": 2.7528912466843503, | |
| "eval_loss": 0.7258533835411072, | |
| "eval_runtime": 1061.3977, | |
| "eval_samples_per_second": 2.871, | |
| "eval_steps_per_second": 0.09, | |
| "num_input_tokens_seen": 2932468416, | |
| "step": 3245 | |
| }, | |
| { | |
| "epoch": 2.757135278514589, | |
| "grad_norm": 1.0283547426763588, | |
| "learning_rate": 1.0032178135759546e-06, | |
| "loss": 0.5119, | |
| "num_input_tokens_seen": 2936775872, | |
| "step": 3250, | |
| "train_runtime": 433099.9457, | |
| "train_tokens_per_second": 6780.827 | |
| }, | |
| { | |
| "epoch": 2.7613793103448274, | |
| "grad_norm": 0.9455593371411998, | |
| "learning_rate": 9.68911072923867e-07, | |
| "loss": 0.5204, | |
| "num_input_tokens_seen": 2941314240, | |
| "step": 3255, | |
| "train_runtime": 433763.0738, | |
| "train_tokens_per_second": 6780.924 | |
| }, | |
| { | |
| "epoch": 2.7656233421750662, | |
| "grad_norm": 0.9779382933428667, | |
| "learning_rate": 9.351895790693955e-07, | |
| "loss": 0.5273, | |
| "num_input_tokens_seen": 2945907136, | |
| "step": 3260, | |
| "train_runtime": 434418.6226, | |
| "train_tokens_per_second": 6781.263 | |
| }, | |
| { | |
| "epoch": 2.769867374005305, | |
| "grad_norm": 0.9433322444237364, | |
| "learning_rate": 9.020541532568899e-07, | |
| "loss": 0.5289, | |
| "num_input_tokens_seen": 2950538496, | |
| "step": 3265, | |
| "train_runtime": 435105.6078, | |
| "train_tokens_per_second": 6781.201 | |
| }, | |
| { | |
| "epoch": 2.774111405835544, | |
| "grad_norm": 1.0359211392265961, | |
| "learning_rate": 8.695056024577792e-07, | |
| "loss": 0.5082, | |
| "num_input_tokens_seen": 2955022208, | |
| "step": 3270, | |
| "train_runtime": 435708.6215, | |
| "train_tokens_per_second": 6782.106 | |
| }, | |
| { | |
| "epoch": 2.7783554376657826, | |
| "grad_norm": 0.8927269780868947, | |
| "learning_rate": 8.375447193508662e-07, | |
| "loss": 0.5196, | |
| "num_input_tokens_seen": 2959619008, | |
| "step": 3275, | |
| "train_runtime": 436404.7763, | |
| "train_tokens_per_second": 6781.821 | |
| }, | |
| { | |
| "epoch": 2.782599469496021, | |
| "grad_norm": 0.994250431687146, | |
| "learning_rate": 8.061722823030693e-07, | |
| "loss": 0.5294, | |
| "num_input_tokens_seen": 2964249856, | |
| "step": 3280, | |
| "train_runtime": 437073.9242, | |
| "train_tokens_per_second": 6782.033 | |
| }, | |
| { | |
| "epoch": 2.78684350132626, | |
| "grad_norm": 0.8514551639091007, | |
| "learning_rate": 7.753890553504422e-07, | |
| "loss": 0.5073, | |
| "num_input_tokens_seen": 2968889344, | |
| "step": 3285, | |
| "train_runtime": 437728.8681, | |
| "train_tokens_per_second": 6782.485 | |
| }, | |
| { | |
| "epoch": 2.7910875331564986, | |
| "grad_norm": 1.0389333757881603, | |
| "learning_rate": 7.451957881795673e-07, | |
| "loss": 0.532, | |
| "num_input_tokens_seen": 2973153152, | |
| "step": 3290, | |
| "train_runtime": 438317.3334, | |
| "train_tokens_per_second": 6783.106 | |
| }, | |
| { | |
| "epoch": 2.7953315649867374, | |
| "grad_norm": 0.8808663623494958, | |
| "learning_rate": 7.155932161093032e-07, | |
| "loss": 0.5312, | |
| "num_input_tokens_seen": 2977666176, | |
| "step": 3295, | |
| "train_runtime": 438968.9598, | |
| "train_tokens_per_second": 6783.318 | |
| }, | |
| { | |
| "epoch": 2.799575596816976, | |
| "grad_norm": 1.0049671673931104, | |
| "learning_rate": 6.865820600728823e-07, | |
| "loss": 0.5352, | |
| "num_input_tokens_seen": 2982187392, | |
| "step": 3300, | |
| "train_runtime": 439614.2835, | |
| "train_tokens_per_second": 6783.645 | |
| }, | |
| { | |
| "epoch": 2.803819628647215, | |
| "grad_norm": 0.9855264400649804, | |
| "learning_rate": 6.581630266003419e-07, | |
| "loss": 0.5523, | |
| "num_input_tokens_seen": 2986898304, | |
| "step": 3305, | |
| "train_runtime": 440321.2266, | |
| "train_tokens_per_second": 6783.453 | |
| }, | |
| { | |
| "epoch": 2.808063660477454, | |
| "grad_norm": 1.0813234011741917, | |
| "learning_rate": 6.303368078013183e-07, | |
| "loss": 0.5116, | |
| "num_input_tokens_seen": 2991295104, | |
| "step": 3310, | |
| "train_runtime": 440944.6485, | |
| "train_tokens_per_second": 6783.834 | |
| }, | |
| { | |
| "epoch": 2.812307692307692, | |
| "grad_norm": 0.9341722231468776, | |
| "learning_rate": 6.031040813482047e-07, | |
| "loss": 0.5215, | |
| "num_input_tokens_seen": 2995983808, | |
| "step": 3315, | |
| "train_runtime": 441625.9103, | |
| "train_tokens_per_second": 6783.986 | |
| }, | |
| { | |
| "epoch": 2.816551724137931, | |
| "grad_norm": 0.8962570756580445, | |
| "learning_rate": 5.764655104596311e-07, | |
| "loss": 0.536, | |
| "num_input_tokens_seen": 3000598080, | |
| "step": 3320, | |
| "train_runtime": 442297.9755, | |
| "train_tokens_per_second": 6784.11 | |
| }, | |
| { | |
| "epoch": 2.82079575596817, | |
| "grad_norm": 0.884157880198946, | |
| "learning_rate": 5.504217438843301e-07, | |
| "loss": 0.5187, | |
| "num_input_tokens_seen": 3005166144, | |
| "step": 3325, | |
| "train_runtime": 442917.9433, | |
| "train_tokens_per_second": 6784.928 | |
| }, | |
| { | |
| "epoch": 2.8250397877984086, | |
| "grad_norm": 0.9880705840554078, | |
| "learning_rate": 5.249734158853048e-07, | |
| "loss": 0.5367, | |
| "num_input_tokens_seen": 3009451904, | |
| "step": 3330, | |
| "train_runtime": 443509.8078, | |
| "train_tokens_per_second": 6785.536 | |
| }, | |
| { | |
| "epoch": 2.829283819628647, | |
| "grad_norm": 0.9577753896346771, | |
| "learning_rate": 5.001211462244359e-07, | |
| "loss": 0.5214, | |
| "num_input_tokens_seen": 3013832512, | |
| "step": 3335, | |
| "train_runtime": 444142.2629, | |
| "train_tokens_per_second": 6785.737 | |
| }, | |
| { | |
| "epoch": 2.8335278514588857, | |
| "grad_norm": 0.9814158353022203, | |
| "learning_rate": 4.758655401473272e-07, | |
| "loss": 0.5255, | |
| "num_input_tokens_seen": 3018354112, | |
| "step": 3340, | |
| "train_runtime": 444781.1211, | |
| "train_tokens_per_second": 6786.156 | |
| }, | |
| { | |
| "epoch": 2.8377718832891246, | |
| "grad_norm": 1.1181096501664909, | |
| "learning_rate": 4.522071883686141e-07, | |
| "loss": 0.5053, | |
| "num_input_tokens_seen": 3022790720, | |
| "step": 3345, | |
| "train_runtime": 445390.6168, | |
| "train_tokens_per_second": 6786.831 | |
| }, | |
| { | |
| "epoch": 2.8420159151193634, | |
| "grad_norm": 0.9098207875260499, | |
| "learning_rate": 4.291466670575506e-07, | |
| "loss": 0.5205, | |
| "num_input_tokens_seen": 3027432384, | |
| "step": 3350, | |
| "train_runtime": 446049.4125, | |
| "train_tokens_per_second": 6787.213 | |
| }, | |
| { | |
| "epoch": 2.846259946949602, | |
| "grad_norm": 0.8045024632334659, | |
| "learning_rate": 4.0668453782398696e-07, | |
| "loss": 0.5009, | |
| "num_input_tokens_seen": 3032032768, | |
| "step": 3355, | |
| "train_runtime": 446687.1742, | |
| "train_tokens_per_second": 6787.821 | |
| }, | |
| { | |
| "epoch": 2.850503978779841, | |
| "grad_norm": 0.9168040586174651, | |
| "learning_rate": 3.848213477046919e-07, | |
| "loss": 0.5227, | |
| "num_input_tokens_seen": 3036567680, | |
| "step": 3360, | |
| "train_runtime": 447330.9014, | |
| "train_tokens_per_second": 6788.191 | |
| }, | |
| { | |
| "epoch": 2.8547480106100798, | |
| "grad_norm": 1.1113058183093447, | |
| "learning_rate": 3.6355762915002143e-07, | |
| "loss": 0.5462, | |
| "num_input_tokens_seen": 3041310976, | |
| "step": 3365, | |
| "train_runtime": 448024.7841, | |
| "train_tokens_per_second": 6788.265 | |
| }, | |
| { | |
| "epoch": 2.8589920424403186, | |
| "grad_norm": 1.1615276846957676, | |
| "learning_rate": 3.4289390001097377e-07, | |
| "loss": 0.5032, | |
| "num_input_tokens_seen": 3045742528, | |
| "step": 3370, | |
| "train_runtime": 448635.5559, | |
| "train_tokens_per_second": 6788.901 | |
| }, | |
| { | |
| "epoch": 2.863236074270557, | |
| "grad_norm": 1.022077622432263, | |
| "learning_rate": 3.2283066352654936e-07, | |
| "loss": 0.5328, | |
| "num_input_tokens_seen": 3050306496, | |
| "step": 3375, | |
| "train_runtime": 449289.2558, | |
| "train_tokens_per_second": 6789.182 | |
| }, | |
| { | |
| "epoch": 2.8674801061007957, | |
| "grad_norm": 0.9272311129173908, | |
| "learning_rate": 3.0336840831151626e-07, | |
| "loss": 0.5273, | |
| "num_input_tokens_seen": 3054867520, | |
| "step": 3380, | |
| "train_runtime": 449945.3409, | |
| "train_tokens_per_second": 6789.419 | |
| }, | |
| { | |
| "epoch": 2.8717241379310345, | |
| "grad_norm": 0.9436512733023248, | |
| "learning_rate": 2.8450760834450307e-07, | |
| "loss": 0.5194, | |
| "num_input_tokens_seen": 3059487552, | |
| "step": 3385, | |
| "train_runtime": 450608.7914, | |
| "train_tokens_per_second": 6789.676 | |
| }, | |
| { | |
| "epoch": 2.8759681697612733, | |
| "grad_norm": 0.9504790766629004, | |
| "learning_rate": 2.662487229564525e-07, | |
| "loss": 0.5342, | |
| "num_input_tokens_seen": 3064172992, | |
| "step": 3390, | |
| "train_runtime": 451284.6962, | |
| "train_tokens_per_second": 6789.889 | |
| }, | |
| { | |
| "epoch": 2.8802122015915117, | |
| "grad_norm": 0.9713790898947682, | |
| "learning_rate": 2.485921968194416e-07, | |
| "loss": 0.534, | |
| "num_input_tokens_seen": 3068583616, | |
| "step": 3395, | |
| "train_runtime": 451906.6499, | |
| "train_tokens_per_second": 6790.304 | |
| }, | |
| { | |
| "epoch": 2.8844562334217505, | |
| "grad_norm": 1.0394097264342836, | |
| "learning_rate": 2.3153845993584834e-07, | |
| "loss": 0.516, | |
| "num_input_tokens_seen": 3073191360, | |
| "step": 3400, | |
| "train_runtime": 452591.3839, | |
| "train_tokens_per_second": 6790.212 | |
| }, | |
| { | |
| "epoch": 2.8887002652519893, | |
| "grad_norm": 0.9630302970032499, | |
| "learning_rate": 2.1508792762787723e-07, | |
| "loss": 0.5191, | |
| "num_input_tokens_seen": 3077554560, | |
| "step": 3405, | |
| "train_runtime": 453214.0954, | |
| "train_tokens_per_second": 6790.509 | |
| }, | |
| { | |
| "epoch": 2.892944297082228, | |
| "grad_norm": 1.0072599246388074, | |
| "learning_rate": 1.9924100052745586e-07, | |
| "loss": 0.5081, | |
| "num_input_tokens_seen": 3082016768, | |
| "step": 3410, | |
| "train_runtime": 453848.0797, | |
| "train_tokens_per_second": 6790.856 | |
| }, | |
| { | |
| "epoch": 2.897188328912467, | |
| "grad_norm": 0.8435644898142695, | |
| "learning_rate": 1.8399806456645963e-07, | |
| "loss": 0.5184, | |
| "num_input_tokens_seen": 3086770176, | |
| "step": 3415, | |
| "train_runtime": 454559.6162, | |
| "train_tokens_per_second": 6790.683 | |
| }, | |
| { | |
| "epoch": 2.9014323607427057, | |
| "grad_norm": 1.0235162502387214, | |
| "learning_rate": 1.6935949096733016e-07, | |
| "loss": 0.534, | |
| "num_input_tokens_seen": 3091125184, | |
| "step": 3420, | |
| "train_runtime": 455183.2572, | |
| "train_tokens_per_second": 6790.947 | |
| }, | |
| { | |
| "epoch": 2.9056763925729445, | |
| "grad_norm": 0.9662120266545192, | |
| "learning_rate": 1.5532563623402718e-07, | |
| "loss": 0.526, | |
| "num_input_tokens_seen": 3095550336, | |
| "step": 3425, | |
| "train_runtime": 455803.201, | |
| "train_tokens_per_second": 6791.419 | |
| }, | |
| { | |
| "epoch": 2.909920424403183, | |
| "grad_norm": 0.8700940267517274, | |
| "learning_rate": 1.4189684214334087e-07, | |
| "loss": 0.5214, | |
| "num_input_tokens_seen": 3100072256, | |
| "step": 3430, | |
| "train_runtime": 456479.466, | |
| "train_tokens_per_second": 6791.263 | |
| }, | |
| { | |
| "epoch": 2.9141644562334217, | |
| "grad_norm": 1.0663457112375756, | |
| "learning_rate": 1.2907343573658194e-07, | |
| "loss": 0.5457, | |
| "num_input_tokens_seen": 3104605376, | |
| "step": 3435, | |
| "train_runtime": 457154.9293, | |
| "train_tokens_per_second": 6791.145 | |
| }, | |
| { | |
| "epoch": 2.9184084880636605, | |
| "grad_norm": 0.8678572276073582, | |
| "learning_rate": 1.1685572931160737e-07, | |
| "loss": 0.5259, | |
| "num_input_tokens_seen": 3109137920, | |
| "step": 3440, | |
| "train_runtime": 457837.7574, | |
| "train_tokens_per_second": 6790.916 | |
| }, | |
| { | |
| "epoch": 2.9226525198938993, | |
| "grad_norm": 1.015827014706817, | |
| "learning_rate": 1.0524402041520997e-07, | |
| "loss": 0.5011, | |
| "num_input_tokens_seen": 3113606784, | |
| "step": 3445, | |
| "train_runtime": 458466.3165, | |
| "train_tokens_per_second": 6791.353 | |
| }, | |
| { | |
| "epoch": 2.926896551724138, | |
| "grad_norm": 1.006376986197294, | |
| "learning_rate": 9.42385918358879e-08, | |
| "loss": 0.5323, | |
| "num_input_tokens_seen": 3118176512, | |
| "step": 3450, | |
| "train_runtime": 459172.3619, | |
| "train_tokens_per_second": 6790.863 | |
| }, | |
| { | |
| "epoch": 2.9311405835543765, | |
| "grad_norm": 0.9168280974190645, | |
| "learning_rate": 8.383971159694193e-08, | |
| "loss": 0.5284, | |
| "num_input_tokens_seen": 3122635072, | |
| "step": 3455, | |
| "train_runtime": 459796.4487, | |
| "train_tokens_per_second": 6791.342 | |
| }, | |
| { | |
| "epoch": 2.9353846153846153, | |
| "grad_norm": 0.8691560318885593, | |
| "learning_rate": 7.404763294995565e-08, | |
| "loss": 0.5378, | |
| "num_input_tokens_seen": 3127211136, | |
| "step": 3460, | |
| "train_runtime": 460443.0378, | |
| "train_tokens_per_second": 6791.744 | |
| }, | |
| { | |
| "epoch": 2.939628647214854, | |
| "grad_norm": 0.8391826158196111, | |
| "learning_rate": 6.486259436863373e-08, | |
| "loss": 0.5057, | |
| "num_input_tokens_seen": 3131755648, | |
| "step": 3465, | |
| "train_runtime": 461119.9271, | |
| "train_tokens_per_second": 6791.629 | |
| }, | |
| { | |
| "epoch": 2.943872679045093, | |
| "grad_norm": 0.9266785431840717, | |
| "learning_rate": 5.628481954297604e-08, | |
| "loss": 0.5041, | |
| "num_input_tokens_seen": 3136402176, | |
| "step": 3470, | |
| "train_runtime": 461787.0454, | |
| "train_tokens_per_second": 6791.88 | |
| }, | |
| { | |
| "epoch": 2.9481167108753317, | |
| "grad_norm": 0.9181454594188112, | |
| "learning_rate": 4.83145173738514e-08, | |
| "loss": 0.5146, | |
| "num_input_tokens_seen": 3140924544, | |
| "step": 3475, | |
| "train_runtime": 462405.4235, | |
| "train_tokens_per_second": 6792.577 | |
| }, | |
| { | |
| "epoch": 2.9523607427055705, | |
| "grad_norm": 0.8349784460725406, | |
| "learning_rate": 4.095188196789057e-08, | |
| "loss": 0.5158, | |
| "num_input_tokens_seen": 3145415872, | |
| "step": 3480, | |
| "train_runtime": 463062.3059, | |
| "train_tokens_per_second": 6792.641 | |
| }, | |
| { | |
| "epoch": 2.9566047745358093, | |
| "grad_norm": 0.9323692378640641, | |
| "learning_rate": 3.419709263277893e-08, | |
| "loss": 0.5251, | |
| "num_input_tokens_seen": 3149924224, | |
| "step": 3485, | |
| "train_runtime": 463701.0926, | |
| "train_tokens_per_second": 6793.006 | |
| }, | |
| { | |
| "epoch": 2.9608488063660476, | |
| "grad_norm": 0.8451251639267238, | |
| "learning_rate": 2.8050313872868273e-08, | |
| "loss": 0.5253, | |
| "num_input_tokens_seen": 3154558400, | |
| "step": 3490, | |
| "train_runtime": 464407.8316, | |
| "train_tokens_per_second": 6792.647 | |
| }, | |
| { | |
| "epoch": 2.9650928381962864, | |
| "grad_norm": 1.0163495151903155, | |
| "learning_rate": 2.251169538518838e-08, | |
| "loss": 0.5481, | |
| "num_input_tokens_seen": 3159207360, | |
| "step": 3495, | |
| "train_runtime": 465073.4609, | |
| "train_tokens_per_second": 6792.921 | |
| }, | |
| { | |
| "epoch": 2.9693368700265252, | |
| "grad_norm": 1.0129744457478043, | |
| "learning_rate": 1.758137205579158e-08, | |
| "loss": 0.5135, | |
| "num_input_tokens_seen": 3163694016, | |
| "step": 3500, | |
| "train_runtime": 465699.5834, | |
| "train_tokens_per_second": 6793.422 | |
| }, | |
| { | |
| "epoch": 2.973580901856764, | |
| "grad_norm": 1.000739655978131, | |
| "learning_rate": 1.3259463956469265e-08, | |
| "loss": 0.5001, | |
| "num_input_tokens_seen": 3168337088, | |
| "step": 3505, | |
| "train_runtime": 466401.7551, | |
| "train_tokens_per_second": 6793.15 | |
| }, | |
| { | |
| "epoch": 2.9778249336870024, | |
| "grad_norm": 0.9167977963170866, | |
| "learning_rate": 9.546076341834798e-09, | |
| "loss": 0.5125, | |
| "num_input_tokens_seen": 3172760384, | |
| "step": 3510, | |
| "train_runtime": 467024.7423, | |
| "train_tokens_per_second": 6793.56 | |
| }, | |
| { | |
| "epoch": 2.982068965517241, | |
| "grad_norm": 0.9752251889997392, | |
| "learning_rate": 6.441299646750554e-09, | |
| "loss": 0.5214, | |
| "num_input_tokens_seen": 3177405824, | |
| "step": 3515, | |
| "train_runtime": 467725.4913, | |
| "train_tokens_per_second": 6793.313 | |
| }, | |
| { | |
| "epoch": 2.98631299734748, | |
| "grad_norm": 0.9307610509549541, | |
| "learning_rate": 3.945209484124135e-09, | |
| "loss": 0.5254, | |
| "num_input_tokens_seen": 3182049024, | |
| "step": 3520, | |
| "train_runtime": 468383.2029, | |
| "train_tokens_per_second": 6793.687 | |
| }, | |
| { | |
| "epoch": 2.990557029177719, | |
| "grad_norm": 0.9957718574851812, | |
| "learning_rate": 2.0578666430765e-09, | |
| "loss": 0.5124, | |
| "num_input_tokens_seen": 3186550272, | |
| "step": 3525, | |
| "train_runtime": 469031.712, | |
| "train_tokens_per_second": 6793.891 | |
| }, | |
| { | |
| "epoch": 2.9948010610079576, | |
| "grad_norm": 1.1631587185707446, | |
| "learning_rate": 7.793170874625943e-10, | |
| "loss": 0.5197, | |
| "num_input_tokens_seen": 3191082304, | |
| "step": 3530, | |
| "train_runtime": 469677.0947, | |
| "train_tokens_per_second": 6794.205 | |
| }, | |
| { | |
| "epoch": 2.9990450928381964, | |
| "grad_norm": 0.9116914677984228, | |
| "learning_rate": 1.0959195473614348e-10, | |
| "loss": 0.5392, | |
| "num_input_tokens_seen": 3195636736, | |
| "step": 3535, | |
| "train_runtime": 470373.7704, | |
| "train_tokens_per_second": 6793.824 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 3537, | |
| "num_input_tokens_seen": 3196694976, | |
| "num_train_epochs": 3, | |
| "save_steps": 1180, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.62813488136192e+16, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |