| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.99486125385406, | |
| "eval_steps": 500, | |
| "global_step": 1215, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0041109969167523125, | |
| "grad_norm": 6.356808310456779, | |
| "learning_rate": 3.278688524590164e-07, | |
| "loss": 0.82, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.008221993833504625, | |
| "grad_norm": 6.417518537271285, | |
| "learning_rate": 6.557377049180328e-07, | |
| "loss": 0.8218, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.012332990750256937, | |
| "grad_norm": 6.266225501769135, | |
| "learning_rate": 9.836065573770493e-07, | |
| "loss": 0.7983, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.01644398766700925, | |
| "grad_norm": 6.059283773388311, | |
| "learning_rate": 1.3114754098360657e-06, | |
| "loss": 0.7971, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.020554984583761562, | |
| "grad_norm": 5.8622860351950585, | |
| "learning_rate": 1.6393442622950819e-06, | |
| "loss": 0.811, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.024665981500513873, | |
| "grad_norm": 5.599946959024431, | |
| "learning_rate": 1.9672131147540985e-06, | |
| "loss": 0.7955, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.02877697841726619, | |
| "grad_norm": 4.404619948223283, | |
| "learning_rate": 2.295081967213115e-06, | |
| "loss": 0.7786, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.0328879753340185, | |
| "grad_norm": 3.922409083964933, | |
| "learning_rate": 2.6229508196721314e-06, | |
| "loss": 0.7401, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.03699897225077081, | |
| "grad_norm": 2.2688697860924765, | |
| "learning_rate": 2.9508196721311478e-06, | |
| "loss": 0.7309, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.041109969167523124, | |
| "grad_norm": 2.066008891071192, | |
| "learning_rate": 3.2786885245901638e-06, | |
| "loss": 0.719, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.045220966084275435, | |
| "grad_norm": 1.9201877271634715, | |
| "learning_rate": 3.6065573770491806e-06, | |
| "loss": 0.7258, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.04933196300102775, | |
| "grad_norm": 3.898309985774545, | |
| "learning_rate": 3.934426229508197e-06, | |
| "loss": 0.7205, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.05344295991778006, | |
| "grad_norm": 4.0353502189938855, | |
| "learning_rate": 4.2622950819672135e-06, | |
| "loss": 0.7192, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.05755395683453238, | |
| "grad_norm": 4.121703890013303, | |
| "learning_rate": 4.59016393442623e-06, | |
| "loss": 0.7244, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.06166495375128469, | |
| "grad_norm": 3.928913042484364, | |
| "learning_rate": 4.918032786885246e-06, | |
| "loss": 0.6995, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.065775950668037, | |
| "grad_norm": 3.142521725483005, | |
| "learning_rate": 5.245901639344263e-06, | |
| "loss": 0.6643, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0698869475847893, | |
| "grad_norm": 2.847209107159321, | |
| "learning_rate": 5.573770491803278e-06, | |
| "loss": 0.6531, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.07399794450154162, | |
| "grad_norm": 2.2003209693474126, | |
| "learning_rate": 5.9016393442622956e-06, | |
| "loss": 0.66, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.07810894141829394, | |
| "grad_norm": 1.336519700469157, | |
| "learning_rate": 6.229508196721312e-06, | |
| "loss": 0.6164, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.08221993833504625, | |
| "grad_norm": 1.2400729726657767, | |
| "learning_rate": 6.5573770491803276e-06, | |
| "loss": 0.6071, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.08633093525179857, | |
| "grad_norm": 1.4350908797178215, | |
| "learning_rate": 6.885245901639345e-06, | |
| "loss": 0.6066, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.09044193216855087, | |
| "grad_norm": 1.4014758782073495, | |
| "learning_rate": 7.213114754098361e-06, | |
| "loss": 0.5995, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.09455292908530319, | |
| "grad_norm": 1.1237552991193, | |
| "learning_rate": 7.540983606557377e-06, | |
| "loss": 0.5895, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.0986639260020555, | |
| "grad_norm": 0.8497080531691873, | |
| "learning_rate": 7.868852459016394e-06, | |
| "loss": 0.588, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.10277492291880781, | |
| "grad_norm": 0.9384127005244138, | |
| "learning_rate": 8.19672131147541e-06, | |
| "loss": 0.5767, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.10688591983556012, | |
| "grad_norm": 0.8190287043666049, | |
| "learning_rate": 8.524590163934427e-06, | |
| "loss": 0.5698, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.11099691675231244, | |
| "grad_norm": 0.6808839486547595, | |
| "learning_rate": 8.852459016393443e-06, | |
| "loss": 0.5795, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.11510791366906475, | |
| "grad_norm": 0.7939592915072008, | |
| "learning_rate": 9.18032786885246e-06, | |
| "loss": 0.5642, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.11921891058581706, | |
| "grad_norm": 0.7673957275771759, | |
| "learning_rate": 9.508196721311476e-06, | |
| "loss": 0.5505, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.12332990750256938, | |
| "grad_norm": 0.542256505411903, | |
| "learning_rate": 9.836065573770493e-06, | |
| "loss": 0.5525, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.12744090441932168, | |
| "grad_norm": 0.5835090756188929, | |
| "learning_rate": 1.0163934426229509e-05, | |
| "loss": 0.5394, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.131551901336074, | |
| "grad_norm": 2.7254227477748034, | |
| "learning_rate": 1.0491803278688525e-05, | |
| "loss": 0.587, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.13566289825282632, | |
| "grad_norm": 0.869479260973271, | |
| "learning_rate": 1.0819672131147544e-05, | |
| "loss": 0.5492, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.1397738951695786, | |
| "grad_norm": 0.5177682668145469, | |
| "learning_rate": 1.1147540983606557e-05, | |
| "loss": 0.5325, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.14388489208633093, | |
| "grad_norm": 0.511981609333191, | |
| "learning_rate": 1.1475409836065575e-05, | |
| "loss": 0.5486, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.14799588900308325, | |
| "grad_norm": 0.5836498681132752, | |
| "learning_rate": 1.1803278688524591e-05, | |
| "loss": 0.5391, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.15210688591983557, | |
| "grad_norm": 0.6259670640713604, | |
| "learning_rate": 1.2131147540983608e-05, | |
| "loss": 0.5342, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.15621788283658788, | |
| "grad_norm": 0.4863673175391185, | |
| "learning_rate": 1.2459016393442624e-05, | |
| "loss": 0.5202, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.16032887975334018, | |
| "grad_norm": 0.46984524173771686, | |
| "learning_rate": 1.2786885245901642e-05, | |
| "loss": 0.5275, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.1644398766700925, | |
| "grad_norm": 0.4373961614640998, | |
| "learning_rate": 1.3114754098360655e-05, | |
| "loss": 0.5259, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.1685508735868448, | |
| "grad_norm": 0.5382826043776142, | |
| "learning_rate": 1.3442622950819673e-05, | |
| "loss": 0.5289, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.17266187050359713, | |
| "grad_norm": 0.49858273312303336, | |
| "learning_rate": 1.377049180327869e-05, | |
| "loss": 0.5034, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.17677286742034942, | |
| "grad_norm": 0.4565898955129958, | |
| "learning_rate": 1.4098360655737706e-05, | |
| "loss": 0.4992, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.18088386433710174, | |
| "grad_norm": 0.4250478862858475, | |
| "learning_rate": 1.4426229508196722e-05, | |
| "loss": 0.5099, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.18499486125385406, | |
| "grad_norm": 0.5450828551582885, | |
| "learning_rate": 1.4754098360655739e-05, | |
| "loss": 0.5133, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.18910585817060638, | |
| "grad_norm": 0.5082095741416903, | |
| "learning_rate": 1.5081967213114754e-05, | |
| "loss": 0.507, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.1932168550873587, | |
| "grad_norm": 0.399668921358076, | |
| "learning_rate": 1.5409836065573772e-05, | |
| "loss": 0.499, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.197327852004111, | |
| "grad_norm": 0.5857374299092792, | |
| "learning_rate": 1.5737704918032788e-05, | |
| "loss": 0.5182, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.2014388489208633, | |
| "grad_norm": 0.4337541604951673, | |
| "learning_rate": 1.6065573770491805e-05, | |
| "loss": 0.5048, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.20554984583761562, | |
| "grad_norm": 0.675490041268254, | |
| "learning_rate": 1.639344262295082e-05, | |
| "loss": 0.5091, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.20966084275436794, | |
| "grad_norm": 0.44682409800475936, | |
| "learning_rate": 1.6721311475409837e-05, | |
| "loss": 0.4948, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.21377183967112023, | |
| "grad_norm": 0.5243379991172152, | |
| "learning_rate": 1.7049180327868854e-05, | |
| "loss": 0.4959, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.21788283658787255, | |
| "grad_norm": 0.48058870125487607, | |
| "learning_rate": 1.737704918032787e-05, | |
| "loss": 0.493, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.22199383350462487, | |
| "grad_norm": 0.4009755414381969, | |
| "learning_rate": 1.7704918032786887e-05, | |
| "loss": 0.5005, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.2261048304213772, | |
| "grad_norm": 0.4487072583979547, | |
| "learning_rate": 1.8032786885245903e-05, | |
| "loss": 0.5, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.2302158273381295, | |
| "grad_norm": 0.43431903464010596, | |
| "learning_rate": 1.836065573770492e-05, | |
| "loss": 0.4822, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.2343268242548818, | |
| "grad_norm": 0.4223425144399419, | |
| "learning_rate": 1.8688524590163936e-05, | |
| "loss": 0.4884, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.23843782117163412, | |
| "grad_norm": 0.40422238831771906, | |
| "learning_rate": 1.9016393442622952e-05, | |
| "loss": 0.5064, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.24254881808838644, | |
| "grad_norm": 0.4353031683109967, | |
| "learning_rate": 1.934426229508197e-05, | |
| "loss": 0.4844, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.24665981500513876, | |
| "grad_norm": 0.5063299442881862, | |
| "learning_rate": 1.9672131147540985e-05, | |
| "loss": 0.4871, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.25077081192189105, | |
| "grad_norm": 0.6405429501414496, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4916, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.25488180883864336, | |
| "grad_norm": 0.7398107392403913, | |
| "learning_rate": 2.0327868852459018e-05, | |
| "loss": 0.4967, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.2589928057553957, | |
| "grad_norm": 0.6066259496387154, | |
| "learning_rate": 2.0655737704918034e-05, | |
| "loss": 0.4933, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.263103802672148, | |
| "grad_norm": 0.6888615660905145, | |
| "learning_rate": 2.098360655737705e-05, | |
| "loss": 0.4849, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.2672147995889003, | |
| "grad_norm": 0.6046305786161926, | |
| "learning_rate": 2.1311475409836067e-05, | |
| "loss": 0.4997, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.27132579650565264, | |
| "grad_norm": 0.4755750596713722, | |
| "learning_rate": 2.1639344262295087e-05, | |
| "loss": 0.484, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.27543679342240496, | |
| "grad_norm": 0.4901884477105443, | |
| "learning_rate": 2.1967213114754104e-05, | |
| "loss": 0.4714, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.2795477903391572, | |
| "grad_norm": 0.5180862601664822, | |
| "learning_rate": 2.2295081967213113e-05, | |
| "loss": 0.4743, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.28365878725590954, | |
| "grad_norm": 0.6341799796360953, | |
| "learning_rate": 2.2622950819672133e-05, | |
| "loss": 0.4837, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.28776978417266186, | |
| "grad_norm": 0.7050713511862262, | |
| "learning_rate": 2.295081967213115e-05, | |
| "loss": 0.4732, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.2918807810894142, | |
| "grad_norm": 0.46520327730925665, | |
| "learning_rate": 2.3278688524590166e-05, | |
| "loss": 0.4763, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.2959917780061665, | |
| "grad_norm": 0.46570649065351716, | |
| "learning_rate": 2.3606557377049182e-05, | |
| "loss": 0.4729, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.3001027749229188, | |
| "grad_norm": 0.5435122355995184, | |
| "learning_rate": 2.39344262295082e-05, | |
| "loss": 0.4673, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.30421377183967113, | |
| "grad_norm": 0.7024832057525984, | |
| "learning_rate": 2.4262295081967215e-05, | |
| "loss": 0.4685, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.30832476875642345, | |
| "grad_norm": 0.5982496336902186, | |
| "learning_rate": 2.459016393442623e-05, | |
| "loss": 0.4683, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.31243576567317577, | |
| "grad_norm": 0.5579092038957036, | |
| "learning_rate": 2.4918032786885248e-05, | |
| "loss": 0.4818, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.31654676258992803, | |
| "grad_norm": 0.75454502368708, | |
| "learning_rate": 2.5245901639344264e-05, | |
| "loss": 0.4745, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.32065775950668035, | |
| "grad_norm": 0.9103711158770255, | |
| "learning_rate": 2.5573770491803284e-05, | |
| "loss": 0.4732, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.32476875642343267, | |
| "grad_norm": 0.7230510862281725, | |
| "learning_rate": 2.59016393442623e-05, | |
| "loss": 0.4726, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.328879753340185, | |
| "grad_norm": 0.661725190625586, | |
| "learning_rate": 2.622950819672131e-05, | |
| "loss": 0.4691, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.3329907502569373, | |
| "grad_norm": 1.3040040424420736, | |
| "learning_rate": 2.655737704918033e-05, | |
| "loss": 0.4596, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.3371017471736896, | |
| "grad_norm": 0.9247546415389841, | |
| "learning_rate": 2.6885245901639346e-05, | |
| "loss": 0.4687, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.34121274409044194, | |
| "grad_norm": 0.6690753031478268, | |
| "learning_rate": 2.7213114754098363e-05, | |
| "loss": 0.475, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.34532374100719426, | |
| "grad_norm": 0.8875577066120585, | |
| "learning_rate": 2.754098360655738e-05, | |
| "loss": 0.4779, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.3494347379239466, | |
| "grad_norm": 0.9913820671901682, | |
| "learning_rate": 2.7868852459016396e-05, | |
| "loss": 0.4585, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.35354573484069884, | |
| "grad_norm": 1.0406026178086218, | |
| "learning_rate": 2.8196721311475412e-05, | |
| "loss": 0.4777, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.35765673175745116, | |
| "grad_norm": 0.8568856680996076, | |
| "learning_rate": 2.852459016393443e-05, | |
| "loss": 0.476, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.3617677286742035, | |
| "grad_norm": 0.8578450824032388, | |
| "learning_rate": 2.8852459016393445e-05, | |
| "loss": 0.477, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.3658787255909558, | |
| "grad_norm": 1.0997371612060205, | |
| "learning_rate": 2.918032786885246e-05, | |
| "loss": 0.4694, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.3699897225077081, | |
| "grad_norm": 0.6710185323141514, | |
| "learning_rate": 2.9508196721311478e-05, | |
| "loss": 0.4664, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.37410071942446044, | |
| "grad_norm": 0.8753359919001613, | |
| "learning_rate": 2.9836065573770498e-05, | |
| "loss": 0.4675, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.37821171634121276, | |
| "grad_norm": 0.9060750756011728, | |
| "learning_rate": 3.0163934426229507e-05, | |
| "loss": 0.4577, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.3823227132579651, | |
| "grad_norm": 0.8327145117934229, | |
| "learning_rate": 3.0491803278688527e-05, | |
| "loss": 0.4798, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.3864337101747174, | |
| "grad_norm": 0.9953249905867948, | |
| "learning_rate": 3.0819672131147544e-05, | |
| "loss": 0.4588, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.39054470709146966, | |
| "grad_norm": 0.790890207442512, | |
| "learning_rate": 3.1147540983606557e-05, | |
| "loss": 0.4813, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.394655704008222, | |
| "grad_norm": 0.7071469844879325, | |
| "learning_rate": 3.1475409836065576e-05, | |
| "loss": 0.4715, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.3987667009249743, | |
| "grad_norm": 0.683447139315226, | |
| "learning_rate": 3.180327868852459e-05, | |
| "loss": 0.4568, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.4028776978417266, | |
| "grad_norm": 0.6863994738211686, | |
| "learning_rate": 3.213114754098361e-05, | |
| "loss": 0.4516, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.40698869475847893, | |
| "grad_norm": 0.6443321732944037, | |
| "learning_rate": 3.245901639344263e-05, | |
| "loss": 0.4467, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.41109969167523125, | |
| "grad_norm": 0.6015090752114448, | |
| "learning_rate": 3.278688524590164e-05, | |
| "loss": 0.4566, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.41521068859198357, | |
| "grad_norm": 0.648925234921687, | |
| "learning_rate": 3.311475409836066e-05, | |
| "loss": 0.4598, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.4193216855087359, | |
| "grad_norm": 0.5740497039935356, | |
| "learning_rate": 3.3442622950819675e-05, | |
| "loss": 0.4514, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.4234326824254882, | |
| "grad_norm": 0.7433508320080534, | |
| "learning_rate": 3.3770491803278695e-05, | |
| "loss": 0.4555, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.42754367934224047, | |
| "grad_norm": 0.9786371138605869, | |
| "learning_rate": 3.409836065573771e-05, | |
| "loss": 0.4724, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.4316546762589928, | |
| "grad_norm": 1.16381322551552, | |
| "learning_rate": 3.442622950819672e-05, | |
| "loss": 0.4665, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.4357656731757451, | |
| "grad_norm": 0.7033574666436274, | |
| "learning_rate": 3.475409836065574e-05, | |
| "loss": 0.4741, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.4398766700924974, | |
| "grad_norm": 1.256476593209221, | |
| "learning_rate": 3.5081967213114754e-05, | |
| "loss": 0.476, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.44398766700924974, | |
| "grad_norm": 0.5933957475473355, | |
| "learning_rate": 3.5409836065573773e-05, | |
| "loss": 0.4653, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.44809866392600206, | |
| "grad_norm": 1.025564753787377, | |
| "learning_rate": 3.5737704918032786e-05, | |
| "loss": 0.47, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.4522096608427544, | |
| "grad_norm": 1.0088674998209484, | |
| "learning_rate": 3.6065573770491806e-05, | |
| "loss": 0.4681, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.4563206577595067, | |
| "grad_norm": 0.9216004942062503, | |
| "learning_rate": 3.6393442622950826e-05, | |
| "loss": 0.4546, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.460431654676259, | |
| "grad_norm": 1.1054709646558805, | |
| "learning_rate": 3.672131147540984e-05, | |
| "loss": 0.4669, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.4645426515930113, | |
| "grad_norm": 0.6642218594282759, | |
| "learning_rate": 3.704918032786886e-05, | |
| "loss": 0.4533, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.4686536485097636, | |
| "grad_norm": 0.8356269646157981, | |
| "learning_rate": 3.737704918032787e-05, | |
| "loss": 0.4599, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.4727646454265159, | |
| "grad_norm": 1.1650429141300205, | |
| "learning_rate": 3.770491803278689e-05, | |
| "loss": 0.448, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.47687564234326824, | |
| "grad_norm": 0.6212175962293394, | |
| "learning_rate": 3.8032786885245905e-05, | |
| "loss": 0.4638, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.48098663926002055, | |
| "grad_norm": 1.1965895951813037, | |
| "learning_rate": 3.836065573770492e-05, | |
| "loss": 0.4619, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.4850976361767729, | |
| "grad_norm": 0.8457976781943612, | |
| "learning_rate": 3.868852459016394e-05, | |
| "loss": 0.4555, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.4892086330935252, | |
| "grad_norm": 0.8463559301031214, | |
| "learning_rate": 3.901639344262295e-05, | |
| "loss": 0.4713, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.4933196300102775, | |
| "grad_norm": 0.6654609587793014, | |
| "learning_rate": 3.934426229508197e-05, | |
| "loss": 0.4461, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.49743062692702983, | |
| "grad_norm": 0.7698090467763701, | |
| "learning_rate": 3.9672131147540983e-05, | |
| "loss": 0.4627, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.5015416238437821, | |
| "grad_norm": 0.5716155461137187, | |
| "learning_rate": 4e-05, | |
| "loss": 0.4576, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.5056526207605344, | |
| "grad_norm": 0.5151376433722467, | |
| "learning_rate": 3.999991738495905e-05, | |
| "loss": 0.4485, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.5097636176772867, | |
| "grad_norm": 0.561427237450996, | |
| "learning_rate": 3.9999669540518704e-05, | |
| "loss": 0.454, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.513874614594039, | |
| "grad_norm": 0.6553799163893537, | |
| "learning_rate": 3.999925646872655e-05, | |
| "loss": 0.4523, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.5179856115107914, | |
| "grad_norm": 0.7909652053854684, | |
| "learning_rate": 3.9998678172995157e-05, | |
| "loss": 0.4544, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.5220966084275437, | |
| "grad_norm": 0.8388721187199466, | |
| "learning_rate": 3.999793465810214e-05, | |
| "loss": 0.4408, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.526207605344296, | |
| "grad_norm": 0.7373151231076792, | |
| "learning_rate": 3.999702593019004e-05, | |
| "loss": 0.4596, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.5303186022610483, | |
| "grad_norm": 0.9546826007376602, | |
| "learning_rate": 3.9995951996766316e-05, | |
| "loss": 0.459, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.5344295991778006, | |
| "grad_norm": 0.9027549638128062, | |
| "learning_rate": 3.999471286670328e-05, | |
| "loss": 0.4537, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.538540596094553, | |
| "grad_norm": 1.0477189023005884, | |
| "learning_rate": 3.9993308550238e-05, | |
| "loss": 0.4455, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.5426515930113053, | |
| "grad_norm": 1.1744648733550076, | |
| "learning_rate": 3.999173905897226e-05, | |
| "loss": 0.4579, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.5467625899280576, | |
| "grad_norm": 0.7693181453420259, | |
| "learning_rate": 3.99900044058724e-05, | |
| "loss": 0.4381, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.5508735868448099, | |
| "grad_norm": 0.8932998819929917, | |
| "learning_rate": 3.998810460526927e-05, | |
| "loss": 0.4663, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.5549845837615622, | |
| "grad_norm": 1.1927529473551686, | |
| "learning_rate": 3.998603967285808e-05, | |
| "loss": 0.456, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.5590955806783144, | |
| "grad_norm": 0.7303627266940724, | |
| "learning_rate": 3.998380962569828e-05, | |
| "loss": 0.463, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.5632065775950668, | |
| "grad_norm": 1.211411577139644, | |
| "learning_rate": 3.9981414482213405e-05, | |
| "loss": 0.4649, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.5673175745118191, | |
| "grad_norm": 0.8811081756810052, | |
| "learning_rate": 3.997885426219096e-05, | |
| "loss": 0.4637, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 0.8713984959209414, | |
| "learning_rate": 3.99761289867822e-05, | |
| "loss": 0.4546, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.5755395683453237, | |
| "grad_norm": 0.9788586712835476, | |
| "learning_rate": 3.9973238678501996e-05, | |
| "loss": 0.4475, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.579650565262076, | |
| "grad_norm": 0.7533682906447463, | |
| "learning_rate": 3.997018336122866e-05, | |
| "loss": 0.4428, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.5837615621788284, | |
| "grad_norm": 0.7461312001689725, | |
| "learning_rate": 3.9966963060203684e-05, | |
| "loss": 0.4494, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.5878725590955807, | |
| "grad_norm": 0.6174187621354268, | |
| "learning_rate": 3.996357780203161e-05, | |
| "loss": 0.4504, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.591983556012333, | |
| "grad_norm": 0.49003513931944637, | |
| "learning_rate": 3.9960027614679766e-05, | |
| "loss": 0.4427, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.5960945529290853, | |
| "grad_norm": 0.5605653135696967, | |
| "learning_rate": 3.995631252747804e-05, | |
| "loss": 0.448, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.6002055498458376, | |
| "grad_norm": 0.44488928569515496, | |
| "learning_rate": 3.9952432571118634e-05, | |
| "loss": 0.4467, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.60431654676259, | |
| "grad_norm": 0.5026250568866101, | |
| "learning_rate": 3.994838777765582e-05, | |
| "loss": 0.4477, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.6084275436793423, | |
| "grad_norm": 0.5172580934087974, | |
| "learning_rate": 3.9944178180505685e-05, | |
| "loss": 0.4523, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.6125385405960946, | |
| "grad_norm": 0.42653534399915305, | |
| "learning_rate": 3.993980381444583e-05, | |
| "loss": 0.4461, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.6166495375128469, | |
| "grad_norm": 0.5471101554510113, | |
| "learning_rate": 3.993526471561509e-05, | |
| "loss": 0.4434, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.6207605344295992, | |
| "grad_norm": 0.44006741961102114, | |
| "learning_rate": 3.993056092151326e-05, | |
| "loss": 0.4433, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.6248715313463515, | |
| "grad_norm": 0.54547408589461, | |
| "learning_rate": 3.9925692471000755e-05, | |
| "loss": 0.4512, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.6289825282631039, | |
| "grad_norm": 0.5069055596939723, | |
| "learning_rate": 3.9920659404298285e-05, | |
| "loss": 0.4407, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.6330935251798561, | |
| "grad_norm": 0.4361985691247893, | |
| "learning_rate": 3.991546176298657e-05, | |
| "loss": 0.443, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.6372045220966084, | |
| "grad_norm": 0.6492040966314581, | |
| "learning_rate": 3.991009959000593e-05, | |
| "loss": 0.4643, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.6413155190133607, | |
| "grad_norm": 0.662599709889411, | |
| "learning_rate": 3.990457292965598e-05, | |
| "loss": 0.4421, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.645426515930113, | |
| "grad_norm": 0.7496440633893157, | |
| "learning_rate": 3.9898881827595255e-05, | |
| "loss": 0.4475, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.6495375128468653, | |
| "grad_norm": 0.6565888966700488, | |
| "learning_rate": 3.989302633084081e-05, | |
| "loss": 0.4478, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.6536485097636177, | |
| "grad_norm": 0.4914282437943478, | |
| "learning_rate": 3.988700648776786e-05, | |
| "loss": 0.4485, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.65775950668037, | |
| "grad_norm": 0.5765645279846272, | |
| "learning_rate": 3.9880822348109365e-05, | |
| "loss": 0.4406, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.6618705035971223, | |
| "grad_norm": 0.54508458239329, | |
| "learning_rate": 3.9874473962955625e-05, | |
| "loss": 0.4377, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.6659815005138746, | |
| "grad_norm": 0.5467537029004771, | |
| "learning_rate": 3.986796138475383e-05, | |
| "loss": 0.4404, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.6700924974306269, | |
| "grad_norm": 0.5927993483965615, | |
| "learning_rate": 3.986128466730769e-05, | |
| "loss": 0.4397, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.6742034943473793, | |
| "grad_norm": 0.549384325928676, | |
| "learning_rate": 3.985444386577693e-05, | |
| "loss": 0.4447, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.6783144912641316, | |
| "grad_norm": 0.7853091838958088, | |
| "learning_rate": 3.984743903667685e-05, | |
| "loss": 0.4323, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.6824254881808839, | |
| "grad_norm": 0.7452346665717267, | |
| "learning_rate": 3.984027023787789e-05, | |
| "loss": 0.4418, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.6865364850976362, | |
| "grad_norm": 0.6354950658254404, | |
| "learning_rate": 3.98329375286051e-05, | |
| "loss": 0.4462, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.6906474820143885, | |
| "grad_norm": 0.6039082662120046, | |
| "learning_rate": 3.982544096943769e-05, | |
| "loss": 0.4387, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.6947584789311408, | |
| "grad_norm": 0.658037323729766, | |
| "learning_rate": 3.9817780622308515e-05, | |
| "loss": 0.4442, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.6988694758478932, | |
| "grad_norm": 0.4757882993447632, | |
| "learning_rate": 3.980995655050356e-05, | |
| "loss": 0.4432, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.7029804727646455, | |
| "grad_norm": 0.44532932025468364, | |
| "learning_rate": 3.980196881866143e-05, | |
| "loss": 0.4414, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.7070914696813977, | |
| "grad_norm": 0.45179948654666446, | |
| "learning_rate": 3.9793817492772806e-05, | |
| "loss": 0.4509, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.71120246659815, | |
| "grad_norm": 0.4699683428704349, | |
| "learning_rate": 3.9785502640179905e-05, | |
| "loss": 0.4278, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.7153134635149023, | |
| "grad_norm": 0.5201025709025265, | |
| "learning_rate": 3.97770243295759e-05, | |
| "loss": 0.4335, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.7194244604316546, | |
| "grad_norm": 0.47266696898911464, | |
| "learning_rate": 3.9768382631004405e-05, | |
| "loss": 0.4501, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.723535457348407, | |
| "grad_norm": 0.4422465802779614, | |
| "learning_rate": 3.975957761585883e-05, | |
| "loss": 0.446, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.7276464542651593, | |
| "grad_norm": 0.5391358485913682, | |
| "learning_rate": 3.9750609356881865e-05, | |
| "loss": 0.4512, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.7317574511819116, | |
| "grad_norm": 0.46555411315299916, | |
| "learning_rate": 3.974147792816481e-05, | |
| "loss": 0.4374, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.7358684480986639, | |
| "grad_norm": 0.5347959929577083, | |
| "learning_rate": 3.9732183405146984e-05, | |
| "loss": 0.4368, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.7399794450154162, | |
| "grad_norm": 0.5543063250157177, | |
| "learning_rate": 3.9722725864615156e-05, | |
| "loss": 0.4468, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.7440904419321686, | |
| "grad_norm": 0.39521704775907723, | |
| "learning_rate": 3.971310538470282e-05, | |
| "loss": 0.4338, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.7482014388489209, | |
| "grad_norm": 0.47237629534672426, | |
| "learning_rate": 3.9703322044889605e-05, | |
| "loss": 0.4369, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.7523124357656732, | |
| "grad_norm": 0.434146415819749, | |
| "learning_rate": 3.969337592600062e-05, | |
| "loss": 0.4458, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.7564234326824255, | |
| "grad_norm": 0.38836391572812273, | |
| "learning_rate": 3.968326711020578e-05, | |
| "loss": 0.4546, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.7605344295991778, | |
| "grad_norm": 0.34969919974995534, | |
| "learning_rate": 3.967299568101908e-05, | |
| "loss": 0.4459, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.7646454265159301, | |
| "grad_norm": 0.41064464728289324, | |
| "learning_rate": 3.9662561723298e-05, | |
| "loss": 0.4326, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.7687564234326825, | |
| "grad_norm": 0.4616126051202659, | |
| "learning_rate": 3.9651965323242704e-05, | |
| "loss": 0.4492, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.7728674203494348, | |
| "grad_norm": 0.49195669527847435, | |
| "learning_rate": 3.964120656839541e-05, | |
| "loss": 0.4276, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.7769784172661871, | |
| "grad_norm": 0.3807633073682157, | |
| "learning_rate": 3.963028554763961e-05, | |
| "loss": 0.4428, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.7810894141829393, | |
| "grad_norm": 0.3811255626131261, | |
| "learning_rate": 3.9619202351199356e-05, | |
| "loss": 0.4337, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.7852004110996916, | |
| "grad_norm": 0.3612639948436137, | |
| "learning_rate": 3.960795707063852e-05, | |
| "loss": 0.4363, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.789311408016444, | |
| "grad_norm": 0.4353027404982674, | |
| "learning_rate": 3.959654979886005e-05, | |
| "loss": 0.4365, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.7934224049331963, | |
| "grad_norm": 0.37923924344854587, | |
| "learning_rate": 3.958498063010516e-05, | |
| "loss": 0.4277, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.7975334018499486, | |
| "grad_norm": 0.49016416134919827, | |
| "learning_rate": 3.957324965995257e-05, | |
| "loss": 0.4189, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.8016443987667009, | |
| "grad_norm": 0.3808642318097945, | |
| "learning_rate": 3.956135698531777e-05, | |
| "loss": 0.428, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.8057553956834532, | |
| "grad_norm": 0.4706420424359872, | |
| "learning_rate": 3.9549302704452104e-05, | |
| "loss": 0.4355, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.8098663926002055, | |
| "grad_norm": 0.5558683512038307, | |
| "learning_rate": 3.953708691694208e-05, | |
| "loss": 0.4219, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.8139773895169579, | |
| "grad_norm": 0.5188467581658631, | |
| "learning_rate": 3.952470972370848e-05, | |
| "loss": 0.4369, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.8180883864337102, | |
| "grad_norm": 0.4485136574531589, | |
| "learning_rate": 3.951217122700554e-05, | |
| "loss": 0.4206, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.8221993833504625, | |
| "grad_norm": 0.4872982826961068, | |
| "learning_rate": 3.9499471530420086e-05, | |
| "loss": 0.4434, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.8263103802672148, | |
| "grad_norm": 0.5704413227159343, | |
| "learning_rate": 3.9486610738870726e-05, | |
| "loss": 0.4332, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.8304213771839671, | |
| "grad_norm": 0.6576571504037381, | |
| "learning_rate": 3.947358895860693e-05, | |
| "loss": 0.4282, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.8345323741007195, | |
| "grad_norm": 0.5236083635603117, | |
| "learning_rate": 3.9460406297208204e-05, | |
| "loss": 0.4418, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.8386433710174718, | |
| "grad_norm": 0.4856398721711883, | |
| "learning_rate": 3.944706286358315e-05, | |
| "loss": 0.4446, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.8427543679342241, | |
| "grad_norm": 0.553946219409764, | |
| "learning_rate": 3.94335587679686e-05, | |
| "loss": 0.4421, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.8468653648509764, | |
| "grad_norm": 0.559411380541318, | |
| "learning_rate": 3.94198941219287e-05, | |
| "loss": 0.4628, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.8509763617677287, | |
| "grad_norm": 0.4879763317857753, | |
| "learning_rate": 3.940606903835398e-05, | |
| "loss": 0.442, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.8550873586844809, | |
| "grad_norm": 0.5054384831570833, | |
| "learning_rate": 3.939208363146041e-05, | |
| "loss": 0.4262, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.8591983556012333, | |
| "grad_norm": 0.5553954849786898, | |
| "learning_rate": 3.937793801678851e-05, | |
| "loss": 0.427, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.8633093525179856, | |
| "grad_norm": 0.5872415310529557, | |
| "learning_rate": 3.936363231120231e-05, | |
| "loss": 0.4413, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.8674203494347379, | |
| "grad_norm": 0.5889656491169154, | |
| "learning_rate": 3.934916663288847e-05, | |
| "loss": 0.4374, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.8715313463514902, | |
| "grad_norm": 0.5289928076892064, | |
| "learning_rate": 3.9334541101355244e-05, | |
| "loss": 0.4393, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.8756423432682425, | |
| "grad_norm": 0.5133117516646354, | |
| "learning_rate": 3.931975583743152e-05, | |
| "loss": 0.4207, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.8797533401849948, | |
| "grad_norm": 0.48037331045870174, | |
| "learning_rate": 3.930481096326583e-05, | |
| "loss": 0.4175, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.8838643371017472, | |
| "grad_norm": 0.5410217736097758, | |
| "learning_rate": 3.92897066023253e-05, | |
| "loss": 0.431, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.8879753340184995, | |
| "grad_norm": 0.41649001377169803, | |
| "learning_rate": 3.927444287939467e-05, | |
| "loss": 0.4484, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.8920863309352518, | |
| "grad_norm": 0.45628332224884727, | |
| "learning_rate": 3.925901992057525e-05, | |
| "loss": 0.4305, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.8961973278520041, | |
| "grad_norm": 0.5227314578776049, | |
| "learning_rate": 3.924343785328388e-05, | |
| "loss": 0.4393, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.9003083247687564, | |
| "grad_norm": 0.4530459458277021, | |
| "learning_rate": 3.9227696806251875e-05, | |
| "loss": 0.4382, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.9044193216855088, | |
| "grad_norm": 0.4488315318208515, | |
| "learning_rate": 3.9211796909523953e-05, | |
| "loss": 0.4209, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.9085303186022611, | |
| "grad_norm": 0.4369045769060924, | |
| "learning_rate": 3.9195738294457186e-05, | |
| "loss": 0.4357, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.9126413155190134, | |
| "grad_norm": 0.3980678441937295, | |
| "learning_rate": 3.9179521093719876e-05, | |
| "loss": 0.4142, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.9167523124357657, | |
| "grad_norm": 0.5003747978502763, | |
| "learning_rate": 3.91631454412905e-05, | |
| "loss": 0.4484, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.920863309352518, | |
| "grad_norm": 0.43942976248272747, | |
| "learning_rate": 3.914661147245657e-05, | |
| "loss": 0.434, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.9249743062692704, | |
| "grad_norm": 0.4174753367400882, | |
| "learning_rate": 3.912991932381355e-05, | |
| "loss": 0.4282, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.9290853031860226, | |
| "grad_norm": 0.36920457252907907, | |
| "learning_rate": 3.91130691332637e-05, | |
| "loss": 0.4347, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.9331963001027749, | |
| "grad_norm": 0.45392411540078437, | |
| "learning_rate": 3.9096061040014914e-05, | |
| "loss": 0.4135, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.9373072970195272, | |
| "grad_norm": 0.4203872157822759, | |
| "learning_rate": 3.907889518457964e-05, | |
| "loss": 0.4422, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.9414182939362795, | |
| "grad_norm": 0.391547280290097, | |
| "learning_rate": 3.9061571708773656e-05, | |
| "loss": 0.428, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.9455292908530318, | |
| "grad_norm": 0.5746907556468481, | |
| "learning_rate": 3.9044090755714935e-05, | |
| "loss": 0.4273, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.9496402877697842, | |
| "grad_norm": 0.5021218433821051, | |
| "learning_rate": 3.9026452469822435e-05, | |
| "loss": 0.4318, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.9537512846865365, | |
| "grad_norm": 0.5118619524543895, | |
| "learning_rate": 3.900865699681494e-05, | |
| "loss": 0.4565, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.9578622816032888, | |
| "grad_norm": 0.4269764449835691, | |
| "learning_rate": 3.899070448370981e-05, | |
| "loss": 0.4242, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.9619732785200411, | |
| "grad_norm": 0.544830400097823, | |
| "learning_rate": 3.897259507882181e-05, | |
| "loss": 0.4308, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.9660842754367934, | |
| "grad_norm": 0.5029148596149111, | |
| "learning_rate": 3.895432893176186e-05, | |
| "loss": 0.4283, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.9701952723535457, | |
| "grad_norm": 0.6013585416586662, | |
| "learning_rate": 3.8935906193435814e-05, | |
| "loss": 0.4231, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.9743062692702981, | |
| "grad_norm": 0.501625593569375, | |
| "learning_rate": 3.89173270160432e-05, | |
| "loss": 0.4335, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.9784172661870504, | |
| "grad_norm": 0.6586654174152249, | |
| "learning_rate": 3.889859155307596e-05, | |
| "loss": 0.4365, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.9825282631038027, | |
| "grad_norm": 0.6491985191825143, | |
| "learning_rate": 3.8879699959317204e-05, | |
| "loss": 0.428, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.986639260020555, | |
| "grad_norm": 0.36412537479982626, | |
| "learning_rate": 3.8860652390839915e-05, | |
| "loss": 0.4258, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.9907502569373073, | |
| "grad_norm": 0.562496507066076, | |
| "learning_rate": 3.884144900500565e-05, | |
| "loss": 0.4352, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.9948612538540597, | |
| "grad_norm": 0.4838097185277804, | |
| "learning_rate": 3.882208996046327e-05, | |
| "loss": 0.4422, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.998972250770812, | |
| "grad_norm": 0.4422917967441169, | |
| "learning_rate": 3.880257541714759e-05, | |
| "loss": 0.4273, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 1.0030832476875642, | |
| "grad_norm": 0.5334045773924255, | |
| "learning_rate": 3.878290553627809e-05, | |
| "loss": 0.3969, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 1.0071942446043165, | |
| "grad_norm": 0.5378794632121926, | |
| "learning_rate": 3.876308048035758e-05, | |
| "loss": 0.3903, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.0113052415210688, | |
| "grad_norm": 0.5944247982125659, | |
| "learning_rate": 3.874310041317084e-05, | |
| "loss": 0.3866, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.0154162384378211, | |
| "grad_norm": 0.5659631885785738, | |
| "learning_rate": 3.8722965499783265e-05, | |
| "loss": 0.3859, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 1.0195272353545735, | |
| "grad_norm": 0.6678922530928978, | |
| "learning_rate": 3.8702675906539536e-05, | |
| "loss": 0.3975, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.0236382322713258, | |
| "grad_norm": 0.6092071387321932, | |
| "learning_rate": 3.868223180106221e-05, | |
| "loss": 0.3805, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 1.027749229188078, | |
| "grad_norm": 0.48801873476109786, | |
| "learning_rate": 3.866163335225034e-05, | |
| "loss": 0.3924, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.0318602261048304, | |
| "grad_norm": 0.5338205820825612, | |
| "learning_rate": 3.8640880730278105e-05, | |
| "loss": 0.4015, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 1.0359712230215827, | |
| "grad_norm": 0.47770709705325853, | |
| "learning_rate": 3.8619974106593365e-05, | |
| "loss": 0.3979, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.040082219938335, | |
| "grad_norm": 0.6103179105115757, | |
| "learning_rate": 3.859891365391628e-05, | |
| "loss": 0.388, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 1.0441932168550874, | |
| "grad_norm": 0.5427245439232725, | |
| "learning_rate": 3.8577699546237886e-05, | |
| "loss": 0.3811, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.0483042137718397, | |
| "grad_norm": 0.532814479999278, | |
| "learning_rate": 3.8556331958818596e-05, | |
| "loss": 0.3872, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.052415210688592, | |
| "grad_norm": 0.5339131788688589, | |
| "learning_rate": 3.853481106818683e-05, | |
| "loss": 0.3914, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.0565262076053443, | |
| "grad_norm": 0.5060398381577083, | |
| "learning_rate": 3.851313705213751e-05, | |
| "loss": 0.3876, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 1.0606372045220966, | |
| "grad_norm": 0.562896010283109, | |
| "learning_rate": 3.8491310089730614e-05, | |
| "loss": 0.3946, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.064748201438849, | |
| "grad_norm": 0.40638744368816154, | |
| "learning_rate": 3.846933036128968e-05, | |
| "loss": 0.3809, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 1.0688591983556013, | |
| "grad_norm": 0.5821981657729004, | |
| "learning_rate": 3.8447198048400325e-05, | |
| "loss": 0.4041, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.0729701952723536, | |
| "grad_norm": 0.5613586250111681, | |
| "learning_rate": 3.8424913333908744e-05, | |
| "loss": 0.3834, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 1.077081192189106, | |
| "grad_norm": 0.5210705488884988, | |
| "learning_rate": 3.840247640192019e-05, | |
| "loss": 0.4053, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.0811921891058582, | |
| "grad_norm": 0.5223026076271566, | |
| "learning_rate": 3.837988743779747e-05, | |
| "loss": 0.4057, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 1.0853031860226106, | |
| "grad_norm": 0.40771864088972815, | |
| "learning_rate": 3.8357146628159415e-05, | |
| "loss": 0.3759, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.0894141829393629, | |
| "grad_norm": 0.5106166010616134, | |
| "learning_rate": 3.8334254160879296e-05, | |
| "loss": 0.3927, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.0935251798561152, | |
| "grad_norm": 0.39265167645057447, | |
| "learning_rate": 3.8311210225083347e-05, | |
| "loss": 0.3772, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.0976361767728675, | |
| "grad_norm": 0.5406659317819649, | |
| "learning_rate": 3.8288015011149126e-05, | |
| "loss": 0.3877, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 1.1017471736896198, | |
| "grad_norm": 0.4396566495996877, | |
| "learning_rate": 3.826466871070399e-05, | |
| "loss": 0.3919, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.1058581706063721, | |
| "grad_norm": 0.4071828889239751, | |
| "learning_rate": 3.82411715166235e-05, | |
| "loss": 0.3929, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 1.1099691675231242, | |
| "grad_norm": 0.4408302571199858, | |
| "learning_rate": 3.821752362302982e-05, | |
| "loss": 0.3984, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.1140801644398768, | |
| "grad_norm": 0.3944864874139757, | |
| "learning_rate": 3.8193725225290105e-05, | |
| "loss": 0.3791, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 1.1181911613566289, | |
| "grad_norm": 0.5086637552588018, | |
| "learning_rate": 3.8169776520014935e-05, | |
| "loss": 0.3981, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.1223021582733812, | |
| "grad_norm": 0.37495702811326503, | |
| "learning_rate": 3.814567770505663e-05, | |
| "loss": 0.399, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 1.1264131551901335, | |
| "grad_norm": 0.5606532197558952, | |
| "learning_rate": 3.812142897950765e-05, | |
| "loss": 0.3919, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.1305241521068858, | |
| "grad_norm": 0.5021460420776965, | |
| "learning_rate": 3.809703054369893e-05, | |
| "loss": 0.3884, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.1346351490236382, | |
| "grad_norm": 0.42349030253760284, | |
| "learning_rate": 3.807248259919826e-05, | |
| "loss": 0.3834, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.1387461459403905, | |
| "grad_norm": 0.4379650832741319, | |
| "learning_rate": 3.804778534880858e-05, | |
| "loss": 0.3907, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 1.1428571428571428, | |
| "grad_norm": 0.49289600771412606, | |
| "learning_rate": 3.802293899656632e-05, | |
| "loss": 0.3813, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.1469681397738951, | |
| "grad_norm": 0.42490353928620167, | |
| "learning_rate": 3.7997943747739735e-05, | |
| "loss": 0.3853, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 1.1510791366906474, | |
| "grad_norm": 0.518693269655709, | |
| "learning_rate": 3.797279980882716e-05, | |
| "loss": 0.3982, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.1551901336073997, | |
| "grad_norm": 0.43881434866880253, | |
| "learning_rate": 3.794750738755536e-05, | |
| "loss": 0.3926, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 1.159301130524152, | |
| "grad_norm": 0.43485781066948115, | |
| "learning_rate": 3.792206669287776e-05, | |
| "loss": 0.3922, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.1634121274409044, | |
| "grad_norm": 0.45793373109478, | |
| "learning_rate": 3.789647793497279e-05, | |
| "loss": 0.3949, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 1.1675231243576567, | |
| "grad_norm": 0.4344299254818994, | |
| "learning_rate": 3.787074132524206e-05, | |
| "loss": 0.3895, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.171634121274409, | |
| "grad_norm": 0.44080712302633035, | |
| "learning_rate": 3.784485707630868e-05, | |
| "loss": 0.3914, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 1.1757451181911613, | |
| "grad_norm": 0.37695184690701744, | |
| "learning_rate": 3.781882540201547e-05, | |
| "loss": 0.3875, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.1798561151079137, | |
| "grad_norm": 0.45884161347743313, | |
| "learning_rate": 3.7792646517423236e-05, | |
| "loss": 0.3744, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 1.183967112024666, | |
| "grad_norm": 0.4017320092584037, | |
| "learning_rate": 3.7766320638808924e-05, | |
| "loss": 0.3922, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.1880781089414183, | |
| "grad_norm": 0.4280615463958759, | |
| "learning_rate": 3.773984798366389e-05, | |
| "loss": 0.4006, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 1.1921891058581706, | |
| "grad_norm": 0.3513007234774324, | |
| "learning_rate": 3.7713228770692084e-05, | |
| "loss": 0.3819, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.196300102774923, | |
| "grad_norm": 0.4230975793009198, | |
| "learning_rate": 3.768646321980824e-05, | |
| "loss": 0.3819, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 1.2004110996916753, | |
| "grad_norm": 0.38816726480644864, | |
| "learning_rate": 3.765955155213607e-05, | |
| "loss": 0.391, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.2045220966084276, | |
| "grad_norm": 0.41570862488108373, | |
| "learning_rate": 3.763249399000643e-05, | |
| "loss": 0.3879, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 1.20863309352518, | |
| "grad_norm": 0.5386712701377521, | |
| "learning_rate": 3.7605290756955476e-05, | |
| "loss": 0.404, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.2127440904419322, | |
| "grad_norm": 0.4142838391786987, | |
| "learning_rate": 3.757794207772283e-05, | |
| "loss": 0.394, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 1.2168550873586845, | |
| "grad_norm": 0.3918702689073396, | |
| "learning_rate": 3.755044817824971e-05, | |
| "loss": 0.3833, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.2209660842754368, | |
| "grad_norm": 0.4790045020269064, | |
| "learning_rate": 3.752280928567709e-05, | |
| "loss": 0.3827, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 1.2250770811921892, | |
| "grad_norm": 0.4071852105252518, | |
| "learning_rate": 3.749502562834379e-05, | |
| "loss": 0.3972, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.2291880781089415, | |
| "grad_norm": 0.433522303940447, | |
| "learning_rate": 3.746709743578462e-05, | |
| "loss": 0.3985, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 1.2332990750256938, | |
| "grad_norm": 0.4222484903892338, | |
| "learning_rate": 3.7439024938728435e-05, | |
| "loss": 0.384, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.2374100719424461, | |
| "grad_norm": 0.42821966368019687, | |
| "learning_rate": 3.74108083690963e-05, | |
| "loss": 0.3908, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 1.2415210688591984, | |
| "grad_norm": 0.5269787553817297, | |
| "learning_rate": 3.7382447959999514e-05, | |
| "loss": 0.3869, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.2456320657759508, | |
| "grad_norm": 0.4206960432187445, | |
| "learning_rate": 3.7353943945737716e-05, | |
| "loss": 0.3984, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 1.249743062692703, | |
| "grad_norm": 0.3963715348953228, | |
| "learning_rate": 3.7325296561796936e-05, | |
| "loss": 0.3908, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.2538540596094552, | |
| "grad_norm": 0.5197873707406762, | |
| "learning_rate": 3.729650604484766e-05, | |
| "loss": 0.3789, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.2579650565262077, | |
| "grad_norm": 0.391975059464178, | |
| "learning_rate": 3.7267572632742846e-05, | |
| "loss": 0.39, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.2620760534429598, | |
| "grad_norm": 0.4297553917483092, | |
| "learning_rate": 3.7238496564516006e-05, | |
| "loss": 0.398, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 1.2661870503597124, | |
| "grad_norm": 0.32415884735671224, | |
| "learning_rate": 3.720927808037921e-05, | |
| "loss": 0.385, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.2702980472764644, | |
| "grad_norm": 0.4050462187721075, | |
| "learning_rate": 3.717991742172106e-05, | |
| "loss": 0.3801, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 1.274409044193217, | |
| "grad_norm": 0.44040991415716113, | |
| "learning_rate": 3.7150414831104765e-05, | |
| "loss": 0.3936, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.278520041109969, | |
| "grad_norm": 0.4117947843277416, | |
| "learning_rate": 3.712077055226611e-05, | |
| "loss": 0.3966, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 1.2826310380267214, | |
| "grad_norm": 0.4039039643321521, | |
| "learning_rate": 3.7090984830111415e-05, | |
| "loss": 0.3863, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.2867420349434737, | |
| "grad_norm": 0.39088426091872597, | |
| "learning_rate": 3.7061057910715546e-05, | |
| "loss": 0.4019, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 1.290853031860226, | |
| "grad_norm": 0.3364663722128402, | |
| "learning_rate": 3.703099004131988e-05, | |
| "loss": 0.389, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.2949640287769784, | |
| "grad_norm": 0.396387529395801, | |
| "learning_rate": 3.700078147033023e-05, | |
| "loss": 0.3826, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 1.2990750256937307, | |
| "grad_norm": 0.4034497950317108, | |
| "learning_rate": 3.697043244731484e-05, | |
| "loss": 0.387, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.303186022610483, | |
| "grad_norm": 0.45567545271356036, | |
| "learning_rate": 3.693994322300228e-05, | |
| "loss": 0.3903, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 1.3072970195272353, | |
| "grad_norm": 0.36949826512347733, | |
| "learning_rate": 3.69093140492794e-05, | |
| "loss": 0.3907, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.3114080164439876, | |
| "grad_norm": 0.3907383409192243, | |
| "learning_rate": 3.687854517918926e-05, | |
| "loss": 0.3884, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 1.31551901336074, | |
| "grad_norm": 0.400771927655429, | |
| "learning_rate": 3.684763686692898e-05, | |
| "loss": 0.3897, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.3196300102774923, | |
| "grad_norm": 0.28349821982969425, | |
| "learning_rate": 3.681658936784773e-05, | |
| "loss": 0.3819, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 1.3237410071942446, | |
| "grad_norm": 0.3707057575475429, | |
| "learning_rate": 3.678540293844455e-05, | |
| "loss": 0.4029, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.327852004110997, | |
| "grad_norm": 0.3571877683162145, | |
| "learning_rate": 3.675407783636624e-05, | |
| "loss": 0.3888, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 1.3319630010277492, | |
| "grad_norm": 0.35550987334717343, | |
| "learning_rate": 3.672261432040527e-05, | |
| "loss": 0.388, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.3360739979445015, | |
| "grad_norm": 0.33342159219384704, | |
| "learning_rate": 3.6691012650497605e-05, | |
| "loss": 0.3949, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 1.3401849948612539, | |
| "grad_norm": 0.33685885775370095, | |
| "learning_rate": 3.665927308772057e-05, | |
| "loss": 0.3801, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.3442959917780062, | |
| "grad_norm": 0.3507256755448898, | |
| "learning_rate": 3.6627395894290685e-05, | |
| "loss": 0.4011, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 1.3484069886947585, | |
| "grad_norm": 0.3729058607264493, | |
| "learning_rate": 3.659538133356153e-05, | |
| "loss": 0.3841, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.3525179856115108, | |
| "grad_norm": 0.35654106083716275, | |
| "learning_rate": 3.656322967002151e-05, | |
| "loss": 0.3798, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 1.3566289825282631, | |
| "grad_norm": 0.34899761497768883, | |
| "learning_rate": 3.6530941169291744e-05, | |
| "loss": 0.3769, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.3607399794450155, | |
| "grad_norm": 0.3500913238620904, | |
| "learning_rate": 3.649851609812379e-05, | |
| "loss": 0.4005, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 1.3648509763617678, | |
| "grad_norm": 0.4370742910901644, | |
| "learning_rate": 3.646595472439753e-05, | |
| "loss": 0.3812, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.36896197327852, | |
| "grad_norm": 0.4531455394409143, | |
| "learning_rate": 3.643325731711888e-05, | |
| "loss": 0.3949, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 1.3730729701952724, | |
| "grad_norm": 0.3382908051688983, | |
| "learning_rate": 3.6400424146417604e-05, | |
| "loss": 0.3951, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.3771839671120247, | |
| "grad_norm": 0.48124284386355537, | |
| "learning_rate": 3.6367455483545066e-05, | |
| "loss": 0.3886, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 1.381294964028777, | |
| "grad_norm": 0.4073484304811201, | |
| "learning_rate": 3.633435160087202e-05, | |
| "loss": 0.3833, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.3854059609455294, | |
| "grad_norm": 0.37602122616857575, | |
| "learning_rate": 3.6301112771886315e-05, | |
| "loss": 0.3947, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 1.3895169578622815, | |
| "grad_norm": 0.35827821253734476, | |
| "learning_rate": 3.62677392711907e-05, | |
| "loss": 0.39, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.393627954779034, | |
| "grad_norm": 0.40220244427058716, | |
| "learning_rate": 3.623423137450046e-05, | |
| "loss": 0.3912, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 1.397738951695786, | |
| "grad_norm": 0.3520064901329717, | |
| "learning_rate": 3.620058935864123e-05, | |
| "loss": 0.3902, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.4018499486125386, | |
| "grad_norm": 0.3470206706484027, | |
| "learning_rate": 3.616681350154666e-05, | |
| "loss": 0.3817, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 1.4059609455292907, | |
| "grad_norm": 0.3684394787845421, | |
| "learning_rate": 3.613290408225615e-05, | |
| "loss": 0.3827, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.4100719424460433, | |
| "grad_norm": 0.34749273169540446, | |
| "learning_rate": 3.609886138091247e-05, | |
| "loss": 0.3874, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.4141829393627954, | |
| "grad_norm": 0.36675012503855753, | |
| "learning_rate": 3.606468567875957e-05, | |
| "loss": 0.3863, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.418293936279548, | |
| "grad_norm": 0.3087501681001265, | |
| "learning_rate": 3.603037725814014e-05, | |
| "loss": 0.3878, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.4224049331963, | |
| "grad_norm": 0.44389782737034467, | |
| "learning_rate": 3.599593640249334e-05, | |
| "loss": 0.3775, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.4265159301130523, | |
| "grad_norm": 0.40184525645257135, | |
| "learning_rate": 3.5961363396352435e-05, | |
| "loss": 0.3878, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.4306269270298047, | |
| "grad_norm": 0.3739195862038827, | |
| "learning_rate": 3.592665852534246e-05, | |
| "loss": 0.3882, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.434737923946557, | |
| "grad_norm": 0.4667442608373335, | |
| "learning_rate": 3.589182207617785e-05, | |
| "loss": 0.3848, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.4388489208633093, | |
| "grad_norm": 0.2954516082957151, | |
| "learning_rate": 3.5856854336660075e-05, | |
| "loss": 0.3737, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.4429599177800616, | |
| "grad_norm": 0.3807234664590424, | |
| "learning_rate": 3.582175559567524e-05, | |
| "loss": 0.394, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.447070914696814, | |
| "grad_norm": 0.37991638726226773, | |
| "learning_rate": 3.578652614319177e-05, | |
| "loss": 0.3924, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.4511819116135662, | |
| "grad_norm": 0.4605647523732803, | |
| "learning_rate": 3.575116627025791e-05, | |
| "loss": 0.3895, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.4552929085303186, | |
| "grad_norm": 0.34460237531655397, | |
| "learning_rate": 3.571567626899939e-05, | |
| "loss": 0.3979, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.4594039054470709, | |
| "grad_norm": 0.37502366115502783, | |
| "learning_rate": 3.568005643261701e-05, | |
| "loss": 0.3865, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.4635149023638232, | |
| "grad_norm": 0.361240868061172, | |
| "learning_rate": 3.5644307055384204e-05, | |
| "loss": 0.3927, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.4676258992805755, | |
| "grad_norm": 0.36549527451613106, | |
| "learning_rate": 3.5608428432644574e-05, | |
| "loss": 0.3906, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.4717368961973278, | |
| "grad_norm": 0.3841131971215696, | |
| "learning_rate": 3.557242086080953e-05, | |
| "loss": 0.3882, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.4758478931140802, | |
| "grad_norm": 0.3053174499069298, | |
| "learning_rate": 3.5536284637355766e-05, | |
| "loss": 0.3882, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.4799588900308325, | |
| "grad_norm": 0.42930198135043723, | |
| "learning_rate": 3.5500020060822844e-05, | |
| "loss": 0.39, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.4840698869475848, | |
| "grad_norm": 0.3646026910744666, | |
| "learning_rate": 3.54636274308107e-05, | |
| "loss": 0.3919, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.4881808838643371, | |
| "grad_norm": 0.4584181730800767, | |
| "learning_rate": 3.542710704797721e-05, | |
| "loss": 0.3841, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.4922918807810894, | |
| "grad_norm": 0.3912766247821292, | |
| "learning_rate": 3.539045921403566e-05, | |
| "loss": 0.375, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.4964028776978417, | |
| "grad_norm": 0.39561662555483357, | |
| "learning_rate": 3.5353684231752276e-05, | |
| "loss": 0.3884, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.500513874614594, | |
| "grad_norm": 0.33669597693884484, | |
| "learning_rate": 3.531678240494373e-05, | |
| "loss": 0.3953, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.5046248715313464, | |
| "grad_norm": 0.4156836645972758, | |
| "learning_rate": 3.5279754038474616e-05, | |
| "loss": 0.3864, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.5087358684480987, | |
| "grad_norm": 0.3888603103920021, | |
| "learning_rate": 3.524259943825493e-05, | |
| "loss": 0.3864, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.512846865364851, | |
| "grad_norm": 0.34153109888601435, | |
| "learning_rate": 3.5205318911237566e-05, | |
| "loss": 0.3829, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.5169578622816033, | |
| "grad_norm": 0.4203599723923179, | |
| "learning_rate": 3.516791276541574e-05, | |
| "loss": 0.391, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.5210688591983557, | |
| "grad_norm": 0.39707036421576897, | |
| "learning_rate": 3.5130381309820474e-05, | |
| "loss": 0.3852, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.5251798561151078, | |
| "grad_norm": 0.35484540902249145, | |
| "learning_rate": 3.509272485451806e-05, | |
| "loss": 0.3813, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.5292908530318603, | |
| "grad_norm": 0.35726960151965814, | |
| "learning_rate": 3.5054943710607435e-05, | |
| "loss": 0.3943, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.5334018499486124, | |
| "grad_norm": 0.34918237917940137, | |
| "learning_rate": 3.50170381902177e-05, | |
| "loss": 0.3813, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.537512846865365, | |
| "grad_norm": 0.3225637816337971, | |
| "learning_rate": 3.497900860650545e-05, | |
| "loss": 0.3818, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.541623843782117, | |
| "grad_norm": 0.3243987867777615, | |
| "learning_rate": 3.494085527365224e-05, | |
| "loss": 0.3759, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.5457348406988696, | |
| "grad_norm": 0.3158935559652955, | |
| "learning_rate": 3.4902578506861995e-05, | |
| "loss": 0.3893, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.5498458376156217, | |
| "grad_norm": 0.3815644429337655, | |
| "learning_rate": 3.486417862235839e-05, | |
| "loss": 0.3905, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.5539568345323742, | |
| "grad_norm": 0.3118180182058997, | |
| "learning_rate": 3.4825655937382216e-05, | |
| "loss": 0.3865, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.5580678314491263, | |
| "grad_norm": 0.3841430312682266, | |
| "learning_rate": 3.4787010770188795e-05, | |
| "loss": 0.3932, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.5621788283658788, | |
| "grad_norm": 0.39242889501386036, | |
| "learning_rate": 3.474824344004534e-05, | |
| "loss": 0.3906, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.566289825282631, | |
| "grad_norm": 0.3632635332732287, | |
| "learning_rate": 3.4709354267228294e-05, | |
| "loss": 0.3783, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.5704008221993835, | |
| "grad_norm": 0.4216314417617418, | |
| "learning_rate": 3.467034357302073e-05, | |
| "loss": 0.3816, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.5745118191161356, | |
| "grad_norm": 0.410057885099804, | |
| "learning_rate": 3.463121167970966e-05, | |
| "loss": 0.3843, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.5786228160328881, | |
| "grad_norm": 0.30821430114214227, | |
| "learning_rate": 3.4591958910583365e-05, | |
| "loss": 0.3871, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.5827338129496402, | |
| "grad_norm": 0.3935547600639123, | |
| "learning_rate": 3.455258558992877e-05, | |
| "loss": 0.379, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.5868448098663928, | |
| "grad_norm": 0.3250409563547552, | |
| "learning_rate": 3.451309204302873e-05, | |
| "loss": 0.3801, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.5909558067831449, | |
| "grad_norm": 0.356548846946166, | |
| "learning_rate": 3.447347859615933e-05, | |
| "loss": 0.379, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.5950668036998972, | |
| "grad_norm": 0.361163416612143, | |
| "learning_rate": 3.443374557658723e-05, | |
| "loss": 0.3745, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.5991778006166495, | |
| "grad_norm": 0.3446242948127641, | |
| "learning_rate": 3.439389331256694e-05, | |
| "loss": 0.3807, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.6032887975334018, | |
| "grad_norm": 0.33333480141647187, | |
| "learning_rate": 3.435392213333809e-05, | |
| "loss": 0.3832, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.6073997944501541, | |
| "grad_norm": 0.38066181381339836, | |
| "learning_rate": 3.431383236912275e-05, | |
| "loss": 0.3692, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.6115107913669064, | |
| "grad_norm": 0.3255044440323713, | |
| "learning_rate": 3.427362435112268e-05, | |
| "loss": 0.3728, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.6156217882836588, | |
| "grad_norm": 0.41417790723734144, | |
| "learning_rate": 3.423329841151656e-05, | |
| "loss": 0.3868, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.619732785200411, | |
| "grad_norm": 0.32170041659499554, | |
| "learning_rate": 3.4192854883457326e-05, | |
| "loss": 0.3724, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.6238437821171634, | |
| "grad_norm": 0.3522389078445349, | |
| "learning_rate": 3.4152294101069345e-05, | |
| "loss": 0.3755, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.6279547790339157, | |
| "grad_norm": 0.3154196575435205, | |
| "learning_rate": 3.411161639944568e-05, | |
| "loss": 0.3866, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.632065775950668, | |
| "grad_norm": 0.3883625817054837, | |
| "learning_rate": 3.407082211464534e-05, | |
| "loss": 0.3842, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.6361767728674204, | |
| "grad_norm": 0.32478029230772587, | |
| "learning_rate": 3.402991158369047e-05, | |
| "loss": 0.3856, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.6402877697841727, | |
| "grad_norm": 0.33777536538509645, | |
| "learning_rate": 3.39888851445636e-05, | |
| "loss": 0.3738, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.644398766700925, | |
| "grad_norm": 0.3645535574440166, | |
| "learning_rate": 3.394774313620481e-05, | |
| "loss": 0.3768, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.6485097636176773, | |
| "grad_norm": 0.33553965225554366, | |
| "learning_rate": 3.390648589850897e-05, | |
| "loss": 0.3854, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.6526207605344296, | |
| "grad_norm": 0.35131544263569836, | |
| "learning_rate": 3.386511377232293e-05, | |
| "loss": 0.383, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.656731757451182, | |
| "grad_norm": 0.3083698384899604, | |
| "learning_rate": 3.382362709944268e-05, | |
| "loss": 0.3913, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.6608427543679343, | |
| "grad_norm": 0.3444920510980315, | |
| "learning_rate": 3.3782026222610525e-05, | |
| "loss": 0.3912, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.6649537512846866, | |
| "grad_norm": 0.3109066824781155, | |
| "learning_rate": 3.374031148551229e-05, | |
| "loss": 0.3785, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.6690647482014387, | |
| "grad_norm": 0.393332877111885, | |
| "learning_rate": 3.3698483232774435e-05, | |
| "loss": 0.3811, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.6731757451181912, | |
| "grad_norm": 0.35010985881480106, | |
| "learning_rate": 3.365654180996126e-05, | |
| "loss": 0.3765, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.6772867420349433, | |
| "grad_norm": 0.3994860261819717, | |
| "learning_rate": 3.361448756357199e-05, | |
| "loss": 0.3855, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.6813977389516959, | |
| "grad_norm": 0.4026873313554007, | |
| "learning_rate": 3.3572320841037945e-05, | |
| "loss": 0.3776, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.685508735868448, | |
| "grad_norm": 0.3888166731552757, | |
| "learning_rate": 3.353004199071969e-05, | |
| "loss": 0.389, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.6896197327852005, | |
| "grad_norm": 0.4229642214250034, | |
| "learning_rate": 3.348765136190412e-05, | |
| "loss": 0.3844, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.6937307297019526, | |
| "grad_norm": 0.3719493753316055, | |
| "learning_rate": 3.344514930480158e-05, | |
| "loss": 0.3718, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.6978417266187051, | |
| "grad_norm": 0.3750792470447336, | |
| "learning_rate": 3.3402536170542985e-05, | |
| "loss": 0.4017, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.7019527235354572, | |
| "grad_norm": 0.37953623181883855, | |
| "learning_rate": 3.335981231117694e-05, | |
| "loss": 0.3786, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.7060637204522098, | |
| "grad_norm": 0.42228613250314784, | |
| "learning_rate": 3.331697807966676e-05, | |
| "loss": 0.3902, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.7101747173689619, | |
| "grad_norm": 0.33605301616513616, | |
| "learning_rate": 3.327403382988764e-05, | |
| "loss": 0.382, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.7142857142857144, | |
| "grad_norm": 0.915407319860973, | |
| "learning_rate": 3.3230979916623667e-05, | |
| "loss": 0.3868, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.7183967112024665, | |
| "grad_norm": 0.4415883382317921, | |
| "learning_rate": 3.318781669556493e-05, | |
| "loss": 0.4025, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.722507708119219, | |
| "grad_norm": 0.29764556226533273, | |
| "learning_rate": 3.3144544523304545e-05, | |
| "loss": 0.3868, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.7266187050359711, | |
| "grad_norm": 0.3864981881512229, | |
| "learning_rate": 3.310116375733575e-05, | |
| "loss": 0.3848, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.7307297019527237, | |
| "grad_norm": 0.4237534589835872, | |
| "learning_rate": 3.3057674756048906e-05, | |
| "loss": 0.3884, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.7348406988694758, | |
| "grad_norm": 0.30622506767945284, | |
| "learning_rate": 3.30140778787286e-05, | |
| "loss": 0.3962, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.7389516957862283, | |
| "grad_norm": 0.3715870543554042, | |
| "learning_rate": 3.297037348555059e-05, | |
| "loss": 0.3804, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.7430626927029804, | |
| "grad_norm": 0.3158873451974222, | |
| "learning_rate": 3.292656193757891e-05, | |
| "loss": 0.3808, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.7471736896197327, | |
| "grad_norm": 3.5993500626700534, | |
| "learning_rate": 3.2882643596762847e-05, | |
| "loss": 0.3766, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.751284686536485, | |
| "grad_norm": 0.41799220024756045, | |
| "learning_rate": 3.283861882593394e-05, | |
| "loss": 0.3629, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.7553956834532374, | |
| "grad_norm": 0.4028165918419239, | |
| "learning_rate": 3.2794487988803024e-05, | |
| "loss": 0.3946, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.7595066803699897, | |
| "grad_norm": 0.45312099756724705, | |
| "learning_rate": 3.275025144995719e-05, | |
| "loss": 0.3826, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.763617677286742, | |
| "grad_norm": 0.3682320829470106, | |
| "learning_rate": 3.270590957485678e-05, | |
| "loss": 0.3822, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.7677286742034943, | |
| "grad_norm": 0.432471521500914, | |
| "learning_rate": 3.266146272983238e-05, | |
| "loss": 0.379, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.7718396711202467, | |
| "grad_norm": 0.49258814180632715, | |
| "learning_rate": 3.261691128208178e-05, | |
| "loss": 0.3781, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.775950668036999, | |
| "grad_norm": 0.42039354140050533, | |
| "learning_rate": 3.2572255599666946e-05, | |
| "loss": 0.3858, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.7800616649537513, | |
| "grad_norm": 0.4266657139962505, | |
| "learning_rate": 3.252749605151099e-05, | |
| "loss": 0.3889, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.7841726618705036, | |
| "grad_norm": 0.4111540760053901, | |
| "learning_rate": 3.24826330073951e-05, | |
| "loss": 0.3828, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.788283658787256, | |
| "grad_norm": 0.40054562650751135, | |
| "learning_rate": 3.2437666837955495e-05, | |
| "loss": 0.3821, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.7923946557040082, | |
| "grad_norm": 0.4049883565747011, | |
| "learning_rate": 3.239259791468037e-05, | |
| "loss": 0.3782, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.7965056526207606, | |
| "grad_norm": 0.3211989179680821, | |
| "learning_rate": 3.234742660990681e-05, | |
| "loss": 0.3886, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.8006166495375129, | |
| "grad_norm": 0.3415159428416263, | |
| "learning_rate": 3.230215329681775e-05, | |
| "loss": 0.3865, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.8047276464542652, | |
| "grad_norm": 0.30795596054473745, | |
| "learning_rate": 3.225677834943884e-05, | |
| "loss": 0.3798, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.8088386433710175, | |
| "grad_norm": 0.3527630027822489, | |
| "learning_rate": 3.22113021426354e-05, | |
| "loss": 0.371, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.8129496402877698, | |
| "grad_norm": 0.38597884530280835, | |
| "learning_rate": 3.216572505210929e-05, | |
| "loss": 0.386, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.8170606372045222, | |
| "grad_norm": 0.35477892953521534, | |
| "learning_rate": 3.2120047454395845e-05, | |
| "loss": 0.3837, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.8211716341212743, | |
| "grad_norm": 0.34702546052353167, | |
| "learning_rate": 3.207426972686071e-05, | |
| "loss": 0.3892, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.8252826310380268, | |
| "grad_norm": 0.30619045437996395, | |
| "learning_rate": 3.202839224769678e-05, | |
| "loss": 0.3911, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.829393627954779, | |
| "grad_norm": 0.308117763052393, | |
| "learning_rate": 3.198241539592103e-05, | |
| "loss": 0.388, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.8335046248715314, | |
| "grad_norm": 0.3813900684937835, | |
| "learning_rate": 3.1936339551371416e-05, | |
| "loss": 0.3733, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.8376156217882835, | |
| "grad_norm": 0.37451609765152405, | |
| "learning_rate": 3.1890165094703704e-05, | |
| "loss": 0.382, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.841726618705036, | |
| "grad_norm": 0.3343641229801653, | |
| "learning_rate": 3.184389240738838e-05, | |
| "loss": 0.3843, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.8458376156217882, | |
| "grad_norm": 0.3671589935937082, | |
| "learning_rate": 3.179752187170741e-05, | |
| "loss": 0.3914, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.8499486125385407, | |
| "grad_norm": 0.3997665963907156, | |
| "learning_rate": 3.1751053870751184e-05, | |
| "loss": 0.3843, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.8540596094552928, | |
| "grad_norm": 0.3253860699538578, | |
| "learning_rate": 3.1704488788415274e-05, | |
| "loss": 0.3855, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.8581706063720453, | |
| "grad_norm": 0.3876573196918091, | |
| "learning_rate": 3.16578270093973e-05, | |
| "loss": 0.386, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.8622816032887974, | |
| "grad_norm": 0.38773352168091224, | |
| "learning_rate": 3.1611068919193756e-05, | |
| "loss": 0.3783, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.86639260020555, | |
| "grad_norm": 0.31178745493034576, | |
| "learning_rate": 3.1564214904096774e-05, | |
| "loss": 0.385, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.870503597122302, | |
| "grad_norm": 0.5723565190756046, | |
| "learning_rate": 3.1517265351191e-05, | |
| "loss": 0.3841, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.8746145940390546, | |
| "grad_norm": 0.46777702023370726, | |
| "learning_rate": 3.147022064835036e-05, | |
| "loss": 0.385, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.8787255909558067, | |
| "grad_norm": 0.48665108942706403, | |
| "learning_rate": 3.142308118423485e-05, | |
| "loss": 0.3808, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.8828365878725593, | |
| "grad_norm": 0.5712910353884142, | |
| "learning_rate": 3.1375847348287365e-05, | |
| "loss": 0.3898, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.8869475847893113, | |
| "grad_norm": 0.4140740468743388, | |
| "learning_rate": 3.132851953073041e-05, | |
| "loss": 0.3823, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.8910585817060637, | |
| "grad_norm": 0.4981734090282241, | |
| "learning_rate": 3.128109812256296e-05, | |
| "loss": 0.379, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.895169578622816, | |
| "grad_norm": 0.3901725689405749, | |
| "learning_rate": 3.1233583515557166e-05, | |
| "loss": 0.3802, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.8992805755395683, | |
| "grad_norm": 0.39288710655716796, | |
| "learning_rate": 3.118597610225514e-05, | |
| "loss": 0.3648, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.9033915724563206, | |
| "grad_norm": 0.3943553998150945, | |
| "learning_rate": 3.113827627596575e-05, | |
| "loss": 0.3845, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.907502569373073, | |
| "grad_norm": 0.3815649604071033, | |
| "learning_rate": 3.1090484430761275e-05, | |
| "loss": 0.3968, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.9116135662898253, | |
| "grad_norm": 0.4341716993023021, | |
| "learning_rate": 3.104260096147426e-05, | |
| "loss": 0.3825, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.9157245632065776, | |
| "grad_norm": 0.2885815389134767, | |
| "learning_rate": 3.099462626369418e-05, | |
| "loss": 0.379, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.91983556012333, | |
| "grad_norm": 0.3706179172517124, | |
| "learning_rate": 3.094656073376419e-05, | |
| "loss": 0.3882, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.9239465570400822, | |
| "grad_norm": 0.3443004696246589, | |
| "learning_rate": 3.0898404768777863e-05, | |
| "loss": 0.3855, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.9280575539568345, | |
| "grad_norm": 0.29420490623628953, | |
| "learning_rate": 3.0850158766575907e-05, | |
| "loss": 0.3843, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.9321685508735869, | |
| "grad_norm": 0.3876924984247156, | |
| "learning_rate": 3.080182312574286e-05, | |
| "loss": 0.3746, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.9362795477903392, | |
| "grad_norm": 0.28747642038559285, | |
| "learning_rate": 3.075339824560382e-05, | |
| "loss": 0.3718, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.9403905447070915, | |
| "grad_norm": 0.32380146376848085, | |
| "learning_rate": 3.070488452622113e-05, | |
| "loss": 0.3934, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.9445015416238438, | |
| "grad_norm": 0.32465438985148803, | |
| "learning_rate": 3.0656282368391086e-05, | |
| "loss": 0.3729, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.9486125385405961, | |
| "grad_norm": 0.35563518327266175, | |
| "learning_rate": 3.0607592173640615e-05, | |
| "loss": 0.3795, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.9527235354573484, | |
| "grad_norm": 0.331866902145928, | |
| "learning_rate": 3.055881434422395e-05, | |
| "loss": 0.3981, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.9568345323741008, | |
| "grad_norm": 0.3342210067868538, | |
| "learning_rate": 3.0509949283119348e-05, | |
| "loss": 0.3717, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.960945529290853, | |
| "grad_norm": 0.33585492528175326, | |
| "learning_rate": 3.0460997394025694e-05, | |
| "loss": 0.3993, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.9650565262076052, | |
| "grad_norm": 0.31011270781830746, | |
| "learning_rate": 3.0411959081359223e-05, | |
| "loss": 0.3865, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.9691675231243577, | |
| "grad_norm": 0.35530598698818877, | |
| "learning_rate": 3.036283475025016e-05, | |
| "loss": 0.3784, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.9732785200411098, | |
| "grad_norm": 0.3734052740131826, | |
| "learning_rate": 3.031362480653937e-05, | |
| "loss": 0.3762, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.9773895169578624, | |
| "grad_norm": 0.3014940676108034, | |
| "learning_rate": 3.0264329656775e-05, | |
| "loss": 0.3757, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.9815005138746145, | |
| "grad_norm": 0.3512117145148321, | |
| "learning_rate": 3.021494970820912e-05, | |
| "loss": 0.3827, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.985611510791367, | |
| "grad_norm": 0.37355802443996994, | |
| "learning_rate": 3.01654853687944e-05, | |
| "loss": 0.3642, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.989722507708119, | |
| "grad_norm": 0.2861615252457176, | |
| "learning_rate": 3.011593704718067e-05, | |
| "loss": 0.3963, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.9938335046248716, | |
| "grad_norm": 0.3745753953644458, | |
| "learning_rate": 3.0066305152711598e-05, | |
| "loss": 0.3878, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.9979445015416237, | |
| "grad_norm": 0.26150625589651816, | |
| "learning_rate": 3.0016590095421273e-05, | |
| "loss": 0.3721, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 2.0020554984583763, | |
| "grad_norm": 0.3918210766291012, | |
| "learning_rate": 2.9966792286030853e-05, | |
| "loss": 0.3396, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 2.0061664953751284, | |
| "grad_norm": 0.315886174265335, | |
| "learning_rate": 2.9916912135945147e-05, | |
| "loss": 0.3326, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 2.010277492291881, | |
| "grad_norm": 0.5139005301093035, | |
| "learning_rate": 2.986695005724921e-05, | |
| "loss": 0.3331, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 2.014388489208633, | |
| "grad_norm": 0.4039956111942429, | |
| "learning_rate": 2.9816906462704963e-05, | |
| "loss": 0.3318, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.0184994861253855, | |
| "grad_norm": 0.3643964107370674, | |
| "learning_rate": 2.9766781765747775e-05, | |
| "loss": 0.331, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 2.0226104830421376, | |
| "grad_norm": 0.36816703584916016, | |
| "learning_rate": 2.971657638048302e-05, | |
| "loss": 0.3318, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 2.02672147995889, | |
| "grad_norm": 0.42271803167213406, | |
| "learning_rate": 2.966629072168271e-05, | |
| "loss": 0.3344, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 2.0308324768756423, | |
| "grad_norm": 0.3930653905455099, | |
| "learning_rate": 2.9615925204782006e-05, | |
| "loss": 0.3177, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 2.034943473792395, | |
| "grad_norm": 0.40048080993718765, | |
| "learning_rate": 2.9565480245875845e-05, | |
| "loss": 0.3358, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 2.039054470709147, | |
| "grad_norm": 0.3484759664627585, | |
| "learning_rate": 2.9514956261715458e-05, | |
| "loss": 0.3303, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 2.0431654676258995, | |
| "grad_norm": 1.2952724176128951, | |
| "learning_rate": 2.9464353669704943e-05, | |
| "loss": 0.3683, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 2.0472764645426516, | |
| "grad_norm": 0.3554075988337682, | |
| "learning_rate": 2.9413672887897828e-05, | |
| "loss": 0.34, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 2.051387461459404, | |
| "grad_norm": 0.5002880458311052, | |
| "learning_rate": 2.936291433499359e-05, | |
| "loss": 0.3304, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 2.055498458376156, | |
| "grad_norm": 0.37407564917246083, | |
| "learning_rate": 2.9312078430334228e-05, | |
| "loss": 0.3358, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.0596094552929087, | |
| "grad_norm": 0.4077464439245042, | |
| "learning_rate": 2.926116559390078e-05, | |
| "loss": 0.3232, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 2.063720452209661, | |
| "grad_norm": 0.3809714773736691, | |
| "learning_rate": 2.921017624630984e-05, | |
| "loss": 0.323, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 2.0678314491264134, | |
| "grad_norm": 0.311417329278231, | |
| "learning_rate": 2.9159110808810125e-05, | |
| "loss": 0.3192, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 2.0719424460431655, | |
| "grad_norm": 0.44030503896413653, | |
| "learning_rate": 2.9107969703278952e-05, | |
| "loss": 0.3354, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 2.0760534429599176, | |
| "grad_norm": 0.3036316440870094, | |
| "learning_rate": 2.905675335221877e-05, | |
| "loss": 0.3299, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 2.08016443987667, | |
| "grad_norm": 0.4247647928977746, | |
| "learning_rate": 2.900546217875368e-05, | |
| "loss": 0.3288, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.084275436793422, | |
| "grad_norm": 0.3390034407042314, | |
| "learning_rate": 2.895409660662592e-05, | |
| "loss": 0.3328, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 2.0883864337101747, | |
| "grad_norm": 0.4990365827984489, | |
| "learning_rate": 2.8902657060192366e-05, | |
| "loss": 0.3376, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 2.092497430626927, | |
| "grad_norm": 0.5173705351976455, | |
| "learning_rate": 2.8851143964421048e-05, | |
| "loss": 0.3356, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 2.0966084275436794, | |
| "grad_norm": 0.5151102205707064, | |
| "learning_rate": 2.879955774488762e-05, | |
| "loss": 0.332, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.1007194244604315, | |
| "grad_norm": 0.43417520836094964, | |
| "learning_rate": 2.8747898827771846e-05, | |
| "loss": 0.3389, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 2.104830421377184, | |
| "grad_norm": 0.5355654606933186, | |
| "learning_rate": 2.8696167639854073e-05, | |
| "loss": 0.341, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.108941418293936, | |
| "grad_norm": 0.4367393823993611, | |
| "learning_rate": 2.864436460851173e-05, | |
| "loss": 0.3299, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 2.1130524152106887, | |
| "grad_norm": 0.45783141095235763, | |
| "learning_rate": 2.8592490161715768e-05, | |
| "loss": 0.3191, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.1171634121274407, | |
| "grad_norm": 0.42545114058633565, | |
| "learning_rate": 2.8540544728027145e-05, | |
| "loss": 0.3145, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 2.1212744090441933, | |
| "grad_norm": 0.3661488589187853, | |
| "learning_rate": 2.8488528736593278e-05, | |
| "loss": 0.3275, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.1253854059609454, | |
| "grad_norm": 0.5060616601470208, | |
| "learning_rate": 2.843644261714448e-05, | |
| "loss": 0.3384, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 2.129496402877698, | |
| "grad_norm": 0.3580510131662911, | |
| "learning_rate": 2.8384286799990452e-05, | |
| "loss": 0.3296, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.13360739979445, | |
| "grad_norm": 0.45075270681673163, | |
| "learning_rate": 2.8332061716016692e-05, | |
| "loss": 0.32, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 2.1377183967112026, | |
| "grad_norm": 0.4708082264494772, | |
| "learning_rate": 2.8279767796680934e-05, | |
| "loss": 0.3332, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.1418293936279547, | |
| "grad_norm": 0.35417572710043976, | |
| "learning_rate": 2.8227405474009616e-05, | |
| "loss": 0.325, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 2.145940390544707, | |
| "grad_norm": 0.5178072041280041, | |
| "learning_rate": 2.817497518059428e-05, | |
| "loss": 0.3286, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.1500513874614593, | |
| "grad_norm": 0.333153745006992, | |
| "learning_rate": 2.8122477349588005e-05, | |
| "loss": 0.3247, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 2.154162384378212, | |
| "grad_norm": 0.5499040672396817, | |
| "learning_rate": 2.8069912414701842e-05, | |
| "loss": 0.3338, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.158273381294964, | |
| "grad_norm": 0.29956362280088755, | |
| "learning_rate": 2.8017280810201213e-05, | |
| "loss": 0.3307, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 2.1623843782117165, | |
| "grad_norm": 0.4276269537060341, | |
| "learning_rate": 2.7964582970902338e-05, | |
| "loss": 0.3263, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.1664953751284686, | |
| "grad_norm": 0.3311312720633184, | |
| "learning_rate": 2.7911819332168627e-05, | |
| "loss": 0.3302, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 2.170606372045221, | |
| "grad_norm": 0.32361165480350135, | |
| "learning_rate": 2.78589903299071e-05, | |
| "loss": 0.3307, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.174717368961973, | |
| "grad_norm": 0.33128441736832326, | |
| "learning_rate": 2.7806096400564775e-05, | |
| "loss": 0.3234, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 2.1788283658787257, | |
| "grad_norm": 0.2945513597575282, | |
| "learning_rate": 2.7753137981125068e-05, | |
| "loss": 0.3354, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.182939362795478, | |
| "grad_norm": 0.35769877925150756, | |
| "learning_rate": 2.7700115509104176e-05, | |
| "loss": 0.336, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 2.1870503597122304, | |
| "grad_norm": 0.3065613778661335, | |
| "learning_rate": 2.7647029422547465e-05, | |
| "loss": 0.3326, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.1911613566289825, | |
| "grad_norm": 0.31377341167653106, | |
| "learning_rate": 2.7593880160025864e-05, | |
| "loss": 0.3354, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 2.195272353545735, | |
| "grad_norm": 0.28252520009349796, | |
| "learning_rate": 2.754066816063222e-05, | |
| "loss": 0.3194, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.199383350462487, | |
| "grad_norm": 0.2792714767775337, | |
| "learning_rate": 2.7487393863977687e-05, | |
| "loss": 0.3369, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 2.2034943473792397, | |
| "grad_norm": 0.28232252447629436, | |
| "learning_rate": 2.7434057710188077e-05, | |
| "loss": 0.3157, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.2076053442959918, | |
| "grad_norm": 0.25752143372328223, | |
| "learning_rate": 2.738066013990025e-05, | |
| "loss": 0.3153, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 2.2117163412127443, | |
| "grad_norm": 0.297593757050134, | |
| "learning_rate": 2.732720159425845e-05, | |
| "loss": 0.3296, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.2158273381294964, | |
| "grad_norm": 0.2758026361391992, | |
| "learning_rate": 2.7273682514910668e-05, | |
| "loss": 0.3247, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 2.2199383350462485, | |
| "grad_norm": 0.3422530970797541, | |
| "learning_rate": 2.7220103344004995e-05, | |
| "loss": 0.3293, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.224049331963001, | |
| "grad_norm": 0.3668216989996492, | |
| "learning_rate": 2.7166464524185977e-05, | |
| "loss": 0.3419, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 2.2281603288797536, | |
| "grad_norm": 0.3353880633821636, | |
| "learning_rate": 2.7112766498590944e-05, | |
| "loss": 0.3277, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 2.2322713257965057, | |
| "grad_norm": 0.3990592979092236, | |
| "learning_rate": 2.705900971084635e-05, | |
| "loss": 0.3352, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 2.2363823227132578, | |
| "grad_norm": 0.34649020190108354, | |
| "learning_rate": 2.7005194605064122e-05, | |
| "loss": 0.3334, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 2.2404933196300103, | |
| "grad_norm": 0.30730771299144677, | |
| "learning_rate": 2.6951321625837975e-05, | |
| "loss": 0.3299, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 2.2446043165467624, | |
| "grad_norm": 0.3875753398426506, | |
| "learning_rate": 2.6897391218239746e-05, | |
| "loss": 0.3338, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 2.248715313463515, | |
| "grad_norm": 0.27365792996452604, | |
| "learning_rate": 2.6843403827815714e-05, | |
| "loss": 0.3353, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 2.252826310380267, | |
| "grad_norm": 0.3913041787492654, | |
| "learning_rate": 2.6789359900582935e-05, | |
| "loss": 0.3274, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 2.2569373072970196, | |
| "grad_norm": 0.25296864218831433, | |
| "learning_rate": 2.673525988302553e-05, | |
| "loss": 0.344, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 2.2610483042137717, | |
| "grad_norm": 0.3931054005221806, | |
| "learning_rate": 2.6681104222091018e-05, | |
| "loss": 0.3387, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.265159301130524, | |
| "grad_norm": 0.25058187158942646, | |
| "learning_rate": 2.662689336518661e-05, | |
| "loss": 0.3306, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 2.2692702980472763, | |
| "grad_norm": 0.34466530037047466, | |
| "learning_rate": 2.6572627760175523e-05, | |
| "loss": 0.334, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 2.273381294964029, | |
| "grad_norm": 0.27034275974079125, | |
| "learning_rate": 2.6518307855373276e-05, | |
| "loss": 0.3245, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 2.277492291880781, | |
| "grad_norm": 0.32305382508070213, | |
| "learning_rate": 2.6463934099543992e-05, | |
| "loss": 0.3337, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 2.2816032887975335, | |
| "grad_norm": 0.2943172520547782, | |
| "learning_rate": 2.6409506941896665e-05, | |
| "loss": 0.336, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 2.2857142857142856, | |
| "grad_norm": 0.3589575171976915, | |
| "learning_rate": 2.6355026832081493e-05, | |
| "loss": 0.331, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 2.289825282631038, | |
| "grad_norm": 0.32434981120796447, | |
| "learning_rate": 2.6300494220186113e-05, | |
| "loss": 0.3318, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 2.2939362795477902, | |
| "grad_norm": 0.3207602935494296, | |
| "learning_rate": 2.6245909556731937e-05, | |
| "loss": 0.3244, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 2.2980472764645428, | |
| "grad_norm": 0.2994113594865251, | |
| "learning_rate": 2.6191273292670372e-05, | |
| "loss": 0.3342, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 2.302158273381295, | |
| "grad_norm": 0.3188506407975691, | |
| "learning_rate": 2.6136585879379145e-05, | |
| "loss": 0.3394, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.3062692702980474, | |
| "grad_norm": 0.3098768532791999, | |
| "learning_rate": 2.608184776865854e-05, | |
| "loss": 0.3289, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 2.3103802672147995, | |
| "grad_norm": 0.30655390743691074, | |
| "learning_rate": 2.602705941272769e-05, | |
| "loss": 0.322, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 2.314491264131552, | |
| "grad_norm": 0.3063020702615511, | |
| "learning_rate": 2.597222126422081e-05, | |
| "loss": 0.3332, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 2.318602261048304, | |
| "grad_norm": 0.3156706802866886, | |
| "learning_rate": 2.5917333776183503e-05, | |
| "loss": 0.3228, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 2.3227132579650567, | |
| "grad_norm": 0.28723121088598647, | |
| "learning_rate": 2.586239740206897e-05, | |
| "loss": 0.3197, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 2.3268242548818088, | |
| "grad_norm": 0.30433476866497944, | |
| "learning_rate": 2.5807412595734283e-05, | |
| "loss": 0.3279, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 2.3309352517985613, | |
| "grad_norm": 0.25756394575456126, | |
| "learning_rate": 2.5752379811436655e-05, | |
| "loss": 0.324, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 2.3350462487153134, | |
| "grad_norm": 0.3098697810639567, | |
| "learning_rate": 2.5697299503829657e-05, | |
| "loss": 0.3347, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 2.339157245632066, | |
| "grad_norm": 0.30837147852538255, | |
| "learning_rate": 2.5642172127959475e-05, | |
| "loss": 0.3292, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 2.343268242548818, | |
| "grad_norm": 0.3069265524451522, | |
| "learning_rate": 2.558699813926115e-05, | |
| "loss": 0.3323, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.3473792394655706, | |
| "grad_norm": 0.32407359000068336, | |
| "learning_rate": 2.5531777993554813e-05, | |
| "loss": 0.3317, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 2.3514902363823227, | |
| "grad_norm": 0.3118278827236543, | |
| "learning_rate": 2.5476512147041926e-05, | |
| "loss": 0.3428, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 2.3556012332990752, | |
| "grad_norm": 0.3342461379651357, | |
| "learning_rate": 2.5421201056301507e-05, | |
| "loss": 0.3284, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 2.3597122302158273, | |
| "grad_norm": 0.29958642203118996, | |
| "learning_rate": 2.5365845178286358e-05, | |
| "loss": 0.3275, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 2.3638232271325794, | |
| "grad_norm": 0.3328808710382115, | |
| "learning_rate": 2.5310444970319292e-05, | |
| "loss": 0.3301, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 2.367934224049332, | |
| "grad_norm": 0.3782109254880134, | |
| "learning_rate": 2.525500089008936e-05, | |
| "loss": 0.3375, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 2.3720452209660845, | |
| "grad_norm": 0.3451832289715049, | |
| "learning_rate": 2.5199513395648047e-05, | |
| "loss": 0.3207, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 2.3761562178828366, | |
| "grad_norm": 0.34430102536898843, | |
| "learning_rate": 2.5143982945405527e-05, | |
| "loss": 0.3335, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 2.3802672147995887, | |
| "grad_norm": 0.3480033297872511, | |
| "learning_rate": 2.5088409998126827e-05, | |
| "loss": 0.3364, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 2.3843782117163412, | |
| "grad_norm": 0.2864218297613634, | |
| "learning_rate": 2.5032795012928093e-05, | |
| "loss": 0.3296, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.3884892086330938, | |
| "grad_norm": 0.27656367755763744, | |
| "learning_rate": 2.4977138449272746e-05, | |
| "loss": 0.3252, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 2.392600205549846, | |
| "grad_norm": 0.3210933436925842, | |
| "learning_rate": 2.4921440766967718e-05, | |
| "loss": 0.3292, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 2.396711202466598, | |
| "grad_norm": 0.2695136624951651, | |
| "learning_rate": 2.4865702426159633e-05, | |
| "loss": 0.3345, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 2.4008221993833505, | |
| "grad_norm": 0.2853367762196653, | |
| "learning_rate": 2.4809923887331028e-05, | |
| "loss": 0.3272, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 2.4049331963001026, | |
| "grad_norm": 0.26413255067697416, | |
| "learning_rate": 2.4754105611296534e-05, | |
| "loss": 0.3244, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 2.409044193216855, | |
| "grad_norm": 0.2788852049644498, | |
| "learning_rate": 2.4698248059199056e-05, | |
| "loss": 0.3211, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 2.4131551901336072, | |
| "grad_norm": 0.2839043038975584, | |
| "learning_rate": 2.4642351692505998e-05, | |
| "loss": 0.3227, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 2.41726618705036, | |
| "grad_norm": 0.26795507769344473, | |
| "learning_rate": 2.4586416973005414e-05, | |
| "loss": 0.3255, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 2.421377183967112, | |
| "grad_norm": 0.28061689093119546, | |
| "learning_rate": 2.453044436280223e-05, | |
| "loss": 0.3297, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 2.4254881808838644, | |
| "grad_norm": 0.2768666834694627, | |
| "learning_rate": 2.4474434324314388e-05, | |
| "loss": 0.3351, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.4295991778006165, | |
| "grad_norm": 0.2992581902997908, | |
| "learning_rate": 2.4418387320269047e-05, | |
| "loss": 0.3185, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 2.433710174717369, | |
| "grad_norm": 0.2582002464494716, | |
| "learning_rate": 2.4362303813698766e-05, | |
| "loss": 0.3262, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 2.437821171634121, | |
| "grad_norm": 0.288633230170238, | |
| "learning_rate": 2.4306184267937654e-05, | |
| "loss": 0.3317, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 2.4419321685508737, | |
| "grad_norm": 0.2642364711177551, | |
| "learning_rate": 2.425002914661758e-05, | |
| "loss": 0.3325, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 2.446043165467626, | |
| "grad_norm": 0.28079190224987655, | |
| "learning_rate": 2.419383891366431e-05, | |
| "loss": 0.3186, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 2.4501541623843783, | |
| "grad_norm": 0.2517139036005103, | |
| "learning_rate": 2.4137614033293676e-05, | |
| "loss": 0.3325, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 2.4542651593011304, | |
| "grad_norm": 0.3048176393508488, | |
| "learning_rate": 2.408135497000776e-05, | |
| "loss": 0.3258, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 2.458376156217883, | |
| "grad_norm": 0.2553179117187841, | |
| "learning_rate": 2.4025062188591046e-05, | |
| "loss": 0.3286, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 2.462487153134635, | |
| "grad_norm": 0.297522330860201, | |
| "learning_rate": 2.3968736154106574e-05, | |
| "loss": 0.3257, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 2.4665981500513876, | |
| "grad_norm": 0.26227969960383657, | |
| "learning_rate": 2.3912377331892112e-05, | |
| "loss": 0.3348, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.4707091469681397, | |
| "grad_norm": 0.2678339048494993, | |
| "learning_rate": 2.3855986187556295e-05, | |
| "loss": 0.3247, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 2.4748201438848922, | |
| "grad_norm": 0.2782462750099432, | |
| "learning_rate": 2.3799563186974802e-05, | |
| "loss": 0.3288, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 2.4789311408016443, | |
| "grad_norm": 0.26942196233507953, | |
| "learning_rate": 2.374310879628647e-05, | |
| "loss": 0.3343, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 2.483042137718397, | |
| "grad_norm": 0.25192857481609987, | |
| "learning_rate": 2.3686623481889496e-05, | |
| "loss": 0.3355, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 2.487153134635149, | |
| "grad_norm": 0.27024432726841424, | |
| "learning_rate": 2.3630107710437526e-05, | |
| "loss": 0.3296, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 2.4912641315519015, | |
| "grad_norm": 0.2491507090752715, | |
| "learning_rate": 2.3573561948835836e-05, | |
| "loss": 0.3421, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 2.4953751284686536, | |
| "grad_norm": 0.3126612318343971, | |
| "learning_rate": 2.3516986664237474e-05, | |
| "loss": 0.3254, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 2.499486125385406, | |
| "grad_norm": 0.2643267374371664, | |
| "learning_rate": 2.3460382324039377e-05, | |
| "loss": 0.3272, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 2.5035971223021583, | |
| "grad_norm": 0.2786020179741824, | |
| "learning_rate": 2.3403749395878542e-05, | |
| "loss": 0.3292, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 2.5077081192189103, | |
| "grad_norm": 0.2873861028514028, | |
| "learning_rate": 2.3347088347628128e-05, | |
| "loss": 0.3307, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.511819116135663, | |
| "grad_norm": 0.2772071301023664, | |
| "learning_rate": 2.3290399647393628e-05, | |
| "loss": 0.324, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 2.5159301130524154, | |
| "grad_norm": 0.27537902676408144, | |
| "learning_rate": 2.3233683763508957e-05, | |
| "loss": 0.3343, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 2.5200411099691675, | |
| "grad_norm": 0.2859207355422494, | |
| "learning_rate": 2.317694116453263e-05, | |
| "loss": 0.34, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 2.5241521068859196, | |
| "grad_norm": 0.2743835098944321, | |
| "learning_rate": 2.3120172319243864e-05, | |
| "loss": 0.3338, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 2.528263103802672, | |
| "grad_norm": 0.27595779721707764, | |
| "learning_rate": 2.3063377696638707e-05, | |
| "loss": 0.3311, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 2.5323741007194247, | |
| "grad_norm": 0.26843945933414415, | |
| "learning_rate": 2.300655776592616e-05, | |
| "loss": 0.3335, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 2.536485097636177, | |
| "grad_norm": 0.25648610540979605, | |
| "learning_rate": 2.294971299652432e-05, | |
| "loss": 0.3235, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 2.540596094552929, | |
| "grad_norm": 0.3013681074148862, | |
| "learning_rate": 2.2892843858056474e-05, | |
| "loss": 0.3321, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 2.5447070914696814, | |
| "grad_norm": 0.24919313896376655, | |
| "learning_rate": 2.283595082034725e-05, | |
| "loss": 0.3167, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 2.548818088386434, | |
| "grad_norm": 0.2688625414735968, | |
| "learning_rate": 2.2779034353418707e-05, | |
| "loss": 0.3324, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.552929085303186, | |
| "grad_norm": 0.26263149016674175, | |
| "learning_rate": 2.2722094927486472e-05, | |
| "loss": 0.3286, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 2.557040082219938, | |
| "grad_norm": 0.2823135658125824, | |
| "learning_rate": 2.2665133012955844e-05, | |
| "loss": 0.3383, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 2.5611510791366907, | |
| "grad_norm": 0.276217133090313, | |
| "learning_rate": 2.2608149080417913e-05, | |
| "loss": 0.3289, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 2.565262076053443, | |
| "grad_norm": 0.2850565964189695, | |
| "learning_rate": 2.2551143600645672e-05, | |
| "loss": 0.3244, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 2.5693730729701953, | |
| "grad_norm": 0.26362750208519725, | |
| "learning_rate": 2.249411704459013e-05, | |
| "loss": 0.3361, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 2.5734840698869474, | |
| "grad_norm": 0.2798643173147311, | |
| "learning_rate": 2.2437069883376404e-05, | |
| "loss": 0.3142, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 2.5775950668037, | |
| "grad_norm": 0.2563288966482464, | |
| "learning_rate": 2.238000258829986e-05, | |
| "loss": 0.324, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 2.581706063720452, | |
| "grad_norm": 0.30508227611107025, | |
| "learning_rate": 2.2322915630822184e-05, | |
| "loss": 0.3226, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 2.5858170606372046, | |
| "grad_norm": 0.25091522041815256, | |
| "learning_rate": 2.226580948256751e-05, | |
| "loss": 0.3315, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 2.5899280575539567, | |
| "grad_norm": 0.32219820646356984, | |
| "learning_rate": 2.2208684615318515e-05, | |
| "loss": 0.3291, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.5940390544707093, | |
| "grad_norm": 0.2642592817496665, | |
| "learning_rate": 2.2151541501012526e-05, | |
| "loss": 0.3348, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 2.5981500513874614, | |
| "grad_norm": 0.2696044140880529, | |
| "learning_rate": 2.2094380611737615e-05, | |
| "loss": 0.336, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 2.602261048304214, | |
| "grad_norm": 0.2778886911936094, | |
| "learning_rate": 2.20372024197287e-05, | |
| "loss": 0.3221, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 2.606372045220966, | |
| "grad_norm": 0.26840999460258913, | |
| "learning_rate": 2.1980007397363653e-05, | |
| "loss": 0.3283, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 2.6104830421377185, | |
| "grad_norm": 0.30677147336816346, | |
| "learning_rate": 2.1922796017159382e-05, | |
| "loss": 0.3391, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 2.6145940390544706, | |
| "grad_norm": 0.26204192651719005, | |
| "learning_rate": 2.186556875176794e-05, | |
| "loss": 0.3181, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 2.618705035971223, | |
| "grad_norm": 0.29330464889106106, | |
| "learning_rate": 2.1808326073972618e-05, | |
| "loss": 0.3334, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 2.6228160328879753, | |
| "grad_norm": 0.3611585390826276, | |
| "learning_rate": 2.1751068456684026e-05, | |
| "loss": 0.3328, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 2.626927029804728, | |
| "grad_norm": 0.32081188768018193, | |
| "learning_rate": 2.1693796372936207e-05, | |
| "loss": 0.3348, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 2.63103802672148, | |
| "grad_norm": 0.3466957904122417, | |
| "learning_rate": 2.1636510295882723e-05, | |
| "loss": 0.3287, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.635149023638232, | |
| "grad_norm": 0.3325190809270464, | |
| "learning_rate": 2.1579210698792724e-05, | |
| "loss": 0.3357, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 2.6392600205549845, | |
| "grad_norm": 0.323613824705376, | |
| "learning_rate": 2.1521898055047065e-05, | |
| "loss": 0.3254, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 2.643371017471737, | |
| "grad_norm": 0.31700835111056935, | |
| "learning_rate": 2.1464572838134393e-05, | |
| "loss": 0.3405, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 2.647482014388489, | |
| "grad_norm": 0.31194064100135144, | |
| "learning_rate": 2.1407235521647216e-05, | |
| "loss": 0.3337, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 2.6515930113052413, | |
| "grad_norm": 0.291054868309333, | |
| "learning_rate": 2.134988657927802e-05, | |
| "loss": 0.3223, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 2.655704008221994, | |
| "grad_norm": 0.28960930247219024, | |
| "learning_rate": 2.129252648481532e-05, | |
| "loss": 0.3399, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 2.6598150051387464, | |
| "grad_norm": 0.262272292175284, | |
| "learning_rate": 2.123515571213977e-05, | |
| "loss": 0.3199, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 2.6639260020554985, | |
| "grad_norm": 0.3430422990168527, | |
| "learning_rate": 2.1177774735220246e-05, | |
| "loss": 0.3211, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 2.6680369989722506, | |
| "grad_norm": 0.24490577578554293, | |
| "learning_rate": 2.1120384028109928e-05, | |
| "loss": 0.3347, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 2.672147995889003, | |
| "grad_norm": 0.3135561697948168, | |
| "learning_rate": 2.106298406494237e-05, | |
| "loss": 0.337, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.6762589928057556, | |
| "grad_norm": 0.2536708220913538, | |
| "learning_rate": 2.1005575319927606e-05, | |
| "loss": 0.3286, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 2.6803699897225077, | |
| "grad_norm": 0.2905534330712754, | |
| "learning_rate": 2.094815826734822e-05, | |
| "loss": 0.3344, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 2.68448098663926, | |
| "grad_norm": 0.255577529722107, | |
| "learning_rate": 2.089073338155542e-05, | |
| "loss": 0.3347, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 2.6885919835560124, | |
| "grad_norm": 0.3169225043435795, | |
| "learning_rate": 2.0833301136965138e-05, | |
| "loss": 0.3368, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 2.692702980472765, | |
| "grad_norm": 0.24523301662966585, | |
| "learning_rate": 2.0775862008054102e-05, | |
| "loss": 0.3317, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 2.696813977389517, | |
| "grad_norm": 0.3377639592657221, | |
| "learning_rate": 2.0718416469355917e-05, | |
| "loss": 0.3327, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 2.700924974306269, | |
| "grad_norm": 0.2760670088077706, | |
| "learning_rate": 2.066096499545712e-05, | |
| "loss": 0.3254, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 2.7050359712230216, | |
| "grad_norm": 0.33508069878850794, | |
| "learning_rate": 2.0603508060993306e-05, | |
| "loss": 0.3324, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 2.7091469681397737, | |
| "grad_norm": 0.2888367467526053, | |
| "learning_rate": 2.0546046140645178e-05, | |
| "loss": 0.33, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 2.7132579650565263, | |
| "grad_norm": 0.2851449912230599, | |
| "learning_rate": 2.0488579709134623e-05, | |
| "loss": 0.3375, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.7173689619732784, | |
| "grad_norm": 0.28857625298935113, | |
| "learning_rate": 2.04311092412208e-05, | |
| "loss": 0.3324, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 2.721479958890031, | |
| "grad_norm": 0.3099583754195003, | |
| "learning_rate": 2.0373635211696214e-05, | |
| "loss": 0.331, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 2.725590955806783, | |
| "grad_norm": 0.3033491487781276, | |
| "learning_rate": 2.0316158095382797e-05, | |
| "loss": 0.3354, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 2.7297019527235356, | |
| "grad_norm": 0.2619645136131476, | |
| "learning_rate": 2.0258678367127972e-05, | |
| "loss": 0.3236, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 2.7338129496402876, | |
| "grad_norm": 0.30087165164939694, | |
| "learning_rate": 2.0201196501800768e-05, | |
| "loss": 0.3232, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 2.73792394655704, | |
| "grad_norm": 0.2738871408955049, | |
| "learning_rate": 2.0143712974287838e-05, | |
| "loss": 0.3277, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 2.7420349434737923, | |
| "grad_norm": 0.29720770878473823, | |
| "learning_rate": 2.0086228259489578e-05, | |
| "loss": 0.3419, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 2.746145940390545, | |
| "grad_norm": 0.24745189549975016, | |
| "learning_rate": 2.0028742832316202e-05, | |
| "loss": 0.3241, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 2.750256937307297, | |
| "grad_norm": 0.26535109416407787, | |
| "learning_rate": 1.99712571676838e-05, | |
| "loss": 0.3206, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 2.7543679342240495, | |
| "grad_norm": 0.28264530686991374, | |
| "learning_rate": 1.9913771740510426e-05, | |
| "loss": 0.3441, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.7584789311408016, | |
| "grad_norm": 0.2439511505193991, | |
| "learning_rate": 1.9856287025712172e-05, | |
| "loss": 0.3327, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 2.762589928057554, | |
| "grad_norm": 0.25559437564690174, | |
| "learning_rate": 1.979880349819924e-05, | |
| "loss": 0.3325, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 2.766700924974306, | |
| "grad_norm": 0.22699680175413017, | |
| "learning_rate": 1.974132163287203e-05, | |
| "loss": 0.329, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 2.7708119218910587, | |
| "grad_norm": 0.2582116792070818, | |
| "learning_rate": 1.9683841904617217e-05, | |
| "loss": 0.3319, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 2.774922918807811, | |
| "grad_norm": 0.24489677360477968, | |
| "learning_rate": 1.9626364788303796e-05, | |
| "loss": 0.3313, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 2.779033915724563, | |
| "grad_norm": 0.29973564906267575, | |
| "learning_rate": 1.956889075877921e-05, | |
| "loss": 0.3359, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 2.7831449126413155, | |
| "grad_norm": 0.24725710379682117, | |
| "learning_rate": 1.9511420290865387e-05, | |
| "loss": 0.3269, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 2.787255909558068, | |
| "grad_norm": 0.254114192213977, | |
| "learning_rate": 1.945395385935483e-05, | |
| "loss": 0.3315, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 2.79136690647482, | |
| "grad_norm": 0.2500901168277256, | |
| "learning_rate": 1.9396491939006693e-05, | |
| "loss": 0.317, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 2.795477903391572, | |
| "grad_norm": 0.2506907960447071, | |
| "learning_rate": 1.9339035004542883e-05, | |
| "loss": 0.3355, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.7995889003083247, | |
| "grad_norm": 0.254060820338398, | |
| "learning_rate": 1.9281583530644087e-05, | |
| "loss": 0.3274, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 2.8036998972250773, | |
| "grad_norm": 0.22470917674479732, | |
| "learning_rate": 1.9224137991945898e-05, | |
| "loss": 0.3161, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 2.8078108941418294, | |
| "grad_norm": 0.25617294354628883, | |
| "learning_rate": 1.9166698863034865e-05, | |
| "loss": 0.3326, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 2.8119218910585815, | |
| "grad_norm": 0.24637096854415516, | |
| "learning_rate": 1.910926661844459e-05, | |
| "loss": 0.3306, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 2.816032887975334, | |
| "grad_norm": 0.23065590461427085, | |
| "learning_rate": 1.905184173265179e-05, | |
| "loss": 0.3285, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 2.8201438848920866, | |
| "grad_norm": 0.25223738900179504, | |
| "learning_rate": 1.89944246800724e-05, | |
| "loss": 0.3315, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 2.8242548818088387, | |
| "grad_norm": 0.2813788401987118, | |
| "learning_rate": 1.8937015935057637e-05, | |
| "loss": 0.343, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 2.8283658787255908, | |
| "grad_norm": 0.23658155464390826, | |
| "learning_rate": 1.887961597189008e-05, | |
| "loss": 0.3361, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 2.8324768756423433, | |
| "grad_norm": 0.2560263043866784, | |
| "learning_rate": 1.8822225264779757e-05, | |
| "loss": 0.336, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 2.836587872559096, | |
| "grad_norm": 0.24171456841261904, | |
| "learning_rate": 1.8764844287860235e-05, | |
| "loss": 0.3155, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.840698869475848, | |
| "grad_norm": 0.2709130278349106, | |
| "learning_rate": 1.8707473515184686e-05, | |
| "loss": 0.3347, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 2.8448098663926, | |
| "grad_norm": 0.2389464904458257, | |
| "learning_rate": 1.8650113420721985e-05, | |
| "loss": 0.3261, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 2.8489208633093526, | |
| "grad_norm": 0.23853438478287736, | |
| "learning_rate": 1.8592764478352788e-05, | |
| "loss": 0.3269, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 2.8530318602261047, | |
| "grad_norm": 0.24002347978417551, | |
| "learning_rate": 1.8535427161865617e-05, | |
| "loss": 0.3273, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 2.857142857142857, | |
| "grad_norm": 0.24648736679259559, | |
| "learning_rate": 1.8478101944952946e-05, | |
| "loss": 0.3336, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 2.8612538540596093, | |
| "grad_norm": 0.2539305109029615, | |
| "learning_rate": 1.842078930120729e-05, | |
| "loss": 0.3285, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 2.865364850976362, | |
| "grad_norm": 0.25402455868598073, | |
| "learning_rate": 1.836348970411729e-05, | |
| "loss": 0.3415, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 2.869475847893114, | |
| "grad_norm": 0.2557721072489133, | |
| "learning_rate": 1.8306203627063803e-05, | |
| "loss": 0.3324, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 2.8735868448098665, | |
| "grad_norm": 0.24149579536024937, | |
| "learning_rate": 1.8248931543315974e-05, | |
| "loss": 0.3327, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 2.8776978417266186, | |
| "grad_norm": 0.2618118707433648, | |
| "learning_rate": 1.8191673926027386e-05, | |
| "loss": 0.336, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.881808838643371, | |
| "grad_norm": 0.22607821223026145, | |
| "learning_rate": 1.813443124823206e-05, | |
| "loss": 0.3366, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 2.885919835560123, | |
| "grad_norm": 0.2939881318908373, | |
| "learning_rate": 1.807720398284062e-05, | |
| "loss": 0.3279, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 2.8900308324768758, | |
| "grad_norm": 0.2260123918569214, | |
| "learning_rate": 1.801999260263635e-05, | |
| "loss": 0.3337, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 2.894141829393628, | |
| "grad_norm": 0.24275524474075025, | |
| "learning_rate": 1.7962797580271303e-05, | |
| "loss": 0.3244, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 2.8982528263103804, | |
| "grad_norm": 0.2665250980386111, | |
| "learning_rate": 1.790561938826239e-05, | |
| "loss": 0.3341, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 2.9023638232271325, | |
| "grad_norm": 0.2439607446787407, | |
| "learning_rate": 1.784845849898748e-05, | |
| "loss": 0.3177, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 2.906474820143885, | |
| "grad_norm": 0.22414006570173825, | |
| "learning_rate": 1.7791315384681488e-05, | |
| "loss": 0.3199, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 2.910585817060637, | |
| "grad_norm": 0.29428160100150474, | |
| "learning_rate": 1.7734190517432498e-05, | |
| "loss": 0.3276, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 2.9146968139773897, | |
| "grad_norm": 0.23652581393109087, | |
| "learning_rate": 1.7677084369177823e-05, | |
| "loss": 0.3252, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 2.9188078108941418, | |
| "grad_norm": 0.2638103273242896, | |
| "learning_rate": 1.7619997411700146e-05, | |
| "loss": 0.3269, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 2.9229188078108943, | |
| "grad_norm": 0.23905327112200858, | |
| "learning_rate": 1.7562930116623602e-05, | |
| "loss": 0.3221, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 2.9270298047276464, | |
| "grad_norm": 0.24681045434883284, | |
| "learning_rate": 1.750588295540988e-05, | |
| "loss": 0.3265, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 2.931140801644399, | |
| "grad_norm": 0.22345235272171315, | |
| "learning_rate": 1.7448856399354335e-05, | |
| "loss": 0.331, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 2.935251798561151, | |
| "grad_norm": 0.2620096260034816, | |
| "learning_rate": 1.7391850919582097e-05, | |
| "loss": 0.3133, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 2.939362795477903, | |
| "grad_norm": 0.24255411379013975, | |
| "learning_rate": 1.733486698704417e-05, | |
| "loss": 0.3345, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 2.9434737923946557, | |
| "grad_norm": 0.2677764953102307, | |
| "learning_rate": 1.7277905072513538e-05, | |
| "loss": 0.3125, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 2.947584789311408, | |
| "grad_norm": 0.23338472896647094, | |
| "learning_rate": 1.7220965646581304e-05, | |
| "loss": 0.3329, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 2.9516957862281603, | |
| "grad_norm": 0.256626369930556, | |
| "learning_rate": 1.7164049179652762e-05, | |
| "loss": 0.342, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 2.9558067831449124, | |
| "grad_norm": 0.24204486389961907, | |
| "learning_rate": 1.7107156141943536e-05, | |
| "loss": 0.3317, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 2.959917780061665, | |
| "grad_norm": 0.23154113899503126, | |
| "learning_rate": 1.7050287003475684e-05, | |
| "loss": 0.338, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 2.9640287769784175, | |
| "grad_norm": 0.2515108652826035, | |
| "learning_rate": 1.699344223407384e-05, | |
| "loss": 0.3221, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 2.9681397738951696, | |
| "grad_norm": 0.2508115805000718, | |
| "learning_rate": 1.6936622303361292e-05, | |
| "loss": 0.3272, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 2.9722507708119217, | |
| "grad_norm": 0.24166143586067093, | |
| "learning_rate": 1.6879827680756132e-05, | |
| "loss": 0.3283, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 2.9763617677286742, | |
| "grad_norm": 0.25924430328001846, | |
| "learning_rate": 1.682305883546737e-05, | |
| "loss": 0.3297, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 2.9804727646454268, | |
| "grad_norm": 0.26555305739163787, | |
| "learning_rate": 1.6766316236491046e-05, | |
| "loss": 0.3314, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 2.984583761562179, | |
| "grad_norm": 0.24257798147799245, | |
| "learning_rate": 1.6709600352606382e-05, | |
| "loss": 0.3238, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 2.988694758478931, | |
| "grad_norm": 0.2772710049751061, | |
| "learning_rate": 1.665291165237188e-05, | |
| "loss": 0.3274, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 2.9928057553956835, | |
| "grad_norm": 0.22542030051008188, | |
| "learning_rate": 1.6596250604121468e-05, | |
| "loss": 0.3328, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 2.996916752312436, | |
| "grad_norm": 0.27500500626350044, | |
| "learning_rate": 1.653961767596063e-05, | |
| "loss": 0.3335, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 3.001027749229188, | |
| "grad_norm": 0.270309023452525, | |
| "learning_rate": 1.6483013335762536e-05, | |
| "loss": 0.2946, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 3.0051387461459402, | |
| "grad_norm": 0.31524306413172215, | |
| "learning_rate": 1.6426438051164168e-05, | |
| "loss": 0.2781, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 3.0092497430626928, | |
| "grad_norm": 0.5781264673297727, | |
| "learning_rate": 1.636989228956248e-05, | |
| "loss": 0.2843, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 3.013360739979445, | |
| "grad_norm": 0.3024860387001426, | |
| "learning_rate": 1.631337651811051e-05, | |
| "loss": 0.2747, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 3.0174717368961974, | |
| "grad_norm": 0.3934556205957313, | |
| "learning_rate": 1.6256891203713533e-05, | |
| "loss": 0.2728, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 3.0215827338129495, | |
| "grad_norm": 0.3129040186390879, | |
| "learning_rate": 1.6200436813025208e-05, | |
| "loss": 0.2736, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 3.025693730729702, | |
| "grad_norm": 0.35042448311035457, | |
| "learning_rate": 1.6144013812443712e-05, | |
| "loss": 0.288, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 3.029804727646454, | |
| "grad_norm": 0.32431756468327383, | |
| "learning_rate": 1.60876226681079e-05, | |
| "loss": 0.2675, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 3.0339157245632067, | |
| "grad_norm": 0.3106864020345642, | |
| "learning_rate": 1.6031263845893436e-05, | |
| "loss": 0.2696, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 3.038026721479959, | |
| "grad_norm": 0.2918561704720298, | |
| "learning_rate": 1.5974937811408964e-05, | |
| "loss": 0.2806, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 3.0421377183967113, | |
| "grad_norm": 0.2824200303583143, | |
| "learning_rate": 1.5918645029992237e-05, | |
| "loss": 0.2669, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 3.0462487153134634, | |
| "grad_norm": 0.2751071529215866, | |
| "learning_rate": 1.5862385966706324e-05, | |
| "loss": 0.2827, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 3.050359712230216, | |
| "grad_norm": 0.2659741638733503, | |
| "learning_rate": 1.580616108633569e-05, | |
| "loss": 0.2772, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 3.054470709146968, | |
| "grad_norm": 0.2705999112071291, | |
| "learning_rate": 1.5749970853382416e-05, | |
| "loss": 0.2813, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 3.0585817060637206, | |
| "grad_norm": 0.2678804686149958, | |
| "learning_rate": 1.5693815732062346e-05, | |
| "loss": 0.2786, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 3.0626927029804727, | |
| "grad_norm": 0.28891827034365974, | |
| "learning_rate": 1.563769618630124e-05, | |
| "loss": 0.2781, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 3.0668036998972252, | |
| "grad_norm": 0.2608278243848426, | |
| "learning_rate": 1.558161267973096e-05, | |
| "loss": 0.2811, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 3.0709146968139773, | |
| "grad_norm": 0.28779231459872495, | |
| "learning_rate": 1.552556567568562e-05, | |
| "loss": 0.2731, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 3.07502569373073, | |
| "grad_norm": 0.27172400888062603, | |
| "learning_rate": 1.5469555637197775e-05, | |
| "loss": 0.273, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 3.079136690647482, | |
| "grad_norm": 0.284714544394066, | |
| "learning_rate": 1.541358302699459e-05, | |
| "loss": 0.2737, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 3.0832476875642345, | |
| "grad_norm": 0.27108204101689876, | |
| "learning_rate": 1.535764830749401e-05, | |
| "loss": 0.2719, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 3.0873586844809866, | |
| "grad_norm": 0.26218248343837663, | |
| "learning_rate": 1.5301751940800947e-05, | |
| "loss": 0.2702, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 3.091469681397739, | |
| "grad_norm": 0.27396785993274086, | |
| "learning_rate": 1.5245894388703473e-05, | |
| "loss": 0.2746, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 3.0955806783144912, | |
| "grad_norm": 0.26457645017367387, | |
| "learning_rate": 1.5190076112668975e-05, | |
| "loss": 0.2741, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 3.099691675231244, | |
| "grad_norm": 0.2669417946440861, | |
| "learning_rate": 1.5134297573840373e-05, | |
| "loss": 0.2609, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 3.103802672147996, | |
| "grad_norm": 0.24350309961263825, | |
| "learning_rate": 1.507855923303229e-05, | |
| "loss": 0.2683, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 3.1079136690647484, | |
| "grad_norm": 0.24218499055629, | |
| "learning_rate": 1.5022861550727261e-05, | |
| "loss": 0.2753, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 3.1120246659815005, | |
| "grad_norm": 0.23884760385788692, | |
| "learning_rate": 1.4967204987071916e-05, | |
| "loss": 0.2674, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 3.1161356628982526, | |
| "grad_norm": 0.24427494625864407, | |
| "learning_rate": 1.491159000187318e-05, | |
| "loss": 0.2766, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 3.120246659815005, | |
| "grad_norm": 0.22462887698775066, | |
| "learning_rate": 1.4856017054594487e-05, | |
| "loss": 0.2817, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 3.1243576567317572, | |
| "grad_norm": 0.22935229598945833, | |
| "learning_rate": 1.4800486604351953e-05, | |
| "loss": 0.2692, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 3.12846865364851, | |
| "grad_norm": 0.2283641464528615, | |
| "learning_rate": 1.4744999109910642e-05, | |
| "loss": 0.2881, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 3.132579650565262, | |
| "grad_norm": 0.2402319884401938, | |
| "learning_rate": 1.4689555029680706e-05, | |
| "loss": 0.2811, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 3.1366906474820144, | |
| "grad_norm": 0.24689907618158027, | |
| "learning_rate": 1.4634154821713642e-05, | |
| "loss": 0.2748, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 3.1408016443987665, | |
| "grad_norm": 0.21908840749036268, | |
| "learning_rate": 1.4578798943698495e-05, | |
| "loss": 0.2775, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 3.144912641315519, | |
| "grad_norm": 0.2706839692520901, | |
| "learning_rate": 1.4523487852958078e-05, | |
| "loss": 0.274, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 3.149023638232271, | |
| "grad_norm": 0.21829989516446477, | |
| "learning_rate": 1.4468222006445194e-05, | |
| "loss": 0.2846, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 3.1531346351490237, | |
| "grad_norm": 0.254462615428386, | |
| "learning_rate": 1.4413001860738857e-05, | |
| "loss": 0.2751, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 3.157245632065776, | |
| "grad_norm": 0.22996784925457855, | |
| "learning_rate": 1.4357827872040533e-05, | |
| "loss": 0.2763, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 3.1613566289825283, | |
| "grad_norm": 0.24916315195392996, | |
| "learning_rate": 1.4302700496170348e-05, | |
| "loss": 0.273, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 3.1654676258992804, | |
| "grad_norm": 0.2394151755505642, | |
| "learning_rate": 1.424762018856335e-05, | |
| "loss": 0.2733, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 3.169578622816033, | |
| "grad_norm": 0.24554198081740938, | |
| "learning_rate": 1.4192587404265723e-05, | |
| "loss": 0.2739, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 3.173689619732785, | |
| "grad_norm": 0.23659011712793626, | |
| "learning_rate": 1.4137602597931039e-05, | |
| "loss": 0.2819, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 3.1778006166495376, | |
| "grad_norm": 0.22564337444058383, | |
| "learning_rate": 1.4082666223816503e-05, | |
| "loss": 0.2792, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 3.1819116135662897, | |
| "grad_norm": 0.2317146590014487, | |
| "learning_rate": 1.4027778735779194e-05, | |
| "loss": 0.2777, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 3.1860226104830422, | |
| "grad_norm": 0.2150695765539657, | |
| "learning_rate": 1.397294058727232e-05, | |
| "loss": 0.2765, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 3.1901336073997943, | |
| "grad_norm": 0.23401579346325868, | |
| "learning_rate": 1.3918152231341466e-05, | |
| "loss": 0.2859, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 3.194244604316547, | |
| "grad_norm": 0.22190869981693315, | |
| "learning_rate": 1.3863414120620866e-05, | |
| "loss": 0.2739, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 3.198355601233299, | |
| "grad_norm": 0.24505629782483931, | |
| "learning_rate": 1.3808726707329636e-05, | |
| "loss": 0.2854, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 3.2024665981500515, | |
| "grad_norm": 0.23527894624102066, | |
| "learning_rate": 1.3754090443268073e-05, | |
| "loss": 0.2739, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 3.2065775950668036, | |
| "grad_norm": 0.250451185169838, | |
| "learning_rate": 1.3699505779813885e-05, | |
| "loss": 0.2779, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 3.210688591983556, | |
| "grad_norm": 0.24199976474098944, | |
| "learning_rate": 1.3644973167918509e-05, | |
| "loss": 0.2819, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 3.2147995889003083, | |
| "grad_norm": 0.2295279753606739, | |
| "learning_rate": 1.3590493058103334e-05, | |
| "loss": 0.2912, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 3.218910585817061, | |
| "grad_norm": 0.24479637435880175, | |
| "learning_rate": 1.353606590045601e-05, | |
| "loss": 0.2625, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 3.223021582733813, | |
| "grad_norm": 0.22366855597040158, | |
| "learning_rate": 1.3481692144626723e-05, | |
| "loss": 0.2716, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 3.2271325796505654, | |
| "grad_norm": 0.23386804780243653, | |
| "learning_rate": 1.3427372239824478e-05, | |
| "loss": 0.2833, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 3.2312435765673175, | |
| "grad_norm": 0.21138476617701588, | |
| "learning_rate": 1.3373106634813395e-05, | |
| "loss": 0.2815, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 3.23535457348407, | |
| "grad_norm": 0.21868921231797736, | |
| "learning_rate": 1.3318895777908989e-05, | |
| "loss": 0.2737, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 3.239465570400822, | |
| "grad_norm": 0.22091301457511603, | |
| "learning_rate": 1.3264740116974477e-05, | |
| "loss": 0.2784, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 3.2435765673175747, | |
| "grad_norm": 0.21177976498652176, | |
| "learning_rate": 1.3210640099417071e-05, | |
| "loss": 0.2677, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 3.247687564234327, | |
| "grad_norm": 0.22075747796505304, | |
| "learning_rate": 1.3156596172184291e-05, | |
| "loss": 0.2843, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 3.2517985611510793, | |
| "grad_norm": 0.2093957071643158, | |
| "learning_rate": 1.3102608781760262e-05, | |
| "loss": 0.2783, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 3.2559095580678314, | |
| "grad_norm": 0.2502487297475507, | |
| "learning_rate": 1.3048678374162033e-05, | |
| "loss": 0.2764, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 3.2600205549845835, | |
| "grad_norm": 0.21828424241121014, | |
| "learning_rate": 1.2994805394935883e-05, | |
| "loss": 0.2783, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 3.264131551901336, | |
| "grad_norm": 0.2225059796962467, | |
| "learning_rate": 1.2940990289153654e-05, | |
| "loss": 0.2818, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 3.2682425488180886, | |
| "grad_norm": 0.2384933128418085, | |
| "learning_rate": 1.2887233501409062e-05, | |
| "loss": 0.2715, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 3.2723535457348407, | |
| "grad_norm": 0.23881527823885554, | |
| "learning_rate": 1.283353547581403e-05, | |
| "loss": 0.2815, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 3.276464542651593, | |
| "grad_norm": 0.21990598524463273, | |
| "learning_rate": 1.2779896655995012e-05, | |
| "loss": 0.2649, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 3.2805755395683454, | |
| "grad_norm": 0.22014058980246703, | |
| "learning_rate": 1.2726317485089345e-05, | |
| "loss": 0.2857, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 3.2846865364850975, | |
| "grad_norm": 0.2128741761301097, | |
| "learning_rate": 1.2672798405741565e-05, | |
| "loss": 0.2744, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 3.28879753340185, | |
| "grad_norm": 0.2093902141995586, | |
| "learning_rate": 1.261933986009976e-05, | |
| "loss": 0.2714, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.292908530318602, | |
| "grad_norm": 0.2317393277270657, | |
| "learning_rate": 1.2565942289811926e-05, | |
| "loss": 0.2821, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 3.2970195272353546, | |
| "grad_norm": 0.2124614991624517, | |
| "learning_rate": 1.2512606136022316e-05, | |
| "loss": 0.2684, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 3.3011305241521067, | |
| "grad_norm": 0.2237964393839327, | |
| "learning_rate": 1.245933183936778e-05, | |
| "loss": 0.28, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 3.3052415210688593, | |
| "grad_norm": 0.20179137845865386, | |
| "learning_rate": 1.2406119839974137e-05, | |
| "loss": 0.2791, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 3.3093525179856114, | |
| "grad_norm": 0.21344599872935055, | |
| "learning_rate": 1.2352970577452536e-05, | |
| "loss": 0.282, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 3.313463514902364, | |
| "grad_norm": 0.21405309956045562, | |
| "learning_rate": 1.2299884490895829e-05, | |
| "loss": 0.2705, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 3.317574511819116, | |
| "grad_norm": 0.20836540998453448, | |
| "learning_rate": 1.2246862018874937e-05, | |
| "loss": 0.2675, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 3.3216855087358685, | |
| "grad_norm": 0.21917814502090704, | |
| "learning_rate": 1.2193903599435229e-05, | |
| "loss": 0.2867, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 3.3257965056526206, | |
| "grad_norm": 0.21478503443145303, | |
| "learning_rate": 1.2141009670092905e-05, | |
| "loss": 0.263, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 3.329907502569373, | |
| "grad_norm": 0.24017325608140172, | |
| "learning_rate": 1.2088180667831378e-05, | |
| "loss": 0.285, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 3.3340184994861253, | |
| "grad_norm": 0.21263315635103802, | |
| "learning_rate": 1.2035417029097669e-05, | |
| "loss": 0.2794, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 3.338129496402878, | |
| "grad_norm": 0.2208436673519513, | |
| "learning_rate": 1.198271918979879e-05, | |
| "loss": 0.2661, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 3.34224049331963, | |
| "grad_norm": 0.21410801362761014, | |
| "learning_rate": 1.1930087585298163e-05, | |
| "loss": 0.2691, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 3.3463514902363825, | |
| "grad_norm": 0.2189540505149734, | |
| "learning_rate": 1.1877522650412002e-05, | |
| "loss": 0.2777, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 3.3504624871531345, | |
| "grad_norm": 0.2235412920660751, | |
| "learning_rate": 1.1825024819405728e-05, | |
| "loss": 0.2829, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 3.354573484069887, | |
| "grad_norm": 0.22891833469755685, | |
| "learning_rate": 1.177259452599039e-05, | |
| "loss": 0.2883, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 3.358684480986639, | |
| "grad_norm": 0.20951091444066108, | |
| "learning_rate": 1.1720232203319072e-05, | |
| "loss": 0.2703, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 3.3627954779033917, | |
| "grad_norm": 0.2291000642315933, | |
| "learning_rate": 1.1667938283983318e-05, | |
| "loss": 0.2818, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 3.366906474820144, | |
| "grad_norm": 0.24820675241373585, | |
| "learning_rate": 1.1615713200009555e-05, | |
| "loss": 0.2894, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 3.3710174717368964, | |
| "grad_norm": 0.2112186174561992, | |
| "learning_rate": 1.1563557382855527e-05, | |
| "loss": 0.2765, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 3.3751284686536485, | |
| "grad_norm": 0.23516315367694957, | |
| "learning_rate": 1.1511471263406727e-05, | |
| "loss": 0.2783, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 3.379239465570401, | |
| "grad_norm": 0.20429288664608256, | |
| "learning_rate": 1.1459455271972855e-05, | |
| "loss": 0.2826, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 3.383350462487153, | |
| "grad_norm": 0.22120456160119745, | |
| "learning_rate": 1.1407509838284234e-05, | |
| "loss": 0.2702, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 3.3874614594039056, | |
| "grad_norm": 0.22196158784290934, | |
| "learning_rate": 1.1355635391488273e-05, | |
| "loss": 0.2816, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 3.3915724563206577, | |
| "grad_norm": 0.23198563181248005, | |
| "learning_rate": 1.130383236014593e-05, | |
| "loss": 0.2807, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 3.3956834532374103, | |
| "grad_norm": 0.21328959797566183, | |
| "learning_rate": 1.1252101172228161e-05, | |
| "loss": 0.2812, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 3.3997944501541624, | |
| "grad_norm": 0.20829955768200162, | |
| "learning_rate": 1.1200442255112382e-05, | |
| "loss": 0.2781, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 3.4039054470709145, | |
| "grad_norm": 0.210021918847506, | |
| "learning_rate": 1.1148856035578954e-05, | |
| "loss": 0.2793, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 3.408016443987667, | |
| "grad_norm": 0.21953053255099053, | |
| "learning_rate": 1.1097342939807639e-05, | |
| "loss": 0.2826, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 3.4121274409044196, | |
| "grad_norm": 0.21197251533168365, | |
| "learning_rate": 1.1045903393374088e-05, | |
| "loss": 0.2678, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 3.4162384378211716, | |
| "grad_norm": 0.22402861818250405, | |
| "learning_rate": 1.0994537821246322e-05, | |
| "loss": 0.2768, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 3.4203494347379237, | |
| "grad_norm": 0.20866347607213415, | |
| "learning_rate": 1.0943246647781231e-05, | |
| "loss": 0.2822, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 3.4244604316546763, | |
| "grad_norm": 0.20588546575745492, | |
| "learning_rate": 1.0892030296721053e-05, | |
| "loss": 0.274, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 3.4285714285714284, | |
| "grad_norm": 0.21933896518445742, | |
| "learning_rate": 1.0840889191189881e-05, | |
| "loss": 0.2815, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 3.432682425488181, | |
| "grad_norm": 0.21986260521948456, | |
| "learning_rate": 1.0789823753690165e-05, | |
| "loss": 0.265, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 3.436793422404933, | |
| "grad_norm": 0.2472526595417136, | |
| "learning_rate": 1.073883440609923e-05, | |
| "loss": 0.2819, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 3.4409044193216856, | |
| "grad_norm": 0.23044008878105163, | |
| "learning_rate": 1.0687921569665778e-05, | |
| "loss": 0.2743, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 3.4450154162384377, | |
| "grad_norm": 0.2127401189830073, | |
| "learning_rate": 1.0637085665006416e-05, | |
| "loss": 0.2757, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 3.44912641315519, | |
| "grad_norm": 0.23011524871297998, | |
| "learning_rate": 1.058632711210218e-05, | |
| "loss": 0.2867, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 3.4532374100719423, | |
| "grad_norm": 0.2143448487687264, | |
| "learning_rate": 1.0535646330295064e-05, | |
| "loss": 0.2775, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 3.457348406988695, | |
| "grad_norm": 0.2157327739595805, | |
| "learning_rate": 1.0485043738284543e-05, | |
| "loss": 0.2772, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 3.461459403905447, | |
| "grad_norm": 0.21901388123050422, | |
| "learning_rate": 1.0434519754124155e-05, | |
| "loss": 0.2883, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 3.4655704008221995, | |
| "grad_norm": 0.20706260357694797, | |
| "learning_rate": 1.0384074795217995e-05, | |
| "loss": 0.2729, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 3.4696813977389516, | |
| "grad_norm": 0.20562057118619545, | |
| "learning_rate": 1.0333709278317295e-05, | |
| "loss": 0.2794, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 3.473792394655704, | |
| "grad_norm": 0.2053621886084836, | |
| "learning_rate": 1.0283423619516984e-05, | |
| "loss": 0.2831, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 3.477903391572456, | |
| "grad_norm": 0.21559967106224392, | |
| "learning_rate": 1.0233218234252233e-05, | |
| "loss": 0.2798, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 3.4820143884892087, | |
| "grad_norm": 0.20693716417643127, | |
| "learning_rate": 1.0183093537295038e-05, | |
| "loss": 0.2834, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 3.486125385405961, | |
| "grad_norm": 0.196045141198551, | |
| "learning_rate": 1.0133049942750794e-05, | |
| "loss": 0.2815, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 3.4902363823227134, | |
| "grad_norm": 0.22184037078133786, | |
| "learning_rate": 1.0083087864054862e-05, | |
| "loss": 0.2782, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 3.4943473792394655, | |
| "grad_norm": 0.21680925373572774, | |
| "learning_rate": 1.0033207713969152e-05, | |
| "loss": 0.2668, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 3.498458376156218, | |
| "grad_norm": 0.20929159215700033, | |
| "learning_rate": 9.983409904578732e-06, | |
| "loss": 0.2771, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 3.50256937307297, | |
| "grad_norm": 0.20085747960075442, | |
| "learning_rate": 9.93369484728841e-06, | |
| "loss": 0.2769, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 3.5066803699897227, | |
| "grad_norm": 0.21740453796251422, | |
| "learning_rate": 9.884062952819336e-06, | |
| "loss": 0.2809, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 3.5107913669064748, | |
| "grad_norm": 0.19826634602771384, | |
| "learning_rate": 9.834514631205607e-06, | |
| "loss": 0.2826, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 3.5149023638232273, | |
| "grad_norm": 0.19929040918628962, | |
| "learning_rate": 9.785050291790886e-06, | |
| "loss": 0.27, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 3.5190133607399794, | |
| "grad_norm": 0.20544687803262818, | |
| "learning_rate": 9.735670343225015e-06, | |
| "loss": 0.2759, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 3.523124357656732, | |
| "grad_norm": 0.20100075338402584, | |
| "learning_rate": 9.68637519346064e-06, | |
| "loss": 0.2842, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 3.527235354573484, | |
| "grad_norm": 0.19998157251828666, | |
| "learning_rate": 9.637165249749847e-06, | |
| "loss": 0.2677, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 3.531346351490236, | |
| "grad_norm": 0.20946212814759255, | |
| "learning_rate": 9.588040918640784e-06, | |
| "loss": 0.2819, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 3.5354573484069887, | |
| "grad_norm": 0.19305869769870324, | |
| "learning_rate": 9.539002605974315e-06, | |
| "loss": 0.2762, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 3.539568345323741, | |
| "grad_norm": 0.22246584009743214, | |
| "learning_rate": 9.490050716880652e-06, | |
| "loss": 0.2761, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 3.5436793422404933, | |
| "grad_norm": 0.2106791686837925, | |
| "learning_rate": 9.441185655776044e-06, | |
| "loss": 0.2836, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 3.5477903391572454, | |
| "grad_norm": 0.20735417375234855, | |
| "learning_rate": 9.392407826359386e-06, | |
| "loss": 0.2797, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 3.551901336073998, | |
| "grad_norm": 0.22319834142117814, | |
| "learning_rate": 9.343717631608913e-06, | |
| "loss": 0.2805, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 3.5560123329907505, | |
| "grad_norm": 0.21387661139677305, | |
| "learning_rate": 9.295115473778871e-06, | |
| "loss": 0.2737, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 3.5601233299075026, | |
| "grad_norm": 0.19614268534753534, | |
| "learning_rate": 9.246601754396184e-06, | |
| "loss": 0.2775, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 3.5642343268242547, | |
| "grad_norm": 0.21426258046660832, | |
| "learning_rate": 9.198176874257147e-06, | |
| "loss": 0.2801, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 3.568345323741007, | |
| "grad_norm": 0.20833350511079968, | |
| "learning_rate": 9.149841233424102e-06, | |
| "loss": 0.2903, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 3.5724563206577598, | |
| "grad_norm": 0.2054636105867438, | |
| "learning_rate": 9.101595231222142e-06, | |
| "loss": 0.2714, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 3.576567317574512, | |
| "grad_norm": 0.21677433378750463, | |
| "learning_rate": 9.053439266235817e-06, | |
| "loss": 0.2747, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 3.580678314491264, | |
| "grad_norm": 0.21258620908795176, | |
| "learning_rate": 9.005373736305827e-06, | |
| "loss": 0.2866, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 3.5847893114080165, | |
| "grad_norm": 0.21696485235415786, | |
| "learning_rate": 8.957399038525742e-06, | |
| "loss": 0.2768, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 3.588900308324769, | |
| "grad_norm": 0.2048645071808934, | |
| "learning_rate": 8.909515569238727e-06, | |
| "loss": 0.2805, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 3.593011305241521, | |
| "grad_norm": 0.2041872125610518, | |
| "learning_rate": 8.861723724034256e-06, | |
| "loss": 0.281, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 3.597122302158273, | |
| "grad_norm": 0.22105486900940344, | |
| "learning_rate": 8.814023897744861e-06, | |
| "loss": 0.2722, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 3.6012332990750258, | |
| "grad_norm": 0.20870597023983126, | |
| "learning_rate": 8.766416484442845e-06, | |
| "loss": 0.288, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 3.605344295991778, | |
| "grad_norm": 0.22305369395665908, | |
| "learning_rate": 8.71890187743705e-06, | |
| "loss": 0.2833, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 3.6094552929085304, | |
| "grad_norm": 0.20984704638631244, | |
| "learning_rate": 8.6714804692696e-06, | |
| "loss": 0.2815, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 3.6135662898252825, | |
| "grad_norm": 0.205661449222605, | |
| "learning_rate": 8.624152651712647e-06, | |
| "loss": 0.2796, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 3.617677286742035, | |
| "grad_norm": 0.21670437077691945, | |
| "learning_rate": 8.576918815765155e-06, | |
| "loss": 0.276, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 3.621788283658787, | |
| "grad_norm": 0.21657223103082457, | |
| "learning_rate": 8.52977935164965e-06, | |
| "loss": 0.2793, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 3.6258992805755397, | |
| "grad_norm": 0.19481374974543536, | |
| "learning_rate": 8.482734648808998e-06, | |
| "loss": 0.2828, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 3.6300102774922918, | |
| "grad_norm": 0.22945439732292053, | |
| "learning_rate": 8.435785095903226e-06, | |
| "loss": 0.2767, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 3.6341212744090443, | |
| "grad_norm": 0.21026587275904124, | |
| "learning_rate": 8.388931080806244e-06, | |
| "loss": 0.277, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 3.6382322713257964, | |
| "grad_norm": 0.1905115883548281, | |
| "learning_rate": 8.342172990602692e-06, | |
| "loss": 0.2743, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 3.642343268242549, | |
| "grad_norm": 0.21168502442048126, | |
| "learning_rate": 8.295511211584726e-06, | |
| "loss": 0.2684, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 3.646454265159301, | |
| "grad_norm": 0.21498006850055293, | |
| "learning_rate": 8.248946129248821e-06, | |
| "loss": 0.2762, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 3.6505652620760536, | |
| "grad_norm": 0.19489762757982362, | |
| "learning_rate": 8.202478128292594e-06, | |
| "loss": 0.279, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 3.6546762589928057, | |
| "grad_norm": 0.21734478601458554, | |
| "learning_rate": 8.15610759261163e-06, | |
| "loss": 0.2743, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 3.6587872559095582, | |
| "grad_norm": 0.2067502254964237, | |
| "learning_rate": 8.109834905296296e-06, | |
| "loss": 0.2687, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 3.6628982528263103, | |
| "grad_norm": 0.20837277018256964, | |
| "learning_rate": 8.06366044862859e-06, | |
| "loss": 0.2776, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 3.667009249743063, | |
| "grad_norm": 0.2024301743577271, | |
| "learning_rate": 8.017584604078974e-06, | |
| "loss": 0.2801, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 3.671120246659815, | |
| "grad_norm": 0.21530744818182257, | |
| "learning_rate": 7.971607752303226e-06, | |
| "loss": 0.28, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 3.675231243576567, | |
| "grad_norm": 0.2139811361890938, | |
| "learning_rate": 7.925730273139294e-06, | |
| "loss": 0.2712, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 3.6793422404933196, | |
| "grad_norm": 0.20799988041239068, | |
| "learning_rate": 7.879952545604163e-06, | |
| "loss": 0.2926, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 3.683453237410072, | |
| "grad_norm": 0.20418864938595824, | |
| "learning_rate": 7.834274947890715e-06, | |
| "loss": 0.2798, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 3.6875642343268242, | |
| "grad_norm": 0.20416263025450562, | |
| "learning_rate": 7.78869785736461e-06, | |
| "loss": 0.2694, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 3.6916752312435763, | |
| "grad_norm": 0.19066134679044647, | |
| "learning_rate": 7.74322165056117e-06, | |
| "loss": 0.2667, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 3.695786228160329, | |
| "grad_norm": 0.2121667278903765, | |
| "learning_rate": 7.697846703182262e-06, | |
| "loss": 0.2784, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 3.6998972250770814, | |
| "grad_norm": 0.2071705071263635, | |
| "learning_rate": 7.652573390093199e-06, | |
| "loss": 0.285, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 3.7040082219938335, | |
| "grad_norm": 0.2035973761053005, | |
| "learning_rate": 7.607402085319644e-06, | |
| "loss": 0.2759, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 3.7081192189105856, | |
| "grad_norm": 0.2083326930999411, | |
| "learning_rate": 7.562333162044508e-06, | |
| "loss": 0.2775, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 3.712230215827338, | |
| "grad_norm": 0.21563075052521988, | |
| "learning_rate": 7.517366992604902e-06, | |
| "loss": 0.2767, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 3.7163412127440907, | |
| "grad_norm": 0.20432779262539, | |
| "learning_rate": 7.4725039484890094e-06, | |
| "loss": 0.2874, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 3.720452209660843, | |
| "grad_norm": 0.2047844251053815, | |
| "learning_rate": 7.427744400333053e-06, | |
| "loss": 0.2789, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 3.724563206577595, | |
| "grad_norm": 0.2055231569256932, | |
| "learning_rate": 7.383088717918223e-06, | |
| "loss": 0.2748, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 3.7286742034943474, | |
| "grad_norm": 0.20467879963763858, | |
| "learning_rate": 7.338537270167625e-06, | |
| "loss": 0.277, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 3.7327852004111, | |
| "grad_norm": 0.21544746620927177, | |
| "learning_rate": 7.294090425143225e-06, | |
| "loss": 0.273, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 3.736896197327852, | |
| "grad_norm": 0.2000666684512926, | |
| "learning_rate": 7.249748550042817e-06, | |
| "loss": 0.2806, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 3.741007194244604, | |
| "grad_norm": 0.20770589378766816, | |
| "learning_rate": 7.20551201119698e-06, | |
| "loss": 0.2705, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 3.7451181911613567, | |
| "grad_norm": 0.20437780757014407, | |
| "learning_rate": 7.161381174066065e-06, | |
| "loss": 0.2829, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 3.749229188078109, | |
| "grad_norm": 0.19567720371080252, | |
| "learning_rate": 7.117356403237161e-06, | |
| "loss": 0.2813, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 3.7533401849948613, | |
| "grad_norm": 0.19171574936304334, | |
| "learning_rate": 7.073438062421094e-06, | |
| "loss": 0.2782, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 3.7574511819116134, | |
| "grad_norm": 0.20924848866916773, | |
| "learning_rate": 7.029626514449414e-06, | |
| "loss": 0.27, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 3.761562178828366, | |
| "grad_norm": 0.20438696705099926, | |
| "learning_rate": 6.985922121271409e-06, | |
| "loss": 0.2728, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 3.765673175745118, | |
| "grad_norm": 0.2084495335702813, | |
| "learning_rate": 6.942325243951098e-06, | |
| "loss": 0.2824, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 3.7697841726618706, | |
| "grad_norm": 0.1993990523612008, | |
| "learning_rate": 6.898836242664262e-06, | |
| "loss": 0.282, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 3.7738951695786227, | |
| "grad_norm": 0.19347775656849484, | |
| "learning_rate": 6.855455476695465e-06, | |
| "loss": 0.2706, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 3.7780061664953752, | |
| "grad_norm": 0.20109622486576145, | |
| "learning_rate": 6.812183304435083e-06, | |
| "loss": 0.2801, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 3.7821171634121273, | |
| "grad_norm": 0.18886838722656143, | |
| "learning_rate": 6.769020083376341e-06, | |
| "loss": 0.2721, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 3.78622816032888, | |
| "grad_norm": 0.208430820513582, | |
| "learning_rate": 6.725966170112368e-06, | |
| "loss": 0.2686, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 3.790339157245632, | |
| "grad_norm": 0.1967578418393911, | |
| "learning_rate": 6.6830219203332415e-06, | |
| "loss": 0.2721, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 3.7944501541623845, | |
| "grad_norm": 0.2015892872246403, | |
| "learning_rate": 6.640187688823065e-06, | |
| "loss": 0.2792, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 3.7985611510791366, | |
| "grad_norm": 0.1938822600108583, | |
| "learning_rate": 6.597463829457014e-06, | |
| "loss": 0.2799, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 3.802672147995889, | |
| "grad_norm": 0.2023587740694427, | |
| "learning_rate": 6.554850695198427e-06, | |
| "loss": 0.2695, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 3.8067831449126412, | |
| "grad_norm": 0.19570583847216003, | |
| "learning_rate": 6.512348638095887e-06, | |
| "loss": 0.2858, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 3.810894141829394, | |
| "grad_norm": 0.19738903231975544, | |
| "learning_rate": 6.469958009280315e-06, | |
| "loss": 0.2681, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 3.815005138746146, | |
| "grad_norm": 0.20083483818328293, | |
| "learning_rate": 6.4276791589620595e-06, | |
| "loss": 0.2852, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 3.819116135662898, | |
| "grad_norm": 0.19273874331489446, | |
| "learning_rate": 6.385512436428021e-06, | |
| "loss": 0.2864, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 3.8232271325796505, | |
| "grad_norm": 0.1869845010972472, | |
| "learning_rate": 6.343458190038747e-06, | |
| "loss": 0.2727, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 3.827338129496403, | |
| "grad_norm": 0.19346715289339741, | |
| "learning_rate": 6.301516767225568e-06, | |
| "loss": 0.2739, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 3.831449126413155, | |
| "grad_norm": 0.19227603993401987, | |
| "learning_rate": 6.259688514487718e-06, | |
| "loss": 0.2758, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 3.8355601233299073, | |
| "grad_norm": 0.20411187735886127, | |
| "learning_rate": 6.217973777389483e-06, | |
| "loss": 0.2761, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 3.83967112024666, | |
| "grad_norm": 0.18675711473098772, | |
| "learning_rate": 6.1763729005573284e-06, | |
| "loss": 0.2829, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 3.8437821171634123, | |
| "grad_norm": 0.2123802835671684, | |
| "learning_rate": 6.134886227677073e-06, | |
| "loss": 0.2922, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 3.8478931140801644, | |
| "grad_norm": 0.18956127541911397, | |
| "learning_rate": 6.093514101491034e-06, | |
| "loss": 0.2763, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 3.8520041109969165, | |
| "grad_norm": 0.18788309236848885, | |
| "learning_rate": 6.052256863795198e-06, | |
| "loss": 0.2711, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 3.856115107913669, | |
| "grad_norm": 0.19828249178491697, | |
| "learning_rate": 6.0111148554364084e-06, | |
| "loss": 0.2799, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 3.8602261048304216, | |
| "grad_norm": 0.18431610567167325, | |
| "learning_rate": 5.970088416309532e-06, | |
| "loss": 0.2689, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 3.8643371017471737, | |
| "grad_norm": 0.21004802063561837, | |
| "learning_rate": 5.929177885354665e-06, | |
| "loss": 0.279, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 3.868448098663926, | |
| "grad_norm": 0.18145712447424242, | |
| "learning_rate": 5.888383600554326e-06, | |
| "loss": 0.2769, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 3.8725590955806783, | |
| "grad_norm": 0.1998489072868665, | |
| "learning_rate": 5.8477058989306605e-06, | |
| "loss": 0.2902, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 3.876670092497431, | |
| "grad_norm": 0.19349791063075825, | |
| "learning_rate": 5.807145116542678e-06, | |
| "loss": 0.2772, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 3.880781089414183, | |
| "grad_norm": 0.20224120336775228, | |
| "learning_rate": 5.766701588483443e-06, | |
| "loss": 0.2766, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 3.884892086330935, | |
| "grad_norm": 0.20201558369754713, | |
| "learning_rate": 5.726375648877329e-06, | |
| "loss": 0.2711, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 3.8890030832476876, | |
| "grad_norm": 0.186362787594006, | |
| "learning_rate": 5.68616763087725e-06, | |
| "loss": 0.2637, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 3.8931140801644397, | |
| "grad_norm": 0.18827723220330278, | |
| "learning_rate": 5.646077866661912e-06, | |
| "loss": 0.2728, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 3.8972250770811923, | |
| "grad_norm": 0.20621122766057245, | |
| "learning_rate": 5.606106687433066e-06, | |
| "loss": 0.277, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 3.9013360739979444, | |
| "grad_norm": 0.1997165387359167, | |
| "learning_rate": 5.5662544234127735e-06, | |
| "loss": 0.2852, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 3.905447070914697, | |
| "grad_norm": 0.1986176597393475, | |
| "learning_rate": 5.526521403840677e-06, | |
| "loss": 0.2724, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 3.909558067831449, | |
| "grad_norm": 0.19315083170854766, | |
| "learning_rate": 5.486907956971277e-06, | |
| "loss": 0.2654, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 3.9136690647482015, | |
| "grad_norm": 0.19208269826966257, | |
| "learning_rate": 5.447414410071232e-06, | |
| "loss": 0.28, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 3.9177800616649536, | |
| "grad_norm": 0.1986061425594109, | |
| "learning_rate": 5.40804108941664e-06, | |
| "loss": 0.2809, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 3.921891058581706, | |
| "grad_norm": 0.18060496237659096, | |
| "learning_rate": 5.36878832029035e-06, | |
| "loss": 0.2753, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 3.9260020554984583, | |
| "grad_norm": 0.19007144815119342, | |
| "learning_rate": 5.329656426979275e-06, | |
| "loss": 0.2844, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 3.930113052415211, | |
| "grad_norm": 0.18228170358892676, | |
| "learning_rate": 5.290645732771711e-06, | |
| "loss": 0.2776, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 3.934224049331963, | |
| "grad_norm": 0.20611317253574513, | |
| "learning_rate": 5.251756559954668e-06, | |
| "loss": 0.2752, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 3.9383350462487154, | |
| "grad_norm": 0.19496510102086326, | |
| "learning_rate": 5.212989229811209e-06, | |
| "loss": 0.2703, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 3.9424460431654675, | |
| "grad_norm": 0.18813827312165923, | |
| "learning_rate": 5.174344062617789e-06, | |
| "loss": 0.2817, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 3.94655704008222, | |
| "grad_norm": 0.19091427031439173, | |
| "learning_rate": 5.135821377641616e-06, | |
| "loss": 0.2787, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 3.950668036998972, | |
| "grad_norm": 0.1901592123123516, | |
| "learning_rate": 5.097421493138008e-06, | |
| "loss": 0.2766, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 3.9547790339157247, | |
| "grad_norm": 0.1870098631363826, | |
| "learning_rate": 5.059144726347765e-06, | |
| "loss": 0.2728, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 3.958890030832477, | |
| "grad_norm": 0.17796954931972553, | |
| "learning_rate": 5.020991393494558e-06, | |
| "loss": 0.2867, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 3.963001027749229, | |
| "grad_norm": 0.19046713852280395, | |
| "learning_rate": 4.9829618097823055e-06, | |
| "loss": 0.2675, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 3.9671120246659815, | |
| "grad_norm": 0.19367792434634498, | |
| "learning_rate": 4.945056289392565e-06, | |
| "loss": 0.2765, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 3.971223021582734, | |
| "grad_norm": 0.18974765427392373, | |
| "learning_rate": 4.907275145481947e-06, | |
| "loss": 0.2731, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 3.975334018499486, | |
| "grad_norm": 0.18889755922787974, | |
| "learning_rate": 4.8696186901795275e-06, | |
| "loss": 0.2817, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 3.979445015416238, | |
| "grad_norm": 0.19028199023394596, | |
| "learning_rate": 4.832087234584266e-06, | |
| "loss": 0.2783, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 3.9835560123329907, | |
| "grad_norm": 0.1964825876876656, | |
| "learning_rate": 4.794681088762438e-06, | |
| "loss": 0.2744, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 3.9876670092497433, | |
| "grad_norm": 0.17957398832039587, | |
| "learning_rate": 4.757400561745069e-06, | |
| "loss": 0.2762, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 3.9917780061664954, | |
| "grad_norm": 0.20987505932024647, | |
| "learning_rate": 4.720245961525387e-06, | |
| "loss": 0.2949, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 3.9958890030832475, | |
| "grad_norm": 0.18879687589648914, | |
| "learning_rate": 4.683217595056275e-06, | |
| "loss": 0.2746, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 1.707156689602904, | |
| "learning_rate": 4.646315768247731e-06, | |
| "loss": 0.2868, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 4.0041109969167525, | |
| "grad_norm": 0.3673275720964706, | |
| "learning_rate": 4.609540785964348e-06, | |
| "loss": 0.2379, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 4.008221993833504, | |
| "grad_norm": 0.26013071708722996, | |
| "learning_rate": 4.572892952022796e-06, | |
| "loss": 0.2495, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 4.012332990750257, | |
| "grad_norm": 0.30039166221512403, | |
| "learning_rate": 4.5363725691893045e-06, | |
| "loss": 0.2434, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 4.016443987667009, | |
| "grad_norm": 0.40331206801802966, | |
| "learning_rate": 4.499979939177164e-06, | |
| "loss": 0.2413, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 4.020554984583762, | |
| "grad_norm": 0.2653915725640132, | |
| "learning_rate": 4.463715362644239e-06, | |
| "loss": 0.2415, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 4.0246659815005135, | |
| "grad_norm": 0.2706794398843468, | |
| "learning_rate": 4.427579139190474e-06, | |
| "loss": 0.2353, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 4.028776978417266, | |
| "grad_norm": 0.33800513453404296, | |
| "learning_rate": 4.391571567355428e-06, | |
| "loss": 0.244, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 4.0328879753340185, | |
| "grad_norm": 0.2848868937309266, | |
| "learning_rate": 4.355692944615806e-06, | |
| "loss": 0.2446, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 4.036998972250771, | |
| "grad_norm": 0.213052312700043, | |
| "learning_rate": 4.319943567382991e-06, | |
| "loss": 0.2446, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 4.041109969167523, | |
| "grad_norm": 0.24448300665475436, | |
| "learning_rate": 4.28432373100061e-06, | |
| "loss": 0.2383, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 4.045220966084275, | |
| "grad_norm": 0.28289541109409083, | |
| "learning_rate": 4.248833729742095e-06, | |
| "loss": 0.2335, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 4.049331963001028, | |
| "grad_norm": 0.27075279957678594, | |
| "learning_rate": 4.2134738568082325e-06, | |
| "loss": 0.2388, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 4.05344295991778, | |
| "grad_norm": 0.2271083193598205, | |
| "learning_rate": 4.1782444043247565e-06, | |
| "loss": 0.2386, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 4.057553956834532, | |
| "grad_norm": 0.22324730717439883, | |
| "learning_rate": 4.143145663339932e-06, | |
| "loss": 0.2447, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 4.061664953751285, | |
| "grad_norm": 0.26100760343340185, | |
| "learning_rate": 4.108177923822154e-06, | |
| "loss": 0.2426, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 4.065775950668037, | |
| "grad_norm": 0.23257567018511596, | |
| "learning_rate": 4.073341474657544e-06, | |
| "loss": 0.2482, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 4.06988694758479, | |
| "grad_norm": 0.1994071326027501, | |
| "learning_rate": 4.03863660364757e-06, | |
| "loss": 0.2389, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 4.073997944501541, | |
| "grad_norm": 0.21371643270197568, | |
| "learning_rate": 4.004063597506664e-06, | |
| "loss": 0.2337, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 4.078108941418294, | |
| "grad_norm": 0.24512669596399653, | |
| "learning_rate": 3.969622741859862e-06, | |
| "loss": 0.2477, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 4.082219938335046, | |
| "grad_norm": 0.21744045295237915, | |
| "learning_rate": 3.935314321240433e-06, | |
| "loss": 0.2405, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 4.086330935251799, | |
| "grad_norm": 0.20192278557379797, | |
| "learning_rate": 3.90113861908753e-06, | |
| "loss": 0.2394, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 4.090441932168551, | |
| "grad_norm": 0.2027471703666848, | |
| "learning_rate": 3.867095917743862e-06, | |
| "loss": 0.2326, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 4.094552929085303, | |
| "grad_norm": 0.20882580151186148, | |
| "learning_rate": 3.8331864984533404e-06, | |
| "loss": 0.2362, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 4.098663926002056, | |
| "grad_norm": 0.1930471416017011, | |
| "learning_rate": 3.799410641358776e-06, | |
| "loss": 0.2462, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 4.102774922918808, | |
| "grad_norm": 0.19859635746881463, | |
| "learning_rate": 3.7657686254995483e-06, | |
| "loss": 0.2404, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 4.10688591983556, | |
| "grad_norm": 0.1983957254871405, | |
| "learning_rate": 3.7322607288093117e-06, | |
| "loss": 0.2398, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 4.110996916752312, | |
| "grad_norm": 0.22293857279886048, | |
| "learning_rate": 3.6988872281136855e-06, | |
| "loss": 0.2363, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 4.115107913669065, | |
| "grad_norm": 0.20443840443106004, | |
| "learning_rate": 3.66564839912799e-06, | |
| "loss": 0.2318, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 4.1192189105858175, | |
| "grad_norm": 0.17966769630726293, | |
| "learning_rate": 3.632544516454941e-06, | |
| "loss": 0.2359, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 4.123329907502569, | |
| "grad_norm": 0.19432549741475053, | |
| "learning_rate": 3.5995758535823997e-06, | |
| "loss": 0.2316, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 4.127440904419322, | |
| "grad_norm": 0.18881014978005276, | |
| "learning_rate": 3.566742682881119e-06, | |
| "loss": 0.2608, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 4.131551901336074, | |
| "grad_norm": 0.19088807670118796, | |
| "learning_rate": 3.534045275602467e-06, | |
| "loss": 0.242, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 4.135662898252827, | |
| "grad_norm": 0.1816637262264018, | |
| "learning_rate": 3.501483901876208e-06, | |
| "loss": 0.244, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 4.139773895169578, | |
| "grad_norm": 0.19010713069523394, | |
| "learning_rate": 3.469058830708263e-06, | |
| "loss": 0.2324, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 4.143884892086331, | |
| "grad_norm": 0.19620537155899534, | |
| "learning_rate": 3.436770329978494e-06, | |
| "loss": 0.2481, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 4.1479958890030835, | |
| "grad_norm": 0.18566900979279455, | |
| "learning_rate": 3.4046186664384795e-06, | |
| "loss": 0.2432, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 4.152106885919835, | |
| "grad_norm": 0.1755700170371331, | |
| "learning_rate": 3.3726041057093186e-06, | |
| "loss": 0.2386, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 4.156217882836588, | |
| "grad_norm": 0.18096902328410783, | |
| "learning_rate": 3.3407269122794373e-06, | |
| "loss": 0.2487, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 4.16032887975334, | |
| "grad_norm": 0.192754387487128, | |
| "learning_rate": 3.3089873495023995e-06, | |
| "loss": 0.234, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 4.164439876670093, | |
| "grad_norm": 0.19892387100550088, | |
| "learning_rate": 3.2773856795947336e-06, | |
| "loss": 0.2339, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 4.168550873586844, | |
| "grad_norm": 0.18465157283491226, | |
| "learning_rate": 3.2459221636337633e-06, | |
| "loss": 0.2379, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 4.172661870503597, | |
| "grad_norm": 0.1899662552430034, | |
| "learning_rate": 3.214597061555458e-06, | |
| "loss": 0.2292, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 4.1767728674203495, | |
| "grad_norm": 0.18665807494909734, | |
| "learning_rate": 3.1834106321522727e-06, | |
| "loss": 0.2371, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 4.180883864337102, | |
| "grad_norm": 0.1854509036542964, | |
| "learning_rate": 3.152363133071024e-06, | |
| "loss": 0.2433, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 4.184994861253854, | |
| "grad_norm": 0.20338354606609246, | |
| "learning_rate": 3.12145482081075e-06, | |
| "loss": 0.2373, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 4.189105858170606, | |
| "grad_norm": 0.1823424640205926, | |
| "learning_rate": 3.0906859507206044e-06, | |
| "loss": 0.2425, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 4.193216855087359, | |
| "grad_norm": 0.18646817047228667, | |
| "learning_rate": 3.0600567769977286e-06, | |
| "loss": 0.2388, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 4.197327852004111, | |
| "grad_norm": 0.19248044840190429, | |
| "learning_rate": 3.0295675526851686e-06, | |
| "loss": 0.2327, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 4.201438848920863, | |
| "grad_norm": 0.1895682953006883, | |
| "learning_rate": 2.9992185296697763e-06, | |
| "loss": 0.2494, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 4.2055498458376155, | |
| "grad_norm": 0.1775774161260345, | |
| "learning_rate": 2.9690099586801223e-06, | |
| "loss": 0.2431, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 4.209660842754368, | |
| "grad_norm": 0.18688744320331976, | |
| "learning_rate": 2.938942089284453e-06, | |
| "loss": 0.2243, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 4.213771839671121, | |
| "grad_norm": 0.18321913204838605, | |
| "learning_rate": 2.909015169888587e-06, | |
| "loss": 0.2361, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 4.217882836587872, | |
| "grad_norm": 0.18558364928419416, | |
| "learning_rate": 2.879229447733893e-06, | |
| "loss": 0.2438, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 4.221993833504625, | |
| "grad_norm": 0.18370056819501662, | |
| "learning_rate": 2.849585168895237e-06, | |
| "loss": 0.2372, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 4.226104830421377, | |
| "grad_norm": 0.17922623411257754, | |
| "learning_rate": 2.8200825782789466e-06, | |
| "loss": 0.2389, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 4.23021582733813, | |
| "grad_norm": 0.1814704060047799, | |
| "learning_rate": 2.790721919620798e-06, | |
| "loss": 0.2299, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 4.2343268242548815, | |
| "grad_norm": 0.18999808738781843, | |
| "learning_rate": 2.7615034354839942e-06, | |
| "loss": 0.2346, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 4.238437821171634, | |
| "grad_norm": 0.18017749013937312, | |
| "learning_rate": 2.7324273672571577e-06, | |
| "loss": 0.2337, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 4.242548818088387, | |
| "grad_norm": 0.1799591693551389, | |
| "learning_rate": 2.7034939551523476e-06, | |
| "loss": 0.2439, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 4.246659815005139, | |
| "grad_norm": 0.18327330613798448, | |
| "learning_rate": 2.6747034382030655e-06, | |
| "loss": 0.2445, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 4.250770811921891, | |
| "grad_norm": 0.17889375387571904, | |
| "learning_rate": 2.646056054262287e-06, | |
| "loss": 0.2467, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 4.254881808838643, | |
| "grad_norm": 0.17679323974968908, | |
| "learning_rate": 2.6175520400004907e-06, | |
| "loss": 0.2405, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 4.258992805755396, | |
| "grad_norm": 0.17336200096095578, | |
| "learning_rate": 2.5891916309037046e-06, | |
| "loss": 0.2367, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 4.263103802672148, | |
| "grad_norm": 0.1862342732350899, | |
| "learning_rate": 2.560975061271569e-06, | |
| "loss": 0.2294, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 4.2672147995889, | |
| "grad_norm": 0.1761467998629582, | |
| "learning_rate": 2.5329025642153873e-06, | |
| "loss": 0.2448, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 4.271325796505653, | |
| "grad_norm": 0.17762679763602063, | |
| "learning_rate": 2.5049743716562104e-06, | |
| "loss": 0.2459, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 4.275436793422405, | |
| "grad_norm": 0.17679704716813474, | |
| "learning_rate": 2.4771907143229124e-06, | |
| "loss": 0.2366, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 4.279547790339157, | |
| "grad_norm": 0.18512587191088023, | |
| "learning_rate": 2.4495518217502936e-06, | |
| "loss": 0.2334, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 4.283658787255909, | |
| "grad_norm": 0.17098883894517244, | |
| "learning_rate": 2.422057922277179e-06, | |
| "loss": 0.2366, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 4.287769784172662, | |
| "grad_norm": 0.19038355517722344, | |
| "learning_rate": 2.3947092430445284e-06, | |
| "loss": 0.2361, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 4.291880781089414, | |
| "grad_norm": 0.1807683215327849, | |
| "learning_rate": 2.367506009993572e-06, | |
| "loss": 0.2314, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 4.295991778006167, | |
| "grad_norm": 0.18180874713294695, | |
| "learning_rate": 2.34044844786393e-06, | |
| "loss": 0.2385, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 4.300102774922919, | |
| "grad_norm": 0.18147750075878016, | |
| "learning_rate": 2.313536780191763e-06, | |
| "loss": 0.2336, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 4.304213771839671, | |
| "grad_norm": 0.1782373533284217, | |
| "learning_rate": 2.2867712293079223e-06, | |
| "loss": 0.2356, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 4.308324768756424, | |
| "grad_norm": 0.17802709783230702, | |
| "learning_rate": 2.2601520163361166e-06, | |
| "loss": 0.2445, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 4.312435765673175, | |
| "grad_norm": 0.17602254086438468, | |
| "learning_rate": 2.233679361191081e-06, | |
| "loss": 0.2296, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 4.316546762589928, | |
| "grad_norm": 0.17604437821882946, | |
| "learning_rate": 2.2073534825767683e-06, | |
| "loss": 0.2493, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 4.32065775950668, | |
| "grad_norm": 0.18364670883928147, | |
| "learning_rate": 2.18117459798453e-06, | |
| "loss": 0.2332, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 4.324768756423433, | |
| "grad_norm": 0.17647874008223446, | |
| "learning_rate": 2.155142923691329e-06, | |
| "loss": 0.2434, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 4.328879753340185, | |
| "grad_norm": 0.1821284298329628, | |
| "learning_rate": 2.129258674757948e-06, | |
| "loss": 0.2405, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 4.332990750256937, | |
| "grad_norm": 0.17780536510155415, | |
| "learning_rate": 2.103522065027217e-06, | |
| "loss": 0.2352, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 4.33710174717369, | |
| "grad_norm": 0.17826171239681762, | |
| "learning_rate": 2.07793330712224e-06, | |
| "loss": 0.2389, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 4.341212744090442, | |
| "grad_norm": 0.17939747251527152, | |
| "learning_rate": 2.0524926124446497e-06, | |
| "loss": 0.2419, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 4.345323741007194, | |
| "grad_norm": 0.18203279406090278, | |
| "learning_rate": 2.0272001911728466e-06, | |
| "loss": 0.237, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 4.349434737923946, | |
| "grad_norm": 0.1797008020492476, | |
| "learning_rate": 2.0020562522602716e-06, | |
| "loss": 0.2341, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 4.353545734840699, | |
| "grad_norm": 0.1739746708895779, | |
| "learning_rate": 1.9770610034336823e-06, | |
| "loss": 0.2391, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 4.3576567317574515, | |
| "grad_norm": 0.18058019470972567, | |
| "learning_rate": 1.9522146511914265e-06, | |
| "loss": 0.2322, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 4.361767728674203, | |
| "grad_norm": 0.17826352479236454, | |
| "learning_rate": 1.927517400801746e-06, | |
| "loss": 0.2422, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 4.365878725590956, | |
| "grad_norm": 0.16893750635969274, | |
| "learning_rate": 1.902969456301076e-06, | |
| "loss": 0.2332, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 4.369989722507708, | |
| "grad_norm": 0.1782753702044134, | |
| "learning_rate": 1.8785710204923612e-06, | |
| "loss": 0.2385, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 4.374100719424461, | |
| "grad_norm": 0.18141238302838078, | |
| "learning_rate": 1.8543222949433736e-06, | |
| "loss": 0.2463, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 4.378211716341212, | |
| "grad_norm": 0.17681464500466545, | |
| "learning_rate": 1.8302234799850671e-06, | |
| "loss": 0.2441, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 4.382322713257965, | |
| "grad_norm": 0.17491562407701997, | |
| "learning_rate": 1.8062747747098974e-06, | |
| "loss": 0.2359, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 4.3864337101747175, | |
| "grad_norm": 0.17582489650428018, | |
| "learning_rate": 1.782476376970188e-06, | |
| "loss": 0.2518, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 4.39054470709147, | |
| "grad_norm": 0.179309298657522, | |
| "learning_rate": 1.7588284833765024e-06, | |
| "loss": 0.2509, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 4.394655704008222, | |
| "grad_norm": 0.17355362198390345, | |
| "learning_rate": 1.7353312892960095e-06, | |
| "loss": 0.2396, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 4.398766700924974, | |
| "grad_norm": 0.20669657827730264, | |
| "learning_rate": 1.7119849888508766e-06, | |
| "loss": 0.2401, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 4.402877697841727, | |
| "grad_norm": 0.1759615590766294, | |
| "learning_rate": 1.6887897749166548e-06, | |
| "loss": 0.239, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 4.406988694758479, | |
| "grad_norm": 0.1867129692924266, | |
| "learning_rate": 1.6657458391207049e-06, | |
| "loss": 0.24, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 4.411099691675231, | |
| "grad_norm": 0.17724290686658428, | |
| "learning_rate": 1.6428533718405914e-06, | |
| "loss": 0.2485, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 4.4152106885919835, | |
| "grad_norm": 0.1769255669225784, | |
| "learning_rate": 1.6201125622025315e-06, | |
| "loss": 0.2343, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 4.419321685508736, | |
| "grad_norm": 0.17424978161900648, | |
| "learning_rate": 1.5975235980798153e-06, | |
| "loss": 0.2299, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 4.423432682425489, | |
| "grad_norm": 0.17407143372977305, | |
| "learning_rate": 1.5750866660912634e-06, | |
| "loss": 0.2294, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 4.42754367934224, | |
| "grad_norm": 0.17791660590457703, | |
| "learning_rate": 1.5528019515996783e-06, | |
| "loss": 0.2425, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 4.431654676258993, | |
| "grad_norm": 0.18301382705782807, | |
| "learning_rate": 1.5306696387103227e-06, | |
| "loss": 0.2343, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 4.435765673175745, | |
| "grad_norm": 0.17589070591387826, | |
| "learning_rate": 1.5086899102693875e-06, | |
| "loss": 0.2469, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 4.439876670092497, | |
| "grad_norm": 0.17198527500762634, | |
| "learning_rate": 1.486862947862493e-06, | |
| "loss": 0.2463, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 4.4439876670092495, | |
| "grad_norm": 0.17792281862140422, | |
| "learning_rate": 1.465188931813175e-06, | |
| "loss": 0.2301, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 4.448098663926002, | |
| "grad_norm": 0.17628369792032114, | |
| "learning_rate": 1.4436680411814097e-06, | |
| "loss": 0.2399, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 4.452209660842755, | |
| "grad_norm": 0.17439136560526375, | |
| "learning_rate": 1.42230045376212e-06, | |
| "loss": 0.237, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 4.456320657759507, | |
| "grad_norm": 0.17943067929919523, | |
| "learning_rate": 1.4010863460837132e-06, | |
| "loss": 0.2405, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 4.460431654676259, | |
| "grad_norm": 0.17235533535415476, | |
| "learning_rate": 1.380025893406638e-06, | |
| "loss": 0.2397, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 4.464542651593011, | |
| "grad_norm": 0.17886870223554543, | |
| "learning_rate": 1.3591192697219003e-06, | |
| "loss": 0.2409, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 4.468653648509764, | |
| "grad_norm": 0.16738079998494204, | |
| "learning_rate": 1.3383666477496627e-06, | |
| "loss": 0.2387, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 4.4727646454265155, | |
| "grad_norm": 0.16758787660813548, | |
| "learning_rate": 1.3177681989377944e-06, | |
| "loss": 0.2417, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 4.476875642343268, | |
| "grad_norm": 0.1752202184059869, | |
| "learning_rate": 1.2973240934604658e-06, | |
| "loss": 0.2274, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 4.480986639260021, | |
| "grad_norm": 0.17463210365794904, | |
| "learning_rate": 1.277034500216736e-06, | |
| "loss": 0.226, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 4.485097636176773, | |
| "grad_norm": 0.17601812963083546, | |
| "learning_rate": 1.2568995868291656e-06, | |
| "loss": 0.2491, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 4.489208633093525, | |
| "grad_norm": 0.1775954820916016, | |
| "learning_rate": 1.236919519642421e-06, | |
| "loss": 0.2432, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 4.493319630010277, | |
| "grad_norm": 0.17531188232428954, | |
| "learning_rate": 1.2170944637219106e-06, | |
| "loss": 0.2417, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 4.49743062692703, | |
| "grad_norm": 0.1752476486120662, | |
| "learning_rate": 1.1974245828524156e-06, | |
| "loss": 0.2274, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 4.501541623843782, | |
| "grad_norm": 0.18283725978641932, | |
| "learning_rate": 1.177910039536736e-06, | |
| "loss": 0.2408, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 4.505652620760534, | |
| "grad_norm": 0.17390337086901564, | |
| "learning_rate": 1.1585509949943518e-06, | |
| "loss": 0.2374, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 4.509763617677287, | |
| "grad_norm": 0.1780578101655513, | |
| "learning_rate": 1.1393476091600886e-06, | |
| "loss": 0.2473, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 4.513874614594039, | |
| "grad_norm": 0.17965538239208087, | |
| "learning_rate": 1.120300040682798e-06, | |
| "loss": 0.244, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 4.517985611510792, | |
| "grad_norm": 0.17589641804084827, | |
| "learning_rate": 1.1014084469240461e-06, | |
| "loss": 0.2435, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 4.522096608427543, | |
| "grad_norm": 0.17442573382633822, | |
| "learning_rate": 1.0826729839568073e-06, | |
| "loss": 0.2417, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 4.526207605344296, | |
| "grad_norm": 0.1807965837015226, | |
| "learning_rate": 1.0640938065641926e-06, | |
| "loss": 0.2424, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 4.530318602261048, | |
| "grad_norm": 0.17785902254626473, | |
| "learning_rate": 1.0456710682381455e-06, | |
| "loss": 0.2546, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 4.534429599177801, | |
| "grad_norm": 0.17258906030475882, | |
| "learning_rate": 1.0274049211781967e-06, | |
| "loss": 0.2422, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 4.538540596094553, | |
| "grad_norm": 1.2728190292585875, | |
| "learning_rate": 1.009295516290194e-06, | |
| "loss": 0.2608, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 4.542651593011305, | |
| "grad_norm": 0.17125359621163755, | |
| "learning_rate": 9.913430031850635e-07, | |
| "loss": 0.2356, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 4.546762589928058, | |
| "grad_norm": 0.17651600362640116, | |
| "learning_rate": 9.735475301775632e-07, | |
| "loss": 0.246, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 4.55087358684481, | |
| "grad_norm": 0.16931432337364227, | |
| "learning_rate": 9.559092442850671e-07, | |
| "loss": 0.2289, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 4.554984583761562, | |
| "grad_norm": 0.17727932117256406, | |
| "learning_rate": 9.384282912263475e-07, | |
| "loss": 0.2334, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 4.559095580678314, | |
| "grad_norm": 0.17674518726323318, | |
| "learning_rate": 9.211048154203661e-07, | |
| "loss": 0.2512, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 4.563206577595067, | |
| "grad_norm": 0.17332686615916262, | |
| "learning_rate": 9.039389599850912e-07, | |
| "loss": 0.2339, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 4.567317574511819, | |
| "grad_norm": 0.1651967177583125, | |
| "learning_rate": 8.869308667363063e-07, | |
| "loss": 0.241, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 4.571428571428571, | |
| "grad_norm": 0.17416950220257496, | |
| "learning_rate": 8.700806761864466e-07, | |
| "loss": 0.2329, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 4.575539568345324, | |
| "grad_norm": 0.16574888125550336, | |
| "learning_rate": 8.533885275434283e-07, | |
| "loss": 0.2429, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 4.579650565262076, | |
| "grad_norm": 0.1731537234968332, | |
| "learning_rate": 8.368545587095056e-07, | |
| "loss": 0.2414, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 4.583761562178829, | |
| "grad_norm": 0.17116481210634382, | |
| "learning_rate": 8.20478906280131e-07, | |
| "loss": 0.2405, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 4.5878725590955804, | |
| "grad_norm": 0.17008303524989962, | |
| "learning_rate": 8.042617055428215e-07, | |
| "loss": 0.2313, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 4.591983556012333, | |
| "grad_norm": 0.1724744667796713, | |
| "learning_rate": 7.882030904760518e-07, | |
| "loss": 0.238, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 4.5960945529290855, | |
| "grad_norm": 0.17588735295940247, | |
| "learning_rate": 7.723031937481318e-07, | |
| "loss": 0.2497, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 4.600205549845837, | |
| "grad_norm": 0.17565531889855016, | |
| "learning_rate": 7.565621467161244e-07, | |
| "loss": 0.2563, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 4.60431654676259, | |
| "grad_norm": 0.1730755101477369, | |
| "learning_rate": 7.409800794247557e-07, | |
| "loss": 0.2337, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 4.608427543679342, | |
| "grad_norm": 0.16854464186311885, | |
| "learning_rate": 7.25557120605338e-07, | |
| "loss": 0.2445, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 4.612538540596095, | |
| "grad_norm": 0.17181736526067778, | |
| "learning_rate": 7.102933976747084e-07, | |
| "loss": 0.2356, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 4.616649537512847, | |
| "grad_norm": 0.17224841346483227, | |
| "learning_rate": 6.951890367341763e-07, | |
| "loss": 0.2404, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 4.620760534429599, | |
| "grad_norm": 0.174217100719342, | |
| "learning_rate": 6.802441625684774e-07, | |
| "loss": 0.2505, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 4.6248715313463515, | |
| "grad_norm": 0.172147168668161, | |
| "learning_rate": 6.654588986447597e-07, | |
| "loss": 0.2387, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 4.628982528263104, | |
| "grad_norm": 0.17025566837994996, | |
| "learning_rate": 6.508333671115341e-07, | |
| "loss": 0.2445, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 4.633093525179856, | |
| "grad_norm": 0.1791903634128583, | |
| "learning_rate": 6.363676887976944e-07, | |
| "loss": 0.2458, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 4.637204522096608, | |
| "grad_norm": 0.17050764877819538, | |
| "learning_rate": 6.220619832114971e-07, | |
| "loss": 0.2504, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 4.641315519013361, | |
| "grad_norm": 0.17379476155710213, | |
| "learning_rate": 6.079163685395917e-07, | |
| "loss": 0.2426, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 4.645426515930113, | |
| "grad_norm": 0.17731694259336384, | |
| "learning_rate": 5.939309616460276e-07, | |
| "loss": 0.2356, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 4.649537512846865, | |
| "grad_norm": 0.17342657076328494, | |
| "learning_rate": 5.801058780713021e-07, | |
| "loss": 0.2454, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 4.6536485097636175, | |
| "grad_norm": 0.17506716139804732, | |
| "learning_rate": 5.664412320314027e-07, | |
| "loss": 0.2466, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 4.65775950668037, | |
| "grad_norm": 0.1760318065249804, | |
| "learning_rate": 5.529371364168535e-07, | |
| "loss": 0.2298, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 4.661870503597123, | |
| "grad_norm": 0.17418792165950514, | |
| "learning_rate": 5.395937027918008e-07, | |
| "loss": 0.2352, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 4.665981500513874, | |
| "grad_norm": 0.1706417249640066, | |
| "learning_rate": 5.264110413930735e-07, | |
| "loss": 0.2398, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 4.670092497430627, | |
| "grad_norm": 0.17721194204664026, | |
| "learning_rate": 5.133892611292846e-07, | |
| "loss": 0.2378, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 4.674203494347379, | |
| "grad_norm": 0.18022478809124595, | |
| "learning_rate": 5.005284695799217e-07, | |
| "loss": 0.2491, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 4.678314491264132, | |
| "grad_norm": 0.17519368559887136, | |
| "learning_rate": 4.878287729944697e-07, | |
| "loss": 0.2438, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 4.6824254881808836, | |
| "grad_norm": 0.1777158927515907, | |
| "learning_rate": 4.7529027629152234e-07, | |
| "loss": 0.2364, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 4.686536485097636, | |
| "grad_norm": 0.1707755427569748, | |
| "learning_rate": 4.6291308305792315e-07, | |
| "loss": 0.2453, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 4.690647482014389, | |
| "grad_norm": 0.16668258759849372, | |
| "learning_rate": 4.5069729554790386e-07, | |
| "loss": 0.2402, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 4.694758478931141, | |
| "grad_norm": 0.16459971581816116, | |
| "learning_rate": 4.386430146822429e-07, | |
| "loss": 0.2483, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 4.698869475847893, | |
| "grad_norm": 0.17875849392738621, | |
| "learning_rate": 4.2675034004743045e-07, | |
| "loss": 0.241, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 4.702980472764645, | |
| "grad_norm": 0.17296505254178202, | |
| "learning_rate": 4.150193698948468e-07, | |
| "loss": 0.2465, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 4.707091469681398, | |
| "grad_norm": 0.17224757284909492, | |
| "learning_rate": 4.034502011399499e-07, | |
| "loss": 0.2385, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 4.7112024665981505, | |
| "grad_norm": 0.17411551470001055, | |
| "learning_rate": 3.92042929361478e-07, | |
| "loss": 0.2362, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 4.715313463514902, | |
| "grad_norm": 0.17065619759470402, | |
| "learning_rate": 3.8079764880064817e-07, | |
| "loss": 0.2367, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 4.719424460431655, | |
| "grad_norm": 0.1692629166872625, | |
| "learning_rate": 3.6971445236039685e-07, | |
| "loss": 0.2441, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 4.723535457348407, | |
| "grad_norm": 0.1727903716287266, | |
| "learning_rate": 3.587934316045938e-07, | |
| "loss": 0.2332, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 4.727646454265159, | |
| "grad_norm": 0.16765805252498014, | |
| "learning_rate": 3.4803467675729843e-07, | |
| "loss": 0.2436, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 4.731757451181911, | |
| "grad_norm": 0.1685165614846259, | |
| "learning_rate": 3.374382767020068e-07, | |
| "loss": 0.2462, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 4.735868448098664, | |
| "grad_norm": 0.17176323835480908, | |
| "learning_rate": 3.270043189809213e-07, | |
| "loss": 0.2475, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 4.7399794450154165, | |
| "grad_norm": 0.17054983226303438, | |
| "learning_rate": 3.167328897942268e-07, | |
| "loss": 0.2396, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 4.744090441932169, | |
| "grad_norm": 0.17377255003391895, | |
| "learning_rate": 3.0662407399937757e-07, | |
| "loss": 0.2414, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 4.748201438848921, | |
| "grad_norm": 0.1707561407227221, | |
| "learning_rate": 2.96677955110396e-07, | |
| "loss": 0.2374, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 4.752312435765673, | |
| "grad_norm": 0.1720410967594104, | |
| "learning_rate": 2.8689461529718634e-07, | |
| "loss": 0.2439, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 4.756423432682426, | |
| "grad_norm": 0.18296516134186272, | |
| "learning_rate": 2.7727413538484625e-07, | |
| "loss": 0.2361, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 4.760534429599177, | |
| "grad_norm": 0.17896247286039071, | |
| "learning_rate": 2.678165948530143e-07, | |
| "loss": 0.2356, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 4.76464542651593, | |
| "grad_norm": 0.17407845197351318, | |
| "learning_rate": 2.5852207183519885e-07, | |
| "loss": 0.2251, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 4.7687564234326825, | |
| "grad_norm": 0.17062892002775074, | |
| "learning_rate": 2.493906431181392e-07, | |
| "loss": 0.2438, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 4.772867420349435, | |
| "grad_norm": 0.1672404807221335, | |
| "learning_rate": 2.4042238414117016e-07, | |
| "loss": 0.2261, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 4.7769784172661875, | |
| "grad_norm": 0.17275226054969903, | |
| "learning_rate": 2.3161736899560249e-07, | |
| "loss": 0.2394, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 4.781089414182939, | |
| "grad_norm": 0.17356630567530312, | |
| "learning_rate": 2.2297567042410372e-07, | |
| "loss": 0.2345, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 4.785200411099692, | |
| "grad_norm": 0.17592520932271608, | |
| "learning_rate": 2.1449735982010278e-07, | |
| "loss": 0.2431, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 4.789311408016444, | |
| "grad_norm": 0.17039915938031028, | |
| "learning_rate": 2.0618250722719501e-07, | |
| "loss": 0.2431, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 4.793422404933196, | |
| "grad_norm": 0.1730081549716055, | |
| "learning_rate": 1.9803118133857157e-07, | |
| "loss": 0.2486, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 4.7975334018499485, | |
| "grad_norm": 0.17038132685762578, | |
| "learning_rate": 1.9004344949644425e-07, | |
| "loss": 0.2409, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 4.801644398766701, | |
| "grad_norm": 0.1743699418678854, | |
| "learning_rate": 1.8221937769149045e-07, | |
| "loss": 0.2365, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 4.805755395683454, | |
| "grad_norm": 0.16868706789230756, | |
| "learning_rate": 1.745590305623157e-07, | |
| "loss": 0.2415, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 4.809866392600205, | |
| "grad_norm": 0.17209405727243507, | |
| "learning_rate": 1.6706247139490318e-07, | |
| "loss": 0.2434, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 4.813977389516958, | |
| "grad_norm": 0.17768294930126322, | |
| "learning_rate": 1.5972976212211388e-07, | |
| "loss": 0.2333, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 4.81808838643371, | |
| "grad_norm": 0.1692714430601308, | |
| "learning_rate": 1.525609633231495e-07, | |
| "loss": 0.247, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 4.822199383350463, | |
| "grad_norm": 0.16825815560990823, | |
| "learning_rate": 1.455561342230749e-07, | |
| "loss": 0.249, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 4.8263103802672145, | |
| "grad_norm": 0.17141290386405647, | |
| "learning_rate": 1.3871533269231187e-07, | |
| "loss": 0.2547, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 4.830421377183967, | |
| "grad_norm": 0.17040562779973573, | |
| "learning_rate": 1.3203861524617278e-07, | |
| "loss": 0.2519, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 4.83453237410072, | |
| "grad_norm": 0.1677164586091278, | |
| "learning_rate": 1.2552603704438115e-07, | |
| "loss": 0.2334, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 4.838643371017472, | |
| "grad_norm": 0.1695089968957749, | |
| "learning_rate": 1.1917765189063402e-07, | |
| "loss": 0.243, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 4.842754367934224, | |
| "grad_norm": 0.17083241390302353, | |
| "learning_rate": 1.1299351223214017e-07, | |
| "loss": 0.2349, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 4.846865364850976, | |
| "grad_norm": 0.17067355512311883, | |
| "learning_rate": 1.069736691591916e-07, | |
| "loss": 0.2392, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 4.850976361767729, | |
| "grad_norm": 0.16913426842634588, | |
| "learning_rate": 1.0111817240475052e-07, | |
| "loss": 0.23, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 4.8550873586844805, | |
| "grad_norm": 0.17062118220717767, | |
| "learning_rate": 9.542707034402299e-08, | |
| "loss": 0.2358, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 4.859198355601233, | |
| "grad_norm": 0.17029660865240914, | |
| "learning_rate": 8.990040999407701e-08, | |
| "loss": 0.2302, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 4.863309352517986, | |
| "grad_norm": 0.16773211882510938, | |
| "learning_rate": 8.453823701343622e-08, | |
| "loss": 0.245, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 4.867420349434738, | |
| "grad_norm": 0.16985639544523204, | |
| "learning_rate": 7.93405957017157e-08, | |
| "loss": 0.2345, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 4.871531346351491, | |
| "grad_norm": 0.16908464736483494, | |
| "learning_rate": 7.430752899924898e-08, | |
| "loss": 0.2413, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 4.875642343268242, | |
| "grad_norm": 0.1759346258041749, | |
| "learning_rate": 6.943907848673937e-08, | |
| "loss": 0.2427, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 4.879753340184995, | |
| "grad_norm": 0.17174700886638744, | |
| "learning_rate": 6.473528438490916e-08, | |
| "loss": 0.2439, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 4.883864337101747, | |
| "grad_norm": 0.19693454366502605, | |
| "learning_rate": 6.019618555417328e-08, | |
| "loss": 0.2377, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 4.887975334018499, | |
| "grad_norm": 0.17754029524221127, | |
| "learning_rate": 5.58218194943172e-08, | |
| "loss": 0.2293, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 4.892086330935252, | |
| "grad_norm": 0.1700686268287155, | |
| "learning_rate": 5.161222234418173e-08, | |
| "loss": 0.2416, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 4.896197327852004, | |
| "grad_norm": 0.1778073111241822, | |
| "learning_rate": 4.756742888136989e-08, | |
| "loss": 0.245, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 4.900308324768757, | |
| "grad_norm": 0.17085644813691567, | |
| "learning_rate": 4.3687472521962704e-08, | |
| "loss": 0.2386, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 4.904419321685509, | |
| "grad_norm": 0.17614275321803505, | |
| "learning_rate": 3.997238532023273e-08, | |
| "loss": 0.2378, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 4.908530318602261, | |
| "grad_norm": 0.16573489640277006, | |
| "learning_rate": 3.642219796839097e-08, | |
| "loss": 0.2386, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 4.912641315519013, | |
| "grad_norm": 0.16678550464295586, | |
| "learning_rate": 3.303693979632039e-08, | |
| "loss": 0.2411, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 4.916752312435766, | |
| "grad_norm": 0.1683860743917499, | |
| "learning_rate": 2.981663877134944e-08, | |
| "loss": 0.2443, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 4.920863309352518, | |
| "grad_norm": 0.16597729886542864, | |
| "learning_rate": 2.6761321498005587e-08, | |
| "loss": 0.2408, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 4.92497430626927, | |
| "grad_norm": 0.1718127781713061, | |
| "learning_rate": 2.3871013217806605e-08, | |
| "loss": 0.2412, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 4.929085303186023, | |
| "grad_norm": 0.17097349823279034, | |
| "learning_rate": 2.1145737809045162e-08, | |
| "loss": 0.2421, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 4.933196300102775, | |
| "grad_norm": 0.17427216115981084, | |
| "learning_rate": 1.8585517786597894e-08, | |
| "loss": 0.2381, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 4.937307297019527, | |
| "grad_norm": 0.1744216439196038, | |
| "learning_rate": 1.6190374301727762e-08, | |
| "loss": 0.2282, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 4.941418293936279, | |
| "grad_norm": 0.17445833543773084, | |
| "learning_rate": 1.3960327141926411e-08, | |
| "loss": 0.2299, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 4.945529290853032, | |
| "grad_norm": 0.17503271001824838, | |
| "learning_rate": 1.1895394730738751e-08, | |
| "loss": 0.2333, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 4.9496402877697845, | |
| "grad_norm": 0.1689882913201602, | |
| "learning_rate": 9.995594127607534e-09, | |
| "loss": 0.2426, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 4.953751284686536, | |
| "grad_norm": 0.17067106138398636, | |
| "learning_rate": 8.260941027746772e-09, | |
| "loss": 0.2477, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 4.957862281603289, | |
| "grad_norm": 0.17523534599452864, | |
| "learning_rate": 6.6914497619996465e-09, | |
| "loss": 0.2362, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 4.961973278520041, | |
| "grad_norm": 0.16958624262500685, | |
| "learning_rate": 5.287133296723035e-09, | |
| "loss": 0.2416, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 4.966084275436794, | |
| "grad_norm": 0.17141643376988966, | |
| "learning_rate": 4.048003233687592e-09, | |
| "loss": 0.2319, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 4.970195272353545, | |
| "grad_norm": 0.17444671360259928, | |
| "learning_rate": 2.974069809964508e-09, | |
| "loss": 0.2442, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 4.974306269270298, | |
| "grad_norm": 0.16938004185341077, | |
| "learning_rate": 2.065341897865558e-09, | |
| "loss": 0.2482, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 4.9784172661870505, | |
| "grad_norm": 0.16777811457050917, | |
| "learning_rate": 1.32182700484762e-09, | |
| "loss": 0.2465, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 4.982528263103803, | |
| "grad_norm": 0.17657705108779342, | |
| "learning_rate": 7.435312734593858e-10, | |
| "loss": 0.2395, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 4.986639260020555, | |
| "grad_norm": 0.16972832488616352, | |
| "learning_rate": 3.304594812991724e-10, | |
| "loss": 0.2413, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 4.990750256937307, | |
| "grad_norm": 0.16507566272358404, | |
| "learning_rate": 8.261504095496976e-11, | |
| "loss": 0.2336, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 4.99486125385406, | |
| "grad_norm": 0.17100744078272323, | |
| "learning_rate": 0.0, | |
| "loss": 0.2357, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 4.99486125385406, | |
| "step": 1215, | |
| "total_flos": 4.757804886857613e+18, | |
| "train_loss": 0.34405366748939326, | |
| "train_runtime": 28473.715, | |
| "train_samples_per_second": 5.463, | |
| "train_steps_per_second": 0.043 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1215, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.757804886857613e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |