| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.996517107150174, | |
| "eval_steps": 500, | |
| "global_step": 760, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.006556033599672198, | |
| "grad_norm": 6.308972265166019, | |
| "learning_rate": 1.0526315789473685e-06, | |
| "loss": 1.085, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.013112067199344397, | |
| "grad_norm": 6.3669176951390085, | |
| "learning_rate": 2.105263157894737e-06, | |
| "loss": 1.0943, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.019668100799016593, | |
| "grad_norm": 6.248172461223065, | |
| "learning_rate": 3.157894736842105e-06, | |
| "loss": 1.0945, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.026224134398688793, | |
| "grad_norm": 5.720783796325484, | |
| "learning_rate": 4.210526315789474e-06, | |
| "loss": 1.0822, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.03278016799836099, | |
| "grad_norm": 4.166580116763421, | |
| "learning_rate": 5.263157894736842e-06, | |
| "loss": 1.0376, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.03933620159803319, | |
| "grad_norm": 2.5258613072829443, | |
| "learning_rate": 6.31578947368421e-06, | |
| "loss": 1.0088, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.04589223519770539, | |
| "grad_norm": 3.241153982591279, | |
| "learning_rate": 7.368421052631579e-06, | |
| "loss": 0.9787, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.05244826879737759, | |
| "grad_norm": 4.215997338140802, | |
| "learning_rate": 8.421052631578948e-06, | |
| "loss": 1.0021, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.05900430239704978, | |
| "grad_norm": 3.7917272082574036, | |
| "learning_rate": 9.473684210526315e-06, | |
| "loss": 0.9883, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.06556033599672198, | |
| "grad_norm": 3.1091596358257414, | |
| "learning_rate": 1.0526315789473684e-05, | |
| "loss": 0.9323, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.07211636959639418, | |
| "grad_norm": 2.7854345292697746, | |
| "learning_rate": 1.1578947368421053e-05, | |
| "loss": 0.9234, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.07867240319606637, | |
| "grad_norm": 1.8650709570608603, | |
| "learning_rate": 1.263157894736842e-05, | |
| "loss": 0.9023, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.08522843679573858, | |
| "grad_norm": 1.574226758545731, | |
| "learning_rate": 1.3684210526315791e-05, | |
| "loss": 0.8693, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.09178447039541078, | |
| "grad_norm": 1.4177768637857613, | |
| "learning_rate": 1.4736842105263159e-05, | |
| "loss": 0.8558, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.09834050399508297, | |
| "grad_norm": 1.2125668795375901, | |
| "learning_rate": 1.578947368421053e-05, | |
| "loss": 0.8394, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.10489653759475517, | |
| "grad_norm": 1.1398741474728522, | |
| "learning_rate": 1.6842105263157896e-05, | |
| "loss": 0.8373, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.11145257119442738, | |
| "grad_norm": 1.0380532605315191, | |
| "learning_rate": 1.7894736842105264e-05, | |
| "loss": 0.8138, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.11800860479409957, | |
| "grad_norm": 1.1944105144048436, | |
| "learning_rate": 1.894736842105263e-05, | |
| "loss": 0.8178, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.12456463839377177, | |
| "grad_norm": 0.9889022037403388, | |
| "learning_rate": 2e-05, | |
| "loss": 0.803, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.13112067199344396, | |
| "grad_norm": 0.7531044929728199, | |
| "learning_rate": 2.105263157894737e-05, | |
| "loss": 0.806, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.13767670559311618, | |
| "grad_norm": 0.9451120842150798, | |
| "learning_rate": 2.210526315789474e-05, | |
| "loss": 0.7904, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.14423273919278837, | |
| "grad_norm": 0.8762905130696029, | |
| "learning_rate": 2.3157894736842107e-05, | |
| "loss": 0.8001, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.15078877279246056, | |
| "grad_norm": 0.6699562168478991, | |
| "learning_rate": 2.4210526315789474e-05, | |
| "loss": 0.7822, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.15734480639213275, | |
| "grad_norm": 0.6276115227175743, | |
| "learning_rate": 2.526315789473684e-05, | |
| "loss": 0.7773, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.16390083999180496, | |
| "grad_norm": 0.6290807862288751, | |
| "learning_rate": 2.6315789473684215e-05, | |
| "loss": 0.7783, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.17045687359147715, | |
| "grad_norm": 0.5742564332284298, | |
| "learning_rate": 2.7368421052631583e-05, | |
| "loss": 0.7674, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.17701290719114934, | |
| "grad_norm": 0.7412922803237626, | |
| "learning_rate": 2.842105263157895e-05, | |
| "loss": 0.7698, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.18356894079082156, | |
| "grad_norm": 1.0547770655616728, | |
| "learning_rate": 2.9473684210526317e-05, | |
| "loss": 0.7655, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.19012497439049375, | |
| "grad_norm": 1.5527104367024405, | |
| "learning_rate": 3.052631578947369e-05, | |
| "loss": 0.7707, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.19668100799016594, | |
| "grad_norm": 0.7294937893627019, | |
| "learning_rate": 3.157894736842106e-05, | |
| "loss": 0.7578, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.20323704158983816, | |
| "grad_norm": 0.9536771340964642, | |
| "learning_rate": 3.2631578947368426e-05, | |
| "loss": 0.748, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.20979307518951035, | |
| "grad_norm": 1.6534518524100028, | |
| "learning_rate": 3.368421052631579e-05, | |
| "loss": 0.7588, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.21634910878918254, | |
| "grad_norm": 0.6082911982013689, | |
| "learning_rate": 3.473684210526316e-05, | |
| "loss": 0.7424, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.22290514238885475, | |
| "grad_norm": 1.4728558033024115, | |
| "learning_rate": 3.578947368421053e-05, | |
| "loss": 0.7423, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.22946117598852694, | |
| "grad_norm": 0.8950252912362415, | |
| "learning_rate": 3.6842105263157895e-05, | |
| "loss": 0.7396, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.23601720958819913, | |
| "grad_norm": 1.2202323688159324, | |
| "learning_rate": 3.789473684210526e-05, | |
| "loss": 0.7394, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.24257324318787135, | |
| "grad_norm": 1.0375163505188818, | |
| "learning_rate": 3.8947368421052636e-05, | |
| "loss": 0.735, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.24912927678754354, | |
| "grad_norm": 1.1817870027992188, | |
| "learning_rate": 4e-05, | |
| "loss": 0.7316, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.25568531038721576, | |
| "grad_norm": 1.7472819001700346, | |
| "learning_rate": 4.105263157894738e-05, | |
| "loss": 0.7371, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.2622413439868879, | |
| "grad_norm": 0.6997273968133122, | |
| "learning_rate": 4.210526315789474e-05, | |
| "loss": 0.7303, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.26879737758656014, | |
| "grad_norm": 2.321753393397282, | |
| "learning_rate": 4.315789473684211e-05, | |
| "loss": 0.7458, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.27535341118623236, | |
| "grad_norm": 1.366635958874213, | |
| "learning_rate": 4.421052631578948e-05, | |
| "loss": 0.7176, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.2819094447859045, | |
| "grad_norm": 2.6212092693642552, | |
| "learning_rate": 4.5263157894736846e-05, | |
| "loss": 0.7315, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.28846547838557673, | |
| "grad_norm": 2.5130277449596496, | |
| "learning_rate": 4.6315789473684214e-05, | |
| "loss": 0.753, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.29502151198524895, | |
| "grad_norm": 1.6285728910727462, | |
| "learning_rate": 4.736842105263158e-05, | |
| "loss": 0.7297, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.3015775455849211, | |
| "grad_norm": 2.0964313396830456, | |
| "learning_rate": 4.842105263157895e-05, | |
| "loss": 0.7265, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.30813357918459333, | |
| "grad_norm": 2.0887773848236697, | |
| "learning_rate": 4.947368421052632e-05, | |
| "loss": 0.7239, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.3146896127842655, | |
| "grad_norm": 1.1072655635857458, | |
| "learning_rate": 5.052631578947368e-05, | |
| "loss": 0.7193, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.3212456463839377, | |
| "grad_norm": 1.4488623000707561, | |
| "learning_rate": 5.157894736842106e-05, | |
| "loss": 0.7203, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.32780167998360993, | |
| "grad_norm": 1.2322683716693836, | |
| "learning_rate": 5.263157894736843e-05, | |
| "loss": 0.7238, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.3343577135832821, | |
| "grad_norm": 1.38345500389746, | |
| "learning_rate": 5.368421052631579e-05, | |
| "loss": 0.7142, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.3409137471829543, | |
| "grad_norm": 0.9094126438399451, | |
| "learning_rate": 5.4736842105263165e-05, | |
| "loss": 0.72, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.3474697807826265, | |
| "grad_norm": 1.2022041950473747, | |
| "learning_rate": 5.5789473684210526e-05, | |
| "loss": 0.7075, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.3540258143822987, | |
| "grad_norm": 1.6189179778780058, | |
| "learning_rate": 5.68421052631579e-05, | |
| "loss": 0.7105, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.3605818479819709, | |
| "grad_norm": 1.2006446280190224, | |
| "learning_rate": 5.789473684210527e-05, | |
| "loss": 0.7151, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.3671378815816431, | |
| "grad_norm": 1.6321920047861564, | |
| "learning_rate": 5.8947368421052634e-05, | |
| "loss": 0.7136, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.3736939151813153, | |
| "grad_norm": 1.5287305591592921, | |
| "learning_rate": 6.000000000000001e-05, | |
| "loss": 0.7107, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.3802499487809875, | |
| "grad_norm": 1.3257983915749183, | |
| "learning_rate": 6.105263157894738e-05, | |
| "loss": 0.7183, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.3868059823806597, | |
| "grad_norm": 0.8173893638911733, | |
| "learning_rate": 6.210526315789474e-05, | |
| "loss": 0.7087, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.3933620159803319, | |
| "grad_norm": 1.5027100216095572, | |
| "learning_rate": 6.315789473684212e-05, | |
| "loss": 0.7099, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.3999180495800041, | |
| "grad_norm": 0.9118886385672123, | |
| "learning_rate": 6.421052631578948e-05, | |
| "loss": 0.7056, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.4064740831796763, | |
| "grad_norm": 8.129014604966502, | |
| "learning_rate": 6.526315789473685e-05, | |
| "loss": 0.7359, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.4130301167793485, | |
| "grad_norm": 3.8240665538108343, | |
| "learning_rate": 6.631578947368421e-05, | |
| "loss": 0.7738, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.4195861503790207, | |
| "grad_norm": 3.710700934507178, | |
| "learning_rate": 6.736842105263159e-05, | |
| "loss": 0.7309, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.4261421839786929, | |
| "grad_norm": 1.7575755137455045, | |
| "learning_rate": 6.842105263157895e-05, | |
| "loss": 0.7373, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.4326982175783651, | |
| "grad_norm": 1.29443521888165, | |
| "learning_rate": 6.947368421052632e-05, | |
| "loss": 0.7149, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.4392542511780373, | |
| "grad_norm": 1.9730328727947009, | |
| "learning_rate": 7.052631578947368e-05, | |
| "loss": 0.7309, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.4458102847777095, | |
| "grad_norm": 1.5434748912530512, | |
| "learning_rate": 7.157894736842105e-05, | |
| "loss": 0.7307, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.45236631837738167, | |
| "grad_norm": 1.3098144472083486, | |
| "learning_rate": 7.263157894736843e-05, | |
| "loss": 0.7226, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.4589223519770539, | |
| "grad_norm": 1.4763764747327572, | |
| "learning_rate": 7.368421052631579e-05, | |
| "loss": 0.7122, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.4654783855767261, | |
| "grad_norm": 1.782399664190018, | |
| "learning_rate": 7.473684210526316e-05, | |
| "loss": 0.7105, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.47203441917639827, | |
| "grad_norm": 0.8645965374178922, | |
| "learning_rate": 7.578947368421052e-05, | |
| "loss": 0.7189, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.4785904527760705, | |
| "grad_norm": 1.1783141500345207, | |
| "learning_rate": 7.68421052631579e-05, | |
| "loss": 0.7155, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.4851464863757427, | |
| "grad_norm": 1.631405575969008, | |
| "learning_rate": 7.789473684210527e-05, | |
| "loss": 0.7163, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.49170251997541486, | |
| "grad_norm": 1.799584170211315, | |
| "learning_rate": 7.894736842105263e-05, | |
| "loss": 0.724, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.4982585535750871, | |
| "grad_norm": 131.28302876683892, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9347, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.5048145871747592, | |
| "grad_norm": 2.879295120261663, | |
| "learning_rate": 7.999957809295807e-05, | |
| "loss": 0.7474, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.5113706207744315, | |
| "grad_norm": 0.9410659862896121, | |
| "learning_rate": 7.99983123807325e-05, | |
| "loss": 0.7124, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.5179266543741037, | |
| "grad_norm": 2.4498983574935487, | |
| "learning_rate": 7.999620289002397e-05, | |
| "loss": 0.7294, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.5244826879737758, | |
| "grad_norm": 1.3312297085268832, | |
| "learning_rate": 7.999324966533291e-05, | |
| "loss": 0.7155, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.5310387215734481, | |
| "grad_norm": 1.9333142275612973, | |
| "learning_rate": 7.998945276895866e-05, | |
| "loss": 0.7249, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.5375947551731203, | |
| "grad_norm": 1.5284340416931426, | |
| "learning_rate": 7.998481228099806e-05, | |
| "loss": 0.7199, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.5441507887727924, | |
| "grad_norm": 1.5374661844560404, | |
| "learning_rate": 7.997932829934386e-05, | |
| "loss": 0.7174, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.5507068223724647, | |
| "grad_norm": 1.099517635025565, | |
| "learning_rate": 7.997300093968255e-05, | |
| "loss": 0.7106, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.5572628559721369, | |
| "grad_norm": 1.4642952447149862, | |
| "learning_rate": 7.996583033549204e-05, | |
| "loss": 0.7087, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.563818889571809, | |
| "grad_norm": 1.4883363499473765, | |
| "learning_rate": 7.995781663803876e-05, | |
| "loss": 0.724, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.5703749231714813, | |
| "grad_norm": 0.8588229153701384, | |
| "learning_rate": 7.994896001637443e-05, | |
| "loss": 0.7052, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.5769309567711535, | |
| "grad_norm": 1.1329548393299773, | |
| "learning_rate": 7.993926065733265e-05, | |
| "loss": 0.7092, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.5834869903708256, | |
| "grad_norm": 1.9381285339000986, | |
| "learning_rate": 7.99287187655248e-05, | |
| "loss": 0.7075, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.5900430239704979, | |
| "grad_norm": 1.0298360769847832, | |
| "learning_rate": 7.991733456333579e-05, | |
| "loss": 0.7088, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.5965990575701701, | |
| "grad_norm": 1.5913971706858665, | |
| "learning_rate": 7.990510829091938e-05, | |
| "loss": 0.7044, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.6031550911698422, | |
| "grad_norm": 1.0752295835923937, | |
| "learning_rate": 7.98920402061931e-05, | |
| "loss": 0.698, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.6097111247695144, | |
| "grad_norm": 1.3411827568812442, | |
| "learning_rate": 7.987813058483278e-05, | |
| "loss": 0.6897, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.6162671583691867, | |
| "grad_norm": 1.0994663107424685, | |
| "learning_rate": 7.98633797202668e-05, | |
| "loss": 0.7009, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.6228231919688588, | |
| "grad_norm": 1.5572209303699358, | |
| "learning_rate": 7.984778792366983e-05, | |
| "loss": 0.7001, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.629379225568531, | |
| "grad_norm": 1.2622939681539915, | |
| "learning_rate": 7.98313555239563e-05, | |
| "loss": 0.7027, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.6359352591682033, | |
| "grad_norm": 1.5287178198020728, | |
| "learning_rate": 7.98140828677735e-05, | |
| "loss": 0.6977, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.6424912927678754, | |
| "grad_norm": 1.1853944350439, | |
| "learning_rate": 7.979597031949415e-05, | |
| "loss": 0.6943, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.6490473263675476, | |
| "grad_norm": 1.1219402341610256, | |
| "learning_rate": 7.977701826120888e-05, | |
| "loss": 0.6984, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.6556033599672199, | |
| "grad_norm": 1.8026221843535832, | |
| "learning_rate": 7.975722709271799e-05, | |
| "loss": 0.6955, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.662159393566892, | |
| "grad_norm": 1.0073031848270164, | |
| "learning_rate": 7.973659723152317e-05, | |
| "loss": 0.6942, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.6687154271665642, | |
| "grad_norm": 1.4709635304665, | |
| "learning_rate": 7.97151291128186e-05, | |
| "loss": 0.7024, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.6752714607662365, | |
| "grad_norm": 1.5710635409318867, | |
| "learning_rate": 7.96928231894818e-05, | |
| "loss": 0.6917, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.6818274943659086, | |
| "grad_norm": 0.8520728596618132, | |
| "learning_rate": 7.96696799320641e-05, | |
| "loss": 0.6821, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.6883835279655808, | |
| "grad_norm": 1.3459049216152963, | |
| "learning_rate": 7.964569982878063e-05, | |
| "loss": 0.6916, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.694939561565253, | |
| "grad_norm": 0.9826433336489447, | |
| "learning_rate": 7.962088338550013e-05, | |
| "loss": 0.6894, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.7014955951649252, | |
| "grad_norm": 1.2896938337982984, | |
| "learning_rate": 7.959523112573422e-05, | |
| "loss": 0.6933, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.7080516287645974, | |
| "grad_norm": 0.9867952374397386, | |
| "learning_rate": 7.956874359062632e-05, | |
| "loss": 0.697, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.7146076623642696, | |
| "grad_norm": 1.2639647577953248, | |
| "learning_rate": 7.954142133894033e-05, | |
| "loss": 0.6894, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.7211636959639418, | |
| "grad_norm": 1.357158506896498, | |
| "learning_rate": 7.951326494704878e-05, | |
| "loss": 0.691, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.727719729563614, | |
| "grad_norm": 0.8936367138602801, | |
| "learning_rate": 7.948427500892065e-05, | |
| "loss": 0.6887, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.7342757631632862, | |
| "grad_norm": 1.0046258611386971, | |
| "learning_rate": 7.94544521361089e-05, | |
| "loss": 0.6866, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.7408317967629584, | |
| "grad_norm": 1.0742250060740215, | |
| "learning_rate": 7.942379695773753e-05, | |
| "loss": 0.6888, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.7473878303626306, | |
| "grad_norm": 0.7284144457213086, | |
| "learning_rate": 7.939231012048833e-05, | |
| "loss": 0.6811, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.7539438639623028, | |
| "grad_norm": 0.7052427321018496, | |
| "learning_rate": 7.93599922885872e-05, | |
| "loss": 0.6763, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.760499897561975, | |
| "grad_norm": 0.798707901395817, | |
| "learning_rate": 7.932684414379021e-05, | |
| "loss": 0.692, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.7670559311616472, | |
| "grad_norm": 1.5879889624779187, | |
| "learning_rate": 7.929286638536913e-05, | |
| "loss": 0.6908, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.7736119647613194, | |
| "grad_norm": 0.8333519158322553, | |
| "learning_rate": 7.925805973009672e-05, | |
| "loss": 0.6734, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.7801679983609916, | |
| "grad_norm": 1.0218779201091506, | |
| "learning_rate": 7.922242491223167e-05, | |
| "loss": 0.684, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.7867240319606638, | |
| "grad_norm": 1.2082599250889219, | |
| "learning_rate": 7.918596268350296e-05, | |
| "loss": 0.6765, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.793280065560336, | |
| "grad_norm": 1.179921751372486, | |
| "learning_rate": 7.914867381309418e-05, | |
| "loss": 0.6868, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.7998360991600082, | |
| "grad_norm": 0.9486398114671882, | |
| "learning_rate": 7.911055908762718e-05, | |
| "loss": 0.6749, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.8063921327596804, | |
| "grad_norm": 0.8000482601006661, | |
| "learning_rate": 7.90716193111455e-05, | |
| "loss": 0.6747, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.8129481663593526, | |
| "grad_norm": 0.9517101245313929, | |
| "learning_rate": 7.903185530509743e-05, | |
| "loss": 0.6794, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.8195041999590248, | |
| "grad_norm": 0.9986534633574224, | |
| "learning_rate": 7.899126790831869e-05, | |
| "loss": 0.6774, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.826060233558697, | |
| "grad_norm": 1.1257859217497668, | |
| "learning_rate": 7.894985797701472e-05, | |
| "loss": 0.6793, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.8326162671583692, | |
| "grad_norm": 1.0292173321571256, | |
| "learning_rate": 7.890762638474256e-05, | |
| "loss": 0.6826, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.8391723007580414, | |
| "grad_norm": 0.7557871567857994, | |
| "learning_rate": 7.886457402239256e-05, | |
| "loss": 0.6792, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.8457283343577136, | |
| "grad_norm": 1.10326991797423, | |
| "learning_rate": 7.882070179816944e-05, | |
| "loss": 0.6786, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.8522843679573858, | |
| "grad_norm": 0.6776535159011668, | |
| "learning_rate": 7.877601063757323e-05, | |
| "loss": 0.6769, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.858840401557058, | |
| "grad_norm": 0.8010200105501427, | |
| "learning_rate": 7.873050148337967e-05, | |
| "loss": 0.6748, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.8653964351567301, | |
| "grad_norm": 0.9126529329815608, | |
| "learning_rate": 7.868417529562043e-05, | |
| "loss": 0.6632, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.8719524687564024, | |
| "grad_norm": 1.145222649075524, | |
| "learning_rate": 7.863703305156273e-05, | |
| "loss": 0.6756, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.8785085023560746, | |
| "grad_norm": 1.082669097422374, | |
| "learning_rate": 7.858907574568882e-05, | |
| "loss": 0.6765, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.8850645359557467, | |
| "grad_norm": 1.0310987601522028, | |
| "learning_rate": 7.854030438967494e-05, | |
| "loss": 0.6738, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.891620569555419, | |
| "grad_norm": 1.0160706638367318, | |
| "learning_rate": 7.849072001237001e-05, | |
| "loss": 0.6778, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.8981766031550912, | |
| "grad_norm": 0.9374366244899177, | |
| "learning_rate": 7.844032365977396e-05, | |
| "loss": 0.6736, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.9047326367547633, | |
| "grad_norm": 0.7008738978429749, | |
| "learning_rate": 7.838911639501557e-05, | |
| "loss": 0.6781, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.9112886703544356, | |
| "grad_norm": 0.4532137724870558, | |
| "learning_rate": 7.833709929833012e-05, | |
| "loss": 0.6686, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.9178447039541078, | |
| "grad_norm": 0.5139137753219752, | |
| "learning_rate": 7.828427346703657e-05, | |
| "loss": 0.6672, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.9244007375537799, | |
| "grad_norm": 0.47106346244337904, | |
| "learning_rate": 7.823064001551445e-05, | |
| "loss": 0.6621, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.9309567711534522, | |
| "grad_norm": 0.3844200151143341, | |
| "learning_rate": 7.81762000751803e-05, | |
| "loss": 0.6669, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.9375128047531244, | |
| "grad_norm": 0.4844251889310036, | |
| "learning_rate": 7.812095479446383e-05, | |
| "loss": 0.6606, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.9440688383527965, | |
| "grad_norm": 0.5614156709455322, | |
| "learning_rate": 7.806490533878368e-05, | |
| "loss": 0.6627, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.9506248719524688, | |
| "grad_norm": 0.6295196907491479, | |
| "learning_rate": 7.800805289052286e-05, | |
| "loss": 0.6652, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.957180905552141, | |
| "grad_norm": 0.7172605017331375, | |
| "learning_rate": 7.795039864900378e-05, | |
| "loss": 0.6632, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.9637369391518131, | |
| "grad_norm": 0.9441139561703984, | |
| "learning_rate": 7.789194383046295e-05, | |
| "loss": 0.6681, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.9702929727514854, | |
| "grad_norm": 1.3151350146737344, | |
| "learning_rate": 7.783268966802539e-05, | |
| "loss": 0.6703, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.9768490063511576, | |
| "grad_norm": 0.5115168470062825, | |
| "learning_rate": 7.777263741167849e-05, | |
| "loss": 0.6647, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.9834050399508297, | |
| "grad_norm": 1.0163356112728301, | |
| "learning_rate": 7.771178832824573e-05, | |
| "loss": 0.666, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.9899610735505019, | |
| "grad_norm": 1.1732972717592203, | |
| "learning_rate": 7.765014370135999e-05, | |
| "loss": 0.6592, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.9965171071501742, | |
| "grad_norm": 0.7851604401937097, | |
| "learning_rate": 7.758770483143634e-05, | |
| "loss": 0.6592, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 1.0055316533497234, | |
| "grad_norm": 1.6420611019668498, | |
| "learning_rate": 7.752447303564475e-05, | |
| "loss": 1.1949, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 1.0120876869493955, | |
| "grad_norm": 0.9808444668187579, | |
| "learning_rate": 7.74604496478822e-05, | |
| "loss": 0.6428, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 1.0186437205490677, | |
| "grad_norm": 1.1426869091876994, | |
| "learning_rate": 7.73956360187446e-05, | |
| "loss": 0.6346, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 1.02519975414874, | |
| "grad_norm": 1.147145143696543, | |
| "learning_rate": 7.733003351549829e-05, | |
| "loss": 0.6388, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 1.0317557877484123, | |
| "grad_norm": 0.7817012939106086, | |
| "learning_rate": 7.726364352205117e-05, | |
| "loss": 0.6408, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 1.0383118213480844, | |
| "grad_norm": 0.694581928412185, | |
| "learning_rate": 7.719646743892352e-05, | |
| "loss": 0.6284, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 1.0448678549477566, | |
| "grad_norm": 0.6070626955744831, | |
| "learning_rate": 7.712850668321846e-05, | |
| "loss": 0.6405, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 1.0514238885474287, | |
| "grad_norm": 0.6514623348225942, | |
| "learning_rate": 7.705976268859207e-05, | |
| "loss": 0.6395, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.057979922147101, | |
| "grad_norm": 0.5619592930813068, | |
| "learning_rate": 7.699023690522315e-05, | |
| "loss": 0.629, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 1.0645359557467733, | |
| "grad_norm": 0.5769129425178826, | |
| "learning_rate": 7.691993079978252e-05, | |
| "loss": 0.6381, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 1.0710919893464454, | |
| "grad_norm": 0.8198780194642444, | |
| "learning_rate": 7.684884585540227e-05, | |
| "loss": 0.6363, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 1.0776480229461176, | |
| "grad_norm": 0.7916477488592911, | |
| "learning_rate": 7.677698357164431e-05, | |
| "loss": 0.635, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 1.0842040565457898, | |
| "grad_norm": 0.5931606825910369, | |
| "learning_rate": 7.670434546446886e-05, | |
| "loss": 0.6333, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 1.090760090145462, | |
| "grad_norm": 0.5877139627697113, | |
| "learning_rate": 7.663093306620231e-05, | |
| "loss": 0.629, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 1.097316123745134, | |
| "grad_norm": 0.852370483441162, | |
| "learning_rate": 7.655674792550507e-05, | |
| "loss": 0.6395, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 1.1038721573448065, | |
| "grad_norm": 1.00327271618352, | |
| "learning_rate": 7.648179160733883e-05, | |
| "loss": 0.6478, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 1.1104281909444786, | |
| "grad_norm": 1.0959509941262375, | |
| "learning_rate": 7.640606569293347e-05, | |
| "loss": 0.6415, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 1.1169842245441508, | |
| "grad_norm": 0.7671153051839551, | |
| "learning_rate": 7.632957177975387e-05, | |
| "loss": 0.6401, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.123540258143823, | |
| "grad_norm": 0.7544679819781906, | |
| "learning_rate": 7.625231148146601e-05, | |
| "loss": 0.6365, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 1.1300962917434951, | |
| "grad_norm": 0.7948382138503196, | |
| "learning_rate": 7.61742864279031e-05, | |
| "loss": 0.6363, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 1.1366523253431673, | |
| "grad_norm": 0.7047533773597212, | |
| "learning_rate": 7.609549826503115e-05, | |
| "loss": 0.6421, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 1.1432083589428397, | |
| "grad_norm": 0.7374204351284568, | |
| "learning_rate": 7.601594865491414e-05, | |
| "loss": 0.6407, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 1.1497643925425118, | |
| "grad_norm": 0.6911928835033512, | |
| "learning_rate": 7.593563927567916e-05, | |
| "loss": 0.625, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 1.156320426142184, | |
| "grad_norm": 0.4803947112250499, | |
| "learning_rate": 7.585457182148081e-05, | |
| "loss": 0.628, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 1.1628764597418562, | |
| "grad_norm": 0.3950061404414093, | |
| "learning_rate": 7.577274800246558e-05, | |
| "loss": 0.6357, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 1.1694324933415283, | |
| "grad_norm": 0.4280389547638801, | |
| "learning_rate": 7.569016954473577e-05, | |
| "loss": 0.6434, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 1.1759885269412005, | |
| "grad_norm": 0.5103954479058104, | |
| "learning_rate": 7.560683819031298e-05, | |
| "loss": 0.6325, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 1.1825445605408729, | |
| "grad_norm": 0.48405163068225376, | |
| "learning_rate": 7.552275569710152e-05, | |
| "loss": 0.621, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.189100594140545, | |
| "grad_norm": 0.49505546120752403, | |
| "learning_rate": 7.543792383885113e-05, | |
| "loss": 0.6335, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 1.1956566277402172, | |
| "grad_norm": 0.5549014381017361, | |
| "learning_rate": 7.535234440511979e-05, | |
| "loss": 0.6401, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 1.2022126613398894, | |
| "grad_norm": 0.5679718866243005, | |
| "learning_rate": 7.526601920123574e-05, | |
| "loss": 0.6339, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 1.2087686949395615, | |
| "grad_norm": 0.5837881333403254, | |
| "learning_rate": 7.517895004825956e-05, | |
| "loss": 0.6201, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 1.2153247285392337, | |
| "grad_norm": 0.5275232093842687, | |
| "learning_rate": 7.509113878294572e-05, | |
| "loss": 0.6313, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 1.2218807621389058, | |
| "grad_norm": 0.4187731695243854, | |
| "learning_rate": 7.500258725770375e-05, | |
| "loss": 0.6297, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 1.2284367957385782, | |
| "grad_norm": 0.3496822319709094, | |
| "learning_rate": 7.491329734055926e-05, | |
| "loss": 0.6341, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 1.2349928293382504, | |
| "grad_norm": 0.2892298691529975, | |
| "learning_rate": 7.48232709151145e-05, | |
| "loss": 0.6289, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 1.2415488629379225, | |
| "grad_norm": 0.3970392684873568, | |
| "learning_rate": 7.473250988050861e-05, | |
| "loss": 0.6327, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 1.2481048965375947, | |
| "grad_norm": 0.5366046878238813, | |
| "learning_rate": 7.464101615137756e-05, | |
| "loss": 0.6234, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.2546609301372669, | |
| "grad_norm": 0.6365895325361283, | |
| "learning_rate": 7.454879165781379e-05, | |
| "loss": 0.6322, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 1.2612169637369393, | |
| "grad_norm": 0.6390242865748291, | |
| "learning_rate": 7.445583834532546e-05, | |
| "loss": 0.6252, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 1.2677729973366114, | |
| "grad_norm": 0.7130289120451936, | |
| "learning_rate": 7.436215817479541e-05, | |
| "loss": 0.6369, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 1.2743290309362836, | |
| "grad_norm": 0.8001337986119181, | |
| "learning_rate": 7.426775312243986e-05, | |
| "loss": 0.6341, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 1.2808850645359557, | |
| "grad_norm": 0.9421645194421999, | |
| "learning_rate": 7.41726251797666e-05, | |
| "loss": 0.629, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 1.287441098135628, | |
| "grad_norm": 1.0651209082990578, | |
| "learning_rate": 7.407677635353308e-05, | |
| "loss": 0.6274, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 1.2939971317353, | |
| "grad_norm": 0.8478545298742448, | |
| "learning_rate": 7.398020866570407e-05, | |
| "loss": 0.6373, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 1.3005531653349722, | |
| "grad_norm": 0.5323333743534195, | |
| "learning_rate": 7.388292415340888e-05, | |
| "loss": 0.631, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 1.3071091989346446, | |
| "grad_norm": 0.37990592076356283, | |
| "learning_rate": 7.37849248688986e-05, | |
| "loss": 0.631, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 1.3136652325343168, | |
| "grad_norm": 0.4732271711472085, | |
| "learning_rate": 7.368621287950264e-05, | |
| "loss": 0.6328, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.320221266133989, | |
| "grad_norm": 0.5259491499827896, | |
| "learning_rate": 7.358679026758515e-05, | |
| "loss": 0.6283, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 1.326777299733661, | |
| "grad_norm": 0.5000279931190099, | |
| "learning_rate": 7.348665913050115e-05, | |
| "loss": 0.6208, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 0.3912490196632365, | |
| "learning_rate": 7.338582158055224e-05, | |
| "loss": 0.6251, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 1.3398893669330056, | |
| "grad_norm": 0.38905604815288286, | |
| "learning_rate": 7.328427974494201e-05, | |
| "loss": 0.6179, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 1.3464454005326778, | |
| "grad_norm": 0.4906878706390755, | |
| "learning_rate": 7.318203576573126e-05, | |
| "loss": 0.6271, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.35300143413235, | |
| "grad_norm": 0.4146373419913017, | |
| "learning_rate": 7.307909179979274e-05, | |
| "loss": 0.6256, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 1.3595574677320221, | |
| "grad_norm": 0.29246282870192875, | |
| "learning_rate": 7.297545001876563e-05, | |
| "loss": 0.6219, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 1.3661135013316943, | |
| "grad_norm": 0.27459922699544653, | |
| "learning_rate": 7.28711126090098e-05, | |
| "loss": 0.6346, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 1.3726695349313665, | |
| "grad_norm": 0.3594793278431249, | |
| "learning_rate": 7.276608177155968e-05, | |
| "loss": 0.6234, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 1.3792255685310386, | |
| "grad_norm": 0.46199653506152194, | |
| "learning_rate": 7.266035972207773e-05, | |
| "loss": 0.6328, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.385781602130711, | |
| "grad_norm": 0.5084391547285126, | |
| "learning_rate": 7.25539486908078e-05, | |
| "loss": 0.6287, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 1.3923376357303832, | |
| "grad_norm": 0.4970090588981773, | |
| "learning_rate": 7.24468509225281e-05, | |
| "loss": 0.6338, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 1.3988936693300553, | |
| "grad_norm": 0.4297079110960391, | |
| "learning_rate": 7.233906867650373e-05, | |
| "loss": 0.6246, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 1.4054497029297275, | |
| "grad_norm": 0.4277415770961674, | |
| "learning_rate": 7.223060422643914e-05, | |
| "loss": 0.6235, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 1.4120057365293996, | |
| "grad_norm": 0.5211718034258063, | |
| "learning_rate": 7.212145986043007e-05, | |
| "loss": 0.626, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 1.418561770129072, | |
| "grad_norm": 0.7113602181910502, | |
| "learning_rate": 7.201163788091536e-05, | |
| "loss": 0.626, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 1.4251178037287442, | |
| "grad_norm": 0.8702986528721038, | |
| "learning_rate": 7.190114060462837e-05, | |
| "loss": 0.6285, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 1.4316738373284164, | |
| "grad_norm": 0.88781618603766, | |
| "learning_rate": 7.178997036254799e-05, | |
| "loss": 0.625, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 1.4382298709280885, | |
| "grad_norm": 0.8543987595137604, | |
| "learning_rate": 7.167812949984966e-05, | |
| "loss": 0.6369, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 1.4447859045277607, | |
| "grad_norm": 0.6591694598278867, | |
| "learning_rate": 7.156562037585576e-05, | |
| "loss": 0.6309, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.4513419381274328, | |
| "grad_norm": 0.435460183285974, | |
| "learning_rate": 7.145244536398584e-05, | |
| "loss": 0.6337, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 1.457897971727105, | |
| "grad_norm": 0.5394064377655016, | |
| "learning_rate": 7.133860685170665e-05, | |
| "loss": 0.6272, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 1.4644540053267772, | |
| "grad_norm": 0.6560512889140389, | |
| "learning_rate": 7.12241072404817e-05, | |
| "loss": 0.6317, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 1.4710100389264495, | |
| "grad_norm": 0.5108872994150431, | |
| "learning_rate": 7.110894894572056e-05, | |
| "loss": 0.6266, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 1.4775660725261217, | |
| "grad_norm": 0.4508746314491801, | |
| "learning_rate": 7.099313439672806e-05, | |
| "loss": 0.6222, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.4841221061257939, | |
| "grad_norm": 0.5932762714408137, | |
| "learning_rate": 7.087666603665284e-05, | |
| "loss": 0.6174, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 1.490678139725466, | |
| "grad_norm": 0.5703671444864514, | |
| "learning_rate": 7.0759546322436e-05, | |
| "loss": 0.6286, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 1.4972341733251384, | |
| "grad_norm": 0.4219759927229514, | |
| "learning_rate": 7.064177772475912e-05, | |
| "loss": 0.621, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 1.5037902069248106, | |
| "grad_norm": 0.3899488374409842, | |
| "learning_rate": 7.052336272799227e-05, | |
| "loss": 0.6368, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 1.5103462405244827, | |
| "grad_norm": 0.3352814743100072, | |
| "learning_rate": 7.040430383014146e-05, | |
| "loss": 0.6273, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.516902274124155, | |
| "grad_norm": 0.49330992250119887, | |
| "learning_rate": 7.02846035427961e-05, | |
| "loss": 0.6268, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 1.523458307723827, | |
| "grad_norm": 0.7275390777504656, | |
| "learning_rate": 7.016426439107586e-05, | |
| "loss": 0.6198, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 1.5300143413234992, | |
| "grad_norm": 0.5944986948743242, | |
| "learning_rate": 7.004328891357753e-05, | |
| "loss": 0.6321, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 1.5365703749231714, | |
| "grad_norm": 0.40136071416301794, | |
| "learning_rate": 6.992167966232143e-05, | |
| "loss": 0.6205, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 1.5431264085228436, | |
| "grad_norm": 0.42683320761735183, | |
| "learning_rate": 6.979943920269749e-05, | |
| "loss": 0.6282, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.5496824421225157, | |
| "grad_norm": 0.37233786072575653, | |
| "learning_rate": 6.967657011341126e-05, | |
| "loss": 0.6216, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 1.556238475722188, | |
| "grad_norm": 0.367274159532209, | |
| "learning_rate": 6.955307498642948e-05, | |
| "loss": 0.6224, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 1.5627945093218603, | |
| "grad_norm": 0.3660447730807658, | |
| "learning_rate": 6.942895642692527e-05, | |
| "loss": 0.6202, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 1.5693505429215324, | |
| "grad_norm": 0.38882305238122433, | |
| "learning_rate": 6.930421705322339e-05, | |
| "loss": 0.6195, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 1.5759065765212048, | |
| "grad_norm": 0.5684648344288802, | |
| "learning_rate": 6.917885949674483e-05, | |
| "loss": 0.6228, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.582462610120877, | |
| "grad_norm": 0.7431328174808526, | |
| "learning_rate": 6.905288640195141e-05, | |
| "loss": 0.626, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 1.5890186437205491, | |
| "grad_norm": 0.7410518651663662, | |
| "learning_rate": 6.892630042628988e-05, | |
| "loss": 0.6248, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 1.5955746773202213, | |
| "grad_norm": 0.6219837555078648, | |
| "learning_rate": 6.879910424013599e-05, | |
| "loss": 0.6295, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 1.6021307109198935, | |
| "grad_norm": 0.5396699034196659, | |
| "learning_rate": 6.867130052673806e-05, | |
| "loss": 0.6231, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 1.6086867445195656, | |
| "grad_norm": 0.4531223244262461, | |
| "learning_rate": 6.854289198216042e-05, | |
| "loss": 0.6346, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.6152427781192378, | |
| "grad_norm": 0.3242170520225258, | |
| "learning_rate": 6.841388131522656e-05, | |
| "loss": 0.6292, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.62179881171891, | |
| "grad_norm": 0.34710026048248854, | |
| "learning_rate": 6.828427124746191e-05, | |
| "loss": 0.6153, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 1.628354845318582, | |
| "grad_norm": 0.44267957645174644, | |
| "learning_rate": 6.815406451303647e-05, | |
| "loss": 0.6205, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.6349108789182545, | |
| "grad_norm": 0.5545408679102592, | |
| "learning_rate": 6.802326385870715e-05, | |
| "loss": 0.621, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 1.6414669125179266, | |
| "grad_norm": 0.5443247431153629, | |
| "learning_rate": 6.789187204375981e-05, | |
| "loss": 0.622, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.6480229461175988, | |
| "grad_norm": 0.4301197066284, | |
| "learning_rate": 6.775989183995108e-05, | |
| "loss": 0.6135, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 1.6545789797172712, | |
| "grad_norm": 0.396698152583572, | |
| "learning_rate": 6.762732603144978e-05, | |
| "loss": 0.6216, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.6611350133169434, | |
| "grad_norm": 0.3515261284616528, | |
| "learning_rate": 6.749417741477836e-05, | |
| "loss": 0.6187, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 1.6676910469166155, | |
| "grad_norm": 0.35858581397311556, | |
| "learning_rate": 6.736044879875373e-05, | |
| "loss": 0.6185, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.6742470805162877, | |
| "grad_norm": 0.34392278311565416, | |
| "learning_rate": 6.722614300442815e-05, | |
| "loss": 0.6154, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.6808031141159598, | |
| "grad_norm": 0.36707892916862706, | |
| "learning_rate": 6.709126286502965e-05, | |
| "loss": 0.6179, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.687359147715632, | |
| "grad_norm": 0.441219103876971, | |
| "learning_rate": 6.695581122590225e-05, | |
| "loss": 0.6227, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 1.6939151813153042, | |
| "grad_norm": 0.3994835806954666, | |
| "learning_rate": 6.681979094444596e-05, | |
| "loss": 0.6192, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.7004712149149763, | |
| "grad_norm": 0.433716188444932, | |
| "learning_rate": 6.668320489005654e-05, | |
| "loss": 0.622, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 1.7070272485146485, | |
| "grad_norm": 0.47283003533467305, | |
| "learning_rate": 6.654605594406486e-05, | |
| "loss": 0.6253, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.7135832821143209, | |
| "grad_norm": 0.49013239605830206, | |
| "learning_rate": 6.640834699967626e-05, | |
| "loss": 0.6214, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 1.720139315713993, | |
| "grad_norm": 0.49324672918814405, | |
| "learning_rate": 6.627008096190938e-05, | |
| "loss": 0.6182, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.7266953493136652, | |
| "grad_norm": 0.5169572714827575, | |
| "learning_rate": 6.6131260747535e-05, | |
| "loss": 0.6259, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 1.7332513829133376, | |
| "grad_norm": 0.5873107078500235, | |
| "learning_rate": 6.59918892850144e-05, | |
| "loss": 0.6235, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.7398074165130097, | |
| "grad_norm": 0.5020487886809576, | |
| "learning_rate": 6.585196951443763e-05, | |
| "loss": 0.6221, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.746363450112682, | |
| "grad_norm": 0.329791614699069, | |
| "learning_rate": 6.571150438746157e-05, | |
| "loss": 0.6265, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.752919483712354, | |
| "grad_norm": 0.2861139606661766, | |
| "learning_rate": 6.557049686724751e-05, | |
| "loss": 0.624, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 1.7594755173120262, | |
| "grad_norm": 0.46750896287166216, | |
| "learning_rate": 6.542894992839873e-05, | |
| "loss": 0.6241, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.7660315509116984, | |
| "grad_norm": 0.5684934196993104, | |
| "learning_rate": 6.528686655689774e-05, | |
| "loss": 0.6152, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 1.7725875845113706, | |
| "grad_norm": 0.48490900499130857, | |
| "learning_rate": 6.514424975004329e-05, | |
| "loss": 0.6111, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.7791436181110427, | |
| "grad_norm": 0.5506105973703118, | |
| "learning_rate": 6.500110251638715e-05, | |
| "loss": 0.6152, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 1.7856996517107149, | |
| "grad_norm": 0.4539294761333573, | |
| "learning_rate": 6.48574278756706e-05, | |
| "loss": 0.6273, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.7922556853103873, | |
| "grad_norm": 0.34705648209401146, | |
| "learning_rate": 6.471322885876077e-05, | |
| "loss": 0.6204, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 1.7988117189100594, | |
| "grad_norm": 0.2427469168923526, | |
| "learning_rate": 6.456850850758673e-05, | |
| "loss": 0.6214, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.8053677525097316, | |
| "grad_norm": 0.3004384124304078, | |
| "learning_rate": 6.44232698750752e-05, | |
| "loss": 0.6135, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.811923786109404, | |
| "grad_norm": 0.29782974812920376, | |
| "learning_rate": 6.427751602508628e-05, | |
| "loss": 0.6104, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.8184798197090761, | |
| "grad_norm": 0.29267694689244445, | |
| "learning_rate": 6.413125003234876e-05, | |
| "loss": 0.624, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 1.8250358533087483, | |
| "grad_norm": 0.3462332305273243, | |
| "learning_rate": 6.398447498239527e-05, | |
| "loss": 0.612, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.8315918869084205, | |
| "grad_norm": 0.3949978565357491, | |
| "learning_rate": 6.383719397149715e-05, | |
| "loss": 0.622, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 1.8381479205080926, | |
| "grad_norm": 0.40536973102729335, | |
| "learning_rate": 6.368941010659921e-05, | |
| "loss": 0.6118, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.8447039541077648, | |
| "grad_norm": 0.3621578404050215, | |
| "learning_rate": 6.354112650525407e-05, | |
| "loss": 0.6149, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 1.851259987707437, | |
| "grad_norm": 0.3445107311397209, | |
| "learning_rate": 6.339234629555655e-05, | |
| "loss": 0.6196, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.857816021307109, | |
| "grad_norm": 0.30605392165712425, | |
| "learning_rate": 6.324307261607754e-05, | |
| "loss": 0.6239, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 1.8643720549067813, | |
| "grad_norm": 0.3080701040643038, | |
| "learning_rate": 6.309330861579786e-05, | |
| "loss": 0.629, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.8709280885064536, | |
| "grad_norm": 0.3349937492345958, | |
| "learning_rate": 6.294305745404185e-05, | |
| "loss": 0.6224, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 1.8774841221061258, | |
| "grad_norm": 0.3396848816828842, | |
| "learning_rate": 6.279232230041065e-05, | |
| "loss": 0.6182, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.884040155705798, | |
| "grad_norm": 0.3410093342010557, | |
| "learning_rate": 6.26411063347154e-05, | |
| "loss": 0.6188, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 1.8905961893054704, | |
| "grad_norm": 0.314163527323835, | |
| "learning_rate": 6.248941274691017e-05, | |
| "loss": 0.6169, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.8971522229051425, | |
| "grad_norm": 0.3976946162007582, | |
| "learning_rate": 6.233724473702457e-05, | |
| "loss": 0.6195, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 1.9037082565048147, | |
| "grad_norm": 0.5601353024746372, | |
| "learning_rate": 6.218460551509636e-05, | |
| "loss": 0.6206, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.9102642901044868, | |
| "grad_norm": 0.6132983016524085, | |
| "learning_rate": 6.203149830110367e-05, | |
| "loss": 0.6138, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 1.916820323704159, | |
| "grad_norm": 0.5021407478552538, | |
| "learning_rate": 6.18779263248971e-05, | |
| "loss": 0.6219, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.9233763573038312, | |
| "grad_norm": 0.3890448472238098, | |
| "learning_rate": 6.172389282613151e-05, | |
| "loss": 0.6187, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 1.9299323909035033, | |
| "grad_norm": 0.3361740572901459, | |
| "learning_rate": 6.156940105419785e-05, | |
| "loss": 0.6218, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.9364884245031755, | |
| "grad_norm": 0.2907876046977178, | |
| "learning_rate": 6.141445426815443e-05, | |
| "loss": 0.6166, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 1.9430444581028476, | |
| "grad_norm": 0.2632987500846716, | |
| "learning_rate": 6.125905573665824e-05, | |
| "loss": 0.6232, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.9496004917025198, | |
| "grad_norm": 0.32120831457165155, | |
| "learning_rate": 6.110320873789604e-05, | |
| "loss": 0.6242, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 1.9561565253021922, | |
| "grad_norm": 0.32170026271883145, | |
| "learning_rate": 6.094691655951512e-05, | |
| "loss": 0.6094, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.9627125589018644, | |
| "grad_norm": 0.27126547953075625, | |
| "learning_rate": 6.079018249855402e-05, | |
| "loss": 0.622, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 1.9692685925015365, | |
| "grad_norm": 0.25823866111956445, | |
| "learning_rate": 6.063300986137297e-05, | |
| "loss": 0.6195, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.975824626101209, | |
| "grad_norm": 0.2777978831118673, | |
| "learning_rate": 6.047540196358405e-05, | |
| "loss": 0.6188, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 1.982380659700881, | |
| "grad_norm": 0.3097245776135163, | |
| "learning_rate": 6.0317362129981375e-05, | |
| "loss": 0.6161, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.9889366933005532, | |
| "grad_norm": 0.3108082557836966, | |
| "learning_rate": 6.015889369447088e-05, | |
| "loss": 0.6166, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 1.9954927269002254, | |
| "grad_norm": 0.2659644339645489, | |
| "learning_rate": 6.000000000000001e-05, | |
| "loss": 0.6143, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 2.006556033599672, | |
| "grad_norm": 0.3001511379848704, | |
| "learning_rate": 5.9840684398487186e-05, | |
| "loss": 0.5867, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 2.0131120671993443, | |
| "grad_norm": 0.3404835526439102, | |
| "learning_rate": 5.968095025075114e-05, | |
| "loss": 0.5745, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 2.0196681007990165, | |
| "grad_norm": 0.5738437895797627, | |
| "learning_rate": 5.952080092643993e-05, | |
| "loss": 0.5828, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 2.0262241343986886, | |
| "grad_norm": 0.7841436327218428, | |
| "learning_rate": 5.936023980395997e-05, | |
| "loss": 0.583, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 2.032780167998361, | |
| "grad_norm": 0.7382363346356913, | |
| "learning_rate": 5.919927027040463e-05, | |
| "loss": 0.5795, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 2.039336201598033, | |
| "grad_norm": 0.6545848867535631, | |
| "learning_rate": 5.903789572148295e-05, | |
| "loss": 0.5835, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 2.0458922351977056, | |
| "grad_norm": 0.516762214742333, | |
| "learning_rate": 5.887611956144782e-05, | |
| "loss": 0.5787, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 2.0524482687973777, | |
| "grad_norm": 0.5327353778668117, | |
| "learning_rate": 5.871394520302432e-05, | |
| "loss": 0.5742, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 2.05900430239705, | |
| "grad_norm": 0.5080742962254841, | |
| "learning_rate": 5.8551376067337626e-05, | |
| "loss": 0.5737, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 2.065560335996722, | |
| "grad_norm": 0.5649012551531187, | |
| "learning_rate": 5.838841558384091e-05, | |
| "loss": 0.5764, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 2.0721163695963942, | |
| "grad_norm": 0.5820350754778492, | |
| "learning_rate": 5.8225067190242925e-05, | |
| "loss": 0.5716, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 2.0786724031960664, | |
| "grad_norm": 0.46076842055118317, | |
| "learning_rate": 5.806133433243558e-05, | |
| "loss": 0.5753, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 2.0852284367957385, | |
| "grad_norm": 0.47933858405340773, | |
| "learning_rate": 5.789722046442114e-05, | |
| "loss": 0.575, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 2.0917844703954107, | |
| "grad_norm": 0.5168151561479127, | |
| "learning_rate": 5.7732729048239444e-05, | |
| "loss": 0.5749, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 2.098340503995083, | |
| "grad_norm": 0.40840899989073715, | |
| "learning_rate": 5.756786355389482e-05, | |
| "loss": 0.5802, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 2.104896537594755, | |
| "grad_norm": 0.44997063706673934, | |
| "learning_rate": 5.740262745928293e-05, | |
| "loss": 0.5717, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 2.111452571194427, | |
| "grad_norm": 0.5369629713906064, | |
| "learning_rate": 5.723702425011738e-05, | |
| "loss": 0.5751, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 2.1180086047940994, | |
| "grad_norm": 0.4475818760429054, | |
| "learning_rate": 5.707105741985615e-05, | |
| "loss": 0.5765, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 2.124564638393772, | |
| "grad_norm": 0.30932774384801376, | |
| "learning_rate": 5.6904730469627985e-05, | |
| "loss": 0.5785, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 2.131120671993444, | |
| "grad_norm": 0.33040348186144713, | |
| "learning_rate": 5.673804690815845e-05, | |
| "loss": 0.5768, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 2.1376767055931163, | |
| "grad_norm": 0.3576099667078179, | |
| "learning_rate": 5.6571010251695954e-05, | |
| "loss": 0.5815, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 2.1442327391927885, | |
| "grad_norm": 0.4237003547581017, | |
| "learning_rate": 5.6403624023937614e-05, | |
| "loss": 0.5747, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 2.1507887727924606, | |
| "grad_norm": 0.4700100373051473, | |
| "learning_rate": 5.62358917559548e-05, | |
| "loss": 0.5751, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 2.1573448063921328, | |
| "grad_norm": 0.39617955284785017, | |
| "learning_rate": 5.606781698611879e-05, | |
| "loss": 0.5798, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 2.163900839991805, | |
| "grad_norm": 0.28548306735776896, | |
| "learning_rate": 5.5899403260026006e-05, | |
| "loss": 0.5724, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 2.170456873591477, | |
| "grad_norm": 0.3315286342160672, | |
| "learning_rate": 5.573065413042333e-05, | |
| "loss": 0.5721, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 2.1770129071911493, | |
| "grad_norm": 0.3343441243180511, | |
| "learning_rate": 5.556157315713305e-05, | |
| "loss": 0.5783, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 2.1835689407908214, | |
| "grad_norm": 0.29129418213987884, | |
| "learning_rate": 5.5392163906977835e-05, | |
| "loss": 0.577, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 2.1901249743904936, | |
| "grad_norm": 0.25286013518278194, | |
| "learning_rate": 5.522242995370545e-05, | |
| "loss": 0.5698, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 2.1966810079901657, | |
| "grad_norm": 0.5001313540973934, | |
| "learning_rate": 5.505237487791343e-05, | |
| "loss": 0.5915, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 2.2032370415898384, | |
| "grad_norm": 0.2322461169124981, | |
| "learning_rate": 5.488200226697345e-05, | |
| "loss": 0.5707, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 2.2097930751895105, | |
| "grad_norm": 0.24729941417632514, | |
| "learning_rate": 5.471131571495574e-05, | |
| "loss": 0.5688, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 2.2163491087891827, | |
| "grad_norm": 0.25095902176236934, | |
| "learning_rate": 5.454031882255319e-05, | |
| "loss": 0.5804, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 2.222905142388855, | |
| "grad_norm": 0.26393993849887953, | |
| "learning_rate": 5.4369015197005506e-05, | |
| "loss": 0.5741, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 2.229461175988527, | |
| "grad_norm": 0.32581537912637687, | |
| "learning_rate": 5.419740845202292e-05, | |
| "loss": 0.5841, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 2.236017209588199, | |
| "grad_norm": 0.2928430349319568, | |
| "learning_rate": 5.4025502207710184e-05, | |
| "loss": 0.5763, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 2.2425732431878713, | |
| "grad_norm": 0.28344192622339104, | |
| "learning_rate": 5.385330009049003e-05, | |
| "loss": 0.5748, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 2.2491292767875435, | |
| "grad_norm": 0.2108350778995995, | |
| "learning_rate": 5.368080573302676e-05, | |
| "loss": 0.5677, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 2.2556853103872156, | |
| "grad_norm": 0.2654969873255356, | |
| "learning_rate": 5.3508022774149574e-05, | |
| "loss": 0.5759, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 2.262241343986888, | |
| "grad_norm": 0.26893594961047856, | |
| "learning_rate": 5.333495485877583e-05, | |
| "loss": 0.5713, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 2.26879737758656, | |
| "grad_norm": 0.2359706290249212, | |
| "learning_rate": 5.3161605637834135e-05, | |
| "loss": 0.5826, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 2.2753534111862326, | |
| "grad_norm": 0.2001826448668912, | |
| "learning_rate": 5.298797876818735e-05, | |
| "loss": 0.5828, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 2.2819094447859047, | |
| "grad_norm": 0.18687800048623612, | |
| "learning_rate": 5.2814077912555415e-05, | |
| "loss": 0.5674, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 2.288465478385577, | |
| "grad_norm": 0.18869821797720956, | |
| "learning_rate": 5.263990673943811e-05, | |
| "loss": 0.5795, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 2.295021511985249, | |
| "grad_norm": 0.18002946746844742, | |
| "learning_rate": 5.246546892303766e-05, | |
| "loss": 0.5766, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 2.3015775455849212, | |
| "grad_norm": 0.2532930596607861, | |
| "learning_rate": 5.229076814318122e-05, | |
| "loss": 0.5742, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.3081335791845934, | |
| "grad_norm": 0.3187321065178083, | |
| "learning_rate": 5.211580808524325e-05, | |
| "loss": 0.5739, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 2.3146896127842655, | |
| "grad_norm": 0.3465322974483502, | |
| "learning_rate": 5.194059244006779e-05, | |
| "loss": 0.5699, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 2.3212456463839377, | |
| "grad_norm": 0.48722607810944585, | |
| "learning_rate": 5.176512490389055e-05, | |
| "loss": 0.5756, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 2.32780167998361, | |
| "grad_norm": 1.1074199903854496, | |
| "learning_rate": 5.158940917826099e-05, | |
| "loss": 0.5832, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 2.334357713583282, | |
| "grad_norm": 0.3988094901700725, | |
| "learning_rate": 5.141344896996422e-05, | |
| "loss": 0.5871, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 2.340913747182954, | |
| "grad_norm": 0.3381422893134184, | |
| "learning_rate": 5.123724799094279e-05, | |
| "loss": 0.5908, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 2.3474697807826264, | |
| "grad_norm": 0.30952489856697063, | |
| "learning_rate": 5.106080995821836e-05, | |
| "loss": 0.5763, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 2.3540258143822985, | |
| "grad_norm": 0.3302426550471252, | |
| "learning_rate": 5.088413859381341e-05, | |
| "loss": 0.5796, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 2.3605818479819707, | |
| "grad_norm": 0.2513297320397786, | |
| "learning_rate": 5.070723762467254e-05, | |
| "loss": 0.5749, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 2.3671378815816433, | |
| "grad_norm": 0.26942873256331556, | |
| "learning_rate": 5.053011078258397e-05, | |
| "loss": 0.5782, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.3736939151813155, | |
| "grad_norm": 0.26811935982451485, | |
| "learning_rate": 5.0352761804100835e-05, | |
| "loss": 0.5893, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 2.3802499487809876, | |
| "grad_norm": 0.24961381636090246, | |
| "learning_rate": 5.017519443046226e-05, | |
| "loss": 0.5752, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 2.3868059823806598, | |
| "grad_norm": 0.2789479751757785, | |
| "learning_rate": 4.999741240751451e-05, | |
| "loss": 0.5819, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 2.393362015980332, | |
| "grad_norm": 0.23968714486879794, | |
| "learning_rate": 4.981941948563197e-05, | |
| "loss": 0.5864, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 2.399918049580004, | |
| "grad_norm": 0.3892339244230853, | |
| "learning_rate": 4.9641219419637985e-05, | |
| "loss": 0.589, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 2.4064740831796763, | |
| "grad_norm": 0.19471837418765098, | |
| "learning_rate": 4.94628159687257e-05, | |
| "loss": 0.5841, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 2.4130301167793484, | |
| "grad_norm": 0.19750205892208292, | |
| "learning_rate": 4.928421289637871e-05, | |
| "loss": 0.5735, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 2.4195861503790206, | |
| "grad_norm": 0.2849687278419682, | |
| "learning_rate": 4.9105413970291747e-05, | |
| "loss": 0.5806, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 2.4261421839786927, | |
| "grad_norm": 0.4039957657566193, | |
| "learning_rate": 4.892642296229107e-05, | |
| "loss": 0.5802, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 2.432698217578365, | |
| "grad_norm": 0.339476402567376, | |
| "learning_rate": 4.874724364825504e-05, | |
| "loss": 0.583, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 2.4392542511780375, | |
| "grad_norm": 0.2352273125693621, | |
| "learning_rate": 4.856787980803437e-05, | |
| "loss": 0.5822, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 2.4458102847777097, | |
| "grad_norm": 0.20048904331921943, | |
| "learning_rate": 4.8388335225372416e-05, | |
| "loss": 0.5865, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 2.452366318377382, | |
| "grad_norm": 0.20167869688700724, | |
| "learning_rate": 4.820861368782537e-05, | |
| "loss": 0.5753, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 2.458922351977054, | |
| "grad_norm": 0.30771705944153094, | |
| "learning_rate": 4.802871898668237e-05, | |
| "loss": 0.5888, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 2.465478385576726, | |
| "grad_norm": 0.38232619534443235, | |
| "learning_rate": 4.7848654916885446e-05, | |
| "loss": 0.5729, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 2.4720344191763983, | |
| "grad_norm": 0.2297372798599205, | |
| "learning_rate": 4.7668425276949546e-05, | |
| "loss": 0.5806, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 2.4785904527760705, | |
| "grad_norm": 0.22126295840938795, | |
| "learning_rate": 4.74880338688824e-05, | |
| "loss": 0.5761, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 2.4851464863757426, | |
| "grad_norm": 0.2882635600513661, | |
| "learning_rate": 4.730748449810429e-05, | |
| "loss": 0.577, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 2.491702519975415, | |
| "grad_norm": 0.306926273041758, | |
| "learning_rate": 4.712678097336773e-05, | |
| "loss": 0.5822, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 2.498258553575087, | |
| "grad_norm": 0.22726474030427726, | |
| "learning_rate": 4.694592710667723e-05, | |
| "loss": 0.5795, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.504814587174759, | |
| "grad_norm": 0.25322867792835857, | |
| "learning_rate": 4.6764926713208756e-05, | |
| "loss": 0.5762, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 2.5113706207744313, | |
| "grad_norm": 0.22298267407895275, | |
| "learning_rate": 4.658378361122936e-05, | |
| "loss": 0.5737, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 2.5179266543741035, | |
| "grad_norm": 0.20258291898015557, | |
| "learning_rate": 4.640250162201656e-05, | |
| "loss": 0.5787, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 2.5244826879737756, | |
| "grad_norm": 0.22752302168713118, | |
| "learning_rate": 4.622108456977773e-05, | |
| "loss": 0.58, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 2.5310387215734482, | |
| "grad_norm": 0.7757782276758474, | |
| "learning_rate": 4.6039536281569476e-05, | |
| "loss": 0.5823, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 2.5375947551731204, | |
| "grad_norm": 0.2825695671491106, | |
| "learning_rate": 4.585786058721687e-05, | |
| "loss": 0.5617, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 2.5441507887727925, | |
| "grad_norm": 0.18948706553634664, | |
| "learning_rate": 4.567606131923263e-05, | |
| "loss": 0.5747, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 2.5507068223724647, | |
| "grad_norm": 0.21721693917710894, | |
| "learning_rate": 4.549414231273633e-05, | |
| "loss": 0.5829, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 2.557262855972137, | |
| "grad_norm": 0.2383552245325273, | |
| "learning_rate": 4.531210740537347e-05, | |
| "loss": 0.5845, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 2.563818889571809, | |
| "grad_norm": 0.30583160784152247, | |
| "learning_rate": 4.512996043723453e-05, | |
| "loss": 0.5865, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 2.570374923171481, | |
| "grad_norm": 0.2812991313238803, | |
| "learning_rate": 4.494770525077392e-05, | |
| "loss": 0.5886, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 2.5769309567711534, | |
| "grad_norm": 0.2573334610716088, | |
| "learning_rate": 4.476534569072895e-05, | |
| "loss": 0.5812, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 2.5834869903708255, | |
| "grad_norm": 0.18065438415446586, | |
| "learning_rate": 4.458288560403878e-05, | |
| "loss": 0.5745, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 2.590043023970498, | |
| "grad_norm": 0.2944192743209924, | |
| "learning_rate": 4.440032883976318e-05, | |
| "loss": 0.5771, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 2.5965990575701703, | |
| "grad_norm": 0.23346564334137798, | |
| "learning_rate": 4.421767924900136e-05, | |
| "loss": 0.5773, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 2.6031550911698424, | |
| "grad_norm": 0.2274103844648281, | |
| "learning_rate": 4.403494068481074e-05, | |
| "loss": 0.5792, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 2.6097111247695146, | |
| "grad_norm": 0.18276656304603, | |
| "learning_rate": 4.385211700212567e-05, | |
| "loss": 0.5858, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 2.6162671583691868, | |
| "grad_norm": 0.18285901538118296, | |
| "learning_rate": 4.3669212057676145e-05, | |
| "loss": 0.5799, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 2.622823191968859, | |
| "grad_norm": 0.18097190512177241, | |
| "learning_rate": 4.348622970990634e-05, | |
| "loss": 0.5739, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 2.629379225568531, | |
| "grad_norm": 0.1930953632606967, | |
| "learning_rate": 4.33031738188933e-05, | |
| "loss": 0.5753, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.6359352591682033, | |
| "grad_norm": 0.17356931537284068, | |
| "learning_rate": 4.312004824626551e-05, | |
| "loss": 0.5777, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 2.6424912927678754, | |
| "grad_norm": 0.1734706404199846, | |
| "learning_rate": 4.293685685512142e-05, | |
| "loss": 0.5752, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 2.6490473263675476, | |
| "grad_norm": 0.19379812038615699, | |
| "learning_rate": 4.275360350994791e-05, | |
| "loss": 0.5795, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 2.6556033599672197, | |
| "grad_norm": 0.1660813082194996, | |
| "learning_rate": 4.257029207653881e-05, | |
| "loss": 0.5826, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 2.662159393566892, | |
| "grad_norm": 0.19954108364376924, | |
| "learning_rate": 4.238692642191336e-05, | |
| "loss": 0.5825, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 2.668715427166564, | |
| "grad_norm": 0.19322396699850317, | |
| "learning_rate": 4.220351041423462e-05, | |
| "loss": 0.5841, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 2.6752714607662362, | |
| "grad_norm": 0.17735120440062302, | |
| "learning_rate": 4.202004792272785e-05, | |
| "loss": 0.5798, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 2.6818274943659084, | |
| "grad_norm": 0.17855572539839387, | |
| "learning_rate": 4.183654281759888e-05, | |
| "loss": 0.5785, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 2.6883835279655806, | |
| "grad_norm": 0.1998138629729567, | |
| "learning_rate": 4.165299896995253e-05, | |
| "loss": 0.5782, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 2.694939561565253, | |
| "grad_norm": 0.19733364770879444, | |
| "learning_rate": 4.1469420251710905e-05, | |
| "loss": 0.5738, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.7014955951649253, | |
| "grad_norm": 0.17455960566932077, | |
| "learning_rate": 4.128581053553169e-05, | |
| "loss": 0.5804, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 2.7080516287645975, | |
| "grad_norm": 0.2277904402914904, | |
| "learning_rate": 4.110217369472649e-05, | |
| "loss": 0.5823, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 2.7146076623642696, | |
| "grad_norm": 0.17727829284042818, | |
| "learning_rate": 4.091851360317912e-05, | |
| "loss": 0.5806, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 2.721163695963942, | |
| "grad_norm": 0.20033803661764574, | |
| "learning_rate": 4.07348341352639e-05, | |
| "loss": 0.5737, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 2.727719729563614, | |
| "grad_norm": 0.28585140789050134, | |
| "learning_rate": 4.055113916576386e-05, | |
| "loss": 0.5922, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 2.734275763163286, | |
| "grad_norm": 0.1770226397602663, | |
| "learning_rate": 4.0367432569789065e-05, | |
| "loss": 0.5708, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 2.7408317967629583, | |
| "grad_norm": 0.19810748724107435, | |
| "learning_rate": 4.0183718222694823e-05, | |
| "loss": 0.5822, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 2.7473878303626305, | |
| "grad_norm": 0.16916185476319684, | |
| "learning_rate": 4e-05, | |
| "loss": 0.5781, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 2.753943863962303, | |
| "grad_norm": 0.1832943105404772, | |
| "learning_rate": 3.9816281777305176e-05, | |
| "loss": 0.5812, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 2.7604998975619752, | |
| "grad_norm": 0.1502272563296327, | |
| "learning_rate": 3.963256743021095e-05, | |
| "loss": 0.5716, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.7670559311616474, | |
| "grad_norm": 0.21339511828926633, | |
| "learning_rate": 3.944886083423615e-05, | |
| "loss": 0.5801, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 2.7736119647613195, | |
| "grad_norm": 0.177457878421534, | |
| "learning_rate": 3.92651658647361e-05, | |
| "loss": 0.5747, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 2.7801679983609917, | |
| "grad_norm": 0.1839395596929418, | |
| "learning_rate": 3.908148639682089e-05, | |
| "loss": 0.5843, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 2.786724031960664, | |
| "grad_norm": 0.19753047943291085, | |
| "learning_rate": 3.889782630527353e-05, | |
| "loss": 0.574, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 2.793280065560336, | |
| "grad_norm": 0.14640408330908058, | |
| "learning_rate": 3.8714189464468334e-05, | |
| "loss": 0.5786, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 2.799836099160008, | |
| "grad_norm": 0.19103262838391485, | |
| "learning_rate": 3.853057974828911e-05, | |
| "loss": 0.5798, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 2.8063921327596804, | |
| "grad_norm": 0.1673182640044429, | |
| "learning_rate": 3.834700103004747e-05, | |
| "loss": 0.5729, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 2.8129481663593525, | |
| "grad_norm": 0.20105657869010776, | |
| "learning_rate": 3.816345718240113e-05, | |
| "loss": 0.5744, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 2.8195041999590247, | |
| "grad_norm": 0.13814925979553985, | |
| "learning_rate": 3.797995207727217e-05, | |
| "loss": 0.5694, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 2.826060233558697, | |
| "grad_norm": 0.16253212459987165, | |
| "learning_rate": 3.779648958576538e-05, | |
| "loss": 0.5732, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 2.832616267158369, | |
| "grad_norm": 0.14306544183315967, | |
| "learning_rate": 3.7613073578086644e-05, | |
| "loss": 0.5737, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 2.839172300758041, | |
| "grad_norm": 0.16148301199590984, | |
| "learning_rate": 3.74297079234612e-05, | |
| "loss": 0.5696, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 2.8457283343577133, | |
| "grad_norm": 0.1591738101867343, | |
| "learning_rate": 3.7246396490052117e-05, | |
| "loss": 0.5752, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 2.852284367957386, | |
| "grad_norm": 0.14229281054940401, | |
| "learning_rate": 3.706314314487859e-05, | |
| "loss": 0.5778, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 2.858840401557058, | |
| "grad_norm": 0.18401979626969228, | |
| "learning_rate": 3.687995175373449e-05, | |
| "loss": 0.5785, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 2.8653964351567303, | |
| "grad_norm": 0.14532042872787476, | |
| "learning_rate": 3.669682618110671e-05, | |
| "loss": 0.5717, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 2.8719524687564024, | |
| "grad_norm": 0.14280350348324158, | |
| "learning_rate": 3.6513770290093674e-05, | |
| "loss": 0.5737, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 2.8785085023560746, | |
| "grad_norm": 0.17828968160150296, | |
| "learning_rate": 3.6330787942323855e-05, | |
| "loss": 0.5739, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 2.8850645359557467, | |
| "grad_norm": 0.1680340039645173, | |
| "learning_rate": 3.614788299787434e-05, | |
| "loss": 0.5719, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 2.891620569555419, | |
| "grad_norm": 0.16773501838978377, | |
| "learning_rate": 3.5965059315189274e-05, | |
| "loss": 0.5861, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.898176603155091, | |
| "grad_norm": 0.1466119984148444, | |
| "learning_rate": 3.578232075099866e-05, | |
| "loss": 0.5719, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 2.9047326367547632, | |
| "grad_norm": 0.18860006392370388, | |
| "learning_rate": 3.559967116023683e-05, | |
| "loss": 0.573, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 2.911288670354436, | |
| "grad_norm": 0.17180023728534166, | |
| "learning_rate": 3.541711439596122e-05, | |
| "loss": 0.5715, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 2.917844703954108, | |
| "grad_norm": 0.16863095150334853, | |
| "learning_rate": 3.523465430927106e-05, | |
| "loss": 0.587, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 2.92440073755378, | |
| "grad_norm": 0.18865259295148487, | |
| "learning_rate": 3.5052294749226094e-05, | |
| "loss": 0.5705, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 2.9309567711534523, | |
| "grad_norm": 0.3900404738786457, | |
| "learning_rate": 3.4870039562765475e-05, | |
| "loss": 0.5808, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 2.9375128047531245, | |
| "grad_norm": 0.1577690647524753, | |
| "learning_rate": 3.4687892594626536e-05, | |
| "loss": 0.5767, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 2.9440688383527966, | |
| "grad_norm": 0.17930301136018978, | |
| "learning_rate": 3.4505857687263675e-05, | |
| "loss": 0.5794, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 2.950624871952469, | |
| "grad_norm": 0.21442963322771297, | |
| "learning_rate": 3.432393868076739e-05, | |
| "loss": 0.5813, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 2.957180905552141, | |
| "grad_norm": 0.5263365227065998, | |
| "learning_rate": 3.414213941278314e-05, | |
| "loss": 0.5712, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.963736939151813, | |
| "grad_norm": 0.24486881857008144, | |
| "learning_rate": 3.396046371843052e-05, | |
| "loss": 0.5862, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 2.9702929727514853, | |
| "grad_norm": 0.14980108475089587, | |
| "learning_rate": 3.377891543022229e-05, | |
| "loss": 0.5801, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 2.9768490063511575, | |
| "grad_norm": 0.20536856095424458, | |
| "learning_rate": 3.3597498377983444e-05, | |
| "loss": 0.5802, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 2.9834050399508296, | |
| "grad_norm": 0.18782303046647886, | |
| "learning_rate": 3.341621638877064e-05, | |
| "loss": 0.5784, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 2.989961073550502, | |
| "grad_norm": 0.14515446375387317, | |
| "learning_rate": 3.3235073286791264e-05, | |
| "loss": 0.5754, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 2.996517107150174, | |
| "grad_norm": 0.16161434398106808, | |
| "learning_rate": 3.305407289332279e-05, | |
| "loss": 0.5756, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 3.0055316533497236, | |
| "grad_norm": 0.3533323244538683, | |
| "learning_rate": 3.287321902663229e-05, | |
| "loss": 1.0491, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 3.0120876869493958, | |
| "grad_norm": 0.3045514737546258, | |
| "learning_rate": 3.269251550189573e-05, | |
| "loss": 0.5446, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 3.018643720549068, | |
| "grad_norm": 0.25472049022532234, | |
| "learning_rate": 3.251196613111761e-05, | |
| "loss": 0.5407, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 3.02519975414874, | |
| "grad_norm": 0.25510858573604617, | |
| "learning_rate": 3.2331574723050474e-05, | |
| "loss": 0.5411, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 3.0317557877484123, | |
| "grad_norm": 0.38483951060765287, | |
| "learning_rate": 3.2151345083114574e-05, | |
| "loss": 0.5396, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 3.0383118213480844, | |
| "grad_norm": 0.2558618267692145, | |
| "learning_rate": 3.197128101331764e-05, | |
| "loss": 0.5349, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 3.0448678549477566, | |
| "grad_norm": 0.23801295747592094, | |
| "learning_rate": 3.179138631217463e-05, | |
| "loss": 0.5439, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 3.0514238885474287, | |
| "grad_norm": 0.2746658371274729, | |
| "learning_rate": 3.161166477462759e-05, | |
| "loss": 0.5463, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 3.057979922147101, | |
| "grad_norm": 0.23307565195751595, | |
| "learning_rate": 3.1432120191965647e-05, | |
| "loss": 0.538, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 3.064535955746773, | |
| "grad_norm": 0.23849808366960823, | |
| "learning_rate": 3.125275635174497e-05, | |
| "loss": 0.5431, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 3.0710919893464452, | |
| "grad_norm": 0.22477045443908192, | |
| "learning_rate": 3.1073577037708935e-05, | |
| "loss": 0.5422, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 3.0776480229461174, | |
| "grad_norm": 0.2060382177048335, | |
| "learning_rate": 3.089458602970828e-05, | |
| "loss": 0.5446, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 3.08420405654579, | |
| "grad_norm": 0.22200054079896087, | |
| "learning_rate": 3.0715787103621294e-05, | |
| "loss": 0.5479, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 3.090760090145462, | |
| "grad_norm": 0.17039781849738478, | |
| "learning_rate": 3.0537184031274306e-05, | |
| "loss": 0.5305, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 3.0973161237451343, | |
| "grad_norm": 0.16941181980633563, | |
| "learning_rate": 3.0358780580362025e-05, | |
| "loss": 0.5261, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 3.1038721573448065, | |
| "grad_norm": 0.17660138294537184, | |
| "learning_rate": 3.0180580514368037e-05, | |
| "loss": 0.5312, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 3.1104281909444786, | |
| "grad_norm": 0.18073996525501446, | |
| "learning_rate": 3.0002587592485497e-05, | |
| "loss": 0.5371, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 3.116984224544151, | |
| "grad_norm": 0.3988440151798082, | |
| "learning_rate": 2.9824805569537747e-05, | |
| "loss": 0.5513, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 3.123540258143823, | |
| "grad_norm": 0.20423237269747027, | |
| "learning_rate": 2.9647238195899168e-05, | |
| "loss": 0.5375, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 3.130096291743495, | |
| "grad_norm": 0.17213193598614746, | |
| "learning_rate": 2.9469889217416045e-05, | |
| "loss": 0.5437, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 3.1366523253431673, | |
| "grad_norm": 0.21207611299854595, | |
| "learning_rate": 2.9292762375327483e-05, | |
| "loss": 0.5339, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 3.1432083589428395, | |
| "grad_norm": 0.15310274061749113, | |
| "learning_rate": 2.9115861406186593e-05, | |
| "loss": 0.5303, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 3.1497643925425116, | |
| "grad_norm": 0.1877335953144904, | |
| "learning_rate": 2.8939190041781647e-05, | |
| "loss": 0.5319, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 3.1563204261421838, | |
| "grad_norm": 0.1458292108542802, | |
| "learning_rate": 2.8762752009057232e-05, | |
| "loss": 0.54, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 3.1628764597418564, | |
| "grad_norm": 0.17442833234197713, | |
| "learning_rate": 2.85865510300358e-05, | |
| "loss": 0.5377, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 3.1694324933415285, | |
| "grad_norm": 0.15457635774978962, | |
| "learning_rate": 2.841059082173902e-05, | |
| "loss": 0.5389, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 3.1759885269412007, | |
| "grad_norm": 0.16823476302413515, | |
| "learning_rate": 2.823487509610946e-05, | |
| "loss": 0.5435, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 3.182544560540873, | |
| "grad_norm": 0.14604985181804828, | |
| "learning_rate": 2.805940755993223e-05, | |
| "loss": 0.5377, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 3.189100594140545, | |
| "grad_norm": 0.14560052298572645, | |
| "learning_rate": 2.7884191914756757e-05, | |
| "loss": 0.5409, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 3.195656627740217, | |
| "grad_norm": 0.1472508058071573, | |
| "learning_rate": 2.770923185681878e-05, | |
| "loss": 0.5455, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 3.2022126613398894, | |
| "grad_norm": 0.13624234976069768, | |
| "learning_rate": 2.7534531076962356e-05, | |
| "loss": 0.5433, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 3.2087686949395615, | |
| "grad_norm": 0.15264871662264248, | |
| "learning_rate": 2.7360093260561904e-05, | |
| "loss": 0.5372, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 3.2153247285392337, | |
| "grad_norm": 0.1462659967641793, | |
| "learning_rate": 2.7185922087444602e-05, | |
| "loss": 0.538, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 3.221880762138906, | |
| "grad_norm": 0.16303592219574492, | |
| "learning_rate": 2.7012021231812666e-05, | |
| "loss": 0.542, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 3.228436795738578, | |
| "grad_norm": 0.14173407966037618, | |
| "learning_rate": 2.6838394362165875e-05, | |
| "loss": 0.5387, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 3.23499282933825, | |
| "grad_norm": 0.15432095686939976, | |
| "learning_rate": 2.6665045141224193e-05, | |
| "loss": 0.5377, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 3.2415488629379228, | |
| "grad_norm": 0.1482561893469378, | |
| "learning_rate": 2.6491977225850446e-05, | |
| "loss": 0.5371, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 3.248104896537595, | |
| "grad_norm": 0.16624560752182815, | |
| "learning_rate": 2.6319194266973256e-05, | |
| "loss": 0.5359, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 3.254660930137267, | |
| "grad_norm": 0.15016468812997047, | |
| "learning_rate": 2.6146699909509984e-05, | |
| "loss": 0.5411, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 3.2612169637369393, | |
| "grad_norm": 0.2450944209511445, | |
| "learning_rate": 2.597449779228983e-05, | |
| "loss": 0.5429, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 3.2677729973366114, | |
| "grad_norm": 0.2412941457078939, | |
| "learning_rate": 2.580259154797709e-05, | |
| "loss": 0.5343, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 3.2743290309362836, | |
| "grad_norm": 0.14493021044588159, | |
| "learning_rate": 2.563098480299451e-05, | |
| "loss": 0.5409, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 3.2808850645359557, | |
| "grad_norm": 0.16960807003233563, | |
| "learning_rate": 2.5459681177446803e-05, | |
| "loss": 0.5389, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 3.287441098135628, | |
| "grad_norm": 0.14869909968414688, | |
| "learning_rate": 2.5288684285044283e-05, | |
| "loss": 0.5353, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.2939971317353, | |
| "grad_norm": 0.14777225005600433, | |
| "learning_rate": 2.5117997733026566e-05, | |
| "loss": 0.5393, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 3.3005531653349722, | |
| "grad_norm": 0.15139629935458157, | |
| "learning_rate": 2.4947625122086585e-05, | |
| "loss": 0.5435, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 3.3071091989346444, | |
| "grad_norm": 0.1392849654786053, | |
| "learning_rate": 2.477757004629456e-05, | |
| "loss": 0.5375, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 3.3136652325343166, | |
| "grad_norm": 0.1367733939357107, | |
| "learning_rate": 2.460783609302218e-05, | |
| "loss": 0.5426, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 3.3202212661339887, | |
| "grad_norm": 0.12699654005182315, | |
| "learning_rate": 2.4438426842866966e-05, | |
| "loss": 0.5353, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 3.3267772997336613, | |
| "grad_norm": 0.1390969751788505, | |
| "learning_rate": 2.4269345869576676e-05, | |
| "loss": 0.539, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 3.3333333333333335, | |
| "grad_norm": 0.12248688460845146, | |
| "learning_rate": 2.4100596739973993e-05, | |
| "loss": 0.5392, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 3.3398893669330056, | |
| "grad_norm": 0.13805750150926843, | |
| "learning_rate": 2.393218301388123e-05, | |
| "loss": 0.5401, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 3.346445400532678, | |
| "grad_norm": 0.12180259855826225, | |
| "learning_rate": 2.3764108244045212e-05, | |
| "loss": 0.5465, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 3.35300143413235, | |
| "grad_norm": 0.12245696463081471, | |
| "learning_rate": 2.35963759760624e-05, | |
| "loss": 0.5311, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 3.359557467732022, | |
| "grad_norm": 0.12896584095171493, | |
| "learning_rate": 2.342898974830405e-05, | |
| "loss": 0.5391, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 3.3661135013316943, | |
| "grad_norm": 0.12281667944621683, | |
| "learning_rate": 2.3261953091841553e-05, | |
| "loss": 0.5335, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 3.3726695349313665, | |
| "grad_norm": 0.11829751242108535, | |
| "learning_rate": 2.3095269530372032e-05, | |
| "loss": 0.5447, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 3.3792255685310386, | |
| "grad_norm": 0.13317495284187636, | |
| "learning_rate": 2.2928942580143855e-05, | |
| "loss": 0.5438, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 3.3857816021307108, | |
| "grad_norm": 0.12369993983516651, | |
| "learning_rate": 2.276297574988263e-05, | |
| "loss": 0.5433, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 3.392337635730383, | |
| "grad_norm": 0.1246149968311202, | |
| "learning_rate": 2.2597372540717083e-05, | |
| "loss": 0.5412, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 3.3988936693300555, | |
| "grad_norm": 0.12919720585139893, | |
| "learning_rate": 2.2432136446105192e-05, | |
| "loss": 0.5442, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 3.4054497029297277, | |
| "grad_norm": 0.12089802144087428, | |
| "learning_rate": 2.226727095176057e-05, | |
| "loss": 0.539, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 3.4120057365294, | |
| "grad_norm": 0.13804336756907107, | |
| "learning_rate": 2.210277953557888e-05, | |
| "loss": 0.5462, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 3.418561770129072, | |
| "grad_norm": 0.12982437086238163, | |
| "learning_rate": 2.1938665667564435e-05, | |
| "loss": 0.5344, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 3.425117803728744, | |
| "grad_norm": 0.12345004982954612, | |
| "learning_rate": 2.177493280975708e-05, | |
| "loss": 0.5465, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 3.4316738373284164, | |
| "grad_norm": 0.14573537922186017, | |
| "learning_rate": 2.1611584416159106e-05, | |
| "loss": 0.5323, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 3.4382298709280885, | |
| "grad_norm": 0.1097589192258888, | |
| "learning_rate": 2.1448623932662377e-05, | |
| "loss": 0.5437, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 3.4447859045277607, | |
| "grad_norm": 0.1317797633987106, | |
| "learning_rate": 2.1286054796975696e-05, | |
| "loss": 0.5377, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 3.451341938127433, | |
| "grad_norm": 0.13321471541724095, | |
| "learning_rate": 2.1123880438552187e-05, | |
| "loss": 0.5518, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 3.457897971727105, | |
| "grad_norm": 0.1352447131480959, | |
| "learning_rate": 2.096210427851706e-05, | |
| "loss": 0.532, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 3.464454005326777, | |
| "grad_norm": 0.13508733978142928, | |
| "learning_rate": 2.0800729729595385e-05, | |
| "loss": 0.5314, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 3.4710100389264493, | |
| "grad_norm": 0.1380525297606027, | |
| "learning_rate": 2.063976019604006e-05, | |
| "loss": 0.5438, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 3.4775660725261215, | |
| "grad_norm": 0.13529984461100034, | |
| "learning_rate": 2.0479199073560084e-05, | |
| "loss": 0.5451, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 3.4841221061257936, | |
| "grad_norm": 0.13383345341162217, | |
| "learning_rate": 2.0319049749248876e-05, | |
| "loss": 0.5335, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 3.4906781397254663, | |
| "grad_norm": 0.12966276679499278, | |
| "learning_rate": 2.0159315601512817e-05, | |
| "loss": 0.5404, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 3.4972341733251384, | |
| "grad_norm": 0.12661938479816495, | |
| "learning_rate": 2.0000000000000012e-05, | |
| "loss": 0.5361, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 3.5037902069248106, | |
| "grad_norm": 0.25950697427544217, | |
| "learning_rate": 1.9841106305529133e-05, | |
| "loss": 0.5475, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 3.5103462405244827, | |
| "grad_norm": 0.12388736409274281, | |
| "learning_rate": 1.9682637870018638e-05, | |
| "loss": 0.5433, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 3.516902274124155, | |
| "grad_norm": 0.13692640136699888, | |
| "learning_rate": 1.9524598036415973e-05, | |
| "loss": 0.5379, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 3.523458307723827, | |
| "grad_norm": 0.22971792027818427, | |
| "learning_rate": 1.9366990138627054e-05, | |
| "loss": 0.5354, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 3.5300143413234992, | |
| "grad_norm": 0.13477440715558586, | |
| "learning_rate": 1.9209817501445978e-05, | |
| "loss": 0.5408, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 3.5365703749231714, | |
| "grad_norm": 0.12000818819921334, | |
| "learning_rate": 1.9053083440484887e-05, | |
| "loss": 0.539, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 3.5431264085228436, | |
| "grad_norm": 0.18018036064763043, | |
| "learning_rate": 1.889679126210397e-05, | |
| "loss": 0.54, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 3.5496824421225157, | |
| "grad_norm": 0.13187429164193115, | |
| "learning_rate": 1.8740944263341773e-05, | |
| "loss": 0.5336, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 3.5562384757221883, | |
| "grad_norm": 0.1115344100782877, | |
| "learning_rate": 1.8585545731845584e-05, | |
| "loss": 0.5457, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 3.5627945093218605, | |
| "grad_norm": 0.4821099792222361, | |
| "learning_rate": 1.8430598945802156e-05, | |
| "loss": 0.5429, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 3.5693505429215326, | |
| "grad_norm": 0.13726412619312758, | |
| "learning_rate": 1.8276107173868503e-05, | |
| "loss": 0.551, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 3.575906576521205, | |
| "grad_norm": 0.11727031408692318, | |
| "learning_rate": 1.8122073675102935e-05, | |
| "loss": 0.5315, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 3.582462610120877, | |
| "grad_norm": 0.12442269217723008, | |
| "learning_rate": 1.7968501698896346e-05, | |
| "loss": 0.5338, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 3.589018643720549, | |
| "grad_norm": 0.11894782465239562, | |
| "learning_rate": 1.781539448490365e-05, | |
| "loss": 0.5365, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 3.5955746773202213, | |
| "grad_norm": 0.11002626026600501, | |
| "learning_rate": 1.7662755262975432e-05, | |
| "loss": 0.5404, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 3.6021307109198935, | |
| "grad_norm": 0.13251223380465485, | |
| "learning_rate": 1.7510587253089842e-05, | |
| "loss": 0.543, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 3.6086867445195656, | |
| "grad_norm": 0.11425745596834401, | |
| "learning_rate": 1.7358893665284595e-05, | |
| "loss": 0.5345, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 3.6152427781192378, | |
| "grad_norm": 0.11826825954302657, | |
| "learning_rate": 1.7207677699589355e-05, | |
| "loss": 0.5463, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 3.62179881171891, | |
| "grad_norm": 0.10903975348972718, | |
| "learning_rate": 1.7056942545958167e-05, | |
| "loss": 0.5396, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 3.628354845318582, | |
| "grad_norm": 0.11693940603496059, | |
| "learning_rate": 1.690669138420215e-05, | |
| "loss": 0.5305, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 3.6349108789182543, | |
| "grad_norm": 0.1142560549805311, | |
| "learning_rate": 1.6756927383922473e-05, | |
| "loss": 0.5391, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 3.6414669125179264, | |
| "grad_norm": 0.10839349884507322, | |
| "learning_rate": 1.6607653704443457e-05, | |
| "loss": 0.542, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 3.6480229461175986, | |
| "grad_norm": 0.11225360540051456, | |
| "learning_rate": 1.6458873494745926e-05, | |
| "loss": 0.5388, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 3.654578979717271, | |
| "grad_norm": 0.11188297317863019, | |
| "learning_rate": 1.6310589893400804e-05, | |
| "loss": 0.5381, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 3.6611350133169434, | |
| "grad_norm": 0.7105498519897432, | |
| "learning_rate": 1.6162806028502852e-05, | |
| "loss": 0.5411, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 3.6676910469166155, | |
| "grad_norm": 0.11475316554461859, | |
| "learning_rate": 1.601552501760473e-05, | |
| "loss": 0.5416, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 3.6742470805162877, | |
| "grad_norm": 0.12194606250202103, | |
| "learning_rate": 1.5868749967651252e-05, | |
| "loss": 0.5396, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 3.68080311411596, | |
| "grad_norm": 0.12661866626370497, | |
| "learning_rate": 1.5722483974913737e-05, | |
| "loss": 0.531, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 3.687359147715632, | |
| "grad_norm": 0.11256332358317793, | |
| "learning_rate": 1.5576730124924822e-05, | |
| "loss": 0.5418, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 3.693915181315304, | |
| "grad_norm": 0.11015606275564016, | |
| "learning_rate": 1.5431491492413288e-05, | |
| "loss": 0.5411, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 3.7004712149149763, | |
| "grad_norm": 0.12074411265164606, | |
| "learning_rate": 1.528677114123923e-05, | |
| "loss": 0.53, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 3.7070272485146485, | |
| "grad_norm": 0.10893671793883805, | |
| "learning_rate": 1.5142572124329418e-05, | |
| "loss": 0.5397, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 3.713583282114321, | |
| "grad_norm": 0.11746065306540288, | |
| "learning_rate": 1.4998897483612865e-05, | |
| "loss": 0.5423, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 3.7201393157139933, | |
| "grad_norm": 0.1126528745871064, | |
| "learning_rate": 1.4855750249956718e-05, | |
| "loss": 0.5426, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 3.7266953493136654, | |
| "grad_norm": 0.3165608272055915, | |
| "learning_rate": 1.4713133443102283e-05, | |
| "loss": 0.5428, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 3.7332513829133376, | |
| "grad_norm": 0.11800782137431694, | |
| "learning_rate": 1.457105007160129e-05, | |
| "loss": 0.5443, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 3.7398074165130097, | |
| "grad_norm": 0.11122501505766147, | |
| "learning_rate": 1.44295031327525e-05, | |
| "loss": 0.5397, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 3.746363450112682, | |
| "grad_norm": 0.12798481597310407, | |
| "learning_rate": 1.4288495612538427e-05, | |
| "loss": 0.5457, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 3.752919483712354, | |
| "grad_norm": 0.10441587675286511, | |
| "learning_rate": 1.4148030485562362e-05, | |
| "loss": 0.5371, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 3.7594755173120262, | |
| "grad_norm": 0.12743357362001942, | |
| "learning_rate": 1.4008110714985623e-05, | |
| "loss": 0.5392, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 3.7660315509116984, | |
| "grad_norm": 0.1114224145991442, | |
| "learning_rate": 1.3868739252465017e-05, | |
| "loss": 0.5368, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 3.7725875845113706, | |
| "grad_norm": 0.10668882720776016, | |
| "learning_rate": 1.3729919038090627e-05, | |
| "loss": 0.5386, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 3.7791436181110427, | |
| "grad_norm": 0.11554119750738572, | |
| "learning_rate": 1.3591653000323764e-05, | |
| "loss": 0.5336, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 3.785699651710715, | |
| "grad_norm": 0.11456186328555522, | |
| "learning_rate": 1.3453944055935151e-05, | |
| "loss": 0.5447, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 3.792255685310387, | |
| "grad_norm": 0.12119615336966778, | |
| "learning_rate": 1.3316795109943476e-05, | |
| "loss": 0.5333, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 3.798811718910059, | |
| "grad_norm": 0.1097823442621975, | |
| "learning_rate": 1.3180209055554043e-05, | |
| "loss": 0.5353, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 3.8053677525097314, | |
| "grad_norm": 0.12113771390183198, | |
| "learning_rate": 1.3044188774097757e-05, | |
| "loss": 0.5406, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 3.811923786109404, | |
| "grad_norm": 0.1076103282360743, | |
| "learning_rate": 1.2908737134970367e-05, | |
| "loss": 0.5428, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 3.818479819709076, | |
| "grad_norm": 0.10778481828407349, | |
| "learning_rate": 1.2773856995571858e-05, | |
| "loss": 0.5346, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 3.8250358533087483, | |
| "grad_norm": 0.10605992762588787, | |
| "learning_rate": 1.2639551201246278e-05, | |
| "loss": 0.5394, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 3.8315918869084205, | |
| "grad_norm": 0.10492552888703595, | |
| "learning_rate": 1.2505822585221665e-05, | |
| "loss": 0.5402, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 3.8381479205080926, | |
| "grad_norm": 0.10835031468844004, | |
| "learning_rate": 1.2372673968550229e-05, | |
| "loss": 0.5449, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 3.8447039541077648, | |
| "grad_norm": 0.10610757084756944, | |
| "learning_rate": 1.2240108160048934e-05, | |
| "loss": 0.5366, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 3.851259987707437, | |
| "grad_norm": 0.10287232050693294, | |
| "learning_rate": 1.2108127956240186e-05, | |
| "loss": 0.5367, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 3.857816021307109, | |
| "grad_norm": 0.10929614703844563, | |
| "learning_rate": 1.1976736141292853e-05, | |
| "loss": 0.5335, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 3.8643720549067813, | |
| "grad_norm": 0.1204731707453792, | |
| "learning_rate": 1.1845935486963546e-05, | |
| "loss": 0.5404, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 3.870928088506454, | |
| "grad_norm": 0.10037277898866681, | |
| "learning_rate": 1.1715728752538103e-05, | |
| "loss": 0.5386, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 3.877484122106126, | |
| "grad_norm": 0.10755482937489974, | |
| "learning_rate": 1.158611868477344e-05, | |
| "loss": 0.5396, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 3.884040155705798, | |
| "grad_norm": 0.1125655323891997, | |
| "learning_rate": 1.1457108017839587e-05, | |
| "loss": 0.5522, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 3.8905961893054704, | |
| "grad_norm": 0.09908512763395612, | |
| "learning_rate": 1.1328699473261957e-05, | |
| "loss": 0.5389, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 3.8971522229051425, | |
| "grad_norm": 0.10011512841325966, | |
| "learning_rate": 1.1200895759864027e-05, | |
| "loss": 0.5402, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 3.9037082565048147, | |
| "grad_norm": 0.11190228493508152, | |
| "learning_rate": 1.107369957371013e-05, | |
| "loss": 0.5402, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 3.910264290104487, | |
| "grad_norm": 0.0997294109524961, | |
| "learning_rate": 1.09471135980486e-05, | |
| "loss": 0.5356, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 3.916820323704159, | |
| "grad_norm": 0.10926000465284759, | |
| "learning_rate": 1.0821140503255174e-05, | |
| "loss": 0.5459, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 3.923376357303831, | |
| "grad_norm": 0.10485038006440338, | |
| "learning_rate": 1.0695782946776619e-05, | |
| "loss": 0.5407, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 3.9299323909035033, | |
| "grad_norm": 0.10291841117205842, | |
| "learning_rate": 1.0571043573074737e-05, | |
| "loss": 0.5373, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 3.9364884245031755, | |
| "grad_norm": 0.10153144309813497, | |
| "learning_rate": 1.0446925013570545e-05, | |
| "loss": 0.5408, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 3.9430444581028476, | |
| "grad_norm": 0.10605125481156527, | |
| "learning_rate": 1.0323429886588743e-05, | |
| "loss": 0.5411, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 3.94960049170252, | |
| "grad_norm": 0.10215451404049354, | |
| "learning_rate": 1.020056079730252e-05, | |
| "loss": 0.5428, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 3.956156525302192, | |
| "grad_norm": 0.10818454985255951, | |
| "learning_rate": 1.0078320337678584e-05, | |
| "loss": 0.5396, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 3.962712558901864, | |
| "grad_norm": 0.10188342720575627, | |
| "learning_rate": 9.956711086422471e-06, | |
| "loss": 0.5486, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 3.9692685925015363, | |
| "grad_norm": 0.10148315246310981, | |
| "learning_rate": 9.835735608924155e-06, | |
| "loss": 0.5341, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 3.975824626101209, | |
| "grad_norm": 0.09241997326882184, | |
| "learning_rate": 9.715396457203918e-06, | |
| "loss": 0.5389, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 3.982380659700881, | |
| "grad_norm": 0.102047541160635, | |
| "learning_rate": 9.595696169858542e-06, | |
| "loss": 0.5343, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 3.9889366933005532, | |
| "grad_norm": 0.16329932937909056, | |
| "learning_rate": 9.476637272007748e-06, | |
| "loss": 0.5414, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 3.9954927269002254, | |
| "grad_norm": 0.10040695165913859, | |
| "learning_rate": 9.358222275240884e-06, | |
| "loss": 0.5365, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 4.006556033599672, | |
| "grad_norm": 0.16186764492729486, | |
| "learning_rate": 9.24045367756401e-06, | |
| "loss": 0.5171, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 4.013112067199344, | |
| "grad_norm": 0.1322033843585489, | |
| "learning_rate": 9.123333963347166e-06, | |
| "loss": 0.5209, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 4.0196681007990165, | |
| "grad_norm": 0.12245478327966862, | |
| "learning_rate": 9.006865603271952e-06, | |
| "loss": 0.5112, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 4.026224134398689, | |
| "grad_norm": 0.117065406177187, | |
| "learning_rate": 8.89105105427945e-06, | |
| "loss": 0.5089, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 4.032780167998361, | |
| "grad_norm": 0.1358673718015058, | |
| "learning_rate": 8.775892759518321e-06, | |
| "loss": 0.5245, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 4.039336201598033, | |
| "grad_norm": 0.1436375868810231, | |
| "learning_rate": 8.661393148293355e-06, | |
| "loss": 0.523, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 4.045892235197705, | |
| "grad_norm": 0.11924450152050672, | |
| "learning_rate": 8.547554636014177e-06, | |
| "loss": 0.506, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 4.052448268797377, | |
| "grad_norm": 0.11988650232489285, | |
| "learning_rate": 8.434379624144261e-06, | |
| "loss": 0.515, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 4.0590043023970495, | |
| "grad_norm": 0.13738230782732197, | |
| "learning_rate": 8.321870500150347e-06, | |
| "loss": 0.511, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 4.065560335996722, | |
| "grad_norm": 0.1371441628587204, | |
| "learning_rate": 8.210029637452016e-06, | |
| "loss": 0.5139, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 4.072116369596394, | |
| "grad_norm": 0.3628915461879963, | |
| "learning_rate": 8.098859395371641e-06, | |
| "loss": 0.5165, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 4.078672403196066, | |
| "grad_norm": 0.11429063516858927, | |
| "learning_rate": 7.988362119084642e-06, | |
| "loss": 0.5136, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 4.085228436795739, | |
| "grad_norm": 0.13045931327904303, | |
| "learning_rate": 7.87854013956994e-06, | |
| "loss": 0.5097, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 4.091784470395411, | |
| "grad_norm": 0.11629419650039642, | |
| "learning_rate": 7.769395773560874e-06, | |
| "loss": 0.5157, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 4.098340503995083, | |
| "grad_norm": 0.11362363257154101, | |
| "learning_rate": 7.660931323496283e-06, | |
| "loss": 0.5188, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 4.1048965375947555, | |
| "grad_norm": 0.10992983384699095, | |
| "learning_rate": 7.553149077471915e-06, | |
| "loss": 0.5124, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 4.111452571194428, | |
| "grad_norm": 0.10426021171852758, | |
| "learning_rate": 7.446051309192204e-06, | |
| "loss": 0.5142, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 4.1180086047941, | |
| "grad_norm": 0.10863479923168164, | |
| "learning_rate": 7.3396402779222845e-06, | |
| "loss": 0.5127, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 4.124564638393772, | |
| "grad_norm": 0.10635132221465905, | |
| "learning_rate": 7.233918228440324e-06, | |
| "loss": 0.5162, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 4.131120671993444, | |
| "grad_norm": 0.09912102469646186, | |
| "learning_rate": 7.128887390990198e-06, | |
| "loss": 0.5224, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 4.137676705593116, | |
| "grad_norm": 0.10290328673729038, | |
| "learning_rate": 7.024549981234377e-06, | |
| "loss": 0.5217, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 4.1442327391927885, | |
| "grad_norm": 0.1038941435526061, | |
| "learning_rate": 6.9209082002072725e-06, | |
| "loss": 0.5133, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 4.150788772792461, | |
| "grad_norm": 0.09603936143418888, | |
| "learning_rate": 6.817964234268748e-06, | |
| "loss": 0.5176, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 4.157344806392133, | |
| "grad_norm": 0.11813529879188972, | |
| "learning_rate": 6.715720255058e-06, | |
| "loss": 0.5152, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 4.163900839991805, | |
| "grad_norm": 0.09790007822885208, | |
| "learning_rate": 6.614178419447781e-06, | |
| "loss": 0.5158, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 4.170456873591477, | |
| "grad_norm": 0.10792344691656378, | |
| "learning_rate": 6.513340869498859e-06, | |
| "loss": 0.5122, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 4.177012907191149, | |
| "grad_norm": 0.24107917853631503, | |
| "learning_rate": 6.4132097324148556e-06, | |
| "loss": 0.5161, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 4.183568940790821, | |
| "grad_norm": 0.09669004690900093, | |
| "learning_rate": 6.313787120497376e-06, | |
| "loss": 0.507, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 4.190124974390494, | |
| "grad_norm": 0.10731913975620888, | |
| "learning_rate": 6.215075131101405e-06, | |
| "loss": 0.5087, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 4.196681007990166, | |
| "grad_norm": 0.0997103006367049, | |
| "learning_rate": 6.117075846591123e-06, | |
| "loss": 0.5201, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 4.203237041589838, | |
| "grad_norm": 0.09955705599544924, | |
| "learning_rate": 6.019791334295955e-06, | |
| "loss": 0.5076, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 4.20979307518951, | |
| "grad_norm": 0.09540357195970695, | |
| "learning_rate": 5.923223646466923e-06, | |
| "loss": 0.5181, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 4.216349108789182, | |
| "grad_norm": 0.0952939694361504, | |
| "learning_rate": 5.827374820233407e-06, | |
| "loss": 0.5195, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 4.222905142388854, | |
| "grad_norm": 0.09942874484427339, | |
| "learning_rate": 5.732246877560146e-06, | |
| "loss": 0.5171, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 4.2294611759885266, | |
| "grad_norm": 0.09564484595006637, | |
| "learning_rate": 5.637841825204588e-06, | |
| "loss": 0.5131, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 4.236017209588199, | |
| "grad_norm": 0.1056325087529548, | |
| "learning_rate": 5.5441616546745646e-06, | |
| "loss": 0.5095, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 4.242573243187872, | |
| "grad_norm": 0.09442156372590707, | |
| "learning_rate": 5.451208342186229e-06, | |
| "loss": 0.5139, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 4.249129276787544, | |
| "grad_norm": 0.08963880451358089, | |
| "learning_rate": 5.358983848622452e-06, | |
| "loss": 0.5192, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 4.255685310387216, | |
| "grad_norm": 0.0917773531851403, | |
| "learning_rate": 5.26749011949141e-06, | |
| "loss": 0.5112, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 4.262241343986888, | |
| "grad_norm": 0.09650192405895454, | |
| "learning_rate": 5.176729084885508e-06, | |
| "loss": 0.5156, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 4.26879737758656, | |
| "grad_norm": 0.09638202151344397, | |
| "learning_rate": 5.086702659440743e-06, | |
| "loss": 0.5132, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 4.275353411186233, | |
| "grad_norm": 0.09515733205388084, | |
| "learning_rate": 4.99741274229625e-06, | |
| "loss": 0.5107, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 4.281909444785905, | |
| "grad_norm": 0.08694276833101254, | |
| "learning_rate": 4.908861217054281e-06, | |
| "loss": 0.511, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 4.288465478385577, | |
| "grad_norm": 0.09365787216266765, | |
| "learning_rate": 4.821049951740442e-06, | |
| "loss": 0.5196, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 4.295021511985249, | |
| "grad_norm": 0.09875335551778978, | |
| "learning_rate": 4.733980798764273e-06, | |
| "loss": 0.5139, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 4.301577545584921, | |
| "grad_norm": 0.0984165654822695, | |
| "learning_rate": 4.647655594880225e-06, | |
| "loss": 0.5194, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 4.308133579184593, | |
| "grad_norm": 0.08981515555040814, | |
| "learning_rate": 4.562076161148881e-06, | |
| "loss": 0.5159, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 4.3146896127842655, | |
| "grad_norm": 0.08699993576971486, | |
| "learning_rate": 4.4772443028985004e-06, | |
| "loss": 0.5107, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 4.321245646383938, | |
| "grad_norm": 0.09531375447276737, | |
| "learning_rate": 4.393161809687021e-06, | |
| "loss": 0.5211, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 4.32780167998361, | |
| "grad_norm": 0.16067767992274057, | |
| "learning_rate": 4.3098304552642385e-06, | |
| "loss": 0.5163, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 4.334357713583282, | |
| "grad_norm": 0.09069517004519909, | |
| "learning_rate": 4.227251997534416e-06, | |
| "loss": 0.5107, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 4.340913747182954, | |
| "grad_norm": 0.08688482436943581, | |
| "learning_rate": 4.1454281785191995e-06, | |
| "loss": 0.5118, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 4.347469780782626, | |
| "grad_norm": 0.09109521686027529, | |
| "learning_rate": 4.064360724320846e-06, | |
| "loss": 0.5114, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 4.3540258143822985, | |
| "grad_norm": 0.0885789818679224, | |
| "learning_rate": 3.984051345085855e-06, | |
| "loss": 0.5126, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 4.360581847981971, | |
| "grad_norm": 0.09379559333937212, | |
| "learning_rate": 3.90450173496887e-06, | |
| "loss": 0.5207, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 4.367137881581643, | |
| "grad_norm": 0.08937679173054215, | |
| "learning_rate": 3.825713572096903e-06, | |
| "loss": 0.5175, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 4.373693915181315, | |
| "grad_norm": 0.08924834510849128, | |
| "learning_rate": 3.747688518534003e-06, | |
| "loss": 0.5186, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 4.380249948780987, | |
| "grad_norm": 0.09109376121051856, | |
| "learning_rate": 3.6704282202461515e-06, | |
| "loss": 0.5161, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 4.386805982380659, | |
| "grad_norm": 0.10119588536048828, | |
| "learning_rate": 3.5939343070665243e-06, | |
| "loss": 0.5032, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 4.3933620159803315, | |
| "grad_norm": 0.09644206288351137, | |
| "learning_rate": 3.518208392661184e-06, | |
| "loss": 0.5188, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 4.399918049580004, | |
| "grad_norm": 0.08778658376932175, | |
| "learning_rate": 3.4432520744949317e-06, | |
| "loss": 0.5133, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 4.406474083179677, | |
| "grad_norm": 0.0941601011641409, | |
| "learning_rate": 3.3690669337977e-06, | |
| "loss": 0.5061, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 4.413030116779349, | |
| "grad_norm": 0.09791658312405495, | |
| "learning_rate": 3.295654535531161e-06, | |
| "loss": 0.5193, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 4.419586150379021, | |
| "grad_norm": 0.09354050139551012, | |
| "learning_rate": 3.2230164283556918e-06, | |
| "loss": 0.5172, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 4.426142183978693, | |
| "grad_norm": 0.08893737381738466, | |
| "learning_rate": 3.151154144597741e-06, | |
| "loss": 0.5111, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 4.432698217578365, | |
| "grad_norm": 0.08445332878405215, | |
| "learning_rate": 3.080069200217497e-06, | |
| "loss": 0.5097, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 4.4392542511780375, | |
| "grad_norm": 0.08801285298434917, | |
| "learning_rate": 3.0097630947768695e-06, | |
| "loss": 0.5135, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 4.44581028477771, | |
| "grad_norm": 0.09402581699959618, | |
| "learning_rate": 2.9402373114079295e-06, | |
| "loss": 0.5086, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 4.452366318377382, | |
| "grad_norm": 0.0905312100821144, | |
| "learning_rate": 2.871493316781546e-06, | |
| "loss": 0.515, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 4.458922351977054, | |
| "grad_norm": 0.09633255689013458, | |
| "learning_rate": 2.803532561076492e-06, | |
| "loss": 0.5123, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 4.465478385576726, | |
| "grad_norm": 0.08803341472121784, | |
| "learning_rate": 2.7363564779488448e-06, | |
| "loss": 0.5135, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 4.472034419176398, | |
| "grad_norm": 0.08797758072438489, | |
| "learning_rate": 2.669966484501716e-06, | |
| "loss": 0.5199, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 4.4785904527760705, | |
| "grad_norm": 0.09112213281433264, | |
| "learning_rate": 2.6043639812554043e-06, | |
| "loss": 0.5205, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 4.485146486375743, | |
| "grad_norm": 0.09544750033107532, | |
| "learning_rate": 2.5395503521178143e-06, | |
| "loss": 0.5253, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 4.491702519975415, | |
| "grad_norm": 0.09376536821854904, | |
| "learning_rate": 2.4755269643552594e-06, | |
| "loss": 0.5164, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 4.498258553575087, | |
| "grad_norm": 0.09454989320113857, | |
| "learning_rate": 2.4122951685636674e-06, | |
| "loss": 0.5187, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 4.504814587174759, | |
| "grad_norm": 0.08421901449177807, | |
| "learning_rate": 2.3498562986400187e-06, | |
| "loss": 0.5154, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 4.511370620774431, | |
| "grad_norm": 0.08753626044860159, | |
| "learning_rate": 2.2882116717542634e-06, | |
| "loss": 0.5176, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 4.5179266543741035, | |
| "grad_norm": 0.0907417635028892, | |
| "learning_rate": 2.22736258832152e-06, | |
| "loss": 0.5244, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 4.524482687973776, | |
| "grad_norm": 0.08866763487206057, | |
| "learning_rate": 2.1673103319746146e-06, | |
| "loss": 0.5188, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 4.531038721573448, | |
| "grad_norm": 0.08406194293605605, | |
| "learning_rate": 2.1080561695370425e-06, | |
| "loss": 0.5117, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 4.53759475517312, | |
| "grad_norm": 0.08781306185528281, | |
| "learning_rate": 2.049601350996233e-06, | |
| "loss": 0.5105, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 4.544150788772792, | |
| "grad_norm": 0.0862714841925732, | |
| "learning_rate": 1.9919471094771523e-06, | |
| "loss": 0.513, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 4.550706822372465, | |
| "grad_norm": 0.08316320143150852, | |
| "learning_rate": 1.93509466121633e-06, | |
| "loss": 0.5143, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 4.557262855972137, | |
| "grad_norm": 0.08459303029799788, | |
| "learning_rate": 1.8790452055361764e-06, | |
| "loss": 0.5117, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 4.5638188895718095, | |
| "grad_norm": 0.08646962847262735, | |
| "learning_rate": 1.8237999248197002e-06, | |
| "loss": 0.5122, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 4.570374923171482, | |
| "grad_norm": 0.18896054046080968, | |
| "learning_rate": 1.7693599844855568e-06, | |
| "loss": 0.5187, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 4.576930956771154, | |
| "grad_norm": 0.08583158851506041, | |
| "learning_rate": 1.7157265329634354e-06, | |
| "loss": 0.5206, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 4.583486990370826, | |
| "grad_norm": 0.0823819160891982, | |
| "learning_rate": 1.6629007016698918e-06, | |
| "loss": 0.5119, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 4.590043023970498, | |
| "grad_norm": 0.0856122092163256, | |
| "learning_rate": 1.6108836049844434e-06, | |
| "loss": 0.5192, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 4.59659905757017, | |
| "grad_norm": 0.08366071868253996, | |
| "learning_rate": 1.5596763402260462e-06, | |
| "loss": 0.5169, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 4.6031550911698424, | |
| "grad_norm": 0.08756847365451516, | |
| "learning_rate": 1.5092799876299835e-06, | |
| "loss": 0.5142, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 4.609711124769515, | |
| "grad_norm": 0.08472854571793725, | |
| "learning_rate": 1.459695610325067e-06, | |
| "loss": 0.5191, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 4.616267158369187, | |
| "grad_norm": 0.08619093020176126, | |
| "learning_rate": 1.4109242543111834e-06, | |
| "loss": 0.5166, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 4.622823191968859, | |
| "grad_norm": 0.08301363626424127, | |
| "learning_rate": 1.3629669484372722e-06, | |
| "loss": 0.517, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 4.629379225568531, | |
| "grad_norm": 0.08230801802672034, | |
| "learning_rate": 1.3158247043795735e-06, | |
| "loss": 0.5033, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 4.635935259168203, | |
| "grad_norm": 0.08247077823518248, | |
| "learning_rate": 1.2694985166203311e-06, | |
| "loss": 0.523, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 4.642491292767875, | |
| "grad_norm": 0.08077290324213217, | |
| "learning_rate": 1.2239893624267852e-06, | |
| "loss": 0.5218, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 4.649047326367548, | |
| "grad_norm": 0.08354463750730508, | |
| "learning_rate": 1.1792982018305677e-06, | |
| "loss": 0.5143, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 4.65560335996722, | |
| "grad_norm": 0.08393485609493388, | |
| "learning_rate": 1.1354259776074472e-06, | |
| "loss": 0.5151, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 4.662159393566892, | |
| "grad_norm": 0.0832402976978181, | |
| "learning_rate": 1.0923736152574428e-06, | |
| "loss": 0.5233, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 4.668715427166564, | |
| "grad_norm": 0.0835585475483202, | |
| "learning_rate": 1.050142022985292e-06, | |
| "loss": 0.5093, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 4.675271460766236, | |
| "grad_norm": 0.08176206607867041, | |
| "learning_rate": 1.0087320916813127e-06, | |
| "loss": 0.5225, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 4.681827494365908, | |
| "grad_norm": 0.08752232597844366, | |
| "learning_rate": 9.681446949025752e-07, | |
| "loss": 0.5156, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 4.6883835279655806, | |
| "grad_norm": 0.08108785189147111, | |
| "learning_rate": 9.283806888545111e-07, | |
| "loss": 0.5138, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 4.694939561565253, | |
| "grad_norm": 0.08223618174467849, | |
| "learning_rate": 8.89440912372832e-07, | |
| "loss": 0.5172, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 4.701495595164925, | |
| "grad_norm": 0.08351263590200621, | |
| "learning_rate": 8.513261869058209e-07, | |
| "loss": 0.5282, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 4.708051628764597, | |
| "grad_norm": 0.08286053605490928, | |
| "learning_rate": 8.140373164970428e-07, | |
| "loss": 0.5244, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 4.714607662364269, | |
| "grad_norm": 0.08518895529322469, | |
| "learning_rate": 7.775750877683452e-07, | |
| "loss": 0.5171, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 4.721163695963941, | |
| "grad_norm": 0.0813459247213106, | |
| "learning_rate": 7.419402699032852e-07, | |
| "loss": 0.5186, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 4.7277197295636135, | |
| "grad_norm": 0.0803887295459458, | |
| "learning_rate": 7.071336146308883e-07, | |
| "loss": 0.5059, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 4.734275763163287, | |
| "grad_norm": 0.08172953576855356, | |
| "learning_rate": 6.731558562097995e-07, | |
| "loss": 0.5224, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 4.740831796762959, | |
| "grad_norm": 0.08549688805268194, | |
| "learning_rate": 6.400077114128023e-07, | |
| "loss": 0.5165, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 4.747387830362631, | |
| "grad_norm": 0.08286626015188583, | |
| "learning_rate": 6.076898795116792e-07, | |
| "loss": 0.51, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 4.753943863962303, | |
| "grad_norm": 0.0823372680890407, | |
| "learning_rate": 5.762030422624732e-07, | |
| "loss": 0.5212, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 4.760499897561975, | |
| "grad_norm": 0.08161043456784334, | |
| "learning_rate": 5.455478638911071e-07, | |
| "loss": 0.5116, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 4.767055931161647, | |
| "grad_norm": 0.08090694406998772, | |
| "learning_rate": 5.15724991079356e-07, | |
| "loss": 0.5223, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 4.7736119647613195, | |
| "grad_norm": 0.08130626566721383, | |
| "learning_rate": 4.867350529512261e-07, | |
| "loss": 0.5104, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 4.780167998360992, | |
| "grad_norm": 0.08487388342418309, | |
| "learning_rate": 4.5857866105966763e-07, | |
| "loss": 0.5129, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 4.786724031960664, | |
| "grad_norm": 0.0816069016289385, | |
| "learning_rate": 4.3125640937368373e-07, | |
| "loss": 0.5123, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 4.793280065560336, | |
| "grad_norm": 0.08049465996969124, | |
| "learning_rate": 4.047688742657885e-07, | |
| "loss": 0.5063, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 4.799836099160008, | |
| "grad_norm": 0.0830575657951303, | |
| "learning_rate": 3.791166144998704e-07, | |
| "loss": 0.5121, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 4.80639213275968, | |
| "grad_norm": 0.08387321061054255, | |
| "learning_rate": 3.54300171219375e-07, | |
| "loss": 0.5146, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 4.8129481663593525, | |
| "grad_norm": 0.08224120804212026, | |
| "learning_rate": 3.3032006793590977e-07, | |
| "loss": 0.5206, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 4.819504199959025, | |
| "grad_norm": 0.08072502199031102, | |
| "learning_rate": 3.0717681051819935e-07, | |
| "loss": 0.522, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 4.826060233558697, | |
| "grad_norm": 0.07860877555923813, | |
| "learning_rate": 2.848708871814054e-07, | |
| "loss": 0.5188, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 4.832616267158369, | |
| "grad_norm": 0.08001404199090854, | |
| "learning_rate": 2.634027684768414e-07, | |
| "loss": 0.5124, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 4.839172300758041, | |
| "grad_norm": 0.08310618061839851, | |
| "learning_rate": 2.4277290728202063e-07, | |
| "loss": 0.5212, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 4.845728334357713, | |
| "grad_norm": 0.0797549571951669, | |
| "learning_rate": 2.2298173879113481e-07, | |
| "loss": 0.5168, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 4.8522843679573855, | |
| "grad_norm": 0.0824361700062477, | |
| "learning_rate": 2.040296805058528e-07, | |
| "loss": 0.5158, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 4.858840401557058, | |
| "grad_norm": 0.08076093089733337, | |
| "learning_rate": 1.859171322265141e-07, | |
| "loss": 0.5253, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 4.86539643515673, | |
| "grad_norm": 0.08043719703067892, | |
| "learning_rate": 1.6864447604370004e-07, | |
| "loss": 0.5244, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 4.871952468756403, | |
| "grad_norm": 0.08277960997697664, | |
| "learning_rate": 1.522120763301782e-07, | |
| "loss": 0.5085, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 4.878508502356075, | |
| "grad_norm": 0.08180092674268333, | |
| "learning_rate": 1.3662027973320614e-07, | |
| "loss": 0.5202, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 4.885064535955747, | |
| "grad_norm": 0.08183966123917269, | |
| "learning_rate": 1.2186941516722173e-07, | |
| "loss": 0.5231, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 4.891620569555419, | |
| "grad_norm": 0.07963446358425141, | |
| "learning_rate": 1.0795979380690657e-07, | |
| "loss": 0.5136, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 4.8981766031550915, | |
| "grad_norm": 0.07852420651092652, | |
| "learning_rate": 9.489170908062228e-08, | |
| "loss": 0.5204, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 4.904732636754764, | |
| "grad_norm": 0.12877804510272148, | |
| "learning_rate": 8.266543666421544e-08, | |
| "loss": 0.5185, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 4.911288670354436, | |
| "grad_norm": 0.0869160748356661, | |
| "learning_rate": 7.128123447520452e-08, | |
| "loss": 0.5234, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 4.917844703954108, | |
| "grad_norm": 0.08086821677688971, | |
| "learning_rate": 6.073934266735303e-08, | |
| "loss": 0.5129, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 4.92440073755378, | |
| "grad_norm": 0.07880134225937953, | |
| "learning_rate": 5.10399836255715e-08, | |
| "loss": 0.5064, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 4.930956771153452, | |
| "grad_norm": 0.07797418195960645, | |
| "learning_rate": 4.218336196125439e-08, | |
| "loss": 0.5106, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 4.9375128047531245, | |
| "grad_norm": 0.08006201262285005, | |
| "learning_rate": 3.416966450795922e-08, | |
| "loss": 0.5099, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 4.944068838352797, | |
| "grad_norm": 0.08059149745693742, | |
| "learning_rate": 2.699906031745414e-08, | |
| "loss": 0.5176, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 4.950624871952469, | |
| "grad_norm": 0.08251393526468398, | |
| "learning_rate": 2.067170065615187e-08, | |
| "loss": 0.5235, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 4.957180905552141, | |
| "grad_norm": 0.08234699138692923, | |
| "learning_rate": 1.5187719001943378e-08, | |
| "loss": 0.5122, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 4.963736939151813, | |
| "grad_norm": 0.08191620156821956, | |
| "learning_rate": 1.0547231041346806e-08, | |
| "loss": 0.5237, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 4.970292972751485, | |
| "grad_norm": 0.08084676979927179, | |
| "learning_rate": 6.750334667091629e-09, | |
| "loss": 0.5178, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 4.9768490063511575, | |
| "grad_norm": 0.08117469896416148, | |
| "learning_rate": 3.797109976035884e-09, | |
| "loss": 0.5192, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 4.98340503995083, | |
| "grad_norm": 0.0821326486604627, | |
| "learning_rate": 1.6876192675052695e-09, | |
| "loss": 0.519, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 4.989961073550502, | |
| "grad_norm": 0.08465924625593949, | |
| "learning_rate": 4.219070419475557e-10, | |
| "loss": 0.5078, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 4.996517107150174, | |
| "grad_norm": 0.08042205253823026, | |
| "learning_rate": 0.0, | |
| "loss": 0.5193, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 4.996517107150174, | |
| "step": 760, | |
| "total_flos": 2.0225694010775175e+19, | |
| "train_loss": 0.10313788385767686, | |
| "train_runtime": 35636.9099, | |
| "train_samples_per_second": 10.956, | |
| "train_steps_per_second": 0.021 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 760, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.0225694010775175e+19, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
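
The logged learning rates above trace a linear warmup followed by a cosine decay that reaches exactly 0.0 at the final step 760. The tail values (e.g. 4.219e-10 at step 759 and 1.688e-9 at step 758) are consistent with a peak learning rate of 8e-5 and roughly 76 warmup steps, although neither figure is stored in this file, so both should be treated as inferred. Below is a minimal sketch, not part of the original trainer state, that reads the `log_history` back and compares the logged learning rates against that inferred schedule; the file path and the peak/warmup numbers are assumptions.

```python
# Minimal sketch (not part of the original trainer_state.json): read the log
# back and compare the logged learning rates against an inferred schedule.
# Assumptions, inferred from the logged values rather than stated in the file:
# peak LR 8e-5, 76 linear warmup steps, cosine decay to 0 at step 760.
import json
import math

with open("trainer_state.json") as f:  # hypothetical path
    state = json.load(f)

# Keep only the per-step records; the final summary entry has no "loss"/"learning_rate".
records = [e for e in state["log_history"] if "loss" in e and "learning_rate" in e]
steps = [e["step"] for e in records]
lrs = [e["learning_rate"] for e in records]

def lr_at(step, peak=8e-5, warmup=76, total=760):
    """Linear warmup then cosine decay to zero (inferred fit, not ground truth)."""
    if step <= warmup:
        return peak * step / warmup
    progress = (step - warmup) / (total - warmup)
    return 0.5 * peak * (1.0 + math.cos(math.pi * progress))

# Compare the last few logged learning rates with the inferred schedule.
for s, lr in zip(steps[-3:], lrs[-3:]):
    print(f"step {s}: logged={lr:.3e}  inferred={lr_at(s):.3e}")
```

With these assumed parameters the inferred schedule reproduces the final logged values to about three significant figures, which is what makes the cosine-with-warmup reading plausible; if the actual training arguments are available, they take precedence over this fit.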