| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.981412639405205, | |
| "eval_steps": 500, | |
| "global_step": 1005, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004956629491945477, | |
| "grad_norm": 5.755599171540443, | |
| "learning_rate": 7.920792079207921e-07, | |
| "loss": 0.8036, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.009913258983890954, | |
| "grad_norm": 5.709874822891588, | |
| "learning_rate": 1.5841584158415842e-06, | |
| "loss": 0.7966, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.01486988847583643, | |
| "grad_norm": 5.669629961381398, | |
| "learning_rate": 2.3762376237623762e-06, | |
| "loss": 0.7948, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.01982651796778191, | |
| "grad_norm": 5.3157614476293835, | |
| "learning_rate": 3.1683168316831685e-06, | |
| "loss": 0.7762, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.024783147459727387, | |
| "grad_norm": 4.356092541330962, | |
| "learning_rate": 3.960396039603961e-06, | |
| "loss": 0.7499, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.02973977695167286, | |
| "grad_norm": 2.250263146370207, | |
| "learning_rate": 4.7524752475247525e-06, | |
| "loss": 0.6956, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.03469640644361834, | |
| "grad_norm": 1.914117915511351, | |
| "learning_rate": 5.544554455445545e-06, | |
| "loss": 0.6839, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.03965303593556382, | |
| "grad_norm": 3.1492377262690066, | |
| "learning_rate": 6.336633663366337e-06, | |
| "loss": 0.6668, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.04460966542750929, | |
| "grad_norm": 3.4712456437876367, | |
| "learning_rate": 7.128712871287129e-06, | |
| "loss": 0.677, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.04956629491945477, | |
| "grad_norm": 3.421087454928539, | |
| "learning_rate": 7.920792079207921e-06, | |
| "loss": 0.6624, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.05452292441140025, | |
| "grad_norm": 2.9620186432080806, | |
| "learning_rate": 8.712871287128714e-06, | |
| "loss": 0.6236, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.05947955390334572, | |
| "grad_norm": 2.8338118123601115, | |
| "learning_rate": 9.504950495049505e-06, | |
| "loss": 0.6228, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.0644361833952912, | |
| "grad_norm": 1.8504400593431658, | |
| "learning_rate": 1.0297029702970298e-05, | |
| "loss": 0.6038, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.06939281288723669, | |
| "grad_norm": 1.4609039363755218, | |
| "learning_rate": 1.108910891089109e-05, | |
| "loss": 0.5935, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.07434944237918216, | |
| "grad_norm": 1.97639910368095, | |
| "learning_rate": 1.1881188118811881e-05, | |
| "loss": 0.572, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.07930607187112763, | |
| "grad_norm": 1.7151285016710816, | |
| "learning_rate": 1.2673267326732674e-05, | |
| "loss": 0.5634, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.08426270136307311, | |
| "grad_norm": 1.1070303436473, | |
| "learning_rate": 1.3465346534653467e-05, | |
| "loss": 0.5572, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.08921933085501858, | |
| "grad_norm": 1.2639833149957966, | |
| "learning_rate": 1.4257425742574257e-05, | |
| "loss": 0.5474, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.09417596034696406, | |
| "grad_norm": 1.3091402588020495, | |
| "learning_rate": 1.504950495049505e-05, | |
| "loss": 0.5477, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.09913258983890955, | |
| "grad_norm": 0.8044594878857394, | |
| "learning_rate": 1.5841584158415843e-05, | |
| "loss": 0.5397, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.10408921933085502, | |
| "grad_norm": 1.0704477258758005, | |
| "learning_rate": 1.6633663366336635e-05, | |
| "loss": 0.5281, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.1090458488228005, | |
| "grad_norm": 0.9039341093983634, | |
| "learning_rate": 1.7425742574257428e-05, | |
| "loss": 0.5199, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.11400247831474597, | |
| "grad_norm": 0.8044388950158062, | |
| "learning_rate": 1.821782178217822e-05, | |
| "loss": 0.5139, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.11895910780669144, | |
| "grad_norm": 0.9382332156976467, | |
| "learning_rate": 1.900990099009901e-05, | |
| "loss": 0.5193, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.12391573729863693, | |
| "grad_norm": 0.6982723380926246, | |
| "learning_rate": 1.9801980198019803e-05, | |
| "loss": 0.5147, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.1288723667905824, | |
| "grad_norm": 0.783184579204947, | |
| "learning_rate": 2.0594059405940595e-05, | |
| "loss": 0.5062, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.13382899628252787, | |
| "grad_norm": 0.6904331997041252, | |
| "learning_rate": 2.1386138613861388e-05, | |
| "loss": 0.4938, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.13878562577447337, | |
| "grad_norm": 0.7021840211104649, | |
| "learning_rate": 2.217821782178218e-05, | |
| "loss": 0.4978, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.14374225526641884, | |
| "grad_norm": 0.683654391663701, | |
| "learning_rate": 2.297029702970297e-05, | |
| "loss": 0.4961, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.14869888475836432, | |
| "grad_norm": 0.5156280172267378, | |
| "learning_rate": 2.3762376237623762e-05, | |
| "loss": 0.4928, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.1536555142503098, | |
| "grad_norm": 0.6200967438468933, | |
| "learning_rate": 2.4554455445544555e-05, | |
| "loss": 0.4962, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.15861214374225527, | |
| "grad_norm": 0.44876109447418644, | |
| "learning_rate": 2.5346534653465348e-05, | |
| "loss": 0.4841, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.16356877323420074, | |
| "grad_norm": 0.6136329783927771, | |
| "learning_rate": 2.613861386138614e-05, | |
| "loss": 0.486, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.16852540272614622, | |
| "grad_norm": 0.5297287329726431, | |
| "learning_rate": 2.6930693069306933e-05, | |
| "loss": 0.4742, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.1734820322180917, | |
| "grad_norm": 0.5768447884060003, | |
| "learning_rate": 2.7722772277227722e-05, | |
| "loss": 0.4825, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.17843866171003717, | |
| "grad_norm": 0.5982890948235946, | |
| "learning_rate": 2.8514851485148515e-05, | |
| "loss": 0.4824, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.18339529120198264, | |
| "grad_norm": 0.5067059248621495, | |
| "learning_rate": 2.9306930693069308e-05, | |
| "loss": 0.4816, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.18835192069392812, | |
| "grad_norm": 0.6942780068708954, | |
| "learning_rate": 3.00990099009901e-05, | |
| "loss": 0.4775, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.19330855018587362, | |
| "grad_norm": 0.8143766320066766, | |
| "learning_rate": 3.0891089108910896e-05, | |
| "loss": 0.4804, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.1982651796778191, | |
| "grad_norm": 1.3888869925003242, | |
| "learning_rate": 3.1683168316831686e-05, | |
| "loss": 0.4754, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.20322180916976457, | |
| "grad_norm": 0.8396306197733768, | |
| "learning_rate": 3.247524752475248e-05, | |
| "loss": 0.4694, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.20817843866171004, | |
| "grad_norm": 0.6817330424057285, | |
| "learning_rate": 3.326732673267327e-05, | |
| "loss": 0.4584, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.21313506815365552, | |
| "grad_norm": 1.366427166379272, | |
| "learning_rate": 3.405940594059406e-05, | |
| "loss": 0.4645, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.218091697645601, | |
| "grad_norm": 0.8371176675680243, | |
| "learning_rate": 3.4851485148514856e-05, | |
| "loss": 0.4708, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.22304832713754646, | |
| "grad_norm": 1.0401044191645252, | |
| "learning_rate": 3.5643564356435645e-05, | |
| "loss": 0.4703, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.22800495662949194, | |
| "grad_norm": 1.3486576493438978, | |
| "learning_rate": 3.643564356435644e-05, | |
| "loss": 0.4661, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.23296158612143741, | |
| "grad_norm": 0.9562289057981055, | |
| "learning_rate": 3.722772277227723e-05, | |
| "loss": 0.4527, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.2379182156133829, | |
| "grad_norm": 0.8711388828137366, | |
| "learning_rate": 3.801980198019802e-05, | |
| "loss": 0.4541, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.24287484510532836, | |
| "grad_norm": 1.0960375147187478, | |
| "learning_rate": 3.8811881188118816e-05, | |
| "loss": 0.451, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.24783147459727387, | |
| "grad_norm": 1.7582704593641059, | |
| "learning_rate": 3.9603960396039605e-05, | |
| "loss": 0.4628, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.2527881040892193, | |
| "grad_norm": 0.7712567737809581, | |
| "learning_rate": 4.03960396039604e-05, | |
| "loss": 0.4572, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.2577447335811648, | |
| "grad_norm": 1.5724932491218706, | |
| "learning_rate": 4.118811881188119e-05, | |
| "loss": 0.4561, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.26270136307311026, | |
| "grad_norm": 1.113040327791958, | |
| "learning_rate": 4.1980198019801987e-05, | |
| "loss": 0.4577, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.26765799256505574, | |
| "grad_norm": 1.4339506148154477, | |
| "learning_rate": 4.2772277227722776e-05, | |
| "loss": 0.4518, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.27261462205700127, | |
| "grad_norm": 1.0054609210522674, | |
| "learning_rate": 4.356435643564357e-05, | |
| "loss": 0.4592, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.27757125154894674, | |
| "grad_norm": 1.677173196248357, | |
| "learning_rate": 4.435643564356436e-05, | |
| "loss": 0.4526, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.2825278810408922, | |
| "grad_norm": 1.6091305822608544, | |
| "learning_rate": 4.514851485148515e-05, | |
| "loss": 0.4543, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.2874845105328377, | |
| "grad_norm": 0.8750200586980326, | |
| "learning_rate": 4.594059405940594e-05, | |
| "loss": 0.4562, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.29244114002478316, | |
| "grad_norm": 1.6157973616198276, | |
| "learning_rate": 4.6732673267326736e-05, | |
| "loss": 0.4572, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.29739776951672864, | |
| "grad_norm": 1.1414962367611863, | |
| "learning_rate": 4.7524752475247525e-05, | |
| "loss": 0.4563, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.3023543990086741, | |
| "grad_norm": 1.2158957352523736, | |
| "learning_rate": 4.831683168316832e-05, | |
| "loss": 0.4535, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.3073110285006196, | |
| "grad_norm": 1.3830326369244816, | |
| "learning_rate": 4.910891089108911e-05, | |
| "loss": 0.4536, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.31226765799256506, | |
| "grad_norm": 0.9017071808108595, | |
| "learning_rate": 4.9900990099009906e-05, | |
| "loss": 0.4421, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.31722428748451054, | |
| "grad_norm": 1.0504602546546122, | |
| "learning_rate": 5.0693069306930696e-05, | |
| "loss": 0.45, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.322180916976456, | |
| "grad_norm": 1.4474448248935323, | |
| "learning_rate": 5.148514851485149e-05, | |
| "loss": 0.4474, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.3271375464684015, | |
| "grad_norm": 0.8895932355362427, | |
| "learning_rate": 5.227722772277228e-05, | |
| "loss": 0.445, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.33209417596034696, | |
| "grad_norm": 1.403939101234426, | |
| "learning_rate": 5.306930693069308e-05, | |
| "loss": 0.4485, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.33705080545229243, | |
| "grad_norm": 1.255761190764639, | |
| "learning_rate": 5.3861386138613866e-05, | |
| "loss": 0.4441, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.3420074349442379, | |
| "grad_norm": 1.4235767315147947, | |
| "learning_rate": 5.465346534653466e-05, | |
| "loss": 0.4352, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.3469640644361834, | |
| "grad_norm": 1.0711027609931336, | |
| "learning_rate": 5.5445544554455445e-05, | |
| "loss": 0.4422, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.35192069392812886, | |
| "grad_norm": 0.8536336221070487, | |
| "learning_rate": 5.623762376237624e-05, | |
| "loss": 0.4387, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.35687732342007433, | |
| "grad_norm": 1.0968094429380055, | |
| "learning_rate": 5.702970297029703e-05, | |
| "loss": 0.4395, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.3618339529120198, | |
| "grad_norm": 1.6921836428631551, | |
| "learning_rate": 5.7821782178217826e-05, | |
| "loss": 0.4412, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.3667905824039653, | |
| "grad_norm": 0.9943215031713406, | |
| "learning_rate": 5.8613861386138615e-05, | |
| "loss": 0.4471, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.37174721189591076, | |
| "grad_norm": 1.3369972747378565, | |
| "learning_rate": 5.940594059405941e-05, | |
| "loss": 0.4462, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.37670384138785623, | |
| "grad_norm": 1.1434125178085341, | |
| "learning_rate": 6.01980198019802e-05, | |
| "loss": 0.437, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.38166047087980176, | |
| "grad_norm": 1.9891182727972199, | |
| "learning_rate": 6.0990099009900997e-05, | |
| "loss": 0.4393, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.38661710037174724, | |
| "grad_norm": 1.3853668626293705, | |
| "learning_rate": 6.178217821782179e-05, | |
| "loss": 0.4443, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.3915737298636927, | |
| "grad_norm": 1.346996528108508, | |
| "learning_rate": 6.257425742574258e-05, | |
| "loss": 0.4362, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.3965303593556382, | |
| "grad_norm": 1.660430248946372, | |
| "learning_rate": 6.336633663366337e-05, | |
| "loss": 0.4418, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.40148698884758366, | |
| "grad_norm": 1.5145413014843, | |
| "learning_rate": 6.415841584158417e-05, | |
| "loss": 0.4432, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.40644361833952913, | |
| "grad_norm": 1.046097641466344, | |
| "learning_rate": 6.495049504950496e-05, | |
| "loss": 0.435, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.4114002478314746, | |
| "grad_norm": 2.753750924493306, | |
| "learning_rate": 6.574257425742575e-05, | |
| "loss": 0.4391, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.4163568773234201, | |
| "grad_norm": 2.2173389152299703, | |
| "learning_rate": 6.653465346534654e-05, | |
| "loss": 0.4348, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.42131350681536556, | |
| "grad_norm": 1.861248154528301, | |
| "learning_rate": 6.732673267326732e-05, | |
| "loss": 0.44, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.42627013630731103, | |
| "grad_norm": 1.3599555831815109, | |
| "learning_rate": 6.811881188118812e-05, | |
| "loss": 0.4412, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.4312267657992565, | |
| "grad_norm": 1.8825908999305676, | |
| "learning_rate": 6.891089108910892e-05, | |
| "loss": 0.4402, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.436183395291202, | |
| "grad_norm": 1.0404951505743878, | |
| "learning_rate": 6.970297029702971e-05, | |
| "loss": 0.4413, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.44114002478314746, | |
| "grad_norm": 1.9626701461839848, | |
| "learning_rate": 7.04950495049505e-05, | |
| "loss": 0.4387, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.44609665427509293, | |
| "grad_norm": 1.346795312977841, | |
| "learning_rate": 7.128712871287129e-05, | |
| "loss": 0.4459, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.4510532837670384, | |
| "grad_norm": 1.8165061269552687, | |
| "learning_rate": 7.207920792079209e-05, | |
| "loss": 0.4386, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.4560099132589839, | |
| "grad_norm": 1.7047163209623588, | |
| "learning_rate": 7.287128712871288e-05, | |
| "loss": 0.446, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.46096654275092935, | |
| "grad_norm": 1.1114346196800748, | |
| "learning_rate": 7.366336633663368e-05, | |
| "loss": 0.437, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.46592317224287483, | |
| "grad_norm": 1.278726477325811, | |
| "learning_rate": 7.445544554455446e-05, | |
| "loss": 0.4484, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.4708798017348203, | |
| "grad_norm": 1.1877378594704833, | |
| "learning_rate": 7.524752475247524e-05, | |
| "loss": 0.4413, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.4758364312267658, | |
| "grad_norm": 1.5235522525394471, | |
| "learning_rate": 7.603960396039604e-05, | |
| "loss": 0.4506, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.48079306071871125, | |
| "grad_norm": 1.347165643600965, | |
| "learning_rate": 7.683168316831684e-05, | |
| "loss": 0.4395, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.4857496902106567, | |
| "grad_norm": 1.059604823529573, | |
| "learning_rate": 7.762376237623763e-05, | |
| "loss": 0.4314, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.49070631970260226, | |
| "grad_norm": 1.2713286854324148, | |
| "learning_rate": 7.841584158415841e-05, | |
| "loss": 0.4445, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.49566294919454773, | |
| "grad_norm": 1.2438870796013015, | |
| "learning_rate": 7.920792079207921e-05, | |
| "loss": 0.4347, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.5006195786864932, | |
| "grad_norm": 1.1477026233352385, | |
| "learning_rate": 8e-05, | |
| "loss": 0.4342, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.5055762081784386, | |
| "grad_norm": 2.0339153985641882, | |
| "learning_rate": 7.999975845811957e-05, | |
| "loss": 0.4357, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.5105328376703842, | |
| "grad_norm": 1.2573420056205944, | |
| "learning_rate": 7.999903383539539e-05, | |
| "loss": 0.4338, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.5154894671623296, | |
| "grad_norm": 1.7533982812405757, | |
| "learning_rate": 7.99978261405788e-05, | |
| "loss": 0.444, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.5204460966542751, | |
| "grad_norm": 1.5042306616329586, | |
| "learning_rate": 7.999613538825525e-05, | |
| "loss": 0.4349, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.5254027261462205, | |
| "grad_norm": 1.4254423900122548, | |
| "learning_rate": 7.999396159884411e-05, | |
| "loss": 0.443, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.530359355638166, | |
| "grad_norm": 1.9091378325875628, | |
| "learning_rate": 7.999130479859843e-05, | |
| "loss": 0.4391, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.5353159851301115, | |
| "grad_norm": 1.0666407111374168, | |
| "learning_rate": 7.998816501960465e-05, | |
| "loss": 0.4336, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.540272614622057, | |
| "grad_norm": 2.296350041317558, | |
| "learning_rate": 7.998454229978217e-05, | |
| "loss": 0.4391, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.5452292441140025, | |
| "grad_norm": 1.9484546845581812, | |
| "learning_rate": 7.998043668288292e-05, | |
| "loss": 0.4455, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.550185873605948, | |
| "grad_norm": 1.4987176063935246, | |
| "learning_rate": 7.997584821849081e-05, | |
| "loss": 0.4323, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.5551425030978935, | |
| "grad_norm": 1.2690226187336895, | |
| "learning_rate": 7.997077696202118e-05, | |
| "loss": 0.4345, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.5600991325898389, | |
| "grad_norm": 1.3508924265435234, | |
| "learning_rate": 7.996522297472005e-05, | |
| "loss": 0.4357, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.5650557620817844, | |
| "grad_norm": 0.878205176991638, | |
| "learning_rate": 7.995918632366346e-05, | |
| "loss": 0.4244, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.5700123915737298, | |
| "grad_norm": 1.2129143217229221, | |
| "learning_rate": 7.995266708175662e-05, | |
| "loss": 0.4344, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.5749690210656754, | |
| "grad_norm": 1.1214123361921224, | |
| "learning_rate": 7.994566532773299e-05, | |
| "loss": 0.4254, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.5799256505576208, | |
| "grad_norm": 1.138267588266493, | |
| "learning_rate": 7.993818114615345e-05, | |
| "loss": 0.4207, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.5848822800495663, | |
| "grad_norm": 1.175279377193168, | |
| "learning_rate": 7.993021462740514e-05, | |
| "loss": 0.4283, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.5898389095415117, | |
| "grad_norm": 1.2923967393215616, | |
| "learning_rate": 7.992176586770047e-05, | |
| "loss": 0.4239, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.5947955390334573, | |
| "grad_norm": 1.1604367180664859, | |
| "learning_rate": 7.991283496907591e-05, | |
| "loss": 0.4247, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.5997521685254027, | |
| "grad_norm": 1.2250015260621407, | |
| "learning_rate": 7.990342203939075e-05, | |
| "loss": 0.4272, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.6047087980173482, | |
| "grad_norm": 1.0696805090524393, | |
| "learning_rate": 7.989352719232583e-05, | |
| "loss": 0.419, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.6096654275092936, | |
| "grad_norm": 0.7685739951418558, | |
| "learning_rate": 7.988315054738214e-05, | |
| "loss": 0.4245, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.6146220570012392, | |
| "grad_norm": 1.099330987636103, | |
| "learning_rate": 7.987229222987942e-05, | |
| "loss": 0.419, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.6195786864931846, | |
| "grad_norm": 1.1959932170089127, | |
| "learning_rate": 7.986095237095457e-05, | |
| "loss": 0.4241, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.6245353159851301, | |
| "grad_norm": 1.1283319629455746, | |
| "learning_rate": 7.984913110756015e-05, | |
| "loss": 0.4141, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.6294919454770755, | |
| "grad_norm": 1.391652204029837, | |
| "learning_rate": 7.983682858246265e-05, | |
| "loss": 0.4221, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.6344485749690211, | |
| "grad_norm": 1.072268257225815, | |
| "learning_rate": 7.982404494424083e-05, | |
| "loss": 0.4252, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.6394052044609665, | |
| "grad_norm": 1.082064345697297, | |
| "learning_rate": 7.98107803472839e-05, | |
| "loss": 0.4192, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.644361833952912, | |
| "grad_norm": 1.247560729399529, | |
| "learning_rate": 7.979703495178964e-05, | |
| "loss": 0.4325, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.6493184634448576, | |
| "grad_norm": 1.2191150794285281, | |
| "learning_rate": 7.978280892376246e-05, | |
| "loss": 0.43, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.654275092936803, | |
| "grad_norm": 1.482051924735927, | |
| "learning_rate": 7.976810243501147e-05, | |
| "loss": 0.4156, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.6592317224287485, | |
| "grad_norm": 1.0556101942015566, | |
| "learning_rate": 7.975291566314832e-05, | |
| "loss": 0.4204, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.6641883519206939, | |
| "grad_norm": 1.4485559402791497, | |
| "learning_rate": 7.973724879158506e-05, | |
| "loss": 0.4282, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.6691449814126395, | |
| "grad_norm": 1.0373100960236672, | |
| "learning_rate": 7.972110200953197e-05, | |
| "loss": 0.419, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.6741016109045849, | |
| "grad_norm": 1.7160742238845912, | |
| "learning_rate": 7.970447551199527e-05, | |
| "loss": 0.4262, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.6790582403965304, | |
| "grad_norm": 0.853051103915497, | |
| "learning_rate": 7.968736949977473e-05, | |
| "loss": 0.4183, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.6840148698884758, | |
| "grad_norm": 1.6581770847461408, | |
| "learning_rate": 7.966978417946126e-05, | |
| "loss": 0.4254, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.6889714993804213, | |
| "grad_norm": 1.0503931111123224, | |
| "learning_rate": 7.965171976343443e-05, | |
| "loss": 0.4234, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.6939281288723668, | |
| "grad_norm": 1.5689706067197353, | |
| "learning_rate": 7.96331764698599e-05, | |
| "loss": 0.4254, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.6988847583643123, | |
| "grad_norm": 1.2998733109100478, | |
| "learning_rate": 7.961415452268675e-05, | |
| "loss": 0.4348, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.7038413878562577, | |
| "grad_norm": 1.3801257793213206, | |
| "learning_rate": 7.959465415164485e-05, | |
| "loss": 0.4156, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.7087980173482032, | |
| "grad_norm": 1.0868824840439038, | |
| "learning_rate": 7.957467559224202e-05, | |
| "loss": 0.4208, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.7137546468401487, | |
| "grad_norm": 1.2221028482858696, | |
| "learning_rate": 7.955421908576115e-05, | |
| "loss": 0.4176, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.7187112763320942, | |
| "grad_norm": 0.9007360001653403, | |
| "learning_rate": 7.953328487925744e-05, | |
| "loss": 0.4201, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.7236679058240396, | |
| "grad_norm": 0.8717287063452277, | |
| "learning_rate": 7.951187322555525e-05, | |
| "loss": 0.4197, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.7286245353159851, | |
| "grad_norm": 0.792698187374001, | |
| "learning_rate": 7.948998438324515e-05, | |
| "loss": 0.4164, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.7335811648079306, | |
| "grad_norm": 0.8872820819558791, | |
| "learning_rate": 7.946761861668072e-05, | |
| "loss": 0.4186, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.7385377942998761, | |
| "grad_norm": 0.7604504848999115, | |
| "learning_rate": 7.944477619597546e-05, | |
| "loss": 0.4168, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.7434944237918215, | |
| "grad_norm": 0.645718499087449, | |
| "learning_rate": 7.94214573969994e-05, | |
| "loss": 0.4191, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.748451053283767, | |
| "grad_norm": 1.1186697000537262, | |
| "learning_rate": 7.939766250137589e-05, | |
| "loss": 0.421, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.7534076827757125, | |
| "grad_norm": 1.2841332603219227, | |
| "learning_rate": 7.93733917964781e-05, | |
| "loss": 0.4172, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.758364312267658, | |
| "grad_norm": 0.8213291321881714, | |
| "learning_rate": 7.934864557542565e-05, | |
| "loss": 0.4158, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.7633209417596035, | |
| "grad_norm": 0.8974802788633387, | |
| "learning_rate": 7.932342413708094e-05, | |
| "loss": 0.4096, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.7682775712515489, | |
| "grad_norm": 1.2384278066852972, | |
| "learning_rate": 7.929772778604569e-05, | |
| "loss": 0.4175, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.7732342007434945, | |
| "grad_norm": 1.0539193472753547, | |
| "learning_rate": 7.927155683265711e-05, | |
| "loss": 0.4075, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.7781908302354399, | |
| "grad_norm": 1.176982037154112, | |
| "learning_rate": 7.924491159298429e-05, | |
| "loss": 0.4162, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.7831474597273854, | |
| "grad_norm": 1.130671996299273, | |
| "learning_rate": 7.921779238882428e-05, | |
| "loss": 0.4178, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.7881040892193308, | |
| "grad_norm": 0.8096653915853804, | |
| "learning_rate": 7.919019954769828e-05, | |
| "loss": 0.4087, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.7930607187112764, | |
| "grad_norm": 0.9599899411922913, | |
| "learning_rate": 7.916213340284759e-05, | |
| "loss": 0.4162, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.7980173482032218, | |
| "grad_norm": 0.8001378357264783, | |
| "learning_rate": 7.913359429322972e-05, | |
| "loss": 0.4122, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.8029739776951673, | |
| "grad_norm": 0.6305011813200033, | |
| "learning_rate": 7.910458256351416e-05, | |
| "loss": 0.4053, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.8079306071871127, | |
| "grad_norm": 0.9703142661268936, | |
| "learning_rate": 7.907509856407828e-05, | |
| "loss": 0.4066, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.8128872366790583, | |
| "grad_norm": 1.1698912536676753, | |
| "learning_rate": 7.904514265100315e-05, | |
| "loss": 0.4131, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.8178438661710037, | |
| "grad_norm": 0.936435424097582, | |
| "learning_rate": 7.901471518606913e-05, | |
| "loss": 0.4125, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.8228004956629492, | |
| "grad_norm": 1.2575701101961008, | |
| "learning_rate": 7.898381653675158e-05, | |
| "loss": 0.4078, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.8277571251548946, | |
| "grad_norm": 0.9355459201867814, | |
| "learning_rate": 7.895244707621638e-05, | |
| "loss": 0.4096, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.8327137546468402, | |
| "grad_norm": 0.9651532815403727, | |
| "learning_rate": 7.892060718331546e-05, | |
| "loss": 0.4123, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.8376703841387856, | |
| "grad_norm": 0.9789444392587163, | |
| "learning_rate": 7.888829724258221e-05, | |
| "loss": 0.4103, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.8426270136307311, | |
| "grad_norm": 1.257987990016476, | |
| "learning_rate": 7.885551764422682e-05, | |
| "loss": 0.4133, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.8475836431226765, | |
| "grad_norm": 0.8590261676536683, | |
| "learning_rate": 7.882226878413157e-05, | |
| "loss": 0.4124, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.8525402726146221, | |
| "grad_norm": 0.7848985828871553, | |
| "learning_rate": 7.878855106384608e-05, | |
| "loss": 0.4091, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.8574969021065675, | |
| "grad_norm": 0.8217556335290177, | |
| "learning_rate": 7.875436489058243e-05, | |
| "loss": 0.4069, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.862453531598513, | |
| "grad_norm": 0.8421940106759833, | |
| "learning_rate": 7.871971067721024e-05, | |
| "loss": 0.4079, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.8674101610904585, | |
| "grad_norm": 0.732338688120224, | |
| "learning_rate": 7.86845888422517e-05, | |
| "loss": 0.4122, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.872366790582404, | |
| "grad_norm": 0.760792326142143, | |
| "learning_rate": 7.864899980987654e-05, | |
| "loss": 0.4027, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.8773234200743495, | |
| "grad_norm": 0.802406665807114, | |
| "learning_rate": 7.861294400989681e-05, | |
| "loss": 0.4012, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.8822800495662949, | |
| "grad_norm": 1.1214748433312076, | |
| "learning_rate": 7.857642187776182e-05, | |
| "loss": 0.4042, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.8872366790582404, | |
| "grad_norm": 1.264405893330972, | |
| "learning_rate": 7.85394338545528e-05, | |
| "loss": 0.4036, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.8921933085501859, | |
| "grad_norm": 0.9654797671767958, | |
| "learning_rate": 7.850198038697756e-05, | |
| "loss": 0.405, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.8971499380421314, | |
| "grad_norm": 0.7820135181375528, | |
| "learning_rate": 7.846406192736517e-05, | |
| "loss": 0.4022, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.9021065675340768, | |
| "grad_norm": 0.608588052923434, | |
| "learning_rate": 7.842567893366043e-05, | |
| "loss": 0.3952, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.9070631970260223, | |
| "grad_norm": 0.7415828590929223, | |
| "learning_rate": 7.838683186941835e-05, | |
| "loss": 0.4087, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.9120198265179678, | |
| "grad_norm": 0.8731014126615367, | |
| "learning_rate": 7.834752120379857e-05, | |
| "loss": 0.4035, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.9169764560099133, | |
| "grad_norm": 1.087019749607736, | |
| "learning_rate": 7.830774741155975e-05, | |
| "loss": 0.4125, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.9219330855018587, | |
| "grad_norm": 0.8865327294603108, | |
| "learning_rate": 7.826751097305367e-05, | |
| "loss": 0.4006, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.9268897149938042, | |
| "grad_norm": 0.6344821453740659, | |
| "learning_rate": 7.822681237421956e-05, | |
| "loss": 0.399, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.9318463444857497, | |
| "grad_norm": 0.7187394053419348, | |
| "learning_rate": 7.818565210657827e-05, | |
| "loss": 0.3977, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.9368029739776952, | |
| "grad_norm": 1.0404564829171956, | |
| "learning_rate": 7.814403066722622e-05, | |
| "loss": 0.4067, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.9417596034696406, | |
| "grad_norm": 1.0665407812935424, | |
| "learning_rate": 7.810194855882943e-05, | |
| "loss": 0.4086, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.9467162329615861, | |
| "grad_norm": 1.1577211948919968, | |
| "learning_rate": 7.805940628961747e-05, | |
| "loss": 0.4073, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.9516728624535316, | |
| "grad_norm": 0.998880858399891, | |
| "learning_rate": 7.801640437337736e-05, | |
| "loss": 0.4019, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.9566294919454771, | |
| "grad_norm": 0.9782567188381105, | |
| "learning_rate": 7.797294332944725e-05, | |
| "loss": 0.4045, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.9615861214374225, | |
| "grad_norm": 0.8665875399307627, | |
| "learning_rate": 7.792902368271027e-05, | |
| "loss": 0.3992, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.966542750929368, | |
| "grad_norm": 0.8017539697149025, | |
| "learning_rate": 7.788464596358811e-05, | |
| "loss": 0.4028, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.9714993804213135, | |
| "grad_norm": 1.021352081272535, | |
| "learning_rate": 7.783981070803469e-05, | |
| "loss": 0.4088, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.976456009913259, | |
| "grad_norm": 1.220616641821274, | |
| "learning_rate": 7.779451845752957e-05, | |
| "loss": 0.4121, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.9814126394052045, | |
| "grad_norm": 0.6851459878038231, | |
| "learning_rate": 7.774876975907154e-05, | |
| "loss": 0.3978, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.9863692688971499, | |
| "grad_norm": 0.8256154983716247, | |
| "learning_rate": 7.770256516517191e-05, | |
| "loss": 0.4006, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.9913258983890955, | |
| "grad_norm": 1.02233854118449, | |
| "learning_rate": 7.765590523384794e-05, | |
| "loss": 0.406, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.9962825278810409, | |
| "grad_norm": 1.2566526671400393, | |
| "learning_rate": 7.760879052861596e-05, | |
| "loss": 0.3971, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 1.0012391573729864, | |
| "grad_norm": 1.0197719096595084, | |
| "learning_rate": 7.756122161848474e-05, | |
| "loss": 0.4958, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 1.0061957868649318, | |
| "grad_norm": 1.2508655704855365, | |
| "learning_rate": 7.751319907794846e-05, | |
| "loss": 0.3909, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 1.0111524163568772, | |
| "grad_norm": 1.0070567867985618, | |
| "learning_rate": 7.746472348697987e-05, | |
| "loss": 0.3833, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 1.016109045848823, | |
| "grad_norm": 1.1597304198121792, | |
| "learning_rate": 7.74157954310232e-05, | |
| "loss": 0.3881, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.0210656753407683, | |
| "grad_norm": 0.9662929035122743, | |
| "learning_rate": 7.736641550098724e-05, | |
| "loss": 0.3848, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 1.0260223048327137, | |
| "grad_norm": 1.0635323036573268, | |
| "learning_rate": 7.731658429323801e-05, | |
| "loss": 0.3874, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 1.0309789343246591, | |
| "grad_norm": 0.8185655718509083, | |
| "learning_rate": 7.72663024095917e-05, | |
| "loss": 0.38, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 1.0359355638166048, | |
| "grad_norm": 1.147797704777796, | |
| "learning_rate": 7.721557045730734e-05, | |
| "loss": 0.3791, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 1.0408921933085502, | |
| "grad_norm": 0.6711907484138894, | |
| "learning_rate": 7.71643890490795e-05, | |
| "loss": 0.3759, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.0458488228004956, | |
| "grad_norm": 0.767288350635738, | |
| "learning_rate": 7.711275880303087e-05, | |
| "loss": 0.3857, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 1.050805452292441, | |
| "grad_norm": 0.7110584540901675, | |
| "learning_rate": 7.706068034270474e-05, | |
| "loss": 0.3805, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 1.0557620817843867, | |
| "grad_norm": 1.1141445015648397, | |
| "learning_rate": 7.700815429705761e-05, | |
| "loss": 0.3722, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 1.060718711276332, | |
| "grad_norm": 0.8830291052163628, | |
| "learning_rate": 7.695518130045147e-05, | |
| "loss": 0.3735, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 1.0656753407682775, | |
| "grad_norm": 0.6334177235159056, | |
| "learning_rate": 7.690176199264617e-05, | |
| "loss": 0.3814, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 1.070631970260223, | |
| "grad_norm": 0.6128048872826931, | |
| "learning_rate": 7.684789701879173e-05, | |
| "loss": 0.3823, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 1.0755885997521686, | |
| "grad_norm": 0.635130178101925, | |
| "learning_rate": 7.679358702942047e-05, | |
| "loss": 0.384, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 1.080545229244114, | |
| "grad_norm": 0.7130440719911276, | |
| "learning_rate": 7.673883268043927e-05, | |
| "loss": 0.3789, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 1.0855018587360594, | |
| "grad_norm": 0.8361125459029056, | |
| "learning_rate": 7.668363463312155e-05, | |
| "loss": 0.3756, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 1.090458488228005, | |
| "grad_norm": 1.0272028699052964, | |
| "learning_rate": 7.662799355409929e-05, | |
| "loss": 0.3794, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.0954151177199505, | |
| "grad_norm": 0.959950576279975, | |
| "learning_rate": 7.657191011535505e-05, | |
| "loss": 0.3725, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 1.100371747211896, | |
| "grad_norm": 0.7562311795443518, | |
| "learning_rate": 7.65153849942138e-05, | |
| "loss": 0.3777, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 1.1053283767038413, | |
| "grad_norm": 0.7638923008741431, | |
| "learning_rate": 7.645841887333472e-05, | |
| "loss": 0.377, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 1.110285006195787, | |
| "grad_norm": 0.754375385945502, | |
| "learning_rate": 7.640101244070304e-05, | |
| "loss": 0.3794, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 1.1152416356877324, | |
| "grad_norm": 0.7522586200297396, | |
| "learning_rate": 7.634316638962161e-05, | |
| "loss": 0.379, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.1201982651796778, | |
| "grad_norm": 0.700854901575676, | |
| "learning_rate": 7.628488141870266e-05, | |
| "loss": 0.3819, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 1.1251548946716232, | |
| "grad_norm": 0.825909383512544, | |
| "learning_rate": 7.622615823185925e-05, | |
| "loss": 0.379, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 1.1301115241635689, | |
| "grad_norm": 1.0754465578447554, | |
| "learning_rate": 7.616699753829681e-05, | |
| "loss": 0.3834, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 1.1350681536555143, | |
| "grad_norm": 1.1737988185703399, | |
| "learning_rate": 7.610740005250464e-05, | |
| "loss": 0.3826, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 1.1400247831474597, | |
| "grad_norm": 0.5785260728190653, | |
| "learning_rate": 7.604736649424714e-05, | |
| "loss": 0.3788, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.1449814126394051, | |
| "grad_norm": 0.6586379291379977, | |
| "learning_rate": 7.598689758855525e-05, | |
| "loss": 0.38, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 1.1499380421313508, | |
| "grad_norm": 0.7869399289494947, | |
| "learning_rate": 7.592599406571763e-05, | |
| "loss": 0.3802, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 1.1548946716232962, | |
| "grad_norm": 0.6729360063352638, | |
| "learning_rate": 7.586465666127187e-05, | |
| "loss": 0.3769, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 1.1598513011152416, | |
| "grad_norm": 0.736581035090937, | |
| "learning_rate": 7.580288611599554e-05, | |
| "loss": 0.3824, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 1.164807930607187, | |
| "grad_norm": 0.6709797895211724, | |
| "learning_rate": 7.574068317589734e-05, | |
| "loss": 0.3775, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.1697645600991327, | |
| "grad_norm": 0.6148419568958634, | |
| "learning_rate": 7.567804859220802e-05, | |
| "loss": 0.3776, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 1.174721189591078, | |
| "grad_norm": 0.6467262658722034, | |
| "learning_rate": 7.561498312137135e-05, | |
| "loss": 0.3755, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 1.1796778190830235, | |
| "grad_norm": 0.5461086867691934, | |
| "learning_rate": 7.555148752503495e-05, | |
| "loss": 0.3828, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 1.1846344485749691, | |
| "grad_norm": 0.4599155278425996, | |
| "learning_rate": 7.548756257004108e-05, | |
| "loss": 0.382, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 1.1895910780669146, | |
| "grad_norm": 0.46455442182053813, | |
| "learning_rate": 7.542320902841746e-05, | |
| "loss": 0.3768, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.19454770755886, | |
| "grad_norm": 0.34719408842324345, | |
| "learning_rate": 7.535842767736784e-05, | |
| "loss": 0.3768, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 1.1995043370508054, | |
| "grad_norm": 0.37653138917342843, | |
| "learning_rate": 7.52932192992627e-05, | |
| "loss": 0.376, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 1.2044609665427508, | |
| "grad_norm": 0.41966657762815895, | |
| "learning_rate": 7.522758468162975e-05, | |
| "loss": 0.3815, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 1.2094175960346965, | |
| "grad_norm": 0.3618542297929767, | |
| "learning_rate": 7.516152461714445e-05, | |
| "loss": 0.3765, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 1.2143742255266419, | |
| "grad_norm": 0.5657649815473641, | |
| "learning_rate": 7.509503990362039e-05, | |
| "loss": 0.378, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.2193308550185873, | |
| "grad_norm": 0.6892489845465257, | |
| "learning_rate": 7.502813134399974e-05, | |
| "loss": 0.3792, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.224287484510533, | |
| "grad_norm": 0.727166972122069, | |
| "learning_rate": 7.496079974634342e-05, | |
| "loss": 0.3745, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 1.2292441140024783, | |
| "grad_norm": 0.8477735977905793, | |
| "learning_rate": 7.48930459238215e-05, | |
| "loss": 0.3803, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.2342007434944238, | |
| "grad_norm": 1.1470825347340907, | |
| "learning_rate": 7.482487069470325e-05, | |
| "loss": 0.3783, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 1.2391573729863692, | |
| "grad_norm": 0.9004187144612258, | |
| "learning_rate": 7.475627488234733e-05, | |
| "loss": 0.3756, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.2441140024783148, | |
| "grad_norm": 0.7971542884599199, | |
| "learning_rate": 7.46872593151918e-05, | |
| "loss": 0.3765, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 1.2490706319702602, | |
| "grad_norm": 0.9413192899083336, | |
| "learning_rate": 7.461782482674417e-05, | |
| "loss": 0.3752, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.2540272614622057, | |
| "grad_norm": 1.0620179749799983, | |
| "learning_rate": 7.454797225557129e-05, | |
| "loss": 0.3829, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 1.258983890954151, | |
| "grad_norm": 0.9188902988306181, | |
| "learning_rate": 7.44777024452892e-05, | |
| "loss": 0.3811, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.2639405204460967, | |
| "grad_norm": 0.76176279520022, | |
| "learning_rate": 7.440701624455303e-05, | |
| "loss": 0.3781, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.2688971499380421, | |
| "grad_norm": 0.7176306699252524, | |
| "learning_rate": 7.433591450704667e-05, | |
| "loss": 0.3726, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.2738537794299876, | |
| "grad_norm": 0.8007478629584447, | |
| "learning_rate": 7.426439809147247e-05, | |
| "loss": 0.3776, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 1.2788104089219332, | |
| "grad_norm": 0.9843733510756626, | |
| "learning_rate": 7.419246786154094e-05, | |
| "loss": 0.3735, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.2837670384138786, | |
| "grad_norm": 0.9162149531984846, | |
| "learning_rate": 7.412012468596022e-05, | |
| "loss": 0.3806, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 1.288723667905824, | |
| "grad_norm": 0.6176604637207814, | |
| "learning_rate": 7.404736943842562e-05, | |
| "loss": 0.3769, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.2936802973977695, | |
| "grad_norm": 0.5205293597911049, | |
| "learning_rate": 7.397420299760911e-05, | |
| "loss": 0.3743, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 1.2986369268897149, | |
| "grad_norm": 0.49261144114942623, | |
| "learning_rate": 7.39006262471487e-05, | |
| "loss": 0.3758, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.3035935563816605, | |
| "grad_norm": 0.4679036077993508, | |
| "learning_rate": 7.38266400756377e-05, | |
| "loss": 0.379, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 1.308550185873606, | |
| "grad_norm": 0.5193217780337118, | |
| "learning_rate": 7.375224537661407e-05, | |
| "loss": 0.3731, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.3135068153655514, | |
| "grad_norm": 0.5536487287786134, | |
| "learning_rate": 7.367744304854958e-05, | |
| "loss": 0.3736, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.318463444857497, | |
| "grad_norm": 0.4743844504481324, | |
| "learning_rate": 7.360223399483897e-05, | |
| "loss": 0.3763, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.3234200743494424, | |
| "grad_norm": 0.45973522881542617, | |
| "learning_rate": 7.352661912378909e-05, | |
| "loss": 0.373, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 1.3283767038413878, | |
| "grad_norm": 0.48786805257589877, | |
| "learning_rate": 7.34505993486078e-05, | |
| "loss": 0.3779, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 0.46082909055767357, | |
| "learning_rate": 7.33741755873931e-05, | |
| "loss": 0.3726, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 1.3382899628252787, | |
| "grad_norm": 0.40480055483099847, | |
| "learning_rate": 7.329734876312192e-05, | |
| "loss": 0.3683, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.3432465923172243, | |
| "grad_norm": 0.5260288211761569, | |
| "learning_rate": 7.322011980363908e-05, | |
| "loss": 0.375, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 1.3482032218091697, | |
| "grad_norm": 0.8000585333697147, | |
| "learning_rate": 7.314248964164594e-05, | |
| "loss": 0.3812, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.3531598513011152, | |
| "grad_norm": 1.1176749895674423, | |
| "learning_rate": 7.306445921468928e-05, | |
| "loss": 0.3784, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 1.3581164807930608, | |
| "grad_norm": 0.8643033686238092, | |
| "learning_rate": 7.29860294651499e-05, | |
| "loss": 0.3765, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.3630731102850062, | |
| "grad_norm": 0.6961833389784987, | |
| "learning_rate": 7.290720134023128e-05, | |
| "loss": 0.3793, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.3680297397769516, | |
| "grad_norm": 0.8897368708021905, | |
| "learning_rate": 7.282797579194808e-05, | |
| "loss": 0.3764, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.3729863692688973, | |
| "grad_norm": 1.157059505215818, | |
| "learning_rate": 7.27483537771147e-05, | |
| "loss": 0.3835, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 1.3779429987608427, | |
| "grad_norm": 0.9009906161404626, | |
| "learning_rate": 7.26683362573337e-05, | |
| "loss": 0.3766, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.3828996282527881, | |
| "grad_norm": 0.745181325850118, | |
| "learning_rate": 7.258792419898416e-05, | |
| "loss": 0.3759, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 1.3878562577447335, | |
| "grad_norm": 0.6749818636165081, | |
| "learning_rate": 7.250711857321012e-05, | |
| "loss": 0.3744, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.392812887236679, | |
| "grad_norm": 0.5923790849381074, | |
| "learning_rate": 7.24259203559087e-05, | |
| "loss": 0.3751, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 1.3977695167286246, | |
| "grad_norm": 0.8935001689053471, | |
| "learning_rate": 7.234433052771838e-05, | |
| "loss": 0.3773, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.40272614622057, | |
| "grad_norm": 1.0946320595396666, | |
| "learning_rate": 7.226235007400722e-05, | |
| "loss": 0.3729, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 1.4076827757125154, | |
| "grad_norm": 0.6024341847073685, | |
| "learning_rate": 7.217997998486085e-05, | |
| "loss": 0.3725, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.412639405204461, | |
| "grad_norm": 0.4608665149912086, | |
| "learning_rate": 7.209722125507059e-05, | |
| "loss": 0.3716, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 1.4175960346964065, | |
| "grad_norm": 0.7816845006015856, | |
| "learning_rate": 7.20140748841214e-05, | |
| "loss": 0.3753, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.422552664188352, | |
| "grad_norm": 0.8396407484972205, | |
| "learning_rate": 7.19305418761798e-05, | |
| "loss": 0.3754, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 1.4275092936802973, | |
| "grad_norm": 0.6099391657297455, | |
| "learning_rate": 7.18466232400818e-05, | |
| "loss": 0.3693, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.4324659231722427, | |
| "grad_norm": 0.40684831008388006, | |
| "learning_rate": 7.176231998932067e-05, | |
| "loss": 0.3725, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 1.4374225526641884, | |
| "grad_norm": 0.42586071135075304, | |
| "learning_rate": 7.167763314203465e-05, | |
| "loss": 0.3744, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.4423791821561338, | |
| "grad_norm": 0.43053523382268793, | |
| "learning_rate": 7.159256372099481e-05, | |
| "loss": 0.3708, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 1.4473358116480792, | |
| "grad_norm": 0.4113030106637991, | |
| "learning_rate": 7.150711275359252e-05, | |
| "loss": 0.3766, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.4522924411400249, | |
| "grad_norm": 0.4387752897731675, | |
| "learning_rate": 7.142128127182714e-05, | |
| "loss": 0.3696, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 1.4572490706319703, | |
| "grad_norm": 0.4738786254884929, | |
| "learning_rate": 7.133507031229358e-05, | |
| "loss": 0.3701, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.4622057001239157, | |
| "grad_norm": 0.5336855316791109, | |
| "learning_rate": 7.124848091616966e-05, | |
| "loss": 0.3766, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 1.4671623296158613, | |
| "grad_norm": 0.525640287895575, | |
| "learning_rate": 7.116151412920369e-05, | |
| "loss": 0.3691, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.4721189591078068, | |
| "grad_norm": 0.6208954999535536, | |
| "learning_rate": 7.107417100170171e-05, | |
| "loss": 0.3735, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 1.4770755885997522, | |
| "grad_norm": 0.6750123425630558, | |
| "learning_rate": 7.098645258851491e-05, | |
| "loss": 0.3732, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.4820322180916976, | |
| "grad_norm": 0.6877081965958101, | |
| "learning_rate": 7.089835994902679e-05, | |
| "loss": 0.3698, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 1.486988847583643, | |
| "grad_norm": 0.7156301301268085, | |
| "learning_rate": 7.080989414714045e-05, | |
| "loss": 0.3741, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.4919454770755887, | |
| "grad_norm": 0.7147095744659843, | |
| "learning_rate": 7.072105625126568e-05, | |
| "loss": 0.3678, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 1.496902106567534, | |
| "grad_norm": 0.7610853993131405, | |
| "learning_rate": 7.063184733430615e-05, | |
| "loss": 0.3697, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.5018587360594795, | |
| "grad_norm": 0.9061730193624111, | |
| "learning_rate": 7.054226847364627e-05, | |
| "loss": 0.3756, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 1.5068153655514251, | |
| "grad_norm": 1.0523239987620783, | |
| "learning_rate": 7.045232075113841e-05, | |
| "loss": 0.3746, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.5117719950433703, | |
| "grad_norm": 0.9900385124301968, | |
| "learning_rate": 7.036200525308966e-05, | |
| "loss": 0.3686, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.516728624535316, | |
| "grad_norm": 0.9815673674275408, | |
| "learning_rate": 7.02713230702488e-05, | |
| "loss": 0.3746, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.5216852540272616, | |
| "grad_norm": 0.84313540646779, | |
| "learning_rate": 7.018027529779302e-05, | |
| "loss": 0.3728, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 1.5266418835192068, | |
| "grad_norm": 0.4618784111325805, | |
| "learning_rate": 7.00888630353149e-05, | |
| "loss": 0.3798, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.5315985130111525, | |
| "grad_norm": 0.41703424984720067, | |
| "learning_rate": 6.999708738680886e-05, | |
| "loss": 0.3689, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 1.5365551425030979, | |
| "grad_norm": 0.5913215667695161, | |
| "learning_rate": 6.99049494606581e-05, | |
| "loss": 0.372, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.5415117719950433, | |
| "grad_norm": 0.697528011758827, | |
| "learning_rate": 6.981245036962098e-05, | |
| "loss": 0.3741, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 1.546468401486989, | |
| "grad_norm": 0.5699378604300869, | |
| "learning_rate": 6.971959123081771e-05, | |
| "loss": 0.3723, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.5514250309789344, | |
| "grad_norm": 0.4182368238977319, | |
| "learning_rate": 6.962637316571687e-05, | |
| "loss": 0.3715, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 1.5563816604708798, | |
| "grad_norm": 0.37941077478326063, | |
| "learning_rate": 6.95327973001218e-05, | |
| "loss": 0.3676, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.5613382899628254, | |
| "grad_norm": 0.46011385805195815, | |
| "learning_rate": 6.943886476415699e-05, | |
| "loss": 0.3706, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 1.5662949194547706, | |
| "grad_norm": 0.5885696797756296, | |
| "learning_rate": 6.934457669225454e-05, | |
| "loss": 0.375, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.5712515489467163, | |
| "grad_norm": 0.6601349168571249, | |
| "learning_rate": 6.924993422314035e-05, | |
| "loss": 0.3754, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 1.5762081784386617, | |
| "grad_norm": 0.746944923527877, | |
| "learning_rate": 6.915493849982041e-05, | |
| "loss": 0.3695, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.581164807930607, | |
| "grad_norm": 0.7143244562614945, | |
| "learning_rate": 6.905959066956703e-05, | |
| "loss": 0.3723, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 1.5861214374225527, | |
| "grad_norm": 0.4858977001025183, | |
| "learning_rate": 6.896389188390488e-05, | |
| "loss": 0.3661, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.5910780669144982, | |
| "grad_norm": 0.3903518503904867, | |
| "learning_rate": 6.886784329859722e-05, | |
| "loss": 0.3659, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 1.5960346964064436, | |
| "grad_norm": 0.5780590658779307, | |
| "learning_rate": 6.877144607363183e-05, | |
| "loss": 0.368, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.6009913258983892, | |
| "grad_norm": 0.6094396527936917, | |
| "learning_rate": 6.867470137320707e-05, | |
| "loss": 0.3751, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 1.6059479553903344, | |
| "grad_norm": 0.5299948076513158, | |
| "learning_rate": 6.857761036571778e-05, | |
| "loss": 0.3701, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.61090458488228, | |
| "grad_norm": 0.32554048958176607, | |
| "learning_rate": 6.84801742237412e-05, | |
| "loss": 0.3658, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 1.6158612143742255, | |
| "grad_norm": 0.3917198928259944, | |
| "learning_rate": 6.838239412402275e-05, | |
| "loss": 0.3663, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.620817843866171, | |
| "grad_norm": 0.580429864868858, | |
| "learning_rate": 6.828427124746191e-05, | |
| "loss": 0.3711, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 1.6257744733581165, | |
| "grad_norm": 0.4122977155429013, | |
| "learning_rate": 6.818580677909787e-05, | |
| "loss": 0.3667, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.630731102850062, | |
| "grad_norm": 0.3311640599067616, | |
| "learning_rate": 6.808700190809529e-05, | |
| "loss": 0.3653, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 1.6356877323420074, | |
| "grad_norm": 0.4555478935608727, | |
| "learning_rate": 6.798785782772987e-05, | |
| "loss": 0.3695, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.640644361833953, | |
| "grad_norm": 0.5386186943934842, | |
| "learning_rate": 6.7888375735374e-05, | |
| "loss": 0.3715, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 1.6456009913258984, | |
| "grad_norm": 0.5119260829822706, | |
| "learning_rate": 6.778855683248226e-05, | |
| "loss": 0.3678, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.6505576208178439, | |
| "grad_norm": 0.4174256955489929, | |
| "learning_rate": 6.768840232457691e-05, | |
| "loss": 0.3701, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 1.6555142503097895, | |
| "grad_norm": 0.4330935030350318, | |
| "learning_rate": 6.75879134212334e-05, | |
| "loss": 0.3647, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.6604708798017347, | |
| "grad_norm": 0.4977143818569726, | |
| "learning_rate": 6.748709133606562e-05, | |
| "loss": 0.3645, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 1.6654275092936803, | |
| "grad_norm": 0.4632344110293173, | |
| "learning_rate": 6.738593728671141e-05, | |
| "loss": 0.3688, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.6703841387856257, | |
| "grad_norm": 0.4343232693352364, | |
| "learning_rate": 6.72844524948177e-05, | |
| "loss": 0.3716, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 1.6753407682775712, | |
| "grad_norm": 0.5497224652685649, | |
| "learning_rate": 6.718263818602587e-05, | |
| "loss": 0.3687, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.6802973977695168, | |
| "grad_norm": 0.6580306405235301, | |
| "learning_rate": 6.708049558995692e-05, | |
| "loss": 0.3668, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 1.6852540272614622, | |
| "grad_norm": 0.6272479044564085, | |
| "learning_rate": 6.697802594019659e-05, | |
| "loss": 0.3736, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.6902106567534076, | |
| "grad_norm": 0.49972287263472315, | |
| "learning_rate": 6.687523047428043e-05, | |
| "loss": 0.3721, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 1.6951672862453533, | |
| "grad_norm": 0.4860929606946379, | |
| "learning_rate": 6.677211043367898e-05, | |
| "loss": 0.3691, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.7001239157372985, | |
| "grad_norm": 0.5528988110266627, | |
| "learning_rate": 6.666866706378268e-05, | |
| "loss": 0.3663, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.7050805452292441, | |
| "grad_norm": 0.5931778952264959, | |
| "learning_rate": 6.65649016138868e-05, | |
| "loss": 0.367, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.7100371747211895, | |
| "grad_norm": 0.6219580459440871, | |
| "learning_rate": 6.646081533717647e-05, | |
| "loss": 0.3709, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.714993804213135, | |
| "grad_norm": 0.62122822614519, | |
| "learning_rate": 6.635640949071141e-05, | |
| "loss": 0.3682, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.7199504337050806, | |
| "grad_norm": 0.6383264360282018, | |
| "learning_rate": 6.625168533541086e-05, | |
| "loss": 0.3759, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.724907063197026, | |
| "grad_norm": 0.7086745256179291, | |
| "learning_rate": 6.614664413603826e-05, | |
| "loss": 0.3743, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.7298636926889714, | |
| "grad_norm": 0.7914076334715233, | |
| "learning_rate": 6.60412871611861e-05, | |
| "loss": 0.366, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.734820322180917, | |
| "grad_norm": 0.6800636113803615, | |
| "learning_rate": 6.593561568326045e-05, | |
| "loss": 0.3653, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.7397769516728625, | |
| "grad_norm": 0.45351689701921466, | |
| "learning_rate": 6.582963097846567e-05, | |
| "loss": 0.3671, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.744733581164808, | |
| "grad_norm": 0.46229129788058876, | |
| "learning_rate": 6.572333432678901e-05, | |
| "loss": 0.3675, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.7496902106567536, | |
| "grad_norm": 0.5621458743860188, | |
| "learning_rate": 6.561672701198515e-05, | |
| "loss": 0.3709, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.7546468401486988, | |
| "grad_norm": 0.6591816312736469, | |
| "learning_rate": 6.550981032156064e-05, | |
| "loss": 0.3678, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.7596034696406444, | |
| "grad_norm": 0.858815006302805, | |
| "learning_rate": 6.540258554675837e-05, | |
| "loss": 0.3687, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.7645600991325898, | |
| "grad_norm": 1.0641612393607265, | |
| "learning_rate": 6.529505398254209e-05, | |
| "loss": 0.3682, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.7695167286245352, | |
| "grad_norm": 0.8476056220579151, | |
| "learning_rate": 6.518721692758056e-05, | |
| "loss": 0.3692, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.7744733581164809, | |
| "grad_norm": 0.5833411072292431, | |
| "learning_rate": 6.507907568423205e-05, | |
| "loss": 0.3727, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.7794299876084263, | |
| "grad_norm": 0.39389932942655925, | |
| "learning_rate": 6.497063155852853e-05, | |
| "loss": 0.373, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.7843866171003717, | |
| "grad_norm": 0.4232182241553237, | |
| "learning_rate": 6.48618858601599e-05, | |
| "loss": 0.3761, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.7893432465923174, | |
| "grad_norm": 0.6103087106854266, | |
| "learning_rate": 6.475283990245816e-05, | |
| "loss": 0.3695, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.7942998760842626, | |
| "grad_norm": 0.6002125693210419, | |
| "learning_rate": 6.464349500238162e-05, | |
| "loss": 0.3682, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.7992565055762082, | |
| "grad_norm": 0.4564158999485684, | |
| "learning_rate": 6.453385248049893e-05, | |
| "loss": 0.3689, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.8042131350681536, | |
| "grad_norm": 0.3973850442566215, | |
| "learning_rate": 6.44239136609731e-05, | |
| "loss": 0.3684, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.809169764560099, | |
| "grad_norm": 0.3722004844867567, | |
| "learning_rate": 6.431367987154561e-05, | |
| "loss": 0.3597, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.8141263940520447, | |
| "grad_norm": 0.36135251217138775, | |
| "learning_rate": 6.42031524435203e-05, | |
| "loss": 0.3666, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.81908302354399, | |
| "grad_norm": 0.42456457513430096, | |
| "learning_rate": 6.40923327117473e-05, | |
| "loss": 0.3692, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.8240396530359355, | |
| "grad_norm": 0.34018392891314136, | |
| "learning_rate": 6.398122201460694e-05, | |
| "loss": 0.3582, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.8289962825278812, | |
| "grad_norm": 0.34702535248917055, | |
| "learning_rate": 6.386982169399355e-05, | |
| "loss": 0.362, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.8339529120198264, | |
| "grad_norm": 0.37222768584462623, | |
| "learning_rate": 6.375813309529929e-05, | |
| "loss": 0.3693, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.838909541511772, | |
| "grad_norm": 0.37704314430735963, | |
| "learning_rate": 6.364615756739784e-05, | |
| "loss": 0.3644, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.8438661710037176, | |
| "grad_norm": 0.3614680716279667, | |
| "learning_rate": 6.353389646262823e-05, | |
| "loss": 0.3638, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.8488228004956628, | |
| "grad_norm": 0.3341638078127682, | |
| "learning_rate": 6.34213511367783e-05, | |
| "loss": 0.3635, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.8537794299876085, | |
| "grad_norm": 0.37396523466792747, | |
| "learning_rate": 6.330852294906861e-05, | |
| "loss": 0.3631, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.858736059479554, | |
| "grad_norm": 0.4237992579587528, | |
| "learning_rate": 6.319541326213573e-05, | |
| "loss": 0.3628, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.8636926889714993, | |
| "grad_norm": 0.40425255099410395, | |
| "learning_rate": 6.308202344201602e-05, | |
| "loss": 0.3653, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.868649318463445, | |
| "grad_norm": 0.3209955179999364, | |
| "learning_rate": 6.296835485812899e-05, | |
| "loss": 0.3692, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.8736059479553904, | |
| "grad_norm": 0.3253660638473066, | |
| "learning_rate": 6.285440888326082e-05, | |
| "loss": 0.3612, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.8785625774473358, | |
| "grad_norm": 0.3271459226873318, | |
| "learning_rate": 6.274018689354776e-05, | |
| "loss": 0.3706, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.8835192069392814, | |
| "grad_norm": 0.3149746962127161, | |
| "learning_rate": 6.262569026845949e-05, | |
| "loss": 0.3696, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.8884758364312266, | |
| "grad_norm": 0.30573145047857164, | |
| "learning_rate": 6.251092039078256e-05, | |
| "loss": 0.3675, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.8934324659231723, | |
| "grad_norm": 0.3047375886031863, | |
| "learning_rate": 6.239587864660354e-05, | |
| "loss": 0.3637, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.8983890954151177, | |
| "grad_norm": 0.3941393217498381, | |
| "learning_rate": 6.228056642529242e-05, | |
| "loss": 0.369, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.903345724907063, | |
| "grad_norm": 0.5259788343113935, | |
| "learning_rate": 6.216498511948572e-05, | |
| "loss": 0.3715, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.9083023543990087, | |
| "grad_norm": 0.6494740207626633, | |
| "learning_rate": 6.204913612506975e-05, | |
| "loss": 0.3606, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.9132589838909542, | |
| "grad_norm": 0.7041520852135289, | |
| "learning_rate": 6.193302084116368e-05, | |
| "loss": 0.3641, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.9182156133828996, | |
| "grad_norm": 0.7602483900224043, | |
| "learning_rate": 6.181664067010275e-05, | |
| "loss": 0.3621, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.9231722428748452, | |
| "grad_norm": 0.7899041502702561, | |
| "learning_rate": 6.169999701742118e-05, | |
| "loss": 0.3671, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.9281288723667904, | |
| "grad_norm": 0.7030444055558277, | |
| "learning_rate": 6.158309129183538e-05, | |
| "loss": 0.3656, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.933085501858736, | |
| "grad_norm": 0.6071958638842148, | |
| "learning_rate": 6.146592490522677e-05, | |
| "loss": 0.3659, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.9380421313506815, | |
| "grad_norm": 0.4728524200414151, | |
| "learning_rate": 6.134849927262481e-05, | |
| "loss": 0.3672, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.942998760842627, | |
| "grad_norm": 0.3446842094670103, | |
| "learning_rate": 6.123081581218992e-05, | |
| "loss": 0.3649, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.9479553903345725, | |
| "grad_norm": 0.4022174305496056, | |
| "learning_rate": 6.11128759451963e-05, | |
| "loss": 0.3673, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.952912019826518, | |
| "grad_norm": 0.5636096699586152, | |
| "learning_rate": 6.099468109601483e-05, | |
| "loss": 0.362, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.9578686493184634, | |
| "grad_norm": 0.6819652998130323, | |
| "learning_rate": 6.0876232692095794e-05, | |
| "loss": 0.3661, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.962825278810409, | |
| "grad_norm": 0.6365953298610997, | |
| "learning_rate": 6.075753216395172e-05, | |
| "loss": 0.3632, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.9677819083023544, | |
| "grad_norm": 0.43428749476968237, | |
| "learning_rate": 6.063858094514004e-05, | |
| "loss": 0.3636, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.9727385377942999, | |
| "grad_norm": 0.33548805713556684, | |
| "learning_rate": 6.051938047224582e-05, | |
| "loss": 0.3647, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.9776951672862455, | |
| "grad_norm": 0.41462999397741335, | |
| "learning_rate": 6.0399932184864356e-05, | |
| "loss": 0.3651, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.9826517967781907, | |
| "grad_norm": 0.48226204851258686, | |
| "learning_rate": 6.028023752558387e-05, | |
| "loss": 0.3614, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.9876084262701363, | |
| "grad_norm": 0.5751629936810563, | |
| "learning_rate": 6.0160297939968e-05, | |
| "loss": 0.3665, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.9925650557620818, | |
| "grad_norm": 0.6779156841941448, | |
| "learning_rate": 6.00401148765384e-05, | |
| "loss": 0.3645, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.9975216852540272, | |
| "grad_norm": 0.6950369897000113, | |
| "learning_rate": 5.9919689786757234e-05, | |
| "loss": 0.3954, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 2.002478314745973, | |
| "grad_norm": 0.9229607445545323, | |
| "learning_rate": 5.979902412500963e-05, | |
| "loss": 0.4126, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 2.007434944237918, | |
| "grad_norm": 1.2431445665943202, | |
| "learning_rate": 5.9678119348586115e-05, | |
| "loss": 0.3456, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 2.0123915737298637, | |
| "grad_norm": 0.648853154194552, | |
| "learning_rate": 5.955697691766507e-05, | |
| "loss": 0.3418, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 2.0173482032218093, | |
| "grad_norm": 0.579480891477055, | |
| "learning_rate": 5.943559829529501e-05, | |
| "loss": 0.334, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 2.0223048327137545, | |
| "grad_norm": 0.8551507180871017, | |
| "learning_rate": 5.931398494737696e-05, | |
| "loss": 0.3376, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 2.0272614622057, | |
| "grad_norm": 0.7020237179868006, | |
| "learning_rate": 5.9192138342646785e-05, | |
| "loss": 0.34, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 2.032218091697646, | |
| "grad_norm": 0.6195345824183058, | |
| "learning_rate": 5.907005995265735e-05, | |
| "loss": 0.3401, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.037174721189591, | |
| "grad_norm": 0.5772527139168068, | |
| "learning_rate": 5.894775125176087e-05, | |
| "loss": 0.3292, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 2.0421313506815366, | |
| "grad_norm": 0.5021265872507295, | |
| "learning_rate": 5.882521371709102e-05, | |
| "loss": 0.3381, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 2.047087980173482, | |
| "grad_norm": 0.5894812915546477, | |
| "learning_rate": 5.870244882854513e-05, | |
| "loss": 0.3374, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 2.0520446096654275, | |
| "grad_norm": 0.5506092063644734, | |
| "learning_rate": 5.857945806876631e-05, | |
| "loss": 0.3362, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 2.057001239157373, | |
| "grad_norm": 0.3460835982963106, | |
| "learning_rate": 5.845624292312551e-05, | |
| "loss": 0.3314, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 2.0619578686493183, | |
| "grad_norm": 0.49475849318415527, | |
| "learning_rate": 5.8332804879703634e-05, | |
| "loss": 0.3312, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 2.066914498141264, | |
| "grad_norm": 0.40797927085998376, | |
| "learning_rate": 5.820914542927356e-05, | |
| "loss": 0.3375, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 2.0718711276332096, | |
| "grad_norm": 0.46256125838881207, | |
| "learning_rate": 5.808526606528207e-05, | |
| "loss": 0.3385, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 2.0768277571251548, | |
| "grad_norm": 0.43520996500621334, | |
| "learning_rate": 5.7961168283831914e-05, | |
| "loss": 0.3361, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 2.0817843866171004, | |
| "grad_norm": 0.3275995772725094, | |
| "learning_rate": 5.783685358366363e-05, | |
| "loss": 0.3329, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.086741016109046, | |
| "grad_norm": 0.3508599652724197, | |
| "learning_rate": 5.771232346613759e-05, | |
| "loss": 0.3387, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 2.0916976456009913, | |
| "grad_norm": 0.33993733778247687, | |
| "learning_rate": 5.758757943521568e-05, | |
| "loss": 0.3381, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 2.096654275092937, | |
| "grad_norm": 0.3779989543335282, | |
| "learning_rate": 5.746262299744333e-05, | |
| "loss": 0.3368, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 2.101610904584882, | |
| "grad_norm": 0.4056520143137526, | |
| "learning_rate": 5.7337455661931184e-05, | |
| "loss": 0.3296, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 2.1065675340768277, | |
| "grad_norm": 0.2829188187459132, | |
| "learning_rate": 5.72120789403369e-05, | |
| "loss": 0.3346, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 2.1115241635687734, | |
| "grad_norm": 0.28438834763470383, | |
| "learning_rate": 5.7086494346846954e-05, | |
| "loss": 0.333, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 2.1164807930607186, | |
| "grad_norm": 0.2811433639980777, | |
| "learning_rate": 5.6960703398158265e-05, | |
| "loss": 0.3344, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 2.121437422552664, | |
| "grad_norm": 0.264872240951993, | |
| "learning_rate": 5.683470761345997e-05, | |
| "loss": 0.336, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 2.12639405204461, | |
| "grad_norm": 0.32094685957368546, | |
| "learning_rate": 5.670850851441499e-05, | |
| "loss": 0.335, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 2.131350681536555, | |
| "grad_norm": 0.29696749695480423, | |
| "learning_rate": 5.6582107625141724e-05, | |
| "loss": 0.3344, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 2.1363073110285007, | |
| "grad_norm": 0.31226224047135687, | |
| "learning_rate": 5.6455506472195584e-05, | |
| "loss": 0.3403, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 2.141263940520446, | |
| "grad_norm": 0.3100881935692483, | |
| "learning_rate": 5.6328706584550615e-05, | |
| "loss": 0.3329, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 2.1462205700123915, | |
| "grad_norm": 0.2909272332350804, | |
| "learning_rate": 5.6201709493580964e-05, | |
| "loss": 0.3329, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 2.151177199504337, | |
| "grad_norm": 0.3860693343638538, | |
| "learning_rate": 5.607451673304245e-05, | |
| "loss": 0.3386, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 2.1561338289962824, | |
| "grad_norm": 0.3471498999682872, | |
| "learning_rate": 5.5947129839053996e-05, | |
| "loss": 0.3316, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 2.161090458488228, | |
| "grad_norm": 0.30300923166845395, | |
| "learning_rate": 5.5819550350079096e-05, | |
| "loss": 0.3335, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 2.1660470879801736, | |
| "grad_norm": 0.29394820981141223, | |
| "learning_rate": 5.5691779806907245e-05, | |
| "loss": 0.338, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 2.171003717472119, | |
| "grad_norm": 0.34673047871654705, | |
| "learning_rate": 5.556381975263529e-05, | |
| "loss": 0.3347, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 2.1759603469640645, | |
| "grad_norm": 0.3160177032904872, | |
| "learning_rate": 5.543567173264885e-05, | |
| "loss": 0.3355, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 2.18091697645601, | |
| "grad_norm": 0.22496198223456335, | |
| "learning_rate": 5.5307337294603595e-05, | |
| "loss": 0.3338, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.1858736059479553, | |
| "grad_norm": 0.2614030147556954, | |
| "learning_rate": 5.517881798840662e-05, | |
| "loss": 0.333, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 2.190830235439901, | |
| "grad_norm": 0.2707422405401144, | |
| "learning_rate": 5.505011536619766e-05, | |
| "loss": 0.335, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 2.195786864931846, | |
| "grad_norm": 0.24192751386308842, | |
| "learning_rate": 5.4921230982330374e-05, | |
| "loss": 0.3341, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 2.200743494423792, | |
| "grad_norm": 0.24976556710707468, | |
| "learning_rate": 5.479216639335361e-05, | |
| "loss": 0.3321, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 2.2057001239157374, | |
| "grad_norm": 0.24224759625348416, | |
| "learning_rate": 5.466292315799252e-05, | |
| "loss": 0.3328, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 2.2106567534076826, | |
| "grad_norm": 0.26176016864712703, | |
| "learning_rate": 5.453350283712982e-05, | |
| "loss": 0.3398, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 2.2156133828996283, | |
| "grad_norm": 0.2637805491990165, | |
| "learning_rate": 5.4403906993786886e-05, | |
| "loss": 0.3395, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 2.220570012391574, | |
| "grad_norm": 0.24268721259176745, | |
| "learning_rate": 5.4274137193104915e-05, | |
| "loss": 0.3349, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 2.225526641883519, | |
| "grad_norm": 0.24270028247035141, | |
| "learning_rate": 5.4144195002325975e-05, | |
| "loss": 0.335, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 2.2304832713754648, | |
| "grad_norm": 0.3093752787269832, | |
| "learning_rate": 5.401408199077413e-05, | |
| "loss": 0.3386, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.23543990086741, | |
| "grad_norm": 0.3330988364901497, | |
| "learning_rate": 5.3883799729836464e-05, | |
| "loss": 0.3377, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 2.2403965303593556, | |
| "grad_norm": 0.3445158375784873, | |
| "learning_rate": 5.3753349792944066e-05, | |
| "loss": 0.3332, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 2.2453531598513012, | |
| "grad_norm": 0.42889232065694805, | |
| "learning_rate": 5.3622733755553126e-05, | |
| "loss": 0.3386, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 2.2503097893432464, | |
| "grad_norm": 0.36669493271134557, | |
| "learning_rate": 5.349195319512577e-05, | |
| "loss": 0.34, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 2.255266418835192, | |
| "grad_norm": 0.2651829328711018, | |
| "learning_rate": 5.3361009691111154e-05, | |
| "loss": 0.3326, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 2.2602230483271377, | |
| "grad_norm": 0.21243536113292363, | |
| "learning_rate": 5.322990482492625e-05, | |
| "loss": 0.3289, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 2.265179677819083, | |
| "grad_norm": 0.23488873689796186, | |
| "learning_rate": 5.30986401799369e-05, | |
| "loss": 0.3298, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 2.2701363073110286, | |
| "grad_norm": 0.28278539825982185, | |
| "learning_rate": 5.296721734143854e-05, | |
| "loss": 0.335, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 2.2750929368029738, | |
| "grad_norm": 0.28374872230764553, | |
| "learning_rate": 5.2835637896637115e-05, | |
| "loss": 0.3338, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 2.2800495662949194, | |
| "grad_norm": 0.28778231594332443, | |
| "learning_rate": 5.2703903434630024e-05, | |
| "loss": 0.3358, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.285006195786865, | |
| "grad_norm": 0.2821746117801878, | |
| "learning_rate": 5.257201554638668e-05, | |
| "loss": 0.335, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 2.2899628252788102, | |
| "grad_norm": 0.22260019555120894, | |
| "learning_rate": 5.243997582472956e-05, | |
| "loss": 0.3321, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 2.294919454770756, | |
| "grad_norm": 0.23359117637012264, | |
| "learning_rate": 5.2307785864314764e-05, | |
| "loss": 0.3341, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 2.2998760842627015, | |
| "grad_norm": 0.2655316000215375, | |
| "learning_rate": 5.21754472616129e-05, | |
| "loss": 0.3409, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 2.3048327137546467, | |
| "grad_norm": 0.2670026997209511, | |
| "learning_rate": 5.204296161488968e-05, | |
| "loss": 0.3377, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 2.3097893432465924, | |
| "grad_norm": 0.1999091301337533, | |
| "learning_rate": 5.1910330524186745e-05, | |
| "loss": 0.3384, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 2.314745972738538, | |
| "grad_norm": 0.2517364041892872, | |
| "learning_rate": 5.1777555591302236e-05, | |
| "loss": 0.34, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 2.319702602230483, | |
| "grad_norm": 0.24227717495907994, | |
| "learning_rate": 5.164463841977151e-05, | |
| "loss": 0.3392, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 2.324659231722429, | |
| "grad_norm": 0.24877649271423527, | |
| "learning_rate": 5.151158061484774e-05, | |
| "loss": 0.3377, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 2.329615861214374, | |
| "grad_norm": 0.18945359917741403, | |
| "learning_rate": 5.137838378348255e-05, | |
| "loss": 0.3436, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.3345724907063197, | |
| "grad_norm": 0.19893332403593147, | |
| "learning_rate": 5.124504953430658e-05, | |
| "loss": 0.3376, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 2.3395291201982653, | |
| "grad_norm": 0.2137047502635654, | |
| "learning_rate": 5.111157947761012e-05, | |
| "loss": 0.3289, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 2.3444857496902105, | |
| "grad_norm": 0.25186226963289404, | |
| "learning_rate": 5.097797522532356e-05, | |
| "loss": 0.3315, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 2.349442379182156, | |
| "grad_norm": 0.255051673508901, | |
| "learning_rate": 5.084423839099805e-05, | |
| "loss": 0.339, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 2.354399008674102, | |
| "grad_norm": 0.20323499501401712, | |
| "learning_rate": 5.071037058978589e-05, | |
| "loss": 0.3353, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 2.359355638166047, | |
| "grad_norm": 0.20801901587197655, | |
| "learning_rate": 5.0576373438421115e-05, | |
| "loss": 0.3387, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 2.3643122676579926, | |
| "grad_norm": 0.22381819606938655, | |
| "learning_rate": 5.044224855519991e-05, | |
| "loss": 0.3319, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 2.3692688971499383, | |
| "grad_norm": 0.2196511953550138, | |
| "learning_rate": 5.030799755996111e-05, | |
| "loss": 0.3307, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 2.3742255266418835, | |
| "grad_norm": 0.22301886381698216, | |
| "learning_rate": 5.0173622074066604e-05, | |
| "loss": 0.3364, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 2.379182156133829, | |
| "grad_norm": 0.2051336333121646, | |
| "learning_rate": 5.0039123720381765e-05, | |
| "loss": 0.337, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.3841387856257743, | |
| "grad_norm": 0.20300166533913025, | |
| "learning_rate": 4.990450412325586e-05, | |
| "loss": 0.3306, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 2.38909541511772, | |
| "grad_norm": 0.2374307593924014, | |
| "learning_rate": 4.9769764908502413e-05, | |
| "loss": 0.3428, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 2.3940520446096656, | |
| "grad_norm": 0.21175057446827547, | |
| "learning_rate": 4.96349077033796e-05, | |
| "loss": 0.3373, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 2.399008674101611, | |
| "grad_norm": 0.20563440127736773, | |
| "learning_rate": 4.949993413657057e-05, | |
| "loss": 0.3319, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 2.4039653035935564, | |
| "grad_norm": 0.22371247404771757, | |
| "learning_rate": 4.936484583816376e-05, | |
| "loss": 0.3383, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 2.4089219330855016, | |
| "grad_norm": 0.20615797475462871, | |
| "learning_rate": 4.922964443963326e-05, | |
| "loss": 0.3376, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 2.4138785625774473, | |
| "grad_norm": 0.22893533763150797, | |
| "learning_rate": 4.9094331573819096e-05, | |
| "loss": 0.337, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 2.418835192069393, | |
| "grad_norm": 0.23517939853671133, | |
| "learning_rate": 4.895890887490743e-05, | |
| "loss": 0.3356, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 2.423791821561338, | |
| "grad_norm": 0.20057429424724502, | |
| "learning_rate": 4.8823377978410964e-05, | |
| "loss": 0.3376, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 2.4287484510532837, | |
| "grad_norm": 0.21919829961802664, | |
| "learning_rate": 4.8687740521149056e-05, | |
| "loss": 0.3337, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.4337050805452294, | |
| "grad_norm": 0.26014242642752583, | |
| "learning_rate": 4.855199814122804e-05, | |
| "loss": 0.3352, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 2.4386617100371746, | |
| "grad_norm": 0.2584940458127453, | |
| "learning_rate": 4.84161524780214e-05, | |
| "loss": 0.3288, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 2.44361833952912, | |
| "grad_norm": 0.24674245573299244, | |
| "learning_rate": 4.828020517214997e-05, | |
| "loss": 0.3303, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 2.448574969021066, | |
| "grad_norm": 0.3158488718882322, | |
| "learning_rate": 4.8144157865462176e-05, | |
| "loss": 0.3404, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 2.453531598513011, | |
| "grad_norm": 0.3413638205153234, | |
| "learning_rate": 4.8008012201014096e-05, | |
| "loss": 0.3333, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 2.4584882280049567, | |
| "grad_norm": 0.37582492333886947, | |
| "learning_rate": 4.787176982304975e-05, | |
| "loss": 0.3282, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 2.463444857496902, | |
| "grad_norm": 0.3793384775099229, | |
| "learning_rate": 4.7735432376981125e-05, | |
| "loss": 0.3381, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 2.4684014869888475, | |
| "grad_norm": 0.2816384401529513, | |
| "learning_rate": 4.759900150936839e-05, | |
| "loss": 0.3322, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 2.473358116480793, | |
| "grad_norm": 0.24741234458553119, | |
| "learning_rate": 4.7462478867899944e-05, | |
| "loss": 0.3338, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 2.4783147459727384, | |
| "grad_norm": 0.22710242981496517, | |
| "learning_rate": 4.7325866101372585e-05, | |
| "loss": 0.3351, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.483271375464684, | |
| "grad_norm": 0.23327941149354073, | |
| "learning_rate": 4.7189164859671526e-05, | |
| "loss": 0.3355, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 2.4882280049566297, | |
| "grad_norm": 0.25610086026152373, | |
| "learning_rate": 4.705237679375052e-05, | |
| "loss": 0.333, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 2.493184634448575, | |
| "grad_norm": 0.29590363645911516, | |
| "learning_rate": 4.69155035556119e-05, | |
| "loss": 0.3352, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 2.4981412639405205, | |
| "grad_norm": 0.3296002633563399, | |
| "learning_rate": 4.6778546798286633e-05, | |
| "loss": 0.3362, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 2.503097893432466, | |
| "grad_norm": 0.2633441308891435, | |
| "learning_rate": 4.664150817581435e-05, | |
| "loss": 0.3295, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 2.5080545229244113, | |
| "grad_norm": 0.2003652549018511, | |
| "learning_rate": 4.650438934322337e-05, | |
| "loss": 0.3344, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.513011152416357, | |
| "grad_norm": 0.2298644200478977, | |
| "learning_rate": 4.6367191956510764e-05, | |
| "loss": 0.332, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 2.517967781908302, | |
| "grad_norm": 0.23379538562070729, | |
| "learning_rate": 4.622991767262222e-05, | |
| "loss": 0.3366, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 2.522924411400248, | |
| "grad_norm": 0.3007388205665313, | |
| "learning_rate": 4.60925681494322e-05, | |
| "loss": 0.336, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 2.5278810408921935, | |
| "grad_norm": 0.27981497022974244, | |
| "learning_rate": 4.595514504572382e-05, | |
| "loss": 0.3348, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.5328376703841387, | |
| "grad_norm": 0.20408295200827142, | |
| "learning_rate": 4.58176500211688e-05, | |
| "loss": 0.3374, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 2.5377942998760843, | |
| "grad_norm": 0.19246516876902225, | |
| "learning_rate": 4.568008473630749e-05, | |
| "loss": 0.3342, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.5427509293680295, | |
| "grad_norm": 0.23233073687830116, | |
| "learning_rate": 4.5542450852528764e-05, | |
| "loss": 0.3359, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 2.547707558859975, | |
| "grad_norm": 0.22140903549459723, | |
| "learning_rate": 4.5404750032049984e-05, | |
| "loss": 0.3381, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.5526641883519208, | |
| "grad_norm": 0.22974445394924758, | |
| "learning_rate": 4.526698393789691e-05, | |
| "loss": 0.3353, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 2.5576208178438664, | |
| "grad_norm": 0.30986295920934853, | |
| "learning_rate": 4.51291542338836e-05, | |
| "loss": 0.3405, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.5625774473358116, | |
| "grad_norm": 0.2719615696911325, | |
| "learning_rate": 4.499126258459235e-05, | |
| "loss": 0.3355, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 2.5675340768277573, | |
| "grad_norm": 0.16191374687101495, | |
| "learning_rate": 4.4853310655353586e-05, | |
| "loss": 0.3352, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.5724907063197024, | |
| "grad_norm": 0.2532907979488151, | |
| "learning_rate": 4.471530011222572e-05, | |
| "loss": 0.3337, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 2.577447335811648, | |
| "grad_norm": 0.23967557439949316, | |
| "learning_rate": 4.457723262197506e-05, | |
| "loss": 0.3377, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.5824039653035937, | |
| "grad_norm": 0.1912898551193947, | |
| "learning_rate": 4.443910985205566e-05, | |
| "loss": 0.3278, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 2.587360594795539, | |
| "grad_norm": 0.19961800458113116, | |
| "learning_rate": 4.430093347058921e-05, | |
| "loss": 0.3364, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.5923172242874846, | |
| "grad_norm": 0.26900359284803255, | |
| "learning_rate": 4.416270514634485e-05, | |
| "loss": 0.3351, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 2.5972738537794298, | |
| "grad_norm": 0.21472946546241412, | |
| "learning_rate": 4.402442654871905e-05, | |
| "loss": 0.3314, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.6022304832713754, | |
| "grad_norm": 0.1872384290524764, | |
| "learning_rate": 4.388609934771544e-05, | |
| "loss": 0.3385, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 2.607187112763321, | |
| "grad_norm": 0.24976194756574016, | |
| "learning_rate": 4.374772521392463e-05, | |
| "loss": 0.3368, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.6121437422552667, | |
| "grad_norm": 0.2249642971113071, | |
| "learning_rate": 4.360930581850405e-05, | |
| "loss": 0.3392, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 2.617100371747212, | |
| "grad_norm": 0.2054171586674057, | |
| "learning_rate": 4.347084283315773e-05, | |
| "loss": 0.3374, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.6220570012391575, | |
| "grad_norm": 0.1827910858801494, | |
| "learning_rate": 4.333233793011619e-05, | |
| "loss": 0.3322, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 2.6270136307311027, | |
| "grad_norm": 0.1846393876427912, | |
| "learning_rate": 4.3193792782116164e-05, | |
| "loss": 0.3348, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.6319702602230484, | |
| "grad_norm": 0.2422856985597145, | |
| "learning_rate": 4.305520906238041e-05, | |
| "loss": 0.3327, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 2.636926889714994, | |
| "grad_norm": 0.23896322328508743, | |
| "learning_rate": 4.291658844459757e-05, | |
| "loss": 0.3338, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.641883519206939, | |
| "grad_norm": 0.18112180383848728, | |
| "learning_rate": 4.277793260290183e-05, | |
| "loss": 0.3364, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 2.646840148698885, | |
| "grad_norm": 0.23428378261560656, | |
| "learning_rate": 4.2639243211852895e-05, | |
| "loss": 0.3333, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.65179677819083, | |
| "grad_norm": 0.19183119613946728, | |
| "learning_rate": 4.2500521946415514e-05, | |
| "loss": 0.3365, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 2.6567534076827757, | |
| "grad_norm": 0.20154211295704755, | |
| "learning_rate": 4.23617704819395e-05, | |
| "loss": 0.3336, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.6617100371747213, | |
| "grad_norm": 0.2427391083128824, | |
| "learning_rate": 4.222299049413932e-05, | |
| "loss": 0.3324, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 0.18062474951109844, | |
| "learning_rate": 4.208418365907393e-05, | |
| "loss": 0.3347, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.671623296158612, | |
| "grad_norm": 0.2305349411435518, | |
| "learning_rate": 4.194535165312653e-05, | |
| "loss": 0.333, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 2.6765799256505574, | |
| "grad_norm": 0.21312698762796153, | |
| "learning_rate": 4.1806496152984304e-05, | |
| "loss": 0.3305, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.681536555142503, | |
| "grad_norm": 0.1818189463567409, | |
| "learning_rate": 4.16676188356182e-05, | |
| "loss": 0.3336, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 2.6864931846344486, | |
| "grad_norm": 0.19922786477591123, | |
| "learning_rate": 4.1528721378262605e-05, | |
| "loss": 0.3282, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 2.6914498141263943, | |
| "grad_norm": 0.18034983700167911, | |
| "learning_rate": 4.138980545839522e-05, | |
| "loss": 0.3345, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 2.6964064436183395, | |
| "grad_norm": 0.19797325344913994, | |
| "learning_rate": 4.125087275371661e-05, | |
| "loss": 0.3354, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 2.701363073110285, | |
| "grad_norm": 0.20423478730918823, | |
| "learning_rate": 4.1111924942130164e-05, | |
| "loss": 0.3339, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 2.7063197026022303, | |
| "grad_norm": 0.19187739598428746, | |
| "learning_rate": 4.097296370172164e-05, | |
| "loss": 0.3317, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 2.711276332094176, | |
| "grad_norm": 0.18918569677735037, | |
| "learning_rate": 4.083399071073902e-05, | |
| "loss": 0.3327, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 2.7162329615861216, | |
| "grad_norm": 0.2165189596882268, | |
| "learning_rate": 4.069500764757217e-05, | |
| "loss": 0.3311, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 2.721189591078067, | |
| "grad_norm": 0.1957402774919667, | |
| "learning_rate": 4.055601619073261e-05, | |
| "loss": 0.3354, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 2.7261462205700124, | |
| "grad_norm": 0.17969490125915344, | |
| "learning_rate": 4.041701801883324e-05, | |
| "loss": 0.3314, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.7311028500619576, | |
| "grad_norm": 0.22689675751479285, | |
| "learning_rate": 4.0278014810568045e-05, | |
| "loss": 0.3332, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 2.7360594795539033, | |
| "grad_norm": 0.1755924105874963, | |
| "learning_rate": 4.0139008244691845e-05, | |
| "loss": 0.3322, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 2.741016109045849, | |
| "grad_norm": 0.19129031130715107, | |
| "learning_rate": 4e-05, | |
| "loss": 0.3418, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 2.7459727385377946, | |
| "grad_norm": 0.17689484335846134, | |
| "learning_rate": 3.986099175530817e-05, | |
| "loss": 0.3309, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 2.7509293680297398, | |
| "grad_norm": 0.15442290795456226, | |
| "learning_rate": 3.9721985189431975e-05, | |
| "loss": 0.33, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 2.7558859975216854, | |
| "grad_norm": 0.21388356391908783, | |
| "learning_rate": 3.958298198116677e-05, | |
| "loss": 0.3385, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 2.7608426270136306, | |
| "grad_norm": 0.1684374241371316, | |
| "learning_rate": 3.9443983809267405e-05, | |
| "loss": 0.339, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 2.7657992565055762, | |
| "grad_norm": 0.20515582530430282, | |
| "learning_rate": 3.930499235242783e-05, | |
| "loss": 0.3327, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 2.770755885997522, | |
| "grad_norm": 0.22058261160844722, | |
| "learning_rate": 3.9166009289260995e-05, | |
| "loss": 0.3369, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 2.775712515489467, | |
| "grad_norm": 0.18346028180253854, | |
| "learning_rate": 3.9027036298278366e-05, | |
| "loss": 0.3344, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.7806691449814127, | |
| "grad_norm": 0.16762816915906323, | |
| "learning_rate": 3.888807505786984e-05, | |
| "loss": 0.3323, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 2.785625774473358, | |
| "grad_norm": 0.1990414852782902, | |
| "learning_rate": 3.8749127246283386e-05, | |
| "loss": 0.3334, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 2.7905824039653035, | |
| "grad_norm": 0.18489638959504387, | |
| "learning_rate": 3.86101945416048e-05, | |
| "loss": 0.3284, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 2.795539033457249, | |
| "grad_norm": 0.1652010562534876, | |
| "learning_rate": 3.84712786217374e-05, | |
| "loss": 0.3326, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 2.8004956629491944, | |
| "grad_norm": 0.17456119169719603, | |
| "learning_rate": 3.8332381164381814e-05, | |
| "loss": 0.335, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 2.80545229244114, | |
| "grad_norm": 0.16210051076095758, | |
| "learning_rate": 3.81935038470157e-05, | |
| "loss": 0.3346, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 2.8104089219330852, | |
| "grad_norm": 0.16777608423300605, | |
| "learning_rate": 3.805464834687349e-05, | |
| "loss": 0.3337, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 2.815365551425031, | |
| "grad_norm": 0.15906324157550705, | |
| "learning_rate": 3.791581634092609e-05, | |
| "loss": 0.3306, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 2.8203221809169765, | |
| "grad_norm": 0.14249599390439402, | |
| "learning_rate": 3.7777009505860686e-05, | |
| "loss": 0.3318, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 2.825278810408922, | |
| "grad_norm": 0.18459264664094613, | |
| "learning_rate": 3.763822951806051e-05, | |
| "loss": 0.3386, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.8302354399008673, | |
| "grad_norm": 0.20022055680618658, | |
| "learning_rate": 3.749947805358449e-05, | |
| "loss": 0.3303, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 2.835192069392813, | |
| "grad_norm": 0.20222925053513227, | |
| "learning_rate": 3.736075678814712e-05, | |
| "loss": 0.3356, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 2.840148698884758, | |
| "grad_norm": 0.20684131712971296, | |
| "learning_rate": 3.722206739709817e-05, | |
| "loss": 0.3303, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 2.845105328376704, | |
| "grad_norm": 0.1779337567262826, | |
| "learning_rate": 3.708341155540246e-05, | |
| "loss": 0.3335, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 2.8500619578686495, | |
| "grad_norm": 0.2413271248873493, | |
| "learning_rate": 3.69447909376196e-05, | |
| "loss": 0.3393, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 2.8550185873605947, | |
| "grad_norm": 0.23918769230290013, | |
| "learning_rate": 3.680620721788385e-05, | |
| "loss": 0.3294, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 2.8599752168525403, | |
| "grad_norm": 0.1621753755153279, | |
| "learning_rate": 3.6667662069883814e-05, | |
| "loss": 0.3264, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 2.8649318463444855, | |
| "grad_norm": 0.19949158413337165, | |
| "learning_rate": 3.652915716684228e-05, | |
| "loss": 0.3333, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 2.869888475836431, | |
| "grad_norm": 0.2061475811060586, | |
| "learning_rate": 3.639069418149596e-05, | |
| "loss": 0.33, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 2.874845105328377, | |
| "grad_norm": 0.17077547287771824, | |
| "learning_rate": 3.6252274786075375e-05, | |
| "loss": 0.3329, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.8798017348203224, | |
| "grad_norm": 0.19248463763061568, | |
| "learning_rate": 3.611390065228457e-05, | |
| "loss": 0.3321, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 2.8847583643122676, | |
| "grad_norm": 0.1862619363005395, | |
| "learning_rate": 3.597557345128097e-05, | |
| "loss": 0.3326, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 2.8897149938042133, | |
| "grad_norm": 0.18090445666144891, | |
| "learning_rate": 3.5837294853655166e-05, | |
| "loss": 0.3303, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 2.8946716232961585, | |
| "grad_norm": 0.1440347767860962, | |
| "learning_rate": 3.5699066529410804e-05, | |
| "loss": 0.3364, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 2.899628252788104, | |
| "grad_norm": 0.184137285441072, | |
| "learning_rate": 3.556089014794434e-05, | |
| "loss": 0.3332, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 2.9045848822800497, | |
| "grad_norm": 0.18962839342191237, | |
| "learning_rate": 3.542276737802495e-05, | |
| "loss": 0.334, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 2.909541511771995, | |
| "grad_norm": 0.16779757161733827, | |
| "learning_rate": 3.528469988777429e-05, | |
| "loss": 0.326, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 2.9144981412639406, | |
| "grad_norm": 0.1690959070694777, | |
| "learning_rate": 3.514668934464642e-05, | |
| "loss": 0.3318, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 2.9194547707558858, | |
| "grad_norm": 0.19976961678804345, | |
| "learning_rate": 3.500873741540767e-05, | |
| "loss": 0.3307, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 2.9244114002478314, | |
| "grad_norm": 0.16964715443901282, | |
| "learning_rate": 3.487084576611642e-05, | |
| "loss": 0.3316, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.929368029739777, | |
| "grad_norm": 0.1610263789509033, | |
| "learning_rate": 3.4733016062103105e-05, | |
| "loss": 0.3291, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 2.9343246592317227, | |
| "grad_norm": 0.19562682805644308, | |
| "learning_rate": 3.4595249967950015e-05, | |
| "loss": 0.3314, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 2.939281288723668, | |
| "grad_norm": 0.18783423711181588, | |
| "learning_rate": 3.4457549147471256e-05, | |
| "loss": 0.3337, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 2.9442379182156135, | |
| "grad_norm": 0.17249410205027924, | |
| "learning_rate": 3.431991526369253e-05, | |
| "loss": 0.336, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 2.9491945477075587, | |
| "grad_norm": 0.20170942304434009, | |
| "learning_rate": 3.418234997883121e-05, | |
| "loss": 0.3275, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 2.9541511771995044, | |
| "grad_norm": 0.18222174501295177, | |
| "learning_rate": 3.4044854954276186e-05, | |
| "loss": 0.3365, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 2.95910780669145, | |
| "grad_norm": 0.1843856632215925, | |
| "learning_rate": 3.3907431850567804e-05, | |
| "loss": 0.3345, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 2.964064436183395, | |
| "grad_norm": 0.17773265749067887, | |
| "learning_rate": 3.377008232737779e-05, | |
| "loss": 0.3318, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 2.969021065675341, | |
| "grad_norm": 0.17045064724964207, | |
| "learning_rate": 3.363280804348925e-05, | |
| "loss": 0.3369, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 2.973977695167286, | |
| "grad_norm": 0.18104363543938948, | |
| "learning_rate": 3.349561065677663e-05, | |
| "loss": 0.3321, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.9789343246592317, | |
| "grad_norm": 0.16258663213066146, | |
| "learning_rate": 3.335849182418567e-05, | |
| "loss": 0.3345, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 2.9838909541511773, | |
| "grad_norm": 0.16984768351493296, | |
| "learning_rate": 3.322145320171339e-05, | |
| "loss": 0.334, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 2.9888475836431225, | |
| "grad_norm": 0.18346639298353426, | |
| "learning_rate": 3.3084496444388105e-05, | |
| "loss": 0.3304, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 2.993804213135068, | |
| "grad_norm": 0.14403509012455282, | |
| "learning_rate": 3.294762320624949e-05, | |
| "loss": 0.3287, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 2.9987608426270134, | |
| "grad_norm": 0.20073794639228396, | |
| "learning_rate": 3.281083514032849e-05, | |
| "loss": 0.3859, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 3.003717472118959, | |
| "grad_norm": 0.2089444698911026, | |
| "learning_rate": 3.267413389862742e-05, | |
| "loss": 0.3361, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 3.0086741016109046, | |
| "grad_norm": 0.1762655173237084, | |
| "learning_rate": 3.2537521132100056e-05, | |
| "loss": 0.2999, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 3.01363073110285, | |
| "grad_norm": 0.19507744188878484, | |
| "learning_rate": 3.240099849063163e-05, | |
| "loss": 0.3009, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 3.0185873605947955, | |
| "grad_norm": 0.21454164658321215, | |
| "learning_rate": 3.2264567623018895e-05, | |
| "loss": 0.3077, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 3.023543990086741, | |
| "grad_norm": 0.21359327347841275, | |
| "learning_rate": 3.212823017695026e-05, | |
| "loss": 0.3023, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 3.0285006195786863, | |
| "grad_norm": 0.20315773200419085, | |
| "learning_rate": 3.199198779898591e-05, | |
| "loss": 0.3026, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 3.033457249070632, | |
| "grad_norm": 0.18593215246007733, | |
| "learning_rate": 3.1855842134537844e-05, | |
| "loss": 0.3035, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 3.0384138785625776, | |
| "grad_norm": 0.1884189275848539, | |
| "learning_rate": 3.1719794827850034e-05, | |
| "loss": 0.3038, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 3.043370508054523, | |
| "grad_norm": 0.1795860923978728, | |
| "learning_rate": 3.158384752197861e-05, | |
| "loss": 0.3039, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 3.0483271375464684, | |
| "grad_norm": 0.18315632009685323, | |
| "learning_rate": 3.144800185877197e-05, | |
| "loss": 0.3064, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 3.053283767038414, | |
| "grad_norm": 0.19153130258541615, | |
| "learning_rate": 3.131225947885096e-05, | |
| "loss": 0.3048, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 3.0582403965303593, | |
| "grad_norm": 0.18262171453236378, | |
| "learning_rate": 3.117662202158904e-05, | |
| "loss": 0.3045, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 3.063197026022305, | |
| "grad_norm": 0.1937465027808409, | |
| "learning_rate": 3.104109112509257e-05, | |
| "loss": 0.3024, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 3.06815365551425, | |
| "grad_norm": 0.18294252068153583, | |
| "learning_rate": 3.0905668426180925e-05, | |
| "loss": 0.3034, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 3.0731102850061958, | |
| "grad_norm": 0.186995165113216, | |
| "learning_rate": 3.0770355560366745e-05, | |
| "loss": 0.3069, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 3.0780669144981414, | |
| "grad_norm": 0.20411338272963947, | |
| "learning_rate": 3.063515416183625e-05, | |
| "loss": 0.3046, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 3.0830235439900866, | |
| "grad_norm": 0.17006596521617198, | |
| "learning_rate": 3.0500065863429446e-05, | |
| "loss": 0.303, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 3.0879801734820322, | |
| "grad_norm": 0.19496621027527322, | |
| "learning_rate": 3.0365092296620418e-05, | |
| "loss": 0.2992, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 3.092936802973978, | |
| "grad_norm": 0.18067090099414398, | |
| "learning_rate": 3.0230235091497593e-05, | |
| "loss": 0.3046, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 3.097893432465923, | |
| "grad_norm": 0.16987068098319333, | |
| "learning_rate": 3.0095495876744156e-05, | |
| "loss": 0.3051, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 3.1028500619578687, | |
| "grad_norm": 0.1704176025292816, | |
| "learning_rate": 2.9960876279618238e-05, | |
| "loss": 0.304, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 3.107806691449814, | |
| "grad_norm": 0.15464066621895128, | |
| "learning_rate": 2.982637792593342e-05, | |
| "loss": 0.3063, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 3.1127633209417596, | |
| "grad_norm": 0.18094562465557273, | |
| "learning_rate": 2.96920024400389e-05, | |
| "loss": 0.2998, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 3.117719950433705, | |
| "grad_norm": 0.16554943056629043, | |
| "learning_rate": 2.9557751444800097e-05, | |
| "loss": 0.3015, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 3.1226765799256504, | |
| "grad_norm": 0.17971307653565008, | |
| "learning_rate": 2.9423626561578885e-05, | |
| "loss": 0.3096, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 3.127633209417596, | |
| "grad_norm": 0.1806551510167954, | |
| "learning_rate": 2.9289629410214117e-05, | |
| "loss": 0.3033, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 3.1325898389095417, | |
| "grad_norm": 0.15240283472952065, | |
| "learning_rate": 2.9155761609001964e-05, | |
| "loss": 0.3032, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 3.137546468401487, | |
| "grad_norm": 0.1577195426163746, | |
| "learning_rate": 2.9022024774676442e-05, | |
| "loss": 0.3021, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 3.1425030978934325, | |
| "grad_norm": 0.16849560034494637, | |
| "learning_rate": 2.8888420522389905e-05, | |
| "loss": 0.3088, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 3.147459727385378, | |
| "grad_norm": 0.14345317953623954, | |
| "learning_rate": 2.8754950465693427e-05, | |
| "loss": 0.3056, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 3.1524163568773234, | |
| "grad_norm": 0.1523622436509427, | |
| "learning_rate": 2.8621616216517462e-05, | |
| "loss": 0.3025, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 3.157372986369269, | |
| "grad_norm": 0.15673429079604273, | |
| "learning_rate": 2.848841938515226e-05, | |
| "loss": 0.3024, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 3.162329615861214, | |
| "grad_norm": 0.13466456756806872, | |
| "learning_rate": 2.8355361580228495e-05, | |
| "loss": 0.3032, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 3.16728624535316, | |
| "grad_norm": 0.1560250590207022, | |
| "learning_rate": 2.8222444408697767e-05, | |
| "loss": 0.3044, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 3.1722428748451055, | |
| "grad_norm": 0.15822779235945905, | |
| "learning_rate": 2.808966947581327e-05, | |
| "loss": 0.3004, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 3.1771995043370507, | |
| "grad_norm": 0.15868479894261964, | |
| "learning_rate": 2.795703838511033e-05, | |
| "loss": 0.3012, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 3.1821561338289963, | |
| "grad_norm": 0.17605282990168575, | |
| "learning_rate": 2.7824552738387124e-05, | |
| "loss": 0.304, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 3.187112763320942, | |
| "grad_norm": 0.12790318324939634, | |
| "learning_rate": 2.769221413568525e-05, | |
| "loss": 0.3056, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 3.192069392812887, | |
| "grad_norm": 0.141503542353362, | |
| "learning_rate": 2.756002417527045e-05, | |
| "loss": 0.304, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 3.197026022304833, | |
| "grad_norm": 0.17436702006560106, | |
| "learning_rate": 2.742798445361332e-05, | |
| "loss": 0.3017, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 3.201982651796778, | |
| "grad_norm": 0.14699228929283586, | |
| "learning_rate": 2.729609656537e-05, | |
| "loss": 0.3056, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 3.2069392812887236, | |
| "grad_norm": 0.14936078562070917, | |
| "learning_rate": 2.7164362103362888e-05, | |
| "loss": 0.3032, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 3.2118959107806693, | |
| "grad_norm": 0.1542891045557535, | |
| "learning_rate": 2.703278265856148e-05, | |
| "loss": 0.3056, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 3.2168525402726145, | |
| "grad_norm": 0.15226766205832612, | |
| "learning_rate": 2.6901359820063107e-05, | |
| "loss": 0.3016, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 3.22180916976456, | |
| "grad_norm": 0.16501837622862042, | |
| "learning_rate": 2.6770095175073758e-05, | |
| "loss": 0.3052, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 3.2267657992565058, | |
| "grad_norm": 0.1454205047479316, | |
| "learning_rate": 2.663899030888886e-05, | |
| "loss": 0.3039, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 3.231722428748451, | |
| "grad_norm": 0.16727660054359092, | |
| "learning_rate": 2.650804680487424e-05, | |
| "loss": 0.3055, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 3.2366790582403966, | |
| "grad_norm": 0.14477947917200043, | |
| "learning_rate": 2.6377266244446898e-05, | |
| "loss": 0.3028, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 3.241635687732342, | |
| "grad_norm": 0.1555736064441561, | |
| "learning_rate": 2.624665020705594e-05, | |
| "loss": 0.3063, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 3.2465923172242874, | |
| "grad_norm": 0.1586046258309188, | |
| "learning_rate": 2.611620027016355e-05, | |
| "loss": 0.307, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 3.251548946716233, | |
| "grad_norm": 0.15599462369905948, | |
| "learning_rate": 2.598591800922588e-05, | |
| "loss": 0.3017, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 3.2565055762081783, | |
| "grad_norm": 0.1552548399346659, | |
| "learning_rate": 2.5855804997674042e-05, | |
| "loss": 0.3036, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 3.261462205700124, | |
| "grad_norm": 0.14508182578537362, | |
| "learning_rate": 2.5725862806895098e-05, | |
| "loss": 0.3015, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 3.2664188351920695, | |
| "grad_norm": 0.14299675482805052, | |
| "learning_rate": 2.559609300621312e-05, | |
| "loss": 0.3048, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 3.2713754646840147, | |
| "grad_norm": 0.1360295624700782, | |
| "learning_rate": 2.546649716287019e-05, | |
| "loss": 0.2999, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 3.2763320941759604, | |
| "grad_norm": 0.1413517812236507, | |
| "learning_rate": 2.5337076842007504e-05, | |
| "loss": 0.2998, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 3.281288723667906, | |
| "grad_norm": 0.13127029981708502, | |
| "learning_rate": 2.5207833606646403e-05, | |
| "loss": 0.2996, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 3.2862453531598512, | |
| "grad_norm": 0.13139183414183372, | |
| "learning_rate": 2.5078769017669632e-05, | |
| "loss": 0.3063, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 3.291201982651797, | |
| "grad_norm": 0.13006606842033047, | |
| "learning_rate": 2.4949884633802343e-05, | |
| "loss": 0.3026, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 3.296158612143742, | |
| "grad_norm": 0.136510548326439, | |
| "learning_rate": 2.482118201159339e-05, | |
| "loss": 0.3058, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 3.3011152416356877, | |
| "grad_norm": 0.15418988542864148, | |
| "learning_rate": 2.4692662705396412e-05, | |
| "loss": 0.3009, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 3.3060718711276333, | |
| "grad_norm": 0.13624965023397706, | |
| "learning_rate": 2.4564328267351165e-05, | |
| "loss": 0.3058, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 3.3110285006195785, | |
| "grad_norm": 0.13980897795605768, | |
| "learning_rate": 2.4436180247364734e-05, | |
| "loss": 0.3071, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 3.315985130111524, | |
| "grad_norm": 0.1204698411572796, | |
| "learning_rate": 2.430822019309277e-05, | |
| "loss": 0.3007, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 3.32094175960347, | |
| "grad_norm": 0.13636864767510704, | |
| "learning_rate": 2.418044964992091e-05, | |
| "loss": 0.3077, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 3.325898389095415, | |
| "grad_norm": 0.14454738758907348, | |
| "learning_rate": 2.405287016094601e-05, | |
| "loss": 0.3071, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 3.3308550185873607, | |
| "grad_norm": 0.1282434836197698, | |
| "learning_rate": 2.3925483266957558e-05, | |
| "loss": 0.3032, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 3.3358116480793063, | |
| "grad_norm": 0.14263625355479145, | |
| "learning_rate": 2.3798290506419042e-05, | |
| "loss": 0.3037, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 3.3407682775712515, | |
| "grad_norm": 0.11974835937679618, | |
| "learning_rate": 2.3671293415449395e-05, | |
| "loss": 0.3002, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 3.345724907063197, | |
| "grad_norm": 0.13263778471023774, | |
| "learning_rate": 2.3544493527804412e-05, | |
| "loss": 0.3046, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 3.3506815365551423, | |
| "grad_norm": 0.13481169999279188, | |
| "learning_rate": 2.341789237485829e-05, | |
| "loss": 0.3025, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 3.355638166047088, | |
| "grad_norm": 0.1317185419349756, | |
| "learning_rate": 2.329149148558502e-05, | |
| "loss": 0.3011, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 3.3605947955390336, | |
| "grad_norm": 0.13819168121551242, | |
| "learning_rate": 2.3165292386540048e-05, | |
| "loss": 0.3071, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 3.365551425030979, | |
| "grad_norm": 0.14611123791855496, | |
| "learning_rate": 2.3039296601841745e-05, | |
| "loss": 0.3, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 3.3705080545229245, | |
| "grad_norm": 0.13143204123843844, | |
| "learning_rate": 2.291350565315307e-05, | |
| "loss": 0.305, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 3.3754646840148697, | |
| "grad_norm": 0.15517786038481768, | |
| "learning_rate": 2.2787921059663107e-05, | |
| "loss": 0.3044, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 3.3804213135068153, | |
| "grad_norm": 0.1281201579748097, | |
| "learning_rate": 2.266254433806883e-05, | |
| "loss": 0.2988, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 3.385377942998761, | |
| "grad_norm": 0.1354716231594334, | |
| "learning_rate": 2.253737700255668e-05, | |
| "loss": 0.3014, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 3.390334572490706, | |
| "grad_norm": 0.13847005305529575, | |
| "learning_rate": 2.2412420564784324e-05, | |
| "loss": 0.3049, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 3.3952912019826518, | |
| "grad_norm": 0.1360457576632222, | |
| "learning_rate": 2.228767653386242e-05, | |
| "loss": 0.3054, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 3.4002478314745974, | |
| "grad_norm": 0.13897673922449205, | |
| "learning_rate": 2.2163146416336362e-05, | |
| "loss": 0.3052, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 3.4052044609665426, | |
| "grad_norm": 0.12908787668600238, | |
| "learning_rate": 2.203883171616812e-05, | |
| "loss": 0.3073, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 3.4101610904584883, | |
| "grad_norm": 0.13724987290006566, | |
| "learning_rate": 2.1914733934717943e-05, | |
| "loss": 0.3029, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 3.415117719950434, | |
| "grad_norm": 0.13922005425903064, | |
| "learning_rate": 2.179085457072645e-05, | |
| "loss": 0.3057, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 3.420074349442379, | |
| "grad_norm": 0.1354335750505619, | |
| "learning_rate": 2.1667195120296362e-05, | |
| "loss": 0.3049, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 3.4250309789343247, | |
| "grad_norm": 0.14882023636556463, | |
| "learning_rate": 2.1543757076874502e-05, | |
| "loss": 0.3048, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 3.42998760842627, | |
| "grad_norm": 0.13836163300637985, | |
| "learning_rate": 2.1420541931233712e-05, | |
| "loss": 0.3051, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 3.4349442379182156, | |
| "grad_norm": 0.14256891798066113, | |
| "learning_rate": 2.1297551171454875e-05, | |
| "loss": 0.3055, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 3.439900867410161, | |
| "grad_norm": 0.1384981981066132, | |
| "learning_rate": 2.1174786282908978e-05, | |
| "loss": 0.3092, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 3.4448574969021064, | |
| "grad_norm": 0.12340421646609305, | |
| "learning_rate": 2.105224874823914e-05, | |
| "loss": 0.3051, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 3.449814126394052, | |
| "grad_norm": 0.13563333777142683, | |
| "learning_rate": 2.092994004734267e-05, | |
| "loss": 0.3056, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 3.4547707558859977, | |
| "grad_norm": 0.14050309446491482, | |
| "learning_rate": 2.0807861657353232e-05, | |
| "loss": 0.3029, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 3.459727385377943, | |
| "grad_norm": 0.11583428009228229, | |
| "learning_rate": 2.0686015052623036e-05, | |
| "loss": 0.3122, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 3.4646840148698885, | |
| "grad_norm": 0.13336246662013498, | |
| "learning_rate": 2.0564401704705e-05, | |
| "loss": 0.3045, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 3.469640644361834, | |
| "grad_norm": 0.12782118270953827, | |
| "learning_rate": 2.0443023082334947e-05, | |
| "loss": 0.3055, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 3.4745972738537794, | |
| "grad_norm": 0.12141941353759776, | |
| "learning_rate": 2.032188065141389e-05, | |
| "loss": 0.3059, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 3.479553903345725, | |
| "grad_norm": 0.1277767185346589, | |
| "learning_rate": 2.0200975874990395e-05, | |
| "loss": 0.3045, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 3.48451053283767, | |
| "grad_norm": 0.12523003640838745, | |
| "learning_rate": 2.0080310213242776e-05, | |
| "loss": 0.3017, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 3.489467162329616, | |
| "grad_norm": 0.11933778268709544, | |
| "learning_rate": 1.9959885123461605e-05, | |
| "loss": 0.3051, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 3.4944237918215615, | |
| "grad_norm": 0.13986871746721385, | |
| "learning_rate": 1.983970206003201e-05, | |
| "loss": 0.3083, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 3.4993804213135067, | |
| "grad_norm": 0.11878623960823902, | |
| "learning_rate": 1.971976247441615e-05, | |
| "loss": 0.307, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 3.5043370508054523, | |
| "grad_norm": 0.12807313446541005, | |
| "learning_rate": 1.960006781513565e-05, | |
| "loss": 0.3083, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 3.5092936802973975, | |
| "grad_norm": 0.13808763890459724, | |
| "learning_rate": 1.9480619527754184e-05, | |
| "loss": 0.3091, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 3.514250309789343, | |
| "grad_norm": 0.13222322590668356, | |
| "learning_rate": 1.9361419054859965e-05, | |
| "loss": 0.3017, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 3.519206939281289, | |
| "grad_norm": 0.12925333708719713, | |
| "learning_rate": 1.9242467836048296e-05, | |
| "loss": 0.3016, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 3.5241635687732344, | |
| "grad_norm": 0.131207679724427, | |
| "learning_rate": 1.9123767307904216e-05, | |
| "loss": 0.3048, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 3.5291201982651796, | |
| "grad_norm": 0.13331684079879477, | |
| "learning_rate": 1.900531890398518e-05, | |
| "loss": 0.3065, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 3.5340768277571253, | |
| "grad_norm": 0.11121332621566554, | |
| "learning_rate": 1.8887124054803712e-05, | |
| "loss": 0.301, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 3.5390334572490705, | |
| "grad_norm": 0.16037753358502257, | |
| "learning_rate": 1.8769184187810097e-05, | |
| "loss": 0.3082, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 3.543990086741016, | |
| "grad_norm": 0.11888454595139157, | |
| "learning_rate": 1.8651500727375197e-05, | |
| "loss": 0.3049, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 3.5489467162329618, | |
| "grad_norm": 0.12979326089504686, | |
| "learning_rate": 1.853407509477323e-05, | |
| "loss": 0.3069, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 3.553903345724907, | |
| "grad_norm": 0.11330744270390525, | |
| "learning_rate": 1.8416908708164625e-05, | |
| "loss": 0.3003, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 3.5588599752168526, | |
| "grad_norm": 0.1263040498972687, | |
| "learning_rate": 1.830000298257881e-05, | |
| "loss": 0.3038, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 3.563816604708798, | |
| "grad_norm": 0.1080320775091172, | |
| "learning_rate": 1.8183359329897273e-05, | |
| "loss": 0.3054, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 3.5687732342007434, | |
| "grad_norm": 0.12148959658498093, | |
| "learning_rate": 1.8066979158836324e-05, | |
| "loss": 0.3015, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 3.573729863692689, | |
| "grad_norm": 0.1306325937471217, | |
| "learning_rate": 1.7950863874930272e-05, | |
| "loss": 0.3091, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 3.5786864931846347, | |
| "grad_norm": 0.11558208336385786, | |
| "learning_rate": 1.7835014880514285e-05, | |
| "loss": 0.3054, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 3.58364312267658, | |
| "grad_norm": 0.11233874217296087, | |
| "learning_rate": 1.771943357470759e-05, | |
| "loss": 0.3099, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 3.5885997521685256, | |
| "grad_norm": 0.10715556044653257, | |
| "learning_rate": 1.760412135339646e-05, | |
| "loss": 0.3096, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 3.5935563816604708, | |
| "grad_norm": 0.12755358454831467, | |
| "learning_rate": 1.7489079609217454e-05, | |
| "loss": 0.3067, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 3.5985130111524164, | |
| "grad_norm": 0.11556106841681339, | |
| "learning_rate": 1.7374309731540512e-05, | |
| "loss": 0.302, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 3.603469640644362, | |
| "grad_norm": 0.13828149686762833, | |
| "learning_rate": 1.7259813106452264e-05, | |
| "loss": 0.3042, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 3.6084262701363072, | |
| "grad_norm": 0.11303890136663619, | |
| "learning_rate": 1.7145591116739188e-05, | |
| "loss": 0.3059, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 3.613382899628253, | |
| "grad_norm": 0.15458800679371804, | |
| "learning_rate": 1.7031645141871017e-05, | |
| "loss": 0.3078, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 3.618339529120198, | |
| "grad_norm": 0.10950048353822843, | |
| "learning_rate": 1.691797655798398e-05, | |
| "loss": 0.3012, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 3.6232961586121437, | |
| "grad_norm": 0.14585691976372792, | |
| "learning_rate": 1.680458673786426e-05, | |
| "loss": 0.3056, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 3.6282527881040894, | |
| "grad_norm": 0.10063716590394328, | |
| "learning_rate": 1.66914770509314e-05, | |
| "loss": 0.3059, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 3.6332094175960346, | |
| "grad_norm": 0.16856798984329935, | |
| "learning_rate": 1.6578648863221704e-05, | |
| "loss": 0.2976, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 3.63816604708798, | |
| "grad_norm": 0.09860058370125988, | |
| "learning_rate": 1.6466103537371786e-05, | |
| "loss": 0.3033, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 3.6431226765799254, | |
| "grad_norm": 0.13460381635384605, | |
| "learning_rate": 1.635384243260215e-05, | |
| "loss": 0.302, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 3.648079306071871, | |
| "grad_norm": 0.11916515297319498, | |
| "learning_rate": 1.6241866904700717e-05, | |
| "loss": 0.2986, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 3.6530359355638167, | |
| "grad_norm": 0.12728574264346731, | |
| "learning_rate": 1.6130178306006458e-05, | |
| "loss": 0.3049, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 3.6579925650557623, | |
| "grad_norm": 0.1267215019920343, | |
| "learning_rate": 1.601877798539307e-05, | |
| "loss": 0.3069, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 3.6629491945477075, | |
| "grad_norm": 0.12422633657735385, | |
| "learning_rate": 1.5907667288252698e-05, | |
| "loss": 0.3, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 3.667905824039653, | |
| "grad_norm": 0.10548593638677428, | |
| "learning_rate": 1.5796847556479714e-05, | |
| "loss": 0.3028, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 3.6728624535315983, | |
| "grad_norm": 0.10690753261122879, | |
| "learning_rate": 1.5686320128454407e-05, | |
| "loss": 0.3062, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 3.677819083023544, | |
| "grad_norm": 0.10095888952717658, | |
| "learning_rate": 1.557608633902691e-05, | |
| "loss": 0.3058, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 3.6827757125154896, | |
| "grad_norm": 0.11008343099787869, | |
| "learning_rate": 1.5466147519501074e-05, | |
| "loss": 0.3045, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 3.687732342007435, | |
| "grad_norm": 0.09550712617687758, | |
| "learning_rate": 1.5356504997618382e-05, | |
| "loss": 0.3, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 3.6926889714993805, | |
| "grad_norm": 0.11495321087022631, | |
| "learning_rate": 1.524716009754184e-05, | |
| "loss": 0.2979, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 3.6976456009913257, | |
| "grad_norm": 0.11635709075735622, | |
| "learning_rate": 1.5138114139840117e-05, | |
| "loss": 0.3023, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 3.7026022304832713, | |
| "grad_norm": 0.11348474927513516, | |
| "learning_rate": 1.5029368441471483e-05, | |
| "loss": 0.3015, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 3.707558859975217, | |
| "grad_norm": 0.12353200503351507, | |
| "learning_rate": 1.4920924315767952e-05, | |
| "loss": 0.3018, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 3.7125154894671626, | |
| "grad_norm": 0.11128731875814545, | |
| "learning_rate": 1.4812783072419442e-05, | |
| "loss": 0.3024, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 3.717472118959108, | |
| "grad_norm": 0.12479505016869401, | |
| "learning_rate": 1.4704946017457925e-05, | |
| "loss": 0.3029, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 3.7224287484510534, | |
| "grad_norm": 0.10039835754063967, | |
| "learning_rate": 1.4597414453241636e-05, | |
| "loss": 0.3062, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 3.7273853779429986, | |
| "grad_norm": 0.13104594311695542, | |
| "learning_rate": 1.4490189678439376e-05, | |
| "loss": 0.3044, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 3.7323420074349443, | |
| "grad_norm": 0.11702158446742426, | |
| "learning_rate": 1.4383272988014851e-05, | |
| "loss": 0.3064, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 3.73729863692689, | |
| "grad_norm": 0.11975778168587813, | |
| "learning_rate": 1.427666567321099e-05, | |
| "loss": 0.3067, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 3.742255266418835, | |
| "grad_norm": 0.1181913843002168, | |
| "learning_rate": 1.4170369021534347e-05, | |
| "loss": 0.3015, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 3.7472118959107807, | |
| "grad_norm": 0.1045706641304008, | |
| "learning_rate": 1.4064384316739563e-05, | |
| "loss": 0.3036, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 3.752168525402726, | |
| "grad_norm": 0.11402120830752768, | |
| "learning_rate": 1.3958712838813902e-05, | |
| "loss": 0.3039, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 3.7571251548946716, | |
| "grad_norm": 0.12157415963281785, | |
| "learning_rate": 1.3853355863961731e-05, | |
| "loss": 0.303, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 3.7620817843866172, | |
| "grad_norm": 0.10041584835817995, | |
| "learning_rate": 1.3748314664589169e-05, | |
| "loss": 0.3044, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 3.7670384138785624, | |
| "grad_norm": 0.11527234898628047, | |
| "learning_rate": 1.3643590509288607e-05, | |
| "loss": 0.3022, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 3.771995043370508, | |
| "grad_norm": 0.10863127064484607, | |
| "learning_rate": 1.353918466282354e-05, | |
| "loss": 0.3038, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 3.7769516728624533, | |
| "grad_norm": 0.1101423637817911, | |
| "learning_rate": 1.3435098386113192e-05, | |
| "loss": 0.2996, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 3.781908302354399, | |
| "grad_norm": 0.11761126602863732, | |
| "learning_rate": 1.3331332936217326e-05, | |
| "loss": 0.3039, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 3.7868649318463445, | |
| "grad_norm": 0.10558893915498015, | |
| "learning_rate": 1.3227889566321022e-05, | |
| "loss": 0.3039, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 3.79182156133829, | |
| "grad_norm": 0.10727419606238538, | |
| "learning_rate": 1.3124769525719576e-05, | |
| "loss": 0.3055, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 3.7967781908302354, | |
| "grad_norm": 0.110726016670653, | |
| "learning_rate": 1.3021974059803432e-05, | |
| "loss": 0.3026, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 3.801734820322181, | |
| "grad_norm": 0.09144950044698749, | |
| "learning_rate": 1.2919504410043083e-05, | |
| "loss": 0.2944, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 3.806691449814126, | |
| "grad_norm": 0.1001368569514324, | |
| "learning_rate": 1.2817361813974136e-05, | |
| "loss": 0.3017, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 3.811648079306072, | |
| "grad_norm": 0.09886758402680869, | |
| "learning_rate": 1.2715547505182312e-05, | |
| "loss": 0.3011, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 3.8166047087980175, | |
| "grad_norm": 0.10959965871086583, | |
| "learning_rate": 1.2614062713288608e-05, | |
| "loss": 0.3053, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 3.8215613382899627, | |
| "grad_norm": 0.09813115118616604, | |
| "learning_rate": 1.251290866393438e-05, | |
| "loss": 0.3046, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 3.8265179677819083, | |
| "grad_norm": 0.10075106248289464, | |
| "learning_rate": 1.2412086578766602e-05, | |
| "loss": 0.3068, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 3.8314745972738535, | |
| "grad_norm": 0.11066290728626664, | |
| "learning_rate": 1.2311597675423089e-05, | |
| "loss": 0.3003, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 3.836431226765799, | |
| "grad_norm": 0.09795728766221641, | |
| "learning_rate": 1.2211443167517757e-05, | |
| "loss": 0.301, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 3.841387856257745, | |
| "grad_norm": 0.10258653518600384, | |
| "learning_rate": 1.2111624264626012e-05, | |
| "loss": 0.3048, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 3.8463444857496905, | |
| "grad_norm": 0.10558082756801986, | |
| "learning_rate": 1.2012142172270136e-05, | |
| "loss": 0.3057, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 3.8513011152416357, | |
| "grad_norm": 0.10445656619048024, | |
| "learning_rate": 1.1912998091904724e-05, | |
| "loss": 0.3113, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 3.8562577447335813, | |
| "grad_norm": 0.10212700920367412, | |
| "learning_rate": 1.1814193220902146e-05, | |
| "loss": 0.3031, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 3.8612143742255265, | |
| "grad_norm": 0.10074868067573164, | |
| "learning_rate": 1.1715728752538103e-05, | |
| "loss": 0.308, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 3.866171003717472, | |
| "grad_norm": 0.10571053475203836, | |
| "learning_rate": 1.1617605875977253e-05, | |
| "loss": 0.3045, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 3.8711276332094178, | |
| "grad_norm": 0.10411602926538378, | |
| "learning_rate": 1.1519825776258812e-05, | |
| "loss": 0.3043, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 3.876084262701363, | |
| "grad_norm": 0.10184699452843421, | |
| "learning_rate": 1.142238963428223e-05, | |
| "loss": 0.3025, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 3.8810408921933086, | |
| "grad_norm": 0.09478239661042007, | |
| "learning_rate": 1.1325298626792937e-05, | |
| "loss": 0.3049, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 3.885997521685254, | |
| "grad_norm": 0.09053688283495022, | |
| "learning_rate": 1.1228553926368173e-05, | |
| "loss": 0.3056, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 3.8909541511771994, | |
| "grad_norm": 0.09331448652847525, | |
| "learning_rate": 1.1132156701402796e-05, | |
| "loss": 0.3022, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 3.895910780669145, | |
| "grad_norm": 0.09489554080409102, | |
| "learning_rate": 1.1036108116095142e-05, | |
| "loss": 0.3073, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 3.9008674101610907, | |
| "grad_norm": 0.09039951396430053, | |
| "learning_rate": 1.0940409330432988e-05, | |
| "loss": 0.3038, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 3.905824039653036, | |
| "grad_norm": 0.09184380274512176, | |
| "learning_rate": 1.0845061500179588e-05, | |
| "loss": 0.3075, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 3.9107806691449816, | |
| "grad_norm": 0.09822283803476375, | |
| "learning_rate": 1.0750065776859659e-05, | |
| "loss": 0.2995, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 3.9157372986369268, | |
| "grad_norm": 0.09884065337414615, | |
| "learning_rate": 1.0655423307745463e-05, | |
| "loss": 0.3025, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 3.9206939281288724, | |
| "grad_norm": 0.1023994062957483, | |
| "learning_rate": 1.0561135235843016e-05, | |
| "loss": 0.3062, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 3.925650557620818, | |
| "grad_norm": 0.09323621076945705, | |
| "learning_rate": 1.0467202699878212e-05, | |
| "loss": 0.3072, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 3.9306071871127632, | |
| "grad_norm": 0.0996518708664855, | |
| "learning_rate": 1.0373626834283134e-05, | |
| "loss": 0.2942, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 3.935563816604709, | |
| "grad_norm": 0.09801724942181, | |
| "learning_rate": 1.028040876918229e-05, | |
| "loss": 0.3068, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 3.940520446096654, | |
| "grad_norm": 0.10159923408173142, | |
| "learning_rate": 1.018754963037904e-05, | |
| "loss": 0.3018, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 3.9454770755885997, | |
| "grad_norm": 0.09250643265924624, | |
| "learning_rate": 1.0095050539341926e-05, | |
| "loss": 0.3006, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 3.9504337050805454, | |
| "grad_norm": 0.09946375207125929, | |
| "learning_rate": 1.0002912613191152e-05, | |
| "loss": 0.3053, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 3.9553903345724906, | |
| "grad_norm": 0.10001637507100242, | |
| "learning_rate": 9.911136964685121e-06, | |
| "loss": 0.2997, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 3.960346964064436, | |
| "grad_norm": 0.08720799711260668, | |
| "learning_rate": 9.819724702206984e-06, | |
| "loss": 0.3009, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 3.9653035935563814, | |
| "grad_norm": 0.09577575913167323, | |
| "learning_rate": 9.728676929751235e-06, | |
| "loss": 0.3055, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.970260223048327, | |
| "grad_norm": 0.10355284470935523, | |
| "learning_rate": 9.637994746910348e-06, | |
| "loss": 0.308, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 3.9752168525402727, | |
| "grad_norm": 0.09524290206581833, | |
| "learning_rate": 9.547679248861593e-06, | |
| "loss": 0.307, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 3.9801734820322183, | |
| "grad_norm": 0.10448142281939526, | |
| "learning_rate": 9.457731526353725e-06, | |
| "loss": 0.3049, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 3.9851301115241635, | |
| "grad_norm": 0.09263016898548693, | |
| "learning_rate": 9.368152665693864e-06, | |
| "loss": 0.3008, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 3.990086741016109, | |
| "grad_norm": 0.10213714785836968, | |
| "learning_rate": 9.278943748734321e-06, | |
| "loss": 0.3046, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 3.9950433705080544, | |
| "grad_norm": 0.09415919510644356, | |
| "learning_rate": 9.190105852859559e-06, | |
| "loss": 0.3001, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.11841781438474604, | |
| "learning_rate": 9.101640050973213e-06, | |
| "loss": 0.3822, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 4.004956629491946, | |
| "grad_norm": 0.14722791895577392, | |
| "learning_rate": 9.013547411485102e-06, | |
| "loss": 0.2865, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 4.009913258983891, | |
| "grad_norm": 0.12108449172793088, | |
| "learning_rate": 8.925828998298298e-06, | |
| "loss": 0.2798, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 4.014869888475836, | |
| "grad_norm": 0.1016479933236839, | |
| "learning_rate": 8.83848587079632e-06, | |
| "loss": 0.2802, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 4.019826517967782, | |
| "grad_norm": 0.11127976713554319, | |
| "learning_rate": 8.75151908383034e-06, | |
| "loss": 0.2792, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 4.024783147459727, | |
| "grad_norm": 0.13578069554004418, | |
| "learning_rate": 8.664929687706434e-06, | |
| "loss": 0.281, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 4.029739776951673, | |
| "grad_norm": 0.1360686294776198, | |
| "learning_rate": 8.578718728172868e-06, | |
| "loss": 0.281, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 4.034696406443619, | |
| "grad_norm": 0.11601909584488747, | |
| "learning_rate": 8.492887246407489e-06, | |
| "loss": 0.284, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 4.039653035935564, | |
| "grad_norm": 0.1154783460259016, | |
| "learning_rate": 8.407436279005203e-06, | |
| "loss": 0.2833, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 4.044609665427509, | |
| "grad_norm": 0.11725746533152305, | |
| "learning_rate": 8.322366857965356e-06, | |
| "loss": 0.2813, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 4.049566294919455, | |
| "grad_norm": 0.1181058547228756, | |
| "learning_rate": 8.237680010679345e-06, | |
| "loss": 0.2811, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 4.0545229244114, | |
| "grad_norm": 0.11517240470012005, | |
| "learning_rate": 8.153376759918207e-06, | |
| "loss": 0.2822, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 4.059479553903346, | |
| "grad_norm": 0.11125181982315334, | |
| "learning_rate": 8.06945812382022e-06, | |
| "loss": 0.2857, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 4.064436183395292, | |
| "grad_norm": 0.11133148377505786, | |
| "learning_rate": 7.985925115878621e-06, | |
| "loss": 0.2795, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 4.069392812887236, | |
| "grad_norm": 0.10387667204411895, | |
| "learning_rate": 7.902778744929414e-06, | |
| "loss": 0.2852, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 4.074349442379182, | |
| "grad_norm": 0.10525576161206568, | |
| "learning_rate": 7.820020015139156e-06, | |
| "loss": 0.2836, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 4.079306071871128, | |
| "grad_norm": 0.108828574574679, | |
| "learning_rate": 7.737649925992792e-06, | |
| "loss": 0.277, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 4.084262701363073, | |
| "grad_norm": 0.10127418772217861, | |
| "learning_rate": 7.655669472281625e-06, | |
| "loss": 0.2839, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 4.089219330855019, | |
| "grad_norm": 0.10115795681430574, | |
| "learning_rate": 7.57407964409131e-06, | |
| "loss": 0.283, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 4.094175960346964, | |
| "grad_norm": 0.09518739210847543, | |
| "learning_rate": 7.492881426789882e-06, | |
| "loss": 0.2814, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 4.099132589838909, | |
| "grad_norm": 0.0959614198507469, | |
| "learning_rate": 7.412075801015843e-06, | |
| "loss": 0.285, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 4.104089219330855, | |
| "grad_norm": 0.10585234312512412, | |
| "learning_rate": 7.331663742666317e-06, | |
| "loss": 0.2763, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 4.1090458488228006, | |
| "grad_norm": 0.1004428715176425, | |
| "learning_rate": 7.251646222885305e-06, | |
| "loss": 0.2821, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 4.114002478314746, | |
| "grad_norm": 0.09091700534277954, | |
| "learning_rate": 7.172024208051925e-06, | |
| "loss": 0.2823, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 4.118959107806692, | |
| "grad_norm": 0.09687764910942183, | |
| "learning_rate": 7.09279865976872e-06, | |
| "loss": 0.2822, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 4.123915737298637, | |
| "grad_norm": 0.0958781996446087, | |
| "learning_rate": 7.013970534850103e-06, | |
| "loss": 0.2806, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 4.128872366790582, | |
| "grad_norm": 0.09496634844167438, | |
| "learning_rate": 6.935540785310731e-06, | |
| "loss": 0.2782, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 4.133828996282528, | |
| "grad_norm": 0.09824356232858408, | |
| "learning_rate": 6.857510358354078e-06, | |
| "loss": 0.2868, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 4.1387856257744735, | |
| "grad_norm": 0.09351975487729051, | |
| "learning_rate": 6.7798801963609375e-06, | |
| "loss": 0.2798, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 4.143742255266419, | |
| "grad_norm": 0.09995853450151393, | |
| "learning_rate": 6.702651236878086e-06, | |
| "loss": 0.2845, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 4.148698884758364, | |
| "grad_norm": 0.09083827830168457, | |
| "learning_rate": 6.625824412606911e-06, | |
| "loss": 0.279, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 4.1536555142503095, | |
| "grad_norm": 0.09393734030049775, | |
| "learning_rate": 6.549400651392215e-06, | |
| "loss": 0.2818, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 4.158612143742255, | |
| "grad_norm": 0.08659148767062241, | |
| "learning_rate": 6.473380876210927e-06, | |
| "loss": 0.2851, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 4.163568773234201, | |
| "grad_norm": 0.08429036468561842, | |
| "learning_rate": 6.397766005161035e-06, | |
| "loss": 0.2828, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 4.1685254027261465, | |
| "grad_norm": 0.09080594271610455, | |
| "learning_rate": 6.322556951450431e-06, | |
| "loss": 0.2828, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 4.173482032218092, | |
| "grad_norm": 0.09298606471973449, | |
| "learning_rate": 6.247754623385946e-06, | |
| "loss": 0.2817, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 4.178438661710037, | |
| "grad_norm": 0.09224315456638131, | |
| "learning_rate": 6.173359924362312e-06, | |
| "loss": 0.2818, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 4.1833952912019825, | |
| "grad_norm": 0.08293341261202053, | |
| "learning_rate": 6.0993737528513055e-06, | |
| "loss": 0.2823, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 4.188351920693928, | |
| "grad_norm": 0.08879511214700042, | |
| "learning_rate": 6.025797002390894e-06, | |
| "loss": 0.2845, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 4.193308550185874, | |
| "grad_norm": 0.08314309847087989, | |
| "learning_rate": 5.952630561574402e-06, | |
| "loss": 0.2884, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 4.198265179677819, | |
| "grad_norm": 0.09240969634004986, | |
| "learning_rate": 5.8798753140397956e-06, | |
| "loss": 0.2813, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 4.203221809169764, | |
| "grad_norm": 0.08832272007337454, | |
| "learning_rate": 5.807532138459056e-06, | |
| "loss": 0.2796, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 4.20817843866171, | |
| "grad_norm": 0.08855693742599202, | |
| "learning_rate": 5.735601908527528e-06, | |
| "loss": 0.2852, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 4.2131350681536555, | |
| "grad_norm": 0.09166662154860299, | |
| "learning_rate": 5.664085492953347e-06, | |
| "loss": 0.2808, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 4.218091697645601, | |
| "grad_norm": 0.09526345498603268, | |
| "learning_rate": 5.592983755446981e-06, | |
| "loss": 0.2853, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 4.223048327137547, | |
| "grad_norm": 0.08830250030108663, | |
| "learning_rate": 5.52229755471081e-06, | |
| "loss": 0.2796, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 4.228004956629492, | |
| "grad_norm": 0.0860713986805829, | |
| "learning_rate": 5.452027744428732e-06, | |
| "loss": 0.2803, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 4.232961586121437, | |
| "grad_norm": 0.09234109531994655, | |
| "learning_rate": 5.382175173255846e-06, | |
| "loss": 0.2839, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 4.237918215613383, | |
| "grad_norm": 0.08834860437724747, | |
| "learning_rate": 5.312740684808209e-06, | |
| "loss": 0.2838, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 4.242874845105328, | |
| "grad_norm": 0.08133438171291063, | |
| "learning_rate": 5.24372511765268e-06, | |
| "loss": 0.2818, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 4.247831474597274, | |
| "grad_norm": 0.08160907852625027, | |
| "learning_rate": 5.17512930529676e-06, | |
| "loss": 0.2823, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 4.25278810408922, | |
| "grad_norm": 0.09136437292265856, | |
| "learning_rate": 5.106954076178503e-06, | |
| "loss": 0.2828, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 4.2577447335811645, | |
| "grad_norm": 0.09152299706832191, | |
| "learning_rate": 5.039200253656584e-06, | |
| "loss": 0.2832, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 4.26270136307311, | |
| "grad_norm": 0.08998360407936888, | |
| "learning_rate": 4.971868656000278e-06, | |
| "loss": 0.2804, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 4.267657992565056, | |
| "grad_norm": 0.08355009921069978, | |
| "learning_rate": 4.904960096379609e-06, | |
| "loss": 0.278, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 4.272614622057001, | |
| "grad_norm": 0.09162982326472421, | |
| "learning_rate": 4.838475382855556e-06, | |
| "loss": 0.2817, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 4.277571251548947, | |
| "grad_norm": 0.08769303434470369, | |
| "learning_rate": 4.7724153183702586e-06, | |
| "loss": 0.285, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 4.282527881040892, | |
| "grad_norm": 0.07983368734380246, | |
| "learning_rate": 4.706780700737317e-06, | |
| "loss": 0.2814, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 4.287484510532837, | |
| "grad_norm": 0.08034220721366041, | |
| "learning_rate": 4.641572322632177e-06, | |
| "loss": 0.2791, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 4.292441140024783, | |
| "grad_norm": 0.08230090157319601, | |
| "learning_rate": 4.576790971582559e-06, | |
| "loss": 0.2789, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 4.297397769516729, | |
| "grad_norm": 0.08740869773472577, | |
| "learning_rate": 4.512437429958936e-06, | |
| "loss": 0.2802, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 4.302354399008674, | |
| "grad_norm": 0.08406629521865011, | |
| "learning_rate": 4.448512474965072e-06, | |
| "loss": 0.2841, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 4.30731102850062, | |
| "grad_norm": 0.0806706965026141, | |
| "learning_rate": 4.385016878628654e-06, | |
| "loss": 0.2852, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 4.312267657992565, | |
| "grad_norm": 0.08521916884499932, | |
| "learning_rate": 4.321951407791977e-06, | |
| "loss": 0.2804, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 4.31722428748451, | |
| "grad_norm": 0.07929363763871186, | |
| "learning_rate": 4.25931682410266e-06, | |
| "loss": 0.2853, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 4.322180916976456, | |
| "grad_norm": 0.08265437801922285, | |
| "learning_rate": 4.197113884004473e-06, | |
| "loss": 0.2864, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 4.327137546468402, | |
| "grad_norm": 0.08998988120810332, | |
| "learning_rate": 4.135343338728142e-06, | |
| "loss": 0.2834, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 4.332094175960347, | |
| "grad_norm": 0.08132773635051033, | |
| "learning_rate": 4.074005934282368e-06, | |
| "loss": 0.2836, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 4.337050805452292, | |
| "grad_norm": 0.08585364564919164, | |
| "learning_rate": 4.013102411444752e-06, | |
| "loss": 0.2808, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 4.342007434944238, | |
| "grad_norm": 0.08217709909623434, | |
| "learning_rate": 3.95263350575287e-06, | |
| "loss": 0.284, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 4.346964064436183, | |
| "grad_norm": 0.08717986026942609, | |
| "learning_rate": 3.892599947495379e-06, | |
| "loss": 0.2875, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 4.351920693928129, | |
| "grad_norm": 0.0908634629774549, | |
| "learning_rate": 3.8330024617031906e-06, | |
| "loss": 0.2834, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 4.356877323420075, | |
| "grad_norm": 0.08175206838736869, | |
| "learning_rate": 3.7738417681407647e-06, | |
| "loss": 0.2806, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 4.36183395291202, | |
| "grad_norm": 0.0787428947081775, | |
| "learning_rate": 3.7151185812973435e-06, | |
| "loss": 0.2818, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 4.366790582403965, | |
| "grad_norm": 0.08111202314877745, | |
| "learning_rate": 3.656833610378394e-06, | |
| "loss": 0.2839, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 4.371747211895911, | |
| "grad_norm": 0.08482710438193812, | |
| "learning_rate": 3.5989875592969694e-06, | |
| "loss": 0.2869, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 4.376703841387856, | |
| "grad_norm": 0.08182735521141883, | |
| "learning_rate": 3.5415811266652856e-06, | |
| "loss": 0.2821, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 4.381660470879802, | |
| "grad_norm": 0.08194368399664302, | |
| "learning_rate": 3.4846150057862115e-06, | |
| "loss": 0.2792, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 4.386617100371748, | |
| "grad_norm": 0.08181551891620019, | |
| "learning_rate": 3.428089884644954e-06, | |
| "loss": 0.2794, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 4.391573729863692, | |
| "grad_norm": 0.08743666320985356, | |
| "learning_rate": 3.3720064459007218e-06, | |
| "loss": 0.28, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 4.396530359355638, | |
| "grad_norm": 0.07883732835031036, | |
| "learning_rate": 3.316365366878471e-06, | |
| "loss": 0.2814, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 4.401486988847584, | |
| "grad_norm": 0.084162773543217, | |
| "learning_rate": 3.261167319560734e-06, | |
| "loss": 0.2857, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 4.406443618339529, | |
| "grad_norm": 0.07717717817978705, | |
| "learning_rate": 3.2064129705795266e-06, | |
| "loss": 0.2857, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 4.411400247831475, | |
| "grad_norm": 0.07800427937716604, | |
| "learning_rate": 3.1521029812082803e-06, | |
| "loss": 0.2866, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 4.41635687732342, | |
| "grad_norm": 0.07848859258578157, | |
| "learning_rate": 3.098238007353831e-06, | |
| "loss": 0.2828, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 4.421313506815365, | |
| "grad_norm": 0.0904568854051929, | |
| "learning_rate": 3.0448186995485307e-06, | |
| "loss": 0.2864, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 4.426270136307311, | |
| "grad_norm": 0.07851394101041086, | |
| "learning_rate": 2.991845702942389e-06, | |
| "loss": 0.2824, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 4.431226765799257, | |
| "grad_norm": 0.08119789855176456, | |
| "learning_rate": 2.939319657295263e-06, | |
| "loss": 0.2803, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 4.436183395291202, | |
| "grad_norm": 0.08619128184770572, | |
| "learning_rate": 2.8872411969691527e-06, | |
| "loss": 0.2828, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 4.441140024783148, | |
| "grad_norm": 0.08498957696639169, | |
| "learning_rate": 2.8356109509205e-06, | |
| "loss": 0.2817, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 4.446096654275093, | |
| "grad_norm": 0.07444800742896455, | |
| "learning_rate": 2.7844295426926593e-06, | |
| "loss": 0.2848, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 4.451053283767038, | |
| "grad_norm": 0.07560633490699613, | |
| "learning_rate": 2.7336975904083085e-06, | |
| "loss": 0.2811, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 4.456009913258984, | |
| "grad_norm": 0.07621442311288926, | |
| "learning_rate": 2.683415706762e-06, | |
| "loss": 0.2779, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 4.4609665427509295, | |
| "grad_norm": 0.07853324700844574, | |
| "learning_rate": 2.6335844990127646e-06, | |
| "loss": 0.281, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 4.465923172242875, | |
| "grad_norm": 0.08060513652848875, | |
| "learning_rate": 2.5842045689767935e-06, | |
| "loss": 0.281, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 4.47087980173482, | |
| "grad_norm": 0.0770124294811412, | |
| "learning_rate": 2.535276513020142e-06, | |
| "loss": 0.2872, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 4.4758364312267656, | |
| "grad_norm": 0.07941564944704892, | |
| "learning_rate": 2.48680092205154e-06, | |
| "loss": 0.2812, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 4.480793060718711, | |
| "grad_norm": 0.07746658082235451, | |
| "learning_rate": 2.4387783815152634e-06, | |
| "loss": 0.2831, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 4.485749690210657, | |
| "grad_norm": 0.07801782052601454, | |
| "learning_rate": 2.3912094713840395e-06, | |
| "loss": 0.2864, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 4.4907063197026025, | |
| "grad_norm": 0.07813726856842436, | |
| "learning_rate": 2.3440947661520763e-06, | |
| "loss": 0.2801, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 4.495662949194548, | |
| "grad_norm": 0.07746692349365746, | |
| "learning_rate": 2.297434834828094e-06, | |
| "loss": 0.281, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 4.500619578686493, | |
| "grad_norm": 0.07816724100489567, | |
| "learning_rate": 2.2512302409284724e-06, | |
| "loss": 0.286, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 4.5055762081784385, | |
| "grad_norm": 0.07670669793398054, | |
| "learning_rate": 2.2054815424704447e-06, | |
| "loss": 0.2834, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 4.510532837670384, | |
| "grad_norm": 0.07813664769713757, | |
| "learning_rate": 2.1601892919653223e-06, | |
| "loss": 0.2833, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 4.51548946716233, | |
| "grad_norm": 0.07854983906424601, | |
| "learning_rate": 2.1153540364118895e-06, | |
| "loss": 0.2864, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 4.520446096654275, | |
| "grad_norm": 0.07823949746195673, | |
| "learning_rate": 2.0709763172897366e-06, | |
| "loss": 0.2829, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 4.52540272614622, | |
| "grad_norm": 0.07798016395999526, | |
| "learning_rate": 2.027056670552767e-06, | |
| "loss": 0.2865, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 4.530359355638166, | |
| "grad_norm": 0.07835829227806566, | |
| "learning_rate": 1.9835956266226564e-06, | |
| "loss": 0.281, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 4.5353159851301115, | |
| "grad_norm": 0.07473839377544324, | |
| "learning_rate": 1.9405937103825323e-06, | |
| "loss": 0.2794, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 4.540272614622057, | |
| "grad_norm": 0.07424740849428764, | |
| "learning_rate": 1.8980514411705764e-06, | |
| "loss": 0.2829, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 4.545229244114003, | |
| "grad_norm": 0.07948180946158458, | |
| "learning_rate": 1.8559693327737881e-06, | |
| "loss": 0.2872, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 4.5501858736059475, | |
| "grad_norm": 0.07490654186327057, | |
| "learning_rate": 1.814347893421733e-06, | |
| "loss": 0.2815, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 4.555142503097893, | |
| "grad_norm": 0.07699939647242027, | |
| "learning_rate": 1.7731876257804436e-06, | |
| "loss": 0.284, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 4.560099132589839, | |
| "grad_norm": 0.07804147212637814, | |
| "learning_rate": 1.7324890269463513e-06, | |
| "loss": 0.2814, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 4.565055762081784, | |
| "grad_norm": 0.07825477347641324, | |
| "learning_rate": 1.692252588440262e-06, | |
| "loss": 0.2845, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 4.57001239157373, | |
| "grad_norm": 0.07437512487598033, | |
| "learning_rate": 1.6524787962014244e-06, | |
| "loss": 0.2798, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 4.574969021065676, | |
| "grad_norm": 0.07557314641029611, | |
| "learning_rate": 1.6131681305816637e-06, | |
| "loss": 0.2873, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 4.5799256505576205, | |
| "grad_norm": 0.07775126489770805, | |
| "learning_rate": 1.5743210663395813e-06, | |
| "loss": 0.2847, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 4.584882280049566, | |
| "grad_norm": 0.07766485367642273, | |
| "learning_rate": 1.5359380726348394e-06, | |
| "loss": 0.2821, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 4.589838909541512, | |
| "grad_norm": 0.07875944113674188, | |
| "learning_rate": 1.49801961302245e-06, | |
| "loss": 0.2812, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 4.594795539033457, | |
| "grad_norm": 0.07557815096974223, | |
| "learning_rate": 1.4605661454472153e-06, | |
| "loss": 0.2827, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 4.599752168525403, | |
| "grad_norm": 0.07912225031104035, | |
| "learning_rate": 1.4235781222381895e-06, | |
| "loss": 0.284, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 4.604708798017349, | |
| "grad_norm": 0.07482374187052634, | |
| "learning_rate": 1.3870559901031987e-06, | |
| "loss": 0.2862, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 4.609665427509293, | |
| "grad_norm": 0.07746410057512453, | |
| "learning_rate": 1.3510001901234725e-06, | |
| "loss": 0.2878, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 4.614622057001239, | |
| "grad_norm": 0.07418347712699255, | |
| "learning_rate": 1.315411157748301e-06, | |
| "loss": 0.2831, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 4.619578686493185, | |
| "grad_norm": 0.07796826236037331, | |
| "learning_rate": 1.2802893227897672e-06, | |
| "loss": 0.2907, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 4.62453531598513, | |
| "grad_norm": 0.07337212609853155, | |
| "learning_rate": 1.2456351094175756e-06, | |
| "loss": 0.2814, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 4.629491945477076, | |
| "grad_norm": 0.07760397710295686, | |
| "learning_rate": 1.2114489361539205e-06, | |
| "loss": 0.2819, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 4.634448574969021, | |
| "grad_norm": 0.07402370372538353, | |
| "learning_rate": 1.1777312158684339e-06, | |
| "loss": 0.2866, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 4.639405204460966, | |
| "grad_norm": 0.07319655425900132, | |
| "learning_rate": 1.1444823557731887e-06, | |
| "loss": 0.2826, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 4.644361833952912, | |
| "grad_norm": 0.08008983797140948, | |
| "learning_rate": 1.1117027574177918e-06, | |
| "loss": 0.2842, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 4.649318463444858, | |
| "grad_norm": 0.07566854910903925, | |
| "learning_rate": 1.0793928166845436e-06, | |
| "loss": 0.2811, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 4.654275092936803, | |
| "grad_norm": 0.07349105620187181, | |
| "learning_rate": 1.0475529237836325e-06, | |
| "loss": 0.2845, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 4.659231722428748, | |
| "grad_norm": 0.07639825926061859, | |
| "learning_rate": 1.0161834632484368e-06, | |
| "loss": 0.2823, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 4.664188351920694, | |
| "grad_norm": 0.07596374087858862, | |
| "learning_rate": 9.8528481393088e-07, | |
| "loss": 0.282, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 4.669144981412639, | |
| "grad_norm": 0.07448678341002254, | |
| "learning_rate": 9.54857348996856e-07, | |
| "loss": 0.2899, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 4.674101610904585, | |
| "grad_norm": 0.07508881677292457, | |
| "learning_rate": 9.249014359217256e-07, | |
| "loss": 0.2853, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 4.679058240396531, | |
| "grad_norm": 0.07395798034861722, | |
| "learning_rate": 8.954174364858548e-07, | |
| "loss": 0.2832, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 4.684014869888475, | |
| "grad_norm": 0.07501163398901284, | |
| "learning_rate": 8.664057067702924e-07, | |
| "loss": 0.2815, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 4.688971499380421, | |
| "grad_norm": 0.07709895170244532, | |
| "learning_rate": 8.378665971524147e-07, | |
| "loss": 0.2855, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 4.693928128872367, | |
| "grad_norm": 0.07216153858578365, | |
| "learning_rate": 8.098004523017367e-07, | |
| "loss": 0.2825, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 4.698884758364312, | |
| "grad_norm": 0.0714764773582895, | |
| "learning_rate": 7.822076111757205e-07, | |
| "loss": 0.2803, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 4.703841387856258, | |
| "grad_norm": 0.07349501739897359, | |
| "learning_rate": 7.55088407015716e-07, | |
| "loss": 0.283, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 4.708798017348204, | |
| "grad_norm": 0.07253316663670173, | |
| "learning_rate": 7.284431673428937e-07, | |
| "loss": 0.288, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 4.713754646840148, | |
| "grad_norm": 0.07338883975150082, | |
| "learning_rate": 7.022722139543225e-07, | |
| "loss": 0.2834, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 4.718711276332094, | |
| "grad_norm": 0.07911708566826511, | |
| "learning_rate": 6.765758629190578e-07, | |
| "loss": 0.2866, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 4.72366790582404, | |
| "grad_norm": 0.07502515318559184, | |
| "learning_rate": 6.513544245743575e-07, | |
| "loss": 0.2856, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 4.728624535315985, | |
| "grad_norm": 0.07421022982739711, | |
| "learning_rate": 6.26608203521899e-07, | |
| "loss": 0.2831, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 4.733581164807931, | |
| "grad_norm": 0.07497763527285664, | |
| "learning_rate": 6.023374986241193e-07, | |
| "loss": 0.2809, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 4.7385377942998765, | |
| "grad_norm": 0.07670570586677927, | |
| "learning_rate": 5.785426030006091e-07, | |
| "loss": 0.2856, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 4.743494423791821, | |
| "grad_norm": 0.07480580777663845, | |
| "learning_rate": 5.552238040245516e-07, | |
| "loss": 0.2821, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 4.748451053283767, | |
| "grad_norm": 0.07308664128147804, | |
| "learning_rate": 5.323813833192848e-07, | |
| "loss": 0.2821, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 4.753407682775713, | |
| "grad_norm": 0.07166740014206943, | |
| "learning_rate": 5.100156167548642e-07, | |
| "loss": 0.2857, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 4.758364312267658, | |
| "grad_norm": 0.0750156333319193, | |
| "learning_rate": 4.881267744447548e-07, | |
| "loss": 0.2807, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 4.763320941759604, | |
| "grad_norm": 0.0760232819168498, | |
| "learning_rate": 4.6671512074256686e-07, | |
| "loss": 0.2835, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 4.768277571251549, | |
| "grad_norm": 0.07186700445060604, | |
| "learning_rate": 4.4578091423885804e-07, | |
| "loss": 0.2851, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 4.773234200743494, | |
| "grad_norm": 0.0699929775880697, | |
| "learning_rate": 4.253244077580032e-07, | |
| "loss": 0.2887, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 4.77819083023544, | |
| "grad_norm": 0.07177034127350093, | |
| "learning_rate": 4.05345848355152e-07, | |
| "loss": 0.2814, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 4.7831474597273855, | |
| "grad_norm": 0.07318619478686327, | |
| "learning_rate": 3.858454773132492e-07, | |
| "loss": 0.2851, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 4.788104089219331, | |
| "grad_norm": 0.07196381092321871, | |
| "learning_rate": 3.66823530140108e-07, | |
| "loss": 0.2806, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 4.793060718711276, | |
| "grad_norm": 0.07269362276261772, | |
| "learning_rate": 3.4828023656557687e-07, | |
| "loss": 0.2848, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 4.798017348203222, | |
| "grad_norm": 0.07207708326827385, | |
| "learning_rate": 3.302158205387507e-07, | |
| "loss": 0.2826, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 4.802973977695167, | |
| "grad_norm": 0.07133213853387121, | |
| "learning_rate": 3.1263050022528385e-07, | |
| "loss": 0.2805, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 4.807930607187113, | |
| "grad_norm": 0.07251252362084308, | |
| "learning_rate": 2.955244880047392e-07, | |
| "loss": 0.2839, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 4.8128872366790585, | |
| "grad_norm": 0.070298461918812, | |
| "learning_rate": 2.7889799046803446e-07, | |
| "loss": 0.2781, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 4.817843866171003, | |
| "grad_norm": 0.07430381244740472, | |
| "learning_rate": 2.6275120841495083e-07, | |
| "loss": 0.2837, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 4.822800495662949, | |
| "grad_norm": 0.06981369751740535, | |
| "learning_rate": 2.4708433685169064e-07, | |
| "loss": 0.2809, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 4.8277571251548945, | |
| "grad_norm": 0.07352139801671918, | |
| "learning_rate": 2.31897564988528e-07, | |
| "loss": 0.2868, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 4.83271375464684, | |
| "grad_norm": 0.07432448801089282, | |
| "learning_rate": 2.1719107623753955e-07, | |
| "loss": 0.2797, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 4.837670384138786, | |
| "grad_norm": 0.07063237991275789, | |
| "learning_rate": 2.0296504821037067e-07, | |
| "loss": 0.2865, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 4.8426270136307314, | |
| "grad_norm": 0.07228098025327327, | |
| "learning_rate": 1.8921965271610387e-07, | |
| "loss": 0.2818, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 4.847583643122676, | |
| "grad_norm": 0.07637299443039416, | |
| "learning_rate": 1.759550557591716e-07, | |
| "loss": 0.283, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 4.852540272614622, | |
| "grad_norm": 0.07208686982109491, | |
| "learning_rate": 1.6317141753735334e-07, | |
| "loss": 0.2812, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 4.8574969021065675, | |
| "grad_norm": 0.07114579990275795, | |
| "learning_rate": 1.5086889243985715e-07, | |
| "loss": 0.2805, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 4.862453531598513, | |
| "grad_norm": 0.07434434344635836, | |
| "learning_rate": 1.390476290454279e-07, | |
| "loss": 0.284, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 4.867410161090459, | |
| "grad_norm": 0.07430905888830036, | |
| "learning_rate": 1.2770777012057978e-07, | |
| "loss": 0.2862, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 4.872366790582404, | |
| "grad_norm": 0.07079802955341113, | |
| "learning_rate": 1.1684945261785541e-07, | |
| "loss": 0.2831, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 4.877323420074349, | |
| "grad_norm": 0.07119983463830963, | |
| "learning_rate": 1.064728076741739e-07, | |
| "loss": 0.2838, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 4.882280049566295, | |
| "grad_norm": 0.07152021858357932, | |
| "learning_rate": 9.657796060925429e-08, | |
| "loss": 0.2854, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 4.88723667905824, | |
| "grad_norm": 0.0727452791489002, | |
| "learning_rate": 8.716503092409679e-08, | |
| "loss": 0.2842, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 4.892193308550186, | |
| "grad_norm": 0.07330856909647866, | |
| "learning_rate": 7.823413229953058e-08, | |
| "loss": 0.2835, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 4.897149938042132, | |
| "grad_norm": 0.07217490209474922, | |
| "learning_rate": 6.97853725948594e-08, | |
| "loss": 0.2834, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 4.9021065675340765, | |
| "grad_norm": 0.07249892909698981, | |
| "learning_rate": 6.181885384656028e-08, | |
| "loss": 0.281, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 4.907063197026022, | |
| "grad_norm": 0.07439509854947902, | |
| "learning_rate": 5.43346722670135e-08, | |
| "loss": 0.2817, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 4.912019826517968, | |
| "grad_norm": 0.07152145735511466, | |
| "learning_rate": 4.733291824339237e-08, | |
| "loss": 0.2832, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 4.916976456009913, | |
| "grad_norm": 0.07325341342974692, | |
| "learning_rate": 4.0813676336539656e-08, | |
| "loss": 0.2844, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 4.921933085501859, | |
| "grad_norm": 0.0703233636733742, | |
| "learning_rate": 3.4777025279950635e-08, | |
| "loss": 0.2814, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 4.926889714993804, | |
| "grad_norm": 0.07009377049264516, | |
| "learning_rate": 2.9223037978822755e-08, | |
| "loss": 0.2838, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 4.931846344485749, | |
| "grad_norm": 0.0735843894676601, | |
| "learning_rate": 2.415178150918962e-08, | |
| "loss": 0.2838, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 4.936802973977695, | |
| "grad_norm": 0.07197069283776517, | |
| "learning_rate": 1.9563317117090585e-08, | |
| "loss": 0.2846, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 4.941759603469641, | |
| "grad_norm": 0.07023145383915408, | |
| "learning_rate": 1.545770021783799e-08, | |
| "loss": 0.2812, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 4.946716232961586, | |
| "grad_norm": 0.07391522734344161, | |
| "learning_rate": 1.1834980395359907e-08, | |
| "loss": 0.2829, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 4.951672862453531, | |
| "grad_norm": 0.07202531152737036, | |
| "learning_rate": 8.695201401578424e-09, | |
| "loss": 0.2786, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 4.956629491945477, | |
| "grad_norm": 0.07119570489216936, | |
| "learning_rate": 6.038401155903373e-09, | |
| "loss": 0.2809, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 4.961586121437422, | |
| "grad_norm": 0.07309534984608951, | |
| "learning_rate": 3.864611744757163e-09, | |
| "loss": 0.2808, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 4.966542750929368, | |
| "grad_norm": 0.07040039615225531, | |
| "learning_rate": 2.1738594212061816e-09, | |
| "loss": 0.2832, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 4.971499380421314, | |
| "grad_norm": 0.06981510822264225, | |
| "learning_rate": 9.661646046144057e-10, | |
| "loss": 0.2799, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 4.976456009913259, | |
| "grad_norm": 0.0715282791329122, | |
| "learning_rate": 2.415418804346814e-10, | |
| "loss": 0.2867, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 4.981412639405205, | |
| "grad_norm": 0.0720799754466833, | |
| "learning_rate": 0.0, | |
| "loss": 0.2831, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 4.981412639405205, | |
| "step": 1005, | |
| "total_flos": 2.069268270658722e+19, | |
| "train_loss": 0.35086224974684455, | |
| "train_runtime": 52973.0269, | |
| "train_samples_per_second": 9.749, | |
| "train_steps_per_second": 0.019 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 1005, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.069268270658722e+19, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |