| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.995135510771369, | |
| "eval_steps": 500, | |
| "global_step": 895, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.005559416261292564, | |
| "grad_norm": 7.9358195450351205, | |
| "learning_rate": 8.88888888888889e-07, | |
| "loss": 1.2622, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.011118832522585128, | |
| "grad_norm": 7.942651735525788, | |
| "learning_rate": 1.777777777777778e-06, | |
| "loss": 1.2714, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.01667824878387769, | |
| "grad_norm": 7.8124967595464785, | |
| "learning_rate": 2.666666666666667e-06, | |
| "loss": 1.2541, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.022237665045170257, | |
| "grad_norm": 7.377301625314602, | |
| "learning_rate": 3.555555555555556e-06, | |
| "loss": 1.258, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.02779708130646282, | |
| "grad_norm": 5.956675332171705, | |
| "learning_rate": 4.444444444444444e-06, | |
| "loss": 1.2091, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.03335649756775538, | |
| "grad_norm": 3.181434103411455, | |
| "learning_rate": 5.333333333333334e-06, | |
| "loss": 1.1656, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.03891591382904795, | |
| "grad_norm": 2.540442378830216, | |
| "learning_rate": 6.222222222222223e-06, | |
| "loss": 1.1705, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.04447533009034051, | |
| "grad_norm": 6.203334552578263, | |
| "learning_rate": 7.111111111111112e-06, | |
| "loss": 1.1541, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.05003474635163308, | |
| "grad_norm": 6.731756614308619, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 1.1547, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.05559416261292564, | |
| "grad_norm": 6.247745131639911, | |
| "learning_rate": 8.888888888888888e-06, | |
| "loss": 1.1347, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.061153578874218205, | |
| "grad_norm": 6.29730189833508, | |
| "learning_rate": 9.777777777777779e-06, | |
| "loss": 1.1331, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.06671299513551077, | |
| "grad_norm": 5.132108221825742, | |
| "learning_rate": 1.0666666666666667e-05, | |
| "loss": 1.117, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.07227241139680333, | |
| "grad_norm": 3.2550227184562375, | |
| "learning_rate": 1.1555555555555556e-05, | |
| "loss": 1.0767, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.0778318276580959, | |
| "grad_norm": 2.294739996569744, | |
| "learning_rate": 1.2444444444444446e-05, | |
| "loss": 1.0591, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.08339124391938846, | |
| "grad_norm": 2.257537007260575, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 1.0411, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.08895066018068103, | |
| "grad_norm": 1.919991827155782, | |
| "learning_rate": 1.4222222222222224e-05, | |
| "loss": 1.0206, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0945100764419736, | |
| "grad_norm": 1.543897719994616, | |
| "learning_rate": 1.5111111111111112e-05, | |
| "loss": 1.0078, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.10006949270326616, | |
| "grad_norm": 1.2679636773521212, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 0.9897, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.10562890896455872, | |
| "grad_norm": 1.180397863768546, | |
| "learning_rate": 1.688888888888889e-05, | |
| "loss": 0.9982, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.11118832522585129, | |
| "grad_norm": 0.9555181178645678, | |
| "learning_rate": 1.7777777777777777e-05, | |
| "loss": 0.9708, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.11674774148714386, | |
| "grad_norm": 1.0269402752613572, | |
| "learning_rate": 1.866666666666667e-05, | |
| "loss": 0.9798, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.12230715774843641, | |
| "grad_norm": 1.0708000870410468, | |
| "learning_rate": 1.9555555555555557e-05, | |
| "loss": 0.9699, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.12786657400972898, | |
| "grad_norm": 1.0055337623395493, | |
| "learning_rate": 2.0444444444444446e-05, | |
| "loss": 0.9427, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.13342599027102153, | |
| "grad_norm": 0.9229760705571243, | |
| "learning_rate": 2.1333333333333335e-05, | |
| "loss": 0.9471, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.13898540653231412, | |
| "grad_norm": 1.0000200092904565, | |
| "learning_rate": 2.2222222222222227e-05, | |
| "loss": 0.9358, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.14454482279360667, | |
| "grad_norm": 0.9339129127635016, | |
| "learning_rate": 2.3111111111111112e-05, | |
| "loss": 0.9482, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.15010423905489922, | |
| "grad_norm": 1.4607014103386948, | |
| "learning_rate": 2.4e-05, | |
| "loss": 0.9356, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.1556636553161918, | |
| "grad_norm": 0.8421272457585499, | |
| "learning_rate": 2.4888888888888893e-05, | |
| "loss": 0.9225, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.16122307157748436, | |
| "grad_norm": 1.1844488591141227, | |
| "learning_rate": 2.577777777777778e-05, | |
| "loss": 0.9203, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.16678248783877692, | |
| "grad_norm": 1.2189146868730585, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "loss": 0.927, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.1723419041000695, | |
| "grad_norm": 0.883273254038553, | |
| "learning_rate": 2.755555555555556e-05, | |
| "loss": 0.9089, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.17790132036136205, | |
| "grad_norm": 1.3537906784503, | |
| "learning_rate": 2.8444444444444447e-05, | |
| "loss": 0.8963, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.1834607366226546, | |
| "grad_norm": 0.8379832204564226, | |
| "learning_rate": 2.9333333333333333e-05, | |
| "loss": 0.8902, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.1890201528839472, | |
| "grad_norm": 1.0943609442451472, | |
| "learning_rate": 3.0222222222222225e-05, | |
| "loss": 0.9132, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.19457956914523974, | |
| "grad_norm": 1.317940648780496, | |
| "learning_rate": 3.111111111111112e-05, | |
| "loss": 0.8984, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.20013898540653233, | |
| "grad_norm": 0.8848172619144172, | |
| "learning_rate": 3.2000000000000005e-05, | |
| "loss": 0.9093, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.20569840166782488, | |
| "grad_norm": 1.8109414393395322, | |
| "learning_rate": 3.288888888888889e-05, | |
| "loss": 0.9033, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.21125781792911744, | |
| "grad_norm": 1.2554883307684124, | |
| "learning_rate": 3.377777777777778e-05, | |
| "loss": 0.9143, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.21681723419041002, | |
| "grad_norm": 1.3729869755187412, | |
| "learning_rate": 3.466666666666667e-05, | |
| "loss": 0.9062, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.22237665045170257, | |
| "grad_norm": 1.3452250001878445, | |
| "learning_rate": 3.555555555555555e-05, | |
| "loss": 0.897, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.22793606671299513, | |
| "grad_norm": 1.4414798342602804, | |
| "learning_rate": 3.644444444444445e-05, | |
| "loss": 0.8827, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.2334954829742877, | |
| "grad_norm": 1.1308801320597703, | |
| "learning_rate": 3.733333333333334e-05, | |
| "loss": 0.8888, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.23905489923558026, | |
| "grad_norm": 1.5239027018360913, | |
| "learning_rate": 3.8222222222222226e-05, | |
| "loss": 0.8797, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.24461431549687282, | |
| "grad_norm": 1.712956853860375, | |
| "learning_rate": 3.9111111111111115e-05, | |
| "loss": 0.8739, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.2501737317581654, | |
| "grad_norm": 0.9900656403191864, | |
| "learning_rate": 4e-05, | |
| "loss": 0.8806, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.25573314801945796, | |
| "grad_norm": 1.6460944571950658, | |
| "learning_rate": 4.088888888888889e-05, | |
| "loss": 0.8763, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.26129256428075054, | |
| "grad_norm": 1.2766480112920402, | |
| "learning_rate": 4.177777777777778e-05, | |
| "loss": 0.8736, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.26685198054204307, | |
| "grad_norm": 1.3233659252241343, | |
| "learning_rate": 4.266666666666667e-05, | |
| "loss": 0.8542, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.27241139680333565, | |
| "grad_norm": 1.634661434972772, | |
| "learning_rate": 4.355555555555556e-05, | |
| "loss": 0.8695, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.27797081306462823, | |
| "grad_norm": 0.8670372531726727, | |
| "learning_rate": 4.444444444444445e-05, | |
| "loss": 0.8671, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.28353022932592076, | |
| "grad_norm": 1.8672796712712416, | |
| "learning_rate": 4.5333333333333335e-05, | |
| "loss": 0.8745, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.28908964558721334, | |
| "grad_norm": 1.1637792401354623, | |
| "learning_rate": 4.6222222222222224e-05, | |
| "loss": 0.8571, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.2946490618485059, | |
| "grad_norm": 2.2610360534030116, | |
| "learning_rate": 4.711111111111112e-05, | |
| "loss": 0.8766, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.30020847810979845, | |
| "grad_norm": 1.996143461729675, | |
| "learning_rate": 4.8e-05, | |
| "loss": 0.8775, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.30576789437109103, | |
| "grad_norm": 1.4081005708876793, | |
| "learning_rate": 4.88888888888889e-05, | |
| "loss": 0.8662, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.3113273106323836, | |
| "grad_norm": 3.134853339618935, | |
| "learning_rate": 4.9777777777777785e-05, | |
| "loss": 0.8539, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.31688672689367614, | |
| "grad_norm": 1.9084219748530682, | |
| "learning_rate": 5.066666666666667e-05, | |
| "loss": 0.8798, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.3224461431549687, | |
| "grad_norm": 3.5153547411633674, | |
| "learning_rate": 5.155555555555556e-05, | |
| "loss": 0.8634, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.3280055594162613, | |
| "grad_norm": 2.6122587526657894, | |
| "learning_rate": 5.244444444444445e-05, | |
| "loss": 0.848, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.33356497567755383, | |
| "grad_norm": 2.975460445642716, | |
| "learning_rate": 5.333333333333333e-05, | |
| "loss": 0.8518, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.3391243919388464, | |
| "grad_norm": 2.1222203908542396, | |
| "learning_rate": 5.422222222222223e-05, | |
| "loss": 0.874, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.344683808200139, | |
| "grad_norm": 2.796857287788194, | |
| "learning_rate": 5.511111111111112e-05, | |
| "loss": 0.8533, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.3502432244614315, | |
| "grad_norm": 2.0600145849078917, | |
| "learning_rate": 5.6e-05, | |
| "loss": 0.8874, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.3558026407227241, | |
| "grad_norm": 2.930616478920807, | |
| "learning_rate": 5.6888888888888895e-05, | |
| "loss": 0.872, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.3613620569840167, | |
| "grad_norm": 1.999313770783877, | |
| "learning_rate": 5.777777777777778e-05, | |
| "loss": 0.8517, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.3669214732453092, | |
| "grad_norm": 2.777297306851716, | |
| "learning_rate": 5.8666666666666665e-05, | |
| "loss": 0.8569, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.3724808895066018, | |
| "grad_norm": 2.2731615451600593, | |
| "learning_rate": 5.955555555555556e-05, | |
| "loss": 0.8616, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.3780403057678944, | |
| "grad_norm": 1.998295903749419, | |
| "learning_rate": 6.044444444444445e-05, | |
| "loss": 0.8543, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.38359972202918696, | |
| "grad_norm": 1.6203916734242618, | |
| "learning_rate": 6.133333333333334e-05, | |
| "loss": 0.8446, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.3891591382904795, | |
| "grad_norm": 1.76915202879457, | |
| "learning_rate": 6.222222222222223e-05, | |
| "loss": 0.8576, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.39471855455177207, | |
| "grad_norm": 1.3152162421151332, | |
| "learning_rate": 6.311111111111112e-05, | |
| "loss": 0.8563, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.40027797081306465, | |
| "grad_norm": 1.3643468996208965, | |
| "learning_rate": 6.400000000000001e-05, | |
| "loss": 0.8426, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.4058373870743572, | |
| "grad_norm": 1.908069980328843, | |
| "learning_rate": 6.488888888888889e-05, | |
| "loss": 0.8531, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.41139680333564976, | |
| "grad_norm": 1.685214282010214, | |
| "learning_rate": 6.577777777777777e-05, | |
| "loss": 0.8441, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.41695621959694235, | |
| "grad_norm": 1.1524590649427782, | |
| "learning_rate": 6.666666666666667e-05, | |
| "loss": 0.8496, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.4225156358582349, | |
| "grad_norm": 2.5121776075082476, | |
| "learning_rate": 6.755555555555557e-05, | |
| "loss": 0.8517, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.42807505211952745, | |
| "grad_norm": 2.010450049558264, | |
| "learning_rate": 6.844444444444445e-05, | |
| "loss": 0.8621, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.43363446838082004, | |
| "grad_norm": 1.6558455850767062, | |
| "learning_rate": 6.933333333333334e-05, | |
| "loss": 0.8535, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.43919388464211256, | |
| "grad_norm": 2.0116967221199697, | |
| "learning_rate": 7.022222222222222e-05, | |
| "loss": 0.8534, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.44475330090340515, | |
| "grad_norm": 1.2758552339008475, | |
| "learning_rate": 7.11111111111111e-05, | |
| "loss": 0.8475, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.45031271716469773, | |
| "grad_norm": 2.171298513799551, | |
| "learning_rate": 7.2e-05, | |
| "loss": 0.8557, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.45587213342599026, | |
| "grad_norm": 1.492569972617151, | |
| "learning_rate": 7.28888888888889e-05, | |
| "loss": 0.8599, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.46143154968728284, | |
| "grad_norm": 1.8374241836084444, | |
| "learning_rate": 7.377777777777779e-05, | |
| "loss": 0.8585, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.4669909659485754, | |
| "grad_norm": 1.7493375110413492, | |
| "learning_rate": 7.466666666666667e-05, | |
| "loss": 0.8403, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.47255038220986795, | |
| "grad_norm": 1.1188241074603504, | |
| "learning_rate": 7.555555555555556e-05, | |
| "loss": 0.8301, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.47810979847116053, | |
| "grad_norm": 2.1803527841540045, | |
| "learning_rate": 7.644444444444445e-05, | |
| "loss": 0.8579, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.4836692147324531, | |
| "grad_norm": 2.605263047586671, | |
| "learning_rate": 7.733333333333333e-05, | |
| "loss": 0.862, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.48922863099374564, | |
| "grad_norm": 1.3879012695020032, | |
| "learning_rate": 7.822222222222223e-05, | |
| "loss": 0.8344, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.4947880472550382, | |
| "grad_norm": 1.3012238789432584, | |
| "learning_rate": 7.911111111111112e-05, | |
| "loss": 0.8587, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.5003474635163307, | |
| "grad_norm": 2.513237057025125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.868, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.5059068797776234, | |
| "grad_norm": 1.8055394762212944, | |
| "learning_rate": 7.999969539471858e-05, | |
| "loss": 0.8533, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.5114662960389159, | |
| "grad_norm": 1.5304364449758314, | |
| "learning_rate": 7.999878158351353e-05, | |
| "loss": 0.8452, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.5170257123002084, | |
| "grad_norm": 2.1193756766009084, | |
| "learning_rate": 7.999725858030245e-05, | |
| "loss": 0.864, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.5225851285615011, | |
| "grad_norm": 1.2380452088121536, | |
| "learning_rate": 7.999512640828105e-05, | |
| "loss": 0.8458, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.5281445448227936, | |
| "grad_norm": 2.058829749102372, | |
| "learning_rate": 7.999238509992291e-05, | |
| "loss": 0.8558, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.5337039610840861, | |
| "grad_norm": 1.6580103531528805, | |
| "learning_rate": 7.998903469697887e-05, | |
| "loss": 0.8646, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.5392633773453788, | |
| "grad_norm": 1.5955718716301766, | |
| "learning_rate": 7.998507525047644e-05, | |
| "loss": 0.8436, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.5448227936066713, | |
| "grad_norm": 1.4398145473558983, | |
| "learning_rate": 7.998050682071902e-05, | |
| "loss": 0.8497, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.5503822098679638, | |
| "grad_norm": 1.3793681111087452, | |
| "learning_rate": 7.997532947728505e-05, | |
| "loss": 0.8381, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.5559416261292565, | |
| "grad_norm": 1.4240010090800914, | |
| "learning_rate": 7.99695432990268e-05, | |
| "loss": 0.8624, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.561501042390549, | |
| "grad_norm": 1.9629255774107022, | |
| "learning_rate": 7.99631483740693e-05, | |
| "loss": 0.8387, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.5670604586518415, | |
| "grad_norm": 1.177962963776449, | |
| "learning_rate": 7.995614479980896e-05, | |
| "loss": 0.845, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.5726198749131342, | |
| "grad_norm": 1.5104993823032296, | |
| "learning_rate": 7.994853268291205e-05, | |
| "loss": 0.8377, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.5781792911744267, | |
| "grad_norm": 1.3395795088646747, | |
| "learning_rate": 7.994031213931312e-05, | |
| "loss": 0.8435, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.5837387074357192, | |
| "grad_norm": 1.5424073475176412, | |
| "learning_rate": 7.993148329421323e-05, | |
| "loss": 0.8447, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.5892981236970118, | |
| "grad_norm": 1.944768632226559, | |
| "learning_rate": 7.992204628207802e-05, | |
| "loss": 0.846, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.5948575399583044, | |
| "grad_norm": 1.1063119498625391, | |
| "learning_rate": 7.991200124663568e-05, | |
| "loss": 0.8304, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.6004169562195969, | |
| "grad_norm": 2.3248288094528635, | |
| "learning_rate": 7.990134834087473e-05, | |
| "loss": 0.8473, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.6059763724808895, | |
| "grad_norm": 1.402163201884845, | |
| "learning_rate": 7.989008772704177e-05, | |
| "loss": 0.8383, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.6115357887421821, | |
| "grad_norm": 1.7492854215090492, | |
| "learning_rate": 7.987821957663889e-05, | |
| "loss": 0.8505, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.6170952050034746, | |
| "grad_norm": 1.2887514260164146, | |
| "learning_rate": 7.986574407042118e-05, | |
| "loss": 0.8327, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.6226546212647672, | |
| "grad_norm": 1.307242590555259, | |
| "learning_rate": 7.985266139839389e-05, | |
| "loss": 0.8525, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.6282140375260598, | |
| "grad_norm": 1.6231277109038205, | |
| "learning_rate": 7.983897175980957e-05, | |
| "loss": 0.8385, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.6337734537873523, | |
| "grad_norm": 1.3006915329623785, | |
| "learning_rate": 7.982467536316502e-05, | |
| "loss": 0.8359, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.6393328700486449, | |
| "grad_norm": 1.7752541708560459, | |
| "learning_rate": 7.980977242619814e-05, | |
| "loss": 0.8497, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.6448922863099374, | |
| "grad_norm": 0.963970001633594, | |
| "learning_rate": 7.97942631758846e-05, | |
| "loss": 0.8348, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.65045170257123, | |
| "grad_norm": 1.8279125029062835, | |
| "learning_rate": 7.977814784843438e-05, | |
| "loss": 0.8291, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.6560111188325226, | |
| "grad_norm": 1.1016041521938733, | |
| "learning_rate": 7.976142668928815e-05, | |
| "loss": 0.837, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.6615705350938151, | |
| "grad_norm": 1.9123138383948801, | |
| "learning_rate": 7.97440999531136e-05, | |
| "loss": 0.8284, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.6671299513551077, | |
| "grad_norm": 1.4770547180557922, | |
| "learning_rate": 7.972616790380151e-05, | |
| "loss": 0.8498, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.6726893676164003, | |
| "grad_norm": 1.4177848951880787, | |
| "learning_rate": 7.970763081446172e-05, | |
| "loss": 0.834, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.6782487838776928, | |
| "grad_norm": 1.4568355456555888, | |
| "learning_rate": 7.968848896741896e-05, | |
| "loss": 0.8436, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.6838082001389854, | |
| "grad_norm": 1.417327263712158, | |
| "learning_rate": 7.966874265420866e-05, | |
| "loss": 0.8416, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.689367616400278, | |
| "grad_norm": 1.1607989065407163, | |
| "learning_rate": 7.964839217557237e-05, | |
| "loss": 0.8487, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.6949270326615705, | |
| "grad_norm": 1.1655546055699306, | |
| "learning_rate": 7.962743784145323e-05, | |
| "loss": 0.8309, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.700486448922863, | |
| "grad_norm": 1.6283722594753118, | |
| "learning_rate": 7.960587997099132e-05, | |
| "loss": 0.8476, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.7060458651841557, | |
| "grad_norm": 0.855140445880346, | |
| "learning_rate": 7.958371889251868e-05, | |
| "loss": 0.8167, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.7116052814454482, | |
| "grad_norm": 1.181067166006884, | |
| "learning_rate": 7.956095494355438e-05, | |
| "loss": 0.8273, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.7171646977067407, | |
| "grad_norm": 1.3100507927777372, | |
| "learning_rate": 7.95375884707994e-05, | |
| "loss": 0.8438, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.7227241139680334, | |
| "grad_norm": 1.6550084905324673, | |
| "learning_rate": 7.951361983013127e-05, | |
| "loss": 0.8377, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.7282835302293259, | |
| "grad_norm": 1.2643405442051172, | |
| "learning_rate": 7.948904938659872e-05, | |
| "loss": 0.8449, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.7338429464906184, | |
| "grad_norm": 1.2710601497668057, | |
| "learning_rate": 7.946387751441609e-05, | |
| "loss": 0.8404, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.7394023627519111, | |
| "grad_norm": 1.4879557131368137, | |
| "learning_rate": 7.943810459695764e-05, | |
| "loss": 0.8242, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.7449617790132036, | |
| "grad_norm": 1.1635532204185386, | |
| "learning_rate": 7.941173102675172e-05, | |
| "loss": 0.8153, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.7505211952744962, | |
| "grad_norm": 1.2694310707160308, | |
| "learning_rate": 7.938475720547477e-05, | |
| "loss": 0.8372, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.7560806115357888, | |
| "grad_norm": 1.071426149452144, | |
| "learning_rate": 7.93571835439452e-05, | |
| "loss": 0.8311, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.7616400277970813, | |
| "grad_norm": 1.561376324467928, | |
| "learning_rate": 7.932901046211715e-05, | |
| "loss": 0.8182, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.7671994440583739, | |
| "grad_norm": 0.9569705113881821, | |
| "learning_rate": 7.930023838907411e-05, | |
| "loss": 0.8285, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.7727588603196665, | |
| "grad_norm": 1.3769869224499816, | |
| "learning_rate": 7.927086776302235e-05, | |
| "loss": 0.8223, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.778318276580959, | |
| "grad_norm": 1.0661634612442863, | |
| "learning_rate": 7.924089903128425e-05, | |
| "loss": 0.821, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.7838776928422516, | |
| "grad_norm": 1.7150494701801486, | |
| "learning_rate": 7.921033265029153e-05, | |
| "loss": 0.8318, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.7894371091035441, | |
| "grad_norm": 1.1760603467329838, | |
| "learning_rate": 7.917916908557822e-05, | |
| "loss": 0.8141, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.7949965253648367, | |
| "grad_norm": 1.195770766355331, | |
| "learning_rate": 7.914740881177365e-05, | |
| "loss": 0.8086, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.8005559416261293, | |
| "grad_norm": 1.0311554098100406, | |
| "learning_rate": 7.911505231259516e-05, | |
| "loss": 0.8082, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.8061153578874218, | |
| "grad_norm": 1.6912543359730465, | |
| "learning_rate": 7.908210008084082e-05, | |
| "loss": 0.8023, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.8116747741487144, | |
| "grad_norm": 1.0813659299032288, | |
| "learning_rate": 7.904855261838179e-05, | |
| "loss": 0.8315, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.817234190410007, | |
| "grad_norm": 1.5581335270047423, | |
| "learning_rate": 7.901441043615479e-05, | |
| "loss": 0.8268, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.8227936066712995, | |
| "grad_norm": 1.00596454320661, | |
| "learning_rate": 7.897967405415425e-05, | |
| "loss": 0.8386, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.828353022932592, | |
| "grad_norm": 1.144194212812256, | |
| "learning_rate": 7.894434400142447e-05, | |
| "loss": 0.8325, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.8339124391938847, | |
| "grad_norm": 1.1625675356230636, | |
| "learning_rate": 7.890842081605148e-05, | |
| "loss": 0.8504, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.8394718554551772, | |
| "grad_norm": 1.1297535406659176, | |
| "learning_rate": 7.887190504515485e-05, | |
| "loss": 0.8075, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.8450312717164697, | |
| "grad_norm": 1.2053386499038343, | |
| "learning_rate": 7.883479724487946e-05, | |
| "loss": 0.8247, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.8505906879777624, | |
| "grad_norm": 1.1106411060998684, | |
| "learning_rate": 7.879709798038685e-05, | |
| "loss": 0.8298, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.8561501042390549, | |
| "grad_norm": 1.0971069205166395, | |
| "learning_rate": 7.875880782584683e-05, | |
| "loss": 0.8273, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.8617095205003474, | |
| "grad_norm": 1.159414773344552, | |
| "learning_rate": 7.871992736442852e-05, | |
| "loss": 0.8326, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.8672689367616401, | |
| "grad_norm": 1.4910863801494305, | |
| "learning_rate": 7.868045718829166e-05, | |
| "loss": 0.8263, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.8728283530229326, | |
| "grad_norm": 0.8170303946123546, | |
| "learning_rate": 7.864039789857743e-05, | |
| "loss": 0.8193, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.8783877692842251, | |
| "grad_norm": 0.8610983641895286, | |
| "learning_rate": 7.859975010539937e-05, | |
| "loss": 0.8119, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.8839471855455178, | |
| "grad_norm": 0.9841408044834976, | |
| "learning_rate": 7.855851442783414e-05, | |
| "loss": 0.8291, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.8895066018068103, | |
| "grad_norm": 1.9526023275945634, | |
| "learning_rate": 7.851669149391198e-05, | |
| "loss": 0.8103, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.8950660180681028, | |
| "grad_norm": 0.7191259063946382, | |
| "learning_rate": 7.847428194060722e-05, | |
| "loss": 0.8214, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.9006254343293955, | |
| "grad_norm": 1.8842244115014957, | |
| "learning_rate": 7.843128641382856e-05, | |
| "loss": 0.8198, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.906184850590688, | |
| "grad_norm": 1.0514604879384255, | |
| "learning_rate": 7.838770556840923e-05, | |
| "loss": 0.821, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.9117442668519805, | |
| "grad_norm": 1.6003009095500897, | |
| "learning_rate": 7.834354006809699e-05, | |
| "loss": 0.8396, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.9173036831132731, | |
| "grad_norm": 1.2604980474829044, | |
| "learning_rate": 7.829879058554411e-05, | |
| "loss": 0.8169, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.9228630993745657, | |
| "grad_norm": 1.3754424425834815, | |
| "learning_rate": 7.8253457802297e-05, | |
| "loss": 0.8248, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.9284225156358582, | |
| "grad_norm": 1.2970707388137186, | |
| "learning_rate": 7.820754240878593e-05, | |
| "loss": 0.8242, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.9339819318971508, | |
| "grad_norm": 1.1094402817946039, | |
| "learning_rate": 7.816104510431446e-05, | |
| "loss": 0.821, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.9395413481584434, | |
| "grad_norm": 1.0619969692986742, | |
| "learning_rate": 7.811396659704884e-05, | |
| "loss": 0.7994, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.9451007644197359, | |
| "grad_norm": 1.268154938816547, | |
| "learning_rate": 7.806630760400712e-05, | |
| "loss": 0.8231, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.9506601806810285, | |
| "grad_norm": 0.8114888834391076, | |
| "learning_rate": 7.80180688510484e-05, | |
| "loss": 0.8138, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.9562195969423211, | |
| "grad_norm": 1.1353293038885697, | |
| "learning_rate": 7.79692510728616e-05, | |
| "loss": 0.8363, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.9617790132036136, | |
| "grad_norm": 1.1710537031908355, | |
| "learning_rate": 7.791985501295437e-05, | |
| "loss": 0.8124, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.9673384294649062, | |
| "grad_norm": 0.9925849663427073, | |
| "learning_rate": 7.786988142364175e-05, | |
| "loss": 0.8062, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.9728978457261988, | |
| "grad_norm": 0.9839146497835408, | |
| "learning_rate": 7.781933106603472e-05, | |
| "loss": 0.8202, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.9784572619874913, | |
| "grad_norm": 1.4171846242465553, | |
| "learning_rate": 7.776820471002856e-05, | |
| "loss": 0.7936, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.9840166782487839, | |
| "grad_norm": 0.7277915858989548, | |
| "learning_rate": 7.771650313429117e-05, | |
| "loss": 0.8091, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.9895760945100764, | |
| "grad_norm": 0.8641953694944955, | |
| "learning_rate": 7.76642271262512e-05, | |
| "loss": 0.8146, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.995135510771369, | |
| "grad_norm": 1.334464922937482, | |
| "learning_rate": 7.761137748208606e-05, | |
| "loss": 0.7959, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 1.0006949270326615, | |
| "grad_norm": 0.8723143516212564, | |
| "learning_rate": 7.75579550067098e-05, | |
| "loss": 0.8144, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.0062543432939541, | |
| "grad_norm": 0.9676877145889436, | |
| "learning_rate": 7.750396051376082e-05, | |
| "loss": 0.7972, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 1.0118137595552468, | |
| "grad_norm": 0.8078199257078474, | |
| "learning_rate": 7.74493948255895e-05, | |
| "loss": 0.7873, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 1.0173731758165392, | |
| "grad_norm": 0.9618995502446188, | |
| "learning_rate": 7.739425877324567e-05, | |
| "loss": 0.806, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 1.0229325920778318, | |
| "grad_norm": 1.1903105794790574, | |
| "learning_rate": 7.733855319646598e-05, | |
| "loss": 0.8063, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 1.0284920083391245, | |
| "grad_norm": 1.113260237892017, | |
| "learning_rate": 7.72822789436611e-05, | |
| "loss": 0.8055, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 1.0340514246004169, | |
| "grad_norm": 1.2466501606809175, | |
| "learning_rate": 7.722543687190271e-05, | |
| "loss": 0.8, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 1.0396108408617095, | |
| "grad_norm": 0.8935894806107648, | |
| "learning_rate": 7.716802784691061e-05, | |
| "loss": 0.7886, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 1.0451702571230022, | |
| "grad_norm": 1.0645110228278147, | |
| "learning_rate": 7.711005274303939e-05, | |
| "loss": 0.8025, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 1.0507296733842946, | |
| "grad_norm": 0.655899538337792, | |
| "learning_rate": 7.70515124432652e-05, | |
| "loss": 0.7939, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 1.0562890896455872, | |
| "grad_norm": 0.7149867223741726, | |
| "learning_rate": 7.699240783917226e-05, | |
| "loss": 0.7819, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.0618485059068798, | |
| "grad_norm": 0.9227738003310058, | |
| "learning_rate": 7.693273983093932e-05, | |
| "loss": 0.7815, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 1.0674079221681723, | |
| "grad_norm": 1.1743470172337884, | |
| "learning_rate": 7.687250932732587e-05, | |
| "loss": 0.8034, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 1.072967338429465, | |
| "grad_norm": 1.1326402294789017, | |
| "learning_rate": 7.681171724565841e-05, | |
| "loss": 0.8039, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 1.0785267546907575, | |
| "grad_norm": 0.981410769406106, | |
| "learning_rate": 7.675036451181638e-05, | |
| "loss": 0.7856, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 1.08408617095205, | |
| "grad_norm": 0.8619461846062771, | |
| "learning_rate": 7.668845206021812e-05, | |
| "loss": 0.7938, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 1.0896455872133426, | |
| "grad_norm": 1.0839380849451241, | |
| "learning_rate": 7.662598083380664e-05, | |
| "loss": 0.7848, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 1.0952050034746352, | |
| "grad_norm": 1.0392746014030274, | |
| "learning_rate": 7.656295178403519e-05, | |
| "loss": 0.7911, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 1.1007644197359276, | |
| "grad_norm": 1.0359884367143106, | |
| "learning_rate": 7.649936587085285e-05, | |
| "loss": 0.7861, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 1.1063238359972203, | |
| "grad_norm": 1.0028428027595355, | |
| "learning_rate": 7.643522406268985e-05, | |
| "loss": 0.7883, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 1.111883252258513, | |
| "grad_norm": 0.8262483797533237, | |
| "learning_rate": 7.63705273364429e-05, | |
| "loss": 0.7801, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.1174426685198053, | |
| "grad_norm": 0.793966822779039, | |
| "learning_rate": 7.630527667746022e-05, | |
| "loss": 0.7808, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 1.123002084781098, | |
| "grad_norm": 0.5841255495857546, | |
| "learning_rate": 7.623947307952655e-05, | |
| "loss": 0.7875, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 1.1285615010423906, | |
| "grad_norm": 0.641770336653931, | |
| "learning_rate": 7.617311754484809e-05, | |
| "loss": 0.7769, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 1.134120917303683, | |
| "grad_norm": 0.49349990660077014, | |
| "learning_rate": 7.610621108403714e-05, | |
| "loss": 0.7812, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 1.1396803335649757, | |
| "grad_norm": 0.7063714078210745, | |
| "learning_rate": 7.603875471609677e-05, | |
| "loss": 0.7771, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.1452397498262683, | |
| "grad_norm": 0.6252197448112845, | |
| "learning_rate": 7.59707494684053e-05, | |
| "loss": 0.7807, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 1.1507991660875607, | |
| "grad_norm": 0.5127771372908517, | |
| "learning_rate": 7.590219637670055e-05, | |
| "loss": 0.7707, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 1.1563585823488534, | |
| "grad_norm": 0.7846508910498406, | |
| "learning_rate": 7.583309648506429e-05, | |
| "loss": 0.7866, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 1.161917998610146, | |
| "grad_norm": 0.6435801852453142, | |
| "learning_rate": 7.576345084590606e-05, | |
| "loss": 0.7807, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 1.1674774148714384, | |
| "grad_norm": 0.6129447763782494, | |
| "learning_rate": 7.569326051994735e-05, | |
| "loss": 0.7947, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.173036831132731, | |
| "grad_norm": 0.7393929046755997, | |
| "learning_rate": 7.562252657620536e-05, | |
| "loss": 0.7898, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 1.1785962473940237, | |
| "grad_norm": 0.7875515383471235, | |
| "learning_rate": 7.555125009197675e-05, | |
| "loss": 0.7749, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 1.184155663655316, | |
| "grad_norm": 0.8502781736276134, | |
| "learning_rate": 7.547943215282116e-05, | |
| "loss": 0.796, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 1.1897150799166087, | |
| "grad_norm": 0.9381926799528068, | |
| "learning_rate": 7.54070738525448e-05, | |
| "loss": 0.7866, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 1.1952744961779014, | |
| "grad_norm": 1.1151942570209228, | |
| "learning_rate": 7.533417629318367e-05, | |
| "loss": 0.7807, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 1.2008339124391938, | |
| "grad_norm": 1.2041485011685347, | |
| "learning_rate": 7.526074058498687e-05, | |
| "loss": 0.7905, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 1.2063933287004864, | |
| "grad_norm": 0.9555182120392091, | |
| "learning_rate": 7.518676784639962e-05, | |
| "loss": 0.7931, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 1.211952744961779, | |
| "grad_norm": 1.0723231936146647, | |
| "learning_rate": 7.511225920404626e-05, | |
| "loss": 0.7937, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 1.2175121612230715, | |
| "grad_norm": 1.026526722316571, | |
| "learning_rate": 7.503721579271308e-05, | |
| "loss": 0.7878, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 1.2230715774843641, | |
| "grad_norm": 0.9506008956242211, | |
| "learning_rate": 7.496163875533105e-05, | |
| "loss": 0.7697, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.2286309937456568, | |
| "grad_norm": 1.131493716226611, | |
| "learning_rate": 7.488552924295843e-05, | |
| "loss": 0.7785, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 1.2341904100069492, | |
| "grad_norm": 0.7811448856254682, | |
| "learning_rate": 7.480888841476319e-05, | |
| "loss": 0.7744, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 1.2397498262682418, | |
| "grad_norm": 0.5750988813544421, | |
| "learning_rate": 7.473171743800535e-05, | |
| "loss": 0.7697, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 1.2453092425295345, | |
| "grad_norm": 0.6043177617706299, | |
| "learning_rate": 7.46540174880193e-05, | |
| "loss": 0.7774, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 1.2508686587908269, | |
| "grad_norm": 0.6185743964153164, | |
| "learning_rate": 7.457578974819578e-05, | |
| "loss": 0.7745, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.2564280750521195, | |
| "grad_norm": 0.5315823047799536, | |
| "learning_rate": 7.449703540996393e-05, | |
| "loss": 0.7818, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 1.2619874913134121, | |
| "grad_norm": 0.4889499415667462, | |
| "learning_rate": 7.44177556727731e-05, | |
| "loss": 0.7811, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 1.2675469075747046, | |
| "grad_norm": 0.6576085919723266, | |
| "learning_rate": 7.433795174407465e-05, | |
| "loss": 0.7887, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 1.2731063238359972, | |
| "grad_norm": 0.7111593144278238, | |
| "learning_rate": 7.425762483930347e-05, | |
| "loss": 0.7748, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 1.2786657400972898, | |
| "grad_norm": 0.6484637513039345, | |
| "learning_rate": 7.417677618185955e-05, | |
| "loss": 0.776, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.2842251563585823, | |
| "grad_norm": 0.6761213806310967, | |
| "learning_rate": 7.409540700308927e-05, | |
| "loss": 0.7906, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 1.289784572619875, | |
| "grad_norm": 0.72586401747787, | |
| "learning_rate": 7.401351854226673e-05, | |
| "loss": 0.7863, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 1.2953439888811675, | |
| "grad_norm": 0.8761480055698481, | |
| "learning_rate": 7.393111204657478e-05, | |
| "loss": 0.7808, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 1.30090340514246, | |
| "grad_norm": 1.155943642457972, | |
| "learning_rate": 7.384818877108617e-05, | |
| "loss": 0.7679, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 1.3064628214037526, | |
| "grad_norm": 0.8100094228240489, | |
| "learning_rate": 7.376474997874422e-05, | |
| "loss": 0.7976, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.3120222376650452, | |
| "grad_norm": 0.5445781974209539, | |
| "learning_rate": 7.368079694034381e-05, | |
| "loss": 0.7746, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 1.3175816539263376, | |
| "grad_norm": 0.6679063542986605, | |
| "learning_rate": 7.359633093451187e-05, | |
| "loss": 0.7883, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 1.3231410701876303, | |
| "grad_norm": 0.958523219858762, | |
| "learning_rate": 7.351135324768797e-05, | |
| "loss": 0.7757, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 1.328700486448923, | |
| "grad_norm": 1.1832452619017166, | |
| "learning_rate": 7.342586517410473e-05, | |
| "loss": 0.7866, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 1.3342599027102153, | |
| "grad_norm": 0.6620192952316521, | |
| "learning_rate": 7.333986801576808e-05, | |
| "loss": 0.7787, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.339819318971508, | |
| "grad_norm": 0.5706874893778034, | |
| "learning_rate": 7.325336308243747e-05, | |
| "loss": 0.7753, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 1.3453787352328006, | |
| "grad_norm": 0.7293610495624486, | |
| "learning_rate": 7.316635169160585e-05, | |
| "loss": 0.779, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 1.350938151494093, | |
| "grad_norm": 0.9230133745177531, | |
| "learning_rate": 7.307883516847968e-05, | |
| "loss": 0.7855, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 1.3564975677553857, | |
| "grad_norm": 0.8597946100934724, | |
| "learning_rate": 7.299081484595874e-05, | |
| "loss": 0.7712, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 1.3620569840166783, | |
| "grad_norm": 0.7770773303527336, | |
| "learning_rate": 7.290229206461578e-05, | |
| "loss": 0.7765, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.3676164002779707, | |
| "grad_norm": 1.018019133526188, | |
| "learning_rate": 7.281326817267612e-05, | |
| "loss": 0.7827, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.3731758165392633, | |
| "grad_norm": 1.2608059715209885, | |
| "learning_rate": 7.272374452599717e-05, | |
| "loss": 0.7801, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 1.378735232800556, | |
| "grad_norm": 0.5427742215121941, | |
| "learning_rate": 7.263372248804768e-05, | |
| "loss": 0.769, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.3842946490618484, | |
| "grad_norm": 0.9707313981078285, | |
| "learning_rate": 7.254320342988707e-05, | |
| "loss": 0.7879, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 1.389854065323141, | |
| "grad_norm": 1.3746763366179666, | |
| "learning_rate": 7.245218873014451e-05, | |
| "loss": 0.8022, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.3954134815844337, | |
| "grad_norm": 0.8168605853299865, | |
| "learning_rate": 7.236067977499791e-05, | |
| "loss": 0.7879, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 1.400972897845726, | |
| "grad_norm": 1.5862041867625931, | |
| "learning_rate": 7.22686779581528e-05, | |
| "loss": 0.787, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.4065323141070187, | |
| "grad_norm": 0.6237034602452829, | |
| "learning_rate": 7.217618468082118e-05, | |
| "loss": 0.7659, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 1.4120917303683114, | |
| "grad_norm": 1.248354943849073, | |
| "learning_rate": 7.208320135170004e-05, | |
| "loss": 0.7944, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.4176511466296038, | |
| "grad_norm": 0.5870554519430948, | |
| "learning_rate": 7.19897293869501e-05, | |
| "loss": 0.7942, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.4232105628908964, | |
| "grad_norm": 1.2147306579223598, | |
| "learning_rate": 7.189577021017402e-05, | |
| "loss": 0.7768, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.428769979152189, | |
| "grad_norm": 0.5689684573208176, | |
| "learning_rate": 7.180132525239488e-05, | |
| "loss": 0.7777, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 1.4343293954134815, | |
| "grad_norm": 0.891498824230005, | |
| "learning_rate": 7.170639595203434e-05, | |
| "loss": 0.7697, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.4398888116747741, | |
| "grad_norm": 0.5981417552374583, | |
| "learning_rate": 7.16109837548907e-05, | |
| "loss": 0.7845, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 1.4454482279360668, | |
| "grad_norm": 0.6749443606812077, | |
| "learning_rate": 7.151509011411693e-05, | |
| "loss": 0.7751, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.4510076441973592, | |
| "grad_norm": 0.54518347236071, | |
| "learning_rate": 7.14187164901985e-05, | |
| "loss": 0.7787, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 1.4565670604586518, | |
| "grad_norm": 0.5880443192755512, | |
| "learning_rate": 7.132186435093115e-05, | |
| "loss": 0.7901, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.4621264767199444, | |
| "grad_norm": 0.5541773489903166, | |
| "learning_rate": 7.122453517139854e-05, | |
| "loss": 0.7697, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 1.4676858929812369, | |
| "grad_norm": 0.6059693677431078, | |
| "learning_rate": 7.112673043394976e-05, | |
| "loss": 0.7935, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.4732453092425295, | |
| "grad_norm": 0.4757882619272586, | |
| "learning_rate": 7.10284516281768e-05, | |
| "loss": 0.7735, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.4788047255038221, | |
| "grad_norm": 0.611919607902747, | |
| "learning_rate": 7.092970025089183e-05, | |
| "loss": 0.7914, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.4843641417651146, | |
| "grad_norm": 0.542145528258939, | |
| "learning_rate": 7.08304778061044e-05, | |
| "loss": 0.7774, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 1.4899235580264072, | |
| "grad_norm": 0.5325510989772689, | |
| "learning_rate": 7.073078580499854e-05, | |
| "loss": 0.7761, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.4954829742876998, | |
| "grad_norm": 0.5986995332394465, | |
| "learning_rate": 7.063062576590975e-05, | |
| "loss": 0.7838, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 1.5010423905489922, | |
| "grad_norm": 0.4616995964663547, | |
| "learning_rate": 7.052999921430189e-05, | |
| "loss": 0.7755, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.5066018068102849, | |
| "grad_norm": 0.49058298770662495, | |
| "learning_rate": 7.04289076827439e-05, | |
| "loss": 0.7861, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 1.5121612230715775, | |
| "grad_norm": 0.44812644938940693, | |
| "learning_rate": 7.032735271088652e-05, | |
| "loss": 0.7564, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.51772063933287, | |
| "grad_norm": 0.475517524463409, | |
| "learning_rate": 7.022533584543877e-05, | |
| "loss": 0.7649, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 1.5232800555941626, | |
| "grad_norm": 0.3956398801829029, | |
| "learning_rate": 7.012285864014445e-05, | |
| "loss": 0.7748, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.5288394718554552, | |
| "grad_norm": 0.3897447425893131, | |
| "learning_rate": 7.001992265575846e-05, | |
| "loss": 0.7783, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.5343988881167476, | |
| "grad_norm": 0.4597977578717938, | |
| "learning_rate": 6.991652946002302e-05, | |
| "loss": 0.7635, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.5399583043780403, | |
| "grad_norm": 0.4666034369576422, | |
| "learning_rate": 6.981268062764383e-05, | |
| "loss": 0.7683, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 1.545517720639333, | |
| "grad_norm": 0.4753869261509179, | |
| "learning_rate": 6.9708377740266e-05, | |
| "loss": 0.7666, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.5510771369006253, | |
| "grad_norm": 0.43411039480677927, | |
| "learning_rate": 6.960362238645008e-05, | |
| "loss": 0.775, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 1.556636553161918, | |
| "grad_norm": 0.35532156974575624, | |
| "learning_rate": 6.949841616164774e-05, | |
| "loss": 0.7715, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.5621959694232106, | |
| "grad_norm": 0.36018397788729956, | |
| "learning_rate": 6.939276066817759e-05, | |
| "loss": 0.7695, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 1.567755385684503, | |
| "grad_norm": 0.4363262505492209, | |
| "learning_rate": 6.928665751520067e-05, | |
| "loss": 0.8013, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.5733148019457956, | |
| "grad_norm": 0.49701805746941385, | |
| "learning_rate": 6.918010831869604e-05, | |
| "loss": 0.7855, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 1.5788742182070883, | |
| "grad_norm": 0.5598941853744219, | |
| "learning_rate": 6.907311470143609e-05, | |
| "loss": 0.779, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.5844336344683807, | |
| "grad_norm": 0.6100802112728656, | |
| "learning_rate": 6.896567829296185e-05, | |
| "loss": 0.7794, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 1.5899930507296733, | |
| "grad_norm": 0.7281021082315408, | |
| "learning_rate": 6.885780072955824e-05, | |
| "loss": 0.7709, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.595552466990966, | |
| "grad_norm": 0.8642915362417339, | |
| "learning_rate": 6.874948365422899e-05, | |
| "loss": 0.7772, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 1.6011118832522584, | |
| "grad_norm": 1.0745648538812815, | |
| "learning_rate": 6.864072871667177e-05, | |
| "loss": 0.7766, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.606671299513551, | |
| "grad_norm": 1.0956998824768283, | |
| "learning_rate": 6.853153757325303e-05, | |
| "loss": 0.78, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 1.6122307157748437, | |
| "grad_norm": 1.2750864854683548, | |
| "learning_rate": 6.842191188698267e-05, | |
| "loss": 0.7857, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.617790132036136, | |
| "grad_norm": 0.8795380185955686, | |
| "learning_rate": 6.831185332748889e-05, | |
| "loss": 0.7788, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 1.6233495482974287, | |
| "grad_norm": 0.8009577214005955, | |
| "learning_rate": 6.820136357099256e-05, | |
| "loss": 0.7716, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.6289089645587214, | |
| "grad_norm": 0.7908950069367938, | |
| "learning_rate": 6.80904443002819e-05, | |
| "loss": 0.7834, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 1.6344683808200138, | |
| "grad_norm": 0.7087524595566469, | |
| "learning_rate": 6.797909720468665e-05, | |
| "loss": 0.7799, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.6400277970813064, | |
| "grad_norm": 0.8154001152670927, | |
| "learning_rate": 6.786732398005254e-05, | |
| "loss": 0.7729, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 1.645587213342599, | |
| "grad_norm": 1.0032770151573074, | |
| "learning_rate": 6.775512632871522e-05, | |
| "loss": 0.7743, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.6511466296038915, | |
| "grad_norm": 1.0526489795713596, | |
| "learning_rate": 6.76425059594746e-05, | |
| "loss": 0.7755, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 1.656706045865184, | |
| "grad_norm": 0.9590121679714712, | |
| "learning_rate": 6.752946458756862e-05, | |
| "loss": 0.7709, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.6622654621264767, | |
| "grad_norm": 0.8034166003807086, | |
| "learning_rate": 6.741600393464725e-05, | |
| "loss": 0.7584, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 1.6678248783877692, | |
| "grad_norm": 0.6265399191838994, | |
| "learning_rate": 6.730212572874618e-05, | |
| "loss": 0.7701, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.6733842946490618, | |
| "grad_norm": 0.7017430730297519, | |
| "learning_rate": 6.718783170426055e-05, | |
| "loss": 0.7709, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 1.6789437109103544, | |
| "grad_norm": 0.7032001111839687, | |
| "learning_rate": 6.707312360191854e-05, | |
| "loss": 0.7714, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.6845031271716469, | |
| "grad_norm": 0.3993478869941896, | |
| "learning_rate": 6.695800316875484e-05, | |
| "loss": 0.7599, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 1.6900625434329395, | |
| "grad_norm": 0.5249562467408584, | |
| "learning_rate": 6.684247215808407e-05, | |
| "loss": 0.7692, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.6956219596942321, | |
| "grad_norm": 0.5899591681953836, | |
| "learning_rate": 6.6726532329474e-05, | |
| "loss": 0.7621, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.7011813759555245, | |
| "grad_norm": 0.5069877083978761, | |
| "learning_rate": 6.661018544871884e-05, | |
| "loss": 0.7845, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.7067407922168172, | |
| "grad_norm": 0.488248778365674, | |
| "learning_rate": 6.649343328781232e-05, | |
| "loss": 0.7832, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 1.7123002084781098, | |
| "grad_norm": 0.4690318525581349, | |
| "learning_rate": 6.637627762492067e-05, | |
| "loss": 0.7804, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.7178596247394022, | |
| "grad_norm": 0.4199909982678252, | |
| "learning_rate": 6.625872024435559e-05, | |
| "loss": 0.7816, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 1.7234190410006949, | |
| "grad_norm": 0.38057764950277645, | |
| "learning_rate": 6.6140762936547e-05, | |
| "loss": 0.7753, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.7289784572619875, | |
| "grad_norm": 0.3918174751770195, | |
| "learning_rate": 6.602240749801588e-05, | |
| "loss": 0.7688, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 1.73453787352328, | |
| "grad_norm": 0.3670670542000639, | |
| "learning_rate": 6.59036557313468e-05, | |
| "loss": 0.7671, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.7400972897845726, | |
| "grad_norm": 0.3846828078401982, | |
| "learning_rate": 6.578450944516051e-05, | |
| "loss": 0.7923, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 1.7456567060458652, | |
| "grad_norm": 0.39298089631112915, | |
| "learning_rate": 6.566497045408645e-05, | |
| "loss": 0.771, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.7512161223071576, | |
| "grad_norm": 0.370122117562583, | |
| "learning_rate": 6.554504057873498e-05, | |
| "loss": 0.7544, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 1.7567755385684505, | |
| "grad_norm": 0.3303038453558157, | |
| "learning_rate": 6.542472164566979e-05, | |
| "loss": 0.7655, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.762334954829743, | |
| "grad_norm": 0.5075770625427899, | |
| "learning_rate": 6.530401548738001e-05, | |
| "loss": 0.7576, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 1.7678943710910353, | |
| "grad_norm": 0.500351509483819, | |
| "learning_rate": 6.51829239422523e-05, | |
| "loss": 0.7668, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.7734537873523282, | |
| "grad_norm": 0.4365766487802718, | |
| "learning_rate": 6.506144885454285e-05, | |
| "loss": 0.7539, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 1.7790132036136206, | |
| "grad_norm": 0.45527067954463785, | |
| "learning_rate": 6.493959207434934e-05, | |
| "loss": 0.7666, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.784572619874913, | |
| "grad_norm": 0.4757862949658658, | |
| "learning_rate": 6.481735545758273e-05, | |
| "loss": 0.7609, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 1.7901320361362059, | |
| "grad_norm": 0.6179343909917517, | |
| "learning_rate": 6.469474086593894e-05, | |
| "loss": 0.7594, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.7956914523974983, | |
| "grad_norm": 0.7997107662830784, | |
| "learning_rate": 6.45717501668706e-05, | |
| "loss": 0.7559, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 1.8012508686587907, | |
| "grad_norm": 1.0969367036016504, | |
| "learning_rate": 6.444838523355852e-05, | |
| "loss": 0.7796, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.8068102849200836, | |
| "grad_norm": 1.2089537958976195, | |
| "learning_rate": 6.432464794488323e-05, | |
| "loss": 0.7724, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 1.812369701181376, | |
| "grad_norm": 0.6596723262517451, | |
| "learning_rate": 6.42005401853963e-05, | |
| "loss": 0.7607, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.8179291174426684, | |
| "grad_norm": 0.4902729906062423, | |
| "learning_rate": 6.407606384529168e-05, | |
| "loss": 0.778, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 1.8234885337039612, | |
| "grad_norm": 0.46790228332539113, | |
| "learning_rate": 6.39512208203769e-05, | |
| "loss": 0.7701, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.8290479499652537, | |
| "grad_norm": 0.5469006439320506, | |
| "learning_rate": 6.382601301204421e-05, | |
| "loss": 0.7818, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 1.834607366226546, | |
| "grad_norm": 0.740833234029731, | |
| "learning_rate": 6.370044232724158e-05, | |
| "loss": 0.7603, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.840166782487839, | |
| "grad_norm": 0.9010614851907555, | |
| "learning_rate": 6.35745106784437e-05, | |
| "loss": 0.7731, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 1.8457261987491314, | |
| "grad_norm": 1.0238595680669471, | |
| "learning_rate": 6.344821998362285e-05, | |
| "loss": 0.7776, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.8512856150104238, | |
| "grad_norm": 1.0395463083151937, | |
| "learning_rate": 6.332157216621964e-05, | |
| "loss": 0.7729, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 1.8568450312717166, | |
| "grad_norm": 0.8370888480193454, | |
| "learning_rate": 6.31945691551138e-05, | |
| "loss": 0.7582, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.862404447533009, | |
| "grad_norm": 0.6973849424171054, | |
| "learning_rate": 6.30672128845947e-05, | |
| "loss": 0.7638, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 1.8679638637943015, | |
| "grad_norm": 0.5915083898425778, | |
| "learning_rate": 6.293950529433199e-05, | |
| "loss": 0.7754, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.8735232800555943, | |
| "grad_norm": 0.45025014596604773, | |
| "learning_rate": 6.281144832934596e-05, | |
| "loss": 0.7699, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 1.8790826963168867, | |
| "grad_norm": 0.38555129030834384, | |
| "learning_rate": 6.268304393997806e-05, | |
| "loss": 0.7759, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.8846421125781792, | |
| "grad_norm": 0.36084485280720024, | |
| "learning_rate": 6.2554294081861e-05, | |
| "loss": 0.784, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 1.890201528839472, | |
| "grad_norm": 0.405266578021245, | |
| "learning_rate": 6.242520071588915e-05, | |
| "loss": 0.7783, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.8957609451007644, | |
| "grad_norm": 0.4124119655208444, | |
| "learning_rate": 6.229576580818852e-05, | |
| "loss": 0.7672, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 1.9013203613620568, | |
| "grad_norm": 0.4488084951300913, | |
| "learning_rate": 6.2165991330087e-05, | |
| "loss": 0.7822, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.9068797776233497, | |
| "grad_norm": 0.4082635480033681, | |
| "learning_rate": 6.20358792580841e-05, | |
| "loss": 0.7693, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.9124391938846421, | |
| "grad_norm": 0.377339639242065, | |
| "learning_rate": 6.190543157382107e-05, | |
| "loss": 0.7654, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.9179986101459345, | |
| "grad_norm": 0.35237882278693, | |
| "learning_rate": 6.177465026405058e-05, | |
| "loss": 0.7607, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.9235580264072274, | |
| "grad_norm": 0.3671603093472658, | |
| "learning_rate": 6.164353732060651e-05, | |
| "loss": 0.7676, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.9291174426685198, | |
| "grad_norm": 0.38659296565653467, | |
| "learning_rate": 6.151209474037363e-05, | |
| "loss": 0.7558, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.9346768589298122, | |
| "grad_norm": 0.359729700225076, | |
| "learning_rate": 6.13803245252571e-05, | |
| "loss": 0.7706, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.940236275191105, | |
| "grad_norm": 0.3272217394439331, | |
| "learning_rate": 6.124822868215213e-05, | |
| "loss": 0.7674, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.9457956914523975, | |
| "grad_norm": 0.3587548812477975, | |
| "learning_rate": 6.11158092229133e-05, | |
| "loss": 0.7566, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.95135510771369, | |
| "grad_norm": 0.38927389559214304, | |
| "learning_rate": 6.098306816432393e-05, | |
| "loss": 0.7608, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.9569145239749828, | |
| "grad_norm": 0.345702831255413, | |
| "learning_rate": 6.0850007528065385e-05, | |
| "loss": 0.7642, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.9624739402362752, | |
| "grad_norm": 0.3699780588350165, | |
| "learning_rate": 6.0716629340686314e-05, | |
| "loss": 0.7585, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.9680333564975676, | |
| "grad_norm": 0.4077210246910142, | |
| "learning_rate": 6.058293563357172e-05, | |
| "loss": 0.7634, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.9735927727588605, | |
| "grad_norm": 0.42750949194399185, | |
| "learning_rate": 6.0448928442912064e-05, | |
| "loss": 0.7644, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.9791521890201529, | |
| "grad_norm": 0.4974639937813857, | |
| "learning_rate": 6.031460980967225e-05, | |
| "loss": 0.7736, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.9847116052814453, | |
| "grad_norm": 0.4864835186867503, | |
| "learning_rate": 6.017998177956052e-05, | |
| "loss": 0.7617, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.9902710215427382, | |
| "grad_norm": 0.35299484136118686, | |
| "learning_rate": 6.004504640299736e-05, | |
| "loss": 0.7675, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.9958304378040306, | |
| "grad_norm": 0.31827026637009653, | |
| "learning_rate": 5.990980573508415e-05, | |
| "loss": 0.7631, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 2.001389854065323, | |
| "grad_norm": 0.4024790298162928, | |
| "learning_rate": 5.9774261835571996e-05, | |
| "loss": 0.7498, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.006949270326616, | |
| "grad_norm": 0.47173300208049507, | |
| "learning_rate": 5.9638416768830277e-05, | |
| "loss": 0.7376, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 2.0125086865879083, | |
| "grad_norm": 0.6895792658770121, | |
| "learning_rate": 5.950227260381522e-05, | |
| "loss": 0.7353, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 2.0180681028492007, | |
| "grad_norm": 1.0070048914467056, | |
| "learning_rate": 5.9365831414038424e-05, | |
| "loss": 0.7444, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 2.0236275191104935, | |
| "grad_norm": 1.3775654286348442, | |
| "learning_rate": 5.9229095277535236e-05, | |
| "loss": 0.7357, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 2.029186935371786, | |
| "grad_norm": 0.5355386773201476, | |
| "learning_rate": 5.909206627683313e-05, | |
| "loss": 0.7403, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 2.0347463516330784, | |
| "grad_norm": 0.46568020048542935, | |
| "learning_rate": 5.895474649891995e-05, | |
| "loss": 0.7293, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 2.0403057678943712, | |
| "grad_norm": 0.8082533261595521, | |
| "learning_rate": 5.88171380352122e-05, | |
| "loss": 0.7468, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 2.0458651841556637, | |
| "grad_norm": 1.0373988341475624, | |
| "learning_rate": 5.867924298152311e-05, | |
| "loss": 0.732, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 2.051424600416956, | |
| "grad_norm": 0.7732534471549348, | |
| "learning_rate": 5.854106343803075e-05, | |
| "loss": 0.7358, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 2.056984016678249, | |
| "grad_norm": 0.6069046552269562, | |
| "learning_rate": 5.840260150924609e-05, | |
| "loss": 0.7096, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 2.0625434329395413, | |
| "grad_norm": 0.8205673012712087, | |
| "learning_rate": 5.826385930398084e-05, | |
| "loss": 0.7427, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 2.0681028492008338, | |
| "grad_norm": 0.8486167959591705, | |
| "learning_rate": 5.812483893531543e-05, | |
| "loss": 0.7307, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 2.0736622654621266, | |
| "grad_norm": 0.662144513978678, | |
| "learning_rate": 5.7985542520566795e-05, | |
| "loss": 0.734, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 2.079221681723419, | |
| "grad_norm": 0.457883479207113, | |
| "learning_rate": 5.7845972181256104e-05, | |
| "loss": 0.7421, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 2.0847810979847115, | |
| "grad_norm": 0.517800202826518, | |
| "learning_rate": 5.770613004307648e-05, | |
| "loss": 0.7278, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 2.0903405142460043, | |
| "grad_norm": 0.8598983388632617, | |
| "learning_rate": 5.756601823586063e-05, | |
| "loss": 0.7522, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 2.0958999305072967, | |
| "grad_norm": 0.4689738299674123, | |
| "learning_rate": 5.742563889354837e-05, | |
| "loss": 0.7358, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 2.101459346768589, | |
| "grad_norm": 0.6323865613873795, | |
| "learning_rate": 5.728499415415415e-05, | |
| "loss": 0.7223, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 2.107018763029882, | |
| "grad_norm": 3.6525531842750674, | |
| "learning_rate": 5.71440861597345e-05, | |
| "loss": 0.7474, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 2.1125781792911744, | |
| "grad_norm": 115.32668021205329, | |
| "learning_rate": 5.700291705635538e-05, | |
| "loss": 1.2108, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.118137595552467, | |
| "grad_norm": 3.1583730002039574, | |
| "learning_rate": 5.68614889940595e-05, | |
| "loss": 0.7749, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 2.1236970118137597, | |
| "grad_norm": 0.590660191944181, | |
| "learning_rate": 5.671980412683363e-05, | |
| "loss": 0.7389, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 2.129256428075052, | |
| "grad_norm": 1.8224073499970155, | |
| "learning_rate": 5.657786461257567e-05, | |
| "loss": 0.7478, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 2.1348158443363445, | |
| "grad_norm": 1.2951121366611997, | |
| "learning_rate": 5.643567261306194e-05, | |
| "loss": 0.7464, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 2.1403752605976374, | |
| "grad_norm": 1.5832979985529985, | |
| "learning_rate": 5.629323029391411e-05, | |
| "loss": 0.7421, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 2.14593467685893, | |
| "grad_norm": 1.2316696310155895, | |
| "learning_rate": 5.6150539824566344e-05, | |
| "loss": 0.7482, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 2.151494093120222, | |
| "grad_norm": 1.1353584045603453, | |
| "learning_rate": 5.600760337823215e-05, | |
| "loss": 0.7433, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 2.157053509381515, | |
| "grad_norm": 0.8270888985327509, | |
| "learning_rate": 5.586442313187137e-05, | |
| "loss": 0.7518, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 2.1626129256428075, | |
| "grad_norm": 0.9863689806177816, | |
| "learning_rate": 5.572100126615695e-05, | |
| "loss": 0.7464, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 2.1681723419041, | |
| "grad_norm": 0.4731001360415031, | |
| "learning_rate": 5.557733996544179e-05, | |
| "loss": 0.7346, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 2.1737317581653928, | |
| "grad_norm": 0.7801308944094809, | |
| "learning_rate": 5.543344141772542e-05, | |
| "loss": 0.7299, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 2.179291174426685, | |
| "grad_norm": 0.5816725512915829, | |
| "learning_rate": 5.528930781462074e-05, | |
| "loss": 0.7494, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 2.1848505906879776, | |
| "grad_norm": 1.0772848290573547, | |
| "learning_rate": 5.514494135132058e-05, | |
| "loss": 0.7408, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 2.1904100069492705, | |
| "grad_norm": 11.53337748265314, | |
| "learning_rate": 5.500034422656429e-05, | |
| "loss": 0.734, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 2.195969423210563, | |
| "grad_norm": 4.309994215264064, | |
| "learning_rate": 5.4855518642604276e-05, | |
| "loss": 0.7513, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 2.2015288394718553, | |
| "grad_norm": 1.3250161354260865, | |
| "learning_rate": 5.4710466805172414e-05, | |
| "loss": 0.7502, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 2.207088255733148, | |
| "grad_norm": 1.3777075745252818, | |
| "learning_rate": 5.456519092344652e-05, | |
| "loss": 0.7562, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 2.2126476719944406, | |
| "grad_norm": 1.0819205433328192, | |
| "learning_rate": 5.4419693210016586e-05, | |
| "loss": 0.7501, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 2.218207088255733, | |
| "grad_norm": 1.2833623880524272, | |
| "learning_rate": 5.427397588085127e-05, | |
| "loss": 0.7361, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 2.223766504517026, | |
| "grad_norm": 1.0451018448350549, | |
| "learning_rate": 5.412804115526392e-05, | |
| "loss": 0.7461, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.2293259207783183, | |
| "grad_norm": 0.8816531121474304, | |
| "learning_rate": 5.3981891255878985e-05, | |
| "loss": 0.7425, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 2.2348853370396107, | |
| "grad_norm": 0.7542953513475557, | |
| "learning_rate": 5.383552840859799e-05, | |
| "loss": 0.742, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 2.2404447533009035, | |
| "grad_norm": 0.6832304869722696, | |
| "learning_rate": 5.368895484256578e-05, | |
| "loss": 0.7399, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 2.246004169562196, | |
| "grad_norm": 0.7498644535493143, | |
| "learning_rate": 5.3542172790136464e-05, | |
| "loss": 0.7417, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 2.2515635858234884, | |
| "grad_norm": 0.5620794275795693, | |
| "learning_rate": 5.339518448683945e-05, | |
| "loss": 0.7437, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 2.2571230020847812, | |
| "grad_norm": 0.7280261530015376, | |
| "learning_rate": 5.324799217134542e-05, | |
| "loss": 0.7447, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 2.2626824183460736, | |
| "grad_norm": 0.5794346609888263, | |
| "learning_rate": 5.310059808543221e-05, | |
| "loss": 0.7428, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 2.268241834607366, | |
| "grad_norm": 0.594584254894041, | |
| "learning_rate": 5.2953004473950676e-05, | |
| "loss": 0.7456, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 2.273801250868659, | |
| "grad_norm": 0.4540497546609938, | |
| "learning_rate": 5.2805213584790486e-05, | |
| "loss": 0.7349, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 2.2793606671299513, | |
| "grad_norm": 0.48721903761938296, | |
| "learning_rate": 5.26572276688459e-05, | |
| "loss": 0.7473, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.2849200833912438, | |
| "grad_norm": 0.3735664300049461, | |
| "learning_rate": 5.250904897998153e-05, | |
| "loss": 0.7379, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 2.2904794996525366, | |
| "grad_norm": 0.42570356028831763, | |
| "learning_rate": 5.23606797749979e-05, | |
| "loss": 0.7423, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 2.296038915913829, | |
| "grad_norm": 0.3608144662151246, | |
| "learning_rate": 5.2212122313597206e-05, | |
| "loss": 0.725, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 2.3015983321751214, | |
| "grad_norm": 0.3972157863450482, | |
| "learning_rate": 5.206337885834881e-05, | |
| "loss": 0.7435, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 2.3071577484364143, | |
| "grad_norm": 0.37044100478568076, | |
| "learning_rate": 5.191445167465481e-05, | |
| "loss": 0.7305, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 2.3127171646977067, | |
| "grad_norm": 0.40372283544972726, | |
| "learning_rate": 5.176534303071553e-05, | |
| "loss": 0.7341, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 2.318276580958999, | |
| "grad_norm": 0.35291131427848343, | |
| "learning_rate": 5.161605519749502e-05, | |
| "loss": 0.7342, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 2.323835997220292, | |
| "grad_norm": 0.4106862294883081, | |
| "learning_rate": 5.1466590448686375e-05, | |
| "loss": 0.7441, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 2.3293954134815844, | |
| "grad_norm": 0.3315316376787157, | |
| "learning_rate": 5.13169510606772e-05, | |
| "loss": 0.7277, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 2.334954829742877, | |
| "grad_norm": 0.35212971202718274, | |
| "learning_rate": 5.116713931251491e-05, | |
| "loss": 0.7214, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.3405142460041697, | |
| "grad_norm": 0.3109496189357139, | |
| "learning_rate": 5.101715748587195e-05, | |
| "loss": 0.739, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 2.346073662265462, | |
| "grad_norm": 0.34317285629474853, | |
| "learning_rate": 5.0867007865011186e-05, | |
| "loss": 0.7362, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 2.3516330785267545, | |
| "grad_norm": 0.39953180128623716, | |
| "learning_rate": 5.071669273675095e-05, | |
| "loss": 0.7444, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 2.3571924947880474, | |
| "grad_norm": 0.29225775466241055, | |
| "learning_rate": 5.0566214390430386e-05, | |
| "loss": 0.7252, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 2.36275191104934, | |
| "grad_norm": 0.34147649076740483, | |
| "learning_rate": 5.041557511787442e-05, | |
| "loss": 0.7265, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 2.368311327310632, | |
| "grad_norm": 0.28209832198556345, | |
| "learning_rate": 5.026477721335895e-05, | |
| "loss": 0.7333, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 2.373870743571925, | |
| "grad_norm": 0.27112430335660087, | |
| "learning_rate": 5.011382297357589e-05, | |
| "loss": 0.7326, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 2.3794301598332175, | |
| "grad_norm": 0.2958995444714563, | |
| "learning_rate": 4.9962714697598175e-05, | |
| "loss": 0.7418, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 2.38498957609451, | |
| "grad_norm": 0.254705600992757, | |
| "learning_rate": 4.9811454686844756e-05, | |
| "loss": 0.7539, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 2.3905489923558028, | |
| "grad_norm": 0.2704842883747253, | |
| "learning_rate": 4.966004524504552e-05, | |
| "loss": 0.7384, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 2.396108408617095, | |
| "grad_norm": 0.27016709718038645, | |
| "learning_rate": 4.950848867820628e-05, | |
| "loss": 0.727, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 2.4016678248783876, | |
| "grad_norm": 0.3267030249113294, | |
| "learning_rate": 4.935678729457355e-05, | |
| "loss": 0.7288, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 2.4072272411396805, | |
| "grad_norm": 0.31940168140369773, | |
| "learning_rate": 4.920494340459947e-05, | |
| "loss": 0.7351, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 2.412786657400973, | |
| "grad_norm": 0.29347717180560046, | |
| "learning_rate": 4.9052959320906564e-05, | |
| "loss": 0.7269, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 2.4183460736622653, | |
| "grad_norm": 0.26336896811236926, | |
| "learning_rate": 4.890083735825258e-05, | |
| "loss": 0.7397, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 2.423905489923558, | |
| "grad_norm": 0.33942479552349747, | |
| "learning_rate": 4.874857983349517e-05, | |
| "loss": 0.7305, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 2.4294649061848506, | |
| "grad_norm": 0.3056551202672292, | |
| "learning_rate": 4.859618906555667e-05, | |
| "loss": 0.7445, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 2.435024322446143, | |
| "grad_norm": 0.255653122241217, | |
| "learning_rate": 4.8443667375388686e-05, | |
| "loss": 0.7435, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 2.440583738707436, | |
| "grad_norm": 0.27740072317554226, | |
| "learning_rate": 4.8291017085936834e-05, | |
| "loss": 0.7435, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 2.4461431549687283, | |
| "grad_norm": 0.22741019497934506, | |
| "learning_rate": 4.8138240522105365e-05, | |
| "loss": 0.7375, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.4517025712300207, | |
| "grad_norm": 0.26409421864038285, | |
| "learning_rate": 4.7985340010721654e-05, | |
| "loss": 0.737, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 2.4572619874913135, | |
| "grad_norm": 0.26315157997100586, | |
| "learning_rate": 4.783231788050089e-05, | |
| "loss": 0.7373, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 2.462821403752606, | |
| "grad_norm": 0.254673897089132, | |
| "learning_rate": 4.767917646201051e-05, | |
| "loss": 0.7241, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 2.4683808200138984, | |
| "grad_norm": 0.312897463352474, | |
| "learning_rate": 4.7525918087634775e-05, | |
| "loss": 0.7345, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 2.473940236275191, | |
| "grad_norm": 0.28397633530974886, | |
| "learning_rate": 4.737254509153918e-05, | |
| "loss": 0.7244, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 2.4794996525364836, | |
| "grad_norm": 0.413384182880905, | |
| "learning_rate": 4.721905980963496e-05, | |
| "loss": 0.738, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 2.485059068797776, | |
| "grad_norm": 0.5086958530399852, | |
| "learning_rate": 4.706546457954351e-05, | |
| "loss": 0.7421, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 2.490618485059069, | |
| "grad_norm": 0.7824855922432978, | |
| "learning_rate": 4.691176174056071e-05, | |
| "loss": 0.737, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 2.4961779013203613, | |
| "grad_norm": 0.4381170078896152, | |
| "learning_rate": 4.675795363362142e-05, | |
| "loss": 0.7312, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 2.5017373175816537, | |
| "grad_norm": 0.3248189318651948, | |
| "learning_rate": 4.660404260126369e-05, | |
| "loss": 0.7362, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.5072967338429466, | |
| "grad_norm": 0.22687788903538336, | |
| "learning_rate": 4.6450030987593226e-05, | |
| "loss": 0.7466, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 2.512856150104239, | |
| "grad_norm": 0.2484699828082822, | |
| "learning_rate": 4.629592113824754e-05, | |
| "loss": 0.7261, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 2.5184155663655314, | |
| "grad_norm": 0.26873441485423244, | |
| "learning_rate": 4.614171540036037e-05, | |
| "loss": 0.7274, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 2.5239749826268243, | |
| "grad_norm": 0.25584966455636315, | |
| "learning_rate": 4.598741612252577e-05, | |
| "loss": 0.7482, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 2.5295343988881167, | |
| "grad_norm": 0.25300161712467034, | |
| "learning_rate": 4.5833025654762535e-05, | |
| "loss": 0.7307, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 2.535093815149409, | |
| "grad_norm": 0.2556990789787588, | |
| "learning_rate": 4.567854634847825e-05, | |
| "loss": 0.7397, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 2.540653231410702, | |
| "grad_norm": 0.25012778975423555, | |
| "learning_rate": 4.552398055643353e-05, | |
| "loss": 0.746, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 2.5462126476719944, | |
| "grad_norm": 0.27118048168405157, | |
| "learning_rate": 4.5369330632706223e-05, | |
| "loss": 0.7419, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 2.551772063933287, | |
| "grad_norm": 0.28964960768196785, | |
| "learning_rate": 4.521459893265548e-05, | |
| "loss": 0.7346, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 2.5573314801945797, | |
| "grad_norm": 0.27364215502822814, | |
| "learning_rate": 4.505978781288598e-05, | |
| "loss": 0.7312, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.562890896455872, | |
| "grad_norm": 0.21770274914348633, | |
| "learning_rate": 4.490489963121194e-05, | |
| "loss": 0.7257, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 2.5684503127171645, | |
| "grad_norm": 0.25218759647538125, | |
| "learning_rate": 4.474993674662128e-05, | |
| "loss": 0.741, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 2.5740097289784574, | |
| "grad_norm": 0.2419318546982388, | |
| "learning_rate": 4.4594901519239645e-05, | |
| "loss": 0.7474, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 2.57956914523975, | |
| "grad_norm": 0.32215620862830957, | |
| "learning_rate": 4.443979631029449e-05, | |
| "loss": 0.723, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 2.585128561501042, | |
| "grad_norm": 0.29857838205094217, | |
| "learning_rate": 4.428462348207911e-05, | |
| "loss": 0.7309, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 2.590687977762335, | |
| "grad_norm": 0.2209060117495464, | |
| "learning_rate": 4.412938539791665e-05, | |
| "loss": 0.7359, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 2.5962473940236275, | |
| "grad_norm": 0.22472545767913193, | |
| "learning_rate": 4.3974084422124134e-05, | |
| "loss": 0.7324, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 2.60180681028492, | |
| "grad_norm": 0.1946564937314796, | |
| "learning_rate": 4.381872291997641e-05, | |
| "loss": 0.7362, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 2.6073662265462128, | |
| "grad_norm": 0.2359140723071903, | |
| "learning_rate": 4.366330325767022e-05, | |
| "loss": 0.7286, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 2.612925642807505, | |
| "grad_norm": 0.2129248467763739, | |
| "learning_rate": 4.350782780228802e-05, | |
| "loss": 0.7302, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.6184850590687976, | |
| "grad_norm": 0.24861193172162277, | |
| "learning_rate": 4.33522989217621e-05, | |
| "loss": 0.7203, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 2.6240444753300904, | |
| "grad_norm": 0.22260636691489233, | |
| "learning_rate": 4.3196718984838345e-05, | |
| "loss": 0.7328, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 2.629603891591383, | |
| "grad_norm": 0.21657344137925627, | |
| "learning_rate": 4.304109036104029e-05, | |
| "loss": 0.7369, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 2.6351633078526753, | |
| "grad_norm": 0.1829539329834595, | |
| "learning_rate": 4.288541542063297e-05, | |
| "loss": 0.7195, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 2.640722724113968, | |
| "grad_norm": 0.19991359500635122, | |
| "learning_rate": 4.272969653458685e-05, | |
| "loss": 0.7289, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 2.6462821403752605, | |
| "grad_norm": 0.19984516052864532, | |
| "learning_rate": 4.257393607454167e-05, | |
| "loss": 0.7266, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 2.651841556636553, | |
| "grad_norm": 0.18507080512216367, | |
| "learning_rate": 4.241813641277036e-05, | |
| "loss": 0.7402, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 2.657400972897846, | |
| "grad_norm": 0.22016264298352997, | |
| "learning_rate": 4.226229992214293e-05, | |
| "loss": 0.7302, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 2.6629603891591382, | |
| "grad_norm": 0.23800061174952958, | |
| "learning_rate": 4.2106428976090286e-05, | |
| "loss": 0.7336, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 2.6685198054204307, | |
| "grad_norm": 0.1989157186858635, | |
| "learning_rate": 4.1950525948568073e-05, | |
| "loss": 0.7338, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.6740792216817235, | |
| "grad_norm": 0.283763583201288, | |
| "learning_rate": 4.17945932140206e-05, | |
| "loss": 0.7266, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 2.679638637943016, | |
| "grad_norm": 0.321917277351061, | |
| "learning_rate": 4.1638633147344575e-05, | |
| "loss": 0.7288, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 2.6851980542043083, | |
| "grad_norm": 0.23459593411861587, | |
| "learning_rate": 4.1482648123853e-05, | |
| "loss": 0.727, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 2.690757470465601, | |
| "grad_norm": 0.24861513819835215, | |
| "learning_rate": 4.132664051923897e-05, | |
| "loss": 0.7392, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 2.6963168867268936, | |
| "grad_norm": 0.30446525462850177, | |
| "learning_rate": 4.117061270953951e-05, | |
| "loss": 0.7516, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 2.701876302988186, | |
| "grad_norm": 0.3155678535641023, | |
| "learning_rate": 4.101456707109935e-05, | |
| "loss": 0.7215, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 2.707435719249479, | |
| "grad_norm": 0.2323552605079562, | |
| "learning_rate": 4.085850598053479e-05, | |
| "loss": 0.7336, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 2.7129951355107713, | |
| "grad_norm": 0.2044343122866403, | |
| "learning_rate": 4.0702431814697434e-05, | |
| "loss": 0.7283, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 2.7185545517720637, | |
| "grad_norm": 0.28637190695954, | |
| "learning_rate": 4.054634695063804e-05, | |
| "loss": 0.7456, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 2.7241139680333566, | |
| "grad_norm": 0.3267253563070388, | |
| "learning_rate": 4.039025376557033e-05, | |
| "loss": 0.7441, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.729673384294649, | |
| "grad_norm": 0.3347609079486309, | |
| "learning_rate": 4.023415463683472e-05, | |
| "loss": 0.7504, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 2.7352328005559414, | |
| "grad_norm": 0.2919900086611737, | |
| "learning_rate": 4.007805194186213e-05, | |
| "loss": 0.7401, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 2.7407922168172343, | |
| "grad_norm": 0.20720695930174413, | |
| "learning_rate": 3.9921948058137874e-05, | |
| "loss": 0.7201, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 2.7463516330785267, | |
| "grad_norm": 0.3269135713536802, | |
| "learning_rate": 3.97658453631653e-05, | |
| "loss": 0.7424, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 2.751911049339819, | |
| "grad_norm": 0.4091768369958175, | |
| "learning_rate": 3.960974623442968e-05, | |
| "loss": 0.7323, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 2.757470465601112, | |
| "grad_norm": 0.36444186506436177, | |
| "learning_rate": 3.9453653049361966e-05, | |
| "loss": 0.7307, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 2.7630298818624044, | |
| "grad_norm": 0.2700660410690018, | |
| "learning_rate": 3.929756818530258e-05, | |
| "loss": 0.7459, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 2.768589298123697, | |
| "grad_norm": 0.1864682458158403, | |
| "learning_rate": 3.914149401946522e-05, | |
| "loss": 0.7331, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 2.7741487143849897, | |
| "grad_norm": 0.23385225361417114, | |
| "learning_rate": 3.898543292890065e-05, | |
| "loss": 0.7359, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 2.779708130646282, | |
| "grad_norm": 0.3193885099831001, | |
| "learning_rate": 3.882938729046051e-05, | |
| "loss": 0.7416, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.7852675469075745, | |
| "grad_norm": 0.3468433720321668, | |
| "learning_rate": 3.867335948076104e-05, | |
| "loss": 0.7203, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 2.7908269631688674, | |
| "grad_norm": 0.2336768102315487, | |
| "learning_rate": 3.851735187614701e-05, | |
| "loss": 0.7383, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 2.7963863794301598, | |
| "grad_norm": 0.20477576787186483, | |
| "learning_rate": 3.836136685265543e-05, | |
| "loss": 0.7375, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 2.801945795691452, | |
| "grad_norm": 0.3370531882508558, | |
| "learning_rate": 3.820540678597942e-05, | |
| "loss": 0.7329, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 2.807505211952745, | |
| "grad_norm": 0.3168959312457467, | |
| "learning_rate": 3.804947405143193e-05, | |
| "loss": 0.7278, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 2.8130646282140375, | |
| "grad_norm": 0.213213613251235, | |
| "learning_rate": 3.789357102390973e-05, | |
| "loss": 0.7294, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.81862404447533, | |
| "grad_norm": 0.19536547709601546, | |
| "learning_rate": 3.7737700077857076e-05, | |
| "loss": 0.7314, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 2.8241834607366227, | |
| "grad_norm": 0.22171103501129447, | |
| "learning_rate": 3.7581863587229645e-05, | |
| "loss": 0.7357, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 2.829742876997915, | |
| "grad_norm": 0.27575352788878604, | |
| "learning_rate": 3.7426063925458355e-05, | |
| "loss": 0.727, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 2.8353022932592076, | |
| "grad_norm": 0.3155479135697007, | |
| "learning_rate": 3.727030346541317e-05, | |
| "loss": 0.7349, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.8408617095205004, | |
| "grad_norm": 0.18619005189562574, | |
| "learning_rate": 3.7114584579367034e-05, | |
| "loss": 0.7285, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 2.846421125781793, | |
| "grad_norm": 0.296120411593163, | |
| "learning_rate": 3.695890963895972e-05, | |
| "loss": 0.7233, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.8519805420430853, | |
| "grad_norm": 0.26348492927406175, | |
| "learning_rate": 3.6803281015161675e-05, | |
| "loss": 0.7303, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 2.857539958304378, | |
| "grad_norm": 0.20058141840327803, | |
| "learning_rate": 3.664770107823792e-05, | |
| "loss": 0.7233, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.8630993745656705, | |
| "grad_norm": 0.254471235916167, | |
| "learning_rate": 3.6492172197711984e-05, | |
| "loss": 0.7265, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 2.868658790826963, | |
| "grad_norm": 0.18654197868120972, | |
| "learning_rate": 3.633669674232979e-05, | |
| "loss": 0.7341, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.874218207088256, | |
| "grad_norm": 0.20182500678634277, | |
| "learning_rate": 3.61812770800236e-05, | |
| "loss": 0.7339, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 2.8797776233495482, | |
| "grad_norm": 0.2344052637904768, | |
| "learning_rate": 3.602591557787589e-05, | |
| "loss": 0.7362, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.8853370396108406, | |
| "grad_norm": 0.2082291409375869, | |
| "learning_rate": 3.5870614602083365e-05, | |
| "loss": 0.729, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 2.8908964558721335, | |
| "grad_norm": 0.22360753610299702, | |
| "learning_rate": 3.571537651792091e-05, | |
| "loss": 0.7461, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.896455872133426, | |
| "grad_norm": 0.2603672706455522, | |
| "learning_rate": 3.556020368970552e-05, | |
| "loss": 0.7394, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 2.9020152883947183, | |
| "grad_norm": 0.22388119959520128, | |
| "learning_rate": 3.540509848076037e-05, | |
| "loss": 0.7246, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.907574704656011, | |
| "grad_norm": 0.27500339761238496, | |
| "learning_rate": 3.525006325337873e-05, | |
| "loss": 0.7371, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 2.9131341209173036, | |
| "grad_norm": 0.23292609157697053, | |
| "learning_rate": 3.509510036878807e-05, | |
| "loss": 0.7362, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.918693537178596, | |
| "grad_norm": 0.19290073858029927, | |
| "learning_rate": 3.4940212187114024e-05, | |
| "loss": 0.7229, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 2.924252953439889, | |
| "grad_norm": 0.24314351107870302, | |
| "learning_rate": 3.478540106734452e-05, | |
| "loss": 0.7367, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.9298123697011813, | |
| "grad_norm": 0.18761409664747392, | |
| "learning_rate": 3.4630669367293797e-05, | |
| "loss": 0.7331, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 2.9353717859624737, | |
| "grad_norm": 0.23665013272710855, | |
| "learning_rate": 3.4476019443566474e-05, | |
| "loss": 0.7478, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.9409312022237666, | |
| "grad_norm": 0.222031580657751, | |
| "learning_rate": 3.4321453651521756e-05, | |
| "loss": 0.7264, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 2.946490618485059, | |
| "grad_norm": 0.20604984922930578, | |
| "learning_rate": 3.4166974345237465e-05, | |
| "loss": 0.7203, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.9520500347463514, | |
| "grad_norm": 0.20100587036500991, | |
| "learning_rate": 3.401258387747425e-05, | |
| "loss": 0.7337, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 2.9576094510076443, | |
| "grad_norm": 0.1899794463734188, | |
| "learning_rate": 3.385828459963966e-05, | |
| "loss": 0.7441, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.9631688672689367, | |
| "grad_norm": 0.2206507855989311, | |
| "learning_rate": 3.3704078861752463e-05, | |
| "loss": 0.7268, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 2.968728283530229, | |
| "grad_norm": 0.1884387695061294, | |
| "learning_rate": 3.354996901240678e-05, | |
| "loss": 0.7289, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.974287699791522, | |
| "grad_norm": 0.20088164351300247, | |
| "learning_rate": 3.339595739873631e-05, | |
| "loss": 0.7301, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 2.9798471160528144, | |
| "grad_norm": 0.2190606208850353, | |
| "learning_rate": 3.32420463663786e-05, | |
| "loss": 0.7359, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.985406532314107, | |
| "grad_norm": 0.21071320916886715, | |
| "learning_rate": 3.3088238259439304e-05, | |
| "loss": 0.7287, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 2.9909659485753997, | |
| "grad_norm": 0.2503997265876079, | |
| "learning_rate": 3.2934535420456506e-05, | |
| "loss": 0.7202, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.996525364836692, | |
| "grad_norm": 0.22396626980732445, | |
| "learning_rate": 3.2780940190365043e-05, | |
| "loss": 0.7312, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 3.0020847810979845, | |
| "grad_norm": 0.2498542781596777, | |
| "learning_rate": 3.262745490846084e-05, | |
| "loss": 0.725, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 3.0076441973592773, | |
| "grad_norm": 0.2037678144034356, | |
| "learning_rate": 3.2474081912365245e-05, | |
| "loss": 0.6976, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 3.0132036136205698, | |
| "grad_norm": 0.27581558969736686, | |
| "learning_rate": 3.23208235379895e-05, | |
| "loss": 0.7119, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 3.018763029881862, | |
| "grad_norm": 0.30757106176107923, | |
| "learning_rate": 3.216768211949912e-05, | |
| "loss": 0.7182, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 3.024322446143155, | |
| "grad_norm": 0.3465027003205821, | |
| "learning_rate": 3.201465998927835e-05, | |
| "loss": 0.7012, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 3.0298818624044475, | |
| "grad_norm": 0.3205211872891199, | |
| "learning_rate": 3.1861759477894656e-05, | |
| "loss": 0.6971, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 3.03544127866574, | |
| "grad_norm": 0.26675757876064743, | |
| "learning_rate": 3.170898291406317e-05, | |
| "loss": 0.7005, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 3.0410006949270327, | |
| "grad_norm": 0.33157082769310375, | |
| "learning_rate": 3.155633262461133e-05, | |
| "loss": 0.692, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 3.046560111188325, | |
| "grad_norm": 0.3520237527349914, | |
| "learning_rate": 3.1403810934443346e-05, | |
| "loss": 0.6979, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 3.052119527449618, | |
| "grad_norm": 0.2806618105175904, | |
| "learning_rate": 3.125142016650482e-05, | |
| "loss": 0.6943, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 3.0576789437109104, | |
| "grad_norm": 0.26339431867257346, | |
| "learning_rate": 3.109916264174743e-05, | |
| "loss": 0.6992, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 3.063238359972203, | |
| "grad_norm": 0.29331652252245627, | |
| "learning_rate": 3.094704067909345e-05, | |
| "loss": 0.7074, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 3.0687977762334953, | |
| "grad_norm": 0.22842865907148296, | |
| "learning_rate": 3.079505659540055e-05, | |
| "loss": 0.695, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 3.074357192494788, | |
| "grad_norm": 0.22450151562077095, | |
| "learning_rate": 3.064321270542646e-05, | |
| "loss": 0.7091, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 3.0799166087560805, | |
| "grad_norm": 0.2901780200486473, | |
| "learning_rate": 3.0491511321793737e-05, | |
| "loss": 0.7176, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 3.085476025017373, | |
| "grad_norm": 0.2667950414899655, | |
| "learning_rate": 3.0339954754954488e-05, | |
| "loss": 0.7092, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 3.091035441278666, | |
| "grad_norm": 0.20249142476959187, | |
| "learning_rate": 3.0188545313155257e-05, | |
| "loss": 0.7044, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 3.0965948575399582, | |
| "grad_norm": 0.2417989895903253, | |
| "learning_rate": 3.0037285302401828e-05, | |
| "loss": 0.7107, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 3.102154273801251, | |
| "grad_norm": 0.3090304508627475, | |
| "learning_rate": 2.9886177026424107e-05, | |
| "loss": 0.7063, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 3.1077136900625435, | |
| "grad_norm": 0.23078910950196774, | |
| "learning_rate": 2.9735222786641067e-05, | |
| "loss": 0.6995, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 3.113273106323836, | |
| "grad_norm": 0.2160202375232252, | |
| "learning_rate": 2.9584424882125593e-05, | |
| "loss": 0.7149, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 3.1188325225851288, | |
| "grad_norm": 0.21966525904866915, | |
| "learning_rate": 2.943378560956962e-05, | |
| "loss": 0.7155, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 3.124391938846421, | |
| "grad_norm": 0.21942461047996667, | |
| "learning_rate": 2.9283307263249048e-05, | |
| "loss": 0.7024, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 3.1299513551077136, | |
| "grad_norm": 0.20165607592754078, | |
| "learning_rate": 2.913299213498884e-05, | |
| "loss": 0.6945, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 3.135510771369006, | |
| "grad_norm": 0.207087253518945, | |
| "learning_rate": 2.8982842514128058e-05, | |
| "loss": 0.7007, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 3.141070187630299, | |
| "grad_norm": 0.20062728334317925, | |
| "learning_rate": 2.8832860687485107e-05, | |
| "loss": 0.7113, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 3.1466296038915913, | |
| "grad_norm": 0.19818489838165365, | |
| "learning_rate": 2.86830489393228e-05, | |
| "loss": 0.679, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 3.1521890201528837, | |
| "grad_norm": 0.18399616142175082, | |
| "learning_rate": 2.8533409551313628e-05, | |
| "loss": 0.7127, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 3.1577484364141766, | |
| "grad_norm": 0.19589928841704413, | |
| "learning_rate": 2.8383944802505003e-05, | |
| "loss": 0.6942, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 3.163307852675469, | |
| "grad_norm": 0.18770585615805666, | |
| "learning_rate": 2.8234656969284483e-05, | |
| "loss": 0.6891, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 3.1688672689367614, | |
| "grad_norm": 0.23570832512589554, | |
| "learning_rate": 2.8085548325345208e-05, | |
| "loss": 0.6943, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 3.1744266851980543, | |
| "grad_norm": 0.1901096056694783, | |
| "learning_rate": 2.7936621141651197e-05, | |
| "loss": 0.7037, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 3.1799861014593467, | |
| "grad_norm": 0.21456549437287814, | |
| "learning_rate": 2.7787877686402807e-05, | |
| "loss": 0.6951, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 3.185545517720639, | |
| "grad_norm": 0.23553224751824287, | |
| "learning_rate": 2.7639320225002108e-05, | |
| "loss": 0.6918, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 3.191104933981932, | |
| "grad_norm": 0.18659378336889193, | |
| "learning_rate": 2.7490951020018476e-05, | |
| "loss": 0.6989, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 3.1966643502432244, | |
| "grad_norm": 0.21903803816553757, | |
| "learning_rate": 2.7342772331154102e-05, | |
| "loss": 0.6982, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 3.2022237665045172, | |
| "grad_norm": 0.2476302597618211, | |
| "learning_rate": 2.7194786415209528e-05, | |
| "loss": 0.6984, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 3.2077831827658096, | |
| "grad_norm": 0.17282018437992405, | |
| "learning_rate": 2.7046995526049334e-05, | |
| "loss": 0.7081, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 3.213342599027102, | |
| "grad_norm": 0.2142426744032126, | |
| "learning_rate": 2.68994019145678e-05, | |
| "loss": 0.7005, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 3.218902015288395, | |
| "grad_norm": 0.18982645909872903, | |
| "learning_rate": 2.675200782865459e-05, | |
| "loss": 0.7108, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 3.2244614315496873, | |
| "grad_norm": 0.1695279613152048, | |
| "learning_rate": 2.6604815513160556e-05, | |
| "loss": 0.704, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 3.2300208478109798, | |
| "grad_norm": 0.1868390211196699, | |
| "learning_rate": 2.645782720986356e-05, | |
| "loss": 0.712, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 3.2355802640722726, | |
| "grad_norm": 0.1800257784614353, | |
| "learning_rate": 2.6311045157434235e-05, | |
| "loss": 0.689, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 3.241139680333565, | |
| "grad_norm": 0.18122410935561592, | |
| "learning_rate": 2.6164471591402016e-05, | |
| "loss": 0.716, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 3.2466990965948574, | |
| "grad_norm": 0.17842363937321248, | |
| "learning_rate": 2.6018108744121032e-05, | |
| "loss": 0.7085, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 3.25225851285615, | |
| "grad_norm": 0.21715670011851243, | |
| "learning_rate": 2.5871958844736083e-05, | |
| "loss": 0.7079, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 3.2578179291174427, | |
| "grad_norm": 0.16191852665066875, | |
| "learning_rate": 2.5726024119148757e-05, | |
| "loss": 0.7156, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 3.263377345378735, | |
| "grad_norm": 0.18732524749733667, | |
| "learning_rate": 2.5580306789983418e-05, | |
| "loss": 0.7082, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 3.2689367616400276, | |
| "grad_norm": 0.19189813282737295, | |
| "learning_rate": 2.54348090765535e-05, | |
| "loss": 0.6969, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 3.2744961779013204, | |
| "grad_norm": 0.2040135809617877, | |
| "learning_rate": 2.5289533194827586e-05, | |
| "loss": 0.6864, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 3.280055594162613, | |
| "grad_norm": 0.20698619029904855, | |
| "learning_rate": 2.514448135739574e-05, | |
| "loss": 0.7149, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 3.2856150104239052, | |
| "grad_norm": 0.21355927166605512, | |
| "learning_rate": 2.4999655773435726e-05, | |
| "loss": 0.6965, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 3.291174426685198, | |
| "grad_norm": 0.19655275079030954, | |
| "learning_rate": 2.4855058648679434e-05, | |
| "loss": 0.709, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 3.2967338429464905, | |
| "grad_norm": 0.24361735832750547, | |
| "learning_rate": 2.4710692185379264e-05, | |
| "loss": 0.6981, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 3.302293259207783, | |
| "grad_norm": 0.2009267455727919, | |
| "learning_rate": 2.4566558582274583e-05, | |
| "loss": 0.7, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 3.307852675469076, | |
| "grad_norm": 0.25746317333166957, | |
| "learning_rate": 2.442266003455823e-05, | |
| "loss": 0.6986, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 3.313412091730368, | |
| "grad_norm": 0.26242513130569445, | |
| "learning_rate": 2.427899873384306e-05, | |
| "loss": 0.7138, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 3.3189715079916606, | |
| "grad_norm": 0.1882559013402892, | |
| "learning_rate": 2.4135576868128636e-05, | |
| "loss": 0.7013, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 3.3245309242529535, | |
| "grad_norm": 0.2889534969293074, | |
| "learning_rate": 2.399239662176785e-05, | |
| "loss": 0.7144, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 3.330090340514246, | |
| "grad_norm": 0.20123131689928303, | |
| "learning_rate": 2.3849460175433673e-05, | |
| "loss": 0.716, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 3.3356497567755383, | |
| "grad_norm": 0.23261788576448003, | |
| "learning_rate": 2.3706769706085896e-05, | |
| "loss": 0.6947, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 3.341209173036831, | |
| "grad_norm": 0.21185868547279016, | |
| "learning_rate": 2.3564327386938075e-05, | |
| "loss": 0.7044, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 3.3467685892981236, | |
| "grad_norm": 0.23101318712846342, | |
| "learning_rate": 2.3422135387424342e-05, | |
| "loss": 0.6948, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 3.352328005559416, | |
| "grad_norm": 0.17488832365954124, | |
| "learning_rate": 2.3280195873166384e-05, | |
| "loss": 0.7178, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 3.357887421820709, | |
| "grad_norm": 0.2277773250170869, | |
| "learning_rate": 2.3138511005940502e-05, | |
| "loss": 0.6903, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 3.3634468380820013, | |
| "grad_norm": 0.15592015395767056, | |
| "learning_rate": 2.2997082943644635e-05, | |
| "loss": 0.6961, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 3.3690062543432937, | |
| "grad_norm": 0.2772999957520557, | |
| "learning_rate": 2.2855913840265504e-05, | |
| "loss": 0.716, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 3.3745656706045866, | |
| "grad_norm": 0.1913976829716375, | |
| "learning_rate": 2.271500584584585e-05, | |
| "loss": 0.6921, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 3.380125086865879, | |
| "grad_norm": 0.17887054500690366, | |
| "learning_rate": 2.2574361106451637e-05, | |
| "loss": 0.6911, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 3.385684503127172, | |
| "grad_norm": 0.21750205862731467, | |
| "learning_rate": 2.2433981764139383e-05, | |
| "loss": 0.7096, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 3.3912439193884643, | |
| "grad_norm": 0.19593147325746155, | |
| "learning_rate": 2.2293869956923517e-05, | |
| "loss": 0.6982, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 3.3968033356497567, | |
| "grad_norm": 0.20784090087675822, | |
| "learning_rate": 2.215402781874391e-05, | |
| "loss": 0.701, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 3.4023627519110495, | |
| "grad_norm": 0.2094241373890822, | |
| "learning_rate": 2.2014457479433204e-05, | |
| "loss": 0.7006, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 3.407922168172342, | |
| "grad_norm": 0.19165733648750527, | |
| "learning_rate": 2.1875161064684586e-05, | |
| "loss": 0.7011, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 3.4134815844336344, | |
| "grad_norm": 0.28101320010779757, | |
| "learning_rate": 2.1736140696019166e-05, | |
| "loss": 0.6973, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 3.4190410006949272, | |
| "grad_norm": 0.18629632876510363, | |
| "learning_rate": 2.1597398490753917e-05, | |
| "loss": 0.7066, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 3.4246004169562196, | |
| "grad_norm": 0.22661440985457448, | |
| "learning_rate": 2.1458936561969254e-05, | |
| "loss": 0.7026, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 3.430159833217512, | |
| "grad_norm": 0.2865403117660965, | |
| "learning_rate": 2.132075701847691e-05, | |
| "loss": 0.708, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 3.435719249478805, | |
| "grad_norm": 0.15089884025551945, | |
| "learning_rate": 2.1182861964787803e-05, | |
| "loss": 0.7115, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 3.4412786657400973, | |
| "grad_norm": 0.2952596320891401, | |
| "learning_rate": 2.1045253501080058e-05, | |
| "loss": 0.6971, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 3.4468380820013897, | |
| "grad_norm": 0.2189167426361826, | |
| "learning_rate": 2.0907933723166888e-05, | |
| "loss": 0.7074, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 3.4523974982626826, | |
| "grad_norm": 0.21844536238960785, | |
| "learning_rate": 2.0770904722464764e-05, | |
| "loss": 0.6917, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 3.457956914523975, | |
| "grad_norm": 0.1843811278861263, | |
| "learning_rate": 2.06341685859616e-05, | |
| "loss": 0.7093, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 3.4635163307852674, | |
| "grad_norm": 0.2161993516067528, | |
| "learning_rate": 2.049772739618479e-05, | |
| "loss": 0.7032, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 3.4690757470465603, | |
| "grad_norm": 0.21374762977410636, | |
| "learning_rate": 2.0361583231169747e-05, | |
| "loss": 0.709, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 3.4746351633078527, | |
| "grad_norm": 0.2060717500194053, | |
| "learning_rate": 2.022573816442801e-05, | |
| "loss": 0.6936, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 3.480194579569145, | |
| "grad_norm": 0.19602587113180545, | |
| "learning_rate": 2.0090194264915873e-05, | |
| "loss": 0.7118, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 3.485753995830438, | |
| "grad_norm": 0.20086622178734398, | |
| "learning_rate": 1.9954953597002654e-05, | |
| "loss": 0.7105, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 3.4913134120917304, | |
| "grad_norm": 0.19511153222139876, | |
| "learning_rate": 1.9820018220439487e-05, | |
| "loss": 0.6957, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 3.496872828353023, | |
| "grad_norm": 0.19884256435263836, | |
| "learning_rate": 1.9685390190327757e-05, | |
| "loss": 0.7098, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 3.5024322446143152, | |
| "grad_norm": 0.15325133615321943, | |
| "learning_rate": 1.955107155708795e-05, | |
| "loss": 0.6971, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 3.507991660875608, | |
| "grad_norm": 0.16150214875067356, | |
| "learning_rate": 1.9417064366428295e-05, | |
| "loss": 0.7186, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 3.5135510771369005, | |
| "grad_norm": 0.1789338613581912, | |
| "learning_rate": 1.92833706593137e-05, | |
| "loss": 0.6964, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 3.519110493398193, | |
| "grad_norm": 0.20761650610037347, | |
| "learning_rate": 1.9149992471934614e-05, | |
| "loss": 0.6896, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 3.524669909659486, | |
| "grad_norm": 0.18407744122093334, | |
| "learning_rate": 1.9016931835676078e-05, | |
| "loss": 0.705, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 3.530229325920778, | |
| "grad_norm": 0.20855512031756548, | |
| "learning_rate": 1.8884190777086705e-05, | |
| "loss": 0.6974, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 3.5357887421820706, | |
| "grad_norm": 0.19336549621743102, | |
| "learning_rate": 1.8751771317847876e-05, | |
| "loss": 0.706, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 3.5413481584433635, | |
| "grad_norm": 0.21214610837166514, | |
| "learning_rate": 1.8619675474742904e-05, | |
| "loss": 0.7115, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 3.546907574704656, | |
| "grad_norm": 0.2022598865741006, | |
| "learning_rate": 1.848790525962639e-05, | |
| "loss": 0.7107, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 3.5524669909659483, | |
| "grad_norm": 0.21753789595081924, | |
| "learning_rate": 1.8356462679393488e-05, | |
| "loss": 0.7102, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 3.558026407227241, | |
| "grad_norm": 0.16764139285094648, | |
| "learning_rate": 1.8225349735949435e-05, | |
| "loss": 0.7051, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 3.5635858234885336, | |
| "grad_norm": 0.18750292991240983, | |
| "learning_rate": 1.809456842617893e-05, | |
| "loss": 0.7093, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 3.569145239749826, | |
| "grad_norm": 0.1684536912531375, | |
| "learning_rate": 1.7964120741915905e-05, | |
| "loss": 0.7072, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 3.574704656011119, | |
| "grad_norm": 0.15202938571970473, | |
| "learning_rate": 1.7834008669913016e-05, | |
| "loss": 0.7013, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 3.5802640722724113, | |
| "grad_norm": 0.15639709243101124, | |
| "learning_rate": 1.770423419181147e-05, | |
| "loss": 0.711, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 3.5858234885337037, | |
| "grad_norm": 0.16082010640489602, | |
| "learning_rate": 1.7574799284110863e-05, | |
| "loss": 0.6973, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 3.5913829047949966, | |
| "grad_norm": 0.1658067634382349, | |
| "learning_rate": 1.744570591813901e-05, | |
| "loss": 0.6946, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 3.596942321056289, | |
| "grad_norm": 0.16969803277351794, | |
| "learning_rate": 1.7316956060021957e-05, | |
| "loss": 0.6827, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 3.6025017373175814, | |
| "grad_norm": 0.16177207356044507, | |
| "learning_rate": 1.718855167065404e-05, | |
| "loss": 0.7036, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 3.6080611535788742, | |
| "grad_norm": 0.16664683404878003, | |
| "learning_rate": 1.706049470566803e-05, | |
| "loss": 0.6964, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 3.6136205698401667, | |
| "grad_norm": 0.19244596702153122, | |
| "learning_rate": 1.6932787115405318e-05, | |
| "loss": 0.703, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 3.619179986101459, | |
| "grad_norm": 0.14531076153595857, | |
| "learning_rate": 1.6805430844886226e-05, | |
| "loss": 0.7104, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 3.624739402362752, | |
| "grad_norm": 0.17188892378376985, | |
| "learning_rate": 1.667842783378036e-05, | |
| "loss": 0.7014, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 3.6302988186240444, | |
| "grad_norm": 0.14674805004070898, | |
| "learning_rate": 1.655178001637716e-05, | |
| "loss": 0.697, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 3.6358582348853368, | |
| "grad_norm": 0.18306188304250803, | |
| "learning_rate": 1.6425489321556306e-05, | |
| "loss": 0.7171, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 3.6414176511466296, | |
| "grad_norm": 0.1770643704313744, | |
| "learning_rate": 1.6299557672758434e-05, | |
| "loss": 0.7063, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 3.646977067407922, | |
| "grad_norm": 0.17018093462077719, | |
| "learning_rate": 1.6173986987955794e-05, | |
| "loss": 0.7014, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 3.6525364836692145, | |
| "grad_norm": 0.15144979941800404, | |
| "learning_rate": 1.6048779179623098e-05, | |
| "loss": 0.6987, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 3.6580958999305073, | |
| "grad_norm": 0.1306496439198933, | |
| "learning_rate": 1.5923936154708332e-05, | |
| "loss": 0.6992, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 3.6636553161917997, | |
| "grad_norm": 0.18827301402231672, | |
| "learning_rate": 1.5799459814603716e-05, | |
| "loss": 0.6939, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 3.669214732453092, | |
| "grad_norm": 0.1310253500615189, | |
| "learning_rate": 1.5675352055116774e-05, | |
| "loss": 0.6972, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 3.674774148714385, | |
| "grad_norm": 0.1583027749111766, | |
| "learning_rate": 1.5551614766441483e-05, | |
| "loss": 0.691, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 3.6803335649756774, | |
| "grad_norm": 0.1442216498175938, | |
| "learning_rate": 1.54282498331294e-05, | |
| "loss": 0.6931, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 3.68589298123697, | |
| "grad_norm": 0.16962082435760417, | |
| "learning_rate": 1.5305259134061075e-05, | |
| "loss": 0.6965, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 3.6914523974982627, | |
| "grad_norm": 0.15393789815894893, | |
| "learning_rate": 1.5182644542417278e-05, | |
| "loss": 0.7041, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 3.697011813759555, | |
| "grad_norm": 0.1407850186063632, | |
| "learning_rate": 1.5060407925650662e-05, | |
| "loss": 0.6907, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 3.7025712300208475, | |
| "grad_norm": 0.14524255117484877, | |
| "learning_rate": 1.4938551145457152e-05, | |
| "loss": 0.6991, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 3.7081306462821404, | |
| "grad_norm": 0.17278063379073116, | |
| "learning_rate": 1.4817076057747728e-05, | |
| "loss": 0.7089, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 3.713690062543433, | |
| "grad_norm": 0.14326689230859943, | |
| "learning_rate": 1.4695984512619999e-05, | |
| "loss": 0.7191, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 3.7192494788047252, | |
| "grad_norm": 0.1542646207279153, | |
| "learning_rate": 1.4575278354330213e-05, | |
| "loss": 0.6991, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 3.724808895066018, | |
| "grad_norm": 0.16046575585370446, | |
| "learning_rate": 1.4454959421265029e-05, | |
| "loss": 0.7049, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 3.7303683113273105, | |
| "grad_norm": 0.14105189080116268, | |
| "learning_rate": 1.4335029545913556e-05, | |
| "loss": 0.7065, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 3.735927727588603, | |
| "grad_norm": 0.17908408959110106, | |
| "learning_rate": 1.4215490554839483e-05, | |
| "loss": 0.6959, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 3.741487143849896, | |
| "grad_norm": 0.1411806231847332, | |
| "learning_rate": 1.409634426865321e-05, | |
| "loss": 0.6953, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 3.747046560111188, | |
| "grad_norm": 0.1376363802363308, | |
| "learning_rate": 1.397759250198413e-05, | |
| "loss": 0.7022, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 3.752605976372481, | |
| "grad_norm": 0.18325827062078764, | |
| "learning_rate": 1.3859237063453002e-05, | |
| "loss": 0.6977, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 3.7581653926337735, | |
| "grad_norm": 0.16076543857018247, | |
| "learning_rate": 1.3741279755644427e-05, | |
| "loss": 0.7089, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 3.763724808895066, | |
| "grad_norm": 0.15436087851311603, | |
| "learning_rate": 1.3623722375079345e-05, | |
| "loss": 0.6907, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 3.7692842251563587, | |
| "grad_norm": 0.17697113323045366, | |
| "learning_rate": 1.3506566712187703e-05, | |
| "loss": 0.6964, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 3.774843641417651, | |
| "grad_norm": 0.14034086311040858, | |
| "learning_rate": 1.338981455128117e-05, | |
| "loss": 0.7001, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 3.7804030576789436, | |
| "grad_norm": 0.1798120065285811, | |
| "learning_rate": 1.3273467670526018e-05, | |
| "loss": 0.7137, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 3.7859624739402364, | |
| "grad_norm": 0.14177752914344133, | |
| "learning_rate": 1.3157527841915951e-05, | |
| "loss": 0.6985, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 3.791521890201529, | |
| "grad_norm": 0.15387764621799602, | |
| "learning_rate": 1.3041996831245172e-05, | |
| "loss": 0.7127, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 3.7970813064628213, | |
| "grad_norm": 0.1383432826118217, | |
| "learning_rate": 1.292687639808147e-05, | |
| "loss": 0.6918, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 3.802640722724114, | |
| "grad_norm": 0.1322044138065007, | |
| "learning_rate": 1.2812168295739466e-05, | |
| "loss": 0.6926, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 3.8082001389854065, | |
| "grad_norm": 0.1459059689077956, | |
| "learning_rate": 1.2697874271253844e-05, | |
| "loss": 0.6878, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 3.813759555246699, | |
| "grad_norm": 0.13579645760751588, | |
| "learning_rate": 1.258399606535277e-05, | |
| "loss": 0.7, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 3.819318971507992, | |
| "grad_norm": 0.1355030336005884, | |
| "learning_rate": 1.2470535412431387e-05, | |
| "loss": 0.6918, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 3.8248783877692842, | |
| "grad_norm": 0.12460724345510901, | |
| "learning_rate": 1.2357494040525416e-05, | |
| "loss": 0.709, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 3.8304378040305767, | |
| "grad_norm": 0.1401072983412368, | |
| "learning_rate": 1.2244873671284788e-05, | |
| "loss": 0.7081, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 3.8359972202918695, | |
| "grad_norm": 0.1410007273902405, | |
| "learning_rate": 1.2132676019947493e-05, | |
| "loss": 0.7073, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 3.841556636553162, | |
| "grad_norm": 0.1347022225437132, | |
| "learning_rate": 1.2020902795313352e-05, | |
| "loss": 0.6991, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 3.8471160528144543, | |
| "grad_norm": 0.16412649743147234, | |
| "learning_rate": 1.1909555699718118e-05, | |
| "loss": 0.6948, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 3.852675469075747, | |
| "grad_norm": 0.1304705458644605, | |
| "learning_rate": 1.179863642900744e-05, | |
| "loss": 0.7176, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 3.8582348853370396, | |
| "grad_norm": 0.201843338518629, | |
| "learning_rate": 1.1688146672511143e-05, | |
| "loss": 0.7149, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 3.863794301598332, | |
| "grad_norm": 0.13997227423899886, | |
| "learning_rate": 1.1578088113017337e-05, | |
| "loss": 0.7015, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 3.869353717859625, | |
| "grad_norm": 0.16701470491046372, | |
| "learning_rate": 1.1468462426746988e-05, | |
| "loss": 0.6942, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 3.8749131341209173, | |
| "grad_norm": 0.1504863826631891, | |
| "learning_rate": 1.1359271283328224e-05, | |
| "loss": 0.7117, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 3.8804725503822097, | |
| "grad_norm": 0.15305924367225743, | |
| "learning_rate": 1.1250516345771016e-05, | |
| "loss": 0.7046, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 3.8860319666435026, | |
| "grad_norm": 0.14203590464816526, | |
| "learning_rate": 1.1142199270441774e-05, | |
| "loss": 0.6949, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 3.891591382904795, | |
| "grad_norm": 0.14514210360106045, | |
| "learning_rate": 1.1034321707038153e-05, | |
| "loss": 0.7134, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 3.8971507991660874, | |
| "grad_norm": 0.143692077723024, | |
| "learning_rate": 1.092688529856393e-05, | |
| "loss": 0.6996, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 3.9027102154273803, | |
| "grad_norm": 0.12831987823898527, | |
| "learning_rate": 1.0819891681303973e-05, | |
| "loss": 0.712, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 3.9082696316886727, | |
| "grad_norm": 0.15655538061615665, | |
| "learning_rate": 1.0713342484799342e-05, | |
| "loss": 0.7124, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 3.913829047949965, | |
| "grad_norm": 0.13774726018900033, | |
| "learning_rate": 1.060723933182243e-05, | |
| "loss": 0.6946, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 3.919388464211258, | |
| "grad_norm": 0.15042270167507696, | |
| "learning_rate": 1.0501583838352269e-05, | |
| "loss": 0.7101, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 3.9249478804725504, | |
| "grad_norm": 0.14701679045150232, | |
| "learning_rate": 1.0396377613549924e-05, | |
| "loss": 0.6975, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 3.930507296733843, | |
| "grad_norm": 0.1348037007073666, | |
| "learning_rate": 1.0291622259733995e-05, | |
| "loss": 0.6898, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 3.9360667129951357, | |
| "grad_norm": 0.14628526905379458, | |
| "learning_rate": 1.0187319372356175e-05, | |
| "loss": 0.6942, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 3.941626129256428, | |
| "grad_norm": 0.13615945281914504, | |
| "learning_rate": 1.0083470539976986e-05, | |
| "loss": 0.7064, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 3.9471855455177205, | |
| "grad_norm": 0.14466139658327062, | |
| "learning_rate": 9.980077344241547e-06, | |
| "loss": 0.7026, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 3.9527449617790134, | |
| "grad_norm": 0.13153285118835809, | |
| "learning_rate": 9.877141359855567e-06, | |
| "loss": 0.7, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 3.9583043780403058, | |
| "grad_norm": 0.1419211581773803, | |
| "learning_rate": 9.774664154561249e-06, | |
| "loss": 0.6884, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 3.963863794301598, | |
| "grad_norm": 0.1301976817383647, | |
| "learning_rate": 9.672647289113497e-06, | |
| "loss": 0.698, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 3.969423210562891, | |
| "grad_norm": 0.12100613516338608, | |
| "learning_rate": 9.5710923172561e-06, | |
| "loss": 0.6899, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 3.9749826268241835, | |
| "grad_norm": 0.12619604031849105, | |
| "learning_rate": 9.470000785698117e-06, | |
| "loss": 0.7149, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 3.980542043085476, | |
| "grad_norm": 0.13766300137361, | |
| "learning_rate": 9.369374234090248e-06, | |
| "loss": 0.6991, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 3.9861014593467687, | |
| "grad_norm": 0.13324844897778138, | |
| "learning_rate": 9.269214195001482e-06, | |
| "loss": 0.6986, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 3.991660875608061, | |
| "grad_norm": 0.13768942764899267, | |
| "learning_rate": 9.169522193895614e-06, | |
| "loss": 0.7133, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 3.9972202918693536, | |
| "grad_norm": 0.15395342971750262, | |
| "learning_rate": 9.070299749108184e-06, | |
| "loss": 0.6978, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 4.0222376650451706, | |
| "grad_norm": 0.19291822566974853, | |
| "learning_rate": 8.971548371823205e-06, | |
| "loss": 0.6835, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 4.0277970813064625, | |
| "grad_norm": 0.16867681043248, | |
| "learning_rate": 8.873269566050262e-06, | |
| "loss": 0.6883, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 4.033356497567755, | |
| "grad_norm": 0.16326964961944673, | |
| "learning_rate": 8.775464828601477e-06, | |
| "loss": 0.674, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 4.038915913829048, | |
| "grad_norm": 0.16270178485101133, | |
| "learning_rate": 8.678135649068862e-06, | |
| "loss": 0.677, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 4.04447533009034, | |
| "grad_norm": 0.1917386211633971, | |
| "learning_rate": 8.581283509801501e-06, | |
| "loss": 0.6761, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 4.050034746351633, | |
| "grad_norm": 0.1811245778611519, | |
| "learning_rate": 8.484909885883073e-06, | |
| "loss": 0.6744, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 4.055594162612926, | |
| "grad_norm": 0.18514509335495727, | |
| "learning_rate": 8.389016245109305e-06, | |
| "loss": 0.6703, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 4.061153578874218, | |
| "grad_norm": 0.18858746875253857, | |
| "learning_rate": 8.293604047965677e-06, | |
| "loss": 0.6671, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 4.066712995135511, | |
| "grad_norm": 0.1850999009534838, | |
| "learning_rate": 8.19867474760514e-06, | |
| "loss": 0.679, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 4.072272411396804, | |
| "grad_norm": 0.17959193780762223, | |
| "learning_rate": 8.104229789825994e-06, | |
| "loss": 0.6812, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 4.077831827658096, | |
| "grad_norm": 0.16993387051766873, | |
| "learning_rate": 8.010270613049918e-06, | |
| "loss": 0.6646, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 4.0833912439193885, | |
| "grad_norm": 0.18321014233141378, | |
| "learning_rate": 7.916798648299964e-06, | |
| "loss": 0.672, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 4.088950660180681, | |
| "grad_norm": 0.1760023318041356, | |
| "learning_rate": 7.823815319178844e-06, | |
| "loss": 0.6757, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 4.094510076441973, | |
| "grad_norm": 0.16517402304449183, | |
| "learning_rate": 7.731322041847207e-06, | |
| "loss": 0.6966, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 4.100069492703266, | |
| "grad_norm": 0.1534852983464337, | |
| "learning_rate": 7.639320225002106e-06, | |
| "loss": 0.6849, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 4.105628908964559, | |
| "grad_norm": 0.17441227331833753, | |
| "learning_rate": 7.547811269855501e-06, | |
| "loss": 0.6922, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 4.111188325225851, | |
| "grad_norm": 0.14483951588357777, | |
| "learning_rate": 7.456796570112939e-06, | |
| "loss": 0.6896, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 4.116747741487144, | |
| "grad_norm": 0.12347091194252453, | |
| "learning_rate": 7.366277511952326e-06, | |
| "loss": 0.6781, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 4.122307157748437, | |
| "grad_norm": 0.18498892500395253, | |
| "learning_rate": 7.2762554740028445e-06, | |
| "loss": 0.6781, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 4.127866574009729, | |
| "grad_norm": 0.253100855454218, | |
| "learning_rate": 7.186731827323883e-06, | |
| "loss": 0.6783, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 4.1334259902710215, | |
| "grad_norm": 0.12547901205381473, | |
| "learning_rate": 7.097707935384232e-06, | |
| "loss": 0.6726, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 4.138985406532314, | |
| "grad_norm": 0.13373732748809097, | |
| "learning_rate": 7.009185154041258e-06, | |
| "loss": 0.6751, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 4.144544822793606, | |
| "grad_norm": 0.1362307104037655, | |
| "learning_rate": 6.921164831520322e-06, | |
| "loss": 0.6777, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 4.150104239054899, | |
| "grad_norm": 0.11860525182508581, | |
| "learning_rate": 6.833648308394156e-06, | |
| "loss": 0.6838, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 4.155663655316192, | |
| "grad_norm": 0.13403843504100116, | |
| "learning_rate": 6.7466369175625475e-06, | |
| "loss": 0.6807, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 4.161223071577484, | |
| "grad_norm": 0.1306791214455748, | |
| "learning_rate": 6.660131984231917e-06, | |
| "loss": 0.6852, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 4.166782487838777, | |
| "grad_norm": 0.1234499283506164, | |
| "learning_rate": 6.574134825895276e-06, | |
| "loss": 0.6936, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 4.17234190410007, | |
| "grad_norm": 0.11590058368187668, | |
| "learning_rate": 6.488646752312031e-06, | |
| "loss": 0.6875, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 4.177901320361362, | |
| "grad_norm": 0.13263371190427117, | |
| "learning_rate": 6.403669065488141e-06, | |
| "loss": 0.6722, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 4.183460736622655, | |
| "grad_norm": 0.10833522836358132, | |
| "learning_rate": 6.3192030596562e-06, | |
| "loss": 0.6703, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 4.1890201528839475, | |
| "grad_norm": 0.11948792048123115, | |
| "learning_rate": 6.235250021255788e-06, | |
| "loss": 0.6804, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 4.194579569145239, | |
| "grad_norm": 0.11643796212883935, | |
| "learning_rate": 6.15181122891384e-06, | |
| "loss": 0.6835, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 4.200138985406532, | |
| "grad_norm": 0.12784086684218327, | |
| "learning_rate": 6.068887953425213e-06, | |
| "loss": 0.7022, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 4.205698401667825, | |
| "grad_norm": 0.11701035763266034, | |
| "learning_rate": 5.986481457733284e-06, | |
| "loss": 0.6986, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 4.211257817929117, | |
| "grad_norm": 0.11133015565758361, | |
| "learning_rate": 5.904592996910739e-06, | |
| "loss": 0.6767, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 4.21681723419041, | |
| "grad_norm": 0.1172582650820155, | |
| "learning_rate": 5.823223818140458e-06, | |
| "loss": 0.6804, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 4.222376650451703, | |
| "grad_norm": 0.1108105150189601, | |
| "learning_rate": 5.7423751606965296e-06, | |
| "loss": 0.6763, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 4.227936066712995, | |
| "grad_norm": 0.10612497316301034, | |
| "learning_rate": 5.662048255925357e-06, | |
| "loss": 0.6726, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 4.233495482974288, | |
| "grad_norm": 0.10674977611900638, | |
| "learning_rate": 5.582244327226907e-06, | |
| "loss": 0.6785, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 4.2390548992355805, | |
| "grad_norm": 0.11506717156249478, | |
| "learning_rate": 5.502964590036093e-06, | |
| "loss": 0.6682, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 4.2446143154968725, | |
| "grad_norm": 0.10977340853370719, | |
| "learning_rate": 5.424210251804227e-06, | |
| "loss": 0.6891, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 4.250173731758165, | |
| "grad_norm": 0.10897381874854041, | |
| "learning_rate": 5.345982511980707e-06, | |
| "loss": 0.6927, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 4.255733148019458, | |
| "grad_norm": 0.1068366534868816, | |
| "learning_rate": 5.268282561994658e-06, | |
| "loss": 0.6831, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 4.26129256428075, | |
| "grad_norm": 0.11704849472765765, | |
| "learning_rate": 5.191111585236828e-06, | |
| "loss": 0.6925, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 4.266851980542043, | |
| "grad_norm": 0.1082316966515285, | |
| "learning_rate": 5.114470757041572e-06, | |
| "loss": 0.6845, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 4.272411396803336, | |
| "grad_norm": 0.11684449251733979, | |
| "learning_rate": 5.038361244668952e-06, | |
| "loss": 0.6915, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 4.277970813064628, | |
| "grad_norm": 0.1076252680934964, | |
| "learning_rate": 4.96278420728693e-06, | |
| "loss": 0.6757, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 4.283530229325921, | |
| "grad_norm": 0.11124587762106175, | |
| "learning_rate": 4.887740795953764e-06, | |
| "loss": 0.6922, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 4.289089645587214, | |
| "grad_norm": 0.11190635320264125, | |
| "learning_rate": 4.813232153600393e-06, | |
| "loss": 0.6847, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 4.294649061848506, | |
| "grad_norm": 0.1114491984468364, | |
| "learning_rate": 4.739259415013138e-06, | |
| "loss": 0.6761, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 4.3002084781097984, | |
| "grad_norm": 0.10315313503375144, | |
| "learning_rate": 4.6658237068163285e-06, | |
| "loss": 0.6729, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 4.305767894371091, | |
| "grad_norm": 0.11278503790768903, | |
| "learning_rate": 4.5929261474552164e-06, | |
| "loss": 0.6765, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 4.311327310632383, | |
| "grad_norm": 0.11395222910887802, | |
| "learning_rate": 4.520567847178847e-06, | |
| "loss": 0.6922, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 4.316886726893676, | |
| "grad_norm": 0.11402328760203634, | |
| "learning_rate": 4.4487499080232685e-06, | |
| "loss": 0.6757, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 4.322446143154969, | |
| "grad_norm": 0.11252026920394785, | |
| "learning_rate": 4.377473423794643e-06, | |
| "loss": 0.6827, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 4.328005559416261, | |
| "grad_norm": 0.10596243645246563, | |
| "learning_rate": 4.30673948005266e-06, | |
| "loss": 0.6798, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 4.333564975677554, | |
| "grad_norm": 0.11097354795039509, | |
| "learning_rate": 4.236549154093954e-06, | |
| "loss": 0.6639, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 4.339124391938847, | |
| "grad_norm": 0.11133822797610729, | |
| "learning_rate": 4.1669035149357255e-06, | |
| "loss": 0.6843, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 4.344683808200139, | |
| "grad_norm": 0.10566152940069157, | |
| "learning_rate": 4.097803623299443e-06, | |
| "loss": 0.68, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 4.3502432244614315, | |
| "grad_norm": 0.11005589437352328, | |
| "learning_rate": 4.029250531594722e-06, | |
| "loss": 0.6808, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 4.355802640722724, | |
| "grad_norm": 0.11436092029090196, | |
| "learning_rate": 3.961245283903239e-06, | |
| "loss": 0.6937, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 4.361362056984016, | |
| "grad_norm": 0.1084924543952515, | |
| "learning_rate": 3.893788915962873e-06, | |
| "loss": 0.6851, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 4.366921473245309, | |
| "grad_norm": 0.10292395998913514, | |
| "learning_rate": 3.8268824551519214e-06, | |
| "loss": 0.6791, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 4.372480889506602, | |
| "grad_norm": 0.11533814757630266, | |
| "learning_rate": 3.760526920473462e-06, | |
| "loss": 0.6833, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 4.378040305767894, | |
| "grad_norm": 0.10749393726522205, | |
| "learning_rate": 3.6947233225397993e-06, | |
| "loss": 0.6778, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 4.383599722029187, | |
| "grad_norm": 0.10782596391834809, | |
| "learning_rate": 3.62947266355711e-06, | |
| "loss": 0.678, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 4.38915913829048, | |
| "grad_norm": 0.10920407631291443, | |
| "learning_rate": 3.5647759373101585e-06, | |
| "loss": 0.6756, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 4.394718554551772, | |
| "grad_norm": 0.10267228546675215, | |
| "learning_rate": 3.500634129147167e-06, | |
| "loss": 0.6741, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 4.400277970813065, | |
| "grad_norm": 0.11750192758379473, | |
| "learning_rate": 3.437048215964827e-06, | |
| "loss": 0.6983, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 4.4058373870743575, | |
| "grad_norm": 0.09915445619844299, | |
| "learning_rate": 3.3740191661933764e-06, | |
| "loss": 0.6847, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 4.411396803335649, | |
| "grad_norm": 0.10665441100795098, | |
| "learning_rate": 3.311547939781887e-06, | |
| "loss": 0.6731, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 4.416956219596942, | |
| "grad_norm": 0.11341264840729572, | |
| "learning_rate": 3.2496354881836268e-06, | |
| "loss": 0.69, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 4.422515635858235, | |
| "grad_norm": 0.10742879505946441, | |
| "learning_rate": 3.188282754341603e-06, | |
| "loss": 0.6722, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 4.428075052119527, | |
| "grad_norm": 0.09857512757834257, | |
| "learning_rate": 3.1274906726741317e-06, | |
| "loss": 0.6868, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 4.43363446838082, | |
| "grad_norm": 0.09914583832141616, | |
| "learning_rate": 3.067260169060697e-06, | |
| "loss": 0.6919, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 4.439193884642113, | |
| "grad_norm": 0.1053410371476582, | |
| "learning_rate": 3.0075921608277415e-06, | |
| "loss": 0.6728, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 4.444753300903405, | |
| "grad_norm": 0.10244450162235408, | |
| "learning_rate": 2.9484875567348117e-06, | |
| "loss": 0.6733, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 4.450312717164698, | |
| "grad_norm": 0.10827946870195214, | |
| "learning_rate": 2.8899472569606166e-06, | |
| "loss": 0.6817, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 4.4558721334259905, | |
| "grad_norm": 0.11096751325342262, | |
| "learning_rate": 2.8319721530894084e-06, | |
| "loss": 0.6833, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 4.4614315496872825, | |
| "grad_norm": 0.10884842138622, | |
| "learning_rate": 2.7745631280973005e-06, | |
| "loss": 0.6857, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 4.466990965948575, | |
| "grad_norm": 0.1061505303127893, | |
| "learning_rate": 2.7177210563389178e-06, | |
| "loss": 0.6643, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 4.472550382209868, | |
| "grad_norm": 0.10184960600698661, | |
| "learning_rate": 2.6614468035340137e-06, | |
| "loss": 0.6838, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 4.47810979847116, | |
| "grad_norm": 0.10986135159522677, | |
| "learning_rate": 2.6057412267543347e-06, | |
| "loss": 0.6642, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 4.483669214732453, | |
| "grad_norm": 0.11858338514040914, | |
| "learning_rate": 2.550605174410512e-06, | |
| "loss": 0.6896, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 4.489228630993746, | |
| "grad_norm": 0.11205998179561413, | |
| "learning_rate": 2.49603948623919e-06, | |
| "loss": 0.6706, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 4.494788047255038, | |
| "grad_norm": 0.10980379918802778, | |
| "learning_rate": 2.442044993290198e-06, | |
| "loss": 0.6693, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 4.500347463516331, | |
| "grad_norm": 0.10176209096587764, | |
| "learning_rate": 2.388622517913937e-06, | |
| "loss": 0.6951, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 4.505906879777624, | |
| "grad_norm": 0.11498795165110608, | |
| "learning_rate": 2.335772873748807e-06, | |
| "loss": 0.6738, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 4.511466296038916, | |
| "grad_norm": 0.11488527586994095, | |
| "learning_rate": 2.283496865708843e-06, | |
| "loss": 0.667, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 4.517025712300208, | |
| "grad_norm": 0.10001308436336462, | |
| "learning_rate": 2.231795289971448e-06, | |
| "loss": 0.6727, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 4.522585128561501, | |
| "grad_norm": 0.10142949842798026, | |
| "learning_rate": 2.1806689339652864e-06, | |
| "loss": 0.6861, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 4.528144544822793, | |
| "grad_norm": 0.10831719019410518, | |
| "learning_rate": 2.130118576358249e-06, | |
| "loss": 0.6901, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 4.533703961084086, | |
| "grad_norm": 0.10533792235643635, | |
| "learning_rate": 2.0801449870456424e-06, | |
| "loss": 0.6914, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 4.539263377345379, | |
| "grad_norm": 0.11015380431460803, | |
| "learning_rate": 2.0307489271384107e-06, | |
| "loss": 0.6742, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 4.544822793606671, | |
| "grad_norm": 0.10267772411742519, | |
| "learning_rate": 1.9819311489516122e-06, | |
| "loss": 0.6788, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 4.550382209867964, | |
| "grad_norm": 0.10352184892836523, | |
| "learning_rate": 1.9336923959928855e-06, | |
| "loss": 0.6753, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 4.555941626129257, | |
| "grad_norm": 0.10255910515017448, | |
| "learning_rate": 1.8860334029511795e-06, | |
| "loss": 0.6888, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 4.561501042390549, | |
| "grad_norm": 0.1101267679218166, | |
| "learning_rate": 1.8389548956855474e-06, | |
| "loss": 0.6676, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 4.5670604586518415, | |
| "grad_norm": 0.09919355138446458, | |
| "learning_rate": 1.7924575912140785e-06, | |
| "loss": 0.6816, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 4.572619874913134, | |
| "grad_norm": 0.10048348598330001, | |
| "learning_rate": 1.7465421977030094e-06, | |
| "loss": 0.668, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 4.578179291174426, | |
| "grad_norm": 0.09864741465772012, | |
| "learning_rate": 1.7012094144558932e-06, | |
| "loss": 0.6906, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 4.583738707435719, | |
| "grad_norm": 0.09852629373371855, | |
| "learning_rate": 1.656459931903016e-06, | |
| "loss": 0.671, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 4.589298123697012, | |
| "grad_norm": 0.10250147345118527, | |
| "learning_rate": 1.6122944315907841e-06, | |
| "loss": 0.6904, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 4.594857539958304, | |
| "grad_norm": 0.10233888167494866, | |
| "learning_rate": 1.568713586171451e-06, | |
| "loss": 0.6864, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 4.600416956219597, | |
| "grad_norm": 0.1018547320747838, | |
| "learning_rate": 1.5257180593927845e-06, | |
| "loss": 0.6739, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 4.60597637248089, | |
| "grad_norm": 0.10391800179774276, | |
| "learning_rate": 1.4833085060880349e-06, | |
| "loss": 0.69, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 4.611535788742182, | |
| "grad_norm": 0.10061052758881242, | |
| "learning_rate": 1.4414855721658705e-06, | |
| "loss": 0.6771, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 4.617095205003475, | |
| "grad_norm": 0.09769748536321253, | |
| "learning_rate": 1.400249894600636e-06, | |
| "loss": 0.6873, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 4.6226546212647674, | |
| "grad_norm": 0.10073570885444108, | |
| "learning_rate": 1.3596021014225858e-06, | |
| "loss": 0.6769, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 4.628214037526059, | |
| "grad_norm": 0.10144089064892822, | |
| "learning_rate": 1.3195428117083453e-06, | |
| "loss": 0.6814, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 4.633773453787352, | |
| "grad_norm": 0.09732733650725929, | |
| "learning_rate": 1.2800726355714743e-06, | |
| "loss": 0.6728, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 4.639332870048645, | |
| "grad_norm": 0.10872983620981282, | |
| "learning_rate": 1.2411921741531807e-06, | |
| "loss": 0.6945, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 4.644892286309937, | |
| "grad_norm": 0.12488470295046429, | |
| "learning_rate": 1.2029020196131502e-06, | |
| "loss": 0.7003, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 4.65045170257123, | |
| "grad_norm": 0.11938725767925232, | |
| "learning_rate": 1.165202755120558e-06, | |
| "loss": 0.6682, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 4.656011118832523, | |
| "grad_norm": 0.09568930125694097, | |
| "learning_rate": 1.1280949548451559e-06, | |
| "loss": 0.6832, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 4.661570535093815, | |
| "grad_norm": 0.0907090050821951, | |
| "learning_rate": 1.0915791839485412e-06, | |
| "loss": 0.6839, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 4.667129951355108, | |
| "grad_norm": 0.09108448182810729, | |
| "learning_rate": 1.0556559985755377e-06, | |
| "loss": 0.6813, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 4.6726893676164005, | |
| "grad_norm": 0.09467659368857642, | |
| "learning_rate": 1.020325945845757e-06, | |
| "loss": 0.6887, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 4.6782487838776925, | |
| "grad_norm": 0.09322239875663856, | |
| "learning_rate": 9.855895638452284e-07, | |
| "loss": 0.6751, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 4.683808200138985, | |
| "grad_norm": 0.09515328712397934, | |
| "learning_rate": 9.514473816182179e-07, | |
| "loss": 0.6704, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 4.689367616400278, | |
| "grad_norm": 0.10020370379321845, | |
| "learning_rate": 9.17899919159182e-07, | |
| "loss": 0.6874, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 4.69492703266157, | |
| "grad_norm": 0.09790464816845096, | |
| "learning_rate": 8.849476874048313e-07, | |
| "loss": 0.6948, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 4.700486448922863, | |
| "grad_norm": 0.10255487590164812, | |
| "learning_rate": 8.525911882263593e-07, | |
| "loss": 0.6719, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 4.706045865184156, | |
| "grad_norm": 0.10203961568031111, | |
| "learning_rate": 8.208309144217907e-07, | |
| "loss": 0.6735, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 4.711605281445448, | |
| "grad_norm": 0.09324266068650292, | |
| "learning_rate": 7.896673497084806e-07, | |
| "loss": 0.6829, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 4.717164697706741, | |
| "grad_norm": 0.09193097095894487, | |
| "learning_rate": 7.591009687157513e-07, | |
| "loss": 0.6727, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 4.722724113968034, | |
| "grad_norm": 0.09523637486286199, | |
| "learning_rate": 7.291322369776587e-07, | |
| "loss": 0.6903, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 4.728283530229326, | |
| "grad_norm": 0.09029936441258628, | |
| "learning_rate": 6.997616109258953e-07, | |
| "loss": 0.6814, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 4.733842946490618, | |
| "grad_norm": 0.09621068677262763, | |
| "learning_rate": 6.709895378828624e-07, | |
| "loss": 0.6812, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 4.739402362751911, | |
| "grad_norm": 0.10052019299112373, | |
| "learning_rate": 6.428164560548134e-07, | |
| "loss": 0.6712, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 4.744961779013203, | |
| "grad_norm": 0.09083034386967587, | |
| "learning_rate": 6.152427945252415e-07, | |
| "loss": 0.678, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 4.750521195274496, | |
| "grad_norm": 0.09879226071076684, | |
| "learning_rate": 5.882689732482849e-07, | |
| "loss": 0.6757, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 4.756080611535789, | |
| "grad_norm": 0.09553488895974188, | |
| "learning_rate": 5.618954030423629e-07, | |
| "loss": 0.6752, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 4.761640027797081, | |
| "grad_norm": 0.09424384994567137, | |
| "learning_rate": 5.361224855839231e-07, | |
| "loss": 0.683, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 4.767199444058374, | |
| "grad_norm": 0.09152077250908355, | |
| "learning_rate": 5.109506134012954e-07, | |
| "loss": 0.6859, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 4.772758860319667, | |
| "grad_norm": 0.09865066740590055, | |
| "learning_rate": 4.863801698687409e-07, | |
| "loss": 0.6862, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 4.778318276580959, | |
| "grad_norm": 0.09925592595855842, | |
| "learning_rate": 4.6241152920060817e-07, | |
| "loss": 0.6738, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 4.7838776928422515, | |
| "grad_norm": 0.09697390059397973, | |
| "learning_rate": 4.390450564456172e-07, | |
| "loss": 0.6896, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 4.789437109103544, | |
| "grad_norm": 0.0967282255695822, | |
| "learning_rate": 4.1628110748132664e-07, | |
| "loss": 0.6782, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 4.794996525364836, | |
| "grad_norm": 0.09442707370223684, | |
| "learning_rate": 3.941200290086844e-07, | |
| "loss": 0.6816, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 4.800555941626129, | |
| "grad_norm": 0.09576785298097937, | |
| "learning_rate": 3.725621585467698e-07, | |
| "loss": 0.6705, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 4.806115357887422, | |
| "grad_norm": 0.09695945225291408, | |
| "learning_rate": 3.5160782442764216e-07, | |
| "loss": 0.6899, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 4.811674774148714, | |
| "grad_norm": 0.09577760816364103, | |
| "learning_rate": 3.3125734579134484e-07, | |
| "loss": 0.6777, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 4.817234190410007, | |
| "grad_norm": 0.09338488218320969, | |
| "learning_rate": 3.115110325810422e-07, | |
| "loss": 0.6854, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 4.8227936066713, | |
| "grad_norm": 0.0960229378366019, | |
| "learning_rate": 2.9236918553829486e-07, | |
| "loss": 0.6763, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 4.828353022932592, | |
| "grad_norm": 0.09329244119901567, | |
| "learning_rate": 2.738320961984897e-07, | |
| "loss": 0.6897, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 4.833912439193885, | |
| "grad_norm": 0.09515237568102272, | |
| "learning_rate": 2.559000468863948e-07, | |
| "loss": 0.6762, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 4.839471855455177, | |
| "grad_norm": 0.09217529962550147, | |
| "learning_rate": 2.3857331071185595e-07, | |
| "loss": 0.6844, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 4.845031271716469, | |
| "grad_norm": 0.09436065825798744, | |
| "learning_rate": 2.2185215156563578e-07, | |
| "loss": 0.6807, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 4.850590687977762, | |
| "grad_norm": 0.09523828279039967, | |
| "learning_rate": 2.057368241154123e-07, | |
| "loss": 0.6864, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 4.856150104239055, | |
| "grad_norm": 0.09536282527606363, | |
| "learning_rate": 1.9022757380186662e-07, | |
| "loss": 0.6746, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 4.861709520500347, | |
| "grad_norm": 0.09068920548317291, | |
| "learning_rate": 1.7532463683499258e-07, | |
| "loss": 0.6953, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 4.86726893676164, | |
| "grad_norm": 0.09687334992464874, | |
| "learning_rate": 1.6102824019043728e-07, | |
| "loss": 0.6804, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 4.872828353022933, | |
| "grad_norm": 0.1059025814156545, | |
| "learning_rate": 1.473386016061129e-07, | |
| "loss": 0.6888, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 4.878387769284225, | |
| "grad_norm": 0.09122337586792513, | |
| "learning_rate": 1.3425592957881707e-07, | |
| "loss": 0.6764, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 4.883947185545518, | |
| "grad_norm": 0.08918864835295429, | |
| "learning_rate": 1.2178042336111084e-07, | |
| "loss": 0.6928, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 4.8895066018068105, | |
| "grad_norm": 0.09078091143268772, | |
| "learning_rate": 1.0991227295824136e-07, | |
| "loss": 0.6773, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 4.8950660180681025, | |
| "grad_norm": 0.10134111972909693, | |
| "learning_rate": 9.865165912527286e-08, | |
| "loss": 0.689, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 4.900625434329395, | |
| "grad_norm": 0.09866237453180664, | |
| "learning_rate": 8.799875336433339e-08, | |
| "loss": 0.674, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 4.906184850590688, | |
| "grad_norm": 0.09296303972334871, | |
| "learning_rate": 7.795371792198136e-08, | |
| "loss": 0.6724, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 4.91174426685198, | |
| "grad_norm": 0.08889541850184303, | |
| "learning_rate": 6.851670578677195e-08, | |
| "loss": 0.6667, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 4.917303683113273, | |
| "grad_norm": 0.09207938757906006, | |
| "learning_rate": 5.968786068688115e-08, | |
| "loss": 0.6853, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 4.922863099374566, | |
| "grad_norm": 0.09370939668317128, | |
| "learning_rate": 5.146731708795649e-08, | |
| "loss": 0.6806, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 4.928422515635858, | |
| "grad_norm": 0.09199672461751096, | |
| "learning_rate": 4.3855200191043056e-08, | |
| "loss": 0.6859, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 4.933981931897151, | |
| "grad_norm": 0.09359409102339064, | |
| "learning_rate": 3.685162593070057e-08, | |
| "loss": 0.6827, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 4.939541348158444, | |
| "grad_norm": 0.09622079928972942, | |
| "learning_rate": 3.0456700973204854e-08, | |
| "loss": 0.6887, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 4.945100764419736, | |
| "grad_norm": 0.08901882738431295, | |
| "learning_rate": 2.4670522714957955e-08, | |
| "loss": 0.6689, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 4.950660180681028, | |
| "grad_norm": 0.09115816975859309, | |
| "learning_rate": 1.949317928097827e-08, | |
| "loss": 0.6838, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 4.956219596942321, | |
| "grad_norm": 0.08967241218428385, | |
| "learning_rate": 1.4924749523572703e-08, | |
| "loss": 0.6764, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 4.961779013203613, | |
| "grad_norm": 0.09584309542019002, | |
| "learning_rate": 1.0965303021142071e-08, | |
| "loss": 0.6877, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 4.967338429464906, | |
| "grad_norm": 0.09452425078700585, | |
| "learning_rate": 7.614900077093091e-09, | |
| "loss": 0.6903, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 4.972897845726199, | |
| "grad_norm": 0.09503080666295127, | |
| "learning_rate": 4.873591718945747e-09, | |
| "loss": 0.685, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 4.978457261987491, | |
| "grad_norm": 0.09096217519727556, | |
| "learning_rate": 2.741419697560588e-09, | |
| "loss": 0.6779, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 4.984016678248784, | |
| "grad_norm": 0.08920592010999287, | |
| "learning_rate": 1.2184164864725845e-09, | |
| "loss": 0.6931, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 4.989576094510077, | |
| "grad_norm": 0.09316414190683223, | |
| "learning_rate": 3.0460528142040033e-10, | |
| "loss": 0.6791, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 4.995135510771369, | |
| "grad_norm": 0.09052975457523055, | |
| "learning_rate": 0.0, | |
| "loss": 0.6829, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 4.995135510771369, | |
| "step": 895, | |
| "total_flos": 2.3044737561377374e+19, | |
| "train_loss": 0.13391224918418757, | |
| "train_runtime": 17729.1155, | |
| "train_samples_per_second": 25.968, | |
| "train_steps_per_second": 0.05 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 895, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.3044737561377374e+19, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
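
The state above is a Trainer-style run log: a `log_history` list of per-step records (`epoch`, `grad_norm`, `learning_rate`, `loss`, `step`), a final run-summary entry (`train_loss`, `train_runtime`, `total_flos`, throughput), and the trailing trainer configuration. Below is a minimal sketch of how one might load and summarize such a file; it assumes the JSON has been saved as `trainer_state.json` in the current directory (the filename is an assumption, not part of the log) and uses only the Python standard library.

```python
# Minimal sketch (not part of the logged state): load the state shown above and
# print a brief summary. The filename "trainer_state.json" is an assumption.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

history = state["log_history"]

# Per-step records carry a "loss" key; the final run-summary entry uses
# "train_loss" instead, so it is excluded here.
step_losses = [(e["step"], e["loss"]) for e in history if "loss" in e]

first_step, first_loss = step_losses[0]
last_step, last_loss = step_losses[-1]

print(f"steps logged : {len(step_losses)} (step {first_step} to {last_step})")
print(f"loss         : {first_loss:.4f} -> {last_loss:.4f}")
print(f"epochs       : {state['num_train_epochs']}, max_steps: {state['max_steps']}")
print(f"runtime (s)  : {history[-1].get('train_runtime')}")
```

Running it against the file reproduces the headline figures recorded in the summary entry (895 logged steps over 5 epochs and the reported `train_runtime`), which is a quick sanity check that the log parsed cleanly.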