| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9983753046303816, |
| "eval_steps": 500, |
| "global_step": 2460, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0012185215272136475, |
| "grad_norm": 6.834534852371434, |
| "learning_rate": 4.0650406504065046e-08, |
| "loss": 1.0814, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.002437043054427295, |
| "grad_norm": 6.434639482368721, |
| "learning_rate": 8.130081300813009e-08, |
| "loss": 1.0829, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0036555645816409425, |
| "grad_norm": 6.863769364155968, |
| "learning_rate": 1.2195121951219514e-07, |
| "loss": 1.1046, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.00487408610885459, |
| "grad_norm": 6.5857528488136925, |
| "learning_rate": 1.6260162601626018e-07, |
| "loss": 1.0853, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.006092607636068237, |
| "grad_norm": 6.521091645053231, |
| "learning_rate": 2.0325203252032523e-07, |
| "loss": 1.0773, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.007311129163281885, |
| "grad_norm": 6.29456329359191, |
| "learning_rate": 2.439024390243903e-07, |
| "loss": 1.0569, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.008529650690495532, |
| "grad_norm": 6.6897100511742575, |
| "learning_rate": 2.845528455284553e-07, |
| "loss": 1.0615, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.00974817221770918, |
| "grad_norm": 6.645918686532055, |
| "learning_rate": 3.2520325203252037e-07, |
| "loss": 1.11, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.010966693744922826, |
| "grad_norm": 6.5860967765132505, |
| "learning_rate": 3.6585365853658536e-07, |
| "loss": 1.0968, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.012185215272136474, |
| "grad_norm": 6.316550148573923, |
| "learning_rate": 4.0650406504065046e-07, |
| "loss": 1.0402, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.013403736799350122, |
| "grad_norm": 6.589233946221034, |
| "learning_rate": 4.471544715447155e-07, |
| "loss": 1.0883, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.01462225832656377, |
| "grad_norm": 5.923561630066145, |
| "learning_rate": 4.878048780487805e-07, |
| "loss": 1.0577, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.015840779853777416, |
| "grad_norm": 6.107921911359583, |
| "learning_rate": 5.284552845528456e-07, |
| "loss": 1.0624, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.017059301380991064, |
| "grad_norm": 5.923788461868563, |
| "learning_rate": 5.691056910569106e-07, |
| "loss": 1.0736, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.018277822908204712, |
| "grad_norm": 5.874057009305477, |
| "learning_rate": 6.097560975609757e-07, |
| "loss": 1.0447, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.01949634443541836, |
| "grad_norm": 5.031342070027601, |
| "learning_rate": 6.504065040650407e-07, |
| "loss": 1.0433, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.020714865962632008, |
| "grad_norm": 4.869478502925938, |
| "learning_rate": 6.910569105691058e-07, |
| "loss": 1.0411, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.021933387489845652, |
| "grad_norm": 4.534685569961031, |
| "learning_rate": 7.317073170731707e-07, |
| "loss": 0.9874, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0231519090170593, |
| "grad_norm": 4.357854034553876, |
| "learning_rate": 7.723577235772359e-07, |
| "loss": 1.0014, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.024370430544272948, |
| "grad_norm": 4.315329088535952, |
| "learning_rate": 8.130081300813009e-07, |
| "loss": 0.9925, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.025588952071486596, |
| "grad_norm": 3.3054166230275337, |
| "learning_rate": 8.53658536585366e-07, |
| "loss": 1.003, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.026807473598700244, |
| "grad_norm": 2.726952834638993, |
| "learning_rate": 8.94308943089431e-07, |
| "loss": 0.9692, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.028025995125913892, |
| "grad_norm": 2.7243029466264, |
| "learning_rate": 9.349593495934959e-07, |
| "loss": 1.0212, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.02924451665312754, |
| "grad_norm": 2.5896267051583313, |
| "learning_rate": 9.75609756097561e-07, |
| "loss": 0.9757, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.030463038180341188, |
| "grad_norm": 2.6706226110708498, |
| "learning_rate": 1.0162601626016261e-06, |
| "loss": 0.9801, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.03168155970755483, |
| "grad_norm": 2.437924378367497, |
| "learning_rate": 1.0569105691056912e-06, |
| "loss": 0.9768, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.03290008123476848, |
| "grad_norm": 2.5803827822332397, |
| "learning_rate": 1.0975609756097562e-06, |
| "loss": 0.9895, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.03411860276198213, |
| "grad_norm": 2.203043465720474, |
| "learning_rate": 1.1382113821138213e-06, |
| "loss": 0.9726, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.035337124289195776, |
| "grad_norm": 1.805085101507344, |
| "learning_rate": 1.1788617886178863e-06, |
| "loss": 0.9237, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.036555645816409424, |
| "grad_norm": 2.1165116479858215, |
| "learning_rate": 1.2195121951219514e-06, |
| "loss": 0.9338, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.03777416734362307, |
| "grad_norm": 2.3435725524236735, |
| "learning_rate": 1.2601626016260162e-06, |
| "loss": 0.9365, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.03899268887083672, |
| "grad_norm": 2.2985182006567326, |
| "learning_rate": 1.3008130081300815e-06, |
| "loss": 0.9135, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.04021121039805037, |
| "grad_norm": 2.1356048436615036, |
| "learning_rate": 1.3414634146341465e-06, |
| "loss": 0.9196, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.041429731925264016, |
| "grad_norm": 2.028116965668269, |
| "learning_rate": 1.3821138211382116e-06, |
| "loss": 0.9074, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.042648253452477664, |
| "grad_norm": 1.7266339650438713, |
| "learning_rate": 1.4227642276422766e-06, |
| "loss": 0.8969, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.043866774979691305, |
| "grad_norm": 1.548462619619361, |
| "learning_rate": 1.4634146341463414e-06, |
| "loss": 0.9009, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.04508529650690495, |
| "grad_norm": 1.2325234503287605, |
| "learning_rate": 1.5040650406504067e-06, |
| "loss": 0.8942, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.0463038180341186, |
| "grad_norm": 1.119008985117944, |
| "learning_rate": 1.5447154471544717e-06, |
| "loss": 0.8869, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.04752233956133225, |
| "grad_norm": 1.232748685157114, |
| "learning_rate": 1.5853658536585368e-06, |
| "loss": 0.8542, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.048740861088545896, |
| "grad_norm": 1.6677472249438465, |
| "learning_rate": 1.6260162601626018e-06, |
| "loss": 0.8594, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.049959382615759544, |
| "grad_norm": 1.5964345757340976, |
| "learning_rate": 1.6666666666666667e-06, |
| "loss": 0.8481, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.05117790414297319, |
| "grad_norm": 1.5241421915515152, |
| "learning_rate": 1.707317073170732e-06, |
| "loss": 0.8478, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.05239642567018684, |
| "grad_norm": 1.350659230101709, |
| "learning_rate": 1.747967479674797e-06, |
| "loss": 0.8669, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.05361494719740049, |
| "grad_norm": 1.0459886477510316, |
| "learning_rate": 1.788617886178862e-06, |
| "loss": 0.8229, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.054833468724614136, |
| "grad_norm": 0.9790437025811132, |
| "learning_rate": 1.8292682926829268e-06, |
| "loss": 0.8214, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.056051990251827784, |
| "grad_norm": 1.1555035661921353, |
| "learning_rate": 1.8699186991869919e-06, |
| "loss": 0.8106, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.05727051177904143, |
| "grad_norm": 1.246916947603362, |
| "learning_rate": 1.9105691056910574e-06, |
| "loss": 0.8193, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.05848903330625508, |
| "grad_norm": 1.1905103203002494, |
| "learning_rate": 1.951219512195122e-06, |
| "loss": 0.8075, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.05970755483346873, |
| "grad_norm": 0.918493036902164, |
| "learning_rate": 1.991869918699187e-06, |
| "loss": 0.7842, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.060926076360682375, |
| "grad_norm": 0.961427277586569, |
| "learning_rate": 2.0325203252032523e-06, |
| "loss": 0.8206, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.062144597887896016, |
| "grad_norm": 0.7135293393398392, |
| "learning_rate": 2.073170731707317e-06, |
| "loss": 0.7784, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.06336311941510966, |
| "grad_norm": 0.8687421281930399, |
| "learning_rate": 2.1138211382113824e-06, |
| "loss": 0.7953, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.06458164094232331, |
| "grad_norm": 0.8575700781368814, |
| "learning_rate": 2.154471544715447e-06, |
| "loss": 0.7926, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.06580016246953696, |
| "grad_norm": 0.9435599171162209, |
| "learning_rate": 2.1951219512195125e-06, |
| "loss": 0.7766, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.06701868399675061, |
| "grad_norm": 0.7215369508734659, |
| "learning_rate": 2.2357723577235773e-06, |
| "loss": 0.7686, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.06823720552396426, |
| "grad_norm": 0.6329822213923535, |
| "learning_rate": 2.2764227642276426e-06, |
| "loss": 0.7868, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.0694557270511779, |
| "grad_norm": 0.6559439915679887, |
| "learning_rate": 2.317073170731708e-06, |
| "loss": 0.7672, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.07067424857839155, |
| "grad_norm": 0.6424064072265188, |
| "learning_rate": 2.3577235772357727e-06, |
| "loss": 0.7876, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.0718927701056052, |
| "grad_norm": 0.5670684781739027, |
| "learning_rate": 2.3983739837398375e-06, |
| "loss": 0.7545, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.07311129163281885, |
| "grad_norm": 0.62119888744641, |
| "learning_rate": 2.4390243902439027e-06, |
| "loss": 0.7778, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0743298131600325, |
| "grad_norm": 0.5945888357559133, |
| "learning_rate": 2.4796747967479676e-06, |
| "loss": 0.7593, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.07554833468724614, |
| "grad_norm": 0.5566344615882963, |
| "learning_rate": 2.5203252032520324e-06, |
| "loss": 0.7783, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.07676685621445979, |
| "grad_norm": 0.6010029344681969, |
| "learning_rate": 2.5609756097560977e-06, |
| "loss": 0.7824, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.07798537774167344, |
| "grad_norm": 0.5620208665027641, |
| "learning_rate": 2.601626016260163e-06, |
| "loss": 0.7538, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.07920389926888709, |
| "grad_norm": 0.629839488738847, |
| "learning_rate": 2.6422764227642278e-06, |
| "loss": 0.7478, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.08042242079610074, |
| "grad_norm": 0.5843721125393191, |
| "learning_rate": 2.682926829268293e-06, |
| "loss": 0.7551, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.08164094232331438, |
| "grad_norm": 0.6807297637633912, |
| "learning_rate": 2.723577235772358e-06, |
| "loss": 0.7643, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.08285946385052803, |
| "grad_norm": 0.5337293638802343, |
| "learning_rate": 2.764227642276423e-06, |
| "loss": 0.7581, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.08407798537774168, |
| "grad_norm": 0.5557859370743894, |
| "learning_rate": 2.8048780487804884e-06, |
| "loss": 0.7485, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.08529650690495533, |
| "grad_norm": 0.5518435051656209, |
| "learning_rate": 2.845528455284553e-06, |
| "loss": 0.7503, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.08651502843216897, |
| "grad_norm": 0.5585475097227505, |
| "learning_rate": 2.8861788617886185e-06, |
| "loss": 0.7456, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.08773354995938261, |
| "grad_norm": 0.5842062776799712, |
| "learning_rate": 2.926829268292683e-06, |
| "loss": 0.7457, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.08895207148659626, |
| "grad_norm": 0.5691413037940362, |
| "learning_rate": 2.967479674796748e-06, |
| "loss": 0.7316, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.0901705930138099, |
| "grad_norm": 0.592000953180703, |
| "learning_rate": 3.0081300813008134e-06, |
| "loss": 0.7379, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.09138911454102355, |
| "grad_norm": 0.4986095202064355, |
| "learning_rate": 3.0487804878048782e-06, |
| "loss": 0.7026, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.0926076360682372, |
| "grad_norm": 0.5610598503101445, |
| "learning_rate": 3.0894308943089435e-06, |
| "loss": 0.7366, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.09382615759545085, |
| "grad_norm": 0.5189286785140359, |
| "learning_rate": 3.1300813008130083e-06, |
| "loss": 0.7343, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.0950446791226645, |
| "grad_norm": 0.5352050364602269, |
| "learning_rate": 3.1707317073170736e-06, |
| "loss": 0.7128, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.09626320064987814, |
| "grad_norm": 0.589544520130887, |
| "learning_rate": 3.211382113821139e-06, |
| "loss": 0.7377, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.09748172217709179, |
| "grad_norm": 0.5170292516124821, |
| "learning_rate": 3.2520325203252037e-06, |
| "loss": 0.751, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.09870024370430544, |
| "grad_norm": 0.5178115988752247, |
| "learning_rate": 3.292682926829269e-06, |
| "loss": 0.7263, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.09991876523151909, |
| "grad_norm": 0.5758324455305359, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 0.7204, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.10113728675873274, |
| "grad_norm": 0.5191922407454059, |
| "learning_rate": 3.3739837398373986e-06, |
| "loss": 0.7323, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.10235580828594638, |
| "grad_norm": 0.5706404216543195, |
| "learning_rate": 3.414634146341464e-06, |
| "loss": 0.7343, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.10357432981316003, |
| "grad_norm": 0.5166974408338545, |
| "learning_rate": 3.4552845528455287e-06, |
| "loss": 0.7347, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.10479285134037368, |
| "grad_norm": 0.575076347441057, |
| "learning_rate": 3.495934959349594e-06, |
| "loss": 0.729, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.10601137286758733, |
| "grad_norm": 0.5503219216241421, |
| "learning_rate": 3.5365853658536588e-06, |
| "loss": 0.7247, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.10722989439480098, |
| "grad_norm": 0.5315644262103328, |
| "learning_rate": 3.577235772357724e-06, |
| "loss": 0.7188, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.10844841592201462, |
| "grad_norm": 0.5283688627559194, |
| "learning_rate": 3.6178861788617893e-06, |
| "loss": 0.7328, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.10966693744922827, |
| "grad_norm": 0.5657078936164937, |
| "learning_rate": 3.6585365853658537e-06, |
| "loss": 0.7317, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.11088545897644192, |
| "grad_norm": 0.5285271136308272, |
| "learning_rate": 3.699186991869919e-06, |
| "loss": 0.7259, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.11210398050365557, |
| "grad_norm": 0.5665339591374581, |
| "learning_rate": 3.7398373983739838e-06, |
| "loss": 0.71, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.11332250203086922, |
| "grad_norm": 0.5408789367861271, |
| "learning_rate": 3.780487804878049e-06, |
| "loss": 0.7287, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.11454102355808286, |
| "grad_norm": 0.530024765222071, |
| "learning_rate": 3.821138211382115e-06, |
| "loss": 0.7158, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.11575954508529651, |
| "grad_norm": 0.525972820727265, |
| "learning_rate": 3.861788617886179e-06, |
| "loss": 0.6953, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.11697806661251016, |
| "grad_norm": 0.5538892198758983, |
| "learning_rate": 3.902439024390244e-06, |
| "loss": 0.7213, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.11819658813972381, |
| "grad_norm": 0.544929946076996, |
| "learning_rate": 3.943089430894309e-06, |
| "loss": 0.6996, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.11941510966693746, |
| "grad_norm": 0.5734775021447369, |
| "learning_rate": 3.983739837398374e-06, |
| "loss": 0.7154, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.1206336311941511, |
| "grad_norm": 0.5045397032963369, |
| "learning_rate": 4.024390243902439e-06, |
| "loss": 0.7024, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.12185215272136475, |
| "grad_norm": 0.5236114609222794, |
| "learning_rate": 4.0650406504065046e-06, |
| "loss": 0.7226, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.12307067424857839, |
| "grad_norm": 0.5641036987903533, |
| "learning_rate": 4.10569105691057e-06, |
| "loss": 0.7054, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.12428919577579203, |
| "grad_norm": 0.508465869676476, |
| "learning_rate": 4.146341463414634e-06, |
| "loss": 0.7003, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.1255077173030057, |
| "grad_norm": 0.5202630797257376, |
| "learning_rate": 4.1869918699186995e-06, |
| "loss": 0.7204, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.12672623883021933, |
| "grad_norm": 0.5552933325377176, |
| "learning_rate": 4.227642276422765e-06, |
| "loss": 0.7279, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.127944760357433, |
| "grad_norm": 0.5416012915563714, |
| "learning_rate": 4.268292682926829e-06, |
| "loss": 0.7093, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.12916328188464662, |
| "grad_norm": 0.5104004921064896, |
| "learning_rate": 4.308943089430894e-06, |
| "loss": 0.7013, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.1303818034118603, |
| "grad_norm": 0.5765782104977603, |
| "learning_rate": 4.34959349593496e-06, |
| "loss": 0.7045, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.13160032493907392, |
| "grad_norm": 0.5017287673543831, |
| "learning_rate": 4.390243902439025e-06, |
| "loss": 0.6997, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.13281884646628758, |
| "grad_norm": 0.48808935518722807, |
| "learning_rate": 4.43089430894309e-06, |
| "loss": 0.7184, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.13403736799350122, |
| "grad_norm": 0.531216252027469, |
| "learning_rate": 4.471544715447155e-06, |
| "loss": 0.7069, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.13525588952071488, |
| "grad_norm": 0.5149543102282697, |
| "learning_rate": 4.51219512195122e-06, |
| "loss": 0.7023, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.1364744110479285, |
| "grad_norm": 0.5291257352871406, |
| "learning_rate": 4.552845528455285e-06, |
| "loss": 0.7185, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.13769293257514217, |
| "grad_norm": 0.47858897961333036, |
| "learning_rate": 4.59349593495935e-06, |
| "loss": 0.7149, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.1389114541023558, |
| "grad_norm": 0.5359903721383661, |
| "learning_rate": 4.634146341463416e-06, |
| "loss": 0.7118, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.14012997562956944, |
| "grad_norm": 0.5023325811416011, |
| "learning_rate": 4.67479674796748e-06, |
| "loss": 0.6986, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.1413484971567831, |
| "grad_norm": 0.507102678949565, |
| "learning_rate": 4.715447154471545e-06, |
| "loss": 0.7053, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.14256701868399674, |
| "grad_norm": 0.5162377887307996, |
| "learning_rate": 4.75609756097561e-06, |
| "loss": 0.6971, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.1437855402112104, |
| "grad_norm": 0.5228780554370066, |
| "learning_rate": 4.796747967479675e-06, |
| "loss": 0.6915, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.14500406173842403, |
| "grad_norm": 0.5539538888660016, |
| "learning_rate": 4.83739837398374e-06, |
| "loss": 0.6979, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.1462225832656377, |
| "grad_norm": 0.6135340785022244, |
| "learning_rate": 4.8780487804878055e-06, |
| "loss": 0.7197, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.14744110479285133, |
| "grad_norm": 0.5261935119823417, |
| "learning_rate": 4.918699186991871e-06, |
| "loss": 0.6957, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.148659626320065, |
| "grad_norm": 0.5941876044718514, |
| "learning_rate": 4.959349593495935e-06, |
| "loss": 0.7031, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.14987814784727863, |
| "grad_norm": 0.5436866255986976, |
| "learning_rate": 5e-06, |
| "loss": 0.7068, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.1510966693744923, |
| "grad_norm": 0.5295736343510782, |
| "learning_rate": 5.040650406504065e-06, |
| "loss": 0.686, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.15231519090170592, |
| "grad_norm": 0.5536691790810129, |
| "learning_rate": 5.081300813008131e-06, |
| "loss": 0.681, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.15353371242891958, |
| "grad_norm": 0.6057295035493935, |
| "learning_rate": 5.121951219512195e-06, |
| "loss": 0.7195, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.15475223395613322, |
| "grad_norm": 0.49006287650569935, |
| "learning_rate": 5.162601626016261e-06, |
| "loss": 0.6846, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.15597075548334688, |
| "grad_norm": 0.5312531193234717, |
| "learning_rate": 5.203252032520326e-06, |
| "loss": 0.7038, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.1571892770105605, |
| "grad_norm": 0.5581018411430491, |
| "learning_rate": 5.243902439024391e-06, |
| "loss": 0.7184, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.15840779853777417, |
| "grad_norm": 0.5133234429893759, |
| "learning_rate": 5.2845528455284555e-06, |
| "loss": 0.6792, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.1596263200649878, |
| "grad_norm": 0.5191238918744202, |
| "learning_rate": 5.32520325203252e-06, |
| "loss": 0.6921, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.16084484159220147, |
| "grad_norm": 0.6134860482308477, |
| "learning_rate": 5.365853658536586e-06, |
| "loss": 0.6764, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.1620633631194151, |
| "grad_norm": 0.6271296104201189, |
| "learning_rate": 5.4065040650406504e-06, |
| "loss": 0.6829, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.16328188464662877, |
| "grad_norm": 0.6385973016730124, |
| "learning_rate": 5.447154471544716e-06, |
| "loss": 0.6957, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.1645004061738424, |
| "grad_norm": 0.5686127235373731, |
| "learning_rate": 5.487804878048781e-06, |
| "loss": 0.698, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.16571892770105606, |
| "grad_norm": 0.6833156687934043, |
| "learning_rate": 5.528455284552846e-06, |
| "loss": 0.6851, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.1669374492282697, |
| "grad_norm": 0.700308205780514, |
| "learning_rate": 5.569105691056911e-06, |
| "loss": 0.6806, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.16815597075548336, |
| "grad_norm": 0.5462617932277235, |
| "learning_rate": 5.609756097560977e-06, |
| "loss": 0.7091, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.169374492282697, |
| "grad_norm": 0.7852807393490868, |
| "learning_rate": 5.650406504065041e-06, |
| "loss": 0.7123, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.17059301380991065, |
| "grad_norm": 0.6057177569541422, |
| "learning_rate": 5.691056910569106e-06, |
| "loss": 0.7078, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.1718115353371243, |
| "grad_norm": 0.6623630085315475, |
| "learning_rate": 5.731707317073171e-06, |
| "loss": 0.6685, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.17303005686433795, |
| "grad_norm": 0.6456913396682378, |
| "learning_rate": 5.772357723577237e-06, |
| "loss": 0.7076, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.17424857839155158, |
| "grad_norm": 0.5490338737139909, |
| "learning_rate": 5.813008130081301e-06, |
| "loss": 0.6679, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.17546709991876522, |
| "grad_norm": 0.6684035267057585, |
| "learning_rate": 5.853658536585366e-06, |
| "loss": 0.6862, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.17668562144597888, |
| "grad_norm": 0.5217072648450134, |
| "learning_rate": 5.894308943089432e-06, |
| "loss": 0.6846, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.17790414297319251, |
| "grad_norm": 0.5999701195487821, |
| "learning_rate": 5.934959349593496e-06, |
| "loss": 0.6971, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.17912266450040618, |
| "grad_norm": 0.6672016415742844, |
| "learning_rate": 5.9756097560975615e-06, |
| "loss": 0.7023, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.1803411860276198, |
| "grad_norm": 0.5638253505945849, |
| "learning_rate": 6.016260162601627e-06, |
| "loss": 0.6718, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.18155970755483347, |
| "grad_norm": 0.5443164616899534, |
| "learning_rate": 6.056910569105692e-06, |
| "loss": 0.678, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.1827782290820471, |
| "grad_norm": 0.5515636708718161, |
| "learning_rate": 6.0975609756097564e-06, |
| "loss": 0.6914, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.18399675060926077, |
| "grad_norm": 0.6385969983161707, |
| "learning_rate": 6.138211382113821e-06, |
| "loss": 0.6796, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.1852152721364744, |
| "grad_norm": 0.6113406592810082, |
| "learning_rate": 6.178861788617887e-06, |
| "loss": 0.682, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.18643379366368806, |
| "grad_norm": 0.6906350808865743, |
| "learning_rate": 6.219512195121951e-06, |
| "loss": 0.6671, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.1876523151909017, |
| "grad_norm": 0.7020113339328089, |
| "learning_rate": 6.260162601626017e-06, |
| "loss": 0.6835, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.18887083671811536, |
| "grad_norm": 0.5548828056807938, |
| "learning_rate": 6.300813008130082e-06, |
| "loss": 0.6809, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.190089358245329, |
| "grad_norm": 0.8352572415357199, |
| "learning_rate": 6.341463414634147e-06, |
| "loss": 0.6809, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.19130787977254265, |
| "grad_norm": 0.6517742914384106, |
| "learning_rate": 6.3821138211382115e-06, |
| "loss": 0.6791, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.1925264012997563, |
| "grad_norm": 0.6204344146843959, |
| "learning_rate": 6.422764227642278e-06, |
| "loss": 0.6766, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.19374492282696995, |
| "grad_norm": 0.8219899409754744, |
| "learning_rate": 6.463414634146342e-06, |
| "loss": 0.6726, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.19496344435418358, |
| "grad_norm": 0.6541183549209502, |
| "learning_rate": 6.504065040650407e-06, |
| "loss": 0.6781, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.19618196588139725, |
| "grad_norm": 0.566310879262149, |
| "learning_rate": 6.544715447154472e-06, |
| "loss": 0.6702, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.19740048740861088, |
| "grad_norm": 0.775755089339994, |
| "learning_rate": 6.585365853658538e-06, |
| "loss": 0.6905, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.19861900893582454, |
| "grad_norm": 0.6288678821845954, |
| "learning_rate": 6.626016260162602e-06, |
| "loss": 0.6881, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.19983753046303818, |
| "grad_norm": 0.7640676261377178, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 0.6737, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.20105605199025184, |
| "grad_norm": 0.5731637259066372, |
| "learning_rate": 6.707317073170733e-06, |
| "loss": 0.674, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.20227457351746547, |
| "grad_norm": 0.7761516718399211, |
| "learning_rate": 6.747967479674797e-06, |
| "loss": 0.6928, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.20349309504467913, |
| "grad_norm": 0.7100095841961804, |
| "learning_rate": 6.788617886178862e-06, |
| "loss": 0.6727, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.20471161657189277, |
| "grad_norm": 0.569930635478734, |
| "learning_rate": 6.829268292682928e-06, |
| "loss": 0.689, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.20593013809910643, |
| "grad_norm": 0.7691268212355195, |
| "learning_rate": 6.869918699186993e-06, |
| "loss": 0.6973, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.20714865962632006, |
| "grad_norm": 0.560097362553805, |
| "learning_rate": 6.910569105691057e-06, |
| "loss": 0.6681, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.20836718115353373, |
| "grad_norm": 0.6849837037178143, |
| "learning_rate": 6.951219512195122e-06, |
| "loss": 0.6592, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.20958570268074736, |
| "grad_norm": 0.7951681541297303, |
| "learning_rate": 6.991869918699188e-06, |
| "loss": 0.6812, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.210804224207961, |
| "grad_norm": 0.5428585266109707, |
| "learning_rate": 7.032520325203252e-06, |
| "loss": 0.696, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.21202274573517466, |
| "grad_norm": 0.7462142080092842, |
| "learning_rate": 7.0731707317073175e-06, |
| "loss": 0.6793, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.2132412672623883, |
| "grad_norm": 0.6370138105851062, |
| "learning_rate": 7.113821138211383e-06, |
| "loss": 0.6717, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.21445978878960195, |
| "grad_norm": 0.566025113423941, |
| "learning_rate": 7.154471544715448e-06, |
| "loss": 0.6694, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.21567831031681559, |
| "grad_norm": 0.6632467338949928, |
| "learning_rate": 7.1951219512195125e-06, |
| "loss": 0.679, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.21689683184402925, |
| "grad_norm": 0.5775437329049822, |
| "learning_rate": 7.2357723577235786e-06, |
| "loss": 0.6738, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.21811535337124288, |
| "grad_norm": 0.6763254821774859, |
| "learning_rate": 7.276422764227643e-06, |
| "loss": 0.6885, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.21933387489845654, |
| "grad_norm": 0.6525555364778458, |
| "learning_rate": 7.317073170731707e-06, |
| "loss": 0.6829, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.22055239642567018, |
| "grad_norm": 0.6376488223620492, |
| "learning_rate": 7.357723577235773e-06, |
| "loss": 0.6611, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.22177091795288384, |
| "grad_norm": 0.6135443136132807, |
| "learning_rate": 7.398373983739838e-06, |
| "loss": 0.6875, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.22298943948009747, |
| "grad_norm": 0.6616707267536054, |
| "learning_rate": 7.439024390243903e-06, |
| "loss": 0.6637, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.22420796100731114, |
| "grad_norm": 0.6601543949811752, |
| "learning_rate": 7.4796747967479676e-06, |
| "loss": 0.6714, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.22542648253452477, |
| "grad_norm": 0.689531862633905, |
| "learning_rate": 7.520325203252034e-06, |
| "loss": 0.6717, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.22664500406173843, |
| "grad_norm": 0.6693067594219624, |
| "learning_rate": 7.560975609756098e-06, |
| "loss": 0.6538, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.22786352558895206, |
| "grad_norm": 0.6877489613732909, |
| "learning_rate": 7.601626016260163e-06, |
| "loss": 0.6791, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.22908204711616573, |
| "grad_norm": 0.6102935004924294, |
| "learning_rate": 7.64227642276423e-06, |
| "loss": 0.6697, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.23030056864337936, |
| "grad_norm": 0.7109056322063843, |
| "learning_rate": 7.682926829268293e-06, |
| "loss": 0.679, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.23151909017059302, |
| "grad_norm": 0.6183616410187914, |
| "learning_rate": 7.723577235772358e-06, |
| "loss": 0.662, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.23273761169780666, |
| "grad_norm": 0.6117992409555106, |
| "learning_rate": 7.764227642276424e-06, |
| "loss": 0.6671, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.23395613322502032, |
| "grad_norm": 0.7288006220266883, |
| "learning_rate": 7.804878048780489e-06, |
| "loss": 0.7049, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.23517465475223395, |
| "grad_norm": 0.6795369812377434, |
| "learning_rate": 7.845528455284554e-06, |
| "loss": 0.6638, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.23639317627944761, |
| "grad_norm": 0.6336366896960686, |
| "learning_rate": 7.886178861788618e-06, |
| "loss": 0.6738, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.23761169780666125, |
| "grad_norm": 0.7184491761674651, |
| "learning_rate": 7.926829268292685e-06, |
| "loss": 0.6628, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.2388302193338749, |
| "grad_norm": 0.659177288266525, |
| "learning_rate": 7.967479674796748e-06, |
| "loss": 0.6805, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.24004874086108854, |
| "grad_norm": 0.578465157473097, |
| "learning_rate": 8.008130081300813e-06, |
| "loss": 0.6894, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.2412672623883022, |
| "grad_norm": 0.6868825593189901, |
| "learning_rate": 8.048780487804879e-06, |
| "loss": 0.6847, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.24248578391551584, |
| "grad_norm": 0.6185564483778565, |
| "learning_rate": 8.089430894308944e-06, |
| "loss": 0.6667, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.2437043054427295, |
| "grad_norm": 0.7195682220690447, |
| "learning_rate": 8.130081300813009e-06, |
| "loss": 0.6681, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.24492282696994314, |
| "grad_norm": 0.5988887592571761, |
| "learning_rate": 8.170731707317073e-06, |
| "loss": 0.663, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.24614134849715677, |
| "grad_norm": 0.6728160874756142, |
| "learning_rate": 8.21138211382114e-06, |
| "loss": 0.6688, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.24735987002437043, |
| "grad_norm": 0.6167676512934452, |
| "learning_rate": 8.252032520325203e-06, |
| "loss": 0.6597, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.24857839155158407, |
| "grad_norm": 0.6963420894322225, |
| "learning_rate": 8.292682926829268e-06, |
| "loss": 0.6739, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.24979691307879773, |
| "grad_norm": 0.6374482813383724, |
| "learning_rate": 8.333333333333334e-06, |
| "loss": 0.6782, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.2510154346060114, |
| "grad_norm": 0.7061607202293944, |
| "learning_rate": 8.373983739837399e-06, |
| "loss": 0.6784, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.252233956133225, |
| "grad_norm": 0.6588145062673179, |
| "learning_rate": 8.414634146341464e-06, |
| "loss": 0.6785, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.25345247766043866, |
| "grad_norm": 0.5575233234513415, |
| "learning_rate": 8.45528455284553e-06, |
| "loss": 0.659, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.2546709991876523, |
| "grad_norm": 0.660881658687861, |
| "learning_rate": 8.495934959349595e-06, |
| "loss": 0.672, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.255889520714866, |
| "grad_norm": 0.8528802704630485, |
| "learning_rate": 8.536585365853658e-06, |
| "loss": 0.6758, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.2571080422420796, |
| "grad_norm": 0.5423728405429434, |
| "learning_rate": 8.577235772357724e-06, |
| "loss": 0.6526, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.25832656376929325, |
| "grad_norm": 0.7828577081027186, |
| "learning_rate": 8.617886178861789e-06, |
| "loss": 0.6619, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.2595450852965069, |
| "grad_norm": 0.683409796151077, |
| "learning_rate": 8.658536585365854e-06, |
| "loss": 0.6609, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.2607636068237206, |
| "grad_norm": 0.7965663856791516, |
| "learning_rate": 8.69918699186992e-06, |
| "loss": 0.6771, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.2619821283509342, |
| "grad_norm": 0.5296355193224814, |
| "learning_rate": 8.739837398373985e-06, |
| "loss": 0.6631, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.26320064987814784, |
| "grad_norm": 0.6996343517682662, |
| "learning_rate": 8.78048780487805e-06, |
| "loss": 0.6778, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.2644191714053615, |
| "grad_norm": 0.5685534992796657, |
| "learning_rate": 8.821138211382113e-06, |
| "loss": 0.6542, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.26563769293257516, |
| "grad_norm": 0.5812188798996168, |
| "learning_rate": 8.86178861788618e-06, |
| "loss": 0.6665, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.2668562144597888, |
| "grad_norm": 0.5872098802829594, |
| "learning_rate": 8.902439024390244e-06, |
| "loss": 0.6793, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.26807473598700243, |
| "grad_norm": 0.6138295440628797, |
| "learning_rate": 8.94308943089431e-06, |
| "loss": 0.6845, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.26929325751421607, |
| "grad_norm": 0.5555943793492026, |
| "learning_rate": 8.983739837398374e-06, |
| "loss": 0.6456, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.27051177904142976, |
| "grad_norm": 0.5716563368438368, |
| "learning_rate": 9.02439024390244e-06, |
| "loss": 0.6881, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.2717303005686434, |
| "grad_norm": 0.5657678117161605, |
| "learning_rate": 9.065040650406505e-06, |
| "loss": 0.6721, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.272948822095857, |
| "grad_norm": 0.6494717196083141, |
| "learning_rate": 9.10569105691057e-06, |
| "loss": 0.6856, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.27416734362307066, |
| "grad_norm": 0.5769888402008441, |
| "learning_rate": 9.146341463414635e-06, |
| "loss": 0.65, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.27538586515028435, |
| "grad_norm": 0.6372976291357912, |
| "learning_rate": 9.1869918699187e-06, |
| "loss": 0.679, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.276604386677498, |
| "grad_norm": 0.61020413240267, |
| "learning_rate": 9.227642276422764e-06, |
| "loss": 0.6462, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.2778229082047116, |
| "grad_norm": 0.7347279477946881, |
| "learning_rate": 9.268292682926831e-06, |
| "loss": 0.6502, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.27904142973192525, |
| "grad_norm": 0.6513241840116742, |
| "learning_rate": 9.308943089430895e-06, |
| "loss": 0.6707, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.2802599512591389, |
| "grad_norm": 0.6578226137183896, |
| "learning_rate": 9.34959349593496e-06, |
| "loss": 0.6559, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.2814784727863526, |
| "grad_norm": 0.6443126179924461, |
| "learning_rate": 9.390243902439025e-06, |
| "loss": 0.6815, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.2826969943135662, |
| "grad_norm": 0.5681908489104979, |
| "learning_rate": 9.43089430894309e-06, |
| "loss": 0.6483, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.28391551584077984, |
| "grad_norm": 0.638868530973396, |
| "learning_rate": 9.471544715447156e-06, |
| "loss": 0.6663, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.2851340373679935, |
| "grad_norm": 0.5345735736702238, |
| "learning_rate": 9.51219512195122e-06, |
| "loss": 0.6507, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.28635255889520717, |
| "grad_norm": 0.6170557049684545, |
| "learning_rate": 9.552845528455286e-06, |
| "loss": 0.6533, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.2875710804224208, |
| "grad_norm": 0.6282001318911594, |
| "learning_rate": 9.59349593495935e-06, |
| "loss": 0.6715, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.28878960194963443, |
| "grad_norm": 0.548783110101442, |
| "learning_rate": 9.634146341463415e-06, |
| "loss": 0.6536, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.29000812347684807, |
| "grad_norm": 0.6300302160047813, |
| "learning_rate": 9.67479674796748e-06, |
| "loss": 0.657, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.29122664500406176, |
| "grad_norm": 0.5955216072274768, |
| "learning_rate": 9.715447154471546e-06, |
| "loss": 0.6767, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.2924451665312754, |
| "grad_norm": 0.6216921714562351, |
| "learning_rate": 9.756097560975611e-06, |
| "loss": 0.6492, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.293663688058489, |
| "grad_norm": 0.6909539613975563, |
| "learning_rate": 9.796747967479675e-06, |
| "loss": 0.6618, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.29488220958570266, |
| "grad_norm": 0.8137292747107515, |
| "learning_rate": 9.837398373983741e-06, |
| "loss": 0.6614, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.29610073111291635, |
| "grad_norm": 0.5855911517789665, |
| "learning_rate": 9.878048780487805e-06, |
| "loss": 0.6561, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.29731925264013, |
| "grad_norm": 0.8851136874577217, |
| "learning_rate": 9.91869918699187e-06, |
| "loss": 0.6498, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.2985377741673436, |
| "grad_norm": 0.57227502230073, |
| "learning_rate": 9.959349593495936e-06, |
| "loss": 0.6606, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.29975629569455725, |
| "grad_norm": 0.9576157821693805, |
| "learning_rate": 1e-05, |
| "loss": 0.648, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.30097481722177094, |
| "grad_norm": 0.574426873878406, |
| "learning_rate": 9.999994966333388e-06, |
| "loss": 0.6543, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.3021933387489846, |
| "grad_norm": 0.7230465083023617, |
| "learning_rate": 9.99997986534369e-06, |
| "loss": 0.6654, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.3034118602761982, |
| "grad_norm": 0.5421626680587527, |
| "learning_rate": 9.999954697061305e-06, |
| "loss": 0.6343, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.30463038180341184, |
| "grad_norm": 0.6129301937842085, |
| "learning_rate": 9.999919461536915e-06, |
| "loss": 0.6449, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.30584890333062553, |
| "grad_norm": 0.563497786259594, |
| "learning_rate": 9.999874158841462e-06, |
| "loss": 0.66, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.30706742485783917, |
| "grad_norm": 0.6709530297921161, |
| "learning_rate": 9.999818789066164e-06, |
| "loss": 0.6575, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.3082859463850528, |
| "grad_norm": 0.6033112191541231, |
| "learning_rate": 9.999753352322502e-06, |
| "loss": 0.6745, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.30950446791226643, |
| "grad_norm": 0.7085418197042371, |
| "learning_rate": 9.999677848742238e-06, |
| "loss": 0.645, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.3107229894394801, |
| "grad_norm": 0.6149439429340515, |
| "learning_rate": 9.999592278477389e-06, |
| "loss": 0.6553, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.31194151096669376, |
| "grad_norm": 0.5361824485289747, |
| "learning_rate": 9.999496641700249e-06, |
| "loss": 0.6394, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.3131600324939074, |
| "grad_norm": 0.7876266919973667, |
| "learning_rate": 9.99939093860338e-06, |
| "loss": 0.651, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.314378554021121, |
| "grad_norm": 0.5240336550865616, |
| "learning_rate": 9.999275169399614e-06, |
| "loss": 0.6445, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.31559707554833466, |
| "grad_norm": 0.9003012478867778, |
| "learning_rate": 9.999149334322047e-06, |
| "loss": 0.6759, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.31681559707554835, |
| "grad_norm": 0.520552428762164, |
| "learning_rate": 9.999013433624042e-06, |
| "loss": 0.6656, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.318034118602762, |
| "grad_norm": 0.8451285058918907, |
| "learning_rate": 9.998867467579234e-06, |
| "loss": 0.6393, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.3192526401299756, |
| "grad_norm": 0.6368634173244008, |
| "learning_rate": 9.998711436481519e-06, |
| "loss": 0.6544, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.32047116165718925, |
| "grad_norm": 0.690099709138949, |
| "learning_rate": 9.998545340645058e-06, |
| "loss": 0.6609, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.32168968318440294, |
| "grad_norm": 0.7144861500132949, |
| "learning_rate": 9.998369180404283e-06, |
| "loss": 0.6647, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.3229082047116166, |
| "grad_norm": 0.6362319514002672, |
| "learning_rate": 9.998182956113885e-06, |
| "loss": 0.6533, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.3241267262388302, |
| "grad_norm": 0.6488964510495924, |
| "learning_rate": 9.99798666814882e-06, |
| "loss": 0.6504, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.32534524776604384, |
| "grad_norm": 0.6063198470537309, |
| "learning_rate": 9.99778031690431e-06, |
| "loss": 0.6563, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.32656376929325753, |
| "grad_norm": 0.5938533025522102, |
| "learning_rate": 9.997563902795834e-06, |
| "loss": 0.6675, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.32778229082047117, |
| "grad_norm": 0.7515871090930308, |
| "learning_rate": 9.997337426259134e-06, |
| "loss": 0.6792, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.3290008123476848, |
| "grad_norm": 0.703279934707329, |
| "learning_rate": 9.997100887750215e-06, |
| "loss": 0.6635, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.33021933387489844, |
| "grad_norm": 0.695544945955001, |
| "learning_rate": 9.996854287745337e-06, |
| "loss": 0.645, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.3314378554021121, |
| "grad_norm": 0.7462833994362996, |
| "learning_rate": 9.996597626741023e-06, |
| "loss": 0.6478, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.33265637692932576, |
| "grad_norm": 0.6876699055946316, |
| "learning_rate": 9.99633090525405e-06, |
| "loss": 0.6495, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.3338748984565394, |
| "grad_norm": 0.6161949269900944, |
| "learning_rate": 9.996054123821455e-06, |
| "loss": 0.6477, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.335093419983753, |
| "grad_norm": 0.6992818714334844, |
| "learning_rate": 9.995767283000526e-06, |
| "loss": 0.6471, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.3363119415109667, |
| "grad_norm": 0.6649545633189144, |
| "learning_rate": 9.995470383368808e-06, |
| "loss": 0.6526, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.33753046303818035, |
| "grad_norm": 0.7069772548058584, |
| "learning_rate": 9.995163425524097e-06, |
| "loss": 0.6622, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.338748984565394, |
| "grad_norm": 0.7343365884623839, |
| "learning_rate": 9.994846410084447e-06, |
| "loss": 0.6401, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.3399675060926076, |
| "grad_norm": 0.7666383023534878, |
| "learning_rate": 9.994519337688152e-06, |
| "loss": 0.6351, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.3411860276198213, |
| "grad_norm": 0.7101687784996984, |
| "learning_rate": 9.994182208993766e-06, |
| "loss": 0.6686, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.34240454914703494, |
| "grad_norm": 0.794098416336116, |
| "learning_rate": 9.993835024680084e-06, |
| "loss": 0.6534, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.3436230706742486, |
| "grad_norm": 0.6476191969862704, |
| "learning_rate": 9.993477785446151e-06, |
| "loss": 0.6321, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.3448415922014622, |
| "grad_norm": 0.7027462161925977, |
| "learning_rate": 9.993110492011256e-06, |
| "loss": 0.6677, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.3460601137286759, |
| "grad_norm": 0.7368948502336647, |
| "learning_rate": 9.992733145114932e-06, |
| "loss": 0.6332, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.34727863525588953, |
| "grad_norm": 0.769793462172428, |
| "learning_rate": 9.992345745516954e-06, |
| "loss": 0.6627, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.34849715678310317, |
| "grad_norm": 0.6391657112532801, |
| "learning_rate": 9.99194829399734e-06, |
| "loss": 0.6364, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.3497156783103168, |
| "grad_norm": 0.8671328129231476, |
| "learning_rate": 9.991540791356342e-06, |
| "loss": 0.6558, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.35093419983753044, |
| "grad_norm": 0.6143371180878986, |
| "learning_rate": 9.991123238414455e-06, |
| "loss": 0.6725, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.3521527213647441, |
| "grad_norm": 0.7114612477683598, |
| "learning_rate": 9.99069563601241e-06, |
| "loss": 0.6386, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.35337124289195776, |
| "grad_norm": 0.5910112375855043, |
| "learning_rate": 9.990257985011168e-06, |
| "loss": 0.6648, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.3545897644191714, |
| "grad_norm": 0.6709399619542642, |
| "learning_rate": 9.989810286291923e-06, |
| "loss": 0.6641, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.35580828594638503, |
| "grad_norm": 0.5876086675256037, |
| "learning_rate": 9.989352540756103e-06, |
| "loss": 0.6519, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.3570268074735987, |
| "grad_norm": 0.4993245470857056, |
| "learning_rate": 9.988884749325366e-06, |
| "loss": 0.6409, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.35824532900081235, |
| "grad_norm": 0.6361394412220084, |
| "learning_rate": 9.988406912941591e-06, |
| "loss": 0.6543, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.359463850528026, |
| "grad_norm": 0.5972665446098098, |
| "learning_rate": 9.987919032566885e-06, |
| "loss": 0.6379, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.3606823720552396, |
| "grad_norm": 0.5332779981117456, |
| "learning_rate": 9.987421109183581e-06, |
| "loss": 0.6362, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.3619008935824533, |
| "grad_norm": 0.6057994457076236, |
| "learning_rate": 9.986913143794232e-06, |
| "loss": 0.6455, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.36311941510966694, |
| "grad_norm": 0.6075132715056499, |
| "learning_rate": 9.986395137421607e-06, |
| "loss": 0.6624, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.3643379366368806, |
| "grad_norm": 0.5258247408109219, |
| "learning_rate": 9.985867091108697e-06, |
| "loss": 0.638, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.3655564581640942, |
| "grad_norm": 0.5267906230313797, |
| "learning_rate": 9.985329005918702e-06, |
| "loss": 0.6362, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.3667749796913079, |
| "grad_norm": 0.5638416352250496, |
| "learning_rate": 9.984780882935043e-06, |
| "loss": 0.6301, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.36799350121852153, |
| "grad_norm": 0.545011579464239, |
| "learning_rate": 9.984222723261344e-06, |
| "loss": 0.6599, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.36921202274573517, |
| "grad_norm": 0.5606014722546357, |
| "learning_rate": 9.983654528021442e-06, |
| "loss": 0.6542, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.3704305442729488, |
| "grad_norm": 0.6018343388636366, |
| "learning_rate": 9.98307629835938e-06, |
| "loss": 0.6368, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.3716490658001625, |
| "grad_norm": 0.6118602452372705, |
| "learning_rate": 9.982488035439401e-06, |
| "loss": 0.6513, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.3728675873273761, |
| "grad_norm": 0.6022653337990805, |
| "learning_rate": 9.981889740445958e-06, |
| "loss": 0.6496, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.37408610885458976, |
| "grad_norm": 0.569004250440184, |
| "learning_rate": 9.981281414583693e-06, |
| "loss": 0.6598, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.3753046303818034, |
| "grad_norm": 0.5713014740165444, |
| "learning_rate": 9.980663059077453e-06, |
| "loss": 0.6613, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.3765231519090171, |
| "grad_norm": 0.6154580840564017, |
| "learning_rate": 9.980034675172274e-06, |
| "loss": 0.6442, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.3777416734362307, |
| "grad_norm": 0.5917553562402863, |
| "learning_rate": 9.979396264133388e-06, |
| "loss": 0.6431, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.37896019496344435, |
| "grad_norm": 0.578864320620872, |
| "learning_rate": 9.978747827246214e-06, |
| "loss": 0.6589, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.380178716490658, |
| "grad_norm": 0.6460070122884725, |
| "learning_rate": 9.978089365816357e-06, |
| "loss": 0.6267, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.3813972380178717, |
| "grad_norm": 0.6165901634865715, |
| "learning_rate": 9.977420881169607e-06, |
| "loss": 0.6357, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.3826157595450853, |
| "grad_norm": 0.6862027434641219, |
| "learning_rate": 9.976742374651936e-06, |
| "loss": 0.6607, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.38383428107229894, |
| "grad_norm": 0.6447789605505084, |
| "learning_rate": 9.976053847629496e-06, |
| "loss": 0.6464, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.3850528025995126, |
| "grad_norm": 0.597882927094437, |
| "learning_rate": 9.97535530148861e-06, |
| "loss": 0.6337, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.3862713241267262, |
| "grad_norm": 0.6296819593414332, |
| "learning_rate": 9.974646737635781e-06, |
| "loss": 0.6474, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.3874898456539399, |
| "grad_norm": 0.6313838311506389, |
| "learning_rate": 9.973928157497675e-06, |
| "loss": 0.6289, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.38870836718115354, |
| "grad_norm": 0.6255452790127047, |
| "learning_rate": 9.97319956252113e-06, |
| "loss": 0.6418, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.38992688870836717, |
| "grad_norm": 0.501125482187719, |
| "learning_rate": 9.972460954173149e-06, |
| "loss": 0.6469, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.3911454102355808, |
| "grad_norm": 0.5644277137540713, |
| "learning_rate": 9.971712333940896e-06, |
| "loss": 0.6431, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.3923639317627945, |
| "grad_norm": 0.5401625089221826, |
| "learning_rate": 9.970953703331692e-06, |
| "loss": 0.6399, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.3935824532900081, |
| "grad_norm": 0.6126970579614653, |
| "learning_rate": 9.970185063873012e-06, |
| "loss": 0.6312, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.39480097481722176, |
| "grad_norm": 0.6237167355625934, |
| "learning_rate": 9.969406417112489e-06, |
| "loss": 0.6492, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.3960194963444354, |
| "grad_norm": 0.6083530680570769, |
| "learning_rate": 9.9686177646179e-06, |
| "loss": 0.6404, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.3972380178716491, |
| "grad_norm": 0.6156210783234582, |
| "learning_rate": 9.967819107977175e-06, |
| "loss": 0.626, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.3984565393988627, |
| "grad_norm": 0.6913246389420981, |
| "learning_rate": 9.967010448798376e-06, |
| "loss": 0.6464, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.39967506092607635, |
| "grad_norm": 0.6430895031047548, |
| "learning_rate": 9.966191788709716e-06, |
| "loss": 0.6482, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.40089358245329, |
| "grad_norm": 0.670581453307023, |
| "learning_rate": 9.965363129359537e-06, |
| "loss": 0.649, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.4021121039805037, |
| "grad_norm": 0.6373745499675882, |
| "learning_rate": 9.964524472416319e-06, |
| "loss": 0.6231, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.4033306255077173, |
| "grad_norm": 0.5729524017518108, |
| "learning_rate": 9.96367581956867e-06, |
| "loss": 0.639, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.40454914703493094, |
| "grad_norm": 0.60528048612915, |
| "learning_rate": 9.962817172525323e-06, |
| "loss": 0.6412, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.4057676685621446, |
| "grad_norm": 0.5439146819119978, |
| "learning_rate": 9.961948533015135e-06, |
| "loss": 0.6463, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.40698619008935827, |
| "grad_norm": 0.6696342043794363, |
| "learning_rate": 9.961069902787082e-06, |
| "loss": 0.6559, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.4082047116165719, |
| "grad_norm": 0.6137113821251218, |
| "learning_rate": 9.96018128361026e-06, |
| "loss": 0.6186, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.40942323314378554, |
| "grad_norm": 0.7521896228588043, |
| "learning_rate": 9.959282677273869e-06, |
| "loss": 0.6585, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.41064175467099917, |
| "grad_norm": 0.6161644621872354, |
| "learning_rate": 9.958374085587228e-06, |
| "loss": 0.6511, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.41186027619821286, |
| "grad_norm": 0.6232166791838529, |
| "learning_rate": 9.957455510379753e-06, |
| "loss": 0.6421, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.4130787977254265, |
| "grad_norm": 0.6575837363786434, |
| "learning_rate": 9.956526953500965e-06, |
| "loss": 0.6288, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.41429731925264013, |
| "grad_norm": 0.624761687952515, |
| "learning_rate": 9.955588416820482e-06, |
| "loss": 0.6397, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.41551584077985376, |
| "grad_norm": 0.6332930756907055, |
| "learning_rate": 9.954639902228018e-06, |
| "loss": 0.6444, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.41673436230706745, |
| "grad_norm": 0.5746664206825376, |
| "learning_rate": 9.953681411633376e-06, |
| "loss": 0.6414, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.4179528838342811, |
| "grad_norm": 0.6762777021979247, |
| "learning_rate": 9.952712946966441e-06, |
| "loss": 0.6306, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.4191714053614947, |
| "grad_norm": 0.6244129529931802, |
| "learning_rate": 9.951734510177187e-06, |
| "loss": 0.6366, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.42038992688870835, |
| "grad_norm": 0.6226787509569254, |
| "learning_rate": 9.950746103235663e-06, |
| "loss": 0.6302, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.421608448415922, |
| "grad_norm": 0.6520199261370837, |
| "learning_rate": 9.949747728131994e-06, |
| "loss": 0.6816, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.4228269699431357, |
| "grad_norm": 0.6026134976644628, |
| "learning_rate": 9.948739386876376e-06, |
| "loss": 0.6385, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.4240454914703493, |
| "grad_norm": 0.6012466224483265, |
| "learning_rate": 9.947721081499068e-06, |
| "loss": 0.6458, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.42526401299756295, |
| "grad_norm": 0.5524226925373649, |
| "learning_rate": 9.946692814050396e-06, |
| "loss": 0.6281, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.4264825345247766, |
| "grad_norm": 0.6055953304742949, |
| "learning_rate": 9.945654586600741e-06, |
| "loss": 0.6467, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.42770105605199027, |
| "grad_norm": 0.586137745210729, |
| "learning_rate": 9.944606401240538e-06, |
| "loss": 0.6379, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.4289195775792039, |
| "grad_norm": 0.5125599093697626, |
| "learning_rate": 9.943548260080277e-06, |
| "loss": 0.6523, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.43013809910641754, |
| "grad_norm": 0.6305973658118967, |
| "learning_rate": 9.942480165250487e-06, |
| "loss": 0.6389, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.43135662063363117, |
| "grad_norm": 0.5220411272087411, |
| "learning_rate": 9.941402118901743e-06, |
| "loss": 0.6425, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.43257514216084486, |
| "grad_norm": 0.5753441957701829, |
| "learning_rate": 9.940314123204656e-06, |
| "loss": 0.6441, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.4337936636880585, |
| "grad_norm": 0.584328279121849, |
| "learning_rate": 9.939216180349864e-06, |
| "loss": 0.6359, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.43501218521527213, |
| "grad_norm": 0.6135441335146246, |
| "learning_rate": 9.938108292548044e-06, |
| "loss": 0.6267, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.43623070674248576, |
| "grad_norm": 0.5429972724232678, |
| "learning_rate": 9.93699046202989e-06, |
| "loss": 0.611, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.43744922826969945, |
| "grad_norm": 0.6487815842031103, |
| "learning_rate": 9.935862691046114e-06, |
| "loss": 0.6395, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.4386677497969131, |
| "grad_norm": 0.5638558609882317, |
| "learning_rate": 9.934724981867447e-06, |
| "loss": 0.6398, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.4398862713241267, |
| "grad_norm": 0.7915256825394801, |
| "learning_rate": 9.93357733678463e-06, |
| "loss": 0.6275, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.44110479285134035, |
| "grad_norm": 0.6072564790199728, |
| "learning_rate": 9.932419758108403e-06, |
| "loss": 0.6313, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.44232331437855404, |
| "grad_norm": 0.7829204972438968, |
| "learning_rate": 9.931252248169518e-06, |
| "loss": 0.6334, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.4435418359057677, |
| "grad_norm": 0.6029448727505217, |
| "learning_rate": 9.930074809318714e-06, |
| "loss": 0.6469, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.4447603574329813, |
| "grad_norm": 0.6793840267075067, |
| "learning_rate": 9.928887443926725e-06, |
| "loss": 0.6334, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.44597887896019495, |
| "grad_norm": 0.5488302948299049, |
| "learning_rate": 9.927690154384273e-06, |
| "loss": 0.6213, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.44719740048740864, |
| "grad_norm": 0.7346734434148855, |
| "learning_rate": 9.92648294310206e-06, |
| "loss": 0.6295, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.44841592201462227, |
| "grad_norm": 0.7457059967309784, |
| "learning_rate": 9.925265812510767e-06, |
| "loss": 0.6379, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.4496344435418359, |
| "grad_norm": 0.621543177481449, |
| "learning_rate": 9.924038765061042e-06, |
| "loss": 0.641, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.45085296506904954, |
| "grad_norm": 0.8188643504363363, |
| "learning_rate": 9.922801803223506e-06, |
| "loss": 0.6481, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.45207148659626323, |
| "grad_norm": 0.6040894853255576, |
| "learning_rate": 9.921554929488741e-06, |
| "loss": 0.6493, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.45329000812347686, |
| "grad_norm": 0.8455545003582287, |
| "learning_rate": 9.920298146367287e-06, |
| "loss": 0.6436, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.4545085296506905, |
| "grad_norm": 0.626392939964308, |
| "learning_rate": 9.919031456389632e-06, |
| "loss": 0.6303, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.45572705117790413, |
| "grad_norm": 0.7483260656404666, |
| "learning_rate": 9.917754862106216e-06, |
| "loss": 0.6306, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.45694557270511776, |
| "grad_norm": 0.6122181327172058, |
| "learning_rate": 9.916468366087418e-06, |
| "loss": 0.6409, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.45816409423233145, |
| "grad_norm": 0.5593648989087618, |
| "learning_rate": 9.915171970923556e-06, |
| "loss": 0.6583, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.4593826157595451, |
| "grad_norm": 0.7626157086282944, |
| "learning_rate": 9.913865679224876e-06, |
| "loss": 0.648, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.4606011372867587, |
| "grad_norm": 0.5027545868887003, |
| "learning_rate": 9.912549493621555e-06, |
| "loss": 0.6378, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.46181965881397236, |
| "grad_norm": 0.6593540069533284, |
| "learning_rate": 9.911223416763689e-06, |
| "loss": 0.6487, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.46303818034118605, |
| "grad_norm": 0.7507657782021496, |
| "learning_rate": 9.909887451321288e-06, |
| "loss": 0.6628, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.4642567018683997, |
| "grad_norm": 0.5963371403892291, |
| "learning_rate": 9.908541599984276e-06, |
| "loss": 0.6304, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.4654752233956133, |
| "grad_norm": 0.7456866534587581, |
| "learning_rate": 9.907185865462476e-06, |
| "loss": 0.6362, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.46669374492282695, |
| "grad_norm": 0.5547991254906135, |
| "learning_rate": 9.905820250485619e-06, |
| "loss": 0.631, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.46791226645004064, |
| "grad_norm": 0.7089080919365149, |
| "learning_rate": 9.904444757803322e-06, |
| "loss": 0.6281, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.46913078797725427, |
| "grad_norm": 0.5003916403857714, |
| "learning_rate": 9.903059390185093e-06, |
| "loss": 0.6412, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.4703493095044679, |
| "grad_norm": 0.6729850093918749, |
| "learning_rate": 9.901664150420328e-06, |
| "loss": 0.6329, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.47156783103168154, |
| "grad_norm": 0.5557718878026181, |
| "learning_rate": 9.90025904131829e-06, |
| "loss": 0.6226, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.47278635255889523, |
| "grad_norm": 0.6260971706778755, |
| "learning_rate": 9.898844065708121e-06, |
| "loss": 0.6257, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.47400487408610886, |
| "grad_norm": 0.5411961675821981, |
| "learning_rate": 9.89741922643883e-06, |
| "loss": 0.6517, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.4752233956133225, |
| "grad_norm": 0.5597130938499267, |
| "learning_rate": 9.895984526379282e-06, |
| "loss": 0.6157, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.47644191714053613, |
| "grad_norm": 0.58052501455543, |
| "learning_rate": 9.894539968418195e-06, |
| "loss": 0.6322, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.4776604386677498, |
| "grad_norm": 0.5211161945377233, |
| "learning_rate": 9.893085555464143e-06, |
| "loss": 0.6089, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.47887896019496345, |
| "grad_norm": 0.6838111314182518, |
| "learning_rate": 9.891621290445534e-06, |
| "loss": 0.632, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.4800974817221771, |
| "grad_norm": 0.5785699696283433, |
| "learning_rate": 9.890147176310618e-06, |
| "loss": 0.623, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.4813160032493907, |
| "grad_norm": 0.6260781225868985, |
| "learning_rate": 9.888663216027477e-06, |
| "loss": 0.6433, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.4825345247766044, |
| "grad_norm": 0.5634389513735794, |
| "learning_rate": 9.887169412584012e-06, |
| "loss": 0.6359, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.48375304630381805, |
| "grad_norm": 0.576861556797157, |
| "learning_rate": 9.885665768987947e-06, |
| "loss": 0.6289, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.4849715678310317, |
| "grad_norm": 0.5991685983326442, |
| "learning_rate": 9.88415228826682e-06, |
| "loss": 0.6345, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.4861900893582453, |
| "grad_norm": 0.5331826337156919, |
| "learning_rate": 9.882628973467972e-06, |
| "loss": 0.6282, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.487408610885459, |
| "grad_norm": 0.5052439699487477, |
| "learning_rate": 9.881095827658548e-06, |
| "loss": 0.629, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.48862713241267264, |
| "grad_norm": 0.5842564825983466, |
| "learning_rate": 9.879552853925486e-06, |
| "loss": 0.6518, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.48984565393988627, |
| "grad_norm": 0.5538659465643975, |
| "learning_rate": 9.878000055375512e-06, |
| "loss": 0.6333, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.4910641754670999, |
| "grad_norm": 0.5200827864775698, |
| "learning_rate": 9.876437435135133e-06, |
| "loss": 0.6348, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.49228269699431354, |
| "grad_norm": 0.6043127912027646, |
| "learning_rate": 9.874864996350633e-06, |
| "loss": 0.6136, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.49350121852152723, |
| "grad_norm": 0.4948272003142496, |
| "learning_rate": 9.873282742188066e-06, |
| "loss": 0.6301, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.49471974004874086, |
| "grad_norm": 0.5983030540970795, |
| "learning_rate": 9.871690675833248e-06, |
| "loss": 0.6354, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.4959382615759545, |
| "grad_norm": 0.5309927588463559, |
| "learning_rate": 9.87008880049175e-06, |
| "loss": 0.6316, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.49715678310316813, |
| "grad_norm": 0.46510544628039285, |
| "learning_rate": 9.868477119388897e-06, |
| "loss": 0.641, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.4983753046303818, |
| "grad_norm": 0.4745237655389145, |
| "learning_rate": 9.866855635769753e-06, |
| "loss": 0.6484, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.49959382615759546, |
| "grad_norm": 0.562173043770555, |
| "learning_rate": 9.86522435289912e-06, |
| "loss": 0.6263, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.5008123476848091, |
| "grad_norm": 0.5419982591023096, |
| "learning_rate": 9.863583274061535e-06, |
| "loss": 0.6197, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.5020308692120228, |
| "grad_norm": 0.5709095665576734, |
| "learning_rate": 9.861932402561253e-06, |
| "loss": 0.6253, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.5032493907392364, |
| "grad_norm": 0.5575561882923015, |
| "learning_rate": 9.86027174172225e-06, |
| "loss": 0.6257, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.50446791226645, |
| "grad_norm": 0.5818761313113621, |
| "learning_rate": 9.858601294888212e-06, |
| "loss": 0.6375, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.5056864337936637, |
| "grad_norm": 0.55560278003152, |
| "learning_rate": 9.856921065422527e-06, |
| "loss": 0.6327, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.5069049553208773, |
| "grad_norm": 0.5142680238787152, |
| "learning_rate": 9.855231056708281e-06, |
| "loss": 0.6347, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.508123476848091, |
| "grad_norm": 0.5468260799033448, |
| "learning_rate": 9.853531272148248e-06, |
| "loss": 0.6165, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.5093419983753046, |
| "grad_norm": 0.5366215405716666, |
| "learning_rate": 9.851821715164891e-06, |
| "loss": 0.6232, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.5105605199025183, |
| "grad_norm": 0.6815769917483668, |
| "learning_rate": 9.850102389200346e-06, |
| "loss": 0.6375, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.511779041429732, |
| "grad_norm": 0.5766636790628379, |
| "learning_rate": 9.848373297716414e-06, |
| "loss": 0.6411, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.5129975629569455, |
| "grad_norm": 0.6508434213004275, |
| "learning_rate": 9.846634444194568e-06, |
| "loss": 0.6277, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.5142160844841592, |
| "grad_norm": 0.5654811023161467, |
| "learning_rate": 9.844885832135928e-06, |
| "loss": 0.6192, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.5154346060113729, |
| "grad_norm": 0.6220408843438429, |
| "learning_rate": 9.84312746506127e-06, |
| "loss": 0.6254, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.5166531275385865, |
| "grad_norm": 0.5550144456923615, |
| "learning_rate": 9.841359346511004e-06, |
| "loss": 0.6288, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.5178716490658002, |
| "grad_norm": 0.5804117244385404, |
| "learning_rate": 9.83958148004518e-06, |
| "loss": 0.6244, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.5190901705930138, |
| "grad_norm": 0.6245742605810847, |
| "learning_rate": 9.837793869243468e-06, |
| "loss": 0.6209, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.5203086921202275, |
| "grad_norm": 0.5661037548895256, |
| "learning_rate": 9.83599651770517e-06, |
| "loss": 0.6279, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.5215272136474411, |
| "grad_norm": 0.5358603369119569, |
| "learning_rate": 9.834189429049188e-06, |
| "loss": 0.6307, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.5227457351746547, |
| "grad_norm": 0.6122007731857034, |
| "learning_rate": 9.832372606914038e-06, |
| "loss": 0.6158, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.5239642567018684, |
| "grad_norm": 0.5972271574369769, |
| "learning_rate": 9.830546054957828e-06, |
| "loss": 0.6204, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.525182778229082, |
| "grad_norm": 0.5443858161988891, |
| "learning_rate": 9.82870977685826e-06, |
| "loss": 0.621, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.5264012997562957, |
| "grad_norm": 0.6250123596443754, |
| "learning_rate": 9.826863776312621e-06, |
| "loss": 0.6408, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.5276198212835094, |
| "grad_norm": 0.5933038352389216, |
| "learning_rate": 9.825008057037769e-06, |
| "loss": 0.6588, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.528838342810723, |
| "grad_norm": 0.6567920347058966, |
| "learning_rate": 9.823142622770135e-06, |
| "loss": 0.625, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.5300568643379366, |
| "grad_norm": 0.5779776066299945, |
| "learning_rate": 9.821267477265705e-06, |
| "loss": 0.6387, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.5312753858651503, |
| "grad_norm": 0.570082080677981, |
| "learning_rate": 9.819382624300027e-06, |
| "loss": 0.6324, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.5324939073923639, |
| "grad_norm": 0.5818606827175574, |
| "learning_rate": 9.817488067668186e-06, |
| "loss": 0.644, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.5337124289195776, |
| "grad_norm": 0.5476824827124901, |
| "learning_rate": 9.815583811184809e-06, |
| "loss": 0.6189, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.5349309504467912, |
| "grad_norm": 0.5768267508522074, |
| "learning_rate": 9.813669858684054e-06, |
| "loss": 0.6222, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.5361494719740049, |
| "grad_norm": 0.5120867453918215, |
| "learning_rate": 9.8117462140196e-06, |
| "loss": 0.6204, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.5373679935012186, |
| "grad_norm": 0.5186146607717382, |
| "learning_rate": 9.80981288106464e-06, |
| "loss": 0.6195, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.5385865150284321, |
| "grad_norm": 0.5895698622661449, |
| "learning_rate": 9.807869863711878e-06, |
| "loss": 0.6205, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.5398050365556458, |
| "grad_norm": 0.5421346973971489, |
| "learning_rate": 9.805917165873515e-06, |
| "loss": 0.6303, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.5410235580828595, |
| "grad_norm": 0.5227058266380313, |
| "learning_rate": 9.803954791481239e-06, |
| "loss": 0.6196, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.5422420796100731, |
| "grad_norm": 0.4665750459631165, |
| "learning_rate": 9.801982744486229e-06, |
| "loss": 0.628, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.5434606011372868, |
| "grad_norm": 0.5340051839015313, |
| "learning_rate": 9.800001028859135e-06, |
| "loss": 0.6321, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.5446791226645004, |
| "grad_norm": 0.49569443009344233, |
| "learning_rate": 9.798009648590073e-06, |
| "loss": 0.6295, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.545897644191714, |
| "grad_norm": 0.5589394978947685, |
| "learning_rate": 9.796008607688624e-06, |
| "loss": 0.6458, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.5471161657189277, |
| "grad_norm": 0.5349825334411198, |
| "learning_rate": 9.793997910183815e-06, |
| "loss": 0.6348, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.5483346872461413, |
| "grad_norm": 0.5406756193824626, |
| "learning_rate": 9.79197756012412e-06, |
| "loss": 0.6352, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.549553208773355, |
| "grad_norm": 0.5590939249326192, |
| "learning_rate": 9.789947561577445e-06, |
| "loss": 0.6345, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.5507717303005687, |
| "grad_norm": 0.5138272981689205, |
| "learning_rate": 9.787907918631125e-06, |
| "loss": 0.6457, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.5519902518277823, |
| "grad_norm": 0.5967975071520696, |
| "learning_rate": 9.785858635391913e-06, |
| "loss": 0.6059, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.553208773354996, |
| "grad_norm": 0.4912288949887055, |
| "learning_rate": 9.783799715985973e-06, |
| "loss": 0.6254, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.5544272948822095, |
| "grad_norm": 0.5903941074513651, |
| "learning_rate": 9.78173116455887e-06, |
| "loss": 0.6108, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.5556458164094232, |
| "grad_norm": 0.5632794329839387, |
| "learning_rate": 9.779652985275562e-06, |
| "loss": 0.6187, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.5568643379366369, |
| "grad_norm": 0.5941486268629673, |
| "learning_rate": 9.777565182320396e-06, |
| "loss": 0.6184, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.5580828594638505, |
| "grad_norm": 0.6416650599158464, |
| "learning_rate": 9.775467759897092e-06, |
| "loss": 0.6331, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.5593013809910642, |
| "grad_norm": 0.5651281069823211, |
| "learning_rate": 9.773360722228742e-06, |
| "loss": 0.6307, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.5605199025182778, |
| "grad_norm": 0.6620891236551917, |
| "learning_rate": 9.771244073557792e-06, |
| "loss": 0.6078, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.5617384240454915, |
| "grad_norm": 0.6015785675867341, |
| "learning_rate": 9.769117818146048e-06, |
| "loss": 0.6237, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.5629569455727051, |
| "grad_norm": 0.8038047522794796, |
| "learning_rate": 9.766981960274653e-06, |
| "loss": 0.6173, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.5641754670999187, |
| "grad_norm": 0.6163269598618792, |
| "learning_rate": 9.764836504244086e-06, |
| "loss": 0.6264, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.5653939886271324, |
| "grad_norm": 0.6244153487192251, |
| "learning_rate": 9.762681454374148e-06, |
| "loss": 0.6112, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.5666125101543461, |
| "grad_norm": 0.724456218504814, |
| "learning_rate": 9.760516815003965e-06, |
| "loss": 0.6255, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.5678310316815597, |
| "grad_norm": 0.580652096091434, |
| "learning_rate": 9.758342590491961e-06, |
| "loss": 0.6342, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.5690495532087734, |
| "grad_norm": 0.6644456071205537, |
| "learning_rate": 9.756158785215866e-06, |
| "loss": 0.6127, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.570268074735987, |
| "grad_norm": 0.5736293156748269, |
| "learning_rate": 9.753965403572703e-06, |
| "loss": 0.6313, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.5714865962632006, |
| "grad_norm": 0.6178186373387958, |
| "learning_rate": 9.751762449978767e-06, |
| "loss": 0.643, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.5727051177904143, |
| "grad_norm": 0.584712916385393, |
| "learning_rate": 9.749549928869636e-06, |
| "loss": 0.5948, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.5739236393176279, |
| "grad_norm": 0.6116917271773714, |
| "learning_rate": 9.747327844700147e-06, |
| "loss": 0.6297, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.5751421608448416, |
| "grad_norm": 0.4903955649085751, |
| "learning_rate": 9.745096201944391e-06, |
| "loss": 0.6251, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.5763606823720553, |
| "grad_norm": 0.6968313476556924, |
| "learning_rate": 9.742855005095706e-06, |
| "loss": 0.6117, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.5775792038992689, |
| "grad_norm": 0.48897486873959584, |
| "learning_rate": 9.740604258666668e-06, |
| "loss": 0.6058, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.5787977254264826, |
| "grad_norm": 0.7217629239411762, |
| "learning_rate": 9.73834396718908e-06, |
| "loss": 0.6265, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.5800162469536961, |
| "grad_norm": 0.613354162646377, |
| "learning_rate": 9.736074135213962e-06, |
| "loss": 0.6399, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.5812347684809098, |
| "grad_norm": 0.6447055703309105, |
| "learning_rate": 9.733794767311545e-06, |
| "loss": 0.6335, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.5824532900081235, |
| "grad_norm": 0.5823448015058018, |
| "learning_rate": 9.731505868071262e-06, |
| "loss": 0.6262, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.5836718115353371, |
| "grad_norm": 0.5125864553684497, |
| "learning_rate": 9.729207442101736e-06, |
| "loss": 0.6101, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.5848903330625508, |
| "grad_norm": 0.6147081791430226, |
| "learning_rate": 9.726899494030768e-06, |
| "loss": 0.6411, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.5861088545897645, |
| "grad_norm": 0.5467046907537908, |
| "learning_rate": 9.724582028505336e-06, |
| "loss": 0.6203, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.587327376116978, |
| "grad_norm": 0.5741960101018327, |
| "learning_rate": 9.72225505019158e-06, |
| "loss": 0.624, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.5885458976441917, |
| "grad_norm": 0.6709034274446143, |
| "learning_rate": 9.719918563774793e-06, |
| "loss": 0.6316, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.5897644191714053, |
| "grad_norm": 0.5633926121392079, |
| "learning_rate": 9.71757257395941e-06, |
| "loss": 0.6205, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.590982940698619, |
| "grad_norm": 0.5752003286544818, |
| "learning_rate": 9.715217085469009e-06, |
| "loss": 0.601, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.5922014622258327, |
| "grad_norm": 0.6676085473844594, |
| "learning_rate": 9.712852103046281e-06, |
| "loss": 0.6425, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.5934199837530463, |
| "grad_norm": 0.43714860457984767, |
| "learning_rate": 9.710477631453044e-06, |
| "loss": 0.6264, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.59463850528026, |
| "grad_norm": 0.7834186015627101, |
| "learning_rate": 9.708093675470214e-06, |
| "loss": 0.6294, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.5958570268074735, |
| "grad_norm": 0.5229823852593044, |
| "learning_rate": 9.705700239897809e-06, |
| "loss": 0.6253, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.5970755483346872, |
| "grad_norm": 0.6641427142623177, |
| "learning_rate": 9.70329732955493e-06, |
| "loss": 0.6208, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.5982940698619009, |
| "grad_norm": 0.5777300627058165, |
| "learning_rate": 9.70088494927976e-06, |
| "loss": 0.62, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.5995125913891145, |
| "grad_norm": 0.47427848956457735, |
| "learning_rate": 9.698463103929542e-06, |
| "loss": 0.6168, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.6007311129163282, |
| "grad_norm": 0.6176694192284208, |
| "learning_rate": 9.696031798380586e-06, |
| "loss": 0.6192, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.6019496344435419, |
| "grad_norm": 0.5380294280704867, |
| "learning_rate": 9.693591037528239e-06, |
| "loss": 0.6324, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.6031681559707555, |
| "grad_norm": 0.5270092433580651, |
| "learning_rate": 9.691140826286893e-06, |
| "loss": 0.6275, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.6043866774979691, |
| "grad_norm": 0.5928211370503502, |
| "learning_rate": 9.688681169589971e-06, |
| "loss": 0.6295, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.6056051990251827, |
| "grad_norm": 0.487281690093329, |
| "learning_rate": 9.686212072389904e-06, |
| "loss": 0.6157, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.6068237205523964, |
| "grad_norm": 0.5179266059337351, |
| "learning_rate": 9.68373353965814e-06, |
| "loss": 0.6098, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.6080422420796101, |
| "grad_norm": 0.5314913870970437, |
| "learning_rate": 9.68124557638512e-06, |
| "loss": 0.6173, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.6092607636068237, |
| "grad_norm": 0.4844744555610714, |
| "learning_rate": 9.678748187580278e-06, |
| "loss": 0.6186, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.6104792851340374, |
| "grad_norm": 0.5188776477142794, |
| "learning_rate": 9.676241378272022e-06, |
| "loss": 0.6168, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.6116978066612511, |
| "grad_norm": 0.49668970689497427, |
| "learning_rate": 9.673725153507727e-06, |
| "loss": 0.6128, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.6129163281884646, |
| "grad_norm": 0.5049088012633238, |
| "learning_rate": 9.67119951835373e-06, |
| "loss": 0.6204, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.6141348497156783, |
| "grad_norm": 0.5286755135827618, |
| "learning_rate": 9.66866447789531e-06, |
| "loss": 0.6321, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.6153533712428919, |
| "grad_norm": 0.5414829955250333, |
| "learning_rate": 9.666120037236692e-06, |
| "loss": 0.6073, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.6165718927701056, |
| "grad_norm": 0.5929807296645003, |
| "learning_rate": 9.663566201501017e-06, |
| "loss": 0.6219, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.6177904142973193, |
| "grad_norm": 0.565513002212362, |
| "learning_rate": 9.66100297583035e-06, |
| "loss": 0.6218, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.6190089358245329, |
| "grad_norm": 0.48043459347807704, |
| "learning_rate": 9.65843036538566e-06, |
| "loss": 0.607, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.6202274573517466, |
| "grad_norm": 0.6289509926942585, |
| "learning_rate": 9.655848375346812e-06, |
| "loss": 0.6396, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.6214459788789602, |
| "grad_norm": 0.5609440147588081, |
| "learning_rate": 9.65325701091256e-06, |
| "loss": 0.6303, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.6226645004061738, |
| "grad_norm": 0.5893573188478602, |
| "learning_rate": 9.650656277300525e-06, |
| "loss": 0.6166, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.6238830219333875, |
| "grad_norm": 0.5628137809478111, |
| "learning_rate": 9.6480461797472e-06, |
| "loss": 0.6291, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.6251015434606011, |
| "grad_norm": 0.5493464215154626, |
| "learning_rate": 9.645426723507929e-06, |
| "loss": 0.6222, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.6263200649878148, |
| "grad_norm": 0.5629698357909129, |
| "learning_rate": 9.6427979138569e-06, |
| "loss": 0.6317, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.6275385865150285, |
| "grad_norm": 0.6664927672498832, |
| "learning_rate": 9.640159756087136e-06, |
| "loss": 0.6382, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.628757108042242, |
| "grad_norm": 0.5522749634660304, |
| "learning_rate": 9.637512255510475e-06, |
| "loss": 0.6143, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.6299756295694557, |
| "grad_norm": 0.5532267628661862, |
| "learning_rate": 9.63485541745757e-06, |
| "loss": 0.6374, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.6311941510966693, |
| "grad_norm": 0.6876124654936631, |
| "learning_rate": 9.632189247277885e-06, |
| "loss": 0.6392, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.632412672623883, |
| "grad_norm": 0.653192030137328, |
| "learning_rate": 9.629513750339656e-06, |
| "loss": 0.6146, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.6336311941510967, |
| "grad_norm": 0.5264590327684809, |
| "learning_rate": 9.626828932029907e-06, |
| "loss": 0.6187, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.6348497156783103, |
| "grad_norm": 0.6140627235902801, |
| "learning_rate": 9.624134797754437e-06, |
| "loss": 0.5948, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.636068237205524, |
| "grad_norm": 0.715948251788629, |
| "learning_rate": 9.62143135293779e-06, |
| "loss": 0.6221, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.6372867587327377, |
| "grad_norm": 0.6814424426040064, |
| "learning_rate": 9.618718603023261e-06, |
| "loss": 0.6279, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.6385052802599512, |
| "grad_norm": 0.600168318088034, |
| "learning_rate": 9.615996553472885e-06, |
| "loss": 0.6267, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.6397238017871649, |
| "grad_norm": 0.5619413500131725, |
| "learning_rate": 9.613265209767417e-06, |
| "loss": 0.6288, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.6409423233143785, |
| "grad_norm": 0.5903652755615201, |
| "learning_rate": 9.610524577406325e-06, |
| "loss": 0.6305, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.6421608448415922, |
| "grad_norm": 0.5087861988940737, |
| "learning_rate": 9.607774661907783e-06, |
| "loss": 0.6192, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.6433793663688059, |
| "grad_norm": 0.6555944853088764, |
| "learning_rate": 9.605015468808651e-06, |
| "loss": 0.6255, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.6445978878960195, |
| "grad_norm": 0.6123139168204214, |
| "learning_rate": 9.602247003664476e-06, |
| "loss": 0.6185, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.6458164094232332, |
| "grad_norm": 0.5503960050113602, |
| "learning_rate": 9.599469272049468e-06, |
| "loss": 0.6385, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.6470349309504468, |
| "grad_norm": 0.5823472571150912, |
| "learning_rate": 9.596682279556499e-06, |
| "loss": 0.6241, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.6482534524776604, |
| "grad_norm": 0.5840631388468679, |
| "learning_rate": 9.593886031797081e-06, |
| "loss": 0.625, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.6494719740048741, |
| "grad_norm": 0.5622117171111194, |
| "learning_rate": 9.591080534401371e-06, |
| "loss": 0.6192, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.6506904955320877, |
| "grad_norm": 0.5707745901206253, |
| "learning_rate": 9.588265793018141e-06, |
| "loss": 0.6391, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.6519090170593014, |
| "grad_norm": 0.5896800585312665, |
| "learning_rate": 9.58544181331478e-06, |
| "loss": 0.6339, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.6531275385865151, |
| "grad_norm": 0.5209906229065117, |
| "learning_rate": 9.582608600977276e-06, |
| "loss": 0.601, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.6543460601137286, |
| "grad_norm": 0.5155011577582275, |
| "learning_rate": 9.579766161710209e-06, |
| "loss": 0.6015, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.6555645816409423, |
| "grad_norm": 0.48807425767261786, |
| "learning_rate": 9.576914501236734e-06, |
| "loss": 0.6167, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.656783103168156, |
| "grad_norm": 0.5579148908182612, |
| "learning_rate": 9.574053625298577e-06, |
| "loss": 0.6193, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.6580016246953696, |
| "grad_norm": 0.5287053319535842, |
| "learning_rate": 9.571183539656011e-06, |
| "loss": 0.6291, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.6592201462225833, |
| "grad_norm": 0.6191360016551267, |
| "learning_rate": 9.568304250087864e-06, |
| "loss": 0.6139, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.6604386677497969, |
| "grad_norm": 0.5099069268786582, |
| "learning_rate": 9.565415762391485e-06, |
| "loss": 0.6013, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.6616571892770106, |
| "grad_norm": 0.5421293076141, |
| "learning_rate": 9.562518082382751e-06, |
| "loss": 0.5907, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.6628757108042242, |
| "grad_norm": 0.5498541039203616, |
| "learning_rate": 9.559611215896041e-06, |
| "loss": 0.627, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.6640942323314378, |
| "grad_norm": 0.5680961983046815, |
| "learning_rate": 9.556695168784236e-06, |
| "loss": 0.5952, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.6653127538586515, |
| "grad_norm": 0.5218060004228549, |
| "learning_rate": 9.553769946918698e-06, |
| "loss": 0.6228, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.6665312753858651, |
| "grad_norm": 0.5543031912725007, |
| "learning_rate": 9.550835556189264e-06, |
| "loss": 0.6338, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.6677497969130788, |
| "grad_norm": 0.5668524593324846, |
| "learning_rate": 9.547892002504233e-06, |
| "loss": 0.6219, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.6689683184402925, |
| "grad_norm": 0.5873694380478705, |
| "learning_rate": 9.544939291790352e-06, |
| "loss": 0.624, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.670186839967506, |
| "grad_norm": 0.5399986226537774, |
| "learning_rate": 9.541977429992803e-06, |
| "loss": 0.6385, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.6714053614947197, |
| "grad_norm": 0.7171400926799747, |
| "learning_rate": 9.5390064230752e-06, |
| "loss": 0.621, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.6726238830219334, |
| "grad_norm": 0.6092647452638789, |
| "learning_rate": 9.536026277019562e-06, |
| "loss": 0.6223, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.673842404549147, |
| "grad_norm": 0.683988747327427, |
| "learning_rate": 9.533036997826315e-06, |
| "loss": 0.6199, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.6750609260763607, |
| "grad_norm": 0.5791819914636441, |
| "learning_rate": 9.530038591514275e-06, |
| "loss": 0.6328, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.6762794476035743, |
| "grad_norm": 0.6782628719672897, |
| "learning_rate": 9.527031064120632e-06, |
| "loss": 0.6127, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.677497969130788, |
| "grad_norm": 0.6767775073979123, |
| "learning_rate": 9.524014421700942e-06, |
| "loss": 0.6186, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.6787164906580017, |
| "grad_norm": 0.5114857558759379, |
| "learning_rate": 9.520988670329114e-06, |
| "loss": 0.63, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.6799350121852152, |
| "grad_norm": 0.5501380880007342, |
| "learning_rate": 9.517953816097396e-06, |
| "loss": 0.5915, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.6811535337124289, |
| "grad_norm": 0.6714746829201106, |
| "learning_rate": 9.514909865116368e-06, |
| "loss": 0.6067, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.6823720552396426, |
| "grad_norm": 0.5375092336126965, |
| "learning_rate": 9.511856823514924e-06, |
| "loss": 0.596, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.6835905767668562, |
| "grad_norm": 0.6176188040728243, |
| "learning_rate": 9.508794697440257e-06, |
| "loss": 0.6333, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.6848090982940699, |
| "grad_norm": 0.6212303271054956, |
| "learning_rate": 9.505723493057862e-06, |
| "loss": 0.6178, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.6860276198212835, |
| "grad_norm": 0.5377188134801542, |
| "learning_rate": 9.502643216551502e-06, |
| "loss": 0.6017, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.6872461413484972, |
| "grad_norm": 0.6362000539969834, |
| "learning_rate": 9.499553874123213e-06, |
| "loss": 0.6392, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.6884646628757108, |
| "grad_norm": 0.5480382319562058, |
| "learning_rate": 9.496455471993284e-06, |
| "loss": 0.6113, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.6896831844029244, |
| "grad_norm": 0.6994517506614581, |
| "learning_rate": 9.49334801640024e-06, |
| "loss": 0.6327, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.6909017059301381, |
| "grad_norm": 0.5335729160289857, |
| "learning_rate": 9.490231513600842e-06, |
| "loss": 0.6218, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.6921202274573518, |
| "grad_norm": 0.6063268804347564, |
| "learning_rate": 9.487105969870068e-06, |
| "loss": 0.6174, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.6933387489845654, |
| "grad_norm": 0.6267394635949436, |
| "learning_rate": 9.48397139150109e-06, |
| "loss": 0.605, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.6945572705117791, |
| "grad_norm": 0.48229350211609867, |
| "learning_rate": 9.480827784805278e-06, |
| "loss": 0.6138, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.6957757920389926, |
| "grad_norm": 0.6094361236823382, |
| "learning_rate": 9.477675156112183e-06, |
| "loss": 0.616, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.6969943135662063, |
| "grad_norm": 0.5646668548267415, |
| "learning_rate": 9.474513511769513e-06, |
| "loss": 0.6257, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.69821283509342, |
| "grad_norm": 0.5605266691062354, |
| "learning_rate": 9.47134285814314e-06, |
| "loss": 0.623, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.6994313566206336, |
| "grad_norm": 0.5976205093855237, |
| "learning_rate": 9.468163201617063e-06, |
| "loss": 0.6182, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.7006498781478473, |
| "grad_norm": 0.5736754942220608, |
| "learning_rate": 9.464974548593415e-06, |
| "loss": 0.5973, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.7018683996750609, |
| "grad_norm": 0.5782971035374301, |
| "learning_rate": 9.461776905492446e-06, |
| "loss": 0.6021, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.7030869212022746, |
| "grad_norm": 0.5094228164183464, |
| "learning_rate": 9.458570278752501e-06, |
| "loss": 0.6028, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.7043054427294883, |
| "grad_norm": 0.5803305530484321, |
| "learning_rate": 9.455354674830016e-06, |
| "loss": 0.6224, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.7055239642567018, |
| "grad_norm": 0.5229464149205902, |
| "learning_rate": 9.452130100199504e-06, |
| "loss": 0.6157, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.7067424857839155, |
| "grad_norm": 0.5965075801420928, |
| "learning_rate": 9.448896561353536e-06, |
| "loss": 0.6062, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.7079610073111292, |
| "grad_norm": 0.5275236801559984, |
| "learning_rate": 9.445654064802738e-06, |
| "loss": 0.611, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.7091795288383428, |
| "grad_norm": 0.511555457965572, |
| "learning_rate": 9.442402617075765e-06, |
| "loss": 0.6263, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.7103980503655565, |
| "grad_norm": 0.5490562182756723, |
| "learning_rate": 9.439142224719302e-06, |
| "loss": 0.6236, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.7116165718927701, |
| "grad_norm": 0.5258200584782562, |
| "learning_rate": 9.435872894298037e-06, |
| "loss": 0.6106, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.7128350934199837, |
| "grad_norm": 0.5189357566107585, |
| "learning_rate": 9.43259463239466e-06, |
| "loss": 0.636, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.7140536149471974, |
| "grad_norm": 0.5097577073371684, |
| "learning_rate": 9.429307445609841e-06, |
| "loss": 0.6337, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.715272136474411, |
| "grad_norm": 0.6069103268356187, |
| "learning_rate": 9.426011340562222e-06, |
| "loss": 0.6177, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.7164906580016247, |
| "grad_norm": 0.48842546371203027, |
| "learning_rate": 9.422706323888398e-06, |
| "loss": 0.6011, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.7177091795288384, |
| "grad_norm": 0.5365657101299985, |
| "learning_rate": 9.419392402242912e-06, |
| "loss": 0.6007, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.718927701056052, |
| "grad_norm": 0.5101507591790149, |
| "learning_rate": 9.416069582298236e-06, |
| "loss": 0.6175, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.7201462225832657, |
| "grad_norm": 0.4516555710559031, |
| "learning_rate": 9.412737870744752e-06, |
| "loss": 0.6107, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.7213647441104792, |
| "grad_norm": 0.4881759934731241, |
| "learning_rate": 9.409397274290756e-06, |
| "loss": 0.6224, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.7225832656376929, |
| "grad_norm": 0.45459978443672416, |
| "learning_rate": 9.406047799662426e-06, |
| "loss": 0.6089, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.7238017871649066, |
| "grad_norm": 0.505751917086364, |
| "learning_rate": 9.402689453603815e-06, |
| "loss": 0.6244, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.7250203086921202, |
| "grad_norm": 0.5110751597586063, |
| "learning_rate": 9.399322242876843e-06, |
| "loss": 0.601, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.7262388302193339, |
| "grad_norm": 0.504579475445371, |
| "learning_rate": 9.395946174261274e-06, |
| "loss": 0.6216, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.7274573517465476, |
| "grad_norm": 0.534595723022526, |
| "learning_rate": 9.392561254554712e-06, |
| "loss": 0.6067, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.7286758732737612, |
| "grad_norm": 0.5583009202449097, |
| "learning_rate": 9.38916749057258e-06, |
| "loss": 0.6249, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.7298943948009748, |
| "grad_norm": 0.5059716144312469, |
| "learning_rate": 9.385764889148107e-06, |
| "loss": 0.6115, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.7311129163281884, |
| "grad_norm": 0.6121449401534393, |
| "learning_rate": 9.382353457132318e-06, |
| "loss": 0.6077, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.7323314378554021, |
| "grad_norm": 0.4829522546788395, |
| "learning_rate": 9.378933201394019e-06, |
| "loss": 0.6216, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.7335499593826158, |
| "grad_norm": 0.5436028145378481, |
| "learning_rate": 9.375504128819779e-06, |
| "loss": 0.6185, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.7347684809098294, |
| "grad_norm": 0.5172970082009579, |
| "learning_rate": 9.372066246313922e-06, |
| "loss": 0.644, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.7359870024370431, |
| "grad_norm": 0.4738987982796835, |
| "learning_rate": 9.368619560798511e-06, |
| "loss": 0.6246, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.7372055239642566, |
| "grad_norm": 0.4525040495867516, |
| "learning_rate": 9.36516407921333e-06, |
| "loss": 0.6109, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.7384240454914703, |
| "grad_norm": 0.5076237716007553, |
| "learning_rate": 9.361699808515877e-06, |
| "loss": 0.6151, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.739642567018684, |
| "grad_norm": 0.5074655977130175, |
| "learning_rate": 9.358226755681342e-06, |
| "loss": 0.6082, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.7408610885458976, |
| "grad_norm": 0.4840933107276308, |
| "learning_rate": 9.354744927702607e-06, |
| "loss": 0.615, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.7420796100731113, |
| "grad_norm": 0.5219253787729252, |
| "learning_rate": 9.351254331590216e-06, |
| "loss": 0.5996, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.743298131600325, |
| "grad_norm": 0.5601150249253273, |
| "learning_rate": 9.347754974372365e-06, |
| "loss": 0.6032, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.7445166531275386, |
| "grad_norm": 0.4986838680038737, |
| "learning_rate": 9.344246863094893e-06, |
| "loss": 0.5976, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.7457351746547523, |
| "grad_norm": 0.4948788586568317, |
| "learning_rate": 9.340730004821266e-06, |
| "loss": 0.6085, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.7469536961819658, |
| "grad_norm": 0.5238689007424114, |
| "learning_rate": 9.33720440663256e-06, |
| "loss": 0.6129, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.7481722177091795, |
| "grad_norm": 0.47607891045094536, |
| "learning_rate": 9.33367007562745e-06, |
| "loss": 0.6199, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.7493907392363932, |
| "grad_norm": 0.4955962984701164, |
| "learning_rate": 9.330127018922195e-06, |
| "loss": 0.5949, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.7506092607636068, |
| "grad_norm": 0.6100851359106775, |
| "learning_rate": 9.326575243650618e-06, |
| "loss": 0.6143, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.7518277822908205, |
| "grad_norm": 0.48084331485799453, |
| "learning_rate": 9.323014756964104e-06, |
| "loss": 0.6064, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.7530463038180342, |
| "grad_norm": 0.6768728956598579, |
| "learning_rate": 9.31944556603157e-06, |
| "loss": 0.6229, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.7542648253452477, |
| "grad_norm": 0.6664441895394185, |
| "learning_rate": 9.315867678039469e-06, |
| "loss": 0.631, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.7554833468724614, |
| "grad_norm": 0.6265982250759069, |
| "learning_rate": 9.312281100191752e-06, |
| "loss": 0.63, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.756701868399675, |
| "grad_norm": 0.6297592873763573, |
| "learning_rate": 9.308685839709878e-06, |
| "loss": 0.6264, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.7579203899268887, |
| "grad_norm": 0.5583877292859594, |
| "learning_rate": 9.305081903832784e-06, |
| "loss": 0.5974, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.7591389114541024, |
| "grad_norm": 0.5001555304308823, |
| "learning_rate": 9.301469299816874e-06, |
| "loss": 0.6117, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.760357432981316, |
| "grad_norm": 0.5390093336249369, |
| "learning_rate": 9.297848034936007e-06, |
| "loss": 0.6088, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.7615759545085297, |
| "grad_norm": 0.5678848176997396, |
| "learning_rate": 9.294218116481476e-06, |
| "loss": 0.6018, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.7627944760357434, |
| "grad_norm": 0.5844799796481355, |
| "learning_rate": 9.290579551762002e-06, |
| "loss": 0.604, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.7640129975629569, |
| "grad_norm": 0.5159143134307803, |
| "learning_rate": 9.286932348103716e-06, |
| "loss": 0.6083, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.7652315190901706, |
| "grad_norm": 0.5326620021016965, |
| "learning_rate": 9.283276512850137e-06, |
| "loss": 0.6206, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.7664500406173842, |
| "grad_norm": 0.5963411548189359, |
| "learning_rate": 9.27961205336217e-06, |
| "loss": 0.6108, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.7676685621445979, |
| "grad_norm": 0.5014319447503888, |
| "learning_rate": 9.275938977018082e-06, |
| "loss": 0.6034, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.7688870836718116, |
| "grad_norm": 0.5126870488620024, |
| "learning_rate": 9.272257291213488e-06, |
| "loss": 0.6176, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.7701056051990252, |
| "grad_norm": 0.4787184158365945, |
| "learning_rate": 9.268567003361341e-06, |
| "loss": 0.607, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.7713241267262388, |
| "grad_norm": 0.557057771330538, |
| "learning_rate": 9.264868120891913e-06, |
| "loss": 0.6318, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.7725426482534524, |
| "grad_norm": 0.535409561474859, |
| "learning_rate": 9.261160651252778e-06, |
| "loss": 0.62, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.7737611697806661, |
| "grad_norm": 0.4814507650875912, |
| "learning_rate": 9.257444601908806e-06, |
| "loss": 0.6074, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.7749796913078798, |
| "grad_norm": 0.6101990877396614, |
| "learning_rate": 9.253719980342134e-06, |
| "loss": 0.6208, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.7761982128350934, |
| "grad_norm": 0.5403900228621851, |
| "learning_rate": 9.249986794052168e-06, |
| "loss": 0.5968, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.7774167343623071, |
| "grad_norm": 0.5703352381203307, |
| "learning_rate": 9.24624505055555e-06, |
| "loss": 0.626, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.7786352558895208, |
| "grad_norm": 0.5241053254774348, |
| "learning_rate": 9.24249475738616e-06, |
| "loss": 0.5959, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.7798537774167343, |
| "grad_norm": 0.5780889050780196, |
| "learning_rate": 9.238735922095083e-06, |
| "loss": 0.5783, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.781072298943948, |
| "grad_norm": 0.5164354758896532, |
| "learning_rate": 9.234968552250612e-06, |
| "loss": 0.6192, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.7822908204711616, |
| "grad_norm": 0.5672667605052139, |
| "learning_rate": 9.231192655438222e-06, |
| "loss": 0.6003, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.7835093419983753, |
| "grad_norm": 0.5135255221881695, |
| "learning_rate": 9.22740823926055e-06, |
| "loss": 0.6082, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.784727863525589, |
| "grad_norm": 0.5584536390516718, |
| "learning_rate": 9.223615311337395e-06, |
| "loss": 0.614, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.7859463850528026, |
| "grad_norm": 0.5216134140261057, |
| "learning_rate": 9.219813879305692e-06, |
| "loss": 0.6012, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.7871649065800163, |
| "grad_norm": 0.5736410922364097, |
| "learning_rate": 9.216003950819497e-06, |
| "loss": 0.6194, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.7883834281072299, |
| "grad_norm": 0.5049300976776431, |
| "learning_rate": 9.21218553354997e-06, |
| "loss": 0.6115, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.7896019496344435, |
| "grad_norm": 0.5596092247163901, |
| "learning_rate": 9.208358635185372e-06, |
| "loss": 0.6002, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.7908204711616572, |
| "grad_norm": 0.6492697062225624, |
| "learning_rate": 9.204523263431034e-06, |
| "loss": 0.6087, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.7920389926888708, |
| "grad_norm": 0.5493287831302429, |
| "learning_rate": 9.200679426009347e-06, |
| "loss": 0.6134, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.7932575142160845, |
| "grad_norm": 0.5393423473357866, |
| "learning_rate": 9.196827130659752e-06, |
| "loss": 0.6077, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.7944760357432982, |
| "grad_norm": 0.4822437257768845, |
| "learning_rate": 9.192966385138714e-06, |
| "loss": 0.6206, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.7956945572705117, |
| "grad_norm": 0.5489723911011465, |
| "learning_rate": 9.189097197219718e-06, |
| "loss": 0.6237, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.7969130787977254, |
| "grad_norm": 0.465446021569481, |
| "learning_rate": 9.185219574693242e-06, |
| "loss": 0.5969, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.7981316003249391, |
| "grad_norm": 0.5608574163560325, |
| "learning_rate": 9.181333525366756e-06, |
| "loss": 0.6116, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.7993501218521527, |
| "grad_norm": 0.47338894132856235, |
| "learning_rate": 9.177439057064684e-06, |
| "loss": 0.5898, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.8005686433793664, |
| "grad_norm": 0.5538432939088667, |
| "learning_rate": 9.17353617762841e-06, |
| "loss": 0.6042, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.80178716490658, |
| "grad_norm": 0.5129997268787104, |
| "learning_rate": 9.169624894916252e-06, |
| "loss": 0.6045, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.8030056864337937, |
| "grad_norm": 0.491484979669411, |
| "learning_rate": 9.165705216803446e-06, |
| "loss": 0.6159, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.8042242079610074, |
| "grad_norm": 0.4865407913972347, |
| "learning_rate": 9.161777151182137e-06, |
| "loss": 0.6095, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.8054427294882209, |
| "grad_norm": 0.5482167186016993, |
| "learning_rate": 9.15784070596135e-06, |
| "loss": 0.6063, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.8066612510154346, |
| "grad_norm": 0.4899874123032885, |
| "learning_rate": 9.153895889066988e-06, |
| "loss": 0.5993, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.8078797725426482, |
| "grad_norm": 0.4971658879090838, |
| "learning_rate": 9.149942708441808e-06, |
| "loss": 0.6349, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.8090982940698619, |
| "grad_norm": 0.4774943646678603, |
| "learning_rate": 9.145981172045407e-06, |
| "loss": 0.5937, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.8103168155970756, |
| "grad_norm": 0.5239506111079297, |
| "learning_rate": 9.142011287854206e-06, |
| "loss": 0.596, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.8115353371242892, |
| "grad_norm": 0.49171964255133527, |
| "learning_rate": 9.138033063861436e-06, |
| "loss": 0.5866, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.8127538586515028, |
| "grad_norm": 0.5198610207245239, |
| "learning_rate": 9.134046508077116e-06, |
| "loss": 0.6022, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.8139723801787165, |
| "grad_norm": 0.4768598644726109, |
| "learning_rate": 9.130051628528046e-06, |
| "loss": 0.6057, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.8151909017059301, |
| "grad_norm": 0.539806947114795, |
| "learning_rate": 9.12604843325778e-06, |
| "loss": 0.6175, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.8164094232331438, |
| "grad_norm": 0.49480984634291075, |
| "learning_rate": 9.122036930326618e-06, |
| "loss": 0.6214, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.8176279447603574, |
| "grad_norm": 0.5006857848218066, |
| "learning_rate": 9.118017127811591e-06, |
| "loss": 0.6084, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.8188464662875711, |
| "grad_norm": 0.4713529456554149, |
| "learning_rate": 9.113989033806434e-06, |
| "loss": 0.6177, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.8200649878147848, |
| "grad_norm": 0.5234744664186434, |
| "learning_rate": 9.10995265642158e-06, |
| "loss": 0.623, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.8212835093419983, |
| "grad_norm": 0.46959588708419714, |
| "learning_rate": 9.105908003784142e-06, |
| "loss": 0.6223, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.822502030869212, |
| "grad_norm": 0.483130564646199, |
| "learning_rate": 9.101855084037893e-06, |
| "loss": 0.6079, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.8237205523964257, |
| "grad_norm": 0.4707432015389284, |
| "learning_rate": 9.097793905343251e-06, |
| "loss": 0.6246, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.8249390739236393, |
| "grad_norm": 0.5109208158836949, |
| "learning_rate": 9.093724475877262e-06, |
| "loss": 0.6223, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.826157595450853, |
| "grad_norm": 0.524528742300806, |
| "learning_rate": 9.089646803833589e-06, |
| "loss": 0.6054, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.8273761169780666, |
| "grad_norm": 0.48479589382874644, |
| "learning_rate": 9.085560897422487e-06, |
| "loss": 0.5978, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.8285946385052803, |
| "grad_norm": 0.520310530932384, |
| "learning_rate": 9.081466764870795e-06, |
| "loss": 0.6141, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.829813160032494, |
| "grad_norm": 0.5320998645898771, |
| "learning_rate": 9.07736441442191e-06, |
| "loss": 0.5952, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.8310316815597075, |
| "grad_norm": 0.522944143229052, |
| "learning_rate": 9.073253854335777e-06, |
| "loss": 0.5966, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.8322502030869212, |
| "grad_norm": 0.5438608445694643, |
| "learning_rate": 9.069135092888874e-06, |
| "loss": 0.6036, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.8334687246141349, |
| "grad_norm": 0.4929729088140395, |
| "learning_rate": 9.06500813837419e-06, |
| "loss": 0.603, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.8346872461413485, |
| "grad_norm": 0.5376420120613337, |
| "learning_rate": 9.060872999101206e-06, |
| "loss": 0.6151, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.8359057676685622, |
| "grad_norm": 0.52471690520972, |
| "learning_rate": 9.056729683395892e-06, |
| "loss": 0.581, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.8371242891957758, |
| "grad_norm": 0.49865247625736375, |
| "learning_rate": 9.052578199600675e-06, |
| "loss": 0.6067, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.8383428107229894, |
| "grad_norm": 0.5035636474694776, |
| "learning_rate": 9.048418556074425e-06, |
| "loss": 0.605, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.8395613322502031, |
| "grad_norm": 0.5460518150855164, |
| "learning_rate": 9.04425076119245e-06, |
| "loss": 0.6008, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.8407798537774167, |
| "grad_norm": 0.5154326591857874, |
| "learning_rate": 9.040074823346466e-06, |
| "loss": 0.612, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.8419983753046304, |
| "grad_norm": 0.41895451726050503, |
| "learning_rate": 9.035890750944583e-06, |
| "loss": 0.5947, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.843216896831844, |
| "grad_norm": 0.49674088276174516, |
| "learning_rate": 9.03169855241129e-06, |
| "loss": 0.625, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.8444354183590577, |
| "grad_norm": 0.5650371934623263, |
| "learning_rate": 9.02749823618744e-06, |
| "loss": 0.5954, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.8456539398862714, |
| "grad_norm": 0.5010709938981562, |
| "learning_rate": 9.02328981073023e-06, |
| "loss": 0.6071, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.8468724614134849, |
| "grad_norm": 0.5831039880668286, |
| "learning_rate": 9.019073284513184e-06, |
| "loss": 0.5989, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.8480909829406986, |
| "grad_norm": 0.5796544622455602, |
| "learning_rate": 9.014848666026138e-06, |
| "loss": 0.6328, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.8493095044679123, |
| "grad_norm": 0.5898423233515925, |
| "learning_rate": 9.01061596377522e-06, |
| "loss": 0.6316, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.8505280259951259, |
| "grad_norm": 0.576717321636104, |
| "learning_rate": 9.006375186282832e-06, |
| "loss": 0.6129, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.8517465475223396, |
| "grad_norm": 0.5274725251295577, |
| "learning_rate": 9.002126342087643e-06, |
| "loss": 0.6103, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.8529650690495532, |
| "grad_norm": 0.5405289062395403, |
| "learning_rate": 8.997869439744555e-06, |
| "loss": 0.6252, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.8541835905767668, |
| "grad_norm": 0.5521347732238037, |
| "learning_rate": 8.993604487824701e-06, |
| "loss": 0.6008, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.8554021121039805, |
| "grad_norm": 0.5196724445810474, |
| "learning_rate": 8.989331494915417e-06, |
| "loss": 0.6185, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.8566206336311941, |
| "grad_norm": 0.5683878673891257, |
| "learning_rate": 8.985050469620236e-06, |
| "loss": 0.6245, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.8578391551584078, |
| "grad_norm": 0.5407694973000146, |
| "learning_rate": 8.980761420558855e-06, |
| "loss": 0.6142, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.8590576766856215, |
| "grad_norm": 0.5649995760138024, |
| "learning_rate": 8.976464356367133e-06, |
| "loss": 0.5985, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.8602761982128351, |
| "grad_norm": 0.4922853729727254, |
| "learning_rate": 8.972159285697066e-06, |
| "loss": 0.6128, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.8614947197400488, |
| "grad_norm": 0.5653149236554849, |
| "learning_rate": 8.967846217216771e-06, |
| "loss": 0.6085, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.8627132412672623, |
| "grad_norm": 0.5367471044143063, |
| "learning_rate": 8.963525159610465e-06, |
| "loss": 0.6148, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.863931762794476, |
| "grad_norm": 0.6165337631503633, |
| "learning_rate": 8.959196121578455e-06, |
| "loss": 0.6152, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.8651502843216897, |
| "grad_norm": 0.4805242301641202, |
| "learning_rate": 8.954859111837115e-06, |
| "loss": 0.6012, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.8663688058489033, |
| "grad_norm": 0.5673830583367931, |
| "learning_rate": 8.950514139118868e-06, |
| "loss": 0.6137, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.867587327376117, |
| "grad_norm": 0.6116666852593193, |
| "learning_rate": 8.946161212172172e-06, |
| "loss": 0.6067, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.8688058489033307, |
| "grad_norm": 0.4787324171983748, |
| "learning_rate": 8.941800339761503e-06, |
| "loss": 0.6229, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.8700243704305443, |
| "grad_norm": 0.5603801815973803, |
| "learning_rate": 8.937431530667329e-06, |
| "loss": 0.6105, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.871242891957758, |
| "grad_norm": 0.5681506397184968, |
| "learning_rate": 8.933054793686102e-06, |
| "loss": 0.6196, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.8724614134849715, |
| "grad_norm": 0.4745590461881841, |
| "learning_rate": 8.928670137630236e-06, |
| "loss": 0.6041, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.8736799350121852, |
| "grad_norm": 0.5290850478804046, |
| "learning_rate": 8.924277571328091e-06, |
| "loss": 0.5968, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.8748984565393989, |
| "grad_norm": 0.4724468577981056, |
| "learning_rate": 8.919877103623949e-06, |
| "loss": 0.5888, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.8761169780666125, |
| "grad_norm": 0.4710021425585232, |
| "learning_rate": 8.915468743378009e-06, |
| "loss": 0.6039, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.8773354995938262, |
| "grad_norm": 0.5615817507996624, |
| "learning_rate": 8.911052499466358e-06, |
| "loss": 0.611, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.8785540211210398, |
| "grad_norm": 0.5372617716587773, |
| "learning_rate": 8.906628380780951e-06, |
| "loss": 0.5853, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.8797725426482534, |
| "grad_norm": 0.4671881493526463, |
| "learning_rate": 8.902196396229605e-06, |
| "loss": 0.6135, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.8809910641754671, |
| "grad_norm": 0.6571538751607443, |
| "learning_rate": 8.897756554735976e-06, |
| "loss": 0.6166, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.8822095857026807, |
| "grad_norm": 0.5407143640334066, |
| "learning_rate": 8.893308865239536e-06, |
| "loss": 0.5946, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.8834281072298944, |
| "grad_norm": 0.53845654868447, |
| "learning_rate": 8.888853336695558e-06, |
| "loss": 0.6056, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.8846466287571081, |
| "grad_norm": 0.5501103328024185, |
| "learning_rate": 8.884389978075098e-06, |
| "loss": 0.5983, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.8858651502843217, |
| "grad_norm": 0.5308109296782529, |
| "learning_rate": 8.879918798364984e-06, |
| "loss": 0.5777, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.8870836718115354, |
| "grad_norm": 0.5017325039220928, |
| "learning_rate": 8.875439806567786e-06, |
| "loss": 0.6045, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.8883021933387489, |
| "grad_norm": 0.5901206372277947, |
| "learning_rate": 8.870953011701804e-06, |
| "loss": 0.604, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.8895207148659626, |
| "grad_norm": 0.45439896535640995, |
| "learning_rate": 8.866458422801048e-06, |
| "loss": 0.6073, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.8907392363931763, |
| "grad_norm": 0.5577426986098635, |
| "learning_rate": 8.861956048915225e-06, |
| "loss": 0.5915, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.8919577579203899, |
| "grad_norm": 0.6016567936834477, |
| "learning_rate": 8.857445899109716e-06, |
| "loss": 0.6046, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.8931762794476036, |
| "grad_norm": 0.5445868957449489, |
| "learning_rate": 8.852927982465553e-06, |
| "loss": 0.6106, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.8943948009748173, |
| "grad_norm": 0.74687623190731, |
| "learning_rate": 8.848402308079415e-06, |
| "loss": 0.6106, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.8956133225020309, |
| "grad_norm": 0.5720296451679941, |
| "learning_rate": 8.843868885063594e-06, |
| "loss": 0.6051, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.8968318440292445, |
| "grad_norm": 0.6556133763306434, |
| "learning_rate": 8.839327722545985e-06, |
| "loss": 0.6167, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.8980503655564581, |
| "grad_norm": 0.564067584928174, |
| "learning_rate": 8.83477882967007e-06, |
| "loss": 0.5994, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.8992688870836718, |
| "grad_norm": 0.7349456844478599, |
| "learning_rate": 8.83022221559489e-06, |
| "loss": 0.6114, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.9004874086108855, |
| "grad_norm": 0.5690040907358448, |
| "learning_rate": 8.82565788949504e-06, |
| "loss": 0.5881, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.9017059301380991, |
| "grad_norm": 0.6984688918514965, |
| "learning_rate": 8.821085860560633e-06, |
| "loss": 0.5983, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.9029244516653128, |
| "grad_norm": 0.5870268436598589, |
| "learning_rate": 8.8165061379973e-06, |
| "loss": 0.6158, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.9041429731925265, |
| "grad_norm": 0.730806962459982, |
| "learning_rate": 8.81191873102616e-06, |
| "loss": 0.6058, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.90536149471974, |
| "grad_norm": 0.5520509944838993, |
| "learning_rate": 8.807323648883802e-06, |
| "loss": 0.6076, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.9065800162469537, |
| "grad_norm": 0.5674479495642151, |
| "learning_rate": 8.80272090082227e-06, |
| "loss": 0.6017, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.9077985377741673, |
| "grad_norm": 0.6471015570221698, |
| "learning_rate": 8.798110496109047e-06, |
| "loss": 0.6114, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.909017059301381, |
| "grad_norm": 0.5077905529540144, |
| "learning_rate": 8.793492444027027e-06, |
| "loss": 0.6086, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.9102355808285947, |
| "grad_norm": 0.5684591151412205, |
| "learning_rate": 8.788866753874504e-06, |
| "loss": 0.5939, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.9114541023558083, |
| "grad_norm": 0.5373473945369368, |
| "learning_rate": 8.784233434965149e-06, |
| "loss": 0.605, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.912672623883022, |
| "grad_norm": 0.4922150085749876, |
| "learning_rate": 8.779592496627998e-06, |
| "loss": 0.6016, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.9138911454102355, |
| "grad_norm": 0.5346368247367626, |
| "learning_rate": 8.774943948207427e-06, |
| "loss": 0.5894, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.9151096669374492, |
| "grad_norm": 0.5910293461390073, |
| "learning_rate": 8.770287799063128e-06, |
| "loss": 0.5928, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.9163281884646629, |
| "grad_norm": 0.45941353467858154, |
| "learning_rate": 8.765624058570106e-06, |
| "loss": 0.606, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.9175467099918765, |
| "grad_norm": 0.5187731411231332, |
| "learning_rate": 8.760952736118645e-06, |
| "loss": 0.6128, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.9187652315190902, |
| "grad_norm": 0.5257713049314863, |
| "learning_rate": 8.756273841114297e-06, |
| "loss": 0.5954, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.9199837530463039, |
| "grad_norm": 0.5158216045537021, |
| "learning_rate": 8.751587382977862e-06, |
| "loss": 0.6016, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.9212022745735174, |
| "grad_norm": 0.48265635843326626, |
| "learning_rate": 8.746893371145367e-06, |
| "loss": 0.6023, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.9224207961007311, |
| "grad_norm": 0.56635290896361, |
| "learning_rate": 8.742191815068048e-06, |
| "loss": 0.6168, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.9236393176279447, |
| "grad_norm": 0.5246869149929032, |
| "learning_rate": 8.737482724212331e-06, |
| "loss": 0.6073, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.9248578391551584, |
| "grad_norm": 0.5675558144411569, |
| "learning_rate": 8.732766108059814e-06, |
| "loss": 0.6089, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.9260763606823721, |
| "grad_norm": 0.5373680000020842, |
| "learning_rate": 8.728041976107247e-06, |
| "loss": 0.6229, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.9272948822095857, |
| "grad_norm": 0.4781724675355625, |
| "learning_rate": 8.723310337866508e-06, |
| "loss": 0.6109, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.9285134037367994, |
| "grad_norm": 0.5425148864348092, |
| "learning_rate": 8.718571202864598e-06, |
| "loss": 0.6135, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.929731925264013, |
| "grad_norm": 0.5848574183660457, |
| "learning_rate": 8.713824580643606e-06, |
| "loss": 0.5856, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.9309504467912266, |
| "grad_norm": 0.5359644668976268, |
| "learning_rate": 8.709070480760696e-06, |
| "loss": 0.6005, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.9321689683184403, |
| "grad_norm": 0.620026762890768, |
| "learning_rate": 8.70430891278809e-06, |
| "loss": 0.6068, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.9333874898456539, |
| "grad_norm": 0.47117448230839937, |
| "learning_rate": 8.699539886313047e-06, |
| "loss": 0.6252, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.9346060113728676, |
| "grad_norm": 0.5057879313386596, |
| "learning_rate": 8.69476341093784e-06, |
| "loss": 0.6043, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.9358245329000813, |
| "grad_norm": 0.5719864673466165, |
| "learning_rate": 8.689979496279747e-06, |
| "loss": 0.6021, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.9370430544272949, |
| "grad_norm": 0.4550279435061135, |
| "learning_rate": 8.685188151971018e-06, |
| "loss": 0.5903, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.9382615759545085, |
| "grad_norm": 0.5815823584929373, |
| "learning_rate": 8.680389387658866e-06, |
| "loss": 0.5994, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.9394800974817222, |
| "grad_norm": 0.5037028317625714, |
| "learning_rate": 8.675583213005443e-06, |
| "loss": 0.619, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.9406986190089358, |
| "grad_norm": 0.5242690261886358, |
| "learning_rate": 8.67076963768782e-06, |
| "loss": 0.6048, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.9419171405361495, |
| "grad_norm": 0.6218367099845817, |
| "learning_rate": 8.66594867139797e-06, |
| "loss": 0.5839, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.9431356620633631, |
| "grad_norm": 0.47012822627564055, |
| "learning_rate": 8.661120323842751e-06, |
| "loss": 0.5901, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.9443541835905768, |
| "grad_norm": 0.5922308137676237, |
| "learning_rate": 8.656284604743877e-06, |
| "loss": 0.5949, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.9455727051177905, |
| "grad_norm": 0.5371260230634575, |
| "learning_rate": 8.651441523837908e-06, |
| "loss": 0.623, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.946791226645004, |
| "grad_norm": 0.5773759686297267, |
| "learning_rate": 8.646591090876225e-06, |
| "loss": 0.6234, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.9480097481722177, |
| "grad_norm": 0.5887590407239388, |
| "learning_rate": 8.641733315625014e-06, |
| "loss": 0.6111, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.9492282696994313, |
| "grad_norm": 0.5226241995561731, |
| "learning_rate": 8.636868207865244e-06, |
| "loss": 0.6206, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.950446791226645, |
| "grad_norm": 0.6014897765265561, |
| "learning_rate": 8.631995777392645e-06, |
| "loss": 0.6098, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.9516653127538587, |
| "grad_norm": 0.4728664792789181, |
| "learning_rate": 8.627116034017697e-06, |
| "loss": 0.6175, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.9528838342810723, |
| "grad_norm": 0.5599521599776955, |
| "learning_rate": 8.622228987565597e-06, |
| "loss": 0.6121, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.954102355808286, |
| "grad_norm": 0.45561297167703785, |
| "learning_rate": 8.61733464787625e-06, |
| "loss": 0.585, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.9553208773354996, |
| "grad_norm": 0.4965712938546266, |
| "learning_rate": 8.612433024804246e-06, |
| "loss": 0.5844, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.9565393988627132, |
| "grad_norm": 0.49923609484853176, |
| "learning_rate": 8.607524128218842e-06, |
| "loss": 0.6056, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.9577579203899269, |
| "grad_norm": 0.5194489854997212, |
| "learning_rate": 8.602607968003935e-06, |
| "loss": 0.6157, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.9589764419171405, |
| "grad_norm": 0.45374807644787585, |
| "learning_rate": 8.597684554058053e-06, |
| "loss": 0.6131, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.9601949634443542, |
| "grad_norm": 0.48980331599376176, |
| "learning_rate": 8.59275389629432e-06, |
| "loss": 0.6277, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.9614134849715679, |
| "grad_norm": 0.512984376262805, |
| "learning_rate": 8.587816004640456e-06, |
| "loss": 0.6079, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.9626320064987814, |
| "grad_norm": 0.46938679490869983, |
| "learning_rate": 8.58287088903874e-06, |
| "loss": 0.6024, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.9638505280259951, |
| "grad_norm": 0.5727370279954419, |
| "learning_rate": 8.577918559445994e-06, |
| "loss": 0.6133, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.9650690495532088, |
| "grad_norm": 0.46813355754433694, |
| "learning_rate": 8.572959025833573e-06, |
| "loss": 0.6091, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.9662875710804224, |
| "grad_norm": 0.5352006872401892, |
| "learning_rate": 8.56799229818733e-06, |
| "loss": 0.5926, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.9675060926076361, |
| "grad_norm": 0.5423797070420179, |
| "learning_rate": 8.563018386507607e-06, |
| "loss": 0.6055, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.9687246141348497, |
| "grad_norm": 0.5598760717169532, |
| "learning_rate": 8.558037300809209e-06, |
| "loss": 0.601, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.9699431356620634, |
| "grad_norm": 0.5899307915518814, |
| "learning_rate": 8.553049051121383e-06, |
| "loss": 0.5925, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.971161657189277, |
| "grad_norm": 0.5817700253793735, |
| "learning_rate": 8.548053647487808e-06, |
| "loss": 0.5794, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.9723801787164906, |
| "grad_norm": 0.6684891953193655, |
| "learning_rate": 8.543051099966558e-06, |
| "loss": 0.6158, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.9735987002437043, |
| "grad_norm": 0.6186641627844115, |
| "learning_rate": 8.538041418630099e-06, |
| "loss": 0.6045, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.974817221770918, |
| "grad_norm": 0.5620245115548018, |
| "learning_rate": 8.533024613565256e-06, |
| "loss": 0.6074, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.9760357432981316, |
| "grad_norm": 0.5360734619477909, |
| "learning_rate": 8.5280006948732e-06, |
| "loss": 0.5781, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.9772542648253453, |
| "grad_norm": 0.5649861774930516, |
| "learning_rate": 8.522969672669419e-06, |
| "loss": 0.603, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.9784727863525589, |
| "grad_norm": 0.5524388375136041, |
| "learning_rate": 8.517931557083713e-06, |
| "loss": 0.5927, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.9796913078797725, |
| "grad_norm": 0.5048333497363491, |
| "learning_rate": 8.512886358260162e-06, |
| "loss": 0.6218, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.9809098294069862, |
| "grad_norm": 0.5532699810235799, |
| "learning_rate": 8.5078340863571e-06, |
| "loss": 0.5935, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.9821283509341998, |
| "grad_norm": 0.482227106626454, |
| "learning_rate": 8.502774751547108e-06, |
| "loss": 0.5946, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.9833468724614135, |
| "grad_norm": 0.5612628853741157, |
| "learning_rate": 8.49770836401699e-06, |
| "loss": 0.6174, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.9845653939886271, |
| "grad_norm": 0.5165079876207431, |
| "learning_rate": 8.492634933967749e-06, |
| "loss": 0.586, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.9857839155158408, |
| "grad_norm": 0.5243350260461674, |
| "learning_rate": 8.487554471614568e-06, |
| "loss": 0.598, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.9870024370430545, |
| "grad_norm": 0.5334138693548346, |
| "learning_rate": 8.482466987186785e-06, |
| "loss": 0.6156, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.988220958570268, |
| "grad_norm": 0.5183630888601999, |
| "learning_rate": 8.477372490927882e-06, |
| "loss": 0.6043, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.9894394800974817, |
| "grad_norm": 0.5064511107410842, |
| "learning_rate": 8.47227099309546e-06, |
| "loss": 0.618, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.9906580016246954, |
| "grad_norm": 0.502910387382079, |
| "learning_rate": 8.467162503961209e-06, |
| "loss": 0.5921, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.991876523151909, |
| "grad_norm": 0.6360985673189292, |
| "learning_rate": 8.462047033810906e-06, |
| "loss": 0.6196, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.9930950446791227, |
| "grad_norm": 0.48804000994343705, |
| "learning_rate": 8.456924592944377e-06, |
| "loss": 0.5874, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.9943135662063363, |
| "grad_norm": 0.5525784026778128, |
| "learning_rate": 8.451795191675488e-06, |
| "loss": 0.6121, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.99553208773355, |
| "grad_norm": 0.6244758885512404, |
| "learning_rate": 8.446658840332115e-06, |
| "loss": 0.6117, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.9967506092607636, |
| "grad_norm": 0.5125354504575084, |
| "learning_rate": 8.441515549256134e-06, |
| "loss": 0.6029, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.9979691307879772, |
| "grad_norm": 0.48689738688414835, |
| "learning_rate": 8.436365328803386e-06, |
| "loss": 0.6118, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.9991876523151909, |
| "grad_norm": 0.6498259018985348, |
| "learning_rate": 8.43120818934367e-06, |
| "loss": 0.6102, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.0008123476848092, |
| "grad_norm": 0.9638337013283915, |
| "learning_rate": 8.426044141260712e-06, |
| "loss": 0.9573, |
| "step": 821 |
| }, |
| { |
| "epoch": 1.0020308692120228, |
| "grad_norm": 0.49843778312392245, |
| "learning_rate": 8.420873194952153e-06, |
| "loss": 0.5312, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.0032493907392364, |
| "grad_norm": 0.5736142039977695, |
| "learning_rate": 8.415695360829521e-06, |
| "loss": 0.5481, |
| "step": 823 |
| }, |
| { |
| "epoch": 1.00446791226645, |
| "grad_norm": 0.5588125856539439, |
| "learning_rate": 8.410510649318211e-06, |
| "loss": 0.6112, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.0056864337936637, |
| "grad_norm": 0.5238730088532109, |
| "learning_rate": 8.405319070857466e-06, |
| "loss": 0.5738, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.0069049553208773, |
| "grad_norm": 0.5729923093577028, |
| "learning_rate": 8.40012063590036e-06, |
| "loss": 0.563, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.008123476848091, |
| "grad_norm": 0.542308645982848, |
| "learning_rate": 8.394915354913763e-06, |
| "loss": 0.5825, |
| "step": 827 |
| }, |
| { |
| "epoch": 1.0093419983753047, |
| "grad_norm": 0.5635399800755453, |
| "learning_rate": 8.38970323837834e-06, |
| "loss": 0.5596, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.0105605199025183, |
| "grad_norm": 0.5412240812641438, |
| "learning_rate": 8.384484296788509e-06, |
| "loss": 0.583, |
| "step": 829 |
| }, |
| { |
| "epoch": 1.0117790414297319, |
| "grad_norm": 0.4985722523246039, |
| "learning_rate": 8.379258540652438e-06, |
| "loss": 0.5269, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.0129975629569457, |
| "grad_norm": 0.5577073237880519, |
| "learning_rate": 8.37402598049201e-06, |
| "loss": 0.5971, |
| "step": 831 |
| }, |
| { |
| "epoch": 1.0142160844841592, |
| "grad_norm": 0.5397320632233633, |
| "learning_rate": 8.368786626842815e-06, |
| "loss": 0.576, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.0154346060113728, |
| "grad_norm": 0.5446374373068642, |
| "learning_rate": 8.363540490254111e-06, |
| "loss": 0.5604, |
| "step": 833 |
| }, |
| { |
| "epoch": 1.0166531275385866, |
| "grad_norm": 0.5916157265480478, |
| "learning_rate": 8.358287581288824e-06, |
| "loss": 0.5977, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.0178716490658002, |
| "grad_norm": 0.44317757053413465, |
| "learning_rate": 8.353027910523506e-06, |
| "loss": 0.5386, |
| "step": 835 |
| }, |
| { |
| "epoch": 1.0190901705930138, |
| "grad_norm": 0.5306644900080113, |
| "learning_rate": 8.347761488548334e-06, |
| "loss": 0.5685, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.0203086921202273, |
| "grad_norm": 0.554634789156319, |
| "learning_rate": 8.342488325967068e-06, |
| "loss": 0.5906, |
| "step": 837 |
| }, |
| { |
| "epoch": 1.0215272136474411, |
| "grad_norm": 0.46926134132806735, |
| "learning_rate": 8.337208433397051e-06, |
| "loss": 0.5518, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.0227457351746547, |
| "grad_norm": 0.5223237573306092, |
| "learning_rate": 8.331921821469164e-06, |
| "loss": 0.5482, |
| "step": 839 |
| }, |
| { |
| "epoch": 1.0239642567018683, |
| "grad_norm": 0.6456110639127597, |
| "learning_rate": 8.326628500827826e-06, |
| "loss": 0.5533, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.025182778229082, |
| "grad_norm": 0.49817045727119846, |
| "learning_rate": 8.321328482130967e-06, |
| "loss": 0.5828, |
| "step": 841 |
| }, |
| { |
| "epoch": 1.0264012997562957, |
| "grad_norm": 0.6439926526967455, |
| "learning_rate": 8.31602177604999e-06, |
| "loss": 0.5445, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.0276198212835093, |
| "grad_norm": 0.5597217590326287, |
| "learning_rate": 8.310708393269773e-06, |
| "loss": 0.5919, |
| "step": 843 |
| }, |
| { |
| "epoch": 1.028838342810723, |
| "grad_norm": 0.5067484108191753, |
| "learning_rate": 8.305388344488636e-06, |
| "loss": 0.5119, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.0300568643379366, |
| "grad_norm": 0.6138111359383427, |
| "learning_rate": 8.300061640418322e-06, |
| "loss": 0.5819, |
| "step": 845 |
| }, |
| { |
| "epoch": 1.0312753858651502, |
| "grad_norm": 0.5228439245226578, |
| "learning_rate": 8.294728291783967e-06, |
| "loss": 0.5488, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.032493907392364, |
| "grad_norm": 0.5069119735333029, |
| "learning_rate": 8.289388309324094e-06, |
| "loss": 0.5531, |
| "step": 847 |
| }, |
| { |
| "epoch": 1.0337124289195776, |
| "grad_norm": 0.6055259774711721, |
| "learning_rate": 8.284041703790578e-06, |
| "loss": 0.6323, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.0349309504467912, |
| "grad_norm": 0.40577407124920994, |
| "learning_rate": 8.278688485948634e-06, |
| "loss": 0.5171, |
| "step": 849 |
| }, |
| { |
| "epoch": 1.036149471974005, |
| "grad_norm": 0.5480653507617855, |
| "learning_rate": 8.273328666576783e-06, |
| "loss": 0.5708, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.0373679935012186, |
| "grad_norm": 0.5332307457846426, |
| "learning_rate": 8.267962256466845e-06, |
| "loss": 0.5802, |
| "step": 851 |
| }, |
| { |
| "epoch": 1.0385865150284321, |
| "grad_norm": 0.45617231239236866, |
| "learning_rate": 8.262589266423908e-06, |
| "loss": 0.5367, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.0398050365556457, |
| "grad_norm": 0.4487718203264924, |
| "learning_rate": 8.257209707266308e-06, |
| "loss": 0.5412, |
| "step": 853 |
| }, |
| { |
| "epoch": 1.0410235580828595, |
| "grad_norm": 0.49617901681065096, |
| "learning_rate": 8.251823589825608e-06, |
| "loss": 0.582, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.042242079610073, |
| "grad_norm": 0.47465221989539974, |
| "learning_rate": 8.246430924946575e-06, |
| "loss": 0.5377, |
| "step": 855 |
| }, |
| { |
| "epoch": 1.0434606011372867, |
| "grad_norm": 0.4988725203576914, |
| "learning_rate": 8.24103172348716e-06, |
| "loss": 0.6148, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.0446791226645005, |
| "grad_norm": 0.4769299659284957, |
| "learning_rate": 8.235625996318475e-06, |
| "loss": 0.5376, |
| "step": 857 |
| }, |
| { |
| "epoch": 1.045897644191714, |
| "grad_norm": 0.5418879737499556, |
| "learning_rate": 8.230213754324773e-06, |
| "loss": 0.5688, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.0471161657189276, |
| "grad_norm": 0.4361367720716124, |
| "learning_rate": 8.22479500840342e-06, |
| "loss": 0.5337, |
| "step": 859 |
| }, |
| { |
| "epoch": 1.0483346872461414, |
| "grad_norm": 0.5323815827851344, |
| "learning_rate": 8.219369769464883e-06, |
| "loss": 0.6055, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.049553208773355, |
| "grad_norm": 0.5879673529136081, |
| "learning_rate": 8.213938048432697e-06, |
| "loss": 0.5415, |
| "step": 861 |
| }, |
| { |
| "epoch": 1.0507717303005686, |
| "grad_norm": 0.4684259408064238, |
| "learning_rate": 8.208499856243453e-06, |
| "loss": 0.5515, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.0519902518277824, |
| "grad_norm": 0.5196995774290054, |
| "learning_rate": 8.20305520384677e-06, |
| "loss": 0.5934, |
| "step": 863 |
| }, |
| { |
| "epoch": 1.053208773354996, |
| "grad_norm": 0.555821404956024, |
| "learning_rate": 8.19760410220527e-06, |
| "loss": 0.5608, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.0544272948822095, |
| "grad_norm": 0.49067810902195214, |
| "learning_rate": 8.19214656229457e-06, |
| "loss": 0.5338, |
| "step": 865 |
| }, |
| { |
| "epoch": 1.0556458164094233, |
| "grad_norm": 0.5035110725818862, |
| "learning_rate": 8.186682595103241e-06, |
| "loss": 0.579, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.056864337936637, |
| "grad_norm": 0.5005979772843533, |
| "learning_rate": 8.1812122116328e-06, |
| "loss": 0.5824, |
| "step": 867 |
| }, |
| { |
| "epoch": 1.0580828594638505, |
| "grad_norm": 0.5504829458164456, |
| "learning_rate": 8.175735422897682e-06, |
| "loss": 0.5574, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.059301380991064, |
| "grad_norm": 0.5207101568397476, |
| "learning_rate": 8.170252239925215e-06, |
| "loss": 0.5894, |
| "step": 869 |
| }, |
| { |
| "epoch": 1.0605199025182779, |
| "grad_norm": 0.41793216877614997, |
| "learning_rate": 8.16476267375561e-06, |
| "loss": 0.509, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.0617384240454915, |
| "grad_norm": 0.5270083025323902, |
| "learning_rate": 8.159266735441922e-06, |
| "loss": 0.584, |
| "step": 871 |
| }, |
| { |
| "epoch": 1.062956945572705, |
| "grad_norm": 0.4966922910229618, |
| "learning_rate": 8.15376443605004e-06, |
| "loss": 0.5269, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.0641754670999188, |
| "grad_norm": 0.4961677071135526, |
| "learning_rate": 8.148255786658661e-06, |
| "loss": 0.6035, |
| "step": 873 |
| }, |
| { |
| "epoch": 1.0653939886271324, |
| "grad_norm": 0.4946533201405728, |
| "learning_rate": 8.142740798359268e-06, |
| "loss": 0.5932, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.066612510154346, |
| "grad_norm": 0.49312465250267673, |
| "learning_rate": 8.137219482256102e-06, |
| "loss": 0.5337, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.0678310316815598, |
| "grad_norm": 0.5074238436289318, |
| "learning_rate": 8.131691849466154e-06, |
| "loss": 0.5536, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.0690495532087734, |
| "grad_norm": 0.5179722934326702, |
| "learning_rate": 8.126157911119124e-06, |
| "loss": 0.5781, |
| "step": 877 |
| }, |
| { |
| "epoch": 1.070268074735987, |
| "grad_norm": 0.42106727984073683, |
| "learning_rate": 8.120617678357415e-06, |
| "loss": 0.5364, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.0714865962632008, |
| "grad_norm": 0.5619541047984238, |
| "learning_rate": 8.115071162336099e-06, |
| "loss": 0.6302, |
| "step": 879 |
| }, |
| { |
| "epoch": 1.0727051177904143, |
| "grad_norm": 0.48218497269212, |
| "learning_rate": 8.109518374222902e-06, |
| "loss": 0.5081, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.073923639317628, |
| "grad_norm": 0.5288776434466912, |
| "learning_rate": 8.103959325198178e-06, |
| "loss": 0.6161, |
| "step": 881 |
| }, |
| { |
| "epoch": 1.0751421608448415, |
| "grad_norm": 0.4396305550189922, |
| "learning_rate": 8.098394026454886e-06, |
| "loss": 0.5269, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.0763606823720553, |
| "grad_norm": 0.5705187563085431, |
| "learning_rate": 8.09282248919857e-06, |
| "loss": 0.5918, |
| "step": 883 |
| }, |
| { |
| "epoch": 1.0775792038992689, |
| "grad_norm": 0.5173394574008403, |
| "learning_rate": 8.087244724647333e-06, |
| "loss": 0.55, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.0787977254264824, |
| "grad_norm": 0.5259195540857357, |
| "learning_rate": 8.081660744031818e-06, |
| "loss": 0.5587, |
| "step": 885 |
| }, |
| { |
| "epoch": 1.0800162469536962, |
| "grad_norm": 0.5013768900277689, |
| "learning_rate": 8.076070558595188e-06, |
| "loss": 0.5847, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.0812347684809098, |
| "grad_norm": 0.5113716323758455, |
| "learning_rate": 8.070474179593088e-06, |
| "loss": 0.5841, |
| "step": 887 |
| }, |
| { |
| "epoch": 1.0824532900081234, |
| "grad_norm": 0.4304893769830929, |
| "learning_rate": 8.064871618293647e-06, |
| "loss": 0.474, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.0836718115353372, |
| "grad_norm": 0.5581590870053381, |
| "learning_rate": 8.05926288597743e-06, |
| "loss": 0.5883, |
| "step": 889 |
| }, |
| { |
| "epoch": 1.0848903330625508, |
| "grad_norm": 0.5966885478295298, |
| "learning_rate": 8.053647993937436e-06, |
| "loss": 0.6114, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.0861088545897644, |
| "grad_norm": 0.45798182910038504, |
| "learning_rate": 8.048026953479062e-06, |
| "loss": 0.5349, |
| "step": 891 |
| }, |
| { |
| "epoch": 1.0873273761169782, |
| "grad_norm": 0.5977190234288519, |
| "learning_rate": 8.042399775920084e-06, |
| "loss": 0.5822, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.0885458976441917, |
| "grad_norm": 0.5579549068887683, |
| "learning_rate": 8.036766472590636e-06, |
| "loss": 0.5892, |
| "step": 893 |
| }, |
| { |
| "epoch": 1.0897644191714053, |
| "grad_norm": 0.5035624965150097, |
| "learning_rate": 8.031127054833192e-06, |
| "loss": 0.5278, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.090982940698619, |
| "grad_norm": 0.569184764093924, |
| "learning_rate": 8.025481534002524e-06, |
| "loss": 0.5904, |
| "step": 895 |
| }, |
| { |
| "epoch": 1.0922014622258327, |
| "grad_norm": 0.47339482033152885, |
| "learning_rate": 8.019829921465703e-06, |
| "loss": 0.5598, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.0934199837530463, |
| "grad_norm": 0.4510131001279952, |
| "learning_rate": 8.014172228602063e-06, |
| "loss": 0.5218, |
| "step": 897 |
| }, |
| { |
| "epoch": 1.0946385052802599, |
| "grad_norm": 0.5778676271124781, |
| "learning_rate": 8.00850846680318e-06, |
| "loss": 0.6047, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.0958570268074737, |
| "grad_norm": 0.437095810398411, |
| "learning_rate": 8.002838647472848e-06, |
| "loss": 0.5497, |
| "step": 899 |
| }, |
| { |
| "epoch": 1.0970755483346872, |
| "grad_norm": 0.5562520913467127, |
| "learning_rate": 7.997162782027061e-06, |
| "loss": 0.5555, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.0982940698619008, |
| "grad_norm": 0.49447252137766545, |
| "learning_rate": 7.991480881893982e-06, |
| "loss": 0.5282, |
| "step": 901 |
| }, |
| { |
| "epoch": 1.0995125913891146, |
| "grad_norm": 0.5223776301957348, |
| "learning_rate": 7.985792958513932e-06, |
| "loss": 0.5936, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.1007311129163282, |
| "grad_norm": 0.43743454592876513, |
| "learning_rate": 7.98009902333935e-06, |
| "loss": 0.5209, |
| "step": 903 |
| }, |
| { |
| "epoch": 1.1019496344435418, |
| "grad_norm": 0.48630293369462313, |
| "learning_rate": 7.974399087834786e-06, |
| "loss": 0.5629, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.1031681559707556, |
| "grad_norm": 0.4518898797022784, |
| "learning_rate": 7.968693163476872e-06, |
| "loss": 0.5469, |
| "step": 905 |
| }, |
| { |
| "epoch": 1.1043866774979691, |
| "grad_norm": 0.5599257334925746, |
| "learning_rate": 7.962981261754295e-06, |
| "loss": 0.6093, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.1056051990251827, |
| "grad_norm": 0.508379851023288, |
| "learning_rate": 7.957263394167778e-06, |
| "loss": 0.5502, |
| "step": 907 |
| }, |
| { |
| "epoch": 1.1068237205523965, |
| "grad_norm": 0.46905549399423435, |
| "learning_rate": 7.951539572230058e-06, |
| "loss": 0.5498, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.10804224207961, |
| "grad_norm": 0.5331570716206057, |
| "learning_rate": 7.945809807465857e-06, |
| "loss": 0.5936, |
| "step": 909 |
| }, |
| { |
| "epoch": 1.1092607636068237, |
| "grad_norm": 0.43447287523932976, |
| "learning_rate": 7.940074111411869e-06, |
| "loss": 0.5205, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.1104792851340373, |
| "grad_norm": 0.4675250634574423, |
| "learning_rate": 7.934332495616723e-06, |
| "loss": 0.5921, |
| "step": 911 |
| }, |
| { |
| "epoch": 1.111697806661251, |
| "grad_norm": 0.5710382430607513, |
| "learning_rate": 7.928584971640974e-06, |
| "loss": 0.5528, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.1129163281884646, |
| "grad_norm": 0.43616129376419555, |
| "learning_rate": 7.922831551057068e-06, |
| "loss": 0.5304, |
| "step": 913 |
| }, |
| { |
| "epoch": 1.1141348497156782, |
| "grad_norm": 0.4931780007557348, |
| "learning_rate": 7.917072245449327e-06, |
| "loss": 0.5667, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.115353371242892, |
| "grad_norm": 0.46266355232192513, |
| "learning_rate": 7.91130706641392e-06, |
| "loss": 0.557, |
| "step": 915 |
| }, |
| { |
| "epoch": 1.1165718927701056, |
| "grad_norm": 0.4769121004651534, |
| "learning_rate": 7.90553602555884e-06, |
| "loss": 0.5761, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.1177904142973192, |
| "grad_norm": 0.4543130942521957, |
| "learning_rate": 7.899759134503888e-06, |
| "loss": 0.5667, |
| "step": 917 |
| }, |
| { |
| "epoch": 1.119008935824533, |
| "grad_norm": 0.4622820207175306, |
| "learning_rate": 7.893976404880643e-06, |
| "loss": 0.5217, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.1202274573517466, |
| "grad_norm": 0.45946359941638926, |
| "learning_rate": 7.888187848332434e-06, |
| "loss": 0.552, |
| "step": 919 |
| }, |
| { |
| "epoch": 1.1214459788789601, |
| "grad_norm": 0.5221530283186372, |
| "learning_rate": 7.88239347651433e-06, |
| "loss": 0.6037, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.122664500406174, |
| "grad_norm": 0.490304437758209, |
| "learning_rate": 7.876593301093104e-06, |
| "loss": 0.5435, |
| "step": 921 |
| }, |
| { |
| "epoch": 1.1238830219333875, |
| "grad_norm": 0.5353872887084351, |
| "learning_rate": 7.870787333747216e-06, |
| "loss": 0.5465, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.125101543460601, |
| "grad_norm": 0.5305459219097892, |
| "learning_rate": 7.864975586166788e-06, |
| "loss": 0.5401, |
| "step": 923 |
| }, |
| { |
| "epoch": 1.126320064987815, |
| "grad_norm": 0.4522121891276298, |
| "learning_rate": 7.859158070053578e-06, |
| "loss": 0.56, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.1275385865150285, |
| "grad_norm": 0.5400674612069138, |
| "learning_rate": 7.853334797120961e-06, |
| "loss": 0.5938, |
| "step": 925 |
| }, |
| { |
| "epoch": 1.128757108042242, |
| "grad_norm": 0.4735679351556697, |
| "learning_rate": 7.847505779093906e-06, |
| "loss": 0.5517, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.1299756295694556, |
| "grad_norm": 0.48850903658646466, |
| "learning_rate": 7.841671027708945e-06, |
| "loss": 0.5805, |
| "step": 927 |
| }, |
| { |
| "epoch": 1.1311941510966694, |
| "grad_norm": 0.4465079826503964, |
| "learning_rate": 7.835830554714153e-06, |
| "loss": 0.5332, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.132412672623883, |
| "grad_norm": 0.5630070888376983, |
| "learning_rate": 7.82998437186913e-06, |
| "loss": 0.5744, |
| "step": 929 |
| }, |
| { |
| "epoch": 1.1336311941510966, |
| "grad_norm": 0.4850227941162986, |
| "learning_rate": 7.824132490944968e-06, |
| "loss": 0.5284, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.1348497156783104, |
| "grad_norm": 0.5473017535296978, |
| "learning_rate": 7.818274923724237e-06, |
| "loss": 0.5853, |
| "step": 931 |
| }, |
| { |
| "epoch": 1.136068237205524, |
| "grad_norm": 0.6180360857968815, |
| "learning_rate": 7.81241168200095e-06, |
| "loss": 0.6005, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.1372867587327375, |
| "grad_norm": 0.606221772548701, |
| "learning_rate": 7.80654277758055e-06, |
| "loss": 0.5534, |
| "step": 933 |
| }, |
| { |
| "epoch": 1.1385052802599513, |
| "grad_norm": 0.4683974906247182, |
| "learning_rate": 7.80066822227988e-06, |
| "loss": 0.5557, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.139723801787165, |
| "grad_norm": 0.5733918926578689, |
| "learning_rate": 7.794788027927165e-06, |
| "loss": 0.5617, |
| "step": 935 |
| }, |
| { |
| "epoch": 1.1409423233143785, |
| "grad_norm": 0.5394769205967501, |
| "learning_rate": 7.788902206361974e-06, |
| "loss": 0.5949, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.1421608448415923, |
| "grad_norm": 0.4616046919338432, |
| "learning_rate": 7.783010769435216e-06, |
| "loss": 0.5173, |
| "step": 937 |
| }, |
| { |
| "epoch": 1.1433793663688059, |
| "grad_norm": 0.5796955213884182, |
| "learning_rate": 7.7771137290091e-06, |
| "loss": 0.5924, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.1445978878960195, |
| "grad_norm": 0.5847720129488866, |
| "learning_rate": 7.771211096957125e-06, |
| "loss": 0.5562, |
| "step": 939 |
| }, |
| { |
| "epoch": 1.145816409423233, |
| "grad_norm": 0.5171314095714995, |
| "learning_rate": 7.765302885164038e-06, |
| "loss": 0.5548, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.1470349309504468, |
| "grad_norm": 0.49901458608547633, |
| "learning_rate": 7.759389105525832e-06, |
| "loss": 0.5725, |
| "step": 941 |
| }, |
| { |
| "epoch": 1.1482534524776604, |
| "grad_norm": 0.5352472484551857, |
| "learning_rate": 7.753469769949701e-06, |
| "loss": 0.5582, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.149471974004874, |
| "grad_norm": 0.6669984026812862, |
| "learning_rate": 7.747544890354031e-06, |
| "loss": 0.6313, |
| "step": 943 |
| }, |
| { |
| "epoch": 1.1506904955320878, |
| "grad_norm": 0.4640017618478166, |
| "learning_rate": 7.74161447866837e-06, |
| "loss": 0.5275, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.1519090170593014, |
| "grad_norm": 0.5032260303359475, |
| "learning_rate": 7.735678546833403e-06, |
| "loss": 0.5405, |
| "step": 945 |
| }, |
| { |
| "epoch": 1.153127538586515, |
| "grad_norm": 0.545384651096698, |
| "learning_rate": 7.729737106800932e-06, |
| "loss": 0.5856, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.1543460601137288, |
| "grad_norm": 0.5735240939112272, |
| "learning_rate": 7.723790170533848e-06, |
| "loss": 0.571, |
| "step": 947 |
| }, |
| { |
| "epoch": 1.1555645816409423, |
| "grad_norm": 0.4552234746793405, |
| "learning_rate": 7.717837750006106e-06, |
| "loss": 0.5067, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.156783103168156, |
| "grad_norm": 0.49406048197174507, |
| "learning_rate": 7.71187985720271e-06, |
| "loss": 0.592, |
| "step": 949 |
| }, |
| { |
| "epoch": 1.1580016246953697, |
| "grad_norm": 0.5489847996831881, |
| "learning_rate": 7.705916504119679e-06, |
| "loss": 0.5716, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.1592201462225833, |
| "grad_norm": 0.48074624532511123, |
| "learning_rate": 7.699947702764021e-06, |
| "loss": 0.5287, |
| "step": 951 |
| }, |
| { |
| "epoch": 1.1604386677497969, |
| "grad_norm": 0.4833115004977427, |
| "learning_rate": 7.693973465153724e-06, |
| "loss": 0.5667, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.1616571892770104, |
| "grad_norm": 0.5472052571967937, |
| "learning_rate": 7.68799380331771e-06, |
| "loss": 0.5806, |
| "step": 953 |
| }, |
| { |
| "epoch": 1.1628757108042242, |
| "grad_norm": 0.4381241429842595, |
| "learning_rate": 7.682008729295834e-06, |
| "loss": 0.5448, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.1640942323314378, |
| "grad_norm": 0.6129536550799662, |
| "learning_rate": 7.676018255138841e-06, |
| "loss": 0.6091, |
| "step": 955 |
| }, |
| { |
| "epoch": 1.1653127538586514, |
| "grad_norm": 0.524234969513479, |
| "learning_rate": 7.67002239290835e-06, |
| "loss": 0.5363, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.1665312753858652, |
| "grad_norm": 0.43755065750263256, |
| "learning_rate": 7.664021154676828e-06, |
| "loss": 0.5683, |
| "step": 957 |
| }, |
| { |
| "epoch": 1.1677497969130788, |
| "grad_norm": 0.4767439220213808, |
| "learning_rate": 7.658014552527572e-06, |
| "loss": 0.5201, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.1689683184402924, |
| "grad_norm": 0.6051473086713034, |
| "learning_rate": 7.652002598554675e-06, |
| "loss": 0.6148, |
| "step": 959 |
| }, |
| { |
| "epoch": 1.1701868399675062, |
| "grad_norm": 0.442810424258257, |
| "learning_rate": 7.645985304863004e-06, |
| "loss": 0.5089, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.1714053614947197, |
| "grad_norm": 0.5212534237408961, |
| "learning_rate": 7.639962683568178e-06, |
| "loss": 0.6398, |
| "step": 961 |
| }, |
| { |
| "epoch": 1.1726238830219333, |
| "grad_norm": 0.4782128214916858, |
| "learning_rate": 7.633934746796545e-06, |
| "loss": 0.5247, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.1738424045491471, |
| "grad_norm": 0.555997733569589, |
| "learning_rate": 7.627901506685157e-06, |
| "loss": 0.57, |
| "step": 963 |
| }, |
| { |
| "epoch": 1.1750609260763607, |
| "grad_norm": 0.4524690440478936, |
| "learning_rate": 7.621862975381739e-06, |
| "loss": 0.5032, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.1762794476035743, |
| "grad_norm": 0.5558207018565952, |
| "learning_rate": 7.615819165044671e-06, |
| "loss": 0.6055, |
| "step": 965 |
| }, |
| { |
| "epoch": 1.1774979691307879, |
| "grad_norm": 0.5285401986639633, |
| "learning_rate": 7.609770087842969e-06, |
| "loss": 0.5232, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.1787164906580017, |
| "grad_norm": 0.4906926197877719, |
| "learning_rate": 7.603715755956243e-06, |
| "loss": 0.6184, |
| "step": 967 |
| }, |
| { |
| "epoch": 1.1799350121852152, |
| "grad_norm": 0.5453800647325697, |
| "learning_rate": 7.597656181574691e-06, |
| "loss": 0.5449, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.181153533712429, |
| "grad_norm": 0.532023507332386, |
| "learning_rate": 7.5915913768990615e-06, |
| "loss": 0.574, |
| "step": 969 |
| }, |
| { |
| "epoch": 1.1823720552396426, |
| "grad_norm": 0.46068002444123424, |
| "learning_rate": 7.585521354140638e-06, |
| "loss": 0.5616, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.1835905767668562, |
| "grad_norm": 0.45366600351939207, |
| "learning_rate": 7.57944612552121e-06, |
| "loss": 0.5576, |
| "step": 971 |
| }, |
| { |
| "epoch": 1.1848090982940698, |
| "grad_norm": 0.5035963241142227, |
| "learning_rate": 7.573365703273045e-06, |
| "loss": 0.5842, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.1860276198212836, |
| "grad_norm": 0.46429524269523453, |
| "learning_rate": 7.567280099638874e-06, |
| "loss": 0.5603, |
| "step": 973 |
| }, |
| { |
| "epoch": 1.1872461413484972, |
| "grad_norm": 0.4391995658392802, |
| "learning_rate": 7.561189326871854e-06, |
| "loss": 0.5483, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.1884646628757107, |
| "grad_norm": 0.5688078918566764, |
| "learning_rate": 7.555093397235553e-06, |
| "loss": 0.6145, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.1896831844029245, |
| "grad_norm": 0.4535069143341333, |
| "learning_rate": 7.548992323003923e-06, |
| "loss": 0.529, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.190901705930138, |
| "grad_norm": 0.5610828923463264, |
| "learning_rate": 7.542886116461272e-06, |
| "loss": 0.5604, |
| "step": 977 |
| }, |
| { |
| "epoch": 1.1921202274573517, |
| "grad_norm": 0.49771566362561265, |
| "learning_rate": 7.536774789902246e-06, |
| "loss": 0.5339, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.1933387489845655, |
| "grad_norm": 0.5055933911391732, |
| "learning_rate": 7.530658355631795e-06, |
| "loss": 0.5307, |
| "step": 979 |
| }, |
| { |
| "epoch": 1.194557270511779, |
| "grad_norm": 0.5075577294535538, |
| "learning_rate": 7.524536825965154e-06, |
| "loss": 0.5604, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.1957757920389926, |
| "grad_norm": 0.5520230309503728, |
| "learning_rate": 7.518410213227823e-06, |
| "loss": 0.6162, |
| "step": 981 |
| }, |
| { |
| "epoch": 1.1969943135662064, |
| "grad_norm": 0.5218152039597276, |
| "learning_rate": 7.512278529755529e-06, |
| "loss": 0.5613, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.19821283509342, |
| "grad_norm": 0.4971095496314555, |
| "learning_rate": 7.506141787894214e-06, |
| "loss": 0.5643, |
| "step": 983 |
| }, |
| { |
| "epoch": 1.1994313566206336, |
| "grad_norm": 0.5351931771239321, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 0.5365, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.2006498781478472, |
| "grad_norm": 0.49713221603010127, |
| "learning_rate": 7.493853178439177e-06, |
| "loss": 0.5276, |
| "step": 985 |
| }, |
| { |
| "epoch": 1.201868399675061, |
| "grad_norm": 0.49687942243856253, |
| "learning_rate": 7.48770133558816e-06, |
| "loss": 0.5705, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.2030869212022746, |
| "grad_norm": 0.4638420387813551, |
| "learning_rate": 7.481544483833485e-06, |
| "loss": 0.5143, |
| "step": 987 |
| }, |
| { |
| "epoch": 1.2043054427294881, |
| "grad_norm": 0.5737984880330318, |
| "learning_rate": 7.475382635571761e-06, |
| "loss": 0.6105, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.205523964256702, |
| "grad_norm": 0.4548720894167483, |
| "learning_rate": 7.4692158032096706e-06, |
| "loss": 0.5409, |
| "step": 989 |
| }, |
| { |
| "epoch": 1.2067424857839155, |
| "grad_norm": 0.49711497244164915, |
| "learning_rate": 7.463043999163919e-06, |
| "loss": 0.5803, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.207961007311129, |
| "grad_norm": 0.47268020267724503, |
| "learning_rate": 7.456867235861231e-06, |
| "loss": 0.563, |
| "step": 991 |
| }, |
| { |
| "epoch": 1.209179528838343, |
| "grad_norm": 0.4431695796449243, |
| "learning_rate": 7.450685525738315e-06, |
| "loss": 0.5458, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.2103980503655565, |
| "grad_norm": 0.5514220959709781, |
| "learning_rate": 7.444498881241835e-06, |
| "loss": 0.5719, |
| "step": 993 |
| }, |
| { |
| "epoch": 1.21161657189277, |
| "grad_norm": 0.48730651156910637, |
| "learning_rate": 7.4383073148283945e-06, |
| "loss": 0.5547, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.2128350934199839, |
| "grad_norm": 0.48026701020561735, |
| "learning_rate": 7.432110838964508e-06, |
| "loss": 0.5446, |
| "step": 995 |
| }, |
| { |
| "epoch": 1.2140536149471974, |
| "grad_norm": 0.49526550877005804, |
| "learning_rate": 7.4259094661265685e-06, |
| "loss": 0.5539, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.215272136474411, |
| "grad_norm": 0.5033075517007225, |
| "learning_rate": 7.419703208800839e-06, |
| "loss": 0.5885, |
| "step": 997 |
| }, |
| { |
| "epoch": 1.2164906580016246, |
| "grad_norm": 0.4591330610679407, |
| "learning_rate": 7.413492079483405e-06, |
| "loss": 0.4958, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.2177091795288384, |
| "grad_norm": 0.5435516527726211, |
| "learning_rate": 7.407276090680173e-06, |
| "loss": 0.5941, |
| "step": 999 |
| }, |
| { |
| "epoch": 1.218927701056052, |
| "grad_norm": 0.5014818934661753, |
| "learning_rate": 7.401055254906829e-06, |
| "loss": 0.5674, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.2201462225832655, |
| "grad_norm": 0.5506374382220622, |
| "learning_rate": 7.394829584688816e-06, |
| "loss": 0.5623, |
| "step": 1001 |
| }, |
| { |
| "epoch": 1.2213647441104794, |
| "grad_norm": 0.47988582460651985, |
| "learning_rate": 7.388599092561315e-06, |
| "loss": 0.579, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.222583265637693, |
| "grad_norm": 0.5116646928435937, |
| "learning_rate": 7.382363791069214e-06, |
| "loss": 0.5789, |
| "step": 1003 |
| }, |
| { |
| "epoch": 1.2238017871649065, |
| "grad_norm": 0.5815639981335669, |
| "learning_rate": 7.376123692767084e-06, |
| "loss": 0.5306, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.2250203086921203, |
| "grad_norm": 0.47545875532554605, |
| "learning_rate": 7.369878810219154e-06, |
| "loss": 0.574, |
| "step": 1005 |
| }, |
| { |
| "epoch": 1.2262388302193339, |
| "grad_norm": 0.5843762256050973, |
| "learning_rate": 7.363629155999289e-06, |
| "loss": 0.5835, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.2274573517465475, |
| "grad_norm": 0.49038029629420044, |
| "learning_rate": 7.357374742690956e-06, |
| "loss": 0.5277, |
| "step": 1007 |
| }, |
| { |
| "epoch": 1.2286758732737613, |
| "grad_norm": 0.4825203440227731, |
| "learning_rate": 7.351115582887212e-06, |
| "loss": 0.5749, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.2298943948009748, |
| "grad_norm": 0.5230621508499962, |
| "learning_rate": 7.344851689190662e-06, |
| "loss": 0.5494, |
| "step": 1009 |
| }, |
| { |
| "epoch": 1.2311129163281884, |
| "grad_norm": 0.49942387299855917, |
| "learning_rate": 7.33858307421345e-06, |
| "loss": 0.5684, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.232331437855402, |
| "grad_norm": 0.5550781071831415, |
| "learning_rate": 7.3323097505772225e-06, |
| "loss": 0.5552, |
| "step": 1011 |
| }, |
| { |
| "epoch": 1.2335499593826158, |
| "grad_norm": 0.5160851429477965, |
| "learning_rate": 7.326031730913107e-06, |
| "loss": 0.5365, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.2347684809098294, |
| "grad_norm": 0.5594132080926748, |
| "learning_rate": 7.319749027861687e-06, |
| "loss": 0.5805, |
| "step": 1013 |
| }, |
| { |
| "epoch": 1.235987002437043, |
| "grad_norm": 0.5035664881102385, |
| "learning_rate": 7.313461654072974e-06, |
| "loss": 0.5572, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.2372055239642568, |
| "grad_norm": 0.5011647298301126, |
| "learning_rate": 7.3071696222063874e-06, |
| "loss": 0.5736, |
| "step": 1015 |
| }, |
| { |
| "epoch": 1.2384240454914703, |
| "grad_norm": 0.5003447796526637, |
| "learning_rate": 7.300872944930724e-06, |
| "loss": 0.5724, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.239642567018684, |
| "grad_norm": 0.4488541730554654, |
| "learning_rate": 7.2945716349241305e-06, |
| "loss": 0.5271, |
| "step": 1017 |
| }, |
| { |
| "epoch": 1.2408610885458977, |
| "grad_norm": 0.48397897498100484, |
| "learning_rate": 7.288265704874089e-06, |
| "loss": 0.5702, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.2420796100731113, |
| "grad_norm": 0.46076984680494393, |
| "learning_rate": 7.281955167477372e-06, |
| "loss": 0.5235, |
| "step": 1019 |
| }, |
| { |
| "epoch": 1.2432981316003249, |
| "grad_norm": 0.46851694123351845, |
| "learning_rate": 7.2756400354400445e-06, |
| "loss": 0.5237, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.2445166531275387, |
| "grad_norm": 0.48677378118465786, |
| "learning_rate": 7.2693203214774084e-06, |
| "loss": 0.6109, |
| "step": 1021 |
| }, |
| { |
| "epoch": 1.2457351746547523, |
| "grad_norm": 0.4780766187805638, |
| "learning_rate": 7.262996038314001e-06, |
| "loss": 0.5765, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.2469536961819658, |
| "grad_norm": 0.4640167779478858, |
| "learning_rate": 7.2566671986835515e-06, |
| "loss": 0.5642, |
| "step": 1023 |
| }, |
| { |
| "epoch": 1.2481722177091794, |
| "grad_norm": 0.48778459720464146, |
| "learning_rate": 7.25033381532897e-06, |
| "loss": 0.4946, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.2493907392363932, |
| "grad_norm": 0.4659728876017271, |
| "learning_rate": 7.243995901002312e-06, |
| "loss": 0.5638, |
| "step": 1025 |
| }, |
| { |
| "epoch": 1.2506092607636068, |
| "grad_norm": 0.4038916973792116, |
| "learning_rate": 7.237653468464756e-06, |
| "loss": 0.5607, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.2518277822908206, |
| "grad_norm": 0.5567339438269147, |
| "learning_rate": 7.231306530486579e-06, |
| "loss": 0.5561, |
| "step": 1027 |
| }, |
| { |
| "epoch": 1.2530463038180342, |
| "grad_norm": 0.4641852200663108, |
| "learning_rate": 7.224955099847129e-06, |
| "loss": 0.6096, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.2542648253452477, |
| "grad_norm": 0.4411515265169084, |
| "learning_rate": 7.218599189334799e-06, |
| "loss": 0.4709, |
| "step": 1029 |
| }, |
| { |
| "epoch": 1.2554833468724613, |
| "grad_norm": 0.5058133934757223, |
| "learning_rate": 7.212238811747003e-06, |
| "loss": 0.5904, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.2567018683996751, |
| "grad_norm": 0.41291563737696013, |
| "learning_rate": 7.205873979890151e-06, |
| "loss": 0.5436, |
| "step": 1031 |
| }, |
| { |
| "epoch": 1.2579203899268887, |
| "grad_norm": 0.4994662597356207, |
| "learning_rate": 7.199504706579617e-06, |
| "loss": 0.6102, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.2591389114541023, |
| "grad_norm": 0.419031706073167, |
| "learning_rate": 7.193131004639722e-06, |
| "loss": 0.5104, |
| "step": 1033 |
| }, |
| { |
| "epoch": 1.260357432981316, |
| "grad_norm": 0.4373098819276125, |
| "learning_rate": 7.186752886903702e-06, |
| "loss": 0.5539, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.2615759545085297, |
| "grad_norm": 0.42312469752099624, |
| "learning_rate": 7.180370366213684e-06, |
| "loss": 0.5685, |
| "step": 1035 |
| }, |
| { |
| "epoch": 1.2627944760357432, |
| "grad_norm": 0.4976440200214435, |
| "learning_rate": 7.173983455420659e-06, |
| "loss": 0.5886, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.2640129975629568, |
| "grad_norm": 0.4458571719063019, |
| "learning_rate": 7.167592167384461e-06, |
| "loss": 0.5481, |
| "step": 1037 |
| }, |
| { |
| "epoch": 1.2652315190901706, |
| "grad_norm": 0.5011046191959967, |
| "learning_rate": 7.161196514973735e-06, |
| "loss": 0.591, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.2664500406173842, |
| "grad_norm": 0.49133842958144974, |
| "learning_rate": 7.154796511065914e-06, |
| "loss": 0.5523, |
| "step": 1039 |
| }, |
| { |
| "epoch": 1.267668562144598, |
| "grad_norm": 0.47022131838731085, |
| "learning_rate": 7.148392168547191e-06, |
| "loss": 0.5736, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.2688870836718116, |
| "grad_norm": 0.41386960779050597, |
| "learning_rate": 7.141983500312498e-06, |
| "loss": 0.5529, |
| "step": 1041 |
| }, |
| { |
| "epoch": 1.2701056051990252, |
| "grad_norm": 0.44977069875020453, |
| "learning_rate": 7.135570519265473e-06, |
| "loss": 0.548, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.2713241267262387, |
| "grad_norm": 0.505607270978524, |
| "learning_rate": 7.129153238318441e-06, |
| "loss": 0.5685, |
| "step": 1043 |
| }, |
| { |
| "epoch": 1.2725426482534525, |
| "grad_norm": 0.4473291490790123, |
| "learning_rate": 7.122731670392381e-06, |
| "loss": 0.5914, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.2737611697806661, |
| "grad_norm": 0.42761462653683685, |
| "learning_rate": 7.116305828416907e-06, |
| "loss": 0.5596, |
| "step": 1045 |
| }, |
| { |
| "epoch": 1.2749796913078797, |
| "grad_norm": 0.5367569602527996, |
| "learning_rate": 7.109875725330239e-06, |
| "loss": 0.5705, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.2761982128350935, |
| "grad_norm": 0.4239534982631823, |
| "learning_rate": 7.1034413740791705e-06, |
| "loss": 0.4988, |
| "step": 1047 |
| }, |
| { |
| "epoch": 1.277416734362307, |
| "grad_norm": 0.5193109373280052, |
| "learning_rate": 7.097002787619059e-06, |
| "loss": 0.5812, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.2786352558895206, |
| "grad_norm": 0.5147411712979314, |
| "learning_rate": 7.090559978913781e-06, |
| "loss": 0.5916, |
| "step": 1049 |
| }, |
| { |
| "epoch": 1.2798537774167342, |
| "grad_norm": 0.4224143215053458, |
| "learning_rate": 7.0841129609357165e-06, |
| "loss": 0.4905, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.281072298943948, |
| "grad_norm": 0.47217055541643876, |
| "learning_rate": 7.0776617466657196e-06, |
| "loss": 0.5592, |
| "step": 1051 |
| }, |
| { |
| "epoch": 1.2822908204711616, |
| "grad_norm": 0.4826081486423026, |
| "learning_rate": 7.071206349093097e-06, |
| "loss": 0.5822, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.2835093419983754, |
| "grad_norm": 0.42489592319050484, |
| "learning_rate": 7.064746781215578e-06, |
| "loss": 0.539, |
| "step": 1053 |
| }, |
| { |
| "epoch": 1.284727863525589, |
| "grad_norm": 0.4378036437269882, |
| "learning_rate": 7.058283056039283e-06, |
| "loss": 0.5224, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.2859463850528026, |
| "grad_norm": 0.5090205584091956, |
| "learning_rate": 7.051815186578711e-06, |
| "loss": 0.6022, |
| "step": 1055 |
| }, |
| { |
| "epoch": 1.2871649065800161, |
| "grad_norm": 0.421460820182392, |
| "learning_rate": 7.045343185856701e-06, |
| "loss": 0.5371, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.28838342810723, |
| "grad_norm": 0.45568572401745694, |
| "learning_rate": 7.038867066904407e-06, |
| "loss": 0.5549, |
| "step": 1057 |
| }, |
| { |
| "epoch": 1.2896019496344435, |
| "grad_norm": 0.4249363344861208, |
| "learning_rate": 7.032386842761282e-06, |
| "loss": 0.5434, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.2908204711616573, |
| "grad_norm": 0.4562034562178344, |
| "learning_rate": 7.025902526475039e-06, |
| "loss": 0.5494, |
| "step": 1059 |
| }, |
| { |
| "epoch": 1.292038992688871, |
| "grad_norm": 0.5341880271433396, |
| "learning_rate": 7.0194141311016336e-06, |
| "loss": 0.613, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.2932575142160845, |
| "grad_norm": 0.4504428137448532, |
| "learning_rate": 7.0129216697052345e-06, |
| "loss": 0.5016, |
| "step": 1061 |
| }, |
| { |
| "epoch": 1.294476035743298, |
| "grad_norm": 0.48710310604219204, |
| "learning_rate": 7.006425155358195e-06, |
| "loss": 0.5966, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.2956945572705119, |
| "grad_norm": 0.4178638324054384, |
| "learning_rate": 6.99992460114103e-06, |
| "loss": 0.518, |
| "step": 1063 |
| }, |
| { |
| "epoch": 1.2969130787977254, |
| "grad_norm": 0.4592904842250764, |
| "learning_rate": 6.993420020142389e-06, |
| "loss": 0.5731, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.298131600324939, |
| "grad_norm": 0.44542709276757847, |
| "learning_rate": 6.986911425459028e-06, |
| "loss": 0.5713, |
| "step": 1065 |
| }, |
| { |
| "epoch": 1.2993501218521528, |
| "grad_norm": 0.43271038208431817, |
| "learning_rate": 6.980398830195785e-06, |
| "loss": 0.5394, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.3005686433793664, |
| "grad_norm": 0.42858083262689106, |
| "learning_rate": 6.9738822474655555e-06, |
| "loss": 0.5593, |
| "step": 1067 |
| }, |
| { |
| "epoch": 1.30178716490658, |
| "grad_norm": 0.45958843226910784, |
| "learning_rate": 6.967361690389258e-06, |
| "loss": 0.6054, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.3030056864337936, |
| "grad_norm": 0.4289960695158536, |
| "learning_rate": 6.960837172095822e-06, |
| "loss": 0.5548, |
| "step": 1069 |
| }, |
| { |
| "epoch": 1.3042242079610074, |
| "grad_norm": 0.47468738466334404, |
| "learning_rate": 6.954308705722142e-06, |
| "loss": 0.572, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.305442729488221, |
| "grad_norm": 0.47013938140744177, |
| "learning_rate": 6.947776304413072e-06, |
| "loss": 0.5705, |
| "step": 1071 |
| }, |
| { |
| "epoch": 1.3066612510154347, |
| "grad_norm": 0.42486037624655837, |
| "learning_rate": 6.941239981321379e-06, |
| "loss": 0.5541, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.3078797725426483, |
| "grad_norm": 0.49246997027712336, |
| "learning_rate": 6.9346997496077365e-06, |
| "loss": 0.5955, |
| "step": 1073 |
| }, |
| { |
| "epoch": 1.309098294069862, |
| "grad_norm": 0.4472253157123058, |
| "learning_rate": 6.92815562244068e-06, |
| "loss": 0.5347, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.3103168155970755, |
| "grad_norm": 0.4795845777067209, |
| "learning_rate": 6.921607612996591e-06, |
| "loss": 0.544, |
| "step": 1075 |
| }, |
| { |
| "epoch": 1.3115353371242893, |
| "grad_norm": 0.4858592748082412, |
| "learning_rate": 6.915055734459669e-06, |
| "loss": 0.5825, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.3127538586515028, |
| "grad_norm": 0.440529958757846, |
| "learning_rate": 6.908500000021905e-06, |
| "loss": 0.4894, |
| "step": 1077 |
| }, |
| { |
| "epoch": 1.3139723801787164, |
| "grad_norm": 0.49777302193386763, |
| "learning_rate": 6.9019404228830465e-06, |
| "loss": 0.6143, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.3151909017059302, |
| "grad_norm": 0.42490987305110145, |
| "learning_rate": 6.895377016250589e-06, |
| "loss": 0.5383, |
| "step": 1079 |
| }, |
| { |
| "epoch": 1.3164094232331438, |
| "grad_norm": 0.4250241686101231, |
| "learning_rate": 6.888809793339729e-06, |
| "loss": 0.5343, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.3176279447603574, |
| "grad_norm": 0.47908573640303, |
| "learning_rate": 6.882238767373352e-06, |
| "loss": 0.5766, |
| "step": 1081 |
| }, |
| { |
| "epoch": 1.318846466287571, |
| "grad_norm": 0.4393923199378749, |
| "learning_rate": 6.875663951582e-06, |
| "loss": 0.518, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.3200649878147848, |
| "grad_norm": 0.5298761025962999, |
| "learning_rate": 6.869085359203844e-06, |
| "loss": 0.5687, |
| "step": 1083 |
| }, |
| { |
| "epoch": 1.3212835093419983, |
| "grad_norm": 0.4742825873608696, |
| "learning_rate": 6.862503003484662e-06, |
| "loss": 0.5804, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.3225020308692121, |
| "grad_norm": 0.4633225475847929, |
| "learning_rate": 6.855916897677806e-06, |
| "loss": 0.556, |
| "step": 1085 |
| }, |
| { |
| "epoch": 1.3237205523964257, |
| "grad_norm": 0.5225783981999679, |
| "learning_rate": 6.849327055044182e-06, |
| "loss": 0.5814, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.3249390739236393, |
| "grad_norm": 0.4288152429153542, |
| "learning_rate": 6.842733488852218e-06, |
| "loss": 0.5576, |
| "step": 1087 |
| }, |
| { |
| "epoch": 1.3261575954508529, |
| "grad_norm": 0.5221719185878941, |
| "learning_rate": 6.836136212377839e-06, |
| "loss": 0.5535, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.3273761169780667, |
| "grad_norm": 0.5296939222461858, |
| "learning_rate": 6.82953523890444e-06, |
| "loss": 0.5367, |
| "step": 1089 |
| }, |
| { |
| "epoch": 1.3285946385052803, |
| "grad_norm": 0.4975997883807605, |
| "learning_rate": 6.822930581722864e-06, |
| "loss": 0.5888, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.3298131600324938, |
| "grad_norm": 0.5680495533922292, |
| "learning_rate": 6.8163222541313646e-06, |
| "loss": 0.5797, |
| "step": 1091 |
| }, |
| { |
| "epoch": 1.3310316815597076, |
| "grad_norm": 0.4587905010305772, |
| "learning_rate": 6.80971026943559e-06, |
| "loss": 0.5202, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.3322502030869212, |
| "grad_norm": 0.551574996506335, |
| "learning_rate": 6.803094640948553e-06, |
| "loss": 0.5777, |
| "step": 1093 |
| }, |
| { |
| "epoch": 1.3334687246141348, |
| "grad_norm": 0.5703735684360373, |
| "learning_rate": 6.796475381990598e-06, |
| "loss": 0.5764, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.3346872461413484, |
| "grad_norm": 0.4925036270565778, |
| "learning_rate": 6.789852505889384e-06, |
| "loss": 0.528, |
| "step": 1095 |
| }, |
| { |
| "epoch": 1.3359057676685622, |
| "grad_norm": 0.47585637004253845, |
| "learning_rate": 6.78322602597985e-06, |
| "loss": 0.5379, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.3371242891957758, |
| "grad_norm": 0.5098349120934949, |
| "learning_rate": 6.776595955604192e-06, |
| "loss": 0.5564, |
| "step": 1097 |
| }, |
| { |
| "epoch": 1.3383428107229896, |
| "grad_norm": 0.45580609051116194, |
| "learning_rate": 6.769962308111839e-06, |
| "loss": 0.5753, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.3395613322502031, |
| "grad_norm": 0.5171674920493432, |
| "learning_rate": 6.7633250968594145e-06, |
| "loss": 0.5949, |
| "step": 1099 |
| }, |
| { |
| "epoch": 1.3407798537774167, |
| "grad_norm": 0.4877120256762604, |
| "learning_rate": 6.756684335210724e-06, |
| "loss": 0.515, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.3419983753046303, |
| "grad_norm": 0.4814845112101113, |
| "learning_rate": 6.750040036536718e-06, |
| "loss": 0.5684, |
| "step": 1101 |
| }, |
| { |
| "epoch": 1.343216896831844, |
| "grad_norm": 0.5705372014720597, |
| "learning_rate": 6.743392214215473e-06, |
| "loss": 0.6171, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.3444354183590577, |
| "grad_norm": 0.41955386315882853, |
| "learning_rate": 6.736740881632156e-06, |
| "loss": 0.5509, |
| "step": 1103 |
| }, |
| { |
| "epoch": 1.3456539398862712, |
| "grad_norm": 0.5028763983027598, |
| "learning_rate": 6.7300860521790034e-06, |
| "loss": 0.5519, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.346872461413485, |
| "grad_norm": 0.4751712922206779, |
| "learning_rate": 6.723427739255291e-06, |
| "loss": 0.5871, |
| "step": 1105 |
| }, |
| { |
| "epoch": 1.3480909829406986, |
| "grad_norm": 0.44250278427343415, |
| "learning_rate": 6.716765956267313e-06, |
| "loss": 0.5563, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.3493095044679122, |
| "grad_norm": 0.42447760271061347, |
| "learning_rate": 6.710100716628345e-06, |
| "loss": 0.5246, |
| "step": 1107 |
| }, |
| { |
| "epoch": 1.3505280259951258, |
| "grad_norm": 0.4884332973463199, |
| "learning_rate": 6.7034320337586236e-06, |
| "loss": 0.5906, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.3517465475223396, |
| "grad_norm": 0.47868995347975324, |
| "learning_rate": 6.696759921085321e-06, |
| "loss": 0.56, |
| "step": 1109 |
| }, |
| { |
| "epoch": 1.3529650690495532, |
| "grad_norm": 0.4891201962969247, |
| "learning_rate": 6.690084392042514e-06, |
| "loss": 0.5387, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.354183590576767, |
| "grad_norm": 0.4799279297524152, |
| "learning_rate": 6.683405460071158e-06, |
| "loss": 0.5584, |
| "step": 1111 |
| }, |
| { |
| "epoch": 1.3554021121039805, |
| "grad_norm": 0.468730614409241, |
| "learning_rate": 6.676723138619056e-06, |
| "loss": 0.5639, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.3566206336311941, |
| "grad_norm": 0.4691028535874034, |
| "learning_rate": 6.670037441140844e-06, |
| "loss": 0.5249, |
| "step": 1113 |
| }, |
| { |
| "epoch": 1.3578391551584077, |
| "grad_norm": 0.5055139224683095, |
| "learning_rate": 6.663348381097949e-06, |
| "loss": 0.5668, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.3590576766856215, |
| "grad_norm": 0.4641835440622289, |
| "learning_rate": 6.656655971958569e-06, |
| "loss": 0.5168, |
| "step": 1115 |
| }, |
| { |
| "epoch": 1.360276198212835, |
| "grad_norm": 0.5446202821644559, |
| "learning_rate": 6.649960227197648e-06, |
| "loss": 0.613, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.3614947197400489, |
| "grad_norm": 0.4947120887114883, |
| "learning_rate": 6.6432611602968445e-06, |
| "loss": 0.5567, |
| "step": 1117 |
| }, |
| { |
| "epoch": 1.3627132412672625, |
| "grad_norm": 0.43139439093199355, |
| "learning_rate": 6.636558784744507e-06, |
| "loss": 0.5242, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.363931762794476, |
| "grad_norm": 0.5614131203778131, |
| "learning_rate": 6.629853114035643e-06, |
| "loss": 0.5333, |
| "step": 1119 |
| }, |
| { |
| "epoch": 1.3651502843216896, |
| "grad_norm": 0.47984259019139797, |
| "learning_rate": 6.623144161671899e-06, |
| "loss": 0.6073, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.3663688058489034, |
| "grad_norm": 0.48772092734746075, |
| "learning_rate": 6.616431941161525e-06, |
| "loss": 0.519, |
| "step": 1121 |
| }, |
| { |
| "epoch": 1.367587327376117, |
| "grad_norm": 0.4944650047147664, |
| "learning_rate": 6.609716466019356e-06, |
| "loss": 0.5982, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.3688058489033306, |
| "grad_norm": 0.4514730750801606, |
| "learning_rate": 6.602997749766773e-06, |
| "loss": 0.5215, |
| "step": 1123 |
| }, |
| { |
| "epoch": 1.3700243704305444, |
| "grad_norm": 0.4806270554361702, |
| "learning_rate": 6.596275805931691e-06, |
| "loss": 0.6507, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.371242891957758, |
| "grad_norm": 0.42879599863826967, |
| "learning_rate": 6.589550648048517e-06, |
| "loss": 0.5263, |
| "step": 1125 |
| }, |
| { |
| "epoch": 1.3724614134849715, |
| "grad_norm": 0.5002076010149914, |
| "learning_rate": 6.582822289658134e-06, |
| "loss": 0.544, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.373679935012185, |
| "grad_norm": 0.49996651647577767, |
| "learning_rate": 6.576090744307866e-06, |
| "loss": 0.6115, |
| "step": 1127 |
| }, |
| { |
| "epoch": 1.374898456539399, |
| "grad_norm": 0.47192523752862847, |
| "learning_rate": 6.569356025551454e-06, |
| "loss": 0.5044, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.3761169780666125, |
| "grad_norm": 0.5486850848812702, |
| "learning_rate": 6.562618146949033e-06, |
| "loss": 0.5963, |
| "step": 1129 |
| }, |
| { |
| "epoch": 1.3773354995938263, |
| "grad_norm": 0.44516782959863155, |
| "learning_rate": 6.5558771220670935e-06, |
| "loss": 0.5424, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.3785540211210399, |
| "grad_norm": 0.49271550503516953, |
| "learning_rate": 6.5491329644784655e-06, |
| "loss": 0.5241, |
| "step": 1131 |
| }, |
| { |
| "epoch": 1.3797725426482534, |
| "grad_norm": 0.5660845065509308, |
| "learning_rate": 6.542385687762287e-06, |
| "loss": 0.6154, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.380991064175467, |
| "grad_norm": 0.4271740206518289, |
| "learning_rate": 6.53563530550397e-06, |
| "loss": 0.4689, |
| "step": 1133 |
| }, |
| { |
| "epoch": 1.3822095857026808, |
| "grad_norm": 0.5195908868358481, |
| "learning_rate": 6.5288818312951886e-06, |
| "loss": 0.5462, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.3834281072298944, |
| "grad_norm": 0.5034196032593611, |
| "learning_rate": 6.5221252787338365e-06, |
| "loss": 0.587, |
| "step": 1135 |
| }, |
| { |
| "epoch": 1.384646628757108, |
| "grad_norm": 0.5196583715973591, |
| "learning_rate": 6.515365661424007e-06, |
| "loss": 0.577, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.3858651502843218, |
| "grad_norm": 0.47148796040432117, |
| "learning_rate": 6.508602992975963e-06, |
| "loss": 0.5516, |
| "step": 1137 |
| }, |
| { |
| "epoch": 1.3870836718115354, |
| "grad_norm": 0.47240263639853314, |
| "learning_rate": 6.501837287006112e-06, |
| "loss": 0.5017, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.388302193338749, |
| "grad_norm": 0.4848195827079731, |
| "learning_rate": 6.495068557136979e-06, |
| "loss": 0.6068, |
| "step": 1139 |
| }, |
| { |
| "epoch": 1.3895207148659625, |
| "grad_norm": 0.464916968432065, |
| "learning_rate": 6.4882968169971734e-06, |
| "loss": 0.5114, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.3907392363931763, |
| "grad_norm": 0.4672169290921844, |
| "learning_rate": 6.4815220802213705e-06, |
| "loss": 0.571, |
| "step": 1141 |
| }, |
| { |
| "epoch": 1.39195775792039, |
| "grad_norm": 0.45354629086847004, |
| "learning_rate": 6.474744360450274e-06, |
| "loss": 0.559, |
| "step": 1142 |
| }, |
| { |
| "epoch": 1.3931762794476037, |
| "grad_norm": 0.49697460412752753, |
| "learning_rate": 6.467963671330602e-06, |
| "loss": 0.5712, |
| "step": 1143 |
| }, |
| { |
| "epoch": 1.3943948009748173, |
| "grad_norm": 0.42597106705666193, |
| "learning_rate": 6.461180026515038e-06, |
| "loss": 0.4836, |
| "step": 1144 |
| }, |
| { |
| "epoch": 1.3956133225020309, |
| "grad_norm": 0.5696838757187256, |
| "learning_rate": 6.45439343966223e-06, |
| "loss": 0.6293, |
| "step": 1145 |
| }, |
| { |
| "epoch": 1.3968318440292444, |
| "grad_norm": 0.44015111009766694, |
| "learning_rate": 6.447603924436744e-06, |
| "loss": 0.5672, |
| "step": 1146 |
| }, |
| { |
| "epoch": 1.3980503655564582, |
| "grad_norm": 0.5171923824405892, |
| "learning_rate": 6.44081149450904e-06, |
| "loss": 0.543, |
| "step": 1147 |
| }, |
| { |
| "epoch": 1.3992688870836718, |
| "grad_norm": 0.4861104307146921, |
| "learning_rate": 6.434016163555452e-06, |
| "loss": 0.5536, |
| "step": 1148 |
| }, |
| { |
| "epoch": 1.4004874086108854, |
| "grad_norm": 0.4672428316707098, |
| "learning_rate": 6.4272179452581505e-06, |
| "loss": 0.5513, |
| "step": 1149 |
| }, |
| { |
| "epoch": 1.4017059301380992, |
| "grad_norm": 0.5041699642923018, |
| "learning_rate": 6.42041685330512e-06, |
| "loss": 0.5579, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.4029244516653128, |
| "grad_norm": 0.5689659183656529, |
| "learning_rate": 6.413612901390136e-06, |
| "loss": 0.5171, |
| "step": 1151 |
| }, |
| { |
| "epoch": 1.4041429731925263, |
| "grad_norm": 0.4852858521398993, |
| "learning_rate": 6.406806103212725e-06, |
| "loss": 0.619, |
| "step": 1152 |
| }, |
| { |
| "epoch": 1.40536149471974, |
| "grad_norm": 0.5193110473839417, |
| "learning_rate": 6.39999647247815e-06, |
| "loss": 0.549, |
| "step": 1153 |
| }, |
| { |
| "epoch": 1.4065800162469537, |
| "grad_norm": 0.4769214566829931, |
| "learning_rate": 6.393184022897375e-06, |
| "loss": 0.526, |
| "step": 1154 |
| }, |
| { |
| "epoch": 1.4077985377741673, |
| "grad_norm": 0.4334565428180597, |
| "learning_rate": 6.38636876818704e-06, |
| "loss": 0.5511, |
| "step": 1155 |
| }, |
| { |
| "epoch": 1.409017059301381, |
| "grad_norm": 0.7019468412425452, |
| "learning_rate": 6.3795507220694335e-06, |
| "loss": 0.6058, |
| "step": 1156 |
| }, |
| { |
| "epoch": 1.4102355808285947, |
| "grad_norm": 0.4559746858499104, |
| "learning_rate": 6.372729898272463e-06, |
| "loss": 0.5623, |
| "step": 1157 |
| }, |
| { |
| "epoch": 1.4114541023558083, |
| "grad_norm": 0.4873072450531043, |
| "learning_rate": 6.365906310529631e-06, |
| "loss": 0.526, |
| "step": 1158 |
| }, |
| { |
| "epoch": 1.4126726238830218, |
| "grad_norm": 0.4930877948197653, |
| "learning_rate": 6.359079972580001e-06, |
| "loss": 0.5417, |
| "step": 1159 |
| }, |
| { |
| "epoch": 1.4138911454102356, |
| "grad_norm": 0.4585436187425684, |
| "learning_rate": 6.352250898168181e-06, |
| "loss": 0.5558, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.4151096669374492, |
| "grad_norm": 0.4843507493218003, |
| "learning_rate": 6.345419101044281e-06, |
| "loss": 0.6178, |
| "step": 1161 |
| }, |
| { |
| "epoch": 1.4163281884646628, |
| "grad_norm": 0.4103238225687281, |
| "learning_rate": 6.338584594963898e-06, |
| "loss": 0.486, |
| "step": 1162 |
| }, |
| { |
| "epoch": 1.4175467099918766, |
| "grad_norm": 0.4339709319145015, |
| "learning_rate": 6.3317473936880814e-06, |
| "loss": 0.5516, |
| "step": 1163 |
| }, |
| { |
| "epoch": 1.4187652315190902, |
| "grad_norm": 0.5006035176222503, |
| "learning_rate": 6.32490751098331e-06, |
| "loss": 0.5893, |
| "step": 1164 |
| }, |
| { |
| "epoch": 1.4199837530463038, |
| "grad_norm": 0.43944118536778887, |
| "learning_rate": 6.318064960621456e-06, |
| "loss": 0.554, |
| "step": 1165 |
| }, |
| { |
| "epoch": 1.4212022745735173, |
| "grad_norm": 0.4205988668702698, |
| "learning_rate": 6.31121975637977e-06, |
| "loss": 0.5705, |
| "step": 1166 |
| }, |
| { |
| "epoch": 1.4224207961007311, |
| "grad_norm": 0.42492946208091176, |
| "learning_rate": 6.30437191204084e-06, |
| "loss": 0.5382, |
| "step": 1167 |
| }, |
| { |
| "epoch": 1.4236393176279447, |
| "grad_norm": 0.4782081072972405, |
| "learning_rate": 6.297521441392572e-06, |
| "loss": 0.6081, |
| "step": 1168 |
| }, |
| { |
| "epoch": 1.4248578391551585, |
| "grad_norm": 0.4056428801301219, |
| "learning_rate": 6.290668358228162e-06, |
| "loss": 0.5448, |
| "step": 1169 |
| }, |
| { |
| "epoch": 1.426076360682372, |
| "grad_norm": 0.4346131086300656, |
| "learning_rate": 6.2838126763460635e-06, |
| "loss": 0.5339, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.4272948822095857, |
| "grad_norm": 0.4104447709327887, |
| "learning_rate": 6.276954409549963e-06, |
| "loss": 0.5399, |
| "step": 1171 |
| }, |
| { |
| "epoch": 1.4285134037367992, |
| "grad_norm": 0.46444896204069186, |
| "learning_rate": 6.270093571648752e-06, |
| "loss": 0.5941, |
| "step": 1172 |
| }, |
| { |
| "epoch": 1.429731925264013, |
| "grad_norm": 0.4451786529645794, |
| "learning_rate": 6.263230176456497e-06, |
| "loss": 0.5384, |
| "step": 1173 |
| }, |
| { |
| "epoch": 1.4309504467912266, |
| "grad_norm": 0.47981749578622157, |
| "learning_rate": 6.256364237792419e-06, |
| "loss": 0.5765, |
| "step": 1174 |
| }, |
| { |
| "epoch": 1.4321689683184404, |
| "grad_norm": 0.4367054717344673, |
| "learning_rate": 6.249495769480856e-06, |
| "loss": 0.5124, |
| "step": 1175 |
| }, |
| { |
| "epoch": 1.433387489845654, |
| "grad_norm": 0.42899069684022384, |
| "learning_rate": 6.2426247853512355e-06, |
| "loss": 0.5524, |
| "step": 1176 |
| }, |
| { |
| "epoch": 1.4346060113728676, |
| "grad_norm": 0.4904917718170387, |
| "learning_rate": 6.23575129923806e-06, |
| "loss": 0.5613, |
| "step": 1177 |
| }, |
| { |
| "epoch": 1.4358245329000812, |
| "grad_norm": 0.7624825038153906, |
| "learning_rate": 6.228875324980862e-06, |
| "loss": 0.5469, |
| "step": 1178 |
| }, |
| { |
| "epoch": 1.437043054427295, |
| "grad_norm": 0.48032758007828885, |
| "learning_rate": 6.221996876424186e-06, |
| "loss": 0.6088, |
| "step": 1179 |
| }, |
| { |
| "epoch": 1.4382615759545085, |
| "grad_norm": 0.4261676949483954, |
| "learning_rate": 6.21511596741756e-06, |
| "loss": 0.5269, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.4394800974817221, |
| "grad_norm": 0.44938704101588606, |
| "learning_rate": 6.208232611815463e-06, |
| "loss": 0.5497, |
| "step": 1181 |
| }, |
| { |
| "epoch": 1.440698619008936, |
| "grad_norm": 0.47843420481431187, |
| "learning_rate": 6.2013468234773034e-06, |
| "loss": 0.5673, |
| "step": 1182 |
| }, |
| { |
| "epoch": 1.4419171405361495, |
| "grad_norm": 0.4143118724051908, |
| "learning_rate": 6.194458616267388e-06, |
| "loss": 0.5561, |
| "step": 1183 |
| }, |
| { |
| "epoch": 1.443135662063363, |
| "grad_norm": 0.4687400706518928, |
| "learning_rate": 6.187568004054888e-06, |
| "loss": 0.5599, |
| "step": 1184 |
| }, |
| { |
| "epoch": 1.4443541835905767, |
| "grad_norm": 0.43117586360472987, |
| "learning_rate": 6.180675000713825e-06, |
| "loss": 0.5579, |
| "step": 1185 |
| }, |
| { |
| "epoch": 1.4455727051177905, |
| "grad_norm": 0.4677168526332838, |
| "learning_rate": 6.173779620123028e-06, |
| "loss": 0.5377, |
| "step": 1186 |
| }, |
| { |
| "epoch": 1.446791226645004, |
| "grad_norm": 0.4684613773900322, |
| "learning_rate": 6.166881876166119e-06, |
| "loss": 0.5505, |
| "step": 1187 |
| }, |
| { |
| "epoch": 1.4480097481722178, |
| "grad_norm": 0.45099302981330264, |
| "learning_rate": 6.1599817827314744e-06, |
| "loss": 0.5349, |
| "step": 1188 |
| }, |
| { |
| "epoch": 1.4492282696994314, |
| "grad_norm": 0.44725643758516653, |
| "learning_rate": 6.153079353712201e-06, |
| "loss": 0.5445, |
| "step": 1189 |
| }, |
| { |
| "epoch": 1.450446791226645, |
| "grad_norm": 0.509822041445509, |
| "learning_rate": 6.14617460300611e-06, |
| "loss": 0.6048, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.4516653127538586, |
| "grad_norm": 0.48251767820083963, |
| "learning_rate": 6.139267544515689e-06, |
| "loss": 0.5214, |
| "step": 1191 |
| }, |
| { |
| "epoch": 1.4528838342810724, |
| "grad_norm": 0.462469865969966, |
| "learning_rate": 6.132358192148065e-06, |
| "loss": 0.5628, |
| "step": 1192 |
| }, |
| { |
| "epoch": 1.454102355808286, |
| "grad_norm": 0.42592720114566546, |
| "learning_rate": 6.125446559814994e-06, |
| "loss": 0.4844, |
| "step": 1193 |
| }, |
| { |
| "epoch": 1.4553208773354995, |
| "grad_norm": 0.49275532261036237, |
| "learning_rate": 6.118532661432812e-06, |
| "loss": 0.5944, |
| "step": 1194 |
| }, |
| { |
| "epoch": 1.4565393988627133, |
| "grad_norm": 0.4649906751266784, |
| "learning_rate": 6.111616510922426e-06, |
| "loss": 0.5493, |
| "step": 1195 |
| }, |
| { |
| "epoch": 1.457757920389927, |
| "grad_norm": 0.46291320623399196, |
| "learning_rate": 6.104698122209274e-06, |
| "loss": 0.5172, |
| "step": 1196 |
| }, |
| { |
| "epoch": 1.4589764419171405, |
| "grad_norm": 0.5426739834419568, |
| "learning_rate": 6.097777509223299e-06, |
| "loss": 0.5666, |
| "step": 1197 |
| }, |
| { |
| "epoch": 1.460194963444354, |
| "grad_norm": 0.45093365966871296, |
| "learning_rate": 6.090854685898928e-06, |
| "loss": 0.5357, |
| "step": 1198 |
| }, |
| { |
| "epoch": 1.4614134849715679, |
| "grad_norm": 0.46357917186858966, |
| "learning_rate": 6.083929666175031e-06, |
| "loss": 0.5102, |
| "step": 1199 |
| }, |
| { |
| "epoch": 1.4626320064987814, |
| "grad_norm": 0.42735860218881255, |
| "learning_rate": 6.077002463994908e-06, |
| "loss": 0.5353, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.4638505280259952, |
| "grad_norm": 0.48773472737225, |
| "learning_rate": 6.070073093306246e-06, |
| "loss": 0.5969, |
| "step": 1201 |
| }, |
| { |
| "epoch": 1.4650690495532088, |
| "grad_norm": 0.45583834308371346, |
| "learning_rate": 6.063141568061104e-06, |
| "loss": 0.5501, |
| "step": 1202 |
| }, |
| { |
| "epoch": 1.4662875710804224, |
| "grad_norm": 0.48230795906015783, |
| "learning_rate": 6.056207902215874e-06, |
| "loss": 0.5943, |
| "step": 1203 |
| }, |
| { |
| "epoch": 1.467506092607636, |
| "grad_norm": 0.48530024356797447, |
| "learning_rate": 6.049272109731266e-06, |
| "loss": 0.535, |
| "step": 1204 |
| }, |
| { |
| "epoch": 1.4687246141348498, |
| "grad_norm": 0.39847364405399893, |
| "learning_rate": 6.042334204572261e-06, |
| "loss": 0.5088, |
| "step": 1205 |
| }, |
| { |
| "epoch": 1.4699431356620634, |
| "grad_norm": 0.4192802944065179, |
| "learning_rate": 6.035394200708104e-06, |
| "loss": 0.5541, |
| "step": 1206 |
| }, |
| { |
| "epoch": 1.471161657189277, |
| "grad_norm": 0.5095459416726968, |
| "learning_rate": 6.02845211211226e-06, |
| "loss": 0.6044, |
| "step": 1207 |
| }, |
| { |
| "epoch": 1.4723801787164907, |
| "grad_norm": 0.4834365213328995, |
| "learning_rate": 6.021507952762392e-06, |
| "loss": 0.5698, |
| "step": 1208 |
| }, |
| { |
| "epoch": 1.4735987002437043, |
| "grad_norm": 0.43629510532697163, |
| "learning_rate": 6.014561736640334e-06, |
| "loss": 0.536, |
| "step": 1209 |
| }, |
| { |
| "epoch": 1.474817221770918, |
| "grad_norm": 0.469188019208721, |
| "learning_rate": 6.007613477732061e-06, |
| "loss": 0.5495, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.4760357432981315, |
| "grad_norm": 0.4901471440756352, |
| "learning_rate": 6.000663190027658e-06, |
| "loss": 0.5661, |
| "step": 1211 |
| }, |
| { |
| "epoch": 1.4772542648253453, |
| "grad_norm": 0.4686562631964871, |
| "learning_rate": 5.993710887521302e-06, |
| "loss": 0.5812, |
| "step": 1212 |
| }, |
| { |
| "epoch": 1.4784727863525589, |
| "grad_norm": 0.48734085024012297, |
| "learning_rate": 5.986756584211217e-06, |
| "loss": 0.5335, |
| "step": 1213 |
| }, |
| { |
| "epoch": 1.4796913078797727, |
| "grad_norm": 0.5326878131009583, |
| "learning_rate": 5.979800294099666e-06, |
| "loss": 0.5689, |
| "step": 1214 |
| }, |
| { |
| "epoch": 1.4809098294069862, |
| "grad_norm": 0.4253596342133157, |
| "learning_rate": 5.972842031192901e-06, |
| "loss": 0.5265, |
| "step": 1215 |
| }, |
| { |
| "epoch": 1.4821283509341998, |
| "grad_norm": 0.4985627825685433, |
| "learning_rate": 5.965881809501158e-06, |
| "loss": 0.5632, |
| "step": 1216 |
| }, |
| { |
| "epoch": 1.4833468724614134, |
| "grad_norm": 0.45204140138324095, |
| "learning_rate": 5.958919643038609e-06, |
| "loss": 0.5569, |
| "step": 1217 |
| }, |
| { |
| "epoch": 1.4845653939886272, |
| "grad_norm": 0.4483567522219748, |
| "learning_rate": 5.951955545823342e-06, |
| "loss": 0.5731, |
| "step": 1218 |
| }, |
| { |
| "epoch": 1.4857839155158408, |
| "grad_norm": 0.4426846776302582, |
| "learning_rate": 5.944989531877337e-06, |
| "loss": 0.528, |
| "step": 1219 |
| }, |
| { |
| "epoch": 1.4870024370430543, |
| "grad_norm": 0.44399265576382146, |
| "learning_rate": 5.938021615226431e-06, |
| "loss": 0.5489, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.4882209585702681, |
| "grad_norm": 0.4581264239244066, |
| "learning_rate": 5.93105180990029e-06, |
| "loss": 0.5794, |
| "step": 1221 |
| }, |
| { |
| "epoch": 1.4894394800974817, |
| "grad_norm": 0.4198932355319627, |
| "learning_rate": 5.924080129932386e-06, |
| "loss": 0.5179, |
| "step": 1222 |
| }, |
| { |
| "epoch": 1.4906580016246953, |
| "grad_norm": 0.47789225286091797, |
| "learning_rate": 5.9171065893599625e-06, |
| "loss": 0.5638, |
| "step": 1223 |
| }, |
| { |
| "epoch": 1.4918765231519089, |
| "grad_norm": 0.4321558944637844, |
| "learning_rate": 5.910131202224011e-06, |
| "loss": 0.5057, |
| "step": 1224 |
| }, |
| { |
| "epoch": 1.4930950446791227, |
| "grad_norm": 0.4450307808888705, |
| "learning_rate": 5.903153982569243e-06, |
| "loss": 0.5421, |
| "step": 1225 |
| }, |
| { |
| "epoch": 1.4943135662063363, |
| "grad_norm": 0.5226652256866916, |
| "learning_rate": 5.8961749444440555e-06, |
| "loss": 0.576, |
| "step": 1226 |
| }, |
| { |
| "epoch": 1.49553208773355, |
| "grad_norm": 0.42973467257152986, |
| "learning_rate": 5.8891941019005095e-06, |
| "loss": 0.6013, |
| "step": 1227 |
| }, |
| { |
| "epoch": 1.4967506092607636, |
| "grad_norm": 0.40820805235816976, |
| "learning_rate": 5.882211468994299e-06, |
| "loss": 0.5175, |
| "step": 1228 |
| }, |
| { |
| "epoch": 1.4979691307879772, |
| "grad_norm": 0.4801201178398426, |
| "learning_rate": 5.87522705978472e-06, |
| "loss": 0.5833, |
| "step": 1229 |
| }, |
| { |
| "epoch": 1.4991876523151908, |
| "grad_norm": 0.45266942815985767, |
| "learning_rate": 5.8682408883346535e-06, |
| "loss": 0.5284, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.5004061738424046, |
| "grad_norm": 0.4353303644229792, |
| "learning_rate": 5.8612529687105156e-06, |
| "loss": 0.526, |
| "step": 1231 |
| }, |
| { |
| "epoch": 1.5016246953696182, |
| "grad_norm": 0.48827760996687436, |
| "learning_rate": 5.854263314982252e-06, |
| "loss": 0.5955, |
| "step": 1232 |
| }, |
| { |
| "epoch": 1.502843216896832, |
| "grad_norm": 0.4255509928321056, |
| "learning_rate": 5.847271941223301e-06, |
| "loss": 0.5442, |
| "step": 1233 |
| }, |
| { |
| "epoch": 1.5040617384240456, |
| "grad_norm": 0.43808076740492513, |
| "learning_rate": 5.840278861510555e-06, |
| "loss": 0.5433, |
| "step": 1234 |
| }, |
| { |
| "epoch": 1.5052802599512591, |
| "grad_norm": 0.4760892817675488, |
| "learning_rate": 5.83328408992435e-06, |
| "loss": 0.5702, |
| "step": 1235 |
| }, |
| { |
| "epoch": 1.5064987814784727, |
| "grad_norm": 0.4481600783932247, |
| "learning_rate": 5.826287640548425e-06, |
| "loss": 0.5946, |
| "step": 1236 |
| }, |
| { |
| "epoch": 1.5077173030056863, |
| "grad_norm": 0.42699536589107157, |
| "learning_rate": 5.819289527469897e-06, |
| "loss": 0.5642, |
| "step": 1237 |
| }, |
| { |
| "epoch": 1.5089358245329, |
| "grad_norm": 0.4633211620631564, |
| "learning_rate": 5.812289764779232e-06, |
| "loss": 0.4845, |
| "step": 1238 |
| }, |
| { |
| "epoch": 1.510154346060114, |
| "grad_norm": 0.4571770115639661, |
| "learning_rate": 5.80528836657022e-06, |
| "loss": 0.5513, |
| "step": 1239 |
| }, |
| { |
| "epoch": 1.5113728675873275, |
| "grad_norm": 0.4701604947751102, |
| "learning_rate": 5.798285346939942e-06, |
| "loss": 0.559, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.512591389114541, |
| "grad_norm": 0.4865903026982408, |
| "learning_rate": 5.791280719988747e-06, |
| "loss": 0.5878, |
| "step": 1241 |
| }, |
| { |
| "epoch": 1.5138099106417546, |
| "grad_norm": 0.4412282841651163, |
| "learning_rate": 5.784274499820214e-06, |
| "loss": 0.5197, |
| "step": 1242 |
| }, |
| { |
| "epoch": 1.5150284321689682, |
| "grad_norm": 0.5747443137859876, |
| "learning_rate": 5.777266700541134e-06, |
| "loss": 0.6011, |
| "step": 1243 |
| }, |
| { |
| "epoch": 1.516246953696182, |
| "grad_norm": 0.4564303892112499, |
| "learning_rate": 5.770257336261482e-06, |
| "loss": 0.5279, |
| "step": 1244 |
| }, |
| { |
| "epoch": 1.5174654752233956, |
| "grad_norm": 0.45997367471162187, |
| "learning_rate": 5.763246421094373e-06, |
| "loss": 0.5255, |
| "step": 1245 |
| }, |
| { |
| "epoch": 1.5186839967506094, |
| "grad_norm": 0.4695480650402549, |
| "learning_rate": 5.7562339691560556e-06, |
| "loss": 0.5885, |
| "step": 1246 |
| }, |
| { |
| "epoch": 1.519902518277823, |
| "grad_norm": 0.5356612979245375, |
| "learning_rate": 5.749219994565863e-06, |
| "loss": 0.5569, |
| "step": 1247 |
| }, |
| { |
| "epoch": 1.5211210398050365, |
| "grad_norm": 0.5813954013182587, |
| "learning_rate": 5.742204511446203e-06, |
| "loss": 0.5544, |
| "step": 1248 |
| }, |
| { |
| "epoch": 1.5223395613322501, |
| "grad_norm": 0.43618938610834346, |
| "learning_rate": 5.7351875339225164e-06, |
| "loss": 0.5374, |
| "step": 1249 |
| }, |
| { |
| "epoch": 1.5235580828594637, |
| "grad_norm": 0.4937073666394837, |
| "learning_rate": 5.7281690761232515e-06, |
| "loss": 0.5162, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.5247766043866775, |
| "grad_norm": 0.4780704238400619, |
| "learning_rate": 5.72114915217984e-06, |
| "loss": 0.542, |
| "step": 1251 |
| }, |
| { |
| "epoch": 1.5259951259138913, |
| "grad_norm": 0.458787226662822, |
| "learning_rate": 5.714127776226667e-06, |
| "loss": 0.5708, |
| "step": 1252 |
| }, |
| { |
| "epoch": 1.5272136474411049, |
| "grad_norm": 0.4727970564003603, |
| "learning_rate": 5.707104962401034e-06, |
| "loss": 0.5678, |
| "step": 1253 |
| }, |
| { |
| "epoch": 1.5284321689683185, |
| "grad_norm": 0.42019947987975415, |
| "learning_rate": 5.7000807248431466e-06, |
| "loss": 0.4449, |
| "step": 1254 |
| }, |
| { |
| "epoch": 1.529650690495532, |
| "grad_norm": 0.5313576192243948, |
| "learning_rate": 5.693055077696069e-06, |
| "loss": 0.62, |
| "step": 1255 |
| }, |
| { |
| "epoch": 1.5308692120227456, |
| "grad_norm": 0.4133150947222481, |
| "learning_rate": 5.686028035105711e-06, |
| "loss": 0.5446, |
| "step": 1256 |
| }, |
| { |
| "epoch": 1.5320877335499594, |
| "grad_norm": 0.5182558216138413, |
| "learning_rate": 5.6789996112207865e-06, |
| "loss": 0.5589, |
| "step": 1257 |
| }, |
| { |
| "epoch": 1.533306255077173, |
| "grad_norm": 0.5235310986043601, |
| "learning_rate": 5.671969820192794e-06, |
| "loss": 0.5516, |
| "step": 1258 |
| }, |
| { |
| "epoch": 1.5345247766043868, |
| "grad_norm": 0.43543733715186, |
| "learning_rate": 5.664938676175982e-06, |
| "loss": 0.5463, |
| "step": 1259 |
| }, |
| { |
| "epoch": 1.5357432981316004, |
| "grad_norm": 0.542523163223611, |
| "learning_rate": 5.657906193327325e-06, |
| "loss": 0.5289, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.536961819658814, |
| "grad_norm": 0.6705586961902954, |
| "learning_rate": 5.650872385806492e-06, |
| "loss": 0.6, |
| "step": 1261 |
| }, |
| { |
| "epoch": 1.5381803411860275, |
| "grad_norm": 0.4252405119039053, |
| "learning_rate": 5.64383726777582e-06, |
| "loss": 0.5558, |
| "step": 1262 |
| }, |
| { |
| "epoch": 1.5393988627132411, |
| "grad_norm": 0.5168792668343379, |
| "learning_rate": 5.636800853400285e-06, |
| "loss": 0.5427, |
| "step": 1263 |
| }, |
| { |
| "epoch": 1.540617384240455, |
| "grad_norm": 0.56808607878734, |
| "learning_rate": 5.6297631568474705e-06, |
| "loss": 0.5785, |
| "step": 1264 |
| }, |
| { |
| "epoch": 1.5418359057676687, |
| "grad_norm": 0.4194312889852155, |
| "learning_rate": 5.622724192287548e-06, |
| "loss": 0.5061, |
| "step": 1265 |
| }, |
| { |
| "epoch": 1.5430544272948823, |
| "grad_norm": 0.46739113422443607, |
| "learning_rate": 5.615683973893235e-06, |
| "loss": 0.5543, |
| "step": 1266 |
| }, |
| { |
| "epoch": 1.5442729488220959, |
| "grad_norm": 0.4711436329274137, |
| "learning_rate": 5.608642515839777e-06, |
| "loss": 0.5468, |
| "step": 1267 |
| }, |
| { |
| "epoch": 1.5454914703493094, |
| "grad_norm": 0.45925976085714865, |
| "learning_rate": 5.601599832304915e-06, |
| "loss": 0.5533, |
| "step": 1268 |
| }, |
| { |
| "epoch": 1.546709991876523, |
| "grad_norm": 0.4629430310984532, |
| "learning_rate": 5.594555937468856e-06, |
| "loss": 0.6238, |
| "step": 1269 |
| }, |
| { |
| "epoch": 1.5479285134037368, |
| "grad_norm": 0.4430875583116207, |
| "learning_rate": 5.587510845514249e-06, |
| "loss": 0.5334, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.5491470349309504, |
| "grad_norm": 0.4964005647402626, |
| "learning_rate": 5.5804645706261515e-06, |
| "loss": 0.5563, |
| "step": 1271 |
| }, |
| { |
| "epoch": 1.5503655564581642, |
| "grad_norm": 0.47908339446690995, |
| "learning_rate": 5.573417126992004e-06, |
| "loss": 0.5761, |
| "step": 1272 |
| }, |
| { |
| "epoch": 1.5515840779853778, |
| "grad_norm": 0.4320719099995596, |
| "learning_rate": 5.5663685288015955e-06, |
| "loss": 0.5519, |
| "step": 1273 |
| }, |
| { |
| "epoch": 1.5528025995125914, |
| "grad_norm": 0.45893470814872167, |
| "learning_rate": 5.5593187902470465e-06, |
| "loss": 0.5122, |
| "step": 1274 |
| }, |
| { |
| "epoch": 1.554021121039805, |
| "grad_norm": 0.47949830848407404, |
| "learning_rate": 5.55226792552277e-06, |
| "loss": 0.5839, |
| "step": 1275 |
| }, |
| { |
| "epoch": 1.5552396425670185, |
| "grad_norm": 0.415731009852519, |
| "learning_rate": 5.545215948825447e-06, |
| "loss": 0.5378, |
| "step": 1276 |
| }, |
| { |
| "epoch": 1.5564581640942323, |
| "grad_norm": 0.466056698108541, |
| "learning_rate": 5.538162874353994e-06, |
| "loss": 0.4983, |
| "step": 1277 |
| }, |
| { |
| "epoch": 1.5576766856214461, |
| "grad_norm": 0.5916240577351891, |
| "learning_rate": 5.5311087163095475e-06, |
| "loss": 0.6251, |
| "step": 1278 |
| }, |
| { |
| "epoch": 1.5588952071486597, |
| "grad_norm": 0.44367509738450317, |
| "learning_rate": 5.524053488895413e-06, |
| "loss": 0.5488, |
| "step": 1279 |
| }, |
| { |
| "epoch": 1.5601137286758733, |
| "grad_norm": 0.47062048808194906, |
| "learning_rate": 5.516997206317061e-06, |
| "loss": 0.5563, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.5613322502030869, |
| "grad_norm": 0.5420478722656378, |
| "learning_rate": 5.509939882782077e-06, |
| "loss": 0.5416, |
| "step": 1281 |
| }, |
| { |
| "epoch": 1.5625507717303004, |
| "grad_norm": 0.5222284367927739, |
| "learning_rate": 5.502881532500149e-06, |
| "loss": 0.5965, |
| "step": 1282 |
| }, |
| { |
| "epoch": 1.5637692932575142, |
| "grad_norm": 0.42208342526415327, |
| "learning_rate": 5.49582216968303e-06, |
| "loss": 0.5467, |
| "step": 1283 |
| }, |
| { |
| "epoch": 1.5649878147847278, |
| "grad_norm": 0.4294650898913376, |
| "learning_rate": 5.4887618085445094e-06, |
| "loss": 0.5287, |
| "step": 1284 |
| }, |
| { |
| "epoch": 1.5662063363119416, |
| "grad_norm": 0.46855647055671784, |
| "learning_rate": 5.48170046330039e-06, |
| "loss": 0.5628, |
| "step": 1285 |
| }, |
| { |
| "epoch": 1.5674248578391552, |
| "grad_norm": 0.4699651333558714, |
| "learning_rate": 5.474638148168456e-06, |
| "loss": 0.5574, |
| "step": 1286 |
| }, |
| { |
| "epoch": 1.5686433793663688, |
| "grad_norm": 0.5135379339848296, |
| "learning_rate": 5.467574877368441e-06, |
| "loss": 0.547, |
| "step": 1287 |
| }, |
| { |
| "epoch": 1.5698619008935824, |
| "grad_norm": 0.4810680839376017, |
| "learning_rate": 5.460510665122007e-06, |
| "loss": 0.557, |
| "step": 1288 |
| }, |
| { |
| "epoch": 1.5710804224207962, |
| "grad_norm": 0.4098166771088161, |
| "learning_rate": 5.453445525652711e-06, |
| "loss": 0.5418, |
| "step": 1289 |
| }, |
| { |
| "epoch": 1.5722989439480097, |
| "grad_norm": 0.450215288957951, |
| "learning_rate": 5.446379473185972e-06, |
| "loss": 0.5357, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.5735174654752235, |
| "grad_norm": 0.5294521431799539, |
| "learning_rate": 5.4393125219490536e-06, |
| "loss": 0.5643, |
| "step": 1291 |
| }, |
| { |
| "epoch": 1.574735987002437, |
| "grad_norm": 0.4592328236388863, |
| "learning_rate": 5.432244686171025e-06, |
| "loss": 0.5579, |
| "step": 1292 |
| }, |
| { |
| "epoch": 1.5759545085296507, |
| "grad_norm": 0.43283051916010107, |
| "learning_rate": 5.42517598008274e-06, |
| "loss": 0.5045, |
| "step": 1293 |
| }, |
| { |
| "epoch": 1.5771730300568643, |
| "grad_norm": 0.5659434705667795, |
| "learning_rate": 5.418106417916799e-06, |
| "loss": 0.6214, |
| "step": 1294 |
| }, |
| { |
| "epoch": 1.5783915515840778, |
| "grad_norm": 0.43767902318474483, |
| "learning_rate": 5.411036013907534e-06, |
| "loss": 0.4785, |
| "step": 1295 |
| }, |
| { |
| "epoch": 1.5796100731112916, |
| "grad_norm": 0.49107247160929135, |
| "learning_rate": 5.403964782290962e-06, |
| "loss": 0.6033, |
| "step": 1296 |
| }, |
| { |
| "epoch": 1.5808285946385054, |
| "grad_norm": 0.4941184832970728, |
| "learning_rate": 5.396892737304779e-06, |
| "loss": 0.5625, |
| "step": 1297 |
| }, |
| { |
| "epoch": 1.582047116165719, |
| "grad_norm": 0.45207210705440176, |
| "learning_rate": 5.389819893188304e-06, |
| "loss": 0.5955, |
| "step": 1298 |
| }, |
| { |
| "epoch": 1.5832656376929326, |
| "grad_norm": 0.41624551022025036, |
| "learning_rate": 5.38274626418248e-06, |
| "loss": 0.4859, |
| "step": 1299 |
| }, |
| { |
| "epoch": 1.5844841592201462, |
| "grad_norm": 0.5355211526596017, |
| "learning_rate": 5.375671864529817e-06, |
| "loss": 0.5847, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.5857026807473598, |
| "grad_norm": 0.4975201469339488, |
| "learning_rate": 5.368596708474388e-06, |
| "loss": 0.5338, |
| "step": 1301 |
| }, |
| { |
| "epoch": 1.5869212022745736, |
| "grad_norm": 0.4863357216575736, |
| "learning_rate": 5.361520810261779e-06, |
| "loss": 0.5535, |
| "step": 1302 |
| }, |
| { |
| "epoch": 1.5881397238017871, |
| "grad_norm": 0.4458515473467672, |
| "learning_rate": 5.354444184139077e-06, |
| "loss": 0.5457, |
| "step": 1303 |
| }, |
| { |
| "epoch": 1.589358245329001, |
| "grad_norm": 0.4614906452198629, |
| "learning_rate": 5.347366844354833e-06, |
| "loss": 0.5398, |
| "step": 1304 |
| }, |
| { |
| "epoch": 1.5905767668562145, |
| "grad_norm": 0.4685010422012627, |
| "learning_rate": 5.340288805159037e-06, |
| "loss": 0.5407, |
| "step": 1305 |
| }, |
| { |
| "epoch": 1.591795288383428, |
| "grad_norm": 0.48804182586096323, |
| "learning_rate": 5.33321008080308e-06, |
| "loss": 0.547, |
| "step": 1306 |
| }, |
| { |
| "epoch": 1.5930138099106417, |
| "grad_norm": 0.44694564705893386, |
| "learning_rate": 5.3261306855397395e-06, |
| "loss": 0.5459, |
| "step": 1307 |
| }, |
| { |
| "epoch": 1.5942323314378553, |
| "grad_norm": 0.4139859944920655, |
| "learning_rate": 5.319050633623141e-06, |
| "loss": 0.5519, |
| "step": 1308 |
| }, |
| { |
| "epoch": 1.595450852965069, |
| "grad_norm": 0.5097755056565069, |
| "learning_rate": 5.311969939308736e-06, |
| "loss": 0.5901, |
| "step": 1309 |
| }, |
| { |
| "epoch": 1.5966693744922829, |
| "grad_norm": 0.47592489399723925, |
| "learning_rate": 5.304888616853265e-06, |
| "loss": 0.5324, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.5978878960194964, |
| "grad_norm": 0.4276883892776071, |
| "learning_rate": 5.297806680514731e-06, |
| "loss": 0.5106, |
| "step": 1311 |
| }, |
| { |
| "epoch": 1.59910641754671, |
| "grad_norm": 0.4681244968477927, |
| "learning_rate": 5.290724144552379e-06, |
| "loss": 0.6054, |
| "step": 1312 |
| }, |
| { |
| "epoch": 1.6003249390739236, |
| "grad_norm": 0.4896701927637777, |
| "learning_rate": 5.283641023226661e-06, |
| "loss": 0.5455, |
| "step": 1313 |
| }, |
| { |
| "epoch": 1.6015434606011372, |
| "grad_norm": 0.4245053792739156, |
| "learning_rate": 5.276557330799203e-06, |
| "loss": 0.5471, |
| "step": 1314 |
| }, |
| { |
| "epoch": 1.602761982128351, |
| "grad_norm": 0.4874649206218259, |
| "learning_rate": 5.269473081532785e-06, |
| "loss": 0.5782, |
| "step": 1315 |
| }, |
| { |
| "epoch": 1.6039805036555645, |
| "grad_norm": 0.47549962008011226, |
| "learning_rate": 5.262388289691303e-06, |
| "loss": 0.575, |
| "step": 1316 |
| }, |
| { |
| "epoch": 1.6051990251827783, |
| "grad_norm": 0.42642213924678707, |
| "learning_rate": 5.255302969539753e-06, |
| "loss": 0.5805, |
| "step": 1317 |
| }, |
| { |
| "epoch": 1.606417546709992, |
| "grad_norm": 0.42684200856960786, |
| "learning_rate": 5.248217135344191e-06, |
| "loss": 0.5072, |
| "step": 1318 |
| }, |
| { |
| "epoch": 1.6076360682372055, |
| "grad_norm": 0.4365701459872912, |
| "learning_rate": 5.241130801371704e-06, |
| "loss": 0.5658, |
| "step": 1319 |
| }, |
| { |
| "epoch": 1.608854589764419, |
| "grad_norm": 0.42471390001052695, |
| "learning_rate": 5.234043981890395e-06, |
| "loss": 0.5698, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.6100731112916327, |
| "grad_norm": 0.4535238587027896, |
| "learning_rate": 5.226956691169332e-06, |
| "loss": 0.5839, |
| "step": 1321 |
| }, |
| { |
| "epoch": 1.6112916328188465, |
| "grad_norm": 0.4247946464572348, |
| "learning_rate": 5.219868943478542e-06, |
| "loss": 0.5577, |
| "step": 1322 |
| }, |
| { |
| "epoch": 1.6125101543460603, |
| "grad_norm": 0.43376338736220743, |
| "learning_rate": 5.212780753088968e-06, |
| "loss": 0.5449, |
| "step": 1323 |
| }, |
| { |
| "epoch": 1.6137286758732738, |
| "grad_norm": 0.4061841147634886, |
| "learning_rate": 5.205692134272445e-06, |
| "loss": 0.5179, |
| "step": 1324 |
| }, |
| { |
| "epoch": 1.6149471974004874, |
| "grad_norm": 0.4596996267175098, |
| "learning_rate": 5.1986031013016706e-06, |
| "loss": 0.5818, |
| "step": 1325 |
| }, |
| { |
| "epoch": 1.616165718927701, |
| "grad_norm": 0.43123766272618486, |
| "learning_rate": 5.191513668450178e-06, |
| "loss": 0.5687, |
| "step": 1326 |
| }, |
| { |
| "epoch": 1.6173842404549146, |
| "grad_norm": 0.4329937345499755, |
| "learning_rate": 5.184423849992299e-06, |
| "loss": 0.5348, |
| "step": 1327 |
| }, |
| { |
| "epoch": 1.6186027619821284, |
| "grad_norm": 0.49663961496101067, |
| "learning_rate": 5.177333660203153e-06, |
| "loss": 0.5956, |
| "step": 1328 |
| }, |
| { |
| "epoch": 1.619821283509342, |
| "grad_norm": 0.3924685962518714, |
| "learning_rate": 5.170243113358594e-06, |
| "loss": 0.5125, |
| "step": 1329 |
| }, |
| { |
| "epoch": 1.6210398050365558, |
| "grad_norm": 0.4856207429888876, |
| "learning_rate": 5.163152223735206e-06, |
| "loss": 0.5778, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.6222583265637693, |
| "grad_norm": 0.45002527423182, |
| "learning_rate": 5.156061005610258e-06, |
| "loss": 0.5584, |
| "step": 1331 |
| }, |
| { |
| "epoch": 1.623476848090983, |
| "grad_norm": 0.4310106517218945, |
| "learning_rate": 5.1489694732616805e-06, |
| "loss": 0.5377, |
| "step": 1332 |
| }, |
| { |
| "epoch": 1.6246953696181965, |
| "grad_norm": 0.49448879444066074, |
| "learning_rate": 5.141877640968037e-06, |
| "loss": 0.623, |
| "step": 1333 |
| }, |
| { |
| "epoch": 1.62591389114541, |
| "grad_norm": 0.40362533961876157, |
| "learning_rate": 5.134785523008496e-06, |
| "loss": 0.5014, |
| "step": 1334 |
| }, |
| { |
| "epoch": 1.6271324126726239, |
| "grad_norm": 0.4269483197368071, |
| "learning_rate": 5.127693133662801e-06, |
| "loss": 0.573, |
| "step": 1335 |
| }, |
| { |
| "epoch": 1.6283509341998377, |
| "grad_norm": 0.4258879503760348, |
| "learning_rate": 5.12060048721124e-06, |
| "loss": 0.5314, |
| "step": 1336 |
| }, |
| { |
| "epoch": 1.6295694557270513, |
| "grad_norm": 0.44120462268057764, |
| "learning_rate": 5.11350759793462e-06, |
| "loss": 0.5373, |
| "step": 1337 |
| }, |
| { |
| "epoch": 1.6307879772542648, |
| "grad_norm": 0.4276083907367786, |
| "learning_rate": 5.106414480114238e-06, |
| "loss": 0.5276, |
| "step": 1338 |
| }, |
| { |
| "epoch": 1.6320064987814784, |
| "grad_norm": 0.4517524664721021, |
| "learning_rate": 5.099321148031851e-06, |
| "loss": 0.5504, |
| "step": 1339 |
| }, |
| { |
| "epoch": 1.633225020308692, |
| "grad_norm": 0.44913374968040776, |
| "learning_rate": 5.092227615969643e-06, |
| "loss": 0.553, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.6344435418359058, |
| "grad_norm": 0.49845971138611844, |
| "learning_rate": 5.085133898210208e-06, |
| "loss": 0.5653, |
| "step": 1341 |
| }, |
| { |
| "epoch": 1.6356620633631194, |
| "grad_norm": 0.4427260322632497, |
| "learning_rate": 5.078040009036509e-06, |
| "loss": 0.5213, |
| "step": 1342 |
| }, |
| { |
| "epoch": 1.6368805848903332, |
| "grad_norm": 0.4177253316358593, |
| "learning_rate": 5.070945962731854e-06, |
| "loss": 0.5397, |
| "step": 1343 |
| }, |
| { |
| "epoch": 1.6380991064175467, |
| "grad_norm": 0.47651126334983296, |
| "learning_rate": 5.06385177357987e-06, |
| "loss": 0.5595, |
| "step": 1344 |
| }, |
| { |
| "epoch": 1.6393176279447603, |
| "grad_norm": 0.5627892918210755, |
| "learning_rate": 5.056757455864469e-06, |
| "loss": 0.6096, |
| "step": 1345 |
| }, |
| { |
| "epoch": 1.640536149471974, |
| "grad_norm": 0.44180856064958623, |
| "learning_rate": 5.049663023869824e-06, |
| "loss": 0.5025, |
| "step": 1346 |
| }, |
| { |
| "epoch": 1.6417546709991877, |
| "grad_norm": 0.460979656039155, |
| "learning_rate": 5.042568491880338e-06, |
| "loss": 0.5982, |
| "step": 1347 |
| }, |
| { |
| "epoch": 1.6429731925264013, |
| "grad_norm": 0.4821324897781787, |
| "learning_rate": 5.035473874180612e-06, |
| "loss": 0.5598, |
| "step": 1348 |
| }, |
| { |
| "epoch": 1.644191714053615, |
| "grad_norm": 0.45517260087056105, |
| "learning_rate": 5.028379185055424e-06, |
| "loss": 0.5246, |
| "step": 1349 |
| }, |
| { |
| "epoch": 1.6454102355808287, |
| "grad_norm": 0.4413055629736707, |
| "learning_rate": 5.021284438789694e-06, |
| "loss": 0.5341, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.6466287571080422, |
| "grad_norm": 0.4614955719864221, |
| "learning_rate": 5.014189649668456e-06, |
| "loss": 0.5578, |
| "step": 1351 |
| }, |
| { |
| "epoch": 1.6478472786352558, |
| "grad_norm": 0.4953936356649888, |
| "learning_rate": 5.007094831976832e-06, |
| "loss": 0.5765, |
| "step": 1352 |
| }, |
| { |
| "epoch": 1.6490658001624694, |
| "grad_norm": 0.39648893153136167, |
| "learning_rate": 5e-06, |
| "loss": 0.5342, |
| "step": 1353 |
| }, |
| { |
| "epoch": 1.6502843216896832, |
| "grad_norm": 0.43855043725681864, |
| "learning_rate": 4.992905168023169e-06, |
| "loss": 0.543, |
| "step": 1354 |
| }, |
| { |
| "epoch": 1.6515028432168968, |
| "grad_norm": 0.5301209980205615, |
| "learning_rate": 4.985810350331544e-06, |
| "loss": 0.6293, |
| "step": 1355 |
| }, |
| { |
| "epoch": 1.6527213647441106, |
| "grad_norm": 0.38590596359640195, |
| "learning_rate": 4.9787155612103076e-06, |
| "loss": 0.5296, |
| "step": 1356 |
| }, |
| { |
| "epoch": 1.6539398862713242, |
| "grad_norm": 0.42738095322238806, |
| "learning_rate": 4.9716208149445776e-06, |
| "loss": 0.5308, |
| "step": 1357 |
| }, |
| { |
| "epoch": 1.6551584077985377, |
| "grad_norm": 0.4555041349632123, |
| "learning_rate": 4.96452612581939e-06, |
| "loss": 0.5788, |
| "step": 1358 |
| }, |
| { |
| "epoch": 1.6563769293257513, |
| "grad_norm": 0.4558921081759917, |
| "learning_rate": 4.9574315081196634e-06, |
| "loss": 0.5609, |
| "step": 1359 |
| }, |
| { |
| "epoch": 1.6575954508529651, |
| "grad_norm": 0.4503929824518257, |
| "learning_rate": 4.950336976130176e-06, |
| "loss": 0.5341, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.6588139723801787, |
| "grad_norm": 0.43711031728275695, |
| "learning_rate": 4.9432425441355334e-06, |
| "loss": 0.5793, |
| "step": 1361 |
| }, |
| { |
| "epoch": 1.6600324939073925, |
| "grad_norm": 0.39568528756580684, |
| "learning_rate": 4.936148226420133e-06, |
| "loss": 0.5069, |
| "step": 1362 |
| }, |
| { |
| "epoch": 1.661251015434606, |
| "grad_norm": 0.4309659404250017, |
| "learning_rate": 4.929054037268147e-06, |
| "loss": 0.5872, |
| "step": 1363 |
| }, |
| { |
| "epoch": 1.6624695369618196, |
| "grad_norm": 0.482908985444469, |
| "learning_rate": 4.921959990963493e-06, |
| "loss": 0.5583, |
| "step": 1364 |
| }, |
| { |
| "epoch": 1.6636880584890332, |
| "grad_norm": 0.4133363420753277, |
| "learning_rate": 4.914866101789793e-06, |
| "loss": 0.484, |
| "step": 1365 |
| }, |
| { |
| "epoch": 1.6649065800162468, |
| "grad_norm": 0.46336848283664533, |
| "learning_rate": 4.907772384030357e-06, |
| "loss": 0.6055, |
| "step": 1366 |
| }, |
| { |
| "epoch": 1.6661251015434606, |
| "grad_norm": 0.4021280914849084, |
| "learning_rate": 4.900678851968152e-06, |
| "loss": 0.4953, |
| "step": 1367 |
| }, |
| { |
| "epoch": 1.6673436230706744, |
| "grad_norm": 0.4496122068891948, |
| "learning_rate": 4.893585519885764e-06, |
| "loss": 0.5631, |
| "step": 1368 |
| }, |
| { |
| "epoch": 1.668562144597888, |
| "grad_norm": 0.4386416975070193, |
| "learning_rate": 4.886492402065381e-06, |
| "loss": 0.5632, |
| "step": 1369 |
| }, |
| { |
| "epoch": 1.6697806661251016, |
| "grad_norm": 0.4335033691327931, |
| "learning_rate": 4.8793995127887615e-06, |
| "loss": 0.5377, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.6709991876523151, |
| "grad_norm": 0.4639132609070873, |
| "learning_rate": 4.8723068663372005e-06, |
| "loss": 0.5658, |
| "step": 1371 |
| }, |
| { |
| "epoch": 1.6722177091795287, |
| "grad_norm": 0.4186533135703324, |
| "learning_rate": 4.865214476991506e-06, |
| "loss": 0.538, |
| "step": 1372 |
| }, |
| { |
| "epoch": 1.6734362307067425, |
| "grad_norm": 0.5100673554858591, |
| "learning_rate": 4.858122359031964e-06, |
| "loss": 0.5977, |
| "step": 1373 |
| }, |
| { |
| "epoch": 1.674654752233956, |
| "grad_norm": 0.4284001466166066, |
| "learning_rate": 4.851030526738321e-06, |
| "loss": 0.5325, |
| "step": 1374 |
| }, |
| { |
| "epoch": 1.67587327376117, |
| "grad_norm": 0.4048773843920905, |
| "learning_rate": 4.843938994389744e-06, |
| "loss": 0.4975, |
| "step": 1375 |
| }, |
| { |
| "epoch": 1.6770917952883835, |
| "grad_norm": 0.4074001135895807, |
| "learning_rate": 4.836847776264794e-06, |
| "loss": 0.5762, |
| "step": 1376 |
| }, |
| { |
| "epoch": 1.678310316815597, |
| "grad_norm": 0.41740364142746117, |
| "learning_rate": 4.829756886641408e-06, |
| "loss": 0.5731, |
| "step": 1377 |
| }, |
| { |
| "epoch": 1.6795288383428106, |
| "grad_norm": 0.4812773839220182, |
| "learning_rate": 4.82266633979685e-06, |
| "loss": 0.5849, |
| "step": 1378 |
| }, |
| { |
| "epoch": 1.6807473598700242, |
| "grad_norm": 0.39560445425868235, |
| "learning_rate": 4.815576150007702e-06, |
| "loss": 0.4699, |
| "step": 1379 |
| }, |
| { |
| "epoch": 1.681965881397238, |
| "grad_norm": 0.4414471548591453, |
| "learning_rate": 4.808486331549824e-06, |
| "loss": 0.5626, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.6831844029244518, |
| "grad_norm": 0.38187499198826846, |
| "learning_rate": 4.801396898698329e-06, |
| "loss": 0.5071, |
| "step": 1381 |
| }, |
| { |
| "epoch": 1.6844029244516654, |
| "grad_norm": 0.4892251033230591, |
| "learning_rate": 4.794307865727555e-06, |
| "loss": 0.5552, |
| "step": 1382 |
| }, |
| { |
| "epoch": 1.685621445978879, |
| "grad_norm": 0.482903388217794, |
| "learning_rate": 4.787219246911034e-06, |
| "loss": 0.5492, |
| "step": 1383 |
| }, |
| { |
| "epoch": 1.6868399675060926, |
| "grad_norm": 0.45801551724996875, |
| "learning_rate": 4.78013105652146e-06, |
| "loss": 0.5838, |
| "step": 1384 |
| }, |
| { |
| "epoch": 1.6880584890333061, |
| "grad_norm": 0.42866796779932836, |
| "learning_rate": 4.77304330883067e-06, |
| "loss": 0.5085, |
| "step": 1385 |
| }, |
| { |
| "epoch": 1.68927701056052, |
| "grad_norm": 0.4475021559493066, |
| "learning_rate": 4.765956018109607e-06, |
| "loss": 0.5505, |
| "step": 1386 |
| }, |
| { |
| "epoch": 1.6904955320877335, |
| "grad_norm": 0.4697192585313218, |
| "learning_rate": 4.758869198628296e-06, |
| "loss": 0.5479, |
| "step": 1387 |
| }, |
| { |
| "epoch": 1.6917140536149473, |
| "grad_norm": 0.465791753930643, |
| "learning_rate": 4.7517828646558115e-06, |
| "loss": 0.56, |
| "step": 1388 |
| }, |
| { |
| "epoch": 1.692932575142161, |
| "grad_norm": 0.4015038202394012, |
| "learning_rate": 4.744697030460248e-06, |
| "loss": 0.5492, |
| "step": 1389 |
| }, |
| { |
| "epoch": 1.6941510966693745, |
| "grad_norm": 0.5232226854854597, |
| "learning_rate": 4.7376117103086974e-06, |
| "loss": 0.5464, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.695369618196588, |
| "grad_norm": 0.4518351945360455, |
| "learning_rate": 4.730526918467217e-06, |
| "loss": 0.533, |
| "step": 1391 |
| }, |
| { |
| "epoch": 1.6965881397238016, |
| "grad_norm": 0.4769324521614458, |
| "learning_rate": 4.7234426692007985e-06, |
| "loss": 0.6265, |
| "step": 1392 |
| }, |
| { |
| "epoch": 1.6978066612510154, |
| "grad_norm": 0.39722631525643654, |
| "learning_rate": 4.716358976773342e-06, |
| "loss": 0.4616, |
| "step": 1393 |
| }, |
| { |
| "epoch": 1.6990251827782292, |
| "grad_norm": 0.5143439560883679, |
| "learning_rate": 4.7092758554476215e-06, |
| "loss": 0.5927, |
| "step": 1394 |
| }, |
| { |
| "epoch": 1.7002437043054428, |
| "grad_norm": 0.4893384326011186, |
| "learning_rate": 4.702193319485271e-06, |
| "loss": 0.581, |
| "step": 1395 |
| }, |
| { |
| "epoch": 1.7014622258326564, |
| "grad_norm": 0.40330674171655206, |
| "learning_rate": 4.695111383146738e-06, |
| "loss": 0.5152, |
| "step": 1396 |
| }, |
| { |
| "epoch": 1.70268074735987, |
| "grad_norm": 0.4812638116299566, |
| "learning_rate": 4.688030060691264e-06, |
| "loss": 0.6068, |
| "step": 1397 |
| }, |
| { |
| "epoch": 1.7038992688870835, |
| "grad_norm": 0.42808075960070036, |
| "learning_rate": 4.680949366376858e-06, |
| "loss": 0.5232, |
| "step": 1398 |
| }, |
| { |
| "epoch": 1.7051177904142973, |
| "grad_norm": 0.4186184139760809, |
| "learning_rate": 4.673869314460262e-06, |
| "loss": 0.5375, |
| "step": 1399 |
| }, |
| { |
| "epoch": 1.706336311941511, |
| "grad_norm": 0.4351340155979422, |
| "learning_rate": 4.666789919196923e-06, |
| "loss": 0.5493, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.7075548334687247, |
| "grad_norm": 0.5600164408896984, |
| "learning_rate": 4.659711194840964e-06, |
| "loss": 0.587, |
| "step": 1401 |
| }, |
| { |
| "epoch": 1.7087733549959383, |
| "grad_norm": 0.43365827783641364, |
| "learning_rate": 4.6526331556451674e-06, |
| "loss": 0.519, |
| "step": 1402 |
| }, |
| { |
| "epoch": 1.7099918765231519, |
| "grad_norm": 0.44015645831753214, |
| "learning_rate": 4.645555815860923e-06, |
| "loss": 0.5523, |
| "step": 1403 |
| }, |
| { |
| "epoch": 1.7112103980503655, |
| "grad_norm": 0.4552471646368589, |
| "learning_rate": 4.638479189738224e-06, |
| "loss": 0.5404, |
| "step": 1404 |
| }, |
| { |
| "epoch": 1.7124289195775793, |
| "grad_norm": 0.4535728417437257, |
| "learning_rate": 4.631403291525615e-06, |
| "loss": 0.5368, |
| "step": 1405 |
| }, |
| { |
| "epoch": 1.7136474411047928, |
| "grad_norm": 0.4734624014107752, |
| "learning_rate": 4.624328135470184e-06, |
| "loss": 0.5778, |
| "step": 1406 |
| }, |
| { |
| "epoch": 1.7148659626320066, |
| "grad_norm": 0.4934447889274217, |
| "learning_rate": 4.617253735817522e-06, |
| "loss": 0.5476, |
| "step": 1407 |
| }, |
| { |
| "epoch": 1.7160844841592202, |
| "grad_norm": 0.4984539363836997, |
| "learning_rate": 4.610180106811696e-06, |
| "loss": 0.5649, |
| "step": 1408 |
| }, |
| { |
| "epoch": 1.7173030056864338, |
| "grad_norm": 0.4848858968212611, |
| "learning_rate": 4.603107262695225e-06, |
| "loss": 0.5111, |
| "step": 1409 |
| }, |
| { |
| "epoch": 1.7185215272136474, |
| "grad_norm": 0.47036832640121645, |
| "learning_rate": 4.596035217709039e-06, |
| "loss": 0.5948, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.719740048740861, |
| "grad_norm": 0.44168165703224904, |
| "learning_rate": 4.588963986092468e-06, |
| "loss": 0.5941, |
| "step": 1411 |
| }, |
| { |
| "epoch": 1.7209585702680747, |
| "grad_norm": 0.39666220117961165, |
| "learning_rate": 4.5818935820832014e-06, |
| "loss": 0.4913, |
| "step": 1412 |
| }, |
| { |
| "epoch": 1.7221770917952883, |
| "grad_norm": 0.5025801254491269, |
| "learning_rate": 4.574824019917262e-06, |
| "loss": 0.5932, |
| "step": 1413 |
| }, |
| { |
| "epoch": 1.7233956133225021, |
| "grad_norm": 0.3845664023510723, |
| "learning_rate": 4.5677553138289764e-06, |
| "loss": 0.5369, |
| "step": 1414 |
| }, |
| { |
| "epoch": 1.7246141348497157, |
| "grad_norm": 0.42320355598590065, |
| "learning_rate": 4.560687478050947e-06, |
| "loss": 0.5294, |
| "step": 1415 |
| }, |
| { |
| "epoch": 1.7258326563769293, |
| "grad_norm": 0.4096157422530506, |
| "learning_rate": 4.553620526814029e-06, |
| "loss": 0.519, |
| "step": 1416 |
| }, |
| { |
| "epoch": 1.7270511779041429, |
| "grad_norm": 0.48631875630001814, |
| "learning_rate": 4.546554474347291e-06, |
| "loss": 0.6101, |
| "step": 1417 |
| }, |
| { |
| "epoch": 1.7282696994313567, |
| "grad_norm": 0.4768787594020578, |
| "learning_rate": 4.539489334877992e-06, |
| "loss": 0.5629, |
| "step": 1418 |
| }, |
| { |
| "epoch": 1.7294882209585702, |
| "grad_norm": 0.41978448851594347, |
| "learning_rate": 4.532425122631559e-06, |
| "loss": 0.5365, |
| "step": 1419 |
| }, |
| { |
| "epoch": 1.730706742485784, |
| "grad_norm": 0.4298141402145644, |
| "learning_rate": 4.5253618518315455e-06, |
| "loss": 0.5346, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.7319252640129976, |
| "grad_norm": 0.43330287443239485, |
| "learning_rate": 4.5182995366996115e-06, |
| "loss": 0.565, |
| "step": 1421 |
| }, |
| { |
| "epoch": 1.7331437855402112, |
| "grad_norm": 0.4618063094916825, |
| "learning_rate": 4.511238191455491e-06, |
| "loss": 0.5669, |
| "step": 1422 |
| }, |
| { |
| "epoch": 1.7343623070674248, |
| "grad_norm": 0.4330349372337, |
| "learning_rate": 4.504177830316971e-06, |
| "loss": 0.5563, |
| "step": 1423 |
| }, |
| { |
| "epoch": 1.7355808285946384, |
| "grad_norm": 0.4061046490367817, |
| "learning_rate": 4.497118467499852e-06, |
| "loss": 0.5371, |
| "step": 1424 |
| }, |
| { |
| "epoch": 1.7367993501218522, |
| "grad_norm": 0.4524064658816882, |
| "learning_rate": 4.490060117217925e-06, |
| "loss": 0.5273, |
| "step": 1425 |
| }, |
| { |
| "epoch": 1.738017871649066, |
| "grad_norm": 0.4153684807216417, |
| "learning_rate": 4.483002793682941e-06, |
| "loss": 0.5202, |
| "step": 1426 |
| }, |
| { |
| "epoch": 1.7392363931762795, |
| "grad_norm": 0.5126499568306361, |
| "learning_rate": 4.475946511104588e-06, |
| "loss": 0.5964, |
| "step": 1427 |
| }, |
| { |
| "epoch": 1.7404549147034931, |
| "grad_norm": 0.442175693450011, |
| "learning_rate": 4.468891283690454e-06, |
| "loss": 0.514, |
| "step": 1428 |
| }, |
| { |
| "epoch": 1.7416734362307067, |
| "grad_norm": 0.421309384527005, |
| "learning_rate": 4.461837125646007e-06, |
| "loss": 0.6091, |
| "step": 1429 |
| }, |
| { |
| "epoch": 1.7428919577579203, |
| "grad_norm": 0.4380243684629681, |
| "learning_rate": 4.4547840511745565e-06, |
| "loss": 0.4913, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.744110479285134, |
| "grad_norm": 0.4812216276097867, |
| "learning_rate": 4.447732074477233e-06, |
| "loss": 0.5582, |
| "step": 1431 |
| }, |
| { |
| "epoch": 1.7453290008123477, |
| "grad_norm": 0.40488056766666325, |
| "learning_rate": 4.440681209752955e-06, |
| "loss": 0.5758, |
| "step": 1432 |
| }, |
| { |
| "epoch": 1.7465475223395615, |
| "grad_norm": 0.4732265653920416, |
| "learning_rate": 4.433631471198406e-06, |
| "loss": 0.5962, |
| "step": 1433 |
| }, |
| { |
| "epoch": 1.747766043866775, |
| "grad_norm": 0.42539261148413177, |
| "learning_rate": 4.426582873007999e-06, |
| "loss": 0.4769, |
| "step": 1434 |
| }, |
| { |
| "epoch": 1.7489845653939886, |
| "grad_norm": 0.512036705158376, |
| "learning_rate": 4.4195354293738484e-06, |
| "loss": 0.582, |
| "step": 1435 |
| }, |
| { |
| "epoch": 1.7502030869212022, |
| "grad_norm": 0.4305096055128761, |
| "learning_rate": 4.412489154485752e-06, |
| "loss": 0.5326, |
| "step": 1436 |
| }, |
| { |
| "epoch": 1.7514216084484158, |
| "grad_norm": 0.5036201316708941, |
| "learning_rate": 4.405444062531145e-06, |
| "loss": 0.579, |
| "step": 1437 |
| }, |
| { |
| "epoch": 1.7526401299756296, |
| "grad_norm": 0.42814700978676606, |
| "learning_rate": 4.3984001676950875e-06, |
| "loss": 0.5706, |
| "step": 1438 |
| }, |
| { |
| "epoch": 1.7538586515028434, |
| "grad_norm": 0.4336700472324628, |
| "learning_rate": 4.391357484160223e-06, |
| "loss": 0.5429, |
| "step": 1439 |
| }, |
| { |
| "epoch": 1.755077173030057, |
| "grad_norm": 0.4197620066836796, |
| "learning_rate": 4.384316026106766e-06, |
| "loss": 0.5312, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.7562956945572705, |
| "grad_norm": 0.4358185412850227, |
| "learning_rate": 4.377275807712453e-06, |
| "loss": 0.5601, |
| "step": 1441 |
| }, |
| { |
| "epoch": 1.757514216084484, |
| "grad_norm": 0.4593898380941711, |
| "learning_rate": 4.37023684315253e-06, |
| "loss": 0.5522, |
| "step": 1442 |
| }, |
| { |
| "epoch": 1.7587327376116977, |
| "grad_norm": 0.41694136338662585, |
| "learning_rate": 4.363199146599717e-06, |
| "loss": 0.5436, |
| "step": 1443 |
| }, |
| { |
| "epoch": 1.7599512591389115, |
| "grad_norm": 0.41051974887386045, |
| "learning_rate": 4.3561627322241815e-06, |
| "loss": 0.5484, |
| "step": 1444 |
| }, |
| { |
| "epoch": 1.761169780666125, |
| "grad_norm": 0.49654495683052513, |
| "learning_rate": 4.34912761419351e-06, |
| "loss": 0.5471, |
| "step": 1445 |
| }, |
| { |
| "epoch": 1.7623883021933389, |
| "grad_norm": 0.46755105267929675, |
| "learning_rate": 4.342093806672678e-06, |
| "loss": 0.5675, |
| "step": 1446 |
| }, |
| { |
| "epoch": 1.7636068237205524, |
| "grad_norm": 0.4560949973440655, |
| "learning_rate": 4.335061323824019e-06, |
| "loss": 0.5921, |
| "step": 1447 |
| }, |
| { |
| "epoch": 1.764825345247766, |
| "grad_norm": 0.4254462067059595, |
| "learning_rate": 4.328030179807207e-06, |
| "loss": 0.4801, |
| "step": 1448 |
| }, |
| { |
| "epoch": 1.7660438667749796, |
| "grad_norm": 0.43590945113760904, |
| "learning_rate": 4.321000388779214e-06, |
| "loss": 0.55, |
| "step": 1449 |
| }, |
| { |
| "epoch": 1.7672623883021932, |
| "grad_norm": 0.45385792985801476, |
| "learning_rate": 4.313971964894289e-06, |
| "loss": 0.5936, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.768480909829407, |
| "grad_norm": 0.45173148922198614, |
| "learning_rate": 4.306944922303932e-06, |
| "loss": 0.5198, |
| "step": 1451 |
| }, |
| { |
| "epoch": 1.7696994313566208, |
| "grad_norm": 0.4738870866999846, |
| "learning_rate": 4.299919275156857e-06, |
| "loss": 0.5695, |
| "step": 1452 |
| }, |
| { |
| "epoch": 1.7709179528838344, |
| "grad_norm": 0.4308222859015806, |
| "learning_rate": 4.292895037598968e-06, |
| "loss": 0.5302, |
| "step": 1453 |
| }, |
| { |
| "epoch": 1.772136474411048, |
| "grad_norm": 0.43194034641229945, |
| "learning_rate": 4.285872223773336e-06, |
| "loss": 0.5277, |
| "step": 1454 |
| }, |
| { |
| "epoch": 1.7733549959382615, |
| "grad_norm": 0.44969920461261864, |
| "learning_rate": 4.278850847820161e-06, |
| "loss": 0.5552, |
| "step": 1455 |
| }, |
| { |
| "epoch": 1.774573517465475, |
| "grad_norm": 0.45716879761679, |
| "learning_rate": 4.2718309238767485e-06, |
| "loss": 0.5785, |
| "step": 1456 |
| }, |
| { |
| "epoch": 1.775792038992689, |
| "grad_norm": 0.4340108500215334, |
| "learning_rate": 4.264812466077486e-06, |
| "loss": 0.5973, |
| "step": 1457 |
| }, |
| { |
| "epoch": 1.7770105605199025, |
| "grad_norm": 0.40605446125162264, |
| "learning_rate": 4.2577954885537985e-06, |
| "loss": 0.5262, |
| "step": 1458 |
| }, |
| { |
| "epoch": 1.7782290820471163, |
| "grad_norm": 0.4862703366986213, |
| "learning_rate": 4.2507800054341385e-06, |
| "loss": 0.576, |
| "step": 1459 |
| }, |
| { |
| "epoch": 1.7794476035743299, |
| "grad_norm": 0.48359582578678745, |
| "learning_rate": 4.243766030843947e-06, |
| "loss": 0.5998, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.7806661251015434, |
| "grad_norm": 0.3661204928776939, |
| "learning_rate": 4.236753578905627e-06, |
| "loss": 0.4968, |
| "step": 1461 |
| }, |
| { |
| "epoch": 1.781884646628757, |
| "grad_norm": 0.43106534453803774, |
| "learning_rate": 4.229742663738521e-06, |
| "loss": 0.5418, |
| "step": 1462 |
| }, |
| { |
| "epoch": 1.7831031681559708, |
| "grad_norm": 0.48202616303627543, |
| "learning_rate": 4.2227332994588666e-06, |
| "loss": 0.5486, |
| "step": 1463 |
| }, |
| { |
| "epoch": 1.7843216896831844, |
| "grad_norm": 0.46787213059943966, |
| "learning_rate": 4.215725500179788e-06, |
| "loss": 0.5394, |
| "step": 1464 |
| }, |
| { |
| "epoch": 1.7855402112103982, |
| "grad_norm": 0.4786631164932734, |
| "learning_rate": 4.208719280011255e-06, |
| "loss": 0.6512, |
| "step": 1465 |
| }, |
| { |
| "epoch": 1.7867587327376118, |
| "grad_norm": 0.5007334603891063, |
| "learning_rate": 4.2017146530600585e-06, |
| "loss": 0.5262, |
| "step": 1466 |
| }, |
| { |
| "epoch": 1.7879772542648253, |
| "grad_norm": 0.4766688726563304, |
| "learning_rate": 4.194711633429782e-06, |
| "loss": 0.4996, |
| "step": 1467 |
| }, |
| { |
| "epoch": 1.789195775792039, |
| "grad_norm": 0.46491040345633633, |
| "learning_rate": 4.1877102352207695e-06, |
| "loss": 0.5968, |
| "step": 1468 |
| }, |
| { |
| "epoch": 1.7904142973192525, |
| "grad_norm": 0.4085352403505702, |
| "learning_rate": 4.180710472530105e-06, |
| "loss": 0.5262, |
| "step": 1469 |
| }, |
| { |
| "epoch": 1.7916328188464663, |
| "grad_norm": 0.43126540204858976, |
| "learning_rate": 4.173712359451576e-06, |
| "loss": 0.5407, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.7928513403736799, |
| "grad_norm": 0.5202009737302775, |
| "learning_rate": 4.16671591007565e-06, |
| "loss": 0.5644, |
| "step": 1471 |
| }, |
| { |
| "epoch": 1.7940698619008937, |
| "grad_norm": 0.43231572065106405, |
| "learning_rate": 4.159721138489445e-06, |
| "loss": 0.5143, |
| "step": 1472 |
| }, |
| { |
| "epoch": 1.7952883834281073, |
| "grad_norm": 0.4626044446914442, |
| "learning_rate": 4.152728058776701e-06, |
| "loss": 0.5853, |
| "step": 1473 |
| }, |
| { |
| "epoch": 1.7965069049553208, |
| "grad_norm": 0.43407883748754916, |
| "learning_rate": 4.145736685017749e-06, |
| "loss": 0.5239, |
| "step": 1474 |
| }, |
| { |
| "epoch": 1.7977254264825344, |
| "grad_norm": 0.4267857290356126, |
| "learning_rate": 4.138747031289485e-06, |
| "loss": 0.5558, |
| "step": 1475 |
| }, |
| { |
| "epoch": 1.7989439480097482, |
| "grad_norm": 0.4315508799133083, |
| "learning_rate": 4.131759111665349e-06, |
| "loss": 0.5807, |
| "step": 1476 |
| }, |
| { |
| "epoch": 1.8001624695369618, |
| "grad_norm": 0.4048772175153014, |
| "learning_rate": 4.124772940215279e-06, |
| "loss": 0.508, |
| "step": 1477 |
| }, |
| { |
| "epoch": 1.8013809910641756, |
| "grad_norm": 0.4268392177126804, |
| "learning_rate": 4.1177885310057045e-06, |
| "loss": 0.552, |
| "step": 1478 |
| }, |
| { |
| "epoch": 1.8025995125913892, |
| "grad_norm": 0.4495564163997895, |
| "learning_rate": 4.110805898099492e-06, |
| "loss": 0.5669, |
| "step": 1479 |
| }, |
| { |
| "epoch": 1.8038180341186028, |
| "grad_norm": 0.4570284740109343, |
| "learning_rate": 4.103825055555947e-06, |
| "loss": 0.5503, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.8050365556458163, |
| "grad_norm": 0.45712926339273185, |
| "learning_rate": 4.096846017430758e-06, |
| "loss": 0.5861, |
| "step": 1481 |
| }, |
| { |
| "epoch": 1.80625507717303, |
| "grad_norm": 0.4363450699012883, |
| "learning_rate": 4.0898687977759895e-06, |
| "loss": 0.5698, |
| "step": 1482 |
| }, |
| { |
| "epoch": 1.8074735987002437, |
| "grad_norm": 0.36642253412778386, |
| "learning_rate": 4.08289341064004e-06, |
| "loss": 0.4882, |
| "step": 1483 |
| }, |
| { |
| "epoch": 1.8086921202274575, |
| "grad_norm": 0.4626576143609871, |
| "learning_rate": 4.075919870067617e-06, |
| "loss": 0.5695, |
| "step": 1484 |
| }, |
| { |
| "epoch": 1.809910641754671, |
| "grad_norm": 0.46018408439267183, |
| "learning_rate": 4.068948190099711e-06, |
| "loss": 0.5529, |
| "step": 1485 |
| }, |
| { |
| "epoch": 1.8111291632818847, |
| "grad_norm": 0.4119449731431994, |
| "learning_rate": 4.06197838477357e-06, |
| "loss": 0.5024, |
| "step": 1486 |
| }, |
| { |
| "epoch": 1.8123476848090982, |
| "grad_norm": 0.4015730766144408, |
| "learning_rate": 4.0550104681226635e-06, |
| "loss": 0.5656, |
| "step": 1487 |
| }, |
| { |
| "epoch": 1.8135662063363118, |
| "grad_norm": 0.4503343260237984, |
| "learning_rate": 4.048044454176658e-06, |
| "loss": 0.5661, |
| "step": 1488 |
| }, |
| { |
| "epoch": 1.8147847278635256, |
| "grad_norm": 0.43240190245880916, |
| "learning_rate": 4.041080356961393e-06, |
| "loss": 0.4974, |
| "step": 1489 |
| }, |
| { |
| "epoch": 1.8160032493907392, |
| "grad_norm": 0.4734473361008657, |
| "learning_rate": 4.034118190498843e-06, |
| "loss": 0.5663, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.817221770917953, |
| "grad_norm": 0.43362890265223014, |
| "learning_rate": 4.0271579688071e-06, |
| "loss": 0.5531, |
| "step": 1491 |
| }, |
| { |
| "epoch": 1.8184402924451666, |
| "grad_norm": 0.46894586845233727, |
| "learning_rate": 4.020199705900335e-06, |
| "loss": 0.5534, |
| "step": 1492 |
| }, |
| { |
| "epoch": 1.8196588139723802, |
| "grad_norm": 0.5328522267534698, |
| "learning_rate": 4.013243415788783e-06, |
| "loss": 0.6018, |
| "step": 1493 |
| }, |
| { |
| "epoch": 1.8208773354995937, |
| "grad_norm": 0.41829831723127575, |
| "learning_rate": 4.0062891124787e-06, |
| "loss": 0.5562, |
| "step": 1494 |
| }, |
| { |
| "epoch": 1.8220958570268073, |
| "grad_norm": 0.45791268251247896, |
| "learning_rate": 3.999336809972343e-06, |
| "loss": 0.5226, |
| "step": 1495 |
| }, |
| { |
| "epoch": 1.8233143785540211, |
| "grad_norm": 0.52749121116633, |
| "learning_rate": 3.99238652226794e-06, |
| "loss": 0.5885, |
| "step": 1496 |
| }, |
| { |
| "epoch": 1.824532900081235, |
| "grad_norm": 0.4102080465654976, |
| "learning_rate": 3.985438263359667e-06, |
| "loss": 0.4996, |
| "step": 1497 |
| }, |
| { |
| "epoch": 1.8257514216084485, |
| "grad_norm": 0.453636908099918, |
| "learning_rate": 3.978492047237608e-06, |
| "loss": 0.568, |
| "step": 1498 |
| }, |
| { |
| "epoch": 1.826969943135662, |
| "grad_norm": 0.49160181331071584, |
| "learning_rate": 3.971547887887742e-06, |
| "loss": 0.574, |
| "step": 1499 |
| }, |
| { |
| "epoch": 1.8281884646628757, |
| "grad_norm": 0.4128170671886108, |
| "learning_rate": 3.964605799291897e-06, |
| "loss": 0.4792, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.8294069861900892, |
| "grad_norm": 0.4714468421227976, |
| "learning_rate": 3.9576657954277406e-06, |
| "loss": 0.5527, |
| "step": 1501 |
| }, |
| { |
| "epoch": 1.830625507717303, |
| "grad_norm": 0.4759788646029719, |
| "learning_rate": 3.950727890268736e-06, |
| "loss": 0.564, |
| "step": 1502 |
| }, |
| { |
| "epoch": 1.8318440292445166, |
| "grad_norm": 0.4269752606026449, |
| "learning_rate": 3.943792097784126e-06, |
| "loss": 0.5733, |
| "step": 1503 |
| }, |
| { |
| "epoch": 1.8330625507717304, |
| "grad_norm": 0.4407801675115479, |
| "learning_rate": 3.936858431938899e-06, |
| "loss": 0.501, |
| "step": 1504 |
| }, |
| { |
| "epoch": 1.834281072298944, |
| "grad_norm": 0.417785240435009, |
| "learning_rate": 3.929926906693757e-06, |
| "loss": 0.5292, |
| "step": 1505 |
| }, |
| { |
| "epoch": 1.8354995938261576, |
| "grad_norm": 0.4954357818413886, |
| "learning_rate": 3.922997536005094e-06, |
| "loss": 0.5834, |
| "step": 1506 |
| }, |
| { |
| "epoch": 1.8367181153533712, |
| "grad_norm": 0.4520246987276718, |
| "learning_rate": 3.91607033382497e-06, |
| "loss": 0.601, |
| "step": 1507 |
| }, |
| { |
| "epoch": 1.8379366368805847, |
| "grad_norm": 0.41894543258809586, |
| "learning_rate": 3.909145314101074e-06, |
| "loss": 0.5201, |
| "step": 1508 |
| }, |
| { |
| "epoch": 1.8391551584077985, |
| "grad_norm": 0.48207218741173996, |
| "learning_rate": 3.9022224907767e-06, |
| "loss": 0.5478, |
| "step": 1509 |
| }, |
| { |
| "epoch": 1.8403736799350123, |
| "grad_norm": 0.45664125938234335, |
| "learning_rate": 3.895301877790728e-06, |
| "loss": 0.5646, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.841592201462226, |
| "grad_norm": 0.4171767656165807, |
| "learning_rate": 3.888383489077576e-06, |
| "loss": 0.511, |
| "step": 1511 |
| }, |
| { |
| "epoch": 1.8428107229894395, |
| "grad_norm": 0.43354385082610986, |
| "learning_rate": 3.88146733856719e-06, |
| "loss": 0.526, |
| "step": 1512 |
| }, |
| { |
| "epoch": 1.844029244516653, |
| "grad_norm": 0.4920523523977966, |
| "learning_rate": 3.874553440185008e-06, |
| "loss": 0.5767, |
| "step": 1513 |
| }, |
| { |
| "epoch": 1.8452477660438666, |
| "grad_norm": 0.46705637995124133, |
| "learning_rate": 3.867641807851935e-06, |
| "loss": 0.5835, |
| "step": 1514 |
| }, |
| { |
| "epoch": 1.8464662875710804, |
| "grad_norm": 0.4461828469934018, |
| "learning_rate": 3.860732455484314e-06, |
| "loss": 0.4961, |
| "step": 1515 |
| }, |
| { |
| "epoch": 1.847684809098294, |
| "grad_norm": 0.4633901834358105, |
| "learning_rate": 3.853825396993891e-06, |
| "loss": 0.5811, |
| "step": 1516 |
| }, |
| { |
| "epoch": 1.8489033306255078, |
| "grad_norm": 0.4438313726356196, |
| "learning_rate": 3.8469206462878e-06, |
| "loss": 0.5655, |
| "step": 1517 |
| }, |
| { |
| "epoch": 1.8501218521527214, |
| "grad_norm": 0.4546281191367206, |
| "learning_rate": 3.840018217268527e-06, |
| "loss": 0.5556, |
| "step": 1518 |
| }, |
| { |
| "epoch": 1.851340373679935, |
| "grad_norm": 0.39692829259522316, |
| "learning_rate": 3.833118123833881e-06, |
| "loss": 0.5083, |
| "step": 1519 |
| }, |
| { |
| "epoch": 1.8525588952071486, |
| "grad_norm": 0.4367611773412124, |
| "learning_rate": 3.826220379876974e-06, |
| "loss": 0.5621, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.8537774167343624, |
| "grad_norm": 0.46250207668673327, |
| "learning_rate": 3.819324999286177e-06, |
| "loss": 0.5502, |
| "step": 1521 |
| }, |
| { |
| "epoch": 1.854995938261576, |
| "grad_norm": 0.42252748085937586, |
| "learning_rate": 3.8124319959451133e-06, |
| "loss": 0.5428, |
| "step": 1522 |
| }, |
| { |
| "epoch": 1.8562144597887897, |
| "grad_norm": 0.42520604252823624, |
| "learning_rate": 3.8055413837326133e-06, |
| "loss": 0.5484, |
| "step": 1523 |
| }, |
| { |
| "epoch": 1.8574329813160033, |
| "grad_norm": 0.41242969185302275, |
| "learning_rate": 3.7986531765226965e-06, |
| "loss": 0.521, |
| "step": 1524 |
| }, |
| { |
| "epoch": 1.858651502843217, |
| "grad_norm": 0.43001901614946786, |
| "learning_rate": 3.7917673881845373e-06, |
| "loss": 0.5943, |
| "step": 1525 |
| }, |
| { |
| "epoch": 1.8598700243704305, |
| "grad_norm": 0.4097185174805625, |
| "learning_rate": 3.7848840325824428e-06, |
| "loss": 0.5407, |
| "step": 1526 |
| }, |
| { |
| "epoch": 1.861088545897644, |
| "grad_norm": 0.4509948723964594, |
| "learning_rate": 3.778003123575815e-06, |
| "loss": 0.5526, |
| "step": 1527 |
| }, |
| { |
| "epoch": 1.8623070674248579, |
| "grad_norm": 0.458525992527633, |
| "learning_rate": 3.77112467501914e-06, |
| "loss": 0.5546, |
| "step": 1528 |
| }, |
| { |
| "epoch": 1.8635255889520714, |
| "grad_norm": 0.407821722457764, |
| "learning_rate": 3.7642487007619417e-06, |
| "loss": 0.5205, |
| "step": 1529 |
| }, |
| { |
| "epoch": 1.8647441104792852, |
| "grad_norm": 0.4630986805415289, |
| "learning_rate": 3.757375214648764e-06, |
| "loss": 0.5733, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.8659626320064988, |
| "grad_norm": 0.46336209457236627, |
| "learning_rate": 3.7505042305191463e-06, |
| "loss": 0.5653, |
| "step": 1531 |
| }, |
| { |
| "epoch": 1.8671811535337124, |
| "grad_norm": 0.39311369649530103, |
| "learning_rate": 3.743635762207582e-06, |
| "loss": 0.5342, |
| "step": 1532 |
| }, |
| { |
| "epoch": 1.868399675060926, |
| "grad_norm": 0.42025451422654897, |
| "learning_rate": 3.7367698235435036e-06, |
| "loss": 0.5474, |
| "step": 1533 |
| }, |
| { |
| "epoch": 1.8696181965881398, |
| "grad_norm": 0.42303104824107784, |
| "learning_rate": 3.72990642835125e-06, |
| "loss": 0.52, |
| "step": 1534 |
| }, |
| { |
| "epoch": 1.8708367181153533, |
| "grad_norm": 0.40431884043673944, |
| "learning_rate": 3.7230455904500385e-06, |
| "loss": 0.5468, |
| "step": 1535 |
| }, |
| { |
| "epoch": 1.8720552396425671, |
| "grad_norm": 0.4312111249145443, |
| "learning_rate": 3.716187323653939e-06, |
| "loss": 0.5888, |
| "step": 1536 |
| }, |
| { |
| "epoch": 1.8732737611697807, |
| "grad_norm": 0.41823157797464644, |
| "learning_rate": 3.7093316417718407e-06, |
| "loss": 0.5638, |
| "step": 1537 |
| }, |
| { |
| "epoch": 1.8744922826969943, |
| "grad_norm": 0.42521945264285177, |
| "learning_rate": 3.702478558607429e-06, |
| "loss": 0.5357, |
| "step": 1538 |
| }, |
| { |
| "epoch": 1.8757108042242079, |
| "grad_norm": 0.4652010212406273, |
| "learning_rate": 3.695628087959162e-06, |
| "loss": 0.5809, |
| "step": 1539 |
| }, |
| { |
| "epoch": 1.8769293257514215, |
| "grad_norm": 0.399398106227539, |
| "learning_rate": 3.6887802436202307e-06, |
| "loss": 0.5233, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.8781478472786353, |
| "grad_norm": 0.40874149994794573, |
| "learning_rate": 3.6819350393785445e-06, |
| "loss": 0.5534, |
| "step": 1541 |
| }, |
| { |
| "epoch": 1.879366368805849, |
| "grad_norm": 0.4553334343530874, |
| "learning_rate": 3.675092489016693e-06, |
| "loss": 0.5369, |
| "step": 1542 |
| }, |
| { |
| "epoch": 1.8805848903330626, |
| "grad_norm": 0.40028666583281924, |
| "learning_rate": 3.6682526063119206e-06, |
| "loss": 0.5209, |
| "step": 1543 |
| }, |
| { |
| "epoch": 1.8818034118602762, |
| "grad_norm": 0.41838120396321116, |
| "learning_rate": 3.661415405036103e-06, |
| "loss": 0.5752, |
| "step": 1544 |
| }, |
| { |
| "epoch": 1.8830219333874898, |
| "grad_norm": 0.4136128207763801, |
| "learning_rate": 3.654580898955721e-06, |
| "loss": 0.5277, |
| "step": 1545 |
| }, |
| { |
| "epoch": 1.8842404549147034, |
| "grad_norm": 0.4024921294917515, |
| "learning_rate": 3.647749101831821e-06, |
| "loss": 0.5239, |
| "step": 1546 |
| }, |
| { |
| "epoch": 1.8854589764419172, |
| "grad_norm": 0.4141269485652032, |
| "learning_rate": 3.640920027420001e-06, |
| "loss": 0.5508, |
| "step": 1547 |
| }, |
| { |
| "epoch": 1.8866774979691308, |
| "grad_norm": 0.440719562642394, |
| "learning_rate": 3.6340936894703717e-06, |
| "loss": 0.5702, |
| "step": 1548 |
| }, |
| { |
| "epoch": 1.8878960194963446, |
| "grad_norm": 0.4786206622488289, |
| "learning_rate": 3.6272701017275385e-06, |
| "loss": 0.5721, |
| "step": 1549 |
| }, |
| { |
| "epoch": 1.8891145410235581, |
| "grad_norm": 0.4129960293133917, |
| "learning_rate": 3.6204492779305678e-06, |
| "loss": 0.5382, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.8903330625507717, |
| "grad_norm": 0.425696733632996, |
| "learning_rate": 3.61363123181296e-06, |
| "loss": 0.546, |
| "step": 1551 |
| }, |
| { |
| "epoch": 1.8915515840779853, |
| "grad_norm": 0.4653679268493305, |
| "learning_rate": 3.6068159771026267e-06, |
| "loss": 0.5789, |
| "step": 1552 |
| }, |
| { |
| "epoch": 1.8927701056051989, |
| "grad_norm": 0.4522003365392251, |
| "learning_rate": 3.6000035275218515e-06, |
| "loss": 0.5224, |
| "step": 1553 |
| }, |
| { |
| "epoch": 1.8939886271324127, |
| "grad_norm": 0.3920499944414549, |
| "learning_rate": 3.593193896787277e-06, |
| "loss": 0.4976, |
| "step": 1554 |
| }, |
| { |
| "epoch": 1.8952071486596265, |
| "grad_norm": 0.45058258754829983, |
| "learning_rate": 3.5863870986098655e-06, |
| "loss": 0.5745, |
| "step": 1555 |
| }, |
| { |
| "epoch": 1.89642567018684, |
| "grad_norm": 0.4244815509498337, |
| "learning_rate": 3.5795831466948805e-06, |
| "loss": 0.5414, |
| "step": 1556 |
| }, |
| { |
| "epoch": 1.8976441917140536, |
| "grad_norm": 0.4331016099950002, |
| "learning_rate": 3.5727820547418525e-06, |
| "loss": 0.539, |
| "step": 1557 |
| }, |
| { |
| "epoch": 1.8988627132412672, |
| "grad_norm": 0.417995629833821, |
| "learning_rate": 3.5659838364445505e-06, |
| "loss": 0.5156, |
| "step": 1558 |
| }, |
| { |
| "epoch": 1.9000812347684808, |
| "grad_norm": 0.4734474494296379, |
| "learning_rate": 3.5591885054909605e-06, |
| "loss": 0.5925, |
| "step": 1559 |
| }, |
| { |
| "epoch": 1.9012997562956946, |
| "grad_norm": 0.46115486081361745, |
| "learning_rate": 3.5523960755632573e-06, |
| "loss": 0.5091, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.9025182778229082, |
| "grad_norm": 0.40875274875883305, |
| "learning_rate": 3.5456065603377697e-06, |
| "loss": 0.5567, |
| "step": 1561 |
| }, |
| { |
| "epoch": 1.903736799350122, |
| "grad_norm": 0.45386074829816486, |
| "learning_rate": 3.5388199734849626e-06, |
| "loss": 0.5578, |
| "step": 1562 |
| }, |
| { |
| "epoch": 1.9049553208773355, |
| "grad_norm": 0.38709828752403175, |
| "learning_rate": 3.5320363286694015e-06, |
| "loss": 0.5179, |
| "step": 1563 |
| }, |
| { |
| "epoch": 1.9061738424045491, |
| "grad_norm": 0.42735900716697534, |
| "learning_rate": 3.5252556395497274e-06, |
| "loss": 0.5712, |
| "step": 1564 |
| }, |
| { |
| "epoch": 1.9073923639317627, |
| "grad_norm": 0.4181700954501109, |
| "learning_rate": 3.518477919778631e-06, |
| "loss": 0.5781, |
| "step": 1565 |
| }, |
| { |
| "epoch": 1.9086108854589763, |
| "grad_norm": 0.421517198534864, |
| "learning_rate": 3.5117031830028274e-06, |
| "loss": 0.5214, |
| "step": 1566 |
| }, |
| { |
| "epoch": 1.90982940698619, |
| "grad_norm": 0.44082135382564575, |
| "learning_rate": 3.504931442863023e-06, |
| "loss": 0.5929, |
| "step": 1567 |
| }, |
| { |
| "epoch": 1.9110479285134039, |
| "grad_norm": 0.3829707161093217, |
| "learning_rate": 3.49816271299389e-06, |
| "loss": 0.4973, |
| "step": 1568 |
| }, |
| { |
| "epoch": 1.9122664500406175, |
| "grad_norm": 0.4241401054720212, |
| "learning_rate": 3.4913970070240388e-06, |
| "loss": 0.5694, |
| "step": 1569 |
| }, |
| { |
| "epoch": 1.913484971567831, |
| "grad_norm": 0.4287983948624245, |
| "learning_rate": 3.484634338575995e-06, |
| "loss": 0.5123, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.9147034930950446, |
| "grad_norm": 0.40950888500677163, |
| "learning_rate": 3.4778747212661647e-06, |
| "loss": 0.5595, |
| "step": 1571 |
| }, |
| { |
| "epoch": 1.9159220146222582, |
| "grad_norm": 0.4272739781741268, |
| "learning_rate": 3.4711181687048114e-06, |
| "loss": 0.5609, |
| "step": 1572 |
| }, |
| { |
| "epoch": 1.917140536149472, |
| "grad_norm": 0.41564421693161757, |
| "learning_rate": 3.464364694496031e-06, |
| "loss": 0.5336, |
| "step": 1573 |
| }, |
| { |
| "epoch": 1.9183590576766856, |
| "grad_norm": 0.4359864387668293, |
| "learning_rate": 3.457614312237716e-06, |
| "loss": 0.5371, |
| "step": 1574 |
| }, |
| { |
| "epoch": 1.9195775792038994, |
| "grad_norm": 0.4799831871173569, |
| "learning_rate": 3.450867035521536e-06, |
| "loss": 0.5299, |
| "step": 1575 |
| }, |
| { |
| "epoch": 1.920796100731113, |
| "grad_norm": 0.4453426614702043, |
| "learning_rate": 3.4441228779329073e-06, |
| "loss": 0.5502, |
| "step": 1576 |
| }, |
| { |
| "epoch": 1.9220146222583265, |
| "grad_norm": 0.4238694285973907, |
| "learning_rate": 3.4373818530509686e-06, |
| "loss": 0.5275, |
| "step": 1577 |
| }, |
| { |
| "epoch": 1.9232331437855401, |
| "grad_norm": 0.41917397616804203, |
| "learning_rate": 3.4306439744485453e-06, |
| "loss": 0.5761, |
| "step": 1578 |
| }, |
| { |
| "epoch": 1.924451665312754, |
| "grad_norm": 0.425933885933589, |
| "learning_rate": 3.423909255692137e-06, |
| "loss": 0.515, |
| "step": 1579 |
| }, |
| { |
| "epoch": 1.9256701868399675, |
| "grad_norm": 0.4456890458176112, |
| "learning_rate": 3.417177710341868e-06, |
| "loss": 0.5522, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.9268887083671813, |
| "grad_norm": 0.41653994234849523, |
| "learning_rate": 3.4104493519514844e-06, |
| "loss": 0.5675, |
| "step": 1581 |
| }, |
| { |
| "epoch": 1.9281072298943949, |
| "grad_norm": 0.4065472343645043, |
| "learning_rate": 3.40372419406831e-06, |
| "loss": 0.5016, |
| "step": 1582 |
| }, |
| { |
| "epoch": 1.9293257514216084, |
| "grad_norm": 0.4554171323483848, |
| "learning_rate": 3.3970022502332273e-06, |
| "loss": 0.5919, |
| "step": 1583 |
| }, |
| { |
| "epoch": 1.930544272948822, |
| "grad_norm": 0.4247465501155384, |
| "learning_rate": 3.3902835339806463e-06, |
| "loss": 0.565, |
| "step": 1584 |
| }, |
| { |
| "epoch": 1.9317627944760356, |
| "grad_norm": 0.44295772231878283, |
| "learning_rate": 3.3835680588384767e-06, |
| "loss": 0.5046, |
| "step": 1585 |
| }, |
| { |
| "epoch": 1.9329813160032494, |
| "grad_norm": 0.4268447826543325, |
| "learning_rate": 3.3768558383281024e-06, |
| "loss": 0.5193, |
| "step": 1586 |
| }, |
| { |
| "epoch": 1.934199837530463, |
| "grad_norm": 0.44799827489237776, |
| "learning_rate": 3.3701468859643583e-06, |
| "loss": 0.5631, |
| "step": 1587 |
| }, |
| { |
| "epoch": 1.9354183590576768, |
| "grad_norm": 0.4332023943083594, |
| "learning_rate": 3.363441215255495e-06, |
| "loss": 0.5724, |
| "step": 1588 |
| }, |
| { |
| "epoch": 1.9366368805848904, |
| "grad_norm": 0.3996520998473681, |
| "learning_rate": 3.356738839703158e-06, |
| "loss": 0.5255, |
| "step": 1589 |
| }, |
| { |
| "epoch": 1.937855402112104, |
| "grad_norm": 0.44409520862327806, |
| "learning_rate": 3.3500397728023536e-06, |
| "loss": 0.5425, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.9390739236393175, |
| "grad_norm": 0.4405722390495154, |
| "learning_rate": 3.343344028041433e-06, |
| "loss": 0.6053, |
| "step": 1591 |
| }, |
| { |
| "epoch": 1.9402924451665313, |
| "grad_norm": 0.40826303676438436, |
| "learning_rate": 3.336651618902054e-06, |
| "loss": 0.5324, |
| "step": 1592 |
| }, |
| { |
| "epoch": 1.941510966693745, |
| "grad_norm": 0.3851707449193732, |
| "learning_rate": 3.3299625588591568e-06, |
| "loss": 0.5088, |
| "step": 1593 |
| }, |
| { |
| "epoch": 1.9427294882209587, |
| "grad_norm": 0.40859643518575894, |
| "learning_rate": 3.3232768613809453e-06, |
| "loss": 0.581, |
| "step": 1594 |
| }, |
| { |
| "epoch": 1.9439480097481723, |
| "grad_norm": 0.3722542671220263, |
| "learning_rate": 3.316594539928845e-06, |
| "loss": 0.4977, |
| "step": 1595 |
| }, |
| { |
| "epoch": 1.9451665312753859, |
| "grad_norm": 0.4383598182132238, |
| "learning_rate": 3.309915607957487e-06, |
| "loss": 0.6137, |
| "step": 1596 |
| }, |
| { |
| "epoch": 1.9463850528025994, |
| "grad_norm": 0.4011690274677538, |
| "learning_rate": 3.303240078914679e-06, |
| "loss": 0.563, |
| "step": 1597 |
| }, |
| { |
| "epoch": 1.947603574329813, |
| "grad_norm": 0.36889683412600394, |
| "learning_rate": 3.2965679662413772e-06, |
| "loss": 0.4968, |
| "step": 1598 |
| }, |
| { |
| "epoch": 1.9488220958570268, |
| "grad_norm": 0.43411779671306283, |
| "learning_rate": 3.289899283371657e-06, |
| "loss": 0.5802, |
| "step": 1599 |
| }, |
| { |
| "epoch": 1.9500406173842406, |
| "grad_norm": 0.3803158460715809, |
| "learning_rate": 3.283234043732689e-06, |
| "loss": 0.5093, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.9512591389114542, |
| "grad_norm": 0.4391648613289349, |
| "learning_rate": 3.276572260744709e-06, |
| "loss": 0.565, |
| "step": 1601 |
| }, |
| { |
| "epoch": 1.9524776604386678, |
| "grad_norm": 0.40723374350566227, |
| "learning_rate": 3.2699139478209987e-06, |
| "loss": 0.514, |
| "step": 1602 |
| }, |
| { |
| "epoch": 1.9536961819658814, |
| "grad_norm": 0.454313203169353, |
| "learning_rate": 3.263259118367845e-06, |
| "loss": 0.6135, |
| "step": 1603 |
| }, |
| { |
| "epoch": 1.954914703493095, |
| "grad_norm": 0.4178433726812962, |
| "learning_rate": 3.256607785784527e-06, |
| "loss": 0.5301, |
| "step": 1604 |
| }, |
| { |
| "epoch": 1.9561332250203087, |
| "grad_norm": 0.42422765865196377, |
| "learning_rate": 3.249959963463283e-06, |
| "loss": 0.5278, |
| "step": 1605 |
| }, |
| { |
| "epoch": 1.9573517465475223, |
| "grad_norm": 0.4507513531340492, |
| "learning_rate": 3.2433156647892784e-06, |
| "loss": 0.5154, |
| "step": 1606 |
| }, |
| { |
| "epoch": 1.958570268074736, |
| "grad_norm": 0.4155636470893334, |
| "learning_rate": 3.2366749031405875e-06, |
| "loss": 0.5627, |
| "step": 1607 |
| }, |
| { |
| "epoch": 1.9597887896019497, |
| "grad_norm": 0.41760402999256324, |
| "learning_rate": 3.2300376918881628e-06, |
| "loss": 0.5779, |
| "step": 1608 |
| }, |
| { |
| "epoch": 1.9610073111291633, |
| "grad_norm": 0.4262673425446038, |
| "learning_rate": 3.223404044395808e-06, |
| "loss": 0.5939, |
| "step": 1609 |
| }, |
| { |
| "epoch": 1.9622258326563768, |
| "grad_norm": 0.39002137904137807, |
| "learning_rate": 3.216773974020152e-06, |
| "loss": 0.4796, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.9634443541835904, |
| "grad_norm": 0.48341233875628054, |
| "learning_rate": 3.210147494110618e-06, |
| "loss": 0.5623, |
| "step": 1611 |
| }, |
| { |
| "epoch": 1.9646628757108042, |
| "grad_norm": 0.4263996506849499, |
| "learning_rate": 3.203524618009403e-06, |
| "loss": 0.5771, |
| "step": 1612 |
| }, |
| { |
| "epoch": 1.965881397238018, |
| "grad_norm": 0.39778372039996124, |
| "learning_rate": 3.1969053590514487e-06, |
| "loss": 0.5291, |
| "step": 1613 |
| }, |
| { |
| "epoch": 1.9670999187652316, |
| "grad_norm": 0.4303659167460693, |
| "learning_rate": 3.19028973056441e-06, |
| "loss": 0.5488, |
| "step": 1614 |
| }, |
| { |
| "epoch": 1.9683184402924452, |
| "grad_norm": 0.4137612167679553, |
| "learning_rate": 3.1836777458686363e-06, |
| "loss": 0.5619, |
| "step": 1615 |
| }, |
| { |
| "epoch": 1.9695369618196588, |
| "grad_norm": 0.36766081180510835, |
| "learning_rate": 3.177069418277139e-06, |
| "loss": 0.4946, |
| "step": 1616 |
| }, |
| { |
| "epoch": 1.9707554833468723, |
| "grad_norm": 0.4182717579515874, |
| "learning_rate": 3.1704647610955618e-06, |
| "loss": 0.5297, |
| "step": 1617 |
| }, |
| { |
| "epoch": 1.9719740048740861, |
| "grad_norm": 0.4584959850560608, |
| "learning_rate": 3.163863787622162e-06, |
| "loss": 0.6143, |
| "step": 1618 |
| }, |
| { |
| "epoch": 1.9731925264012997, |
| "grad_norm": 0.4458263983902489, |
| "learning_rate": 3.157266511147783e-06, |
| "loss": 0.5079, |
| "step": 1619 |
| }, |
| { |
| "epoch": 1.9744110479285135, |
| "grad_norm": 0.43613330489917596, |
| "learning_rate": 3.150672944955818e-06, |
| "loss": 0.5714, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.975629569455727, |
| "grad_norm": 0.3858901721024831, |
| "learning_rate": 3.1440831023221952e-06, |
| "loss": 0.5283, |
| "step": 1621 |
| }, |
| { |
| "epoch": 1.9768480909829407, |
| "grad_norm": 0.40043306380620164, |
| "learning_rate": 3.137496996515339e-06, |
| "loss": 0.5618, |
| "step": 1622 |
| }, |
| { |
| "epoch": 1.9780666125101543, |
| "grad_norm": 0.4155561403542389, |
| "learning_rate": 3.1309146407961565e-06, |
| "loss": 0.5793, |
| "step": 1623 |
| }, |
| { |
| "epoch": 1.9792851340373678, |
| "grad_norm": 0.48452491106537393, |
| "learning_rate": 3.1243360484180012e-06, |
| "loss": 0.5955, |
| "step": 1624 |
| }, |
| { |
| "epoch": 1.9805036555645816, |
| "grad_norm": 0.4052625054606517, |
| "learning_rate": 3.117761232626648e-06, |
| "loss": 0.5113, |
| "step": 1625 |
| }, |
| { |
| "epoch": 1.9817221770917954, |
| "grad_norm": 0.42003854375542504, |
| "learning_rate": 3.111190206660273e-06, |
| "loss": 0.5462, |
| "step": 1626 |
| }, |
| { |
| "epoch": 1.982940698619009, |
| "grad_norm": 0.425058799574285, |
| "learning_rate": 3.1046229837494123e-06, |
| "loss": 0.5244, |
| "step": 1627 |
| }, |
| { |
| "epoch": 1.9841592201462226, |
| "grad_norm": 0.4113830672023175, |
| "learning_rate": 3.0980595771169543e-06, |
| "loss": 0.5297, |
| "step": 1628 |
| }, |
| { |
| "epoch": 1.9853777416734362, |
| "grad_norm": 0.4015669403567964, |
| "learning_rate": 3.091499999978097e-06, |
| "loss": 0.5261, |
| "step": 1629 |
| }, |
| { |
| "epoch": 1.9865962632006497, |
| "grad_norm": 0.4283955622994288, |
| "learning_rate": 3.0849442655403315e-06, |
| "loss": 0.5755, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.9878147847278635, |
| "grad_norm": 0.41898536957171045, |
| "learning_rate": 3.0783923870034094e-06, |
| "loss": 0.5468, |
| "step": 1631 |
| }, |
| { |
| "epoch": 1.9890333062550771, |
| "grad_norm": 0.39218815049699407, |
| "learning_rate": 3.0718443775593233e-06, |
| "loss": 0.5094, |
| "step": 1632 |
| }, |
| { |
| "epoch": 1.990251827782291, |
| "grad_norm": 0.4279916922662056, |
| "learning_rate": 3.065300250392265e-06, |
| "loss": 0.5914, |
| "step": 1633 |
| }, |
| { |
| "epoch": 1.9914703493095045, |
| "grad_norm": 0.41267484908021385, |
| "learning_rate": 3.058760018678622e-06, |
| "loss": 0.5182, |
| "step": 1634 |
| }, |
| { |
| "epoch": 1.992688870836718, |
| "grad_norm": 0.44135796853573733, |
| "learning_rate": 3.0522236955869293e-06, |
| "loss": 0.5306, |
| "step": 1635 |
| }, |
| { |
| "epoch": 1.9939073923639317, |
| "grad_norm": 0.47924737111572846, |
| "learning_rate": 3.0456912942778585e-06, |
| "loss": 0.5286, |
| "step": 1636 |
| }, |
| { |
| "epoch": 1.9951259138911455, |
| "grad_norm": 0.42225974588467396, |
| "learning_rate": 3.0391628279041797e-06, |
| "loss": 0.5143, |
| "step": 1637 |
| }, |
| { |
| "epoch": 1.996344435418359, |
| "grad_norm": 0.443505986438843, |
| "learning_rate": 3.0326383096107424e-06, |
| "loss": 0.603, |
| "step": 1638 |
| }, |
| { |
| "epoch": 1.9975629569455728, |
| "grad_norm": 0.43300461200802, |
| "learning_rate": 3.0261177525344458e-06, |
| "loss": 0.529, |
| "step": 1639 |
| }, |
| { |
| "epoch": 1.9987814784727864, |
| "grad_norm": 0.44310730809971327, |
| "learning_rate": 3.019601169804216e-06, |
| "loss": 0.5712, |
| "step": 1640 |
| }, |
| { |
| "epoch": 2.0004061738424044, |
| "grad_norm": 0.9468576561635692, |
| "learning_rate": 3.0130885745409744e-06, |
| "loss": 0.9149, |
| "step": 1641 |
| }, |
| { |
| "epoch": 2.0016246953696184, |
| "grad_norm": 0.4706438333540491, |
| "learning_rate": 3.0065799798576146e-06, |
| "loss": 0.4931, |
| "step": 1642 |
| }, |
| { |
| "epoch": 2.002843216896832, |
| "grad_norm": 0.4720816823665595, |
| "learning_rate": 3.0000753988589717e-06, |
| "loss": 0.4837, |
| "step": 1643 |
| }, |
| { |
| "epoch": 2.0040617384240456, |
| "grad_norm": 0.47408797235071, |
| "learning_rate": 2.993574844641807e-06, |
| "loss": 0.4923, |
| "step": 1644 |
| }, |
| { |
| "epoch": 2.005280259951259, |
| "grad_norm": 0.4519771428113875, |
| "learning_rate": 2.987078330294767e-06, |
| "loss": 0.5211, |
| "step": 1645 |
| }, |
| { |
| "epoch": 2.0064987814784727, |
| "grad_norm": 0.519886504494184, |
| "learning_rate": 2.9805858688983656e-06, |
| "loss": 0.5746, |
| "step": 1646 |
| }, |
| { |
| "epoch": 2.0077173030056863, |
| "grad_norm": 0.42316406477644747, |
| "learning_rate": 2.9740974735249627e-06, |
| "loss": 0.4762, |
| "step": 1647 |
| }, |
| { |
| "epoch": 2.0089358245329, |
| "grad_norm": 0.4459084131070543, |
| "learning_rate": 2.96761315723872e-06, |
| "loss": 0.517, |
| "step": 1648 |
| }, |
| { |
| "epoch": 2.010154346060114, |
| "grad_norm": 0.4787475311586957, |
| "learning_rate": 2.961132933095595e-06, |
| "loss": 0.5475, |
| "step": 1649 |
| }, |
| { |
| "epoch": 2.0113728675873275, |
| "grad_norm": 0.44287904587306054, |
| "learning_rate": 2.9546568141433007e-06, |
| "loss": 0.513, |
| "step": 1650 |
| }, |
| { |
| "epoch": 2.012591389114541, |
| "grad_norm": 0.3984732881743886, |
| "learning_rate": 2.94818481342129e-06, |
| "loss": 0.5093, |
| "step": 1651 |
| }, |
| { |
| "epoch": 2.0138099106417546, |
| "grad_norm": 0.4399121723080827, |
| "learning_rate": 2.941716943960716e-06, |
| "loss": 0.511, |
| "step": 1652 |
| }, |
| { |
| "epoch": 2.015028432168968, |
| "grad_norm": 0.44831302014249225, |
| "learning_rate": 2.9352532187844254e-06, |
| "loss": 0.4984, |
| "step": 1653 |
| }, |
| { |
| "epoch": 2.016246953696182, |
| "grad_norm": 0.42996322667286735, |
| "learning_rate": 2.9287936509069036e-06, |
| "loss": 0.5191, |
| "step": 1654 |
| }, |
| { |
| "epoch": 2.017465475223396, |
| "grad_norm": 0.4183124555440696, |
| "learning_rate": 2.9223382533342825e-06, |
| "loss": 0.545, |
| "step": 1655 |
| }, |
| { |
| "epoch": 2.0186839967506094, |
| "grad_norm": 0.40446852524401855, |
| "learning_rate": 2.915887039064287e-06, |
| "loss": 0.503, |
| "step": 1656 |
| }, |
| { |
| "epoch": 2.019902518277823, |
| "grad_norm": 0.46166747184685086, |
| "learning_rate": 2.9094400210862206e-06, |
| "loss": 0.5397, |
| "step": 1657 |
| }, |
| { |
| "epoch": 2.0211210398050365, |
| "grad_norm": 0.44952433573058875, |
| "learning_rate": 2.9029972123809425e-06, |
| "loss": 0.5055, |
| "step": 1658 |
| }, |
| { |
| "epoch": 2.02233956133225, |
| "grad_norm": 0.4250306396302766, |
| "learning_rate": 2.8965586259208295e-06, |
| "loss": 0.521, |
| "step": 1659 |
| }, |
| { |
| "epoch": 2.0235580828594637, |
| "grad_norm": 0.4183203719527761, |
| "learning_rate": 2.890124274669764e-06, |
| "loss": 0.4974, |
| "step": 1660 |
| }, |
| { |
| "epoch": 2.0247766043866773, |
| "grad_norm": 0.4411815617611041, |
| "learning_rate": 2.8836941715830943e-06, |
| "loss": 0.5129, |
| "step": 1661 |
| }, |
| { |
| "epoch": 2.0259951259138913, |
| "grad_norm": 0.44795060381325624, |
| "learning_rate": 2.8772683296076197e-06, |
| "loss": 0.5142, |
| "step": 1662 |
| }, |
| { |
| "epoch": 2.027213647441105, |
| "grad_norm": 0.41579452280547835, |
| "learning_rate": 2.8708467616815606e-06, |
| "loss": 0.4951, |
| "step": 1663 |
| }, |
| { |
| "epoch": 2.0284321689683185, |
| "grad_norm": 0.4012708682220002, |
| "learning_rate": 2.864429480734529e-06, |
| "loss": 0.512, |
| "step": 1664 |
| }, |
| { |
| "epoch": 2.029650690495532, |
| "grad_norm": 0.4273458204316277, |
| "learning_rate": 2.858016499687503e-06, |
| "loss": 0.5401, |
| "step": 1665 |
| }, |
| { |
| "epoch": 2.0308692120227456, |
| "grad_norm": 0.45185500116453636, |
| "learning_rate": 2.8516078314528082e-06, |
| "loss": 0.4782, |
| "step": 1666 |
| }, |
| { |
| "epoch": 2.032087733549959, |
| "grad_norm": 0.49808257517442245, |
| "learning_rate": 2.8452034889340874e-06, |
| "loss": 0.5078, |
| "step": 1667 |
| }, |
| { |
| "epoch": 2.033306255077173, |
| "grad_norm": 0.4147857957964427, |
| "learning_rate": 2.838803485026265e-06, |
| "loss": 0.5092, |
| "step": 1668 |
| }, |
| { |
| "epoch": 2.034524776604387, |
| "grad_norm": 0.4270575824323386, |
| "learning_rate": 2.8324078326155403e-06, |
| "loss": 0.5239, |
| "step": 1669 |
| }, |
| { |
| "epoch": 2.0357432981316004, |
| "grad_norm": 0.4396795083275381, |
| "learning_rate": 2.8260165445793417e-06, |
| "loss": 0.5106, |
| "step": 1670 |
| }, |
| { |
| "epoch": 2.036961819658814, |
| "grad_norm": 0.42042014253641513, |
| "learning_rate": 2.819629633786319e-06, |
| "loss": 0.4699, |
| "step": 1671 |
| }, |
| { |
| "epoch": 2.0381803411860275, |
| "grad_norm": 0.4430726691393502, |
| "learning_rate": 2.8132471130962997e-06, |
| "loss": 0.4899, |
| "step": 1672 |
| }, |
| { |
| "epoch": 2.039398862713241, |
| "grad_norm": 0.40352990345385337, |
| "learning_rate": 2.806868995360278e-06, |
| "loss": 0.5271, |
| "step": 1673 |
| }, |
| { |
| "epoch": 2.0406173842404547, |
| "grad_norm": 0.4264717440525784, |
| "learning_rate": 2.800495293420384e-06, |
| "loss": 0.5358, |
| "step": 1674 |
| }, |
| { |
| "epoch": 2.0418359057676687, |
| "grad_norm": 0.406701314146554, |
| "learning_rate": 2.7941260201098513e-06, |
| "loss": 0.5347, |
| "step": 1675 |
| }, |
| { |
| "epoch": 2.0430544272948823, |
| "grad_norm": 0.3789120115073334, |
| "learning_rate": 2.7877611882529978e-06, |
| "loss": 0.5291, |
| "step": 1676 |
| }, |
| { |
| "epoch": 2.044272948822096, |
| "grad_norm": 0.376105306550124, |
| "learning_rate": 2.781400810665201e-06, |
| "loss": 0.4798, |
| "step": 1677 |
| }, |
| { |
| "epoch": 2.0454914703493094, |
| "grad_norm": 0.41689803288099314, |
| "learning_rate": 2.775044900152873e-06, |
| "loss": 0.5603, |
| "step": 1678 |
| }, |
| { |
| "epoch": 2.046709991876523, |
| "grad_norm": 0.39084573445190973, |
| "learning_rate": 2.7686934695134237e-06, |
| "loss": 0.5172, |
| "step": 1679 |
| }, |
| { |
| "epoch": 2.0479285134037366, |
| "grad_norm": 0.3983625559018197, |
| "learning_rate": 2.762346531535246e-06, |
| "loss": 0.5169, |
| "step": 1680 |
| }, |
| { |
| "epoch": 2.0491470349309506, |
| "grad_norm": 0.4214853676308127, |
| "learning_rate": 2.7560040989976894e-06, |
| "loss": 0.4956, |
| "step": 1681 |
| }, |
| { |
| "epoch": 2.050365556458164, |
| "grad_norm": 0.3999213184120299, |
| "learning_rate": 2.749666184671032e-06, |
| "loss": 0.4772, |
| "step": 1682 |
| }, |
| { |
| "epoch": 2.051584077985378, |
| "grad_norm": 0.47662956549783836, |
| "learning_rate": 2.7433328013164493e-06, |
| "loss": 0.5384, |
| "step": 1683 |
| }, |
| { |
| "epoch": 2.0528025995125914, |
| "grad_norm": 0.41183336935775333, |
| "learning_rate": 2.737003961686e-06, |
| "loss": 0.5383, |
| "step": 1684 |
| }, |
| { |
| "epoch": 2.054021121039805, |
| "grad_norm": 0.4162288645157988, |
| "learning_rate": 2.730679678522592e-06, |
| "loss": 0.4879, |
| "step": 1685 |
| }, |
| { |
| "epoch": 2.0552396425670185, |
| "grad_norm": 0.4208621537377079, |
| "learning_rate": 2.724359964559958e-06, |
| "loss": 0.5302, |
| "step": 1686 |
| }, |
| { |
| "epoch": 2.0564581640942325, |
| "grad_norm": 0.46940669999316137, |
| "learning_rate": 2.7180448325226283e-06, |
| "loss": 0.5038, |
| "step": 1687 |
| }, |
| { |
| "epoch": 2.057676685621446, |
| "grad_norm": 0.42207329602275184, |
| "learning_rate": 2.711734295125913e-06, |
| "loss": 0.5136, |
| "step": 1688 |
| }, |
| { |
| "epoch": 2.0588952071486597, |
| "grad_norm": 0.42923148362073005, |
| "learning_rate": 2.705428365075868e-06, |
| "loss": 0.4974, |
| "step": 1689 |
| }, |
| { |
| "epoch": 2.0601137286758733, |
| "grad_norm": 0.4486303504796547, |
| "learning_rate": 2.6991270550692794e-06, |
| "loss": 0.4896, |
| "step": 1690 |
| }, |
| { |
| "epoch": 2.061332250203087, |
| "grad_norm": 0.4450084773476215, |
| "learning_rate": 2.692830377793614e-06, |
| "loss": 0.5368, |
| "step": 1691 |
| }, |
| { |
| "epoch": 2.0625507717303004, |
| "grad_norm": 0.41459790491115805, |
| "learning_rate": 2.686538345927027e-06, |
| "loss": 0.5181, |
| "step": 1692 |
| }, |
| { |
| "epoch": 2.063769293257514, |
| "grad_norm": 0.39428684445951, |
| "learning_rate": 2.680250972138314e-06, |
| "loss": 0.5002, |
| "step": 1693 |
| }, |
| { |
| "epoch": 2.064987814784728, |
| "grad_norm": 0.46824872444922333, |
| "learning_rate": 2.6739682690868947e-06, |
| "loss": 0.5303, |
| "step": 1694 |
| }, |
| { |
| "epoch": 2.0662063363119416, |
| "grad_norm": 0.4328466707209096, |
| "learning_rate": 2.6676902494227795e-06, |
| "loss": 0.5603, |
| "step": 1695 |
| }, |
| { |
| "epoch": 2.067424857839155, |
| "grad_norm": 0.3839348433674553, |
| "learning_rate": 2.6614169257865513e-06, |
| "loss": 0.4682, |
| "step": 1696 |
| }, |
| { |
| "epoch": 2.0686433793663688, |
| "grad_norm": 0.4527195063930813, |
| "learning_rate": 2.6551483108093378e-06, |
| "loss": 0.5468, |
| "step": 1697 |
| }, |
| { |
| "epoch": 2.0698619008935824, |
| "grad_norm": 0.3864719481645061, |
| "learning_rate": 2.6488844171127903e-06, |
| "loss": 0.4596, |
| "step": 1698 |
| }, |
| { |
| "epoch": 2.071080422420796, |
| "grad_norm": 0.46426343352179134, |
| "learning_rate": 2.6426252573090437e-06, |
| "loss": 0.56, |
| "step": 1699 |
| }, |
| { |
| "epoch": 2.07229894394801, |
| "grad_norm": 0.4288286079073576, |
| "learning_rate": 2.6363708440007136e-06, |
| "loss": 0.5161, |
| "step": 1700 |
| }, |
| { |
| "epoch": 2.0735174654752235, |
| "grad_norm": 0.40637110346427036, |
| "learning_rate": 2.6301211897808463e-06, |
| "loss": 0.5389, |
| "step": 1701 |
| }, |
| { |
| "epoch": 2.074735987002437, |
| "grad_norm": 0.3951734512163483, |
| "learning_rate": 2.623876307232919e-06, |
| "loss": 0.526, |
| "step": 1702 |
| }, |
| { |
| "epoch": 2.0759545085296507, |
| "grad_norm": 0.3885989062888163, |
| "learning_rate": 2.6176362089307873e-06, |
| "loss": 0.4725, |
| "step": 1703 |
| }, |
| { |
| "epoch": 2.0771730300568643, |
| "grad_norm": 0.43823376795203267, |
| "learning_rate": 2.611400907438685e-06, |
| "loss": 0.5124, |
| "step": 1704 |
| }, |
| { |
| "epoch": 2.078391551584078, |
| "grad_norm": 0.39530623859105424, |
| "learning_rate": 2.6051704153111847e-06, |
| "loss": 0.4934, |
| "step": 1705 |
| }, |
| { |
| "epoch": 2.0796100731112914, |
| "grad_norm": 0.3703827995949618, |
| "learning_rate": 2.598944745093174e-06, |
| "loss": 0.477, |
| "step": 1706 |
| }, |
| { |
| "epoch": 2.0808285946385054, |
| "grad_norm": 0.399727045865617, |
| "learning_rate": 2.5927239093198273e-06, |
| "loss": 0.5887, |
| "step": 1707 |
| }, |
| { |
| "epoch": 2.082047116165719, |
| "grad_norm": 0.366776126773729, |
| "learning_rate": 2.5865079205165953e-06, |
| "loss": 0.4682, |
| "step": 1708 |
| }, |
| { |
| "epoch": 2.0832656376929326, |
| "grad_norm": 0.4208917158039958, |
| "learning_rate": 2.5802967911991637e-06, |
| "loss": 0.5203, |
| "step": 1709 |
| }, |
| { |
| "epoch": 2.084484159220146, |
| "grad_norm": 0.4493230756017366, |
| "learning_rate": 2.574090533873431e-06, |
| "loss": 0.5273, |
| "step": 1710 |
| }, |
| { |
| "epoch": 2.0857026807473598, |
| "grad_norm": 0.4464314211780269, |
| "learning_rate": 2.567889161035494e-06, |
| "loss": 0.589, |
| "step": 1711 |
| }, |
| { |
| "epoch": 2.0869212022745733, |
| "grad_norm": 0.37995030061127644, |
| "learning_rate": 2.5616926851716055e-06, |
| "loss": 0.4443, |
| "step": 1712 |
| }, |
| { |
| "epoch": 2.0881397238017874, |
| "grad_norm": 0.43366495148118606, |
| "learning_rate": 2.555501118758167e-06, |
| "loss": 0.5068, |
| "step": 1713 |
| }, |
| { |
| "epoch": 2.089358245329001, |
| "grad_norm": 0.43859783988060524, |
| "learning_rate": 2.549314474261686e-06, |
| "loss": 0.5061, |
| "step": 1714 |
| }, |
| { |
| "epoch": 2.0905767668562145, |
| "grad_norm": 0.41699646136345586, |
| "learning_rate": 2.5431327641387682e-06, |
| "loss": 0.5149, |
| "step": 1715 |
| }, |
| { |
| "epoch": 2.091795288383428, |
| "grad_norm": 0.4456011191053756, |
| "learning_rate": 2.5369560008360826e-06, |
| "loss": 0.521, |
| "step": 1716 |
| }, |
| { |
| "epoch": 2.0930138099106417, |
| "grad_norm": 0.3959554858313093, |
| "learning_rate": 2.5307841967903337e-06, |
| "loss": 0.5048, |
| "step": 1717 |
| }, |
| { |
| "epoch": 2.0942323314378553, |
| "grad_norm": 0.41396649662012225, |
| "learning_rate": 2.52461736442824e-06, |
| "loss": 0.5162, |
| "step": 1718 |
| }, |
| { |
| "epoch": 2.095450852965069, |
| "grad_norm": 0.42258000014128283, |
| "learning_rate": 2.518455516166517e-06, |
| "loss": 0.5517, |
| "step": 1719 |
| }, |
| { |
| "epoch": 2.096669374492283, |
| "grad_norm": 0.39450373632220187, |
| "learning_rate": 2.512298664411841e-06, |
| "loss": 0.4964, |
| "step": 1720 |
| }, |
| { |
| "epoch": 2.0978878960194964, |
| "grad_norm": 0.3723209543555369, |
| "learning_rate": 2.5061468215608243e-06, |
| "loss": 0.5218, |
| "step": 1721 |
| }, |
| { |
| "epoch": 2.09910641754671, |
| "grad_norm": 0.41901342453157836, |
| "learning_rate": 2.5000000000000015e-06, |
| "loss": 0.5245, |
| "step": 1722 |
| }, |
| { |
| "epoch": 2.1003249390739236, |
| "grad_norm": 0.40321870380750857, |
| "learning_rate": 2.493858212105788e-06, |
| "loss": 0.5008, |
| "step": 1723 |
| }, |
| { |
| "epoch": 2.101543460601137, |
| "grad_norm": 0.41094871301137714, |
| "learning_rate": 2.487721470244473e-06, |
| "loss": 0.5255, |
| "step": 1724 |
| }, |
| { |
| "epoch": 2.1027619821283507, |
| "grad_norm": 0.38906496669749396, |
| "learning_rate": 2.481589786772178e-06, |
| "loss": 0.5077, |
| "step": 1725 |
| }, |
| { |
| "epoch": 2.1039805036555648, |
| "grad_norm": 0.4041745743019385, |
| "learning_rate": 2.4754631740348455e-06, |
| "loss": 0.5387, |
| "step": 1726 |
| }, |
| { |
| "epoch": 2.1051990251827783, |
| "grad_norm": 0.393377125185753, |
| "learning_rate": 2.4693416443682074e-06, |
| "loss": 0.5206, |
| "step": 1727 |
| }, |
| { |
| "epoch": 2.106417546709992, |
| "grad_norm": 0.43228252852381843, |
| "learning_rate": 2.4632252100977567e-06, |
| "loss": 0.5457, |
| "step": 1728 |
| }, |
| { |
| "epoch": 2.1076360682372055, |
| "grad_norm": 0.3665069578741783, |
| "learning_rate": 2.4571138835387293e-06, |
| "loss": 0.4513, |
| "step": 1729 |
| }, |
| { |
| "epoch": 2.108854589764419, |
| "grad_norm": 0.40575471379817674, |
| "learning_rate": 2.4510076769960784e-06, |
| "loss": 0.486, |
| "step": 1730 |
| }, |
| { |
| "epoch": 2.1100731112916327, |
| "grad_norm": 0.43487942611154445, |
| "learning_rate": 2.4449066027644473e-06, |
| "loss": 0.542, |
| "step": 1731 |
| }, |
| { |
| "epoch": 2.1112916328188467, |
| "grad_norm": 0.41618547734069145, |
| "learning_rate": 2.4388106731281496e-06, |
| "loss": 0.5405, |
| "step": 1732 |
| }, |
| { |
| "epoch": 2.1125101543460603, |
| "grad_norm": 0.37250571753343925, |
| "learning_rate": 2.4327199003611285e-06, |
| "loss": 0.5298, |
| "step": 1733 |
| }, |
| { |
| "epoch": 2.113728675873274, |
| "grad_norm": 0.38311544589645186, |
| "learning_rate": 2.426634296726955e-06, |
| "loss": 0.4806, |
| "step": 1734 |
| }, |
| { |
| "epoch": 2.1149471974004874, |
| "grad_norm": 0.41137106387325834, |
| "learning_rate": 2.4205538744787904e-06, |
| "loss": 0.5201, |
| "step": 1735 |
| }, |
| { |
| "epoch": 2.116165718927701, |
| "grad_norm": 0.39341688405639397, |
| "learning_rate": 2.4144786458593635e-06, |
| "loss": 0.4973, |
| "step": 1736 |
| }, |
| { |
| "epoch": 2.1173842404549146, |
| "grad_norm": 0.4261520463046671, |
| "learning_rate": 2.40840862310094e-06, |
| "loss": 0.5574, |
| "step": 1737 |
| }, |
| { |
| "epoch": 2.118602761982128, |
| "grad_norm": 0.4270827914830235, |
| "learning_rate": 2.4023438184253115e-06, |
| "loss": 0.5011, |
| "step": 1738 |
| }, |
| { |
| "epoch": 2.119821283509342, |
| "grad_norm": 0.37282235236530675, |
| "learning_rate": 2.3962842440437584e-06, |
| "loss": 0.4675, |
| "step": 1739 |
| }, |
| { |
| "epoch": 2.1210398050365558, |
| "grad_norm": 0.44674425439539434, |
| "learning_rate": 2.3902299121570332e-06, |
| "loss": 0.5741, |
| "step": 1740 |
| }, |
| { |
| "epoch": 2.1222583265637693, |
| "grad_norm": 0.41462268384604345, |
| "learning_rate": 2.384180834955329e-06, |
| "loss": 0.4876, |
| "step": 1741 |
| }, |
| { |
| "epoch": 2.123476848090983, |
| "grad_norm": 0.4534841107739354, |
| "learning_rate": 2.378137024618262e-06, |
| "loss": 0.5135, |
| "step": 1742 |
| }, |
| { |
| "epoch": 2.1246953696181965, |
| "grad_norm": 0.3978485819501479, |
| "learning_rate": 2.3720984933148443e-06, |
| "loss": 0.5208, |
| "step": 1743 |
| }, |
| { |
| "epoch": 2.12591389114541, |
| "grad_norm": 0.37184067874428883, |
| "learning_rate": 2.366065253203456e-06, |
| "loss": 0.5007, |
| "step": 1744 |
| }, |
| { |
| "epoch": 2.1271324126726237, |
| "grad_norm": 0.4276632980042562, |
| "learning_rate": 2.360037316431823e-06, |
| "loss": 0.5317, |
| "step": 1745 |
| }, |
| { |
| "epoch": 2.1283509341998377, |
| "grad_norm": 0.4617864367024641, |
| "learning_rate": 2.354014695136997e-06, |
| "loss": 0.5064, |
| "step": 1746 |
| }, |
| { |
| "epoch": 2.1295694557270513, |
| "grad_norm": 0.3859544135248513, |
| "learning_rate": 2.3479974014453255e-06, |
| "loss": 0.4865, |
| "step": 1747 |
| }, |
| { |
| "epoch": 2.130787977254265, |
| "grad_norm": 0.39298854898729213, |
| "learning_rate": 2.3419854474724284e-06, |
| "loss": 0.5399, |
| "step": 1748 |
| }, |
| { |
| "epoch": 2.1320064987814784, |
| "grad_norm": 0.4537583444692293, |
| "learning_rate": 2.3359788453231723e-06, |
| "loss": 0.5134, |
| "step": 1749 |
| }, |
| { |
| "epoch": 2.133225020308692, |
| "grad_norm": 0.4143013070479495, |
| "learning_rate": 2.329977607091652e-06, |
| "loss": 0.5128, |
| "step": 1750 |
| }, |
| { |
| "epoch": 2.1344435418359056, |
| "grad_norm": 0.37360321916937234, |
| "learning_rate": 2.323981744861162e-06, |
| "loss": 0.5181, |
| "step": 1751 |
| }, |
| { |
| "epoch": 2.1356620633631196, |
| "grad_norm": 0.4044550908338376, |
| "learning_rate": 2.317991270704167e-06, |
| "loss": 0.5197, |
| "step": 1752 |
| }, |
| { |
| "epoch": 2.136880584890333, |
| "grad_norm": 0.4067716384869161, |
| "learning_rate": 2.3120061966822915e-06, |
| "loss": 0.4899, |
| "step": 1753 |
| }, |
| { |
| "epoch": 2.1380991064175467, |
| "grad_norm": 0.4141293721178242, |
| "learning_rate": 2.3060265348462777e-06, |
| "loss": 0.5499, |
| "step": 1754 |
| }, |
| { |
| "epoch": 2.1393176279447603, |
| "grad_norm": 0.4005349519648887, |
| "learning_rate": 2.3000522972359803e-06, |
| "loss": 0.5395, |
| "step": 1755 |
| }, |
| { |
| "epoch": 2.140536149471974, |
| "grad_norm": 0.39248292555402464, |
| "learning_rate": 2.2940834958803228e-06, |
| "loss": 0.4931, |
| "step": 1756 |
| }, |
| { |
| "epoch": 2.1417546709991875, |
| "grad_norm": 0.38822332167948, |
| "learning_rate": 2.2881201427972894e-06, |
| "loss": 0.4722, |
| "step": 1757 |
| }, |
| { |
| "epoch": 2.1429731925264015, |
| "grad_norm": 0.38077200885472606, |
| "learning_rate": 2.282162249993895e-06, |
| "loss": 0.5326, |
| "step": 1758 |
| }, |
| { |
| "epoch": 2.144191714053615, |
| "grad_norm": 0.38524212575031547, |
| "learning_rate": 2.2762098294661556e-06, |
| "loss": 0.5109, |
| "step": 1759 |
| }, |
| { |
| "epoch": 2.1454102355808287, |
| "grad_norm": 0.40347299946589055, |
| "learning_rate": 2.27026289319907e-06, |
| "loss": 0.5579, |
| "step": 1760 |
| }, |
| { |
| "epoch": 2.1466287571080422, |
| "grad_norm": 0.3924233257276901, |
| "learning_rate": 2.264321453166598e-06, |
| "loss": 0.5165, |
| "step": 1761 |
| }, |
| { |
| "epoch": 2.147847278635256, |
| "grad_norm": 0.37575656828172116, |
| "learning_rate": 2.2583855213316326e-06, |
| "loss": 0.4895, |
| "step": 1762 |
| }, |
| { |
| "epoch": 2.1490658001624694, |
| "grad_norm": 0.3937169968667044, |
| "learning_rate": 2.2524551096459703e-06, |
| "loss": 0.53, |
| "step": 1763 |
| }, |
| { |
| "epoch": 2.150284321689683, |
| "grad_norm": 0.39457712101518455, |
| "learning_rate": 2.2465302300503012e-06, |
| "loss": 0.4689, |
| "step": 1764 |
| }, |
| { |
| "epoch": 2.151502843216897, |
| "grad_norm": 0.4393918889033505, |
| "learning_rate": 2.2406108944741696e-06, |
| "loss": 0.5178, |
| "step": 1765 |
| }, |
| { |
| "epoch": 2.1527213647441106, |
| "grad_norm": 0.4147988916944454, |
| "learning_rate": 2.234697114835963e-06, |
| "loss": 0.5385, |
| "step": 1766 |
| }, |
| { |
| "epoch": 2.153939886271324, |
| "grad_norm": 0.40920290551320604, |
| "learning_rate": 2.228788903042877e-06, |
| "loss": 0.5229, |
| "step": 1767 |
| }, |
| { |
| "epoch": 2.1551584077985377, |
| "grad_norm": 0.3841912028808192, |
| "learning_rate": 2.2228862709909e-06, |
| "loss": 0.4859, |
| "step": 1768 |
| }, |
| { |
| "epoch": 2.1563769293257513, |
| "grad_norm": 0.4017528120034774, |
| "learning_rate": 2.2169892305647865e-06, |
| "loss": 0.5067, |
| "step": 1769 |
| }, |
| { |
| "epoch": 2.157595450852965, |
| "grad_norm": 0.4106356542533839, |
| "learning_rate": 2.211097793638029e-06, |
| "loss": 0.511, |
| "step": 1770 |
| }, |
| { |
| "epoch": 2.158813972380179, |
| "grad_norm": 0.3922497877364856, |
| "learning_rate": 2.2052119720728375e-06, |
| "loss": 0.5213, |
| "step": 1771 |
| }, |
| { |
| "epoch": 2.1600324939073925, |
| "grad_norm": 0.41142355231120237, |
| "learning_rate": 2.1993317777201197e-06, |
| "loss": 0.55, |
| "step": 1772 |
| }, |
| { |
| "epoch": 2.161251015434606, |
| "grad_norm": 0.3793501040249683, |
| "learning_rate": 2.19345722241945e-06, |
| "loss": 0.4942, |
| "step": 1773 |
| }, |
| { |
| "epoch": 2.1624695369618196, |
| "grad_norm": 0.4181558266950597, |
| "learning_rate": 2.1875883179990515e-06, |
| "loss": 0.5179, |
| "step": 1774 |
| }, |
| { |
| "epoch": 2.1636880584890332, |
| "grad_norm": 0.4101546221225696, |
| "learning_rate": 2.1817250762757657e-06, |
| "loss": 0.4854, |
| "step": 1775 |
| }, |
| { |
| "epoch": 2.164906580016247, |
| "grad_norm": 0.40370317236247594, |
| "learning_rate": 2.175867509055033e-06, |
| "loss": 0.5675, |
| "step": 1776 |
| }, |
| { |
| "epoch": 2.166125101543461, |
| "grad_norm": 0.34161977537337374, |
| "learning_rate": 2.170015628130871e-06, |
| "loss": 0.4693, |
| "step": 1777 |
| }, |
| { |
| "epoch": 2.1673436230706744, |
| "grad_norm": 0.3929226138427561, |
| "learning_rate": 2.1641694452858486e-06, |
| "loss": 0.4932, |
| "step": 1778 |
| }, |
| { |
| "epoch": 2.168562144597888, |
| "grad_norm": 0.414139184233712, |
| "learning_rate": 2.158328972291056e-06, |
| "loss": 0.5428, |
| "step": 1779 |
| }, |
| { |
| "epoch": 2.1697806661251016, |
| "grad_norm": 0.4021702827253732, |
| "learning_rate": 2.1524942209060944e-06, |
| "loss": 0.553, |
| "step": 1780 |
| }, |
| { |
| "epoch": 2.170999187652315, |
| "grad_norm": 0.3914173100634304, |
| "learning_rate": 2.1466652028790384e-06, |
| "loss": 0.4846, |
| "step": 1781 |
| }, |
| { |
| "epoch": 2.1722177091795287, |
| "grad_norm": 0.4155952702393289, |
| "learning_rate": 2.1408419299464245e-06, |
| "loss": 0.5062, |
| "step": 1782 |
| }, |
| { |
| "epoch": 2.1734362307067423, |
| "grad_norm": 0.4029405381679447, |
| "learning_rate": 2.1350244138332143e-06, |
| "loss": 0.5543, |
| "step": 1783 |
| }, |
| { |
| "epoch": 2.1746547522339563, |
| "grad_norm": 0.3847467238608974, |
| "learning_rate": 2.1292126662527846e-06, |
| "loss": 0.4783, |
| "step": 1784 |
| }, |
| { |
| "epoch": 2.17587327376117, |
| "grad_norm": 0.3774561138941112, |
| "learning_rate": 2.1234066989068972e-06, |
| "loss": 0.5736, |
| "step": 1785 |
| }, |
| { |
| "epoch": 2.1770917952883835, |
| "grad_norm": 0.3791792062977483, |
| "learning_rate": 2.1176065234856725e-06, |
| "loss": 0.4782, |
| "step": 1786 |
| }, |
| { |
| "epoch": 2.178310316815597, |
| "grad_norm": 0.40849575227250023, |
| "learning_rate": 2.111812151667567e-06, |
| "loss": 0.498, |
| "step": 1787 |
| }, |
| { |
| "epoch": 2.1795288383428106, |
| "grad_norm": 0.36289578272484346, |
| "learning_rate": 2.106023595119358e-06, |
| "loss": 0.4866, |
| "step": 1788 |
| }, |
| { |
| "epoch": 2.180747359870024, |
| "grad_norm": 0.3869017558290611, |
| "learning_rate": 2.1002408654961124e-06, |
| "loss": 0.4643, |
| "step": 1789 |
| }, |
| { |
| "epoch": 2.181965881397238, |
| "grad_norm": 0.4954162333959639, |
| "learning_rate": 2.0944639744411627e-06, |
| "loss": 0.5415, |
| "step": 1790 |
| }, |
| { |
| "epoch": 2.183184402924452, |
| "grad_norm": 0.42167227063607843, |
| "learning_rate": 2.088692933586083e-06, |
| "loss": 0.5359, |
| "step": 1791 |
| }, |
| { |
| "epoch": 2.1844029244516654, |
| "grad_norm": 0.38236452945033045, |
| "learning_rate": 2.0829277545506736e-06, |
| "loss": 0.4971, |
| "step": 1792 |
| }, |
| { |
| "epoch": 2.185621445978879, |
| "grad_norm": 0.4285834277650781, |
| "learning_rate": 2.077168448942933e-06, |
| "loss": 0.5475, |
| "step": 1793 |
| }, |
| { |
| "epoch": 2.1868399675060926, |
| "grad_norm": 0.4023920796181869, |
| "learning_rate": 2.071415028359026e-06, |
| "loss": 0.4797, |
| "step": 1794 |
| }, |
| { |
| "epoch": 2.188058489033306, |
| "grad_norm": 0.4074540063011702, |
| "learning_rate": 2.065667504383276e-06, |
| "loss": 0.5254, |
| "step": 1795 |
| }, |
| { |
| "epoch": 2.1892770105605197, |
| "grad_norm": 0.39510353720495955, |
| "learning_rate": 2.0599258885881317e-06, |
| "loss": 0.4899, |
| "step": 1796 |
| }, |
| { |
| "epoch": 2.1904955320877337, |
| "grad_norm": 0.5548900318530214, |
| "learning_rate": 2.0541901925341446e-06, |
| "loss": 0.5198, |
| "step": 1797 |
| }, |
| { |
| "epoch": 2.1917140536149473, |
| "grad_norm": 0.3825302397550243, |
| "learning_rate": 2.0484604277699437e-06, |
| "loss": 0.5098, |
| "step": 1798 |
| }, |
| { |
| "epoch": 2.192932575142161, |
| "grad_norm": 0.40564085390755233, |
| "learning_rate": 2.042736605832222e-06, |
| "loss": 0.5323, |
| "step": 1799 |
| }, |
| { |
| "epoch": 2.1941510966693745, |
| "grad_norm": 0.39704056600422283, |
| "learning_rate": 2.037018738245707e-06, |
| "loss": 0.5108, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.195369618196588, |
| "grad_norm": 0.41600542688505693, |
| "learning_rate": 2.0313068365231303e-06, |
| "loss": 0.4978, |
| "step": 1801 |
| }, |
| { |
| "epoch": 2.1965881397238016, |
| "grad_norm": 0.39387757095797193, |
| "learning_rate": 2.0256009121652147e-06, |
| "loss": 0.5273, |
| "step": 1802 |
| }, |
| { |
| "epoch": 2.1978066612510156, |
| "grad_norm": 0.36242925936592724, |
| "learning_rate": 2.019900976660651e-06, |
| "loss": 0.4982, |
| "step": 1803 |
| }, |
| { |
| "epoch": 2.1990251827782292, |
| "grad_norm": 0.35509245865119315, |
| "learning_rate": 2.0142070414860704e-06, |
| "loss": 0.4878, |
| "step": 1804 |
| }, |
| { |
| "epoch": 2.200243704305443, |
| "grad_norm": 0.38952269804673, |
| "learning_rate": 2.0085191181060176e-06, |
| "loss": 0.5369, |
| "step": 1805 |
| }, |
| { |
| "epoch": 2.2014622258326564, |
| "grad_norm": 0.3866235545814812, |
| "learning_rate": 2.0028372179729405e-06, |
| "loss": 0.4802, |
| "step": 1806 |
| }, |
| { |
| "epoch": 2.20268074735987, |
| "grad_norm": 0.3922622961361272, |
| "learning_rate": 1.9971613525271523e-06, |
| "loss": 0.5284, |
| "step": 1807 |
| }, |
| { |
| "epoch": 2.2038992688870835, |
| "grad_norm": 0.38718886845940065, |
| "learning_rate": 1.9914915331968217e-06, |
| "loss": 0.4846, |
| "step": 1808 |
| }, |
| { |
| "epoch": 2.205117790414297, |
| "grad_norm": 0.39994907283167946, |
| "learning_rate": 1.985827771397938e-06, |
| "loss": 0.5433, |
| "step": 1809 |
| }, |
| { |
| "epoch": 2.206336311941511, |
| "grad_norm": 0.37905307147188894, |
| "learning_rate": 1.980170078534297e-06, |
| "loss": 0.5145, |
| "step": 1810 |
| }, |
| { |
| "epoch": 2.2075548334687247, |
| "grad_norm": 0.4192247244192786, |
| "learning_rate": 1.9745184659974764e-06, |
| "loss": 0.5118, |
| "step": 1811 |
| }, |
| { |
| "epoch": 2.2087733549959383, |
| "grad_norm": 0.3522566177407128, |
| "learning_rate": 1.9688729451668116e-06, |
| "loss": 0.4751, |
| "step": 1812 |
| }, |
| { |
| "epoch": 2.209991876523152, |
| "grad_norm": 0.3772830661699162, |
| "learning_rate": 1.9632335274093645e-06, |
| "loss": 0.4859, |
| "step": 1813 |
| }, |
| { |
| "epoch": 2.2112103980503655, |
| "grad_norm": 0.41564321254010056, |
| "learning_rate": 1.957600224079917e-06, |
| "loss": 0.5474, |
| "step": 1814 |
| }, |
| { |
| "epoch": 2.212428919577579, |
| "grad_norm": 0.40463784380961515, |
| "learning_rate": 1.9519730465209384e-06, |
| "loss": 0.5135, |
| "step": 1815 |
| }, |
| { |
| "epoch": 2.213647441104793, |
| "grad_norm": 0.42048597280314337, |
| "learning_rate": 1.9463520060625647e-06, |
| "loss": 0.51, |
| "step": 1816 |
| }, |
| { |
| "epoch": 2.2148659626320066, |
| "grad_norm": 0.4057972852875916, |
| "learning_rate": 1.940737114022572e-06, |
| "loss": 0.5291, |
| "step": 1817 |
| }, |
| { |
| "epoch": 2.21608448415922, |
| "grad_norm": 0.3860966909234876, |
| "learning_rate": 1.935128381706355e-06, |
| "loss": 0.4638, |
| "step": 1818 |
| }, |
| { |
| "epoch": 2.217303005686434, |
| "grad_norm": 0.3992185398314597, |
| "learning_rate": 1.9295258204069116e-06, |
| "loss": 0.4846, |
| "step": 1819 |
| }, |
| { |
| "epoch": 2.2185215272136474, |
| "grad_norm": 0.4414225404994573, |
| "learning_rate": 1.9239294414048143e-06, |
| "loss": 0.5729, |
| "step": 1820 |
| }, |
| { |
| "epoch": 2.219740048740861, |
| "grad_norm": 0.38820997327095225, |
| "learning_rate": 1.9183392559681812e-06, |
| "loss": 0.4883, |
| "step": 1821 |
| }, |
| { |
| "epoch": 2.2209585702680745, |
| "grad_norm": 0.3830961932249294, |
| "learning_rate": 1.9127552753526683e-06, |
| "loss": 0.4959, |
| "step": 1822 |
| }, |
| { |
| "epoch": 2.2221770917952886, |
| "grad_norm": 0.40371127197935186, |
| "learning_rate": 1.907177510801431e-06, |
| "loss": 0.5322, |
| "step": 1823 |
| }, |
| { |
| "epoch": 2.223395613322502, |
| "grad_norm": 0.4422584980389884, |
| "learning_rate": 1.901605973545116e-06, |
| "loss": 0.544, |
| "step": 1824 |
| }, |
| { |
| "epoch": 2.2246141348497157, |
| "grad_norm": 0.3671460120788879, |
| "learning_rate": 1.8960406748018229e-06, |
| "loss": 0.447, |
| "step": 1825 |
| }, |
| { |
| "epoch": 2.2258326563769293, |
| "grad_norm": 0.41787679838261416, |
| "learning_rate": 1.8904816257770976e-06, |
| "loss": 0.4837, |
| "step": 1826 |
| }, |
| { |
| "epoch": 2.227051177904143, |
| "grad_norm": 0.40739753945636065, |
| "learning_rate": 1.884928837663902e-06, |
| "loss": 0.5215, |
| "step": 1827 |
| }, |
| { |
| "epoch": 2.2282696994313564, |
| "grad_norm": 0.418750898184457, |
| "learning_rate": 1.8793823216425872e-06, |
| "loss": 0.5042, |
| "step": 1828 |
| }, |
| { |
| "epoch": 2.2294882209585705, |
| "grad_norm": 0.42352381285835144, |
| "learning_rate": 1.8738420888808767e-06, |
| "loss": 0.5266, |
| "step": 1829 |
| }, |
| { |
| "epoch": 2.230706742485784, |
| "grad_norm": 0.3766712142861196, |
| "learning_rate": 1.8683081505338468e-06, |
| "loss": 0.4898, |
| "step": 1830 |
| }, |
| { |
| "epoch": 2.2319252640129976, |
| "grad_norm": 0.3646915065525184, |
| "learning_rate": 1.8627805177438984e-06, |
| "loss": 0.5102, |
| "step": 1831 |
| }, |
| { |
| "epoch": 2.233143785540211, |
| "grad_norm": 0.3728256406154505, |
| "learning_rate": 1.8572592016407337e-06, |
| "loss": 0.5124, |
| "step": 1832 |
| }, |
| { |
| "epoch": 2.234362307067425, |
| "grad_norm": 0.4171588204608096, |
| "learning_rate": 1.8517442133413405e-06, |
| "loss": 0.543, |
| "step": 1833 |
| }, |
| { |
| "epoch": 2.2355808285946384, |
| "grad_norm": 0.38601375413922817, |
| "learning_rate": 1.8462355639499614e-06, |
| "loss": 0.4802, |
| "step": 1834 |
| }, |
| { |
| "epoch": 2.236799350121852, |
| "grad_norm": 0.41191751363191975, |
| "learning_rate": 1.8407332645580805e-06, |
| "loss": 0.498, |
| "step": 1835 |
| }, |
| { |
| "epoch": 2.238017871649066, |
| "grad_norm": 0.36858808021561745, |
| "learning_rate": 1.8352373262443918e-06, |
| "loss": 0.5308, |
| "step": 1836 |
| }, |
| { |
| "epoch": 2.2392363931762795, |
| "grad_norm": 0.39319359572416623, |
| "learning_rate": 1.8297477600747854e-06, |
| "loss": 0.5147, |
| "step": 1837 |
| }, |
| { |
| "epoch": 2.240454914703493, |
| "grad_norm": 0.4146287760930502, |
| "learning_rate": 1.8242645771023205e-06, |
| "loss": 0.4951, |
| "step": 1838 |
| }, |
| { |
| "epoch": 2.2416734362307067, |
| "grad_norm": 0.42074375031523503, |
| "learning_rate": 1.8187877883672024e-06, |
| "loss": 0.5238, |
| "step": 1839 |
| }, |
| { |
| "epoch": 2.2428919577579203, |
| "grad_norm": 0.3933564686288827, |
| "learning_rate": 1.81331740489676e-06, |
| "loss": 0.5319, |
| "step": 1840 |
| }, |
| { |
| "epoch": 2.244110479285134, |
| "grad_norm": 0.39931640514560685, |
| "learning_rate": 1.8078534377054303e-06, |
| "loss": 0.4921, |
| "step": 1841 |
| }, |
| { |
| "epoch": 2.245329000812348, |
| "grad_norm": 0.43198490048399485, |
| "learning_rate": 1.8023958977947303e-06, |
| "loss": 0.55, |
| "step": 1842 |
| }, |
| { |
| "epoch": 2.2465475223395615, |
| "grad_norm": 0.4043002892149948, |
| "learning_rate": 1.7969447961532333e-06, |
| "loss": 0.4992, |
| "step": 1843 |
| }, |
| { |
| "epoch": 2.247766043866775, |
| "grad_norm": 0.4118718760088592, |
| "learning_rate": 1.7915001437565481e-06, |
| "loss": 0.4981, |
| "step": 1844 |
| }, |
| { |
| "epoch": 2.2489845653939886, |
| "grad_norm": 0.4099852822242898, |
| "learning_rate": 1.7860619515673034e-06, |
| "loss": 0.5036, |
| "step": 1845 |
| }, |
| { |
| "epoch": 2.250203086921202, |
| "grad_norm": 0.4061790060023943, |
| "learning_rate": 1.7806302305351191e-06, |
| "loss": 0.518, |
| "step": 1846 |
| }, |
| { |
| "epoch": 2.2514216084484158, |
| "grad_norm": 0.38920893319388733, |
| "learning_rate": 1.7752049915965807e-06, |
| "loss": 0.5347, |
| "step": 1847 |
| }, |
| { |
| "epoch": 2.25264012997563, |
| "grad_norm": 0.378445552638885, |
| "learning_rate": 1.7697862456752273e-06, |
| "loss": 0.4489, |
| "step": 1848 |
| }, |
| { |
| "epoch": 2.2538586515028434, |
| "grad_norm": 0.43624250871726583, |
| "learning_rate": 1.764374003681526e-06, |
| "loss": 0.5076, |
| "step": 1849 |
| }, |
| { |
| "epoch": 2.255077173030057, |
| "grad_norm": 0.4301972293121319, |
| "learning_rate": 1.7589682765128424e-06, |
| "loss": 0.5106, |
| "step": 1850 |
| }, |
| { |
| "epoch": 2.2562956945572705, |
| "grad_norm": 0.4206525298070121, |
| "learning_rate": 1.7535690750534268e-06, |
| "loss": 0.5224, |
| "step": 1851 |
| }, |
| { |
| "epoch": 2.257514216084484, |
| "grad_norm": 0.3846425486629517, |
| "learning_rate": 1.7481764101743925e-06, |
| "loss": 0.4962, |
| "step": 1852 |
| }, |
| { |
| "epoch": 2.2587327376116977, |
| "grad_norm": 0.3994656170665921, |
| "learning_rate": 1.7427902927336932e-06, |
| "loss": 0.5142, |
| "step": 1853 |
| }, |
| { |
| "epoch": 2.2599512591389113, |
| "grad_norm": 0.4156779226062884, |
| "learning_rate": 1.7374107335760937e-06, |
| "loss": 0.5224, |
| "step": 1854 |
| }, |
| { |
| "epoch": 2.2611697806661253, |
| "grad_norm": 0.44546616602212363, |
| "learning_rate": 1.732037743533156e-06, |
| "loss": 0.49, |
| "step": 1855 |
| }, |
| { |
| "epoch": 2.262388302193339, |
| "grad_norm": 0.41287367815029863, |
| "learning_rate": 1.7266713334232177e-06, |
| "loss": 0.5125, |
| "step": 1856 |
| }, |
| { |
| "epoch": 2.2636068237205524, |
| "grad_norm": 0.4171809094817276, |
| "learning_rate": 1.7213115140513687e-06, |
| "loss": 0.4866, |
| "step": 1857 |
| }, |
| { |
| "epoch": 2.264825345247766, |
| "grad_norm": 0.4055921589949916, |
| "learning_rate": 1.7159582962094224e-06, |
| "loss": 0.5221, |
| "step": 1858 |
| }, |
| { |
| "epoch": 2.2660438667749796, |
| "grad_norm": 0.37574794466013317, |
| "learning_rate": 1.710611690675908e-06, |
| "loss": 0.5475, |
| "step": 1859 |
| }, |
| { |
| "epoch": 2.267262388302193, |
| "grad_norm": 0.3923703686975919, |
| "learning_rate": 1.7052717082160348e-06, |
| "loss": 0.502, |
| "step": 1860 |
| }, |
| { |
| "epoch": 2.2684809098294068, |
| "grad_norm": 0.4104360688359129, |
| "learning_rate": 1.6999383595816816e-06, |
| "loss": 0.4915, |
| "step": 1861 |
| }, |
| { |
| "epoch": 2.2696994313566208, |
| "grad_norm": 0.42448276721497596, |
| "learning_rate": 1.694611655511365e-06, |
| "loss": 0.5187, |
| "step": 1862 |
| }, |
| { |
| "epoch": 2.2709179528838344, |
| "grad_norm": 0.4182208678556554, |
| "learning_rate": 1.6892916067302279e-06, |
| "loss": 0.5431, |
| "step": 1863 |
| }, |
| { |
| "epoch": 2.272136474411048, |
| "grad_norm": 0.3809524496490651, |
| "learning_rate": 1.6839782239500114e-06, |
| "loss": 0.4962, |
| "step": 1864 |
| }, |
| { |
| "epoch": 2.2733549959382615, |
| "grad_norm": 0.4044653821113984, |
| "learning_rate": 1.6786715178690372e-06, |
| "loss": 0.5455, |
| "step": 1865 |
| }, |
| { |
| "epoch": 2.274573517465475, |
| "grad_norm": 0.4017956989739632, |
| "learning_rate": 1.6733714991721738e-06, |
| "loss": 0.5124, |
| "step": 1866 |
| }, |
| { |
| "epoch": 2.275792038992689, |
| "grad_norm": 0.3948697674235281, |
| "learning_rate": 1.668078178530837e-06, |
| "loss": 0.5121, |
| "step": 1867 |
| }, |
| { |
| "epoch": 2.2770105605199027, |
| "grad_norm": 0.406458521824727, |
| "learning_rate": 1.6627915666029503e-06, |
| "loss": 0.5111, |
| "step": 1868 |
| }, |
| { |
| "epoch": 2.2782290820471163, |
| "grad_norm": 0.3778951735019038, |
| "learning_rate": 1.6575116740329316e-06, |
| "loss": 0.4983, |
| "step": 1869 |
| }, |
| { |
| "epoch": 2.27944760357433, |
| "grad_norm": 0.3577602501518659, |
| "learning_rate": 1.6522385114516681e-06, |
| "loss": 0.4748, |
| "step": 1870 |
| }, |
| { |
| "epoch": 2.2806661251015434, |
| "grad_norm": 0.39147597379268634, |
| "learning_rate": 1.6469720894764945e-06, |
| "loss": 0.5167, |
| "step": 1871 |
| }, |
| { |
| "epoch": 2.281884646628757, |
| "grad_norm": 0.41526652630800426, |
| "learning_rate": 1.6417124187111778e-06, |
| "loss": 0.4856, |
| "step": 1872 |
| }, |
| { |
| "epoch": 2.2831031681559706, |
| "grad_norm": 0.46476506582341715, |
| "learning_rate": 1.6364595097458901e-06, |
| "loss": 0.5541, |
| "step": 1873 |
| }, |
| { |
| "epoch": 2.2843216896831846, |
| "grad_norm": 0.4413380147809054, |
| "learning_rate": 1.6312133731571867e-06, |
| "loss": 0.5681, |
| "step": 1874 |
| }, |
| { |
| "epoch": 2.285540211210398, |
| "grad_norm": 0.41316580395580427, |
| "learning_rate": 1.6259740195079903e-06, |
| "loss": 0.4902, |
| "step": 1875 |
| }, |
| { |
| "epoch": 2.2867587327376118, |
| "grad_norm": 0.375223833084769, |
| "learning_rate": 1.6207414593475634e-06, |
| "loss": 0.5059, |
| "step": 1876 |
| }, |
| { |
| "epoch": 2.2879772542648253, |
| "grad_norm": 0.4191217491734361, |
| "learning_rate": 1.6155157032114926e-06, |
| "loss": 0.4903, |
| "step": 1877 |
| }, |
| { |
| "epoch": 2.289195775792039, |
| "grad_norm": 0.4104399365050538, |
| "learning_rate": 1.610296761621662e-06, |
| "loss": 0.4978, |
| "step": 1878 |
| }, |
| { |
| "epoch": 2.2904142973192525, |
| "grad_norm": 0.452132557586679, |
| "learning_rate": 1.6050846450862368e-06, |
| "loss": 0.5529, |
| "step": 1879 |
| }, |
| { |
| "epoch": 2.291632818846466, |
| "grad_norm": 0.38189023256593707, |
| "learning_rate": 1.5998793640996418e-06, |
| "loss": 0.4534, |
| "step": 1880 |
| }, |
| { |
| "epoch": 2.29285134037368, |
| "grad_norm": 0.4105896104811722, |
| "learning_rate": 1.5946809291425352e-06, |
| "loss": 0.5157, |
| "step": 1881 |
| }, |
| { |
| "epoch": 2.2940698619008937, |
| "grad_norm": 0.39415858749113886, |
| "learning_rate": 1.589489350681791e-06, |
| "loss": 0.504, |
| "step": 1882 |
| }, |
| { |
| "epoch": 2.2952883834281073, |
| "grad_norm": 0.35980183287414685, |
| "learning_rate": 1.5843046391704802e-06, |
| "loss": 0.5077, |
| "step": 1883 |
| }, |
| { |
| "epoch": 2.296506904955321, |
| "grad_norm": 0.38396469649464077, |
| "learning_rate": 1.5791268050478487e-06, |
| "loss": 0.5051, |
| "step": 1884 |
| }, |
| { |
| "epoch": 2.2977254264825344, |
| "grad_norm": 0.3821275005707828, |
| "learning_rate": 1.573955858739289e-06, |
| "loss": 0.5345, |
| "step": 1885 |
| }, |
| { |
| "epoch": 2.298943948009748, |
| "grad_norm": 0.39018524126022086, |
| "learning_rate": 1.5687918106563326e-06, |
| "loss": 0.4713, |
| "step": 1886 |
| }, |
| { |
| "epoch": 2.3001624695369616, |
| "grad_norm": 0.4269492782915114, |
| "learning_rate": 1.5636346711966154e-06, |
| "loss": 0.5396, |
| "step": 1887 |
| }, |
| { |
| "epoch": 2.3013809910641756, |
| "grad_norm": 0.40024049549443624, |
| "learning_rate": 1.5584844507438678e-06, |
| "loss": 0.5119, |
| "step": 1888 |
| }, |
| { |
| "epoch": 2.302599512591389, |
| "grad_norm": 0.3769373466661974, |
| "learning_rate": 1.5533411596678843e-06, |
| "loss": 0.4858, |
| "step": 1889 |
| }, |
| { |
| "epoch": 2.3038180341186028, |
| "grad_norm": 0.40957300518215145, |
| "learning_rate": 1.5482048083245116e-06, |
| "loss": 0.5299, |
| "step": 1890 |
| }, |
| { |
| "epoch": 2.3050365556458163, |
| "grad_norm": 0.3887283184505036, |
| "learning_rate": 1.543075407055623e-06, |
| "loss": 0.5276, |
| "step": 1891 |
| }, |
| { |
| "epoch": 2.30625507717303, |
| "grad_norm": 0.3944358213197462, |
| "learning_rate": 1.5379529661890956e-06, |
| "loss": 0.512, |
| "step": 1892 |
| }, |
| { |
| "epoch": 2.307473598700244, |
| "grad_norm": 0.38335955851758596, |
| "learning_rate": 1.532837496038792e-06, |
| "loss": 0.4802, |
| "step": 1893 |
| }, |
| { |
| "epoch": 2.3086921202274575, |
| "grad_norm": 0.40294432984687933, |
| "learning_rate": 1.5277290069045414e-06, |
| "loss": 0.5171, |
| "step": 1894 |
| }, |
| { |
| "epoch": 2.309910641754671, |
| "grad_norm": 0.39774386298303005, |
| "learning_rate": 1.5226275090721183e-06, |
| "loss": 0.4993, |
| "step": 1895 |
| }, |
| { |
| "epoch": 2.3111291632818847, |
| "grad_norm": 0.4200593782198228, |
| "learning_rate": 1.517533012813217e-06, |
| "loss": 0.5606, |
| "step": 1896 |
| }, |
| { |
| "epoch": 2.3123476848090982, |
| "grad_norm": 0.4204431416124692, |
| "learning_rate": 1.512445528385434e-06, |
| "loss": 0.5538, |
| "step": 1897 |
| }, |
| { |
| "epoch": 2.313566206336312, |
| "grad_norm": 0.33671871586550156, |
| "learning_rate": 1.5073650660322509e-06, |
| "loss": 0.4575, |
| "step": 1898 |
| }, |
| { |
| "epoch": 2.3147847278635254, |
| "grad_norm": 0.42276990891040056, |
| "learning_rate": 1.5022916359830114e-06, |
| "loss": 0.5744, |
| "step": 1899 |
| }, |
| { |
| "epoch": 2.3160032493907394, |
| "grad_norm": 0.35916851603197664, |
| "learning_rate": 1.4972252484528938e-06, |
| "loss": 0.4721, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.317221770917953, |
| "grad_norm": 0.40308654798116644, |
| "learning_rate": 1.4921659136429022e-06, |
| "loss": 0.5283, |
| "step": 1901 |
| }, |
| { |
| "epoch": 2.3184402924451666, |
| "grad_norm": 0.36536184604215355, |
| "learning_rate": 1.4871136417398407e-06, |
| "loss": 0.4748, |
| "step": 1902 |
| }, |
| { |
| "epoch": 2.31965881397238, |
| "grad_norm": 0.42296472427505094, |
| "learning_rate": 1.4820684429162879e-06, |
| "loss": 0.6, |
| "step": 1903 |
| }, |
| { |
| "epoch": 2.3208773354995937, |
| "grad_norm": 0.3598564220383708, |
| "learning_rate": 1.477030327330582e-06, |
| "loss": 0.4422, |
| "step": 1904 |
| }, |
| { |
| "epoch": 2.3220958570268073, |
| "grad_norm": 0.3948776090594206, |
| "learning_rate": 1.4719993051268023e-06, |
| "loss": 0.5343, |
| "step": 1905 |
| }, |
| { |
| "epoch": 2.323314378554021, |
| "grad_norm": 0.4028197151468667, |
| "learning_rate": 1.466975386434744e-06, |
| "loss": 0.5355, |
| "step": 1906 |
| }, |
| { |
| "epoch": 2.324532900081235, |
| "grad_norm": 0.3958394776231179, |
| "learning_rate": 1.4619585813699032e-06, |
| "loss": 0.5119, |
| "step": 1907 |
| }, |
| { |
| "epoch": 2.3257514216084485, |
| "grad_norm": 0.3640898960169131, |
| "learning_rate": 1.4569489000334435e-06, |
| "loss": 0.4749, |
| "step": 1908 |
| }, |
| { |
| "epoch": 2.326969943135662, |
| "grad_norm": 0.42215058629769103, |
| "learning_rate": 1.4519463525121934e-06, |
| "loss": 0.5157, |
| "step": 1909 |
| }, |
| { |
| "epoch": 2.3281884646628757, |
| "grad_norm": 0.38263832424003186, |
| "learning_rate": 1.4469509488786165e-06, |
| "loss": 0.509, |
| "step": 1910 |
| }, |
| { |
| "epoch": 2.3294069861900892, |
| "grad_norm": 0.41466783476778923, |
| "learning_rate": 1.4419626991907925e-06, |
| "loss": 0.5222, |
| "step": 1911 |
| }, |
| { |
| "epoch": 2.330625507717303, |
| "grad_norm": 0.3818440846598212, |
| "learning_rate": 1.436981613492394e-06, |
| "loss": 0.5153, |
| "step": 1912 |
| }, |
| { |
| "epoch": 2.331844029244517, |
| "grad_norm": 0.3744448281484142, |
| "learning_rate": 1.4320077018126704e-06, |
| "loss": 0.4932, |
| "step": 1913 |
| }, |
| { |
| "epoch": 2.3330625507717304, |
| "grad_norm": 0.35650000061694337, |
| "learning_rate": 1.427040974166427e-06, |
| "loss": 0.4711, |
| "step": 1914 |
| }, |
| { |
| "epoch": 2.334281072298944, |
| "grad_norm": 0.44375949024201433, |
| "learning_rate": 1.4220814405540067e-06, |
| "loss": 0.6081, |
| "step": 1915 |
| }, |
| { |
| "epoch": 2.3354995938261576, |
| "grad_norm": 0.3632417016214969, |
| "learning_rate": 1.4171291109612618e-06, |
| "loss": 0.4439, |
| "step": 1916 |
| }, |
| { |
| "epoch": 2.336718115353371, |
| "grad_norm": 0.391995578036673, |
| "learning_rate": 1.412183995359544e-06, |
| "loss": 0.5208, |
| "step": 1917 |
| }, |
| { |
| "epoch": 2.3379366368805847, |
| "grad_norm": 0.4106831300879619, |
| "learning_rate": 1.4072461037056806e-06, |
| "loss": 0.5185, |
| "step": 1918 |
| }, |
| { |
| "epoch": 2.3391551584077988, |
| "grad_norm": 0.37156715583934985, |
| "learning_rate": 1.4023154459419497e-06, |
| "loss": 0.492, |
| "step": 1919 |
| }, |
| { |
| "epoch": 2.3403736799350123, |
| "grad_norm": 0.4194748560314305, |
| "learning_rate": 1.3973920319960654e-06, |
| "loss": 0.5387, |
| "step": 1920 |
| }, |
| { |
| "epoch": 2.341592201462226, |
| "grad_norm": 0.4089237007134858, |
| "learning_rate": 1.3924758717811582e-06, |
| "loss": 0.5258, |
| "step": 1921 |
| }, |
| { |
| "epoch": 2.3428107229894395, |
| "grad_norm": 0.3694012500954815, |
| "learning_rate": 1.3875669751957548e-06, |
| "loss": 0.4604, |
| "step": 1922 |
| }, |
| { |
| "epoch": 2.344029244516653, |
| "grad_norm": 0.4054490233784107, |
| "learning_rate": 1.3826653521237526e-06, |
| "loss": 0.5113, |
| "step": 1923 |
| }, |
| { |
| "epoch": 2.3452477660438666, |
| "grad_norm": 0.39405086162290015, |
| "learning_rate": 1.3777710124344058e-06, |
| "loss": 0.5753, |
| "step": 1924 |
| }, |
| { |
| "epoch": 2.3464662875710802, |
| "grad_norm": 0.3708763755034275, |
| "learning_rate": 1.3728839659823045e-06, |
| "loss": 0.5154, |
| "step": 1925 |
| }, |
| { |
| "epoch": 2.3476848090982942, |
| "grad_norm": 0.3772580609058518, |
| "learning_rate": 1.3680042226073554e-06, |
| "loss": 0.4871, |
| "step": 1926 |
| }, |
| { |
| "epoch": 2.348903330625508, |
| "grad_norm": 0.39403743871876235, |
| "learning_rate": 1.3631317921347564e-06, |
| "loss": 0.5306, |
| "step": 1927 |
| }, |
| { |
| "epoch": 2.3501218521527214, |
| "grad_norm": 0.3873906953232042, |
| "learning_rate": 1.358266684374987e-06, |
| "loss": 0.5123, |
| "step": 1928 |
| }, |
| { |
| "epoch": 2.351340373679935, |
| "grad_norm": 0.37942846145056086, |
| "learning_rate": 1.3534089091237757e-06, |
| "loss": 0.5054, |
| "step": 1929 |
| }, |
| { |
| "epoch": 2.3525588952071486, |
| "grad_norm": 0.3607773079027305, |
| "learning_rate": 1.348558476162094e-06, |
| "loss": 0.481, |
| "step": 1930 |
| }, |
| { |
| "epoch": 2.353777416734362, |
| "grad_norm": 0.4005225791492384, |
| "learning_rate": 1.343715395256124e-06, |
| "loss": 0.5331, |
| "step": 1931 |
| }, |
| { |
| "epoch": 2.3549959382615757, |
| "grad_norm": 0.36782048392498773, |
| "learning_rate": 1.3388796761572493e-06, |
| "loss": 0.4872, |
| "step": 1932 |
| }, |
| { |
| "epoch": 2.3562144597887897, |
| "grad_norm": 0.38694592195175675, |
| "learning_rate": 1.3340513286020307e-06, |
| "loss": 0.5245, |
| "step": 1933 |
| }, |
| { |
| "epoch": 2.3574329813160033, |
| "grad_norm": 0.3951746347940695, |
| "learning_rate": 1.3292303623121828e-06, |
| "loss": 0.5296, |
| "step": 1934 |
| }, |
| { |
| "epoch": 2.358651502843217, |
| "grad_norm": 0.4201044387513612, |
| "learning_rate": 1.324416786994559e-06, |
| "loss": 0.5284, |
| "step": 1935 |
| }, |
| { |
| "epoch": 2.3598700243704305, |
| "grad_norm": 0.4088086029813002, |
| "learning_rate": 1.3196106123411345e-06, |
| "loss": 0.5212, |
| "step": 1936 |
| }, |
| { |
| "epoch": 2.361088545897644, |
| "grad_norm": 0.38621048916855116, |
| "learning_rate": 1.3148118480289834e-06, |
| "loss": 0.5078, |
| "step": 1937 |
| }, |
| { |
| "epoch": 2.362307067424858, |
| "grad_norm": 0.38423444639816645, |
| "learning_rate": 1.310020503720254e-06, |
| "loss": 0.5363, |
| "step": 1938 |
| }, |
| { |
| "epoch": 2.3635255889520717, |
| "grad_norm": 0.3807864286378039, |
| "learning_rate": 1.3052365890621615e-06, |
| "loss": 0.5349, |
| "step": 1939 |
| }, |
| { |
| "epoch": 2.3647441104792852, |
| "grad_norm": 0.3969153122233334, |
| "learning_rate": 1.3004601136869555e-06, |
| "loss": 0.5245, |
| "step": 1940 |
| }, |
| { |
| "epoch": 2.365962632006499, |
| "grad_norm": 0.3706070359987908, |
| "learning_rate": 1.295691087211912e-06, |
| "loss": 0.4639, |
| "step": 1941 |
| }, |
| { |
| "epoch": 2.3671811535337124, |
| "grad_norm": 0.4076566869065765, |
| "learning_rate": 1.2909295192393057e-06, |
| "loss": 0.5623, |
| "step": 1942 |
| }, |
| { |
| "epoch": 2.368399675060926, |
| "grad_norm": 0.3609321931094141, |
| "learning_rate": 1.2861754193563948e-06, |
| "loss": 0.4532, |
| "step": 1943 |
| }, |
| { |
| "epoch": 2.3696181965881395, |
| "grad_norm": 0.3775351942907534, |
| "learning_rate": 1.2814287971354023e-06, |
| "loss": 0.5515, |
| "step": 1944 |
| }, |
| { |
| "epoch": 2.3708367181153536, |
| "grad_norm": 0.37390993961577534, |
| "learning_rate": 1.2766896621334928e-06, |
| "loss": 0.5097, |
| "step": 1945 |
| }, |
| { |
| "epoch": 2.372055239642567, |
| "grad_norm": 0.37384134850125694, |
| "learning_rate": 1.2719580238927553e-06, |
| "loss": 0.5557, |
| "step": 1946 |
| }, |
| { |
| "epoch": 2.3732737611697807, |
| "grad_norm": 0.3775237757952146, |
| "learning_rate": 1.2672338919401866e-06, |
| "loss": 0.5197, |
| "step": 1947 |
| }, |
| { |
| "epoch": 2.3744922826969943, |
| "grad_norm": 0.3845839355688612, |
| "learning_rate": 1.2625172757876691e-06, |
| "loss": 0.5175, |
| "step": 1948 |
| }, |
| { |
| "epoch": 2.375710804224208, |
| "grad_norm": 0.3930000499088082, |
| "learning_rate": 1.2578081849319547e-06, |
| "loss": 0.4908, |
| "step": 1949 |
| }, |
| { |
| "epoch": 2.3769293257514215, |
| "grad_norm": 0.3679733160971826, |
| "learning_rate": 1.253106628854635e-06, |
| "loss": 0.4807, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.378147847278635, |
| "grad_norm": 0.41138237630333274, |
| "learning_rate": 1.2484126170221388e-06, |
| "loss": 0.5494, |
| "step": 1951 |
| }, |
| { |
| "epoch": 2.379366368805849, |
| "grad_norm": 0.3644013554869518, |
| "learning_rate": 1.2437261588857037e-06, |
| "loss": 0.4715, |
| "step": 1952 |
| }, |
| { |
| "epoch": 2.3805848903330626, |
| "grad_norm": 0.3754357671998505, |
| "learning_rate": 1.2390472638813572e-06, |
| "loss": 0.5106, |
| "step": 1953 |
| }, |
| { |
| "epoch": 2.381803411860276, |
| "grad_norm": 0.4210050485232648, |
| "learning_rate": 1.2343759414298955e-06, |
| "loss": 0.5755, |
| "step": 1954 |
| }, |
| { |
| "epoch": 2.38302193338749, |
| "grad_norm": 0.3648248351254956, |
| "learning_rate": 1.229712200936874e-06, |
| "loss": 0.4928, |
| "step": 1955 |
| }, |
| { |
| "epoch": 2.3842404549147034, |
| "grad_norm": 0.34960648428885643, |
| "learning_rate": 1.2250560517925747e-06, |
| "loss": 0.4643, |
| "step": 1956 |
| }, |
| { |
| "epoch": 2.385458976441917, |
| "grad_norm": 0.3790545819611439, |
| "learning_rate": 1.2204075033720025e-06, |
| "loss": 0.4949, |
| "step": 1957 |
| }, |
| { |
| "epoch": 2.386677497969131, |
| "grad_norm": 0.3711764526859297, |
| "learning_rate": 1.2157665650348516e-06, |
| "loss": 0.4838, |
| "step": 1958 |
| }, |
| { |
| "epoch": 2.3878960194963446, |
| "grad_norm": 0.4040275413347584, |
| "learning_rate": 1.211133246125497e-06, |
| "loss": 0.5255, |
| "step": 1959 |
| }, |
| { |
| "epoch": 2.389114541023558, |
| "grad_norm": 0.40676437477424116, |
| "learning_rate": 1.2065075559729749e-06, |
| "loss": 0.5417, |
| "step": 1960 |
| }, |
| { |
| "epoch": 2.3903330625507717, |
| "grad_norm": 0.3828497235073974, |
| "learning_rate": 1.201889503890955e-06, |
| "loss": 0.5003, |
| "step": 1961 |
| }, |
| { |
| "epoch": 2.3915515840779853, |
| "grad_norm": 0.40024563629448995, |
| "learning_rate": 1.197279099177731e-06, |
| "loss": 0.5627, |
| "step": 1962 |
| }, |
| { |
| "epoch": 2.392770105605199, |
| "grad_norm": 0.3508330713937016, |
| "learning_rate": 1.1926763511161993e-06, |
| "loss": 0.4607, |
| "step": 1963 |
| }, |
| { |
| "epoch": 2.393988627132413, |
| "grad_norm": 0.4280688001820654, |
| "learning_rate": 1.188081268973842e-06, |
| "loss": 0.5389, |
| "step": 1964 |
| }, |
| { |
| "epoch": 2.3952071486596265, |
| "grad_norm": 0.3853051982812674, |
| "learning_rate": 1.183493862002702e-06, |
| "loss": 0.4576, |
| "step": 1965 |
| }, |
| { |
| "epoch": 2.39642567018684, |
| "grad_norm": 0.4045347572603379, |
| "learning_rate": 1.1789141394393683e-06, |
| "loss": 0.5698, |
| "step": 1966 |
| }, |
| { |
| "epoch": 2.3976441917140536, |
| "grad_norm": 0.3983176516664499, |
| "learning_rate": 1.1743421105049612e-06, |
| "loss": 0.4725, |
| "step": 1967 |
| }, |
| { |
| "epoch": 2.398862713241267, |
| "grad_norm": 0.39056153824479495, |
| "learning_rate": 1.1697777844051105e-06, |
| "loss": 0.5365, |
| "step": 1968 |
| }, |
| { |
| "epoch": 2.400081234768481, |
| "grad_norm": 0.38372959754674263, |
| "learning_rate": 1.165221170329931e-06, |
| "loss": 0.5051, |
| "step": 1969 |
| }, |
| { |
| "epoch": 2.4012997562956944, |
| "grad_norm": 0.3938860376877866, |
| "learning_rate": 1.1606722774540146e-06, |
| "loss": 0.4948, |
| "step": 1970 |
| }, |
| { |
| "epoch": 2.4025182778229084, |
| "grad_norm": 0.38806865450446354, |
| "learning_rate": 1.1561311149364075e-06, |
| "loss": 0.5132, |
| "step": 1971 |
| }, |
| { |
| "epoch": 2.403736799350122, |
| "grad_norm": 0.41405852900105905, |
| "learning_rate": 1.1515976919205869e-06, |
| "loss": 0.5287, |
| "step": 1972 |
| }, |
| { |
| "epoch": 2.4049553208773355, |
| "grad_norm": 0.41486030319490325, |
| "learning_rate": 1.1470720175344473e-06, |
| "loss": 0.4826, |
| "step": 1973 |
| }, |
| { |
| "epoch": 2.406173842404549, |
| "grad_norm": 0.4073479693226464, |
| "learning_rate": 1.1425541008902852e-06, |
| "loss": 0.5061, |
| "step": 1974 |
| }, |
| { |
| "epoch": 2.4073923639317627, |
| "grad_norm": 0.3698628863486537, |
| "learning_rate": 1.1380439510847757e-06, |
| "loss": 0.4822, |
| "step": 1975 |
| }, |
| { |
| "epoch": 2.4086108854589763, |
| "grad_norm": 0.40735700660846696, |
| "learning_rate": 1.1335415771989538e-06, |
| "loss": 0.5198, |
| "step": 1976 |
| }, |
| { |
| "epoch": 2.40982940698619, |
| "grad_norm": 0.39248680656683244, |
| "learning_rate": 1.1290469882981987e-06, |
| "loss": 0.5513, |
| "step": 1977 |
| }, |
| { |
| "epoch": 2.411047928513404, |
| "grad_norm": 0.36602544238604245, |
| "learning_rate": 1.1245601934322148e-06, |
| "loss": 0.5042, |
| "step": 1978 |
| }, |
| { |
| "epoch": 2.4122664500406175, |
| "grad_norm": 0.3765733599147946, |
| "learning_rate": 1.1200812016350172e-06, |
| "loss": 0.5031, |
| "step": 1979 |
| }, |
| { |
| "epoch": 2.413484971567831, |
| "grad_norm": 0.34319770025526913, |
| "learning_rate": 1.1156100219249022e-06, |
| "loss": 0.5049, |
| "step": 1980 |
| }, |
| { |
| "epoch": 2.4147034930950446, |
| "grad_norm": 0.4344345273336274, |
| "learning_rate": 1.1111466633044448e-06, |
| "loss": 0.6097, |
| "step": 1981 |
| }, |
| { |
| "epoch": 2.415922014622258, |
| "grad_norm": 0.4011208632543373, |
| "learning_rate": 1.1066911347604653e-06, |
| "loss": 0.4408, |
| "step": 1982 |
| }, |
| { |
| "epoch": 2.417140536149472, |
| "grad_norm": 0.3602299607841865, |
| "learning_rate": 1.1022434452640252e-06, |
| "loss": 0.4878, |
| "step": 1983 |
| }, |
| { |
| "epoch": 2.418359057676686, |
| "grad_norm": 0.3938174382324399, |
| "learning_rate": 1.0978036037703955e-06, |
| "loss": 0.5246, |
| "step": 1984 |
| }, |
| { |
| "epoch": 2.4195775792038994, |
| "grad_norm": 0.40627392150384173, |
| "learning_rate": 1.0933716192190502e-06, |
| "loss": 0.5191, |
| "step": 1985 |
| }, |
| { |
| "epoch": 2.420796100731113, |
| "grad_norm": 0.3960581508676551, |
| "learning_rate": 1.0889475005336447e-06, |
| "loss": 0.4755, |
| "step": 1986 |
| }, |
| { |
| "epoch": 2.4220146222583265, |
| "grad_norm": 0.3909127795418108, |
| "learning_rate": 1.0845312566219924e-06, |
| "loss": 0.5128, |
| "step": 1987 |
| }, |
| { |
| "epoch": 2.42323314378554, |
| "grad_norm": 0.39827799729251556, |
| "learning_rate": 1.0801228963760518e-06, |
| "loss": 0.5425, |
| "step": 1988 |
| }, |
| { |
| "epoch": 2.4244516653127537, |
| "grad_norm": 0.37511159704629343, |
| "learning_rate": 1.075722428671911e-06, |
| "loss": 0.4761, |
| "step": 1989 |
| }, |
| { |
| "epoch": 2.4256701868399677, |
| "grad_norm": 0.3982398641015277, |
| "learning_rate": 1.0713298623697654e-06, |
| "loss": 0.5386, |
| "step": 1990 |
| }, |
| { |
| "epoch": 2.4268887083671813, |
| "grad_norm": 0.3947556572315106, |
| "learning_rate": 1.0669452063138992e-06, |
| "loss": 0.4842, |
| "step": 1991 |
| }, |
| { |
| "epoch": 2.428107229894395, |
| "grad_norm": 0.40576214681574757, |
| "learning_rate": 1.0625684693326727e-06, |
| "loss": 0.5423, |
| "step": 1992 |
| }, |
| { |
| "epoch": 2.4293257514216084, |
| "grad_norm": 0.40693906455637163, |
| "learning_rate": 1.0581996602384975e-06, |
| "loss": 0.5159, |
| "step": 1993 |
| }, |
| { |
| "epoch": 2.430544272948822, |
| "grad_norm": 0.3488770060857356, |
| "learning_rate": 1.0538387878278283e-06, |
| "loss": 0.5187, |
| "step": 1994 |
| }, |
| { |
| "epoch": 2.4317627944760356, |
| "grad_norm": 0.4158444319209436, |
| "learning_rate": 1.0494858608811326e-06, |
| "loss": 0.5313, |
| "step": 1995 |
| }, |
| { |
| "epoch": 2.432981316003249, |
| "grad_norm": 0.4169970056929273, |
| "learning_rate": 1.0451408881628855e-06, |
| "loss": 0.4866, |
| "step": 1996 |
| }, |
| { |
| "epoch": 2.434199837530463, |
| "grad_norm": 0.3823144671122315, |
| "learning_rate": 1.0408038784215462e-06, |
| "loss": 0.4871, |
| "step": 1997 |
| }, |
| { |
| "epoch": 2.435418359057677, |
| "grad_norm": 0.38435379244248535, |
| "learning_rate": 1.0364748403895368e-06, |
| "loss": 0.5276, |
| "step": 1998 |
| }, |
| { |
| "epoch": 2.4366368805848904, |
| "grad_norm": 0.39978814004238356, |
| "learning_rate": 1.0321537827832311e-06, |
| "loss": 0.5374, |
| "step": 1999 |
| }, |
| { |
| "epoch": 2.437855402112104, |
| "grad_norm": 0.3858037469760484, |
| "learning_rate": 1.0278407143029346e-06, |
| "loss": 0.4967, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.4390739236393175, |
| "grad_norm": 0.36427416648269983, |
| "learning_rate": 1.0235356436328675e-06, |
| "loss": 0.5147, |
| "step": 2001 |
| }, |
| { |
| "epoch": 2.440292445166531, |
| "grad_norm": 0.42530639031865014, |
| "learning_rate": 1.019238579441148e-06, |
| "loss": 0.4949, |
| "step": 2002 |
| }, |
| { |
| "epoch": 2.4415109666937447, |
| "grad_norm": 0.4047119130435624, |
| "learning_rate": 1.014949530379767e-06, |
| "loss": 0.491, |
| "step": 2003 |
| }, |
| { |
| "epoch": 2.4427294882209587, |
| "grad_norm": 0.38798880943669517, |
| "learning_rate": 1.0106685050845838e-06, |
| "loss": 0.5433, |
| "step": 2004 |
| }, |
| { |
| "epoch": 2.4439480097481723, |
| "grad_norm": 0.4009909590310544, |
| "learning_rate": 1.0063955121752999e-06, |
| "loss": 0.5113, |
| "step": 2005 |
| }, |
| { |
| "epoch": 2.445166531275386, |
| "grad_norm": 0.35796810794196016, |
| "learning_rate": 1.0021305602554459e-06, |
| "loss": 0.5113, |
| "step": 2006 |
| }, |
| { |
| "epoch": 2.4463850528025994, |
| "grad_norm": 0.3755800564081513, |
| "learning_rate": 9.978736579123577e-07, |
| "loss": 0.5004, |
| "step": 2007 |
| }, |
| { |
| "epoch": 2.447603574329813, |
| "grad_norm": 0.3683007765287416, |
| "learning_rate": 9.936248137171684e-07, |
| "loss": 0.4974, |
| "step": 2008 |
| }, |
| { |
| "epoch": 2.448822095857027, |
| "grad_norm": 0.3952937239349372, |
| "learning_rate": 9.893840362247809e-07, |
| "loss": 0.4971, |
| "step": 2009 |
| }, |
| { |
| "epoch": 2.4500406173842406, |
| "grad_norm": 0.4284741934979282, |
| "learning_rate": 9.851513339738627e-07, |
| "loss": 0.561, |
| "step": 2010 |
| }, |
| { |
| "epoch": 2.451259138911454, |
| "grad_norm": 0.4056720067131079, |
| "learning_rate": 9.809267154868163e-07, |
| "loss": 0.5179, |
| "step": 2011 |
| }, |
| { |
| "epoch": 2.4524776604386678, |
| "grad_norm": 0.35255993479863745, |
| "learning_rate": 9.7671018926977e-07, |
| "loss": 0.4424, |
| "step": 2012 |
| }, |
| { |
| "epoch": 2.4536961819658814, |
| "grad_norm": 0.40547305130915784, |
| "learning_rate": 9.725017638125612e-07, |
| "loss": 0.5524, |
| "step": 2013 |
| }, |
| { |
| "epoch": 2.454914703493095, |
| "grad_norm": 0.35656299866798635, |
| "learning_rate": 9.683014475887126e-07, |
| "loss": 0.4676, |
| "step": 2014 |
| }, |
| { |
| "epoch": 2.4561332250203085, |
| "grad_norm": 0.37086670515583137, |
| "learning_rate": 9.641092490554195e-07, |
| "loss": 0.5398, |
| "step": 2015 |
| }, |
| { |
| "epoch": 2.4573517465475225, |
| "grad_norm": 0.3791246421084454, |
| "learning_rate": 9.599251766535344e-07, |
| "loss": 0.4933, |
| "step": 2016 |
| }, |
| { |
| "epoch": 2.458570268074736, |
| "grad_norm": 0.40912338906986023, |
| "learning_rate": 9.5574923880755e-07, |
| "loss": 0.562, |
| "step": 2017 |
| }, |
| { |
| "epoch": 2.4597887896019497, |
| "grad_norm": 0.41181426146954847, |
| "learning_rate": 9.51581443925576e-07, |
| "loss": 0.4892, |
| "step": 2018 |
| }, |
| { |
| "epoch": 2.4610073111291633, |
| "grad_norm": 0.4026513287049664, |
| "learning_rate": 9.474218003993275e-07, |
| "loss": 0.5278, |
| "step": 2019 |
| }, |
| { |
| "epoch": 2.462225832656377, |
| "grad_norm": 0.38613963899490084, |
| "learning_rate": 9.432703166041085e-07, |
| "loss": 0.4996, |
| "step": 2020 |
| }, |
| { |
| "epoch": 2.4634443541835904, |
| "grad_norm": 0.38482050851065364, |
| "learning_rate": 9.391270008987946e-07, |
| "loss": 0.5189, |
| "step": 2021 |
| }, |
| { |
| "epoch": 2.464662875710804, |
| "grad_norm": 0.39758696168412205, |
| "learning_rate": 9.349918616258113e-07, |
| "loss": 0.5078, |
| "step": 2022 |
| }, |
| { |
| "epoch": 2.465881397238018, |
| "grad_norm": 0.38614047610686164, |
| "learning_rate": 9.308649071111259e-07, |
| "loss": 0.4729, |
| "step": 2023 |
| }, |
| { |
| "epoch": 2.4670999187652316, |
| "grad_norm": 0.37024789300038397, |
| "learning_rate": 9.267461456642235e-07, |
| "loss": 0.5187, |
| "step": 2024 |
| }, |
| { |
| "epoch": 2.468318440292445, |
| "grad_norm": 0.393874443470908, |
| "learning_rate": 9.226355855780922e-07, |
| "loss": 0.5266, |
| "step": 2025 |
| }, |
| { |
| "epoch": 2.4695369618196588, |
| "grad_norm": 0.39515255892810097, |
| "learning_rate": 9.185332351292059e-07, |
| "loss": 0.4979, |
| "step": 2026 |
| }, |
| { |
| "epoch": 2.4707554833468723, |
| "grad_norm": 0.36209573390534977, |
| "learning_rate": 9.144391025775123e-07, |
| "loss": 0.4685, |
| "step": 2027 |
| }, |
| { |
| "epoch": 2.471974004874086, |
| "grad_norm": 0.3690675939364142, |
| "learning_rate": 9.10353196166412e-07, |
| "loss": 0.5109, |
| "step": 2028 |
| }, |
| { |
| "epoch": 2.4731925264013, |
| "grad_norm": 0.4126364096164172, |
| "learning_rate": 9.0627552412274e-07, |
| "loss": 0.551, |
| "step": 2029 |
| }, |
| { |
| "epoch": 2.4744110479285135, |
| "grad_norm": 0.39808305056022897, |
| "learning_rate": 9.022060946567512e-07, |
| "loss": 0.4829, |
| "step": 2030 |
| }, |
| { |
| "epoch": 2.475629569455727, |
| "grad_norm": 0.3791592284080857, |
| "learning_rate": 8.981449159621075e-07, |
| "loss": 0.4993, |
| "step": 2031 |
| }, |
| { |
| "epoch": 2.4768480909829407, |
| "grad_norm": 0.3890390516980624, |
| "learning_rate": 8.940919962158584e-07, |
| "loss": 0.5213, |
| "step": 2032 |
| }, |
| { |
| "epoch": 2.4780666125101543, |
| "grad_norm": 0.42524657999992466, |
| "learning_rate": 8.900473435784196e-07, |
| "loss": 0.5666, |
| "step": 2033 |
| }, |
| { |
| "epoch": 2.479285134037368, |
| "grad_norm": 0.3815964696084361, |
| "learning_rate": 8.860109661935673e-07, |
| "loss": 0.4625, |
| "step": 2034 |
| }, |
| { |
| "epoch": 2.480503655564582, |
| "grad_norm": 0.42469861666467223, |
| "learning_rate": 8.819828721884094e-07, |
| "loss": 0.5373, |
| "step": 2035 |
| }, |
| { |
| "epoch": 2.4817221770917954, |
| "grad_norm": 0.38320361649924684, |
| "learning_rate": 8.779630696733821e-07, |
| "loss": 0.5375, |
| "step": 2036 |
| }, |
| { |
| "epoch": 2.482940698619009, |
| "grad_norm": 0.3687214848508832, |
| "learning_rate": 8.739515667422211e-07, |
| "loss": 0.4435, |
| "step": 2037 |
| }, |
| { |
| "epoch": 2.4841592201462226, |
| "grad_norm": 0.40061711007827416, |
| "learning_rate": 8.699483714719547e-07, |
| "loss": 0.5467, |
| "step": 2038 |
| }, |
| { |
| "epoch": 2.485377741673436, |
| "grad_norm": 0.40521522379814523, |
| "learning_rate": 8.659534919228845e-07, |
| "loss": 0.536, |
| "step": 2039 |
| }, |
| { |
| "epoch": 2.4865962632006497, |
| "grad_norm": 0.3672113864753048, |
| "learning_rate": 8.619669361385663e-07, |
| "loss": 0.4978, |
| "step": 2040 |
| }, |
| { |
| "epoch": 2.4878147847278633, |
| "grad_norm": 0.3620893091593676, |
| "learning_rate": 8.579887121457952e-07, |
| "loss": 0.5038, |
| "step": 2041 |
| }, |
| { |
| "epoch": 2.4890333062550773, |
| "grad_norm": 0.3663778220669876, |
| "learning_rate": 8.540188279545942e-07, |
| "loss": 0.4862, |
| "step": 2042 |
| }, |
| { |
| "epoch": 2.490251827782291, |
| "grad_norm": 0.38043441191253624, |
| "learning_rate": 8.500572915581923e-07, |
| "loss": 0.5152, |
| "step": 2043 |
| }, |
| { |
| "epoch": 2.4914703493095045, |
| "grad_norm": 0.3942635437659399, |
| "learning_rate": 8.461041109330132e-07, |
| "loss": 0.5055, |
| "step": 2044 |
| }, |
| { |
| "epoch": 2.492688870836718, |
| "grad_norm": 0.3729970950643679, |
| "learning_rate": 8.421592940386514e-07, |
| "loss": 0.5022, |
| "step": 2045 |
| }, |
| { |
| "epoch": 2.4939073923639317, |
| "grad_norm": 0.40364708615741257, |
| "learning_rate": 8.382228488178639e-07, |
| "loss": 0.5297, |
| "step": 2046 |
| }, |
| { |
| "epoch": 2.4951259138911452, |
| "grad_norm": 0.3841836875471797, |
| "learning_rate": 8.342947831965537e-07, |
| "loss": 0.4594, |
| "step": 2047 |
| }, |
| { |
| "epoch": 2.496344435418359, |
| "grad_norm": 0.39167222446559674, |
| "learning_rate": 8.3037510508375e-07, |
| "loss": 0.538, |
| "step": 2048 |
| }, |
| { |
| "epoch": 2.497562956945573, |
| "grad_norm": 0.36017475560838597, |
| "learning_rate": 8.264638223715916e-07, |
| "loss": 0.4904, |
| "step": 2049 |
| }, |
| { |
| "epoch": 2.4987814784727864, |
| "grad_norm": 0.38494521342543364, |
| "learning_rate": 8.225609429353187e-07, |
| "loss": 0.5098, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.3915568133305174, |
| "learning_rate": 8.186664746332457e-07, |
| "loss": 0.5479, |
| "step": 2051 |
| }, |
| { |
| "epoch": 2.5012185215272136, |
| "grad_norm": 0.3653001722783512, |
| "learning_rate": 8.147804253067581e-07, |
| "loss": 0.5505, |
| "step": 2052 |
| }, |
| { |
| "epoch": 2.502437043054427, |
| "grad_norm": 0.38529539896383097, |
| "learning_rate": 8.109028027802834e-07, |
| "loss": 0.5075, |
| "step": 2053 |
| }, |
| { |
| "epoch": 2.503655564581641, |
| "grad_norm": 0.32985269566739706, |
| "learning_rate": 8.070336148612873e-07, |
| "loss": 0.4737, |
| "step": 2054 |
| }, |
| { |
| "epoch": 2.5048740861088543, |
| "grad_norm": 0.3688596078684635, |
| "learning_rate": 8.031728693402502e-07, |
| "loss": 0.4933, |
| "step": 2055 |
| }, |
| { |
| "epoch": 2.5060926076360683, |
| "grad_norm": 0.3574147764462336, |
| "learning_rate": 7.993205739906551e-07, |
| "loss": 0.5036, |
| "step": 2056 |
| }, |
| { |
| "epoch": 2.507311129163282, |
| "grad_norm": 0.3933673997370336, |
| "learning_rate": 7.954767365689675e-07, |
| "loss": 0.5284, |
| "step": 2057 |
| }, |
| { |
| "epoch": 2.5085296506904955, |
| "grad_norm": 0.3804892757598497, |
| "learning_rate": 7.916413648146282e-07, |
| "loss": 0.5314, |
| "step": 2058 |
| }, |
| { |
| "epoch": 2.509748172217709, |
| "grad_norm": 0.3972280482401795, |
| "learning_rate": 7.878144664500304e-07, |
| "loss": 0.5042, |
| "step": 2059 |
| }, |
| { |
| "epoch": 2.5109666937449227, |
| "grad_norm": 0.4139170649517036, |
| "learning_rate": 7.839960491805048e-07, |
| "loss": 0.513, |
| "step": 2060 |
| }, |
| { |
| "epoch": 2.5121852152721367, |
| "grad_norm": 0.3682306221980358, |
| "learning_rate": 7.80186120694309e-07, |
| "loss": 0.5082, |
| "step": 2061 |
| }, |
| { |
| "epoch": 2.5134037367993503, |
| "grad_norm": 0.40743403282060575, |
| "learning_rate": 7.763846886626048e-07, |
| "loss": 0.4982, |
| "step": 2062 |
| }, |
| { |
| "epoch": 2.514622258326564, |
| "grad_norm": 0.3807959558438016, |
| "learning_rate": 7.725917607394512e-07, |
| "loss": 0.4893, |
| "step": 2063 |
| }, |
| { |
| "epoch": 2.5158407798537774, |
| "grad_norm": 0.3774151979891591, |
| "learning_rate": 7.6880734456178e-07, |
| "loss": 0.5308, |
| "step": 2064 |
| }, |
| { |
| "epoch": 2.517059301380991, |
| "grad_norm": 0.39200277210093626, |
| "learning_rate": 7.650314477493875e-07, |
| "loss": 0.5221, |
| "step": 2065 |
| }, |
| { |
| "epoch": 2.5182778229082046, |
| "grad_norm": 0.3987158423288902, |
| "learning_rate": 7.612640779049174e-07, |
| "loss": 0.5387, |
| "step": 2066 |
| }, |
| { |
| "epoch": 2.519496344435418, |
| "grad_norm": 0.3432299316334845, |
| "learning_rate": 7.575052426138424e-07, |
| "loss": 0.448, |
| "step": 2067 |
| }, |
| { |
| "epoch": 2.520714865962632, |
| "grad_norm": 0.40306877146829656, |
| "learning_rate": 7.537549494444502e-07, |
| "loss": 0.5319, |
| "step": 2068 |
| }, |
| { |
| "epoch": 2.5219333874898457, |
| "grad_norm": 0.3624054180666312, |
| "learning_rate": 7.500132059478327e-07, |
| "loss": 0.4755, |
| "step": 2069 |
| }, |
| { |
| "epoch": 2.5231519090170593, |
| "grad_norm": 0.3943720013643357, |
| "learning_rate": 7.462800196578662e-07, |
| "loss": 0.5517, |
| "step": 2070 |
| }, |
| { |
| "epoch": 2.524370430544273, |
| "grad_norm": 0.3760692184644974, |
| "learning_rate": 7.425553980911959e-07, |
| "loss": 0.5198, |
| "step": 2071 |
| }, |
| { |
| "epoch": 2.5255889520714865, |
| "grad_norm": 0.36875663183404517, |
| "learning_rate": 7.388393487472223e-07, |
| "loss": 0.5099, |
| "step": 2072 |
| }, |
| { |
| "epoch": 2.5268074735987005, |
| "grad_norm": 0.3765385801804941, |
| "learning_rate": 7.351318791080881e-07, |
| "loss": 0.4877, |
| "step": 2073 |
| }, |
| { |
| "epoch": 2.5280259951259136, |
| "grad_norm": 0.3880867322532877, |
| "learning_rate": 7.314329966386596e-07, |
| "loss": 0.5191, |
| "step": 2074 |
| }, |
| { |
| "epoch": 2.5292445166531277, |
| "grad_norm": 0.38480762192630874, |
| "learning_rate": 7.277427087865124e-07, |
| "loss": 0.5367, |
| "step": 2075 |
| }, |
| { |
| "epoch": 2.5304630381803412, |
| "grad_norm": 0.37367690807549686, |
| "learning_rate": 7.240610229819195e-07, |
| "loss": 0.4796, |
| "step": 2076 |
| }, |
| { |
| "epoch": 2.531681559707555, |
| "grad_norm": 0.356459470205227, |
| "learning_rate": 7.203879466378311e-07, |
| "loss": 0.4846, |
| "step": 2077 |
| }, |
| { |
| "epoch": 2.5329000812347684, |
| "grad_norm": 0.368312803237026, |
| "learning_rate": 7.167234871498646e-07, |
| "loss": 0.512, |
| "step": 2078 |
| }, |
| { |
| "epoch": 2.534118602761982, |
| "grad_norm": 0.42790949260764394, |
| "learning_rate": 7.130676518962859e-07, |
| "loss": 0.5199, |
| "step": 2079 |
| }, |
| { |
| "epoch": 2.535337124289196, |
| "grad_norm": 0.3760245356587111, |
| "learning_rate": 7.094204482379985e-07, |
| "loss": 0.5206, |
| "step": 2080 |
| }, |
| { |
| "epoch": 2.5365556458164096, |
| "grad_norm": 0.36529563925832975, |
| "learning_rate": 7.057818835185243e-07, |
| "loss": 0.5169, |
| "step": 2081 |
| }, |
| { |
| "epoch": 2.537774167343623, |
| "grad_norm": 0.37415123963436103, |
| "learning_rate": 7.021519650639952e-07, |
| "loss": 0.4682, |
| "step": 2082 |
| }, |
| { |
| "epoch": 2.5389926888708367, |
| "grad_norm": 0.3599256024573686, |
| "learning_rate": 6.985307001831266e-07, |
| "loss": 0.5237, |
| "step": 2083 |
| }, |
| { |
| "epoch": 2.5402112103980503, |
| "grad_norm": 0.37172969261280475, |
| "learning_rate": 6.949180961672159e-07, |
| "loss": 0.5229, |
| "step": 2084 |
| }, |
| { |
| "epoch": 2.541429731925264, |
| "grad_norm": 0.3692464609849223, |
| "learning_rate": 6.913141602901213e-07, |
| "loss": 0.4967, |
| "step": 2085 |
| }, |
| { |
| "epoch": 2.5426482534524775, |
| "grad_norm": 0.41481021551912467, |
| "learning_rate": 6.877188998082484e-07, |
| "loss": 0.5364, |
| "step": 2086 |
| }, |
| { |
| "epoch": 2.5438667749796915, |
| "grad_norm": 0.3587567944310898, |
| "learning_rate": 6.841323219605333e-07, |
| "loss": 0.477, |
| "step": 2087 |
| }, |
| { |
| "epoch": 2.545085296506905, |
| "grad_norm": 0.36227017983644627, |
| "learning_rate": 6.805544339684295e-07, |
| "loss": 0.5186, |
| "step": 2088 |
| }, |
| { |
| "epoch": 2.5463038180341186, |
| "grad_norm": 0.3848961894752312, |
| "learning_rate": 6.769852430358969e-07, |
| "loss": 0.494, |
| "step": 2089 |
| }, |
| { |
| "epoch": 2.5475223395613322, |
| "grad_norm": 0.400827672871941, |
| "learning_rate": 6.734247563493829e-07, |
| "loss": 0.5104, |
| "step": 2090 |
| }, |
| { |
| "epoch": 2.548740861088546, |
| "grad_norm": 0.3858206572812583, |
| "learning_rate": 6.698729810778065e-07, |
| "loss": 0.5203, |
| "step": 2091 |
| }, |
| { |
| "epoch": 2.5499593826157594, |
| "grad_norm": 0.39420570104347397, |
| "learning_rate": 6.663299243725512e-07, |
| "loss": 0.514, |
| "step": 2092 |
| }, |
| { |
| "epoch": 2.551177904142973, |
| "grad_norm": 0.37623344903141814, |
| "learning_rate": 6.627955933674412e-07, |
| "loss": 0.4675, |
| "step": 2093 |
| }, |
| { |
| "epoch": 2.552396425670187, |
| "grad_norm": 0.37984856280561025, |
| "learning_rate": 6.592699951787362e-07, |
| "loss": 0.5349, |
| "step": 2094 |
| }, |
| { |
| "epoch": 2.5536149471974006, |
| "grad_norm": 0.38942296808421134, |
| "learning_rate": 6.55753136905109e-07, |
| "loss": 0.5222, |
| "step": 2095 |
| }, |
| { |
| "epoch": 2.554833468724614, |
| "grad_norm": 0.38744941426091656, |
| "learning_rate": 6.522450256276363e-07, |
| "loss": 0.4997, |
| "step": 2096 |
| }, |
| { |
| "epoch": 2.5560519902518277, |
| "grad_norm": 0.40862429991424404, |
| "learning_rate": 6.487456684097848e-07, |
| "loss": 0.5409, |
| "step": 2097 |
| }, |
| { |
| "epoch": 2.5572705117790413, |
| "grad_norm": 0.37635062650001033, |
| "learning_rate": 6.452550722973927e-07, |
| "loss": 0.4627, |
| "step": 2098 |
| }, |
| { |
| "epoch": 2.5584890333062553, |
| "grad_norm": 0.4221777822228316, |
| "learning_rate": 6.417732443186575e-07, |
| "loss": 0.5358, |
| "step": 2099 |
| }, |
| { |
| "epoch": 2.5597075548334685, |
| "grad_norm": 0.39847174733267055, |
| "learning_rate": 6.383001914841252e-07, |
| "loss": 0.5012, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.5609260763606825, |
| "grad_norm": 0.3748715416676312, |
| "learning_rate": 6.348359207866722e-07, |
| "loss": 0.4956, |
| "step": 2101 |
| }, |
| { |
| "epoch": 2.562144597887896, |
| "grad_norm": 0.37750025006496746, |
| "learning_rate": 6.313804392014905e-07, |
| "loss": 0.4854, |
| "step": 2102 |
| }, |
| { |
| "epoch": 2.5633631194151096, |
| "grad_norm": 0.3998375296968308, |
| "learning_rate": 6.279337536860786e-07, |
| "loss": 0.5143, |
| "step": 2103 |
| }, |
| { |
| "epoch": 2.564581640942323, |
| "grad_norm": 0.3710721048856582, |
| "learning_rate": 6.244958711802213e-07, |
| "loss": 0.5591, |
| "step": 2104 |
| }, |
| { |
| "epoch": 2.565800162469537, |
| "grad_norm": 0.34868738151134687, |
| "learning_rate": 6.210667986059821e-07, |
| "loss": 0.4551, |
| "step": 2105 |
| }, |
| { |
| "epoch": 2.567018683996751, |
| "grad_norm": 0.35595641503961983, |
| "learning_rate": 6.17646542867682e-07, |
| "loss": 0.5152, |
| "step": 2106 |
| }, |
| { |
| "epoch": 2.5682372055239644, |
| "grad_norm": 0.36663979047928985, |
| "learning_rate": 6.142351108518929e-07, |
| "loss": 0.503, |
| "step": 2107 |
| }, |
| { |
| "epoch": 2.569455727051178, |
| "grad_norm": 0.34787252687208675, |
| "learning_rate": 6.108325094274209e-07, |
| "loss": 0.5031, |
| "step": 2108 |
| }, |
| { |
| "epoch": 2.5706742485783916, |
| "grad_norm": 0.39033263561688103, |
| "learning_rate": 6.074387454452891e-07, |
| "loss": 0.5214, |
| "step": 2109 |
| }, |
| { |
| "epoch": 2.571892770105605, |
| "grad_norm": 0.38512927731883373, |
| "learning_rate": 6.040538257387268e-07, |
| "loss": 0.5198, |
| "step": 2110 |
| }, |
| { |
| "epoch": 2.5731112916328187, |
| "grad_norm": 0.3590301126097114, |
| "learning_rate": 6.006777571231587e-07, |
| "loss": 0.5027, |
| "step": 2111 |
| }, |
| { |
| "epoch": 2.5743298131600323, |
| "grad_norm": 0.3732504638805604, |
| "learning_rate": 5.973105463961864e-07, |
| "loss": 0.5066, |
| "step": 2112 |
| }, |
| { |
| "epoch": 2.5755483346872463, |
| "grad_norm": 0.3729739011338398, |
| "learning_rate": 5.939522003375753e-07, |
| "loss": 0.4958, |
| "step": 2113 |
| }, |
| { |
| "epoch": 2.57676685621446, |
| "grad_norm": 0.37186730911837346, |
| "learning_rate": 5.906027257092444e-07, |
| "loss": 0.4761, |
| "step": 2114 |
| }, |
| { |
| "epoch": 2.5779853777416735, |
| "grad_norm": 0.3661760756265481, |
| "learning_rate": 5.872621292552477e-07, |
| "loss": 0.5327, |
| "step": 2115 |
| }, |
| { |
| "epoch": 2.579203899268887, |
| "grad_norm": 0.40542839626324956, |
| "learning_rate": 5.839304177017663e-07, |
| "loss": 0.5512, |
| "step": 2116 |
| }, |
| { |
| "epoch": 2.5804224207961006, |
| "grad_norm": 0.3840467276263846, |
| "learning_rate": 5.806075977570886e-07, |
| "loss": 0.4793, |
| "step": 2117 |
| }, |
| { |
| "epoch": 2.5816409423233146, |
| "grad_norm": 0.37820337565321277, |
| "learning_rate": 5.772936761116027e-07, |
| "loss": 0.506, |
| "step": 2118 |
| }, |
| { |
| "epoch": 2.582859463850528, |
| "grad_norm": 0.3797306170789339, |
| "learning_rate": 5.739886594377803e-07, |
| "loss": 0.508, |
| "step": 2119 |
| }, |
| { |
| "epoch": 2.584077985377742, |
| "grad_norm": 0.3828935693851265, |
| "learning_rate": 5.706925543901609e-07, |
| "loss": 0.5097, |
| "step": 2120 |
| }, |
| { |
| "epoch": 2.5852965069049554, |
| "grad_norm": 0.3900080504691436, |
| "learning_rate": 5.674053676053415e-07, |
| "loss": 0.5168, |
| "step": 2121 |
| }, |
| { |
| "epoch": 2.586515028432169, |
| "grad_norm": 0.3587725291460617, |
| "learning_rate": 5.641271057019637e-07, |
| "loss": 0.4565, |
| "step": 2122 |
| }, |
| { |
| "epoch": 2.5877335499593825, |
| "grad_norm": 0.3939424632788925, |
| "learning_rate": 5.608577752806987e-07, |
| "loss": 0.5494, |
| "step": 2123 |
| }, |
| { |
| "epoch": 2.588952071486596, |
| "grad_norm": 0.3725432276278501, |
| "learning_rate": 5.575973829242365e-07, |
| "loss": 0.4588, |
| "step": 2124 |
| }, |
| { |
| "epoch": 2.59017059301381, |
| "grad_norm": 0.38604468058456287, |
| "learning_rate": 5.543459351972635e-07, |
| "loss": 0.529, |
| "step": 2125 |
| }, |
| { |
| "epoch": 2.5913891145410237, |
| "grad_norm": 0.36341318860508387, |
| "learning_rate": 5.511034386464642e-07, |
| "loss": 0.494, |
| "step": 2126 |
| }, |
| { |
| "epoch": 2.5926076360682373, |
| "grad_norm": 0.35625493095798805, |
| "learning_rate": 5.478698998004967e-07, |
| "loss": 0.5456, |
| "step": 2127 |
| }, |
| { |
| "epoch": 2.593826157595451, |
| "grad_norm": 0.36227564286221264, |
| "learning_rate": 5.446453251699851e-07, |
| "loss": 0.514, |
| "step": 2128 |
| }, |
| { |
| "epoch": 2.5950446791226645, |
| "grad_norm": 0.3662431166869742, |
| "learning_rate": 5.414297212475012e-07, |
| "loss": 0.5157, |
| "step": 2129 |
| }, |
| { |
| "epoch": 2.596263200649878, |
| "grad_norm": 0.3558072452798451, |
| "learning_rate": 5.382230945075556e-07, |
| "loss": 0.4961, |
| "step": 2130 |
| }, |
| { |
| "epoch": 2.5974817221770916, |
| "grad_norm": 0.3795263836967965, |
| "learning_rate": 5.350254514065856e-07, |
| "loss": 0.5127, |
| "step": 2131 |
| }, |
| { |
| "epoch": 2.5987002437043056, |
| "grad_norm": 0.3690040036136185, |
| "learning_rate": 5.318367983829393e-07, |
| "loss": 0.4908, |
| "step": 2132 |
| }, |
| { |
| "epoch": 2.599918765231519, |
| "grad_norm": 0.3608821461773019, |
| "learning_rate": 5.286571418568615e-07, |
| "loss": 0.5289, |
| "step": 2133 |
| }, |
| { |
| "epoch": 2.601137286758733, |
| "grad_norm": 0.4006495491671045, |
| "learning_rate": 5.254864882304855e-07, |
| "loss": 0.5254, |
| "step": 2134 |
| }, |
| { |
| "epoch": 2.6023558082859464, |
| "grad_norm": 0.38150929128537214, |
| "learning_rate": 5.223248438878176e-07, |
| "loss": 0.4622, |
| "step": 2135 |
| }, |
| { |
| "epoch": 2.60357432981316, |
| "grad_norm": 0.400783680185111, |
| "learning_rate": 5.191722151947227e-07, |
| "loss": 0.5474, |
| "step": 2136 |
| }, |
| { |
| "epoch": 2.6047928513403735, |
| "grad_norm": 0.3662412318337768, |
| "learning_rate": 5.160286084989119e-07, |
| "loss": 0.536, |
| "step": 2137 |
| }, |
| { |
| "epoch": 2.606011372867587, |
| "grad_norm": 0.37308572148487257, |
| "learning_rate": 5.128940301299334e-07, |
| "loss": 0.4731, |
| "step": 2138 |
| }, |
| { |
| "epoch": 2.607229894394801, |
| "grad_norm": 0.39187078975715245, |
| "learning_rate": 5.097684863991575e-07, |
| "loss": 0.5249, |
| "step": 2139 |
| }, |
| { |
| "epoch": 2.6084484159220147, |
| "grad_norm": 0.3885064721528569, |
| "learning_rate": 5.066519835997613e-07, |
| "loss": 0.5225, |
| "step": 2140 |
| }, |
| { |
| "epoch": 2.6096669374492283, |
| "grad_norm": 0.41543896829402627, |
| "learning_rate": 5.03544528006718e-07, |
| "loss": 0.5476, |
| "step": 2141 |
| }, |
| { |
| "epoch": 2.610885458976442, |
| "grad_norm": 0.33915812403705176, |
| "learning_rate": 5.004461258767873e-07, |
| "loss": 0.4825, |
| "step": 2142 |
| }, |
| { |
| "epoch": 2.6121039805036554, |
| "grad_norm": 0.39963867108256157, |
| "learning_rate": 4.973567834484988e-07, |
| "loss": 0.4868, |
| "step": 2143 |
| }, |
| { |
| "epoch": 2.6133225020308695, |
| "grad_norm": 0.4052069661227251, |
| "learning_rate": 4.942765069421384e-07, |
| "loss": 0.5707, |
| "step": 2144 |
| }, |
| { |
| "epoch": 2.6145410235580826, |
| "grad_norm": 0.3744715850855104, |
| "learning_rate": 4.91205302559743e-07, |
| "loss": 0.4698, |
| "step": 2145 |
| }, |
| { |
| "epoch": 2.6157595450852966, |
| "grad_norm": 0.39172802789195654, |
| "learning_rate": 4.881431764850775e-07, |
| "loss": 0.5429, |
| "step": 2146 |
| }, |
| { |
| "epoch": 2.61697806661251, |
| "grad_norm": 0.3617617734796279, |
| "learning_rate": 4.850901348836328e-07, |
| "loss": 0.5195, |
| "step": 2147 |
| }, |
| { |
| "epoch": 2.618196588139724, |
| "grad_norm": 0.3582182319101665, |
| "learning_rate": 4.820461839026047e-07, |
| "loss": 0.5237, |
| "step": 2148 |
| }, |
| { |
| "epoch": 2.6194151096669374, |
| "grad_norm": 0.382565389265081, |
| "learning_rate": 4.79011329670887e-07, |
| "loss": 0.508, |
| "step": 2149 |
| }, |
| { |
| "epoch": 2.620633631194151, |
| "grad_norm": 0.36371999280375944, |
| "learning_rate": 4.7598557829905913e-07, |
| "loss": 0.5138, |
| "step": 2150 |
| }, |
| { |
| "epoch": 2.621852152721365, |
| "grad_norm": 0.36372813807546805, |
| "learning_rate": 4.729689358793693e-07, |
| "loss": 0.4863, |
| "step": 2151 |
| }, |
| { |
| "epoch": 2.6230706742485785, |
| "grad_norm": 0.4358272328748702, |
| "learning_rate": 4.699614084857257e-07, |
| "loss": 0.5501, |
| "step": 2152 |
| }, |
| { |
| "epoch": 2.624289195775792, |
| "grad_norm": 0.40082789201202496, |
| "learning_rate": 4.669630021736854e-07, |
| "loss": 0.4957, |
| "step": 2153 |
| }, |
| { |
| "epoch": 2.6255077173030057, |
| "grad_norm": 0.38531826765138316, |
| "learning_rate": 4.639737229804403e-07, |
| "loss": 0.5189, |
| "step": 2154 |
| }, |
| { |
| "epoch": 2.6267262388302193, |
| "grad_norm": 0.3510117904392168, |
| "learning_rate": 4.609935769248025e-07, |
| "loss": 0.4438, |
| "step": 2155 |
| }, |
| { |
| "epoch": 2.627944760357433, |
| "grad_norm": 0.3854632098940677, |
| "learning_rate": 4.5802257000719885e-07, |
| "loss": 0.5672, |
| "step": 2156 |
| }, |
| { |
| "epoch": 2.6291632818846464, |
| "grad_norm": 0.356713590588076, |
| "learning_rate": 4.5506070820964973e-07, |
| "loss": 0.4941, |
| "step": 2157 |
| }, |
| { |
| "epoch": 2.6303818034118605, |
| "grad_norm": 0.37107534196018116, |
| "learning_rate": 4.5210799749576815e-07, |
| "loss": 0.537, |
| "step": 2158 |
| }, |
| { |
| "epoch": 2.631600324939074, |
| "grad_norm": 0.36951174703750844, |
| "learning_rate": 4.4916444381073674e-07, |
| "loss": 0.487, |
| "step": 2159 |
| }, |
| { |
| "epoch": 2.6328188464662876, |
| "grad_norm": 0.3737744583819628, |
| "learning_rate": 4.4623005308130243e-07, |
| "loss": 0.5047, |
| "step": 2160 |
| }, |
| { |
| "epoch": 2.634037367993501, |
| "grad_norm": 0.41814109045623277, |
| "learning_rate": 4.433048312157651e-07, |
| "loss": 0.4921, |
| "step": 2161 |
| }, |
| { |
| "epoch": 2.6352558895207148, |
| "grad_norm": 0.38314084064991044, |
| "learning_rate": 4.4038878410396003e-07, |
| "loss": 0.545, |
| "step": 2162 |
| }, |
| { |
| "epoch": 2.636474411047929, |
| "grad_norm": 0.34232486717400545, |
| "learning_rate": 4.374819176172501e-07, |
| "loss": 0.451, |
| "step": 2163 |
| }, |
| { |
| "epoch": 2.637692932575142, |
| "grad_norm": 0.4161225048009829, |
| "learning_rate": 4.3458423760851523e-07, |
| "loss": 0.5468, |
| "step": 2164 |
| }, |
| { |
| "epoch": 2.638911454102356, |
| "grad_norm": 0.3670515864821956, |
| "learning_rate": 4.316957499121377e-07, |
| "loss": 0.5067, |
| "step": 2165 |
| }, |
| { |
| "epoch": 2.6401299756295695, |
| "grad_norm": 0.3624129319596192, |
| "learning_rate": 4.2881646034398926e-07, |
| "loss": 0.4816, |
| "step": 2166 |
| }, |
| { |
| "epoch": 2.641348497156783, |
| "grad_norm": 0.3972920967019095, |
| "learning_rate": 4.2594637470142587e-07, |
| "loss": 0.5452, |
| "step": 2167 |
| }, |
| { |
| "epoch": 2.6425670186839967, |
| "grad_norm": 0.36647997443354524, |
| "learning_rate": 4.230854987632671e-07, |
| "loss": 0.4962, |
| "step": 2168 |
| }, |
| { |
| "epoch": 2.6437855402112103, |
| "grad_norm": 0.38616087967711843, |
| "learning_rate": 4.2023383828979305e-07, |
| "loss": 0.5471, |
| "step": 2169 |
| }, |
| { |
| "epoch": 2.6450040617384243, |
| "grad_norm": 0.35103710867257426, |
| "learning_rate": 4.173913990227252e-07, |
| "loss": 0.4679, |
| "step": 2170 |
| }, |
| { |
| "epoch": 2.6462225832656374, |
| "grad_norm": 0.39309483512948734, |
| "learning_rate": 4.145581866852211e-07, |
| "loss": 0.5224, |
| "step": 2171 |
| }, |
| { |
| "epoch": 2.6474411047928514, |
| "grad_norm": 0.38439655446848475, |
| "learning_rate": 4.1173420698186027e-07, |
| "loss": 0.504, |
| "step": 2172 |
| }, |
| { |
| "epoch": 2.648659626320065, |
| "grad_norm": 0.3577220323312346, |
| "learning_rate": 4.089194655986306e-07, |
| "loss": 0.5131, |
| "step": 2173 |
| }, |
| { |
| "epoch": 2.6498781478472786, |
| "grad_norm": 0.36326136971896916, |
| "learning_rate": 4.0611396820291915e-07, |
| "loss": 0.5451, |
| "step": 2174 |
| }, |
| { |
| "epoch": 2.651096669374492, |
| "grad_norm": 0.36056618051796013, |
| "learning_rate": 4.0331772044350235e-07, |
| "loss": 0.5175, |
| "step": 2175 |
| }, |
| { |
| "epoch": 2.6523151909017058, |
| "grad_norm": 0.35979010393998906, |
| "learning_rate": 4.0053072795053163e-07, |
| "loss": 0.5057, |
| "step": 2176 |
| }, |
| { |
| "epoch": 2.6535337124289198, |
| "grad_norm": 0.38026873249239673, |
| "learning_rate": 3.9775299633552535e-07, |
| "loss": 0.5173, |
| "step": 2177 |
| }, |
| { |
| "epoch": 2.6547522339561334, |
| "grad_norm": 0.34812609416605766, |
| "learning_rate": 3.9498453119134917e-07, |
| "loss": 0.4774, |
| "step": 2178 |
| }, |
| { |
| "epoch": 2.655970755483347, |
| "grad_norm": 0.37873267778767, |
| "learning_rate": 3.9222533809221864e-07, |
| "loss": 0.5171, |
| "step": 2179 |
| }, |
| { |
| "epoch": 2.6571892770105605, |
| "grad_norm": 0.3876778226332137, |
| "learning_rate": 3.894754225936753e-07, |
| "loss": 0.5367, |
| "step": 2180 |
| }, |
| { |
| "epoch": 2.658407798537774, |
| "grad_norm": 0.37800746580583733, |
| "learning_rate": 3.8673479023258464e-07, |
| "loss": 0.5366, |
| "step": 2181 |
| }, |
| { |
| "epoch": 2.6596263200649877, |
| "grad_norm": 0.3592372247497727, |
| "learning_rate": 3.840034465271164e-07, |
| "loss": 0.4612, |
| "step": 2182 |
| }, |
| { |
| "epoch": 2.6608448415922012, |
| "grad_norm": 0.37676898244480056, |
| "learning_rate": 3.812813969767398e-07, |
| "loss": 0.5335, |
| "step": 2183 |
| }, |
| { |
| "epoch": 2.6620633631194153, |
| "grad_norm": 0.3889687505966972, |
| "learning_rate": 3.7856864706221187e-07, |
| "loss": 0.5379, |
| "step": 2184 |
| }, |
| { |
| "epoch": 2.663281884646629, |
| "grad_norm": 0.3429621452435135, |
| "learning_rate": 3.7586520224556444e-07, |
| "loss": 0.4249, |
| "step": 2185 |
| }, |
| { |
| "epoch": 2.6645004061738424, |
| "grad_norm": 0.4100593265019823, |
| "learning_rate": 3.731710679700923e-07, |
| "loss": 0.5571, |
| "step": 2186 |
| }, |
| { |
| "epoch": 2.665718927701056, |
| "grad_norm": 0.3754827320099358, |
| "learning_rate": 3.7048624966034506e-07, |
| "loss": 0.4772, |
| "step": 2187 |
| }, |
| { |
| "epoch": 2.6669374492282696, |
| "grad_norm": 0.506362418039737, |
| "learning_rate": 3.6781075272211643e-07, |
| "loss": 0.4898, |
| "step": 2188 |
| }, |
| { |
| "epoch": 2.6681559707554836, |
| "grad_norm": 0.39960584269392463, |
| "learning_rate": 3.6514458254242936e-07, |
| "loss": 0.5355, |
| "step": 2189 |
| }, |
| { |
| "epoch": 2.6693744922826967, |
| "grad_norm": 0.38884516821746157, |
| "learning_rate": 3.6248774448952695e-07, |
| "loss": 0.4607, |
| "step": 2190 |
| }, |
| { |
| "epoch": 2.6705930138099108, |
| "grad_norm": 0.38681697956869593, |
| "learning_rate": 3.598402439128656e-07, |
| "loss": 0.5662, |
| "step": 2191 |
| }, |
| { |
| "epoch": 2.6718115353371243, |
| "grad_norm": 0.3756082857300239, |
| "learning_rate": 3.572020861430997e-07, |
| "loss": 0.5143, |
| "step": 2192 |
| }, |
| { |
| "epoch": 2.673030056864338, |
| "grad_norm": 0.40002189934283794, |
| "learning_rate": 3.545732764920717e-07, |
| "loss": 0.5061, |
| "step": 2193 |
| }, |
| { |
| "epoch": 2.6742485783915515, |
| "grad_norm": 0.36384776166641386, |
| "learning_rate": 3.519538202528011e-07, |
| "loss": 0.504, |
| "step": 2194 |
| }, |
| { |
| "epoch": 2.675467099918765, |
| "grad_norm": 0.3788979703828696, |
| "learning_rate": 3.4934372269947613e-07, |
| "loss": 0.4801, |
| "step": 2195 |
| }, |
| { |
| "epoch": 2.676685621445979, |
| "grad_norm": 0.3818268978478761, |
| "learning_rate": 3.467429890874424e-07, |
| "loss": 0.5279, |
| "step": 2196 |
| }, |
| { |
| "epoch": 2.6779041429731927, |
| "grad_norm": 0.35141288719000796, |
| "learning_rate": 3.4415162465318843e-07, |
| "loss": 0.4803, |
| "step": 2197 |
| }, |
| { |
| "epoch": 2.6791226645004063, |
| "grad_norm": 0.39220258510601774, |
| "learning_rate": 3.4156963461434156e-07, |
| "loss": 0.5009, |
| "step": 2198 |
| }, |
| { |
| "epoch": 2.68034118602762, |
| "grad_norm": 0.4103479084725928, |
| "learning_rate": 3.3899702416965166e-07, |
| "loss": 0.6119, |
| "step": 2199 |
| }, |
| { |
| "epoch": 2.6815597075548334, |
| "grad_norm": 0.3797647584117213, |
| "learning_rate": 3.364337984989846e-07, |
| "loss": 0.4665, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.682778229082047, |
| "grad_norm": 0.3540938338082574, |
| "learning_rate": 3.3387996276330934e-07, |
| "loss": 0.4382, |
| "step": 2201 |
| }, |
| { |
| "epoch": 2.6839967506092606, |
| "grad_norm": 0.3743322734466896, |
| "learning_rate": 3.313355221046888e-07, |
| "loss": 0.5334, |
| "step": 2202 |
| }, |
| { |
| "epoch": 2.6852152721364746, |
| "grad_norm": 0.38933035048539233, |
| "learning_rate": 3.2880048164627087e-07, |
| "loss": 0.5351, |
| "step": 2203 |
| }, |
| { |
| "epoch": 2.686433793663688, |
| "grad_norm": 0.37060820135278527, |
| "learning_rate": 3.262748464922738e-07, |
| "loss": 0.5097, |
| "step": 2204 |
| }, |
| { |
| "epoch": 2.6876523151909018, |
| "grad_norm": 0.38495794293474345, |
| "learning_rate": 3.2375862172797866e-07, |
| "loss": 0.5678, |
| "step": 2205 |
| }, |
| { |
| "epoch": 2.6888708367181153, |
| "grad_norm": 0.36198556723986514, |
| "learning_rate": 3.212518124197217e-07, |
| "loss": 0.4704, |
| "step": 2206 |
| }, |
| { |
| "epoch": 2.690089358245329, |
| "grad_norm": 0.36538988897114305, |
| "learning_rate": 3.1875442361487987e-07, |
| "loss": 0.5394, |
| "step": 2207 |
| }, |
| { |
| "epoch": 2.6913078797725425, |
| "grad_norm": 0.3529695149923601, |
| "learning_rate": 3.1626646034186084e-07, |
| "loss": 0.4924, |
| "step": 2208 |
| }, |
| { |
| "epoch": 2.692526401299756, |
| "grad_norm": 0.3589469252207183, |
| "learning_rate": 3.1378792761009745e-07, |
| "loss": 0.5141, |
| "step": 2209 |
| }, |
| { |
| "epoch": 2.69374492282697, |
| "grad_norm": 0.3716457330306638, |
| "learning_rate": 3.1131883041003065e-07, |
| "loss": 0.5162, |
| "step": 2210 |
| }, |
| { |
| "epoch": 2.6949634443541837, |
| "grad_norm": 0.39002214118541273, |
| "learning_rate": 3.0885917371310745e-07, |
| "loss": 0.5371, |
| "step": 2211 |
| }, |
| { |
| "epoch": 2.6961819658813972, |
| "grad_norm": 0.38634067585511356, |
| "learning_rate": 3.0640896247176257e-07, |
| "loss": 0.5303, |
| "step": 2212 |
| }, |
| { |
| "epoch": 2.697400487408611, |
| "grad_norm": 0.3679719670190458, |
| "learning_rate": 3.039682016194162e-07, |
| "loss": 0.4844, |
| "step": 2213 |
| }, |
| { |
| "epoch": 2.6986190089358244, |
| "grad_norm": 0.3650561363535968, |
| "learning_rate": 3.015368960704584e-07, |
| "loss": 0.5491, |
| "step": 2214 |
| }, |
| { |
| "epoch": 2.6998375304630384, |
| "grad_norm": 0.35589244778023654, |
| "learning_rate": 2.9911505072024173e-07, |
| "loss": 0.4435, |
| "step": 2215 |
| }, |
| { |
| "epoch": 2.7010560519902516, |
| "grad_norm": 0.3971577742524068, |
| "learning_rate": 2.967026704450704e-07, |
| "loss": 0.5417, |
| "step": 2216 |
| }, |
| { |
| "epoch": 2.7022745735174656, |
| "grad_norm": 0.36768123246070666, |
| "learning_rate": 2.942997601021924e-07, |
| "loss": 0.4946, |
| "step": 2217 |
| }, |
| { |
| "epoch": 2.703493095044679, |
| "grad_norm": 0.3868406198179559, |
| "learning_rate": 2.9190632452978706e-07, |
| "loss": 0.5273, |
| "step": 2218 |
| }, |
| { |
| "epoch": 2.7047116165718927, |
| "grad_norm": 0.3708200192787325, |
| "learning_rate": 2.895223685469578e-07, |
| "loss": 0.5005, |
| "step": 2219 |
| }, |
| { |
| "epoch": 2.7059301380991063, |
| "grad_norm": 0.37953093384871284, |
| "learning_rate": 2.871478969537206e-07, |
| "loss": 0.5435, |
| "step": 2220 |
| }, |
| { |
| "epoch": 2.70714865962632, |
| "grad_norm": 0.3614383708651899, |
| "learning_rate": 2.847829145309933e-07, |
| "loss": 0.4749, |
| "step": 2221 |
| }, |
| { |
| "epoch": 2.708367181153534, |
| "grad_norm": 0.3737932290872502, |
| "learning_rate": 2.824274260405896e-07, |
| "loss": 0.5178, |
| "step": 2222 |
| }, |
| { |
| "epoch": 2.7095857026807475, |
| "grad_norm": 0.3678542573451642, |
| "learning_rate": 2.800814362252091e-07, |
| "loss": 0.5328, |
| "step": 2223 |
| }, |
| { |
| "epoch": 2.710804224207961, |
| "grad_norm": 0.3590587724355208, |
| "learning_rate": 2.7774494980842117e-07, |
| "loss": 0.488, |
| "step": 2224 |
| }, |
| { |
| "epoch": 2.7120227457351747, |
| "grad_norm": 0.37585088629389257, |
| "learning_rate": 2.754179714946653e-07, |
| "loss": 0.4925, |
| "step": 2225 |
| }, |
| { |
| "epoch": 2.7132412672623882, |
| "grad_norm": 0.3719950620904676, |
| "learning_rate": 2.7310050596923323e-07, |
| "loss": 0.4999, |
| "step": 2226 |
| }, |
| { |
| "epoch": 2.714459788789602, |
| "grad_norm": 0.3568715514712545, |
| "learning_rate": 2.7079255789826565e-07, |
| "loss": 0.4807, |
| "step": 2227 |
| }, |
| { |
| "epoch": 2.7156783103168154, |
| "grad_norm": 0.35482907121703094, |
| "learning_rate": 2.6849413192873816e-07, |
| "loss": 0.4793, |
| "step": 2228 |
| }, |
| { |
| "epoch": 2.7168968318440294, |
| "grad_norm": 0.395651827257008, |
| "learning_rate": 2.662052326884551e-07, |
| "loss": 0.544, |
| "step": 2229 |
| }, |
| { |
| "epoch": 2.718115353371243, |
| "grad_norm": 0.3935098863850654, |
| "learning_rate": 2.639258647860399e-07, |
| "loss": 0.5635, |
| "step": 2230 |
| }, |
| { |
| "epoch": 2.7193338748984566, |
| "grad_norm": 0.40165173172581203, |
| "learning_rate": 2.616560328109219e-07, |
| "loss": 0.4864, |
| "step": 2231 |
| }, |
| { |
| "epoch": 2.72055239642567, |
| "grad_norm": 0.37629992760618575, |
| "learning_rate": 2.593957413333331e-07, |
| "loss": 0.4642, |
| "step": 2232 |
| }, |
| { |
| "epoch": 2.7217709179528837, |
| "grad_norm": 0.39061948229389115, |
| "learning_rate": 2.571449949042942e-07, |
| "loss": 0.4931, |
| "step": 2233 |
| }, |
| { |
| "epoch": 2.7229894394800978, |
| "grad_norm": 0.37406505289417313, |
| "learning_rate": 2.549037980556096e-07, |
| "loss": 0.5149, |
| "step": 2234 |
| }, |
| { |
| "epoch": 2.724207961007311, |
| "grad_norm": 0.3865469301953249, |
| "learning_rate": 2.5267215529985346e-07, |
| "loss": 0.5662, |
| "step": 2235 |
| }, |
| { |
| "epoch": 2.725426482534525, |
| "grad_norm": 0.4176157521247648, |
| "learning_rate": 2.5045007113036315e-07, |
| "loss": 0.4846, |
| "step": 2236 |
| }, |
| { |
| "epoch": 2.7266450040617385, |
| "grad_norm": 0.3593515844357923, |
| "learning_rate": 2.4823755002123253e-07, |
| "loss": 0.5028, |
| "step": 2237 |
| }, |
| { |
| "epoch": 2.727863525588952, |
| "grad_norm": 0.3763091273664034, |
| "learning_rate": 2.4603459642729867e-07, |
| "loss": 0.4883, |
| "step": 2238 |
| }, |
| { |
| "epoch": 2.7290820471161656, |
| "grad_norm": 0.3254080082849582, |
| "learning_rate": 2.4384121478413403e-07, |
| "loss": 0.4552, |
| "step": 2239 |
| }, |
| { |
| "epoch": 2.7303005686433792, |
| "grad_norm": 0.3704714874750766, |
| "learning_rate": 2.416574095080404e-07, |
| "loss": 0.5491, |
| "step": 2240 |
| }, |
| { |
| "epoch": 2.7315190901705932, |
| "grad_norm": 0.36236149508058013, |
| "learning_rate": 2.394831849960377e-07, |
| "loss": 0.5425, |
| "step": 2241 |
| }, |
| { |
| "epoch": 2.732737611697807, |
| "grad_norm": 0.3749697833768894, |
| "learning_rate": 2.373185456258531e-07, |
| "loss": 0.5278, |
| "step": 2242 |
| }, |
| { |
| "epoch": 2.7339561332250204, |
| "grad_norm": 0.34972552754271036, |
| "learning_rate": 2.3516349575591568e-07, |
| "loss": 0.4618, |
| "step": 2243 |
| }, |
| { |
| "epoch": 2.735174654752234, |
| "grad_norm": 0.375574385039016, |
| "learning_rate": 2.330180397253473e-07, |
| "loss": 0.5175, |
| "step": 2244 |
| }, |
| { |
| "epoch": 2.7363931762794476, |
| "grad_norm": 0.37141386139233595, |
| "learning_rate": 2.3088218185395195e-07, |
| "loss": 0.5511, |
| "step": 2245 |
| }, |
| { |
| "epoch": 2.737611697806661, |
| "grad_norm": 0.3813893728380543, |
| "learning_rate": 2.2875592644220846e-07, |
| "loss": 0.4508, |
| "step": 2246 |
| }, |
| { |
| "epoch": 2.7388302193338747, |
| "grad_norm": 0.3775198226604257, |
| "learning_rate": 2.266392777712595e-07, |
| "loss": 0.4983, |
| "step": 2247 |
| }, |
| { |
| "epoch": 2.7400487408610887, |
| "grad_norm": 0.3750870423354922, |
| "learning_rate": 2.245322401029082e-07, |
| "loss": 0.5044, |
| "step": 2248 |
| }, |
| { |
| "epoch": 2.7412672623883023, |
| "grad_norm": 0.40047002764422285, |
| "learning_rate": 2.2243481767960483e-07, |
| "loss": 0.5827, |
| "step": 2249 |
| }, |
| { |
| "epoch": 2.742485783915516, |
| "grad_norm": 0.36949294277977374, |
| "learning_rate": 2.2034701472443854e-07, |
| "loss": 0.4752, |
| "step": 2250 |
| }, |
| { |
| "epoch": 2.7437043054427295, |
| "grad_norm": 0.3824346198409812, |
| "learning_rate": 2.1826883544113165e-07, |
| "loss": 0.5286, |
| "step": 2251 |
| }, |
| { |
| "epoch": 2.744922826969943, |
| "grad_norm": 0.3417914549036838, |
| "learning_rate": 2.1620028401402815e-07, |
| "loss": 0.4697, |
| "step": 2252 |
| }, |
| { |
| "epoch": 2.7461413484971566, |
| "grad_norm": 0.41461070303616865, |
| "learning_rate": 2.141413646080881e-07, |
| "loss": 0.5349, |
| "step": 2253 |
| }, |
| { |
| "epoch": 2.74735987002437, |
| "grad_norm": 0.37741611628599325, |
| "learning_rate": 2.1209208136887593e-07, |
| "loss": 0.5375, |
| "step": 2254 |
| }, |
| { |
| "epoch": 2.7485783915515842, |
| "grad_norm": 0.39321969452161015, |
| "learning_rate": 2.1005243842255552e-07, |
| "loss": 0.5025, |
| "step": 2255 |
| }, |
| { |
| "epoch": 2.749796913078798, |
| "grad_norm": 0.36043353231485903, |
| "learning_rate": 2.0802243987588068e-07, |
| "loss": 0.479, |
| "step": 2256 |
| }, |
| { |
| "epoch": 2.7510154346060114, |
| "grad_norm": 0.3771749654256085, |
| "learning_rate": 2.060020898161863e-07, |
| "loss": 0.5296, |
| "step": 2257 |
| }, |
| { |
| "epoch": 2.752233956133225, |
| "grad_norm": 0.36344961189872743, |
| "learning_rate": 2.0399139231137731e-07, |
| "loss": 0.513, |
| "step": 2258 |
| }, |
| { |
| "epoch": 2.7534524776604385, |
| "grad_norm": 0.37387207502104647, |
| "learning_rate": 2.019903514099275e-07, |
| "loss": 0.4837, |
| "step": 2259 |
| }, |
| { |
| "epoch": 2.7546709991876526, |
| "grad_norm": 0.4074258198058105, |
| "learning_rate": 1.999989711408662e-07, |
| "loss": 0.5165, |
| "step": 2260 |
| }, |
| { |
| "epoch": 2.7558895207148657, |
| "grad_norm": 0.40163838399796564, |
| "learning_rate": 1.9801725551377217e-07, |
| "loss": 0.484, |
| "step": 2261 |
| }, |
| { |
| "epoch": 2.7571080422420797, |
| "grad_norm": 0.39748068787202046, |
| "learning_rate": 1.9604520851876196e-07, |
| "loss": 0.5346, |
| "step": 2262 |
| }, |
| { |
| "epoch": 2.7583265637692933, |
| "grad_norm": 0.3837696844219795, |
| "learning_rate": 1.940828341264861e-07, |
| "loss": 0.5195, |
| "step": 2263 |
| }, |
| { |
| "epoch": 2.759545085296507, |
| "grad_norm": 0.372851636737616, |
| "learning_rate": 1.9213013628812173e-07, |
| "loss": 0.5025, |
| "step": 2264 |
| }, |
| { |
| "epoch": 2.7607636068237205, |
| "grad_norm": 0.3824295932103617, |
| "learning_rate": 1.9018711893535991e-07, |
| "loss": 0.4982, |
| "step": 2265 |
| }, |
| { |
| "epoch": 2.761982128350934, |
| "grad_norm": 0.3941575511286188, |
| "learning_rate": 1.8825378598040067e-07, |
| "loss": 0.4943, |
| "step": 2266 |
| }, |
| { |
| "epoch": 2.763200649878148, |
| "grad_norm": 0.3942171147413845, |
| "learning_rate": 1.863301413159474e-07, |
| "loss": 0.5597, |
| "step": 2267 |
| }, |
| { |
| "epoch": 2.7644191714053616, |
| "grad_norm": 0.3934183991195872, |
| "learning_rate": 1.8441618881519186e-07, |
| "loss": 0.483, |
| "step": 2268 |
| }, |
| { |
| "epoch": 2.765637692932575, |
| "grad_norm": 0.37982935450069527, |
| "learning_rate": 1.825119323318153e-07, |
| "loss": 0.4977, |
| "step": 2269 |
| }, |
| { |
| "epoch": 2.766856214459789, |
| "grad_norm": 0.35859063843934896, |
| "learning_rate": 1.8061737569997407e-07, |
| "loss": 0.5082, |
| "step": 2270 |
| }, |
| { |
| "epoch": 2.7680747359870024, |
| "grad_norm": 0.3882067848746523, |
| "learning_rate": 1.787325227342951e-07, |
| "loss": 0.5204, |
| "step": 2271 |
| }, |
| { |
| "epoch": 2.769293257514216, |
| "grad_norm": 0.3679939619950258, |
| "learning_rate": 1.768573772298665e-07, |
| "loss": 0.5395, |
| "step": 2272 |
| }, |
| { |
| "epoch": 2.7705117790414295, |
| "grad_norm": 0.3720953279482073, |
| "learning_rate": 1.7499194296223209e-07, |
| "loss": 0.5176, |
| "step": 2273 |
| }, |
| { |
| "epoch": 2.7717303005686436, |
| "grad_norm": 0.3858227060687811, |
| "learning_rate": 1.7313622368738014e-07, |
| "loss": 0.5067, |
| "step": 2274 |
| }, |
| { |
| "epoch": 2.772948822095857, |
| "grad_norm": 0.3747485160463101, |
| "learning_rate": 1.7129022314174015e-07, |
| "loss": 0.4811, |
| "step": 2275 |
| }, |
| { |
| "epoch": 2.7741673436230707, |
| "grad_norm": 0.38326441088479096, |
| "learning_rate": 1.694539450421734e-07, |
| "loss": 0.4991, |
| "step": 2276 |
| }, |
| { |
| "epoch": 2.7753858651502843, |
| "grad_norm": 0.34922626985376715, |
| "learning_rate": 1.6762739308596343e-07, |
| "loss": 0.5068, |
| "step": 2277 |
| }, |
| { |
| "epoch": 2.776604386677498, |
| "grad_norm": 0.37804059096779785, |
| "learning_rate": 1.6581057095081288e-07, |
| "loss": 0.4969, |
| "step": 2278 |
| }, |
| { |
| "epoch": 2.777822908204712, |
| "grad_norm": 0.39482724733491026, |
| "learning_rate": 1.640034822948311e-07, |
| "loss": 0.5356, |
| "step": 2279 |
| }, |
| { |
| "epoch": 2.779041429731925, |
| "grad_norm": 0.3564142364029908, |
| "learning_rate": 1.6220613075653201e-07, |
| "loss": 0.5082, |
| "step": 2280 |
| }, |
| { |
| "epoch": 2.780259951259139, |
| "grad_norm": 0.3808785336291618, |
| "learning_rate": 1.604185199548225e-07, |
| "loss": 0.5012, |
| "step": 2281 |
| }, |
| { |
| "epoch": 2.7814784727863526, |
| "grad_norm": 0.3466837847337903, |
| "learning_rate": 1.586406534889967e-07, |
| "loss": 0.5215, |
| "step": 2282 |
| }, |
| { |
| "epoch": 2.782696994313566, |
| "grad_norm": 0.35748226035408104, |
| "learning_rate": 1.5687253493873068e-07, |
| "loss": 0.4975, |
| "step": 2283 |
| }, |
| { |
| "epoch": 2.78391551584078, |
| "grad_norm": 0.38342498504946887, |
| "learning_rate": 1.5511416786407164e-07, |
| "loss": 0.499, |
| "step": 2284 |
| }, |
| { |
| "epoch": 2.7851340373679934, |
| "grad_norm": 0.38368966272625266, |
| "learning_rate": 1.5336555580543256e-07, |
| "loss": 0.5289, |
| "step": 2285 |
| }, |
| { |
| "epoch": 2.7863525588952074, |
| "grad_norm": 0.3761350556362916, |
| "learning_rate": 1.51626702283586e-07, |
| "loss": 0.5334, |
| "step": 2286 |
| }, |
| { |
| "epoch": 2.7875710804224205, |
| "grad_norm": 0.34488391672914276, |
| "learning_rate": 1.4989761079965583e-07, |
| "loss": 0.4731, |
| "step": 2287 |
| }, |
| { |
| "epoch": 2.7887896019496345, |
| "grad_norm": 0.3711824659401226, |
| "learning_rate": 1.4817828483510933e-07, |
| "loss": 0.5647, |
| "step": 2288 |
| }, |
| { |
| "epoch": 2.790008123476848, |
| "grad_norm": 0.36364946680537624, |
| "learning_rate": 1.4646872785175182e-07, |
| "loss": 0.5068, |
| "step": 2289 |
| }, |
| { |
| "epoch": 2.7912266450040617, |
| "grad_norm": 0.3640609104391418, |
| "learning_rate": 1.4476894329172042e-07, |
| "loss": 0.5129, |
| "step": 2290 |
| }, |
| { |
| "epoch": 2.7924451665312753, |
| "grad_norm": 0.36422428829864073, |
| "learning_rate": 1.4307893457747358e-07, |
| "loss": 0.5234, |
| "step": 2291 |
| }, |
| { |
| "epoch": 2.793663688058489, |
| "grad_norm": 0.3541446608840156, |
| "learning_rate": 1.4139870511178767e-07, |
| "loss": 0.5035, |
| "step": 2292 |
| }, |
| { |
| "epoch": 2.794882209585703, |
| "grad_norm": 0.3772886381428785, |
| "learning_rate": 1.3972825827774928e-07, |
| "loss": 0.5069, |
| "step": 2293 |
| }, |
| { |
| "epoch": 2.7961007311129165, |
| "grad_norm": 0.39173775427502877, |
| "learning_rate": 1.3806759743874688e-07, |
| "loss": 0.5421, |
| "step": 2294 |
| }, |
| { |
| "epoch": 2.79731925264013, |
| "grad_norm": 0.3652175638097129, |
| "learning_rate": 1.3641672593846632e-07, |
| "loss": 0.5213, |
| "step": 2295 |
| }, |
| { |
| "epoch": 2.7985377741673436, |
| "grad_norm": 0.36544225086563453, |
| "learning_rate": 1.3477564710088097e-07, |
| "loss": 0.4687, |
| "step": 2296 |
| }, |
| { |
| "epoch": 2.799756295694557, |
| "grad_norm": 0.3740249931778571, |
| "learning_rate": 1.3314436423024935e-07, |
| "loss": 0.518, |
| "step": 2297 |
| }, |
| { |
| "epoch": 2.8009748172217708, |
| "grad_norm": 0.37454883087183666, |
| "learning_rate": 1.3152288061110518e-07, |
| "loss": 0.4902, |
| "step": 2298 |
| }, |
| { |
| "epoch": 2.8021933387489844, |
| "grad_norm": 0.3629763422238737, |
| "learning_rate": 1.2991119950825138e-07, |
| "loss": 0.5329, |
| "step": 2299 |
| }, |
| { |
| "epoch": 2.8034118602761984, |
| "grad_norm": 0.3825610104170137, |
| "learning_rate": 1.2830932416675323e-07, |
| "loss": 0.5217, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.804630381803412, |
| "grad_norm": 0.35458779804689705, |
| "learning_rate": 1.2671725781193467e-07, |
| "loss": 0.482, |
| "step": 2301 |
| }, |
| { |
| "epoch": 2.8058489033306255, |
| "grad_norm": 0.4060620936449498, |
| "learning_rate": 1.251350036493676e-07, |
| "loss": 0.5396, |
| "step": 2302 |
| }, |
| { |
| "epoch": 2.807067424857839, |
| "grad_norm": 0.37363725202431575, |
| "learning_rate": 1.2356256486486806e-07, |
| "loss": 0.4898, |
| "step": 2303 |
| }, |
| { |
| "epoch": 2.8082859463850527, |
| "grad_norm": 0.35835154484517495, |
| "learning_rate": 1.2199994462448906e-07, |
| "loss": 0.493, |
| "step": 2304 |
| }, |
| { |
| "epoch": 2.8095044679122667, |
| "grad_norm": 0.42120102584212404, |
| "learning_rate": 1.2044714607451436e-07, |
| "loss": 0.5257, |
| "step": 2305 |
| }, |
| { |
| "epoch": 2.81072298943948, |
| "grad_norm": 0.3556943551485984, |
| "learning_rate": 1.1890417234145246e-07, |
| "loss": 0.5095, |
| "step": 2306 |
| }, |
| { |
| "epoch": 2.811941510966694, |
| "grad_norm": 0.38386985507916965, |
| "learning_rate": 1.1737102653202825e-07, |
| "loss": 0.5279, |
| "step": 2307 |
| }, |
| { |
| "epoch": 2.8131600324939074, |
| "grad_norm": 0.36317152822127746, |
| "learning_rate": 1.1584771173318076e-07, |
| "loss": 0.4927, |
| "step": 2308 |
| }, |
| { |
| "epoch": 2.814378554021121, |
| "grad_norm": 0.4011884968558589, |
| "learning_rate": 1.1433423101205321e-07, |
| "loss": 0.5282, |
| "step": 2309 |
| }, |
| { |
| "epoch": 2.8155970755483346, |
| "grad_norm": 0.3642862846860186, |
| "learning_rate": 1.1283058741598962e-07, |
| "loss": 0.4734, |
| "step": 2310 |
| }, |
| { |
| "epoch": 2.816815597075548, |
| "grad_norm": 0.38393727230766617, |
| "learning_rate": 1.1133678397252434e-07, |
| "loss": 0.5357, |
| "step": 2311 |
| }, |
| { |
| "epoch": 2.818034118602762, |
| "grad_norm": 0.3837474969152823, |
| "learning_rate": 1.0985282368938199e-07, |
| "loss": 0.5024, |
| "step": 2312 |
| }, |
| { |
| "epoch": 2.819252640129976, |
| "grad_norm": 0.4030179784599761, |
| "learning_rate": 1.0837870955446639e-07, |
| "loss": 0.5339, |
| "step": 2313 |
| }, |
| { |
| "epoch": 2.8204711616571894, |
| "grad_norm": 0.3539980646211641, |
| "learning_rate": 1.0691444453585775e-07, |
| "loss": 0.4979, |
| "step": 2314 |
| }, |
| { |
| "epoch": 2.821689683184403, |
| "grad_norm": 0.3490699508092425, |
| "learning_rate": 1.0546003158180496e-07, |
| "loss": 0.4861, |
| "step": 2315 |
| }, |
| { |
| "epoch": 2.8229082047116165, |
| "grad_norm": 0.3504210969211817, |
| "learning_rate": 1.0401547362071939e-07, |
| "loss": 0.4995, |
| "step": 2316 |
| }, |
| { |
| "epoch": 2.82412672623883, |
| "grad_norm": 0.3626756496837975, |
| "learning_rate": 1.0258077356117057e-07, |
| "loss": 0.5019, |
| "step": 2317 |
| }, |
| { |
| "epoch": 2.8253452477660437, |
| "grad_norm": 0.39120905301617104, |
| "learning_rate": 1.0115593429187942e-07, |
| "loss": 0.5056, |
| "step": 2318 |
| }, |
| { |
| "epoch": 2.8265637692932577, |
| "grad_norm": 0.38360724438244953, |
| "learning_rate": 9.974095868171164e-08, |
| "loss": 0.4574, |
| "step": 2319 |
| }, |
| { |
| "epoch": 2.8277822908204713, |
| "grad_norm": 0.4154087623735947, |
| "learning_rate": 9.833584957967491e-08, |
| "loss": 0.5459, |
| "step": 2320 |
| }, |
| { |
| "epoch": 2.829000812347685, |
| "grad_norm": 0.3684945590836955, |
| "learning_rate": 9.694060981490783e-08, |
| "loss": 0.5044, |
| "step": 2321 |
| }, |
| { |
| "epoch": 2.8302193338748984, |
| "grad_norm": 0.3759625212141255, |
| "learning_rate": 9.555524219667989e-08, |
| "loss": 0.5045, |
| "step": 2322 |
| }, |
| { |
| "epoch": 2.831437855402112, |
| "grad_norm": 0.3711414030240098, |
| "learning_rate": 9.417974951438203e-08, |
| "loss": 0.4909, |
| "step": 2323 |
| }, |
| { |
| "epoch": 2.8326563769293256, |
| "grad_norm": 0.40793520425182356, |
| "learning_rate": 9.281413453752386e-08, |
| "loss": 0.5911, |
| "step": 2324 |
| }, |
| { |
| "epoch": 2.833874898456539, |
| "grad_norm": 0.35038615442889337, |
| "learning_rate": 9.145840001572537e-08, |
| "loss": 0.5061, |
| "step": 2325 |
| }, |
| { |
| "epoch": 2.835093419983753, |
| "grad_norm": 0.33298280777144973, |
| "learning_rate": 9.011254867871244e-08, |
| "loss": 0.4843, |
| "step": 2326 |
| }, |
| { |
| "epoch": 2.8363119415109668, |
| "grad_norm": 0.37732042868872595, |
| "learning_rate": 8.877658323631188e-08, |
| "loss": 0.5434, |
| "step": 2327 |
| }, |
| { |
| "epoch": 2.8375304630381804, |
| "grad_norm": 0.3989852512203333, |
| "learning_rate": 8.745050637844532e-08, |
| "loss": 0.5179, |
| "step": 2328 |
| }, |
| { |
| "epoch": 2.838748984565394, |
| "grad_norm": 0.38524826151374814, |
| "learning_rate": 8.613432077512474e-08, |
| "loss": 0.5135, |
| "step": 2329 |
| }, |
| { |
| "epoch": 2.8399675060926075, |
| "grad_norm": 0.3561681050422304, |
| "learning_rate": 8.482802907644528e-08, |
| "loss": 0.5332, |
| "step": 2330 |
| }, |
| { |
| "epoch": 2.8411860276198215, |
| "grad_norm": 0.35905357929351883, |
| "learning_rate": 8.353163391258302e-08, |
| "loss": 0.4736, |
| "step": 2331 |
| }, |
| { |
| "epoch": 2.8424045491470347, |
| "grad_norm": 0.3771943557678179, |
| "learning_rate": 8.224513789378497e-08, |
| "loss": 0.4974, |
| "step": 2332 |
| }, |
| { |
| "epoch": 2.8436230706742487, |
| "grad_norm": 0.3689008459698471, |
| "learning_rate": 8.09685436103691e-08, |
| "loss": 0.5007, |
| "step": 2333 |
| }, |
| { |
| "epoch": 2.8448415922014623, |
| "grad_norm": 0.3946551692601427, |
| "learning_rate": 7.970185363271432e-08, |
| "loss": 0.5555, |
| "step": 2334 |
| }, |
| { |
| "epoch": 2.846060113728676, |
| "grad_norm": 0.3814135760411076, |
| "learning_rate": 7.844507051125937e-08, |
| "loss": 0.4953, |
| "step": 2335 |
| }, |
| { |
| "epoch": 2.8472786352558894, |
| "grad_norm": 0.3877741958314108, |
| "learning_rate": 7.71981967764951e-08, |
| "loss": 0.5059, |
| "step": 2336 |
| }, |
| { |
| "epoch": 2.848497156783103, |
| "grad_norm": 0.364018962944753, |
| "learning_rate": 7.59612349389599e-08, |
| "loss": 0.4948, |
| "step": 2337 |
| }, |
| { |
| "epoch": 2.849715678310317, |
| "grad_norm": 0.4075558874034061, |
| "learning_rate": 7.473418748923545e-08, |
| "loss": 0.5948, |
| "step": 2338 |
| }, |
| { |
| "epoch": 2.8509341998375306, |
| "grad_norm": 0.37855730911487784, |
| "learning_rate": 7.351705689794042e-08, |
| "loss": 0.424, |
| "step": 2339 |
| }, |
| { |
| "epoch": 2.852152721364744, |
| "grad_norm": 0.3738096901546095, |
| "learning_rate": 7.230984561572729e-08, |
| "loss": 0.5434, |
| "step": 2340 |
| }, |
| { |
| "epoch": 2.8533712428919578, |
| "grad_norm": 0.37627980072639083, |
| "learning_rate": 7.11125560732756e-08, |
| "loss": 0.4934, |
| "step": 2341 |
| }, |
| { |
| "epoch": 2.8545897644191713, |
| "grad_norm": 0.39211088076356204, |
| "learning_rate": 6.992519068128701e-08, |
| "loss": 0.4979, |
| "step": 2342 |
| }, |
| { |
| "epoch": 2.855808285946385, |
| "grad_norm": 0.36180712823693784, |
| "learning_rate": 6.8747751830483e-08, |
| "loss": 0.54, |
| "step": 2343 |
| }, |
| { |
| "epoch": 2.8570268074735985, |
| "grad_norm": 0.3422753121701595, |
| "learning_rate": 6.758024189159718e-08, |
| "loss": 0.4674, |
| "step": 2344 |
| }, |
| { |
| "epoch": 2.8582453290008125, |
| "grad_norm": 0.3670191252106066, |
| "learning_rate": 6.64226632153725e-08, |
| "loss": 0.5208, |
| "step": 2345 |
| }, |
| { |
| "epoch": 2.859463850528026, |
| "grad_norm": 0.3796974952937155, |
| "learning_rate": 6.527501813255344e-08, |
| "loss": 0.5399, |
| "step": 2346 |
| }, |
| { |
| "epoch": 2.8606823720552397, |
| "grad_norm": 0.36481445761250786, |
| "learning_rate": 6.413730895388714e-08, |
| "loss": 0.5072, |
| "step": 2347 |
| }, |
| { |
| "epoch": 2.8619008935824533, |
| "grad_norm": 0.3636784036139616, |
| "learning_rate": 6.300953797011178e-08, |
| "loss": 0.5291, |
| "step": 2348 |
| }, |
| { |
| "epoch": 2.863119415109667, |
| "grad_norm": 0.35701639845422045, |
| "learning_rate": 6.18917074519565e-08, |
| "loss": 0.503, |
| "step": 2349 |
| }, |
| { |
| "epoch": 2.864337936636881, |
| "grad_norm": 0.3780638398451471, |
| "learning_rate": 6.078381965013646e-08, |
| "loss": 0.526, |
| "step": 2350 |
| }, |
| { |
| "epoch": 2.865556458164094, |
| "grad_norm": 0.3863922669183168, |
| "learning_rate": 5.968587679534621e-08, |
| "loss": 0.4887, |
| "step": 2351 |
| }, |
| { |
| "epoch": 2.866774979691308, |
| "grad_norm": 0.36868437000440496, |
| "learning_rate": 5.8597881098257924e-08, |
| "loss": 0.535, |
| "step": 2352 |
| }, |
| { |
| "epoch": 2.8679935012185216, |
| "grad_norm": 0.3667806587451281, |
| "learning_rate": 5.751983474951317e-08, |
| "loss": 0.5357, |
| "step": 2353 |
| }, |
| { |
| "epoch": 2.869212022745735, |
| "grad_norm": 0.35160456413985003, |
| "learning_rate": 5.6451739919723417e-08, |
| "loss": 0.4966, |
| "step": 2354 |
| }, |
| { |
| "epoch": 2.8704305442729487, |
| "grad_norm": 0.3702286255153397, |
| "learning_rate": 5.539359875946171e-08, |
| "loss": 0.5364, |
| "step": 2355 |
| }, |
| { |
| "epoch": 2.8716490658001623, |
| "grad_norm": 0.3545425220241147, |
| "learning_rate": 5.434541339926047e-08, |
| "loss": 0.4989, |
| "step": 2356 |
| }, |
| { |
| "epoch": 2.8728675873273763, |
| "grad_norm": 0.37190280510667345, |
| "learning_rate": 5.3307185949605935e-08, |
| "loss": 0.5177, |
| "step": 2357 |
| }, |
| { |
| "epoch": 2.87408610885459, |
| "grad_norm": 0.39562598350439043, |
| "learning_rate": 5.227891850093314e-08, |
| "loss": 0.5489, |
| "step": 2358 |
| }, |
| { |
| "epoch": 2.8753046303818035, |
| "grad_norm": 0.3653775053445267, |
| "learning_rate": 5.12606131236254e-08, |
| "loss": 0.485, |
| "step": 2359 |
| }, |
| { |
| "epoch": 2.876523151909017, |
| "grad_norm": 0.3641584823261951, |
| "learning_rate": 5.025227186800652e-08, |
| "loss": 0.5217, |
| "step": 2360 |
| }, |
| { |
| "epoch": 2.8777416734362307, |
| "grad_norm": 0.38104906167780134, |
| "learning_rate": 4.925389676433745e-08, |
| "loss": 0.485, |
| "step": 2361 |
| }, |
| { |
| "epoch": 2.8789601949634442, |
| "grad_norm": 0.39160093264511175, |
| "learning_rate": 4.8265489822814094e-08, |
| "loss": 0.515, |
| "step": 2362 |
| }, |
| { |
| "epoch": 2.880178716490658, |
| "grad_norm": 0.37954553391255796, |
| "learning_rate": 4.728705303356007e-08, |
| "loss": 0.4743, |
| "step": 2363 |
| }, |
| { |
| "epoch": 2.881397238017872, |
| "grad_norm": 0.3786501167734631, |
| "learning_rate": 4.631858836662562e-08, |
| "loss": 0.5282, |
| "step": 2364 |
| }, |
| { |
| "epoch": 2.8826157595450854, |
| "grad_norm": 0.3847521896579275, |
| "learning_rate": 4.536009777198203e-08, |
| "loss": 0.4954, |
| "step": 2365 |
| }, |
| { |
| "epoch": 2.883834281072299, |
| "grad_norm": 0.3686885036028533, |
| "learning_rate": 4.441158317951777e-08, |
| "loss": 0.5, |
| "step": 2366 |
| }, |
| { |
| "epoch": 2.8850528025995126, |
| "grad_norm": 0.3476414945698473, |
| "learning_rate": 4.347304649903572e-08, |
| "loss": 0.5112, |
| "step": 2367 |
| }, |
| { |
| "epoch": 2.886271324126726, |
| "grad_norm": 0.3501811924875589, |
| "learning_rate": 4.2544489620248155e-08, |
| "loss": 0.5212, |
| "step": 2368 |
| }, |
| { |
| "epoch": 2.8874898456539397, |
| "grad_norm": 0.3522575951304983, |
| "learning_rate": 4.162591441277341e-08, |
| "loss": 0.5216, |
| "step": 2369 |
| }, |
| { |
| "epoch": 2.8887083671811533, |
| "grad_norm": 0.34261605478893353, |
| "learning_rate": 4.071732272613149e-08, |
| "loss": 0.4688, |
| "step": 2370 |
| }, |
| { |
| "epoch": 2.8899268887083673, |
| "grad_norm": 0.3647353980991927, |
| "learning_rate": 3.981871638974177e-08, |
| "loss": 0.5131, |
| "step": 2371 |
| }, |
| { |
| "epoch": 2.891145410235581, |
| "grad_norm": 0.37709939404151627, |
| "learning_rate": 3.8930097212918625e-08, |
| "loss": 0.5103, |
| "step": 2372 |
| }, |
| { |
| "epoch": 2.8923639317627945, |
| "grad_norm": 0.35808464221731223, |
| "learning_rate": 3.805146698486695e-08, |
| "loss": 0.4684, |
| "step": 2373 |
| }, |
| { |
| "epoch": 2.893582453290008, |
| "grad_norm": 0.3812340713874895, |
| "learning_rate": 3.7182827474678273e-08, |
| "loss": 0.5575, |
| "step": 2374 |
| }, |
| { |
| "epoch": 2.8948009748172217, |
| "grad_norm": 0.36259528565916516, |
| "learning_rate": 3.632418043133079e-08, |
| "loss": 0.513, |
| "step": 2375 |
| }, |
| { |
| "epoch": 2.8960194963444357, |
| "grad_norm": 0.3422911070110535, |
| "learning_rate": 3.5475527583681005e-08, |
| "loss": 0.4727, |
| "step": 2376 |
| }, |
| { |
| "epoch": 2.897238017871649, |
| "grad_norm": 0.3609331496351682, |
| "learning_rate": 3.463687064046317e-08, |
| "loss": 0.529, |
| "step": 2377 |
| }, |
| { |
| "epoch": 2.898456539398863, |
| "grad_norm": 0.3819585041721209, |
| "learning_rate": 3.3808211290284886e-08, |
| "loss": 0.481, |
| "step": 2378 |
| }, |
| { |
| "epoch": 2.8996750609260764, |
| "grad_norm": 0.3731434598411168, |
| "learning_rate": 3.2989551201624836e-08, |
| "loss": 0.5226, |
| "step": 2379 |
| }, |
| { |
| "epoch": 2.90089358245329, |
| "grad_norm": 0.3885400700621752, |
| "learning_rate": 3.2180892022826705e-08, |
| "loss": 0.5329, |
| "step": 2380 |
| }, |
| { |
| "epoch": 2.9021121039805036, |
| "grad_norm": 0.3572845398778449, |
| "learning_rate": 3.138223538209973e-08, |
| "loss": 0.4753, |
| "step": 2381 |
| }, |
| { |
| "epoch": 2.903330625507717, |
| "grad_norm": 0.3614383781834413, |
| "learning_rate": 3.059358288751202e-08, |
| "loss": 0.5409, |
| "step": 2382 |
| }, |
| { |
| "epoch": 2.904549147034931, |
| "grad_norm": 0.37189556032401827, |
| "learning_rate": 2.981493612698838e-08, |
| "loss": 0.5195, |
| "step": 2383 |
| }, |
| { |
| "epoch": 2.9057676685621447, |
| "grad_norm": 0.3789215584826568, |
| "learning_rate": 2.9046296668309716e-08, |
| "loss": 0.5074, |
| "step": 2384 |
| }, |
| { |
| "epoch": 2.9069861900893583, |
| "grad_norm": 0.370512893367381, |
| "learning_rate": 2.8287666059104713e-08, |
| "loss": 0.5191, |
| "step": 2385 |
| }, |
| { |
| "epoch": 2.908204711616572, |
| "grad_norm": 0.38297199689289846, |
| "learning_rate": 2.753904582685096e-08, |
| "loss": 0.4737, |
| "step": 2386 |
| }, |
| { |
| "epoch": 2.9094232331437855, |
| "grad_norm": 0.3959947838736986, |
| "learning_rate": 2.6800437478870512e-08, |
| "loss": 0.5115, |
| "step": 2387 |
| }, |
| { |
| "epoch": 2.910641754670999, |
| "grad_norm": 0.3957470998942836, |
| "learning_rate": 2.6071842502326526e-08, |
| "loss": 0.5063, |
| "step": 2388 |
| }, |
| { |
| "epoch": 2.9118602761982126, |
| "grad_norm": 0.3723580924084453, |
| "learning_rate": 2.535326236422053e-08, |
| "loss": 0.4892, |
| "step": 2389 |
| }, |
| { |
| "epoch": 2.9130787977254267, |
| "grad_norm": 0.37260731194295516, |
| "learning_rate": 2.464469851139073e-08, |
| "loss": 0.5542, |
| "step": 2390 |
| }, |
| { |
| "epoch": 2.9142973192526402, |
| "grad_norm": 0.35868024153034395, |
| "learning_rate": 2.394615237050535e-08, |
| "loss": 0.523, |
| "step": 2391 |
| }, |
| { |
| "epoch": 2.915515840779854, |
| "grad_norm": 0.36371587526669685, |
| "learning_rate": 2.3257625348064306e-08, |
| "loss": 0.4825, |
| "step": 2392 |
| }, |
| { |
| "epoch": 2.9167343623070674, |
| "grad_norm": 0.38007578965240923, |
| "learning_rate": 2.2579118830393654e-08, |
| "loss": 0.5096, |
| "step": 2393 |
| }, |
| { |
| "epoch": 2.917952883834281, |
| "grad_norm": 0.3894212168804138, |
| "learning_rate": 2.1910634183644475e-08, |
| "loss": 0.4839, |
| "step": 2394 |
| }, |
| { |
| "epoch": 2.9191714053614946, |
| "grad_norm": 0.40052317739398724, |
| "learning_rate": 2.1252172753787324e-08, |
| "loss": 0.5651, |
| "step": 2395 |
| }, |
| { |
| "epoch": 2.920389926888708, |
| "grad_norm": 0.35188112107988123, |
| "learning_rate": 2.060373586661224e-08, |
| "loss": 0.4977, |
| "step": 2396 |
| }, |
| { |
| "epoch": 2.921608448415922, |
| "grad_norm": 0.3499426602627878, |
| "learning_rate": 1.996532482772595e-08, |
| "loss": 0.4519, |
| "step": 2397 |
| }, |
| { |
| "epoch": 2.9228269699431357, |
| "grad_norm": 0.3812985888964239, |
| "learning_rate": 1.933694092254801e-08, |
| "loss": 0.5197, |
| "step": 2398 |
| }, |
| { |
| "epoch": 2.9240454914703493, |
| "grad_norm": 0.37294376101878496, |
| "learning_rate": 1.8718585416307443e-08, |
| "loss": 0.5252, |
| "step": 2399 |
| }, |
| { |
| "epoch": 2.925264012997563, |
| "grad_norm": 0.390284947297633, |
| "learning_rate": 1.811025955404333e-08, |
| "loss": 0.4939, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.9264825345247765, |
| "grad_norm": 0.38299656914528474, |
| "learning_rate": 1.751196456059867e-08, |
| "loss": 0.5282, |
| "step": 2401 |
| }, |
| { |
| "epoch": 2.9277010560519905, |
| "grad_norm": 0.4033256657794055, |
| "learning_rate": 1.6923701640621514e-08, |
| "loss": 0.5516, |
| "step": 2402 |
| }, |
| { |
| "epoch": 2.9289195775792036, |
| "grad_norm": 0.3564131129340729, |
| "learning_rate": 1.6345471978558847e-08, |
| "loss": 0.4492, |
| "step": 2403 |
| }, |
| { |
| "epoch": 2.9301380991064176, |
| "grad_norm": 0.3696818550866779, |
| "learning_rate": 1.577727673865659e-08, |
| "loss": 0.5282, |
| "step": 2404 |
| }, |
| { |
| "epoch": 2.9313566206336312, |
| "grad_norm": 0.3511572891694367, |
| "learning_rate": 1.5219117064957934e-08, |
| "loss": 0.5573, |
| "step": 2405 |
| }, |
| { |
| "epoch": 2.932575142160845, |
| "grad_norm": 0.3777779794080296, |
| "learning_rate": 1.4670994081297796e-08, |
| "loss": 0.4964, |
| "step": 2406 |
| }, |
| { |
| "epoch": 2.9337936636880584, |
| "grad_norm": 0.3408974732458375, |
| "learning_rate": 1.413290889130392e-08, |
| "loss": 0.5008, |
| "step": 2407 |
| }, |
| { |
| "epoch": 2.935012185215272, |
| "grad_norm": 0.35326628190060844, |
| "learning_rate": 1.3604862578392996e-08, |
| "loss": 0.4734, |
| "step": 2408 |
| }, |
| { |
| "epoch": 2.936230706742486, |
| "grad_norm": 0.3885332399006649, |
| "learning_rate": 1.3086856205768439e-08, |
| "loss": 0.5695, |
| "step": 2409 |
| }, |
| { |
| "epoch": 2.9374492282696996, |
| "grad_norm": 0.36720005893901214, |
| "learning_rate": 1.257889081641872e-08, |
| "loss": 0.4626, |
| "step": 2410 |
| }, |
| { |
| "epoch": 2.938667749796913, |
| "grad_norm": 0.38654367810832296, |
| "learning_rate": 1.208096743311571e-08, |
| "loss": 0.5201, |
| "step": 2411 |
| }, |
| { |
| "epoch": 2.9398862713241267, |
| "grad_norm": 0.35989398437475195, |
| "learning_rate": 1.159308705841078e-08, |
| "loss": 0.524, |
| "step": 2412 |
| }, |
| { |
| "epoch": 2.9411047928513403, |
| "grad_norm": 0.36447060978418805, |
| "learning_rate": 1.111525067463537e-08, |
| "loss": 0.4977, |
| "step": 2413 |
| }, |
| { |
| "epoch": 2.942323314378554, |
| "grad_norm": 0.3941566187065989, |
| "learning_rate": 1.0647459243897095e-08, |
| "loss": 0.5241, |
| "step": 2414 |
| }, |
| { |
| "epoch": 2.9435418359057675, |
| "grad_norm": 0.3971195756945423, |
| "learning_rate": 1.0189713708078086e-08, |
| "loss": 0.5083, |
| "step": 2415 |
| }, |
| { |
| "epoch": 2.9447603574329815, |
| "grad_norm": 0.3617746907444414, |
| "learning_rate": 9.74201498883387e-09, |
| "loss": 0.4824, |
| "step": 2416 |
| }, |
| { |
| "epoch": 2.945978878960195, |
| "grad_norm": 0.38983833247818245, |
| "learning_rate": 9.304363987591158e-09, |
| "loss": 0.5426, |
| "step": 2417 |
| }, |
| { |
| "epoch": 2.9471974004874086, |
| "grad_norm": 0.3739423997155145, |
| "learning_rate": 8.87676158554507e-09, |
| "loss": 0.4452, |
| "step": 2418 |
| }, |
| { |
| "epoch": 2.948415922014622, |
| "grad_norm": 0.37952476055522766, |
| "learning_rate": 8.459208643659122e-09, |
| "loss": 0.5432, |
| "step": 2419 |
| }, |
| { |
| "epoch": 2.949634443541836, |
| "grad_norm": 0.3634354801661458, |
| "learning_rate": 8.051706002661919e-09, |
| "loss": 0.5223, |
| "step": 2420 |
| }, |
| { |
| "epoch": 2.95085296506905, |
| "grad_norm": 0.3655917645054435, |
| "learning_rate": 7.65425448304713e-09, |
| "loss": 0.4784, |
| "step": 2421 |
| }, |
| { |
| "epoch": 2.952071486596263, |
| "grad_norm": 0.3878857225849821, |
| "learning_rate": 7.266854885069619e-09, |
| "loss": 0.536, |
| "step": 2422 |
| }, |
| { |
| "epoch": 2.953290008123477, |
| "grad_norm": 0.3860010124815134, |
| "learning_rate": 6.889507988745436e-09, |
| "loss": 0.5343, |
| "step": 2423 |
| }, |
| { |
| "epoch": 2.9545085296506906, |
| "grad_norm": 0.37137826994221546, |
| "learning_rate": 6.5222145538501595e-09, |
| "loss": 0.4683, |
| "step": 2424 |
| }, |
| { |
| "epoch": 2.955727051177904, |
| "grad_norm": 0.40358716261973293, |
| "learning_rate": 6.164975319917221e-09, |
| "loss": 0.5118, |
| "step": 2425 |
| }, |
| { |
| "epoch": 2.9569455727051177, |
| "grad_norm": 0.3805519912768825, |
| "learning_rate": 5.817791006235141e-09, |
| "loss": 0.5446, |
| "step": 2426 |
| }, |
| { |
| "epoch": 2.9581640942323313, |
| "grad_norm": 0.3645099929908209, |
| "learning_rate": 5.480662311848628e-09, |
| "loss": 0.4789, |
| "step": 2427 |
| }, |
| { |
| "epoch": 2.9593826157595453, |
| "grad_norm": 0.36366977745298745, |
| "learning_rate": 5.153589915554702e-09, |
| "loss": 0.5268, |
| "step": 2428 |
| }, |
| { |
| "epoch": 2.960601137286759, |
| "grad_norm": 0.35084949812297817, |
| "learning_rate": 4.836574475903244e-09, |
| "loss": 0.4545, |
| "step": 2429 |
| }, |
| { |
| "epoch": 2.9618196588139725, |
| "grad_norm": 0.3688342331523407, |
| "learning_rate": 4.5296166311931125e-09, |
| "loss": 0.5512, |
| "step": 2430 |
| }, |
| { |
| "epoch": 2.963038180341186, |
| "grad_norm": 0.3521398793576215, |
| "learning_rate": 4.232716999474917e-09, |
| "loss": 0.5379, |
| "step": 2431 |
| }, |
| { |
| "epoch": 2.9642567018683996, |
| "grad_norm": 0.3706937567678196, |
| "learning_rate": 3.9458761785460266e-09, |
| "loss": 0.5445, |
| "step": 2432 |
| }, |
| { |
| "epoch": 2.965475223395613, |
| "grad_norm": 0.35060638658108007, |
| "learning_rate": 3.669094745950008e-09, |
| "loss": 0.5027, |
| "step": 2433 |
| }, |
| { |
| "epoch": 2.966693744922827, |
| "grad_norm": 0.33178253347322945, |
| "learning_rate": 3.4023732589777426e-09, |
| "loss": 0.4681, |
| "step": 2434 |
| }, |
| { |
| "epoch": 2.967912266450041, |
| "grad_norm": 0.35617115102694386, |
| "learning_rate": 3.1457122546635353e-09, |
| "loss": 0.5019, |
| "step": 2435 |
| }, |
| { |
| "epoch": 2.9691307879772544, |
| "grad_norm": 0.3548326105732938, |
| "learning_rate": 2.899112249786229e-09, |
| "loss": 0.5276, |
| "step": 2436 |
| }, |
| { |
| "epoch": 2.970349309504468, |
| "grad_norm": 0.3984987781160192, |
| "learning_rate": 2.6625737408669804e-09, |
| "loss": 0.5172, |
| "step": 2437 |
| }, |
| { |
| "epoch": 2.9715678310316815, |
| "grad_norm": 0.362754815960633, |
| "learning_rate": 2.436097204167043e-09, |
| "loss": 0.5206, |
| "step": 2438 |
| }, |
| { |
| "epoch": 2.972786352558895, |
| "grad_norm": 0.38356526868895263, |
| "learning_rate": 2.2196830956905392e-09, |
| "loss": 0.4762, |
| "step": 2439 |
| }, |
| { |
| "epoch": 2.9740048740861087, |
| "grad_norm": 0.37390570550389457, |
| "learning_rate": 2.0133318511800227e-09, |
| "loss": 0.5343, |
| "step": 2440 |
| }, |
| { |
| "epoch": 2.9752233956133223, |
| "grad_norm": 0.35679383760064753, |
| "learning_rate": 1.8170438861159212e-09, |
| "loss": 0.4894, |
| "step": 2441 |
| }, |
| { |
| "epoch": 2.9764419171405363, |
| "grad_norm": 0.3592057260000064, |
| "learning_rate": 1.6308195957182028e-09, |
| "loss": 0.5362, |
| "step": 2442 |
| }, |
| { |
| "epoch": 2.97766043866775, |
| "grad_norm": 0.3259038266407775, |
| "learning_rate": 1.4546593549424892e-09, |
| "loss": 0.4727, |
| "step": 2443 |
| }, |
| { |
| "epoch": 2.9788789601949635, |
| "grad_norm": 0.37363652956982973, |
| "learning_rate": 1.2885635184828326e-09, |
| "loss": 0.5277, |
| "step": 2444 |
| }, |
| { |
| "epoch": 2.980097481722177, |
| "grad_norm": 0.35379174534673236, |
| "learning_rate": 1.1325324207667187e-09, |
| "loss": 0.4837, |
| "step": 2445 |
| }, |
| { |
| "epoch": 2.9813160032493906, |
| "grad_norm": 0.3675995235345453, |
| "learning_rate": 9.865663759578426e-10, |
| "loss": 0.5461, |
| "step": 2446 |
| }, |
| { |
| "epoch": 2.9825345247766046, |
| "grad_norm": 0.37636771913858935, |
| "learning_rate": 8.50665677953888e-10, |
| "loss": 0.4862, |
| "step": 2447 |
| }, |
| { |
| "epoch": 2.9837530463038178, |
| "grad_norm": 0.3681022019205161, |
| "learning_rate": 7.24830600386528e-10, |
| "loss": 0.4963, |
| "step": 2448 |
| }, |
| { |
| "epoch": 2.984971567831032, |
| "grad_norm": 0.3746370138837488, |
| "learning_rate": 6.09061396620314e-10, |
| "loss": 0.534, |
| "step": 2449 |
| }, |
| { |
| "epoch": 2.9861900893582454, |
| "grad_norm": 0.36486933688431966, |
| "learning_rate": 5.033582997526765e-10, |
| "loss": 0.577, |
| "step": 2450 |
| }, |
| { |
| "epoch": 2.987408610885459, |
| "grad_norm": 0.3650821349016925, |
| "learning_rate": 4.0772152261336906e-10, |
| "loss": 0.4508, |
| "step": 2451 |
| }, |
| { |
| "epoch": 2.9886271324126725, |
| "grad_norm": 0.380070504025212, |
| "learning_rate": 3.221512577639141e-10, |
| "loss": 0.5051, |
| "step": 2452 |
| }, |
| { |
| "epoch": 2.989845653939886, |
| "grad_norm": 0.3786185974021647, |
| "learning_rate": 2.466476774970472e-10, |
| "loss": 0.4845, |
| "step": 2453 |
| }, |
| { |
| "epoch": 2.9910641754671, |
| "grad_norm": 0.360482935976555, |
| "learning_rate": 1.812109338367174e-10, |
| "loss": 0.5414, |
| "step": 2454 |
| }, |
| { |
| "epoch": 2.9922826969943137, |
| "grad_norm": 0.372228898631232, |
| "learning_rate": 1.2584115853808697e-10, |
| "loss": 0.5194, |
| "step": 2455 |
| }, |
| { |
| "epoch": 2.9935012185215273, |
| "grad_norm": 0.38381176412541346, |
| "learning_rate": 8.053846308531122e-11, |
| "loss": 0.4912, |
| "step": 2456 |
| }, |
| { |
| "epoch": 2.994719740048741, |
| "grad_norm": 0.3934048198873252, |
| "learning_rate": 4.53029386948689e-11, |
| "loss": 0.5137, |
| "step": 2457 |
| }, |
| { |
| "epoch": 2.9959382615759544, |
| "grad_norm": 0.38117458951656574, |
| "learning_rate": 2.0134656311676658e-11, |
| "loss": 0.5353, |
| "step": 2458 |
| }, |
| { |
| "epoch": 2.997156783103168, |
| "grad_norm": 0.38135142008825745, |
| "learning_rate": 5.033666611864441e-12, |
| "loss": 0.4967, |
| "step": 2459 |
| }, |
| { |
| "epoch": 2.9983753046303816, |
| "grad_norm": 0.36779717909800114, |
| "learning_rate": 0.0, |
| "loss": 0.5127, |
| "step": 2460 |
| }, |
| { |
| "epoch": 2.9983753046303816, |
| "step": 2460, |
| "total_flos": 2708356203970560.0, |
| "train_loss": 0.5759637464110444, |
| "train_runtime": 38974.7056, |
| "train_samples_per_second": 6.062, |
| "train_steps_per_second": 0.063 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 2460, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2708356203970560.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |