{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.990948709353001,
  "eval_steps": 500,
  "global_step": 930,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0053637277908146165, "grad_norm": 6.338560672252144, "learning_rate": 8.60215053763441e-07, "loss": 1.0751, "step": 1 },
    { "epoch": 0.010727455581629233, "grad_norm": 6.34459310995922, "learning_rate": 1.720430107526882e-06, "loss": 1.0724, "step": 2 },
    { "epoch": 0.01609118337244385, "grad_norm": 6.24204412097311, "learning_rate": 2.580645161290323e-06, "loss": 1.0656, "step": 3 },
    { "epoch": 0.021454911163258466, "grad_norm": 5.836679159124573, "learning_rate": 3.440860215053764e-06, "loss": 1.0643, "step": 4 },
    { "epoch": 0.02681863895407308, "grad_norm": 4.516135892252434, "learning_rate": 4.3010752688172045e-06, "loss": 1.0254, "step": 5 },
    { "epoch": 0.0321823667448877, "grad_norm": 2.6133078802773384, "learning_rate": 5.161290322580646e-06, "loss": 0.9857, "step": 6 },
    { "epoch": 0.03754609453570231, "grad_norm": 2.356431435775483, "learning_rate": 6.021505376344087e-06, "loss": 0.9684, "step": 7 },
    { "epoch": 0.04290982232651693, "grad_norm": 3.8129269226867466, "learning_rate": 6.881720430107528e-06, "loss": 0.9487, "step": 8 },
    { "epoch": 0.048273550117331546, "grad_norm": 3.837805004980831, "learning_rate": 7.741935483870968e-06, "loss": 0.958, "step": 9 },
    { "epoch": 0.05363727790814616, "grad_norm": 3.226513127256882, "learning_rate": 8.602150537634409e-06, "loss": 0.9352, "step": 10 },
    { "epoch": 0.05900100569896078, "grad_norm": 3.1817577843556006, "learning_rate": 9.46236559139785e-06, "loss": 0.8991, "step": 11 },
    { "epoch": 0.0643647334897754, "grad_norm": 2.734625287133208, "learning_rate": 1.0322580645161291e-05, "loss": 0.8769, "step": 12 },
    { "epoch": 0.06972846128059002, "grad_norm": 1.9702603165063668, "learning_rate": 1.118279569892473e-05, "loss": 0.8565, "step": 13 },
    { "epoch": 0.07509218907140462, "grad_norm": 1.4326323487408281, "learning_rate": 1.2043010752688173e-05, "loss": 0.8304, "step": 14 },
    { "epoch": 0.08045591686221924, "grad_norm": 1.5771781448342905, "learning_rate": 1.2903225806451613e-05, "loss": 0.8223, "step": 15 },
    { "epoch": 0.08581964465303386, "grad_norm": 1.2675059911437987, "learning_rate": 1.3763440860215056e-05, "loss": 0.8036, "step": 16 },
    { "epoch": 0.09118337244384847, "grad_norm": 0.998013780742343, "learning_rate": 1.4623655913978497e-05, "loss": 0.7903, "step": 17 },
    { "epoch": 0.09654710023466309, "grad_norm": 1.0123573361429459, "learning_rate": 1.5483870967741936e-05, "loss": 0.7909, "step": 18 },
    { "epoch": 0.10191082802547771, "grad_norm": 0.867381543747644, "learning_rate": 1.6344086021505377e-05, "loss": 0.7807, "step": 19 },
    { "epoch": 0.10727455581629232, "grad_norm": 0.8142913834018638, "learning_rate": 1.7204301075268818e-05, "loss": 0.7675, "step": 20 },
    { "epoch": 0.11263828360710694, "grad_norm": 0.848005958837766, "learning_rate": 1.806451612903226e-05, "loss": 0.7644, "step": 21 },
    { "epoch": 0.11800201139792156, "grad_norm": 0.7118287539298433, "learning_rate": 1.89247311827957e-05, "loss": 0.7569, "step": 22 },
    { "epoch": 0.12336573918873617, "grad_norm": 0.5855004670255447, "learning_rate": 1.978494623655914e-05, "loss": 0.7487, "step": 23 },
    { "epoch": 0.1287294669795508, "grad_norm": 0.6728204797382786, "learning_rate": 2.0645161290322582e-05, "loss": 0.7416, "step": 24 },
    { "epoch": 0.1340931947703654, "grad_norm": 0.5967666869348331, "learning_rate": 2.1505376344086024e-05, "loss": 0.7363, "step": 25 },
    { "epoch": 0.13945692256118003, "grad_norm": 0.6160085099956011, "learning_rate": 2.236559139784946e-05, "loss": 0.734, "step": 26 },
    { "epoch": 0.14482065035199462, "grad_norm": 0.6363894242487178, "learning_rate": 2.3225806451612906e-05, "loss": 0.7262, "step": 27 },
    { "epoch": 0.15018437814280924, "grad_norm": 0.6626486374605112, "learning_rate": 2.4086021505376347e-05, "loss": 0.7188, "step": 28 },
    { "epoch": 0.15554810593362386, "grad_norm": 0.7590807753214363, "learning_rate": 2.4946236559139788e-05, "loss": 0.7223, "step": 29 },
    { "epoch": 0.16091183372443849, "grad_norm": 0.8043401688063679, "learning_rate": 2.5806451612903226e-05, "loss": 0.7165, "step": 30 },
    { "epoch": 0.1662755615152531, "grad_norm": 0.8116558016551599, "learning_rate": 2.6666666666666667e-05, "loss": 0.7128, "step": 31 },
    { "epoch": 0.17163928930606773, "grad_norm": 0.7050286092608573, "learning_rate": 2.752688172043011e-05, "loss": 0.709, "step": 32 },
    { "epoch": 0.17700301709688232, "grad_norm": 0.605916542924061, "learning_rate": 2.8387096774193552e-05, "loss": 0.7035, "step": 33 },
    { "epoch": 0.18236674488769694, "grad_norm": 0.48538643613649285, "learning_rate": 2.9247311827956993e-05, "loss": 0.7025, "step": 34 },
    { "epoch": 0.18773047267851156, "grad_norm": 0.4038151178490983, "learning_rate": 3.010752688172043e-05, "loss": 0.7043, "step": 35 },
    { "epoch": 0.19309420046932618, "grad_norm": 0.5220993966401706, "learning_rate": 3.096774193548387e-05, "loss": 0.6972, "step": 36 },
    { "epoch": 0.1984579282601408, "grad_norm": 1.1471863644219147, "learning_rate": 3.182795698924731e-05, "loss": 0.6992, "step": 37 },
    { "epoch": 0.20382165605095542, "grad_norm": 11.637743206594976, "learning_rate": 3.2688172043010754e-05, "loss": 0.7018, "step": 38 },
    { "epoch": 0.20918538384177002, "grad_norm": 2.51589445225404, "learning_rate": 3.3548387096774195e-05, "loss": 0.7004, "step": 39 },
    { "epoch": 0.21454911163258464, "grad_norm": 0.8841029393673814, "learning_rate": 3.4408602150537636e-05, "loss": 0.6959, "step": 40 },
    { "epoch": 0.21991283942339926, "grad_norm": 2.0189356445033613, "learning_rate": 3.526881720430108e-05, "loss": 0.6962, "step": 41 },
    { "epoch": 0.22527656721421388, "grad_norm": 0.8438418075337472, "learning_rate": 3.612903225806452e-05, "loss": 0.6948, "step": 42 },
    { "epoch": 0.2306402950050285, "grad_norm": 2.280049552210539, "learning_rate": 3.698924731182796e-05, "loss": 0.691, "step": 43 },
    { "epoch": 0.23600402279584312, "grad_norm": 1.5063081335495634, "learning_rate": 3.78494623655914e-05, "loss": 0.6911, "step": 44 },
    { "epoch": 0.24136775058665771, "grad_norm": 1.8084395582302646, "learning_rate": 3.870967741935484e-05, "loss": 0.6906, "step": 45 },
    { "epoch": 0.24673147837747234, "grad_norm": 1.6588924960337472, "learning_rate": 3.956989247311828e-05, "loss": 0.6929, "step": 46 },
    { "epoch": 0.25209520616828696, "grad_norm": 1.014045775957985, "learning_rate": 4.0430107526881724e-05, "loss": 0.6908, "step": 47 },
    { "epoch": 0.2574589339591016, "grad_norm": 1.9198362282460184, "learning_rate": 4.1290322580645165e-05, "loss": 0.687, "step": 48 },
    { "epoch": 0.2628226617499162, "grad_norm": 1.744156574216378, "learning_rate": 4.2150537634408606e-05, "loss": 0.6804, "step": 49 },
    { "epoch": 0.2681863895407308, "grad_norm": 1.1668744438759826, "learning_rate": 4.301075268817205e-05, "loss": 0.6784, "step": 50 },
    { "epoch": 0.27355011733154544, "grad_norm": 2.3338212759533854, "learning_rate": 4.387096774193548e-05, "loss": 0.6808, "step": 51 },
    { "epoch": 0.27891384512236006, "grad_norm": 1.7244150150466557, "learning_rate": 4.473118279569892e-05, "loss": 0.6838, "step": 52 },
    { "epoch": 0.2842775729131747, "grad_norm": 2.419331402256656, "learning_rate": 4.559139784946237e-05, "loss": 0.68, "step": 53 },
    { "epoch": 0.28964130070398925, "grad_norm": 4.265232664271197, "learning_rate": 4.645161290322581e-05, "loss": 0.6877, "step": 54 },
    { "epoch": 0.29500502849480387, "grad_norm": 1.6428312936958347, "learning_rate": 4.731182795698925e-05, "loss": 0.6866, "step": 55 },
    { "epoch": 0.3003687562856185, "grad_norm": 1.0611731524926487, "learning_rate": 4.8172043010752693e-05, "loss": 0.6722, "step": 56 },
    { "epoch": 0.3057324840764331, "grad_norm": 1.7504655396715425, "learning_rate": 4.9032258064516135e-05, "loss": 0.6823, "step": 57 },
    { "epoch": 0.31109621186724773, "grad_norm": 1.4266527434683252, "learning_rate": 4.9892473118279576e-05, "loss": 0.678, "step": 58 },
    { "epoch": 0.31645993965806235, "grad_norm": 1.3715035848857164, "learning_rate": 5.075268817204302e-05, "loss": 0.6669, "step": 59 },
    { "epoch": 0.32182366744887697, "grad_norm": 1.4341866180855634, "learning_rate": 5.161290322580645e-05, "loss": 0.6809, "step": 60 },
    { "epoch": 0.3271873952396916, "grad_norm": 1.5209173251883614, "learning_rate": 5.247311827956989e-05, "loss": 0.6694, "step": 61 },
    { "epoch": 0.3325511230305062, "grad_norm": 1.3054315394599638, "learning_rate": 5.333333333333333e-05, "loss": 0.6637, "step": 62 },
    { "epoch": 0.33791485082132083, "grad_norm": 1.5264153975136474, "learning_rate": 5.4193548387096774e-05, "loss": 0.6726, "step": 63 },
    { "epoch": 0.34327857861213545, "grad_norm": 1.2283408808572964, "learning_rate": 5.505376344086022e-05, "loss": 0.6746, "step": 64 },
    { "epoch": 0.3486423064029501, "grad_norm": 1.4653304361520723, "learning_rate": 5.591397849462366e-05, "loss": 0.6734, "step": 65 },
    { "epoch": 0.35400603419376464, "grad_norm": 1.1322311838365562, "learning_rate": 5.6774193548387104e-05, "loss": 0.6665, "step": 66 },
    { "epoch": 0.35936976198457926, "grad_norm": 1.4758546005558055, "learning_rate": 5.7634408602150545e-05, "loss": 0.6696, "step": 67 },
    { "epoch": 0.3647334897753939, "grad_norm": 1.1524101733290173, "learning_rate": 5.8494623655913986e-05, "loss": 0.6698, "step": 68 },
    { "epoch": 0.3700972175662085, "grad_norm": 1.7469295396906894, "learning_rate": 5.935483870967743e-05, "loss": 0.6709, "step": 69 },
    { "epoch": 0.3754609453570231, "grad_norm": 1.1846563978993125, "learning_rate": 6.021505376344086e-05, "loss": 0.6673, "step": 70 },
    { "epoch": 0.38082467314783774, "grad_norm": 1.1459305938713673, "learning_rate": 6.10752688172043e-05, "loss": 0.6575, "step": 71 },
    { "epoch": 0.38618840093865237, "grad_norm": 1.8209165087943502, "learning_rate": 6.193548387096774e-05, "loss": 0.6742, "step": 72 },
    { "epoch": 0.391552128729467, "grad_norm": 1.111422678506876, "learning_rate": 6.279569892473119e-05, "loss": 0.669, "step": 73 },
    { "epoch": 0.3969158565202816, "grad_norm": 1.8170583952074655, "learning_rate": 6.365591397849463e-05, "loss": 0.6677, "step": 74 },
    { "epoch": 0.4022795843110962, "grad_norm": 1.1599786495072175, "learning_rate": 6.451612903225807e-05, "loss": 0.6666, "step": 75 },
    { "epoch": 0.40764331210191085, "grad_norm": 1.4841350723375708, "learning_rate": 6.537634408602151e-05, "loss": 0.6707, "step": 76 },
    { "epoch": 0.41300703989272547, "grad_norm": 1.0958715407789212, "learning_rate": 6.623655913978495e-05, "loss": 0.6593, "step": 77 },
    { "epoch": 0.41837076768354003, "grad_norm": 1.4849874395428133, "learning_rate": 6.709677419354839e-05, "loss": 0.6658, "step": 78 },
    { "epoch": 0.42373449547435466, "grad_norm": 1.3922476398945494, "learning_rate": 6.795698924731183e-05, "loss": 0.6627, "step": 79 },
    { "epoch": 0.4290982232651693, "grad_norm": 1.349253537020458, "learning_rate": 6.881720430107527e-05, "loss": 0.6651, "step": 80 },
    { "epoch": 0.4344619510559839, "grad_norm": 1.214908872484667, "learning_rate": 6.967741935483871e-05, "loss": 0.667, "step": 81 },
    { "epoch": 0.4398256788467985, "grad_norm": 1.345794151853099, "learning_rate": 7.053763440860215e-05, "loss": 0.663, "step": 82 },
    { "epoch": 0.44518940663761314, "grad_norm": 1.8118348713875871, "learning_rate": 7.13978494623656e-05, "loss": 0.6627, "step": 83 },
    { "epoch": 0.45055313442842776, "grad_norm": 1.106435642838999, "learning_rate": 7.225806451612904e-05, "loss": 0.6583, "step": 84 },
    { "epoch": 0.4559168622192424, "grad_norm": 1.4776781772073297, "learning_rate": 7.311827956989248e-05, "loss": 0.6647, "step": 85 },
    { "epoch": 0.461280590010057, "grad_norm": 1.0848264815933542, "learning_rate": 7.397849462365592e-05, "loss": 0.6557, "step": 86 },
    { "epoch": 0.4666443178008716, "grad_norm": 1.8994900378306065, "learning_rate": 7.483870967741936e-05, "loss": 0.6561, "step": 87 },
    { "epoch": 0.47200804559168624, "grad_norm": 1.1008594371177927, "learning_rate": 7.56989247311828e-05, "loss": 0.6541, "step": 88 },
    { "epoch": 0.47737177338250086, "grad_norm": 1.7741666055604592, "learning_rate": 7.655913978494624e-05, "loss": 0.6532, "step": 89 },
    { "epoch": 0.48273550117331543, "grad_norm": 1.1479464223196045, "learning_rate": 7.741935483870968e-05, "loss": 0.6639, "step": 90 },
    { "epoch": 0.48809922896413005, "grad_norm": 1.3988914997706596, "learning_rate": 7.827956989247312e-05, "loss": 0.6511, "step": 91 },
    { "epoch": 0.49346295675494467, "grad_norm": 1.2477219744293453, "learning_rate": 7.913978494623657e-05, "loss": 0.6605, "step": 92 },
    { "epoch": 0.4988266845457593, "grad_norm": 0.9097553948609914, "learning_rate": 8e-05, "loss": 0.65, "step": 93 },
    { "epoch": 0.5041904123365739, "grad_norm": 1.5836301376704878, "learning_rate": 7.999971824066397e-05, "loss": 0.6542, "step": 94 },
    { "epoch": 0.5095541401273885, "grad_norm": 1.4923647833076734, "learning_rate": 7.99988729666253e-05, "loss": 0.6531, "step": 95 },
    { "epoch": 0.5149178679182032, "grad_norm": 0.9849105190117686, "learning_rate": 7.999746418979217e-05, "loss": 0.643, "step": 96 },
    { "epoch": 0.5202815957090178, "grad_norm": 1.012040139532396, "learning_rate": 7.99954919300114e-05, "loss": 0.6485, "step": 97 },
    { "epoch": 0.5256453234998324, "grad_norm": 1.3986606888790023, "learning_rate": 7.999295621506808e-05, "loss": 0.6549, "step": 98 },
    { "epoch": 0.531009051290647, "grad_norm": 1.1230453443468684, "learning_rate": 7.998985708068532e-05, "loss": 0.6459, "step": 99 },
    { "epoch": 0.5363727790814616, "grad_norm": 1.3637771512073018, "learning_rate": 7.998619457052362e-05, "loss": 0.6398, "step": 100 },
    { "epoch": 0.5417365068722763, "grad_norm": 1.11658331412697, "learning_rate": 7.998196873618028e-05, "loss": 0.6486, "step": 101 },
    { "epoch": 0.5471002346630909, "grad_norm": 1.2713507835255546, "learning_rate": 7.997717963718872e-05, "loss": 0.6429, "step": 102 },
    { "epoch": 0.5524639624539055, "grad_norm": 1.2063346906341978, "learning_rate": 7.997182734101763e-05, "loss": 0.6453, "step": 103 },
    { "epoch": 0.5578276902447201, "grad_norm": 1.5786954989331352, "learning_rate": 7.996591192306995e-05, "loss": 0.6481, "step": 104 },
    { "epoch": 0.5631914180355347, "grad_norm": 2.037493924714199, "learning_rate": 7.99594334666819e-05, "loss": 0.6452, "step": 105 },
    { "epoch": 0.5685551458263494, "grad_norm": 1.076036499954452, "learning_rate": 7.995239206312176e-05, "loss": 0.6366, "step": 106 },
    { "epoch": 0.573918873617164, "grad_norm": 2.8730059630882607, "learning_rate": 7.994478781158861e-05, "loss": 0.6585, "step": 107 },
    { "epoch": 0.5792826014079785, "grad_norm": 2.526449754135261, "learning_rate": 7.993662081921086e-05, "loss": 0.6701, "step": 108 },
    { "epoch": 0.5846463291987931, "grad_norm": 1.4700621099416509, "learning_rate": 7.992789120104486e-05, "loss": 0.6466, "step": 109 },
    { "epoch": 0.5900100569896077, "grad_norm": 1.3276483909541417, "learning_rate": 7.991859908007314e-05, "loss": 0.6535, "step": 110 },
    { "epoch": 0.5953737847804224, "grad_norm": 0.9621323964779972, "learning_rate": 7.990874458720283e-05, "loss": 0.6381, "step": 111 },
    { "epoch": 0.600737512571237, "grad_norm": 1.7905082277492617, "learning_rate": 7.989832786126369e-05, "loss": 0.6477, "step": 112 },
    { "epoch": 0.6061012403620516, "grad_norm": 0.7687516563695098, "learning_rate": 7.98873490490062e-05, "loss": 0.6512, "step": 113 },
    { "epoch": 0.6114649681528662, "grad_norm": 1.9114078648875095, "learning_rate": 7.987580830509949e-05, "loss": 0.6532, "step": 114 },
    { "epoch": 0.6168286959436808, "grad_norm": 1.1288630311317658, "learning_rate": 7.986370579212921e-05, "loss": 0.6521, "step": 115 },
    { "epoch": 0.6221924237344955, "grad_norm": 1.7375799757138244, "learning_rate": 7.985104168059514e-05, "loss": 0.6482, "step": 116 },
    { "epoch": 0.6275561515253101, "grad_norm": 1.4087756071428208, "learning_rate": 7.983781614890886e-05, "loss": 0.6542, "step": 117 },
    { "epoch": 0.6329198793161247, "grad_norm": 1.4278272442020752, "learning_rate": 7.982402938339123e-05, "loss": 0.6467, "step": 118 },
    { "epoch": 0.6382836071069393, "grad_norm": 0.9613841064478965, "learning_rate": 7.980968157826976e-05, "loss": 0.6392, "step": 119 },
    { "epoch": 0.6436473348977539, "grad_norm": 1.4516378583492695, "learning_rate": 7.97947729356758e-05, "loss": 0.6405, "step": 120 },
    { "epoch": 0.6490110626885686, "grad_norm": 0.8053425444626354, "learning_rate": 7.977930366564188e-05, "loss": 0.6413, "step": 121 },
    { "epoch": 0.6543747904793832, "grad_norm": 1.0531976112785613, "learning_rate": 7.976327398609851e-05, "loss": 0.6371, "step": 122 },
    { "epoch": 0.6597385182701978, "grad_norm": 0.7167882513685747, "learning_rate": 7.974668412287133e-05, "loss": 0.6363, "step": 123 },
    { "epoch": 0.6651022460610124, "grad_norm": 0.9378177990169265, "learning_rate": 7.972953430967773e-05, "loss": 0.6341, "step": 124 },
    { "epoch": 0.670465973851827, "grad_norm": 0.8552565042049741, "learning_rate": 7.971182478812374e-05, "loss": 0.6324, "step": 125 },
    { "epoch": 0.6758297016426417, "grad_norm": 0.7453958012562631, "learning_rate": 7.96935558077005e-05, "loss": 0.63, "step": 126 },
    { "epoch": 0.6811934294334563, "grad_norm": 0.814810784183303, "learning_rate": 7.967472762578082e-05, "loss": 0.6363, "step": 127 },
    { "epoch": 0.6865571572242709, "grad_norm": 1.048879669741169, "learning_rate": 7.965534050761548e-05, "loss": 0.6316, "step": 128 },
    { "epoch": 0.6919208850150855, "grad_norm": 1.5279961244780853, "learning_rate": 7.963539472632956e-05, "loss": 0.6455, "step": 129 },
    { "epoch": 0.6972846128059002, "grad_norm": 0.7981994579669985, "learning_rate": 7.961489056291858e-05, "loss": 0.633, "step": 130 },
    { "epoch": 0.7026483405967148, "grad_norm": 1.0473944191586428, "learning_rate": 7.95938283062445e-05, "loss": 0.631, "step": 131 },
    { "epoch": 0.7080120683875293, "grad_norm": 1.2737207552833516, "learning_rate": 7.957220825303168e-05, "loss": 0.6318, "step": 132 },
    { "epoch": 0.7133757961783439, "grad_norm": 0.8894810359025763, "learning_rate": 7.955003070786275e-05, "loss": 0.6269, "step": 133 },
    { "epoch": 0.7187395239691585, "grad_norm": 1.122051061656217, "learning_rate": 7.95272959831742e-05, "loss": 0.6337, "step": 134 },
    { "epoch": 0.7241032517599731, "grad_norm": 0.7525451166224594, "learning_rate": 7.950400439925207e-05, "loss": 0.6335, "step": 135 },
    { "epoch": 0.7294669795507878, "grad_norm": 0.7016690417729449, "learning_rate": 7.948015628422745e-05, "loss": 0.6286, "step": 136 },
    { "epoch": 0.7348307073416024, "grad_norm": 0.6789098247390296, "learning_rate": 7.945575197407177e-05, "loss": 0.6329, "step": 137 },
    { "epoch": 0.740194435132417, "grad_norm": 0.6404674159758782, "learning_rate": 7.943079181259215e-05, "loss": 0.6287, "step": 138 },
    { "epoch": 0.7455581629232316, "grad_norm": 0.9093960160044203, "learning_rate": 7.940527615142653e-05, "loss": 0.6289, "step": 139 },
    { "epoch": 0.7509218907140462, "grad_norm": 1.1758965813037474, "learning_rate": 7.937920535003866e-05, "loss": 0.6351, "step": 140 },
    { "epoch": 0.7562856185048609, "grad_norm": 0.742370532155386, "learning_rate": 7.935257977571317e-05, "loss": 0.6261, "step": 141 },
    { "epoch": 0.7616493462956755, "grad_norm": 0.542139953471737, "learning_rate": 7.932539980355023e-05, "loss": 0.6268, "step": 142 },
    { "epoch": 0.7670130740864901, "grad_norm": 0.6206950110046128, "learning_rate": 7.92976658164604e-05, "loss": 0.6202, "step": 143 },
    { "epoch": 0.7723768018773047, "grad_norm": 0.729311584778343, "learning_rate": 7.926937820515918e-05, "loss": 0.6212, "step": 144 },
    { "epoch": 0.7777405296681194, "grad_norm": 0.654400750098185, "learning_rate": 7.924053736816148e-05, "loss": 0.6182, "step": 145 },
    { "epoch": 0.783104257458934, "grad_norm": 0.6195531403475248, "learning_rate": 7.921114371177607e-05, "loss": 0.6245, "step": 146 },
    { "epoch": 0.7884679852497486, "grad_norm": 0.7976556334248974, "learning_rate": 7.918119765009979e-05, "loss": 0.6249, "step": 147 },
    { "epoch": 0.7938317130405632, "grad_norm": 0.8396887583061823, "learning_rate": 7.915069960501177e-05, "loss": 0.6248, "step": 148 },
    { "epoch": 0.7991954408313778, "grad_norm": 0.8502364179472985, "learning_rate": 7.911965000616746e-05, "loss": 0.6194, "step": 149 },
    { "epoch": 0.8045591686221925, "grad_norm": 0.9898063147614041, "learning_rate": 7.908804929099256e-05, "loss": 0.62, "step": 150 },
    { "epoch": 0.8099228964130071, "grad_norm": 1.2455473972915074, "learning_rate": 7.905589790467694e-05, "loss": 0.6198, "step": 151 },
    { "epoch": 0.8152866242038217, "grad_norm": 0.6806078960215027, "learning_rate": 7.902319630016822e-05, "loss": 0.6162, "step": 152 },
    { "epoch": 0.8206503519946363, "grad_norm": 0.4476574519688243, "learning_rate": 7.898994493816553e-05, "loss": 0.6214, "step": 153 },
    { "epoch": 0.8260140797854509, "grad_norm": 0.8696643453540888, "learning_rate": 7.895614428711296e-05, "loss": 0.6148, "step": 154 },
    { "epoch": 0.8313778075762656, "grad_norm": 1.246010486407204, "learning_rate": 7.892179482319297e-05, "loss": 0.6231, "step": 155 },
    { "epoch": 0.8367415353670801, "grad_norm": 0.5040497252873622, "learning_rate": 7.888689703031963e-05, "loss": 0.6192, "step": 156 },
    { "epoch": 0.8421052631578947, "grad_norm": 0.5624852670118361, "learning_rate": 7.885145140013192e-05, "loss": 0.6173, "step": 157 },
    { "epoch": 0.8474689909487093, "grad_norm": 0.9368458860548771, "learning_rate": 7.88154584319867e-05, "loss": 0.6137, "step": 158 },
    { "epoch": 0.8528327187395239, "grad_norm": 0.8845383469044678, "learning_rate": 7.87789186329517e-05, "loss": 0.6182, "step": 159 },
    { "epoch": 0.8581964465303386, "grad_norm": 0.5894895395765093, "learning_rate": 7.87418325177984e-05, "loss": 0.6091, "step": 160 },
    { "epoch": 0.8635601743211532, "grad_norm": 0.5731381526470704, "learning_rate": 7.870420060899476e-05, "loss": 0.6187, "step": 161 },
    { "epoch": 0.8689239021119678, "grad_norm": 0.7549017840945705, "learning_rate": 7.866602343669785e-05, "loss": 0.6203, "step": 162 },
    { "epoch": 0.8742876299027824, "grad_norm": 0.962856257611003, "learning_rate": 7.862730153874642e-05, "loss": 0.6176, "step": 163 },
    { "epoch": 0.879651357693597, "grad_norm": 1.3065614325608164, "learning_rate": 7.858803546065328e-05, "loss": 0.6247, "step": 164 },
    { "epoch": 0.8850150854844117, "grad_norm": 0.7514931987981934, "learning_rate": 7.854822575559764e-05, "loss": 0.6167, "step": 165 },
    { "epoch": 0.8903788132752263, "grad_norm": 0.6171596328035264, "learning_rate": 7.85078729844173e-05, "loss": 0.6191, "step": 166 },
    { "epoch": 0.8957425410660409, "grad_norm": 0.7129708826504616, "learning_rate": 7.846697771560075e-05, "loss": 0.6176, "step": 167 },
    { "epoch": 0.9011062688568555, "grad_norm": 1.0448469281639305, "learning_rate": 7.842554052527918e-05, "loss": 0.6186, "step": 168 },
    { "epoch": 0.9064699966476701, "grad_norm": 0.8929655231467754, "learning_rate": 7.838356199721836e-05, "loss": 0.6123, "step": 169 },
    { "epoch": 0.9118337244384848, "grad_norm": 0.6547241466981105, "learning_rate": 7.834104272281041e-05, "loss": 0.6157, "step": 170 },
    { "epoch": 0.9171974522292994, "grad_norm": 0.820018666142171, "learning_rate": 7.829798330106544e-05, "loss": 0.6151, "step": 171 },
    { "epoch": 0.922561180020114, "grad_norm": 0.8525569097143616, "learning_rate": 7.825438433860314e-05, "loss": 0.6227, "step": 172 },
    { "epoch": 0.9279249078109286, "grad_norm": 0.7094520065383435, "learning_rate": 7.821024644964429e-05, "loss": 0.6123, "step": 173 },
    { "epoch": 0.9332886356017432, "grad_norm": 0.5885877988974711, "learning_rate": 7.816557025600196e-05, "loss": 0.6042, "step": 174 },
    { "epoch": 0.9386523633925579, "grad_norm": 0.610585149507006, "learning_rate": 7.81203563870729e-05, "loss": 0.6115, "step": 175 },
    { "epoch": 0.9440160911833725, "grad_norm": 0.4909861004310254, "learning_rate": 7.807460547982861e-05, "loss": 0.6136, "step": 176 },
    { "epoch": 0.9493798189741871, "grad_norm": 0.552485503833097, "learning_rate": 7.802831817880633e-05, "loss": 0.607, "step": 177 },
    { "epoch": 0.9547435467650017, "grad_norm": 0.7187876946145139, "learning_rate": 7.798149513610003e-05, "loss": 0.6081, "step": 178 },
    { "epoch": 0.9601072745558162, "grad_norm": 0.6564549638726072, "learning_rate": 7.793413701135119e-05, "loss": 0.6092, "step": 179 },
    { "epoch": 0.9654710023466309, "grad_norm": 0.7342350726235318, "learning_rate": 7.788624447173948e-05, "loss": 0.6082, "step": 180 },
    { "epoch": 0.9708347301374455, "grad_norm": 0.6410201096556201, "learning_rate": 7.783781819197341e-05, "loss": 0.6098, "step": 181 },
    { "epoch": 0.9761984579282601, "grad_norm": 0.7068555927099052, "learning_rate": 7.778885885428082e-05, "loss": 0.6159, "step": 182 },
    { "epoch": 0.9815621857190747, "grad_norm": 0.972774444436176, "learning_rate": 7.773936714839922e-05, "loss": 0.6077, "step": 183 },
    { "epoch": 0.9869259135098893, "grad_norm": 0.982305769799067, "learning_rate": 7.768934377156613e-05, "loss": 0.6074, "step": 184 },
    { "epoch": 0.992289641300704, "grad_norm": 0.792349928717434, "learning_rate": 7.76387894285092e-05, "loss": 0.6071, "step": 185 },
    { "epoch": 0.9976533690915186, "grad_norm": 0.6150582773472084, "learning_rate": 7.758770483143634e-05, "loss": 0.6055, "step": 186 },
    { "epoch": 1.003687562856185, "grad_norm": 1.0092690602500856, "learning_rate": 7.75360907000257e-05, "loss": 1.0004, "step": 187 },
    { "epoch": 1.0090512906469997, "grad_norm": 1.4743324999618854, "learning_rate": 7.748394776141539e-05, "loss": 0.5946, "step": 188 },
    { "epoch": 1.0144150184378142, "grad_norm": 0.6277281735974461, "learning_rate": 7.743127675019344e-05, "loss": 0.5838, "step": 189 },
    { "epoch": 1.019778746228629, "grad_norm": 1.448463639411485, "learning_rate": 7.737807840838728e-05, "loss": 0.5989, "step": 190 },
    { "epoch": 1.0251424740194435, "grad_norm": 0.853892200703028, "learning_rate": 7.732435348545341e-05, "loss": 0.5831, "step": 191 },
    { "epoch": 1.0305062018102582, "grad_norm": 0.9983332294058066, "learning_rate": 7.727010273826674e-05, "loss": 0.5929, "step": 192 },
    { "epoch": 1.0358699296010727, "grad_norm": 0.828704229898241, "learning_rate": 7.721532693111002e-05, "loss": 0.5829, "step": 193 },
    { "epoch": 1.0412336573918874, "grad_norm": 0.6753173564927042, "learning_rate": 7.716002683566298e-05, "loss": 0.5876, "step": 194 },
    { "epoch": 1.046597385182702, "grad_norm": 0.7363007051587145, "learning_rate": 7.710420323099151e-05, "loss": 0.5817, "step": 195 },
    { "epoch": 1.0519611129735167, "grad_norm": 0.6189597764944422, "learning_rate": 7.704785690353674e-05, "loss": 0.5813, "step": 196 },
    { "epoch": 1.0573248407643312, "grad_norm": 0.548504465316847, "learning_rate": 7.699098864710385e-05, "loss": 0.5833, "step": 197 },
    { "epoch": 1.062688568555146, "grad_norm": 0.5803042076698839, "learning_rate": 7.693359926285095e-05, "loss": 0.5862, "step": 198 },
    { "epoch": 1.0680522963459604, "grad_norm": 0.43453807837921604, "learning_rate": 7.687568955927776e-05, "loss": 0.5786, "step": 199 },
    { "epoch": 1.0734160241367752, "grad_norm": 0.5542433696373542, "learning_rate": 7.681726035221428e-05, "loss": 0.5712, "step": 200 },
    { "epoch": 1.0787797519275897, "grad_norm": 0.616342629969921, "learning_rate": 7.675831246480923e-05, "loss": 0.5765, "step": 201 },
    { "epoch": 1.0841434797184042, "grad_norm": 0.5795433672277299, "learning_rate": 7.66988467275185e-05, "loss": 0.5833, "step": 202 },
    { "epoch": 1.089507207509219, "grad_norm": 0.4501970786991801, "learning_rate": 7.663886397809341e-05, "loss": 0.5791, "step": 203 },
    { "epoch": 1.0948709353000334, "grad_norm": 0.4353910474176125, "learning_rate": 7.657836506156896e-05, "loss": 0.5741, "step": 204 },
    { "epoch": 1.1002346630908482, "grad_norm": 0.3477330860396474, "learning_rate": 7.651735083025187e-05, "loss": 0.5753, "step": 205 },
    { "epoch": 1.1055983908816627, "grad_norm": 0.3477663464279302, "learning_rate": 7.64558221437086e-05, "loss": 0.5771, "step": 206 },
    { "epoch": 1.1109621186724774, "grad_norm": 0.4507396818143408, "learning_rate": 7.639377986875323e-05, "loss": 0.5735, "step": 207 },
    { "epoch": 1.116325846463292, "grad_norm": 0.3794218987883004, "learning_rate": 7.63312248794353e-05, "loss": 0.5776, "step": 208 },
    { "epoch": 1.1216895742541066, "grad_norm": 0.3574145388894302, "learning_rate": 7.626815805702741e-05, "loss": 0.5776, "step": 209 },
    { "epoch": 1.1270533020449212, "grad_norm": 0.473623038030073, "learning_rate": 7.620458029001286e-05, "loss": 0.5687, "step": 210 },
    { "epoch": 1.1324170298357359, "grad_norm": 0.513881497788833, "learning_rate": 7.61404924740731e-05, "loss": 0.571, "step": 211 },
    { "epoch": 1.1377807576265504, "grad_norm": 0.5631346609022523, "learning_rate": 7.60758955120752e-05, "loss": 0.5784, "step": 212 },
    { "epoch": 1.1431444854173651, "grad_norm": 0.6816476567358593, "learning_rate": 7.601079031405899e-05, "loss": 0.575, "step": 213 },
    { "epoch": 1.1485082132081796, "grad_norm": 0.852628392408615, "learning_rate": 7.594517779722432e-05, "loss": 0.5799, "step": 214 },
    { "epoch": 1.1538719409989944, "grad_norm": 0.8589566472396788, "learning_rate": 7.587905888591818e-05, "loss": 0.5774, "step": 215 },
    { "epoch": 1.1592356687898089, "grad_norm": 0.7183732341560192, "learning_rate": 7.581243451162157e-05, "loss": 0.5725, "step": 216 },
    { "epoch": 1.1645993965806236, "grad_norm": 0.5764553522343966, "learning_rate": 7.57453056129365e-05, "loss": 0.571, "step": 217 },
    { "epoch": 1.1699631243714381, "grad_norm": 0.4929901875121555, "learning_rate": 7.567767313557262e-05, "loss": 0.5757, "step": 218 },
    { "epoch": 1.1753268521622529, "grad_norm": 0.4723018655052622, "learning_rate": 7.560953803233407e-05, "loss": 0.5707, "step": 219 },
    { "epoch": 1.1806905799530674, "grad_norm": 0.4230581385616684, "learning_rate": 7.554090126310589e-05, "loss": 0.5741, "step": 220 },
    { "epoch": 1.1860543077438819, "grad_norm": 0.38511465865648165, "learning_rate": 7.547176379484063e-05, "loss": 0.5703, "step": 221 },
    { "epoch": 1.1914180355346966, "grad_norm": 0.3152141271515254, "learning_rate": 7.540212660154462e-05, "loss": 0.5746, "step": 222 },
    { "epoch": 1.1967817633255113, "grad_norm": 0.31639051914639366, "learning_rate": 7.533199066426435e-05, "loss": 0.5746, "step": 223 },
    { "epoch": 1.2021454911163258, "grad_norm": 0.377554126720378, "learning_rate": 7.526135697107258e-05, "loss": 0.5679, "step": 224 },
    { "epoch": 1.2075092189071404, "grad_norm": 0.3964575276420449, "learning_rate": 7.519022651705441e-05, "loss": 0.5703, "step": 225 },
    { "epoch": 1.212872946697955, "grad_norm": 0.3112698936775079, "learning_rate": 7.511860030429333e-05, "loss": 0.5679, "step": 226 },
    { "epoch": 1.2182366744887696, "grad_norm": 0.32379714073410504, "learning_rate": 7.504647934185706e-05, "loss": 0.5693, "step": 227 },
    { "epoch": 1.2236004022795843, "grad_norm": 0.4040411094185397, "learning_rate": 7.497386464578329e-05, "loss": 0.5731, "step": 228 },
    { "epoch": 1.2289641300703988, "grad_norm": 0.4908973335455194, "learning_rate": 7.490075723906548e-05, "loss": 0.5698, "step": 229 },
    { "epoch": 1.2343278578612136, "grad_norm": 0.5659873818172788, "learning_rate": 7.482715815163833e-05, "loss": 0.5677, "step": 230 },
    { "epoch": 1.239691585652028, "grad_norm": 0.6149372973578214, "learning_rate": 7.475306842036336e-05, "loss": 0.5686, "step": 231 },
    { "epoch": 1.2450553134428428, "grad_norm": 0.6411124815404463, "learning_rate": 7.467848908901422e-05, "loss": 0.5731, "step": 232 },
    { "epoch": 1.2504190412336573, "grad_norm": 0.7525181257988096, "learning_rate": 7.460342120826207e-05, "loss": 0.5634, "step": 233 },
    { "epoch": 1.255782769024472, "grad_norm": 0.8271566546622127, "learning_rate": 7.452786583566072e-05, "loss": 0.5741, "step": 234 },
    { "epoch": 1.2611464968152866, "grad_norm": 0.886416613492408, "learning_rate": 7.445182403563176e-05, "loss": 0.5728, "step": 235 },
    { "epoch": 1.2665102246061013, "grad_norm": 0.8924881545698343, "learning_rate": 7.437529687944951e-05, "loss": 0.5717, "step": 236 },
    { "epoch": 1.2718739523969158, "grad_norm": 0.7745460347324375, "learning_rate": 7.429828544522604e-05, "loss": 0.5722, "step": 237 },
    { "epoch": 1.2772376801877305, "grad_norm": 0.7338807909601145, "learning_rate": 7.422079081789587e-05, "loss": 0.5695, "step": 238 },
    { "epoch": 1.282601407978545, "grad_norm": 0.7633048091889993, "learning_rate": 7.414281408920074e-05, "loss": 0.5671, "step": 239 },
    { "epoch": 1.2879651357693598, "grad_norm": 0.7923736165710045, "learning_rate": 7.40643563576742e-05, "loss": 0.5678, "step": 240 },
    { "epoch": 1.2933288635601743, "grad_norm": 0.7486989851097655, "learning_rate": 7.398541872862619e-05, "loss": 0.5667, "step": 241 },
    { "epoch": 1.298692591350989, "grad_norm": 0.6208239838109744, "learning_rate": 7.390600231412737e-05, "loss": 0.5736, "step": 242 },
    { "epoch": 1.3040563191418035, "grad_norm": 0.7404604089133735, "learning_rate": 7.382610823299359e-05, "loss": 0.5727, "step": 243 },
    { "epoch": 1.309420046932618, "grad_norm": 0.8945885885911449, "learning_rate": 7.374573761077001e-05, "loss": 0.5711, "step": 244 },
    { "epoch": 1.3147837747234328, "grad_norm": 0.7417028846645015, "learning_rate": 7.366489157971525e-05, "loss": 0.5632, "step": 245 },
    { "epoch": 1.3201475025142475, "grad_norm": 0.40901103836642044, "learning_rate": 7.358357127878557e-05, "loss": 0.5643, "step": 246 },
    { "epoch": 1.325511230305062, "grad_norm": 0.4165629890181651, "learning_rate": 7.35017778536186e-05, "loss": 0.5655, "step": 247 },
    { "epoch": 1.3308749580958765, "grad_norm": 0.5243219329421154, "learning_rate": 7.341951245651747e-05, "loss": 0.5633, "step": 248 },
    { "epoch": 1.3362386858866913, "grad_norm": 0.4504046228772055, "learning_rate": 7.333677624643431e-05, "loss": 0.566, "step": 249 },
    { "epoch": 1.341602413677506, "grad_norm": 0.35827695239534885, "learning_rate": 7.325357038895413e-05, "loss": 0.5743, "step": 250 },
    { "epoch": 1.3469661414683205, "grad_norm": 0.32152110100309084, "learning_rate": 7.316989605627825e-05, "loss": 0.5629, "step": 251 },
    { "epoch": 1.352329869259135, "grad_norm": 0.30834448489072347, "learning_rate": 7.308575442720796e-05, "loss": 0.5657, "step": 252 },
    { "epoch": 1.3576935970499497, "grad_norm": 0.27834088197947626, "learning_rate": 7.300114668712767e-05, "loss": 0.5644, "step": 253 },
    { "epoch": 1.3630573248407643, "grad_norm": 0.2294870511600659, "learning_rate": 7.291607402798843e-05, "loss": 0.5663, "step": 254 },
    { "epoch": 1.368421052631579, "grad_norm": 0.31049794029517497, "learning_rate": 7.283053764829106e-05, "loss": 0.5662, "step": 255 },
    { "epoch": 1.3737847804223935, "grad_norm": 0.3317352484927729, "learning_rate": 7.274453875306922e-05, "loss": 0.5624, "step": 256 },
    { "epoch": 1.3791485082132082, "grad_norm": 0.2991395872386746, "learning_rate": 7.265807855387251e-05, "loss": 0.5671, "step": 257 },
    { "epoch": 1.3845122360040227, "grad_norm": 0.36719705545840825, "learning_rate": 7.257115826874931e-05, "loss": 0.5696, "step": 258 },
    { "epoch": 1.3898759637948375, "grad_norm": 0.42253987097227313, "learning_rate": 7.248377912222974e-05, "loss": 0.5619, "step": 259 },
    { "epoch": 1.395239691585652, "grad_norm": 0.45185019659051934, "learning_rate": 7.239594234530831e-05, "loss": 0.5635, "step": 260 },
    { "epoch": 1.4006034193764667, "grad_norm": 0.49545413351898454, "learning_rate": 7.23076491754266e-05, "loss": 0.5634, "step": 261 },
    { "epoch": 1.4059671471672812, "grad_norm": 0.5802415947227182, "learning_rate": 7.221890085645588e-05, "loss": 0.5686, "step": 262 },
    { "epoch": 1.411330874958096, "grad_norm": 0.6878874855186285, "learning_rate": 7.212969863867953e-05, "loss": 0.5648, "step": 263 },
    { "epoch": 1.4166946027489105, "grad_norm": 0.815358307587891, "learning_rate": 7.204004377877539e-05, "loss": 0.5676, "step": 264 },
    { "epoch": 1.4220583305397252, "grad_norm": 0.7718060228087203, "learning_rate": 7.194993753979818e-05, "loss": 0.567, "step": 265 },
    { "epoch": 1.4274220583305397, "grad_norm": 0.5699309161254399, "learning_rate": 7.185938119116161e-05, "loss": 0.5613, "step": 266 },
    { "epoch": 1.4327857861213542, "grad_norm": 0.5107074162600784, "learning_rate": 7.176837600862049e-05, "loss": 0.5658, "step": 267 },
    { "epoch": 1.438149513912169, "grad_norm": 0.6522642870784296, "learning_rate": 7.167692327425282e-05, "loss": 0.5717, "step": 268 },
    { "epoch": 1.4435132417029837, "grad_norm": 0.7567797167312894, "learning_rate": 7.15850242764417e-05, "loss": 0.5678, "step": 269 },
    { "epoch": 1.4488769694937982, "grad_norm": 0.5791144265821572, "learning_rate": 7.149268030985714e-05, "loss": 0.5689, "step": 270 },
    { "epoch": 1.4542406972846127, "grad_norm": 0.3877687625030928, "learning_rate": 7.139989267543787e-05, "loss": 0.5674, "step": 271 },
    { "epoch": 1.4596044250754274, "grad_norm": 0.5209778834900987, "learning_rate": 7.130666268037303e-05, "loss": 0.5599, "step": 272 },
    { "epoch": 1.4649681528662422, "grad_norm": 0.467485068394913, "learning_rate": 7.121299163808368e-05, "loss": 0.5666, "step": 273 },
    { "epoch": 1.4703318806570567, "grad_norm": 0.3765790034913379, "learning_rate": 7.111888086820435e-05, "loss": 0.5687, "step": 274 },
    { "epoch": 1.4756956084478712, "grad_norm": 0.4702163124649037, "learning_rate": 7.102433169656445e-05, "loss": 0.5653, "step": 275 },
    { "epoch": 1.481059336238686, "grad_norm": 0.4595483441798163, "learning_rate": 7.092934545516958e-05, "loss": 0.5668, "step": 276 },
    { "epoch": 1.4864230640295006, "grad_norm": 0.3806817840099515, "learning_rate": 7.083392348218274e-05, "loss": 0.5575, "step": 277 },
    { "epoch": 1.4917867918203152, "grad_norm": 0.2933836832087611, "learning_rate": 7.073806712190551e-05, "loss": 0.566, "step": 278 },
    { "epoch": 1.4971505196111297, "grad_norm": 0.29502601995192856, "learning_rate": 7.064177772475912e-05, "loss": 0.5579, "step": 279 },
    { "epoch": 1.5025142474019444, "grad_norm": 0.2446527849116459, "learning_rate": 7.054505664726542e-05, "loss": 0.5588, "step": 280 },
    { "epoch": 1.5078779751927591, "grad_norm": 0.23256693103187187, "learning_rate": 7.044790525202772e-05, "loss": 0.5575, "step": 281 },
    { "epoch": 1.5132417029835736, "grad_norm": 0.31883595054809466, "learning_rate": 7.035032490771165e-05, "loss": 0.5608, "step": 282 },
    { "epoch": 1.5186054307743881, "grad_norm": 0.35371535591743564, "learning_rate": 7.025231698902585e-05, "loss": 0.5622, "step": 283 },
    { "epoch": 1.5239691585652029, "grad_norm": 0.30500733849822, "learning_rate": 7.015388287670264e-05, "loss": 0.561, "step": 284 },
    { "epoch": 1.5293328863560174, "grad_norm": 0.27539890689680196, "learning_rate": 7.005502395747854e-05, "loss": 0.5632, "step": 285 },
    { "epoch": 1.534696614146832, "grad_norm": 0.3163401305287628, "learning_rate": 6.995574162407471e-05, "loss": 0.5647, "step": 286 },
    { "epoch": 1.5400603419376466, "grad_norm": 0.3537933994086675, "learning_rate": 6.985603727517736e-05, "loss": 0.5559, "step": 287 },
    { "epoch": 1.5454240697284614, "grad_norm": 0.40878100621394714, "learning_rate": 6.975591231541805e-05, "loss": 0.5602, "step": 288 },
    { "epoch": 1.5507877975192759, "grad_norm": 0.44323300216184397, "learning_rate": 6.96553681553539e-05, "loss": 0.5628, "step": 289 },
    { "epoch": 1.5561515253100904, "grad_norm": 0.4339681253670531, "learning_rate": 6.955440621144766e-05, "loss": 0.5622, "step": 290 },
    { "epoch": 1.561515253100905, "grad_norm": 0.42574808327461705, "learning_rate": 6.945302790604789e-05, "loss": 0.5656, "step": 291 },
    { "epoch": 1.5668789808917198, "grad_norm": 0.49984403935729443, "learning_rate": 6.935123466736878e-05, "loss": 0.5615, "step": 292 },
    { "epoch": 1.5722427086825344, "grad_norm": 0.6728978442736605, "learning_rate": 6.92490279294701e-05, "loss": 0.5612, "step": 293 },
    { "epoch": 1.5776064364733489, "grad_norm": 0.7429424392949961, "learning_rate": 6.914640913223695e-05, "loss": 0.5666, "step": 294 },
    { "epoch": 1.5829701642641636, "grad_norm": 0.6836320262283112, "learning_rate": 6.904337972135958e-05, "loss": 0.5607, "step": 295 },
    { "epoch": 1.5883338920549783, "grad_norm": 0.5867869403119099, "learning_rate": 6.893994114831287e-05, "loss": 0.5608, "step": 296 },
    { "epoch": 1.5936976198457928, "grad_norm": 0.5953453881284777, "learning_rate": 6.883609487033605e-05, "loss": 0.56, "step": 297 },
    { "epoch": 1.5990613476366073, "grad_norm": 0.6338055543705274, "learning_rate": 6.8731842350412e-05, "loss": 0.5641, "step": 298 },
    { "epoch": 1.604425075427422, "grad_norm": 0.6648867991249984, "learning_rate": 6.862718505724677e-05, "loss": 0.566, "step": 299 },
    { "epoch": 1.6097888032182368, "grad_norm": 0.7077186281640041, "learning_rate": 6.852212446524881e-05, "loss": 0.5603, "step": 300 },
    { "epoch": 1.6151525310090513, "grad_norm": 0.7046651305076743, "learning_rate": 6.84166620545083e-05, "loss": 0.5619, "step": 301 },
    { "epoch": 1.6205162587998658, "grad_norm": 0.5221466006404026, "learning_rate": 6.831079931077615e-05, "loss": 0.5561, "step": 302 },
    { "epoch": 1.6258799865906806, "grad_norm": 0.37944143421384124, "learning_rate": 6.820453772544318e-05, "loss": 0.5631, "step": 303 },
    { "epoch": 1.6312437143814953, "grad_norm": 0.5844863656673001, "learning_rate": 6.80978787955191e-05, "loss": 0.5545, "step": 304 },
    { "epoch": 1.6366074421723098, "grad_norm": 0.5727271056764134, "learning_rate": 6.799082402361131e-05, "loss": 0.5602, "step": 305 },
    { "epoch": 1.6419711699631243, "grad_norm": 0.3690953808626758, "learning_rate": 6.788337491790397e-05, "loss": 0.5613, "step": 306 },
    { "epoch": 1.647334897753939, "grad_norm": 0.31408506346634396, "learning_rate": 6.777553299213646e-05, "loss": 0.5587, "step": 307 },
    { "epoch": 1.6526986255447536, "grad_norm": 0.36584423383126785, "learning_rate": 6.766729976558226e-05, "loss": 0.5579, "step": 308 },
    { "epoch": 1.658062353335568, "grad_norm": 0.34138280754486466, "learning_rate": 6.755867676302747e-05, "loss": 0.5597, "step": 309 },
    { "epoch": 1.6634260811263828, "grad_norm": 0.2631015494780705, "learning_rate": 6.744966551474936e-05, "loss": 0.558, "step": 310 },
    { "epoch": 1.6687898089171975, "grad_norm": 0.34093472748141784, "learning_rate": 6.734026755649474e-05, "loss": 0.5619, "step": 311 },
    { "epoch": 1.674153536708012, "grad_norm": 0.43169917288860177, "learning_rate": 6.723048442945845e-05, "loss": 0.5617, "step": 312 },
    { "epoch": 1.6795172644988265, "grad_norm": 0.4064955294037096, "learning_rate": 6.712031768026154e-05, "loss": 0.562, "step": 313 },
    { "epoch": 1.6848809922896413, "grad_norm": 0.4126204222058468, "learning_rate": 6.70097688609295e-05, "loss": 0.5592, "step": 314 },
    { "epoch": 1.690244720080456, "grad_norm": 0.4791759638439358, "learning_rate": 6.689883952887042e-05, "loss": 0.5612, "step": 315 },
    { "epoch": 1.6956084478712705, "grad_norm": 0.5431515205787689, "learning_rate": 6.678753124685306e-05, "loss": 0.5653, "step": 316 },
    { "epoch": 1.700972175662085, "grad_norm": 0.6320848556741568, "learning_rate": 6.667584558298481e-05, "loss": 0.5576, "step": 317 },
    { "epoch": 1.7063359034528998, "grad_norm": 0.7004122845515078, "learning_rate": 6.656378411068958e-05, "loss": 0.5609, "step": 318 },
    { "epoch": 1.7116996312437145, "grad_norm": 0.6089676867039382, "learning_rate": 6.645134840868566e-05, "loss": 0.5647, "step": 319 },
    { "epoch": 1.717063359034529, "grad_norm": 0.4272876666051787, "learning_rate": 6.633854006096351e-05, "loss": 0.5579, "step": 320 },
    { "epoch": 1.7224270868253435, "grad_norm": 0.4606786107127789, "learning_rate": 6.622536065676338e-05, "loss": 0.5598, "step": 321 },
    { "epoch": 1.7277908146161582, "grad_norm": 0.5009778503199392, "learning_rate": 6.611181179055296e-05, "loss": 0.5581, "step": 322 },
    { "epoch": 1.733154542406973, "grad_norm": 0.4252208216862443, "learning_rate": 6.599789506200491e-05, "loss": 0.5598, "step": 323 },
    { "epoch": 1.7385182701977875, "grad_norm": 0.3441171701556595, "learning_rate": 6.588361207597432e-05, "loss": 0.5592, "step": 324 },
    { "epoch": 1.743881997988602, "grad_norm": 0.3879024267741338, "learning_rate": 6.576896444247609e-05, "loss": 0.5579, "step": 325 },
    { "epoch": 1.7492457257794167, "grad_norm": 0.30111558862635124, "learning_rate": 6.565395377666228e-05, "loss": 0.5594, "step": 326 },
    { "epoch": 1.7546094535702315, "grad_norm": 0.300732536138267, "learning_rate": 6.553858169879934e-05, "loss": 0.56, "step": 327 },
    { "epoch": 1.759973181361046, "grad_norm": 0.340331678535412, "learning_rate": 6.542284983424528e-05, "loss": 0.5558, "step": 328 },
    { "epoch": 1.7653369091518605, "grad_norm": 0.3902331189981612, "learning_rate": 6.530675981342674e-05, "loss": 0.5586, "step": 329 },
    { "epoch": 1.7707006369426752, "grad_norm": 0.43726753368459903, "learning_rate": 6.51903132718161e-05, "loss": 0.5525, "step": 330 },
    { "epoch": 1.7760643647334897, "grad_norm": 0.39280624763313293, "learning_rate": 6.507351184990837e-05, "loss": 0.555, "step": 331 },
    { "epoch": 1.7814280925243042, "grad_norm": 0.4556458085809101, "learning_rate": 6.49563571931981e-05, "loss": 0.5605, "step": 332 },
    { "epoch": 1.786791820315119, "grad_norm": 0.47834893977036586, "learning_rate": 6.48388509521562e-05, "loss": 0.5619, "step": 333 },
    { "epoch": 1.7921555481059337, "grad_norm": 0.37688804258392605, "learning_rate": 6.47209947822067e-05, "loss": 0.5591, "step": 334 },
    { "epoch": 1.7975192758967482, "grad_norm": 0.31185260137172, "learning_rate": 6.46027903437034e-05, "loss": 0.5601, "step": 335 },
    { "epoch": 1.8028830036875627, "grad_norm": 0.38273388647684164, "learning_rate": 6.448423930190653e-05, "loss": 0.5586, "step": 336 },
    { "epoch": 1.8082467314783774, "grad_norm": 0.3659890752118683, "learning_rate": 6.43653433269592e-05, "loss": 0.5593, "step": 337 },
    { "epoch": 1.8136104592691922, "grad_norm": 0.28697986611796433, "learning_rate": 6.424610409386396e-05, "loss": 0.5531, "step": 338 },
    { "epoch": 1.8189741870600067, "grad_norm": 0.3578108779196417, "learning_rate": 6.41265232824592e-05, "loss": 0.5576, "step": 339 },
    { "epoch": 1.8243379148508212, "grad_norm": 0.3595477545374786, "learning_rate": 6.40066025773954e-05, "loss": 0.5536, "step": 340 },
    { "epoch": 1.829701642641636, "grad_norm": 0.24495176220429252, "learning_rate": 6.388634366811146e-05, "loss": 0.5561, "step": 341
| }, | |
| { | |
| "epoch": 1.8350653704324507, | |
| "grad_norm": 0.34910710828586194, | |
| "learning_rate": 6.376574824881092e-05, | |
| "loss": 0.5554, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.8404290982232652, | |
| "grad_norm": 0.4115446924161809, | |
| "learning_rate": 6.364481801843802e-05, | |
| "loss": 0.556, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.8457928260140797, | |
| "grad_norm": 0.45492770981427666, | |
| "learning_rate": 6.352355468065386e-05, | |
| "loss": 0.5574, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.8511565538048944, | |
| "grad_norm": 0.4502627142016844, | |
| "learning_rate": 6.34019599438123e-05, | |
| "loss": 0.5578, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.8565202815957091, | |
| "grad_norm": 0.3764500404139869, | |
| "learning_rate": 6.328003552093597e-05, | |
| "loss": 0.5554, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.8618840093865237, | |
| "grad_norm": 0.3785656825858741, | |
| "learning_rate": 6.315778312969208e-05, | |
| "loss": 0.5551, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.8672477371773382, | |
| "grad_norm": 0.43002728271919993, | |
| "learning_rate": 6.303520449236827e-05, | |
| "loss": 0.5502, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.872611464968153, | |
| "grad_norm": 0.3968933759349479, | |
| "learning_rate": 6.291230133584829e-05, | |
| "loss": 0.5563, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.8779751927589676, | |
| "grad_norm": 0.3445454221288414, | |
| "learning_rate": 6.278907539158775e-05, | |
| "loss": 0.5593, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.8833389205497821, | |
| "grad_norm": 0.26807986583588095, | |
| "learning_rate": 6.266552839558965e-05, | |
| "loss": 0.5495, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.8887026483405966, | |
| "grad_norm": 0.25008221285974236, | |
| "learning_rate": 6.254166208837998e-05, | |
| "loss": 0.5517, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.8940663761314114, | |
| "grad_norm": 0.24820258665637415, | |
| "learning_rate": 6.241747821498315e-05, | |
| "loss": 0.5522, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.899430103922226, | |
| "grad_norm": 0.27580712951039277, | |
| "learning_rate": 6.229297852489746e-05, | |
| "loss": 0.5583, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.9047938317130404, | |
| "grad_norm": 0.25679911776420106, | |
| "learning_rate": 6.21681647720704e-05, | |
| "loss": 0.5499, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.9101575595038551, | |
| "grad_norm": 0.2197401652483747, | |
| "learning_rate": 6.204303871487399e-05, | |
| "loss": 0.5549, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.9155212872946699, | |
| "grad_norm": 0.2598523527517371, | |
| "learning_rate": 6.191760211607995e-05, | |
| "loss": 0.5581, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.9208850150854844, | |
| "grad_norm": 0.25865343855556444, | |
| "learning_rate": 6.179185674283493e-05, | |
| "loss": 0.5561, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.9262487428762989, | |
| "grad_norm": 0.23265157023397873, | |
| "learning_rate": 6.166580436663557e-05, | |
| "loss": 0.5538, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.9316124706671136, | |
| "grad_norm": 0.21694798938619186, | |
| "learning_rate": 6.153944676330357e-05, | |
| "loss": 0.5488, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.9369761984579283, | |
| "grad_norm": 0.2266361665583917, | |
| "learning_rate": 6.141278571296064e-05, | |
| "loss": 0.5479, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.9423399262487429, | |
| "grad_norm": 0.20869559940686064, | |
| "learning_rate": 6.128582300000345e-05, | |
| "loss": 0.5507, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.9477036540395574, | |
| "grad_norm": 0.24712363397279544, | |
| "learning_rate": 6.11585604130785e-05, | |
| "loss": 0.5521, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.953067381830372, | |
| "grad_norm": 0.2625049015072699, | |
| "learning_rate": 6.103099974505689e-05, | |
| "loss": 0.5538, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.9584311096211868, | |
| "grad_norm": 0.26045303486127896, | |
| "learning_rate": 6.0903142793009055e-05, | |
| "loss": 0.5521, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.9637948374120013, | |
| "grad_norm": 0.2729210974767953, | |
| "learning_rate": 6.0774991358179505e-05, | |
| "loss": 0.5545, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.9691585652028158, | |
| "grad_norm": 0.2661858868407906, | |
| "learning_rate": 6.064654724596141e-05, | |
| "loss": 0.5565, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.9745222929936306, | |
| "grad_norm": 0.3161992287956601, | |
| "learning_rate": 6.051781226587114e-05, | |
| "loss": 0.5566, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.9798860207844453, | |
| "grad_norm": 0.3247068065937088, | |
| "learning_rate": 6.038878823152283e-05, | |
| "loss": 0.5485, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.9852497485752598, | |
| "grad_norm": 0.31042344714670567, | |
| "learning_rate": 6.0259476960602795e-05, | |
| "loss": 0.5529, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.9906134763660743, | |
| "grad_norm": 0.28663489099284756, | |
| "learning_rate": 6.012988027484392e-05, | |
| "loss": 0.553, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.995977204156889, | |
| "grad_norm": 0.32048941108555556, | |
| "learning_rate": 6.000000000000001e-05, | |
| "loss": 0.5491, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 2.0020113979215557, | |
| "grad_norm": 0.5619929857332273, | |
| "learning_rate": 5.9869837965820064e-05, | |
| "loss": 0.9077, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 2.00737512571237, | |
| "grad_norm": 0.9276754960897458, | |
| "learning_rate": 5.973939600602251e-05, | |
| "loss": 0.5122, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 2.0127388535031847, | |
| "grad_norm": 1.1885282416108998, | |
| "learning_rate": 5.960867595826934e-05, | |
| "loss": 0.5236, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 2.0181025812939994, | |
| "grad_norm": 0.6750751948664354, | |
| "learning_rate": 5.9477679664140256e-05, | |
| "loss": 0.5117, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 2.0234663090848137, | |
| "grad_norm": 0.7850363466297992, | |
| "learning_rate": 5.934640896910668e-05, | |
| "loss": 0.5088, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 2.0288300368756285, | |
| "grad_norm": 0.9014636634663518, | |
| "learning_rate": 5.921486572250582e-05, | |
| "loss": 0.5097, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 2.034193764666443, | |
| "grad_norm": 0.8754469407524593, | |
| "learning_rate": 5.908305177751457e-05, | |
| "loss": 0.5147, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 2.039557492457258, | |
| "grad_norm": 0.4769303794801558, | |
| "learning_rate": 5.89509689911234e-05, | |
| "loss": 0.5087, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.044921220248072, | |
| "grad_norm": 0.7004101867500354, | |
| "learning_rate": 5.881861922411023e-05, | |
| "loss": 0.5088, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 2.050284948038887, | |
| "grad_norm": 0.518970595004128, | |
| "learning_rate": 5.8686004341014175e-05, | |
| "loss": 0.5106, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 2.0556486758297017, | |
| "grad_norm": 0.562641653776111, | |
| "learning_rate": 5.855312621010932e-05, | |
| "loss": 0.5076, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 2.0610124036205164, | |
| "grad_norm": 0.6089557991308883, | |
| "learning_rate": 5.841998670337834e-05, | |
| "loss": 0.5098, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 2.0663761314113307, | |
| "grad_norm": 0.3745335212494372, | |
| "learning_rate": 5.828658769648621e-05, | |
| "loss": 0.5107, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 2.0717398592021454, | |
| "grad_norm": 0.5038019010238222, | |
| "learning_rate": 5.815293106875369e-05, | |
| "loss": 0.5086, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 2.07710358699296, | |
| "grad_norm": 0.4417411581025683, | |
| "learning_rate": 5.8019018703130924e-05, | |
| "loss": 0.5082, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 2.082467314783775, | |
| "grad_norm": 0.4645390192932457, | |
| "learning_rate": 5.788485248617088e-05, | |
| "loss": 0.5088, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 2.087831042574589, | |
| "grad_norm": 0.38534363222977375, | |
| "learning_rate": 5.775043430800274e-05, | |
| "loss": 0.5044, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 2.093194770365404, | |
| "grad_norm": 0.3177873591397125, | |
| "learning_rate": 5.761576606230538e-05, | |
| "loss": 0.5037, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 2.0985584981562186, | |
| "grad_norm": 0.31327769202934297, | |
| "learning_rate": 5.7480849646280536e-05, | |
| "loss": 0.5065, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 2.1039222259470334, | |
| "grad_norm": 0.3096007175172773, | |
| "learning_rate": 5.7345686960626216e-05, | |
| "loss": 0.5089, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 2.1092859537378477, | |
| "grad_norm": 0.3250396986727379, | |
| "learning_rate": 5.7210279909509846e-05, | |
| "loss": 0.5075, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 2.1146496815286624, | |
| "grad_norm": 0.3335529585038257, | |
| "learning_rate": 5.707463040054147e-05, | |
| "loss": 0.5034, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 2.120013409319477, | |
| "grad_norm": 0.27510903683348653, | |
| "learning_rate": 5.693874034474686e-05, | |
| "loss": 0.5058, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 2.125377137110292, | |
| "grad_norm": 0.30416211894239636, | |
| "learning_rate": 5.6802611656540605e-05, | |
| "loss": 0.5052, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 2.130740864901106, | |
| "grad_norm": 0.3309159515345732, | |
| "learning_rate": 5.666624625369915e-05, | |
| "loss": 0.5011, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 2.136104592691921, | |
| "grad_norm": 0.2633474754073962, | |
| "learning_rate": 5.652964605733378e-05, | |
| "loss": 0.5031, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 2.1414683204827356, | |
| "grad_norm": 0.23995436691844568, | |
| "learning_rate": 5.6392812991863505e-05, | |
| "loss": 0.5013, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 2.1468320482735503, | |
| "grad_norm": 0.21205441071144687, | |
| "learning_rate": 5.6255748984988026e-05, | |
| "loss": 0.5026, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.1521957760643646, | |
| "grad_norm": 0.1978028326665556, | |
| "learning_rate": 5.61184559676605e-05, | |
| "loss": 0.5029, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 2.1575595038551794, | |
| "grad_norm": 0.23744700406570274, | |
| "learning_rate": 5.598093587406042e-05, | |
| "loss": 0.5036, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 2.162923231645994, | |
| "grad_norm": 0.22982879915535342, | |
| "learning_rate": 5.584319064156628e-05, | |
| "loss": 0.5007, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 2.1682869594368084, | |
| "grad_norm": 0.2332943467736946, | |
| "learning_rate": 5.570522221072835e-05, | |
| "loss": 0.5023, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 2.173650687227623, | |
| "grad_norm": 0.21085441964760665, | |
| "learning_rate": 5.5567032525241315e-05, | |
| "loss": 0.5009, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 2.179014415018438, | |
| "grad_norm": 0.22875835498415426, | |
| "learning_rate": 5.542862353191686e-05, | |
| "loss": 0.5004, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 2.1843781428092526, | |
| "grad_norm": 0.2508518468627133, | |
| "learning_rate": 5.528999718065629e-05, | |
| "loss": 0.5027, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 2.189741870600067, | |
| "grad_norm": 0.2133758915716313, | |
| "learning_rate": 5.515115542442305e-05, | |
| "loss": 0.5039, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 2.1951055983908816, | |
| "grad_norm": 0.21013033352445007, | |
| "learning_rate": 5.501210021921518e-05, | |
| "loss": 0.509, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 2.2004693261816963, | |
| "grad_norm": 0.24089413117157768, | |
| "learning_rate": 5.48728335240378e-05, | |
| "loss": 0.5034, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.205833053972511, | |
| "grad_norm": 0.2673713830759583, | |
| "learning_rate": 5.47333573008755e-05, | |
| "loss": 0.5003, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 2.2111967817633253, | |
| "grad_norm": 0.26775112246936006, | |
| "learning_rate": 5.459367351466466e-05, | |
| "loss": 0.5023, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 2.21656050955414, | |
| "grad_norm": 0.29448398829721556, | |
| "learning_rate": 5.445378413326582e-05, | |
| "loss": 0.5026, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 2.221924237344955, | |
| "grad_norm": 0.26959315515895454, | |
| "learning_rate": 5.4313691127435975e-05, | |
| "loss": 0.5037, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 2.2272879651357695, | |
| "grad_norm": 0.21092593980509475, | |
| "learning_rate": 5.417339647080071e-05, | |
| "loss": 0.5018, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 2.232651692926584, | |
| "grad_norm": 0.27287941846913594, | |
| "learning_rate": 5.40329021398265e-05, | |
| "loss": 0.504, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 2.2380154207173986, | |
| "grad_norm": 0.2243145959214538, | |
| "learning_rate": 5.389221011379281e-05, | |
| "loss": 0.5016, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 2.2433791485082133, | |
| "grad_norm": 0.23263330992814404, | |
| "learning_rate": 5.3751322374764254e-05, | |
| "loss": 0.496, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 2.248742876299028, | |
| "grad_norm": 0.2512271712538734, | |
| "learning_rate": 5.361024090756259e-05, | |
| "loss": 0.5035, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 2.2541066040898423, | |
| "grad_norm": 0.2551912835527355, | |
| "learning_rate": 5.346896769973886e-05, | |
| "loss": 0.5007, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.259470331880657, | |
| "grad_norm": 0.22385870672760472, | |
| "learning_rate": 5.3327504741545326e-05, | |
| "loss": 0.5029, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 2.2648340596714718, | |
| "grad_norm": 0.19743299945786424, | |
| "learning_rate": 5.318585402590745e-05, | |
| "loss": 0.5028, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 2.270197787462286, | |
| "grad_norm": 0.20277764076614224, | |
| "learning_rate": 5.3044017548395804e-05, | |
| "loss": 0.5005, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 2.275561515253101, | |
| "grad_norm": 0.19076747830149005, | |
| "learning_rate": 5.290199730719798e-05, | |
| "loss": 0.4979, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 2.2809252430439155, | |
| "grad_norm": 0.20942827686948592, | |
| "learning_rate": 5.275979530309042e-05, | |
| "loss": 0.5038, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 2.2862889708347303, | |
| "grad_norm": 0.21537452339336768, | |
| "learning_rate": 5.2617413539410236e-05, | |
| "loss": 0.5053, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 2.291652698625545, | |
| "grad_norm": 0.25627491704457783, | |
| "learning_rate": 5.247485402202697e-05, | |
| "loss": 0.4995, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 2.2970164264163593, | |
| "grad_norm": 0.2829522803374003, | |
| "learning_rate": 5.2332118759314394e-05, | |
| "loss": 0.4969, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 2.302380154207174, | |
| "grad_norm": 0.22098956424737304, | |
| "learning_rate": 5.218920976212215e-05, | |
| "loss": 0.504, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 2.3077438819979887, | |
| "grad_norm": 0.2239527428611037, | |
| "learning_rate": 5.204612904374745e-05, | |
| "loss": 0.5001, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 2.313107609788803, | |
| "grad_norm": 0.20447198338227954, | |
| "learning_rate": 5.1902878619906694e-05, | |
| "loss": 0.5008, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 2.3184713375796178, | |
| "grad_norm": 0.1994784390562372, | |
| "learning_rate": 5.1759460508707085e-05, | |
| "loss": 0.4954, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 2.3238350653704325, | |
| "grad_norm": 0.20376249784991973, | |
| "learning_rate": 5.1615876730618226e-05, | |
| "loss": 0.5051, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 2.3291987931612472, | |
| "grad_norm": 0.1777004640090449, | |
| "learning_rate": 5.1472129308443616e-05, | |
| "loss": 0.5059, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 2.3345625209520615, | |
| "grad_norm": 0.17919423701924383, | |
| "learning_rate": 5.132822026729216e-05, | |
| "loss": 0.4997, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 2.3399262487428762, | |
| "grad_norm": 0.1952145263768232, | |
| "learning_rate": 5.118415163454968e-05, | |
| "loss": 0.4964, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 2.345289976533691, | |
| "grad_norm": 0.23136811372687383, | |
| "learning_rate": 5.1039925439850244e-05, | |
| "loss": 0.4853, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 2.3506537043245057, | |
| "grad_norm": 0.2527914130838122, | |
| "learning_rate": 5.0895543715047737e-05, | |
| "loss": 0.5063, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 2.35601743211532, | |
| "grad_norm": 0.23487193932679698, | |
| "learning_rate": 5.075100849418708e-05, | |
| "loss": 0.5067, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 2.3613811599061347, | |
| "grad_norm": 0.17865390789281577, | |
| "learning_rate": 5.060632181347568e-05, | |
| "loss": 0.5051, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.3667448876969495, | |
| "grad_norm": 0.19380856376412295, | |
| "learning_rate": 5.046148571125468e-05, | |
| "loss": 0.5012, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 2.3721086154877637, | |
| "grad_norm": 0.2136191065502088, | |
| "learning_rate": 5.031650222797028e-05, | |
| "loss": 0.4994, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 2.3774723432785785, | |
| "grad_norm": 0.26432526505519316, | |
| "learning_rate": 5.0171373406144985e-05, | |
| "loss": 0.4977, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 2.382836071069393, | |
| "grad_norm": 0.27436611300421115, | |
| "learning_rate": 5.002610129034883e-05, | |
| "loss": 0.5019, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 2.388199798860208, | |
| "grad_norm": 0.20385491901178016, | |
| "learning_rate": 4.9880687927170534e-05, | |
| "loss": 0.4977, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 2.3935635266510227, | |
| "grad_norm": 0.16876671632134482, | |
| "learning_rate": 4.973513536518875e-05, | |
| "loss": 0.4975, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 2.398927254441837, | |
| "grad_norm": 0.1925770688555282, | |
| "learning_rate": 4.958944565494314e-05, | |
| "loss": 0.5, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 2.4042909822326517, | |
| "grad_norm": 0.20179672034929158, | |
| "learning_rate": 4.944362084890548e-05, | |
| "loss": 0.4984, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 2.4096547100234664, | |
| "grad_norm": 0.21265428318185095, | |
| "learning_rate": 4.929766300145083e-05, | |
| "loss": 0.4974, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 2.4150184378142807, | |
| "grad_norm": 0.25978527351861586, | |
| "learning_rate": 4.915157416882849e-05, | |
| "loss": 0.5055, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.4203821656050954, | |
| "grad_norm": 0.21775715781130206, | |
| "learning_rate": 4.9005356409133044e-05, | |
| "loss": 0.5016, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 2.42574589339591, | |
| "grad_norm": 0.19027697490800918, | |
| "learning_rate": 4.88590117822755e-05, | |
| "loss": 0.5022, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 2.431109621186725, | |
| "grad_norm": 0.22937605026076494, | |
| "learning_rate": 4.871254234995406e-05, | |
| "loss": 0.5041, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 2.436473348977539, | |
| "grad_norm": 0.24637066107726857, | |
| "learning_rate": 4.856595017562525e-05, | |
| "loss": 0.5024, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 2.441837076768354, | |
| "grad_norm": 0.21654301339170334, | |
| "learning_rate": 4.8419237324474734e-05, | |
| "loss": 0.499, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 2.4472008045591687, | |
| "grad_norm": 0.23644059339454185, | |
| "learning_rate": 4.82724058633883e-05, | |
| "loss": 0.5013, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 2.4525645323499834, | |
| "grad_norm": 0.22992449590363817, | |
| "learning_rate": 4.812545786092269e-05, | |
| "loss": 0.507, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 2.4579282601407977, | |
| "grad_norm": 0.23917593718094943, | |
| "learning_rate": 4.7978395387276475e-05, | |
| "loss": 0.5017, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 2.4632919879316124, | |
| "grad_norm": 0.20812657582063035, | |
| "learning_rate": 4.783122051426093e-05, | |
| "loss": 0.5019, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 2.468655715722427, | |
| "grad_norm": 0.16762169107118158, | |
| "learning_rate": 4.768393531527077e-05, | |
| "loss": 0.5096, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.474019443513242, | |
| "grad_norm": 0.21282987152563532, | |
| "learning_rate": 4.753654186525497e-05, | |
| "loss": 0.4999, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 2.479383171304056, | |
| "grad_norm": 0.21742448911503381, | |
| "learning_rate": 4.738904224068758e-05, | |
| "loss": 0.5036, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 2.484746899094871, | |
| "grad_norm": 0.1925300663006384, | |
| "learning_rate": 4.724143851953841e-05, | |
| "loss": 0.4977, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 2.4901106268856856, | |
| "grad_norm": 0.18087700742417484, | |
| "learning_rate": 4.7093732781243777e-05, | |
| "loss": 0.5079, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 2.4954743546765004, | |
| "grad_norm": 0.15639702761113433, | |
| "learning_rate": 4.694592710667723e-05, | |
| "loss": 0.5072, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 2.5008380824673146, | |
| "grad_norm": 0.19597235332511273, | |
| "learning_rate": 4.6798023578120184e-05, | |
| "loss": 0.5018, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 2.5062018102581294, | |
| "grad_norm": 0.1684316769198244, | |
| "learning_rate": 4.6650024279232666e-05, | |
| "loss": 0.4929, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 2.511565538048944, | |
| "grad_norm": 0.21055212869799955, | |
| "learning_rate": 4.650193129502386e-05, | |
| "loss": 0.5006, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 2.5169292658397584, | |
| "grad_norm": 0.24733144692822004, | |
| "learning_rate": 4.635374671182283e-05, | |
| "loss": 0.4994, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 2.522292993630573, | |
| "grad_norm": 0.24533597541006635, | |
| "learning_rate": 4.620547261724906e-05, | |
| "loss": 0.4958, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.527656721421388, | |
| "grad_norm": 0.2066880499616437, | |
| "learning_rate": 4.605711110018307e-05, | |
| "loss": 0.5005, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 2.5330204492122026, | |
| "grad_norm": 0.16991247454378994, | |
| "learning_rate": 4.590866425073698e-05, | |
| "loss": 0.4937, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 2.5383841770030173, | |
| "grad_norm": 0.1921709883210605, | |
| "learning_rate": 4.576013416022511e-05, | |
| "loss": 0.4986, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 2.5437479047938316, | |
| "grad_norm": 0.21100456607229467, | |
| "learning_rate": 4.5611522921134394e-05, | |
| "loss": 0.5096, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 2.5491116325846463, | |
| "grad_norm": 0.21922187130563403, | |
| "learning_rate": 4.546283262709506e-05, | |
| "loss": 0.4961, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 2.554475360375461, | |
| "grad_norm": 0.2138728259158541, | |
| "learning_rate": 4.531406537285103e-05, | |
| "loss": 0.4978, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 2.5598390881662754, | |
| "grad_norm": 0.17501395717864893, | |
| "learning_rate": 4.516522325423046e-05, | |
| "loss": 0.4978, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 2.56520281595709, | |
| "grad_norm": 0.2043784564828151, | |
| "learning_rate": 4.5016308368116155e-05, | |
| "loss": 0.5071, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 2.570566543747905, | |
| "grad_norm": 0.23306913353410968, | |
| "learning_rate": 4.486732281241611e-05, | |
| "loss": 0.4983, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 2.5759302715387196, | |
| "grad_norm": 0.2446931512816206, | |
| "learning_rate": 4.471826868603385e-05, | |
| "loss": 0.4967, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.5812939993295343, | |
| "grad_norm": 0.19818051787599317, | |
| "learning_rate": 4.456914808883898e-05, | |
| "loss": 0.5007, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 2.5866577271203486, | |
| "grad_norm": 0.16087879372692104, | |
| "learning_rate": 4.4419963121637526e-05, | |
| "loss": 0.4944, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 2.5920214549111633, | |
| "grad_norm": 0.20092825461048774, | |
| "learning_rate": 4.427071588614236e-05, | |
| "loss": 0.5002, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 2.597385182701978, | |
| "grad_norm": 0.2281785256323823, | |
| "learning_rate": 4.412140848494356e-05, | |
| "loss": 0.5031, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 2.6027489104927923, | |
| "grad_norm": 0.20204680377867434, | |
| "learning_rate": 4.397204302147886e-05, | |
| "loss": 0.496, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 2.608112638283607, | |
| "grad_norm": 0.1977652017995082, | |
| "learning_rate": 4.3822621600003934e-05, | |
| "loss": 0.4959, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 2.613476366074422, | |
| "grad_norm": 0.19244577039841376, | |
| "learning_rate": 4.367314632556281e-05, | |
| "loss": 0.4949, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 2.618840093865236, | |
| "grad_norm": 0.24680562797983854, | |
| "learning_rate": 4.3523619303958196e-05, | |
| "loss": 0.5001, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 2.624203821656051, | |
| "grad_norm": 0.2608000397905511, | |
| "learning_rate": 4.3374042641721787e-05, | |
| "loss": 0.5006, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 2.6295675494468655, | |
| "grad_norm": 0.16561810253679096, | |
| "learning_rate": 4.322441844608469e-05, | |
| "loss": 0.4972, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.6349312772376803, | |
| "grad_norm": 0.22607914890990072, | |
| "learning_rate": 4.3074748824947546e-05, | |
| "loss": 0.5002, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 2.640295005028495, | |
| "grad_norm": 0.2918667204791213, | |
| "learning_rate": 4.292503588685105e-05, | |
| "loss": 0.5, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 2.6456587328193093, | |
| "grad_norm": 0.2047093208105323, | |
| "learning_rate": 4.277528174094607e-05, | |
| "loss": 0.4999, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 2.651022460610124, | |
| "grad_norm": 0.1740999252433492, | |
| "learning_rate": 4.262548849696407e-05, | |
| "loss": 0.495, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 2.6563861884009388, | |
| "grad_norm": 0.1585431462983745, | |
| "learning_rate": 4.247565826518728e-05, | |
| "loss": 0.4913, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 2.661749916191753, | |
| "grad_norm": 0.18550331479957272, | |
| "learning_rate": 4.2325793156419035e-05, | |
| "loss": 0.4991, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 2.667113643982568, | |
| "grad_norm": 0.18017249852635744, | |
| "learning_rate": 4.217589528195403e-05, | |
| "loss": 0.4952, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 2.6724773717733825, | |
| "grad_norm": 0.2170267245439509, | |
| "learning_rate": 4.202596675354851e-05, | |
| "loss": 0.4979, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 2.6778410995641972, | |
| "grad_norm": 0.20466183171637414, | |
| "learning_rate": 4.187600968339064e-05, | |
| "loss": 0.4977, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 2.683204827355012, | |
| "grad_norm": 0.1859091442920982, | |
| "learning_rate": 4.1726026184070625e-05, | |
| "loss": 0.5037, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.6885685551458263, | |
| "grad_norm": 0.16577581911976427, | |
| "learning_rate": 4.157601836855103e-05, | |
| "loss": 0.4996, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 2.693932282936641, | |
| "grad_norm": 0.20982822011955427, | |
| "learning_rate": 4.142598835013698e-05, | |
| "loss": 0.5003, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 2.6992960107274557, | |
| "grad_norm": 0.1760144931085274, | |
| "learning_rate": 4.12759382424464e-05, | |
| "loss": 0.5048, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 2.70465973851827, | |
| "grad_norm": 0.16308891153060442, | |
| "learning_rate": 4.11258701593802e-05, | |
| "loss": 0.4995, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 2.7100234663090847, | |
| "grad_norm": 0.18485290922951852, | |
| "learning_rate": 4.0975786215092596e-05, | |
| "loss": 0.4974, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 2.7153871940998995, | |
| "grad_norm": 0.16453966274325355, | |
| "learning_rate": 4.0825688523961176e-05, | |
| "loss": 0.4961, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.7207509218907138, | |
| "grad_norm": 0.17078151543088202, | |
| "learning_rate": 4.0675579200557246e-05, | |
| "loss": 0.4996, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 2.7261146496815285, | |
| "grad_norm": 0.18413562304090325, | |
| "learning_rate": 4.052546035961596e-05, | |
| "loss": 0.5047, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 2.7314783774723432, | |
| "grad_norm": 0.1807528229446923, | |
| "learning_rate": 4.0375334116006596e-05, | |
| "loss": 0.4999, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 2.736842105263158, | |
| "grad_norm": 0.1708195041200041, | |
| "learning_rate": 4.0225202584702643e-05, | |
| "loss": 0.5039, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.7422058330539727, | |
| "grad_norm": 0.1772112061283108, | |
| "learning_rate": 4.0075067880752165e-05, | |
| "loss": 0.5006, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 2.747569560844787, | |
| "grad_norm": 0.16771636982644764, | |
| "learning_rate": 3.9924932119247855e-05, | |
| "loss": 0.5008, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.7529332886356017, | |
| "grad_norm": 0.16916780256561517, | |
| "learning_rate": 3.977479741529738e-05, | |
| "loss": 0.4939, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 2.7582970164264164, | |
| "grad_norm": 0.17680260996970482, | |
| "learning_rate": 3.962466588399342e-05, | |
| "loss": 0.4997, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.7636607442172307, | |
| "grad_norm": 0.1780764938281788, | |
| "learning_rate": 3.947453964038404e-05, | |
| "loss": 0.4939, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 2.7690244720080455, | |
| "grad_norm": 0.20020918095732, | |
| "learning_rate": 3.932442079944276e-05, | |
| "loss": 0.4932, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.77438819979886, | |
| "grad_norm": 0.19304746634023665, | |
| "learning_rate": 3.9174311476038824e-05, | |
| "loss": 0.5026, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 2.779751927589675, | |
| "grad_norm": 0.19887309023558672, | |
| "learning_rate": 3.902421378490742e-05, | |
| "loss": 0.4953, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.7851156553804897, | |
| "grad_norm": 0.204450641639583, | |
| "learning_rate": 3.887412984061979e-05, | |
| "loss": 0.5001, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 2.790479383171304, | |
| "grad_norm": 0.1651139970176456, | |
| "learning_rate": 3.872406175755362e-05, | |
| "loss": 0.4985, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.7958431109621187, | |
| "grad_norm": 0.2563097667637881, | |
| "learning_rate": 3.857401164986303e-05, | |
| "loss": 0.4948, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 2.8012068387529334, | |
| "grad_norm": 0.234054045546983, | |
| "learning_rate": 3.842398163144899e-05, | |
| "loss": 0.4947, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.8065705665437477, | |
| "grad_norm": 0.15811375667315455, | |
| "learning_rate": 3.827397381592939e-05, | |
| "loss": 0.5048, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 2.8119342943345624, | |
| "grad_norm": 0.1987863698237863, | |
| "learning_rate": 3.8123990316609364e-05, | |
| "loss": 0.501, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.817298022125377, | |
| "grad_norm": 0.17172619805487405, | |
| "learning_rate": 3.7974033246451496e-05, | |
| "loss": 0.4991, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 2.822661749916192, | |
| "grad_norm": 0.16007263014351186, | |
| "learning_rate": 3.782410471804599e-05, | |
| "loss": 0.4972, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.8280254777070066, | |
| "grad_norm": 0.16534850292679054, | |
| "learning_rate": 3.767420684358097e-05, | |
| "loss": 0.4984, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 2.833389205497821, | |
| "grad_norm": 0.14773257937726625, | |
| "learning_rate": 3.752434173481273e-05, | |
| "loss": 0.504, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.8387529332886356, | |
| "grad_norm": 0.17349714217162007, | |
| "learning_rate": 3.737451150303595e-05, | |
| "loss": 0.4984, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 2.8441166610794504, | |
| "grad_norm": 0.15861554785434998, | |
| "learning_rate": 3.722471825905394e-05, | |
| "loss": 0.5046, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.8494803888702647, | |
| "grad_norm": 0.17356548128864052, | |
| "learning_rate": 3.707496411314896e-05, | |
| "loss": 0.4965, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 2.8548441166610794, | |
| "grad_norm": 0.16137459004699495, | |
| "learning_rate": 3.692525117505246e-05, | |
| "loss": 0.4966, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.860207844451894, | |
| "grad_norm": 0.1543679059675596, | |
| "learning_rate": 3.677558155391532e-05, | |
| "loss": 0.4969, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 2.8655715722427084, | |
| "grad_norm": 0.17840356309672536, | |
| "learning_rate": 3.662595735827822e-05, | |
| "loss": 0.4928, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.870935300033523, | |
| "grad_norm": 0.15915427976526053, | |
| "learning_rate": 3.647638069604182e-05, | |
| "loss": 0.491, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 2.876299027824338, | |
| "grad_norm": 0.1960858603291955, | |
| "learning_rate": 3.632685367443721e-05, | |
| "loss": 0.4981, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.8816627556151526, | |
| "grad_norm": 0.20296992977396197, | |
| "learning_rate": 3.617737839999608e-05, | |
| "loss": 0.4984, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 2.8870264834059673, | |
| "grad_norm": 0.2694415740935397, | |
| "learning_rate": 3.602795697852116e-05, | |
| "loss": 0.4907, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.8923902111967816, | |
| "grad_norm": 0.19651514395841568, | |
| "learning_rate": 3.587859151505645e-05, | |
| "loss": 0.4907, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 2.8977539389875964, | |
| "grad_norm": 0.17781434805395327, | |
| "learning_rate": 3.572928411385765e-05, | |
| "loss": 0.5042, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.903117666778411, | |
| "grad_norm": 0.20415382211791988, | |
| "learning_rate": 3.558003687836249e-05, | |
| "loss": 0.4973, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 2.9084813945692254, | |
| "grad_norm": 0.25361323116982154, | |
| "learning_rate": 3.5430851911161025e-05, | |
| "loss": 0.4957, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 2.91384512236004, | |
| "grad_norm": 0.20713936406930122, | |
| "learning_rate": 3.528173131396617e-05, | |
| "loss": 0.5036, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 2.919208850150855, | |
| "grad_norm": 0.17026848748787526, | |
| "learning_rate": 3.513267718758391e-05, | |
| "loss": 0.4975, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 2.9245725779416696, | |
| "grad_norm": 0.21622589822789534, | |
| "learning_rate": 3.498369163188385e-05, | |
| "loss": 0.4976, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 2.9299363057324843, | |
| "grad_norm": 0.19646465258305665, | |
| "learning_rate": 3.483477674576955e-05, | |
| "loss": 0.4987, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 2.9353000335232986, | |
| "grad_norm": 0.17789915328753722, | |
| "learning_rate": 3.468593462714896e-05, | |
| "loss": 0.4952, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 2.9406637613141133, | |
| "grad_norm": 0.16632647338954235, | |
| "learning_rate": 3.453716737290495e-05, | |
| "loss": 0.498, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 2.946027489104928, | |
| "grad_norm": 0.18269935667563644, | |
| "learning_rate": 3.438847707886561e-05, | |
| "loss": 0.4993, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 2.9513912168957424, | |
| "grad_norm": 0.1574198748294102, | |
| "learning_rate": 3.4239865839774906e-05, | |
| "loss": 0.4938, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.956754944686557, | |
| "grad_norm": 0.16957001146132789, | |
| "learning_rate": 3.409133574926302e-05, | |
| "loss": 0.4969, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 2.962118672477372, | |
| "grad_norm": 0.15435240809940032, | |
| "learning_rate": 3.394288889981695e-05, | |
| "loss": 0.4946, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 2.9674824002681865, | |
| "grad_norm": 0.154260292592301, | |
| "learning_rate": 3.379452738275095e-05, | |
| "loss": 0.4929, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 2.9728461280590013, | |
| "grad_norm": 0.15155738642592756, | |
| "learning_rate": 3.364625328817717e-05, | |
| "loss": 0.4972, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 2.9782098558498156, | |
| "grad_norm": 0.17467421089382126, | |
| "learning_rate": 3.349806870497615e-05, | |
| "loss": 0.5008, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 2.9835735836406303, | |
| "grad_norm": 0.17056105214141762, | |
| "learning_rate": 3.334997572076734e-05, | |
| "loss": 0.4905, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 2.988937311431445, | |
| "grad_norm": 0.1673670147899835, | |
| "learning_rate": 3.320197642187983e-05, | |
| "loss": 0.494, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 2.9943010392222593, | |
| "grad_norm": 0.1850966895118181, | |
| "learning_rate": 3.305407289332279e-05, | |
| "loss": 0.4992, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 3.000335232986926, | |
| "grad_norm": 0.2722101703808084, | |
| "learning_rate": 3.2906267218756244e-05, | |
| "loss": 0.836, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 3.0056989607777407, | |
| "grad_norm": 0.3632821992997869, | |
| "learning_rate": 3.2758561480461606e-05, | |
| "loss": 0.4507, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 3.011062688568555, | |
| "grad_norm": 0.3428188312032687, | |
| "learning_rate": 3.261095775931244e-05, | |
| "loss": 0.46, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 3.0164264163593697, | |
| "grad_norm": 0.2992573935904219, | |
| "learning_rate": 3.2463458134745036e-05, | |
| "loss": 0.4511, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 3.0217901441501844, | |
| "grad_norm": 0.31765288073417064, | |
| "learning_rate": 3.2316064684729246e-05, | |
| "loss": 0.4476, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 3.027153871940999, | |
| "grad_norm": 0.26053192252342355, | |
| "learning_rate": 3.2168779485739086e-05, | |
| "loss": 0.4473, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 3.0325175997318135, | |
| "grad_norm": 0.298616923861466, | |
| "learning_rate": 3.2021604612723525e-05, | |
| "loss": 0.4521, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 3.037881327522628, | |
| "grad_norm": 0.28423459110220495, | |
| "learning_rate": 3.187454213907733e-05, | |
| "loss": 0.4507, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 3.043245055313443, | |
| "grad_norm": 0.23009487403692486, | |
| "learning_rate": 3.172759413661172e-05, | |
| "loss": 0.448, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 3.0486087831042576, | |
| "grad_norm": 0.28376655658935424, | |
| "learning_rate": 3.1580762675525286e-05, | |
| "loss": 0.4509, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 3.053972510895072, | |
| "grad_norm": 0.22381560292455216, | |
| "learning_rate": 3.143404982437476e-05, | |
| "loss": 0.4521, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 3.0593362386858867, | |
| "grad_norm": 0.24070713732730523, | |
| "learning_rate": 3.128745765004594e-05, | |
| "loss": 0.4439, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 3.0646999664767014, | |
| "grad_norm": 0.2632500650736694, | |
| "learning_rate": 3.114098821772451e-05, | |
| "loss": 0.4489, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 3.070063694267516, | |
| "grad_norm": 0.19380561177767147, | |
| "learning_rate": 3.099464359086695e-05, | |
| "loss": 0.4473, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 3.0754274220583304, | |
| "grad_norm": 0.28710006884974615, | |
| "learning_rate": 3.084842583117153e-05, | |
| "loss": 0.4527, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 3.080791149849145, | |
| "grad_norm": 0.18312560702847464, | |
| "learning_rate": 3.0702336998549175e-05, | |
| "loss": 0.451, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 3.08615487763996, | |
| "grad_norm": 0.23079325997690583, | |
| "learning_rate": 3.055637915109453e-05, | |
| "loss": 0.4427, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 3.0915186054307746, | |
| "grad_norm": 0.19698651608881707, | |
| "learning_rate": 3.0410554345056876e-05, | |
| "loss": 0.4534, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 3.096882333221589, | |
| "grad_norm": 0.18838697788327152, | |
| "learning_rate": 3.026486463481125e-05, | |
| "loss": 0.451, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 3.1022460610124036, | |
| "grad_norm": 0.17680792058130787, | |
| "learning_rate": 3.0119312072829476e-05, | |
| "loss": 0.4435, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 3.1076097888032184, | |
| "grad_norm": 0.1601761416116848, | |
| "learning_rate": 2.997389870965118e-05, | |
| "loss": 0.4511, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 3.1129735165940327, | |
| "grad_norm": 0.17773164119539953, | |
| "learning_rate": 2.982862659385502e-05, | |
| "loss": 0.4513, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 3.1183372443848474, | |
| "grad_norm": 0.17193804443547908, | |
| "learning_rate": 2.968349777202973e-05, | |
| "loss": 0.4484, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 3.123700972175662, | |
| "grad_norm": 0.16914799438808237, | |
| "learning_rate": 2.9538514288745336e-05, | |
| "loss": 0.4463, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 3.129064699966477, | |
| "grad_norm": 0.16873259416647984, | |
| "learning_rate": 2.939367818652434e-05, | |
| "loss": 0.4491, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 3.134428427757291, | |
| "grad_norm": 0.16119241415751986, | |
| "learning_rate": 2.9248991505812944e-05, | |
| "loss": 0.4473, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 3.139792155548106, | |
| "grad_norm": 0.15930346566056242, | |
| "learning_rate": 2.9104456284952277e-05, | |
| "loss": 0.4464, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 3.1451558833389206, | |
| "grad_norm": 0.15103719252838246, | |
| "learning_rate": 2.8960074560149752e-05, | |
| "loss": 0.4458, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 3.1505196111297353, | |
| "grad_norm": 0.14739728413169334, | |
| "learning_rate": 2.8815848365450336e-05, | |
| "loss": 0.4461, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 3.1558833389205496, | |
| "grad_norm": 0.1636187894417558, | |
| "learning_rate": 2.867177973270784e-05, | |
| "loss": 0.4444, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 3.1612470667113644, | |
| "grad_norm": 0.15742478053043885, | |
| "learning_rate": 2.8527870691556404e-05, | |
| "loss": 0.4476, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 3.166610794502179, | |
| "grad_norm": 0.611468578577517, | |
| "learning_rate": 2.8384123269381784e-05, | |
| "loss": 0.4607, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 3.171974522292994, | |
| "grad_norm": 0.17061785943367402, | |
| "learning_rate": 2.8240539491292938e-05, | |
| "loss": 0.4518, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 3.177338250083808, | |
| "grad_norm": 0.14588623033185896, | |
| "learning_rate": 2.8097121380093323e-05, | |
| "loss": 0.4404, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 3.182701977874623, | |
| "grad_norm": 0.18047802738317803, | |
| "learning_rate": 2.7953870956252562e-05, | |
| "loss": 0.4472, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 3.1880657056654376, | |
| "grad_norm": 0.15324942876302464, | |
| "learning_rate": 2.7810790237877857e-05, | |
| "loss": 0.4471, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 3.1934294334562523, | |
| "grad_norm": 0.19424542098598574, | |
| "learning_rate": 2.7667881240685606e-05, | |
| "loss": 0.4441, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 3.1987931612470666, | |
| "grad_norm": 0.16898760852013042, | |
| "learning_rate": 2.7525145977973045e-05, | |
| "loss": 0.4435, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 3.2041568890378813, | |
| "grad_norm": 0.19404967889677557, | |
| "learning_rate": 2.738258646058978e-05, | |
| "loss": 0.4427, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 3.209520616828696, | |
| "grad_norm": 0.16937723295023943, | |
| "learning_rate": 2.7240204696909603e-05, | |
| "loss": 0.4493, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 3.2148843446195103, | |
| "grad_norm": 0.14987599858224868, | |
| "learning_rate": 2.7098002692802033e-05, | |
| "loss": 0.4482, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 3.220248072410325, | |
| "grad_norm": 0.14816239745817442, | |
| "learning_rate": 2.6955982451604206e-05, | |
| "loss": 0.4512, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 3.22561180020114, | |
| "grad_norm": 0.13895435016957602, | |
| "learning_rate": 2.6814145974092566e-05, | |
| "loss": 0.4479, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 3.2309755279919545, | |
| "grad_norm": 0.13200544957430468, | |
| "learning_rate": 2.6672495258454678e-05, | |
| "loss": 0.4543, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 3.236339255782769, | |
| "grad_norm": 0.1456637489059305, | |
| "learning_rate": 2.6531032300261153e-05, | |
| "loss": 0.4475, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 3.2417029835735836, | |
| "grad_norm": 0.13818311152408982, | |
| "learning_rate": 2.6389759092437418e-05, | |
| "loss": 0.4446, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 3.2470667113643983, | |
| "grad_norm": 0.1330695845217543, | |
| "learning_rate": 2.6248677625235763e-05, | |
| "loss": 0.4513, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 3.252430439155213, | |
| "grad_norm": 0.12671554312337807, | |
| "learning_rate": 2.6107789886207195e-05, | |
| "loss": 0.443, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 3.2577941669460273, | |
| "grad_norm": 0.148616489659857, | |
| "learning_rate": 2.5967097860173514e-05, | |
| "loss": 0.4494, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 3.263157894736842, | |
| "grad_norm": 0.13556510213657127, | |
| "learning_rate": 2.58266035291993e-05, | |
| "loss": 0.4535, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 3.2685216225276568, | |
| "grad_norm": 0.15241618420915304, | |
| "learning_rate": 2.5686308872564028e-05, | |
| "loss": 0.4496, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 3.2738853503184715, | |
| "grad_norm": 0.144757212701804, | |
| "learning_rate": 2.5546215866734185e-05, | |
| "loss": 0.4465, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 3.279249078109286, | |
| "grad_norm": 0.14396415920602024, | |
| "learning_rate": 2.540632648533536e-05, | |
| "loss": 0.448, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 3.2846128059001005, | |
| "grad_norm": 0.15210625698722938, | |
| "learning_rate": 2.526664269912452e-05, | |
| "loss": 0.4453, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 3.2899765336909153, | |
| "grad_norm": 0.1536053230061205, | |
| "learning_rate": 2.5127166475962205e-05, | |
| "loss": 0.4487, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 3.29534026148173, | |
| "grad_norm": 0.15509713077990947, | |
| "learning_rate": 2.4987899780784836e-05, | |
| "loss": 0.4457, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 3.3007039892725443, | |
| "grad_norm": 0.13085919728595163, | |
| "learning_rate": 2.484884457557696e-05, | |
| "loss": 0.4443, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 3.306067717063359, | |
| "grad_norm": 0.16886670984395816, | |
| "learning_rate": 2.4710002819343712e-05, | |
| "loss": 0.4538, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 3.3114314448541737, | |
| "grad_norm": 0.12562181708750828, | |
| "learning_rate": 2.457137646808315e-05, | |
| "loss": 0.4477, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 3.316795172644988, | |
| "grad_norm": 0.1577346712555077, | |
| "learning_rate": 2.44329674747587e-05, | |
| "loss": 0.4542, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 3.3221589004358028, | |
| "grad_norm": 0.13554447992255508, | |
| "learning_rate": 2.4294777789271663e-05, | |
| "loss": 0.4495, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 3.3275226282266175, | |
| "grad_norm": 0.14611739712605668, | |
| "learning_rate": 2.4156809358433728e-05, | |
| "loss": 0.4472, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 3.332886356017432, | |
| "grad_norm": 0.1400607484697624, | |
| "learning_rate": 2.4019064125939603e-05, | |
| "loss": 0.4521, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 3.338250083808247, | |
| "grad_norm": 0.14259331457489, | |
| "learning_rate": 2.3881544032339506e-05, | |
| "loss": 0.448, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 3.3436138115990612, | |
| "grad_norm": 0.13315687312240615, | |
| "learning_rate": 2.3744251015011987e-05, | |
| "loss": 0.4463, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 3.348977539389876, | |
| "grad_norm": 0.12598643415214605, | |
| "learning_rate": 2.360718700813651e-05, | |
| "loss": 0.4447, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 3.3543412671806907, | |
| "grad_norm": 0.14066053069067935, | |
| "learning_rate": 2.347035394266623e-05, | |
| "loss": 0.446, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 3.359704994971505, | |
| "grad_norm": 0.12802549460203494, | |
| "learning_rate": 2.333375374630086e-05, | |
| "loss": 0.4419, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 3.3650687227623197, | |
| "grad_norm": 0.12761654852469786, | |
| "learning_rate": 2.3197388343459405e-05, | |
| "loss": 0.4501, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 3.3704324505531345, | |
| "grad_norm": 0.14182894960939563, | |
| "learning_rate": 2.3061259655253165e-05, | |
| "loss": 0.4463, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 3.375796178343949, | |
| "grad_norm": 0.41133093836911544, | |
| "learning_rate": 2.2925369599458543e-05, | |
| "loss": 0.4551, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 3.3811599061347635, | |
| "grad_norm": 0.13182899206147553, | |
| "learning_rate": 2.2789720090490167e-05, | |
| "loss": 0.4446, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 3.386523633925578, | |
| "grad_norm": 0.1170971831574601, | |
| "learning_rate": 2.265431303937379e-05, | |
| "loss": 0.4417, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 3.391887361716393, | |
| "grad_norm": 0.12294985745042769, | |
| "learning_rate": 2.2519150353719478e-05, | |
| "loss": 0.4494, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 3.3972510895072077, | |
| "grad_norm": 0.13039963065660728, | |
| "learning_rate": 2.2384233937694626e-05, | |
| "loss": 0.447, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 3.402614817298022, | |
| "grad_norm": 0.11831506164421687, | |
| "learning_rate": 2.2249565691997263e-05, | |
| "loss": 0.4499, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 3.4079785450888367, | |
| "grad_norm": 0.12143409559233521, | |
| "learning_rate": 2.2115147513829145e-05, | |
| "loss": 0.4476, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 3.4133422728796514, | |
| "grad_norm": 0.1291445962430486, | |
| "learning_rate": 2.1980981296869083e-05, | |
| "loss": 0.4486, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 3.418706000670466, | |
| "grad_norm": 0.13989097404575776, | |
| "learning_rate": 2.184706893124633e-05, | |
| "loss": 0.4387, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 3.4240697284612804, | |
| "grad_norm": 0.3715077464881586, | |
| "learning_rate": 2.1713412303513803e-05, | |
| "loss": 0.4472, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 3.429433456252095, | |
| "grad_norm": 0.1251774557356555, | |
| "learning_rate": 2.1580013296621657e-05, | |
| "loss": 0.4539, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 3.43479718404291, | |
| "grad_norm": 0.13346719081780015, | |
| "learning_rate": 2.144687378989069e-05, | |
| "loss": 0.4494, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 3.4401609118337246, | |
| "grad_norm": 0.13105974532551945, | |
| "learning_rate": 2.1313995658985825e-05, | |
| "loss": 0.4457, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 3.445524639624539, | |
| "grad_norm": 0.12874889844793144, | |
| "learning_rate": 2.118138077588978e-05, | |
| "loss": 0.4471, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 3.4508883674153537, | |
| "grad_norm": 0.13187822242861885, | |
| "learning_rate": 2.1049031008876603e-05, | |
| "loss": 0.4486, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 3.4562520952061684, | |
| "grad_norm": 0.11696538375561746, | |
| "learning_rate": 2.0916948222485446e-05, | |
| "loss": 0.4509, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 3.4616158229969827, | |
| "grad_norm": 0.121989247700871, | |
| "learning_rate": 2.0785134277494202e-05, | |
| "loss": 0.4462, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 3.4669795507877974, | |
| "grad_norm": 0.893427164640271, | |
| "learning_rate": 2.065359103089333e-05, | |
| "loss": 0.4619, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 3.472343278578612, | |
| "grad_norm": 0.45677961197929307, | |
| "learning_rate": 2.0522320335859768e-05, | |
| "loss": 0.4513, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 3.477707006369427, | |
| "grad_norm": 0.13666000173799434, | |
| "learning_rate": 2.0391324041730665e-05, | |
| "loss": 0.4538, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 3.4830707341602416, | |
| "grad_norm": 0.2608095297691742, | |
| "learning_rate": 2.026060399397751e-05, | |
| "loss": 0.4489, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 3.488434461951056, | |
| "grad_norm": 0.18107927299339568, | |
| "learning_rate": 2.013016203417994e-05, | |
| "loss": 0.4526, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 3.4937981897418706, | |
| "grad_norm": 0.14754327484468957, | |
| "learning_rate": 2.0000000000000012e-05, | |
| "loss": 0.4467, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 3.4991619175326854, | |
| "grad_norm": 0.13398053465109833, | |
| "learning_rate": 1.9870119725156094e-05, | |
| "loss": 0.4526, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 3.5045256453234996, | |
| "grad_norm": 0.13952089135435625, | |
| "learning_rate": 1.9740523039397225e-05, | |
| "loss": 0.4407, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 3.5098893731143144, | |
| "grad_norm": 0.14080074484432106, | |
| "learning_rate": 1.9611211768477173e-05, | |
| "loss": 0.4454, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 3.515253100905129, | |
| "grad_norm": 0.1198388915000314, | |
| "learning_rate": 1.948218773412886e-05, | |
| "loss": 0.4492, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 3.520616828695944, | |
| "grad_norm": 0.13993217838224647, | |
| "learning_rate": 1.935345275403859e-05, | |
| "loss": 0.44, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 3.5259805564867586, | |
| "grad_norm": 0.12889045963571127, | |
| "learning_rate": 1.9225008641820498e-05, | |
| "loss": 0.4443, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 3.531344284277573, | |
| "grad_norm": 0.12378896201897356, | |
| "learning_rate": 1.909685720699096e-05, | |
| "loss": 0.4456, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 3.5367080120683876, | |
| "grad_norm": 0.1276674265766728, | |
| "learning_rate": 1.8969000254943125e-05, | |
| "loss": 0.4481, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 3.5420717398592023, | |
| "grad_norm": 0.12390916606673247, | |
| "learning_rate": 1.8841439586921515e-05, | |
| "loss": 0.4451, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 3.5474354676500166, | |
| "grad_norm": 0.12273677799318142, | |
| "learning_rate": 1.871417699999656e-05, | |
| "loss": 0.4502, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 3.5527991954408313, | |
| "grad_norm": 0.11996313423286693, | |
| "learning_rate": 1.858721428703937e-05, | |
| "loss": 0.4561, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 3.558162923231646, | |
| "grad_norm": 0.12358256215052507, | |
| "learning_rate": 1.8460553236696448e-05, | |
| "loss": 0.4475, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 3.5635266510224604, | |
| "grad_norm": 1.460296103471973, | |
| "learning_rate": 1.8334195633364435e-05, | |
| "loss": 0.4528, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 3.568890378813275, | |
| "grad_norm": 0.12331491175077743, | |
| "learning_rate": 1.8208143257165085e-05, | |
| "loss": 0.4509, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 3.57425410660409, | |
| "grad_norm": 0.11750213968895096, | |
| "learning_rate": 1.808239788392006e-05, | |
| "loss": 0.4489, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 3.5796178343949046, | |
| "grad_norm": 0.11714707650553612, | |
| "learning_rate": 1.795696128512603e-05, | |
| "loss": 0.4466, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 3.5849815621857193, | |
| "grad_norm": 0.12196764778181841, | |
| "learning_rate": 1.78318352279296e-05, | |
| "loss": 0.4447, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 3.5903452899765336, | |
| "grad_norm": 0.12562029298509103, | |
| "learning_rate": 1.7707021475102548e-05, | |
| "loss": 0.4533, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 3.5957090177673483, | |
| "grad_norm": 0.14349002013949913, | |
| "learning_rate": 1.758252178501686e-05, | |
| "loss": 0.4436, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 3.601072745558163, | |
| "grad_norm": 0.11537479526408528, | |
| "learning_rate": 1.745833791162003e-05, | |
| "loss": 0.4482, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 3.6064364733489773, | |
| "grad_norm": 0.3056266641244182, | |
| "learning_rate": 1.7334471604410367e-05, | |
| "loss": 0.4419, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 3.611800201139792, | |
| "grad_norm": 0.11213414375074307, | |
| "learning_rate": 1.7210924608412257e-05, | |
| "loss": 0.4517, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 3.617163928930607, | |
| "grad_norm": 0.13465101181054254, | |
| "learning_rate": 1.7087698664151724e-05, | |
| "loss": 0.4386, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 3.6225276567214215, | |
| "grad_norm": 0.1265640161713725, | |
| "learning_rate": 1.6964795507631745e-05, | |
| "loss": 0.4495, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 3.6278913845122363, | |
| "grad_norm": 0.13249341487141286, | |
| "learning_rate": 1.684221687030793e-05, | |
| "loss": 0.4505, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 3.6332551123030505, | |
| "grad_norm": 0.10926988434260812, | |
| "learning_rate": 1.671996447906403e-05, | |
| "loss": 0.4507, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 3.6386188400938653, | |
| "grad_norm": 0.1160947830417163, | |
| "learning_rate": 1.659804005618769e-05, | |
| "loss": 0.4548, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 3.64398256788468, | |
| "grad_norm": 0.14474625249969114, | |
| "learning_rate": 1.6476445319346143e-05, | |
| "loss": 0.4415, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 3.6493462956754943, | |
| "grad_norm": 0.11464961989670115, | |
| "learning_rate": 1.6355181981561976e-05, | |
| "loss": 0.4493, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 3.654710023466309, | |
| "grad_norm": 0.11514676338990276, | |
| "learning_rate": 1.62342517511891e-05, | |
| "loss": 0.4464, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 3.6600737512571238, | |
| "grad_norm": 0.10388737077706772, | |
| "learning_rate": 1.6113656331888563e-05, | |
| "loss": 0.4431, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 3.665437479047938, | |
| "grad_norm": 0.21254990175343727, | |
| "learning_rate": 1.599339742260463e-05, | |
| "loss": 0.4494, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 3.6708012068387528, | |
| "grad_norm": 0.11018791870383089, | |
| "learning_rate": 1.5873476717540818e-05, | |
| "loss": 0.4453, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 3.6761649346295675, | |
| "grad_norm": 0.11202984963659915, | |
| "learning_rate": 1.575389590613604e-05, | |
| "loss": 0.4421, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 3.6815286624203822, | |
| "grad_norm": 0.5609794652204741, | |
| "learning_rate": 1.5634656673040824e-05, | |
| "loss": 0.4453, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 3.686892390211197, | |
| "grad_norm": 0.12056021537980104, | |
| "learning_rate": 1.5515760698093485e-05, | |
| "loss": 0.4422, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 3.6922561180020113, | |
| "grad_norm": 0.1143368342750797, | |
| "learning_rate": 1.539720965629661e-05, | |
| "loss": 0.4459, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 3.697619845792826, | |
| "grad_norm": 0.12221336375445264, | |
| "learning_rate": 1.5279005217793307e-05, | |
| "loss": 0.4454, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 3.7029835735836407, | |
| "grad_norm": 0.12874007735808712, | |
| "learning_rate": 1.5161149047843813e-05, | |
| "loss": 0.4501, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 3.708347301374455, | |
| "grad_norm": 0.23200442640856928, | |
| "learning_rate": 1.504364280680191e-05, | |
| "loss": 0.4421, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 3.7137110291652697, | |
| "grad_norm": 0.118738423956062, | |
| "learning_rate": 1.492648815009163e-05, | |
| "loss": 0.4473, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 3.7190747569560845, | |
| "grad_norm": 0.11440430785036737, | |
| "learning_rate": 1.4809686728183903e-05, | |
| "loss": 0.4441, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 3.724438484746899, | |
| "grad_norm": 0.11276169565585069, | |
| "learning_rate": 1.4693240186573267e-05, | |
| "loss": 0.4441, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 3.729802212537714, | |
| "grad_norm": 0.10878975487213734, | |
| "learning_rate": 1.4577150165754739e-05, | |
| "loss": 0.4455, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 3.7351659403285282, | |
| "grad_norm": 0.11093388920860042, | |
| "learning_rate": 1.4461418301200665e-05, | |
| "loss": 0.4422, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 3.740529668119343, | |
| "grad_norm": 0.10930519937275324, | |
| "learning_rate": 1.4346046223337737e-05, | |
| "loss": 0.4458, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 3.7458933959101577, | |
| "grad_norm": 0.11767456655079721, | |
| "learning_rate": 1.4231035557523925e-05, | |
| "loss": 0.4455, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 3.751257123700972, | |
| "grad_norm": 0.11061581216489039, | |
| "learning_rate": 1.4116387924025703e-05, | |
| "loss": 0.4451, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 3.7566208514917867, | |
| "grad_norm": 0.12532437758806012, | |
| "learning_rate": 1.4002104937995103e-05, | |
| "loss": 0.4435, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 3.7619845792826014, | |
| "grad_norm": 0.1101987534346296, | |
| "learning_rate": 1.388818820944704e-05, | |
| "loss": 0.4445, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 3.767348307073416, | |
| "grad_norm": 0.11241200124363177, | |
| "learning_rate": 1.377463934323663e-05, | |
| "loss": 0.4424, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 3.772712034864231, | |
| "grad_norm": 0.11769736086113589, | |
| "learning_rate": 1.3661459939036493e-05, | |
| "loss": 0.4441, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 3.778075762655045, | |
| "grad_norm": 0.10530984853730485, | |
| "learning_rate": 1.354865159131435e-05, | |
| "loss": 0.4454, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 3.78343949044586, | |
| "grad_norm": 0.128491542213919, | |
| "learning_rate": 1.3436215889310433e-05, | |
| "loss": 0.4483, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 3.7888032182366747, | |
| "grad_norm": 0.10663013232583238, | |
| "learning_rate": 1.3324154417015205e-05, | |
| "loss": 0.4477, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 3.794166946027489, | |
| "grad_norm": 0.11468548568373477, | |
| "learning_rate": 1.3212468753146955e-05, | |
| "loss": 0.4399, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 3.7995306738183037, | |
| "grad_norm": 0.11277620565805507, | |
| "learning_rate": 1.3101160471129588e-05, | |
| "loss": 0.4412, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 3.8048944016091184, | |
| "grad_norm": 0.10429631660245424, | |
| "learning_rate": 1.2990231139070519e-05, | |
| "loss": 0.439, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 3.8102581293999327, | |
| "grad_norm": 0.11117524802508165, | |
| "learning_rate": 1.2879682319738467e-05, | |
| "loss": 0.448, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 3.8156218571907474, | |
| "grad_norm": 0.10565379203871825, | |
| "learning_rate": 1.2769515570541554e-05, | |
| "loss": 0.4506, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 3.820985584981562, | |
| "grad_norm": 0.10995579742720774, | |
| "learning_rate": 1.2659732443505263e-05, | |
| "loss": 0.4378, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 3.826349312772377, | |
| "grad_norm": 0.1094854319106533, | |
| "learning_rate": 1.2550334485250661e-05, | |
| "loss": 0.4388, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 3.8317130405631916, | |
| "grad_norm": 0.10426452233943717, | |
| "learning_rate": 1.2441323236972536e-05, | |
| "loss": 0.449, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 3.837076768354006, | |
| "grad_norm": 0.11501738794377854, | |
| "learning_rate": 1.2332700234417745e-05, | |
| "loss": 0.4502, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 3.8424404961448206, | |
| "grad_norm": 0.10783773069849821, | |
| "learning_rate": 1.222446700786355e-05, | |
| "loss": 0.443, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 3.8478042239356354, | |
| "grad_norm": 0.1158852559257513, | |
| "learning_rate": 1.2116625082096043e-05, | |
| "loss": 0.4439, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 3.8531679517264497, | |
| "grad_norm": 0.10640912577878574, | |
| "learning_rate": 1.2009175976388683e-05, | |
| "loss": 0.4416, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 3.8585316795172644, | |
| "grad_norm": 0.11306196171729263, | |
| "learning_rate": 1.1902121204480928e-05, | |
| "loss": 0.4475, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 3.863895407308079, | |
| "grad_norm": 0.10374780127454973, | |
| "learning_rate": 1.1795462274556835e-05, | |
| "loss": 0.4413, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 3.869259135098894, | |
| "grad_norm": 0.10797835729481477, | |
| "learning_rate": 1.1689200689223862e-05, | |
| "loss": 0.4467, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 3.8746228628897086, | |
| "grad_norm": 0.10466854161183786, | |
| "learning_rate": 1.1583337945491717e-05, | |
| "loss": 0.4435, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 3.879986590680523, | |
| "grad_norm": 0.1037575420149514, | |
| "learning_rate": 1.1477875534751192e-05, | |
| "loss": 0.437, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 3.8853503184713376, | |
| "grad_norm": 0.11088366593365082, | |
| "learning_rate": 1.1372814942753246e-05, | |
| "loss": 0.4431, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 3.8907140462621523, | |
| "grad_norm": 0.09889840234162205, | |
| "learning_rate": 1.1268157649588018e-05, | |
| "loss": 0.4406, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 3.8960777740529666, | |
| "grad_norm": 0.10215017922131649, | |
| "learning_rate": 1.1163905129663956e-05, | |
| "loss": 0.4483, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 3.9014415018437814, | |
| "grad_norm": 0.1077840732925271, | |
| "learning_rate": 1.1060058851687128e-05, | |
| "loss": 0.4417, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 3.906805229634596, | |
| "grad_norm": 0.10317372106874026, | |
| "learning_rate": 1.0956620278640427e-05, | |
| "loss": 0.4446, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 3.912168957425411, | |
| "grad_norm": 0.1328432948301595, | |
| "learning_rate": 1.0853590867763054e-05, | |
| "loss": 0.431, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 3.9175326852162256, | |
| "grad_norm": 0.10066539948258632, | |
| "learning_rate": 1.0750972070529922e-05, | |
| "loss": 0.4474, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 3.92289641300704, | |
| "grad_norm": 0.10308611398222804, | |
| "learning_rate": 1.064876533263122e-05, | |
| "loss": 0.4466, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 3.9282601407978546, | |
| "grad_norm": 0.10575171336887328, | |
| "learning_rate": 1.0546972093952114e-05, | |
| "loss": 0.4448, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 3.9336238685886693, | |
| "grad_norm": 0.1090751248770618, | |
| "learning_rate": 1.0445593788552344e-05, | |
| "loss": 0.4405, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 3.9389875963794836, | |
| "grad_norm": 0.10355561012627491, | |
| "learning_rate": 1.0344631844646128e-05, | |
| "loss": 0.4432, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 3.9443513241702983, | |
| "grad_norm": 0.10595362039676634, | |
| "learning_rate": 1.024408768458196e-05, | |
| "loss": 0.4379, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 3.949715051961113, | |
| "grad_norm": 0.11198347644741112, | |
| "learning_rate": 1.0143962724822653e-05, | |
| "loss": 0.4491, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 3.9550787797519273, | |
| "grad_norm": 0.11560971244461928, | |
| "learning_rate": 1.0044258375925295e-05, | |
| "loss": 0.4482, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 3.960442507542742, | |
| "grad_norm": 0.10482045247799242, | |
| "learning_rate": 9.944976042521465e-06, | |
| "loss": 0.4427, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 3.965806235333557, | |
| "grad_norm": 0.10640773573235275, | |
| "learning_rate": 9.846117123297353e-06, | |
| "loss": 0.4455, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 3.9711699631243715, | |
| "grad_norm": 0.1038041198852965, | |
| "learning_rate": 9.747683010974147e-06, | |
| "loss": 0.4427, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 3.9765336909151863, | |
| "grad_norm": 0.10117239899529366, | |
| "learning_rate": 9.649675092288366e-06, | |
| "loss": 0.4413, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 3.9818974187060006, | |
| "grad_norm": 0.10620766516638773, | |
| "learning_rate": 9.552094747972297e-06, | |
| "loss": 0.4444, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 3.9872611464968153, | |
| "grad_norm": 0.10086629667003526, | |
| "learning_rate": 9.454943352734598e-06, | |
| "loss": 0.4417, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 3.99262487428763, | |
| "grad_norm": 0.09919272930882302, | |
| "learning_rate": 9.358222275240884e-06, | |
| "loss": 0.4487, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 3.9979886020784443, | |
| "grad_norm": 0.11767811064838585, | |
| "learning_rate": 9.26193287809451e-06, | |
| "loss": 0.5044, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 4.004022795843111, | |
| "grad_norm": 0.23793487869261376, | |
| "learning_rate": 9.166076517817281e-06, | |
| "loss": 0.661, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 4.009386523633926, | |
| "grad_norm": 0.13621220417679167, | |
| "learning_rate": 9.07065454483043e-06, | |
| "loss": 0.4149, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 4.01475025142474, | |
| "grad_norm": 0.17673227803196773, | |
| "learning_rate": 8.975668303435556e-06, | |
| "loss": 0.4189, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 4.020113979215555, | |
| "grad_norm": 0.18439349920499457, | |
| "learning_rate": 8.881119131795652e-06, | |
| "loss": 0.4107, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 4.025477707006369, | |
| "grad_norm": 0.12260746721261573, | |
| "learning_rate": 8.787008361916332e-06, | |
| "loss": 0.4125, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 4.030841434797184, | |
| "grad_norm": 0.15019363340139308, | |
| "learning_rate": 8.693337319626978e-06, | |
| "loss": 0.4128, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 4.036205162587999, | |
| "grad_norm": 0.1375901687390116, | |
| "learning_rate": 8.60010732456214e-06, | |
| "loss": 0.4055, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 4.041568890378813, | |
| "grad_norm": 0.12724391705577723, | |
| "learning_rate": 8.507319690142871e-06, | |
| "loss": 0.4146, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 4.0469326181696275, | |
| "grad_norm": 0.13895885677671274, | |
| "learning_rate": 8.414975723558317e-06, | |
| "loss": 0.4142, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 4.052296345960443, | |
| "grad_norm": 0.14044158258051825, | |
| "learning_rate": 8.323076725747192e-06, | |
| "loss": 0.4106, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 4.057660073751257, | |
| "grad_norm": 0.11701974422585514, | |
| "learning_rate": 8.23162399137952e-06, | |
| "loss": 0.4192, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 4.063023801542072, | |
| "grad_norm": 0.12971366592995717, | |
| "learning_rate": 8.140618808838408e-06, | |
| "loss": 0.4031, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 4.068387529332886, | |
| "grad_norm": 0.12697445696160525, | |
| "learning_rate": 8.050062460201827e-06, | |
| "loss": 0.4131, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 4.073751257123701, | |
| "grad_norm": 0.11378592889355851, | |
| "learning_rate": 7.959956221224626e-06, | |
| "loss": 0.4111, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 4.079114984914516, | |
| "grad_norm": 0.12117141039045348, | |
| "learning_rate": 7.870301361320485e-06, | |
| "loss": 0.4165, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 4.08447871270533, | |
| "grad_norm": 0.1197961111627781, | |
| "learning_rate": 7.781099143544124e-06, | |
| "loss": 0.4129, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 4.089842440496144, | |
| "grad_norm": 0.10807056855124261, | |
| "learning_rate": 7.692350824573402e-06, | |
| "loss": 0.4134, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 4.09520616828696, | |
| "grad_norm": 0.11324627112259107, | |
| "learning_rate": 7.604057654691699e-06, | |
| "loss": 0.4079, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 4.100569896077774, | |
| "grad_norm": 0.11724445449675143, | |
| "learning_rate": 7.516220877770273e-06, | |
| "loss": 0.411, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 4.105933623868589, | |
| "grad_norm": 0.10958796773895332, | |
| "learning_rate": 7.428841731250695e-06, | |
| "loss": 0.415, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 4.111297351659403, | |
| "grad_norm": 0.11066322738663384, | |
| "learning_rate": 7.341921446127509e-06, | |
| "loss": 0.4106, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 4.116661079450218, | |
| "grad_norm": 0.11185922785392959, | |
| "learning_rate": 7.255461246930791e-06, | |
| "loss": 0.4101, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 4.122024807241033, | |
| "grad_norm": 0.11501328533411367, | |
| "learning_rate": 7.169462351708958e-06, | |
| "loss": 0.4154, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 4.127388535031847, | |
| "grad_norm": 0.10891964674735306, | |
| "learning_rate": 7.083925972011583e-06, | |
| "loss": 0.4109, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 4.132752262822661, | |
| "grad_norm": 0.1084357061608093, | |
| "learning_rate": 6.998853312872347e-06, | |
| "loss": 0.4176, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 4.138115990613477, | |
| "grad_norm": 0.11666376677405135, | |
| "learning_rate": 6.914245572792064e-06, | |
| "loss": 0.4063, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 4.143479718404291, | |
| "grad_norm": 0.10587157920780786, | |
| "learning_rate": 6.830103943721749e-06, | |
| "loss": 0.4149, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 4.148843446195105, | |
| "grad_norm": 1.5402775013214236, | |
| "learning_rate": 6.7464296110458925e-06, | |
| "loss": 0.4212, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 4.15420717398592, | |
| "grad_norm": 0.1127675778731525, | |
| "learning_rate": 6.6632237535656995e-06, | |
| "loss": 0.4123, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 4.159570901776735, | |
| "grad_norm": 0.10636517110929453, | |
| "learning_rate": 6.58048754348255e-06, | |
| "loss": 0.4031, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 4.16493462956755, | |
| "grad_norm": 0.10408511766608668, | |
| "learning_rate": 6.4982221463813965e-06, | |
| "loss": 0.4092, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 4.170298357358364, | |
| "grad_norm": 0.10980308429743034, | |
| "learning_rate": 6.41642872121444e-06, | |
| "loss": 0.4133, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 4.175662085149178, | |
| "grad_norm": 0.1811518845819661, | |
| "learning_rate": 6.335108420284748e-06, | |
| "loss": 0.4114, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 4.1810258129399935, | |
| "grad_norm": 0.10929364439171839, | |
| "learning_rate": 6.254262389230006e-06, | |
| "loss": 0.4175, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 4.186389540730808, | |
| "grad_norm": 0.10394422643680504, | |
| "learning_rate": 6.1738917670064194e-06, | |
| "loss": 0.418, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 4.191753268521622, | |
| "grad_norm": 0.1012577329095609, | |
| "learning_rate": 6.09399768587263e-06, | |
| "loss": 0.4118, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 4.197116996312437, | |
| "grad_norm": 0.10687807539475037, | |
| "learning_rate": 6.014581271373829e-06, | |
| "loss": 0.4062, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 4.202480724103252, | |
| "grad_norm": 0.10262903089768421, | |
| "learning_rate": 5.935643642325808e-06, | |
| "loss": 0.4067, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 4.207844451894067, | |
| "grad_norm": 0.09927029282657747, | |
| "learning_rate": 5.857185910799277e-06, | |
| "loss": 0.4141, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 4.213208179684881, | |
| "grad_norm": 0.10324140566962033, | |
| "learning_rate": 5.779209182104133e-06, | |
| "loss": 0.4087, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 4.218571907475695, | |
| "grad_norm": 0.09857973369656504, | |
| "learning_rate": 5.701714554773956e-06, | |
| "loss": 0.4122, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 4.2239356352665105, | |
| "grad_norm": 0.10704699849654356, | |
| "learning_rate": 5.624703120550492e-06, | |
| "loss": 0.4132, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 4.229299363057325, | |
| "grad_norm": 0.09932400738261876, | |
| "learning_rate": 5.548175964368248e-06, | |
| "loss": 0.4136, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 4.234663090848139, | |
| "grad_norm": 0.10182388734864921, | |
| "learning_rate": 5.4721341643392845e-06, | |
| "loss": 0.4165, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 4.240026818638954, | |
| "grad_norm": 0.10458339193532643, | |
| "learning_rate": 5.39657879173793e-06, | |
| "loss": 0.4136, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 4.2453905464297685, | |
| "grad_norm": 0.10492190568796772, | |
| "learning_rate": 5.3215109109857835e-06, | |
| "loss": 0.4156, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 4.250754274220584, | |
| "grad_norm": 0.09833089016292879, | |
| "learning_rate": 5.246931579636654e-06, | |
| "loss": 0.4213, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 4.256118002011398, | |
| "grad_norm": 0.09631929734391337, | |
| "learning_rate": 5.172841848361674e-06, | |
| "loss": 0.4126, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 4.261481729802212, | |
| "grad_norm": 0.09916119673198169, | |
| "learning_rate": 5.099242760934533e-06, | |
| "loss": 0.4093, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 4.2668454575930275, | |
| "grad_norm": 0.09868198578261592, | |
| "learning_rate": 5.026135354216717e-06, | |
| "loss": 0.4136, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 4.272209185383842, | |
| "grad_norm": 0.09504747159177464, | |
| "learning_rate": 4.953520658142958e-06, | |
| "loss": 0.4156, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 4.277572913174656, | |
| "grad_norm": 0.09411067382234167, | |
| "learning_rate": 4.881399695706677e-06, | |
| "loss": 0.4136, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 4.282936640965471, | |
| "grad_norm": 0.10081365538952461, | |
| "learning_rate": 4.809773482945601e-06, | |
| "loss": 0.4122, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 4.2883003687562855, | |
| "grad_norm": 1.2440830128881666, | |
| "learning_rate": 4.738643028927432e-06, | |
| "loss": 0.4265, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 4.293664096547101, | |
| "grad_norm": 0.09904601559918061, | |
| "learning_rate": 4.668009335735648e-06, | |
| "loss": 0.4119, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 4.299027824337915, | |
| "grad_norm": 0.10410871952221983, | |
| "learning_rate": 4.5978733984553835e-06, | |
| "loss": 0.4116, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 4.304391552128729, | |
| "grad_norm": 0.11936741762617652, | |
| "learning_rate": 4.528236205159386e-06, | |
| "loss": 0.4163, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 4.309755279919544, | |
| "grad_norm": 0.7752585101606566, | |
| "learning_rate": 4.459098736894114e-06, | |
| "loss": 0.4168, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 4.315119007710359, | |
| "grad_norm": 0.10147283835455163, | |
| "learning_rate": 4.39046196766594e-06, | |
| "loss": 0.409, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 4.320482735501173, | |
| "grad_norm": 0.10494531512367294, | |
| "learning_rate": 4.322326864427387e-06, | |
| "loss": 0.4108, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 4.325846463291988, | |
| "grad_norm": 0.09812759496228979, | |
| "learning_rate": 4.254694387063514e-06, | |
| "loss": 0.4139, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 4.3312101910828025, | |
| "grad_norm": 0.10039748389385268, | |
| "learning_rate": 4.187565488378434e-06, | |
| "loss": 0.4099, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 4.336573918873617, | |
| "grad_norm": 0.1000059293045232, | |
| "learning_rate": 4.120941114081833e-06, | |
| "loss": 0.4112, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 4.341937646664432, | |
| "grad_norm": 0.09526496629638147, | |
| "learning_rate": 4.0548222027756835e-06, | |
| "loss": 0.4115, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 4.347301374455246, | |
| "grad_norm": 0.09813119886581101, | |
| "learning_rate": 3.989209685941027e-06, | |
| "loss": 0.412, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 4.352665102246061, | |
| "grad_norm": 0.09732532571840974, | |
| "learning_rate": 3.924104487924805e-06, | |
| "loss": 0.4061, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 4.358028830036876, | |
| "grad_norm": 0.09543190553673306, | |
| "learning_rate": 3.859507525926897e-06, | |
| "loss": 0.4141, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 4.36339255782769, | |
| "grad_norm": 0.09873317668913352, | |
| "learning_rate": 3.795419709987149e-06, | |
| "loss": 0.4153, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 4.368756285618505, | |
| "grad_norm": 0.23096877402734253, | |
| "learning_rate": 3.7318419429726025e-06, | |
| "loss": 0.4102, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 4.374120013409319, | |
| "grad_norm": 0.10185639946736366, | |
| "learning_rate": 3.6687751205647117e-06, | |
| "loss": 0.4145, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 4.379483741200134, | |
| "grad_norm": 0.38509111321061773, | |
| "learning_rate": 3.606220131246776e-06, | |
| "loss": 0.4162, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 4.384847468990949, | |
| "grad_norm": 0.09806919042690798, | |
| "learning_rate": 3.5441778562914242e-06, | |
| "loss": 0.4141, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 4.390211196781763, | |
| "grad_norm": 0.0969544886038649, | |
| "learning_rate": 3.482649169748147e-06, | |
| "loss": 0.4212, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 4.395574924572578, | |
| "grad_norm": 0.11496521800565809, | |
| "learning_rate": 3.4216349384310533e-06, | |
| "loss": 0.4115, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 4.400938652363393, | |
| "grad_norm": 0.09584588428602607, | |
| "learning_rate": 3.3611360219065925e-06, | |
| "loss": 0.4113, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 4.406302380154207, | |
| "grad_norm": 0.0925654699857564, | |
| "learning_rate": 3.3011532724815142e-06, | |
| "loss": 0.4102, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 4.411666107945022, | |
| "grad_norm": 0.09392896960421455, | |
| "learning_rate": 3.241687535190776e-06, | |
| "loss": 0.4105, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 4.417029835735836, | |
| "grad_norm": 0.09291428552953933, | |
| "learning_rate": 3.1827396477857264e-06, | |
| "loss": 0.4135, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 4.422393563526651, | |
| "grad_norm": 0.09323644383847197, | |
| "learning_rate": 3.124310440722247e-06, | |
| "loss": 0.4159, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 4.427757291317466, | |
| "grad_norm": 0.23059082156453775, | |
| "learning_rate": 3.0664007371490558e-06, | |
| "loss": 0.4185, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 4.43312101910828, | |
| "grad_norm": 0.09839755683407715, | |
| "learning_rate": 3.009011352896152e-06, | |
| "loss": 0.4144, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 4.4384847468990944, | |
| "grad_norm": 0.09990156860630273, | |
| "learning_rate": 2.9521430964632602e-06, | |
| "loss": 0.4056, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 4.44384847468991, | |
| "grad_norm": 0.093523667605744, | |
| "learning_rate": 2.8957967690084986e-06, | |
| "loss": 0.4165, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 4.449212202480724, | |
| "grad_norm": 0.09192510131863983, | |
| "learning_rate": 2.839973164337044e-06, | |
| "loss": 0.4115, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 4.454575930271539, | |
| "grad_norm": 0.09410786987122588, | |
| "learning_rate": 2.7846730688900003e-06, | |
| "loss": 0.4138, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 4.459939658062353, | |
| "grad_norm": 0.09707065586772506, | |
| "learning_rate": 2.729897261733263e-06, | |
| "loss": 0.4109, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 4.465303385853168, | |
| "grad_norm": 0.09262040362541676, | |
| "learning_rate": 2.675646514546597e-06, | |
| "loss": 0.4139, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 4.470667113643983, | |
| "grad_norm": 0.09786358492563979, | |
| "learning_rate": 2.6219215916127283e-06, | |
| "loss": 0.4112, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 4.476030841434797, | |
| "grad_norm": 0.09168116998441043, | |
| "learning_rate": 2.568723249806575e-06, | |
| "loss": 0.4106, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 4.481394569225611, | |
| "grad_norm": 0.09262516025294427, | |
| "learning_rate": 2.516052238584625e-06, | |
| "loss": 0.4149, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 4.486758297016427, | |
| "grad_norm": 0.09205026270318697, | |
| "learning_rate": 2.463909299974323e-06, | |
| "loss": 0.4062, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 4.492122024807241, | |
| "grad_norm": 0.09122547354614034, | |
| "learning_rate": 2.4122951685636674e-06, | |
| "loss": 0.4177, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 4.497485752598056, | |
| "grad_norm": 0.09211390919384611, | |
| "learning_rate": 2.3612105714908173e-06, | |
| "loss": 0.4046, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 4.50284948038887, | |
| "grad_norm": 0.091478554430513, | |
| "learning_rate": 2.310656228433894e-06, | |
| "loss": 0.412, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 4.508213208179685, | |
| "grad_norm": 0.09399408591730637, | |
| "learning_rate": 2.260632851600795e-06, | |
| "loss": 0.4149, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 4.5135769359705, | |
| "grad_norm": 0.09195607008303311, | |
| "learning_rate": 2.211141145719191e-06, | |
| "loss": 0.413, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 4.518940663761314, | |
| "grad_norm": 0.47959804465608913, | |
| "learning_rate": 2.1621818080265955e-06, | |
| "loss": 0.4182, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 4.524304391552128, | |
| "grad_norm": 0.09051553997088063, | |
| "learning_rate": 2.1137555282605325e-06, | |
| "loss": 0.4114, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 4.5296681193429436, | |
| "grad_norm": 0.08829292877572836, | |
| "learning_rate": 2.0658629886488234e-06, | |
| "loss": 0.4165, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 4.535031847133758, | |
| "grad_norm": 0.09080343947058375, | |
| "learning_rate": 2.0185048638999706e-06, | |
| "loss": 0.4142, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 4.540395574924572, | |
| "grad_norm": 0.09243369068431759, | |
| "learning_rate": 1.9716818211936674e-06, | |
| "loss": 0.4005, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 4.545759302715387, | |
| "grad_norm": 0.09353671619124167, | |
| "learning_rate": 1.925394520171393e-06, | |
| "loss": 0.4143, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 4.551123030506202, | |
| "grad_norm": 0.09072830507170011, | |
| "learning_rate": 1.8796436129270955e-06, | |
| "loss": 0.4116, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 4.556486758297017, | |
| "grad_norm": 0.09201634812096428, | |
| "learning_rate": 1.8344297439980475e-06, | |
| "loss": 0.4093, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 4.561850486087831, | |
| "grad_norm": 0.20856475534127397, | |
| "learning_rate": 1.7897535503557196e-06, | |
| "loss": 0.4161, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 4.567214213878645, | |
| "grad_norm": 0.0894737416874815, | |
| "learning_rate": 1.7456156613968644e-06, | |
| "loss": 0.4128, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 4.5725779416694605, | |
| "grad_norm": 0.09249497786193772, | |
| "learning_rate": 1.702016698934581e-06, | |
| "loss": 0.4104, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 4.577941669460275, | |
| "grad_norm": 0.10099307480578555, | |
| "learning_rate": 1.65895727718961e-06, | |
| "loss": 0.4134, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 4.58330539725109, | |
| "grad_norm": 0.0894656321450511, | |
| "learning_rate": 1.6164380027816485e-06, | |
| "loss": 0.4153, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 4.588669125041904, | |
| "grad_norm": 0.08952970498041048, | |
| "learning_rate": 1.5744594747208308e-06, | |
| "loss": 0.4118, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 4.594032852832719, | |
| "grad_norm": 0.08843654007502101, | |
| "learning_rate": 1.5330222843992658e-06, | |
| "loss": 0.4156, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 4.599396580623534, | |
| "grad_norm": 0.18591112023947728, | |
| "learning_rate": 1.492127015582714e-06, | |
| "loss": 0.4154, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 4.604760308414348, | |
| "grad_norm": 0.0944881909519439, | |
| "learning_rate": 1.4517742444023665e-06, | |
| "loss": 0.4129, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 4.610124036205162, | |
| "grad_norm": 0.09301181960862621, | |
| "learning_rate": 1.4119645393467196e-06, | |
| "loss": 0.4193, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 4.6154877639959775, | |
| "grad_norm": 0.08874447140825721, | |
| "learning_rate": 1.3726984612535854e-06, | |
| "loss": 0.4126, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 4.620851491786792, | |
| "grad_norm": 0.09037539019783776, | |
| "learning_rate": 1.3339765633021551e-06, | |
| "loss": 0.4164, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 4.626215219577606, | |
| "grad_norm": 0.09159608133309274, | |
| "learning_rate": 1.2957993910052503e-06, | |
| "loss": 0.4156, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 4.631578947368421, | |
| "grad_norm": 0.0876847986538798, | |
| "learning_rate": 1.2581674822016087e-06, | |
| "loss": 0.4133, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 4.6369426751592355, | |
| "grad_norm": 0.08889717326718397, | |
| "learning_rate": 1.221081367048309e-06, | |
| "loss": 0.4173, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 4.64230640295005, | |
| "grad_norm": 0.08911910146230485, | |
| "learning_rate": 1.1845415680133089e-06, | |
| "loss": 0.4041, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 4.647670130740865, | |
| "grad_norm": 0.0912001297197578, | |
| "learning_rate": 1.1485485998680822e-06, | |
| "loss": 0.4057, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 4.653033858531679, | |
| "grad_norm": 0.08889958044076257, | |
| "learning_rate": 1.1131029696803774e-06, | |
| "loss": 0.4078, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 4.6583975863224945, | |
| "grad_norm": 0.09028811157516371, | |
| "learning_rate": 1.0782051768070477e-06, | |
| "loss": 0.417, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 4.663761314113309, | |
| "grad_norm": 0.09213399459832071, | |
| "learning_rate": 1.0438557128870408e-06, | |
| "loss": 0.4148, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 4.669125041904123, | |
| "grad_norm": 0.08974457665914247, | |
| "learning_rate": 1.010055061834474e-06, | |
| "loss": 0.4075, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 4.674488769694938, | |
| "grad_norm": 0.08738862355780086, | |
| "learning_rate": 9.768036998317875e-07, | |
| "loss": 0.4167, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 4.6798524974857525, | |
| "grad_norm": 0.08744381941505991, | |
| "learning_rate": 9.441020953230696e-07, | |
| "loss": 0.4044, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 4.685216225276568, | |
| "grad_norm": 0.08768023798398505, | |
| "learning_rate": 9.119507090074342e-07, | |
| "loss": 0.4094, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 4.690579953067382, | |
| "grad_norm": 0.08529166544500341, | |
| "learning_rate": 8.803499938325477e-07, | |
| "loss": 0.4129, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 4.695943680858196, | |
| "grad_norm": 0.08637519839116772, | |
| "learning_rate": 8.493003949882373e-07, | |
| "loss": 0.4133, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 4.701307408649011, | |
| "grad_norm": 0.0887897705514168, | |
| "learning_rate": 8.188023499002206e-07, | |
| "loss": 0.4051, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 4.706671136439826, | |
| "grad_norm": 0.09055462354291803, | |
| "learning_rate": 7.888562882239425e-07, | |
| "loss": 0.4161, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 4.71203486423064, | |
| "grad_norm": 0.08675152093159026, | |
| "learning_rate": 7.594626318385256e-07, | |
| "loss": 0.4039, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 4.717398592021455, | |
| "grad_norm": 0.08841042189110157, | |
| "learning_rate": 7.30621794840829e-07, | |
| "loss": 0.411, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 4.7227623198122695, | |
| "grad_norm": 0.09048394753620571, | |
| "learning_rate": 7.023341835396036e-07, | |
| "loss": 0.4158, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 4.728126047603084, | |
| "grad_norm": 0.0937542868591101, | |
| "learning_rate": 6.746001964497773e-07, | |
| "loss": 0.4108, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 4.733489775393899, | |
| "grad_norm": 0.09183387737661795, | |
| "learning_rate": 6.474202242868411e-07, | |
| "loss": 0.4132, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 4.738853503184713, | |
| "grad_norm": 0.08673828591623912, | |
| "learning_rate": 6.207946499613382e-07, | |
| "loss": 0.4138, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 4.7442172309755275, | |
| "grad_norm": 0.1221653497024461, | |
| "learning_rate": 5.947238485734819e-07, | |
| "loss": 0.411, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 4.749580958766343, | |
| "grad_norm": 0.08800343336015064, | |
| "learning_rate": 5.692081874078481e-07, | |
| "loss": 0.4044, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 4.754944686557157, | |
| "grad_norm": 0.08826254435249375, | |
| "learning_rate": 5.442480259282335e-07, | |
| "loss": 0.414, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 4.760308414347972, | |
| "grad_norm": 0.0886779509344544, | |
| "learning_rate": 5.198437157725567e-07, | |
| "loss": 0.4116, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 4.765672142138786, | |
| "grad_norm": 0.09045462951093008, | |
| "learning_rate": 4.959956007479338e-07, | |
| "loss": 0.4035, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 4.771035869929601, | |
| "grad_norm": 0.08680316882639165, | |
| "learning_rate": 4.7270401682581567e-07, | |
| "loss": 0.415, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 4.776399597720416, | |
| "grad_norm": 0.0867270897509586, | |
| "learning_rate": 4.499692921372667e-07, | |
| "loss": 0.4143, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 4.78176332551123, | |
| "grad_norm": 0.08467163312345347, | |
| "learning_rate": 4.277917469683246e-07, | |
| "loss": 0.4081, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 4.787127053302045, | |
| "grad_norm": 0.14351442138219858, | |
| "learning_rate": 4.061716937555149e-07, | |
| "loss": 0.4149, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 4.79249078109286, | |
| "grad_norm": 0.08694159920502774, | |
| "learning_rate": 3.851094370814323e-07, | |
| "loss": 0.4173, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 4.797854508883674, | |
| "grad_norm": 0.08529615404396881, | |
| "learning_rate": 3.646052736704464e-07, | |
| "loss": 0.4062, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 4.803218236674489, | |
| "grad_norm": 0.08879884361022528, | |
| "learning_rate": 3.4465949238453144e-07, | |
| "loss": 0.4197, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 4.808581964465303, | |
| "grad_norm": 0.0862432359092941, | |
| "learning_rate": 3.252723742191899e-07, | |
| "loss": 0.4139, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 4.813945692256118, | |
| "grad_norm": 0.08670212847739438, | |
| "learning_rate": 3.064441922995043e-07, | |
| "loss": 0.421, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 4.819309420046933, | |
| "grad_norm": 0.0861260158634291, | |
| "learning_rate": 2.8817521187626926e-07, | |
| "loss": 0.4114, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 4.824673147837747, | |
| "grad_norm": 0.13041000271914696, | |
| "learning_rate": 2.704656903222791e-07, | |
| "loss": 0.4089, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 4.830036875628561, | |
| "grad_norm": 0.0853917982287929, | |
| "learning_rate": 2.533158771286903e-07, | |
| "loss": 0.4189, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 4.835400603419377, | |
| "grad_norm": 0.08585480202942483, | |
| "learning_rate": 2.3672601390148707e-07, | |
| "loss": 0.4102, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 4.840764331210191, | |
| "grad_norm": 0.08514537079223002, | |
| "learning_rate": 2.206963343581281e-07, | |
| "loss": 0.4186, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 4.846128059001006, | |
| "grad_norm": 0.0854519613880308, | |
| "learning_rate": 2.0522706432419382e-07, | |
| "loss": 0.4166, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 4.85149178679182, | |
| "grad_norm": 0.08595768719036412, | |
| "learning_rate": 1.903184217302556e-07, | |
| "loss": 0.4138, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 4.856855514582635, | |
| "grad_norm": 0.08593142424333411, | |
| "learning_rate": 1.7597061660877157e-07, | |
| "loss": 0.4187, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 4.86221924237345, | |
| "grad_norm": 0.08605519303696639, | |
| "learning_rate": 1.6218385109114665e-07, | |
| "loss": 0.4152, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 4.867582970164264, | |
| "grad_norm": 0.08778080722891127, | |
| "learning_rate": 1.4895831940486827e-07, | |
| "loss": 0.408, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 4.872946697955078, | |
| "grad_norm": 0.5236720260419594, | |
| "learning_rate": 1.3629420787079738e-07, | |
| "loss": 0.4086, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 4.878310425745894, | |
| "grad_norm": 0.08691961311755944, | |
| "learning_rate": 1.2419169490051287e-07, | |
| "loss": 0.4162, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 4.883674153536708, | |
| "grad_norm": 0.08604130165924209, | |
| "learning_rate": 1.1265095099381118e-07, | |
| "loss": 0.4161, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 4.889037881327523, | |
| "grad_norm": 0.08562809530743228, | |
| "learning_rate": 1.0167213873631732e-07, | |
| "loss": 0.4068, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 4.894401609118337, | |
| "grad_norm": 0.08788772763541514, | |
| "learning_rate": 9.125541279717098e-08, | |
| "loss": 0.4117, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 4.899765336909152, | |
| "grad_norm": 0.08465883837484985, | |
| "learning_rate": 8.14009199268595e-08, | |
| "loss": 0.412, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 4.905129064699967, | |
| "grad_norm": 0.08635396133541602, | |
| "learning_rate": 7.210879895515277e-08, | |
| "loss": 0.4114, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 4.910492792490781, | |
| "grad_norm": 0.08583085092727974, | |
| "learning_rate": 6.337918078914041e-08, | |
| "loss": 0.4129, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 4.915856520281595, | |
| "grad_norm": 0.08664926249547512, | |
| "learning_rate": 5.521218841139764e-08, | |
| "loss": 0.4154, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 4.9212202480724105, | |
| "grad_norm": 0.08448787096930517, | |
| "learning_rate": 4.7607936878235614e-08, | |
| "loss": 0.406, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 4.926583975863225, | |
| "grad_norm": 0.0850836523204834, | |
| "learning_rate": 4.0566533318102676e-08, | |
| "loss": 0.4122, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 4.931947703654039, | |
| "grad_norm": 0.08844457157599679, | |
| "learning_rate": 3.408807693006111e-08, | |
| "loss": 0.4186, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 4.937311431444854, | |
| "grad_norm": 0.08507338195494901, | |
| "learning_rate": 2.817265898237942e-08, | |
| "loss": 0.4044, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 4.942675159235669, | |
| "grad_norm": 0.08409639688790976, | |
| "learning_rate": 2.2820362811279973e-08, | |
| "loss": 0.4084, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 4.948038887026484, | |
| "grad_norm": 0.08816595457478153, | |
| "learning_rate": 1.8031263819726642e-08, | |
| "loss": 0.4145, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 4.953402614817298, | |
| "grad_norm": 0.08680564134665435, | |
| "learning_rate": 1.3805429476385634e-08, | |
| "loss": 0.4068, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 4.958766342608112, | |
| "grad_norm": 0.09052432530510993, | |
| "learning_rate": 1.0142919314679588e-08, | |
| "loss": 0.4153, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 4.9641300703989275, | |
| "grad_norm": 0.09320878153934302, | |
| "learning_rate": 7.043784931921593e-09, | |
| "loss": 0.4103, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 4.969493798189742, | |
| "grad_norm": 0.08708424563628227, | |
| "learning_rate": 4.508069988617969e-09, | |
| "loss": 0.4131, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 4.974857525980557, | |
| "grad_norm": 0.08389684177916036, | |
| "learning_rate": 2.5358102078376635e-09, | |
| "loss": 0.4079, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 4.980221253771371, | |
| "grad_norm": 0.08673227922167183, | |
| "learning_rate": 1.127033374705988e-09, | |
| "loss": 0.4102, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 4.9855849815621855, | |
| "grad_norm": 0.08435533244348804, | |
| "learning_rate": 2.8175933603158403e-10, | |
| "loss": 0.412, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 4.990948709353001, | |
| "grad_norm": 0.0856513074559934, | |
| "learning_rate": 0.0, | |
| "loss": 0.4095, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 4.990948709353001, | |
| "step": 930, | |
| "total_flos": 2.393166916865753e+19, | |
| "train_loss": 0.5230806940986265, | |
| "train_runtime": 141229.2058, | |
| "train_samples_per_second": 3.379, | |
| "train_steps_per_second": 0.007 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 930, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.393166916865753e+19, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
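
The record above appears to be a Hugging Face Trainer-style state log (note the `log_history` array, the `TrainerControl` callback state, and the closing summary with `train_loss` and `train_runtime`). As a minimal sketch only, assuming the log has been saved as plain JSON at `trainer_state.json` and that Python with matplotlib is available, the per-step entries can be plotted to inspect the loss and learning-rate schedules; the final summary record is skipped because it carries no per-step `loss` field:

```python
import json

import matplotlib.pyplot as plt

# Path is an assumption: adjust to wherever the trainer state JSON lives.
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only per-step records; the last entry in log_history is a run summary
# (train_loss, train_runtime, ...) and has no "loss" key.
history = [entry for entry in state["log_history"] if "loss" in entry]

steps = [entry["step"] for entry in history]
losses = [entry["loss"] for entry in history]
lrs = [entry["learning_rate"] for entry in history]

# Two stacked panels sharing the step axis: training loss and LR schedule.
fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True, figsize=(8, 6))
ax_loss.plot(steps, losses)
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lrs)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("step")
fig.tight_layout()
plt.show()
```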