{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 7397, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001351922264469793, "grad_norm": 81.23384094238281, "learning_rate": 0.0, "loss": 0.75775146484375, "step": 1 }, { "epoch": 0.0002703844528939586, "grad_norm": 62.586753845214844, "learning_rate": 5.859375e-08, "loss": 0.735595703125, "step": 2 }, { "epoch": 0.0004055766793409379, "grad_norm": 64.03299713134766, "learning_rate": 1.171875e-07, "loss": 0.73968505859375, "step": 3 }, { "epoch": 0.0005407689057879172, "grad_norm": 67.41014862060547, "learning_rate": 1.7578125e-07, "loss": 0.7408447265625, "step": 4 }, { "epoch": 0.0006759611322348965, "grad_norm": 64.51225280761719, "learning_rate": 2.34375e-07, "loss": 0.73907470703125, "step": 5 }, { "epoch": 0.0008111533586818758, "grad_norm": 71.63513946533203, "learning_rate": 2.9296875000000003e-07, "loss": 0.74359130859375, "step": 6 }, { "epoch": 0.0009463455851288551, "grad_norm": 74.01216125488281, "learning_rate": 3.515625e-07, "loss": 0.745849609375, "step": 7 }, { "epoch": 0.0010815378115758344, "grad_norm": 64.52039337158203, "learning_rate": 4.1015625e-07, "loss": 0.73797607421875, "step": 8 }, { "epoch": 0.0012167300380228137, "grad_norm": 69.04621124267578, "learning_rate": 4.6875e-07, "loss": 0.740966796875, "step": 9 }, { "epoch": 0.001351922264469793, "grad_norm": 67.63582611083984, "learning_rate": 5.2734375e-07, "loss": 0.73968505859375, "step": 10 }, { "epoch": 0.0014871144909167722, "grad_norm": 72.65314483642578, "learning_rate": 5.859375000000001e-07, "loss": 0.74273681640625, "step": 11 }, { "epoch": 0.0016223067173637515, "grad_norm": 70.06163024902344, "learning_rate": 6.4453125e-07, "loss": 0.7413330078125, "step": 12 }, { "epoch": 0.0017574989438107309, "grad_norm": 69.63719940185547, "learning_rate": 7.03125e-07, "loss": 0.73553466796875, "step": 13 }, { "epoch": 0.0018926911702577102, "grad_norm": 73.82935333251953, "learning_rate": 7.6171875e-07, "loss": 0.74273681640625, "step": 14 }, { "epoch": 0.0020278833967046894, "grad_norm": 72.55986022949219, "learning_rate": 8.203125e-07, "loss": 0.73785400390625, "step": 15 }, { "epoch": 0.0021630756231516687, "grad_norm": 74.7134017944336, "learning_rate": 8.7890625e-07, "loss": 0.73419189453125, "step": 16 }, { "epoch": 0.002298267849598648, "grad_norm": 60.44881820678711, "learning_rate": 9.375e-07, "loss": 0.72503662109375, "step": 17 }, { "epoch": 0.0024334600760456274, "grad_norm": 51.40937042236328, "learning_rate": 9.9609375e-07, "loss": 0.71600341796875, "step": 18 }, { "epoch": 0.0025686523024926067, "grad_norm": 77.11557006835938, "learning_rate": 1.0546875e-06, "loss": 0.72540283203125, "step": 19 }, { "epoch": 0.002703844528939586, "grad_norm": 54.883934020996094, "learning_rate": 1.11328125e-06, "loss": 0.71466064453125, "step": 20 }, { "epoch": 0.0028390367553865654, "grad_norm": 47.21072769165039, "learning_rate": 1.1718750000000001e-06, "loss": 0.71234130859375, "step": 21 }, { "epoch": 0.0029742289818335444, "grad_norm": 69.94409942626953, "learning_rate": 1.23046875e-06, "loss": 0.69549560546875, "step": 22 }, { "epoch": 0.0031094212082805237, "grad_norm": 60.53618621826172, "learning_rate": 1.2890625e-06, "loss": 0.69573974609375, "step": 23 }, { "epoch": 0.003244613434727503, "grad_norm": 60.381465911865234, "learning_rate": 1.34765625e-06, "loss": 0.6925048828125, "step": 24 }, { "epoch": 0.0033798056611744824, "grad_norm": 51.19105911254883, "learning_rate": 1.40625e-06, "loss": 0.67572021484375, "step": 25 }, { "epoch": 0.0035149978876214617, "grad_norm": 52.668724060058594, "learning_rate": 1.46484375e-06, "loss": 0.6717529296875, "step": 26 }, { "epoch": 0.003650190114068441, "grad_norm": 52.021339416503906, "learning_rate": 1.5234375e-06, "loss": 0.67193603515625, "step": 27 }, { "epoch": 0.0037853823405154204, "grad_norm": 55.213294982910156, "learning_rate": 1.5820312500000001e-06, "loss": 0.6666259765625, "step": 28 }, { "epoch": 0.003920574566962399, "grad_norm": 49.64873123168945, "learning_rate": 1.640625e-06, "loss": 0.666748046875, "step": 29 }, { "epoch": 0.004055766793409379, "grad_norm": 47.42051696777344, "learning_rate": 1.69921875e-06, "loss": 0.6123046875, "step": 30 }, { "epoch": 0.004190959019856358, "grad_norm": 47.74208068847656, "learning_rate": 1.7578125e-06, "loss": 0.6053466796875, "step": 31 }, { "epoch": 0.004326151246303337, "grad_norm": 40.186519622802734, "learning_rate": 1.81640625e-06, "loss": 0.6141357421875, "step": 32 }, { "epoch": 0.004461343472750317, "grad_norm": 40.875267028808594, "learning_rate": 1.875e-06, "loss": 0.60552978515625, "step": 33 }, { "epoch": 0.004596535699197296, "grad_norm": 41.25675964355469, "learning_rate": 1.93359375e-06, "loss": 0.58428955078125, "step": 34 }, { "epoch": 0.0047317279256442754, "grad_norm": 41.89238357543945, "learning_rate": 1.9921875e-06, "loss": 0.572235107421875, "step": 35 }, { "epoch": 0.004866920152091255, "grad_norm": 49.741817474365234, "learning_rate": 2.05078125e-06, "loss": 0.54248046875, "step": 36 }, { "epoch": 0.005002112378538234, "grad_norm": 39.604393005371094, "learning_rate": 2.109375e-06, "loss": 0.560882568359375, "step": 37 }, { "epoch": 0.0051373046049852135, "grad_norm": 36.70024871826172, "learning_rate": 2.16796875e-06, "loss": 0.573028564453125, "step": 38 }, { "epoch": 0.005272496831432193, "grad_norm": 31.933439254760742, "learning_rate": 2.2265625e-06, "loss": 0.562408447265625, "step": 39 }, { "epoch": 0.005407689057879172, "grad_norm": 26.103178024291992, "learning_rate": 2.28515625e-06, "loss": 0.54730224609375, "step": 40 }, { "epoch": 0.0055428812843261515, "grad_norm": 17.096759796142578, "learning_rate": 2.3437500000000002e-06, "loss": 0.533477783203125, "step": 41 }, { "epoch": 0.005678073510773131, "grad_norm": 17.932605743408203, "learning_rate": 2.40234375e-06, "loss": 0.50421142578125, "step": 42 }, { "epoch": 0.00581326573722011, "grad_norm": 14.606308937072754, "learning_rate": 2.4609375e-06, "loss": 0.51214599609375, "step": 43 }, { "epoch": 0.005948457963667089, "grad_norm": 19.89915657043457, "learning_rate": 2.5195312500000003e-06, "loss": 0.4691162109375, "step": 44 }, { "epoch": 0.006083650190114068, "grad_norm": 14.97246265411377, "learning_rate": 2.578125e-06, "loss": 0.490631103515625, "step": 45 }, { "epoch": 0.006218842416561047, "grad_norm": 13.272928237915039, "learning_rate": 2.63671875e-06, "loss": 0.48687744140625, "step": 46 }, { "epoch": 0.006354034643008027, "grad_norm": 17.15617561340332, "learning_rate": 2.6953125e-06, "loss": 0.451324462890625, "step": 47 }, { "epoch": 0.006489226869455006, "grad_norm": 18.87810707092285, "learning_rate": 2.75390625e-06, "loss": 0.427703857421875, "step": 48 }, { "epoch": 0.0066244190959019855, "grad_norm": 4.2861738204956055, "learning_rate": 2.8125e-06, "loss": 0.51715087890625, "step": 49 }, { "epoch": 0.006759611322348965, "grad_norm": 1.9247297048568726, "learning_rate": 2.87109375e-06, "loss": 0.525146484375, "step": 50 }, { "epoch": 0.006894803548795944, "grad_norm": 6.371675491333008, "learning_rate": 2.9296875e-06, "loss": 0.48345947265625, "step": 51 }, { "epoch": 0.0070299957752429235, "grad_norm": 2.8637478351593018, "learning_rate": 2.9882812500000002e-06, "loss": 0.496612548828125, "step": 52 }, { "epoch": 0.007165188001689903, "grad_norm": 8.740577697753906, "learning_rate": 3.046875e-06, "loss": 0.4402313232421875, "step": 53 }, { "epoch": 0.007300380228136882, "grad_norm": 4.226352691650391, "learning_rate": 3.10546875e-06, "loss": 0.462860107421875, "step": 54 }, { "epoch": 0.0074355724545838615, "grad_norm": 5.924042224884033, "learning_rate": 3.1640625000000003e-06, "loss": 0.5353546142578125, "step": 55 }, { "epoch": 0.007570764681030841, "grad_norm": 6.777284145355225, "learning_rate": 3.22265625e-06, "loss": 0.42047119140625, "step": 56 }, { "epoch": 0.00770595690747782, "grad_norm": 6.5702080726623535, "learning_rate": 3.28125e-06, "loss": 0.4191436767578125, "step": 57 }, { "epoch": 0.007841149133924799, "grad_norm": 1.8501906394958496, "learning_rate": 3.3398437500000003e-06, "loss": 0.4581298828125, "step": 58 }, { "epoch": 0.007976341360371779, "grad_norm": 9.259123802185059, "learning_rate": 3.3984375e-06, "loss": 0.5446319580078125, "step": 59 }, { "epoch": 0.008111533586818757, "grad_norm": 4.78719425201416, "learning_rate": 3.45703125e-06, "loss": 0.518890380859375, "step": 60 }, { "epoch": 0.008246725813265738, "grad_norm": 11.670870780944824, "learning_rate": 3.515625e-06, "loss": 0.3968658447265625, "step": 61 }, { "epoch": 0.008381918039712716, "grad_norm": 14.914684295654297, "learning_rate": 3.57421875e-06, "loss": 0.360321044921875, "step": 62 }, { "epoch": 0.008517110266159696, "grad_norm": 11.149032592773438, "learning_rate": 3.6328125e-06, "loss": 0.569610595703125, "step": 63 }, { "epoch": 0.008652302492606675, "grad_norm": 9.514175415039062, "learning_rate": 3.69140625e-06, "loss": 0.3821563720703125, "step": 64 }, { "epoch": 0.008787494719053655, "grad_norm": 4.481330394744873, "learning_rate": 3.75e-06, "loss": 0.4743499755859375, "step": 65 }, { "epoch": 0.008922686945500634, "grad_norm": 5.4768218994140625, "learning_rate": 3.8085937500000002e-06, "loss": 0.469268798828125, "step": 66 }, { "epoch": 0.009057879171947614, "grad_norm": 7.203653335571289, "learning_rate": 3.8671875e-06, "loss": 0.4289093017578125, "step": 67 }, { "epoch": 0.009193071398394592, "grad_norm": 5.832691669464111, "learning_rate": 3.92578125e-06, "loss": 0.456634521484375, "step": 68 }, { "epoch": 0.009328263624841572, "grad_norm": 9.66999340057373, "learning_rate": 3.984375e-06, "loss": 0.3985137939453125, "step": 69 }, { "epoch": 0.009463455851288551, "grad_norm": 6.045322418212891, "learning_rate": 4.0429687500000004e-06, "loss": 0.438232421875, "step": 70 }, { "epoch": 0.00959864807773553, "grad_norm": 9.853673934936523, "learning_rate": 4.1015625e-06, "loss": 0.40960693359375, "step": 71 }, { "epoch": 0.00973384030418251, "grad_norm": 14.634685516357422, "learning_rate": 4.16015625e-06, "loss": 0.6088104248046875, "step": 72 }, { "epoch": 0.009869032530629488, "grad_norm": 20.060110092163086, "learning_rate": 4.21875e-06, "loss": 0.460662841796875, "step": 73 }, { "epoch": 0.010004224757076468, "grad_norm": 12.174761772155762, "learning_rate": 4.27734375e-06, "loss": 0.542572021484375, "step": 74 }, { "epoch": 0.010139416983523447, "grad_norm": 9.763131141662598, "learning_rate": 4.3359375e-06, "loss": 0.401763916015625, "step": 75 }, { "epoch": 0.010274609209970427, "grad_norm": 3.679368257522583, "learning_rate": 4.3945312500000005e-06, "loss": 0.4951019287109375, "step": 76 }, { "epoch": 0.010409801436417405, "grad_norm": 5.807453632354736, "learning_rate": 4.453125e-06, "loss": 0.485809326171875, "step": 77 }, { "epoch": 0.010544993662864386, "grad_norm": 9.901314735412598, "learning_rate": 4.51171875e-06, "loss": 0.46160888671875, "step": 78 }, { "epoch": 0.010680185889311364, "grad_norm": 8.818758964538574, "learning_rate": 4.5703125e-06, "loss": 0.4622344970703125, "step": 79 }, { "epoch": 0.010815378115758344, "grad_norm": 13.558056831359863, "learning_rate": 4.62890625e-06, "loss": 0.480987548828125, "step": 80 }, { "epoch": 0.010950570342205323, "grad_norm": 7.493067741394043, "learning_rate": 4.6875000000000004e-06, "loss": 0.4514007568359375, "step": 81 }, { "epoch": 0.011085762568652303, "grad_norm": 7.834639549255371, "learning_rate": 4.74609375e-06, "loss": 0.3695068359375, "step": 82 }, { "epoch": 0.011220954795099282, "grad_norm": 8.9389009475708, "learning_rate": 4.8046875e-06, "loss": 0.5247650146484375, "step": 83 }, { "epoch": 0.011356147021546262, "grad_norm": 7.8822102546691895, "learning_rate": 4.86328125e-06, "loss": 0.37096405029296875, "step": 84 }, { "epoch": 0.01149133924799324, "grad_norm": 8.921436309814453, "learning_rate": 4.921875e-06, "loss": 0.469696044921875, "step": 85 }, { "epoch": 0.01162653147444022, "grad_norm": 13.998419761657715, "learning_rate": 4.98046875e-06, "loss": 0.4219207763671875, "step": 86 }, { "epoch": 0.011761723700887199, "grad_norm": 20.839725494384766, "learning_rate": 5.0390625000000005e-06, "loss": 0.273895263671875, "step": 87 }, { "epoch": 0.011896915927334177, "grad_norm": 14.408513069152832, "learning_rate": 5.09765625e-06, "loss": 0.4432525634765625, "step": 88 }, { "epoch": 0.012032108153781158, "grad_norm": 7.540858268737793, "learning_rate": 5.15625e-06, "loss": 0.403533935546875, "step": 89 }, { "epoch": 0.012167300380228136, "grad_norm": 11.975055694580078, "learning_rate": 5.21484375e-06, "loss": 0.4277801513671875, "step": 90 }, { "epoch": 0.012302492606675116, "grad_norm": 13.353858947753906, "learning_rate": 5.2734375e-06, "loss": 0.39711761474609375, "step": 91 }, { "epoch": 0.012437684833122095, "grad_norm": 16.96271514892578, "learning_rate": 5.3320312500000004e-06, "loss": 0.3714141845703125, "step": 92 }, { "epoch": 0.012572877059569075, "grad_norm": 10.64302921295166, "learning_rate": 5.390625e-06, "loss": 0.443603515625, "step": 93 }, { "epoch": 0.012708069286016054, "grad_norm": 13.161224365234375, "learning_rate": 5.44921875e-06, "loss": 0.3780975341796875, "step": 94 }, { "epoch": 0.012843261512463034, "grad_norm": 11.99887752532959, "learning_rate": 5.5078125e-06, "loss": 0.3973236083984375, "step": 95 }, { "epoch": 0.012978453738910012, "grad_norm": 12.698922157287598, "learning_rate": 5.56640625e-06, "loss": 0.4170989990234375, "step": 96 }, { "epoch": 0.013113645965356992, "grad_norm": 12.280120849609375, "learning_rate": 5.625e-06, "loss": 0.390411376953125, "step": 97 }, { "epoch": 0.013248838191803971, "grad_norm": 10.428301811218262, "learning_rate": 5.6835937500000005e-06, "loss": 0.32039642333984375, "step": 98 }, { "epoch": 0.013384030418250951, "grad_norm": 13.72142219543457, "learning_rate": 5.7421875e-06, "loss": 0.40573883056640625, "step": 99 }, { "epoch": 0.01351922264469793, "grad_norm": 11.776626586914062, "learning_rate": 5.80078125e-06, "loss": 0.29107666015625, "step": 100 }, { "epoch": 0.01365441487114491, "grad_norm": 9.963550567626953, "learning_rate": 5.859375e-06, "loss": 0.466705322265625, "step": 101 }, { "epoch": 0.013789607097591888, "grad_norm": 12.714323043823242, "learning_rate": 5.91796875e-06, "loss": 0.348602294921875, "step": 102 }, { "epoch": 0.013924799324038869, "grad_norm": 9.004317283630371, "learning_rate": 5.9765625000000004e-06, "loss": 0.32762908935546875, "step": 103 }, { "epoch": 0.014059991550485847, "grad_norm": 6.390115737915039, "learning_rate": 6.03515625e-06, "loss": 0.367767333984375, "step": 104 }, { "epoch": 0.014195183776932827, "grad_norm": 5.2753520011901855, "learning_rate": 6.09375e-06, "loss": 0.340789794921875, "step": 105 }, { "epoch": 0.014330376003379806, "grad_norm": 19.965015411376953, "learning_rate": 6.15234375e-06, "loss": 0.4667205810546875, "step": 106 }, { "epoch": 0.014465568229826784, "grad_norm": 7.3258466720581055, "learning_rate": 6.2109375e-06, "loss": 0.25698089599609375, "step": 107 }, { "epoch": 0.014600760456273764, "grad_norm": 7.896075248718262, "learning_rate": 6.26953125e-06, "loss": 0.407379150390625, "step": 108 }, { "epoch": 0.014735952682720743, "grad_norm": 14.004000663757324, "learning_rate": 6.3281250000000005e-06, "loss": 0.39601898193359375, "step": 109 }, { "epoch": 0.014871144909167723, "grad_norm": 14.068887710571289, "learning_rate": 6.38671875e-06, "loss": 0.31365203857421875, "step": 110 }, { "epoch": 0.015006337135614702, "grad_norm": 4.65051794052124, "learning_rate": 6.4453125e-06, "loss": 0.426300048828125, "step": 111 }, { "epoch": 0.015141529362061682, "grad_norm": 19.288307189941406, "learning_rate": 6.50390625e-06, "loss": 0.362030029296875, "step": 112 }, { "epoch": 0.01527672158850866, "grad_norm": 17.15165138244629, "learning_rate": 6.5625e-06, "loss": 0.2597808837890625, "step": 113 }, { "epoch": 0.01541191381495564, "grad_norm": 9.220398902893066, "learning_rate": 6.6210937500000004e-06, "loss": 0.4727783203125, "step": 114 }, { "epoch": 0.015547106041402619, "grad_norm": 16.606584548950195, "learning_rate": 6.679687500000001e-06, "loss": 0.34545135498046875, "step": 115 }, { "epoch": 0.015682298267849597, "grad_norm": 12.121626853942871, "learning_rate": 6.73828125e-06, "loss": 0.32347869873046875, "step": 116 }, { "epoch": 0.01581749049429658, "grad_norm": 9.206586837768555, "learning_rate": 6.796875e-06, "loss": 0.2846336364746094, "step": 117 }, { "epoch": 0.015952682720743558, "grad_norm": 11.462008476257324, "learning_rate": 6.85546875e-06, "loss": 0.44615936279296875, "step": 118 }, { "epoch": 0.016087874947190536, "grad_norm": 7.873308181762695, "learning_rate": 6.9140625e-06, "loss": 0.317413330078125, "step": 119 }, { "epoch": 0.016223067173637515, "grad_norm": 11.945247650146484, "learning_rate": 6.9726562500000005e-06, "loss": 0.41915130615234375, "step": 120 }, { "epoch": 0.016358259400084497, "grad_norm": 8.758480072021484, "learning_rate": 7.03125e-06, "loss": 0.41268157958984375, "step": 121 }, { "epoch": 0.016493451626531475, "grad_norm": 16.99803352355957, "learning_rate": 7.08984375e-06, "loss": 0.33650970458984375, "step": 122 }, { "epoch": 0.016628643852978454, "grad_norm": 12.159713745117188, "learning_rate": 7.1484375e-06, "loss": 0.431549072265625, "step": 123 }, { "epoch": 0.016763836079425432, "grad_norm": 8.628973007202148, "learning_rate": 7.20703125e-06, "loss": 0.3750762939453125, "step": 124 }, { "epoch": 0.01689902830587241, "grad_norm": 15.568916320800781, "learning_rate": 7.265625e-06, "loss": 0.352691650390625, "step": 125 }, { "epoch": 0.017034220532319393, "grad_norm": 32.42605972290039, "learning_rate": 7.3242187500000006e-06, "loss": 0.4794044494628906, "step": 126 }, { "epoch": 0.01716941275876637, "grad_norm": 8.526758193969727, "learning_rate": 7.3828125e-06, "loss": 0.3136444091796875, "step": 127 }, { "epoch": 0.01730460498521335, "grad_norm": 12.648958206176758, "learning_rate": 7.44140625e-06, "loss": 0.272247314453125, "step": 128 }, { "epoch": 0.017439797211660328, "grad_norm": 9.088125228881836, "learning_rate": 7.5e-06, "loss": 0.25101470947265625, "step": 129 }, { "epoch": 0.01757498943810731, "grad_norm": 10.947595596313477, "learning_rate": 7.55859375e-06, "loss": 0.33487701416015625, "step": 130 }, { "epoch": 0.01771018166455429, "grad_norm": 8.987068176269531, "learning_rate": 7.6171875000000005e-06, "loss": 0.44268798828125, "step": 131 }, { "epoch": 0.017845373891001267, "grad_norm": 10.176579475402832, "learning_rate": 7.67578125e-06, "loss": 0.39456939697265625, "step": 132 }, { "epoch": 0.017980566117448245, "grad_norm": 4.725843906402588, "learning_rate": 7.734375e-06, "loss": 0.39962005615234375, "step": 133 }, { "epoch": 0.018115758343895227, "grad_norm": 5.729996681213379, "learning_rate": 7.792968750000001e-06, "loss": 0.4094390869140625, "step": 134 }, { "epoch": 0.018250950570342206, "grad_norm": 6.760279655456543, "learning_rate": 7.8515625e-06, "loss": 0.34267425537109375, "step": 135 }, { "epoch": 0.018386142796789184, "grad_norm": 6.717825412750244, "learning_rate": 7.91015625e-06, "loss": 0.38262939453125, "step": 136 }, { "epoch": 0.018521335023236163, "grad_norm": 8.859396934509277, "learning_rate": 7.96875e-06, "loss": 0.4053955078125, "step": 137 }, { "epoch": 0.018656527249683145, "grad_norm": 13.363924980163574, "learning_rate": 8.02734375e-06, "loss": 0.37969207763671875, "step": 138 }, { "epoch": 0.018791719476130123, "grad_norm": 7.235777854919434, "learning_rate": 8.085937500000001e-06, "loss": 0.32489013671875, "step": 139 }, { "epoch": 0.018926911702577102, "grad_norm": 6.2245564460754395, "learning_rate": 8.14453125e-06, "loss": 0.2818946838378906, "step": 140 }, { "epoch": 0.01906210392902408, "grad_norm": 3.3765194416046143, "learning_rate": 8.203125e-06, "loss": 0.3400154113769531, "step": 141 }, { "epoch": 0.01919729615547106, "grad_norm": 3.3864426612854004, "learning_rate": 8.26171875e-06, "loss": 0.372314453125, "step": 142 }, { "epoch": 0.01933248838191804, "grad_norm": 7.222830295562744, "learning_rate": 8.3203125e-06, "loss": 0.3372230529785156, "step": 143 }, { "epoch": 0.01946768060836502, "grad_norm": 9.870015144348145, "learning_rate": 8.37890625e-06, "loss": 0.3125762939453125, "step": 144 }, { "epoch": 0.019602872834811998, "grad_norm": 3.844804525375366, "learning_rate": 8.4375e-06, "loss": 0.40564727783203125, "step": 145 }, { "epoch": 0.019738065061258976, "grad_norm": 10.666752815246582, "learning_rate": 8.49609375e-06, "loss": 0.399871826171875, "step": 146 }, { "epoch": 0.019873257287705958, "grad_norm": 20.025733947753906, "learning_rate": 8.5546875e-06, "loss": 0.29644775390625, "step": 147 }, { "epoch": 0.020008449514152937, "grad_norm": 5.059605598449707, "learning_rate": 8.61328125e-06, "loss": 0.3897705078125, "step": 148 }, { "epoch": 0.020143641740599915, "grad_norm": 12.984992980957031, "learning_rate": 8.671875e-06, "loss": 0.32715606689453125, "step": 149 }, { "epoch": 0.020278833967046894, "grad_norm": 4.818148612976074, "learning_rate": 8.73046875e-06, "loss": 0.3247337341308594, "step": 150 }, { "epoch": 0.020414026193493875, "grad_norm": 4.039071083068848, "learning_rate": 8.789062500000001e-06, "loss": 0.34757232666015625, "step": 151 }, { "epoch": 0.020549218419940854, "grad_norm": 5.584234237670898, "learning_rate": 8.84765625e-06, "loss": 0.2400054931640625, "step": 152 }, { "epoch": 0.020684410646387832, "grad_norm": 5.080233573913574, "learning_rate": 8.90625e-06, "loss": 0.26808929443359375, "step": 153 }, { "epoch": 0.02081960287283481, "grad_norm": 9.186685562133789, "learning_rate": 8.96484375e-06, "loss": 0.3578472137451172, "step": 154 }, { "epoch": 0.020954795099281793, "grad_norm": 8.834890365600586, "learning_rate": 9.0234375e-06, "loss": 0.32218170166015625, "step": 155 }, { "epoch": 0.02108998732572877, "grad_norm": 29.870014190673828, "learning_rate": 9.082031250000001e-06, "loss": 0.430694580078125, "step": 156 }, { "epoch": 0.02122517955217575, "grad_norm": 22.970182418823242, "learning_rate": 9.140625e-06, "loss": 0.45813751220703125, "step": 157 }, { "epoch": 0.02136037177862273, "grad_norm": 17.33034896850586, "learning_rate": 9.19921875e-06, "loss": 0.31902313232421875, "step": 158 }, { "epoch": 0.021495564005069707, "grad_norm": 8.452795028686523, "learning_rate": 9.2578125e-06, "loss": 0.3018226623535156, "step": 159 }, { "epoch": 0.02163075623151669, "grad_norm": 11.49000358581543, "learning_rate": 9.31640625e-06, "loss": 0.277069091796875, "step": 160 }, { "epoch": 0.021765948457963667, "grad_norm": 12.22887134552002, "learning_rate": 9.375000000000001e-06, "loss": 0.2909584045410156, "step": 161 }, { "epoch": 0.021901140684410646, "grad_norm": 16.935884475708008, "learning_rate": 9.43359375e-06, "loss": 0.3690185546875, "step": 162 }, { "epoch": 0.022036332910857624, "grad_norm": 19.534664154052734, "learning_rate": 9.4921875e-06, "loss": 0.44385528564453125, "step": 163 }, { "epoch": 0.022171525137304606, "grad_norm": 11.166132926940918, "learning_rate": 9.55078125e-06, "loss": 0.3913116455078125, "step": 164 }, { "epoch": 0.022306717363751585, "grad_norm": 7.799185752868652, "learning_rate": 9.609375e-06, "loss": 0.2675018310546875, "step": 165 }, { "epoch": 0.022441909590198563, "grad_norm": 7.301191806793213, "learning_rate": 9.66796875e-06, "loss": 0.23796844482421875, "step": 166 }, { "epoch": 0.02257710181664554, "grad_norm": 12.863231658935547, "learning_rate": 9.7265625e-06, "loss": 0.417205810546875, "step": 167 }, { "epoch": 0.022712294043092524, "grad_norm": 5.220954895019531, "learning_rate": 9.78515625e-06, "loss": 0.29888153076171875, "step": 168 }, { "epoch": 0.022847486269539502, "grad_norm": 6.2648539543151855, "learning_rate": 9.84375e-06, "loss": 0.3386878967285156, "step": 169 }, { "epoch": 0.02298267849598648, "grad_norm": 11.405940055847168, "learning_rate": 9.90234375e-06, "loss": 0.36103057861328125, "step": 170 }, { "epoch": 0.02311787072243346, "grad_norm": 13.143427848815918, "learning_rate": 9.9609375e-06, "loss": 0.4598541259765625, "step": 171 }, { "epoch": 0.02325306294888044, "grad_norm": 9.1638822555542, "learning_rate": 1.001953125e-05, "loss": 0.413787841796875, "step": 172 }, { "epoch": 0.02338825517532742, "grad_norm": 14.425140380859375, "learning_rate": 1.0078125000000001e-05, "loss": 0.3013954162597656, "step": 173 }, { "epoch": 0.023523447401774398, "grad_norm": 7.103453636169434, "learning_rate": 1.013671875e-05, "loss": 0.3523521423339844, "step": 174 }, { "epoch": 0.023658639628221376, "grad_norm": 7.764557838439941, "learning_rate": 1.01953125e-05, "loss": 0.2942028045654297, "step": 175 }, { "epoch": 0.023793831854668355, "grad_norm": 9.54080867767334, "learning_rate": 1.025390625e-05, "loss": 0.4434051513671875, "step": 176 }, { "epoch": 0.023929024081115337, "grad_norm": 9.157366752624512, "learning_rate": 1.03125e-05, "loss": 0.31499481201171875, "step": 177 }, { "epoch": 0.024064216307562315, "grad_norm": 7.387535572052002, "learning_rate": 1.0371093750000001e-05, "loss": 0.3870658874511719, "step": 178 }, { "epoch": 0.024199408534009294, "grad_norm": 9.071150779724121, "learning_rate": 1.04296875e-05, "loss": 0.29233551025390625, "step": 179 }, { "epoch": 0.024334600760456272, "grad_norm": 8.732147216796875, "learning_rate": 1.048828125e-05, "loss": 0.3111305236816406, "step": 180 }, { "epoch": 0.024469792986903254, "grad_norm": 3.857217788696289, "learning_rate": 1.0546875e-05, "loss": 0.33007049560546875, "step": 181 }, { "epoch": 0.024604985213350233, "grad_norm": 4.270830154418945, "learning_rate": 1.060546875e-05, "loss": 0.3454132080078125, "step": 182 }, { "epoch": 0.02474017743979721, "grad_norm": 4.961788654327393, "learning_rate": 1.0664062500000001e-05, "loss": 0.4149169921875, "step": 183 }, { "epoch": 0.02487536966624419, "grad_norm": 9.476563453674316, "learning_rate": 1.072265625e-05, "loss": 0.34647369384765625, "step": 184 }, { "epoch": 0.02501056189269117, "grad_norm": 8.641980171203613, "learning_rate": 1.078125e-05, "loss": 0.38382720947265625, "step": 185 }, { "epoch": 0.02514575411913815, "grad_norm": 17.837678909301758, "learning_rate": 1.083984375e-05, "loss": 0.31372833251953125, "step": 186 }, { "epoch": 0.02528094634558513, "grad_norm": 6.171658039093018, "learning_rate": 1.08984375e-05, "loss": 0.24570274353027344, "step": 187 }, { "epoch": 0.025416138572032107, "grad_norm": 45.034725189208984, "learning_rate": 1.095703125e-05, "loss": 0.4607353210449219, "step": 188 }, { "epoch": 0.02555133079847909, "grad_norm": 29.19608497619629, "learning_rate": 1.1015625e-05, "loss": 0.3447914123535156, "step": 189 }, { "epoch": 0.025686523024926067, "grad_norm": 9.654741287231445, "learning_rate": 1.1074218750000001e-05, "loss": 0.34331512451171875, "step": 190 }, { "epoch": 0.025821715251373046, "grad_norm": 27.05508041381836, "learning_rate": 1.11328125e-05, "loss": 0.33905029296875, "step": 191 }, { "epoch": 0.025956907477820024, "grad_norm": 19.2808837890625, "learning_rate": 1.119140625e-05, "loss": 0.32193756103515625, "step": 192 }, { "epoch": 0.026092099704267006, "grad_norm": 6.524961471557617, "learning_rate": 1.125e-05, "loss": 0.2579345703125, "step": 193 }, { "epoch": 0.026227291930713985, "grad_norm": 26.02958106994629, "learning_rate": 1.130859375e-05, "loss": 0.3294353485107422, "step": 194 }, { "epoch": 0.026362484157160963, "grad_norm": 9.47587776184082, "learning_rate": 1.1367187500000001e-05, "loss": 0.30571746826171875, "step": 195 }, { "epoch": 0.026497676383607942, "grad_norm": 8.048364639282227, "learning_rate": 1.142578125e-05, "loss": 0.2755298614501953, "step": 196 }, { "epoch": 0.02663286861005492, "grad_norm": 17.279748916625977, "learning_rate": 1.1484375e-05, "loss": 0.2881507873535156, "step": 197 }, { "epoch": 0.026768060836501902, "grad_norm": 9.93814468383789, "learning_rate": 1.154296875e-05, "loss": 0.3548431396484375, "step": 198 }, { "epoch": 0.02690325306294888, "grad_norm": 9.75289535522461, "learning_rate": 1.16015625e-05, "loss": 0.25585174560546875, "step": 199 }, { "epoch": 0.02703844528939586, "grad_norm": 21.917634963989258, "learning_rate": 1.1660156250000001e-05, "loss": 0.3770751953125, "step": 200 }, { "epoch": 0.027173637515842838, "grad_norm": 10.445472717285156, "learning_rate": 1.171875e-05, "loss": 0.33472442626953125, "step": 201 }, { "epoch": 0.02730882974228982, "grad_norm": 28.41674041748047, "learning_rate": 1.177734375e-05, "loss": 0.3972015380859375, "step": 202 }, { "epoch": 0.027444021968736798, "grad_norm": 16.933401107788086, "learning_rate": 1.18359375e-05, "loss": 0.2882823944091797, "step": 203 }, { "epoch": 0.027579214195183777, "grad_norm": 8.066474914550781, "learning_rate": 1.189453125e-05, "loss": 0.28705596923828125, "step": 204 }, { "epoch": 0.027714406421630755, "grad_norm": 8.025276184082031, "learning_rate": 1.1953125000000001e-05, "loss": 0.3666839599609375, "step": 205 }, { "epoch": 0.027849598648077737, "grad_norm": 13.482354164123535, "learning_rate": 1.201171875e-05, "loss": 0.283447265625, "step": 206 }, { "epoch": 0.027984790874524716, "grad_norm": 8.359320640563965, "learning_rate": 1.20703125e-05, "loss": 0.331634521484375, "step": 207 }, { "epoch": 0.028119983100971694, "grad_norm": 14.367286682128906, "learning_rate": 1.212890625e-05, "loss": 0.3609771728515625, "step": 208 }, { "epoch": 0.028255175327418672, "grad_norm": 17.017526626586914, "learning_rate": 1.21875e-05, "loss": 0.3172340393066406, "step": 209 }, { "epoch": 0.028390367553865654, "grad_norm": 12.142645835876465, "learning_rate": 1.224609375e-05, "loss": 0.393798828125, "step": 210 }, { "epoch": 0.028525559780312633, "grad_norm": 10.331498146057129, "learning_rate": 1.23046875e-05, "loss": 0.2973442077636719, "step": 211 }, { "epoch": 0.02866075200675961, "grad_norm": 15.544872283935547, "learning_rate": 1.2363281250000001e-05, "loss": 0.4059600830078125, "step": 212 }, { "epoch": 0.02879594423320659, "grad_norm": 6.606779098510742, "learning_rate": 1.2421875e-05, "loss": 0.3096923828125, "step": 213 }, { "epoch": 0.02893113645965357, "grad_norm": 5.737157821655273, "learning_rate": 1.248046875e-05, "loss": 0.32675933837890625, "step": 214 }, { "epoch": 0.02906632868610055, "grad_norm": 5.541354656219482, "learning_rate": 1.25390625e-05, "loss": 0.2597007751464844, "step": 215 }, { "epoch": 0.02920152091254753, "grad_norm": 5.784581661224365, "learning_rate": 1.259765625e-05, "loss": 0.23841476440429688, "step": 216 }, { "epoch": 0.029336713138994507, "grad_norm": 9.065890312194824, "learning_rate": 1.2656250000000001e-05, "loss": 0.2053375244140625, "step": 217 }, { "epoch": 0.029471905365441486, "grad_norm": 5.764812469482422, "learning_rate": 1.271484375e-05, "loss": 0.3548126220703125, "step": 218 }, { "epoch": 0.029607097591888468, "grad_norm": 12.094768524169922, "learning_rate": 1.27734375e-05, "loss": 0.2837066650390625, "step": 219 }, { "epoch": 0.029742289818335446, "grad_norm": 15.443978309631348, "learning_rate": 1.283203125e-05, "loss": 0.3972358703613281, "step": 220 }, { "epoch": 0.029877482044782425, "grad_norm": 8.34568977355957, "learning_rate": 1.2890625e-05, "loss": 0.2913818359375, "step": 221 }, { "epoch": 0.030012674271229403, "grad_norm": 24.16734504699707, "learning_rate": 1.2949218750000001e-05, "loss": 0.2877616882324219, "step": 222 }, { "epoch": 0.030147866497676385, "grad_norm": 19.035968780517578, "learning_rate": 1.30078125e-05, "loss": 0.2989997863769531, "step": 223 }, { "epoch": 0.030283058724123364, "grad_norm": 7.522139072418213, "learning_rate": 1.306640625e-05, "loss": 0.24536705017089844, "step": 224 }, { "epoch": 0.030418250950570342, "grad_norm": 13.014775276184082, "learning_rate": 1.3125e-05, "loss": 0.2332782745361328, "step": 225 }, { "epoch": 0.03055344317701732, "grad_norm": 25.451913833618164, "learning_rate": 1.318359375e-05, "loss": 0.4199247360229492, "step": 226 }, { "epoch": 0.030688635403464302, "grad_norm": 11.6997652053833, "learning_rate": 1.3242187500000001e-05, "loss": 0.27001953125, "step": 227 }, { "epoch": 0.03082382762991128, "grad_norm": 4.29236364364624, "learning_rate": 1.330078125e-05, "loss": 0.3325614929199219, "step": 228 }, { "epoch": 0.03095901985635826, "grad_norm": 14.468042373657227, "learning_rate": 1.3359375000000001e-05, "loss": 0.3017425537109375, "step": 229 }, { "epoch": 0.031094212082805238, "grad_norm": 34.97588348388672, "learning_rate": 1.341796875e-05, "loss": 0.2820014953613281, "step": 230 }, { "epoch": 0.031229404309252216, "grad_norm": 31.7833309173584, "learning_rate": 1.34765625e-05, "loss": 0.30699920654296875, "step": 231 }, { "epoch": 0.031364596535699195, "grad_norm": 9.370939254760742, "learning_rate": 1.353515625e-05, "loss": 0.29029083251953125, "step": 232 }, { "epoch": 0.03149978876214617, "grad_norm": 18.17502784729004, "learning_rate": 1.359375e-05, "loss": 0.3473472595214844, "step": 233 }, { "epoch": 0.03163498098859316, "grad_norm": 16.00482749938965, "learning_rate": 1.3652343750000001e-05, "loss": 0.34558868408203125, "step": 234 }, { "epoch": 0.03177017321504014, "grad_norm": 10.303112030029297, "learning_rate": 1.37109375e-05, "loss": 0.23703765869140625, "step": 235 }, { "epoch": 0.031905365441487116, "grad_norm": 3.414919853210449, "learning_rate": 1.376953125e-05, "loss": 0.170166015625, "step": 236 }, { "epoch": 0.032040557667934094, "grad_norm": 17.686811447143555, "learning_rate": 1.3828125e-05, "loss": 0.3130531311035156, "step": 237 }, { "epoch": 0.03217574989438107, "grad_norm": 18.49032211303711, "learning_rate": 1.388671875e-05, "loss": 0.29993247985839844, "step": 238 }, { "epoch": 0.03231094212082805, "grad_norm": 9.850604057312012, "learning_rate": 1.3945312500000001e-05, "loss": 0.2655344009399414, "step": 239 }, { "epoch": 0.03244613434727503, "grad_norm": 6.737331867218018, "learning_rate": 1.400390625e-05, "loss": 0.2937583923339844, "step": 240 }, { "epoch": 0.03258132657372201, "grad_norm": 4.894274711608887, "learning_rate": 1.40625e-05, "loss": 0.18481063842773438, "step": 241 }, { "epoch": 0.032716518800168994, "grad_norm": 3.0006649494171143, "learning_rate": 1.412109375e-05, "loss": 0.24634361267089844, "step": 242 }, { "epoch": 0.03285171102661597, "grad_norm": 5.40485954284668, "learning_rate": 1.41796875e-05, "loss": 0.3110008239746094, "step": 243 }, { "epoch": 0.03298690325306295, "grad_norm": 3.7458879947662354, "learning_rate": 1.4238281250000001e-05, "loss": 0.2802543640136719, "step": 244 }, { "epoch": 0.03312209547950993, "grad_norm": 7.09403657913208, "learning_rate": 1.4296875e-05, "loss": 0.324951171875, "step": 245 }, { "epoch": 0.03325728770595691, "grad_norm": 10.268945693969727, "learning_rate": 1.435546875e-05, "loss": 0.2251300811767578, "step": 246 }, { "epoch": 0.033392479932403886, "grad_norm": 16.29755973815918, "learning_rate": 1.44140625e-05, "loss": 0.3162803649902344, "step": 247 }, { "epoch": 0.033527672158850864, "grad_norm": 18.552701950073242, "learning_rate": 1.447265625e-05, "loss": 0.25196361541748047, "step": 248 }, { "epoch": 0.03366286438529784, "grad_norm": 6.371397495269775, "learning_rate": 1.453125e-05, "loss": 0.32024192810058594, "step": 249 }, { "epoch": 0.03379805661174482, "grad_norm": 21.615142822265625, "learning_rate": 1.458984375e-05, "loss": 0.31653594970703125, "step": 250 }, { "epoch": 0.03393324883819181, "grad_norm": 24.584264755249023, "learning_rate": 1.4648437500000001e-05, "loss": 0.3035125732421875, "step": 251 }, { "epoch": 0.034068441064638785, "grad_norm": 7.3283796310424805, "learning_rate": 1.470703125e-05, "loss": 0.2931327819824219, "step": 252 }, { "epoch": 0.034203633291085764, "grad_norm": 11.640965461730957, "learning_rate": 1.4765625e-05, "loss": 0.22333145141601562, "step": 253 }, { "epoch": 0.03433882551753274, "grad_norm": 11.33310604095459, "learning_rate": 1.482421875e-05, "loss": 0.33174896240234375, "step": 254 }, { "epoch": 0.03447401774397972, "grad_norm": 10.178672790527344, "learning_rate": 1.48828125e-05, "loss": 0.30368804931640625, "step": 255 }, { "epoch": 0.0346092099704267, "grad_norm": 12.790731430053711, "learning_rate": 1.4941406250000001e-05, "loss": 0.331085205078125, "step": 256 }, { "epoch": 0.03474440219687368, "grad_norm": 8.673245429992676, "learning_rate": 1.5e-05, "loss": 0.2547416687011719, "step": 257 }, { "epoch": 0.034879594423320656, "grad_norm": 4.6530866622924805, "learning_rate": 1.505859375e-05, "loss": 0.2751197814941406, "step": 258 }, { "epoch": 0.03501478664976764, "grad_norm": 6.476203918457031, "learning_rate": 1.51171875e-05, "loss": 0.28345680236816406, "step": 259 }, { "epoch": 0.03514997887621462, "grad_norm": 14.593551635742188, "learning_rate": 1.517578125e-05, "loss": 0.3824882507324219, "step": 260 }, { "epoch": 0.0352851711026616, "grad_norm": 6.374526500701904, "learning_rate": 1.5234375000000001e-05, "loss": 0.28825950622558594, "step": 261 }, { "epoch": 0.03542036332910858, "grad_norm": 3.9945902824401855, "learning_rate": 1.529296875e-05, "loss": 0.2472667694091797, "step": 262 }, { "epoch": 0.035555555555555556, "grad_norm": 2.7736659049987793, "learning_rate": 1.53515625e-05, "loss": 0.18715667724609375, "step": 263 }, { "epoch": 0.035690747782002534, "grad_norm": 4.772533416748047, "learning_rate": 1.541015625e-05, "loss": 0.2139892578125, "step": 264 }, { "epoch": 0.03582594000844951, "grad_norm": 5.28697395324707, "learning_rate": 1.546875e-05, "loss": 0.271026611328125, "step": 265 }, { "epoch": 0.03596113223489649, "grad_norm": 4.601757526397705, "learning_rate": 1.552734375e-05, "loss": 0.3020343780517578, "step": 266 }, { "epoch": 0.03609632446134347, "grad_norm": 20.658479690551758, "learning_rate": 1.5585937500000002e-05, "loss": 0.3063621520996094, "step": 267 }, { "epoch": 0.036231516687790455, "grad_norm": 17.867830276489258, "learning_rate": 1.564453125e-05, "loss": 0.245086669921875, "step": 268 }, { "epoch": 0.03636670891423743, "grad_norm": 17.761445999145508, "learning_rate": 1.5703125e-05, "loss": 0.27483367919921875, "step": 269 }, { "epoch": 0.03650190114068441, "grad_norm": 8.101611137390137, "learning_rate": 1.576171875e-05, "loss": 0.27150917053222656, "step": 270 }, { "epoch": 0.03663709336713139, "grad_norm": 9.16867733001709, "learning_rate": 1.58203125e-05, "loss": 0.29632568359375, "step": 271 }, { "epoch": 0.03677228559357837, "grad_norm": 5.931591987609863, "learning_rate": 1.587890625e-05, "loss": 0.253448486328125, "step": 272 }, { "epoch": 0.03690747782002535, "grad_norm": 9.642913818359375, "learning_rate": 1.59375e-05, "loss": 0.3577842712402344, "step": 273 }, { "epoch": 0.037042670046472326, "grad_norm": 8.441208839416504, "learning_rate": 1.599609375e-05, "loss": 0.2773256301879883, "step": 274 }, { "epoch": 0.037177862272919304, "grad_norm": 7.997591018676758, "learning_rate": 1.60546875e-05, "loss": 0.3258819580078125, "step": 275 }, { "epoch": 0.03731305449936629, "grad_norm": 2.695664167404175, "learning_rate": 1.611328125e-05, "loss": 0.2272777557373047, "step": 276 }, { "epoch": 0.03744824672581327, "grad_norm": 15.456908226013184, "learning_rate": 1.6171875000000002e-05, "loss": 0.2947044372558594, "step": 277 }, { "epoch": 0.03758343895226025, "grad_norm": 11.73242473602295, "learning_rate": 1.623046875e-05, "loss": 0.24881935119628906, "step": 278 }, { "epoch": 0.037718631178707225, "grad_norm": 3.7097480297088623, "learning_rate": 1.62890625e-05, "loss": 0.2575969696044922, "step": 279 }, { "epoch": 0.037853823405154204, "grad_norm": 6.957103252410889, "learning_rate": 1.634765625e-05, "loss": 0.26689910888671875, "step": 280 }, { "epoch": 0.03798901563160118, "grad_norm": 10.129047393798828, "learning_rate": 1.640625e-05, "loss": 0.258697509765625, "step": 281 }, { "epoch": 0.03812420785804816, "grad_norm": 4.334877967834473, "learning_rate": 1.646484375e-05, "loss": 0.2727241516113281, "step": 282 }, { "epoch": 0.03825940008449514, "grad_norm": 7.06351900100708, "learning_rate": 1.65234375e-05, "loss": 0.24359893798828125, "step": 283 }, { "epoch": 0.03839459231094212, "grad_norm": 9.38354778289795, "learning_rate": 1.6582031250000002e-05, "loss": 0.24840164184570312, "step": 284 }, { "epoch": 0.0385297845373891, "grad_norm": 3.335857391357422, "learning_rate": 1.6640625e-05, "loss": 0.21318435668945312, "step": 285 }, { "epoch": 0.03866497676383608, "grad_norm": 4.352935314178467, "learning_rate": 1.669921875e-05, "loss": 0.2790679931640625, "step": 286 }, { "epoch": 0.03880016899028306, "grad_norm": 2.930211305618286, "learning_rate": 1.67578125e-05, "loss": 0.23891067504882812, "step": 287 }, { "epoch": 0.03893536121673004, "grad_norm": 3.004099130630493, "learning_rate": 1.681640625e-05, "loss": 0.2674732208251953, "step": 288 }, { "epoch": 0.03907055344317702, "grad_norm": 2.01277232170105, "learning_rate": 1.6875e-05, "loss": 0.17083263397216797, "step": 289 }, { "epoch": 0.039205745669623995, "grad_norm": 7.299152374267578, "learning_rate": 1.693359375e-05, "loss": 0.24252033233642578, "step": 290 }, { "epoch": 0.039340937896070974, "grad_norm": 3.011568546295166, "learning_rate": 1.69921875e-05, "loss": 0.20447540283203125, "step": 291 }, { "epoch": 0.03947613012251795, "grad_norm": 23.560914993286133, "learning_rate": 1.705078125e-05, "loss": 0.36942291259765625, "step": 292 }, { "epoch": 0.03961132234896494, "grad_norm": 21.460453033447266, "learning_rate": 1.7109375e-05, "loss": 0.3147735595703125, "step": 293 }, { "epoch": 0.039746514575411916, "grad_norm": 10.668805122375488, "learning_rate": 1.7167968750000002e-05, "loss": 0.2743339538574219, "step": 294 }, { "epoch": 0.039881706801858895, "grad_norm": 13.089706420898438, "learning_rate": 1.72265625e-05, "loss": 0.3108539581298828, "step": 295 }, { "epoch": 0.04001689902830587, "grad_norm": 7.003668785095215, "learning_rate": 1.728515625e-05, "loss": 0.16877460479736328, "step": 296 }, { "epoch": 0.04015209125475285, "grad_norm": 11.977317810058594, "learning_rate": 1.734375e-05, "loss": 0.32431793212890625, "step": 297 }, { "epoch": 0.04028728348119983, "grad_norm": 5.474246025085449, "learning_rate": 1.740234375e-05, "loss": 0.3123817443847656, "step": 298 }, { "epoch": 0.04042247570764681, "grad_norm": 10.325031280517578, "learning_rate": 1.74609375e-05, "loss": 0.2569923400878906, "step": 299 }, { "epoch": 0.04055766793409379, "grad_norm": 10.47744083404541, "learning_rate": 1.751953125e-05, "loss": 0.301055908203125, "step": 300 }, { "epoch": 0.040692860160540766, "grad_norm": 3.0338551998138428, "learning_rate": 1.7578125000000002e-05, "loss": 0.21859359741210938, "step": 301 }, { "epoch": 0.04082805238698775, "grad_norm": 12.519035339355469, "learning_rate": 1.763671875e-05, "loss": 0.23868560791015625, "step": 302 }, { "epoch": 0.04096324461343473, "grad_norm": 10.300127029418945, "learning_rate": 1.76953125e-05, "loss": 0.2349681854248047, "step": 303 }, { "epoch": 0.04109843683988171, "grad_norm": 8.500965118408203, "learning_rate": 1.775390625e-05, "loss": 0.30113983154296875, "step": 304 }, { "epoch": 0.041233629066328686, "grad_norm": 2.5798254013061523, "learning_rate": 1.78125e-05, "loss": 0.25214195251464844, "step": 305 }, { "epoch": 0.041368821292775665, "grad_norm": 7.849074840545654, "learning_rate": 1.787109375e-05, "loss": 0.2838611602783203, "step": 306 }, { "epoch": 0.04150401351922264, "grad_norm": 8.086491584777832, "learning_rate": 1.79296875e-05, "loss": 0.24018478393554688, "step": 307 }, { "epoch": 0.04163920574566962, "grad_norm": 7.702834606170654, "learning_rate": 1.798828125e-05, "loss": 0.31969451904296875, "step": 308 }, { "epoch": 0.0417743979721166, "grad_norm": 3.2762339115142822, "learning_rate": 1.8046875e-05, "loss": 0.2358264923095703, "step": 309 }, { "epoch": 0.041909590198563586, "grad_norm": 4.388336658477783, "learning_rate": 1.810546875e-05, "loss": 0.27145957946777344, "step": 310 }, { "epoch": 0.042044782425010564, "grad_norm": 13.492469787597656, "learning_rate": 1.8164062500000002e-05, "loss": 0.3591728210449219, "step": 311 }, { "epoch": 0.04217997465145754, "grad_norm": 6.579132556915283, "learning_rate": 1.822265625e-05, "loss": 0.3139381408691406, "step": 312 }, { "epoch": 0.04231516687790452, "grad_norm": 4.826672554016113, "learning_rate": 1.828125e-05, "loss": 0.2637805938720703, "step": 313 }, { "epoch": 0.0424503591043515, "grad_norm": 7.286214828491211, "learning_rate": 1.833984375e-05, "loss": 0.17613601684570312, "step": 314 }, { "epoch": 0.04258555133079848, "grad_norm": 8.452092170715332, "learning_rate": 1.83984375e-05, "loss": 0.224517822265625, "step": 315 }, { "epoch": 0.04272074355724546, "grad_norm": 7.877833366394043, "learning_rate": 1.845703125e-05, "loss": 0.23193931579589844, "step": 316 }, { "epoch": 0.042855935783692435, "grad_norm": 7.8617753982543945, "learning_rate": 1.8515625e-05, "loss": 0.25810813903808594, "step": 317 }, { "epoch": 0.042991128010139414, "grad_norm": 7.064297199249268, "learning_rate": 1.8574218750000002e-05, "loss": 0.262176513671875, "step": 318 }, { "epoch": 0.0431263202365864, "grad_norm": 4.151225566864014, "learning_rate": 1.86328125e-05, "loss": 0.2243175506591797, "step": 319 }, { "epoch": 0.04326151246303338, "grad_norm": 3.794111490249634, "learning_rate": 1.869140625e-05, "loss": 0.20331192016601562, "step": 320 }, { "epoch": 0.043396704689480356, "grad_norm": 2.461073875427246, "learning_rate": 1.8750000000000002e-05, "loss": 0.21922779083251953, "step": 321 }, { "epoch": 0.043531896915927334, "grad_norm": 13.333686828613281, "learning_rate": 1.880859375e-05, "loss": 0.33484649658203125, "step": 322 }, { "epoch": 0.04366708914237431, "grad_norm": 2.3405187129974365, "learning_rate": 1.88671875e-05, "loss": 0.2036457061767578, "step": 323 }, { "epoch": 0.04380228136882129, "grad_norm": 2.2139670848846436, "learning_rate": 1.892578125e-05, "loss": 0.24195098876953125, "step": 324 }, { "epoch": 0.04393747359526827, "grad_norm": 6.235994815826416, "learning_rate": 1.8984375e-05, "loss": 0.2675590515136719, "step": 325 }, { "epoch": 0.04407266582171525, "grad_norm": 7.546850204467773, "learning_rate": 1.904296875e-05, "loss": 0.26372528076171875, "step": 326 }, { "epoch": 0.044207858048162234, "grad_norm": 3.1368439197540283, "learning_rate": 1.91015625e-05, "loss": 0.16007423400878906, "step": 327 }, { "epoch": 0.04434305027460921, "grad_norm": 3.7086551189422607, "learning_rate": 1.9160156250000002e-05, "loss": 0.21489334106445312, "step": 328 }, { "epoch": 0.04447824250105619, "grad_norm": 4.261851787567139, "learning_rate": 1.921875e-05, "loss": 0.2682380676269531, "step": 329 }, { "epoch": 0.04461343472750317, "grad_norm": 8.233115196228027, "learning_rate": 1.927734375e-05, "loss": 0.23845291137695312, "step": 330 }, { "epoch": 0.04474862695395015, "grad_norm": 4.965247631072998, "learning_rate": 1.93359375e-05, "loss": 0.2496166229248047, "step": 331 }, { "epoch": 0.044883819180397126, "grad_norm": 7.692807197570801, "learning_rate": 1.939453125e-05, "loss": 0.3063468933105469, "step": 332 }, { "epoch": 0.045019011406844105, "grad_norm": 6.241950988769531, "learning_rate": 1.9453125e-05, "loss": 0.1916065216064453, "step": 333 }, { "epoch": 0.04515420363329108, "grad_norm": 4.692477226257324, "learning_rate": 1.951171875e-05, "loss": 0.3115119934082031, "step": 334 }, { "epoch": 0.04528939585973806, "grad_norm": 5.504664421081543, "learning_rate": 1.95703125e-05, "loss": 0.24482059478759766, "step": 335 }, { "epoch": 0.04542458808618505, "grad_norm": 7.126749515533447, "learning_rate": 1.962890625e-05, "loss": 0.29393482208251953, "step": 336 }, { "epoch": 0.045559780312632026, "grad_norm": 2.8093347549438477, "learning_rate": 1.96875e-05, "loss": 0.18366432189941406, "step": 337 }, { "epoch": 0.045694972539079004, "grad_norm": 3.0783469676971436, "learning_rate": 1.9746093750000002e-05, "loss": 0.2876453399658203, "step": 338 }, { "epoch": 0.04583016476552598, "grad_norm": 3.5758750438690186, "learning_rate": 1.98046875e-05, "loss": 0.1931171417236328, "step": 339 }, { "epoch": 0.04596535699197296, "grad_norm": 2.6777350902557373, "learning_rate": 1.986328125e-05, "loss": 0.16173744201660156, "step": 340 }, { "epoch": 0.04610054921841994, "grad_norm": 14.727062225341797, "learning_rate": 1.9921875e-05, "loss": 0.34665393829345703, "step": 341 }, { "epoch": 0.04623574144486692, "grad_norm": 6.252585411071777, "learning_rate": 1.998046875e-05, "loss": 0.2133960723876953, "step": 342 }, { "epoch": 0.046370933671313896, "grad_norm": 6.252894401550293, "learning_rate": 2.00390625e-05, "loss": 0.231964111328125, "step": 343 }, { "epoch": 0.04650612589776088, "grad_norm": 9.064559936523438, "learning_rate": 2.009765625e-05, "loss": 0.269866943359375, "step": 344 }, { "epoch": 0.04664131812420786, "grad_norm": 2.912302017211914, "learning_rate": 2.0156250000000002e-05, "loss": 0.24353790283203125, "step": 345 }, { "epoch": 0.04677651035065484, "grad_norm": 9.062849044799805, "learning_rate": 2.021484375e-05, "loss": 0.23482990264892578, "step": 346 }, { "epoch": 0.04691170257710182, "grad_norm": 10.70043659210205, "learning_rate": 2.02734375e-05, "loss": 0.23520183563232422, "step": 347 }, { "epoch": 0.047046894803548796, "grad_norm": 12.147732734680176, "learning_rate": 2.033203125e-05, "loss": 0.3428192138671875, "step": 348 }, { "epoch": 0.047182087029995774, "grad_norm": 3.205064535140991, "learning_rate": 2.0390625e-05, "loss": 0.25203704833984375, "step": 349 }, { "epoch": 0.04731727925644275, "grad_norm": 12.81641960144043, "learning_rate": 2.044921875e-05, "loss": 0.3087310791015625, "step": 350 }, { "epoch": 0.04745247148288973, "grad_norm": 12.45585823059082, "learning_rate": 2.05078125e-05, "loss": 0.2872276306152344, "step": 351 }, { "epoch": 0.04758766370933671, "grad_norm": 8.705986022949219, "learning_rate": 2.056640625e-05, "loss": 0.22833251953125, "step": 352 }, { "epoch": 0.047722855935783695, "grad_norm": 1.7599164247512817, "learning_rate": 2.0625e-05, "loss": 0.22316741943359375, "step": 353 }, { "epoch": 0.047858048162230674, "grad_norm": 4.0907182693481445, "learning_rate": 2.068359375e-05, "loss": 0.18764877319335938, "step": 354 }, { "epoch": 0.04799324038867765, "grad_norm": 6.2126593589782715, "learning_rate": 2.0742187500000002e-05, "loss": 0.20470428466796875, "step": 355 }, { "epoch": 0.04812843261512463, "grad_norm": 3.4249207973480225, "learning_rate": 2.080078125e-05, "loss": 0.245849609375, "step": 356 }, { "epoch": 0.04826362484157161, "grad_norm": 3.2741127014160156, "learning_rate": 2.0859375e-05, "loss": 0.23532485961914062, "step": 357 }, { "epoch": 0.04839881706801859, "grad_norm": 5.984785079956055, "learning_rate": 2.091796875e-05, "loss": 0.22047042846679688, "step": 358 }, { "epoch": 0.048534009294465566, "grad_norm": 4.98041296005249, "learning_rate": 2.09765625e-05, "loss": 0.22716712951660156, "step": 359 }, { "epoch": 0.048669201520912544, "grad_norm": 3.615262508392334, "learning_rate": 2.103515625e-05, "loss": 0.17584776878356934, "step": 360 }, { "epoch": 0.04880439374735953, "grad_norm": 6.543410778045654, "learning_rate": 2.109375e-05, "loss": 0.2426910400390625, "step": 361 }, { "epoch": 0.04893958597380651, "grad_norm": 10.300976753234863, "learning_rate": 2.1152343750000002e-05, "loss": 0.3159904479980469, "step": 362 }, { "epoch": 0.04907477820025349, "grad_norm": 3.458944082260132, "learning_rate": 2.12109375e-05, "loss": 0.3029327392578125, "step": 363 }, { "epoch": 0.049209970426700465, "grad_norm": 3.7983591556549072, "learning_rate": 2.126953125e-05, "loss": 0.26883888244628906, "step": 364 }, { "epoch": 0.049345162653147444, "grad_norm": 2.6477882862091064, "learning_rate": 2.1328125000000002e-05, "loss": 0.22419357299804688, "step": 365 }, { "epoch": 0.04948035487959442, "grad_norm": 8.004109382629395, "learning_rate": 2.138671875e-05, "loss": 0.3123588562011719, "step": 366 }, { "epoch": 0.0496155471060414, "grad_norm": 4.6794867515563965, "learning_rate": 2.14453125e-05, "loss": 0.28144264221191406, "step": 367 }, { "epoch": 0.04975073933248838, "grad_norm": 2.9711503982543945, "learning_rate": 2.150390625e-05, "loss": 0.3131561279296875, "step": 368 }, { "epoch": 0.049885931558935365, "grad_norm": 2.825662851333618, "learning_rate": 2.15625e-05, "loss": 0.3069877624511719, "step": 369 }, { "epoch": 0.05002112378538234, "grad_norm": 2.660505771636963, "learning_rate": 2.162109375e-05, "loss": 0.18547439575195312, "step": 370 }, { "epoch": 0.05015631601182932, "grad_norm": 3.1826038360595703, "learning_rate": 2.16796875e-05, "loss": 0.23659896850585938, "step": 371 }, { "epoch": 0.0502915082382763, "grad_norm": 2.21543550491333, "learning_rate": 2.1738281250000002e-05, "loss": 0.2086467742919922, "step": 372 }, { "epoch": 0.05042670046472328, "grad_norm": 3.2628138065338135, "learning_rate": 2.1796875e-05, "loss": 0.25342559814453125, "step": 373 }, { "epoch": 0.05056189269117026, "grad_norm": 2.196044921875, "learning_rate": 2.185546875e-05, "loss": 0.23962783813476562, "step": 374 }, { "epoch": 0.050697084917617236, "grad_norm": 9.615337371826172, "learning_rate": 2.19140625e-05, "loss": 0.2426605224609375, "step": 375 }, { "epoch": 0.050832277144064214, "grad_norm": 6.569389820098877, "learning_rate": 2.197265625e-05, "loss": 0.26476287841796875, "step": 376 }, { "epoch": 0.05096746937051119, "grad_norm": 4.2048115730285645, "learning_rate": 2.203125e-05, "loss": 0.30292510986328125, "step": 377 }, { "epoch": 0.05110266159695818, "grad_norm": 6.898926734924316, "learning_rate": 2.208984375e-05, "loss": 0.23542022705078125, "step": 378 }, { "epoch": 0.051237853823405156, "grad_norm": 4.4261274337768555, "learning_rate": 2.2148437500000002e-05, "loss": 0.22298049926757812, "step": 379 }, { "epoch": 0.051373046049852135, "grad_norm": 3.0320937633514404, "learning_rate": 2.220703125e-05, "loss": 0.3094482421875, "step": 380 }, { "epoch": 0.05150823827629911, "grad_norm": 6.120395183563232, "learning_rate": 2.2265625e-05, "loss": 0.26009178161621094, "step": 381 }, { "epoch": 0.05164343050274609, "grad_norm": 2.3273556232452393, "learning_rate": 2.2324218750000002e-05, "loss": 0.1851358413696289, "step": 382 }, { "epoch": 0.05177862272919307, "grad_norm": 8.57958698272705, "learning_rate": 2.23828125e-05, "loss": 0.32439231872558594, "step": 383 }, { "epoch": 0.05191381495564005, "grad_norm": 5.873696804046631, "learning_rate": 2.244140625e-05, "loss": 0.18160438537597656, "step": 384 }, { "epoch": 0.05204900718208703, "grad_norm": 3.3337645530700684, "learning_rate": 2.25e-05, "loss": 0.2522258758544922, "step": 385 }, { "epoch": 0.05218419940853401, "grad_norm": 2.935910701751709, "learning_rate": 2.255859375e-05, "loss": 0.21403980255126953, "step": 386 }, { "epoch": 0.05231939163498099, "grad_norm": 1.5019489526748657, "learning_rate": 2.26171875e-05, "loss": 0.19890975952148438, "step": 387 }, { "epoch": 0.05245458386142797, "grad_norm": 2.947593927383423, "learning_rate": 2.267578125e-05, "loss": 0.22186899185180664, "step": 388 }, { "epoch": 0.05258977608787495, "grad_norm": 4.533196449279785, "learning_rate": 2.2734375000000002e-05, "loss": 0.24439239501953125, "step": 389 }, { "epoch": 0.05272496831432193, "grad_norm": 2.6896891593933105, "learning_rate": 2.279296875e-05, "loss": 0.22571945190429688, "step": 390 }, { "epoch": 0.052860160540768905, "grad_norm": 6.780669689178467, "learning_rate": 2.28515625e-05, "loss": 0.14905357360839844, "step": 391 }, { "epoch": 0.052995352767215884, "grad_norm": 5.1014814376831055, "learning_rate": 2.291015625e-05, "loss": 0.21990394592285156, "step": 392 }, { "epoch": 0.05313054499366286, "grad_norm": 2.7403886318206787, "learning_rate": 2.296875e-05, "loss": 0.1731433868408203, "step": 393 }, { "epoch": 0.05326573722010984, "grad_norm": 3.225177049636841, "learning_rate": 2.302734375e-05, "loss": 0.19796180725097656, "step": 394 }, { "epoch": 0.053400929446556826, "grad_norm": 5.5813703536987305, "learning_rate": 2.30859375e-05, "loss": 0.2199115753173828, "step": 395 }, { "epoch": 0.053536121673003804, "grad_norm": 5.583347320556641, "learning_rate": 2.3144531250000002e-05, "loss": 0.21397972106933594, "step": 396 }, { "epoch": 0.05367131389945078, "grad_norm": 5.987549304962158, "learning_rate": 2.3203125e-05, "loss": 0.290771484375, "step": 397 }, { "epoch": 0.05380650612589776, "grad_norm": 5.06838846206665, "learning_rate": 2.326171875e-05, "loss": 0.23844623565673828, "step": 398 }, { "epoch": 0.05394169835234474, "grad_norm": 3.899824619293213, "learning_rate": 2.3320312500000002e-05, "loss": 0.23722076416015625, "step": 399 }, { "epoch": 0.05407689057879172, "grad_norm": 4.47520112991333, "learning_rate": 2.337890625e-05, "loss": 0.2806396484375, "step": 400 }, { "epoch": 0.0542120828052387, "grad_norm": 2.9857523441314697, "learning_rate": 2.34375e-05, "loss": 0.265472412109375, "step": 401 }, { "epoch": 0.054347275031685675, "grad_norm": 7.4022016525268555, "learning_rate": 2.349609375e-05, "loss": 0.29033660888671875, "step": 402 }, { "epoch": 0.05448246725813266, "grad_norm": 6.564642429351807, "learning_rate": 2.35546875e-05, "loss": 0.18271923065185547, "step": 403 }, { "epoch": 0.05461765948457964, "grad_norm": 2.413015365600586, "learning_rate": 2.361328125e-05, "loss": 0.2622261047363281, "step": 404 }, { "epoch": 0.05475285171102662, "grad_norm": 3.002732038497925, "learning_rate": 2.3671875e-05, "loss": 0.13452529907226562, "step": 405 }, { "epoch": 0.054888043937473596, "grad_norm": 8.486944198608398, "learning_rate": 2.3730468750000002e-05, "loss": 0.22184371948242188, "step": 406 }, { "epoch": 0.055023236163920575, "grad_norm": 6.0893425941467285, "learning_rate": 2.37890625e-05, "loss": 0.1980915069580078, "step": 407 }, { "epoch": 0.05515842839036755, "grad_norm": 5.347884654998779, "learning_rate": 2.384765625e-05, "loss": 0.2153921127319336, "step": 408 }, { "epoch": 0.05529362061681453, "grad_norm": 5.466994285583496, "learning_rate": 2.3906250000000002e-05, "loss": 0.2512245178222656, "step": 409 }, { "epoch": 0.05542881284326151, "grad_norm": 8.381427764892578, "learning_rate": 2.396484375e-05, "loss": 0.22220420837402344, "step": 410 }, { "epoch": 0.05556400506970849, "grad_norm": 6.532052516937256, "learning_rate": 2.40234375e-05, "loss": 0.20798015594482422, "step": 411 }, { "epoch": 0.055699197296155474, "grad_norm": 2.884687900543213, "learning_rate": 2.408203125e-05, "loss": 0.22382354736328125, "step": 412 }, { "epoch": 0.05583438952260245, "grad_norm": 6.863306045532227, "learning_rate": 2.4140625e-05, "loss": 0.22544097900390625, "step": 413 }, { "epoch": 0.05596958174904943, "grad_norm": 6.584400653839111, "learning_rate": 2.419921875e-05, "loss": 0.29451560974121094, "step": 414 }, { "epoch": 0.05610477397549641, "grad_norm": 1.5605359077453613, "learning_rate": 2.42578125e-05, "loss": 0.14832115173339844, "step": 415 }, { "epoch": 0.05623996620194339, "grad_norm": 7.915679454803467, "learning_rate": 2.4316406250000002e-05, "loss": 0.25832462310791016, "step": 416 }, { "epoch": 0.056375158428390366, "grad_norm": 1.8255497217178345, "learning_rate": 2.4375e-05, "loss": 0.22922325134277344, "step": 417 }, { "epoch": 0.056510350654837345, "grad_norm": 2.959744930267334, "learning_rate": 2.443359375e-05, "loss": 0.17743492126464844, "step": 418 }, { "epoch": 0.05664554288128432, "grad_norm": 2.6996357440948486, "learning_rate": 2.44921875e-05, "loss": 0.2768716812133789, "step": 419 }, { "epoch": 0.05678073510773131, "grad_norm": 2.3034348487854004, "learning_rate": 2.455078125e-05, "loss": 0.1283588409423828, "step": 420 }, { "epoch": 0.05691592733417829, "grad_norm": 2.5214056968688965, "learning_rate": 2.4609375e-05, "loss": 0.2310009002685547, "step": 421 }, { "epoch": 0.057051119560625266, "grad_norm": 2.879680871963501, "learning_rate": 2.466796875e-05, "loss": 0.23724746704101562, "step": 422 }, { "epoch": 0.057186311787072244, "grad_norm": 10.767793655395508, "learning_rate": 2.4726562500000002e-05, "loss": 0.325225830078125, "step": 423 }, { "epoch": 0.05732150401351922, "grad_norm": 3.9540767669677734, "learning_rate": 2.478515625e-05, "loss": 0.20673561096191406, "step": 424 }, { "epoch": 0.0574566962399662, "grad_norm": 1.7493932247161865, "learning_rate": 2.484375e-05, "loss": 0.19439697265625, "step": 425 }, { "epoch": 0.05759188846641318, "grad_norm": 5.450231075286865, "learning_rate": 2.4902343750000002e-05, "loss": 0.22993087768554688, "step": 426 }, { "epoch": 0.05772708069286016, "grad_norm": 8.564682960510254, "learning_rate": 2.49609375e-05, "loss": 0.2718772888183594, "step": 427 }, { "epoch": 0.05786227291930714, "grad_norm": 6.0642476081848145, "learning_rate": 2.501953125e-05, "loss": 0.28029823303222656, "step": 428 }, { "epoch": 0.05799746514575412, "grad_norm": 3.1526358127593994, "learning_rate": 2.5078125e-05, "loss": 0.18692588806152344, "step": 429 }, { "epoch": 0.0581326573722011, "grad_norm": 4.7930498123168945, "learning_rate": 2.513671875e-05, "loss": 0.20549392700195312, "step": 430 }, { "epoch": 0.05826784959864808, "grad_norm": 4.353790283203125, "learning_rate": 2.51953125e-05, "loss": 0.22398090362548828, "step": 431 }, { "epoch": 0.05840304182509506, "grad_norm": 4.567189693450928, "learning_rate": 2.525390625e-05, "loss": 0.23551559448242188, "step": 432 }, { "epoch": 0.058538234051542036, "grad_norm": 3.749096393585205, "learning_rate": 2.5312500000000002e-05, "loss": 0.24753570556640625, "step": 433 }, { "epoch": 0.058673426277989014, "grad_norm": 6.223649501800537, "learning_rate": 2.537109375e-05, "loss": 0.295318603515625, "step": 434 }, { "epoch": 0.05880861850443599, "grad_norm": 4.8487548828125, "learning_rate": 2.54296875e-05, "loss": 0.23954010009765625, "step": 435 }, { "epoch": 0.05894381073088297, "grad_norm": 3.14316987991333, "learning_rate": 2.548828125e-05, "loss": 0.2597484588623047, "step": 436 }, { "epoch": 0.05907900295732996, "grad_norm": 3.5172786712646484, "learning_rate": 2.5546875e-05, "loss": 0.22348642349243164, "step": 437 }, { "epoch": 0.059214195183776935, "grad_norm": 2.4640042781829834, "learning_rate": 2.560546875e-05, "loss": 0.29814910888671875, "step": 438 }, { "epoch": 0.059349387410223914, "grad_norm": 2.347519636154175, "learning_rate": 2.56640625e-05, "loss": 0.25396728515625, "step": 439 }, { "epoch": 0.05948457963667089, "grad_norm": 5.281825065612793, "learning_rate": 2.5722656250000002e-05, "loss": 0.23248767852783203, "step": 440 }, { "epoch": 0.05961977186311787, "grad_norm": 1.6211901903152466, "learning_rate": 2.578125e-05, "loss": 0.1940174102783203, "step": 441 }, { "epoch": 0.05975496408956485, "grad_norm": 4.001313209533691, "learning_rate": 2.583984375e-05, "loss": 0.23023605346679688, "step": 442 }, { "epoch": 0.05989015631601183, "grad_norm": 6.017233848571777, "learning_rate": 2.5898437500000002e-05, "loss": 0.2937355041503906, "step": 443 }, { "epoch": 0.060025348542458806, "grad_norm": 2.354102611541748, "learning_rate": 2.595703125e-05, "loss": 0.22694778442382812, "step": 444 }, { "epoch": 0.060160540768905785, "grad_norm": 2.4847521781921387, "learning_rate": 2.6015625e-05, "loss": 0.21416473388671875, "step": 445 }, { "epoch": 0.06029573299535277, "grad_norm": 1.6427913904190063, "learning_rate": 2.607421875e-05, "loss": 0.19562911987304688, "step": 446 }, { "epoch": 0.06043092522179975, "grad_norm": 3.264057159423828, "learning_rate": 2.61328125e-05, "loss": 0.29549407958984375, "step": 447 }, { "epoch": 0.06056611744824673, "grad_norm": 3.7565858364105225, "learning_rate": 2.619140625e-05, "loss": 0.2630767822265625, "step": 448 }, { "epoch": 0.060701309674693706, "grad_norm": 4.027137279510498, "learning_rate": 2.625e-05, "loss": 0.27564239501953125, "step": 449 }, { "epoch": 0.060836501901140684, "grad_norm": 6.034366607666016, "learning_rate": 2.6308593750000002e-05, "loss": 0.22731399536132812, "step": 450 }, { "epoch": 0.06097169412758766, "grad_norm": 4.657773494720459, "learning_rate": 2.63671875e-05, "loss": 0.18934249877929688, "step": 451 }, { "epoch": 0.06110688635403464, "grad_norm": 1.4614509344100952, "learning_rate": 2.642578125e-05, "loss": 0.1571674346923828, "step": 452 }, { "epoch": 0.06124207858048162, "grad_norm": 3.297563314437866, "learning_rate": 2.6484375000000002e-05, "loss": 0.22957229614257812, "step": 453 }, { "epoch": 0.061377270806928605, "grad_norm": 4.779603004455566, "learning_rate": 2.654296875e-05, "loss": 0.273284912109375, "step": 454 }, { "epoch": 0.06151246303337558, "grad_norm": 5.337319374084473, "learning_rate": 2.66015625e-05, "loss": 0.2867279052734375, "step": 455 }, { "epoch": 0.06164765525982256, "grad_norm": 2.4499621391296387, "learning_rate": 2.666015625e-05, "loss": 0.2900524139404297, "step": 456 }, { "epoch": 0.06178284748626954, "grad_norm": 11.705016136169434, "learning_rate": 2.6718750000000002e-05, "loss": 0.2447681427001953, "step": 457 }, { "epoch": 0.06191803971271652, "grad_norm": 9.218425750732422, "learning_rate": 2.677734375e-05, "loss": 0.2472400665283203, "step": 458 }, { "epoch": 0.0620532319391635, "grad_norm": 7.032962799072266, "learning_rate": 2.68359375e-05, "loss": 0.1722564697265625, "step": 459 }, { "epoch": 0.062188424165610476, "grad_norm": 2.389957904815674, "learning_rate": 2.6894531250000002e-05, "loss": 0.2060413360595703, "step": 460 }, { "epoch": 0.062323616392057454, "grad_norm": 3.1125590801239014, "learning_rate": 2.6953125e-05, "loss": 0.2782325744628906, "step": 461 }, { "epoch": 0.06245880861850443, "grad_norm": 6.986378192901611, "learning_rate": 2.701171875e-05, "loss": 0.2196359634399414, "step": 462 }, { "epoch": 0.06259400084495141, "grad_norm": 3.1970584392547607, "learning_rate": 2.70703125e-05, "loss": 0.24990081787109375, "step": 463 }, { "epoch": 0.06272919307139839, "grad_norm": 2.5662636756896973, "learning_rate": 2.712890625e-05, "loss": 0.20447540283203125, "step": 464 }, { "epoch": 0.06286438529784537, "grad_norm": 3.5405521392822266, "learning_rate": 2.71875e-05, "loss": 0.29277610778808594, "step": 465 }, { "epoch": 0.06299957752429235, "grad_norm": 1.644301414489746, "learning_rate": 2.724609375e-05, "loss": 0.2523994445800781, "step": 466 }, { "epoch": 0.06313476975073934, "grad_norm": 3.12206768989563, "learning_rate": 2.7304687500000002e-05, "loss": 0.2957744598388672, "step": 467 }, { "epoch": 0.06326996197718632, "grad_norm": 1.8226984739303589, "learning_rate": 2.736328125e-05, "loss": 0.22678375244140625, "step": 468 }, { "epoch": 0.0634051542036333, "grad_norm": 3.7132630348205566, "learning_rate": 2.7421875e-05, "loss": 0.2456817626953125, "step": 469 }, { "epoch": 0.06354034643008027, "grad_norm": 3.246887683868408, "learning_rate": 2.7480468750000002e-05, "loss": 0.19822216033935547, "step": 470 }, { "epoch": 0.06367553865652725, "grad_norm": 2.4337685108184814, "learning_rate": 2.75390625e-05, "loss": 0.25124549865722656, "step": 471 }, { "epoch": 0.06381073088297423, "grad_norm": 1.425062894821167, "learning_rate": 2.759765625e-05, "loss": 0.15297698974609375, "step": 472 }, { "epoch": 0.06394592310942121, "grad_norm": 1.73653244972229, "learning_rate": 2.765625e-05, "loss": 0.2443256378173828, "step": 473 }, { "epoch": 0.06408111533586819, "grad_norm": 6.485707759857178, "learning_rate": 2.7714843750000002e-05, "loss": 0.24271011352539062, "step": 474 }, { "epoch": 0.06421630756231517, "grad_norm": 4.944649696350098, "learning_rate": 2.77734375e-05, "loss": 0.20535194873809814, "step": 475 }, { "epoch": 0.06435149978876215, "grad_norm": 1.7760387659072876, "learning_rate": 2.783203125e-05, "loss": 0.18415069580078125, "step": 476 }, { "epoch": 0.06448669201520912, "grad_norm": 2.805852174758911, "learning_rate": 2.7890625000000002e-05, "loss": 0.2592926025390625, "step": 477 }, { "epoch": 0.0646218842416561, "grad_norm": 7.339973449707031, "learning_rate": 2.794921875e-05, "loss": 0.303680419921875, "step": 478 }, { "epoch": 0.06475707646810308, "grad_norm": 3.530911684036255, "learning_rate": 2.80078125e-05, "loss": 0.1755390167236328, "step": 479 }, { "epoch": 0.06489226869455006, "grad_norm": 2.5858101844787598, "learning_rate": 2.806640625e-05, "loss": 0.2688274383544922, "step": 480 }, { "epoch": 0.06502746092099704, "grad_norm": 4.0986175537109375, "learning_rate": 2.8125e-05, "loss": 0.1589217185974121, "step": 481 }, { "epoch": 0.06516265314744402, "grad_norm": 3.4985320568084717, "learning_rate": 2.818359375e-05, "loss": 0.269622802734375, "step": 482 }, { "epoch": 0.065297845373891, "grad_norm": 2.5402328968048096, "learning_rate": 2.82421875e-05, "loss": 0.22033214569091797, "step": 483 }, { "epoch": 0.06543303760033799, "grad_norm": 2.327655076980591, "learning_rate": 2.8300781250000002e-05, "loss": 0.2842884063720703, "step": 484 }, { "epoch": 0.06556822982678497, "grad_norm": 6.257692813873291, "learning_rate": 2.8359375e-05, "loss": 0.2590751647949219, "step": 485 }, { "epoch": 0.06570342205323194, "grad_norm": 1.8788670301437378, "learning_rate": 2.841796875e-05, "loss": 0.2032928466796875, "step": 486 }, { "epoch": 0.06583861427967892, "grad_norm": 2.2117254734039307, "learning_rate": 2.8476562500000002e-05, "loss": 0.28125762939453125, "step": 487 }, { "epoch": 0.0659738065061259, "grad_norm": 3.816068410873413, "learning_rate": 2.853515625e-05, "loss": 0.2964038848876953, "step": 488 }, { "epoch": 0.06610899873257288, "grad_norm": 4.036928653717041, "learning_rate": 2.859375e-05, "loss": 0.20896530151367188, "step": 489 }, { "epoch": 0.06624419095901986, "grad_norm": 1.7956700325012207, "learning_rate": 2.865234375e-05, "loss": 0.23501205444335938, "step": 490 }, { "epoch": 0.06637938318546684, "grad_norm": 3.9310989379882812, "learning_rate": 2.87109375e-05, "loss": 0.29599761962890625, "step": 491 }, { "epoch": 0.06651457541191381, "grad_norm": 2.839219808578491, "learning_rate": 2.876953125e-05, "loss": 0.26711463928222656, "step": 492 }, { "epoch": 0.0666497676383608, "grad_norm": 3.587273597717285, "learning_rate": 2.8828125e-05, "loss": 0.2360992431640625, "step": 493 }, { "epoch": 0.06678495986480777, "grad_norm": 5.025626182556152, "learning_rate": 2.8886718750000002e-05, "loss": 0.2226734161376953, "step": 494 }, { "epoch": 0.06692015209125475, "grad_norm": 2.0641028881073, "learning_rate": 2.89453125e-05, "loss": 0.2490673065185547, "step": 495 }, { "epoch": 0.06705534431770173, "grad_norm": 3.0104970932006836, "learning_rate": 2.900390625e-05, "loss": 0.2042388916015625, "step": 496 }, { "epoch": 0.06719053654414871, "grad_norm": 2.581287384033203, "learning_rate": 2.90625e-05, "loss": 0.2679271697998047, "step": 497 }, { "epoch": 0.06732572877059569, "grad_norm": 1.3771032094955444, "learning_rate": 2.912109375e-05, "loss": 0.18079185485839844, "step": 498 }, { "epoch": 0.06746092099704266, "grad_norm": 1.7775890827178955, "learning_rate": 2.91796875e-05, "loss": 0.22919654846191406, "step": 499 }, { "epoch": 0.06759611322348964, "grad_norm": 6.642019271850586, "learning_rate": 2.923828125e-05, "loss": 0.22353172302246094, "step": 500 }, { "epoch": 0.06773130544993664, "grad_norm": 3.651278495788574, "learning_rate": 2.9296875000000002e-05, "loss": 0.1730327606201172, "step": 501 }, { "epoch": 0.06786649767638361, "grad_norm": 1.6108444929122925, "learning_rate": 2.935546875e-05, "loss": 0.19461822509765625, "step": 502 }, { "epoch": 0.06800168990283059, "grad_norm": 3.256890296936035, "learning_rate": 2.94140625e-05, "loss": 0.18629169464111328, "step": 503 }, { "epoch": 0.06813688212927757, "grad_norm": 2.7766642570495605, "learning_rate": 2.9472656250000002e-05, "loss": 0.20125579833984375, "step": 504 }, { "epoch": 0.06827207435572455, "grad_norm": 5.880879878997803, "learning_rate": 2.953125e-05, "loss": 0.2665996551513672, "step": 505 }, { "epoch": 0.06840726658217153, "grad_norm": 7.447145938873291, "learning_rate": 2.958984375e-05, "loss": 0.24422645568847656, "step": 506 }, { "epoch": 0.0685424588086185, "grad_norm": 4.4957685470581055, "learning_rate": 2.96484375e-05, "loss": 0.1899728775024414, "step": 507 }, { "epoch": 0.06867765103506548, "grad_norm": 2.0955448150634766, "learning_rate": 2.970703125e-05, "loss": 0.190673828125, "step": 508 }, { "epoch": 0.06881284326151246, "grad_norm": 2.9720211029052734, "learning_rate": 2.9765625e-05, "loss": 0.19868850708007812, "step": 509 }, { "epoch": 0.06894803548795944, "grad_norm": 1.5614224672317505, "learning_rate": 2.982421875e-05, "loss": 0.19950485229492188, "step": 510 }, { "epoch": 0.06908322771440642, "grad_norm": 2.4292871952056885, "learning_rate": 2.9882812500000002e-05, "loss": 0.18830108642578125, "step": 511 }, { "epoch": 0.0692184199408534, "grad_norm": 2.116597890853882, "learning_rate": 2.994140625e-05, "loss": 0.23431777954101562, "step": 512 }, { "epoch": 0.06935361216730038, "grad_norm": 2.2066287994384766, "learning_rate": 3e-05, "loss": 0.2046375274658203, "step": 513 }, { "epoch": 0.06948880439374736, "grad_norm": 1.9042032957077026, "learning_rate": 2.9999998438460004e-05, "loss": 0.2035846710205078, "step": 514 }, { "epoch": 0.06962399662019433, "grad_norm": 2.0176074504852295, "learning_rate": 2.9999993753840344e-05, "loss": 0.17517471313476562, "step": 515 }, { "epoch": 0.06975918884664131, "grad_norm": 1.8081135749816895, "learning_rate": 2.9999985946141995e-05, "loss": 0.22444915771484375, "step": 516 }, { "epoch": 0.06989438107308829, "grad_norm": 1.5158302783966064, "learning_rate": 2.9999975015366586e-05, "loss": 0.20190048217773438, "step": 517 }, { "epoch": 0.07002957329953528, "grad_norm": 5.406824111938477, "learning_rate": 2.9999960961516384e-05, "loss": 0.2318258285522461, "step": 518 }, { "epoch": 0.07016476552598226, "grad_norm": 5.942788600921631, "learning_rate": 2.9999943784594325e-05, "loss": 0.21835899353027344, "step": 519 }, { "epoch": 0.07029995775242924, "grad_norm": 5.351837158203125, "learning_rate": 2.9999923484603975e-05, "loss": 0.2577095031738281, "step": 520 }, { "epoch": 0.07043514997887622, "grad_norm": 1.3302587270736694, "learning_rate": 2.999990006154957e-05, "loss": 0.2042369842529297, "step": 521 }, { "epoch": 0.0705703422053232, "grad_norm": 2.843609571456909, "learning_rate": 2.9999873515435977e-05, "loss": 0.22577476501464844, "step": 522 }, { "epoch": 0.07070553443177018, "grad_norm": 3.0564115047454834, "learning_rate": 2.9999843846268735e-05, "loss": 0.28678131103515625, "step": 523 }, { "epoch": 0.07084072665821715, "grad_norm": 3.8802387714385986, "learning_rate": 2.9999811054054018e-05, "loss": 0.27037811279296875, "step": 524 }, { "epoch": 0.07097591888466413, "grad_norm": 6.748092174530029, "learning_rate": 2.9999775138798646e-05, "loss": 0.2612476348876953, "step": 525 }, { "epoch": 0.07111111111111111, "grad_norm": 1.852267861366272, "learning_rate": 2.99997361005101e-05, "loss": 0.22038650512695312, "step": 526 }, { "epoch": 0.07124630333755809, "grad_norm": 2.9750607013702393, "learning_rate": 2.9999693939196513e-05, "loss": 0.22796630859375, "step": 527 }, { "epoch": 0.07138149556400507, "grad_norm": 3.8122777938842773, "learning_rate": 2.999964865486666e-05, "loss": 0.17757225036621094, "step": 528 }, { "epoch": 0.07151668779045205, "grad_norm": 4.453284740447998, "learning_rate": 2.999960024752997e-05, "loss": 0.31827449798583984, "step": 529 }, { "epoch": 0.07165188001689902, "grad_norm": 3.6600072383880615, "learning_rate": 2.9999548717196516e-05, "loss": 0.32689666748046875, "step": 530 }, { "epoch": 0.071787072243346, "grad_norm": 2.352653980255127, "learning_rate": 2.999949406387703e-05, "loss": 0.18181228637695312, "step": 531 }, { "epoch": 0.07192226446979298, "grad_norm": 2.4650542736053467, "learning_rate": 2.9999436287582903e-05, "loss": 0.2533912658691406, "step": 532 }, { "epoch": 0.07205745669623996, "grad_norm": 5.945155620574951, "learning_rate": 2.9999375388326145e-05, "loss": 0.27979278564453125, "step": 533 }, { "epoch": 0.07219264892268694, "grad_norm": 1.88994562625885, "learning_rate": 2.9999311366119447e-05, "loss": 0.18726348876953125, "step": 534 }, { "epoch": 0.07232784114913393, "grad_norm": 2.98053240776062, "learning_rate": 2.9999244220976137e-05, "loss": 0.302459716796875, "step": 535 }, { "epoch": 0.07246303337558091, "grad_norm": 3.906646490097046, "learning_rate": 2.9999173952910197e-05, "loss": 0.27861785888671875, "step": 536 }, { "epoch": 0.07259822560202789, "grad_norm": 4.483268737792969, "learning_rate": 2.9999100561936252e-05, "loss": 0.23635482788085938, "step": 537 }, { "epoch": 0.07273341782847487, "grad_norm": 3.2484240531921387, "learning_rate": 2.9999024048069585e-05, "loss": 0.16626930236816406, "step": 538 }, { "epoch": 0.07286861005492185, "grad_norm": 2.304311990737915, "learning_rate": 2.9998944411326127e-05, "loss": 0.21567249298095703, "step": 539 }, { "epoch": 0.07300380228136882, "grad_norm": 5.122791767120361, "learning_rate": 2.999886165172246e-05, "loss": 0.25857067108154297, "step": 540 }, { "epoch": 0.0731389945078158, "grad_norm": 1.861470103263855, "learning_rate": 2.9998775769275814e-05, "loss": 0.30873870849609375, "step": 541 }, { "epoch": 0.07327418673426278, "grad_norm": 1.849015474319458, "learning_rate": 2.9998686764004067e-05, "loss": 0.19608020782470703, "step": 542 }, { "epoch": 0.07340937896070976, "grad_norm": 2.7299294471740723, "learning_rate": 2.9998594635925755e-05, "loss": 0.19956016540527344, "step": 543 }, { "epoch": 0.07354457118715674, "grad_norm": 1.5337392091751099, "learning_rate": 2.999849938506005e-05, "loss": 0.22431564331054688, "step": 544 }, { "epoch": 0.07367976341360372, "grad_norm": 2.3284547328948975, "learning_rate": 2.99984010114268e-05, "loss": 0.23108291625976562, "step": 545 }, { "epoch": 0.0738149556400507, "grad_norm": 3.4310529232025146, "learning_rate": 2.9998299515046475e-05, "loss": 0.1887073516845703, "step": 546 }, { "epoch": 0.07395014786649767, "grad_norm": 2.7551848888397217, "learning_rate": 2.9998194895940213e-05, "loss": 0.26019287109375, "step": 547 }, { "epoch": 0.07408534009294465, "grad_norm": 1.8915430307388306, "learning_rate": 2.9998087154129792e-05, "loss": 0.19549083709716797, "step": 548 }, { "epoch": 0.07422053231939163, "grad_norm": 4.413297176361084, "learning_rate": 2.9997976289637645e-05, "loss": 0.23038673400878906, "step": 549 }, { "epoch": 0.07435572454583861, "grad_norm": 3.587127447128296, "learning_rate": 2.9997862302486855e-05, "loss": 0.1690073013305664, "step": 550 }, { "epoch": 0.07449091677228559, "grad_norm": 1.7189197540283203, "learning_rate": 2.9997745192701153e-05, "loss": 0.17391681671142578, "step": 551 }, { "epoch": 0.07462610899873258, "grad_norm": 2.2448935508728027, "learning_rate": 2.9997624960304926e-05, "loss": 0.18960976600646973, "step": 552 }, { "epoch": 0.07476130122517956, "grad_norm": 2.7006289958953857, "learning_rate": 2.9997501605323214e-05, "loss": 0.13849639892578125, "step": 553 }, { "epoch": 0.07489649345162654, "grad_norm": 3.4401018619537354, "learning_rate": 2.999737512778168e-05, "loss": 0.22487258911132812, "step": 554 }, { "epoch": 0.07503168567807351, "grad_norm": 2.79577898979187, "learning_rate": 2.9997245527706674e-05, "loss": 0.1993846893310547, "step": 555 }, { "epoch": 0.0751668779045205, "grad_norm": 1.3946646451950073, "learning_rate": 2.999711280512517e-05, "loss": 0.15007591247558594, "step": 556 }, { "epoch": 0.07530207013096747, "grad_norm": 10.264063835144043, "learning_rate": 2.9996976960064807e-05, "loss": 0.2519207000732422, "step": 557 }, { "epoch": 0.07543726235741445, "grad_norm": 10.854535102844238, "learning_rate": 2.999683799255387e-05, "loss": 0.2876319885253906, "step": 558 }, { "epoch": 0.07557245458386143, "grad_norm": 4.031068801879883, "learning_rate": 2.999669590262129e-05, "loss": 0.22539710998535156, "step": 559 }, { "epoch": 0.07570764681030841, "grad_norm": 2.927306890487671, "learning_rate": 2.999655069029665e-05, "loss": 0.21682333946228027, "step": 560 }, { "epoch": 0.07584283903675539, "grad_norm": 2.416666269302368, "learning_rate": 2.9996402355610183e-05, "loss": 0.2017526626586914, "step": 561 }, { "epoch": 0.07597803126320236, "grad_norm": 3.3551390171051025, "learning_rate": 2.9996250898592777e-05, "loss": 0.325958251953125, "step": 562 }, { "epoch": 0.07611322348964934, "grad_norm": 1.9157609939575195, "learning_rate": 2.9996096319275962e-05, "loss": 0.24203109741210938, "step": 563 }, { "epoch": 0.07624841571609632, "grad_norm": 4.674991607666016, "learning_rate": 2.9995938617691925e-05, "loss": 0.2591667175292969, "step": 564 }, { "epoch": 0.0763836079425433, "grad_norm": 2.6372244358062744, "learning_rate": 2.9995777793873504e-05, "loss": 0.20122432708740234, "step": 565 }, { "epoch": 0.07651880016899028, "grad_norm": 2.5040884017944336, "learning_rate": 2.9995613847854176e-05, "loss": 0.2384195327758789, "step": 566 }, { "epoch": 0.07665399239543726, "grad_norm": 4.281469821929932, "learning_rate": 2.9995446779668078e-05, "loss": 0.19385933876037598, "step": 567 }, { "epoch": 0.07678918462188423, "grad_norm": 5.763937473297119, "learning_rate": 2.9995276589349992e-05, "loss": 0.24071502685546875, "step": 568 }, { "epoch": 0.07692437684833123, "grad_norm": 4.173258304595947, "learning_rate": 2.9995103276935357e-05, "loss": 0.23386859893798828, "step": 569 }, { "epoch": 0.0770595690747782, "grad_norm": 7.034768104553223, "learning_rate": 2.9994926842460258e-05, "loss": 0.2025146484375, "step": 570 }, { "epoch": 0.07719476130122518, "grad_norm": 7.723634243011475, "learning_rate": 2.9994747285961428e-05, "loss": 0.3099212646484375, "step": 571 }, { "epoch": 0.07732995352767216, "grad_norm": 4.624016761779785, "learning_rate": 2.9994564607476255e-05, "loss": 0.18726730346679688, "step": 572 }, { "epoch": 0.07746514575411914, "grad_norm": 2.3658721446990967, "learning_rate": 2.9994378807042762e-05, "loss": 0.20200729370117188, "step": 573 }, { "epoch": 0.07760033798056612, "grad_norm": 3.864985942840576, "learning_rate": 2.9994189884699647e-05, "loss": 0.2131948471069336, "step": 574 }, { "epoch": 0.0777355302070131, "grad_norm": 3.0287411212921143, "learning_rate": 2.9993997840486233e-05, "loss": 0.226959228515625, "step": 575 }, { "epoch": 0.07787072243346008, "grad_norm": 4.788344383239746, "learning_rate": 2.9993802674442516e-05, "loss": 0.2349262237548828, "step": 576 }, { "epoch": 0.07800591465990706, "grad_norm": 2.301671266555786, "learning_rate": 2.999360438660913e-05, "loss": 0.2593231201171875, "step": 577 }, { "epoch": 0.07814110688635403, "grad_norm": 3.565685272216797, "learning_rate": 2.9993402977027346e-05, "loss": 0.2610816955566406, "step": 578 }, { "epoch": 0.07827629911280101, "grad_norm": 3.00917649269104, "learning_rate": 2.999319844573911e-05, "loss": 0.2576713562011719, "step": 579 }, { "epoch": 0.07841149133924799, "grad_norm": 4.8254265785217285, "learning_rate": 2.9992990792787007e-05, "loss": 0.22041988372802734, "step": 580 }, { "epoch": 0.07854668356569497, "grad_norm": 4.8675923347473145, "learning_rate": 2.999278001821427e-05, "loss": 0.24315643310546875, "step": 581 }, { "epoch": 0.07868187579214195, "grad_norm": 2.737541675567627, "learning_rate": 2.9992566122064775e-05, "loss": 0.2404327392578125, "step": 582 }, { "epoch": 0.07881706801858893, "grad_norm": 1.5600872039794922, "learning_rate": 2.999234910438307e-05, "loss": 0.17913246154785156, "step": 583 }, { "epoch": 0.0789522602450359, "grad_norm": 5.774623870849609, "learning_rate": 2.999212896521433e-05, "loss": 0.24062418937683105, "step": 584 }, { "epoch": 0.07908745247148288, "grad_norm": 9.099496841430664, "learning_rate": 2.999190570460439e-05, "loss": 0.25443267822265625, "step": 585 }, { "epoch": 0.07922264469792988, "grad_norm": 4.413066387176514, "learning_rate": 2.9991679322599734e-05, "loss": 0.15851974487304688, "step": 586 }, { "epoch": 0.07935783692437685, "grad_norm": 2.0952835083007812, "learning_rate": 2.9991449819247505e-05, "loss": 0.2638568878173828, "step": 587 }, { "epoch": 0.07949302915082383, "grad_norm": 1.587292194366455, "learning_rate": 2.9991217194595474e-05, "loss": 0.1701498031616211, "step": 588 }, { "epoch": 0.07962822137727081, "grad_norm": 11.579439163208008, "learning_rate": 2.9990981448692078e-05, "loss": 0.363983154296875, "step": 589 }, { "epoch": 0.07976341360371779, "grad_norm": 4.696298599243164, "learning_rate": 2.999074258158641e-05, "loss": 0.20482683181762695, "step": 590 }, { "epoch": 0.07989860583016477, "grad_norm": 1.8145850896835327, "learning_rate": 2.9990500593328192e-05, "loss": 0.19809770584106445, "step": 591 }, { "epoch": 0.08003379805661175, "grad_norm": 3.4981093406677246, "learning_rate": 2.999025548396781e-05, "loss": 0.18554019927978516, "step": 592 }, { "epoch": 0.08016899028305872, "grad_norm": 1.5671679973602295, "learning_rate": 2.9990007253556302e-05, "loss": 0.2060699462890625, "step": 593 }, { "epoch": 0.0803041825095057, "grad_norm": 1.588470220565796, "learning_rate": 2.9989755902145345e-05, "loss": 0.22665834426879883, "step": 594 }, { "epoch": 0.08043937473595268, "grad_norm": 2.0635406970977783, "learning_rate": 2.9989501429787273e-05, "loss": 0.20351028442382812, "step": 595 }, { "epoch": 0.08057456696239966, "grad_norm": 3.4679715633392334, "learning_rate": 2.9989243836535073e-05, "loss": 0.22932195663452148, "step": 596 }, { "epoch": 0.08070975918884664, "grad_norm": 2.1162478923797607, "learning_rate": 2.998898312244237e-05, "loss": 0.22040939331054688, "step": 597 }, { "epoch": 0.08084495141529362, "grad_norm": 1.7832661867141724, "learning_rate": 2.9988719287563452e-05, "loss": 0.184783935546875, "step": 598 }, { "epoch": 0.0809801436417406, "grad_norm": 1.2377979755401611, "learning_rate": 2.998845233195325e-05, "loss": 0.2354755401611328, "step": 599 }, { "epoch": 0.08111533586818757, "grad_norm": 4.187588691711426, "learning_rate": 2.998818225566734e-05, "loss": 0.2707843780517578, "step": 600 }, { "epoch": 0.08125052809463455, "grad_norm": 5.3939385414123535, "learning_rate": 2.998790905876196e-05, "loss": 0.29697132110595703, "step": 601 }, { "epoch": 0.08138572032108153, "grad_norm": 3.264021873474121, "learning_rate": 2.9987632741293987e-05, "loss": 0.2369537353515625, "step": 602 }, { "epoch": 0.08152091254752852, "grad_norm": 1.913804531097412, "learning_rate": 2.998735330332096e-05, "loss": 0.2901735305786133, "step": 603 }, { "epoch": 0.0816561047739755, "grad_norm": 6.600580215454102, "learning_rate": 2.9987070744901046e-05, "loss": 0.28864288330078125, "step": 604 }, { "epoch": 0.08179129700042248, "grad_norm": 8.056124687194824, "learning_rate": 2.9986785066093084e-05, "loss": 0.2801971435546875, "step": 605 }, { "epoch": 0.08192648922686946, "grad_norm": 2.676353693008423, "learning_rate": 2.9986496266956556e-05, "loss": 0.17203235626220703, "step": 606 }, { "epoch": 0.08206168145331644, "grad_norm": 1.5418872833251953, "learning_rate": 2.9986204347551583e-05, "loss": 0.2698516845703125, "step": 607 }, { "epoch": 0.08219687367976342, "grad_norm": 5.122815132141113, "learning_rate": 2.9985909307938948e-05, "loss": 0.19475841522216797, "step": 608 }, { "epoch": 0.0823320659062104, "grad_norm": 9.323260307312012, "learning_rate": 2.9985611148180082e-05, "loss": 0.27602052688598633, "step": 609 }, { "epoch": 0.08246725813265737, "grad_norm": 5.362401485443115, "learning_rate": 2.9985309868337063e-05, "loss": 0.20574188232421875, "step": 610 }, { "epoch": 0.08260245035910435, "grad_norm": 3.271744966506958, "learning_rate": 2.9985005468472617e-05, "loss": 0.2656221389770508, "step": 611 }, { "epoch": 0.08273764258555133, "grad_norm": 3.040532112121582, "learning_rate": 2.9984697948650124e-05, "loss": 0.2406768798828125, "step": 612 }, { "epoch": 0.08287283481199831, "grad_norm": 5.9468607902526855, "learning_rate": 2.998438730893361e-05, "loss": 0.3377494812011719, "step": 613 }, { "epoch": 0.08300802703844529, "grad_norm": 7.20884895324707, "learning_rate": 2.9984073549387747e-05, "loss": 0.23948097229003906, "step": 614 }, { "epoch": 0.08314321926489227, "grad_norm": 6.783653736114502, "learning_rate": 2.998375667007787e-05, "loss": 0.28171539306640625, "step": 615 }, { "epoch": 0.08327841149133924, "grad_norm": 3.8011462688446045, "learning_rate": 2.998343667106995e-05, "loss": 0.22377777099609375, "step": 616 }, { "epoch": 0.08341360371778622, "grad_norm": 3.0447449684143066, "learning_rate": 2.9983113552430616e-05, "loss": 0.2240753173828125, "step": 617 }, { "epoch": 0.0835487959442332, "grad_norm": 3.1536710262298584, "learning_rate": 2.9982787314227134e-05, "loss": 0.2377605438232422, "step": 618 }, { "epoch": 0.08368398817068018, "grad_norm": 1.3408907651901245, "learning_rate": 2.998245795652744e-05, "loss": 0.1878509521484375, "step": 619 }, { "epoch": 0.08381918039712717, "grad_norm": 3.2851474285125732, "learning_rate": 2.9982125479400106e-05, "loss": 0.2526235580444336, "step": 620 }, { "epoch": 0.08395437262357415, "grad_norm": 2.2513623237609863, "learning_rate": 2.9981789882914352e-05, "loss": 0.1652812957763672, "step": 621 }, { "epoch": 0.08408956485002113, "grad_norm": 1.2861738204956055, "learning_rate": 2.9981451167140048e-05, "loss": 0.19992446899414062, "step": 622 }, { "epoch": 0.08422475707646811, "grad_norm": 1.907787561416626, "learning_rate": 2.9981109332147722e-05, "loss": 0.17819786071777344, "step": 623 }, { "epoch": 0.08435994930291509, "grad_norm": 2.4429855346679688, "learning_rate": 2.9980764378008545e-05, "loss": 0.19518470764160156, "step": 624 }, { "epoch": 0.08449514152936206, "grad_norm": 1.650206446647644, "learning_rate": 2.9980416304794332e-05, "loss": 0.212799072265625, "step": 625 }, { "epoch": 0.08463033375580904, "grad_norm": 2.758831262588501, "learning_rate": 2.9980065112577565e-05, "loss": 0.23705673217773438, "step": 626 }, { "epoch": 0.08476552598225602, "grad_norm": 2.2661426067352295, "learning_rate": 2.9979710801431357e-05, "loss": 0.25582122802734375, "step": 627 }, { "epoch": 0.084900718208703, "grad_norm": 3.0475738048553467, "learning_rate": 2.997935337142948e-05, "loss": 0.24195194244384766, "step": 628 }, { "epoch": 0.08503591043514998, "grad_norm": 3.1204683780670166, "learning_rate": 2.9978992822646347e-05, "loss": 0.23675918579101562, "step": 629 }, { "epoch": 0.08517110266159696, "grad_norm": 3.261786699295044, "learning_rate": 2.9978629155157036e-05, "loss": 0.19226837158203125, "step": 630 }, { "epoch": 0.08530629488804393, "grad_norm": 2.0601446628570557, "learning_rate": 2.9978262369037252e-05, "loss": 0.23221588134765625, "step": 631 }, { "epoch": 0.08544148711449091, "grad_norm": 2.870387554168701, "learning_rate": 2.9977892464363375e-05, "loss": 0.23838233947753906, "step": 632 }, { "epoch": 0.08557667934093789, "grad_norm": 2.2026076316833496, "learning_rate": 2.9977519441212412e-05, "loss": 0.2767181396484375, "step": 633 }, { "epoch": 0.08571187156738487, "grad_norm": 2.9643681049346924, "learning_rate": 2.9977143299662034e-05, "loss": 0.22817230224609375, "step": 634 }, { "epoch": 0.08584706379383185, "grad_norm": 3.964478015899658, "learning_rate": 2.997676403979055e-05, "loss": 0.3262901306152344, "step": 635 }, { "epoch": 0.08598225602027883, "grad_norm": 2.877920150756836, "learning_rate": 2.997638166167693e-05, "loss": 0.2660694122314453, "step": 636 }, { "epoch": 0.08611744824672582, "grad_norm": 1.5161670446395874, "learning_rate": 2.9975996165400786e-05, "loss": 0.1931772232055664, "step": 637 }, { "epoch": 0.0862526404731728, "grad_norm": 0.940235435962677, "learning_rate": 2.9975607551042373e-05, "loss": 0.18752288818359375, "step": 638 }, { "epoch": 0.08638783269961978, "grad_norm": 3.5364956855773926, "learning_rate": 2.9975215818682607e-05, "loss": 0.2422189712524414, "step": 639 }, { "epoch": 0.08652302492606675, "grad_norm": 3.902735471725464, "learning_rate": 2.9974820968403056e-05, "loss": 0.31037139892578125, "step": 640 }, { "epoch": 0.08665821715251373, "grad_norm": 3.035266637802124, "learning_rate": 2.9974423000285923e-05, "loss": 0.2532157897949219, "step": 641 }, { "epoch": 0.08679340937896071, "grad_norm": 2.4688339233398438, "learning_rate": 2.9974021914414068e-05, "loss": 0.19481420516967773, "step": 642 }, { "epoch": 0.08692860160540769, "grad_norm": 2.674933671951294, "learning_rate": 2.9973617710871e-05, "loss": 0.2714195251464844, "step": 643 }, { "epoch": 0.08706379383185467, "grad_norm": 2.497926712036133, "learning_rate": 2.997321038974087e-05, "loss": 0.21243762969970703, "step": 644 }, { "epoch": 0.08719898605830165, "grad_norm": 2.77506422996521, "learning_rate": 2.997279995110849e-05, "loss": 0.21555233001708984, "step": 645 }, { "epoch": 0.08733417828474863, "grad_norm": 5.690005779266357, "learning_rate": 2.997238639505932e-05, "loss": 0.2408885955810547, "step": 646 }, { "epoch": 0.0874693705111956, "grad_norm": 1.117785096168518, "learning_rate": 2.997196972167946e-05, "loss": 0.2142963409423828, "step": 647 }, { "epoch": 0.08760456273764258, "grad_norm": 5.270402908325195, "learning_rate": 2.9971549931055665e-05, "loss": 0.2450122833251953, "step": 648 }, { "epoch": 0.08773975496408956, "grad_norm": 1.8804930448532104, "learning_rate": 2.997112702327533e-05, "loss": 0.1765308380126953, "step": 649 }, { "epoch": 0.08787494719053654, "grad_norm": 1.0243396759033203, "learning_rate": 2.9970700998426518e-05, "loss": 0.1481466293334961, "step": 650 }, { "epoch": 0.08801013941698352, "grad_norm": 2.6848249435424805, "learning_rate": 2.9970271856597925e-05, "loss": 0.24842071533203125, "step": 651 }, { "epoch": 0.0881453316434305, "grad_norm": 4.168036937713623, "learning_rate": 2.9969839597878896e-05, "loss": 0.1752300262451172, "step": 652 }, { "epoch": 0.08828052386987748, "grad_norm": 1.4729108810424805, "learning_rate": 2.9969404222359436e-05, "loss": 0.1951122283935547, "step": 653 }, { "epoch": 0.08841571609632447, "grad_norm": 3.0390238761901855, "learning_rate": 2.9968965730130188e-05, "loss": 0.21633243560791016, "step": 654 }, { "epoch": 0.08855090832277145, "grad_norm": 2.2606661319732666, "learning_rate": 2.9968524121282455e-05, "loss": 0.23235511779785156, "step": 655 }, { "epoch": 0.08868610054921842, "grad_norm": 3.079070806503296, "learning_rate": 2.9968079395908178e-05, "loss": 0.16539764404296875, "step": 656 }, { "epoch": 0.0888212927756654, "grad_norm": 4.1072282791137695, "learning_rate": 2.9967631554099947e-05, "loss": 0.17200088500976562, "step": 657 }, { "epoch": 0.08895648500211238, "grad_norm": 1.945708155632019, "learning_rate": 2.996718059595101e-05, "loss": 0.2137300968170166, "step": 658 }, { "epoch": 0.08909167722855936, "grad_norm": 2.9191908836364746, "learning_rate": 2.9966726521555265e-05, "loss": 0.214569091796875, "step": 659 }, { "epoch": 0.08922686945500634, "grad_norm": 1.7798532247543335, "learning_rate": 2.996626933100724e-05, "loss": 0.24387741088867188, "step": 660 }, { "epoch": 0.08936206168145332, "grad_norm": 5.730529308319092, "learning_rate": 2.996580902440213e-05, "loss": 0.2046966552734375, "step": 661 }, { "epoch": 0.0894972539079003, "grad_norm": 4.43034553527832, "learning_rate": 2.9965345601835773e-05, "loss": 0.2306232452392578, "step": 662 }, { "epoch": 0.08963244613434727, "grad_norm": 4.346843719482422, "learning_rate": 2.996487906340466e-05, "loss": 0.21460723876953125, "step": 663 }, { "epoch": 0.08976763836079425, "grad_norm": 2.161485433578491, "learning_rate": 2.996440940920592e-05, "loss": 0.1844959259033203, "step": 664 }, { "epoch": 0.08990283058724123, "grad_norm": 2.3417811393737793, "learning_rate": 2.996393663933735e-05, "loss": 0.23455810546875, "step": 665 }, { "epoch": 0.09003802281368821, "grad_norm": 5.569293022155762, "learning_rate": 2.9963460753897364e-05, "loss": 0.3124427795410156, "step": 666 }, { "epoch": 0.09017321504013519, "grad_norm": 5.964304447174072, "learning_rate": 2.996298175298506e-05, "loss": 0.23615455627441406, "step": 667 }, { "epoch": 0.09030840726658217, "grad_norm": 3.099236488342285, "learning_rate": 2.996249963670016e-05, "loss": 0.2277660369873047, "step": 668 }, { "epoch": 0.09044359949302914, "grad_norm": 2.5078465938568115, "learning_rate": 2.9962014405143042e-05, "loss": 0.24589157104492188, "step": 669 }, { "epoch": 0.09057879171947612, "grad_norm": 2.3147389888763428, "learning_rate": 2.9961526058414745e-05, "loss": 0.22175121307373047, "step": 670 }, { "epoch": 0.09071398394592312, "grad_norm": 2.0527143478393555, "learning_rate": 2.9961034596616936e-05, "loss": 0.24064350128173828, "step": 671 }, { "epoch": 0.0908491761723701, "grad_norm": 0.8992170095443726, "learning_rate": 2.996054001985194e-05, "loss": 0.11851060390472412, "step": 672 }, { "epoch": 0.09098436839881707, "grad_norm": 2.15411376953125, "learning_rate": 2.9960042328222732e-05, "loss": 0.21610164642333984, "step": 673 }, { "epoch": 0.09111956062526405, "grad_norm": 3.516059160232544, "learning_rate": 2.995954152183294e-05, "loss": 0.1685779094696045, "step": 674 }, { "epoch": 0.09125475285171103, "grad_norm": 3.6010234355926514, "learning_rate": 2.9959037600786822e-05, "loss": 0.2592010498046875, "step": 675 }, { "epoch": 0.09138994507815801, "grad_norm": 1.9942626953125, "learning_rate": 2.9958530565189307e-05, "loss": 0.22705411911010742, "step": 676 }, { "epoch": 0.09152513730460499, "grad_norm": 3.967087745666504, "learning_rate": 2.995802041514596e-05, "loss": 0.22382259368896484, "step": 677 }, { "epoch": 0.09166032953105197, "grad_norm": 5.022583484649658, "learning_rate": 2.9957507150762996e-05, "loss": 0.21911907196044922, "step": 678 }, { "epoch": 0.09179552175749894, "grad_norm": 7.530004501342773, "learning_rate": 2.9956990772147283e-05, "loss": 0.23216629028320312, "step": 679 }, { "epoch": 0.09193071398394592, "grad_norm": 4.901467800140381, "learning_rate": 2.9956471279406324e-05, "loss": 0.24022865295410156, "step": 680 }, { "epoch": 0.0920659062103929, "grad_norm": 1.4078967571258545, "learning_rate": 2.9955948672648298e-05, "loss": 0.20527267456054688, "step": 681 }, { "epoch": 0.09220109843683988, "grad_norm": 2.4498071670532227, "learning_rate": 2.9955422951981994e-05, "loss": 0.20481491088867188, "step": 682 }, { "epoch": 0.09233629066328686, "grad_norm": 1.6855629682540894, "learning_rate": 2.995489411751688e-05, "loss": 0.22455883026123047, "step": 683 }, { "epoch": 0.09247148288973384, "grad_norm": 2.1756672859191895, "learning_rate": 2.9954362169363064e-05, "loss": 0.32009124755859375, "step": 684 }, { "epoch": 0.09260667511618081, "grad_norm": 3.7600691318511963, "learning_rate": 2.99538271076313e-05, "loss": 0.22552490234375, "step": 685 }, { "epoch": 0.09274186734262779, "grad_norm": 1.720667839050293, "learning_rate": 2.9953288932432985e-05, "loss": 0.17055988311767578, "step": 686 }, { "epoch": 0.09287705956907477, "grad_norm": 1.6106798648834229, "learning_rate": 2.995274764388018e-05, "loss": 0.20691156387329102, "step": 687 }, { "epoch": 0.09301225179552176, "grad_norm": 2.795907735824585, "learning_rate": 2.9952203242085566e-05, "loss": 0.20398902893066406, "step": 688 }, { "epoch": 0.09314744402196874, "grad_norm": 1.6927968263626099, "learning_rate": 2.995165572716251e-05, "loss": 0.207000732421875, "step": 689 }, { "epoch": 0.09328263624841572, "grad_norm": 1.1094919443130493, "learning_rate": 2.9951105099225003e-05, "loss": 0.1868276596069336, "step": 690 }, { "epoch": 0.0934178284748627, "grad_norm": 2.6226561069488525, "learning_rate": 2.995055135838768e-05, "loss": 0.20621681213378906, "step": 691 }, { "epoch": 0.09355302070130968, "grad_norm": 4.05411434173584, "learning_rate": 2.994999450476584e-05, "loss": 0.2272930145263672, "step": 692 }, { "epoch": 0.09368821292775666, "grad_norm": 1.8267302513122559, "learning_rate": 2.9949434538475414e-05, "loss": 0.231536865234375, "step": 693 }, { "epoch": 0.09382340515420363, "grad_norm": 2.947174310684204, "learning_rate": 2.9948871459633008e-05, "loss": 0.20539474487304688, "step": 694 }, { "epoch": 0.09395859738065061, "grad_norm": 2.799992799758911, "learning_rate": 2.994830526835584e-05, "loss": 0.24158477783203125, "step": 695 }, { "epoch": 0.09409378960709759, "grad_norm": 2.7172555923461914, "learning_rate": 2.9947735964761803e-05, "loss": 0.24485206604003906, "step": 696 }, { "epoch": 0.09422898183354457, "grad_norm": 1.82590913772583, "learning_rate": 2.9947163548969428e-05, "loss": 0.2465667724609375, "step": 697 }, { "epoch": 0.09436417405999155, "grad_norm": 1.8396632671356201, "learning_rate": 2.9946588021097893e-05, "loss": 0.15935707092285156, "step": 698 }, { "epoch": 0.09449936628643853, "grad_norm": 2.0061380863189697, "learning_rate": 2.9946009381267028e-05, "loss": 0.12570571899414062, "step": 699 }, { "epoch": 0.0946345585128855, "grad_norm": 1.2996466159820557, "learning_rate": 2.9945427629597306e-05, "loss": 0.1744217872619629, "step": 700 }, { "epoch": 0.09476975073933248, "grad_norm": 2.234083414077759, "learning_rate": 2.9944842766209853e-05, "loss": 0.20143508911132812, "step": 701 }, { "epoch": 0.09490494296577946, "grad_norm": 2.381791830062866, "learning_rate": 2.9944254791226444e-05, "loss": 0.24639129638671875, "step": 702 }, { "epoch": 0.09504013519222644, "grad_norm": 4.267126560211182, "learning_rate": 2.994366370476949e-05, "loss": 0.21299982070922852, "step": 703 }, { "epoch": 0.09517532741867342, "grad_norm": 3.35009765625, "learning_rate": 2.9943069506962067e-05, "loss": 0.24997329711914062, "step": 704 }, { "epoch": 0.09531051964512041, "grad_norm": 4.331963539123535, "learning_rate": 2.9942472197927886e-05, "loss": 0.19637107849121094, "step": 705 }, { "epoch": 0.09544571187156739, "grad_norm": 4.351712226867676, "learning_rate": 2.994187177779131e-05, "loss": 0.2784843444824219, "step": 706 }, { "epoch": 0.09558090409801437, "grad_norm": 3.0953354835510254, "learning_rate": 2.9941268246677353e-05, "loss": 0.21401596069335938, "step": 707 }, { "epoch": 0.09571609632446135, "grad_norm": 3.9907052516937256, "learning_rate": 2.9940661604711664e-05, "loss": 0.199676513671875, "step": 708 }, { "epoch": 0.09585128855090833, "grad_norm": 1.653907299041748, "learning_rate": 2.994005185202056e-05, "loss": 0.19843292236328125, "step": 709 }, { "epoch": 0.0959864807773553, "grad_norm": 1.2830501794815063, "learning_rate": 2.9939438988730986e-05, "loss": 0.20074748992919922, "step": 710 }, { "epoch": 0.09612167300380228, "grad_norm": 7.845682144165039, "learning_rate": 2.9938823014970553e-05, "loss": 0.25787353515625, "step": 711 }, { "epoch": 0.09625686523024926, "grad_norm": 7.704529762268066, "learning_rate": 2.99382039308675e-05, "loss": 0.21416091918945312, "step": 712 }, { "epoch": 0.09639205745669624, "grad_norm": 12.752790451049805, "learning_rate": 2.993758173655073e-05, "loss": 0.3880424499511719, "step": 713 }, { "epoch": 0.09652724968314322, "grad_norm": 3.0337939262390137, "learning_rate": 2.993695643214979e-05, "loss": 0.19612884521484375, "step": 714 }, { "epoch": 0.0966624419095902, "grad_norm": 3.417722463607788, "learning_rate": 2.9936328017794864e-05, "loss": 0.2630596160888672, "step": 715 }, { "epoch": 0.09679763413603718, "grad_norm": 4.16177225112915, "learning_rate": 2.9935696493616796e-05, "loss": 0.219146728515625, "step": 716 }, { "epoch": 0.09693282636248415, "grad_norm": 4.165134906768799, "learning_rate": 2.9935061859747065e-05, "loss": 0.18430137634277344, "step": 717 }, { "epoch": 0.09706801858893113, "grad_norm": 5.327160835266113, "learning_rate": 2.993442411631782e-05, "loss": 0.210662841796875, "step": 718 }, { "epoch": 0.09720321081537811, "grad_norm": 2.605717182159424, "learning_rate": 2.9933783263461827e-05, "loss": 0.24525070190429688, "step": 719 }, { "epoch": 0.09733840304182509, "grad_norm": 3.0118792057037354, "learning_rate": 2.9933139301312526e-05, "loss": 0.26732635498046875, "step": 720 }, { "epoch": 0.09747359526827207, "grad_norm": 2.1598966121673584, "learning_rate": 2.9932492230003984e-05, "loss": 0.21266746520996094, "step": 721 }, { "epoch": 0.09760878749471906, "grad_norm": 1.9449892044067383, "learning_rate": 2.993184204967094e-05, "loss": 0.17081451416015625, "step": 722 }, { "epoch": 0.09774397972116604, "grad_norm": 3.7190966606140137, "learning_rate": 2.9931188760448748e-05, "loss": 0.19688892364501953, "step": 723 }, { "epoch": 0.09787917194761302, "grad_norm": 3.5418851375579834, "learning_rate": 2.9930532362473433e-05, "loss": 0.20413970947265625, "step": 724 }, { "epoch": 0.09801436417406, "grad_norm": 1.8002017736434937, "learning_rate": 2.9929872855881663e-05, "loss": 0.19686412811279297, "step": 725 }, { "epoch": 0.09814955640050697, "grad_norm": 1.354059100151062, "learning_rate": 2.9929210240810744e-05, "loss": 0.23836898803710938, "step": 726 }, { "epoch": 0.09828474862695395, "grad_norm": 2.2063252925872803, "learning_rate": 2.9928544517398644e-05, "loss": 0.1832256317138672, "step": 727 }, { "epoch": 0.09841994085340093, "grad_norm": 2.802690267562866, "learning_rate": 2.9927875685783966e-05, "loss": 0.18004226684570312, "step": 728 }, { "epoch": 0.09855513307984791, "grad_norm": 1.7790663242340088, "learning_rate": 2.9927203746105968e-05, "loss": 0.2653388977050781, "step": 729 }, { "epoch": 0.09869032530629489, "grad_norm": 3.0444061756134033, "learning_rate": 2.9926528698504546e-05, "loss": 0.2430095672607422, "step": 730 }, { "epoch": 0.09882551753274187, "grad_norm": 1.303714632987976, "learning_rate": 2.992585054312025e-05, "loss": 0.23905372619628906, "step": 731 }, { "epoch": 0.09896070975918884, "grad_norm": 2.3533811569213867, "learning_rate": 2.9925169280094278e-05, "loss": 0.23558425903320312, "step": 732 }, { "epoch": 0.09909590198563582, "grad_norm": 1.462594985961914, "learning_rate": 2.9924484909568472e-05, "loss": 0.13215255737304688, "step": 733 }, { "epoch": 0.0992310942120828, "grad_norm": 2.914008140563965, "learning_rate": 2.9923797431685322e-05, "loss": 0.24617767333984375, "step": 734 }, { "epoch": 0.09936628643852978, "grad_norm": 2.204941511154175, "learning_rate": 2.992310684658796e-05, "loss": 0.1777210235595703, "step": 735 }, { "epoch": 0.09950147866497676, "grad_norm": 5.732989311218262, "learning_rate": 2.9922413154420173e-05, "loss": 0.23714733123779297, "step": 736 }, { "epoch": 0.09963667089142374, "grad_norm": 2.6614887714385986, "learning_rate": 2.9921716355326393e-05, "loss": 0.2075042724609375, "step": 737 }, { "epoch": 0.09977186311787073, "grad_norm": 5.0292487144470215, "learning_rate": 2.9921016449451695e-05, "loss": 0.21545982360839844, "step": 738 }, { "epoch": 0.09990705534431771, "grad_norm": 2.069394111633301, "learning_rate": 2.9920313436941805e-05, "loss": 0.2056560516357422, "step": 739 }, { "epoch": 0.10004224757076469, "grad_norm": 6.506106853485107, "learning_rate": 2.991960731794309e-05, "loss": 0.23987579345703125, "step": 740 }, { "epoch": 0.10017743979721166, "grad_norm": 5.627443790435791, "learning_rate": 2.991889809260257e-05, "loss": 0.22814083099365234, "step": 741 }, { "epoch": 0.10031263202365864, "grad_norm": 8.164318084716797, "learning_rate": 2.9918185761067912e-05, "loss": 0.2442607879638672, "step": 742 }, { "epoch": 0.10044782425010562, "grad_norm": 8.184733390808105, "learning_rate": 2.9917470323487423e-05, "loss": 0.2915458679199219, "step": 743 }, { "epoch": 0.1005830164765526, "grad_norm": 0.954133927822113, "learning_rate": 2.9916751780010063e-05, "loss": 0.17754173278808594, "step": 744 }, { "epoch": 0.10071820870299958, "grad_norm": 0.8303343057632446, "learning_rate": 2.9916030130785436e-05, "loss": 0.15448570251464844, "step": 745 }, { "epoch": 0.10085340092944656, "grad_norm": 4.113893985748291, "learning_rate": 2.99153053759638e-05, "loss": 0.181427001953125, "step": 746 }, { "epoch": 0.10098859315589354, "grad_norm": 3.069335699081421, "learning_rate": 2.991457751569604e-05, "loss": 0.2271251678466797, "step": 747 }, { "epoch": 0.10112378538234051, "grad_norm": 3.969558000564575, "learning_rate": 2.991384655013371e-05, "loss": 0.29486083984375, "step": 748 }, { "epoch": 0.10125897760878749, "grad_norm": 2.7881696224212646, "learning_rate": 2.9913112479429e-05, "loss": 0.2432541847229004, "step": 749 }, { "epoch": 0.10139416983523447, "grad_norm": 1.2278746366500854, "learning_rate": 2.991237530373474e-05, "loss": 0.25930023193359375, "step": 750 }, { "epoch": 0.10152936206168145, "grad_norm": 5.310372829437256, "learning_rate": 2.9911635023204423e-05, "loss": 0.2572669982910156, "step": 751 }, { "epoch": 0.10166455428812843, "grad_norm": 6.210998058319092, "learning_rate": 2.9910891637992172e-05, "loss": 0.22033119201660156, "step": 752 }, { "epoch": 0.1017997465145754, "grad_norm": 4.798659324645996, "learning_rate": 2.991014514825277e-05, "loss": 0.2593822479248047, "step": 753 }, { "epoch": 0.10193493874102239, "grad_norm": 2.6991829872131348, "learning_rate": 2.9909395554141638e-05, "loss": 0.20731163024902344, "step": 754 }, { "epoch": 0.10207013096746938, "grad_norm": 3.036484479904175, "learning_rate": 2.9908642855814844e-05, "loss": 0.24079513549804688, "step": 755 }, { "epoch": 0.10220532319391636, "grad_norm": 4.04808235168457, "learning_rate": 2.9907887053429107e-05, "loss": 0.19020462036132812, "step": 756 }, { "epoch": 0.10234051542036333, "grad_norm": 3.054243326187134, "learning_rate": 2.9907128147141783e-05, "loss": 0.17934560775756836, "step": 757 }, { "epoch": 0.10247570764681031, "grad_norm": 2.564101219177246, "learning_rate": 2.990636613711089e-05, "loss": 0.24249744415283203, "step": 758 }, { "epoch": 0.10261089987325729, "grad_norm": 1.6259689331054688, "learning_rate": 2.990560102349507e-05, "loss": 0.09309077262878418, "step": 759 }, { "epoch": 0.10274609209970427, "grad_norm": 4.681954383850098, "learning_rate": 2.9904832806453635e-05, "loss": 0.26602935791015625, "step": 760 }, { "epoch": 0.10288128432615125, "grad_norm": 2.3799095153808594, "learning_rate": 2.9904061486146524e-05, "loss": 0.2074737548828125, "step": 761 }, { "epoch": 0.10301647655259823, "grad_norm": 3.689250946044922, "learning_rate": 2.9903287062734333e-05, "loss": 0.2257223129272461, "step": 762 }, { "epoch": 0.1031516687790452, "grad_norm": 2.096745014190674, "learning_rate": 2.990250953637831e-05, "loss": 0.21210384368896484, "step": 763 }, { "epoch": 0.10328686100549218, "grad_norm": 2.1747965812683105, "learning_rate": 2.9901728907240326e-05, "loss": 0.23769092559814453, "step": 764 }, { "epoch": 0.10342205323193916, "grad_norm": 5.016852378845215, "learning_rate": 2.9900945175482916e-05, "loss": 0.1928386688232422, "step": 765 }, { "epoch": 0.10355724545838614, "grad_norm": 1.9081209897994995, "learning_rate": 2.990015834126926e-05, "loss": 0.24413681030273438, "step": 766 }, { "epoch": 0.10369243768483312, "grad_norm": 1.4171063899993896, "learning_rate": 2.989936840476318e-05, "loss": 0.19045639038085938, "step": 767 }, { "epoch": 0.1038276299112801, "grad_norm": 5.06385612487793, "learning_rate": 2.9898575366129145e-05, "loss": 0.23425865173339844, "step": 768 }, { "epoch": 0.10396282213772708, "grad_norm": 3.5191590785980225, "learning_rate": 2.9897779225532273e-05, "loss": 0.20087623596191406, "step": 769 }, { "epoch": 0.10409801436417405, "grad_norm": 1.4005638360977173, "learning_rate": 2.989697998313832e-05, "loss": 0.21179962158203125, "step": 770 }, { "epoch": 0.10423320659062103, "grad_norm": 3.2815475463867188, "learning_rate": 2.989617763911369e-05, "loss": 0.26393890380859375, "step": 771 }, { "epoch": 0.10436839881706803, "grad_norm": 1.8032615184783936, "learning_rate": 2.9895372193625442e-05, "loss": 0.20674514770507812, "step": 772 }, { "epoch": 0.104503591043515, "grad_norm": 3.285004138946533, "learning_rate": 2.9894563646841273e-05, "loss": 0.22967910766601562, "step": 773 }, { "epoch": 0.10463878326996198, "grad_norm": 2.8750803470611572, "learning_rate": 2.9893751998929523e-05, "loss": 0.2560882568359375, "step": 774 }, { "epoch": 0.10477397549640896, "grad_norm": 1.3432013988494873, "learning_rate": 2.9892937250059187e-05, "loss": 0.24227142333984375, "step": 775 }, { "epoch": 0.10490916772285594, "grad_norm": 1.9622797966003418, "learning_rate": 2.9892119400399894e-05, "loss": 0.2538471221923828, "step": 776 }, { "epoch": 0.10504435994930292, "grad_norm": 5.148944854736328, "learning_rate": 2.989129845012193e-05, "loss": 0.24422264099121094, "step": 777 }, { "epoch": 0.1051795521757499, "grad_norm": 1.8951119184494019, "learning_rate": 2.989047439939621e-05, "loss": 0.15408658981323242, "step": 778 }, { "epoch": 0.10531474440219687, "grad_norm": 1.4069232940673828, "learning_rate": 2.9889647248394324e-05, "loss": 0.2533149719238281, "step": 779 }, { "epoch": 0.10544993662864385, "grad_norm": 1.3385614156723022, "learning_rate": 2.9888816997288475e-05, "loss": 0.20326805114746094, "step": 780 }, { "epoch": 0.10558512885509083, "grad_norm": 1.6833949089050293, "learning_rate": 2.988798364625153e-05, "loss": 0.20999956130981445, "step": 781 }, { "epoch": 0.10572032108153781, "grad_norm": 4.966547012329102, "learning_rate": 2.9887147195457002e-05, "loss": 0.26210498809814453, "step": 782 }, { "epoch": 0.10585551330798479, "grad_norm": 1.4661803245544434, "learning_rate": 2.9886307645079037e-05, "loss": 0.2142810821533203, "step": 783 }, { "epoch": 0.10599070553443177, "grad_norm": 3.6143314838409424, "learning_rate": 2.9885464995292436e-05, "loss": 0.24684715270996094, "step": 784 }, { "epoch": 0.10612589776087875, "grad_norm": 2.127908945083618, "learning_rate": 2.9884619246272648e-05, "loss": 0.22627639770507812, "step": 785 }, { "epoch": 0.10626108998732572, "grad_norm": 5.926246643066406, "learning_rate": 2.988377039819575e-05, "loss": 0.2096424102783203, "step": 786 }, { "epoch": 0.1063962822137727, "grad_norm": 2.9677090644836426, "learning_rate": 2.9882918451238494e-05, "loss": 0.1527118682861328, "step": 787 }, { "epoch": 0.10653147444021968, "grad_norm": 3.0704970359802246, "learning_rate": 2.988206340557825e-05, "loss": 0.14369964599609375, "step": 788 }, { "epoch": 0.10666666666666667, "grad_norm": 1.7233011722564697, "learning_rate": 2.9881205261393037e-05, "loss": 0.2638359069824219, "step": 789 }, { "epoch": 0.10680185889311365, "grad_norm": 2.2960398197174072, "learning_rate": 2.988034401886154e-05, "loss": 0.23662757873535156, "step": 790 }, { "epoch": 0.10693705111956063, "grad_norm": 2.6003386974334717, "learning_rate": 2.9879479678163065e-05, "loss": 0.2786293029785156, "step": 791 }, { "epoch": 0.10707224334600761, "grad_norm": 2.2291908264160156, "learning_rate": 2.9878612239477568e-05, "loss": 0.20638656616210938, "step": 792 }, { "epoch": 0.10720743557245459, "grad_norm": 2.587191343307495, "learning_rate": 2.9877741702985666e-05, "loss": 0.22580814361572266, "step": 793 }, { "epoch": 0.10734262779890157, "grad_norm": 2.0171875953674316, "learning_rate": 2.98768680688686e-05, "loss": 0.2021503448486328, "step": 794 }, { "epoch": 0.10747782002534854, "grad_norm": 4.239964485168457, "learning_rate": 2.9875991337308274e-05, "loss": 0.2075824737548828, "step": 795 }, { "epoch": 0.10761301225179552, "grad_norm": 3.4658312797546387, "learning_rate": 2.987511150848722e-05, "loss": 0.16890716552734375, "step": 796 }, { "epoch": 0.1077482044782425, "grad_norm": 5.566956043243408, "learning_rate": 2.9874228582588627e-05, "loss": 0.2117938995361328, "step": 797 }, { "epoch": 0.10788339670468948, "grad_norm": 4.032079696655273, "learning_rate": 2.9873342559796325e-05, "loss": 0.2528724670410156, "step": 798 }, { "epoch": 0.10801858893113646, "grad_norm": 2.9652650356292725, "learning_rate": 2.9872453440294787e-05, "loss": 0.17624187469482422, "step": 799 }, { "epoch": 0.10815378115758344, "grad_norm": 2.2240042686462402, "learning_rate": 2.9871561224269134e-05, "loss": 0.2869300842285156, "step": 800 }, { "epoch": 0.10828897338403042, "grad_norm": 3.174017906188965, "learning_rate": 2.9870665911905127e-05, "loss": 0.18218994140625, "step": 801 }, { "epoch": 0.1084241656104774, "grad_norm": 3.9555299282073975, "learning_rate": 2.9869767503389176e-05, "loss": 0.21283626556396484, "step": 802 }, { "epoch": 0.10855935783692437, "grad_norm": 6.190732955932617, "learning_rate": 2.986886599890834e-05, "loss": 0.28665733337402344, "step": 803 }, { "epoch": 0.10869455006337135, "grad_norm": 5.123399257659912, "learning_rate": 2.9867961398650306e-05, "loss": 0.19631671905517578, "step": 804 }, { "epoch": 0.10882974228981833, "grad_norm": 0.9978769421577454, "learning_rate": 2.9867053702803425e-05, "loss": 0.1465005874633789, "step": 805 }, { "epoch": 0.10896493451626532, "grad_norm": 3.4984874725341797, "learning_rate": 2.9866142911556685e-05, "loss": 0.20914459228515625, "step": 806 }, { "epoch": 0.1091001267427123, "grad_norm": 3.1558218002319336, "learning_rate": 2.9865229025099713e-05, "loss": 0.20807170867919922, "step": 807 }, { "epoch": 0.10923531896915928, "grad_norm": 1.9393128156661987, "learning_rate": 2.986431204362279e-05, "loss": 0.16961383819580078, "step": 808 }, { "epoch": 0.10937051119560626, "grad_norm": 3.27996563911438, "learning_rate": 2.9863391967316835e-05, "loss": 0.19561386108398438, "step": 809 }, { "epoch": 0.10950570342205324, "grad_norm": 1.438079833984375, "learning_rate": 2.9862468796373404e-05, "loss": 0.2212977409362793, "step": 810 }, { "epoch": 0.10964089564850021, "grad_norm": 1.8021934032440186, "learning_rate": 2.9861542530984718e-05, "loss": 0.19769763946533203, "step": 811 }, { "epoch": 0.10977608787494719, "grad_norm": 1.8025696277618408, "learning_rate": 2.9860613171343624e-05, "loss": 0.2205047607421875, "step": 812 }, { "epoch": 0.10991128010139417, "grad_norm": 1.6591147184371948, "learning_rate": 2.9859680717643623e-05, "loss": 0.19939613342285156, "step": 813 }, { "epoch": 0.11004647232784115, "grad_norm": 1.3125712871551514, "learning_rate": 2.985874517007885e-05, "loss": 0.19457054138183594, "step": 814 }, { "epoch": 0.11018166455428813, "grad_norm": 1.421205997467041, "learning_rate": 2.98578065288441e-05, "loss": 0.1952829360961914, "step": 815 }, { "epoch": 0.1103168567807351, "grad_norm": 1.8531049489974976, "learning_rate": 2.9856864794134798e-05, "loss": 0.18225479125976562, "step": 816 }, { "epoch": 0.11045204900718208, "grad_norm": 1.8185570240020752, "learning_rate": 2.9855919966147025e-05, "loss": 0.21445465087890625, "step": 817 }, { "epoch": 0.11058724123362906, "grad_norm": 1.273156762123108, "learning_rate": 2.9854972045077485e-05, "loss": 0.26434326171875, "step": 818 }, { "epoch": 0.11072243346007604, "grad_norm": 1.9759249687194824, "learning_rate": 2.9854021031123555e-05, "loss": 0.21663475036621094, "step": 819 }, { "epoch": 0.11085762568652302, "grad_norm": 1.6428236961364746, "learning_rate": 2.9853066924483232e-05, "loss": 0.20756149291992188, "step": 820 }, { "epoch": 0.11099281791297, "grad_norm": 2.1191680431365967, "learning_rate": 2.9852109725355173e-05, "loss": 0.13865947723388672, "step": 821 }, { "epoch": 0.11112801013941698, "grad_norm": 1.5840522050857544, "learning_rate": 2.9851149433938662e-05, "loss": 0.15669631958007812, "step": 822 }, { "epoch": 0.11126320236586397, "grad_norm": 1.2050979137420654, "learning_rate": 2.9850186050433645e-05, "loss": 0.1946253776550293, "step": 823 }, { "epoch": 0.11139839459231095, "grad_norm": 2.593139171600342, "learning_rate": 2.9849219575040708e-05, "loss": 0.18323898315429688, "step": 824 }, { "epoch": 0.11153358681875793, "grad_norm": 3.8891239166259766, "learning_rate": 2.984825000796106e-05, "loss": 0.17827606201171875, "step": 825 }, { "epoch": 0.1116687790452049, "grad_norm": 1.5008375644683838, "learning_rate": 2.9847277349396586e-05, "loss": 0.2430572509765625, "step": 826 }, { "epoch": 0.11180397127165188, "grad_norm": 1.4743549823760986, "learning_rate": 2.984630159954979e-05, "loss": 0.2042236328125, "step": 827 }, { "epoch": 0.11193916349809886, "grad_norm": 2.3984475135803223, "learning_rate": 2.9845322758623833e-05, "loss": 0.3000831604003906, "step": 828 }, { "epoch": 0.11207435572454584, "grad_norm": 3.811556100845337, "learning_rate": 2.984434082682251e-05, "loss": 0.26007080078125, "step": 829 }, { "epoch": 0.11220954795099282, "grad_norm": 3.7559499740600586, "learning_rate": 2.984335580435027e-05, "loss": 0.22284698486328125, "step": 830 }, { "epoch": 0.1123447401774398, "grad_norm": 5.927177429199219, "learning_rate": 2.9842367691412192e-05, "loss": 0.18510055541992188, "step": 831 }, { "epoch": 0.11247993240388678, "grad_norm": 2.3276305198669434, "learning_rate": 2.9841376488214015e-05, "loss": 0.2044525146484375, "step": 832 }, { "epoch": 0.11261512463033375, "grad_norm": 2.2319157123565674, "learning_rate": 2.984038219496211e-05, "loss": 0.27033138275146484, "step": 833 }, { "epoch": 0.11275031685678073, "grad_norm": 3.4616312980651855, "learning_rate": 2.9839384811863493e-05, "loss": 0.20784759521484375, "step": 834 }, { "epoch": 0.11288550908322771, "grad_norm": 3.4167797565460205, "learning_rate": 2.9838384339125824e-05, "loss": 0.16777801513671875, "step": 835 }, { "epoch": 0.11302070130967469, "grad_norm": 4.379949569702148, "learning_rate": 2.9837380776957405e-05, "loss": 0.2557048797607422, "step": 836 }, { "epoch": 0.11315589353612167, "grad_norm": 2.310194492340088, "learning_rate": 2.9836374125567193e-05, "loss": 0.22476959228515625, "step": 837 }, { "epoch": 0.11329108576256865, "grad_norm": 3.056804656982422, "learning_rate": 2.9835364385164764e-05, "loss": 0.2047567367553711, "step": 838 }, { "epoch": 0.11342627798901563, "grad_norm": 4.926165580749512, "learning_rate": 2.983435155596036e-05, "loss": 0.2521171569824219, "step": 839 }, { "epoch": 0.11356147021546262, "grad_norm": 4.483332633972168, "learning_rate": 2.9833335638164858e-05, "loss": 0.2576662302017212, "step": 840 }, { "epoch": 0.1136966624419096, "grad_norm": 2.9521732330322266, "learning_rate": 2.9832316631989774e-05, "loss": 0.2041778564453125, "step": 841 }, { "epoch": 0.11383185466835657, "grad_norm": 2.077770233154297, "learning_rate": 2.9831294537647272e-05, "loss": 0.2508983612060547, "step": 842 }, { "epoch": 0.11396704689480355, "grad_norm": 1.4439202547073364, "learning_rate": 2.9830269355350155e-05, "loss": 0.22361373901367188, "step": 843 }, { "epoch": 0.11410223912125053, "grad_norm": 1.345982551574707, "learning_rate": 2.9829241085311872e-05, "loss": 0.18943023681640625, "step": 844 }, { "epoch": 0.11423743134769751, "grad_norm": 3.3798348903656006, "learning_rate": 2.9828209727746522e-05, "loss": 0.23786544799804688, "step": 845 }, { "epoch": 0.11437262357414449, "grad_norm": 3.047659397125244, "learning_rate": 2.982717528286883e-05, "loss": 0.23119735717773438, "step": 846 }, { "epoch": 0.11450781580059147, "grad_norm": 5.75372314453125, "learning_rate": 2.9826137750894176e-05, "loss": 0.2439889907836914, "step": 847 }, { "epoch": 0.11464300802703845, "grad_norm": 1.329908847808838, "learning_rate": 2.9825097132038578e-05, "loss": 0.20257186889648438, "step": 848 }, { "epoch": 0.11477820025348542, "grad_norm": 2.1224234104156494, "learning_rate": 2.9824053426518703e-05, "loss": 0.22783279418945312, "step": 849 }, { "epoch": 0.1149133924799324, "grad_norm": 2.4482314586639404, "learning_rate": 2.9823006634551848e-05, "loss": 0.22953224182128906, "step": 850 }, { "epoch": 0.11504858470637938, "grad_norm": 1.3808059692382812, "learning_rate": 2.9821956756355973e-05, "loss": 0.20325851440429688, "step": 851 }, { "epoch": 0.11518377693282636, "grad_norm": 1.2937262058258057, "learning_rate": 2.9820903792149653e-05, "loss": 0.1532764434814453, "step": 852 }, { "epoch": 0.11531896915927334, "grad_norm": 1.2671122550964355, "learning_rate": 2.981984774215214e-05, "loss": 0.17059707641601562, "step": 853 }, { "epoch": 0.11545416138572032, "grad_norm": 1.4780408143997192, "learning_rate": 2.9818788606583286e-05, "loss": 0.21390724182128906, "step": 854 }, { "epoch": 0.1155893536121673, "grad_norm": 3.9047470092773438, "learning_rate": 2.9817726385663627e-05, "loss": 0.24678611755371094, "step": 855 }, { "epoch": 0.11572454583861427, "grad_norm": 1.4960459470748901, "learning_rate": 2.9816661079614316e-05, "loss": 0.18773746490478516, "step": 856 }, { "epoch": 0.11585973806506127, "grad_norm": 2.1197240352630615, "learning_rate": 2.9815592688657154e-05, "loss": 0.21656036376953125, "step": 857 }, { "epoch": 0.11599493029150824, "grad_norm": 4.272851467132568, "learning_rate": 2.9814521213014588e-05, "loss": 0.21514129638671875, "step": 858 }, { "epoch": 0.11613012251795522, "grad_norm": 2.013625383377075, "learning_rate": 2.9813446652909707e-05, "loss": 0.2300581932067871, "step": 859 }, { "epoch": 0.1162653147444022, "grad_norm": 2.0729455947875977, "learning_rate": 2.981236900856624e-05, "loss": 0.21991348266601562, "step": 860 }, { "epoch": 0.11640050697084918, "grad_norm": 3.9367828369140625, "learning_rate": 2.9811288280208552e-05, "loss": 0.20949935913085938, "step": 861 }, { "epoch": 0.11653569919729616, "grad_norm": 5.059543609619141, "learning_rate": 2.9810204468061664e-05, "loss": 0.27564048767089844, "step": 862 }, { "epoch": 0.11667089142374314, "grad_norm": 2.236551284790039, "learning_rate": 2.9809117572351223e-05, "loss": 0.18333816528320312, "step": 863 }, { "epoch": 0.11680608365019012, "grad_norm": 2.1935770511627197, "learning_rate": 2.9808027593303537e-05, "loss": 0.18828582763671875, "step": 864 }, { "epoch": 0.1169412758766371, "grad_norm": 2.681454658508301, "learning_rate": 2.980693453114554e-05, "loss": 0.2298579216003418, "step": 865 }, { "epoch": 0.11707646810308407, "grad_norm": 4.517125606536865, "learning_rate": 2.980583838610481e-05, "loss": 0.2062397003173828, "step": 866 }, { "epoch": 0.11721166032953105, "grad_norm": 2.519273519515991, "learning_rate": 2.980473915840957e-05, "loss": 0.20514249801635742, "step": 867 }, { "epoch": 0.11734685255597803, "grad_norm": 3.7285878658294678, "learning_rate": 2.9803636848288696e-05, "loss": 0.18355274200439453, "step": 868 }, { "epoch": 0.11748204478242501, "grad_norm": 1.79351007938385, "learning_rate": 2.9802531455971686e-05, "loss": 0.20285606384277344, "step": 869 }, { "epoch": 0.11761723700887199, "grad_norm": 3.005331039428711, "learning_rate": 2.980142298168869e-05, "loss": 0.23609542846679688, "step": 870 }, { "epoch": 0.11775242923531896, "grad_norm": 4.313886642456055, "learning_rate": 2.9800311425670495e-05, "loss": 0.2367558479309082, "step": 871 }, { "epoch": 0.11788762146176594, "grad_norm": 1.496138572692871, "learning_rate": 2.9799196788148538e-05, "loss": 0.19964981079101562, "step": 872 }, { "epoch": 0.11802281368821292, "grad_norm": 4.341944217681885, "learning_rate": 2.9798079069354893e-05, "loss": 0.2216787338256836, "step": 873 }, { "epoch": 0.11815800591465991, "grad_norm": 4.757167339324951, "learning_rate": 2.9796958269522273e-05, "loss": 0.1971874237060547, "step": 874 }, { "epoch": 0.11829319814110689, "grad_norm": 2.4889369010925293, "learning_rate": 2.9795834388884034e-05, "loss": 0.1622314453125, "step": 875 }, { "epoch": 0.11842839036755387, "grad_norm": 2.677736759185791, "learning_rate": 2.979470742767417e-05, "loss": 0.2699871063232422, "step": 876 }, { "epoch": 0.11856358259400085, "grad_norm": 3.8931052684783936, "learning_rate": 2.9793577386127327e-05, "loss": 0.19470500946044922, "step": 877 }, { "epoch": 0.11869877482044783, "grad_norm": 2.8569531440734863, "learning_rate": 2.9792444264478784e-05, "loss": 0.18169474601745605, "step": 878 }, { "epoch": 0.1188339670468948, "grad_norm": 5.615286350250244, "learning_rate": 2.979130806296446e-05, "loss": 0.18413448333740234, "step": 879 }, { "epoch": 0.11896915927334178, "grad_norm": 3.743774652481079, "learning_rate": 2.9790168781820925e-05, "loss": 0.23986244201660156, "step": 880 }, { "epoch": 0.11910435149978876, "grad_norm": 3.260669469833374, "learning_rate": 2.9789026421285375e-05, "loss": 0.21744728088378906, "step": 881 }, { "epoch": 0.11923954372623574, "grad_norm": 1.1249809265136719, "learning_rate": 2.9787880981595663e-05, "loss": 0.11754035949707031, "step": 882 }, { "epoch": 0.11937473595268272, "grad_norm": 3.258342981338501, "learning_rate": 2.9786732462990267e-05, "loss": 0.17868423461914062, "step": 883 }, { "epoch": 0.1195099281791297, "grad_norm": 1.321682095527649, "learning_rate": 2.9785580865708323e-05, "loss": 0.24796104431152344, "step": 884 }, { "epoch": 0.11964512040557668, "grad_norm": 1.6667258739471436, "learning_rate": 2.97844261899896e-05, "loss": 0.2400188446044922, "step": 885 }, { "epoch": 0.11978031263202366, "grad_norm": 3.6601486206054688, "learning_rate": 2.9783268436074495e-05, "loss": 0.2026195526123047, "step": 886 }, { "epoch": 0.11991550485847063, "grad_norm": 2.875750780105591, "learning_rate": 2.978210760420407e-05, "loss": 0.24673175811767578, "step": 887 }, { "epoch": 0.12005069708491761, "grad_norm": 2.139686107635498, "learning_rate": 2.978094369462002e-05, "loss": 0.17135417461395264, "step": 888 }, { "epoch": 0.12018588931136459, "grad_norm": 2.065563440322876, "learning_rate": 2.977977670756467e-05, "loss": 0.2228260040283203, "step": 889 }, { "epoch": 0.12032108153781157, "grad_norm": 5.263899803161621, "learning_rate": 2.9778606643280987e-05, "loss": 0.1907176971435547, "step": 890 }, { "epoch": 0.12045627376425856, "grad_norm": 4.728724956512451, "learning_rate": 2.97774335020126e-05, "loss": 0.21061134338378906, "step": 891 }, { "epoch": 0.12059146599070554, "grad_norm": 2.2337210178375244, "learning_rate": 2.9776257284003748e-05, "loss": 0.21288299560546875, "step": 892 }, { "epoch": 0.12072665821715252, "grad_norm": 2.4238178730010986, "learning_rate": 2.9775077989499338e-05, "loss": 0.22940444946289062, "step": 893 }, { "epoch": 0.1208618504435995, "grad_norm": 2.355374336242676, "learning_rate": 2.97738956187449e-05, "loss": 0.1994647979736328, "step": 894 }, { "epoch": 0.12099704267004648, "grad_norm": 1.5948034524917603, "learning_rate": 2.9772710171986605e-05, "loss": 0.19122695922851562, "step": 895 }, { "epoch": 0.12113223489649345, "grad_norm": 1.9716911315917969, "learning_rate": 2.977152164947128e-05, "loss": 0.22398757934570312, "step": 896 }, { "epoch": 0.12126742712294043, "grad_norm": 5.044217586517334, "learning_rate": 2.9770330051446373e-05, "loss": 0.2217998504638672, "step": 897 }, { "epoch": 0.12140261934938741, "grad_norm": 3.002575635910034, "learning_rate": 2.976913537815999e-05, "loss": 0.2006692886352539, "step": 898 }, { "epoch": 0.12153781157583439, "grad_norm": 0.9052819609642029, "learning_rate": 2.9767937629860853e-05, "loss": 0.21210289001464844, "step": 899 }, { "epoch": 0.12167300380228137, "grad_norm": 1.0156062841415405, "learning_rate": 2.9766736806798353e-05, "loss": 0.20656299591064453, "step": 900 }, { "epoch": 0.12180819602872835, "grad_norm": 2.6435019969940186, "learning_rate": 2.9765532909222512e-05, "loss": 0.23062872886657715, "step": 901 }, { "epoch": 0.12194338825517533, "grad_norm": 2.5739858150482178, "learning_rate": 2.976432593738397e-05, "loss": 0.17801666259765625, "step": 902 }, { "epoch": 0.1220785804816223, "grad_norm": 0.9694176316261292, "learning_rate": 2.9763115891534036e-05, "loss": 0.20056533813476562, "step": 903 }, { "epoch": 0.12221377270806928, "grad_norm": 4.098574161529541, "learning_rate": 2.9761902771924648e-05, "loss": 0.22260218858718872, "step": 904 }, { "epoch": 0.12234896493451626, "grad_norm": 3.375361204147339, "learning_rate": 2.9760686578808387e-05, "loss": 0.20522403717041016, "step": 905 }, { "epoch": 0.12248415716096324, "grad_norm": 2.8765158653259277, "learning_rate": 2.9759467312438462e-05, "loss": 0.22367095947265625, "step": 906 }, { "epoch": 0.12261934938741022, "grad_norm": 2.7609915733337402, "learning_rate": 2.975824497306874e-05, "loss": 0.24929046630859375, "step": 907 }, { "epoch": 0.12275454161385721, "grad_norm": 1.7472270727157593, "learning_rate": 2.9757019560953707e-05, "loss": 0.1735515594482422, "step": 908 }, { "epoch": 0.12288973384030419, "grad_norm": 1.9093526601791382, "learning_rate": 2.9755791076348517e-05, "loss": 0.2290334701538086, "step": 909 }, { "epoch": 0.12302492606675117, "grad_norm": 3.3614683151245117, "learning_rate": 2.9754559519508924e-05, "loss": 0.1747417449951172, "step": 910 }, { "epoch": 0.12316011829319815, "grad_norm": 1.4478107690811157, "learning_rate": 2.975332489069137e-05, "loss": 0.1736927032470703, "step": 911 }, { "epoch": 0.12329531051964512, "grad_norm": 2.3478851318359375, "learning_rate": 2.9752087190152893e-05, "loss": 0.26886940002441406, "step": 912 }, { "epoch": 0.1234305027460921, "grad_norm": 5.618727207183838, "learning_rate": 2.97508464181512e-05, "loss": 0.2665853500366211, "step": 913 }, { "epoch": 0.12356569497253908, "grad_norm": 2.92346453666687, "learning_rate": 2.9749602574944615e-05, "loss": 0.20128726959228516, "step": 914 }, { "epoch": 0.12370088719898606, "grad_norm": 2.4036526679992676, "learning_rate": 2.9748355660792125e-05, "loss": 0.185821533203125, "step": 915 }, { "epoch": 0.12383607942543304, "grad_norm": 3.120410442352295, "learning_rate": 2.9747105675953338e-05, "loss": 0.28485107421875, "step": 916 }, { "epoch": 0.12397127165188002, "grad_norm": 1.1527925729751587, "learning_rate": 2.9745852620688506e-05, "loss": 0.1913928985595703, "step": 917 }, { "epoch": 0.124106463878327, "grad_norm": 2.3701677322387695, "learning_rate": 2.974459649525853e-05, "loss": 0.3086204528808594, "step": 918 }, { "epoch": 0.12424165610477397, "grad_norm": 1.7501299381256104, "learning_rate": 2.9743337299924925e-05, "loss": 0.21162652969360352, "step": 919 }, { "epoch": 0.12437684833122095, "grad_norm": 1.5002126693725586, "learning_rate": 2.9742075034949883e-05, "loss": 0.2451343536376953, "step": 920 }, { "epoch": 0.12451204055766793, "grad_norm": 1.4094550609588623, "learning_rate": 2.97408097005962e-05, "loss": 0.20383453369140625, "step": 921 }, { "epoch": 0.12464723278411491, "grad_norm": 2.4477241039276123, "learning_rate": 2.973954129712733e-05, "loss": 0.251678466796875, "step": 922 }, { "epoch": 0.12478242501056189, "grad_norm": 1.790040135383606, "learning_rate": 2.973826982480736e-05, "loss": 0.2011585235595703, "step": 923 }, { "epoch": 0.12491761723700887, "grad_norm": 1.7725608348846436, "learning_rate": 2.9736995283901022e-05, "loss": 0.21489334106445312, "step": 924 }, { "epoch": 0.12505280946345584, "grad_norm": 1.7747427225112915, "learning_rate": 2.9735717674673676e-05, "loss": 0.25714111328125, "step": 925 }, { "epoch": 0.12518800168990282, "grad_norm": 2.6696882247924805, "learning_rate": 2.973443699739133e-05, "loss": 0.2600440979003906, "step": 926 }, { "epoch": 0.1253231939163498, "grad_norm": 3.341881036758423, "learning_rate": 2.973315325232063e-05, "loss": 0.19092655181884766, "step": 927 }, { "epoch": 0.12545838614279678, "grad_norm": 3.200180768966675, "learning_rate": 2.9731866439728853e-05, "loss": 0.17973732948303223, "step": 928 }, { "epoch": 0.12559357836924376, "grad_norm": 2.652151584625244, "learning_rate": 2.9730576559883924e-05, "loss": 0.1949176788330078, "step": 929 }, { "epoch": 0.12572877059569074, "grad_norm": 2.449274778366089, "learning_rate": 2.97292836130544e-05, "loss": 0.2210845947265625, "step": 930 }, { "epoch": 0.12586396282213771, "grad_norm": 0.970920741558075, "learning_rate": 2.9727987599509485e-05, "loss": 0.17169761657714844, "step": 931 }, { "epoch": 0.1259991550485847, "grad_norm": 0.8495640754699707, "learning_rate": 2.972668851951901e-05, "loss": 0.17377853393554688, "step": 932 }, { "epoch": 0.12613434727503167, "grad_norm": 2.3643150329589844, "learning_rate": 2.9725386373353455e-05, "loss": 0.2543010711669922, "step": 933 }, { "epoch": 0.12626953950147868, "grad_norm": 1.8319456577301025, "learning_rate": 2.972408116128393e-05, "loss": 0.15818405151367188, "step": 934 }, { "epoch": 0.12640473172792566, "grad_norm": 2.539389133453369, "learning_rate": 2.972277288358219e-05, "loss": 0.18526840209960938, "step": 935 }, { "epoch": 0.12653992395437264, "grad_norm": 3.0631022453308105, "learning_rate": 2.9721461540520628e-05, "loss": 0.26531219482421875, "step": 936 }, { "epoch": 0.1266751161808196, "grad_norm": 1.86379075050354, "learning_rate": 2.9720147132372265e-05, "loss": 0.2439422607421875, "step": 937 }, { "epoch": 0.1268103084072666, "grad_norm": 1.9019217491149902, "learning_rate": 2.9718829659410772e-05, "loss": 0.18474483489990234, "step": 938 }, { "epoch": 0.12694550063371357, "grad_norm": 1.4551502466201782, "learning_rate": 2.9717509121910453e-05, "loss": 0.23337364196777344, "step": 939 }, { "epoch": 0.12708069286016055, "grad_norm": 1.7903971672058105, "learning_rate": 2.971618552014625e-05, "loss": 0.21827220916748047, "step": 940 }, { "epoch": 0.12721588508660753, "grad_norm": 2.658393144607544, "learning_rate": 2.971485885439375e-05, "loss": 0.13556480407714844, "step": 941 }, { "epoch": 0.1273510773130545, "grad_norm": 3.486389398574829, "learning_rate": 2.9713529124929163e-05, "loss": 0.1954631805419922, "step": 942 }, { "epoch": 0.12748626953950148, "grad_norm": 1.5871957540512085, "learning_rate": 2.9712196332029352e-05, "loss": 0.16504263877868652, "step": 943 }, { "epoch": 0.12762146176594846, "grad_norm": 1.9027631282806396, "learning_rate": 2.971086047597181e-05, "loss": 0.23107624053955078, "step": 944 }, { "epoch": 0.12775665399239544, "grad_norm": 1.362157940864563, "learning_rate": 2.9709521557034668e-05, "loss": 0.2621803283691406, "step": 945 }, { "epoch": 0.12789184621884242, "grad_norm": 0.897609531879425, "learning_rate": 2.9708179575496696e-05, "loss": 0.1943206787109375, "step": 946 }, { "epoch": 0.1280270384452894, "grad_norm": 4.036199569702148, "learning_rate": 2.9706834531637303e-05, "loss": 0.18705272674560547, "step": 947 }, { "epoch": 0.12816223067173638, "grad_norm": 1.63685142993927, "learning_rate": 2.9705486425736537e-05, "loss": 0.1947479248046875, "step": 948 }, { "epoch": 0.12829742289818336, "grad_norm": 1.523179531097412, "learning_rate": 2.9704135258075077e-05, "loss": 0.16611099243164062, "step": 949 }, { "epoch": 0.12843261512463033, "grad_norm": 2.705198049545288, "learning_rate": 2.970278102893424e-05, "loss": 0.20716285705566406, "step": 950 }, { "epoch": 0.1285678073510773, "grad_norm": 2.0752038955688477, "learning_rate": 2.9701423738595992e-05, "loss": 0.1510772705078125, "step": 951 }, { "epoch": 0.1287029995775243, "grad_norm": 6.194035530090332, "learning_rate": 2.9700063387342925e-05, "loss": 0.2208089828491211, "step": 952 }, { "epoch": 0.12883819180397127, "grad_norm": 4.746684551239014, "learning_rate": 2.969869997545827e-05, "loss": 0.177886962890625, "step": 953 }, { "epoch": 0.12897338403041825, "grad_norm": 2.50292706489563, "learning_rate": 2.9697333503225897e-05, "loss": 0.22397232055664062, "step": 954 }, { "epoch": 0.12910857625686523, "grad_norm": 1.5793941020965576, "learning_rate": 2.969596397093031e-05, "loss": 0.22097396850585938, "step": 955 }, { "epoch": 0.1292437684833122, "grad_norm": 6.30570650100708, "learning_rate": 2.969459137885666e-05, "loss": 0.31198978424072266, "step": 956 }, { "epoch": 0.12937896070975918, "grad_norm": 2.05967378616333, "learning_rate": 2.969321572729072e-05, "loss": 0.1669597625732422, "step": 957 }, { "epoch": 0.12951415293620616, "grad_norm": 3.2239811420440674, "learning_rate": 2.9691837016518915e-05, "loss": 0.19611406326293945, "step": 958 }, { "epoch": 0.12964934516265314, "grad_norm": 3.287376642227173, "learning_rate": 2.9690455246828294e-05, "loss": 0.17228317260742188, "step": 959 }, { "epoch": 0.12978453738910012, "grad_norm": 3.6492111682891846, "learning_rate": 2.968907041850655e-05, "loss": 0.23149681091308594, "step": 960 }, { "epoch": 0.1299197296155471, "grad_norm": 1.3766086101531982, "learning_rate": 2.968768253184202e-05, "loss": 0.17496681213378906, "step": 961 }, { "epoch": 0.13005492184199408, "grad_norm": 3.6908836364746094, "learning_rate": 2.9686291587123655e-05, "loss": 0.23662948608398438, "step": 962 }, { "epoch": 0.13019011406844105, "grad_norm": 1.9154052734375, "learning_rate": 2.968489758464107e-05, "loss": 0.2047710418701172, "step": 963 }, { "epoch": 0.13032530629488803, "grad_norm": 1.989184021949768, "learning_rate": 2.9683500524684494e-05, "loss": 0.2490081787109375, "step": 964 }, { "epoch": 0.130460498521335, "grad_norm": 1.355475664138794, "learning_rate": 2.9682100407544812e-05, "loss": 0.17270660400390625, "step": 965 }, { "epoch": 0.130595690747782, "grad_norm": 4.148411273956299, "learning_rate": 2.9680697233513526e-05, "loss": 0.20015335083007812, "step": 966 }, { "epoch": 0.13073088297422897, "grad_norm": 1.6648255586624146, "learning_rate": 2.9679291002882793e-05, "loss": 0.19969749450683594, "step": 967 }, { "epoch": 0.13086607520067597, "grad_norm": 4.041023254394531, "learning_rate": 2.967788171594539e-05, "loss": 0.17649555206298828, "step": 968 }, { "epoch": 0.13100126742712295, "grad_norm": 3.6388397216796875, "learning_rate": 2.967646937299474e-05, "loss": 0.187774658203125, "step": 969 }, { "epoch": 0.13113645965356993, "grad_norm": 1.9017889499664307, "learning_rate": 2.9675053974324907e-05, "loss": 0.2440328598022461, "step": 970 }, { "epoch": 0.1312716518800169, "grad_norm": 1.7631199359893799, "learning_rate": 2.9673635520230576e-05, "loss": 0.20721054077148438, "step": 971 }, { "epoch": 0.1314068441064639, "grad_norm": 2.2709553241729736, "learning_rate": 2.9672214011007087e-05, "loss": 0.23993682861328125, "step": 972 }, { "epoch": 0.13154203633291087, "grad_norm": 1.8223376274108887, "learning_rate": 2.9670789446950396e-05, "loss": 0.2679557800292969, "step": 973 }, { "epoch": 0.13167722855935785, "grad_norm": 2.779097318649292, "learning_rate": 2.9669361828357105e-05, "loss": 0.20444297790527344, "step": 974 }, { "epoch": 0.13181242078580482, "grad_norm": 2.168428421020508, "learning_rate": 2.9667931155524454e-05, "loss": 0.1747760772705078, "step": 975 }, { "epoch": 0.1319476130122518, "grad_norm": 3.1765952110290527, "learning_rate": 2.966649742875032e-05, "loss": 0.20195388793945312, "step": 976 }, { "epoch": 0.13208280523869878, "grad_norm": 2.4435763359069824, "learning_rate": 2.9665060648333206e-05, "loss": 0.1827259063720703, "step": 977 }, { "epoch": 0.13221799746514576, "grad_norm": 1.147352933883667, "learning_rate": 2.9663620814572266e-05, "loss": 0.1866617202758789, "step": 978 }, { "epoch": 0.13235318969159274, "grad_norm": 0.8455138206481934, "learning_rate": 2.966217792776728e-05, "loss": 0.18280696868896484, "step": 979 }, { "epoch": 0.13248838191803972, "grad_norm": 1.369072437286377, "learning_rate": 2.9660731988218652e-05, "loss": 0.14558029174804688, "step": 980 }, { "epoch": 0.1326235741444867, "grad_norm": 2.3237037658691406, "learning_rate": 2.965928299622745e-05, "loss": 0.15476417541503906, "step": 981 }, { "epoch": 0.13275876637093367, "grad_norm": 1.4892674684524536, "learning_rate": 2.965783095209535e-05, "loss": 0.2066631317138672, "step": 982 }, { "epoch": 0.13289395859738065, "grad_norm": 1.9498488903045654, "learning_rate": 2.965637585612469e-05, "loss": 0.25786781311035156, "step": 983 }, { "epoch": 0.13302915082382763, "grad_norm": 3.2511940002441406, "learning_rate": 2.965491770861841e-05, "loss": 0.24323606491088867, "step": 984 }, { "epoch": 0.1331643430502746, "grad_norm": 4.255857467651367, "learning_rate": 2.965345650988012e-05, "loss": 0.1783914566040039, "step": 985 }, { "epoch": 0.1332995352767216, "grad_norm": 2.5358378887176514, "learning_rate": 2.9651992260214035e-05, "loss": 0.18294262886047363, "step": 986 }, { "epoch": 0.13343472750316857, "grad_norm": 2.9594359397888184, "learning_rate": 2.9650524959925037e-05, "loss": 0.22311067581176758, "step": 987 }, { "epoch": 0.13356991972961554, "grad_norm": 3.5358283519744873, "learning_rate": 2.9649054609318607e-05, "loss": 0.2275458574295044, "step": 988 }, { "epoch": 0.13370511195606252, "grad_norm": 1.8538273572921753, "learning_rate": 2.9647581208700894e-05, "loss": 0.26154327392578125, "step": 989 }, { "epoch": 0.1338403041825095, "grad_norm": 2.364600419998169, "learning_rate": 2.9646104758378666e-05, "loss": 0.20736408233642578, "step": 990 }, { "epoch": 0.13397549640895648, "grad_norm": 4.50392484664917, "learning_rate": 2.964462525865932e-05, "loss": 0.2237110137939453, "step": 991 }, { "epoch": 0.13411068863540346, "grad_norm": 2.281534194946289, "learning_rate": 2.96431427098509e-05, "loss": 0.1638660430908203, "step": 992 }, { "epoch": 0.13424588086185044, "grad_norm": 1.7651290893554688, "learning_rate": 2.9641657112262084e-05, "loss": 0.2558937072753906, "step": 993 }, { "epoch": 0.13438107308829741, "grad_norm": 1.1067181825637817, "learning_rate": 2.9640168466202174e-05, "loss": 0.22565078735351562, "step": 994 }, { "epoch": 0.1345162653147444, "grad_norm": 1.6819090843200684, "learning_rate": 2.9638676771981124e-05, "loss": 0.14623260498046875, "step": 995 }, { "epoch": 0.13465145754119137, "grad_norm": 3.1180827617645264, "learning_rate": 2.9637182029909508e-05, "loss": 0.20511770248413086, "step": 996 }, { "epoch": 0.13478664976763835, "grad_norm": 1.6389938592910767, "learning_rate": 2.9635684240298532e-05, "loss": 0.2666358947753906, "step": 997 }, { "epoch": 0.13492184199408533, "grad_norm": 0.9389215707778931, "learning_rate": 2.9634183403460053e-05, "loss": 0.18558883666992188, "step": 998 }, { "epoch": 0.1350570342205323, "grad_norm": 3.4492907524108887, "learning_rate": 2.9632679519706553e-05, "loss": 0.1696944236755371, "step": 999 }, { "epoch": 0.13519222644697929, "grad_norm": 1.490695834159851, "learning_rate": 2.9631172589351137e-05, "loss": 0.24640274047851562, "step": 1000 }, { "epoch": 0.1353274186734263, "grad_norm": 2.623732089996338, "learning_rate": 2.962966261270758e-05, "loss": 0.25098419189453125, "step": 1001 }, { "epoch": 0.13546261089987327, "grad_norm": 1.315132737159729, "learning_rate": 2.962814959009024e-05, "loss": 0.14512348175048828, "step": 1002 }, { "epoch": 0.13559780312632025, "grad_norm": 3.913986921310425, "learning_rate": 2.962663352181415e-05, "loss": 0.21914100646972656, "step": 1003 }, { "epoch": 0.13573299535276723, "grad_norm": 4.037308216094971, "learning_rate": 2.9625114408194966e-05, "loss": 0.2380237579345703, "step": 1004 }, { "epoch": 0.1358681875792142, "grad_norm": 1.155958890914917, "learning_rate": 2.962359224954897e-05, "loss": 0.15960216522216797, "step": 1005 }, { "epoch": 0.13600337980566118, "grad_norm": 2.270986557006836, "learning_rate": 2.9622067046193086e-05, "loss": 0.21088409423828125, "step": 1006 }, { "epoch": 0.13613857203210816, "grad_norm": 6.729562759399414, "learning_rate": 2.9620538798444867e-05, "loss": 0.2117156982421875, "step": 1007 }, { "epoch": 0.13627376425855514, "grad_norm": 6.297460079193115, "learning_rate": 2.9619007506622506e-05, "loss": 0.28343963623046875, "step": 1008 }, { "epoch": 0.13640895648500212, "grad_norm": 4.332822799682617, "learning_rate": 2.961747317104482e-05, "loss": 0.17863941192626953, "step": 1009 }, { "epoch": 0.1365441487114491, "grad_norm": 2.627390146255493, "learning_rate": 2.9615935792031274e-05, "loss": 0.1876964569091797, "step": 1010 }, { "epoch": 0.13667934093789608, "grad_norm": 4.272457599639893, "learning_rate": 2.9614395369901953e-05, "loss": 0.23885726928710938, "step": 1011 }, { "epoch": 0.13681453316434306, "grad_norm": 3.464097261428833, "learning_rate": 2.9612851904977582e-05, "loss": 0.2772798538208008, "step": 1012 }, { "epoch": 0.13694972539079003, "grad_norm": 4.8939127922058105, "learning_rate": 2.9611305397579518e-05, "loss": 0.28266334533691406, "step": 1013 }, { "epoch": 0.137084917617237, "grad_norm": 1.0168622732162476, "learning_rate": 2.9609755848029755e-05, "loss": 0.15162897109985352, "step": 1014 }, { "epoch": 0.137220109843684, "grad_norm": 4.415287494659424, "learning_rate": 2.9608203256650916e-05, "loss": 0.21660614013671875, "step": 1015 }, { "epoch": 0.13735530207013097, "grad_norm": 3.1307613849639893, "learning_rate": 2.9606647623766257e-05, "loss": 0.17457962036132812, "step": 1016 }, { "epoch": 0.13749049429657795, "grad_norm": 4.223004341125488, "learning_rate": 2.9605088949699672e-05, "loss": 0.2597503662109375, "step": 1017 }, { "epoch": 0.13762568652302493, "grad_norm": 1.541254997253418, "learning_rate": 2.9603527234775682e-05, "loss": 0.2314300537109375, "step": 1018 }, { "epoch": 0.1377608787494719, "grad_norm": 2.2885689735412598, "learning_rate": 2.960196247931945e-05, "loss": 0.18483352661132812, "step": 1019 }, { "epoch": 0.13789607097591888, "grad_norm": 3.845386505126953, "learning_rate": 2.960039468365676e-05, "loss": 0.24379920959472656, "step": 1020 }, { "epoch": 0.13803126320236586, "grad_norm": 1.9371105432510376, "learning_rate": 2.959882384811404e-05, "loss": 0.21224021911621094, "step": 1021 }, { "epoch": 0.13816645542881284, "grad_norm": 0.9895491600036621, "learning_rate": 2.9597249973018343e-05, "loss": 0.17930030822753906, "step": 1022 }, { "epoch": 0.13830164765525982, "grad_norm": 1.7604680061340332, "learning_rate": 2.959567305869736e-05, "loss": 0.19334888458251953, "step": 1023 }, { "epoch": 0.1384368398817068, "grad_norm": 4.62630033493042, "learning_rate": 2.9594093105479413e-05, "loss": 0.20189809799194336, "step": 1024 }, { "epoch": 0.13857203210815378, "grad_norm": 3.088228702545166, "learning_rate": 2.959251011369345e-05, "loss": 0.15636444091796875, "step": 1025 }, { "epoch": 0.13870722433460075, "grad_norm": 2.911190986633301, "learning_rate": 2.959092408366907e-05, "loss": 0.206268310546875, "step": 1026 }, { "epoch": 0.13884241656104773, "grad_norm": 1.934842824935913, "learning_rate": 2.958933501573649e-05, "loss": 0.27164459228515625, "step": 1027 }, { "epoch": 0.1389776087874947, "grad_norm": 4.05169153213501, "learning_rate": 2.9587742910226555e-05, "loss": 0.20975112915039062, "step": 1028 }, { "epoch": 0.1391128010139417, "grad_norm": 2.305387496948242, "learning_rate": 2.958614776747076e-05, "loss": 0.19959449768066406, "step": 1029 }, { "epoch": 0.13924799324038867, "grad_norm": 1.5476231575012207, "learning_rate": 2.9584549587801213e-05, "loss": 0.22294235229492188, "step": 1030 }, { "epoch": 0.13938318546683565, "grad_norm": 4.097291469573975, "learning_rate": 2.958294837155067e-05, "loss": 0.19585752487182617, "step": 1031 }, { "epoch": 0.13951837769328262, "grad_norm": 0.7149341106414795, "learning_rate": 2.9581344119052508e-05, "loss": 0.09413814544677734, "step": 1032 }, { "epoch": 0.1396535699197296, "grad_norm": 1.8023170232772827, "learning_rate": 2.957973683064074e-05, "loss": 0.2526130676269531, "step": 1033 }, { "epoch": 0.13978876214617658, "grad_norm": 3.4431264400482178, "learning_rate": 2.957812650665002e-05, "loss": 0.22739791870117188, "step": 1034 }, { "epoch": 0.1399239543726236, "grad_norm": 3.108116626739502, "learning_rate": 2.957651314741562e-05, "loss": 0.17715072631835938, "step": 1035 }, { "epoch": 0.14005914659907057, "grad_norm": 3.5500175952911377, "learning_rate": 2.9574896753273454e-05, "loss": 0.2532081604003906, "step": 1036 }, { "epoch": 0.14019433882551754, "grad_norm": 2.8899898529052734, "learning_rate": 2.9573277324560058e-05, "loss": 0.252410888671875, "step": 1037 }, { "epoch": 0.14032953105196452, "grad_norm": 2.784574270248413, "learning_rate": 2.9571654861612608e-05, "loss": 0.1392512321472168, "step": 1038 }, { "epoch": 0.1404647232784115, "grad_norm": 4.721029281616211, "learning_rate": 2.957002936476891e-05, "loss": 0.2327890396118164, "step": 1039 }, { "epoch": 0.14059991550485848, "grad_norm": 1.660416841506958, "learning_rate": 2.9568400834367406e-05, "loss": 0.18694639205932617, "step": 1040 }, { "epoch": 0.14073510773130546, "grad_norm": 1.355526089668274, "learning_rate": 2.9566769270747158e-05, "loss": 0.2514228820800781, "step": 1041 }, { "epoch": 0.14087029995775244, "grad_norm": 1.6010046005249023, "learning_rate": 2.9565134674247864e-05, "loss": 0.18515491485595703, "step": 1042 }, { "epoch": 0.14100549218419942, "grad_norm": 1.5745820999145508, "learning_rate": 2.9563497045209866e-05, "loss": 0.17581558227539062, "step": 1043 }, { "epoch": 0.1411406844106464, "grad_norm": 2.2860074043273926, "learning_rate": 2.9561856383974118e-05, "loss": 0.29541778564453125, "step": 1044 }, { "epoch": 0.14127587663709337, "grad_norm": 1.3537960052490234, "learning_rate": 2.9560212690882218e-05, "loss": 0.19983530044555664, "step": 1045 }, { "epoch": 0.14141106886354035, "grad_norm": 2.137998342514038, "learning_rate": 2.9558565966276395e-05, "loss": 0.19737625122070312, "step": 1046 }, { "epoch": 0.14154626108998733, "grad_norm": 3.0299360752105713, "learning_rate": 2.9556916210499497e-05, "loss": 0.21280860900878906, "step": 1047 }, { "epoch": 0.1416814533164343, "grad_norm": 5.062359809875488, "learning_rate": 2.9555263423895016e-05, "loss": 0.22482681274414062, "step": 1048 }, { "epoch": 0.1418166455428813, "grad_norm": 2.5297317504882812, "learning_rate": 2.955360760680708e-05, "loss": 0.19592857360839844, "step": 1049 }, { "epoch": 0.14195183776932827, "grad_norm": 1.363262414932251, "learning_rate": 2.9551948759580423e-05, "loss": 0.15517520904541016, "step": 1050 }, { "epoch": 0.14208702999577524, "grad_norm": 3.23099946975708, "learning_rate": 2.9550286882560435e-05, "loss": 0.1935439109802246, "step": 1051 }, { "epoch": 0.14222222222222222, "grad_norm": 1.5503112077713013, "learning_rate": 2.9548621976093126e-05, "loss": 0.2294635772705078, "step": 1052 }, { "epoch": 0.1423574144486692, "grad_norm": 2.5213406085968018, "learning_rate": 2.9546954040525144e-05, "loss": 0.20184803009033203, "step": 1053 }, { "epoch": 0.14249260667511618, "grad_norm": 2.4858009815216064, "learning_rate": 2.9545283076203753e-05, "loss": 0.222733736038208, "step": 1054 }, { "epoch": 0.14262779890156316, "grad_norm": 2.538125991821289, "learning_rate": 2.954360908347686e-05, "loss": 0.26206207275390625, "step": 1055 }, { "epoch": 0.14276299112801014, "grad_norm": 1.4709687232971191, "learning_rate": 2.9541932062693e-05, "loss": 0.1612682342529297, "step": 1056 }, { "epoch": 0.14289818335445711, "grad_norm": 1.5508737564086914, "learning_rate": 2.954025201420134e-05, "loss": 0.21424102783203125, "step": 1057 }, { "epoch": 0.1430333755809041, "grad_norm": 1.3205159902572632, "learning_rate": 2.9538568938351672e-05, "loss": 0.18906307220458984, "step": 1058 }, { "epoch": 0.14316856780735107, "grad_norm": 1.4068984985351562, "learning_rate": 2.953688283549442e-05, "loss": 0.18598365783691406, "step": 1059 }, { "epoch": 0.14330376003379805, "grad_norm": 1.3567590713500977, "learning_rate": 2.9535193705980642e-05, "loss": 0.15661144256591797, "step": 1060 }, { "epoch": 0.14343895226024503, "grad_norm": 4.0203728675842285, "learning_rate": 2.9533501550162028e-05, "loss": 0.20356178283691406, "step": 1061 }, { "epoch": 0.143574144486692, "grad_norm": 4.366781234741211, "learning_rate": 2.9531806368390882e-05, "loss": 0.20541906356811523, "step": 1062 }, { "epoch": 0.14370933671313899, "grad_norm": 1.332748532295227, "learning_rate": 2.953010816102016e-05, "loss": 0.14351272583007812, "step": 1063 }, { "epoch": 0.14384452893958596, "grad_norm": 3.0594022274017334, "learning_rate": 2.952840692840343e-05, "loss": 0.20209646224975586, "step": 1064 }, { "epoch": 0.14397972116603294, "grad_norm": 1.6943562030792236, "learning_rate": 2.9526702670894914e-05, "loss": 0.2362823486328125, "step": 1065 }, { "epoch": 0.14411491339247992, "grad_norm": 1.2198818922042847, "learning_rate": 2.952499538884943e-05, "loss": 0.18984317779541016, "step": 1066 }, { "epoch": 0.1442501056189269, "grad_norm": 4.816554069519043, "learning_rate": 2.9523285082622448e-05, "loss": 0.14357614517211914, "step": 1067 }, { "epoch": 0.14438529784537388, "grad_norm": 3.437350034713745, "learning_rate": 2.9521571752570064e-05, "loss": 0.15816020965576172, "step": 1068 }, { "epoch": 0.14452049007182088, "grad_norm": 5.826547145843506, "learning_rate": 2.9519855399049004e-05, "loss": 0.17350292205810547, "step": 1069 }, { "epoch": 0.14465568229826786, "grad_norm": 4.477969169616699, "learning_rate": 2.951813602241662e-05, "loss": 0.20851516723632812, "step": 1070 }, { "epoch": 0.14479087452471484, "grad_norm": 1.6647350788116455, "learning_rate": 2.9516413623030896e-05, "loss": 0.24280357360839844, "step": 1071 }, { "epoch": 0.14492606675116182, "grad_norm": 3.3024792671203613, "learning_rate": 2.951468820125045e-05, "loss": 0.22267723083496094, "step": 1072 }, { "epoch": 0.1450612589776088, "grad_norm": 2.139153242111206, "learning_rate": 2.9512959757434508e-05, "loss": 0.19460058212280273, "step": 1073 }, { "epoch": 0.14519645120405578, "grad_norm": 2.8732378482818604, "learning_rate": 2.951122829194296e-05, "loss": 0.19809532165527344, "step": 1074 }, { "epoch": 0.14533164343050275, "grad_norm": 2.5846972465515137, "learning_rate": 2.9509493805136296e-05, "loss": 0.17796707153320312, "step": 1075 }, { "epoch": 0.14546683565694973, "grad_norm": 1.7688478231430054, "learning_rate": 2.9507756297375648e-05, "loss": 0.22654151916503906, "step": 1076 }, { "epoch": 0.1456020278833967, "grad_norm": 3.1816835403442383, "learning_rate": 2.9506015769022778e-05, "loss": 0.2374286651611328, "step": 1077 }, { "epoch": 0.1457372201098437, "grad_norm": 4.030456066131592, "learning_rate": 2.950427222044006e-05, "loss": 0.2720985412597656, "step": 1078 }, { "epoch": 0.14587241233629067, "grad_norm": 1.2745141983032227, "learning_rate": 2.9502525651990525e-05, "loss": 0.2743339538574219, "step": 1079 }, { "epoch": 0.14600760456273765, "grad_norm": 0.9128302335739136, "learning_rate": 2.9500776064037813e-05, "loss": 0.19506359100341797, "step": 1080 }, { "epoch": 0.14614279678918463, "grad_norm": 1.0537209510803223, "learning_rate": 2.9499023456946194e-05, "loss": 0.15345001220703125, "step": 1081 }, { "epoch": 0.1462779890156316, "grad_norm": 1.4146454334259033, "learning_rate": 2.9497267831080575e-05, "loss": 0.2233428955078125, "step": 1082 }, { "epoch": 0.14641318124207858, "grad_norm": 3.1179754734039307, "learning_rate": 2.949550918680649e-05, "loss": 0.1699848175048828, "step": 1083 }, { "epoch": 0.14654837346852556, "grad_norm": 1.597707748413086, "learning_rate": 2.9493747524490086e-05, "loss": 0.2307872772216797, "step": 1084 }, { "epoch": 0.14668356569497254, "grad_norm": 0.9638230204582214, "learning_rate": 2.9491982844498156e-05, "loss": 0.149200439453125, "step": 1085 }, { "epoch": 0.14681875792141952, "grad_norm": 2.897291421890259, "learning_rate": 2.949021514719812e-05, "loss": 0.2945866584777832, "step": 1086 }, { "epoch": 0.1469539501478665, "grad_norm": 4.406247615814209, "learning_rate": 2.948844443295802e-05, "loss": 0.19987964630126953, "step": 1087 }, { "epoch": 0.14708914237431348, "grad_norm": 2.993009328842163, "learning_rate": 2.9486670702146526e-05, "loss": 0.2476806640625, "step": 1088 }, { "epoch": 0.14722433460076045, "grad_norm": 1.607150912284851, "learning_rate": 2.948489395513294e-05, "loss": 0.2028064727783203, "step": 1089 }, { "epoch": 0.14735952682720743, "grad_norm": 2.169950485229492, "learning_rate": 2.948311419228719e-05, "loss": 0.19666671752929688, "step": 1090 }, { "epoch": 0.1474947190536544, "grad_norm": 4.988809108734131, "learning_rate": 2.948133141397983e-05, "loss": 0.19045639038085938, "step": 1091 }, { "epoch": 0.1476299112801014, "grad_norm": 1.3648936748504639, "learning_rate": 2.9479545620582047e-05, "loss": 0.1711178421974182, "step": 1092 }, { "epoch": 0.14776510350654837, "grad_norm": 1.8938064575195312, "learning_rate": 2.9477756812465652e-05, "loss": 0.2031574249267578, "step": 1093 }, { "epoch": 0.14790029573299535, "grad_norm": 1.1864244937896729, "learning_rate": 2.9475964990003085e-05, "loss": 0.10277795791625977, "step": 1094 }, { "epoch": 0.14803548795944232, "grad_norm": 1.0426554679870605, "learning_rate": 2.9474170153567406e-05, "loss": 0.20920419692993164, "step": 1095 }, { "epoch": 0.1481706801858893, "grad_norm": 2.343167543411255, "learning_rate": 2.947237230353232e-05, "loss": 0.2818145751953125, "step": 1096 }, { "epoch": 0.14830587241233628, "grad_norm": 2.9093923568725586, "learning_rate": 2.9470571440272147e-05, "loss": 0.2296142578125, "step": 1097 }, { "epoch": 0.14844106463878326, "grad_norm": 1.608169674873352, "learning_rate": 2.946876756416183e-05, "loss": 0.21310806274414062, "step": 1098 }, { "epoch": 0.14857625686523024, "grad_norm": 1.6651605367660522, "learning_rate": 2.946696067557695e-05, "loss": 0.2605419158935547, "step": 1099 }, { "epoch": 0.14871144909167722, "grad_norm": 2.4382612705230713, "learning_rate": 2.9465150774893706e-05, "loss": 0.2169046401977539, "step": 1100 }, { "epoch": 0.1488466413181242, "grad_norm": 2.1044139862060547, "learning_rate": 2.9463337862488938e-05, "loss": 0.1279897689819336, "step": 1101 }, { "epoch": 0.14898183354457117, "grad_norm": 2.7398595809936523, "learning_rate": 2.9461521938740096e-05, "loss": 0.22760009765625, "step": 1102 }, { "epoch": 0.14911702577101818, "grad_norm": 2.6128108501434326, "learning_rate": 2.9459703004025273e-05, "loss": 0.2618274688720703, "step": 1103 }, { "epoch": 0.14925221799746516, "grad_norm": 0.9230862855911255, "learning_rate": 2.9457881058723174e-05, "loss": 0.21934127807617188, "step": 1104 }, { "epoch": 0.14938741022391214, "grad_norm": 2.928769826889038, "learning_rate": 2.9456056103213137e-05, "loss": 0.24720382690429688, "step": 1105 }, { "epoch": 0.14952260245035912, "grad_norm": 3.214954137802124, "learning_rate": 2.945422813787513e-05, "loss": 0.15367984771728516, "step": 1106 }, { "epoch": 0.1496577946768061, "grad_norm": 2.8005459308624268, "learning_rate": 2.9452397163089748e-05, "loss": 0.15401554107666016, "step": 1107 }, { "epoch": 0.14979298690325307, "grad_norm": 5.035986423492432, "learning_rate": 2.9450563179238207e-05, "loss": 0.22300243377685547, "step": 1108 }, { "epoch": 0.14992817912970005, "grad_norm": 1.8318477869033813, "learning_rate": 2.9448726186702354e-05, "loss": 0.19271504878997803, "step": 1109 }, { "epoch": 0.15006337135614703, "grad_norm": 0.7416521310806274, "learning_rate": 2.9446886185864652e-05, "loss": 0.12869834899902344, "step": 1110 }, { "epoch": 0.150198563582594, "grad_norm": 2.2862319946289062, "learning_rate": 2.944504317710821e-05, "loss": 0.19649887084960938, "step": 1111 }, { "epoch": 0.150333755809041, "grad_norm": 3.652017831802368, "learning_rate": 2.944319716081675e-05, "loss": 0.20375442504882812, "step": 1112 }, { "epoch": 0.15046894803548796, "grad_norm": 1.6085723638534546, "learning_rate": 2.944134813737462e-05, "loss": 0.22667503356933594, "step": 1113 }, { "epoch": 0.15060414026193494, "grad_norm": 2.0221939086914062, "learning_rate": 2.9439496107166796e-05, "loss": 0.21702194213867188, "step": 1114 }, { "epoch": 0.15073933248838192, "grad_norm": 1.3399120569229126, "learning_rate": 2.943764107057888e-05, "loss": 0.2021331787109375, "step": 1115 }, { "epoch": 0.1508745247148289, "grad_norm": 1.836799144744873, "learning_rate": 2.9435783027997106e-05, "loss": 0.23592090606689453, "step": 1116 }, { "epoch": 0.15100971694127588, "grad_norm": 1.072712779045105, "learning_rate": 2.9433921979808323e-05, "loss": 0.17313671112060547, "step": 1117 }, { "epoch": 0.15114490916772286, "grad_norm": 1.9546438455581665, "learning_rate": 2.9432057926400014e-05, "loss": 0.19092273712158203, "step": 1118 }, { "epoch": 0.15128010139416984, "grad_norm": 1.7589925527572632, "learning_rate": 2.943019086816028e-05, "loss": 0.20804977416992188, "step": 1119 }, { "epoch": 0.15141529362061681, "grad_norm": 2.976997137069702, "learning_rate": 2.9428320805477855e-05, "loss": 0.21226978302001953, "step": 1120 }, { "epoch": 0.1515504858470638, "grad_norm": 2.0414233207702637, "learning_rate": 2.9426447738742104e-05, "loss": 0.20100021362304688, "step": 1121 }, { "epoch": 0.15168567807351077, "grad_norm": 1.6670335531234741, "learning_rate": 2.9424571668343e-05, "loss": 0.2207794189453125, "step": 1122 }, { "epoch": 0.15182087029995775, "grad_norm": 0.9890133738517761, "learning_rate": 2.942269259467115e-05, "loss": 0.20777225494384766, "step": 1123 }, { "epoch": 0.15195606252640473, "grad_norm": 0.6977941393852234, "learning_rate": 2.9420810518117794e-05, "loss": 0.13891077041625977, "step": 1124 }, { "epoch": 0.1520912547528517, "grad_norm": 2.8817968368530273, "learning_rate": 2.9418925439074784e-05, "loss": 0.2390279769897461, "step": 1125 }, { "epoch": 0.15222644697929869, "grad_norm": 1.4219186305999756, "learning_rate": 2.9417037357934606e-05, "loss": 0.17908287048339844, "step": 1126 }, { "epoch": 0.15236163920574566, "grad_norm": 1.7705414295196533, "learning_rate": 2.9415146275090373e-05, "loss": 0.18467235565185547, "step": 1127 }, { "epoch": 0.15249683143219264, "grad_norm": 2.6281392574310303, "learning_rate": 2.9413252190935813e-05, "loss": 0.21423721313476562, "step": 1128 }, { "epoch": 0.15263202365863962, "grad_norm": 2.476922035217285, "learning_rate": 2.9411355105865286e-05, "loss": 0.19746297597885132, "step": 1129 }, { "epoch": 0.1527672158850866, "grad_norm": 1.6135934591293335, "learning_rate": 2.9409455020273775e-05, "loss": 0.26868247985839844, "step": 1130 }, { "epoch": 0.15290240811153358, "grad_norm": 1.3475333452224731, "learning_rate": 2.940755193455689e-05, "loss": 0.1934833526611328, "step": 1131 }, { "epoch": 0.15303760033798056, "grad_norm": 1.2489211559295654, "learning_rate": 2.940564584911086e-05, "loss": 0.1741466522216797, "step": 1132 }, { "epoch": 0.15317279256442753, "grad_norm": 4.067756652832031, "learning_rate": 2.9403736764332543e-05, "loss": 0.19297122955322266, "step": 1133 }, { "epoch": 0.1533079847908745, "grad_norm": 1.2733547687530518, "learning_rate": 2.9401824680619423e-05, "loss": 0.22699928283691406, "step": 1134 }, { "epoch": 0.1534431770173215, "grad_norm": 1.0942462682724, "learning_rate": 2.9399909598369604e-05, "loss": 0.19097328186035156, "step": 1135 }, { "epoch": 0.15357836924376847, "grad_norm": 1.5180552005767822, "learning_rate": 2.939799151798182e-05, "loss": 0.22004318237304688, "step": 1136 }, { "epoch": 0.15371356147021548, "grad_norm": 1.9421501159667969, "learning_rate": 2.9396070439855417e-05, "loss": 0.21540164947509766, "step": 1137 }, { "epoch": 0.15384875369666245, "grad_norm": 2.0043678283691406, "learning_rate": 2.9394146364390382e-05, "loss": 0.24493980407714844, "step": 1138 }, { "epoch": 0.15398394592310943, "grad_norm": 2.582948923110962, "learning_rate": 2.9392219291987315e-05, "loss": 0.20032691955566406, "step": 1139 }, { "epoch": 0.1541191381495564, "grad_norm": 1.8866467475891113, "learning_rate": 2.939028922304744e-05, "loss": 0.22159576416015625, "step": 1140 }, { "epoch": 0.1542543303760034, "grad_norm": 2.0384747982025146, "learning_rate": 2.9388356157972615e-05, "loss": 0.2097949981689453, "step": 1141 }, { "epoch": 0.15438952260245037, "grad_norm": 2.9942688941955566, "learning_rate": 2.938642009716531e-05, "loss": 0.2665824890136719, "step": 1142 }, { "epoch": 0.15452471482889735, "grad_norm": 1.0237970352172852, "learning_rate": 2.938448104102862e-05, "loss": 0.19638824462890625, "step": 1143 }, { "epoch": 0.15465990705534433, "grad_norm": 2.868194103240967, "learning_rate": 2.9382538989966267e-05, "loss": 0.23156356811523438, "step": 1144 }, { "epoch": 0.1547950992817913, "grad_norm": 0.6812503933906555, "learning_rate": 2.9380593944382605e-05, "loss": 0.13663482666015625, "step": 1145 }, { "epoch": 0.15493029150823828, "grad_norm": 2.0901856422424316, "learning_rate": 2.9378645904682596e-05, "loss": 0.14768600463867188, "step": 1146 }, { "epoch": 0.15506548373468526, "grad_norm": 1.353996992111206, "learning_rate": 2.937669487127183e-05, "loss": 0.19890642166137695, "step": 1147 }, { "epoch": 0.15520067596113224, "grad_norm": 2.0099098682403564, "learning_rate": 2.9374740844556532e-05, "loss": 0.2002553939819336, "step": 1148 }, { "epoch": 0.15533586818757922, "grad_norm": 1.2000133991241455, "learning_rate": 2.937278382494353e-05, "loss": 0.26312255859375, "step": 1149 }, { "epoch": 0.1554710604140262, "grad_norm": 2.436415433883667, "learning_rate": 2.9370823812840287e-05, "loss": 0.2123870849609375, "step": 1150 }, { "epoch": 0.15560625264047317, "grad_norm": 2.8203797340393066, "learning_rate": 2.93688608086549e-05, "loss": 0.22266101837158203, "step": 1151 }, { "epoch": 0.15574144486692015, "grad_norm": 1.9603325128555298, "learning_rate": 2.9366894812796064e-05, "loss": 0.21594619750976562, "step": 1152 }, { "epoch": 0.15587663709336713, "grad_norm": 1.303475022315979, "learning_rate": 2.9364925825673117e-05, "loss": 0.1968402862548828, "step": 1153 }, { "epoch": 0.1560118293198141, "grad_norm": 1.8851032257080078, "learning_rate": 2.9362953847696006e-05, "loss": 0.2257080078125, "step": 1154 }, { "epoch": 0.1561470215462611, "grad_norm": 2.605109214782715, "learning_rate": 2.9360978879275313e-05, "loss": 0.24187850952148438, "step": 1155 }, { "epoch": 0.15628221377270807, "grad_norm": 1.8437635898590088, "learning_rate": 2.9359000920822237e-05, "loss": 0.19596290588378906, "step": 1156 }, { "epoch": 0.15641740599915505, "grad_norm": 1.5065345764160156, "learning_rate": 2.9357019972748594e-05, "loss": 0.1710672378540039, "step": 1157 }, { "epoch": 0.15655259822560202, "grad_norm": 2.136629581451416, "learning_rate": 2.9355036035466836e-05, "loss": 0.19841909408569336, "step": 1158 }, { "epoch": 0.156687790452049, "grad_norm": 4.33582067489624, "learning_rate": 2.935304910939002e-05, "loss": 0.21745777130126953, "step": 1159 }, { "epoch": 0.15682298267849598, "grad_norm": 3.8167378902435303, "learning_rate": 2.935105919493184e-05, "loss": 0.21950364112854004, "step": 1160 }, { "epoch": 0.15695817490494296, "grad_norm": 1.8931597471237183, "learning_rate": 2.9349066292506613e-05, "loss": 0.22406768798828125, "step": 1161 }, { "epoch": 0.15709336713138994, "grad_norm": 2.715423822402954, "learning_rate": 2.934707040252926e-05, "loss": 0.186592698097229, "step": 1162 }, { "epoch": 0.15722855935783692, "grad_norm": 1.4493467807769775, "learning_rate": 2.9345071525415342e-05, "loss": 0.2242717742919922, "step": 1163 }, { "epoch": 0.1573637515842839, "grad_norm": 1.7794873714447021, "learning_rate": 2.9343069661581035e-05, "loss": 0.2085585594177246, "step": 1164 }, { "epoch": 0.15749894381073087, "grad_norm": 1.7526251077651978, "learning_rate": 2.9341064811443138e-05, "loss": 0.21421551704406738, "step": 1165 }, { "epoch": 0.15763413603717785, "grad_norm": 4.4980149269104, "learning_rate": 2.9339056975419078e-05, "loss": 0.2119426727294922, "step": 1166 }, { "epoch": 0.15776932826362483, "grad_norm": 2.7720754146575928, "learning_rate": 2.9337046153926882e-05, "loss": 0.18785572052001953, "step": 1167 }, { "epoch": 0.1579045204900718, "grad_norm": 2.674401044845581, "learning_rate": 2.9335032347385224e-05, "loss": 0.15754222869873047, "step": 1168 }, { "epoch": 0.1580397127165188, "grad_norm": 1.8676788806915283, "learning_rate": 2.933301555621339e-05, "loss": 0.2265453338623047, "step": 1169 }, { "epoch": 0.15817490494296577, "grad_norm": 5.242788314819336, "learning_rate": 2.933099578083128e-05, "loss": 0.23511505126953125, "step": 1170 }, { "epoch": 0.15831009716941277, "grad_norm": 4.543645858764648, "learning_rate": 2.932897302165943e-05, "loss": 0.2626004219055176, "step": 1171 }, { "epoch": 0.15844528939585975, "grad_norm": 3.0055973529815674, "learning_rate": 2.9326947279118983e-05, "loss": 0.22939300537109375, "step": 1172 }, { "epoch": 0.15858048162230673, "grad_norm": 4.152409076690674, "learning_rate": 2.9324918553631716e-05, "loss": 0.17406177520751953, "step": 1173 }, { "epoch": 0.1587156738487537, "grad_norm": 2.5324268341064453, "learning_rate": 2.9322886845620013e-05, "loss": 0.2228221893310547, "step": 1174 }, { "epoch": 0.1588508660752007, "grad_norm": 1.541008710861206, "learning_rate": 2.932085215550689e-05, "loss": 0.1670060157775879, "step": 1175 }, { "epoch": 0.15898605830164766, "grad_norm": 6.707080841064453, "learning_rate": 2.9318814483715982e-05, "loss": 0.2936382293701172, "step": 1176 }, { "epoch": 0.15912125052809464, "grad_norm": 2.299949884414673, "learning_rate": 2.9316773830671537e-05, "loss": 0.19097137451171875, "step": 1177 }, { "epoch": 0.15925644275454162, "grad_norm": 2.5141537189483643, "learning_rate": 2.9314730196798437e-05, "loss": 0.20153236389160156, "step": 1178 }, { "epoch": 0.1593916349809886, "grad_norm": 2.8581597805023193, "learning_rate": 2.9312683582522178e-05, "loss": 0.20731830596923828, "step": 1179 }, { "epoch": 0.15952682720743558, "grad_norm": 3.6299893856048584, "learning_rate": 2.9310633988268868e-05, "loss": 0.2257823944091797, "step": 1180 }, { "epoch": 0.15966201943388256, "grad_norm": 1.6399526596069336, "learning_rate": 2.9308581414465246e-05, "loss": 0.1636180877685547, "step": 1181 }, { "epoch": 0.15979721166032954, "grad_norm": 3.0158963203430176, "learning_rate": 2.9306525861538674e-05, "loss": 0.18083667755126953, "step": 1182 }, { "epoch": 0.15993240388677651, "grad_norm": 4.761128902435303, "learning_rate": 2.9304467329917127e-05, "loss": 0.2058734893798828, "step": 1183 }, { "epoch": 0.1600675961132235, "grad_norm": 3.8764705657958984, "learning_rate": 2.9302405820029198e-05, "loss": 0.2081432342529297, "step": 1184 }, { "epoch": 0.16020278833967047, "grad_norm": 2.9538233280181885, "learning_rate": 2.9300341332304114e-05, "loss": 0.2520465850830078, "step": 1185 }, { "epoch": 0.16033798056611745, "grad_norm": 1.5842045545578003, "learning_rate": 2.9298273867171697e-05, "loss": 0.21493911743164062, "step": 1186 }, { "epoch": 0.16047317279256443, "grad_norm": 1.9225190877914429, "learning_rate": 2.929620342506242e-05, "loss": 0.1946878433227539, "step": 1187 }, { "epoch": 0.1606083650190114, "grad_norm": 0.9797894954681396, "learning_rate": 2.929413000640735e-05, "loss": 0.19542312622070312, "step": 1188 }, { "epoch": 0.16074355724545838, "grad_norm": 1.8446816205978394, "learning_rate": 2.9292053611638187e-05, "loss": 0.1798248291015625, "step": 1189 }, { "epoch": 0.16087874947190536, "grad_norm": 2.0895726680755615, "learning_rate": 2.928997424118725e-05, "loss": 0.2467883825302124, "step": 1190 }, { "epoch": 0.16101394169835234, "grad_norm": 1.1365423202514648, "learning_rate": 2.928789189548747e-05, "loss": 0.24303627014160156, "step": 1191 }, { "epoch": 0.16114913392479932, "grad_norm": 2.5018985271453857, "learning_rate": 2.9285806574972405e-05, "loss": 0.20746994018554688, "step": 1192 }, { "epoch": 0.1612843261512463, "grad_norm": 1.2010382413864136, "learning_rate": 2.928371828007623e-05, "loss": 0.16903305053710938, "step": 1193 }, { "epoch": 0.16141951837769328, "grad_norm": 3.821223735809326, "learning_rate": 2.928162701123374e-05, "loss": 0.20270919799804688, "step": 1194 }, { "epoch": 0.16155471060414026, "grad_norm": 2.6594810485839844, "learning_rate": 2.9279532768880345e-05, "loss": 0.21689224243164062, "step": 1195 }, { "epoch": 0.16168990283058723, "grad_norm": 0.9651798605918884, "learning_rate": 2.9277435553452084e-05, "loss": 0.1259899139404297, "step": 1196 }, { "epoch": 0.1618250950570342, "grad_norm": 2.060258150100708, "learning_rate": 2.9275335365385602e-05, "loss": 0.21222925186157227, "step": 1197 }, { "epoch": 0.1619602872834812, "grad_norm": 2.437762975692749, "learning_rate": 2.927323220511817e-05, "loss": 0.21192359924316406, "step": 1198 }, { "epoch": 0.16209547950992817, "grad_norm": 2.5704808235168457, "learning_rate": 2.9271126073087684e-05, "loss": 0.2102813720703125, "step": 1199 }, { "epoch": 0.16223067173637515, "grad_norm": 2.143406629562378, "learning_rate": 2.926901696973264e-05, "loss": 0.1216421127319336, "step": 1200 }, { "epoch": 0.16236586396282213, "grad_norm": 1.2513593435287476, "learning_rate": 2.9266904895492177e-05, "loss": 0.20844626426696777, "step": 1201 }, { "epoch": 0.1625010561892691, "grad_norm": 5.105346202850342, "learning_rate": 2.926478985080603e-05, "loss": 0.2205181121826172, "step": 1202 }, { "epoch": 0.16263624841571608, "grad_norm": 0.9585946798324585, "learning_rate": 2.9262671836114568e-05, "loss": 0.16717815399169922, "step": 1203 }, { "epoch": 0.16277144064216306, "grad_norm": 1.400521993637085, "learning_rate": 2.9260550851858774e-05, "loss": 0.18681716918945312, "step": 1204 }, { "epoch": 0.16290663286861007, "grad_norm": 2.233264684677124, "learning_rate": 2.9258426898480243e-05, "loss": 0.26293182373046875, "step": 1205 }, { "epoch": 0.16304182509505705, "grad_norm": 1.3776154518127441, "learning_rate": 2.9256299976421198e-05, "loss": 0.16571426391601562, "step": 1206 }, { "epoch": 0.16317701732150403, "grad_norm": 2.0200002193450928, "learning_rate": 2.9254170086124474e-05, "loss": 0.2273101806640625, "step": 1207 }, { "epoch": 0.163312209547951, "grad_norm": 2.553645133972168, "learning_rate": 2.9252037228033526e-05, "loss": 0.2573814392089844, "step": 1208 }, { "epoch": 0.16344740177439798, "grad_norm": 1.7511731386184692, "learning_rate": 2.9249901402592424e-05, "loss": 0.19594955444335938, "step": 1209 }, { "epoch": 0.16358259400084496, "grad_norm": 1.692636251449585, "learning_rate": 2.9247762610245863e-05, "loss": 0.23878955841064453, "step": 1210 }, { "epoch": 0.16371778622729194, "grad_norm": 0.7301585078239441, "learning_rate": 2.9245620851439146e-05, "loss": 0.1350393295288086, "step": 1211 }, { "epoch": 0.16385297845373892, "grad_norm": 1.1896436214447021, "learning_rate": 2.92434761266182e-05, "loss": 0.19644403457641602, "step": 1212 }, { "epoch": 0.1639881706801859, "grad_norm": 0.7367956042289734, "learning_rate": 2.924132843622957e-05, "loss": 0.21621322631835938, "step": 1213 }, { "epoch": 0.16412336290663287, "grad_norm": 2.8648135662078857, "learning_rate": 2.9239177780720418e-05, "loss": 0.16452407836914062, "step": 1214 }, { "epoch": 0.16425855513307985, "grad_norm": 2.5793490409851074, "learning_rate": 2.923702416053852e-05, "loss": 0.22870445251464844, "step": 1215 }, { "epoch": 0.16439374735952683, "grad_norm": 1.4675109386444092, "learning_rate": 2.9234867576132268e-05, "loss": 0.15302658081054688, "step": 1216 }, { "epoch": 0.1645289395859738, "grad_norm": 2.1417102813720703, "learning_rate": 2.923270802795068e-05, "loss": 0.22233009338378906, "step": 1217 }, { "epoch": 0.1646641318124208, "grad_norm": 3.58647084236145, "learning_rate": 2.9230545516443378e-05, "loss": 0.18663978576660156, "step": 1218 }, { "epoch": 0.16479932403886777, "grad_norm": 3.886446714401245, "learning_rate": 2.9228380042060615e-05, "loss": 0.20212745666503906, "step": 1219 }, { "epoch": 0.16493451626531475, "grad_norm": 2.8805408477783203, "learning_rate": 2.9226211605253252e-05, "loss": 0.14271926879882812, "step": 1220 }, { "epoch": 0.16506970849176172, "grad_norm": 3.7256038188934326, "learning_rate": 2.922404020647277e-05, "loss": 0.18785858154296875, "step": 1221 }, { "epoch": 0.1652049007182087, "grad_norm": 2.530155897140503, "learning_rate": 2.9221865846171264e-05, "loss": 0.21385407447814941, "step": 1222 }, { "epoch": 0.16534009294465568, "grad_norm": 2.4122066497802734, "learning_rate": 2.9219688524801446e-05, "loss": 0.15297985076904297, "step": 1223 }, { "epoch": 0.16547528517110266, "grad_norm": 1.780105471611023, "learning_rate": 2.9217508242816653e-05, "loss": 0.18054676055908203, "step": 1224 }, { "epoch": 0.16561047739754964, "grad_norm": 1.5474873781204224, "learning_rate": 2.921532500067083e-05, "loss": 0.1624441146850586, "step": 1225 }, { "epoch": 0.16574566962399662, "grad_norm": 2.8681282997131348, "learning_rate": 2.9213138798818528e-05, "loss": 0.17368105053901672, "step": 1226 }, { "epoch": 0.1658808618504436, "grad_norm": 3.33196759223938, "learning_rate": 2.921094963771494e-05, "loss": 0.21352672576904297, "step": 1227 }, { "epoch": 0.16601605407689057, "grad_norm": 2.4219534397125244, "learning_rate": 2.9208757517815855e-05, "loss": 0.190521240234375, "step": 1228 }, { "epoch": 0.16615124630333755, "grad_norm": 0.9082304835319519, "learning_rate": 2.9206562439577684e-05, "loss": 0.15903091430664062, "step": 1229 }, { "epoch": 0.16628643852978453, "grad_norm": 1.1619412899017334, "learning_rate": 2.9204364403457452e-05, "loss": 0.1984710693359375, "step": 1230 }, { "epoch": 0.1664216307562315, "grad_norm": 0.7407615184783936, "learning_rate": 2.9202163409912808e-05, "loss": 0.21421432495117188, "step": 1231 }, { "epoch": 0.1665568229826785, "grad_norm": 2.1070401668548584, "learning_rate": 2.9199959459402003e-05, "loss": 0.2463531494140625, "step": 1232 }, { "epoch": 0.16669201520912547, "grad_norm": 3.299759864807129, "learning_rate": 2.919775255238392e-05, "loss": 0.1620769500732422, "step": 1233 }, { "epoch": 0.16682720743557244, "grad_norm": 1.0545693635940552, "learning_rate": 2.919554268931804e-05, "loss": 0.21468615531921387, "step": 1234 }, { "epoch": 0.16696239966201942, "grad_norm": 0.9881651401519775, "learning_rate": 2.9193329870664475e-05, "loss": 0.18878698348999023, "step": 1235 }, { "epoch": 0.1670975918884664, "grad_norm": 3.00102162361145, "learning_rate": 2.9191114096883938e-05, "loss": 0.21954917907714844, "step": 1236 }, { "epoch": 0.16723278411491338, "grad_norm": 3.4775328636169434, "learning_rate": 2.9188895368437774e-05, "loss": 0.2082061767578125, "step": 1237 }, { "epoch": 0.16736797634136036, "grad_norm": 2.9185492992401123, "learning_rate": 2.9186673685787926e-05, "loss": 0.1755695343017578, "step": 1238 }, { "epoch": 0.16750316856780736, "grad_norm": 1.8831610679626465, "learning_rate": 2.918444904939697e-05, "loss": 0.19309234619140625, "step": 1239 }, { "epoch": 0.16763836079425434, "grad_norm": 2.365365982055664, "learning_rate": 2.9182221459728078e-05, "loss": 0.17614078521728516, "step": 1240 }, { "epoch": 0.16777355302070132, "grad_norm": 1.92036771774292, "learning_rate": 2.917999091724505e-05, "loss": 0.21112632751464844, "step": 1241 }, { "epoch": 0.1679087452471483, "grad_norm": 3.2891643047332764, "learning_rate": 2.9177757422412294e-05, "loss": 0.22065162658691406, "step": 1242 }, { "epoch": 0.16804393747359528, "grad_norm": 4.371700763702393, "learning_rate": 2.917552097569484e-05, "loss": 0.24998044967651367, "step": 1243 }, { "epoch": 0.16817912970004226, "grad_norm": 3.5760509967803955, "learning_rate": 2.917328157755832e-05, "loss": 0.20297622680664062, "step": 1244 }, { "epoch": 0.16831432192648924, "grad_norm": 4.524680137634277, "learning_rate": 2.9171039228469003e-05, "loss": 0.17680931091308594, "step": 1245 }, { "epoch": 0.16844951415293621, "grad_norm": 1.991071343421936, "learning_rate": 2.9168793928893747e-05, "loss": 0.2197580337524414, "step": 1246 }, { "epoch": 0.1685847063793832, "grad_norm": 1.2785810232162476, "learning_rate": 2.9166545679300036e-05, "loss": 0.18245315551757812, "step": 1247 }, { "epoch": 0.16871989860583017, "grad_norm": 3.6238996982574463, "learning_rate": 2.9164294480155966e-05, "loss": 0.21080970764160156, "step": 1248 }, { "epoch": 0.16885509083227715, "grad_norm": 2.1705658435821533, "learning_rate": 2.9162040331930256e-05, "loss": 0.21681976318359375, "step": 1249 }, { "epoch": 0.16899028305872413, "grad_norm": 1.8535723686218262, "learning_rate": 2.915978323509223e-05, "loss": 0.22697019577026367, "step": 1250 }, { "epoch": 0.1691254752851711, "grad_norm": 1.32503080368042, "learning_rate": 2.915752319011182e-05, "loss": 0.22873973846435547, "step": 1251 }, { "epoch": 0.16926066751161808, "grad_norm": 0.9266617298126221, "learning_rate": 2.9155260197459588e-05, "loss": 0.13932466506958008, "step": 1252 }, { "epoch": 0.16939585973806506, "grad_norm": 1.1868035793304443, "learning_rate": 2.91529942576067e-05, "loss": 0.15386676788330078, "step": 1253 }, { "epoch": 0.16953105196451204, "grad_norm": 1.9122745990753174, "learning_rate": 2.915072537102493e-05, "loss": 0.18025827407836914, "step": 1254 }, { "epoch": 0.16966624419095902, "grad_norm": 1.0295727252960205, "learning_rate": 2.914845353818668e-05, "loss": 0.21793556213378906, "step": 1255 }, { "epoch": 0.169801436417406, "grad_norm": 1.1580969095230103, "learning_rate": 2.9146178759564953e-05, "loss": 0.20047378540039062, "step": 1256 }, { "epoch": 0.16993662864385298, "grad_norm": 2.033372402191162, "learning_rate": 2.914390103563337e-05, "loss": 0.22981834411621094, "step": 1257 }, { "epoch": 0.17007182087029996, "grad_norm": 1.376724123954773, "learning_rate": 2.914162036686617e-05, "loss": 0.2403717041015625, "step": 1258 }, { "epoch": 0.17020701309674693, "grad_norm": 1.2238609790802002, "learning_rate": 2.9139336753738196e-05, "loss": 0.1927042007446289, "step": 1259 }, { "epoch": 0.1703422053231939, "grad_norm": 3.4364662170410156, "learning_rate": 2.913705019672491e-05, "loss": 0.22051620483398438, "step": 1260 }, { "epoch": 0.1704773975496409, "grad_norm": 1.9599090814590454, "learning_rate": 2.9134760696302386e-05, "loss": 0.17123985290527344, "step": 1261 }, { "epoch": 0.17061258977608787, "grad_norm": 1.544330358505249, "learning_rate": 2.9132468252947306e-05, "loss": 0.23200416564941406, "step": 1262 }, { "epoch": 0.17074778200253485, "grad_norm": 1.335659384727478, "learning_rate": 2.9130172867136974e-05, "loss": 0.1358184814453125, "step": 1263 }, { "epoch": 0.17088297422898183, "grad_norm": 3.2440483570098877, "learning_rate": 2.91278745393493e-05, "loss": 0.2698020935058594, "step": 1264 }, { "epoch": 0.1710181664554288, "grad_norm": 1.3370169401168823, "learning_rate": 2.9125573270062812e-05, "loss": 0.14377784729003906, "step": 1265 }, { "epoch": 0.17115335868187578, "grad_norm": 2.1438615322113037, "learning_rate": 2.9123269059756634e-05, "loss": 0.13239169120788574, "step": 1266 }, { "epoch": 0.17128855090832276, "grad_norm": 1.484450340270996, "learning_rate": 2.9120961908910528e-05, "loss": 0.24248695373535156, "step": 1267 }, { "epoch": 0.17142374313476974, "grad_norm": 3.4147424697875977, "learning_rate": 2.911865181800485e-05, "loss": 0.15267038345336914, "step": 1268 }, { "epoch": 0.17155893536121672, "grad_norm": 2.7103404998779297, "learning_rate": 2.9116338787520577e-05, "loss": 0.19556808471679688, "step": 1269 }, { "epoch": 0.1716941275876637, "grad_norm": 2.1455602645874023, "learning_rate": 2.9114022817939283e-05, "loss": 0.20649147033691406, "step": 1270 }, { "epoch": 0.17182931981411068, "grad_norm": 2.1904516220092773, "learning_rate": 2.911170390974318e-05, "loss": 0.1810760498046875, "step": 1271 }, { "epoch": 0.17196451204055765, "grad_norm": 1.3651081323623657, "learning_rate": 2.9109382063415067e-05, "loss": 0.20650005340576172, "step": 1272 }, { "epoch": 0.17209970426700466, "grad_norm": 1.9265291690826416, "learning_rate": 2.9107057279438372e-05, "loss": 0.1748943328857422, "step": 1273 }, { "epoch": 0.17223489649345164, "grad_norm": 1.6655797958374023, "learning_rate": 2.910472955829712e-05, "loss": 0.20395660400390625, "step": 1274 }, { "epoch": 0.17237008871989862, "grad_norm": 1.812628984451294, "learning_rate": 2.9102398900475958e-05, "loss": 0.1981515884399414, "step": 1275 }, { "epoch": 0.1725052809463456, "grad_norm": 1.1675834655761719, "learning_rate": 2.910006530646014e-05, "loss": 0.1596975326538086, "step": 1276 }, { "epoch": 0.17264047317279257, "grad_norm": 0.992638349533081, "learning_rate": 2.909772877673554e-05, "loss": 0.18923377990722656, "step": 1277 }, { "epoch": 0.17277566539923955, "grad_norm": 1.1776173114776611, "learning_rate": 2.9095389311788626e-05, "loss": 0.20680618286132812, "step": 1278 }, { "epoch": 0.17291085762568653, "grad_norm": 3.43151593208313, "learning_rate": 2.9093046912106494e-05, "loss": 0.2683224678039551, "step": 1279 }, { "epoch": 0.1730460498521335, "grad_norm": 2.155771017074585, "learning_rate": 2.909070157817684e-05, "loss": 0.21654129028320312, "step": 1280 }, { "epoch": 0.1731812420785805, "grad_norm": 1.0390444993972778, "learning_rate": 2.9088353310487976e-05, "loss": 0.17767047882080078, "step": 1281 }, { "epoch": 0.17331643430502747, "grad_norm": 2.0621514320373535, "learning_rate": 2.9086002109528825e-05, "loss": 0.2587318420410156, "step": 1282 }, { "epoch": 0.17345162653147445, "grad_norm": 1.309306025505066, "learning_rate": 2.908364797578892e-05, "loss": 0.14774131774902344, "step": 1283 }, { "epoch": 0.17358681875792142, "grad_norm": 3.02162766456604, "learning_rate": 2.9081290909758405e-05, "loss": 0.19872283935546875, "step": 1284 }, { "epoch": 0.1737220109843684, "grad_norm": 2.652358293533325, "learning_rate": 2.9078930911928033e-05, "loss": 0.2086009979248047, "step": 1285 }, { "epoch": 0.17385720321081538, "grad_norm": 1.419096827507019, "learning_rate": 2.907656798278916e-05, "loss": 0.21865081787109375, "step": 1286 }, { "epoch": 0.17399239543726236, "grad_norm": 1.9373184442520142, "learning_rate": 2.9074202122833773e-05, "loss": 0.1815357208251953, "step": 1287 }, { "epoch": 0.17412758766370934, "grad_norm": 2.5388596057891846, "learning_rate": 2.907183333255445e-05, "loss": 0.24713134765625, "step": 1288 }, { "epoch": 0.17426277989015632, "grad_norm": 3.0996196269989014, "learning_rate": 2.9069461612444384e-05, "loss": 0.23250579833984375, "step": 1289 }, { "epoch": 0.1743979721166033, "grad_norm": 2.24444317817688, "learning_rate": 2.9067086962997385e-05, "loss": 0.22878265380859375, "step": 1290 }, { "epoch": 0.17453316434305027, "grad_norm": 3.0670111179351807, "learning_rate": 2.9064709384707868e-05, "loss": 0.1834259033203125, "step": 1291 }, { "epoch": 0.17466835656949725, "grad_norm": 5.112541675567627, "learning_rate": 2.9062328878070855e-05, "loss": 0.2781410217285156, "step": 1292 }, { "epoch": 0.17480354879594423, "grad_norm": 2.009633779525757, "learning_rate": 2.905994544358198e-05, "loss": 0.16334152221679688, "step": 1293 }, { "epoch": 0.1749387410223912, "grad_norm": 3.704183340072632, "learning_rate": 2.9057559081737482e-05, "loss": 0.19887161254882812, "step": 1294 }, { "epoch": 0.1750739332488382, "grad_norm": 1.3218884468078613, "learning_rate": 2.9055169793034225e-05, "loss": 0.256317138671875, "step": 1295 }, { "epoch": 0.17520912547528517, "grad_norm": 2.1976206302642822, "learning_rate": 2.9052777577969656e-05, "loss": 0.20646047592163086, "step": 1296 }, { "epoch": 0.17534431770173214, "grad_norm": 3.787241220474243, "learning_rate": 2.9050382437041868e-05, "loss": 0.16766834259033203, "step": 1297 }, { "epoch": 0.17547950992817912, "grad_norm": 4.669532775878906, "learning_rate": 2.9047984370749526e-05, "loss": 0.27324581146240234, "step": 1298 }, { "epoch": 0.1756147021546261, "grad_norm": 1.2683221101760864, "learning_rate": 2.9045583379591925e-05, "loss": 0.17315006256103516, "step": 1299 }, { "epoch": 0.17574989438107308, "grad_norm": 3.358713388442993, "learning_rate": 2.9043179464068965e-05, "loss": 0.23006439208984375, "step": 1300 }, { "epoch": 0.17588508660752006, "grad_norm": 1.9076160192489624, "learning_rate": 2.9040772624681152e-05, "loss": 0.18961048126220703, "step": 1301 }, { "epoch": 0.17602027883396704, "grad_norm": 4.759708404541016, "learning_rate": 2.9038362861929603e-05, "loss": 0.20519065856933594, "step": 1302 }, { "epoch": 0.17615547106041402, "grad_norm": 1.4643889665603638, "learning_rate": 2.903595017631605e-05, "loss": 0.2100391387939453, "step": 1303 }, { "epoch": 0.176290663286861, "grad_norm": 1.7860510349273682, "learning_rate": 2.903353456834282e-05, "loss": 0.18061447143554688, "step": 1304 }, { "epoch": 0.17642585551330797, "grad_norm": 3.441455364227295, "learning_rate": 2.903111603851285e-05, "loss": 0.19680213928222656, "step": 1305 }, { "epoch": 0.17656104773975495, "grad_norm": 3.934690475463867, "learning_rate": 2.9028694587329704e-05, "loss": 0.18488597869873047, "step": 1306 }, { "epoch": 0.17669623996620196, "grad_norm": 2.9123475551605225, "learning_rate": 2.902627021529753e-05, "loss": 0.17446136474609375, "step": 1307 }, { "epoch": 0.17683143219264894, "grad_norm": 1.947124719619751, "learning_rate": 2.9023842922921105e-05, "loss": 0.16583776473999023, "step": 1308 }, { "epoch": 0.1769666244190959, "grad_norm": 3.5330393314361572, "learning_rate": 2.90214127107058e-05, "loss": 0.24016952514648438, "step": 1309 }, { "epoch": 0.1771018166455429, "grad_norm": 2.1990811824798584, "learning_rate": 2.9018979579157592e-05, "loss": 0.2100963592529297, "step": 1310 }, { "epoch": 0.17723700887198987, "grad_norm": 3.6456589698791504, "learning_rate": 2.901654352878308e-05, "loss": 0.21712112426757812, "step": 1311 }, { "epoch": 0.17737220109843685, "grad_norm": 1.4664796590805054, "learning_rate": 2.9014104560089462e-05, "loss": 0.14890432357788086, "step": 1312 }, { "epoch": 0.17750739332488383, "grad_norm": 0.9743348360061646, "learning_rate": 2.9011662673584538e-05, "loss": 0.1746203899383545, "step": 1313 }, { "epoch": 0.1776425855513308, "grad_norm": 3.7662954330444336, "learning_rate": 2.900921786977673e-05, "loss": 0.20755767822265625, "step": 1314 }, { "epoch": 0.17777777777777778, "grad_norm": 3.263828754425049, "learning_rate": 2.900677014917505e-05, "loss": 0.21925640106201172, "step": 1315 }, { "epoch": 0.17791297000422476, "grad_norm": 2.312995433807373, "learning_rate": 2.9004319512289136e-05, "loss": 0.22922229766845703, "step": 1316 }, { "epoch": 0.17804816223067174, "grad_norm": 1.111663818359375, "learning_rate": 2.9001865959629222e-05, "loss": 0.17883014678955078, "step": 1317 }, { "epoch": 0.17818335445711872, "grad_norm": 1.9362506866455078, "learning_rate": 2.8999409491706143e-05, "loss": 0.183624267578125, "step": 1318 }, { "epoch": 0.1783185466835657, "grad_norm": 2.6189606189727783, "learning_rate": 2.8996950109031355e-05, "loss": 0.18933868408203125, "step": 1319 }, { "epoch": 0.17845373891001268, "grad_norm": 1.5163934230804443, "learning_rate": 2.8994487812116917e-05, "loss": 0.21705055236816406, "step": 1320 }, { "epoch": 0.17858893113645966, "grad_norm": 0.9534285664558411, "learning_rate": 2.8992022601475483e-05, "loss": 0.203521728515625, "step": 1321 }, { "epoch": 0.17872412336290663, "grad_norm": 1.0295037031173706, "learning_rate": 2.8989554477620332e-05, "loss": 0.14105701446533203, "step": 1322 }, { "epoch": 0.1788593155893536, "grad_norm": 2.6572842597961426, "learning_rate": 2.8987083441065335e-05, "loss": 0.19663238525390625, "step": 1323 }, { "epoch": 0.1789945078158006, "grad_norm": 1.6833895444869995, "learning_rate": 2.8984609492324983e-05, "loss": 0.1596360206604004, "step": 1324 }, { "epoch": 0.17912970004224757, "grad_norm": 5.08781623840332, "learning_rate": 2.8982132631914357e-05, "loss": 0.30466651916503906, "step": 1325 }, { "epoch": 0.17926489226869455, "grad_norm": 2.629206657409668, "learning_rate": 2.8979652860349154e-05, "loss": 0.2878236770629883, "step": 1326 }, { "epoch": 0.17940008449514153, "grad_norm": 1.5497839450836182, "learning_rate": 2.8977170178145675e-05, "loss": 0.18839454650878906, "step": 1327 }, { "epoch": 0.1795352767215885, "grad_norm": 1.8535789251327515, "learning_rate": 2.8974684585820833e-05, "loss": 0.1353282928466797, "step": 1328 }, { "epoch": 0.17967046894803548, "grad_norm": 1.1143041849136353, "learning_rate": 2.8972196083892138e-05, "loss": 0.1876659393310547, "step": 1329 }, { "epoch": 0.17980566117448246, "grad_norm": 1.020523190498352, "learning_rate": 2.8969704672877707e-05, "loss": 0.1754608154296875, "step": 1330 }, { "epoch": 0.17994085340092944, "grad_norm": 3.169733762741089, "learning_rate": 2.896721035329627e-05, "loss": 0.20612144470214844, "step": 1331 }, { "epoch": 0.18007604562737642, "grad_norm": 1.071004033088684, "learning_rate": 2.8964713125667153e-05, "loss": 0.1641855239868164, "step": 1332 }, { "epoch": 0.1802112378538234, "grad_norm": 2.315422773361206, "learning_rate": 2.8962212990510294e-05, "loss": 0.19281768798828125, "step": 1333 }, { "epoch": 0.18034643008027038, "grad_norm": 1.7461645603179932, "learning_rate": 2.8959709948346237e-05, "loss": 0.19316387176513672, "step": 1334 }, { "epoch": 0.18048162230671735, "grad_norm": 3.4042670726776123, "learning_rate": 2.8957203999696124e-05, "loss": 0.1898670196533203, "step": 1335 }, { "epoch": 0.18061681453316433, "grad_norm": 0.979421854019165, "learning_rate": 2.8954695145081713e-05, "loss": 0.1663360595703125, "step": 1336 }, { "epoch": 0.1807520067596113, "grad_norm": 1.626186490058899, "learning_rate": 2.8952183385025356e-05, "loss": 0.23313522338867188, "step": 1337 }, { "epoch": 0.1808871989860583, "grad_norm": 5.553304195404053, "learning_rate": 2.8949668720050014e-05, "loss": 0.24918651580810547, "step": 1338 }, { "epoch": 0.18102239121250527, "grad_norm": 2.7268736362457275, "learning_rate": 2.8947151150679256e-05, "loss": 0.12266921997070312, "step": 1339 }, { "epoch": 0.18115758343895225, "grad_norm": 4.369574069976807, "learning_rate": 2.8944630677437255e-05, "loss": 0.19501686096191406, "step": 1340 }, { "epoch": 0.18129277566539925, "grad_norm": 1.12800133228302, "learning_rate": 2.8942107300848784e-05, "loss": 0.12608051300048828, "step": 1341 }, { "epoch": 0.18142796789184623, "grad_norm": 0.9669626355171204, "learning_rate": 2.8939581021439225e-05, "loss": 0.2141742706298828, "step": 1342 }, { "epoch": 0.1815631601182932, "grad_norm": 1.275140643119812, "learning_rate": 2.8937051839734563e-05, "loss": 0.2077617645263672, "step": 1343 }, { "epoch": 0.1816983523447402, "grad_norm": 1.469282865524292, "learning_rate": 2.8934519756261384e-05, "loss": 0.216949462890625, "step": 1344 }, { "epoch": 0.18183354457118717, "grad_norm": 1.6897363662719727, "learning_rate": 2.8931984771546885e-05, "loss": 0.11701011657714844, "step": 1345 }, { "epoch": 0.18196873679763415, "grad_norm": 3.3048653602600098, "learning_rate": 2.8929446886118866e-05, "loss": 0.20543861389160156, "step": 1346 }, { "epoch": 0.18210392902408112, "grad_norm": 2.647064447402954, "learning_rate": 2.892690610050572e-05, "loss": 0.19722390174865723, "step": 1347 }, { "epoch": 0.1822391212505281, "grad_norm": 1.1585471630096436, "learning_rate": 2.892436241523646e-05, "loss": 0.17193031311035156, "step": 1348 }, { "epoch": 0.18237431347697508, "grad_norm": 2.3201839923858643, "learning_rate": 2.8921815830840685e-05, "loss": 0.18024063110351562, "step": 1349 }, { "epoch": 0.18250950570342206, "grad_norm": 1.4231117963790894, "learning_rate": 2.891926634784862e-05, "loss": 0.20241689682006836, "step": 1350 }, { "epoch": 0.18264469792986904, "grad_norm": 0.9113848209381104, "learning_rate": 2.8916713966791076e-05, "loss": 0.19225502014160156, "step": 1351 }, { "epoch": 0.18277989015631602, "grad_norm": 1.8199065923690796, "learning_rate": 2.8914158688199464e-05, "loss": 0.1899886131286621, "step": 1352 }, { "epoch": 0.182915082382763, "grad_norm": 1.0881524085998535, "learning_rate": 2.891160051260582e-05, "loss": 0.17791080474853516, "step": 1353 }, { "epoch": 0.18305027460920997, "grad_norm": 5.457920551300049, "learning_rate": 2.8909039440542758e-05, "loss": 0.26273536682128906, "step": 1354 }, { "epoch": 0.18318546683565695, "grad_norm": 4.6656174659729, "learning_rate": 2.890647547254352e-05, "loss": 0.19739341735839844, "step": 1355 }, { "epoch": 0.18332065906210393, "grad_norm": 3.544001579284668, "learning_rate": 2.8903908609141923e-05, "loss": 0.23843002319335938, "step": 1356 }, { "epoch": 0.1834558512885509, "grad_norm": 3.8569486141204834, "learning_rate": 2.8901338850872413e-05, "loss": 0.24228811264038086, "step": 1357 }, { "epoch": 0.1835910435149979, "grad_norm": 1.0984386205673218, "learning_rate": 2.8898766198270022e-05, "loss": 0.21202754974365234, "step": 1358 }, { "epoch": 0.18372623574144487, "grad_norm": 3.0971057415008545, "learning_rate": 2.8896190651870392e-05, "loss": 0.21640777587890625, "step": 1359 }, { "epoch": 0.18386142796789184, "grad_norm": 1.6464974880218506, "learning_rate": 2.8893612212209763e-05, "loss": 0.23998451232910156, "step": 1360 }, { "epoch": 0.18399662019433882, "grad_norm": 1.1478816270828247, "learning_rate": 2.8891030879824985e-05, "loss": 0.11033439636230469, "step": 1361 }, { "epoch": 0.1841318124207858, "grad_norm": 1.7123949527740479, "learning_rate": 2.88884466552535e-05, "loss": 0.1629199981689453, "step": 1362 }, { "epoch": 0.18426700464723278, "grad_norm": 3.9064555168151855, "learning_rate": 2.888585953903336e-05, "loss": 0.271240234375, "step": 1363 }, { "epoch": 0.18440219687367976, "grad_norm": 2.7453773021698, "learning_rate": 2.888326953170321e-05, "loss": 0.20867347717285156, "step": 1364 }, { "epoch": 0.18453738910012674, "grad_norm": 2.2827341556549072, "learning_rate": 2.8880676633802314e-05, "loss": 0.19246673583984375, "step": 1365 }, { "epoch": 0.18467258132657371, "grad_norm": 3.1657333374023438, "learning_rate": 2.8878080845870522e-05, "loss": 0.14481115341186523, "step": 1366 }, { "epoch": 0.1848077735530207, "grad_norm": 3.421050786972046, "learning_rate": 2.887548216844829e-05, "loss": 0.2201251983642578, "step": 1367 }, { "epoch": 0.18494296577946767, "grad_norm": 2.5648486614227295, "learning_rate": 2.8872880602076675e-05, "loss": 0.20283889770507812, "step": 1368 }, { "epoch": 0.18507815800591465, "grad_norm": 4.422479629516602, "learning_rate": 2.8870276147297344e-05, "loss": 0.2301197052001953, "step": 1369 }, { "epoch": 0.18521335023236163, "grad_norm": 1.447017788887024, "learning_rate": 2.8867668804652552e-05, "loss": 0.18880653381347656, "step": 1370 }, { "epoch": 0.1853485424588086, "grad_norm": 2.7759850025177, "learning_rate": 2.886505857468516e-05, "loss": 0.16425418853759766, "step": 1371 }, { "epoch": 0.18548373468525559, "grad_norm": 3.4335579872131348, "learning_rate": 2.8862445457938642e-05, "loss": 0.22813034057617188, "step": 1372 }, { "epoch": 0.18561892691170256, "grad_norm": 3.0456130504608154, "learning_rate": 2.8859829454957053e-05, "loss": 0.25408935546875, "step": 1373 }, { "epoch": 0.18575411913814954, "grad_norm": 4.762476444244385, "learning_rate": 2.8857210566285062e-05, "loss": 0.22141408920288086, "step": 1374 }, { "epoch": 0.18588931136459655, "grad_norm": 4.590306758880615, "learning_rate": 2.8854588792467932e-05, "loss": 0.2428569793701172, "step": 1375 }, { "epoch": 0.18602450359104353, "grad_norm": 2.2845871448516846, "learning_rate": 2.8851964134051535e-05, "loss": 0.19602394104003906, "step": 1376 }, { "epoch": 0.1861596958174905, "grad_norm": 3.2314529418945312, "learning_rate": 2.884933659158234e-05, "loss": 0.2736015319824219, "step": 1377 }, { "epoch": 0.18629488804393748, "grad_norm": 2.533949136734009, "learning_rate": 2.8846706165607415e-05, "loss": 0.1538395881652832, "step": 1378 }, { "epoch": 0.18643008027038446, "grad_norm": 5.120025634765625, "learning_rate": 2.8844072856674422e-05, "loss": 0.22174644470214844, "step": 1379 }, { "epoch": 0.18656527249683144, "grad_norm": 2.427504062652588, "learning_rate": 2.8841436665331634e-05, "loss": 0.22246456146240234, "step": 1380 }, { "epoch": 0.18670046472327842, "grad_norm": 1.838721752166748, "learning_rate": 2.8838797592127927e-05, "loss": 0.2053365707397461, "step": 1381 }, { "epoch": 0.1868356569497254, "grad_norm": 0.906598687171936, "learning_rate": 2.883615563761276e-05, "loss": 0.1869983673095703, "step": 1382 }, { "epoch": 0.18697084917617238, "grad_norm": 4.842560291290283, "learning_rate": 2.8833510802336206e-05, "loss": 0.30701446533203125, "step": 1383 }, { "epoch": 0.18710604140261936, "grad_norm": 2.9008145332336426, "learning_rate": 2.883086308684893e-05, "loss": 0.17647171020507812, "step": 1384 }, { "epoch": 0.18724123362906633, "grad_norm": 5.069915771484375, "learning_rate": 2.882821249170221e-05, "loss": 0.23810958862304688, "step": 1385 }, { "epoch": 0.1873764258555133, "grad_norm": 3.567537546157837, "learning_rate": 2.8825559017447905e-05, "loss": 0.2289581298828125, "step": 1386 }, { "epoch": 0.1875116180819603, "grad_norm": 1.2587440013885498, "learning_rate": 2.8822902664638487e-05, "loss": 0.20497703552246094, "step": 1387 }, { "epoch": 0.18764681030840727, "grad_norm": 1.731467843055725, "learning_rate": 2.882024343382702e-05, "loss": 0.18185138702392578, "step": 1388 }, { "epoch": 0.18778200253485425, "grad_norm": 1.4052733182907104, "learning_rate": 2.8817581325567174e-05, "loss": 0.24340581893920898, "step": 1389 }, { "epoch": 0.18791719476130123, "grad_norm": 1.1212856769561768, "learning_rate": 2.8814916340413205e-05, "loss": 0.1824474334716797, "step": 1390 }, { "epoch": 0.1880523869877482, "grad_norm": 3.4490857124328613, "learning_rate": 2.881224847891999e-05, "loss": 0.21667861938476562, "step": 1391 }, { "epoch": 0.18818757921419518, "grad_norm": 2.4879043102264404, "learning_rate": 2.8809577741642987e-05, "loss": 0.2562370300292969, "step": 1392 }, { "epoch": 0.18832277144064216, "grad_norm": 1.0481992959976196, "learning_rate": 2.8806904129138255e-05, "loss": 0.1817483901977539, "step": 1393 }, { "epoch": 0.18845796366708914, "grad_norm": 1.4090983867645264, "learning_rate": 2.8804227641962457e-05, "loss": 0.21759605407714844, "step": 1394 }, { "epoch": 0.18859315589353612, "grad_norm": 0.908618688583374, "learning_rate": 2.8801548280672847e-05, "loss": 0.14403915405273438, "step": 1395 }, { "epoch": 0.1887283481199831, "grad_norm": 2.1785736083984375, "learning_rate": 2.8798866045827288e-05, "loss": 0.18030166625976562, "step": 1396 }, { "epoch": 0.18886354034643008, "grad_norm": 1.8354631662368774, "learning_rate": 2.8796180937984234e-05, "loss": 0.13053417205810547, "step": 1397 }, { "epoch": 0.18899873257287705, "grad_norm": 1.5700068473815918, "learning_rate": 2.8793492957702738e-05, "loss": 0.14382648468017578, "step": 1398 }, { "epoch": 0.18913392479932403, "grad_norm": 0.8196216821670532, "learning_rate": 2.8790802105542454e-05, "loss": 0.16629981994628906, "step": 1399 }, { "epoch": 0.189269117025771, "grad_norm": 1.1157335042953491, "learning_rate": 2.8788108382063628e-05, "loss": 0.20897865295410156, "step": 1400 }, { "epoch": 0.189404309252218, "grad_norm": 5.303626537322998, "learning_rate": 2.878541178782711e-05, "loss": 0.22238540649414062, "step": 1401 }, { "epoch": 0.18953950147866497, "grad_norm": 6.307397842407227, "learning_rate": 2.8782712323394344e-05, "loss": 0.25563812255859375, "step": 1402 }, { "epoch": 0.18967469370511195, "grad_norm": 7.425320625305176, "learning_rate": 2.878000998932738e-05, "loss": 0.23553085327148438, "step": 1403 }, { "epoch": 0.18980988593155892, "grad_norm": 3.187054395675659, "learning_rate": 2.8777304786188847e-05, "loss": 0.14130496978759766, "step": 1404 }, { "epoch": 0.1899450781580059, "grad_norm": 4.92216157913208, "learning_rate": 2.8774596714541988e-05, "loss": 0.24230575561523438, "step": 1405 }, { "epoch": 0.19008027038445288, "grad_norm": 0.7569880485534668, "learning_rate": 2.8771885774950637e-05, "loss": 0.1248779296875, "step": 1406 }, { "epoch": 0.19021546261089986, "grad_norm": 1.5575529336929321, "learning_rate": 2.876917196797923e-05, "loss": 0.17722320556640625, "step": 1407 }, { "epoch": 0.19035065483734684, "grad_norm": 2.14261794090271, "learning_rate": 2.876645529419279e-05, "loss": 0.23764801025390625, "step": 1408 }, { "epoch": 0.19048584706379385, "grad_norm": 1.227704644203186, "learning_rate": 2.876373575415695e-05, "loss": 0.17714691162109375, "step": 1409 }, { "epoch": 0.19062103929024082, "grad_norm": 2.7674806118011475, "learning_rate": 2.8761013348437926e-05, "loss": 0.21637344360351562, "step": 1410 }, { "epoch": 0.1907562315166878, "grad_norm": 2.050769805908203, "learning_rate": 2.875828807760254e-05, "loss": 0.23986530303955078, "step": 1411 }, { "epoch": 0.19089142374313478, "grad_norm": 4.342446327209473, "learning_rate": 2.875555994221821e-05, "loss": 0.23349761962890625, "step": 1412 }, { "epoch": 0.19102661596958176, "grad_norm": 1.3309298753738403, "learning_rate": 2.8752828942852943e-05, "loss": 0.14211082458496094, "step": 1413 }, { "epoch": 0.19116180819602874, "grad_norm": 1.2024198770523071, "learning_rate": 2.875009508007535e-05, "loss": 0.1692056655883789, "step": 1414 }, { "epoch": 0.19129700042247572, "grad_norm": 1.1278339624404907, "learning_rate": 2.8747358354454642e-05, "loss": 0.1775684356689453, "step": 1415 }, { "epoch": 0.1914321926489227, "grad_norm": 3.680670738220215, "learning_rate": 2.8744618766560614e-05, "loss": 0.23484420776367188, "step": 1416 }, { "epoch": 0.19156738487536967, "grad_norm": 1.6394540071487427, "learning_rate": 2.8741876316963664e-05, "loss": 0.23087024688720703, "step": 1417 }, { "epoch": 0.19170257710181665, "grad_norm": 1.3851091861724854, "learning_rate": 2.873913100623478e-05, "loss": 0.20172119140625, "step": 1418 }, { "epoch": 0.19183776932826363, "grad_norm": 1.9548044204711914, "learning_rate": 2.873638283494556e-05, "loss": 0.2664356231689453, "step": 1419 }, { "epoch": 0.1919729615547106, "grad_norm": 1.3033467531204224, "learning_rate": 2.8733631803668178e-05, "loss": 0.14479398727416992, "step": 1420 }, { "epoch": 0.1921081537811576, "grad_norm": 0.9137272834777832, "learning_rate": 2.8730877912975418e-05, "loss": 0.16078853607177734, "step": 1421 }, { "epoch": 0.19224334600760457, "grad_norm": 0.9136776328086853, "learning_rate": 2.8728121163440656e-05, "loss": 0.17396926879882812, "step": 1422 }, { "epoch": 0.19237853823405154, "grad_norm": 5.344334125518799, "learning_rate": 2.8725361555637863e-05, "loss": 0.22088146209716797, "step": 1423 }, { "epoch": 0.19251373046049852, "grad_norm": 1.1931527853012085, "learning_rate": 2.8722599090141598e-05, "loss": 0.2191762924194336, "step": 1424 }, { "epoch": 0.1926489226869455, "grad_norm": 1.853440523147583, "learning_rate": 2.8719833767527026e-05, "loss": 0.23706769943237305, "step": 1425 }, { "epoch": 0.19278411491339248, "grad_norm": 3.5897345542907715, "learning_rate": 2.8717065588369896e-05, "loss": 0.22147750854492188, "step": 1426 }, { "epoch": 0.19291930713983946, "grad_norm": 1.0917056798934937, "learning_rate": 2.871429455324657e-05, "loss": 0.13545799255371094, "step": 1427 }, { "epoch": 0.19305449936628644, "grad_norm": 1.1243360042572021, "learning_rate": 2.871152066273398e-05, "loss": 0.2462306022644043, "step": 1428 }, { "epoch": 0.19318969159273341, "grad_norm": 1.8675858974456787, "learning_rate": 2.870874391740967e-05, "loss": 0.1827373504638672, "step": 1429 }, { "epoch": 0.1933248838191804, "grad_norm": 2.4345269203186035, "learning_rate": 2.8705964317851774e-05, "loss": 0.2342853546142578, "step": 1430 }, { "epoch": 0.19346007604562737, "grad_norm": 1.1927862167358398, "learning_rate": 2.8703181864639013e-05, "loss": 0.20449209213256836, "step": 1431 }, { "epoch": 0.19359526827207435, "grad_norm": 1.2638800144195557, "learning_rate": 2.870039655835072e-05, "loss": 0.2043933868408203, "step": 1432 }, { "epoch": 0.19373046049852133, "grad_norm": 1.4303911924362183, "learning_rate": 2.8697608399566796e-05, "loss": 0.16094160079956055, "step": 1433 }, { "epoch": 0.1938656527249683, "grad_norm": 0.7716794610023499, "learning_rate": 2.869481738886777e-05, "loss": 0.1574099063873291, "step": 1434 }, { "epoch": 0.19400084495141529, "grad_norm": 1.1690324544906616, "learning_rate": 2.8692023526834725e-05, "loss": 0.19349193572998047, "step": 1435 }, { "epoch": 0.19413603717786226, "grad_norm": 1.04790198802948, "learning_rate": 2.8689226814049367e-05, "loss": 0.18207645416259766, "step": 1436 }, { "epoch": 0.19427122940430924, "grad_norm": 1.2554149627685547, "learning_rate": 2.868642725109399e-05, "loss": 0.2126293182373047, "step": 1437 }, { "epoch": 0.19440642163075622, "grad_norm": 1.4943777322769165, "learning_rate": 2.868362483855147e-05, "loss": 0.2154521942138672, "step": 1438 }, { "epoch": 0.1945416138572032, "grad_norm": 5.169602394104004, "learning_rate": 2.8680819577005295e-05, "loss": 0.23990154266357422, "step": 1439 }, { "epoch": 0.19467680608365018, "grad_norm": 1.9807720184326172, "learning_rate": 2.8678011467039526e-05, "loss": 0.20207691192626953, "step": 1440 }, { "epoch": 0.19481199831009716, "grad_norm": 1.802787184715271, "learning_rate": 2.867520050923883e-05, "loss": 0.19464683532714844, "step": 1441 }, { "epoch": 0.19494719053654413, "grad_norm": 1.0303736925125122, "learning_rate": 2.8672386704188466e-05, "loss": 0.1305065155029297, "step": 1442 }, { "epoch": 0.19508238276299114, "grad_norm": 1.8220689296722412, "learning_rate": 2.8669570052474273e-05, "loss": 0.204681396484375, "step": 1443 }, { "epoch": 0.19521757498943812, "grad_norm": 3.049875259399414, "learning_rate": 2.86667505546827e-05, "loss": 0.1982402801513672, "step": 1444 }, { "epoch": 0.1953527672158851, "grad_norm": 1.4490158557891846, "learning_rate": 2.866392821140079e-05, "loss": 0.21976852416992188, "step": 1445 }, { "epoch": 0.19548795944233208, "grad_norm": 1.27225923538208, "learning_rate": 2.8661103023216154e-05, "loss": 0.18479537963867188, "step": 1446 }, { "epoch": 0.19562315166877906, "grad_norm": 1.972221851348877, "learning_rate": 2.8658274990717018e-05, "loss": 0.14703655242919922, "step": 1447 }, { "epoch": 0.19575834389522603, "grad_norm": 1.6986520290374756, "learning_rate": 2.86554441144922e-05, "loss": 0.23073577880859375, "step": 1448 }, { "epoch": 0.195893536121673, "grad_norm": 3.859036445617676, "learning_rate": 2.8652610395131097e-05, "loss": 0.24255752563476562, "step": 1449 }, { "epoch": 0.19602872834812, "grad_norm": 5.375704765319824, "learning_rate": 2.8649773833223702e-05, "loss": 0.26262664794921875, "step": 1450 }, { "epoch": 0.19616392057456697, "grad_norm": 1.6645658016204834, "learning_rate": 2.8646934429360606e-05, "loss": 0.1817626953125, "step": 1451 }, { "epoch": 0.19629911280101395, "grad_norm": 4.011504173278809, "learning_rate": 2.8644092184132986e-05, "loss": 0.23273086547851562, "step": 1452 }, { "epoch": 0.19643430502746093, "grad_norm": 2.6857388019561768, "learning_rate": 2.864124709813262e-05, "loss": 0.2297215461730957, "step": 1453 }, { "epoch": 0.1965694972539079, "grad_norm": 1.2273344993591309, "learning_rate": 2.8638399171951856e-05, "loss": 0.17424488067626953, "step": 1454 }, { "epoch": 0.19670468948035488, "grad_norm": 1.7742382287979126, "learning_rate": 2.8635548406183664e-05, "loss": 0.16910552978515625, "step": 1455 }, { "epoch": 0.19683988170680186, "grad_norm": 2.5885918140411377, "learning_rate": 2.8632694801421576e-05, "loss": 0.12966537475585938, "step": 1456 }, { "epoch": 0.19697507393324884, "grad_norm": 1.0252225399017334, "learning_rate": 2.862983835825973e-05, "loss": 0.22079086303710938, "step": 1457 }, { "epoch": 0.19711026615969582, "grad_norm": 1.3225539922714233, "learning_rate": 2.8626979077292856e-05, "loss": 0.20037174224853516, "step": 1458 }, { "epoch": 0.1972454583861428, "grad_norm": 3.418307065963745, "learning_rate": 2.862411695911627e-05, "loss": 0.23319053649902344, "step": 1459 }, { "epoch": 0.19738065061258978, "grad_norm": 0.6984314918518066, "learning_rate": 2.862125200432588e-05, "loss": 0.127166748046875, "step": 1460 }, { "epoch": 0.19751584283903675, "grad_norm": 1.5432121753692627, "learning_rate": 2.8618384213518188e-05, "loss": 0.16589832305908203, "step": 1461 }, { "epoch": 0.19765103506548373, "grad_norm": 1.9159175157546997, "learning_rate": 2.861551358729028e-05, "loss": 0.18456268310546875, "step": 1462 }, { "epoch": 0.1977862272919307, "grad_norm": 1.0911264419555664, "learning_rate": 2.8612640126239836e-05, "loss": 0.1954631805419922, "step": 1463 }, { "epoch": 0.1979214195183777, "grad_norm": 1.3658831119537354, "learning_rate": 2.8609763830965126e-05, "loss": 0.2354264259338379, "step": 1464 }, { "epoch": 0.19805661174482467, "grad_norm": 2.338291645050049, "learning_rate": 2.860688470206501e-05, "loss": 0.2104191780090332, "step": 1465 }, { "epoch": 0.19819180397127165, "grad_norm": 2.187239408493042, "learning_rate": 2.8604002740138936e-05, "loss": 0.2264385223388672, "step": 1466 }, { "epoch": 0.19832699619771862, "grad_norm": 1.0135387182235718, "learning_rate": 2.860111794578695e-05, "loss": 0.12063407897949219, "step": 1467 }, { "epoch": 0.1984621884241656, "grad_norm": 1.056800127029419, "learning_rate": 2.8598230319609677e-05, "loss": 0.1545705795288086, "step": 1468 }, { "epoch": 0.19859738065061258, "grad_norm": 1.6733328104019165, "learning_rate": 2.8595339862208336e-05, "loss": 0.17456912994384766, "step": 1469 }, { "epoch": 0.19873257287705956, "grad_norm": 3.5856924057006836, "learning_rate": 2.8592446574184733e-05, "loss": 0.2902717590332031, "step": 1470 }, { "epoch": 0.19886776510350654, "grad_norm": 2.837336540222168, "learning_rate": 2.8589550456141274e-05, "loss": 0.2137737274169922, "step": 1471 }, { "epoch": 0.19900295732995352, "grad_norm": 1.3219761848449707, "learning_rate": 2.8586651508680942e-05, "loss": 0.17941951751708984, "step": 1472 }, { "epoch": 0.1991381495564005, "grad_norm": 1.946235179901123, "learning_rate": 2.8583749732407312e-05, "loss": 0.2377338409423828, "step": 1473 }, { "epoch": 0.19927334178284747, "grad_norm": 2.5637385845184326, "learning_rate": 2.8580845127924546e-05, "loss": 0.18741416931152344, "step": 1474 }, { "epoch": 0.19940853400929445, "grad_norm": 3.8044192790985107, "learning_rate": 2.8577937695837408e-05, "loss": 0.22407150268554688, "step": 1475 }, { "epoch": 0.19954372623574146, "grad_norm": 2.4386212825775146, "learning_rate": 2.8575027436751235e-05, "loss": 0.18420982360839844, "step": 1476 }, { "epoch": 0.19967891846218844, "grad_norm": 1.290191650390625, "learning_rate": 2.8572114351271955e-05, "loss": 0.17699813842773438, "step": 1477 }, { "epoch": 0.19981411068863542, "grad_norm": 3.3647961616516113, "learning_rate": 2.85691984400061e-05, "loss": 0.19544219970703125, "step": 1478 }, { "epoch": 0.1999493029150824, "grad_norm": 0.8163108825683594, "learning_rate": 2.8566279703560762e-05, "loss": 0.18499088287353516, "step": 1479 }, { "epoch": 0.20008449514152937, "grad_norm": 1.1311842203140259, "learning_rate": 2.8563358142543648e-05, "loss": 0.17033767700195312, "step": 1480 }, { "epoch": 0.20021968736797635, "grad_norm": 2.9506378173828125, "learning_rate": 2.856043375756304e-05, "loss": 0.27475738525390625, "step": 1481 }, { "epoch": 0.20035487959442333, "grad_norm": 2.901394844055176, "learning_rate": 2.855750654922781e-05, "loss": 0.24585342407226562, "step": 1482 }, { "epoch": 0.2004900718208703, "grad_norm": 2.0981180667877197, "learning_rate": 2.855457651814742e-05, "loss": 0.23094463348388672, "step": 1483 }, { "epoch": 0.2006252640473173, "grad_norm": 2.154557704925537, "learning_rate": 2.8551643664931916e-05, "loss": 0.22888565063476562, "step": 1484 }, { "epoch": 0.20076045627376427, "grad_norm": 1.1707566976547241, "learning_rate": 2.8548707990191933e-05, "loss": 0.24201107025146484, "step": 1485 }, { "epoch": 0.20089564850021124, "grad_norm": 3.0858397483825684, "learning_rate": 2.8545769494538698e-05, "loss": 0.21252059936523438, "step": 1486 }, { "epoch": 0.20103084072665822, "grad_norm": 3.447730541229248, "learning_rate": 2.854282817858402e-05, "loss": 0.16162776947021484, "step": 1487 }, { "epoch": 0.2011660329531052, "grad_norm": 4.726772308349609, "learning_rate": 2.85398840429403e-05, "loss": 0.24300384521484375, "step": 1488 }, { "epoch": 0.20130122517955218, "grad_norm": 1.1938174962997437, "learning_rate": 2.853693708822051e-05, "loss": 0.23445892333984375, "step": 1489 }, { "epoch": 0.20143641740599916, "grad_norm": 1.2891297340393066, "learning_rate": 2.8533987315038234e-05, "loss": 0.1974506378173828, "step": 1490 }, { "epoch": 0.20157160963244614, "grad_norm": 2.2705130577087402, "learning_rate": 2.8531034724007627e-05, "loss": 0.2105541229248047, "step": 1491 }, { "epoch": 0.20170680185889311, "grad_norm": 1.015687108039856, "learning_rate": 2.8528079315743435e-05, "loss": 0.16239356994628906, "step": 1492 }, { "epoch": 0.2018419940853401, "grad_norm": 2.74764347076416, "learning_rate": 2.852512109086099e-05, "loss": 0.20856809616088867, "step": 1493 }, { "epoch": 0.20197718631178707, "grad_norm": 5.50916862487793, "learning_rate": 2.8522160049976208e-05, "loss": 0.2513465881347656, "step": 1494 }, { "epoch": 0.20211237853823405, "grad_norm": 0.9173393249511719, "learning_rate": 2.8519196193705595e-05, "loss": 0.14350605010986328, "step": 1495 }, { "epoch": 0.20224757076468103, "grad_norm": 1.6260699033737183, "learning_rate": 2.8516229522666243e-05, "loss": 0.19754600524902344, "step": 1496 }, { "epoch": 0.202382762991128, "grad_norm": 1.496013879776001, "learning_rate": 2.8513260037475825e-05, "loss": 0.1762409210205078, "step": 1497 }, { "epoch": 0.20251795521757499, "grad_norm": 1.4995561838150024, "learning_rate": 2.8510287738752604e-05, "loss": 0.21907901763916016, "step": 1498 }, { "epoch": 0.20265314744402196, "grad_norm": 1.9306213855743408, "learning_rate": 2.8507312627115435e-05, "loss": 0.13698774576187134, "step": 1499 }, { "epoch": 0.20278833967046894, "grad_norm": 1.097343921661377, "learning_rate": 2.850433470318374e-05, "loss": 0.1705303192138672, "step": 1500 }, { "epoch": 0.20292353189691592, "grad_norm": 2.041163682937622, "learning_rate": 2.8501353967577556e-05, "loss": 0.20937156677246094, "step": 1501 }, { "epoch": 0.2030587241233629, "grad_norm": 1.0126675367355347, "learning_rate": 2.8498370420917468e-05, "loss": 0.16542339324951172, "step": 1502 }, { "epoch": 0.20319391634980988, "grad_norm": 1.2650238275527954, "learning_rate": 2.8495384063824683e-05, "loss": 0.21272659301757812, "step": 1503 }, { "epoch": 0.20332910857625686, "grad_norm": 3.2360408306121826, "learning_rate": 2.8492394896920964e-05, "loss": 0.19436931610107422, "step": 1504 }, { "epoch": 0.20346430080270383, "grad_norm": 0.7906304001808167, "learning_rate": 2.848940292082868e-05, "loss": 0.17124080657958984, "step": 1505 }, { "epoch": 0.2035994930291508, "grad_norm": 1.1428529024124146, "learning_rate": 2.8486408136170772e-05, "loss": 0.16841888427734375, "step": 1506 }, { "epoch": 0.2037346852555978, "grad_norm": 3.7356717586517334, "learning_rate": 2.8483410543570776e-05, "loss": 0.22685575485229492, "step": 1507 }, { "epoch": 0.20386987748204477, "grad_norm": 3.227050542831421, "learning_rate": 2.8480410143652803e-05, "loss": 0.26908111572265625, "step": 1508 }, { "epoch": 0.20400506970849175, "grad_norm": 1.2363524436950684, "learning_rate": 2.8477406937041547e-05, "loss": 0.2000408172607422, "step": 1509 }, { "epoch": 0.20414026193493875, "grad_norm": 2.7056961059570312, "learning_rate": 2.8474400924362298e-05, "loss": 0.21938133239746094, "step": 1510 }, { "epoch": 0.20427545416138573, "grad_norm": 0.7228403091430664, "learning_rate": 2.847139210624092e-05, "loss": 0.1736917495727539, "step": 1511 }, { "epoch": 0.2044106463878327, "grad_norm": 2.4037535190582275, "learning_rate": 2.8468380483303873e-05, "loss": 0.20244455337524414, "step": 1512 }, { "epoch": 0.2045458386142797, "grad_norm": 1.0335203409194946, "learning_rate": 2.8465366056178183e-05, "loss": 0.11712980270385742, "step": 1513 }, { "epoch": 0.20468103084072667, "grad_norm": 2.4525163173675537, "learning_rate": 2.8462348825491475e-05, "loss": 0.21556472778320312, "step": 1514 }, { "epoch": 0.20481622306717365, "grad_norm": 1.2312915325164795, "learning_rate": 2.8459328791871953e-05, "loss": 0.2537040710449219, "step": 1515 }, { "epoch": 0.20495141529362063, "grad_norm": 2.9004316329956055, "learning_rate": 2.8456305955948402e-05, "loss": 0.20762920379638672, "step": 1516 }, { "epoch": 0.2050866075200676, "grad_norm": 2.7718160152435303, "learning_rate": 2.845328031835019e-05, "loss": 0.23495864868164062, "step": 1517 }, { "epoch": 0.20522179974651458, "grad_norm": 2.2636141777038574, "learning_rate": 2.8450251879707277e-05, "loss": 0.244171142578125, "step": 1518 }, { "epoch": 0.20535699197296156, "grad_norm": 1.0177468061447144, "learning_rate": 2.8447220640650194e-05, "loss": 0.18434715270996094, "step": 1519 }, { "epoch": 0.20549218419940854, "grad_norm": 1.335494875907898, "learning_rate": 2.8444186601810068e-05, "loss": 0.24886703491210938, "step": 1520 }, { "epoch": 0.20562737642585552, "grad_norm": 3.091848134994507, "learning_rate": 2.84411497638186e-05, "loss": 0.20006465911865234, "step": 1521 }, { "epoch": 0.2057625686523025, "grad_norm": 3.217728614807129, "learning_rate": 2.843811012730807e-05, "loss": 0.2499542236328125, "step": 1522 }, { "epoch": 0.20589776087874948, "grad_norm": 1.284584879875183, "learning_rate": 2.8435067692911353e-05, "loss": 0.2462329864501953, "step": 1523 }, { "epoch": 0.20603295310519645, "grad_norm": 1.2856589555740356, "learning_rate": 2.8432022461261897e-05, "loss": 0.19455242156982422, "step": 1524 }, { "epoch": 0.20616814533164343, "grad_norm": 3.2996480464935303, "learning_rate": 2.8428974432993736e-05, "loss": 0.2416839599609375, "step": 1525 }, { "epoch": 0.2063033375580904, "grad_norm": 2.059166431427002, "learning_rate": 2.8425923608741486e-05, "loss": 0.22332239151000977, "step": 1526 }, { "epoch": 0.2064385297845374, "grad_norm": 1.8817386627197266, "learning_rate": 2.8422869989140343e-05, "loss": 0.20667552947998047, "step": 1527 }, { "epoch": 0.20657372201098437, "grad_norm": 0.9530999660491943, "learning_rate": 2.8419813574826093e-05, "loss": 0.16479206085205078, "step": 1528 }, { "epoch": 0.20670891423743135, "grad_norm": 2.440660238265991, "learning_rate": 2.8416754366435092e-05, "loss": 0.20564651489257812, "step": 1529 }, { "epoch": 0.20684410646387832, "grad_norm": 1.2118011713027954, "learning_rate": 2.8413692364604285e-05, "loss": 0.16583633422851562, "step": 1530 }, { "epoch": 0.2069792986903253, "grad_norm": 4.1070122718811035, "learning_rate": 2.8410627569971197e-05, "loss": 0.2732048034667969, "step": 1531 }, { "epoch": 0.20711449091677228, "grad_norm": 3.7998554706573486, "learning_rate": 2.8407559983173934e-05, "loss": 0.26544189453125, "step": 1532 }, { "epoch": 0.20724968314321926, "grad_norm": 1.8670927286148071, "learning_rate": 2.8404489604851186e-05, "loss": 0.23783183097839355, "step": 1533 }, { "epoch": 0.20738487536966624, "grad_norm": 2.254140615463257, "learning_rate": 2.840141643564222e-05, "loss": 0.2005634307861328, "step": 1534 }, { "epoch": 0.20752006759611322, "grad_norm": 1.4189573526382446, "learning_rate": 2.8398340476186885e-05, "loss": 0.18670654296875, "step": 1535 }, { "epoch": 0.2076552598225602, "grad_norm": 1.2199699878692627, "learning_rate": 2.8395261727125617e-05, "loss": 0.23779630661010742, "step": 1536 }, { "epoch": 0.20779045204900717, "grad_norm": 4.575888633728027, "learning_rate": 2.8392180189099425e-05, "loss": 0.22336578369140625, "step": 1537 }, { "epoch": 0.20792564427545415, "grad_norm": 2.3746140003204346, "learning_rate": 2.83890958627499e-05, "loss": 0.16201210021972656, "step": 1538 }, { "epoch": 0.20806083650190113, "grad_norm": 1.6039785146713257, "learning_rate": 2.8386008748719216e-05, "loss": 0.14647293090820312, "step": 1539 }, { "epoch": 0.2081960287283481, "grad_norm": 3.3411197662353516, "learning_rate": 2.838291884765013e-05, "loss": 0.22340011596679688, "step": 1540 }, { "epoch": 0.2083312209547951, "grad_norm": 1.2625120878219604, "learning_rate": 2.8379826160185975e-05, "loss": 0.19697189331054688, "step": 1541 }, { "epoch": 0.20846641318124207, "grad_norm": 3.770214557647705, "learning_rate": 2.8376730686970664e-05, "loss": 0.24465465545654297, "step": 1542 }, { "epoch": 0.20860160540768904, "grad_norm": 2.887554407119751, "learning_rate": 2.8373632428648683e-05, "loss": 0.21036529541015625, "step": 1543 }, { "epoch": 0.20873679763413605, "grad_norm": 1.4231836795806885, "learning_rate": 2.8370531385865124e-05, "loss": 0.16444158554077148, "step": 1544 }, { "epoch": 0.20887198986058303, "grad_norm": 3.7218923568725586, "learning_rate": 2.8367427559265622e-05, "loss": 0.14816904067993164, "step": 1545 }, { "epoch": 0.20900718208703, "grad_norm": 1.702355146408081, "learning_rate": 2.836432094949642e-05, "loss": 0.20602989196777344, "step": 1546 }, { "epoch": 0.209142374313477, "grad_norm": 2.4044106006622314, "learning_rate": 2.836121155720433e-05, "loss": 0.16053009033203125, "step": 1547 }, { "epoch": 0.20927756653992396, "grad_norm": 1.4207279682159424, "learning_rate": 2.8358099383036745e-05, "loss": 0.19476890563964844, "step": 1548 }, { "epoch": 0.20941275876637094, "grad_norm": 2.0659759044647217, "learning_rate": 2.8354984427641634e-05, "loss": 0.15401077270507812, "step": 1549 }, { "epoch": 0.20954795099281792, "grad_norm": 2.910566568374634, "learning_rate": 2.8351866691667544e-05, "loss": 0.20627212524414062, "step": 1550 }, { "epoch": 0.2096831432192649, "grad_norm": 1.6680594682693481, "learning_rate": 2.8348746175763613e-05, "loss": 0.14860153198242188, "step": 1551 }, { "epoch": 0.20981833544571188, "grad_norm": 1.254815697669983, "learning_rate": 2.8345622880579537e-05, "loss": 0.21012496948242188, "step": 1552 }, { "epoch": 0.20995352767215886, "grad_norm": 1.8045488595962524, "learning_rate": 2.8342496806765615e-05, "loss": 0.23774147033691406, "step": 1553 }, { "epoch": 0.21008871989860584, "grad_norm": 3.642972707748413, "learning_rate": 2.833936795497271e-05, "loss": 0.16762542724609375, "step": 1554 }, { "epoch": 0.21022391212505281, "grad_norm": 2.241891384124756, "learning_rate": 2.8336236325852257e-05, "loss": 0.2126007080078125, "step": 1555 }, { "epoch": 0.2103591043514998, "grad_norm": 0.8299334049224854, "learning_rate": 2.8333101920056285e-05, "loss": 0.17404937744140625, "step": 1556 }, { "epoch": 0.21049429657794677, "grad_norm": 1.1053400039672852, "learning_rate": 2.8329964738237392e-05, "loss": 0.1776437759399414, "step": 1557 }, { "epoch": 0.21062948880439375, "grad_norm": 3.5212504863739014, "learning_rate": 2.8326824781048756e-05, "loss": 0.24463367462158203, "step": 1558 }, { "epoch": 0.21076468103084073, "grad_norm": 1.2256594896316528, "learning_rate": 2.8323682049144135e-05, "loss": 0.24507904052734375, "step": 1559 }, { "epoch": 0.2108998732572877, "grad_norm": 2.7001938819885254, "learning_rate": 2.832053654317786e-05, "loss": 0.213897705078125, "step": 1560 }, { "epoch": 0.21103506548373469, "grad_norm": 1.0681815147399902, "learning_rate": 2.8317388263804842e-05, "loss": 0.24500083923339844, "step": 1561 }, { "epoch": 0.21117025771018166, "grad_norm": 1.6112297773361206, "learning_rate": 2.8314237211680573e-05, "loss": 0.21057701110839844, "step": 1562 }, { "epoch": 0.21130544993662864, "grad_norm": 2.4548401832580566, "learning_rate": 2.8311083387461118e-05, "loss": 0.17877674102783203, "step": 1563 }, { "epoch": 0.21144064216307562, "grad_norm": 1.5524108409881592, "learning_rate": 2.8307926791803114e-05, "loss": 0.23436737060546875, "step": 1564 }, { "epoch": 0.2115758343895226, "grad_norm": 2.803323984146118, "learning_rate": 2.8304767425363785e-05, "loss": 0.21984291076660156, "step": 1565 }, { "epoch": 0.21171102661596958, "grad_norm": 2.1299476623535156, "learning_rate": 2.830160528880093e-05, "loss": 0.19360971450805664, "step": 1566 }, { "epoch": 0.21184621884241656, "grad_norm": 2.5855629444122314, "learning_rate": 2.829844038277292e-05, "loss": 0.21448516845703125, "step": 1567 }, { "epoch": 0.21198141106886353, "grad_norm": 0.9516133666038513, "learning_rate": 2.8295272707938706e-05, "loss": 0.17955780029296875, "step": 1568 }, { "epoch": 0.2121166032953105, "grad_norm": 2.1353797912597656, "learning_rate": 2.8292102264957817e-05, "loss": 0.24937820434570312, "step": 1569 }, { "epoch": 0.2122517955217575, "grad_norm": 1.1626243591308594, "learning_rate": 2.8288929054490357e-05, "loss": 0.2234783172607422, "step": 1570 }, { "epoch": 0.21238698774820447, "grad_norm": 2.427438974380493, "learning_rate": 2.8285753077196998e-05, "loss": 0.1664104461669922, "step": 1571 }, { "epoch": 0.21252217997465145, "grad_norm": 3.7011525630950928, "learning_rate": 2.8282574333739006e-05, "loss": 0.18247222900390625, "step": 1572 }, { "epoch": 0.21265737220109843, "grad_norm": 2.281188726425171, "learning_rate": 2.8279392824778197e-05, "loss": 0.21723365783691406, "step": 1573 }, { "epoch": 0.2127925644275454, "grad_norm": 0.8799855709075928, "learning_rate": 2.8276208550976993e-05, "loss": 0.18135833740234375, "step": 1574 }, { "epoch": 0.21292775665399238, "grad_norm": 1.1784205436706543, "learning_rate": 2.8273021512998372e-05, "loss": 0.21408653259277344, "step": 1575 }, { "epoch": 0.21306294888043936, "grad_norm": 1.5739400386810303, "learning_rate": 2.826983171150589e-05, "loss": 0.21373367309570312, "step": 1576 }, { "epoch": 0.21319814110688634, "grad_norm": 2.105349063873291, "learning_rate": 2.826663914716368e-05, "loss": 0.2302265167236328, "step": 1577 }, { "epoch": 0.21333333333333335, "grad_norm": 2.233496904373169, "learning_rate": 2.826344382063646e-05, "loss": 0.19851112365722656, "step": 1578 }, { "epoch": 0.21346852555978033, "grad_norm": 2.718311071395874, "learning_rate": 2.8260245732589503e-05, "loss": 0.20471858978271484, "step": 1579 }, { "epoch": 0.2136037177862273, "grad_norm": 1.2879897356033325, "learning_rate": 2.8257044883688672e-05, "loss": 0.18995189666748047, "step": 1580 }, { "epoch": 0.21373891001267428, "grad_norm": 0.8471916913986206, "learning_rate": 2.82538412746004e-05, "loss": 0.19594573974609375, "step": 1581 }, { "epoch": 0.21387410223912126, "grad_norm": 1.4126442670822144, "learning_rate": 2.8250634905991695e-05, "loss": 0.1682415008544922, "step": 1582 }, { "epoch": 0.21400929446556824, "grad_norm": 1.9348821640014648, "learning_rate": 2.824742577853015e-05, "loss": 0.17291641235351562, "step": 1583 }, { "epoch": 0.21414448669201522, "grad_norm": 1.182713508605957, "learning_rate": 2.8244213892883907e-05, "loss": 0.16258001327514648, "step": 1584 }, { "epoch": 0.2142796789184622, "grad_norm": 0.9006469249725342, "learning_rate": 2.82409992497217e-05, "loss": 0.1353912353515625, "step": 1585 }, { "epoch": 0.21441487114490917, "grad_norm": 3.2851181030273438, "learning_rate": 2.8237781849712852e-05, "loss": 0.19801855087280273, "step": 1586 }, { "epoch": 0.21455006337135615, "grad_norm": 1.9994642734527588, "learning_rate": 2.8234561693527222e-05, "loss": 0.16687679290771484, "step": 1587 }, { "epoch": 0.21468525559780313, "grad_norm": 4.265449047088623, "learning_rate": 2.8231338781835275e-05, "loss": 0.18825149536132812, "step": 1588 }, { "epoch": 0.2148204478242501, "grad_norm": 2.3270583152770996, "learning_rate": 2.8228113115308032e-05, "loss": 0.22142553329467773, "step": 1589 }, { "epoch": 0.2149556400506971, "grad_norm": 1.2166668176651, "learning_rate": 2.82248846946171e-05, "loss": 0.19626617431640625, "step": 1590 }, { "epoch": 0.21509083227714407, "grad_norm": 2.1520957946777344, "learning_rate": 2.822165352043465e-05, "loss": 0.19588088989257812, "step": 1591 }, { "epoch": 0.21522602450359105, "grad_norm": 1.523145318031311, "learning_rate": 2.8218419593433437e-05, "loss": 0.20798110961914062, "step": 1592 }, { "epoch": 0.21536121673003802, "grad_norm": 2.1777796745300293, "learning_rate": 2.8215182914286768e-05, "loss": 0.16910266876220703, "step": 1593 }, { "epoch": 0.215496408956485, "grad_norm": 1.4733436107635498, "learning_rate": 2.8211943483668546e-05, "loss": 0.21474647521972656, "step": 1594 }, { "epoch": 0.21563160118293198, "grad_norm": 0.805931031703949, "learning_rate": 2.8208701302253237e-05, "loss": 0.17388534545898438, "step": 1595 }, { "epoch": 0.21576679340937896, "grad_norm": 4.49268913269043, "learning_rate": 2.820545637071588e-05, "loss": 0.2125091552734375, "step": 1596 }, { "epoch": 0.21590198563582594, "grad_norm": 3.891444206237793, "learning_rate": 2.8202208689732083e-05, "loss": 0.2002696990966797, "step": 1597 }, { "epoch": 0.21603717786227292, "grad_norm": 0.9846544861793518, "learning_rate": 2.819895825997804e-05, "loss": 0.13231468200683594, "step": 1598 }, { "epoch": 0.2161723700887199, "grad_norm": 1.3987812995910645, "learning_rate": 2.81957050821305e-05, "loss": 0.1939091682434082, "step": 1599 }, { "epoch": 0.21630756231516687, "grad_norm": 1.9934478998184204, "learning_rate": 2.8192449156866787e-05, "loss": 0.1595916748046875, "step": 1600 }, { "epoch": 0.21644275454161385, "grad_norm": 1.3430542945861816, "learning_rate": 2.8189190484864814e-05, "loss": 0.21430635452270508, "step": 1601 }, { "epoch": 0.21657794676806083, "grad_norm": 1.6507396697998047, "learning_rate": 2.8185929066803052e-05, "loss": 0.1488208770751953, "step": 1602 }, { "epoch": 0.2167131389945078, "grad_norm": 1.8464139699935913, "learning_rate": 2.818266490336054e-05, "loss": 0.1778697967529297, "step": 1603 }, { "epoch": 0.2168483312209548, "grad_norm": 2.0680527687072754, "learning_rate": 2.817939799521689e-05, "loss": 0.20356273651123047, "step": 1604 }, { "epoch": 0.21698352344740177, "grad_norm": 1.280734658241272, "learning_rate": 2.8176128343052304e-05, "loss": 0.2144622802734375, "step": 1605 }, { "epoch": 0.21711871567384874, "grad_norm": 3.2125136852264404, "learning_rate": 2.817285594754753e-05, "loss": 0.2136383056640625, "step": 1606 }, { "epoch": 0.21725390790029572, "grad_norm": 0.9972038865089417, "learning_rate": 2.8169580809383902e-05, "loss": 0.1695270538330078, "step": 1607 }, { "epoch": 0.2173891001267427, "grad_norm": 1.9265475273132324, "learning_rate": 2.8166302929243326e-05, "loss": 0.21355819702148438, "step": 1608 }, { "epoch": 0.21752429235318968, "grad_norm": 3.939326286315918, "learning_rate": 2.8163022307808264e-05, "loss": 0.18296241760253906, "step": 1609 }, { "epoch": 0.21765948457963666, "grad_norm": 2.347256898880005, "learning_rate": 2.8159738945761764e-05, "loss": 0.20624542236328125, "step": 1610 }, { "epoch": 0.21779467680608364, "grad_norm": 1.0728425979614258, "learning_rate": 2.8156452843787438e-05, "loss": 0.17418861389160156, "step": 1611 }, { "epoch": 0.21792986903253064, "grad_norm": 2.456512928009033, "learning_rate": 2.815316400256947e-05, "loss": 0.2517204284667969, "step": 1612 }, { "epoch": 0.21806506125897762, "grad_norm": 2.0149176120758057, "learning_rate": 2.814987242279262e-05, "loss": 0.22298049926757812, "step": 1613 }, { "epoch": 0.2182002534854246, "grad_norm": 1.3046553134918213, "learning_rate": 2.8146578105142202e-05, "loss": 0.2370433807373047, "step": 1614 }, { "epoch": 0.21833544571187158, "grad_norm": 1.6654703617095947, "learning_rate": 2.814328105030412e-05, "loss": 0.19285964965820312, "step": 1615 }, { "epoch": 0.21847063793831856, "grad_norm": 3.8162431716918945, "learning_rate": 2.8139981258964836e-05, "loss": 0.2237834930419922, "step": 1616 }, { "epoch": 0.21860583016476554, "grad_norm": 3.192939519882202, "learning_rate": 2.8136678731811385e-05, "loss": 0.17301559448242188, "step": 1617 }, { "epoch": 0.21874102239121251, "grad_norm": 3.9818694591522217, "learning_rate": 2.8133373469531362e-05, "loss": 0.2200603485107422, "step": 1618 }, { "epoch": 0.2188762146176595, "grad_norm": 1.1149611473083496, "learning_rate": 2.8130065472812952e-05, "loss": 0.13320684432983398, "step": 1619 }, { "epoch": 0.21901140684410647, "grad_norm": 2.543436288833618, "learning_rate": 2.812675474234489e-05, "loss": 0.18166351318359375, "step": 1620 }, { "epoch": 0.21914659907055345, "grad_norm": 2.608145236968994, "learning_rate": 2.812344127881649e-05, "loss": 0.2615470886230469, "step": 1621 }, { "epoch": 0.21928179129700043, "grad_norm": 0.6870976090431213, "learning_rate": 2.8120125082917638e-05, "loss": 0.193450927734375, "step": 1622 }, { "epoch": 0.2194169835234474, "grad_norm": 1.0287131071090698, "learning_rate": 2.8116806155338773e-05, "loss": 0.1839599609375, "step": 1623 }, { "epoch": 0.21955217574989438, "grad_norm": 0.9177045226097107, "learning_rate": 2.8113484496770923e-05, "loss": 0.19803810119628906, "step": 1624 }, { "epoch": 0.21968736797634136, "grad_norm": 1.7474350929260254, "learning_rate": 2.811016010790567e-05, "loss": 0.24471282958984375, "step": 1625 }, { "epoch": 0.21982256020278834, "grad_norm": 0.7501457929611206, "learning_rate": 2.8106832989435165e-05, "loss": 0.19566917419433594, "step": 1626 }, { "epoch": 0.21995775242923532, "grad_norm": 1.048923373222351, "learning_rate": 2.8103503142052146e-05, "loss": 0.16785049438476562, "step": 1627 }, { "epoch": 0.2200929446556823, "grad_norm": 3.043039321899414, "learning_rate": 2.8100170566449892e-05, "loss": 0.19009780883789062, "step": 1628 }, { "epoch": 0.22022813688212928, "grad_norm": 2.7366185188293457, "learning_rate": 2.8096835263322266e-05, "loss": 0.2093372344970703, "step": 1629 }, { "epoch": 0.22036332910857626, "grad_norm": 3.2346158027648926, "learning_rate": 2.8093497233363702e-05, "loss": 0.22413063049316406, "step": 1630 }, { "epoch": 0.22049852133502323, "grad_norm": 1.3287252187728882, "learning_rate": 2.8090156477269185e-05, "loss": 0.182586669921875, "step": 1631 }, { "epoch": 0.2206337135614702, "grad_norm": 1.1486190557479858, "learning_rate": 2.808681299573429e-05, "loss": 0.18571710586547852, "step": 1632 }, { "epoch": 0.2207689057879172, "grad_norm": 1.3951661586761475, "learning_rate": 2.8083466789455137e-05, "loss": 0.23805999755859375, "step": 1633 }, { "epoch": 0.22090409801436417, "grad_norm": 2.6470999717712402, "learning_rate": 2.808011785912843e-05, "loss": 0.22373390197753906, "step": 1634 }, { "epoch": 0.22103929024081115, "grad_norm": 2.5507028102874756, "learning_rate": 2.8076766205451435e-05, "loss": 0.18219709396362305, "step": 1635 }, { "epoch": 0.22117448246725813, "grad_norm": 1.2907202243804932, "learning_rate": 2.8073411829121983e-05, "loss": 0.13901138305664062, "step": 1636 }, { "epoch": 0.2213096746937051, "grad_norm": 1.0998774766921997, "learning_rate": 2.8070054730838467e-05, "loss": 0.23057270050048828, "step": 1637 }, { "epoch": 0.22144486692015208, "grad_norm": 1.9028065204620361, "learning_rate": 2.8066694911299865e-05, "loss": 0.2582893371582031, "step": 1638 }, { "epoch": 0.22158005914659906, "grad_norm": 2.7175650596618652, "learning_rate": 2.8063332371205698e-05, "loss": 0.2271575927734375, "step": 1639 }, { "epoch": 0.22171525137304604, "grad_norm": 3.8392858505249023, "learning_rate": 2.8059967111256072e-05, "loss": 0.19440460205078125, "step": 1640 }, { "epoch": 0.22185044359949302, "grad_norm": 1.510316252708435, "learning_rate": 2.8056599132151647e-05, "loss": 0.24645233154296875, "step": 1641 }, { "epoch": 0.22198563582594, "grad_norm": 2.3433139324188232, "learning_rate": 2.8053228434593656e-05, "loss": 0.2302539348602295, "step": 1642 }, { "epoch": 0.22212082805238698, "grad_norm": 1.2337480783462524, "learning_rate": 2.8049855019283895e-05, "loss": 0.23509979248046875, "step": 1643 }, { "epoch": 0.22225602027883395, "grad_norm": 1.0035866498947144, "learning_rate": 2.8046478886924736e-05, "loss": 0.17784500122070312, "step": 1644 }, { "epoch": 0.22239121250528093, "grad_norm": 1.7777913808822632, "learning_rate": 2.804310003821909e-05, "loss": 0.2162342071533203, "step": 1645 }, { "epoch": 0.22252640473172794, "grad_norm": 0.9080791473388672, "learning_rate": 2.8039718473870473e-05, "loss": 0.23622703552246094, "step": 1646 }, { "epoch": 0.22266159695817492, "grad_norm": 1.7890592813491821, "learning_rate": 2.8036334194582924e-05, "loss": 0.15596580505371094, "step": 1647 }, { "epoch": 0.2227967891846219, "grad_norm": 0.8851687908172607, "learning_rate": 2.8032947201061084e-05, "loss": 0.14883136749267578, "step": 1648 }, { "epoch": 0.22293198141106887, "grad_norm": 1.5567103624343872, "learning_rate": 2.8029557494010132e-05, "loss": 0.23559951782226562, "step": 1649 }, { "epoch": 0.22306717363751585, "grad_norm": 2.0446503162384033, "learning_rate": 2.802616507413583e-05, "loss": 0.2289886474609375, "step": 1650 }, { "epoch": 0.22320236586396283, "grad_norm": 1.520552396774292, "learning_rate": 2.8022769942144492e-05, "loss": 0.23146438598632812, "step": 1651 }, { "epoch": 0.2233375580904098, "grad_norm": 1.9324134588241577, "learning_rate": 2.801937209874301e-05, "loss": 0.19847679138183594, "step": 1652 }, { "epoch": 0.2234727503168568, "grad_norm": 1.6959515810012817, "learning_rate": 2.8015971544638832e-05, "loss": 0.1936495304107666, "step": 1653 }, { "epoch": 0.22360794254330377, "grad_norm": 2.567949056625366, "learning_rate": 2.8012568280539964e-05, "loss": 0.25025177001953125, "step": 1654 }, { "epoch": 0.22374313476975075, "grad_norm": 1.755247950553894, "learning_rate": 2.800916230715499e-05, "loss": 0.16828346252441406, "step": 1655 }, { "epoch": 0.22387832699619772, "grad_norm": 2.0015316009521484, "learning_rate": 2.800575362519305e-05, "loss": 0.20948708057403564, "step": 1656 }, { "epoch": 0.2240135192226447, "grad_norm": 2.239182949066162, "learning_rate": 2.800234223536385e-05, "loss": 0.14583969116210938, "step": 1657 }, { "epoch": 0.22414871144909168, "grad_norm": 4.86674165725708, "learning_rate": 2.799892813837766e-05, "loss": 0.27881574630737305, "step": 1658 }, { "epoch": 0.22428390367553866, "grad_norm": 1.4903746843338013, "learning_rate": 2.7995511334945315e-05, "loss": 0.21027803421020508, "step": 1659 }, { "epoch": 0.22441909590198564, "grad_norm": 1.9718495607376099, "learning_rate": 2.7992091825778202e-05, "loss": 0.1566767692565918, "step": 1660 }, { "epoch": 0.22455428812843262, "grad_norm": 1.6419471502304077, "learning_rate": 2.7988669611588295e-05, "loss": 0.2285451889038086, "step": 1661 }, { "epoch": 0.2246894803548796, "grad_norm": 0.7592140436172485, "learning_rate": 2.7985244693088112e-05, "loss": 0.12282180786132812, "step": 1662 }, { "epoch": 0.22482467258132657, "grad_norm": 3.97247314453125, "learning_rate": 2.7981817070990736e-05, "loss": 0.21082305908203125, "step": 1663 }, { "epoch": 0.22495986480777355, "grad_norm": 2.3529508113861084, "learning_rate": 2.7978386746009813e-05, "loss": 0.20727157592773438, "step": 1664 }, { "epoch": 0.22509505703422053, "grad_norm": 1.3219422101974487, "learning_rate": 2.797495371885957e-05, "loss": 0.16812896728515625, "step": 1665 }, { "epoch": 0.2252302492606675, "grad_norm": 1.6870778799057007, "learning_rate": 2.7971517990254768e-05, "loss": 0.1778392791748047, "step": 1666 }, { "epoch": 0.2253654414871145, "grad_norm": 1.3643516302108765, "learning_rate": 2.7968079560910744e-05, "loss": 0.19860458374023438, "step": 1667 }, { "epoch": 0.22550063371356147, "grad_norm": 1.1700143814086914, "learning_rate": 2.7964638431543402e-05, "loss": 0.19585037231445312, "step": 1668 }, { "epoch": 0.22563582594000844, "grad_norm": 1.4670829772949219, "learning_rate": 2.7961194602869208e-05, "loss": 0.1944427490234375, "step": 1669 }, { "epoch": 0.22577101816645542, "grad_norm": 1.8440093994140625, "learning_rate": 2.7957748075605178e-05, "loss": 0.2509498596191406, "step": 1670 }, { "epoch": 0.2259062103929024, "grad_norm": 2.90238356590271, "learning_rate": 2.7954298850468898e-05, "loss": 0.1934528350830078, "step": 1671 }, { "epoch": 0.22604140261934938, "grad_norm": 3.8909621238708496, "learning_rate": 2.7950846928178517e-05, "loss": 0.22019195556640625, "step": 1672 }, { "epoch": 0.22617659484579636, "grad_norm": 1.6296356916427612, "learning_rate": 2.7947392309452744e-05, "loss": 0.1519021987915039, "step": 1673 }, { "epoch": 0.22631178707224334, "grad_norm": 1.9315357208251953, "learning_rate": 2.7943934995010845e-05, "loss": 0.22606325149536133, "step": 1674 }, { "epoch": 0.22644697929869032, "grad_norm": 1.8270851373672485, "learning_rate": 2.7940474985572657e-05, "loss": 0.12842559814453125, "step": 1675 }, { "epoch": 0.2265821715251373, "grad_norm": 0.8213422298431396, "learning_rate": 2.793701228185857e-05, "loss": 0.14566326141357422, "step": 1676 }, { "epoch": 0.22671736375158427, "grad_norm": 2.075437545776367, "learning_rate": 2.7933546884589536e-05, "loss": 0.17247819900512695, "step": 1677 }, { "epoch": 0.22685255597803125, "grad_norm": 0.8434417843818665, "learning_rate": 2.7930078794487077e-05, "loss": 0.1721210479736328, "step": 1678 }, { "epoch": 0.22698774820447823, "grad_norm": 1.6527256965637207, "learning_rate": 2.7926608012273253e-05, "loss": 0.17689895629882812, "step": 1679 }, { "epoch": 0.22712294043092524, "grad_norm": 1.156251311302185, "learning_rate": 2.7923134538670715e-05, "loss": 0.18906116485595703, "step": 1680 }, { "epoch": 0.2272581326573722, "grad_norm": 1.57206130027771, "learning_rate": 2.7919658374402645e-05, "loss": 0.1541461944580078, "step": 1681 }, { "epoch": 0.2273933248838192, "grad_norm": 2.581645965576172, "learning_rate": 2.7916179520192807e-05, "loss": 0.17291879653930664, "step": 1682 }, { "epoch": 0.22752851711026617, "grad_norm": 1.4095185995101929, "learning_rate": 2.7912697976765516e-05, "loss": 0.2251582145690918, "step": 1683 }, { "epoch": 0.22766370933671315, "grad_norm": 1.3922494649887085, "learning_rate": 2.790921374484565e-05, "loss": 0.19935274124145508, "step": 1684 }, { "epoch": 0.22779890156316013, "grad_norm": 1.9435997009277344, "learning_rate": 2.7905726825158637e-05, "loss": 0.21084976196289062, "step": 1685 }, { "epoch": 0.2279340937896071, "grad_norm": 1.2341065406799316, "learning_rate": 2.7902237218430485e-05, "loss": 0.14711856842041016, "step": 1686 }, { "epoch": 0.22806928601605408, "grad_norm": 1.801182746887207, "learning_rate": 2.7898744925387735e-05, "loss": 0.21130895614624023, "step": 1687 }, { "epoch": 0.22820447824250106, "grad_norm": 1.7433704137802124, "learning_rate": 2.7895249946757505e-05, "loss": 0.1819133758544922, "step": 1688 }, { "epoch": 0.22833967046894804, "grad_norm": 1.4107847213745117, "learning_rate": 2.7891752283267474e-05, "loss": 0.23377227783203125, "step": 1689 }, { "epoch": 0.22847486269539502, "grad_norm": 1.9736055135726929, "learning_rate": 2.788825193564587e-05, "loss": 0.14587831497192383, "step": 1690 }, { "epoch": 0.228610054921842, "grad_norm": 0.7618198394775391, "learning_rate": 2.7884748904621483e-05, "loss": 0.1023244857788086, "step": 1691 }, { "epoch": 0.22874524714828898, "grad_norm": 1.1160156726837158, "learning_rate": 2.7881243190923667e-05, "loss": 0.19730758666992188, "step": 1692 }, { "epoch": 0.22888043937473596, "grad_norm": 1.1658329963684082, "learning_rate": 2.7877734795282326e-05, "loss": 0.1697988510131836, "step": 1693 }, { "epoch": 0.22901563160118293, "grad_norm": 2.986938953399658, "learning_rate": 2.7874223718427926e-05, "loss": 0.20756864547729492, "step": 1694 }, { "epoch": 0.2291508238276299, "grad_norm": 1.662099003791809, "learning_rate": 2.78707099610915e-05, "loss": 0.1470174789428711, "step": 1695 }, { "epoch": 0.2292860160540769, "grad_norm": 1.952976942062378, "learning_rate": 2.7867193524004618e-05, "loss": 0.19955062866210938, "step": 1696 }, { "epoch": 0.22942120828052387, "grad_norm": 1.5065747499465942, "learning_rate": 2.786367440789943e-05, "loss": 0.1539320945739746, "step": 1697 }, { "epoch": 0.22955640050697085, "grad_norm": 1.4272929430007935, "learning_rate": 2.7860152613508634e-05, "loss": 0.20050048828125, "step": 1698 }, { "epoch": 0.22969159273341783, "grad_norm": 1.5121279954910278, "learning_rate": 2.7856628141565484e-05, "loss": 0.2255077362060547, "step": 1699 }, { "epoch": 0.2298267849598648, "grad_norm": 1.4406825304031372, "learning_rate": 2.7853100992803797e-05, "loss": 0.1759946346282959, "step": 1700 }, { "epoch": 0.22996197718631178, "grad_norm": 4.906989574432373, "learning_rate": 2.7849571167957942e-05, "loss": 0.21625328063964844, "step": 1701 }, { "epoch": 0.23009716941275876, "grad_norm": 4.190120697021484, "learning_rate": 2.784603866776285e-05, "loss": 0.24311447143554688, "step": 1702 }, { "epoch": 0.23023236163920574, "grad_norm": 6.110686302185059, "learning_rate": 2.7842503492953996e-05, "loss": 0.23919105529785156, "step": 1703 }, { "epoch": 0.23036755386565272, "grad_norm": 1.659705400466919, "learning_rate": 2.7838965644267435e-05, "loss": 0.20405960083007812, "step": 1704 }, { "epoch": 0.2305027460920997, "grad_norm": 1.594421148300171, "learning_rate": 2.7835425122439764e-05, "loss": 0.18706321716308594, "step": 1705 }, { "epoch": 0.23063793831854668, "grad_norm": 2.0728964805603027, "learning_rate": 2.7831881928208128e-05, "loss": 0.1926860809326172, "step": 1706 }, { "epoch": 0.23077313054499365, "grad_norm": 2.2609057426452637, "learning_rate": 2.7828336062310252e-05, "loss": 0.20254802703857422, "step": 1707 }, { "epoch": 0.23090832277144063, "grad_norm": 1.7946500778198242, "learning_rate": 2.7824787525484403e-05, "loss": 0.19638824462890625, "step": 1708 }, { "epoch": 0.2310435149978876, "grad_norm": 1.8342368602752686, "learning_rate": 2.7821236318469395e-05, "loss": 0.22933197021484375, "step": 1709 }, { "epoch": 0.2311787072243346, "grad_norm": 2.01474928855896, "learning_rate": 2.7817682442004615e-05, "loss": 0.19035649299621582, "step": 1710 }, { "epoch": 0.23131389945078157, "grad_norm": 0.974385142326355, "learning_rate": 2.781412589683e-05, "loss": 0.20328903198242188, "step": 1711 }, { "epoch": 0.23144909167722855, "grad_norm": 2.2072834968566895, "learning_rate": 2.781056668368604e-05, "loss": 0.19321441650390625, "step": 1712 }, { "epoch": 0.23158428390367553, "grad_norm": 1.1945016384124756, "learning_rate": 2.780700480331378e-05, "loss": 0.1665663719177246, "step": 1713 }, { "epoch": 0.23171947613012253, "grad_norm": 1.3647164106369019, "learning_rate": 2.7803440256454825e-05, "loss": 0.19145965576171875, "step": 1714 }, { "epoch": 0.2318546683565695, "grad_norm": 2.9263415336608887, "learning_rate": 2.7799873043851337e-05, "loss": 0.2088308334350586, "step": 1715 }, { "epoch": 0.2319898605830165, "grad_norm": 0.8898067474365234, "learning_rate": 2.7796303166246016e-05, "loss": 0.1266767978668213, "step": 1716 }, { "epoch": 0.23212505280946347, "grad_norm": 1.8486448526382446, "learning_rate": 2.7792730624382142e-05, "loss": 0.229888916015625, "step": 1717 }, { "epoch": 0.23226024503591045, "grad_norm": 4.6757073402404785, "learning_rate": 2.778915541900353e-05, "loss": 0.25392913818359375, "step": 1718 }, { "epoch": 0.23239543726235742, "grad_norm": 3.0886518955230713, "learning_rate": 2.7785577550854566e-05, "loss": 0.2079906463623047, "step": 1719 }, { "epoch": 0.2325306294888044, "grad_norm": 4.418379306793213, "learning_rate": 2.778199702068017e-05, "loss": 0.18099403381347656, "step": 1720 }, { "epoch": 0.23266582171525138, "grad_norm": 2.943293571472168, "learning_rate": 2.777841382922583e-05, "loss": 0.21305084228515625, "step": 1721 }, { "epoch": 0.23280101394169836, "grad_norm": 1.3767356872558594, "learning_rate": 2.7774827977237596e-05, "loss": 0.168975830078125, "step": 1722 }, { "epoch": 0.23293620616814534, "grad_norm": 2.19751238822937, "learning_rate": 2.777123946546205e-05, "loss": 0.19993972778320312, "step": 1723 }, { "epoch": 0.23307139839459232, "grad_norm": 2.026576519012451, "learning_rate": 2.776764829464634e-05, "loss": 0.20911407470703125, "step": 1724 }, { "epoch": 0.2332065906210393, "grad_norm": 2.5773041248321533, "learning_rate": 2.7764054465538173e-05, "loss": 0.15764427185058594, "step": 1725 }, { "epoch": 0.23334178284748627, "grad_norm": 2.362438440322876, "learning_rate": 2.7760457978885794e-05, "loss": 0.1825408935546875, "step": 1726 }, { "epoch": 0.23347697507393325, "grad_norm": 1.6827441453933716, "learning_rate": 2.7756858835438022e-05, "loss": 0.22827482223510742, "step": 1727 }, { "epoch": 0.23361216730038023, "grad_norm": 0.820035457611084, "learning_rate": 2.7753257035944216e-05, "loss": 0.13376903533935547, "step": 1728 }, { "epoch": 0.2337473595268272, "grad_norm": 0.6238997578620911, "learning_rate": 2.7749652581154277e-05, "loss": 0.13058167695999146, "step": 1729 }, { "epoch": 0.2338825517532742, "grad_norm": 1.0257359743118286, "learning_rate": 2.7746045471818685e-05, "loss": 0.17197132110595703, "step": 1730 }, { "epoch": 0.23401774397972117, "grad_norm": 1.614213228225708, "learning_rate": 2.7742435708688458e-05, "loss": 0.16758191585540771, "step": 1731 }, { "epoch": 0.23415293620616814, "grad_norm": 1.3026021718978882, "learning_rate": 2.7738823292515167e-05, "loss": 0.16087055206298828, "step": 1732 }, { "epoch": 0.23428812843261512, "grad_norm": 2.4020371437072754, "learning_rate": 2.773520822405093e-05, "loss": 0.23744964599609375, "step": 1733 }, { "epoch": 0.2344233206590621, "grad_norm": 1.2131128311157227, "learning_rate": 2.7731590504048433e-05, "loss": 0.11850261688232422, "step": 1734 }, { "epoch": 0.23455851288550908, "grad_norm": 1.6539579629898071, "learning_rate": 2.7727970133260896e-05, "loss": 0.2256336212158203, "step": 1735 }, { "epoch": 0.23469370511195606, "grad_norm": 1.4063458442687988, "learning_rate": 2.7724347112442106e-05, "loss": 0.17973995208740234, "step": 1736 }, { "epoch": 0.23482889733840304, "grad_norm": 1.7707152366638184, "learning_rate": 2.772072144234639e-05, "loss": 0.1767895221710205, "step": 1737 }, { "epoch": 0.23496408956485001, "grad_norm": 2.202954053878784, "learning_rate": 2.7717093123728634e-05, "loss": 0.19563531875610352, "step": 1738 }, { "epoch": 0.235099281791297, "grad_norm": 2.523167610168457, "learning_rate": 2.771346215734428e-05, "loss": 0.2375659942626953, "step": 1739 }, { "epoch": 0.23523447401774397, "grad_norm": 1.6361443996429443, "learning_rate": 2.7709828543949302e-05, "loss": 0.19640731811523438, "step": 1740 }, { "epoch": 0.23536966624419095, "grad_norm": 2.5076193809509277, "learning_rate": 2.770619228430025e-05, "loss": 0.20153188705444336, "step": 1741 }, { "epoch": 0.23550485847063793, "grad_norm": 2.2669665813446045, "learning_rate": 2.77025533791542e-05, "loss": 0.19234657287597656, "step": 1742 }, { "epoch": 0.2356400506970849, "grad_norm": 0.8073531985282898, "learning_rate": 2.76989118292688e-05, "loss": 0.16974449157714844, "step": 1743 }, { "epoch": 0.23577524292353189, "grad_norm": 1.5695230960845947, "learning_rate": 2.7695267635402242e-05, "loss": 0.22649002075195312, "step": 1744 }, { "epoch": 0.23591043514997886, "grad_norm": 2.3050343990325928, "learning_rate": 2.7691620798313258e-05, "loss": 0.1620922088623047, "step": 1745 }, { "epoch": 0.23604562737642584, "grad_norm": 2.264643669128418, "learning_rate": 2.7687971318761145e-05, "loss": 0.11782073974609375, "step": 1746 }, { "epoch": 0.23618081960287282, "grad_norm": 2.5366272926330566, "learning_rate": 2.7684319197505746e-05, "loss": 0.19781208038330078, "step": 1747 }, { "epoch": 0.23631601182931983, "grad_norm": 0.8493217825889587, "learning_rate": 2.7680664435307446e-05, "loss": 0.14930152893066406, "step": 1748 }, { "epoch": 0.2364512040557668, "grad_norm": 2.8321502208709717, "learning_rate": 2.767700703292719e-05, "loss": 0.20679473876953125, "step": 1749 }, { "epoch": 0.23658639628221378, "grad_norm": 1.8636189699172974, "learning_rate": 2.767334699112647e-05, "loss": 0.2142314910888672, "step": 1750 }, { "epoch": 0.23672158850866076, "grad_norm": 0.8879403471946716, "learning_rate": 2.7669684310667318e-05, "loss": 0.13498878479003906, "step": 1751 }, { "epoch": 0.23685678073510774, "grad_norm": 2.7346439361572266, "learning_rate": 2.7666018992312333e-05, "loss": 0.17259657382965088, "step": 1752 }, { "epoch": 0.23699197296155472, "grad_norm": 1.0043563842773438, "learning_rate": 2.7662351036824653e-05, "loss": 0.182769775390625, "step": 1753 }, { "epoch": 0.2371271651880017, "grad_norm": 3.7860617637634277, "learning_rate": 2.7658680444967964e-05, "loss": 0.1889791488647461, "step": 1754 }, { "epoch": 0.23726235741444868, "grad_norm": 3.9154348373413086, "learning_rate": 2.76550072175065e-05, "loss": 0.22542619705200195, "step": 1755 }, { "epoch": 0.23739754964089566, "grad_norm": 1.5919619798660278, "learning_rate": 2.7651331355205044e-05, "loss": 0.20059490203857422, "step": 1756 }, { "epoch": 0.23753274186734263, "grad_norm": 1.0741440057754517, "learning_rate": 2.7647652858828936e-05, "loss": 0.18515586853027344, "step": 1757 }, { "epoch": 0.2376679340937896, "grad_norm": 1.501904845237732, "learning_rate": 2.764397172914406e-05, "loss": 0.2596282958984375, "step": 1758 }, { "epoch": 0.2378031263202366, "grad_norm": 2.8270175457000732, "learning_rate": 2.7640287966916845e-05, "loss": 0.17212677001953125, "step": 1759 }, { "epoch": 0.23793831854668357, "grad_norm": 2.288524866104126, "learning_rate": 2.7636601572914266e-05, "loss": 0.20764827728271484, "step": 1760 }, { "epoch": 0.23807351077313055, "grad_norm": 1.3019907474517822, "learning_rate": 2.7632912547903855e-05, "loss": 0.1292734146118164, "step": 1761 }, { "epoch": 0.23820870299957753, "grad_norm": 0.9865933060646057, "learning_rate": 2.7629220892653685e-05, "loss": 0.19135475158691406, "step": 1762 }, { "epoch": 0.2383438952260245, "grad_norm": 2.4502198696136475, "learning_rate": 2.7625526607932378e-05, "loss": 0.21588802337646484, "step": 1763 }, { "epoch": 0.23847908745247148, "grad_norm": 1.4543583393096924, "learning_rate": 2.76218296945091e-05, "loss": 0.20258712768554688, "step": 1764 }, { "epoch": 0.23861427967891846, "grad_norm": 1.5020910501480103, "learning_rate": 2.7618130153153577e-05, "loss": 0.1699810028076172, "step": 1765 }, { "epoch": 0.23874947190536544, "grad_norm": 0.9557843208312988, "learning_rate": 2.7614427984636063e-05, "loss": 0.1514110565185547, "step": 1766 }, { "epoch": 0.23888466413181242, "grad_norm": 4.01826810836792, "learning_rate": 2.7610723189727377e-05, "loss": 0.1769256591796875, "step": 1767 }, { "epoch": 0.2390198563582594, "grad_norm": 1.3220897912979126, "learning_rate": 2.760701576919888e-05, "loss": 0.17946910858154297, "step": 1768 }, { "epoch": 0.23915504858470638, "grad_norm": 3.866835594177246, "learning_rate": 2.760330572382246e-05, "loss": 0.2404017448425293, "step": 1769 }, { "epoch": 0.23929024081115335, "grad_norm": 3.6667068004608154, "learning_rate": 2.7599593054370584e-05, "loss": 0.19556808471679688, "step": 1770 }, { "epoch": 0.23942543303760033, "grad_norm": 1.2638391256332397, "learning_rate": 2.7595877761616246e-05, "loss": 0.18737506866455078, "step": 1771 }, { "epoch": 0.2395606252640473, "grad_norm": 2.6164989471435547, "learning_rate": 2.759215984633299e-05, "loss": 0.220977783203125, "step": 1772 }, { "epoch": 0.2396958174904943, "grad_norm": 1.6919876337051392, "learning_rate": 2.7588439309294902e-05, "loss": 0.16015052795410156, "step": 1773 }, { "epoch": 0.23983100971694127, "grad_norm": 4.301952362060547, "learning_rate": 2.7584716151276623e-05, "loss": 0.22281265258789062, "step": 1774 }, { "epoch": 0.23996620194338825, "grad_norm": 2.655496120452881, "learning_rate": 2.7580990373053325e-05, "loss": 0.18245506286621094, "step": 1775 }, { "epoch": 0.24010139416983522, "grad_norm": 4.732064247131348, "learning_rate": 2.7577261975400747e-05, "loss": 0.2396221160888672, "step": 1776 }, { "epoch": 0.2402365863962822, "grad_norm": 3.0432991981506348, "learning_rate": 2.7573530959095154e-05, "loss": 0.18137884140014648, "step": 1777 }, { "epoch": 0.24037177862272918, "grad_norm": 1.0096946954727173, "learning_rate": 2.756979732491336e-05, "loss": 0.1828598976135254, "step": 1778 }, { "epoch": 0.24050697084917616, "grad_norm": 3.0186798572540283, "learning_rate": 2.756606107363274e-05, "loss": 0.17733001708984375, "step": 1779 }, { "epoch": 0.24064216307562314, "grad_norm": 3.2225539684295654, "learning_rate": 2.7562322206031192e-05, "loss": 0.19584250450134277, "step": 1780 }, { "epoch": 0.24077735530207012, "grad_norm": 1.3573607206344604, "learning_rate": 2.7558580722887166e-05, "loss": 0.19092178344726562, "step": 1781 }, { "epoch": 0.24091254752851712, "grad_norm": 2.0956177711486816, "learning_rate": 2.7554836624979666e-05, "loss": 0.17461013793945312, "step": 1782 }, { "epoch": 0.2410477397549641, "grad_norm": 3.6138434410095215, "learning_rate": 2.7551089913088233e-05, "loss": 0.20109272003173828, "step": 1783 }, { "epoch": 0.24118293198141108, "grad_norm": 1.2328728437423706, "learning_rate": 2.7547340587992948e-05, "loss": 0.2194671630859375, "step": 1784 }, { "epoch": 0.24131812420785806, "grad_norm": 1.6233024597167969, "learning_rate": 2.754358865047444e-05, "loss": 0.18277359008789062, "step": 1785 }, { "epoch": 0.24145331643430504, "grad_norm": 2.790472984313965, "learning_rate": 2.7539834101313885e-05, "loss": 0.2407855987548828, "step": 1786 }, { "epoch": 0.24158850866075202, "grad_norm": 1.1314349174499512, "learning_rate": 2.7536076941293003e-05, "loss": 0.1938343048095703, "step": 1787 }, { "epoch": 0.241723700887199, "grad_norm": 0.7296061515808105, "learning_rate": 2.753231717119405e-05, "loss": 0.15276622772216797, "step": 1788 }, { "epoch": 0.24185889311364597, "grad_norm": 1.7662526369094849, "learning_rate": 2.7528554791799826e-05, "loss": 0.13840866088867188, "step": 1789 }, { "epoch": 0.24199408534009295, "grad_norm": 1.0116701126098633, "learning_rate": 2.7524789803893686e-05, "loss": 0.1967926025390625, "step": 1790 }, { "epoch": 0.24212927756653993, "grad_norm": 1.804026484489441, "learning_rate": 2.7521022208259526e-05, "loss": 0.21604537963867188, "step": 1791 }, { "epoch": 0.2422644697929869, "grad_norm": 1.50075101852417, "learning_rate": 2.7517252005681762e-05, "loss": 0.19052696228027344, "step": 1792 }, { "epoch": 0.2423996620194339, "grad_norm": 2.2202341556549072, "learning_rate": 2.7513479196945385e-05, "loss": 0.19433832168579102, "step": 1793 }, { "epoch": 0.24253485424588087, "grad_norm": 1.766616702079773, "learning_rate": 2.750970378283591e-05, "loss": 0.2039794921875, "step": 1794 }, { "epoch": 0.24267004647232784, "grad_norm": 1.2007391452789307, "learning_rate": 2.7505925764139398e-05, "loss": 0.13902640342712402, "step": 1795 }, { "epoch": 0.24280523869877482, "grad_norm": 1.093062162399292, "learning_rate": 2.7502145141642447e-05, "loss": 0.14444732666015625, "step": 1796 }, { "epoch": 0.2429404309252218, "grad_norm": 2.9717624187469482, "learning_rate": 2.7498361916132212e-05, "loss": 0.18315601348876953, "step": 1797 }, { "epoch": 0.24307562315166878, "grad_norm": 0.974408745765686, "learning_rate": 2.7494576088396376e-05, "loss": 0.13589000701904297, "step": 1798 }, { "epoch": 0.24321081537811576, "grad_norm": 1.7642040252685547, "learning_rate": 2.749078765922317e-05, "loss": 0.1761341094970703, "step": 1799 }, { "epoch": 0.24334600760456274, "grad_norm": 2.1651322841644287, "learning_rate": 2.7486996629401366e-05, "loss": 0.20318889617919922, "step": 1800 }, { "epoch": 0.24348119983100971, "grad_norm": 4.523960590362549, "learning_rate": 2.7483202999720272e-05, "loss": 0.23863506317138672, "step": 1801 }, { "epoch": 0.2436163920574567, "grad_norm": 2.1422088146209717, "learning_rate": 2.7479406770969747e-05, "loss": 0.1845703125, "step": 1802 }, { "epoch": 0.24375158428390367, "grad_norm": 1.6567132472991943, "learning_rate": 2.7475607943940182e-05, "loss": 0.21222305297851562, "step": 1803 }, { "epoch": 0.24388677651035065, "grad_norm": 1.6445326805114746, "learning_rate": 2.7471806519422514e-05, "loss": 0.20706558227539062, "step": 1804 }, { "epoch": 0.24402196873679763, "grad_norm": 0.7460476756095886, "learning_rate": 2.746800249820822e-05, "loss": 0.12541675567626953, "step": 1805 }, { "epoch": 0.2441571609632446, "grad_norm": 1.324603796005249, "learning_rate": 2.7464195881089323e-05, "loss": 0.2231426239013672, "step": 1806 }, { "epoch": 0.24429235318969159, "grad_norm": 1.5851218700408936, "learning_rate": 2.746038666885837e-05, "loss": 0.18499135971069336, "step": 1807 }, { "epoch": 0.24442754541613856, "grad_norm": 1.8500932455062866, "learning_rate": 2.7456574862308474e-05, "loss": 0.28872108459472656, "step": 1808 }, { "epoch": 0.24456273764258554, "grad_norm": 0.7610194087028503, "learning_rate": 2.745276046223326e-05, "loss": 0.10070991516113281, "step": 1809 }, { "epoch": 0.24469792986903252, "grad_norm": 1.1204335689544678, "learning_rate": 2.744894346942691e-05, "loss": 0.15925097465515137, "step": 1810 }, { "epoch": 0.2448331220954795, "grad_norm": 2.9479923248291016, "learning_rate": 2.744512388468415e-05, "loss": 0.22916412353515625, "step": 1811 }, { "epoch": 0.24496831432192648, "grad_norm": 2.4301834106445312, "learning_rate": 2.7441301708800227e-05, "loss": 0.1831226348876953, "step": 1812 }, { "epoch": 0.24510350654837346, "grad_norm": 0.6941435933113098, "learning_rate": 2.7437476942570942e-05, "loss": 0.1551222801208496, "step": 1813 }, { "epoch": 0.24523869877482044, "grad_norm": 1.6689149141311646, "learning_rate": 2.7433649586792637e-05, "loss": 0.12225341796875, "step": 1814 }, { "epoch": 0.2453738910012674, "grad_norm": 1.4530420303344727, "learning_rate": 2.7429819642262178e-05, "loss": 0.16067218780517578, "step": 1815 }, { "epoch": 0.24550908322771442, "grad_norm": 2.3543004989624023, "learning_rate": 2.7425987109776994e-05, "loss": 0.17044639587402344, "step": 1816 }, { "epoch": 0.2456442754541614, "grad_norm": 1.1938011646270752, "learning_rate": 2.7422151990135022e-05, "loss": 0.205535888671875, "step": 1817 }, { "epoch": 0.24577946768060838, "grad_norm": 2.3887882232666016, "learning_rate": 2.741831428413477e-05, "loss": 0.20858478546142578, "step": 1818 }, { "epoch": 0.24591465990705536, "grad_norm": 1.386568307876587, "learning_rate": 2.7414473992575257e-05, "loss": 0.15534210205078125, "step": 1819 }, { "epoch": 0.24604985213350233, "grad_norm": 0.8983409404754639, "learning_rate": 2.7410631116256054e-05, "loss": 0.2323780059814453, "step": 1820 }, { "epoch": 0.2461850443599493, "grad_norm": 2.2798032760620117, "learning_rate": 2.7406785655977275e-05, "loss": 0.16332530975341797, "step": 1821 }, { "epoch": 0.2463202365863963, "grad_norm": 3.062772750854492, "learning_rate": 2.7402937612539563e-05, "loss": 0.2181262969970703, "step": 1822 }, { "epoch": 0.24645542881284327, "grad_norm": 2.45572829246521, "learning_rate": 2.7399086986744095e-05, "loss": 0.19539451599121094, "step": 1823 }, { "epoch": 0.24659062103929025, "grad_norm": 1.6240006685256958, "learning_rate": 2.7395233779392598e-05, "loss": 0.18505859375, "step": 1824 }, { "epoch": 0.24672581326573723, "grad_norm": 3.154146194458008, "learning_rate": 2.739137799128733e-05, "loss": 0.1941823959350586, "step": 1825 }, { "epoch": 0.2468610054921842, "grad_norm": 1.6081831455230713, "learning_rate": 2.7387519623231085e-05, "loss": 0.16661453247070312, "step": 1826 }, { "epoch": 0.24699619771863118, "grad_norm": 0.9047930836677551, "learning_rate": 2.7383658676027195e-05, "loss": 0.2242288589477539, "step": 1827 }, { "epoch": 0.24713138994507816, "grad_norm": 2.931452751159668, "learning_rate": 2.7379795150479535e-05, "loss": 0.23974227905273438, "step": 1828 }, { "epoch": 0.24726658217152514, "grad_norm": 0.6701819896697998, "learning_rate": 2.73759290473925e-05, "loss": 0.169036865234375, "step": 1829 }, { "epoch": 0.24740177439797212, "grad_norm": 2.6725099086761475, "learning_rate": 2.7372060367571044e-05, "loss": 0.26139163970947266, "step": 1830 }, { "epoch": 0.2475369666244191, "grad_norm": 1.5986239910125732, "learning_rate": 2.7368189111820648e-05, "loss": 0.23374414443969727, "step": 1831 }, { "epoch": 0.24767215885086608, "grad_norm": 1.6976271867752075, "learning_rate": 2.736431528094732e-05, "loss": 0.17014694213867188, "step": 1832 }, { "epoch": 0.24780735107731305, "grad_norm": 1.7940839529037476, "learning_rate": 2.7360438875757614e-05, "loss": 0.2106151580810547, "step": 1833 }, { "epoch": 0.24794254330376003, "grad_norm": 1.5498751401901245, "learning_rate": 2.7356559897058624e-05, "loss": 0.20965957641601562, "step": 1834 }, { "epoch": 0.248077735530207, "grad_norm": 2.21406888961792, "learning_rate": 2.735267834565797e-05, "loss": 0.1727910041809082, "step": 1835 }, { "epoch": 0.248212927756654, "grad_norm": 1.5922492742538452, "learning_rate": 2.734879422236381e-05, "loss": 0.2078418731689453, "step": 1836 }, { "epoch": 0.24834811998310097, "grad_norm": 2.4378628730773926, "learning_rate": 2.734490752798484e-05, "loss": 0.2581634521484375, "step": 1837 }, { "epoch": 0.24848331220954795, "grad_norm": 0.8777881264686584, "learning_rate": 2.7341018263330296e-05, "loss": 0.13143301010131836, "step": 1838 }, { "epoch": 0.24861850443599492, "grad_norm": 0.789832592010498, "learning_rate": 2.7337126429209935e-05, "loss": 0.15216636657714844, "step": 1839 }, { "epoch": 0.2487536966624419, "grad_norm": 2.5479233264923096, "learning_rate": 2.7333232026434064e-05, "loss": 0.21466970443725586, "step": 1840 }, { "epoch": 0.24888888888888888, "grad_norm": 1.6906899213790894, "learning_rate": 2.7329335055813517e-05, "loss": 0.169189453125, "step": 1841 }, { "epoch": 0.24902408111533586, "grad_norm": 1.350311517715454, "learning_rate": 2.732543551815966e-05, "loss": 0.16113853454589844, "step": 1842 }, { "epoch": 0.24915927334178284, "grad_norm": 4.948683261871338, "learning_rate": 2.7321533414284404e-05, "loss": 0.197845458984375, "step": 1843 }, { "epoch": 0.24929446556822982, "grad_norm": 1.508175253868103, "learning_rate": 2.731762874500018e-05, "loss": 0.17602157592773438, "step": 1844 }, { "epoch": 0.2494296577946768, "grad_norm": 1.8899592161178589, "learning_rate": 2.7313721511119972e-05, "loss": 0.188720703125, "step": 1845 }, { "epoch": 0.24956485002112377, "grad_norm": 0.7386027574539185, "learning_rate": 2.7309811713457275e-05, "loss": 0.15043258666992188, "step": 1846 }, { "epoch": 0.24970004224757075, "grad_norm": 1.8178147077560425, "learning_rate": 2.730589935282614e-05, "loss": 0.23105621337890625, "step": 1847 }, { "epoch": 0.24983523447401773, "grad_norm": 0.8323884010314941, "learning_rate": 2.7301984430041135e-05, "loss": 0.16028594970703125, "step": 1848 }, { "epoch": 0.2499704267004647, "grad_norm": 1.4203921556472778, "learning_rate": 2.7298066945917368e-05, "loss": 0.1997203826904297, "step": 1849 }, { "epoch": 0.2501056189269117, "grad_norm": 0.9265887141227722, "learning_rate": 2.7294146901270482e-05, "loss": 0.19940805435180664, "step": 1850 }, { "epoch": 0.25024081115335867, "grad_norm": 1.0390900373458862, "learning_rate": 2.7290224296916653e-05, "loss": 0.2062664031982422, "step": 1851 }, { "epoch": 0.25037600337980565, "grad_norm": 2.799058198928833, "learning_rate": 2.7286299133672584e-05, "loss": 0.21361827850341797, "step": 1852 }, { "epoch": 0.2505111956062526, "grad_norm": 1.4033443927764893, "learning_rate": 2.728237141235552e-05, "loss": 0.15416717529296875, "step": 1853 }, { "epoch": 0.2506463878326996, "grad_norm": 0.8309803605079651, "learning_rate": 2.727844113378322e-05, "loss": 0.19065475463867188, "step": 1854 }, { "epoch": 0.2507815800591466, "grad_norm": 1.404348611831665, "learning_rate": 2.7274508298774013e-05, "loss": 0.2071218490600586, "step": 1855 }, { "epoch": 0.25091677228559356, "grad_norm": 1.0040569305419922, "learning_rate": 2.727057290814672e-05, "loss": 0.20615386962890625, "step": 1856 }, { "epoch": 0.25105196451204054, "grad_norm": 2.0930609703063965, "learning_rate": 2.7266634962720704e-05, "loss": 0.18292236328125, "step": 1857 }, { "epoch": 0.2511871567384875, "grad_norm": 1.7553514242172241, "learning_rate": 2.726269446331588e-05, "loss": 0.17391109466552734, "step": 1858 }, { "epoch": 0.2513223489649345, "grad_norm": 0.946629524230957, "learning_rate": 2.7258751410752676e-05, "loss": 0.19818878173828125, "step": 1859 }, { "epoch": 0.2514575411913815, "grad_norm": 2.2126502990722656, "learning_rate": 2.725480580585206e-05, "loss": 0.1741102933883667, "step": 1860 }, { "epoch": 0.25159273341782845, "grad_norm": 1.352871298789978, "learning_rate": 2.7250857649435522e-05, "loss": 0.19280147552490234, "step": 1861 }, { "epoch": 0.25172792564427543, "grad_norm": 1.9207581281661987, "learning_rate": 2.724690694232509e-05, "loss": 0.1536569595336914, "step": 1862 }, { "epoch": 0.2518631178707224, "grad_norm": 1.9133845567703247, "learning_rate": 2.7242953685343327e-05, "loss": 0.194671630859375, "step": 1863 }, { "epoch": 0.2519983100971694, "grad_norm": 1.4482982158660889, "learning_rate": 2.723899787931332e-05, "loss": 0.1819009780883789, "step": 1864 }, { "epoch": 0.25213350232361637, "grad_norm": 1.0874003171920776, "learning_rate": 2.7235039525058684e-05, "loss": 0.17987632751464844, "step": 1865 }, { "epoch": 0.25226869455006334, "grad_norm": 2.602050304412842, "learning_rate": 2.7231078623403575e-05, "loss": 0.18448734283447266, "step": 1866 }, { "epoch": 0.2524038867765104, "grad_norm": 1.9931179285049438, "learning_rate": 2.722711517517267e-05, "loss": 0.17292213439941406, "step": 1867 }, { "epoch": 0.25253907900295736, "grad_norm": 2.9078757762908936, "learning_rate": 2.7223149181191187e-05, "loss": 0.23421669006347656, "step": 1868 }, { "epoch": 0.25267427122940433, "grad_norm": 1.5477068424224854, "learning_rate": 2.7219180642284864e-05, "loss": 0.19437646865844727, "step": 1869 }, { "epoch": 0.2528094634558513, "grad_norm": 1.6026208400726318, "learning_rate": 2.721520955927997e-05, "loss": 0.21612930297851562, "step": 1870 }, { "epoch": 0.2529446556822983, "grad_norm": 1.3329172134399414, "learning_rate": 2.7211235933003302e-05, "loss": 0.2152118682861328, "step": 1871 }, { "epoch": 0.25307984790874527, "grad_norm": 0.8584641218185425, "learning_rate": 2.72072597642822e-05, "loss": 0.12581825256347656, "step": 1872 }, { "epoch": 0.25321504013519225, "grad_norm": 1.4131656885147095, "learning_rate": 2.7203281053944512e-05, "loss": 0.2525138854980469, "step": 1873 }, { "epoch": 0.2533502323616392, "grad_norm": 1.1658369302749634, "learning_rate": 2.719929980281864e-05, "loss": 0.14561176300048828, "step": 1874 }, { "epoch": 0.2534854245880862, "grad_norm": 1.277383804321289, "learning_rate": 2.719531601173349e-05, "loss": 0.2118206024169922, "step": 1875 }, { "epoch": 0.2536206168145332, "grad_norm": 0.8549351692199707, "learning_rate": 2.7191329681518512e-05, "loss": 0.1967754364013672, "step": 1876 }, { "epoch": 0.25375580904098016, "grad_norm": 1.0922499895095825, "learning_rate": 2.7187340813003682e-05, "loss": 0.18384170532226562, "step": 1877 }, { "epoch": 0.25389100126742714, "grad_norm": 0.7149601578712463, "learning_rate": 2.718334940701951e-05, "loss": 0.14149856567382812, "step": 1878 }, { "epoch": 0.2540261934938741, "grad_norm": 1.3993784189224243, "learning_rate": 2.7179355464397014e-05, "loss": 0.1161503791809082, "step": 1879 }, { "epoch": 0.2541613857203211, "grad_norm": 1.866430640220642, "learning_rate": 2.7175358985967763e-05, "loss": 0.24223709106445312, "step": 1880 }, { "epoch": 0.2542965779467681, "grad_norm": 1.0008230209350586, "learning_rate": 2.717135997256385e-05, "loss": 0.1560840606689453, "step": 1881 }, { "epoch": 0.25443177017321505, "grad_norm": 2.41662335395813, "learning_rate": 2.7167358425017882e-05, "loss": 0.21687889099121094, "step": 1882 }, { "epoch": 0.25456696239966203, "grad_norm": 1.9120348691940308, "learning_rate": 2.7163354344163004e-05, "loss": 0.18424415588378906, "step": 1883 }, { "epoch": 0.254702154626109, "grad_norm": 1.2551332712173462, "learning_rate": 2.715934773083289e-05, "loss": 0.13746243715286255, "step": 1884 }, { "epoch": 0.254837346852556, "grad_norm": 4.2883524894714355, "learning_rate": 2.715533858586174e-05, "loss": 0.23687076568603516, "step": 1885 }, { "epoch": 0.25497253907900297, "grad_norm": 1.8226817846298218, "learning_rate": 2.715132691008427e-05, "loss": 0.14057111740112305, "step": 1886 }, { "epoch": 0.25510773130544995, "grad_norm": 4.030285835266113, "learning_rate": 2.714731270433574e-05, "loss": 0.21898365020751953, "step": 1887 }, { "epoch": 0.2552429235318969, "grad_norm": 1.1922999620437622, "learning_rate": 2.7143295969451933e-05, "loss": 0.1292862892150879, "step": 1888 }, { "epoch": 0.2553781157583439, "grad_norm": 0.9621856808662415, "learning_rate": 2.7139276706269147e-05, "loss": 0.13312244415283203, "step": 1889 }, { "epoch": 0.2555133079847909, "grad_norm": 0.7738590836524963, "learning_rate": 2.7135254915624213e-05, "loss": 0.1671161651611328, "step": 1890 }, { "epoch": 0.25564850021123786, "grad_norm": 2.4581139087677, "learning_rate": 2.7131230598354497e-05, "loss": 0.22307372093200684, "step": 1891 }, { "epoch": 0.25578369243768484, "grad_norm": 3.7177722454071045, "learning_rate": 2.712720375529787e-05, "loss": 0.23241043090820312, "step": 1892 }, { "epoch": 0.2559188846641318, "grad_norm": 5.150735378265381, "learning_rate": 2.7123174387292758e-05, "loss": 0.22222423553466797, "step": 1893 }, { "epoch": 0.2560540768905788, "grad_norm": 3.285849094390869, "learning_rate": 2.7119142495178088e-05, "loss": 0.2296743392944336, "step": 1894 }, { "epoch": 0.2561892691170258, "grad_norm": 2.1516175270080566, "learning_rate": 2.711510807979333e-05, "loss": 0.1914815902709961, "step": 1895 }, { "epoch": 0.25632446134347275, "grad_norm": 1.3553555011749268, "learning_rate": 2.7111071141978452e-05, "loss": 0.19443082809448242, "step": 1896 }, { "epoch": 0.25645965356991973, "grad_norm": 1.7527267932891846, "learning_rate": 2.7107031682573987e-05, "loss": 0.16572976112365723, "step": 1897 }, { "epoch": 0.2565948457963667, "grad_norm": 1.9503977298736572, "learning_rate": 2.710298970242096e-05, "loss": 0.19137954711914062, "step": 1898 }, { "epoch": 0.2567300380228137, "grad_norm": 2.2335622310638428, "learning_rate": 2.7098945202360937e-05, "loss": 0.2009143829345703, "step": 1899 }, { "epoch": 0.25686523024926067, "grad_norm": 1.4293417930603027, "learning_rate": 2.7094898183236e-05, "loss": 0.23547744750976562, "step": 1900 }, { "epoch": 0.25700042247570765, "grad_norm": 3.1159415245056152, "learning_rate": 2.709084864588877e-05, "loss": 0.23511123657226562, "step": 1901 }, { "epoch": 0.2571356147021546, "grad_norm": 1.6290208101272583, "learning_rate": 2.708679659116237e-05, "loss": 0.1189870834350586, "step": 1902 }, { "epoch": 0.2572708069286016, "grad_norm": 1.7773970365524292, "learning_rate": 2.708274201990047e-05, "loss": 0.21961307525634766, "step": 1903 }, { "epoch": 0.2574059991550486, "grad_norm": 3.6391491889953613, "learning_rate": 2.7078684932947247e-05, "loss": 0.21735668182373047, "step": 1904 }, { "epoch": 0.25754119138149556, "grad_norm": 5.386855602264404, "learning_rate": 2.7074625331147407e-05, "loss": 0.24686622619628906, "step": 1905 }, { "epoch": 0.25767638360794254, "grad_norm": 1.59454345703125, "learning_rate": 2.7070563215346184e-05, "loss": 0.2650566101074219, "step": 1906 }, { "epoch": 0.2578115758343895, "grad_norm": 2.7635247707366943, "learning_rate": 2.7066498586389332e-05, "loss": 0.23914718627929688, "step": 1907 }, { "epoch": 0.2579467680608365, "grad_norm": 5.463364601135254, "learning_rate": 2.7062431445123127e-05, "loss": 0.29244232177734375, "step": 1908 }, { "epoch": 0.2580819602872835, "grad_norm": 1.0602253675460815, "learning_rate": 2.705836179239437e-05, "loss": 0.13002872467041016, "step": 1909 }, { "epoch": 0.25821715251373045, "grad_norm": 1.020666241645813, "learning_rate": 2.705428962905039e-05, "loss": 0.1340007781982422, "step": 1910 }, { "epoch": 0.25835234474017743, "grad_norm": 0.893650233745575, "learning_rate": 2.705021495593902e-05, "loss": 0.14308738708496094, "step": 1911 }, { "epoch": 0.2584875369666244, "grad_norm": 1.1200660467147827, "learning_rate": 2.704613777390864e-05, "loss": 0.1405954360961914, "step": 1912 }, { "epoch": 0.2586227291930714, "grad_norm": 0.8722280859947205, "learning_rate": 2.7042058083808135e-05, "loss": 0.18476104736328125, "step": 1913 }, { "epoch": 0.25875792141951837, "grad_norm": 1.9932823181152344, "learning_rate": 2.7037975886486928e-05, "loss": 0.2548637390136719, "step": 1914 }, { "epoch": 0.25889311364596534, "grad_norm": 1.901976227760315, "learning_rate": 2.7033891182794942e-05, "loss": 0.19043922424316406, "step": 1915 }, { "epoch": 0.2590283058724123, "grad_norm": 1.4192148447036743, "learning_rate": 2.7029803973582642e-05, "loss": 0.22775554656982422, "step": 1916 }, { "epoch": 0.2591634980988593, "grad_norm": 1.6903505325317383, "learning_rate": 2.7025714259701e-05, "loss": 0.25521135330200195, "step": 1917 }, { "epoch": 0.2592986903253063, "grad_norm": 1.6820746660232544, "learning_rate": 2.7021622042001524e-05, "loss": 0.16521263122558594, "step": 1918 }, { "epoch": 0.25943388255175326, "grad_norm": 1.0428242683410645, "learning_rate": 2.701752732133623e-05, "loss": 0.18391036987304688, "step": 1919 }, { "epoch": 0.25956907477820024, "grad_norm": 0.8983698487281799, "learning_rate": 2.7013430098557664e-05, "loss": 0.15881729125976562, "step": 1920 }, { "epoch": 0.2597042670046472, "grad_norm": 1.1442865133285522, "learning_rate": 2.7009330374518885e-05, "loss": 0.212158203125, "step": 1921 }, { "epoch": 0.2598394592310942, "grad_norm": 1.9967166185379028, "learning_rate": 2.7005228150073483e-05, "loss": 0.17375755310058594, "step": 1922 }, { "epoch": 0.2599746514575412, "grad_norm": 1.153822898864746, "learning_rate": 2.7001123426075558e-05, "loss": 0.15046024322509766, "step": 1923 }, { "epoch": 0.26010984368398815, "grad_norm": 1.318525791168213, "learning_rate": 2.699701620337974e-05, "loss": 0.17002296447753906, "step": 1924 }, { "epoch": 0.26024503591043513, "grad_norm": 1.0753260850906372, "learning_rate": 2.699290648284117e-05, "loss": 0.15815973281860352, "step": 1925 }, { "epoch": 0.2603802281368821, "grad_norm": 1.1672066450119019, "learning_rate": 2.6988794265315522e-05, "loss": 0.17817401885986328, "step": 1926 }, { "epoch": 0.2605154203633291, "grad_norm": 2.196876049041748, "learning_rate": 2.698467955165897e-05, "loss": 0.21384429931640625, "step": 1927 }, { "epoch": 0.26065061258977607, "grad_norm": 2.0762689113616943, "learning_rate": 2.6980562342728226e-05, "loss": 0.21181869506835938, "step": 1928 }, { "epoch": 0.26078580481622304, "grad_norm": 0.9911255240440369, "learning_rate": 2.6976442639380516e-05, "loss": 0.14367055892944336, "step": 1929 }, { "epoch": 0.26092099704267, "grad_norm": 3.002906322479248, "learning_rate": 2.6972320442473583e-05, "loss": 0.2427806854248047, "step": 1930 }, { "epoch": 0.261056189269117, "grad_norm": 1.7476301193237305, "learning_rate": 2.6968195752865686e-05, "loss": 0.2146167755126953, "step": 1931 }, { "epoch": 0.261191381495564, "grad_norm": 1.9302798509597778, "learning_rate": 2.6964068571415613e-05, "loss": 0.15862274169921875, "step": 1932 }, { "epoch": 0.26132657372201096, "grad_norm": 1.2121418714523315, "learning_rate": 2.6959938898982667e-05, "loss": 0.15786981582641602, "step": 1933 }, { "epoch": 0.26146176594845794, "grad_norm": 0.8861576318740845, "learning_rate": 2.6955806736426657e-05, "loss": 0.19966506958007812, "step": 1934 }, { "epoch": 0.26159695817490497, "grad_norm": 1.776509165763855, "learning_rate": 2.6951672084607937e-05, "loss": 0.1838245391845703, "step": 1935 }, { "epoch": 0.26173215040135195, "grad_norm": 1.629281759262085, "learning_rate": 2.694753494438735e-05, "loss": 0.2515850067138672, "step": 1936 }, { "epoch": 0.2618673426277989, "grad_norm": 1.2359305620193481, "learning_rate": 2.6943395316626272e-05, "loss": 0.155120849609375, "step": 1937 }, { "epoch": 0.2620025348542459, "grad_norm": 0.9176020622253418, "learning_rate": 2.69392532021866e-05, "loss": 0.18673133850097656, "step": 1938 }, { "epoch": 0.2621377270806929, "grad_norm": 0.8933517336845398, "learning_rate": 2.693510860193075e-05, "loss": 0.15508127212524414, "step": 1939 }, { "epoch": 0.26227291930713986, "grad_norm": 3.950157880783081, "learning_rate": 2.6930961516721638e-05, "loss": 0.20750141143798828, "step": 1940 }, { "epoch": 0.26240811153358684, "grad_norm": 1.0190545320510864, "learning_rate": 2.6926811947422717e-05, "loss": 0.16823291778564453, "step": 1941 }, { "epoch": 0.2625433037600338, "grad_norm": 3.5451526641845703, "learning_rate": 2.6922659894897946e-05, "loss": 0.2516508102416992, "step": 1942 }, { "epoch": 0.2626784959864808, "grad_norm": 0.65116947889328, "learning_rate": 2.6918505360011805e-05, "loss": 0.12068653106689453, "step": 1943 }, { "epoch": 0.2628136882129278, "grad_norm": 2.8797261714935303, "learning_rate": 2.6914348343629292e-05, "loss": 0.24213027954101562, "step": 1944 }, { "epoch": 0.26294888043937475, "grad_norm": 1.2638421058654785, "learning_rate": 2.6910188846615918e-05, "loss": 0.15696048736572266, "step": 1945 }, { "epoch": 0.26308407266582173, "grad_norm": 1.1718541383743286, "learning_rate": 2.6906026869837714e-05, "loss": 0.17064857482910156, "step": 1946 }, { "epoch": 0.2632192648922687, "grad_norm": 0.9279624819755554, "learning_rate": 2.6901862414161222e-05, "loss": 0.1836872100830078, "step": 1947 }, { "epoch": 0.2633544571187157, "grad_norm": 1.4372190237045288, "learning_rate": 2.689769548045351e-05, "loss": 0.18924331665039062, "step": 1948 }, { "epoch": 0.26348964934516267, "grad_norm": 1.2390385866165161, "learning_rate": 2.6893526069582154e-05, "loss": 0.15457820892333984, "step": 1949 }, { "epoch": 0.26362484157160965, "grad_norm": 0.7015904188156128, "learning_rate": 2.6889354182415245e-05, "loss": 0.10678696632385254, "step": 1950 }, { "epoch": 0.2637600337980566, "grad_norm": 0.9320312738418579, "learning_rate": 2.688517981982139e-05, "loss": 0.20911598205566406, "step": 1951 }, { "epoch": 0.2638952260245036, "grad_norm": 1.5073645114898682, "learning_rate": 2.6881002982669723e-05, "loss": 0.1506037712097168, "step": 1952 }, { "epoch": 0.2640304182509506, "grad_norm": 1.2263963222503662, "learning_rate": 2.6876823671829874e-05, "loss": 0.16829490661621094, "step": 1953 }, { "epoch": 0.26416561047739756, "grad_norm": 1.1362329721450806, "learning_rate": 2.6872641888172e-05, "loss": 0.17322063446044922, "step": 1954 }, { "epoch": 0.26430080270384454, "grad_norm": 4.183491230010986, "learning_rate": 2.6868457632566774e-05, "loss": 0.22624492645263672, "step": 1955 }, { "epoch": 0.2644359949302915, "grad_norm": 2.6532790660858154, "learning_rate": 2.6864270905885377e-05, "loss": 0.2388458251953125, "step": 1956 }, { "epoch": 0.2645711871567385, "grad_norm": 2.1013286113739014, "learning_rate": 2.6860081708999515e-05, "loss": 0.15811729431152344, "step": 1957 }, { "epoch": 0.2647063793831855, "grad_norm": 1.4955461025238037, "learning_rate": 2.685589004278139e-05, "loss": 0.28665924072265625, "step": 1958 }, { "epoch": 0.26484157160963245, "grad_norm": 1.7254035472869873, "learning_rate": 2.6851695908103737e-05, "loss": 0.18318557739257812, "step": 1959 }, { "epoch": 0.26497676383607943, "grad_norm": 2.181720495223999, "learning_rate": 2.6847499305839796e-05, "loss": 0.1823415756225586, "step": 1960 }, { "epoch": 0.2651119560625264, "grad_norm": 1.5420656204223633, "learning_rate": 2.684330023686332e-05, "loss": 0.16841506958007812, "step": 1961 }, { "epoch": 0.2652471482889734, "grad_norm": 1.5167038440704346, "learning_rate": 2.6839098702048577e-05, "loss": 0.13259124755859375, "step": 1962 }, { "epoch": 0.26538234051542037, "grad_norm": 4.060940265655518, "learning_rate": 2.683489470227035e-05, "loss": 0.20993614196777344, "step": 1963 }, { "epoch": 0.26551753274186735, "grad_norm": 1.478872299194336, "learning_rate": 2.6830688238403936e-05, "loss": 0.21984004974365234, "step": 1964 }, { "epoch": 0.2656527249683143, "grad_norm": 0.7537587285041809, "learning_rate": 2.682647931132514e-05, "loss": 0.13451480865478516, "step": 1965 }, { "epoch": 0.2657879171947613, "grad_norm": 0.7682000994682312, "learning_rate": 2.682226792191029e-05, "loss": 0.1654500961303711, "step": 1966 }, { "epoch": 0.2659231094212083, "grad_norm": 1.8770439624786377, "learning_rate": 2.681805407103621e-05, "loss": 0.1817154884338379, "step": 1967 }, { "epoch": 0.26605830164765526, "grad_norm": 2.2880940437316895, "learning_rate": 2.6813837759580253e-05, "loss": 0.15549159049987793, "step": 1968 }, { "epoch": 0.26619349387410224, "grad_norm": 1.4084358215332031, "learning_rate": 2.6809618988420274e-05, "loss": 0.2133331298828125, "step": 1969 }, { "epoch": 0.2663286861005492, "grad_norm": 0.6391538977622986, "learning_rate": 2.6805397758434647e-05, "loss": 0.12550926208496094, "step": 1970 }, { "epoch": 0.2664638783269962, "grad_norm": 2.349172592163086, "learning_rate": 2.6801174070502248e-05, "loss": 0.14751052856445312, "step": 1971 }, { "epoch": 0.2665990705534432, "grad_norm": 1.127495288848877, "learning_rate": 2.679694792550248e-05, "loss": 0.2141246795654297, "step": 1972 }, { "epoch": 0.26673426277989015, "grad_norm": 1.4714776277542114, "learning_rate": 2.6792719324315248e-05, "loss": 0.17395401000976562, "step": 1973 }, { "epoch": 0.26686945500633713, "grad_norm": 3.0437843799591064, "learning_rate": 2.678848826782096e-05, "loss": 0.2095479965209961, "step": 1974 }, { "epoch": 0.2670046472327841, "grad_norm": 1.2225019931793213, "learning_rate": 2.678425475690055e-05, "loss": 0.2036905288696289, "step": 1975 }, { "epoch": 0.2671398394592311, "grad_norm": 1.1206014156341553, "learning_rate": 2.6780018792435464e-05, "loss": 0.19078588485717773, "step": 1976 }, { "epoch": 0.26727503168567807, "grad_norm": 1.2350801229476929, "learning_rate": 2.6775780375307645e-05, "loss": 0.19570541381835938, "step": 1977 }, { "epoch": 0.26741022391212504, "grad_norm": 1.7523033618927002, "learning_rate": 2.6771539506399555e-05, "loss": 0.182525634765625, "step": 1978 }, { "epoch": 0.267545416138572, "grad_norm": 1.6022891998291016, "learning_rate": 2.6767296186594165e-05, "loss": 0.14725637435913086, "step": 1979 }, { "epoch": 0.267680608365019, "grad_norm": 1.4451727867126465, "learning_rate": 2.676305041677496e-05, "loss": 0.1767730712890625, "step": 1980 }, { "epoch": 0.267815800591466, "grad_norm": 2.3180975914001465, "learning_rate": 2.675880219782593e-05, "loss": 0.16197192668914795, "step": 1981 }, { "epoch": 0.26795099281791296, "grad_norm": 1.615830898284912, "learning_rate": 2.6754551530631575e-05, "loss": 0.1683483123779297, "step": 1982 }, { "epoch": 0.26808618504435994, "grad_norm": 0.9067396521568298, "learning_rate": 2.6750298416076907e-05, "loss": 0.20182037353515625, "step": 1983 }, { "epoch": 0.2682213772708069, "grad_norm": 2.105661392211914, "learning_rate": 2.674604285504745e-05, "loss": 0.23011016845703125, "step": 1984 }, { "epoch": 0.2683565694972539, "grad_norm": 1.848522424697876, "learning_rate": 2.6741784848429235e-05, "loss": 0.19468402862548828, "step": 1985 }, { "epoch": 0.2684917617237009, "grad_norm": 1.1760467290878296, "learning_rate": 2.67375243971088e-05, "loss": 0.22147274017333984, "step": 1986 }, { "epoch": 0.26862695395014785, "grad_norm": 2.125758171081543, "learning_rate": 2.6733261501973192e-05, "loss": 0.18086528778076172, "step": 1987 }, { "epoch": 0.26876214617659483, "grad_norm": 2.056044578552246, "learning_rate": 2.672899616390997e-05, "loss": 0.15813064575195312, "step": 1988 }, { "epoch": 0.2688973384030418, "grad_norm": 1.2650346755981445, "learning_rate": 2.67247283838072e-05, "loss": 0.2342967987060547, "step": 1989 }, { "epoch": 0.2690325306294888, "grad_norm": 1.013774037361145, "learning_rate": 2.6720458162553457e-05, "loss": 0.1737499237060547, "step": 1990 }, { "epoch": 0.26916772285593576, "grad_norm": 0.9324135184288025, "learning_rate": 2.6716185501037822e-05, "loss": 0.14194679260253906, "step": 1991 }, { "epoch": 0.26930291508238274, "grad_norm": 1.2155331373214722, "learning_rate": 2.671191040014989e-05, "loss": 0.18551063537597656, "step": 1992 }, { "epoch": 0.2694381073088297, "grad_norm": 2.0454673767089844, "learning_rate": 2.6707632860779756e-05, "loss": 0.17071914672851562, "step": 1993 }, { "epoch": 0.2695732995352767, "grad_norm": 1.2182201147079468, "learning_rate": 2.6703352883818024e-05, "loss": 0.2146005630493164, "step": 1994 }, { "epoch": 0.2697084917617237, "grad_norm": 1.9126328229904175, "learning_rate": 2.6699070470155816e-05, "loss": 0.1687793731689453, "step": 1995 }, { "epoch": 0.26984368398817066, "grad_norm": 1.8489106893539429, "learning_rate": 2.669478562068475e-05, "loss": 0.20245933532714844, "step": 1996 }, { "epoch": 0.26997887621461764, "grad_norm": 1.096756935119629, "learning_rate": 2.6690498336296955e-05, "loss": 0.17561301589012146, "step": 1997 }, { "epoch": 0.2701140684410646, "grad_norm": 1.7402915954589844, "learning_rate": 2.6686208617885057e-05, "loss": 0.21321487426757812, "step": 1998 }, { "epoch": 0.2702492606675116, "grad_norm": 0.9610304832458496, "learning_rate": 2.668191646634221e-05, "loss": 0.07543182373046875, "step": 1999 }, { "epoch": 0.27038445289395857, "grad_norm": 2.4527642726898193, "learning_rate": 2.667762188256206e-05, "loss": 0.24045181274414062, "step": 2000 }, { "epoch": 0.27051964512040555, "grad_norm": 0.9730849266052246, "learning_rate": 2.6673324867438764e-05, "loss": 0.14896011352539062, "step": 2001 }, { "epoch": 0.2706548373468526, "grad_norm": 2.105949878692627, "learning_rate": 2.666902542186698e-05, "loss": 0.17322158813476562, "step": 2002 }, { "epoch": 0.27079002957329956, "grad_norm": 2.5108237266540527, "learning_rate": 2.666472354674187e-05, "loss": 0.28958892822265625, "step": 2003 }, { "epoch": 0.27092522179974654, "grad_norm": 2.3679802417755127, "learning_rate": 2.666041924295912e-05, "loss": 0.2319955825805664, "step": 2004 }, { "epoch": 0.2710604140261935, "grad_norm": 1.2818371057510376, "learning_rate": 2.6656112511414902e-05, "loss": 0.1999378204345703, "step": 2005 }, { "epoch": 0.2711956062526405, "grad_norm": 1.5476380586624146, "learning_rate": 2.6651803353005896e-05, "loss": 0.13533973693847656, "step": 2006 }, { "epoch": 0.2713307984790875, "grad_norm": 1.0504987239837646, "learning_rate": 2.66474917686293e-05, "loss": 0.2556438446044922, "step": 2007 }, { "epoch": 0.27146599070553445, "grad_norm": 2.778482437133789, "learning_rate": 2.664317775918281e-05, "loss": 0.18377113342285156, "step": 2008 }, { "epoch": 0.27160118293198143, "grad_norm": 0.7494098544120789, "learning_rate": 2.6638861325564615e-05, "loss": 0.2092266082763672, "step": 2009 }, { "epoch": 0.2717363751584284, "grad_norm": 1.8601988554000854, "learning_rate": 2.6634542468673432e-05, "loss": 0.18447113037109375, "step": 2010 }, { "epoch": 0.2718715673848754, "grad_norm": 2.059854030609131, "learning_rate": 2.663022118940846e-05, "loss": 0.22228622436523438, "step": 2011 }, { "epoch": 0.27200675961132237, "grad_norm": 1.053313970565796, "learning_rate": 2.662589748866942e-05, "loss": 0.18150997161865234, "step": 2012 }, { "epoch": 0.27214195183776935, "grad_norm": 0.9462782740592957, "learning_rate": 2.6621571367356522e-05, "loss": 0.17937421798706055, "step": 2013 }, { "epoch": 0.2722771440642163, "grad_norm": 5.025650501251221, "learning_rate": 2.6617242826370495e-05, "loss": 0.23069477081298828, "step": 2014 }, { "epoch": 0.2724123362906633, "grad_norm": 0.6408719420433044, "learning_rate": 2.661291186661256e-05, "loss": 0.13713550567626953, "step": 2015 }, { "epoch": 0.2725475285171103, "grad_norm": 5.929315567016602, "learning_rate": 2.6608578488984444e-05, "loss": 0.24613189697265625, "step": 2016 }, { "epoch": 0.27268272074355726, "grad_norm": 1.7467080354690552, "learning_rate": 2.6604242694388388e-05, "loss": 0.220123291015625, "step": 2017 }, { "epoch": 0.27281791297000424, "grad_norm": 3.739717960357666, "learning_rate": 2.6599904483727116e-05, "loss": 0.2046041488647461, "step": 2018 }, { "epoch": 0.2729531051964512, "grad_norm": 2.6116576194763184, "learning_rate": 2.6595563857903872e-05, "loss": 0.17149639129638672, "step": 2019 }, { "epoch": 0.2730882974228982, "grad_norm": 1.0223585367202759, "learning_rate": 2.6591220817822405e-05, "loss": 0.17108917236328125, "step": 2020 }, { "epoch": 0.2732234896493452, "grad_norm": 1.5758193731307983, "learning_rate": 2.658687536438694e-05, "loss": 0.2181262969970703, "step": 2021 }, { "epoch": 0.27335868187579215, "grad_norm": 1.0403705835342407, "learning_rate": 2.6582527498502243e-05, "loss": 0.1490325927734375, "step": 2022 }, { "epoch": 0.27349387410223913, "grad_norm": 2.325801134109497, "learning_rate": 2.6578177221073556e-05, "loss": 0.163970947265625, "step": 2023 }, { "epoch": 0.2736290663286861, "grad_norm": 2.478518009185791, "learning_rate": 2.6573824533006628e-05, "loss": 0.23021697998046875, "step": 2024 }, { "epoch": 0.2737642585551331, "grad_norm": 1.0423294305801392, "learning_rate": 2.6569469435207712e-05, "loss": 0.1589512825012207, "step": 2025 }, { "epoch": 0.27389945078158007, "grad_norm": 2.126283884048462, "learning_rate": 2.656511192858356e-05, "loss": 0.15384626388549805, "step": 2026 }, { "epoch": 0.27403464300802705, "grad_norm": 1.1336771249771118, "learning_rate": 2.6560752014041438e-05, "loss": 0.16162919998168945, "step": 2027 }, { "epoch": 0.274169835234474, "grad_norm": 1.238110899925232, "learning_rate": 2.6556389692489098e-05, "loss": 0.19629192352294922, "step": 2028 }, { "epoch": 0.274305027460921, "grad_norm": 1.2082984447479248, "learning_rate": 2.6552024964834795e-05, "loss": 0.23764610290527344, "step": 2029 }, { "epoch": 0.274440219687368, "grad_norm": 2.066488742828369, "learning_rate": 2.6547657831987286e-05, "loss": 0.23218154907226562, "step": 2030 }, { "epoch": 0.27457541191381496, "grad_norm": 1.7499059438705444, "learning_rate": 2.6543288294855843e-05, "loss": 0.16686058044433594, "step": 2031 }, { "epoch": 0.27471060414026194, "grad_norm": 1.5657683610916138, "learning_rate": 2.653891635435022e-05, "loss": 0.16902732849121094, "step": 2032 }, { "epoch": 0.2748457963667089, "grad_norm": 1.5561699867248535, "learning_rate": 2.653454201138068e-05, "loss": 0.1523299217224121, "step": 2033 }, { "epoch": 0.2749809885931559, "grad_norm": 1.3267085552215576, "learning_rate": 2.653016526685798e-05, "loss": 0.20015335083007812, "step": 2034 }, { "epoch": 0.2751161808196029, "grad_norm": 3.6330716609954834, "learning_rate": 2.6525786121693387e-05, "loss": 0.23436641693115234, "step": 2035 }, { "epoch": 0.27525137304604985, "grad_norm": 1.9537291526794434, "learning_rate": 2.652140457679866e-05, "loss": 0.17034339904785156, "step": 2036 }, { "epoch": 0.27538656527249683, "grad_norm": 2.2563674449920654, "learning_rate": 2.6517020633086064e-05, "loss": 0.17695999145507812, "step": 2037 }, { "epoch": 0.2755217574989438, "grad_norm": 1.1365547180175781, "learning_rate": 2.6512634291468354e-05, "loss": 0.18954849243164062, "step": 2038 }, { "epoch": 0.2756569497253908, "grad_norm": 0.710971474647522, "learning_rate": 2.6508245552858792e-05, "loss": 0.13004636764526367, "step": 2039 }, { "epoch": 0.27579214195183777, "grad_norm": 1.1256245374679565, "learning_rate": 2.6503854418171133e-05, "loss": 0.18889522552490234, "step": 2040 }, { "epoch": 0.27592733417828474, "grad_norm": 1.2299513816833496, "learning_rate": 2.6499460888319644e-05, "loss": 0.09887552261352539, "step": 2041 }, { "epoch": 0.2760625264047317, "grad_norm": 4.2272047996521, "learning_rate": 2.6495064964219073e-05, "loss": 0.25049734115600586, "step": 2042 }, { "epoch": 0.2761977186311787, "grad_norm": 0.8845816850662231, "learning_rate": 2.649066664678467e-05, "loss": 0.1443471908569336, "step": 2043 }, { "epoch": 0.2763329108576257, "grad_norm": 1.8426218032836914, "learning_rate": 2.6486265936932205e-05, "loss": 0.23305320739746094, "step": 2044 }, { "epoch": 0.27646810308407266, "grad_norm": 1.6967228651046753, "learning_rate": 2.6481862835577915e-05, "loss": 0.22339248657226562, "step": 2045 }, { "epoch": 0.27660329531051964, "grad_norm": 3.0935661792755127, "learning_rate": 2.6477457343638557e-05, "loss": 0.2148580551147461, "step": 2046 }, { "epoch": 0.2767384875369666, "grad_norm": 3.3383357524871826, "learning_rate": 2.647304946203137e-05, "loss": 0.22873878479003906, "step": 2047 }, { "epoch": 0.2768736797634136, "grad_norm": 1.2659884691238403, "learning_rate": 2.6468639191674106e-05, "loss": 0.14695262908935547, "step": 2048 }, { "epoch": 0.2770088719898606, "grad_norm": 1.1179301738739014, "learning_rate": 2.6464226533485007e-05, "loss": 0.21669387817382812, "step": 2049 }, { "epoch": 0.27714406421630755, "grad_norm": 1.5578583478927612, "learning_rate": 2.6459811488382806e-05, "loss": 0.2233123779296875, "step": 2050 }, { "epoch": 0.27727925644275453, "grad_norm": 1.8321536779403687, "learning_rate": 2.645539405728674e-05, "loss": 0.20200347900390625, "step": 2051 }, { "epoch": 0.2774144486692015, "grad_norm": 2.425370931625366, "learning_rate": 2.6450974241116545e-05, "loss": 0.16612529754638672, "step": 2052 }, { "epoch": 0.2775496408956485, "grad_norm": 1.711129903793335, "learning_rate": 2.644655204079245e-05, "loss": 0.1820354461669922, "step": 2053 }, { "epoch": 0.27768483312209546, "grad_norm": 1.1385349035263062, "learning_rate": 2.6442127457235177e-05, "loss": 0.16158390045166016, "step": 2054 }, { "epoch": 0.27782002534854244, "grad_norm": 1.342136263847351, "learning_rate": 2.6437700491365957e-05, "loss": 0.1511087417602539, "step": 2055 }, { "epoch": 0.2779552175749894, "grad_norm": 0.6681981086730957, "learning_rate": 2.6433271144106495e-05, "loss": 0.17515087127685547, "step": 2056 }, { "epoch": 0.2780904098014364, "grad_norm": 2.1738009452819824, "learning_rate": 2.6428839416379015e-05, "loss": 0.20028305053710938, "step": 2057 }, { "epoch": 0.2782256020278834, "grad_norm": 1.324000358581543, "learning_rate": 2.642440530910622e-05, "loss": 0.19054412841796875, "step": 2058 }, { "epoch": 0.27836079425433036, "grad_norm": 1.2259124517440796, "learning_rate": 2.6419968823211318e-05, "loss": 0.1987910270690918, "step": 2059 }, { "epoch": 0.27849598648077734, "grad_norm": 0.9730979800224304, "learning_rate": 2.641552995961801e-05, "loss": 0.11353349685668945, "step": 2060 }, { "epoch": 0.2786311787072243, "grad_norm": 0.8566688299179077, "learning_rate": 2.6411088719250484e-05, "loss": 0.18774032592773438, "step": 2061 }, { "epoch": 0.2787663709336713, "grad_norm": 1.7345997095108032, "learning_rate": 2.6406645103033442e-05, "loss": 0.20612430572509766, "step": 2062 }, { "epoch": 0.27890156316011827, "grad_norm": 0.6053770780563354, "learning_rate": 2.640219911189206e-05, "loss": 0.13812801241874695, "step": 2063 }, { "epoch": 0.27903675538656525, "grad_norm": 2.2172610759735107, "learning_rate": 2.6397750746752015e-05, "loss": 0.17090511322021484, "step": 2064 }, { "epoch": 0.27917194761301223, "grad_norm": 1.883966326713562, "learning_rate": 2.6393300008539488e-05, "loss": 0.24287033081054688, "step": 2065 }, { "epoch": 0.2793071398394592, "grad_norm": 3.5898597240448, "learning_rate": 2.6388846898181143e-05, "loss": 0.26644325256347656, "step": 2066 }, { "epoch": 0.2794423320659062, "grad_norm": 1.5288238525390625, "learning_rate": 2.6384391416604142e-05, "loss": 0.21924781799316406, "step": 2067 }, { "epoch": 0.27957752429235316, "grad_norm": 3.033341407775879, "learning_rate": 2.6379933564736136e-05, "loss": 0.16727256774902344, "step": 2068 }, { "epoch": 0.27971271651880014, "grad_norm": 2.6416549682617188, "learning_rate": 2.637547334350528e-05, "loss": 0.18758773803710938, "step": 2069 }, { "epoch": 0.2798479087452472, "grad_norm": 0.874432384967804, "learning_rate": 2.637101075384021e-05, "loss": 0.18726778030395508, "step": 2070 }, { "epoch": 0.27998310097169415, "grad_norm": 1.4221959114074707, "learning_rate": 2.636654579667006e-05, "loss": 0.21475791931152344, "step": 2071 }, { "epoch": 0.28011829319814113, "grad_norm": 1.5142220258712769, "learning_rate": 2.6362078472924467e-05, "loss": 0.23212432861328125, "step": 2072 }, { "epoch": 0.2802534854245881, "grad_norm": 1.2241703271865845, "learning_rate": 2.6357608783533545e-05, "loss": 0.2368154525756836, "step": 2073 }, { "epoch": 0.2803886776510351, "grad_norm": 4.463649749755859, "learning_rate": 2.6353136729427907e-05, "loss": 0.23796844482421875, "step": 2074 }, { "epoch": 0.28052386987748207, "grad_norm": 3.8595693111419678, "learning_rate": 2.6348662311538657e-05, "loss": 0.22257232666015625, "step": 2075 }, { "epoch": 0.28065906210392905, "grad_norm": 2.0826025009155273, "learning_rate": 2.6344185530797398e-05, "loss": 0.2516956329345703, "step": 2076 }, { "epoch": 0.280794254330376, "grad_norm": 1.2585065364837646, "learning_rate": 2.633970638813622e-05, "loss": 0.1693572998046875, "step": 2077 }, { "epoch": 0.280929446556823, "grad_norm": 2.590708017349243, "learning_rate": 2.6335224884487698e-05, "loss": 0.2310924530029297, "step": 2078 }, { "epoch": 0.28106463878327, "grad_norm": 1.8109679222106934, "learning_rate": 2.6330741020784905e-05, "loss": 0.1899118423461914, "step": 2079 }, { "epoch": 0.28119983100971696, "grad_norm": 2.415010452270508, "learning_rate": 2.6326254797961415e-05, "loss": 0.19678401947021484, "step": 2080 }, { "epoch": 0.28133502323616394, "grad_norm": 0.8185734152793884, "learning_rate": 2.6321766216951273e-05, "loss": 0.1306161880493164, "step": 2081 }, { "epoch": 0.2814702154626109, "grad_norm": 1.7102469205856323, "learning_rate": 2.631727527868903e-05, "loss": 0.14808940887451172, "step": 2082 }, { "epoch": 0.2816054076890579, "grad_norm": 1.7741369009017944, "learning_rate": 2.6312781984109727e-05, "loss": 0.24209308624267578, "step": 2083 }, { "epoch": 0.2817405999155049, "grad_norm": 2.03049373626709, "learning_rate": 2.6308286334148882e-05, "loss": 0.204376220703125, "step": 2084 }, { "epoch": 0.28187579214195185, "grad_norm": 1.3151803016662598, "learning_rate": 2.630378832974252e-05, "loss": 0.21219635009765625, "step": 2085 }, { "epoch": 0.28201098436839883, "grad_norm": 1.9331953525543213, "learning_rate": 2.6299287971827154e-05, "loss": 0.17891645431518555, "step": 2086 }, { "epoch": 0.2821461765948458, "grad_norm": 0.914463460445404, "learning_rate": 2.629478526133977e-05, "loss": 0.18464088439941406, "step": 2087 }, { "epoch": 0.2822813688212928, "grad_norm": 1.1610910892486572, "learning_rate": 2.6290280199217867e-05, "loss": 0.22725963592529297, "step": 2088 }, { "epoch": 0.28241656104773977, "grad_norm": 0.8498688340187073, "learning_rate": 2.6285772786399424e-05, "loss": 0.1393585205078125, "step": 2089 }, { "epoch": 0.28255175327418675, "grad_norm": 1.1159546375274658, "learning_rate": 2.6281263023822894e-05, "loss": 0.18888092041015625, "step": 2090 }, { "epoch": 0.2826869455006337, "grad_norm": 0.9358221888542175, "learning_rate": 2.627675091242725e-05, "loss": 0.1728057861328125, "step": 2091 }, { "epoch": 0.2828221377270807, "grad_norm": 0.8911249041557312, "learning_rate": 2.627223645315193e-05, "loss": 0.1833019256591797, "step": 2092 }, { "epoch": 0.2829573299535277, "grad_norm": 0.6716177463531494, "learning_rate": 2.6267719646936868e-05, "loss": 0.15749549865722656, "step": 2093 }, { "epoch": 0.28309252217997466, "grad_norm": 0.5911497473716736, "learning_rate": 2.626320049472249e-05, "loss": 0.1805562973022461, "step": 2094 }, { "epoch": 0.28322771440642164, "grad_norm": 3.019662857055664, "learning_rate": 2.6258678997449705e-05, "loss": 0.2205810546875, "step": 2095 }, { "epoch": 0.2833629066328686, "grad_norm": 1.71770441532135, "learning_rate": 2.6254155156059912e-05, "loss": 0.21925830841064453, "step": 2096 }, { "epoch": 0.2834980988593156, "grad_norm": 2.4266555309295654, "learning_rate": 2.6249628971495006e-05, "loss": 0.19247817993164062, "step": 2097 }, { "epoch": 0.2836332910857626, "grad_norm": 2.4780919551849365, "learning_rate": 2.6245100444697353e-05, "loss": 0.19807052612304688, "step": 2098 }, { "epoch": 0.28376848331220955, "grad_norm": 1.7566062211990356, "learning_rate": 2.6240569576609824e-05, "loss": 0.17220664024353027, "step": 2099 }, { "epoch": 0.28390367553865653, "grad_norm": 0.947040855884552, "learning_rate": 2.623603636817577e-05, "loss": 0.15949058532714844, "step": 2100 }, { "epoch": 0.2840388677651035, "grad_norm": 1.0521942377090454, "learning_rate": 2.6231500820339024e-05, "loss": 0.15768814086914062, "step": 2101 }, { "epoch": 0.2841740599915505, "grad_norm": 1.3224241733551025, "learning_rate": 2.6226962934043913e-05, "loss": 0.17158126831054688, "step": 2102 }, { "epoch": 0.28430925221799747, "grad_norm": 0.967298686504364, "learning_rate": 2.622242271023525e-05, "loss": 0.14494800567626953, "step": 2103 }, { "epoch": 0.28444444444444444, "grad_norm": 1.3841006755828857, "learning_rate": 2.6217880149858333e-05, "loss": 0.15308713912963867, "step": 2104 }, { "epoch": 0.2845796366708914, "grad_norm": 1.7650747299194336, "learning_rate": 2.621333525385895e-05, "loss": 0.21657776832580566, "step": 2105 }, { "epoch": 0.2847148288973384, "grad_norm": 2.175161361694336, "learning_rate": 2.6208788023183366e-05, "loss": 0.2273712158203125, "step": 2106 }, { "epoch": 0.2848500211237854, "grad_norm": 2.157089948654175, "learning_rate": 2.6204238458778346e-05, "loss": 0.28598785400390625, "step": 2107 }, { "epoch": 0.28498521335023236, "grad_norm": 0.8159327507019043, "learning_rate": 2.619968656159113e-05, "loss": 0.1382458209991455, "step": 2108 }, { "epoch": 0.28512040557667934, "grad_norm": 2.7145349979400635, "learning_rate": 2.6195132332569445e-05, "loss": 0.2370128631591797, "step": 2109 }, { "epoch": 0.2852555978031263, "grad_norm": 0.8647373914718628, "learning_rate": 2.619057577266151e-05, "loss": 0.18473148345947266, "step": 2110 }, { "epoch": 0.2853907900295733, "grad_norm": 1.1508220434188843, "learning_rate": 2.6186016882816027e-05, "loss": 0.189117431640625, "step": 2111 }, { "epoch": 0.28552598225602027, "grad_norm": 1.8512518405914307, "learning_rate": 2.6181455663982175e-05, "loss": 0.19964981079101562, "step": 2112 }, { "epoch": 0.28566117448246725, "grad_norm": 1.2716434001922607, "learning_rate": 2.6176892117109628e-05, "loss": 0.2062397003173828, "step": 2113 }, { "epoch": 0.28579636670891423, "grad_norm": 1.1229850053787231, "learning_rate": 2.617232624314854e-05, "loss": 0.20491981506347656, "step": 2114 }, { "epoch": 0.2859315589353612, "grad_norm": 0.8642187118530273, "learning_rate": 2.616775804304955e-05, "loss": 0.17128610610961914, "step": 2115 }, { "epoch": 0.2860667511618082, "grad_norm": 0.7997422814369202, "learning_rate": 2.616318751776378e-05, "loss": 0.11554145812988281, "step": 2116 }, { "epoch": 0.28620194338825516, "grad_norm": 0.8639920949935913, "learning_rate": 2.615861466824284e-05, "loss": 0.15627312660217285, "step": 2117 }, { "epoch": 0.28633713561470214, "grad_norm": 0.7817961573600769, "learning_rate": 2.6154039495438825e-05, "loss": 0.16451644897460938, "step": 2118 }, { "epoch": 0.2864723278411491, "grad_norm": 2.4173061847686768, "learning_rate": 2.6149462000304302e-05, "loss": 0.21515464782714844, "step": 2119 }, { "epoch": 0.2866075200675961, "grad_norm": 1.2405390739440918, "learning_rate": 2.6144882183792335e-05, "loss": 0.1426839828491211, "step": 2120 }, { "epoch": 0.2867427122940431, "grad_norm": 1.2057969570159912, "learning_rate": 2.6140300046856468e-05, "loss": 0.1967940330505371, "step": 2121 }, { "epoch": 0.28687790452049006, "grad_norm": 2.0398120880126953, "learning_rate": 2.6135715590450722e-05, "loss": 0.1947154998779297, "step": 2122 }, { "epoch": 0.28701309674693704, "grad_norm": 1.1911096572875977, "learning_rate": 2.6131128815529608e-05, "loss": 0.22559738159179688, "step": 2123 }, { "epoch": 0.287148288973384, "grad_norm": 2.6959095001220703, "learning_rate": 2.6126539723048115e-05, "loss": 0.25240516662597656, "step": 2124 }, { "epoch": 0.287283481199831, "grad_norm": 1.7841055393218994, "learning_rate": 2.612194831396172e-05, "loss": 0.15496039390563965, "step": 2125 }, { "epoch": 0.28741867342627797, "grad_norm": 1.9539332389831543, "learning_rate": 2.611735458922637e-05, "loss": 0.2016277313232422, "step": 2126 }, { "epoch": 0.28755386565272495, "grad_norm": 1.4427502155303955, "learning_rate": 2.6112758549798515e-05, "loss": 0.19338607788085938, "step": 2127 }, { "epoch": 0.2876890578791719, "grad_norm": 1.5410175323486328, "learning_rate": 2.610816019663507e-05, "loss": 0.24729251861572266, "step": 2128 }, { "epoch": 0.2878242501056189, "grad_norm": 1.2036269903182983, "learning_rate": 2.6103559530693436e-05, "loss": 0.23131179809570312, "step": 2129 }, { "epoch": 0.2879594423320659, "grad_norm": 1.691206455230713, "learning_rate": 2.6098956552931495e-05, "loss": 0.20843935012817383, "step": 2130 }, { "epoch": 0.28809463455851286, "grad_norm": 1.5262420177459717, "learning_rate": 2.6094351264307613e-05, "loss": 0.20964813232421875, "step": 2131 }, { "epoch": 0.28822982678495984, "grad_norm": 1.7162680625915527, "learning_rate": 2.6089743665780635e-05, "loss": 0.2512989044189453, "step": 2132 }, { "epoch": 0.2883650190114068, "grad_norm": 2.3404204845428467, "learning_rate": 2.6085133758309887e-05, "loss": 0.20935297012329102, "step": 2133 }, { "epoch": 0.2885002112378538, "grad_norm": 1.238438606262207, "learning_rate": 2.6080521542855182e-05, "loss": 0.2020893096923828, "step": 2134 }, { "epoch": 0.2886354034643008, "grad_norm": 1.7752658128738403, "learning_rate": 2.60759070203768e-05, "loss": 0.17306995391845703, "step": 2135 }, { "epoch": 0.28877059569074776, "grad_norm": 1.4922157526016235, "learning_rate": 2.607129019183551e-05, "loss": 0.13219031691551208, "step": 2136 }, { "epoch": 0.28890578791719473, "grad_norm": 1.0802258253097534, "learning_rate": 2.6066671058192566e-05, "loss": 0.16423606872558594, "step": 2137 }, { "epoch": 0.28904098014364177, "grad_norm": 1.4209825992584229, "learning_rate": 2.606204962040969e-05, "loss": 0.20698165893554688, "step": 2138 }, { "epoch": 0.28917617237008875, "grad_norm": 1.1673367023468018, "learning_rate": 2.6057425879449095e-05, "loss": 0.1962413787841797, "step": 2139 }, { "epoch": 0.2893113645965357, "grad_norm": 2.984081268310547, "learning_rate": 2.605279983627347e-05, "loss": 0.17170333862304688, "step": 2140 }, { "epoch": 0.2894465568229827, "grad_norm": 1.107867956161499, "learning_rate": 2.6048171491845974e-05, "loss": 0.20021629333496094, "step": 2141 }, { "epoch": 0.2895817490494297, "grad_norm": 2.630889892578125, "learning_rate": 2.604354084713026e-05, "loss": 0.16148757934570312, "step": 2142 }, { "epoch": 0.28971694127587666, "grad_norm": 2.610772132873535, "learning_rate": 2.6038907903090446e-05, "loss": 0.1705150604248047, "step": 2143 }, { "epoch": 0.28985213350232364, "grad_norm": 1.5406900644302368, "learning_rate": 2.6034272660691143e-05, "loss": 0.19655418395996094, "step": 2144 }, { "epoch": 0.2899873257287706, "grad_norm": 3.0631935596466064, "learning_rate": 2.6029635120897434e-05, "loss": 0.1822071075439453, "step": 2145 }, { "epoch": 0.2901225179552176, "grad_norm": 1.0158112049102783, "learning_rate": 2.6024995284674867e-05, "loss": 0.21086883544921875, "step": 2146 }, { "epoch": 0.2902577101816646, "grad_norm": 1.1145544052124023, "learning_rate": 2.6020353152989496e-05, "loss": 0.2055506706237793, "step": 2147 }, { "epoch": 0.29039290240811155, "grad_norm": 1.0146127939224243, "learning_rate": 2.601570872680783e-05, "loss": 0.15993070602416992, "step": 2148 }, { "epoch": 0.29052809463455853, "grad_norm": 1.011948585510254, "learning_rate": 2.6011062007096857e-05, "loss": 0.1594257354736328, "step": 2149 }, { "epoch": 0.2906632868610055, "grad_norm": 1.379586935043335, "learning_rate": 2.6006412994824067e-05, "loss": 0.2632780075073242, "step": 2150 }, { "epoch": 0.2907984790874525, "grad_norm": 1.061566710472107, "learning_rate": 2.6001761690957388e-05, "loss": 0.1822139024734497, "step": 2151 }, { "epoch": 0.29093367131389947, "grad_norm": 1.3806735277175903, "learning_rate": 2.5997108096465263e-05, "loss": 0.1765270233154297, "step": 2152 }, { "epoch": 0.29106886354034645, "grad_norm": 1.530015230178833, "learning_rate": 2.599245221231659e-05, "loss": 0.24334716796875, "step": 2153 }, { "epoch": 0.2912040557667934, "grad_norm": 0.8990092277526855, "learning_rate": 2.5987794039480743e-05, "loss": 0.15089893341064453, "step": 2154 }, { "epoch": 0.2913392479932404, "grad_norm": 1.0194252729415894, "learning_rate": 2.5983133578927584e-05, "loss": 0.17722558975219727, "step": 2155 }, { "epoch": 0.2914744402196874, "grad_norm": 0.8056871891021729, "learning_rate": 2.5978470831627444e-05, "loss": 0.16796493530273438, "step": 2156 }, { "epoch": 0.29160963244613436, "grad_norm": 0.8277191519737244, "learning_rate": 2.597380579855113e-05, "loss": 0.1648712158203125, "step": 2157 }, { "epoch": 0.29174482467258134, "grad_norm": 1.2488585710525513, "learning_rate": 2.5969138480669936e-05, "loss": 0.14107894897460938, "step": 2158 }, { "epoch": 0.2918800168990283, "grad_norm": 1.1313446760177612, "learning_rate": 2.5964468878955614e-05, "loss": 0.2240290641784668, "step": 2159 }, { "epoch": 0.2920152091254753, "grad_norm": 2.055948257446289, "learning_rate": 2.5959796994380397e-05, "loss": 0.2697296142578125, "step": 2160 }, { "epoch": 0.2921504013519223, "grad_norm": 1.0354812145233154, "learning_rate": 2.5955122827917004e-05, "loss": 0.17512941360473633, "step": 2161 }, { "epoch": 0.29228559357836925, "grad_norm": 1.991715669631958, "learning_rate": 2.595044638053862e-05, "loss": 0.22813034057617188, "step": 2162 }, { "epoch": 0.29242078580481623, "grad_norm": 1.2075145244598389, "learning_rate": 2.59457676532189e-05, "loss": 0.20118427276611328, "step": 2163 }, { "epoch": 0.2925559780312632, "grad_norm": 1.4355688095092773, "learning_rate": 2.594108664693199e-05, "loss": 0.14262676239013672, "step": 2164 }, { "epoch": 0.2926911702577102, "grad_norm": 0.989732027053833, "learning_rate": 2.5936403362652494e-05, "loss": 0.14153289794921875, "step": 2165 }, { "epoch": 0.29282636248415717, "grad_norm": 0.8272646069526672, "learning_rate": 2.5931717801355497e-05, "loss": 0.16473770141601562, "step": 2166 }, { "epoch": 0.29296155471060414, "grad_norm": 1.2441359758377075, "learning_rate": 2.5927029964016556e-05, "loss": 0.2236347198486328, "step": 2167 }, { "epoch": 0.2930967469370511, "grad_norm": 1.6876704692840576, "learning_rate": 2.592233985161171e-05, "loss": 0.25666236877441406, "step": 2168 }, { "epoch": 0.2932319391634981, "grad_norm": 2.8428456783294678, "learning_rate": 2.5917647465117463e-05, "loss": 0.18865680694580078, "step": 2169 }, { "epoch": 0.2933671313899451, "grad_norm": 2.0724146366119385, "learning_rate": 2.591295280551079e-05, "loss": 0.20254135131835938, "step": 2170 }, { "epoch": 0.29350232361639206, "grad_norm": 1.1036986112594604, "learning_rate": 2.590825587376915e-05, "loss": 0.2016429901123047, "step": 2171 }, { "epoch": 0.29363751584283904, "grad_norm": 1.9135407209396362, "learning_rate": 2.5903556670870464e-05, "loss": 0.19191265106201172, "step": 2172 }, { "epoch": 0.293772708069286, "grad_norm": 1.7431026697158813, "learning_rate": 2.589885519779314e-05, "loss": 0.18315362930297852, "step": 2173 }, { "epoch": 0.293907900295733, "grad_norm": 2.34696626663208, "learning_rate": 2.5894151455516043e-05, "loss": 0.18303298950195312, "step": 2174 }, { "epoch": 0.29404309252217997, "grad_norm": 3.4167516231536865, "learning_rate": 2.5889445445018513e-05, "loss": 0.2373952865600586, "step": 2175 }, { "epoch": 0.29417828474862695, "grad_norm": 1.6996597051620483, "learning_rate": 2.5884737167280375e-05, "loss": 0.21302413940429688, "step": 2176 }, { "epoch": 0.29431347697507393, "grad_norm": 2.1689019203186035, "learning_rate": 2.5880026623281914e-05, "loss": 0.180267333984375, "step": 2177 }, { "epoch": 0.2944486692015209, "grad_norm": 1.6127623319625854, "learning_rate": 2.5875313814003892e-05, "loss": 0.22878265380859375, "step": 2178 }, { "epoch": 0.2945838614279679, "grad_norm": 1.3531789779663086, "learning_rate": 2.587059874042754e-05, "loss": 0.13474464416503906, "step": 2179 }, { "epoch": 0.29471905365441486, "grad_norm": 2.838716745376587, "learning_rate": 2.5865881403534557e-05, "loss": 0.1846942901611328, "step": 2180 }, { "epoch": 0.29485424588086184, "grad_norm": 3.6311187744140625, "learning_rate": 2.5861161804307124e-05, "loss": 0.218994140625, "step": 2181 }, { "epoch": 0.2949894381073088, "grad_norm": 1.2332576513290405, "learning_rate": 2.5856439943727886e-05, "loss": 0.1806640625, "step": 2182 }, { "epoch": 0.2951246303337558, "grad_norm": 2.6649587154388428, "learning_rate": 2.5851715822779954e-05, "loss": 0.2530508041381836, "step": 2183 }, { "epoch": 0.2952598225602028, "grad_norm": 1.1564098596572876, "learning_rate": 2.5846989442446926e-05, "loss": 0.2039661407470703, "step": 2184 }, { "epoch": 0.29539501478664976, "grad_norm": 1.3246471881866455, "learning_rate": 2.584226080371285e-05, "loss": 0.1866436004638672, "step": 2185 }, { "epoch": 0.29553020701309674, "grad_norm": 1.6912167072296143, "learning_rate": 2.5837529907562258e-05, "loss": 0.17899036407470703, "step": 2186 }, { "epoch": 0.2956653992395437, "grad_norm": 1.0368320941925049, "learning_rate": 2.5832796754980138e-05, "loss": 0.1720564365386963, "step": 2187 }, { "epoch": 0.2958005914659907, "grad_norm": 1.5400532484054565, "learning_rate": 2.5828061346951974e-05, "loss": 0.2284374237060547, "step": 2188 }, { "epoch": 0.29593578369243767, "grad_norm": 1.1747688055038452, "learning_rate": 2.5823323684463693e-05, "loss": 0.1628575325012207, "step": 2189 }, { "epoch": 0.29607097591888465, "grad_norm": 1.5635459423065186, "learning_rate": 2.5818583768501708e-05, "loss": 0.1734914779663086, "step": 2190 }, { "epoch": 0.2962061681453316, "grad_norm": 2.3111798763275146, "learning_rate": 2.5813841600052887e-05, "loss": 0.179046630859375, "step": 2191 }, { "epoch": 0.2963413603717786, "grad_norm": 2.208799362182617, "learning_rate": 2.580909718010458e-05, "loss": 0.22995758056640625, "step": 2192 }, { "epoch": 0.2964765525982256, "grad_norm": 1.7966690063476562, "learning_rate": 2.58043505096446e-05, "loss": 0.18059682846069336, "step": 2193 }, { "epoch": 0.29661174482467256, "grad_norm": 2.3113040924072266, "learning_rate": 2.5799601589661223e-05, "loss": 0.23362350463867188, "step": 2194 }, { "epoch": 0.29674693705111954, "grad_norm": 0.5754366517066956, "learning_rate": 2.579485042114321e-05, "loss": 0.11508560180664062, "step": 2195 }, { "epoch": 0.2968821292775665, "grad_norm": 1.8896969556808472, "learning_rate": 2.5790097005079766e-05, "loss": 0.1593233346939087, "step": 2196 }, { "epoch": 0.2970173215040135, "grad_norm": 2.7346065044403076, "learning_rate": 2.5785341342460595e-05, "loss": 0.1968708038330078, "step": 2197 }, { "epoch": 0.2971525137304605, "grad_norm": 1.0837639570236206, "learning_rate": 2.5780583434275837e-05, "loss": 0.15680599212646484, "step": 2198 }, { "epoch": 0.29728770595690746, "grad_norm": 1.816139817237854, "learning_rate": 2.577582328151612e-05, "loss": 0.1582932472229004, "step": 2199 }, { "epoch": 0.29742289818335443, "grad_norm": 1.0152108669281006, "learning_rate": 2.5771060885172532e-05, "loss": 0.15219879150390625, "step": 2200 }, { "epoch": 0.2975580904098014, "grad_norm": 3.9077744483947754, "learning_rate": 2.5766296246236628e-05, "loss": 0.22844886779785156, "step": 2201 }, { "epoch": 0.2976932826362484, "grad_norm": 1.4241547584533691, "learning_rate": 2.5761529365700437e-05, "loss": 0.1748981475830078, "step": 2202 }, { "epoch": 0.29782847486269537, "grad_norm": 2.3375051021575928, "learning_rate": 2.5756760244556445e-05, "loss": 0.14005756378173828, "step": 2203 }, { "epoch": 0.29796366708914235, "grad_norm": 1.7697644233703613, "learning_rate": 2.5751988883797603e-05, "loss": 0.15149545669555664, "step": 2204 }, { "epoch": 0.2980988593155893, "grad_norm": 6.114085674285889, "learning_rate": 2.574721528441734e-05, "loss": 0.28840065002441406, "step": 2205 }, { "epoch": 0.29823405154203636, "grad_norm": 3.2697112560272217, "learning_rate": 2.5742439447409545e-05, "loss": 0.2531318664550781, "step": 2206 }, { "epoch": 0.29836924376848334, "grad_norm": 2.8559587001800537, "learning_rate": 2.5737661373768568e-05, "loss": 0.23653221130371094, "step": 2207 }, { "epoch": 0.2985044359949303, "grad_norm": 1.164550542831421, "learning_rate": 2.5732881064489237e-05, "loss": 0.17854595184326172, "step": 2208 }, { "epoch": 0.2986396282213773, "grad_norm": 1.0126031637191772, "learning_rate": 2.572809852056683e-05, "loss": 0.15370619297027588, "step": 2209 }, { "epoch": 0.2987748204478243, "grad_norm": 1.3241701126098633, "learning_rate": 2.572331374299711e-05, "loss": 0.2446765899658203, "step": 2210 }, { "epoch": 0.29891001267427125, "grad_norm": 1.987065076828003, "learning_rate": 2.5718526732776276e-05, "loss": 0.2176513671875, "step": 2211 }, { "epoch": 0.29904520490071823, "grad_norm": 2.3771207332611084, "learning_rate": 2.5713737490901023e-05, "loss": 0.23114395141601562, "step": 2212 }, { "epoch": 0.2991803971271652, "grad_norm": 2.6928560733795166, "learning_rate": 2.570894601836849e-05, "loss": 0.19826126098632812, "step": 2213 }, { "epoch": 0.2993155893536122, "grad_norm": 3.1843361854553223, "learning_rate": 2.5704152316176287e-05, "loss": 0.19635391235351562, "step": 2214 }, { "epoch": 0.29945078158005917, "grad_norm": 3.18815541267395, "learning_rate": 2.5699356385322487e-05, "loss": 0.25201416015625, "step": 2215 }, { "epoch": 0.29958597380650615, "grad_norm": 1.2190203666687012, "learning_rate": 2.5694558226805643e-05, "loss": 0.16676998138427734, "step": 2216 }, { "epoch": 0.2997211660329531, "grad_norm": 0.8309981226921082, "learning_rate": 2.568975784162474e-05, "loss": 0.18161582946777344, "step": 2217 }, { "epoch": 0.2998563582594001, "grad_norm": 0.6892561316490173, "learning_rate": 2.5684955230779245e-05, "loss": 0.12321281433105469, "step": 2218 }, { "epoch": 0.2999915504858471, "grad_norm": 1.2423349618911743, "learning_rate": 2.5680150395269096e-05, "loss": 0.2083759307861328, "step": 2219 }, { "epoch": 0.30012674271229406, "grad_norm": 1.6214042901992798, "learning_rate": 2.5675343336094683e-05, "loss": 0.1895122528076172, "step": 2220 }, { "epoch": 0.30026193493874104, "grad_norm": 0.8464069366455078, "learning_rate": 2.5670534054256855e-05, "loss": 0.17712879180908203, "step": 2221 }, { "epoch": 0.300397127165188, "grad_norm": 0.580012321472168, "learning_rate": 2.5665722550756937e-05, "loss": 0.1158294677734375, "step": 2222 }, { "epoch": 0.300532319391635, "grad_norm": 2.591585159301758, "learning_rate": 2.5660908826596707e-05, "loss": 0.22759437561035156, "step": 2223 }, { "epoch": 0.300667511618082, "grad_norm": 3.2795629501342773, "learning_rate": 2.5656092882778413e-05, "loss": 0.23479270935058594, "step": 2224 }, { "epoch": 0.30080270384452895, "grad_norm": 1.3287330865859985, "learning_rate": 2.565127472030475e-05, "loss": 0.20707225799560547, "step": 2225 }, { "epoch": 0.30093789607097593, "grad_norm": 1.2254912853240967, "learning_rate": 2.5646454340178894e-05, "loss": 0.13736248016357422, "step": 2226 }, { "epoch": 0.3010730882974229, "grad_norm": 2.279569149017334, "learning_rate": 2.564163174340447e-05, "loss": 0.16182327270507812, "step": 2227 }, { "epoch": 0.3012082805238699, "grad_norm": 1.6269927024841309, "learning_rate": 2.5636806930985565e-05, "loss": 0.22962188720703125, "step": 2228 }, { "epoch": 0.30134347275031687, "grad_norm": 1.3123782873153687, "learning_rate": 2.5631979903926738e-05, "loss": 0.18151378631591797, "step": 2229 }, { "epoch": 0.30147866497676384, "grad_norm": 1.6872673034667969, "learning_rate": 2.5627150663233e-05, "loss": 0.18174314498901367, "step": 2230 }, { "epoch": 0.3016138572032108, "grad_norm": 1.3398396968841553, "learning_rate": 2.5622319209909817e-05, "loss": 0.17935657501220703, "step": 2231 }, { "epoch": 0.3017490494296578, "grad_norm": 1.0318453311920166, "learning_rate": 2.5617485544963135e-05, "loss": 0.1580047607421875, "step": 2232 }, { "epoch": 0.3018842416561048, "grad_norm": 1.3219295740127563, "learning_rate": 2.561264966939934e-05, "loss": 0.1755967140197754, "step": 2233 }, { "epoch": 0.30201943388255176, "grad_norm": 1.2998502254486084, "learning_rate": 2.5607811584225294e-05, "loss": 0.17787599563598633, "step": 2234 }, { "epoch": 0.30215462610899874, "grad_norm": 0.6230987906455994, "learning_rate": 2.5602971290448305e-05, "loss": 0.14884185791015625, "step": 2235 }, { "epoch": 0.3022898183354457, "grad_norm": 4.1620354652404785, "learning_rate": 2.5598128789076152e-05, "loss": 0.1973257064819336, "step": 2236 }, { "epoch": 0.3024250105618927, "grad_norm": 2.0815341472625732, "learning_rate": 2.559328408111707e-05, "loss": 0.1889514923095703, "step": 2237 }, { "epoch": 0.30256020278833967, "grad_norm": 3.2900888919830322, "learning_rate": 2.5588437167579755e-05, "loss": 0.23741531372070312, "step": 2238 }, { "epoch": 0.30269539501478665, "grad_norm": 2.445986747741699, "learning_rate": 2.558358804947335e-05, "loss": 0.2249622344970703, "step": 2239 }, { "epoch": 0.30283058724123363, "grad_norm": 1.1287540197372437, "learning_rate": 2.557873672780748e-05, "loss": 0.14392662048339844, "step": 2240 }, { "epoch": 0.3029657794676806, "grad_norm": 1.2125771045684814, "learning_rate": 2.557388320359221e-05, "loss": 0.1726994514465332, "step": 2241 }, { "epoch": 0.3031009716941276, "grad_norm": 1.1019476652145386, "learning_rate": 2.5569027477838068e-05, "loss": 0.21356868743896484, "step": 2242 }, { "epoch": 0.30323616392057456, "grad_norm": 1.0761303901672363, "learning_rate": 2.5564169551556044e-05, "loss": 0.17145156860351562, "step": 2243 }, { "epoch": 0.30337135614702154, "grad_norm": 2.25685453414917, "learning_rate": 2.5559309425757586e-05, "loss": 0.236846923828125, "step": 2244 }, { "epoch": 0.3035065483734685, "grad_norm": 1.376454472541809, "learning_rate": 2.5554447101454597e-05, "loss": 0.23953962326049805, "step": 2245 }, { "epoch": 0.3036417405999155, "grad_norm": 3.6749227046966553, "learning_rate": 2.554958257965944e-05, "loss": 0.19617700576782227, "step": 2246 }, { "epoch": 0.3037769328263625, "grad_norm": 2.1911346912384033, "learning_rate": 2.554471586138493e-05, "loss": 0.155059814453125, "step": 2247 }, { "epoch": 0.30391212505280946, "grad_norm": 1.566085696220398, "learning_rate": 2.5539846947644342e-05, "loss": 0.1541481614112854, "step": 2248 }, { "epoch": 0.30404731727925643, "grad_norm": 1.2843924760818481, "learning_rate": 2.5534975839451416e-05, "loss": 0.19762039184570312, "step": 2249 }, { "epoch": 0.3041825095057034, "grad_norm": 1.8344876766204834, "learning_rate": 2.5530102537820348e-05, "loss": 0.18524169921875, "step": 2250 }, { "epoch": 0.3043177017321504, "grad_norm": 2.5840392112731934, "learning_rate": 2.5525227043765774e-05, "loss": 0.22797775268554688, "step": 2251 }, { "epoch": 0.30445289395859737, "grad_norm": 3.2412056922912598, "learning_rate": 2.55203493583028e-05, "loss": 0.20225143432617188, "step": 2252 }, { "epoch": 0.30458808618504435, "grad_norm": 4.206811904907227, "learning_rate": 2.551546948244699e-05, "loss": 0.16933536529541016, "step": 2253 }, { "epoch": 0.3047232784114913, "grad_norm": 1.2663875818252563, "learning_rate": 2.551058741721436e-05, "loss": 0.2192707061767578, "step": 2254 }, { "epoch": 0.3048584706379383, "grad_norm": 0.8034036755561829, "learning_rate": 2.550570316362138e-05, "loss": 0.18777036666870117, "step": 2255 }, { "epoch": 0.3049936628643853, "grad_norm": 1.8470826148986816, "learning_rate": 2.5500816722684975e-05, "loss": 0.1671457290649414, "step": 2256 }, { "epoch": 0.30512885509083226, "grad_norm": 0.9032037258148193, "learning_rate": 2.549592809542253e-05, "loss": 0.16002678871154785, "step": 2257 }, { "epoch": 0.30526404731727924, "grad_norm": 1.2584655284881592, "learning_rate": 2.549103728285189e-05, "loss": 0.20996952056884766, "step": 2258 }, { "epoch": 0.3053992395437262, "grad_norm": 1.2696239948272705, "learning_rate": 2.548614428599134e-05, "loss": 0.16689586639404297, "step": 2259 }, { "epoch": 0.3055344317701732, "grad_norm": 1.2131980657577515, "learning_rate": 2.5481249105859633e-05, "loss": 0.13894343376159668, "step": 2260 }, { "epoch": 0.3056696239966202, "grad_norm": 1.2188817262649536, "learning_rate": 2.5476351743475964e-05, "loss": 0.19422650337219238, "step": 2261 }, { "epoch": 0.30580481622306716, "grad_norm": 1.53557550907135, "learning_rate": 2.547145219986e-05, "loss": 0.2347869873046875, "step": 2262 }, { "epoch": 0.30594000844951413, "grad_norm": 1.2418861389160156, "learning_rate": 2.5466550476031846e-05, "loss": 0.15057945251464844, "step": 2263 }, { "epoch": 0.3060752006759611, "grad_norm": 0.8434281349182129, "learning_rate": 2.5461646573012072e-05, "loss": 0.17255020141601562, "step": 2264 }, { "epoch": 0.3062103929024081, "grad_norm": 1.88532292842865, "learning_rate": 2.5456740491821687e-05, "loss": 0.15487957000732422, "step": 2265 }, { "epoch": 0.30634558512885507, "grad_norm": 1.6142040491104126, "learning_rate": 2.5451832233482172e-05, "loss": 0.19624710083007812, "step": 2266 }, { "epoch": 0.30648077735530205, "grad_norm": 1.7512913942337036, "learning_rate": 2.544692179901545e-05, "loss": 0.21172046661376953, "step": 2267 }, { "epoch": 0.306615969581749, "grad_norm": 3.562217950820923, "learning_rate": 2.5442009189443902e-05, "loss": 0.15139293670654297, "step": 2268 }, { "epoch": 0.306751161808196, "grad_norm": 1.0516456365585327, "learning_rate": 2.5437094405790355e-05, "loss": 0.1160745620727539, "step": 2269 }, { "epoch": 0.306886354034643, "grad_norm": 2.5979087352752686, "learning_rate": 2.5432177449078096e-05, "loss": 0.18936586380004883, "step": 2270 }, { "epoch": 0.30702154626108996, "grad_norm": 0.7907645106315613, "learning_rate": 2.5427258320330857e-05, "loss": 0.09168052673339844, "step": 2271 }, { "epoch": 0.30715673848753694, "grad_norm": 1.0355113744735718, "learning_rate": 2.5422337020572835e-05, "loss": 0.16750812530517578, "step": 2272 }, { "epoch": 0.3072919307139839, "grad_norm": 1.8528800010681152, "learning_rate": 2.5417413550828667e-05, "loss": 0.17857742309570312, "step": 2273 }, { "epoch": 0.30742712294043095, "grad_norm": 1.2584067583084106, "learning_rate": 2.5412487912123444e-05, "loss": 0.18100261688232422, "step": 2274 }, { "epoch": 0.30756231516687793, "grad_norm": 1.2099308967590332, "learning_rate": 2.5407560105482708e-05, "loss": 0.1762828826904297, "step": 2275 }, { "epoch": 0.3076975073933249, "grad_norm": 2.1157209873199463, "learning_rate": 2.540263013193246e-05, "loss": 0.20116424560546875, "step": 2276 }, { "epoch": 0.3078326996197719, "grad_norm": 1.435807466506958, "learning_rate": 2.539769799249915e-05, "loss": 0.21176719665527344, "step": 2277 }, { "epoch": 0.30796789184621887, "grad_norm": 1.0198616981506348, "learning_rate": 2.5392763688209666e-05, "loss": 0.1684408187866211, "step": 2278 }, { "epoch": 0.30810308407266584, "grad_norm": 1.9164719581604004, "learning_rate": 2.5387827220091362e-05, "loss": 0.21175384521484375, "step": 2279 }, { "epoch": 0.3082382762991128, "grad_norm": 1.0155837535858154, "learning_rate": 2.538288858917204e-05, "loss": 0.1824626922607422, "step": 2280 }, { "epoch": 0.3083734685255598, "grad_norm": 0.6968599557876587, "learning_rate": 2.5377947796479936e-05, "loss": 0.1112971305847168, "step": 2281 }, { "epoch": 0.3085086607520068, "grad_norm": 0.8022904396057129, "learning_rate": 2.537300484304377e-05, "loss": 0.15833663940429688, "step": 2282 }, { "epoch": 0.30864385297845376, "grad_norm": 1.8228644132614136, "learning_rate": 2.536805972989267e-05, "loss": 0.14421063661575317, "step": 2283 }, { "epoch": 0.30877904520490074, "grad_norm": 1.1312681436538696, "learning_rate": 2.5363112458056252e-05, "loss": 0.1482563018798828, "step": 2284 }, { "epoch": 0.3089142374313477, "grad_norm": 1.108189582824707, "learning_rate": 2.5358163028564552e-05, "loss": 0.2332611083984375, "step": 2285 }, { "epoch": 0.3090494296577947, "grad_norm": 0.6742943525314331, "learning_rate": 2.535321144244808e-05, "loss": 0.14126014709472656, "step": 2286 }, { "epoch": 0.3091846218842417, "grad_norm": 1.6424269676208496, "learning_rate": 2.534825770073777e-05, "loss": 0.16980934143066406, "step": 2287 }, { "epoch": 0.30931981411068865, "grad_norm": 1.0887466669082642, "learning_rate": 2.5343301804465026e-05, "loss": 0.24367237091064453, "step": 2288 }, { "epoch": 0.30945500633713563, "grad_norm": 1.2062550783157349, "learning_rate": 2.533834375466169e-05, "loss": 0.187713623046875, "step": 2289 }, { "epoch": 0.3095901985635826, "grad_norm": 1.0450830459594727, "learning_rate": 2.533338355236005e-05, "loss": 0.16996359825134277, "step": 2290 }, { "epoch": 0.3097253907900296, "grad_norm": 0.9624519944190979, "learning_rate": 2.532842119859285e-05, "loss": 0.18154144287109375, "step": 2291 }, { "epoch": 0.30986058301647657, "grad_norm": 0.9605568647384644, "learning_rate": 2.532345669439328e-05, "loss": 0.1711254119873047, "step": 2292 }, { "epoch": 0.30999577524292354, "grad_norm": 2.006368637084961, "learning_rate": 2.5318490040794975e-05, "loss": 0.17003393173217773, "step": 2293 }, { "epoch": 0.3101309674693705, "grad_norm": 3.2251060009002686, "learning_rate": 2.531352123883202e-05, "loss": 0.19815653562545776, "step": 2294 }, { "epoch": 0.3102661596958175, "grad_norm": 4.171011447906494, "learning_rate": 2.530855028953894e-05, "loss": 0.23536014556884766, "step": 2295 }, { "epoch": 0.3104013519222645, "grad_norm": 1.8428337574005127, "learning_rate": 2.5303577193950724e-05, "loss": 0.137603759765625, "step": 2296 }, { "epoch": 0.31053654414871146, "grad_norm": 1.8379453420639038, "learning_rate": 2.5298601953102785e-05, "loss": 0.1956644058227539, "step": 2297 }, { "epoch": 0.31067173637515844, "grad_norm": 1.0162596702575684, "learning_rate": 2.5293624568031008e-05, "loss": 0.17864370346069336, "step": 2298 }, { "epoch": 0.3108069286016054, "grad_norm": 1.0062034130096436, "learning_rate": 2.5288645039771697e-05, "loss": 0.15030574798583984, "step": 2299 }, { "epoch": 0.3109421208280524, "grad_norm": 3.293567657470703, "learning_rate": 2.5283663369361624e-05, "loss": 0.13706159591674805, "step": 2300 }, { "epoch": 0.31107731305449937, "grad_norm": 3.191436767578125, "learning_rate": 2.5278679557837998e-05, "loss": 0.2034149169921875, "step": 2301 }, { "epoch": 0.31121250528094635, "grad_norm": 5.852672100067139, "learning_rate": 2.5273693606238474e-05, "loss": 0.24721908569335938, "step": 2302 }, { "epoch": 0.31134769750739333, "grad_norm": 3.2634170055389404, "learning_rate": 2.5268705515601164e-05, "loss": 0.18565797805786133, "step": 2303 }, { "epoch": 0.3114828897338403, "grad_norm": 2.484697103500366, "learning_rate": 2.5263715286964596e-05, "loss": 0.14623737335205078, "step": 2304 }, { "epoch": 0.3116180819602873, "grad_norm": 3.79632568359375, "learning_rate": 2.525872292136778e-05, "loss": 0.2135028839111328, "step": 2305 }, { "epoch": 0.31175327418673426, "grad_norm": 3.831333637237549, "learning_rate": 2.525372841985014e-05, "loss": 0.20622658729553223, "step": 2306 }, { "epoch": 0.31188846641318124, "grad_norm": 0.7250213623046875, "learning_rate": 2.5248731783451567e-05, "loss": 0.16280746459960938, "step": 2307 }, { "epoch": 0.3120236586396282, "grad_norm": 1.3498880863189697, "learning_rate": 2.524373301321238e-05, "loss": 0.23999404907226562, "step": 2308 }, { "epoch": 0.3121588508660752, "grad_norm": 1.2318817377090454, "learning_rate": 2.5238732110173356e-05, "loss": 0.19244003295898438, "step": 2309 }, { "epoch": 0.3122940430925222, "grad_norm": 0.956408679485321, "learning_rate": 2.5233729075375708e-05, "loss": 0.2026979923248291, "step": 2310 }, { "epoch": 0.31242923531896916, "grad_norm": 3.9792239665985107, "learning_rate": 2.522872390986109e-05, "loss": 0.2338409423828125, "step": 2311 }, { "epoch": 0.31256442754541613, "grad_norm": 2.1870150566101074, "learning_rate": 2.522371661467161e-05, "loss": 0.11838912963867188, "step": 2312 }, { "epoch": 0.3126996197718631, "grad_norm": 2.9099535942077637, "learning_rate": 2.521870719084981e-05, "loss": 0.25728273391723633, "step": 2313 }, { "epoch": 0.3128348119983101, "grad_norm": 2.243488073348999, "learning_rate": 2.5213695639438686e-05, "loss": 0.1966552734375, "step": 2314 }, { "epoch": 0.31297000422475707, "grad_norm": 2.1547231674194336, "learning_rate": 2.5208681961481657e-05, "loss": 0.2767219543457031, "step": 2315 }, { "epoch": 0.31310519645120405, "grad_norm": 1.801814079284668, "learning_rate": 2.5203666158022607e-05, "loss": 0.14489269256591797, "step": 2316 }, { "epoch": 0.313240388677651, "grad_norm": 1.970206379890442, "learning_rate": 2.519864823010585e-05, "loss": 0.15406322479248047, "step": 2317 }, { "epoch": 0.313375580904098, "grad_norm": 1.0015075206756592, "learning_rate": 2.5193628178776148e-05, "loss": 0.11316871643066406, "step": 2318 }, { "epoch": 0.313510773130545, "grad_norm": 1.687562108039856, "learning_rate": 2.5188606005078695e-05, "loss": 0.2304668426513672, "step": 2319 }, { "epoch": 0.31364596535699196, "grad_norm": 1.1796659231185913, "learning_rate": 2.518358171005914e-05, "loss": 0.2116527557373047, "step": 2320 }, { "epoch": 0.31378115758343894, "grad_norm": 3.509256601333618, "learning_rate": 2.517855529476357e-05, "loss": 0.23590087890625, "step": 2321 }, { "epoch": 0.3139163498098859, "grad_norm": 1.073394775390625, "learning_rate": 2.517352676023851e-05, "loss": 0.16514205932617188, "step": 2322 }, { "epoch": 0.3140515420363329, "grad_norm": 1.3163100481033325, "learning_rate": 2.5168496107530925e-05, "loss": 0.16283893585205078, "step": 2323 }, { "epoch": 0.3141867342627799, "grad_norm": 1.2644157409667969, "learning_rate": 2.5163463337688224e-05, "loss": 0.19581031799316406, "step": 2324 }, { "epoch": 0.31432192648922685, "grad_norm": 1.6979799270629883, "learning_rate": 2.515842845175826e-05, "loss": 0.15867233276367188, "step": 2325 }, { "epoch": 0.31445711871567383, "grad_norm": 0.714146614074707, "learning_rate": 2.5153391450789326e-05, "loss": 0.15462970733642578, "step": 2326 }, { "epoch": 0.3145923109421208, "grad_norm": 1.330087661743164, "learning_rate": 2.514835233583014e-05, "loss": 0.19726943969726562, "step": 2327 }, { "epoch": 0.3147275031685678, "grad_norm": 0.9222365021705627, "learning_rate": 2.514331110792988e-05, "loss": 0.1748828887939453, "step": 2328 }, { "epoch": 0.31486269539501477, "grad_norm": 2.0946593284606934, "learning_rate": 2.513826776813816e-05, "loss": 0.19808578491210938, "step": 2329 }, { "epoch": 0.31499788762146175, "grad_norm": 3.8943722248077393, "learning_rate": 2.5133222317505024e-05, "loss": 0.22673511505126953, "step": 2330 }, { "epoch": 0.3151330798479087, "grad_norm": 1.473652720451355, "learning_rate": 2.5128174757080965e-05, "loss": 0.19598007202148438, "step": 2331 }, { "epoch": 0.3152682720743557, "grad_norm": 2.0215470790863037, "learning_rate": 2.5123125087916916e-05, "loss": 0.20312118530273438, "step": 2332 }, { "epoch": 0.3154034643008027, "grad_norm": 1.6779415607452393, "learning_rate": 2.5118073311064236e-05, "loss": 0.19646644592285156, "step": 2333 }, { "epoch": 0.31553865652724966, "grad_norm": 1.116431713104248, "learning_rate": 2.5113019427574734e-05, "loss": 0.16550064086914062, "step": 2334 }, { "epoch": 0.31567384875369664, "grad_norm": 1.1136761903762817, "learning_rate": 2.5107963438500666e-05, "loss": 0.15624427795410156, "step": 2335 }, { "epoch": 0.3158090409801436, "grad_norm": 1.9314128160476685, "learning_rate": 2.51029053448947e-05, "loss": 0.18707656860351562, "step": 2336 }, { "epoch": 0.3159442332065906, "grad_norm": 3.9178435802459717, "learning_rate": 2.509784514780997e-05, "loss": 0.1781320571899414, "step": 2337 }, { "epoch": 0.3160794254330376, "grad_norm": 2.66619873046875, "learning_rate": 2.5092782848300033e-05, "loss": 0.1796245574951172, "step": 2338 }, { "epoch": 0.31621461765948455, "grad_norm": 2.299640417098999, "learning_rate": 2.5087718447418886e-05, "loss": 0.20847702026367188, "step": 2339 }, { "epoch": 0.31634980988593153, "grad_norm": 1.359383463859558, "learning_rate": 2.5082651946220958e-05, "loss": 0.2000293731689453, "step": 2340 }, { "epoch": 0.3164850021123785, "grad_norm": 0.9819516539573669, "learning_rate": 2.507758334576113e-05, "loss": 0.1815662384033203, "step": 2341 }, { "epoch": 0.31662019433882554, "grad_norm": 1.1998976469039917, "learning_rate": 2.5072512647094713e-05, "loss": 0.18518829345703125, "step": 2342 }, { "epoch": 0.3167553865652725, "grad_norm": 1.0507601499557495, "learning_rate": 2.506743985127745e-05, "loss": 0.22673416137695312, "step": 2343 }, { "epoch": 0.3168905787917195, "grad_norm": 0.7075194120407104, "learning_rate": 2.506236495936552e-05, "loss": 0.17055320739746094, "step": 2344 }, { "epoch": 0.3170257710181665, "grad_norm": 2.0761756896972656, "learning_rate": 2.5057287972415547e-05, "loss": 0.20184326171875, "step": 2345 }, { "epoch": 0.31716096324461346, "grad_norm": 1.518945336341858, "learning_rate": 2.5052208891484588e-05, "loss": 0.19922256469726562, "step": 2346 }, { "epoch": 0.31729615547106044, "grad_norm": 1.1840366125106812, "learning_rate": 2.504712771763013e-05, "loss": 0.1599903106689453, "step": 2347 }, { "epoch": 0.3174313476975074, "grad_norm": 2.1613547801971436, "learning_rate": 2.5042044451910108e-05, "loss": 0.1673727035522461, "step": 2348 }, { "epoch": 0.3175665399239544, "grad_norm": 2.690495252609253, "learning_rate": 2.5036959095382875e-05, "loss": 0.222930908203125, "step": 2349 }, { "epoch": 0.3177017321504014, "grad_norm": 1.2885096073150635, "learning_rate": 2.5031871649107233e-05, "loss": 0.17171669006347656, "step": 2350 }, { "epoch": 0.31783692437684835, "grad_norm": 2.8969340324401855, "learning_rate": 2.5026782114142426e-05, "loss": 0.22868919372558594, "step": 2351 }, { "epoch": 0.31797211660329533, "grad_norm": 2.9183242321014404, "learning_rate": 2.5021690491548107e-05, "loss": 0.23976516723632812, "step": 2352 }, { "epoch": 0.3181073088297423, "grad_norm": 1.4264113903045654, "learning_rate": 2.5016596782384387e-05, "loss": 0.16781902313232422, "step": 2353 }, { "epoch": 0.3182425010561893, "grad_norm": 1.1937352418899536, "learning_rate": 2.5011500987711804e-05, "loss": 0.26714324951171875, "step": 2354 }, { "epoch": 0.31837769328263626, "grad_norm": 0.9137232899665833, "learning_rate": 2.5006403108591325e-05, "loss": 0.1682300567626953, "step": 2355 }, { "epoch": 0.31851288550908324, "grad_norm": 1.080885648727417, "learning_rate": 2.500130314608436e-05, "loss": 0.19979095458984375, "step": 2356 }, { "epoch": 0.3186480777355302, "grad_norm": 0.6518683433532715, "learning_rate": 2.4996201101252742e-05, "loss": 0.15123838186264038, "step": 2357 }, { "epoch": 0.3187832699619772, "grad_norm": 1.095583438873291, "learning_rate": 2.4991096975158757e-05, "loss": 0.16352367401123047, "step": 2358 }, { "epoch": 0.3189184621884242, "grad_norm": 0.9431547522544861, "learning_rate": 2.4985990768865095e-05, "loss": 0.1791553497314453, "step": 2359 }, { "epoch": 0.31905365441487116, "grad_norm": 0.7539752125740051, "learning_rate": 2.4980882483434904e-05, "loss": 0.16378402709960938, "step": 2360 }, { "epoch": 0.31918884664131814, "grad_norm": 1.3075010776519775, "learning_rate": 2.497577211993176e-05, "loss": 0.20471477508544922, "step": 2361 }, { "epoch": 0.3193240388677651, "grad_norm": 1.2260907888412476, "learning_rate": 2.4970659679419658e-05, "loss": 0.16594409942626953, "step": 2362 }, { "epoch": 0.3194592310942121, "grad_norm": 2.4159138202667236, "learning_rate": 2.496554516296304e-05, "loss": 0.19655799865722656, "step": 2363 }, { "epoch": 0.31959442332065907, "grad_norm": 2.0845017433166504, "learning_rate": 2.4960428571626784e-05, "loss": 0.19893836975097656, "step": 2364 }, { "epoch": 0.31972961554710605, "grad_norm": 1.508702039718628, "learning_rate": 2.4955309906476177e-05, "loss": 0.20336627960205078, "step": 2365 }, { "epoch": 0.31986480777355303, "grad_norm": 1.4192456007003784, "learning_rate": 2.495018916857696e-05, "loss": 0.21660232543945312, "step": 2366 }, { "epoch": 0.32, "grad_norm": 3.0699455738067627, "learning_rate": 2.4945066358995304e-05, "loss": 0.1481180191040039, "step": 2367 }, { "epoch": 0.320135192226447, "grad_norm": 1.7595261335372925, "learning_rate": 2.493994147879779e-05, "loss": 0.19792747497558594, "step": 2368 }, { "epoch": 0.32027038445289396, "grad_norm": 2.8754079341888428, "learning_rate": 2.4934814529051458e-05, "loss": 0.19551849365234375, "step": 2369 }, { "epoch": 0.32040557667934094, "grad_norm": 1.2139326333999634, "learning_rate": 2.4929685510823763e-05, "loss": 0.2159442901611328, "step": 2370 }, { "epoch": 0.3205407689057879, "grad_norm": 2.0129222869873047, "learning_rate": 2.492455442518259e-05, "loss": 0.22185516357421875, "step": 2371 }, { "epoch": 0.3206759611322349, "grad_norm": 1.0887688398361206, "learning_rate": 2.4919421273196262e-05, "loss": 0.18426275253295898, "step": 2372 }, { "epoch": 0.3208111533586819, "grad_norm": 1.4184889793395996, "learning_rate": 2.4914286055933527e-05, "loss": 0.18798828125, "step": 2373 }, { "epoch": 0.32094634558512886, "grad_norm": 1.6306990385055542, "learning_rate": 2.4909148774463572e-05, "loss": 0.2216320037841797, "step": 2374 }, { "epoch": 0.32108153781157583, "grad_norm": 0.8503995537757874, "learning_rate": 2.4904009429855992e-05, "loss": 0.13969135284423828, "step": 2375 }, { "epoch": 0.3212167300380228, "grad_norm": 1.226109266281128, "learning_rate": 2.4898868023180844e-05, "loss": 0.1608600616455078, "step": 2376 }, { "epoch": 0.3213519222644698, "grad_norm": 1.2110795974731445, "learning_rate": 2.4893724555508575e-05, "loss": 0.1925792694091797, "step": 2377 }, { "epoch": 0.32148711449091677, "grad_norm": 1.0036662817001343, "learning_rate": 2.4888579027910105e-05, "loss": 0.1652846336364746, "step": 2378 }, { "epoch": 0.32162230671736375, "grad_norm": 1.193157434463501, "learning_rate": 2.4883431441456738e-05, "loss": 0.2190074920654297, "step": 2379 }, { "epoch": 0.3217574989438107, "grad_norm": 1.5323030948638916, "learning_rate": 2.4878281797220244e-05, "loss": 0.17658305168151855, "step": 2380 }, { "epoch": 0.3218926911702577, "grad_norm": 2.215569496154785, "learning_rate": 2.4873130096272805e-05, "loss": 0.24140548706054688, "step": 2381 }, { "epoch": 0.3220278833967047, "grad_norm": 1.2992013692855835, "learning_rate": 2.4867976339687026e-05, "loss": 0.1514596939086914, "step": 2382 }, { "epoch": 0.32216307562315166, "grad_norm": 2.6177167892456055, "learning_rate": 2.4862820528535955e-05, "loss": 0.1799936294555664, "step": 2383 }, { "epoch": 0.32229826784959864, "grad_norm": 1.3907675743103027, "learning_rate": 2.4857662663893054e-05, "loss": 0.21718692779541016, "step": 2384 }, { "epoch": 0.3224334600760456, "grad_norm": 1.0709302425384521, "learning_rate": 2.485250274683222e-05, "loss": 0.10641193389892578, "step": 2385 }, { "epoch": 0.3225686523024926, "grad_norm": 2.103909492492676, "learning_rate": 2.4847340778427772e-05, "loss": 0.2388477325439453, "step": 2386 }, { "epoch": 0.3227038445289396, "grad_norm": 1.4906790256500244, "learning_rate": 2.484217675975446e-05, "loss": 0.18300247192382812, "step": 2387 }, { "epoch": 0.32283903675538655, "grad_norm": 2.170334815979004, "learning_rate": 2.4837010691887466e-05, "loss": 0.17945575714111328, "step": 2388 }, { "epoch": 0.32297422898183353, "grad_norm": 1.061652660369873, "learning_rate": 2.4831842575902383e-05, "loss": 0.2139759063720703, "step": 2389 }, { "epoch": 0.3231094212082805, "grad_norm": 2.693559408187866, "learning_rate": 2.482667241287525e-05, "loss": 0.1947317123413086, "step": 2390 }, { "epoch": 0.3232446134347275, "grad_norm": 1.53031587600708, "learning_rate": 2.4821500203882517e-05, "loss": 0.15073871612548828, "step": 2391 }, { "epoch": 0.32337980566117447, "grad_norm": 1.3599871397018433, "learning_rate": 2.4816325950001067e-05, "loss": 0.205078125, "step": 2392 }, { "epoch": 0.32351499788762145, "grad_norm": 0.8806572556495667, "learning_rate": 2.4811149652308205e-05, "loss": 0.1681804656982422, "step": 2393 }, { "epoch": 0.3236501901140684, "grad_norm": 2.000258445739746, "learning_rate": 2.480597131188167e-05, "loss": 0.23520851135253906, "step": 2394 }, { "epoch": 0.3237853823405154, "grad_norm": 3.5049009323120117, "learning_rate": 2.4800790929799614e-05, "loss": 0.2604351043701172, "step": 2395 }, { "epoch": 0.3239205745669624, "grad_norm": 2.268362283706665, "learning_rate": 2.4795608507140623e-05, "loss": 0.17398452758789062, "step": 2396 }, { "epoch": 0.32405576679340936, "grad_norm": 1.0210410356521606, "learning_rate": 2.4790424044983705e-05, "loss": 0.21225357055664062, "step": 2397 }, { "epoch": 0.32419095901985634, "grad_norm": 1.2955994606018066, "learning_rate": 2.4785237544408288e-05, "loss": 0.2442169189453125, "step": 2398 }, { "epoch": 0.3243261512463033, "grad_norm": 1.6955705881118774, "learning_rate": 2.478004900649424e-05, "loss": 0.23143577575683594, "step": 2399 }, { "epoch": 0.3244613434727503, "grad_norm": 2.3066468238830566, "learning_rate": 2.477485843232183e-05, "loss": 0.1939082145690918, "step": 2400 }, { "epoch": 0.3245965356991973, "grad_norm": 1.6697825193405151, "learning_rate": 2.476966582297177e-05, "loss": 0.20110034942626953, "step": 2401 }, { "epoch": 0.32473172792564425, "grad_norm": 3.9010043144226074, "learning_rate": 2.4764471179525188e-05, "loss": 0.259124755859375, "step": 2402 }, { "epoch": 0.32486692015209123, "grad_norm": 1.8830945491790771, "learning_rate": 2.4759274503063632e-05, "loss": 0.21721935272216797, "step": 2403 }, { "epoch": 0.3250021123785382, "grad_norm": 0.9610902667045593, "learning_rate": 2.4754075794669088e-05, "loss": 0.1812286376953125, "step": 2404 }, { "epoch": 0.3251373046049852, "grad_norm": 1.3830190896987915, "learning_rate": 2.4748875055423942e-05, "loss": 0.21308517456054688, "step": 2405 }, { "epoch": 0.32527249683143217, "grad_norm": 1.1364787817001343, "learning_rate": 2.4743672286411027e-05, "loss": 0.17693567276000977, "step": 2406 }, { "epoch": 0.32540768905787915, "grad_norm": 1.2751948833465576, "learning_rate": 2.4738467488713582e-05, "loss": 0.20960617065429688, "step": 2407 }, { "epoch": 0.3255428812843261, "grad_norm": 1.0974429845809937, "learning_rate": 2.473326066341527e-05, "loss": 0.1755962371826172, "step": 2408 }, { "epoch": 0.3256780735107731, "grad_norm": 2.473176956176758, "learning_rate": 2.4728051811600184e-05, "loss": 0.20442771911621094, "step": 2409 }, { "epoch": 0.32581326573722014, "grad_norm": 0.8829950094223022, "learning_rate": 2.4722840934352838e-05, "loss": 0.1776590347290039, "step": 2410 }, { "epoch": 0.3259484579636671, "grad_norm": 1.0712145566940308, "learning_rate": 2.471762803275816e-05, "loss": 0.14638614654541016, "step": 2411 }, { "epoch": 0.3260836501901141, "grad_norm": 1.307511329650879, "learning_rate": 2.4712413107901504e-05, "loss": 0.216033935546875, "step": 2412 }, { "epoch": 0.3262188424165611, "grad_norm": 1.1126899719238281, "learning_rate": 2.470719616086865e-05, "loss": 0.2010488510131836, "step": 2413 }, { "epoch": 0.32635403464300805, "grad_norm": 0.8406078219413757, "learning_rate": 2.4701977192745785e-05, "loss": 0.1741199493408203, "step": 2414 }, { "epoch": 0.32648922686945503, "grad_norm": 2.324248790740967, "learning_rate": 2.4696756204619535e-05, "loss": 0.2655830383300781, "step": 2415 }, { "epoch": 0.326624419095902, "grad_norm": 2.4082279205322266, "learning_rate": 2.469153319757693e-05, "loss": 0.261277437210083, "step": 2416 }, { "epoch": 0.326759611322349, "grad_norm": 1.1285223960876465, "learning_rate": 2.4686308172705433e-05, "loss": 0.14336681365966797, "step": 2417 }, { "epoch": 0.32689480354879596, "grad_norm": 1.115792155265808, "learning_rate": 2.4681081131092926e-05, "loss": 0.15285491943359375, "step": 2418 }, { "epoch": 0.32702999577524294, "grad_norm": 1.3496662378311157, "learning_rate": 2.467585207382769e-05, "loss": 0.2096233367919922, "step": 2419 }, { "epoch": 0.3271651880016899, "grad_norm": 1.0363285541534424, "learning_rate": 2.4670621001998467e-05, "loss": 0.1822519302368164, "step": 2420 }, { "epoch": 0.3273003802281369, "grad_norm": 0.9571179151535034, "learning_rate": 2.466538791669437e-05, "loss": 0.1873304843902588, "step": 2421 }, { "epoch": 0.3274355724545839, "grad_norm": 1.0557729005813599, "learning_rate": 2.4660152819004973e-05, "loss": 0.20589971542358398, "step": 2422 }, { "epoch": 0.32757076468103086, "grad_norm": 1.6920645236968994, "learning_rate": 2.4654915710020246e-05, "loss": 0.1682581901550293, "step": 2423 }, { "epoch": 0.32770595690747784, "grad_norm": 1.0892901420593262, "learning_rate": 2.464967659083058e-05, "loss": 0.21057605743408203, "step": 2424 }, { "epoch": 0.3278411491339248, "grad_norm": 2.5834336280822754, "learning_rate": 2.464443546252679e-05, "loss": 0.15996742248535156, "step": 2425 }, { "epoch": 0.3279763413603718, "grad_norm": 1.8357839584350586, "learning_rate": 2.4639192326200104e-05, "loss": 0.16809016466140747, "step": 2426 }, { "epoch": 0.32811153358681877, "grad_norm": 0.9272051453590393, "learning_rate": 2.463394718294218e-05, "loss": 0.10030841827392578, "step": 2427 }, { "epoch": 0.32824672581326575, "grad_norm": 2.409123659133911, "learning_rate": 2.4628700033845072e-05, "loss": 0.17902874946594238, "step": 2428 }, { "epoch": 0.32838191803971273, "grad_norm": 1.497423768043518, "learning_rate": 2.4623450880001268e-05, "loss": 0.24622726440429688, "step": 2429 }, { "epoch": 0.3285171102661597, "grad_norm": 1.3953806161880493, "learning_rate": 2.4618199722503676e-05, "loss": 0.17273902893066406, "step": 2430 }, { "epoch": 0.3286523024926067, "grad_norm": 2.72944712638855, "learning_rate": 2.4612946562445613e-05, "loss": 0.17840099334716797, "step": 2431 }, { "epoch": 0.32878749471905366, "grad_norm": 2.6016595363616943, "learning_rate": 2.460769140092081e-05, "loss": 0.23370933532714844, "step": 2432 }, { "epoch": 0.32892268694550064, "grad_norm": 1.0173238515853882, "learning_rate": 2.460243423902342e-05, "loss": 0.17308282852172852, "step": 2433 }, { "epoch": 0.3290578791719476, "grad_norm": 1.8887403011322021, "learning_rate": 2.459717507784802e-05, "loss": 0.18835830688476562, "step": 2434 }, { "epoch": 0.3291930713983946, "grad_norm": 1.4456491470336914, "learning_rate": 2.459191391848959e-05, "loss": 0.1899099349975586, "step": 2435 }, { "epoch": 0.3293282636248416, "grad_norm": 1.0256524085998535, "learning_rate": 2.4586650762043538e-05, "loss": 0.16742992401123047, "step": 2436 }, { "epoch": 0.32946345585128856, "grad_norm": 1.5991610288619995, "learning_rate": 2.4581385609605665e-05, "loss": 0.18927955627441406, "step": 2437 }, { "epoch": 0.32959864807773553, "grad_norm": 1.734049916267395, "learning_rate": 2.4576118462272218e-05, "loss": 0.22829437255859375, "step": 2438 }, { "epoch": 0.3297338403041825, "grad_norm": 1.7728407382965088, "learning_rate": 2.4570849321139836e-05, "loss": 0.26377248764038086, "step": 2439 }, { "epoch": 0.3298690325306295, "grad_norm": 1.804621696472168, "learning_rate": 2.4565578187305596e-05, "loss": 0.1879110336303711, "step": 2440 }, { "epoch": 0.33000422475707647, "grad_norm": 1.3535054922103882, "learning_rate": 2.456030506186696e-05, "loss": 0.1630997657775879, "step": 2441 }, { "epoch": 0.33013941698352345, "grad_norm": 1.120963454246521, "learning_rate": 2.4555029945921832e-05, "loss": 0.1389904022216797, "step": 2442 }, { "epoch": 0.3302746092099704, "grad_norm": 1.0562878847122192, "learning_rate": 2.4549752840568516e-05, "loss": 0.16719341278076172, "step": 2443 }, { "epoch": 0.3304098014364174, "grad_norm": 2.646899461746216, "learning_rate": 2.4544473746905733e-05, "loss": 0.1888103485107422, "step": 2444 }, { "epoch": 0.3305449936628644, "grad_norm": 1.506170392036438, "learning_rate": 2.4539192666032617e-05, "loss": 0.16521596908569336, "step": 2445 }, { "epoch": 0.33068018588931136, "grad_norm": 0.8474022746086121, "learning_rate": 2.4533909599048718e-05, "loss": 0.1264805793762207, "step": 2446 }, { "epoch": 0.33081537811575834, "grad_norm": 0.7529324889183044, "learning_rate": 2.4528624547054003e-05, "loss": 0.17188549041748047, "step": 2447 }, { "epoch": 0.3309505703422053, "grad_norm": 2.3013992309570312, "learning_rate": 2.4523337511148843e-05, "loss": 0.2099895477294922, "step": 2448 }, { "epoch": 0.3310857625686523, "grad_norm": 2.2117671966552734, "learning_rate": 2.4518048492434028e-05, "loss": 0.26207733154296875, "step": 2449 }, { "epoch": 0.3312209547950993, "grad_norm": 1.1245808601379395, "learning_rate": 2.4512757492010762e-05, "loss": 0.13423165678977966, "step": 2450 }, { "epoch": 0.33135614702154625, "grad_norm": 1.275658369064331, "learning_rate": 2.4507464510980652e-05, "loss": 0.23124170303344727, "step": 2451 }, { "epoch": 0.33149133924799323, "grad_norm": 0.8938910365104675, "learning_rate": 2.450216955044574e-05, "loss": 0.175262451171875, "step": 2452 }, { "epoch": 0.3316265314744402, "grad_norm": 1.8871339559555054, "learning_rate": 2.449687261150845e-05, "loss": 0.2356433868408203, "step": 2453 }, { "epoch": 0.3317617237008872, "grad_norm": 1.3848785161972046, "learning_rate": 2.449157369527164e-05, "loss": 0.17171788215637207, "step": 2454 }, { "epoch": 0.33189691592733417, "grad_norm": 1.3380428552627563, "learning_rate": 2.448627280283857e-05, "loss": 0.16664600372314453, "step": 2455 }, { "epoch": 0.33203210815378115, "grad_norm": 1.6905087232589722, "learning_rate": 2.4480969935312917e-05, "loss": 0.19647979736328125, "step": 2456 }, { "epoch": 0.3321673003802281, "grad_norm": 0.928114652633667, "learning_rate": 2.4475665093798766e-05, "loss": 0.18549537658691406, "step": 2457 }, { "epoch": 0.3323024926066751, "grad_norm": 1.8578013181686401, "learning_rate": 2.447035827940061e-05, "loss": 0.21084284782409668, "step": 2458 }, { "epoch": 0.3324376848331221, "grad_norm": 1.9083585739135742, "learning_rate": 2.4465049493223356e-05, "loss": 0.13677185773849487, "step": 2459 }, { "epoch": 0.33257287705956906, "grad_norm": 1.1070064306259155, "learning_rate": 2.4459738736372327e-05, "loss": 0.22038650512695312, "step": 2460 }, { "epoch": 0.33270806928601604, "grad_norm": 1.0057308673858643, "learning_rate": 2.4454426009953252e-05, "loss": 0.14522600173950195, "step": 2461 }, { "epoch": 0.332843261512463, "grad_norm": 0.8012288212776184, "learning_rate": 2.4449111315072254e-05, "loss": 0.18804454803466797, "step": 2462 }, { "epoch": 0.33297845373891, "grad_norm": 2.2348666191101074, "learning_rate": 2.44437946528359e-05, "loss": 0.24016952514648438, "step": 2463 }, { "epoch": 0.333113645965357, "grad_norm": 1.6972193717956543, "learning_rate": 2.4438476024351138e-05, "loss": 0.15994834899902344, "step": 2464 }, { "epoch": 0.33324883819180395, "grad_norm": 1.1029222011566162, "learning_rate": 2.4433155430725333e-05, "loss": 0.20725440979003906, "step": 2465 }, { "epoch": 0.33338403041825093, "grad_norm": 2.482452630996704, "learning_rate": 2.4427832873066262e-05, "loss": 0.21537017822265625, "step": 2466 }, { "epoch": 0.3335192226446979, "grad_norm": 2.408444404602051, "learning_rate": 2.4422508352482113e-05, "loss": 0.16070270538330078, "step": 2467 }, { "epoch": 0.3336544148711449, "grad_norm": 1.1893521547317505, "learning_rate": 2.441718187008148e-05, "loss": 0.1671581268310547, "step": 2468 }, { "epoch": 0.33378960709759187, "grad_norm": 1.0434621572494507, "learning_rate": 2.441185342697336e-05, "loss": 0.1521902084350586, "step": 2469 }, { "epoch": 0.33392479932403885, "grad_norm": 1.1673152446746826, "learning_rate": 2.440652302426717e-05, "loss": 0.20055389404296875, "step": 2470 }, { "epoch": 0.3340599915504858, "grad_norm": 0.931617259979248, "learning_rate": 2.440119066307272e-05, "loss": 0.1866617202758789, "step": 2471 }, { "epoch": 0.3341951837769328, "grad_norm": 0.9258384704589844, "learning_rate": 2.4395856344500244e-05, "loss": 0.16117477416992188, "step": 2472 }, { "epoch": 0.3343303760033798, "grad_norm": 1.4245527982711792, "learning_rate": 2.4390520069660377e-05, "loss": 0.21545791625976562, "step": 2473 }, { "epoch": 0.33446556822982676, "grad_norm": 1.1898967027664185, "learning_rate": 2.4385181839664146e-05, "loss": 0.24100685119628906, "step": 2474 }, { "epoch": 0.33460076045627374, "grad_norm": 1.3068917989730835, "learning_rate": 2.437984165562301e-05, "loss": 0.20553207397460938, "step": 2475 }, { "epoch": 0.3347359526827207, "grad_norm": 0.9906862378120422, "learning_rate": 2.4374499518648827e-05, "loss": 0.11668205261230469, "step": 2476 }, { "epoch": 0.33487114490916775, "grad_norm": 0.8711386322975159, "learning_rate": 2.436915542985385e-05, "loss": 0.1628131866455078, "step": 2477 }, { "epoch": 0.33500633713561473, "grad_norm": 1.1357275247573853, "learning_rate": 2.436380939035075e-05, "loss": 0.17133522033691406, "step": 2478 }, { "epoch": 0.3351415293620617, "grad_norm": 0.614902675151825, "learning_rate": 2.43584614012526e-05, "loss": 0.12802362442016602, "step": 2479 }, { "epoch": 0.3352767215885087, "grad_norm": 0.7127976417541504, "learning_rate": 2.4353111463672882e-05, "loss": 0.14868545532226562, "step": 2480 }, { "epoch": 0.33541191381495566, "grad_norm": 2.096118450164795, "learning_rate": 2.4347759578725482e-05, "loss": 0.2213134765625, "step": 2481 }, { "epoch": 0.33554710604140264, "grad_norm": 1.228935956954956, "learning_rate": 2.4342405747524685e-05, "loss": 0.19279241561889648, "step": 2482 }, { "epoch": 0.3356822982678496, "grad_norm": 2.142918825149536, "learning_rate": 2.4337049971185194e-05, "loss": 0.24425315856933594, "step": 2483 }, { "epoch": 0.3358174904942966, "grad_norm": 1.8035968542099, "learning_rate": 2.433169225082211e-05, "loss": 0.20528411865234375, "step": 2484 }, { "epoch": 0.3359526827207436, "grad_norm": 0.8939853310585022, "learning_rate": 2.432633258755093e-05, "loss": 0.1780223846435547, "step": 2485 }, { "epoch": 0.33608787494719056, "grad_norm": 1.1747101545333862, "learning_rate": 2.432097098248758e-05, "loss": 0.1714015007019043, "step": 2486 }, { "epoch": 0.33622306717363754, "grad_norm": 1.9472585916519165, "learning_rate": 2.4315607436748362e-05, "loss": 0.17976665496826172, "step": 2487 }, { "epoch": 0.3363582594000845, "grad_norm": 2.374389886856079, "learning_rate": 2.4310241951449997e-05, "loss": 0.17896461486816406, "step": 2488 }, { "epoch": 0.3364934516265315, "grad_norm": 2.0952980518341064, "learning_rate": 2.4304874527709614e-05, "loss": 0.15448570251464844, "step": 2489 }, { "epoch": 0.33662864385297847, "grad_norm": 1.0373332500457764, "learning_rate": 2.429950516664473e-05, "loss": 0.17436861991882324, "step": 2490 }, { "epoch": 0.33676383607942545, "grad_norm": 1.2658730745315552, "learning_rate": 2.4294133869373284e-05, "loss": 0.19199371337890625, "step": 2491 }, { "epoch": 0.33689902830587243, "grad_norm": 2.53019118309021, "learning_rate": 2.42887606370136e-05, "loss": 0.1992034912109375, "step": 2492 }, { "epoch": 0.3370342205323194, "grad_norm": 1.8225162029266357, "learning_rate": 2.428338547068442e-05, "loss": 0.15047931671142578, "step": 2493 }, { "epoch": 0.3371694127587664, "grad_norm": 2.6724796295166016, "learning_rate": 2.427800837150488e-05, "loss": 0.1808147430419922, "step": 2494 }, { "epoch": 0.33730460498521336, "grad_norm": 1.2367048263549805, "learning_rate": 2.4272629340594518e-05, "loss": 0.1918959617614746, "step": 2495 }, { "epoch": 0.33743979721166034, "grad_norm": 1.2357866764068604, "learning_rate": 2.426724837907328e-05, "loss": 0.20812225341796875, "step": 2496 }, { "epoch": 0.3375749894381073, "grad_norm": 1.421576976776123, "learning_rate": 2.4261865488061512e-05, "loss": 0.19313526153564453, "step": 2497 }, { "epoch": 0.3377101816645543, "grad_norm": 0.901887059211731, "learning_rate": 2.4256480668679958e-05, "loss": 0.16237592697143555, "step": 2498 }, { "epoch": 0.3378453738910013, "grad_norm": 1.7934291362762451, "learning_rate": 2.4251093922049766e-05, "loss": 0.17233753204345703, "step": 2499 }, { "epoch": 0.33798056611744826, "grad_norm": 0.7717118859291077, "learning_rate": 2.4245705249292494e-05, "loss": 0.14551448822021484, "step": 2500 }, { "epoch": 0.33811575834389523, "grad_norm": 2.1062262058258057, "learning_rate": 2.4240314651530073e-05, "loss": 0.2516136169433594, "step": 2501 }, { "epoch": 0.3382509505703422, "grad_norm": 0.8947479128837585, "learning_rate": 2.4234922129884873e-05, "loss": 0.16076993942260742, "step": 2502 }, { "epoch": 0.3383861427967892, "grad_norm": 1.0195494890213013, "learning_rate": 2.4229527685479644e-05, "loss": 0.2044377326965332, "step": 2503 }, { "epoch": 0.33852133502323617, "grad_norm": 0.8918678164482117, "learning_rate": 2.4224131319437523e-05, "loss": 0.17592430114746094, "step": 2504 }, { "epoch": 0.33865652724968315, "grad_norm": 1.0139936208724976, "learning_rate": 2.421873303288208e-05, "loss": 0.1934947967529297, "step": 2505 }, { "epoch": 0.3387917194761301, "grad_norm": 1.0904579162597656, "learning_rate": 2.4213332826937255e-05, "loss": 0.2616539001464844, "step": 2506 }, { "epoch": 0.3389269117025771, "grad_norm": 1.538239598274231, "learning_rate": 2.4207930702727404e-05, "loss": 0.1877422332763672, "step": 2507 }, { "epoch": 0.3390621039290241, "grad_norm": 1.317819356918335, "learning_rate": 2.420252666137728e-05, "loss": 0.15736961364746094, "step": 2508 }, { "epoch": 0.33919729615547106, "grad_norm": 0.8357636332511902, "learning_rate": 2.419712070401203e-05, "loss": 0.18008995056152344, "step": 2509 }, { "epoch": 0.33933248838191804, "grad_norm": 1.422378420829773, "learning_rate": 2.4191712831757203e-05, "loss": 0.17917823791503906, "step": 2510 }, { "epoch": 0.339467680608365, "grad_norm": 1.3978854417800903, "learning_rate": 2.418630304573875e-05, "loss": 0.12674283981323242, "step": 2511 }, { "epoch": 0.339602872834812, "grad_norm": 1.2488456964492798, "learning_rate": 2.418089134708302e-05, "loss": 0.22544097900390625, "step": 2512 }, { "epoch": 0.339738065061259, "grad_norm": 1.9491636753082275, "learning_rate": 2.4175477736916743e-05, "loss": 0.2161998748779297, "step": 2513 }, { "epoch": 0.33987325728770595, "grad_norm": 2.983546495437622, "learning_rate": 2.4170062216367082e-05, "loss": 0.21469497680664062, "step": 2514 }, { "epoch": 0.34000844951415293, "grad_norm": 1.4879140853881836, "learning_rate": 2.416464478656156e-05, "loss": 0.2046222686767578, "step": 2515 }, { "epoch": 0.3401436417405999, "grad_norm": 1.793262004852295, "learning_rate": 2.4159225448628123e-05, "loss": 0.19949722290039062, "step": 2516 }, { "epoch": 0.3402788339670469, "grad_norm": 2.3415403366088867, "learning_rate": 2.4153804203695103e-05, "loss": 0.23149585723876953, "step": 2517 }, { "epoch": 0.34041402619349387, "grad_norm": 3.3753397464752197, "learning_rate": 2.4148381052891236e-05, "loss": 0.20917415618896484, "step": 2518 }, { "epoch": 0.34054921841994085, "grad_norm": 1.089974284172058, "learning_rate": 2.4142955997345648e-05, "loss": 0.18297362327575684, "step": 2519 }, { "epoch": 0.3406844106463878, "grad_norm": 1.2006945610046387, "learning_rate": 2.4137529038187864e-05, "loss": 0.22381591796875, "step": 2520 }, { "epoch": 0.3408196028728348, "grad_norm": 1.2854912281036377, "learning_rate": 2.413210017654781e-05, "loss": 0.1605682373046875, "step": 2521 }, { "epoch": 0.3409547950992818, "grad_norm": 3.5291454792022705, "learning_rate": 2.4126669413555802e-05, "loss": 0.19884204864501953, "step": 2522 }, { "epoch": 0.34108998732572876, "grad_norm": 0.9140754342079163, "learning_rate": 2.4121236750342548e-05, "loss": 0.1940155029296875, "step": 2523 }, { "epoch": 0.34122517955217574, "grad_norm": 1.0800786018371582, "learning_rate": 2.4115802188039165e-05, "loss": 0.16860485076904297, "step": 2524 }, { "epoch": 0.3413603717786227, "grad_norm": 1.7351289987564087, "learning_rate": 2.4110365727777156e-05, "loss": 0.190399169921875, "step": 2525 }, { "epoch": 0.3414955640050697, "grad_norm": 1.2880914211273193, "learning_rate": 2.410492737068842e-05, "loss": 0.1831836700439453, "step": 2526 }, { "epoch": 0.3416307562315167, "grad_norm": 0.886844277381897, "learning_rate": 2.409948711790525e-05, "loss": 0.18851280212402344, "step": 2527 }, { "epoch": 0.34176594845796365, "grad_norm": 1.6285735368728638, "learning_rate": 2.4094044970560336e-05, "loss": 0.2408885955810547, "step": 2528 }, { "epoch": 0.34190114068441063, "grad_norm": 0.7682656049728394, "learning_rate": 2.4088600929786767e-05, "loss": 0.1581580638885498, "step": 2529 }, { "epoch": 0.3420363329108576, "grad_norm": 0.9529601335525513, "learning_rate": 2.408315499671802e-05, "loss": 0.19876766204833984, "step": 2530 }, { "epoch": 0.3421715251373046, "grad_norm": 1.5328891277313232, "learning_rate": 2.407770717248796e-05, "loss": 0.13497352600097656, "step": 2531 }, { "epoch": 0.34230671736375157, "grad_norm": 1.8998607397079468, "learning_rate": 2.407225745823086e-05, "loss": 0.20287132263183594, "step": 2532 }, { "epoch": 0.34244190959019855, "grad_norm": 1.6016870737075806, "learning_rate": 2.4066805855081378e-05, "loss": 0.1540660858154297, "step": 2533 }, { "epoch": 0.3425771018166455, "grad_norm": 3.1063601970672607, "learning_rate": 2.406135236417457e-05, "loss": 0.2858104705810547, "step": 2534 }, { "epoch": 0.3427122940430925, "grad_norm": 1.2761781215667725, "learning_rate": 2.4055896986645875e-05, "loss": 0.19180679321289062, "step": 2535 }, { "epoch": 0.3428474862695395, "grad_norm": 2.3274855613708496, "learning_rate": 2.4050439723631136e-05, "loss": 0.15848922729492188, "step": 2536 }, { "epoch": 0.34298267849598646, "grad_norm": 0.9571801424026489, "learning_rate": 2.404498057626659e-05, "loss": 0.16309762001037598, "step": 2537 }, { "epoch": 0.34311787072243344, "grad_norm": 4.079365253448486, "learning_rate": 2.4039519545688848e-05, "loss": 0.2073078155517578, "step": 2538 }, { "epoch": 0.3432530629488804, "grad_norm": 1.35211181640625, "learning_rate": 2.4034056633034932e-05, "loss": 0.19197654724121094, "step": 2539 }, { "epoch": 0.3433882551753274, "grad_norm": 1.4108097553253174, "learning_rate": 2.402859183944225e-05, "loss": 0.23297119140625, "step": 2540 }, { "epoch": 0.3435234474017744, "grad_norm": 1.9342842102050781, "learning_rate": 2.4023125166048597e-05, "loss": 0.20113182067871094, "step": 2541 }, { "epoch": 0.34365863962822135, "grad_norm": 0.9363011121749878, "learning_rate": 2.401765661399218e-05, "loss": 0.17429351806640625, "step": 2542 }, { "epoch": 0.34379383185466833, "grad_norm": 1.118664264678955, "learning_rate": 2.4012186184411556e-05, "loss": 0.1884288787841797, "step": 2543 }, { "epoch": 0.3439290240811153, "grad_norm": 1.6545270681381226, "learning_rate": 2.400671387844571e-05, "loss": 0.212799072265625, "step": 2544 }, { "epoch": 0.34406421630756234, "grad_norm": 1.051919937133789, "learning_rate": 2.4001239697234008e-05, "loss": 0.16757678985595703, "step": 2545 }, { "epoch": 0.3441994085340093, "grad_norm": 2.737590789794922, "learning_rate": 2.3995763641916205e-05, "loss": 0.1410379409790039, "step": 2546 }, { "epoch": 0.3443346007604563, "grad_norm": 2.5880064964294434, "learning_rate": 2.3990285713632436e-05, "loss": 0.20301103591918945, "step": 2547 }, { "epoch": 0.3444697929869033, "grad_norm": 1.4437180757522583, "learning_rate": 2.398480591352324e-05, "loss": 0.16388225555419922, "step": 2548 }, { "epoch": 0.34460498521335026, "grad_norm": 0.7233229875564575, "learning_rate": 2.3979324242729537e-05, "loss": 0.12668228149414062, "step": 2549 }, { "epoch": 0.34474017743979724, "grad_norm": 1.4757874011993408, "learning_rate": 2.3973840702392646e-05, "loss": 0.20982742309570312, "step": 2550 }, { "epoch": 0.3448753696662442, "grad_norm": 0.8445335626602173, "learning_rate": 2.3968355293654267e-05, "loss": 0.1392221450805664, "step": 2551 }, { "epoch": 0.3450105618926912, "grad_norm": 0.854028046131134, "learning_rate": 2.396286801765649e-05, "loss": 0.17388153076171875, "step": 2552 }, { "epoch": 0.34514575411913817, "grad_norm": 1.141477346420288, "learning_rate": 2.3957378875541795e-05, "loss": 0.15336370468139648, "step": 2553 }, { "epoch": 0.34528094634558515, "grad_norm": 2.723266839981079, "learning_rate": 2.395188786845305e-05, "loss": 0.2098388671875, "step": 2554 }, { "epoch": 0.3454161385720321, "grad_norm": 0.9907428026199341, "learning_rate": 2.3946394997533516e-05, "loss": 0.16144514083862305, "step": 2555 }, { "epoch": 0.3455513307984791, "grad_norm": 2.478806257247925, "learning_rate": 2.3940900263926833e-05, "loss": 0.186614990234375, "step": 2556 }, { "epoch": 0.3456865230249261, "grad_norm": 4.923023700714111, "learning_rate": 2.393540366877704e-05, "loss": 0.22186851501464844, "step": 2557 }, { "epoch": 0.34582171525137306, "grad_norm": 2.883528232574463, "learning_rate": 2.392990521322855e-05, "loss": 0.18793392181396484, "step": 2558 }, { "epoch": 0.34595690747782004, "grad_norm": 1.6688995361328125, "learning_rate": 2.392440489842618e-05, "loss": 0.2287139892578125, "step": 2559 }, { "epoch": 0.346092099704267, "grad_norm": 1.709358811378479, "learning_rate": 2.3918902725515118e-05, "loss": 0.20283222198486328, "step": 2560 }, { "epoch": 0.346227291930714, "grad_norm": 1.1052404642105103, "learning_rate": 2.391339869564094e-05, "loss": 0.21852874755859375, "step": 2561 }, { "epoch": 0.346362484157161, "grad_norm": 0.6618618965148926, "learning_rate": 2.3907892809949628e-05, "loss": 0.13562345504760742, "step": 2562 }, { "epoch": 0.34649767638360796, "grad_norm": 1.1674070358276367, "learning_rate": 2.390238506958753e-05, "loss": 0.19200897216796875, "step": 2563 }, { "epoch": 0.34663286861005493, "grad_norm": 0.8596978783607483, "learning_rate": 2.3896875475701387e-05, "loss": 0.17667770385742188, "step": 2564 }, { "epoch": 0.3467680608365019, "grad_norm": 0.8014450073242188, "learning_rate": 2.3891364029438323e-05, "loss": 0.12021541595458984, "step": 2565 }, { "epoch": 0.3469032530629489, "grad_norm": 1.8242905139923096, "learning_rate": 2.3885850731945857e-05, "loss": 0.23589324951171875, "step": 2566 }, { "epoch": 0.34703844528939587, "grad_norm": 2.3304731845855713, "learning_rate": 2.3880335584371884e-05, "loss": 0.18854045867919922, "step": 2567 }, { "epoch": 0.34717363751584285, "grad_norm": 1.4412691593170166, "learning_rate": 2.387481858786468e-05, "loss": 0.2093658447265625, "step": 2568 }, { "epoch": 0.3473088297422898, "grad_norm": 1.801837682723999, "learning_rate": 2.386929974357293e-05, "loss": 0.10120463371276855, "step": 2569 }, { "epoch": 0.3474440219687368, "grad_norm": 0.917465090751648, "learning_rate": 2.386377905264567e-05, "loss": 0.19179609417915344, "step": 2570 }, { "epoch": 0.3475792141951838, "grad_norm": 1.4551877975463867, "learning_rate": 2.3858256516232346e-05, "loss": 0.2104778289794922, "step": 2571 }, { "epoch": 0.34771440642163076, "grad_norm": 1.2551275491714478, "learning_rate": 2.3852732135482775e-05, "loss": 0.2025623321533203, "step": 2572 }, { "epoch": 0.34784959864807774, "grad_norm": 2.55228853225708, "learning_rate": 2.3847205911547166e-05, "loss": 0.1773223876953125, "step": 2573 }, { "epoch": 0.3479847908745247, "grad_norm": 0.7685827016830444, "learning_rate": 2.3841677845576108e-05, "loss": 0.12705326080322266, "step": 2574 }, { "epoch": 0.3481199831009717, "grad_norm": 0.950711190700531, "learning_rate": 2.383614793872057e-05, "loss": 0.24342632293701172, "step": 2575 }, { "epoch": 0.3482551753274187, "grad_norm": 1.8239257335662842, "learning_rate": 2.3830616192131913e-05, "loss": 0.2003173828125, "step": 2576 }, { "epoch": 0.34839036755386565, "grad_norm": 0.8463389873504639, "learning_rate": 2.3825082606961876e-05, "loss": 0.19254016876220703, "step": 2577 }, { "epoch": 0.34852555978031263, "grad_norm": 1.1968097686767578, "learning_rate": 2.3819547184362575e-05, "loss": 0.17284011840820312, "step": 2578 }, { "epoch": 0.3486607520067596, "grad_norm": 0.8905538320541382, "learning_rate": 2.3814009925486522e-05, "loss": 0.21463394165039062, "step": 2579 }, { "epoch": 0.3487959442332066, "grad_norm": 0.9455034136772156, "learning_rate": 2.38084708314866e-05, "loss": 0.213592529296875, "step": 2580 }, { "epoch": 0.34893113645965357, "grad_norm": 0.8118772506713867, "learning_rate": 2.380292990351608e-05, "loss": 0.17920255661010742, "step": 2581 }, { "epoch": 0.34906632868610055, "grad_norm": 0.8592420816421509, "learning_rate": 2.3797387142728607e-05, "loss": 0.18619155883789062, "step": 2582 }, { "epoch": 0.3492015209125475, "grad_norm": 1.451931118965149, "learning_rate": 2.379184255027822e-05, "loss": 0.19113922119140625, "step": 2583 }, { "epoch": 0.3493367131389945, "grad_norm": 1.0150200128555298, "learning_rate": 2.378629612731933e-05, "loss": 0.21033525466918945, "step": 2584 }, { "epoch": 0.3494719053654415, "grad_norm": 0.9090067744255066, "learning_rate": 2.3780747875006735e-05, "loss": 0.18269860744476318, "step": 2585 }, { "epoch": 0.34960709759188846, "grad_norm": 1.1425822973251343, "learning_rate": 2.37751977944956e-05, "loss": 0.1976017951965332, "step": 2586 }, { "epoch": 0.34974228981833544, "grad_norm": 2.177725076675415, "learning_rate": 2.3769645886941497e-05, "loss": 0.18825340270996094, "step": 2587 }, { "epoch": 0.3498774820447824, "grad_norm": 0.8271177411079407, "learning_rate": 2.376409215350035e-05, "loss": 0.20056915283203125, "step": 2588 }, { "epoch": 0.3500126742712294, "grad_norm": 2.2640480995178223, "learning_rate": 2.3758536595328486e-05, "loss": 0.2184314727783203, "step": 2589 }, { "epoch": 0.3501478664976764, "grad_norm": 2.270983934402466, "learning_rate": 2.375297921358259e-05, "loss": 0.16173028945922852, "step": 2590 }, { "epoch": 0.35028305872412335, "grad_norm": 1.3338305950164795, "learning_rate": 2.3747420009419745e-05, "loss": 0.16893577575683594, "step": 2591 }, { "epoch": 0.35041825095057033, "grad_norm": 1.061629056930542, "learning_rate": 2.3741858983997415e-05, "loss": 0.1718158721923828, "step": 2592 }, { "epoch": 0.3505534431770173, "grad_norm": 1.5213676691055298, "learning_rate": 2.373629613847342e-05, "loss": 0.16145992279052734, "step": 2593 }, { "epoch": 0.3506886354034643, "grad_norm": 1.0195932388305664, "learning_rate": 2.3730731474005988e-05, "loss": 0.1998300552368164, "step": 2594 }, { "epoch": 0.35082382762991127, "grad_norm": 0.7545379400253296, "learning_rate": 2.37251649917537e-05, "loss": 0.09798622131347656, "step": 2595 }, { "epoch": 0.35095901985635825, "grad_norm": 0.88923579454422, "learning_rate": 2.3719596692875534e-05, "loss": 0.15967369079589844, "step": 2596 }, { "epoch": 0.3510942120828052, "grad_norm": 1.2652108669281006, "learning_rate": 2.3714026578530836e-05, "loss": 0.2121795415878296, "step": 2597 }, { "epoch": 0.3512294043092522, "grad_norm": 1.1806765794754028, "learning_rate": 2.370845464987934e-05, "loss": 0.1807718276977539, "step": 2598 }, { "epoch": 0.3513645965356992, "grad_norm": 1.8119354248046875, "learning_rate": 2.370288090808114e-05, "loss": 0.14874553680419922, "step": 2599 }, { "epoch": 0.35149978876214616, "grad_norm": 1.0228831768035889, "learning_rate": 2.369730535429673e-05, "loss": 0.13712120056152344, "step": 2600 }, { "epoch": 0.35163498098859314, "grad_norm": 1.4343212842941284, "learning_rate": 2.369172798968697e-05, "loss": 0.19031810760498047, "step": 2601 }, { "epoch": 0.3517701732150401, "grad_norm": 1.2089320421218872, "learning_rate": 2.3686148815413083e-05, "loss": 0.20747804641723633, "step": 2602 }, { "epoch": 0.3519053654414871, "grad_norm": 1.4964444637298584, "learning_rate": 2.3680567832636695e-05, "loss": 0.18310546875, "step": 2603 }, { "epoch": 0.3520405576679341, "grad_norm": 2.436677932739258, "learning_rate": 2.3674985042519795e-05, "loss": 0.2509002685546875, "step": 2604 }, { "epoch": 0.35217574989438105, "grad_norm": 1.6857314109802246, "learning_rate": 2.366940044622475e-05, "loss": 0.16338443756103516, "step": 2605 }, { "epoch": 0.35231094212082803, "grad_norm": 1.4831477403640747, "learning_rate": 2.3663814044914302e-05, "loss": 0.23669815063476562, "step": 2606 }, { "epoch": 0.352446134347275, "grad_norm": 1.2123229503631592, "learning_rate": 2.3658225839751566e-05, "loss": 0.18099260330200195, "step": 2607 }, { "epoch": 0.352581326573722, "grad_norm": 1.2879902124404907, "learning_rate": 2.3652635831900043e-05, "loss": 0.16869163513183594, "step": 2608 }, { "epoch": 0.35271651880016897, "grad_norm": 1.2032151222229004, "learning_rate": 2.3647044022523595e-05, "loss": 0.23766326904296875, "step": 2609 }, { "epoch": 0.35285171102661594, "grad_norm": 1.1872565746307373, "learning_rate": 2.364145041278647e-05, "loss": 0.19086599349975586, "step": 2610 }, { "epoch": 0.3529869032530629, "grad_norm": 0.7739558219909668, "learning_rate": 2.3635855003853287e-05, "loss": 0.19658470153808594, "step": 2611 }, { "epoch": 0.3531220954795099, "grad_norm": 1.058189034461975, "learning_rate": 2.363025779688904e-05, "loss": 0.1699427366256714, "step": 2612 }, { "epoch": 0.35325728770595693, "grad_norm": 0.8167728185653687, "learning_rate": 2.3624658793059103e-05, "loss": 0.16359329223632812, "step": 2613 }, { "epoch": 0.3533924799324039, "grad_norm": 1.3814804553985596, "learning_rate": 2.3619057993529204e-05, "loss": 0.1732616424560547, "step": 2614 }, { "epoch": 0.3535276721588509, "grad_norm": 1.6405669450759888, "learning_rate": 2.3613455399465475e-05, "loss": 0.22738027572631836, "step": 2615 }, { "epoch": 0.35366286438529787, "grad_norm": 1.991821527481079, "learning_rate": 2.3607851012034394e-05, "loss": 0.2736663818359375, "step": 2616 }, { "epoch": 0.35379805661174485, "grad_norm": 1.369095802307129, "learning_rate": 2.3602244832402838e-05, "loss": 0.19951248168945312, "step": 2617 }, { "epoch": 0.3539332488381918, "grad_norm": 2.1316335201263428, "learning_rate": 2.3596636861738024e-05, "loss": 0.24468517303466797, "step": 2618 }, { "epoch": 0.3540684410646388, "grad_norm": 0.9557300806045532, "learning_rate": 2.3591027101207578e-05, "loss": 0.1684889793395996, "step": 2619 }, { "epoch": 0.3542036332910858, "grad_norm": 0.7996432185173035, "learning_rate": 2.3585415551979476e-05, "loss": 0.18543052673339844, "step": 2620 }, { "epoch": 0.35433882551753276, "grad_norm": 0.9396438002586365, "learning_rate": 2.3579802215222076e-05, "loss": 0.16266250610351562, "step": 2621 }, { "epoch": 0.35447401774397974, "grad_norm": 1.9129470586776733, "learning_rate": 2.35741870921041e-05, "loss": 0.16109466552734375, "step": 2622 }, { "epoch": 0.3546092099704267, "grad_norm": 1.0115118026733398, "learning_rate": 2.3568570183794645e-05, "loss": 0.16150188446044922, "step": 2623 }, { "epoch": 0.3547444021968737, "grad_norm": 2.092118501663208, "learning_rate": 2.356295149146319e-05, "loss": 0.2214651107788086, "step": 2624 }, { "epoch": 0.3548795944233207, "grad_norm": 2.7650279998779297, "learning_rate": 2.3557331016279567e-05, "loss": 0.1821880340576172, "step": 2625 }, { "epoch": 0.35501478664976766, "grad_norm": 1.164493203163147, "learning_rate": 2.3551708759413998e-05, "loss": 0.2632560729980469, "step": 2626 }, { "epoch": 0.35514997887621463, "grad_norm": 0.9716615080833435, "learning_rate": 2.354608472203706e-05, "loss": 0.1773982048034668, "step": 2627 }, { "epoch": 0.3552851711026616, "grad_norm": 1.2671552896499634, "learning_rate": 2.3540458905319705e-05, "loss": 0.18938159942626953, "step": 2628 }, { "epoch": 0.3554203633291086, "grad_norm": 1.4439524412155151, "learning_rate": 2.3534831310433264e-05, "loss": 0.15556955337524414, "step": 2629 }, { "epoch": 0.35555555555555557, "grad_norm": 0.9766533970832825, "learning_rate": 2.3529201938549434e-05, "loss": 0.19466400146484375, "step": 2630 }, { "epoch": 0.35569074778200255, "grad_norm": 2.0301287174224854, "learning_rate": 2.3523570790840274e-05, "loss": 0.2008056640625, "step": 2631 }, { "epoch": 0.3558259400084495, "grad_norm": 1.4814296960830688, "learning_rate": 2.3517937868478228e-05, "loss": 0.22864580154418945, "step": 2632 }, { "epoch": 0.3559611322348965, "grad_norm": 2.8422672748565674, "learning_rate": 2.3512303172636092e-05, "loss": 0.21220684051513672, "step": 2633 }, { "epoch": 0.3560963244613435, "grad_norm": 1.3261207342147827, "learning_rate": 2.3506666704487033e-05, "loss": 0.2205190658569336, "step": 2634 }, { "epoch": 0.35623151668779046, "grad_norm": 0.8513601422309875, "learning_rate": 2.3501028465204614e-05, "loss": 0.12939000129699707, "step": 2635 }, { "epoch": 0.35636670891423744, "grad_norm": 1.9643076658248901, "learning_rate": 2.3495388455962734e-05, "loss": 0.22187423706054688, "step": 2636 }, { "epoch": 0.3565019011406844, "grad_norm": 0.770757257938385, "learning_rate": 2.3489746677935673e-05, "loss": 0.16515254974365234, "step": 2637 }, { "epoch": 0.3566370933671314, "grad_norm": 1.5731253623962402, "learning_rate": 2.3484103132298082e-05, "loss": 0.19867467880249023, "step": 2638 }, { "epoch": 0.3567722855935784, "grad_norm": 1.6666430234909058, "learning_rate": 2.347845782022497e-05, "loss": 0.21525192260742188, "step": 2639 }, { "epoch": 0.35690747782002535, "grad_norm": 1.06008780002594, "learning_rate": 2.3472810742891734e-05, "loss": 0.1654644012451172, "step": 2640 }, { "epoch": 0.35704267004647233, "grad_norm": 0.8769435286521912, "learning_rate": 2.3467161901474118e-05, "loss": 0.17406654357910156, "step": 2641 }, { "epoch": 0.3571778622729193, "grad_norm": 1.8264845609664917, "learning_rate": 2.346151129714824e-05, "loss": 0.18178749084472656, "step": 2642 }, { "epoch": 0.3573130544993663, "grad_norm": 2.356940507888794, "learning_rate": 2.3455858931090588e-05, "loss": 0.19235610961914062, "step": 2643 }, { "epoch": 0.35744824672581327, "grad_norm": 3.2623207569122314, "learning_rate": 2.3450204804478014e-05, "loss": 0.17163968086242676, "step": 2644 }, { "epoch": 0.35758343895226025, "grad_norm": 2.5361011028289795, "learning_rate": 2.344454891848774e-05, "loss": 0.19122600555419922, "step": 2645 }, { "epoch": 0.3577186311787072, "grad_norm": 0.6441481113433838, "learning_rate": 2.3438891274297348e-05, "loss": 0.13372802734375, "step": 2646 }, { "epoch": 0.3578538234051542, "grad_norm": 0.6589263081550598, "learning_rate": 2.343323187308479e-05, "loss": 0.10570955276489258, "step": 2647 }, { "epoch": 0.3579890156316012, "grad_norm": 0.6837125420570374, "learning_rate": 2.342757071602839e-05, "loss": 0.161163330078125, "step": 2648 }, { "epoch": 0.35812420785804816, "grad_norm": 0.9433907866477966, "learning_rate": 2.3421907804306816e-05, "loss": 0.1594400405883789, "step": 2649 }, { "epoch": 0.35825940008449514, "grad_norm": 1.3440614938735962, "learning_rate": 2.341624313909913e-05, "loss": 0.15282392501831055, "step": 2650 }, { "epoch": 0.3583945923109421, "grad_norm": 1.6969292163848877, "learning_rate": 2.3410576721584742e-05, "loss": 0.21156692504882812, "step": 2651 }, { "epoch": 0.3585297845373891, "grad_norm": 1.724616527557373, "learning_rate": 2.3404908552943435e-05, "loss": 0.168853759765625, "step": 2652 }, { "epoch": 0.3586649767638361, "grad_norm": 1.5592639446258545, "learning_rate": 2.339923863435534e-05, "loss": 0.17954468727111816, "step": 2653 }, { "epoch": 0.35880016899028305, "grad_norm": 1.4546220302581787, "learning_rate": 2.3393566967000974e-05, "loss": 0.1942293643951416, "step": 2654 }, { "epoch": 0.35893536121673003, "grad_norm": 2.1446166038513184, "learning_rate": 2.3387893552061202e-05, "loss": 0.1470191478729248, "step": 2655 }, { "epoch": 0.359070553443177, "grad_norm": 3.507612943649292, "learning_rate": 2.3382218390717268e-05, "loss": 0.20348644256591797, "step": 2656 }, { "epoch": 0.359205745669624, "grad_norm": 2.9627695083618164, "learning_rate": 2.3376541484150762e-05, "loss": 0.17058563232421875, "step": 2657 }, { "epoch": 0.35934093789607097, "grad_norm": 1.885601282119751, "learning_rate": 2.3370862833543652e-05, "loss": 0.1741476058959961, "step": 2658 }, { "epoch": 0.35947613012251795, "grad_norm": 3.068471670150757, "learning_rate": 2.336518244007826e-05, "loss": 0.16492938995361328, "step": 2659 }, { "epoch": 0.3596113223489649, "grad_norm": 2.09063720703125, "learning_rate": 2.3359500304937274e-05, "loss": 0.2456369400024414, "step": 2660 }, { "epoch": 0.3597465145754119, "grad_norm": 1.2787151336669922, "learning_rate": 2.335381642930375e-05, "loss": 0.13349342346191406, "step": 2661 }, { "epoch": 0.3598817068018589, "grad_norm": 2.2733945846557617, "learning_rate": 2.3348130814361094e-05, "loss": 0.2045907974243164, "step": 2662 }, { "epoch": 0.36001689902830586, "grad_norm": 1.7334760427474976, "learning_rate": 2.334244346129309e-05, "loss": 0.20584678649902344, "step": 2663 }, { "epoch": 0.36015209125475284, "grad_norm": 1.9678996801376343, "learning_rate": 2.3336754371283862e-05, "loss": 0.14791107177734375, "step": 2664 }, { "epoch": 0.3602872834811998, "grad_norm": 0.9949893355369568, "learning_rate": 2.333106354551792e-05, "loss": 0.19701766967773438, "step": 2665 }, { "epoch": 0.3604224757076468, "grad_norm": 1.2357940673828125, "learning_rate": 2.332537098518012e-05, "loss": 0.20240211486816406, "step": 2666 }, { "epoch": 0.3605576679340938, "grad_norm": 0.7524257898330688, "learning_rate": 2.3319676691455686e-05, "loss": 0.17739391326904297, "step": 2667 }, { "epoch": 0.36069286016054075, "grad_norm": 0.9199552536010742, "learning_rate": 2.3313980665530205e-05, "loss": 0.20941162109375, "step": 2668 }, { "epoch": 0.36082805238698773, "grad_norm": 1.7981719970703125, "learning_rate": 2.3308282908589606e-05, "loss": 0.18999576568603516, "step": 2669 }, { "epoch": 0.3609632446134347, "grad_norm": 1.6676130294799805, "learning_rate": 2.330258342182021e-05, "loss": 0.26366519927978516, "step": 2670 }, { "epoch": 0.3610984368398817, "grad_norm": 1.6140620708465576, "learning_rate": 2.329688220640866e-05, "loss": 0.19182705879211426, "step": 2671 }, { "epoch": 0.36123362906632867, "grad_norm": 0.9684808850288391, "learning_rate": 2.329117926354199e-05, "loss": 0.17320823669433594, "step": 2672 }, { "epoch": 0.36136882129277564, "grad_norm": 3.644124984741211, "learning_rate": 2.3285474594407588e-05, "loss": 0.23334693908691406, "step": 2673 }, { "epoch": 0.3615040135192226, "grad_norm": 1.0325514078140259, "learning_rate": 2.327976820019319e-05, "loss": 0.1668715476989746, "step": 2674 }, { "epoch": 0.3616392057456696, "grad_norm": 2.9641542434692383, "learning_rate": 2.32740600820869e-05, "loss": 0.26941871643066406, "step": 2675 }, { "epoch": 0.3617743979721166, "grad_norm": 3.1507081985473633, "learning_rate": 2.326835024127718e-05, "loss": 0.18852519989013672, "step": 2676 }, { "epoch": 0.36190959019856356, "grad_norm": 0.807555615901947, "learning_rate": 2.326263867895285e-05, "loss": 0.1259281039237976, "step": 2677 }, { "epoch": 0.36204478242501054, "grad_norm": 0.9904191493988037, "learning_rate": 2.3256925396303076e-05, "loss": 0.15046119689941406, "step": 2678 }, { "epoch": 0.3621799746514575, "grad_norm": 0.9153783917427063, "learning_rate": 2.3251210394517412e-05, "loss": 0.22531938552856445, "step": 2679 }, { "epoch": 0.3623151668779045, "grad_norm": 1.551855444908142, "learning_rate": 2.3245493674785742e-05, "loss": 0.13031911849975586, "step": 2680 }, { "epoch": 0.3624503591043515, "grad_norm": 0.7588427066802979, "learning_rate": 2.3239775238298316e-05, "loss": 0.15736627578735352, "step": 2681 }, { "epoch": 0.3625855513307985, "grad_norm": 1.3534470796585083, "learning_rate": 2.3234055086245744e-05, "loss": 0.16922950744628906, "step": 2682 }, { "epoch": 0.3627207435572455, "grad_norm": 0.7819817662239075, "learning_rate": 2.3228333219818998e-05, "loss": 0.17315673828125, "step": 2683 }, { "epoch": 0.36285593578369246, "grad_norm": 1.6962943077087402, "learning_rate": 2.3222609640209397e-05, "loss": 0.23409461975097656, "step": 2684 }, { "epoch": 0.36299112801013944, "grad_norm": 2.9697887897491455, "learning_rate": 2.3216884348608614e-05, "loss": 0.24148082733154297, "step": 2685 }, { "epoch": 0.3631263202365864, "grad_norm": 1.3740758895874023, "learning_rate": 2.32111573462087e-05, "loss": 0.1751852035522461, "step": 2686 }, { "epoch": 0.3632615124630334, "grad_norm": 1.5446159839630127, "learning_rate": 2.3205428634202028e-05, "loss": 0.2353992462158203, "step": 2687 }, { "epoch": 0.3633967046894804, "grad_norm": 1.7599570751190186, "learning_rate": 2.3199698213781367e-05, "loss": 0.211417555809021, "step": 2688 }, { "epoch": 0.36353189691592735, "grad_norm": 1.3838764429092407, "learning_rate": 2.319396608613981e-05, "loss": 0.1794447898864746, "step": 2689 }, { "epoch": 0.36366708914237433, "grad_norm": 1.423200249671936, "learning_rate": 2.318823225247082e-05, "loss": 0.20132160186767578, "step": 2690 }, { "epoch": 0.3638022813688213, "grad_norm": 1.7932085990905762, "learning_rate": 2.3182496713968208e-05, "loss": 0.2605876922607422, "step": 2691 }, { "epoch": 0.3639374735952683, "grad_norm": 0.5866106748580933, "learning_rate": 2.3176759471826143e-05, "loss": 0.11948943138122559, "step": 2692 }, { "epoch": 0.36407266582171527, "grad_norm": 1.108756422996521, "learning_rate": 2.3171020527239155e-05, "loss": 0.21635818481445312, "step": 2693 }, { "epoch": 0.36420785804816225, "grad_norm": 1.9077092409133911, "learning_rate": 2.316527988140212e-05, "loss": 0.2179112434387207, "step": 2694 }, { "epoch": 0.3643430502746092, "grad_norm": 1.996392846107483, "learning_rate": 2.315953753551027e-05, "loss": 0.14326000213623047, "step": 2695 }, { "epoch": 0.3644782425010562, "grad_norm": 3.499091863632202, "learning_rate": 2.3153793490759197e-05, "loss": 0.16757678985595703, "step": 2696 }, { "epoch": 0.3646134347275032, "grad_norm": 1.0090104341506958, "learning_rate": 2.3148047748344835e-05, "loss": 0.15411949157714844, "step": 2697 }, { "epoch": 0.36474862695395016, "grad_norm": 0.8679322600364685, "learning_rate": 2.314230030946348e-05, "loss": 0.13198280334472656, "step": 2698 }, { "epoch": 0.36488381918039714, "grad_norm": 0.8545182943344116, "learning_rate": 2.3136551175311782e-05, "loss": 0.1557769775390625, "step": 2699 }, { "epoch": 0.3650190114068441, "grad_norm": 0.679964542388916, "learning_rate": 2.313080034708674e-05, "loss": 0.15737056732177734, "step": 2700 }, { "epoch": 0.3651542036332911, "grad_norm": 2.1893441677093506, "learning_rate": 2.312504782598571e-05, "loss": 0.16611766815185547, "step": 2701 }, { "epoch": 0.3652893958597381, "grad_norm": 1.1812126636505127, "learning_rate": 2.311929361320639e-05, "loss": 0.16109466552734375, "step": 2702 }, { "epoch": 0.36542458808618505, "grad_norm": 1.9342776536941528, "learning_rate": 2.311353770994684e-05, "loss": 0.23283231258392334, "step": 2703 }, { "epoch": 0.36555978031263203, "grad_norm": 1.5147558450698853, "learning_rate": 2.310778011740548e-05, "loss": 0.205535888671875, "step": 2704 }, { "epoch": 0.365694972539079, "grad_norm": 3.4692702293395996, "learning_rate": 2.310202083678106e-05, "loss": 0.2441082000732422, "step": 2705 }, { "epoch": 0.365830164765526, "grad_norm": 1.031876564025879, "learning_rate": 2.3096259869272694e-05, "loss": 0.13544178009033203, "step": 2706 }, { "epoch": 0.36596535699197297, "grad_norm": 0.927740752696991, "learning_rate": 2.309049721607985e-05, "loss": 0.1540679931640625, "step": 2707 }, { "epoch": 0.36610054921841995, "grad_norm": 0.795728325843811, "learning_rate": 2.3084732878402342e-05, "loss": 0.14947128295898438, "step": 2708 }, { "epoch": 0.3662357414448669, "grad_norm": 1.3317497968673706, "learning_rate": 2.307896685744034e-05, "loss": 0.20220565795898438, "step": 2709 }, { "epoch": 0.3663709336713139, "grad_norm": 1.7826968431472778, "learning_rate": 2.3073199154394352e-05, "loss": 0.2117633819580078, "step": 2710 }, { "epoch": 0.3665061258977609, "grad_norm": 0.8682212233543396, "learning_rate": 2.3067429770465246e-05, "loss": 0.20788288116455078, "step": 2711 }, { "epoch": 0.36664131812420786, "grad_norm": 1.17942214012146, "learning_rate": 2.3061658706854244e-05, "loss": 0.1511554718017578, "step": 2712 }, { "epoch": 0.36677651035065484, "grad_norm": 1.595288634300232, "learning_rate": 2.3055885964762907e-05, "loss": 0.21232032775878906, "step": 2713 }, { "epoch": 0.3669117025771018, "grad_norm": 0.8747696280479431, "learning_rate": 2.3050111545393156e-05, "loss": 0.19792938232421875, "step": 2714 }, { "epoch": 0.3670468948035488, "grad_norm": 1.2315130233764648, "learning_rate": 2.304433544994725e-05, "loss": 0.20705533027648926, "step": 2715 }, { "epoch": 0.3671820870299958, "grad_norm": 1.1320106983184814, "learning_rate": 2.303855767962781e-05, "loss": 0.24963951110839844, "step": 2716 }, { "epoch": 0.36731727925644275, "grad_norm": 0.7152423858642578, "learning_rate": 2.303277823563779e-05, "loss": 0.12834644317626953, "step": 2717 }, { "epoch": 0.36745247148288973, "grad_norm": 0.7315583825111389, "learning_rate": 2.3026997119180507e-05, "loss": 0.1488189697265625, "step": 2718 }, { "epoch": 0.3675876637093367, "grad_norm": 1.0991500616073608, "learning_rate": 2.3021214331459616e-05, "loss": 0.1364074945449829, "step": 2719 }, { "epoch": 0.3677228559357837, "grad_norm": 1.0755302906036377, "learning_rate": 2.301542987367913e-05, "loss": 0.1743030548095703, "step": 2720 }, { "epoch": 0.36785804816223067, "grad_norm": 1.206933856010437, "learning_rate": 2.3009643747043403e-05, "loss": 0.19913291931152344, "step": 2721 }, { "epoch": 0.36799324038867764, "grad_norm": 1.1955817937850952, "learning_rate": 2.3003855952757132e-05, "loss": 0.1390678882598877, "step": 2722 }, { "epoch": 0.3681284326151246, "grad_norm": 1.1761120557785034, "learning_rate": 2.2998066492025372e-05, "loss": 0.21669769287109375, "step": 2723 }, { "epoch": 0.3682636248415716, "grad_norm": 1.9794600009918213, "learning_rate": 2.2992275366053513e-05, "loss": 0.17833518981933594, "step": 2724 }, { "epoch": 0.3683988170680186, "grad_norm": 1.0502009391784668, "learning_rate": 2.2986482576047305e-05, "loss": 0.20044803619384766, "step": 2725 }, { "epoch": 0.36853400929446556, "grad_norm": 2.7960731983184814, "learning_rate": 2.298068812321284e-05, "loss": 0.2517871856689453, "step": 2726 }, { "epoch": 0.36866920152091254, "grad_norm": 2.1069982051849365, "learning_rate": 2.297489200875654e-05, "loss": 0.1832141876220703, "step": 2727 }, { "epoch": 0.3688043937473595, "grad_norm": 1.8392685651779175, "learning_rate": 2.2969094233885204e-05, "loss": 0.17749810218811035, "step": 2728 }, { "epoch": 0.3689395859738065, "grad_norm": 0.811008632183075, "learning_rate": 2.296329479980595e-05, "loss": 0.16436004638671875, "step": 2729 }, { "epoch": 0.3690747782002535, "grad_norm": 0.6516792178153992, "learning_rate": 2.2957493707726252e-05, "loss": 0.17687416076660156, "step": 2730 }, { "epoch": 0.36920997042670045, "grad_norm": 2.243013858795166, "learning_rate": 2.2951690958853932e-05, "loss": 0.19929885864257812, "step": 2731 }, { "epoch": 0.36934516265314743, "grad_norm": 1.7597277164459229, "learning_rate": 2.2945886554397154e-05, "loss": 0.205718994140625, "step": 2732 }, { "epoch": 0.3694803548795944, "grad_norm": 2.5990350246429443, "learning_rate": 2.294008049556441e-05, "loss": 0.21671009063720703, "step": 2733 }, { "epoch": 0.3696155471060414, "grad_norm": 1.0671941041946411, "learning_rate": 2.2934272783564577e-05, "loss": 0.19755804538726807, "step": 2734 }, { "epoch": 0.36975073933248837, "grad_norm": 2.4794886112213135, "learning_rate": 2.2928463419606835e-05, "loss": 0.19269847869873047, "step": 2735 }, { "epoch": 0.36988593155893534, "grad_norm": 1.0212067365646362, "learning_rate": 2.292265240490073e-05, "loss": 0.1706867218017578, "step": 2736 }, { "epoch": 0.3700211237853823, "grad_norm": 1.8596913814544678, "learning_rate": 2.2916839740656154e-05, "loss": 0.18334484100341797, "step": 2737 }, { "epoch": 0.3701563160118293, "grad_norm": 1.3385870456695557, "learning_rate": 2.2911025428083316e-05, "loss": 0.14020609855651855, "step": 2738 }, { "epoch": 0.3702915082382763, "grad_norm": 1.8333052396774292, "learning_rate": 2.2905209468392798e-05, "loss": 0.14725017547607422, "step": 2739 }, { "epoch": 0.37042670046472326, "grad_norm": 3.6977462768554688, "learning_rate": 2.2899391862795514e-05, "loss": 0.2024831771850586, "step": 2740 }, { "epoch": 0.37056189269117024, "grad_norm": 1.22323739528656, "learning_rate": 2.2893572612502718e-05, "loss": 0.09719038009643555, "step": 2741 }, { "epoch": 0.3706970849176172, "grad_norm": 0.8222039341926575, "learning_rate": 2.2887751718726013e-05, "loss": 0.16521143913269043, "step": 2742 }, { "epoch": 0.3708322771440642, "grad_norm": 1.368192434310913, "learning_rate": 2.288192918267734e-05, "loss": 0.14769744873046875, "step": 2743 }, { "epoch": 0.37096746937051117, "grad_norm": 1.356164574623108, "learning_rate": 2.2876105005568974e-05, "loss": 0.19460105895996094, "step": 2744 }, { "epoch": 0.37110266159695815, "grad_norm": 1.188924789428711, "learning_rate": 2.287027918861355e-05, "loss": 0.2086029052734375, "step": 2745 }, { "epoch": 0.37123785382340513, "grad_norm": 1.7186880111694336, "learning_rate": 2.2864451733024024e-05, "loss": 0.1468663215637207, "step": 2746 }, { "epoch": 0.3713730460498521, "grad_norm": 3.6514415740966797, "learning_rate": 2.2858622640013716e-05, "loss": 0.20410096645355225, "step": 2747 }, { "epoch": 0.3715082382762991, "grad_norm": 1.6800239086151123, "learning_rate": 2.285279191079626e-05, "loss": 0.27765846252441406, "step": 2748 }, { "epoch": 0.3716434305027461, "grad_norm": 1.6330631971359253, "learning_rate": 2.2846959546585656e-05, "loss": 0.2166886329650879, "step": 2749 }, { "epoch": 0.3717786227291931, "grad_norm": 0.7831001281738281, "learning_rate": 2.2841125548596225e-05, "loss": 0.1688394546508789, "step": 2750 }, { "epoch": 0.3719138149556401, "grad_norm": 1.1121560335159302, "learning_rate": 2.2835289918042648e-05, "loss": 0.18171215057373047, "step": 2751 }, { "epoch": 0.37204900718208705, "grad_norm": 4.414970874786377, "learning_rate": 2.282945265613992e-05, "loss": 0.2252330780029297, "step": 2752 }, { "epoch": 0.37218419940853403, "grad_norm": 1.740721344947815, "learning_rate": 2.2823613764103406e-05, "loss": 0.2262563705444336, "step": 2753 }, { "epoch": 0.372319391634981, "grad_norm": 1.5421509742736816, "learning_rate": 2.2817773243148776e-05, "loss": 0.1658763885498047, "step": 2754 }, { "epoch": 0.372454583861428, "grad_norm": 1.3289058208465576, "learning_rate": 2.2811931094492074e-05, "loss": 0.14342939853668213, "step": 2755 }, { "epoch": 0.37258977608787497, "grad_norm": 1.6568609476089478, "learning_rate": 2.280608731934966e-05, "loss": 0.2000293731689453, "step": 2756 }, { "epoch": 0.37272496831432195, "grad_norm": 1.32503080368042, "learning_rate": 2.280024191893823e-05, "loss": 0.18233680725097656, "step": 2757 }, { "epoch": 0.3728601605407689, "grad_norm": 4.2607903480529785, "learning_rate": 2.279439489447485e-05, "loss": 0.24362659454345703, "step": 2758 }, { "epoch": 0.3729953527672159, "grad_norm": 1.0344898700714111, "learning_rate": 2.278854624717688e-05, "loss": 0.16037487983703613, "step": 2759 }, { "epoch": 0.3731305449936629, "grad_norm": 2.1257076263427734, "learning_rate": 2.2782695978262045e-05, "loss": 0.2311878204345703, "step": 2760 }, { "epoch": 0.37326573722010986, "grad_norm": 0.8441314101219177, "learning_rate": 2.2776844088948406e-05, "loss": 0.1779491901397705, "step": 2761 }, { "epoch": 0.37340092944655684, "grad_norm": 0.8698086738586426, "learning_rate": 2.2770990580454364e-05, "loss": 0.20713424682617188, "step": 2762 }, { "epoch": 0.3735361216730038, "grad_norm": 1.9039814472198486, "learning_rate": 2.276513545399864e-05, "loss": 0.15225887298583984, "step": 2763 }, { "epoch": 0.3736713138994508, "grad_norm": 1.314050316810608, "learning_rate": 2.2759278710800306e-05, "loss": 0.2066965103149414, "step": 2764 }, { "epoch": 0.3738065061258978, "grad_norm": 2.8302485942840576, "learning_rate": 2.275342035207876e-05, "loss": 0.21586287021636963, "step": 2765 }, { "epoch": 0.37394169835234475, "grad_norm": 1.9034204483032227, "learning_rate": 2.2747560379053752e-05, "loss": 0.19992351531982422, "step": 2766 }, { "epoch": 0.37407689057879173, "grad_norm": 0.96148282289505, "learning_rate": 2.2741698792945364e-05, "loss": 0.1269235610961914, "step": 2767 }, { "epoch": 0.3742120828052387, "grad_norm": 1.278441309928894, "learning_rate": 2.2735835594974003e-05, "loss": 0.1609492301940918, "step": 2768 }, { "epoch": 0.3743472750316857, "grad_norm": 0.8664324283599854, "learning_rate": 2.272997078636042e-05, "loss": 0.16405844688415527, "step": 2769 }, { "epoch": 0.37448246725813267, "grad_norm": 1.8115465641021729, "learning_rate": 2.272410436832569e-05, "loss": 0.1721198558807373, "step": 2770 }, { "epoch": 0.37461765948457965, "grad_norm": 0.8689283132553101, "learning_rate": 2.2718236342091248e-05, "loss": 0.12782704830169678, "step": 2771 }, { "epoch": 0.3747528517110266, "grad_norm": 2.6778218746185303, "learning_rate": 2.2712366708878838e-05, "loss": 0.18440723419189453, "step": 2772 }, { "epoch": 0.3748880439374736, "grad_norm": 1.32063627243042, "learning_rate": 2.2706495469910552e-05, "loss": 0.1354503631591797, "step": 2773 }, { "epoch": 0.3750232361639206, "grad_norm": 2.570680618286133, "learning_rate": 2.2700622626408814e-05, "loss": 0.14022397994995117, "step": 2774 }, { "epoch": 0.37515842839036756, "grad_norm": 1.262670636177063, "learning_rate": 2.2694748179596375e-05, "loss": 0.17854344844818115, "step": 2775 }, { "epoch": 0.37529362061681454, "grad_norm": 2.77175235748291, "learning_rate": 2.2688872130696342e-05, "loss": 0.23230743408203125, "step": 2776 }, { "epoch": 0.3754288128432615, "grad_norm": 1.1634495258331299, "learning_rate": 2.268299448093212e-05, "loss": 0.16405296325683594, "step": 2777 }, { "epoch": 0.3755640050697085, "grad_norm": 2.1187777519226074, "learning_rate": 2.2677115231527482e-05, "loss": 0.17717647552490234, "step": 2778 }, { "epoch": 0.3756991972961555, "grad_norm": 2.0991415977478027, "learning_rate": 2.267123438370651e-05, "loss": 0.16820430755615234, "step": 2779 }, { "epoch": 0.37583438952260245, "grad_norm": 4.217456340789795, "learning_rate": 2.266535193869363e-05, "loss": 0.19469070434570312, "step": 2780 }, { "epoch": 0.37596958174904943, "grad_norm": 3.2345173358917236, "learning_rate": 2.2659467897713604e-05, "loss": 0.13300704956054688, "step": 2781 }, { "epoch": 0.3761047739754964, "grad_norm": 1.4967330694198608, "learning_rate": 2.2653582261991516e-05, "loss": 0.1920604705810547, "step": 2782 }, { "epoch": 0.3762399662019434, "grad_norm": 0.719992995262146, "learning_rate": 2.2647695032752785e-05, "loss": 0.1298656463623047, "step": 2783 }, { "epoch": 0.37637515842839037, "grad_norm": 1.2852716445922852, "learning_rate": 2.264180621122317e-05, "loss": 0.17065811157226562, "step": 2784 }, { "epoch": 0.37651035065483734, "grad_norm": 1.0930293798446655, "learning_rate": 2.2635915798628747e-05, "loss": 0.2602882385253906, "step": 2785 }, { "epoch": 0.3766455428812843, "grad_norm": 1.5484962463378906, "learning_rate": 2.2630023796195932e-05, "loss": 0.19918441772460938, "step": 2786 }, { "epoch": 0.3767807351077313, "grad_norm": 1.6997473239898682, "learning_rate": 2.262413020515148e-05, "loss": 0.17521190643310547, "step": 2787 }, { "epoch": 0.3769159273341783, "grad_norm": 1.4968205690383911, "learning_rate": 2.261823502672246e-05, "loss": 0.19996929168701172, "step": 2788 }, { "epoch": 0.37705111956062526, "grad_norm": 1.352866768836975, "learning_rate": 2.261233826213628e-05, "loss": 0.14621496200561523, "step": 2789 }, { "epoch": 0.37718631178707224, "grad_norm": 3.2397778034210205, "learning_rate": 2.2606439912620688e-05, "loss": 0.20677757263183594, "step": 2790 }, { "epoch": 0.3773215040135192, "grad_norm": 1.1323494911193848, "learning_rate": 2.2600539979403734e-05, "loss": 0.2451915740966797, "step": 2791 }, { "epoch": 0.3774566962399662, "grad_norm": 1.2870954275131226, "learning_rate": 2.259463846371383e-05, "loss": 0.18496131896972656, "step": 2792 }, { "epoch": 0.3775918884664132, "grad_norm": 0.7410619258880615, "learning_rate": 2.2588735366779698e-05, "loss": 0.15869379043579102, "step": 2793 }, { "epoch": 0.37772708069286015, "grad_norm": 0.7197501063346863, "learning_rate": 2.2582830689830394e-05, "loss": 0.1327822208404541, "step": 2794 }, { "epoch": 0.37786227291930713, "grad_norm": 0.5694572925567627, "learning_rate": 2.2576924434095305e-05, "loss": 0.14859962463378906, "step": 2795 }, { "epoch": 0.3779974651457541, "grad_norm": 0.8164991736412048, "learning_rate": 2.257101660080414e-05, "loss": 0.14496421813964844, "step": 2796 }, { "epoch": 0.3781326573722011, "grad_norm": 2.050577402114868, "learning_rate": 2.256510719118695e-05, "loss": 0.17336273193359375, "step": 2797 }, { "epoch": 0.37826784959864806, "grad_norm": 0.8579196929931641, "learning_rate": 2.2559196206474094e-05, "loss": 0.13872623443603516, "step": 2798 }, { "epoch": 0.37840304182509504, "grad_norm": 2.3823320865631104, "learning_rate": 2.2553283647896287e-05, "loss": 0.14835309982299805, "step": 2799 }, { "epoch": 0.378538234051542, "grad_norm": 2.0684659481048584, "learning_rate": 2.254736951668454e-05, "loss": 0.17647743225097656, "step": 2800 }, { "epoch": 0.378673426277989, "grad_norm": 0.877079963684082, "learning_rate": 2.2541453814070212e-05, "loss": 0.18906688690185547, "step": 2801 }, { "epoch": 0.378808618504436, "grad_norm": 1.0572319030761719, "learning_rate": 2.2535536541284983e-05, "loss": 0.18941116333007812, "step": 2802 }, { "epoch": 0.37894381073088296, "grad_norm": 2.116623640060425, "learning_rate": 2.2529617699560857e-05, "loss": 0.15986156463623047, "step": 2803 }, { "epoch": 0.37907900295732994, "grad_norm": 2.5547878742218018, "learning_rate": 2.2523697290130185e-05, "loss": 0.1714191436767578, "step": 2804 }, { "epoch": 0.3792141951837769, "grad_norm": 1.0090625286102295, "learning_rate": 2.251777531422561e-05, "loss": 0.20269203186035156, "step": 2805 }, { "epoch": 0.3793493874102239, "grad_norm": 1.1625059843063354, "learning_rate": 2.2511851773080127e-05, "loss": 0.2607383728027344, "step": 2806 }, { "epoch": 0.37948457963667087, "grad_norm": 0.9749452471733093, "learning_rate": 2.2505926667927043e-05, "loss": 0.16684675216674805, "step": 2807 }, { "epoch": 0.37961977186311785, "grad_norm": 2.202070713043213, "learning_rate": 2.25e-05, "loss": 0.17569732666015625, "step": 2808 }, { "epoch": 0.37975496408956483, "grad_norm": 0.9541611671447754, "learning_rate": 2.2494071770532966e-05, "loss": 0.16417407989501953, "step": 2809 }, { "epoch": 0.3798901563160118, "grad_norm": 1.2085424661636353, "learning_rate": 2.2488141980760223e-05, "loss": 0.1471109390258789, "step": 2810 }, { "epoch": 0.3800253485424588, "grad_norm": 1.2389203310012817, "learning_rate": 2.248221063191639e-05, "loss": 0.22618675231933594, "step": 2811 }, { "epoch": 0.38016054076890576, "grad_norm": 1.2779831886291504, "learning_rate": 2.24762777252364e-05, "loss": 0.16464471817016602, "step": 2812 }, { "epoch": 0.38029573299535274, "grad_norm": 0.9568815231323242, "learning_rate": 2.2470343261955525e-05, "loss": 0.1331338882446289, "step": 2813 }, { "epoch": 0.3804309252217997, "grad_norm": 0.8668561577796936, "learning_rate": 2.246440724330934e-05, "loss": 0.1792125701904297, "step": 2814 }, { "epoch": 0.3805661174482467, "grad_norm": 2.066789150238037, "learning_rate": 2.2458469670533765e-05, "loss": 0.13564109802246094, "step": 2815 }, { "epoch": 0.3807013096746937, "grad_norm": 1.1422011852264404, "learning_rate": 2.2452530544865034e-05, "loss": 0.20665264129638672, "step": 2816 }, { "epoch": 0.3808365019011407, "grad_norm": 0.8526986837387085, "learning_rate": 2.24465898675397e-05, "loss": 0.17189264297485352, "step": 2817 }, { "epoch": 0.3809716941275877, "grad_norm": 0.8208529949188232, "learning_rate": 2.244064763979464e-05, "loss": 0.17484569549560547, "step": 2818 }, { "epoch": 0.38110688635403467, "grad_norm": 1.3694156408309937, "learning_rate": 2.2434703862867068e-05, "loss": 0.13419246673583984, "step": 2819 }, { "epoch": 0.38124207858048165, "grad_norm": 0.882036566734314, "learning_rate": 2.2428758537994504e-05, "loss": 0.14685344696044922, "step": 2820 }, { "epoch": 0.3813772708069286, "grad_norm": 1.2096648216247559, "learning_rate": 2.24228116664148e-05, "loss": 0.19815731048583984, "step": 2821 }, { "epoch": 0.3815124630333756, "grad_norm": 2.187608242034912, "learning_rate": 2.2416863249366125e-05, "loss": 0.2264232635498047, "step": 2822 }, { "epoch": 0.3816476552598226, "grad_norm": 2.656158685684204, "learning_rate": 2.241091328808696e-05, "loss": 0.1796717643737793, "step": 2823 }, { "epoch": 0.38178284748626956, "grad_norm": 1.072092890739441, "learning_rate": 2.240496178381614e-05, "loss": 0.1416279673576355, "step": 2824 }, { "epoch": 0.38191803971271654, "grad_norm": 1.5303592681884766, "learning_rate": 2.239900873779278e-05, "loss": 0.19420289993286133, "step": 2825 }, { "epoch": 0.3820532319391635, "grad_norm": 0.7878977656364441, "learning_rate": 2.2393054151256352e-05, "loss": 0.14476585388183594, "step": 2826 }, { "epoch": 0.3821884241656105, "grad_norm": 3.1602046489715576, "learning_rate": 2.238709802544662e-05, "loss": 0.224456787109375, "step": 2827 }, { "epoch": 0.3823236163920575, "grad_norm": 1.2856125831604004, "learning_rate": 2.2381140361603686e-05, "loss": 0.2051258087158203, "step": 2828 }, { "epoch": 0.38245880861850445, "grad_norm": 1.5704478025436401, "learning_rate": 2.237518116096797e-05, "loss": 0.168792724609375, "step": 2829 }, { "epoch": 0.38259400084495143, "grad_norm": 1.0216450691223145, "learning_rate": 2.2369220424780203e-05, "loss": 0.18901348114013672, "step": 2830 }, { "epoch": 0.3827291930713984, "grad_norm": 1.7724815607070923, "learning_rate": 2.2363258154281452e-05, "loss": 0.24583053588867188, "step": 2831 }, { "epoch": 0.3828643852978454, "grad_norm": 0.8602866530418396, "learning_rate": 2.2357294350713088e-05, "loss": 0.14009666442871094, "step": 2832 }, { "epoch": 0.38299957752429237, "grad_norm": 1.2809284925460815, "learning_rate": 2.2351329015316802e-05, "loss": 0.1760730743408203, "step": 2833 }, { "epoch": 0.38313476975073935, "grad_norm": 1.180512547492981, "learning_rate": 2.2345362149334613e-05, "loss": 0.23828506469726562, "step": 2834 }, { "epoch": 0.3832699619771863, "grad_norm": 1.7512367963790894, "learning_rate": 2.2339393754008854e-05, "loss": 0.22279834747314453, "step": 2835 }, { "epoch": 0.3834051542036333, "grad_norm": 1.1981666088104248, "learning_rate": 2.233342383058218e-05, "loss": 0.1991875171661377, "step": 2836 }, { "epoch": 0.3835403464300803, "grad_norm": 1.8916113376617432, "learning_rate": 2.2327452380297554e-05, "loss": 0.13935136795043945, "step": 2837 }, { "epoch": 0.38367553865652726, "grad_norm": 1.4282495975494385, "learning_rate": 2.232147940439827e-05, "loss": 0.19857406616210938, "step": 2838 }, { "epoch": 0.38381073088297424, "grad_norm": 1.0461626052856445, "learning_rate": 2.2315504904127936e-05, "loss": 0.21667861938476562, "step": 2839 }, { "epoch": 0.3839459231094212, "grad_norm": 2.026803731918335, "learning_rate": 2.2309528880730463e-05, "loss": 0.21555709838867188, "step": 2840 }, { "epoch": 0.3840811153358682, "grad_norm": 0.7633315920829773, "learning_rate": 2.2303551335450096e-05, "loss": 0.14227962493896484, "step": 2841 }, { "epoch": 0.3842163075623152, "grad_norm": 0.7599472403526306, "learning_rate": 2.2297572269531398e-05, "loss": 0.16444778442382812, "step": 2842 }, { "epoch": 0.38435149978876215, "grad_norm": 0.9680823683738708, "learning_rate": 2.2291591684219243e-05, "loss": 0.14874744415283203, "step": 2843 }, { "epoch": 0.38448669201520913, "grad_norm": 2.3640425205230713, "learning_rate": 2.2285609580758806e-05, "loss": 0.1773529052734375, "step": 2844 }, { "epoch": 0.3846218842416561, "grad_norm": 0.7894426584243774, "learning_rate": 2.227962596039561e-05, "loss": 0.14577817916870117, "step": 2845 }, { "epoch": 0.3847570764681031, "grad_norm": 1.8325927257537842, "learning_rate": 2.2273640824375462e-05, "loss": 0.15846896171569824, "step": 2846 }, { "epoch": 0.38489226869455007, "grad_norm": 0.8720874190330505, "learning_rate": 2.2267654173944515e-05, "loss": 0.1635441780090332, "step": 2847 }, { "epoch": 0.38502746092099704, "grad_norm": 1.0309906005859375, "learning_rate": 2.2261666010349212e-05, "loss": 0.16109275817871094, "step": 2848 }, { "epoch": 0.385162653147444, "grad_norm": 1.1663784980773926, "learning_rate": 2.2255676334836317e-05, "loss": 0.19454479217529297, "step": 2849 }, { "epoch": 0.385297845373891, "grad_norm": 2.7666244506835938, "learning_rate": 2.2249685148652917e-05, "loss": 0.19118833541870117, "step": 2850 }, { "epoch": 0.385433037600338, "grad_norm": 3.6009457111358643, "learning_rate": 2.224369245304641e-05, "loss": 0.21427249908447266, "step": 2851 }, { "epoch": 0.38556822982678496, "grad_norm": 2.510897159576416, "learning_rate": 2.2237698249264507e-05, "loss": 0.19642353057861328, "step": 2852 }, { "epoch": 0.38570342205323194, "grad_norm": 2.7096643447875977, "learning_rate": 2.2231702538555235e-05, "loss": 0.18151569366455078, "step": 2853 }, { "epoch": 0.3858386142796789, "grad_norm": 0.8593248724937439, "learning_rate": 2.2225705322166928e-05, "loss": 0.17100143432617188, "step": 2854 }, { "epoch": 0.3859738065061259, "grad_norm": 1.2871543169021606, "learning_rate": 2.2219706601348242e-05, "loss": 0.1817951202392578, "step": 2855 }, { "epoch": 0.3861089987325729, "grad_norm": 0.8620615005493164, "learning_rate": 2.221370637734814e-05, "loss": 0.17870712280273438, "step": 2856 }, { "epoch": 0.38624419095901985, "grad_norm": 1.0389586687088013, "learning_rate": 2.22077046514159e-05, "loss": 0.18694639205932617, "step": 2857 }, { "epoch": 0.38637938318546683, "grad_norm": 1.508880615234375, "learning_rate": 2.220170142480112e-05, "loss": 0.17932653427124023, "step": 2858 }, { "epoch": 0.3865145754119138, "grad_norm": 0.8926329016685486, "learning_rate": 2.2195696698753695e-05, "loss": 0.15302467346191406, "step": 2859 }, { "epoch": 0.3866497676383608, "grad_norm": 0.655914843082428, "learning_rate": 2.2189690474523844e-05, "loss": 0.11203479766845703, "step": 2860 }, { "epoch": 0.38678495986480776, "grad_norm": 1.0332105159759521, "learning_rate": 2.21836827533621e-05, "loss": 0.20537948608398438, "step": 2861 }, { "epoch": 0.38692015209125474, "grad_norm": 0.8247001767158508, "learning_rate": 2.2177673536519297e-05, "loss": 0.1389141082763672, "step": 2862 }, { "epoch": 0.3870553443177017, "grad_norm": 1.8151029348373413, "learning_rate": 2.217166282524659e-05, "loss": 0.21303224563598633, "step": 2863 }, { "epoch": 0.3871905365441487, "grad_norm": 2.5937812328338623, "learning_rate": 2.216565062079544e-05, "loss": 0.24167728424072266, "step": 2864 }, { "epoch": 0.3873257287705957, "grad_norm": 2.4652202129364014, "learning_rate": 2.2159636924417612e-05, "loss": 0.15409326553344727, "step": 2865 }, { "epoch": 0.38746092099704266, "grad_norm": 0.8946840763092041, "learning_rate": 2.2153621737365205e-05, "loss": 0.18581390380859375, "step": 2866 }, { "epoch": 0.38759611322348964, "grad_norm": 1.3004682064056396, "learning_rate": 2.2147605060890598e-05, "loss": 0.18044090270996094, "step": 2867 }, { "epoch": 0.3877313054499366, "grad_norm": 1.2560685873031616, "learning_rate": 2.2141586896246503e-05, "loss": 0.13004255294799805, "step": 2868 }, { "epoch": 0.3878664976763836, "grad_norm": 1.7169181108474731, "learning_rate": 2.2135567244685933e-05, "loss": 0.17757415771484375, "step": 2869 }, { "epoch": 0.38800168990283057, "grad_norm": 1.2001307010650635, "learning_rate": 2.2129546107462214e-05, "loss": 0.2307891845703125, "step": 2870 }, { "epoch": 0.38813688212927755, "grad_norm": 2.2192158699035645, "learning_rate": 2.212352348582897e-05, "loss": 0.20961380004882812, "step": 2871 }, { "epoch": 0.38827207435572453, "grad_norm": 0.6095111966133118, "learning_rate": 2.2117499381040157e-05, "loss": 0.14725971221923828, "step": 2872 }, { "epoch": 0.3884072665821715, "grad_norm": 0.7406976819038391, "learning_rate": 2.211147379435001e-05, "loss": 0.16102075576782227, "step": 2873 }, { "epoch": 0.3885424588086185, "grad_norm": 1.516456127166748, "learning_rate": 2.2105446727013098e-05, "loss": 0.2035961151123047, "step": 2874 }, { "epoch": 0.38867765103506546, "grad_norm": 1.0755302906036377, "learning_rate": 2.209941818028429e-05, "loss": 0.1721811294555664, "step": 2875 }, { "epoch": 0.38881284326151244, "grad_norm": 1.366456389427185, "learning_rate": 2.2093388155418757e-05, "loss": 0.20836257934570312, "step": 2876 }, { "epoch": 0.3889480354879594, "grad_norm": 0.9442431330680847, "learning_rate": 2.2087356653671982e-05, "loss": 0.19383478164672852, "step": 2877 }, { "epoch": 0.3890832277144064, "grad_norm": 1.488297700881958, "learning_rate": 2.2081323676299756e-05, "loss": 0.1557302474975586, "step": 2878 }, { "epoch": 0.3892184199408534, "grad_norm": 3.0893261432647705, "learning_rate": 2.207528922455818e-05, "loss": 0.15931320190429688, "step": 2879 }, { "epoch": 0.38935361216730036, "grad_norm": 0.9239708781242371, "learning_rate": 2.206925329970366e-05, "loss": 0.13497304916381836, "step": 2880 }, { "epoch": 0.38948880439374733, "grad_norm": 1.2790453433990479, "learning_rate": 2.20632159029929e-05, "loss": 0.249176025390625, "step": 2881 }, { "epoch": 0.3896239966201943, "grad_norm": 0.8639877438545227, "learning_rate": 2.2057177035682926e-05, "loss": 0.17080211639404297, "step": 2882 }, { "epoch": 0.3897591888466413, "grad_norm": 1.3367589712142944, "learning_rate": 2.2051136699031058e-05, "loss": 0.18737030029296875, "step": 2883 }, { "epoch": 0.38989438107308827, "grad_norm": 1.1957826614379883, "learning_rate": 2.2045094894294933e-05, "loss": 0.17303848266601562, "step": 2884 }, { "epoch": 0.3900295732995353, "grad_norm": 2.638127326965332, "learning_rate": 2.203905162273248e-05, "loss": 0.14135169982910156, "step": 2885 }, { "epoch": 0.3901647655259823, "grad_norm": 1.1049944162368774, "learning_rate": 2.203300688560194e-05, "loss": 0.23536300659179688, "step": 2886 }, { "epoch": 0.39029995775242926, "grad_norm": 2.236567974090576, "learning_rate": 2.2026960684161862e-05, "loss": 0.15195465087890625, "step": 2887 }, { "epoch": 0.39043514997887624, "grad_norm": 0.8347408175468445, "learning_rate": 2.2020913019671097e-05, "loss": 0.1467151641845703, "step": 2888 }, { "epoch": 0.3905703422053232, "grad_norm": 2.1340439319610596, "learning_rate": 2.20148638933888e-05, "loss": 0.166839599609375, "step": 2889 }, { "epoch": 0.3907055344317702, "grad_norm": 1.1151121854782104, "learning_rate": 2.2008813306574438e-05, "loss": 0.16907787322998047, "step": 2890 }, { "epoch": 0.3908407266582172, "grad_norm": 1.3802108764648438, "learning_rate": 2.200276126048777e-05, "loss": 0.22904014587402344, "step": 2891 }, { "epoch": 0.39097591888466415, "grad_norm": 1.897668480873108, "learning_rate": 2.199670775638886e-05, "loss": 0.2270374298095703, "step": 2892 }, { "epoch": 0.39111111111111113, "grad_norm": 1.322569489479065, "learning_rate": 2.1990652795538085e-05, "loss": 0.13013172149658203, "step": 2893 }, { "epoch": 0.3912463033375581, "grad_norm": 1.019840955734253, "learning_rate": 2.1984596379196117e-05, "loss": 0.13911151885986328, "step": 2894 }, { "epoch": 0.3913814955640051, "grad_norm": 1.2421302795410156, "learning_rate": 2.1978538508623942e-05, "loss": 0.18236064910888672, "step": 2895 }, { "epoch": 0.39151668779045207, "grad_norm": 0.871039867401123, "learning_rate": 2.197247918508283e-05, "loss": 0.11935877799987793, "step": 2896 }, { "epoch": 0.39165188001689905, "grad_norm": 2.6692447662353516, "learning_rate": 2.1966418409834374e-05, "loss": 0.21228408813476562, "step": 2897 }, { "epoch": 0.391787072243346, "grad_norm": 1.360025405883789, "learning_rate": 2.1960356184140453e-05, "loss": 0.1808757781982422, "step": 2898 }, { "epoch": 0.391922264469793, "grad_norm": 0.9737027287483215, "learning_rate": 2.1954292509263258e-05, "loss": 0.1680135726928711, "step": 2899 }, { "epoch": 0.39205745669624, "grad_norm": 1.5282942056655884, "learning_rate": 2.194822738646528e-05, "loss": 0.12001419067382812, "step": 2900 }, { "epoch": 0.39219264892268696, "grad_norm": 1.55282461643219, "learning_rate": 2.1942160817009304e-05, "loss": 0.12455630302429199, "step": 2901 }, { "epoch": 0.39232784114913394, "grad_norm": 0.8353968262672424, "learning_rate": 2.193609280215843e-05, "loss": 0.16773700714111328, "step": 2902 }, { "epoch": 0.3924630333755809, "grad_norm": 1.0280730724334717, "learning_rate": 2.1930023343176044e-05, "loss": 0.20391559600830078, "step": 2903 }, { "epoch": 0.3925982256020279, "grad_norm": 0.9376540780067444, "learning_rate": 2.1923952441325837e-05, "loss": 0.1752481460571289, "step": 2904 }, { "epoch": 0.3927334178284749, "grad_norm": 0.9284510612487793, "learning_rate": 2.191788009787182e-05, "loss": 0.18593358993530273, "step": 2905 }, { "epoch": 0.39286861005492185, "grad_norm": 1.2415282726287842, "learning_rate": 2.1911806314078267e-05, "loss": 0.22683334350585938, "step": 2906 }, { "epoch": 0.39300380228136883, "grad_norm": 1.5311275720596313, "learning_rate": 2.1905731091209786e-05, "loss": 0.2314624786376953, "step": 2907 }, { "epoch": 0.3931389945078158, "grad_norm": 2.4889657497406006, "learning_rate": 2.1899654430531262e-05, "loss": 0.20072174072265625, "step": 2908 }, { "epoch": 0.3932741867342628, "grad_norm": 2.7938175201416016, "learning_rate": 2.18935763333079e-05, "loss": 0.20580291748046875, "step": 2909 }, { "epoch": 0.39340937896070977, "grad_norm": 2.0887486934661865, "learning_rate": 2.1887496800805175e-05, "loss": 0.1824359893798828, "step": 2910 }, { "epoch": 0.39354457118715674, "grad_norm": 2.5393736362457275, "learning_rate": 2.188141583428889e-05, "loss": 0.2472076416015625, "step": 2911 }, { "epoch": 0.3936797634136037, "grad_norm": 0.8902279138565063, "learning_rate": 2.1875333435025138e-05, "loss": 0.08413231372833252, "step": 2912 }, { "epoch": 0.3938149556400507, "grad_norm": 0.8436653017997742, "learning_rate": 2.1869249604280296e-05, "loss": 0.1276235580444336, "step": 2913 }, { "epoch": 0.3939501478664977, "grad_norm": 1.749403715133667, "learning_rate": 2.1863164343321057e-05, "loss": 0.19369029998779297, "step": 2914 }, { "epoch": 0.39408534009294466, "grad_norm": 1.6402604579925537, "learning_rate": 2.1857077653414397e-05, "loss": 0.21311521530151367, "step": 2915 }, { "epoch": 0.39422053231939164, "grad_norm": 2.640350580215454, "learning_rate": 2.185098953582761e-05, "loss": 0.22010326385498047, "step": 2916 }, { "epoch": 0.3943557245458386, "grad_norm": 1.7663654088974, "learning_rate": 2.1844899991828265e-05, "loss": 0.218597412109375, "step": 2917 }, { "epoch": 0.3944909167722856, "grad_norm": 1.1774868965148926, "learning_rate": 2.1838809022684247e-05, "loss": 0.17959117889404297, "step": 2918 }, { "epoch": 0.39462610899873257, "grad_norm": 1.1452014446258545, "learning_rate": 2.1832716629663712e-05, "loss": 0.14960861206054688, "step": 2919 }, { "epoch": 0.39476130122517955, "grad_norm": 1.0479589700698853, "learning_rate": 2.1826622814035138e-05, "loss": 0.19725799560546875, "step": 2920 }, { "epoch": 0.39489649345162653, "grad_norm": 0.906482994556427, "learning_rate": 2.1820527577067293e-05, "loss": 0.14765214920043945, "step": 2921 }, { "epoch": 0.3950316856780735, "grad_norm": 2.3501925468444824, "learning_rate": 2.1814430920029238e-05, "loss": 0.24921202659606934, "step": 2922 }, { "epoch": 0.3951668779045205, "grad_norm": 2.188354969024658, "learning_rate": 2.1808332844190325e-05, "loss": 0.19327163696289062, "step": 2923 }, { "epoch": 0.39530207013096746, "grad_norm": 2.953916072845459, "learning_rate": 2.1802233350820203e-05, "loss": 0.1996135711669922, "step": 2924 }, { "epoch": 0.39543726235741444, "grad_norm": 2.9287643432617188, "learning_rate": 2.179613244118883e-05, "loss": 0.19552922248840332, "step": 2925 }, { "epoch": 0.3955724545838614, "grad_norm": 1.8541208505630493, "learning_rate": 2.1790030116566436e-05, "loss": 0.18764495849609375, "step": 2926 }, { "epoch": 0.3957076468103084, "grad_norm": 1.2976611852645874, "learning_rate": 2.1783926378223563e-05, "loss": 0.19126343727111816, "step": 2927 }, { "epoch": 0.3958428390367554, "grad_norm": 2.2463831901550293, "learning_rate": 2.1777821227431048e-05, "loss": 0.15668010711669922, "step": 2928 }, { "epoch": 0.39597803126320236, "grad_norm": 1.0793263912200928, "learning_rate": 2.1771714665460005e-05, "loss": 0.21531438827514648, "step": 2929 }, { "epoch": 0.39611322348964934, "grad_norm": 1.7339545488357544, "learning_rate": 2.1765606693581857e-05, "loss": 0.16473960876464844, "step": 2930 }, { "epoch": 0.3962484157160963, "grad_norm": 3.6025266647338867, "learning_rate": 2.1759497313068316e-05, "loss": 0.23078536987304688, "step": 2931 }, { "epoch": 0.3963836079425433, "grad_norm": 1.894962191581726, "learning_rate": 2.175338652519139e-05, "loss": 0.1701984405517578, "step": 2932 }, { "epoch": 0.39651880016899027, "grad_norm": 1.0657076835632324, "learning_rate": 2.1747274331223377e-05, "loss": 0.13462257385253906, "step": 2933 }, { "epoch": 0.39665399239543725, "grad_norm": 1.743187427520752, "learning_rate": 2.1741160732436865e-05, "loss": 0.1734457015991211, "step": 2934 }, { "epoch": 0.39678918462188423, "grad_norm": 1.6830084323883057, "learning_rate": 2.1735045730104746e-05, "loss": 0.2747783660888672, "step": 2935 }, { "epoch": 0.3969243768483312, "grad_norm": 0.7709751725196838, "learning_rate": 2.1728929325500183e-05, "loss": 0.15062332153320312, "step": 2936 }, { "epoch": 0.3970595690747782, "grad_norm": 0.9073984622955322, "learning_rate": 2.1722811519896654e-05, "loss": 0.1971604824066162, "step": 2937 }, { "epoch": 0.39719476130122516, "grad_norm": 0.7754150032997131, "learning_rate": 2.171669231456792e-05, "loss": 0.17679405212402344, "step": 2938 }, { "epoch": 0.39732995352767214, "grad_norm": 1.0466722249984741, "learning_rate": 2.1710571710788025e-05, "loss": 0.18363523483276367, "step": 2939 }, { "epoch": 0.3974651457541191, "grad_norm": 0.5604305863380432, "learning_rate": 2.1704449709831312e-05, "loss": 0.13359642028808594, "step": 2940 }, { "epoch": 0.3976003379805661, "grad_norm": 0.877889096736908, "learning_rate": 2.1698326312972423e-05, "loss": 0.1728067398071289, "step": 2941 }, { "epoch": 0.3977355302070131, "grad_norm": 0.6807836294174194, "learning_rate": 2.1692201521486268e-05, "loss": 0.12730789184570312, "step": 2942 }, { "epoch": 0.39787072243346006, "grad_norm": 1.5109617710113525, "learning_rate": 2.1686075336648075e-05, "loss": 0.19756031036376953, "step": 2943 }, { "epoch": 0.39800591465990703, "grad_norm": 1.3805861473083496, "learning_rate": 2.167994775973334e-05, "loss": 0.1860179901123047, "step": 2944 }, { "epoch": 0.398141106886354, "grad_norm": 0.9127232432365417, "learning_rate": 2.167381879201786e-05, "loss": 0.1761932373046875, "step": 2945 }, { "epoch": 0.398276299112801, "grad_norm": 1.7958850860595703, "learning_rate": 2.166768843477772e-05, "loss": 0.19452857971191406, "step": 2946 }, { "epoch": 0.39841149133924797, "grad_norm": 2.4171359539031982, "learning_rate": 2.166155668928929e-05, "loss": 0.19708538055419922, "step": 2947 }, { "epoch": 0.39854668356569495, "grad_norm": 2.2775895595550537, "learning_rate": 2.1655423556829233e-05, "loss": 0.1575450897216797, "step": 2948 }, { "epoch": 0.3986818757921419, "grad_norm": 1.1040598154067993, "learning_rate": 2.1649289038674504e-05, "loss": 0.14584064483642578, "step": 2949 }, { "epoch": 0.3988170680185889, "grad_norm": 2.0849831104278564, "learning_rate": 2.1643153136102333e-05, "loss": 0.17862319946289062, "step": 2950 }, { "epoch": 0.3989522602450359, "grad_norm": 0.9875862002372742, "learning_rate": 2.1637015850390255e-05, "loss": 0.15175437927246094, "step": 2951 }, { "epoch": 0.3990874524714829, "grad_norm": 1.0719780921936035, "learning_rate": 2.1630877182816087e-05, "loss": 0.18024969100952148, "step": 2952 }, { "epoch": 0.3992226446979299, "grad_norm": 1.6491544246673584, "learning_rate": 2.162473713465793e-05, "loss": 0.1746959686279297, "step": 2953 }, { "epoch": 0.3993578369243769, "grad_norm": 1.344290018081665, "learning_rate": 2.161859570719417e-05, "loss": 0.1865081787109375, "step": 2954 }, { "epoch": 0.39949302915082385, "grad_norm": 1.221110224723816, "learning_rate": 2.161245290170349e-05, "loss": 0.24373531341552734, "step": 2955 }, { "epoch": 0.39962822137727083, "grad_norm": 0.9813793301582336, "learning_rate": 2.1606308719464858e-05, "loss": 0.217193603515625, "step": 2956 }, { "epoch": 0.3997634136037178, "grad_norm": 1.84911048412323, "learning_rate": 2.160016316175752e-05, "loss": 0.22316360473632812, "step": 2957 }, { "epoch": 0.3998986058301648, "grad_norm": 0.7422873377799988, "learning_rate": 2.159401622986101e-05, "loss": 0.17644762992858887, "step": 2958 }, { "epoch": 0.40003379805661177, "grad_norm": 0.6138271689414978, "learning_rate": 2.1587867925055165e-05, "loss": 0.12640142440795898, "step": 2959 }, { "epoch": 0.40016899028305875, "grad_norm": 1.374695897102356, "learning_rate": 2.158171824862008e-05, "loss": 0.17972755432128906, "step": 2960 }, { "epoch": 0.4003041825095057, "grad_norm": 1.806179165840149, "learning_rate": 2.157556720183616e-05, "loss": 0.16175079345703125, "step": 2961 }, { "epoch": 0.4004393747359527, "grad_norm": 1.0433707237243652, "learning_rate": 2.156941478598409e-05, "loss": 0.19652175903320312, "step": 2962 }, { "epoch": 0.4005745669623997, "grad_norm": 0.9381135106086731, "learning_rate": 2.156326100234482e-05, "loss": 0.1557760238647461, "step": 2963 }, { "epoch": 0.40070975918884666, "grad_norm": 1.6645901203155518, "learning_rate": 2.1557105852199612e-05, "loss": 0.20635509490966797, "step": 2964 }, { "epoch": 0.40084495141529364, "grad_norm": 1.121830701828003, "learning_rate": 2.155094933683e-05, "loss": 0.1860337257385254, "step": 2965 }, { "epoch": 0.4009801436417406, "grad_norm": 0.8452627062797546, "learning_rate": 2.1544791457517802e-05, "loss": 0.16274452209472656, "step": 2966 }, { "epoch": 0.4011153358681876, "grad_norm": 1.5445414781570435, "learning_rate": 2.1538632215545126e-05, "loss": 0.17372894287109375, "step": 2967 }, { "epoch": 0.4012505280946346, "grad_norm": 0.8613083958625793, "learning_rate": 2.153247161219435e-05, "loss": 0.15696048736572266, "step": 2968 }, { "epoch": 0.40138572032108155, "grad_norm": 0.9513514637947083, "learning_rate": 2.1526309648748147e-05, "loss": 0.18019723892211914, "step": 2969 }, { "epoch": 0.40152091254752853, "grad_norm": 1.529106855392456, "learning_rate": 2.1520146326489476e-05, "loss": 0.1893787384033203, "step": 2970 }, { "epoch": 0.4016561047739755, "grad_norm": 1.6951128244400024, "learning_rate": 2.151398164670157e-05, "loss": 0.19144916534423828, "step": 2971 }, { "epoch": 0.4017912970004225, "grad_norm": 0.7875958681106567, "learning_rate": 2.1507815610667948e-05, "loss": 0.15167522430419922, "step": 2972 }, { "epoch": 0.40192648922686947, "grad_norm": 1.6109178066253662, "learning_rate": 2.1501648219672407e-05, "loss": 0.1681361198425293, "step": 2973 }, { "epoch": 0.40206168145331644, "grad_norm": 3.321265459060669, "learning_rate": 2.149547947499904e-05, "loss": 0.2428741455078125, "step": 2974 }, { "epoch": 0.4021968736797634, "grad_norm": 1.5932071208953857, "learning_rate": 2.1489309377932212e-05, "loss": 0.1852436065673828, "step": 2975 }, { "epoch": 0.4023320659062104, "grad_norm": 1.3729808330535889, "learning_rate": 2.1483137929756562e-05, "loss": 0.1788616180419922, "step": 2976 }, { "epoch": 0.4024672581326574, "grad_norm": 1.4057748317718506, "learning_rate": 2.147696513175702e-05, "loss": 0.17625093460083008, "step": 2977 }, { "epoch": 0.40260245035910436, "grad_norm": 1.1885308027267456, "learning_rate": 2.1470790985218804e-05, "loss": 0.17126131057739258, "step": 2978 }, { "epoch": 0.40273764258555134, "grad_norm": 1.4859938621520996, "learning_rate": 2.1464615491427393e-05, "loss": 0.18955612182617188, "step": 2979 }, { "epoch": 0.4028728348119983, "grad_norm": 1.176913857460022, "learning_rate": 2.1458438651668567e-05, "loss": 0.14587020874023438, "step": 2980 }, { "epoch": 0.4030080270384453, "grad_norm": 1.2134369611740112, "learning_rate": 2.1452260467228376e-05, "loss": 0.1571359634399414, "step": 2981 }, { "epoch": 0.40314321926489227, "grad_norm": 1.9966589212417603, "learning_rate": 2.144608093939314e-05, "loss": 0.2558879852294922, "step": 2982 }, { "epoch": 0.40327841149133925, "grad_norm": 1.0463056564331055, "learning_rate": 2.1439900069449483e-05, "loss": 0.16789817810058594, "step": 2983 }, { "epoch": 0.40341360371778623, "grad_norm": 3.088319778442383, "learning_rate": 2.1433717858684286e-05, "loss": 0.21585512161254883, "step": 2984 }, { "epoch": 0.4035487959442332, "grad_norm": 1.1387271881103516, "learning_rate": 2.1427534308384724e-05, "loss": 0.17087364196777344, "step": 2985 }, { "epoch": 0.4036839881706802, "grad_norm": 1.410264492034912, "learning_rate": 2.1421349419838245e-05, "loss": 0.2035980224609375, "step": 2986 }, { "epoch": 0.40381918039712716, "grad_norm": 2.4529712200164795, "learning_rate": 2.1415163194332574e-05, "loss": 0.23231029510498047, "step": 2987 }, { "epoch": 0.40395437262357414, "grad_norm": 2.6798970699310303, "learning_rate": 2.1408975633155715e-05, "loss": 0.1841106414794922, "step": 2988 }, { "epoch": 0.4040895648500211, "grad_norm": 1.6458081007003784, "learning_rate": 2.140278673759595e-05, "loss": 0.16478300094604492, "step": 2989 }, { "epoch": 0.4042247570764681, "grad_norm": 0.8627346158027649, "learning_rate": 2.1396596508941847e-05, "loss": 0.20490455627441406, "step": 2990 }, { "epoch": 0.4043599493029151, "grad_norm": 1.2831915616989136, "learning_rate": 2.1390404948482238e-05, "loss": 0.1735677719116211, "step": 2991 }, { "epoch": 0.40449514152936206, "grad_norm": 1.3130743503570557, "learning_rate": 2.1384212057506243e-05, "loss": 0.20348739624023438, "step": 2992 }, { "epoch": 0.40463033375580904, "grad_norm": 0.8924576640129089, "learning_rate": 2.137801783730325e-05, "loss": 0.16890335083007812, "step": 2993 }, { "epoch": 0.404765525982256, "grad_norm": 1.6816691160202026, "learning_rate": 2.137182228916293e-05, "loss": 0.16756820678710938, "step": 2994 }, { "epoch": 0.404900718208703, "grad_norm": 0.7269234657287598, "learning_rate": 2.136562541437523e-05, "loss": 0.16210460662841797, "step": 2995 }, { "epoch": 0.40503591043514997, "grad_norm": 2.38232421875, "learning_rate": 2.135942721423038e-05, "loss": 0.12434983253479004, "step": 2996 }, { "epoch": 0.40517110266159695, "grad_norm": 0.9577071070671082, "learning_rate": 2.1353227690018865e-05, "loss": 0.14405155181884766, "step": 2997 }, { "epoch": 0.4053062948880439, "grad_norm": 2.890902280807495, "learning_rate": 2.1347026843031467e-05, "loss": 0.23656272888183594, "step": 2998 }, { "epoch": 0.4054414871144909, "grad_norm": 1.5602978467941284, "learning_rate": 2.1340824674559238e-05, "loss": 0.19223594665527344, "step": 2999 }, { "epoch": 0.4055766793409379, "grad_norm": 2.897514581680298, "learning_rate": 2.133462118589349e-05, "loss": 0.19192218780517578, "step": 3000 }, { "epoch": 0.40571187156738486, "grad_norm": 0.7892051935195923, "learning_rate": 2.1328416378325837e-05, "loss": 0.11046719551086426, "step": 3001 }, { "epoch": 0.40584706379383184, "grad_norm": 0.5690903067588806, "learning_rate": 2.1322210253148144e-05, "loss": 0.13419723510742188, "step": 3002 }, { "epoch": 0.4059822560202788, "grad_norm": 1.5261783599853516, "learning_rate": 2.131600281165257e-05, "loss": 0.15524673461914062, "step": 3003 }, { "epoch": 0.4061174482467258, "grad_norm": 2.154695749282837, "learning_rate": 2.130979405513152e-05, "loss": 0.1865830421447754, "step": 3004 }, { "epoch": 0.4062526404731728, "grad_norm": 4.571969032287598, "learning_rate": 2.1303583984877697e-05, "loss": 0.24930477142333984, "step": 3005 }, { "epoch": 0.40638783269961976, "grad_norm": 1.1895395517349243, "learning_rate": 2.1297372602184085e-05, "loss": 0.14764881134033203, "step": 3006 }, { "epoch": 0.40652302492606673, "grad_norm": 1.1962355375289917, "learning_rate": 2.1291159908343907e-05, "loss": 0.20009803771972656, "step": 3007 }, { "epoch": 0.4066582171525137, "grad_norm": 1.4184571504592896, "learning_rate": 2.1284945904650693e-05, "loss": 0.23280048370361328, "step": 3008 }, { "epoch": 0.4067934093789607, "grad_norm": 1.2918347120285034, "learning_rate": 2.127873059239822e-05, "loss": 0.20061492919921875, "step": 3009 }, { "epoch": 0.40692860160540767, "grad_norm": 1.2394102811813354, "learning_rate": 2.127251397288056e-05, "loss": 0.16162443161010742, "step": 3010 }, { "epoch": 0.40706379383185465, "grad_norm": 1.4771368503570557, "learning_rate": 2.126629604739204e-05, "loss": 0.19285202026367188, "step": 3011 }, { "epoch": 0.4071989860583016, "grad_norm": 0.8390077948570251, "learning_rate": 2.1260076817227268e-05, "loss": 0.19829559326171875, "step": 3012 }, { "epoch": 0.4073341782847486, "grad_norm": 0.9760518074035645, "learning_rate": 2.1253856283681122e-05, "loss": 0.176422119140625, "step": 3013 }, { "epoch": 0.4074693705111956, "grad_norm": 0.837346613407135, "learning_rate": 2.1247634448048743e-05, "loss": 0.19060707092285156, "step": 3014 }, { "epoch": 0.40760456273764256, "grad_norm": 1.304717779159546, "learning_rate": 2.1241411311625562e-05, "loss": 0.1276702880859375, "step": 3015 }, { "epoch": 0.40773975496408954, "grad_norm": 0.9274672865867615, "learning_rate": 2.1235186875707257e-05, "loss": 0.1623210906982422, "step": 3016 }, { "epoch": 0.4078749471905365, "grad_norm": 1.5505789518356323, "learning_rate": 2.1228961141589797e-05, "loss": 0.20755767822265625, "step": 3017 }, { "epoch": 0.4080101394169835, "grad_norm": 0.6763966083526611, "learning_rate": 2.122273411056941e-05, "loss": 0.1399059295654297, "step": 3018 }, { "epoch": 0.4081453316434305, "grad_norm": 2.3691086769104004, "learning_rate": 2.1216505783942592e-05, "loss": 0.1951141357421875, "step": 3019 }, { "epoch": 0.4082805238698775, "grad_norm": 1.3408936262130737, "learning_rate": 2.121027616300613e-05, "loss": 0.2190837860107422, "step": 3020 }, { "epoch": 0.4084157160963245, "grad_norm": 0.9116777777671814, "learning_rate": 2.1204045249057043e-05, "loss": 0.1967792510986328, "step": 3021 }, { "epoch": 0.40855090832277147, "grad_norm": 2.6176626682281494, "learning_rate": 2.119781304339266e-05, "loss": 0.19192123413085938, "step": 3022 }, { "epoch": 0.40868610054921845, "grad_norm": 1.1024119853973389, "learning_rate": 2.1191579547310547e-05, "loss": 0.17528629302978516, "step": 3023 }, { "epoch": 0.4088212927756654, "grad_norm": 0.7836039662361145, "learning_rate": 2.1185344762108556e-05, "loss": 0.16887688636779785, "step": 3024 }, { "epoch": 0.4089564850021124, "grad_norm": 1.98976731300354, "learning_rate": 2.11791086890848e-05, "loss": 0.1696305274963379, "step": 3025 }, { "epoch": 0.4090916772285594, "grad_norm": 1.0534249544143677, "learning_rate": 2.1172871329537662e-05, "loss": 0.170501708984375, "step": 3026 }, { "epoch": 0.40922686945500636, "grad_norm": 0.6537767648696899, "learning_rate": 2.1166632684765794e-05, "loss": 0.12562775611877441, "step": 3027 }, { "epoch": 0.40936206168145334, "grad_norm": 3.7160472869873047, "learning_rate": 2.1160392756068124e-05, "loss": 0.24985885620117188, "step": 3028 }, { "epoch": 0.4094972539079003, "grad_norm": 1.416527509689331, "learning_rate": 2.1154151544743826e-05, "loss": 0.19873285293579102, "step": 3029 }, { "epoch": 0.4096324461343473, "grad_norm": 0.950940728187561, "learning_rate": 2.114790905209236e-05, "loss": 0.1541004180908203, "step": 3030 }, { "epoch": 0.4097676383607943, "grad_norm": 1.276268720626831, "learning_rate": 2.1141665279413444e-05, "loss": 0.17284393310546875, "step": 3031 }, { "epoch": 0.40990283058724125, "grad_norm": 0.7973108887672424, "learning_rate": 2.1135420228007062e-05, "loss": 0.1438283920288086, "step": 3032 }, { "epoch": 0.41003802281368823, "grad_norm": 0.7642303705215454, "learning_rate": 2.1129173899173474e-05, "loss": 0.1288890838623047, "step": 3033 }, { "epoch": 0.4101732150401352, "grad_norm": 1.076489806175232, "learning_rate": 2.11229262942132e-05, "loss": 0.21950244903564453, "step": 3034 }, { "epoch": 0.4103084072665822, "grad_norm": 1.766666293144226, "learning_rate": 2.1116677414427008e-05, "loss": 0.2514934539794922, "step": 3035 }, { "epoch": 0.41044359949302917, "grad_norm": 1.5698294639587402, "learning_rate": 2.1110427261115972e-05, "loss": 0.22774696350097656, "step": 3036 }, { "epoch": 0.41057879171947614, "grad_norm": 1.140944242477417, "learning_rate": 2.1104175835581386e-05, "loss": 0.2371673583984375, "step": 3037 }, { "epoch": 0.4107139839459231, "grad_norm": 3.5694587230682373, "learning_rate": 2.1097923139124846e-05, "loss": 0.2121124267578125, "step": 3038 }, { "epoch": 0.4108491761723701, "grad_norm": 2.5289413928985596, "learning_rate": 2.109166917304819e-05, "loss": 0.2229328155517578, "step": 3039 }, { "epoch": 0.4109843683988171, "grad_norm": 1.4896842241287231, "learning_rate": 2.1085413938653532e-05, "loss": 0.18973636627197266, "step": 3040 }, { "epoch": 0.41111956062526406, "grad_norm": 1.3239771127700806, "learning_rate": 2.107915743724323e-05, "loss": 0.17284321784973145, "step": 3041 }, { "epoch": 0.41125475285171104, "grad_norm": 0.9550789594650269, "learning_rate": 2.1072899670119935e-05, "loss": 0.18607521057128906, "step": 3042 }, { "epoch": 0.411389945078158, "grad_norm": 1.1535742282867432, "learning_rate": 2.1066640638586543e-05, "loss": 0.22809982299804688, "step": 3043 }, { "epoch": 0.411525137304605, "grad_norm": 0.6176862716674805, "learning_rate": 2.1060380343946223e-05, "loss": 0.11718082427978516, "step": 3044 }, { "epoch": 0.41166032953105197, "grad_norm": 1.288192868232727, "learning_rate": 2.10541187875024e-05, "loss": 0.17744064331054688, "step": 3045 }, { "epoch": 0.41179552175749895, "grad_norm": 1.1375256776809692, "learning_rate": 2.1047855970558753e-05, "loss": 0.11963081359863281, "step": 3046 }, { "epoch": 0.41193071398394593, "grad_norm": 1.8950451612472534, "learning_rate": 2.1041591894419244e-05, "loss": 0.19255638122558594, "step": 3047 }, { "epoch": 0.4120659062103929, "grad_norm": 1.9351564645767212, "learning_rate": 2.1035326560388087e-05, "loss": 0.24155616760253906, "step": 3048 }, { "epoch": 0.4122010984368399, "grad_norm": 1.6658726930618286, "learning_rate": 2.1029059969769756e-05, "loss": 0.19255447387695312, "step": 3049 }, { "epoch": 0.41233629066328686, "grad_norm": 2.594334363937378, "learning_rate": 2.1022792123868986e-05, "loss": 0.22394180297851562, "step": 3050 }, { "epoch": 0.41247148288973384, "grad_norm": 1.028749942779541, "learning_rate": 2.1016523023990783e-05, "loss": 0.19150543212890625, "step": 3051 }, { "epoch": 0.4126066751161808, "grad_norm": 0.5970864295959473, "learning_rate": 2.1010252671440398e-05, "loss": 0.11243534088134766, "step": 3052 }, { "epoch": 0.4127418673426278, "grad_norm": 1.1464234590530396, "learning_rate": 2.1003981067523358e-05, "loss": 0.15420818328857422, "step": 3053 }, { "epoch": 0.4128770595690748, "grad_norm": 1.7887133359909058, "learning_rate": 2.099770821354544e-05, "loss": 0.19118118286132812, "step": 3054 }, { "epoch": 0.41301225179552176, "grad_norm": 1.2951503992080688, "learning_rate": 2.0991434110812692e-05, "loss": 0.14705228805541992, "step": 3055 }, { "epoch": 0.41314744402196873, "grad_norm": 2.0299723148345947, "learning_rate": 2.0985158760631415e-05, "loss": 0.19005227088928223, "step": 3056 }, { "epoch": 0.4132826362484157, "grad_norm": 1.116276741027832, "learning_rate": 2.0978882164308157e-05, "loss": 0.16593730449676514, "step": 3057 }, { "epoch": 0.4134178284748627, "grad_norm": 1.097907543182373, "learning_rate": 2.0972604323149755e-05, "loss": 0.16720247268676758, "step": 3058 }, { "epoch": 0.41355302070130967, "grad_norm": 0.9085415601730347, "learning_rate": 2.0966325238463283e-05, "loss": 0.23619651794433594, "step": 3059 }, { "epoch": 0.41368821292775665, "grad_norm": 0.7797397971153259, "learning_rate": 2.096004491155608e-05, "loss": 0.1812114715576172, "step": 3060 }, { "epoch": 0.4138234051542036, "grad_norm": 1.6356220245361328, "learning_rate": 2.0953763343735746e-05, "loss": 0.2852611541748047, "step": 3061 }, { "epoch": 0.4139585973806506, "grad_norm": 0.9383690357208252, "learning_rate": 2.0947480536310133e-05, "loss": 0.16326236724853516, "step": 3062 }, { "epoch": 0.4140937896070976, "grad_norm": 2.589324474334717, "learning_rate": 2.0941196490587352e-05, "loss": 0.2070094347000122, "step": 3063 }, { "epoch": 0.41422898183354456, "grad_norm": 0.9352585673332214, "learning_rate": 2.0934911207875782e-05, "loss": 0.19829416275024414, "step": 3064 }, { "epoch": 0.41436417405999154, "grad_norm": 1.3081955909729004, "learning_rate": 2.092862468948405e-05, "loss": 0.24007797241210938, "step": 3065 }, { "epoch": 0.4144993662864385, "grad_norm": 1.246611475944519, "learning_rate": 2.0922336936721044e-05, "loss": 0.2225322723388672, "step": 3066 }, { "epoch": 0.4146345585128855, "grad_norm": 0.7423734068870544, "learning_rate": 2.0916047950895907e-05, "loss": 0.1749706268310547, "step": 3067 }, { "epoch": 0.4147697507393325, "grad_norm": 3.3691582679748535, "learning_rate": 2.0909757733318035e-05, "loss": 0.19530868530273438, "step": 3068 }, { "epoch": 0.41490494296577946, "grad_norm": 2.329500675201416, "learning_rate": 2.090346628529709e-05, "loss": 0.21277809143066406, "step": 3069 }, { "epoch": 0.41504013519222643, "grad_norm": 0.8931111097335815, "learning_rate": 2.089717360814298e-05, "loss": 0.2049236297607422, "step": 3070 }, { "epoch": 0.4151753274186734, "grad_norm": 0.7539544701576233, "learning_rate": 2.0890879703165885e-05, "loss": 0.16530132293701172, "step": 3071 }, { "epoch": 0.4153105196451204, "grad_norm": 1.1251816749572754, "learning_rate": 2.0884584571676217e-05, "loss": 0.14655494689941406, "step": 3072 }, { "epoch": 0.41544571187156737, "grad_norm": 2.418135404586792, "learning_rate": 2.0878288214984657e-05, "loss": 0.22464561462402344, "step": 3073 }, { "epoch": 0.41558090409801435, "grad_norm": 1.0123423337936401, "learning_rate": 2.0871990634402147e-05, "loss": 0.19751358032226562, "step": 3074 }, { "epoch": 0.4157160963244613, "grad_norm": 1.1898612976074219, "learning_rate": 2.0865691831239877e-05, "loss": 0.18123340606689453, "step": 3075 }, { "epoch": 0.4158512885509083, "grad_norm": 1.1554439067840576, "learning_rate": 2.0859391806809285e-05, "loss": 0.21947240829467773, "step": 3076 }, { "epoch": 0.4159864807773553, "grad_norm": 2.1257498264312744, "learning_rate": 2.0853090562422072e-05, "loss": 0.16626596450805664, "step": 3077 }, { "epoch": 0.41612167300380226, "grad_norm": 1.8598438501358032, "learning_rate": 2.084678809939019e-05, "loss": 0.14995288848876953, "step": 3078 }, { "epoch": 0.41625686523024924, "grad_norm": 2.615832805633545, "learning_rate": 2.084048441902585e-05, "loss": 0.22571182250976562, "step": 3079 }, { "epoch": 0.4163920574566962, "grad_norm": 0.9203280806541443, "learning_rate": 2.0834179522641508e-05, "loss": 0.15561556816101074, "step": 3080 }, { "epoch": 0.4165272496831432, "grad_norm": 1.5997341871261597, "learning_rate": 2.0827873411549877e-05, "loss": 0.21852684020996094, "step": 3081 }, { "epoch": 0.4166624419095902, "grad_norm": 1.297373652458191, "learning_rate": 2.0821566087063926e-05, "loss": 0.20093536376953125, "step": 3082 }, { "epoch": 0.41679763413603715, "grad_norm": 1.8051400184631348, "learning_rate": 2.081525755049687e-05, "loss": 0.21544265747070312, "step": 3083 }, { "epoch": 0.41693282636248413, "grad_norm": 1.1666992902755737, "learning_rate": 2.0808947803162182e-05, "loss": 0.15682530403137207, "step": 3084 }, { "epoch": 0.4170680185889311, "grad_norm": 0.9536730051040649, "learning_rate": 2.0802636846373578e-05, "loss": 0.13433504104614258, "step": 3085 }, { "epoch": 0.4172032108153781, "grad_norm": 1.1640393733978271, "learning_rate": 2.0796324681445045e-05, "loss": 0.2030181884765625, "step": 3086 }, { "epoch": 0.41733840304182507, "grad_norm": 0.8382679224014282, "learning_rate": 2.0790011309690806e-05, "loss": 0.1530303955078125, "step": 3087 }, { "epoch": 0.4174735952682721, "grad_norm": 1.199198603630066, "learning_rate": 2.0783696732425332e-05, "loss": 0.2301959991455078, "step": 3088 }, { "epoch": 0.4176087874947191, "grad_norm": 1.8117302656173706, "learning_rate": 2.0777380950963355e-05, "loss": 0.16292476654052734, "step": 3089 }, { "epoch": 0.41774397972116606, "grad_norm": 1.130401849746704, "learning_rate": 2.0771063966619854e-05, "loss": 0.19435930252075195, "step": 3090 }, { "epoch": 0.41787917194761304, "grad_norm": 2.5911643505096436, "learning_rate": 2.0764745780710065e-05, "loss": 0.23030471801757812, "step": 3091 }, { "epoch": 0.41801436417406, "grad_norm": 0.7333688139915466, "learning_rate": 2.075842639454946e-05, "loss": 0.1255178451538086, "step": 3092 }, { "epoch": 0.418149556400507, "grad_norm": 1.3307783603668213, "learning_rate": 2.075210580945378e-05, "loss": 0.18999814987182617, "step": 3093 }, { "epoch": 0.418284748626954, "grad_norm": 0.862221896648407, "learning_rate": 2.0745784026738984e-05, "loss": 0.17731380462646484, "step": 3094 }, { "epoch": 0.41841994085340095, "grad_norm": 1.3271745443344116, "learning_rate": 2.073946104772132e-05, "loss": 0.21765542030334473, "step": 3095 }, { "epoch": 0.41855513307984793, "grad_norm": 1.9001739025115967, "learning_rate": 2.0733136873717258e-05, "loss": 0.14759445190429688, "step": 3096 }, { "epoch": 0.4186903253062949, "grad_norm": 1.3153339624404907, "learning_rate": 2.0726811506043527e-05, "loss": 0.18173694610595703, "step": 3097 }, { "epoch": 0.4188255175327419, "grad_norm": 1.5945225954055786, "learning_rate": 2.0720484946017104e-05, "loss": 0.16118812561035156, "step": 3098 }, { "epoch": 0.41896070975918887, "grad_norm": 1.2689669132232666, "learning_rate": 2.0714157194955202e-05, "loss": 0.20510482788085938, "step": 3099 }, { "epoch": 0.41909590198563584, "grad_norm": 0.6630727648735046, "learning_rate": 2.070782825417531e-05, "loss": 0.14899134635925293, "step": 3100 }, { "epoch": 0.4192310942120828, "grad_norm": 1.7634410858154297, "learning_rate": 2.0701498124995127e-05, "loss": 0.2501850128173828, "step": 3101 }, { "epoch": 0.4193662864385298, "grad_norm": 1.1146520376205444, "learning_rate": 2.069516680873264e-05, "loss": 0.20702743530273438, "step": 3102 }, { "epoch": 0.4195014786649768, "grad_norm": 1.5814372301101685, "learning_rate": 2.0688834306706047e-05, "loss": 0.18718814849853516, "step": 3103 }, { "epoch": 0.41963667089142376, "grad_norm": 2.1391141414642334, "learning_rate": 2.0682500620233815e-05, "loss": 0.21801185607910156, "step": 3104 }, { "epoch": 0.41977186311787074, "grad_norm": 2.015735149383545, "learning_rate": 2.0676165750634656e-05, "loss": 0.14991378784179688, "step": 3105 }, { "epoch": 0.4199070553443177, "grad_norm": 1.3970814943313599, "learning_rate": 2.0669829699227513e-05, "loss": 0.1437692642211914, "step": 3106 }, { "epoch": 0.4200422475707647, "grad_norm": 1.0428743362426758, "learning_rate": 2.06634924673316e-05, "loss": 0.1648116111755371, "step": 3107 }, { "epoch": 0.42017743979721167, "grad_norm": 3.6171913146972656, "learning_rate": 2.0657154056266346e-05, "loss": 0.31303977966308594, "step": 3108 }, { "epoch": 0.42031263202365865, "grad_norm": 1.9231373071670532, "learning_rate": 2.0650814467351452e-05, "loss": 0.18438935279846191, "step": 3109 }, { "epoch": 0.42044782425010563, "grad_norm": 0.8312044143676758, "learning_rate": 2.064447370190685e-05, "loss": 0.1562342643737793, "step": 3110 }, { "epoch": 0.4205830164765526, "grad_norm": 1.5872935056686401, "learning_rate": 2.0638131761252724e-05, "loss": 0.20205354690551758, "step": 3111 }, { "epoch": 0.4207182087029996, "grad_norm": 2.4010915756225586, "learning_rate": 2.06317886467095e-05, "loss": 0.24871063232421875, "step": 3112 }, { "epoch": 0.42085340092944656, "grad_norm": 1.1522927284240723, "learning_rate": 2.0625444359597847e-05, "loss": 0.16045093536376953, "step": 3113 }, { "epoch": 0.42098859315589354, "grad_norm": 1.4989211559295654, "learning_rate": 2.0619098901238684e-05, "loss": 0.18958330154418945, "step": 3114 }, { "epoch": 0.4211237853823405, "grad_norm": 2.8187615871429443, "learning_rate": 2.0612752272953158e-05, "loss": 0.1924142837524414, "step": 3115 }, { "epoch": 0.4212589776087875, "grad_norm": 1.6889748573303223, "learning_rate": 2.060640447606268e-05, "loss": 0.14674735069274902, "step": 3116 }, { "epoch": 0.4213941698352345, "grad_norm": 1.0009809732437134, "learning_rate": 2.0600055511888895e-05, "loss": 0.16613101959228516, "step": 3117 }, { "epoch": 0.42152936206168146, "grad_norm": 0.9665440320968628, "learning_rate": 2.059370538175369e-05, "loss": 0.1789846420288086, "step": 3118 }, { "epoch": 0.42166455428812843, "grad_norm": 1.3382807970046997, "learning_rate": 2.0587354086979194e-05, "loss": 0.13223552703857422, "step": 3119 }, { "epoch": 0.4217997465145754, "grad_norm": 2.7905945777893066, "learning_rate": 2.0581001628887785e-05, "loss": 0.21348845958709717, "step": 3120 }, { "epoch": 0.4219349387410224, "grad_norm": 1.914476990699768, "learning_rate": 2.057464800880207e-05, "loss": 0.1931157112121582, "step": 3121 }, { "epoch": 0.42207013096746937, "grad_norm": 1.1248724460601807, "learning_rate": 2.0568293228044914e-05, "loss": 0.19129371643066406, "step": 3122 }, { "epoch": 0.42220532319391635, "grad_norm": 0.9605360627174377, "learning_rate": 2.0561937287939413e-05, "loss": 0.13921260833740234, "step": 3123 }, { "epoch": 0.4223405154203633, "grad_norm": 1.276961088180542, "learning_rate": 2.055558018980891e-05, "loss": 0.1785411834716797, "step": 3124 }, { "epoch": 0.4224757076468103, "grad_norm": 1.0074282884597778, "learning_rate": 2.0549221934976987e-05, "loss": 0.1901702880859375, "step": 3125 }, { "epoch": 0.4226108998732573, "grad_norm": 0.9029681086540222, "learning_rate": 2.054286252476746e-05, "loss": 0.1786508560180664, "step": 3126 }, { "epoch": 0.42274609209970426, "grad_norm": 1.8974783420562744, "learning_rate": 2.05365019605044e-05, "loss": 0.16581153869628906, "step": 3127 }, { "epoch": 0.42288128432615124, "grad_norm": 0.8541675806045532, "learning_rate": 2.053014024351211e-05, "loss": 0.1798553466796875, "step": 3128 }, { "epoch": 0.4230164765525982, "grad_norm": 1.5723378658294678, "learning_rate": 2.0523777375115133e-05, "loss": 0.17410755157470703, "step": 3129 }, { "epoch": 0.4231516687790452, "grad_norm": 0.8442850112915039, "learning_rate": 2.0517413356638245e-05, "loss": 0.1694507598876953, "step": 3130 }, { "epoch": 0.4232868610054922, "grad_norm": 1.1846411228179932, "learning_rate": 2.0511048189406472e-05, "loss": 0.1890854835510254, "step": 3131 }, { "epoch": 0.42342205323193916, "grad_norm": 1.237890601158142, "learning_rate": 2.0504681874745082e-05, "loss": 0.17635726928710938, "step": 3132 }, { "epoch": 0.42355724545838613, "grad_norm": 1.8898024559020996, "learning_rate": 2.049831441397957e-05, "loss": 0.1778106689453125, "step": 3133 }, { "epoch": 0.4236924376848331, "grad_norm": 0.9419623613357544, "learning_rate": 2.0491945808435674e-05, "loss": 0.21422576904296875, "step": 3134 }, { "epoch": 0.4238276299112801, "grad_norm": 1.1858009099960327, "learning_rate": 2.048557605943938e-05, "loss": 0.10719728469848633, "step": 3135 }, { "epoch": 0.42396282213772707, "grad_norm": 0.794576108455658, "learning_rate": 2.047920516831689e-05, "loss": 0.1824202537536621, "step": 3136 }, { "epoch": 0.42409801436417405, "grad_norm": 0.9986293315887451, "learning_rate": 2.047283313639467e-05, "loss": 0.22907638549804688, "step": 3137 }, { "epoch": 0.424233206590621, "grad_norm": 1.4526969194412231, "learning_rate": 2.0466459964999408e-05, "loss": 0.16235113143920898, "step": 3138 }, { "epoch": 0.424368398817068, "grad_norm": 0.857375979423523, "learning_rate": 2.0460085655458025e-05, "loss": 0.15218162536621094, "step": 3139 }, { "epoch": 0.424503591043515, "grad_norm": 0.9026086330413818, "learning_rate": 2.0453710209097697e-05, "loss": 0.20074081420898438, "step": 3140 }, { "epoch": 0.42463878326996196, "grad_norm": 1.767188310623169, "learning_rate": 2.044733362724582e-05, "loss": 0.19459891319274902, "step": 3141 }, { "epoch": 0.42477397549640894, "grad_norm": 1.2165381908416748, "learning_rate": 2.0440955911230028e-05, "loss": 0.19650840759277344, "step": 3142 }, { "epoch": 0.4249091677228559, "grad_norm": 1.5772616863250732, "learning_rate": 2.0434577062378203e-05, "loss": 0.21356773376464844, "step": 3143 }, { "epoch": 0.4250443599493029, "grad_norm": 1.4389405250549316, "learning_rate": 2.0428197082018458e-05, "loss": 0.2306346893310547, "step": 3144 }, { "epoch": 0.4251795521757499, "grad_norm": 1.2169252634048462, "learning_rate": 2.042181597147913e-05, "loss": 0.21268272399902344, "step": 3145 }, { "epoch": 0.42531474440219685, "grad_norm": 1.5070998668670654, "learning_rate": 2.0415433732088806e-05, "loss": 0.18635624647140503, "step": 3146 }, { "epoch": 0.42544993662864383, "grad_norm": 1.115201711654663, "learning_rate": 2.0409050365176294e-05, "loss": 0.20785045623779297, "step": 3147 }, { "epoch": 0.4255851288550908, "grad_norm": 1.2995914220809937, "learning_rate": 2.0402665872070656e-05, "loss": 0.22623825073242188, "step": 3148 }, { "epoch": 0.4257203210815378, "grad_norm": 0.7083000540733337, "learning_rate": 2.0396280254101172e-05, "loss": 0.11201667785644531, "step": 3149 }, { "epoch": 0.42585551330798477, "grad_norm": 2.1817712783813477, "learning_rate": 2.0389893512597364e-05, "loss": 0.17748546600341797, "step": 3150 }, { "epoch": 0.42599070553443175, "grad_norm": 0.8166563510894775, "learning_rate": 2.0383505648888986e-05, "loss": 0.1644129753112793, "step": 3151 }, { "epoch": 0.4261258977608787, "grad_norm": 0.7137550115585327, "learning_rate": 2.037711666430602e-05, "loss": 0.16689682006835938, "step": 3152 }, { "epoch": 0.4262610899873257, "grad_norm": 0.8674426674842834, "learning_rate": 2.0370726560178693e-05, "loss": 0.20224666595458984, "step": 3153 }, { "epoch": 0.4263962822137727, "grad_norm": 1.0230633020401, "learning_rate": 2.036433533783745e-05, "loss": 0.15178608894348145, "step": 3154 }, { "epoch": 0.42653147444021966, "grad_norm": 0.762922465801239, "learning_rate": 2.0357942998612988e-05, "loss": 0.1956329345703125, "step": 3155 }, { "epoch": 0.4266666666666667, "grad_norm": 1.6153441667556763, "learning_rate": 2.0351549543836224e-05, "loss": 0.1943340301513672, "step": 3156 }, { "epoch": 0.4268018588931137, "grad_norm": 1.0588141679763794, "learning_rate": 2.0345154974838307e-05, "loss": 0.13901329040527344, "step": 3157 }, { "epoch": 0.42693705111956065, "grad_norm": 1.4903565645217896, "learning_rate": 2.0338759292950618e-05, "loss": 0.20383358001708984, "step": 3158 }, { "epoch": 0.42707224334600763, "grad_norm": 1.0255467891693115, "learning_rate": 2.033236249950477e-05, "loss": 0.14542841911315918, "step": 3159 }, { "epoch": 0.4272074355724546, "grad_norm": 0.7896550297737122, "learning_rate": 2.0325964595832618e-05, "loss": 0.15297317504882812, "step": 3160 }, { "epoch": 0.4273426277989016, "grad_norm": 1.841535210609436, "learning_rate": 2.031956558326624e-05, "loss": 0.2152252197265625, "step": 3161 }, { "epoch": 0.42747782002534856, "grad_norm": 0.9769876599311829, "learning_rate": 2.0313165463137935e-05, "loss": 0.16324138641357422, "step": 3162 }, { "epoch": 0.42761301225179554, "grad_norm": 1.0629949569702148, "learning_rate": 2.030676423678025e-05, "loss": 0.1883869171142578, "step": 3163 }, { "epoch": 0.4277482044782425, "grad_norm": 1.3850748538970947, "learning_rate": 2.030036190552595e-05, "loss": 0.2057476043701172, "step": 3164 }, { "epoch": 0.4278833967046895, "grad_norm": 1.2737865447998047, "learning_rate": 2.029395847070803e-05, "loss": 0.21024131774902344, "step": 3165 }, { "epoch": 0.4280185889311365, "grad_norm": 1.1816891431808472, "learning_rate": 2.0287553933659735e-05, "loss": 0.18680477142333984, "step": 3166 }, { "epoch": 0.42815378115758346, "grad_norm": 1.6376953125, "learning_rate": 2.0281148295714512e-05, "loss": 0.21474647521972656, "step": 3167 }, { "epoch": 0.42828897338403044, "grad_norm": 1.059326171875, "learning_rate": 2.027474155820605e-05, "loss": 0.18851947784423828, "step": 3168 }, { "epoch": 0.4284241656104774, "grad_norm": 3.019373893737793, "learning_rate": 2.026833372246827e-05, "loss": 0.2694053649902344, "step": 3169 }, { "epoch": 0.4285593578369244, "grad_norm": 1.0689465999603271, "learning_rate": 2.026192478983531e-05, "loss": 0.19356346130371094, "step": 3170 }, { "epoch": 0.42869455006337137, "grad_norm": 2.4855387210845947, "learning_rate": 2.0255514761641555e-05, "loss": 0.21857261657714844, "step": 3171 }, { "epoch": 0.42882974228981835, "grad_norm": 2.4556994438171387, "learning_rate": 2.0249103639221597e-05, "loss": 0.15001726150512695, "step": 3172 }, { "epoch": 0.42896493451626533, "grad_norm": 2.4733104705810547, "learning_rate": 2.024269142391027e-05, "loss": 0.21355438232421875, "step": 3173 }, { "epoch": 0.4291001267427123, "grad_norm": 1.1895849704742432, "learning_rate": 2.023627811704263e-05, "loss": 0.1783924102783203, "step": 3174 }, { "epoch": 0.4292353189691593, "grad_norm": 1.321651816368103, "learning_rate": 2.0229863719953963e-05, "loss": 0.2277669906616211, "step": 3175 }, { "epoch": 0.42937051119560626, "grad_norm": 1.9732818603515625, "learning_rate": 2.0223448233979785e-05, "loss": 0.18292999267578125, "step": 3176 }, { "epoch": 0.42950570342205324, "grad_norm": 1.8285123109817505, "learning_rate": 2.0217031660455825e-05, "loss": 0.16721820831298828, "step": 3177 }, { "epoch": 0.4296408956485002, "grad_norm": 1.2731223106384277, "learning_rate": 2.0210614000718054e-05, "loss": 0.1712942123413086, "step": 3178 }, { "epoch": 0.4297760878749472, "grad_norm": 2.2243106365203857, "learning_rate": 2.020419525610266e-05, "loss": 0.13637590408325195, "step": 3179 }, { "epoch": 0.4299112801013942, "grad_norm": 2.3708536624908447, "learning_rate": 2.0197775427946066e-05, "loss": 0.19263267517089844, "step": 3180 }, { "epoch": 0.43004647232784116, "grad_norm": 0.9503238797187805, "learning_rate": 2.0191354517584902e-05, "loss": 0.22425031661987305, "step": 3181 }, { "epoch": 0.43018166455428813, "grad_norm": 0.7855859398841858, "learning_rate": 2.018493252635605e-05, "loss": 0.14455795288085938, "step": 3182 }, { "epoch": 0.4303168567807351, "grad_norm": 1.6721371412277222, "learning_rate": 2.0178509455596598e-05, "loss": 0.15391921997070312, "step": 3183 }, { "epoch": 0.4304520490071821, "grad_norm": 1.3813170194625854, "learning_rate": 2.017208530664386e-05, "loss": 0.14330005645751953, "step": 3184 }, { "epoch": 0.43058724123362907, "grad_norm": 0.6997906565666199, "learning_rate": 2.016566008083538e-05, "loss": 0.1602010726928711, "step": 3185 }, { "epoch": 0.43072243346007605, "grad_norm": 1.4364275932312012, "learning_rate": 2.0159233779508923e-05, "loss": 0.11324286460876465, "step": 3186 }, { "epoch": 0.430857625686523, "grad_norm": 1.631349802017212, "learning_rate": 2.0152806404002482e-05, "loss": 0.1696319580078125, "step": 3187 }, { "epoch": 0.43099281791297, "grad_norm": 1.1576733589172363, "learning_rate": 2.014637795565427e-05, "loss": 0.17049694061279297, "step": 3188 }, { "epoch": 0.431128010139417, "grad_norm": 1.0393377542495728, "learning_rate": 2.0139948435802722e-05, "loss": 0.16859054565429688, "step": 3189 }, { "epoch": 0.43126320236586396, "grad_norm": 0.9363009333610535, "learning_rate": 2.0133517845786504e-05, "loss": 0.17458343505859375, "step": 3190 }, { "epoch": 0.43139839459231094, "grad_norm": 1.162930965423584, "learning_rate": 2.012708618694449e-05, "loss": 0.22877025604248047, "step": 3191 }, { "epoch": 0.4315335868187579, "grad_norm": 1.1705584526062012, "learning_rate": 2.0120653460615795e-05, "loss": 0.16747665405273438, "step": 3192 }, { "epoch": 0.4316687790452049, "grad_norm": 2.058824300765991, "learning_rate": 2.011421966813974e-05, "loss": 0.17766547203063965, "step": 3193 }, { "epoch": 0.4318039712716519, "grad_norm": 1.0490683317184448, "learning_rate": 2.0107784810855882e-05, "loss": 0.1860666275024414, "step": 3194 }, { "epoch": 0.43193916349809885, "grad_norm": 1.3689110279083252, "learning_rate": 2.0101348890103985e-05, "loss": 0.2069377899169922, "step": 3195 }, { "epoch": 0.43207435572454583, "grad_norm": 1.6334882974624634, "learning_rate": 2.0094911907224043e-05, "loss": 0.16409778594970703, "step": 3196 }, { "epoch": 0.4322095479509928, "grad_norm": 1.1300592422485352, "learning_rate": 2.008847386355628e-05, "loss": 0.19797420501708984, "step": 3197 }, { "epoch": 0.4323447401774398, "grad_norm": 1.1207606792449951, "learning_rate": 2.008203476044112e-05, "loss": 0.2129497528076172, "step": 3198 }, { "epoch": 0.43247993240388677, "grad_norm": 1.706355333328247, "learning_rate": 2.007559459921922e-05, "loss": 0.26381683349609375, "step": 3199 }, { "epoch": 0.43261512463033375, "grad_norm": 0.8641300201416016, "learning_rate": 2.0069153381231456e-05, "loss": 0.17691802978515625, "step": 3200 }, { "epoch": 0.4327503168567807, "grad_norm": 2.3879542350769043, "learning_rate": 2.0062711107818933e-05, "loss": 0.19820594787597656, "step": 3201 }, { "epoch": 0.4328855090832277, "grad_norm": 1.6514135599136353, "learning_rate": 2.0056267780322953e-05, "loss": 0.1948375701904297, "step": 3202 }, { "epoch": 0.4330207013096747, "grad_norm": 0.7712211608886719, "learning_rate": 2.004982340008506e-05, "loss": 0.18509674072265625, "step": 3203 }, { "epoch": 0.43315589353612166, "grad_norm": 0.714439332485199, "learning_rate": 2.004337796844701e-05, "loss": 0.1577749252319336, "step": 3204 }, { "epoch": 0.43329108576256864, "grad_norm": 1.6942423582077026, "learning_rate": 2.003693148675077e-05, "loss": 0.1829838752746582, "step": 3205 }, { "epoch": 0.4334262779890156, "grad_norm": 1.2012410163879395, "learning_rate": 2.003048395633853e-05, "loss": 0.20160293579101562, "step": 3206 }, { "epoch": 0.4335614702154626, "grad_norm": 2.221736431121826, "learning_rate": 2.0024035378552708e-05, "loss": 0.17109394073486328, "step": 3207 }, { "epoch": 0.4336966624419096, "grad_norm": 0.9062662720680237, "learning_rate": 2.001758575473593e-05, "loss": 0.13499832153320312, "step": 3208 }, { "epoch": 0.43383185466835655, "grad_norm": 1.8224637508392334, "learning_rate": 2.0011135086231042e-05, "loss": 0.1153252124786377, "step": 3209 }, { "epoch": 0.43396704689480353, "grad_norm": 1.7240568399429321, "learning_rate": 2.0004683374381104e-05, "loss": 0.1922140121459961, "step": 3210 }, { "epoch": 0.4341022391212505, "grad_norm": 1.0431997776031494, "learning_rate": 1.9998230620529395e-05, "loss": 0.12793731689453125, "step": 3211 }, { "epoch": 0.4342374313476975, "grad_norm": 1.7353354692459106, "learning_rate": 1.999177682601942e-05, "loss": 0.20990562438964844, "step": 3212 }, { "epoch": 0.43437262357414447, "grad_norm": 1.041535496711731, "learning_rate": 1.9985321992194896e-05, "loss": 0.1896677017211914, "step": 3213 }, { "epoch": 0.43450781580059145, "grad_norm": 0.7205056548118591, "learning_rate": 1.9978866120399746e-05, "loss": 0.12275075912475586, "step": 3214 }, { "epoch": 0.4346430080270384, "grad_norm": 1.9319335222244263, "learning_rate": 1.9972409211978116e-05, "loss": 0.20810317993164062, "step": 3215 }, { "epoch": 0.4347782002534854, "grad_norm": 0.9305655360221863, "learning_rate": 1.9965951268274373e-05, "loss": 0.16515445709228516, "step": 3216 }, { "epoch": 0.4349133924799324, "grad_norm": 1.829504132270813, "learning_rate": 1.9959492290633093e-05, "loss": 0.20354700088500977, "step": 3217 }, { "epoch": 0.43504858470637936, "grad_norm": 1.781173586845398, "learning_rate": 1.995303228039907e-05, "loss": 0.24132061004638672, "step": 3218 }, { "epoch": 0.43518377693282634, "grad_norm": 1.7699229717254639, "learning_rate": 1.994657123891732e-05, "loss": 0.1830425262451172, "step": 3219 }, { "epoch": 0.4353189691592733, "grad_norm": 1.302095651626587, "learning_rate": 1.9940109167533055e-05, "loss": 0.2108612060546875, "step": 3220 }, { "epoch": 0.4354541613857203, "grad_norm": 1.7591477632522583, "learning_rate": 1.9933646067591716e-05, "loss": 0.12987709045410156, "step": 3221 }, { "epoch": 0.4355893536121673, "grad_norm": 2.4805960655212402, "learning_rate": 1.992718194043896e-05, "loss": 0.19744253158569336, "step": 3222 }, { "epoch": 0.43572454583861425, "grad_norm": 2.7962231636047363, "learning_rate": 1.9920716787420643e-05, "loss": 0.2092113494873047, "step": 3223 }, { "epoch": 0.4358597380650613, "grad_norm": 1.1938811540603638, "learning_rate": 1.9914250609882858e-05, "loss": 0.20505475997924805, "step": 3224 }, { "epoch": 0.43599493029150826, "grad_norm": 1.1170421838760376, "learning_rate": 1.9907783409171885e-05, "loss": 0.2086191177368164, "step": 3225 }, { "epoch": 0.43613012251795524, "grad_norm": 2.823751449584961, "learning_rate": 1.990131518663424e-05, "loss": 0.2563009262084961, "step": 3226 }, { "epoch": 0.4362653147444022, "grad_norm": 0.9505105018615723, "learning_rate": 1.9894845943616632e-05, "loss": 0.1636509895324707, "step": 3227 }, { "epoch": 0.4364005069708492, "grad_norm": 1.3886934518814087, "learning_rate": 1.988837568146599e-05, "loss": 0.2254321575164795, "step": 3228 }, { "epoch": 0.4365356991972962, "grad_norm": 0.9767918586730957, "learning_rate": 1.988190440152947e-05, "loss": 0.1766366958618164, "step": 3229 }, { "epoch": 0.43667089142374316, "grad_norm": 2.4880735874176025, "learning_rate": 1.9875432105154424e-05, "loss": 0.26550865173339844, "step": 3230 }, { "epoch": 0.43680608365019014, "grad_norm": 0.9030943512916565, "learning_rate": 1.9868958793688412e-05, "loss": 0.2024822235107422, "step": 3231 }, { "epoch": 0.4369412758766371, "grad_norm": 1.3507105112075806, "learning_rate": 1.9862484468479213e-05, "loss": 0.16112804412841797, "step": 3232 }, { "epoch": 0.4370764681030841, "grad_norm": 0.9378476738929749, "learning_rate": 1.985600913087482e-05, "loss": 0.1256561279296875, "step": 3233 }, { "epoch": 0.43721166032953107, "grad_norm": 0.9202724099159241, "learning_rate": 1.9849532782223425e-05, "loss": 0.1743021011352539, "step": 3234 }, { "epoch": 0.43734685255597805, "grad_norm": 0.6298282146453857, "learning_rate": 1.9843055423873447e-05, "loss": 0.16308021545410156, "step": 3235 }, { "epoch": 0.43748204478242503, "grad_norm": 0.6978808045387268, "learning_rate": 1.9836577057173507e-05, "loss": 0.1531229019165039, "step": 3236 }, { "epoch": 0.437617237008872, "grad_norm": 1.0858879089355469, "learning_rate": 1.9830097683472427e-05, "loss": 0.19670486450195312, "step": 3237 }, { "epoch": 0.437752429235319, "grad_norm": 0.8587451577186584, "learning_rate": 1.9823617304119252e-05, "loss": 0.16200923919677734, "step": 3238 }, { "epoch": 0.43788762146176596, "grad_norm": 1.0380136966705322, "learning_rate": 1.9817135920463232e-05, "loss": 0.1998729705810547, "step": 3239 }, { "epoch": 0.43802281368821294, "grad_norm": 0.9099397659301758, "learning_rate": 1.9810653533853826e-05, "loss": 0.1688380241394043, "step": 3240 }, { "epoch": 0.4381580059146599, "grad_norm": 1.3872203826904297, "learning_rate": 1.9804170145640706e-05, "loss": 0.2563629150390625, "step": 3241 }, { "epoch": 0.4382931981411069, "grad_norm": 0.8556606769561768, "learning_rate": 1.9797685757173737e-05, "loss": 0.1521892547607422, "step": 3242 }, { "epoch": 0.4384283903675539, "grad_norm": 1.9313888549804688, "learning_rate": 1.979120036980301e-05, "loss": 0.1937112808227539, "step": 3243 }, { "epoch": 0.43856358259400086, "grad_norm": 0.9642466902732849, "learning_rate": 1.9784713984878814e-05, "loss": 0.17397403717041016, "step": 3244 }, { "epoch": 0.43869877482044783, "grad_norm": 1.062937617301941, "learning_rate": 1.9778226603751652e-05, "loss": 0.18570709228515625, "step": 3245 }, { "epoch": 0.4388339670468948, "grad_norm": 2.1490941047668457, "learning_rate": 1.9771738227772235e-05, "loss": 0.15308094024658203, "step": 3246 }, { "epoch": 0.4389691592733418, "grad_norm": 0.8808411955833435, "learning_rate": 1.976524885829147e-05, "loss": 0.16611814498901367, "step": 3247 }, { "epoch": 0.43910435149978877, "grad_norm": 1.0755809545516968, "learning_rate": 1.975875849666048e-05, "loss": 0.13784503936767578, "step": 3248 }, { "epoch": 0.43923954372623575, "grad_norm": 2.4913408756256104, "learning_rate": 1.9752267144230595e-05, "loss": 0.27016448974609375, "step": 3249 }, { "epoch": 0.4393747359526827, "grad_norm": 1.0091131925582886, "learning_rate": 1.9745774802353347e-05, "loss": 0.17917919158935547, "step": 3250 }, { "epoch": 0.4395099281791297, "grad_norm": 1.1258560419082642, "learning_rate": 1.973928147238048e-05, "loss": 0.1880321502685547, "step": 3251 }, { "epoch": 0.4396451204055767, "grad_norm": 2.2914650440216064, "learning_rate": 1.973278715566394e-05, "loss": 0.19746017456054688, "step": 3252 }, { "epoch": 0.43978031263202366, "grad_norm": 1.6040184497833252, "learning_rate": 1.972629185355587e-05, "loss": 0.13663291931152344, "step": 3253 }, { "epoch": 0.43991550485847064, "grad_norm": 1.7001338005065918, "learning_rate": 1.971979556740864e-05, "loss": 0.19942855834960938, "step": 3254 }, { "epoch": 0.4400506970849176, "grad_norm": 0.8530957102775574, "learning_rate": 1.971329829857479e-05, "loss": 0.1932353973388672, "step": 3255 }, { "epoch": 0.4401858893113646, "grad_norm": 1.4182236194610596, "learning_rate": 1.9706800048407112e-05, "loss": 0.24430084228515625, "step": 3256 }, { "epoch": 0.4403210815378116, "grad_norm": 1.5841634273529053, "learning_rate": 1.9700300818258566e-05, "loss": 0.15078258514404297, "step": 3257 }, { "epoch": 0.44045627376425855, "grad_norm": 1.5203839540481567, "learning_rate": 1.9693800609482318e-05, "loss": 0.1559772491455078, "step": 3258 }, { "epoch": 0.44059146599070553, "grad_norm": 1.0354779958724976, "learning_rate": 1.9687299423431754e-05, "loss": 0.19831275939941406, "step": 3259 }, { "epoch": 0.4407266582171525, "grad_norm": 0.8177117705345154, "learning_rate": 1.968079726146045e-05, "loss": 0.13483619689941406, "step": 3260 }, { "epoch": 0.4408618504435995, "grad_norm": 0.9529993534088135, "learning_rate": 1.9674294124922204e-05, "loss": 0.16558146476745605, "step": 3261 }, { "epoch": 0.44099704267004647, "grad_norm": 0.6532862782478333, "learning_rate": 1.966779001517099e-05, "loss": 0.14177227020263672, "step": 3262 }, { "epoch": 0.44113223489649345, "grad_norm": 1.7624483108520508, "learning_rate": 1.9661284933561006e-05, "loss": 0.2037334442138672, "step": 3263 }, { "epoch": 0.4412674271229404, "grad_norm": 1.1440997123718262, "learning_rate": 1.9654778881446636e-05, "loss": 0.1915740966796875, "step": 3264 }, { "epoch": 0.4414026193493874, "grad_norm": 2.2073144912719727, "learning_rate": 1.9648271860182487e-05, "loss": 0.1768202781677246, "step": 3265 }, { "epoch": 0.4415378115758344, "grad_norm": 2.678293466567993, "learning_rate": 1.9641763871123345e-05, "loss": 0.17505693435668945, "step": 3266 }, { "epoch": 0.44167300380228136, "grad_norm": 0.8177661299705505, "learning_rate": 1.963525491562421e-05, "loss": 0.2051105499267578, "step": 3267 }, { "epoch": 0.44180819602872834, "grad_norm": 1.999330759048462, "learning_rate": 1.9628744995040287e-05, "loss": 0.3141021728515625, "step": 3268 }, { "epoch": 0.4419433882551753, "grad_norm": 0.7680009603500366, "learning_rate": 1.9622234110726976e-05, "loss": 0.15154409408569336, "step": 3269 }, { "epoch": 0.4420785804816223, "grad_norm": 1.16696035861969, "learning_rate": 1.9615722264039868e-05, "loss": 0.1818697452545166, "step": 3270 }, { "epoch": 0.4422137727080693, "grad_norm": 2.3816661834716797, "learning_rate": 1.9609209456334772e-05, "loss": 0.14307022094726562, "step": 3271 }, { "epoch": 0.44234896493451625, "grad_norm": 2.4581425189971924, "learning_rate": 1.960269568896769e-05, "loss": 0.1819601058959961, "step": 3272 }, { "epoch": 0.44248415716096323, "grad_norm": 2.0372302532196045, "learning_rate": 1.9596180963294822e-05, "loss": 0.19658660888671875, "step": 3273 }, { "epoch": 0.4426193493874102, "grad_norm": 1.296618103981018, "learning_rate": 1.9589665280672564e-05, "loss": 0.17101097106933594, "step": 3274 }, { "epoch": 0.4427545416138572, "grad_norm": 0.9417622685432434, "learning_rate": 1.958314864245752e-05, "loss": 0.18425512313842773, "step": 3275 }, { "epoch": 0.44288973384030417, "grad_norm": 1.3346128463745117, "learning_rate": 1.957663105000649e-05, "loss": 0.21772289276123047, "step": 3276 }, { "epoch": 0.44302492606675115, "grad_norm": 2.2654311656951904, "learning_rate": 1.957011250467647e-05, "loss": 0.1448230743408203, "step": 3277 }, { "epoch": 0.4431601182931981, "grad_norm": 1.141088843345642, "learning_rate": 1.9563593007824658e-05, "loss": 0.1553792953491211, "step": 3278 }, { "epoch": 0.4432953105196451, "grad_norm": 1.3459566831588745, "learning_rate": 1.9557072560808442e-05, "loss": 0.24167442321777344, "step": 3279 }, { "epoch": 0.4434305027460921, "grad_norm": 3.7929370403289795, "learning_rate": 1.9550551164985418e-05, "loss": 0.191436767578125, "step": 3280 }, { "epoch": 0.44356569497253906, "grad_norm": 1.4540438652038574, "learning_rate": 1.9544028821713372e-05, "loss": 0.19977283477783203, "step": 3281 }, { "epoch": 0.44370088719898604, "grad_norm": 1.1990866661071777, "learning_rate": 1.9537505532350298e-05, "loss": 0.21065521240234375, "step": 3282 }, { "epoch": 0.443836079425433, "grad_norm": 2.6398627758026123, "learning_rate": 1.9530981298254376e-05, "loss": 0.20041465759277344, "step": 3283 }, { "epoch": 0.44397127165188, "grad_norm": 1.9614109992980957, "learning_rate": 1.9524456120783983e-05, "loss": 0.20159626007080078, "step": 3284 }, { "epoch": 0.444106463878327, "grad_norm": 0.8138933181762695, "learning_rate": 1.95179300012977e-05, "loss": 0.14339160919189453, "step": 3285 }, { "epoch": 0.44424165610477395, "grad_norm": 0.836086094379425, "learning_rate": 1.9511402941154296e-05, "loss": 0.1988210678100586, "step": 3286 }, { "epoch": 0.44437684833122093, "grad_norm": 3.8873000144958496, "learning_rate": 1.950487494171274e-05, "loss": 0.23756694793701172, "step": 3287 }, { "epoch": 0.4445120405576679, "grad_norm": 2.2183263301849365, "learning_rate": 1.9498346004332203e-05, "loss": 0.14792919158935547, "step": 3288 }, { "epoch": 0.4446472327841149, "grad_norm": 2.675966739654541, "learning_rate": 1.949181613037204e-05, "loss": 0.2288188934326172, "step": 3289 }, { "epoch": 0.44478242501056187, "grad_norm": 0.961983323097229, "learning_rate": 1.9485285321191804e-05, "loss": 0.14125490188598633, "step": 3290 }, { "epoch": 0.44491761723700884, "grad_norm": 2.292262315750122, "learning_rate": 1.9478753578151244e-05, "loss": 0.21262693405151367, "step": 3291 }, { "epoch": 0.4450528094634559, "grad_norm": 1.2248820066452026, "learning_rate": 1.9472220902610304e-05, "loss": 0.2552909851074219, "step": 3292 }, { "epoch": 0.44518800168990286, "grad_norm": 0.7249830365180969, "learning_rate": 1.9465687295929127e-05, "loss": 0.126265287399292, "step": 3293 }, { "epoch": 0.44532319391634984, "grad_norm": 0.6301270723342896, "learning_rate": 1.945915275946804e-05, "loss": 0.13928532600402832, "step": 3294 }, { "epoch": 0.4454583861427968, "grad_norm": 1.0418184995651245, "learning_rate": 1.9452617294587573e-05, "loss": 0.1963043212890625, "step": 3295 }, { "epoch": 0.4455935783692438, "grad_norm": 1.510541558265686, "learning_rate": 1.9446080902648435e-05, "loss": 0.14599323272705078, "step": 3296 }, { "epoch": 0.44572877059569077, "grad_norm": 0.8976702690124512, "learning_rate": 1.943954358501154e-05, "loss": 0.19105148315429688, "step": 3297 }, { "epoch": 0.44586396282213775, "grad_norm": 1.173773169517517, "learning_rate": 1.943300534303801e-05, "loss": 0.12724590301513672, "step": 3298 }, { "epoch": 0.44599915504858473, "grad_norm": 0.8885128498077393, "learning_rate": 1.9426466178089116e-05, "loss": 0.16897010803222656, "step": 3299 }, { "epoch": 0.4461343472750317, "grad_norm": 1.7829830646514893, "learning_rate": 1.9419926091526367e-05, "loss": 0.23958587646484375, "step": 3300 }, { "epoch": 0.4462695395014787, "grad_norm": 1.337445855140686, "learning_rate": 1.9413385084711425e-05, "loss": 0.2074289321899414, "step": 3301 }, { "epoch": 0.44640473172792566, "grad_norm": 1.976184368133545, "learning_rate": 1.9406843159006183e-05, "loss": 0.21131229400634766, "step": 3302 }, { "epoch": 0.44653992395437264, "grad_norm": 1.662597417831421, "learning_rate": 1.940030031577269e-05, "loss": 0.1674041748046875, "step": 3303 }, { "epoch": 0.4466751161808196, "grad_norm": 1.4795536994934082, "learning_rate": 1.9393756556373212e-05, "loss": 0.21033668518066406, "step": 3304 }, { "epoch": 0.4468103084072666, "grad_norm": 0.9193311333656311, "learning_rate": 1.9387211882170184e-05, "loss": 0.19032049179077148, "step": 3305 }, { "epoch": 0.4469455006337136, "grad_norm": 1.493209719657898, "learning_rate": 1.9380666294526243e-05, "loss": 0.19511127471923828, "step": 3306 }, { "epoch": 0.44708069286016056, "grad_norm": 2.4407761096954346, "learning_rate": 1.9374119794804228e-05, "loss": 0.1898479461669922, "step": 3307 }, { "epoch": 0.44721588508660753, "grad_norm": 1.7308591604232788, "learning_rate": 1.936757238436714e-05, "loss": 0.17169570922851562, "step": 3308 }, { "epoch": 0.4473510773130545, "grad_norm": 1.2203428745269775, "learning_rate": 1.936102406457819e-05, "loss": 0.1848452091217041, "step": 3309 }, { "epoch": 0.4474862695395015, "grad_norm": 0.9986810684204102, "learning_rate": 1.935447483680078e-05, "loss": 0.2286396026611328, "step": 3310 }, { "epoch": 0.44762146176594847, "grad_norm": 0.8714733719825745, "learning_rate": 1.9347924702398484e-05, "loss": 0.156044602394104, "step": 3311 }, { "epoch": 0.44775665399239545, "grad_norm": 2.1321327686309814, "learning_rate": 1.9341373662735075e-05, "loss": 0.21015453338623047, "step": 3312 }, { "epoch": 0.4478918462188424, "grad_norm": 1.5197192430496216, "learning_rate": 1.9334821719174524e-05, "loss": 0.1696155071258545, "step": 3313 }, { "epoch": 0.4480270384452894, "grad_norm": 1.585639238357544, "learning_rate": 1.9328268873080974e-05, "loss": 0.23131179809570312, "step": 3314 }, { "epoch": 0.4481622306717364, "grad_norm": 1.7585625648498535, "learning_rate": 1.9321715125818765e-05, "loss": 0.2266082763671875, "step": 3315 }, { "epoch": 0.44829742289818336, "grad_norm": 1.6605536937713623, "learning_rate": 1.931516047875242e-05, "loss": 0.14570701122283936, "step": 3316 }, { "epoch": 0.44843261512463034, "grad_norm": 2.61189866065979, "learning_rate": 1.930860493324665e-05, "loss": 0.17184185981750488, "step": 3317 }, { "epoch": 0.4485678073510773, "grad_norm": 1.735080599784851, "learning_rate": 1.9302048490666356e-05, "loss": 0.1675701141357422, "step": 3318 }, { "epoch": 0.4487029995775243, "grad_norm": 1.6295245885849, "learning_rate": 1.9295491152376623e-05, "loss": 0.1856555938720703, "step": 3319 }, { "epoch": 0.4488381918039713, "grad_norm": 1.884494423866272, "learning_rate": 1.928893291974273e-05, "loss": 0.16524791717529297, "step": 3320 }, { "epoch": 0.44897338403041825, "grad_norm": 1.0411334037780762, "learning_rate": 1.9282373794130132e-05, "loss": 0.1746060848236084, "step": 3321 }, { "epoch": 0.44910857625686523, "grad_norm": 0.9504089951515198, "learning_rate": 1.9275813776904472e-05, "loss": 0.14795160293579102, "step": 3322 }, { "epoch": 0.4492437684833122, "grad_norm": 0.7142082452774048, "learning_rate": 1.9269252869431582e-05, "loss": 0.18247127532958984, "step": 3323 }, { "epoch": 0.4493789607097592, "grad_norm": 1.5052988529205322, "learning_rate": 1.9262691073077476e-05, "loss": 0.18030238151550293, "step": 3324 }, { "epoch": 0.44951415293620617, "grad_norm": 0.9043465256690979, "learning_rate": 1.9256128389208363e-05, "loss": 0.15996450185775757, "step": 3325 }, { "epoch": 0.44964934516265315, "grad_norm": 2.2750658988952637, "learning_rate": 1.924956481919062e-05, "loss": 0.1936359405517578, "step": 3326 }, { "epoch": 0.4497845373891001, "grad_norm": 1.260048270225525, "learning_rate": 1.9243000364390825e-05, "loss": 0.23260053992271423, "step": 3327 }, { "epoch": 0.4499197296155471, "grad_norm": 1.9068249464035034, "learning_rate": 1.9236435026175717e-05, "loss": 0.1737051010131836, "step": 3328 }, { "epoch": 0.4500549218419941, "grad_norm": 2.0965352058410645, "learning_rate": 1.9229868805912248e-05, "loss": 0.2030625343322754, "step": 3329 }, { "epoch": 0.45019011406844106, "grad_norm": 0.904119074344635, "learning_rate": 1.9223301704967543e-05, "loss": 0.16301298141479492, "step": 3330 }, { "epoch": 0.45032530629488804, "grad_norm": 1.8079923391342163, "learning_rate": 1.92167337247089e-05, "loss": 0.19826650619506836, "step": 3331 }, { "epoch": 0.450460498521335, "grad_norm": 0.7676612138748169, "learning_rate": 1.9210164866503808e-05, "loss": 0.1650075912475586, "step": 3332 }, { "epoch": 0.450595690747782, "grad_norm": 0.7464839816093445, "learning_rate": 1.9203595131719935e-05, "loss": 0.14877605438232422, "step": 3333 }, { "epoch": 0.450730882974229, "grad_norm": 0.860927402973175, "learning_rate": 1.9197024521725148e-05, "loss": 0.24932289123535156, "step": 3334 }, { "epoch": 0.45086607520067595, "grad_norm": 1.0704501867294312, "learning_rate": 1.9190453037887465e-05, "loss": 0.22809600830078125, "step": 3335 }, { "epoch": 0.45100126742712293, "grad_norm": 1.003853440284729, "learning_rate": 1.918388068157512e-05, "loss": 0.15314126014709473, "step": 3336 }, { "epoch": 0.4511364596535699, "grad_norm": 1.2322697639465332, "learning_rate": 1.9177307454156507e-05, "loss": 0.17522716522216797, "step": 3337 }, { "epoch": 0.4512716518800169, "grad_norm": 1.9328501224517822, "learning_rate": 1.9170733357000202e-05, "loss": 0.20365238189697266, "step": 3338 }, { "epoch": 0.45140684410646387, "grad_norm": 0.9317488074302673, "learning_rate": 1.916415839147497e-05, "loss": 0.18757057189941406, "step": 3339 }, { "epoch": 0.45154203633291085, "grad_norm": 1.2405551671981812, "learning_rate": 1.9157582558949756e-05, "loss": 0.12496471405029297, "step": 3340 }, { "epoch": 0.4516772285593578, "grad_norm": 1.2460386753082275, "learning_rate": 1.9151005860793682e-05, "loss": 0.194488525390625, "step": 3341 }, { "epoch": 0.4518124207858048, "grad_norm": 1.045401930809021, "learning_rate": 1.9144428298376056e-05, "loss": 0.19829177856445312, "step": 3342 }, { "epoch": 0.4519476130122518, "grad_norm": 2.408951759338379, "learning_rate": 1.9137849873066355e-05, "loss": 0.20370006561279297, "step": 3343 }, { "epoch": 0.45208280523869876, "grad_norm": 1.8167108297348022, "learning_rate": 1.9131270586234243e-05, "loss": 0.18300151824951172, "step": 3344 }, { "epoch": 0.45221799746514574, "grad_norm": 0.9507236480712891, "learning_rate": 1.9124690439249564e-05, "loss": 0.13016855716705322, "step": 3345 }, { "epoch": 0.4523531896915927, "grad_norm": 1.3461902141571045, "learning_rate": 1.9118109433482342e-05, "loss": 0.16587352752685547, "step": 3346 }, { "epoch": 0.4524883819180397, "grad_norm": 0.8641530275344849, "learning_rate": 1.911152757030278e-05, "loss": 0.1675124168395996, "step": 3347 }, { "epoch": 0.4526235741444867, "grad_norm": 0.699028730392456, "learning_rate": 1.9104944851081247e-05, "loss": 0.12414193153381348, "step": 3348 }, { "epoch": 0.45275876637093365, "grad_norm": 3.3927464485168457, "learning_rate": 1.9098361277188303e-05, "loss": 0.22945332527160645, "step": 3349 }, { "epoch": 0.45289395859738063, "grad_norm": 2.2899057865142822, "learning_rate": 1.909177684999469e-05, "loss": 0.17086172103881836, "step": 3350 }, { "epoch": 0.4530291508238276, "grad_norm": 1.3996446132659912, "learning_rate": 1.9085191570871316e-05, "loss": 0.1568918228149414, "step": 3351 }, { "epoch": 0.4531643430502746, "grad_norm": 0.8151464462280273, "learning_rate": 1.9078605441189275e-05, "loss": 0.14997220039367676, "step": 3352 }, { "epoch": 0.45329953527672157, "grad_norm": 0.8316568732261658, "learning_rate": 1.9072018462319828e-05, "loss": 0.14484214782714844, "step": 3353 }, { "epoch": 0.45343472750316854, "grad_norm": 1.5172685384750366, "learning_rate": 1.9065430635634422e-05, "loss": 0.17213410139083862, "step": 3354 }, { "epoch": 0.4535699197296155, "grad_norm": 1.184983730316162, "learning_rate": 1.9058841962504677e-05, "loss": 0.1961350440979004, "step": 3355 }, { "epoch": 0.4537051119560625, "grad_norm": 1.0499653816223145, "learning_rate": 1.9052252444302394e-05, "loss": 0.15592074394226074, "step": 3356 }, { "epoch": 0.4538403041825095, "grad_norm": 1.0566301345825195, "learning_rate": 1.904566208239954e-05, "loss": 0.14187121391296387, "step": 3357 }, { "epoch": 0.45397549640895646, "grad_norm": 1.418910026550293, "learning_rate": 1.903907087816827e-05, "loss": 0.1613149642944336, "step": 3358 }, { "epoch": 0.45411068863540344, "grad_norm": 1.500810980796814, "learning_rate": 1.9032478832980902e-05, "loss": 0.24725914001464844, "step": 3359 }, { "epoch": 0.45424588086185047, "grad_norm": 2.5046122074127197, "learning_rate": 1.9025885948209938e-05, "loss": 0.1636638641357422, "step": 3360 }, { "epoch": 0.45438107308829745, "grad_norm": 1.6339011192321777, "learning_rate": 1.901929222522805e-05, "loss": 0.21809005737304688, "step": 3361 }, { "epoch": 0.4545162653147444, "grad_norm": 1.0242033004760742, "learning_rate": 1.901269766540809e-05, "loss": 0.2426128387451172, "step": 3362 }, { "epoch": 0.4546514575411914, "grad_norm": 0.7857242226600647, "learning_rate": 1.9006102270123076e-05, "loss": 0.11364269256591797, "step": 3363 }, { "epoch": 0.4547866497676384, "grad_norm": 1.756072998046875, "learning_rate": 1.8999506040746208e-05, "loss": 0.1663675308227539, "step": 3364 }, { "epoch": 0.45492184199408536, "grad_norm": 0.8138302564620972, "learning_rate": 1.899290897865085e-05, "loss": 0.17125940322875977, "step": 3365 }, { "epoch": 0.45505703422053234, "grad_norm": 1.630271315574646, "learning_rate": 1.898631108521055e-05, "loss": 0.1776294708251953, "step": 3366 }, { "epoch": 0.4551922264469793, "grad_norm": 0.5550186038017273, "learning_rate": 1.8979712361799027e-05, "loss": 0.16408538818359375, "step": 3367 }, { "epoch": 0.4553274186734263, "grad_norm": 2.5210983753204346, "learning_rate": 1.8973112809790168e-05, "loss": 0.2393512725830078, "step": 3368 }, { "epoch": 0.4554626108998733, "grad_norm": 0.9126302599906921, "learning_rate": 1.8966512430558036e-05, "loss": 0.20035552978515625, "step": 3369 }, { "epoch": 0.45559780312632026, "grad_norm": 0.8338290452957153, "learning_rate": 1.8959911225476858e-05, "loss": 0.1625652313232422, "step": 3370 }, { "epoch": 0.45573299535276723, "grad_norm": 0.9934579133987427, "learning_rate": 1.895330919592105e-05, "loss": 0.1514444351196289, "step": 3371 }, { "epoch": 0.4558681875792142, "grad_norm": 0.7505517601966858, "learning_rate": 1.8946706343265187e-05, "loss": 0.1527557373046875, "step": 3372 }, { "epoch": 0.4560033798056612, "grad_norm": 1.4540859460830688, "learning_rate": 1.8940102668884016e-05, "loss": 0.20904541015625, "step": 3373 }, { "epoch": 0.45613857203210817, "grad_norm": 1.0088428258895874, "learning_rate": 1.893349817415246e-05, "loss": 0.17722797393798828, "step": 3374 }, { "epoch": 0.45627376425855515, "grad_norm": 0.9405586123466492, "learning_rate": 1.8926892860445607e-05, "loss": 0.19090843200683594, "step": 3375 }, { "epoch": 0.4564089564850021, "grad_norm": 0.8506881594657898, "learning_rate": 1.8920286729138718e-05, "loss": 0.15937280654907227, "step": 3376 }, { "epoch": 0.4565441487114491, "grad_norm": 1.9556361436843872, "learning_rate": 1.8913679781607225e-05, "loss": 0.22687864303588867, "step": 3377 }, { "epoch": 0.4566793409378961, "grad_norm": 2.596022605895996, "learning_rate": 1.8907072019226734e-05, "loss": 0.1916961669921875, "step": 3378 }, { "epoch": 0.45681453316434306, "grad_norm": 1.1734687089920044, "learning_rate": 1.8900463443373015e-05, "loss": 0.20528650283813477, "step": 3379 }, { "epoch": 0.45694972539079004, "grad_norm": 0.8709990382194519, "learning_rate": 1.889385405542201e-05, "loss": 0.15732574462890625, "step": 3380 }, { "epoch": 0.457084917617237, "grad_norm": 1.492586374282837, "learning_rate": 1.8887243856749816e-05, "loss": 0.2473926544189453, "step": 3381 }, { "epoch": 0.457220109843684, "grad_norm": 1.4273171424865723, "learning_rate": 1.8880632848732723e-05, "loss": 0.2291412353515625, "step": 3382 }, { "epoch": 0.457355302070131, "grad_norm": 0.8740034103393555, "learning_rate": 1.8874021032747185e-05, "loss": 0.12774276733398438, "step": 3383 }, { "epoch": 0.45749049429657795, "grad_norm": 1.173427939414978, "learning_rate": 1.8867408410169803e-05, "loss": 0.13009929656982422, "step": 3384 }, { "epoch": 0.45762568652302493, "grad_norm": 0.7898250222206116, "learning_rate": 1.886079498237737e-05, "loss": 0.1951894760131836, "step": 3385 }, { "epoch": 0.4577608787494719, "grad_norm": 0.9126966595649719, "learning_rate": 1.885418075074683e-05, "loss": 0.18280935287475586, "step": 3386 }, { "epoch": 0.4578960709759189, "grad_norm": 1.1672455072402954, "learning_rate": 1.884756571665531e-05, "loss": 0.20835494995117188, "step": 3387 }, { "epoch": 0.45803126320236587, "grad_norm": 2.5968024730682373, "learning_rate": 1.8840949881480085e-05, "loss": 0.21082544326782227, "step": 3388 }, { "epoch": 0.45816645542881285, "grad_norm": 1.2569278478622437, "learning_rate": 1.8834333246598613e-05, "loss": 0.16714727878570557, "step": 3389 }, { "epoch": 0.4583016476552598, "grad_norm": 1.5037062168121338, "learning_rate": 1.8827715813388514e-05, "loss": 0.14668774604797363, "step": 3390 }, { "epoch": 0.4584368398817068, "grad_norm": 1.1261630058288574, "learning_rate": 1.8821097583227572e-05, "loss": 0.18340826034545898, "step": 3391 }, { "epoch": 0.4585720321081538, "grad_norm": 0.8690721988677979, "learning_rate": 1.8814478557493732e-05, "loss": 0.15972423553466797, "step": 3392 }, { "epoch": 0.45870722433460076, "grad_norm": 1.0738197565078735, "learning_rate": 1.8807858737565118e-05, "loss": 0.2241535186767578, "step": 3393 }, { "epoch": 0.45884241656104774, "grad_norm": 1.2549222707748413, "learning_rate": 1.880123812482001e-05, "loss": 0.1848316192626953, "step": 3394 }, { "epoch": 0.4589776087874947, "grad_norm": 1.4770159721374512, "learning_rate": 1.8794616720636853e-05, "loss": 0.16968345642089844, "step": 3395 }, { "epoch": 0.4591128010139417, "grad_norm": 0.9515596628189087, "learning_rate": 1.8787994526394257e-05, "loss": 0.22729110717773438, "step": 3396 }, { "epoch": 0.4592479932403887, "grad_norm": 0.9397410750389099, "learning_rate": 1.8781371543471e-05, "loss": 0.18487882614135742, "step": 3397 }, { "epoch": 0.45938318546683565, "grad_norm": 1.3566805124282837, "learning_rate": 1.8774747773246024e-05, "loss": 0.11250877380371094, "step": 3398 }, { "epoch": 0.45951837769328263, "grad_norm": 0.9017617702484131, "learning_rate": 1.8768123217098438e-05, "loss": 0.12335491180419922, "step": 3399 }, { "epoch": 0.4596535699197296, "grad_norm": 1.3379122018814087, "learning_rate": 1.8761497876407496e-05, "loss": 0.18169236183166504, "step": 3400 }, { "epoch": 0.4597887621461766, "grad_norm": 0.795560896396637, "learning_rate": 1.8754871752552646e-05, "loss": 0.16091537475585938, "step": 3401 }, { "epoch": 0.45992395437262357, "grad_norm": 0.7444160580635071, "learning_rate": 1.8748244846913463e-05, "loss": 0.09851551055908203, "step": 3402 }, { "epoch": 0.46005914659907055, "grad_norm": 1.6525068283081055, "learning_rate": 1.874161716086972e-05, "loss": 0.18190288543701172, "step": 3403 }, { "epoch": 0.4601943388255175, "grad_norm": 1.3186357021331787, "learning_rate": 1.8734988695801333e-05, "loss": 0.2541618347167969, "step": 3404 }, { "epoch": 0.4603295310519645, "grad_norm": 0.966378390789032, "learning_rate": 1.8728359453088382e-05, "loss": 0.18045425415039062, "step": 3405 }, { "epoch": 0.4604647232784115, "grad_norm": 0.6301789879798889, "learning_rate": 1.8721729434111108e-05, "loss": 0.11144638061523438, "step": 3406 }, { "epoch": 0.46059991550485846, "grad_norm": 0.8427004814147949, "learning_rate": 1.871509864024992e-05, "loss": 0.2093334197998047, "step": 3407 }, { "epoch": 0.46073510773130544, "grad_norm": 1.0891212224960327, "learning_rate": 1.8708467072885385e-05, "loss": 0.1659259796142578, "step": 3408 }, { "epoch": 0.4608702999577524, "grad_norm": 1.1697626113891602, "learning_rate": 1.8701834733398227e-05, "loss": 0.21251678466796875, "step": 3409 }, { "epoch": 0.4610054921841994, "grad_norm": 0.9912316203117371, "learning_rate": 1.8695201623169335e-05, "loss": 0.15319347381591797, "step": 3410 }, { "epoch": 0.4611406844106464, "grad_norm": 1.261716365814209, "learning_rate": 1.868856774357977e-05, "loss": 0.20406246185302734, "step": 3411 }, { "epoch": 0.46127587663709335, "grad_norm": 1.725563645362854, "learning_rate": 1.868193309601072e-05, "loss": 0.20885467529296875, "step": 3412 }, { "epoch": 0.46141106886354033, "grad_norm": 1.0795154571533203, "learning_rate": 1.867529768184357e-05, "loss": 0.13947153091430664, "step": 3413 }, { "epoch": 0.4615462610899873, "grad_norm": 1.2873276472091675, "learning_rate": 1.8668661502459842e-05, "loss": 0.16528213024139404, "step": 3414 }, { "epoch": 0.4616814533164343, "grad_norm": 1.6167263984680176, "learning_rate": 1.866202455924123e-05, "loss": 0.17933082580566406, "step": 3415 }, { "epoch": 0.46181664554288127, "grad_norm": 1.0261482000350952, "learning_rate": 1.865538685356957e-05, "loss": 0.20423603057861328, "step": 3416 }, { "epoch": 0.46195183776932824, "grad_norm": 1.0881630182266235, "learning_rate": 1.864874838682688e-05, "loss": 0.1706758737564087, "step": 3417 }, { "epoch": 0.4620870299957752, "grad_norm": 0.9593113660812378, "learning_rate": 1.864210916039531e-05, "loss": 0.1978282928466797, "step": 3418 }, { "epoch": 0.4622222222222222, "grad_norm": 1.883331298828125, "learning_rate": 1.86354691756572e-05, "loss": 0.16776585578918457, "step": 3419 }, { "epoch": 0.4623574144486692, "grad_norm": 1.0900118350982666, "learning_rate": 1.8628828433995013e-05, "loss": 0.13504266738891602, "step": 3420 }, { "epoch": 0.46249260667511616, "grad_norm": 1.8107829093933105, "learning_rate": 1.86221869367914e-05, "loss": 0.2114863395690918, "step": 3421 }, { "epoch": 0.46262779890156314, "grad_norm": 0.8562608957290649, "learning_rate": 1.8615544685429153e-05, "loss": 0.1545248031616211, "step": 3422 }, { "epoch": 0.4627629911280101, "grad_norm": 1.1713788509368896, "learning_rate": 1.860890168129122e-05, "loss": 0.1962137222290039, "step": 3423 }, { "epoch": 0.4628981833544571, "grad_norm": 0.8066584467887878, "learning_rate": 1.8602257925760708e-05, "loss": 0.14803075790405273, "step": 3424 }, { "epoch": 0.46303337558090407, "grad_norm": 1.6488244533538818, "learning_rate": 1.859561342022089e-05, "loss": 0.15384674072265625, "step": 3425 }, { "epoch": 0.46316856780735105, "grad_norm": 1.595786452293396, "learning_rate": 1.8588968166055185e-05, "loss": 0.15847110748291016, "step": 3426 }, { "epoch": 0.4633037600337981, "grad_norm": 3.0756123065948486, "learning_rate": 1.8582322164647166e-05, "loss": 0.1841282844543457, "step": 3427 }, { "epoch": 0.46343895226024506, "grad_norm": 0.981618344783783, "learning_rate": 1.8575675417380568e-05, "loss": 0.15735626220703125, "step": 3428 }, { "epoch": 0.46357414448669204, "grad_norm": 1.1846190690994263, "learning_rate": 1.856902792563928e-05, "loss": 0.2256298065185547, "step": 3429 }, { "epoch": 0.463709336713139, "grad_norm": 1.0386713743209839, "learning_rate": 1.856237969080735e-05, "loss": 0.21704483032226562, "step": 3430 }, { "epoch": 0.463844528939586, "grad_norm": 0.9064835906028748, "learning_rate": 1.8555730714268967e-05, "loss": 0.1602954864501953, "step": 3431 }, { "epoch": 0.463979721166033, "grad_norm": 1.2755295038223267, "learning_rate": 1.8549080997408492e-05, "loss": 0.18534564971923828, "step": 3432 }, { "epoch": 0.46411491339247996, "grad_norm": 0.9447746276855469, "learning_rate": 1.8542430541610426e-05, "loss": 0.16979122161865234, "step": 3433 }, { "epoch": 0.46425010561892693, "grad_norm": 1.1349126100540161, "learning_rate": 1.8535779348259428e-05, "loss": 0.1866673231124878, "step": 3434 }, { "epoch": 0.4643852978453739, "grad_norm": 0.7974268198013306, "learning_rate": 1.852912741874032e-05, "loss": 0.1420574188232422, "step": 3435 }, { "epoch": 0.4645204900718209, "grad_norm": 1.0906926393508911, "learning_rate": 1.8522474754438056e-05, "loss": 0.20548301935195923, "step": 3436 }, { "epoch": 0.46465568229826787, "grad_norm": 1.2485060691833496, "learning_rate": 1.851582135673777e-05, "loss": 0.1719493865966797, "step": 3437 }, { "epoch": 0.46479087452471485, "grad_norm": 1.1147270202636719, "learning_rate": 1.850916722702473e-05, "loss": 0.23005104064941406, "step": 3438 }, { "epoch": 0.4649260667511618, "grad_norm": 1.42415452003479, "learning_rate": 1.8502512366684355e-05, "loss": 0.17417442798614502, "step": 3439 }, { "epoch": 0.4650612589776088, "grad_norm": 3.0755507946014404, "learning_rate": 1.8495856777102232e-05, "loss": 0.19609451293945312, "step": 3440 }, { "epoch": 0.4651964512040558, "grad_norm": 1.1640913486480713, "learning_rate": 1.848920045966408e-05, "loss": 0.17813396453857422, "step": 3441 }, { "epoch": 0.46533164343050276, "grad_norm": 0.8279566168785095, "learning_rate": 1.8482543415755797e-05, "loss": 0.14305484294891357, "step": 3442 }, { "epoch": 0.46546683565694974, "grad_norm": 2.2585926055908203, "learning_rate": 1.8475885646763394e-05, "loss": 0.22636747360229492, "step": 3443 }, { "epoch": 0.4656020278833967, "grad_norm": 0.796954333782196, "learning_rate": 1.8469227154073064e-05, "loss": 0.1601715087890625, "step": 3444 }, { "epoch": 0.4657372201098437, "grad_norm": 0.6723203659057617, "learning_rate": 1.8462567939071142e-05, "loss": 0.14592552185058594, "step": 3445 }, { "epoch": 0.4658724123362907, "grad_norm": 2.2775490283966064, "learning_rate": 1.845590800314411e-05, "loss": 0.20641326904296875, "step": 3446 }, { "epoch": 0.46600760456273765, "grad_norm": 1.12370765209198, "learning_rate": 1.8449247347678607e-05, "loss": 0.18240737915039062, "step": 3447 }, { "epoch": 0.46614279678918463, "grad_norm": 1.2684212923049927, "learning_rate": 1.8442585974061405e-05, "loss": 0.14149951934814453, "step": 3448 }, { "epoch": 0.4662779890156316, "grad_norm": 1.2761445045471191, "learning_rate": 1.8435923883679452e-05, "loss": 0.1410980224609375, "step": 3449 }, { "epoch": 0.4664131812420786, "grad_norm": 0.988216757774353, "learning_rate": 1.8429261077919818e-05, "loss": 0.2061624526977539, "step": 3450 }, { "epoch": 0.46654837346852557, "grad_norm": 1.1586878299713135, "learning_rate": 1.8422597558169742e-05, "loss": 0.14987945556640625, "step": 3451 }, { "epoch": 0.46668356569497255, "grad_norm": 2.1076486110687256, "learning_rate": 1.84159333258166e-05, "loss": 0.21510791778564453, "step": 3452 }, { "epoch": 0.4668187579214195, "grad_norm": 1.7654314041137695, "learning_rate": 1.8409268382247933e-05, "loss": 0.18716001510620117, "step": 3453 }, { "epoch": 0.4669539501478665, "grad_norm": 0.6629133224487305, "learning_rate": 1.8402602728851405e-05, "loss": 0.15287065505981445, "step": 3454 }, { "epoch": 0.4670891423743135, "grad_norm": 0.46503645181655884, "learning_rate": 1.839593636701484e-05, "loss": 0.09570884704589844, "step": 3455 }, { "epoch": 0.46722433460076046, "grad_norm": 1.818081259727478, "learning_rate": 1.8389269298126214e-05, "loss": 0.16983318328857422, "step": 3456 }, { "epoch": 0.46735952682720744, "grad_norm": 2.8512046337127686, "learning_rate": 1.838260152357365e-05, "loss": 0.2278270721435547, "step": 3457 }, { "epoch": 0.4674947190536544, "grad_norm": 1.2884511947631836, "learning_rate": 1.837593304474541e-05, "loss": 0.19524097442626953, "step": 3458 }, { "epoch": 0.4676299112801014, "grad_norm": 1.0646090507507324, "learning_rate": 1.836926386302991e-05, "loss": 0.15915775299072266, "step": 3459 }, { "epoch": 0.4677651035065484, "grad_norm": 1.881972312927246, "learning_rate": 1.8362593979815696e-05, "loss": 0.16900920867919922, "step": 3460 }, { "epoch": 0.46790029573299535, "grad_norm": 2.5855352878570557, "learning_rate": 1.8355923396491496e-05, "loss": 0.15507221221923828, "step": 3461 }, { "epoch": 0.46803548795944233, "grad_norm": 0.6759036183357239, "learning_rate": 1.8349252114446138e-05, "loss": 0.10190773010253906, "step": 3462 }, { "epoch": 0.4681706801858893, "grad_norm": 3.0451323986053467, "learning_rate": 1.834258013506864e-05, "loss": 0.2116556167602539, "step": 3463 }, { "epoch": 0.4683058724123363, "grad_norm": 0.7880670428276062, "learning_rate": 1.833590745974813e-05, "loss": 0.1902482509613037, "step": 3464 }, { "epoch": 0.46844106463878327, "grad_norm": 1.0126115083694458, "learning_rate": 1.8329234089873892e-05, "loss": 0.16979169845581055, "step": 3465 }, { "epoch": 0.46857625686523025, "grad_norm": 1.4585893154144287, "learning_rate": 1.8322560026835366e-05, "loss": 0.19237709045410156, "step": 3466 }, { "epoch": 0.4687114490916772, "grad_norm": 1.211708426475525, "learning_rate": 1.8315885272022125e-05, "loss": 0.15447306632995605, "step": 3467 }, { "epoch": 0.4688466413181242, "grad_norm": 1.0668014287948608, "learning_rate": 1.830920982682389e-05, "loss": 0.19243741035461426, "step": 3468 }, { "epoch": 0.4689818335445712, "grad_norm": 1.4674735069274902, "learning_rate": 1.830253369263052e-05, "loss": 0.14574575424194336, "step": 3469 }, { "epoch": 0.46911702577101816, "grad_norm": 0.730385422706604, "learning_rate": 1.8295856870832024e-05, "loss": 0.19635009765625, "step": 3470 }, { "epoch": 0.46925221799746514, "grad_norm": 1.3035799264907837, "learning_rate": 1.828917936281855e-05, "loss": 0.14718365669250488, "step": 3471 }, { "epoch": 0.4693874102239121, "grad_norm": 0.5446547865867615, "learning_rate": 1.8282501169980396e-05, "loss": 0.10541534423828125, "step": 3472 }, { "epoch": 0.4695226024503591, "grad_norm": 1.5150097608566284, "learning_rate": 1.8275822293707992e-05, "loss": 0.18842267990112305, "step": 3473 }, { "epoch": 0.4696577946768061, "grad_norm": 0.9034777879714966, "learning_rate": 1.8269142735391917e-05, "loss": 0.15021824836730957, "step": 3474 }, { "epoch": 0.46979298690325305, "grad_norm": 0.8535484671592712, "learning_rate": 1.8262462496422893e-05, "loss": 0.13802433013916016, "step": 3475 }, { "epoch": 0.46992817912970003, "grad_norm": 1.2840960025787354, "learning_rate": 1.8255781578191778e-05, "loss": 0.18625903129577637, "step": 3476 }, { "epoch": 0.470063371356147, "grad_norm": 1.8340035676956177, "learning_rate": 1.824909998208958e-05, "loss": 0.18075108528137207, "step": 3477 }, { "epoch": 0.470198563582594, "grad_norm": 0.8349461555480957, "learning_rate": 1.8242417709507434e-05, "loss": 0.1463489532470703, "step": 3478 }, { "epoch": 0.47033375580904097, "grad_norm": 0.9402061700820923, "learning_rate": 1.8235734761836637e-05, "loss": 0.19009113311767578, "step": 3479 }, { "epoch": 0.47046894803548794, "grad_norm": 1.6222805976867676, "learning_rate": 1.82290511404686e-05, "loss": 0.1735401153564453, "step": 3480 }, { "epoch": 0.4706041402619349, "grad_norm": 1.6301549673080444, "learning_rate": 1.8222366846794904e-05, "loss": 0.24803972244262695, "step": 3481 }, { "epoch": 0.4707393324883819, "grad_norm": 1.3860433101654053, "learning_rate": 1.8215681882207238e-05, "loss": 0.196044921875, "step": 3482 }, { "epoch": 0.4708745247148289, "grad_norm": 1.435080647468567, "learning_rate": 1.8208996248097462e-05, "loss": 0.15694522857666016, "step": 3483 }, { "epoch": 0.47100971694127586, "grad_norm": 0.806428074836731, "learning_rate": 1.8202309945857557e-05, "loss": 0.10889911651611328, "step": 3484 }, { "epoch": 0.47114490916772284, "grad_norm": 1.4828214645385742, "learning_rate": 1.8195622976879638e-05, "loss": 0.12778782844543457, "step": 3485 }, { "epoch": 0.4712801013941698, "grad_norm": 1.0211106538772583, "learning_rate": 1.8188935342555977e-05, "loss": 0.1554047167301178, "step": 3486 }, { "epoch": 0.4714152936206168, "grad_norm": 1.030672311782837, "learning_rate": 1.818224704427897e-05, "loss": 0.14796066284179688, "step": 3487 }, { "epoch": 0.47155048584706377, "grad_norm": 1.0293545722961426, "learning_rate": 1.8175558083441162e-05, "loss": 0.19671058654785156, "step": 3488 }, { "epoch": 0.47168567807351075, "grad_norm": 0.7458539009094238, "learning_rate": 1.8168868461435225e-05, "loss": 0.18127822875976562, "step": 3489 }, { "epoch": 0.47182087029995773, "grad_norm": 0.8370358347892761, "learning_rate": 1.8162178179653977e-05, "loss": 0.167605459690094, "step": 3490 }, { "epoch": 0.4719560625264047, "grad_norm": 0.582918107509613, "learning_rate": 1.815548723949037e-05, "loss": 0.12580633163452148, "step": 3491 }, { "epoch": 0.4720912547528517, "grad_norm": 1.2632253170013428, "learning_rate": 1.814879564233749e-05, "loss": 0.15083742141723633, "step": 3492 }, { "epoch": 0.47222644697929866, "grad_norm": 1.1416912078857422, "learning_rate": 1.8142103389588567e-05, "loss": 0.17843055725097656, "step": 3493 }, { "epoch": 0.47236163920574564, "grad_norm": 2.356801748275757, "learning_rate": 1.813541048263696e-05, "loss": 0.1441950798034668, "step": 3494 }, { "epoch": 0.4724968314321927, "grad_norm": 0.8117491602897644, "learning_rate": 1.8128716922876178e-05, "loss": 0.19518089294433594, "step": 3495 }, { "epoch": 0.47263202365863966, "grad_norm": 0.8480263948440552, "learning_rate": 1.812202271169984e-05, "loss": 0.18100261688232422, "step": 3496 }, { "epoch": 0.47276721588508663, "grad_norm": 1.5033318996429443, "learning_rate": 1.8115327850501726e-05, "loss": 0.20951157808303833, "step": 3497 }, { "epoch": 0.4729024081115336, "grad_norm": 1.579942226409912, "learning_rate": 1.8108632340675746e-05, "loss": 0.16703033447265625, "step": 3498 }, { "epoch": 0.4730376003379806, "grad_norm": 0.76331627368927, "learning_rate": 1.810193618361593e-05, "loss": 0.15021705627441406, "step": 3499 }, { "epoch": 0.47317279256442757, "grad_norm": 0.8473283648490906, "learning_rate": 1.8095239380716464e-05, "loss": 0.20316314697265625, "step": 3500 }, { "epoch": 0.47330798479087455, "grad_norm": 0.9171915054321289, "learning_rate": 1.808854193337165e-05, "loss": 0.1653122901916504, "step": 3501 }, { "epoch": 0.4734431770173215, "grad_norm": 0.8565853834152222, "learning_rate": 1.8081843842975935e-05, "loss": 0.1794414520263672, "step": 3502 }, { "epoch": 0.4735783692437685, "grad_norm": 1.4016047716140747, "learning_rate": 1.80751451109239e-05, "loss": 0.17153263092041016, "step": 3503 }, { "epoch": 0.4737135614702155, "grad_norm": 0.8179826140403748, "learning_rate": 1.806844573861025e-05, "loss": 0.15096187591552734, "step": 3504 }, { "epoch": 0.47384875369666246, "grad_norm": 1.8277356624603271, "learning_rate": 1.806174572742984e-05, "loss": 0.1335737705230713, "step": 3505 }, { "epoch": 0.47398394592310944, "grad_norm": 2.086305856704712, "learning_rate": 1.8055045078777634e-05, "loss": 0.1453789472579956, "step": 3506 }, { "epoch": 0.4741191381495564, "grad_norm": 1.1727648973464966, "learning_rate": 1.8048343794048762e-05, "loss": 0.18294525146484375, "step": 3507 }, { "epoch": 0.4742543303760034, "grad_norm": 0.7482226490974426, "learning_rate": 1.8041641874638445e-05, "loss": 0.14932632446289062, "step": 3508 }, { "epoch": 0.4743895226024504, "grad_norm": 1.0309417247772217, "learning_rate": 1.8034939321942077e-05, "loss": 0.16088390350341797, "step": 3509 }, { "epoch": 0.47452471482889735, "grad_norm": 0.9488224387168884, "learning_rate": 1.8028236137355154e-05, "loss": 0.15990209579467773, "step": 3510 }, { "epoch": 0.47465990705534433, "grad_norm": 1.1417396068572998, "learning_rate": 1.8021532322273327e-05, "loss": 0.1642436981201172, "step": 3511 }, { "epoch": 0.4747950992817913, "grad_norm": 0.6780730485916138, "learning_rate": 1.8014827878092347e-05, "loss": 0.13392972946166992, "step": 3512 }, { "epoch": 0.4749302915082383, "grad_norm": 0.7557806372642517, "learning_rate": 1.800812280620813e-05, "loss": 0.15101051330566406, "step": 3513 }, { "epoch": 0.47506548373468527, "grad_norm": 2.0491278171539307, "learning_rate": 1.80014171080167e-05, "loss": 0.2699241638183594, "step": 3514 }, { "epoch": 0.47520067596113225, "grad_norm": 1.1695258617401123, "learning_rate": 1.7994710784914227e-05, "loss": 0.15940046310424805, "step": 3515 }, { "epoch": 0.4753358681875792, "grad_norm": 1.0257755517959595, "learning_rate": 1.7988003838297e-05, "loss": 0.15984225273132324, "step": 3516 }, { "epoch": 0.4754710604140262, "grad_norm": 1.2689498662948608, "learning_rate": 1.7981296269561438e-05, "loss": 0.15655231475830078, "step": 3517 }, { "epoch": 0.4756062526404732, "grad_norm": 1.4033067226409912, "learning_rate": 1.7974588080104095e-05, "loss": 0.15663385391235352, "step": 3518 }, { "epoch": 0.47574144486692016, "grad_norm": 1.3479342460632324, "learning_rate": 1.7967879271321648e-05, "loss": 0.15413331985473633, "step": 3519 }, { "epoch": 0.47587663709336714, "grad_norm": 1.155164361000061, "learning_rate": 1.7961169844610913e-05, "loss": 0.204315185546875, "step": 3520 }, { "epoch": 0.4760118293198141, "grad_norm": 0.8277118802070618, "learning_rate": 1.795445980136883e-05, "loss": 0.15469026565551758, "step": 3521 }, { "epoch": 0.4761470215462611, "grad_norm": 1.588104009628296, "learning_rate": 1.794774914299245e-05, "loss": 0.14812660217285156, "step": 3522 }, { "epoch": 0.4762822137727081, "grad_norm": 1.9750038385391235, "learning_rate": 1.794103787087899e-05, "loss": 0.19992446899414062, "step": 3523 }, { "epoch": 0.47641740599915505, "grad_norm": 0.6098859310150146, "learning_rate": 1.7934325986425755e-05, "loss": 0.12087976932525635, "step": 3524 }, { "epoch": 0.47655259822560203, "grad_norm": 0.879057765007019, "learning_rate": 1.7927613491030204e-05, "loss": 0.14919328689575195, "step": 3525 }, { "epoch": 0.476687790452049, "grad_norm": 1.123022198677063, "learning_rate": 1.7920900386089913e-05, "loss": 0.2040081024169922, "step": 3526 }, { "epoch": 0.476822982678496, "grad_norm": 1.0244865417480469, "learning_rate": 1.7914186673002588e-05, "loss": 0.21220779418945312, "step": 3527 }, { "epoch": 0.47695817490494297, "grad_norm": 2.2303543090820312, "learning_rate": 1.790747235316605e-05, "loss": 0.15620040893554688, "step": 3528 }, { "epoch": 0.47709336713138994, "grad_norm": 1.6135231256484985, "learning_rate": 1.790075742797827e-05, "loss": 0.23848295211791992, "step": 3529 }, { "epoch": 0.4772285593578369, "grad_norm": 0.9326907992362976, "learning_rate": 1.789404189883732e-05, "loss": 0.1906147003173828, "step": 3530 }, { "epoch": 0.4773637515842839, "grad_norm": 1.431780457496643, "learning_rate": 1.7887325767141415e-05, "loss": 0.24800872802734375, "step": 3531 }, { "epoch": 0.4774989438107309, "grad_norm": 0.8666039109230042, "learning_rate": 1.7880609034288894e-05, "loss": 0.1570730209350586, "step": 3532 }, { "epoch": 0.47763413603717786, "grad_norm": 1.049560308456421, "learning_rate": 1.7873891701678208e-05, "loss": 0.1844654083251953, "step": 3533 }, { "epoch": 0.47776932826362484, "grad_norm": 1.7594057321548462, "learning_rate": 1.786717377070794e-05, "loss": 0.16901493072509766, "step": 3534 }, { "epoch": 0.4779045204900718, "grad_norm": 0.7886475324630737, "learning_rate": 1.7860455242776803e-05, "loss": 0.18465614318847656, "step": 3535 }, { "epoch": 0.4780397127165188, "grad_norm": 1.2098989486694336, "learning_rate": 1.7853736119283635e-05, "loss": 0.19279992580413818, "step": 3536 }, { "epoch": 0.4781749049429658, "grad_norm": 0.8562732338905334, "learning_rate": 1.7847016401627388e-05, "loss": 0.20071792602539062, "step": 3537 }, { "epoch": 0.47831009716941275, "grad_norm": 2.1108808517456055, "learning_rate": 1.7840296091207144e-05, "loss": 0.17426657676696777, "step": 3538 }, { "epoch": 0.47844528939585973, "grad_norm": 0.8799384236335754, "learning_rate": 1.7833575189422107e-05, "loss": 0.17475128173828125, "step": 3539 }, { "epoch": 0.4785804816223067, "grad_norm": 1.1155035495758057, "learning_rate": 1.7826853697671604e-05, "loss": 0.164093017578125, "step": 3540 }, { "epoch": 0.4787156738487537, "grad_norm": 3.17952561378479, "learning_rate": 1.782013161735509e-05, "loss": 0.23760032653808594, "step": 3541 }, { "epoch": 0.47885086607520067, "grad_norm": 1.0523217916488647, "learning_rate": 1.781340894987213e-05, "loss": 0.16167926788330078, "step": 3542 }, { "epoch": 0.47898605830164764, "grad_norm": 1.0254875421524048, "learning_rate": 1.7806685696622426e-05, "loss": 0.1648855209350586, "step": 3543 }, { "epoch": 0.4791212505280946, "grad_norm": 1.4180539846420288, "learning_rate": 1.779996185900579e-05, "loss": 0.17851829528808594, "step": 3544 }, { "epoch": 0.4792564427545416, "grad_norm": 1.5110746622085571, "learning_rate": 1.7793237438422165e-05, "loss": 0.13212299346923828, "step": 3545 }, { "epoch": 0.4793916349809886, "grad_norm": 1.038517713546753, "learning_rate": 1.7786512436271617e-05, "loss": 0.23049545288085938, "step": 3546 }, { "epoch": 0.47952682720743556, "grad_norm": 2.5513124465942383, "learning_rate": 1.777978685395431e-05, "loss": 0.21246719360351562, "step": 3547 }, { "epoch": 0.47966201943388254, "grad_norm": 2.3631436824798584, "learning_rate": 1.7773060692870564e-05, "loss": 0.15679168701171875, "step": 3548 }, { "epoch": 0.4797972116603295, "grad_norm": 1.9723563194274902, "learning_rate": 1.7766333954420794e-05, "loss": 0.2117905616760254, "step": 3549 }, { "epoch": 0.4799324038867765, "grad_norm": 1.1882332563400269, "learning_rate": 1.775960664000554e-05, "loss": 0.2038745880126953, "step": 3550 }, { "epoch": 0.48006759611322347, "grad_norm": 2.519015073776245, "learning_rate": 1.7752878751025463e-05, "loss": 0.2249622344970703, "step": 3551 }, { "epoch": 0.48020278833967045, "grad_norm": 0.7844443917274475, "learning_rate": 1.7746150288881352e-05, "loss": 0.1720128059387207, "step": 3552 }, { "epoch": 0.48033798056611743, "grad_norm": 1.2197054624557495, "learning_rate": 1.7739421254974114e-05, "loss": 0.2151031494140625, "step": 3553 }, { "epoch": 0.4804731727925644, "grad_norm": 1.1257039308547974, "learning_rate": 1.7732691650704756e-05, "loss": 0.16568279266357422, "step": 3554 }, { "epoch": 0.4806083650190114, "grad_norm": 1.1142538785934448, "learning_rate": 1.7725961477474423e-05, "loss": 0.14854001998901367, "step": 3555 }, { "epoch": 0.48074355724545836, "grad_norm": 0.723314642906189, "learning_rate": 1.7719230736684375e-05, "loss": 0.12540531158447266, "step": 3556 }, { "epoch": 0.48087874947190534, "grad_norm": 0.7580430507659912, "learning_rate": 1.771249942973599e-05, "loss": 0.16405725479125977, "step": 3557 }, { "epoch": 0.4810139416983523, "grad_norm": 2.516895294189453, "learning_rate": 1.7705767558030756e-05, "loss": 0.1693258285522461, "step": 3558 }, { "epoch": 0.4811491339247993, "grad_norm": 1.2762389183044434, "learning_rate": 1.769903512297029e-05, "loss": 0.20034027099609375, "step": 3559 }, { "epoch": 0.4812843261512463, "grad_norm": 2.2060084342956543, "learning_rate": 1.7692302125956315e-05, "loss": 0.24058151245117188, "step": 3560 }, { "epoch": 0.48141951837769326, "grad_norm": 1.346879243850708, "learning_rate": 1.768556856839068e-05, "loss": 0.19856834411621094, "step": 3561 }, { "epoch": 0.48155471060414023, "grad_norm": 2.351047992706299, "learning_rate": 1.767883445167535e-05, "loss": 0.2039813995361328, "step": 3562 }, { "epoch": 0.48168990283058727, "grad_norm": 2.3243563175201416, "learning_rate": 1.7672099777212398e-05, "loss": 0.18050861358642578, "step": 3563 }, { "epoch": 0.48182509505703425, "grad_norm": 3.619431972503662, "learning_rate": 1.7665364546404034e-05, "loss": 0.22363626956939697, "step": 3564 }, { "epoch": 0.4819602872834812, "grad_norm": 1.8494421243667603, "learning_rate": 1.7658628760652548e-05, "loss": 0.1747303009033203, "step": 3565 }, { "epoch": 0.4820954795099282, "grad_norm": 2.077983856201172, "learning_rate": 1.765189242136038e-05, "loss": 0.21093487739562988, "step": 3566 }, { "epoch": 0.4822306717363752, "grad_norm": 0.6678910851478577, "learning_rate": 1.7645155529930065e-05, "loss": 0.14583158493041992, "step": 3567 }, { "epoch": 0.48236586396282216, "grad_norm": 1.4192137718200684, "learning_rate": 1.763841808776426e-05, "loss": 0.21659469604492188, "step": 3568 }, { "epoch": 0.48250105618926914, "grad_norm": 1.13424551486969, "learning_rate": 1.763168009626575e-05, "loss": 0.18615365028381348, "step": 3569 }, { "epoch": 0.4826362484157161, "grad_norm": 2.837808847427368, "learning_rate": 1.7624941556837406e-05, "loss": 0.16075420379638672, "step": 3570 }, { "epoch": 0.4827714406421631, "grad_norm": 0.8824772834777832, "learning_rate": 1.7618202470882233e-05, "loss": 0.1805715560913086, "step": 3571 }, { "epoch": 0.4829066328686101, "grad_norm": 1.3237603902816772, "learning_rate": 1.7611462839803336e-05, "loss": 0.14923858642578125, "step": 3572 }, { "epoch": 0.48304182509505705, "grad_norm": 2.017673969268799, "learning_rate": 1.760472266500396e-05, "loss": 0.22539281845092773, "step": 3573 }, { "epoch": 0.48317701732150403, "grad_norm": 1.4548580646514893, "learning_rate": 1.759798194788743e-05, "loss": 0.15132999420166016, "step": 3574 }, { "epoch": 0.483312209547951, "grad_norm": 1.2383757829666138, "learning_rate": 1.75912406898572e-05, "loss": 0.18599319458007812, "step": 3575 }, { "epoch": 0.483447401774398, "grad_norm": 0.8775805234909058, "learning_rate": 1.758449889231685e-05, "loss": 0.19205284118652344, "step": 3576 }, { "epoch": 0.48358259400084497, "grad_norm": 1.3527287244796753, "learning_rate": 1.757775655667004e-05, "loss": 0.26311492919921875, "step": 3577 }, { "epoch": 0.48371778622729195, "grad_norm": 1.782068133354187, "learning_rate": 1.757101368432057e-05, "loss": 0.19730281829833984, "step": 3578 }, { "epoch": 0.4838529784537389, "grad_norm": 2.465442180633545, "learning_rate": 1.7564270276672343e-05, "loss": 0.19033241271972656, "step": 3579 }, { "epoch": 0.4839881706801859, "grad_norm": 1.1994714736938477, "learning_rate": 1.7557526335129372e-05, "loss": 0.17396140098571777, "step": 3580 }, { "epoch": 0.4841233629066329, "grad_norm": 2.444040298461914, "learning_rate": 1.7550781861095774e-05, "loss": 0.20524978637695312, "step": 3581 }, { "epoch": 0.48425855513307986, "grad_norm": 0.9775839447975159, "learning_rate": 1.754403685597579e-05, "loss": 0.12641441822052002, "step": 3582 }, { "epoch": 0.48439374735952684, "grad_norm": 1.0033881664276123, "learning_rate": 1.7537291321173773e-05, "loss": 0.20652008056640625, "step": 3583 }, { "epoch": 0.4845289395859738, "grad_norm": 1.5384173393249512, "learning_rate": 1.7530545258094165e-05, "loss": 0.18323099613189697, "step": 3584 }, { "epoch": 0.4846641318124208, "grad_norm": 1.7562642097473145, "learning_rate": 1.7523798668141548e-05, "loss": 0.19761085510253906, "step": 3585 }, { "epoch": 0.4847993240388678, "grad_norm": 2.245157241821289, "learning_rate": 1.7517051552720584e-05, "loss": 0.24962902069091797, "step": 3586 }, { "epoch": 0.48493451626531475, "grad_norm": 1.066164255142212, "learning_rate": 1.7510303913236066e-05, "loss": 0.18131589889526367, "step": 3587 }, { "epoch": 0.48506970849176173, "grad_norm": 0.8821468949317932, "learning_rate": 1.7503555751092883e-05, "loss": 0.13342809677124023, "step": 3588 }, { "epoch": 0.4852049007182087, "grad_norm": 1.1430426836013794, "learning_rate": 1.7496807067696046e-05, "loss": 0.1805424690246582, "step": 3589 }, { "epoch": 0.4853400929446557, "grad_norm": 0.9823818802833557, "learning_rate": 1.7490057864450665e-05, "loss": 0.20342540740966797, "step": 3590 }, { "epoch": 0.48547528517110267, "grad_norm": 0.8003551959991455, "learning_rate": 1.748330814276195e-05, "loss": 0.1759796142578125, "step": 3591 }, { "epoch": 0.48561047739754964, "grad_norm": 1.0914113521575928, "learning_rate": 1.7476557904035243e-05, "loss": 0.17968034744262695, "step": 3592 }, { "epoch": 0.4857456696239966, "grad_norm": 1.8646550178527832, "learning_rate": 1.7469807149675973e-05, "loss": 0.1932525634765625, "step": 3593 }, { "epoch": 0.4858808618504436, "grad_norm": 1.0126187801361084, "learning_rate": 1.7463055881089685e-05, "loss": 0.22491276264190674, "step": 3594 }, { "epoch": 0.4860160540768906, "grad_norm": 1.840192437171936, "learning_rate": 1.7456304099682024e-05, "loss": 0.1873321533203125, "step": 3595 }, { "epoch": 0.48615124630333756, "grad_norm": 1.2671908140182495, "learning_rate": 1.7449551806858756e-05, "loss": 0.12606430053710938, "step": 3596 }, { "epoch": 0.48628643852978454, "grad_norm": 1.216102123260498, "learning_rate": 1.7442799004025733e-05, "loss": 0.16867345571517944, "step": 3597 }, { "epoch": 0.4864216307562315, "grad_norm": 0.6935027837753296, "learning_rate": 1.7436045692588934e-05, "loss": 0.09225940704345703, "step": 3598 }, { "epoch": 0.4865568229826785, "grad_norm": 1.234215259552002, "learning_rate": 1.742929187395443e-05, "loss": 0.23068809509277344, "step": 3599 }, { "epoch": 0.4866920152091255, "grad_norm": 1.2269054651260376, "learning_rate": 1.7422537549528402e-05, "loss": 0.21541976928710938, "step": 3600 }, { "epoch": 0.48682720743557245, "grad_norm": 0.6758655905723572, "learning_rate": 1.7415782720717147e-05, "loss": 0.12720251083374023, "step": 3601 }, { "epoch": 0.48696239966201943, "grad_norm": 0.6554602980613708, "learning_rate": 1.740902738892704e-05, "loss": 0.15577435493469238, "step": 3602 }, { "epoch": 0.4870975918884664, "grad_norm": 1.1283648014068604, "learning_rate": 1.7402271555564585e-05, "loss": 0.1436305046081543, "step": 3603 }, { "epoch": 0.4872327841149134, "grad_norm": 1.6576296091079712, "learning_rate": 1.739551522203638e-05, "loss": 0.18545818328857422, "step": 3604 }, { "epoch": 0.48736797634136036, "grad_norm": 1.4805735349655151, "learning_rate": 1.738875838974913e-05, "loss": 0.16046762466430664, "step": 3605 }, { "epoch": 0.48750316856780734, "grad_norm": 0.9514274001121521, "learning_rate": 1.7382001060109652e-05, "loss": 0.17522907257080078, "step": 3606 }, { "epoch": 0.4876383607942543, "grad_norm": 1.2427281141281128, "learning_rate": 1.7375243234524843e-05, "loss": 0.20197582244873047, "step": 3607 }, { "epoch": 0.4877735530207013, "grad_norm": 1.6158555746078491, "learning_rate": 1.736848491440173e-05, "loss": 0.1747041940689087, "step": 3608 }, { "epoch": 0.4879087452471483, "grad_norm": 0.7927384376525879, "learning_rate": 1.7361726101147424e-05, "loss": 0.1530170440673828, "step": 3609 }, { "epoch": 0.48804393747359526, "grad_norm": 1.0317902565002441, "learning_rate": 1.7354966796169157e-05, "loss": 0.18799781799316406, "step": 3610 }, { "epoch": 0.48817912970004224, "grad_norm": 1.3067508935928345, "learning_rate": 1.734820700087424e-05, "loss": 0.20744752883911133, "step": 3611 }, { "epoch": 0.4883143219264892, "grad_norm": 1.0842506885528564, "learning_rate": 1.7341446716670103e-05, "loss": 0.20425748825073242, "step": 3612 }, { "epoch": 0.4884495141529362, "grad_norm": 2.8209221363067627, "learning_rate": 1.7334685944964272e-05, "loss": 0.19693565368652344, "step": 3613 }, { "epoch": 0.48858470637938317, "grad_norm": 1.5135897397994995, "learning_rate": 1.732792468716438e-05, "loss": 0.1494002342224121, "step": 3614 }, { "epoch": 0.48871989860583015, "grad_norm": 2.901961326599121, "learning_rate": 1.7321162944678155e-05, "loss": 0.1833209991455078, "step": 3615 }, { "epoch": 0.48885509083227713, "grad_norm": 0.9659711718559265, "learning_rate": 1.7314400718913425e-05, "loss": 0.20191383361816406, "step": 3616 }, { "epoch": 0.4889902830587241, "grad_norm": 0.9311013221740723, "learning_rate": 1.7307638011278126e-05, "loss": 0.18041229248046875, "step": 3617 }, { "epoch": 0.4891254752851711, "grad_norm": 0.6251784563064575, "learning_rate": 1.7300874823180284e-05, "loss": 0.13950061798095703, "step": 3618 }, { "epoch": 0.48926066751161806, "grad_norm": 1.6965829133987427, "learning_rate": 1.7294111156028034e-05, "loss": 0.17876195907592773, "step": 3619 }, { "epoch": 0.48939585973806504, "grad_norm": 0.8463488221168518, "learning_rate": 1.7287347011229605e-05, "loss": 0.20867156982421875, "step": 3620 }, { "epoch": 0.489531051964512, "grad_norm": 0.7450462579727173, "learning_rate": 1.7280582390193333e-05, "loss": 0.1433115005493164, "step": 3621 }, { "epoch": 0.489666244190959, "grad_norm": 2.0672967433929443, "learning_rate": 1.7273817294327653e-05, "loss": 0.20038509368896484, "step": 3622 }, { "epoch": 0.489801436417406, "grad_norm": 1.4547683000564575, "learning_rate": 1.726705172504108e-05, "loss": 0.19504737854003906, "step": 3623 }, { "epoch": 0.48993662864385296, "grad_norm": 1.000737190246582, "learning_rate": 1.7260285683742248e-05, "loss": 0.17860937118530273, "step": 3624 }, { "epoch": 0.49007182087029993, "grad_norm": 0.5829146504402161, "learning_rate": 1.7253519171839883e-05, "loss": 0.12136948108673096, "step": 3625 }, { "epoch": 0.4902070130967469, "grad_norm": 0.9649888873100281, "learning_rate": 1.724675219074281e-05, "loss": 0.1799163818359375, "step": 3626 }, { "epoch": 0.4903422053231939, "grad_norm": 1.3372387886047363, "learning_rate": 1.7239984741859957e-05, "loss": 0.11334705352783203, "step": 3627 }, { "epoch": 0.49047739754964087, "grad_norm": 0.9142506718635559, "learning_rate": 1.7233216826600324e-05, "loss": 0.16736221313476562, "step": 3628 }, { "epoch": 0.49061258977608785, "grad_norm": 1.42210853099823, "learning_rate": 1.7226448446373047e-05, "loss": 0.12766671180725098, "step": 3629 }, { "epoch": 0.4907477820025348, "grad_norm": 1.2180275917053223, "learning_rate": 1.7219679602587326e-05, "loss": 0.1990499496459961, "step": 3630 }, { "epoch": 0.49088297422898186, "grad_norm": 1.6852333545684814, "learning_rate": 1.7212910296652476e-05, "loss": 0.16421890258789062, "step": 3631 }, { "epoch": 0.49101816645542884, "grad_norm": 0.8113219738006592, "learning_rate": 1.7206140529977904e-05, "loss": 0.11036968231201172, "step": 3632 }, { "epoch": 0.4911533586818758, "grad_norm": 0.9482988715171814, "learning_rate": 1.719937030397311e-05, "loss": 0.1797924041748047, "step": 3633 }, { "epoch": 0.4912885509083228, "grad_norm": 0.8661326169967651, "learning_rate": 1.7192599620047683e-05, "loss": 0.11438751220703125, "step": 3634 }, { "epoch": 0.4914237431347698, "grad_norm": 2.282918691635132, "learning_rate": 1.718582847961133e-05, "loss": 0.23289012908935547, "step": 3635 }, { "epoch": 0.49155893536121675, "grad_norm": 1.3346805572509766, "learning_rate": 1.7179056884073826e-05, "loss": 0.19357538223266602, "step": 3636 }, { "epoch": 0.49169412758766373, "grad_norm": 1.8274191617965698, "learning_rate": 1.717228483484506e-05, "loss": 0.1923694610595703, "step": 3637 }, { "epoch": 0.4918293198141107, "grad_norm": 2.077373743057251, "learning_rate": 1.7165512333335013e-05, "loss": 0.17790699005126953, "step": 3638 }, { "epoch": 0.4919645120405577, "grad_norm": 0.8957392573356628, "learning_rate": 1.715873938095374e-05, "loss": 0.1672954559326172, "step": 3639 }, { "epoch": 0.49209970426700467, "grad_norm": 1.4486415386199951, "learning_rate": 1.7151965979111427e-05, "loss": 0.1927928924560547, "step": 3640 }, { "epoch": 0.49223489649345165, "grad_norm": 1.1866776943206787, "learning_rate": 1.7145192129218313e-05, "loss": 0.16849040985107422, "step": 3641 }, { "epoch": 0.4923700887198986, "grad_norm": 1.1296336650848389, "learning_rate": 1.7138417832684763e-05, "loss": 0.17771363258361816, "step": 3642 }, { "epoch": 0.4925052809463456, "grad_norm": 0.9979537725448608, "learning_rate": 1.7131643090921216e-05, "loss": 0.1480550765991211, "step": 3643 }, { "epoch": 0.4926404731727926, "grad_norm": 2.2543773651123047, "learning_rate": 1.712486790533821e-05, "loss": 0.2135171890258789, "step": 3644 }, { "epoch": 0.49277566539923956, "grad_norm": 1.6149193048477173, "learning_rate": 1.7118092277346372e-05, "loss": 0.18088722229003906, "step": 3645 }, { "epoch": 0.49291085762568654, "grad_norm": 1.2460503578186035, "learning_rate": 1.7111316208356428e-05, "loss": 0.2027912139892578, "step": 3646 }, { "epoch": 0.4930460498521335, "grad_norm": 0.648792564868927, "learning_rate": 1.7104539699779192e-05, "loss": 0.12471961975097656, "step": 3647 }, { "epoch": 0.4931812420785805, "grad_norm": 0.8805149793624878, "learning_rate": 1.709776275302557e-05, "loss": 0.15224552154541016, "step": 3648 }, { "epoch": 0.4933164343050275, "grad_norm": 3.027792453765869, "learning_rate": 1.7090985369506555e-05, "loss": 0.27625274658203125, "step": 3649 }, { "epoch": 0.49345162653147445, "grad_norm": 0.8288435339927673, "learning_rate": 1.708420755063323e-05, "loss": 0.1549924612045288, "step": 3650 }, { "epoch": 0.49358681875792143, "grad_norm": 0.9256892800331116, "learning_rate": 1.707742929781678e-05, "loss": 0.13416099548339844, "step": 3651 }, { "epoch": 0.4937220109843684, "grad_norm": 0.6725975275039673, "learning_rate": 1.707065061246848e-05, "loss": 0.1769275665283203, "step": 3652 }, { "epoch": 0.4938572032108154, "grad_norm": 1.0161848068237305, "learning_rate": 1.7063871495999677e-05, "loss": 0.1681079864501953, "step": 3653 }, { "epoch": 0.49399239543726237, "grad_norm": 0.9416725635528564, "learning_rate": 1.705709194982182e-05, "loss": 0.15160465240478516, "step": 3654 }, { "epoch": 0.49412758766370934, "grad_norm": 0.9839097261428833, "learning_rate": 1.7050311975346447e-05, "loss": 0.22127151489257812, "step": 3655 }, { "epoch": 0.4942627798901563, "grad_norm": 1.8227262496948242, "learning_rate": 1.704353157398519e-05, "loss": 0.16324293613433838, "step": 3656 }, { "epoch": 0.4943979721166033, "grad_norm": 0.7980266809463501, "learning_rate": 1.7036750747149764e-05, "loss": 0.14069843292236328, "step": 3657 }, { "epoch": 0.4945331643430503, "grad_norm": 1.1018174886703491, "learning_rate": 1.702996949625197e-05, "loss": 0.1280226707458496, "step": 3658 }, { "epoch": 0.49466835656949726, "grad_norm": 1.1234852075576782, "learning_rate": 1.7023187822703702e-05, "loss": 0.17341399192810059, "step": 3659 }, { "epoch": 0.49480354879594424, "grad_norm": 1.6097593307495117, "learning_rate": 1.7016405727916936e-05, "loss": 0.21378421783447266, "step": 3660 }, { "epoch": 0.4949387410223912, "grad_norm": 1.690238118171692, "learning_rate": 1.700962321330375e-05, "loss": 0.18404769897460938, "step": 3661 }, { "epoch": 0.4950739332488382, "grad_norm": 1.0678889751434326, "learning_rate": 1.700284028027629e-05, "loss": 0.18759942054748535, "step": 3662 }, { "epoch": 0.4952091254752852, "grad_norm": 0.9970592260360718, "learning_rate": 1.6996056930246807e-05, "loss": 0.18451976776123047, "step": 3663 }, { "epoch": 0.49534431770173215, "grad_norm": 2.1461830139160156, "learning_rate": 1.6989273164627626e-05, "loss": 0.1855611801147461, "step": 3664 }, { "epoch": 0.49547950992817913, "grad_norm": 1.6247203350067139, "learning_rate": 1.6982488984831163e-05, "loss": 0.19552898406982422, "step": 3665 }, { "epoch": 0.4956147021546261, "grad_norm": 0.6429634690284729, "learning_rate": 1.697570439226992e-05, "loss": 0.1354351043701172, "step": 3666 }, { "epoch": 0.4957498943810731, "grad_norm": 1.2305651903152466, "learning_rate": 1.6968919388356486e-05, "loss": 0.21979141235351562, "step": 3667 }, { "epoch": 0.49588508660752006, "grad_norm": 0.9940130710601807, "learning_rate": 1.696213397450354e-05, "loss": 0.16416454315185547, "step": 3668 }, { "epoch": 0.49602027883396704, "grad_norm": 1.403414249420166, "learning_rate": 1.695534815212384e-05, "loss": 0.18146896362304688, "step": 3669 }, { "epoch": 0.496155471060414, "grad_norm": 1.5657511949539185, "learning_rate": 1.6948561922630223e-05, "loss": 0.1825275421142578, "step": 3670 }, { "epoch": 0.496290663286861, "grad_norm": 1.4954921007156372, "learning_rate": 1.694177528743562e-05, "loss": 0.19891357421875, "step": 3671 }, { "epoch": 0.496425855513308, "grad_norm": 1.6047428846359253, "learning_rate": 1.6934988247953053e-05, "loss": 0.1822667121887207, "step": 3672 }, { "epoch": 0.49656104773975496, "grad_norm": 0.9843138456344604, "learning_rate": 1.6928200805595606e-05, "loss": 0.18076705932617188, "step": 3673 }, { "epoch": 0.49669623996620194, "grad_norm": 0.8712040185928345, "learning_rate": 1.6921412961776475e-05, "loss": 0.1888713836669922, "step": 3674 }, { "epoch": 0.4968314321926489, "grad_norm": 1.2058136463165283, "learning_rate": 1.6914624717908922e-05, "loss": 0.16952574253082275, "step": 3675 }, { "epoch": 0.4969666244190959, "grad_norm": 0.7816467881202698, "learning_rate": 1.6907836075406288e-05, "loss": 0.15458011627197266, "step": 3676 }, { "epoch": 0.49710181664554287, "grad_norm": 1.2901582717895508, "learning_rate": 1.690104703568201e-05, "loss": 0.15792584419250488, "step": 3677 }, { "epoch": 0.49723700887198985, "grad_norm": 1.174155831336975, "learning_rate": 1.68942576001496e-05, "loss": 0.15606403350830078, "step": 3678 }, { "epoch": 0.49737220109843683, "grad_norm": 1.0145599842071533, "learning_rate": 1.6887467770222658e-05, "loss": 0.17368626594543457, "step": 3679 }, { "epoch": 0.4975073933248838, "grad_norm": 0.8530884385108948, "learning_rate": 1.6880677547314865e-05, "loss": 0.17932939529418945, "step": 3680 }, { "epoch": 0.4976425855513308, "grad_norm": 1.446481466293335, "learning_rate": 1.6873886932839973e-05, "loss": 0.17049932479858398, "step": 3681 }, { "epoch": 0.49777777777777776, "grad_norm": 1.398655891418457, "learning_rate": 1.686709592821183e-05, "loss": 0.14878273010253906, "step": 3682 }, { "epoch": 0.49791297000422474, "grad_norm": 0.8881034851074219, "learning_rate": 1.6860304534844355e-05, "loss": 0.19350051879882812, "step": 3683 }, { "epoch": 0.4980481622306717, "grad_norm": 1.6435049772262573, "learning_rate": 1.6853512754151556e-05, "loss": 0.23624801635742188, "step": 3684 }, { "epoch": 0.4981833544571187, "grad_norm": 0.7483974695205688, "learning_rate": 1.684672058754752e-05, "loss": 0.17313575744628906, "step": 3685 }, { "epoch": 0.4983185466835657, "grad_norm": 1.5263569355010986, "learning_rate": 1.6839928036446416e-05, "loss": 0.2442340850830078, "step": 3686 }, { "epoch": 0.49845373891001266, "grad_norm": 1.6226603984832764, "learning_rate": 1.6833135102262473e-05, "loss": 0.17629051208496094, "step": 3687 }, { "epoch": 0.49858893113645963, "grad_norm": 1.338348388671875, "learning_rate": 1.682634178641003e-05, "loss": 0.17438220977783203, "step": 3688 }, { "epoch": 0.4987241233629066, "grad_norm": 1.0375688076019287, "learning_rate": 1.6819548090303485e-05, "loss": 0.24475574493408203, "step": 3689 }, { "epoch": 0.4988593155893536, "grad_norm": 0.9845676422119141, "learning_rate": 1.6812754015357328e-05, "loss": 0.20128250122070312, "step": 3690 }, { "epoch": 0.49899450781580057, "grad_norm": 0.7193557024002075, "learning_rate": 1.680595956298612e-05, "loss": 0.17079925537109375, "step": 3691 }, { "epoch": 0.49912970004224755, "grad_norm": 1.9443861246109009, "learning_rate": 1.6799164734604497e-05, "loss": 0.18760833144187927, "step": 3692 }, { "epoch": 0.4992648922686945, "grad_norm": 1.9532657861709595, "learning_rate": 1.6792369531627186e-05, "loss": 0.21140480041503906, "step": 3693 }, { "epoch": 0.4994000844951415, "grad_norm": 0.8906942009925842, "learning_rate": 1.6785573955468974e-05, "loss": 0.16641902923583984, "step": 3694 }, { "epoch": 0.4995352767215885, "grad_norm": 0.9688380360603333, "learning_rate": 1.6778778007544745e-05, "loss": 0.12552356719970703, "step": 3695 }, { "epoch": 0.49967046894803546, "grad_norm": 0.8391817808151245, "learning_rate": 1.6771981689269452e-05, "loss": 0.18170928955078125, "step": 3696 }, { "epoch": 0.49980566117448244, "grad_norm": 0.7562984824180603, "learning_rate": 1.6765185002058123e-05, "loss": 0.14937639236450195, "step": 3697 }, { "epoch": 0.4999408534009294, "grad_norm": 1.4676051139831543, "learning_rate": 1.6758387947325856e-05, "loss": 0.20389175415039062, "step": 3698 }, { "epoch": 0.5000760456273764, "grad_norm": 0.8659172654151917, "learning_rate": 1.6751590526487843e-05, "loss": 0.13585340976715088, "step": 3699 }, { "epoch": 0.5002112378538234, "grad_norm": 2.341825485229492, "learning_rate": 1.6744792740959347e-05, "loss": 0.21474266052246094, "step": 3700 }, { "epoch": 0.5003464300802704, "grad_norm": 1.3531475067138672, "learning_rate": 1.6737994592155697e-05, "loss": 0.2002429962158203, "step": 3701 }, { "epoch": 0.5004816223067173, "grad_norm": 0.9007554650306702, "learning_rate": 1.6731196081492307e-05, "loss": 0.16272640228271484, "step": 3702 }, { "epoch": 0.5006168145331643, "grad_norm": 1.2967593669891357, "learning_rate": 1.6724397210384655e-05, "loss": 0.13285541534423828, "step": 3703 }, { "epoch": 0.5007520067596113, "grad_norm": 1.3202705383300781, "learning_rate": 1.6717597980248308e-05, "loss": 0.19582271575927734, "step": 3704 }, { "epoch": 0.5008871989860583, "grad_norm": 0.8645565509796143, "learning_rate": 1.6710798392498904e-05, "loss": 0.18326187133789062, "step": 3705 }, { "epoch": 0.5010223912125052, "grad_norm": 2.8827970027923584, "learning_rate": 1.6703998448552154e-05, "loss": 0.2470703125, "step": 3706 }, { "epoch": 0.5011575834389522, "grad_norm": 2.1755011081695557, "learning_rate": 1.669719814982384e-05, "loss": 0.1576862335205078, "step": 3707 }, { "epoch": 0.5012927756653992, "grad_norm": 1.4156084060668945, "learning_rate": 1.6690397497729818e-05, "loss": 0.23415184020996094, "step": 3708 }, { "epoch": 0.5014279678918462, "grad_norm": 1.570707082748413, "learning_rate": 1.6683596493686028e-05, "loss": 0.19475746154785156, "step": 3709 }, { "epoch": 0.5015631601182932, "grad_norm": 0.8530634045600891, "learning_rate": 1.667679513910846e-05, "loss": 0.17422962188720703, "step": 3710 }, { "epoch": 0.5016983523447401, "grad_norm": 1.0972626209259033, "learning_rate": 1.666999343541321e-05, "loss": 0.16504573822021484, "step": 3711 }, { "epoch": 0.5018335445711871, "grad_norm": 1.0667897462844849, "learning_rate": 1.6663191384016422e-05, "loss": 0.19575119018554688, "step": 3712 }, { "epoch": 0.5019687367976341, "grad_norm": 2.908559560775757, "learning_rate": 1.6656388986334315e-05, "loss": 0.18538391590118408, "step": 3713 }, { "epoch": 0.5021039290240811, "grad_norm": 0.9312616586685181, "learning_rate": 1.6649586243783186e-05, "loss": 0.21172237396240234, "step": 3714 }, { "epoch": 0.502239121250528, "grad_norm": 1.4984121322631836, "learning_rate": 1.6642783157779405e-05, "loss": 0.17607545852661133, "step": 3715 }, { "epoch": 0.502374313476975, "grad_norm": 0.6843975782394409, "learning_rate": 1.6635979729739417e-05, "loss": 0.1334661841392517, "step": 3716 }, { "epoch": 0.502509505703422, "grad_norm": 1.4057646989822388, "learning_rate": 1.662917596107972e-05, "loss": 0.23779678344726562, "step": 3717 }, { "epoch": 0.502644697929869, "grad_norm": 0.9410556554794312, "learning_rate": 1.6622371853216904e-05, "loss": 0.17829036712646484, "step": 3718 }, { "epoch": 0.502779890156316, "grad_norm": 1.2214949131011963, "learning_rate": 1.661556740756761e-05, "loss": 0.1530008316040039, "step": 3719 }, { "epoch": 0.502915082382763, "grad_norm": 0.9343261122703552, "learning_rate": 1.6608762625548572e-05, "loss": 0.19121551513671875, "step": 3720 }, { "epoch": 0.5030502746092099, "grad_norm": 1.7384676933288574, "learning_rate": 1.6601957508576573e-05, "loss": 0.21249008178710938, "step": 3721 }, { "epoch": 0.5031854668356569, "grad_norm": 1.3669605255126953, "learning_rate": 1.659515205806848e-05, "loss": 0.1611955165863037, "step": 3722 }, { "epoch": 0.5033206590621039, "grad_norm": 1.129101037979126, "learning_rate": 1.6588346275441224e-05, "loss": 0.15634536743164062, "step": 3723 }, { "epoch": 0.5034558512885509, "grad_norm": 0.840228259563446, "learning_rate": 1.65815401621118e-05, "loss": 0.1559734344482422, "step": 3724 }, { "epoch": 0.5035910435149978, "grad_norm": 0.9109746217727661, "learning_rate": 1.657473371949728e-05, "loss": 0.14329051971435547, "step": 3725 }, { "epoch": 0.5037262357414448, "grad_norm": 0.9348533749580383, "learning_rate": 1.6567926949014805e-05, "loss": 0.18874645233154297, "step": 3726 }, { "epoch": 0.5038614279678918, "grad_norm": 1.6442244052886963, "learning_rate": 1.6561119852081574e-05, "loss": 0.1718158721923828, "step": 3727 }, { "epoch": 0.5039966201943388, "grad_norm": 1.7377567291259766, "learning_rate": 1.6554312430114868e-05, "loss": 0.16811561584472656, "step": 3728 }, { "epoch": 0.5041318124207858, "grad_norm": 0.892093300819397, "learning_rate": 1.6547504684532026e-05, "loss": 0.09259796142578125, "step": 3729 }, { "epoch": 0.5042670046472327, "grad_norm": 1.0890238285064697, "learning_rate": 1.6540696616750454e-05, "loss": 0.14309978485107422, "step": 3730 }, { "epoch": 0.5044021968736797, "grad_norm": 1.2063267230987549, "learning_rate": 1.6533888228187628e-05, "loss": 0.1577291488647461, "step": 3731 }, { "epoch": 0.5045373891001267, "grad_norm": 0.7079459428787231, "learning_rate": 1.6527079520261103e-05, "loss": 0.1163473129272461, "step": 3732 }, { "epoch": 0.5046725813265738, "grad_norm": 1.76418936252594, "learning_rate": 1.6520270494388472e-05, "loss": 0.23038387298583984, "step": 3733 }, { "epoch": 0.5048077735530208, "grad_norm": 1.257030725479126, "learning_rate": 1.6513461151987418e-05, "loss": 0.1804513931274414, "step": 3734 }, { "epoch": 0.5049429657794677, "grad_norm": 2.04951810836792, "learning_rate": 1.6506651494475678e-05, "loss": 0.17595577239990234, "step": 3735 }, { "epoch": 0.5050781580059147, "grad_norm": 1.092045783996582, "learning_rate": 1.6499841523271062e-05, "loss": 0.17078399658203125, "step": 3736 }, { "epoch": 0.5052133502323617, "grad_norm": 1.4462261199951172, "learning_rate": 1.649303123979145e-05, "loss": 0.1940937042236328, "step": 3737 }, { "epoch": 0.5053485424588087, "grad_norm": 1.9332692623138428, "learning_rate": 1.648622064545477e-05, "loss": 0.1506175994873047, "step": 3738 }, { "epoch": 0.5054837346852556, "grad_norm": 0.7635692358016968, "learning_rate": 1.6479409741679025e-05, "loss": 0.17371559143066406, "step": 3739 }, { "epoch": 0.5056189269117026, "grad_norm": 1.1756696701049805, "learning_rate": 1.6472598529882277e-05, "loss": 0.15775585174560547, "step": 3740 }, { "epoch": 0.5057541191381496, "grad_norm": 0.6450092792510986, "learning_rate": 1.646578701148267e-05, "loss": 0.1482095718383789, "step": 3741 }, { "epoch": 0.5058893113645966, "grad_norm": 1.0582799911499023, "learning_rate": 1.6458975187898384e-05, "loss": 0.21964550018310547, "step": 3742 }, { "epoch": 0.5060245035910436, "grad_norm": 0.8152438998222351, "learning_rate": 1.6452163060547687e-05, "loss": 0.189666748046875, "step": 3743 }, { "epoch": 0.5061596958174905, "grad_norm": 0.7217793464660645, "learning_rate": 1.64453506308489e-05, "loss": 0.1564791202545166, "step": 3744 }, { "epoch": 0.5062948880439375, "grad_norm": 3.440624952316284, "learning_rate": 1.64385379002204e-05, "loss": 0.24029541015625, "step": 3745 }, { "epoch": 0.5064300802703845, "grad_norm": 0.9713578224182129, "learning_rate": 1.643172487008064e-05, "loss": 0.16650390625, "step": 3746 }, { "epoch": 0.5065652724968315, "grad_norm": 1.1922633647918701, "learning_rate": 1.6424911541848124e-05, "loss": 0.1462726593017578, "step": 3747 }, { "epoch": 0.5067004647232785, "grad_norm": 1.1119835376739502, "learning_rate": 1.641809791694143e-05, "loss": 0.18700838088989258, "step": 3748 }, { "epoch": 0.5068356569497254, "grad_norm": 1.0173624753952026, "learning_rate": 1.6411283996779184e-05, "loss": 0.19211769104003906, "step": 3749 }, { "epoch": 0.5069708491761724, "grad_norm": 1.719925045967102, "learning_rate": 1.6404469782780088e-05, "loss": 0.1684722900390625, "step": 3750 }, { "epoch": 0.5071060414026194, "grad_norm": 1.3538093566894531, "learning_rate": 1.639765527636289e-05, "loss": 0.2041792869567871, "step": 3751 }, { "epoch": 0.5072412336290664, "grad_norm": 0.927183985710144, "learning_rate": 1.639084047894641e-05, "loss": 0.19186663627624512, "step": 3752 }, { "epoch": 0.5073764258555133, "grad_norm": 1.4545674324035645, "learning_rate": 1.638402539194953e-05, "loss": 0.18865203857421875, "step": 3753 }, { "epoch": 0.5075116180819603, "grad_norm": 0.9324459433555603, "learning_rate": 1.6377210016791182e-05, "loss": 0.15943527221679688, "step": 3754 }, { "epoch": 0.5076468103084073, "grad_norm": 1.7523940801620483, "learning_rate": 1.6370394354890364e-05, "loss": 0.19582366943359375, "step": 3755 }, { "epoch": 0.5077820025348543, "grad_norm": 1.3089617490768433, "learning_rate": 1.636357840766613e-05, "loss": 0.2016620635986328, "step": 3756 }, { "epoch": 0.5079171947613013, "grad_norm": 0.8876031637191772, "learning_rate": 1.6356762176537606e-05, "loss": 0.11628293991088867, "step": 3757 }, { "epoch": 0.5080523869877482, "grad_norm": 1.1217848062515259, "learning_rate": 1.6349945662923953e-05, "loss": 0.1965618133544922, "step": 3758 }, { "epoch": 0.5081875792141952, "grad_norm": 0.9714937806129456, "learning_rate": 1.634312886824442e-05, "loss": 0.18343448638916016, "step": 3759 }, { "epoch": 0.5083227714406422, "grad_norm": 0.7153787612915039, "learning_rate": 1.6336311793918298e-05, "loss": 0.16303634643554688, "step": 3760 }, { "epoch": 0.5084579636670892, "grad_norm": 1.2874958515167236, "learning_rate": 1.6329494441364925e-05, "loss": 0.21447038650512695, "step": 3761 }, { "epoch": 0.5085931558935362, "grad_norm": 0.9138535857200623, "learning_rate": 1.6322676812003727e-05, "loss": 0.15356206893920898, "step": 3762 }, { "epoch": 0.5087283481199831, "grad_norm": 1.6435976028442383, "learning_rate": 1.631585890725416e-05, "loss": 0.17088913917541504, "step": 3763 }, { "epoch": 0.5088635403464301, "grad_norm": 1.1555720567703247, "learning_rate": 1.630904072853575e-05, "loss": 0.15492010116577148, "step": 3764 }, { "epoch": 0.5089987325728771, "grad_norm": 1.2766672372817993, "learning_rate": 1.6302222277268085e-05, "loss": 0.2268085479736328, "step": 3765 }, { "epoch": 0.5091339247993241, "grad_norm": 0.8496496081352234, "learning_rate": 1.6295403554870794e-05, "loss": 0.1459789276123047, "step": 3766 }, { "epoch": 0.509269117025771, "grad_norm": 1.3848164081573486, "learning_rate": 1.6288584562763572e-05, "loss": 0.2046680450439453, "step": 3767 }, { "epoch": 0.509404309252218, "grad_norm": 2.6479380130767822, "learning_rate": 1.6281765302366176e-05, "loss": 0.24046897888183594, "step": 3768 }, { "epoch": 0.509539501478665, "grad_norm": 1.484397530555725, "learning_rate": 1.6274945775098412e-05, "loss": 0.16905546188354492, "step": 3769 }, { "epoch": 0.509674693705112, "grad_norm": 0.9125504493713379, "learning_rate": 1.6268125982380135e-05, "loss": 0.15143680572509766, "step": 3770 }, { "epoch": 0.509809885931559, "grad_norm": 1.1373809576034546, "learning_rate": 1.626130592563127e-05, "loss": 0.23040485382080078, "step": 3771 }, { "epoch": 0.5099450781580059, "grad_norm": 0.9369140267372131, "learning_rate": 1.6254485606271778e-05, "loss": 0.13931798934936523, "step": 3772 }, { "epoch": 0.5100802703844529, "grad_norm": 1.2104058265686035, "learning_rate": 1.6247665025721698e-05, "loss": 0.2452993392944336, "step": 3773 }, { "epoch": 0.5102154626108999, "grad_norm": 1.185890555381775, "learning_rate": 1.62408441854011e-05, "loss": 0.2255420684814453, "step": 3774 }, { "epoch": 0.5103506548373469, "grad_norm": 0.9688547849655151, "learning_rate": 1.6234023086730136e-05, "loss": 0.1433429718017578, "step": 3775 }, { "epoch": 0.5104858470637939, "grad_norm": 0.9642335772514343, "learning_rate": 1.622720173112898e-05, "loss": 0.16822528839111328, "step": 3776 }, { "epoch": 0.5106210392902408, "grad_norm": 1.0118387937545776, "learning_rate": 1.6220380120017874e-05, "loss": 0.15408706665039062, "step": 3777 }, { "epoch": 0.5107562315166878, "grad_norm": 2.235281467437744, "learning_rate": 1.6213558254817128e-05, "loss": 0.1883697509765625, "step": 3778 }, { "epoch": 0.5108914237431348, "grad_norm": 1.6634868383407593, "learning_rate": 1.6206736136947074e-05, "loss": 0.1699237823486328, "step": 3779 }, { "epoch": 0.5110266159695818, "grad_norm": 0.9525468349456787, "learning_rate": 1.6199913767828126e-05, "loss": 0.17570972442626953, "step": 3780 }, { "epoch": 0.5111618081960287, "grad_norm": 2.252650499343872, "learning_rate": 1.6193091148880733e-05, "loss": 0.1870652437210083, "step": 3781 }, { "epoch": 0.5112970004224757, "grad_norm": 1.009068489074707, "learning_rate": 1.61862682815254e-05, "loss": 0.18193817138671875, "step": 3782 }, { "epoch": 0.5114321926489227, "grad_norm": 1.1845040321350098, "learning_rate": 1.617944516718268e-05, "loss": 0.17667770385742188, "step": 3783 }, { "epoch": 0.5115673848753697, "grad_norm": 0.9172955751419067, "learning_rate": 1.617262180727319e-05, "loss": 0.24310684204101562, "step": 3784 }, { "epoch": 0.5117025771018167, "grad_norm": 3.2803969383239746, "learning_rate": 1.6165798203217588e-05, "loss": 0.17811203002929688, "step": 3785 }, { "epoch": 0.5118377693282636, "grad_norm": 0.9216747283935547, "learning_rate": 1.6158974356436585e-05, "loss": 0.19390869140625, "step": 3786 }, { "epoch": 0.5119729615547106, "grad_norm": 1.0886200666427612, "learning_rate": 1.6152150268350938e-05, "loss": 0.2002582550048828, "step": 3787 }, { "epoch": 0.5121081537811576, "grad_norm": 0.9912815093994141, "learning_rate": 1.6145325940381458e-05, "loss": 0.1157693862915039, "step": 3788 }, { "epoch": 0.5122433460076046, "grad_norm": 1.2103458642959595, "learning_rate": 1.6138501373949018e-05, "loss": 0.207794189453125, "step": 3789 }, { "epoch": 0.5123785382340516, "grad_norm": 0.9280535578727722, "learning_rate": 1.613167657047451e-05, "loss": 0.13831615447998047, "step": 3790 }, { "epoch": 0.5125137304604985, "grad_norm": 1.5934362411499023, "learning_rate": 1.612485153137891e-05, "loss": 0.10633134841918945, "step": 3791 }, { "epoch": 0.5126489226869455, "grad_norm": 2.1842994689941406, "learning_rate": 1.611802625808323e-05, "loss": 0.17312049865722656, "step": 3792 }, { "epoch": 0.5127841149133925, "grad_norm": 2.5150809288024902, "learning_rate": 1.611120075200851e-05, "loss": 0.20072698593139648, "step": 3793 }, { "epoch": 0.5129193071398395, "grad_norm": 1.4305858612060547, "learning_rate": 1.610437501457587e-05, "loss": 0.19089317321777344, "step": 3794 }, { "epoch": 0.5130544993662864, "grad_norm": 2.1283395290374756, "learning_rate": 1.6097549047206464e-05, "loss": 0.18995332717895508, "step": 3795 }, { "epoch": 0.5131896915927334, "grad_norm": 1.5025591850280762, "learning_rate": 1.6090722851321497e-05, "loss": 0.15617609024047852, "step": 3796 }, { "epoch": 0.5133248838191804, "grad_norm": 1.674791932106018, "learning_rate": 1.6083896428342213e-05, "loss": 0.14823579788208008, "step": 3797 }, { "epoch": 0.5134600760456274, "grad_norm": 0.7367554903030396, "learning_rate": 1.6077069779689915e-05, "loss": 0.14664316177368164, "step": 3798 }, { "epoch": 0.5135952682720744, "grad_norm": 0.8756303787231445, "learning_rate": 1.607024290678594e-05, "loss": 0.13385021686553955, "step": 3799 }, { "epoch": 0.5137304604985213, "grad_norm": 0.7423316836357117, "learning_rate": 1.6063415811051686e-05, "loss": 0.17638158798217773, "step": 3800 }, { "epoch": 0.5138656527249683, "grad_norm": 1.1345577239990234, "learning_rate": 1.6056588493908596e-05, "loss": 0.18978214263916016, "step": 3801 }, { "epoch": 0.5140008449514153, "grad_norm": 1.9460718631744385, "learning_rate": 1.604976095677814e-05, "loss": 0.23801612854003906, "step": 3802 }, { "epoch": 0.5141360371778623, "grad_norm": 0.9300512671470642, "learning_rate": 1.604293320108186e-05, "loss": 0.12186884880065918, "step": 3803 }, { "epoch": 0.5142712294043092, "grad_norm": 1.2782626152038574, "learning_rate": 1.603610522824132e-05, "loss": 0.1538238525390625, "step": 3804 }, { "epoch": 0.5144064216307562, "grad_norm": 1.2012690305709839, "learning_rate": 1.6029277039678153e-05, "loss": 0.17549800872802734, "step": 3805 }, { "epoch": 0.5145416138572032, "grad_norm": 1.1604268550872803, "learning_rate": 1.602244863681401e-05, "loss": 0.16648483276367188, "step": 3806 }, { "epoch": 0.5146768060836502, "grad_norm": 0.9503032565116882, "learning_rate": 1.6015620021070613e-05, "loss": 0.16939926147460938, "step": 3807 }, { "epoch": 0.5148119983100972, "grad_norm": 1.2746299505233765, "learning_rate": 1.6008791193869714e-05, "loss": 0.14973747730255127, "step": 3808 }, { "epoch": 0.5149471905365441, "grad_norm": 1.1287305355072021, "learning_rate": 1.6001962156633102e-05, "loss": 0.1815018653869629, "step": 3809 }, { "epoch": 0.5150823827629911, "grad_norm": 1.3691349029541016, "learning_rate": 1.5995132910782632e-05, "loss": 0.19733047485351562, "step": 3810 }, { "epoch": 0.5152175749894381, "grad_norm": 0.9607386589050293, "learning_rate": 1.5988303457740178e-05, "loss": 0.15164661407470703, "step": 3811 }, { "epoch": 0.5153527672158851, "grad_norm": 1.290601134300232, "learning_rate": 1.598147379892768e-05, "loss": 0.12627220153808594, "step": 3812 }, { "epoch": 0.515487959442332, "grad_norm": 0.7313801050186157, "learning_rate": 1.5974643935767098e-05, "loss": 0.15091419219970703, "step": 3813 }, { "epoch": 0.515623151668779, "grad_norm": 1.0458593368530273, "learning_rate": 1.5967813869680452e-05, "loss": 0.17737579345703125, "step": 3814 }, { "epoch": 0.515758343895226, "grad_norm": 1.1229420900344849, "learning_rate": 1.59609836020898e-05, "loss": 0.19245529174804688, "step": 3815 }, { "epoch": 0.515893536121673, "grad_norm": 1.4036811590194702, "learning_rate": 1.5954153134417236e-05, "loss": 0.201324462890625, "step": 3816 }, { "epoch": 0.51602872834812, "grad_norm": 1.4691399335861206, "learning_rate": 1.59473224680849e-05, "loss": 0.1512455940246582, "step": 3817 }, { "epoch": 0.516163920574567, "grad_norm": 0.9885697960853577, "learning_rate": 1.5940491604514976e-05, "loss": 0.15338516235351562, "step": 3818 }, { "epoch": 0.5162991128010139, "grad_norm": 2.1241343021392822, "learning_rate": 1.5933660545129683e-05, "loss": 0.2154073715209961, "step": 3819 }, { "epoch": 0.5164343050274609, "grad_norm": 1.6376478672027588, "learning_rate": 1.5926829291351288e-05, "loss": 0.1667041778564453, "step": 3820 }, { "epoch": 0.5165694972539079, "grad_norm": 1.0530023574829102, "learning_rate": 1.591999784460209e-05, "loss": 0.14536833763122559, "step": 3821 }, { "epoch": 0.5167046894803549, "grad_norm": 1.5427544116973877, "learning_rate": 1.5913166206304435e-05, "loss": 0.22985076904296875, "step": 3822 }, { "epoch": 0.5168398817068018, "grad_norm": 1.008499026298523, "learning_rate": 1.5906334377880707e-05, "loss": 0.15213680267333984, "step": 3823 }, { "epoch": 0.5169750739332488, "grad_norm": 1.6367738246917725, "learning_rate": 1.589950236075333e-05, "loss": 0.2294178009033203, "step": 3824 }, { "epoch": 0.5171102661596958, "grad_norm": 1.2452484369277954, "learning_rate": 1.5892670156344764e-05, "loss": 0.18032073974609375, "step": 3825 }, { "epoch": 0.5172454583861428, "grad_norm": 1.0863773822784424, "learning_rate": 1.588583776607751e-05, "loss": 0.16354870796203613, "step": 3826 }, { "epoch": 0.5173806506125898, "grad_norm": 2.9368748664855957, "learning_rate": 1.5879005191374106e-05, "loss": 0.1796102523803711, "step": 3827 }, { "epoch": 0.5175158428390367, "grad_norm": 1.875327467918396, "learning_rate": 1.587217243365714e-05, "loss": 0.2396857738494873, "step": 3828 }, { "epoch": 0.5176510350654837, "grad_norm": 2.1258962154388428, "learning_rate": 1.586533949434922e-05, "loss": 0.14569997787475586, "step": 3829 }, { "epoch": 0.5177862272919307, "grad_norm": 2.010382890701294, "learning_rate": 1.5858506374872998e-05, "loss": 0.17071533203125, "step": 3830 }, { "epoch": 0.5179214195183777, "grad_norm": 1.021146297454834, "learning_rate": 1.5851673076651178e-05, "loss": 0.17508220672607422, "step": 3831 }, { "epoch": 0.5180566117448246, "grad_norm": 1.0530067682266235, "learning_rate": 1.5844839601106477e-05, "loss": 0.2031698226928711, "step": 3832 }, { "epoch": 0.5181918039712716, "grad_norm": 0.8794388175010681, "learning_rate": 1.583800594966167e-05, "loss": 0.18463897705078125, "step": 3833 }, { "epoch": 0.5183269961977186, "grad_norm": 1.0238717794418335, "learning_rate": 1.583117212373955e-05, "loss": 0.18951892852783203, "step": 3834 }, { "epoch": 0.5184621884241656, "grad_norm": 1.1634663343429565, "learning_rate": 1.5824338124762967e-05, "loss": 0.14718055725097656, "step": 3835 }, { "epoch": 0.5185973806506126, "grad_norm": 0.9456940293312073, "learning_rate": 1.581750395415479e-05, "loss": 0.13039398193359375, "step": 3836 }, { "epoch": 0.5187325728770595, "grad_norm": 1.6109614372253418, "learning_rate": 1.5810669613337922e-05, "loss": 0.2257823944091797, "step": 3837 }, { "epoch": 0.5188677651035065, "grad_norm": 0.9401279091835022, "learning_rate": 1.5803835103735327e-05, "loss": 0.15442514419555664, "step": 3838 }, { "epoch": 0.5190029573299535, "grad_norm": 1.3372737169265747, "learning_rate": 1.5797000426769973e-05, "loss": 0.18842506408691406, "step": 3839 }, { "epoch": 0.5191381495564005, "grad_norm": 1.5018569231033325, "learning_rate": 1.579016558386488e-05, "loss": 0.2101898193359375, "step": 3840 }, { "epoch": 0.5192733417828475, "grad_norm": 2.964165210723877, "learning_rate": 1.5783330576443096e-05, "loss": 0.19091320037841797, "step": 3841 }, { "epoch": 0.5194085340092944, "grad_norm": 1.3822510242462158, "learning_rate": 1.5776495405927716e-05, "loss": 0.1699199676513672, "step": 3842 }, { "epoch": 0.5195437262357414, "grad_norm": 1.023774266242981, "learning_rate": 1.5769660073741844e-05, "loss": 0.19399261474609375, "step": 3843 }, { "epoch": 0.5196789184621884, "grad_norm": 2.095381259918213, "learning_rate": 1.5762824581308645e-05, "loss": 0.1503143310546875, "step": 3844 }, { "epoch": 0.5198141106886354, "grad_norm": 1.578182578086853, "learning_rate": 1.5755988930051304e-05, "loss": 0.16045784950256348, "step": 3845 }, { "epoch": 0.5199493029150823, "grad_norm": 0.5900046825408936, "learning_rate": 1.5749153121393025e-05, "loss": 0.11719131469726562, "step": 3846 }, { "epoch": 0.5200844951415293, "grad_norm": 0.9304651021957397, "learning_rate": 1.574231715675708e-05, "loss": 0.1772136688232422, "step": 3847 }, { "epoch": 0.5202196873679763, "grad_norm": 0.824239194393158, "learning_rate": 1.573548103756674e-05, "loss": 0.14716565608978271, "step": 3848 }, { "epoch": 0.5203548795944233, "grad_norm": 0.9432123303413391, "learning_rate": 1.572864476524533e-05, "loss": 0.17438125610351562, "step": 3849 }, { "epoch": 0.5204900718208703, "grad_norm": 1.3992304801940918, "learning_rate": 1.5721808341216195e-05, "loss": 0.11055135726928711, "step": 3850 }, { "epoch": 0.5206252640473172, "grad_norm": 0.7463445067405701, "learning_rate": 1.571497176690271e-05, "loss": 0.20017051696777344, "step": 3851 }, { "epoch": 0.5207604562737642, "grad_norm": 1.0995190143585205, "learning_rate": 1.570813504372829e-05, "loss": 0.20088672637939453, "step": 3852 }, { "epoch": 0.5208956485002112, "grad_norm": 1.2843900918960571, "learning_rate": 1.570129817311638e-05, "loss": 0.19963359832763672, "step": 3853 }, { "epoch": 0.5210308407266582, "grad_norm": 1.325605869293213, "learning_rate": 1.5694461156490452e-05, "loss": 0.20557022094726562, "step": 3854 }, { "epoch": 0.5211660329531052, "grad_norm": 0.8538478016853333, "learning_rate": 1.5687623995274008e-05, "loss": 0.11966896057128906, "step": 3855 }, { "epoch": 0.5213012251795521, "grad_norm": 0.7925046682357788, "learning_rate": 1.568078669089058e-05, "loss": 0.18639755249023438, "step": 3856 }, { "epoch": 0.5214364174059991, "grad_norm": 1.9237161874771118, "learning_rate": 1.567394924476373e-05, "loss": 0.20045089721679688, "step": 3857 }, { "epoch": 0.5215716096324461, "grad_norm": 1.660418152809143, "learning_rate": 1.5667111658317057e-05, "loss": 0.1979236602783203, "step": 3858 }, { "epoch": 0.5217068018588931, "grad_norm": 0.50294429063797, "learning_rate": 1.5660273932974177e-05, "loss": 0.12451362609863281, "step": 3859 }, { "epoch": 0.52184199408534, "grad_norm": 2.575995445251465, "learning_rate": 1.5653436070158743e-05, "loss": 0.1822052001953125, "step": 3860 }, { "epoch": 0.521977186311787, "grad_norm": 1.4888801574707031, "learning_rate": 1.564659807129444e-05, "loss": 0.18964385986328125, "step": 3861 }, { "epoch": 0.522112378538234, "grad_norm": 0.8506982326507568, "learning_rate": 1.5639759937804962e-05, "loss": 0.16748619079589844, "step": 3862 }, { "epoch": 0.522247570764681, "grad_norm": 1.2662361860275269, "learning_rate": 1.5632921671114055e-05, "loss": 0.20804977416992188, "step": 3863 }, { "epoch": 0.522382762991128, "grad_norm": 0.7970473170280457, "learning_rate": 1.5626083272645485e-05, "loss": 0.17293357849121094, "step": 3864 }, { "epoch": 0.5225179552175749, "grad_norm": 1.7305662631988525, "learning_rate": 1.5619244743823038e-05, "loss": 0.1803497076034546, "step": 3865 }, { "epoch": 0.5226531474440219, "grad_norm": 1.322920799255371, "learning_rate": 1.5612406086070534e-05, "loss": 0.20030593872070312, "step": 3866 }, { "epoch": 0.5227883396704689, "grad_norm": 0.6300215125083923, "learning_rate": 1.560556730081181e-05, "loss": 0.15634965896606445, "step": 3867 }, { "epoch": 0.5229235318969159, "grad_norm": 1.0737369060516357, "learning_rate": 1.5598728389470754e-05, "loss": 0.2353343963623047, "step": 3868 }, { "epoch": 0.523058724123363, "grad_norm": 1.2187846899032593, "learning_rate": 1.5591889353471245e-05, "loss": 0.17796707153320312, "step": 3869 }, { "epoch": 0.5231939163498099, "grad_norm": 1.4197511672973633, "learning_rate": 1.5585050194237226e-05, "loss": 0.1568613052368164, "step": 3870 }, { "epoch": 0.5233291085762569, "grad_norm": 1.05025315284729, "learning_rate": 1.557821091319263e-05, "loss": 0.22429275512695312, "step": 3871 }, { "epoch": 0.5234643008027039, "grad_norm": 2.2244699001312256, "learning_rate": 1.5571371511761446e-05, "loss": 0.20125532150268555, "step": 3872 }, { "epoch": 0.5235994930291509, "grad_norm": 1.295419454574585, "learning_rate": 1.5564531991367658e-05, "loss": 0.16208553314208984, "step": 3873 }, { "epoch": 0.5237346852555979, "grad_norm": 1.144305944442749, "learning_rate": 1.5557692353435302e-05, "loss": 0.17605972290039062, "step": 3874 }, { "epoch": 0.5238698774820448, "grad_norm": 0.6235769987106323, "learning_rate": 1.5550852599388424e-05, "loss": 0.13022327423095703, "step": 3875 }, { "epoch": 0.5240050697084918, "grad_norm": 2.545581817626953, "learning_rate": 1.5544012730651096e-05, "loss": 0.2225794792175293, "step": 3876 }, { "epoch": 0.5241402619349388, "grad_norm": 0.7501353621482849, "learning_rate": 1.5537172748647422e-05, "loss": 0.13853168487548828, "step": 3877 }, { "epoch": 0.5242754541613858, "grad_norm": 1.1100043058395386, "learning_rate": 1.553033265480151e-05, "loss": 0.17278385162353516, "step": 3878 }, { "epoch": 0.5244106463878327, "grad_norm": 1.5144108533859253, "learning_rate": 1.552349245053752e-05, "loss": 0.2465207576751709, "step": 3879 }, { "epoch": 0.5245458386142797, "grad_norm": 1.1437766551971436, "learning_rate": 1.5516652137279597e-05, "loss": 0.1776876449584961, "step": 3880 }, { "epoch": 0.5246810308407267, "grad_norm": 0.7986635565757751, "learning_rate": 1.5509811716451955e-05, "loss": 0.1603860855102539, "step": 3881 }, { "epoch": 0.5248162230671737, "grad_norm": 0.7467828392982483, "learning_rate": 1.550297118947879e-05, "loss": 0.16000699996948242, "step": 3882 }, { "epoch": 0.5249514152936207, "grad_norm": 1.5045711994171143, "learning_rate": 1.5496130557784343e-05, "loss": 0.17192697525024414, "step": 3883 }, { "epoch": 0.5250866075200676, "grad_norm": 2.0142416954040527, "learning_rate": 1.5489289822792868e-05, "loss": 0.15709877014160156, "step": 3884 }, { "epoch": 0.5252217997465146, "grad_norm": 1.7170177698135376, "learning_rate": 1.5482448985928645e-05, "loss": 0.1925201416015625, "step": 3885 }, { "epoch": 0.5253569919729616, "grad_norm": 0.6801953911781311, "learning_rate": 1.5475608048615964e-05, "loss": 0.13994693756103516, "step": 3886 }, { "epoch": 0.5254921841994086, "grad_norm": 1.4294174909591675, "learning_rate": 1.546876701227916e-05, "loss": 0.17708587646484375, "step": 3887 }, { "epoch": 0.5256273764258556, "grad_norm": 0.9180756211280823, "learning_rate": 1.5461925878342558e-05, "loss": 0.20695972442626953, "step": 3888 }, { "epoch": 0.5257625686523025, "grad_norm": 0.8466778993606567, "learning_rate": 1.5455084648230527e-05, "loss": 0.20012283325195312, "step": 3889 }, { "epoch": 0.5258977608787495, "grad_norm": 1.1864982843399048, "learning_rate": 1.5448243323367438e-05, "loss": 0.20632076263427734, "step": 3890 }, { "epoch": 0.5260329531051965, "grad_norm": 1.3084194660186768, "learning_rate": 1.544140190517771e-05, "loss": 0.2099590301513672, "step": 3891 }, { "epoch": 0.5261681453316435, "grad_norm": 0.9205005764961243, "learning_rate": 1.5434560395085745e-05, "loss": 0.1709880828857422, "step": 3892 }, { "epoch": 0.5263033375580904, "grad_norm": 0.9179989695549011, "learning_rate": 1.542771879451599e-05, "loss": 0.17632102966308594, "step": 3893 }, { "epoch": 0.5264385297845374, "grad_norm": 0.6690536737442017, "learning_rate": 1.54208771048929e-05, "loss": 0.1686573028564453, "step": 3894 }, { "epoch": 0.5265737220109844, "grad_norm": 1.3778417110443115, "learning_rate": 1.5414035327640958e-05, "loss": 0.21535873413085938, "step": 3895 }, { "epoch": 0.5267089142374314, "grad_norm": 0.7535449862480164, "learning_rate": 1.5407193464184644e-05, "loss": 0.1034994125366211, "step": 3896 }, { "epoch": 0.5268441064638784, "grad_norm": 2.7647576332092285, "learning_rate": 1.5400351515948485e-05, "loss": 0.18512439727783203, "step": 3897 }, { "epoch": 0.5269792986903253, "grad_norm": 1.2415040731430054, "learning_rate": 1.5393509484357006e-05, "loss": 0.21524810791015625, "step": 3898 }, { "epoch": 0.5271144909167723, "grad_norm": 1.24103581905365, "learning_rate": 1.538666737083475e-05, "loss": 0.18302297592163086, "step": 3899 }, { "epoch": 0.5272496831432193, "grad_norm": 0.8943217992782593, "learning_rate": 1.537982517680629e-05, "loss": 0.16332626342773438, "step": 3900 }, { "epoch": 0.5273848753696663, "grad_norm": 1.4180735349655151, "learning_rate": 1.5372982903696196e-05, "loss": 0.1447221338748932, "step": 3901 }, { "epoch": 0.5275200675961133, "grad_norm": 1.6521670818328857, "learning_rate": 1.536614055292908e-05, "loss": 0.18349266052246094, "step": 3902 }, { "epoch": 0.5276552598225602, "grad_norm": 2.2906532287597656, "learning_rate": 1.535929812592955e-05, "loss": 0.1721649169921875, "step": 3903 }, { "epoch": 0.5277904520490072, "grad_norm": 0.9075860381126404, "learning_rate": 1.5352455624122227e-05, "loss": 0.16194629669189453, "step": 3904 }, { "epoch": 0.5279256442754542, "grad_norm": 2.6471893787384033, "learning_rate": 1.5345613048931765e-05, "loss": 0.207489013671875, "step": 3905 }, { "epoch": 0.5280608365019012, "grad_norm": 2.408668279647827, "learning_rate": 1.5338770401782822e-05, "loss": 0.1840343475341797, "step": 3906 }, { "epoch": 0.5281960287283481, "grad_norm": 2.299936294555664, "learning_rate": 1.5331927684100077e-05, "loss": 0.17118358612060547, "step": 3907 }, { "epoch": 0.5283312209547951, "grad_norm": 1.0176359415054321, "learning_rate": 1.5325084897308218e-05, "loss": 0.1513686180114746, "step": 3908 }, { "epoch": 0.5284664131812421, "grad_norm": 0.8170287609100342, "learning_rate": 1.5318242042831952e-05, "loss": 0.14023494720458984, "step": 3909 }, { "epoch": 0.5286016054076891, "grad_norm": 1.2909241914749146, "learning_rate": 1.5311399122095992e-05, "loss": 0.1741032600402832, "step": 3910 }, { "epoch": 0.5287367976341361, "grad_norm": 1.2220346927642822, "learning_rate": 1.5304556136525074e-05, "loss": 0.2351207733154297, "step": 3911 }, { "epoch": 0.528871989860583, "grad_norm": 1.4754970073699951, "learning_rate": 1.5297713087543948e-05, "loss": 0.1788029670715332, "step": 3912 }, { "epoch": 0.52900718208703, "grad_norm": 1.1380589008331299, "learning_rate": 1.5290869976577365e-05, "loss": 0.17387104034423828, "step": 3913 }, { "epoch": 0.529142374313477, "grad_norm": 0.7459555864334106, "learning_rate": 1.5284026805050107e-05, "loss": 0.17338323593139648, "step": 3914 }, { "epoch": 0.529277566539924, "grad_norm": 0.9824241399765015, "learning_rate": 1.5277183574386947e-05, "loss": 0.21282148361206055, "step": 3915 }, { "epoch": 0.529412758766371, "grad_norm": 1.485780119895935, "learning_rate": 1.5270340286012694e-05, "loss": 0.20425796508789062, "step": 3916 }, { "epoch": 0.5295479509928179, "grad_norm": 1.4654784202575684, "learning_rate": 1.526349694135215e-05, "loss": 0.20690631866455078, "step": 3917 }, { "epoch": 0.5296831432192649, "grad_norm": 1.7467727661132812, "learning_rate": 1.525665354183014e-05, "loss": 0.17418479919433594, "step": 3918 }, { "epoch": 0.5298183354457119, "grad_norm": 2.730034351348877, "learning_rate": 1.5249810088871493e-05, "loss": 0.18279647827148438, "step": 3919 }, { "epoch": 0.5299535276721589, "grad_norm": 1.9890397787094116, "learning_rate": 1.5242966583901052e-05, "loss": 0.1901264190673828, "step": 3920 }, { "epoch": 0.5300887198986058, "grad_norm": 1.2000585794448853, "learning_rate": 1.523612302834367e-05, "loss": 0.17255020141601562, "step": 3921 }, { "epoch": 0.5302239121250528, "grad_norm": 1.0145665407180786, "learning_rate": 1.5229279423624217e-05, "loss": 0.18259716033935547, "step": 3922 }, { "epoch": 0.5303591043514998, "grad_norm": 0.7533456683158875, "learning_rate": 1.5222435771167566e-05, "loss": 0.15386199951171875, "step": 3923 }, { "epoch": 0.5304942965779468, "grad_norm": 1.1919670104980469, "learning_rate": 1.5215592072398602e-05, "loss": 0.18500709533691406, "step": 3924 }, { "epoch": 0.5306294888043938, "grad_norm": 0.9636706709861755, "learning_rate": 1.520874832874222e-05, "loss": 0.17650437355041504, "step": 3925 }, { "epoch": 0.5307646810308407, "grad_norm": 1.0850481986999512, "learning_rate": 1.5201904541623318e-05, "loss": 0.19089877605438232, "step": 3926 }, { "epoch": 0.5308998732572877, "grad_norm": 0.6094186902046204, "learning_rate": 1.5195060712466817e-05, "loss": 0.11193490028381348, "step": 3927 }, { "epoch": 0.5310350654837347, "grad_norm": 1.2321534156799316, "learning_rate": 1.5188216842697635e-05, "loss": 0.23956584930419922, "step": 3928 }, { "epoch": 0.5311702577101817, "grad_norm": 0.8972972631454468, "learning_rate": 1.5181372933740703e-05, "loss": 0.1798996925354004, "step": 3929 }, { "epoch": 0.5313054499366286, "grad_norm": 0.752771258354187, "learning_rate": 1.5174528987020958e-05, "loss": 0.13164401054382324, "step": 3930 }, { "epoch": 0.5314406421630756, "grad_norm": 1.8373531103134155, "learning_rate": 1.5167685003963345e-05, "loss": 0.16129350662231445, "step": 3931 }, { "epoch": 0.5315758343895226, "grad_norm": 0.9029563665390015, "learning_rate": 1.5160840985992824e-05, "loss": 0.1434469223022461, "step": 3932 }, { "epoch": 0.5317110266159696, "grad_norm": 1.1272697448730469, "learning_rate": 1.515399693453435e-05, "loss": 0.17893600463867188, "step": 3933 }, { "epoch": 0.5318462188424166, "grad_norm": 1.0933237075805664, "learning_rate": 1.5147152851012894e-05, "loss": 0.1831519603729248, "step": 3934 }, { "epoch": 0.5319814110688635, "grad_norm": 1.3981655836105347, "learning_rate": 1.514030873685343e-05, "loss": 0.18175792694091797, "step": 3935 }, { "epoch": 0.5321166032953105, "grad_norm": 0.9870057702064514, "learning_rate": 1.513346459348094e-05, "loss": 0.15972137451171875, "step": 3936 }, { "epoch": 0.5322517955217575, "grad_norm": 1.3061848878860474, "learning_rate": 1.5126620422320405e-05, "loss": 0.1402750015258789, "step": 3937 }, { "epoch": 0.5323869877482045, "grad_norm": 2.1066627502441406, "learning_rate": 1.5119776224796823e-05, "loss": 0.14604568481445312, "step": 3938 }, { "epoch": 0.5325221799746515, "grad_norm": 0.9654192924499512, "learning_rate": 1.5112932002335195e-05, "loss": 0.19241619110107422, "step": 3939 }, { "epoch": 0.5326573722010984, "grad_norm": 1.8721652030944824, "learning_rate": 1.5106087756360524e-05, "loss": 0.17813873291015625, "step": 3940 }, { "epoch": 0.5327925644275454, "grad_norm": 0.9445196390151978, "learning_rate": 1.5099243488297816e-05, "loss": 0.187713623046875, "step": 3941 }, { "epoch": 0.5329277566539924, "grad_norm": 1.3595408201217651, "learning_rate": 1.5092399199572083e-05, "loss": 0.1769847869873047, "step": 3942 }, { "epoch": 0.5330629488804394, "grad_norm": 0.720579981803894, "learning_rate": 1.5085554891608343e-05, "loss": 0.1292252540588379, "step": 3943 }, { "epoch": 0.5331981411068863, "grad_norm": 0.9848388433456421, "learning_rate": 1.5078710565831616e-05, "loss": 0.16658592224121094, "step": 3944 }, { "epoch": 0.5333333333333333, "grad_norm": 1.2487695217132568, "learning_rate": 1.5071866223666935e-05, "loss": 0.1735973358154297, "step": 3945 }, { "epoch": 0.5334685255597803, "grad_norm": 1.0072109699249268, "learning_rate": 1.5065021866539323e-05, "loss": 0.17850494384765625, "step": 3946 }, { "epoch": 0.5336037177862273, "grad_norm": 1.576037049293518, "learning_rate": 1.5058177495873805e-05, "loss": 0.17388248443603516, "step": 3947 }, { "epoch": 0.5337389100126743, "grad_norm": 2.016763925552368, "learning_rate": 1.5051333113095429e-05, "loss": 0.21162033081054688, "step": 3948 }, { "epoch": 0.5338741022391212, "grad_norm": 1.004820704460144, "learning_rate": 1.5044488719629218e-05, "loss": 0.1365799903869629, "step": 3949 }, { "epoch": 0.5340092944655682, "grad_norm": 0.5475895404815674, "learning_rate": 1.5037644316900227e-05, "loss": 0.12336695194244385, "step": 3950 }, { "epoch": 0.5341444866920152, "grad_norm": 1.0990207195281982, "learning_rate": 1.5030799906333484e-05, "loss": 0.2123265266418457, "step": 3951 }, { "epoch": 0.5342796789184622, "grad_norm": 0.7240240573883057, "learning_rate": 1.5023955489354031e-05, "loss": 0.14586830139160156, "step": 3952 }, { "epoch": 0.5344148711449092, "grad_norm": 0.8073884844779968, "learning_rate": 1.5017111067386927e-05, "loss": 0.19486570358276367, "step": 3953 }, { "epoch": 0.5345500633713561, "grad_norm": 2.3705556392669678, "learning_rate": 1.50102666418572e-05, "loss": 0.2147655487060547, "step": 3954 }, { "epoch": 0.5346852555978031, "grad_norm": 1.6359292268753052, "learning_rate": 1.500342221418991e-05, "loss": 0.20354461669921875, "step": 3955 }, { "epoch": 0.5348204478242501, "grad_norm": 2.809063673019409, "learning_rate": 1.4996577785810094e-05, "loss": 0.18563270568847656, "step": 3956 }, { "epoch": 0.5349556400506971, "grad_norm": 1.2359619140625, "learning_rate": 1.4989733358142798e-05, "loss": 0.1841496229171753, "step": 3957 }, { "epoch": 0.535090832277144, "grad_norm": 1.5012120008468628, "learning_rate": 1.498288893261308e-05, "loss": 0.1871471405029297, "step": 3958 }, { "epoch": 0.535226024503591, "grad_norm": 0.83632493019104, "learning_rate": 1.497604451064597e-05, "loss": 0.18277502059936523, "step": 3959 }, { "epoch": 0.535361216730038, "grad_norm": 1.2533302307128906, "learning_rate": 1.496920009366652e-05, "loss": 0.1358942985534668, "step": 3960 }, { "epoch": 0.535496408956485, "grad_norm": 0.887109637260437, "learning_rate": 1.4962355683099777e-05, "loss": 0.18335533142089844, "step": 3961 }, { "epoch": 0.535631601182932, "grad_norm": 0.9484407305717468, "learning_rate": 1.4955511280370782e-05, "loss": 0.17981112003326416, "step": 3962 }, { "epoch": 0.5357667934093789, "grad_norm": 1.69333815574646, "learning_rate": 1.4948666886904579e-05, "loss": 0.17372846603393555, "step": 3963 }, { "epoch": 0.5359019856358259, "grad_norm": 1.7836371660232544, "learning_rate": 1.4941822504126199e-05, "loss": 0.20348739624023438, "step": 3964 }, { "epoch": 0.5360371778622729, "grad_norm": 1.3293485641479492, "learning_rate": 1.4934978133460681e-05, "loss": 0.19939088821411133, "step": 3965 }, { "epoch": 0.5361723700887199, "grad_norm": 0.968438982963562, "learning_rate": 1.4928133776333068e-05, "loss": 0.2183971405029297, "step": 3966 }, { "epoch": 0.5363075623151669, "grad_norm": 1.8090099096298218, "learning_rate": 1.4921289434168386e-05, "loss": 0.15348024666309357, "step": 3967 }, { "epoch": 0.5364427545416138, "grad_norm": 2.4087274074554443, "learning_rate": 1.4914445108391663e-05, "loss": 0.18173694610595703, "step": 3968 }, { "epoch": 0.5365779467680608, "grad_norm": 1.1351317167282104, "learning_rate": 1.4907600800427922e-05, "loss": 0.20950984954833984, "step": 3969 }, { "epoch": 0.5367131389945078, "grad_norm": 1.7744807004928589, "learning_rate": 1.4900756511702188e-05, "loss": 0.27730560302734375, "step": 3970 }, { "epoch": 0.5368483312209548, "grad_norm": 1.2004674673080444, "learning_rate": 1.4893912243639479e-05, "loss": 0.18226146697998047, "step": 3971 }, { "epoch": 0.5369835234474017, "grad_norm": 0.9677853584289551, "learning_rate": 1.4887067997664807e-05, "loss": 0.215972900390625, "step": 3972 }, { "epoch": 0.5371187156738487, "grad_norm": 0.9946901202201843, "learning_rate": 1.488022377520318e-05, "loss": 0.2423381805419922, "step": 3973 }, { "epoch": 0.5372539079002957, "grad_norm": 1.6103684902191162, "learning_rate": 1.4873379577679599e-05, "loss": 0.1437664031982422, "step": 3974 }, { "epoch": 0.5373891001267427, "grad_norm": 1.8710675239562988, "learning_rate": 1.4866535406519063e-05, "loss": 0.1977243423461914, "step": 3975 }, { "epoch": 0.5375242923531897, "grad_norm": 1.0028784275054932, "learning_rate": 1.4859691263146574e-05, "loss": 0.13887786865234375, "step": 3976 }, { "epoch": 0.5376594845796366, "grad_norm": 0.8750423192977905, "learning_rate": 1.485284714898711e-05, "loss": 0.139312744140625, "step": 3977 }, { "epoch": 0.5377946768060836, "grad_norm": 0.7252755761146545, "learning_rate": 1.4846003065465653e-05, "loss": 0.14550018310546875, "step": 3978 }, { "epoch": 0.5379298690325306, "grad_norm": 3.0151162147521973, "learning_rate": 1.4839159014007179e-05, "loss": 0.23079371452331543, "step": 3979 }, { "epoch": 0.5380650612589776, "grad_norm": 2.0465519428253174, "learning_rate": 1.4832314996036653e-05, "loss": 0.18709182739257812, "step": 3980 }, { "epoch": 0.5382002534854246, "grad_norm": 2.1044864654541016, "learning_rate": 1.4825471012979048e-05, "loss": 0.1925182342529297, "step": 3981 }, { "epoch": 0.5383354457118715, "grad_norm": 2.3994622230529785, "learning_rate": 1.4818627066259301e-05, "loss": 0.17262601852416992, "step": 3982 }, { "epoch": 0.5384706379383185, "grad_norm": 0.9226011037826538, "learning_rate": 1.481178315730237e-05, "loss": 0.17860889434814453, "step": 3983 }, { "epoch": 0.5386058301647655, "grad_norm": 1.1245869398117065, "learning_rate": 1.4804939287533184e-05, "loss": 0.21307754516601562, "step": 3984 }, { "epoch": 0.5387410223912125, "grad_norm": 2.064107894897461, "learning_rate": 1.4798095458376682e-05, "loss": 0.21994924545288086, "step": 3985 }, { "epoch": 0.5388762146176594, "grad_norm": 2.0353598594665527, "learning_rate": 1.4791251671257788e-05, "loss": 0.20468425750732422, "step": 3986 }, { "epoch": 0.5390114068441064, "grad_norm": 1.3710945844650269, "learning_rate": 1.4784407927601401e-05, "loss": 0.21075963973999023, "step": 3987 }, { "epoch": 0.5391465990705534, "grad_norm": 2.1930136680603027, "learning_rate": 1.4777564228832436e-05, "loss": 0.2022418975830078, "step": 3988 }, { "epoch": 0.5392817912970004, "grad_norm": 0.5666084289550781, "learning_rate": 1.4770720576375782e-05, "loss": 0.10650634765625, "step": 3989 }, { "epoch": 0.5394169835234474, "grad_norm": 1.0654162168502808, "learning_rate": 1.4763876971656334e-05, "loss": 0.16987371444702148, "step": 3990 }, { "epoch": 0.5395521757498943, "grad_norm": 0.8482323884963989, "learning_rate": 1.4757033416098953e-05, "loss": 0.2015666961669922, "step": 3991 }, { "epoch": 0.5396873679763413, "grad_norm": 3.253553867340088, "learning_rate": 1.4750189911128511e-05, "loss": 0.235931396484375, "step": 3992 }, { "epoch": 0.5398225602027883, "grad_norm": 1.031988501548767, "learning_rate": 1.4743346458169863e-05, "loss": 0.09942924976348877, "step": 3993 }, { "epoch": 0.5399577524292353, "grad_norm": 2.237548589706421, "learning_rate": 1.473650305864785e-05, "loss": 0.2098541259765625, "step": 3994 }, { "epoch": 0.5400929446556822, "grad_norm": 0.927822470664978, "learning_rate": 1.472965971398731e-05, "loss": 0.16896724700927734, "step": 3995 }, { "epoch": 0.5402281368821292, "grad_norm": 1.737018346786499, "learning_rate": 1.4722816425613054e-05, "loss": 0.16241240501403809, "step": 3996 }, { "epoch": 0.5403633291085762, "grad_norm": 0.7754279971122742, "learning_rate": 1.4715973194949895e-05, "loss": 0.15568161010742188, "step": 3997 }, { "epoch": 0.5404985213350232, "grad_norm": 1.9644675254821777, "learning_rate": 1.4709130023422636e-05, "loss": 0.18276691436767578, "step": 3998 }, { "epoch": 0.5406337135614702, "grad_norm": 0.9808417558670044, "learning_rate": 1.4702286912456052e-05, "loss": 0.1619720458984375, "step": 3999 }, { "epoch": 0.5407689057879171, "grad_norm": 1.5291649103164673, "learning_rate": 1.4695443863474928e-05, "loss": 0.1965045928955078, "step": 4000 }, { "epoch": 0.5409040980143641, "grad_norm": 1.6361366510391235, "learning_rate": 1.4688600877904012e-05, "loss": 0.16273021697998047, "step": 4001 }, { "epoch": 0.5410392902408111, "grad_norm": 1.2697741985321045, "learning_rate": 1.468175795716805e-05, "loss": 0.17620277404785156, "step": 4002 }, { "epoch": 0.5411744824672581, "grad_norm": 1.1318798065185547, "learning_rate": 1.4674915102691783e-05, "loss": 0.19409847259521484, "step": 4003 }, { "epoch": 0.5413096746937052, "grad_norm": 0.7105750441551208, "learning_rate": 1.4668072315899926e-05, "loss": 0.114227294921875, "step": 4004 }, { "epoch": 0.5414448669201521, "grad_norm": 1.1478139162063599, "learning_rate": 1.466122959821718e-05, "loss": 0.2017955780029297, "step": 4005 }, { "epoch": 0.5415800591465991, "grad_norm": 0.979746401309967, "learning_rate": 1.4654386951068239e-05, "loss": 0.1895599365234375, "step": 4006 }, { "epoch": 0.5417152513730461, "grad_norm": 1.0143623352050781, "learning_rate": 1.4647544375877776e-05, "loss": 0.13962459564208984, "step": 4007 }, { "epoch": 0.5418504435994931, "grad_norm": 1.5708460807800293, "learning_rate": 1.4640701874070457e-05, "loss": 0.19917583465576172, "step": 4008 }, { "epoch": 0.5419856358259401, "grad_norm": 1.867903470993042, "learning_rate": 1.4633859447070922e-05, "loss": 0.21210479736328125, "step": 4009 }, { "epoch": 0.542120828052387, "grad_norm": 1.0648705959320068, "learning_rate": 1.4627017096303805e-05, "loss": 0.21725082397460938, "step": 4010 }, { "epoch": 0.542256020278834, "grad_norm": 2.2926185131073, "learning_rate": 1.4620174823193711e-05, "loss": 0.2010936737060547, "step": 4011 }, { "epoch": 0.542391212505281, "grad_norm": 1.7901463508605957, "learning_rate": 1.4613332629165249e-05, "loss": 0.14879417419433594, "step": 4012 }, { "epoch": 0.542526404731728, "grad_norm": 1.015810251235962, "learning_rate": 1.4606490515642998e-05, "loss": 0.16561293601989746, "step": 4013 }, { "epoch": 0.542661596958175, "grad_norm": 1.0519723892211914, "learning_rate": 1.4599648484051516e-05, "loss": 0.2098369598388672, "step": 4014 }, { "epoch": 0.5427967891846219, "grad_norm": 0.973822295665741, "learning_rate": 1.4592806535815358e-05, "loss": 0.18896007537841797, "step": 4015 }, { "epoch": 0.5429319814110689, "grad_norm": 1.7192699909210205, "learning_rate": 1.4585964672359045e-05, "loss": 0.18070220947265625, "step": 4016 }, { "epoch": 0.5430671736375159, "grad_norm": 1.3553988933563232, "learning_rate": 1.4579122895107098e-05, "loss": 0.1761951446533203, "step": 4017 }, { "epoch": 0.5432023658639629, "grad_norm": 1.4251456260681152, "learning_rate": 1.4572281205484012e-05, "loss": 0.2001185417175293, "step": 4018 }, { "epoch": 0.5433375580904098, "grad_norm": 0.7896031141281128, "learning_rate": 1.4565439604914256e-05, "loss": 0.1615065336227417, "step": 4019 }, { "epoch": 0.5434727503168568, "grad_norm": 1.0697122812271118, "learning_rate": 1.4558598094822294e-05, "loss": 0.19288349151611328, "step": 4020 }, { "epoch": 0.5436079425433038, "grad_norm": 0.9079411029815674, "learning_rate": 1.455175667663256e-05, "loss": 0.13894164562225342, "step": 4021 }, { "epoch": 0.5437431347697508, "grad_norm": 0.5077937245368958, "learning_rate": 1.4544915351769476e-05, "loss": 0.08949291706085205, "step": 4022 }, { "epoch": 0.5438783269961978, "grad_norm": 0.8175442218780518, "learning_rate": 1.4538074121657448e-05, "loss": 0.13100862503051758, "step": 4023 }, { "epoch": 0.5440135192226447, "grad_norm": 1.2403984069824219, "learning_rate": 1.4531232987720846e-05, "loss": 0.2016468048095703, "step": 4024 }, { "epoch": 0.5441487114490917, "grad_norm": 0.92755526304245, "learning_rate": 1.4524391951384037e-05, "loss": 0.19174766540527344, "step": 4025 }, { "epoch": 0.5442839036755387, "grad_norm": 2.0467112064361572, "learning_rate": 1.4517551014071358e-05, "loss": 0.20962762832641602, "step": 4026 }, { "epoch": 0.5444190959019857, "grad_norm": 0.8494007587432861, "learning_rate": 1.4510710177207137e-05, "loss": 0.15510821342468262, "step": 4027 }, { "epoch": 0.5445542881284327, "grad_norm": 0.6568759679794312, "learning_rate": 1.450386944221566e-05, "loss": 0.11837959289550781, "step": 4028 }, { "epoch": 0.5446894803548796, "grad_norm": 1.319154143333435, "learning_rate": 1.449702881052121e-05, "loss": 0.16934967041015625, "step": 4029 }, { "epoch": 0.5448246725813266, "grad_norm": 1.1929186582565308, "learning_rate": 1.4490188283548048e-05, "loss": 0.20438766479492188, "step": 4030 }, { "epoch": 0.5449598648077736, "grad_norm": 1.232987642288208, "learning_rate": 1.44833478627204e-05, "loss": 0.2279338836669922, "step": 4031 }, { "epoch": 0.5450950570342206, "grad_norm": 0.8117069602012634, "learning_rate": 1.447650754946249e-05, "loss": 0.1895742416381836, "step": 4032 }, { "epoch": 0.5452302492606675, "grad_norm": 1.0740529298782349, "learning_rate": 1.4469667345198492e-05, "loss": 0.19192028045654297, "step": 4033 }, { "epoch": 0.5453654414871145, "grad_norm": 2.671614170074463, "learning_rate": 1.446282725135258e-05, "loss": 0.22579193115234375, "step": 4034 }, { "epoch": 0.5455006337135615, "grad_norm": 1.099198579788208, "learning_rate": 1.4455987269348904e-05, "loss": 0.17916584014892578, "step": 4035 }, { "epoch": 0.5456358259400085, "grad_norm": 1.3676178455352783, "learning_rate": 1.4449147400611578e-05, "loss": 0.186309814453125, "step": 4036 }, { "epoch": 0.5457710181664555, "grad_norm": 0.7914870977401733, "learning_rate": 1.4442307646564702e-05, "loss": 0.17490005493164062, "step": 4037 }, { "epoch": 0.5459062103929024, "grad_norm": 1.3446155786514282, "learning_rate": 1.4435468008632345e-05, "loss": 0.1973438262939453, "step": 4038 }, { "epoch": 0.5460414026193494, "grad_norm": 0.9203872084617615, "learning_rate": 1.4428628488238557e-05, "loss": 0.17908954620361328, "step": 4039 }, { "epoch": 0.5461765948457964, "grad_norm": 1.1400136947631836, "learning_rate": 1.442178908680737e-05, "loss": 0.21097850799560547, "step": 4040 }, { "epoch": 0.5463117870722434, "grad_norm": 0.7576656937599182, "learning_rate": 1.4414949805762779e-05, "loss": 0.11867785453796387, "step": 4041 }, { "epoch": 0.5464469792986903, "grad_norm": 0.8881217241287231, "learning_rate": 1.4408110646528757e-05, "loss": 0.1385340690612793, "step": 4042 }, { "epoch": 0.5465821715251373, "grad_norm": 1.2633638381958008, "learning_rate": 1.440127161052925e-05, "loss": 0.19174587726593018, "step": 4043 }, { "epoch": 0.5467173637515843, "grad_norm": 0.8522090911865234, "learning_rate": 1.4394432699188188e-05, "loss": 0.17963600158691406, "step": 4044 }, { "epoch": 0.5468525559780313, "grad_norm": 0.7230494022369385, "learning_rate": 1.4387593913929472e-05, "loss": 0.10930228233337402, "step": 4045 }, { "epoch": 0.5469877482044783, "grad_norm": 1.814190149307251, "learning_rate": 1.4380755256176968e-05, "loss": 0.1976947784423828, "step": 4046 }, { "epoch": 0.5471229404309252, "grad_norm": 0.6891637444496155, "learning_rate": 1.437391672735452e-05, "loss": 0.13751459121704102, "step": 4047 }, { "epoch": 0.5472581326573722, "grad_norm": 0.8325259685516357, "learning_rate": 1.4367078328885946e-05, "loss": 0.1578693389892578, "step": 4048 }, { "epoch": 0.5473933248838192, "grad_norm": 1.03226900100708, "learning_rate": 1.4360240062195039e-05, "loss": 0.12672805786132812, "step": 4049 }, { "epoch": 0.5475285171102662, "grad_norm": 0.7664415836334229, "learning_rate": 1.435340192870557e-05, "loss": 0.18357086181640625, "step": 4050 }, { "epoch": 0.5476637093367132, "grad_norm": 1.0694026947021484, "learning_rate": 1.434656392984126e-05, "loss": 0.1797046661376953, "step": 4051 }, { "epoch": 0.5477989015631601, "grad_norm": 3.5499792098999023, "learning_rate": 1.4339726067025828e-05, "loss": 0.2503662109375, "step": 4052 }, { "epoch": 0.5479340937896071, "grad_norm": 1.7963324785232544, "learning_rate": 1.4332888341682947e-05, "loss": 0.2182636260986328, "step": 4053 }, { "epoch": 0.5480692860160541, "grad_norm": 1.3704324960708618, "learning_rate": 1.432605075523627e-05, "loss": 0.24613571166992188, "step": 4054 }, { "epoch": 0.5482044782425011, "grad_norm": 0.8698115944862366, "learning_rate": 1.4319213309109426e-05, "loss": 0.1654510498046875, "step": 4055 }, { "epoch": 0.548339670468948, "grad_norm": 1.5419005155563354, "learning_rate": 1.4312376004725996e-05, "loss": 0.1967763900756836, "step": 4056 }, { "epoch": 0.548474862695395, "grad_norm": 1.1779868602752686, "learning_rate": 1.430553884350955e-05, "loss": 0.1338977813720703, "step": 4057 }, { "epoch": 0.548610054921842, "grad_norm": 1.7484750747680664, "learning_rate": 1.429870182688362e-05, "loss": 0.12793350219726562, "step": 4058 }, { "epoch": 0.548745247148289, "grad_norm": 0.6911334991455078, "learning_rate": 1.4291864956271713e-05, "loss": 0.14394855499267578, "step": 4059 }, { "epoch": 0.548880439374736, "grad_norm": 2.775631904602051, "learning_rate": 1.4285028233097293e-05, "loss": 0.19048500061035156, "step": 4060 }, { "epoch": 0.5490156316011829, "grad_norm": 1.3515230417251587, "learning_rate": 1.4278191658783809e-05, "loss": 0.22266387939453125, "step": 4061 }, { "epoch": 0.5491508238276299, "grad_norm": 1.1564401388168335, "learning_rate": 1.427135523475467e-05, "loss": 0.2068958282470703, "step": 4062 }, { "epoch": 0.5492860160540769, "grad_norm": 1.4923704862594604, "learning_rate": 1.4264518962433258e-05, "loss": 0.2165374755859375, "step": 4063 }, { "epoch": 0.5494212082805239, "grad_norm": 0.792876660823822, "learning_rate": 1.4257682843242925e-05, "loss": 0.16881990432739258, "step": 4064 }, { "epoch": 0.5495564005069709, "grad_norm": 1.3603845834732056, "learning_rate": 1.4250846878606974e-05, "loss": 0.16334915161132812, "step": 4065 }, { "epoch": 0.5496915927334178, "grad_norm": 0.7456710934638977, "learning_rate": 1.4244011069948702e-05, "loss": 0.1499490737915039, "step": 4066 }, { "epoch": 0.5498267849598648, "grad_norm": 0.8540663719177246, "learning_rate": 1.4237175418691357e-05, "loss": 0.14925193786621094, "step": 4067 }, { "epoch": 0.5499619771863118, "grad_norm": 1.8437916040420532, "learning_rate": 1.4230339926258153e-05, "loss": 0.1911764144897461, "step": 4068 }, { "epoch": 0.5500971694127588, "grad_norm": 1.10478675365448, "learning_rate": 1.422350459407229e-05, "loss": 0.16003799438476562, "step": 4069 }, { "epoch": 0.5502323616392057, "grad_norm": 1.7669198513031006, "learning_rate": 1.4216669423556903e-05, "loss": 0.19570636749267578, "step": 4070 }, { "epoch": 0.5503675538656527, "grad_norm": 1.269566297531128, "learning_rate": 1.420983441613512e-05, "loss": 0.1048593521118164, "step": 4071 }, { "epoch": 0.5505027460920997, "grad_norm": 1.4541712999343872, "learning_rate": 1.420299957323003e-05, "loss": 0.13025665283203125, "step": 4072 }, { "epoch": 0.5506379383185467, "grad_norm": 2.567148208618164, "learning_rate": 1.4196164896264679e-05, "loss": 0.24313735961914062, "step": 4073 }, { "epoch": 0.5507731305449937, "grad_norm": 1.2427109479904175, "learning_rate": 1.418933038666208e-05, "loss": 0.20119094848632812, "step": 4074 }, { "epoch": 0.5509083227714406, "grad_norm": 1.6346986293792725, "learning_rate": 1.4182496045845217e-05, "loss": 0.1792583465576172, "step": 4075 }, { "epoch": 0.5510435149978876, "grad_norm": 0.8594448566436768, "learning_rate": 1.4175661875237036e-05, "loss": 0.12098884582519531, "step": 4076 }, { "epoch": 0.5511787072243346, "grad_norm": 1.3619225025177002, "learning_rate": 1.416882787626045e-05, "loss": 0.10511493682861328, "step": 4077 }, { "epoch": 0.5513138994507816, "grad_norm": 1.0802544355392456, "learning_rate": 1.4161994050338334e-05, "loss": 0.15864920616149902, "step": 4078 }, { "epoch": 0.5514490916772286, "grad_norm": 2.0858240127563477, "learning_rate": 1.4155160398893528e-05, "loss": 0.19161272048950195, "step": 4079 }, { "epoch": 0.5515842839036755, "grad_norm": 1.680711030960083, "learning_rate": 1.4148326923348824e-05, "loss": 0.22324275970458984, "step": 4080 }, { "epoch": 0.5517194761301225, "grad_norm": 1.3554102182388306, "learning_rate": 1.4141493625127e-05, "loss": 0.15987396240234375, "step": 4081 }, { "epoch": 0.5518546683565695, "grad_norm": 1.3237963914871216, "learning_rate": 1.4134660505650786e-05, "loss": 0.19043636322021484, "step": 4082 }, { "epoch": 0.5519898605830165, "grad_norm": 1.440773844718933, "learning_rate": 1.4127827566342864e-05, "loss": 0.14103984832763672, "step": 4083 }, { "epoch": 0.5521250528094634, "grad_norm": 1.2290815114974976, "learning_rate": 1.4120994808625896e-05, "loss": 0.15516376495361328, "step": 4084 }, { "epoch": 0.5522602450359104, "grad_norm": 0.8551283478736877, "learning_rate": 1.4114162233922494e-05, "loss": 0.1876659393310547, "step": 4085 }, { "epoch": 0.5523954372623574, "grad_norm": 0.7810075283050537, "learning_rate": 1.4107329843655238e-05, "loss": 0.17477798461914062, "step": 4086 }, { "epoch": 0.5525306294888044, "grad_norm": 1.6059144735336304, "learning_rate": 1.4100497639246675e-05, "loss": 0.1829977035522461, "step": 4087 }, { "epoch": 0.5526658217152514, "grad_norm": 1.0176339149475098, "learning_rate": 1.4093665622119294e-05, "loss": 0.16252994537353516, "step": 4088 }, { "epoch": 0.5528010139416983, "grad_norm": 0.9489703178405762, "learning_rate": 1.4086833793695566e-05, "loss": 0.16922378540039062, "step": 4089 }, { "epoch": 0.5529362061681453, "grad_norm": 1.301771640777588, "learning_rate": 1.408000215539791e-05, "loss": 0.15185487270355225, "step": 4090 }, { "epoch": 0.5530713983945923, "grad_norm": 0.7590004801750183, "learning_rate": 1.4073170708648711e-05, "loss": 0.1298379898071289, "step": 4091 }, { "epoch": 0.5532065906210393, "grad_norm": 1.467963695526123, "learning_rate": 1.406633945487032e-05, "loss": 0.21006393432617188, "step": 4092 }, { "epoch": 0.5533417828474863, "grad_norm": 1.3346192836761475, "learning_rate": 1.4059508395485026e-05, "loss": 0.1307516098022461, "step": 4093 }, { "epoch": 0.5534769750739332, "grad_norm": 0.5424954295158386, "learning_rate": 1.4052677531915102e-05, "loss": 0.14656639099121094, "step": 4094 }, { "epoch": 0.5536121673003802, "grad_norm": 1.3153436183929443, "learning_rate": 1.4045846865582765e-05, "loss": 0.2225494384765625, "step": 4095 }, { "epoch": 0.5537473595268272, "grad_norm": 1.6059702634811401, "learning_rate": 1.4039016397910206e-05, "loss": 0.1785566806793213, "step": 4096 }, { "epoch": 0.5538825517532742, "grad_norm": 1.0207138061523438, "learning_rate": 1.403218613031955e-05, "loss": 0.1328721046447754, "step": 4097 }, { "epoch": 0.5540177439797211, "grad_norm": 0.9160784482955933, "learning_rate": 1.4025356064232903e-05, "loss": 0.128265380859375, "step": 4098 }, { "epoch": 0.5541529362061681, "grad_norm": 1.027923583984375, "learning_rate": 1.4018526201072324e-05, "loss": 0.186886727809906, "step": 4099 }, { "epoch": 0.5542881284326151, "grad_norm": 0.7982254028320312, "learning_rate": 1.4011696542259821e-05, "loss": 0.17944121360778809, "step": 4100 }, { "epoch": 0.5544233206590621, "grad_norm": 1.5268313884735107, "learning_rate": 1.4004867089217376e-05, "loss": 0.1860370635986328, "step": 4101 }, { "epoch": 0.5545585128855091, "grad_norm": 1.6268715858459473, "learning_rate": 1.39980378433669e-05, "loss": 0.15576553344726562, "step": 4102 }, { "epoch": 0.554693705111956, "grad_norm": 1.6642669439315796, "learning_rate": 1.399120880613029e-05, "loss": 0.19647693634033203, "step": 4103 }, { "epoch": 0.554828897338403, "grad_norm": 0.889184296131134, "learning_rate": 1.3984379978929388e-05, "loss": 0.12539100646972656, "step": 4104 }, { "epoch": 0.55496408956485, "grad_norm": 1.6981139183044434, "learning_rate": 1.3977551363185995e-05, "loss": 0.16975784301757812, "step": 4105 }, { "epoch": 0.555099281791297, "grad_norm": 0.9955816268920898, "learning_rate": 1.3970722960321854e-05, "loss": 0.1915283203125, "step": 4106 }, { "epoch": 0.555234474017744, "grad_norm": 1.5031569004058838, "learning_rate": 1.3963894771758682e-05, "loss": 0.09496653079986572, "step": 4107 }, { "epoch": 0.5553696662441909, "grad_norm": 0.4611392617225647, "learning_rate": 1.3957066798918143e-05, "loss": 0.10890722274780273, "step": 4108 }, { "epoch": 0.5555048584706379, "grad_norm": 0.6820783615112305, "learning_rate": 1.3950239043221861e-05, "loss": 0.1425970196723938, "step": 4109 }, { "epoch": 0.5556400506970849, "grad_norm": 1.1353121995925903, "learning_rate": 1.3943411506091408e-05, "loss": 0.15273046493530273, "step": 4110 }, { "epoch": 0.5557752429235319, "grad_norm": 1.0135598182678223, "learning_rate": 1.3936584188948313e-05, "loss": 0.1280224323272705, "step": 4111 }, { "epoch": 0.5559104351499788, "grad_norm": 1.030727744102478, "learning_rate": 1.3929757093214059e-05, "loss": 0.14476251602172852, "step": 4112 }, { "epoch": 0.5560456273764258, "grad_norm": 1.4061270952224731, "learning_rate": 1.3922930220310085e-05, "loss": 0.20631122589111328, "step": 4113 }, { "epoch": 0.5561808196028728, "grad_norm": 1.3502142429351807, "learning_rate": 1.3916103571657786e-05, "loss": 0.2161102294921875, "step": 4114 }, { "epoch": 0.5563160118293198, "grad_norm": 3.845872402191162, "learning_rate": 1.3909277148678504e-05, "loss": 0.26122093200683594, "step": 4115 }, { "epoch": 0.5564512040557668, "grad_norm": 1.0589784383773804, "learning_rate": 1.3902450952793536e-05, "loss": 0.20694923400878906, "step": 4116 }, { "epoch": 0.5565863962822137, "grad_norm": 1.228968620300293, "learning_rate": 1.389562498542413e-05, "loss": 0.1755162477493286, "step": 4117 }, { "epoch": 0.5567215885086607, "grad_norm": 1.1686471700668335, "learning_rate": 1.388879924799149e-05, "loss": 0.2123098373413086, "step": 4118 }, { "epoch": 0.5568567807351077, "grad_norm": 2.5104527473449707, "learning_rate": 1.388197374191678e-05, "loss": 0.18030071258544922, "step": 4119 }, { "epoch": 0.5569919729615547, "grad_norm": 1.3864128589630127, "learning_rate": 1.387514846862109e-05, "loss": 0.19811248779296875, "step": 4120 }, { "epoch": 0.5571271651880016, "grad_norm": 1.5137351751327515, "learning_rate": 1.3868323429525492e-05, "loss": 0.17752790451049805, "step": 4121 }, { "epoch": 0.5572623574144486, "grad_norm": 1.1227834224700928, "learning_rate": 1.3861498626050986e-05, "loss": 0.20056915283203125, "step": 4122 }, { "epoch": 0.5573975496408956, "grad_norm": 0.5824602842330933, "learning_rate": 1.385467405961854e-05, "loss": 0.0982813835144043, "step": 4123 }, { "epoch": 0.5575327418673426, "grad_norm": 0.7411953210830688, "learning_rate": 1.3847849731649066e-05, "loss": 0.1483306884765625, "step": 4124 }, { "epoch": 0.5576679340937896, "grad_norm": 1.3560596704483032, "learning_rate": 1.3841025643563418e-05, "loss": 0.17437362670898438, "step": 4125 }, { "epoch": 0.5578031263202365, "grad_norm": 1.6405913829803467, "learning_rate": 1.3834201796782413e-05, "loss": 0.16826295852661133, "step": 4126 }, { "epoch": 0.5579383185466835, "grad_norm": 1.0828807353973389, "learning_rate": 1.3827378192726808e-05, "loss": 0.23982620239257812, "step": 4127 }, { "epoch": 0.5580735107731305, "grad_norm": 0.6675437688827515, "learning_rate": 1.3820554832817324e-05, "loss": 0.13168621063232422, "step": 4128 }, { "epoch": 0.5582087029995775, "grad_norm": 1.4097481966018677, "learning_rate": 1.3813731718474606e-05, "loss": 0.14971518516540527, "step": 4129 }, { "epoch": 0.5583438952260245, "grad_norm": 0.921987771987915, "learning_rate": 1.380690885111927e-05, "loss": 0.22249794006347656, "step": 4130 }, { "epoch": 0.5584790874524714, "grad_norm": 1.6194636821746826, "learning_rate": 1.3800086232171877e-05, "loss": 0.19646787643432617, "step": 4131 }, { "epoch": 0.5586142796789184, "grad_norm": 0.6181746125221252, "learning_rate": 1.3793263863052926e-05, "loss": 0.1591939926147461, "step": 4132 }, { "epoch": 0.5587494719053654, "grad_norm": 0.7877742648124695, "learning_rate": 1.3786441745182881e-05, "loss": 0.17911720275878906, "step": 4133 }, { "epoch": 0.5588846641318124, "grad_norm": 2.1206464767456055, "learning_rate": 1.3779619879982127e-05, "loss": 0.17891883850097656, "step": 4134 }, { "epoch": 0.5590198563582593, "grad_norm": 0.9240705966949463, "learning_rate": 1.3772798268871025e-05, "loss": 0.1595001220703125, "step": 4135 }, { "epoch": 0.5591550485847063, "grad_norm": 0.9859445691108704, "learning_rate": 1.376597691326987e-05, "loss": 0.1372842788696289, "step": 4136 }, { "epoch": 0.5592902408111533, "grad_norm": 1.3918176889419556, "learning_rate": 1.3759155814598898e-05, "loss": 0.1839895248413086, "step": 4137 }, { "epoch": 0.5594254330376003, "grad_norm": 2.131326675415039, "learning_rate": 1.3752334974278308e-05, "loss": 0.19667625427246094, "step": 4138 }, { "epoch": 0.5595606252640473, "grad_norm": 1.300350308418274, "learning_rate": 1.3745514393728225e-05, "loss": 0.15147972106933594, "step": 4139 }, { "epoch": 0.5596958174904944, "grad_norm": 0.7752305865287781, "learning_rate": 1.3738694074368735e-05, "loss": 0.09185075759887695, "step": 4140 }, { "epoch": 0.5598310097169413, "grad_norm": 1.0831496715545654, "learning_rate": 1.3731874017619868e-05, "loss": 0.16763591766357422, "step": 4141 }, { "epoch": 0.5599662019433883, "grad_norm": 0.7103542685508728, "learning_rate": 1.3725054224901597e-05, "loss": 0.12990140914916992, "step": 4142 }, { "epoch": 0.5601013941698353, "grad_norm": 1.1584399938583374, "learning_rate": 1.3718234697633826e-05, "loss": 0.1871471405029297, "step": 4143 }, { "epoch": 0.5602365863962823, "grad_norm": 1.0294724702835083, "learning_rate": 1.3711415437236427e-05, "loss": 0.1923675537109375, "step": 4144 }, { "epoch": 0.5603717786227292, "grad_norm": 0.7195061445236206, "learning_rate": 1.3704596445129207e-05, "loss": 0.17370319366455078, "step": 4145 }, { "epoch": 0.5605069708491762, "grad_norm": 0.8394173979759216, "learning_rate": 1.369777772273192e-05, "loss": 0.15137863159179688, "step": 4146 }, { "epoch": 0.5606421630756232, "grad_norm": 1.0501354932785034, "learning_rate": 1.369095927146425e-05, "loss": 0.15513992309570312, "step": 4147 }, { "epoch": 0.5607773553020702, "grad_norm": 0.7999750375747681, "learning_rate": 1.3684141092745846e-05, "loss": 0.1793060302734375, "step": 4148 }, { "epoch": 0.5609125475285172, "grad_norm": 1.0264382362365723, "learning_rate": 1.3677323187996276e-05, "loss": 0.16245555877685547, "step": 4149 }, { "epoch": 0.5610477397549641, "grad_norm": 0.9214684367179871, "learning_rate": 1.3670505558635074e-05, "loss": 0.1819000244140625, "step": 4150 }, { "epoch": 0.5611829319814111, "grad_norm": 1.9814612865447998, "learning_rate": 1.366368820608171e-05, "loss": 0.1803140640258789, "step": 4151 }, { "epoch": 0.5613181242078581, "grad_norm": 0.5672913193702698, "learning_rate": 1.365687113175558e-05, "loss": 0.12874889373779297, "step": 4152 }, { "epoch": 0.5614533164343051, "grad_norm": 0.7467692494392395, "learning_rate": 1.3650054337076049e-05, "loss": 0.15250778198242188, "step": 4153 }, { "epoch": 0.561588508660752, "grad_norm": 0.9375015497207642, "learning_rate": 1.3643237823462398e-05, "loss": 0.15077543258666992, "step": 4154 }, { "epoch": 0.561723700887199, "grad_norm": 1.0464015007019043, "learning_rate": 1.363642159233387e-05, "loss": 0.18739771842956543, "step": 4155 }, { "epoch": 0.561858893113646, "grad_norm": 1.2129905223846436, "learning_rate": 1.3629605645109642e-05, "loss": 0.13581228256225586, "step": 4156 }, { "epoch": 0.561994085340093, "grad_norm": 1.5026555061340332, "learning_rate": 1.362278998320882e-05, "loss": 0.2496814727783203, "step": 4157 }, { "epoch": 0.56212927756654, "grad_norm": 1.0084865093231201, "learning_rate": 1.3615974608050472e-05, "loss": 0.19599342346191406, "step": 4158 }, { "epoch": 0.5622644697929869, "grad_norm": 0.9029825329780579, "learning_rate": 1.3609159521053588e-05, "loss": 0.2166290283203125, "step": 4159 }, { "epoch": 0.5623996620194339, "grad_norm": 2.849496841430664, "learning_rate": 1.3602344723637107e-05, "loss": 0.20038199424743652, "step": 4160 }, { "epoch": 0.5625348542458809, "grad_norm": 0.9239959120750427, "learning_rate": 1.3595530217219916e-05, "loss": 0.1507434844970703, "step": 4161 }, { "epoch": 0.5626700464723279, "grad_norm": 0.9023704528808594, "learning_rate": 1.3588716003220815e-05, "loss": 0.19112777709960938, "step": 4162 }, { "epoch": 0.5628052386987749, "grad_norm": 0.9706947803497314, "learning_rate": 1.3581902083058574e-05, "loss": 0.16707897186279297, "step": 4163 }, { "epoch": 0.5629404309252218, "grad_norm": 0.8416209816932678, "learning_rate": 1.3575088458151877e-05, "loss": 0.1408233642578125, "step": 4164 }, { "epoch": 0.5630756231516688, "grad_norm": 1.327923059463501, "learning_rate": 1.3568275129919367e-05, "loss": 0.19009003043174744, "step": 4165 }, { "epoch": 0.5632108153781158, "grad_norm": 0.9411091804504395, "learning_rate": 1.3561462099779604e-05, "loss": 0.18981170654296875, "step": 4166 }, { "epoch": 0.5633460076045628, "grad_norm": 0.9290975332260132, "learning_rate": 1.3554649369151104e-05, "loss": 0.18311023712158203, "step": 4167 }, { "epoch": 0.5634811998310097, "grad_norm": 1.2058131694793701, "learning_rate": 1.3547836939452315e-05, "loss": 0.1944713592529297, "step": 4168 }, { "epoch": 0.5636163920574567, "grad_norm": 1.922459363937378, "learning_rate": 1.3541024812101615e-05, "loss": 0.18303394317626953, "step": 4169 }, { "epoch": 0.5637515842839037, "grad_norm": 0.9371699690818787, "learning_rate": 1.3534212988517339e-05, "loss": 0.20040130615234375, "step": 4170 }, { "epoch": 0.5638867765103507, "grad_norm": 0.8874692916870117, "learning_rate": 1.3527401470117726e-05, "loss": 0.16982078552246094, "step": 4171 }, { "epoch": 0.5640219687367977, "grad_norm": 1.2609591484069824, "learning_rate": 1.3520590258320981e-05, "loss": 0.12705135345458984, "step": 4172 }, { "epoch": 0.5641571609632446, "grad_norm": 0.6744574904441833, "learning_rate": 1.3513779354545235e-05, "loss": 0.145172119140625, "step": 4173 }, { "epoch": 0.5642923531896916, "grad_norm": 0.9199692010879517, "learning_rate": 1.3506968760208557e-05, "loss": 0.18248939514160156, "step": 4174 }, { "epoch": 0.5644275454161386, "grad_norm": 1.445523977279663, "learning_rate": 1.3500158476728938e-05, "loss": 0.13695749640464783, "step": 4175 }, { "epoch": 0.5645627376425856, "grad_norm": 2.014815092086792, "learning_rate": 1.3493348505524325e-05, "loss": 0.16543793678283691, "step": 4176 }, { "epoch": 0.5646979298690326, "grad_norm": 1.3540127277374268, "learning_rate": 1.3486538848012586e-05, "loss": 0.16884231567382812, "step": 4177 }, { "epoch": 0.5648331220954795, "grad_norm": 0.8074826598167419, "learning_rate": 1.3479729505611532e-05, "loss": 0.18150711059570312, "step": 4178 }, { "epoch": 0.5649683143219265, "grad_norm": 1.3512455224990845, "learning_rate": 1.3472920479738906e-05, "loss": 0.18186187744140625, "step": 4179 }, { "epoch": 0.5651035065483735, "grad_norm": 2.8973007202148438, "learning_rate": 1.346611177181237e-05, "loss": 0.18483352661132812, "step": 4180 }, { "epoch": 0.5652386987748205, "grad_norm": 1.1826560497283936, "learning_rate": 1.3459303383249547e-05, "loss": 0.17414379119873047, "step": 4181 }, { "epoch": 0.5653738910012674, "grad_norm": 0.9242448806762695, "learning_rate": 1.3452495315467975e-05, "loss": 0.2081432342529297, "step": 4182 }, { "epoch": 0.5655090832277144, "grad_norm": 1.034800410270691, "learning_rate": 1.3445687569885132e-05, "loss": 0.14751625061035156, "step": 4183 }, { "epoch": 0.5656442754541614, "grad_norm": 0.6133968234062195, "learning_rate": 1.3438880147918429e-05, "loss": 0.10791802406311035, "step": 4184 }, { "epoch": 0.5657794676806084, "grad_norm": 0.7634124159812927, "learning_rate": 1.3432073050985201e-05, "loss": 0.1399097442626953, "step": 4185 }, { "epoch": 0.5659146599070554, "grad_norm": 1.112557053565979, "learning_rate": 1.3425266280502721e-05, "loss": 0.16903066635131836, "step": 4186 }, { "epoch": 0.5660498521335023, "grad_norm": 1.5856046676635742, "learning_rate": 1.3418459837888202e-05, "loss": 0.20407485961914062, "step": 4187 }, { "epoch": 0.5661850443599493, "grad_norm": 1.1973490715026855, "learning_rate": 1.3411653724558784e-05, "loss": 0.1922316551208496, "step": 4188 }, { "epoch": 0.5663202365863963, "grad_norm": 1.8968784809112549, "learning_rate": 1.3404847941931523e-05, "loss": 0.21197509765625, "step": 4189 }, { "epoch": 0.5664554288128433, "grad_norm": 0.8597639799118042, "learning_rate": 1.339804249142343e-05, "loss": 0.20575904846191406, "step": 4190 }, { "epoch": 0.5665906210392903, "grad_norm": 0.7887629866600037, "learning_rate": 1.3391237374451429e-05, "loss": 0.13595199584960938, "step": 4191 }, { "epoch": 0.5667258132657372, "grad_norm": 1.6754926443099976, "learning_rate": 1.3384432592432388e-05, "loss": 0.2043018341064453, "step": 4192 }, { "epoch": 0.5668610054921842, "grad_norm": 2.6410346031188965, "learning_rate": 1.3377628146783102e-05, "loss": 0.23443031311035156, "step": 4193 }, { "epoch": 0.5669961977186312, "grad_norm": 1.1812888383865356, "learning_rate": 1.3370824038920281e-05, "loss": 0.1658635139465332, "step": 4194 }, { "epoch": 0.5671313899450782, "grad_norm": 1.013667345046997, "learning_rate": 1.3364020270260586e-05, "loss": 0.2060832977294922, "step": 4195 }, { "epoch": 0.5672665821715251, "grad_norm": 1.507516622543335, "learning_rate": 1.335721684222059e-05, "loss": 0.19167089462280273, "step": 4196 }, { "epoch": 0.5674017743979721, "grad_norm": 0.7882696390151978, "learning_rate": 1.3350413756216816e-05, "loss": 0.13694250583648682, "step": 4197 }, { "epoch": 0.5675369666244191, "grad_norm": 1.3457995653152466, "learning_rate": 1.334361101366569e-05, "loss": 0.18742942810058594, "step": 4198 }, { "epoch": 0.5676721588508661, "grad_norm": 1.2155758142471313, "learning_rate": 1.3336808615983582e-05, "loss": 0.16469287872314453, "step": 4199 }, { "epoch": 0.5678073510773131, "grad_norm": 2.1494460105895996, "learning_rate": 1.3330006564586791e-05, "loss": 0.21450424194335938, "step": 4200 }, { "epoch": 0.56794254330376, "grad_norm": 4.148214817047119, "learning_rate": 1.3323204860891539e-05, "loss": 0.278839111328125, "step": 4201 }, { "epoch": 0.568077735530207, "grad_norm": 0.7228233218193054, "learning_rate": 1.3316403506313981e-05, "loss": 0.16372346878051758, "step": 4202 }, { "epoch": 0.568212927756654, "grad_norm": 0.6536335945129395, "learning_rate": 1.3309602502270184e-05, "loss": 0.13706541061401367, "step": 4203 }, { "epoch": 0.568348119983101, "grad_norm": 1.010862112045288, "learning_rate": 1.3302801850176161e-05, "loss": 0.14612197875976562, "step": 4204 }, { "epoch": 0.568483312209548, "grad_norm": 1.074796199798584, "learning_rate": 1.3296001551447848e-05, "loss": 0.1476306915283203, "step": 4205 }, { "epoch": 0.5686185044359949, "grad_norm": 2.4724109172821045, "learning_rate": 1.32892016075011e-05, "loss": 0.21670913696289062, "step": 4206 }, { "epoch": 0.5687536966624419, "grad_norm": 1.6053893566131592, "learning_rate": 1.3282402019751694e-05, "loss": 0.1446981430053711, "step": 4207 }, { "epoch": 0.5688888888888889, "grad_norm": 0.9083832502365112, "learning_rate": 1.327560278961535e-05, "loss": 0.15733730792999268, "step": 4208 }, { "epoch": 0.5690240811153359, "grad_norm": 1.1596590280532837, "learning_rate": 1.3268803918507699e-05, "loss": 0.16233444213867188, "step": 4209 }, { "epoch": 0.5691592733417828, "grad_norm": 1.3215627670288086, "learning_rate": 1.3262005407844306e-05, "loss": 0.14342010021209717, "step": 4210 }, { "epoch": 0.5692944655682298, "grad_norm": 0.9823212027549744, "learning_rate": 1.325520725904066e-05, "loss": 0.15561389923095703, "step": 4211 }, { "epoch": 0.5694296577946768, "grad_norm": 0.7310113906860352, "learning_rate": 1.3248409473512158e-05, "loss": 0.1578388214111328, "step": 4212 }, { "epoch": 0.5695648500211238, "grad_norm": 3.474916696548462, "learning_rate": 1.3241612052674146e-05, "loss": 0.2415449619293213, "step": 4213 }, { "epoch": 0.5697000422475708, "grad_norm": 1.611437201499939, "learning_rate": 1.3234814997941883e-05, "loss": 0.22538185119628906, "step": 4214 }, { "epoch": 0.5698352344740177, "grad_norm": 0.7980181574821472, "learning_rate": 1.322801831073055e-05, "loss": 0.14082515239715576, "step": 4215 }, { "epoch": 0.5699704267004647, "grad_norm": 0.9239475131034851, "learning_rate": 1.322122199245526e-05, "loss": 0.16942787170410156, "step": 4216 }, { "epoch": 0.5701056189269117, "grad_norm": 0.9296362996101379, "learning_rate": 1.321442604453103e-05, "loss": 0.16514015197753906, "step": 4217 }, { "epoch": 0.5702408111533587, "grad_norm": 2.047433376312256, "learning_rate": 1.320763046837282e-05, "loss": 0.20308876037597656, "step": 4218 }, { "epoch": 0.5703760033798057, "grad_norm": 2.0884346961975098, "learning_rate": 1.3200835265395504e-05, "loss": 0.18378257751464844, "step": 4219 }, { "epoch": 0.5705111956062526, "grad_norm": 1.5572584867477417, "learning_rate": 1.3194040437013885e-05, "loss": 0.2159261703491211, "step": 4220 }, { "epoch": 0.5706463878326996, "grad_norm": 1.6841769218444824, "learning_rate": 1.3187245984642673e-05, "loss": 0.17226409912109375, "step": 4221 }, { "epoch": 0.5707815800591466, "grad_norm": 0.8878856897354126, "learning_rate": 1.3180451909696517e-05, "loss": 0.134521484375, "step": 4222 }, { "epoch": 0.5709167722855936, "grad_norm": 1.563896656036377, "learning_rate": 1.3173658213589972e-05, "loss": 0.2036285400390625, "step": 4223 }, { "epoch": 0.5710519645120405, "grad_norm": 1.085018277168274, "learning_rate": 1.3166864897737526e-05, "loss": 0.15758800506591797, "step": 4224 }, { "epoch": 0.5711871567384875, "grad_norm": 0.744596004486084, "learning_rate": 1.3160071963553593e-05, "loss": 0.15124034881591797, "step": 4225 }, { "epoch": 0.5713223489649345, "grad_norm": 2.013113260269165, "learning_rate": 1.315327941245248e-05, "loss": 0.1838216781616211, "step": 4226 }, { "epoch": 0.5714575411913815, "grad_norm": 2.0539445877075195, "learning_rate": 1.3146487245848445e-05, "loss": 0.1830911636352539, "step": 4227 }, { "epoch": 0.5715927334178285, "grad_norm": 2.651798725128174, "learning_rate": 1.3139695465155645e-05, "loss": 0.1342296600341797, "step": 4228 }, { "epoch": 0.5717279256442754, "grad_norm": 1.0911517143249512, "learning_rate": 1.3132904071788177e-05, "loss": 0.17379283905029297, "step": 4229 }, { "epoch": 0.5718631178707224, "grad_norm": 1.005952000617981, "learning_rate": 1.3126113067160031e-05, "loss": 0.1800060272216797, "step": 4230 }, { "epoch": 0.5719983100971694, "grad_norm": 1.9741744995117188, "learning_rate": 1.3119322452685139e-05, "loss": 0.2034893035888672, "step": 4231 }, { "epoch": 0.5721335023236164, "grad_norm": 1.0766539573669434, "learning_rate": 1.3112532229777344e-05, "loss": 0.20059823989868164, "step": 4232 }, { "epoch": 0.5722686945500634, "grad_norm": 1.4954265356063843, "learning_rate": 1.3105742399850399e-05, "loss": 0.21318721771240234, "step": 4233 }, { "epoch": 0.5724038867765103, "grad_norm": 0.8464428186416626, "learning_rate": 1.3098952964317996e-05, "loss": 0.12308788299560547, "step": 4234 }, { "epoch": 0.5725390790029573, "grad_norm": 0.996900200843811, "learning_rate": 1.3092163924593717e-05, "loss": 0.19365215301513672, "step": 4235 }, { "epoch": 0.5726742712294043, "grad_norm": 1.1676437854766846, "learning_rate": 1.308537528209108e-05, "loss": 0.1657238006591797, "step": 4236 }, { "epoch": 0.5728094634558513, "grad_norm": 1.6026939153671265, "learning_rate": 1.3078587038223525e-05, "loss": 0.20125532150268555, "step": 4237 }, { "epoch": 0.5729446556822982, "grad_norm": 1.07183039188385, "learning_rate": 1.3071799194404392e-05, "loss": 0.2249908447265625, "step": 4238 }, { "epoch": 0.5730798479087452, "grad_norm": 1.5556108951568604, "learning_rate": 1.3065011752046955e-05, "loss": 0.20391273498535156, "step": 4239 }, { "epoch": 0.5732150401351922, "grad_norm": 1.349663257598877, "learning_rate": 1.3058224712564382e-05, "loss": 0.14986801147460938, "step": 4240 }, { "epoch": 0.5733502323616392, "grad_norm": 0.6696560382843018, "learning_rate": 1.305143807736978e-05, "loss": 0.17470741271972656, "step": 4241 }, { "epoch": 0.5734854245880862, "grad_norm": 1.268415093421936, "learning_rate": 1.3044651847876163e-05, "loss": 0.233642578125, "step": 4242 }, { "epoch": 0.5736206168145331, "grad_norm": 0.8891600370407104, "learning_rate": 1.3037866025496466e-05, "loss": 0.22421646118164062, "step": 4243 }, { "epoch": 0.5737558090409801, "grad_norm": 0.9408543705940247, "learning_rate": 1.3031080611643514e-05, "loss": 0.1760845184326172, "step": 4244 }, { "epoch": 0.5738910012674271, "grad_norm": 1.0780693292617798, "learning_rate": 1.3024295607730083e-05, "loss": 0.16676855087280273, "step": 4245 }, { "epoch": 0.5740261934938741, "grad_norm": 1.180047631263733, "learning_rate": 1.301751101516884e-05, "loss": 0.19536399841308594, "step": 4246 }, { "epoch": 0.574161385720321, "grad_norm": 0.9813616275787354, "learning_rate": 1.3010726835372377e-05, "loss": 0.17902040481567383, "step": 4247 }, { "epoch": 0.574296577946768, "grad_norm": 1.3495491743087769, "learning_rate": 1.30039430697532e-05, "loss": 0.23540306091308594, "step": 4248 }, { "epoch": 0.574431770173215, "grad_norm": 1.459987998008728, "learning_rate": 1.2997159719723713e-05, "loss": 0.16062259674072266, "step": 4249 }, { "epoch": 0.574566962399662, "grad_norm": 0.9632294178009033, "learning_rate": 1.2990376786696254e-05, "loss": 0.19474411010742188, "step": 4250 }, { "epoch": 0.574702154626109, "grad_norm": 1.340670108795166, "learning_rate": 1.2983594272083063e-05, "loss": 0.2207050323486328, "step": 4251 }, { "epoch": 0.5748373468525559, "grad_norm": 1.134849190711975, "learning_rate": 1.2976812177296307e-05, "loss": 0.17090415954589844, "step": 4252 }, { "epoch": 0.5749725390790029, "grad_norm": 1.022345781326294, "learning_rate": 1.2970030503748039e-05, "loss": 0.1649923324584961, "step": 4253 }, { "epoch": 0.5751077313054499, "grad_norm": 0.7901409864425659, "learning_rate": 1.2963249252850242e-05, "loss": 0.12773704528808594, "step": 4254 }, { "epoch": 0.5752429235318969, "grad_norm": 0.9397100806236267, "learning_rate": 1.295646842601481e-05, "loss": 0.1429595947265625, "step": 4255 }, { "epoch": 0.5753781157583439, "grad_norm": 2.5093753337860107, "learning_rate": 1.294968802465355e-05, "loss": 0.2624092102050781, "step": 4256 }, { "epoch": 0.5755133079847908, "grad_norm": 1.3152304887771606, "learning_rate": 1.2942908050178187e-05, "loss": 0.15006160736083984, "step": 4257 }, { "epoch": 0.5756485002112378, "grad_norm": 1.5723315477371216, "learning_rate": 1.293612850400033e-05, "loss": 0.16363239288330078, "step": 4258 }, { "epoch": 0.5757836924376848, "grad_norm": 1.3714478015899658, "learning_rate": 1.2929349387531525e-05, "loss": 0.24256324768066406, "step": 4259 }, { "epoch": 0.5759188846641318, "grad_norm": 2.4423773288726807, "learning_rate": 1.2922570702183217e-05, "loss": 0.1466083526611328, "step": 4260 }, { "epoch": 0.5760540768905787, "grad_norm": 0.7902779579162598, "learning_rate": 1.2915792449366768e-05, "loss": 0.17400836944580078, "step": 4261 }, { "epoch": 0.5761892691170257, "grad_norm": 1.0598139762878418, "learning_rate": 1.2909014630493451e-05, "loss": 0.15865302085876465, "step": 4262 }, { "epoch": 0.5763244613434727, "grad_norm": 1.1641024351119995, "learning_rate": 1.2902237246974432e-05, "loss": 0.18436670303344727, "step": 4263 }, { "epoch": 0.5764596535699197, "grad_norm": 1.0875691175460815, "learning_rate": 1.289546030022081e-05, "loss": 0.16290664672851562, "step": 4264 }, { "epoch": 0.5765948457963667, "grad_norm": 0.5964146256446838, "learning_rate": 1.2888683791643572e-05, "loss": 0.12008476257324219, "step": 4265 }, { "epoch": 0.5767300380228136, "grad_norm": 0.897089958190918, "learning_rate": 1.2881907722653633e-05, "loss": 0.144439697265625, "step": 4266 }, { "epoch": 0.5768652302492606, "grad_norm": 0.595203697681427, "learning_rate": 1.2875132094661796e-05, "loss": 0.1365680694580078, "step": 4267 }, { "epoch": 0.5770004224757076, "grad_norm": 2.0556488037109375, "learning_rate": 1.2868356909078787e-05, "loss": 0.1913890838623047, "step": 4268 }, { "epoch": 0.5771356147021546, "grad_norm": 0.9243132472038269, "learning_rate": 1.286158216731524e-05, "loss": 0.16130924224853516, "step": 4269 }, { "epoch": 0.5772708069286016, "grad_norm": 0.823891282081604, "learning_rate": 1.2854807870781686e-05, "loss": 0.17481136322021484, "step": 4270 }, { "epoch": 0.5774059991550485, "grad_norm": 0.9245734214782715, "learning_rate": 1.284803402088858e-05, "loss": 0.1632232666015625, "step": 4271 }, { "epoch": 0.5775411913814955, "grad_norm": 0.7338927984237671, "learning_rate": 1.284126061904626e-05, "loss": 0.15558815002441406, "step": 4272 }, { "epoch": 0.5776763836079425, "grad_norm": 1.0215390920639038, "learning_rate": 1.283448766666499e-05, "loss": 0.2491474151611328, "step": 4273 }, { "epoch": 0.5778115758343895, "grad_norm": 0.8469446301460266, "learning_rate": 1.282771516515494e-05, "loss": 0.13725662231445312, "step": 4274 }, { "epoch": 0.5779467680608364, "grad_norm": 1.7314738035202026, "learning_rate": 1.282094311592618e-05, "loss": 0.1603851318359375, "step": 4275 }, { "epoch": 0.5780819602872835, "grad_norm": 0.8443201184272766, "learning_rate": 1.2814171520388676e-05, "loss": 0.1697230339050293, "step": 4276 }, { "epoch": 0.5782171525137305, "grad_norm": 0.8805035948753357, "learning_rate": 1.2807400379952318e-05, "loss": 0.17308807373046875, "step": 4277 }, { "epoch": 0.5783523447401775, "grad_norm": 1.3650004863739014, "learning_rate": 1.2800629696026895e-05, "loss": 0.2112903594970703, "step": 4278 }, { "epoch": 0.5784875369666245, "grad_norm": 2.4215996265411377, "learning_rate": 1.2793859470022098e-05, "loss": 0.18595027923583984, "step": 4279 }, { "epoch": 0.5786227291930715, "grad_norm": 1.419944167137146, "learning_rate": 1.278708970334753e-05, "loss": 0.2301921844482422, "step": 4280 }, { "epoch": 0.5787579214195184, "grad_norm": 1.2153390645980835, "learning_rate": 1.2780320397412678e-05, "loss": 0.20734024047851562, "step": 4281 }, { "epoch": 0.5788931136459654, "grad_norm": 1.2576645612716675, "learning_rate": 1.2773551553626957e-05, "loss": 0.13971877098083496, "step": 4282 }, { "epoch": 0.5790283058724124, "grad_norm": 0.8711782097816467, "learning_rate": 1.2766783173399675e-05, "loss": 0.17128467559814453, "step": 4283 }, { "epoch": 0.5791634980988594, "grad_norm": 1.9798860549926758, "learning_rate": 1.276001525814005e-05, "loss": 0.20389437675476074, "step": 4284 }, { "epoch": 0.5792986903253063, "grad_norm": 1.053961157798767, "learning_rate": 1.2753247809257192e-05, "loss": 0.20755290985107422, "step": 4285 }, { "epoch": 0.5794338825517533, "grad_norm": 0.9589759707450867, "learning_rate": 1.2746480828160119e-05, "loss": 0.19939422607421875, "step": 4286 }, { "epoch": 0.5795690747782003, "grad_norm": 1.6030961275100708, "learning_rate": 1.2739714316257753e-05, "loss": 0.19925212860107422, "step": 4287 }, { "epoch": 0.5797042670046473, "grad_norm": 1.0283443927764893, "learning_rate": 1.273294827495892e-05, "loss": 0.16189134120941162, "step": 4288 }, { "epoch": 0.5798394592310943, "grad_norm": 0.5068125128746033, "learning_rate": 1.2726182705672352e-05, "loss": 0.12534713745117188, "step": 4289 }, { "epoch": 0.5799746514575412, "grad_norm": 1.5926272869110107, "learning_rate": 1.271941760980667e-05, "loss": 0.22414684295654297, "step": 4290 }, { "epoch": 0.5801098436839882, "grad_norm": 1.0084165334701538, "learning_rate": 1.2712652988770396e-05, "loss": 0.20436549186706543, "step": 4291 }, { "epoch": 0.5802450359104352, "grad_norm": 0.9874210357666016, "learning_rate": 1.2705888843971967e-05, "loss": 0.18202590942382812, "step": 4292 }, { "epoch": 0.5803802281368822, "grad_norm": 1.4401410818099976, "learning_rate": 1.2699125176819717e-05, "loss": 0.20728063583374023, "step": 4293 }, { "epoch": 0.5805154203633291, "grad_norm": 0.6970511674880981, "learning_rate": 1.269236198872188e-05, "loss": 0.11986136436462402, "step": 4294 }, { "epoch": 0.5806506125897761, "grad_norm": 0.7394571900367737, "learning_rate": 1.2685599281086577e-05, "loss": 0.16261768341064453, "step": 4295 }, { "epoch": 0.5807858048162231, "grad_norm": 0.6774092316627502, "learning_rate": 1.2678837055321849e-05, "loss": 0.13828563690185547, "step": 4296 }, { "epoch": 0.5809209970426701, "grad_norm": 0.9117124676704407, "learning_rate": 1.267207531283562e-05, "loss": 0.19450092315673828, "step": 4297 }, { "epoch": 0.5810561892691171, "grad_norm": 0.9277790784835815, "learning_rate": 1.266531405503573e-05, "loss": 0.13715744018554688, "step": 4298 }, { "epoch": 0.581191381495564, "grad_norm": 1.0747668743133545, "learning_rate": 1.26585532833299e-05, "loss": 0.21448707580566406, "step": 4299 }, { "epoch": 0.581326573722011, "grad_norm": 1.1016261577606201, "learning_rate": 1.2651792999125763e-05, "loss": 0.19028282165527344, "step": 4300 }, { "epoch": 0.581461765948458, "grad_norm": 1.1361908912658691, "learning_rate": 1.2645033203830846e-05, "loss": 0.1358637809753418, "step": 4301 }, { "epoch": 0.581596958174905, "grad_norm": 0.6379583477973938, "learning_rate": 1.2638273898852573e-05, "loss": 0.13873910903930664, "step": 4302 }, { "epoch": 0.581732150401352, "grad_norm": 1.1414406299591064, "learning_rate": 1.2631515085598275e-05, "loss": 0.20029830932617188, "step": 4303 }, { "epoch": 0.5818673426277989, "grad_norm": 2.173987627029419, "learning_rate": 1.262475676547516e-05, "loss": 0.17449665069580078, "step": 4304 }, { "epoch": 0.5820025348542459, "grad_norm": 0.9249931573867798, "learning_rate": 1.2617998939890352e-05, "loss": 0.15088510513305664, "step": 4305 }, { "epoch": 0.5821377270806929, "grad_norm": 1.3683372735977173, "learning_rate": 1.261124161025087e-05, "loss": 0.20980072021484375, "step": 4306 }, { "epoch": 0.5822729193071399, "grad_norm": 2.326523780822754, "learning_rate": 1.260448477796362e-05, "loss": 0.23107528686523438, "step": 4307 }, { "epoch": 0.5824081115335868, "grad_norm": 0.7506234645843506, "learning_rate": 1.259772844443542e-05, "loss": 0.12270912528038025, "step": 4308 }, { "epoch": 0.5825433037600338, "grad_norm": 1.0304185152053833, "learning_rate": 1.2590972611072964e-05, "loss": 0.20026206970214844, "step": 4309 }, { "epoch": 0.5826784959864808, "grad_norm": 1.0934737920761108, "learning_rate": 1.2584217279282855e-05, "loss": 0.16681957244873047, "step": 4310 }, { "epoch": 0.5828136882129278, "grad_norm": 2.0285887718200684, "learning_rate": 1.2577462450471593e-05, "loss": 0.16617146134376526, "step": 4311 }, { "epoch": 0.5829488804393748, "grad_norm": 1.667677402496338, "learning_rate": 1.2570708126045574e-05, "loss": 0.23253631591796875, "step": 4312 }, { "epoch": 0.5830840726658217, "grad_norm": 1.3718093633651733, "learning_rate": 1.256395430741107e-05, "loss": 0.14468622207641602, "step": 4313 }, { "epoch": 0.5832192648922687, "grad_norm": 1.5100034475326538, "learning_rate": 1.2557200995974268e-05, "loss": 0.202911376953125, "step": 4314 }, { "epoch": 0.5833544571187157, "grad_norm": 1.465610384941101, "learning_rate": 1.2550448193141248e-05, "loss": 0.15888404846191406, "step": 4315 }, { "epoch": 0.5834896493451627, "grad_norm": 1.3126379251480103, "learning_rate": 1.2543695900317977e-05, "loss": 0.2035231590270996, "step": 4316 }, { "epoch": 0.5836248415716097, "grad_norm": 1.0890021324157715, "learning_rate": 1.2536944118910323e-05, "loss": 0.1485280990600586, "step": 4317 }, { "epoch": 0.5837600337980566, "grad_norm": 1.1250413656234741, "learning_rate": 1.2530192850324032e-05, "loss": 0.15525531768798828, "step": 4318 }, { "epoch": 0.5838952260245036, "grad_norm": 0.991051971912384, "learning_rate": 1.252344209596476e-05, "loss": 0.1225278377532959, "step": 4319 }, { "epoch": 0.5840304182509506, "grad_norm": 1.389427661895752, "learning_rate": 1.251669185723805e-05, "loss": 0.21108627319335938, "step": 4320 }, { "epoch": 0.5841656104773976, "grad_norm": 2.044124126434326, "learning_rate": 1.2509942135549344e-05, "loss": 0.15939569473266602, "step": 4321 }, { "epoch": 0.5843008027038445, "grad_norm": 1.3384172916412354, "learning_rate": 1.250319293230396e-05, "loss": 0.1753253936767578, "step": 4322 }, { "epoch": 0.5844359949302915, "grad_norm": 1.1628210544586182, "learning_rate": 1.2496444248907121e-05, "loss": 0.20353269577026367, "step": 4323 }, { "epoch": 0.5845711871567385, "grad_norm": 0.6496607661247253, "learning_rate": 1.2489696086763939e-05, "loss": 0.13666749000549316, "step": 4324 }, { "epoch": 0.5847063793831855, "grad_norm": 1.9256809949874878, "learning_rate": 1.2482948447279417e-05, "loss": 0.20899200439453125, "step": 4325 }, { "epoch": 0.5848415716096325, "grad_norm": 1.220988154411316, "learning_rate": 1.2476201331858458e-05, "loss": 0.17862725257873535, "step": 4326 }, { "epoch": 0.5849767638360794, "grad_norm": 1.3501471281051636, "learning_rate": 1.2469454741905839e-05, "loss": 0.24008893966674805, "step": 4327 }, { "epoch": 0.5851119560625264, "grad_norm": 1.2645536661148071, "learning_rate": 1.2462708678826233e-05, "loss": 0.22155380249023438, "step": 4328 }, { "epoch": 0.5852471482889734, "grad_norm": 1.2619504928588867, "learning_rate": 1.245596314402421e-05, "loss": 0.2033100128173828, "step": 4329 }, { "epoch": 0.5853823405154204, "grad_norm": 0.8149838447570801, "learning_rate": 1.2449218138904225e-05, "loss": 0.1532745361328125, "step": 4330 }, { "epoch": 0.5855175327418674, "grad_norm": 0.9254529476165771, "learning_rate": 1.2442473664870636e-05, "loss": 0.17912769317626953, "step": 4331 }, { "epoch": 0.5856527249683143, "grad_norm": 0.7227391600608826, "learning_rate": 1.2435729723327661e-05, "loss": 0.15714073181152344, "step": 4332 }, { "epoch": 0.5857879171947613, "grad_norm": 1.2180477380752563, "learning_rate": 1.2428986315679433e-05, "loss": 0.18432140350341797, "step": 4333 }, { "epoch": 0.5859231094212083, "grad_norm": 0.9233143925666809, "learning_rate": 1.2422243443329962e-05, "loss": 0.1938343048095703, "step": 4334 }, { "epoch": 0.5860583016476553, "grad_norm": 1.5898863077163696, "learning_rate": 1.241550110768316e-05, "loss": 0.1848278045654297, "step": 4335 }, { "epoch": 0.5861934938741022, "grad_norm": 1.0914925336837769, "learning_rate": 1.2408759310142803e-05, "loss": 0.23800277709960938, "step": 4336 }, { "epoch": 0.5863286861005492, "grad_norm": 0.9458726048469543, "learning_rate": 1.2402018052112576e-05, "loss": 0.1806468963623047, "step": 4337 }, { "epoch": 0.5864638783269962, "grad_norm": 0.7406532764434814, "learning_rate": 1.2395277334996045e-05, "loss": 0.1611042022705078, "step": 4338 }, { "epoch": 0.5865990705534432, "grad_norm": 1.0693137645721436, "learning_rate": 1.2388537160196663e-05, "loss": 0.171966552734375, "step": 4339 }, { "epoch": 0.5867342627798902, "grad_norm": 1.371740460395813, "learning_rate": 1.2381797529117776e-05, "loss": 0.21809077262878418, "step": 4340 }, { "epoch": 0.5868694550063371, "grad_norm": 1.7710998058319092, "learning_rate": 1.23750584431626e-05, "loss": 0.19530391693115234, "step": 4341 }, { "epoch": 0.5870046472327841, "grad_norm": 1.2489817142486572, "learning_rate": 1.236831990373425e-05, "loss": 0.17886734008789062, "step": 4342 }, { "epoch": 0.5871398394592311, "grad_norm": 1.2524535655975342, "learning_rate": 1.2361581912235736e-05, "loss": 0.20679569244384766, "step": 4343 }, { "epoch": 0.5872750316856781, "grad_norm": 1.4351321458816528, "learning_rate": 1.235484447006994e-05, "loss": 0.16537857055664062, "step": 4344 }, { "epoch": 0.587410223912125, "grad_norm": 1.0270174741744995, "learning_rate": 1.2348107578639627e-05, "loss": 0.14121007919311523, "step": 4345 }, { "epoch": 0.587545416138572, "grad_norm": 0.9509152173995972, "learning_rate": 1.2341371239347454e-05, "loss": 0.2061767578125, "step": 4346 }, { "epoch": 0.587680608365019, "grad_norm": 0.839234471321106, "learning_rate": 1.233463545359597e-05, "loss": 0.21115493774414062, "step": 4347 }, { "epoch": 0.587815800591466, "grad_norm": 1.3698605298995972, "learning_rate": 1.23279002227876e-05, "loss": 0.1501150131225586, "step": 4348 }, { "epoch": 0.587950992817913, "grad_norm": 1.0913116931915283, "learning_rate": 1.2321165548324655e-05, "loss": 0.18481826782226562, "step": 4349 }, { "epoch": 0.5880861850443599, "grad_norm": 1.304708480834961, "learning_rate": 1.2314431431609323e-05, "loss": 0.1565408706665039, "step": 4350 }, { "epoch": 0.5882213772708069, "grad_norm": 1.2944470643997192, "learning_rate": 1.2307697874043687e-05, "loss": 0.21492767333984375, "step": 4351 }, { "epoch": 0.5883565694972539, "grad_norm": 1.3525173664093018, "learning_rate": 1.2300964877029712e-05, "loss": 0.16132545471191406, "step": 4352 }, { "epoch": 0.5884917617237009, "grad_norm": 0.8785399794578552, "learning_rate": 1.2294232441969246e-05, "loss": 0.17656803131103516, "step": 4353 }, { "epoch": 0.5886269539501479, "grad_norm": 1.5672249794006348, "learning_rate": 1.2287500570264017e-05, "loss": 0.18292903900146484, "step": 4354 }, { "epoch": 0.5887621461765948, "grad_norm": 0.6533482670783997, "learning_rate": 1.2280769263315628e-05, "loss": 0.13961410522460938, "step": 4355 }, { "epoch": 0.5888973384030418, "grad_norm": 1.1324665546417236, "learning_rate": 1.2274038522525577e-05, "loss": 0.21323680877685547, "step": 4356 }, { "epoch": 0.5890325306294888, "grad_norm": 1.1106525659561157, "learning_rate": 1.2267308349295246e-05, "loss": 0.1803441047668457, "step": 4357 }, { "epoch": 0.5891677228559358, "grad_norm": 0.6501247882843018, "learning_rate": 1.2260578745025892e-05, "loss": 0.11105108261108398, "step": 4358 }, { "epoch": 0.5893029150823828, "grad_norm": 1.8832035064697266, "learning_rate": 1.225384971111865e-05, "loss": 0.21324539184570312, "step": 4359 }, { "epoch": 0.5894381073088297, "grad_norm": 0.7464386820793152, "learning_rate": 1.224712124897454e-05, "loss": 0.1534595489501953, "step": 4360 }, { "epoch": 0.5895732995352767, "grad_norm": 1.8309890031814575, "learning_rate": 1.2240393359994466e-05, "loss": 0.19975852966308594, "step": 4361 }, { "epoch": 0.5897084917617237, "grad_norm": 1.187455415725708, "learning_rate": 1.2233666045579209e-05, "loss": 0.1963634490966797, "step": 4362 }, { "epoch": 0.5898436839881707, "grad_norm": 1.1307337284088135, "learning_rate": 1.222693930712944e-05, "loss": 0.22422409057617188, "step": 4363 }, { "epoch": 0.5899788762146176, "grad_norm": 1.4619524478912354, "learning_rate": 1.2220213146045691e-05, "loss": 0.15769386291503906, "step": 4364 }, { "epoch": 0.5901140684410646, "grad_norm": 0.8249306678771973, "learning_rate": 1.2213487563728389e-05, "loss": 0.11738276481628418, "step": 4365 }, { "epoch": 0.5902492606675116, "grad_norm": 2.279766798019409, "learning_rate": 1.220676256157783e-05, "loss": 0.17304039001464844, "step": 4366 }, { "epoch": 0.5903844528939586, "grad_norm": 1.2244853973388672, "learning_rate": 1.2200038140994212e-05, "loss": 0.16664791107177734, "step": 4367 }, { "epoch": 0.5905196451204056, "grad_norm": 0.8576146364212036, "learning_rate": 1.2193314303377578e-05, "loss": 0.14099502563476562, "step": 4368 }, { "epoch": 0.5906548373468525, "grad_norm": 0.9393298625946045, "learning_rate": 1.2186591050127874e-05, "loss": 0.14593619108200073, "step": 4369 }, { "epoch": 0.5907900295732995, "grad_norm": 1.0864542722702026, "learning_rate": 1.2179868382644916e-05, "loss": 0.1727609634399414, "step": 4370 }, { "epoch": 0.5909252217997465, "grad_norm": 1.1678662300109863, "learning_rate": 1.2173146302328396e-05, "loss": 0.17487037181854248, "step": 4371 }, { "epoch": 0.5910604140261935, "grad_norm": 0.7419041991233826, "learning_rate": 1.21664248105779e-05, "loss": 0.15142822265625, "step": 4372 }, { "epoch": 0.5911956062526404, "grad_norm": 1.0202295780181885, "learning_rate": 1.2159703908792858e-05, "loss": 0.23473358154296875, "step": 4373 }, { "epoch": 0.5913307984790874, "grad_norm": 0.9415817856788635, "learning_rate": 1.2152983598372613e-05, "loss": 0.1876983642578125, "step": 4374 }, { "epoch": 0.5914659907055344, "grad_norm": 0.9046608805656433, "learning_rate": 1.2146263880716366e-05, "loss": 0.19054603576660156, "step": 4375 }, { "epoch": 0.5916011829319814, "grad_norm": 1.2391260862350464, "learning_rate": 1.2139544757223194e-05, "loss": 0.2619590759277344, "step": 4376 }, { "epoch": 0.5917363751584284, "grad_norm": 1.1138972043991089, "learning_rate": 1.2132826229292066e-05, "loss": 0.1624593734741211, "step": 4377 }, { "epoch": 0.5918715673848753, "grad_norm": 2.4832799434661865, "learning_rate": 1.2126108298321798e-05, "loss": 0.15798282623291016, "step": 4378 }, { "epoch": 0.5920067596113223, "grad_norm": 1.0790153741836548, "learning_rate": 1.2119390965711107e-05, "loss": 0.19666290283203125, "step": 4379 }, { "epoch": 0.5921419518377693, "grad_norm": 2.42635178565979, "learning_rate": 1.2112674232858582e-05, "loss": 0.15904903411865234, "step": 4380 }, { "epoch": 0.5922771440642163, "grad_norm": 0.7873253226280212, "learning_rate": 1.2105958101162684e-05, "loss": 0.1550908088684082, "step": 4381 }, { "epoch": 0.5924123362906633, "grad_norm": 1.2129740715026855, "learning_rate": 1.2099242572021735e-05, "loss": 0.16709661483764648, "step": 4382 }, { "epoch": 0.5925475285171102, "grad_norm": 1.0263253450393677, "learning_rate": 1.209252764683395e-05, "loss": 0.1557321548461914, "step": 4383 }, { "epoch": 0.5926827207435572, "grad_norm": 1.5654892921447754, "learning_rate": 1.2085813326997414e-05, "loss": 0.1923818588256836, "step": 4384 }, { "epoch": 0.5928179129700042, "grad_norm": 1.7086323499679565, "learning_rate": 1.2079099613910088e-05, "loss": 0.24108505249023438, "step": 4385 }, { "epoch": 0.5929531051964512, "grad_norm": 1.155551552772522, "learning_rate": 1.20723865089698e-05, "loss": 0.13469314575195312, "step": 4386 }, { "epoch": 0.5930882974228981, "grad_norm": 1.0771546363830566, "learning_rate": 1.2065674013574248e-05, "loss": 0.13491582870483398, "step": 4387 }, { "epoch": 0.5932234896493451, "grad_norm": 1.1418342590332031, "learning_rate": 1.2058962129121013e-05, "loss": 0.14437389373779297, "step": 4388 }, { "epoch": 0.5933586818757921, "grad_norm": 1.1176729202270508, "learning_rate": 1.2052250857007548e-05, "loss": 0.20442867279052734, "step": 4389 }, { "epoch": 0.5934938741022391, "grad_norm": 0.7867580652236938, "learning_rate": 1.2045540198631177e-05, "loss": 0.15497398376464844, "step": 4390 }, { "epoch": 0.5936290663286861, "grad_norm": 0.716128945350647, "learning_rate": 1.2038830155389091e-05, "loss": 0.1474003791809082, "step": 4391 }, { "epoch": 0.593764258555133, "grad_norm": 1.2444361448287964, "learning_rate": 1.2032120728678354e-05, "loss": 0.15840959548950195, "step": 4392 }, { "epoch": 0.59389945078158, "grad_norm": 0.9370972514152527, "learning_rate": 1.2025411919895907e-05, "loss": 0.15124869346618652, "step": 4393 }, { "epoch": 0.594034643008027, "grad_norm": 1.0189954042434692, "learning_rate": 1.2018703730438561e-05, "loss": 0.18697166442871094, "step": 4394 }, { "epoch": 0.594169835234474, "grad_norm": 1.2457313537597656, "learning_rate": 1.2011996161703003e-05, "loss": 0.1989736557006836, "step": 4395 }, { "epoch": 0.594305027460921, "grad_norm": 2.136608600616455, "learning_rate": 1.2005289215085775e-05, "loss": 0.21790874004364014, "step": 4396 }, { "epoch": 0.5944402196873679, "grad_norm": 0.718353807926178, "learning_rate": 1.19985828919833e-05, "loss": 0.13578176498413086, "step": 4397 }, { "epoch": 0.5945754119138149, "grad_norm": 0.8081104755401611, "learning_rate": 1.1991877193791872e-05, "loss": 0.1490478515625, "step": 4398 }, { "epoch": 0.5947106041402619, "grad_norm": 1.5630050897598267, "learning_rate": 1.1985172121907653e-05, "loss": 0.20532894134521484, "step": 4399 }, { "epoch": 0.5948457963667089, "grad_norm": 1.1921988725662231, "learning_rate": 1.1978467677726682e-05, "loss": 0.17661619186401367, "step": 4400 }, { "epoch": 0.5949809885931558, "grad_norm": 1.5652927160263062, "learning_rate": 1.197176386264485e-05, "loss": 0.21953392028808594, "step": 4401 }, { "epoch": 0.5951161808196028, "grad_norm": 1.1749805212020874, "learning_rate": 1.1965060678057927e-05, "loss": 0.1577920913696289, "step": 4402 }, { "epoch": 0.5952513730460498, "grad_norm": 1.7236204147338867, "learning_rate": 1.1958358125361554e-05, "loss": 0.26123809814453125, "step": 4403 }, { "epoch": 0.5953865652724968, "grad_norm": 0.6865897178649902, "learning_rate": 1.1951656205951247e-05, "loss": 0.14724159240722656, "step": 4404 }, { "epoch": 0.5955217574989438, "grad_norm": 1.1327730417251587, "learning_rate": 1.1944954921222367e-05, "loss": 0.1978607177734375, "step": 4405 }, { "epoch": 0.5956569497253907, "grad_norm": 1.3412612676620483, "learning_rate": 1.1938254272570167e-05, "loss": 0.09381437301635742, "step": 4406 }, { "epoch": 0.5957921419518377, "grad_norm": 1.0048580169677734, "learning_rate": 1.1931554261389751e-05, "loss": 0.1890707015991211, "step": 4407 }, { "epoch": 0.5959273341782847, "grad_norm": 1.0373151302337646, "learning_rate": 1.1924854889076103e-05, "loss": 0.2082977294921875, "step": 4408 }, { "epoch": 0.5960625264047317, "grad_norm": 1.0999053716659546, "learning_rate": 1.191815615702407e-05, "loss": 0.1708965301513672, "step": 4409 }, { "epoch": 0.5961977186311787, "grad_norm": 1.4999499320983887, "learning_rate": 1.1911458066628353e-05, "loss": 0.13686561584472656, "step": 4410 }, { "epoch": 0.5963329108576257, "grad_norm": 0.7241935133934021, "learning_rate": 1.1904760619283537e-05, "loss": 0.11888504028320312, "step": 4411 }, { "epoch": 0.5964681030840727, "grad_norm": 1.7231944799423218, "learning_rate": 1.1898063816384069e-05, "loss": 0.24175643920898438, "step": 4412 }, { "epoch": 0.5966032953105197, "grad_norm": 1.0629514455795288, "learning_rate": 1.189136765932426e-05, "loss": 0.17063546180725098, "step": 4413 }, { "epoch": 0.5967384875369667, "grad_norm": 1.0609545707702637, "learning_rate": 1.1884672149498276e-05, "loss": 0.20450544357299805, "step": 4414 }, { "epoch": 0.5968736797634137, "grad_norm": 1.1088476181030273, "learning_rate": 1.187797728830016e-05, "loss": 0.13601970672607422, "step": 4415 }, { "epoch": 0.5970088719898606, "grad_norm": 0.9738326668739319, "learning_rate": 1.1871283077123823e-05, "loss": 0.15453529357910156, "step": 4416 }, { "epoch": 0.5971440642163076, "grad_norm": 0.7930399775505066, "learning_rate": 1.1864589517363038e-05, "loss": 0.16332721710205078, "step": 4417 }, { "epoch": 0.5972792564427546, "grad_norm": 0.9717445373535156, "learning_rate": 1.185789661041144e-05, "loss": 0.16792798042297363, "step": 4418 }, { "epoch": 0.5974144486692016, "grad_norm": 1.0329456329345703, "learning_rate": 1.1851204357662513e-05, "loss": 0.16846752166748047, "step": 4419 }, { "epoch": 0.5975496408956485, "grad_norm": 0.8872194886207581, "learning_rate": 1.1844512760509634e-05, "loss": 0.15872907638549805, "step": 4420 }, { "epoch": 0.5976848331220955, "grad_norm": 1.2731571197509766, "learning_rate": 1.1837821820346022e-05, "loss": 0.19169270992279053, "step": 4421 }, { "epoch": 0.5978200253485425, "grad_norm": 1.4863935708999634, "learning_rate": 1.1831131538564775e-05, "loss": 0.2197713851928711, "step": 4422 }, { "epoch": 0.5979552175749895, "grad_norm": 0.89424729347229, "learning_rate": 1.1824441916558843e-05, "loss": 0.12622642517089844, "step": 4423 }, { "epoch": 0.5980904098014365, "grad_norm": 1.5346304178237915, "learning_rate": 1.1817752955721031e-05, "loss": 0.18735313415527344, "step": 4424 }, { "epoch": 0.5982256020278834, "grad_norm": 1.4477431774139404, "learning_rate": 1.1811064657444023e-05, "loss": 0.1607341766357422, "step": 4425 }, { "epoch": 0.5983607942543304, "grad_norm": 1.0291216373443604, "learning_rate": 1.1804377023120361e-05, "loss": 0.17717933654785156, "step": 4426 }, { "epoch": 0.5984959864807774, "grad_norm": 1.7882074117660522, "learning_rate": 1.1797690054142451e-05, "loss": 0.16795873641967773, "step": 4427 }, { "epoch": 0.5986311787072244, "grad_norm": 0.7566695213317871, "learning_rate": 1.1791003751902542e-05, "loss": 0.1444687843322754, "step": 4428 }, { "epoch": 0.5987663709336714, "grad_norm": 1.025880217552185, "learning_rate": 1.1784318117792763e-05, "loss": 0.14234542846679688, "step": 4429 }, { "epoch": 0.5989015631601183, "grad_norm": 2.395387887954712, "learning_rate": 1.17776331532051e-05, "loss": 0.18979501724243164, "step": 4430 }, { "epoch": 0.5990367553865653, "grad_norm": 0.7047892212867737, "learning_rate": 1.1770948859531397e-05, "loss": 0.14166879653930664, "step": 4431 }, { "epoch": 0.5991719476130123, "grad_norm": 0.931225597858429, "learning_rate": 1.1764265238163369e-05, "loss": 0.16495800018310547, "step": 4432 }, { "epoch": 0.5993071398394593, "grad_norm": 1.7586725950241089, "learning_rate": 1.1757582290492568e-05, "loss": 0.22754478454589844, "step": 4433 }, { "epoch": 0.5994423320659062, "grad_norm": 1.6298688650131226, "learning_rate": 1.1750900017910425e-05, "loss": 0.25134849548339844, "step": 4434 }, { "epoch": 0.5995775242923532, "grad_norm": 0.5789780020713806, "learning_rate": 1.1744218421808221e-05, "loss": 0.16058588027954102, "step": 4435 }, { "epoch": 0.5997127165188002, "grad_norm": 0.5974397659301758, "learning_rate": 1.1737537503577112e-05, "loss": 0.1259450912475586, "step": 4436 }, { "epoch": 0.5998479087452472, "grad_norm": 1.842922568321228, "learning_rate": 1.1730857264608086e-05, "loss": 0.18219757080078125, "step": 4437 }, { "epoch": 0.5999831009716942, "grad_norm": 1.8020470142364502, "learning_rate": 1.1724177706292013e-05, "loss": 0.16417217254638672, "step": 4438 }, { "epoch": 0.6001182931981411, "grad_norm": 0.6571294665336609, "learning_rate": 1.1717498830019607e-05, "loss": 0.1324167251586914, "step": 4439 }, { "epoch": 0.6002534854245881, "grad_norm": 0.9516823887825012, "learning_rate": 1.1710820637181449e-05, "loss": 0.16005659103393555, "step": 4440 }, { "epoch": 0.6003886776510351, "grad_norm": 1.3139857053756714, "learning_rate": 1.170414312916798e-05, "loss": 0.18193626403808594, "step": 4441 }, { "epoch": 0.6005238698774821, "grad_norm": 0.8724102973937988, "learning_rate": 1.1697466307369484e-05, "loss": 0.21126365661621094, "step": 4442 }, { "epoch": 0.600659062103929, "grad_norm": 1.7499313354492188, "learning_rate": 1.1690790173176116e-05, "loss": 0.1791229248046875, "step": 4443 }, { "epoch": 0.600794254330376, "grad_norm": 1.04434072971344, "learning_rate": 1.1684114727977876e-05, "loss": 0.12543082237243652, "step": 4444 }, { "epoch": 0.600929446556823, "grad_norm": 1.4621626138687134, "learning_rate": 1.167743997316464e-05, "loss": 0.19497299194335938, "step": 4445 }, { "epoch": 0.60106463878327, "grad_norm": 1.579993486404419, "learning_rate": 1.1670765910126112e-05, "loss": 0.2030773162841797, "step": 4446 }, { "epoch": 0.601199831009717, "grad_norm": 0.7601368427276611, "learning_rate": 1.1664092540251877e-05, "loss": 0.15359234809875488, "step": 4447 }, { "epoch": 0.601335023236164, "grad_norm": 2.2608797550201416, "learning_rate": 1.1657419864931361e-05, "loss": 0.1562957763671875, "step": 4448 }, { "epoch": 0.6014702154626109, "grad_norm": 2.7142677307128906, "learning_rate": 1.165074788555386e-05, "loss": 0.2184542417526245, "step": 4449 }, { "epoch": 0.6016054076890579, "grad_norm": 1.0566056966781616, "learning_rate": 1.1644076603508514e-05, "loss": 0.1809086799621582, "step": 4450 }, { "epoch": 0.6017405999155049, "grad_norm": 1.276042103767395, "learning_rate": 1.1637406020184305e-05, "loss": 0.12530803680419922, "step": 4451 }, { "epoch": 0.6018757921419519, "grad_norm": 1.224887728691101, "learning_rate": 1.1630736136970097e-05, "loss": 0.18129348754882812, "step": 4452 }, { "epoch": 0.6020109843683988, "grad_norm": 0.8334834575653076, "learning_rate": 1.162406695525459e-05, "loss": 0.11057472229003906, "step": 4453 }, { "epoch": 0.6021461765948458, "grad_norm": 0.8606310486793518, "learning_rate": 1.161739847642635e-05, "loss": 0.16757774353027344, "step": 4454 }, { "epoch": 0.6022813688212928, "grad_norm": 2.4583969116210938, "learning_rate": 1.1610730701873788e-05, "loss": 0.1543407440185547, "step": 4455 }, { "epoch": 0.6024165610477398, "grad_norm": 1.5592225790023804, "learning_rate": 1.1604063632985163e-05, "loss": 0.20044517517089844, "step": 4456 }, { "epoch": 0.6025517532741868, "grad_norm": 1.4451719522476196, "learning_rate": 1.1597397271148598e-05, "loss": 0.2137298583984375, "step": 4457 }, { "epoch": 0.6026869455006337, "grad_norm": 1.1640915870666504, "learning_rate": 1.1590731617752067e-05, "loss": 0.2288990020751953, "step": 4458 }, { "epoch": 0.6028221377270807, "grad_norm": 1.4748117923736572, "learning_rate": 1.1584066674183398e-05, "loss": 0.21102356910705566, "step": 4459 }, { "epoch": 0.6029573299535277, "grad_norm": 0.6651380062103271, "learning_rate": 1.1577402441830262e-05, "loss": 0.12137174606323242, "step": 4460 }, { "epoch": 0.6030925221799747, "grad_norm": 1.3792213201522827, "learning_rate": 1.1570738922080185e-05, "loss": 0.1801152229309082, "step": 4461 }, { "epoch": 0.6032277144064216, "grad_norm": 0.8088310360908508, "learning_rate": 1.1564076116320552e-05, "loss": 0.14694571495056152, "step": 4462 }, { "epoch": 0.6033629066328686, "grad_norm": 1.3152133226394653, "learning_rate": 1.1557414025938592e-05, "loss": 0.1998577117919922, "step": 4463 }, { "epoch": 0.6034980988593156, "grad_norm": 1.5952835083007812, "learning_rate": 1.15507526523214e-05, "loss": 0.21207046508789062, "step": 4464 }, { "epoch": 0.6036332910857626, "grad_norm": 0.8917785882949829, "learning_rate": 1.1544091996855895e-05, "loss": 0.14701461791992188, "step": 4465 }, { "epoch": 0.6037684833122096, "grad_norm": 1.1096436977386475, "learning_rate": 1.153743206092886e-05, "loss": 0.1688985824584961, "step": 4466 }, { "epoch": 0.6039036755386565, "grad_norm": 1.0827115774154663, "learning_rate": 1.1530772845926936e-05, "loss": 0.17384815216064453, "step": 4467 }, { "epoch": 0.6040388677651035, "grad_norm": 0.8188608288764954, "learning_rate": 1.1524114353236614e-05, "loss": 0.11463069915771484, "step": 4468 }, { "epoch": 0.6041740599915505, "grad_norm": 1.302260160446167, "learning_rate": 1.151745658424421e-05, "loss": 0.1435256004333496, "step": 4469 }, { "epoch": 0.6043092522179975, "grad_norm": 1.0960304737091064, "learning_rate": 1.151079954033592e-05, "loss": 0.2195911407470703, "step": 4470 }, { "epoch": 0.6044444444444445, "grad_norm": 0.7673820853233337, "learning_rate": 1.150414322289777e-05, "loss": 0.1555652618408203, "step": 4471 }, { "epoch": 0.6045796366708914, "grad_norm": 0.9879593253135681, "learning_rate": 1.1497487633315643e-05, "loss": 0.2014293670654297, "step": 4472 }, { "epoch": 0.6047148288973384, "grad_norm": 1.3240058422088623, "learning_rate": 1.1490832772975275e-05, "loss": 0.1659221649169922, "step": 4473 }, { "epoch": 0.6048500211237854, "grad_norm": 0.9766626358032227, "learning_rate": 1.148417864326223e-05, "loss": 0.1854991912841797, "step": 4474 }, { "epoch": 0.6049852133502324, "grad_norm": 0.8749812245368958, "learning_rate": 1.1477525245561944e-05, "loss": 0.12531614303588867, "step": 4475 }, { "epoch": 0.6051204055766793, "grad_norm": 1.1721289157867432, "learning_rate": 1.1470872581259684e-05, "loss": 0.12496829032897949, "step": 4476 }, { "epoch": 0.6052555978031263, "grad_norm": 1.0978264808654785, "learning_rate": 1.146422065174057e-05, "loss": 0.20592308044433594, "step": 4477 }, { "epoch": 0.6053907900295733, "grad_norm": 1.0824264287948608, "learning_rate": 1.1457569458389578e-05, "loss": 0.1690044403076172, "step": 4478 }, { "epoch": 0.6055259822560203, "grad_norm": 2.1679115295410156, "learning_rate": 1.145091900259151e-05, "loss": 0.21513795852661133, "step": 4479 }, { "epoch": 0.6056611744824673, "grad_norm": 1.1737805604934692, "learning_rate": 1.1444269285731032e-05, "loss": 0.18422222137451172, "step": 4480 }, { "epoch": 0.6057963667089142, "grad_norm": 1.9953927993774414, "learning_rate": 1.1437620309192652e-05, "loss": 0.20594406127929688, "step": 4481 }, { "epoch": 0.6059315589353612, "grad_norm": 1.491796612739563, "learning_rate": 1.1430972074360722e-05, "loss": 0.1867208480834961, "step": 4482 }, { "epoch": 0.6060667511618082, "grad_norm": 1.4249210357666016, "learning_rate": 1.1424324582619435e-05, "loss": 0.2382183074951172, "step": 4483 }, { "epoch": 0.6062019433882552, "grad_norm": 0.9030844569206238, "learning_rate": 1.1417677835352837e-05, "loss": 0.1659536361694336, "step": 4484 }, { "epoch": 0.6063371356147021, "grad_norm": 1.4421099424362183, "learning_rate": 1.1411031833944816e-05, "loss": 0.16944313049316406, "step": 4485 }, { "epoch": 0.6064723278411491, "grad_norm": 1.5993998050689697, "learning_rate": 1.1404386579779111e-05, "loss": 0.21051788330078125, "step": 4486 }, { "epoch": 0.6066075200675961, "grad_norm": 1.6755239963531494, "learning_rate": 1.1397742074239296e-05, "loss": 0.223663330078125, "step": 4487 }, { "epoch": 0.6067427122940431, "grad_norm": 2.1034271717071533, "learning_rate": 1.1391098318708785e-05, "loss": 0.20969676971435547, "step": 4488 }, { "epoch": 0.6068779045204901, "grad_norm": 1.2152445316314697, "learning_rate": 1.1384455314570848e-05, "loss": 0.18887662887573242, "step": 4489 }, { "epoch": 0.607013096746937, "grad_norm": 1.2689993381500244, "learning_rate": 1.1377813063208596e-05, "loss": 0.20824241638183594, "step": 4490 }, { "epoch": 0.607148288973384, "grad_norm": 1.4393547773361206, "learning_rate": 1.1371171566004986e-05, "loss": 0.1833667755126953, "step": 4491 }, { "epoch": 0.607283481199831, "grad_norm": 2.1438379287719727, "learning_rate": 1.1364530824342806e-05, "loss": 0.19746971130371094, "step": 4492 }, { "epoch": 0.607418673426278, "grad_norm": 0.6704422235488892, "learning_rate": 1.1357890839604688e-05, "loss": 0.1542677879333496, "step": 4493 }, { "epoch": 0.607553865652725, "grad_norm": 0.672864556312561, "learning_rate": 1.1351251613173122e-05, "loss": 0.16314935684204102, "step": 4494 }, { "epoch": 0.6076890578791719, "grad_norm": 1.3669929504394531, "learning_rate": 1.1344613146430428e-05, "loss": 0.20238876342773438, "step": 4495 }, { "epoch": 0.6078242501056189, "grad_norm": 1.352858304977417, "learning_rate": 1.1337975440758775e-05, "loss": 0.18425321578979492, "step": 4496 }, { "epoch": 0.6079594423320659, "grad_norm": 1.45911705493927, "learning_rate": 1.133133849754016e-05, "loss": 0.1382887363433838, "step": 4497 }, { "epoch": 0.6080946345585129, "grad_norm": 0.6741175055503845, "learning_rate": 1.1324702318156431e-05, "loss": 0.11459493637084961, "step": 4498 }, { "epoch": 0.6082298267849598, "grad_norm": 0.6408833265304565, "learning_rate": 1.1318066903989279e-05, "loss": 0.12037158012390137, "step": 4499 }, { "epoch": 0.6083650190114068, "grad_norm": 0.9546215534210205, "learning_rate": 1.1311432256420232e-05, "loss": 0.18200302124023438, "step": 4500 }, { "epoch": 0.6085002112378538, "grad_norm": 2.613563060760498, "learning_rate": 1.1304798376830664e-05, "loss": 0.26177215576171875, "step": 4501 }, { "epoch": 0.6086354034643008, "grad_norm": 2.309908390045166, "learning_rate": 1.1298165266601778e-05, "loss": 0.20312881469726562, "step": 4502 }, { "epoch": 0.6087705956907478, "grad_norm": 1.8486065864562988, "learning_rate": 1.129153292711462e-05, "loss": 0.20337486267089844, "step": 4503 }, { "epoch": 0.6089057879171947, "grad_norm": 1.1266438961029053, "learning_rate": 1.1284901359750082e-05, "loss": 0.17210209369659424, "step": 4504 }, { "epoch": 0.6090409801436417, "grad_norm": 0.7883851528167725, "learning_rate": 1.1278270565888897e-05, "loss": 0.1486492156982422, "step": 4505 }, { "epoch": 0.6091761723700887, "grad_norm": 1.0527299642562866, "learning_rate": 1.1271640546911624e-05, "loss": 0.20038795471191406, "step": 4506 }, { "epoch": 0.6093113645965357, "grad_norm": 1.2081286907196045, "learning_rate": 1.1265011304198672e-05, "loss": 0.17427825927734375, "step": 4507 }, { "epoch": 0.6094465568229827, "grad_norm": 1.2031521797180176, "learning_rate": 1.1258382839130282e-05, "loss": 0.17670536041259766, "step": 4508 }, { "epoch": 0.6095817490494296, "grad_norm": 1.716536283493042, "learning_rate": 1.1251755153086536e-05, "loss": 0.17140483856201172, "step": 4509 }, { "epoch": 0.6097169412758766, "grad_norm": 1.6569764614105225, "learning_rate": 1.1245128247447362e-05, "loss": 0.20662498474121094, "step": 4510 }, { "epoch": 0.6098521335023236, "grad_norm": 0.9101887941360474, "learning_rate": 1.1238502123592507e-05, "loss": 0.17058944702148438, "step": 4511 }, { "epoch": 0.6099873257287706, "grad_norm": 1.4276028871536255, "learning_rate": 1.1231876782901568e-05, "loss": 0.1376333236694336, "step": 4512 }, { "epoch": 0.6101225179552175, "grad_norm": 1.7719577550888062, "learning_rate": 1.1225252226753975e-05, "loss": 0.17164087295532227, "step": 4513 }, { "epoch": 0.6102577101816645, "grad_norm": 1.115468144416809, "learning_rate": 1.1218628456529005e-05, "loss": 0.18319129943847656, "step": 4514 }, { "epoch": 0.6103929024081115, "grad_norm": 2.283583879470825, "learning_rate": 1.1212005473605746e-05, "loss": 0.1563892364501953, "step": 4515 }, { "epoch": 0.6105280946345585, "grad_norm": 1.5471925735473633, "learning_rate": 1.120538327936315e-05, "loss": 0.21539688110351562, "step": 4516 }, { "epoch": 0.6106632868610055, "grad_norm": 1.5506393909454346, "learning_rate": 1.1198761875179993e-05, "loss": 0.19168996810913086, "step": 4517 }, { "epoch": 0.6107984790874524, "grad_norm": 2.4129598140716553, "learning_rate": 1.1192141262434883e-05, "loss": 0.1796398162841797, "step": 4518 }, { "epoch": 0.6109336713138994, "grad_norm": 0.9621261954307556, "learning_rate": 1.1185521442506272e-05, "loss": 0.17345809936523438, "step": 4519 }, { "epoch": 0.6110688635403464, "grad_norm": 1.6808950901031494, "learning_rate": 1.1178902416772432e-05, "loss": 0.1947035789489746, "step": 4520 }, { "epoch": 0.6112040557667934, "grad_norm": 1.3471368551254272, "learning_rate": 1.1172284186611485e-05, "loss": 0.23317337036132812, "step": 4521 }, { "epoch": 0.6113392479932404, "grad_norm": 0.9267129302024841, "learning_rate": 1.1165666753401384e-05, "loss": 0.1781902313232422, "step": 4522 }, { "epoch": 0.6114744402196873, "grad_norm": 1.2934696674346924, "learning_rate": 1.1159050118519914e-05, "loss": 0.1874551773071289, "step": 4523 }, { "epoch": 0.6116096324461343, "grad_norm": 0.8290989995002747, "learning_rate": 1.1152434283344696e-05, "loss": 0.13314247131347656, "step": 4524 }, { "epoch": 0.6117448246725813, "grad_norm": 1.0614969730377197, "learning_rate": 1.114581924925317e-05, "loss": 0.19978570938110352, "step": 4525 }, { "epoch": 0.6118800168990283, "grad_norm": 1.0362046957015991, "learning_rate": 1.113920501762263e-05, "loss": 0.16478729248046875, "step": 4526 }, { "epoch": 0.6120152091254752, "grad_norm": 1.1429672241210938, "learning_rate": 1.1132591589830193e-05, "loss": 0.19054794311523438, "step": 4527 }, { "epoch": 0.6121504013519222, "grad_norm": 1.873545527458191, "learning_rate": 1.1125978967252818e-05, "loss": 0.21915864944458008, "step": 4528 }, { "epoch": 0.6122855935783692, "grad_norm": 1.033534288406372, "learning_rate": 1.1119367151267278e-05, "loss": 0.182769775390625, "step": 4529 }, { "epoch": 0.6124207858048162, "grad_norm": 0.9385781288146973, "learning_rate": 1.1112756143250186e-05, "loss": 0.1590590476989746, "step": 4530 }, { "epoch": 0.6125559780312632, "grad_norm": 1.1570844650268555, "learning_rate": 1.1106145944577995e-05, "loss": 0.18547821044921875, "step": 4531 }, { "epoch": 0.6126911702577101, "grad_norm": 1.081661581993103, "learning_rate": 1.1099536556626984e-05, "loss": 0.1681685447692871, "step": 4532 }, { "epoch": 0.6128263624841571, "grad_norm": 0.8565942049026489, "learning_rate": 1.1092927980773269e-05, "loss": 0.15418148040771484, "step": 4533 }, { "epoch": 0.6129615547106041, "grad_norm": 1.021229863166809, "learning_rate": 1.1086320218392777e-05, "loss": 0.19008445739746094, "step": 4534 }, { "epoch": 0.6130967469370511, "grad_norm": 2.2541840076446533, "learning_rate": 1.1079713270861286e-05, "loss": 0.20617389678955078, "step": 4535 }, { "epoch": 0.613231939163498, "grad_norm": 1.0166759490966797, "learning_rate": 1.1073107139554395e-05, "loss": 0.1794281005859375, "step": 4536 }, { "epoch": 0.613367131389945, "grad_norm": 1.092777967453003, "learning_rate": 1.1066501825847545e-05, "loss": 0.1987934112548828, "step": 4537 }, { "epoch": 0.613502323616392, "grad_norm": 0.8734856247901917, "learning_rate": 1.1059897331115985e-05, "loss": 0.15904521942138672, "step": 4538 }, { "epoch": 0.613637515842839, "grad_norm": 1.6738489866256714, "learning_rate": 1.1053293656734816e-05, "loss": 0.1717853546142578, "step": 4539 }, { "epoch": 0.613772708069286, "grad_norm": 0.768803060054779, "learning_rate": 1.1046690804078949e-05, "loss": 0.14902973175048828, "step": 4540 }, { "epoch": 0.613907900295733, "grad_norm": 2.584709882736206, "learning_rate": 1.1040088774523139e-05, "loss": 0.17228436470031738, "step": 4541 }, { "epoch": 0.6140430925221799, "grad_norm": 0.5402894616127014, "learning_rate": 1.1033487569441971e-05, "loss": 0.07872390747070312, "step": 4542 }, { "epoch": 0.6141782847486269, "grad_norm": 0.8832460045814514, "learning_rate": 1.1026887190209834e-05, "loss": 0.1846160888671875, "step": 4543 }, { "epoch": 0.6143134769750739, "grad_norm": 1.4036839008331299, "learning_rate": 1.1020287638200977e-05, "loss": 0.12274360656738281, "step": 4544 }, { "epoch": 0.6144486692015209, "grad_norm": 2.664909601211548, "learning_rate": 1.1013688914789452e-05, "loss": 0.2253737449645996, "step": 4545 }, { "epoch": 0.6145838614279678, "grad_norm": 1.306464433670044, "learning_rate": 1.100709102134915e-05, "loss": 0.19986629486083984, "step": 4546 }, { "epoch": 0.6147190536544149, "grad_norm": 1.1906028985977173, "learning_rate": 1.10004939592538e-05, "loss": 0.18352508544921875, "step": 4547 }, { "epoch": 0.6148542458808619, "grad_norm": 1.2507046461105347, "learning_rate": 1.0993897729876927e-05, "loss": 0.14834576845169067, "step": 4548 }, { "epoch": 0.6149894381073089, "grad_norm": 1.3494844436645508, "learning_rate": 1.0987302334591915e-05, "loss": 0.21949195861816406, "step": 4549 }, { "epoch": 0.6151246303337559, "grad_norm": 1.3425730466842651, "learning_rate": 1.098070777477195e-05, "loss": 0.13867855072021484, "step": 4550 }, { "epoch": 0.6152598225602028, "grad_norm": 0.8596582412719727, "learning_rate": 1.0974114051790067e-05, "loss": 0.18651771545410156, "step": 4551 }, { "epoch": 0.6153950147866498, "grad_norm": 0.7119019031524658, "learning_rate": 1.09675211670191e-05, "loss": 0.13357830047607422, "step": 4552 }, { "epoch": 0.6155302070130968, "grad_norm": 1.2550837993621826, "learning_rate": 1.0960929121831732e-05, "loss": 0.18177032470703125, "step": 4553 }, { "epoch": 0.6156653992395438, "grad_norm": 1.6899585723876953, "learning_rate": 1.095433791760046e-05, "loss": 0.19745254516601562, "step": 4554 }, { "epoch": 0.6158005914659908, "grad_norm": 1.235771894454956, "learning_rate": 1.0947747555697609e-05, "loss": 0.1887187957763672, "step": 4555 }, { "epoch": 0.6159357836924377, "grad_norm": 0.8981272578239441, "learning_rate": 1.0941158037495328e-05, "loss": 0.1479482650756836, "step": 4556 }, { "epoch": 0.6160709759188847, "grad_norm": 1.0570513010025024, "learning_rate": 1.0934569364365583e-05, "loss": 0.1391596794128418, "step": 4557 }, { "epoch": 0.6162061681453317, "grad_norm": 1.5715014934539795, "learning_rate": 1.0927981537680176e-05, "loss": 0.220855712890625, "step": 4558 }, { "epoch": 0.6163413603717787, "grad_norm": 1.3209017515182495, "learning_rate": 1.0921394558810726e-05, "loss": 0.18042564392089844, "step": 4559 }, { "epoch": 0.6164765525982256, "grad_norm": 1.069432020187378, "learning_rate": 1.0914808429128688e-05, "loss": 0.15283775329589844, "step": 4560 }, { "epoch": 0.6166117448246726, "grad_norm": 1.0885248184204102, "learning_rate": 1.0908223150005315e-05, "loss": 0.1771860122680664, "step": 4561 }, { "epoch": 0.6167469370511196, "grad_norm": 1.6780335903167725, "learning_rate": 1.09016387228117e-05, "loss": 0.1519308090209961, "step": 4562 }, { "epoch": 0.6168821292775666, "grad_norm": 1.3289042711257935, "learning_rate": 1.0895055148918758e-05, "loss": 0.13169002532958984, "step": 4563 }, { "epoch": 0.6170173215040136, "grad_norm": 0.828385591506958, "learning_rate": 1.0888472429697223e-05, "loss": 0.12916278839111328, "step": 4564 }, { "epoch": 0.6171525137304605, "grad_norm": 1.1332522630691528, "learning_rate": 1.088189056651766e-05, "loss": 0.19527626037597656, "step": 4565 }, { "epoch": 0.6172877059569075, "grad_norm": 1.0133165121078491, "learning_rate": 1.0875309560750438e-05, "loss": 0.1661849021911621, "step": 4566 }, { "epoch": 0.6174228981833545, "grad_norm": 0.9010225534439087, "learning_rate": 1.086872941376576e-05, "loss": 0.19013690948486328, "step": 4567 }, { "epoch": 0.6175580904098015, "grad_norm": 2.1900336742401123, "learning_rate": 1.0862150126933648e-05, "loss": 0.20239639282226562, "step": 4568 }, { "epoch": 0.6176932826362485, "grad_norm": 1.2862699031829834, "learning_rate": 1.0855571701623942e-05, "loss": 0.15459442138671875, "step": 4569 }, { "epoch": 0.6178284748626954, "grad_norm": 2.8855538368225098, "learning_rate": 1.0848994139206317e-05, "loss": 0.20385360717773438, "step": 4570 }, { "epoch": 0.6179636670891424, "grad_norm": 2.246278762817383, "learning_rate": 1.0842417441050247e-05, "loss": 0.17681121826171875, "step": 4571 }, { "epoch": 0.6180988593155894, "grad_norm": 1.1702877283096313, "learning_rate": 1.0835841608525031e-05, "loss": 0.16691112518310547, "step": 4572 }, { "epoch": 0.6182340515420364, "grad_norm": 1.3400757312774658, "learning_rate": 1.08292666429998e-05, "loss": 0.18357467651367188, "step": 4573 }, { "epoch": 0.6183692437684833, "grad_norm": 1.0004241466522217, "learning_rate": 1.08226925458435e-05, "loss": 0.15882110595703125, "step": 4574 }, { "epoch": 0.6185044359949303, "grad_norm": 0.7759889364242554, "learning_rate": 1.0816119318424882e-05, "loss": 0.13988018035888672, "step": 4575 }, { "epoch": 0.6186396282213773, "grad_norm": 1.545602560043335, "learning_rate": 1.0809546962112535e-05, "loss": 0.1817493438720703, "step": 4576 }, { "epoch": 0.6187748204478243, "grad_norm": 0.690873920917511, "learning_rate": 1.0802975478274856e-05, "loss": 0.15880346298217773, "step": 4577 }, { "epoch": 0.6189100126742713, "grad_norm": 0.995508074760437, "learning_rate": 1.0796404868280062e-05, "loss": 0.15073108673095703, "step": 4578 }, { "epoch": 0.6190452049007182, "grad_norm": 1.0737624168395996, "learning_rate": 1.07898351334962e-05, "loss": 0.188201904296875, "step": 4579 }, { "epoch": 0.6191803971271652, "grad_norm": 1.2057877779006958, "learning_rate": 1.0783266275291103e-05, "loss": 0.1840190887451172, "step": 4580 }, { "epoch": 0.6193155893536122, "grad_norm": 1.266737937927246, "learning_rate": 1.077669829503246e-05, "loss": 0.1401052474975586, "step": 4581 }, { "epoch": 0.6194507815800592, "grad_norm": 2.0364530086517334, "learning_rate": 1.077013119408775e-05, "loss": 0.207000732421875, "step": 4582 }, { "epoch": 0.6195859738065062, "grad_norm": 0.8128786683082581, "learning_rate": 1.0763564973824289e-05, "loss": 0.1413872241973877, "step": 4583 }, { "epoch": 0.6197211660329531, "grad_norm": 1.1461583375930786, "learning_rate": 1.0756999635609185e-05, "loss": 0.16029882431030273, "step": 4584 }, { "epoch": 0.6198563582594001, "grad_norm": 1.1196434497833252, "learning_rate": 1.0750435180809381e-05, "loss": 0.13834571838378906, "step": 4585 }, { "epoch": 0.6199915504858471, "grad_norm": 0.9671342372894287, "learning_rate": 1.074387161079164e-05, "loss": 0.15489912033081055, "step": 4586 }, { "epoch": 0.6201267427122941, "grad_norm": 0.6395117044448853, "learning_rate": 1.0737308926922521e-05, "loss": 0.12819957733154297, "step": 4587 }, { "epoch": 0.620261934938741, "grad_norm": 0.5339891314506531, "learning_rate": 1.0730747130568424e-05, "loss": 0.09637451171875, "step": 4588 }, { "epoch": 0.620397127165188, "grad_norm": 0.9782006740570068, "learning_rate": 1.0724186223095532e-05, "loss": 0.14755463600158691, "step": 4589 }, { "epoch": 0.620532319391635, "grad_norm": 0.717842698097229, "learning_rate": 1.071762620586987e-05, "loss": 0.12798690795898438, "step": 4590 }, { "epoch": 0.620667511618082, "grad_norm": 0.6801381707191467, "learning_rate": 1.0711067080257273e-05, "loss": 0.12725210189819336, "step": 4591 }, { "epoch": 0.620802703844529, "grad_norm": 1.1541473865509033, "learning_rate": 1.0704508847623374e-05, "loss": 0.16517877578735352, "step": 4592 }, { "epoch": 0.6209378960709759, "grad_norm": 1.15793776512146, "learning_rate": 1.069795150933365e-05, "loss": 0.16428565979003906, "step": 4593 }, { "epoch": 0.6210730882974229, "grad_norm": 1.8597896099090576, "learning_rate": 1.0691395066753357e-05, "loss": 0.1530294418334961, "step": 4594 }, { "epoch": 0.6212082805238699, "grad_norm": 1.3026847839355469, "learning_rate": 1.0684839521247584e-05, "loss": 0.1327953338623047, "step": 4595 }, { "epoch": 0.6213434727503169, "grad_norm": 2.3333561420440674, "learning_rate": 1.0678284874181234e-05, "loss": 0.19743061065673828, "step": 4596 }, { "epoch": 0.6214786649767639, "grad_norm": 1.294751763343811, "learning_rate": 1.0671731126919028e-05, "loss": 0.1623539924621582, "step": 4597 }, { "epoch": 0.6216138572032108, "grad_norm": 1.0483520030975342, "learning_rate": 1.066517828082548e-05, "loss": 0.18813586235046387, "step": 4598 }, { "epoch": 0.6217490494296578, "grad_norm": 1.2937839031219482, "learning_rate": 1.0658626337264926e-05, "loss": 0.18982410430908203, "step": 4599 }, { "epoch": 0.6218842416561048, "grad_norm": 1.4427005052566528, "learning_rate": 1.0652075297601518e-05, "loss": 0.1804485321044922, "step": 4600 }, { "epoch": 0.6220194338825518, "grad_norm": 1.1247267723083496, "learning_rate": 1.0645525163199222e-05, "loss": 0.17714977264404297, "step": 4601 }, { "epoch": 0.6221546261089987, "grad_norm": 0.7889978885650635, "learning_rate": 1.063897593542181e-05, "loss": 0.13114452362060547, "step": 4602 }, { "epoch": 0.6222898183354457, "grad_norm": 1.139345407485962, "learning_rate": 1.0632427615632864e-05, "loss": 0.15349483489990234, "step": 4603 }, { "epoch": 0.6224250105618927, "grad_norm": 1.434650182723999, "learning_rate": 1.0625880205195776e-05, "loss": 0.1331329345703125, "step": 4604 }, { "epoch": 0.6225602027883397, "grad_norm": 1.0262975692749023, "learning_rate": 1.0619333705473754e-05, "loss": 0.1393299102783203, "step": 4605 }, { "epoch": 0.6226953950147867, "grad_norm": 1.4420390129089355, "learning_rate": 1.0612788117829821e-05, "loss": 0.18594121932983398, "step": 4606 }, { "epoch": 0.6228305872412336, "grad_norm": 1.669246792793274, "learning_rate": 1.0606243443626792e-05, "loss": 0.17644691467285156, "step": 4607 }, { "epoch": 0.6229657794676806, "grad_norm": 0.8634865283966064, "learning_rate": 1.0599699684227313e-05, "loss": 0.12969398498535156, "step": 4608 }, { "epoch": 0.6231009716941276, "grad_norm": 1.1777290105819702, "learning_rate": 1.0593156840993818e-05, "loss": 0.1725749969482422, "step": 4609 }, { "epoch": 0.6232361639205746, "grad_norm": 0.7007025480270386, "learning_rate": 1.0586614915288571e-05, "loss": 0.11710739135742188, "step": 4610 }, { "epoch": 0.6233713561470215, "grad_norm": 0.9938804507255554, "learning_rate": 1.0580073908473641e-05, "loss": 0.16475486755371094, "step": 4611 }, { "epoch": 0.6235065483734685, "grad_norm": 1.4000250101089478, "learning_rate": 1.0573533821910885e-05, "loss": 0.15743255615234375, "step": 4612 }, { "epoch": 0.6236417405999155, "grad_norm": 1.3600869178771973, "learning_rate": 1.0566994656961997e-05, "loss": 0.16325855255126953, "step": 4613 }, { "epoch": 0.6237769328263625, "grad_norm": 1.289290428161621, "learning_rate": 1.0560456414988456e-05, "loss": 0.2019367218017578, "step": 4614 }, { "epoch": 0.6239121250528095, "grad_norm": 2.0171430110931396, "learning_rate": 1.0553919097351564e-05, "loss": 0.15810012817382812, "step": 4615 }, { "epoch": 0.6240473172792564, "grad_norm": 1.670272946357727, "learning_rate": 1.0547382705412434e-05, "loss": 0.16502857208251953, "step": 4616 }, { "epoch": 0.6241825095057034, "grad_norm": 1.7739546298980713, "learning_rate": 1.054084724053196e-05, "loss": 0.21545028686523438, "step": 4617 }, { "epoch": 0.6243177017321504, "grad_norm": 1.5325731039047241, "learning_rate": 1.0534312704070875e-05, "loss": 0.1570906639099121, "step": 4618 }, { "epoch": 0.6244528939585974, "grad_norm": 1.1488697528839111, "learning_rate": 1.0527779097389695e-05, "loss": 0.19235801696777344, "step": 4619 }, { "epoch": 0.6245880861850444, "grad_norm": 1.7705705165863037, "learning_rate": 1.0521246421848762e-05, "loss": 0.22964859008789062, "step": 4620 }, { "epoch": 0.6247232784114913, "grad_norm": 0.9314525127410889, "learning_rate": 1.0514714678808202e-05, "loss": 0.18545055389404297, "step": 4621 }, { "epoch": 0.6248584706379383, "grad_norm": 1.6340751647949219, "learning_rate": 1.0508183869627962e-05, "loss": 0.16865485906600952, "step": 4622 }, { "epoch": 0.6249936628643853, "grad_norm": 1.7807961702346802, "learning_rate": 1.0501653995667798e-05, "loss": 0.16479110717773438, "step": 4623 }, { "epoch": 0.6251288550908323, "grad_norm": 1.5432101488113403, "learning_rate": 1.0495125058287258e-05, "loss": 0.23844242095947266, "step": 4624 }, { "epoch": 0.6252640473172792, "grad_norm": 1.0514414310455322, "learning_rate": 1.0488597058845708e-05, "loss": 0.1761188507080078, "step": 4625 }, { "epoch": 0.6253992395437262, "grad_norm": 1.1294559240341187, "learning_rate": 1.0482069998702304e-05, "loss": 0.18339115381240845, "step": 4626 }, { "epoch": 0.6255344317701732, "grad_norm": 0.8533125519752502, "learning_rate": 1.0475543879216017e-05, "loss": 0.17749595642089844, "step": 4627 }, { "epoch": 0.6256696239966202, "grad_norm": 1.6903786659240723, "learning_rate": 1.0469018701745626e-05, "loss": 0.21721458435058594, "step": 4628 }, { "epoch": 0.6258048162230672, "grad_norm": 1.4242613315582275, "learning_rate": 1.0462494467649704e-05, "loss": 0.196502685546875, "step": 4629 }, { "epoch": 0.6259400084495141, "grad_norm": 0.8546478152275085, "learning_rate": 1.045597117828663e-05, "loss": 0.13039493560791016, "step": 4630 }, { "epoch": 0.6260752006759611, "grad_norm": 0.9507201313972473, "learning_rate": 1.0449448835014586e-05, "loss": 0.16056489944458008, "step": 4631 }, { "epoch": 0.6262103929024081, "grad_norm": 0.7374429106712341, "learning_rate": 1.044292743919156e-05, "loss": 0.1510152816772461, "step": 4632 }, { "epoch": 0.6263455851288551, "grad_norm": 0.72164386510849, "learning_rate": 1.0436406992175343e-05, "loss": 0.12612628936767578, "step": 4633 }, { "epoch": 0.626480777355302, "grad_norm": 1.7215973138809204, "learning_rate": 1.0429887495323532e-05, "loss": 0.13476836681365967, "step": 4634 }, { "epoch": 0.626615969581749, "grad_norm": 1.4045487642288208, "learning_rate": 1.0423368949993512e-05, "loss": 0.1385326385498047, "step": 4635 }, { "epoch": 0.626751161808196, "grad_norm": 1.3238190412521362, "learning_rate": 1.041685135754248e-05, "loss": 0.20109033584594727, "step": 4636 }, { "epoch": 0.626886354034643, "grad_norm": 0.9641658067703247, "learning_rate": 1.0410334719327435e-05, "loss": 0.1602001190185547, "step": 4637 }, { "epoch": 0.62702154626109, "grad_norm": 0.9967225790023804, "learning_rate": 1.0403819036705177e-05, "loss": 0.1710834503173828, "step": 4638 }, { "epoch": 0.627156738487537, "grad_norm": 1.0205364227294922, "learning_rate": 1.0397304311032311e-05, "loss": 0.1914135217666626, "step": 4639 }, { "epoch": 0.6272919307139839, "grad_norm": 1.4185870885849, "learning_rate": 1.039079054366523e-05, "loss": 0.15804290771484375, "step": 4640 }, { "epoch": 0.6274271229404309, "grad_norm": 1.4510555267333984, "learning_rate": 1.0384277735960133e-05, "loss": 0.23080825805664062, "step": 4641 }, { "epoch": 0.6275623151668779, "grad_norm": 1.5419342517852783, "learning_rate": 1.0377765889273025e-05, "loss": 0.17972993850708008, "step": 4642 }, { "epoch": 0.6276975073933249, "grad_norm": 1.2421633005142212, "learning_rate": 1.0371255004959715e-05, "loss": 0.19145584106445312, "step": 4643 }, { "epoch": 0.6278326996197718, "grad_norm": 1.2397582530975342, "learning_rate": 1.036474508437579e-05, "loss": 0.12718677520751953, "step": 4644 }, { "epoch": 0.6279678918462188, "grad_norm": 1.7837036848068237, "learning_rate": 1.035823612887666e-05, "loss": 0.1256704330444336, "step": 4645 }, { "epoch": 0.6281030840726658, "grad_norm": 0.6432226300239563, "learning_rate": 1.0351728139817517e-05, "loss": 0.11899089813232422, "step": 4646 }, { "epoch": 0.6282382762991128, "grad_norm": 1.33556067943573, "learning_rate": 1.0345221118553362e-05, "loss": 0.19678497314453125, "step": 4647 }, { "epoch": 0.6283734685255598, "grad_norm": 1.135422706604004, "learning_rate": 1.0338715066439002e-05, "loss": 0.1705026626586914, "step": 4648 }, { "epoch": 0.6285086607520067, "grad_norm": 1.9654052257537842, "learning_rate": 1.0332209984829013e-05, "loss": 0.17806053161621094, "step": 4649 }, { "epoch": 0.6286438529784537, "grad_norm": 0.8713955879211426, "learning_rate": 1.03257058750778e-05, "loss": 0.19050979614257812, "step": 4650 }, { "epoch": 0.6287790452049007, "grad_norm": 0.8905146718025208, "learning_rate": 1.0319202738539548e-05, "loss": 0.14739418029785156, "step": 4651 }, { "epoch": 0.6289142374313477, "grad_norm": 2.4853031635284424, "learning_rate": 1.0312700576568253e-05, "loss": 0.2068653106689453, "step": 4652 }, { "epoch": 0.6290494296577946, "grad_norm": 0.8443086743354797, "learning_rate": 1.0306199390517688e-05, "loss": 0.13300514221191406, "step": 4653 }, { "epoch": 0.6291846218842416, "grad_norm": 2.0395450592041016, "learning_rate": 1.0299699181741439e-05, "loss": 0.15018737316131592, "step": 4654 }, { "epoch": 0.6293198141106886, "grad_norm": 2.7070350646972656, "learning_rate": 1.0293199951592889e-05, "loss": 0.21428871154785156, "step": 4655 }, { "epoch": 0.6294550063371356, "grad_norm": 2.3074026107788086, "learning_rate": 1.0286701701425206e-05, "loss": 0.20762348175048828, "step": 4656 }, { "epoch": 0.6295901985635826, "grad_norm": 2.9708187580108643, "learning_rate": 1.0280204432591369e-05, "loss": 0.25496482849121094, "step": 4657 }, { "epoch": 0.6297253907900295, "grad_norm": 1.287521481513977, "learning_rate": 1.0273708146444133e-05, "loss": 0.20575332641601562, "step": 4658 }, { "epoch": 0.6298605830164765, "grad_norm": 1.2072495222091675, "learning_rate": 1.0267212844336062e-05, "loss": 0.1679697036743164, "step": 4659 }, { "epoch": 0.6299957752429235, "grad_norm": 0.9251106381416321, "learning_rate": 1.026071852761952e-05, "loss": 0.18853187561035156, "step": 4660 }, { "epoch": 0.6301309674693705, "grad_norm": 1.6173293590545654, "learning_rate": 1.025422519764665e-05, "loss": 0.2219095230102539, "step": 4661 }, { "epoch": 0.6302661596958175, "grad_norm": 1.0239653587341309, "learning_rate": 1.024773285576941e-05, "loss": 0.1710376739501953, "step": 4662 }, { "epoch": 0.6304013519222644, "grad_norm": 0.6747888922691345, "learning_rate": 1.0241241503339524e-05, "loss": 0.13608551025390625, "step": 4663 }, { "epoch": 0.6305365441487114, "grad_norm": 0.7485164999961853, "learning_rate": 1.023475114170853e-05, "loss": 0.13349580764770508, "step": 4664 }, { "epoch": 0.6306717363751584, "grad_norm": 0.8306536078453064, "learning_rate": 1.0228261772227768e-05, "loss": 0.17373108863830566, "step": 4665 }, { "epoch": 0.6308069286016054, "grad_norm": 1.2872484922409058, "learning_rate": 1.0221773396248349e-05, "loss": 0.20141887664794922, "step": 4666 }, { "epoch": 0.6309421208280523, "grad_norm": 1.0703446865081787, "learning_rate": 1.021528601512119e-05, "loss": 0.18709754943847656, "step": 4667 }, { "epoch": 0.6310773130544993, "grad_norm": 1.3038949966430664, "learning_rate": 1.0208799630196994e-05, "loss": 0.1746535301208496, "step": 4668 }, { "epoch": 0.6312125052809463, "grad_norm": 0.9164425134658813, "learning_rate": 1.0202314242826264e-05, "loss": 0.1842174530029297, "step": 4669 }, { "epoch": 0.6313476975073933, "grad_norm": 1.1847476959228516, "learning_rate": 1.0195829854359299e-05, "loss": 0.18784749507904053, "step": 4670 }, { "epoch": 0.6314828897338403, "grad_norm": 0.886667788028717, "learning_rate": 1.0189346466146175e-05, "loss": 0.15140533447265625, "step": 4671 }, { "epoch": 0.6316180819602872, "grad_norm": 1.0732970237731934, "learning_rate": 1.018286407953677e-05, "loss": 0.16356849670410156, "step": 4672 }, { "epoch": 0.6317532741867342, "grad_norm": 1.7637754678726196, "learning_rate": 1.017638269588075e-05, "loss": 0.16754722595214844, "step": 4673 }, { "epoch": 0.6318884664131812, "grad_norm": 1.2803517580032349, "learning_rate": 1.0169902316527575e-05, "loss": 0.11214661598205566, "step": 4674 }, { "epoch": 0.6320236586396282, "grad_norm": 2.3433589935302734, "learning_rate": 1.0163422942826502e-05, "loss": 0.2077770233154297, "step": 4675 }, { "epoch": 0.6321588508660752, "grad_norm": 1.4271533489227295, "learning_rate": 1.0156944576126555e-05, "loss": 0.19047927856445312, "step": 4676 }, { "epoch": 0.6322940430925221, "grad_norm": 0.8583289980888367, "learning_rate": 1.0150467217776579e-05, "loss": 0.1617131233215332, "step": 4677 }, { "epoch": 0.6324292353189691, "grad_norm": 0.7472401857376099, "learning_rate": 1.0143990869125185e-05, "loss": 0.1176985502243042, "step": 4678 }, { "epoch": 0.6325644275454161, "grad_norm": 1.6210994720458984, "learning_rate": 1.013751553152079e-05, "loss": 0.18180227279663086, "step": 4679 }, { "epoch": 0.6326996197718631, "grad_norm": 1.6664046049118042, "learning_rate": 1.0131041206311594e-05, "loss": 0.13129043579101562, "step": 4680 }, { "epoch": 0.63283481199831, "grad_norm": 1.600602626800537, "learning_rate": 1.0124567894845578e-05, "loss": 0.1798548698425293, "step": 4681 }, { "epoch": 0.632970004224757, "grad_norm": 1.4209963083267212, "learning_rate": 1.0118095598470528e-05, "loss": 0.15433979034423828, "step": 4682 }, { "epoch": 0.6331051964512041, "grad_norm": 1.3669917583465576, "learning_rate": 1.0111624318534006e-05, "loss": 0.18619203567504883, "step": 4683 }, { "epoch": 0.6332403886776511, "grad_norm": 0.7033229470252991, "learning_rate": 1.0105154056383377e-05, "loss": 0.13695907592773438, "step": 4684 }, { "epoch": 0.6333755809040981, "grad_norm": 2.507122755050659, "learning_rate": 1.0098684813365764e-05, "loss": 0.23016929626464844, "step": 4685 }, { "epoch": 0.633510773130545, "grad_norm": 2.0399060249328613, "learning_rate": 1.0092216590828115e-05, "loss": 0.20401859283447266, "step": 4686 }, { "epoch": 0.633645965356992, "grad_norm": 2.3903968334198, "learning_rate": 1.0085749390117146e-05, "loss": 0.17146015167236328, "step": 4687 }, { "epoch": 0.633781157583439, "grad_norm": 1.0674290657043457, "learning_rate": 1.0079283212579354e-05, "loss": 0.17020606994628906, "step": 4688 }, { "epoch": 0.633916349809886, "grad_norm": 1.0933576822280884, "learning_rate": 1.0072818059561045e-05, "loss": 0.19450807571411133, "step": 4689 }, { "epoch": 0.634051542036333, "grad_norm": 1.3950855731964111, "learning_rate": 1.0066353932408285e-05, "loss": 0.2066326141357422, "step": 4690 }, { "epoch": 0.6341867342627799, "grad_norm": 0.7375941872596741, "learning_rate": 1.0059890832466948e-05, "loss": 0.1607685089111328, "step": 4691 }, { "epoch": 0.6343219264892269, "grad_norm": 0.872260570526123, "learning_rate": 1.0053428761082684e-05, "loss": 0.16034507751464844, "step": 4692 }, { "epoch": 0.6344571187156739, "grad_norm": 1.213697910308838, "learning_rate": 1.0046967719600927e-05, "loss": 0.158735990524292, "step": 4693 }, { "epoch": 0.6345923109421209, "grad_norm": 0.8235481977462769, "learning_rate": 1.0040507709366912e-05, "loss": 0.14668965339660645, "step": 4694 }, { "epoch": 0.6347275031685679, "grad_norm": 1.2809909582138062, "learning_rate": 1.0034048731725631e-05, "loss": 0.19062519073486328, "step": 4695 }, { "epoch": 0.6348626953950148, "grad_norm": 0.9800539612770081, "learning_rate": 1.0027590788021886e-05, "loss": 0.13780784606933594, "step": 4696 }, { "epoch": 0.6349978876214618, "grad_norm": 1.1619529724121094, "learning_rate": 1.0021133879600258e-05, "loss": 0.21240901947021484, "step": 4697 }, { "epoch": 0.6351330798479088, "grad_norm": 0.5779402852058411, "learning_rate": 1.0014678007805108e-05, "loss": 0.12523365020751953, "step": 4698 }, { "epoch": 0.6352682720743558, "grad_norm": 1.340545892715454, "learning_rate": 1.0008223173980579e-05, "loss": 0.1750335693359375, "step": 4699 }, { "epoch": 0.6354034643008027, "grad_norm": 1.4963045120239258, "learning_rate": 1.0001769379470604e-05, "loss": 0.1944108009338379, "step": 4700 }, { "epoch": 0.6355386565272497, "grad_norm": 1.1744086742401123, "learning_rate": 9.995316625618898e-06, "loss": 0.16965961456298828, "step": 4701 }, { "epoch": 0.6356738487536967, "grad_norm": 1.0566115379333496, "learning_rate": 9.988864913768962e-06, "loss": 0.1754918098449707, "step": 4702 }, { "epoch": 0.6358090409801437, "grad_norm": 0.8660688400268555, "learning_rate": 9.982414245264071e-06, "loss": 0.11942672729492188, "step": 4703 }, { "epoch": 0.6359442332065907, "grad_norm": 0.7203137874603271, "learning_rate": 9.975964621447293e-06, "loss": 0.13349008560180664, "step": 4704 }, { "epoch": 0.6360794254330376, "grad_norm": 1.598728895187378, "learning_rate": 9.96951604366147e-06, "loss": 0.18683242797851562, "step": 4705 }, { "epoch": 0.6362146176594846, "grad_norm": 0.7741524577140808, "learning_rate": 9.963068513249233e-06, "loss": 0.1319897174835205, "step": 4706 }, { "epoch": 0.6363498098859316, "grad_norm": 0.8531058430671692, "learning_rate": 9.956622031552996e-06, "loss": 0.10056877136230469, "step": 4707 }, { "epoch": 0.6364850021123786, "grad_norm": 0.8294493556022644, "learning_rate": 9.950176599914942e-06, "loss": 0.1370983123779297, "step": 4708 }, { "epoch": 0.6366201943388256, "grad_norm": 1.1041579246520996, "learning_rate": 9.943732219677048e-06, "loss": 0.18423080444335938, "step": 4709 }, { "epoch": 0.6367553865652725, "grad_norm": 1.2175341844558716, "learning_rate": 9.93728889218107e-06, "loss": 0.20542526245117188, "step": 4710 }, { "epoch": 0.6368905787917195, "grad_norm": 0.9677608609199524, "learning_rate": 9.930846618768543e-06, "loss": 0.15720367431640625, "step": 4711 }, { "epoch": 0.6370257710181665, "grad_norm": 0.9568702578544617, "learning_rate": 9.924405400780784e-06, "loss": 0.1775522232055664, "step": 4712 }, { "epoch": 0.6371609632446135, "grad_norm": 0.7599431872367859, "learning_rate": 9.917965239558885e-06, "loss": 0.1610431671142578, "step": 4713 }, { "epoch": 0.6372961554710604, "grad_norm": 0.9448076486587524, "learning_rate": 9.911526136443726e-06, "loss": 0.1650867462158203, "step": 4714 }, { "epoch": 0.6374313476975074, "grad_norm": 1.0108996629714966, "learning_rate": 9.905088092775956e-06, "loss": 0.1370830535888672, "step": 4715 }, { "epoch": 0.6375665399239544, "grad_norm": 0.9641760587692261, "learning_rate": 9.898651109896015e-06, "loss": 0.16497325897216797, "step": 4716 }, { "epoch": 0.6377017321504014, "grad_norm": 0.9114315509796143, "learning_rate": 9.892215189144123e-06, "loss": 0.1178368330001831, "step": 4717 }, { "epoch": 0.6378369243768484, "grad_norm": 0.8271059393882751, "learning_rate": 9.88578033186026e-06, "loss": 0.14463472366333008, "step": 4718 }, { "epoch": 0.6379721166032953, "grad_norm": 1.1193820238113403, "learning_rate": 9.879346539384207e-06, "loss": 0.18987798690795898, "step": 4719 }, { "epoch": 0.6381073088297423, "grad_norm": 1.1091747283935547, "learning_rate": 9.87291381305551e-06, "loss": 0.1528254747390747, "step": 4720 }, { "epoch": 0.6382425010561893, "grad_norm": 1.3743292093276978, "learning_rate": 9.866482154213502e-06, "loss": 0.16873931884765625, "step": 4721 }, { "epoch": 0.6383776932826363, "grad_norm": 1.1861999034881592, "learning_rate": 9.86005156419728e-06, "loss": 0.22124958038330078, "step": 4722 }, { "epoch": 0.6385128855090833, "grad_norm": 1.2370994091033936, "learning_rate": 9.853622044345732e-06, "loss": 0.2176356315612793, "step": 4723 }, { "epoch": 0.6386480777355302, "grad_norm": 1.1271567344665527, "learning_rate": 9.847193595997522e-06, "loss": 0.17400717735290527, "step": 4724 }, { "epoch": 0.6387832699619772, "grad_norm": 1.503694772720337, "learning_rate": 9.840766220491078e-06, "loss": 0.24648284912109375, "step": 4725 }, { "epoch": 0.6389184621884242, "grad_norm": 0.7546154260635376, "learning_rate": 9.834339919164625e-06, "loss": 0.1584264039993286, "step": 4726 }, { "epoch": 0.6390536544148712, "grad_norm": 1.7379469871520996, "learning_rate": 9.827914693356145e-06, "loss": 0.21053314208984375, "step": 4727 }, { "epoch": 0.6391888466413181, "grad_norm": 1.36887526512146, "learning_rate": 9.821490544403403e-06, "loss": 0.17522525787353516, "step": 4728 }, { "epoch": 0.6393240388677651, "grad_norm": 3.0597784519195557, "learning_rate": 9.815067473643951e-06, "loss": 0.15152215957641602, "step": 4729 }, { "epoch": 0.6394592310942121, "grad_norm": 1.0458091497421265, "learning_rate": 9.808645482415097e-06, "loss": 0.181732177734375, "step": 4730 }, { "epoch": 0.6395944233206591, "grad_norm": 0.9721728563308716, "learning_rate": 9.80222457205394e-06, "loss": 0.16955041885375977, "step": 4731 }, { "epoch": 0.6397296155471061, "grad_norm": 1.3634389638900757, "learning_rate": 9.795804743897341e-06, "loss": 0.23503875732421875, "step": 4732 }, { "epoch": 0.639864807773553, "grad_norm": 0.8009504675865173, "learning_rate": 9.789385999281948e-06, "loss": 0.16812801361083984, "step": 4733 }, { "epoch": 0.64, "grad_norm": 0.45024386048316956, "learning_rate": 9.782968339544179e-06, "loss": 0.10645103454589844, "step": 4734 }, { "epoch": 0.640135192226447, "grad_norm": 1.0312482118606567, "learning_rate": 9.776551766020219e-06, "loss": 0.14143896102905273, "step": 4735 }, { "epoch": 0.640270384452894, "grad_norm": 0.6227663159370422, "learning_rate": 9.77013628004604e-06, "loss": 0.12190437316894531, "step": 4736 }, { "epoch": 0.640405576679341, "grad_norm": 0.8853806853294373, "learning_rate": 9.763721882957371e-06, "loss": 0.16942214965820312, "step": 4737 }, { "epoch": 0.6405407689057879, "grad_norm": 1.0890270471572876, "learning_rate": 9.757308576089732e-06, "loss": 0.1676945686340332, "step": 4738 }, { "epoch": 0.6406759611322349, "grad_norm": 0.8477455973625183, "learning_rate": 9.750896360778404e-06, "loss": 0.14232635498046875, "step": 4739 }, { "epoch": 0.6408111533586819, "grad_norm": 1.1622205972671509, "learning_rate": 9.744485238358448e-06, "loss": 0.20201444625854492, "step": 4740 }, { "epoch": 0.6409463455851289, "grad_norm": 0.91047203540802, "learning_rate": 9.73807521016469e-06, "loss": 0.1355438232421875, "step": 4741 }, { "epoch": 0.6410815378115758, "grad_norm": 0.7997440695762634, "learning_rate": 9.731666277531732e-06, "loss": 0.15801620483398438, "step": 4742 }, { "epoch": 0.6412167300380228, "grad_norm": 1.0661485195159912, "learning_rate": 9.725258441793947e-06, "loss": 0.1851511001586914, "step": 4743 }, { "epoch": 0.6413519222644698, "grad_norm": 0.7698494791984558, "learning_rate": 9.71885170428549e-06, "loss": 0.15169906616210938, "step": 4744 }, { "epoch": 0.6414871144909168, "grad_norm": 0.5379022359848022, "learning_rate": 9.712446066340265e-06, "loss": 0.11092925071716309, "step": 4745 }, { "epoch": 0.6416223067173638, "grad_norm": 1.1444133520126343, "learning_rate": 9.70604152929197e-06, "loss": 0.1769847869873047, "step": 4746 }, { "epoch": 0.6417574989438107, "grad_norm": 1.4192321300506592, "learning_rate": 9.699638094474054e-06, "loss": 0.222747802734375, "step": 4747 }, { "epoch": 0.6418926911702577, "grad_norm": 2.2647628784179688, "learning_rate": 9.693235763219752e-06, "loss": 0.16417694091796875, "step": 4748 }, { "epoch": 0.6420278833967047, "grad_norm": 1.2556986808776855, "learning_rate": 9.68683453686207e-06, "loss": 0.21502017974853516, "step": 4749 }, { "epoch": 0.6421630756231517, "grad_norm": 0.8340602517127991, "learning_rate": 9.680434416733763e-06, "loss": 0.14348602294921875, "step": 4750 }, { "epoch": 0.6422982678495986, "grad_norm": 1.0318548679351807, "learning_rate": 9.674035404167381e-06, "loss": 0.16250038146972656, "step": 4751 }, { "epoch": 0.6424334600760456, "grad_norm": 1.3777258396148682, "learning_rate": 9.66763750049523e-06, "loss": 0.21093463897705078, "step": 4752 }, { "epoch": 0.6425686523024926, "grad_norm": 0.8274357914924622, "learning_rate": 9.66124070704939e-06, "loss": 0.13401508331298828, "step": 4753 }, { "epoch": 0.6427038445289396, "grad_norm": 0.9177843928337097, "learning_rate": 9.654845025161699e-06, "loss": 0.11472606658935547, "step": 4754 }, { "epoch": 0.6428390367553866, "grad_norm": 0.9275671243667603, "learning_rate": 9.648450456163777e-06, "loss": 0.14565658569335938, "step": 4755 }, { "epoch": 0.6429742289818335, "grad_norm": 2.507289171218872, "learning_rate": 9.64205700138701e-06, "loss": 0.2075939178466797, "step": 4756 }, { "epoch": 0.6431094212082805, "grad_norm": 1.0546643733978271, "learning_rate": 9.635664662162548e-06, "loss": 0.21128082275390625, "step": 4757 }, { "epoch": 0.6432446134347275, "grad_norm": 1.3004906177520752, "learning_rate": 9.629273439821315e-06, "loss": 0.2620086669921875, "step": 4758 }, { "epoch": 0.6433798056611745, "grad_norm": 1.4010204076766968, "learning_rate": 9.622883335693984e-06, "loss": 0.22907114028930664, "step": 4759 }, { "epoch": 0.6435149978876215, "grad_norm": 1.6909691095352173, "learning_rate": 9.616494351111017e-06, "loss": 0.17894935607910156, "step": 4760 }, { "epoch": 0.6436501901140684, "grad_norm": 2.6781859397888184, "learning_rate": 9.610106487402637e-06, "loss": 0.19934654235839844, "step": 4761 }, { "epoch": 0.6437853823405154, "grad_norm": 1.2084870338439941, "learning_rate": 9.603719745898826e-06, "loss": 0.23883819580078125, "step": 4762 }, { "epoch": 0.6439205745669624, "grad_norm": 1.1885716915130615, "learning_rate": 9.597334127929346e-06, "loss": 0.18436622619628906, "step": 4763 }, { "epoch": 0.6440557667934094, "grad_norm": 0.98525470495224, "learning_rate": 9.590949634823707e-06, "loss": 0.1514650583267212, "step": 4764 }, { "epoch": 0.6441909590198563, "grad_norm": 0.7663836479187012, "learning_rate": 9.584566267911198e-06, "loss": 0.11879348754882812, "step": 4765 }, { "epoch": 0.6443261512463033, "grad_norm": 0.8613570332527161, "learning_rate": 9.578184028520874e-06, "loss": 0.21599197387695312, "step": 4766 }, { "epoch": 0.6444613434727503, "grad_norm": 1.2138065099716187, "learning_rate": 9.571802917981548e-06, "loss": 0.14101147651672363, "step": 4767 }, { "epoch": 0.6445965356991973, "grad_norm": 1.5802611112594604, "learning_rate": 9.565422937621798e-06, "loss": 0.18843746185302734, "step": 4768 }, { "epoch": 0.6447317279256443, "grad_norm": 1.049242377281189, "learning_rate": 9.559044088769971e-06, "loss": 0.13640689849853516, "step": 4769 }, { "epoch": 0.6448669201520912, "grad_norm": 0.9434901475906372, "learning_rate": 9.552666372754182e-06, "loss": 0.10306429862976074, "step": 4770 }, { "epoch": 0.6450021123785382, "grad_norm": 1.4389883279800415, "learning_rate": 9.546289790902307e-06, "loss": 0.16845756769180298, "step": 4771 }, { "epoch": 0.6451373046049852, "grad_norm": 0.7540355324745178, "learning_rate": 9.539914344541976e-06, "loss": 0.12136662006378174, "step": 4772 }, { "epoch": 0.6452724968314322, "grad_norm": 1.3766987323760986, "learning_rate": 9.533540035000598e-06, "loss": 0.15412092208862305, "step": 4773 }, { "epoch": 0.6454076890578792, "grad_norm": 0.7467913627624512, "learning_rate": 9.52716686360533e-06, "loss": 0.19866180419921875, "step": 4774 }, { "epoch": 0.6455428812843261, "grad_norm": 1.2792859077453613, "learning_rate": 9.520794831683108e-06, "loss": 0.180267333984375, "step": 4775 }, { "epoch": 0.6456780735107731, "grad_norm": 0.7078832983970642, "learning_rate": 9.514423940560627e-06, "loss": 0.12905120849609375, "step": 4776 }, { "epoch": 0.6458132657372201, "grad_norm": 1.7478652000427246, "learning_rate": 9.508054191564326e-06, "loss": 0.24469757080078125, "step": 4777 }, { "epoch": 0.6459484579636671, "grad_norm": 1.5673394203186035, "learning_rate": 9.501685586020434e-06, "loss": 0.15714645385742188, "step": 4778 }, { "epoch": 0.646083650190114, "grad_norm": 1.3585560321807861, "learning_rate": 9.495318125254919e-06, "loss": 0.21356773376464844, "step": 4779 }, { "epoch": 0.646218842416561, "grad_norm": 1.7007721662521362, "learning_rate": 9.488951810593527e-06, "loss": 0.14565658569335938, "step": 4780 }, { "epoch": 0.646354034643008, "grad_norm": 1.9243805408477783, "learning_rate": 9.48258664336176e-06, "loss": 0.1699962615966797, "step": 4781 }, { "epoch": 0.646489226869455, "grad_norm": 0.8874245285987854, "learning_rate": 9.476222624884873e-06, "loss": 0.14676380157470703, "step": 4782 }, { "epoch": 0.646624419095902, "grad_norm": 1.179166316986084, "learning_rate": 9.469859756487893e-06, "loss": 0.17524433135986328, "step": 4783 }, { "epoch": 0.6467596113223489, "grad_norm": 1.0857069492340088, "learning_rate": 9.463498039495598e-06, "loss": 0.13597440719604492, "step": 4784 }, { "epoch": 0.6468948035487959, "grad_norm": 1.7896784543991089, "learning_rate": 9.457137475232537e-06, "loss": 0.1766681671142578, "step": 4785 }, { "epoch": 0.6470299957752429, "grad_norm": 0.9283929467201233, "learning_rate": 9.450778065023019e-06, "loss": 0.19150161743164062, "step": 4786 }, { "epoch": 0.6471651880016899, "grad_norm": 0.9286468625068665, "learning_rate": 9.444419810191091e-06, "loss": 0.1736125946044922, "step": 4787 }, { "epoch": 0.6473003802281369, "grad_norm": 1.52741277217865, "learning_rate": 9.43806271206059e-06, "loss": 0.1750640869140625, "step": 4788 }, { "epoch": 0.6474355724545838, "grad_norm": 0.7907344102859497, "learning_rate": 9.431706771955089e-06, "loss": 0.15364623069763184, "step": 4789 }, { "epoch": 0.6475707646810308, "grad_norm": 0.9713426828384399, "learning_rate": 9.425351991197937e-06, "loss": 0.16255807876586914, "step": 4790 }, { "epoch": 0.6477059569074778, "grad_norm": 0.7919119596481323, "learning_rate": 9.418998371112221e-06, "loss": 0.16147422790527344, "step": 4791 }, { "epoch": 0.6478411491339248, "grad_norm": 1.0449354648590088, "learning_rate": 9.412645913020807e-06, "loss": 0.14014887809753418, "step": 4792 }, { "epoch": 0.6479763413603717, "grad_norm": 0.8172423243522644, "learning_rate": 9.406294618246313e-06, "loss": 0.18064022064208984, "step": 4793 }, { "epoch": 0.6481115335868187, "grad_norm": 1.11039137840271, "learning_rate": 9.399944488111103e-06, "loss": 0.13756990432739258, "step": 4794 }, { "epoch": 0.6482467258132657, "grad_norm": 0.9745544791221619, "learning_rate": 9.39359552393732e-06, "loss": 0.16774463653564453, "step": 4795 }, { "epoch": 0.6483819180397127, "grad_norm": 0.7310024499893188, "learning_rate": 9.387247727046845e-06, "loss": 0.13253021240234375, "step": 4796 }, { "epoch": 0.6485171102661597, "grad_norm": 2.6191494464874268, "learning_rate": 9.380901098761319e-06, "loss": 0.2328033447265625, "step": 4797 }, { "epoch": 0.6486523024926066, "grad_norm": 0.949286937713623, "learning_rate": 9.374555640402153e-06, "loss": 0.13180160522460938, "step": 4798 }, { "epoch": 0.6487874947190536, "grad_norm": 1.2516103982925415, "learning_rate": 9.368211353290503e-06, "loss": 0.14205217361450195, "step": 4799 }, { "epoch": 0.6489226869455006, "grad_norm": 1.8843960762023926, "learning_rate": 9.36186823874728e-06, "loss": 0.20497512817382812, "step": 4800 }, { "epoch": 0.6490578791719476, "grad_norm": 1.146822214126587, "learning_rate": 9.355526298093152e-06, "loss": 0.17856502532958984, "step": 4801 }, { "epoch": 0.6491930713983946, "grad_norm": 1.0923248529434204, "learning_rate": 9.34918553264855e-06, "loss": 0.18214702606201172, "step": 4802 }, { "epoch": 0.6493282636248415, "grad_norm": 1.3734220266342163, "learning_rate": 9.342845943733658e-06, "loss": 0.2081599235534668, "step": 4803 }, { "epoch": 0.6494634558512885, "grad_norm": 1.463240146636963, "learning_rate": 9.336507532668407e-06, "loss": 0.196807861328125, "step": 4804 }, { "epoch": 0.6495986480777355, "grad_norm": 0.8848334550857544, "learning_rate": 9.33017030077249e-06, "loss": 0.17956097424030304, "step": 4805 }, { "epoch": 0.6497338403041825, "grad_norm": 1.438395380973816, "learning_rate": 9.323834249365346e-06, "loss": 0.1773967742919922, "step": 4806 }, { "epoch": 0.6498690325306294, "grad_norm": 1.0371705293655396, "learning_rate": 9.317499379766183e-06, "loss": 0.1246175765991211, "step": 4807 }, { "epoch": 0.6500042247570764, "grad_norm": 1.0779000520706177, "learning_rate": 9.311165693293954e-06, "loss": 0.19726181030273438, "step": 4808 }, { "epoch": 0.6501394169835234, "grad_norm": 0.8488254547119141, "learning_rate": 9.304833191267364e-06, "loss": 0.13262367248535156, "step": 4809 }, { "epoch": 0.6502746092099704, "grad_norm": 0.9494053721427917, "learning_rate": 9.298501875004874e-06, "loss": 0.17995429039001465, "step": 4810 }, { "epoch": 0.6504098014364174, "grad_norm": 1.0145442485809326, "learning_rate": 9.292171745824695e-06, "loss": 0.21031951904296875, "step": 4811 }, { "epoch": 0.6505449936628643, "grad_norm": 1.9540719985961914, "learning_rate": 9.285842805044797e-06, "loss": 0.22034549713134766, "step": 4812 }, { "epoch": 0.6506801858893113, "grad_norm": 0.7179235219955444, "learning_rate": 9.279515053982905e-06, "loss": 0.12684202194213867, "step": 4813 }, { "epoch": 0.6508153781157583, "grad_norm": 1.0111486911773682, "learning_rate": 9.273188493956476e-06, "loss": 0.16587591171264648, "step": 4814 }, { "epoch": 0.6509505703422053, "grad_norm": 0.9578974843025208, "learning_rate": 9.266863126282746e-06, "loss": 0.14514923095703125, "step": 4815 }, { "epoch": 0.6510857625686522, "grad_norm": 1.45926833152771, "learning_rate": 9.260538952278683e-06, "loss": 0.1788005828857422, "step": 4816 }, { "epoch": 0.6512209547950992, "grad_norm": 1.0004585981369019, "learning_rate": 9.254215973261014e-06, "loss": 0.1754007339477539, "step": 4817 }, { "epoch": 0.6513561470215462, "grad_norm": 1.0461431741714478, "learning_rate": 9.247894190546228e-06, "loss": 0.1740589141845703, "step": 4818 }, { "epoch": 0.6514913392479933, "grad_norm": 1.23405122756958, "learning_rate": 9.241573605450539e-06, "loss": 0.17757678031921387, "step": 4819 }, { "epoch": 0.6516265314744403, "grad_norm": 0.6931892037391663, "learning_rate": 9.235254219289937e-06, "loss": 0.12467765808105469, "step": 4820 }, { "epoch": 0.6517617237008873, "grad_norm": 1.4731919765472412, "learning_rate": 9.228936033380143e-06, "loss": 0.17371177673339844, "step": 4821 }, { "epoch": 0.6518969159273342, "grad_norm": 1.4253627061843872, "learning_rate": 9.222619049036649e-06, "loss": 0.2474212646484375, "step": 4822 }, { "epoch": 0.6520321081537812, "grad_norm": 1.13967764377594, "learning_rate": 9.216303267574674e-06, "loss": 0.2549152374267578, "step": 4823 }, { "epoch": 0.6521673003802282, "grad_norm": 0.86136394739151, "learning_rate": 9.209988690309198e-06, "loss": 0.1515655517578125, "step": 4824 }, { "epoch": 0.6523024926066752, "grad_norm": 1.041922688484192, "learning_rate": 9.203675318554956e-06, "loss": 0.14273762702941895, "step": 4825 }, { "epoch": 0.6524376848331221, "grad_norm": 1.1738505363464355, "learning_rate": 9.19736315362642e-06, "loss": 0.16986465454101562, "step": 4826 }, { "epoch": 0.6525728770595691, "grad_norm": 1.1166892051696777, "learning_rate": 9.191052196837825e-06, "loss": 0.1899890899658203, "step": 4827 }, { "epoch": 0.6527080692860161, "grad_norm": 0.7434604167938232, "learning_rate": 9.184742449503135e-06, "loss": 0.11754012107849121, "step": 4828 }, { "epoch": 0.6528432615124631, "grad_norm": 0.8548848628997803, "learning_rate": 9.178433912936077e-06, "loss": 0.18799066543579102, "step": 4829 }, { "epoch": 0.6529784537389101, "grad_norm": 1.4570649862289429, "learning_rate": 9.172126588450125e-06, "loss": 0.2090167999267578, "step": 4830 }, { "epoch": 0.653113645965357, "grad_norm": 1.1536593437194824, "learning_rate": 9.165820477358491e-06, "loss": 0.1553363800048828, "step": 4831 }, { "epoch": 0.653248838191804, "grad_norm": 2.8031346797943115, "learning_rate": 9.159515580974154e-06, "loss": 0.24471917748451233, "step": 4832 }, { "epoch": 0.653384030418251, "grad_norm": 0.8729983568191528, "learning_rate": 9.15321190060981e-06, "loss": 0.1508469581604004, "step": 4833 }, { "epoch": 0.653519222644698, "grad_norm": 0.9529052972793579, "learning_rate": 9.14690943757793e-06, "loss": 0.1912670135498047, "step": 4834 }, { "epoch": 0.653654414871145, "grad_norm": 1.3464492559432983, "learning_rate": 9.14060819319072e-06, "loss": 0.22151947021484375, "step": 4835 }, { "epoch": 0.6537896070975919, "grad_norm": 0.8646287322044373, "learning_rate": 9.134308168760127e-06, "loss": 0.15400314331054688, "step": 4836 }, { "epoch": 0.6539247993240389, "grad_norm": 0.8091931343078613, "learning_rate": 9.128009365597854e-06, "loss": 0.15532279014587402, "step": 4837 }, { "epoch": 0.6540599915504859, "grad_norm": 2.07232928276062, "learning_rate": 9.121711785015342e-06, "loss": 0.18629729747772217, "step": 4838 }, { "epoch": 0.6541951837769329, "grad_norm": 1.9756603240966797, "learning_rate": 9.115415428323787e-06, "loss": 0.23978805541992188, "step": 4839 }, { "epoch": 0.6543303760033798, "grad_norm": 1.6293244361877441, "learning_rate": 9.109120296834118e-06, "loss": 0.1784200668334961, "step": 4840 }, { "epoch": 0.6544655682298268, "grad_norm": 1.9594029188156128, "learning_rate": 9.10282639185702e-06, "loss": 0.17702198028564453, "step": 4841 }, { "epoch": 0.6546007604562738, "grad_norm": 1.0720446109771729, "learning_rate": 9.096533714702913e-06, "loss": 0.17490577697753906, "step": 4842 }, { "epoch": 0.6547359526827208, "grad_norm": 1.2960160970687866, "learning_rate": 9.090242266681967e-06, "loss": 0.15349197387695312, "step": 4843 }, { "epoch": 0.6548711449091678, "grad_norm": 1.0827581882476807, "learning_rate": 9.083952049104094e-06, "loss": 0.1511077880859375, "step": 4844 }, { "epoch": 0.6550063371356147, "grad_norm": 0.8267826437950134, "learning_rate": 9.07766306327896e-06, "loss": 0.19099807739257812, "step": 4845 }, { "epoch": 0.6551415293620617, "grad_norm": 0.8066474199295044, "learning_rate": 9.071375310515949e-06, "loss": 0.1575620174407959, "step": 4846 }, { "epoch": 0.6552767215885087, "grad_norm": 1.2896349430084229, "learning_rate": 9.065088792124219e-06, "loss": 0.1339585781097412, "step": 4847 }, { "epoch": 0.6554119138149557, "grad_norm": 2.021620512008667, "learning_rate": 9.058803509412647e-06, "loss": 0.19025659561157227, "step": 4848 }, { "epoch": 0.6555471060414026, "grad_norm": 1.1733931303024292, "learning_rate": 9.05251946368987e-06, "loss": 0.14709186553955078, "step": 4849 }, { "epoch": 0.6556822982678496, "grad_norm": 1.1147786378860474, "learning_rate": 9.046236656264258e-06, "loss": 0.17572641372680664, "step": 4850 }, { "epoch": 0.6558174904942966, "grad_norm": 1.1058241128921509, "learning_rate": 9.03995508844392e-06, "loss": 0.14606094360351562, "step": 4851 }, { "epoch": 0.6559526827207436, "grad_norm": 0.9976809024810791, "learning_rate": 9.033674761536718e-06, "loss": 0.14615631103515625, "step": 4852 }, { "epoch": 0.6560878749471906, "grad_norm": 1.7091542482376099, "learning_rate": 9.027395676850244e-06, "loss": 0.14660930633544922, "step": 4853 }, { "epoch": 0.6562230671736375, "grad_norm": 2.5047571659088135, "learning_rate": 9.02111783569184e-06, "loss": 0.2225649356842041, "step": 4854 }, { "epoch": 0.6563582594000845, "grad_norm": 0.7046009302139282, "learning_rate": 9.014841239368591e-06, "loss": 0.09901881217956543, "step": 4855 }, { "epoch": 0.6564934516265315, "grad_norm": 1.403560757637024, "learning_rate": 9.008565889187308e-06, "loss": 0.17946624755859375, "step": 4856 }, { "epoch": 0.6566286438529785, "grad_norm": 1.2385027408599854, "learning_rate": 9.00229178645456e-06, "loss": 0.24045753479003906, "step": 4857 }, { "epoch": 0.6567638360794255, "grad_norm": 1.1896494626998901, "learning_rate": 8.996018932476641e-06, "loss": 0.19264793395996094, "step": 4858 }, { "epoch": 0.6568990283058724, "grad_norm": 1.398219108581543, "learning_rate": 8.989747328559606e-06, "loss": 0.2144947052001953, "step": 4859 }, { "epoch": 0.6570342205323194, "grad_norm": 1.5924408435821533, "learning_rate": 8.98347697600922e-06, "loss": 0.2008814811706543, "step": 4860 }, { "epoch": 0.6571694127587664, "grad_norm": 0.7375935316085815, "learning_rate": 8.977207876131013e-06, "loss": 0.13837194442749023, "step": 4861 }, { "epoch": 0.6573046049852134, "grad_norm": 2.3050174713134766, "learning_rate": 8.970940030230245e-06, "loss": 0.2113971710205078, "step": 4862 }, { "epoch": 0.6574397972116603, "grad_norm": 1.702325463294983, "learning_rate": 8.96467343961191e-06, "loss": 0.17105674743652344, "step": 4863 }, { "epoch": 0.6575749894381073, "grad_norm": 1.138919711112976, "learning_rate": 8.958408105580759e-06, "loss": 0.1725482940673828, "step": 4864 }, { "epoch": 0.6577101816645543, "grad_norm": 1.8034353256225586, "learning_rate": 8.952144029441248e-06, "loss": 0.18815898895263672, "step": 4865 }, { "epoch": 0.6578453738910013, "grad_norm": 0.9733583927154541, "learning_rate": 8.945881212497603e-06, "loss": 0.17118453979492188, "step": 4866 }, { "epoch": 0.6579805661174483, "grad_norm": 0.9002045392990112, "learning_rate": 8.939619656053777e-06, "loss": 0.15566253662109375, "step": 4867 }, { "epoch": 0.6581157583438952, "grad_norm": 1.1197967529296875, "learning_rate": 8.933359361413456e-06, "loss": 0.13778018951416016, "step": 4868 }, { "epoch": 0.6582509505703422, "grad_norm": 0.8414996266365051, "learning_rate": 8.92710032988007e-06, "loss": 0.12005412578582764, "step": 4869 }, { "epoch": 0.6583861427967892, "grad_norm": 1.6632550954818726, "learning_rate": 8.920842562756773e-06, "loss": 0.14882755279541016, "step": 4870 }, { "epoch": 0.6585213350232362, "grad_norm": 1.0913041830062866, "learning_rate": 8.914586061346474e-06, "loss": 0.1559741497039795, "step": 4871 }, { "epoch": 0.6586565272496832, "grad_norm": 1.557337999343872, "learning_rate": 8.908330826951811e-06, "loss": 0.19379496574401855, "step": 4872 }, { "epoch": 0.6587917194761301, "grad_norm": 1.44503653049469, "learning_rate": 8.902076860875155e-06, "loss": 0.21662044525146484, "step": 4873 }, { "epoch": 0.6589269117025771, "grad_norm": 0.9538995027542114, "learning_rate": 8.895824164418615e-06, "loss": 0.1930559277534485, "step": 4874 }, { "epoch": 0.6590621039290241, "grad_norm": 1.781646966934204, "learning_rate": 8.889572738884033e-06, "loss": 0.1397991180419922, "step": 4875 }, { "epoch": 0.6591972961554711, "grad_norm": 1.3334068059921265, "learning_rate": 8.88332258557299e-06, "loss": 0.09713077545166016, "step": 4876 }, { "epoch": 0.659332488381918, "grad_norm": 0.8527290225028992, "learning_rate": 8.877073705786806e-06, "loss": 0.16060161590576172, "step": 4877 }, { "epoch": 0.659467680608365, "grad_norm": 1.114309549331665, "learning_rate": 8.870826100826527e-06, "loss": 0.21113228797912598, "step": 4878 }, { "epoch": 0.659602872834812, "grad_norm": 1.1392513513565063, "learning_rate": 8.86457977199294e-06, "loss": 0.16429519653320312, "step": 4879 }, { "epoch": 0.659738065061259, "grad_norm": 0.9237369298934937, "learning_rate": 8.85833472058656e-06, "loss": 0.16629791259765625, "step": 4880 }, { "epoch": 0.659873257287706, "grad_norm": 1.204338788986206, "learning_rate": 8.852090947907643e-06, "loss": 0.15123748779296875, "step": 4881 }, { "epoch": 0.6600084495141529, "grad_norm": 1.6933914422988892, "learning_rate": 8.84584845525618e-06, "loss": 0.21339750289916992, "step": 4882 }, { "epoch": 0.6601436417405999, "grad_norm": 1.514875054359436, "learning_rate": 8.83960724393188e-06, "loss": 0.143829345703125, "step": 4883 }, { "epoch": 0.6602788339670469, "grad_norm": 1.519263505935669, "learning_rate": 8.833367315234206e-06, "loss": 0.23859786987304688, "step": 4884 }, { "epoch": 0.6604140261934939, "grad_norm": 0.8517480492591858, "learning_rate": 8.82712867046234e-06, "loss": 0.14067935943603516, "step": 4885 }, { "epoch": 0.6605492184199409, "grad_norm": 1.4438402652740479, "learning_rate": 8.820891310915203e-06, "loss": 0.14026641845703125, "step": 4886 }, { "epoch": 0.6606844106463878, "grad_norm": 1.4862960577011108, "learning_rate": 8.81465523789145e-06, "loss": 0.17629718780517578, "step": 4887 }, { "epoch": 0.6608196028728348, "grad_norm": 0.6990572214126587, "learning_rate": 8.808420452689455e-06, "loss": 0.10553503036499023, "step": 4888 }, { "epoch": 0.6609547950992818, "grad_norm": 0.9661646485328674, "learning_rate": 8.802186956607344e-06, "loss": 0.15777969360351562, "step": 4889 }, { "epoch": 0.6610899873257288, "grad_norm": 1.2454967498779297, "learning_rate": 8.795954750942954e-06, "loss": 0.23040008544921875, "step": 4890 }, { "epoch": 0.6612251795521757, "grad_norm": 1.3803699016571045, "learning_rate": 8.789723836993878e-06, "loss": 0.1743154525756836, "step": 4891 }, { "epoch": 0.6613603717786227, "grad_norm": 1.5137220621109009, "learning_rate": 8.783494216057407e-06, "loss": 0.20363807678222656, "step": 4892 }, { "epoch": 0.6614955640050697, "grad_norm": 1.015931248664856, "learning_rate": 8.777265889430593e-06, "loss": 0.18856453895568848, "step": 4893 }, { "epoch": 0.6616307562315167, "grad_norm": 0.7822278738021851, "learning_rate": 8.771038858410206e-06, "loss": 0.1442861557006836, "step": 4894 }, { "epoch": 0.6617659484579637, "grad_norm": 1.3010385036468506, "learning_rate": 8.764813124292744e-06, "loss": 0.20556259155273438, "step": 4895 }, { "epoch": 0.6619011406844106, "grad_norm": 0.921440064907074, "learning_rate": 8.758588688374445e-06, "loss": 0.14670181274414062, "step": 4896 }, { "epoch": 0.6620363329108576, "grad_norm": 1.2484965324401855, "learning_rate": 8.752365551951262e-06, "loss": 0.20201778411865234, "step": 4897 }, { "epoch": 0.6621715251373046, "grad_norm": 1.240796685218811, "learning_rate": 8.74614371631888e-06, "loss": 0.15339231491088867, "step": 4898 }, { "epoch": 0.6623067173637516, "grad_norm": 1.7703746557235718, "learning_rate": 8.739923182772732e-06, "loss": 0.20116877555847168, "step": 4899 }, { "epoch": 0.6624419095901986, "grad_norm": 1.1591278314590454, "learning_rate": 8.733703952607956e-06, "loss": 0.21904993057250977, "step": 4900 }, { "epoch": 0.6625771018166455, "grad_norm": 1.3635591268539429, "learning_rate": 8.727486027119443e-06, "loss": 0.19544029235839844, "step": 4901 }, { "epoch": 0.6627122940430925, "grad_norm": 1.079126000404358, "learning_rate": 8.721269407601783e-06, "loss": 0.15197277069091797, "step": 4902 }, { "epoch": 0.6628474862695395, "grad_norm": 1.6907446384429932, "learning_rate": 8.71505409534931e-06, "loss": 0.1521596908569336, "step": 4903 }, { "epoch": 0.6629826784959865, "grad_norm": 1.5907058715820312, "learning_rate": 8.708840091656093e-06, "loss": 0.2548694610595703, "step": 4904 }, { "epoch": 0.6631178707224334, "grad_norm": 1.1447182893753052, "learning_rate": 8.70262739781592e-06, "loss": 0.17750167846679688, "step": 4905 }, { "epoch": 0.6632530629488804, "grad_norm": 0.8437352776527405, "learning_rate": 8.696416015122302e-06, "loss": 0.14332294464111328, "step": 4906 }, { "epoch": 0.6633882551753274, "grad_norm": 0.9016723036766052, "learning_rate": 8.690205944868487e-06, "loss": 0.18363571166992188, "step": 4907 }, { "epoch": 0.6635234474017744, "grad_norm": 0.9682764410972595, "learning_rate": 8.683997188347436e-06, "loss": 0.151594877243042, "step": 4908 }, { "epoch": 0.6636586396282214, "grad_norm": 1.3980848789215088, "learning_rate": 8.677789746851855e-06, "loss": 0.12291526794433594, "step": 4909 }, { "epoch": 0.6637938318546683, "grad_norm": 0.981627881526947, "learning_rate": 8.671583621674167e-06, "loss": 0.18784523010253906, "step": 4910 }, { "epoch": 0.6639290240811153, "grad_norm": 0.7934214472770691, "learning_rate": 8.665378814106512e-06, "loss": 0.1471269130706787, "step": 4911 }, { "epoch": 0.6640642163075623, "grad_norm": 0.8068694472312927, "learning_rate": 8.65917532544077e-06, "loss": 0.16522598266601562, "step": 4912 }, { "epoch": 0.6641994085340093, "grad_norm": 2.6476728916168213, "learning_rate": 8.652973156968532e-06, "loss": 0.18506383895874023, "step": 4913 }, { "epoch": 0.6643346007604563, "grad_norm": 1.2200329303741455, "learning_rate": 8.646772309981141e-06, "loss": 0.1431293487548828, "step": 4914 }, { "epoch": 0.6644697929869032, "grad_norm": 0.896338939666748, "learning_rate": 8.640572785769624e-06, "loss": 0.1825408935546875, "step": 4915 }, { "epoch": 0.6646049852133502, "grad_norm": 1.2054084539413452, "learning_rate": 8.63437458562477e-06, "loss": 0.19852542877197266, "step": 4916 }, { "epoch": 0.6647401774397972, "grad_norm": 0.9763993620872498, "learning_rate": 8.628177710837068e-06, "loss": 0.15083742141723633, "step": 4917 }, { "epoch": 0.6648753696662442, "grad_norm": 1.5249390602111816, "learning_rate": 8.621982162696752e-06, "loss": 0.19697189331054688, "step": 4918 }, { "epoch": 0.6650105618926911, "grad_norm": 1.2886157035827637, "learning_rate": 8.615787942493766e-06, "loss": 0.1869983673095703, "step": 4919 }, { "epoch": 0.6651457541191381, "grad_norm": 0.8438150882720947, "learning_rate": 8.609595051517765e-06, "loss": 0.16390323638916016, "step": 4920 }, { "epoch": 0.6652809463455851, "grad_norm": 1.4547886848449707, "learning_rate": 8.603403491058157e-06, "loss": 0.2007460594177246, "step": 4921 }, { "epoch": 0.6654161385720321, "grad_norm": 1.0295220613479614, "learning_rate": 8.597213262404046e-06, "loss": 0.1322331428527832, "step": 4922 }, { "epoch": 0.6655513307984791, "grad_norm": 2.981210470199585, "learning_rate": 8.591024366844291e-06, "loss": 0.2532529830932617, "step": 4923 }, { "epoch": 0.665686523024926, "grad_norm": 2.019127607345581, "learning_rate": 8.584836805667434e-06, "loss": 0.18584585189819336, "step": 4924 }, { "epoch": 0.665821715251373, "grad_norm": 0.7941632270812988, "learning_rate": 8.578650580161754e-06, "loss": 0.18793773651123047, "step": 4925 }, { "epoch": 0.66595690747782, "grad_norm": 1.9173439741134644, "learning_rate": 8.572465691615275e-06, "loss": 0.2226409912109375, "step": 4926 }, { "epoch": 0.666092099704267, "grad_norm": 0.9492316246032715, "learning_rate": 8.56628214131571e-06, "loss": 0.10794901847839355, "step": 4927 }, { "epoch": 0.666227291930714, "grad_norm": 1.4376837015151978, "learning_rate": 8.560099930550523e-06, "loss": 0.17420196533203125, "step": 4928 }, { "epoch": 0.6663624841571609, "grad_norm": 2.9954895973205566, "learning_rate": 8.553919060606866e-06, "loss": 0.19881820678710938, "step": 4929 }, { "epoch": 0.6664976763836079, "grad_norm": 1.1781864166259766, "learning_rate": 8.54773953277163e-06, "loss": 0.20312881469726562, "step": 4930 }, { "epoch": 0.6666328686100549, "grad_norm": 0.8118711709976196, "learning_rate": 8.541561348331433e-06, "loss": 0.11457443237304688, "step": 4931 }, { "epoch": 0.6667680608365019, "grad_norm": 0.7794628143310547, "learning_rate": 8.535384508572603e-06, "loss": 0.15274116396903992, "step": 4932 }, { "epoch": 0.6669032530629488, "grad_norm": 1.098728895187378, "learning_rate": 8.529209014781202e-06, "loss": 0.1983509063720703, "step": 4933 }, { "epoch": 0.6670384452893958, "grad_norm": 2.0450127124786377, "learning_rate": 8.523034868242984e-06, "loss": 0.25242042541503906, "step": 4934 }, { "epoch": 0.6671736375158428, "grad_norm": 1.6885724067687988, "learning_rate": 8.51686207024344e-06, "loss": 0.13765239715576172, "step": 4935 }, { "epoch": 0.6673088297422898, "grad_norm": 1.2371320724487305, "learning_rate": 8.510690622067792e-06, "loss": 0.13949227333068848, "step": 4936 }, { "epoch": 0.6674440219687368, "grad_norm": 1.3392064571380615, "learning_rate": 8.50452052500096e-06, "loss": 0.18907546997070312, "step": 4937 }, { "epoch": 0.6675792141951837, "grad_norm": 1.7679469585418701, "learning_rate": 8.498351780327594e-06, "loss": 0.16900354623794556, "step": 4938 }, { "epoch": 0.6677144064216307, "grad_norm": 0.8894747495651245, "learning_rate": 8.492184389332061e-06, "loss": 0.16308212280273438, "step": 4939 }, { "epoch": 0.6678495986480777, "grad_norm": 1.581634521484375, "learning_rate": 8.486018353298432e-06, "loss": 0.18794822692871094, "step": 4940 }, { "epoch": 0.6679847908745247, "grad_norm": 1.3632307052612305, "learning_rate": 8.479853673510528e-06, "loss": 0.1606616973876953, "step": 4941 }, { "epoch": 0.6681199831009716, "grad_norm": 1.3677546977996826, "learning_rate": 8.473690351251855e-06, "loss": 0.22364234924316406, "step": 4942 }, { "epoch": 0.6682551753274186, "grad_norm": 1.4845232963562012, "learning_rate": 8.467528387805656e-06, "loss": 0.14280271530151367, "step": 4943 }, { "epoch": 0.6683903675538656, "grad_norm": 1.0892442464828491, "learning_rate": 8.461367784454881e-06, "loss": 0.1562657356262207, "step": 4944 }, { "epoch": 0.6685255597803126, "grad_norm": 0.8801248669624329, "learning_rate": 8.455208542482195e-06, "loss": 0.18584251403808594, "step": 4945 }, { "epoch": 0.6686607520067596, "grad_norm": 0.8785949945449829, "learning_rate": 8.449050663170004e-06, "loss": 0.1080223023891449, "step": 4946 }, { "epoch": 0.6687959442332065, "grad_norm": 1.0281901359558105, "learning_rate": 8.442894147800387e-06, "loss": 0.13111495971679688, "step": 4947 }, { "epoch": 0.6689311364596535, "grad_norm": 1.1611011028289795, "learning_rate": 8.436738997655184e-06, "loss": 0.17256391048431396, "step": 4948 }, { "epoch": 0.6690663286861005, "grad_norm": 1.9226800203323364, "learning_rate": 8.430585214015918e-06, "loss": 0.1781473159790039, "step": 4949 }, { "epoch": 0.6692015209125475, "grad_norm": 1.2277345657348633, "learning_rate": 8.424432798163838e-06, "loss": 0.1698395013809204, "step": 4950 }, { "epoch": 0.6693367131389945, "grad_norm": 1.5692715644836426, "learning_rate": 8.418281751379926e-06, "loss": 0.15980815887451172, "step": 4951 }, { "epoch": 0.6694719053654414, "grad_norm": 1.5422580242156982, "learning_rate": 8.41213207494484e-06, "loss": 0.17629623413085938, "step": 4952 }, { "epoch": 0.6696070975918884, "grad_norm": 1.3754093647003174, "learning_rate": 8.405983770138992e-06, "loss": 0.11031877994537354, "step": 4953 }, { "epoch": 0.6697422898183355, "grad_norm": 1.1165724992752075, "learning_rate": 8.399836838242479e-06, "loss": 0.16813087463378906, "step": 4954 }, { "epoch": 0.6698774820447825, "grad_norm": 1.586594581604004, "learning_rate": 8.393691280535143e-06, "loss": 0.19069862365722656, "step": 4955 }, { "epoch": 0.6700126742712295, "grad_norm": 1.0211690664291382, "learning_rate": 8.387547098296516e-06, "loss": 0.14371871948242188, "step": 4956 }, { "epoch": 0.6701478664976764, "grad_norm": 0.7847884297370911, "learning_rate": 8.38140429280583e-06, "loss": 0.1388864517211914, "step": 4957 }, { "epoch": 0.6702830587241234, "grad_norm": 2.0592474937438965, "learning_rate": 8.375262865342073e-06, "loss": 0.21479415893554688, "step": 4958 }, { "epoch": 0.6704182509505704, "grad_norm": 0.9477532505989075, "learning_rate": 8.36912281718391e-06, "loss": 0.18639659881591797, "step": 4959 }, { "epoch": 0.6705534431770174, "grad_norm": 0.7580112218856812, "learning_rate": 8.362984149609748e-06, "loss": 0.1933574676513672, "step": 4960 }, { "epoch": 0.6706886354034644, "grad_norm": 2.17968487739563, "learning_rate": 8.356846863897672e-06, "loss": 0.18117523193359375, "step": 4961 }, { "epoch": 0.6708238276299113, "grad_norm": 1.4100232124328613, "learning_rate": 8.350710961325498e-06, "loss": 0.22339344024658203, "step": 4962 }, { "epoch": 0.6709590198563583, "grad_norm": 0.8292788863182068, "learning_rate": 8.344576443170768e-06, "loss": 0.1196737289428711, "step": 4963 }, { "epoch": 0.6710942120828053, "grad_norm": 1.2195991277694702, "learning_rate": 8.338443310710708e-06, "loss": 0.1936655044555664, "step": 4964 }, { "epoch": 0.6712294043092523, "grad_norm": 2.9747374057769775, "learning_rate": 8.332311565222284e-06, "loss": 0.24501800537109375, "step": 4965 }, { "epoch": 0.6713645965356992, "grad_norm": 2.13307523727417, "learning_rate": 8.326181207982145e-06, "loss": 0.22800636291503906, "step": 4966 }, { "epoch": 0.6714997887621462, "grad_norm": 1.2976917028427124, "learning_rate": 8.32005224026666e-06, "loss": 0.14375638961791992, "step": 4967 }, { "epoch": 0.6716349809885932, "grad_norm": 1.534257411956787, "learning_rate": 8.313924663351927e-06, "loss": 0.2206401824951172, "step": 4968 }, { "epoch": 0.6717701732150402, "grad_norm": 0.9613286852836609, "learning_rate": 8.307798478513733e-06, "loss": 0.16698837280273438, "step": 4969 }, { "epoch": 0.6719053654414872, "grad_norm": 0.8958679437637329, "learning_rate": 8.301673687027583e-06, "loss": 0.20035743713378906, "step": 4970 }, { "epoch": 0.6720405576679341, "grad_norm": 0.5598917603492737, "learning_rate": 8.295550290168692e-06, "loss": 0.08476781845092773, "step": 4971 }, { "epoch": 0.6721757498943811, "grad_norm": 1.2285544872283936, "learning_rate": 8.289428289211977e-06, "loss": 0.15395641326904297, "step": 4972 }, { "epoch": 0.6723109421208281, "grad_norm": 2.167975425720215, "learning_rate": 8.283307685432083e-06, "loss": 0.1150202751159668, "step": 4973 }, { "epoch": 0.6724461343472751, "grad_norm": 0.7897242903709412, "learning_rate": 8.277188480103348e-06, "loss": 0.1756892204284668, "step": 4974 }, { "epoch": 0.672581326573722, "grad_norm": 0.9730179309844971, "learning_rate": 8.271070674499821e-06, "loss": 0.1560632884502411, "step": 4975 }, { "epoch": 0.672716518800169, "grad_norm": 1.4615154266357422, "learning_rate": 8.264954269895262e-06, "loss": 0.17297935485839844, "step": 4976 }, { "epoch": 0.672851711026616, "grad_norm": 2.755807876586914, "learning_rate": 8.258839267563134e-06, "loss": 0.23811721801757812, "step": 4977 }, { "epoch": 0.672986903253063, "grad_norm": 1.1392247676849365, "learning_rate": 8.252725668776623e-06, "loss": 0.16143178939819336, "step": 4978 }, { "epoch": 0.67312209547951, "grad_norm": 1.0451160669326782, "learning_rate": 8.24661347480861e-06, "loss": 0.19554519653320312, "step": 4979 }, { "epoch": 0.6732572877059569, "grad_norm": 1.4334434270858765, "learning_rate": 8.240502686931686e-06, "loss": 0.2019824981689453, "step": 4980 }, { "epoch": 0.6733924799324039, "grad_norm": 1.269405484199524, "learning_rate": 8.234393306418148e-06, "loss": 0.15655136108398438, "step": 4981 }, { "epoch": 0.6735276721588509, "grad_norm": 1.2012618780136108, "learning_rate": 8.228285334539995e-06, "loss": 0.14350223541259766, "step": 4982 }, { "epoch": 0.6736628643852979, "grad_norm": 0.9708613753318787, "learning_rate": 8.22217877256896e-06, "loss": 0.1885991096496582, "step": 4983 }, { "epoch": 0.6737980566117449, "grad_norm": 1.4719375371932983, "learning_rate": 8.216073621776436e-06, "loss": 0.16818499565124512, "step": 4984 }, { "epoch": 0.6739332488381918, "grad_norm": 0.8668748736381531, "learning_rate": 8.209969883433566e-06, "loss": 0.1496891975402832, "step": 4985 }, { "epoch": 0.6740684410646388, "grad_norm": 0.7099021673202515, "learning_rate": 8.203867558811177e-06, "loss": 0.15021228790283203, "step": 4986 }, { "epoch": 0.6742036332910858, "grad_norm": 2.1604654788970947, "learning_rate": 8.197766649179795e-06, "loss": 0.1982102394104004, "step": 4987 }, { "epoch": 0.6743388255175328, "grad_norm": 1.5797946453094482, "learning_rate": 8.191667155809684e-06, "loss": 0.22808456420898438, "step": 4988 }, { "epoch": 0.6744740177439797, "grad_norm": 1.4254366159439087, "learning_rate": 8.185569079970764e-06, "loss": 0.13309884071350098, "step": 4989 }, { "epoch": 0.6746092099704267, "grad_norm": 1.9066100120544434, "learning_rate": 8.179472422932709e-06, "loss": 0.18539905548095703, "step": 4990 }, { "epoch": 0.6747444021968737, "grad_norm": 0.628756046295166, "learning_rate": 8.17337718596486e-06, "loss": 0.11668860912322998, "step": 4991 }, { "epoch": 0.6748795944233207, "grad_norm": 0.9980939030647278, "learning_rate": 8.167283370336295e-06, "loss": 0.1680002212524414, "step": 4992 }, { "epoch": 0.6750147866497677, "grad_norm": 1.0438536405563354, "learning_rate": 8.161190977315766e-06, "loss": 0.18843841552734375, "step": 4993 }, { "epoch": 0.6751499788762146, "grad_norm": 0.9829400181770325, "learning_rate": 8.155100008171736e-06, "loss": 0.1763577163219452, "step": 4994 }, { "epoch": 0.6752851711026616, "grad_norm": 0.8536770939826965, "learning_rate": 8.149010464172392e-06, "loss": 0.14682388305664062, "step": 4995 }, { "epoch": 0.6754203633291086, "grad_norm": 1.2554261684417725, "learning_rate": 8.142922346585597e-06, "loss": 0.2022233009338379, "step": 4996 }, { "epoch": 0.6755555555555556, "grad_norm": 0.8678463697433472, "learning_rate": 8.13683565667895e-06, "loss": 0.18359661102294922, "step": 4997 }, { "epoch": 0.6756907477820026, "grad_norm": 1.1854984760284424, "learning_rate": 8.13075039571971e-06, "loss": 0.1778567135334015, "step": 4998 }, { "epoch": 0.6758259400084495, "grad_norm": 1.0105594396591187, "learning_rate": 8.124666564974864e-06, "loss": 0.12804698944091797, "step": 4999 }, { "epoch": 0.6759611322348965, "grad_norm": 2.407771587371826, "learning_rate": 8.11858416571111e-06, "loss": 0.20404052734375, "step": 5000 }, { "epoch": 0.6760963244613435, "grad_norm": 0.7561890482902527, "learning_rate": 8.112503199194821e-06, "loss": 0.15114164352416992, "step": 5001 }, { "epoch": 0.6762315166877905, "grad_norm": 1.409066081047058, "learning_rate": 8.106423666692108e-06, "loss": 0.1584477424621582, "step": 5002 }, { "epoch": 0.6763667089142374, "grad_norm": 1.0179040431976318, "learning_rate": 8.100345569468742e-06, "loss": 0.1906733512878418, "step": 5003 }, { "epoch": 0.6765019011406844, "grad_norm": 0.9477241635322571, "learning_rate": 8.094268908790215e-06, "loss": 0.19038772583007812, "step": 5004 }, { "epoch": 0.6766370933671314, "grad_norm": 1.7646867036819458, "learning_rate": 8.088193685921733e-06, "loss": 0.2060375213623047, "step": 5005 }, { "epoch": 0.6767722855935784, "grad_norm": 0.7761819362640381, "learning_rate": 8.082119902128185e-06, "loss": 0.12443733215332031, "step": 5006 }, { "epoch": 0.6769074778200254, "grad_norm": 0.9273766875267029, "learning_rate": 8.076047558674164e-06, "loss": 0.1891918182373047, "step": 5007 }, { "epoch": 0.6770426700464723, "grad_norm": 1.034024953842163, "learning_rate": 8.069976656823964e-06, "loss": 0.1491626501083374, "step": 5008 }, { "epoch": 0.6771778622729193, "grad_norm": 1.2532517910003662, "learning_rate": 8.063907197841574e-06, "loss": 0.1720867156982422, "step": 5009 }, { "epoch": 0.6773130544993663, "grad_norm": 0.8703910708427429, "learning_rate": 8.057839182990698e-06, "loss": 0.1313270926475525, "step": 5010 }, { "epoch": 0.6774482467258133, "grad_norm": 2.6156110763549805, "learning_rate": 8.051772613534725e-06, "loss": 0.18322277069091797, "step": 5011 }, { "epoch": 0.6775834389522603, "grad_norm": 0.9662993550300598, "learning_rate": 8.045707490736745e-06, "loss": 0.1251966953277588, "step": 5012 }, { "epoch": 0.6777186311787072, "grad_norm": 2.5010697841644287, "learning_rate": 8.039643815859552e-06, "loss": 0.253537654876709, "step": 5013 }, { "epoch": 0.6778538234051542, "grad_norm": 1.1776468753814697, "learning_rate": 8.033581590165627e-06, "loss": 0.19998162984848022, "step": 5014 }, { "epoch": 0.6779890156316012, "grad_norm": 1.942829966545105, "learning_rate": 8.027520814917175e-06, "loss": 0.24843978881835938, "step": 5015 }, { "epoch": 0.6781242078580482, "grad_norm": 1.1005936861038208, "learning_rate": 8.021461491376064e-06, "loss": 0.21546363830566406, "step": 5016 }, { "epoch": 0.6782594000844951, "grad_norm": 0.8666654229164124, "learning_rate": 8.015403620803885e-06, "loss": 0.14625072479248047, "step": 5017 }, { "epoch": 0.6783945923109421, "grad_norm": 0.8627327680587769, "learning_rate": 8.009347204461922e-06, "loss": 0.20458602905273438, "step": 5018 }, { "epoch": 0.6785297845373891, "grad_norm": 1.170483112335205, "learning_rate": 8.003292243611143e-06, "loss": 0.17746615409851074, "step": 5019 }, { "epoch": 0.6786649767638361, "grad_norm": 1.3867892026901245, "learning_rate": 7.99723873951224e-06, "loss": 0.18867969512939453, "step": 5020 }, { "epoch": 0.6788001689902831, "grad_norm": 1.124878168106079, "learning_rate": 7.991186693425563e-06, "loss": 0.13965702056884766, "step": 5021 }, { "epoch": 0.67893536121673, "grad_norm": 0.8524463772773743, "learning_rate": 7.9851361066112e-06, "loss": 0.17600560188293457, "step": 5022 }, { "epoch": 0.679070553443177, "grad_norm": 0.7314344048500061, "learning_rate": 7.979086980328907e-06, "loss": 0.10720205307006836, "step": 5023 }, { "epoch": 0.679205745669624, "grad_norm": 0.6285949349403381, "learning_rate": 7.973039315838137e-06, "loss": 0.08309197425842285, "step": 5024 }, { "epoch": 0.679340937896071, "grad_norm": 1.6278624534606934, "learning_rate": 7.966993114398067e-06, "loss": 0.19066143035888672, "step": 5025 }, { "epoch": 0.679476130122518, "grad_norm": 1.0072762966156006, "learning_rate": 7.960948377267524e-06, "loss": 0.18010520935058594, "step": 5026 }, { "epoch": 0.6796113223489649, "grad_norm": 1.5092215538024902, "learning_rate": 7.954905105705071e-06, "loss": 0.16632843017578125, "step": 5027 }, { "epoch": 0.6797465145754119, "grad_norm": 1.8192689418792725, "learning_rate": 7.948863300968938e-06, "loss": 0.2074413299560547, "step": 5028 }, { "epoch": 0.6798817068018589, "grad_norm": 1.9579755067825317, "learning_rate": 7.942822964317078e-06, "loss": 0.1721668243408203, "step": 5029 }, { "epoch": 0.6800168990283059, "grad_norm": 1.0593005418777466, "learning_rate": 7.936784097007105e-06, "loss": 0.1670999526977539, "step": 5030 }, { "epoch": 0.6801520912547528, "grad_norm": 1.8515827655792236, "learning_rate": 7.930746700296344e-06, "loss": 0.20845794677734375, "step": 5031 }, { "epoch": 0.6802872834811998, "grad_norm": 0.953555703163147, "learning_rate": 7.924710775441822e-06, "loss": 0.1791372299194336, "step": 5032 }, { "epoch": 0.6804224757076468, "grad_norm": 0.9557312726974487, "learning_rate": 7.918676323700241e-06, "loss": 0.15536212921142578, "step": 5033 }, { "epoch": 0.6805576679340938, "grad_norm": 1.7691465616226196, "learning_rate": 7.912643346328023e-06, "loss": 0.15507960319519043, "step": 5034 }, { "epoch": 0.6806928601605408, "grad_norm": 1.0645300149917603, "learning_rate": 7.906611844581251e-06, "loss": 0.10835909843444824, "step": 5035 }, { "epoch": 0.6808280523869877, "grad_norm": 1.416193962097168, "learning_rate": 7.900581819715713e-06, "loss": 0.181915283203125, "step": 5036 }, { "epoch": 0.6809632446134347, "grad_norm": 1.3164618015289307, "learning_rate": 7.894553272986901e-06, "loss": 0.19794172048568726, "step": 5037 }, { "epoch": 0.6810984368398817, "grad_norm": 0.9206435680389404, "learning_rate": 7.888526205649993e-06, "loss": 0.17533397674560547, "step": 5038 }, { "epoch": 0.6812336290663287, "grad_norm": 1.720955491065979, "learning_rate": 7.882500618959849e-06, "loss": 0.20714569091796875, "step": 5039 }, { "epoch": 0.6813688212927757, "grad_norm": 0.8436741828918457, "learning_rate": 7.876476514171033e-06, "loss": 0.21134567260742188, "step": 5040 }, { "epoch": 0.6815040135192226, "grad_norm": 1.3161191940307617, "learning_rate": 7.870453892537788e-06, "loss": 0.15209579467773438, "step": 5041 }, { "epoch": 0.6816392057456696, "grad_norm": 1.0485806465148926, "learning_rate": 7.864432755314068e-06, "loss": 0.13231611251831055, "step": 5042 }, { "epoch": 0.6817743979721166, "grad_norm": 1.1092981100082397, "learning_rate": 7.858413103753499e-06, "loss": 0.17936325073242188, "step": 5043 }, { "epoch": 0.6819095901985636, "grad_norm": 1.2274843454360962, "learning_rate": 7.852394939109408e-06, "loss": 0.176383376121521, "step": 5044 }, { "epoch": 0.6820447824250105, "grad_norm": 1.3140506744384766, "learning_rate": 7.846378262634803e-06, "loss": 0.16873395442962646, "step": 5045 }, { "epoch": 0.6821799746514575, "grad_norm": 0.6792768836021423, "learning_rate": 7.840363075582385e-06, "loss": 0.15387141704559326, "step": 5046 }, { "epoch": 0.6823151668779045, "grad_norm": 0.7771419286727905, "learning_rate": 7.834349379204565e-06, "loss": 0.1563504934310913, "step": 5047 }, { "epoch": 0.6824503591043515, "grad_norm": 0.9997833967208862, "learning_rate": 7.828337174753411e-06, "loss": 0.14500713348388672, "step": 5048 }, { "epoch": 0.6825855513307985, "grad_norm": 2.215217351913452, "learning_rate": 7.822326463480703e-06, "loss": 0.23760569095611572, "step": 5049 }, { "epoch": 0.6827207435572454, "grad_norm": 1.3574687242507935, "learning_rate": 7.816317246637901e-06, "loss": 0.19251084327697754, "step": 5050 }, { "epoch": 0.6828559357836924, "grad_norm": 2.2461235523223877, "learning_rate": 7.810309525476152e-06, "loss": 0.23035430908203125, "step": 5051 }, { "epoch": 0.6829911280101394, "grad_norm": 0.5272597670555115, "learning_rate": 7.804303301246311e-06, "loss": 0.1177072525024414, "step": 5052 }, { "epoch": 0.6831263202365864, "grad_norm": 1.1031173467636108, "learning_rate": 7.798298575198884e-06, "loss": 0.18041610717773438, "step": 5053 }, { "epoch": 0.6832615124630333, "grad_norm": 0.86440509557724, "learning_rate": 7.792295348584103e-06, "loss": 0.19728469848632812, "step": 5054 }, { "epoch": 0.6833967046894803, "grad_norm": 1.4030793905258179, "learning_rate": 7.786293622651866e-06, "loss": 0.21994972229003906, "step": 5055 }, { "epoch": 0.6835318969159273, "grad_norm": 1.1506909132003784, "learning_rate": 7.78029339865176e-06, "loss": 0.1763622760772705, "step": 5056 }, { "epoch": 0.6836670891423743, "grad_norm": 1.2515126466751099, "learning_rate": 7.774294677833078e-06, "loss": 0.1659860610961914, "step": 5057 }, { "epoch": 0.6838022813688213, "grad_norm": 1.3858851194381714, "learning_rate": 7.768297461444766e-06, "loss": 0.19391918182373047, "step": 5058 }, { "epoch": 0.6839374735952682, "grad_norm": 2.3579061031341553, "learning_rate": 7.762301750735494e-06, "loss": 0.1669478416442871, "step": 5059 }, { "epoch": 0.6840726658217152, "grad_norm": 1.0830591917037964, "learning_rate": 7.756307546953592e-06, "loss": 0.1611347198486328, "step": 5060 }, { "epoch": 0.6842078580481622, "grad_norm": 1.2107855081558228, "learning_rate": 7.750314851347087e-06, "loss": 0.21772003173828125, "step": 5061 }, { "epoch": 0.6843430502746092, "grad_norm": 1.972565770149231, "learning_rate": 7.74432366516369e-06, "loss": 0.1912527084350586, "step": 5062 }, { "epoch": 0.6844782425010562, "grad_norm": 1.1781563758850098, "learning_rate": 7.738333989650794e-06, "loss": 0.13153600692749023, "step": 5063 }, { "epoch": 0.6846134347275031, "grad_norm": 0.6033469438552856, "learning_rate": 7.732345826055487e-06, "loss": 0.1273174285888672, "step": 5064 }, { "epoch": 0.6847486269539501, "grad_norm": 0.9970125555992126, "learning_rate": 7.726359175624537e-06, "loss": 0.1446322202682495, "step": 5065 }, { "epoch": 0.6848838191803971, "grad_norm": 1.2162835597991943, "learning_rate": 7.720374039604395e-06, "loss": 0.1799755096435547, "step": 5066 }, { "epoch": 0.6850190114068441, "grad_norm": 2.279236078262329, "learning_rate": 7.714390419241198e-06, "loss": 0.18764285743236542, "step": 5067 }, { "epoch": 0.685154203633291, "grad_norm": 1.2881633043289185, "learning_rate": 7.70840831578076e-06, "loss": 0.17615699768066406, "step": 5068 }, { "epoch": 0.685289395859738, "grad_norm": 0.8145703673362732, "learning_rate": 7.702427730468601e-06, "loss": 0.1748189926147461, "step": 5069 }, { "epoch": 0.685424588086185, "grad_norm": 0.9014973640441895, "learning_rate": 7.696448664549898e-06, "loss": 0.20568609237670898, "step": 5070 }, { "epoch": 0.685559780312632, "grad_norm": 2.335993766784668, "learning_rate": 7.690471119269541e-06, "loss": 0.19422391057014465, "step": 5071 }, { "epoch": 0.685694972539079, "grad_norm": 1.0950287580490112, "learning_rate": 7.684495095872073e-06, "loss": 0.17508697509765625, "step": 5072 }, { "epoch": 0.6858301647655259, "grad_norm": 1.2751617431640625, "learning_rate": 7.678520595601728e-06, "loss": 0.16922283172607422, "step": 5073 }, { "epoch": 0.6859653569919729, "grad_norm": 2.023818254470825, "learning_rate": 7.672547619702445e-06, "loss": 0.18618106842041016, "step": 5074 }, { "epoch": 0.6861005492184199, "grad_norm": 1.076277494430542, "learning_rate": 7.666576169417823e-06, "loss": 0.16872596740722656, "step": 5075 }, { "epoch": 0.6862357414448669, "grad_norm": 1.1945850849151611, "learning_rate": 7.660606245991147e-06, "loss": 0.13384151458740234, "step": 5076 }, { "epoch": 0.6863709336713139, "grad_norm": 1.4496071338653564, "learning_rate": 7.654637850665393e-06, "loss": 0.18155241012573242, "step": 5077 }, { "epoch": 0.6865061258977608, "grad_norm": 1.0253264904022217, "learning_rate": 7.648670984683199e-06, "loss": 0.16184139251708984, "step": 5078 }, { "epoch": 0.6866413181242078, "grad_norm": 0.8470093607902527, "learning_rate": 7.642705649286916e-06, "loss": 0.1437368392944336, "step": 5079 }, { "epoch": 0.6867765103506548, "grad_norm": 0.615933358669281, "learning_rate": 7.63674184571855e-06, "loss": 0.1031641960144043, "step": 5080 }, { "epoch": 0.6869117025771018, "grad_norm": 0.9475538730621338, "learning_rate": 7.630779575219797e-06, "loss": 0.18244290351867676, "step": 5081 }, { "epoch": 0.6870468948035487, "grad_norm": 0.6023814082145691, "learning_rate": 7.6248188390320344e-06, "loss": 0.11127090454101562, "step": 5082 }, { "epoch": 0.6871820870299957, "grad_norm": 0.9950649738311768, "learning_rate": 7.6188596383963135e-06, "loss": 0.12942171096801758, "step": 5083 }, { "epoch": 0.6873172792564427, "grad_norm": 1.4469207525253296, "learning_rate": 7.612901974553388e-06, "loss": 0.1913074254989624, "step": 5084 }, { "epoch": 0.6874524714828897, "grad_norm": 1.0802984237670898, "learning_rate": 7.606945848743653e-06, "loss": 0.14243316650390625, "step": 5085 }, { "epoch": 0.6875876637093367, "grad_norm": 1.1903811693191528, "learning_rate": 7.600991262207221e-06, "loss": 0.18616104125976562, "step": 5086 }, { "epoch": 0.6877228559357836, "grad_norm": 1.2925255298614502, "learning_rate": 7.595038216183867e-06, "loss": 0.15991497039794922, "step": 5087 }, { "epoch": 0.6878580481622306, "grad_norm": 0.7834618091583252, "learning_rate": 7.589086711913037e-06, "loss": 0.11574745178222656, "step": 5088 }, { "epoch": 0.6879932403886776, "grad_norm": 0.7726534008979797, "learning_rate": 7.583136750633885e-06, "loss": 0.15797996520996094, "step": 5089 }, { "epoch": 0.6881284326151247, "grad_norm": 1.7054098844528198, "learning_rate": 7.577188333585202e-06, "loss": 0.18839263916015625, "step": 5090 }, { "epoch": 0.6882636248415717, "grad_norm": 1.546455979347229, "learning_rate": 7.5712414620054975e-06, "loss": 0.18819141387939453, "step": 5091 }, { "epoch": 0.6883988170680186, "grad_norm": 3.0678529739379883, "learning_rate": 7.565296137132935e-06, "loss": 0.2936263084411621, "step": 5092 }, { "epoch": 0.6885340092944656, "grad_norm": 1.0985461473464966, "learning_rate": 7.559352360205357e-06, "loss": 0.20450496673583984, "step": 5093 }, { "epoch": 0.6886692015209126, "grad_norm": 0.9503136873245239, "learning_rate": 7.553410132460308e-06, "loss": 0.20096588134765625, "step": 5094 }, { "epoch": 0.6888043937473596, "grad_norm": 0.8361325263977051, "learning_rate": 7.547469455134968e-06, "loss": 0.1712038516998291, "step": 5095 }, { "epoch": 0.6889395859738066, "grad_norm": 1.18292236328125, "learning_rate": 7.541530329466236e-06, "loss": 0.19519996643066406, "step": 5096 }, { "epoch": 0.6890747782002535, "grad_norm": 1.4044870138168335, "learning_rate": 7.535592756690661e-06, "loss": 0.15503311157226562, "step": 5097 }, { "epoch": 0.6892099704267005, "grad_norm": 0.6633076667785645, "learning_rate": 7.52965673804448e-06, "loss": 0.1333456039428711, "step": 5098 }, { "epoch": 0.6893451626531475, "grad_norm": 0.8061087131500244, "learning_rate": 7.5237222747636025e-06, "loss": 0.13283157348632812, "step": 5099 }, { "epoch": 0.6894803548795945, "grad_norm": 1.84687077999115, "learning_rate": 7.517789368083611e-06, "loss": 0.16704010963439941, "step": 5100 }, { "epoch": 0.6896155471060414, "grad_norm": 0.8452058434486389, "learning_rate": 7.511858019239778e-06, "loss": 0.17380380630493164, "step": 5101 }, { "epoch": 0.6897507393324884, "grad_norm": 1.1292773485183716, "learning_rate": 7.505928229467038e-06, "loss": 0.20114898681640625, "step": 5102 }, { "epoch": 0.6898859315589354, "grad_norm": 1.2943975925445557, "learning_rate": 7.500000000000004e-06, "loss": 0.17506837844848633, "step": 5103 }, { "epoch": 0.6900211237853824, "grad_norm": 1.6584535837173462, "learning_rate": 7.494073332072963e-06, "loss": 0.18539047241210938, "step": 5104 }, { "epoch": 0.6901563160118294, "grad_norm": 1.3279589414596558, "learning_rate": 7.488148226919877e-06, "loss": 0.21048736572265625, "step": 5105 }, { "epoch": 0.6902915082382763, "grad_norm": 1.2370537519454956, "learning_rate": 7.482224685774393e-06, "loss": 0.1621685028076172, "step": 5106 }, { "epoch": 0.6904267004647233, "grad_norm": 1.1495203971862793, "learning_rate": 7.4763027098698184e-06, "loss": 0.2266845703125, "step": 5107 }, { "epoch": 0.6905618926911703, "grad_norm": 2.1210896968841553, "learning_rate": 7.470382300439143e-06, "loss": 0.1802058219909668, "step": 5108 }, { "epoch": 0.6906970849176173, "grad_norm": 0.7871782779693604, "learning_rate": 7.4644634587150225e-06, "loss": 0.17002034187316895, "step": 5109 }, { "epoch": 0.6908322771440643, "grad_norm": 0.8943871259689331, "learning_rate": 7.4585461859297906e-06, "loss": 0.1412220001220703, "step": 5110 }, { "epoch": 0.6909674693705112, "grad_norm": 0.8312017917633057, "learning_rate": 7.452630483315463e-06, "loss": 0.16393661499023438, "step": 5111 }, { "epoch": 0.6911026615969582, "grad_norm": 0.9605817794799805, "learning_rate": 7.4467163521037186e-06, "loss": 0.18793749809265137, "step": 5112 }, { "epoch": 0.6912378538234052, "grad_norm": 2.580165147781372, "learning_rate": 7.440803793525907e-06, "loss": 0.19863653182983398, "step": 5113 }, { "epoch": 0.6913730460498522, "grad_norm": 1.0944455862045288, "learning_rate": 7.434892808813056e-06, "loss": 0.1680774688720703, "step": 5114 }, { "epoch": 0.6915082382762991, "grad_norm": 2.1677603721618652, "learning_rate": 7.42898339919586e-06, "loss": 0.16836369037628174, "step": 5115 }, { "epoch": 0.6916434305027461, "grad_norm": 1.1497037410736084, "learning_rate": 7.423075565904698e-06, "loss": 0.2027263641357422, "step": 5116 }, { "epoch": 0.6917786227291931, "grad_norm": 1.6549288034439087, "learning_rate": 7.417169310169609e-06, "loss": 0.18097591400146484, "step": 5117 }, { "epoch": 0.6919138149556401, "grad_norm": 0.9023087620735168, "learning_rate": 7.411264633220305e-06, "loss": 0.13146638870239258, "step": 5118 }, { "epoch": 0.6920490071820871, "grad_norm": 1.3073419332504272, "learning_rate": 7.405361536286174e-06, "loss": 0.16814422607421875, "step": 5119 }, { "epoch": 0.692184199408534, "grad_norm": 1.0796542167663574, "learning_rate": 7.399460020596266e-06, "loss": 0.16030216217041016, "step": 5120 }, { "epoch": 0.692319391634981, "grad_norm": 2.0547728538513184, "learning_rate": 7.393560087379322e-06, "loss": 0.21028709411621094, "step": 5121 }, { "epoch": 0.692454583861428, "grad_norm": 1.0796337127685547, "learning_rate": 7.3876617378637195e-06, "loss": 0.16209030151367188, "step": 5122 }, { "epoch": 0.692589776087875, "grad_norm": 0.9031347036361694, "learning_rate": 7.381764973277543e-06, "loss": 0.16449451446533203, "step": 5123 }, { "epoch": 0.692724968314322, "grad_norm": 2.1749205589294434, "learning_rate": 7.375869794848525e-06, "loss": 0.16182827949523926, "step": 5124 }, { "epoch": 0.6928601605407689, "grad_norm": 0.8099848031997681, "learning_rate": 7.3699762038040654e-06, "loss": 0.14237475395202637, "step": 5125 }, { "epoch": 0.6929953527672159, "grad_norm": 1.0861082077026367, "learning_rate": 7.364084201371261e-06, "loss": 0.1667957305908203, "step": 5126 }, { "epoch": 0.6931305449936629, "grad_norm": 1.9657409191131592, "learning_rate": 7.3581937887768334e-06, "loss": 0.24158883094787598, "step": 5127 }, { "epoch": 0.6932657372201099, "grad_norm": 0.9278636574745178, "learning_rate": 7.352304967247217e-06, "loss": 0.16916131973266602, "step": 5128 }, { "epoch": 0.6934009294465568, "grad_norm": 0.8497210144996643, "learning_rate": 7.346417738008487e-06, "loss": 0.15870952606201172, "step": 5129 }, { "epoch": 0.6935361216730038, "grad_norm": 2.4723055362701416, "learning_rate": 7.340532102286399e-06, "loss": 0.19055747985839844, "step": 5130 }, { "epoch": 0.6936713138994508, "grad_norm": 0.7040371894836426, "learning_rate": 7.3346480613063725e-06, "loss": 0.12305355072021484, "step": 5131 }, { "epoch": 0.6938065061258978, "grad_norm": 1.3402516841888428, "learning_rate": 7.328765616293491e-06, "loss": 0.17723703384399414, "step": 5132 }, { "epoch": 0.6939416983523448, "grad_norm": 0.7311825156211853, "learning_rate": 7.322884768472521e-06, "loss": 0.11639022827148438, "step": 5133 }, { "epoch": 0.6940768905787917, "grad_norm": 1.3027299642562866, "learning_rate": 7.317005519067881e-06, "loss": 0.1964874267578125, "step": 5134 }, { "epoch": 0.6942120828052387, "grad_norm": 1.1012269258499146, "learning_rate": 7.311127869303665e-06, "loss": 0.17229747772216797, "step": 5135 }, { "epoch": 0.6943472750316857, "grad_norm": 1.2746553421020508, "learning_rate": 7.305251820403628e-06, "loss": 0.16065621376037598, "step": 5136 }, { "epoch": 0.6944824672581327, "grad_norm": 1.0306593179702759, "learning_rate": 7.299377373591188e-06, "loss": 0.15151715278625488, "step": 5137 }, { "epoch": 0.6946176594845797, "grad_norm": 1.45720636844635, "learning_rate": 7.29350453008945e-06, "loss": 0.21277475357055664, "step": 5138 }, { "epoch": 0.6947528517110266, "grad_norm": 0.8701133728027344, "learning_rate": 7.287633291121166e-06, "loss": 0.188720703125, "step": 5139 }, { "epoch": 0.6948880439374736, "grad_norm": 0.6854133009910583, "learning_rate": 7.281763657908756e-06, "loss": 0.15523910522460938, "step": 5140 }, { "epoch": 0.6950232361639206, "grad_norm": 0.8548727035522461, "learning_rate": 7.275895631674313e-06, "loss": 0.155379056930542, "step": 5141 }, { "epoch": 0.6951584283903676, "grad_norm": 0.8193026185035706, "learning_rate": 7.2700292136395826e-06, "loss": 0.10057544708251953, "step": 5142 }, { "epoch": 0.6952936206168145, "grad_norm": 0.7833569049835205, "learning_rate": 7.264164405025997e-06, "loss": 0.1626291275024414, "step": 5143 }, { "epoch": 0.6954288128432615, "grad_norm": 2.0511326789855957, "learning_rate": 7.2583012070546364e-06, "loss": 0.26996612548828125, "step": 5144 }, { "epoch": 0.6955640050697085, "grad_norm": 0.8838818669319153, "learning_rate": 7.252439620946247e-06, "loss": 0.16716909408569336, "step": 5145 }, { "epoch": 0.6956991972961555, "grad_norm": 0.7456916570663452, "learning_rate": 7.246579647921243e-06, "loss": 0.1417551040649414, "step": 5146 }, { "epoch": 0.6958343895226025, "grad_norm": 1.6904188394546509, "learning_rate": 7.240721289199699e-06, "loss": 0.19864225387573242, "step": 5147 }, { "epoch": 0.6959695817490494, "grad_norm": 1.2677772045135498, "learning_rate": 7.234864546001364e-06, "loss": 0.15386009216308594, "step": 5148 }, { "epoch": 0.6961047739754964, "grad_norm": 1.5002886056900024, "learning_rate": 7.229009419545638e-06, "loss": 0.18121910095214844, "step": 5149 }, { "epoch": 0.6962399662019434, "grad_norm": 1.368133783340454, "learning_rate": 7.223155911051593e-06, "loss": 0.1557598114013672, "step": 5150 }, { "epoch": 0.6963751584283904, "grad_norm": 1.5539847612380981, "learning_rate": 7.2173040217379575e-06, "loss": 0.16486740112304688, "step": 5151 }, { "epoch": 0.6965103506548374, "grad_norm": 0.887169361114502, "learning_rate": 7.211453752823122e-06, "loss": 0.15547466278076172, "step": 5152 }, { "epoch": 0.6966455428812843, "grad_norm": 0.8338747620582581, "learning_rate": 7.205605105525161e-06, "loss": 0.12071585655212402, "step": 5153 }, { "epoch": 0.6967807351077313, "grad_norm": 1.4051493406295776, "learning_rate": 7.19975808106177e-06, "loss": 0.20055007934570312, "step": 5154 }, { "epoch": 0.6969159273341783, "grad_norm": 0.7138229012489319, "learning_rate": 7.193912680650346e-06, "loss": 0.1400771141052246, "step": 5155 }, { "epoch": 0.6970511195606253, "grad_norm": 0.8744693398475647, "learning_rate": 7.188068905507931e-06, "loss": 0.17476320266723633, "step": 5156 }, { "epoch": 0.6971863117870722, "grad_norm": 1.7933357954025269, "learning_rate": 7.182226756851223e-06, "loss": 0.20525169372558594, "step": 5157 }, { "epoch": 0.6973215040135192, "grad_norm": 1.2666850090026855, "learning_rate": 7.176386235896603e-06, "loss": 0.16199016571044922, "step": 5158 }, { "epoch": 0.6974566962399662, "grad_norm": 1.23150634765625, "learning_rate": 7.170547343860079e-06, "loss": 0.16907596588134766, "step": 5159 }, { "epoch": 0.6975918884664132, "grad_norm": 1.0402631759643555, "learning_rate": 7.164710081957355e-06, "loss": 0.17164039611816406, "step": 5160 }, { "epoch": 0.6977270806928602, "grad_norm": 0.7841763496398926, "learning_rate": 7.158874451403777e-06, "loss": 0.13807106018066406, "step": 5161 }, { "epoch": 0.6978622729193071, "grad_norm": 2.068512201309204, "learning_rate": 7.15304045341435e-06, "loss": 0.18516921997070312, "step": 5162 }, { "epoch": 0.6979974651457541, "grad_norm": 1.0427666902542114, "learning_rate": 7.147208089203745e-06, "loss": 0.1490764617919922, "step": 5163 }, { "epoch": 0.6981326573722011, "grad_norm": 1.014174461364746, "learning_rate": 7.141377359986288e-06, "loss": 0.15070199966430664, "step": 5164 }, { "epoch": 0.6982678495986481, "grad_norm": 0.6417869925498962, "learning_rate": 7.135548266975978e-06, "loss": 0.12025153636932373, "step": 5165 }, { "epoch": 0.698403041825095, "grad_norm": 1.6225730180740356, "learning_rate": 7.129720811386456e-06, "loss": 0.21914386749267578, "step": 5166 }, { "epoch": 0.698538234051542, "grad_norm": 2.340458869934082, "learning_rate": 7.12389499443103e-06, "loss": 0.1927943229675293, "step": 5167 }, { "epoch": 0.698673426277989, "grad_norm": 1.1614012718200684, "learning_rate": 7.118070817322668e-06, "loss": 0.17183303833007812, "step": 5168 }, { "epoch": 0.698808618504436, "grad_norm": 1.1580455303192139, "learning_rate": 7.1122482812739885e-06, "loss": 0.17609024047851562, "step": 5169 }, { "epoch": 0.698943810730883, "grad_norm": 0.8738818764686584, "learning_rate": 7.106427387497283e-06, "loss": 0.14845871925354004, "step": 5170 }, { "epoch": 0.6990790029573299, "grad_norm": 1.3449870347976685, "learning_rate": 7.10060813720449e-06, "loss": 0.17636489868164062, "step": 5171 }, { "epoch": 0.6992141951837769, "grad_norm": 1.9404939413070679, "learning_rate": 7.094790531607207e-06, "loss": 0.2740974426269531, "step": 5172 }, { "epoch": 0.6993493874102239, "grad_norm": 1.31975519657135, "learning_rate": 7.088974571916692e-06, "loss": 0.17537879943847656, "step": 5173 }, { "epoch": 0.6994845796366709, "grad_norm": 0.7084823250770569, "learning_rate": 7.0831602593438515e-06, "loss": 0.12566089630126953, "step": 5174 }, { "epoch": 0.6996197718631179, "grad_norm": 1.1030217409133911, "learning_rate": 7.077347595099269e-06, "loss": 0.15218579769134521, "step": 5175 }, { "epoch": 0.6997549640895648, "grad_norm": 1.0435913801193237, "learning_rate": 7.071536580393166e-06, "loss": 0.1305384635925293, "step": 5176 }, { "epoch": 0.6998901563160118, "grad_norm": 1.9203248023986816, "learning_rate": 7.065727216435426e-06, "loss": 0.11629164218902588, "step": 5177 }, { "epoch": 0.7000253485424588, "grad_norm": 2.108100175857544, "learning_rate": 7.05991950443559e-06, "loss": 0.18920421600341797, "step": 5178 }, { "epoch": 0.7001605407689058, "grad_norm": 0.8961849212646484, "learning_rate": 7.05411344560285e-06, "loss": 0.13516521453857422, "step": 5179 }, { "epoch": 0.7002957329953527, "grad_norm": 1.6319124698638916, "learning_rate": 7.048309041146069e-06, "loss": 0.19341468811035156, "step": 5180 }, { "epoch": 0.7004309252217997, "grad_norm": 0.7201474905014038, "learning_rate": 7.0425062922737495e-06, "loss": 0.1359405517578125, "step": 5181 }, { "epoch": 0.7005661174482467, "grad_norm": 0.9397055506706238, "learning_rate": 7.036705200194053e-06, "loss": 0.1709786057472229, "step": 5182 }, { "epoch": 0.7007013096746937, "grad_norm": 1.546152114868164, "learning_rate": 7.0309057661148e-06, "loss": 0.19952011108398438, "step": 5183 }, { "epoch": 0.7008365019011407, "grad_norm": 1.1358003616333008, "learning_rate": 7.0251079912434565e-06, "loss": 0.15515518188476562, "step": 5184 }, { "epoch": 0.7009716941275876, "grad_norm": 1.0617985725402832, "learning_rate": 7.019311876787169e-06, "loss": 0.16869735717773438, "step": 5185 }, { "epoch": 0.7011068863540346, "grad_norm": 1.3697569370269775, "learning_rate": 7.013517423952696e-06, "loss": 0.14263129234313965, "step": 5186 }, { "epoch": 0.7012420785804816, "grad_norm": 1.6162440776824951, "learning_rate": 7.0077246339464904e-06, "loss": 0.24807357788085938, "step": 5187 }, { "epoch": 0.7013772708069286, "grad_norm": 1.3330588340759277, "learning_rate": 7.001933507974635e-06, "loss": 0.15120649337768555, "step": 5188 }, { "epoch": 0.7015124630333756, "grad_norm": 1.3398762941360474, "learning_rate": 6.996144047242868e-06, "loss": 0.20532011985778809, "step": 5189 }, { "epoch": 0.7016476552598225, "grad_norm": 0.8158812522888184, "learning_rate": 6.9903562529566044e-06, "loss": 0.10885810852050781, "step": 5190 }, { "epoch": 0.7017828474862695, "grad_norm": 1.7342039346694946, "learning_rate": 6.984570126320869e-06, "loss": 0.17320013046264648, "step": 5191 }, { "epoch": 0.7019180397127165, "grad_norm": 1.240268588066101, "learning_rate": 6.978785668540384e-06, "loss": 0.15944337844848633, "step": 5192 }, { "epoch": 0.7020532319391635, "grad_norm": 1.0960705280303955, "learning_rate": 6.973002880819496e-06, "loss": 0.20203948020935059, "step": 5193 }, { "epoch": 0.7021884241656104, "grad_norm": 3.224013566970825, "learning_rate": 6.96722176436221e-06, "loss": 0.2921295166015625, "step": 5194 }, { "epoch": 0.7023236163920574, "grad_norm": 1.1440491676330566, "learning_rate": 6.9614423203721975e-06, "loss": 0.12075090408325195, "step": 5195 }, { "epoch": 0.7024588086185044, "grad_norm": 2.2682206630706787, "learning_rate": 6.955664550052749e-06, "loss": 0.2271726131439209, "step": 5196 }, { "epoch": 0.7025940008449514, "grad_norm": 1.2267061471939087, "learning_rate": 6.949888454606847e-06, "loss": 0.1933155059814453, "step": 5197 }, { "epoch": 0.7027291930713984, "grad_norm": 1.8831148147583008, "learning_rate": 6.944114035237095e-06, "loss": 0.20353317260742188, "step": 5198 }, { "epoch": 0.7028643852978453, "grad_norm": 1.0325020551681519, "learning_rate": 6.93834129314576e-06, "loss": 0.16477584838867188, "step": 5199 }, { "epoch": 0.7029995775242923, "grad_norm": 1.0077366828918457, "learning_rate": 6.932570229534759e-06, "loss": 0.16625213623046875, "step": 5200 }, { "epoch": 0.7031347697507393, "grad_norm": 1.5973337888717651, "learning_rate": 6.9268008456056505e-06, "loss": 0.1799304485321045, "step": 5201 }, { "epoch": 0.7032699619771863, "grad_norm": 1.5032504796981812, "learning_rate": 6.921033142559664e-06, "loss": 0.1950300931930542, "step": 5202 }, { "epoch": 0.7034051542036333, "grad_norm": 1.4120783805847168, "learning_rate": 6.915267121597659e-06, "loss": 0.15298080444335938, "step": 5203 }, { "epoch": 0.7035403464300802, "grad_norm": 1.5226410627365112, "learning_rate": 6.909502783920153e-06, "loss": 0.162506103515625, "step": 5204 }, { "epoch": 0.7036755386565272, "grad_norm": 1.3698943853378296, "learning_rate": 6.903740130727312e-06, "loss": 0.1988658905029297, "step": 5205 }, { "epoch": 0.7038107308829742, "grad_norm": 0.7702086567878723, "learning_rate": 6.8979791632189425e-06, "loss": 0.12569665908813477, "step": 5206 }, { "epoch": 0.7039459231094212, "grad_norm": 1.0782551765441895, "learning_rate": 6.892219882594523e-06, "loss": 0.1438922882080078, "step": 5207 }, { "epoch": 0.7040811153358681, "grad_norm": 0.8102098107337952, "learning_rate": 6.886462290053159e-06, "loss": 0.14880084991455078, "step": 5208 }, { "epoch": 0.7042163075623151, "grad_norm": 0.5309339165687561, "learning_rate": 6.880706386793614e-06, "loss": 0.10606718063354492, "step": 5209 }, { "epoch": 0.7043514997887621, "grad_norm": 1.839189887046814, "learning_rate": 6.874952174014298e-06, "loss": 0.14774513244628906, "step": 5210 }, { "epoch": 0.7044866920152091, "grad_norm": 1.2432509660720825, "learning_rate": 6.8691996529132585e-06, "loss": 0.18129825592041016, "step": 5211 }, { "epoch": 0.7046218842416561, "grad_norm": 1.2936617136001587, "learning_rate": 6.863448824688217e-06, "loss": 0.1758289337158203, "step": 5212 }, { "epoch": 0.704757076468103, "grad_norm": 1.2065259218215942, "learning_rate": 6.857699690536521e-06, "loss": 0.19231557846069336, "step": 5213 }, { "epoch": 0.70489226869455, "grad_norm": 1.1868269443511963, "learning_rate": 6.8519522516551685e-06, "loss": 0.14506006240844727, "step": 5214 }, { "epoch": 0.705027460920997, "grad_norm": 2.1638193130493164, "learning_rate": 6.846206509240807e-06, "loss": 0.2016735076904297, "step": 5215 }, { "epoch": 0.705162653147444, "grad_norm": 2.3246593475341797, "learning_rate": 6.840462464489726e-06, "loss": 0.2314605712890625, "step": 5216 }, { "epoch": 0.705297845373891, "grad_norm": 1.1544358730316162, "learning_rate": 6.834720118597879e-06, "loss": 0.1596202850341797, "step": 5217 }, { "epoch": 0.7054330376003379, "grad_norm": 1.245569109916687, "learning_rate": 6.828979472760846e-06, "loss": 0.21908187866210938, "step": 5218 }, { "epoch": 0.7055682298267849, "grad_norm": 0.7290953397750854, "learning_rate": 6.823240528173858e-06, "loss": 0.13218283653259277, "step": 5219 }, { "epoch": 0.7057034220532319, "grad_norm": 1.1778936386108398, "learning_rate": 6.817503286031797e-06, "loss": 0.1854863166809082, "step": 5220 }, { "epoch": 0.7058386142796789, "grad_norm": 1.1369704008102417, "learning_rate": 6.811767747529181e-06, "loss": 0.1791229248046875, "step": 5221 }, { "epoch": 0.7059738065061258, "grad_norm": 1.5306711196899414, "learning_rate": 6.806033913860195e-06, "loss": 0.21477317810058594, "step": 5222 }, { "epoch": 0.7061089987325728, "grad_norm": 3.785911798477173, "learning_rate": 6.800301786218634e-06, "loss": 0.23604393005371094, "step": 5223 }, { "epoch": 0.7062441909590198, "grad_norm": 1.475637435913086, "learning_rate": 6.794571365797971e-06, "loss": 0.18343877792358398, "step": 5224 }, { "epoch": 0.7063793831854668, "grad_norm": 1.4161368608474731, "learning_rate": 6.788842653791308e-06, "loss": 0.17431139945983887, "step": 5225 }, { "epoch": 0.7065145754119139, "grad_norm": 2.020963668823242, "learning_rate": 6.7831156513913864e-06, "loss": 0.1591472625732422, "step": 5226 }, { "epoch": 0.7066497676383608, "grad_norm": 1.020849347114563, "learning_rate": 6.777390359790614e-06, "loss": 0.14255046844482422, "step": 5227 }, { "epoch": 0.7067849598648078, "grad_norm": 1.9783674478530884, "learning_rate": 6.771666780181004e-06, "loss": 0.20640087127685547, "step": 5228 }, { "epoch": 0.7069201520912548, "grad_norm": 0.7509810924530029, "learning_rate": 6.765944913754258e-06, "loss": 0.10895109176635742, "step": 5229 }, { "epoch": 0.7070553443177018, "grad_norm": 1.8413426876068115, "learning_rate": 6.7602247617016885e-06, "loss": 0.22287940979003906, "step": 5230 }, { "epoch": 0.7071905365441488, "grad_norm": 0.9900017380714417, "learning_rate": 6.754506325214265e-06, "loss": 0.1849372386932373, "step": 5231 }, { "epoch": 0.7073257287705957, "grad_norm": 1.370452642440796, "learning_rate": 6.748789605482593e-06, "loss": 0.20341110229492188, "step": 5232 }, { "epoch": 0.7074609209970427, "grad_norm": 0.938466489315033, "learning_rate": 6.743074603696922e-06, "loss": 0.12316513061523438, "step": 5233 }, { "epoch": 0.7075961132234897, "grad_norm": 2.760768175125122, "learning_rate": 6.737361321047155e-06, "loss": 0.17108726501464844, "step": 5234 }, { "epoch": 0.7077313054499367, "grad_norm": 1.2002313137054443, "learning_rate": 6.731649758722823e-06, "loss": 0.22731781005859375, "step": 5235 }, { "epoch": 0.7078664976763837, "grad_norm": 0.6055838465690613, "learning_rate": 6.725939917913102e-06, "loss": 0.11905288696289062, "step": 5236 }, { "epoch": 0.7080016899028306, "grad_norm": 0.7012165188789368, "learning_rate": 6.720231799806814e-06, "loss": 0.1473172903060913, "step": 5237 }, { "epoch": 0.7081368821292776, "grad_norm": 0.5644224286079407, "learning_rate": 6.7145254055924136e-06, "loss": 0.11808204650878906, "step": 5238 }, { "epoch": 0.7082720743557246, "grad_norm": 0.7486729621887207, "learning_rate": 6.70882073645801e-06, "loss": 0.12996768951416016, "step": 5239 }, { "epoch": 0.7084072665821716, "grad_norm": 0.8076403737068176, "learning_rate": 6.703117793591346e-06, "loss": 0.13739728927612305, "step": 5240 }, { "epoch": 0.7085424588086185, "grad_norm": 0.963392972946167, "learning_rate": 6.6974165781798e-06, "loss": 0.16089248657226562, "step": 5241 }, { "epoch": 0.7086776510350655, "grad_norm": 0.965478241443634, "learning_rate": 6.691717091410398e-06, "loss": 0.1577134132385254, "step": 5242 }, { "epoch": 0.7088128432615125, "grad_norm": 0.8361654877662659, "learning_rate": 6.686019334469797e-06, "loss": 0.14540749788284302, "step": 5243 }, { "epoch": 0.7089480354879595, "grad_norm": 1.4129126071929932, "learning_rate": 6.680323308544312e-06, "loss": 0.1485309600830078, "step": 5244 }, { "epoch": 0.7090832277144065, "grad_norm": 1.2083866596221924, "learning_rate": 6.674629014819879e-06, "loss": 0.1945018768310547, "step": 5245 }, { "epoch": 0.7092184199408534, "grad_norm": 1.7138794660568237, "learning_rate": 6.668936454482082e-06, "loss": 0.19281005859375, "step": 5246 }, { "epoch": 0.7093536121673004, "grad_norm": 1.8031753301620483, "learning_rate": 6.6632456287161426e-06, "loss": 0.18366622924804688, "step": 5247 }, { "epoch": 0.7094888043937474, "grad_norm": 1.0596013069152832, "learning_rate": 6.657556538706914e-06, "loss": 0.1574840545654297, "step": 5248 }, { "epoch": 0.7096239966201944, "grad_norm": 1.1774144172668457, "learning_rate": 6.651869185638907e-06, "loss": 0.19628477096557617, "step": 5249 }, { "epoch": 0.7097591888466414, "grad_norm": 1.7380789518356323, "learning_rate": 6.646183570696253e-06, "loss": 0.2093358039855957, "step": 5250 }, { "epoch": 0.7098943810730883, "grad_norm": 0.9401379227638245, "learning_rate": 6.6404996950627275e-06, "loss": 0.1821298599243164, "step": 5251 }, { "epoch": 0.7100295732995353, "grad_norm": 1.5139966011047363, "learning_rate": 6.634817559921744e-06, "loss": 0.16340315341949463, "step": 5252 }, { "epoch": 0.7101647655259823, "grad_norm": 1.1462069749832153, "learning_rate": 6.629137166456348e-06, "loss": 0.1474614143371582, "step": 5253 }, { "epoch": 0.7102999577524293, "grad_norm": 0.8312404751777649, "learning_rate": 6.623458515849244e-06, "loss": 0.13142013549804688, "step": 5254 }, { "epoch": 0.7104351499788762, "grad_norm": 1.0690691471099854, "learning_rate": 6.6177816092827354e-06, "loss": 0.2099456787109375, "step": 5255 }, { "epoch": 0.7105703422053232, "grad_norm": 1.587024211883545, "learning_rate": 6.6121064479388e-06, "loss": 0.19501399993896484, "step": 5256 }, { "epoch": 0.7107055344317702, "grad_norm": 0.8954169750213623, "learning_rate": 6.606433032999031e-06, "loss": 0.17804574966430664, "step": 5257 }, { "epoch": 0.7108407266582172, "grad_norm": 0.8076308369636536, "learning_rate": 6.60076136564466e-06, "loss": 0.16410017013549805, "step": 5258 }, { "epoch": 0.7109759188846642, "grad_norm": 1.0554468631744385, "learning_rate": 6.595091447056574e-06, "loss": 0.21441853046417236, "step": 5259 }, { "epoch": 0.7111111111111111, "grad_norm": 1.2991645336151123, "learning_rate": 6.589423278415259e-06, "loss": 0.2372903823852539, "step": 5260 }, { "epoch": 0.7112463033375581, "grad_norm": 1.1390784978866577, "learning_rate": 6.583756860900872e-06, "loss": 0.13801372051239014, "step": 5261 }, { "epoch": 0.7113814955640051, "grad_norm": 1.022175669670105, "learning_rate": 6.578092195693187e-06, "loss": 0.1260509490966797, "step": 5262 }, { "epoch": 0.7115166877904521, "grad_norm": 0.7669751048088074, "learning_rate": 6.572429283971614e-06, "loss": 0.12987041473388672, "step": 5263 }, { "epoch": 0.711651880016899, "grad_norm": 0.8804888725280762, "learning_rate": 6.566768126915215e-06, "loss": 0.15361404418945312, "step": 5264 }, { "epoch": 0.711787072243346, "grad_norm": 1.2320399284362793, "learning_rate": 6.561108725702653e-06, "loss": 0.146209716796875, "step": 5265 }, { "epoch": 0.711922264469793, "grad_norm": 1.347063422203064, "learning_rate": 6.555451081512262e-06, "loss": 0.2016773223876953, "step": 5266 }, { "epoch": 0.71205745669624, "grad_norm": 1.02095627784729, "learning_rate": 6.549795195521988e-06, "loss": 0.1999359130859375, "step": 5267 }, { "epoch": 0.712192648922687, "grad_norm": 0.8457715511322021, "learning_rate": 6.544141068909416e-06, "loss": 0.11187124252319336, "step": 5268 }, { "epoch": 0.712327841149134, "grad_norm": 0.8879594206809998, "learning_rate": 6.5384887028517645e-06, "loss": 0.12949371337890625, "step": 5269 }, { "epoch": 0.7124630333755809, "grad_norm": 1.1260361671447754, "learning_rate": 6.532838098525883e-06, "loss": 0.1906595230102539, "step": 5270 }, { "epoch": 0.7125982256020279, "grad_norm": 0.8502476215362549, "learning_rate": 6.5271892571082655e-06, "loss": 0.14899611473083496, "step": 5271 }, { "epoch": 0.7127334178284749, "grad_norm": 1.2261719703674316, "learning_rate": 6.521542179775029e-06, "loss": 0.16855430603027344, "step": 5272 }, { "epoch": 0.7128686100549219, "grad_norm": 1.238086223602295, "learning_rate": 6.515896867701924e-06, "loss": 0.15983033180236816, "step": 5273 }, { "epoch": 0.7130038022813688, "grad_norm": 0.9716398119926453, "learning_rate": 6.510253322064333e-06, "loss": 0.1437387466430664, "step": 5274 }, { "epoch": 0.7131389945078158, "grad_norm": 1.5535765886306763, "learning_rate": 6.504611544037267e-06, "loss": 0.21181392669677734, "step": 5275 }, { "epoch": 0.7132741867342628, "grad_norm": 1.6955326795578003, "learning_rate": 6.498971534795387e-06, "loss": 0.21105480194091797, "step": 5276 }, { "epoch": 0.7134093789607098, "grad_norm": 1.180829405784607, "learning_rate": 6.493333295512965e-06, "loss": 0.14973068237304688, "step": 5277 }, { "epoch": 0.7135445711871568, "grad_norm": 0.9586646556854248, "learning_rate": 6.487696827363916e-06, "loss": 0.1754283905029297, "step": 5278 }, { "epoch": 0.7136797634136037, "grad_norm": 0.7057746648788452, "learning_rate": 6.48206213152178e-06, "loss": 0.14139747619628906, "step": 5279 }, { "epoch": 0.7138149556400507, "grad_norm": 1.4581705331802368, "learning_rate": 6.476429209159725e-06, "loss": 0.17615032196044922, "step": 5280 }, { "epoch": 0.7139501478664977, "grad_norm": 1.102221131324768, "learning_rate": 6.470798061450568e-06, "loss": 0.15878593921661377, "step": 5281 }, { "epoch": 0.7140853400929447, "grad_norm": 1.3147752285003662, "learning_rate": 6.465168689566738e-06, "loss": 0.1579265594482422, "step": 5282 }, { "epoch": 0.7142205323193916, "grad_norm": 1.3335950374603271, "learning_rate": 6.4595410946803e-06, "loss": 0.19703292846679688, "step": 5283 }, { "epoch": 0.7143557245458386, "grad_norm": 2.142792224884033, "learning_rate": 6.453915277962948e-06, "loss": 0.21161460876464844, "step": 5284 }, { "epoch": 0.7144909167722856, "grad_norm": 0.9373217225074768, "learning_rate": 6.4482912405860055e-06, "loss": 0.19946861267089844, "step": 5285 }, { "epoch": 0.7146261089987326, "grad_norm": 3.7490131855010986, "learning_rate": 6.442668983720434e-06, "loss": 0.22241973876953125, "step": 5286 }, { "epoch": 0.7147613012251796, "grad_norm": 1.23158860206604, "learning_rate": 6.437048508536813e-06, "loss": 0.1703634262084961, "step": 5287 }, { "epoch": 0.7148964934516265, "grad_norm": 1.6232367753982544, "learning_rate": 6.431429816205357e-06, "loss": 0.1179962158203125, "step": 5288 }, { "epoch": 0.7150316856780735, "grad_norm": 1.2622445821762085, "learning_rate": 6.425812907895904e-06, "loss": 0.2069411277770996, "step": 5289 }, { "epoch": 0.7151668779045205, "grad_norm": 0.8563898205757141, "learning_rate": 6.420197784777925e-06, "loss": 0.15022850036621094, "step": 5290 }, { "epoch": 0.7153020701309675, "grad_norm": 1.122961401939392, "learning_rate": 6.414584448020528e-06, "loss": 0.15265941619873047, "step": 5291 }, { "epoch": 0.7154372623574144, "grad_norm": 0.9871898293495178, "learning_rate": 6.408972898792423e-06, "loss": 0.21117401123046875, "step": 5292 }, { "epoch": 0.7155724545838614, "grad_norm": 1.1413291692733765, "learning_rate": 6.4033631382619766e-06, "loss": 0.14159274101257324, "step": 5293 }, { "epoch": 0.7157076468103084, "grad_norm": 1.3430914878845215, "learning_rate": 6.397755167597171e-06, "loss": 0.18158745765686035, "step": 5294 }, { "epoch": 0.7158428390367554, "grad_norm": 0.7743534445762634, "learning_rate": 6.392148987965603e-06, "loss": 0.12764322757720947, "step": 5295 }, { "epoch": 0.7159780312632024, "grad_norm": 1.141188144683838, "learning_rate": 6.386544600534532e-06, "loss": 0.17114418745040894, "step": 5296 }, { "epoch": 0.7161132234896493, "grad_norm": 0.94621741771698, "learning_rate": 6.3809420064707965e-06, "loss": 0.13990259170532227, "step": 5297 }, { "epoch": 0.7162484157160963, "grad_norm": 1.3892114162445068, "learning_rate": 6.375341206940902e-06, "loss": 0.1618633270263672, "step": 5298 }, { "epoch": 0.7163836079425433, "grad_norm": 1.5031366348266602, "learning_rate": 6.369742203110962e-06, "loss": 0.16081321239471436, "step": 5299 }, { "epoch": 0.7165188001689903, "grad_norm": 1.1159032583236694, "learning_rate": 6.364144996146716e-06, "loss": 0.12784147262573242, "step": 5300 }, { "epoch": 0.7166539923954373, "grad_norm": 1.0115638971328735, "learning_rate": 6.358549587213534e-06, "loss": 0.14824330806732178, "step": 5301 }, { "epoch": 0.7167891846218842, "grad_norm": 1.6801844835281372, "learning_rate": 6.352955977476405e-06, "loss": 0.14242029190063477, "step": 5302 }, { "epoch": 0.7169243768483312, "grad_norm": 0.7700613737106323, "learning_rate": 6.347364168099959e-06, "loss": 0.1633777618408203, "step": 5303 }, { "epoch": 0.7170595690747782, "grad_norm": 0.6836645603179932, "learning_rate": 6.341774160248435e-06, "loss": 0.10064876079559326, "step": 5304 }, { "epoch": 0.7171947613012252, "grad_norm": 1.1566351652145386, "learning_rate": 6.3361859550857e-06, "loss": 0.1804943084716797, "step": 5305 }, { "epoch": 0.7173299535276721, "grad_norm": 1.269128441810608, "learning_rate": 6.330599553775252e-06, "loss": 0.1790175437927246, "step": 5306 }, { "epoch": 0.7174651457541191, "grad_norm": 1.2210664749145508, "learning_rate": 6.325014957480203e-06, "loss": 0.16953563690185547, "step": 5307 }, { "epoch": 0.7176003379805661, "grad_norm": 1.5633882284164429, "learning_rate": 6.319432167363305e-06, "loss": 0.24365615844726562, "step": 5308 }, { "epoch": 0.7177355302070131, "grad_norm": 1.3419339656829834, "learning_rate": 6.313851184586918e-06, "loss": 0.1390066146850586, "step": 5309 }, { "epoch": 0.7178707224334601, "grad_norm": 1.3377631902694702, "learning_rate": 6.308272010313037e-06, "loss": 0.22277069091796875, "step": 5310 }, { "epoch": 0.718005914659907, "grad_norm": 1.1070847511291504, "learning_rate": 6.302694645703273e-06, "loss": 0.245086669921875, "step": 5311 }, { "epoch": 0.718141106886354, "grad_norm": 1.1244513988494873, "learning_rate": 6.297119091918857e-06, "loss": 0.19168567657470703, "step": 5312 }, { "epoch": 0.718276299112801, "grad_norm": 1.4264655113220215, "learning_rate": 6.2915453501206634e-06, "loss": 0.18460631370544434, "step": 5313 }, { "epoch": 0.718411491339248, "grad_norm": 1.0178738832473755, "learning_rate": 6.285973421469166e-06, "loss": 0.18571043014526367, "step": 5314 }, { "epoch": 0.718546683565695, "grad_norm": 1.9263534545898438, "learning_rate": 6.28040330712447e-06, "loss": 0.1701972484588623, "step": 5315 }, { "epoch": 0.7186818757921419, "grad_norm": 0.9028226137161255, "learning_rate": 6.274835008246304e-06, "loss": 0.15158700942993164, "step": 5316 }, { "epoch": 0.7188170680185889, "grad_norm": 0.9210264086723328, "learning_rate": 6.269268525994013e-06, "loss": 0.16845488548278809, "step": 5317 }, { "epoch": 0.7189522602450359, "grad_norm": 0.6532940864562988, "learning_rate": 6.263703861526578e-06, "loss": 0.12207603454589844, "step": 5318 }, { "epoch": 0.7190874524714829, "grad_norm": 2.0820014476776123, "learning_rate": 6.258141016002587e-06, "loss": 0.18713855743408203, "step": 5319 }, { "epoch": 0.7192226446979298, "grad_norm": 1.1615533828735352, "learning_rate": 6.252579990580254e-06, "loss": 0.17844200134277344, "step": 5320 }, { "epoch": 0.7193578369243768, "grad_norm": 1.5662070512771606, "learning_rate": 6.247020786417412e-06, "loss": 0.1857318878173828, "step": 5321 }, { "epoch": 0.7194930291508238, "grad_norm": 1.068244218826294, "learning_rate": 6.241463404671516e-06, "loss": 0.15768051147460938, "step": 5322 }, { "epoch": 0.7196282213772708, "grad_norm": 1.0775821208953857, "learning_rate": 6.235907846499655e-06, "loss": 0.1813983917236328, "step": 5323 }, { "epoch": 0.7197634136037178, "grad_norm": 0.9361177086830139, "learning_rate": 6.230354113058505e-06, "loss": 0.19098472595214844, "step": 5324 }, { "epoch": 0.7198986058301647, "grad_norm": 1.3774340152740479, "learning_rate": 6.2248022055044e-06, "loss": 0.16189992427825928, "step": 5325 }, { "epoch": 0.7200337980566117, "grad_norm": 1.1104826927185059, "learning_rate": 6.219252124993271e-06, "loss": 0.1779160499572754, "step": 5326 }, { "epoch": 0.7201689902830587, "grad_norm": 1.1793369054794312, "learning_rate": 6.213703872680668e-06, "loss": 0.23790359497070312, "step": 5327 }, { "epoch": 0.7203041825095057, "grad_norm": 1.657008409500122, "learning_rate": 6.208157449721785e-06, "loss": 0.15612578392028809, "step": 5328 }, { "epoch": 0.7204393747359527, "grad_norm": 0.9390886425971985, "learning_rate": 6.202612857271393e-06, "loss": 0.1592578887939453, "step": 5329 }, { "epoch": 0.7205745669623996, "grad_norm": 1.925319790840149, "learning_rate": 6.197070096483923e-06, "loss": 0.15016651153564453, "step": 5330 }, { "epoch": 0.7207097591888466, "grad_norm": 1.0671675205230713, "learning_rate": 6.191529168513403e-06, "loss": 0.16497421264648438, "step": 5331 }, { "epoch": 0.7208449514152936, "grad_norm": 0.883821964263916, "learning_rate": 6.1859900745134755e-06, "loss": 0.1655750274658203, "step": 5332 }, { "epoch": 0.7209801436417406, "grad_norm": 0.8217856287956238, "learning_rate": 6.180452815637429e-06, "loss": 0.1514291763305664, "step": 5333 }, { "epoch": 0.7211153358681875, "grad_norm": 1.1794695854187012, "learning_rate": 6.174917393038126e-06, "loss": 0.1785411834716797, "step": 5334 }, { "epoch": 0.7212505280946345, "grad_norm": 0.8648750185966492, "learning_rate": 6.169383807868088e-06, "loss": 0.19240760803222656, "step": 5335 }, { "epoch": 0.7213857203210815, "grad_norm": 0.7522581815719604, "learning_rate": 6.163852061279432e-06, "loss": 0.15709686279296875, "step": 5336 }, { "epoch": 0.7215209125475285, "grad_norm": 0.8271566033363342, "learning_rate": 6.158322154423897e-06, "loss": 0.15279579162597656, "step": 5337 }, { "epoch": 0.7216561047739755, "grad_norm": 0.8708582520484924, "learning_rate": 6.15279408845284e-06, "loss": 0.12851381301879883, "step": 5338 }, { "epoch": 0.7217912970004224, "grad_norm": 0.6457862257957458, "learning_rate": 6.147267864517226e-06, "loss": 0.12972640991210938, "step": 5339 }, { "epoch": 0.7219264892268694, "grad_norm": 0.8093400001525879, "learning_rate": 6.141743483767658e-06, "loss": 0.20019912719726562, "step": 5340 }, { "epoch": 0.7220616814533164, "grad_norm": 1.407281756401062, "learning_rate": 6.136220947354333e-06, "loss": 0.20703887939453125, "step": 5341 }, { "epoch": 0.7221968736797634, "grad_norm": 1.152104377746582, "learning_rate": 6.130700256427075e-06, "loss": 0.2013249397277832, "step": 5342 }, { "epoch": 0.7223320659062104, "grad_norm": 1.2698312997817993, "learning_rate": 6.1251814121353204e-06, "loss": 0.2011566162109375, "step": 5343 }, { "epoch": 0.7224672581326573, "grad_norm": 0.7777379155158997, "learning_rate": 6.1196644156281175e-06, "loss": 0.133453369140625, "step": 5344 }, { "epoch": 0.7226024503591043, "grad_norm": 0.9934561848640442, "learning_rate": 6.114149268054143e-06, "loss": 0.18868422508239746, "step": 5345 }, { "epoch": 0.7227376425855513, "grad_norm": 1.0047225952148438, "learning_rate": 6.108635970561679e-06, "loss": 0.12573719024658203, "step": 5346 }, { "epoch": 0.7228728348119983, "grad_norm": 1.8367822170257568, "learning_rate": 6.103124524298617e-06, "loss": 0.2581939697265625, "step": 5347 }, { "epoch": 0.7230080270384452, "grad_norm": 0.6864392161369324, "learning_rate": 6.097614930412475e-06, "loss": 0.13956880569458008, "step": 5348 }, { "epoch": 0.7231432192648922, "grad_norm": 1.3736300468444824, "learning_rate": 6.092107190050371e-06, "loss": 0.14592409133911133, "step": 5349 }, { "epoch": 0.7232784114913392, "grad_norm": 0.898743212223053, "learning_rate": 6.086601304359059e-06, "loss": 0.16562843322753906, "step": 5350 }, { "epoch": 0.7234136037177862, "grad_norm": 1.059808611869812, "learning_rate": 6.081097274484887e-06, "loss": 0.1921253204345703, "step": 5351 }, { "epoch": 0.7235487959442332, "grad_norm": 0.6184583306312561, "learning_rate": 6.075595101573825e-06, "loss": 0.10637474060058594, "step": 5352 }, { "epoch": 0.7236839881706801, "grad_norm": 0.9661160111427307, "learning_rate": 6.070094786771451e-06, "loss": 0.14311715960502625, "step": 5353 }, { "epoch": 0.7238191803971271, "grad_norm": 1.2027264833450317, "learning_rate": 6.06459633122296e-06, "loss": 0.15629911422729492, "step": 5354 }, { "epoch": 0.7239543726235741, "grad_norm": 1.3895877599716187, "learning_rate": 6.059099736073166e-06, "loss": 0.1889948844909668, "step": 5355 }, { "epoch": 0.7240895648500211, "grad_norm": 1.1352018117904663, "learning_rate": 6.0536050024664865e-06, "loss": 0.1892547607421875, "step": 5356 }, { "epoch": 0.724224757076468, "grad_norm": 0.8232440948486328, "learning_rate": 6.048112131546953e-06, "loss": 0.16222572326660156, "step": 5357 }, { "epoch": 0.724359949302915, "grad_norm": 1.675679087638855, "learning_rate": 6.0426211244582105e-06, "loss": 0.16073906421661377, "step": 5358 }, { "epoch": 0.724495141529362, "grad_norm": 1.1714844703674316, "learning_rate": 6.03713198234351e-06, "loss": 0.16106367111206055, "step": 5359 }, { "epoch": 0.724630333755809, "grad_norm": 0.9745199680328369, "learning_rate": 6.0316447063457395e-06, "loss": 0.19371986389160156, "step": 5360 }, { "epoch": 0.724765525982256, "grad_norm": 1.5840797424316406, "learning_rate": 6.026159297607356e-06, "loss": 0.17846298217773438, "step": 5361 }, { "epoch": 0.724900718208703, "grad_norm": 1.2856674194335938, "learning_rate": 6.020675757270466e-06, "loss": 0.17676793038845062, "step": 5362 }, { "epoch": 0.72503591043515, "grad_norm": 1.3327083587646484, "learning_rate": 6.015194086476766e-06, "loss": 0.1458110809326172, "step": 5363 }, { "epoch": 0.725171102661597, "grad_norm": 1.190673828125, "learning_rate": 6.009714286367565e-06, "loss": 0.1539926528930664, "step": 5364 }, { "epoch": 0.725306294888044, "grad_norm": 0.9986264109611511, "learning_rate": 6.004236358083802e-06, "loss": 0.16712522506713867, "step": 5365 }, { "epoch": 0.725441487114491, "grad_norm": 1.6283403635025024, "learning_rate": 5.998760302765989e-06, "loss": 0.15686607360839844, "step": 5366 }, { "epoch": 0.725576679340938, "grad_norm": 1.0782270431518555, "learning_rate": 5.993286121554289e-06, "loss": 0.1532679796218872, "step": 5367 }, { "epoch": 0.7257118715673849, "grad_norm": 0.8782466053962708, "learning_rate": 5.987813815588447e-06, "loss": 0.2120189666748047, "step": 5368 }, { "epoch": 0.7258470637938319, "grad_norm": 1.1710044145584106, "learning_rate": 5.982343386007827e-06, "loss": 0.19725322723388672, "step": 5369 }, { "epoch": 0.7259822560202789, "grad_norm": 0.7435158491134644, "learning_rate": 5.976874833951404e-06, "loss": 0.1588430404663086, "step": 5370 }, { "epoch": 0.7261174482467259, "grad_norm": 0.8678314685821533, "learning_rate": 5.971408160557751e-06, "loss": 0.14610815048217773, "step": 5371 }, { "epoch": 0.7262526404731728, "grad_norm": 1.2908798456192017, "learning_rate": 5.965943366965069e-06, "loss": 0.19222164154052734, "step": 5372 }, { "epoch": 0.7263878326996198, "grad_norm": 0.930377185344696, "learning_rate": 5.960480454311155e-06, "loss": 0.15697479248046875, "step": 5373 }, { "epoch": 0.7265230249260668, "grad_norm": 1.8857017755508423, "learning_rate": 5.955019423733416e-06, "loss": 0.2737865447998047, "step": 5374 }, { "epoch": 0.7266582171525138, "grad_norm": 0.9728448390960693, "learning_rate": 5.949560276368866e-06, "loss": 0.20526504516601562, "step": 5375 }, { "epoch": 0.7267934093789608, "grad_norm": 1.3432884216308594, "learning_rate": 5.9441030133541235e-06, "loss": 0.21331787109375, "step": 5376 }, { "epoch": 0.7269286016054077, "grad_norm": 0.8436256051063538, "learning_rate": 5.938647635825432e-06, "loss": 0.15552330017089844, "step": 5377 }, { "epoch": 0.7270637938318547, "grad_norm": 0.9984455108642578, "learning_rate": 5.933194144918623e-06, "loss": 0.19208359718322754, "step": 5378 }, { "epoch": 0.7271989860583017, "grad_norm": 1.8108103275299072, "learning_rate": 5.927742541769142e-06, "loss": 0.1502552032470703, "step": 5379 }, { "epoch": 0.7273341782847487, "grad_norm": 2.7279884815216064, "learning_rate": 5.9222928275120445e-06, "loss": 0.17171192169189453, "step": 5380 }, { "epoch": 0.7274693705111956, "grad_norm": 0.799777090549469, "learning_rate": 5.916845003281983e-06, "loss": 0.17359447479248047, "step": 5381 }, { "epoch": 0.7276045627376426, "grad_norm": 1.5356882810592651, "learning_rate": 5.911399070213234e-06, "loss": 0.207733154296875, "step": 5382 }, { "epoch": 0.7277397549640896, "grad_norm": 0.6983720064163208, "learning_rate": 5.905955029439665e-06, "loss": 0.16014456748962402, "step": 5383 }, { "epoch": 0.7278749471905366, "grad_norm": 1.4003392457962036, "learning_rate": 5.900512882094754e-06, "loss": 0.2197399139404297, "step": 5384 }, { "epoch": 0.7280101394169836, "grad_norm": 0.9805797934532166, "learning_rate": 5.8950726293115855e-06, "loss": 0.16019654273986816, "step": 5385 }, { "epoch": 0.7281453316434305, "grad_norm": 1.2284855842590332, "learning_rate": 5.889634272222844e-06, "loss": 0.18422317504882812, "step": 5386 }, { "epoch": 0.7282805238698775, "grad_norm": 0.5891017317771912, "learning_rate": 5.8841978119608345e-06, "loss": 0.10374271869659424, "step": 5387 }, { "epoch": 0.7284157160963245, "grad_norm": 0.9230350852012634, "learning_rate": 5.878763249657452e-06, "loss": 0.11990642547607422, "step": 5388 }, { "epoch": 0.7285509083227715, "grad_norm": 2.1666762828826904, "learning_rate": 5.873330586444202e-06, "loss": 0.1858212947845459, "step": 5389 }, { "epoch": 0.7286861005492185, "grad_norm": 1.917571783065796, "learning_rate": 5.867899823452193e-06, "loss": 0.19831299781799316, "step": 5390 }, { "epoch": 0.7288212927756654, "grad_norm": 2.017155647277832, "learning_rate": 5.862470961812133e-06, "loss": 0.15149211883544922, "step": 5391 }, { "epoch": 0.7289564850021124, "grad_norm": 0.8554419875144958, "learning_rate": 5.857044002654357e-06, "loss": 0.16566067934036255, "step": 5392 }, { "epoch": 0.7290916772285594, "grad_norm": 1.1382036209106445, "learning_rate": 5.851618947108764e-06, "loss": 0.1287059783935547, "step": 5393 }, { "epoch": 0.7292268694550064, "grad_norm": 1.8647043704986572, "learning_rate": 5.8461957963048984e-06, "loss": 0.18945884704589844, "step": 5394 }, { "epoch": 0.7293620616814533, "grad_norm": 1.6169652938842773, "learning_rate": 5.840774551371882e-06, "loss": 0.1940155029296875, "step": 5395 }, { "epoch": 0.7294972539079003, "grad_norm": 1.0978327989578247, "learning_rate": 5.8353552134384405e-06, "loss": 0.20725250244140625, "step": 5396 }, { "epoch": 0.7296324461343473, "grad_norm": 1.4625581502914429, "learning_rate": 5.829937783632926e-06, "loss": 0.21203231811523438, "step": 5397 }, { "epoch": 0.7297676383607943, "grad_norm": 0.7802647948265076, "learning_rate": 5.824522263083256e-06, "loss": 0.1077108383178711, "step": 5398 }, { "epoch": 0.7299028305872413, "grad_norm": 1.3631001710891724, "learning_rate": 5.8191086529169855e-06, "loss": 0.14869403839111328, "step": 5399 }, { "epoch": 0.7300380228136882, "grad_norm": 0.9750107526779175, "learning_rate": 5.813696954261253e-06, "loss": 0.12857818603515625, "step": 5400 }, { "epoch": 0.7301732150401352, "grad_norm": 1.142553448677063, "learning_rate": 5.8082871682428e-06, "loss": 0.20977020263671875, "step": 5401 }, { "epoch": 0.7303084072665822, "grad_norm": 1.1021348237991333, "learning_rate": 5.802879295987975e-06, "loss": 0.18714427947998047, "step": 5402 }, { "epoch": 0.7304435994930292, "grad_norm": 1.195054292678833, "learning_rate": 5.797473338622722e-06, "loss": 0.1609203815460205, "step": 5403 }, { "epoch": 0.7305787917194762, "grad_norm": 2.185063600540161, "learning_rate": 5.792069297272599e-06, "loss": 0.20419692993164062, "step": 5404 }, { "epoch": 0.7307139839459231, "grad_norm": 0.6816750168800354, "learning_rate": 5.7866671730627485e-06, "loss": 0.1489429473876953, "step": 5405 }, { "epoch": 0.7308491761723701, "grad_norm": 1.3252195119857788, "learning_rate": 5.781266967117925e-06, "loss": 0.16443443298339844, "step": 5406 }, { "epoch": 0.7309843683988171, "grad_norm": 1.0631413459777832, "learning_rate": 5.7758686805624815e-06, "loss": 0.15423917770385742, "step": 5407 }, { "epoch": 0.7311195606252641, "grad_norm": 0.7996984124183655, "learning_rate": 5.7704723145203605e-06, "loss": 0.17241859436035156, "step": 5408 }, { "epoch": 0.731254752851711, "grad_norm": 0.981478214263916, "learning_rate": 5.765077870115126e-06, "loss": 0.16557073593139648, "step": 5409 }, { "epoch": 0.731389945078158, "grad_norm": 1.3903049230575562, "learning_rate": 5.759685348469928e-06, "loss": 0.19651222229003906, "step": 5410 }, { "epoch": 0.731525137304605, "grad_norm": 1.4133092164993286, "learning_rate": 5.754294750707514e-06, "loss": 0.17502403259277344, "step": 5411 }, { "epoch": 0.731660329531052, "grad_norm": 0.7892844080924988, "learning_rate": 5.748906077950237e-06, "loss": 0.14845561981201172, "step": 5412 }, { "epoch": 0.731795521757499, "grad_norm": 1.1618221998214722, "learning_rate": 5.743519331320042e-06, "loss": 0.20782470703125, "step": 5413 }, { "epoch": 0.7319307139839459, "grad_norm": 1.258887529373169, "learning_rate": 5.73813451193849e-06, "loss": 0.16991233825683594, "step": 5414 }, { "epoch": 0.7320659062103929, "grad_norm": 1.2043417692184448, "learning_rate": 5.7327516209267225e-06, "loss": 0.18732059001922607, "step": 5415 }, { "epoch": 0.7322010984368399, "grad_norm": 0.8529903292655945, "learning_rate": 5.727370659405486e-06, "loss": 0.14250469207763672, "step": 5416 }, { "epoch": 0.7323362906632869, "grad_norm": 1.8589668273925781, "learning_rate": 5.7219916284951265e-06, "loss": 0.1817009449005127, "step": 5417 }, { "epoch": 0.7324714828897338, "grad_norm": 1.0302187204360962, "learning_rate": 5.716614529315582e-06, "loss": 0.18629693984985352, "step": 5418 }, { "epoch": 0.7326066751161808, "grad_norm": 1.132738709449768, "learning_rate": 5.711239362986401e-06, "loss": 0.19417476654052734, "step": 5419 }, { "epoch": 0.7327418673426278, "grad_norm": 1.0355405807495117, "learning_rate": 5.705866130626719e-06, "loss": 0.13148212432861328, "step": 5420 }, { "epoch": 0.7328770595690748, "grad_norm": 1.6669100522994995, "learning_rate": 5.700494833355271e-06, "loss": 0.20334434509277344, "step": 5421 }, { "epoch": 0.7330122517955218, "grad_norm": 1.911421775817871, "learning_rate": 5.69512547229039e-06, "loss": 0.2167491912841797, "step": 5422 }, { "epoch": 0.7331474440219687, "grad_norm": 1.1529169082641602, "learning_rate": 5.689758048550001e-06, "loss": 0.16358566284179688, "step": 5423 }, { "epoch": 0.7332826362484157, "grad_norm": 1.0827101469039917, "learning_rate": 5.684392563251644e-06, "loss": 0.14029693603515625, "step": 5424 }, { "epoch": 0.7334178284748627, "grad_norm": 1.3239370584487915, "learning_rate": 5.679029017512422e-06, "loss": 0.21293163299560547, "step": 5425 }, { "epoch": 0.7335530207013097, "grad_norm": 1.7510124444961548, "learning_rate": 5.6736674124490684e-06, "loss": 0.22458267211914062, "step": 5426 }, { "epoch": 0.7336882129277567, "grad_norm": 1.5140736103057861, "learning_rate": 5.6683077491778935e-06, "loss": 0.22754859924316406, "step": 5427 }, { "epoch": 0.7338234051542036, "grad_norm": 0.621523916721344, "learning_rate": 5.6629500288148044e-06, "loss": 0.10659146308898926, "step": 5428 }, { "epoch": 0.7339585973806506, "grad_norm": 0.4150683581829071, "learning_rate": 5.657594252475319e-06, "loss": 0.08076173067092896, "step": 5429 }, { "epoch": 0.7340937896070976, "grad_norm": 1.5244630575180054, "learning_rate": 5.652240421274521e-06, "loss": 0.198591947555542, "step": 5430 }, { "epoch": 0.7342289818335446, "grad_norm": 2.136486530303955, "learning_rate": 5.646888536327121e-06, "loss": 0.21309804916381836, "step": 5431 }, { "epoch": 0.7343641740599915, "grad_norm": 1.9946231842041016, "learning_rate": 5.641538598747403e-06, "loss": 0.17765235900878906, "step": 5432 }, { "epoch": 0.7344993662864385, "grad_norm": 1.6058526039123535, "learning_rate": 5.6361906096492495e-06, "loss": 0.19843292236328125, "step": 5433 }, { "epoch": 0.7346345585128855, "grad_norm": 0.9471537470817566, "learning_rate": 5.630844570146157e-06, "loss": 0.1451871544122696, "step": 5434 }, { "epoch": 0.7347697507393325, "grad_norm": 1.1687220335006714, "learning_rate": 5.625500481351176e-06, "loss": 0.16038846969604492, "step": 5435 }, { "epoch": 0.7349049429657795, "grad_norm": 2.6340956687927246, "learning_rate": 5.6201583443769895e-06, "loss": 0.18080902099609375, "step": 5436 }, { "epoch": 0.7350401351922264, "grad_norm": 1.7166969776153564, "learning_rate": 5.614818160335857e-06, "loss": 0.15173912048339844, "step": 5437 }, { "epoch": 0.7351753274186734, "grad_norm": 1.537317156791687, "learning_rate": 5.6094799303396315e-06, "loss": 0.1852703094482422, "step": 5438 }, { "epoch": 0.7353105196451204, "grad_norm": 1.0906593799591064, "learning_rate": 5.6041436554997595e-06, "loss": 0.19263076782226562, "step": 5439 }, { "epoch": 0.7354457118715674, "grad_norm": 0.6516191363334656, "learning_rate": 5.598809336927278e-06, "loss": 0.12322711944580078, "step": 5440 }, { "epoch": 0.7355809040980144, "grad_norm": 2.866525173187256, "learning_rate": 5.5934769757328325e-06, "loss": 0.21516799926757812, "step": 5441 }, { "epoch": 0.7357160963244613, "grad_norm": 0.9940685033798218, "learning_rate": 5.588146573026642e-06, "loss": 0.19419193267822266, "step": 5442 }, { "epoch": 0.7358512885509083, "grad_norm": 1.601098656654358, "learning_rate": 5.582818129918525e-06, "loss": 0.2529716491699219, "step": 5443 }, { "epoch": 0.7359864807773553, "grad_norm": 1.7732347249984741, "learning_rate": 5.5774916475178915e-06, "loss": 0.183624267578125, "step": 5444 }, { "epoch": 0.7361216730038023, "grad_norm": 1.0748921632766724, "learning_rate": 5.572167126933738e-06, "loss": 0.1806321144104004, "step": 5445 }, { "epoch": 0.7362568652302492, "grad_norm": 1.2453570365905762, "learning_rate": 5.566844569274669e-06, "loss": 0.2239093780517578, "step": 5446 }, { "epoch": 0.7363920574566962, "grad_norm": 2.179720163345337, "learning_rate": 5.5615239756488665e-06, "loss": 0.19482421875, "step": 5447 }, { "epoch": 0.7365272496831432, "grad_norm": 2.343989610671997, "learning_rate": 5.556205347164104e-06, "loss": 0.18737125396728516, "step": 5448 }, { "epoch": 0.7366624419095902, "grad_norm": 1.1184184551239014, "learning_rate": 5.550888684927746e-06, "loss": 0.12966585159301758, "step": 5449 }, { "epoch": 0.7367976341360372, "grad_norm": 1.3188831806182861, "learning_rate": 5.545573990046752e-06, "loss": 0.18623828887939453, "step": 5450 }, { "epoch": 0.7369328263624841, "grad_norm": 0.8549461960792542, "learning_rate": 5.540261263627672e-06, "loss": 0.15744781494140625, "step": 5451 }, { "epoch": 0.7370680185889311, "grad_norm": 0.8998275399208069, "learning_rate": 5.534950506776644e-06, "loss": 0.1818866729736328, "step": 5452 }, { "epoch": 0.7372032108153781, "grad_norm": 0.7639499306678772, "learning_rate": 5.529641720599393e-06, "loss": 0.15549564361572266, "step": 5453 }, { "epoch": 0.7373384030418251, "grad_norm": 1.4306678771972656, "learning_rate": 5.52433490620124e-06, "loss": 0.1531050205230713, "step": 5454 }, { "epoch": 0.737473595268272, "grad_norm": 1.0781877040863037, "learning_rate": 5.519030064687082e-06, "loss": 0.15582275390625, "step": 5455 }, { "epoch": 0.737608787494719, "grad_norm": 1.0096534490585327, "learning_rate": 5.51372719716143e-06, "loss": 0.153289794921875, "step": 5456 }, { "epoch": 0.737743979721166, "grad_norm": 2.3367340564727783, "learning_rate": 5.508426304728363e-06, "loss": 0.20807456970214844, "step": 5457 }, { "epoch": 0.737879171947613, "grad_norm": 1.458406686782837, "learning_rate": 5.503127388491552e-06, "loss": 0.1856670379638672, "step": 5458 }, { "epoch": 0.73801436417406, "grad_norm": 1.2269357442855835, "learning_rate": 5.497830449554266e-06, "loss": 0.25836181640625, "step": 5459 }, { "epoch": 0.738149556400507, "grad_norm": 0.8984341621398926, "learning_rate": 5.492535489019344e-06, "loss": 0.15860557556152344, "step": 5460 }, { "epoch": 0.7382847486269539, "grad_norm": 1.0002944469451904, "learning_rate": 5.4872425079892454e-06, "loss": 0.20991134643554688, "step": 5461 }, { "epoch": 0.7384199408534009, "grad_norm": 1.2617703676223755, "learning_rate": 5.481951507565973e-06, "loss": 0.18722152709960938, "step": 5462 }, { "epoch": 0.7385551330798479, "grad_norm": 1.349399209022522, "learning_rate": 5.476662488851159e-06, "loss": 0.1929912567138672, "step": 5463 }, { "epoch": 0.7386903253062949, "grad_norm": 0.7708116769790649, "learning_rate": 5.471375452946e-06, "loss": 0.1670513153076172, "step": 5464 }, { "epoch": 0.7388255175327418, "grad_norm": 1.4045013189315796, "learning_rate": 5.466090400951279e-06, "loss": 0.18651437759399414, "step": 5465 }, { "epoch": 0.7389607097591888, "grad_norm": 1.3405613899230957, "learning_rate": 5.460807333967387e-06, "loss": 0.17983055114746094, "step": 5466 }, { "epoch": 0.7390959019856358, "grad_norm": 1.4084433317184448, "learning_rate": 5.455526253094267e-06, "loss": 0.19733238220214844, "step": 5467 }, { "epoch": 0.7392310942120828, "grad_norm": 1.6750537157058716, "learning_rate": 5.450247159431486e-06, "loss": 0.11149340867996216, "step": 5468 }, { "epoch": 0.7393662864385298, "grad_norm": 0.8786253333091736, "learning_rate": 5.44497005407817e-06, "loss": 0.1979541778564453, "step": 5469 }, { "epoch": 0.7395014786649767, "grad_norm": 2.5073506832122803, "learning_rate": 5.439694938133042e-06, "loss": 0.2315061092376709, "step": 5470 }, { "epoch": 0.7396366708914237, "grad_norm": 2.5064053535461426, "learning_rate": 5.434421812694409e-06, "loss": 0.1924424171447754, "step": 5471 }, { "epoch": 0.7397718631178707, "grad_norm": 0.9299335479736328, "learning_rate": 5.4291506788601624e-06, "loss": 0.13190746307373047, "step": 5472 }, { "epoch": 0.7399070553443177, "grad_norm": 1.2495826482772827, "learning_rate": 5.423881537727785e-06, "loss": 0.20519351959228516, "step": 5473 }, { "epoch": 0.7400422475707646, "grad_norm": 1.0828267335891724, "learning_rate": 5.418614390394338e-06, "loss": 0.17133331298828125, "step": 5474 }, { "epoch": 0.7401774397972116, "grad_norm": 1.8061549663543701, "learning_rate": 5.413349237956469e-06, "loss": 0.27376604080200195, "step": 5475 }, { "epoch": 0.7403126320236586, "grad_norm": 1.3505018949508667, "learning_rate": 5.4080860815104125e-06, "loss": 0.21478939056396484, "step": 5476 }, { "epoch": 0.7404478242501056, "grad_norm": 1.9907888174057007, "learning_rate": 5.402824922151977e-06, "loss": 0.2262735366821289, "step": 5477 }, { "epoch": 0.7405830164765526, "grad_norm": 1.3529293537139893, "learning_rate": 5.397565760976577e-06, "loss": 0.19457340240478516, "step": 5478 }, { "epoch": 0.7407182087029995, "grad_norm": 0.6982883810997009, "learning_rate": 5.392308599079193e-06, "loss": 0.17594337463378906, "step": 5479 }, { "epoch": 0.7408534009294465, "grad_norm": 0.89969402551651, "learning_rate": 5.3870534375543916e-06, "loss": 0.15802907943725586, "step": 5480 }, { "epoch": 0.7409885931558935, "grad_norm": 1.5381609201431274, "learning_rate": 5.381800277496328e-06, "loss": 0.20641517639160156, "step": 5481 }, { "epoch": 0.7411237853823405, "grad_norm": 1.4240254163742065, "learning_rate": 5.376549119998731e-06, "loss": 0.22413253784179688, "step": 5482 }, { "epoch": 0.7412589776087875, "grad_norm": 1.7257921695709229, "learning_rate": 5.3712999661549314e-06, "loss": 0.22533750534057617, "step": 5483 }, { "epoch": 0.7413941698352344, "grad_norm": 2.5512783527374268, "learning_rate": 5.366052817057826e-06, "loss": 0.20793819427490234, "step": 5484 }, { "epoch": 0.7415293620616814, "grad_norm": 1.4617986679077148, "learning_rate": 5.360807673799899e-06, "loss": 0.1730213165283203, "step": 5485 }, { "epoch": 0.7416645542881284, "grad_norm": 1.292579174041748, "learning_rate": 5.355564537473214e-06, "loss": 0.13782978057861328, "step": 5486 }, { "epoch": 0.7417997465145754, "grad_norm": 0.9110461473464966, "learning_rate": 5.35032340916942e-06, "loss": 0.18610572814941406, "step": 5487 }, { "epoch": 0.7419349387410223, "grad_norm": 1.025336742401123, "learning_rate": 5.345084289979755e-06, "loss": 0.16460609436035156, "step": 5488 }, { "epoch": 0.7420701309674693, "grad_norm": 0.8170027732849121, "learning_rate": 5.339847180995026e-06, "loss": 0.10986661911010742, "step": 5489 }, { "epoch": 0.7422053231939163, "grad_norm": 1.35186767578125, "learning_rate": 5.33461208330563e-06, "loss": 0.16333770751953125, "step": 5490 }, { "epoch": 0.7423405154203633, "grad_norm": 1.5800249576568604, "learning_rate": 5.32937899800154e-06, "loss": 0.18089580535888672, "step": 5491 }, { "epoch": 0.7424757076468103, "grad_norm": 1.617856502532959, "learning_rate": 5.324147926172307e-06, "loss": 0.16485190391540527, "step": 5492 }, { "epoch": 0.7426108998732572, "grad_norm": 1.5219545364379883, "learning_rate": 5.318918868907084e-06, "loss": 0.1864166259765625, "step": 5493 }, { "epoch": 0.7427460920997042, "grad_norm": 0.9913140535354614, "learning_rate": 5.313691827294568e-06, "loss": 0.1461009979248047, "step": 5494 }, { "epoch": 0.7428812843261512, "grad_norm": 1.6396845579147339, "learning_rate": 5.308466802423072e-06, "loss": 0.1610097885131836, "step": 5495 }, { "epoch": 0.7430164765525982, "grad_norm": 1.3406883478164673, "learning_rate": 5.303243795380471e-06, "loss": 0.18573760986328125, "step": 5496 }, { "epoch": 0.7431516687790453, "grad_norm": 1.2215561866760254, "learning_rate": 5.298022807254215e-06, "loss": 0.1607666015625, "step": 5497 }, { "epoch": 0.7432868610054922, "grad_norm": 0.896152675151825, "learning_rate": 5.292803839131358e-06, "loss": 0.12671756744384766, "step": 5498 }, { "epoch": 0.7434220532319392, "grad_norm": 1.4494041204452515, "learning_rate": 5.287586892098496e-06, "loss": 0.179473876953125, "step": 5499 }, { "epoch": 0.7435572454583862, "grad_norm": 1.46336829662323, "learning_rate": 5.282371967241842e-06, "loss": 0.2173004150390625, "step": 5500 }, { "epoch": 0.7436924376848332, "grad_norm": 0.8535895347595215, "learning_rate": 5.277159065647164e-06, "loss": 0.15633583068847656, "step": 5501 }, { "epoch": 0.7438276299112802, "grad_norm": 1.6958073377609253, "learning_rate": 5.271948188399814e-06, "loss": 0.2004108428955078, "step": 5502 }, { "epoch": 0.7439628221377271, "grad_norm": 1.1025596857070923, "learning_rate": 5.266739336584735e-06, "loss": 0.18210983276367188, "step": 5503 }, { "epoch": 0.7440980143641741, "grad_norm": 0.9344438910484314, "learning_rate": 5.261532511286422e-06, "loss": 0.16583919525146484, "step": 5504 }, { "epoch": 0.7442332065906211, "grad_norm": 1.0008279085159302, "learning_rate": 5.256327713588977e-06, "loss": 0.15412235260009766, "step": 5505 }, { "epoch": 0.7443683988170681, "grad_norm": 2.138267755508423, "learning_rate": 5.25112494457606e-06, "loss": 0.2547874450683594, "step": 5506 }, { "epoch": 0.744503591043515, "grad_norm": 1.4364880323410034, "learning_rate": 5.245924205330919e-06, "loss": 0.21915864944458008, "step": 5507 }, { "epoch": 0.744638783269962, "grad_norm": 1.0808571577072144, "learning_rate": 5.240725496936373e-06, "loss": 0.17722320556640625, "step": 5508 }, { "epoch": 0.744773975496409, "grad_norm": 0.8131818771362305, "learning_rate": 5.2355288204748145e-06, "loss": 0.1521282196044922, "step": 5509 }, { "epoch": 0.744909167722856, "grad_norm": 1.7834761142730713, "learning_rate": 5.230334177028233e-06, "loss": 0.1450948715209961, "step": 5510 }, { "epoch": 0.745044359949303, "grad_norm": 1.3590015172958374, "learning_rate": 5.2251415676781726e-06, "loss": 0.18535304069519043, "step": 5511 }, { "epoch": 0.7451795521757499, "grad_norm": 0.8459165692329407, "learning_rate": 5.2199509935057655e-06, "loss": 0.1409931182861328, "step": 5512 }, { "epoch": 0.7453147444021969, "grad_norm": 0.848420262336731, "learning_rate": 5.214762455591713e-06, "loss": 0.11742591857910156, "step": 5513 }, { "epoch": 0.7454499366286439, "grad_norm": 1.4901010990142822, "learning_rate": 5.209575955016295e-06, "loss": 0.20101547241210938, "step": 5514 }, { "epoch": 0.7455851288550909, "grad_norm": 1.5747272968292236, "learning_rate": 5.204391492859377e-06, "loss": 0.17344218492507935, "step": 5515 }, { "epoch": 0.7457203210815379, "grad_norm": 1.7182445526123047, "learning_rate": 5.199209070200388e-06, "loss": 0.19905948638916016, "step": 5516 }, { "epoch": 0.7458555133079848, "grad_norm": 0.8937649130821228, "learning_rate": 5.194028688118332e-06, "loss": 0.15001583099365234, "step": 5517 }, { "epoch": 0.7459907055344318, "grad_norm": 0.9096662402153015, "learning_rate": 5.188850347691797e-06, "loss": 0.14841651916503906, "step": 5518 }, { "epoch": 0.7461258977608788, "grad_norm": 0.9075912833213806, "learning_rate": 5.183674049998934e-06, "loss": 0.15425682067871094, "step": 5519 }, { "epoch": 0.7462610899873258, "grad_norm": 1.0316675901412964, "learning_rate": 5.178499796117485e-06, "loss": 0.15800857543945312, "step": 5520 }, { "epoch": 0.7463962822137727, "grad_norm": 1.031766414642334, "learning_rate": 5.173327587124753e-06, "loss": 0.1940668821334839, "step": 5521 }, { "epoch": 0.7465314744402197, "grad_norm": 1.390950083732605, "learning_rate": 5.16815742409762e-06, "loss": 0.20208454132080078, "step": 5522 }, { "epoch": 0.7466666666666667, "grad_norm": 0.7603386640548706, "learning_rate": 5.16298930811254e-06, "loss": 0.17708683013916016, "step": 5523 }, { "epoch": 0.7468018588931137, "grad_norm": 0.8726809620857239, "learning_rate": 5.15782324024554e-06, "loss": 0.1359729766845703, "step": 5524 }, { "epoch": 0.7469370511195607, "grad_norm": 1.8092880249023438, "learning_rate": 5.152659221572231e-06, "loss": 0.19325733184814453, "step": 5525 }, { "epoch": 0.7470722433460076, "grad_norm": 1.0619183778762817, "learning_rate": 5.147497253167784e-06, "loss": 0.16878509521484375, "step": 5526 }, { "epoch": 0.7472074355724546, "grad_norm": 1.340755820274353, "learning_rate": 5.142337336106948e-06, "loss": 0.23514556884765625, "step": 5527 }, { "epoch": 0.7473426277989016, "grad_norm": 1.4047523736953735, "learning_rate": 5.137179471464047e-06, "loss": 0.19392192363739014, "step": 5528 }, { "epoch": 0.7474778200253486, "grad_norm": 0.9587230682373047, "learning_rate": 5.13202366031297e-06, "loss": 0.15888595581054688, "step": 5529 }, { "epoch": 0.7476130122517956, "grad_norm": 1.0565791130065918, "learning_rate": 5.1268699037272e-06, "loss": 0.17792701721191406, "step": 5530 }, { "epoch": 0.7477482044782425, "grad_norm": 1.264702320098877, "learning_rate": 5.121718202779756e-06, "loss": 0.18007469177246094, "step": 5531 }, { "epoch": 0.7478833967046895, "grad_norm": 1.278017520904541, "learning_rate": 5.116568558543264e-06, "loss": 0.18833351135253906, "step": 5532 }, { "epoch": 0.7480185889311365, "grad_norm": 1.3293447494506836, "learning_rate": 5.1114209720899025e-06, "loss": 0.1842479705810547, "step": 5533 }, { "epoch": 0.7481537811575835, "grad_norm": 0.468590646982193, "learning_rate": 5.106275444491423e-06, "loss": 0.06927096843719482, "step": 5534 }, { "epoch": 0.7482889733840304, "grad_norm": 1.4397578239440918, "learning_rate": 5.101131976819165e-06, "loss": 0.2094583511352539, "step": 5535 }, { "epoch": 0.7484241656104774, "grad_norm": 1.2509934902191162, "learning_rate": 5.095990570144008e-06, "loss": 0.17056751251220703, "step": 5536 }, { "epoch": 0.7485593578369244, "grad_norm": 0.981372594833374, "learning_rate": 5.090851225536432e-06, "loss": 0.1465930938720703, "step": 5537 }, { "epoch": 0.7486945500633714, "grad_norm": 0.8076090216636658, "learning_rate": 5.085713944066474e-06, "loss": 0.1373729705810547, "step": 5538 }, { "epoch": 0.7488297422898184, "grad_norm": 1.0660492181777954, "learning_rate": 5.080578726803741e-06, "loss": 0.15929794311523438, "step": 5539 }, { "epoch": 0.7489649345162653, "grad_norm": 1.3466428518295288, "learning_rate": 5.075445574817415e-06, "loss": 0.19535255432128906, "step": 5540 }, { "epoch": 0.7491001267427123, "grad_norm": 1.931800127029419, "learning_rate": 5.07031448917624e-06, "loss": 0.1823577880859375, "step": 5541 }, { "epoch": 0.7492353189691593, "grad_norm": 1.2878483533859253, "learning_rate": 5.065185470948544e-06, "loss": 0.21713924407958984, "step": 5542 }, { "epoch": 0.7493705111956063, "grad_norm": 1.5037792921066284, "learning_rate": 5.060058521202211e-06, "loss": 0.18640804290771484, "step": 5543 }, { "epoch": 0.7495057034220532, "grad_norm": 1.9052491188049316, "learning_rate": 5.054933641004703e-06, "loss": 0.19440412521362305, "step": 5544 }, { "epoch": 0.7496408956485002, "grad_norm": 1.7354813814163208, "learning_rate": 5.0498108314230425e-06, "loss": 0.153099924325943, "step": 5545 }, { "epoch": 0.7497760878749472, "grad_norm": 0.8646870851516724, "learning_rate": 5.044690093523823e-06, "loss": 0.1392502784729004, "step": 5546 }, { "epoch": 0.7499112801013942, "grad_norm": 1.132386326789856, "learning_rate": 5.039571428373219e-06, "loss": 0.1456890106201172, "step": 5547 }, { "epoch": 0.7500464723278412, "grad_norm": 1.4150546789169312, "learning_rate": 5.034454837036959e-06, "loss": 0.13692450523376465, "step": 5548 }, { "epoch": 0.7501816645542881, "grad_norm": 1.4570070505142212, "learning_rate": 5.0293403205803455e-06, "loss": 0.18346023559570312, "step": 5549 }, { "epoch": 0.7503168567807351, "grad_norm": 1.0183249711990356, "learning_rate": 5.024227880068247e-06, "loss": 0.14780044555664062, "step": 5550 }, { "epoch": 0.7504520490071821, "grad_norm": 0.8708744049072266, "learning_rate": 5.019117516565096e-06, "loss": 0.13231420516967773, "step": 5551 }, { "epoch": 0.7505872412336291, "grad_norm": 0.8811061978340149, "learning_rate": 5.014009231134908e-06, "loss": 0.15431594848632812, "step": 5552 }, { "epoch": 0.750722433460076, "grad_norm": 1.0152461528778076, "learning_rate": 5.008903024841248e-06, "loss": 0.18190956115722656, "step": 5553 }, { "epoch": 0.750857625686523, "grad_norm": 0.8767868876457214, "learning_rate": 5.0037988987472595e-06, "loss": 0.18207740783691406, "step": 5554 }, { "epoch": 0.75099281791297, "grad_norm": 1.1492546796798706, "learning_rate": 4.998696853915646e-06, "loss": 0.16100740432739258, "step": 5555 }, { "epoch": 0.751128010139417, "grad_norm": 1.1635277271270752, "learning_rate": 4.993596891408676e-06, "loss": 0.17439889907836914, "step": 5556 }, { "epoch": 0.751263202365864, "grad_norm": 1.0445656776428223, "learning_rate": 4.988499012288198e-06, "loss": 0.15757465362548828, "step": 5557 }, { "epoch": 0.751398394592311, "grad_norm": 0.5957070589065552, "learning_rate": 4.983403217615614e-06, "loss": 0.11692547798156738, "step": 5558 }, { "epoch": 0.7515335868187579, "grad_norm": 1.072064757347107, "learning_rate": 4.978309508451896e-06, "loss": 0.17076969146728516, "step": 5559 }, { "epoch": 0.7516687790452049, "grad_norm": 1.1318572759628296, "learning_rate": 4.973217885857578e-06, "loss": 0.15820837020874023, "step": 5560 }, { "epoch": 0.7518039712716519, "grad_norm": 1.1528007984161377, "learning_rate": 4.968128350892763e-06, "loss": 0.1442272663116455, "step": 5561 }, { "epoch": 0.7519391634980989, "grad_norm": 1.620975136756897, "learning_rate": 4.963040904617131e-06, "loss": 0.21622848510742188, "step": 5562 }, { "epoch": 0.7520743557245458, "grad_norm": 1.2600561380386353, "learning_rate": 4.9579555480898955e-06, "loss": 0.14247512817382812, "step": 5563 }, { "epoch": 0.7522095479509928, "grad_norm": 1.1414223909378052, "learning_rate": 4.952872282369873e-06, "loss": 0.14894485473632812, "step": 5564 }, { "epoch": 0.7523447401774398, "grad_norm": 0.5991482138633728, "learning_rate": 4.947791108515417e-06, "loss": 0.12276840209960938, "step": 5565 }, { "epoch": 0.7524799324038868, "grad_norm": 0.7365414500236511, "learning_rate": 4.942712027584453e-06, "loss": 0.14475154876708984, "step": 5566 }, { "epoch": 0.7526151246303338, "grad_norm": 1.5363119840621948, "learning_rate": 4.937635040634485e-06, "loss": 0.21039390563964844, "step": 5567 }, { "epoch": 0.7527503168567807, "grad_norm": 0.965306282043457, "learning_rate": 4.9325601487225545e-06, "loss": 0.14312076568603516, "step": 5568 }, { "epoch": 0.7528855090832277, "grad_norm": 2.654219150543213, "learning_rate": 4.927487352905289e-06, "loss": 0.22712326049804688, "step": 5569 }, { "epoch": 0.7530207013096747, "grad_norm": 1.0947959423065186, "learning_rate": 4.92241665423887e-06, "loss": 0.16634368896484375, "step": 5570 }, { "epoch": 0.7531558935361217, "grad_norm": 1.4040368795394897, "learning_rate": 4.917348053779039e-06, "loss": 0.15804576873779297, "step": 5571 }, { "epoch": 0.7532910857625686, "grad_norm": 1.2418389320373535, "learning_rate": 4.912281552581122e-06, "loss": 0.17496204376220703, "step": 5572 }, { "epoch": 0.7534262779890156, "grad_norm": 0.7636131048202515, "learning_rate": 4.907217151699969e-06, "loss": 0.1504077911376953, "step": 5573 }, { "epoch": 0.7535614702154626, "grad_norm": 0.887273907661438, "learning_rate": 4.9021548521900305e-06, "loss": 0.13809704780578613, "step": 5574 }, { "epoch": 0.7536966624419096, "grad_norm": 2.542372703552246, "learning_rate": 4.8970946551053005e-06, "loss": 0.23099708557128906, "step": 5575 }, { "epoch": 0.7538318546683566, "grad_norm": 1.084800124168396, "learning_rate": 4.892036561499339e-06, "loss": 0.14268946647644043, "step": 5576 }, { "epoch": 0.7539670468948035, "grad_norm": 0.9548205733299255, "learning_rate": 4.8869805724252675e-06, "loss": 0.18102121353149414, "step": 5577 }, { "epoch": 0.7541022391212505, "grad_norm": 1.2947176694869995, "learning_rate": 4.8819266889357665e-06, "loss": 0.19460105895996094, "step": 5578 }, { "epoch": 0.7542374313476975, "grad_norm": 1.579666256904602, "learning_rate": 4.876874912083088e-06, "loss": 0.18504762649536133, "step": 5579 }, { "epoch": 0.7543726235741445, "grad_norm": 1.6235867738723755, "learning_rate": 4.871825242919037e-06, "loss": 0.24318885803222656, "step": 5580 }, { "epoch": 0.7545078158005915, "grad_norm": 1.0481802225112915, "learning_rate": 4.866777682494978e-06, "loss": 0.2065591812133789, "step": 5581 }, { "epoch": 0.7546430080270384, "grad_norm": 1.6847590208053589, "learning_rate": 4.861732231861845e-06, "loss": 0.2007908821105957, "step": 5582 }, { "epoch": 0.7547782002534854, "grad_norm": 0.75111323595047, "learning_rate": 4.85668889207012e-06, "loss": 0.1385669708251953, "step": 5583 }, { "epoch": 0.7549133924799324, "grad_norm": 1.0754163265228271, "learning_rate": 4.851647664169862e-06, "loss": 0.17052078247070312, "step": 5584 }, { "epoch": 0.7550485847063794, "grad_norm": 1.0119633674621582, "learning_rate": 4.846608549210679e-06, "loss": 0.17748618125915527, "step": 5585 }, { "epoch": 0.7551837769328263, "grad_norm": 0.653028130531311, "learning_rate": 4.841571548241741e-06, "loss": 0.11672306060791016, "step": 5586 }, { "epoch": 0.7553189691592733, "grad_norm": 1.180534839630127, "learning_rate": 4.836536662311777e-06, "loss": 0.2061138153076172, "step": 5587 }, { "epoch": 0.7554541613857203, "grad_norm": 1.6200429201126099, "learning_rate": 4.8315038924690745e-06, "loss": 0.2112889289855957, "step": 5588 }, { "epoch": 0.7555893536121673, "grad_norm": 1.1325408220291138, "learning_rate": 4.82647323976149e-06, "loss": 0.20076751708984375, "step": 5589 }, { "epoch": 0.7557245458386143, "grad_norm": 0.7634033560752869, "learning_rate": 4.821444705236429e-06, "loss": 0.16547656059265137, "step": 5590 }, { "epoch": 0.7558597380650612, "grad_norm": 0.8454173803329468, "learning_rate": 4.81641828994086e-06, "loss": 0.15250205993652344, "step": 5591 }, { "epoch": 0.7559949302915082, "grad_norm": 1.1030910015106201, "learning_rate": 4.811393994921308e-06, "loss": 0.17107868194580078, "step": 5592 }, { "epoch": 0.7561301225179552, "grad_norm": 1.185733437538147, "learning_rate": 4.806371821223854e-06, "loss": 0.18511343002319336, "step": 5593 }, { "epoch": 0.7562653147444022, "grad_norm": 0.7229312062263489, "learning_rate": 4.801351769894151e-06, "loss": 0.14079052209854126, "step": 5594 }, { "epoch": 0.7564005069708492, "grad_norm": 1.2574173212051392, "learning_rate": 4.796333841977394e-06, "loss": 0.15498089790344238, "step": 5595 }, { "epoch": 0.7565356991972961, "grad_norm": 1.4316736459732056, "learning_rate": 4.791318038518345e-06, "loss": 0.17536544799804688, "step": 5596 }, { "epoch": 0.7566708914237431, "grad_norm": 0.9596560001373291, "learning_rate": 4.7863043605613185e-06, "loss": 0.16167068481445312, "step": 5597 }, { "epoch": 0.7568060836501901, "grad_norm": 1.0597124099731445, "learning_rate": 4.7812928091501865e-06, "loss": 0.16962623596191406, "step": 5598 }, { "epoch": 0.7569412758766371, "grad_norm": 0.8384588956832886, "learning_rate": 4.7762833853283935e-06, "loss": 0.12042665481567383, "step": 5599 }, { "epoch": 0.757076468103084, "grad_norm": 1.059099555015564, "learning_rate": 4.77127609013891e-06, "loss": 0.1955275535583496, "step": 5600 }, { "epoch": 0.757211660329531, "grad_norm": 0.9685809016227722, "learning_rate": 4.766270924624295e-06, "loss": 0.2124786376953125, "step": 5601 }, { "epoch": 0.757346852555978, "grad_norm": 1.4474679231643677, "learning_rate": 4.761267889826647e-06, "loss": 0.2092428207397461, "step": 5602 }, { "epoch": 0.757482044782425, "grad_norm": 1.664612054824829, "learning_rate": 4.756266986787619e-06, "loss": 0.21875, "step": 5603 }, { "epoch": 0.757617237008872, "grad_norm": 0.8639447689056396, "learning_rate": 4.751268216548439e-06, "loss": 0.14753055572509766, "step": 5604 }, { "epoch": 0.7577524292353189, "grad_norm": 0.5144842863082886, "learning_rate": 4.746271580149861e-06, "loss": 0.11172723770141602, "step": 5605 }, { "epoch": 0.7578876214617659, "grad_norm": 1.1795637607574463, "learning_rate": 4.7412770786322244e-06, "loss": 0.1476426124572754, "step": 5606 }, { "epoch": 0.7580228136882129, "grad_norm": 0.9664493799209595, "learning_rate": 4.736284713035406e-06, "loss": 0.14348816871643066, "step": 5607 }, { "epoch": 0.7581580059146599, "grad_norm": 1.530418038368225, "learning_rate": 4.731294484398843e-06, "loss": 0.16219043731689453, "step": 5608 }, { "epoch": 0.7582931981411069, "grad_norm": 0.9234780073165894, "learning_rate": 4.726306393761526e-06, "loss": 0.13222122192382812, "step": 5609 }, { "epoch": 0.7584283903675538, "grad_norm": 0.952521562576294, "learning_rate": 4.721320442162001e-06, "loss": 0.19065523147583008, "step": 5610 }, { "epoch": 0.7585635825940008, "grad_norm": 1.0002142190933228, "learning_rate": 4.716336630638378e-06, "loss": 0.1819133758544922, "step": 5611 }, { "epoch": 0.7586987748204478, "grad_norm": 1.343812108039856, "learning_rate": 4.711354960228306e-06, "loss": 0.201995849609375, "step": 5612 }, { "epoch": 0.7588339670468948, "grad_norm": 1.2567311525344849, "learning_rate": 4.706375431968998e-06, "loss": 0.1607494354248047, "step": 5613 }, { "epoch": 0.7589691592733417, "grad_norm": 1.004839539527893, "learning_rate": 4.701398046897218e-06, "loss": 0.17444348335266113, "step": 5614 }, { "epoch": 0.7591043514997887, "grad_norm": 1.7555283308029175, "learning_rate": 4.696422806049277e-06, "loss": 0.20669078826904297, "step": 5615 }, { "epoch": 0.7592395437262357, "grad_norm": 1.1199944019317627, "learning_rate": 4.69144971046106e-06, "loss": 0.15883255004882812, "step": 5616 }, { "epoch": 0.7593747359526827, "grad_norm": 1.5909534692764282, "learning_rate": 4.686478761167984e-06, "loss": 0.1797027587890625, "step": 5617 }, { "epoch": 0.7595099281791297, "grad_norm": 1.7642052173614502, "learning_rate": 4.681509959205028e-06, "loss": 0.2161731719970703, "step": 5618 }, { "epoch": 0.7596451204055766, "grad_norm": 0.7950900197029114, "learning_rate": 4.676543305606724e-06, "loss": 0.15172624588012695, "step": 5619 }, { "epoch": 0.7597803126320236, "grad_norm": 1.3509894609451294, "learning_rate": 4.67157880140715e-06, "loss": 0.18468694388866425, "step": 5620 }, { "epoch": 0.7599155048584706, "grad_norm": 1.6210018396377563, "learning_rate": 4.666616447639952e-06, "loss": 0.1890707015991211, "step": 5621 }, { "epoch": 0.7600506970849176, "grad_norm": 0.9629420042037964, "learning_rate": 4.661656245338314e-06, "loss": 0.14961004257202148, "step": 5622 }, { "epoch": 0.7601858893113645, "grad_norm": 0.8086307644844055, "learning_rate": 4.656698195534978e-06, "loss": 0.14900684356689453, "step": 5623 }, { "epoch": 0.7603210815378115, "grad_norm": 1.3364981412887573, "learning_rate": 4.651742299262233e-06, "loss": 0.1511223316192627, "step": 5624 }, { "epoch": 0.7604562737642585, "grad_norm": 0.7157567143440247, "learning_rate": 4.646788557551921e-06, "loss": 0.13143327832221985, "step": 5625 }, { "epoch": 0.7605914659907055, "grad_norm": 1.0600775480270386, "learning_rate": 4.641836971435445e-06, "loss": 0.18670892715454102, "step": 5626 }, { "epoch": 0.7607266582171525, "grad_norm": 1.2083684206008911, "learning_rate": 4.63688754194375e-06, "loss": 0.1737537384033203, "step": 5627 }, { "epoch": 0.7608618504435994, "grad_norm": 1.340206503868103, "learning_rate": 4.6319402701073295e-06, "loss": 0.18640708923339844, "step": 5628 }, { "epoch": 0.7609970426700464, "grad_norm": 2.08260440826416, "learning_rate": 4.6269951569562355e-06, "loss": 0.1842184066772461, "step": 5629 }, { "epoch": 0.7611322348964934, "grad_norm": 1.2912315130233765, "learning_rate": 4.622052203520061e-06, "loss": 0.17406892776489258, "step": 5630 }, { "epoch": 0.7612674271229404, "grad_norm": 0.8089340925216675, "learning_rate": 4.617111410827968e-06, "loss": 0.18362855911254883, "step": 5631 }, { "epoch": 0.7614026193493874, "grad_norm": 0.9377435445785522, "learning_rate": 4.612172779908639e-06, "loss": 0.14360904693603516, "step": 5632 }, { "epoch": 0.7615378115758344, "grad_norm": 1.583442211151123, "learning_rate": 4.607236311790335e-06, "loss": 0.2171487808227539, "step": 5633 }, { "epoch": 0.7616730038022814, "grad_norm": 1.7319791316986084, "learning_rate": 4.602302007500854e-06, "loss": 0.1705636978149414, "step": 5634 }, { "epoch": 0.7618081960287284, "grad_norm": 0.9414796233177185, "learning_rate": 4.597369868067537e-06, "loss": 0.14108514785766602, "step": 5635 }, { "epoch": 0.7619433882551754, "grad_norm": 0.777948260307312, "learning_rate": 4.592439894517296e-06, "loss": 0.14987754821777344, "step": 5636 }, { "epoch": 0.7620785804816224, "grad_norm": 1.052364468574524, "learning_rate": 4.587512087876559e-06, "loss": 0.15215209126472473, "step": 5637 }, { "epoch": 0.7622137727080693, "grad_norm": 0.7150318622589111, "learning_rate": 4.582586449171336e-06, "loss": 0.12601280212402344, "step": 5638 }, { "epoch": 0.7623489649345163, "grad_norm": 0.9592683911323547, "learning_rate": 4.577662979427168e-06, "loss": 0.14050960540771484, "step": 5639 }, { "epoch": 0.7624841571609633, "grad_norm": 1.3156485557556152, "learning_rate": 4.572741679669147e-06, "loss": 0.23079752922058105, "step": 5640 }, { "epoch": 0.7626193493874103, "grad_norm": 0.8563727140426636, "learning_rate": 4.567822550921912e-06, "loss": 0.1833357810974121, "step": 5641 }, { "epoch": 0.7627545416138573, "grad_norm": 1.1649134159088135, "learning_rate": 4.562905594209647e-06, "loss": 0.15782999992370605, "step": 5642 }, { "epoch": 0.7628897338403042, "grad_norm": 1.074506163597107, "learning_rate": 4.557990810556102e-06, "loss": 0.20490646362304688, "step": 5643 }, { "epoch": 0.7630249260667512, "grad_norm": 0.7917906641960144, "learning_rate": 4.553078200984553e-06, "loss": 0.18158650398254395, "step": 5644 }, { "epoch": 0.7631601182931982, "grad_norm": 1.0676697492599487, "learning_rate": 4.548167766517832e-06, "loss": 0.18944549560546875, "step": 5645 }, { "epoch": 0.7632953105196452, "grad_norm": 0.7175119519233704, "learning_rate": 4.543259508178318e-06, "loss": 0.15016746520996094, "step": 5646 }, { "epoch": 0.7634305027460921, "grad_norm": 1.0355457067489624, "learning_rate": 4.538353426987931e-06, "loss": 0.16494178771972656, "step": 5647 }, { "epoch": 0.7635656949725391, "grad_norm": 1.1480623483657837, "learning_rate": 4.533449523968154e-06, "loss": 0.1848607063293457, "step": 5648 }, { "epoch": 0.7637008871989861, "grad_norm": 0.9228355884552002, "learning_rate": 4.528547800140001e-06, "loss": 0.11853981018066406, "step": 5649 }, { "epoch": 0.7638360794254331, "grad_norm": 1.0169438123703003, "learning_rate": 4.523648256524037e-06, "loss": 0.15368366241455078, "step": 5650 }, { "epoch": 0.7639712716518801, "grad_norm": 1.1107758283615112, "learning_rate": 4.518750894140372e-06, "loss": 0.21843719482421875, "step": 5651 }, { "epoch": 0.764106463878327, "grad_norm": 2.2256572246551514, "learning_rate": 4.513855714008659e-06, "loss": 0.24049663543701172, "step": 5652 }, { "epoch": 0.764241656104774, "grad_norm": 1.0370551347732544, "learning_rate": 4.508962717148111e-06, "loss": 0.15035676956176758, "step": 5653 }, { "epoch": 0.764376848331221, "grad_norm": 0.9271912574768066, "learning_rate": 4.504071904577469e-06, "loss": 0.1890249252319336, "step": 5654 }, { "epoch": 0.764512040557668, "grad_norm": 1.1165226697921753, "learning_rate": 4.499183277315027e-06, "loss": 0.11548709869384766, "step": 5655 }, { "epoch": 0.764647232784115, "grad_norm": 0.9335180521011353, "learning_rate": 4.494296836378625e-06, "loss": 0.13154125213623047, "step": 5656 }, { "epoch": 0.7647824250105619, "grad_norm": 1.8017005920410156, "learning_rate": 4.4894125827856415e-06, "loss": 0.16046714782714844, "step": 5657 }, { "epoch": 0.7649176172370089, "grad_norm": 0.7010825276374817, "learning_rate": 4.4845305175530105e-06, "loss": 0.1520557403564453, "step": 5658 }, { "epoch": 0.7650528094634559, "grad_norm": 2.0297791957855225, "learning_rate": 4.479650641697201e-06, "loss": 0.21923065185546875, "step": 5659 }, { "epoch": 0.7651880016899029, "grad_norm": 1.4055614471435547, "learning_rate": 4.4747729562342305e-06, "loss": 0.18424701690673828, "step": 5660 }, { "epoch": 0.7653231939163498, "grad_norm": 0.8998335003852844, "learning_rate": 4.469897462179656e-06, "loss": 0.16823577880859375, "step": 5661 }, { "epoch": 0.7654583861427968, "grad_norm": 1.0070158243179321, "learning_rate": 4.46502416054858e-06, "loss": 0.1489400863647461, "step": 5662 }, { "epoch": 0.7655935783692438, "grad_norm": 0.9272902011871338, "learning_rate": 4.460153052355663e-06, "loss": 0.16038799285888672, "step": 5663 }, { "epoch": 0.7657287705956908, "grad_norm": 1.0019853115081787, "learning_rate": 4.455284138615074e-06, "loss": 0.14382219314575195, "step": 5664 }, { "epoch": 0.7658639628221378, "grad_norm": 1.6276488304138184, "learning_rate": 4.4504174203405656e-06, "loss": 0.17556190490722656, "step": 5665 }, { "epoch": 0.7659991550485847, "grad_norm": 0.9283202290534973, "learning_rate": 4.445552898545407e-06, "loss": 0.1423492431640625, "step": 5666 }, { "epoch": 0.7661343472750317, "grad_norm": 0.8269470930099487, "learning_rate": 4.440690574242413e-06, "loss": 0.17202186584472656, "step": 5667 }, { "epoch": 0.7662695395014787, "grad_norm": 0.8893545269966125, "learning_rate": 4.435830448443961e-06, "loss": 0.1474905014038086, "step": 5668 }, { "epoch": 0.7664047317279257, "grad_norm": 1.6766085624694824, "learning_rate": 4.430972522161934e-06, "loss": 0.2121429443359375, "step": 5669 }, { "epoch": 0.7665399239543726, "grad_norm": 1.9850035905838013, "learning_rate": 4.426116796407794e-06, "loss": 0.1836223602294922, "step": 5670 }, { "epoch": 0.7666751161808196, "grad_norm": 1.414304494857788, "learning_rate": 4.421263272192523e-06, "loss": 0.21085453033447266, "step": 5671 }, { "epoch": 0.7668103084072666, "grad_norm": 1.202636480331421, "learning_rate": 4.416411950526648e-06, "loss": 0.1845874786376953, "step": 5672 }, { "epoch": 0.7669455006337136, "grad_norm": 0.9374517798423767, "learning_rate": 4.411562832420252e-06, "loss": 0.13102245330810547, "step": 5673 }, { "epoch": 0.7670806928601606, "grad_norm": 1.0864344835281372, "learning_rate": 4.406715918882929e-06, "loss": 0.20935440063476562, "step": 5674 }, { "epoch": 0.7672158850866075, "grad_norm": 1.1287201642990112, "learning_rate": 4.4018712109238475e-06, "loss": 0.1835486888885498, "step": 5675 }, { "epoch": 0.7673510773130545, "grad_norm": 0.9374114871025085, "learning_rate": 4.3970287095516965e-06, "loss": 0.1298377513885498, "step": 5676 }, { "epoch": 0.7674862695395015, "grad_norm": 0.9655289053916931, "learning_rate": 4.39218841577471e-06, "loss": 0.18006229400634766, "step": 5677 }, { "epoch": 0.7676214617659485, "grad_norm": 2.1124372482299805, "learning_rate": 4.387350330600662e-06, "loss": 0.20349884033203125, "step": 5678 }, { "epoch": 0.7677566539923955, "grad_norm": 0.9207296371459961, "learning_rate": 4.382514455036864e-06, "loss": 0.1527242660522461, "step": 5679 }, { "epoch": 0.7678918462188424, "grad_norm": 0.9362502694129944, "learning_rate": 4.377680790090182e-06, "loss": 0.1410350799560547, "step": 5680 }, { "epoch": 0.7680270384452894, "grad_norm": 1.3551695346832275, "learning_rate": 4.372849336767004e-06, "loss": 0.20341205596923828, "step": 5681 }, { "epoch": 0.7681622306717364, "grad_norm": 1.017533302307129, "learning_rate": 4.3680200960732645e-06, "loss": 0.19510364532470703, "step": 5682 }, { "epoch": 0.7682974228981834, "grad_norm": 1.1069456338882446, "learning_rate": 4.363193069014439e-06, "loss": 0.1566905975341797, "step": 5683 }, { "epoch": 0.7684326151246303, "grad_norm": 1.5834105014801025, "learning_rate": 4.3583682565955325e-06, "loss": 0.26218605041503906, "step": 5684 }, { "epoch": 0.7685678073510773, "grad_norm": 1.1706461906433105, "learning_rate": 4.3535456598211074e-06, "loss": 0.15036487579345703, "step": 5685 }, { "epoch": 0.7687029995775243, "grad_norm": 1.4391405582427979, "learning_rate": 4.348725279695251e-06, "loss": 0.22117233276367188, "step": 5686 }, { "epoch": 0.7688381918039713, "grad_norm": 0.9555748701095581, "learning_rate": 4.343907117221591e-06, "loss": 0.11361606419086456, "step": 5687 }, { "epoch": 0.7689733840304183, "grad_norm": 0.9524842500686646, "learning_rate": 4.339091173403294e-06, "loss": 0.17129135131835938, "step": 5688 }, { "epoch": 0.7691085762568652, "grad_norm": 1.2796114683151245, "learning_rate": 4.334277449243061e-06, "loss": 0.1484670639038086, "step": 5689 }, { "epoch": 0.7692437684833122, "grad_norm": 1.1945613622665405, "learning_rate": 4.329465945743144e-06, "loss": 0.18136310577392578, "step": 5690 }, { "epoch": 0.7693789607097592, "grad_norm": 1.8413891792297363, "learning_rate": 4.32465666390532e-06, "loss": 0.156646728515625, "step": 5691 }, { "epoch": 0.7695141529362062, "grad_norm": 1.3953511714935303, "learning_rate": 4.319849604730905e-06, "loss": 0.1966233253479004, "step": 5692 }, { "epoch": 0.7696493451626532, "grad_norm": 0.9326554536819458, "learning_rate": 4.315044769220758e-06, "loss": 0.16521549224853516, "step": 5693 }, { "epoch": 0.7697845373891001, "grad_norm": 1.5161690711975098, "learning_rate": 4.310242158375264e-06, "loss": 0.22219562530517578, "step": 5694 }, { "epoch": 0.7699197296155471, "grad_norm": 1.4437106847763062, "learning_rate": 4.30544177319436e-06, "loss": 0.15215396881103516, "step": 5695 }, { "epoch": 0.7700549218419941, "grad_norm": 0.8638103604316711, "learning_rate": 4.300643614677511e-06, "loss": 0.13219094276428223, "step": 5696 }, { "epoch": 0.7701901140684411, "grad_norm": 1.5494129657745361, "learning_rate": 4.2958476838237165e-06, "loss": 0.17708861827850342, "step": 5697 }, { "epoch": 0.770325306294888, "grad_norm": 0.8461418747901917, "learning_rate": 4.2910539816315166e-06, "loss": 0.174560546875, "step": 5698 }, { "epoch": 0.770460498521335, "grad_norm": 0.9515027403831482, "learning_rate": 4.286262509098979e-06, "loss": 0.12755346298217773, "step": 5699 }, { "epoch": 0.770595690747782, "grad_norm": 1.2471880912780762, "learning_rate": 4.28147326722373e-06, "loss": 0.20840883255004883, "step": 5700 }, { "epoch": 0.770730882974229, "grad_norm": 0.9678155183792114, "learning_rate": 4.2766862570028965e-06, "loss": 0.13154226541519165, "step": 5701 }, { "epoch": 0.770866075200676, "grad_norm": 1.1689928770065308, "learning_rate": 4.2719014794331715e-06, "loss": 0.1676788330078125, "step": 5702 }, { "epoch": 0.7710012674271229, "grad_norm": 1.0286637544631958, "learning_rate": 4.267118935510767e-06, "loss": 0.14365673065185547, "step": 5703 }, { "epoch": 0.7711364596535699, "grad_norm": 0.7536453604698181, "learning_rate": 4.2623386262314306e-06, "loss": 0.1428537368774414, "step": 5704 }, { "epoch": 0.7712716518800169, "grad_norm": 1.5972416400909424, "learning_rate": 4.257560552590461e-06, "loss": 0.18857979774475098, "step": 5705 }, { "epoch": 0.7714068441064639, "grad_norm": 1.0247960090637207, "learning_rate": 4.252784715582661e-06, "loss": 0.1702425479888916, "step": 5706 }, { "epoch": 0.7715420363329109, "grad_norm": 1.3569600582122803, "learning_rate": 4.2480111162024e-06, "loss": 0.1625213623046875, "step": 5707 }, { "epoch": 0.7716772285593578, "grad_norm": 0.9905298948287964, "learning_rate": 4.243239755443561e-06, "loss": 0.20572853088378906, "step": 5708 }, { "epoch": 0.7718124207858048, "grad_norm": 1.0633753538131714, "learning_rate": 4.238470634299567e-06, "loss": 0.1307048797607422, "step": 5709 }, { "epoch": 0.7719476130122518, "grad_norm": 1.004807949066162, "learning_rate": 4.233703753763375e-06, "loss": 0.16947460174560547, "step": 5710 }, { "epoch": 0.7720828052386988, "grad_norm": 1.113210678100586, "learning_rate": 4.228939114827469e-06, "loss": 0.22174644470214844, "step": 5711 }, { "epoch": 0.7722179974651457, "grad_norm": 0.952313244342804, "learning_rate": 4.224176718483881e-06, "loss": 0.16562700271606445, "step": 5712 }, { "epoch": 0.7723531896915927, "grad_norm": 0.7830891609191895, "learning_rate": 4.219416565724165e-06, "loss": 0.11083650588989258, "step": 5713 }, { "epoch": 0.7724883819180397, "grad_norm": 0.7530669569969177, "learning_rate": 4.21465865753941e-06, "loss": 0.14617443084716797, "step": 5714 }, { "epoch": 0.7726235741444867, "grad_norm": 1.1028395891189575, "learning_rate": 4.209902994920236e-06, "loss": 0.1929454803466797, "step": 5715 }, { "epoch": 0.7727587663709337, "grad_norm": 0.8484943509101868, "learning_rate": 4.205149578856794e-06, "loss": 0.1592416763305664, "step": 5716 }, { "epoch": 0.7728939585973806, "grad_norm": 1.2756197452545166, "learning_rate": 4.200398410338779e-06, "loss": 0.16272687911987305, "step": 5717 }, { "epoch": 0.7730291508238276, "grad_norm": 1.0048065185546875, "learning_rate": 4.1956494903554056e-06, "loss": 0.17531967163085938, "step": 5718 }, { "epoch": 0.7731643430502746, "grad_norm": 0.774115264415741, "learning_rate": 4.190902819895425e-06, "loss": 0.15831279754638672, "step": 5719 }, { "epoch": 0.7732995352767216, "grad_norm": 1.5515379905700684, "learning_rate": 4.186158399947118e-06, "loss": 0.17557334899902344, "step": 5720 }, { "epoch": 0.7734347275031686, "grad_norm": 1.4792540073394775, "learning_rate": 4.181416231498292e-06, "loss": 0.1820697784423828, "step": 5721 }, { "epoch": 0.7735699197296155, "grad_norm": 1.9483230113983154, "learning_rate": 4.176676315536306e-06, "loss": 0.18980026245117188, "step": 5722 }, { "epoch": 0.7737051119560625, "grad_norm": 1.2648887634277344, "learning_rate": 4.171938653048027e-06, "loss": 0.1960453987121582, "step": 5723 }, { "epoch": 0.7738403041825095, "grad_norm": 1.0147806406021118, "learning_rate": 4.1672032450198616e-06, "loss": 0.17206907272338867, "step": 5724 }, { "epoch": 0.7739754964089565, "grad_norm": 2.1715707778930664, "learning_rate": 4.16247009243775e-06, "loss": 0.2610957622528076, "step": 5725 }, { "epoch": 0.7741106886354034, "grad_norm": 1.7615704536437988, "learning_rate": 4.1577391962871504e-06, "loss": 0.1836376190185547, "step": 5726 }, { "epoch": 0.7742458808618504, "grad_norm": 1.677809715270996, "learning_rate": 4.153010557553076e-06, "loss": 0.2107372283935547, "step": 5727 }, { "epoch": 0.7743810730882974, "grad_norm": 0.9997429251670837, "learning_rate": 4.148284177220045e-06, "loss": 0.13818359375, "step": 5728 }, { "epoch": 0.7745162653147444, "grad_norm": 1.3612557649612427, "learning_rate": 4.143560056272117e-06, "loss": 0.21035194396972656, "step": 5729 }, { "epoch": 0.7746514575411914, "grad_norm": 1.3863136768341064, "learning_rate": 4.1388381956928796e-06, "loss": 0.158616304397583, "step": 5730 }, { "epoch": 0.7747866497676383, "grad_norm": 1.1184004545211792, "learning_rate": 4.134118596465443e-06, "loss": 0.1649494171142578, "step": 5731 }, { "epoch": 0.7749218419940853, "grad_norm": 1.102777123451233, "learning_rate": 4.1294012595724675e-06, "loss": 0.1613903045654297, "step": 5732 }, { "epoch": 0.7750570342205323, "grad_norm": 2.2198445796966553, "learning_rate": 4.1246861859961114e-06, "loss": 0.2568550109863281, "step": 5733 }, { "epoch": 0.7751922264469793, "grad_norm": 1.2297929525375366, "learning_rate": 4.119973376718089e-06, "loss": 0.19216108322143555, "step": 5734 }, { "epoch": 0.7753274186734262, "grad_norm": 1.603895664215088, "learning_rate": 4.115262832719628e-06, "loss": 0.1389636993408203, "step": 5735 }, { "epoch": 0.7754626108998732, "grad_norm": 0.9889018535614014, "learning_rate": 4.110554554981486e-06, "loss": 0.14987993240356445, "step": 5736 }, { "epoch": 0.7755978031263202, "grad_norm": 0.8055657148361206, "learning_rate": 4.1058485444839655e-06, "loss": 0.14275503158569336, "step": 5737 }, { "epoch": 0.7757329953527672, "grad_norm": 1.2474499940872192, "learning_rate": 4.101144802206862e-06, "loss": 0.1623058319091797, "step": 5738 }, { "epoch": 0.7758681875792142, "grad_norm": 0.9579083919525146, "learning_rate": 4.096443329129535e-06, "loss": 0.14908599853515625, "step": 5739 }, { "epoch": 0.7760033798056611, "grad_norm": 1.1723302602767944, "learning_rate": 4.091744126230853e-06, "loss": 0.17624664306640625, "step": 5740 }, { "epoch": 0.7761385720321081, "grad_norm": 0.8712233304977417, "learning_rate": 4.08704719448921e-06, "loss": 0.17962646484375, "step": 5741 }, { "epoch": 0.7762737642585551, "grad_norm": 0.7037849426269531, "learning_rate": 4.082352534882543e-06, "loss": 0.09752249717712402, "step": 5742 }, { "epoch": 0.7764089564850021, "grad_norm": 1.0782477855682373, "learning_rate": 4.07766014838829e-06, "loss": 0.1750040054321289, "step": 5743 }, { "epoch": 0.7765441487114491, "grad_norm": 1.2373197078704834, "learning_rate": 4.072970035983443e-06, "loss": 0.15466690063476562, "step": 5744 }, { "epoch": 0.776679340937896, "grad_norm": 1.7748299837112427, "learning_rate": 4.068282198644505e-06, "loss": 0.1977221965789795, "step": 5745 }, { "epoch": 0.776814533164343, "grad_norm": 0.9341349005699158, "learning_rate": 4.06359663734751e-06, "loss": 0.125213623046875, "step": 5746 }, { "epoch": 0.77694972539079, "grad_norm": 1.1644492149353027, "learning_rate": 4.058913353068013e-06, "loss": 0.1862165927886963, "step": 5747 }, { "epoch": 0.777084917617237, "grad_norm": 2.433779001235962, "learning_rate": 4.0542323467810985e-06, "loss": 0.18503284454345703, "step": 5748 }, { "epoch": 0.777220109843684, "grad_norm": 0.7797772884368896, "learning_rate": 4.049553619461381e-06, "loss": 0.1127614974975586, "step": 5749 }, { "epoch": 0.7773553020701309, "grad_norm": 1.1835607290267944, "learning_rate": 4.044877172082997e-06, "loss": 0.18398284912109375, "step": 5750 }, { "epoch": 0.7774904942965779, "grad_norm": 0.6466657519340515, "learning_rate": 4.040203005619604e-06, "loss": 0.13517379760742188, "step": 5751 }, { "epoch": 0.7776256865230249, "grad_norm": 1.3917794227600098, "learning_rate": 4.035531121044392e-06, "loss": 0.23613929748535156, "step": 5752 }, { "epoch": 0.7777608787494719, "grad_norm": 1.2126444578170776, "learning_rate": 4.030861519330065e-06, "loss": 0.21366500854492188, "step": 5753 }, { "epoch": 0.7778960709759188, "grad_norm": 1.790773868560791, "learning_rate": 4.026194201448868e-06, "loss": 0.1931896209716797, "step": 5754 }, { "epoch": 0.7780312632023658, "grad_norm": 1.0487462282180786, "learning_rate": 4.021529168372558e-06, "loss": 0.194183349609375, "step": 5755 }, { "epoch": 0.7781664554288128, "grad_norm": 1.098608374595642, "learning_rate": 4.01686642107242e-06, "loss": 0.15494155883789062, "step": 5756 }, { "epoch": 0.7783016476552598, "grad_norm": 1.1798722743988037, "learning_rate": 4.0122059605192624e-06, "loss": 0.16982269287109375, "step": 5757 }, { "epoch": 0.7784368398817068, "grad_norm": 0.8994905352592468, "learning_rate": 4.007547787683412e-06, "loss": 0.15837860107421875, "step": 5758 }, { "epoch": 0.7785720321081537, "grad_norm": 0.9738286733627319, "learning_rate": 4.002891903534736e-06, "loss": 0.19238758087158203, "step": 5759 }, { "epoch": 0.7787072243346007, "grad_norm": 3.014173746109009, "learning_rate": 3.998238309042611e-06, "loss": 0.225982666015625, "step": 5760 }, { "epoch": 0.7788424165610477, "grad_norm": 2.397707223892212, "learning_rate": 3.993587005175937e-06, "loss": 0.2344341278076172, "step": 5761 }, { "epoch": 0.7789776087874947, "grad_norm": 1.6218763589859009, "learning_rate": 3.988937992903144e-06, "loss": 0.19930267333984375, "step": 5762 }, { "epoch": 0.7791128010139416, "grad_norm": 1.187485933303833, "learning_rate": 3.9842912731921716e-06, "loss": 0.15949058532714844, "step": 5763 }, { "epoch": 0.7792479932403886, "grad_norm": 1.2378003597259521, "learning_rate": 3.979646847010506e-06, "loss": 0.19077301025390625, "step": 5764 }, { "epoch": 0.7793831854668356, "grad_norm": 1.2841618061065674, "learning_rate": 3.975004715325134e-06, "loss": 0.20267200469970703, "step": 5765 }, { "epoch": 0.7795183776932826, "grad_norm": 1.336878776550293, "learning_rate": 3.970364879102572e-06, "loss": 0.15315532684326172, "step": 5766 }, { "epoch": 0.7796535699197296, "grad_norm": 0.6835128664970398, "learning_rate": 3.96572733930886e-06, "loss": 0.11800765991210938, "step": 5767 }, { "epoch": 0.7797887621461765, "grad_norm": 1.255144715309143, "learning_rate": 3.961092096909552e-06, "loss": 0.19077491760253906, "step": 5768 }, { "epoch": 0.7799239543726236, "grad_norm": 0.7086952924728394, "learning_rate": 3.9564591528697455e-06, "loss": 0.14722347259521484, "step": 5769 }, { "epoch": 0.7800591465990706, "grad_norm": 1.504512906074524, "learning_rate": 3.9518285081540275e-06, "loss": 0.21566295623779297, "step": 5770 }, { "epoch": 0.7801943388255176, "grad_norm": 0.9993622303009033, "learning_rate": 3.947200163726534e-06, "loss": 0.1497974395751953, "step": 5771 }, { "epoch": 0.7803295310519646, "grad_norm": 0.7543260455131531, "learning_rate": 3.9425741205509055e-06, "loss": 0.11919450759887695, "step": 5772 }, { "epoch": 0.7804647232784115, "grad_norm": 1.0005760192871094, "learning_rate": 3.9379503795903065e-06, "loss": 0.1514129638671875, "step": 5773 }, { "epoch": 0.7805999155048585, "grad_norm": 1.7753955125808716, "learning_rate": 3.933328941807439e-06, "loss": 0.18676376342773438, "step": 5774 }, { "epoch": 0.7807351077313055, "grad_norm": 1.4304224252700806, "learning_rate": 3.928709808164491e-06, "loss": 0.20827674865722656, "step": 5775 }, { "epoch": 0.7808702999577525, "grad_norm": 1.8002541065216064, "learning_rate": 3.924092979623203e-06, "loss": 0.2202911376953125, "step": 5776 }, { "epoch": 0.7810054921841995, "grad_norm": 1.176750898361206, "learning_rate": 3.919478457144824e-06, "loss": 0.17110055685043335, "step": 5777 }, { "epoch": 0.7811406844106464, "grad_norm": 0.9747717380523682, "learning_rate": 3.914866241690115e-06, "loss": 0.18190264701843262, "step": 5778 }, { "epoch": 0.7812758766370934, "grad_norm": 0.9151124954223633, "learning_rate": 3.9102563342193695e-06, "loss": 0.14024591445922852, "step": 5779 }, { "epoch": 0.7814110688635404, "grad_norm": 0.781014084815979, "learning_rate": 3.905648735692389e-06, "loss": 0.11922860145568848, "step": 5780 }, { "epoch": 0.7815462610899874, "grad_norm": 1.206429123878479, "learning_rate": 3.901043447068508e-06, "loss": 0.17983722686767578, "step": 5781 }, { "epoch": 0.7816814533164343, "grad_norm": 1.2806367874145508, "learning_rate": 3.896440469306567e-06, "loss": 0.22631263732910156, "step": 5782 }, { "epoch": 0.7818166455428813, "grad_norm": 0.8554794192314148, "learning_rate": 3.891839803364934e-06, "loss": 0.13141345977783203, "step": 5783 }, { "epoch": 0.7819518377693283, "grad_norm": 0.9105827212333679, "learning_rate": 3.887241450201487e-06, "loss": 0.11873197555541992, "step": 5784 }, { "epoch": 0.7820870299957753, "grad_norm": 0.8203549981117249, "learning_rate": 3.882645410773629e-06, "loss": 0.1869983673095703, "step": 5785 }, { "epoch": 0.7822222222222223, "grad_norm": 0.7337989211082458, "learning_rate": 3.878051686038284e-06, "loss": 0.15885210037231445, "step": 5786 }, { "epoch": 0.7823574144486692, "grad_norm": 1.308215856552124, "learning_rate": 3.873460276951889e-06, "loss": 0.17203521728515625, "step": 5787 }, { "epoch": 0.7824926066751162, "grad_norm": 1.3012659549713135, "learning_rate": 3.868871184470397e-06, "loss": 0.17644882202148438, "step": 5788 }, { "epoch": 0.7826277989015632, "grad_norm": 0.7731307744979858, "learning_rate": 3.864284409549282e-06, "loss": 0.13414859771728516, "step": 5789 }, { "epoch": 0.7827629911280102, "grad_norm": 0.9234111905097961, "learning_rate": 3.859699953143532e-06, "loss": 0.1620645523071289, "step": 5790 }, { "epoch": 0.7828981833544572, "grad_norm": 1.0429589748382568, "learning_rate": 3.855117816207665e-06, "loss": 0.1784496307373047, "step": 5791 }, { "epoch": 0.7830333755809041, "grad_norm": 0.9223520159721375, "learning_rate": 3.850537999695699e-06, "loss": 0.14377450942993164, "step": 5792 }, { "epoch": 0.7831685678073511, "grad_norm": 1.3123719692230225, "learning_rate": 3.845960504561179e-06, "loss": 0.19254302978515625, "step": 5793 }, { "epoch": 0.7833037600337981, "grad_norm": 2.292695999145508, "learning_rate": 3.841385331757161e-06, "loss": 0.1718158721923828, "step": 5794 }, { "epoch": 0.7834389522602451, "grad_norm": 1.375365972518921, "learning_rate": 3.8368124822362184e-06, "loss": 0.16459369659423828, "step": 5795 }, { "epoch": 0.783574144486692, "grad_norm": 0.7936541438102722, "learning_rate": 3.832241956950449e-06, "loss": 0.1504192352294922, "step": 5796 }, { "epoch": 0.783709336713139, "grad_norm": 1.1918771266937256, "learning_rate": 3.82767375685146e-06, "loss": 0.20840835571289062, "step": 5797 }, { "epoch": 0.783844528939586, "grad_norm": 1.1423534154891968, "learning_rate": 3.823107882890373e-06, "loss": 0.20076847076416016, "step": 5798 }, { "epoch": 0.783979721166033, "grad_norm": 1.7537564039230347, "learning_rate": 3.8185443360178265e-06, "loss": 0.17306888103485107, "step": 5799 }, { "epoch": 0.78411491339248, "grad_norm": 0.6891571283340454, "learning_rate": 3.813983117183973e-06, "loss": 0.14136600494384766, "step": 5800 }, { "epoch": 0.7842501056189269, "grad_norm": 1.4191315174102783, "learning_rate": 3.8094242273384932e-06, "loss": 0.21343612670898438, "step": 5801 }, { "epoch": 0.7843852978453739, "grad_norm": 1.7443660497665405, "learning_rate": 3.804867667430555e-06, "loss": 0.2536964416503906, "step": 5802 }, { "epoch": 0.7845204900718209, "grad_norm": 0.6830030679702759, "learning_rate": 3.800313438408874e-06, "loss": 0.11420917510986328, "step": 5803 }, { "epoch": 0.7846556822982679, "grad_norm": 0.9493191838264465, "learning_rate": 3.7957615412216582e-06, "loss": 0.18427276611328125, "step": 5804 }, { "epoch": 0.7847908745247149, "grad_norm": 1.298243761062622, "learning_rate": 3.791211976816634e-06, "loss": 0.2222881317138672, "step": 5805 }, { "epoch": 0.7849260667511618, "grad_norm": 1.9392026662826538, "learning_rate": 3.786664746141057e-06, "loss": 0.1992359161376953, "step": 5806 }, { "epoch": 0.7850612589776088, "grad_norm": 0.9334809184074402, "learning_rate": 3.782119850141669e-06, "loss": 0.16501259803771973, "step": 5807 }, { "epoch": 0.7851964512040558, "grad_norm": 1.539667010307312, "learning_rate": 3.777577289764752e-06, "loss": 0.18382835388183594, "step": 5808 }, { "epoch": 0.7853316434305028, "grad_norm": 1.213761806488037, "learning_rate": 3.7730370659560904e-06, "loss": 0.11244463920593262, "step": 5809 }, { "epoch": 0.7854668356569497, "grad_norm": 0.9012846350669861, "learning_rate": 3.7684991796609746e-06, "loss": 0.1271834373474121, "step": 5810 }, { "epoch": 0.7856020278833967, "grad_norm": 0.7157869935035706, "learning_rate": 3.7639636318242344e-06, "loss": 0.1372213363647461, "step": 5811 }, { "epoch": 0.7857372201098437, "grad_norm": 1.1840213537216187, "learning_rate": 3.7594304233901738e-06, "loss": 0.1892223358154297, "step": 5812 }, { "epoch": 0.7858724123362907, "grad_norm": 1.004406452178955, "learning_rate": 3.754899555302645e-06, "loss": 0.12111473083496094, "step": 5813 }, { "epoch": 0.7860076045627377, "grad_norm": 1.1647124290466309, "learning_rate": 3.7503710285049964e-06, "loss": 0.1362166404724121, "step": 5814 }, { "epoch": 0.7861427967891846, "grad_norm": 1.4857046604156494, "learning_rate": 3.7458448439400888e-06, "loss": 0.2019481658935547, "step": 5815 }, { "epoch": 0.7862779890156316, "grad_norm": 1.1760765314102173, "learning_rate": 3.7413210025502985e-06, "loss": 0.1202692985534668, "step": 5816 }, { "epoch": 0.7864131812420786, "grad_norm": 0.8019460439682007, "learning_rate": 3.7367995052775123e-06, "loss": 0.14469313621520996, "step": 5817 }, { "epoch": 0.7865483734685256, "grad_norm": 1.664031744003296, "learning_rate": 3.732280353063133e-06, "loss": 0.18428564071655273, "step": 5818 }, { "epoch": 0.7866835656949726, "grad_norm": 1.1546237468719482, "learning_rate": 3.727763546848074e-06, "loss": 0.1697826385498047, "step": 5819 }, { "epoch": 0.7868187579214195, "grad_norm": 1.524002194404602, "learning_rate": 3.7232490875727544e-06, "loss": 0.1154184341430664, "step": 5820 }, { "epoch": 0.7869539501478665, "grad_norm": 2.606360673904419, "learning_rate": 3.718736976177108e-06, "loss": 0.20606350898742676, "step": 5821 }, { "epoch": 0.7870891423743135, "grad_norm": 1.4419660568237305, "learning_rate": 3.71422721360058e-06, "loss": 0.17506885528564453, "step": 5822 }, { "epoch": 0.7872243346007605, "grad_norm": 1.0727072954177856, "learning_rate": 3.709719800782133e-06, "loss": 0.13677263259887695, "step": 5823 }, { "epoch": 0.7873595268272074, "grad_norm": 0.8261385560035706, "learning_rate": 3.7052147386602304e-06, "loss": 0.10388565063476562, "step": 5824 }, { "epoch": 0.7874947190536544, "grad_norm": 0.9895406365394592, "learning_rate": 3.700712028172851e-06, "loss": 0.13189315795898438, "step": 5825 }, { "epoch": 0.7876299112801014, "grad_norm": 1.8887265920639038, "learning_rate": 3.696211670257481e-06, "loss": 0.2043914794921875, "step": 5826 }, { "epoch": 0.7877651035065484, "grad_norm": 1.599543571472168, "learning_rate": 3.691713665851117e-06, "loss": 0.21718215942382812, "step": 5827 }, { "epoch": 0.7879002957329954, "grad_norm": 1.4330143928527832, "learning_rate": 3.6872180158902764e-06, "loss": 0.1817312240600586, "step": 5828 }, { "epoch": 0.7880354879594423, "grad_norm": 1.2297337055206299, "learning_rate": 3.6827247213109705e-06, "loss": 0.1617746353149414, "step": 5829 }, { "epoch": 0.7881706801858893, "grad_norm": 0.9841012358665466, "learning_rate": 3.6782337830487294e-06, "loss": 0.16829204559326172, "step": 5830 }, { "epoch": 0.7883058724123363, "grad_norm": 1.082634687423706, "learning_rate": 3.6737452020385886e-06, "loss": 0.14481878280639648, "step": 5831 }, { "epoch": 0.7884410646387833, "grad_norm": 1.3710473775863647, "learning_rate": 3.6692589792150923e-06, "loss": 0.19174480438232422, "step": 5832 }, { "epoch": 0.7885762568652303, "grad_norm": 1.2013518810272217, "learning_rate": 3.6647751155123026e-06, "loss": 0.16481590270996094, "step": 5833 }, { "epoch": 0.7887114490916772, "grad_norm": 1.1895521879196167, "learning_rate": 3.660293611863782e-06, "loss": 0.1885700225830078, "step": 5834 }, { "epoch": 0.7888466413181242, "grad_norm": 2.2827346324920654, "learning_rate": 3.655814469202602e-06, "loss": 0.2006072998046875, "step": 5835 }, { "epoch": 0.7889818335445712, "grad_norm": 2.6859853267669678, "learning_rate": 3.6513376884613446e-06, "loss": 0.2439422607421875, "step": 5836 }, { "epoch": 0.7891170257710182, "grad_norm": 1.476603388786316, "learning_rate": 3.6468632705720934e-06, "loss": 0.20268790423870087, "step": 5837 }, { "epoch": 0.7892522179974651, "grad_norm": 1.3803499937057495, "learning_rate": 3.6423912164664606e-06, "loss": 0.18988323211669922, "step": 5838 }, { "epoch": 0.7893874102239121, "grad_norm": 1.1412652730941772, "learning_rate": 3.637921527075534e-06, "loss": 0.1586771011352539, "step": 5839 }, { "epoch": 0.7895226024503591, "grad_norm": 0.9427790641784668, "learning_rate": 3.63345420332994e-06, "loss": 0.1585707664489746, "step": 5840 }, { "epoch": 0.7896577946768061, "grad_norm": 1.3868263959884644, "learning_rate": 3.628989246159795e-06, "loss": 0.158416748046875, "step": 5841 }, { "epoch": 0.7897929869032531, "grad_norm": 1.1198753118515015, "learning_rate": 3.6245266564947205e-06, "loss": 0.2070155143737793, "step": 5842 }, { "epoch": 0.7899281791297, "grad_norm": 0.8191492557525635, "learning_rate": 3.620066435263868e-06, "loss": 0.18553447723388672, "step": 5843 }, { "epoch": 0.790063371356147, "grad_norm": 1.6474485397338867, "learning_rate": 3.6156085833958596e-06, "loss": 0.17853546142578125, "step": 5844 }, { "epoch": 0.790198563582594, "grad_norm": 1.2241733074188232, "learning_rate": 3.6111531018188584e-06, "loss": 0.2231006622314453, "step": 5845 }, { "epoch": 0.790333755809041, "grad_norm": 1.1772730350494385, "learning_rate": 3.606699991460513e-06, "loss": 0.13770484924316406, "step": 5846 }, { "epoch": 0.790468948035488, "grad_norm": 1.581953763961792, "learning_rate": 3.602249253247986e-06, "loss": 0.2326803207397461, "step": 5847 }, { "epoch": 0.7906041402619349, "grad_norm": 1.3109188079833984, "learning_rate": 3.5978008881079445e-06, "loss": 0.1599719524383545, "step": 5848 }, { "epoch": 0.7907393324883819, "grad_norm": 0.7251803278923035, "learning_rate": 3.5933548969665587e-06, "loss": 0.13519883155822754, "step": 5849 }, { "epoch": 0.7908745247148289, "grad_norm": 0.7502581477165222, "learning_rate": 3.5889112807495152e-06, "loss": 0.1480565071105957, "step": 5850 }, { "epoch": 0.7910097169412759, "grad_norm": 1.209169864654541, "learning_rate": 3.5844700403819935e-06, "loss": 0.22179794311523438, "step": 5851 }, { "epoch": 0.7911449091677228, "grad_norm": 1.134719967842102, "learning_rate": 3.5800311767886847e-06, "loss": 0.15722942352294922, "step": 5852 }, { "epoch": 0.7912801013941698, "grad_norm": 0.9985283613204956, "learning_rate": 3.575594690893784e-06, "loss": 0.2027425765991211, "step": 5853 }, { "epoch": 0.7914152936206168, "grad_norm": 0.8400408625602722, "learning_rate": 3.5711605836209853e-06, "loss": 0.12825965881347656, "step": 5854 }, { "epoch": 0.7915504858470638, "grad_norm": 2.0692615509033203, "learning_rate": 3.566728855893505e-06, "loss": 0.20149993896484375, "step": 5855 }, { "epoch": 0.7916856780735108, "grad_norm": 1.2247815132141113, "learning_rate": 3.5622995086340466e-06, "loss": 0.24745559692382812, "step": 5856 }, { "epoch": 0.7918208702999577, "grad_norm": 1.2648489475250244, "learning_rate": 3.5578725427648233e-06, "loss": 0.1477351188659668, "step": 5857 }, { "epoch": 0.7919560625264047, "grad_norm": 0.8537172675132751, "learning_rate": 3.553447959207553e-06, "loss": 0.14914178848266602, "step": 5858 }, { "epoch": 0.7920912547528517, "grad_norm": 1.338640570640564, "learning_rate": 3.5490257588834552e-06, "loss": 0.15317249298095703, "step": 5859 }, { "epoch": 0.7922264469792987, "grad_norm": 0.8530309200286865, "learning_rate": 3.5446059427132615e-06, "loss": 0.15430736541748047, "step": 5860 }, { "epoch": 0.7923616392057456, "grad_norm": 2.4116532802581787, "learning_rate": 3.5401885116171977e-06, "loss": 0.19091796875, "step": 5861 }, { "epoch": 0.7924968314321926, "grad_norm": 1.244110345840454, "learning_rate": 3.5357734665149983e-06, "loss": 0.1914815902709961, "step": 5862 }, { "epoch": 0.7926320236586396, "grad_norm": 0.9299548864364624, "learning_rate": 3.5313608083258975e-06, "loss": 0.15876483917236328, "step": 5863 }, { "epoch": 0.7927672158850866, "grad_norm": 1.0739024877548218, "learning_rate": 3.526950537968629e-06, "loss": 0.1807384490966797, "step": 5864 }, { "epoch": 0.7929024081115336, "grad_norm": 1.2986563444137573, "learning_rate": 3.5225426563614466e-06, "loss": 0.15717315673828125, "step": 5865 }, { "epoch": 0.7930376003379805, "grad_norm": 0.9040958881378174, "learning_rate": 3.518137164422088e-06, "loss": 0.13258934020996094, "step": 5866 }, { "epoch": 0.7931727925644275, "grad_norm": 1.8466547727584839, "learning_rate": 3.513734063067799e-06, "loss": 0.1913738250732422, "step": 5867 }, { "epoch": 0.7933079847908745, "grad_norm": 0.6558601260185242, "learning_rate": 3.5093333532153316e-06, "loss": 0.12159445881843567, "step": 5868 }, { "epoch": 0.7934431770173215, "grad_norm": 0.8004727959632874, "learning_rate": 3.504935035780931e-06, "loss": 0.15655040740966797, "step": 5869 }, { "epoch": 0.7935783692437685, "grad_norm": 1.3584675788879395, "learning_rate": 3.500539111680364e-06, "loss": 0.16486310958862305, "step": 5870 }, { "epoch": 0.7937135614702154, "grad_norm": 1.7554868459701538, "learning_rate": 3.4961455818288683e-06, "loss": 0.207000732421875, "step": 5871 }, { "epoch": 0.7938487536966624, "grad_norm": 0.8711773157119751, "learning_rate": 3.491754447141212e-06, "loss": 0.1572742462158203, "step": 5872 }, { "epoch": 0.7939839459231094, "grad_norm": 1.2152760028839111, "learning_rate": 3.4873657085316504e-06, "loss": 0.17601966857910156, "step": 5873 }, { "epoch": 0.7941191381495564, "grad_norm": 0.8813444375991821, "learning_rate": 3.482979366913935e-06, "loss": 0.17440176010131836, "step": 5874 }, { "epoch": 0.7942543303760033, "grad_norm": 0.8859923481941223, "learning_rate": 3.4785954232013423e-06, "loss": 0.1443626880645752, "step": 5875 }, { "epoch": 0.7943895226024503, "grad_norm": 2.0067145824432373, "learning_rate": 3.4742138783066122e-06, "loss": 0.21198463439941406, "step": 5876 }, { "epoch": 0.7945247148288973, "grad_norm": 1.0756579637527466, "learning_rate": 3.4698347331420206e-06, "loss": 0.20216941833496094, "step": 5877 }, { "epoch": 0.7946599070553443, "grad_norm": 0.9025580286979675, "learning_rate": 3.4654579886193223e-06, "loss": 0.1793837547302246, "step": 5878 }, { "epoch": 0.7947950992817913, "grad_norm": 1.5246014595031738, "learning_rate": 3.461083645649782e-06, "loss": 0.17731475830078125, "step": 5879 }, { "epoch": 0.7949302915082382, "grad_norm": 1.0727031230926514, "learning_rate": 3.4567117051441594e-06, "loss": 0.1420574188232422, "step": 5880 }, { "epoch": 0.7950654837346852, "grad_norm": 0.850818932056427, "learning_rate": 3.4523421680127115e-06, "loss": 0.15140771865844727, "step": 5881 }, { "epoch": 0.7952006759611322, "grad_norm": 1.2207752466201782, "learning_rate": 3.447975035165209e-06, "loss": 0.16934967041015625, "step": 5882 }, { "epoch": 0.7953358681875792, "grad_norm": 1.8987658023834229, "learning_rate": 3.4436103075109076e-06, "loss": 0.19762897491455078, "step": 5883 }, { "epoch": 0.7954710604140262, "grad_norm": 1.1975834369659424, "learning_rate": 3.4392479859585642e-06, "loss": 0.16284751892089844, "step": 5884 }, { "epoch": 0.7956062526404731, "grad_norm": 1.592936396598816, "learning_rate": 3.4348880714164416e-06, "loss": 0.19690322875976562, "step": 5885 }, { "epoch": 0.7957414448669201, "grad_norm": 2.1984376907348633, "learning_rate": 3.430530564792289e-06, "loss": 0.18921947479248047, "step": 5886 }, { "epoch": 0.7958766370933671, "grad_norm": 1.0649752616882324, "learning_rate": 3.426175466993374e-06, "loss": 0.17252230644226074, "step": 5887 }, { "epoch": 0.7960118293198141, "grad_norm": 1.1380372047424316, "learning_rate": 3.4218227789264468e-06, "loss": 0.13836383819580078, "step": 5888 }, { "epoch": 0.796147021546261, "grad_norm": 0.7978634834289551, "learning_rate": 3.417472501497758e-06, "loss": 0.14134740829467773, "step": 5889 }, { "epoch": 0.796282213772708, "grad_norm": 2.3068878650665283, "learning_rate": 3.413124635613061e-06, "loss": 0.21338367462158203, "step": 5890 }, { "epoch": 0.796417405999155, "grad_norm": 1.2715401649475098, "learning_rate": 3.4087791821775986e-06, "loss": 0.18868911266326904, "step": 5891 }, { "epoch": 0.796552598225602, "grad_norm": 0.8652893304824829, "learning_rate": 3.4044361420961285e-06, "loss": 0.14075469970703125, "step": 5892 }, { "epoch": 0.796687790452049, "grad_norm": 1.0169157981872559, "learning_rate": 3.4000955162728866e-06, "loss": 0.13190746307373047, "step": 5893 }, { "epoch": 0.7968229826784959, "grad_norm": 1.3094630241394043, "learning_rate": 3.3957573056116164e-06, "loss": 0.1589794158935547, "step": 5894 }, { "epoch": 0.7969581749049429, "grad_norm": 0.6766445636749268, "learning_rate": 3.391421511015558e-06, "loss": 0.11551189422607422, "step": 5895 }, { "epoch": 0.7970933671313899, "grad_norm": 0.6453374028205872, "learning_rate": 3.38708813338744e-06, "loss": 0.12232255935668945, "step": 5896 }, { "epoch": 0.7972285593578369, "grad_norm": 1.168291449546814, "learning_rate": 3.382757173629506e-06, "loss": 0.1675577163696289, "step": 5897 }, { "epoch": 0.7973637515842839, "grad_norm": 1.5562000274658203, "learning_rate": 3.378428632643478e-06, "loss": 0.19261741638183594, "step": 5898 }, { "epoch": 0.7974989438107308, "grad_norm": 0.8348447680473328, "learning_rate": 3.3741025113305825e-06, "loss": 0.19612598419189453, "step": 5899 }, { "epoch": 0.7976341360371778, "grad_norm": 0.890941321849823, "learning_rate": 3.369778810591541e-06, "loss": 0.1629781723022461, "step": 5900 }, { "epoch": 0.7977693282636248, "grad_norm": 0.9431806802749634, "learning_rate": 3.3654575313265664e-06, "loss": 0.17294979095458984, "step": 5901 }, { "epoch": 0.7979045204900718, "grad_norm": 0.7037155032157898, "learning_rate": 3.361138674435386e-06, "loss": 0.1250828504562378, "step": 5902 }, { "epoch": 0.7980397127165187, "grad_norm": 1.0607848167419434, "learning_rate": 3.35682224081719e-06, "loss": 0.18647003173828125, "step": 5903 }, { "epoch": 0.7981749049429658, "grad_norm": 0.9927728772163391, "learning_rate": 3.352508231370699e-06, "loss": 0.1597309112548828, "step": 5904 }, { "epoch": 0.7983100971694128, "grad_norm": 0.7160606384277344, "learning_rate": 3.3481966469941044e-06, "loss": 0.1300565004348755, "step": 5905 }, { "epoch": 0.7984452893958598, "grad_norm": 1.1032862663269043, "learning_rate": 3.3438874885850984e-06, "loss": 0.21190643310546875, "step": 5906 }, { "epoch": 0.7985804816223068, "grad_norm": 1.1787217855453491, "learning_rate": 3.3395807570408847e-06, "loss": 0.18780803680419922, "step": 5907 }, { "epoch": 0.7987156738487537, "grad_norm": 1.0600337982177734, "learning_rate": 3.33527645325813e-06, "loss": 0.18407821655273438, "step": 5908 }, { "epoch": 0.7988508660752007, "grad_norm": 2.2151525020599365, "learning_rate": 3.3309745781330247e-06, "loss": 0.1832275390625, "step": 5909 }, { "epoch": 0.7989860583016477, "grad_norm": 1.0850056409835815, "learning_rate": 3.32667513256124e-06, "loss": 0.18148040771484375, "step": 5910 }, { "epoch": 0.7991212505280947, "grad_norm": 0.7696892023086548, "learning_rate": 3.3223781174379375e-06, "loss": 0.1491374969482422, "step": 5911 }, { "epoch": 0.7992564427545417, "grad_norm": 1.0843309164047241, "learning_rate": 3.3180835336577917e-06, "loss": 0.1965789794921875, "step": 5912 }, { "epoch": 0.7993916349809886, "grad_norm": 1.2669497728347778, "learning_rate": 3.313791382114943e-06, "loss": 0.2366180419921875, "step": 5913 }, { "epoch": 0.7995268272074356, "grad_norm": 1.4543774127960205, "learning_rate": 3.3095016637030505e-06, "loss": 0.22435855865478516, "step": 5914 }, { "epoch": 0.7996620194338826, "grad_norm": 1.7445255517959595, "learning_rate": 3.3052143793152524e-06, "loss": 0.1688370704650879, "step": 5915 }, { "epoch": 0.7997972116603296, "grad_norm": 0.7679470181465149, "learning_rate": 3.3009295298441855e-06, "loss": 0.14403915405273438, "step": 5916 }, { "epoch": 0.7999324038867766, "grad_norm": 0.9925470948219299, "learning_rate": 3.2966471161819767e-06, "loss": 0.1600818634033203, "step": 5917 }, { "epoch": 0.8000675961132235, "grad_norm": 1.1217235326766968, "learning_rate": 3.292367139220246e-06, "loss": 0.13979387283325195, "step": 5918 }, { "epoch": 0.8002027883396705, "grad_norm": 0.6252045631408691, "learning_rate": 3.288089599850112e-06, "loss": 0.10034584999084473, "step": 5919 }, { "epoch": 0.8003379805661175, "grad_norm": 1.0051994323730469, "learning_rate": 3.2838144989621795e-06, "loss": 0.1896648406982422, "step": 5920 }, { "epoch": 0.8004731727925645, "grad_norm": 2.191072463989258, "learning_rate": 3.2795418374465458e-06, "loss": 0.1831340789794922, "step": 5921 }, { "epoch": 0.8006083650190114, "grad_norm": 0.9865363240242004, "learning_rate": 3.275271616192803e-06, "loss": 0.17546606063842773, "step": 5922 }, { "epoch": 0.8007435572454584, "grad_norm": 1.162110686302185, "learning_rate": 3.2710038360900303e-06, "loss": 0.2068004608154297, "step": 5923 }, { "epoch": 0.8008787494719054, "grad_norm": 1.0392760038375854, "learning_rate": 3.266738498026808e-06, "loss": 0.20862388610839844, "step": 5924 }, { "epoch": 0.8010139416983524, "grad_norm": 0.8232437372207642, "learning_rate": 3.2624756028912005e-06, "loss": 0.1715259552001953, "step": 5925 }, { "epoch": 0.8011491339247994, "grad_norm": 1.089142918586731, "learning_rate": 3.2582151515707655e-06, "loss": 0.18799686431884766, "step": 5926 }, { "epoch": 0.8012843261512463, "grad_norm": 1.280590295791626, "learning_rate": 3.253957144952551e-06, "loss": 0.19170284271240234, "step": 5927 }, { "epoch": 0.8014195183776933, "grad_norm": 1.770272135734558, "learning_rate": 3.249701583923091e-06, "loss": 0.2247486114501953, "step": 5928 }, { "epoch": 0.8015547106041403, "grad_norm": 1.223771572113037, "learning_rate": 3.2454484693684257e-06, "loss": 0.18434619903564453, "step": 5929 }, { "epoch": 0.8016899028305873, "grad_norm": 0.7650221586227417, "learning_rate": 3.2411978021740727e-06, "loss": 0.15496540069580078, "step": 5930 }, { "epoch": 0.8018250950570343, "grad_norm": 0.9063459038734436, "learning_rate": 3.2369495832250434e-06, "loss": 0.16783761978149414, "step": 5931 }, { "epoch": 0.8019602872834812, "grad_norm": 0.6971040368080139, "learning_rate": 3.2327038134058378e-06, "loss": 0.1528301239013672, "step": 5932 }, { "epoch": 0.8020954795099282, "grad_norm": 1.5162559747695923, "learning_rate": 3.228460493600446e-06, "loss": 0.2302417755126953, "step": 5933 }, { "epoch": 0.8022306717363752, "grad_norm": 1.0452804565429688, "learning_rate": 3.2242196246923554e-06, "loss": 0.16797232627868652, "step": 5934 }, { "epoch": 0.8023658639628222, "grad_norm": 1.0449769496917725, "learning_rate": 3.2199812075645375e-06, "loss": 0.2094583511352539, "step": 5935 }, { "epoch": 0.8025010561892691, "grad_norm": 1.8267536163330078, "learning_rate": 3.2157452430994487e-06, "loss": 0.15792083740234375, "step": 5936 }, { "epoch": 0.8026362484157161, "grad_norm": 1.158838152885437, "learning_rate": 3.2115117321790427e-06, "loss": 0.1681222915649414, "step": 5937 }, { "epoch": 0.8027714406421631, "grad_norm": 0.9939912557601929, "learning_rate": 3.207280675684754e-06, "loss": 0.17552566528320312, "step": 5938 }, { "epoch": 0.8029066328686101, "grad_norm": 1.7586792707443237, "learning_rate": 3.203052074497523e-06, "loss": 0.15172886848449707, "step": 5939 }, { "epoch": 0.8030418250950571, "grad_norm": 2.5150694847106934, "learning_rate": 3.198825929497752e-06, "loss": 0.17313742637634277, "step": 5940 }, { "epoch": 0.803177017321504, "grad_norm": 0.8184636831283569, "learning_rate": 3.194602241565357e-06, "loss": 0.15375947952270508, "step": 5941 }, { "epoch": 0.803312209547951, "grad_norm": 0.9839374423027039, "learning_rate": 3.1903810115797282e-06, "loss": 0.1146245002746582, "step": 5942 }, { "epoch": 0.803447401774398, "grad_norm": 1.1804472208023071, "learning_rate": 3.1861622404197475e-06, "loss": 0.15810644626617432, "step": 5943 }, { "epoch": 0.803582594000845, "grad_norm": 0.9766131639480591, "learning_rate": 3.181945928963794e-06, "loss": 0.14694786071777344, "step": 5944 }, { "epoch": 0.803717786227292, "grad_norm": 1.7506136894226074, "learning_rate": 3.1777320780897124e-06, "loss": 0.15791082382202148, "step": 5945 }, { "epoch": 0.8038529784537389, "grad_norm": 0.8934969305992126, "learning_rate": 3.1735206886748602e-06, "loss": 0.16957378387451172, "step": 5946 }, { "epoch": 0.8039881706801859, "grad_norm": 1.1023931503295898, "learning_rate": 3.1693117615960665e-06, "loss": 0.19230270385742188, "step": 5947 }, { "epoch": 0.8041233629066329, "grad_norm": 1.4080901145935059, "learning_rate": 3.1651052977296537e-06, "loss": 0.20988082885742188, "step": 5948 }, { "epoch": 0.8042585551330799, "grad_norm": 0.9943472146987915, "learning_rate": 3.1609012979514273e-06, "loss": 0.17920851707458496, "step": 5949 }, { "epoch": 0.8043937473595268, "grad_norm": 1.4543040990829468, "learning_rate": 3.156699763136683e-06, "loss": 0.18654441833496094, "step": 5950 }, { "epoch": 0.8045289395859738, "grad_norm": 1.0371907949447632, "learning_rate": 3.152500694160207e-06, "loss": 0.1560354232788086, "step": 5951 }, { "epoch": 0.8046641318124208, "grad_norm": 2.1326818466186523, "learning_rate": 3.148304091896265e-06, "loss": 0.1775798797607422, "step": 5952 }, { "epoch": 0.8047993240388678, "grad_norm": 2.1662580966949463, "learning_rate": 3.144109957218612e-06, "loss": 0.16460657119750977, "step": 5953 }, { "epoch": 0.8049345162653148, "grad_norm": 1.733975887298584, "learning_rate": 3.1399182910004893e-06, "loss": 0.18264341354370117, "step": 5954 }, { "epoch": 0.8050697084917617, "grad_norm": 0.7742398381233215, "learning_rate": 3.1357290941146215e-06, "loss": 0.13188135623931885, "step": 5955 }, { "epoch": 0.8052049007182087, "grad_norm": 1.1824049949645996, "learning_rate": 3.1315423674332265e-06, "loss": 0.17107105255126953, "step": 5956 }, { "epoch": 0.8053400929446557, "grad_norm": 0.8766518831253052, "learning_rate": 3.127358111828002e-06, "loss": 0.17285728454589844, "step": 5957 }, { "epoch": 0.8054752851711027, "grad_norm": 0.6217026710510254, "learning_rate": 3.123176328170131e-06, "loss": 0.1420135498046875, "step": 5958 }, { "epoch": 0.8056104773975497, "grad_norm": 1.439857006072998, "learning_rate": 3.1189970173302816e-06, "loss": 0.22020769119262695, "step": 5959 }, { "epoch": 0.8057456696239966, "grad_norm": 1.166935682296753, "learning_rate": 3.1148201801786085e-06, "loss": 0.18059158325195312, "step": 5960 }, { "epoch": 0.8058808618504436, "grad_norm": 1.0186827182769775, "learning_rate": 3.1106458175847572e-06, "loss": 0.13172435760498047, "step": 5961 }, { "epoch": 0.8060160540768906, "grad_norm": 0.811034619808197, "learning_rate": 3.106473930417848e-06, "loss": 0.10042047500610352, "step": 5962 }, { "epoch": 0.8061512463033376, "grad_norm": 1.2436472177505493, "learning_rate": 3.1023045195464903e-06, "loss": 0.1500415802001953, "step": 5963 }, { "epoch": 0.8062864385297845, "grad_norm": 0.7157169580459595, "learning_rate": 3.098137585838779e-06, "loss": 0.17971539497375488, "step": 5964 }, { "epoch": 0.8064216307562315, "grad_norm": 1.1864300966262817, "learning_rate": 3.093973130162286e-06, "loss": 0.11483001708984375, "step": 5965 }, { "epoch": 0.8065568229826785, "grad_norm": 1.507524013519287, "learning_rate": 3.089811153384083e-06, "loss": 0.13365426659584045, "step": 5966 }, { "epoch": 0.8066920152091255, "grad_norm": 1.3271453380584717, "learning_rate": 3.08565165637071e-06, "loss": 0.21441078186035156, "step": 5967 }, { "epoch": 0.8068272074355725, "grad_norm": 1.6617037057876587, "learning_rate": 3.081494639988196e-06, "loss": 0.15883731842041016, "step": 5968 }, { "epoch": 0.8069623996620194, "grad_norm": 1.2097936868667603, "learning_rate": 3.077340105102057e-06, "loss": 0.17082786560058594, "step": 5969 }, { "epoch": 0.8070975918884664, "grad_norm": 0.9197073578834534, "learning_rate": 3.0731880525772817e-06, "loss": 0.17792129516601562, "step": 5970 }, { "epoch": 0.8072327841149134, "grad_norm": 1.4240236282348633, "learning_rate": 3.069038483278364e-06, "loss": 0.18358325958251953, "step": 5971 }, { "epoch": 0.8073679763413604, "grad_norm": 1.0109297037124634, "learning_rate": 3.0648913980692505e-06, "loss": 0.21124935150146484, "step": 5972 }, { "epoch": 0.8075031685678074, "grad_norm": 0.9199767112731934, "learning_rate": 3.0607467978133985e-06, "loss": 0.13824462890625, "step": 5973 }, { "epoch": 0.8076383607942543, "grad_norm": 1.2320756912231445, "learning_rate": 3.0566046833737294e-06, "loss": 0.18898355960845947, "step": 5974 }, { "epoch": 0.8077735530207013, "grad_norm": 0.8958389163017273, "learning_rate": 3.0524650556126517e-06, "loss": 0.15454483032226562, "step": 5975 }, { "epoch": 0.8079087452471483, "grad_norm": 0.7117584347724915, "learning_rate": 3.048327915392069e-06, "loss": 0.11751937866210938, "step": 5976 }, { "epoch": 0.8080439374735953, "grad_norm": 1.5684980154037476, "learning_rate": 3.044193263573341e-06, "loss": 0.18653488159179688, "step": 5977 }, { "epoch": 0.8081791297000422, "grad_norm": 0.633672833442688, "learning_rate": 3.0400611010173355e-06, "loss": 0.12723708152770996, "step": 5978 }, { "epoch": 0.8083143219264892, "grad_norm": 1.435807466506958, "learning_rate": 3.0359314285843863e-06, "loss": 0.15733051300048828, "step": 5979 }, { "epoch": 0.8084495141529362, "grad_norm": 1.5499682426452637, "learning_rate": 3.0318042471343104e-06, "loss": 0.18578052520751953, "step": 5980 }, { "epoch": 0.8085847063793832, "grad_norm": 0.8469221591949463, "learning_rate": 3.027679557526422e-06, "loss": 0.17724227905273438, "step": 5981 }, { "epoch": 0.8087198986058302, "grad_norm": 0.9022312760353088, "learning_rate": 3.0235573606194844e-06, "loss": 0.1400747299194336, "step": 5982 }, { "epoch": 0.8088550908322771, "grad_norm": 1.1368786096572876, "learning_rate": 3.0194376572717743e-06, "loss": 0.2078723907470703, "step": 5983 }, { "epoch": 0.8089902830587241, "grad_norm": 1.1913106441497803, "learning_rate": 3.0153204483410318e-06, "loss": 0.18519115447998047, "step": 5984 }, { "epoch": 0.8091254752851711, "grad_norm": 1.5260909795761108, "learning_rate": 3.0112057346844834e-06, "loss": 0.19892311096191406, "step": 5985 }, { "epoch": 0.8092606675116181, "grad_norm": 1.2321239709854126, "learning_rate": 3.007093517158832e-06, "loss": 0.15843772888183594, "step": 5986 }, { "epoch": 0.809395859738065, "grad_norm": 0.6808137893676758, "learning_rate": 3.002983796620261e-06, "loss": 0.14597702026367188, "step": 5987 }, { "epoch": 0.809531051964512, "grad_norm": 1.8292533159255981, "learning_rate": 2.9988765739244427e-06, "loss": 0.19481205940246582, "step": 5988 }, { "epoch": 0.809666244190959, "grad_norm": 1.192221999168396, "learning_rate": 2.9947718499265197e-06, "loss": 0.217620849609375, "step": 5989 }, { "epoch": 0.809801436417406, "grad_norm": 0.7766583561897278, "learning_rate": 2.9906696254811184e-06, "loss": 0.1564464569091797, "step": 5990 }, { "epoch": 0.809936628643853, "grad_norm": 1.8011471033096313, "learning_rate": 2.9865699014423404e-06, "loss": 0.1441364288330078, "step": 5991 }, { "epoch": 0.8100718208702999, "grad_norm": 1.290523648262024, "learning_rate": 2.9824726786637698e-06, "loss": 0.1641385555267334, "step": 5992 }, { "epoch": 0.8102070130967469, "grad_norm": 1.0178121328353882, "learning_rate": 2.978377957998477e-06, "loss": 0.18931198120117188, "step": 5993 }, { "epoch": 0.8103422053231939, "grad_norm": 0.8530979752540588, "learning_rate": 2.974285740299001e-06, "loss": 0.14047670364379883, "step": 5994 }, { "epoch": 0.8104773975496409, "grad_norm": 1.6549816131591797, "learning_rate": 2.9701960264173612e-06, "loss": 0.16742515563964844, "step": 5995 }, { "epoch": 0.8106125897760879, "grad_norm": 1.6105870008468628, "learning_rate": 2.96610881720506e-06, "loss": 0.1378917694091797, "step": 5996 }, { "epoch": 0.8107477820025348, "grad_norm": 1.0834912061691284, "learning_rate": 2.9620241135130715e-06, "loss": 0.2146596908569336, "step": 5997 }, { "epoch": 0.8108829742289818, "grad_norm": 2.49206805229187, "learning_rate": 2.9579419161918607e-06, "loss": 0.20630550384521484, "step": 5998 }, { "epoch": 0.8110181664554288, "grad_norm": 1.0012738704681396, "learning_rate": 2.9538622260913595e-06, "loss": 0.15961742401123047, "step": 5999 }, { "epoch": 0.8111533586818758, "grad_norm": 1.3934578895568848, "learning_rate": 2.9497850440609814e-06, "loss": 0.16727781295776367, "step": 6000 }, { "epoch": 0.8112885509083227, "grad_norm": 0.9119014739990234, "learning_rate": 2.945710370949616e-06, "loss": 0.14727783203125, "step": 6001 }, { "epoch": 0.8114237431347697, "grad_norm": 0.6402777433395386, "learning_rate": 2.941638207605629e-06, "loss": 0.11113357543945312, "step": 6002 }, { "epoch": 0.8115589353612167, "grad_norm": 1.40786874294281, "learning_rate": 2.937568554876873e-06, "loss": 0.1712799072265625, "step": 6003 }, { "epoch": 0.8116941275876637, "grad_norm": 1.195235252380371, "learning_rate": 2.9335014136106704e-06, "loss": 0.16188287734985352, "step": 6004 }, { "epoch": 0.8118293198141107, "grad_norm": 1.154290795326233, "learning_rate": 2.929436784653818e-06, "loss": 0.16428565979003906, "step": 6005 }, { "epoch": 0.8119645120405576, "grad_norm": 1.3054907321929932, "learning_rate": 2.925374668852597e-06, "loss": 0.19732093811035156, "step": 6006 }, { "epoch": 0.8120997042670046, "grad_norm": 0.768100917339325, "learning_rate": 2.921315067052754e-06, "loss": 0.14121365547180176, "step": 6007 }, { "epoch": 0.8122348964934516, "grad_norm": 0.9616193771362305, "learning_rate": 2.917257980099535e-06, "loss": 0.16252517700195312, "step": 6008 }, { "epoch": 0.8123700887198986, "grad_norm": 0.6918960809707642, "learning_rate": 2.913203408837629e-06, "loss": 0.16309165954589844, "step": 6009 }, { "epoch": 0.8125052809463456, "grad_norm": 0.8309774994850159, "learning_rate": 2.909151354111232e-06, "loss": 0.1505732536315918, "step": 6010 }, { "epoch": 0.8126404731727925, "grad_norm": 1.5441323518753052, "learning_rate": 2.905101816763998e-06, "loss": 0.1932506561279297, "step": 6011 }, { "epoch": 0.8127756653992395, "grad_norm": 1.2129780054092407, "learning_rate": 2.9010547976390617e-06, "loss": 0.1472775936126709, "step": 6012 }, { "epoch": 0.8129108576256865, "grad_norm": 1.1292622089385986, "learning_rate": 2.897010297579042e-06, "loss": 0.20280075073242188, "step": 6013 }, { "epoch": 0.8130460498521335, "grad_norm": 0.9956125617027283, "learning_rate": 2.8929683174260133e-06, "loss": 0.1752185821533203, "step": 6014 }, { "epoch": 0.8131812420785804, "grad_norm": 1.07522714138031, "learning_rate": 2.8889288580215467e-06, "loss": 0.10916900634765625, "step": 6015 }, { "epoch": 0.8133164343050274, "grad_norm": 1.2742761373519897, "learning_rate": 2.8848919202066752e-06, "loss": 0.182281494140625, "step": 6016 }, { "epoch": 0.8134516265314744, "grad_norm": 1.8565168380737305, "learning_rate": 2.8808575048219123e-06, "loss": 0.21141576766967773, "step": 6017 }, { "epoch": 0.8135868187579214, "grad_norm": 0.964576780796051, "learning_rate": 2.8768256127072436e-06, "loss": 0.12996721267700195, "step": 6018 }, { "epoch": 0.8137220109843684, "grad_norm": 1.0616918802261353, "learning_rate": 2.872796244702128e-06, "loss": 0.14802956581115723, "step": 6019 }, { "epoch": 0.8138572032108153, "grad_norm": 0.9760144352912903, "learning_rate": 2.8687694016455075e-06, "loss": 0.19278335571289062, "step": 6020 }, { "epoch": 0.8139923954372623, "grad_norm": 0.7736731767654419, "learning_rate": 2.86474508437579e-06, "loss": 0.17914915084838867, "step": 6021 }, { "epoch": 0.8141275876637093, "grad_norm": 1.0802083015441895, "learning_rate": 2.8607232937308587e-06, "loss": 0.11943113803863525, "step": 6022 }, { "epoch": 0.8142627798901563, "grad_norm": 1.279369831085205, "learning_rate": 2.856704030548072e-06, "loss": 0.1644296646118164, "step": 6023 }, { "epoch": 0.8143979721166033, "grad_norm": 0.8400365114212036, "learning_rate": 2.8526872956642568e-06, "loss": 0.12800359725952148, "step": 6024 }, { "epoch": 0.8145331643430502, "grad_norm": 0.911558985710144, "learning_rate": 2.84867308991573e-06, "loss": 0.14480876922607422, "step": 6025 }, { "epoch": 0.8146683565694972, "grad_norm": 1.1778076887130737, "learning_rate": 2.8446614141382638e-06, "loss": 0.191680908203125, "step": 6026 }, { "epoch": 0.8148035487959442, "grad_norm": 1.4221285581588745, "learning_rate": 2.8406522691671104e-06, "loss": 0.1989307403564453, "step": 6027 }, { "epoch": 0.8149387410223912, "grad_norm": 0.9350261092185974, "learning_rate": 2.8366456558369975e-06, "loss": 0.1621246337890625, "step": 6028 }, { "epoch": 0.8150739332488381, "grad_norm": 0.7575251460075378, "learning_rate": 2.8326415749821186e-06, "loss": 0.14951801300048828, "step": 6029 }, { "epoch": 0.8152091254752851, "grad_norm": 1.2740938663482666, "learning_rate": 2.828640027436151e-06, "loss": 0.18076324462890625, "step": 6030 }, { "epoch": 0.8153443177017321, "grad_norm": 1.2639656066894531, "learning_rate": 2.824641014032235e-06, "loss": 0.16839075088500977, "step": 6031 }, { "epoch": 0.8154795099281791, "grad_norm": 1.9373390674591064, "learning_rate": 2.820644535602987e-06, "loss": 0.20013999938964844, "step": 6032 }, { "epoch": 0.8156147021546261, "grad_norm": 1.3487224578857422, "learning_rate": 2.8166505929804953e-06, "loss": 0.22852134704589844, "step": 6033 }, { "epoch": 0.815749894381073, "grad_norm": 0.8397018909454346, "learning_rate": 2.8126591869963163e-06, "loss": 0.12119770050048828, "step": 6034 }, { "epoch": 0.81588508660752, "grad_norm": 0.9708743095397949, "learning_rate": 2.8086703184814887e-06, "loss": 0.17267417907714844, "step": 6035 }, { "epoch": 0.816020278833967, "grad_norm": 0.8887884616851807, "learning_rate": 2.8046839882665134e-06, "loss": 0.19072723388671875, "step": 6036 }, { "epoch": 0.816155471060414, "grad_norm": 1.8707904815673828, "learning_rate": 2.800700197181364e-06, "loss": 0.231536865234375, "step": 6037 }, { "epoch": 0.816290663286861, "grad_norm": 1.3133318424224854, "learning_rate": 2.7967189460554876e-06, "loss": 0.13998031616210938, "step": 6038 }, { "epoch": 0.8164258555133079, "grad_norm": 1.241249680519104, "learning_rate": 2.792740235717801e-06, "loss": 0.17536038160324097, "step": 6039 }, { "epoch": 0.816561047739755, "grad_norm": 1.308943748474121, "learning_rate": 2.7887640669967e-06, "loss": 0.1636800765991211, "step": 6040 }, { "epoch": 0.816696239966202, "grad_norm": 1.4974147081375122, "learning_rate": 2.7847904407200327e-06, "loss": 0.17994928359985352, "step": 6041 }, { "epoch": 0.816831432192649, "grad_norm": 1.1783897876739502, "learning_rate": 2.7808193577151363e-06, "loss": 0.16935157775878906, "step": 6042 }, { "epoch": 0.816966624419096, "grad_norm": 1.6643469333648682, "learning_rate": 2.776850818808812e-06, "loss": 0.21500921249389648, "step": 6043 }, { "epoch": 0.8171018166455429, "grad_norm": 0.9193552732467651, "learning_rate": 2.772884824827325e-06, "loss": 0.1640491485595703, "step": 6044 }, { "epoch": 0.8172370088719899, "grad_norm": 1.05073082447052, "learning_rate": 2.768921376596429e-06, "loss": 0.17467784881591797, "step": 6045 }, { "epoch": 0.8173722010984369, "grad_norm": 3.345153570175171, "learning_rate": 2.7649604749413176e-06, "loss": 0.22596174478530884, "step": 6046 }, { "epoch": 0.8175073933248839, "grad_norm": 1.515853762626648, "learning_rate": 2.7610021206866837e-06, "loss": 0.21549415588378906, "step": 6047 }, { "epoch": 0.8176425855513308, "grad_norm": 2.041374444961548, "learning_rate": 2.757046314656676e-06, "loss": 0.21407747268676758, "step": 6048 }, { "epoch": 0.8177777777777778, "grad_norm": 1.1523512601852417, "learning_rate": 2.753093057674909e-06, "loss": 0.2127547264099121, "step": 6049 }, { "epoch": 0.8179129700042248, "grad_norm": 1.790549397468567, "learning_rate": 2.749142350564483e-06, "loss": 0.18041372299194336, "step": 6050 }, { "epoch": 0.8180481622306718, "grad_norm": 0.684222936630249, "learning_rate": 2.7451941941479414e-06, "loss": 0.13886356353759766, "step": 6051 }, { "epoch": 0.8181833544571188, "grad_norm": 2.2031030654907227, "learning_rate": 2.741248589247323e-06, "loss": 0.1802968978881836, "step": 6052 }, { "epoch": 0.8183185466835657, "grad_norm": 0.9762936234474182, "learning_rate": 2.73730553668412e-06, "loss": 0.1410665512084961, "step": 6053 }, { "epoch": 0.8184537389100127, "grad_norm": 1.05668306350708, "learning_rate": 2.7333650372792978e-06, "loss": 0.1737685203552246, "step": 6054 }, { "epoch": 0.8185889311364597, "grad_norm": 1.5536688566207886, "learning_rate": 2.7294270918532876e-06, "loss": 0.13031005859375, "step": 6055 }, { "epoch": 0.8187241233629067, "grad_norm": 0.892826497554779, "learning_rate": 2.7254917012259882e-06, "loss": 0.12874698638916016, "step": 6056 }, { "epoch": 0.8188593155893537, "grad_norm": 0.9968985915184021, "learning_rate": 2.721558866216776e-06, "loss": 0.17169427871704102, "step": 6057 }, { "epoch": 0.8189945078158006, "grad_norm": 1.5622568130493164, "learning_rate": 2.7176285876444846e-06, "loss": 0.15861892700195312, "step": 6058 }, { "epoch": 0.8191297000422476, "grad_norm": 0.9793596863746643, "learning_rate": 2.713700866327417e-06, "loss": 0.1743389368057251, "step": 6059 }, { "epoch": 0.8192648922686946, "grad_norm": 1.0789310932159424, "learning_rate": 2.7097757030833497e-06, "loss": 0.14250314235687256, "step": 6060 }, { "epoch": 0.8194000844951416, "grad_norm": 1.404322624206543, "learning_rate": 2.705853098729517e-06, "loss": 0.16754817962646484, "step": 6061 }, { "epoch": 0.8195352767215885, "grad_norm": 0.8507217764854431, "learning_rate": 2.7019330540826325e-06, "loss": 0.15524959564208984, "step": 6062 }, { "epoch": 0.8196704689480355, "grad_norm": 1.3565703630447388, "learning_rate": 2.6980155699588666e-06, "loss": 0.19153404235839844, "step": 6063 }, { "epoch": 0.8198056611744825, "grad_norm": 1.4479483366012573, "learning_rate": 2.6941006471738633e-06, "loss": 0.1690673828125, "step": 6064 }, { "epoch": 0.8199408534009295, "grad_norm": 1.060706377029419, "learning_rate": 2.690188286542726e-06, "loss": 0.14524078369140625, "step": 6065 }, { "epoch": 0.8200760456273765, "grad_norm": 0.795319676399231, "learning_rate": 2.686278488880029e-06, "loss": 0.12415313720703125, "step": 6066 }, { "epoch": 0.8202112378538234, "grad_norm": 0.9854193329811096, "learning_rate": 2.6823712549998187e-06, "loss": 0.1701974868774414, "step": 6067 }, { "epoch": 0.8203464300802704, "grad_norm": 1.7769702672958374, "learning_rate": 2.678466585715599e-06, "loss": 0.18006324768066406, "step": 6068 }, { "epoch": 0.8204816223067174, "grad_norm": 1.349875569343567, "learning_rate": 2.6745644818403426e-06, "loss": 0.2193450927734375, "step": 6069 }, { "epoch": 0.8206168145331644, "grad_norm": 1.6767956018447876, "learning_rate": 2.6706649441864883e-06, "loss": 0.17186546325683594, "step": 6070 }, { "epoch": 0.8207520067596114, "grad_norm": 0.9098408222198486, "learning_rate": 2.666767973565937e-06, "loss": 0.15672779083251953, "step": 6071 }, { "epoch": 0.8208871989860583, "grad_norm": 0.880479097366333, "learning_rate": 2.6628735707900653e-06, "loss": 0.16680526733398438, "step": 6072 }, { "epoch": 0.8210223912125053, "grad_norm": 1.240790843963623, "learning_rate": 2.658981736669707e-06, "loss": 0.25571441650390625, "step": 6073 }, { "epoch": 0.8211575834389523, "grad_norm": 1.0422959327697754, "learning_rate": 2.655092472015161e-06, "loss": 0.15816307067871094, "step": 6074 }, { "epoch": 0.8212927756653993, "grad_norm": 1.1392889022827148, "learning_rate": 2.6512057776361935e-06, "loss": 0.20930880308151245, "step": 6075 }, { "epoch": 0.8214279678918462, "grad_norm": 1.7220994234085083, "learning_rate": 2.64732165434203e-06, "loss": 0.18173789978027344, "step": 6076 }, { "epoch": 0.8215631601182932, "grad_norm": 1.9753469228744507, "learning_rate": 2.6434401029413792e-06, "loss": 0.1540207862854004, "step": 6077 }, { "epoch": 0.8216983523447402, "grad_norm": 1.1543912887573242, "learning_rate": 2.639561124242385e-06, "loss": 0.20812129974365234, "step": 6078 }, { "epoch": 0.8218335445711872, "grad_norm": 0.6986100077629089, "learning_rate": 2.635684719052682e-06, "loss": 0.14097929000854492, "step": 6079 }, { "epoch": 0.8219687367976342, "grad_norm": 1.185673713684082, "learning_rate": 2.631810888179355e-06, "loss": 0.1335620880126953, "step": 6080 }, { "epoch": 0.8221039290240811, "grad_norm": 0.9642646908760071, "learning_rate": 2.627939632428952e-06, "loss": 0.13232135772705078, "step": 6081 }, { "epoch": 0.8222391212505281, "grad_norm": 1.2406675815582275, "learning_rate": 2.624070952607502e-06, "loss": 0.13703632354736328, "step": 6082 }, { "epoch": 0.8223743134769751, "grad_norm": 1.6374294757843018, "learning_rate": 2.620204849520468e-06, "loss": 0.18126296997070312, "step": 6083 }, { "epoch": 0.8225095057034221, "grad_norm": 1.0054970979690552, "learning_rate": 2.616341323972806e-06, "loss": 0.1741647720336914, "step": 6084 }, { "epoch": 0.822644697929869, "grad_norm": 1.076847791671753, "learning_rate": 2.612480376768917e-06, "loss": 0.1520226001739502, "step": 6085 }, { "epoch": 0.822779890156316, "grad_norm": 1.0962084531784058, "learning_rate": 2.608622008712672e-06, "loss": 0.11036491394042969, "step": 6086 }, { "epoch": 0.822915082382763, "grad_norm": 2.8500871658325195, "learning_rate": 2.6047662206074034e-06, "loss": 0.20782816410064697, "step": 6087 }, { "epoch": 0.82305027460921, "grad_norm": 1.0612248182296753, "learning_rate": 2.600913013255904e-06, "loss": 0.17698431015014648, "step": 6088 }, { "epoch": 0.823185466835657, "grad_norm": 1.733834981918335, "learning_rate": 2.59706238746044e-06, "loss": 0.1436328887939453, "step": 6089 }, { "epoch": 0.8233206590621039, "grad_norm": 1.6506434679031372, "learning_rate": 2.593214344022725e-06, "loss": 0.23116111755371094, "step": 6090 }, { "epoch": 0.8234558512885509, "grad_norm": 1.213346242904663, "learning_rate": 2.5893688837439474e-06, "loss": 0.2115011215209961, "step": 6091 }, { "epoch": 0.8235910435149979, "grad_norm": 0.8036051392555237, "learning_rate": 2.5855260074247473e-06, "loss": 0.126739501953125, "step": 6092 }, { "epoch": 0.8237262357414449, "grad_norm": 1.0845261812210083, "learning_rate": 2.581685715865232e-06, "loss": 0.1801142692565918, "step": 6093 }, { "epoch": 0.8238614279678919, "grad_norm": 1.4276034832000732, "learning_rate": 2.5778480098649766e-06, "loss": 0.2077932357788086, "step": 6094 }, { "epoch": 0.8239966201943388, "grad_norm": 1.3080353736877441, "learning_rate": 2.5740128902230087e-06, "loss": 0.1738567352294922, "step": 6095 }, { "epoch": 0.8241318124207858, "grad_norm": 1.7174516916275024, "learning_rate": 2.5701803577378214e-06, "loss": 0.18425846099853516, "step": 6096 }, { "epoch": 0.8242670046472328, "grad_norm": 0.8913487792015076, "learning_rate": 2.566350413207366e-06, "loss": 0.16863059997558594, "step": 6097 }, { "epoch": 0.8244021968736798, "grad_norm": 0.9576337933540344, "learning_rate": 2.5625230574290554e-06, "loss": 0.16823863983154297, "step": 6098 }, { "epoch": 0.8245373891001267, "grad_norm": 0.8997681140899658, "learning_rate": 2.558698291199773e-06, "loss": 0.15287399291992188, "step": 6099 }, { "epoch": 0.8246725813265737, "grad_norm": 1.208045244216919, "learning_rate": 2.5548761153158524e-06, "loss": 0.17196178436279297, "step": 6100 }, { "epoch": 0.8248077735530207, "grad_norm": 0.7089408040046692, "learning_rate": 2.55105653057309e-06, "loss": 0.1575608253479004, "step": 6101 }, { "epoch": 0.8249429657794677, "grad_norm": 1.524511456489563, "learning_rate": 2.547239537766743e-06, "loss": 0.17682933807373047, "step": 6102 }, { "epoch": 0.8250781580059147, "grad_norm": 0.8569207787513733, "learning_rate": 2.543425137691526e-06, "loss": 0.15643692016601562, "step": 6103 }, { "epoch": 0.8252133502323616, "grad_norm": 1.1182936429977417, "learning_rate": 2.5396133311416264e-06, "loss": 0.18325233459472656, "step": 6104 }, { "epoch": 0.8253485424588086, "grad_norm": 0.9564006328582764, "learning_rate": 2.5358041189106784e-06, "loss": 0.17651939392089844, "step": 6105 }, { "epoch": 0.8254837346852556, "grad_norm": 0.8630929589271545, "learning_rate": 2.531997501791779e-06, "loss": 0.15137434005737305, "step": 6106 }, { "epoch": 0.8256189269117026, "grad_norm": 0.90911465883255, "learning_rate": 2.528193480577489e-06, "loss": 0.11822032928466797, "step": 6107 }, { "epoch": 0.8257541191381496, "grad_norm": 0.8955482244491577, "learning_rate": 2.5243920560598186e-06, "loss": 0.1212911605834961, "step": 6108 }, { "epoch": 0.8258893113645965, "grad_norm": 2.1968774795532227, "learning_rate": 2.5205932290302598e-06, "loss": 0.22354888916015625, "step": 6109 }, { "epoch": 0.8260245035910435, "grad_norm": 0.7046879529953003, "learning_rate": 2.516797000279729e-06, "loss": 0.10719013214111328, "step": 6110 }, { "epoch": 0.8261596958174905, "grad_norm": 1.467839002609253, "learning_rate": 2.513003370598637e-06, "loss": 0.2000129222869873, "step": 6111 }, { "epoch": 0.8262948880439375, "grad_norm": 0.7647542953491211, "learning_rate": 2.509212340776832e-06, "loss": 0.0921945571899414, "step": 6112 }, { "epoch": 0.8264300802703844, "grad_norm": 1.3513394594192505, "learning_rate": 2.505423911603622e-06, "loss": 0.1749129295349121, "step": 6113 }, { "epoch": 0.8265652724968314, "grad_norm": 0.861473560333252, "learning_rate": 2.501638083867789e-06, "loss": 0.14263617992401123, "step": 6114 }, { "epoch": 0.8267004647232784, "grad_norm": 0.88294517993927, "learning_rate": 2.497854858357552e-06, "loss": 0.14968490600585938, "step": 6115 }, { "epoch": 0.8268356569497254, "grad_norm": 2.7444021701812744, "learning_rate": 2.494074235860604e-06, "loss": 0.17145919799804688, "step": 6116 }, { "epoch": 0.8269708491761724, "grad_norm": 0.8517177700996399, "learning_rate": 2.4902962171640913e-06, "loss": 0.14659547805786133, "step": 6117 }, { "epoch": 0.8271060414026193, "grad_norm": 0.9189021587371826, "learning_rate": 2.4865208030546167e-06, "loss": 0.15937137603759766, "step": 6118 }, { "epoch": 0.8272412336290663, "grad_norm": 0.9920191764831543, "learning_rate": 2.482747994318239e-06, "loss": 0.11202430725097656, "step": 6119 }, { "epoch": 0.8273764258555133, "grad_norm": 1.146591305732727, "learning_rate": 2.478977791740477e-06, "loss": 0.12406206130981445, "step": 6120 }, { "epoch": 0.8275116180819603, "grad_norm": 1.4638158082962036, "learning_rate": 2.475210196106313e-06, "loss": 0.2104511260986328, "step": 6121 }, { "epoch": 0.8276468103084073, "grad_norm": 1.3976386785507202, "learning_rate": 2.4714452082001753e-06, "loss": 0.14893722534179688, "step": 6122 }, { "epoch": 0.8277820025348542, "grad_norm": 1.0303772687911987, "learning_rate": 2.467682828805956e-06, "loss": 0.1817913055419922, "step": 6123 }, { "epoch": 0.8279171947613012, "grad_norm": 1.6236698627471924, "learning_rate": 2.4639230587070017e-06, "loss": 0.146897554397583, "step": 6124 }, { "epoch": 0.8280523869877482, "grad_norm": 1.711416482925415, "learning_rate": 2.460165898686114e-06, "loss": 0.18636691570281982, "step": 6125 }, { "epoch": 0.8281875792141952, "grad_norm": 3.414409875869751, "learning_rate": 2.4564113495255597e-06, "loss": 0.2081054449081421, "step": 6126 }, { "epoch": 0.8283227714406421, "grad_norm": 1.0230519771575928, "learning_rate": 2.4526594120070545e-06, "loss": 0.17973709106445312, "step": 6127 }, { "epoch": 0.8284579636670891, "grad_norm": 0.8750633597373962, "learning_rate": 2.4489100869117686e-06, "loss": 0.13215065002441406, "step": 6128 }, { "epoch": 0.8285931558935361, "grad_norm": 1.528674602508545, "learning_rate": 2.4451633750203344e-06, "loss": 0.22925639152526855, "step": 6129 }, { "epoch": 0.8287283481199831, "grad_norm": 1.441159725189209, "learning_rate": 2.441419277112831e-06, "loss": 0.19792938232421875, "step": 6130 }, { "epoch": 0.8288635403464301, "grad_norm": 1.6094666719436646, "learning_rate": 2.4376777939688107e-06, "loss": 0.2055816650390625, "step": 6131 }, { "epoch": 0.828998732572877, "grad_norm": 1.185416340827942, "learning_rate": 2.4339389263672625e-06, "loss": 0.18233442306518555, "step": 6132 }, { "epoch": 0.829133924799324, "grad_norm": 0.498168021440506, "learning_rate": 2.4302026750866406e-06, "loss": 0.09771871566772461, "step": 6133 }, { "epoch": 0.829269117025771, "grad_norm": 1.0136938095092773, "learning_rate": 2.4264690409048517e-06, "loss": 0.12264084815979004, "step": 6134 }, { "epoch": 0.829404309252218, "grad_norm": 1.0527483224868774, "learning_rate": 2.4227380245992555e-06, "loss": 0.15552902221679688, "step": 6135 }, { "epoch": 0.829539501478665, "grad_norm": 0.9546632170677185, "learning_rate": 2.4190096269466767e-06, "loss": 0.174957275390625, "step": 6136 }, { "epoch": 0.8296746937051119, "grad_norm": 1.0038312673568726, "learning_rate": 2.415283848723383e-06, "loss": 0.1708831787109375, "step": 6137 }, { "epoch": 0.8298098859315589, "grad_norm": 1.6940914392471313, "learning_rate": 2.411560690705101e-06, "loss": 0.22352981567382812, "step": 6138 }, { "epoch": 0.8299450781580059, "grad_norm": 1.003494143486023, "learning_rate": 2.4078401536670146e-06, "loss": 0.18477821350097656, "step": 6139 }, { "epoch": 0.8300802703844529, "grad_norm": 0.8504602313041687, "learning_rate": 2.4041222383837538e-06, "loss": 0.1630370318889618, "step": 6140 }, { "epoch": 0.8302154626108998, "grad_norm": 1.0547864437103271, "learning_rate": 2.400406945629418e-06, "loss": 0.15450400114059448, "step": 6141 }, { "epoch": 0.8303506548373468, "grad_norm": 1.3355293273925781, "learning_rate": 2.3966942761775396e-06, "loss": 0.13461029529571533, "step": 6142 }, { "epoch": 0.8304858470637938, "grad_norm": 1.4127442836761475, "learning_rate": 2.3929842308011263e-06, "loss": 0.13334119319915771, "step": 6143 }, { "epoch": 0.8306210392902408, "grad_norm": 1.4277772903442383, "learning_rate": 2.3892768102726236e-06, "loss": 0.18231201171875, "step": 6144 }, { "epoch": 0.8307562315166878, "grad_norm": 1.2124049663543701, "learning_rate": 2.3855720153639344e-06, "loss": 0.1561412811279297, "step": 6145 }, { "epoch": 0.8308914237431347, "grad_norm": 1.3672864437103271, "learning_rate": 2.381869846846428e-06, "loss": 0.22016239166259766, "step": 6146 }, { "epoch": 0.8310266159695817, "grad_norm": 1.3312278985977173, "learning_rate": 2.3781703054908993e-06, "loss": 0.16948366165161133, "step": 6147 }, { "epoch": 0.8311618081960287, "grad_norm": 2.371581792831421, "learning_rate": 2.374473392067624e-06, "loss": 0.23529160022735596, "step": 6148 }, { "epoch": 0.8312970004224757, "grad_norm": 1.0126550197601318, "learning_rate": 2.370779107346317e-06, "loss": 0.1361989974975586, "step": 6149 }, { "epoch": 0.8314321926489227, "grad_norm": 1.1454592943191528, "learning_rate": 2.3670874520961437e-06, "loss": 0.1357412338256836, "step": 6150 }, { "epoch": 0.8315673848753696, "grad_norm": 1.0357590913772583, "learning_rate": 2.3633984270857367e-06, "loss": 0.1480344831943512, "step": 6151 }, { "epoch": 0.8317025771018166, "grad_norm": 1.1969467401504517, "learning_rate": 2.359712033083156e-06, "loss": 0.15659022331237793, "step": 6152 }, { "epoch": 0.8318377693282636, "grad_norm": 1.244457721710205, "learning_rate": 2.35602827085594e-06, "loss": 0.18448418378829956, "step": 6153 }, { "epoch": 0.8319729615547106, "grad_norm": 1.0550355911254883, "learning_rate": 2.3523471411710644e-06, "loss": 0.18457984924316406, "step": 6154 }, { "epoch": 0.8321081537811575, "grad_norm": 1.0347785949707031, "learning_rate": 2.3486686447949585e-06, "loss": 0.15082788467407227, "step": 6155 }, { "epoch": 0.8322433460076045, "grad_norm": 2.2821013927459717, "learning_rate": 2.3449927824935075e-06, "loss": 0.1829671859741211, "step": 6156 }, { "epoch": 0.8323785382340515, "grad_norm": 0.9535189270973206, "learning_rate": 2.3413195550320393e-06, "loss": 0.18484878540039062, "step": 6157 }, { "epoch": 0.8325137304604985, "grad_norm": 1.0555816888809204, "learning_rate": 2.3376489631753474e-06, "loss": 0.15626144409179688, "step": 6158 }, { "epoch": 0.8326489226869455, "grad_norm": 0.7003810405731201, "learning_rate": 2.3339810076876665e-06, "loss": 0.11058981716632843, "step": 6159 }, { "epoch": 0.8327841149133924, "grad_norm": 1.2119359970092773, "learning_rate": 2.3303156893326815e-06, "loss": 0.1506023406982422, "step": 6160 }, { "epoch": 0.8329193071398394, "grad_norm": 1.0864394903182983, "learning_rate": 2.326653008873535e-06, "loss": 0.19234848022460938, "step": 6161 }, { "epoch": 0.8330544993662864, "grad_norm": 0.9930852055549622, "learning_rate": 2.3229929670728085e-06, "loss": 0.14966583251953125, "step": 6162 }, { "epoch": 0.8331896915927334, "grad_norm": 2.0420889854431152, "learning_rate": 2.319335564692554e-06, "loss": 0.19245147705078125, "step": 6163 }, { "epoch": 0.8333248838191804, "grad_norm": 0.7486175298690796, "learning_rate": 2.315680802494256e-06, "loss": 0.1457509994506836, "step": 6164 }, { "epoch": 0.8334600760456273, "grad_norm": 1.3638601303100586, "learning_rate": 2.312028681238856e-06, "loss": 0.19734573364257812, "step": 6165 }, { "epoch": 0.8335952682720743, "grad_norm": 2.4501829147338867, "learning_rate": 2.3083792016867434e-06, "loss": 0.1871786117553711, "step": 6166 }, { "epoch": 0.8337304604985213, "grad_norm": 1.2597883939743042, "learning_rate": 2.304732364597759e-06, "loss": 0.09703421592712402, "step": 6167 }, { "epoch": 0.8338656527249683, "grad_norm": 2.0859267711639404, "learning_rate": 2.3010881707311994e-06, "loss": 0.20683956146240234, "step": 6168 }, { "epoch": 0.8340008449514152, "grad_norm": 1.5158740282058716, "learning_rate": 2.2974466208458017e-06, "loss": 0.1488513946533203, "step": 6169 }, { "epoch": 0.8341360371778622, "grad_norm": 1.1211365461349487, "learning_rate": 2.293807715699755e-06, "loss": 0.180938720703125, "step": 6170 }, { "epoch": 0.8342712294043092, "grad_norm": 1.8995894193649292, "learning_rate": 2.2901714560507e-06, "loss": 0.22339916229248047, "step": 6171 }, { "epoch": 0.8344064216307562, "grad_norm": 0.8575047850608826, "learning_rate": 2.286537842655722e-06, "loss": 0.14801931381225586, "step": 6172 }, { "epoch": 0.8345416138572032, "grad_norm": 0.933614194393158, "learning_rate": 2.2829068762713633e-06, "loss": 0.15410709381103516, "step": 6173 }, { "epoch": 0.8346768060836501, "grad_norm": 1.2840702533721924, "learning_rate": 2.279278557653611e-06, "loss": 0.22905540466308594, "step": 6174 }, { "epoch": 0.8348119983100971, "grad_norm": 2.2746710777282715, "learning_rate": 2.2756528875578965e-06, "loss": 0.24266767501831055, "step": 6175 }, { "epoch": 0.8349471905365442, "grad_norm": 1.6231721639633179, "learning_rate": 2.2720298667391067e-06, "loss": 0.15422916412353516, "step": 6176 }, { "epoch": 0.8350823827629912, "grad_norm": 1.193681240081787, "learning_rate": 2.268409495951568e-06, "loss": 0.23371315002441406, "step": 6177 }, { "epoch": 0.8352175749894382, "grad_norm": 0.7155642509460449, "learning_rate": 2.2647917759490723e-06, "loss": 0.1354689598083496, "step": 6178 }, { "epoch": 0.8353527672158851, "grad_norm": 1.0123320817947388, "learning_rate": 2.261176707484834e-06, "loss": 0.11757993698120117, "step": 6179 }, { "epoch": 0.8354879594423321, "grad_norm": 2.0679335594177246, "learning_rate": 2.2575642913115408e-06, "loss": 0.2002277374267578, "step": 6180 }, { "epoch": 0.8356231516687791, "grad_norm": 1.0954053401947021, "learning_rate": 2.253954528181313e-06, "loss": 0.20328426361083984, "step": 6181 }, { "epoch": 0.8357583438952261, "grad_norm": 0.945042610168457, "learning_rate": 2.2503474188457206e-06, "loss": 0.20763778686523438, "step": 6182 }, { "epoch": 0.835893536121673, "grad_norm": 1.3575836420059204, "learning_rate": 2.2467429640557903e-06, "loss": 0.1425628662109375, "step": 6183 }, { "epoch": 0.83602872834812, "grad_norm": 0.8715459108352661, "learning_rate": 2.2431411645619776e-06, "loss": 0.14175176620483398, "step": 6184 }, { "epoch": 0.836163920574567, "grad_norm": 1.1249775886535645, "learning_rate": 2.239542021114205e-06, "loss": 0.21918106079101562, "step": 6185 }, { "epoch": 0.836299112801014, "grad_norm": 1.7404025793075562, "learning_rate": 2.2359455344618306e-06, "loss": 0.20702362060546875, "step": 6186 }, { "epoch": 0.836434305027461, "grad_norm": 1.311781406402588, "learning_rate": 2.232351705353663e-06, "loss": 0.2141704559326172, "step": 6187 }, { "epoch": 0.836569497253908, "grad_norm": 1.3623697757720947, "learning_rate": 2.228760534537955e-06, "loss": 0.18335247039794922, "step": 6188 }, { "epoch": 0.8367046894803549, "grad_norm": 1.0828830003738403, "learning_rate": 2.2251720227624044e-06, "loss": 0.17909908294677734, "step": 6189 }, { "epoch": 0.8368398817068019, "grad_norm": 1.3389908075332642, "learning_rate": 2.2215861707741666e-06, "loss": 0.2291707992553711, "step": 6190 }, { "epoch": 0.8369750739332489, "grad_norm": 1.6758027076721191, "learning_rate": 2.2180029793198313e-06, "loss": 0.1393446922302246, "step": 6191 }, { "epoch": 0.8371102661596959, "grad_norm": 1.672338604927063, "learning_rate": 2.2144224491454363e-06, "loss": 0.14776533842086792, "step": 6192 }, { "epoch": 0.8372454583861428, "grad_norm": 1.1904263496398926, "learning_rate": 2.2108445809964695e-06, "loss": 0.15963387489318848, "step": 6193 }, { "epoch": 0.8373806506125898, "grad_norm": 1.2583891153335571, "learning_rate": 2.2072693756178567e-06, "loss": 0.1912822723388672, "step": 6194 }, { "epoch": 0.8375158428390368, "grad_norm": 0.9383134245872498, "learning_rate": 2.203696833753983e-06, "loss": 0.20670700073242188, "step": 6195 }, { "epoch": 0.8376510350654838, "grad_norm": 1.3985265493392944, "learning_rate": 2.200126956148668e-06, "loss": 0.18425893783569336, "step": 6196 }, { "epoch": 0.8377862272919308, "grad_norm": 0.9472099542617798, "learning_rate": 2.196559743545177e-06, "loss": 0.17144203186035156, "step": 6197 }, { "epoch": 0.8379214195183777, "grad_norm": 1.364683747291565, "learning_rate": 2.1929951966862233e-06, "loss": 0.18219280242919922, "step": 6198 }, { "epoch": 0.8380566117448247, "grad_norm": 1.1948028802871704, "learning_rate": 2.1894333163139607e-06, "loss": 0.2162628173828125, "step": 6199 }, { "epoch": 0.8381918039712717, "grad_norm": 1.3858624696731567, "learning_rate": 2.1858741031700015e-06, "loss": 0.18250656127929688, "step": 6200 }, { "epoch": 0.8383269961977187, "grad_norm": 0.5892735719680786, "learning_rate": 2.1823175579953856e-06, "loss": 0.10857677459716797, "step": 6201 }, { "epoch": 0.8384621884241656, "grad_norm": 1.2920548915863037, "learning_rate": 2.1787636815306065e-06, "loss": 0.19202709197998047, "step": 6202 }, { "epoch": 0.8385973806506126, "grad_norm": 1.199432611465454, "learning_rate": 2.1752124745156005e-06, "loss": 0.21104812622070312, "step": 6203 }, { "epoch": 0.8387325728770596, "grad_norm": 0.9232925176620483, "learning_rate": 2.171663937689744e-06, "loss": 0.14895915985107422, "step": 6204 }, { "epoch": 0.8388677651035066, "grad_norm": 0.7447475790977478, "learning_rate": 2.168118071791868e-06, "loss": 0.14146804809570312, "step": 6205 }, { "epoch": 0.8390029573299536, "grad_norm": 1.361911654472351, "learning_rate": 2.164574877560237e-06, "loss": 0.21272659301757812, "step": 6206 }, { "epoch": 0.8391381495564005, "grad_norm": 1.462789535522461, "learning_rate": 2.161034355732564e-06, "loss": 0.19586515426635742, "step": 6207 }, { "epoch": 0.8392733417828475, "grad_norm": 0.9570719003677368, "learning_rate": 2.1574965070460047e-06, "loss": 0.1771411895751953, "step": 6208 }, { "epoch": 0.8394085340092945, "grad_norm": 1.6840550899505615, "learning_rate": 2.1539613322371527e-06, "loss": 0.16612529754638672, "step": 6209 }, { "epoch": 0.8395437262357415, "grad_norm": 0.851108193397522, "learning_rate": 2.1504288320420613e-06, "loss": 0.1774444580078125, "step": 6210 }, { "epoch": 0.8396789184621885, "grad_norm": 1.069047451019287, "learning_rate": 2.1468990071962038e-06, "loss": 0.1613612174987793, "step": 6211 }, { "epoch": 0.8398141106886354, "grad_norm": 0.6757184267044067, "learning_rate": 2.143371858434515e-06, "loss": 0.1392993927001953, "step": 6212 }, { "epoch": 0.8399493029150824, "grad_norm": 1.664408564567566, "learning_rate": 2.139847386491367e-06, "loss": 0.17747879028320312, "step": 6213 }, { "epoch": 0.8400844951415294, "grad_norm": 2.1912841796875, "learning_rate": 2.1363255921005685e-06, "loss": 0.19891071319580078, "step": 6214 }, { "epoch": 0.8402196873679764, "grad_norm": 1.742181658744812, "learning_rate": 2.1328064759953853e-06, "loss": 0.1901998519897461, "step": 6215 }, { "epoch": 0.8403548795944233, "grad_norm": 1.0470956563949585, "learning_rate": 2.129290038908504e-06, "loss": 0.15223264694213867, "step": 6216 }, { "epoch": 0.8404900718208703, "grad_norm": 0.8628921508789062, "learning_rate": 2.1257762815720745e-06, "loss": 0.12753534317016602, "step": 6217 }, { "epoch": 0.8406252640473173, "grad_norm": 2.58061146736145, "learning_rate": 2.122265204717678e-06, "loss": 0.1978759765625, "step": 6218 }, { "epoch": 0.8407604562737643, "grad_norm": 1.2342422008514404, "learning_rate": 2.1187568090763328e-06, "loss": 0.18824481964111328, "step": 6219 }, { "epoch": 0.8408956485002113, "grad_norm": 0.5594731569290161, "learning_rate": 2.1152510953785196e-06, "loss": 0.10934829711914062, "step": 6220 }, { "epoch": 0.8410308407266582, "grad_norm": 1.1864529848098755, "learning_rate": 2.1117480643541304e-06, "loss": 0.153900146484375, "step": 6221 }, { "epoch": 0.8411660329531052, "grad_norm": 1.358526349067688, "learning_rate": 2.1082477167325275e-06, "loss": 0.16078662872314453, "step": 6222 }, { "epoch": 0.8413012251795522, "grad_norm": 1.1038748025894165, "learning_rate": 2.1047500532424968e-06, "loss": 0.17917442321777344, "step": 6223 }, { "epoch": 0.8414364174059992, "grad_norm": 1.5504013299942017, "learning_rate": 2.1012550746122705e-06, "loss": 0.19980669021606445, "step": 6224 }, { "epoch": 0.8415716096324461, "grad_norm": 1.0121597051620483, "learning_rate": 2.0977627815695217e-06, "loss": 0.15400314331054688, "step": 6225 }, { "epoch": 0.8417068018588931, "grad_norm": 1.0103116035461426, "learning_rate": 2.094273174841362e-06, "loss": 0.17439568042755127, "step": 6226 }, { "epoch": 0.8418419940853401, "grad_norm": 1.112388253211975, "learning_rate": 2.0907862551543516e-06, "loss": 0.15419578552246094, "step": 6227 }, { "epoch": 0.8419771863117871, "grad_norm": 1.314086675643921, "learning_rate": 2.087302023234485e-06, "loss": 0.13413846492767334, "step": 6228 }, { "epoch": 0.8421123785382341, "grad_norm": 0.651127278804779, "learning_rate": 2.083820479807194e-06, "loss": 0.11928582191467285, "step": 6229 }, { "epoch": 0.842247570764681, "grad_norm": 1.2742419242858887, "learning_rate": 2.0803416255973585e-06, "loss": 0.19922637939453125, "step": 6230 }, { "epoch": 0.842382762991128, "grad_norm": 1.8111321926116943, "learning_rate": 2.0768654613292887e-06, "loss": 0.16463732719421387, "step": 6231 }, { "epoch": 0.842517955217575, "grad_norm": 1.633489727973938, "learning_rate": 2.0733919877267477e-06, "loss": 0.18404459953308105, "step": 6232 }, { "epoch": 0.842653147444022, "grad_norm": 1.2448697090148926, "learning_rate": 2.0699212055129268e-06, "loss": 0.1781761646270752, "step": 6233 }, { "epoch": 0.842788339670469, "grad_norm": 1.147660493850708, "learning_rate": 2.066453115410463e-06, "loss": 0.15729331970214844, "step": 6234 }, { "epoch": 0.8429235318969159, "grad_norm": 1.0645372867584229, "learning_rate": 2.062987718141431e-06, "loss": 0.20954132080078125, "step": 6235 }, { "epoch": 0.8430587241233629, "grad_norm": 1.390980839729309, "learning_rate": 2.0595250144273423e-06, "loss": 0.13446331024169922, "step": 6236 }, { "epoch": 0.8431939163498099, "grad_norm": 0.9093831181526184, "learning_rate": 2.056065004989155e-06, "loss": 0.13841819763183594, "step": 6237 }, { "epoch": 0.8433291085762569, "grad_norm": 1.0498404502868652, "learning_rate": 2.0526076905472585e-06, "loss": 0.1700420379638672, "step": 6238 }, { "epoch": 0.8434643008027038, "grad_norm": 2.439100503921509, "learning_rate": 2.0491530718214855e-06, "loss": 0.20040130615234375, "step": 6239 }, { "epoch": 0.8435994930291508, "grad_norm": 0.948650598526001, "learning_rate": 2.0457011495311045e-06, "loss": 0.14566409587860107, "step": 6240 }, { "epoch": 0.8437346852555978, "grad_norm": 1.1841480731964111, "learning_rate": 2.0422519243948232e-06, "loss": 0.14728641510009766, "step": 6241 }, { "epoch": 0.8438698774820448, "grad_norm": 1.4580824375152588, "learning_rate": 2.0388053971307927e-06, "loss": 0.2090930938720703, "step": 6242 }, { "epoch": 0.8440050697084918, "grad_norm": 1.3309868574142456, "learning_rate": 2.0353615684565956e-06, "loss": 0.1404285430908203, "step": 6243 }, { "epoch": 0.8441402619349387, "grad_norm": 0.9833971858024597, "learning_rate": 2.0319204390892566e-06, "loss": 0.17285728454589844, "step": 6244 }, { "epoch": 0.8442754541613857, "grad_norm": 1.2245672941207886, "learning_rate": 2.0284820097452374e-06, "loss": 0.13866472244262695, "step": 6245 }, { "epoch": 0.8444106463878327, "grad_norm": 0.6681135296821594, "learning_rate": 2.02504628114043e-06, "loss": 0.1077723503112793, "step": 6246 }, { "epoch": 0.8445458386142797, "grad_norm": 0.888547420501709, "learning_rate": 2.0216132539901865e-06, "loss": 0.16910552978515625, "step": 6247 }, { "epoch": 0.8446810308407267, "grad_norm": 1.6373014450073242, "learning_rate": 2.0181829290092663e-06, "loss": 0.17827892303466797, "step": 6248 }, { "epoch": 0.8448162230671736, "grad_norm": 1.2827008962631226, "learning_rate": 2.014755306911891e-06, "loss": 0.1621088981628418, "step": 6249 }, { "epoch": 0.8449514152936206, "grad_norm": 1.2814399003982544, "learning_rate": 2.0113303884117057e-06, "loss": 0.1277916431427002, "step": 6250 }, { "epoch": 0.8450866075200676, "grad_norm": 0.7529605627059937, "learning_rate": 2.0079081742217957e-06, "loss": 0.11689400672912598, "step": 6251 }, { "epoch": 0.8452217997465146, "grad_norm": 1.8506730794906616, "learning_rate": 2.0044886650546915e-06, "loss": 0.17331218719482422, "step": 6252 }, { "epoch": 0.8453569919729615, "grad_norm": 1.1317185163497925, "learning_rate": 2.0010718616223406e-06, "loss": 0.17982101440429688, "step": 6253 }, { "epoch": 0.8454921841994085, "grad_norm": 1.1462839841842651, "learning_rate": 1.9976577646361514e-06, "loss": 0.14794492721557617, "step": 6254 }, { "epoch": 0.8456273764258555, "grad_norm": 0.7859115600585938, "learning_rate": 1.994246374806953e-06, "loss": 0.1321239471435547, "step": 6255 }, { "epoch": 0.8457625686523025, "grad_norm": 2.170652389526367, "learning_rate": 1.9908376928450128e-06, "loss": 0.1707468032836914, "step": 6256 }, { "epoch": 0.8458977608787495, "grad_norm": 0.9859130382537842, "learning_rate": 1.987431719460039e-06, "loss": 0.15401244163513184, "step": 6257 }, { "epoch": 0.8460329531051964, "grad_norm": 1.1299784183502197, "learning_rate": 1.9840284553611706e-06, "loss": 0.14007854461669922, "step": 6258 }, { "epoch": 0.8461681453316434, "grad_norm": 1.0514180660247803, "learning_rate": 1.980627901256989e-06, "loss": 0.17079877853393555, "step": 6259 }, { "epoch": 0.8463033375580904, "grad_norm": 0.9567099809646606, "learning_rate": 1.9772300578555062e-06, "loss": 0.18895888328552246, "step": 6260 }, { "epoch": 0.8464385297845374, "grad_norm": 1.3666324615478516, "learning_rate": 1.973834925864172e-06, "loss": 0.18380475044250488, "step": 6261 }, { "epoch": 0.8465737220109844, "grad_norm": 1.0582427978515625, "learning_rate": 1.97044250598987e-06, "loss": 0.1831674575805664, "step": 6262 }, { "epoch": 0.8467089142374313, "grad_norm": 1.0290175676345825, "learning_rate": 1.9670527989389177e-06, "loss": 0.1715412139892578, "step": 6263 }, { "epoch": 0.8468441064638783, "grad_norm": 1.3548446893692017, "learning_rate": 1.9636658054170747e-06, "loss": 0.20255088806152344, "step": 6264 }, { "epoch": 0.8469792986903253, "grad_norm": 0.9062769412994385, "learning_rate": 1.960281526129531e-06, "loss": 0.193084716796875, "step": 6265 }, { "epoch": 0.8471144909167723, "grad_norm": 1.1642833948135376, "learning_rate": 1.9568999617809077e-06, "loss": 0.19539260864257812, "step": 6266 }, { "epoch": 0.8472496831432192, "grad_norm": 1.0204161405563354, "learning_rate": 1.9535211130752676e-06, "loss": 0.18901634216308594, "step": 6267 }, { "epoch": 0.8473848753696662, "grad_norm": 1.5740911960601807, "learning_rate": 1.950144980716101e-06, "loss": 0.2320117950439453, "step": 6268 }, { "epoch": 0.8475200675961132, "grad_norm": 0.8457884192466736, "learning_rate": 1.9467715654063444e-06, "loss": 0.15311002731323242, "step": 6269 }, { "epoch": 0.8476552598225602, "grad_norm": 1.1248319149017334, "learning_rate": 1.9434008678483532e-06, "loss": 0.1924905776977539, "step": 6270 }, { "epoch": 0.8477904520490072, "grad_norm": 1.0604660511016846, "learning_rate": 1.9400328887439295e-06, "loss": 0.1890702247619629, "step": 6271 }, { "epoch": 0.8479256442754541, "grad_norm": 2.2242255210876465, "learning_rate": 1.9366676287943038e-06, "loss": 0.2086944580078125, "step": 6272 }, { "epoch": 0.8480608365019011, "grad_norm": 1.0713787078857422, "learning_rate": 1.9333050887001337e-06, "loss": 0.16119706630706787, "step": 6273 }, { "epoch": 0.8481960287283481, "grad_norm": 1.2495838403701782, "learning_rate": 1.9299452691615293e-06, "loss": 0.11509251594543457, "step": 6274 }, { "epoch": 0.8483312209547951, "grad_norm": 1.0761128664016724, "learning_rate": 1.9265881708780182e-06, "loss": 0.18350791931152344, "step": 6275 }, { "epoch": 0.848466413181242, "grad_norm": 1.191149115562439, "learning_rate": 1.9232337945485657e-06, "loss": 0.16768741607666016, "step": 6276 }, { "epoch": 0.848601605407689, "grad_norm": 0.9965090155601501, "learning_rate": 1.91988214087157e-06, "loss": 0.1889941692352295, "step": 6277 }, { "epoch": 0.848736797634136, "grad_norm": 0.984058678150177, "learning_rate": 1.9165332105448613e-06, "loss": 0.20703887939453125, "step": 6278 }, { "epoch": 0.848871989860583, "grad_norm": 1.2002536058425903, "learning_rate": 1.913187004265715e-06, "loss": 0.21549177169799805, "step": 6279 }, { "epoch": 0.84900718208703, "grad_norm": 0.8504394888877869, "learning_rate": 1.909843522730814e-06, "loss": 0.16881728172302246, "step": 6280 }, { "epoch": 0.8491423743134769, "grad_norm": 0.7221499085426331, "learning_rate": 1.9065027666363017e-06, "loss": 0.11715841293334961, "step": 6281 }, { "epoch": 0.8492775665399239, "grad_norm": 1.7518599033355713, "learning_rate": 1.903164736677736e-06, "loss": 0.2264871597290039, "step": 6282 }, { "epoch": 0.8494127587663709, "grad_norm": 0.812059223651886, "learning_rate": 1.8998294335501082e-06, "loss": 0.15494966506958008, "step": 6283 }, { "epoch": 0.8495479509928179, "grad_norm": 0.8587742447853088, "learning_rate": 1.8964968579478592e-06, "loss": 0.15098953247070312, "step": 6284 }, { "epoch": 0.8496831432192649, "grad_norm": 0.9456426501274109, "learning_rate": 1.893167010564834e-06, "loss": 0.14714765548706055, "step": 6285 }, { "epoch": 0.8498183354457118, "grad_norm": 1.1235036849975586, "learning_rate": 1.8898398920943349e-06, "loss": 0.17379093170166016, "step": 6286 }, { "epoch": 0.8499535276721588, "grad_norm": 0.9256793260574341, "learning_rate": 1.886515503229081e-06, "loss": 0.15215396881103516, "step": 6287 }, { "epoch": 0.8500887198986058, "grad_norm": 0.6435022354125977, "learning_rate": 1.8831938446612269e-06, "loss": 0.11315274238586426, "step": 6288 }, { "epoch": 0.8502239121250528, "grad_norm": 1.5932315587997437, "learning_rate": 1.8798749170823676e-06, "loss": 0.14092445373535156, "step": 6289 }, { "epoch": 0.8503591043514998, "grad_norm": 2.051391839981079, "learning_rate": 1.8765587211835089e-06, "loss": 0.15987014770507812, "step": 6290 }, { "epoch": 0.8504942965779467, "grad_norm": 1.1069653034210205, "learning_rate": 1.8732452576551102e-06, "loss": 0.18801546096801758, "step": 6291 }, { "epoch": 0.8506294888043937, "grad_norm": 1.1818568706512451, "learning_rate": 1.8699345271870493e-06, "loss": 0.14302223920822144, "step": 6292 }, { "epoch": 0.8507646810308407, "grad_norm": 1.100877046585083, "learning_rate": 1.8666265304686387e-06, "loss": 0.14019489288330078, "step": 6293 }, { "epoch": 0.8508998732572877, "grad_norm": 1.1298385858535767, "learning_rate": 1.8633212681886203e-06, "loss": 0.15425443649291992, "step": 6294 }, { "epoch": 0.8510350654837346, "grad_norm": 1.2238614559173584, "learning_rate": 1.8600187410351621e-06, "loss": 0.18645191192626953, "step": 6295 }, { "epoch": 0.8511702577101816, "grad_norm": 0.8080711960792542, "learning_rate": 1.8567189496958776e-06, "loss": 0.14484024047851562, "step": 6296 }, { "epoch": 0.8513054499366286, "grad_norm": 0.9121830463409424, "learning_rate": 1.853421894857797e-06, "loss": 0.12390804290771484, "step": 6297 }, { "epoch": 0.8514406421630756, "grad_norm": 1.0221834182739258, "learning_rate": 1.8501275772073827e-06, "loss": 0.15497827529907227, "step": 6298 }, { "epoch": 0.8515758343895226, "grad_norm": 1.3868948221206665, "learning_rate": 1.8468359974305315e-06, "loss": 0.1701679229736328, "step": 6299 }, { "epoch": 0.8517110266159695, "grad_norm": 1.0798214673995972, "learning_rate": 1.8435471562125633e-06, "loss": 0.13428783416748047, "step": 6300 }, { "epoch": 0.8518462188424165, "grad_norm": 0.8273772597312927, "learning_rate": 1.8402610542382386e-06, "loss": 0.14809608459472656, "step": 6301 }, { "epoch": 0.8519814110688635, "grad_norm": 1.5770983695983887, "learning_rate": 1.836977692191742e-06, "loss": 0.1824333667755127, "step": 6302 }, { "epoch": 0.8521166032953105, "grad_norm": 1.2388843297958374, "learning_rate": 1.8336970707566781e-06, "loss": 0.22007465362548828, "step": 6303 }, { "epoch": 0.8522517955217574, "grad_norm": 0.9671942591667175, "learning_rate": 1.8304191906160973e-06, "loss": 0.19976234436035156, "step": 6304 }, { "epoch": 0.8523869877482044, "grad_norm": 1.1258398294448853, "learning_rate": 1.8271440524524668e-06, "loss": 0.21798467636108398, "step": 6305 }, { "epoch": 0.8525221799746514, "grad_norm": 1.3445639610290527, "learning_rate": 1.8238716569476949e-06, "loss": 0.1522226333618164, "step": 6306 }, { "epoch": 0.8526573722010984, "grad_norm": 1.0746725797653198, "learning_rate": 1.8206020047831078e-06, "loss": 0.1488351821899414, "step": 6307 }, { "epoch": 0.8527925644275454, "grad_norm": 1.4821486473083496, "learning_rate": 1.8173350966394648e-06, "loss": 0.15462207794189453, "step": 6308 }, { "epoch": 0.8529277566539923, "grad_norm": 0.8736923933029175, "learning_rate": 1.8140709331969513e-06, "loss": 0.16199684143066406, "step": 6309 }, { "epoch": 0.8530629488804393, "grad_norm": 1.3552755117416382, "learning_rate": 1.810809515135184e-06, "loss": 0.1948223114013672, "step": 6310 }, { "epoch": 0.8531981411068863, "grad_norm": 1.867465853691101, "learning_rate": 1.8075508431332111e-06, "loss": 0.18097877502441406, "step": 6311 }, { "epoch": 0.8533333333333334, "grad_norm": 0.9573332071304321, "learning_rate": 1.8042949178695034e-06, "loss": 0.11316967010498047, "step": 6312 }, { "epoch": 0.8534685255597804, "grad_norm": 0.9344730973243713, "learning_rate": 1.8010417400219636e-06, "loss": 0.15883541107177734, "step": 6313 }, { "epoch": 0.8536037177862273, "grad_norm": 1.6662367582321167, "learning_rate": 1.7977913102679167e-06, "loss": 0.1753377914428711, "step": 6314 }, { "epoch": 0.8537389100126743, "grad_norm": 1.8869081735610962, "learning_rate": 1.7945436292841193e-06, "loss": 0.19003963470458984, "step": 6315 }, { "epoch": 0.8538741022391213, "grad_norm": 1.0469653606414795, "learning_rate": 1.791298697746766e-06, "loss": 0.13212895393371582, "step": 6316 }, { "epoch": 0.8540092944655683, "grad_norm": 0.8228566646575928, "learning_rate": 1.7880565163314545e-06, "loss": 0.1853322982788086, "step": 6317 }, { "epoch": 0.8541444866920153, "grad_norm": 1.4089813232421875, "learning_rate": 1.784817085713233e-06, "loss": 0.1348482370376587, "step": 6318 }, { "epoch": 0.8542796789184622, "grad_norm": 0.9777628183364868, "learning_rate": 1.7815804065665669e-06, "loss": 0.1686868667602539, "step": 6319 }, { "epoch": 0.8544148711449092, "grad_norm": 1.4728889465332031, "learning_rate": 1.778346479565346e-06, "loss": 0.1647968292236328, "step": 6320 }, { "epoch": 0.8545500633713562, "grad_norm": 1.193688988685608, "learning_rate": 1.7751153053829011e-06, "loss": 0.1815328598022461, "step": 6321 }, { "epoch": 0.8546852555978032, "grad_norm": 1.304884672164917, "learning_rate": 1.7718868846919662e-06, "loss": 0.1832122802734375, "step": 6322 }, { "epoch": 0.8548204478242502, "grad_norm": 2.5404052734375, "learning_rate": 1.7686612181647266e-06, "loss": 0.1812753677368164, "step": 6323 }, { "epoch": 0.8549556400506971, "grad_norm": 1.100369930267334, "learning_rate": 1.7654383064727802e-06, "loss": 0.19495582580566406, "step": 6324 }, { "epoch": 0.8550908322771441, "grad_norm": 1.0349186658859253, "learning_rate": 1.762218150287152e-06, "loss": 0.11691141128540039, "step": 6325 }, { "epoch": 0.8552260245035911, "grad_norm": 1.4328306913375854, "learning_rate": 1.759000750278299e-06, "loss": 0.22269058227539062, "step": 6326 }, { "epoch": 0.8553612167300381, "grad_norm": 1.3923872709274292, "learning_rate": 1.7557861071160953e-06, "loss": 0.1661449670791626, "step": 6327 }, { "epoch": 0.855496408956485, "grad_norm": 0.6199512481689453, "learning_rate": 1.7525742214698538e-06, "loss": 0.09385907649993896, "step": 6328 }, { "epoch": 0.855631601182932, "grad_norm": 0.7496179938316345, "learning_rate": 1.7493650940083045e-06, "loss": 0.13285160064697266, "step": 6329 }, { "epoch": 0.855766793409379, "grad_norm": 0.6946294903755188, "learning_rate": 1.746158725399603e-06, "loss": 0.13773822784423828, "step": 6330 }, { "epoch": 0.855901985635826, "grad_norm": 1.548938274383545, "learning_rate": 1.7429551163113322e-06, "loss": 0.2080669403076172, "step": 6331 }, { "epoch": 0.856037177862273, "grad_norm": 1.1919760704040527, "learning_rate": 1.7397542674105e-06, "loss": 0.19496631622314453, "step": 6332 }, { "epoch": 0.8561723700887199, "grad_norm": 1.0689605474472046, "learning_rate": 1.7365561793635431e-06, "loss": 0.18716049194335938, "step": 6333 }, { "epoch": 0.8563075623151669, "grad_norm": 1.1003448963165283, "learning_rate": 1.7333608528363227e-06, "loss": 0.1591663360595703, "step": 6334 }, { "epoch": 0.8564427545416139, "grad_norm": 0.8463094830513, "learning_rate": 1.7301682884941128e-06, "loss": 0.16757440567016602, "step": 6335 }, { "epoch": 0.8565779467680609, "grad_norm": 0.9306474924087524, "learning_rate": 1.726978487001632e-06, "loss": 0.14037609100341797, "step": 6336 }, { "epoch": 0.8567131389945079, "grad_norm": 1.7993676662445068, "learning_rate": 1.7237914490230072e-06, "loss": 0.20835113525390625, "step": 6337 }, { "epoch": 0.8568483312209548, "grad_norm": 1.5667762756347656, "learning_rate": 1.7206071752218027e-06, "loss": 0.15067839622497559, "step": 6338 }, { "epoch": 0.8569835234474018, "grad_norm": 0.9301304817199707, "learning_rate": 1.7174256662610032e-06, "loss": 0.1498403549194336, "step": 6339 }, { "epoch": 0.8571187156738488, "grad_norm": 1.698898434638977, "learning_rate": 1.714246922803004e-06, "loss": 0.19235920906066895, "step": 6340 }, { "epoch": 0.8572539079002958, "grad_norm": 0.6782569885253906, "learning_rate": 1.7110709455096468e-06, "loss": 0.12676215171813965, "step": 6341 }, { "epoch": 0.8573891001267427, "grad_norm": 1.2585948705673218, "learning_rate": 1.7078977350421815e-06, "loss": 0.16432571411132812, "step": 6342 }, { "epoch": 0.8575242923531897, "grad_norm": 0.7204188704490662, "learning_rate": 1.7047272920612926e-06, "loss": 0.1386280059814453, "step": 6343 }, { "epoch": 0.8576594845796367, "grad_norm": 0.8334446549415588, "learning_rate": 1.7015596172270841e-06, "loss": 0.1485891342163086, "step": 6344 }, { "epoch": 0.8577946768060837, "grad_norm": 1.424103856086731, "learning_rate": 1.6983947111990717e-06, "loss": 0.2587437629699707, "step": 6345 }, { "epoch": 0.8579298690325307, "grad_norm": 0.9113597273826599, "learning_rate": 1.695232574636218e-06, "loss": 0.15562820434570312, "step": 6346 }, { "epoch": 0.8580650612589776, "grad_norm": 0.861763060092926, "learning_rate": 1.6920732081968882e-06, "loss": 0.1543712615966797, "step": 6347 }, { "epoch": 0.8582002534854246, "grad_norm": 1.4242202043533325, "learning_rate": 1.6889166125388878e-06, "loss": 0.1682729721069336, "step": 6348 }, { "epoch": 0.8583354457118716, "grad_norm": 0.7612265348434448, "learning_rate": 1.6857627883194277e-06, "loss": 0.1378183364868164, "step": 6349 }, { "epoch": 0.8584706379383186, "grad_norm": 1.0747435092926025, "learning_rate": 1.6826117361951577e-06, "loss": 0.19345355033874512, "step": 6350 }, { "epoch": 0.8586058301647655, "grad_norm": 1.1082537174224854, "learning_rate": 1.6794634568221412e-06, "loss": 0.1585688591003418, "step": 6351 }, { "epoch": 0.8587410223912125, "grad_norm": 0.8508347272872925, "learning_rate": 1.676317950855864e-06, "loss": 0.14651095867156982, "step": 6352 }, { "epoch": 0.8588762146176595, "grad_norm": 1.3187150955200195, "learning_rate": 1.6731752189512456e-06, "loss": 0.1440267562866211, "step": 6353 }, { "epoch": 0.8590114068441065, "grad_norm": 1.6386632919311523, "learning_rate": 1.6700352617626092e-06, "loss": 0.1860048770904541, "step": 6354 }, { "epoch": 0.8591465990705535, "grad_norm": 0.823549211025238, "learning_rate": 1.6668980799437167e-06, "loss": 0.1343517303466797, "step": 6355 }, { "epoch": 0.8592817912970004, "grad_norm": 0.6790689826011658, "learning_rate": 1.6637636741477458e-06, "loss": 0.1320209503173828, "step": 6356 }, { "epoch": 0.8594169835234474, "grad_norm": 1.2886433601379395, "learning_rate": 1.6606320450272943e-06, "loss": 0.1545734405517578, "step": 6357 }, { "epoch": 0.8595521757498944, "grad_norm": 0.7061427235603333, "learning_rate": 1.657503193234386e-06, "loss": 0.15484619140625, "step": 6358 }, { "epoch": 0.8596873679763414, "grad_norm": 0.8254783153533936, "learning_rate": 1.654377119420461e-06, "loss": 0.17655372619628906, "step": 6359 }, { "epoch": 0.8598225602027884, "grad_norm": 1.260725736618042, "learning_rate": 1.6512538242363889e-06, "loss": 0.1584911346435547, "step": 6360 }, { "epoch": 0.8599577524292353, "grad_norm": 1.5559543371200562, "learning_rate": 1.6481333083324563e-06, "loss": 0.1739358901977539, "step": 6361 }, { "epoch": 0.8600929446556823, "grad_norm": 0.6541129350662231, "learning_rate": 1.6450155723583698e-06, "loss": 0.12384319305419922, "step": 6362 }, { "epoch": 0.8602281368821293, "grad_norm": 0.8542926907539368, "learning_rate": 1.6419006169632573e-06, "loss": 0.12067985534667969, "step": 6363 }, { "epoch": 0.8603633291085763, "grad_norm": 1.2117305994033813, "learning_rate": 1.638788442795668e-06, "loss": 0.21179485321044922, "step": 6364 }, { "epoch": 0.8604985213350232, "grad_norm": 1.0029221773147583, "learning_rate": 1.6356790505035785e-06, "loss": 0.16630077362060547, "step": 6365 }, { "epoch": 0.8606337135614702, "grad_norm": 1.0357182025909424, "learning_rate": 1.6325724407343795e-06, "loss": 0.18329143524169922, "step": 6366 }, { "epoch": 0.8607689057879172, "grad_norm": 1.0740844011306763, "learning_rate": 1.6294686141348801e-06, "loss": 0.1861743927001953, "step": 6367 }, { "epoch": 0.8609040980143642, "grad_norm": 1.1387689113616943, "learning_rate": 1.626367571351317e-06, "loss": 0.17217063903808594, "step": 6368 }, { "epoch": 0.8610392902408112, "grad_norm": 1.981236219406128, "learning_rate": 1.6232693130293386e-06, "loss": 0.20763683319091797, "step": 6369 }, { "epoch": 0.8611744824672581, "grad_norm": 1.0935992002487183, "learning_rate": 1.6201738398140254e-06, "loss": 0.11727619171142578, "step": 6370 }, { "epoch": 0.8613096746937051, "grad_norm": 0.7181010246276855, "learning_rate": 1.6170811523498718e-06, "loss": 0.131159245967865, "step": 6371 }, { "epoch": 0.8614448669201521, "grad_norm": 1.4967948198318481, "learning_rate": 1.613991251280783e-06, "loss": 0.18373870849609375, "step": 6372 }, { "epoch": 0.8615800591465991, "grad_norm": 1.1380367279052734, "learning_rate": 1.6109041372501028e-06, "loss": 0.1880960464477539, "step": 6373 }, { "epoch": 0.861715251373046, "grad_norm": 1.5309343338012695, "learning_rate": 1.6078198109005766e-06, "loss": 0.1677541732788086, "step": 6374 }, { "epoch": 0.861850443599493, "grad_norm": 1.2975012063980103, "learning_rate": 1.6047382728743843e-06, "loss": 0.176544189453125, "step": 6375 }, { "epoch": 0.86198563582594, "grad_norm": 1.6805533170700073, "learning_rate": 1.6016595238131176e-06, "loss": 0.17225134372711182, "step": 6376 }, { "epoch": 0.862120828052387, "grad_norm": 1.078234314918518, "learning_rate": 1.5985835643577824e-06, "loss": 0.17897415161132812, "step": 6377 }, { "epoch": 0.862256020278834, "grad_norm": 0.7642520070075989, "learning_rate": 1.5955103951488177e-06, "loss": 0.1291799545288086, "step": 6378 }, { "epoch": 0.862391212505281, "grad_norm": 1.1915998458862305, "learning_rate": 1.5924400168260666e-06, "loss": 0.1720409393310547, "step": 6379 }, { "epoch": 0.8625264047317279, "grad_norm": 1.4987159967422485, "learning_rate": 1.5893724300288064e-06, "loss": 0.16414451599121094, "step": 6380 }, { "epoch": 0.8626615969581749, "grad_norm": 1.294561743736267, "learning_rate": 1.5863076353957196e-06, "loss": 0.1965770721435547, "step": 6381 }, { "epoch": 0.8627967891846219, "grad_norm": 1.7166186571121216, "learning_rate": 1.5832456335649104e-06, "loss": 0.21533203125, "step": 6382 }, { "epoch": 0.8629319814110689, "grad_norm": 2.1693785190582275, "learning_rate": 1.580186425173909e-06, "loss": 0.22189879417419434, "step": 6383 }, { "epoch": 0.8630671736375158, "grad_norm": 0.876650869846344, "learning_rate": 1.5771300108596543e-06, "loss": 0.1353088617324829, "step": 6384 }, { "epoch": 0.8632023658639628, "grad_norm": 1.4678584337234497, "learning_rate": 1.5740763912585171e-06, "loss": 0.15218353271484375, "step": 6385 }, { "epoch": 0.8633375580904098, "grad_norm": 3.0997366905212402, "learning_rate": 1.5710255670062657e-06, "loss": 0.18373775482177734, "step": 6386 }, { "epoch": 0.8634727503168568, "grad_norm": 1.0350871086120605, "learning_rate": 1.567977538738105e-06, "loss": 0.1682291030883789, "step": 6387 }, { "epoch": 0.8636079425433038, "grad_norm": 1.1952470541000366, "learning_rate": 1.5649323070886494e-06, "loss": 0.1619739532470703, "step": 6388 }, { "epoch": 0.8637431347697507, "grad_norm": 0.9888564348220825, "learning_rate": 1.5618898726919284e-06, "loss": 0.15361618995666504, "step": 6389 }, { "epoch": 0.8638783269961977, "grad_norm": 1.0632531642913818, "learning_rate": 1.5588502361814032e-06, "loss": 0.13387584686279297, "step": 6390 }, { "epoch": 0.8640135192226447, "grad_norm": 1.0776424407958984, "learning_rate": 1.5558133981899314e-06, "loss": 0.19541168212890625, "step": 6391 }, { "epoch": 0.8641487114490917, "grad_norm": 1.0624291896820068, "learning_rate": 1.5527793593498053e-06, "loss": 0.19646549224853516, "step": 6392 }, { "epoch": 0.8642839036755386, "grad_norm": 0.993002712726593, "learning_rate": 1.5497481202927244e-06, "loss": 0.1404862105846405, "step": 6393 }, { "epoch": 0.8644190959019856, "grad_norm": 1.8081856966018677, "learning_rate": 1.5467196816498107e-06, "loss": 0.18233346939086914, "step": 6394 }, { "epoch": 0.8645542881284326, "grad_norm": 0.9291538596153259, "learning_rate": 1.5436940440516018e-06, "loss": 0.10775184631347656, "step": 6395 }, { "epoch": 0.8646894803548796, "grad_norm": 1.2466799020767212, "learning_rate": 1.5406712081280484e-06, "loss": 0.1950855255126953, "step": 6396 }, { "epoch": 0.8648246725813266, "grad_norm": 1.6430858373641968, "learning_rate": 1.5376511745085254e-06, "loss": 0.17039108276367188, "step": 6397 }, { "epoch": 0.8649598648077735, "grad_norm": 1.1904093027114868, "learning_rate": 1.5346339438218181e-06, "loss": 0.16352558135986328, "step": 6398 }, { "epoch": 0.8650950570342205, "grad_norm": 1.0221774578094482, "learning_rate": 1.5316195166961295e-06, "loss": 0.14184236526489258, "step": 6399 }, { "epoch": 0.8652302492606675, "grad_norm": 0.7791646122932434, "learning_rate": 1.5286078937590802e-06, "loss": 0.16111791133880615, "step": 6400 }, { "epoch": 0.8653654414871145, "grad_norm": 1.0860053300857544, "learning_rate": 1.5255990756377025e-06, "loss": 0.1878659725189209, "step": 6401 }, { "epoch": 0.8655006337135615, "grad_norm": 1.0716074705123901, "learning_rate": 1.5225930629584534e-06, "loss": 0.14123046398162842, "step": 6402 }, { "epoch": 0.8656358259400084, "grad_norm": 2.6023716926574707, "learning_rate": 1.5195898563472038e-06, "loss": 0.23031234741210938, "step": 6403 }, { "epoch": 0.8657710181664554, "grad_norm": 0.9334373474121094, "learning_rate": 1.5165894564292254e-06, "loss": 0.11715030670166016, "step": 6404 }, { "epoch": 0.8659062103929024, "grad_norm": 1.602604627609253, "learning_rate": 1.5135918638292269e-06, "loss": 0.1645113229751587, "step": 6405 }, { "epoch": 0.8660414026193494, "grad_norm": 0.7436071634292603, "learning_rate": 1.5105970791713186e-06, "loss": 0.1606283187866211, "step": 6406 }, { "epoch": 0.8661765948457963, "grad_norm": 1.6294381618499756, "learning_rate": 1.5076051030790355e-06, "loss": 0.19243431091308594, "step": 6407 }, { "epoch": 0.8663117870722433, "grad_norm": 1.2523728609085083, "learning_rate": 1.5046159361753226e-06, "loss": 0.21007823944091797, "step": 6408 }, { "epoch": 0.8664469792986903, "grad_norm": 0.728074312210083, "learning_rate": 1.5016295790825336e-06, "loss": 0.13447272777557373, "step": 6409 }, { "epoch": 0.8665821715251373, "grad_norm": 1.6927303075790405, "learning_rate": 1.4986460324224493e-06, "loss": 0.17896175384521484, "step": 6410 }, { "epoch": 0.8667173637515843, "grad_norm": 1.1252261400222778, "learning_rate": 1.4956652968162582e-06, "loss": 0.17017865180969238, "step": 6411 }, { "epoch": 0.8668525559780312, "grad_norm": 0.7999898791313171, "learning_rate": 1.492687372884567e-06, "loss": 0.16033077239990234, "step": 6412 }, { "epoch": 0.8669877482044782, "grad_norm": 1.3143024444580078, "learning_rate": 1.4897122612473978e-06, "loss": 0.2348613739013672, "step": 6413 }, { "epoch": 0.8671229404309252, "grad_norm": 2.4359519481658936, "learning_rate": 1.4867399625241772e-06, "loss": 0.1888413429260254, "step": 6414 }, { "epoch": 0.8672581326573722, "grad_norm": 0.8997766971588135, "learning_rate": 1.4837704773337602e-06, "loss": 0.14679336547851562, "step": 6415 }, { "epoch": 0.8673933248838192, "grad_norm": 1.0707746744155884, "learning_rate": 1.4808038062944036e-06, "loss": 0.19666290283203125, "step": 6416 }, { "epoch": 0.8675285171102661, "grad_norm": 0.675529956817627, "learning_rate": 1.4778399500237933e-06, "loss": 0.141806960105896, "step": 6417 }, { "epoch": 0.8676637093367131, "grad_norm": 0.8770679235458374, "learning_rate": 1.4748789091390124e-06, "loss": 0.15349960327148438, "step": 6418 }, { "epoch": 0.8677989015631601, "grad_norm": 1.5107576847076416, "learning_rate": 1.471920684256563e-06, "loss": 0.18856382369995117, "step": 6419 }, { "epoch": 0.8679340937896071, "grad_norm": 0.7999342083930969, "learning_rate": 1.4689652759923721e-06, "loss": 0.14658582210540771, "step": 6420 }, { "epoch": 0.868069286016054, "grad_norm": 0.9932968020439148, "learning_rate": 1.4660126849617645e-06, "loss": 0.1845836639404297, "step": 6421 }, { "epoch": 0.868204478242501, "grad_norm": 1.276864767074585, "learning_rate": 1.4630629117794914e-06, "loss": 0.2045001983642578, "step": 6422 }, { "epoch": 0.868339670468948, "grad_norm": 0.9430343508720398, "learning_rate": 1.4601159570597033e-06, "loss": 0.16817855834960938, "step": 6423 }, { "epoch": 0.868474862695395, "grad_norm": 1.0615726709365845, "learning_rate": 1.4571718214159795e-06, "loss": 0.1956474781036377, "step": 6424 }, { "epoch": 0.868610054921842, "grad_norm": 1.1551867723464966, "learning_rate": 1.454230505461303e-06, "loss": 0.1643831729888916, "step": 6425 }, { "epoch": 0.8687452471482889, "grad_norm": 1.5699961185455322, "learning_rate": 1.4512920098080672e-06, "loss": 0.21731948852539062, "step": 6426 }, { "epoch": 0.8688804393747359, "grad_norm": 1.7338488101959229, "learning_rate": 1.4483563350680878e-06, "loss": 0.1677560806274414, "step": 6427 }, { "epoch": 0.8690156316011829, "grad_norm": 1.758483648300171, "learning_rate": 1.4454234818525824e-06, "loss": 0.2407245635986328, "step": 6428 }, { "epoch": 0.8691508238276299, "grad_norm": 1.1380667686462402, "learning_rate": 1.4424934507721926e-06, "loss": 0.15755844116210938, "step": 6429 }, { "epoch": 0.8692860160540768, "grad_norm": 1.7364165782928467, "learning_rate": 1.4395662424369622e-06, "loss": 0.19521808624267578, "step": 6430 }, { "epoch": 0.8694212082805238, "grad_norm": 0.9872238636016846, "learning_rate": 1.436641857456355e-06, "loss": 0.12627077102661133, "step": 6431 }, { "epoch": 0.8695564005069708, "grad_norm": 0.833183765411377, "learning_rate": 1.4337202964392409e-06, "loss": 0.18872451782226562, "step": 6432 }, { "epoch": 0.8696915927334178, "grad_norm": 0.7500179409980774, "learning_rate": 1.4308015599939033e-06, "loss": 0.13712453842163086, "step": 6433 }, { "epoch": 0.8698267849598648, "grad_norm": 1.0205035209655762, "learning_rate": 1.4278856487280428e-06, "loss": 0.2464752197265625, "step": 6434 }, { "epoch": 0.8699619771863117, "grad_norm": 0.7680830359458923, "learning_rate": 1.4249725632487653e-06, "loss": 0.13248729705810547, "step": 6435 }, { "epoch": 0.8700971694127587, "grad_norm": 0.9121467471122742, "learning_rate": 1.4220623041625924e-06, "loss": 0.122802734375, "step": 6436 }, { "epoch": 0.8702323616392057, "grad_norm": 0.8302256464958191, "learning_rate": 1.4191548720754527e-06, "loss": 0.13457965850830078, "step": 6437 }, { "epoch": 0.8703675538656527, "grad_norm": 1.1469273567199707, "learning_rate": 1.4162502675926887e-06, "loss": 0.1412487030029297, "step": 6438 }, { "epoch": 0.8705027460920997, "grad_norm": 1.5004311800003052, "learning_rate": 1.4133484913190596e-06, "loss": 0.2181873321533203, "step": 6439 }, { "epoch": 0.8706379383185466, "grad_norm": 0.7366278767585754, "learning_rate": 1.4104495438587295e-06, "loss": 0.13414216041564941, "step": 6440 }, { "epoch": 0.8707731305449936, "grad_norm": 0.7611256241798401, "learning_rate": 1.4075534258152667e-06, "loss": 0.12714362144470215, "step": 6441 }, { "epoch": 0.8709083227714406, "grad_norm": 0.927427351474762, "learning_rate": 1.4046601377916673e-06, "loss": 0.18413352966308594, "step": 6442 }, { "epoch": 0.8710435149978876, "grad_norm": 1.1897666454315186, "learning_rate": 1.4017696803903246e-06, "loss": 0.1521916389465332, "step": 6443 }, { "epoch": 0.8711787072243345, "grad_norm": 1.5434967279434204, "learning_rate": 1.3988820542130504e-06, "loss": 0.13733363151550293, "step": 6444 }, { "epoch": 0.8713138994507815, "grad_norm": 0.9383054375648499, "learning_rate": 1.395997259861067e-06, "loss": 0.15607070922851562, "step": 6445 }, { "epoch": 0.8714490916772285, "grad_norm": 0.9628801941871643, "learning_rate": 1.3931152979349926e-06, "loss": 0.11391592025756836, "step": 6446 }, { "epoch": 0.8715842839036756, "grad_norm": 1.1624178886413574, "learning_rate": 1.3902361690348769e-06, "loss": 0.18275737762451172, "step": 6447 }, { "epoch": 0.8717194761301226, "grad_norm": 1.4009239673614502, "learning_rate": 1.3873598737601639e-06, "loss": 0.21906280517578125, "step": 6448 }, { "epoch": 0.8718546683565696, "grad_norm": 2.043870687484741, "learning_rate": 1.3844864127097229e-06, "loss": 0.1984386444091797, "step": 6449 }, { "epoch": 0.8719898605830165, "grad_norm": 1.173125147819519, "learning_rate": 1.3816157864818151e-06, "loss": 0.18511009216308594, "step": 6450 }, { "epoch": 0.8721250528094635, "grad_norm": 2.294313669204712, "learning_rate": 1.3787479956741194e-06, "loss": 0.2332448959350586, "step": 6451 }, { "epoch": 0.8722602450359105, "grad_norm": 1.191086769104004, "learning_rate": 1.3758830408837314e-06, "loss": 0.17751431465148926, "step": 6452 }, { "epoch": 0.8723954372623575, "grad_norm": 1.0550912618637085, "learning_rate": 1.3730209227071439e-06, "loss": 0.16355609893798828, "step": 6453 }, { "epoch": 0.8725306294888044, "grad_norm": 0.7120527625083923, "learning_rate": 1.3701616417402734e-06, "loss": 0.14161574840545654, "step": 6454 }, { "epoch": 0.8726658217152514, "grad_norm": 1.025841474533081, "learning_rate": 1.367305198578429e-06, "loss": 0.17511940002441406, "step": 6455 }, { "epoch": 0.8728010139416984, "grad_norm": 0.7685655355453491, "learning_rate": 1.36445159381634e-06, "loss": 0.15527749061584473, "step": 6456 }, { "epoch": 0.8729362061681454, "grad_norm": 0.9519075751304626, "learning_rate": 1.361600828048144e-06, "loss": 0.15294861793518066, "step": 6457 }, { "epoch": 0.8730713983945924, "grad_norm": 1.369391679763794, "learning_rate": 1.3587529018673816e-06, "loss": 0.17380046844482422, "step": 6458 }, { "epoch": 0.8732065906210393, "grad_norm": 1.509925127029419, "learning_rate": 1.3559078158670152e-06, "loss": 0.14191436767578125, "step": 6459 }, { "epoch": 0.8733417828474863, "grad_norm": 0.9091055989265442, "learning_rate": 1.353065570639394e-06, "loss": 0.18527793884277344, "step": 6460 }, { "epoch": 0.8734769750739333, "grad_norm": 1.8208065032958984, "learning_rate": 1.3502261667763e-06, "loss": 0.17343950271606445, "step": 6461 }, { "epoch": 0.8736121673003803, "grad_norm": 1.1622000932693481, "learning_rate": 1.3473896048689067e-06, "loss": 0.1895294189453125, "step": 6462 }, { "epoch": 0.8737473595268272, "grad_norm": 1.209180235862732, "learning_rate": 1.3445558855078017e-06, "loss": 0.22981643676757812, "step": 6463 }, { "epoch": 0.8738825517532742, "grad_norm": 1.369468092918396, "learning_rate": 1.3417250092829814e-06, "loss": 0.1377144455909729, "step": 6464 }, { "epoch": 0.8740177439797212, "grad_norm": 0.9143245816230774, "learning_rate": 1.338896976783846e-06, "loss": 0.10422086715698242, "step": 6465 }, { "epoch": 0.8741529362061682, "grad_norm": 1.2839452028274536, "learning_rate": 1.336071788599213e-06, "loss": 0.14771032333374023, "step": 6466 }, { "epoch": 0.8742881284326152, "grad_norm": 0.6701331734657288, "learning_rate": 1.3332494453172982e-06, "loss": 0.16073846817016602, "step": 6467 }, { "epoch": 0.8744233206590621, "grad_norm": 2.085589647293091, "learning_rate": 1.3304299475257287e-06, "loss": 0.22716903686523438, "step": 6468 }, { "epoch": 0.8745585128855091, "grad_norm": 1.4576971530914307, "learning_rate": 1.3276132958115394e-06, "loss": 0.14553475379943848, "step": 6469 }, { "epoch": 0.8746937051119561, "grad_norm": 0.853722333908081, "learning_rate": 1.32479949076117e-06, "loss": 0.18283796310424805, "step": 6470 }, { "epoch": 0.8748288973384031, "grad_norm": 0.9312347173690796, "learning_rate": 1.3219885329604747e-06, "loss": 0.1799945831298828, "step": 6471 }, { "epoch": 0.8749640895648501, "grad_norm": 1.914143443107605, "learning_rate": 1.319180422994709e-06, "loss": 0.17462730407714844, "step": 6472 }, { "epoch": 0.875099281791297, "grad_norm": 0.905396580696106, "learning_rate": 1.3163751614485287e-06, "loss": 0.14744949340820312, "step": 6473 }, { "epoch": 0.875234474017744, "grad_norm": 0.9209718108177185, "learning_rate": 1.3135727489060113e-06, "loss": 0.12139225006103516, "step": 6474 }, { "epoch": 0.875369666244191, "grad_norm": 0.7193505764007568, "learning_rate": 1.3107731859506317e-06, "loss": 0.1452922821044922, "step": 6475 }, { "epoch": 0.875504858470638, "grad_norm": 1.2635722160339355, "learning_rate": 1.3079764731652772e-06, "loss": 0.15636277198791504, "step": 6476 }, { "epoch": 0.875640050697085, "grad_norm": 1.4347875118255615, "learning_rate": 1.3051826111322368e-06, "loss": 0.22640037536621094, "step": 6477 }, { "epoch": 0.8757752429235319, "grad_norm": 0.9692603945732117, "learning_rate": 1.3023916004332021e-06, "loss": 0.16889095306396484, "step": 6478 }, { "epoch": 0.8759104351499789, "grad_norm": 0.5560367703437805, "learning_rate": 1.2996034416492847e-06, "loss": 0.12021636962890625, "step": 6479 }, { "epoch": 0.8760456273764259, "grad_norm": 1.7467150688171387, "learning_rate": 1.2968181353609854e-06, "loss": 0.20093071460723877, "step": 6480 }, { "epoch": 0.8761808196028729, "grad_norm": 0.7838498950004578, "learning_rate": 1.2940356821482285e-06, "loss": 0.15776348114013672, "step": 6481 }, { "epoch": 0.8763160118293198, "grad_norm": 1.67617666721344, "learning_rate": 1.291256082590334e-06, "loss": 0.17545604705810547, "step": 6482 }, { "epoch": 0.8764512040557668, "grad_norm": 0.7530940175056458, "learning_rate": 1.2884793372660208e-06, "loss": 0.13154888153076172, "step": 6483 }, { "epoch": 0.8765863962822138, "grad_norm": 1.8822190761566162, "learning_rate": 1.285705446753433e-06, "loss": 0.14623165130615234, "step": 6484 }, { "epoch": 0.8767215885086608, "grad_norm": 1.1215318441390991, "learning_rate": 1.2829344116301e-06, "loss": 0.13733911514282227, "step": 6485 }, { "epoch": 0.8768567807351078, "grad_norm": 1.067826271057129, "learning_rate": 1.2801662324729774e-06, "loss": 0.1979503631591797, "step": 6486 }, { "epoch": 0.8769919729615547, "grad_norm": 2.797069787979126, "learning_rate": 1.2774009098584055e-06, "loss": 0.19020843505859375, "step": 6487 }, { "epoch": 0.8771271651880017, "grad_norm": 1.5129469633102417, "learning_rate": 1.274638444362139e-06, "loss": 0.1667652130126953, "step": 6488 }, { "epoch": 0.8772623574144487, "grad_norm": 1.2476168870925903, "learning_rate": 1.2718788365593443e-06, "loss": 0.20147371292114258, "step": 6489 }, { "epoch": 0.8773975496408957, "grad_norm": 0.6584613919258118, "learning_rate": 1.26912208702458e-06, "loss": 0.10057687759399414, "step": 6490 }, { "epoch": 0.8775327418673426, "grad_norm": 0.9104622006416321, "learning_rate": 1.2663681963318242e-06, "loss": 0.14842987060546875, "step": 6491 }, { "epoch": 0.8776679340937896, "grad_norm": 1.123921513557434, "learning_rate": 1.2636171650544443e-06, "loss": 0.12040328979492188, "step": 6492 }, { "epoch": 0.8778031263202366, "grad_norm": 1.5896222591400146, "learning_rate": 1.260868993765219e-06, "loss": 0.1627359390258789, "step": 6493 }, { "epoch": 0.8779383185466836, "grad_norm": 0.8061502575874329, "learning_rate": 1.258123683036339e-06, "loss": 0.14883995056152344, "step": 6494 }, { "epoch": 0.8780735107731306, "grad_norm": 1.2326322793960571, "learning_rate": 1.2553812334393872e-06, "loss": 0.1844642162322998, "step": 6495 }, { "epoch": 0.8782087029995775, "grad_norm": 0.980921745300293, "learning_rate": 1.2526416455453582e-06, "loss": 0.18453216552734375, "step": 6496 }, { "epoch": 0.8783438952260245, "grad_norm": 1.2596570253372192, "learning_rate": 1.249904919924646e-06, "loss": 0.14360570907592773, "step": 6497 }, { "epoch": 0.8784790874524715, "grad_norm": 1.179748296737671, "learning_rate": 1.2471710571470579e-06, "loss": 0.18545126914978027, "step": 6498 }, { "epoch": 0.8786142796789185, "grad_norm": 1.4292460680007935, "learning_rate": 1.2444400577817922e-06, "loss": 0.1529979705810547, "step": 6499 }, { "epoch": 0.8787494719053655, "grad_norm": 0.7834233641624451, "learning_rate": 1.2417119223974621e-06, "loss": 0.1285996437072754, "step": 6500 }, { "epoch": 0.8788846641318124, "grad_norm": 1.0200555324554443, "learning_rate": 1.2389866515620768e-06, "loss": 0.1552438735961914, "step": 6501 }, { "epoch": 0.8790198563582594, "grad_norm": 1.1155558824539185, "learning_rate": 1.2362642458430505e-06, "loss": 0.1605844497680664, "step": 6502 }, { "epoch": 0.8791550485847064, "grad_norm": 1.4928619861602783, "learning_rate": 1.2335447058072103e-06, "loss": 0.1800251007080078, "step": 6503 }, { "epoch": 0.8792902408111534, "grad_norm": 0.7532255053520203, "learning_rate": 1.230828032020771e-06, "loss": 0.15865063667297363, "step": 6504 }, { "epoch": 0.8794254330376003, "grad_norm": 1.2514050006866455, "learning_rate": 1.2281142250493638e-06, "loss": 0.18199777603149414, "step": 6505 }, { "epoch": 0.8795606252640473, "grad_norm": 1.4715584516525269, "learning_rate": 1.225403285458015e-06, "loss": 0.23967552185058594, "step": 6506 }, { "epoch": 0.8796958174904943, "grad_norm": 1.1212742328643799, "learning_rate": 1.2226952138111546e-06, "loss": 0.1686382293701172, "step": 6507 }, { "epoch": 0.8798310097169413, "grad_norm": 0.8724827170372009, "learning_rate": 1.219990010672622e-06, "loss": 0.1712191104888916, "step": 6508 }, { "epoch": 0.8799662019433883, "grad_norm": 1.4848971366882324, "learning_rate": 1.2172876766056562e-06, "loss": 0.1912059783935547, "step": 6509 }, { "epoch": 0.8801013941698352, "grad_norm": 1.0990840196609497, "learning_rate": 1.2145882121728906e-06, "loss": 0.15616416931152344, "step": 6510 }, { "epoch": 0.8802365863962822, "grad_norm": 0.9132286906242371, "learning_rate": 1.2118916179363727e-06, "loss": 0.09194135665893555, "step": 6511 }, { "epoch": 0.8803717786227292, "grad_norm": 0.6504778265953064, "learning_rate": 1.209197894457546e-06, "loss": 0.1567840576171875, "step": 6512 }, { "epoch": 0.8805069708491762, "grad_norm": 1.0683306455612183, "learning_rate": 1.2065070422972606e-06, "loss": 0.1783415675163269, "step": 6513 }, { "epoch": 0.8806421630756232, "grad_norm": 1.5259732007980347, "learning_rate": 1.2038190620157685e-06, "loss": 0.1909313201904297, "step": 6514 }, { "epoch": 0.8807773553020701, "grad_norm": 1.4287910461425781, "learning_rate": 1.2011339541727117e-06, "loss": 0.20134520530700684, "step": 6515 }, { "epoch": 0.8809125475285171, "grad_norm": 1.361433506011963, "learning_rate": 1.198451719327155e-06, "loss": 0.131264328956604, "step": 6516 }, { "epoch": 0.8810477397549641, "grad_norm": 0.9544827938079834, "learning_rate": 1.1957723580375447e-06, "loss": 0.20270538330078125, "step": 6517 }, { "epoch": 0.8811829319814111, "grad_norm": 1.1416447162628174, "learning_rate": 1.193095870861748e-06, "loss": 0.22045516967773438, "step": 6518 }, { "epoch": 0.881318124207858, "grad_norm": 1.3852561712265015, "learning_rate": 1.1904222583570156e-06, "loss": 0.14323043823242188, "step": 6519 }, { "epoch": 0.881453316434305, "grad_norm": 1.7089171409606934, "learning_rate": 1.1877515210800077e-06, "loss": 0.1660609245300293, "step": 6520 }, { "epoch": 0.881588508660752, "grad_norm": 0.8545126914978027, "learning_rate": 1.1850836595867925e-06, "loss": 0.15291047096252441, "step": 6521 }, { "epoch": 0.881723700887199, "grad_norm": 1.2741386890411377, "learning_rate": 1.1824186744328259e-06, "loss": 0.18982505798339844, "step": 6522 }, { "epoch": 0.881858893113646, "grad_norm": 0.9314596056938171, "learning_rate": 1.179756566172982e-06, "loss": 0.1536264419555664, "step": 6523 }, { "epoch": 0.8819940853400929, "grad_norm": 1.5855191946029663, "learning_rate": 1.177097335361516e-06, "loss": 0.2144012451171875, "step": 6524 }, { "epoch": 0.8821292775665399, "grad_norm": 0.6500311493873596, "learning_rate": 1.1744409825520969e-06, "loss": 0.10776782035827637, "step": 6525 }, { "epoch": 0.8822644697929869, "grad_norm": 0.8030337691307068, "learning_rate": 1.171787508297792e-06, "loss": 0.15600013732910156, "step": 6526 }, { "epoch": 0.8823996620194339, "grad_norm": 1.2796467542648315, "learning_rate": 1.1691369131510676e-06, "loss": 0.15424847602844238, "step": 6527 }, { "epoch": 0.8825348542458809, "grad_norm": 0.7039836645126343, "learning_rate": 1.1664891976637992e-06, "loss": 0.1290445327758789, "step": 6528 }, { "epoch": 0.8826700464723278, "grad_norm": 1.9667648077011108, "learning_rate": 1.1638443623872442e-06, "loss": 0.22120535373687744, "step": 6529 }, { "epoch": 0.8828052386987748, "grad_norm": 0.7460547089576721, "learning_rate": 1.1612024078720752e-06, "loss": 0.13184571266174316, "step": 6530 }, { "epoch": 0.8829404309252218, "grad_norm": 0.9625377655029297, "learning_rate": 1.1585633346683655e-06, "loss": 0.14002609252929688, "step": 6531 }, { "epoch": 0.8830756231516688, "grad_norm": 1.9665478467941284, "learning_rate": 1.155927143325579e-06, "loss": 0.16969597339630127, "step": 6532 }, { "epoch": 0.8832108153781157, "grad_norm": 1.0385044813156128, "learning_rate": 1.1532938343925887e-06, "loss": 0.17267227172851562, "step": 6533 }, { "epoch": 0.8833460076045627, "grad_norm": 1.2857882976531982, "learning_rate": 1.1506634084176587e-06, "loss": 0.162506103515625, "step": 6534 }, { "epoch": 0.8834811998310097, "grad_norm": 0.7613010406494141, "learning_rate": 1.148035865948463e-06, "loss": 0.13284587860107422, "step": 6535 }, { "epoch": 0.8836163920574567, "grad_norm": 1.1295785903930664, "learning_rate": 1.1454112075320688e-06, "loss": 0.16387176513671875, "step": 6536 }, { "epoch": 0.8837515842839037, "grad_norm": 0.8276877999305725, "learning_rate": 1.1427894337149426e-06, "loss": 0.1426563262939453, "step": 6537 }, { "epoch": 0.8838867765103506, "grad_norm": 1.4408518075942993, "learning_rate": 1.1401705450429506e-06, "loss": 0.1465773582458496, "step": 6538 }, { "epoch": 0.8840219687367976, "grad_norm": 1.148671269416809, "learning_rate": 1.1375545420613586e-06, "loss": 0.1928114891052246, "step": 6539 }, { "epoch": 0.8841571609632446, "grad_norm": 0.7789602875709534, "learning_rate": 1.1349414253148377e-06, "loss": 0.17882466316223145, "step": 6540 }, { "epoch": 0.8842923531896916, "grad_norm": 1.3094373941421509, "learning_rate": 1.1323311953474524e-06, "loss": 0.23018646240234375, "step": 6541 }, { "epoch": 0.8844275454161385, "grad_norm": 1.290117859840393, "learning_rate": 1.1297238527026582e-06, "loss": 0.20974111557006836, "step": 6542 }, { "epoch": 0.8845627376425855, "grad_norm": 0.8406637907028198, "learning_rate": 1.1271193979233258e-06, "loss": 0.14974021911621094, "step": 6543 }, { "epoch": 0.8846979298690325, "grad_norm": 1.1002711057662964, "learning_rate": 1.1245178315517113e-06, "loss": 0.19750595092773438, "step": 6544 }, { "epoch": 0.8848331220954795, "grad_norm": 0.8967341184616089, "learning_rate": 1.1219191541294798e-06, "loss": 0.13075494766235352, "step": 6545 }, { "epoch": 0.8849683143219265, "grad_norm": 0.8628976345062256, "learning_rate": 1.1193233661976887e-06, "loss": 0.1522972583770752, "step": 6546 }, { "epoch": 0.8851035065483734, "grad_norm": 2.2426846027374268, "learning_rate": 1.1167304682967904e-06, "loss": 0.22336578369140625, "step": 6547 }, { "epoch": 0.8852386987748204, "grad_norm": 0.8924522995948792, "learning_rate": 1.114140460966645e-06, "loss": 0.16767597198486328, "step": 6548 }, { "epoch": 0.8853738910012674, "grad_norm": 0.9396560788154602, "learning_rate": 1.111553344746501e-06, "loss": 0.17200660705566406, "step": 6549 }, { "epoch": 0.8855090832277144, "grad_norm": 0.8493363857269287, "learning_rate": 1.1089691201750174e-06, "loss": 0.14303302764892578, "step": 6550 }, { "epoch": 0.8856442754541614, "grad_norm": 1.7896114587783813, "learning_rate": 1.106387787790239e-06, "loss": 0.19734787940979004, "step": 6551 }, { "epoch": 0.8857794676806083, "grad_norm": 1.8230129480361938, "learning_rate": 1.1038093481296091e-06, "loss": 0.24407577514648438, "step": 6552 }, { "epoch": 0.8859146599070553, "grad_norm": 1.031961441040039, "learning_rate": 1.10123380172998e-06, "loss": 0.1483926773071289, "step": 6553 }, { "epoch": 0.8860498521335023, "grad_norm": 1.0547319650650024, "learning_rate": 1.098661149127586e-06, "loss": 0.16070556640625, "step": 6554 }, { "epoch": 0.8861850443599493, "grad_norm": 0.7637943625450134, "learning_rate": 1.0960913908580788e-06, "loss": 0.12938213348388672, "step": 6555 }, { "epoch": 0.8863202365863962, "grad_norm": 1.0752573013305664, "learning_rate": 1.0935245274564852e-06, "loss": 0.16737616062164307, "step": 6556 }, { "epoch": 0.8864554288128432, "grad_norm": 0.7607343196868896, "learning_rate": 1.0909605594572413e-06, "loss": 0.1563572883605957, "step": 6557 }, { "epoch": 0.8865906210392902, "grad_norm": 0.940025806427002, "learning_rate": 1.0883994873941816e-06, "loss": 0.17044353485107422, "step": 6558 }, { "epoch": 0.8867258132657372, "grad_norm": 1.5754863023757935, "learning_rate": 1.0858413118005345e-06, "loss": 0.1802845001220703, "step": 6559 }, { "epoch": 0.8868610054921842, "grad_norm": 0.9095927476882935, "learning_rate": 1.0832860332089288e-06, "loss": 0.1260828971862793, "step": 6560 }, { "epoch": 0.8869961977186311, "grad_norm": 2.0084259510040283, "learning_rate": 1.0807336521513828e-06, "loss": 0.17546653747558594, "step": 6561 }, { "epoch": 0.8871313899450781, "grad_norm": 1.382297158241272, "learning_rate": 1.0781841691593142e-06, "loss": 0.20831632614135742, "step": 6562 }, { "epoch": 0.8872665821715251, "grad_norm": 0.7710789442062378, "learning_rate": 1.0756375847635435e-06, "loss": 0.13666105270385742, "step": 6563 }, { "epoch": 0.8874017743979721, "grad_norm": 0.8217912912368774, "learning_rate": 1.0730938994942818e-06, "loss": 0.11766195297241211, "step": 6564 }, { "epoch": 0.887536966624419, "grad_norm": 1.270566701889038, "learning_rate": 1.070553113881137e-06, "loss": 0.11794900894165039, "step": 6565 }, { "epoch": 0.887672158850866, "grad_norm": 1.6255724430084229, "learning_rate": 1.0680152284531158e-06, "loss": 0.19935989379882812, "step": 6566 }, { "epoch": 0.887807351077313, "grad_norm": 1.008938193321228, "learning_rate": 1.0654802437386157e-06, "loss": 0.2162036895751953, "step": 6567 }, { "epoch": 0.88794254330376, "grad_norm": 2.335869312286377, "learning_rate": 1.062948160265438e-06, "loss": 0.17975997924804688, "step": 6568 }, { "epoch": 0.888077735530207, "grad_norm": 1.128427267074585, "learning_rate": 1.0604189785607772e-06, "loss": 0.17583847045898438, "step": 6569 }, { "epoch": 0.888212927756654, "grad_norm": 1.059628963470459, "learning_rate": 1.0578926991512171e-06, "loss": 0.14550495147705078, "step": 6570 }, { "epoch": 0.8883481199831009, "grad_norm": 0.9011848568916321, "learning_rate": 1.0553693225627458e-06, "loss": 0.15854835510253906, "step": 6571 }, { "epoch": 0.8884833122095479, "grad_norm": 0.9133105874061584, "learning_rate": 1.0528488493207444e-06, "loss": 0.1428055763244629, "step": 6572 }, { "epoch": 0.8886185044359949, "grad_norm": 1.562174677848816, "learning_rate": 1.0503312799499898e-06, "loss": 0.18050241470336914, "step": 6573 }, { "epoch": 0.8887536966624419, "grad_norm": 1.0124595165252686, "learning_rate": 1.0478166149746476e-06, "loss": 0.17652177810668945, "step": 6574 }, { "epoch": 0.8888888888888888, "grad_norm": 1.5915160179138184, "learning_rate": 1.0453048549182892e-06, "loss": 0.1987318992614746, "step": 6575 }, { "epoch": 0.8890240811153358, "grad_norm": 1.004435658454895, "learning_rate": 1.0427960003038744e-06, "loss": 0.14862060546875, "step": 6576 }, { "epoch": 0.8891592733417828, "grad_norm": 0.7279787659645081, "learning_rate": 1.040290051653764e-06, "loss": 0.12130200862884521, "step": 6577 }, { "epoch": 0.8892944655682298, "grad_norm": 1.5734901428222656, "learning_rate": 1.0377870094897085e-06, "loss": 0.19864702224731445, "step": 6578 }, { "epoch": 0.8894296577946768, "grad_norm": 1.0018874406814575, "learning_rate": 1.0352868743328497e-06, "loss": 0.12802064418792725, "step": 6579 }, { "epoch": 0.8895648500211237, "grad_norm": 2.0887367725372314, "learning_rate": 1.032789646703733e-06, "loss": 0.18796443939208984, "step": 6580 }, { "epoch": 0.8897000422475707, "grad_norm": 0.7076752185821533, "learning_rate": 1.0302953271222938e-06, "loss": 0.13414430618286133, "step": 6581 }, { "epoch": 0.8898352344740177, "grad_norm": 0.9663779735565186, "learning_rate": 1.0278039161078634e-06, "loss": 0.21762943267822266, "step": 6582 }, { "epoch": 0.8899704267004648, "grad_norm": 0.9245215058326721, "learning_rate": 1.0253154141791705e-06, "loss": 0.16146814823150635, "step": 6583 }, { "epoch": 0.8901056189269118, "grad_norm": 0.6266096234321594, "learning_rate": 1.0228298218543253e-06, "loss": 0.12328863143920898, "step": 6584 }, { "epoch": 0.8902408111533587, "grad_norm": 0.831498384475708, "learning_rate": 1.020347139650849e-06, "loss": 0.15243816375732422, "step": 6585 }, { "epoch": 0.8903760033798057, "grad_norm": 0.944320023059845, "learning_rate": 1.0178673680856448e-06, "loss": 0.1877889633178711, "step": 6586 }, { "epoch": 0.8905111956062527, "grad_norm": 0.9225878715515137, "learning_rate": 1.0153905076750196e-06, "loss": 0.1655750274658203, "step": 6587 }, { "epoch": 0.8906463878326997, "grad_norm": 0.7733673453330994, "learning_rate": 1.0129165589346644e-06, "loss": 0.17371702194213867, "step": 6588 }, { "epoch": 0.8907815800591466, "grad_norm": 1.5543419122695923, "learning_rate": 1.0104455223796688e-06, "loss": 0.16976165771484375, "step": 6589 }, { "epoch": 0.8909167722855936, "grad_norm": 1.9064877033233643, "learning_rate": 1.0079773985245178e-06, "loss": 0.1680774688720703, "step": 6590 }, { "epoch": 0.8910519645120406, "grad_norm": 1.4115545749664307, "learning_rate": 1.0055121878830837e-06, "loss": 0.2322063446044922, "step": 6591 }, { "epoch": 0.8911871567384876, "grad_norm": 1.4145376682281494, "learning_rate": 1.0030498909686458e-06, "loss": 0.20521163940429688, "step": 6592 }, { "epoch": 0.8913223489649346, "grad_norm": 1.4466968774795532, "learning_rate": 1.0005905082938593e-06, "loss": 0.17391395568847656, "step": 6593 }, { "epoch": 0.8914575411913815, "grad_norm": 0.7907772660255432, "learning_rate": 9.981340403707794e-07, "loss": 0.14064311981201172, "step": 6594 }, { "epoch": 0.8915927334178285, "grad_norm": 0.6483770608901978, "learning_rate": 9.956804877108638e-07, "loss": 0.08921170234680176, "step": 6595 }, { "epoch": 0.8917279256442755, "grad_norm": 1.345627784729004, "learning_rate": 9.932298508249488e-07, "loss": 0.15694713592529297, "step": 6596 }, { "epoch": 0.8918631178707225, "grad_norm": 0.9512044787406921, "learning_rate": 9.907821302232729e-07, "loss": 0.18037033081054688, "step": 6597 }, { "epoch": 0.8919983100971695, "grad_norm": 0.7375819087028503, "learning_rate": 9.883373264154633e-07, "loss": 0.15340042114257812, "step": 6598 }, { "epoch": 0.8921335023236164, "grad_norm": 1.1011661291122437, "learning_rate": 9.858954399105397e-07, "loss": 0.16709041595458984, "step": 6599 }, { "epoch": 0.8922686945500634, "grad_norm": 0.9979453682899475, "learning_rate": 9.834564712169202e-07, "loss": 0.19518184661865234, "step": 6600 }, { "epoch": 0.8924038867765104, "grad_norm": 1.0354499816894531, "learning_rate": 9.81020420842409e-07, "loss": 0.16480469703674316, "step": 6601 }, { "epoch": 0.8925390790029574, "grad_norm": 0.8557064533233643, "learning_rate": 9.785872892942033e-07, "loss": 0.15523147583007812, "step": 6602 }, { "epoch": 0.8926742712294043, "grad_norm": 0.6182724833488464, "learning_rate": 9.761570770788964e-07, "loss": 0.1368236541748047, "step": 6603 }, { "epoch": 0.8928094634558513, "grad_norm": 1.4018851518630981, "learning_rate": 9.737297847024685e-07, "loss": 0.20059490203857422, "step": 6604 }, { "epoch": 0.8929446556822983, "grad_norm": 1.0296344757080078, "learning_rate": 9.713054126702968e-07, "loss": 0.19029617309570312, "step": 6605 }, { "epoch": 0.8930798479087453, "grad_norm": 1.1691275835037231, "learning_rate": 9.688839614871497e-07, "loss": 0.1652843952178955, "step": 6606 }, { "epoch": 0.8932150401351923, "grad_norm": 1.9045733213424683, "learning_rate": 9.664654316571852e-07, "loss": 0.23373985290527344, "step": 6607 }, { "epoch": 0.8933502323616392, "grad_norm": 1.368884563446045, "learning_rate": 9.640498236839507e-07, "loss": 0.1771697998046875, "step": 6608 }, { "epoch": 0.8934854245880862, "grad_norm": 0.8743281960487366, "learning_rate": 9.616371380703953e-07, "loss": 0.18822669982910156, "step": 6609 }, { "epoch": 0.8936206168145332, "grad_norm": 0.8311845660209656, "learning_rate": 9.592273753188507e-07, "loss": 0.14622116088867188, "step": 6610 }, { "epoch": 0.8937558090409802, "grad_norm": 1.4363242387771606, "learning_rate": 9.568205359310372e-07, "loss": 0.15888357162475586, "step": 6611 }, { "epoch": 0.8938910012674272, "grad_norm": 1.1034234762191772, "learning_rate": 9.544166204080772e-07, "loss": 0.1808767318725586, "step": 6612 }, { "epoch": 0.8940261934938741, "grad_norm": 0.9408245086669922, "learning_rate": 9.520156292504739e-07, "loss": 0.1525440216064453, "step": 6613 }, { "epoch": 0.8941613857203211, "grad_norm": 2.0564417839050293, "learning_rate": 9.496175629581322e-07, "loss": 0.17179840803146362, "step": 6614 }, { "epoch": 0.8942965779467681, "grad_norm": 1.7813953161239624, "learning_rate": 9.472224220303427e-07, "loss": 0.19536590576171875, "step": 6615 }, { "epoch": 0.8944317701732151, "grad_norm": 0.8127740621566772, "learning_rate": 9.448302069657799e-07, "loss": 0.1358175277709961, "step": 6616 }, { "epoch": 0.894566962399662, "grad_norm": 1.2911267280578613, "learning_rate": 9.424409182625205e-07, "loss": 0.15685749053955078, "step": 6617 }, { "epoch": 0.894702154626109, "grad_norm": 1.1219089031219482, "learning_rate": 9.40054556418023e-07, "loss": 0.22308731079101562, "step": 6618 }, { "epoch": 0.894837346852556, "grad_norm": 1.0438556671142578, "learning_rate": 9.376711219291483e-07, "loss": 0.1798267364501953, "step": 6619 }, { "epoch": 0.894972539079003, "grad_norm": 1.3127247095108032, "learning_rate": 9.352906152921348e-07, "loss": 0.16666698455810547, "step": 6620 }, { "epoch": 0.89510773130545, "grad_norm": 0.877829909324646, "learning_rate": 9.32913037002614e-07, "loss": 0.1724621057510376, "step": 6621 }, { "epoch": 0.8952429235318969, "grad_norm": 0.7444975972175598, "learning_rate": 9.30538387555615e-07, "loss": 0.1267385482788086, "step": 6622 }, { "epoch": 0.8953781157583439, "grad_norm": 2.1655256748199463, "learning_rate": 9.281666674455508e-07, "loss": 0.2221851348876953, "step": 6623 }, { "epoch": 0.8955133079847909, "grad_norm": 1.118656873703003, "learning_rate": 9.257978771662295e-07, "loss": 0.17228031158447266, "step": 6624 }, { "epoch": 0.8956485002112379, "grad_norm": 1.2261126041412354, "learning_rate": 9.234320172108418e-07, "loss": 0.17986488342285156, "step": 6625 }, { "epoch": 0.8957836924376849, "grad_norm": 0.998991847038269, "learning_rate": 9.210690880719719e-07, "loss": 0.17502212524414062, "step": 6626 }, { "epoch": 0.8959188846641318, "grad_norm": 1.0733058452606201, "learning_rate": 9.187090902415962e-07, "loss": 0.19030380249023438, "step": 6627 }, { "epoch": 0.8960540768905788, "grad_norm": 1.1971570253372192, "learning_rate": 9.163520242110784e-07, "loss": 0.16012287139892578, "step": 6628 }, { "epoch": 0.8961892691170258, "grad_norm": 1.0565327405929565, "learning_rate": 9.13997890471176e-07, "loss": 0.1644878387451172, "step": 6629 }, { "epoch": 0.8963244613434728, "grad_norm": 0.9064598083496094, "learning_rate": 9.116466895120251e-07, "loss": 0.1627044677734375, "step": 6630 }, { "epoch": 0.8964596535699197, "grad_norm": 0.8583405017852783, "learning_rate": 9.092984218231609e-07, "loss": 0.15228843688964844, "step": 6631 }, { "epoch": 0.8965948457963667, "grad_norm": 1.5336501598358154, "learning_rate": 9.069530878935072e-07, "loss": 0.22105026245117188, "step": 6632 }, { "epoch": 0.8967300380228137, "grad_norm": 1.000156044960022, "learning_rate": 9.046106882113753e-07, "loss": 0.1772022247314453, "step": 6633 }, { "epoch": 0.8968652302492607, "grad_norm": 0.9172965288162231, "learning_rate": 9.022712232644631e-07, "loss": 0.1481032371520996, "step": 6634 }, { "epoch": 0.8970004224757077, "grad_norm": 0.7959843873977661, "learning_rate": 8.999346935398611e-07, "loss": 0.15102195739746094, "step": 6635 }, { "epoch": 0.8971356147021546, "grad_norm": 1.2164933681488037, "learning_rate": 8.976010995240436e-07, "loss": 0.14605331420898438, "step": 6636 }, { "epoch": 0.8972708069286016, "grad_norm": 1.1046253442764282, "learning_rate": 8.952704417028818e-07, "loss": 0.1568288803100586, "step": 6637 }, { "epoch": 0.8974059991550486, "grad_norm": 1.0943857431411743, "learning_rate": 8.929427205616308e-07, "loss": 0.1493082046508789, "step": 6638 }, { "epoch": 0.8975411913814956, "grad_norm": 1.0855458974838257, "learning_rate": 8.906179365849332e-07, "loss": 0.2113513946533203, "step": 6639 }, { "epoch": 0.8976763836079426, "grad_norm": 1.7264057397842407, "learning_rate": 8.882960902568216e-07, "loss": 0.19024419784545898, "step": 6640 }, { "epoch": 0.8978115758343895, "grad_norm": 1.164091944694519, "learning_rate": 8.85977182060716e-07, "loss": 0.18418312072753906, "step": 6641 }, { "epoch": 0.8979467680608365, "grad_norm": 2.2955360412597656, "learning_rate": 8.836612124794285e-07, "loss": 0.20495223999023438, "step": 6642 }, { "epoch": 0.8980819602872835, "grad_norm": 0.7050706744194031, "learning_rate": 8.813481819951502e-07, "loss": 0.15507221221923828, "step": 6643 }, { "epoch": 0.8982171525137305, "grad_norm": 0.8720549941062927, "learning_rate": 8.790380910894724e-07, "loss": 0.18284130096435547, "step": 6644 }, { "epoch": 0.8983523447401774, "grad_norm": 1.4186044931411743, "learning_rate": 8.767309402433671e-07, "loss": 0.1862649917602539, "step": 6645 }, { "epoch": 0.8984875369666244, "grad_norm": 1.249140739440918, "learning_rate": 8.744267299371917e-07, "loss": 0.22250747680664062, "step": 6646 }, { "epoch": 0.8986227291930714, "grad_norm": 1.2052971124649048, "learning_rate": 8.721254606507023e-07, "loss": 0.23142433166503906, "step": 6647 }, { "epoch": 0.8987579214195184, "grad_norm": 1.3691017627716064, "learning_rate": 8.698271328630275e-07, "loss": 0.14511680603027344, "step": 6648 }, { "epoch": 0.8988931136459654, "grad_norm": 2.337817668914795, "learning_rate": 8.675317470526961e-07, "loss": 0.20157241821289062, "step": 6649 }, { "epoch": 0.8990283058724123, "grad_norm": 1.4767838716506958, "learning_rate": 8.652393036976159e-07, "loss": 0.20978212356567383, "step": 6650 }, { "epoch": 0.8991634980988593, "grad_norm": 0.9995988607406616, "learning_rate": 8.629498032750916e-07, "loss": 0.17191696166992188, "step": 6651 }, { "epoch": 0.8992986903253063, "grad_norm": 1.2044475078582764, "learning_rate": 8.606632462618069e-07, "loss": 0.1571044921875, "step": 6652 }, { "epoch": 0.8994338825517533, "grad_norm": 1.2899765968322754, "learning_rate": 8.583796331338311e-07, "loss": 0.16674983501434326, "step": 6653 }, { "epoch": 0.8995690747782003, "grad_norm": 1.4176361560821533, "learning_rate": 8.560989643666306e-07, "loss": 0.215118408203125, "step": 6654 }, { "epoch": 0.8997042670046472, "grad_norm": 0.7933415174484253, "learning_rate": 8.538212404350471e-07, "loss": 0.1616649627685547, "step": 6655 }, { "epoch": 0.8998394592310942, "grad_norm": 1.0810478925704956, "learning_rate": 8.515464618133228e-07, "loss": 0.18869686126708984, "step": 6656 }, { "epoch": 0.8999746514575412, "grad_norm": 1.2443526983261108, "learning_rate": 8.492746289750725e-07, "loss": 0.19379818439483643, "step": 6657 }, { "epoch": 0.9001098436839882, "grad_norm": 0.8801183104515076, "learning_rate": 8.470057423933026e-07, "loss": 0.1304638385772705, "step": 6658 }, { "epoch": 0.9002450359104351, "grad_norm": 1.096293568611145, "learning_rate": 8.447398025404118e-07, "loss": 0.15602248907089233, "step": 6659 }, { "epoch": 0.9003802281368821, "grad_norm": 1.0055476427078247, "learning_rate": 8.42476809888178e-07, "loss": 0.17836523056030273, "step": 6660 }, { "epoch": 0.9005154203633291, "grad_norm": 1.208188772201538, "learning_rate": 8.402167649077725e-07, "loss": 0.17304229736328125, "step": 6661 }, { "epoch": 0.9006506125897761, "grad_norm": 1.6339181661605835, "learning_rate": 8.379596680697454e-07, "loss": 0.18050909042358398, "step": 6662 }, { "epoch": 0.9007858048162231, "grad_norm": 0.986391007900238, "learning_rate": 8.357055198440328e-07, "loss": 0.1651301383972168, "step": 6663 }, { "epoch": 0.90092099704267, "grad_norm": 1.594878077507019, "learning_rate": 8.334543206999673e-07, "loss": 0.1588139533996582, "step": 6664 }, { "epoch": 0.901056189269117, "grad_norm": 1.9142035245895386, "learning_rate": 8.312060711062558e-07, "loss": 0.23285150527954102, "step": 6665 }, { "epoch": 0.901191381495564, "grad_norm": 1.1975116729736328, "learning_rate": 8.289607715309988e-07, "loss": 0.15429186820983887, "step": 6666 }, { "epoch": 0.901326573722011, "grad_norm": 1.0255736112594604, "learning_rate": 8.267184224416791e-07, "loss": 0.18127059936523438, "step": 6667 }, { "epoch": 0.901461765948458, "grad_norm": 1.7517188787460327, "learning_rate": 8.244790243051614e-07, "loss": 0.19010353088378906, "step": 6668 }, { "epoch": 0.9015969581749049, "grad_norm": 1.193472146987915, "learning_rate": 8.222425775877079e-07, "loss": 0.2161407470703125, "step": 6669 }, { "epoch": 0.9017321504013519, "grad_norm": 1.3372970819473267, "learning_rate": 8.200090827549527e-07, "loss": 0.2160816192626953, "step": 6670 }, { "epoch": 0.9018673426277989, "grad_norm": 0.7680690288543701, "learning_rate": 8.17778540271924e-07, "loss": 0.14060938358306885, "step": 6671 }, { "epoch": 0.9020025348542459, "grad_norm": 1.1983314752578735, "learning_rate": 8.155509506030334e-07, "loss": 0.20907354354858398, "step": 6672 }, { "epoch": 0.9021377270806928, "grad_norm": 0.630081057548523, "learning_rate": 8.133263142120717e-07, "loss": 0.12068510055541992, "step": 6673 }, { "epoch": 0.9022729193071398, "grad_norm": 1.5676183700561523, "learning_rate": 8.111046315622284e-07, "loss": 0.17276239395141602, "step": 6674 }, { "epoch": 0.9024081115335868, "grad_norm": 1.4925576448440552, "learning_rate": 8.088859031160633e-07, "loss": 0.19556236267089844, "step": 6675 }, { "epoch": 0.9025433037600338, "grad_norm": 1.6685117483139038, "learning_rate": 8.066701293355288e-07, "loss": 0.18534636497497559, "step": 6676 }, { "epoch": 0.9026784959864808, "grad_norm": 1.430798888206482, "learning_rate": 8.044573106819625e-07, "loss": 0.21676015853881836, "step": 6677 }, { "epoch": 0.9028136882129277, "grad_norm": 0.7649757862091064, "learning_rate": 8.022474476160824e-07, "loss": 0.15969347953796387, "step": 6678 }, { "epoch": 0.9029488804393747, "grad_norm": 1.0281206369400024, "learning_rate": 8.000405405979988e-07, "loss": 0.14366722106933594, "step": 6679 }, { "epoch": 0.9030840726658217, "grad_norm": 1.120094895362854, "learning_rate": 7.978365900871943e-07, "loss": 0.1974048614501953, "step": 6680 }, { "epoch": 0.9032192648922687, "grad_norm": 1.030008316040039, "learning_rate": 7.956355965425482e-07, "loss": 0.1503148078918457, "step": 6681 }, { "epoch": 0.9033544571187156, "grad_norm": 1.462011694908142, "learning_rate": 7.934375604223193e-07, "loss": 0.2095794677734375, "step": 6682 }, { "epoch": 0.9034896493451626, "grad_norm": 1.064103364944458, "learning_rate": 7.912424821841463e-07, "loss": 0.1659870147705078, "step": 6683 }, { "epoch": 0.9036248415716096, "grad_norm": 0.8238223195075989, "learning_rate": 7.89050362285062e-07, "loss": 0.14467620849609375, "step": 6684 }, { "epoch": 0.9037600337980566, "grad_norm": 1.45100998878479, "learning_rate": 7.868612011814713e-07, "loss": 0.16307497024536133, "step": 6685 }, { "epoch": 0.9038952260245036, "grad_norm": 0.8868352174758911, "learning_rate": 7.846749993291746e-07, "loss": 0.16602325439453125, "step": 6686 }, { "epoch": 0.9040304182509505, "grad_norm": 1.0470459461212158, "learning_rate": 7.824917571833445e-07, "loss": 0.11875343322753906, "step": 6687 }, { "epoch": 0.9041656104773975, "grad_norm": 1.3385058641433716, "learning_rate": 7.80311475198554e-07, "loss": 0.1608753204345703, "step": 6688 }, { "epoch": 0.9043008027038445, "grad_norm": 0.8118037581443787, "learning_rate": 7.781341538287384e-07, "loss": 0.14539504051208496, "step": 6689 }, { "epoch": 0.9044359949302915, "grad_norm": 1.8691107034683228, "learning_rate": 7.759597935272316e-07, "loss": 0.14951753616333008, "step": 6690 }, { "epoch": 0.9045711871567385, "grad_norm": 1.0722888708114624, "learning_rate": 7.7378839474675e-07, "loss": 0.1387958526611328, "step": 6691 }, { "epoch": 0.9047063793831854, "grad_norm": 0.7438547611236572, "learning_rate": 7.716199579393851e-07, "loss": 0.15665017068386078, "step": 6692 }, { "epoch": 0.9048415716096324, "grad_norm": 0.7806492447853088, "learning_rate": 7.694544835566259e-07, "loss": 0.12917137145996094, "step": 6693 }, { "epoch": 0.9049767638360794, "grad_norm": 1.0045160055160522, "learning_rate": 7.672919720493249e-07, "loss": 0.18025779724121094, "step": 6694 }, { "epoch": 0.9051119560625264, "grad_norm": 0.9888438582420349, "learning_rate": 7.651324238677338e-07, "loss": 0.16955792903900146, "step": 6695 }, { "epoch": 0.9052471482889733, "grad_norm": 0.858755350112915, "learning_rate": 7.629758394614828e-07, "loss": 0.14068222045898438, "step": 6696 }, { "epoch": 0.9053823405154203, "grad_norm": 0.884871780872345, "learning_rate": 7.608222192795794e-07, "loss": 0.15554046630859375, "step": 6697 }, { "epoch": 0.9055175327418673, "grad_norm": 1.1258841753005981, "learning_rate": 7.586715637704284e-07, "loss": 0.1649169921875, "step": 6698 }, { "epoch": 0.9056527249683143, "grad_norm": 0.910371720790863, "learning_rate": 7.565238733817998e-07, "loss": 0.1685476303100586, "step": 6699 }, { "epoch": 0.9057879171947613, "grad_norm": 0.8648293614387512, "learning_rate": 7.543791485608542e-07, "loss": 0.2098095417022705, "step": 6700 }, { "epoch": 0.9059231094212082, "grad_norm": 1.2737137079238892, "learning_rate": 7.52237389754138e-07, "loss": 0.17931652069091797, "step": 6701 }, { "epoch": 0.9060583016476552, "grad_norm": 1.134995460510254, "learning_rate": 7.500985974075758e-07, "loss": 0.14721202850341797, "step": 6702 }, { "epoch": 0.9061934938741022, "grad_norm": 0.9695498943328857, "learning_rate": 7.479627719664767e-07, "loss": 0.17166423797607422, "step": 6703 }, { "epoch": 0.9063286861005492, "grad_norm": 1.0928118228912354, "learning_rate": 7.458299138755281e-07, "loss": 0.1693439483642578, "step": 6704 }, { "epoch": 0.9064638783269962, "grad_norm": 0.9881254434585571, "learning_rate": 7.437000235788033e-07, "loss": 0.12952327728271484, "step": 6705 }, { "epoch": 0.9065990705534431, "grad_norm": 1.5902758836746216, "learning_rate": 7.415731015197575e-07, "loss": 0.1794424057006836, "step": 6706 }, { "epoch": 0.9067342627798901, "grad_norm": 0.8967025876045227, "learning_rate": 7.39449148141228e-07, "loss": 0.11917352676391602, "step": 6707 }, { "epoch": 0.9068694550063371, "grad_norm": 0.9916856288909912, "learning_rate": 7.373281638854329e-07, "loss": 0.17327308654785156, "step": 6708 }, { "epoch": 0.9070046472327841, "grad_norm": 2.049166679382324, "learning_rate": 7.352101491939722e-07, "loss": 0.17474365234375, "step": 6709 }, { "epoch": 0.907139839459231, "grad_norm": 1.0560553073883057, "learning_rate": 7.330951045078249e-07, "loss": 0.14926815032958984, "step": 6710 }, { "epoch": 0.907275031685678, "grad_norm": 0.7818946838378906, "learning_rate": 7.309830302673621e-07, "loss": 0.17352962493896484, "step": 6711 }, { "epoch": 0.907410223912125, "grad_norm": 1.199280858039856, "learning_rate": 7.288739269123184e-07, "loss": 0.15593397617340088, "step": 6712 }, { "epoch": 0.907545416138572, "grad_norm": 1.6214392185211182, "learning_rate": 7.267677948818296e-07, "loss": 0.15970993041992188, "step": 6713 }, { "epoch": 0.907680608365019, "grad_norm": 0.7071395516395569, "learning_rate": 7.246646346143997e-07, "loss": 0.11743593215942383, "step": 6714 }, { "epoch": 0.9078158005914659, "grad_norm": 0.8203786015510559, "learning_rate": 7.225644465479153e-07, "loss": 0.1452326774597168, "step": 6715 }, { "epoch": 0.9079509928179129, "grad_norm": 1.303985357284546, "learning_rate": 7.204672311196547e-07, "loss": 0.1757822036743164, "step": 6716 }, { "epoch": 0.9080861850443599, "grad_norm": 1.1265748739242554, "learning_rate": 7.183729887662604e-07, "loss": 0.09971332550048828, "step": 6717 }, { "epoch": 0.9082213772708069, "grad_norm": 1.178935170173645, "learning_rate": 7.162817199237703e-07, "loss": 0.19397926330566406, "step": 6718 }, { "epoch": 0.908356569497254, "grad_norm": 1.5768262147903442, "learning_rate": 7.141934250275978e-07, "loss": 0.1585092544555664, "step": 6719 }, { "epoch": 0.9084917617237009, "grad_norm": 2.1778602600097656, "learning_rate": 7.121081045125316e-07, "loss": 0.18851089477539062, "step": 6720 }, { "epoch": 0.9086269539501479, "grad_norm": 1.0620629787445068, "learning_rate": 7.100257588127545e-07, "loss": 0.13008403778076172, "step": 6721 }, { "epoch": 0.9087621461765949, "grad_norm": 1.0338125228881836, "learning_rate": 7.079463883618148e-07, "loss": 0.1543283462524414, "step": 6722 }, { "epoch": 0.9088973384030419, "grad_norm": 1.0764707326889038, "learning_rate": 7.058699935926527e-07, "loss": 0.18457603454589844, "step": 6723 }, { "epoch": 0.9090325306294889, "grad_norm": 1.1665832996368408, "learning_rate": 7.037965749375808e-07, "loss": 0.16879498958587646, "step": 6724 }, { "epoch": 0.9091677228559358, "grad_norm": 1.127356767654419, "learning_rate": 7.017261328283037e-07, "loss": 0.16724681854248047, "step": 6725 }, { "epoch": 0.9093029150823828, "grad_norm": 1.3785279989242554, "learning_rate": 6.996586676958916e-07, "loss": 0.16535234451293945, "step": 6726 }, { "epoch": 0.9094381073088298, "grad_norm": 0.7778226733207703, "learning_rate": 6.975941799708019e-07, "loss": 0.149641752243042, "step": 6727 }, { "epoch": 0.9095732995352768, "grad_norm": 0.706840455532074, "learning_rate": 6.955326700828757e-07, "loss": 0.12535953521728516, "step": 6728 }, { "epoch": 0.9097084917617237, "grad_norm": 0.8697673082351685, "learning_rate": 6.934741384613246e-07, "loss": 0.1455981731414795, "step": 6729 }, { "epoch": 0.9098436839881707, "grad_norm": 0.8157376646995544, "learning_rate": 6.91418585534756e-07, "loss": 0.16133975982666016, "step": 6730 }, { "epoch": 0.9099788762146177, "grad_norm": 1.2191591262817383, "learning_rate": 6.893660117311373e-07, "loss": 0.22642278671264648, "step": 6731 }, { "epoch": 0.9101140684410647, "grad_norm": 0.9939918518066406, "learning_rate": 6.873164174778252e-07, "loss": 0.1807088851928711, "step": 6732 }, { "epoch": 0.9102492606675117, "grad_norm": 1.0536137819290161, "learning_rate": 6.852698032015631e-07, "loss": 0.14378440380096436, "step": 6733 }, { "epoch": 0.9103844528939586, "grad_norm": 0.7766066789627075, "learning_rate": 6.832261693284636e-07, "loss": 0.12801015377044678, "step": 6734 }, { "epoch": 0.9105196451204056, "grad_norm": 1.4431747198104858, "learning_rate": 6.811855162840214e-07, "loss": 0.16617298126220703, "step": 6735 }, { "epoch": 0.9106548373468526, "grad_norm": 2.011122941970825, "learning_rate": 6.791478444931132e-07, "loss": 0.22838783264160156, "step": 6736 }, { "epoch": 0.9107900295732996, "grad_norm": 1.3376126289367676, "learning_rate": 6.77113154379988e-07, "loss": 0.21860504150390625, "step": 6737 }, { "epoch": 0.9109252217997466, "grad_norm": 1.7054890394210815, "learning_rate": 6.75081446368287e-07, "loss": 0.17700958251953125, "step": 6738 }, { "epoch": 0.9110604140261935, "grad_norm": 1.1025233268737793, "learning_rate": 6.730527208810166e-07, "loss": 0.16390371322631836, "step": 6739 }, { "epoch": 0.9111956062526405, "grad_norm": 1.2439676523208618, "learning_rate": 6.710269783405709e-07, "loss": 0.20368516445159912, "step": 6740 }, { "epoch": 0.9113307984790875, "grad_norm": 1.4852169752120972, "learning_rate": 6.690042191687206e-07, "loss": 0.20545828342437744, "step": 6741 }, { "epoch": 0.9114659907055345, "grad_norm": 0.8305413126945496, "learning_rate": 6.669844437866124e-07, "loss": 0.15709972381591797, "step": 6742 }, { "epoch": 0.9116011829319814, "grad_norm": 1.0936546325683594, "learning_rate": 6.649676526147764e-07, "loss": 0.1474313735961914, "step": 6743 }, { "epoch": 0.9117363751584284, "grad_norm": 0.9522294998168945, "learning_rate": 6.629538460731199e-07, "loss": 0.19360923767089844, "step": 6744 }, { "epoch": 0.9118715673848754, "grad_norm": 0.8566400408744812, "learning_rate": 6.609430245809261e-07, "loss": 0.15583860874176025, "step": 6745 }, { "epoch": 0.9120067596113224, "grad_norm": 1.0703624486923218, "learning_rate": 6.589351885568617e-07, "loss": 0.15053081512451172, "step": 6746 }, { "epoch": 0.9121419518377694, "grad_norm": 1.214871883392334, "learning_rate": 6.569303384189624e-07, "loss": 0.2011098861694336, "step": 6747 }, { "epoch": 0.9122771440642163, "grad_norm": 1.5464457273483276, "learning_rate": 6.54928474584659e-07, "loss": 0.2445850372314453, "step": 6748 }, { "epoch": 0.9124123362906633, "grad_norm": 1.616714596748352, "learning_rate": 6.5292959747074e-07, "loss": 0.237945556640625, "step": 6749 }, { "epoch": 0.9125475285171103, "grad_norm": 1.1570183038711548, "learning_rate": 6.509337074933891e-07, "loss": 0.19701099395751953, "step": 6750 }, { "epoch": 0.9126827207435573, "grad_norm": 0.8465988636016846, "learning_rate": 6.489408050681589e-07, "loss": 0.11767005920410156, "step": 6751 }, { "epoch": 0.9128179129700043, "grad_norm": 1.2097126245498657, "learning_rate": 6.469508906099792e-07, "loss": 0.21654891967773438, "step": 6752 }, { "epoch": 0.9129531051964512, "grad_norm": 1.7918184995651245, "learning_rate": 6.449639645331684e-07, "loss": 0.13945382833480835, "step": 6753 }, { "epoch": 0.9130882974228982, "grad_norm": 1.8902429342269897, "learning_rate": 6.429800272514058e-07, "loss": 0.19729042053222656, "step": 6754 }, { "epoch": 0.9132234896493452, "grad_norm": 0.7039175629615784, "learning_rate": 6.409990791777659e-07, "loss": 0.1213526725769043, "step": 6755 }, { "epoch": 0.9133586818757922, "grad_norm": 1.102613925933838, "learning_rate": 6.390211207246888e-07, "loss": 0.20709228515625, "step": 6756 }, { "epoch": 0.9134938741022391, "grad_norm": 1.1705427169799805, "learning_rate": 6.370461523039967e-07, "loss": 0.19670867919921875, "step": 6757 }, { "epoch": 0.9136290663286861, "grad_norm": 0.9059383869171143, "learning_rate": 6.350741743268873e-07, "loss": 0.16883468627929688, "step": 6758 }, { "epoch": 0.9137642585551331, "grad_norm": 1.2360154390335083, "learning_rate": 6.331051872039373e-07, "loss": 0.15971755981445312, "step": 6759 }, { "epoch": 0.9138994507815801, "grad_norm": 0.6795998811721802, "learning_rate": 6.31139191345102e-07, "loss": 0.12293505668640137, "step": 6760 }, { "epoch": 0.9140346430080271, "grad_norm": 1.7758582830429077, "learning_rate": 6.291761871597091e-07, "loss": 0.17943763732910156, "step": 6761 }, { "epoch": 0.914169835234474, "grad_norm": 0.931975781917572, "learning_rate": 6.272161750564731e-07, "loss": 0.1481645107269287, "step": 6762 }, { "epoch": 0.914305027460921, "grad_norm": 0.7977692484855652, "learning_rate": 6.252591554434728e-07, "loss": 0.13846158981323242, "step": 6763 }, { "epoch": 0.914440219687368, "grad_norm": 0.9667164087295532, "learning_rate": 6.233051287281688e-07, "loss": 0.1804065704345703, "step": 6764 }, { "epoch": 0.914575411913815, "grad_norm": 1.8307242393493652, "learning_rate": 6.213540953174057e-07, "loss": 0.1817340850830078, "step": 6765 }, { "epoch": 0.914710604140262, "grad_norm": 1.6505370140075684, "learning_rate": 6.194060556173953e-07, "loss": 0.18779540061950684, "step": 6766 }, { "epoch": 0.9148457963667089, "grad_norm": 0.9660489559173584, "learning_rate": 6.17461010033733e-07, "loss": 0.1647796630859375, "step": 6767 }, { "epoch": 0.9149809885931559, "grad_norm": 0.92435622215271, "learning_rate": 6.155189589713833e-07, "loss": 0.17723655700683594, "step": 6768 }, { "epoch": 0.9151161808196029, "grad_norm": 0.8326014876365662, "learning_rate": 6.135799028346928e-07, "loss": 0.1484508514404297, "step": 6769 }, { "epoch": 0.9152513730460499, "grad_norm": 1.0861568450927734, "learning_rate": 6.116438420273868e-07, "loss": 0.17220067977905273, "step": 6770 }, { "epoch": 0.9153865652724968, "grad_norm": 1.156847357749939, "learning_rate": 6.097107769525595e-07, "loss": 0.13736557960510254, "step": 6771 }, { "epoch": 0.9155217574989438, "grad_norm": 1.4594306945800781, "learning_rate": 6.077807080126873e-07, "loss": 0.17350053787231445, "step": 6772 }, { "epoch": 0.9156569497253908, "grad_norm": 1.4144175052642822, "learning_rate": 6.058536356096206e-07, "loss": 0.2276449203491211, "step": 6773 }, { "epoch": 0.9157921419518378, "grad_norm": 1.4107614755630493, "learning_rate": 6.039295601445833e-07, "loss": 0.21044921875, "step": 6774 }, { "epoch": 0.9159273341782848, "grad_norm": 1.195586919784546, "learning_rate": 6.020084820181831e-07, "loss": 0.1856060028076172, "step": 6775 }, { "epoch": 0.9160625264047317, "grad_norm": 1.1761809587478638, "learning_rate": 6.000904016303971e-07, "loss": 0.141021728515625, "step": 6776 }, { "epoch": 0.9161977186311787, "grad_norm": 1.7974510192871094, "learning_rate": 5.981753193805789e-07, "loss": 0.14397656917572021, "step": 6777 }, { "epoch": 0.9163329108576257, "grad_norm": 1.5928044319152832, "learning_rate": 5.962632356674597e-07, "loss": 0.18073272705078125, "step": 6778 }, { "epoch": 0.9164681030840727, "grad_norm": 0.6539821028709412, "learning_rate": 5.94354150889141e-07, "loss": 0.12350940704345703, "step": 6779 }, { "epoch": 0.9166032953105197, "grad_norm": 0.9880049824714661, "learning_rate": 5.924480654431147e-07, "loss": 0.1483381986618042, "step": 6780 }, { "epoch": 0.9167384875369666, "grad_norm": 1.2179538011550903, "learning_rate": 5.905449797262252e-07, "loss": 0.14670944213867188, "step": 6781 }, { "epoch": 0.9168736797634136, "grad_norm": 0.8636088967323303, "learning_rate": 5.886448941347156e-07, "loss": 0.13743972778320312, "step": 6782 }, { "epoch": 0.9170088719898606, "grad_norm": 1.7533119916915894, "learning_rate": 5.867478090641892e-07, "loss": 0.1954803466796875, "step": 6783 }, { "epoch": 0.9171440642163076, "grad_norm": 0.8614572286605835, "learning_rate": 5.848537249096269e-07, "loss": 0.15858745574951172, "step": 6784 }, { "epoch": 0.9172792564427545, "grad_norm": 1.195934772491455, "learning_rate": 5.829626420653949e-07, "loss": 0.18611717224121094, "step": 6785 }, { "epoch": 0.9174144486692015, "grad_norm": 1.6256399154663086, "learning_rate": 5.810745609252166e-07, "loss": 0.21474456787109375, "step": 6786 }, { "epoch": 0.9175496408956485, "grad_norm": 1.0646802186965942, "learning_rate": 5.791894818822091e-07, "loss": 0.16489005088806152, "step": 6787 }, { "epoch": 0.9176848331220955, "grad_norm": 0.9180534482002258, "learning_rate": 5.773074053288519e-07, "loss": 0.15557479858398438, "step": 6788 }, { "epoch": 0.9178200253485425, "grad_norm": 2.328115701675415, "learning_rate": 5.75428331657003e-07, "loss": 0.1453409194946289, "step": 6789 }, { "epoch": 0.9179552175749894, "grad_norm": 0.7374373078346252, "learning_rate": 5.735522612578998e-07, "loss": 0.09610319137573242, "step": 6790 }, { "epoch": 0.9180904098014364, "grad_norm": 1.513714075088501, "learning_rate": 5.716791945221444e-07, "loss": 0.15625974535942078, "step": 6791 }, { "epoch": 0.9182256020278834, "grad_norm": 0.7924400568008423, "learning_rate": 5.698091318397219e-07, "loss": 0.15407180786132812, "step": 6792 }, { "epoch": 0.9183607942543304, "grad_norm": 1.5105005502700806, "learning_rate": 5.679420735999908e-07, "loss": 0.23344850540161133, "step": 6793 }, { "epoch": 0.9184959864807773, "grad_norm": 1.113030195236206, "learning_rate": 5.660780201916799e-07, "loss": 0.14873027801513672, "step": 6794 }, { "epoch": 0.9186311787072243, "grad_norm": 1.2140769958496094, "learning_rate": 5.642169720028973e-07, "loss": 0.2314774990081787, "step": 6795 }, { "epoch": 0.9187663709336713, "grad_norm": 1.0213863849639893, "learning_rate": 5.623589294211196e-07, "loss": 0.16526174545288086, "step": 6796 }, { "epoch": 0.9189015631601183, "grad_norm": 1.5017553567886353, "learning_rate": 5.605038928332057e-07, "loss": 0.19676971435546875, "step": 6797 }, { "epoch": 0.9190367553865653, "grad_norm": 1.120961308479309, "learning_rate": 5.586518626253817e-07, "loss": 0.14935043454170227, "step": 6798 }, { "epoch": 0.9191719476130122, "grad_norm": 0.8091015815734863, "learning_rate": 5.568028391832524e-07, "loss": 0.1257791519165039, "step": 6799 }, { "epoch": 0.9193071398394592, "grad_norm": 1.3048439025878906, "learning_rate": 5.549568228917917e-07, "loss": 0.19629764556884766, "step": 6800 }, { "epoch": 0.9194423320659062, "grad_norm": 1.2796249389648438, "learning_rate": 5.531138141353486e-07, "loss": 0.2021503448486328, "step": 6801 }, { "epoch": 0.9195775242923532, "grad_norm": 0.7782748341560364, "learning_rate": 5.512738132976514e-07, "loss": 0.1341695785522461, "step": 6802 }, { "epoch": 0.9197127165188002, "grad_norm": 0.8553540110588074, "learning_rate": 5.49436820761795e-07, "loss": 0.15029644966125488, "step": 6803 }, { "epoch": 0.9198479087452471, "grad_norm": 0.9512537121772766, "learning_rate": 5.476028369102537e-07, "loss": 0.16156387329101562, "step": 6804 }, { "epoch": 0.9199831009716941, "grad_norm": 0.9684756398200989, "learning_rate": 5.45771862124872e-07, "loss": 0.12393474578857422, "step": 6805 }, { "epoch": 0.9201182931981411, "grad_norm": 2.228598117828369, "learning_rate": 5.439438967868649e-07, "loss": 0.16321754455566406, "step": 6806 }, { "epoch": 0.9202534854245881, "grad_norm": 0.8086915016174316, "learning_rate": 5.421189412768296e-07, "loss": 0.13257122039794922, "step": 6807 }, { "epoch": 0.920388677651035, "grad_norm": 1.1834681034088135, "learning_rate": 5.402969959747306e-07, "loss": 0.2162151336669922, "step": 6808 }, { "epoch": 0.920523869877482, "grad_norm": 0.8241886496543884, "learning_rate": 5.384780612599044e-07, "loss": 0.18962574005126953, "step": 6809 }, { "epoch": 0.920659062103929, "grad_norm": 0.7904039025306702, "learning_rate": 5.366621375110647e-07, "loss": 0.09569859504699707, "step": 6810 }, { "epoch": 0.920794254330376, "grad_norm": 0.8951303362846375, "learning_rate": 5.348492251062942e-07, "loss": 0.16202640533447266, "step": 6811 }, { "epoch": 0.920929446556823, "grad_norm": 1.418837070465088, "learning_rate": 5.330393244230558e-07, "loss": 0.2121572494506836, "step": 6812 }, { "epoch": 0.9210646387832699, "grad_norm": 1.4238643646240234, "learning_rate": 5.312324358381731e-07, "loss": 0.1611948013305664, "step": 6813 }, { "epoch": 0.9211998310097169, "grad_norm": 1.1536599397659302, "learning_rate": 5.29428559727857e-07, "loss": 0.16081500053405762, "step": 6814 }, { "epoch": 0.9213350232361639, "grad_norm": 1.6921736001968384, "learning_rate": 5.276276964676802e-07, "loss": 0.16599369049072266, "step": 6815 }, { "epoch": 0.9214702154626109, "grad_norm": 0.86725914478302, "learning_rate": 5.258298464325928e-07, "loss": 0.15939807891845703, "step": 6816 }, { "epoch": 0.9216054076890579, "grad_norm": 1.0355339050292969, "learning_rate": 5.240350099969204e-07, "loss": 0.1880812644958496, "step": 6817 }, { "epoch": 0.9217405999155048, "grad_norm": 0.771996021270752, "learning_rate": 5.222431875343492e-07, "loss": 0.13859176635742188, "step": 6818 }, { "epoch": 0.9218757921419518, "grad_norm": 1.039351463317871, "learning_rate": 5.204543794179539e-07, "loss": 0.15136241912841797, "step": 6819 }, { "epoch": 0.9220109843683988, "grad_norm": 1.3492240905761719, "learning_rate": 5.186685860201717e-07, "loss": 0.15941619873046875, "step": 6820 }, { "epoch": 0.9221461765948458, "grad_norm": 0.9099437594413757, "learning_rate": 5.16885807712812e-07, "loss": 0.15189552307128906, "step": 6821 }, { "epoch": 0.9222813688212927, "grad_norm": 0.8142754435539246, "learning_rate": 5.151060448670625e-07, "loss": 0.11348915100097656, "step": 6822 }, { "epoch": 0.9224165610477397, "grad_norm": 1.0997073650360107, "learning_rate": 5.133292978534754e-07, "loss": 0.15671825408935547, "step": 6823 }, { "epoch": 0.9225517532741867, "grad_norm": 0.8878243565559387, "learning_rate": 5.115555670419814e-07, "loss": 0.16170263290405273, "step": 6824 }, { "epoch": 0.9226869455006337, "grad_norm": 1.1478452682495117, "learning_rate": 5.097848528018817e-07, "loss": 0.13878536224365234, "step": 6825 }, { "epoch": 0.9228221377270807, "grad_norm": 1.011464238166809, "learning_rate": 5.080171555018448e-07, "loss": 0.14964675903320312, "step": 6826 }, { "epoch": 0.9229573299535276, "grad_norm": 1.0709928274154663, "learning_rate": 5.06252475509918e-07, "loss": 0.18352794647216797, "step": 6827 }, { "epoch": 0.9230925221799746, "grad_norm": 1.724845051765442, "learning_rate": 5.044908131935139e-07, "loss": 0.23067855834960938, "step": 6828 }, { "epoch": 0.9232277144064216, "grad_norm": 0.921457827091217, "learning_rate": 5.027321689194242e-07, "loss": 0.189239501953125, "step": 6829 }, { "epoch": 0.9233629066328686, "grad_norm": 1.485518217086792, "learning_rate": 5.009765430538061e-07, "loss": 0.16341376304626465, "step": 6830 }, { "epoch": 0.9234980988593156, "grad_norm": 0.8229610323905945, "learning_rate": 4.992239359621886e-07, "loss": 0.13027381896972656, "step": 6831 }, { "epoch": 0.9236332910857625, "grad_norm": 1.1949256658554077, "learning_rate": 4.974743480094767e-07, "loss": 0.14664077758789062, "step": 6832 }, { "epoch": 0.9237684833122095, "grad_norm": 0.8907371759414673, "learning_rate": 4.957277795599407e-07, "loss": 0.15859603881835938, "step": 6833 }, { "epoch": 0.9239036755386565, "grad_norm": 1.3830283880233765, "learning_rate": 4.93984230977228e-07, "loss": 0.14602303504943848, "step": 6834 }, { "epoch": 0.9240388677651035, "grad_norm": 0.9414574503898621, "learning_rate": 4.922437026243531e-07, "loss": 0.170318603515625, "step": 6835 }, { "epoch": 0.9241740599915504, "grad_norm": 1.9344474077224731, "learning_rate": 4.905061948637063e-07, "loss": 0.24384307861328125, "step": 6836 }, { "epoch": 0.9243092522179974, "grad_norm": 1.0105434656143188, "learning_rate": 4.887717080570431e-07, "loss": 0.1478862762451172, "step": 6837 }, { "epoch": 0.9244444444444444, "grad_norm": 0.9474513530731201, "learning_rate": 4.870402425654913e-07, "loss": 0.14140701293945312, "step": 6838 }, { "epoch": 0.9245796366708914, "grad_norm": 1.2620896100997925, "learning_rate": 4.853117987495542e-07, "loss": 0.19680285453796387, "step": 6839 }, { "epoch": 0.9247148288973384, "grad_norm": 1.124436616897583, "learning_rate": 4.83586376969104e-07, "loss": 0.21566402912139893, "step": 6840 }, { "epoch": 0.9248500211237853, "grad_norm": 1.7103341817855835, "learning_rate": 4.818639775833816e-07, "loss": 0.1856060028076172, "step": 6841 }, { "epoch": 0.9249852133502323, "grad_norm": 0.8405003547668457, "learning_rate": 4.801446009509969e-07, "loss": 0.15198802947998047, "step": 6842 }, { "epoch": 0.9251204055766793, "grad_norm": 1.268671989440918, "learning_rate": 4.784282474299367e-07, "loss": 0.15775728225708008, "step": 6843 }, { "epoch": 0.9252555978031263, "grad_norm": 0.8011864423751831, "learning_rate": 4.767149173775537e-07, "loss": 0.15517902374267578, "step": 6844 }, { "epoch": 0.9253907900295733, "grad_norm": 0.9571229815483093, "learning_rate": 4.750046111505724e-07, "loss": 0.14945721626281738, "step": 6845 }, { "epoch": 0.9255259822560202, "grad_norm": 1.2321099042892456, "learning_rate": 4.732973291050896e-07, "loss": 0.22460174560546875, "step": 6846 }, { "epoch": 0.9256611744824672, "grad_norm": 0.8086537718772888, "learning_rate": 4.7159307159656607e-07, "loss": 0.14993280172348022, "step": 6847 }, { "epoch": 0.9257963667089142, "grad_norm": 0.9530975222587585, "learning_rate": 4.6989183897983954e-07, "loss": 0.13495564460754395, "step": 6848 }, { "epoch": 0.9259315589353612, "grad_norm": 0.8083940148353577, "learning_rate": 4.681936316091201e-07, "loss": 0.17575359344482422, "step": 6849 }, { "epoch": 0.9260667511618081, "grad_norm": 1.0287210941314697, "learning_rate": 4.664984498379765e-07, "loss": 0.1811199188232422, "step": 6850 }, { "epoch": 0.9262019433882551, "grad_norm": 1.3945170640945435, "learning_rate": 4.6480629401935814e-07, "loss": 0.17000555992126465, "step": 6851 }, { "epoch": 0.9263371356147021, "grad_norm": 0.8777503371238708, "learning_rate": 4.631171645055815e-07, "loss": 0.15983819961547852, "step": 6852 }, { "epoch": 0.9264723278411491, "grad_norm": 1.2790606021881104, "learning_rate": 4.614310616483286e-07, "loss": 0.1291027069091797, "step": 6853 }, { "epoch": 0.9266075200675962, "grad_norm": 2.5518715381622314, "learning_rate": 4.5974798579866193e-07, "loss": 0.2043776512145996, "step": 6854 }, { "epoch": 0.9267427122940431, "grad_norm": 1.289115071296692, "learning_rate": 4.580679373069996e-07, "loss": 0.2124267965555191, "step": 6855 }, { "epoch": 0.9268779045204901, "grad_norm": 2.0251758098602295, "learning_rate": 4.5639091652314e-07, "loss": 0.21028053760528564, "step": 6856 }, { "epoch": 0.9270130967469371, "grad_norm": 1.1990656852722168, "learning_rate": 4.54716923796249e-07, "loss": 0.19442427158355713, "step": 6857 }, { "epoch": 0.9271482889733841, "grad_norm": 0.8964511752128601, "learning_rate": 4.5304595947485927e-07, "loss": 0.1645498275756836, "step": 6858 }, { "epoch": 0.9272834811998311, "grad_norm": 2.4061243534088135, "learning_rate": 4.5137802390687433e-07, "loss": 0.17140674591064453, "step": 6859 }, { "epoch": 0.927418673426278, "grad_norm": 2.120358467102051, "learning_rate": 4.497131174395663e-07, "loss": 0.18662548065185547, "step": 6860 }, { "epoch": 0.927553865652725, "grad_norm": 1.4562383890151978, "learning_rate": 4.4805124041957967e-07, "loss": 0.14902305603027344, "step": 6861 }, { "epoch": 0.927689057879172, "grad_norm": 1.9436578750610352, "learning_rate": 4.463923931929259e-07, "loss": 0.22417545318603516, "step": 6862 }, { "epoch": 0.927824250105619, "grad_norm": 0.8414587378501892, "learning_rate": 4.4473657610498377e-07, "loss": 0.18558979034423828, "step": 6863 }, { "epoch": 0.927959442332066, "grad_norm": 1.178419589996338, "learning_rate": 4.430837895005058e-07, "loss": 0.1932048797607422, "step": 6864 }, { "epoch": 0.9280946345585129, "grad_norm": 0.7472342252731323, "learning_rate": 4.4143403372360836e-07, "loss": 0.1239774227142334, "step": 6865 }, { "epoch": 0.9282298267849599, "grad_norm": 1.2048664093017578, "learning_rate": 4.3978730911778176e-07, "loss": 0.19588279724121094, "step": 6866 }, { "epoch": 0.9283650190114069, "grad_norm": 1.6109174489974976, "learning_rate": 4.381436160258834e-07, "loss": 0.1859283447265625, "step": 6867 }, { "epoch": 0.9285002112378539, "grad_norm": 1.0783982276916504, "learning_rate": 4.3650295479013615e-07, "loss": 0.2029552459716797, "step": 6868 }, { "epoch": 0.9286354034643008, "grad_norm": 1.533647060394287, "learning_rate": 4.348653257521351e-07, "loss": 0.21853065490722656, "step": 6869 }, { "epoch": 0.9287705956907478, "grad_norm": 1.643060326576233, "learning_rate": 4.332307292528442e-07, "loss": 0.1819465160369873, "step": 6870 }, { "epoch": 0.9289057879171948, "grad_norm": 0.7947250008583069, "learning_rate": 4.315991656325946e-07, "loss": 0.1918010711669922, "step": 6871 }, { "epoch": 0.9290409801436418, "grad_norm": 1.1351237297058105, "learning_rate": 4.299706352310895e-07, "loss": 0.1490306854248047, "step": 6872 }, { "epoch": 0.9291761723700888, "grad_norm": 1.0748343467712402, "learning_rate": 4.283451383873926e-07, "loss": 0.22548198699951172, "step": 6873 }, { "epoch": 0.9293113645965357, "grad_norm": 1.0095285177230835, "learning_rate": 4.26722675439945e-07, "loss": 0.14268875122070312, "step": 6874 }, { "epoch": 0.9294465568229827, "grad_norm": 1.441441535949707, "learning_rate": 4.251032467265481e-07, "loss": 0.18883323669433594, "step": 6875 }, { "epoch": 0.9295817490494297, "grad_norm": 0.9878628253936768, "learning_rate": 4.234868525843805e-07, "loss": 0.1687760353088379, "step": 6876 }, { "epoch": 0.9297169412758767, "grad_norm": 1.282901644706726, "learning_rate": 4.218734933499796e-07, "loss": 0.20367050170898438, "step": 6877 }, { "epoch": 0.9298521335023237, "grad_norm": 0.6648201942443848, "learning_rate": 4.202631693592601e-07, "loss": 0.12532281875610352, "step": 6878 }, { "epoch": 0.9299873257287706, "grad_norm": 1.0283968448638916, "learning_rate": 4.186558809474955e-07, "loss": 0.2093348503112793, "step": 6879 }, { "epoch": 0.9301225179552176, "grad_norm": 1.1400434970855713, "learning_rate": 4.170516284493331e-07, "loss": 0.12535810470581055, "step": 6880 }, { "epoch": 0.9302577101816646, "grad_norm": 1.2748878002166748, "learning_rate": 4.1545041219879063e-07, "loss": 0.15709900856018066, "step": 6881 }, { "epoch": 0.9303929024081116, "grad_norm": 1.8680342435836792, "learning_rate": 4.138522325292432e-07, "loss": 0.1587967872619629, "step": 6882 }, { "epoch": 0.9305280946345585, "grad_norm": 0.8770149946212769, "learning_rate": 4.1225708977344457e-07, "loss": 0.1810312271118164, "step": 6883 }, { "epoch": 0.9306632868610055, "grad_norm": 1.538745403289795, "learning_rate": 4.106649842635124e-07, "loss": 0.1918349266052246, "step": 6884 }, { "epoch": 0.9307984790874525, "grad_norm": 1.6287195682525635, "learning_rate": 4.090759163309282e-07, "loss": 0.23414039611816406, "step": 6885 }, { "epoch": 0.9309336713138995, "grad_norm": 1.0517938137054443, "learning_rate": 4.07489886306549e-07, "loss": 0.15229272842407227, "step": 6886 }, { "epoch": 0.9310688635403465, "grad_norm": 0.6034350991249084, "learning_rate": 4.059068945205907e-07, "loss": 0.10676407814025879, "step": 6887 }, { "epoch": 0.9312040557667934, "grad_norm": 1.1849470138549805, "learning_rate": 4.043269413026429e-07, "loss": 0.17846012115478516, "step": 6888 }, { "epoch": 0.9313392479932404, "grad_norm": 1.0466327667236328, "learning_rate": 4.027500269816592e-07, "loss": 0.17405414581298828, "step": 6889 }, { "epoch": 0.9314744402196874, "grad_norm": 0.778199315071106, "learning_rate": 4.011761518859619e-07, "loss": 0.14313697814941406, "step": 6890 }, { "epoch": 0.9316096324461344, "grad_norm": 0.9889121055603027, "learning_rate": 3.996053163432406e-07, "loss": 0.15319538116455078, "step": 6891 }, { "epoch": 0.9317448246725814, "grad_norm": 1.3294932842254639, "learning_rate": 3.980375206805503e-07, "loss": 0.1484537124633789, "step": 6892 }, { "epoch": 0.9318800168990283, "grad_norm": 1.1328479051589966, "learning_rate": 3.9647276522431664e-07, "loss": 0.10921239852905273, "step": 6893 }, { "epoch": 0.9320152091254753, "grad_norm": 1.9791101217269897, "learning_rate": 3.949110503003289e-07, "loss": 0.18129920959472656, "step": 6894 }, { "epoch": 0.9321504013519223, "grad_norm": 0.8612326979637146, "learning_rate": 3.9335237623374377e-07, "loss": 0.15658773481845856, "step": 6895 }, { "epoch": 0.9322855935783693, "grad_norm": 2.137049913406372, "learning_rate": 3.917967433490849e-07, "loss": 0.17886686325073242, "step": 6896 }, { "epoch": 0.9324207858048162, "grad_norm": 1.0243951082229614, "learning_rate": 3.902441519702449e-07, "loss": 0.156585693359375, "step": 6897 }, { "epoch": 0.9325559780312632, "grad_norm": 1.082966923713684, "learning_rate": 3.886946024204818e-07, "loss": 0.1297922134399414, "step": 6898 }, { "epoch": 0.9326911702577102, "grad_norm": 2.3474693298339844, "learning_rate": 3.871480950224193e-07, "loss": 0.19859769940376282, "step": 6899 }, { "epoch": 0.9328263624841572, "grad_norm": 1.4735304117202759, "learning_rate": 3.856046300980498e-07, "loss": 0.15506011247634888, "step": 6900 }, { "epoch": 0.9329615547106042, "grad_norm": 0.8837671875953674, "learning_rate": 3.8406420796872953e-07, "loss": 0.12552547454833984, "step": 6901 }, { "epoch": 0.9330967469370511, "grad_norm": 0.7841081023216248, "learning_rate": 3.825268289551803e-07, "loss": 0.164825439453125, "step": 6902 }, { "epoch": 0.9332319391634981, "grad_norm": 1.6778563261032104, "learning_rate": 3.8099249337749777e-07, "loss": 0.20982837677001953, "step": 6903 }, { "epoch": 0.9333671313899451, "grad_norm": 0.6978492140769958, "learning_rate": 3.7946120155513465e-07, "loss": 0.13621997833251953, "step": 6904 }, { "epoch": 0.9335023236163921, "grad_norm": 0.9136191606521606, "learning_rate": 3.7793295380691595e-07, "loss": 0.15282034873962402, "step": 6905 }, { "epoch": 0.933637515842839, "grad_norm": 0.9879100322723389, "learning_rate": 3.7640775045103214e-07, "loss": 0.1868581771850586, "step": 6906 }, { "epoch": 0.933772708069286, "grad_norm": 0.6956599950790405, "learning_rate": 3.7488559180503423e-07, "loss": 0.1422288417816162, "step": 6907 }, { "epoch": 0.933907900295733, "grad_norm": 0.9054815769195557, "learning_rate": 3.7336647818584866e-07, "loss": 0.1433730125427246, "step": 6908 }, { "epoch": 0.93404309252218, "grad_norm": 1.2635170221328735, "learning_rate": 3.718504099097625e-07, "loss": 0.1242523193359375, "step": 6909 }, { "epoch": 0.934178284748627, "grad_norm": 1.3305695056915283, "learning_rate": 3.703373872924265e-07, "loss": 0.1690056324005127, "step": 6910 }, { "epoch": 0.9343134769750739, "grad_norm": 1.3940216302871704, "learning_rate": 3.688274106488604e-07, "loss": 0.18387436866760254, "step": 6911 }, { "epoch": 0.9344486692015209, "grad_norm": 1.4567819833755493, "learning_rate": 3.67320480293451e-07, "loss": 0.15241384506225586, "step": 6912 }, { "epoch": 0.9345838614279679, "grad_norm": 1.1121695041656494, "learning_rate": 3.6581659653994736e-07, "loss": 0.15810799598693848, "step": 6913 }, { "epoch": 0.9347190536544149, "grad_norm": 0.8731660842895508, "learning_rate": 3.64315759701469e-07, "loss": 0.15412139892578125, "step": 6914 }, { "epoch": 0.9348542458808619, "grad_norm": 0.8984377384185791, "learning_rate": 3.6281797009049765e-07, "loss": 0.14575576782226562, "step": 6915 }, { "epoch": 0.9349894381073088, "grad_norm": 1.1138936281204224, "learning_rate": 3.613232280188772e-07, "loss": 0.19501054286956787, "step": 6916 }, { "epoch": 0.9351246303337558, "grad_norm": 0.7889968752861023, "learning_rate": 3.5983153379782363e-07, "loss": 0.15529441833496094, "step": 6917 }, { "epoch": 0.9352598225602028, "grad_norm": 0.9597473740577698, "learning_rate": 3.5834288773791854e-07, "loss": 0.1800384521484375, "step": 6918 }, { "epoch": 0.9353950147866498, "grad_norm": 1.3094351291656494, "learning_rate": 3.568572901491007e-07, "loss": 0.15926361083984375, "step": 6919 }, { "epoch": 0.9355302070130967, "grad_norm": 2.3606109619140625, "learning_rate": 3.553747413406827e-07, "loss": 0.16363811492919922, "step": 6920 }, { "epoch": 0.9356653992395437, "grad_norm": 1.3630280494689941, "learning_rate": 3.538952416213376e-07, "loss": 0.184647798538208, "step": 6921 }, { "epoch": 0.9358005914659907, "grad_norm": 1.2892872095108032, "learning_rate": 3.524187912991056e-07, "loss": 0.2171459197998047, "step": 6922 }, { "epoch": 0.9359357836924377, "grad_norm": 1.436084270477295, "learning_rate": 3.5094539068139254e-07, "loss": 0.14622116088867188, "step": 6923 }, { "epoch": 0.9360709759188847, "grad_norm": 1.0650582313537598, "learning_rate": 3.494750400749663e-07, "loss": 0.11447733640670776, "step": 6924 }, { "epoch": 0.9362061681453316, "grad_norm": 1.2595068216323853, "learning_rate": 3.480077397859638e-07, "loss": 0.14632892608642578, "step": 6925 }, { "epoch": 0.9363413603717786, "grad_norm": 1.3274850845336914, "learning_rate": 3.4654349011988384e-07, "loss": 0.16489505767822266, "step": 6926 }, { "epoch": 0.9364765525982256, "grad_norm": 0.9624202847480774, "learning_rate": 3.4508229138159095e-07, "loss": 0.13409948348999023, "step": 6927 }, { "epoch": 0.9366117448246726, "grad_norm": 0.6399134993553162, "learning_rate": 3.4362414387531516e-07, "loss": 0.10895943641662598, "step": 6928 }, { "epoch": 0.9367469370511196, "grad_norm": 1.2405040264129639, "learning_rate": 3.4216904790464854e-07, "loss": 0.1999492645263672, "step": 6929 }, { "epoch": 0.9368821292775665, "grad_norm": 1.3759093284606934, "learning_rate": 3.407170037725521e-07, "loss": 0.1988992691040039, "step": 6930 }, { "epoch": 0.9370173215040135, "grad_norm": 1.4552645683288574, "learning_rate": 3.3926801178134737e-07, "loss": 0.16596126556396484, "step": 6931 }, { "epoch": 0.9371525137304605, "grad_norm": 0.9265978336334229, "learning_rate": 3.3782207223272467e-07, "loss": 0.15142905712127686, "step": 6932 }, { "epoch": 0.9372877059569075, "grad_norm": 0.8365117907524109, "learning_rate": 3.363791854277348e-07, "loss": 0.19535446166992188, "step": 6933 }, { "epoch": 0.9374228981833544, "grad_norm": 1.929330587387085, "learning_rate": 3.349393516667926e-07, "loss": 0.1852121353149414, "step": 6934 }, { "epoch": 0.9375580904098014, "grad_norm": 1.2405568361282349, "learning_rate": 3.335025712496814e-07, "loss": 0.15087223052978516, "step": 6935 }, { "epoch": 0.9376932826362484, "grad_norm": 1.422148585319519, "learning_rate": 3.320688444755471e-07, "loss": 0.21785545349121094, "step": 6936 }, { "epoch": 0.9378284748626954, "grad_norm": 1.6526321172714233, "learning_rate": 3.306381716428991e-07, "loss": 0.18062400817871094, "step": 6937 }, { "epoch": 0.9379636670891424, "grad_norm": 1.3130121231079102, "learning_rate": 3.2921055304960925e-07, "loss": 0.2098846435546875, "step": 6938 }, { "epoch": 0.9380988593155893, "grad_norm": 0.9979737997055054, "learning_rate": 3.277859889929147e-07, "loss": 0.1981792449951172, "step": 6939 }, { "epoch": 0.9382340515420363, "grad_norm": 0.7161824107170105, "learning_rate": 3.263644797694215e-07, "loss": 0.14830735325813293, "step": 6940 }, { "epoch": 0.9383692437684833, "grad_norm": 0.7012309432029724, "learning_rate": 3.2494602567509303e-07, "loss": 0.12056827545166016, "step": 6941 }, { "epoch": 0.9385044359949303, "grad_norm": 1.0711491107940674, "learning_rate": 3.2353062700525794e-07, "loss": 0.17443561553955078, "step": 6942 }, { "epoch": 0.9386396282213773, "grad_norm": 0.8885208368301392, "learning_rate": 3.221182840546122e-07, "loss": 0.17954778671264648, "step": 6943 }, { "epoch": 0.9387748204478242, "grad_norm": 0.6597723364830017, "learning_rate": 3.207089971172089e-07, "loss": 0.1304483413696289, "step": 6944 }, { "epoch": 0.9389100126742712, "grad_norm": 1.173721432685852, "learning_rate": 3.1930276648647504e-07, "loss": 0.18240642547607422, "step": 6945 }, { "epoch": 0.9390452049007182, "grad_norm": 1.2117289304733276, "learning_rate": 3.178995924551914e-07, "loss": 0.19396591186523438, "step": 6946 }, { "epoch": 0.9391803971271652, "grad_norm": 0.8468600511550903, "learning_rate": 3.164994753155059e-07, "loss": 0.16847282648086548, "step": 6947 }, { "epoch": 0.9393155893536121, "grad_norm": 1.629804253578186, "learning_rate": 3.1510241535893215e-07, "loss": 0.17116355895996094, "step": 6948 }, { "epoch": 0.9394507815800591, "grad_norm": 0.8811553716659546, "learning_rate": 3.1370841287634567e-07, "loss": 0.1329355239868164, "step": 6949 }, { "epoch": 0.9395859738065061, "grad_norm": 2.0105812549591064, "learning_rate": 3.1231746815798436e-07, "loss": 0.1999340057373047, "step": 6950 }, { "epoch": 0.9397211660329531, "grad_norm": 1.2636001110076904, "learning_rate": 3.1092958149344985e-07, "loss": 0.13026142120361328, "step": 6951 }, { "epoch": 0.9398563582594001, "grad_norm": 0.8365142941474915, "learning_rate": 3.095447531717077e-07, "loss": 0.12633132934570312, "step": 6952 }, { "epoch": 0.939991550485847, "grad_norm": 2.3109328746795654, "learning_rate": 3.08162983481089e-07, "loss": 0.19128036499023438, "step": 6953 }, { "epoch": 0.940126742712294, "grad_norm": 0.7146958708763123, "learning_rate": 3.067842727092801e-07, "loss": 0.09265774488449097, "step": 6954 }, { "epoch": 0.940261934938741, "grad_norm": 1.276873230934143, "learning_rate": 3.0540862114334323e-07, "loss": 0.1888561248779297, "step": 6955 }, { "epoch": 0.940397127165188, "grad_norm": 0.6841320395469666, "learning_rate": 3.0403602906969086e-07, "loss": 0.11055788397789001, "step": 6956 }, { "epoch": 0.940532319391635, "grad_norm": 1.168628215789795, "learning_rate": 3.0266649677410605e-07, "loss": 0.17499637603759766, "step": 6957 }, { "epoch": 0.9406675116180819, "grad_norm": 1.724021077156067, "learning_rate": 3.0130002454173243e-07, "loss": 0.1623673439025879, "step": 6958 }, { "epoch": 0.9408027038445289, "grad_norm": 1.3459324836730957, "learning_rate": 2.9993661265707407e-07, "loss": 0.20406723022460938, "step": 6959 }, { "epoch": 0.9409378960709759, "grad_norm": 1.0251187086105347, "learning_rate": 2.985762614040072e-07, "loss": 0.178466796875, "step": 6960 }, { "epoch": 0.9410730882974229, "grad_norm": 1.9301649332046509, "learning_rate": 2.972189710657586e-07, "loss": 0.14935904741287231, "step": 6961 }, { "epoch": 0.9412082805238698, "grad_norm": 0.9159307479858398, "learning_rate": 2.958647419249255e-07, "loss": 0.12334012985229492, "step": 6962 }, { "epoch": 0.9413434727503168, "grad_norm": 1.1499208211898804, "learning_rate": 2.9451357426346415e-07, "loss": 0.20439720153808594, "step": 6963 }, { "epoch": 0.9414786649767638, "grad_norm": 1.91754949092865, "learning_rate": 2.9316546836269776e-07, "loss": 0.1833571195602417, "step": 6964 }, { "epoch": 0.9416138572032108, "grad_norm": 1.253158450126648, "learning_rate": 2.9182042450330516e-07, "loss": 0.17911529541015625, "step": 6965 }, { "epoch": 0.9417490494296578, "grad_norm": 1.1508615016937256, "learning_rate": 2.9047844296533397e-07, "loss": 0.1601315140724182, "step": 6966 }, { "epoch": 0.9418842416561047, "grad_norm": 0.967922568321228, "learning_rate": 2.8913952402819246e-07, "loss": 0.16477394104003906, "step": 6967 }, { "epoch": 0.9420194338825517, "grad_norm": 1.3220723867416382, "learning_rate": 2.878036679706492e-07, "loss": 0.2159714698791504, "step": 6968 }, { "epoch": 0.9421546261089987, "grad_norm": 1.2639391422271729, "learning_rate": 2.8647087507083837e-07, "loss": 0.19647550582885742, "step": 6969 }, { "epoch": 0.9422898183354457, "grad_norm": 1.6839158535003662, "learning_rate": 2.8514114560625303e-07, "loss": 0.17318344116210938, "step": 6970 }, { "epoch": 0.9424250105618927, "grad_norm": 1.3257887363433838, "learning_rate": 2.8381447985375007e-07, "loss": 0.18767929077148438, "step": 6971 }, { "epoch": 0.9425602027883396, "grad_norm": 0.9894129037857056, "learning_rate": 2.8249087808954853e-07, "loss": 0.2026081085205078, "step": 6972 }, { "epoch": 0.9426953950147866, "grad_norm": 1.1003007888793945, "learning_rate": 2.811703405892296e-07, "loss": 0.19527816772460938, "step": 6973 }, { "epoch": 0.9428305872412336, "grad_norm": 0.6857394576072693, "learning_rate": 2.798528676277368e-07, "loss": 0.12031960487365723, "step": 6974 }, { "epoch": 0.9429657794676806, "grad_norm": 0.9882878661155701, "learning_rate": 2.785384594793738e-07, "loss": 0.1722729206085205, "step": 6975 }, { "epoch": 0.9431009716941275, "grad_norm": 2.102351427078247, "learning_rate": 2.772271164178086e-07, "loss": 0.1469125747680664, "step": 6976 }, { "epoch": 0.9432361639205745, "grad_norm": 1.4030895233154297, "learning_rate": 2.759188387160677e-07, "loss": 0.1977672576904297, "step": 6977 }, { "epoch": 0.9433713561470215, "grad_norm": 0.9867737293243408, "learning_rate": 2.746136266465449e-07, "loss": 0.1395587921142578, "step": 6978 }, { "epoch": 0.9435065483734685, "grad_norm": 0.7916335463523865, "learning_rate": 2.7331148048098943e-07, "loss": 0.13576698303222656, "step": 6979 }, { "epoch": 0.9436417405999155, "grad_norm": 2.0794079303741455, "learning_rate": 2.7201240049051613e-07, "loss": 0.16881781816482544, "step": 6980 }, { "epoch": 0.9437769328263624, "grad_norm": 1.2684155702590942, "learning_rate": 2.707163869455986e-07, "loss": 0.16578775644302368, "step": 6981 }, { "epoch": 0.9439121250528094, "grad_norm": 1.126895546913147, "learning_rate": 2.694234401160778e-07, "loss": 0.2003873586654663, "step": 6982 }, { "epoch": 0.9440473172792564, "grad_norm": 1.1429036855697632, "learning_rate": 2.6813356027114986e-07, "loss": 0.195068359375, "step": 6983 }, { "epoch": 0.9441825095057034, "grad_norm": 1.7646294832229614, "learning_rate": 2.6684674767937346e-07, "loss": 0.15339183807373047, "step": 6984 }, { "epoch": 0.9443177017321503, "grad_norm": 1.6443897485733032, "learning_rate": 2.655630026086708e-07, "loss": 0.21820354461669922, "step": 6985 }, { "epoch": 0.9444528939585973, "grad_norm": 1.0942819118499756, "learning_rate": 2.642823253263249e-07, "loss": 0.17298316955566406, "step": 6986 }, { "epoch": 0.9445880861850443, "grad_norm": 0.9483981728553772, "learning_rate": 2.630047160989807e-07, "loss": 0.14149093627929688, "step": 6987 }, { "epoch": 0.9447232784114913, "grad_norm": 1.279844045639038, "learning_rate": 2.6173017519263875e-07, "loss": 0.19954967498779297, "step": 6988 }, { "epoch": 0.9448584706379383, "grad_norm": 0.7546291351318359, "learning_rate": 2.6045870287267014e-07, "loss": 0.11913084983825684, "step": 6989 }, { "epoch": 0.9449936628643854, "grad_norm": 1.097650170326233, "learning_rate": 2.5919029940380147e-07, "loss": 0.21255111694335938, "step": 6990 }, { "epoch": 0.9451288550908323, "grad_norm": 0.8942933678627014, "learning_rate": 2.5792496505011807e-07, "loss": 0.1363382339477539, "step": 6991 }, { "epoch": 0.9452640473172793, "grad_norm": 0.9604797959327698, "learning_rate": 2.5666270007507266e-07, "loss": 0.1538677215576172, "step": 6992 }, { "epoch": 0.9453992395437263, "grad_norm": 0.8983729481697083, "learning_rate": 2.5540350474147324e-07, "loss": 0.14554214477539062, "step": 6993 }, { "epoch": 0.9455344317701733, "grad_norm": 0.7184409499168396, "learning_rate": 2.5414737931149346e-07, "loss": 0.10421323776245117, "step": 6994 }, { "epoch": 0.9456696239966202, "grad_norm": 0.8119010329246521, "learning_rate": 2.5289432404666246e-07, "loss": 0.1484060287475586, "step": 6995 }, { "epoch": 0.9458048162230672, "grad_norm": 1.395830750465393, "learning_rate": 2.5164433920787487e-07, "loss": 0.25280189514160156, "step": 6996 }, { "epoch": 0.9459400084495142, "grad_norm": 0.8624078631401062, "learning_rate": 2.503974250553842e-07, "loss": 0.15318012237548828, "step": 6997 }, { "epoch": 0.9460752006759612, "grad_norm": 0.9680809378623962, "learning_rate": 2.491535818488011e-07, "loss": 0.1606903076171875, "step": 6998 }, { "epoch": 0.9462103929024082, "grad_norm": 1.1840519905090332, "learning_rate": 2.479128098471067e-07, "loss": 0.2083454132080078, "step": 6999 }, { "epoch": 0.9463455851288551, "grad_norm": 1.8465065956115723, "learning_rate": 2.466751093086328e-07, "loss": 0.16587352752685547, "step": 7000 }, { "epoch": 0.9464807773553021, "grad_norm": 1.0893745422363281, "learning_rate": 2.454404804910748e-07, "loss": 0.19550800323486328, "step": 7001 }, { "epoch": 0.9466159695817491, "grad_norm": 1.111434817314148, "learning_rate": 2.442089236514888e-07, "loss": 0.16739463806152344, "step": 7002 }, { "epoch": 0.9467511618081961, "grad_norm": 0.9888833165168762, "learning_rate": 2.429804390462931e-07, "loss": 0.1490001678466797, "step": 7003 }, { "epoch": 0.946886354034643, "grad_norm": 1.4196141958236694, "learning_rate": 2.4175502693126293e-07, "loss": 0.18605244159698486, "step": 7004 }, { "epoch": 0.94702154626109, "grad_norm": 0.9099364876747131, "learning_rate": 2.4053268756153933e-07, "loss": 0.14433574676513672, "step": 7005 }, { "epoch": 0.947156738487537, "grad_norm": 1.4102363586425781, "learning_rate": 2.393134211916154e-07, "loss": 0.18846988677978516, "step": 7006 }, { "epoch": 0.947291930713984, "grad_norm": 1.0599256753921509, "learning_rate": 2.3809722807535128e-07, "loss": 0.19267940521240234, "step": 7007 }, { "epoch": 0.947427122940431, "grad_norm": 1.169120192527771, "learning_rate": 2.3688410846596287e-07, "loss": 0.19522762298583984, "step": 7008 }, { "epoch": 0.9475623151668779, "grad_norm": 1.2129591703414917, "learning_rate": 2.3567406261603143e-07, "loss": 0.15459728240966797, "step": 7009 }, { "epoch": 0.9476975073933249, "grad_norm": 1.0208072662353516, "learning_rate": 2.3446709077749206e-07, "loss": 0.16237735748291016, "step": 7010 }, { "epoch": 0.9478326996197719, "grad_norm": 0.8853017091751099, "learning_rate": 2.3326319320164546e-07, "loss": 0.169769287109375, "step": 7011 }, { "epoch": 0.9479678918462189, "grad_norm": 0.9158598184585571, "learning_rate": 2.320623701391461e-07, "loss": 0.15769469738006592, "step": 7012 }, { "epoch": 0.9481030840726659, "grad_norm": 1.2379778623580933, "learning_rate": 2.30864621840014e-07, "loss": 0.1318216323852539, "step": 7013 }, { "epoch": 0.9482382762991128, "grad_norm": 0.8993387222290039, "learning_rate": 2.2966994855362633e-07, "loss": 0.1631094217300415, "step": 7014 }, { "epoch": 0.9483734685255598, "grad_norm": 1.266741394996643, "learning_rate": 2.2847835052872079e-07, "loss": 0.1709613800048828, "step": 7015 }, { "epoch": 0.9485086607520068, "grad_norm": 1.3037835359573364, "learning_rate": 2.2728982801339392e-07, "loss": 0.12365007400512695, "step": 7016 }, { "epoch": 0.9486438529784538, "grad_norm": 1.358773946762085, "learning_rate": 2.261043812551028e-07, "loss": 0.1684703826904297, "step": 7017 }, { "epoch": 0.9487790452049008, "grad_norm": 1.4487115144729614, "learning_rate": 2.249220105006633e-07, "loss": 0.1723041534423828, "step": 7018 }, { "epoch": 0.9489142374313477, "grad_norm": 1.2705066204071045, "learning_rate": 2.2374271599625185e-07, "loss": 0.17004108428955078, "step": 7019 }, { "epoch": 0.9490494296577947, "grad_norm": 1.2343331575393677, "learning_rate": 2.2256649798740204e-07, "loss": 0.2066946029663086, "step": 7020 }, { "epoch": 0.9491846218842417, "grad_norm": 1.3259086608886719, "learning_rate": 2.2139335671901294e-07, "loss": 0.14203453063964844, "step": 7021 }, { "epoch": 0.9493198141106887, "grad_norm": 0.778133749961853, "learning_rate": 2.2022329243533422e-07, "loss": 0.1772785186767578, "step": 7022 }, { "epoch": 0.9494550063371356, "grad_norm": 0.9714994430541992, "learning_rate": 2.19056305379981e-07, "loss": 0.2127361297607422, "step": 7023 }, { "epoch": 0.9495901985635826, "grad_norm": 1.2761167287826538, "learning_rate": 2.178923957959289e-07, "loss": 0.15991878509521484, "step": 7024 }, { "epoch": 0.9497253907900296, "grad_norm": 1.0379520654678345, "learning_rate": 2.1673156392550408e-07, "loss": 0.1804487705230713, "step": 7025 }, { "epoch": 0.9498605830164766, "grad_norm": 0.8538023829460144, "learning_rate": 2.155738100104049e-07, "loss": 0.169907808303833, "step": 7026 }, { "epoch": 0.9499957752429236, "grad_norm": 1.181728482246399, "learning_rate": 2.1441913429167682e-07, "loss": 0.17593955993652344, "step": 7027 }, { "epoch": 0.9501309674693705, "grad_norm": 0.7111400961875916, "learning_rate": 2.1326753700973256e-07, "loss": 0.1240682601928711, "step": 7028 }, { "epoch": 0.9502661596958175, "grad_norm": 1.1719928979873657, "learning_rate": 2.1211901840434034e-07, "loss": 0.15153980255126953, "step": 7029 }, { "epoch": 0.9504013519222645, "grad_norm": 2.7940778732299805, "learning_rate": 2.1097357871462386e-07, "loss": 0.19905662536621094, "step": 7030 }, { "epoch": 0.9505365441487115, "grad_norm": 1.7728191614151, "learning_rate": 2.098312181790757e-07, "loss": 0.20144343376159668, "step": 7031 }, { "epoch": 0.9506717363751584, "grad_norm": 0.6830088496208191, "learning_rate": 2.086919370355389e-07, "loss": 0.11834859848022461, "step": 7032 }, { "epoch": 0.9508069286016054, "grad_norm": 0.8876307606697083, "learning_rate": 2.075557355212171e-07, "loss": 0.1480649709701538, "step": 7033 }, { "epoch": 0.9509421208280524, "grad_norm": 1.082811951637268, "learning_rate": 2.0642261387267268e-07, "loss": 0.17164230346679688, "step": 7034 }, { "epoch": 0.9510773130544994, "grad_norm": 0.9651477336883545, "learning_rate": 2.0529257232583033e-07, "loss": 0.20316505432128906, "step": 7035 }, { "epoch": 0.9512125052809464, "grad_norm": 1.1677285432815552, "learning_rate": 2.0416561111596844e-07, "loss": 0.14597034454345703, "step": 7036 }, { "epoch": 0.9513476975073933, "grad_norm": 0.881619930267334, "learning_rate": 2.0304173047772933e-07, "loss": 0.18599891662597656, "step": 7037 }, { "epoch": 0.9514828897338403, "grad_norm": 0.9835766553878784, "learning_rate": 2.0192093064510753e-07, "loss": 0.16470098495483398, "step": 7038 }, { "epoch": 0.9516180819602873, "grad_norm": 1.1271790266036987, "learning_rate": 2.0080321185146134e-07, "loss": 0.17893218994140625, "step": 7039 }, { "epoch": 0.9517532741867343, "grad_norm": 1.0424292087554932, "learning_rate": 1.996885743295046e-07, "loss": 0.17154884338378906, "step": 7040 }, { "epoch": 0.9518884664131813, "grad_norm": 1.6355482339859009, "learning_rate": 1.985770183113117e-07, "loss": 0.2030085325241089, "step": 7041 }, { "epoch": 0.9520236586396282, "grad_norm": 1.4516222476959229, "learning_rate": 1.9746854402831583e-07, "loss": 0.15929412841796875, "step": 7042 }, { "epoch": 0.9521588508660752, "grad_norm": 0.7079383730888367, "learning_rate": 1.963631517113057e-07, "loss": 0.1104116439819336, "step": 7043 }, { "epoch": 0.9522940430925222, "grad_norm": 0.9599607586860657, "learning_rate": 1.952608415904289e-07, "loss": 0.16903924942016602, "step": 7044 }, { "epoch": 0.9524292353189692, "grad_norm": 1.5272794961929321, "learning_rate": 1.9416161389519348e-07, "loss": 0.1675567626953125, "step": 7045 }, { "epoch": 0.9525644275454161, "grad_norm": 1.2193652391433716, "learning_rate": 1.9306546885446475e-07, "loss": 0.155792236328125, "step": 7046 }, { "epoch": 0.9526996197718631, "grad_norm": 0.8074981570243835, "learning_rate": 1.919724066964651e-07, "loss": 0.1313788890838623, "step": 7047 }, { "epoch": 0.9528348119983101, "grad_norm": 1.130308985710144, "learning_rate": 1.908824276487775e-07, "loss": 0.14759445190429688, "step": 7048 }, { "epoch": 0.9529700042247571, "grad_norm": 1.1680020093917847, "learning_rate": 1.8979553193833876e-07, "loss": 0.16346263885498047, "step": 7049 }, { "epoch": 0.9531051964512041, "grad_norm": 0.8579732775688171, "learning_rate": 1.8871171979144786e-07, "loss": 0.14668750762939453, "step": 7050 }, { "epoch": 0.953240388677651, "grad_norm": 0.9967208504676819, "learning_rate": 1.8763099143376262e-07, "loss": 0.14379024505615234, "step": 7051 }, { "epoch": 0.953375580904098, "grad_norm": 1.1646144390106201, "learning_rate": 1.8655334709029303e-07, "loss": 0.22216796875, "step": 7052 }, { "epoch": 0.953510773130545, "grad_norm": 0.7213800549507141, "learning_rate": 1.8547878698541132e-07, "loss": 0.16598081588745117, "step": 7053 }, { "epoch": 0.953645965356992, "grad_norm": 1.0283057689666748, "learning_rate": 1.8440731134284684e-07, "loss": 0.13375091552734375, "step": 7054 }, { "epoch": 0.953781157583439, "grad_norm": 1.6930747032165527, "learning_rate": 1.833389203856861e-07, "loss": 0.19726181030273438, "step": 7055 }, { "epoch": 0.9539163498098859, "grad_norm": 1.482744574546814, "learning_rate": 1.8227361433637625e-07, "loss": 0.19539260864257812, "step": 7056 }, { "epoch": 0.9540515420363329, "grad_norm": 1.0703012943267822, "learning_rate": 1.812113934167148e-07, "loss": 0.1871337890625, "step": 7057 }, { "epoch": 0.9541867342627799, "grad_norm": 1.0114115476608276, "learning_rate": 1.8015225784786483e-07, "loss": 0.17520053684711456, "step": 7058 }, { "epoch": 0.9543219264892269, "grad_norm": 1.0584514141082764, "learning_rate": 1.7909620785034663e-07, "loss": 0.19161224365234375, "step": 7059 }, { "epoch": 0.9544571187156738, "grad_norm": 0.8739388585090637, "learning_rate": 1.7804324364402936e-07, "loss": 0.17091941833496094, "step": 7060 }, { "epoch": 0.9545923109421208, "grad_norm": 1.7398759126663208, "learning_rate": 1.769933654481526e-07, "loss": 0.16160964965820312, "step": 7061 }, { "epoch": 0.9547275031685678, "grad_norm": 1.3912725448608398, "learning_rate": 1.7594657348129984e-07, "loss": 0.18796837329864502, "step": 7062 }, { "epoch": 0.9548626953950148, "grad_norm": 0.8760247826576233, "learning_rate": 1.749028679614234e-07, "loss": 0.16924095153808594, "step": 7063 }, { "epoch": 0.9549978876214618, "grad_norm": 1.1377896070480347, "learning_rate": 1.7386224910582615e-07, "loss": 0.11742448806762695, "step": 7064 }, { "epoch": 0.9551330798479087, "grad_norm": 1.3483519554138184, "learning_rate": 1.728247171311731e-07, "loss": 0.16912841796875, "step": 7065 }, { "epoch": 0.9552682720743557, "grad_norm": 1.2951308488845825, "learning_rate": 1.7179027225348142e-07, "loss": 0.19443154335021973, "step": 7066 }, { "epoch": 0.9554034643008027, "grad_norm": 1.1068569421768188, "learning_rate": 1.7075891468812722e-07, "loss": 0.18080520629882812, "step": 7067 }, { "epoch": 0.9555386565272497, "grad_norm": 1.0774205923080444, "learning_rate": 1.69730644649847e-07, "loss": 0.15363025665283203, "step": 7068 }, { "epoch": 0.9556738487536967, "grad_norm": 1.6942801475524902, "learning_rate": 1.687054623527312e-07, "loss": 0.19241046905517578, "step": 7069 }, { "epoch": 0.9558090409801436, "grad_norm": 0.8343832492828369, "learning_rate": 1.676833680102291e-07, "loss": 0.10416269302368164, "step": 7070 }, { "epoch": 0.9559442332065906, "grad_norm": 0.8511726260185242, "learning_rate": 1.6666436183514378e-07, "loss": 0.10283756256103516, "step": 7071 }, { "epoch": 0.9560794254330376, "grad_norm": 1.1140782833099365, "learning_rate": 1.6564844403964053e-07, "loss": 0.1880664825439453, "step": 7072 }, { "epoch": 0.9562146176594846, "grad_norm": 0.8259708285331726, "learning_rate": 1.6463561483523682e-07, "loss": 0.15117835998535156, "step": 7073 }, { "epoch": 0.9563498098859315, "grad_norm": 1.044789433479309, "learning_rate": 1.6362587443281063e-07, "loss": 0.1439371109008789, "step": 7074 }, { "epoch": 0.9564850021123785, "grad_norm": 3.2706148624420166, "learning_rate": 1.626192230425938e-07, "loss": 0.20502901077270508, "step": 7075 }, { "epoch": 0.9566201943388255, "grad_norm": 1.0384197235107422, "learning_rate": 1.6161566087417868e-07, "loss": 0.18082523345947266, "step": 7076 }, { "epoch": 0.9567553865652725, "grad_norm": 0.7943189740180969, "learning_rate": 1.6061518813650977e-07, "loss": 0.16203641891479492, "step": 7077 }, { "epoch": 0.9568905787917195, "grad_norm": 0.878852903842926, "learning_rate": 1.5961780503789215e-07, "loss": 0.1276082992553711, "step": 7078 }, { "epoch": 0.9570257710181664, "grad_norm": 1.022603154182434, "learning_rate": 1.5862351178598633e-07, "loss": 0.15902996063232422, "step": 7079 }, { "epoch": 0.9571609632446134, "grad_norm": 1.2049932479858398, "learning_rate": 1.5763230858781008e-07, "loss": 0.1198415756225586, "step": 7080 }, { "epoch": 0.9572961554710604, "grad_norm": 0.9522535800933838, "learning_rate": 1.5664419564973497e-07, "loss": 0.16373729705810547, "step": 7081 }, { "epoch": 0.9574313476975074, "grad_norm": 1.0250359773635864, "learning_rate": 1.5565917317749146e-07, "loss": 0.1851975917816162, "step": 7082 }, { "epoch": 0.9575665399239544, "grad_norm": 1.3224772214889526, "learning_rate": 1.5467724137617046e-07, "loss": 0.18883132934570312, "step": 7083 }, { "epoch": 0.9577017321504013, "grad_norm": 1.297202229499817, "learning_rate": 1.5369840045021178e-07, "loss": 0.1912250518798828, "step": 7084 }, { "epoch": 0.9578369243768483, "grad_norm": 0.9278029203414917, "learning_rate": 1.5272265060341572e-07, "loss": 0.19639205932617188, "step": 7085 }, { "epoch": 0.9579721166032953, "grad_norm": 1.536542534828186, "learning_rate": 1.517499920389398e-07, "loss": 0.19723987579345703, "step": 7086 }, { "epoch": 0.9581073088297423, "grad_norm": 0.9885773062705994, "learning_rate": 1.5078042495929534e-07, "loss": 0.19787216186523438, "step": 7087 }, { "epoch": 0.9582425010561892, "grad_norm": 1.3699674606323242, "learning_rate": 1.498139495663542e-07, "loss": 0.15988171100616455, "step": 7088 }, { "epoch": 0.9583776932826362, "grad_norm": 1.084795355796814, "learning_rate": 1.4885056606133707e-07, "loss": 0.19570159912109375, "step": 7089 }, { "epoch": 0.9585128855090832, "grad_norm": 0.6768075227737427, "learning_rate": 1.478902746448302e-07, "loss": 0.13908278942108154, "step": 7090 }, { "epoch": 0.9586480777355302, "grad_norm": 1.4892845153808594, "learning_rate": 1.469330755167686e-07, "loss": 0.22181129455566406, "step": 7091 }, { "epoch": 0.9587832699619772, "grad_norm": 1.0576359033584595, "learning_rate": 1.4597896887644458e-07, "loss": 0.1736459732055664, "step": 7092 }, { "epoch": 0.9589184621884241, "grad_norm": 0.7634274959564209, "learning_rate": 1.4502795492251418e-07, "loss": 0.1763286590576172, "step": 7093 }, { "epoch": 0.9590536544148711, "grad_norm": 0.7180197238922119, "learning_rate": 1.4408003385297742e-07, "loss": 0.1296900510787964, "step": 7094 }, { "epoch": 0.9591888466413181, "grad_norm": 0.7676934599876404, "learning_rate": 1.4313520586519968e-07, "loss": 0.11440253257751465, "step": 7095 }, { "epoch": 0.9593240388677651, "grad_norm": 0.7943113446235657, "learning_rate": 1.4219347115589863e-07, "loss": 0.14129972457885742, "step": 7096 }, { "epoch": 0.959459231094212, "grad_norm": 1.0915900468826294, "learning_rate": 1.4125482992114914e-07, "loss": 0.14510822296142578, "step": 7097 }, { "epoch": 0.959594423320659, "grad_norm": 2.141817092895508, "learning_rate": 1.403192823563798e-07, "loss": 0.15626931190490723, "step": 7098 }, { "epoch": 0.959729615547106, "grad_norm": 1.0602796077728271, "learning_rate": 1.3938682865637654e-07, "loss": 0.16187810897827148, "step": 7099 }, { "epoch": 0.959864807773553, "grad_norm": 0.9447826743125916, "learning_rate": 1.38457469015284e-07, "loss": 0.16381263732910156, "step": 7100 }, { "epoch": 0.96, "grad_norm": 1.0039433240890503, "learning_rate": 1.3753120362659576e-07, "loss": 0.1378326416015625, "step": 7101 }, { "epoch": 0.9601351922264469, "grad_norm": 1.00853431224823, "learning_rate": 1.3660803268316925e-07, "loss": 0.18384933471679688, "step": 7102 }, { "epoch": 0.9602703844528939, "grad_norm": 0.7060810923576355, "learning_rate": 1.3568795637721065e-07, "loss": 0.15129804611206055, "step": 7103 }, { "epoch": 0.9604055766793409, "grad_norm": 1.0206860303878784, "learning_rate": 1.347709749002851e-07, "loss": 0.16194725036621094, "step": 7104 }, { "epoch": 0.9605407689057879, "grad_norm": 1.98849618434906, "learning_rate": 1.338570884433149e-07, "loss": 0.2380657196044922, "step": 7105 }, { "epoch": 0.9606759611322349, "grad_norm": 0.991416871547699, "learning_rate": 1.3294629719657448e-07, "loss": 0.19721364974975586, "step": 7106 }, { "epoch": 0.9608111533586818, "grad_norm": 1.5264153480529785, "learning_rate": 1.3203860134969548e-07, "loss": 0.178314208984375, "step": 7107 }, { "epoch": 0.9609463455851288, "grad_norm": 0.9761550426483154, "learning_rate": 1.3113400109166508e-07, "loss": 0.14107108116149902, "step": 7108 }, { "epoch": 0.9610815378115758, "grad_norm": 2.3769993782043457, "learning_rate": 1.3023249661082592e-07, "loss": 0.19447320699691772, "step": 7109 }, { "epoch": 0.9612167300380228, "grad_norm": 0.9111101031303406, "learning_rate": 1.2933408809487623e-07, "loss": 0.1765880584716797, "step": 7110 }, { "epoch": 0.9613519222644697, "grad_norm": 1.793071985244751, "learning_rate": 1.2843877573086972e-07, "loss": 0.1785578727722168, "step": 7111 }, { "epoch": 0.9614871144909167, "grad_norm": 0.776685357093811, "learning_rate": 1.2754655970521556e-07, "loss": 0.11192750930786133, "step": 7112 }, { "epoch": 0.9616223067173637, "grad_norm": 1.4859867095947266, "learning_rate": 1.2665744020367686e-07, "loss": 0.17167648673057556, "step": 7113 }, { "epoch": 0.9617574989438107, "grad_norm": 1.095131516456604, "learning_rate": 1.2577141741137388e-07, "loss": 0.1838674545288086, "step": 7114 }, { "epoch": 0.9618926911702577, "grad_norm": 1.3015220165252686, "learning_rate": 1.248884915127807e-07, "loss": 0.1648005247116089, "step": 7115 }, { "epoch": 0.9620278833967046, "grad_norm": 0.9335525631904602, "learning_rate": 1.2400866269172694e-07, "loss": 0.12241888046264648, "step": 7116 }, { "epoch": 0.9621630756231516, "grad_norm": 2.4253151416778564, "learning_rate": 1.2313193113139777e-07, "loss": 0.1744241714477539, "step": 7117 }, { "epoch": 0.9622982678495986, "grad_norm": 1.8318414688110352, "learning_rate": 1.2225829701433545e-07, "loss": 0.14675283432006836, "step": 7118 }, { "epoch": 0.9624334600760456, "grad_norm": 0.8328853845596313, "learning_rate": 1.2138776052243116e-07, "loss": 0.1313028335571289, "step": 7119 }, { "epoch": 0.9625686523024926, "grad_norm": 1.4564807415008545, "learning_rate": 1.2052032183693996e-07, "loss": 0.2045459747314453, "step": 7120 }, { "epoch": 0.9627038445289395, "grad_norm": 1.504341959953308, "learning_rate": 1.196559811384623e-07, "loss": 0.2007904052734375, "step": 7121 }, { "epoch": 0.9628390367553865, "grad_norm": 1.3762198686599731, "learning_rate": 1.1879473860696266e-07, "loss": 0.17700862884521484, "step": 7122 }, { "epoch": 0.9629742289818335, "grad_norm": 1.278596043586731, "learning_rate": 1.179365944217542e-07, "loss": 0.22965621948242188, "step": 7123 }, { "epoch": 0.9631094212082805, "grad_norm": 0.8342136144638062, "learning_rate": 1.1708154876150735e-07, "loss": 0.16316509246826172, "step": 7124 }, { "epoch": 0.9632446134347274, "grad_norm": 0.8797103762626648, "learning_rate": 1.1622960180424801e-07, "loss": 0.1547832489013672, "step": 7125 }, { "epoch": 0.9633798056611745, "grad_norm": 1.561728596687317, "learning_rate": 1.1538075372735435e-07, "loss": 0.17431354522705078, "step": 7126 }, { "epoch": 0.9635149978876215, "grad_norm": 1.3765701055526733, "learning_rate": 1.1453500470756328e-07, "loss": 0.1888265609741211, "step": 7127 }, { "epoch": 0.9636501901140685, "grad_norm": 0.6069510579109192, "learning_rate": 1.1369235492096397e-07, "loss": 0.12566661834716797, "step": 7128 }, { "epoch": 0.9637853823405155, "grad_norm": 1.071848750114441, "learning_rate": 1.1285280454299774e-07, "loss": 0.12290287017822266, "step": 7129 }, { "epoch": 0.9639205745669625, "grad_norm": 1.1194480657577515, "learning_rate": 1.1201635374846808e-07, "loss": 0.14417016506195068, "step": 7130 }, { "epoch": 0.9640557667934094, "grad_norm": 1.3878682851791382, "learning_rate": 1.1118300271152404e-07, "loss": 0.1517190933227539, "step": 7131 }, { "epoch": 0.9641909590198564, "grad_norm": 1.0549837350845337, "learning_rate": 1.1035275160567682e-07, "loss": 0.13437843322753906, "step": 7132 }, { "epoch": 0.9643261512463034, "grad_norm": 0.8931710124015808, "learning_rate": 1.0952560060378813e-07, "loss": 0.16132497787475586, "step": 7133 }, { "epoch": 0.9644613434727504, "grad_norm": 2.0878305435180664, "learning_rate": 1.0870154987807523e-07, "loss": 0.18332195281982422, "step": 7134 }, { "epoch": 0.9645965356991973, "grad_norm": 1.961843729019165, "learning_rate": 1.0788059960010921e-07, "loss": 0.21039772033691406, "step": 7135 }, { "epoch": 0.9647317279256443, "grad_norm": 2.4643235206604004, "learning_rate": 1.0706274994081499e-07, "loss": 0.202484130859375, "step": 7136 }, { "epoch": 0.9648669201520913, "grad_norm": 1.414941668510437, "learning_rate": 1.0624800107047805e-07, "loss": 0.17751026153564453, "step": 7137 }, { "epoch": 0.9650021123785383, "grad_norm": 1.6314334869384766, "learning_rate": 1.0543635315872934e-07, "loss": 0.1760258674621582, "step": 7138 }, { "epoch": 0.9651373046049853, "grad_norm": 1.513124704360962, "learning_rate": 1.0462780637455871e-07, "loss": 0.16096115112304688, "step": 7139 }, { "epoch": 0.9652724968314322, "grad_norm": 1.3170726299285889, "learning_rate": 1.0382236088631148e-07, "loss": 0.19763565063476562, "step": 7140 }, { "epoch": 0.9654076890578792, "grad_norm": 1.1763368844985962, "learning_rate": 1.0302001686168349e-07, "loss": 0.19106268882751465, "step": 7141 }, { "epoch": 0.9655428812843262, "grad_norm": 1.1916087865829468, "learning_rate": 1.0222077446772949e-07, "loss": 0.22853660583496094, "step": 7142 }, { "epoch": 0.9656780735107732, "grad_norm": 0.9290386438369751, "learning_rate": 1.0142463387085465e-07, "loss": 0.1544055938720703, "step": 7143 }, { "epoch": 0.9658132657372202, "grad_norm": 1.145377516746521, "learning_rate": 1.0063159523682142e-07, "loss": 0.1629772186279297, "step": 7144 }, { "epoch": 0.9659484579636671, "grad_norm": 0.5970140099525452, "learning_rate": 9.984165873074102e-08, "loss": 0.12464714050292969, "step": 7145 }, { "epoch": 0.9660836501901141, "grad_norm": 1.0250977277755737, "learning_rate": 9.905482451708526e-08, "loss": 0.15589046478271484, "step": 7146 }, { "epoch": 0.9662188424165611, "grad_norm": 1.4684053659439087, "learning_rate": 9.827109275967638e-08, "loss": 0.18293946981430054, "step": 7147 }, { "epoch": 0.9663540346430081, "grad_norm": 2.2026636600494385, "learning_rate": 9.749046362169223e-08, "loss": 0.18991661071777344, "step": 7148 }, { "epoch": 0.966489226869455, "grad_norm": 0.7151421308517456, "learning_rate": 9.671293726566443e-08, "loss": 0.13555145263671875, "step": 7149 }, { "epoch": 0.966624419095902, "grad_norm": 1.5888656377792358, "learning_rate": 9.593851385347518e-08, "loss": 0.17270278930664062, "step": 7150 }, { "epoch": 0.966759611322349, "grad_norm": 1.7988790273666382, "learning_rate": 9.516719354636716e-08, "loss": 0.13352978229522705, "step": 7151 }, { "epoch": 0.966894803548796, "grad_norm": 0.8385295271873474, "learning_rate": 9.439897650493024e-08, "loss": 0.14668607711791992, "step": 7152 }, { "epoch": 0.967029995775243, "grad_norm": 1.4584004878997803, "learning_rate": 9.363386288911313e-08, "loss": 0.17164230346679688, "step": 7153 }, { "epoch": 0.9671651880016899, "grad_norm": 1.8320735692977905, "learning_rate": 9.287185285821675e-08, "loss": 0.1378650665283203, "step": 7154 }, { "epoch": 0.9673003802281369, "grad_norm": 1.011025071144104, "learning_rate": 9.211294657089587e-08, "loss": 0.17245864868164062, "step": 7155 }, { "epoch": 0.9674355724545839, "grad_norm": 1.7312713861465454, "learning_rate": 9.135714418515573e-08, "loss": 0.19532012939453125, "step": 7156 }, { "epoch": 0.9675707646810309, "grad_norm": 1.5306978225708008, "learning_rate": 9.060444585836381e-08, "loss": 0.18760108947753906, "step": 7157 }, { "epoch": 0.9677059569074778, "grad_norm": 1.1531139612197876, "learning_rate": 8.985485174722974e-08, "loss": 0.2074298858642578, "step": 7158 }, { "epoch": 0.9678411491339248, "grad_norm": 0.9318092465400696, "learning_rate": 8.910836200782868e-08, "loss": 0.16977548599243164, "step": 7159 }, { "epoch": 0.9679763413603718, "grad_norm": 0.8461886048316956, "learning_rate": 8.836497679557964e-08, "loss": 0.1353921890258789, "step": 7160 }, { "epoch": 0.9681115335868188, "grad_norm": 1.3059446811676025, "learning_rate": 8.762469626526048e-08, "loss": 0.19249725341796875, "step": 7161 }, { "epoch": 0.9682467258132658, "grad_norm": 0.7001742124557495, "learning_rate": 8.688752057100457e-08, "loss": 0.13103771209716797, "step": 7162 }, { "epoch": 0.9683819180397127, "grad_norm": 0.8404279351234436, "learning_rate": 8.615344986629082e-08, "loss": 0.17218804359436035, "step": 7163 }, { "epoch": 0.9685171102661597, "grad_norm": 0.9690999984741211, "learning_rate": 8.542248430396027e-08, "loss": 0.18452072143554688, "step": 7164 }, { "epoch": 0.9686523024926067, "grad_norm": 1.1719168424606323, "learning_rate": 8.469462403620287e-08, "loss": 0.1506209373474121, "step": 7165 }, { "epoch": 0.9687874947190537, "grad_norm": 1.027123212814331, "learning_rate": 8.39698692145624e-08, "loss": 0.1435985565185547, "step": 7166 }, { "epoch": 0.9689226869455007, "grad_norm": 1.2666568756103516, "learning_rate": 8.324821998993648e-08, "loss": 0.16504454612731934, "step": 7167 }, { "epoch": 0.9690578791719476, "grad_norm": 0.8207797408103943, "learning_rate": 8.252967651257826e-08, "loss": 0.15892601013183594, "step": 7168 }, { "epoch": 0.9691930713983946, "grad_norm": 1.1917369365692139, "learning_rate": 8.181423893208973e-08, "loss": 0.11492204666137695, "step": 7169 }, { "epoch": 0.9693282636248416, "grad_norm": 1.136677622795105, "learning_rate": 8.110190739743172e-08, "loss": 0.1826953887939453, "step": 7170 }, { "epoch": 0.9694634558512886, "grad_norm": 1.513115644454956, "learning_rate": 8.03926820569123e-08, "loss": 0.2111072540283203, "step": 7171 }, { "epoch": 0.9695986480777355, "grad_norm": 1.2681777477264404, "learning_rate": 7.968656305819833e-08, "loss": 0.17836666107177734, "step": 7172 }, { "epoch": 0.9697338403041825, "grad_norm": 1.457101583480835, "learning_rate": 7.898355054830719e-08, "loss": 0.147369384765625, "step": 7173 }, { "epoch": 0.9698690325306295, "grad_norm": 1.0182133913040161, "learning_rate": 7.828364467360849e-08, "loss": 0.14681339263916016, "step": 7174 }, { "epoch": 0.9700042247570765, "grad_norm": 1.2612043619155884, "learning_rate": 7.758684557982731e-08, "loss": 0.1560688018798828, "step": 7175 }, { "epoch": 0.9701394169835235, "grad_norm": 1.7324298620224, "learning_rate": 7.689315341204262e-08, "loss": 0.23563766479492188, "step": 7176 }, { "epoch": 0.9702746092099704, "grad_norm": 0.8930456042289734, "learning_rate": 7.62025683146822e-08, "loss": 0.17041826248168945, "step": 7177 }, { "epoch": 0.9704098014364174, "grad_norm": 0.8613118529319763, "learning_rate": 7.551509043152937e-08, "loss": 0.1299428939819336, "step": 7178 }, { "epoch": 0.9705449936628644, "grad_norm": 0.7200577259063721, "learning_rate": 7.483071990572132e-08, "loss": 0.14456462860107422, "step": 7179 }, { "epoch": 0.9706801858893114, "grad_norm": 0.6957589387893677, "learning_rate": 7.414945687975072e-08, "loss": 0.12284493446350098, "step": 7180 }, { "epoch": 0.9708153781157584, "grad_norm": 0.8150241374969482, "learning_rate": 7.347130149545578e-08, "loss": 0.1579742431640625, "step": 7181 }, { "epoch": 0.9709505703422053, "grad_norm": 2.2492763996124268, "learning_rate": 7.279625389403355e-08, "loss": 0.18596220016479492, "step": 7182 }, { "epoch": 0.9710857625686523, "grad_norm": 0.8839974403381348, "learning_rate": 7.212431421603327e-08, "loss": 0.19951319694519043, "step": 7183 }, { "epoch": 0.9712209547950993, "grad_norm": 1.9972691535949707, "learning_rate": 7.145548260135638e-08, "loss": 0.22493743896484375, "step": 7184 }, { "epoch": 0.9713561470215463, "grad_norm": 1.2882407903671265, "learning_rate": 7.078975918925645e-08, "loss": 0.17240333557128906, "step": 7185 }, { "epoch": 0.9714913392479932, "grad_norm": 1.1543464660644531, "learning_rate": 7.012714411834098e-08, "loss": 0.200042724609375, "step": 7186 }, { "epoch": 0.9716265314744402, "grad_norm": 0.7761035561561584, "learning_rate": 6.946763752656959e-08, "loss": 0.16349124908447266, "step": 7187 }, { "epoch": 0.9717617237008872, "grad_norm": 2.1249611377716064, "learning_rate": 6.881123955125579e-08, "loss": 0.1902303695678711, "step": 7188 }, { "epoch": 0.9718969159273342, "grad_norm": 1.2013185024261475, "learning_rate": 6.815795032906524e-08, "loss": 0.15337467193603516, "step": 7189 }, { "epoch": 0.9720321081537812, "grad_norm": 1.1208775043487549, "learning_rate": 6.750776999601415e-08, "loss": 0.13518810272216797, "step": 7190 }, { "epoch": 0.9721673003802281, "grad_norm": 1.5856140851974487, "learning_rate": 6.68606986874759e-08, "loss": 0.17936277389526367, "step": 7191 }, { "epoch": 0.9723024926066751, "grad_norm": 0.8141245245933533, "learning_rate": 6.62167365381744e-08, "loss": 0.1470470428466797, "step": 7192 }, { "epoch": 0.9724376848331221, "grad_norm": 1.0699129104614258, "learning_rate": 6.557588368218237e-08, "loss": 0.16811370849609375, "step": 7193 }, { "epoch": 0.9725728770595691, "grad_norm": 1.0268573760986328, "learning_rate": 6.493814025293476e-08, "loss": 0.20555877685546875, "step": 7194 }, { "epoch": 0.972708069286016, "grad_norm": 1.0829578638076782, "learning_rate": 6.430350638320704e-08, "loss": 0.19777393341064453, "step": 7195 }, { "epoch": 0.972843261512463, "grad_norm": 1.1269197463989258, "learning_rate": 6.367198220513848e-08, "loss": 0.1859452724456787, "step": 7196 }, { "epoch": 0.97297845373891, "grad_norm": 1.023902416229248, "learning_rate": 6.304356785021226e-08, "loss": 0.16260242462158203, "step": 7197 }, { "epoch": 0.973113645965357, "grad_norm": 2.685072183609009, "learning_rate": 6.241826344926704e-08, "loss": 0.2359628677368164, "step": 7198 }, { "epoch": 0.973248838191804, "grad_norm": 1.1743239164352417, "learning_rate": 6.17960691324987e-08, "loss": 0.21769332885742188, "step": 7199 }, { "epoch": 0.973384030418251, "grad_norm": 0.9565960168838501, "learning_rate": 6.117698502944857e-08, "loss": 0.1842660903930664, "step": 7200 }, { "epoch": 0.9735192226446979, "grad_norm": 0.8898730874061584, "learning_rate": 6.056101126901358e-08, "loss": 0.15668296813964844, "step": 7201 }, { "epoch": 0.9736544148711449, "grad_norm": 1.8366475105285645, "learning_rate": 5.994814797944281e-08, "loss": 0.2002243995666504, "step": 7202 }, { "epoch": 0.9737896070975919, "grad_norm": 0.6003488898277283, "learning_rate": 5.933839528833751e-08, "loss": 0.07465946674346924, "step": 7203 }, { "epoch": 0.9739247993240389, "grad_norm": 0.8286743760108948, "learning_rate": 5.873175332265279e-08, "loss": 0.14000606536865234, "step": 7204 }, { "epoch": 0.9740599915504858, "grad_norm": 1.0218167304992676, "learning_rate": 5.812822220869096e-08, "loss": 0.15097808837890625, "step": 7205 }, { "epoch": 0.9741951837769328, "grad_norm": 1.3449265956878662, "learning_rate": 5.752780207211483e-08, "loss": 0.1745452880859375, "step": 7206 }, { "epoch": 0.9743303760033798, "grad_norm": 0.7474161982536316, "learning_rate": 5.693049303793274e-08, "loss": 0.13282322883605957, "step": 7207 }, { "epoch": 0.9744655682298268, "grad_norm": 1.85740327835083, "learning_rate": 5.6336295230508536e-08, "loss": 0.1816849708557129, "step": 7208 }, { "epoch": 0.9746007604562738, "grad_norm": 1.1863350868225098, "learning_rate": 5.5745208773558266e-08, "loss": 0.18543028831481934, "step": 7209 }, { "epoch": 0.9747359526827207, "grad_norm": 0.8628146648406982, "learning_rate": 5.515723379014681e-08, "loss": 0.17764568328857422, "step": 7210 }, { "epoch": 0.9748711449091677, "grad_norm": 1.4264284372329712, "learning_rate": 5.4572370402694583e-08, "loss": 0.19461441040039062, "step": 7211 }, { "epoch": 0.9750063371356147, "grad_norm": 2.3023037910461426, "learning_rate": 5.399061873297417e-08, "loss": 0.1838665008544922, "step": 7212 }, { "epoch": 0.9751415293620617, "grad_norm": 0.8093187212944031, "learning_rate": 5.341197890210869e-08, "loss": 0.1583251953125, "step": 7213 }, { "epoch": 0.9752767215885086, "grad_norm": 0.8853726983070374, "learning_rate": 5.283645103057344e-08, "loss": 0.11314535140991211, "step": 7214 }, { "epoch": 0.9754119138149556, "grad_norm": 1.486255407333374, "learning_rate": 5.226403523819756e-08, "loss": 0.20304203033447266, "step": 7215 }, { "epoch": 0.9755471060414026, "grad_norm": 0.9995924830436707, "learning_rate": 5.169473164416072e-08, "loss": 0.17139625549316406, "step": 7216 }, { "epoch": 0.9756822982678496, "grad_norm": 0.8910306096076965, "learning_rate": 5.112854036699477e-08, "loss": 0.1562633514404297, "step": 7217 }, { "epoch": 0.9758174904942966, "grad_norm": 1.2105767726898193, "learning_rate": 5.0565461524583745e-08, "loss": 0.17580699920654297, "step": 7218 }, { "epoch": 0.9759526827207435, "grad_norm": 1.0213998556137085, "learning_rate": 5.0005495234163865e-08, "loss": 0.1726093292236328, "step": 7219 }, { "epoch": 0.9760878749471905, "grad_norm": 1.1177624464035034, "learning_rate": 4.9448641612321874e-08, "loss": 0.1695718765258789, "step": 7220 }, { "epoch": 0.9762230671736375, "grad_norm": 0.9595202803611755, "learning_rate": 4.889490077500003e-08, "loss": 0.1505718231201172, "step": 7221 }, { "epoch": 0.9763582594000845, "grad_norm": 1.8065110445022583, "learning_rate": 4.8344272837489434e-08, "loss": 0.19502532482147217, "step": 7222 }, { "epoch": 0.9764934516265315, "grad_norm": 1.5631041526794434, "learning_rate": 4.779675791443172e-08, "loss": 0.13937902450561523, "step": 7223 }, { "epoch": 0.9766286438529784, "grad_norm": 0.8443604707717896, "learning_rate": 4.72523561198257e-08, "loss": 0.16242873668670654, "step": 7224 }, { "epoch": 0.9767638360794254, "grad_norm": 1.0379021167755127, "learning_rate": 4.6711067567014044e-08, "loss": 0.18265342712402344, "step": 7225 }, { "epoch": 0.9768990283058724, "grad_norm": 0.9588872790336609, "learning_rate": 4.6172892368701595e-08, "loss": 0.1867837905883789, "step": 7226 }, { "epoch": 0.9770342205323194, "grad_norm": 1.001046061515808, "learning_rate": 4.5637830636935385e-08, "loss": 0.1444079875946045, "step": 7227 }, { "epoch": 0.9771694127587663, "grad_norm": 1.2527439594268799, "learning_rate": 4.5105882483119643e-08, "loss": 0.18381083011627197, "step": 7228 }, { "epoch": 0.9773046049852133, "grad_norm": 1.439324140548706, "learning_rate": 4.4577048018007436e-08, "loss": 0.16228103637695312, "step": 7229 }, { "epoch": 0.9774397972116603, "grad_norm": 1.0737818479537964, "learning_rate": 4.405132735170569e-08, "loss": 0.15839767456054688, "step": 7230 }, { "epoch": 0.9775749894381073, "grad_norm": 0.8232232928276062, "learning_rate": 4.3528720593675184e-08, "loss": 0.1758575439453125, "step": 7231 }, { "epoch": 0.9777101816645543, "grad_norm": 0.9403512477874756, "learning_rate": 4.300922785271888e-08, "loss": 0.1376628875732422, "step": 7232 }, { "epoch": 0.9778453738910012, "grad_norm": 1.2808226346969604, "learning_rate": 4.249284923700358e-08, "loss": 0.16298246383666992, "step": 7233 }, { "epoch": 0.9779805661174482, "grad_norm": 1.59022855758667, "learning_rate": 4.197958485404163e-08, "loss": 0.1289353370666504, "step": 7234 }, { "epoch": 0.9781157583438952, "grad_norm": 1.9270899295806885, "learning_rate": 4.1469434810694206e-08, "loss": 0.1957225799560547, "step": 7235 }, { "epoch": 0.9782509505703422, "grad_norm": 1.4817548990249634, "learning_rate": 4.096239921317968e-08, "loss": 0.16260910034179688, "step": 7236 }, { "epoch": 0.9783861427967891, "grad_norm": 1.0501106977462769, "learning_rate": 4.045847816706361e-08, "loss": 0.15535354614257812, "step": 7237 }, { "epoch": 0.9785213350232361, "grad_norm": 1.6144909858703613, "learning_rate": 3.9957671777268724e-08, "loss": 0.23519515991210938, "step": 7238 }, { "epoch": 0.9786565272496831, "grad_norm": 0.9325268268585205, "learning_rate": 3.945998014806163e-08, "loss": 0.18716812133789062, "step": 7239 }, { "epoch": 0.9787917194761301, "grad_norm": 1.1416516304016113, "learning_rate": 3.896540338306609e-08, "loss": 0.16619491577148438, "step": 7240 }, { "epoch": 0.9789269117025771, "grad_norm": 1.0785635709762573, "learning_rate": 3.847394158525641e-08, "loss": 0.17587709426879883, "step": 7241 }, { "epoch": 0.979062103929024, "grad_norm": 1.9263311624526978, "learning_rate": 3.798559485695574e-08, "loss": 0.23343753814697266, "step": 7242 }, { "epoch": 0.979197296155471, "grad_norm": 0.9645626544952393, "learning_rate": 3.7500363299842746e-08, "loss": 0.16544723510742188, "step": 7243 }, { "epoch": 0.979332488381918, "grad_norm": 1.0040361881256104, "learning_rate": 3.701824701494327e-08, "loss": 0.1776714324951172, "step": 7244 }, { "epoch": 0.979467680608365, "grad_norm": 1.1052600145339966, "learning_rate": 3.653924610263703e-08, "loss": 0.17157649993896484, "step": 7245 }, { "epoch": 0.979602872834812, "grad_norm": 0.9224521517753601, "learning_rate": 3.6063360662654255e-08, "loss": 0.15214157104492188, "step": 7246 }, { "epoch": 0.9797380650612589, "grad_norm": 0.9142236113548279, "learning_rate": 3.559059079407734e-08, "loss": 0.14783668518066406, "step": 7247 }, { "epoch": 0.9798732572877059, "grad_norm": 0.9299970865249634, "learning_rate": 3.512093659533922e-08, "loss": 0.20923995971679688, "step": 7248 }, { "epoch": 0.9800084495141529, "grad_norm": 0.7759482264518738, "learning_rate": 3.4654398164225e-08, "loss": 0.1537771224975586, "step": 7249 }, { "epoch": 0.9801436417405999, "grad_norm": 2.05537748336792, "learning_rate": 3.4190975597870325e-08, "loss": 0.18368864059448242, "step": 7250 }, { "epoch": 0.9802788339670468, "grad_norm": 0.983456015586853, "learning_rate": 3.373066899276134e-08, "loss": 0.15195655822753906, "step": 7251 }, { "epoch": 0.9804140261934938, "grad_norm": 0.7887934446334839, "learning_rate": 3.3273478444736386e-08, "loss": 0.15648460388183594, "step": 7252 }, { "epoch": 0.9805492184199408, "grad_norm": 0.9016557931900024, "learning_rate": 3.281940404898764e-08, "loss": 0.1652379035949707, "step": 7253 }, { "epoch": 0.9806844106463878, "grad_norm": 1.712913155555725, "learning_rate": 3.236844590005117e-08, "loss": 0.194793701171875, "step": 7254 }, { "epoch": 0.9808196028728348, "grad_norm": 1.3363252878189087, "learning_rate": 3.192060409182351e-08, "loss": 0.19053077697753906, "step": 7255 }, { "epoch": 0.9809547950992817, "grad_norm": 2.1128203868865967, "learning_rate": 3.147587871754509e-08, "loss": 0.2031574249267578, "step": 7256 }, { "epoch": 0.9810899873257287, "grad_norm": 1.0806351900100708, "learning_rate": 3.1034269869810174e-08, "loss": 0.19896411895751953, "step": 7257 }, { "epoch": 0.9812251795521757, "grad_norm": 1.3647725582122803, "learning_rate": 3.05957776405652e-08, "loss": 0.1773386001586914, "step": 7258 }, { "epoch": 0.9813603717786227, "grad_norm": 0.8439529538154602, "learning_rate": 3.016040212110549e-08, "loss": 0.1278771162033081, "step": 7259 }, { "epoch": 0.9814955640050697, "grad_norm": 1.5146340131759644, "learning_rate": 2.9728143402078522e-08, "loss": 0.18013668060302734, "step": 7260 }, { "epoch": 0.9816307562315166, "grad_norm": 0.9241484999656677, "learning_rate": 2.9299001573483975e-08, "loss": 0.13370704650878906, "step": 7261 }, { "epoch": 0.9817659484579637, "grad_norm": 1.0415635108947754, "learning_rate": 2.8872976724670375e-08, "loss": 0.20318269729614258, "step": 7262 }, { "epoch": 0.9819011406844107, "grad_norm": 0.9046740531921387, "learning_rate": 2.8450068944338436e-08, "loss": 0.1349802017211914, "step": 7263 }, { "epoch": 0.9820363329108577, "grad_norm": 0.9697754979133606, "learning_rate": 2.803027832054106e-08, "loss": 0.1750507354736328, "step": 7264 }, { "epoch": 0.9821715251373047, "grad_norm": 1.0696742534637451, "learning_rate": 2.7613604940679995e-08, "loss": 0.17043828964233398, "step": 7265 }, { "epoch": 0.9823067173637516, "grad_norm": 0.9562708139419556, "learning_rate": 2.7200048891509176e-08, "loss": 0.19631004333496094, "step": 7266 }, { "epoch": 0.9824419095901986, "grad_norm": 1.1618362665176392, "learning_rate": 2.67896102591314e-08, "loss": 0.19598841667175293, "step": 7267 }, { "epoch": 0.9825771018166456, "grad_norm": 1.6189159154891968, "learning_rate": 2.6382289129004978e-08, "loss": 0.17659282684326172, "step": 7268 }, { "epoch": 0.9827122940430926, "grad_norm": 1.9550522565841675, "learning_rate": 2.5978085585935395e-08, "loss": 0.17226266860961914, "step": 7269 }, { "epoch": 0.9828474862695395, "grad_norm": 0.9543769955635071, "learning_rate": 2.5576999714078676e-08, "loss": 0.17045879364013672, "step": 7270 }, { "epoch": 0.9829826784959865, "grad_norm": 0.6764682531356812, "learning_rate": 2.517903159694468e-08, "loss": 0.12017536163330078, "step": 7271 }, { "epoch": 0.9831178707224335, "grad_norm": 2.183420419692993, "learning_rate": 2.4784181317390465e-08, "loss": 0.2104175090789795, "step": 7272 }, { "epoch": 0.9832530629488805, "grad_norm": 1.640756607055664, "learning_rate": 2.4392448957628598e-08, "loss": 0.21809673309326172, "step": 7273 }, { "epoch": 0.9833882551753275, "grad_norm": 1.1838451623916626, "learning_rate": 2.4003834599217177e-08, "loss": 0.19672393798828125, "step": 7274 }, { "epoch": 0.9835234474017744, "grad_norm": 0.7297896146774292, "learning_rate": 2.3618338323071474e-08, "loss": 0.1278705596923828, "step": 7275 }, { "epoch": 0.9836586396282214, "grad_norm": 0.5232936143875122, "learning_rate": 2.3235960209448958e-08, "loss": 0.07738608121871948, "step": 7276 }, { "epoch": 0.9837938318546684, "grad_norm": 1.0427874326705933, "learning_rate": 2.2856700337967606e-08, "loss": 0.1937999725341797, "step": 7277 }, { "epoch": 0.9839290240811154, "grad_norm": 1.6218242645263672, "learning_rate": 2.2480558787587592e-08, "loss": 0.1422128677368164, "step": 7278 }, { "epoch": 0.9840642163075624, "grad_norm": 1.1342490911483765, "learning_rate": 2.2107535636626263e-08, "loss": 0.18131446838378906, "step": 7279 }, { "epoch": 0.9841994085340093, "grad_norm": 0.8911510109901428, "learning_rate": 2.1737630962746502e-08, "loss": 0.14415740966796875, "step": 7280 }, { "epoch": 0.9843346007604563, "grad_norm": 1.264528512954712, "learning_rate": 2.1370844842966696e-08, "loss": 0.1870269775390625, "step": 7281 }, { "epoch": 0.9844697929869033, "grad_norm": 1.0244718790054321, "learning_rate": 2.100717735365243e-08, "loss": 0.13822078704833984, "step": 7282 }, { "epoch": 0.9846049852133503, "grad_norm": 1.2269244194030762, "learning_rate": 2.0646628570521464e-08, "loss": 0.17969632148742676, "step": 7283 }, { "epoch": 0.9847401774397972, "grad_norm": 1.7875611782073975, "learning_rate": 2.028919856864375e-08, "loss": 0.20510482788085938, "step": 7284 }, { "epoch": 0.9848753696662442, "grad_norm": 0.8839919567108154, "learning_rate": 1.9934887422434766e-08, "loss": 0.1232914924621582, "step": 7285 }, { "epoch": 0.9850105618926912, "grad_norm": 0.9244188666343689, "learning_rate": 1.9583695205665496e-08, "loss": 0.1779327392578125, "step": 7286 }, { "epoch": 0.9851457541191382, "grad_norm": 0.8528125286102295, "learning_rate": 1.9235621991457454e-08, "loss": 0.12193822860717773, "step": 7287 }, { "epoch": 0.9852809463455852, "grad_norm": 0.8231453895568848, "learning_rate": 1.889066785227933e-08, "loss": 0.17607498168945312, "step": 7288 }, { "epoch": 0.9854161385720321, "grad_norm": 1.1122676134109497, "learning_rate": 1.854883285995368e-08, "loss": 0.1445636749267578, "step": 7289 }, { "epoch": 0.9855513307984791, "grad_norm": 0.9929426908493042, "learning_rate": 1.8210117085651902e-08, "loss": 0.18935394287109375, "step": 7290 }, { "epoch": 0.9856865230249261, "grad_norm": 0.9319225549697876, "learning_rate": 1.7874520599894252e-08, "loss": 0.1456918716430664, "step": 7291 }, { "epoch": 0.9858217152513731, "grad_norm": 1.7456135749816895, "learning_rate": 1.7542043472558166e-08, "loss": 0.18245935440063477, "step": 7292 }, { "epoch": 0.98595690747782, "grad_norm": 0.8514389395713806, "learning_rate": 1.7212685772864945e-08, "loss": 0.16852951049804688, "step": 7293 }, { "epoch": 0.986092099704267, "grad_norm": 1.0665686130523682, "learning_rate": 1.68864475693864e-08, "loss": 0.17768096923828125, "step": 7294 }, { "epoch": 0.986227291930714, "grad_norm": 1.8206572532653809, "learning_rate": 1.6563328930051526e-08, "loss": 0.21712517738342285, "step": 7295 }, { "epoch": 0.986362484157161, "grad_norm": 1.1156790256500244, "learning_rate": 1.624332992213151e-08, "loss": 0.1737537384033203, "step": 7296 }, { "epoch": 0.986497676383608, "grad_norm": 1.063873529434204, "learning_rate": 1.5926450612254728e-08, "loss": 0.18525314331054688, "step": 7297 }, { "epoch": 0.986632868610055, "grad_norm": 0.9279559254646301, "learning_rate": 1.5612691066395068e-08, "loss": 0.1679532527923584, "step": 7298 }, { "epoch": 0.9867680608365019, "grad_norm": 0.6950719952583313, "learning_rate": 1.530205134987861e-08, "loss": 0.1355583667755127, "step": 7299 }, { "epoch": 0.9869032530629489, "grad_norm": 1.4669368267059326, "learning_rate": 1.499453152738528e-08, "loss": 0.1821460723876953, "step": 7300 }, { "epoch": 0.9870384452893959, "grad_norm": 0.9478243589401245, "learning_rate": 1.4690131662938866e-08, "loss": 0.19354939460754395, "step": 7301 }, { "epoch": 0.9871736375158429, "grad_norm": 1.6360996961593628, "learning_rate": 1.438885181991867e-08, "loss": 0.15348577499389648, "step": 7302 }, { "epoch": 0.9873088297422898, "grad_norm": 0.8750420808792114, "learning_rate": 1.4090692061052846e-08, "loss": 0.13043498992919922, "step": 7303 }, { "epoch": 0.9874440219687368, "grad_norm": 0.9935842156410217, "learning_rate": 1.3795652448420071e-08, "loss": 0.16595458984375, "step": 7304 }, { "epoch": 0.9875792141951838, "grad_norm": 1.0260628461837769, "learning_rate": 1.3503733043447874e-08, "loss": 0.14501237869262695, "step": 7305 }, { "epoch": 0.9877144064216308, "grad_norm": 1.2830389738082886, "learning_rate": 1.3214933906915971e-08, "loss": 0.18228816986083984, "step": 7306 }, { "epoch": 0.9878495986480778, "grad_norm": 1.0005398988723755, "learning_rate": 1.2929255098954596e-08, "loss": 0.16384315490722656, "step": 7307 }, { "epoch": 0.9879847908745247, "grad_norm": 1.0565052032470703, "learning_rate": 1.2646696679042835e-08, "loss": 0.12819170951843262, "step": 7308 }, { "epoch": 0.9881199831009717, "grad_norm": 1.379349946975708, "learning_rate": 1.2367258706010298e-08, "loss": 0.1906871795654297, "step": 7309 }, { "epoch": 0.9882551753274187, "grad_norm": 1.4749375581741333, "learning_rate": 1.2090941238040443e-08, "loss": 0.1494770050048828, "step": 7310 }, { "epoch": 0.9883903675538657, "grad_norm": 1.1573714017868042, "learning_rate": 1.1817744332660584e-08, "loss": 0.19374942779541016, "step": 7311 }, { "epoch": 0.9885255597803126, "grad_norm": 1.9752477407455444, "learning_rate": 1.1547668046751891e-08, "loss": 0.19591617584228516, "step": 7312 }, { "epoch": 0.9886607520067596, "grad_norm": 1.1509065628051758, "learning_rate": 1.1280712436549379e-08, "loss": 0.17366409301757812, "step": 7313 }, { "epoch": 0.9887959442332066, "grad_norm": 0.8882109522819519, "learning_rate": 1.1016877557630257e-08, "loss": 0.1535041332244873, "step": 7314 }, { "epoch": 0.9889311364596536, "grad_norm": 1.4429450035095215, "learning_rate": 1.0756163464928915e-08, "loss": 0.15332555770874023, "step": 7315 }, { "epoch": 0.9890663286861006, "grad_norm": 1.075042486190796, "learning_rate": 1.0498570212726932e-08, "loss": 0.0976266860961914, "step": 7316 }, { "epoch": 0.9892015209125475, "grad_norm": 1.0847547054290771, "learning_rate": 1.024409785465641e-08, "loss": 0.2151951789855957, "step": 7317 }, { "epoch": 0.9893367131389945, "grad_norm": 2.1262550354003906, "learning_rate": 9.992746443699962e-09, "loss": 0.17905807495117188, "step": 7318 }, { "epoch": 0.9894719053654415, "grad_norm": 0.7050594091415405, "learning_rate": 9.744516032190731e-09, "loss": 0.12417125701904297, "step": 7319 }, { "epoch": 0.9896070975918885, "grad_norm": 0.7700551152229309, "learning_rate": 9.499406671809041e-09, "loss": 0.14566659927368164, "step": 7320 }, { "epoch": 0.9897422898183355, "grad_norm": 1.0600937604904175, "learning_rate": 9.2574184135924e-09, "loss": 0.16457796096801758, "step": 7321 }, { "epoch": 0.9898774820447824, "grad_norm": 0.9225361347198486, "learning_rate": 9.018551307920508e-09, "loss": 0.15133380889892578, "step": 7322 }, { "epoch": 0.9900126742712294, "grad_norm": 0.8620610237121582, "learning_rate": 8.782805404526917e-09, "loss": 0.12780380249023438, "step": 7323 }, { "epoch": 0.9901478664976764, "grad_norm": 0.9073818922042847, "learning_rate": 8.55018075249736e-09, "loss": 0.18050765991210938, "step": 7324 }, { "epoch": 0.9902830587241234, "grad_norm": 1.0237106084823608, "learning_rate": 8.320677400264764e-09, "loss": 0.1272258758544922, "step": 7325 }, { "epoch": 0.9904182509505703, "grad_norm": 2.0932223796844482, "learning_rate": 8.094295395610906e-09, "loss": 0.17127227783203125, "step": 7326 }, { "epoch": 0.9905534431770173, "grad_norm": 0.7614780068397522, "learning_rate": 7.87103478567308e-09, "loss": 0.1362626552581787, "step": 7327 }, { "epoch": 0.9906886354034643, "grad_norm": 2.6018054485321045, "learning_rate": 7.65089561693244e-09, "loss": 0.20467662811279297, "step": 7328 }, { "epoch": 0.9908238276299113, "grad_norm": 1.8599770069122314, "learning_rate": 7.433877935225652e-09, "loss": 0.23937749862670898, "step": 7329 }, { "epoch": 0.9909590198563583, "grad_norm": 1.6535546779632568, "learning_rate": 7.219981785733243e-09, "loss": 0.2488689422607422, "step": 7330 }, { "epoch": 0.9910942120828052, "grad_norm": 1.333464503288269, "learning_rate": 7.009207212992919e-09, "loss": 0.1871333122253418, "step": 7331 }, { "epoch": 0.9912294043092522, "grad_norm": 1.3936303853988647, "learning_rate": 6.801554260889575e-09, "loss": 0.18279457092285156, "step": 7332 }, { "epoch": 0.9913645965356992, "grad_norm": 2.6115469932556152, "learning_rate": 6.5970229726552976e-09, "loss": 0.17894554138183594, "step": 7333 }, { "epoch": 0.9914997887621462, "grad_norm": 1.036321997642517, "learning_rate": 6.3956133908743556e-09, "loss": 0.1586000919342041, "step": 7334 }, { "epoch": 0.9916349809885932, "grad_norm": 0.9874232411384583, "learning_rate": 6.197325557483202e-09, "loss": 0.18906879425048828, "step": 7335 }, { "epoch": 0.9917701732150401, "grad_norm": 1.2339227199554443, "learning_rate": 6.002159513765482e-09, "loss": 0.20660972595214844, "step": 7336 }, { "epoch": 0.9919053654414871, "grad_norm": 1.4209257364273071, "learning_rate": 5.810115300355357e-09, "loss": 0.1447368860244751, "step": 7337 }, { "epoch": 0.9920405576679341, "grad_norm": 1.4106571674346924, "learning_rate": 5.621192957239174e-09, "loss": 0.17977237701416016, "step": 7338 }, { "epoch": 0.9921757498943811, "grad_norm": 0.9243113398551941, "learning_rate": 5.435392523748806e-09, "loss": 0.1879138946533203, "step": 7339 }, { "epoch": 0.992310942120828, "grad_norm": 1.0342358350753784, "learning_rate": 5.252714038571638e-09, "loss": 0.1633462905883789, "step": 7340 }, { "epoch": 0.992446134347275, "grad_norm": 0.8588201999664307, "learning_rate": 5.073157539742246e-09, "loss": 0.2013254165649414, "step": 7341 }, { "epoch": 0.992581326573722, "grad_norm": 1.3490233421325684, "learning_rate": 4.896723064642394e-09, "loss": 0.19267654418945312, "step": 7342 }, { "epoch": 0.992716518800169, "grad_norm": 1.363053321838379, "learning_rate": 4.723410650009363e-09, "loss": 0.13930273056030273, "step": 7343 }, { "epoch": 0.992851711026616, "grad_norm": 0.7633225321769714, "learning_rate": 4.553220331925956e-09, "loss": 0.13668537139892578, "step": 7344 }, { "epoch": 0.9929869032530629, "grad_norm": 1.1288535594940186, "learning_rate": 4.38615214582716e-09, "loss": 0.18528461456298828, "step": 7345 }, { "epoch": 0.9931220954795099, "grad_norm": 1.898842692375183, "learning_rate": 4.2222061265001496e-09, "loss": 0.11673331260681152, "step": 7346 }, { "epoch": 0.9932572877059569, "grad_norm": 1.1446244716644287, "learning_rate": 4.0613823080742905e-09, "loss": 0.14681482315063477, "step": 7347 }, { "epoch": 0.9933924799324039, "grad_norm": 1.2303898334503174, "learning_rate": 3.903680724037795e-09, "loss": 0.1594851016998291, "step": 7348 }, { "epoch": 0.9935276721588508, "grad_norm": 0.6970008611679077, "learning_rate": 3.749101407224398e-09, "loss": 0.13020730018615723, "step": 7349 }, { "epoch": 0.9936628643852978, "grad_norm": 1.0794814825057983, "learning_rate": 3.597644389818355e-09, "loss": 0.16993427276611328, "step": 7350 }, { "epoch": 0.9937980566117448, "grad_norm": 1.0079402923583984, "learning_rate": 3.4493097033527767e-09, "loss": 0.18355178833007812, "step": 7351 }, { "epoch": 0.9939332488381918, "grad_norm": 1.695164442062378, "learning_rate": 3.3040973787112904e-09, "loss": 0.22621726989746094, "step": 7352 }, { "epoch": 0.9940684410646388, "grad_norm": 0.8182611465454102, "learning_rate": 3.162007446129711e-09, "loss": 0.15974998474121094, "step": 7353 }, { "epoch": 0.9942036332910857, "grad_norm": 1.092155933380127, "learning_rate": 3.023039935191041e-09, "loss": 0.12267303466796875, "step": 7354 }, { "epoch": 0.9943388255175327, "grad_norm": 1.8967422246932983, "learning_rate": 2.887194874830468e-09, "loss": 0.203948974609375, "step": 7355 }, { "epoch": 0.9944740177439797, "grad_norm": 1.5373715162277222, "learning_rate": 2.7544722933287026e-09, "loss": 0.168548583984375, "step": 7356 }, { "epoch": 0.9946092099704267, "grad_norm": 3.407031774520874, "learning_rate": 2.6248722183203066e-09, "loss": 0.26572322845458984, "step": 7357 }, { "epoch": 0.9947444021968737, "grad_norm": 1.1482545137405396, "learning_rate": 2.498394676790361e-09, "loss": 0.18760061264038086, "step": 7358 }, { "epoch": 0.9948795944233206, "grad_norm": 1.3253059387207031, "learning_rate": 2.375039695071135e-09, "loss": 0.184722900390625, "step": 7359 }, { "epoch": 0.9950147866497676, "grad_norm": 1.8004224300384521, "learning_rate": 2.2548072988454184e-09, "loss": 0.18215274810791016, "step": 7360 }, { "epoch": 0.9951499788762146, "grad_norm": 1.150065302848816, "learning_rate": 2.1376975131465194e-09, "loss": 0.18532133102416992, "step": 7361 }, { "epoch": 0.9952851711026616, "grad_norm": 1.2456303834915161, "learning_rate": 2.023710362356601e-09, "loss": 0.21260452270507812, "step": 7362 }, { "epoch": 0.9954203633291085, "grad_norm": 0.7877089977264404, "learning_rate": 1.9128458702100117e-09, "loss": 0.11085927486419678, "step": 7363 }, { "epoch": 0.9955555555555555, "grad_norm": 0.7680411338806152, "learning_rate": 1.8051040597882873e-09, "loss": 0.14110727608203888, "step": 7364 }, { "epoch": 0.9956907477820025, "grad_norm": 1.2317640781402588, "learning_rate": 1.70048495352515e-09, "loss": 0.1412665843963623, "step": 7365 }, { "epoch": 0.9958259400084495, "grad_norm": 1.1753548383712769, "learning_rate": 1.5989885731998443e-09, "loss": 0.2248525619506836, "step": 7366 }, { "epoch": 0.9959611322348965, "grad_norm": 1.157978892326355, "learning_rate": 1.5006149399487966e-09, "loss": 0.1808757781982422, "step": 7367 }, { "epoch": 0.9960963244613434, "grad_norm": 1.24444580078125, "learning_rate": 1.4053640742489604e-09, "loss": 0.1722240447998047, "step": 7368 }, { "epoch": 0.9962315166877904, "grad_norm": 0.7689839601516724, "learning_rate": 1.3132359959361351e-09, "loss": 0.13801860809326172, "step": 7369 }, { "epoch": 0.9963667089142374, "grad_norm": 0.7986721992492676, "learning_rate": 1.2242307241899787e-09, "loss": 0.13493633270263672, "step": 7370 }, { "epoch": 0.9965019011406844, "grad_norm": 1.176010012626648, "learning_rate": 1.1383482775406685e-09, "loss": 0.17741775512695312, "step": 7371 }, { "epoch": 0.9966370933671314, "grad_norm": 0.6402312517166138, "learning_rate": 1.0555886738738973e-09, "loss": 0.1308603286743164, "step": 7372 }, { "epoch": 0.9967722855935783, "grad_norm": 1.0092192888259888, "learning_rate": 9.75951930415886e-10, "loss": 0.15879487991333008, "step": 7373 }, { "epoch": 0.9969074778200253, "grad_norm": 0.9983760714530945, "learning_rate": 8.994380637483701e-10, "loss": 0.12014353275299072, "step": 7374 }, { "epoch": 0.9970426700464723, "grad_norm": 1.4045286178588867, "learning_rate": 8.260470898036054e-10, "loss": 0.1888408660888672, "step": 7375 }, { "epoch": 0.9971778622729193, "grad_norm": 0.9965384602546692, "learning_rate": 7.557790238627016e-10, "loss": 0.12786483764648438, "step": 7376 }, { "epoch": 0.9973130544993662, "grad_norm": 1.1297028064727783, "learning_rate": 6.886338805522918e-10, "loss": 0.1680002212524414, "step": 7377 }, { "epoch": 0.9974482467258132, "grad_norm": 0.8662614226341248, "learning_rate": 6.246116738561903e-10, "loss": 0.13668346405029297, "step": 7378 }, { "epoch": 0.9975834389522602, "grad_norm": 1.2495498657226562, "learning_rate": 5.637124171004038e-10, "loss": 0.17917275428771973, "step": 7379 }, { "epoch": 0.9977186311787072, "grad_norm": 1.2119765281677246, "learning_rate": 5.059361229681203e-10, "loss": 0.2170848846435547, "step": 7380 }, { "epoch": 0.9978538234051542, "grad_norm": 1.5703673362731934, "learning_rate": 4.5128280348638583e-10, "loss": 0.199737548828125, "step": 7381 }, { "epoch": 0.9979890156316011, "grad_norm": 1.292392373085022, "learning_rate": 3.9975247003443127e-10, "loss": 0.18749713897705078, "step": 7382 }, { "epoch": 0.9981242078580481, "grad_norm": 0.9077715277671814, "learning_rate": 3.51345133342007e-10, "loss": 0.15091705322265625, "step": 7383 }, { "epoch": 0.9982594000844951, "grad_norm": 0.7391771078109741, "learning_rate": 3.060608034877177e-10, "loss": 0.12168383598327637, "step": 7384 }, { "epoch": 0.9983945923109421, "grad_norm": 1.1722162961959839, "learning_rate": 2.638994898990221e-10, "loss": 0.1868276596069336, "step": 7385 }, { "epoch": 0.998529784537389, "grad_norm": 1.9503419399261475, "learning_rate": 2.2486120135556398e-10, "loss": 0.14784622192382812, "step": 7386 }, { "epoch": 0.998664976763836, "grad_norm": 1.0859471559524536, "learning_rate": 1.889459459841758e-10, "loss": 0.1971149444580078, "step": 7387 }, { "epoch": 0.998800168990283, "grad_norm": 1.0214329957962036, "learning_rate": 1.56153731263875e-10, "loss": 0.1439189910888672, "step": 7388 }, { "epoch": 0.99893536121673, "grad_norm": 0.9618058800697327, "learning_rate": 1.2648456402086784e-10, "loss": 0.15452098846435547, "step": 7389 }, { "epoch": 0.999070553443177, "grad_norm": 1.1153603792190552, "learning_rate": 9.99384504318801e-11, "loss": 0.14294958114624023, "step": 7390 }, { "epoch": 0.999205745669624, "grad_norm": 1.242556095123291, "learning_rate": 7.651539602582247e-11, "loss": 0.16652870178222656, "step": 7391 }, { "epoch": 0.9993409378960709, "grad_norm": 1.1816502809524536, "learning_rate": 5.6215405678794464e-11, "loss": 0.21294212341308594, "step": 7392 }, { "epoch": 0.9994761301225179, "grad_norm": 0.8464372754096985, "learning_rate": 3.9038483615749795e-11, "loss": 0.15835332870483398, "step": 7393 }, { "epoch": 0.9996113223489649, "grad_norm": 0.7513839602470398, "learning_rate": 2.4984633415492398e-11, "loss": 0.1408405303955078, "step": 7394 }, { "epoch": 0.9997465145754119, "grad_norm": 0.8847243189811707, "learning_rate": 1.4053858004015041e-11, "loss": 0.17638516426086426, "step": 7395 }, { "epoch": 0.9998817068018588, "grad_norm": 1.8010793924331665, "learning_rate": 6.246159654499373e-12, "loss": 0.19793701171875, "step": 7396 }, { "epoch": 1.0, "grad_norm": 1.0269384384155273, "learning_rate": 1.561539995642569e-12, "loss": 0.2227954864501953, "step": 7397 }, { "epoch": 1.0, "step": 7397, "total_flos": 8.015953600697703e+19, "train_loss": 0.1922166269180403, "train_runtime": 105556.7771, "train_samples_per_second": 17.939, "train_steps_per_second": 0.07 } ], "logging_steps": 1.0, "max_steps": 7397, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.015953600697703e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }