{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 7360, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0002717391304347826, "grad_norm": 1.2250695592789804, "learning_rate": 0.0, "loss": 1.0829896926879883, "step": 1 }, { "epoch": 0.0005434782608695652, "grad_norm": 1.0287681796078578, "learning_rate": 5.4347826086956524e-08, "loss": 1.0697593688964844, "step": 2 }, { "epoch": 0.0008152173913043478, "grad_norm": 1.2359415068778592, "learning_rate": 1.0869565217391305e-07, "loss": 0.8786199688911438, "step": 3 }, { "epoch": 0.0010869565217391304, "grad_norm": 1.2345237323851161, "learning_rate": 1.6304347826086958e-07, "loss": 1.1482996940612793, "step": 4 }, { "epoch": 0.001358695652173913, "grad_norm": 1.2459740712215772, "learning_rate": 2.173913043478261e-07, "loss": 1.0842301845550537, "step": 5 }, { "epoch": 0.0016304347826086956, "grad_norm": 1.0738795075837961, "learning_rate": 2.7173913043478264e-07, "loss": 0.9574501514434814, "step": 6 }, { "epoch": 0.0019021739130434783, "grad_norm": 1.2241936211524034, "learning_rate": 3.2608695652173915e-07, "loss": 1.0702435970306396, "step": 7 }, { "epoch": 0.002173913043478261, "grad_norm": 1.3573550866531237, "learning_rate": 3.804347826086957e-07, "loss": 1.1070070266723633, "step": 8 }, { "epoch": 0.0024456521739130437, "grad_norm": 1.0824607543377165, "learning_rate": 4.347826086956522e-07, "loss": 1.0341682434082031, "step": 9 }, { "epoch": 0.002717391304347826, "grad_norm": 1.1579134197785075, "learning_rate": 4.891304347826088e-07, "loss": 1.1163058280944824, "step": 10 }, { "epoch": 0.002989130434782609, "grad_norm": 1.149896545512519, "learning_rate": 5.434782608695653e-07, "loss": 0.902255654335022, "step": 11 }, { "epoch": 0.003260869565217391, "grad_norm": 0.9664164559534778, "learning_rate": 5.978260869565218e-07, "loss": 0.9490252733230591, "step": 12 }, { "epoch": 0.003532608695652174, "grad_norm": 1.087367677642894, "learning_rate": 6.521739130434783e-07, "loss": 0.9952810406684875, "step": 13 }, { "epoch": 0.0038043478260869567, "grad_norm": 1.1337355753676148, "learning_rate": 7.065217391304348e-07, "loss": 1.062730312347412, "step": 14 }, { "epoch": 0.004076086956521739, "grad_norm": 1.235232920720187, "learning_rate": 7.608695652173914e-07, "loss": 1.1677320003509521, "step": 15 }, { "epoch": 0.004347826086956522, "grad_norm": 1.2014925312437044, "learning_rate": 8.152173913043479e-07, "loss": 0.895161509513855, "step": 16 }, { "epoch": 0.004619565217391305, "grad_norm": 1.1479236359977931, "learning_rate": 8.695652173913044e-07, "loss": 0.9284771680831909, "step": 17 }, { "epoch": 0.004891304347826087, "grad_norm": 1.155748690251187, "learning_rate": 9.239130434782609e-07, "loss": 1.0536949634552002, "step": 18 }, { "epoch": 0.005163043478260869, "grad_norm": 1.1528558819517782, "learning_rate": 9.782608695652175e-07, "loss": 0.9651678800582886, "step": 19 }, { "epoch": 0.005434782608695652, "grad_norm": 0.9462537040019742, "learning_rate": 1.032608695652174e-06, "loss": 0.8023773431777954, "step": 20 }, { "epoch": 0.005706521739130435, "grad_norm": 1.1170728030632748, "learning_rate": 1.0869565217391306e-06, "loss": 1.0682737827301025, "step": 21 }, { "epoch": 0.005978260869565218, "grad_norm": 1.153269123089336, "learning_rate": 1.141304347826087e-06, "loss": 0.8537968397140503, "step": 22 }, { "epoch": 0.00625, "grad_norm": 1.3079792258641632, "learning_rate": 1.1956521739130436e-06, "loss": 1.0592350959777832, "step": 23 }, { "epoch": 0.006521739130434782, "grad_norm": 1.08954187939905, "learning_rate": 1.25e-06, "loss": 1.2744078636169434, "step": 24 }, { "epoch": 0.006793478260869565, "grad_norm": 1.0052090774136022, "learning_rate": 1.3043478260869566e-06, "loss": 1.0090994834899902, "step": 25 }, { "epoch": 0.007065217391304348, "grad_norm": 1.07169883881584, "learning_rate": 1.3586956521739131e-06, "loss": 0.94012451171875, "step": 26 }, { "epoch": 0.007336956521739131, "grad_norm": 1.0202385002710512, "learning_rate": 1.4130434782608697e-06, "loss": 0.8355286717414856, "step": 27 }, { "epoch": 0.007608695652173913, "grad_norm": 1.1338541804021738, "learning_rate": 1.4673913043478264e-06, "loss": 1.1235988140106201, "step": 28 }, { "epoch": 0.007880434782608695, "grad_norm": 1.0633979734893573, "learning_rate": 1.521739130434783e-06, "loss": 1.1033685207366943, "step": 29 }, { "epoch": 0.008152173913043478, "grad_norm": 1.0353124546585188, "learning_rate": 1.5760869565217394e-06, "loss": 1.0601937770843506, "step": 30 }, { "epoch": 0.00842391304347826, "grad_norm": 1.1788306557228736, "learning_rate": 1.6304347826086957e-06, "loss": 1.3721750974655151, "step": 31 }, { "epoch": 0.008695652173913044, "grad_norm": 1.1396741380322912, "learning_rate": 1.6847826086956522e-06, "loss": 1.2816855907440186, "step": 32 }, { "epoch": 0.008967391304347826, "grad_norm": 1.02787224453482, "learning_rate": 1.7391304347826088e-06, "loss": 1.2141066789627075, "step": 33 }, { "epoch": 0.00923913043478261, "grad_norm": 1.0234241599072096, "learning_rate": 1.7934782608695653e-06, "loss": 1.0515851974487305, "step": 34 }, { "epoch": 0.009510869565217392, "grad_norm": 1.1716914317136358, "learning_rate": 1.8478260869565218e-06, "loss": 1.1200916767120361, "step": 35 }, { "epoch": 0.009782608695652175, "grad_norm": 1.0099980145167555, "learning_rate": 1.9021739130434785e-06, "loss": 0.9467116594314575, "step": 36 }, { "epoch": 0.010054347826086956, "grad_norm": 1.160201540365826, "learning_rate": 1.956521739130435e-06, "loss": 0.9152494668960571, "step": 37 }, { "epoch": 0.010326086956521738, "grad_norm": 1.107446094838261, "learning_rate": 2.0108695652173916e-06, "loss": 0.8606852293014526, "step": 38 }, { "epoch": 0.010597826086956521, "grad_norm": 0.8676046589720252, "learning_rate": 2.065217391304348e-06, "loss": 0.8476055860519409, "step": 39 }, { "epoch": 0.010869565217391304, "grad_norm": 1.074614018448447, "learning_rate": 2.1195652173913046e-06, "loss": 1.1434733867645264, "step": 40 }, { "epoch": 0.011141304347826087, "grad_norm": 1.0766417956951042, "learning_rate": 2.173913043478261e-06, "loss": 1.1598572731018066, "step": 41 }, { "epoch": 0.01141304347826087, "grad_norm": 1.087511769463064, "learning_rate": 2.2282608695652176e-06, "loss": 1.2040066719055176, "step": 42 }, { "epoch": 0.011684782608695652, "grad_norm": 0.8980264604759558, "learning_rate": 2.282608695652174e-06, "loss": 1.0057287216186523, "step": 43 }, { "epoch": 0.011956521739130435, "grad_norm": 0.9247428552711667, "learning_rate": 2.3369565217391307e-06, "loss": 0.8806758522987366, "step": 44 }, { "epoch": 0.012228260869565218, "grad_norm": 0.9787357990195054, "learning_rate": 2.391304347826087e-06, "loss": 1.0527417659759521, "step": 45 }, { "epoch": 0.0125, "grad_norm": 0.9793676760719235, "learning_rate": 2.4456521739130437e-06, "loss": 0.9478249549865723, "step": 46 }, { "epoch": 0.012771739130434783, "grad_norm": 0.9722334061860975, "learning_rate": 2.5e-06, "loss": 0.977737545967102, "step": 47 }, { "epoch": 0.013043478260869565, "grad_norm": 0.8905264826719185, "learning_rate": 2.554347826086957e-06, "loss": 0.9367704391479492, "step": 48 }, { "epoch": 0.013315217391304347, "grad_norm": 0.8197438006511025, "learning_rate": 2.6086956521739132e-06, "loss": 0.9200581908226013, "step": 49 }, { "epoch": 0.01358695652173913, "grad_norm": 0.913997859735021, "learning_rate": 2.6630434782608698e-06, "loss": 1.060661792755127, "step": 50 }, { "epoch": 0.013858695652173913, "grad_norm": 0.8795572968946631, "learning_rate": 2.7173913043478263e-06, "loss": 0.8524699211120605, "step": 51 }, { "epoch": 0.014130434782608696, "grad_norm": 0.7866243125460237, "learning_rate": 2.771739130434783e-06, "loss": 0.8600091934204102, "step": 52 }, { "epoch": 0.014402173913043478, "grad_norm": 0.922861392300573, "learning_rate": 2.8260869565217393e-06, "loss": 0.991204559803009, "step": 53 }, { "epoch": 0.014673913043478261, "grad_norm": 0.7261498376656284, "learning_rate": 2.880434782608696e-06, "loss": 0.8218520879745483, "step": 54 }, { "epoch": 0.014945652173913044, "grad_norm": 0.7997154661047977, "learning_rate": 2.9347826086956528e-06, "loss": 0.9705663323402405, "step": 55 }, { "epoch": 0.015217391304347827, "grad_norm": 0.7133154338990054, "learning_rate": 2.989130434782609e-06, "loss": 0.7625200152397156, "step": 56 }, { "epoch": 0.01548913043478261, "grad_norm": 0.8104200980031449, "learning_rate": 3.043478260869566e-06, "loss": 1.0068050622940063, "step": 57 }, { "epoch": 0.01576086956521739, "grad_norm": 0.8169737235134097, "learning_rate": 3.097826086956522e-06, "loss": 1.0481294393539429, "step": 58 }, { "epoch": 0.016032608695652175, "grad_norm": 0.7864426078228588, "learning_rate": 3.152173913043479e-06, "loss": 0.948712944984436, "step": 59 }, { "epoch": 0.016304347826086956, "grad_norm": 0.7678490654984318, "learning_rate": 3.206521739130435e-06, "loss": 0.921778678894043, "step": 60 }, { "epoch": 0.01657608695652174, "grad_norm": 0.6413493802466795, "learning_rate": 3.2608695652173914e-06, "loss": 0.7060019373893738, "step": 61 }, { "epoch": 0.01684782608695652, "grad_norm": 0.7379400604859384, "learning_rate": 3.315217391304348e-06, "loss": 0.907953679561615, "step": 62 }, { "epoch": 0.017119565217391303, "grad_norm": 0.7068622106206051, "learning_rate": 3.3695652173913045e-06, "loss": 0.8853484392166138, "step": 63 }, { "epoch": 0.017391304347826087, "grad_norm": 0.6739418988265539, "learning_rate": 3.4239130434782614e-06, "loss": 0.8006744980812073, "step": 64 }, { "epoch": 0.017663043478260868, "grad_norm": 0.7450604674695157, "learning_rate": 3.4782608695652175e-06, "loss": 0.9807255268096924, "step": 65 }, { "epoch": 0.017934782608695653, "grad_norm": 0.7049821015627532, "learning_rate": 3.5326086956521745e-06, "loss": 0.8684797286987305, "step": 66 }, { "epoch": 0.018206521739130434, "grad_norm": 0.9598278950792991, "learning_rate": 3.5869565217391305e-06, "loss": 1.1477574110031128, "step": 67 }, { "epoch": 0.01847826086956522, "grad_norm": 0.7340245637719313, "learning_rate": 3.6413043478260875e-06, "loss": 0.8332781791687012, "step": 68 }, { "epoch": 0.01875, "grad_norm": 0.7024806067803638, "learning_rate": 3.6956521739130436e-06, "loss": 0.8457655906677246, "step": 69 }, { "epoch": 0.019021739130434784, "grad_norm": 0.8615518742730518, "learning_rate": 3.7500000000000005e-06, "loss": 1.0462435483932495, "step": 70 }, { "epoch": 0.019293478260869565, "grad_norm": 0.5653177278856013, "learning_rate": 3.804347826086957e-06, "loss": 0.5888741612434387, "step": 71 }, { "epoch": 0.01956521739130435, "grad_norm": 0.7094428786025203, "learning_rate": 3.8586956521739136e-06, "loss": 0.8332858085632324, "step": 72 }, { "epoch": 0.01983695652173913, "grad_norm": 0.7617121203538293, "learning_rate": 3.91304347826087e-06, "loss": 1.0421907901763916, "step": 73 }, { "epoch": 0.02010869565217391, "grad_norm": 0.6652959624707837, "learning_rate": 3.967391304347827e-06, "loss": 0.8340507745742798, "step": 74 }, { "epoch": 0.020380434782608696, "grad_norm": 0.6562049473527857, "learning_rate": 4.021739130434783e-06, "loss": 0.7795684337615967, "step": 75 }, { "epoch": 0.020652173913043477, "grad_norm": 0.6959677794227712, "learning_rate": 4.07608695652174e-06, "loss": 0.8747092485427856, "step": 76 }, { "epoch": 0.02092391304347826, "grad_norm": 0.6184967419619413, "learning_rate": 4.130434782608696e-06, "loss": 0.7428484559059143, "step": 77 }, { "epoch": 0.021195652173913043, "grad_norm": 0.7651182201222049, "learning_rate": 4.184782608695653e-06, "loss": 0.9611421823501587, "step": 78 }, { "epoch": 0.021467391304347827, "grad_norm": 0.8568948177460614, "learning_rate": 4.239130434782609e-06, "loss": 0.9423149824142456, "step": 79 }, { "epoch": 0.021739130434782608, "grad_norm": 0.6713991226392848, "learning_rate": 4.293478260869566e-06, "loss": 0.7348430156707764, "step": 80 }, { "epoch": 0.022010869565217393, "grad_norm": 0.6808588336746701, "learning_rate": 4.347826086956522e-06, "loss": 0.7494282722473145, "step": 81 }, { "epoch": 0.022282608695652174, "grad_norm": 0.7212553208549852, "learning_rate": 4.402173913043479e-06, "loss": 0.8030438423156738, "step": 82 }, { "epoch": 0.022554347826086958, "grad_norm": 0.7209240619170463, "learning_rate": 4.456521739130435e-06, "loss": 0.7714411616325378, "step": 83 }, { "epoch": 0.02282608695652174, "grad_norm": 0.7505896979677462, "learning_rate": 4.510869565217392e-06, "loss": 0.8814622163772583, "step": 84 }, { "epoch": 0.02309782608695652, "grad_norm": 0.7688458121699437, "learning_rate": 4.565217391304348e-06, "loss": 0.930201530456543, "step": 85 }, { "epoch": 0.023369565217391305, "grad_norm": 0.7448808598703436, "learning_rate": 4.619565217391305e-06, "loss": 0.9014756679534912, "step": 86 }, { "epoch": 0.023641304347826086, "grad_norm": 0.6572467580286708, "learning_rate": 4.673913043478261e-06, "loss": 0.8105689883232117, "step": 87 }, { "epoch": 0.02391304347826087, "grad_norm": 0.7606067757722674, "learning_rate": 4.728260869565218e-06, "loss": 0.8853908777236938, "step": 88 }, { "epoch": 0.02418478260869565, "grad_norm": 0.723443235670806, "learning_rate": 4.782608695652174e-06, "loss": 0.8573346138000488, "step": 89 }, { "epoch": 0.024456521739130436, "grad_norm": 0.6283132255089304, "learning_rate": 4.836956521739131e-06, "loss": 0.6912500262260437, "step": 90 }, { "epoch": 0.024728260869565217, "grad_norm": 0.7878315035029227, "learning_rate": 4.891304347826087e-06, "loss": 0.8573861122131348, "step": 91 }, { "epoch": 0.025, "grad_norm": 0.722655182191155, "learning_rate": 4.945652173913044e-06, "loss": 0.8138406276702881, "step": 92 }, { "epoch": 0.025271739130434782, "grad_norm": 0.6520880255579576, "learning_rate": 5e-06, "loss": 0.78059983253479, "step": 93 }, { "epoch": 0.025543478260869567, "grad_norm": 0.7974100778956341, "learning_rate": 5.054347826086957e-06, "loss": 0.8585740327835083, "step": 94 }, { "epoch": 0.025815217391304348, "grad_norm": 0.5403579443841297, "learning_rate": 5.108695652173914e-06, "loss": 0.5220387578010559, "step": 95 }, { "epoch": 0.02608695652173913, "grad_norm": 0.7185364125554955, "learning_rate": 5.16304347826087e-06, "loss": 0.7960209846496582, "step": 96 }, { "epoch": 0.026358695652173914, "grad_norm": 0.7102581648343127, "learning_rate": 5.2173913043478265e-06, "loss": 0.8067851066589355, "step": 97 }, { "epoch": 0.026630434782608695, "grad_norm": 0.6833342307700317, "learning_rate": 5.271739130434783e-06, "loss": 0.7853584289550781, "step": 98 }, { "epoch": 0.02690217391304348, "grad_norm": 0.7291258234438408, "learning_rate": 5.3260869565217395e-06, "loss": 0.8337762355804443, "step": 99 }, { "epoch": 0.02717391304347826, "grad_norm": 0.7953832201547705, "learning_rate": 5.380434782608695e-06, "loss": 0.8077871799468994, "step": 100 }, { "epoch": 0.027445652173913045, "grad_norm": 0.7030140983866873, "learning_rate": 5.4347826086956525e-06, "loss": 0.7430740594863892, "step": 101 }, { "epoch": 0.027717391304347826, "grad_norm": 0.699571431914891, "learning_rate": 5.489130434782609e-06, "loss": 0.6978483200073242, "step": 102 }, { "epoch": 0.02798913043478261, "grad_norm": 0.596298366001681, "learning_rate": 5.543478260869566e-06, "loss": 0.6163474917411804, "step": 103 }, { "epoch": 0.02826086956521739, "grad_norm": 0.6186418777947501, "learning_rate": 5.597826086956523e-06, "loss": 0.6841033697128296, "step": 104 }, { "epoch": 0.028532608695652172, "grad_norm": 0.6818385751570224, "learning_rate": 5.652173913043479e-06, "loss": 0.7268383502960205, "step": 105 }, { "epoch": 0.028804347826086957, "grad_norm": 0.7500368365795986, "learning_rate": 5.706521739130435e-06, "loss": 0.7388402223587036, "step": 106 }, { "epoch": 0.029076086956521738, "grad_norm": 0.7243828374747474, "learning_rate": 5.760869565217392e-06, "loss": 0.7680409550666809, "step": 107 }, { "epoch": 0.029347826086956522, "grad_norm": 0.7887568944010082, "learning_rate": 5.815217391304349e-06, "loss": 0.9365160465240479, "step": 108 }, { "epoch": 0.029619565217391303, "grad_norm": 0.7473032804419129, "learning_rate": 5.8695652173913055e-06, "loss": 0.6551934480667114, "step": 109 }, { "epoch": 0.029891304347826088, "grad_norm": 0.6632164368482341, "learning_rate": 5.923913043478261e-06, "loss": 0.6138002276420593, "step": 110 }, { "epoch": 0.03016304347826087, "grad_norm": 0.685898295759308, "learning_rate": 5.978260869565218e-06, "loss": 0.705558180809021, "step": 111 }, { "epoch": 0.030434782608695653, "grad_norm": 0.70482120730475, "learning_rate": 6.032608695652174e-06, "loss": 0.6709036827087402, "step": 112 }, { "epoch": 0.030706521739130434, "grad_norm": 0.7661967990216886, "learning_rate": 6.086956521739132e-06, "loss": 0.8270171880722046, "step": 113 }, { "epoch": 0.03097826086956522, "grad_norm": 0.6965746178582606, "learning_rate": 6.141304347826087e-06, "loss": 0.661056637763977, "step": 114 }, { "epoch": 0.03125, "grad_norm": 0.8860217871284418, "learning_rate": 6.195652173913044e-06, "loss": 0.9250222444534302, "step": 115 }, { "epoch": 0.03152173913043478, "grad_norm": 0.631471704068111, "learning_rate": 6.25e-06, "loss": 0.6487789154052734, "step": 116 }, { "epoch": 0.03179347826086956, "grad_norm": 0.5739117343066549, "learning_rate": 6.304347826086958e-06, "loss": 0.5238269567489624, "step": 117 }, { "epoch": 0.03206521739130435, "grad_norm": 0.6731334221438724, "learning_rate": 6.358695652173914e-06, "loss": 0.5660438537597656, "step": 118 }, { "epoch": 0.03233695652173913, "grad_norm": 0.7577996628142701, "learning_rate": 6.41304347826087e-06, "loss": 0.7833073139190674, "step": 119 }, { "epoch": 0.03260869565217391, "grad_norm": 0.6964751497267007, "learning_rate": 6.467391304347826e-06, "loss": 0.6901708841323853, "step": 120 }, { "epoch": 0.03288043478260869, "grad_norm": 0.7899393868356257, "learning_rate": 6.521739130434783e-06, "loss": 0.7019118070602417, "step": 121 }, { "epoch": 0.03315217391304348, "grad_norm": 0.7940821643013656, "learning_rate": 6.57608695652174e-06, "loss": 0.7362785935401917, "step": 122 }, { "epoch": 0.03342391304347826, "grad_norm": 0.7282186324981302, "learning_rate": 6.630434782608696e-06, "loss": 0.7169548869132996, "step": 123 }, { "epoch": 0.03369565217391304, "grad_norm": 0.8758969734579629, "learning_rate": 6.6847826086956524e-06, "loss": 0.831395149230957, "step": 124 }, { "epoch": 0.033967391304347824, "grad_norm": 0.7553122569553362, "learning_rate": 6.739130434782609e-06, "loss": 0.6828559637069702, "step": 125 }, { "epoch": 0.034239130434782605, "grad_norm": 0.682565862995381, "learning_rate": 6.793478260869566e-06, "loss": 0.6431168913841248, "step": 126 }, { "epoch": 0.03451086956521739, "grad_norm": 0.786115123423184, "learning_rate": 6.847826086956523e-06, "loss": 0.74827641248703, "step": 127 }, { "epoch": 0.034782608695652174, "grad_norm": 0.7406776842566438, "learning_rate": 6.9021739130434785e-06, "loss": 0.6205931901931763, "step": 128 }, { "epoch": 0.035054347826086955, "grad_norm": 0.8247964274500533, "learning_rate": 6.956521739130435e-06, "loss": 0.7841173410415649, "step": 129 }, { "epoch": 0.035326086956521736, "grad_norm": 0.7940062210057446, "learning_rate": 7.0108695652173915e-06, "loss": 0.7555547952651978, "step": 130 }, { "epoch": 0.035597826086956524, "grad_norm": 0.5750777996416995, "learning_rate": 7.065217391304349e-06, "loss": 0.48225390911102295, "step": 131 }, { "epoch": 0.035869565217391305, "grad_norm": 0.8717867477357565, "learning_rate": 7.119565217391305e-06, "loss": 0.8339749574661255, "step": 132 }, { "epoch": 0.036141304347826086, "grad_norm": 0.7047738675059401, "learning_rate": 7.173913043478261e-06, "loss": 0.6630027294158936, "step": 133 }, { "epoch": 0.03641304347826087, "grad_norm": 0.8243559106051679, "learning_rate": 7.228260869565218e-06, "loss": 0.8360967636108398, "step": 134 }, { "epoch": 0.036684782608695655, "grad_norm": 0.7031752291357636, "learning_rate": 7.282608695652175e-06, "loss": 0.6298309564590454, "step": 135 }, { "epoch": 0.03695652173913044, "grad_norm": 0.7693452732122565, "learning_rate": 7.3369565217391315e-06, "loss": 0.7313283681869507, "step": 136 }, { "epoch": 0.03722826086956522, "grad_norm": 0.7235688699173817, "learning_rate": 7.391304347826087e-06, "loss": 0.6697292327880859, "step": 137 }, { "epoch": 0.0375, "grad_norm": 0.783254775975525, "learning_rate": 7.445652173913044e-06, "loss": 0.6412578821182251, "step": 138 }, { "epoch": 0.03777173913043478, "grad_norm": 0.749940111911722, "learning_rate": 7.500000000000001e-06, "loss": 0.6357153654098511, "step": 139 }, { "epoch": 0.03804347826086957, "grad_norm": 0.7118321045391861, "learning_rate": 7.5543478260869576e-06, "loss": 0.5896950960159302, "step": 140 }, { "epoch": 0.03831521739130435, "grad_norm": 0.6884237216254593, "learning_rate": 7.608695652173914e-06, "loss": 0.5467962622642517, "step": 141 }, { "epoch": 0.03858695652173913, "grad_norm": 0.8109061338480277, "learning_rate": 7.66304347826087e-06, "loss": 0.759538471698761, "step": 142 }, { "epoch": 0.03885869565217391, "grad_norm": 0.7572757240105354, "learning_rate": 7.717391304347827e-06, "loss": 0.6624370813369751, "step": 143 }, { "epoch": 0.0391304347826087, "grad_norm": 0.9306550065149879, "learning_rate": 7.771739130434784e-06, "loss": 0.6727280616760254, "step": 144 }, { "epoch": 0.03940217391304348, "grad_norm": 0.8439679833357412, "learning_rate": 7.82608695652174e-06, "loss": 0.7030360102653503, "step": 145 }, { "epoch": 0.03967391304347826, "grad_norm": 0.8033543927598464, "learning_rate": 7.880434782608695e-06, "loss": 0.6910247802734375, "step": 146 }, { "epoch": 0.03994565217391304, "grad_norm": 0.7223578127308002, "learning_rate": 7.934782608695653e-06, "loss": 0.5766992568969727, "step": 147 }, { "epoch": 0.04021739130434782, "grad_norm": 0.8814345463494592, "learning_rate": 7.98913043478261e-06, "loss": 0.8166050314903259, "step": 148 }, { "epoch": 0.04048913043478261, "grad_norm": 0.744399797984772, "learning_rate": 8.043478260869566e-06, "loss": 0.6075907349586487, "step": 149 }, { "epoch": 0.04076086956521739, "grad_norm": 0.6882556418539939, "learning_rate": 8.097826086956523e-06, "loss": 0.5868034958839417, "step": 150 }, { "epoch": 0.04103260869565217, "grad_norm": 0.9391579996938547, "learning_rate": 8.15217391304348e-06, "loss": 0.7585864067077637, "step": 151 }, { "epoch": 0.041304347826086954, "grad_norm": 0.8271716083777652, "learning_rate": 8.206521739130436e-06, "loss": 0.661005973815918, "step": 152 }, { "epoch": 0.04157608695652174, "grad_norm": 0.8445943016264836, "learning_rate": 8.260869565217392e-06, "loss": 0.6994067430496216, "step": 153 }, { "epoch": 0.04184782608695652, "grad_norm": 0.7815349959403514, "learning_rate": 8.315217391304349e-06, "loss": 0.5935035347938538, "step": 154 }, { "epoch": 0.042119565217391304, "grad_norm": 0.7939698375063776, "learning_rate": 8.369565217391305e-06, "loss": 0.5987645387649536, "step": 155 }, { "epoch": 0.042391304347826085, "grad_norm": 0.9008778830729882, "learning_rate": 8.423913043478262e-06, "loss": 0.6434639692306519, "step": 156 }, { "epoch": 0.042663043478260866, "grad_norm": 0.7475633525016946, "learning_rate": 8.478260869565218e-06, "loss": 0.5423079133033752, "step": 157 }, { "epoch": 0.042934782608695654, "grad_norm": 0.7036276768722384, "learning_rate": 8.532608695652175e-06, "loss": 0.5685904026031494, "step": 158 }, { "epoch": 0.043206521739130435, "grad_norm": 0.8517269625031035, "learning_rate": 8.586956521739131e-06, "loss": 0.7237787246704102, "step": 159 }, { "epoch": 0.043478260869565216, "grad_norm": 0.8059010961717926, "learning_rate": 8.641304347826088e-06, "loss": 0.5714101791381836, "step": 160 }, { "epoch": 0.04375, "grad_norm": 0.8738629887854777, "learning_rate": 8.695652173913044e-06, "loss": 0.6294788122177124, "step": 161 }, { "epoch": 0.044021739130434785, "grad_norm": 0.8971201165384063, "learning_rate": 8.750000000000001e-06, "loss": 0.7725092172622681, "step": 162 }, { "epoch": 0.044293478260869566, "grad_norm": 0.7463368856864878, "learning_rate": 8.804347826086957e-06, "loss": 0.5509802103042603, "step": 163 }, { "epoch": 0.04456521739130435, "grad_norm": 0.869365397002378, "learning_rate": 8.858695652173914e-06, "loss": 0.6492209434509277, "step": 164 }, { "epoch": 0.04483695652173913, "grad_norm": 0.7721244203534584, "learning_rate": 8.91304347826087e-06, "loss": 0.5088313817977905, "step": 165 }, { "epoch": 0.045108695652173916, "grad_norm": 0.8298785886926119, "learning_rate": 8.967391304347827e-06, "loss": 0.6342763900756836, "step": 166 }, { "epoch": 0.0453804347826087, "grad_norm": 0.8557251493338759, "learning_rate": 9.021739130434784e-06, "loss": 0.701035737991333, "step": 167 }, { "epoch": 0.04565217391304348, "grad_norm": 0.8886644311318955, "learning_rate": 9.07608695652174e-06, "loss": 0.7229410409927368, "step": 168 }, { "epoch": 0.04592391304347826, "grad_norm": 0.8849029780846839, "learning_rate": 9.130434782608697e-06, "loss": 0.7297962307929993, "step": 169 }, { "epoch": 0.04619565217391304, "grad_norm": 0.9355716621839283, "learning_rate": 9.184782608695653e-06, "loss": 0.7163747549057007, "step": 170 }, { "epoch": 0.04646739130434783, "grad_norm": 0.852562696949911, "learning_rate": 9.23913043478261e-06, "loss": 0.6799457669258118, "step": 171 }, { "epoch": 0.04673913043478261, "grad_norm": 0.7966310168698807, "learning_rate": 9.293478260869566e-06, "loss": 0.6100296974182129, "step": 172 }, { "epoch": 0.04701086956521739, "grad_norm": 0.7971630331637756, "learning_rate": 9.347826086956523e-06, "loss": 0.5845295190811157, "step": 173 }, { "epoch": 0.04728260869565217, "grad_norm": 0.8289321236264132, "learning_rate": 9.402173913043479e-06, "loss": 0.5592339038848877, "step": 174 }, { "epoch": 0.04755434782608696, "grad_norm": 0.8454949611488424, "learning_rate": 9.456521739130436e-06, "loss": 0.6968813538551331, "step": 175 }, { "epoch": 0.04782608695652174, "grad_norm": 0.9624078249162221, "learning_rate": 9.510869565217392e-06, "loss": 0.7213064432144165, "step": 176 }, { "epoch": 0.04809782608695652, "grad_norm": 0.8745589052223126, "learning_rate": 9.565217391304349e-06, "loss": 0.6299700140953064, "step": 177 }, { "epoch": 0.0483695652173913, "grad_norm": 0.8638434750437401, "learning_rate": 9.619565217391305e-06, "loss": 0.6657505035400391, "step": 178 }, { "epoch": 0.048641304347826084, "grad_norm": 1.0094990397824672, "learning_rate": 9.673913043478262e-06, "loss": 0.7701022624969482, "step": 179 }, { "epoch": 0.04891304347826087, "grad_norm": 0.8808848002009791, "learning_rate": 9.728260869565218e-06, "loss": 0.6049208045005798, "step": 180 }, { "epoch": 0.04918478260869565, "grad_norm": 0.7816016546069977, "learning_rate": 9.782608695652175e-06, "loss": 0.5745230913162231, "step": 181 }, { "epoch": 0.049456521739130434, "grad_norm": 0.8932465776520285, "learning_rate": 9.836956521739131e-06, "loss": 0.6277545690536499, "step": 182 }, { "epoch": 0.049728260869565215, "grad_norm": 0.8906506232346835, "learning_rate": 9.891304347826088e-06, "loss": 0.7606451511383057, "step": 183 }, { "epoch": 0.05, "grad_norm": 0.8937041144724513, "learning_rate": 9.945652173913044e-06, "loss": 0.8257922530174255, "step": 184 }, { "epoch": 0.050271739130434784, "grad_norm": 0.9358887503790122, "learning_rate": 1e-05, "loss": 0.745275616645813, "step": 185 }, { "epoch": 0.050543478260869565, "grad_norm": 0.8518611290978289, "learning_rate": 1.0054347826086956e-05, "loss": 0.627392053604126, "step": 186 }, { "epoch": 0.050815217391304346, "grad_norm": 0.8473272410508216, "learning_rate": 1.0108695652173914e-05, "loss": 0.6922687292098999, "step": 187 }, { "epoch": 0.051086956521739134, "grad_norm": 0.9507886741868988, "learning_rate": 1.016304347826087e-05, "loss": 0.6783848404884338, "step": 188 }, { "epoch": 0.051358695652173915, "grad_norm": 0.9611421558244001, "learning_rate": 1.0217391304347829e-05, "loss": 0.7879489660263062, "step": 189 }, { "epoch": 0.051630434782608696, "grad_norm": 0.8804915357501675, "learning_rate": 1.0271739130434783e-05, "loss": 0.6987303495407104, "step": 190 }, { "epoch": 0.05190217391304348, "grad_norm": 1.0076135756012832, "learning_rate": 1.032608695652174e-05, "loss": 0.7235118746757507, "step": 191 }, { "epoch": 0.05217391304347826, "grad_norm": 0.8578667939449207, "learning_rate": 1.0380434782608696e-05, "loss": 0.6262919306755066, "step": 192 }, { "epoch": 0.052445652173913046, "grad_norm": 0.9486103474958836, "learning_rate": 1.0434782608695653e-05, "loss": 0.7383699417114258, "step": 193 }, { "epoch": 0.05271739130434783, "grad_norm": 0.7078740619862254, "learning_rate": 1.0489130434782611e-05, "loss": 0.4646698236465454, "step": 194 }, { "epoch": 0.05298913043478261, "grad_norm": 0.964840575807317, "learning_rate": 1.0543478260869566e-05, "loss": 0.745015025138855, "step": 195 }, { "epoch": 0.05326086956521739, "grad_norm": 0.9592234588140738, "learning_rate": 1.0597826086956523e-05, "loss": 0.7423193454742432, "step": 196 }, { "epoch": 0.05353260869565218, "grad_norm": 0.8904502372723639, "learning_rate": 1.0652173913043479e-05, "loss": 0.6446977853775024, "step": 197 }, { "epoch": 0.05380434782608696, "grad_norm": 0.912125829846862, "learning_rate": 1.0706521739130436e-05, "loss": 0.6539313793182373, "step": 198 }, { "epoch": 0.05407608695652174, "grad_norm": 0.8798442982242167, "learning_rate": 1.076086956521739e-05, "loss": 0.615871250629425, "step": 199 }, { "epoch": 0.05434782608695652, "grad_norm": 0.8277161254691946, "learning_rate": 1.0815217391304349e-05, "loss": 0.5405906438827515, "step": 200 }, { "epoch": 0.0546195652173913, "grad_norm": 0.9823954946491552, "learning_rate": 1.0869565217391305e-05, "loss": 0.6871336698532104, "step": 201 }, { "epoch": 0.05489130434782609, "grad_norm": 0.884032274805086, "learning_rate": 1.0923913043478263e-05, "loss": 0.6464623212814331, "step": 202 }, { "epoch": 0.05516304347826087, "grad_norm": 0.9253923494065682, "learning_rate": 1.0978260869565218e-05, "loss": 0.6650947332382202, "step": 203 }, { "epoch": 0.05543478260869565, "grad_norm": 1.002342648794571, "learning_rate": 1.1032608695652175e-05, "loss": 0.7445963621139526, "step": 204 }, { "epoch": 0.05570652173913043, "grad_norm": 0.9127832053292477, "learning_rate": 1.1086956521739131e-05, "loss": 0.6489561796188354, "step": 205 }, { "epoch": 0.05597826086956522, "grad_norm": 0.9415869290723259, "learning_rate": 1.1141304347826088e-05, "loss": 0.6751245260238647, "step": 206 }, { "epoch": 0.05625, "grad_norm": 0.8785705661717919, "learning_rate": 1.1195652173913046e-05, "loss": 0.6143460273742676, "step": 207 }, { "epoch": 0.05652173913043478, "grad_norm": 0.8867533232174281, "learning_rate": 1.125e-05, "loss": 0.7036882638931274, "step": 208 }, { "epoch": 0.05679347826086956, "grad_norm": 0.7893470386128982, "learning_rate": 1.1304347826086957e-05, "loss": 0.5585653185844421, "step": 209 }, { "epoch": 0.057065217391304345, "grad_norm": 1.0892654017088261, "learning_rate": 1.1358695652173914e-05, "loss": 0.7987618446350098, "step": 210 }, { "epoch": 0.05733695652173913, "grad_norm": 0.8407732615987474, "learning_rate": 1.141304347826087e-05, "loss": 0.5649052262306213, "step": 211 }, { "epoch": 0.057608695652173914, "grad_norm": 0.9028898786164872, "learning_rate": 1.1467391304347828e-05, "loss": 0.6254444718360901, "step": 212 }, { "epoch": 0.057880434782608695, "grad_norm": 0.8680482535700544, "learning_rate": 1.1521739130434783e-05, "loss": 0.5935732126235962, "step": 213 }, { "epoch": 0.058152173913043476, "grad_norm": 0.8660336416840433, "learning_rate": 1.157608695652174e-05, "loss": 0.44629573822021484, "step": 214 }, { "epoch": 0.058423913043478264, "grad_norm": 0.8057199413117724, "learning_rate": 1.1630434782608698e-05, "loss": 0.48287034034729004, "step": 215 }, { "epoch": 0.058695652173913045, "grad_norm": 0.9128446791370133, "learning_rate": 1.1684782608695653e-05, "loss": 0.6501718759536743, "step": 216 }, { "epoch": 0.058967391304347826, "grad_norm": 1.1127737974818674, "learning_rate": 1.1739130434782611e-05, "loss": 0.8350937366485596, "step": 217 }, { "epoch": 0.05923913043478261, "grad_norm": 0.9131440289480063, "learning_rate": 1.1793478260869566e-05, "loss": 0.5681010484695435, "step": 218 }, { "epoch": 0.059510869565217395, "grad_norm": 0.9375354665969485, "learning_rate": 1.1847826086956522e-05, "loss": 0.7201821804046631, "step": 219 }, { "epoch": 0.059782608695652176, "grad_norm": 0.9488767013298721, "learning_rate": 1.190217391304348e-05, "loss": 0.6239070892333984, "step": 220 }, { "epoch": 0.06005434782608696, "grad_norm": 0.9511317002187224, "learning_rate": 1.1956521739130435e-05, "loss": 0.6407225131988525, "step": 221 }, { "epoch": 0.06032608695652174, "grad_norm": 0.9115746360060782, "learning_rate": 1.2010869565217392e-05, "loss": 0.6474592685699463, "step": 222 }, { "epoch": 0.06059782608695652, "grad_norm": 0.8358731790118221, "learning_rate": 1.2065217391304348e-05, "loss": 0.5597754716873169, "step": 223 }, { "epoch": 0.06086956521739131, "grad_norm": 0.9168469167623263, "learning_rate": 1.2119565217391305e-05, "loss": 0.6581052541732788, "step": 224 }, { "epoch": 0.06114130434782609, "grad_norm": 0.8866880254476773, "learning_rate": 1.2173913043478263e-05, "loss": 0.631277322769165, "step": 225 }, { "epoch": 0.06141304347826087, "grad_norm": 0.826523835649116, "learning_rate": 1.2228260869565218e-05, "loss": 0.5197942852973938, "step": 226 }, { "epoch": 0.06168478260869565, "grad_norm": 1.0999656072436093, "learning_rate": 1.2282608695652175e-05, "loss": 0.7226812839508057, "step": 227 }, { "epoch": 0.06195652173913044, "grad_norm": 1.0011681279033322, "learning_rate": 1.2336956521739131e-05, "loss": 0.6437143683433533, "step": 228 }, { "epoch": 0.06222826086956522, "grad_norm": 0.905764304994649, "learning_rate": 1.2391304347826088e-05, "loss": 0.6829544901847839, "step": 229 }, { "epoch": 0.0625, "grad_norm": 0.8114846253593001, "learning_rate": 1.2445652173913046e-05, "loss": 0.5270165205001831, "step": 230 }, { "epoch": 0.06277173913043478, "grad_norm": 0.9202112490075265, "learning_rate": 1.25e-05, "loss": 0.5934036374092102, "step": 231 }, { "epoch": 0.06304347826086956, "grad_norm": 0.7984186353584632, "learning_rate": 1.2554347826086957e-05, "loss": 0.433380663394928, "step": 232 }, { "epoch": 0.06331521739130434, "grad_norm": 0.9100022092179991, "learning_rate": 1.2608695652173915e-05, "loss": 0.6683671474456787, "step": 233 }, { "epoch": 0.06358695652173912, "grad_norm": 0.9116471536809057, "learning_rate": 1.266304347826087e-05, "loss": 0.6200209856033325, "step": 234 }, { "epoch": 0.06385869565217392, "grad_norm": 0.990348579899064, "learning_rate": 1.2717391304347828e-05, "loss": 0.6782975196838379, "step": 235 }, { "epoch": 0.0641304347826087, "grad_norm": 1.0588178352767628, "learning_rate": 1.2771739130434783e-05, "loss": 0.7087604999542236, "step": 236 }, { "epoch": 0.06440217391304348, "grad_norm": 0.9635903094424494, "learning_rate": 1.282608695652174e-05, "loss": 0.6983143091201782, "step": 237 }, { "epoch": 0.06467391304347826, "grad_norm": 0.8703292385373886, "learning_rate": 1.2880434782608698e-05, "loss": 0.5704269409179688, "step": 238 }, { "epoch": 0.06494565217391304, "grad_norm": 0.9756156263097423, "learning_rate": 1.2934782608695653e-05, "loss": 0.608792245388031, "step": 239 }, { "epoch": 0.06521739130434782, "grad_norm": 0.8897437629310505, "learning_rate": 1.2989130434782611e-05, "loss": 0.5379980802536011, "step": 240 }, { "epoch": 0.0654891304347826, "grad_norm": 0.8811149712443395, "learning_rate": 1.3043478260869566e-05, "loss": 0.5735219717025757, "step": 241 }, { "epoch": 0.06576086956521739, "grad_norm": 0.9185614024166212, "learning_rate": 1.3097826086956522e-05, "loss": 0.6321060061454773, "step": 242 }, { "epoch": 0.06603260869565217, "grad_norm": 0.8345866399156637, "learning_rate": 1.315217391304348e-05, "loss": 0.5085781812667847, "step": 243 }, { "epoch": 0.06630434782608696, "grad_norm": 1.0289427206932709, "learning_rate": 1.3206521739130435e-05, "loss": 0.7323548793792725, "step": 244 }, { "epoch": 0.06657608695652174, "grad_norm": 1.06162605669598, "learning_rate": 1.3260869565217392e-05, "loss": 0.6112697124481201, "step": 245 }, { "epoch": 0.06684782608695652, "grad_norm": 0.9521079417370764, "learning_rate": 1.331521739130435e-05, "loss": 0.6640294790267944, "step": 246 }, { "epoch": 0.0671195652173913, "grad_norm": 0.9249903158568066, "learning_rate": 1.3369565217391305e-05, "loss": 0.5682545900344849, "step": 247 }, { "epoch": 0.06739130434782609, "grad_norm": 1.0685694272071138, "learning_rate": 1.3423913043478263e-05, "loss": 0.6383143663406372, "step": 248 }, { "epoch": 0.06766304347826087, "grad_norm": 1.021390764009021, "learning_rate": 1.3478260869565218e-05, "loss": 0.7238149642944336, "step": 249 }, { "epoch": 0.06793478260869565, "grad_norm": 0.8845023436300519, "learning_rate": 1.3532608695652174e-05, "loss": 0.6195739507675171, "step": 250 }, { "epoch": 0.06820652173913043, "grad_norm": 1.1595100205049398, "learning_rate": 1.3586956521739133e-05, "loss": 0.7919309139251709, "step": 251 }, { "epoch": 0.06847826086956521, "grad_norm": 1.1029631759898582, "learning_rate": 1.3641304347826087e-05, "loss": 0.6900105476379395, "step": 252 }, { "epoch": 0.06875, "grad_norm": 1.0234675937757267, "learning_rate": 1.3695652173913046e-05, "loss": 0.6890928745269775, "step": 253 }, { "epoch": 0.06902173913043479, "grad_norm": 0.9064954764934405, "learning_rate": 1.375e-05, "loss": 0.5614787936210632, "step": 254 }, { "epoch": 0.06929347826086957, "grad_norm": 0.9531476287628989, "learning_rate": 1.3804347826086957e-05, "loss": 0.5940463542938232, "step": 255 }, { "epoch": 0.06956521739130435, "grad_norm": 0.8228105671507785, "learning_rate": 1.3858695652173915e-05, "loss": 0.5390968918800354, "step": 256 }, { "epoch": 0.06983695652173913, "grad_norm": 0.8886026967372114, "learning_rate": 1.391304347826087e-05, "loss": 0.5491601824760437, "step": 257 }, { "epoch": 0.07010869565217391, "grad_norm": 0.9236830788138551, "learning_rate": 1.3967391304347828e-05, "loss": 0.6714186072349548, "step": 258 }, { "epoch": 0.07038043478260869, "grad_norm": 0.9759619005329401, "learning_rate": 1.4021739130434783e-05, "loss": 0.6437249183654785, "step": 259 }, { "epoch": 0.07065217391304347, "grad_norm": 0.877514128926328, "learning_rate": 1.407608695652174e-05, "loss": 0.594681978225708, "step": 260 }, { "epoch": 0.07092391304347827, "grad_norm": 0.9396237351238705, "learning_rate": 1.4130434782608698e-05, "loss": 0.6306597590446472, "step": 261 }, { "epoch": 0.07119565217391305, "grad_norm": 0.8891054308696187, "learning_rate": 1.4184782608695653e-05, "loss": 0.5651192665100098, "step": 262 }, { "epoch": 0.07146739130434783, "grad_norm": 1.026298751657587, "learning_rate": 1.423913043478261e-05, "loss": 0.711195707321167, "step": 263 }, { "epoch": 0.07173913043478261, "grad_norm": 0.9683631038660453, "learning_rate": 1.4293478260869567e-05, "loss": 0.6097120046615601, "step": 264 }, { "epoch": 0.07201086956521739, "grad_norm": 0.87370076341813, "learning_rate": 1.4347826086956522e-05, "loss": 0.57010418176651, "step": 265 }, { "epoch": 0.07228260869565217, "grad_norm": 1.2823566730730351, "learning_rate": 1.440217391304348e-05, "loss": 0.7457181215286255, "step": 266 }, { "epoch": 0.07255434782608695, "grad_norm": 0.9538772659625946, "learning_rate": 1.4456521739130435e-05, "loss": 0.581217885017395, "step": 267 }, { "epoch": 0.07282608695652174, "grad_norm": 1.1559756496802347, "learning_rate": 1.4510869565217392e-05, "loss": 0.7401679754257202, "step": 268 }, { "epoch": 0.07309782608695652, "grad_norm": 1.0063186572212808, "learning_rate": 1.456521739130435e-05, "loss": 0.6255360841751099, "step": 269 }, { "epoch": 0.07336956521739131, "grad_norm": 1.1178953239871998, "learning_rate": 1.4619565217391305e-05, "loss": 0.6797584891319275, "step": 270 }, { "epoch": 0.07364130434782609, "grad_norm": 1.001801876441011, "learning_rate": 1.4673913043478263e-05, "loss": 0.6681472063064575, "step": 271 }, { "epoch": 0.07391304347826087, "grad_norm": 0.8561621946004557, "learning_rate": 1.4728260869565218e-05, "loss": 0.5037481188774109, "step": 272 }, { "epoch": 0.07418478260869565, "grad_norm": 1.0445823165876458, "learning_rate": 1.4782608695652174e-05, "loss": 0.6379857063293457, "step": 273 }, { "epoch": 0.07445652173913044, "grad_norm": 1.1319709705012637, "learning_rate": 1.4836956521739133e-05, "loss": 0.7306862473487854, "step": 274 }, { "epoch": 0.07472826086956522, "grad_norm": 0.8174722242162635, "learning_rate": 1.4891304347826087e-05, "loss": 0.4806300699710846, "step": 275 }, { "epoch": 0.075, "grad_norm": 1.0329845361015357, "learning_rate": 1.4945652173913046e-05, "loss": 0.6540035605430603, "step": 276 }, { "epoch": 0.07527173913043478, "grad_norm": 1.098264447947003, "learning_rate": 1.5000000000000002e-05, "loss": 0.6824979186058044, "step": 277 }, { "epoch": 0.07554347826086956, "grad_norm": 1.0653370613436135, "learning_rate": 1.5054347826086957e-05, "loss": 0.6497901678085327, "step": 278 }, { "epoch": 0.07581521739130435, "grad_norm": 1.0881129253788013, "learning_rate": 1.5108695652173915e-05, "loss": 0.6676938533782959, "step": 279 }, { "epoch": 0.07608695652173914, "grad_norm": 1.1599246444729454, "learning_rate": 1.516304347826087e-05, "loss": 0.704906702041626, "step": 280 }, { "epoch": 0.07635869565217392, "grad_norm": 0.8569870542011234, "learning_rate": 1.5217391304347828e-05, "loss": 0.43365707993507385, "step": 281 }, { "epoch": 0.0766304347826087, "grad_norm": 0.875595115621392, "learning_rate": 1.5271739130434785e-05, "loss": 0.4870246946811676, "step": 282 }, { "epoch": 0.07690217391304348, "grad_norm": 1.0004698947943815, "learning_rate": 1.532608695652174e-05, "loss": 0.5790647864341736, "step": 283 }, { "epoch": 0.07717391304347826, "grad_norm": 1.049367662285521, "learning_rate": 1.5380434782608698e-05, "loss": 0.7169564366340637, "step": 284 }, { "epoch": 0.07744565217391304, "grad_norm": 1.0553851627195692, "learning_rate": 1.5434782608695654e-05, "loss": 0.6446009278297424, "step": 285 }, { "epoch": 0.07771739130434782, "grad_norm": 0.9184565152386879, "learning_rate": 1.548913043478261e-05, "loss": 0.5473958253860474, "step": 286 }, { "epoch": 0.0779891304347826, "grad_norm": 1.1571504802569774, "learning_rate": 1.5543478260869567e-05, "loss": 0.7963664531707764, "step": 287 }, { "epoch": 0.0782608695652174, "grad_norm": 1.0848488309640636, "learning_rate": 1.5597826086956524e-05, "loss": 0.7350301146507263, "step": 288 }, { "epoch": 0.07853260869565218, "grad_norm": 0.969385334475565, "learning_rate": 1.565217391304348e-05, "loss": 0.6297093629837036, "step": 289 }, { "epoch": 0.07880434782608696, "grad_norm": 0.8671002209286938, "learning_rate": 1.5706521739130437e-05, "loss": 0.5638586282730103, "step": 290 }, { "epoch": 0.07907608695652174, "grad_norm": 0.9817600535163333, "learning_rate": 1.576086956521739e-05, "loss": 0.7270001173019409, "step": 291 }, { "epoch": 0.07934782608695652, "grad_norm": 0.9324019224245822, "learning_rate": 1.581521739130435e-05, "loss": 0.641902506351471, "step": 292 }, { "epoch": 0.0796195652173913, "grad_norm": 0.7884897397276418, "learning_rate": 1.5869565217391306e-05, "loss": 0.5075803995132446, "step": 293 }, { "epoch": 0.07989130434782608, "grad_norm": 0.8330199486711082, "learning_rate": 1.5923913043478263e-05, "loss": 0.516287088394165, "step": 294 }, { "epoch": 0.08016304347826086, "grad_norm": 1.0239741318778806, "learning_rate": 1.597826086956522e-05, "loss": 0.6401122212409973, "step": 295 }, { "epoch": 0.08043478260869565, "grad_norm": 1.0267612855782886, "learning_rate": 1.6032608695652176e-05, "loss": 0.6498497724533081, "step": 296 }, { "epoch": 0.08070652173913044, "grad_norm": 0.9822181722241436, "learning_rate": 1.6086956521739132e-05, "loss": 0.6564515829086304, "step": 297 }, { "epoch": 0.08097826086956522, "grad_norm": 0.9143369889283508, "learning_rate": 1.614130434782609e-05, "loss": 0.5790969133377075, "step": 298 }, { "epoch": 0.08125, "grad_norm": 1.0558993529424683, "learning_rate": 1.6195652173913045e-05, "loss": 0.6840313076972961, "step": 299 }, { "epoch": 0.08152173913043478, "grad_norm": 1.0096468792036837, "learning_rate": 1.6250000000000002e-05, "loss": 0.6094136238098145, "step": 300 }, { "epoch": 0.08179347826086956, "grad_norm": 1.0686199206591478, "learning_rate": 1.630434782608696e-05, "loss": 0.6379678249359131, "step": 301 }, { "epoch": 0.08206521739130435, "grad_norm": 1.0078326875150168, "learning_rate": 1.6358695652173915e-05, "loss": 0.6886587142944336, "step": 302 }, { "epoch": 0.08233695652173913, "grad_norm": 0.8809096342904077, "learning_rate": 1.641304347826087e-05, "loss": 0.4855576753616333, "step": 303 }, { "epoch": 0.08260869565217391, "grad_norm": 0.8785252222179123, "learning_rate": 1.6467391304347828e-05, "loss": 0.5805788040161133, "step": 304 }, { "epoch": 0.08288043478260869, "grad_norm": 1.0614688625535245, "learning_rate": 1.6521739130434785e-05, "loss": 0.6366295218467712, "step": 305 }, { "epoch": 0.08315217391304348, "grad_norm": 1.1359311613622574, "learning_rate": 1.657608695652174e-05, "loss": 0.670891523361206, "step": 306 }, { "epoch": 0.08342391304347826, "grad_norm": 0.883138680356903, "learning_rate": 1.6630434782608698e-05, "loss": 0.4847770929336548, "step": 307 }, { "epoch": 0.08369565217391305, "grad_norm": 1.1192348671724102, "learning_rate": 1.6684782608695654e-05, "loss": 0.7179162502288818, "step": 308 }, { "epoch": 0.08396739130434783, "grad_norm": 1.02690333733837, "learning_rate": 1.673913043478261e-05, "loss": 0.7106865644454956, "step": 309 }, { "epoch": 0.08423913043478261, "grad_norm": 0.8708541057556253, "learning_rate": 1.6793478260869567e-05, "loss": 0.5898005962371826, "step": 310 }, { "epoch": 0.08451086956521739, "grad_norm": 0.8505057155471641, "learning_rate": 1.6847826086956524e-05, "loss": 0.5489276051521301, "step": 311 }, { "epoch": 0.08478260869565217, "grad_norm": 0.8901531991977464, "learning_rate": 1.690217391304348e-05, "loss": 0.5925596952438354, "step": 312 }, { "epoch": 0.08505434782608695, "grad_norm": 1.0282059922681797, "learning_rate": 1.6956521739130437e-05, "loss": 0.6035385131835938, "step": 313 }, { "epoch": 0.08532608695652173, "grad_norm": 1.0186355923970383, "learning_rate": 1.7010869565217393e-05, "loss": 0.5883886814117432, "step": 314 }, { "epoch": 0.08559782608695653, "grad_norm": 1.0872665683493812, "learning_rate": 1.706521739130435e-05, "loss": 0.6622264385223389, "step": 315 }, { "epoch": 0.08586956521739131, "grad_norm": 0.9687603420413298, "learning_rate": 1.7119565217391306e-05, "loss": 0.5790713429450989, "step": 316 }, { "epoch": 0.08614130434782609, "grad_norm": 0.9610707313564555, "learning_rate": 1.7173913043478263e-05, "loss": 0.62604159116745, "step": 317 }, { "epoch": 0.08641304347826087, "grad_norm": 0.9421908047154536, "learning_rate": 1.722826086956522e-05, "loss": 0.585541844367981, "step": 318 }, { "epoch": 0.08668478260869565, "grad_norm": 1.1041425213417517, "learning_rate": 1.7282608695652176e-05, "loss": 0.6414051651954651, "step": 319 }, { "epoch": 0.08695652173913043, "grad_norm": 1.1376191574270476, "learning_rate": 1.7336956521739132e-05, "loss": 0.7727217078208923, "step": 320 }, { "epoch": 0.08722826086956521, "grad_norm": 0.9633736322486562, "learning_rate": 1.739130434782609e-05, "loss": 0.5960030555725098, "step": 321 }, { "epoch": 0.0875, "grad_norm": 0.953140163260782, "learning_rate": 1.7445652173913045e-05, "loss": 0.5241268873214722, "step": 322 }, { "epoch": 0.08777173913043479, "grad_norm": 1.02658969213099, "learning_rate": 1.7500000000000002e-05, "loss": 0.6125473976135254, "step": 323 }, { "epoch": 0.08804347826086957, "grad_norm": 1.0726112931421676, "learning_rate": 1.755434782608696e-05, "loss": 0.6473292708396912, "step": 324 }, { "epoch": 0.08831521739130435, "grad_norm": 1.0899905208189693, "learning_rate": 1.7608695652173915e-05, "loss": 0.6212141513824463, "step": 325 }, { "epoch": 0.08858695652173913, "grad_norm": 1.0117803461981314, "learning_rate": 1.766304347826087e-05, "loss": 0.5945913791656494, "step": 326 }, { "epoch": 0.08885869565217391, "grad_norm": 0.9417929150525578, "learning_rate": 1.7717391304347828e-05, "loss": 0.5559732913970947, "step": 327 }, { "epoch": 0.0891304347826087, "grad_norm": 1.0220701330123787, "learning_rate": 1.7771739130434784e-05, "loss": 0.6418216824531555, "step": 328 }, { "epoch": 0.08940217391304348, "grad_norm": 0.9953294612619028, "learning_rate": 1.782608695652174e-05, "loss": 0.569625973701477, "step": 329 }, { "epoch": 0.08967391304347826, "grad_norm": 0.8486718387211641, "learning_rate": 1.7880434782608697e-05, "loss": 0.4729055166244507, "step": 330 }, { "epoch": 0.08994565217391304, "grad_norm": 1.0167519062897723, "learning_rate": 1.7934782608695654e-05, "loss": 0.6514877080917358, "step": 331 }, { "epoch": 0.09021739130434783, "grad_norm": 0.9574408017029441, "learning_rate": 1.798913043478261e-05, "loss": 0.5642672181129456, "step": 332 }, { "epoch": 0.09048913043478261, "grad_norm": 0.9569861938479455, "learning_rate": 1.8043478260869567e-05, "loss": 0.5872882604598999, "step": 333 }, { "epoch": 0.0907608695652174, "grad_norm": 0.9176149753619803, "learning_rate": 1.8097826086956524e-05, "loss": 0.6397286653518677, "step": 334 }, { "epoch": 0.09103260869565218, "grad_norm": 1.082981032279794, "learning_rate": 1.815217391304348e-05, "loss": 0.658473014831543, "step": 335 }, { "epoch": 0.09130434782608696, "grad_norm": 0.9318167140063919, "learning_rate": 1.8206521739130437e-05, "loss": 0.5820192694664001, "step": 336 }, { "epoch": 0.09157608695652174, "grad_norm": 1.149958065664226, "learning_rate": 1.8260869565217393e-05, "loss": 0.7064948081970215, "step": 337 }, { "epoch": 0.09184782608695652, "grad_norm": 1.0353767870867956, "learning_rate": 1.831521739130435e-05, "loss": 0.6510999202728271, "step": 338 }, { "epoch": 0.0921195652173913, "grad_norm": 1.013974404642072, "learning_rate": 1.8369565217391306e-05, "loss": 0.6430458426475525, "step": 339 }, { "epoch": 0.09239130434782608, "grad_norm": 0.9135999440552924, "learning_rate": 1.8423913043478263e-05, "loss": 0.5280141830444336, "step": 340 }, { "epoch": 0.09266304347826088, "grad_norm": 0.9319740310713687, "learning_rate": 1.847826086956522e-05, "loss": 0.48775386810302734, "step": 341 }, { "epoch": 0.09293478260869566, "grad_norm": 0.9523297630970142, "learning_rate": 1.8532608695652176e-05, "loss": 0.558611273765564, "step": 342 }, { "epoch": 0.09320652173913044, "grad_norm": 0.8825556924164031, "learning_rate": 1.8586956521739132e-05, "loss": 0.5010311603546143, "step": 343 }, { "epoch": 0.09347826086956522, "grad_norm": 1.1011977760542924, "learning_rate": 1.864130434782609e-05, "loss": 0.6518286466598511, "step": 344 }, { "epoch": 0.09375, "grad_norm": 1.028111610697607, "learning_rate": 1.8695652173913045e-05, "loss": 0.6371402740478516, "step": 345 }, { "epoch": 0.09402173913043478, "grad_norm": 1.0149954016437768, "learning_rate": 1.8750000000000002e-05, "loss": 0.5816962122917175, "step": 346 }, { "epoch": 0.09429347826086956, "grad_norm": 1.0730507973503158, "learning_rate": 1.8804347826086958e-05, "loss": 0.66534423828125, "step": 347 }, { "epoch": 0.09456521739130434, "grad_norm": 1.048174044750398, "learning_rate": 1.8858695652173915e-05, "loss": 0.6325708627700806, "step": 348 }, { "epoch": 0.09483695652173912, "grad_norm": 1.1872949678752254, "learning_rate": 1.891304347826087e-05, "loss": 0.6797744035720825, "step": 349 }, { "epoch": 0.09510869565217392, "grad_norm": 1.031404783021431, "learning_rate": 1.8967391304347828e-05, "loss": 0.6481859087944031, "step": 350 }, { "epoch": 0.0953804347826087, "grad_norm": 0.8727130305054152, "learning_rate": 1.9021739130434784e-05, "loss": 0.5656973123550415, "step": 351 }, { "epoch": 0.09565217391304348, "grad_norm": 1.0513854740809307, "learning_rate": 1.907608695652174e-05, "loss": 0.7596349716186523, "step": 352 }, { "epoch": 0.09592391304347826, "grad_norm": 1.0697350320993815, "learning_rate": 1.9130434782608697e-05, "loss": 0.6424898505210876, "step": 353 }, { "epoch": 0.09619565217391304, "grad_norm": 0.9392848530397204, "learning_rate": 1.9184782608695654e-05, "loss": 0.6196227073669434, "step": 354 }, { "epoch": 0.09646739130434782, "grad_norm": 0.9852424758533861, "learning_rate": 1.923913043478261e-05, "loss": 0.6113418936729431, "step": 355 }, { "epoch": 0.0967391304347826, "grad_norm": 0.9846893702571684, "learning_rate": 1.9293478260869567e-05, "loss": 0.6636749505996704, "step": 356 }, { "epoch": 0.09701086956521739, "grad_norm": 0.9798072120553816, "learning_rate": 1.9347826086956523e-05, "loss": 0.5736275911331177, "step": 357 }, { "epoch": 0.09728260869565217, "grad_norm": 0.8148178067596199, "learning_rate": 1.940217391304348e-05, "loss": 0.4942713975906372, "step": 358 }, { "epoch": 0.09755434782608696, "grad_norm": 0.7935678601153499, "learning_rate": 1.9456521739130436e-05, "loss": 0.519306480884552, "step": 359 }, { "epoch": 0.09782608695652174, "grad_norm": 0.9964099128895348, "learning_rate": 1.9510869565217393e-05, "loss": 0.628757894039154, "step": 360 }, { "epoch": 0.09809782608695652, "grad_norm": 1.0075395678967867, "learning_rate": 1.956521739130435e-05, "loss": 0.6299825310707092, "step": 361 }, { "epoch": 0.0983695652173913, "grad_norm": 0.9917407070986901, "learning_rate": 1.9619565217391306e-05, "loss": 0.6439937353134155, "step": 362 }, { "epoch": 0.09864130434782609, "grad_norm": 0.9461710459707291, "learning_rate": 1.9673913043478263e-05, "loss": 0.5388772487640381, "step": 363 }, { "epoch": 0.09891304347826087, "grad_norm": 0.8559837876948048, "learning_rate": 1.972826086956522e-05, "loss": 0.49039459228515625, "step": 364 }, { "epoch": 0.09918478260869565, "grad_norm": 1.0799249018963653, "learning_rate": 1.9782608695652176e-05, "loss": 0.6620017290115356, "step": 365 }, { "epoch": 0.09945652173913043, "grad_norm": 1.0228900943711372, "learning_rate": 1.9836956521739132e-05, "loss": 0.6728794574737549, "step": 366 }, { "epoch": 0.09972826086956521, "grad_norm": 1.059560497532917, "learning_rate": 1.989130434782609e-05, "loss": 0.6092290878295898, "step": 367 }, { "epoch": 0.1, "grad_norm": 0.8784710177618408, "learning_rate": 1.9945652173913045e-05, "loss": 0.4845273494720459, "step": 368 }, { "epoch": 0.10027173913043479, "grad_norm": 0.9318341898308732, "learning_rate": 2e-05, "loss": 0.5743525624275208, "step": 369 }, { "epoch": 0.10054347826086957, "grad_norm": 1.1159858355635013, "learning_rate": 1.9999998990591628e-05, "loss": 0.6279982328414917, "step": 370 }, { "epoch": 0.10081521739130435, "grad_norm": 1.0302531131700405, "learning_rate": 1.9999995962366713e-05, "loss": 0.6175355911254883, "step": 371 }, { "epoch": 0.10108695652173913, "grad_norm": 1.0599505863108694, "learning_rate": 1.999999091532586e-05, "loss": 0.6352770328521729, "step": 372 }, { "epoch": 0.10135869565217391, "grad_norm": 0.9660814306579413, "learning_rate": 1.9999983849470097e-05, "loss": 0.6104558706283569, "step": 373 }, { "epoch": 0.10163043478260869, "grad_norm": 1.0948201282845538, "learning_rate": 1.9999974764800848e-05, "loss": 0.7003172636032104, "step": 374 }, { "epoch": 0.10190217391304347, "grad_norm": 1.017712299616435, "learning_rate": 1.9999963661319944e-05, "loss": 0.5411758422851562, "step": 375 }, { "epoch": 0.10217391304347827, "grad_norm": 0.9766357828254563, "learning_rate": 1.9999950539029634e-05, "loss": 0.6104270815849304, "step": 376 }, { "epoch": 0.10244565217391305, "grad_norm": 1.1214875716970403, "learning_rate": 1.9999935397932555e-05, "loss": 0.7115544676780701, "step": 377 }, { "epoch": 0.10271739130434783, "grad_norm": 0.8245562276242714, "learning_rate": 1.9999918238031773e-05, "loss": 0.4366043508052826, "step": 378 }, { "epoch": 0.10298913043478261, "grad_norm": 0.9022479679546097, "learning_rate": 1.9999899059330753e-05, "loss": 0.530981719493866, "step": 379 }, { "epoch": 0.10326086956521739, "grad_norm": 0.8435321220521991, "learning_rate": 1.999987786183336e-05, "loss": 0.5045726895332336, "step": 380 }, { "epoch": 0.10353260869565217, "grad_norm": 0.972316360005618, "learning_rate": 1.9999854645543876e-05, "loss": 0.5562126636505127, "step": 381 }, { "epoch": 0.10380434782608695, "grad_norm": 0.9609921751936963, "learning_rate": 1.9999829410466993e-05, "loss": 0.6305237412452698, "step": 382 }, { "epoch": 0.10407608695652174, "grad_norm": 1.0643586834293632, "learning_rate": 1.99998021566078e-05, "loss": 0.662166178226471, "step": 383 }, { "epoch": 0.10434782608695652, "grad_norm": 1.1464982254282932, "learning_rate": 1.99997728839718e-05, "loss": 0.6950050592422485, "step": 384 }, { "epoch": 0.10461956521739131, "grad_norm": 1.0568218101017766, "learning_rate": 1.9999741592564903e-05, "loss": 0.6457014083862305, "step": 385 }, { "epoch": 0.10489130434782609, "grad_norm": 0.848753686361052, "learning_rate": 1.999970828239343e-05, "loss": 0.4668888449668884, "step": 386 }, { "epoch": 0.10516304347826087, "grad_norm": 1.0274603620493559, "learning_rate": 1.9999672953464095e-05, "loss": 0.5601106286048889, "step": 387 }, { "epoch": 0.10543478260869565, "grad_norm": 1.1217257466190977, "learning_rate": 1.9999635605784042e-05, "loss": 0.6413660049438477, "step": 388 }, { "epoch": 0.10570652173913044, "grad_norm": 1.0680661269286127, "learning_rate": 1.9999596239360804e-05, "loss": 0.6373885273933411, "step": 389 }, { "epoch": 0.10597826086956522, "grad_norm": 0.919701306303427, "learning_rate": 1.9999554854202334e-05, "loss": 0.4752201437950134, "step": 390 }, { "epoch": 0.10625, "grad_norm": 0.8716162409756772, "learning_rate": 1.999951145031698e-05, "loss": 0.5252925753593445, "step": 391 }, { "epoch": 0.10652173913043478, "grad_norm": 0.9843037473747791, "learning_rate": 1.999946602771351e-05, "loss": 0.5985313653945923, "step": 392 }, { "epoch": 0.10679347826086956, "grad_norm": 1.0011485181447666, "learning_rate": 1.9999418586401092e-05, "loss": 0.6412175893783569, "step": 393 }, { "epoch": 0.10706521739130435, "grad_norm": 1.0305517713786962, "learning_rate": 1.99993691263893e-05, "loss": 0.6314315795898438, "step": 394 }, { "epoch": 0.10733695652173914, "grad_norm": 1.1039757692387828, "learning_rate": 1.9999317647688127e-05, "loss": 0.6436986923217773, "step": 395 }, { "epoch": 0.10760869565217392, "grad_norm": 1.12034720937231, "learning_rate": 1.9999264150307956e-05, "loss": 0.6956436634063721, "step": 396 }, { "epoch": 0.1078804347826087, "grad_norm": 1.0708420492750825, "learning_rate": 1.9999208634259594e-05, "loss": 0.6688356399536133, "step": 397 }, { "epoch": 0.10815217391304348, "grad_norm": 1.0341107637869793, "learning_rate": 1.999915109955425e-05, "loss": 0.6022867560386658, "step": 398 }, { "epoch": 0.10842391304347826, "grad_norm": 0.9767671834519135, "learning_rate": 1.9999091546203533e-05, "loss": 0.5221936702728271, "step": 399 }, { "epoch": 0.10869565217391304, "grad_norm": 1.034289999122602, "learning_rate": 1.999902997421947e-05, "loss": 0.6844162940979004, "step": 400 }, { "epoch": 0.10896739130434782, "grad_norm": 1.171022873816017, "learning_rate": 1.999896638361449e-05, "loss": 0.7165743112564087, "step": 401 }, { "epoch": 0.1092391304347826, "grad_norm": 0.879391953763458, "learning_rate": 1.999890077440143e-05, "loss": 0.5104398131370544, "step": 402 }, { "epoch": 0.1095108695652174, "grad_norm": 0.9591607811659438, "learning_rate": 1.9998833146593535e-05, "loss": 0.6141295433044434, "step": 403 }, { "epoch": 0.10978260869565218, "grad_norm": 1.061674216639326, "learning_rate": 1.9998763500204463e-05, "loss": 0.5866646766662598, "step": 404 }, { "epoch": 0.11005434782608696, "grad_norm": 1.0248098529348524, "learning_rate": 1.9998691835248263e-05, "loss": 0.6196632981300354, "step": 405 }, { "epoch": 0.11032608695652174, "grad_norm": 1.0774821846479206, "learning_rate": 1.9998618151739418e-05, "loss": 0.7162747979164124, "step": 406 }, { "epoch": 0.11059782608695652, "grad_norm": 0.9721617831428588, "learning_rate": 1.9998542449692794e-05, "loss": 0.6074075698852539, "step": 407 }, { "epoch": 0.1108695652173913, "grad_norm": 1.0283066205524227, "learning_rate": 1.999846472912367e-05, "loss": 0.6279120445251465, "step": 408 }, { "epoch": 0.11114130434782608, "grad_norm": 1.0442464504213846, "learning_rate": 1.9998384990047746e-05, "loss": 0.7417161464691162, "step": 409 }, { "epoch": 0.11141304347826086, "grad_norm": 0.9535286083143752, "learning_rate": 1.9998303232481114e-05, "loss": 0.5760103464126587, "step": 410 }, { "epoch": 0.11168478260869565, "grad_norm": 0.9301176000819584, "learning_rate": 1.9998219456440285e-05, "loss": 0.5044856667518616, "step": 411 }, { "epoch": 0.11195652173913044, "grad_norm": 1.1915950674606757, "learning_rate": 1.999813366194216e-05, "loss": 0.7327628135681152, "step": 412 }, { "epoch": 0.11222826086956522, "grad_norm": 1.0438398752661922, "learning_rate": 1.9998045849004075e-05, "loss": 0.6154263019561768, "step": 413 }, { "epoch": 0.1125, "grad_norm": 1.060623904735119, "learning_rate": 1.9997956017643744e-05, "loss": 0.6965636014938354, "step": 414 }, { "epoch": 0.11277173913043478, "grad_norm": 1.033597694388616, "learning_rate": 1.9997864167879313e-05, "loss": 0.549498438835144, "step": 415 }, { "epoch": 0.11304347826086956, "grad_norm": 0.8753223843660218, "learning_rate": 1.999777029972932e-05, "loss": 0.4523797035217285, "step": 416 }, { "epoch": 0.11331521739130435, "grad_norm": 1.0384710302753428, "learning_rate": 1.999767441321271e-05, "loss": 0.5890218019485474, "step": 417 }, { "epoch": 0.11358695652173913, "grad_norm": 0.9060491956521283, "learning_rate": 1.9997576508348847e-05, "loss": 0.5408389568328857, "step": 418 }, { "epoch": 0.11385869565217391, "grad_norm": 1.1083359444323095, "learning_rate": 1.99974765851575e-05, "loss": 0.7201555967330933, "step": 419 }, { "epoch": 0.11413043478260869, "grad_norm": 1.0644830335644873, "learning_rate": 1.999737464365883e-05, "loss": 0.5671750903129578, "step": 420 }, { "epoch": 0.11440217391304348, "grad_norm": 0.8388125814172331, "learning_rate": 1.999727068387343e-05, "loss": 0.3937748670578003, "step": 421 }, { "epoch": 0.11467391304347826, "grad_norm": 1.0147622734151551, "learning_rate": 1.999716470582228e-05, "loss": 0.5739926099777222, "step": 422 }, { "epoch": 0.11494565217391305, "grad_norm": 0.9572838777582217, "learning_rate": 1.9997056709526776e-05, "loss": 0.5243722796440125, "step": 423 }, { "epoch": 0.11521739130434783, "grad_norm": 0.976144550025251, "learning_rate": 1.9996946695008718e-05, "loss": 0.5689518451690674, "step": 424 }, { "epoch": 0.11548913043478261, "grad_norm": 1.1800189812911361, "learning_rate": 1.9996834662290323e-05, "loss": 0.6386196613311768, "step": 425 }, { "epoch": 0.11576086956521739, "grad_norm": 0.8930220280653994, "learning_rate": 1.99967206113942e-05, "loss": 0.5425840020179749, "step": 426 }, { "epoch": 0.11603260869565217, "grad_norm": 0.9523689804940927, "learning_rate": 1.9996604542343384e-05, "loss": 0.6061644554138184, "step": 427 }, { "epoch": 0.11630434782608695, "grad_norm": 1.0671658297349917, "learning_rate": 1.99964864551613e-05, "loss": 0.5937210321426392, "step": 428 }, { "epoch": 0.11657608695652173, "grad_norm": 0.8912271391360199, "learning_rate": 1.9996366349871784e-05, "loss": 0.550702691078186, "step": 429 }, { "epoch": 0.11684782608695653, "grad_norm": 0.9583359874470296, "learning_rate": 1.9996244226499094e-05, "loss": 0.612038254737854, "step": 430 }, { "epoch": 0.11711956521739131, "grad_norm": 0.895666145995747, "learning_rate": 1.9996120085067873e-05, "loss": 0.5010676980018616, "step": 431 }, { "epoch": 0.11739130434782609, "grad_norm": 1.049838743696745, "learning_rate": 1.9995993925603192e-05, "loss": 0.6516764163970947, "step": 432 }, { "epoch": 0.11766304347826087, "grad_norm": 0.7868626793253468, "learning_rate": 1.9995865748130518e-05, "loss": 0.3969181180000305, "step": 433 }, { "epoch": 0.11793478260869565, "grad_norm": 1.011702018996398, "learning_rate": 1.9995735552675725e-05, "loss": 0.5987011194229126, "step": 434 }, { "epoch": 0.11820652173913043, "grad_norm": 1.0247581052728614, "learning_rate": 1.9995603339265095e-05, "loss": 0.5790184140205383, "step": 435 }, { "epoch": 0.11847826086956521, "grad_norm": 0.8865961181457083, "learning_rate": 1.9995469107925323e-05, "loss": 0.5656816363334656, "step": 436 }, { "epoch": 0.11875, "grad_norm": 1.1240529018386518, "learning_rate": 1.9995332858683512e-05, "loss": 0.6697234511375427, "step": 437 }, { "epoch": 0.11902173913043479, "grad_norm": 1.006988617157378, "learning_rate": 1.999519459156716e-05, "loss": 0.5663440227508545, "step": 438 }, { "epoch": 0.11929347826086957, "grad_norm": 1.1013866297259014, "learning_rate": 1.9995054306604187e-05, "loss": 0.6301734447479248, "step": 439 }, { "epoch": 0.11956521739130435, "grad_norm": 0.8757772352926078, "learning_rate": 1.9994912003822914e-05, "loss": 0.4412987232208252, "step": 440 }, { "epoch": 0.11983695652173913, "grad_norm": 1.0665707002200486, "learning_rate": 1.999476768325206e-05, "loss": 0.600553035736084, "step": 441 }, { "epoch": 0.12010869565217391, "grad_norm": 0.9495013375796794, "learning_rate": 1.9994621344920772e-05, "loss": 0.5177372694015503, "step": 442 }, { "epoch": 0.1203804347826087, "grad_norm": 1.0132823595429514, "learning_rate": 1.999447298885859e-05, "loss": 0.5645582675933838, "step": 443 }, { "epoch": 0.12065217391304348, "grad_norm": 1.2700174004562321, "learning_rate": 1.9994322615095458e-05, "loss": 0.7385030388832092, "step": 444 }, { "epoch": 0.12092391304347826, "grad_norm": 0.8454076082401949, "learning_rate": 1.999417022366174e-05, "loss": 0.40243709087371826, "step": 445 }, { "epoch": 0.12119565217391304, "grad_norm": 1.084224791279209, "learning_rate": 1.99940158145882e-05, "loss": 0.6652276515960693, "step": 446 }, { "epoch": 0.12146739130434783, "grad_norm": 0.9603961444244945, "learning_rate": 1.9993859387906016e-05, "loss": 0.5260887742042542, "step": 447 }, { "epoch": 0.12173913043478261, "grad_norm": 0.9224904896456922, "learning_rate": 1.9993700943646753e-05, "loss": 0.5647868514060974, "step": 448 }, { "epoch": 0.1220108695652174, "grad_norm": 1.1300975267053037, "learning_rate": 1.999354048184241e-05, "loss": 0.6744120121002197, "step": 449 }, { "epoch": 0.12228260869565218, "grad_norm": 0.8609741309299125, "learning_rate": 1.999337800252538e-05, "loss": 0.45130181312561035, "step": 450 }, { "epoch": 0.12255434782608696, "grad_norm": 0.994860236164018, "learning_rate": 1.999321350572846e-05, "loss": 0.6293144226074219, "step": 451 }, { "epoch": 0.12282608695652174, "grad_norm": 0.9085451758422951, "learning_rate": 1.999304699148486e-05, "loss": 0.578853964805603, "step": 452 }, { "epoch": 0.12309782608695652, "grad_norm": 0.9072417793994756, "learning_rate": 1.9992878459828203e-05, "loss": 0.5085766315460205, "step": 453 }, { "epoch": 0.1233695652173913, "grad_norm": 0.9039388536353852, "learning_rate": 1.99927079107925e-05, "loss": 0.49083051085472107, "step": 454 }, { "epoch": 0.12364130434782608, "grad_norm": 0.8722114774154629, "learning_rate": 1.9992535344412193e-05, "loss": 0.5054685473442078, "step": 455 }, { "epoch": 0.12391304347826088, "grad_norm": 1.0410883003022025, "learning_rate": 1.9992360760722115e-05, "loss": 0.6445056796073914, "step": 456 }, { "epoch": 0.12418478260869566, "grad_norm": 1.059994937971897, "learning_rate": 1.9992184159757513e-05, "loss": 0.5719254016876221, "step": 457 }, { "epoch": 0.12445652173913044, "grad_norm": 1.1525860667402439, "learning_rate": 1.9992005541554036e-05, "loss": 0.6632227897644043, "step": 458 }, { "epoch": 0.12472826086956522, "grad_norm": 1.0794404192745521, "learning_rate": 1.999182490614775e-05, "loss": 0.6408330202102661, "step": 459 }, { "epoch": 0.125, "grad_norm": 1.0288746737951027, "learning_rate": 1.999164225357512e-05, "loss": 0.5546019077301025, "step": 460 }, { "epoch": 0.1252717391304348, "grad_norm": 0.9145133092062209, "learning_rate": 1.999145758387301e-05, "loss": 0.5942026972770691, "step": 461 }, { "epoch": 0.12554347826086956, "grad_norm": 1.0369983052641085, "learning_rate": 1.9991270897078717e-05, "loss": 0.6218487024307251, "step": 462 }, { "epoch": 0.12581521739130436, "grad_norm": 0.9332963802736818, "learning_rate": 1.999108219322992e-05, "loss": 0.6113760471343994, "step": 463 }, { "epoch": 0.12608695652173912, "grad_norm": 0.9604181250881251, "learning_rate": 1.999089147236472e-05, "loss": 0.5768099427223206, "step": 464 }, { "epoch": 0.12635869565217392, "grad_norm": 1.09246565586252, "learning_rate": 1.9990698734521614e-05, "loss": 0.6722774505615234, "step": 465 }, { "epoch": 0.1266304347826087, "grad_norm": 1.075691170635151, "learning_rate": 1.9990503979739516e-05, "loss": 0.6218962669372559, "step": 466 }, { "epoch": 0.12690217391304348, "grad_norm": 0.8884525624562044, "learning_rate": 1.9990307208057743e-05, "loss": 0.577363133430481, "step": 467 }, { "epoch": 0.12717391304347825, "grad_norm": 0.9936588107453483, "learning_rate": 1.999010841951602e-05, "loss": 0.5866848230361938, "step": 468 }, { "epoch": 0.12744565217391304, "grad_norm": 0.9217366081127394, "learning_rate": 1.9989907614154478e-05, "loss": 0.5089142322540283, "step": 469 }, { "epoch": 0.12771739130434784, "grad_norm": 1.0120292853167447, "learning_rate": 1.9989704792013658e-05, "loss": 0.6959022283554077, "step": 470 }, { "epoch": 0.1279891304347826, "grad_norm": 1.3829368169107676, "learning_rate": 1.9989499953134506e-05, "loss": 0.4411349892616272, "step": 471 }, { "epoch": 0.1282608695652174, "grad_norm": 0.9080143965267434, "learning_rate": 1.998929309755837e-05, "loss": 0.4974944293498993, "step": 472 }, { "epoch": 0.12853260869565217, "grad_norm": 1.043334435099621, "learning_rate": 1.9989084225327014e-05, "loss": 0.593841552734375, "step": 473 }, { "epoch": 0.12880434782608696, "grad_norm": 0.9960667110048701, "learning_rate": 1.998887333648261e-05, "loss": 0.62131267786026, "step": 474 }, { "epoch": 0.12907608695652173, "grad_norm": 1.0271876643608602, "learning_rate": 1.9988660431067723e-05, "loss": 0.5507307052612305, "step": 475 }, { "epoch": 0.12934782608695652, "grad_norm": 0.9197305746240297, "learning_rate": 1.998844550912534e-05, "loss": 0.5464853048324585, "step": 476 }, { "epoch": 0.1296195652173913, "grad_norm": 1.0111425970152759, "learning_rate": 1.9988228570698853e-05, "loss": 0.5759999752044678, "step": 477 }, { "epoch": 0.1298913043478261, "grad_norm": 0.8552213734887477, "learning_rate": 1.9988009615832052e-05, "loss": 0.5465481877326965, "step": 478 }, { "epoch": 0.13016304347826088, "grad_norm": 1.0004270392874746, "learning_rate": 1.9987788644569144e-05, "loss": 0.4584847688674927, "step": 479 }, { "epoch": 0.13043478260869565, "grad_norm": 1.1205102005806178, "learning_rate": 1.9987565656954738e-05, "loss": 0.6259770393371582, "step": 480 }, { "epoch": 0.13070652173913044, "grad_norm": 0.9119659547178608, "learning_rate": 1.9987340653033847e-05, "loss": 0.5510997176170349, "step": 481 }, { "epoch": 0.1309782608695652, "grad_norm": 0.9033849021993481, "learning_rate": 1.99871136328519e-05, "loss": 0.5475002527236938, "step": 482 }, { "epoch": 0.13125, "grad_norm": 1.0478628371872325, "learning_rate": 1.998688459645473e-05, "loss": 0.6733339428901672, "step": 483 }, { "epoch": 0.13152173913043477, "grad_norm": 0.9670618128400278, "learning_rate": 1.998665354388857e-05, "loss": 0.5150229930877686, "step": 484 }, { "epoch": 0.13179347826086957, "grad_norm": 1.0613753846186837, "learning_rate": 1.998642047520007e-05, "loss": 0.6186578273773193, "step": 485 }, { "epoch": 0.13206521739130433, "grad_norm": 1.0606796981249103, "learning_rate": 1.9986185390436277e-05, "loss": 0.6019240617752075, "step": 486 }, { "epoch": 0.13233695652173913, "grad_norm": 1.1259920144939108, "learning_rate": 1.9985948289644654e-05, "loss": 0.5918797850608826, "step": 487 }, { "epoch": 0.13260869565217392, "grad_norm": 1.1015889881175769, "learning_rate": 1.9985709172873067e-05, "loss": 0.620570182800293, "step": 488 }, { "epoch": 0.1328804347826087, "grad_norm": 1.1492799100703128, "learning_rate": 1.9985468040169785e-05, "loss": 0.6109024286270142, "step": 489 }, { "epoch": 0.1331521739130435, "grad_norm": 1.0226881183575043, "learning_rate": 1.9985224891583498e-05, "loss": 0.49608832597732544, "step": 490 }, { "epoch": 0.13342391304347825, "grad_norm": 1.0848343868824446, "learning_rate": 1.9984979727163286e-05, "loss": 0.5815306901931763, "step": 491 }, { "epoch": 0.13369565217391305, "grad_norm": 1.0937013280671453, "learning_rate": 1.998473254695864e-05, "loss": 0.632835865020752, "step": 492 }, { "epoch": 0.13396739130434782, "grad_norm": 1.0209773325841285, "learning_rate": 1.998448335101947e-05, "loss": 0.5654631853103638, "step": 493 }, { "epoch": 0.1342391304347826, "grad_norm": 1.0394137591162063, "learning_rate": 1.998423213939608e-05, "loss": 0.6071439385414124, "step": 494 }, { "epoch": 0.13451086956521738, "grad_norm": 1.0467457709369783, "learning_rate": 1.998397891213918e-05, "loss": 0.609126091003418, "step": 495 }, { "epoch": 0.13478260869565217, "grad_norm": 1.0979681558597267, "learning_rate": 1.9983723669299898e-05, "loss": 0.6164345741271973, "step": 496 }, { "epoch": 0.13505434782608697, "grad_norm": 1.0307565250639155, "learning_rate": 1.9983466410929764e-05, "loss": 0.514223575592041, "step": 497 }, { "epoch": 0.13532608695652174, "grad_norm": 1.0928530232773639, "learning_rate": 1.998320713708071e-05, "loss": 0.6700409650802612, "step": 498 }, { "epoch": 0.13559782608695653, "grad_norm": 0.9232745102335573, "learning_rate": 1.998294584780508e-05, "loss": 0.518304705619812, "step": 499 }, { "epoch": 0.1358695652173913, "grad_norm": 0.9784264996268403, "learning_rate": 1.9982682543155624e-05, "loss": 0.6017490029335022, "step": 500 }, { "epoch": 0.1361413043478261, "grad_norm": 0.8980107622913444, "learning_rate": 1.9982417223185497e-05, "loss": 0.473702073097229, "step": 501 }, { "epoch": 0.13641304347826086, "grad_norm": 0.9910352932344029, "learning_rate": 1.9982149887948264e-05, "loss": 0.5803335905075073, "step": 502 }, { "epoch": 0.13668478260869565, "grad_norm": 1.0405495845813735, "learning_rate": 1.9981880537497894e-05, "loss": 0.5819352865219116, "step": 503 }, { "epoch": 0.13695652173913042, "grad_norm": 0.9974703256262815, "learning_rate": 1.9981609171888762e-05, "loss": 0.6259819269180298, "step": 504 }, { "epoch": 0.13722826086956522, "grad_norm": 0.9954935654090021, "learning_rate": 1.9981335791175655e-05, "loss": 0.5579284429550171, "step": 505 }, { "epoch": 0.1375, "grad_norm": 1.1360994273424094, "learning_rate": 1.9981060395413765e-05, "loss": 0.6499065160751343, "step": 506 }, { "epoch": 0.13777173913043478, "grad_norm": 1.0650826666523168, "learning_rate": 1.9980782984658682e-05, "loss": 0.6182603240013123, "step": 507 }, { "epoch": 0.13804347826086957, "grad_norm": 1.0571242660503317, "learning_rate": 1.9980503558966418e-05, "loss": 0.6234190464019775, "step": 508 }, { "epoch": 0.13831521739130434, "grad_norm": 0.9659453847270455, "learning_rate": 1.998022211839338e-05, "loss": 0.6218945980072021, "step": 509 }, { "epoch": 0.13858695652173914, "grad_norm": 1.0299709432910213, "learning_rate": 1.9979938662996387e-05, "loss": 0.5515654683113098, "step": 510 }, { "epoch": 0.1388586956521739, "grad_norm": 0.909624860390443, "learning_rate": 1.997965319283266e-05, "loss": 0.534189760684967, "step": 511 }, { "epoch": 0.1391304347826087, "grad_norm": 1.061346240700228, "learning_rate": 1.9979365707959836e-05, "loss": 0.6641613245010376, "step": 512 }, { "epoch": 0.13940217391304346, "grad_norm": 1.0548214272131244, "learning_rate": 1.997907620843595e-05, "loss": 0.5978137254714966, "step": 513 }, { "epoch": 0.13967391304347826, "grad_norm": 1.078402294166824, "learning_rate": 1.9978784694319445e-05, "loss": 0.6432967185974121, "step": 514 }, { "epoch": 0.13994565217391305, "grad_norm": 1.041755093911181, "learning_rate": 1.997849116566918e-05, "loss": 0.5861226916313171, "step": 515 }, { "epoch": 0.14021739130434782, "grad_norm": 1.1962881613028964, "learning_rate": 1.99781956225444e-05, "loss": 0.7123916745185852, "step": 516 }, { "epoch": 0.14048913043478262, "grad_norm": 1.0591910496330978, "learning_rate": 1.997789806500478e-05, "loss": 0.6056292653083801, "step": 517 }, { "epoch": 0.14076086956521738, "grad_norm": 0.9535743198048076, "learning_rate": 1.997759849311039e-05, "loss": 0.5122894048690796, "step": 518 }, { "epoch": 0.14103260869565218, "grad_norm": 0.8534353011051049, "learning_rate": 1.9977296906921702e-05, "loss": 0.4968096613883972, "step": 519 }, { "epoch": 0.14130434782608695, "grad_norm": 0.837892615078043, "learning_rate": 1.9976993306499607e-05, "loss": 0.4170382618904114, "step": 520 }, { "epoch": 0.14157608695652174, "grad_norm": 1.0591398486149646, "learning_rate": 1.9976687691905394e-05, "loss": 0.6079549789428711, "step": 521 }, { "epoch": 0.14184782608695654, "grad_norm": 0.9425670558021174, "learning_rate": 1.997638006320076e-05, "loss": 0.4890021085739136, "step": 522 }, { "epoch": 0.1421195652173913, "grad_norm": 0.9042100201251658, "learning_rate": 1.9976070420447814e-05, "loss": 0.4298403561115265, "step": 523 }, { "epoch": 0.1423913043478261, "grad_norm": 0.8815978436945273, "learning_rate": 1.9975758763709064e-05, "loss": 0.5039761662483215, "step": 524 }, { "epoch": 0.14266304347826086, "grad_norm": 1.0408267102113276, "learning_rate": 1.9975445093047425e-05, "loss": 0.5189402103424072, "step": 525 }, { "epoch": 0.14293478260869566, "grad_norm": 1.282486701372306, "learning_rate": 1.9975129408526227e-05, "loss": 0.7236330509185791, "step": 526 }, { "epoch": 0.14320652173913043, "grad_norm": 0.9017428070440577, "learning_rate": 1.99748117102092e-05, "loss": 0.5024945139884949, "step": 527 }, { "epoch": 0.14347826086956522, "grad_norm": 1.0391869489946213, "learning_rate": 1.9974491998160476e-05, "loss": 0.5609216690063477, "step": 528 }, { "epoch": 0.14375, "grad_norm": 1.1898932300808451, "learning_rate": 1.9974170272444604e-05, "loss": 0.5529563426971436, "step": 529 }, { "epoch": 0.14402173913043478, "grad_norm": 1.1473499039348063, "learning_rate": 1.9973846533126533e-05, "loss": 0.6627479791641235, "step": 530 }, { "epoch": 0.14429347826086958, "grad_norm": 1.083810261752322, "learning_rate": 1.997352078027162e-05, "loss": 0.6802459955215454, "step": 531 }, { "epoch": 0.14456521739130435, "grad_norm": 1.0998723167048785, "learning_rate": 1.997319301394563e-05, "loss": 0.6295997500419617, "step": 532 }, { "epoch": 0.14483695652173914, "grad_norm": 0.9986536989677866, "learning_rate": 1.9972863234214732e-05, "loss": 0.5387486219406128, "step": 533 }, { "epoch": 0.1451086956521739, "grad_norm": 1.0150572493284822, "learning_rate": 1.9972531441145503e-05, "loss": 0.5952854156494141, "step": 534 }, { "epoch": 0.1453804347826087, "grad_norm": 0.8414050969721473, "learning_rate": 1.9972197634804922e-05, "loss": 0.52373206615448, "step": 535 }, { "epoch": 0.14565217391304347, "grad_norm": 0.9199879874270983, "learning_rate": 1.9971861815260383e-05, "loss": 0.4739062190055847, "step": 536 }, { "epoch": 0.14592391304347826, "grad_norm": 0.8746100427891019, "learning_rate": 1.9971523982579682e-05, "loss": 0.4370855689048767, "step": 537 }, { "epoch": 0.14619565217391303, "grad_norm": 1.0769278751389257, "learning_rate": 1.9971184136831023e-05, "loss": 0.6462257504463196, "step": 538 }, { "epoch": 0.14646739130434783, "grad_norm": 0.8657059452383419, "learning_rate": 1.9970842278083004e-05, "loss": 0.5461211800575256, "step": 539 }, { "epoch": 0.14673913043478262, "grad_norm": 1.0200035851411415, "learning_rate": 1.997049840640465e-05, "loss": 0.693665623664856, "step": 540 }, { "epoch": 0.1470108695652174, "grad_norm": 1.1077239536659524, "learning_rate": 1.9970152521865384e-05, "loss": 0.6291755437850952, "step": 541 }, { "epoch": 0.14728260869565218, "grad_norm": 0.9935253427740147, "learning_rate": 1.9969804624535025e-05, "loss": 0.6247778534889221, "step": 542 }, { "epoch": 0.14755434782608695, "grad_norm": 1.1443314119593777, "learning_rate": 1.996945471448381e-05, "loss": 0.7466934323310852, "step": 543 }, { "epoch": 0.14782608695652175, "grad_norm": 0.975507102593212, "learning_rate": 1.996910279178238e-05, "loss": 0.608910083770752, "step": 544 }, { "epoch": 0.1480978260869565, "grad_norm": 1.1224858106693094, "learning_rate": 1.996874885650179e-05, "loss": 0.6648900508880615, "step": 545 }, { "epoch": 0.1483695652173913, "grad_norm": 0.989026713687694, "learning_rate": 1.996839290871348e-05, "loss": 0.5378342866897583, "step": 546 }, { "epoch": 0.14864130434782608, "grad_norm": 0.7946905349239192, "learning_rate": 1.996803494848932e-05, "loss": 0.45927566289901733, "step": 547 }, { "epoch": 0.14891304347826087, "grad_norm": 1.109356394042287, "learning_rate": 1.9967674975901564e-05, "loss": 0.6446850299835205, "step": 548 }, { "epoch": 0.14918478260869567, "grad_norm": 0.8927244916742341, "learning_rate": 1.9967312991022897e-05, "loss": 0.4130159020423889, "step": 549 }, { "epoch": 0.14945652173913043, "grad_norm": 0.8678587141459579, "learning_rate": 1.996694899392639e-05, "loss": 0.5304865837097168, "step": 550 }, { "epoch": 0.14972826086956523, "grad_norm": 1.0861021783192442, "learning_rate": 1.9966582984685522e-05, "loss": 0.6256424784660339, "step": 551 }, { "epoch": 0.15, "grad_norm": 1.0035565752315518, "learning_rate": 1.9966214963374197e-05, "loss": 0.5960760116577148, "step": 552 }, { "epoch": 0.1502717391304348, "grad_norm": 1.0903443673569966, "learning_rate": 1.99658449300667e-05, "loss": 0.5646905899047852, "step": 553 }, { "epoch": 0.15054347826086956, "grad_norm": 0.8205721212667049, "learning_rate": 1.996547288483774e-05, "loss": 0.3892267644405365, "step": 554 }, { "epoch": 0.15081521739130435, "grad_norm": 0.973810333663982, "learning_rate": 1.9965098827762423e-05, "loss": 0.5776790976524353, "step": 555 }, { "epoch": 0.15108695652173912, "grad_norm": 0.9875638662518406, "learning_rate": 1.9964722758916268e-05, "loss": 0.5211402177810669, "step": 556 }, { "epoch": 0.1513586956521739, "grad_norm": 0.8297665971899241, "learning_rate": 1.9964344678375194e-05, "loss": 0.48208218812942505, "step": 557 }, { "epoch": 0.1516304347826087, "grad_norm": 0.9321638864057757, "learning_rate": 1.9963964586215527e-05, "loss": 0.5077864527702332, "step": 558 }, { "epoch": 0.15190217391304348, "grad_norm": 0.9520413088573231, "learning_rate": 1.9963582482514003e-05, "loss": 0.4649862051010132, "step": 559 }, { "epoch": 0.15217391304347827, "grad_norm": 0.9147950452285756, "learning_rate": 1.996319836734776e-05, "loss": 0.4858861565589905, "step": 560 }, { "epoch": 0.15244565217391304, "grad_norm": 1.0849755960998637, "learning_rate": 1.9962812240794344e-05, "loss": 0.5294586420059204, "step": 561 }, { "epoch": 0.15271739130434783, "grad_norm": 1.2081397122026418, "learning_rate": 1.996242410293171e-05, "loss": 0.716445803642273, "step": 562 }, { "epoch": 0.1529891304347826, "grad_norm": 0.9657527451999169, "learning_rate": 1.9962033953838215e-05, "loss": 0.610751748085022, "step": 563 }, { "epoch": 0.1532608695652174, "grad_norm": 0.950253070847738, "learning_rate": 1.996164179359262e-05, "loss": 0.5132652521133423, "step": 564 }, { "epoch": 0.15353260869565216, "grad_norm": 0.8446205536021343, "learning_rate": 1.9961247622274093e-05, "loss": 0.4299188554286957, "step": 565 }, { "epoch": 0.15380434782608696, "grad_norm": 1.0606955564591098, "learning_rate": 1.9960851439962218e-05, "loss": 0.5909217596054077, "step": 566 }, { "epoch": 0.15407608695652175, "grad_norm": 0.9384487019127449, "learning_rate": 1.9960453246736972e-05, "loss": 0.6598085165023804, "step": 567 }, { "epoch": 0.15434782608695652, "grad_norm": 0.8931502910938742, "learning_rate": 1.996005304267874e-05, "loss": 0.47596579790115356, "step": 568 }, { "epoch": 0.1546195652173913, "grad_norm": 0.9096599509450669, "learning_rate": 1.9959650827868323e-05, "loss": 0.5150778293609619, "step": 569 }, { "epoch": 0.15489130434782608, "grad_norm": 0.9225217874522903, "learning_rate": 1.9959246602386918e-05, "loss": 0.5473342537879944, "step": 570 }, { "epoch": 0.15516304347826088, "grad_norm": 0.8252691387199838, "learning_rate": 1.9958840366316127e-05, "loss": 0.4451976418495178, "step": 571 }, { "epoch": 0.15543478260869564, "grad_norm": 1.1271956530255904, "learning_rate": 1.9958432119737966e-05, "loss": 0.6959035992622375, "step": 572 }, { "epoch": 0.15570652173913044, "grad_norm": 1.0369606909146682, "learning_rate": 1.995802186273485e-05, "loss": 0.6295270919799805, "step": 573 }, { "epoch": 0.1559782608695652, "grad_norm": 1.0884411547714687, "learning_rate": 1.99576095953896e-05, "loss": 0.5865702629089355, "step": 574 }, { "epoch": 0.15625, "grad_norm": 0.9327820066808555, "learning_rate": 1.9957195317785453e-05, "loss": 0.591279149055481, "step": 575 }, { "epoch": 0.1565217391304348, "grad_norm": 1.156730697311856, "learning_rate": 1.9956779030006038e-05, "loss": 0.6446715593338013, "step": 576 }, { "epoch": 0.15679347826086956, "grad_norm": 0.9256521531643022, "learning_rate": 1.99563607321354e-05, "loss": 0.511824369430542, "step": 577 }, { "epoch": 0.15706521739130436, "grad_norm": 1.0515905958807408, "learning_rate": 1.995594042425798e-05, "loss": 0.5802212953567505, "step": 578 }, { "epoch": 0.15733695652173912, "grad_norm": 1.0485599254833549, "learning_rate": 1.9955518106458633e-05, "loss": 0.5561065077781677, "step": 579 }, { "epoch": 0.15760869565217392, "grad_norm": 1.0565545707215531, "learning_rate": 1.995509377882262e-05, "loss": 0.6134364604949951, "step": 580 }, { "epoch": 0.1578804347826087, "grad_norm": 1.0828239286488452, "learning_rate": 1.99546674414356e-05, "loss": 0.5982726812362671, "step": 581 }, { "epoch": 0.15815217391304348, "grad_norm": 1.1605487524921982, "learning_rate": 1.995423909438365e-05, "loss": 0.6056562662124634, "step": 582 }, { "epoch": 0.15842391304347825, "grad_norm": 1.0808817831404776, "learning_rate": 1.9953808737753236e-05, "loss": 0.6758369207382202, "step": 583 }, { "epoch": 0.15869565217391304, "grad_norm": 1.065531536614862, "learning_rate": 1.995337637163125e-05, "loss": 0.5411529541015625, "step": 584 }, { "epoch": 0.15896739130434784, "grad_norm": 1.0135885660375232, "learning_rate": 1.9952941996104968e-05, "loss": 0.5528398156166077, "step": 585 }, { "epoch": 0.1592391304347826, "grad_norm": 1.0302182674812916, "learning_rate": 1.9952505611262088e-05, "loss": 0.5774904489517212, "step": 586 }, { "epoch": 0.1595108695652174, "grad_norm": 0.9607933452217696, "learning_rate": 1.9952067217190708e-05, "loss": 0.6100043654441833, "step": 587 }, { "epoch": 0.15978260869565217, "grad_norm": 0.9426937627456127, "learning_rate": 1.995162681397933e-05, "loss": 0.5155978202819824, "step": 588 }, { "epoch": 0.16005434782608696, "grad_norm": 1.0422943535422884, "learning_rate": 1.9951184401716867e-05, "loss": 0.5263962745666504, "step": 589 }, { "epoch": 0.16032608695652173, "grad_norm": 1.0249482231929594, "learning_rate": 1.9950739980492626e-05, "loss": 0.49016493558883667, "step": 590 }, { "epoch": 0.16059782608695652, "grad_norm": 0.8620123147435388, "learning_rate": 1.995029355039634e-05, "loss": 0.5108938217163086, "step": 591 }, { "epoch": 0.1608695652173913, "grad_norm": 1.0663337682466765, "learning_rate": 1.994984511151812e-05, "loss": 0.5888221263885498, "step": 592 }, { "epoch": 0.1611413043478261, "grad_norm": 0.9096545978981698, "learning_rate": 1.994939466394851e-05, "loss": 0.44522345066070557, "step": 593 }, { "epoch": 0.16141304347826088, "grad_norm": 0.9533924707104949, "learning_rate": 1.9948942207778442e-05, "loss": 0.5724238753318787, "step": 594 }, { "epoch": 0.16168478260869565, "grad_norm": 1.1926556537206743, "learning_rate": 1.9948487743099262e-05, "loss": 0.6961758732795715, "step": 595 }, { "epoch": 0.16195652173913044, "grad_norm": 0.9288199574315346, "learning_rate": 1.9948031270002713e-05, "loss": 0.5124096870422363, "step": 596 }, { "epoch": 0.1622282608695652, "grad_norm": 1.020974742468755, "learning_rate": 1.994757278858095e-05, "loss": 0.5749602317810059, "step": 597 }, { "epoch": 0.1625, "grad_norm": 0.9321762609966288, "learning_rate": 1.9947112298926533e-05, "loss": 0.5561555624008179, "step": 598 }, { "epoch": 0.16277173913043477, "grad_norm": 0.97749584468481, "learning_rate": 1.994664980113243e-05, "loss": 0.5701756477355957, "step": 599 }, { "epoch": 0.16304347826086957, "grad_norm": 1.0035254919152958, "learning_rate": 1.9946185295292e-05, "loss": 0.5645142793655396, "step": 600 }, { "epoch": 0.16331521739130433, "grad_norm": 1.1024136728637688, "learning_rate": 1.9945718781499032e-05, "loss": 0.6455186605453491, "step": 601 }, { "epoch": 0.16358695652173913, "grad_norm": 1.0321750694802836, "learning_rate": 1.9945250259847696e-05, "loss": 0.5852012634277344, "step": 602 }, { "epoch": 0.16385869565217392, "grad_norm": 0.9216229008444138, "learning_rate": 1.9944779730432586e-05, "loss": 0.5342987775802612, "step": 603 }, { "epoch": 0.1641304347826087, "grad_norm": 0.843750532012646, "learning_rate": 1.994430719334868e-05, "loss": 0.4390544295310974, "step": 604 }, { "epoch": 0.1644021739130435, "grad_norm": 0.9934283224737045, "learning_rate": 1.9943832648691392e-05, "loss": 0.6257288455963135, "step": 605 }, { "epoch": 0.16467391304347825, "grad_norm": 1.0621940122437934, "learning_rate": 1.9943356096556514e-05, "loss": 0.6369417905807495, "step": 606 }, { "epoch": 0.16494565217391305, "grad_norm": 0.9310235936803801, "learning_rate": 1.9942877537040256e-05, "loss": 0.4990113377571106, "step": 607 }, { "epoch": 0.16521739130434782, "grad_norm": 0.9442006021531554, "learning_rate": 1.9942396970239225e-05, "loss": 0.5499135851860046, "step": 608 }, { "epoch": 0.1654891304347826, "grad_norm": 0.9856226125024008, "learning_rate": 1.9941914396250447e-05, "loss": 0.6131972074508667, "step": 609 }, { "epoch": 0.16576086956521738, "grad_norm": 1.0124845546922057, "learning_rate": 1.9941429815171337e-05, "loss": 0.6492733955383301, "step": 610 }, { "epoch": 0.16603260869565217, "grad_norm": 1.026503008192585, "learning_rate": 1.9940943227099726e-05, "loss": 0.5440854430198669, "step": 611 }, { "epoch": 0.16630434782608697, "grad_norm": 1.0791430850896475, "learning_rate": 1.9940454632133852e-05, "loss": 0.6763926148414612, "step": 612 }, { "epoch": 0.16657608695652174, "grad_norm": 0.8815353856305024, "learning_rate": 1.993996403037235e-05, "loss": 0.4962517023086548, "step": 613 }, { "epoch": 0.16684782608695653, "grad_norm": 1.0027636211011315, "learning_rate": 1.993947142191426e-05, "loss": 0.6138594150543213, "step": 614 }, { "epoch": 0.1671195652173913, "grad_norm": 0.9596927424851531, "learning_rate": 1.993897680685903e-05, "loss": 0.4768928289413452, "step": 615 }, { "epoch": 0.1673913043478261, "grad_norm": 0.9818733935909577, "learning_rate": 1.993848018530652e-05, "loss": 0.5331119298934937, "step": 616 }, { "epoch": 0.16766304347826086, "grad_norm": 1.0574920244039545, "learning_rate": 1.9937981557356986e-05, "loss": 0.6595211029052734, "step": 617 }, { "epoch": 0.16793478260869565, "grad_norm": 1.1138483596479818, "learning_rate": 1.993748092311109e-05, "loss": 0.6292282342910767, "step": 618 }, { "epoch": 0.16820652173913042, "grad_norm": 0.9780053867215645, "learning_rate": 1.9936978282669906e-05, "loss": 0.4923531413078308, "step": 619 }, { "epoch": 0.16847826086956522, "grad_norm": 0.9959672127955645, "learning_rate": 1.9936473636134904e-05, "loss": 0.5384691953659058, "step": 620 }, { "epoch": 0.16875, "grad_norm": 0.8581456602426007, "learning_rate": 1.993596698360796e-05, "loss": 0.4134194552898407, "step": 621 }, { "epoch": 0.16902173913043478, "grad_norm": 1.1181615619221552, "learning_rate": 1.9935458325191365e-05, "loss": 0.6031485795974731, "step": 622 }, { "epoch": 0.16929347826086957, "grad_norm": 1.0920972833066502, "learning_rate": 1.99349476609878e-05, "loss": 0.6129193305969238, "step": 623 }, { "epoch": 0.16956521739130434, "grad_norm": 0.7835098638010691, "learning_rate": 1.9934434991100366e-05, "loss": 0.45291203260421753, "step": 624 }, { "epoch": 0.16983695652173914, "grad_norm": 1.064533033959401, "learning_rate": 1.9933920315632557e-05, "loss": 0.5970695614814758, "step": 625 }, { "epoch": 0.1701086956521739, "grad_norm": 1.0129627415354683, "learning_rate": 1.993340363468828e-05, "loss": 0.49170202016830444, "step": 626 }, { "epoch": 0.1703804347826087, "grad_norm": 1.0401195984653642, "learning_rate": 1.993288494837184e-05, "loss": 0.5713005065917969, "step": 627 }, { "epoch": 0.17065217391304346, "grad_norm": 0.8939156173251294, "learning_rate": 1.993236425678795e-05, "loss": 0.42899948358535767, "step": 628 }, { "epoch": 0.17092391304347826, "grad_norm": 0.9421293548854868, "learning_rate": 1.9931841560041735e-05, "loss": 0.4647592306137085, "step": 629 }, { "epoch": 0.17119565217391305, "grad_norm": 1.0974401613209073, "learning_rate": 1.9931316858238706e-05, "loss": 0.619158148765564, "step": 630 }, { "epoch": 0.17146739130434782, "grad_norm": 1.0122968970548705, "learning_rate": 1.9930790151484804e-05, "loss": 0.5882828235626221, "step": 631 }, { "epoch": 0.17173913043478262, "grad_norm": 0.9423808603504076, "learning_rate": 1.993026143988635e-05, "loss": 0.5125457048416138, "step": 632 }, { "epoch": 0.17201086956521738, "grad_norm": 0.9421807635274889, "learning_rate": 1.9929730723550084e-05, "loss": 0.524185299873352, "step": 633 }, { "epoch": 0.17228260869565218, "grad_norm": 1.1162261714867068, "learning_rate": 1.9929198002583156e-05, "loss": 0.5857672691345215, "step": 634 }, { "epoch": 0.17255434782608695, "grad_norm": 0.9714959613480556, "learning_rate": 1.99286632770931e-05, "loss": 0.5246225595474243, "step": 635 }, { "epoch": 0.17282608695652174, "grad_norm": 1.043760951896679, "learning_rate": 1.9928126547187876e-05, "loss": 0.5842361450195312, "step": 636 }, { "epoch": 0.17309782608695654, "grad_norm": 0.9606700221131264, "learning_rate": 1.992758781297584e-05, "loss": 0.5288549661636353, "step": 637 }, { "epoch": 0.1733695652173913, "grad_norm": 1.0472007617332686, "learning_rate": 1.992704707456575e-05, "loss": 0.5817160606384277, "step": 638 }, { "epoch": 0.1736413043478261, "grad_norm": 1.149709025416702, "learning_rate": 1.992650433206677e-05, "loss": 0.6108897924423218, "step": 639 }, { "epoch": 0.17391304347826086, "grad_norm": 0.9019102666893482, "learning_rate": 1.992595958558847e-05, "loss": 0.4840927720069885, "step": 640 }, { "epoch": 0.17418478260869566, "grad_norm": 0.8568187644415484, "learning_rate": 1.9925412835240826e-05, "loss": 0.4514176845550537, "step": 641 }, { "epoch": 0.17445652173913043, "grad_norm": 1.0710597927027952, "learning_rate": 1.9924864081134218e-05, "loss": 0.6102532148361206, "step": 642 }, { "epoch": 0.17472826086956522, "grad_norm": 0.9457682317027768, "learning_rate": 1.9924313323379426e-05, "loss": 0.49364519119262695, "step": 643 }, { "epoch": 0.175, "grad_norm": 0.8639381924892663, "learning_rate": 1.992376056208764e-05, "loss": 0.4752624034881592, "step": 644 }, { "epoch": 0.17527173913043478, "grad_norm": 0.7658658439361645, "learning_rate": 1.992320579737045e-05, "loss": 0.3747294247150421, "step": 645 }, { "epoch": 0.17554347826086958, "grad_norm": 1.0276441154705667, "learning_rate": 1.992264902933986e-05, "loss": 0.6003632545471191, "step": 646 }, { "epoch": 0.17581521739130435, "grad_norm": 0.8188472273251374, "learning_rate": 1.9922090258108265e-05, "loss": 0.4214438199996948, "step": 647 }, { "epoch": 0.17608695652173914, "grad_norm": 1.0067977806628785, "learning_rate": 1.9921529483788467e-05, "loss": 0.5989241600036621, "step": 648 }, { "epoch": 0.1763586956521739, "grad_norm": 0.8257571948546575, "learning_rate": 1.9920966706493686e-05, "loss": 0.4579585790634155, "step": 649 }, { "epoch": 0.1766304347826087, "grad_norm": 0.9583389455742113, "learning_rate": 1.9920401926337528e-05, "loss": 0.48077070713043213, "step": 650 }, { "epoch": 0.17690217391304347, "grad_norm": 1.1857757508215194, "learning_rate": 1.9919835143434016e-05, "loss": 0.6491228342056274, "step": 651 }, { "epoch": 0.17717391304347826, "grad_norm": 1.1655273687505527, "learning_rate": 1.9919266357897574e-05, "loss": 0.5973694324493408, "step": 652 }, { "epoch": 0.17744565217391303, "grad_norm": 0.917422514262778, "learning_rate": 1.9918695569843024e-05, "loss": 0.48567551374435425, "step": 653 }, { "epoch": 0.17771739130434783, "grad_norm": 0.945654327293231, "learning_rate": 1.99181227793856e-05, "loss": 0.5375871658325195, "step": 654 }, { "epoch": 0.17798913043478262, "grad_norm": 1.0307091525440497, "learning_rate": 1.9917547986640947e-05, "loss": 0.5307761430740356, "step": 655 }, { "epoch": 0.1782608695652174, "grad_norm": 0.927730156237492, "learning_rate": 1.991697119172509e-05, "loss": 0.4905701279640198, "step": 656 }, { "epoch": 0.17853260869565218, "grad_norm": 1.0410538763745765, "learning_rate": 1.9916392394754483e-05, "loss": 0.4901847243309021, "step": 657 }, { "epoch": 0.17880434782608695, "grad_norm": 0.8775172790716406, "learning_rate": 1.9915811595845974e-05, "loss": 0.4057765603065491, "step": 658 }, { "epoch": 0.17907608695652175, "grad_norm": 1.0045303882425813, "learning_rate": 1.991522879511681e-05, "loss": 0.5001389980316162, "step": 659 }, { "epoch": 0.1793478260869565, "grad_norm": 0.9267659999597773, "learning_rate": 1.9914643992684652e-05, "loss": 0.4887212812900543, "step": 660 }, { "epoch": 0.1796195652173913, "grad_norm": 0.9536673201951246, "learning_rate": 1.9914057188667558e-05, "loss": 0.48456788063049316, "step": 661 }, { "epoch": 0.17989130434782608, "grad_norm": 0.9801208340280226, "learning_rate": 1.9913468383184e-05, "loss": 0.4620800018310547, "step": 662 }, { "epoch": 0.18016304347826087, "grad_norm": 0.9831599906385796, "learning_rate": 1.9912877576352843e-05, "loss": 0.5689551830291748, "step": 663 }, { "epoch": 0.18043478260869567, "grad_norm": 0.9267920721573832, "learning_rate": 1.9912284768293356e-05, "loss": 0.5073776841163635, "step": 664 }, { "epoch": 0.18070652173913043, "grad_norm": 0.9426632880606114, "learning_rate": 1.991168995912522e-05, "loss": 0.5048125982284546, "step": 665 }, { "epoch": 0.18097826086956523, "grad_norm": 0.950407546462532, "learning_rate": 1.9911093148968515e-05, "loss": 0.4231773912906647, "step": 666 }, { "epoch": 0.18125, "grad_norm": 0.9118058231285014, "learning_rate": 1.991049433794373e-05, "loss": 0.47232404351234436, "step": 667 }, { "epoch": 0.1815217391304348, "grad_norm": 1.1572296588723918, "learning_rate": 1.9909893526171745e-05, "loss": 0.6467137336730957, "step": 668 }, { "epoch": 0.18179347826086956, "grad_norm": 1.2290844930348317, "learning_rate": 1.9909290713773863e-05, "loss": 0.6287305355072021, "step": 669 }, { "epoch": 0.18206521739130435, "grad_norm": 1.1141073663773986, "learning_rate": 1.9908685900871775e-05, "loss": 0.5230117440223694, "step": 670 }, { "epoch": 0.18233695652173912, "grad_norm": 1.1033950511928134, "learning_rate": 1.9908079087587583e-05, "loss": 0.6653680801391602, "step": 671 }, { "epoch": 0.1826086956521739, "grad_norm": 1.0260145911348841, "learning_rate": 1.990747027404379e-05, "loss": 0.5457808971405029, "step": 672 }, { "epoch": 0.1828804347826087, "grad_norm": 1.038146857906519, "learning_rate": 1.9906859460363307e-05, "loss": 0.6371933221817017, "step": 673 }, { "epoch": 0.18315217391304348, "grad_norm": 1.238987262688682, "learning_rate": 1.9906246646669447e-05, "loss": 0.7510971426963806, "step": 674 }, { "epoch": 0.18342391304347827, "grad_norm": 0.9075118302273769, "learning_rate": 1.9905631833085922e-05, "loss": 0.4554573893547058, "step": 675 }, { "epoch": 0.18369565217391304, "grad_norm": 1.0273656688070254, "learning_rate": 1.990501501973685e-05, "loss": 0.5313304662704468, "step": 676 }, { "epoch": 0.18396739130434783, "grad_norm": 0.9950978104130492, "learning_rate": 1.990439620674676e-05, "loss": 0.5283778309822083, "step": 677 }, { "epoch": 0.1842391304347826, "grad_norm": 0.9500725326093089, "learning_rate": 1.9903775394240578e-05, "loss": 0.4909709692001343, "step": 678 }, { "epoch": 0.1845108695652174, "grad_norm": 0.8665948521365939, "learning_rate": 1.9903152582343633e-05, "loss": 0.4415273666381836, "step": 679 }, { "epoch": 0.18478260869565216, "grad_norm": 1.1121428090645409, "learning_rate": 1.9902527771181657e-05, "loss": 0.6055665612220764, "step": 680 }, { "epoch": 0.18505434782608696, "grad_norm": 0.9092440669540035, "learning_rate": 1.9901900960880792e-05, "loss": 0.5385051965713501, "step": 681 }, { "epoch": 0.18532608695652175, "grad_norm": 0.9536292609852602, "learning_rate": 1.9901272151567576e-05, "loss": 0.561308741569519, "step": 682 }, { "epoch": 0.18559782608695652, "grad_norm": 0.8452440767462949, "learning_rate": 1.990064134336896e-05, "loss": 0.4309464693069458, "step": 683 }, { "epoch": 0.1858695652173913, "grad_norm": 1.000664384604712, "learning_rate": 1.9900008536412282e-05, "loss": 0.5510910749435425, "step": 684 }, { "epoch": 0.18614130434782608, "grad_norm": 0.8080365297197762, "learning_rate": 1.9899373730825306e-05, "loss": 0.4387463629245758, "step": 685 }, { "epoch": 0.18641304347826088, "grad_norm": 0.9951022238112779, "learning_rate": 1.989873692673618e-05, "loss": 0.5562642812728882, "step": 686 }, { "epoch": 0.18668478260869564, "grad_norm": 1.4191418990142104, "learning_rate": 1.9898098124273465e-05, "loss": 0.5930187106132507, "step": 687 }, { "epoch": 0.18695652173913044, "grad_norm": 0.837421775510672, "learning_rate": 1.9897457323566128e-05, "loss": 0.39952218532562256, "step": 688 }, { "epoch": 0.1872282608695652, "grad_norm": 1.1050548111373608, "learning_rate": 1.989681452474353e-05, "loss": 0.5427207946777344, "step": 689 }, { "epoch": 0.1875, "grad_norm": 1.2148691144101555, "learning_rate": 1.989616972793544e-05, "loss": 0.6662343740463257, "step": 690 }, { "epoch": 0.1877717391304348, "grad_norm": 0.8220914804559359, "learning_rate": 1.9895522933272028e-05, "loss": 0.4212324619293213, "step": 691 }, { "epoch": 0.18804347826086956, "grad_norm": 1.1653140051747666, "learning_rate": 1.9894874140883877e-05, "loss": 0.6605246067047119, "step": 692 }, { "epoch": 0.18831521739130436, "grad_norm": 1.1902044289302407, "learning_rate": 1.9894223350901965e-05, "loss": 0.6314643621444702, "step": 693 }, { "epoch": 0.18858695652173912, "grad_norm": 1.1605418200265634, "learning_rate": 1.989357056345767e-05, "loss": 0.6269517540931702, "step": 694 }, { "epoch": 0.18885869565217392, "grad_norm": 1.233904402168034, "learning_rate": 1.989291577868278e-05, "loss": 0.6818740367889404, "step": 695 }, { "epoch": 0.1891304347826087, "grad_norm": 0.9453612550795926, "learning_rate": 1.9892258996709488e-05, "loss": 0.5315555930137634, "step": 696 }, { "epoch": 0.18940217391304348, "grad_norm": 1.0704748271027333, "learning_rate": 1.9891600217670382e-05, "loss": 0.5367196202278137, "step": 697 }, { "epoch": 0.18967391304347825, "grad_norm": 0.938813997469159, "learning_rate": 1.9890939441698456e-05, "loss": 0.4737919867038727, "step": 698 }, { "epoch": 0.18994565217391304, "grad_norm": 1.010042176156003, "learning_rate": 1.9890276668927112e-05, "loss": 0.5205504894256592, "step": 699 }, { "epoch": 0.19021739130434784, "grad_norm": 1.1232021570284734, "learning_rate": 1.9889611899490152e-05, "loss": 0.5342676639556885, "step": 700 }, { "epoch": 0.1904891304347826, "grad_norm": 1.0350299521025133, "learning_rate": 1.9888945133521776e-05, "loss": 0.5783064365386963, "step": 701 }, { "epoch": 0.1907608695652174, "grad_norm": 1.066098750506912, "learning_rate": 1.9888276371156595e-05, "loss": 0.6154417395591736, "step": 702 }, { "epoch": 0.19103260869565217, "grad_norm": 1.1827526205470844, "learning_rate": 1.9887605612529622e-05, "loss": 0.6374446749687195, "step": 703 }, { "epoch": 0.19130434782608696, "grad_norm": 1.0737195289271886, "learning_rate": 1.9886932857776268e-05, "loss": 0.6543229818344116, "step": 704 }, { "epoch": 0.19157608695652173, "grad_norm": 0.9547840844792685, "learning_rate": 1.9886258107032352e-05, "loss": 0.5643703937530518, "step": 705 }, { "epoch": 0.19184782608695652, "grad_norm": 0.915152182093108, "learning_rate": 1.988558136043409e-05, "loss": 0.5100011229515076, "step": 706 }, { "epoch": 0.1921195652173913, "grad_norm": 0.9983182429441336, "learning_rate": 1.9884902618118108e-05, "loss": 0.5035141706466675, "step": 707 }, { "epoch": 0.1923913043478261, "grad_norm": 1.1688563065023676, "learning_rate": 1.9884221880221432e-05, "loss": 0.7015246152877808, "step": 708 }, { "epoch": 0.19266304347826088, "grad_norm": 0.950749782120535, "learning_rate": 1.988353914688149e-05, "loss": 0.465751588344574, "step": 709 }, { "epoch": 0.19293478260869565, "grad_norm": 0.8429338632052696, "learning_rate": 1.988285441823611e-05, "loss": 0.47026222944259644, "step": 710 }, { "epoch": 0.19320652173913044, "grad_norm": 1.1150338205934025, "learning_rate": 1.988216769442353e-05, "loss": 0.6729605197906494, "step": 711 }, { "epoch": 0.1934782608695652, "grad_norm": 1.0413735414335736, "learning_rate": 1.9881478975582387e-05, "loss": 0.5004146695137024, "step": 712 }, { "epoch": 0.19375, "grad_norm": 1.0297394718025332, "learning_rate": 1.9880788261851716e-05, "loss": 0.5044854879379272, "step": 713 }, { "epoch": 0.19402173913043477, "grad_norm": 1.099468506146578, "learning_rate": 1.9880095553370967e-05, "loss": 0.548453688621521, "step": 714 }, { "epoch": 0.19429347826086957, "grad_norm": 1.0159775794343746, "learning_rate": 1.987940085027998e-05, "loss": 0.4481867849826813, "step": 715 }, { "epoch": 0.19456521739130433, "grad_norm": 0.9203628544942455, "learning_rate": 1.9878704152719e-05, "loss": 0.5147095322608948, "step": 716 }, { "epoch": 0.19483695652173913, "grad_norm": 1.0474254542725858, "learning_rate": 1.987800546082869e-05, "loss": 0.6322912573814392, "step": 717 }, { "epoch": 0.19510869565217392, "grad_norm": 1.2164969876198235, "learning_rate": 1.9877304774750087e-05, "loss": 0.6417475938796997, "step": 718 }, { "epoch": 0.1953804347826087, "grad_norm": 0.958413237480967, "learning_rate": 1.9876602094624657e-05, "loss": 0.4796249270439148, "step": 719 }, { "epoch": 0.1956521739130435, "grad_norm": 0.6799100744285373, "learning_rate": 1.9875897420594253e-05, "loss": 0.3492799401283264, "step": 720 }, { "epoch": 0.19592391304347825, "grad_norm": 1.1825889088494026, "learning_rate": 1.987519075280114e-05, "loss": 0.6168991923332214, "step": 721 }, { "epoch": 0.19619565217391305, "grad_norm": 1.0812882139872, "learning_rate": 1.9874482091387982e-05, "loss": 0.5790043473243713, "step": 722 }, { "epoch": 0.19646739130434782, "grad_norm": 0.8381526182564073, "learning_rate": 1.987377143649784e-05, "loss": 0.35217157006263733, "step": 723 }, { "epoch": 0.1967391304347826, "grad_norm": 0.8807345201348986, "learning_rate": 1.9873058788274183e-05, "loss": 0.44466933608055115, "step": 724 }, { "epoch": 0.19701086956521738, "grad_norm": 1.0454203126130166, "learning_rate": 1.9872344146860886e-05, "loss": 0.570155143737793, "step": 725 }, { "epoch": 0.19728260869565217, "grad_norm": 0.9350294209467885, "learning_rate": 1.9871627512402216e-05, "loss": 0.5443938970565796, "step": 726 }, { "epoch": 0.19755434782608697, "grad_norm": 1.1142574003805579, "learning_rate": 1.987090888504285e-05, "loss": 0.6146491765975952, "step": 727 }, { "epoch": 0.19782608695652174, "grad_norm": 1.02350158471898, "learning_rate": 1.9870188264927873e-05, "loss": 0.5395548939704895, "step": 728 }, { "epoch": 0.19809782608695653, "grad_norm": 1.0274252105328343, "learning_rate": 1.9869465652202756e-05, "loss": 0.6039748191833496, "step": 729 }, { "epoch": 0.1983695652173913, "grad_norm": 1.0166018717403509, "learning_rate": 1.9868741047013382e-05, "loss": 0.552619457244873, "step": 730 }, { "epoch": 0.1986413043478261, "grad_norm": 0.9290552585355207, "learning_rate": 1.9868014449506045e-05, "loss": 0.4479619264602661, "step": 731 }, { "epoch": 0.19891304347826086, "grad_norm": 1.0028775822897151, "learning_rate": 1.9867285859827418e-05, "loss": 0.5364800691604614, "step": 732 }, { "epoch": 0.19918478260869565, "grad_norm": 1.0283518139287662, "learning_rate": 1.9866555278124597e-05, "loss": 0.5276193618774414, "step": 733 }, { "epoch": 0.19945652173913042, "grad_norm": 1.0775881514395282, "learning_rate": 1.9865822704545074e-05, "loss": 0.5588185787200928, "step": 734 }, { "epoch": 0.19972826086956522, "grad_norm": 1.056379628233691, "learning_rate": 1.986508813923674e-05, "loss": 0.5609844923019409, "step": 735 }, { "epoch": 0.2, "grad_norm": 1.8592245406411962, "learning_rate": 1.9864351582347892e-05, "loss": 0.5686638355255127, "step": 736 }, { "epoch": 0.20027173913043478, "grad_norm": 1.0844893319979623, "learning_rate": 1.9863613034027224e-05, "loss": 0.6232665181159973, "step": 737 }, { "epoch": 0.20054347826086957, "grad_norm": 1.0893219517358306, "learning_rate": 1.986287249442384e-05, "loss": 0.6446458101272583, "step": 738 }, { "epoch": 0.20081521739130434, "grad_norm": 1.081107744214337, "learning_rate": 1.986212996368724e-05, "loss": 0.6208013296127319, "step": 739 }, { "epoch": 0.20108695652173914, "grad_norm": 1.0937017498876052, "learning_rate": 1.9861385441967326e-05, "loss": 0.6377030611038208, "step": 740 }, { "epoch": 0.2013586956521739, "grad_norm": 0.8558893105409602, "learning_rate": 1.98606389294144e-05, "loss": 0.4518064260482788, "step": 741 }, { "epoch": 0.2016304347826087, "grad_norm": 1.0084133711159393, "learning_rate": 1.9859890426179178e-05, "loss": 0.5647328495979309, "step": 742 }, { "epoch": 0.20190217391304346, "grad_norm": 1.122932142891228, "learning_rate": 1.985913993241276e-05, "loss": 0.5823352336883545, "step": 743 }, { "epoch": 0.20217391304347826, "grad_norm": 0.8663698059819867, "learning_rate": 1.9858387448266665e-05, "loss": 0.4735708236694336, "step": 744 }, { "epoch": 0.20244565217391305, "grad_norm": 0.9439393026052161, "learning_rate": 1.9857632973892796e-05, "loss": 0.6262645721435547, "step": 745 }, { "epoch": 0.20271739130434782, "grad_norm": 0.9872434579895779, "learning_rate": 1.9856876509443477e-05, "loss": 0.5456720590591431, "step": 746 }, { "epoch": 0.20298913043478262, "grad_norm": 0.9478341783778994, "learning_rate": 1.985611805507142e-05, "loss": 0.5631636381149292, "step": 747 }, { "epoch": 0.20326086956521738, "grad_norm": 0.9007219411345233, "learning_rate": 1.9855357610929746e-05, "loss": 0.4645753502845764, "step": 748 }, { "epoch": 0.20353260869565218, "grad_norm": 0.9610100967266965, "learning_rate": 1.9854595177171968e-05, "loss": 0.5703261494636536, "step": 749 }, { "epoch": 0.20380434782608695, "grad_norm": 0.762860648913447, "learning_rate": 1.9853830753952014e-05, "loss": 0.453188419342041, "step": 750 }, { "epoch": 0.20407608695652174, "grad_norm": 1.0185607771953904, "learning_rate": 1.985306434142421e-05, "loss": 0.6393734216690063, "step": 751 }, { "epoch": 0.20434782608695654, "grad_norm": 1.04423422048874, "learning_rate": 1.9852295939743267e-05, "loss": 0.5306459665298462, "step": 752 }, { "epoch": 0.2046195652173913, "grad_norm": 0.8885462766810133, "learning_rate": 1.9851525549064324e-05, "loss": 0.45114773511886597, "step": 753 }, { "epoch": 0.2048913043478261, "grad_norm": 0.8629618122820801, "learning_rate": 1.9850753169542902e-05, "loss": 0.5092544555664062, "step": 754 }, { "epoch": 0.20516304347826086, "grad_norm": 1.0747924761842027, "learning_rate": 1.9849978801334935e-05, "loss": 0.4673920273780823, "step": 755 }, { "epoch": 0.20543478260869566, "grad_norm": 1.0195877444665251, "learning_rate": 1.984920244459675e-05, "loss": 0.4763972759246826, "step": 756 }, { "epoch": 0.20570652173913043, "grad_norm": 1.049287629682106, "learning_rate": 1.984842409948508e-05, "loss": 0.5661283731460571, "step": 757 }, { "epoch": 0.20597826086956522, "grad_norm": 0.8495333435182147, "learning_rate": 1.984764376615706e-05, "loss": 0.4869067668914795, "step": 758 }, { "epoch": 0.20625, "grad_norm": 1.0895314584975395, "learning_rate": 1.9846861444770226e-05, "loss": 0.6733304858207703, "step": 759 }, { "epoch": 0.20652173913043478, "grad_norm": 1.07944234665945, "learning_rate": 1.9846077135482513e-05, "loss": 0.5727745294570923, "step": 760 }, { "epoch": 0.20679347826086958, "grad_norm": 1.1154996934958212, "learning_rate": 1.9845290838452257e-05, "loss": 0.6079154014587402, "step": 761 }, { "epoch": 0.20706521739130435, "grad_norm": 1.081365453501359, "learning_rate": 1.9844502553838196e-05, "loss": 0.6013922095298767, "step": 762 }, { "epoch": 0.20733695652173914, "grad_norm": 1.0448221544342349, "learning_rate": 1.9843712281799473e-05, "loss": 0.6299561262130737, "step": 763 }, { "epoch": 0.2076086956521739, "grad_norm": 0.9360663334771382, "learning_rate": 1.9842920022495634e-05, "loss": 0.5373398065567017, "step": 764 }, { "epoch": 0.2078804347826087, "grad_norm": 0.9367063537833017, "learning_rate": 1.984212577608661e-05, "loss": 0.5085747838020325, "step": 765 }, { "epoch": 0.20815217391304347, "grad_norm": 1.092956124527194, "learning_rate": 1.9841329542732755e-05, "loss": 0.5264269113540649, "step": 766 }, { "epoch": 0.20842391304347826, "grad_norm": 1.04662389459451, "learning_rate": 1.9840531322594808e-05, "loss": 0.553507387638092, "step": 767 }, { "epoch": 0.20869565217391303, "grad_norm": 1.203713007591474, "learning_rate": 1.983973111583392e-05, "loss": 0.6184964179992676, "step": 768 }, { "epoch": 0.20896739130434783, "grad_norm": 0.989773096374112, "learning_rate": 1.9838928922611634e-05, "loss": 0.5961925983428955, "step": 769 }, { "epoch": 0.20923913043478262, "grad_norm": 0.970324726682183, "learning_rate": 1.98381247430899e-05, "loss": 0.5725278854370117, "step": 770 }, { "epoch": 0.2095108695652174, "grad_norm": 0.9566585124216027, "learning_rate": 1.9837318577431068e-05, "loss": 0.56490159034729, "step": 771 }, { "epoch": 0.20978260869565218, "grad_norm": 0.9758936606968334, "learning_rate": 1.9836510425797882e-05, "loss": 0.5033926963806152, "step": 772 }, { "epoch": 0.21005434782608695, "grad_norm": 0.9995876381344976, "learning_rate": 1.9835700288353502e-05, "loss": 0.5445008873939514, "step": 773 }, { "epoch": 0.21032608695652175, "grad_norm": 1.031505574928188, "learning_rate": 1.9834888165261474e-05, "loss": 0.576183557510376, "step": 774 }, { "epoch": 0.2105978260869565, "grad_norm": 1.1104371576203542, "learning_rate": 1.983407405668575e-05, "loss": 0.6638193130493164, "step": 775 }, { "epoch": 0.2108695652173913, "grad_norm": 0.9132507556860975, "learning_rate": 1.983325796279069e-05, "loss": 0.48247265815734863, "step": 776 }, { "epoch": 0.21114130434782608, "grad_norm": 1.0777185864429082, "learning_rate": 1.9832439883741038e-05, "loss": 0.6051630973815918, "step": 777 }, { "epoch": 0.21141304347826087, "grad_norm": 0.9720430773442605, "learning_rate": 1.9831619819701962e-05, "loss": 0.5509249567985535, "step": 778 }, { "epoch": 0.21168478260869567, "grad_norm": 0.9808801683968292, "learning_rate": 1.9830797770839008e-05, "loss": 0.5347088575363159, "step": 779 }, { "epoch": 0.21195652173913043, "grad_norm": 1.0105409669807948, "learning_rate": 1.982997373731814e-05, "loss": 0.560843825340271, "step": 780 }, { "epoch": 0.21222826086956523, "grad_norm": 1.0017223052134572, "learning_rate": 1.9829147719305706e-05, "loss": 0.4725677967071533, "step": 781 }, { "epoch": 0.2125, "grad_norm": 0.8838278694438336, "learning_rate": 1.9828319716968473e-05, "loss": 0.40701824426651, "step": 782 }, { "epoch": 0.2127717391304348, "grad_norm": 1.1401341117254797, "learning_rate": 1.9827489730473597e-05, "loss": 0.6328569650650024, "step": 783 }, { "epoch": 0.21304347826086956, "grad_norm": 1.051330083666482, "learning_rate": 1.982665775998863e-05, "loss": 0.49273669719696045, "step": 784 }, { "epoch": 0.21331521739130435, "grad_norm": 0.8737350095193981, "learning_rate": 1.9825823805681543e-05, "loss": 0.45596396923065186, "step": 785 }, { "epoch": 0.21358695652173912, "grad_norm": 1.1370791931102096, "learning_rate": 1.9824987867720685e-05, "loss": 0.6132450103759766, "step": 786 }, { "epoch": 0.2138586956521739, "grad_norm": 1.0454551307978572, "learning_rate": 1.9824149946274827e-05, "loss": 0.5216003060340881, "step": 787 }, { "epoch": 0.2141304347826087, "grad_norm": 0.8691208846463089, "learning_rate": 1.9823310041513123e-05, "loss": 0.45492982864379883, "step": 788 }, { "epoch": 0.21440217391304348, "grad_norm": 1.0070879382708937, "learning_rate": 1.9822468153605137e-05, "loss": 0.5446740984916687, "step": 789 }, { "epoch": 0.21467391304347827, "grad_norm": 1.0845270225782904, "learning_rate": 1.9821624282720827e-05, "loss": 0.518348217010498, "step": 790 }, { "epoch": 0.21494565217391304, "grad_norm": 1.080097047043649, "learning_rate": 1.982077842903056e-05, "loss": 0.6153488755226135, "step": 791 }, { "epoch": 0.21521739130434783, "grad_norm": 1.029443969620967, "learning_rate": 1.98199305927051e-05, "loss": 0.5496606230735779, "step": 792 }, { "epoch": 0.2154891304347826, "grad_norm": 1.0787942376373265, "learning_rate": 1.9819080773915603e-05, "loss": 0.5704481601715088, "step": 793 }, { "epoch": 0.2157608695652174, "grad_norm": 0.99875733874118, "learning_rate": 1.9818228972833636e-05, "loss": 0.5352619886398315, "step": 794 }, { "epoch": 0.21603260869565216, "grad_norm": 0.9731356068312629, "learning_rate": 1.9817375189631157e-05, "loss": 0.43848055601119995, "step": 795 }, { "epoch": 0.21630434782608696, "grad_norm": 1.047587326037894, "learning_rate": 1.9816519424480537e-05, "loss": 0.5669851303100586, "step": 796 }, { "epoch": 0.21657608695652175, "grad_norm": 0.9706313111260721, "learning_rate": 1.9815661677554537e-05, "loss": 0.5112494230270386, "step": 797 }, { "epoch": 0.21684782608695652, "grad_norm": 0.9793965719025034, "learning_rate": 1.9814801949026314e-05, "loss": 0.5067711472511292, "step": 798 }, { "epoch": 0.2171195652173913, "grad_norm": 1.0840642746070388, "learning_rate": 1.981394023906944e-05, "loss": 0.5728447437286377, "step": 799 }, { "epoch": 0.21739130434782608, "grad_norm": 1.0895595803397404, "learning_rate": 1.9813076547857874e-05, "loss": 0.6127052307128906, "step": 800 }, { "epoch": 0.21766304347826088, "grad_norm": 0.970820317126737, "learning_rate": 1.981221087556598e-05, "loss": 0.5352093577384949, "step": 801 }, { "epoch": 0.21793478260869564, "grad_norm": 1.1997389606032833, "learning_rate": 1.981134322236852e-05, "loss": 0.6487644910812378, "step": 802 }, { "epoch": 0.21820652173913044, "grad_norm": 0.9289125699926959, "learning_rate": 1.9810473588440662e-05, "loss": 0.46021515130996704, "step": 803 }, { "epoch": 0.2184782608695652, "grad_norm": 1.1153666562375462, "learning_rate": 1.9809601973957965e-05, "loss": 0.6195456385612488, "step": 804 }, { "epoch": 0.21875, "grad_norm": 0.9497455433670344, "learning_rate": 1.980872837909639e-05, "loss": 0.4986763000488281, "step": 805 }, { "epoch": 0.2190217391304348, "grad_norm": 0.8226833244975597, "learning_rate": 1.9807852804032306e-05, "loss": 0.4196348786354065, "step": 806 }, { "epoch": 0.21929347826086956, "grad_norm": 0.9414414084871887, "learning_rate": 1.9806975248942472e-05, "loss": 0.5236356854438782, "step": 807 }, { "epoch": 0.21956521739130436, "grad_norm": 0.9767820235935731, "learning_rate": 1.980609571400405e-05, "loss": 0.5848420858383179, "step": 808 }, { "epoch": 0.21983695652173912, "grad_norm": 0.9170808069059551, "learning_rate": 1.98052141993946e-05, "loss": 0.48360103368759155, "step": 809 }, { "epoch": 0.22010869565217392, "grad_norm": 0.9838710509961194, "learning_rate": 1.980433070529209e-05, "loss": 0.6240806579589844, "step": 810 }, { "epoch": 0.2203804347826087, "grad_norm": 1.0012572897620948, "learning_rate": 1.9803445231874877e-05, "loss": 0.5649389624595642, "step": 811 }, { "epoch": 0.22065217391304348, "grad_norm": 1.6708890070262206, "learning_rate": 1.980255777932172e-05, "loss": 0.6767339706420898, "step": 812 }, { "epoch": 0.22092391304347825, "grad_norm": 1.003394292473352, "learning_rate": 1.9801668347811786e-05, "loss": 0.5665686130523682, "step": 813 }, { "epoch": 0.22119565217391304, "grad_norm": 0.9715261067561298, "learning_rate": 1.9800776937524628e-05, "loss": 0.5621864199638367, "step": 814 }, { "epoch": 0.22146739130434784, "grad_norm": 1.0499591906949128, "learning_rate": 1.9799883548640208e-05, "loss": 0.582055926322937, "step": 815 }, { "epoch": 0.2217391304347826, "grad_norm": 1.0693286992216287, "learning_rate": 1.9798988181338887e-05, "loss": 0.5663842558860779, "step": 816 }, { "epoch": 0.2220108695652174, "grad_norm": 0.9870193034934028, "learning_rate": 1.9798090835801418e-05, "loss": 0.5549071431159973, "step": 817 }, { "epoch": 0.22228260869565217, "grad_norm": 1.040358917081902, "learning_rate": 1.979719151220896e-05, "loss": 0.5935961008071899, "step": 818 }, { "epoch": 0.22255434782608696, "grad_norm": 1.0193938633535204, "learning_rate": 1.979629021074308e-05, "loss": 0.4886711835861206, "step": 819 }, { "epoch": 0.22282608695652173, "grad_norm": 1.0573549530317008, "learning_rate": 1.979538693158572e-05, "loss": 0.604196310043335, "step": 820 }, { "epoch": 0.22309782608695652, "grad_norm": 1.062014824394039, "learning_rate": 1.979448167491924e-05, "loss": 0.5706455707550049, "step": 821 }, { "epoch": 0.2233695652173913, "grad_norm": 1.1112032095740374, "learning_rate": 1.97935744409264e-05, "loss": 0.6521550416946411, "step": 822 }, { "epoch": 0.2236413043478261, "grad_norm": 1.0442041411280463, "learning_rate": 1.9792665229790347e-05, "loss": 0.5736157894134521, "step": 823 }, { "epoch": 0.22391304347826088, "grad_norm": 1.1365711376716954, "learning_rate": 1.9791754041694645e-05, "loss": 0.5651278495788574, "step": 824 }, { "epoch": 0.22418478260869565, "grad_norm": 0.9015914271783283, "learning_rate": 1.979084087682323e-05, "loss": 0.48409855365753174, "step": 825 }, { "epoch": 0.22445652173913044, "grad_norm": 1.0019091372185993, "learning_rate": 1.9789925735360466e-05, "loss": 0.5449419021606445, "step": 826 }, { "epoch": 0.2247282608695652, "grad_norm": 1.112248714254809, "learning_rate": 1.9789008617491096e-05, "loss": 0.6577479839324951, "step": 827 }, { "epoch": 0.225, "grad_norm": 1.0544270104912659, "learning_rate": 1.9788089523400274e-05, "loss": 0.5039288401603699, "step": 828 }, { "epoch": 0.22527173913043477, "grad_norm": 1.0189011698159407, "learning_rate": 1.9787168453273546e-05, "loss": 0.5917250514030457, "step": 829 }, { "epoch": 0.22554347826086957, "grad_norm": 1.0944794296489981, "learning_rate": 1.978624540729686e-05, "loss": 0.5719192028045654, "step": 830 }, { "epoch": 0.22581521739130433, "grad_norm": 0.9173058903903097, "learning_rate": 1.978532038565656e-05, "loss": 0.5585107803344727, "step": 831 }, { "epoch": 0.22608695652173913, "grad_norm": 0.9664596218378271, "learning_rate": 1.9784393388539397e-05, "loss": 0.4846670627593994, "step": 832 }, { "epoch": 0.22635869565217392, "grad_norm": 1.064108840362223, "learning_rate": 1.9783464416132507e-05, "loss": 0.5285148620605469, "step": 833 }, { "epoch": 0.2266304347826087, "grad_norm": 5.064363981640104, "learning_rate": 1.9782533468623437e-05, "loss": 0.5852564573287964, "step": 834 }, { "epoch": 0.2269021739130435, "grad_norm": 0.9203127118432544, "learning_rate": 1.9781600546200126e-05, "loss": 0.5144131183624268, "step": 835 }, { "epoch": 0.22717391304347825, "grad_norm": 0.9246177092941241, "learning_rate": 1.9780665649050918e-05, "loss": 0.5013269186019897, "step": 836 }, { "epoch": 0.22744565217391305, "grad_norm": 0.9209920037378119, "learning_rate": 1.9779728777364548e-05, "loss": 0.4485984444618225, "step": 837 }, { "epoch": 0.22771739130434782, "grad_norm": 0.9929290606245263, "learning_rate": 1.977878993133015e-05, "loss": 0.49888670444488525, "step": 838 }, { "epoch": 0.2279891304347826, "grad_norm": 1.0213973279994268, "learning_rate": 1.9777849111137265e-05, "loss": 0.5353584289550781, "step": 839 }, { "epoch": 0.22826086956521738, "grad_norm": 0.9677810581681898, "learning_rate": 1.977690631697583e-05, "loss": 0.4760870039463043, "step": 840 }, { "epoch": 0.22853260869565217, "grad_norm": 1.1549817872547543, "learning_rate": 1.977596154903617e-05, "loss": 0.6459935307502747, "step": 841 }, { "epoch": 0.22880434782608697, "grad_norm": 0.904042901482507, "learning_rate": 1.9775014807509022e-05, "loss": 0.5330404043197632, "step": 842 }, { "epoch": 0.22907608695652174, "grad_norm": 1.0237852286911966, "learning_rate": 1.9774066092585517e-05, "loss": 0.5390040278434753, "step": 843 }, { "epoch": 0.22934782608695653, "grad_norm": 0.8606665788154052, "learning_rate": 1.9773115404457175e-05, "loss": 0.39365360140800476, "step": 844 }, { "epoch": 0.2296195652173913, "grad_norm": 0.8511699174348346, "learning_rate": 1.977216274331593e-05, "loss": 0.4453664720058441, "step": 845 }, { "epoch": 0.2298913043478261, "grad_norm": 1.183724873193321, "learning_rate": 1.9771208109354105e-05, "loss": 0.6909217834472656, "step": 846 }, { "epoch": 0.23016304347826086, "grad_norm": 1.0248867158706942, "learning_rate": 1.977025150276442e-05, "loss": 0.6005028486251831, "step": 847 }, { "epoch": 0.23043478260869565, "grad_norm": 0.9311519696420362, "learning_rate": 1.9769292923740006e-05, "loss": 0.4877510070800781, "step": 848 }, { "epoch": 0.23070652173913042, "grad_norm": 1.025425879782641, "learning_rate": 1.976833237247437e-05, "loss": 0.5673811435699463, "step": 849 }, { "epoch": 0.23097826086956522, "grad_norm": 0.824657013454397, "learning_rate": 1.9767369849161434e-05, "loss": 0.3940073251724243, "step": 850 }, { "epoch": 0.23125, "grad_norm": 0.8674647158836266, "learning_rate": 1.9766405353995517e-05, "loss": 0.523772120475769, "step": 851 }, { "epoch": 0.23152173913043478, "grad_norm": 0.9429900452110934, "learning_rate": 1.9765438887171327e-05, "loss": 0.4253033995628357, "step": 852 }, { "epoch": 0.23179347826086957, "grad_norm": 1.080031608922593, "learning_rate": 1.9764470448883987e-05, "loss": 0.5865961909294128, "step": 853 }, { "epoch": 0.23206521739130434, "grad_norm": 1.0666743629901059, "learning_rate": 1.9763500039328995e-05, "loss": 0.549787163734436, "step": 854 }, { "epoch": 0.23233695652173914, "grad_norm": 0.9187755955064982, "learning_rate": 1.9762527658702266e-05, "loss": 0.544360876083374, "step": 855 }, { "epoch": 0.2326086956521739, "grad_norm": 0.9757137146031155, "learning_rate": 1.9761553307200106e-05, "loss": 0.46080341935157776, "step": 856 }, { "epoch": 0.2328804347826087, "grad_norm": 0.9483476980456197, "learning_rate": 1.976057698501921e-05, "loss": 0.5306115746498108, "step": 857 }, { "epoch": 0.23315217391304346, "grad_norm": 0.9114094957192236, "learning_rate": 1.975959869235669e-05, "loss": 0.5398223996162415, "step": 858 }, { "epoch": 0.23342391304347826, "grad_norm": 1.1699643195880058, "learning_rate": 1.975861842941004e-05, "loss": 0.6491219997406006, "step": 859 }, { "epoch": 0.23369565217391305, "grad_norm": 1.1531852422109596, "learning_rate": 1.975763619637716e-05, "loss": 0.5807901620864868, "step": 860 }, { "epoch": 0.23396739130434782, "grad_norm": 1.1454480735941144, "learning_rate": 1.975665199345634e-05, "loss": 0.6237281560897827, "step": 861 }, { "epoch": 0.23423913043478262, "grad_norm": 1.0171474488379058, "learning_rate": 1.975566582084628e-05, "loss": 0.5806249380111694, "step": 862 }, { "epoch": 0.23451086956521738, "grad_norm": 1.10735763697691, "learning_rate": 1.9754677678746064e-05, "loss": 0.5858713388442993, "step": 863 }, { "epoch": 0.23478260869565218, "grad_norm": 0.9954164556611762, "learning_rate": 1.975368756735518e-05, "loss": 0.48027271032333374, "step": 864 }, { "epoch": 0.23505434782608695, "grad_norm": 1.119016607415699, "learning_rate": 1.9752695486873516e-05, "loss": 0.5980277061462402, "step": 865 }, { "epoch": 0.23532608695652174, "grad_norm": 1.1013774242331744, "learning_rate": 1.9751701437501352e-05, "loss": 0.49659106135368347, "step": 866 }, { "epoch": 0.23559782608695654, "grad_norm": 1.0663696039004655, "learning_rate": 1.9750705419439376e-05, "loss": 0.5701220035552979, "step": 867 }, { "epoch": 0.2358695652173913, "grad_norm": 0.9724491484247492, "learning_rate": 1.9749707432888653e-05, "loss": 0.4510907828807831, "step": 868 }, { "epoch": 0.2361413043478261, "grad_norm": 1.1894081777164884, "learning_rate": 1.974870747805067e-05, "loss": 0.7239501476287842, "step": 869 }, { "epoch": 0.23641304347826086, "grad_norm": 0.978285210470095, "learning_rate": 1.9747705555127292e-05, "loss": 0.5346792936325073, "step": 870 }, { "epoch": 0.23668478260869566, "grad_norm": 0.8306606826974794, "learning_rate": 1.9746701664320796e-05, "loss": 0.4295327961444855, "step": 871 }, { "epoch": 0.23695652173913043, "grad_norm": 1.0647095197320353, "learning_rate": 1.974569580583384e-05, "loss": 0.6335508823394775, "step": 872 }, { "epoch": 0.23722826086956522, "grad_norm": 0.9020455767588381, "learning_rate": 1.9744687979869494e-05, "loss": 0.5434656143188477, "step": 873 }, { "epoch": 0.2375, "grad_norm": 1.1051062102999436, "learning_rate": 1.974367818663122e-05, "loss": 0.5725032687187195, "step": 874 }, { "epoch": 0.23777173913043478, "grad_norm": 0.9765849661777639, "learning_rate": 1.9742666426322877e-05, "loss": 0.5616930723190308, "step": 875 }, { "epoch": 0.23804347826086958, "grad_norm": 0.8999361567486271, "learning_rate": 1.9741652699148716e-05, "loss": 0.45830416679382324, "step": 876 }, { "epoch": 0.23831521739130435, "grad_norm": 0.9747807746311162, "learning_rate": 1.9740637005313397e-05, "loss": 0.4503819942474365, "step": 877 }, { "epoch": 0.23858695652173914, "grad_norm": 1.0152173085758787, "learning_rate": 1.9739619345021963e-05, "loss": 0.5499618053436279, "step": 878 }, { "epoch": 0.2388586956521739, "grad_norm": 1.1159838931003643, "learning_rate": 1.9738599718479866e-05, "loss": 0.6233243942260742, "step": 879 }, { "epoch": 0.2391304347826087, "grad_norm": 1.0953185929004248, "learning_rate": 1.973757812589295e-05, "loss": 0.5446100234985352, "step": 880 }, { "epoch": 0.23940217391304347, "grad_norm": 1.004656385200306, "learning_rate": 1.973655456746745e-05, "loss": 0.5553869009017944, "step": 881 }, { "epoch": 0.23967391304347826, "grad_norm": 0.9997750806876782, "learning_rate": 1.9735529043410012e-05, "loss": 0.4603230953216553, "step": 882 }, { "epoch": 0.23994565217391303, "grad_norm": 1.1216094007808015, "learning_rate": 1.9734501553927667e-05, "loss": 0.5678719878196716, "step": 883 }, { "epoch": 0.24021739130434783, "grad_norm": 1.0862530321547028, "learning_rate": 1.973347209922784e-05, "loss": 0.5300582051277161, "step": 884 }, { "epoch": 0.24048913043478262, "grad_norm": 1.155020558111412, "learning_rate": 1.973244067951837e-05, "loss": 0.635382890701294, "step": 885 }, { "epoch": 0.2407608695652174, "grad_norm": 1.246129578759267, "learning_rate": 1.973140729500747e-05, "loss": 0.7313992977142334, "step": 886 }, { "epoch": 0.24103260869565218, "grad_norm": 1.239079511748044, "learning_rate": 1.9730371945903773e-05, "loss": 0.6293343305587769, "step": 887 }, { "epoch": 0.24130434782608695, "grad_norm": 2.864364413799318, "learning_rate": 1.972933463241629e-05, "loss": 0.3971366882324219, "step": 888 }, { "epoch": 0.24157608695652175, "grad_norm": 1.1659304841948421, "learning_rate": 1.9728295354754436e-05, "loss": 0.5581392049789429, "step": 889 }, { "epoch": 0.2418478260869565, "grad_norm": 1.2226518822900867, "learning_rate": 1.9727254113128027e-05, "loss": 0.6344895362854004, "step": 890 }, { "epoch": 0.2421195652173913, "grad_norm": 1.1586953612670212, "learning_rate": 1.9726210907747264e-05, "loss": 0.7000829577445984, "step": 891 }, { "epoch": 0.24239130434782608, "grad_norm": 1.097023505346397, "learning_rate": 1.9725165738822753e-05, "loss": 0.5914867520332336, "step": 892 }, { "epoch": 0.24266304347826087, "grad_norm": 1.0749022567913127, "learning_rate": 1.97241186065655e-05, "loss": 0.5329209566116333, "step": 893 }, { "epoch": 0.24293478260869567, "grad_norm": 1.0505036596815756, "learning_rate": 1.9723069511186894e-05, "loss": 0.520919144153595, "step": 894 }, { "epoch": 0.24320652173913043, "grad_norm": 0.897710798052193, "learning_rate": 1.9722018452898733e-05, "loss": 0.5347065925598145, "step": 895 }, { "epoch": 0.24347826086956523, "grad_norm": 1.0161176385107877, "learning_rate": 1.9720965431913203e-05, "loss": 0.4599308967590332, "step": 896 }, { "epoch": 0.24375, "grad_norm": 1.2452168014500928, "learning_rate": 1.9719910448442893e-05, "loss": 0.7389886379241943, "step": 897 }, { "epoch": 0.2440217391304348, "grad_norm": 1.0856848689723677, "learning_rate": 1.9718853502700783e-05, "loss": 0.5593697428703308, "step": 898 }, { "epoch": 0.24429347826086956, "grad_norm": 0.9914554308721673, "learning_rate": 1.971779459490025e-05, "loss": 0.5666186809539795, "step": 899 }, { "epoch": 0.24456521739130435, "grad_norm": 0.893443811586562, "learning_rate": 1.9716733725255072e-05, "loss": 0.447317898273468, "step": 900 }, { "epoch": 0.24483695652173912, "grad_norm": 0.9529249939138643, "learning_rate": 1.9715670893979416e-05, "loss": 0.50403892993927, "step": 901 }, { "epoch": 0.2451086956521739, "grad_norm": 1.0612595799067135, "learning_rate": 1.971460610128785e-05, "loss": 0.5306737422943115, "step": 902 }, { "epoch": 0.2453804347826087, "grad_norm": 1.000986501468309, "learning_rate": 1.971353934739533e-05, "loss": 0.5530113577842712, "step": 903 }, { "epoch": 0.24565217391304348, "grad_norm": 0.8957920532570176, "learning_rate": 1.971247063251722e-05, "loss": 0.45061594247817993, "step": 904 }, { "epoch": 0.24592391304347827, "grad_norm": 1.0666723838325902, "learning_rate": 1.9711399956869278e-05, "loss": 0.5978577136993408, "step": 905 }, { "epoch": 0.24619565217391304, "grad_norm": 1.0257837171222504, "learning_rate": 1.9710327320667647e-05, "loss": 0.6059432029724121, "step": 906 }, { "epoch": 0.24646739130434783, "grad_norm": 0.9458039846404799, "learning_rate": 1.970925272412887e-05, "loss": 0.49051719903945923, "step": 907 }, { "epoch": 0.2467391304347826, "grad_norm": 0.9085310077077747, "learning_rate": 1.9708176167469896e-05, "loss": 0.49916934967041016, "step": 908 }, { "epoch": 0.2470108695652174, "grad_norm": 0.9019982398433912, "learning_rate": 1.9707097650908057e-05, "loss": 0.4909152388572693, "step": 909 }, { "epoch": 0.24728260869565216, "grad_norm": 1.0191059206916573, "learning_rate": 1.970601717466109e-05, "loss": 0.6091455817222595, "step": 910 }, { "epoch": 0.24755434782608696, "grad_norm": 1.019851967869918, "learning_rate": 1.970493473894712e-05, "loss": 0.5470601916313171, "step": 911 }, { "epoch": 0.24782608695652175, "grad_norm": 1.0217703701834788, "learning_rate": 1.970385034398467e-05, "loss": 0.5326637029647827, "step": 912 }, { "epoch": 0.24809782608695652, "grad_norm": 1.1007455352426134, "learning_rate": 1.970276398999266e-05, "loss": 0.6104944944381714, "step": 913 }, { "epoch": 0.2483695652173913, "grad_norm": 0.8567737458819596, "learning_rate": 1.970167567719041e-05, "loss": 0.3981108069419861, "step": 914 }, { "epoch": 0.24864130434782608, "grad_norm": 0.9759838507769981, "learning_rate": 1.9700585405797623e-05, "loss": 0.5148709416389465, "step": 915 }, { "epoch": 0.24891304347826088, "grad_norm": 1.0871767280783773, "learning_rate": 1.9699493176034413e-05, "loss": 0.5873815417289734, "step": 916 }, { "epoch": 0.24918478260869564, "grad_norm": 1.061500383483326, "learning_rate": 1.969839898812127e-05, "loss": 0.5565299987792969, "step": 917 }, { "epoch": 0.24945652173913044, "grad_norm": 0.992103576155016, "learning_rate": 1.96973028422791e-05, "loss": 0.4791942536830902, "step": 918 }, { "epoch": 0.2497282608695652, "grad_norm": 1.2144051564268885, "learning_rate": 1.969620473872919e-05, "loss": 0.6919578313827515, "step": 919 }, { "epoch": 0.25, "grad_norm": 0.9532502018954342, "learning_rate": 1.9695104677693234e-05, "loss": 0.5040838122367859, "step": 920 }, { "epoch": 0.25027173913043477, "grad_norm": 1.0236620427526733, "learning_rate": 1.9694002659393306e-05, "loss": 0.6297273635864258, "step": 921 }, { "epoch": 0.2505434782608696, "grad_norm": 1.0967048167018414, "learning_rate": 1.9692898684051886e-05, "loss": 0.5837522149085999, "step": 922 }, { "epoch": 0.25081521739130436, "grad_norm": 1.018637332901232, "learning_rate": 1.9691792751891847e-05, "loss": 0.5433405041694641, "step": 923 }, { "epoch": 0.2510869565217391, "grad_norm": 1.0227437595183022, "learning_rate": 1.9690684863136457e-05, "loss": 0.4929983913898468, "step": 924 }, { "epoch": 0.2513586956521739, "grad_norm": 1.0347737538069754, "learning_rate": 1.9689575018009374e-05, "loss": 0.5123528838157654, "step": 925 }, { "epoch": 0.2516304347826087, "grad_norm": 1.0884000935718696, "learning_rate": 1.968846321673466e-05, "loss": 0.6046772003173828, "step": 926 }, { "epoch": 0.2519021739130435, "grad_norm": 0.9744902192205591, "learning_rate": 1.9687349459536768e-05, "loss": 0.445651113986969, "step": 927 }, { "epoch": 0.25217391304347825, "grad_norm": 0.8913460225363697, "learning_rate": 1.968623374664054e-05, "loss": 0.4709536135196686, "step": 928 }, { "epoch": 0.25244565217391307, "grad_norm": 0.9918697663389959, "learning_rate": 1.9685116078271224e-05, "loss": 0.45329800248146057, "step": 929 }, { "epoch": 0.25271739130434784, "grad_norm": 1.1537619779804367, "learning_rate": 1.9683996454654454e-05, "loss": 0.6677160263061523, "step": 930 }, { "epoch": 0.2529891304347826, "grad_norm": 0.9238694679956136, "learning_rate": 1.9682874876016263e-05, "loss": 0.4532586336135864, "step": 931 }, { "epoch": 0.2532608695652174, "grad_norm": 1.1701835891103491, "learning_rate": 1.9681751342583073e-05, "loss": 0.6215417385101318, "step": 932 }, { "epoch": 0.2535326086956522, "grad_norm": 1.0202535696881156, "learning_rate": 1.968062585458171e-05, "loss": 0.5453399419784546, "step": 933 }, { "epoch": 0.25380434782608696, "grad_norm": 0.9777882791599996, "learning_rate": 1.9679498412239387e-05, "loss": 0.4979228973388672, "step": 934 }, { "epoch": 0.25407608695652173, "grad_norm": 1.1264365969954648, "learning_rate": 1.967836901578371e-05, "loss": 0.4547666907310486, "step": 935 }, { "epoch": 0.2543478260869565, "grad_norm": 0.9958914567768543, "learning_rate": 1.967723766544269e-05, "loss": 0.4927179217338562, "step": 936 }, { "epoch": 0.2546195652173913, "grad_norm": 1.2353758206775307, "learning_rate": 1.9676104361444724e-05, "loss": 0.6119142770767212, "step": 937 }, { "epoch": 0.2548913043478261, "grad_norm": 0.9350910971914592, "learning_rate": 1.9674969104018603e-05, "loss": 0.40137845277786255, "step": 938 }, { "epoch": 0.25516304347826085, "grad_norm": 1.0494983339994552, "learning_rate": 1.967383189339352e-05, "loss": 0.5785964727401733, "step": 939 }, { "epoch": 0.2554347826086957, "grad_norm": 1.0996089781912652, "learning_rate": 1.9672692729799044e-05, "loss": 0.6593730449676514, "step": 940 }, { "epoch": 0.25570652173913044, "grad_norm": 1.153954676302412, "learning_rate": 1.967155161346517e-05, "loss": 0.6802933216094971, "step": 941 }, { "epoch": 0.2559782608695652, "grad_norm": 1.0706655451840397, "learning_rate": 1.9670408544622255e-05, "loss": 0.5472448468208313, "step": 942 }, { "epoch": 0.25625, "grad_norm": 1.1535447483254986, "learning_rate": 1.9669263523501066e-05, "loss": 0.5704107284545898, "step": 943 }, { "epoch": 0.2565217391304348, "grad_norm": 1.5393861851981832, "learning_rate": 1.966811655033277e-05, "loss": 0.5648292303085327, "step": 944 }, { "epoch": 0.25679347826086957, "grad_norm": 1.0259134848071927, "learning_rate": 1.9666967625348907e-05, "loss": 0.5845821499824524, "step": 945 }, { "epoch": 0.25706521739130433, "grad_norm": 0.9899560384154796, "learning_rate": 1.966581674878143e-05, "loss": 0.5254038572311401, "step": 946 }, { "epoch": 0.25733695652173916, "grad_norm": 1.1010763669133368, "learning_rate": 1.9664663920862684e-05, "loss": 0.6097866296768188, "step": 947 }, { "epoch": 0.2576086956521739, "grad_norm": 1.1661528139948085, "learning_rate": 1.9663509141825398e-05, "loss": 0.6462708115577698, "step": 948 }, { "epoch": 0.2578804347826087, "grad_norm": 1.1541213113529996, "learning_rate": 1.9662352411902702e-05, "loss": 0.6072111129760742, "step": 949 }, { "epoch": 0.25815217391304346, "grad_norm": 0.9255233105529865, "learning_rate": 1.966119373132812e-05, "loss": 0.5267648696899414, "step": 950 }, { "epoch": 0.2584239130434783, "grad_norm": 0.9370063207787185, "learning_rate": 1.9660033100335564e-05, "loss": 0.5179708003997803, "step": 951 }, { "epoch": 0.25869565217391305, "grad_norm": 0.9047990248312899, "learning_rate": 1.9658870519159352e-05, "loss": 0.489890992641449, "step": 952 }, { "epoch": 0.2589673913043478, "grad_norm": 0.9379185169032633, "learning_rate": 1.9657705988034182e-05, "loss": 0.48089271783828735, "step": 953 }, { "epoch": 0.2592391304347826, "grad_norm": 0.9484298929474755, "learning_rate": 1.9656539507195154e-05, "loss": 0.5365242958068848, "step": 954 }, { "epoch": 0.2595108695652174, "grad_norm": 1.0588101989504182, "learning_rate": 1.965537107687776e-05, "loss": 0.5219511985778809, "step": 955 }, { "epoch": 0.2597826086956522, "grad_norm": 0.8989491492950062, "learning_rate": 1.965420069731788e-05, "loss": 0.4631813168525696, "step": 956 }, { "epoch": 0.26005434782608694, "grad_norm": 1.1585509067826862, "learning_rate": 1.9653028368751795e-05, "loss": 0.5320429801940918, "step": 957 }, { "epoch": 0.26032608695652176, "grad_norm": 1.0590076343163475, "learning_rate": 1.9651854091416175e-05, "loss": 0.5579915046691895, "step": 958 }, { "epoch": 0.26059782608695653, "grad_norm": 1.110272038417072, "learning_rate": 1.965067786554809e-05, "loss": 0.6139797568321228, "step": 959 }, { "epoch": 0.2608695652173913, "grad_norm": 1.0149653512988268, "learning_rate": 1.964949969138499e-05, "loss": 0.48350515961647034, "step": 960 }, { "epoch": 0.26114130434782606, "grad_norm": 1.1334797971839448, "learning_rate": 1.964831956916474e-05, "loss": 0.6473950147628784, "step": 961 }, { "epoch": 0.2614130434782609, "grad_norm": 1.018543846809459, "learning_rate": 1.964713749912557e-05, "loss": 0.5072996616363525, "step": 962 }, { "epoch": 0.26168478260869565, "grad_norm": 1.0415493322906029, "learning_rate": 1.9645953481506127e-05, "loss": 0.5160703659057617, "step": 963 }, { "epoch": 0.2619565217391304, "grad_norm": 1.0359186996176144, "learning_rate": 1.9644767516545443e-05, "loss": 0.5474288463592529, "step": 964 }, { "epoch": 0.26222826086956524, "grad_norm": 1.0957759881747364, "learning_rate": 1.9643579604482937e-05, "loss": 0.5606968402862549, "step": 965 }, { "epoch": 0.2625, "grad_norm": 1.0972874297133564, "learning_rate": 1.964238974555843e-05, "loss": 0.5821158289909363, "step": 966 }, { "epoch": 0.2627717391304348, "grad_norm": 1.0057737109945644, "learning_rate": 1.9641197940012136e-05, "loss": 0.5240830183029175, "step": 967 }, { "epoch": 0.26304347826086955, "grad_norm": 0.9655001283369895, "learning_rate": 1.9640004188084655e-05, "loss": 0.47109243273735046, "step": 968 }, { "epoch": 0.26331521739130437, "grad_norm": 0.9106642584830579, "learning_rate": 1.9638808490016984e-05, "loss": 0.4594990313053131, "step": 969 }, { "epoch": 0.26358695652173914, "grad_norm": 0.9313639403369943, "learning_rate": 1.963761084605051e-05, "loss": 0.4747525751590729, "step": 970 }, { "epoch": 0.2638586956521739, "grad_norm": 1.0929995340303604, "learning_rate": 1.963641125642702e-05, "loss": 0.6219510436058044, "step": 971 }, { "epoch": 0.26413043478260867, "grad_norm": 0.8919114179546019, "learning_rate": 1.9635209721388687e-05, "loss": 0.4374340772628784, "step": 972 }, { "epoch": 0.2644021739130435, "grad_norm": 0.9940957753789924, "learning_rate": 1.9634006241178078e-05, "loss": 0.4902147948741913, "step": 973 }, { "epoch": 0.26467391304347826, "grad_norm": 1.1141045788829305, "learning_rate": 1.9632800816038154e-05, "loss": 0.4273327589035034, "step": 974 }, { "epoch": 0.264945652173913, "grad_norm": 1.3464242998275873, "learning_rate": 1.963159344621227e-05, "loss": 0.5310487151145935, "step": 975 }, { "epoch": 0.26521739130434785, "grad_norm": 0.9065663302117841, "learning_rate": 1.9630384131944173e-05, "loss": 0.5803983211517334, "step": 976 }, { "epoch": 0.2654891304347826, "grad_norm": 0.9779683865277039, "learning_rate": 1.9629172873477995e-05, "loss": 0.5244983434677124, "step": 977 }, { "epoch": 0.2657608695652174, "grad_norm": 0.9254885390260819, "learning_rate": 1.962795967105827e-05, "loss": 0.4969303011894226, "step": 978 }, { "epoch": 0.26603260869565215, "grad_norm": 0.8852991561083968, "learning_rate": 1.962674452492993e-05, "loss": 0.3766111731529236, "step": 979 }, { "epoch": 0.266304347826087, "grad_norm": 0.8290957886791541, "learning_rate": 1.9625527435338273e-05, "loss": 0.39491817355155945, "step": 980 }, { "epoch": 0.26657608695652174, "grad_norm": 0.8505538159117024, "learning_rate": 1.9624308402529023e-05, "loss": 0.4106161594390869, "step": 981 }, { "epoch": 0.2668478260869565, "grad_norm": 0.9915870151588958, "learning_rate": 1.9623087426748273e-05, "loss": 0.4853821396827698, "step": 982 }, { "epoch": 0.26711956521739133, "grad_norm": 0.9496001946830719, "learning_rate": 1.9621864508242517e-05, "loss": 0.5309531688690186, "step": 983 }, { "epoch": 0.2673913043478261, "grad_norm": 0.845106811087502, "learning_rate": 1.9620639647258644e-05, "loss": 0.49304455518722534, "step": 984 }, { "epoch": 0.26766304347826086, "grad_norm": 1.146864114850883, "learning_rate": 1.961941284404392e-05, "loss": 0.4937363862991333, "step": 985 }, { "epoch": 0.26793478260869563, "grad_norm": 0.96214365788005, "learning_rate": 1.9618184098846025e-05, "loss": 0.5080981254577637, "step": 986 }, { "epoch": 0.26820652173913045, "grad_norm": 0.8676285290790536, "learning_rate": 1.9616953411913017e-05, "loss": 0.3991142809391022, "step": 987 }, { "epoch": 0.2684782608695652, "grad_norm": 1.107437226321011, "learning_rate": 1.9615720783493347e-05, "loss": 0.5871748924255371, "step": 988 }, { "epoch": 0.26875, "grad_norm": 1.337228197196201, "learning_rate": 1.961448621383586e-05, "loss": 0.5652681589126587, "step": 989 }, { "epoch": 0.26902173913043476, "grad_norm": 1.0403396679728438, "learning_rate": 1.96132497031898e-05, "loss": 0.5453842878341675, "step": 990 }, { "epoch": 0.2692934782608696, "grad_norm": 1.109021332393744, "learning_rate": 1.9612011251804784e-05, "loss": 0.6684587597846985, "step": 991 }, { "epoch": 0.26956521739130435, "grad_norm": 1.132895214800092, "learning_rate": 1.961077085993084e-05, "loss": 0.5380070805549622, "step": 992 }, { "epoch": 0.2698369565217391, "grad_norm": 0.9550817219705693, "learning_rate": 1.960952852781838e-05, "loss": 0.5057839155197144, "step": 993 }, { "epoch": 0.27010869565217394, "grad_norm": 1.1537238221966224, "learning_rate": 1.960828425571821e-05, "loss": 0.5510427951812744, "step": 994 }, { "epoch": 0.2703804347826087, "grad_norm": 1.2469694388303958, "learning_rate": 1.960703804388152e-05, "loss": 0.6728643774986267, "step": 995 }, { "epoch": 0.27065217391304347, "grad_norm": 0.8751309903514037, "learning_rate": 1.9605789892559902e-05, "loss": 0.42387622594833374, "step": 996 }, { "epoch": 0.27092391304347824, "grad_norm": 0.9121526799475799, "learning_rate": 1.960453980200533e-05, "loss": 0.46964210271835327, "step": 997 }, { "epoch": 0.27119565217391306, "grad_norm": 0.893792169883677, "learning_rate": 1.9603287772470182e-05, "loss": 0.4340979754924774, "step": 998 }, { "epoch": 0.2714673913043478, "grad_norm": 1.2502196989120926, "learning_rate": 1.9602033804207212e-05, "loss": 0.6454471349716187, "step": 999 }, { "epoch": 0.2717391304347826, "grad_norm": 0.9152708434089859, "learning_rate": 1.960077789746958e-05, "loss": 0.42075011134147644, "step": 1000 }, { "epoch": 0.2720108695652174, "grad_norm": 1.0968812225418718, "learning_rate": 1.9599520052510824e-05, "loss": 0.587673008441925, "step": 1001 }, { "epoch": 0.2722826086956522, "grad_norm": 1.0711536882185007, "learning_rate": 1.9598260269584884e-05, "loss": 0.5581825971603394, "step": 1002 }, { "epoch": 0.27255434782608695, "grad_norm": 0.8251188233175604, "learning_rate": 1.959699854894609e-05, "loss": 0.4453129768371582, "step": 1003 }, { "epoch": 0.2728260869565217, "grad_norm": 0.9660088857332942, "learning_rate": 1.959573489084915e-05, "loss": 0.4796198606491089, "step": 1004 }, { "epoch": 0.27309782608695654, "grad_norm": 0.9116748617249384, "learning_rate": 1.9594469295549184e-05, "loss": 0.4377306401729584, "step": 1005 }, { "epoch": 0.2733695652173913, "grad_norm": 1.0606368763621588, "learning_rate": 1.9593201763301687e-05, "loss": 0.5915591716766357, "step": 1006 }, { "epoch": 0.2736413043478261, "grad_norm": 1.089798573020981, "learning_rate": 1.9591932294362552e-05, "loss": 0.6057679653167725, "step": 1007 }, { "epoch": 0.27391304347826084, "grad_norm": 0.995467022779834, "learning_rate": 1.9590660888988064e-05, "loss": 0.4804958701133728, "step": 1008 }, { "epoch": 0.27418478260869567, "grad_norm": 0.9641613257067597, "learning_rate": 1.958938754743489e-05, "loss": 0.5348326563835144, "step": 1009 }, { "epoch": 0.27445652173913043, "grad_norm": 0.8470845775306577, "learning_rate": 1.95881122699601e-05, "loss": 0.424241840839386, "step": 1010 }, { "epoch": 0.2747282608695652, "grad_norm": 1.1183748057306884, "learning_rate": 1.9586835056821148e-05, "loss": 0.4969187378883362, "step": 1011 }, { "epoch": 0.275, "grad_norm": 1.1614481313575538, "learning_rate": 1.958555590827588e-05, "loss": 0.5763249397277832, "step": 1012 }, { "epoch": 0.2752717391304348, "grad_norm": 1.1071777084670062, "learning_rate": 1.958427482458253e-05, "loss": 0.5655546188354492, "step": 1013 }, { "epoch": 0.27554347826086956, "grad_norm": 0.9157822832117248, "learning_rate": 1.9582991805999728e-05, "loss": 0.4681561589241028, "step": 1014 }, { "epoch": 0.2758152173913043, "grad_norm": 1.1066315421920319, "learning_rate": 1.9581706852786492e-05, "loss": 0.5782893896102905, "step": 1015 }, { "epoch": 0.27608695652173915, "grad_norm": 1.0709791337554282, "learning_rate": 1.9580419965202227e-05, "loss": 0.5996848344802856, "step": 1016 }, { "epoch": 0.2763586956521739, "grad_norm": 0.9493703560687069, "learning_rate": 1.9579131143506736e-05, "loss": 0.4759516417980194, "step": 1017 }, { "epoch": 0.2766304347826087, "grad_norm": 1.0350190499421534, "learning_rate": 1.9577840387960206e-05, "loss": 0.5158092975616455, "step": 1018 }, { "epoch": 0.2769021739130435, "grad_norm": 1.05536014749471, "learning_rate": 1.9576547698823222e-05, "loss": 0.5323532819747925, "step": 1019 }, { "epoch": 0.27717391304347827, "grad_norm": 1.0564420845931788, "learning_rate": 1.9575253076356747e-05, "loss": 0.48594993352890015, "step": 1020 }, { "epoch": 0.27744565217391304, "grad_norm": 0.9269447958207823, "learning_rate": 1.9573956520822147e-05, "loss": 0.43229764699935913, "step": 1021 }, { "epoch": 0.2777173913043478, "grad_norm": 1.2212713561301156, "learning_rate": 1.9572658032481167e-05, "loss": 0.7002719640731812, "step": 1022 }, { "epoch": 0.2779891304347826, "grad_norm": 1.0301717417868443, "learning_rate": 1.9571357611595955e-05, "loss": 0.4728018641471863, "step": 1023 }, { "epoch": 0.2782608695652174, "grad_norm": 1.0356915625386205, "learning_rate": 1.9570055258429042e-05, "loss": 0.4849589467048645, "step": 1024 }, { "epoch": 0.27853260869565216, "grad_norm": 0.9617553016515195, "learning_rate": 1.956875097324334e-05, "loss": 0.4206826984882355, "step": 1025 }, { "epoch": 0.27880434782608693, "grad_norm": 0.9968847115747156, "learning_rate": 1.9567444756302168e-05, "loss": 0.4983908534049988, "step": 1026 }, { "epoch": 0.27907608695652175, "grad_norm": 1.157412778316096, "learning_rate": 1.9566136607869224e-05, "loss": 0.5953733325004578, "step": 1027 }, { "epoch": 0.2793478260869565, "grad_norm": 0.9090340942073663, "learning_rate": 1.9564826528208608e-05, "loss": 0.45384514331817627, "step": 1028 }, { "epoch": 0.2796195652173913, "grad_norm": 1.1595996872972754, "learning_rate": 1.9563514517584786e-05, "loss": 0.6327357292175293, "step": 1029 }, { "epoch": 0.2798913043478261, "grad_norm": 1.1473958612478607, "learning_rate": 1.9562200576262644e-05, "loss": 0.6725311279296875, "step": 1030 }, { "epoch": 0.2801630434782609, "grad_norm": 1.0756219878735154, "learning_rate": 1.9560884704507427e-05, "loss": 0.5830652713775635, "step": 1031 }, { "epoch": 0.28043478260869564, "grad_norm": 0.935668643699713, "learning_rate": 1.95595669025848e-05, "loss": 0.49286895990371704, "step": 1032 }, { "epoch": 0.2807065217391304, "grad_norm": 1.063921626883338, "learning_rate": 1.9558247170760796e-05, "loss": 0.550892174243927, "step": 1033 }, { "epoch": 0.28097826086956523, "grad_norm": 1.1245423891669706, "learning_rate": 1.9556925509301844e-05, "loss": 0.5986615419387817, "step": 1034 }, { "epoch": 0.28125, "grad_norm": 1.025705551082057, "learning_rate": 1.9555601918474764e-05, "loss": 0.5099952220916748, "step": 1035 }, { "epoch": 0.28152173913043477, "grad_norm": 1.083325411177539, "learning_rate": 1.9554276398546767e-05, "loss": 0.5633349418640137, "step": 1036 }, { "epoch": 0.2817934782608696, "grad_norm": 1.1171597059755272, "learning_rate": 1.9552948949785452e-05, "loss": 0.5262375473976135, "step": 1037 }, { "epoch": 0.28206521739130436, "grad_norm": 1.1392707757531544, "learning_rate": 1.95516195724588e-05, "loss": 0.6002553701400757, "step": 1038 }, { "epoch": 0.2823369565217391, "grad_norm": 1.114031680763513, "learning_rate": 1.9550288266835194e-05, "loss": 0.6028637290000916, "step": 1039 }, { "epoch": 0.2826086956521739, "grad_norm": 1.0765091971816172, "learning_rate": 1.9548955033183402e-05, "loss": 0.6051616668701172, "step": 1040 }, { "epoch": 0.2828804347826087, "grad_norm": 0.913468261219643, "learning_rate": 1.9547619871772575e-05, "loss": 0.4421160817146301, "step": 1041 }, { "epoch": 0.2831521739130435, "grad_norm": 1.1698957162860488, "learning_rate": 1.9546282782872255e-05, "loss": 0.6448404788970947, "step": 1042 }, { "epoch": 0.28342391304347825, "grad_norm": 0.9986030673916241, "learning_rate": 1.954494376675238e-05, "loss": 0.49722206592559814, "step": 1043 }, { "epoch": 0.28369565217391307, "grad_norm": 1.0850078773774037, "learning_rate": 1.9543602823683274e-05, "loss": 0.4945758581161499, "step": 1044 }, { "epoch": 0.28396739130434784, "grad_norm": 0.9443483353785247, "learning_rate": 1.9542259953935643e-05, "loss": 0.4294206500053406, "step": 1045 }, { "epoch": 0.2842391304347826, "grad_norm": 0.9712738376956744, "learning_rate": 1.9540915157780597e-05, "loss": 0.5481219291687012, "step": 1046 }, { "epoch": 0.2845108695652174, "grad_norm": 1.0770608954071166, "learning_rate": 1.9539568435489617e-05, "loss": 0.4887540936470032, "step": 1047 }, { "epoch": 0.2847826086956522, "grad_norm": 0.9936999599891759, "learning_rate": 1.9538219787334584e-05, "loss": 0.563505232334137, "step": 1048 }, { "epoch": 0.28505434782608696, "grad_norm": 1.0352234667210332, "learning_rate": 1.9536869213587768e-05, "loss": 0.5536519289016724, "step": 1049 }, { "epoch": 0.28532608695652173, "grad_norm": 1.1149569671976303, "learning_rate": 1.9535516714521825e-05, "loss": 0.61167973279953, "step": 1050 }, { "epoch": 0.2855978260869565, "grad_norm": 1.0424086824261867, "learning_rate": 1.9534162290409796e-05, "loss": 0.55906081199646, "step": 1051 }, { "epoch": 0.2858695652173913, "grad_norm": 0.9378048291095034, "learning_rate": 1.9532805941525118e-05, "loss": 0.49703288078308105, "step": 1052 }, { "epoch": 0.2861413043478261, "grad_norm": 0.9859903701198398, "learning_rate": 1.953144766814161e-05, "loss": 0.546784520149231, "step": 1053 }, { "epoch": 0.28641304347826085, "grad_norm": 0.8386882433568921, "learning_rate": 1.9530087470533483e-05, "loss": 0.4013711214065552, "step": 1054 }, { "epoch": 0.2866847826086957, "grad_norm": 1.0576359031028295, "learning_rate": 1.952872534897534e-05, "loss": 0.49968957901000977, "step": 1055 }, { "epoch": 0.28695652173913044, "grad_norm": 1.1045595630506926, "learning_rate": 1.9527361303742165e-05, "loss": 0.5379431247711182, "step": 1056 }, { "epoch": 0.2872282608695652, "grad_norm": 1.0417025210264206, "learning_rate": 1.9525995335109333e-05, "loss": 0.5336359143257141, "step": 1057 }, { "epoch": 0.2875, "grad_norm": 0.9718478899163852, "learning_rate": 1.952462744335261e-05, "loss": 0.4433165192604065, "step": 1058 }, { "epoch": 0.2877717391304348, "grad_norm": 0.9854347015588689, "learning_rate": 1.9523257628748148e-05, "loss": 0.5600234270095825, "step": 1059 }, { "epoch": 0.28804347826086957, "grad_norm": 0.8813364455000346, "learning_rate": 1.9521885891572487e-05, "loss": 0.4292687177658081, "step": 1060 }, { "epoch": 0.28831521739130433, "grad_norm": 1.0243578387888688, "learning_rate": 1.9520512232102552e-05, "loss": 0.5426907539367676, "step": 1061 }, { "epoch": 0.28858695652173916, "grad_norm": 0.8438227104422478, "learning_rate": 1.9519136650615666e-05, "loss": 0.3902336359024048, "step": 1062 }, { "epoch": 0.2888586956521739, "grad_norm": 1.0980531266890081, "learning_rate": 1.9517759147389533e-05, "loss": 0.6163575649261475, "step": 1063 }, { "epoch": 0.2891304347826087, "grad_norm": 0.9410337215993276, "learning_rate": 1.951637972270224e-05, "loss": 0.4400478005409241, "step": 1064 }, { "epoch": 0.28940217391304346, "grad_norm": 0.8672848788454866, "learning_rate": 1.951499837683227e-05, "loss": 0.4883461594581604, "step": 1065 }, { "epoch": 0.2896739130434783, "grad_norm": 1.0700279728402324, "learning_rate": 1.9513615110058498e-05, "loss": 0.5042959451675415, "step": 1066 }, { "epoch": 0.28994565217391305, "grad_norm": 1.0024842926606465, "learning_rate": 1.951222992266017e-05, "loss": 0.5659500956535339, "step": 1067 }, { "epoch": 0.2902173913043478, "grad_norm": 1.046946141364744, "learning_rate": 1.951084281491694e-05, "loss": 0.5351751446723938, "step": 1068 }, { "epoch": 0.2904891304347826, "grad_norm": 1.0033877206255044, "learning_rate": 1.9509453787108827e-05, "loss": 0.5868367552757263, "step": 1069 }, { "epoch": 0.2907608695652174, "grad_norm": 1.0321626366715366, "learning_rate": 1.950806283951626e-05, "loss": 0.5985850095748901, "step": 1070 }, { "epoch": 0.2910326086956522, "grad_norm": 1.0584479948650753, "learning_rate": 1.9506669972420047e-05, "loss": 0.531173586845398, "step": 1071 }, { "epoch": 0.29130434782608694, "grad_norm": 0.8716216334242337, "learning_rate": 1.9505275186101378e-05, "loss": 0.4659265875816345, "step": 1072 }, { "epoch": 0.29157608695652176, "grad_norm": 1.0364042452952136, "learning_rate": 1.9503878480841832e-05, "loss": 0.5234419107437134, "step": 1073 }, { "epoch": 0.29184782608695653, "grad_norm": 1.0919738666523422, "learning_rate": 1.9502479856923386e-05, "loss": 0.48657190799713135, "step": 1074 }, { "epoch": 0.2921195652173913, "grad_norm": 1.0279964901430116, "learning_rate": 1.950107931462839e-05, "loss": 0.5839653611183167, "step": 1075 }, { "epoch": 0.29239130434782606, "grad_norm": 0.9236923032092749, "learning_rate": 1.949967685423959e-05, "loss": 0.4489138424396515, "step": 1076 }, { "epoch": 0.2926630434782609, "grad_norm": 1.1981819746657973, "learning_rate": 1.9498272476040115e-05, "loss": 0.5152185559272766, "step": 1077 }, { "epoch": 0.29293478260869565, "grad_norm": 1.1129649751657575, "learning_rate": 1.949686618031349e-05, "loss": 0.6501061916351318, "step": 1078 }, { "epoch": 0.2932065217391304, "grad_norm": 1.0205129771567107, "learning_rate": 1.9495457967343615e-05, "loss": 0.5034385919570923, "step": 1079 }, { "epoch": 0.29347826086956524, "grad_norm": 1.0049316047146424, "learning_rate": 1.949404783741478e-05, "loss": 0.5674021244049072, "step": 1080 }, { "epoch": 0.29375, "grad_norm": 1.02461574926018, "learning_rate": 1.949263579081167e-05, "loss": 0.5881675481796265, "step": 1081 }, { "epoch": 0.2940217391304348, "grad_norm": 0.8018699594601355, "learning_rate": 1.9491221827819348e-05, "loss": 0.36658990383148193, "step": 1082 }, { "epoch": 0.29429347826086955, "grad_norm": 0.9880522045887206, "learning_rate": 1.948980594872327e-05, "loss": 0.48109278082847595, "step": 1083 }, { "epoch": 0.29456521739130437, "grad_norm": 1.1174580470018711, "learning_rate": 1.9488388153809272e-05, "loss": 0.6288527846336365, "step": 1084 }, { "epoch": 0.29483695652173914, "grad_norm": 0.8032560901308092, "learning_rate": 1.9486968443363585e-05, "loss": 0.3864895701408386, "step": 1085 }, { "epoch": 0.2951086956521739, "grad_norm": 0.9723885182281443, "learning_rate": 1.9485546817672814e-05, "loss": 0.5446127653121948, "step": 1086 }, { "epoch": 0.29538043478260867, "grad_norm": 1.0192636853264727, "learning_rate": 1.9484123277023974e-05, "loss": 0.5669041275978088, "step": 1087 }, { "epoch": 0.2956521739130435, "grad_norm": 1.0279808460171884, "learning_rate": 1.948269782170444e-05, "loss": 0.4897504150867462, "step": 1088 }, { "epoch": 0.29592391304347826, "grad_norm": 1.084542860187978, "learning_rate": 1.948127045200199e-05, "loss": 0.5691831111907959, "step": 1089 }, { "epoch": 0.296195652173913, "grad_norm": 0.9026248462689002, "learning_rate": 1.9479841168204777e-05, "loss": 0.41361239552497864, "step": 1090 }, { "epoch": 0.29646739130434785, "grad_norm": 0.9022298989466739, "learning_rate": 1.947840997060136e-05, "loss": 0.4843534231185913, "step": 1091 }, { "epoch": 0.2967391304347826, "grad_norm": 0.9753609320647362, "learning_rate": 1.947697685948066e-05, "loss": 0.4881162643432617, "step": 1092 }, { "epoch": 0.2970108695652174, "grad_norm": 0.9803906663170915, "learning_rate": 1.9475541835132e-05, "loss": 0.5042041540145874, "step": 1093 }, { "epoch": 0.29728260869565215, "grad_norm": 1.0552862877522249, "learning_rate": 1.9474104897845086e-05, "loss": 0.5818353891372681, "step": 1094 }, { "epoch": 0.297554347826087, "grad_norm": 1.0407549850394744, "learning_rate": 1.947266604791001e-05, "loss": 0.5100194215774536, "step": 1095 }, { "epoch": 0.29782608695652174, "grad_norm": 1.0574146059450762, "learning_rate": 1.947122528561725e-05, "loss": 0.637343168258667, "step": 1096 }, { "epoch": 0.2980978260869565, "grad_norm": 1.0301952608251115, "learning_rate": 1.9469782611257662e-05, "loss": 0.5265081524848938, "step": 1097 }, { "epoch": 0.29836956521739133, "grad_norm": 1.0777222660062782, "learning_rate": 1.9468338025122504e-05, "loss": 0.5461665391921997, "step": 1098 }, { "epoch": 0.2986413043478261, "grad_norm": 1.1562203509164983, "learning_rate": 1.9466891527503412e-05, "loss": 0.5875447988510132, "step": 1099 }, { "epoch": 0.29891304347826086, "grad_norm": 1.11606422212214, "learning_rate": 1.94654431186924e-05, "loss": 0.5775849223136902, "step": 1100 }, { "epoch": 0.29918478260869563, "grad_norm": 1.0653629124814077, "learning_rate": 1.9463992798981876e-05, "loss": 0.5583875179290771, "step": 1101 }, { "epoch": 0.29945652173913045, "grad_norm": 1.0362251982414585, "learning_rate": 1.9462540568664637e-05, "loss": 0.4897683262825012, "step": 1102 }, { "epoch": 0.2997282608695652, "grad_norm": 1.1654464216616232, "learning_rate": 1.9461086428033864e-05, "loss": 0.6175625920295715, "step": 1103 }, { "epoch": 0.3, "grad_norm": 1.00379818622099, "learning_rate": 1.945963037738312e-05, "loss": 0.5926756858825684, "step": 1104 }, { "epoch": 0.30027173913043476, "grad_norm": 1.0495567413450912, "learning_rate": 1.9458172417006347e-05, "loss": 0.5555200576782227, "step": 1105 }, { "epoch": 0.3005434782608696, "grad_norm": 0.9929347092818187, "learning_rate": 1.9456712547197893e-05, "loss": 0.5412147045135498, "step": 1106 }, { "epoch": 0.30081521739130435, "grad_norm": 0.997018537426278, "learning_rate": 1.9455250768252466e-05, "loss": 0.5072035789489746, "step": 1107 }, { "epoch": 0.3010869565217391, "grad_norm": 1.0404884343086236, "learning_rate": 1.9453787080465183e-05, "loss": 0.5900579690933228, "step": 1108 }, { "epoch": 0.30135869565217394, "grad_norm": 1.0797104465184988, "learning_rate": 1.9452321484131528e-05, "loss": 0.5774831771850586, "step": 1109 }, { "epoch": 0.3016304347826087, "grad_norm": 0.8495364834689844, "learning_rate": 1.9450853979547384e-05, "loss": 0.4894735813140869, "step": 1110 }, { "epoch": 0.30190217391304347, "grad_norm": 1.0222735925915116, "learning_rate": 1.944938456700901e-05, "loss": 0.5310328602790833, "step": 1111 }, { "epoch": 0.30217391304347824, "grad_norm": 1.0475964849872255, "learning_rate": 1.9447913246813057e-05, "loss": 0.5540717244148254, "step": 1112 }, { "epoch": 0.30244565217391306, "grad_norm": 1.1307599447055372, "learning_rate": 1.9446440019256556e-05, "loss": 0.6528768539428711, "step": 1113 }, { "epoch": 0.3027173913043478, "grad_norm": 1.0481481523882341, "learning_rate": 1.944496488463692e-05, "loss": 0.540511965751648, "step": 1114 }, { "epoch": 0.3029891304347826, "grad_norm": 1.1852648899852165, "learning_rate": 1.9443487843251954e-05, "loss": 0.6099753975868225, "step": 1115 }, { "epoch": 0.3032608695652174, "grad_norm": 1.0153693371122419, "learning_rate": 1.944200889539985e-05, "loss": 0.5093674063682556, "step": 1116 }, { "epoch": 0.3035326086956522, "grad_norm": 0.9410080984428572, "learning_rate": 1.9440528041379173e-05, "loss": 0.4906381368637085, "step": 1117 }, { "epoch": 0.30380434782608695, "grad_norm": 0.9319236006065218, "learning_rate": 1.943904528148889e-05, "loss": 0.4782184958457947, "step": 1118 }, { "epoch": 0.3040760869565217, "grad_norm": 1.0095032002765751, "learning_rate": 1.9437560616028335e-05, "loss": 0.4898466467857361, "step": 1119 }, { "epoch": 0.30434782608695654, "grad_norm": 0.9545004774007416, "learning_rate": 1.943607404529724e-05, "loss": 0.4729803502559662, "step": 1120 }, { "epoch": 0.3046195652173913, "grad_norm": 1.0244762342346583, "learning_rate": 1.943458556959571e-05, "loss": 0.5634140968322754, "step": 1121 }, { "epoch": 0.3048913043478261, "grad_norm": 1.0827366557354177, "learning_rate": 1.9433095189224245e-05, "loss": 0.5408792495727539, "step": 1122 }, { "epoch": 0.30516304347826084, "grad_norm": 0.9144273078401445, "learning_rate": 1.9431602904483727e-05, "loss": 0.4524080753326416, "step": 1123 }, { "epoch": 0.30543478260869567, "grad_norm": 1.0677965394793747, "learning_rate": 1.943010871567542e-05, "loss": 0.6555860042572021, "step": 1124 }, { "epoch": 0.30570652173913043, "grad_norm": 1.0574132459706729, "learning_rate": 1.9428612623100972e-05, "loss": 0.5257971286773682, "step": 1125 }, { "epoch": 0.3059782608695652, "grad_norm": 0.9176399917593913, "learning_rate": 1.9427114627062415e-05, "loss": 0.48015496134757996, "step": 1126 }, { "epoch": 0.30625, "grad_norm": 1.0002324235888984, "learning_rate": 1.942561472786217e-05, "loss": 0.45025020837783813, "step": 1127 }, { "epoch": 0.3065217391304348, "grad_norm": 0.9616621827765214, "learning_rate": 1.942411292580304e-05, "loss": 0.5157660245895386, "step": 1128 }, { "epoch": 0.30679347826086956, "grad_norm": 1.0097790947395608, "learning_rate": 1.9422609221188208e-05, "loss": 0.41483938694000244, "step": 1129 }, { "epoch": 0.3070652173913043, "grad_norm": 1.021150861493092, "learning_rate": 1.9421103614321244e-05, "loss": 0.6307631731033325, "step": 1130 }, { "epoch": 0.30733695652173915, "grad_norm": 1.1455317789708637, "learning_rate": 1.9419596105506107e-05, "loss": 0.6253122091293335, "step": 1131 }, { "epoch": 0.3076086956521739, "grad_norm": 0.7038427187791819, "learning_rate": 1.9418086695047133e-05, "loss": 0.2617134749889374, "step": 1132 }, { "epoch": 0.3078804347826087, "grad_norm": 0.9899491038950987, "learning_rate": 1.9416575383249047e-05, "loss": 0.5177235007286072, "step": 1133 }, { "epoch": 0.3081521739130435, "grad_norm": 0.9313061625936254, "learning_rate": 1.9415062170416946e-05, "loss": 0.47219640016555786, "step": 1134 }, { "epoch": 0.30842391304347827, "grad_norm": 1.012615658138092, "learning_rate": 1.941354705685633e-05, "loss": 0.5143924355506897, "step": 1135 }, { "epoch": 0.30869565217391304, "grad_norm": 0.8704310300612613, "learning_rate": 1.941203004287307e-05, "loss": 0.43221476674079895, "step": 1136 }, { "epoch": 0.3089673913043478, "grad_norm": 1.0345167116208887, "learning_rate": 1.941051112877342e-05, "loss": 0.5254142880439758, "step": 1137 }, { "epoch": 0.3092391304347826, "grad_norm": 1.056089153299539, "learning_rate": 1.9408990314864023e-05, "loss": 0.5630410313606262, "step": 1138 }, { "epoch": 0.3095108695652174, "grad_norm": 1.1431012795230406, "learning_rate": 1.9407467601451902e-05, "loss": 0.5625146627426147, "step": 1139 }, { "epoch": 0.30978260869565216, "grad_norm": 1.0148881368268963, "learning_rate": 1.9405942988844472e-05, "loss": 0.5193542838096619, "step": 1140 }, { "epoch": 0.31005434782608693, "grad_norm": 1.0315061422550476, "learning_rate": 1.9404416477349514e-05, "loss": 0.5353635549545288, "step": 1141 }, { "epoch": 0.31032608695652175, "grad_norm": 1.3865770206075372, "learning_rate": 1.9402888067275213e-05, "loss": 0.6115107536315918, "step": 1142 }, { "epoch": 0.3105978260869565, "grad_norm": 1.0510034738466931, "learning_rate": 1.9401357758930116e-05, "loss": 0.5173114538192749, "step": 1143 }, { "epoch": 0.3108695652173913, "grad_norm": 1.0668071870374483, "learning_rate": 1.9399825552623172e-05, "loss": 0.4866836667060852, "step": 1144 }, { "epoch": 0.3111413043478261, "grad_norm": 1.107960626120672, "learning_rate": 1.9398291448663702e-05, "loss": 0.5644721984863281, "step": 1145 }, { "epoch": 0.3114130434782609, "grad_norm": 0.927087571148295, "learning_rate": 1.9396755447361417e-05, "loss": 0.5516999959945679, "step": 1146 }, { "epoch": 0.31168478260869564, "grad_norm": 1.1070434723122697, "learning_rate": 1.9395217549026404e-05, "loss": 0.5999361276626587, "step": 1147 }, { "epoch": 0.3119565217391304, "grad_norm": 1.2426569815490676, "learning_rate": 1.9393677753969137e-05, "loss": 0.6722252368927002, "step": 1148 }, { "epoch": 0.31222826086956523, "grad_norm": 1.047183685938822, "learning_rate": 1.9392136062500473e-05, "loss": 0.5422588586807251, "step": 1149 }, { "epoch": 0.3125, "grad_norm": 1.075401820823017, "learning_rate": 1.9390592474931653e-05, "loss": 0.5801819562911987, "step": 1150 }, { "epoch": 0.31277173913043477, "grad_norm": 1.0404448365631123, "learning_rate": 1.9389046991574298e-05, "loss": 0.5755735039710999, "step": 1151 }, { "epoch": 0.3130434782608696, "grad_norm": 1.0770915451522414, "learning_rate": 1.938749961274041e-05, "loss": 0.5958157777786255, "step": 1152 }, { "epoch": 0.31331521739130436, "grad_norm": 1.0468559668899953, "learning_rate": 1.9385950338742377e-05, "loss": 0.4937191307544708, "step": 1153 }, { "epoch": 0.3135869565217391, "grad_norm": 1.0834939911196741, "learning_rate": 1.9384399169892974e-05, "loss": 0.5095995664596558, "step": 1154 }, { "epoch": 0.3138586956521739, "grad_norm": 1.0873550942454229, "learning_rate": 1.938284610650535e-05, "loss": 0.5341449975967407, "step": 1155 }, { "epoch": 0.3141304347826087, "grad_norm": 0.816731740947837, "learning_rate": 1.938129114889304e-05, "loss": 0.4204268753528595, "step": 1156 }, { "epoch": 0.3144021739130435, "grad_norm": 0.9144009505257444, "learning_rate": 1.9379734297369957e-05, "loss": 0.4598162770271301, "step": 1157 }, { "epoch": 0.31467391304347825, "grad_norm": 1.4314561691360788, "learning_rate": 1.937817555225041e-05, "loss": 0.45089930295944214, "step": 1158 }, { "epoch": 0.31494565217391307, "grad_norm": 0.9705878804003462, "learning_rate": 1.9376614913849076e-05, "loss": 0.48113852739334106, "step": 1159 }, { "epoch": 0.31521739130434784, "grad_norm": 0.990276520045308, "learning_rate": 1.937505238248102e-05, "loss": 0.5701358318328857, "step": 1160 }, { "epoch": 0.3154891304347826, "grad_norm": 0.9142826917581747, "learning_rate": 1.9373487958461688e-05, "loss": 0.468792200088501, "step": 1161 }, { "epoch": 0.3157608695652174, "grad_norm": 0.8426259436213583, "learning_rate": 1.937192164210691e-05, "loss": 0.35192281007766724, "step": 1162 }, { "epoch": 0.3160326086956522, "grad_norm": 0.9754891688012416, "learning_rate": 1.9370353433732892e-05, "loss": 0.4471893608570099, "step": 1163 }, { "epoch": 0.31630434782608696, "grad_norm": 1.0848499449216888, "learning_rate": 1.9368783333656232e-05, "loss": 0.5794346332550049, "step": 1164 }, { "epoch": 0.31657608695652173, "grad_norm": 0.8596625188478692, "learning_rate": 1.9367211342193904e-05, "loss": 0.47765469551086426, "step": 1165 }, { "epoch": 0.3168478260869565, "grad_norm": 0.9037615565706135, "learning_rate": 1.936563745966326e-05, "loss": 0.48599815368652344, "step": 1166 }, { "epoch": 0.3171195652173913, "grad_norm": 1.0489023442064125, "learning_rate": 1.9364061686382042e-05, "loss": 0.568246603012085, "step": 1167 }, { "epoch": 0.3173913043478261, "grad_norm": 0.8118145891347474, "learning_rate": 1.936248402266837e-05, "loss": 0.387786865234375, "step": 1168 }, { "epoch": 0.31766304347826085, "grad_norm": 1.0114207454682878, "learning_rate": 1.936090446884074e-05, "loss": 0.49537017941474915, "step": 1169 }, { "epoch": 0.3179347826086957, "grad_norm": 1.0720157718945684, "learning_rate": 1.935932302521804e-05, "loss": 0.5171656608581543, "step": 1170 }, { "epoch": 0.31820652173913044, "grad_norm": 0.919371900715655, "learning_rate": 1.935773969211953e-05, "loss": 0.5002716183662415, "step": 1171 }, { "epoch": 0.3184782608695652, "grad_norm": 1.146957347117743, "learning_rate": 1.9356154469864867e-05, "loss": 0.5210121273994446, "step": 1172 }, { "epoch": 0.31875, "grad_norm": 1.0214411697178136, "learning_rate": 1.9354567358774065e-05, "loss": 0.5589213371276855, "step": 1173 }, { "epoch": 0.3190217391304348, "grad_norm": 0.9878989633413144, "learning_rate": 1.935297835916754e-05, "loss": 0.4638991355895996, "step": 1174 }, { "epoch": 0.31929347826086957, "grad_norm": 0.9351642611709431, "learning_rate": 1.935138747136608e-05, "loss": 0.49223631620407104, "step": 1175 }, { "epoch": 0.31956521739130433, "grad_norm": 1.0425255976096617, "learning_rate": 1.9349794695690856e-05, "loss": 0.5245919227600098, "step": 1176 }, { "epoch": 0.31983695652173916, "grad_norm": 0.9203176873408684, "learning_rate": 1.934820003246342e-05, "loss": 0.44296517968177795, "step": 1177 }, { "epoch": 0.3201086956521739, "grad_norm": 0.8492276319091353, "learning_rate": 1.9346603482005705e-05, "loss": 0.3857441544532776, "step": 1178 }, { "epoch": 0.3203804347826087, "grad_norm": 0.99410131542128, "learning_rate": 1.9345005044640028e-05, "loss": 0.4963721036911011, "step": 1179 }, { "epoch": 0.32065217391304346, "grad_norm": 1.0198311832274727, "learning_rate": 1.934340472068908e-05, "loss": 0.49257948994636536, "step": 1180 }, { "epoch": 0.3209239130434783, "grad_norm": 0.9754487462754681, "learning_rate": 1.934180251047594e-05, "loss": 0.5434215068817139, "step": 1181 }, { "epoch": 0.32119565217391305, "grad_norm": 0.926815907743611, "learning_rate": 1.934019841432406e-05, "loss": 0.48296058177948, "step": 1182 }, { "epoch": 0.3214673913043478, "grad_norm": 0.8203856888918258, "learning_rate": 1.933859243255729e-05, "loss": 0.4254078269004822, "step": 1183 }, { "epoch": 0.3217391304347826, "grad_norm": 0.8626031950128743, "learning_rate": 1.9336984565499834e-05, "loss": 0.3843669593334198, "step": 1184 }, { "epoch": 0.3220108695652174, "grad_norm": 1.2305849135768014, "learning_rate": 1.93353748134763e-05, "loss": 0.6139007806777954, "step": 1185 }, { "epoch": 0.3222826086956522, "grad_norm": 1.1090925201212554, "learning_rate": 1.9333763176811663e-05, "loss": 0.4970349073410034, "step": 1186 }, { "epoch": 0.32255434782608694, "grad_norm": 0.9417977236104862, "learning_rate": 1.9332149655831287e-05, "loss": 0.5105458498001099, "step": 1187 }, { "epoch": 0.32282608695652176, "grad_norm": 1.0065534657922781, "learning_rate": 1.9330534250860907e-05, "loss": 0.4959058463573456, "step": 1188 }, { "epoch": 0.32309782608695653, "grad_norm": 0.8100861604175023, "learning_rate": 1.9328916962226644e-05, "loss": 0.4409869909286499, "step": 1189 }, { "epoch": 0.3233695652173913, "grad_norm": 1.0330827109988852, "learning_rate": 1.9327297790255003e-05, "loss": 0.4832521975040436, "step": 1190 }, { "epoch": 0.32364130434782606, "grad_norm": 1.1296811931502957, "learning_rate": 1.9325676735272864e-05, "loss": 0.5867997407913208, "step": 1191 }, { "epoch": 0.3239130434782609, "grad_norm": 1.0421190697546496, "learning_rate": 1.932405379760749e-05, "loss": 0.5412529706954956, "step": 1192 }, { "epoch": 0.32418478260869565, "grad_norm": 0.9563587028730661, "learning_rate": 1.9322428977586515e-05, "loss": 0.482028603553772, "step": 1193 }, { "epoch": 0.3244565217391304, "grad_norm": 1.14107765509463, "learning_rate": 1.932080227553797e-05, "loss": 0.6424636840820312, "step": 1194 }, { "epoch": 0.32472826086956524, "grad_norm": 0.9957458429631053, "learning_rate": 1.931917369179025e-05, "loss": 0.4922977685928345, "step": 1195 }, { "epoch": 0.325, "grad_norm": 0.8288729823979384, "learning_rate": 1.9317543226672136e-05, "loss": 0.4142773151397705, "step": 1196 }, { "epoch": 0.3252717391304348, "grad_norm": 1.006125662370245, "learning_rate": 1.9315910880512792e-05, "loss": 0.5130233764648438, "step": 1197 }, { "epoch": 0.32554347826086955, "grad_norm": 1.073820658269327, "learning_rate": 1.9314276653641756e-05, "loss": 0.6197561025619507, "step": 1198 }, { "epoch": 0.32581521739130437, "grad_norm": 1.1006234126402614, "learning_rate": 1.9312640546388955e-05, "loss": 0.6110675930976868, "step": 1199 }, { "epoch": 0.32608695652173914, "grad_norm": 1.0782167063027996, "learning_rate": 1.931100255908468e-05, "loss": 0.48054373264312744, "step": 1200 }, { "epoch": 0.3263586956521739, "grad_norm": 1.102170585108467, "learning_rate": 1.9309362692059617e-05, "loss": 0.5965652465820312, "step": 1201 }, { "epoch": 0.32663043478260867, "grad_norm": 1.0797223035947618, "learning_rate": 1.930772094564482e-05, "loss": 0.4914613962173462, "step": 1202 }, { "epoch": 0.3269021739130435, "grad_norm": 0.9800148055606492, "learning_rate": 1.9306077320171734e-05, "loss": 0.5281360745429993, "step": 1203 }, { "epoch": 0.32717391304347826, "grad_norm": 1.1475531530570942, "learning_rate": 1.9304431815972174e-05, "loss": 0.5857018232345581, "step": 1204 }, { "epoch": 0.327445652173913, "grad_norm": 0.8923571389286437, "learning_rate": 1.9302784433378333e-05, "loss": 0.4631589949131012, "step": 1205 }, { "epoch": 0.32771739130434785, "grad_norm": 1.0193648745741444, "learning_rate": 1.9301135172722794e-05, "loss": 0.5426203012466431, "step": 1206 }, { "epoch": 0.3279891304347826, "grad_norm": 0.911905334358356, "learning_rate": 1.9299484034338506e-05, "loss": 0.4681814908981323, "step": 1207 }, { "epoch": 0.3282608695652174, "grad_norm": 1.003884610985637, "learning_rate": 1.929783101855881e-05, "loss": 0.49008792638778687, "step": 1208 }, { "epoch": 0.32853260869565215, "grad_norm": 1.1596317617347747, "learning_rate": 1.9296176125717414e-05, "loss": 0.5943505764007568, "step": 1209 }, { "epoch": 0.328804347826087, "grad_norm": 0.933169762275391, "learning_rate": 1.9294519356148415e-05, "loss": 0.47558271884918213, "step": 1210 }, { "epoch": 0.32907608695652174, "grad_norm": 1.0588943675369977, "learning_rate": 1.9292860710186284e-05, "loss": 0.48148393630981445, "step": 1211 }, { "epoch": 0.3293478260869565, "grad_norm": 0.9797539194259959, "learning_rate": 1.9291200188165865e-05, "loss": 0.5432913303375244, "step": 1212 }, { "epoch": 0.32961956521739133, "grad_norm": 1.0116601017563802, "learning_rate": 1.9289537790422395e-05, "loss": 0.48028674721717834, "step": 1213 }, { "epoch": 0.3298913043478261, "grad_norm": 1.0153421826149431, "learning_rate": 1.928787351729148e-05, "loss": 0.6434128284454346, "step": 1214 }, { "epoch": 0.33016304347826086, "grad_norm": 1.0526025489086501, "learning_rate": 1.92862073691091e-05, "loss": 0.5268206596374512, "step": 1215 }, { "epoch": 0.33043478260869563, "grad_norm": 1.0697105471852106, "learning_rate": 1.9284539346211627e-05, "loss": 0.6575846672058105, "step": 1216 }, { "epoch": 0.33070652173913045, "grad_norm": 0.8672966953029835, "learning_rate": 1.92828694489358e-05, "loss": 0.49151846766471863, "step": 1217 }, { "epoch": 0.3309782608695652, "grad_norm": 0.931862182161919, "learning_rate": 1.9281197677618744e-05, "loss": 0.4932265877723694, "step": 1218 }, { "epoch": 0.33125, "grad_norm": 1.108506480599207, "learning_rate": 1.9279524032597958e-05, "loss": 0.4798128008842468, "step": 1219 }, { "epoch": 0.33152173913043476, "grad_norm": 1.028855895646761, "learning_rate": 1.927784851421132e-05, "loss": 0.6035632491111755, "step": 1220 }, { "epoch": 0.3317934782608696, "grad_norm": 1.0078942656305314, "learning_rate": 1.927617112279708e-05, "loss": 0.4856202006340027, "step": 1221 }, { "epoch": 0.33206521739130435, "grad_norm": 1.0945293120757706, "learning_rate": 1.9274491858693882e-05, "loss": 0.5225082635879517, "step": 1222 }, { "epoch": 0.3323369565217391, "grad_norm": 1.0548925279367305, "learning_rate": 1.927281072224074e-05, "loss": 0.5453101396560669, "step": 1223 }, { "epoch": 0.33260869565217394, "grad_norm": 0.9073661307389945, "learning_rate": 1.9271127713777033e-05, "loss": 0.4492568373680115, "step": 1224 }, { "epoch": 0.3328804347826087, "grad_norm": 1.0390831807598715, "learning_rate": 1.926944283364254e-05, "loss": 0.5937210321426392, "step": 1225 }, { "epoch": 0.33315217391304347, "grad_norm": 1.0294629587269586, "learning_rate": 1.9267756082177404e-05, "loss": 0.5114114880561829, "step": 1226 }, { "epoch": 0.33342391304347824, "grad_norm": 0.9577402656615218, "learning_rate": 1.926606745972215e-05, "loss": 0.4465279281139374, "step": 1227 }, { "epoch": 0.33369565217391306, "grad_norm": 0.9968318903919553, "learning_rate": 1.9264376966617674e-05, "loss": 0.5528464317321777, "step": 1228 }, { "epoch": 0.3339673913043478, "grad_norm": 1.0560277170090986, "learning_rate": 1.9262684603205264e-05, "loss": 0.616241455078125, "step": 1229 }, { "epoch": 0.3342391304347826, "grad_norm": 1.1033969355039286, "learning_rate": 1.9260990369826572e-05, "loss": 0.6742187738418579, "step": 1230 }, { "epoch": 0.3345108695652174, "grad_norm": 0.9305683434402415, "learning_rate": 1.925929426682364e-05, "loss": 0.40262284874916077, "step": 1231 }, { "epoch": 0.3347826086956522, "grad_norm": 1.0785250281906404, "learning_rate": 1.9257596294538868e-05, "loss": 0.5488030910491943, "step": 1232 }, { "epoch": 0.33505434782608695, "grad_norm": 1.0060796559945346, "learning_rate": 1.9255896453315054e-05, "loss": 0.539901852607727, "step": 1233 }, { "epoch": 0.3353260869565217, "grad_norm": 0.9086534817192319, "learning_rate": 1.9254194743495363e-05, "loss": 0.43766283988952637, "step": 1234 }, { "epoch": 0.33559782608695654, "grad_norm": 1.2123634307141993, "learning_rate": 1.925249116542334e-05, "loss": 0.6282012462615967, "step": 1235 }, { "epoch": 0.3358695652173913, "grad_norm": 0.9472647035338833, "learning_rate": 1.9250785719442905e-05, "loss": 0.46335986256599426, "step": 1236 }, { "epoch": 0.3361413043478261, "grad_norm": 0.9588142031387018, "learning_rate": 1.9249078405898353e-05, "loss": 0.46744805574417114, "step": 1237 }, { "epoch": 0.33641304347826084, "grad_norm": 1.0322035367393834, "learning_rate": 1.9247369225134368e-05, "loss": 0.5234630107879639, "step": 1238 }, { "epoch": 0.33668478260869567, "grad_norm": 0.896865791125244, "learning_rate": 1.924565817749599e-05, "loss": 0.40379059314727783, "step": 1239 }, { "epoch": 0.33695652173913043, "grad_norm": 0.9991597926761718, "learning_rate": 1.924394526332866e-05, "loss": 0.5394446849822998, "step": 1240 }, { "epoch": 0.3372282608695652, "grad_norm": 0.8948954483985988, "learning_rate": 1.924223048297818e-05, "loss": 0.511469841003418, "step": 1241 }, { "epoch": 0.3375, "grad_norm": 1.1673604890873799, "learning_rate": 1.924051383679073e-05, "loss": 0.526949942111969, "step": 1242 }, { "epoch": 0.3377717391304348, "grad_norm": 0.9938871933620657, "learning_rate": 1.9238795325112867e-05, "loss": 0.48447149991989136, "step": 1243 }, { "epoch": 0.33804347826086956, "grad_norm": 1.0995240445458037, "learning_rate": 1.9237074948291536e-05, "loss": 0.5887590646743774, "step": 1244 }, { "epoch": 0.3383152173913043, "grad_norm": 1.077805069114864, "learning_rate": 1.9235352706674046e-05, "loss": 0.4921104311943054, "step": 1245 }, { "epoch": 0.33858695652173915, "grad_norm": 1.057647299270578, "learning_rate": 1.923362860060808e-05, "loss": 0.5258573293685913, "step": 1246 }, { "epoch": 0.3388586956521739, "grad_norm": 1.2370647883152726, "learning_rate": 1.9231902630441713e-05, "loss": 0.6188702583312988, "step": 1247 }, { "epoch": 0.3391304347826087, "grad_norm": 1.0572763715409077, "learning_rate": 1.923017479652338e-05, "loss": 0.5833462476730347, "step": 1248 }, { "epoch": 0.3394021739130435, "grad_norm": 1.074339698958743, "learning_rate": 1.92284450992019e-05, "loss": 0.47408372163772583, "step": 1249 }, { "epoch": 0.33967391304347827, "grad_norm": 1.069477444551191, "learning_rate": 1.922671353882647e-05, "loss": 0.5167064070701599, "step": 1250 }, { "epoch": 0.33994565217391304, "grad_norm": 1.0113119282479661, "learning_rate": 1.9224980115746658e-05, "loss": 0.6190445423126221, "step": 1251 }, { "epoch": 0.3402173913043478, "grad_norm": 0.9753254405222176, "learning_rate": 1.9223244830312408e-05, "loss": 0.501988410949707, "step": 1252 }, { "epoch": 0.3404891304347826, "grad_norm": 0.7953909563558057, "learning_rate": 1.9221507682874047e-05, "loss": 0.38844624161720276, "step": 1253 }, { "epoch": 0.3407608695652174, "grad_norm": 1.079337841573146, "learning_rate": 1.9219768673782273e-05, "loss": 0.5574384927749634, "step": 1254 }, { "epoch": 0.34103260869565216, "grad_norm": 1.039526268484624, "learning_rate": 1.9218027803388157e-05, "loss": 0.5582293272018433, "step": 1255 }, { "epoch": 0.34130434782608693, "grad_norm": 1.0728312163708178, "learning_rate": 1.921628507204315e-05, "loss": 0.5574272871017456, "step": 1256 }, { "epoch": 0.34157608695652175, "grad_norm": 0.9122154473378936, "learning_rate": 1.921454048009908e-05, "loss": 0.47853031754493713, "step": 1257 }, { "epoch": 0.3418478260869565, "grad_norm": 1.0293658277073328, "learning_rate": 1.9212794027908144e-05, "loss": 0.5948014259338379, "step": 1258 }, { "epoch": 0.3421195652173913, "grad_norm": 1.00244076112815, "learning_rate": 1.921104571582292e-05, "loss": 0.5263457894325256, "step": 1259 }, { "epoch": 0.3423913043478261, "grad_norm": 0.9824117607455571, "learning_rate": 1.920929554419636e-05, "loss": 0.5043308138847351, "step": 1260 }, { "epoch": 0.3426630434782609, "grad_norm": 1.0223526865509138, "learning_rate": 1.9207543513381793e-05, "loss": 0.504561722278595, "step": 1261 }, { "epoch": 0.34293478260869564, "grad_norm": 0.960495316588045, "learning_rate": 1.9205789623732923e-05, "loss": 0.4791363477706909, "step": 1262 }, { "epoch": 0.3432065217391304, "grad_norm": 0.9018821511559403, "learning_rate": 1.9204033875603824e-05, "loss": 0.476654589176178, "step": 1263 }, { "epoch": 0.34347826086956523, "grad_norm": 0.9824425026602508, "learning_rate": 1.9202276269348957e-05, "loss": 0.4218780994415283, "step": 1264 }, { "epoch": 0.34375, "grad_norm": 1.0157621843861522, "learning_rate": 1.920051680532314e-05, "loss": 0.4959160089492798, "step": 1265 }, { "epoch": 0.34402173913043477, "grad_norm": 0.9275808594034036, "learning_rate": 1.9198755483881585e-05, "loss": 0.4820437729358673, "step": 1266 }, { "epoch": 0.3442934782608696, "grad_norm": 1.004280687482042, "learning_rate": 1.9196992305379868e-05, "loss": 0.5039849877357483, "step": 1267 }, { "epoch": 0.34456521739130436, "grad_norm": 1.0346143682138638, "learning_rate": 1.9195227270173938e-05, "loss": 0.4967840313911438, "step": 1268 }, { "epoch": 0.3448369565217391, "grad_norm": 0.8668943091205658, "learning_rate": 1.9193460378620128e-05, "loss": 0.408751904964447, "step": 1269 }, { "epoch": 0.3451086956521739, "grad_norm": 0.9460495068533591, "learning_rate": 1.919169163107514e-05, "loss": 0.4249446988105774, "step": 1270 }, { "epoch": 0.3453804347826087, "grad_norm": 1.0690644555548618, "learning_rate": 1.9189921027896056e-05, "loss": 0.542231559753418, "step": 1271 }, { "epoch": 0.3456521739130435, "grad_norm": 1.1726258751348577, "learning_rate": 1.918814856944032e-05, "loss": 0.5614209175109863, "step": 1272 }, { "epoch": 0.34592391304347825, "grad_norm": 1.077276122004747, "learning_rate": 1.9186374256065763e-05, "loss": 0.5788774490356445, "step": 1273 }, { "epoch": 0.34619565217391307, "grad_norm": 1.0040825030198184, "learning_rate": 1.9184598088130587e-05, "loss": 0.5114722847938538, "step": 1274 }, { "epoch": 0.34646739130434784, "grad_norm": 0.8724019722266386, "learning_rate": 1.9182820065993367e-05, "loss": 0.43507128953933716, "step": 1275 }, { "epoch": 0.3467391304347826, "grad_norm": 1.0687036287984029, "learning_rate": 1.9181040190013055e-05, "loss": 0.573681116104126, "step": 1276 }, { "epoch": 0.3470108695652174, "grad_norm": 1.0721607390269907, "learning_rate": 1.9179258460548974e-05, "loss": 0.5346007943153381, "step": 1277 }, { "epoch": 0.3472826086956522, "grad_norm": 0.9427217170364577, "learning_rate": 1.917747487796082e-05, "loss": 0.4235379695892334, "step": 1278 }, { "epoch": 0.34755434782608696, "grad_norm": 1.027170599171524, "learning_rate": 1.9175689442608667e-05, "loss": 0.5386368632316589, "step": 1279 }, { "epoch": 0.34782608695652173, "grad_norm": 1.0692834393557782, "learning_rate": 1.9173902154852966e-05, "loss": 0.4353603720664978, "step": 1280 }, { "epoch": 0.3480978260869565, "grad_norm": 1.1148135529946193, "learning_rate": 1.917211301505453e-05, "loss": 0.5251810550689697, "step": 1281 }, { "epoch": 0.3483695652173913, "grad_norm": 0.9913118027129251, "learning_rate": 1.9170322023574562e-05, "loss": 0.4903816878795624, "step": 1282 }, { "epoch": 0.3486413043478261, "grad_norm": 0.990658716097297, "learning_rate": 1.9168529180774623e-05, "loss": 0.49942153692245483, "step": 1283 }, { "epoch": 0.34891304347826085, "grad_norm": 0.9755874586681349, "learning_rate": 1.916673448701666e-05, "loss": 0.5333443880081177, "step": 1284 }, { "epoch": 0.3491847826086957, "grad_norm": 1.1577002683230107, "learning_rate": 1.9164937942662985e-05, "loss": 0.621048092842102, "step": 1285 }, { "epoch": 0.34945652173913044, "grad_norm": 1.1044732585387889, "learning_rate": 1.9163139548076287e-05, "loss": 0.5736238956451416, "step": 1286 }, { "epoch": 0.3497282608695652, "grad_norm": 0.9711636720380283, "learning_rate": 1.9161339303619637e-05, "loss": 0.405206561088562, "step": 1287 }, { "epoch": 0.35, "grad_norm": 1.086467384338973, "learning_rate": 1.9159537209656463e-05, "loss": 0.5148593783378601, "step": 1288 }, { "epoch": 0.3502717391304348, "grad_norm": 0.8783749972354539, "learning_rate": 1.9157733266550577e-05, "loss": 0.4613136053085327, "step": 1289 }, { "epoch": 0.35054347826086957, "grad_norm": 1.0338817933594362, "learning_rate": 1.9155927474666162e-05, "loss": 0.5783101320266724, "step": 1290 }, { "epoch": 0.35081521739130433, "grad_norm": 1.030948667116423, "learning_rate": 1.9154119834367774e-05, "loss": 0.4715287983417511, "step": 1291 }, { "epoch": 0.35108695652173916, "grad_norm": 1.1402446286798587, "learning_rate": 1.9152310346020345e-05, "loss": 0.5771941542625427, "step": 1292 }, { "epoch": 0.3513586956521739, "grad_norm": 1.0718049506914087, "learning_rate": 1.9150499009989174e-05, "loss": 0.5450021028518677, "step": 1293 }, { "epoch": 0.3516304347826087, "grad_norm": 1.0171095170378128, "learning_rate": 1.914868582663994e-05, "loss": 0.5242140293121338, "step": 1294 }, { "epoch": 0.35190217391304346, "grad_norm": 1.143076968681501, "learning_rate": 1.914687079633869e-05, "loss": 0.6076923608779907, "step": 1295 }, { "epoch": 0.3521739130434783, "grad_norm": 0.8977664556799269, "learning_rate": 1.914505391945184e-05, "loss": 0.47057315707206726, "step": 1296 }, { "epoch": 0.35244565217391305, "grad_norm": 0.9777717964777722, "learning_rate": 1.9143235196346194e-05, "loss": 0.5054848194122314, "step": 1297 }, { "epoch": 0.3527173913043478, "grad_norm": 1.0295047380907387, "learning_rate": 1.9141414627388913e-05, "loss": 0.5366887450218201, "step": 1298 }, { "epoch": 0.3529891304347826, "grad_norm": 0.8942258869271137, "learning_rate": 1.9139592212947536e-05, "loss": 0.39782148599624634, "step": 1299 }, { "epoch": 0.3532608695652174, "grad_norm": 1.0505595769717169, "learning_rate": 1.913776795338998e-05, "loss": 0.5694347620010376, "step": 1300 }, { "epoch": 0.3535326086956522, "grad_norm": 0.9952381913388348, "learning_rate": 1.9135941849084522e-05, "loss": 0.5033687949180603, "step": 1301 }, { "epoch": 0.35380434782608694, "grad_norm": 0.9990592756605124, "learning_rate": 1.9134113900399824e-05, "loss": 0.5098665356636047, "step": 1302 }, { "epoch": 0.35407608695652176, "grad_norm": 1.1950130609751064, "learning_rate": 1.9132284107704918e-05, "loss": 0.5823764801025391, "step": 1303 }, { "epoch": 0.35434782608695653, "grad_norm": 1.1256191350932359, "learning_rate": 1.9130452471369198e-05, "loss": 0.6468843221664429, "step": 1304 }, { "epoch": 0.3546195652173913, "grad_norm": 1.0311036795567072, "learning_rate": 1.9128618991762442e-05, "loss": 0.5127092599868774, "step": 1305 }, { "epoch": 0.35489130434782606, "grad_norm": 1.0895701005402734, "learning_rate": 1.91267836692548e-05, "loss": 0.5889393091201782, "step": 1306 }, { "epoch": 0.3551630434782609, "grad_norm": 0.9764239494555302, "learning_rate": 1.9124946504216782e-05, "loss": 0.5352268218994141, "step": 1307 }, { "epoch": 0.35543478260869565, "grad_norm": 0.9840345694863014, "learning_rate": 1.9123107497019285e-05, "loss": 0.4295724034309387, "step": 1308 }, { "epoch": 0.3557065217391304, "grad_norm": 1.0534797758645338, "learning_rate": 1.9121266648033564e-05, "loss": 0.6550331115722656, "step": 1309 }, { "epoch": 0.35597826086956524, "grad_norm": 0.996246731697938, "learning_rate": 1.911942395763126e-05, "loss": 0.4489114582538605, "step": 1310 }, { "epoch": 0.35625, "grad_norm": 1.042046018430461, "learning_rate": 1.9117579426184374e-05, "loss": 0.47955331206321716, "step": 1311 }, { "epoch": 0.3565217391304348, "grad_norm": 1.0435422249806134, "learning_rate": 1.911573305406528e-05, "loss": 0.5559269189834595, "step": 1312 }, { "epoch": 0.35679347826086955, "grad_norm": 1.0753732005360286, "learning_rate": 1.9113884841646736e-05, "loss": 0.5491105318069458, "step": 1313 }, { "epoch": 0.35706521739130437, "grad_norm": 0.9701809579859638, "learning_rate": 1.9112034789301854e-05, "loss": 0.4909067749977112, "step": 1314 }, { "epoch": 0.35733695652173914, "grad_norm": 0.9639024385014869, "learning_rate": 1.911018289740413e-05, "loss": 0.5561282634735107, "step": 1315 }, { "epoch": 0.3576086956521739, "grad_norm": 1.050947893203117, "learning_rate": 1.9108329166327425e-05, "loss": 0.5498752593994141, "step": 1316 }, { "epoch": 0.35788043478260867, "grad_norm": 1.1087569410476563, "learning_rate": 1.910647359644597e-05, "loss": 0.47929978370666504, "step": 1317 }, { "epoch": 0.3581521739130435, "grad_norm": 0.964713917930974, "learning_rate": 1.910461618813438e-05, "loss": 0.4534458518028259, "step": 1318 }, { "epoch": 0.35842391304347826, "grad_norm": 0.8009036953768408, "learning_rate": 1.9102756941767625e-05, "loss": 0.35856202244758606, "step": 1319 }, { "epoch": 0.358695652173913, "grad_norm": 0.9857099513606634, "learning_rate": 1.910089585772105e-05, "loss": 0.5298299789428711, "step": 1320 }, { "epoch": 0.35896739130434785, "grad_norm": 1.0613305371976907, "learning_rate": 1.9099032936370382e-05, "loss": 0.5539020299911499, "step": 1321 }, { "epoch": 0.3592391304347826, "grad_norm": 1.1290272691834042, "learning_rate": 1.9097168178091703e-05, "loss": 0.5555785894393921, "step": 1322 }, { "epoch": 0.3595108695652174, "grad_norm": 1.0300123617673522, "learning_rate": 1.909530158326148e-05, "loss": 0.5676893591880798, "step": 1323 }, { "epoch": 0.35978260869565215, "grad_norm": 0.9990046538222168, "learning_rate": 1.909343315225654e-05, "loss": 0.6179074048995972, "step": 1324 }, { "epoch": 0.360054347826087, "grad_norm": 0.9882252971331382, "learning_rate": 1.9091562885454085e-05, "loss": 0.5272097587585449, "step": 1325 }, { "epoch": 0.36032608695652174, "grad_norm": 1.569654773941592, "learning_rate": 1.908969078323169e-05, "loss": 0.44543755054473877, "step": 1326 }, { "epoch": 0.3605978260869565, "grad_norm": 0.8366172751779444, "learning_rate": 1.9087816845967296e-05, "loss": 0.36076292395591736, "step": 1327 }, { "epoch": 0.36086956521739133, "grad_norm": 1.1529970594851195, "learning_rate": 1.9085941074039216e-05, "loss": 0.5612422227859497, "step": 1328 }, { "epoch": 0.3611413043478261, "grad_norm": 0.8996209111627432, "learning_rate": 1.9084063467826137e-05, "loss": 0.43764805793762207, "step": 1329 }, { "epoch": 0.36141304347826086, "grad_norm": 1.1134203118985952, "learning_rate": 1.908218402770711e-05, "loss": 0.6044945120811462, "step": 1330 }, { "epoch": 0.36168478260869563, "grad_norm": 0.8728285394653037, "learning_rate": 1.9080302754061564e-05, "loss": 0.4356845021247864, "step": 1331 }, { "epoch": 0.36195652173913045, "grad_norm": 0.9736219097968496, "learning_rate": 1.9078419647269292e-05, "loss": 0.5004799962043762, "step": 1332 }, { "epoch": 0.3622282608695652, "grad_norm": 1.038036103137875, "learning_rate": 1.9076534707710453e-05, "loss": 0.4905019700527191, "step": 1333 }, { "epoch": 0.3625, "grad_norm": 1.0563402692990305, "learning_rate": 1.907464793576559e-05, "loss": 0.47533178329467773, "step": 1334 }, { "epoch": 0.36277173913043476, "grad_norm": 0.9039706490034921, "learning_rate": 1.9072759331815602e-05, "loss": 0.4719272255897522, "step": 1335 }, { "epoch": 0.3630434782608696, "grad_norm": 0.921662141635178, "learning_rate": 1.9070868896241767e-05, "loss": 0.5273393392562866, "step": 1336 }, { "epoch": 0.36331521739130435, "grad_norm": 0.9158630568504256, "learning_rate": 1.9068976629425725e-05, "loss": 0.36915794014930725, "step": 1337 }, { "epoch": 0.3635869565217391, "grad_norm": 1.0446863649281644, "learning_rate": 1.9067082531749496e-05, "loss": 0.46726638078689575, "step": 1338 }, { "epoch": 0.36385869565217394, "grad_norm": 1.126791860290743, "learning_rate": 1.906518660359546e-05, "loss": 0.600965142250061, "step": 1339 }, { "epoch": 0.3641304347826087, "grad_norm": 1.1315389528051434, "learning_rate": 1.906328884534637e-05, "loss": 0.5977663993835449, "step": 1340 }, { "epoch": 0.36440217391304347, "grad_norm": 1.0466739158636504, "learning_rate": 1.9061389257385347e-05, "loss": 0.5691638588905334, "step": 1341 }, { "epoch": 0.36467391304347824, "grad_norm": 0.9466881207772946, "learning_rate": 1.905948784009589e-05, "loss": 0.49683696031570435, "step": 1342 }, { "epoch": 0.36494565217391306, "grad_norm": 0.9683974489836348, "learning_rate": 1.905758459386185e-05, "loss": 0.43387115001678467, "step": 1343 }, { "epoch": 0.3652173913043478, "grad_norm": 1.0019290376219727, "learning_rate": 1.905567951906747e-05, "loss": 0.5895763635635376, "step": 1344 }, { "epoch": 0.3654891304347826, "grad_norm": 0.9067858727151743, "learning_rate": 1.905377261609734e-05, "loss": 0.4398406744003296, "step": 1345 }, { "epoch": 0.3657608695652174, "grad_norm": 0.883758634753738, "learning_rate": 1.905186388533643e-05, "loss": 0.42849963903427124, "step": 1346 }, { "epoch": 0.3660326086956522, "grad_norm": 1.006807871353771, "learning_rate": 1.9049953327170078e-05, "loss": 0.5336288809776306, "step": 1347 }, { "epoch": 0.36630434782608695, "grad_norm": 1.065804948390523, "learning_rate": 1.9048040941983996e-05, "loss": 0.5222008228302002, "step": 1348 }, { "epoch": 0.3665760869565217, "grad_norm": 1.0357895451033698, "learning_rate": 1.9046126730164252e-05, "loss": 0.5376695394515991, "step": 1349 }, { "epoch": 0.36684782608695654, "grad_norm": 1.0523083827923303, "learning_rate": 1.90442106920973e-05, "loss": 0.5863019227981567, "step": 1350 }, { "epoch": 0.3671195652173913, "grad_norm": 0.9071994727852133, "learning_rate": 1.904229282816994e-05, "loss": 0.37624460458755493, "step": 1351 }, { "epoch": 0.3673913043478261, "grad_norm": 0.9748125555115904, "learning_rate": 1.904037313876936e-05, "loss": 0.4653872847557068, "step": 1352 }, { "epoch": 0.36766304347826084, "grad_norm": 1.0797873456322353, "learning_rate": 1.9038451624283113e-05, "loss": 0.5883658528327942, "step": 1353 }, { "epoch": 0.36793478260869567, "grad_norm": 0.7444644602364682, "learning_rate": 1.9036528285099116e-05, "loss": 0.3039018511772156, "step": 1354 }, { "epoch": 0.36820652173913043, "grad_norm": 1.021342877563796, "learning_rate": 1.9034603121605654e-05, "loss": 0.5014551877975464, "step": 1355 }, { "epoch": 0.3684782608695652, "grad_norm": 1.11621244610238, "learning_rate": 1.9032676134191384e-05, "loss": 0.6027573347091675, "step": 1356 }, { "epoch": 0.36875, "grad_norm": 1.063549242388566, "learning_rate": 1.903074732324533e-05, "loss": 0.5868324637413025, "step": 1357 }, { "epoch": 0.3690217391304348, "grad_norm": 0.9929133164695885, "learning_rate": 1.902881668915688e-05, "loss": 0.46725720167160034, "step": 1358 }, { "epoch": 0.36929347826086956, "grad_norm": 1.0461592623917093, "learning_rate": 1.9026884232315796e-05, "loss": 0.48339319229125977, "step": 1359 }, { "epoch": 0.3695652173913043, "grad_norm": 1.0423967575263926, "learning_rate": 1.9024949953112205e-05, "loss": 0.4785802364349365, "step": 1360 }, { "epoch": 0.36983695652173915, "grad_norm": 0.819396305372048, "learning_rate": 1.9023013851936603e-05, "loss": 0.3265475332736969, "step": 1361 }, { "epoch": 0.3701086956521739, "grad_norm": 1.0027333524372486, "learning_rate": 1.9021075929179854e-05, "loss": 0.4934942126274109, "step": 1362 }, { "epoch": 0.3703804347826087, "grad_norm": 1.0644652010710487, "learning_rate": 1.901913618523319e-05, "loss": 0.6192651987075806, "step": 1363 }, { "epoch": 0.3706521739130435, "grad_norm": 1.1324067438936507, "learning_rate": 1.9017194620488207e-05, "loss": 0.5553048849105835, "step": 1364 }, { "epoch": 0.37092391304347827, "grad_norm": 1.0572351509840803, "learning_rate": 1.9015251235336873e-05, "loss": 0.5232211947441101, "step": 1365 }, { "epoch": 0.37119565217391304, "grad_norm": 1.0099541048854266, "learning_rate": 1.901330603017152e-05, "loss": 0.47895073890686035, "step": 1366 }, { "epoch": 0.3714673913043478, "grad_norm": 1.0697803154952852, "learning_rate": 1.901135900538485e-05, "loss": 0.6247537136077881, "step": 1367 }, { "epoch": 0.3717391304347826, "grad_norm": 1.1361601230334408, "learning_rate": 1.9009410161369936e-05, "loss": 0.5702563524246216, "step": 1368 }, { "epoch": 0.3720108695652174, "grad_norm": 1.0910447129035168, "learning_rate": 1.9007459498520203e-05, "loss": 0.5595517158508301, "step": 1369 }, { "epoch": 0.37228260869565216, "grad_norm": 0.9859559856954873, "learning_rate": 1.900550701722947e-05, "loss": 0.47163575887680054, "step": 1370 }, { "epoch": 0.37255434782608693, "grad_norm": 0.947423578683019, "learning_rate": 1.9003552717891893e-05, "loss": 0.4756416082382202, "step": 1371 }, { "epoch": 0.37282608695652175, "grad_norm": 0.7676734933935327, "learning_rate": 1.9001596600902015e-05, "loss": 0.3735104203224182, "step": 1372 }, { "epoch": 0.3730978260869565, "grad_norm": 1.0741562251454329, "learning_rate": 1.8999638666654744e-05, "loss": 0.4919944107532501, "step": 1373 }, { "epoch": 0.3733695652173913, "grad_norm": 0.9844957891476577, "learning_rate": 1.8997678915545345e-05, "loss": 0.4532482624053955, "step": 1374 }, { "epoch": 0.3736413043478261, "grad_norm": 0.8709785072759806, "learning_rate": 1.8995717347969454e-05, "loss": 0.3883553147315979, "step": 1375 }, { "epoch": 0.3739130434782609, "grad_norm": 0.932914994960021, "learning_rate": 1.8993753964323086e-05, "loss": 0.4320828318595886, "step": 1376 }, { "epoch": 0.37418478260869564, "grad_norm": 1.0131377892227038, "learning_rate": 1.89917887650026e-05, "loss": 0.5475602149963379, "step": 1377 }, { "epoch": 0.3744565217391304, "grad_norm": 1.178594906900805, "learning_rate": 1.8989821750404743e-05, "loss": 0.6412314176559448, "step": 1378 }, { "epoch": 0.37472826086956523, "grad_norm": 1.102942117595813, "learning_rate": 1.898785292092661e-05, "loss": 0.6124590635299683, "step": 1379 }, { "epoch": 0.375, "grad_norm": 0.996507181188879, "learning_rate": 1.898588227696568e-05, "loss": 0.5264197587966919, "step": 1380 }, { "epoch": 0.37527173913043477, "grad_norm": 1.0495122626779336, "learning_rate": 1.898390981891979e-05, "loss": 0.6047711372375488, "step": 1381 }, { "epoch": 0.3755434782608696, "grad_norm": 1.195620698704646, "learning_rate": 1.8981935547187137e-05, "loss": 0.5747646689414978, "step": 1382 }, { "epoch": 0.37581521739130436, "grad_norm": 0.8904095395049859, "learning_rate": 1.8979959462166295e-05, "loss": 0.4523858428001404, "step": 1383 }, { "epoch": 0.3760869565217391, "grad_norm": 0.9408062751270408, "learning_rate": 1.8977981564256197e-05, "loss": 0.42428189516067505, "step": 1384 }, { "epoch": 0.3763586956521739, "grad_norm": 0.9853747224182283, "learning_rate": 1.8976001853856142e-05, "loss": 0.4721584916114807, "step": 1385 }, { "epoch": 0.3766304347826087, "grad_norm": 0.9578545948900112, "learning_rate": 1.8974020331365804e-05, "loss": 0.479347825050354, "step": 1386 }, { "epoch": 0.3769021739130435, "grad_norm": 1.029344288062538, "learning_rate": 1.8972036997185213e-05, "loss": 0.5749399662017822, "step": 1387 }, { "epoch": 0.37717391304347825, "grad_norm": 0.8885109356207753, "learning_rate": 1.8970051851714765e-05, "loss": 0.46959003806114197, "step": 1388 }, { "epoch": 0.37744565217391307, "grad_norm": 1.0177174473844233, "learning_rate": 1.896806489535523e-05, "loss": 0.5416974425315857, "step": 1389 }, { "epoch": 0.37771739130434784, "grad_norm": 0.9300964384518028, "learning_rate": 1.8966076128507728e-05, "loss": 0.48038530349731445, "step": 1390 }, { "epoch": 0.3779891304347826, "grad_norm": 0.9331963845681468, "learning_rate": 1.8964085551573767e-05, "loss": 0.4631877541542053, "step": 1391 }, { "epoch": 0.3782608695652174, "grad_norm": 0.8692798682597149, "learning_rate": 1.89620931649552e-05, "loss": 0.4260241687297821, "step": 1392 }, { "epoch": 0.3785326086956522, "grad_norm": 1.0283987869216837, "learning_rate": 1.8960098969054253e-05, "loss": 0.5083195567131042, "step": 1393 }, { "epoch": 0.37880434782608696, "grad_norm": 0.9093313347256304, "learning_rate": 1.8958102964273525e-05, "loss": 0.42282676696777344, "step": 1394 }, { "epoch": 0.37907608695652173, "grad_norm": 1.0595993187760895, "learning_rate": 1.8956105151015966e-05, "loss": 0.49968165159225464, "step": 1395 }, { "epoch": 0.3793478260869565, "grad_norm": 1.0318881572183936, "learning_rate": 1.8954105529684904e-05, "loss": 0.5087170004844666, "step": 1396 }, { "epoch": 0.3796195652173913, "grad_norm": 0.9972782736654779, "learning_rate": 1.8952104100684017e-05, "loss": 0.45870453119277954, "step": 1397 }, { "epoch": 0.3798913043478261, "grad_norm": 1.0340959960062124, "learning_rate": 1.8950100864417363e-05, "loss": 0.4970160722732544, "step": 1398 }, { "epoch": 0.38016304347826085, "grad_norm": 0.9401928828897385, "learning_rate": 1.8948095821289358e-05, "loss": 0.49058568477630615, "step": 1399 }, { "epoch": 0.3804347826086957, "grad_norm": 1.103919721796928, "learning_rate": 1.8946088971704784e-05, "loss": 0.5590236186981201, "step": 1400 }, { "epoch": 0.38070652173913044, "grad_norm": 0.9667892418656164, "learning_rate": 1.8944080316068785e-05, "loss": 0.45060789585113525, "step": 1401 }, { "epoch": 0.3809782608695652, "grad_norm": 1.0967548856857632, "learning_rate": 1.894206985478687e-05, "loss": 0.5592513084411621, "step": 1402 }, { "epoch": 0.38125, "grad_norm": 1.0804707524562616, "learning_rate": 1.8940057588264918e-05, "loss": 0.5532534122467041, "step": 1403 }, { "epoch": 0.3815217391304348, "grad_norm": 1.020657653596906, "learning_rate": 1.8938043516909173e-05, "loss": 0.4496728777885437, "step": 1404 }, { "epoch": 0.38179347826086957, "grad_norm": 1.0141479490640941, "learning_rate": 1.8936027641126228e-05, "loss": 0.5559653043746948, "step": 1405 }, { "epoch": 0.38206521739130433, "grad_norm": 1.1272518508278666, "learning_rate": 1.893400996132306e-05, "loss": 0.6220996379852295, "step": 1406 }, { "epoch": 0.38233695652173916, "grad_norm": 0.876998128100255, "learning_rate": 1.8931990477906997e-05, "loss": 0.4667511284351349, "step": 1407 }, { "epoch": 0.3826086956521739, "grad_norm": 0.8455922756355162, "learning_rate": 1.892996919128574e-05, "loss": 0.4081432819366455, "step": 1408 }, { "epoch": 0.3828804347826087, "grad_norm": 1.1076003371774714, "learning_rate": 1.8927946101867348e-05, "loss": 0.5554268956184387, "step": 1409 }, { "epoch": 0.38315217391304346, "grad_norm": 0.7675337344413009, "learning_rate": 1.892592121006024e-05, "loss": 0.3357534110546112, "step": 1410 }, { "epoch": 0.3834239130434783, "grad_norm": 1.1994698513563922, "learning_rate": 1.8923894516273216e-05, "loss": 0.6162464022636414, "step": 1411 }, { "epoch": 0.38369565217391305, "grad_norm": 0.8634299300522541, "learning_rate": 1.8921866020915418e-05, "loss": 0.45067957043647766, "step": 1412 }, { "epoch": 0.3839673913043478, "grad_norm": 0.9241418830291032, "learning_rate": 1.8919835724396363e-05, "loss": 0.4929599463939667, "step": 1413 }, { "epoch": 0.3842391304347826, "grad_norm": 1.1511887037628703, "learning_rate": 1.891780362712594e-05, "loss": 0.54661625623703, "step": 1414 }, { "epoch": 0.3845108695652174, "grad_norm": 1.0113341749438851, "learning_rate": 1.8915769729514384e-05, "loss": 0.493436723947525, "step": 1415 }, { "epoch": 0.3847826086956522, "grad_norm": 1.1215546828042786, "learning_rate": 1.8913734031972303e-05, "loss": 0.5519384145736694, "step": 1416 }, { "epoch": 0.38505434782608694, "grad_norm": 0.9902091165047551, "learning_rate": 1.8911696534910664e-05, "loss": 0.4353044033050537, "step": 1417 }, { "epoch": 0.38532608695652176, "grad_norm": 1.074319707919455, "learning_rate": 1.890965723874081e-05, "loss": 0.5987840294837952, "step": 1418 }, { "epoch": 0.38559782608695653, "grad_norm": 1.2233852836670316, "learning_rate": 1.8907616143874427e-05, "loss": 0.6335042715072632, "step": 1419 }, { "epoch": 0.3858695652173913, "grad_norm": 1.161927805547334, "learning_rate": 1.890557325072358e-05, "loss": 0.5027320384979248, "step": 1420 }, { "epoch": 0.38614130434782606, "grad_norm": 0.9902226458988067, "learning_rate": 1.890352855970069e-05, "loss": 0.4954683184623718, "step": 1421 }, { "epoch": 0.3864130434782609, "grad_norm": 1.001051308823507, "learning_rate": 1.8901482071218543e-05, "loss": 0.5488248467445374, "step": 1422 }, { "epoch": 0.38668478260869565, "grad_norm": 1.0815743681107934, "learning_rate": 1.889943378569029e-05, "loss": 0.5772058367729187, "step": 1423 }, { "epoch": 0.3869565217391304, "grad_norm": 0.9442878020847829, "learning_rate": 1.8897383703529435e-05, "loss": 0.45282596349716187, "step": 1424 }, { "epoch": 0.38722826086956524, "grad_norm": 1.0064001902514859, "learning_rate": 1.889533182514986e-05, "loss": 0.5564418435096741, "step": 1425 }, { "epoch": 0.3875, "grad_norm": 1.0896574586004888, "learning_rate": 1.8893278150965802e-05, "loss": 0.5928847789764404, "step": 1426 }, { "epoch": 0.3877717391304348, "grad_norm": 1.0381955179817037, "learning_rate": 1.8891222681391853e-05, "loss": 0.5086722373962402, "step": 1427 }, { "epoch": 0.38804347826086955, "grad_norm": 1.025210037775697, "learning_rate": 1.8889165416842982e-05, "loss": 0.5241433382034302, "step": 1428 }, { "epoch": 0.38831521739130437, "grad_norm": 0.8281537720493105, "learning_rate": 1.8887106357734506e-05, "loss": 0.4392186999320984, "step": 1429 }, { "epoch": 0.38858695652173914, "grad_norm": 1.0598199301424651, "learning_rate": 1.8885045504482116e-05, "loss": 0.568131685256958, "step": 1430 }, { "epoch": 0.3888586956521739, "grad_norm": 0.9919742898199518, "learning_rate": 1.888298285750186e-05, "loss": 0.4148816168308258, "step": 1431 }, { "epoch": 0.38913043478260867, "grad_norm": 1.0160702959341628, "learning_rate": 1.888091841721015e-05, "loss": 0.5675382614135742, "step": 1432 }, { "epoch": 0.3894021739130435, "grad_norm": 1.1210360230780905, "learning_rate": 1.8878852184023754e-05, "loss": 0.4712430238723755, "step": 1433 }, { "epoch": 0.38967391304347826, "grad_norm": 1.0111729549176296, "learning_rate": 1.887678415835981e-05, "loss": 0.4777193069458008, "step": 1434 }, { "epoch": 0.389945652173913, "grad_norm": 1.0706741211361201, "learning_rate": 1.8874714340635813e-05, "loss": 0.5624985694885254, "step": 1435 }, { "epoch": 0.39021739130434785, "grad_norm": 1.120260227608244, "learning_rate": 1.8872642731269623e-05, "loss": 0.592126727104187, "step": 1436 }, { "epoch": 0.3904891304347826, "grad_norm": 0.9685986346072196, "learning_rate": 1.887056933067946e-05, "loss": 0.5321649312973022, "step": 1437 }, { "epoch": 0.3907608695652174, "grad_norm": 0.9006725627760342, "learning_rate": 1.8868494139283903e-05, "loss": 0.42497074604034424, "step": 1438 }, { "epoch": 0.39103260869565215, "grad_norm": 1.0072100772415673, "learning_rate": 1.88664171575019e-05, "loss": 0.5971910953521729, "step": 1439 }, { "epoch": 0.391304347826087, "grad_norm": 0.9647339776037956, "learning_rate": 1.886433838575275e-05, "loss": 0.5377825498580933, "step": 1440 }, { "epoch": 0.39157608695652174, "grad_norm": 1.0853721720800058, "learning_rate": 1.886225782445612e-05, "loss": 0.5341800451278687, "step": 1441 }, { "epoch": 0.3918478260869565, "grad_norm": 1.1318080200277447, "learning_rate": 1.886017547403204e-05, "loss": 0.6394361257553101, "step": 1442 }, { "epoch": 0.39211956521739133, "grad_norm": 0.9943259378176287, "learning_rate": 1.8858091334900893e-05, "loss": 0.5188021659851074, "step": 1443 }, { "epoch": 0.3923913043478261, "grad_norm": 1.084309960912706, "learning_rate": 1.885600540748344e-05, "loss": 0.6031716465950012, "step": 1444 }, { "epoch": 0.39266304347826086, "grad_norm": 1.033503348433792, "learning_rate": 1.885391769220078e-05, "loss": 0.5310465097427368, "step": 1445 }, { "epoch": 0.39293478260869563, "grad_norm": 0.9696898651578028, "learning_rate": 1.8851828189474384e-05, "loss": 0.5266808271408081, "step": 1446 }, { "epoch": 0.39320652173913045, "grad_norm": 0.9207180954309087, "learning_rate": 1.8849736899726093e-05, "loss": 0.5025877356529236, "step": 1447 }, { "epoch": 0.3934782608695652, "grad_norm": 1.1996364147307317, "learning_rate": 1.8847643823378095e-05, "loss": 0.6195411682128906, "step": 1448 }, { "epoch": 0.39375, "grad_norm": 1.0339313408089588, "learning_rate": 1.8845548960852948e-05, "loss": 0.5905061960220337, "step": 1449 }, { "epoch": 0.39402173913043476, "grad_norm": 0.995468278271891, "learning_rate": 1.8843452312573557e-05, "loss": 0.502526044845581, "step": 1450 }, { "epoch": 0.3942934782608696, "grad_norm": 0.871261078406854, "learning_rate": 1.8841353878963203e-05, "loss": 0.3981224596500397, "step": 1451 }, { "epoch": 0.39456521739130435, "grad_norm": 0.8775911198363117, "learning_rate": 1.8839253660445523e-05, "loss": 0.42260584235191345, "step": 1452 }, { "epoch": 0.3948369565217391, "grad_norm": 1.0193407106270442, "learning_rate": 1.8837151657444513e-05, "loss": 0.49856311082839966, "step": 1453 }, { "epoch": 0.39510869565217394, "grad_norm": 1.0738123539795488, "learning_rate": 1.883504787038452e-05, "loss": 0.5257561206817627, "step": 1454 }, { "epoch": 0.3953804347826087, "grad_norm": 1.0284884193064414, "learning_rate": 1.8832942299690274e-05, "loss": 0.5851259231567383, "step": 1455 }, { "epoch": 0.39565217391304347, "grad_norm": 1.0658412082850655, "learning_rate": 1.883083494578684e-05, "loss": 0.5112056136131287, "step": 1456 }, { "epoch": 0.39592391304347824, "grad_norm": 1.01493799819051, "learning_rate": 1.8828725809099657e-05, "loss": 0.5049760937690735, "step": 1457 }, { "epoch": 0.39619565217391306, "grad_norm": 0.8185425086164236, "learning_rate": 1.8826614890054525e-05, "loss": 0.36750805377960205, "step": 1458 }, { "epoch": 0.3964673913043478, "grad_norm": 0.945120809227894, "learning_rate": 1.8824502189077593e-05, "loss": 0.49213606119155884, "step": 1459 }, { "epoch": 0.3967391304347826, "grad_norm": 0.9561133683201234, "learning_rate": 1.882238770659538e-05, "loss": 0.5049229860305786, "step": 1460 }, { "epoch": 0.3970108695652174, "grad_norm": 1.1941010969600696, "learning_rate": 1.8820271443034766e-05, "loss": 0.5586671829223633, "step": 1461 }, { "epoch": 0.3972826086956522, "grad_norm": 1.0218620911877185, "learning_rate": 1.8818153398822977e-05, "loss": 0.5276080965995789, "step": 1462 }, { "epoch": 0.39755434782608695, "grad_norm": 0.9866142936633144, "learning_rate": 1.8816033574387615e-05, "loss": 0.4585643410682678, "step": 1463 }, { "epoch": 0.3978260869565217, "grad_norm": 1.0744516343392594, "learning_rate": 1.881391197015663e-05, "loss": 0.5249114036560059, "step": 1464 }, { "epoch": 0.39809782608695654, "grad_norm": 1.1718891825938094, "learning_rate": 1.8811788586558334e-05, "loss": 0.6804819107055664, "step": 1465 }, { "epoch": 0.3983695652173913, "grad_norm": 1.023208811657123, "learning_rate": 1.88096634240214e-05, "loss": 0.540381669998169, "step": 1466 }, { "epoch": 0.3986413043478261, "grad_norm": 0.8224343649711794, "learning_rate": 1.880753648297486e-05, "loss": 0.382228285074234, "step": 1467 }, { "epoch": 0.39891304347826084, "grad_norm": 0.9662397592723672, "learning_rate": 1.880540776384811e-05, "loss": 0.5364258289337158, "step": 1468 }, { "epoch": 0.39918478260869567, "grad_norm": 1.025257225737901, "learning_rate": 1.8803277267070888e-05, "loss": 0.47076892852783203, "step": 1469 }, { "epoch": 0.39945652173913043, "grad_norm": 0.8409005337678286, "learning_rate": 1.8801144993073312e-05, "loss": 0.3433151841163635, "step": 1470 }, { "epoch": 0.3997282608695652, "grad_norm": 0.9769644190553174, "learning_rate": 1.879901094228584e-05, "loss": 0.546448826789856, "step": 1471 }, { "epoch": 0.4, "grad_norm": 0.9006816383513867, "learning_rate": 1.8796875115139307e-05, "loss": 0.41849908232688904, "step": 1472 }, { "epoch": 0.4002717391304348, "grad_norm": 0.9675747702054251, "learning_rate": 1.879473751206489e-05, "loss": 0.5047650337219238, "step": 1473 }, { "epoch": 0.40054347826086956, "grad_norm": 0.9124788025093282, "learning_rate": 1.879259813349414e-05, "loss": 0.410831481218338, "step": 1474 }, { "epoch": 0.4008152173913043, "grad_norm": 1.0033014718436153, "learning_rate": 1.879045697985895e-05, "loss": 0.5254583358764648, "step": 1475 }, { "epoch": 0.40108695652173915, "grad_norm": 1.0439552317573897, "learning_rate": 1.8788314051591583e-05, "loss": 0.5652742385864258, "step": 1476 }, { "epoch": 0.4013586956521739, "grad_norm": 0.9838604629485769, "learning_rate": 1.8786169349124658e-05, "loss": 0.4808654189109802, "step": 1477 }, { "epoch": 0.4016304347826087, "grad_norm": 1.0830226031099803, "learning_rate": 1.878402287289115e-05, "loss": 0.5104103088378906, "step": 1478 }, { "epoch": 0.4019021739130435, "grad_norm": 1.198190272255873, "learning_rate": 1.8781874623324396e-05, "loss": 0.5879234075546265, "step": 1479 }, { "epoch": 0.40217391304347827, "grad_norm": 1.138384733978382, "learning_rate": 1.8779724600858086e-05, "loss": 0.5483042001724243, "step": 1480 }, { "epoch": 0.40244565217391304, "grad_norm": 1.021040375049864, "learning_rate": 1.8777572805926267e-05, "loss": 0.4975246787071228, "step": 1481 }, { "epoch": 0.4027173913043478, "grad_norm": 0.9695787785766651, "learning_rate": 1.877541923896335e-05, "loss": 0.5263131260871887, "step": 1482 }, { "epoch": 0.4029891304347826, "grad_norm": 1.0630262543275486, "learning_rate": 1.87732639004041e-05, "loss": 0.5458682775497437, "step": 1483 }, { "epoch": 0.4032608695652174, "grad_norm": 0.831582780882438, "learning_rate": 1.877110679068364e-05, "loss": 0.3948633670806885, "step": 1484 }, { "epoch": 0.40353260869565216, "grad_norm": 0.9380416125229353, "learning_rate": 1.8768947910237452e-05, "loss": 0.41242510080337524, "step": 1485 }, { "epoch": 0.40380434782608693, "grad_norm": 1.2658025346813717, "learning_rate": 1.876678725950138e-05, "loss": 0.5980836153030396, "step": 1486 }, { "epoch": 0.40407608695652175, "grad_norm": 1.097448121810364, "learning_rate": 1.8764624838911608e-05, "loss": 0.5107351541519165, "step": 1487 }, { "epoch": 0.4043478260869565, "grad_norm": 0.9424282711913872, "learning_rate": 1.8762460648904696e-05, "loss": 0.47524696588516235, "step": 1488 }, { "epoch": 0.4046195652173913, "grad_norm": 1.0238390894401523, "learning_rate": 1.8760294689917556e-05, "loss": 0.5016785264015198, "step": 1489 }, { "epoch": 0.4048913043478261, "grad_norm": 1.0934735603535186, "learning_rate": 1.875812696238745e-05, "loss": 0.6490539312362671, "step": 1490 }, { "epoch": 0.4051630434782609, "grad_norm": 0.9569837367867202, "learning_rate": 1.8755957466752007e-05, "loss": 0.45956605672836304, "step": 1491 }, { "epoch": 0.40543478260869564, "grad_norm": 0.9975978348409077, "learning_rate": 1.8753786203449204e-05, "loss": 0.49448442459106445, "step": 1492 }, { "epoch": 0.4057065217391304, "grad_norm": 1.05070074864471, "learning_rate": 1.8751613172917386e-05, "loss": 0.5478475093841553, "step": 1493 }, { "epoch": 0.40597826086956523, "grad_norm": 0.9758856426861937, "learning_rate": 1.874943837559524e-05, "loss": 0.46835148334503174, "step": 1494 }, { "epoch": 0.40625, "grad_norm": 1.065564151199308, "learning_rate": 1.8747261811921825e-05, "loss": 0.5477917194366455, "step": 1495 }, { "epoch": 0.40652173913043477, "grad_norm": 1.1197557245651937, "learning_rate": 1.8745083482336547e-05, "loss": 0.5486472845077515, "step": 1496 }, { "epoch": 0.4067934782608696, "grad_norm": 1.130288302240526, "learning_rate": 1.8742903387279165e-05, "loss": 0.5495481491088867, "step": 1497 }, { "epoch": 0.40706521739130436, "grad_norm": 1.1753213198285344, "learning_rate": 1.8740721527189806e-05, "loss": 0.5587554574012756, "step": 1498 }, { "epoch": 0.4073369565217391, "grad_norm": 1.0750152858830075, "learning_rate": 1.873853790250895e-05, "loss": 0.49976223707199097, "step": 1499 }, { "epoch": 0.4076086956521739, "grad_norm": 0.890840337615306, "learning_rate": 1.8736352513677426e-05, "loss": 0.41266655921936035, "step": 1500 }, { "epoch": 0.4078804347826087, "grad_norm": 1.0547722798141623, "learning_rate": 1.8734165361136422e-05, "loss": 0.4685867130756378, "step": 1501 }, { "epoch": 0.4081521739130435, "grad_norm": 1.1524881521311525, "learning_rate": 1.8731976445327495e-05, "loss": 0.5274379849433899, "step": 1502 }, { "epoch": 0.40842391304347825, "grad_norm": 0.9693000134255899, "learning_rate": 1.8729785766692533e-05, "loss": 0.46485668420791626, "step": 1503 }, { "epoch": 0.40869565217391307, "grad_norm": 1.0808607242487465, "learning_rate": 1.87275933256738e-05, "loss": 0.5095617771148682, "step": 1504 }, { "epoch": 0.40896739130434784, "grad_norm": 0.8885082593492608, "learning_rate": 1.8725399122713914e-05, "loss": 0.43359407782554626, "step": 1505 }, { "epoch": 0.4092391304347826, "grad_norm": 1.0273192130852342, "learning_rate": 1.872320315825584e-05, "loss": 0.4215564429759979, "step": 1506 }, { "epoch": 0.4095108695652174, "grad_norm": 1.0487898660007213, "learning_rate": 1.87210054327429e-05, "loss": 0.5175881385803223, "step": 1507 }, { "epoch": 0.4097826086956522, "grad_norm": 1.1050043428439738, "learning_rate": 1.8718805946618782e-05, "loss": 0.5206078290939331, "step": 1508 }, { "epoch": 0.41005434782608696, "grad_norm": 1.11111706477448, "learning_rate": 1.8716604700327516e-05, "loss": 0.5558974742889404, "step": 1509 }, { "epoch": 0.41032608695652173, "grad_norm": 1.0266767122322364, "learning_rate": 1.8714401694313494e-05, "loss": 0.5514625906944275, "step": 1510 }, { "epoch": 0.4105978260869565, "grad_norm": 0.9049060293146444, "learning_rate": 1.8712196929021463e-05, "loss": 0.4017241895198822, "step": 1511 }, { "epoch": 0.4108695652173913, "grad_norm": 1.1555899166990287, "learning_rate": 1.8709990404896526e-05, "loss": 0.5972298383712769, "step": 1512 }, { "epoch": 0.4111413043478261, "grad_norm": 1.0646464535333, "learning_rate": 1.870778212238414e-05, "loss": 0.5932420492172241, "step": 1513 }, { "epoch": 0.41141304347826085, "grad_norm": 0.979101014400615, "learning_rate": 1.870557208193012e-05, "loss": 0.46087583899497986, "step": 1514 }, { "epoch": 0.4116847826086957, "grad_norm": 1.0625968310442695, "learning_rate": 1.8703360283980618e-05, "loss": 0.4135586619377136, "step": 1515 }, { "epoch": 0.41195652173913044, "grad_norm": 0.9476345900423854, "learning_rate": 1.8701146728982174e-05, "loss": 0.46156156063079834, "step": 1516 }, { "epoch": 0.4122282608695652, "grad_norm": 1.1985269899529334, "learning_rate": 1.8698931417381654e-05, "loss": 0.5835812091827393, "step": 1517 }, { "epoch": 0.4125, "grad_norm": 0.9465338896155339, "learning_rate": 1.869671434962629e-05, "loss": 0.5636988878250122, "step": 1518 }, { "epoch": 0.4127717391304348, "grad_norm": 1.0678541221259348, "learning_rate": 1.869449552616367e-05, "loss": 0.49080950021743774, "step": 1519 }, { "epoch": 0.41304347826086957, "grad_norm": 1.0350898556244095, "learning_rate": 1.8692274947441733e-05, "loss": 0.5815711617469788, "step": 1520 }, { "epoch": 0.41331521739130433, "grad_norm": 0.8964779836211396, "learning_rate": 1.869005261390877e-05, "loss": 0.464006245136261, "step": 1521 }, { "epoch": 0.41358695652173916, "grad_norm": 0.8624148207646644, "learning_rate": 1.8687828526013434e-05, "loss": 0.45454180240631104, "step": 1522 }, { "epoch": 0.4138586956521739, "grad_norm": 1.025735166171841, "learning_rate": 1.8685602684204722e-05, "loss": 0.4679485559463501, "step": 1523 }, { "epoch": 0.4141304347826087, "grad_norm": 1.0207944416052648, "learning_rate": 1.8683375088931998e-05, "loss": 0.4990267753601074, "step": 1524 }, { "epoch": 0.41440217391304346, "grad_norm": 1.0741280557026447, "learning_rate": 1.8681145740644965e-05, "loss": 0.5426681637763977, "step": 1525 }, { "epoch": 0.4146739130434783, "grad_norm": 1.0501030229029262, "learning_rate": 1.8678914639793696e-05, "loss": 0.5093374848365784, "step": 1526 }, { "epoch": 0.41494565217391305, "grad_norm": 0.9880970435326694, "learning_rate": 1.8676681786828598e-05, "loss": 0.3972374200820923, "step": 1527 }, { "epoch": 0.4152173913043478, "grad_norm": 0.958516459053714, "learning_rate": 1.8674447182200457e-05, "loss": 0.5062984824180603, "step": 1528 }, { "epoch": 0.4154891304347826, "grad_norm": 1.0942053250843313, "learning_rate": 1.8672210826360388e-05, "loss": 0.5585982203483582, "step": 1529 }, { "epoch": 0.4157608695652174, "grad_norm": 0.9075075658397298, "learning_rate": 1.8669972719759873e-05, "loss": 0.41018474102020264, "step": 1530 }, { "epoch": 0.4160326086956522, "grad_norm": 0.8821674991993341, "learning_rate": 1.8667732862850744e-05, "loss": 0.4171491861343384, "step": 1531 }, { "epoch": 0.41630434782608694, "grad_norm": 0.9605774479501396, "learning_rate": 1.866549125608519e-05, "loss": 0.44756001234054565, "step": 1532 }, { "epoch": 0.41657608695652176, "grad_norm": 0.889320304353864, "learning_rate": 1.866324789991575e-05, "loss": 0.44164031744003296, "step": 1533 }, { "epoch": 0.41684782608695653, "grad_norm": 1.1014563364688834, "learning_rate": 1.8661002794795316e-05, "loss": 0.5186464190483093, "step": 1534 }, { "epoch": 0.4171195652173913, "grad_norm": 1.175371179175727, "learning_rate": 1.8658755941177127e-05, "loss": 0.5435436964035034, "step": 1535 }, { "epoch": 0.41739130434782606, "grad_norm": 0.9289667971163806, "learning_rate": 1.8656507339514792e-05, "loss": 0.4429347515106201, "step": 1536 }, { "epoch": 0.4176630434782609, "grad_norm": 0.9640315304379375, "learning_rate": 1.865425699026226e-05, "loss": 0.43288135528564453, "step": 1537 }, { "epoch": 0.41793478260869565, "grad_norm": 1.034914277072305, "learning_rate": 1.865200489387383e-05, "loss": 0.5157713294029236, "step": 1538 }, { "epoch": 0.4182065217391304, "grad_norm": 1.0733798984672596, "learning_rate": 1.8649751050804164e-05, "loss": 0.4596726894378662, "step": 1539 }, { "epoch": 0.41847826086956524, "grad_norm": 1.0079946603874945, "learning_rate": 1.864749546150827e-05, "loss": 0.4695574641227722, "step": 1540 }, { "epoch": 0.41875, "grad_norm": 1.0581529091370838, "learning_rate": 1.864523812644151e-05, "loss": 0.5311746001243591, "step": 1541 }, { "epoch": 0.4190217391304348, "grad_norm": 1.2017232979119763, "learning_rate": 1.8642979046059595e-05, "loss": 0.6046945452690125, "step": 1542 }, { "epoch": 0.41929347826086955, "grad_norm": 1.099632445536185, "learning_rate": 1.8640718220818598e-05, "loss": 0.553799033164978, "step": 1543 }, { "epoch": 0.41956521739130437, "grad_norm": 0.9831734278075962, "learning_rate": 1.8638455651174933e-05, "loss": 0.5062249898910522, "step": 1544 }, { "epoch": 0.41983695652173914, "grad_norm": 1.0253394908372888, "learning_rate": 1.863619133758538e-05, "loss": 0.49988919496536255, "step": 1545 }, { "epoch": 0.4201086956521739, "grad_norm": 1.0977228471502734, "learning_rate": 1.8633925280507052e-05, "loss": 0.5602518320083618, "step": 1546 }, { "epoch": 0.42038043478260867, "grad_norm": 1.0840750272370694, "learning_rate": 1.863165748039743e-05, "loss": 0.5690163373947144, "step": 1547 }, { "epoch": 0.4206521739130435, "grad_norm": 0.927730727345421, "learning_rate": 1.862938793771434e-05, "loss": 0.47349053621292114, "step": 1548 }, { "epoch": 0.42092391304347826, "grad_norm": 1.004059207814751, "learning_rate": 1.8627116652915957e-05, "loss": 0.47028154134750366, "step": 1549 }, { "epoch": 0.421195652173913, "grad_norm": 0.9902909664161258, "learning_rate": 1.8624843626460824e-05, "loss": 0.5426449775695801, "step": 1550 }, { "epoch": 0.42146739130434785, "grad_norm": 0.8863743529126055, "learning_rate": 1.862256885880781e-05, "loss": 0.36568230390548706, "step": 1551 }, { "epoch": 0.4217391304347826, "grad_norm": 1.2392895814402376, "learning_rate": 1.8620292350416155e-05, "loss": 0.47175294160842896, "step": 1552 }, { "epoch": 0.4220108695652174, "grad_norm": 0.8719775459650384, "learning_rate": 1.8618014101745444e-05, "loss": 0.42807692289352417, "step": 1553 }, { "epoch": 0.42228260869565215, "grad_norm": 1.116469995902024, "learning_rate": 1.861573411325561e-05, "loss": 0.5868397951126099, "step": 1554 }, { "epoch": 0.422554347826087, "grad_norm": 0.8828252183537418, "learning_rate": 1.861345238540695e-05, "loss": 0.46700334548950195, "step": 1555 }, { "epoch": 0.42282608695652174, "grad_norm": 0.9959312918655948, "learning_rate": 1.8611168918660094e-05, "loss": 0.5410696268081665, "step": 1556 }, { "epoch": 0.4230978260869565, "grad_norm": 1.05605080111425, "learning_rate": 1.8608883713476035e-05, "loss": 0.5611105561256409, "step": 1557 }, { "epoch": 0.42336956521739133, "grad_norm": 1.0394699727515553, "learning_rate": 1.8606596770316114e-05, "loss": 0.5820943713188171, "step": 1558 }, { "epoch": 0.4236413043478261, "grad_norm": 1.1048494550356769, "learning_rate": 1.8604308089642025e-05, "loss": 0.5573689937591553, "step": 1559 }, { "epoch": 0.42391304347826086, "grad_norm": 0.9880856096426466, "learning_rate": 1.860201767191581e-05, "loss": 0.5096225738525391, "step": 1560 }, { "epoch": 0.42418478260869563, "grad_norm": 0.9622116321282896, "learning_rate": 1.8599725517599858e-05, "loss": 0.5277221202850342, "step": 1561 }, { "epoch": 0.42445652173913045, "grad_norm": 0.9756163109896511, "learning_rate": 1.8597431627156917e-05, "loss": 0.5693327784538269, "step": 1562 }, { "epoch": 0.4247282608695652, "grad_norm": 1.0028147373169716, "learning_rate": 1.8595136001050083e-05, "loss": 0.53178870677948, "step": 1563 }, { "epoch": 0.425, "grad_norm": 1.2265396996362872, "learning_rate": 1.8592838639742796e-05, "loss": 0.5772587060928345, "step": 1564 }, { "epoch": 0.42527173913043476, "grad_norm": 0.9915613508080828, "learning_rate": 1.8590539543698852e-05, "loss": 0.5028002262115479, "step": 1565 }, { "epoch": 0.4255434782608696, "grad_norm": 1.0178014604150456, "learning_rate": 1.85882387133824e-05, "loss": 0.5066350698471069, "step": 1566 }, { "epoch": 0.42581521739130435, "grad_norm": 0.9839780227655189, "learning_rate": 1.8585936149257935e-05, "loss": 0.5246078372001648, "step": 1567 }, { "epoch": 0.4260869565217391, "grad_norm": 1.100133785030126, "learning_rate": 1.8583631851790302e-05, "loss": 0.4994097948074341, "step": 1568 }, { "epoch": 0.42635869565217394, "grad_norm": 0.9763717374896462, "learning_rate": 1.858132582144469e-05, "loss": 0.5190144777297974, "step": 1569 }, { "epoch": 0.4266304347826087, "grad_norm": 1.0655674548554506, "learning_rate": 1.8579018058686653e-05, "loss": 0.5422321557998657, "step": 1570 }, { "epoch": 0.42690217391304347, "grad_norm": 1.0028117158027963, "learning_rate": 1.8576708563982085e-05, "loss": 0.4328332543373108, "step": 1571 }, { "epoch": 0.42717391304347824, "grad_norm": 1.0824276583434451, "learning_rate": 1.8574397337797225e-05, "loss": 0.45946112275123596, "step": 1572 }, { "epoch": 0.42744565217391306, "grad_norm": 1.155080511504908, "learning_rate": 1.857208438059867e-05, "loss": 0.5688107013702393, "step": 1573 }, { "epoch": 0.4277173913043478, "grad_norm": 1.04003890950066, "learning_rate": 1.856976969285337e-05, "loss": 0.5369885563850403, "step": 1574 }, { "epoch": 0.4279891304347826, "grad_norm": 1.1490963794412767, "learning_rate": 1.8567453275028606e-05, "loss": 0.5702865123748779, "step": 1575 }, { "epoch": 0.4282608695652174, "grad_norm": 1.0111379275590249, "learning_rate": 1.8565135127592027e-05, "loss": 0.472512423992157, "step": 1576 }, { "epoch": 0.4285326086956522, "grad_norm": 0.8816378872891317, "learning_rate": 1.8562815251011626e-05, "loss": 0.45006296038627625, "step": 1577 }, { "epoch": 0.42880434782608695, "grad_norm": 1.022762572484592, "learning_rate": 1.856049364575574e-05, "loss": 0.5287460088729858, "step": 1578 }, { "epoch": 0.4290760869565217, "grad_norm": 0.9113527858439935, "learning_rate": 1.8558170312293064e-05, "loss": 0.47719717025756836, "step": 1579 }, { "epoch": 0.42934782608695654, "grad_norm": 0.8182588105200453, "learning_rate": 1.855584525109263e-05, "loss": 0.39391452074050903, "step": 1580 }, { "epoch": 0.4296195652173913, "grad_norm": 1.0638381740124925, "learning_rate": 1.8553518462623826e-05, "loss": 0.5974037051200867, "step": 1581 }, { "epoch": 0.4298913043478261, "grad_norm": 1.0222547706692382, "learning_rate": 1.8551189947356393e-05, "loss": 0.4512571096420288, "step": 1582 }, { "epoch": 0.43016304347826084, "grad_norm": 1.1299418016436087, "learning_rate": 1.8548859705760414e-05, "loss": 0.5715160369873047, "step": 1583 }, { "epoch": 0.43043478260869567, "grad_norm": 1.041429254137219, "learning_rate": 1.8546527738306314e-05, "loss": 0.5838006734848022, "step": 1584 }, { "epoch": 0.43070652173913043, "grad_norm": 0.9976419184454699, "learning_rate": 1.8544194045464888e-05, "loss": 0.4724462032318115, "step": 1585 }, { "epoch": 0.4309782608695652, "grad_norm": 1.1195653741281442, "learning_rate": 1.8541858627707255e-05, "loss": 0.5974624752998352, "step": 1586 }, { "epoch": 0.43125, "grad_norm": 0.9591944075117481, "learning_rate": 1.8539521485504898e-05, "loss": 0.5353943109512329, "step": 1587 }, { "epoch": 0.4315217391304348, "grad_norm": 1.1130223625626399, "learning_rate": 1.853718261932964e-05, "loss": 0.5748199224472046, "step": 1588 }, { "epoch": 0.43179347826086956, "grad_norm": 1.0649587124164681, "learning_rate": 1.853484202965366e-05, "loss": 0.5115656852722168, "step": 1589 }, { "epoch": 0.4320652173913043, "grad_norm": 0.928875131133861, "learning_rate": 1.8532499716949476e-05, "loss": 0.4501742720603943, "step": 1590 }, { "epoch": 0.43233695652173915, "grad_norm": 0.8912095437377778, "learning_rate": 1.8530155681689962e-05, "loss": 0.43687695264816284, "step": 1591 }, { "epoch": 0.4326086956521739, "grad_norm": 0.9787983905470116, "learning_rate": 1.852780992434833e-05, "loss": 0.45941030979156494, "step": 1592 }, { "epoch": 0.4328804347826087, "grad_norm": 0.8593642849390902, "learning_rate": 1.8525462445398147e-05, "loss": 0.4277435541152954, "step": 1593 }, { "epoch": 0.4331521739130435, "grad_norm": 1.1035754479213502, "learning_rate": 1.852311324531333e-05, "loss": 0.5757485032081604, "step": 1594 }, { "epoch": 0.43342391304347827, "grad_norm": 1.0564966159364528, "learning_rate": 1.8520762324568138e-05, "loss": 0.5246074795722961, "step": 1595 }, { "epoch": 0.43369565217391304, "grad_norm": 1.0961731939868704, "learning_rate": 1.8518409683637178e-05, "loss": 0.5150027871131897, "step": 1596 }, { "epoch": 0.4339673913043478, "grad_norm": 1.0172684330894843, "learning_rate": 1.8516055322995402e-05, "loss": 0.5265240669250488, "step": 1597 }, { "epoch": 0.4342391304347826, "grad_norm": 1.0255291622734257, "learning_rate": 1.851369924311812e-05, "loss": 0.4880816340446472, "step": 1598 }, { "epoch": 0.4345108695652174, "grad_norm": 0.9688169679047691, "learning_rate": 1.851134144448097e-05, "loss": 0.5224365592002869, "step": 1599 }, { "epoch": 0.43478260869565216, "grad_norm": 1.0557826458626123, "learning_rate": 1.8508981927559963e-05, "loss": 0.5614888668060303, "step": 1600 }, { "epoch": 0.43505434782608693, "grad_norm": 1.8278718617554774, "learning_rate": 1.8506620692831427e-05, "loss": 0.46117711067199707, "step": 1601 }, { "epoch": 0.43532608695652175, "grad_norm": 1.1260021042513897, "learning_rate": 1.8504257740772065e-05, "loss": 0.5389969348907471, "step": 1602 }, { "epoch": 0.4355978260869565, "grad_norm": 1.0506242426810746, "learning_rate": 1.85018930718589e-05, "loss": 0.43726322054862976, "step": 1603 }, { "epoch": 0.4358695652173913, "grad_norm": 0.97797738710459, "learning_rate": 1.849952668656933e-05, "loss": 0.49980419874191284, "step": 1604 }, { "epoch": 0.4361413043478261, "grad_norm": 1.0205933396510745, "learning_rate": 1.8497158585381076e-05, "loss": 0.4788394272327423, "step": 1605 }, { "epoch": 0.4364130434782609, "grad_norm": 1.0670802994590347, "learning_rate": 1.8494788768772217e-05, "loss": 0.4853534698486328, "step": 1606 }, { "epoch": 0.43668478260869564, "grad_norm": 0.9623552095896756, "learning_rate": 1.8492417237221178e-05, "loss": 0.41382884979248047, "step": 1607 }, { "epoch": 0.4369565217391304, "grad_norm": 1.1957166937799504, "learning_rate": 1.849004399120672e-05, "loss": 0.5507556200027466, "step": 1608 }, { "epoch": 0.43722826086956523, "grad_norm": 1.0365842238031937, "learning_rate": 1.848766903120796e-05, "loss": 0.5732470750808716, "step": 1609 }, { "epoch": 0.4375, "grad_norm": 1.020975549660146, "learning_rate": 1.8485292357704367e-05, "loss": 0.5451812744140625, "step": 1610 }, { "epoch": 0.43777173913043477, "grad_norm": 1.1365384876208997, "learning_rate": 1.8482913971175737e-05, "loss": 0.6259847283363342, "step": 1611 }, { "epoch": 0.4380434782608696, "grad_norm": 1.0494354943535058, "learning_rate": 1.8480533872102233e-05, "loss": 0.48264896869659424, "step": 1612 }, { "epoch": 0.43831521739130436, "grad_norm": 0.9869528769875298, "learning_rate": 1.8478152060964343e-05, "loss": 0.5480381846427917, "step": 1613 }, { "epoch": 0.4385869565217391, "grad_norm": 0.8475605283247608, "learning_rate": 1.847576853824292e-05, "loss": 0.4144167900085449, "step": 1614 }, { "epoch": 0.4388586956521739, "grad_norm": 1.054003842163913, "learning_rate": 1.847338330441915e-05, "loss": 0.5414569973945618, "step": 1615 }, { "epoch": 0.4391304347826087, "grad_norm": 0.9958111092509433, "learning_rate": 1.8470996359974568e-05, "loss": 0.488103449344635, "step": 1616 }, { "epoch": 0.4394021739130435, "grad_norm": 1.049866970991841, "learning_rate": 1.846860770539105e-05, "loss": 0.571033239364624, "step": 1617 }, { "epoch": 0.43967391304347825, "grad_norm": 1.037517660430011, "learning_rate": 1.846621734115083e-05, "loss": 0.5403891205787659, "step": 1618 }, { "epoch": 0.43994565217391307, "grad_norm": 1.0225831949556496, "learning_rate": 1.8463825267736472e-05, "loss": 0.5402045249938965, "step": 1619 }, { "epoch": 0.44021739130434784, "grad_norm": 0.9635446885912842, "learning_rate": 1.8461431485630898e-05, "loss": 0.5150339007377625, "step": 1620 }, { "epoch": 0.4404891304347826, "grad_norm": 1.0421342526162183, "learning_rate": 1.8459035995317364e-05, "loss": 0.46588456630706787, "step": 1621 }, { "epoch": 0.4407608695652174, "grad_norm": 1.0591135411188553, "learning_rate": 1.845663879727947e-05, "loss": 0.49659258127212524, "step": 1622 }, { "epoch": 0.4410326086956522, "grad_norm": 0.7965018421927955, "learning_rate": 1.845423989200118e-05, "loss": 0.3334018886089325, "step": 1623 }, { "epoch": 0.44130434782608696, "grad_norm": 0.9170421382748167, "learning_rate": 1.845183927996678e-05, "loss": 0.42667943239212036, "step": 1624 }, { "epoch": 0.44157608695652173, "grad_norm": 1.1032622542829729, "learning_rate": 1.8449436961660908e-05, "loss": 0.4933627247810364, "step": 1625 }, { "epoch": 0.4418478260869565, "grad_norm": 1.1625558613011475, "learning_rate": 1.8447032937568557e-05, "loss": 0.5949050188064575, "step": 1626 }, { "epoch": 0.4421195652173913, "grad_norm": 1.203316462704643, "learning_rate": 1.844462720817505e-05, "loss": 0.63912034034729, "step": 1627 }, { "epoch": 0.4423913043478261, "grad_norm": 1.076205985276706, "learning_rate": 1.8442219773966055e-05, "loss": 0.5099828243255615, "step": 1628 }, { "epoch": 0.44266304347826085, "grad_norm": 0.9861465959020418, "learning_rate": 1.8439810635427596e-05, "loss": 0.49031710624694824, "step": 1629 }, { "epoch": 0.4429347826086957, "grad_norm": 1.2518764378902913, "learning_rate": 1.843739979304603e-05, "loss": 0.5847947597503662, "step": 1630 }, { "epoch": 0.44320652173913044, "grad_norm": 0.9572830258572533, "learning_rate": 1.843498724730807e-05, "loss": 0.4754626750946045, "step": 1631 }, { "epoch": 0.4434782608695652, "grad_norm": 1.0406541061336907, "learning_rate": 1.843257299870075e-05, "loss": 0.5388674736022949, "step": 1632 }, { "epoch": 0.44375, "grad_norm": 1.079165870927349, "learning_rate": 1.8430157047711473e-05, "loss": 0.5303686857223511, "step": 1633 }, { "epoch": 0.4440217391304348, "grad_norm": 0.9486967703405649, "learning_rate": 1.8427739394827976e-05, "loss": 0.5098252892494202, "step": 1634 }, { "epoch": 0.44429347826086957, "grad_norm": 1.0302491781777634, "learning_rate": 1.8425320040538332e-05, "loss": 0.5455173254013062, "step": 1635 }, { "epoch": 0.44456521739130433, "grad_norm": 1.1538292781894475, "learning_rate": 1.842289898533097e-05, "loss": 0.5258623957633972, "step": 1636 }, { "epoch": 0.44483695652173916, "grad_norm": 1.079722558400994, "learning_rate": 1.8420476229694656e-05, "loss": 0.5852627754211426, "step": 1637 }, { "epoch": 0.4451086956521739, "grad_norm": 1.0044597003105407, "learning_rate": 1.8418051774118493e-05, "loss": 0.5251306891441345, "step": 1638 }, { "epoch": 0.4453804347826087, "grad_norm": 0.8999742808500416, "learning_rate": 1.8415625619091945e-05, "loss": 0.4457217752933502, "step": 1639 }, { "epoch": 0.44565217391304346, "grad_norm": 1.160654940361542, "learning_rate": 1.84131977651048e-05, "loss": 0.5639628171920776, "step": 1640 }, { "epoch": 0.4459239130434783, "grad_norm": 0.9749029233831978, "learning_rate": 1.8410768212647204e-05, "loss": 0.4425748884677887, "step": 1641 }, { "epoch": 0.44619565217391305, "grad_norm": 1.2450111485382989, "learning_rate": 1.840833696220963e-05, "loss": 0.6348921656608582, "step": 1642 }, { "epoch": 0.4464673913043478, "grad_norm": 0.9919140029944924, "learning_rate": 1.8405904014282914e-05, "loss": 0.4559909701347351, "step": 1643 }, { "epoch": 0.4467391304347826, "grad_norm": 0.8687396943827856, "learning_rate": 1.8403469369358216e-05, "loss": 0.44343942403793335, "step": 1644 }, { "epoch": 0.4470108695652174, "grad_norm": 1.0755537422122925, "learning_rate": 1.8401033027927048e-05, "loss": 0.5519018173217773, "step": 1645 }, { "epoch": 0.4472826086956522, "grad_norm": 0.9034759757957641, "learning_rate": 1.839859499048126e-05, "loss": 0.4307515025138855, "step": 1646 }, { "epoch": 0.44755434782608694, "grad_norm": 1.0829266277386491, "learning_rate": 1.839615525751305e-05, "loss": 0.5682048797607422, "step": 1647 }, { "epoch": 0.44782608695652176, "grad_norm": 0.9072880945885159, "learning_rate": 1.839371382951496e-05, "loss": 0.3691966235637665, "step": 1648 }, { "epoch": 0.44809782608695653, "grad_norm": 1.0804580131452721, "learning_rate": 1.8391270706979864e-05, "loss": 0.5880473852157593, "step": 1649 }, { "epoch": 0.4483695652173913, "grad_norm": 1.1165442243393489, "learning_rate": 1.838882589040098e-05, "loss": 0.543582558631897, "step": 1650 }, { "epoch": 0.44864130434782606, "grad_norm": 0.9984430572651067, "learning_rate": 1.838637938027188e-05, "loss": 0.4534582495689392, "step": 1651 }, { "epoch": 0.4489130434782609, "grad_norm": 1.1804454904174297, "learning_rate": 1.8383931177086464e-05, "loss": 0.6019425392150879, "step": 1652 }, { "epoch": 0.44918478260869565, "grad_norm": 1.0911203801147793, "learning_rate": 1.8381481281338983e-05, "loss": 0.639467716217041, "step": 1653 }, { "epoch": 0.4494565217391304, "grad_norm": 1.0709952840247565, "learning_rate": 1.8379029693524022e-05, "loss": 0.4660944938659668, "step": 1654 }, { "epoch": 0.44972826086956524, "grad_norm": 1.0565525067161539, "learning_rate": 1.8376576414136513e-05, "loss": 0.5156705379486084, "step": 1655 }, { "epoch": 0.45, "grad_norm": 1.154691888719756, "learning_rate": 1.837412144367173e-05, "loss": 0.5881303548812866, "step": 1656 }, { "epoch": 0.4502717391304348, "grad_norm": 1.0768443974622286, "learning_rate": 1.8371664782625287e-05, "loss": 0.5889188051223755, "step": 1657 }, { "epoch": 0.45054347826086955, "grad_norm": 1.1144590369609302, "learning_rate": 1.8369206431493135e-05, "loss": 0.5103840827941895, "step": 1658 }, { "epoch": 0.45081521739130437, "grad_norm": 1.035703696306068, "learning_rate": 1.8366746390771576e-05, "loss": 0.4958946704864502, "step": 1659 }, { "epoch": 0.45108695652173914, "grad_norm": 1.0136084923781061, "learning_rate": 1.836428466095724e-05, "loss": 0.4366415739059448, "step": 1660 }, { "epoch": 0.4513586956521739, "grad_norm": 0.9885998173360886, "learning_rate": 1.836182124254711e-05, "loss": 0.5656514167785645, "step": 1661 }, { "epoch": 0.45163043478260867, "grad_norm": 1.0173599092694767, "learning_rate": 1.8359356136038504e-05, "loss": 0.47817981243133545, "step": 1662 }, { "epoch": 0.4519021739130435, "grad_norm": 1.0330750240576345, "learning_rate": 1.835688934192908e-05, "loss": 0.5015963912010193, "step": 1663 }, { "epoch": 0.45217391304347826, "grad_norm": 1.2263344784482928, "learning_rate": 1.8354420860716842e-05, "loss": 0.6465162038803101, "step": 1664 }, { "epoch": 0.452445652173913, "grad_norm": 1.0064453503971804, "learning_rate": 1.8351950692900127e-05, "loss": 0.5019867420196533, "step": 1665 }, { "epoch": 0.45271739130434785, "grad_norm": 1.3035909557167606, "learning_rate": 1.834947883897762e-05, "loss": 0.5664429664611816, "step": 1666 }, { "epoch": 0.4529891304347826, "grad_norm": 1.007130657027395, "learning_rate": 1.8347005299448342e-05, "loss": 0.5324559211730957, "step": 1667 }, { "epoch": 0.4532608695652174, "grad_norm": 1.0917594854576964, "learning_rate": 1.8344530074811657e-05, "loss": 0.5218695402145386, "step": 1668 }, { "epoch": 0.45353260869565215, "grad_norm": 1.0096215101640467, "learning_rate": 1.8342053165567263e-05, "loss": 0.4590188264846802, "step": 1669 }, { "epoch": 0.453804347826087, "grad_norm": 1.0341355214419168, "learning_rate": 1.8339574572215207e-05, "loss": 0.472795307636261, "step": 1670 }, { "epoch": 0.45407608695652174, "grad_norm": 1.1636294155976439, "learning_rate": 1.8337094295255866e-05, "loss": 0.6011660099029541, "step": 1671 }, { "epoch": 0.4543478260869565, "grad_norm": 1.176581480319248, "learning_rate": 1.833461233518997e-05, "loss": 0.5758886337280273, "step": 1672 }, { "epoch": 0.45461956521739133, "grad_norm": 0.861359302385034, "learning_rate": 1.8332128692518576e-05, "loss": 0.4381285309791565, "step": 1673 }, { "epoch": 0.4548913043478261, "grad_norm": 1.0927769143920636, "learning_rate": 1.8329643367743087e-05, "loss": 0.5144325494766235, "step": 1674 }, { "epoch": 0.45516304347826086, "grad_norm": 0.9480680203200501, "learning_rate": 1.8327156361365247e-05, "loss": 0.4263438284397125, "step": 1675 }, { "epoch": 0.45543478260869563, "grad_norm": 1.017921784596885, "learning_rate": 1.832466767388713e-05, "loss": 0.5553175210952759, "step": 1676 }, { "epoch": 0.45570652173913045, "grad_norm": 1.0046658736376666, "learning_rate": 1.8322177305811165e-05, "loss": 0.4002154767513275, "step": 1677 }, { "epoch": 0.4559782608695652, "grad_norm": 1.170268908629603, "learning_rate": 1.831968525764011e-05, "loss": 0.6455202102661133, "step": 1678 }, { "epoch": 0.45625, "grad_norm": 0.9845358710264025, "learning_rate": 1.831719152987706e-05, "loss": 0.4884776473045349, "step": 1679 }, { "epoch": 0.45652173913043476, "grad_norm": 1.1081332701826607, "learning_rate": 1.8314696123025456e-05, "loss": 0.5675164461135864, "step": 1680 }, { "epoch": 0.4567934782608696, "grad_norm": 0.9921098641643846, "learning_rate": 1.831219903758907e-05, "loss": 0.4745921492576599, "step": 1681 }, { "epoch": 0.45706521739130435, "grad_norm": 0.998384160167104, "learning_rate": 1.8309700274072023e-05, "loss": 0.49695920944213867, "step": 1682 }, { "epoch": 0.4573369565217391, "grad_norm": 1.0318262070197173, "learning_rate": 1.830719983297877e-05, "loss": 0.4218621253967285, "step": 1683 }, { "epoch": 0.45760869565217394, "grad_norm": 0.9599043823089902, "learning_rate": 1.8304697714814102e-05, "loss": 0.47214770317077637, "step": 1684 }, { "epoch": 0.4578804347826087, "grad_norm": 0.9701475324356015, "learning_rate": 1.830219392008315e-05, "loss": 0.4676242768764496, "step": 1685 }, { "epoch": 0.45815217391304347, "grad_norm": 1.1471318061726519, "learning_rate": 1.8299688449291385e-05, "loss": 0.6692973375320435, "step": 1686 }, { "epoch": 0.45842391304347824, "grad_norm": 0.9351491233150734, "learning_rate": 1.8297181302944616e-05, "loss": 0.47992825508117676, "step": 1687 }, { "epoch": 0.45869565217391306, "grad_norm": 0.8907935547827105, "learning_rate": 1.829467248154899e-05, "loss": 0.40894919633865356, "step": 1688 }, { "epoch": 0.4589673913043478, "grad_norm": 1.0064284877610488, "learning_rate": 1.8292161985610994e-05, "loss": 0.4235353171825409, "step": 1689 }, { "epoch": 0.4592391304347826, "grad_norm": 1.1368185540327302, "learning_rate": 1.828964981563745e-05, "loss": 0.6051974892616272, "step": 1690 }, { "epoch": 0.4595108695652174, "grad_norm": 1.1238201387475362, "learning_rate": 1.8287135972135515e-05, "loss": 0.5327686071395874, "step": 1691 }, { "epoch": 0.4597826086956522, "grad_norm": 0.9149811779086782, "learning_rate": 1.8284620455612692e-05, "loss": 0.4230074882507324, "step": 1692 }, { "epoch": 0.46005434782608695, "grad_norm": 0.9617647312912214, "learning_rate": 1.8282103266576817e-05, "loss": 0.4718018174171448, "step": 1693 }, { "epoch": 0.4603260869565217, "grad_norm": 0.9884838066741273, "learning_rate": 1.8279584405536065e-05, "loss": 0.4851105213165283, "step": 1694 }, { "epoch": 0.46059782608695654, "grad_norm": 0.9893117708176276, "learning_rate": 1.8277063872998946e-05, "loss": 0.48408639430999756, "step": 1695 }, { "epoch": 0.4608695652173913, "grad_norm": 1.0337413531310433, "learning_rate": 1.827454166947431e-05, "loss": 0.45703011751174927, "step": 1696 }, { "epoch": 0.4611413043478261, "grad_norm": 1.169975990818439, "learning_rate": 1.8272017795471345e-05, "loss": 0.5359090566635132, "step": 1697 }, { "epoch": 0.46141304347826084, "grad_norm": 1.0272149390880296, "learning_rate": 1.8269492251499576e-05, "loss": 0.5604609251022339, "step": 1698 }, { "epoch": 0.46168478260869567, "grad_norm": 1.1730008080218428, "learning_rate": 1.8266965038068856e-05, "loss": 0.6248401999473572, "step": 1699 }, { "epoch": 0.46195652173913043, "grad_norm": 1.0329583083165341, "learning_rate": 1.8264436155689395e-05, "loss": 0.5219649076461792, "step": 1700 }, { "epoch": 0.4622282608695652, "grad_norm": 1.0851314420963474, "learning_rate": 1.8261905604871722e-05, "loss": 0.4991772174835205, "step": 1701 }, { "epoch": 0.4625, "grad_norm": 1.1499251375776525, "learning_rate": 1.8259373386126708e-05, "loss": 0.6173416376113892, "step": 1702 }, { "epoch": 0.4627717391304348, "grad_norm": 1.0316839321308222, "learning_rate": 1.825683949996556e-05, "loss": 0.5070214867591858, "step": 1703 }, { "epoch": 0.46304347826086956, "grad_norm": 1.1217130568203455, "learning_rate": 1.8254303946899826e-05, "loss": 0.5591518878936768, "step": 1704 }, { "epoch": 0.4633152173913043, "grad_norm": 0.9062779838049867, "learning_rate": 1.8251766727441393e-05, "loss": 0.46813109517097473, "step": 1705 }, { "epoch": 0.46358695652173915, "grad_norm": 0.992425544718443, "learning_rate": 1.8249227842102465e-05, "loss": 0.5243329405784607, "step": 1706 }, { "epoch": 0.4638586956521739, "grad_norm": 1.0482913906153892, "learning_rate": 1.824668729139561e-05, "loss": 0.5123889446258545, "step": 1707 }, { "epoch": 0.4641304347826087, "grad_norm": 0.8696076755569855, "learning_rate": 1.8244145075833715e-05, "loss": 0.45424884557724, "step": 1708 }, { "epoch": 0.4644021739130435, "grad_norm": 1.178394376700604, "learning_rate": 1.824160119593e-05, "loss": 0.526117205619812, "step": 1709 }, { "epoch": 0.46467391304347827, "grad_norm": 1.1337731533549966, "learning_rate": 1.8239055652198036e-05, "loss": 0.5824063420295715, "step": 1710 }, { "epoch": 0.46494565217391304, "grad_norm": 0.8748165067709668, "learning_rate": 1.823650844515172e-05, "loss": 0.3868548274040222, "step": 1711 }, { "epoch": 0.4652173913043478, "grad_norm": 1.0703969756525236, "learning_rate": 1.8233959575305286e-05, "loss": 0.5200576186180115, "step": 1712 }, { "epoch": 0.4654891304347826, "grad_norm": 0.9570890924512809, "learning_rate": 1.82314090431733e-05, "loss": 0.4963829219341278, "step": 1713 }, { "epoch": 0.4657608695652174, "grad_norm": 0.9824195080470846, "learning_rate": 1.822885684927067e-05, "loss": 0.44468897581100464, "step": 1714 }, { "epoch": 0.46603260869565216, "grad_norm": 0.9967790717460587, "learning_rate": 1.822630299411264e-05, "loss": 0.4982604384422302, "step": 1715 }, { "epoch": 0.46630434782608693, "grad_norm": 1.2213195703178648, "learning_rate": 1.8223747478214787e-05, "loss": 0.581066906452179, "step": 1716 }, { "epoch": 0.46657608695652175, "grad_norm": 0.9995454479093809, "learning_rate": 1.8221190302093017e-05, "loss": 0.48009783029556274, "step": 1717 }, { "epoch": 0.4668478260869565, "grad_norm": 1.0996151803965963, "learning_rate": 1.8218631466263584e-05, "loss": 0.5019259452819824, "step": 1718 }, { "epoch": 0.4671195652173913, "grad_norm": 1.1409348691790577, "learning_rate": 1.8216070971243063e-05, "loss": 0.5607378482818604, "step": 1719 }, { "epoch": 0.4673913043478261, "grad_norm": 1.0227298690869076, "learning_rate": 1.8213508817548377e-05, "loss": 0.502508282661438, "step": 1720 }, { "epoch": 0.4676630434782609, "grad_norm": 1.1263988896588883, "learning_rate": 1.8210945005696773e-05, "loss": 0.5944421887397766, "step": 1721 }, { "epoch": 0.46793478260869564, "grad_norm": 1.0332927051619023, "learning_rate": 1.820837953620584e-05, "loss": 0.5447517037391663, "step": 1722 }, { "epoch": 0.4682065217391304, "grad_norm": 1.063431810365551, "learning_rate": 1.82058124095935e-05, "loss": 0.5068091154098511, "step": 1723 }, { "epoch": 0.46847826086956523, "grad_norm": 1.201743408431433, "learning_rate": 1.8203243626378008e-05, "loss": 0.6781260967254639, "step": 1724 }, { "epoch": 0.46875, "grad_norm": 0.8477084604957561, "learning_rate": 1.8200673187077956e-05, "loss": 0.3916478753089905, "step": 1725 }, { "epoch": 0.46902173913043477, "grad_norm": 1.0827802168916947, "learning_rate": 1.819810109221227e-05, "loss": 0.5006662607192993, "step": 1726 }, { "epoch": 0.4692934782608696, "grad_norm": 0.9973335573356558, "learning_rate": 1.81955273423002e-05, "loss": 0.5207929611206055, "step": 1727 }, { "epoch": 0.46956521739130436, "grad_norm": 0.7372753468533402, "learning_rate": 1.8192951937861347e-05, "loss": 0.3393290936946869, "step": 1728 }, { "epoch": 0.4698369565217391, "grad_norm": 0.8656973130270801, "learning_rate": 1.8190374879415634e-05, "loss": 0.38939034938812256, "step": 1729 }, { "epoch": 0.4701086956521739, "grad_norm": 0.9039457531123275, "learning_rate": 1.8187796167483325e-05, "loss": 0.4156908392906189, "step": 1730 }, { "epoch": 0.4703804347826087, "grad_norm": 0.8064252215609217, "learning_rate": 1.8185215802585013e-05, "loss": 0.3712955117225647, "step": 1731 }, { "epoch": 0.4706521739130435, "grad_norm": 1.0020121658721193, "learning_rate": 1.8182633785241627e-05, "loss": 0.4969245493412018, "step": 1732 }, { "epoch": 0.47092391304347825, "grad_norm": 1.131378890749361, "learning_rate": 1.818005011597443e-05, "loss": 0.5998613834381104, "step": 1733 }, { "epoch": 0.47119565217391307, "grad_norm": 1.1026361820456492, "learning_rate": 1.8177464795305013e-05, "loss": 0.5202996730804443, "step": 1734 }, { "epoch": 0.47146739130434784, "grad_norm": 0.9127880148094094, "learning_rate": 1.817487782375531e-05, "loss": 0.4324399530887604, "step": 1735 }, { "epoch": 0.4717391304347826, "grad_norm": 1.0002309034644348, "learning_rate": 1.817228920184758e-05, "loss": 0.46099400520324707, "step": 1736 }, { "epoch": 0.4720108695652174, "grad_norm": 1.1526771683930963, "learning_rate": 1.816969893010442e-05, "loss": 0.45792412757873535, "step": 1737 }, { "epoch": 0.4722826086956522, "grad_norm": 0.9566435186698667, "learning_rate": 1.816710700904876e-05, "loss": 0.49536919593811035, "step": 1738 }, { "epoch": 0.47255434782608696, "grad_norm": 1.0096500454213877, "learning_rate": 1.816451343920386e-05, "loss": 0.5416038632392883, "step": 1739 }, { "epoch": 0.47282608695652173, "grad_norm": 0.9883833621979161, "learning_rate": 1.8161918221093308e-05, "loss": 0.48094266653060913, "step": 1740 }, { "epoch": 0.4730978260869565, "grad_norm": 0.9722337798842092, "learning_rate": 1.8159321355241042e-05, "loss": 0.5171716809272766, "step": 1741 }, { "epoch": 0.4733695652173913, "grad_norm": 1.2077644334772863, "learning_rate": 1.815672284217131e-05, "loss": 0.6334196329116821, "step": 1742 }, { "epoch": 0.4736413043478261, "grad_norm": 1.1953213890297, "learning_rate": 1.8154122682408716e-05, "loss": 0.6219698786735535, "step": 1743 }, { "epoch": 0.47391304347826085, "grad_norm": 0.9721777854878934, "learning_rate": 1.8151520876478176e-05, "loss": 0.483155220746994, "step": 1744 }, { "epoch": 0.4741847826086957, "grad_norm": 1.0265286134366418, "learning_rate": 1.8148917424904952e-05, "loss": 0.5632447004318237, "step": 1745 }, { "epoch": 0.47445652173913044, "grad_norm": 0.9982684791362478, "learning_rate": 1.8146312328214627e-05, "loss": 0.5213781595230103, "step": 1746 }, { "epoch": 0.4747282608695652, "grad_norm": 1.064985746010234, "learning_rate": 1.8143705586933128e-05, "loss": 0.49413979053497314, "step": 1747 }, { "epoch": 0.475, "grad_norm": 1.0053329150238444, "learning_rate": 1.814109720158671e-05, "loss": 0.5227094888687134, "step": 1748 }, { "epoch": 0.4752717391304348, "grad_norm": 1.0977935881989818, "learning_rate": 1.813848717270195e-05, "loss": 0.5982433557510376, "step": 1749 }, { "epoch": 0.47554347826086957, "grad_norm": 1.035436323058615, "learning_rate": 1.8135875500805772e-05, "loss": 0.5499874949455261, "step": 1750 }, { "epoch": 0.47581521739130433, "grad_norm": 0.865767239973434, "learning_rate": 1.8133262186425424e-05, "loss": 0.4283156394958496, "step": 1751 }, { "epoch": 0.47608695652173916, "grad_norm": 1.2598991672531086, "learning_rate": 1.813064723008848e-05, "loss": 0.6088259816169739, "step": 1752 }, { "epoch": 0.4763586956521739, "grad_norm": 1.030341476864215, "learning_rate": 1.812803063232286e-05, "loss": 0.5351887941360474, "step": 1753 }, { "epoch": 0.4766304347826087, "grad_norm": 0.9264888575148711, "learning_rate": 1.8125412393656804e-05, "loss": 0.4286844730377197, "step": 1754 }, { "epoch": 0.47690217391304346, "grad_norm": 1.069061180934903, "learning_rate": 1.8122792514618886e-05, "loss": 0.5915864109992981, "step": 1755 }, { "epoch": 0.4771739130434783, "grad_norm": 1.013277320277948, "learning_rate": 1.812017099573801e-05, "loss": 0.5128914713859558, "step": 1756 }, { "epoch": 0.47744565217391305, "grad_norm": 1.0042404090004682, "learning_rate": 1.8117547837543416e-05, "loss": 0.5045719146728516, "step": 1757 }, { "epoch": 0.4777173913043478, "grad_norm": 1.1092830770939106, "learning_rate": 1.8114923040564667e-05, "loss": 0.5016496181488037, "step": 1758 }, { "epoch": 0.4779891304347826, "grad_norm": 0.9869572648790024, "learning_rate": 1.8112296605331664e-05, "loss": 0.49929022789001465, "step": 1759 }, { "epoch": 0.4782608695652174, "grad_norm": 1.1390132584070407, "learning_rate": 1.810966853237464e-05, "loss": 0.6317785382270813, "step": 1760 }, { "epoch": 0.4785326086956522, "grad_norm": 1.0458844237532625, "learning_rate": 1.810703882222415e-05, "loss": 0.5798840522766113, "step": 1761 }, { "epoch": 0.47880434782608694, "grad_norm": 0.9374438408286803, "learning_rate": 1.8104407475411084e-05, "loss": 0.49390119314193726, "step": 1762 }, { "epoch": 0.47907608695652176, "grad_norm": 1.0016792271774475, "learning_rate": 1.8101774492466665e-05, "loss": 0.5699809789657593, "step": 1763 }, { "epoch": 0.47934782608695653, "grad_norm": 0.9229782492538401, "learning_rate": 1.8099139873922443e-05, "loss": 0.43779540061950684, "step": 1764 }, { "epoch": 0.4796195652173913, "grad_norm": 1.1151151617547983, "learning_rate": 1.8096503620310298e-05, "loss": 0.4986271262168884, "step": 1765 }, { "epoch": 0.47989130434782606, "grad_norm": 0.9852761168979975, "learning_rate": 1.8093865732162443e-05, "loss": 0.4887051582336426, "step": 1766 }, { "epoch": 0.4801630434782609, "grad_norm": 1.0718928481615182, "learning_rate": 1.8091226210011423e-05, "loss": 0.5432959794998169, "step": 1767 }, { "epoch": 0.48043478260869565, "grad_norm": 0.9351521026076478, "learning_rate": 1.8088585054390097e-05, "loss": 0.5001257658004761, "step": 1768 }, { "epoch": 0.4807065217391304, "grad_norm": 1.1718145536201732, "learning_rate": 1.8085942265831682e-05, "loss": 0.6070380210876465, "step": 1769 }, { "epoch": 0.48097826086956524, "grad_norm": 1.1943486861539512, "learning_rate": 1.8083297844869695e-05, "loss": 0.6220139265060425, "step": 1770 }, { "epoch": 0.48125, "grad_norm": 1.0127145008744052, "learning_rate": 1.8080651792038006e-05, "loss": 0.5216864347457886, "step": 1771 }, { "epoch": 0.4815217391304348, "grad_norm": 0.8752748792725485, "learning_rate": 1.8078004107870797e-05, "loss": 0.4109097719192505, "step": 1772 }, { "epoch": 0.48179347826086955, "grad_norm": 1.1304267237727883, "learning_rate": 1.807535479290259e-05, "loss": 0.5345677137374878, "step": 1773 }, { "epoch": 0.48206521739130437, "grad_norm": 0.954633477569467, "learning_rate": 1.8072703847668234e-05, "loss": 0.4679284691810608, "step": 1774 }, { "epoch": 0.48233695652173914, "grad_norm": 1.0554230435234484, "learning_rate": 1.8070051272702905e-05, "loss": 0.5777572393417358, "step": 1775 }, { "epoch": 0.4826086956521739, "grad_norm": 0.9917491808983001, "learning_rate": 1.806739706854211e-05, "loss": 0.4902171790599823, "step": 1776 }, { "epoch": 0.48288043478260867, "grad_norm": 0.9753982278113537, "learning_rate": 1.806474123572169e-05, "loss": 0.4592333436012268, "step": 1777 }, { "epoch": 0.4831521739130435, "grad_norm": 1.1751939869955363, "learning_rate": 1.8062083774777796e-05, "loss": 0.6740610599517822, "step": 1778 }, { "epoch": 0.48342391304347826, "grad_norm": 1.3690063833224584, "learning_rate": 1.8059424686246928e-05, "loss": 0.588062047958374, "step": 1779 }, { "epoch": 0.483695652173913, "grad_norm": 1.037514213782377, "learning_rate": 1.805676397066591e-05, "loss": 0.508621871471405, "step": 1780 }, { "epoch": 0.48396739130434785, "grad_norm": 1.0826031438859933, "learning_rate": 1.8054101628571888e-05, "loss": 0.4801630973815918, "step": 1781 }, { "epoch": 0.4842391304347826, "grad_norm": 1.1518185912210805, "learning_rate": 1.805143766050234e-05, "loss": 0.5637484192848206, "step": 1782 }, { "epoch": 0.4845108695652174, "grad_norm": 1.0630815345413096, "learning_rate": 1.8048772066995074e-05, "loss": 0.5269975066184998, "step": 1783 }, { "epoch": 0.48478260869565215, "grad_norm": 1.2840427823861835, "learning_rate": 1.804610484858822e-05, "loss": 0.6600703597068787, "step": 1784 }, { "epoch": 0.485054347826087, "grad_norm": 1.0768095996228464, "learning_rate": 1.8043436005820247e-05, "loss": 0.4823746681213379, "step": 1785 }, { "epoch": 0.48532608695652174, "grad_norm": 1.251018871185221, "learning_rate": 1.8040765539229942e-05, "loss": 0.6182702779769897, "step": 1786 }, { "epoch": 0.4855978260869565, "grad_norm": 1.1452345155379144, "learning_rate": 1.8038093449356423e-05, "loss": 0.5748276710510254, "step": 1787 }, { "epoch": 0.48586956521739133, "grad_norm": 1.0497547287892175, "learning_rate": 1.8035419736739136e-05, "loss": 0.4499424695968628, "step": 1788 }, { "epoch": 0.4861413043478261, "grad_norm": 0.9454402078984212, "learning_rate": 1.803274440191786e-05, "loss": 0.45074236392974854, "step": 1789 }, { "epoch": 0.48641304347826086, "grad_norm": 1.056222232490243, "learning_rate": 1.8030067445432687e-05, "loss": 0.5012797117233276, "step": 1790 }, { "epoch": 0.48668478260869563, "grad_norm": 1.0276427290422552, "learning_rate": 1.8027388867824052e-05, "loss": 0.48786288499832153, "step": 1791 }, { "epoch": 0.48695652173913045, "grad_norm": 1.1668159996686251, "learning_rate": 1.8024708669632707e-05, "loss": 0.5324670076370239, "step": 1792 }, { "epoch": 0.4872282608695652, "grad_norm": 0.888228741024276, "learning_rate": 1.8022026851399737e-05, "loss": 0.4735208749771118, "step": 1793 }, { "epoch": 0.4875, "grad_norm": 1.1466915656331431, "learning_rate": 1.801934341366655e-05, "loss": 0.4725959002971649, "step": 1794 }, { "epoch": 0.48777173913043476, "grad_norm": 0.9327844656494809, "learning_rate": 1.8016658356974885e-05, "loss": 0.4677693247795105, "step": 1795 }, { "epoch": 0.4880434782608696, "grad_norm": 1.0017215468188705, "learning_rate": 1.801397168186681e-05, "loss": 0.4925960600376129, "step": 1796 }, { "epoch": 0.48831521739130435, "grad_norm": 1.1156584945067975, "learning_rate": 1.8011283388884703e-05, "loss": 0.5167743563652039, "step": 1797 }, { "epoch": 0.4885869565217391, "grad_norm": 1.0284245054935532, "learning_rate": 1.8008593478571294e-05, "loss": 0.4475448727607727, "step": 1798 }, { "epoch": 0.48885869565217394, "grad_norm": 1.0577770343265316, "learning_rate": 1.800590195146962e-05, "loss": 0.4637291431427002, "step": 1799 }, { "epoch": 0.4891304347826087, "grad_norm": 0.9731777668656757, "learning_rate": 1.800320880812305e-05, "loss": 0.5534663200378418, "step": 1800 }, { "epoch": 0.48940217391304347, "grad_norm": 1.0289924840767704, "learning_rate": 1.8000514049075283e-05, "loss": 0.4489121437072754, "step": 1801 }, { "epoch": 0.48967391304347824, "grad_norm": 1.1314075010920246, "learning_rate": 1.7997817674870345e-05, "loss": 0.593471884727478, "step": 1802 }, { "epoch": 0.48994565217391306, "grad_norm": 0.9754393046963077, "learning_rate": 1.799511968605258e-05, "loss": 0.46663233637809753, "step": 1803 }, { "epoch": 0.4902173913043478, "grad_norm": 0.8200298826120617, "learning_rate": 1.7992420083166657e-05, "loss": 0.39332056045532227, "step": 1804 }, { "epoch": 0.4904891304347826, "grad_norm": 0.8297366002976075, "learning_rate": 1.7989718866757587e-05, "loss": 0.3239939212799072, "step": 1805 }, { "epoch": 0.4907608695652174, "grad_norm": 1.0172942419696076, "learning_rate": 1.7987016037370687e-05, "loss": 0.48374366760253906, "step": 1806 }, { "epoch": 0.4910326086956522, "grad_norm": 0.9349572941334282, "learning_rate": 1.798431159555162e-05, "loss": 0.43499910831451416, "step": 1807 }, { "epoch": 0.49130434782608695, "grad_norm": 0.9894408562671808, "learning_rate": 1.7981605541846354e-05, "loss": 0.4471402168273926, "step": 1808 }, { "epoch": 0.4915760869565217, "grad_norm": 0.9686185131477654, "learning_rate": 1.7978897876801192e-05, "loss": 0.4351581335067749, "step": 1809 }, { "epoch": 0.49184782608695654, "grad_norm": 1.0721998198418239, "learning_rate": 1.7976188600962764e-05, "loss": 0.5895932912826538, "step": 1810 }, { "epoch": 0.4921195652173913, "grad_norm": 0.8150765486336169, "learning_rate": 1.7973477714878027e-05, "loss": 0.40696269273757935, "step": 1811 }, { "epoch": 0.4923913043478261, "grad_norm": 1.2897989313502205, "learning_rate": 1.7970765219094252e-05, "loss": 0.4542059302330017, "step": 1812 }, { "epoch": 0.49266304347826084, "grad_norm": 1.0533917047042771, "learning_rate": 1.7968051114159046e-05, "loss": 0.49763721227645874, "step": 1813 }, { "epoch": 0.49293478260869567, "grad_norm": 1.0566359783181811, "learning_rate": 1.7965335400620338e-05, "loss": 0.5436015129089355, "step": 1814 }, { "epoch": 0.49320652173913043, "grad_norm": 1.1657644512690049, "learning_rate": 1.796261807902638e-05, "loss": 0.4872962236404419, "step": 1815 }, { "epoch": 0.4934782608695652, "grad_norm": 1.0937760481070862, "learning_rate": 1.7959899149925748e-05, "loss": 0.5864538550376892, "step": 1816 }, { "epoch": 0.49375, "grad_norm": 0.9427988356929936, "learning_rate": 1.7957178613867343e-05, "loss": 0.45006459951400757, "step": 1817 }, { "epoch": 0.4940217391304348, "grad_norm": 1.132834767211491, "learning_rate": 1.7954456471400393e-05, "loss": 0.5395575165748596, "step": 1818 }, { "epoch": 0.49429347826086956, "grad_norm": 1.0046933484130944, "learning_rate": 1.7951732723074453e-05, "loss": 0.5213234424591064, "step": 1819 }, { "epoch": 0.4945652173913043, "grad_norm": 1.1395748816984497, "learning_rate": 1.7949007369439393e-05, "loss": 0.6587885618209839, "step": 1820 }, { "epoch": 0.49483695652173915, "grad_norm": 1.191592437505186, "learning_rate": 1.794628041104541e-05, "loss": 0.5423378944396973, "step": 1821 }, { "epoch": 0.4951086956521739, "grad_norm": 0.931061088304379, "learning_rate": 1.794355184844303e-05, "loss": 0.4595940113067627, "step": 1822 }, { "epoch": 0.4953804347826087, "grad_norm": 0.9980344352418246, "learning_rate": 1.79408216821831e-05, "loss": 0.5461511611938477, "step": 1823 }, { "epoch": 0.4956521739130435, "grad_norm": 1.0960536536154712, "learning_rate": 1.7938089912816796e-05, "loss": 0.49992111325263977, "step": 1824 }, { "epoch": 0.49592391304347827, "grad_norm": 1.1158473025157671, "learning_rate": 1.79353565408956e-05, "loss": 0.5325471758842468, "step": 1825 }, { "epoch": 0.49619565217391304, "grad_norm": 0.7641166939851124, "learning_rate": 1.7932621566971335e-05, "loss": 0.36970144510269165, "step": 1826 }, { "epoch": 0.4964673913043478, "grad_norm": 1.010952117533903, "learning_rate": 1.7929884991596144e-05, "loss": 0.44183769822120667, "step": 1827 }, { "epoch": 0.4967391304347826, "grad_norm": 0.9529054983366463, "learning_rate": 1.7927146815322492e-05, "loss": 0.4268456995487213, "step": 1828 }, { "epoch": 0.4970108695652174, "grad_norm": 1.0876884013901975, "learning_rate": 1.792440703870316e-05, "loss": 0.6004570722579956, "step": 1829 }, { "epoch": 0.49728260869565216, "grad_norm": 0.9956663317106877, "learning_rate": 1.792166566229127e-05, "loss": 0.48264193534851074, "step": 1830 }, { "epoch": 0.49755434782608693, "grad_norm": 1.4139412432061114, "learning_rate": 1.7918922686640248e-05, "loss": 0.6081105470657349, "step": 1831 }, { "epoch": 0.49782608695652175, "grad_norm": 1.192529022256738, "learning_rate": 1.791617811230385e-05, "loss": 0.5740145444869995, "step": 1832 }, { "epoch": 0.4980978260869565, "grad_norm": 1.3262239304388719, "learning_rate": 1.791343193983616e-05, "loss": 0.5375788807868958, "step": 1833 }, { "epoch": 0.4983695652173913, "grad_norm": 0.9143635035107681, "learning_rate": 1.7910684169791573e-05, "loss": 0.3718656301498413, "step": 1834 }, { "epoch": 0.4986413043478261, "grad_norm": 1.04422938004375, "learning_rate": 1.790793480272482e-05, "loss": 0.5720640420913696, "step": 1835 }, { "epoch": 0.4989130434782609, "grad_norm": 0.9509812201731016, "learning_rate": 1.7905183839190946e-05, "loss": 0.4930643141269684, "step": 1836 }, { "epoch": 0.49918478260869564, "grad_norm": 0.9799854216946705, "learning_rate": 1.7902431279745317e-05, "loss": 0.4629703164100647, "step": 1837 }, { "epoch": 0.4994565217391304, "grad_norm": 1.0162219651471942, "learning_rate": 1.789967712494363e-05, "loss": 0.43383878469467163, "step": 1838 }, { "epoch": 0.49972826086956523, "grad_norm": 1.1825026328119026, "learning_rate": 1.789692137534189e-05, "loss": 0.5450453758239746, "step": 1839 }, { "epoch": 0.5, "grad_norm": 1.1097441932505803, "learning_rate": 1.7894164031496443e-05, "loss": 0.588183581829071, "step": 1840 }, { "epoch": 0.5002717391304348, "grad_norm": 1.020292239655415, "learning_rate": 1.789140509396394e-05, "loss": 0.3936416506767273, "step": 1841 }, { "epoch": 0.5005434782608695, "grad_norm": 0.9952970512394362, "learning_rate": 1.7888644563301356e-05, "loss": 0.4745519161224365, "step": 1842 }, { "epoch": 0.5008152173913043, "grad_norm": 0.9824934181376992, "learning_rate": 1.7885882440066e-05, "loss": 0.46617239713668823, "step": 1843 }, { "epoch": 0.5010869565217392, "grad_norm": 0.8677306488736711, "learning_rate": 1.788311872481549e-05, "loss": 0.3744199275970459, "step": 1844 }, { "epoch": 0.501358695652174, "grad_norm": 0.9957460872236862, "learning_rate": 1.788035341810777e-05, "loss": 0.4926159083843231, "step": 1845 }, { "epoch": 0.5016304347826087, "grad_norm": 1.11672379372516, "learning_rate": 1.7877586520501104e-05, "loss": 0.5842005014419556, "step": 1846 }, { "epoch": 0.5019021739130435, "grad_norm": 1.0678727383605564, "learning_rate": 1.7874818032554074e-05, "loss": 0.5509307384490967, "step": 1847 }, { "epoch": 0.5021739130434782, "grad_norm": 1.1435386549856197, "learning_rate": 1.7872047954825594e-05, "loss": 0.5461618900299072, "step": 1848 }, { "epoch": 0.502445652173913, "grad_norm": 0.9807647338305983, "learning_rate": 1.7869276287874894e-05, "loss": 0.5429204702377319, "step": 1849 }, { "epoch": 0.5027173913043478, "grad_norm": 0.9421632449758951, "learning_rate": 1.7866503032261514e-05, "loss": 0.45770263671875, "step": 1850 }, { "epoch": 0.5029891304347827, "grad_norm": 1.0153251766793474, "learning_rate": 1.7863728188545326e-05, "loss": 0.5672116875648499, "step": 1851 }, { "epoch": 0.5032608695652174, "grad_norm": 1.057811884093097, "learning_rate": 1.7860951757286523e-05, "loss": 0.3887432813644409, "step": 1852 }, { "epoch": 0.5035326086956522, "grad_norm": 1.0506912013868106, "learning_rate": 1.7858173739045615e-05, "loss": 0.5314965844154358, "step": 1853 }, { "epoch": 0.503804347826087, "grad_norm": 1.0771817727154227, "learning_rate": 1.7855394134383433e-05, "loss": 0.47004544734954834, "step": 1854 }, { "epoch": 0.5040760869565217, "grad_norm": 0.9632254639690975, "learning_rate": 1.7852612943861128e-05, "loss": 0.4530360996723175, "step": 1855 }, { "epoch": 0.5043478260869565, "grad_norm": 0.9305226021118134, "learning_rate": 1.7849830168040165e-05, "loss": 0.3860244154930115, "step": 1856 }, { "epoch": 0.5046195652173913, "grad_norm": 0.8062502360834163, "learning_rate": 1.7847045807482347e-05, "loss": 0.40131914615631104, "step": 1857 }, { "epoch": 0.5048913043478261, "grad_norm": 0.9945602381241139, "learning_rate": 1.7844259862749778e-05, "loss": 0.496623158454895, "step": 1858 }, { "epoch": 0.5051630434782609, "grad_norm": 1.1805340580241521, "learning_rate": 1.7841472334404893e-05, "loss": 0.6650144457817078, "step": 1859 }, { "epoch": 0.5054347826086957, "grad_norm": 1.1191833082049947, "learning_rate": 1.783868322301044e-05, "loss": 0.5996521711349487, "step": 1860 }, { "epoch": 0.5057065217391304, "grad_norm": 1.1385410284140827, "learning_rate": 1.7835892529129486e-05, "loss": 0.5243428945541382, "step": 1861 }, { "epoch": 0.5059782608695652, "grad_norm": 1.1707060631445125, "learning_rate": 1.7833100253325427e-05, "loss": 0.6141012907028198, "step": 1862 }, { "epoch": 0.50625, "grad_norm": 0.8197991782966647, "learning_rate": 1.7830306396161972e-05, "loss": 0.3602915406227112, "step": 1863 }, { "epoch": 0.5065217391304347, "grad_norm": 1.1258510994336213, "learning_rate": 1.7827510958203147e-05, "loss": 0.5845584869384766, "step": 1864 }, { "epoch": 0.5067934782608695, "grad_norm": 0.9902899673379545, "learning_rate": 1.78247139400133e-05, "loss": 0.47288280725479126, "step": 1865 }, { "epoch": 0.5070652173913044, "grad_norm": 1.1467028484474633, "learning_rate": 1.78219153421571e-05, "loss": 0.5282984375953674, "step": 1866 }, { "epoch": 0.5073369565217392, "grad_norm": 1.1965477503136288, "learning_rate": 1.781911516519953e-05, "loss": 0.5665651559829712, "step": 1867 }, { "epoch": 0.5076086956521739, "grad_norm": 1.198637726877144, "learning_rate": 1.7816313409705896e-05, "loss": 0.5733317136764526, "step": 1868 }, { "epoch": 0.5078804347826087, "grad_norm": 0.9444893497951704, "learning_rate": 1.781351007624182e-05, "loss": 0.45581597089767456, "step": 1869 }, { "epoch": 0.5081521739130435, "grad_norm": 1.0066744805999583, "learning_rate": 1.7810705165373245e-05, "loss": 0.44012266397476196, "step": 1870 }, { "epoch": 0.5084239130434782, "grad_norm": 1.1679927646812776, "learning_rate": 1.780789867766643e-05, "loss": 0.5582298040390015, "step": 1871 }, { "epoch": 0.508695652173913, "grad_norm": 0.8168295631221771, "learning_rate": 1.7805090613687953e-05, "loss": 0.4110996723175049, "step": 1872 }, { "epoch": 0.5089673913043479, "grad_norm": 1.1418694227378083, "learning_rate": 1.7802280974004717e-05, "loss": 0.530990719795227, "step": 1873 }, { "epoch": 0.5092391304347826, "grad_norm": 1.0104670744983464, "learning_rate": 1.7799469759183924e-05, "loss": 0.5135195851325989, "step": 1874 }, { "epoch": 0.5095108695652174, "grad_norm": 0.866161102251934, "learning_rate": 1.779665696979312e-05, "loss": 0.3508322834968567, "step": 1875 }, { "epoch": 0.5097826086956522, "grad_norm": 1.0421532233064932, "learning_rate": 1.7793842606400144e-05, "loss": 0.48144620656967163, "step": 1876 }, { "epoch": 0.5100543478260869, "grad_norm": 0.9223057255935282, "learning_rate": 1.779102666957317e-05, "loss": 0.4658224284648895, "step": 1877 }, { "epoch": 0.5103260869565217, "grad_norm": 0.9208339213042614, "learning_rate": 1.7788209159880686e-05, "loss": 0.44257161021232605, "step": 1878 }, { "epoch": 0.5105978260869565, "grad_norm": 1.0993589806655018, "learning_rate": 1.7785390077891493e-05, "loss": 0.4887821674346924, "step": 1879 }, { "epoch": 0.5108695652173914, "grad_norm": 0.9118778231518782, "learning_rate": 1.7782569424174716e-05, "loss": 0.46590960025787354, "step": 1880 }, { "epoch": 0.5111413043478261, "grad_norm": 0.9830670398297952, "learning_rate": 1.7779747199299785e-05, "loss": 0.5388048887252808, "step": 1881 }, { "epoch": 0.5114130434782609, "grad_norm": 0.8620561656679266, "learning_rate": 1.7776923403836465e-05, "loss": 0.4111204147338867, "step": 1882 }, { "epoch": 0.5116847826086957, "grad_norm": 0.9290684258529743, "learning_rate": 1.777409803835482e-05, "loss": 0.44723495841026306, "step": 1883 }, { "epoch": 0.5119565217391304, "grad_norm": 1.0619954038127974, "learning_rate": 1.777127110342524e-05, "loss": 0.48752903938293457, "step": 1884 }, { "epoch": 0.5122282608695652, "grad_norm": 1.1320991205605901, "learning_rate": 1.7768442599618444e-05, "loss": 0.5008751749992371, "step": 1885 }, { "epoch": 0.5125, "grad_norm": 1.0765951828511853, "learning_rate": 1.776561252750544e-05, "loss": 0.5526731610298157, "step": 1886 }, { "epoch": 0.5127717391304348, "grad_norm": 0.8103202984706743, "learning_rate": 1.7762780887657576e-05, "loss": 0.3352450430393219, "step": 1887 }, { "epoch": 0.5130434782608696, "grad_norm": 0.98997981024506, "learning_rate": 1.7759947680646506e-05, "loss": 0.49125486612319946, "step": 1888 }, { "epoch": 0.5133152173913044, "grad_norm": 0.9905349453675801, "learning_rate": 1.77571129070442e-05, "loss": 0.5290700197219849, "step": 1889 }, { "epoch": 0.5135869565217391, "grad_norm": 0.9644664199270804, "learning_rate": 1.775427656742295e-05, "loss": 0.44694259762763977, "step": 1890 }, { "epoch": 0.5138586956521739, "grad_norm": 0.9458461532949844, "learning_rate": 1.775143866235536e-05, "loss": 0.4508589506149292, "step": 1891 }, { "epoch": 0.5141304347826087, "grad_norm": 0.8902069239238803, "learning_rate": 1.7748599192414354e-05, "loss": 0.39378997683525085, "step": 1892 }, { "epoch": 0.5144021739130434, "grad_norm": 1.160129837870234, "learning_rate": 1.7745758158173164e-05, "loss": 0.6077077388763428, "step": 1893 }, { "epoch": 0.5146739130434783, "grad_norm": 0.9644126907261416, "learning_rate": 1.774291556020534e-05, "loss": 0.45039546489715576, "step": 1894 }, { "epoch": 0.5149456521739131, "grad_norm": 1.185937671261707, "learning_rate": 1.774007139908476e-05, "loss": 0.5715851783752441, "step": 1895 }, { "epoch": 0.5152173913043478, "grad_norm": 1.303338771121637, "learning_rate": 1.7737225675385605e-05, "loss": 0.4706299304962158, "step": 1896 }, { "epoch": 0.5154891304347826, "grad_norm": 1.050654464876797, "learning_rate": 1.7734378389682373e-05, "loss": 0.49211132526397705, "step": 1897 }, { "epoch": 0.5157608695652174, "grad_norm": 1.0665286148742237, "learning_rate": 1.7731529542549875e-05, "loss": 0.5127550959587097, "step": 1898 }, { "epoch": 0.5160326086956522, "grad_norm": 0.9961485623688807, "learning_rate": 1.7728679134563247e-05, "loss": 0.5580568313598633, "step": 1899 }, { "epoch": 0.5163043478260869, "grad_norm": 1.1023021189288267, "learning_rate": 1.7725827166297934e-05, "loss": 0.5808174014091492, "step": 1900 }, { "epoch": 0.5165760869565217, "grad_norm": 0.8895496448968061, "learning_rate": 1.772297363832969e-05, "loss": 0.40757283568382263, "step": 1901 }, { "epoch": 0.5168478260869566, "grad_norm": 0.8789806733350173, "learning_rate": 1.7720118551234594e-05, "loss": 0.4116929769515991, "step": 1902 }, { "epoch": 0.5171195652173913, "grad_norm": 1.0496861168876828, "learning_rate": 1.7717261905589037e-05, "loss": 0.4752958118915558, "step": 1903 }, { "epoch": 0.5173913043478261, "grad_norm": 0.9999583484210472, "learning_rate": 1.7714403701969725e-05, "loss": 0.5765130519866943, "step": 1904 }, { "epoch": 0.5176630434782609, "grad_norm": 0.9717912383837903, "learning_rate": 1.7711543940953667e-05, "loss": 0.5097658634185791, "step": 1905 }, { "epoch": 0.5179347826086956, "grad_norm": 1.0178655190653603, "learning_rate": 1.7708682623118208e-05, "loss": 0.4916210174560547, "step": 1906 }, { "epoch": 0.5182065217391304, "grad_norm": 1.0152280583422186, "learning_rate": 1.7705819749040994e-05, "loss": 0.5650579333305359, "step": 1907 }, { "epoch": 0.5184782608695652, "grad_norm": 1.1530462040628415, "learning_rate": 1.770295531929998e-05, "loss": 0.5175379514694214, "step": 1908 }, { "epoch": 0.51875, "grad_norm": 0.997262039508151, "learning_rate": 1.7700089334473446e-05, "loss": 0.49098724126815796, "step": 1909 }, { "epoch": 0.5190217391304348, "grad_norm": 1.060581859698459, "learning_rate": 1.769722179513998e-05, "loss": 0.5777202844619751, "step": 1910 }, { "epoch": 0.5192934782608696, "grad_norm": 0.8272810768990269, "learning_rate": 1.7694352701878486e-05, "loss": 0.36735057830810547, "step": 1911 }, { "epoch": 0.5195652173913043, "grad_norm": 1.1292224166995988, "learning_rate": 1.7691482055268187e-05, "loss": 0.5889319181442261, "step": 1912 }, { "epoch": 0.5198369565217391, "grad_norm": 0.8848817497723904, "learning_rate": 1.768860985588861e-05, "loss": 0.46274101734161377, "step": 1913 }, { "epoch": 0.5201086956521739, "grad_norm": 1.0279163696020197, "learning_rate": 1.7685736104319596e-05, "loss": 0.5064065456390381, "step": 1914 }, { "epoch": 0.5203804347826086, "grad_norm": 0.8449750832513038, "learning_rate": 1.7682860801141306e-05, "loss": 0.4460434019565582, "step": 1915 }, { "epoch": 0.5206521739130435, "grad_norm": 1.0536085401182689, "learning_rate": 1.7679983946934212e-05, "loss": 0.5264816284179688, "step": 1916 }, { "epoch": 0.5209239130434783, "grad_norm": 1.0126658181948787, "learning_rate": 1.7677105542279093e-05, "loss": 0.4454791247844696, "step": 1917 }, { "epoch": 0.5211956521739131, "grad_norm": 0.9069844696447322, "learning_rate": 1.7674225587757056e-05, "loss": 0.3960435390472412, "step": 1918 }, { "epoch": 0.5214673913043478, "grad_norm": 0.9529637839909328, "learning_rate": 1.76713440839495e-05, "loss": 0.501092791557312, "step": 1919 }, { "epoch": 0.5217391304347826, "grad_norm": 1.0350245446850794, "learning_rate": 1.7668461031438155e-05, "loss": 0.5366734266281128, "step": 1920 }, { "epoch": 0.5220108695652174, "grad_norm": 1.2190089511211954, "learning_rate": 1.7665576430805053e-05, "loss": 0.5114688873291016, "step": 1921 }, { "epoch": 0.5222826086956521, "grad_norm": 1.0101123755370878, "learning_rate": 1.7662690282632545e-05, "loss": 0.5197491645812988, "step": 1922 }, { "epoch": 0.522554347826087, "grad_norm": 0.9558483330539101, "learning_rate": 1.7659802587503288e-05, "loss": 0.5403226613998413, "step": 1923 }, { "epoch": 0.5228260869565218, "grad_norm": 1.0106805339478848, "learning_rate": 1.765691334600026e-05, "loss": 0.43248310685157776, "step": 1924 }, { "epoch": 0.5230978260869565, "grad_norm": 1.1722461410717935, "learning_rate": 1.7654022558706738e-05, "loss": 0.58132004737854, "step": 1925 }, { "epoch": 0.5233695652173913, "grad_norm": 1.1004872851504612, "learning_rate": 1.7651130226206325e-05, "loss": 0.5108939409255981, "step": 1926 }, { "epoch": 0.5236413043478261, "grad_norm": 0.848098759574428, "learning_rate": 1.7648236349082928e-05, "loss": 0.4536004662513733, "step": 1927 }, { "epoch": 0.5239130434782608, "grad_norm": 0.9596180873729623, "learning_rate": 1.764534092792077e-05, "loss": 0.49186062812805176, "step": 1928 }, { "epoch": 0.5241847826086956, "grad_norm": 1.288318700220512, "learning_rate": 1.7642443963304377e-05, "loss": 0.4756736755371094, "step": 1929 }, { "epoch": 0.5244565217391305, "grad_norm": 1.1831712224483584, "learning_rate": 1.7639545455818603e-05, "loss": 0.4978850185871124, "step": 1930 }, { "epoch": 0.5247282608695653, "grad_norm": 0.8540148543245291, "learning_rate": 1.7636645406048597e-05, "loss": 0.42444688081741333, "step": 1931 }, { "epoch": 0.525, "grad_norm": 1.0881542278920093, "learning_rate": 1.7633743814579823e-05, "loss": 0.6326345205307007, "step": 1932 }, { "epoch": 0.5252717391304348, "grad_norm": 0.9719988814784722, "learning_rate": 1.7630840681998068e-05, "loss": 0.4808548390865326, "step": 1933 }, { "epoch": 0.5255434782608696, "grad_norm": 1.1381443012540953, "learning_rate": 1.7627936008889414e-05, "loss": 0.5062639117240906, "step": 1934 }, { "epoch": 0.5258152173913043, "grad_norm": 0.8909954822904337, "learning_rate": 1.7625029795840262e-05, "loss": 0.44700556993484497, "step": 1935 }, { "epoch": 0.5260869565217391, "grad_norm": 1.0945427262921548, "learning_rate": 1.762212204343733e-05, "loss": 0.502819299697876, "step": 1936 }, { "epoch": 0.5263586956521739, "grad_norm": 0.9751008954591841, "learning_rate": 1.7619212752267628e-05, "loss": 0.5146320462226868, "step": 1937 }, { "epoch": 0.5266304347826087, "grad_norm": 1.0219921629235713, "learning_rate": 1.7616301922918502e-05, "loss": 0.504403829574585, "step": 1938 }, { "epoch": 0.5269021739130435, "grad_norm": 0.968678064399498, "learning_rate": 1.7613389555977586e-05, "loss": 0.48484551906585693, "step": 1939 }, { "epoch": 0.5271739130434783, "grad_norm": 1.0881111924081444, "learning_rate": 1.7610475652032836e-05, "loss": 0.545257568359375, "step": 1940 }, { "epoch": 0.527445652173913, "grad_norm": 0.953914209197749, "learning_rate": 1.7607560211672515e-05, "loss": 0.4700406789779663, "step": 1941 }, { "epoch": 0.5277173913043478, "grad_norm": 1.1143737117138095, "learning_rate": 1.7604643235485197e-05, "loss": 0.45239564776420593, "step": 1942 }, { "epoch": 0.5279891304347826, "grad_norm": 1.1166891983232683, "learning_rate": 1.760172472405977e-05, "loss": 0.4947739839553833, "step": 1943 }, { "epoch": 0.5282608695652173, "grad_norm": 0.9455620793733664, "learning_rate": 1.759880467798542e-05, "loss": 0.38552427291870117, "step": 1944 }, { "epoch": 0.5285326086956522, "grad_norm": 1.1877742814537946, "learning_rate": 1.759588309785166e-05, "loss": 0.5822232961654663, "step": 1945 }, { "epoch": 0.528804347826087, "grad_norm": 1.1615793316925764, "learning_rate": 1.75929599842483e-05, "loss": 0.4886865019798279, "step": 1946 }, { "epoch": 0.5290760869565218, "grad_norm": 0.95685606885347, "learning_rate": 1.7590035337765455e-05, "loss": 0.39508286118507385, "step": 1947 }, { "epoch": 0.5293478260869565, "grad_norm": 0.9949997147038939, "learning_rate": 1.758710915899357e-05, "loss": 0.5141222476959229, "step": 1948 }, { "epoch": 0.5296195652173913, "grad_norm": 0.9825548689092536, "learning_rate": 1.758418144852338e-05, "loss": 0.4040827751159668, "step": 1949 }, { "epoch": 0.529891304347826, "grad_norm": 1.0000004142656962, "learning_rate": 1.758125220694594e-05, "loss": 0.5446081161499023, "step": 1950 }, { "epoch": 0.5301630434782608, "grad_norm": 1.2174979484812907, "learning_rate": 1.757832143485261e-05, "loss": 0.5564822554588318, "step": 1951 }, { "epoch": 0.5304347826086957, "grad_norm": 0.9619437908824414, "learning_rate": 1.7575389132835052e-05, "loss": 0.4306764006614685, "step": 1952 }, { "epoch": 0.5307065217391305, "grad_norm": 1.044124458686629, "learning_rate": 1.757245530148525e-05, "loss": 0.5215844511985779, "step": 1953 }, { "epoch": 0.5309782608695652, "grad_norm": 1.0870320724380649, "learning_rate": 1.756951994139549e-05, "loss": 0.48044389486312866, "step": 1954 }, { "epoch": 0.53125, "grad_norm": 1.1345166734764818, "learning_rate": 1.7566583053158367e-05, "loss": 0.5814797878265381, "step": 1955 }, { "epoch": 0.5315217391304348, "grad_norm": 0.8933875732046223, "learning_rate": 1.7563644637366786e-05, "loss": 0.4467318654060364, "step": 1956 }, { "epoch": 0.5317934782608695, "grad_norm": 0.8942908660981637, "learning_rate": 1.756070469461396e-05, "loss": 0.4398624300956726, "step": 1957 }, { "epoch": 0.5320652173913043, "grad_norm": 1.1425709794818726, "learning_rate": 1.7557763225493407e-05, "loss": 0.6090289354324341, "step": 1958 }, { "epoch": 0.5323369565217392, "grad_norm": 1.0243692171030687, "learning_rate": 1.7554820230598958e-05, "loss": 0.4468216300010681, "step": 1959 }, { "epoch": 0.532608695652174, "grad_norm": 0.7991343968940934, "learning_rate": 1.7551875710524743e-05, "loss": 0.3407285809516907, "step": 1960 }, { "epoch": 0.5328804347826087, "grad_norm": 0.8781935432093582, "learning_rate": 1.7548929665865217e-05, "loss": 0.5018380880355835, "step": 1961 }, { "epoch": 0.5331521739130435, "grad_norm": 1.3414358898904908, "learning_rate": 1.7545982097215125e-05, "loss": 0.6236938238143921, "step": 1962 }, { "epoch": 0.5334239130434782, "grad_norm": 0.9362420054318938, "learning_rate": 1.7543033005169532e-05, "loss": 0.4408566951751709, "step": 1963 }, { "epoch": 0.533695652173913, "grad_norm": 1.0008263833682776, "learning_rate": 1.7540082390323803e-05, "loss": 0.5022101402282715, "step": 1964 }, { "epoch": 0.5339673913043478, "grad_norm": 1.1913136187882922, "learning_rate": 1.7537130253273613e-05, "loss": 0.6234031319618225, "step": 1965 }, { "epoch": 0.5342391304347827, "grad_norm": 1.0155680681412478, "learning_rate": 1.7534176594614944e-05, "loss": 0.5334949493408203, "step": 1966 }, { "epoch": 0.5345108695652174, "grad_norm": 1.1460929585854762, "learning_rate": 1.7531221414944087e-05, "loss": 0.5200693607330322, "step": 1967 }, { "epoch": 0.5347826086956522, "grad_norm": 0.9984914303010024, "learning_rate": 1.7528264714857638e-05, "loss": 0.4669889211654663, "step": 1968 }, { "epoch": 0.535054347826087, "grad_norm": 1.1403926772848598, "learning_rate": 1.7525306494952498e-05, "loss": 0.5228513479232788, "step": 1969 }, { "epoch": 0.5353260869565217, "grad_norm": 1.0549737500104706, "learning_rate": 1.7522346755825882e-05, "loss": 0.47798436880111694, "step": 1970 }, { "epoch": 0.5355978260869565, "grad_norm": 1.1737085564714567, "learning_rate": 1.7519385498075308e-05, "loss": 0.5834509134292603, "step": 1971 }, { "epoch": 0.5358695652173913, "grad_norm": 1.0916198955249874, "learning_rate": 1.751642272229859e-05, "loss": 0.5623019933700562, "step": 1972 }, { "epoch": 0.5361413043478261, "grad_norm": 1.1021911341239923, "learning_rate": 1.751345842909387e-05, "loss": 0.5415070056915283, "step": 1973 }, { "epoch": 0.5364130434782609, "grad_norm": 0.8108045325736832, "learning_rate": 1.7510492619059582e-05, "loss": 0.3493044376373291, "step": 1974 }, { "epoch": 0.5366847826086957, "grad_norm": 1.042865529569911, "learning_rate": 1.7507525292794458e-05, "loss": 0.521115243434906, "step": 1975 }, { "epoch": 0.5369565217391304, "grad_norm": 1.077070083942969, "learning_rate": 1.7504556450897557e-05, "loss": 0.48638296127319336, "step": 1976 }, { "epoch": 0.5372282608695652, "grad_norm": 1.0194286183961556, "learning_rate": 1.7501586093968236e-05, "loss": 0.5390794277191162, "step": 1977 }, { "epoch": 0.5375, "grad_norm": 0.9716794167111371, "learning_rate": 1.7498614222606148e-05, "loss": 0.481827050447464, "step": 1978 }, { "epoch": 0.5377717391304347, "grad_norm": 0.9934622309369606, "learning_rate": 1.7495640837411265e-05, "loss": 0.4803565442562103, "step": 1979 }, { "epoch": 0.5380434782608695, "grad_norm": 1.0508765650805703, "learning_rate": 1.749266593898385e-05, "loss": 0.49441707134246826, "step": 1980 }, { "epoch": 0.5383152173913044, "grad_norm": 1.1039564771756967, "learning_rate": 1.7489689527924492e-05, "loss": 0.6023865342140198, "step": 1981 }, { "epoch": 0.5385869565217392, "grad_norm": 1.0813391264218337, "learning_rate": 1.7486711604834067e-05, "loss": 0.5094078779220581, "step": 1982 }, { "epoch": 0.5388586956521739, "grad_norm": 0.9698990192969552, "learning_rate": 1.7483732170313766e-05, "loss": 0.41226381063461304, "step": 1983 }, { "epoch": 0.5391304347826087, "grad_norm": 1.0534343152849137, "learning_rate": 1.7480751224965083e-05, "loss": 0.5224676132202148, "step": 1984 }, { "epoch": 0.5394021739130435, "grad_norm": 1.0390416682934231, "learning_rate": 1.747776876938981e-05, "loss": 0.5851411819458008, "step": 1985 }, { "epoch": 0.5396739130434782, "grad_norm": 0.9727241195635113, "learning_rate": 1.747478480419006e-05, "loss": 0.5014418363571167, "step": 1986 }, { "epoch": 0.539945652173913, "grad_norm": 0.8771711131101365, "learning_rate": 1.7471799329968232e-05, "loss": 0.36651328206062317, "step": 1987 }, { "epoch": 0.5402173913043479, "grad_norm": 1.0781496326117224, "learning_rate": 1.7468812347327043e-05, "loss": 0.4629722833633423, "step": 1988 }, { "epoch": 0.5404891304347826, "grad_norm": 0.938782957063364, "learning_rate": 1.7465823856869505e-05, "loss": 0.5085287094116211, "step": 1989 }, { "epoch": 0.5407608695652174, "grad_norm": 1.0029791257457585, "learning_rate": 1.7462833859198948e-05, "loss": 0.4291406571865082, "step": 1990 }, { "epoch": 0.5410326086956522, "grad_norm": 0.9006397097349359, "learning_rate": 1.7459842354918995e-05, "loss": 0.40664517879486084, "step": 1991 }, { "epoch": 0.5413043478260869, "grad_norm": 1.081804062763705, "learning_rate": 1.745684934463357e-05, "loss": 0.5256220698356628, "step": 1992 }, { "epoch": 0.5415760869565217, "grad_norm": 0.9945313763418377, "learning_rate": 1.7453854828946913e-05, "loss": 0.5021637082099915, "step": 1993 }, { "epoch": 0.5418478260869565, "grad_norm": 1.0603071584907657, "learning_rate": 1.7450858808463557e-05, "loss": 0.5627850294113159, "step": 1994 }, { "epoch": 0.5421195652173914, "grad_norm": 1.0038714605033296, "learning_rate": 1.7447861283788346e-05, "loss": 0.5032327175140381, "step": 1995 }, { "epoch": 0.5423913043478261, "grad_norm": 1.0189691155439995, "learning_rate": 1.744486225552643e-05, "loss": 0.4695398211479187, "step": 1996 }, { "epoch": 0.5426630434782609, "grad_norm": 0.9433616387851186, "learning_rate": 1.7441861724283253e-05, "loss": 0.48999500274658203, "step": 1997 }, { "epoch": 0.5429347826086957, "grad_norm": 1.1164010326713505, "learning_rate": 1.7438859690664568e-05, "loss": 0.6341030597686768, "step": 1998 }, { "epoch": 0.5432065217391304, "grad_norm": 0.9899981682974374, "learning_rate": 1.7435856155276426e-05, "loss": 0.5045713782310486, "step": 1999 }, { "epoch": 0.5434782608695652, "grad_norm": 0.8786738109555827, "learning_rate": 1.7432851118725196e-05, "loss": 0.3729885518550873, "step": 2000 }, { "epoch": 0.54375, "grad_norm": 0.8958080982371445, "learning_rate": 1.7429844581617532e-05, "loss": 0.4573911130428314, "step": 2001 }, { "epoch": 0.5440217391304348, "grad_norm": 1.0626080382047571, "learning_rate": 1.74268365445604e-05, "loss": 0.5641865730285645, "step": 2002 }, { "epoch": 0.5442934782608696, "grad_norm": 1.0357050367697265, "learning_rate": 1.742382700816107e-05, "loss": 0.5063352584838867, "step": 2003 }, { "epoch": 0.5445652173913044, "grad_norm": 0.9962703501501653, "learning_rate": 1.742081597302711e-05, "loss": 0.4806320071220398, "step": 2004 }, { "epoch": 0.5448369565217391, "grad_norm": 0.9568194228814704, "learning_rate": 1.7417803439766394e-05, "loss": 0.3374062180519104, "step": 2005 }, { "epoch": 0.5451086956521739, "grad_norm": 1.154127363974273, "learning_rate": 1.7414789408987095e-05, "loss": 0.591683030128479, "step": 2006 }, { "epoch": 0.5453804347826087, "grad_norm": 1.1318706518180985, "learning_rate": 1.741177388129769e-05, "loss": 0.5229567289352417, "step": 2007 }, { "epoch": 0.5456521739130434, "grad_norm": 1.1604346220216566, "learning_rate": 1.7408756857306967e-05, "loss": 0.6421641707420349, "step": 2008 }, { "epoch": 0.5459239130434783, "grad_norm": 1.0507798212659596, "learning_rate": 1.7405738337623998e-05, "loss": 0.5450689792633057, "step": 2009 }, { "epoch": 0.5461956521739131, "grad_norm": 1.0162602767971738, "learning_rate": 1.7402718322858173e-05, "loss": 0.4873335659503937, "step": 2010 }, { "epoch": 0.5464673913043478, "grad_norm": 1.019255362376881, "learning_rate": 1.7399696813619172e-05, "loss": 0.5065451264381409, "step": 2011 }, { "epoch": 0.5467391304347826, "grad_norm": 1.0686672639404642, "learning_rate": 1.7396673810516986e-05, "loss": 0.5330283045768738, "step": 2012 }, { "epoch": 0.5470108695652174, "grad_norm": 1.001764399454539, "learning_rate": 1.7393649314161908e-05, "loss": 0.4571048319339752, "step": 2013 }, { "epoch": 0.5472826086956522, "grad_norm": 1.0676406552367386, "learning_rate": 1.7390623325164522e-05, "loss": 0.49719688296318054, "step": 2014 }, { "epoch": 0.5475543478260869, "grad_norm": 0.9646595174474273, "learning_rate": 1.738759584413572e-05, "loss": 0.4858357310295105, "step": 2015 }, { "epoch": 0.5478260869565217, "grad_norm": 1.0822147668458373, "learning_rate": 1.7384566871686698e-05, "loss": 0.6245957612991333, "step": 2016 }, { "epoch": 0.5480978260869566, "grad_norm": 0.9455300264331141, "learning_rate": 1.7381536408428948e-05, "loss": 0.5082535147666931, "step": 2017 }, { "epoch": 0.5483695652173913, "grad_norm": 1.0316524393931263, "learning_rate": 1.7378504454974268e-05, "loss": 0.4660283029079437, "step": 2018 }, { "epoch": 0.5486413043478261, "grad_norm": 1.024984925068546, "learning_rate": 1.737547101193475e-05, "loss": 0.5274900197982788, "step": 2019 }, { "epoch": 0.5489130434782609, "grad_norm": 1.0476438347476134, "learning_rate": 1.737243607992279e-05, "loss": 0.5224243998527527, "step": 2020 }, { "epoch": 0.5491847826086956, "grad_norm": 0.8518160217839083, "learning_rate": 1.736939965955109e-05, "loss": 0.3856945037841797, "step": 2021 }, { "epoch": 0.5494565217391304, "grad_norm": 0.927986011974281, "learning_rate": 1.7366361751432645e-05, "loss": 0.45717132091522217, "step": 2022 }, { "epoch": 0.5497282608695652, "grad_norm": 0.9863165852844045, "learning_rate": 1.7363322356180753e-05, "loss": 0.44458627700805664, "step": 2023 }, { "epoch": 0.55, "grad_norm": 0.94438526156637, "learning_rate": 1.736028147440901e-05, "loss": 0.4485102891921997, "step": 2024 }, { "epoch": 0.5502717391304348, "grad_norm": 0.8689487018841433, "learning_rate": 1.735723910673132e-05, "loss": 0.4455784559249878, "step": 2025 }, { "epoch": 0.5505434782608696, "grad_norm": 0.9891808232223679, "learning_rate": 1.7354195253761873e-05, "loss": 0.37452012300491333, "step": 2026 }, { "epoch": 0.5508152173913043, "grad_norm": 1.0404281789587588, "learning_rate": 1.7351149916115174e-05, "loss": 0.5741654634475708, "step": 2027 }, { "epoch": 0.5510869565217391, "grad_norm": 0.9806896325761849, "learning_rate": 1.734810309440602e-05, "loss": 0.5011678338050842, "step": 2028 }, { "epoch": 0.5513586956521739, "grad_norm": 1.2773003467596393, "learning_rate": 1.7345054789249503e-05, "loss": 0.5816105008125305, "step": 2029 }, { "epoch": 0.5516304347826086, "grad_norm": 0.9736210008944807, "learning_rate": 1.734200500126103e-05, "loss": 0.45839741826057434, "step": 2030 }, { "epoch": 0.5519021739130435, "grad_norm": 0.8029179068926637, "learning_rate": 1.7338953731056287e-05, "loss": 0.37550538778305054, "step": 2031 }, { "epoch": 0.5521739130434783, "grad_norm": 1.0436370575349783, "learning_rate": 1.7335900979251278e-05, "loss": 0.5554155111312866, "step": 2032 }, { "epoch": 0.5524456521739131, "grad_norm": 1.059026255109778, "learning_rate": 1.733284674646229e-05, "loss": 0.47737669944763184, "step": 2033 }, { "epoch": 0.5527173913043478, "grad_norm": 1.2080698117726076, "learning_rate": 1.732979103330592e-05, "loss": 0.5606265664100647, "step": 2034 }, { "epoch": 0.5529891304347826, "grad_norm": 0.9741771004502443, "learning_rate": 1.732673384039906e-05, "loss": 0.5166712999343872, "step": 2035 }, { "epoch": 0.5532608695652174, "grad_norm": 1.1091293029842726, "learning_rate": 1.7323675168358908e-05, "loss": 0.5694420337677002, "step": 2036 }, { "epoch": 0.5535326086956521, "grad_norm": 0.9138175705947625, "learning_rate": 1.732061501780294e-05, "loss": 0.44355493783950806, "step": 2037 }, { "epoch": 0.553804347826087, "grad_norm": 1.2347400383716922, "learning_rate": 1.7317553389348957e-05, "loss": 0.618192195892334, "step": 2038 }, { "epoch": 0.5540760869565218, "grad_norm": 0.9987516821094503, "learning_rate": 1.731449028361504e-05, "loss": 0.5316530466079712, "step": 2039 }, { "epoch": 0.5543478260869565, "grad_norm": 1.0447493246748816, "learning_rate": 1.7311425701219576e-05, "loss": 0.5520852208137512, "step": 2040 }, { "epoch": 0.5546195652173913, "grad_norm": 1.008088502475247, "learning_rate": 1.730835964278124e-05, "loss": 0.5700333118438721, "step": 2041 }, { "epoch": 0.5548913043478261, "grad_norm": 1.0110445129155694, "learning_rate": 1.7305292108919028e-05, "loss": 0.5013723373413086, "step": 2042 }, { "epoch": 0.5551630434782608, "grad_norm": 0.976825607210512, "learning_rate": 1.7302223100252205e-05, "loss": 0.4489598870277405, "step": 2043 }, { "epoch": 0.5554347826086956, "grad_norm": 0.8888871636180213, "learning_rate": 1.7299152617400358e-05, "loss": 0.40502047538757324, "step": 2044 }, { "epoch": 0.5557065217391305, "grad_norm": 1.1081906312060703, "learning_rate": 1.7296080660983354e-05, "loss": 0.5874089002609253, "step": 2045 }, { "epoch": 0.5559782608695653, "grad_norm": 1.1529207448001142, "learning_rate": 1.7293007231621367e-05, "loss": 0.5897048711776733, "step": 2046 }, { "epoch": 0.55625, "grad_norm": 0.8094147774232259, "learning_rate": 1.7289932329934864e-05, "loss": 0.3657919466495514, "step": 2047 }, { "epoch": 0.5565217391304348, "grad_norm": 0.8568341638889709, "learning_rate": 1.7286855956544616e-05, "loss": 0.3737264573574066, "step": 2048 }, { "epoch": 0.5567934782608696, "grad_norm": 0.9903064600956678, "learning_rate": 1.7283778112071683e-05, "loss": 0.49620604515075684, "step": 2049 }, { "epoch": 0.5570652173913043, "grad_norm": 1.0274651512393282, "learning_rate": 1.7280698797137428e-05, "loss": 0.5284406542778015, "step": 2050 }, { "epoch": 0.5573369565217391, "grad_norm": 0.9938135756118446, "learning_rate": 1.7277618012363508e-05, "loss": 0.43831929564476013, "step": 2051 }, { "epoch": 0.5576086956521739, "grad_norm": 1.0329362255880157, "learning_rate": 1.7274535758371872e-05, "loss": 0.49920719861984253, "step": 2052 }, { "epoch": 0.5578804347826087, "grad_norm": 1.1467224697058578, "learning_rate": 1.7271452035784775e-05, "loss": 0.5190244913101196, "step": 2053 }, { "epoch": 0.5581521739130435, "grad_norm": 0.9874526061518557, "learning_rate": 1.726836684522476e-05, "loss": 0.46288973093032837, "step": 2054 }, { "epoch": 0.5584239130434783, "grad_norm": 1.2200032784672314, "learning_rate": 1.726528018731468e-05, "loss": 0.5717484354972839, "step": 2055 }, { "epoch": 0.558695652173913, "grad_norm": 0.9884857839545116, "learning_rate": 1.7262192062677664e-05, "loss": 0.453487753868103, "step": 2056 }, { "epoch": 0.5589673913043478, "grad_norm": 1.1271982135851228, "learning_rate": 1.7259102471937154e-05, "loss": 0.5809906721115112, "step": 2057 }, { "epoch": 0.5592391304347826, "grad_norm": 0.967327247758825, "learning_rate": 1.725601141571688e-05, "loss": 0.4843359589576721, "step": 2058 }, { "epoch": 0.5595108695652173, "grad_norm": 1.1267064364171093, "learning_rate": 1.725291889464087e-05, "loss": 0.6111929416656494, "step": 2059 }, { "epoch": 0.5597826086956522, "grad_norm": 0.8193289657705833, "learning_rate": 1.7249824909333445e-05, "loss": 0.3556232452392578, "step": 2060 }, { "epoch": 0.560054347826087, "grad_norm": 1.0110554852127813, "learning_rate": 1.7246729460419224e-05, "loss": 0.5558586716651917, "step": 2061 }, { "epoch": 0.5603260869565218, "grad_norm": 0.7764535716079347, "learning_rate": 1.7243632548523127e-05, "loss": 0.3036763072013855, "step": 2062 }, { "epoch": 0.5605978260869565, "grad_norm": 1.028905859828624, "learning_rate": 1.724053417427036e-05, "loss": 0.44752269983291626, "step": 2063 }, { "epoch": 0.5608695652173913, "grad_norm": 1.018426144847798, "learning_rate": 1.7237434338286424e-05, "loss": 0.5013039112091064, "step": 2064 }, { "epoch": 0.561141304347826, "grad_norm": 1.056173884674487, "learning_rate": 1.7234333041197127e-05, "loss": 0.56648850440979, "step": 2065 }, { "epoch": 0.5614130434782608, "grad_norm": 1.0588325140380992, "learning_rate": 1.7231230283628556e-05, "loss": 0.49254870414733887, "step": 2066 }, { "epoch": 0.5616847826086957, "grad_norm": 0.981043113612665, "learning_rate": 1.722812606620711e-05, "loss": 0.5216925144195557, "step": 2067 }, { "epoch": 0.5619565217391305, "grad_norm": 1.1040408097464962, "learning_rate": 1.7225020389559465e-05, "loss": 0.5104339718818665, "step": 2068 }, { "epoch": 0.5622282608695652, "grad_norm": 0.9097233556405948, "learning_rate": 1.7221913254312604e-05, "loss": 0.5011395215988159, "step": 2069 }, { "epoch": 0.5625, "grad_norm": 0.9161913439575717, "learning_rate": 1.7218804661093802e-05, "loss": 0.4660103917121887, "step": 2070 }, { "epoch": 0.5627717391304348, "grad_norm": 1.0598145398811447, "learning_rate": 1.7215694610530624e-05, "loss": 0.5124689340591431, "step": 2071 }, { "epoch": 0.5630434782608695, "grad_norm": 0.9455753719815684, "learning_rate": 1.7212583103250933e-05, "loss": 0.4341259002685547, "step": 2072 }, { "epoch": 0.5633152173913043, "grad_norm": 1.1100583239028996, "learning_rate": 1.7209470139882887e-05, "loss": 0.6123274564743042, "step": 2073 }, { "epoch": 0.5635869565217392, "grad_norm": 0.9783730001213854, "learning_rate": 1.7206355721054935e-05, "loss": 0.483279824256897, "step": 2074 }, { "epoch": 0.563858695652174, "grad_norm": 0.9867407523378701, "learning_rate": 1.720323984739582e-05, "loss": 0.4612887501716614, "step": 2075 }, { "epoch": 0.5641304347826087, "grad_norm": 0.8402594257412562, "learning_rate": 1.7200122519534582e-05, "loss": 0.4034051299095154, "step": 2076 }, { "epoch": 0.5644021739130435, "grad_norm": 0.9272801633308418, "learning_rate": 1.7197003738100556e-05, "loss": 0.4346516728401184, "step": 2077 }, { "epoch": 0.5646739130434782, "grad_norm": 1.0004554856833194, "learning_rate": 1.7193883503723357e-05, "loss": 0.5187975168228149, "step": 2078 }, { "epoch": 0.564945652173913, "grad_norm": 1.056456006587825, "learning_rate": 1.719076181703291e-05, "loss": 0.4562011957168579, "step": 2079 }, { "epoch": 0.5652173913043478, "grad_norm": 0.946534249178087, "learning_rate": 1.7187638678659425e-05, "loss": 0.39002710580825806, "step": 2080 }, { "epoch": 0.5654891304347827, "grad_norm": 1.108627198203264, "learning_rate": 1.7184514089233403e-05, "loss": 0.5698450803756714, "step": 2081 }, { "epoch": 0.5657608695652174, "grad_norm": 1.0948638943291589, "learning_rate": 1.7181388049385648e-05, "loss": 0.5725702047348022, "step": 2082 }, { "epoch": 0.5660326086956522, "grad_norm": 0.9223116176228494, "learning_rate": 1.7178260559747247e-05, "loss": 0.4166659116744995, "step": 2083 }, { "epoch": 0.566304347826087, "grad_norm": 0.9779570723015378, "learning_rate": 1.717513162094958e-05, "loss": 0.49444150924682617, "step": 2084 }, { "epoch": 0.5665760869565217, "grad_norm": 0.9204724120332015, "learning_rate": 1.7172001233624325e-05, "loss": 0.41680631041526794, "step": 2085 }, { "epoch": 0.5668478260869565, "grad_norm": 0.808972115781435, "learning_rate": 1.716886939840345e-05, "loss": 0.40364760160446167, "step": 2086 }, { "epoch": 0.5671195652173913, "grad_norm": 1.0645128736091856, "learning_rate": 1.7165736115919216e-05, "loss": 0.5551884174346924, "step": 2087 }, { "epoch": 0.5673913043478261, "grad_norm": 1.107922055179677, "learning_rate": 1.7162601386804172e-05, "loss": 0.4754486083984375, "step": 2088 }, { "epoch": 0.5676630434782609, "grad_norm": 1.046526438496434, "learning_rate": 1.7159465211691167e-05, "loss": 0.5002586245536804, "step": 2089 }, { "epoch": 0.5679347826086957, "grad_norm": 1.0038918779924721, "learning_rate": 1.7156327591213335e-05, "loss": 0.5054787993431091, "step": 2090 }, { "epoch": 0.5682065217391304, "grad_norm": 0.9304754377490548, "learning_rate": 1.7153188526004103e-05, "loss": 0.43893373012542725, "step": 2091 }, { "epoch": 0.5684782608695652, "grad_norm": 1.0665518248848422, "learning_rate": 1.7150048016697193e-05, "loss": 0.5592573881149292, "step": 2092 }, { "epoch": 0.56875, "grad_norm": 1.131495755211385, "learning_rate": 1.7146906063926613e-05, "loss": 0.5436713695526123, "step": 2093 }, { "epoch": 0.5690217391304347, "grad_norm": 0.9931484937839079, "learning_rate": 1.7143762668326667e-05, "loss": 0.5081691145896912, "step": 2094 }, { "epoch": 0.5692934782608695, "grad_norm": 0.8787352608466406, "learning_rate": 1.714061783053195e-05, "loss": 0.4250699281692505, "step": 2095 }, { "epoch": 0.5695652173913044, "grad_norm": 1.0920546645748472, "learning_rate": 1.713747155117735e-05, "loss": 0.5070326328277588, "step": 2096 }, { "epoch": 0.5698369565217392, "grad_norm": 1.1225202664689962, "learning_rate": 1.7134323830898036e-05, "loss": 0.5120375156402588, "step": 2097 }, { "epoch": 0.5701086956521739, "grad_norm": 1.2363999983972531, "learning_rate": 1.713117467032948e-05, "loss": 0.5553551912307739, "step": 2098 }, { "epoch": 0.5703804347826087, "grad_norm": 1.2072683657984729, "learning_rate": 1.7128024070107438e-05, "loss": 0.5573428869247437, "step": 2099 }, { "epoch": 0.5706521739130435, "grad_norm": 1.0094645450482491, "learning_rate": 1.712487203086796e-05, "loss": 0.44946879148483276, "step": 2100 }, { "epoch": 0.5709239130434782, "grad_norm": 0.9959613551379844, "learning_rate": 1.7121718553247384e-05, "loss": 0.5138950347900391, "step": 2101 }, { "epoch": 0.571195652173913, "grad_norm": 0.9741764252736647, "learning_rate": 1.711856363788234e-05, "loss": 0.4728882908821106, "step": 2102 }, { "epoch": 0.5714673913043479, "grad_norm": 1.0491225964810726, "learning_rate": 1.7115407285409745e-05, "loss": 0.5460719466209412, "step": 2103 }, { "epoch": 0.5717391304347826, "grad_norm": 1.0189269316314948, "learning_rate": 1.7112249496466816e-05, "loss": 0.5649716854095459, "step": 2104 }, { "epoch": 0.5720108695652174, "grad_norm": 1.2331800385648999, "learning_rate": 1.7109090271691044e-05, "loss": 0.6036542057991028, "step": 2105 }, { "epoch": 0.5722826086956522, "grad_norm": 0.925096271478374, "learning_rate": 1.710592961172022e-05, "loss": 0.44830435514450073, "step": 2106 }, { "epoch": 0.5725543478260869, "grad_norm": 1.1384885994077472, "learning_rate": 1.710276751719243e-05, "loss": 0.638231635093689, "step": 2107 }, { "epoch": 0.5728260869565217, "grad_norm": 1.051091434866395, "learning_rate": 1.7099603988746035e-05, "loss": 0.514388918876648, "step": 2108 }, { "epoch": 0.5730978260869565, "grad_norm": 0.9852212503423371, "learning_rate": 1.7096439027019695e-05, "loss": 0.49213820695877075, "step": 2109 }, { "epoch": 0.5733695652173914, "grad_norm": 0.9759781099411018, "learning_rate": 1.7093272632652362e-05, "loss": 0.5209836959838867, "step": 2110 }, { "epoch": 0.5736413043478261, "grad_norm": 1.083351297070139, "learning_rate": 1.709010480628327e-05, "loss": 0.5032913684844971, "step": 2111 }, { "epoch": 0.5739130434782609, "grad_norm": 0.9809237295049703, "learning_rate": 1.7086935548551947e-05, "loss": 0.45918509364128113, "step": 2112 }, { "epoch": 0.5741847826086957, "grad_norm": 1.0445114435560052, "learning_rate": 1.7083764860098206e-05, "loss": 0.5660051703453064, "step": 2113 }, { "epoch": 0.5744565217391304, "grad_norm": 1.1172045369360402, "learning_rate": 1.708059274156215e-05, "loss": 0.5475469827651978, "step": 2114 }, { "epoch": 0.5747282608695652, "grad_norm": 1.2312251136616807, "learning_rate": 1.7077419193584178e-05, "loss": 0.6041431427001953, "step": 2115 }, { "epoch": 0.575, "grad_norm": 0.805130699919102, "learning_rate": 1.707424421680496e-05, "loss": 0.372456431388855, "step": 2116 }, { "epoch": 0.5752717391304348, "grad_norm": 1.0076282158983418, "learning_rate": 1.7071067811865477e-05, "loss": 0.5434607863426208, "step": 2117 }, { "epoch": 0.5755434782608696, "grad_norm": 0.9709504948076713, "learning_rate": 1.7067889979406978e-05, "loss": 0.477359801530838, "step": 2118 }, { "epoch": 0.5758152173913044, "grad_norm": 0.8722134920578134, "learning_rate": 1.706471072007102e-05, "loss": 0.386131227016449, "step": 2119 }, { "epoch": 0.5760869565217391, "grad_norm": 0.9829500369941746, "learning_rate": 1.7061530034499423e-05, "loss": 0.4489639401435852, "step": 2120 }, { "epoch": 0.5763586956521739, "grad_norm": 0.9215982537521337, "learning_rate": 1.7058347923334316e-05, "loss": 0.4085035026073456, "step": 2121 }, { "epoch": 0.5766304347826087, "grad_norm": 1.1218136900814684, "learning_rate": 1.7055164387218114e-05, "loss": 0.552871584892273, "step": 2122 }, { "epoch": 0.5769021739130434, "grad_norm": 0.9593265076824757, "learning_rate": 1.7051979426793506e-05, "loss": 0.5295161008834839, "step": 2123 }, { "epoch": 0.5771739130434783, "grad_norm": 0.9487744247796848, "learning_rate": 1.7048793042703487e-05, "loss": 0.4286331832408905, "step": 2124 }, { "epoch": 0.5774456521739131, "grad_norm": 0.9369399873345543, "learning_rate": 1.7045605235591317e-05, "loss": 0.4023359417915344, "step": 2125 }, { "epoch": 0.5777173913043478, "grad_norm": 0.9065632159186823, "learning_rate": 1.7042416006100567e-05, "loss": 0.4135787785053253, "step": 2126 }, { "epoch": 0.5779891304347826, "grad_norm": 1.029915191461002, "learning_rate": 1.7039225354875078e-05, "loss": 0.5305976867675781, "step": 2127 }, { "epoch": 0.5782608695652174, "grad_norm": 0.8217421489583348, "learning_rate": 1.7036033282558984e-05, "loss": 0.38919657468795776, "step": 2128 }, { "epoch": 0.5785326086956522, "grad_norm": 1.032821383980858, "learning_rate": 1.7032839789796712e-05, "loss": 0.48043787479400635, "step": 2129 }, { "epoch": 0.5788043478260869, "grad_norm": 1.020834405388549, "learning_rate": 1.702964487723296e-05, "loss": 0.4443703293800354, "step": 2130 }, { "epoch": 0.5790760869565217, "grad_norm": 0.9807608799308449, "learning_rate": 1.702644854551273e-05, "loss": 0.45335185527801514, "step": 2131 }, { "epoch": 0.5793478260869566, "grad_norm": 0.9901341254808197, "learning_rate": 1.7023250795281298e-05, "loss": 0.5070977210998535, "step": 2132 }, { "epoch": 0.5796195652173913, "grad_norm": 1.020267186218815, "learning_rate": 1.7020051627184234e-05, "loss": 0.5072765350341797, "step": 2133 }, { "epoch": 0.5798913043478261, "grad_norm": 1.136016015113836, "learning_rate": 1.7016851041867394e-05, "loss": 0.5093010663986206, "step": 2134 }, { "epoch": 0.5801630434782609, "grad_norm": 0.855965214369529, "learning_rate": 1.701364903997691e-05, "loss": 0.4097943603992462, "step": 2135 }, { "epoch": 0.5804347826086956, "grad_norm": 0.8783946862862846, "learning_rate": 1.7010445622159214e-05, "loss": 0.4015134572982788, "step": 2136 }, { "epoch": 0.5807065217391304, "grad_norm": 1.0153195771368473, "learning_rate": 1.7007240789061014e-05, "loss": 0.5448260307312012, "step": 2137 }, { "epoch": 0.5809782608695652, "grad_norm": 1.024716188796183, "learning_rate": 1.7004034541329312e-05, "loss": 0.495841383934021, "step": 2138 }, { "epoch": 0.58125, "grad_norm": 0.9418342888143777, "learning_rate": 1.7000826879611382e-05, "loss": 0.40060216188430786, "step": 2139 }, { "epoch": 0.5815217391304348, "grad_norm": 0.9819211544868445, "learning_rate": 1.69976178045548e-05, "loss": 0.422211617231369, "step": 2140 }, { "epoch": 0.5817934782608696, "grad_norm": 1.070724980180336, "learning_rate": 1.6994407316807412e-05, "loss": 0.5239693522453308, "step": 2141 }, { "epoch": 0.5820652173913043, "grad_norm": 0.9450998516115475, "learning_rate": 1.6991195417017367e-05, "loss": 0.45663294196128845, "step": 2142 }, { "epoch": 0.5823369565217391, "grad_norm": 0.9736306367219846, "learning_rate": 1.698798210583308e-05, "loss": 0.49818623065948486, "step": 2143 }, { "epoch": 0.5826086956521739, "grad_norm": 0.9618673119518852, "learning_rate": 1.698476738390326e-05, "loss": 0.5129861831665039, "step": 2144 }, { "epoch": 0.5828804347826086, "grad_norm": 1.103153336299408, "learning_rate": 1.6981551251876905e-05, "loss": 0.5038436055183411, "step": 2145 }, { "epoch": 0.5831521739130435, "grad_norm": 0.8925140002132582, "learning_rate": 1.697833371040329e-05, "loss": 0.34588131308555603, "step": 2146 }, { "epoch": 0.5834239130434783, "grad_norm": 0.7995209187937542, "learning_rate": 1.6975114760131983e-05, "loss": 0.3051808774471283, "step": 2147 }, { "epoch": 0.5836956521739131, "grad_norm": 1.105932664485999, "learning_rate": 1.6971894401712822e-05, "loss": 0.5193653106689453, "step": 2148 }, { "epoch": 0.5839673913043478, "grad_norm": 1.0894637639455471, "learning_rate": 1.6968672635795946e-05, "loss": 0.5411765575408936, "step": 2149 }, { "epoch": 0.5842391304347826, "grad_norm": 1.064590856326342, "learning_rate": 1.6965449463031766e-05, "loss": 0.523512601852417, "step": 2150 }, { "epoch": 0.5845108695652174, "grad_norm": 1.014482371575391, "learning_rate": 1.6962224884070985e-05, "loss": 0.4369208514690399, "step": 2151 }, { "epoch": 0.5847826086956521, "grad_norm": 1.0589606528449556, "learning_rate": 1.695899889956458e-05, "loss": 0.5206896662712097, "step": 2152 }, { "epoch": 0.585054347826087, "grad_norm": 0.9006229812955616, "learning_rate": 1.6955771510163826e-05, "loss": 0.3861387372016907, "step": 2153 }, { "epoch": 0.5853260869565218, "grad_norm": 1.1359355719299233, "learning_rate": 1.695254271652027e-05, "loss": 0.5633008480072021, "step": 2154 }, { "epoch": 0.5855978260869565, "grad_norm": 1.0918692256626121, "learning_rate": 1.694931251928575e-05, "loss": 0.5521878600120544, "step": 2155 }, { "epoch": 0.5858695652173913, "grad_norm": 0.8481294107775628, "learning_rate": 1.6946080919112377e-05, "loss": 0.33000448346138, "step": 2156 }, { "epoch": 0.5861413043478261, "grad_norm": 1.1554755013230082, "learning_rate": 1.6942847916652555e-05, "loss": 0.533633828163147, "step": 2157 }, { "epoch": 0.5864130434782608, "grad_norm": 1.0821908936802496, "learning_rate": 1.6939613512558968e-05, "loss": 0.5324103236198425, "step": 2158 }, { "epoch": 0.5866847826086956, "grad_norm": 1.092103515379003, "learning_rate": 1.6936377707484584e-05, "loss": 0.5976450443267822, "step": 2159 }, { "epoch": 0.5869565217391305, "grad_norm": 0.8639307284430289, "learning_rate": 1.6933140502082647e-05, "loss": 0.39180463552474976, "step": 2160 }, { "epoch": 0.5872282608695653, "grad_norm": 0.9693563587612667, "learning_rate": 1.69299018970067e-05, "loss": 0.40760791301727295, "step": 2161 }, { "epoch": 0.5875, "grad_norm": 1.0353783120598776, "learning_rate": 1.692666189291055e-05, "loss": 0.5180569291114807, "step": 2162 }, { "epoch": 0.5877717391304348, "grad_norm": 1.4609370132052082, "learning_rate": 1.6923420490448298e-05, "loss": 0.4279177784919739, "step": 2163 }, { "epoch": 0.5880434782608696, "grad_norm": 0.9503175121423856, "learning_rate": 1.6920177690274323e-05, "loss": 0.41203010082244873, "step": 2164 }, { "epoch": 0.5883152173913043, "grad_norm": 1.1654325430960357, "learning_rate": 1.6916933493043287e-05, "loss": 0.5059161186218262, "step": 2165 }, { "epoch": 0.5885869565217391, "grad_norm": 1.1439969874645963, "learning_rate": 1.691368789941013e-05, "loss": 0.5683927536010742, "step": 2166 }, { "epoch": 0.5888586956521739, "grad_norm": 1.2878767909398, "learning_rate": 1.691044091003008e-05, "loss": 0.5828983187675476, "step": 2167 }, { "epoch": 0.5891304347826087, "grad_norm": 1.1130075475879218, "learning_rate": 1.6907192525558653e-05, "loss": 0.5270891189575195, "step": 2168 }, { "epoch": 0.5894021739130435, "grad_norm": 1.0539544957222484, "learning_rate": 1.6903942746651626e-05, "loss": 0.5290110111236572, "step": 2169 }, { "epoch": 0.5896739130434783, "grad_norm": 0.8052137912554223, "learning_rate": 1.6900691573965074e-05, "loss": 0.3698885440826416, "step": 2170 }, { "epoch": 0.589945652173913, "grad_norm": 1.1211013236554908, "learning_rate": 1.689743900815535e-05, "loss": 0.546419084072113, "step": 2171 }, { "epoch": 0.5902173913043478, "grad_norm": 0.923534381899983, "learning_rate": 1.6894185049879094e-05, "loss": 0.43347328901290894, "step": 2172 }, { "epoch": 0.5904891304347826, "grad_norm": 0.9909273418287051, "learning_rate": 1.689092969979321e-05, "loss": 0.40027040243148804, "step": 2173 }, { "epoch": 0.5907608695652173, "grad_norm": 0.9172034119524318, "learning_rate": 1.68876729585549e-05, "loss": 0.41495198011398315, "step": 2174 }, { "epoch": 0.5910326086956522, "grad_norm": 1.0948971423089005, "learning_rate": 1.6884414826821632e-05, "loss": 0.5323447585105896, "step": 2175 }, { "epoch": 0.591304347826087, "grad_norm": 1.0314878927266835, "learning_rate": 1.6881155305251176e-05, "loss": 0.4878137707710266, "step": 2176 }, { "epoch": 0.5915760869565218, "grad_norm": 1.093731993984405, "learning_rate": 1.687789439450156e-05, "loss": 0.5472662448883057, "step": 2177 }, { "epoch": 0.5918478260869565, "grad_norm": 0.9530885901746702, "learning_rate": 1.68746320952311e-05, "loss": 0.47476446628570557, "step": 2178 }, { "epoch": 0.5921195652173913, "grad_norm": 0.9726875238013099, "learning_rate": 1.687136840809841e-05, "loss": 0.4739103317260742, "step": 2179 }, { "epoch": 0.592391304347826, "grad_norm": 1.0163846932426404, "learning_rate": 1.6868103333762345e-05, "loss": 0.5363413095474243, "step": 2180 }, { "epoch": 0.5926630434782608, "grad_norm": 1.0217998465768985, "learning_rate": 1.6864836872882086e-05, "loss": 0.46827611327171326, "step": 2181 }, { "epoch": 0.5929347826086957, "grad_norm": 0.8935615239473365, "learning_rate": 1.686156902611706e-05, "loss": 0.35811400413513184, "step": 2182 }, { "epoch": 0.5932065217391305, "grad_norm": 1.0619054479401586, "learning_rate": 1.685829979412699e-05, "loss": 0.477660208940506, "step": 2183 }, { "epoch": 0.5934782608695652, "grad_norm": 1.0349653353150994, "learning_rate": 1.6855029177571868e-05, "loss": 0.561913251876831, "step": 2184 }, { "epoch": 0.59375, "grad_norm": 1.0415606920676566, "learning_rate": 1.685175717711198e-05, "loss": 0.4285082519054413, "step": 2185 }, { "epoch": 0.5940217391304348, "grad_norm": 0.923973327070125, "learning_rate": 1.6848483793407874e-05, "loss": 0.3240022659301758, "step": 2186 }, { "epoch": 0.5942934782608695, "grad_norm": 1.0377870865771985, "learning_rate": 1.684520902712039e-05, "loss": 0.5625501871109009, "step": 2187 }, { "epoch": 0.5945652173913043, "grad_norm": 0.9932060165190599, "learning_rate": 1.6841932878910646e-05, "loss": 0.414156049489975, "step": 2188 }, { "epoch": 0.5948369565217392, "grad_norm": 0.961652741721742, "learning_rate": 1.6838655349440033e-05, "loss": 0.44433480501174927, "step": 2189 }, { "epoch": 0.595108695652174, "grad_norm": 1.093545552649565, "learning_rate": 1.6835376439370226e-05, "loss": 0.49920889735221863, "step": 2190 }, { "epoch": 0.5953804347826087, "grad_norm": 0.9288969909889407, "learning_rate": 1.6832096149363174e-05, "loss": 0.39366960525512695, "step": 2191 }, { "epoch": 0.5956521739130435, "grad_norm": 1.0354686431980162, "learning_rate": 1.682881448008111e-05, "loss": 0.4865860342979431, "step": 2192 }, { "epoch": 0.5959239130434782, "grad_norm": 1.0933566123273215, "learning_rate": 1.6825531432186545e-05, "loss": 0.4599331021308899, "step": 2193 }, { "epoch": 0.596195652173913, "grad_norm": 1.0264883599326984, "learning_rate": 1.682224700634226e-05, "loss": 0.47039058804512024, "step": 2194 }, { "epoch": 0.5964673913043478, "grad_norm": 1.0792786369791925, "learning_rate": 1.6818961203211325e-05, "loss": 0.47353023290634155, "step": 2195 }, { "epoch": 0.5967391304347827, "grad_norm": 0.9407895942798026, "learning_rate": 1.681567402345708e-05, "loss": 0.452461838722229, "step": 2196 }, { "epoch": 0.5970108695652174, "grad_norm": 1.0501820858885065, "learning_rate": 1.681238546774315e-05, "loss": 0.5106304287910461, "step": 2197 }, { "epoch": 0.5972826086956522, "grad_norm": 1.1774242032386073, "learning_rate": 1.6809095536733436e-05, "loss": 0.5424141883850098, "step": 2198 }, { "epoch": 0.597554347826087, "grad_norm": 1.113862833763351, "learning_rate": 1.6805804231092108e-05, "loss": 0.5220037698745728, "step": 2199 }, { "epoch": 0.5978260869565217, "grad_norm": 1.1286431312975538, "learning_rate": 1.6802511551483627e-05, "loss": 0.5443233847618103, "step": 2200 }, { "epoch": 0.5980978260869565, "grad_norm": 1.0291831680709977, "learning_rate": 1.679921749857272e-05, "loss": 0.4971659779548645, "step": 2201 }, { "epoch": 0.5983695652173913, "grad_norm": 1.0181446335163713, "learning_rate": 1.6795922073024397e-05, "loss": 0.5212778449058533, "step": 2202 }, { "epoch": 0.5986413043478261, "grad_norm": 0.8347039727132988, "learning_rate": 1.6792625275503945e-05, "loss": 0.34881067276000977, "step": 2203 }, { "epoch": 0.5989130434782609, "grad_norm": 0.8372574714531303, "learning_rate": 1.6789327106676924e-05, "loss": 0.3818083703517914, "step": 2204 }, { "epoch": 0.5991847826086957, "grad_norm": 1.0368967396497941, "learning_rate": 1.678602756720918e-05, "loss": 0.5153015851974487, "step": 2205 }, { "epoch": 0.5994565217391304, "grad_norm": 1.120676310719048, "learning_rate": 1.6782726657766826e-05, "loss": 0.47337645292282104, "step": 2206 }, { "epoch": 0.5997282608695652, "grad_norm": 1.0538477062017055, "learning_rate": 1.6779424379016252e-05, "loss": 0.47556111216545105, "step": 2207 }, { "epoch": 0.6, "grad_norm": 0.8584445205868368, "learning_rate": 1.6776120731624134e-05, "loss": 0.3769823908805847, "step": 2208 }, { "epoch": 0.6002717391304347, "grad_norm": 1.0940637712902022, "learning_rate": 1.6772815716257414e-05, "loss": 0.55284184217453, "step": 2209 }, { "epoch": 0.6005434782608695, "grad_norm": 0.92813945769107, "learning_rate": 1.676950933358331e-05, "loss": 0.41273802518844604, "step": 2210 }, { "epoch": 0.6008152173913044, "grad_norm": 1.1298577079394256, "learning_rate": 1.676620158426933e-05, "loss": 0.502103328704834, "step": 2211 }, { "epoch": 0.6010869565217392, "grad_norm": 0.9881690230605278, "learning_rate": 1.6762892468983237e-05, "loss": 0.43897151947021484, "step": 2212 }, { "epoch": 0.6013586956521739, "grad_norm": 0.9378435587450644, "learning_rate": 1.6759581988393093e-05, "loss": 0.4252811074256897, "step": 2213 }, { "epoch": 0.6016304347826087, "grad_norm": 1.0861103415778066, "learning_rate": 1.6756270143167212e-05, "loss": 0.5196929574012756, "step": 2214 }, { "epoch": 0.6019021739130435, "grad_norm": 0.7498738815468314, "learning_rate": 1.67529569339742e-05, "loss": 0.3285151422023773, "step": 2215 }, { "epoch": 0.6021739130434782, "grad_norm": 1.2291118562572034, "learning_rate": 1.6749642361482936e-05, "loss": 0.5325192809104919, "step": 2216 }, { "epoch": 0.602445652173913, "grad_norm": 1.0136085204059169, "learning_rate": 1.6746326426362566e-05, "loss": 0.4124257564544678, "step": 2217 }, { "epoch": 0.6027173913043479, "grad_norm": 1.2324498434741387, "learning_rate": 1.6743009129282517e-05, "loss": 0.5180618762969971, "step": 2218 }, { "epoch": 0.6029891304347826, "grad_norm": 0.9486890250242046, "learning_rate": 1.6739690470912495e-05, "loss": 0.42694950103759766, "step": 2219 }, { "epoch": 0.6032608695652174, "grad_norm": 1.1290573289558237, "learning_rate": 1.673637045192247e-05, "loss": 0.5556227564811707, "step": 2220 }, { "epoch": 0.6035326086956522, "grad_norm": 1.0121635196725787, "learning_rate": 1.6733049072982698e-05, "loss": 0.4446582794189453, "step": 2221 }, { "epoch": 0.6038043478260869, "grad_norm": 0.9727385183026482, "learning_rate": 1.6729726334763705e-05, "loss": 0.4190531373023987, "step": 2222 }, { "epoch": 0.6040760869565217, "grad_norm": 1.1425316367172904, "learning_rate": 1.6726402237936287e-05, "loss": 0.5664331316947937, "step": 2223 }, { "epoch": 0.6043478260869565, "grad_norm": 0.9723098780933139, "learning_rate": 1.6723076783171522e-05, "loss": 0.45136207342147827, "step": 2224 }, { "epoch": 0.6046195652173914, "grad_norm": 0.9672177268420749, "learning_rate": 1.6719749971140756e-05, "loss": 0.47926265001296997, "step": 2225 }, { "epoch": 0.6048913043478261, "grad_norm": 1.0524438898944724, "learning_rate": 1.671642180251561e-05, "loss": 0.509868323802948, "step": 2226 }, { "epoch": 0.6051630434782609, "grad_norm": 1.0298036088154954, "learning_rate": 1.6713092277967986e-05, "loss": 0.5265815854072571, "step": 2227 }, { "epoch": 0.6054347826086957, "grad_norm": 1.146747646583336, "learning_rate": 1.6709761398170047e-05, "loss": 0.5930702686309814, "step": 2228 }, { "epoch": 0.6057065217391304, "grad_norm": 1.0037286526672544, "learning_rate": 1.670642916379424e-05, "loss": 0.47345906496047974, "step": 2229 }, { "epoch": 0.6059782608695652, "grad_norm": 1.0049048034791432, "learning_rate": 1.6703095575513282e-05, "loss": 0.4879886209964752, "step": 2230 }, { "epoch": 0.60625, "grad_norm": 0.9317518236067578, "learning_rate": 1.6699760634000166e-05, "loss": 0.44124677777290344, "step": 2231 }, { "epoch": 0.6065217391304348, "grad_norm": 1.0074970746018843, "learning_rate": 1.6696424339928153e-05, "loss": 0.5482822060585022, "step": 2232 }, { "epoch": 0.6067934782608696, "grad_norm": 1.0234012093882319, "learning_rate": 1.6693086693970776e-05, "loss": 0.5132659673690796, "step": 2233 }, { "epoch": 0.6070652173913044, "grad_norm": 1.1246249948459028, "learning_rate": 1.668974769680185e-05, "loss": 0.5622271299362183, "step": 2234 }, { "epoch": 0.6073369565217391, "grad_norm": 1.0700526944271709, "learning_rate": 1.6686407349095453e-05, "loss": 0.4985525906085968, "step": 2235 }, { "epoch": 0.6076086956521739, "grad_norm": 1.0091226227970038, "learning_rate": 1.6683065651525946e-05, "loss": 0.5059670209884644, "step": 2236 }, { "epoch": 0.6078804347826087, "grad_norm": 1.1196678862650817, "learning_rate": 1.667972260476795e-05, "loss": 0.5166305303573608, "step": 2237 }, { "epoch": 0.6081521739130434, "grad_norm": 1.1799285829316213, "learning_rate": 1.667637820949637e-05, "loss": 0.6163492798805237, "step": 2238 }, { "epoch": 0.6084239130434783, "grad_norm": 1.0243284605278953, "learning_rate": 1.6673032466386375e-05, "loss": 0.4447896480560303, "step": 2239 }, { "epoch": 0.6086956521739131, "grad_norm": 1.0769788163530742, "learning_rate": 1.666968537611341e-05, "loss": 0.5533590316772461, "step": 2240 }, { "epoch": 0.6089673913043478, "grad_norm": 0.9717641975753181, "learning_rate": 1.666633693935319e-05, "loss": 0.38223278522491455, "step": 2241 }, { "epoch": 0.6092391304347826, "grad_norm": 0.7341588092726753, "learning_rate": 1.6662987156781706e-05, "loss": 0.31900590658187866, "step": 2242 }, { "epoch": 0.6095108695652174, "grad_norm": 1.1743071931045392, "learning_rate": 1.6659636029075213e-05, "loss": 0.5279752016067505, "step": 2243 }, { "epoch": 0.6097826086956522, "grad_norm": 0.9784309260335852, "learning_rate": 1.665628355691025e-05, "loss": 0.4502393305301666, "step": 2244 }, { "epoch": 0.6100543478260869, "grad_norm": 1.0578121689792792, "learning_rate": 1.6652929740963616e-05, "loss": 0.46279311180114746, "step": 2245 }, { "epoch": 0.6103260869565217, "grad_norm": 1.1904980806853946, "learning_rate": 1.6649574581912376e-05, "loss": 0.5579099655151367, "step": 2246 }, { "epoch": 0.6105978260869566, "grad_norm": 0.9497592681767453, "learning_rate": 1.664621808043389e-05, "loss": 0.4837680459022522, "step": 2247 }, { "epoch": 0.6108695652173913, "grad_norm": 1.0975862152699143, "learning_rate": 1.6642860237205763e-05, "loss": 0.5028617978096008, "step": 2248 }, { "epoch": 0.6111413043478261, "grad_norm": 0.9925447317521479, "learning_rate": 1.663950105290589e-05, "loss": 0.45551568269729614, "step": 2249 }, { "epoch": 0.6114130434782609, "grad_norm": 1.0134014651610914, "learning_rate": 1.6636140528212427e-05, "loss": 0.4410894513130188, "step": 2250 }, { "epoch": 0.6116847826086956, "grad_norm": 1.0709773565365364, "learning_rate": 1.6632778663803794e-05, "loss": 0.5555447936058044, "step": 2251 }, { "epoch": 0.6119565217391304, "grad_norm": 0.8027798645031373, "learning_rate": 1.6629415460358698e-05, "loss": 0.35909608006477356, "step": 2252 }, { "epoch": 0.6122282608695652, "grad_norm": 0.8685532550519032, "learning_rate": 1.6626050918556108e-05, "loss": 0.40398719906806946, "step": 2253 }, { "epoch": 0.6125, "grad_norm": 0.9652573047536626, "learning_rate": 1.662268503907526e-05, "loss": 0.4257481098175049, "step": 2254 }, { "epoch": 0.6127717391304348, "grad_norm": 1.0369475639825023, "learning_rate": 1.6619317822595666e-05, "loss": 0.4678877592086792, "step": 2255 }, { "epoch": 0.6130434782608696, "grad_norm": 1.0503732149982523, "learning_rate": 1.6615949269797106e-05, "loss": 0.5036877989768982, "step": 2256 }, { "epoch": 0.6133152173913043, "grad_norm": 0.9050812878465765, "learning_rate": 1.6612579381359624e-05, "loss": 0.3928300142288208, "step": 2257 }, { "epoch": 0.6135869565217391, "grad_norm": 1.0826138580874767, "learning_rate": 1.6609208157963546e-05, "loss": 0.5381064414978027, "step": 2258 }, { "epoch": 0.6138586956521739, "grad_norm": 1.1484834294630362, "learning_rate": 1.6605835600289456e-05, "loss": 0.5697644948959351, "step": 2259 }, { "epoch": 0.6141304347826086, "grad_norm": 0.9641482941048856, "learning_rate": 1.6602461709018208e-05, "loss": 0.41980499029159546, "step": 2260 }, { "epoch": 0.6144021739130435, "grad_norm": 1.321317823868094, "learning_rate": 1.6599086484830936e-05, "loss": 0.5801324844360352, "step": 2261 }, { "epoch": 0.6146739130434783, "grad_norm": 0.9893181978867444, "learning_rate": 1.659570992840903e-05, "loss": 0.4679874777793884, "step": 2262 }, { "epoch": 0.6149456521739131, "grad_norm": 0.9740610867286285, "learning_rate": 1.659233204043416e-05, "loss": 0.4379512071609497, "step": 2263 }, { "epoch": 0.6152173913043478, "grad_norm": 0.9126141734442941, "learning_rate": 1.6588952821588256e-05, "loss": 0.43913769721984863, "step": 2264 }, { "epoch": 0.6154891304347826, "grad_norm": 1.0147782403904955, "learning_rate": 1.658557227255352e-05, "loss": 0.5026188492774963, "step": 2265 }, { "epoch": 0.6157608695652174, "grad_norm": 1.1035198888116537, "learning_rate": 1.6582190394012427e-05, "loss": 0.5468815565109253, "step": 2266 }, { "epoch": 0.6160326086956521, "grad_norm": 0.9296876734265028, "learning_rate": 1.6578807186647715e-05, "loss": 0.45822131633758545, "step": 2267 }, { "epoch": 0.616304347826087, "grad_norm": 0.9079935794539796, "learning_rate": 1.657542265114239e-05, "loss": 0.4045273959636688, "step": 2268 }, { "epoch": 0.6165760869565218, "grad_norm": 1.0444645424826882, "learning_rate": 1.6572036788179728e-05, "loss": 0.48081934452056885, "step": 2269 }, { "epoch": 0.6168478260869565, "grad_norm": 0.8803406282797237, "learning_rate": 1.6568649598443268e-05, "loss": 0.3775184452533722, "step": 2270 }, { "epoch": 0.6171195652173913, "grad_norm": 1.0873757630309255, "learning_rate": 1.656526108261683e-05, "loss": 0.5057700872421265, "step": 2271 }, { "epoch": 0.6173913043478261, "grad_norm": 1.161251005121236, "learning_rate": 1.656187124138449e-05, "loss": 0.6141793727874756, "step": 2272 }, { "epoch": 0.6176630434782608, "grad_norm": 1.0557689762417712, "learning_rate": 1.6558480075430594e-05, "loss": 0.5169159770011902, "step": 2273 }, { "epoch": 0.6179347826086956, "grad_norm": 0.9909385633372497, "learning_rate": 1.655508758543976e-05, "loss": 0.41386276483535767, "step": 2274 }, { "epoch": 0.6182065217391305, "grad_norm": 1.0710402606169522, "learning_rate": 1.655169377209686e-05, "loss": 0.5248281955718994, "step": 2275 }, { "epoch": 0.6184782608695653, "grad_norm": 1.064421102445594, "learning_rate": 1.6548298636087056e-05, "loss": 0.4924268424510956, "step": 2276 }, { "epoch": 0.61875, "grad_norm": 1.1119140127706575, "learning_rate": 1.654490217809575e-05, "loss": 0.49409282207489014, "step": 2277 }, { "epoch": 0.6190217391304348, "grad_norm": 1.0591252380713179, "learning_rate": 1.6541504398808633e-05, "loss": 0.6119670867919922, "step": 2278 }, { "epoch": 0.6192934782608696, "grad_norm": 1.039070020081279, "learning_rate": 1.6538105298911653e-05, "loss": 0.4931381344795227, "step": 2279 }, { "epoch": 0.6195652173913043, "grad_norm": 1.6197953047282938, "learning_rate": 1.6534704879091027e-05, "loss": 0.464574933052063, "step": 2280 }, { "epoch": 0.6198369565217391, "grad_norm": 1.0462635807149958, "learning_rate": 1.6531303140033235e-05, "loss": 0.48762384057044983, "step": 2281 }, { "epoch": 0.6201086956521739, "grad_norm": 0.8394234650591523, "learning_rate": 1.6527900082425025e-05, "loss": 0.37979429960250854, "step": 2282 }, { "epoch": 0.6203804347826087, "grad_norm": 1.0804223371716455, "learning_rate": 1.6524495706953417e-05, "loss": 0.5380542874336243, "step": 2283 }, { "epoch": 0.6206521739130435, "grad_norm": 0.9671205867116803, "learning_rate": 1.6521090014305685e-05, "loss": 0.437722384929657, "step": 2284 }, { "epoch": 0.6209239130434783, "grad_norm": 1.186335146274832, "learning_rate": 1.6517683005169384e-05, "loss": 0.6002308130264282, "step": 2285 }, { "epoch": 0.621195652173913, "grad_norm": 0.9403502295799341, "learning_rate": 1.651427468023232e-05, "loss": 0.42998379468917847, "step": 2286 }, { "epoch": 0.6214673913043478, "grad_norm": 1.1053729140964461, "learning_rate": 1.6510865040182575e-05, "loss": 0.5250385999679565, "step": 2287 }, { "epoch": 0.6217391304347826, "grad_norm": 1.2677363243203317, "learning_rate": 1.650745408570849e-05, "loss": 0.4124045968055725, "step": 2288 }, { "epoch": 0.6220108695652173, "grad_norm": 1.2260282419343131, "learning_rate": 1.6504041817498676e-05, "loss": 0.6013075113296509, "step": 2289 }, { "epoch": 0.6222826086956522, "grad_norm": 1.0844473484454404, "learning_rate": 1.650062823624201e-05, "loss": 0.5682260990142822, "step": 2290 }, { "epoch": 0.622554347826087, "grad_norm": 0.9194839050527158, "learning_rate": 1.649721334262763e-05, "loss": 0.42582178115844727, "step": 2291 }, { "epoch": 0.6228260869565218, "grad_norm": 1.1680227364456996, "learning_rate": 1.6493797137344935e-05, "loss": 0.6341871619224548, "step": 2292 }, { "epoch": 0.6230978260869565, "grad_norm": 1.2570712809875935, "learning_rate": 1.64903796210836e-05, "loss": 0.47891202569007874, "step": 2293 }, { "epoch": 0.6233695652173913, "grad_norm": 1.01615184334225, "learning_rate": 1.6486960794533558e-05, "loss": 0.42129427194595337, "step": 2294 }, { "epoch": 0.623641304347826, "grad_norm": 0.8938578034627408, "learning_rate": 1.6483540658385003e-05, "loss": 0.4227486252784729, "step": 2295 }, { "epoch": 0.6239130434782608, "grad_norm": 1.0484217568491563, "learning_rate": 1.6480119213328406e-05, "loss": 0.4947534501552582, "step": 2296 }, { "epoch": 0.6241847826086957, "grad_norm": 0.8665907588040574, "learning_rate": 1.647669646005449e-05, "loss": 0.3953205943107605, "step": 2297 }, { "epoch": 0.6244565217391305, "grad_norm": 1.025798886193572, "learning_rate": 1.6473272399254245e-05, "loss": 0.6018059849739075, "step": 2298 }, { "epoch": 0.6247282608695652, "grad_norm": 0.9756191507409406, "learning_rate": 1.6469847031618925e-05, "loss": 0.4635924696922302, "step": 2299 }, { "epoch": 0.625, "grad_norm": 1.077074256954098, "learning_rate": 1.6466420357840053e-05, "loss": 0.49924081563949585, "step": 2300 }, { "epoch": 0.6252717391304348, "grad_norm": 0.8779829173107687, "learning_rate": 1.646299237860941e-05, "loss": 0.4380606412887573, "step": 2301 }, { "epoch": 0.6255434782608695, "grad_norm": 1.1722701814783512, "learning_rate": 1.645956309461904e-05, "loss": 0.5317721366882324, "step": 2302 }, { "epoch": 0.6258152173913043, "grad_norm": 0.9659899883424584, "learning_rate": 1.6456132506561258e-05, "loss": 0.49180424213409424, "step": 2303 }, { "epoch": 0.6260869565217392, "grad_norm": 1.1639958273291546, "learning_rate": 1.645270061512863e-05, "loss": 0.5939716100692749, "step": 2304 }, { "epoch": 0.626358695652174, "grad_norm": 0.8596083642790604, "learning_rate": 1.6449267421013994e-05, "loss": 0.3931213617324829, "step": 2305 }, { "epoch": 0.6266304347826087, "grad_norm": 0.9746628842199557, "learning_rate": 1.6445832924910453e-05, "loss": 0.4792634844779968, "step": 2306 }, { "epoch": 0.6269021739130435, "grad_norm": 0.9690344243602457, "learning_rate": 1.6442397127511366e-05, "loss": 0.5191505551338196, "step": 2307 }, { "epoch": 0.6271739130434782, "grad_norm": 0.8541396712276171, "learning_rate": 1.6438960029510357e-05, "loss": 0.41003501415252686, "step": 2308 }, { "epoch": 0.627445652173913, "grad_norm": 1.1801466671074565, "learning_rate": 1.643552163160131e-05, "loss": 0.5447651147842407, "step": 2309 }, { "epoch": 0.6277173913043478, "grad_norm": 0.8587743235622044, "learning_rate": 1.6432081934478382e-05, "loss": 0.5088872909545898, "step": 2310 }, { "epoch": 0.6279891304347827, "grad_norm": 1.0189169358143686, "learning_rate": 1.6428640938835984e-05, "loss": 0.5481293797492981, "step": 2311 }, { "epoch": 0.6282608695652174, "grad_norm": 1.0408779440908469, "learning_rate": 1.6425198645368783e-05, "loss": 0.5869156122207642, "step": 2312 }, { "epoch": 0.6285326086956522, "grad_norm": 1.1564160694430998, "learning_rate": 1.6421755054771718e-05, "loss": 0.5446702241897583, "step": 2313 }, { "epoch": 0.628804347826087, "grad_norm": 1.0917825291755223, "learning_rate": 1.6418310167739987e-05, "loss": 0.5539510250091553, "step": 2314 }, { "epoch": 0.6290760869565217, "grad_norm": 1.1035598710125225, "learning_rate": 1.6414863984969054e-05, "loss": 0.5190497636795044, "step": 2315 }, { "epoch": 0.6293478260869565, "grad_norm": 1.286947545954564, "learning_rate": 1.6411416507154635e-05, "loss": 0.6107396483421326, "step": 2316 }, { "epoch": 0.6296195652173913, "grad_norm": 1.1394494615514872, "learning_rate": 1.6407967734992714e-05, "loss": 0.5481127500534058, "step": 2317 }, { "epoch": 0.6298913043478261, "grad_norm": 0.9896527839937449, "learning_rate": 1.6404517669179534e-05, "loss": 0.492096483707428, "step": 2318 }, { "epoch": 0.6301630434782609, "grad_norm": 1.0987214953855358, "learning_rate": 1.6401066310411603e-05, "loss": 0.4612850546836853, "step": 2319 }, { "epoch": 0.6304347826086957, "grad_norm": 1.1894479208085467, "learning_rate": 1.6397613659385685e-05, "loss": 0.5804173946380615, "step": 2320 }, { "epoch": 0.6307065217391304, "grad_norm": 1.0022889157473922, "learning_rate": 1.6394159716798807e-05, "loss": 0.47190576791763306, "step": 2321 }, { "epoch": 0.6309782608695652, "grad_norm": 1.0086543715736906, "learning_rate": 1.6390704483348255e-05, "loss": 0.5211383700370789, "step": 2322 }, { "epoch": 0.63125, "grad_norm": 0.8917843735964642, "learning_rate": 1.6387247959731582e-05, "loss": 0.44511938095092773, "step": 2323 }, { "epoch": 0.6315217391304347, "grad_norm": 0.9948406099798611, "learning_rate": 1.638379014664659e-05, "loss": 0.43207037448883057, "step": 2324 }, { "epoch": 0.6317934782608695, "grad_norm": 1.093385803418641, "learning_rate": 1.6380331044791354e-05, "loss": 0.5329213738441467, "step": 2325 }, { "epoch": 0.6320652173913044, "grad_norm": 1.0949937998050077, "learning_rate": 1.63768706548642e-05, "loss": 0.5592366456985474, "step": 2326 }, { "epoch": 0.6323369565217392, "grad_norm": 1.1057316527819068, "learning_rate": 1.6373408977563726e-05, "loss": 0.5346699953079224, "step": 2327 }, { "epoch": 0.6326086956521739, "grad_norm": 1.100106585485227, "learning_rate": 1.6369946013588766e-05, "loss": 0.46941912174224854, "step": 2328 }, { "epoch": 0.6328804347826087, "grad_norm": 0.9413996100662158, "learning_rate": 1.636648176363844e-05, "loss": 0.4654950499534607, "step": 2329 }, { "epoch": 0.6331521739130435, "grad_norm": 1.0617757037208184, "learning_rate": 1.6363016228412114e-05, "loss": 0.4548371732234955, "step": 2330 }, { "epoch": 0.6334239130434782, "grad_norm": 1.0040016393429054, "learning_rate": 1.6359549408609407e-05, "loss": 0.42728450894355774, "step": 2331 }, { "epoch": 0.633695652173913, "grad_norm": 1.1072590201152732, "learning_rate": 1.6356081304930223e-05, "loss": 0.46922606229782104, "step": 2332 }, { "epoch": 0.6339673913043479, "grad_norm": 1.1213425804629074, "learning_rate": 1.63526119180747e-05, "loss": 0.41076040267944336, "step": 2333 }, { "epoch": 0.6342391304347826, "grad_norm": 0.8219984397653197, "learning_rate": 1.634914124874324e-05, "loss": 0.35896164178848267, "step": 2334 }, { "epoch": 0.6345108695652174, "grad_norm": 1.0873846461573018, "learning_rate": 1.6345669297636516e-05, "loss": 0.47660931944847107, "step": 2335 }, { "epoch": 0.6347826086956522, "grad_norm": 0.9855965292029801, "learning_rate": 1.6342196065455447e-05, "loss": 0.4228582978248596, "step": 2336 }, { "epoch": 0.6350543478260869, "grad_norm": 1.024526747989759, "learning_rate": 1.633872155290121e-05, "loss": 0.4088207483291626, "step": 2337 }, { "epoch": 0.6353260869565217, "grad_norm": 1.067915611241238, "learning_rate": 1.6335245760675257e-05, "loss": 0.49746131896972656, "step": 2338 }, { "epoch": 0.6355978260869565, "grad_norm": 0.8932401444180281, "learning_rate": 1.6331768689479274e-05, "loss": 0.44141870737075806, "step": 2339 }, { "epoch": 0.6358695652173914, "grad_norm": 1.2705980794844924, "learning_rate": 1.6328290340015227e-05, "loss": 0.6725325584411621, "step": 2340 }, { "epoch": 0.6361413043478261, "grad_norm": 1.1523029772556033, "learning_rate": 1.6324810712985325e-05, "loss": 0.5277920365333557, "step": 2341 }, { "epoch": 0.6364130434782609, "grad_norm": 1.0673033441334643, "learning_rate": 1.6321329809092046e-05, "loss": 0.47553879022598267, "step": 2342 }, { "epoch": 0.6366847826086957, "grad_norm": 1.004248576078848, "learning_rate": 1.631784762903812e-05, "loss": 0.49389445781707764, "step": 2343 }, { "epoch": 0.6369565217391304, "grad_norm": 1.0004558660881098, "learning_rate": 1.631436417352653e-05, "loss": 0.4946083128452301, "step": 2344 }, { "epoch": 0.6372282608695652, "grad_norm": 1.0531872750342293, "learning_rate": 1.631087944326053e-05, "loss": 0.5461223125457764, "step": 2345 }, { "epoch": 0.6375, "grad_norm": 0.8996973717198429, "learning_rate": 1.6307393438943617e-05, "loss": 0.41973668336868286, "step": 2346 }, { "epoch": 0.6377717391304348, "grad_norm": 0.989559474080135, "learning_rate": 1.6303906161279554e-05, "loss": 0.4581335484981537, "step": 2347 }, { "epoch": 0.6380434782608696, "grad_norm": 0.9537120921533047, "learning_rate": 1.6300417610972353e-05, "loss": 0.44148433208465576, "step": 2348 }, { "epoch": 0.6383152173913044, "grad_norm": 0.9884873871460622, "learning_rate": 1.62969277887263e-05, "loss": 0.4330853223800659, "step": 2349 }, { "epoch": 0.6385869565217391, "grad_norm": 1.1246183623653787, "learning_rate": 1.6293436695245914e-05, "loss": 0.5200570225715637, "step": 2350 }, { "epoch": 0.6388586956521739, "grad_norm": 1.1435666097183494, "learning_rate": 1.6289944331235993e-05, "loss": 0.557449460029602, "step": 2351 }, { "epoch": 0.6391304347826087, "grad_norm": 1.1004886960343576, "learning_rate": 1.6286450697401576e-05, "loss": 0.5282344222068787, "step": 2352 }, { "epoch": 0.6394021739130434, "grad_norm": 1.161904381098301, "learning_rate": 1.628295579444796e-05, "loss": 0.5388159155845642, "step": 2353 }, { "epoch": 0.6396739130434783, "grad_norm": 0.8993547926772011, "learning_rate": 1.6279459623080706e-05, "loss": 0.3979865312576294, "step": 2354 }, { "epoch": 0.6399456521739131, "grad_norm": 1.0186027091859564, "learning_rate": 1.627596218400563e-05, "loss": 0.5203224420547485, "step": 2355 }, { "epoch": 0.6402173913043478, "grad_norm": 0.8268669486742329, "learning_rate": 1.62724634779288e-05, "loss": 0.39117810130119324, "step": 2356 }, { "epoch": 0.6404891304347826, "grad_norm": 0.9978401685287756, "learning_rate": 1.6268963505556532e-05, "loss": 0.5075061321258545, "step": 2357 }, { "epoch": 0.6407608695652174, "grad_norm": 1.008769671542796, "learning_rate": 1.6265462267595416e-05, "loss": 0.5141727328300476, "step": 2358 }, { "epoch": 0.6410326086956522, "grad_norm": 0.8746709219220967, "learning_rate": 1.626195976475228e-05, "loss": 0.3756948411464691, "step": 2359 }, { "epoch": 0.6413043478260869, "grad_norm": 1.0312724873018946, "learning_rate": 1.6258455997734227e-05, "loss": 0.4997377395629883, "step": 2360 }, { "epoch": 0.6415760869565217, "grad_norm": 0.8858045406288222, "learning_rate": 1.6254950967248592e-05, "loss": 0.40035486221313477, "step": 2361 }, { "epoch": 0.6418478260869566, "grad_norm": 1.0867724827010308, "learning_rate": 1.625144467400298e-05, "loss": 0.4967573583126068, "step": 2362 }, { "epoch": 0.6421195652173913, "grad_norm": 1.0307540431254985, "learning_rate": 1.624793711870525e-05, "loss": 0.46039754152297974, "step": 2363 }, { "epoch": 0.6423913043478261, "grad_norm": 1.0726968219299802, "learning_rate": 1.6244428302063506e-05, "loss": 0.4605698585510254, "step": 2364 }, { "epoch": 0.6426630434782609, "grad_norm": 1.0948423811735153, "learning_rate": 1.6240918224786123e-05, "loss": 0.47611379623413086, "step": 2365 }, { "epoch": 0.6429347826086956, "grad_norm": 1.0560947175603195, "learning_rate": 1.6237406887581717e-05, "loss": 0.48862424492836, "step": 2366 }, { "epoch": 0.6432065217391304, "grad_norm": 1.102852096518143, "learning_rate": 1.623389429115916e-05, "loss": 0.5670334696769714, "step": 2367 }, { "epoch": 0.6434782608695652, "grad_norm": 0.9861157182576328, "learning_rate": 1.623038043622758e-05, "loss": 0.3875936269760132, "step": 2368 }, { "epoch": 0.64375, "grad_norm": 1.0262297190626746, "learning_rate": 1.6226865323496373e-05, "loss": 0.4565664529800415, "step": 2369 }, { "epoch": 0.6440217391304348, "grad_norm": 0.8358137493765813, "learning_rate": 1.6223348953675163e-05, "loss": 0.33141881227493286, "step": 2370 }, { "epoch": 0.6442934782608696, "grad_norm": 0.9580640581870113, "learning_rate": 1.621983132747384e-05, "loss": 0.37968069314956665, "step": 2371 }, { "epoch": 0.6445652173913043, "grad_norm": 1.1005121147818242, "learning_rate": 1.621631244560255e-05, "loss": 0.5426890850067139, "step": 2372 }, { "epoch": 0.6448369565217391, "grad_norm": 1.0369072462146933, "learning_rate": 1.62127923087717e-05, "loss": 0.48865699768066406, "step": 2373 }, { "epoch": 0.6451086956521739, "grad_norm": 1.0970028788554762, "learning_rate": 1.620927091769193e-05, "loss": 0.49026811122894287, "step": 2374 }, { "epoch": 0.6453804347826086, "grad_norm": 1.1386469803117552, "learning_rate": 1.6205748273074147e-05, "loss": 0.514380931854248, "step": 2375 }, { "epoch": 0.6456521739130435, "grad_norm": 0.7893315490249128, "learning_rate": 1.620222437562951e-05, "loss": 0.32896947860717773, "step": 2376 }, { "epoch": 0.6459239130434783, "grad_norm": 0.8939797612530197, "learning_rate": 1.619869922606943e-05, "loss": 0.441630482673645, "step": 2377 }, { "epoch": 0.6461956521739131, "grad_norm": 1.1650621389747087, "learning_rate": 1.619517282510557e-05, "loss": 0.5833041667938232, "step": 2378 }, { "epoch": 0.6464673913043478, "grad_norm": 1.0476762300340647, "learning_rate": 1.6191645173449843e-05, "loss": 0.5136332511901855, "step": 2379 }, { "epoch": 0.6467391304347826, "grad_norm": 1.0412231197027173, "learning_rate": 1.618811627181442e-05, "loss": 0.5084941983222961, "step": 2380 }, { "epoch": 0.6470108695652174, "grad_norm": 1.0363912985807677, "learning_rate": 1.618458612091172e-05, "loss": 0.498953640460968, "step": 2381 }, { "epoch": 0.6472826086956521, "grad_norm": 1.029664964562985, "learning_rate": 1.6181054721454418e-05, "loss": 0.44423115253448486, "step": 2382 }, { "epoch": 0.647554347826087, "grad_norm": 1.0546992139612519, "learning_rate": 1.6177522074155436e-05, "loss": 0.5469783544540405, "step": 2383 }, { "epoch": 0.6478260869565218, "grad_norm": 1.092709629382852, "learning_rate": 1.6173988179727952e-05, "loss": 0.4551374614238739, "step": 2384 }, { "epoch": 0.6480978260869565, "grad_norm": 1.0893538096154478, "learning_rate": 1.6170453038885394e-05, "loss": 0.5114643573760986, "step": 2385 }, { "epoch": 0.6483695652173913, "grad_norm": 1.053298554760582, "learning_rate": 1.616691665234144e-05, "loss": 0.4701641798019409, "step": 2386 }, { "epoch": 0.6486413043478261, "grad_norm": 0.9483508367928889, "learning_rate": 1.6163379020810032e-05, "loss": 0.41681408882141113, "step": 2387 }, { "epoch": 0.6489130434782608, "grad_norm": 0.9452224206560039, "learning_rate": 1.615984014500534e-05, "loss": 0.4315577447414398, "step": 2388 }, { "epoch": 0.6491847826086956, "grad_norm": 0.9029464367776571, "learning_rate": 1.6156300025641805e-05, "loss": 0.40452390909194946, "step": 2389 }, { "epoch": 0.6494565217391305, "grad_norm": 1.0627621329897972, "learning_rate": 1.6152758663434108e-05, "loss": 0.5632803440093994, "step": 2390 }, { "epoch": 0.6497282608695653, "grad_norm": 1.0717281555403286, "learning_rate": 1.614921605909719e-05, "loss": 0.48812729120254517, "step": 2391 }, { "epoch": 0.65, "grad_norm": 1.2326826083207598, "learning_rate": 1.6145672213346234e-05, "loss": 0.595186710357666, "step": 2392 }, { "epoch": 0.6502717391304348, "grad_norm": 0.9822241227946201, "learning_rate": 1.6142127126896682e-05, "loss": 0.5196582078933716, "step": 2393 }, { "epoch": 0.6505434782608696, "grad_norm": 1.061535090597358, "learning_rate": 1.6138580800464218e-05, "loss": 0.5011014342308044, "step": 2394 }, { "epoch": 0.6508152173913043, "grad_norm": 1.1605157992881758, "learning_rate": 1.6135033234764777e-05, "loss": 0.5475097298622131, "step": 2395 }, { "epoch": 0.6510869565217391, "grad_norm": 1.1078119424145672, "learning_rate": 1.6131484430514557e-05, "loss": 0.546087384223938, "step": 2396 }, { "epoch": 0.6513586956521739, "grad_norm": 0.9462955701453519, "learning_rate": 1.612793438842999e-05, "loss": 0.3901281952857971, "step": 2397 }, { "epoch": 0.6516304347826087, "grad_norm": 1.1681606454836815, "learning_rate": 1.6124383109227766e-05, "loss": 0.5218679904937744, "step": 2398 }, { "epoch": 0.6519021739130435, "grad_norm": 0.885276648290595, "learning_rate": 1.612083059362482e-05, "loss": 0.36344921588897705, "step": 2399 }, { "epoch": 0.6521739130434783, "grad_norm": 1.0774939806670498, "learning_rate": 1.611727684233835e-05, "loss": 0.505590558052063, "step": 2400 }, { "epoch": 0.652445652173913, "grad_norm": 1.1503695514251557, "learning_rate": 1.6113721856085783e-05, "loss": 0.5358390808105469, "step": 2401 }, { "epoch": 0.6527173913043478, "grad_norm": 1.040050853626789, "learning_rate": 1.6110165635584807e-05, "loss": 0.4456617832183838, "step": 2402 }, { "epoch": 0.6529891304347826, "grad_norm": 1.0739988018547744, "learning_rate": 1.610660818155336e-05, "loss": 0.508898913860321, "step": 2403 }, { "epoch": 0.6532608695652173, "grad_norm": 0.9533793856867826, "learning_rate": 1.610304949470963e-05, "loss": 0.45024555921554565, "step": 2404 }, { "epoch": 0.6535326086956522, "grad_norm": 1.0529826251646406, "learning_rate": 1.6099489575772046e-05, "loss": 0.48599305748939514, "step": 2405 }, { "epoch": 0.653804347826087, "grad_norm": 1.0670067796198843, "learning_rate": 1.609592842545929e-05, "loss": 0.44096243381500244, "step": 2406 }, { "epoch": 0.6540760869565218, "grad_norm": 0.9581467379883741, "learning_rate": 1.6092366044490293e-05, "loss": 0.437100887298584, "step": 2407 }, { "epoch": 0.6543478260869565, "grad_norm": 1.1019783696561136, "learning_rate": 1.6088802433584236e-05, "loss": 0.49608081579208374, "step": 2408 }, { "epoch": 0.6546195652173913, "grad_norm": 1.0561313829161536, "learning_rate": 1.6085237593460544e-05, "loss": 0.4475274682044983, "step": 2409 }, { "epoch": 0.654891304347826, "grad_norm": 0.9658304998521077, "learning_rate": 1.60816715248389e-05, "loss": 0.431684672832489, "step": 2410 }, { "epoch": 0.6551630434782608, "grad_norm": 1.1087883072707485, "learning_rate": 1.6078104228439225e-05, "loss": 0.5258973836898804, "step": 2411 }, { "epoch": 0.6554347826086957, "grad_norm": 1.0350582302429332, "learning_rate": 1.6074535704981686e-05, "loss": 0.4453100562095642, "step": 2412 }, { "epoch": 0.6557065217391305, "grad_norm": 0.958489236409274, "learning_rate": 1.6070965955186705e-05, "loss": 0.4557369351387024, "step": 2413 }, { "epoch": 0.6559782608695652, "grad_norm": 1.090104512555428, "learning_rate": 1.606739497977495e-05, "loss": 0.6249349117279053, "step": 2414 }, { "epoch": 0.65625, "grad_norm": 0.8316960131859389, "learning_rate": 1.606382277946734e-05, "loss": 0.37498098611831665, "step": 2415 }, { "epoch": 0.6565217391304348, "grad_norm": 0.9114381862388942, "learning_rate": 1.6060249354985023e-05, "loss": 0.46681109070777893, "step": 2416 }, { "epoch": 0.6567934782608695, "grad_norm": 1.015375723129676, "learning_rate": 1.6056674707049423e-05, "loss": 0.4738791584968567, "step": 2417 }, { "epoch": 0.6570652173913043, "grad_norm": 1.1038686131492588, "learning_rate": 1.6053098836382187e-05, "loss": 0.4877675175666809, "step": 2418 }, { "epoch": 0.6573369565217392, "grad_norm": 0.9892675732622427, "learning_rate": 1.6049521743705224e-05, "loss": 0.4762152135372162, "step": 2419 }, { "epoch": 0.657608695652174, "grad_norm": 0.8928288152646889, "learning_rate": 1.6045943429740674e-05, "loss": 0.4126914441585541, "step": 2420 }, { "epoch": 0.6578804347826087, "grad_norm": 0.9391797961002089, "learning_rate": 1.6042363895210948e-05, "loss": 0.42222559452056885, "step": 2421 }, { "epoch": 0.6581521739130435, "grad_norm": 0.9485737226950132, "learning_rate": 1.6038783140838675e-05, "loss": 0.4008307456970215, "step": 2422 }, { "epoch": 0.6584239130434782, "grad_norm": 1.2445883909954878, "learning_rate": 1.6035201167346746e-05, "loss": 0.6401928663253784, "step": 2423 }, { "epoch": 0.658695652173913, "grad_norm": 1.0112433166023018, "learning_rate": 1.60316179754583e-05, "loss": 0.4392552971839905, "step": 2424 }, { "epoch": 0.6589673913043478, "grad_norm": 0.9798474248689942, "learning_rate": 1.6028033565896715e-05, "loss": 0.4681716561317444, "step": 2425 }, { "epoch": 0.6592391304347827, "grad_norm": 1.001874118588784, "learning_rate": 1.6024447939385617e-05, "loss": 0.4629269242286682, "step": 2426 }, { "epoch": 0.6595108695652174, "grad_norm": 1.1673924915563978, "learning_rate": 1.6020861096648883e-05, "loss": 0.5630910992622375, "step": 2427 }, { "epoch": 0.6597826086956522, "grad_norm": 1.1042650705349097, "learning_rate": 1.6017273038410626e-05, "loss": 0.42451825737953186, "step": 2428 }, { "epoch": 0.660054347826087, "grad_norm": 1.2736676060190784, "learning_rate": 1.601368376539521e-05, "loss": 0.5964167714118958, "step": 2429 }, { "epoch": 0.6603260869565217, "grad_norm": 1.18578664123428, "learning_rate": 1.6010093278327246e-05, "loss": 0.5393899083137512, "step": 2430 }, { "epoch": 0.6605978260869565, "grad_norm": 1.0250561523323132, "learning_rate": 1.6006501577931582e-05, "loss": 0.43474793434143066, "step": 2431 }, { "epoch": 0.6608695652173913, "grad_norm": 0.9873697078388902, "learning_rate": 1.6002908664933323e-05, "loss": 0.44640231132507324, "step": 2432 }, { "epoch": 0.6611413043478261, "grad_norm": 1.091448919669691, "learning_rate": 1.599931454005781e-05, "loss": 0.455901563167572, "step": 2433 }, { "epoch": 0.6614130434782609, "grad_norm": 1.0076236356905508, "learning_rate": 1.599571920403063e-05, "loss": 0.46584367752075195, "step": 2434 }, { "epoch": 0.6616847826086957, "grad_norm": 0.9528585873240267, "learning_rate": 1.5992122657577612e-05, "loss": 0.4488168954849243, "step": 2435 }, { "epoch": 0.6619565217391304, "grad_norm": 0.877149979298835, "learning_rate": 1.598852490142484e-05, "loss": 0.38394951820373535, "step": 2436 }, { "epoch": 0.6622282608695652, "grad_norm": 0.9958185219307645, "learning_rate": 1.598492593629863e-05, "loss": 0.40628358721733093, "step": 2437 }, { "epoch": 0.6625, "grad_norm": 0.8354703769322208, "learning_rate": 1.598132576292555e-05, "loss": 0.33310461044311523, "step": 2438 }, { "epoch": 0.6627717391304347, "grad_norm": 0.9343829482663752, "learning_rate": 1.597772438203241e-05, "loss": 0.437463641166687, "step": 2439 }, { "epoch": 0.6630434782608695, "grad_norm": 1.0790563241828637, "learning_rate": 1.597412179434626e-05, "loss": 0.4687923789024353, "step": 2440 }, { "epoch": 0.6633152173913044, "grad_norm": 1.0068801098272389, "learning_rate": 1.5970518000594392e-05, "loss": 0.41803741455078125, "step": 2441 }, { "epoch": 0.6635869565217392, "grad_norm": 1.0166696012007757, "learning_rate": 1.5966913001504358e-05, "loss": 0.38863036036491394, "step": 2442 }, { "epoch": 0.6638586956521739, "grad_norm": 1.0371469572967864, "learning_rate": 1.596330679780393e-05, "loss": 0.527935266494751, "step": 2443 }, { "epoch": 0.6641304347826087, "grad_norm": 1.01072506534207, "learning_rate": 1.5959699390221136e-05, "loss": 0.4882989525794983, "step": 2444 }, { "epoch": 0.6644021739130435, "grad_norm": 1.1947922082997635, "learning_rate": 1.5956090779484252e-05, "loss": 0.4983060359954834, "step": 2445 }, { "epoch": 0.6646739130434782, "grad_norm": 1.0388857841620647, "learning_rate": 1.5952480966321785e-05, "loss": 0.4764907658100128, "step": 2446 }, { "epoch": 0.664945652173913, "grad_norm": 1.09914739351566, "learning_rate": 1.59488699514625e-05, "loss": 0.4515378773212433, "step": 2447 }, { "epoch": 0.6652173913043479, "grad_norm": 0.9104729545914022, "learning_rate": 1.5945257735635376e-05, "loss": 0.4024536609649658, "step": 2448 }, { "epoch": 0.6654891304347826, "grad_norm": 0.9808133754735786, "learning_rate": 1.5941644319569665e-05, "loss": 0.5228855609893799, "step": 2449 }, { "epoch": 0.6657608695652174, "grad_norm": 1.0194623075586684, "learning_rate": 1.593802970399485e-05, "loss": 0.49137526750564575, "step": 2450 }, { "epoch": 0.6660326086956522, "grad_norm": 0.9263932792153011, "learning_rate": 1.5934413889640653e-05, "loss": 0.43997395038604736, "step": 2451 }, { "epoch": 0.6663043478260869, "grad_norm": 1.103047720563673, "learning_rate": 1.5930796877237043e-05, "loss": 0.5382890701293945, "step": 2452 }, { "epoch": 0.6665760869565217, "grad_norm": 1.001706323860064, "learning_rate": 1.5927178667514226e-05, "loss": 0.4786052107810974, "step": 2453 }, { "epoch": 0.6668478260869565, "grad_norm": 1.0771280531994676, "learning_rate": 1.5923559261202652e-05, "loss": 0.4531256854534149, "step": 2454 }, { "epoch": 0.6671195652173914, "grad_norm": 0.9798850595410428, "learning_rate": 1.5919938659033016e-05, "loss": 0.39335206151008606, "step": 2455 }, { "epoch": 0.6673913043478261, "grad_norm": 1.1422589624023851, "learning_rate": 1.591631686173625e-05, "loss": 0.5296807885169983, "step": 2456 }, { "epoch": 0.6676630434782609, "grad_norm": 0.9291857596702112, "learning_rate": 1.5912693870043524e-05, "loss": 0.3509114384651184, "step": 2457 }, { "epoch": 0.6679347826086957, "grad_norm": 1.1452944526506112, "learning_rate": 1.5909069684686257e-05, "loss": 0.43159157037734985, "step": 2458 }, { "epoch": 0.6682065217391304, "grad_norm": 1.024540459808951, "learning_rate": 1.590544430639611e-05, "loss": 0.5603830814361572, "step": 2459 }, { "epoch": 0.6684782608695652, "grad_norm": 1.3880904177894984, "learning_rate": 1.5901817735904973e-05, "loss": 0.5032303929328918, "step": 2460 }, { "epoch": 0.66875, "grad_norm": 0.9965360995582753, "learning_rate": 1.5898189973944988e-05, "loss": 0.5040937662124634, "step": 2461 }, { "epoch": 0.6690217391304348, "grad_norm": 1.0547618468232878, "learning_rate": 1.5894561021248535e-05, "loss": 0.5144962668418884, "step": 2462 }, { "epoch": 0.6692934782608696, "grad_norm": 1.0359885296108944, "learning_rate": 1.589093087854823e-05, "loss": 0.5090943574905396, "step": 2463 }, { "epoch": 0.6695652173913044, "grad_norm": 1.0771952841567831, "learning_rate": 1.5887299546576934e-05, "loss": 0.5402264595031738, "step": 2464 }, { "epoch": 0.6698369565217391, "grad_norm": 0.9865809030013958, "learning_rate": 1.5883667026067745e-05, "loss": 0.46963226795196533, "step": 2465 }, { "epoch": 0.6701086956521739, "grad_norm": 1.1517194235494248, "learning_rate": 1.5880033317754006e-05, "loss": 0.6238670349121094, "step": 2466 }, { "epoch": 0.6703804347826087, "grad_norm": 1.3229022189011974, "learning_rate": 1.587639842236929e-05, "loss": 0.5256537795066833, "step": 2467 }, { "epoch": 0.6706521739130434, "grad_norm": 0.7343554085155205, "learning_rate": 1.587276234064742e-05, "loss": 0.3546408414840698, "step": 2468 }, { "epoch": 0.6709239130434783, "grad_norm": 1.07095772894994, "learning_rate": 1.586912507332245e-05, "loss": 0.49622154235839844, "step": 2469 }, { "epoch": 0.6711956521739131, "grad_norm": 1.082465739597413, "learning_rate": 1.586548662112869e-05, "loss": 0.4437679052352905, "step": 2470 }, { "epoch": 0.6714673913043478, "grad_norm": 1.0010319348119403, "learning_rate": 1.5861846984800658e-05, "loss": 0.4115763306617737, "step": 2471 }, { "epoch": 0.6717391304347826, "grad_norm": 1.0301454058634167, "learning_rate": 1.585820616507314e-05, "loss": 0.5058993101119995, "step": 2472 }, { "epoch": 0.6720108695652174, "grad_norm": 1.1676675906306506, "learning_rate": 1.585456416268116e-05, "loss": 0.5583504438400269, "step": 2473 }, { "epoch": 0.6722826086956522, "grad_norm": 1.0579201267127858, "learning_rate": 1.5850920978359953e-05, "loss": 0.5334765911102295, "step": 2474 }, { "epoch": 0.6725543478260869, "grad_norm": 0.98760719828353, "learning_rate": 1.5847276612845025e-05, "loss": 0.41816896200180054, "step": 2475 }, { "epoch": 0.6728260869565217, "grad_norm": 1.0418734337133935, "learning_rate": 1.58436310668721e-05, "loss": 0.484244167804718, "step": 2476 }, { "epoch": 0.6730978260869566, "grad_norm": 0.7021609334892572, "learning_rate": 1.5839984341177147e-05, "loss": 0.2832091450691223, "step": 2477 }, { "epoch": 0.6733695652173913, "grad_norm": 0.8823387050516549, "learning_rate": 1.5836336436496377e-05, "loss": 0.3767336905002594, "step": 2478 }, { "epoch": 0.6736413043478261, "grad_norm": 0.9993325378508752, "learning_rate": 1.5832687353566232e-05, "loss": 0.45207351446151733, "step": 2479 }, { "epoch": 0.6739130434782609, "grad_norm": 1.0124779188355764, "learning_rate": 1.5829037093123397e-05, "loss": 0.392467200756073, "step": 2480 }, { "epoch": 0.6741847826086956, "grad_norm": 0.9502997473424447, "learning_rate": 1.582538565590479e-05, "loss": 0.44596949219703674, "step": 2481 }, { "epoch": 0.6744565217391304, "grad_norm": 1.0437215548177214, "learning_rate": 1.5821733042647572e-05, "loss": 0.5571517944335938, "step": 2482 }, { "epoch": 0.6747282608695652, "grad_norm": 1.1080174886028564, "learning_rate": 1.5818079254089135e-05, "loss": 0.5048707723617554, "step": 2483 }, { "epoch": 0.675, "grad_norm": 1.0306465183497024, "learning_rate": 1.5814424290967118e-05, "loss": 0.482657790184021, "step": 2484 }, { "epoch": 0.6752717391304348, "grad_norm": 0.9108329970373505, "learning_rate": 1.5810768154019386e-05, "loss": 0.3950628340244293, "step": 2485 }, { "epoch": 0.6755434782608696, "grad_norm": 0.9488314022891221, "learning_rate": 1.5807110843984046e-05, "loss": 0.4269114136695862, "step": 2486 }, { "epoch": 0.6758152173913043, "grad_norm": 0.8476163499733803, "learning_rate": 1.5803452361599447e-05, "loss": 0.35385608673095703, "step": 2487 }, { "epoch": 0.6760869565217391, "grad_norm": 1.0065997527254076, "learning_rate": 1.579979270760416e-05, "loss": 0.4695548713207245, "step": 2488 }, { "epoch": 0.6763586956521739, "grad_norm": 1.006861436415502, "learning_rate": 1.5796131882737013e-05, "loss": 0.49811333417892456, "step": 2489 }, { "epoch": 0.6766304347826086, "grad_norm": 1.0515690782280358, "learning_rate": 1.579246988773705e-05, "loss": 0.46480947732925415, "step": 2490 }, { "epoch": 0.6769021739130435, "grad_norm": 0.9817271557438825, "learning_rate": 1.5788806723343572e-05, "loss": 0.43672382831573486, "step": 2491 }, { "epoch": 0.6771739130434783, "grad_norm": 1.1437640644546307, "learning_rate": 1.5785142390296093e-05, "loss": 0.41012346744537354, "step": 2492 }, { "epoch": 0.6774456521739131, "grad_norm": 1.0441490956820596, "learning_rate": 1.578147688933438e-05, "loss": 0.4441613256931305, "step": 2493 }, { "epoch": 0.6777173913043478, "grad_norm": 1.130871755720264, "learning_rate": 1.5777810221198433e-05, "loss": 0.4978055953979492, "step": 2494 }, { "epoch": 0.6779891304347826, "grad_norm": 1.0141178646219502, "learning_rate": 1.577414238662848e-05, "loss": 0.4945192039012909, "step": 2495 }, { "epoch": 0.6782608695652174, "grad_norm": 0.9596803991618511, "learning_rate": 1.5770473386364992e-05, "loss": 0.4234103560447693, "step": 2496 }, { "epoch": 0.6785326086956521, "grad_norm": 1.1288785907768755, "learning_rate": 1.5766803221148676e-05, "loss": 0.4496306777000427, "step": 2497 }, { "epoch": 0.678804347826087, "grad_norm": 1.217759011746539, "learning_rate": 1.576313189172046e-05, "loss": 0.637073814868927, "step": 2498 }, { "epoch": 0.6790760869565218, "grad_norm": 1.215195250346307, "learning_rate": 1.575945939882153e-05, "loss": 0.538176417350769, "step": 2499 }, { "epoch": 0.6793478260869565, "grad_norm": 1.0604406996044955, "learning_rate": 1.5755785743193296e-05, "loss": 0.4622746706008911, "step": 2500 }, { "epoch": 0.6796195652173913, "grad_norm": 1.0294896878558195, "learning_rate": 1.575211092557739e-05, "loss": 0.4417971968650818, "step": 2501 }, { "epoch": 0.6798913043478261, "grad_norm": 1.1012634396509928, "learning_rate": 1.57484349467157e-05, "loss": 0.496532678604126, "step": 2502 }, { "epoch": 0.6801630434782608, "grad_norm": 1.1218579724628066, "learning_rate": 1.5744757807350336e-05, "loss": 0.572102427482605, "step": 2503 }, { "epoch": 0.6804347826086956, "grad_norm": 1.0380765422986338, "learning_rate": 1.574107950822364e-05, "loss": 0.5853321552276611, "step": 2504 }, { "epoch": 0.6807065217391305, "grad_norm": 0.9334821821816192, "learning_rate": 1.5737400050078203e-05, "loss": 0.4379987120628357, "step": 2505 }, { "epoch": 0.6809782608695653, "grad_norm": 0.982692410039372, "learning_rate": 1.5733719433656837e-05, "loss": 0.4877004027366638, "step": 2506 }, { "epoch": 0.68125, "grad_norm": 0.8283210201243141, "learning_rate": 1.573003765970259e-05, "loss": 0.3810647130012512, "step": 2507 }, { "epoch": 0.6815217391304348, "grad_norm": 1.0538675736121346, "learning_rate": 1.5726354728958736e-05, "loss": 0.508068859577179, "step": 2508 }, { "epoch": 0.6817934782608696, "grad_norm": 1.1962344796655484, "learning_rate": 1.5722670642168805e-05, "loss": 0.5836648941040039, "step": 2509 }, { "epoch": 0.6820652173913043, "grad_norm": 0.8969537258954139, "learning_rate": 1.571898540007654e-05, "loss": 0.4074585735797882, "step": 2510 }, { "epoch": 0.6823369565217391, "grad_norm": 1.0897279715172992, "learning_rate": 1.5715299003425925e-05, "loss": 0.4866722822189331, "step": 2511 }, { "epoch": 0.6826086956521739, "grad_norm": 1.0569038916182834, "learning_rate": 1.5711611452961172e-05, "loss": 0.5065258145332336, "step": 2512 }, { "epoch": 0.6828804347826087, "grad_norm": 0.9864257886078472, "learning_rate": 1.5707922749426735e-05, "loss": 0.49294036626815796, "step": 2513 }, { "epoch": 0.6831521739130435, "grad_norm": 0.8802675740615882, "learning_rate": 1.5704232893567296e-05, "loss": 0.41886258125305176, "step": 2514 }, { "epoch": 0.6834239130434783, "grad_norm": 0.9407951387320208, "learning_rate": 1.570054188612777e-05, "loss": 0.4517514109611511, "step": 2515 }, { "epoch": 0.683695652173913, "grad_norm": 0.95167012861452, "learning_rate": 1.5696849727853297e-05, "loss": 0.4785078167915344, "step": 2516 }, { "epoch": 0.6839673913043478, "grad_norm": 0.9959852640799655, "learning_rate": 1.5693156419489263e-05, "loss": 0.4751562476158142, "step": 2517 }, { "epoch": 0.6842391304347826, "grad_norm": 0.9605736706622569, "learning_rate": 1.5689461961781273e-05, "loss": 0.5182964205741882, "step": 2518 }, { "epoch": 0.6845108695652173, "grad_norm": 0.9029373608285809, "learning_rate": 1.5685766355475173e-05, "loss": 0.3942337930202484, "step": 2519 }, { "epoch": 0.6847826086956522, "grad_norm": 0.907590261985272, "learning_rate": 1.5682069601317043e-05, "loss": 0.42384275794029236, "step": 2520 }, { "epoch": 0.685054347826087, "grad_norm": 1.0473687554560502, "learning_rate": 1.5678371700053184e-05, "loss": 0.5190576910972595, "step": 2521 }, { "epoch": 0.6853260869565218, "grad_norm": 1.1967804132223907, "learning_rate": 1.5674672652430137e-05, "loss": 0.5797896385192871, "step": 2522 }, { "epoch": 0.6855978260869565, "grad_norm": 0.9529578932976086, "learning_rate": 1.567097245919467e-05, "loss": 0.46828514337539673, "step": 2523 }, { "epoch": 0.6858695652173913, "grad_norm": 1.124310241060654, "learning_rate": 1.5667271121093784e-05, "loss": 0.5204868316650391, "step": 2524 }, { "epoch": 0.686141304347826, "grad_norm": 1.1153902108479363, "learning_rate": 1.566356863887472e-05, "loss": 0.6194449067115784, "step": 2525 }, { "epoch": 0.6864130434782608, "grad_norm": 0.9494491518483563, "learning_rate": 1.5659865013284926e-05, "loss": 0.4522847533226013, "step": 2526 }, { "epoch": 0.6866847826086957, "grad_norm": 0.9479149270142871, "learning_rate": 1.565616024507211e-05, "loss": 0.39924782514572144, "step": 2527 }, { "epoch": 0.6869565217391305, "grad_norm": 0.8270614139425942, "learning_rate": 1.5652454334984187e-05, "loss": 0.39038029313087463, "step": 2528 }, { "epoch": 0.6872282608695652, "grad_norm": 1.139832407880611, "learning_rate": 1.564874728376932e-05, "loss": 0.49727916717529297, "step": 2529 }, { "epoch": 0.6875, "grad_norm": 1.1587549864068656, "learning_rate": 1.5645039092175887e-05, "loss": 0.4675447344779968, "step": 2530 }, { "epoch": 0.6877717391304348, "grad_norm": 0.8719373710452732, "learning_rate": 1.5641329760952514e-05, "loss": 0.4040156602859497, "step": 2531 }, { "epoch": 0.6880434782608695, "grad_norm": 1.0406792668936156, "learning_rate": 1.5637619290848034e-05, "loss": 0.539178729057312, "step": 2532 }, { "epoch": 0.6883152173913043, "grad_norm": 1.0466902027681888, "learning_rate": 1.5633907682611535e-05, "loss": 0.4403577446937561, "step": 2533 }, { "epoch": 0.6885869565217392, "grad_norm": 1.1832431685777534, "learning_rate": 1.5630194936992317e-05, "loss": 0.5343607664108276, "step": 2534 }, { "epoch": 0.688858695652174, "grad_norm": 0.8241738195050485, "learning_rate": 1.5626481054739916e-05, "loss": 0.31925565004348755, "step": 2535 }, { "epoch": 0.6891304347826087, "grad_norm": 1.2096103577657982, "learning_rate": 1.5622766036604094e-05, "loss": 0.4923704266548157, "step": 2536 }, { "epoch": 0.6894021739130435, "grad_norm": 1.0480317324784723, "learning_rate": 1.5619049883334853e-05, "loss": 0.5223239660263062, "step": 2537 }, { "epoch": 0.6896739130434782, "grad_norm": 1.1141032257568664, "learning_rate": 1.5615332595682405e-05, "loss": 0.5513715744018555, "step": 2538 }, { "epoch": 0.689945652173913, "grad_norm": 1.0279840110424765, "learning_rate": 1.5611614174397212e-05, "loss": 0.4917106032371521, "step": 2539 }, { "epoch": 0.6902173913043478, "grad_norm": 1.0206731595178111, "learning_rate": 1.5607894620229952e-05, "loss": 0.4887380003929138, "step": 2540 }, { "epoch": 0.6904891304347827, "grad_norm": 0.8741051573096493, "learning_rate": 1.5604173933931536e-05, "loss": 0.3863508701324463, "step": 2541 }, { "epoch": 0.6907608695652174, "grad_norm": 0.9379866619344845, "learning_rate": 1.5600452116253096e-05, "loss": 0.4577694237232208, "step": 2542 }, { "epoch": 0.6910326086956522, "grad_norm": 0.9807918831993663, "learning_rate": 1.559672916794601e-05, "loss": 0.418815016746521, "step": 2543 }, { "epoch": 0.691304347826087, "grad_norm": 1.0925967570109203, "learning_rate": 1.559300508976186e-05, "loss": 0.44049781560897827, "step": 2544 }, { "epoch": 0.6915760869565217, "grad_norm": 1.283153576510071, "learning_rate": 1.5589279882452476e-05, "loss": 0.5213927030563354, "step": 2545 }, { "epoch": 0.6918478260869565, "grad_norm": 0.9924236741037299, "learning_rate": 1.558555354676991e-05, "loss": 0.4731292128562927, "step": 2546 }, { "epoch": 0.6921195652173913, "grad_norm": 0.8676386001427936, "learning_rate": 1.5581826083466442e-05, "loss": 0.3785427510738373, "step": 2547 }, { "epoch": 0.6923913043478261, "grad_norm": 1.0956622079817768, "learning_rate": 1.5578097493294574e-05, "loss": 0.5523238182067871, "step": 2548 }, { "epoch": 0.6926630434782609, "grad_norm": 1.0615084108671005, "learning_rate": 1.557436777700704e-05, "loss": 0.45787110924720764, "step": 2549 }, { "epoch": 0.6929347826086957, "grad_norm": 1.1809264341193997, "learning_rate": 1.5570636935356808e-05, "loss": 0.5410493016242981, "step": 2550 }, { "epoch": 0.6932065217391304, "grad_norm": 0.9521001001445528, "learning_rate": 1.5566904969097062e-05, "loss": 0.4347158670425415, "step": 2551 }, { "epoch": 0.6934782608695652, "grad_norm": 1.1167061467092756, "learning_rate": 1.5563171878981215e-05, "loss": 0.5544413328170776, "step": 2552 }, { "epoch": 0.69375, "grad_norm": 0.8939319098092041, "learning_rate": 1.5559437665762913e-05, "loss": 0.3717230260372162, "step": 2553 }, { "epoch": 0.6940217391304347, "grad_norm": 1.070160812756266, "learning_rate": 1.5555702330196024e-05, "loss": 0.5025150179862976, "step": 2554 }, { "epoch": 0.6942934782608695, "grad_norm": 1.0222302944577142, "learning_rate": 1.5551965873034643e-05, "loss": 0.4707837998867035, "step": 2555 }, { "epoch": 0.6945652173913044, "grad_norm": 1.1416702531491845, "learning_rate": 1.5548228295033096e-05, "loss": 0.5340657234191895, "step": 2556 }, { "epoch": 0.6948369565217392, "grad_norm": 0.9946155645868825, "learning_rate": 1.5544489596945927e-05, "loss": 0.4578937888145447, "step": 2557 }, { "epoch": 0.6951086956521739, "grad_norm": 1.119436852490841, "learning_rate": 1.5540749779527914e-05, "loss": 0.4987996518611908, "step": 2558 }, { "epoch": 0.6953804347826087, "grad_norm": 1.3892146258229117, "learning_rate": 1.553700884353406e-05, "loss": 0.5258411765098572, "step": 2559 }, { "epoch": 0.6956521739130435, "grad_norm": 1.0059640937029586, "learning_rate": 1.5533266789719584e-05, "loss": 0.4277993440628052, "step": 2560 }, { "epoch": 0.6959239130434782, "grad_norm": 0.9390005182003884, "learning_rate": 1.5529523618839937e-05, "loss": 0.4013899564743042, "step": 2561 }, { "epoch": 0.696195652173913, "grad_norm": 1.0970279483788183, "learning_rate": 1.5525779331650806e-05, "loss": 0.49578243494033813, "step": 2562 }, { "epoch": 0.6964673913043479, "grad_norm": 1.0661225081195735, "learning_rate": 1.5522033928908092e-05, "loss": 0.5174760818481445, "step": 2563 }, { "epoch": 0.6967391304347826, "grad_norm": 0.997186637737598, "learning_rate": 1.5518287411367915e-05, "loss": 0.4807285666465759, "step": 2564 }, { "epoch": 0.6970108695652174, "grad_norm": 0.9790002485415096, "learning_rate": 1.5514539779786638e-05, "loss": 0.5185769200325012, "step": 2565 }, { "epoch": 0.6972826086956522, "grad_norm": 1.04811968094587, "learning_rate": 1.551079103492083e-05, "loss": 0.4929603934288025, "step": 2566 }, { "epoch": 0.6975543478260869, "grad_norm": 1.0738823733159135, "learning_rate": 1.5507041177527306e-05, "loss": 0.48068171739578247, "step": 2567 }, { "epoch": 0.6978260869565217, "grad_norm": 1.2186539208691696, "learning_rate": 1.550329020836308e-05, "loss": 0.5466300249099731, "step": 2568 }, { "epoch": 0.6980978260869565, "grad_norm": 1.0406767822038137, "learning_rate": 1.5499538128185413e-05, "loss": 0.46619942784309387, "step": 2569 }, { "epoch": 0.6983695652173914, "grad_norm": 0.9849362390523657, "learning_rate": 1.5495784937751776e-05, "loss": 0.4353402256965637, "step": 2570 }, { "epoch": 0.6986413043478261, "grad_norm": 1.1067558695509083, "learning_rate": 1.5492030637819874e-05, "loss": 0.5722728371620178, "step": 2571 }, { "epoch": 0.6989130434782609, "grad_norm": 1.067149551577062, "learning_rate": 1.548827522914763e-05, "loss": 0.5410939455032349, "step": 2572 }, { "epoch": 0.6991847826086957, "grad_norm": 1.1122258795335684, "learning_rate": 1.5484518712493188e-05, "loss": 0.49480411410331726, "step": 2573 }, { "epoch": 0.6994565217391304, "grad_norm": 0.8336148496123712, "learning_rate": 1.5480761088614923e-05, "loss": 0.38791871070861816, "step": 2574 }, { "epoch": 0.6997282608695652, "grad_norm": 0.7461659880301392, "learning_rate": 1.5477002358271436e-05, "loss": 0.29923415184020996, "step": 2575 }, { "epoch": 0.7, "grad_norm": 1.0305433490404996, "learning_rate": 1.5473242522221536e-05, "loss": 0.4901658296585083, "step": 2576 }, { "epoch": 0.7002717391304348, "grad_norm": 0.8190629329474929, "learning_rate": 1.5469481581224274e-05, "loss": 0.36156368255615234, "step": 2577 }, { "epoch": 0.7005434782608696, "grad_norm": 0.8902047040671708, "learning_rate": 1.5465719536038904e-05, "loss": 0.4289747476577759, "step": 2578 }, { "epoch": 0.7008152173913044, "grad_norm": 1.0134161113091693, "learning_rate": 1.5461956387424923e-05, "loss": 0.5471310615539551, "step": 2579 }, { "epoch": 0.7010869565217391, "grad_norm": 1.1635094313856211, "learning_rate": 1.545819213614204e-05, "loss": 0.5164341330528259, "step": 2580 }, { "epoch": 0.7013586956521739, "grad_norm": 0.7954831990481777, "learning_rate": 1.5454426782950185e-05, "loss": 0.3890700340270996, "step": 2581 }, { "epoch": 0.7016304347826087, "grad_norm": 1.0176078902579693, "learning_rate": 1.5450660328609517e-05, "loss": 0.4760732352733612, "step": 2582 }, { "epoch": 0.7019021739130434, "grad_norm": 1.0517264734836413, "learning_rate": 1.5446892773880415e-05, "loss": 0.4711228907108307, "step": 2583 }, { "epoch": 0.7021739130434783, "grad_norm": 1.0423391930501187, "learning_rate": 1.5443124119523478e-05, "loss": 0.49549436569213867, "step": 2584 }, { "epoch": 0.7024456521739131, "grad_norm": 1.1594816302786892, "learning_rate": 1.5439354366299528e-05, "loss": 0.6061825752258301, "step": 2585 }, { "epoch": 0.7027173913043478, "grad_norm": 1.0347013396904001, "learning_rate": 1.5435583514969606e-05, "loss": 0.5054459571838379, "step": 2586 }, { "epoch": 0.7029891304347826, "grad_norm": 0.8452354222944634, "learning_rate": 1.5431811566294987e-05, "loss": 0.41887015104293823, "step": 2587 }, { "epoch": 0.7032608695652174, "grad_norm": 1.0516904317866793, "learning_rate": 1.5428038521037145e-05, "loss": 0.5145807266235352, "step": 2588 }, { "epoch": 0.7035326086956522, "grad_norm": 1.1205692776271172, "learning_rate": 1.54242643799578e-05, "loss": 0.5244410634040833, "step": 2589 }, { "epoch": 0.7038043478260869, "grad_norm": 1.0060415925800874, "learning_rate": 1.5420489143818875e-05, "loss": 0.48850512504577637, "step": 2590 }, { "epoch": 0.7040760869565217, "grad_norm": 0.9502370598719547, "learning_rate": 1.5416712813382528e-05, "loss": 0.4757123589515686, "step": 2591 }, { "epoch": 0.7043478260869566, "grad_norm": 1.0637995001492495, "learning_rate": 1.5412935389411124e-05, "loss": 0.5758436918258667, "step": 2592 }, { "epoch": 0.7046195652173913, "grad_norm": 1.0287859224786542, "learning_rate": 1.540915687266726e-05, "loss": 0.5192601680755615, "step": 2593 }, { "epoch": 0.7048913043478261, "grad_norm": 0.9344526507719348, "learning_rate": 1.5405377263913742e-05, "loss": 0.3781484365463257, "step": 2594 }, { "epoch": 0.7051630434782609, "grad_norm": 1.1006418156003355, "learning_rate": 1.5401596563913615e-05, "loss": 0.5112974047660828, "step": 2595 }, { "epoch": 0.7054347826086956, "grad_norm": 1.146325253343987, "learning_rate": 1.5397814773430123e-05, "loss": 0.5558577179908752, "step": 2596 }, { "epoch": 0.7057065217391304, "grad_norm": 1.0513838954921026, "learning_rate": 1.539403189322675e-05, "loss": 0.4095451235771179, "step": 2597 }, { "epoch": 0.7059782608695652, "grad_norm": 1.0927504870241296, "learning_rate": 1.539024792406718e-05, "loss": 0.5250794887542725, "step": 2598 }, { "epoch": 0.70625, "grad_norm": 1.2068551432681602, "learning_rate": 1.538646286671534e-05, "loss": 0.5050420761108398, "step": 2599 }, { "epoch": 0.7065217391304348, "grad_norm": 1.1904223643245853, "learning_rate": 1.5382676721935344e-05, "loss": 0.5502063035964966, "step": 2600 }, { "epoch": 0.7067934782608696, "grad_norm": 1.0302404161155971, "learning_rate": 1.5378889490491565e-05, "loss": 0.5116949081420898, "step": 2601 }, { "epoch": 0.7070652173913043, "grad_norm": 0.9981133224568488, "learning_rate": 1.5375101173148565e-05, "loss": 0.46639251708984375, "step": 2602 }, { "epoch": 0.7073369565217391, "grad_norm": 0.9823056710125194, "learning_rate": 1.5371311770671138e-05, "loss": 0.4243243634700775, "step": 2603 }, { "epoch": 0.7076086956521739, "grad_norm": 0.9599417794763946, "learning_rate": 1.5367521283824294e-05, "loss": 0.4491550922393799, "step": 2604 }, { "epoch": 0.7078804347826086, "grad_norm": 0.9581995128045754, "learning_rate": 1.536372971337327e-05, "loss": 0.4638722836971283, "step": 2605 }, { "epoch": 0.7081521739130435, "grad_norm": 1.0243509234509323, "learning_rate": 1.53599370600835e-05, "loss": 0.49300599098205566, "step": 2606 }, { "epoch": 0.7084239130434783, "grad_norm": 0.9160272061302365, "learning_rate": 1.5356143324720663e-05, "loss": 0.46661269664764404, "step": 2607 }, { "epoch": 0.7086956521739131, "grad_norm": 1.0734788537395388, "learning_rate": 1.5352348508050643e-05, "loss": 0.4855771064758301, "step": 2608 }, { "epoch": 0.7089673913043478, "grad_norm": 1.0038348664437808, "learning_rate": 1.534855261083954e-05, "loss": 0.4828795790672302, "step": 2609 }, { "epoch": 0.7092391304347826, "grad_norm": 0.9007842318263486, "learning_rate": 1.5344755633853683e-05, "loss": 0.4584309458732605, "step": 2610 }, { "epoch": 0.7095108695652174, "grad_norm": 1.0120552159551808, "learning_rate": 1.5340957577859605e-05, "loss": 0.5246891975402832, "step": 2611 }, { "epoch": 0.7097826086956521, "grad_norm": 0.9735423844204437, "learning_rate": 1.5337158443624068e-05, "loss": 0.4602866768836975, "step": 2612 }, { "epoch": 0.710054347826087, "grad_norm": 0.9854733842699007, "learning_rate": 1.5333358231914043e-05, "loss": 0.4173598885536194, "step": 2613 }, { "epoch": 0.7103260869565218, "grad_norm": 0.9576836467573904, "learning_rate": 1.5329556943496725e-05, "loss": 0.41619807481765747, "step": 2614 }, { "epoch": 0.7105978260869565, "grad_norm": 0.8712440473721397, "learning_rate": 1.532575457913953e-05, "loss": 0.35858675837516785, "step": 2615 }, { "epoch": 0.7108695652173913, "grad_norm": 1.045239658366619, "learning_rate": 1.5321951139610077e-05, "loss": 0.44525885581970215, "step": 2616 }, { "epoch": 0.7111413043478261, "grad_norm": 1.0954320421794153, "learning_rate": 1.531814662567622e-05, "loss": 0.5223145484924316, "step": 2617 }, { "epoch": 0.7114130434782608, "grad_norm": 1.022012736880226, "learning_rate": 1.5314341038106012e-05, "loss": 0.4487978219985962, "step": 2618 }, { "epoch": 0.7116847826086956, "grad_norm": 1.0549761914791989, "learning_rate": 1.5310534377667737e-05, "loss": 0.4876291751861572, "step": 2619 }, { "epoch": 0.7119565217391305, "grad_norm": 0.8954705795380901, "learning_rate": 1.530672664512989e-05, "loss": 0.4273010492324829, "step": 2620 }, { "epoch": 0.7122282608695653, "grad_norm": 1.0791681948488228, "learning_rate": 1.5302917841261172e-05, "loss": 0.41683053970336914, "step": 2621 }, { "epoch": 0.7125, "grad_norm": 1.0668594779789886, "learning_rate": 1.5299107966830528e-05, "loss": 0.46953293681144714, "step": 2622 }, { "epoch": 0.7127717391304348, "grad_norm": 1.2614041604606354, "learning_rate": 1.529529702260709e-05, "loss": 0.6298997402191162, "step": 2623 }, { "epoch": 0.7130434782608696, "grad_norm": 1.0630341147283742, "learning_rate": 1.529148500936022e-05, "loss": 0.4531649351119995, "step": 2624 }, { "epoch": 0.7133152173913043, "grad_norm": 1.0754684400967625, "learning_rate": 1.5287671927859494e-05, "loss": 0.4620913565158844, "step": 2625 }, { "epoch": 0.7135869565217391, "grad_norm": 1.0330632135374136, "learning_rate": 1.5283857778874707e-05, "loss": 0.49431246519088745, "step": 2626 }, { "epoch": 0.7138586956521739, "grad_norm": 0.9877433402171905, "learning_rate": 1.528004256317586e-05, "loss": 0.48030948638916016, "step": 2627 }, { "epoch": 0.7141304347826087, "grad_norm": 1.1502484172610532, "learning_rate": 1.527622628153318e-05, "loss": 0.5419813394546509, "step": 2628 }, { "epoch": 0.7144021739130435, "grad_norm": 1.1574979795630087, "learning_rate": 1.5272408934717098e-05, "loss": 0.5825904011726379, "step": 2629 }, { "epoch": 0.7146739130434783, "grad_norm": 0.8540447492940149, "learning_rate": 1.526859052349827e-05, "loss": 0.34584885835647583, "step": 2630 }, { "epoch": 0.714945652173913, "grad_norm": 1.0777407917360236, "learning_rate": 1.526477104864757e-05, "loss": 0.528558611869812, "step": 2631 }, { "epoch": 0.7152173913043478, "grad_norm": 0.9694441362419516, "learning_rate": 1.526095051093607e-05, "loss": 0.48368164896965027, "step": 2632 }, { "epoch": 0.7154891304347826, "grad_norm": 1.0704424114776352, "learning_rate": 1.5257128911135069e-05, "loss": 0.45881205797195435, "step": 2633 }, { "epoch": 0.7157608695652173, "grad_norm": 0.7180688050647523, "learning_rate": 1.5253306250016077e-05, "loss": 0.2940506935119629, "step": 2634 }, { "epoch": 0.7160326086956522, "grad_norm": 0.7867411175449621, "learning_rate": 1.5249482528350827e-05, "loss": 0.33143436908721924, "step": 2635 }, { "epoch": 0.716304347826087, "grad_norm": 1.0612343072636141, "learning_rate": 1.5245657746911252e-05, "loss": 0.4815713167190552, "step": 2636 }, { "epoch": 0.7165760869565218, "grad_norm": 0.945539810299676, "learning_rate": 1.5241831906469502e-05, "loss": 0.42996469140052795, "step": 2637 }, { "epoch": 0.7168478260869565, "grad_norm": 1.0411700659775585, "learning_rate": 1.5238005007797952e-05, "loss": 0.46389859914779663, "step": 2638 }, { "epoch": 0.7171195652173913, "grad_norm": 0.932501665278894, "learning_rate": 1.5234177051669175e-05, "loss": 0.467149555683136, "step": 2639 }, { "epoch": 0.717391304347826, "grad_norm": 1.0260425236514878, "learning_rate": 1.5230348038855968e-05, "loss": 0.45001113414764404, "step": 2640 }, { "epoch": 0.7176630434782608, "grad_norm": 1.043844723986996, "learning_rate": 1.5226517970131345e-05, "loss": 0.5348938703536987, "step": 2641 }, { "epoch": 0.7179347826086957, "grad_norm": 0.9979344538117827, "learning_rate": 1.5222686846268518e-05, "loss": 0.4557957053184509, "step": 2642 }, { "epoch": 0.7182065217391305, "grad_norm": 0.9864293114687706, "learning_rate": 1.5218854668040923e-05, "loss": 0.5390331745147705, "step": 2643 }, { "epoch": 0.7184782608695652, "grad_norm": 0.8590621752758127, "learning_rate": 1.521502143622221e-05, "loss": 0.3942371606826782, "step": 2644 }, { "epoch": 0.71875, "grad_norm": 1.0603762299082888, "learning_rate": 1.5211187151586233e-05, "loss": 0.5169593691825867, "step": 2645 }, { "epoch": 0.7190217391304348, "grad_norm": 1.0045787367253651, "learning_rate": 1.5207351814907068e-05, "loss": 0.45151495933532715, "step": 2646 }, { "epoch": 0.7192934782608695, "grad_norm": 1.1898923996017046, "learning_rate": 1.5203515426958997e-05, "loss": 0.5329718589782715, "step": 2647 }, { "epoch": 0.7195652173913043, "grad_norm": 1.1698828325422959, "learning_rate": 1.519967798851652e-05, "loss": 0.5065211057662964, "step": 2648 }, { "epoch": 0.7198369565217392, "grad_norm": 1.0542528234931925, "learning_rate": 1.5195839500354337e-05, "loss": 0.5081573724746704, "step": 2649 }, { "epoch": 0.720108695652174, "grad_norm": 0.8843889855260252, "learning_rate": 1.5191999963247376e-05, "loss": 0.4101070761680603, "step": 2650 }, { "epoch": 0.7203804347826087, "grad_norm": 1.01253510586601, "learning_rate": 1.5188159377970771e-05, "loss": 0.49323609471321106, "step": 2651 }, { "epoch": 0.7206521739130435, "grad_norm": 1.167675465909467, "learning_rate": 1.5184317745299856e-05, "loss": 0.546697199344635, "step": 2652 }, { "epoch": 0.7209239130434782, "grad_norm": 1.114031595591881, "learning_rate": 1.5180475066010195e-05, "loss": 0.4895595610141754, "step": 2653 }, { "epoch": 0.721195652173913, "grad_norm": 1.1366536971133723, "learning_rate": 1.5176631340877555e-05, "loss": 0.5251865983009338, "step": 2654 }, { "epoch": 0.7214673913043478, "grad_norm": 0.9228804094602526, "learning_rate": 1.5172786570677906e-05, "loss": 0.38361233472824097, "step": 2655 }, { "epoch": 0.7217391304347827, "grad_norm": 1.0833956043344302, "learning_rate": 1.5168940756187445e-05, "loss": 0.5238540172576904, "step": 2656 }, { "epoch": 0.7220108695652174, "grad_norm": 1.1325758847639795, "learning_rate": 1.516509389818256e-05, "loss": 0.5314116477966309, "step": 2657 }, { "epoch": 0.7222826086956522, "grad_norm": 1.0815541202898842, "learning_rate": 1.5161245997439876e-05, "loss": 0.4750939905643463, "step": 2658 }, { "epoch": 0.722554347826087, "grad_norm": 1.1357906639677418, "learning_rate": 1.5157397054736204e-05, "loss": 0.49461740255355835, "step": 2659 }, { "epoch": 0.7228260869565217, "grad_norm": 0.935362359226643, "learning_rate": 1.5153547070848575e-05, "loss": 0.42096859216690063, "step": 2660 }, { "epoch": 0.7230978260869565, "grad_norm": 1.0522701220844723, "learning_rate": 1.5149696046554236e-05, "loss": 0.5268068313598633, "step": 2661 }, { "epoch": 0.7233695652173913, "grad_norm": 0.93431748091844, "learning_rate": 1.5145843982630634e-05, "loss": 0.4040941596031189, "step": 2662 }, { "epoch": 0.7236413043478261, "grad_norm": 0.8358023642416771, "learning_rate": 1.5141990879855431e-05, "loss": 0.35589006543159485, "step": 2663 }, { "epoch": 0.7239130434782609, "grad_norm": 1.0546015727010156, "learning_rate": 1.5138136739006497e-05, "loss": 0.44107362627983093, "step": 2664 }, { "epoch": 0.7241847826086957, "grad_norm": 1.006481992205329, "learning_rate": 1.5134281560861912e-05, "loss": 0.492564857006073, "step": 2665 }, { "epoch": 0.7244565217391304, "grad_norm": 0.9210968362135655, "learning_rate": 1.5130425346199969e-05, "loss": 0.4464862048625946, "step": 2666 }, { "epoch": 0.7247282608695652, "grad_norm": 1.1451197233971742, "learning_rate": 1.5126568095799163e-05, "loss": 0.606390118598938, "step": 2667 }, { "epoch": 0.725, "grad_norm": 1.2430412798423238, "learning_rate": 1.5122709810438205e-05, "loss": 0.6090595722198486, "step": 2668 }, { "epoch": 0.7252717391304347, "grad_norm": 1.012943236762312, "learning_rate": 1.5118850490896012e-05, "loss": 0.48948925733566284, "step": 2669 }, { "epoch": 0.7255434782608695, "grad_norm": 1.1828644284955963, "learning_rate": 1.5114990137951709e-05, "loss": 0.5051157474517822, "step": 2670 }, { "epoch": 0.7258152173913044, "grad_norm": 1.021673099194147, "learning_rate": 1.511112875238463e-05, "loss": 0.4953664541244507, "step": 2671 }, { "epoch": 0.7260869565217392, "grad_norm": 0.9562370515437123, "learning_rate": 1.510726633497432e-05, "loss": 0.4549162983894348, "step": 2672 }, { "epoch": 0.7263586956521739, "grad_norm": 0.949119946251056, "learning_rate": 1.5103402886500526e-05, "loss": 0.4690982401371002, "step": 2673 }, { "epoch": 0.7266304347826087, "grad_norm": 1.0691023552920118, "learning_rate": 1.5099538407743213e-05, "loss": 0.4440189301967621, "step": 2674 }, { "epoch": 0.7269021739130435, "grad_norm": 1.032234970253873, "learning_rate": 1.5095672899482546e-05, "loss": 0.4720619022846222, "step": 2675 }, { "epoch": 0.7271739130434782, "grad_norm": 1.18916738264211, "learning_rate": 1.50918063624989e-05, "loss": 0.5420469045639038, "step": 2676 }, { "epoch": 0.727445652173913, "grad_norm": 1.5322600429345128, "learning_rate": 1.5087938797572854e-05, "loss": 0.5082096457481384, "step": 2677 }, { "epoch": 0.7277173913043479, "grad_norm": 1.157572030655693, "learning_rate": 1.5084070205485204e-05, "loss": 0.5491743087768555, "step": 2678 }, { "epoch": 0.7279891304347826, "grad_norm": 1.200187576467366, "learning_rate": 1.5080200587016953e-05, "loss": 0.6160041093826294, "step": 2679 }, { "epoch": 0.7282608695652174, "grad_norm": 1.012372241677218, "learning_rate": 1.5076329942949292e-05, "loss": 0.5103723406791687, "step": 2680 }, { "epoch": 0.7285326086956522, "grad_norm": 1.0925038974889203, "learning_rate": 1.5072458274063645e-05, "loss": 0.5176466703414917, "step": 2681 }, { "epoch": 0.7288043478260869, "grad_norm": 0.9905405695714733, "learning_rate": 1.5068585581141623e-05, "loss": 0.4171726703643799, "step": 2682 }, { "epoch": 0.7290760869565217, "grad_norm": 0.9785268961120395, "learning_rate": 1.5064711864965058e-05, "loss": 0.4640737771987915, "step": 2683 }, { "epoch": 0.7293478260869565, "grad_norm": 1.0609310645395358, "learning_rate": 1.5060837126315981e-05, "loss": 0.5175793766975403, "step": 2684 }, { "epoch": 0.7296195652173914, "grad_norm": 0.9995054943643883, "learning_rate": 1.5056961365976626e-05, "loss": 0.49225008487701416, "step": 2685 }, { "epoch": 0.7298913043478261, "grad_norm": 0.943089889574199, "learning_rate": 1.505308458472944e-05, "loss": 0.44520044326782227, "step": 2686 }, { "epoch": 0.7301630434782609, "grad_norm": 1.1321832820090867, "learning_rate": 1.5049206783357082e-05, "loss": 0.504291296005249, "step": 2687 }, { "epoch": 0.7304347826086957, "grad_norm": 0.9309680763574036, "learning_rate": 1.5045327962642398e-05, "loss": 0.46359169483184814, "step": 2688 }, { "epoch": 0.7307065217391304, "grad_norm": 1.141446096544705, "learning_rate": 1.5041448123368454e-05, "loss": 0.5158535242080688, "step": 2689 }, { "epoch": 0.7309782608695652, "grad_norm": 0.762732083441141, "learning_rate": 1.5037567266318522e-05, "loss": 0.282234251499176, "step": 2690 }, { "epoch": 0.73125, "grad_norm": 0.990555668887195, "learning_rate": 1.5033685392276071e-05, "loss": 0.531595766544342, "step": 2691 }, { "epoch": 0.7315217391304348, "grad_norm": 0.9810650919770345, "learning_rate": 1.5029802502024788e-05, "loss": 0.46098458766937256, "step": 2692 }, { "epoch": 0.7317934782608696, "grad_norm": 1.2664840830352966, "learning_rate": 1.5025918596348548e-05, "loss": 0.6065942645072937, "step": 2693 }, { "epoch": 0.7320652173913044, "grad_norm": 0.9586443638057942, "learning_rate": 1.5022033676031447e-05, "loss": 0.3876507878303528, "step": 2694 }, { "epoch": 0.7323369565217391, "grad_norm": 1.2097658642183058, "learning_rate": 1.5018147741857776e-05, "loss": 0.5386353731155396, "step": 2695 }, { "epoch": 0.7326086956521739, "grad_norm": 0.9124153161110671, "learning_rate": 1.5014260794612035e-05, "loss": 0.39778316020965576, "step": 2696 }, { "epoch": 0.7328804347826087, "grad_norm": 0.9339733091108668, "learning_rate": 1.501037283507893e-05, "loss": 0.39803552627563477, "step": 2697 }, { "epoch": 0.7331521739130434, "grad_norm": 1.0949727739395467, "learning_rate": 1.5006483864043362e-05, "loss": 0.5883221626281738, "step": 2698 }, { "epoch": 0.7334239130434783, "grad_norm": 1.048078762600634, "learning_rate": 1.5002593882290447e-05, "loss": 0.5132486820220947, "step": 2699 }, { "epoch": 0.7336956521739131, "grad_norm": 0.9746385381615624, "learning_rate": 1.49987028906055e-05, "loss": 0.4317088723182678, "step": 2700 }, { "epoch": 0.7339673913043478, "grad_norm": 0.8747871183798324, "learning_rate": 1.4994810889774045e-05, "loss": 0.3479289710521698, "step": 2701 }, { "epoch": 0.7342391304347826, "grad_norm": 1.116328531965606, "learning_rate": 1.4990917880581802e-05, "loss": 0.5297036170959473, "step": 2702 }, { "epoch": 0.7345108695652174, "grad_norm": 1.0845507932644363, "learning_rate": 1.4987023863814698e-05, "loss": 0.5481277108192444, "step": 2703 }, { "epoch": 0.7347826086956522, "grad_norm": 1.0042674538259682, "learning_rate": 1.4983128840258864e-05, "loss": 0.4911978840827942, "step": 2704 }, { "epoch": 0.7350543478260869, "grad_norm": 1.0600367463788745, "learning_rate": 1.4979232810700638e-05, "loss": 0.47107064723968506, "step": 2705 }, { "epoch": 0.7353260869565217, "grad_norm": 1.0960680864756525, "learning_rate": 1.4975335775926547e-05, "loss": 0.5349897146224976, "step": 2706 }, { "epoch": 0.7355978260869566, "grad_norm": 0.9573010351942469, "learning_rate": 1.497143773672334e-05, "loss": 0.3802507817745209, "step": 2707 }, { "epoch": 0.7358695652173913, "grad_norm": 0.9197322408675643, "learning_rate": 1.4967538693877958e-05, "loss": 0.4124268889427185, "step": 2708 }, { "epoch": 0.7361413043478261, "grad_norm": 1.070369719369256, "learning_rate": 1.4963638648177544e-05, "loss": 0.507140576839447, "step": 2709 }, { "epoch": 0.7364130434782609, "grad_norm": 0.8899682021120964, "learning_rate": 1.4959737600409448e-05, "loss": 0.395033597946167, "step": 2710 }, { "epoch": 0.7366847826086956, "grad_norm": 1.0577626065696633, "learning_rate": 1.4955835551361217e-05, "loss": 0.45901456475257874, "step": 2711 }, { "epoch": 0.7369565217391304, "grad_norm": 0.992831519124854, "learning_rate": 1.4951932501820608e-05, "loss": 0.4210977852344513, "step": 2712 }, { "epoch": 0.7372282608695652, "grad_norm": 1.0273472129080528, "learning_rate": 1.4948028452575572e-05, "loss": 0.5216702818870544, "step": 2713 }, { "epoch": 0.7375, "grad_norm": 1.0472401898090546, "learning_rate": 1.4944123404414264e-05, "loss": 0.4191865921020508, "step": 2714 }, { "epoch": 0.7377717391304348, "grad_norm": 0.92010493339495, "learning_rate": 1.4940217358125042e-05, "loss": 0.39589497447013855, "step": 2715 }, { "epoch": 0.7380434782608696, "grad_norm": 0.8970712372065736, "learning_rate": 1.4936310314496468e-05, "loss": 0.4384605288505554, "step": 2716 }, { "epoch": 0.7383152173913043, "grad_norm": 1.1159663585861108, "learning_rate": 1.4932402274317297e-05, "loss": 0.6044947504997253, "step": 2717 }, { "epoch": 0.7385869565217391, "grad_norm": 1.0091693909172057, "learning_rate": 1.4928493238376498e-05, "loss": 0.5396151542663574, "step": 2718 }, { "epoch": 0.7388586956521739, "grad_norm": 0.947860455838, "learning_rate": 1.492458320746323e-05, "loss": 0.3818020820617676, "step": 2719 }, { "epoch": 0.7391304347826086, "grad_norm": 1.2472359464040192, "learning_rate": 1.4920672182366857e-05, "loss": 0.5943140983581543, "step": 2720 }, { "epoch": 0.7394021739130435, "grad_norm": 0.9906112976787376, "learning_rate": 1.491676016387694e-05, "loss": 0.45472651720046997, "step": 2721 }, { "epoch": 0.7396739130434783, "grad_norm": 0.9269878025042456, "learning_rate": 1.491284715278325e-05, "loss": 0.4516075849533081, "step": 2722 }, { "epoch": 0.7399456521739131, "grad_norm": 0.7962568640969272, "learning_rate": 1.490893314987575e-05, "loss": 0.32948943972587585, "step": 2723 }, { "epoch": 0.7402173913043478, "grad_norm": 0.9849293555264365, "learning_rate": 1.4905018155944601e-05, "loss": 0.42737114429473877, "step": 2724 }, { "epoch": 0.7404891304347826, "grad_norm": 0.826029363964163, "learning_rate": 1.4901102171780175e-05, "loss": 0.33891308307647705, "step": 2725 }, { "epoch": 0.7407608695652174, "grad_norm": 1.147584124092114, "learning_rate": 1.4897185198173032e-05, "loss": 0.5342199802398682, "step": 2726 }, { "epoch": 0.7410326086956521, "grad_norm": 0.9846072390328412, "learning_rate": 1.489326723591394e-05, "loss": 0.4734615087509155, "step": 2727 }, { "epoch": 0.741304347826087, "grad_norm": 1.0679903055231121, "learning_rate": 1.4889348285793866e-05, "loss": 0.48649710416793823, "step": 2728 }, { "epoch": 0.7415760869565218, "grad_norm": 1.2903490840631153, "learning_rate": 1.4885428348603968e-05, "loss": 0.5291908979415894, "step": 2729 }, { "epoch": 0.7418478260869565, "grad_norm": 1.0915821358954787, "learning_rate": 1.4881507425135615e-05, "loss": 0.47483301162719727, "step": 2730 }, { "epoch": 0.7421195652173913, "grad_norm": 0.9225453001926108, "learning_rate": 1.487758551618037e-05, "loss": 0.46887141466140747, "step": 2731 }, { "epoch": 0.7423913043478261, "grad_norm": 0.9757752660042852, "learning_rate": 1.4873662622529989e-05, "loss": 0.38928666710853577, "step": 2732 }, { "epoch": 0.7426630434782608, "grad_norm": 0.8556293188867387, "learning_rate": 1.4869738744976436e-05, "loss": 0.37860721349716187, "step": 2733 }, { "epoch": 0.7429347826086956, "grad_norm": 0.9677731714953031, "learning_rate": 1.486581388431187e-05, "loss": 0.4665490388870239, "step": 2734 }, { "epoch": 0.7432065217391305, "grad_norm": 1.058757912898622, "learning_rate": 1.486188804132865e-05, "loss": 0.458029180765152, "step": 2735 }, { "epoch": 0.7434782608695653, "grad_norm": 1.1835937731957995, "learning_rate": 1.4857961216819327e-05, "loss": 0.569452166557312, "step": 2736 }, { "epoch": 0.74375, "grad_norm": 1.2728604456903407, "learning_rate": 1.4854033411576659e-05, "loss": 0.5264586806297302, "step": 2737 }, { "epoch": 0.7440217391304348, "grad_norm": 0.9581275879648401, "learning_rate": 1.4850104626393598e-05, "loss": 0.3694622218608856, "step": 2738 }, { "epoch": 0.7442934782608696, "grad_norm": 1.1911360711158914, "learning_rate": 1.4846174862063292e-05, "loss": 0.49472716450691223, "step": 2739 }, { "epoch": 0.7445652173913043, "grad_norm": 1.0958386105234665, "learning_rate": 1.4842244119379086e-05, "loss": 0.5096947550773621, "step": 2740 }, { "epoch": 0.7448369565217391, "grad_norm": 1.12193871680373, "learning_rate": 1.4838312399134531e-05, "loss": 0.6088662147521973, "step": 2741 }, { "epoch": 0.7451086956521739, "grad_norm": 1.1687397196377447, "learning_rate": 1.4834379702123363e-05, "loss": 0.5531191229820251, "step": 2742 }, { "epoch": 0.7453804347826087, "grad_norm": 1.1371476192609817, "learning_rate": 1.4830446029139526e-05, "loss": 0.5601851344108582, "step": 2743 }, { "epoch": 0.7456521739130435, "grad_norm": 0.9250016475421021, "learning_rate": 1.4826511380977155e-05, "loss": 0.4206138551235199, "step": 2744 }, { "epoch": 0.7459239130434783, "grad_norm": 1.1742745612977086, "learning_rate": 1.4822575758430581e-05, "loss": 0.5177225470542908, "step": 2745 }, { "epoch": 0.746195652173913, "grad_norm": 1.0927769310492381, "learning_rate": 1.4818639162294339e-05, "loss": 0.47149527072906494, "step": 2746 }, { "epoch": 0.7464673913043478, "grad_norm": 1.1931070134624628, "learning_rate": 1.4814701593363154e-05, "loss": 0.5210810899734497, "step": 2747 }, { "epoch": 0.7467391304347826, "grad_norm": 0.9968658233435216, "learning_rate": 1.4810763052431947e-05, "loss": 0.43693816661834717, "step": 2748 }, { "epoch": 0.7470108695652173, "grad_norm": 1.0057868289855134, "learning_rate": 1.4806823540295839e-05, "loss": 0.4797494411468506, "step": 2749 }, { "epoch": 0.7472826086956522, "grad_norm": 1.2213835937626272, "learning_rate": 1.4802883057750141e-05, "loss": 0.5532264709472656, "step": 2750 }, { "epoch": 0.747554347826087, "grad_norm": 1.0007041182683392, "learning_rate": 1.4798941605590372e-05, "loss": 0.4425719678401947, "step": 2751 }, { "epoch": 0.7478260869565218, "grad_norm": 1.1487331779004126, "learning_rate": 1.4794999184612229e-05, "loss": 0.488278329372406, "step": 2752 }, { "epoch": 0.7480978260869565, "grad_norm": 1.0572444976615138, "learning_rate": 1.4791055795611623e-05, "loss": 0.44530802965164185, "step": 2753 }, { "epoch": 0.7483695652173913, "grad_norm": 1.142410274676524, "learning_rate": 1.4787111439384651e-05, "loss": 0.5196715593338013, "step": 2754 }, { "epoch": 0.748641304347826, "grad_norm": 1.0001283528787215, "learning_rate": 1.4783166116727603e-05, "loss": 0.45293182134628296, "step": 2755 }, { "epoch": 0.7489130434782608, "grad_norm": 1.0489561665667775, "learning_rate": 1.477921982843697e-05, "loss": 0.4880104660987854, "step": 2756 }, { "epoch": 0.7491847826086957, "grad_norm": 1.0676378171998484, "learning_rate": 1.4775272575309434e-05, "loss": 0.5044180154800415, "step": 2757 }, { "epoch": 0.7494565217391305, "grad_norm": 1.1295516120923736, "learning_rate": 1.4771324358141872e-05, "loss": 0.46536967158317566, "step": 2758 }, { "epoch": 0.7497282608695652, "grad_norm": 1.0132943126515375, "learning_rate": 1.4767375177731358e-05, "loss": 0.4894029498100281, "step": 2759 }, { "epoch": 0.75, "grad_norm": 1.0251220588407743, "learning_rate": 1.4763425034875159e-05, "loss": 0.4566153287887573, "step": 2760 }, { "epoch": 0.7502717391304348, "grad_norm": 1.0516579622572404, "learning_rate": 1.4759473930370738e-05, "loss": 0.4693199098110199, "step": 2761 }, { "epoch": 0.7505434782608695, "grad_norm": 1.2181485496274798, "learning_rate": 1.4755521865015747e-05, "loss": 0.5386351346969604, "step": 2762 }, { "epoch": 0.7508152173913043, "grad_norm": 1.127769205965991, "learning_rate": 1.4751568839608036e-05, "loss": 0.6407933235168457, "step": 2763 }, { "epoch": 0.7510869565217392, "grad_norm": 0.8538453075822464, "learning_rate": 1.4747614854945655e-05, "loss": 0.32606637477874756, "step": 2764 }, { "epoch": 0.751358695652174, "grad_norm": 1.0024747166208614, "learning_rate": 1.4743659911826833e-05, "loss": 0.521848201751709, "step": 2765 }, { "epoch": 0.7516304347826087, "grad_norm": 0.9460078680133331, "learning_rate": 1.4739704011050004e-05, "loss": 0.44114580750465393, "step": 2766 }, { "epoch": 0.7519021739130435, "grad_norm": 1.0539732236776476, "learning_rate": 1.4735747153413793e-05, "loss": 0.4752507209777832, "step": 2767 }, { "epoch": 0.7521739130434782, "grad_norm": 1.033979967086736, "learning_rate": 1.4731789339717014e-05, "loss": 0.5280629396438599, "step": 2768 }, { "epoch": 0.752445652173913, "grad_norm": 0.9631554938905953, "learning_rate": 1.472783057075868e-05, "loss": 0.4653479754924774, "step": 2769 }, { "epoch": 0.7527173913043478, "grad_norm": 1.1103781631356082, "learning_rate": 1.4723870847337989e-05, "loss": 0.5232816338539124, "step": 2770 }, { "epoch": 0.7529891304347827, "grad_norm": 1.0576448435986998, "learning_rate": 1.471991017025434e-05, "loss": 0.5383557081222534, "step": 2771 }, { "epoch": 0.7532608695652174, "grad_norm": 1.1044032604229892, "learning_rate": 1.4715948540307325e-05, "loss": 0.5191980600357056, "step": 2772 }, { "epoch": 0.7535326086956522, "grad_norm": 1.0233108428579685, "learning_rate": 1.4711985958296722e-05, "loss": 0.5239685773849487, "step": 2773 }, { "epoch": 0.753804347826087, "grad_norm": 1.0311356566977004, "learning_rate": 1.4708022425022499e-05, "loss": 0.4357362985610962, "step": 2774 }, { "epoch": 0.7540760869565217, "grad_norm": 1.1242897280294977, "learning_rate": 1.4704057941284823e-05, "loss": 0.552132785320282, "step": 2775 }, { "epoch": 0.7543478260869565, "grad_norm": 0.8955500907992876, "learning_rate": 1.4700092507884053e-05, "loss": 0.42787182331085205, "step": 2776 }, { "epoch": 0.7546195652173913, "grad_norm": 0.9973104969591576, "learning_rate": 1.4696126125620738e-05, "loss": 0.4960390329360962, "step": 2777 }, { "epoch": 0.7548913043478261, "grad_norm": 1.0398487612701288, "learning_rate": 1.4692158795295614e-05, "loss": 0.43912068009376526, "step": 2778 }, { "epoch": 0.7551630434782609, "grad_norm": 1.096428741927705, "learning_rate": 1.4688190517709616e-05, "loss": 0.5007702112197876, "step": 2779 }, { "epoch": 0.7554347826086957, "grad_norm": 1.1832729323120472, "learning_rate": 1.4684221293663863e-05, "loss": 0.5059356093406677, "step": 2780 }, { "epoch": 0.7557065217391304, "grad_norm": 1.0487920691682238, "learning_rate": 1.468025112395967e-05, "loss": 0.49972549080848694, "step": 2781 }, { "epoch": 0.7559782608695652, "grad_norm": 0.8743490612261929, "learning_rate": 1.4676280009398544e-05, "loss": 0.43569216132164, "step": 2782 }, { "epoch": 0.75625, "grad_norm": 0.962225341157365, "learning_rate": 1.4672307950782179e-05, "loss": 0.4479592442512512, "step": 2783 }, { "epoch": 0.7565217391304347, "grad_norm": 0.875511966072115, "learning_rate": 1.4668334948912455e-05, "loss": 0.4306511878967285, "step": 2784 }, { "epoch": 0.7567934782608695, "grad_norm": 1.0715375450349678, "learning_rate": 1.4664361004591459e-05, "loss": 0.47393834590911865, "step": 2785 }, { "epoch": 0.7570652173913044, "grad_norm": 0.8951312637267257, "learning_rate": 1.4660386118621448e-05, "loss": 0.39760732650756836, "step": 2786 }, { "epoch": 0.7573369565217392, "grad_norm": 0.8918106848913915, "learning_rate": 1.4656410291804883e-05, "loss": 0.4578860104084015, "step": 2787 }, { "epoch": 0.7576086956521739, "grad_norm": 1.0285975676262789, "learning_rate": 1.465243352494441e-05, "loss": 0.45583581924438477, "step": 2788 }, { "epoch": 0.7578804347826087, "grad_norm": 1.0107163761577442, "learning_rate": 1.4648455818842866e-05, "loss": 0.4480496346950531, "step": 2789 }, { "epoch": 0.7581521739130435, "grad_norm": 1.098062241882745, "learning_rate": 1.4644477174303277e-05, "loss": 0.5205421447753906, "step": 2790 }, { "epoch": 0.7584239130434782, "grad_norm": 0.9571381957608245, "learning_rate": 1.4640497592128858e-05, "loss": 0.4567432403564453, "step": 2791 }, { "epoch": 0.758695652173913, "grad_norm": 1.0255231688540223, "learning_rate": 1.4636517073123014e-05, "loss": 0.5804033279418945, "step": 2792 }, { "epoch": 0.7589673913043479, "grad_norm": 1.027666541958503, "learning_rate": 1.4632535618089334e-05, "loss": 0.4607222080230713, "step": 2793 }, { "epoch": 0.7592391304347826, "grad_norm": 0.9435596158260272, "learning_rate": 1.4628553227831607e-05, "loss": 0.4345993995666504, "step": 2794 }, { "epoch": 0.7595108695652174, "grad_norm": 0.9934727503234074, "learning_rate": 1.4624569903153805e-05, "loss": 0.4252578318119049, "step": 2795 }, { "epoch": 0.7597826086956522, "grad_norm": 1.0783111702812003, "learning_rate": 1.4620585644860085e-05, "loss": 0.43088921904563904, "step": 2796 }, { "epoch": 0.7600543478260869, "grad_norm": 0.9992775760271608, "learning_rate": 1.4616600453754796e-05, "loss": 0.4043908715248108, "step": 2797 }, { "epoch": 0.7603260869565217, "grad_norm": 0.9120692059424254, "learning_rate": 1.4612614330642477e-05, "loss": 0.4552769064903259, "step": 2798 }, { "epoch": 0.7605978260869565, "grad_norm": 1.0646313960455969, "learning_rate": 1.4608627276327851e-05, "loss": 0.498555451631546, "step": 2799 }, { "epoch": 0.7608695652173914, "grad_norm": 1.1331494767020263, "learning_rate": 1.4604639291615835e-05, "loss": 0.5254843831062317, "step": 2800 }, { "epoch": 0.7611413043478261, "grad_norm": 1.0300206448789542, "learning_rate": 1.4600650377311523e-05, "loss": 0.47576063871383667, "step": 2801 }, { "epoch": 0.7614130434782609, "grad_norm": 0.9339317353381104, "learning_rate": 1.4596660534220208e-05, "loss": 0.4004290699958801, "step": 2802 }, { "epoch": 0.7616847826086957, "grad_norm": 0.9319728025658353, "learning_rate": 1.4592669763147368e-05, "loss": 0.46894025802612305, "step": 2803 }, { "epoch": 0.7619565217391304, "grad_norm": 0.9977840581064603, "learning_rate": 1.4588678064898664e-05, "loss": 0.46145308017730713, "step": 2804 }, { "epoch": 0.7622282608695652, "grad_norm": 1.0867097614965058, "learning_rate": 1.458468544027995e-05, "loss": 0.5919659733772278, "step": 2805 }, { "epoch": 0.7625, "grad_norm": 0.8238542179966517, "learning_rate": 1.4580691890097259e-05, "loss": 0.43092823028564453, "step": 2806 }, { "epoch": 0.7627717391304348, "grad_norm": 0.9216754703773563, "learning_rate": 1.4576697415156818e-05, "loss": 0.4043980836868286, "step": 2807 }, { "epoch": 0.7630434782608696, "grad_norm": 0.9667740643434403, "learning_rate": 1.4572702016265039e-05, "loss": 0.4138484001159668, "step": 2808 }, { "epoch": 0.7633152173913044, "grad_norm": 0.9974670710693017, "learning_rate": 1.4568705694228517e-05, "loss": 0.49341142177581787, "step": 2809 }, { "epoch": 0.7635869565217391, "grad_norm": 1.1166499955802955, "learning_rate": 1.456470844985404e-05, "loss": 0.4461100697517395, "step": 2810 }, { "epoch": 0.7638586956521739, "grad_norm": 1.0501581745294661, "learning_rate": 1.4560710283948573e-05, "loss": 0.5298428535461426, "step": 2811 }, { "epoch": 0.7641304347826087, "grad_norm": 1.0708929069433433, "learning_rate": 1.4556711197319277e-05, "loss": 0.4336012601852417, "step": 2812 }, { "epoch": 0.7644021739130434, "grad_norm": 1.1159012569062916, "learning_rate": 1.4552711190773496e-05, "loss": 0.4188690185546875, "step": 2813 }, { "epoch": 0.7646739130434783, "grad_norm": 0.9726591397482683, "learning_rate": 1.4548710265118754e-05, "loss": 0.447509765625, "step": 2814 }, { "epoch": 0.7649456521739131, "grad_norm": 1.0263667868207462, "learning_rate": 1.4544708421162766e-05, "loss": 0.4668140411376953, "step": 2815 }, { "epoch": 0.7652173913043478, "grad_norm": 1.1566140992321319, "learning_rate": 1.454070565971343e-05, "loss": 0.609898030757904, "step": 2816 }, { "epoch": 0.7654891304347826, "grad_norm": 0.996036572163243, "learning_rate": 1.453670198157883e-05, "loss": 0.42538779973983765, "step": 2817 }, { "epoch": 0.7657608695652174, "grad_norm": 1.1640866320621213, "learning_rate": 1.453269738756724e-05, "loss": 0.5472406148910522, "step": 2818 }, { "epoch": 0.7660326086956522, "grad_norm": 1.015953284104636, "learning_rate": 1.4528691878487107e-05, "loss": 0.511754035949707, "step": 2819 }, { "epoch": 0.7663043478260869, "grad_norm": 1.0779027152349603, "learning_rate": 1.4524685455147071e-05, "loss": 0.494344025850296, "step": 2820 }, { "epoch": 0.7665760869565217, "grad_norm": 0.9680360740509087, "learning_rate": 1.4520678118355962e-05, "loss": 0.4460715651512146, "step": 2821 }, { "epoch": 0.7668478260869566, "grad_norm": 0.9296441167453525, "learning_rate": 1.4516669868922782e-05, "loss": 0.42234301567077637, "step": 2822 }, { "epoch": 0.7671195652173913, "grad_norm": 1.0558436465234449, "learning_rate": 1.4512660707656725e-05, "loss": 0.48034465312957764, "step": 2823 }, { "epoch": 0.7673913043478261, "grad_norm": 1.1813282547374966, "learning_rate": 1.4508650635367168e-05, "loss": 0.6110357642173767, "step": 2824 }, { "epoch": 0.7676630434782609, "grad_norm": 0.9769471432565434, "learning_rate": 1.4504639652863666e-05, "loss": 0.46023303270339966, "step": 2825 }, { "epoch": 0.7679347826086956, "grad_norm": 1.0080631550110213, "learning_rate": 1.450062776095597e-05, "loss": 0.4945431351661682, "step": 2826 }, { "epoch": 0.7682065217391304, "grad_norm": 1.0677615244161343, "learning_rate": 1.4496614960454002e-05, "loss": 0.5451408624649048, "step": 2827 }, { "epoch": 0.7684782608695652, "grad_norm": 1.0029279495558734, "learning_rate": 1.4492601252167877e-05, "loss": 0.5054175853729248, "step": 2828 }, { "epoch": 0.76875, "grad_norm": 1.0960204545553207, "learning_rate": 1.4488586636907887e-05, "loss": 0.5166972279548645, "step": 2829 }, { "epoch": 0.7690217391304348, "grad_norm": 1.0190033269396803, "learning_rate": 1.4484571115484508e-05, "loss": 0.47049379348754883, "step": 2830 }, { "epoch": 0.7692934782608696, "grad_norm": 0.9930414236684661, "learning_rate": 1.4480554688708404e-05, "loss": 0.4512861967086792, "step": 2831 }, { "epoch": 0.7695652173913043, "grad_norm": 1.0490471604964082, "learning_rate": 1.4476537357390413e-05, "loss": 0.435683012008667, "step": 2832 }, { "epoch": 0.7698369565217391, "grad_norm": 1.1368403537648504, "learning_rate": 1.4472519122341566e-05, "loss": 0.5173265933990479, "step": 2833 }, { "epoch": 0.7701086956521739, "grad_norm": 1.0137550484162503, "learning_rate": 1.4468499984373068e-05, "loss": 0.4224281311035156, "step": 2834 }, { "epoch": 0.7703804347826086, "grad_norm": 1.0543410335803454, "learning_rate": 1.4464479944296308e-05, "loss": 0.5153795480728149, "step": 2835 }, { "epoch": 0.7706521739130435, "grad_norm": 1.1657550004008572, "learning_rate": 1.4460459002922863e-05, "loss": 0.5880765318870544, "step": 2836 }, { "epoch": 0.7709239130434783, "grad_norm": 0.9630065709562136, "learning_rate": 1.4456437161064481e-05, "loss": 0.4343281388282776, "step": 2837 }, { "epoch": 0.7711956521739131, "grad_norm": 0.8581781548558269, "learning_rate": 1.4452414419533104e-05, "loss": 0.39002305269241333, "step": 2838 }, { "epoch": 0.7714673913043478, "grad_norm": 0.9420084303519003, "learning_rate": 1.4448390779140844e-05, "loss": 0.3971659541130066, "step": 2839 }, { "epoch": 0.7717391304347826, "grad_norm": 0.9969285546159978, "learning_rate": 1.4444366240700005e-05, "loss": 0.40967434644699097, "step": 2840 }, { "epoch": 0.7720108695652174, "grad_norm": 1.2198849731551373, "learning_rate": 1.4440340805023068e-05, "loss": 0.5762540698051453, "step": 2841 }, { "epoch": 0.7722826086956521, "grad_norm": 1.4858324903681794, "learning_rate": 1.4436314472922692e-05, "loss": 0.4284863770008087, "step": 2842 }, { "epoch": 0.772554347826087, "grad_norm": 1.0799412486370803, "learning_rate": 1.443228724521172e-05, "loss": 0.47387003898620605, "step": 2843 }, { "epoch": 0.7728260869565218, "grad_norm": 1.0626533354170458, "learning_rate": 1.4428259122703176e-05, "loss": 0.45786935091018677, "step": 2844 }, { "epoch": 0.7730978260869565, "grad_norm": 1.15590656746533, "learning_rate": 1.4424230106210265e-05, "loss": 0.5904238224029541, "step": 2845 }, { "epoch": 0.7733695652173913, "grad_norm": 0.9188260394482095, "learning_rate": 1.442020019654637e-05, "loss": 0.419144868850708, "step": 2846 }, { "epoch": 0.7736413043478261, "grad_norm": 1.127762665381821, "learning_rate": 1.4416169394525056e-05, "loss": 0.471876859664917, "step": 2847 }, { "epoch": 0.7739130434782608, "grad_norm": 0.9005517060956295, "learning_rate": 1.4412137700960069e-05, "loss": 0.35322099924087524, "step": 2848 }, { "epoch": 0.7741847826086956, "grad_norm": 1.0987540457675902, "learning_rate": 1.4408105116665336e-05, "loss": 0.5754649043083191, "step": 2849 }, { "epoch": 0.7744565217391305, "grad_norm": 0.7680720245819472, "learning_rate": 1.4404071642454957e-05, "loss": 0.293428510427475, "step": 2850 }, { "epoch": 0.7747282608695653, "grad_norm": 1.0658267527127816, "learning_rate": 1.440003727914322e-05, "loss": 0.4265434145927429, "step": 2851 }, { "epoch": 0.775, "grad_norm": 1.1146237673549428, "learning_rate": 1.439600202754459e-05, "loss": 0.4975133538246155, "step": 2852 }, { "epoch": 0.7752717391304348, "grad_norm": 1.0071266540178703, "learning_rate": 1.4391965888473705e-05, "loss": 0.45562368631362915, "step": 2853 }, { "epoch": 0.7755434782608696, "grad_norm": 1.0705482955572057, "learning_rate": 1.4387928862745393e-05, "loss": 0.47762829065322876, "step": 2854 }, { "epoch": 0.7758152173913043, "grad_norm": 0.9399925786724773, "learning_rate": 1.4383890951174653e-05, "loss": 0.511483371257782, "step": 2855 }, { "epoch": 0.7760869565217391, "grad_norm": 1.1123472687632325, "learning_rate": 1.4379852154576669e-05, "loss": 0.4222223162651062, "step": 2856 }, { "epoch": 0.7763586956521739, "grad_norm": 1.0476044443818289, "learning_rate": 1.437581247376679e-05, "loss": 0.436845600605011, "step": 2857 }, { "epoch": 0.7766304347826087, "grad_norm": 1.0320152184190903, "learning_rate": 1.4371771909560566e-05, "loss": 0.5092877745628357, "step": 2858 }, { "epoch": 0.7769021739130435, "grad_norm": 0.9954612081019104, "learning_rate": 1.4367730462773708e-05, "loss": 0.4662293791770935, "step": 2859 }, { "epoch": 0.7771739130434783, "grad_norm": 1.142546405396446, "learning_rate": 1.4363688134222106e-05, "loss": 0.496809720993042, "step": 2860 }, { "epoch": 0.777445652173913, "grad_norm": 1.0300682674177952, "learning_rate": 1.4359644924721836e-05, "loss": 0.47509127855300903, "step": 2861 }, { "epoch": 0.7777173913043478, "grad_norm": 1.037605450870975, "learning_rate": 1.435560083508915e-05, "loss": 0.5213876962661743, "step": 2862 }, { "epoch": 0.7779891304347826, "grad_norm": 1.003138576186588, "learning_rate": 1.4351555866140467e-05, "loss": 0.40928566455841064, "step": 2863 }, { "epoch": 0.7782608695652173, "grad_norm": 1.1886087011374153, "learning_rate": 1.4347510018692406e-05, "loss": 0.49855977296829224, "step": 2864 }, { "epoch": 0.7785326086956522, "grad_norm": 0.9841102737353364, "learning_rate": 1.4343463293561734e-05, "loss": 0.4323834776878357, "step": 2865 }, { "epoch": 0.778804347826087, "grad_norm": 1.117934473091412, "learning_rate": 1.4339415691565421e-05, "loss": 0.48836424946784973, "step": 2866 }, { "epoch": 0.7790760869565218, "grad_norm": 1.2516361990165543, "learning_rate": 1.4335367213520605e-05, "loss": 0.5081477761268616, "step": 2867 }, { "epoch": 0.7793478260869565, "grad_norm": 1.0472258013743083, "learning_rate": 1.4331317860244592e-05, "loss": 0.4879705309867859, "step": 2868 }, { "epoch": 0.7796195652173913, "grad_norm": 1.3119411052675434, "learning_rate": 1.4327267632554873e-05, "loss": 0.5265461802482605, "step": 2869 }, { "epoch": 0.779891304347826, "grad_norm": 1.1155795324843985, "learning_rate": 1.4323216531269121e-05, "loss": 0.4962879419326782, "step": 2870 }, { "epoch": 0.7801630434782608, "grad_norm": 0.9589449351961369, "learning_rate": 1.4319164557205173e-05, "loss": 0.46154987812042236, "step": 2871 }, { "epoch": 0.7804347826086957, "grad_norm": 1.0190601272443596, "learning_rate": 1.4315111711181053e-05, "loss": 0.5154733657836914, "step": 2872 }, { "epoch": 0.7807065217391305, "grad_norm": 1.0451804725530067, "learning_rate": 1.4311057994014953e-05, "loss": 0.5196101665496826, "step": 2873 }, { "epoch": 0.7809782608695652, "grad_norm": 0.9353148686322228, "learning_rate": 1.4307003406525244e-05, "loss": 0.44571948051452637, "step": 2874 }, { "epoch": 0.78125, "grad_norm": 0.9585481522868473, "learning_rate": 1.4302947949530475e-05, "loss": 0.46671250462532043, "step": 2875 }, { "epoch": 0.7815217391304348, "grad_norm": 0.9446487627247168, "learning_rate": 1.429889162384937e-05, "loss": 0.42477482557296753, "step": 2876 }, { "epoch": 0.7817934782608695, "grad_norm": 0.9649829703689272, "learning_rate": 1.4294834430300822e-05, "loss": 0.4898419678211212, "step": 2877 }, { "epoch": 0.7820652173913043, "grad_norm": 0.9572716632142706, "learning_rate": 1.4290776369703908e-05, "loss": 0.4193437397480011, "step": 2878 }, { "epoch": 0.7823369565217392, "grad_norm": 1.1183772936289662, "learning_rate": 1.4286717442877871e-05, "loss": 0.5055937767028809, "step": 2879 }, { "epoch": 0.782608695652174, "grad_norm": 0.9495756215387764, "learning_rate": 1.428265765064214e-05, "loss": 0.45680397748947144, "step": 2880 }, { "epoch": 0.7828804347826087, "grad_norm": 1.1140208058041214, "learning_rate": 1.427859699381631e-05, "loss": 0.48977065086364746, "step": 2881 }, { "epoch": 0.7831521739130435, "grad_norm": 1.0682176342832423, "learning_rate": 1.4274535473220153e-05, "loss": 0.43261468410491943, "step": 2882 }, { "epoch": 0.7834239130434782, "grad_norm": 1.0864057666772793, "learning_rate": 1.4270473089673616e-05, "loss": 0.5071091651916504, "step": 2883 }, { "epoch": 0.783695652173913, "grad_norm": 1.0248019714085495, "learning_rate": 1.426640984399682e-05, "loss": 0.4497007131576538, "step": 2884 }, { "epoch": 0.7839673913043478, "grad_norm": 1.0680447255864476, "learning_rate": 1.4262345737010061e-05, "loss": 0.476358562707901, "step": 2885 }, { "epoch": 0.7842391304347827, "grad_norm": 1.2084110208774683, "learning_rate": 1.4258280769533804e-05, "loss": 0.526432991027832, "step": 2886 }, { "epoch": 0.7845108695652174, "grad_norm": 1.1439851162589862, "learning_rate": 1.4254214942388696e-05, "loss": 0.5640232563018799, "step": 2887 }, { "epoch": 0.7847826086956522, "grad_norm": 1.0936423211835524, "learning_rate": 1.4250148256395549e-05, "loss": 0.5336848497390747, "step": 2888 }, { "epoch": 0.785054347826087, "grad_norm": 1.1261556394888927, "learning_rate": 1.4246080712375352e-05, "loss": 0.506365954875946, "step": 2889 }, { "epoch": 0.7853260869565217, "grad_norm": 1.1318139147377348, "learning_rate": 1.4242012311149274e-05, "loss": 0.5353065729141235, "step": 2890 }, { "epoch": 0.7855978260869565, "grad_norm": 0.8971519435247227, "learning_rate": 1.4237943053538643e-05, "loss": 0.3783726692199707, "step": 2891 }, { "epoch": 0.7858695652173913, "grad_norm": 1.0522303865554992, "learning_rate": 1.4233872940364975e-05, "loss": 0.511707603931427, "step": 2892 }, { "epoch": 0.7861413043478261, "grad_norm": 0.8792826844050371, "learning_rate": 1.4229801972449946e-05, "loss": 0.4257848262786865, "step": 2893 }, { "epoch": 0.7864130434782609, "grad_norm": 0.8521605210045351, "learning_rate": 1.4225730150615406e-05, "loss": 0.35270220041275024, "step": 2894 }, { "epoch": 0.7866847826086957, "grad_norm": 1.2401609565884022, "learning_rate": 1.4221657475683392e-05, "loss": 0.5875886082649231, "step": 2895 }, { "epoch": 0.7869565217391304, "grad_norm": 1.0642703283076764, "learning_rate": 1.4217583948476094e-05, "loss": 0.48528558015823364, "step": 2896 }, { "epoch": 0.7872282608695652, "grad_norm": 0.8981972113879966, "learning_rate": 1.4213509569815884e-05, "loss": 0.4196832776069641, "step": 2897 }, { "epoch": 0.7875, "grad_norm": 0.9249080409031826, "learning_rate": 1.4209434340525308e-05, "loss": 0.478250652551651, "step": 2898 }, { "epoch": 0.7877717391304347, "grad_norm": 0.915290562727626, "learning_rate": 1.4205358261427076e-05, "loss": 0.4023358225822449, "step": 2899 }, { "epoch": 0.7880434782608695, "grad_norm": 0.9715845851606123, "learning_rate": 1.4201281333344077e-05, "loss": 0.4566168785095215, "step": 2900 }, { "epoch": 0.7883152173913044, "grad_norm": 1.0666068267333282, "learning_rate": 1.4197203557099367e-05, "loss": 0.45080381631851196, "step": 2901 }, { "epoch": 0.7885869565217392, "grad_norm": 1.1437910330811445, "learning_rate": 1.4193124933516172e-05, "loss": 0.5440713167190552, "step": 2902 }, { "epoch": 0.7888586956521739, "grad_norm": 1.1624559007492368, "learning_rate": 1.4189045463417892e-05, "loss": 0.6073203086853027, "step": 2903 }, { "epoch": 0.7891304347826087, "grad_norm": 1.0400106785539023, "learning_rate": 1.41849651476281e-05, "loss": 0.535861611366272, "step": 2904 }, { "epoch": 0.7894021739130435, "grad_norm": 1.0876162874635933, "learning_rate": 1.4180883986970536e-05, "loss": 0.4519503116607666, "step": 2905 }, { "epoch": 0.7896739130434782, "grad_norm": 1.0682968289191836, "learning_rate": 1.4176801982269108e-05, "loss": 0.49618956446647644, "step": 2906 }, { "epoch": 0.789945652173913, "grad_norm": 1.062268209377481, "learning_rate": 1.41727191343479e-05, "loss": 0.46086883544921875, "step": 2907 }, { "epoch": 0.7902173913043479, "grad_norm": 1.103432135427638, "learning_rate": 1.4168635444031169e-05, "loss": 0.4994698166847229, "step": 2908 }, { "epoch": 0.7904891304347826, "grad_norm": 0.9430941034093323, "learning_rate": 1.4164550912143331e-05, "loss": 0.4238623380661011, "step": 2909 }, { "epoch": 0.7907608695652174, "grad_norm": 0.97986834798568, "learning_rate": 1.4160465539508981e-05, "loss": 0.458504855632782, "step": 2910 }, { "epoch": 0.7910326086956522, "grad_norm": 1.147661339019198, "learning_rate": 1.4156379326952881e-05, "loss": 0.5408713221549988, "step": 2911 }, { "epoch": 0.7913043478260869, "grad_norm": 1.0800231299577623, "learning_rate": 1.415229227529996e-05, "loss": 0.5840598344802856, "step": 2912 }, { "epoch": 0.7915760869565217, "grad_norm": 1.1869106547303285, "learning_rate": 1.414820438537532e-05, "loss": 0.5296626091003418, "step": 2913 }, { "epoch": 0.7918478260869565, "grad_norm": 0.9395828129766701, "learning_rate": 1.4144115658004233e-05, "loss": 0.3959054946899414, "step": 2914 }, { "epoch": 0.7921195652173914, "grad_norm": 0.8747773258156657, "learning_rate": 1.4140026094012136e-05, "loss": 0.33894550800323486, "step": 2915 }, { "epoch": 0.7923913043478261, "grad_norm": 1.273993340447514, "learning_rate": 1.4135935694224638e-05, "loss": 0.5888677835464478, "step": 2916 }, { "epoch": 0.7926630434782609, "grad_norm": 0.895268931196184, "learning_rate": 1.4131844459467514e-05, "loss": 0.4499571919441223, "step": 2917 }, { "epoch": 0.7929347826086957, "grad_norm": 0.9785155735208823, "learning_rate": 1.4127752390566713e-05, "loss": 0.40842676162719727, "step": 2918 }, { "epoch": 0.7932065217391304, "grad_norm": 0.9587701684059672, "learning_rate": 1.4123659488348346e-05, "loss": 0.40507572889328003, "step": 2919 }, { "epoch": 0.7934782608695652, "grad_norm": 1.048517045360442, "learning_rate": 1.4119565753638695e-05, "loss": 0.5305424332618713, "step": 2920 }, { "epoch": 0.79375, "grad_norm": 1.0766806504159807, "learning_rate": 1.4115471187264212e-05, "loss": 0.49358516931533813, "step": 2921 }, { "epoch": 0.7940217391304348, "grad_norm": 0.8305833238447099, "learning_rate": 1.4111375790051511e-05, "loss": 0.3183687925338745, "step": 2922 }, { "epoch": 0.7942934782608696, "grad_norm": 1.0183872238641969, "learning_rate": 1.410727956282738e-05, "loss": 0.4140773415565491, "step": 2923 }, { "epoch": 0.7945652173913044, "grad_norm": 1.1586671891569287, "learning_rate": 1.4103182506418773e-05, "loss": 0.4931670129299164, "step": 2924 }, { "epoch": 0.7948369565217391, "grad_norm": 0.9758892381255133, "learning_rate": 1.409908462165281e-05, "loss": 0.39778900146484375, "step": 2925 }, { "epoch": 0.7951086956521739, "grad_norm": 0.9641091481088303, "learning_rate": 1.409498590935678e-05, "loss": 0.47480595111846924, "step": 2926 }, { "epoch": 0.7953804347826087, "grad_norm": 1.2163653924715567, "learning_rate": 1.4090886370358137e-05, "loss": 0.4791763424873352, "step": 2927 }, { "epoch": 0.7956521739130434, "grad_norm": 1.0958202046538077, "learning_rate": 1.4086786005484498e-05, "loss": 0.4834006428718567, "step": 2928 }, { "epoch": 0.7959239130434783, "grad_norm": 1.0934928092507021, "learning_rate": 1.408268481556366e-05, "loss": 0.5005463361740112, "step": 2929 }, { "epoch": 0.7961956521739131, "grad_norm": 0.9871341511999701, "learning_rate": 1.407858280142357e-05, "loss": 0.4226594865322113, "step": 2930 }, { "epoch": 0.7964673913043478, "grad_norm": 1.01714167949649, "learning_rate": 1.4074479963892358e-05, "loss": 0.43151962757110596, "step": 2931 }, { "epoch": 0.7967391304347826, "grad_norm": 1.1173354100006132, "learning_rate": 1.4070376303798305e-05, "loss": 0.4864168167114258, "step": 2932 }, { "epoch": 0.7970108695652174, "grad_norm": 0.9456858409580305, "learning_rate": 1.4066271821969866e-05, "loss": 0.4259607195854187, "step": 2933 }, { "epoch": 0.7972826086956522, "grad_norm": 0.8741259130813196, "learning_rate": 1.4062166519235665e-05, "loss": 0.42021262645721436, "step": 2934 }, { "epoch": 0.7975543478260869, "grad_norm": 1.2930545278548304, "learning_rate": 1.405806039642448e-05, "loss": 0.6321585178375244, "step": 2935 }, { "epoch": 0.7978260869565217, "grad_norm": 1.1202627541701426, "learning_rate": 1.405395345436527e-05, "loss": 0.5040304660797119, "step": 2936 }, { "epoch": 0.7980978260869566, "grad_norm": 1.0658988052879332, "learning_rate": 1.4049845693887146e-05, "loss": 0.483578622341156, "step": 2937 }, { "epoch": 0.7983695652173913, "grad_norm": 1.1978858118144644, "learning_rate": 1.4045737115819387e-05, "loss": 0.4701111614704132, "step": 2938 }, { "epoch": 0.7986413043478261, "grad_norm": 0.9406884625071267, "learning_rate": 1.4041627720991448e-05, "loss": 0.4302641749382019, "step": 2939 }, { "epoch": 0.7989130434782609, "grad_norm": 1.0262917987918245, "learning_rate": 1.4037517510232934e-05, "loss": 0.5490854978561401, "step": 2940 }, { "epoch": 0.7991847826086956, "grad_norm": 0.8892816757629565, "learning_rate": 1.4033406484373624e-05, "loss": 0.3800462484359741, "step": 2941 }, { "epoch": 0.7994565217391304, "grad_norm": 1.158304448007674, "learning_rate": 1.4029294644243456e-05, "loss": 0.48903894424438477, "step": 2942 }, { "epoch": 0.7997282608695652, "grad_norm": 1.092349060463349, "learning_rate": 1.4025181990672541e-05, "loss": 0.46101945638656616, "step": 2943 }, { "epoch": 0.8, "grad_norm": 1.0211123150751193, "learning_rate": 1.4021068524491144e-05, "loss": 0.43781065940856934, "step": 2944 }, { "epoch": 0.8002717391304348, "grad_norm": 0.9338697601871111, "learning_rate": 1.4016954246529697e-05, "loss": 0.4101276397705078, "step": 2945 }, { "epoch": 0.8005434782608696, "grad_norm": 1.1492602167543486, "learning_rate": 1.4012839157618799e-05, "loss": 0.5408405661582947, "step": 2946 }, { "epoch": 0.8008152173913043, "grad_norm": 1.019743072737483, "learning_rate": 1.4008723258589214e-05, "loss": 0.5274088382720947, "step": 2947 }, { "epoch": 0.8010869565217391, "grad_norm": 1.4782885295686783, "learning_rate": 1.400460655027186e-05, "loss": 0.4600054621696472, "step": 2948 }, { "epoch": 0.8013586956521739, "grad_norm": 1.0084520907573449, "learning_rate": 1.4000489033497833e-05, "loss": 0.46946799755096436, "step": 2949 }, { "epoch": 0.8016304347826086, "grad_norm": 0.9578659900240241, "learning_rate": 1.3996370709098377e-05, "loss": 0.43452346324920654, "step": 2950 }, { "epoch": 0.8019021739130435, "grad_norm": 1.0096919991468873, "learning_rate": 1.3992251577904912e-05, "loss": 0.47876542806625366, "step": 2951 }, { "epoch": 0.8021739130434783, "grad_norm": 1.0073122006569142, "learning_rate": 1.398813164074901e-05, "loss": 0.43985551595687866, "step": 2952 }, { "epoch": 0.8024456521739131, "grad_norm": 1.1209504114879825, "learning_rate": 1.3984010898462417e-05, "loss": 0.5928995609283447, "step": 2953 }, { "epoch": 0.8027173913043478, "grad_norm": 1.0133410694849305, "learning_rate": 1.397988935187703e-05, "loss": 0.4162819981575012, "step": 2954 }, { "epoch": 0.8029891304347826, "grad_norm": 1.0334256926491368, "learning_rate": 1.3975767001824913e-05, "loss": 0.4600348472595215, "step": 2955 }, { "epoch": 0.8032608695652174, "grad_norm": 1.2828531388358293, "learning_rate": 1.3971643849138296e-05, "loss": 0.5730762481689453, "step": 2956 }, { "epoch": 0.8035326086956521, "grad_norm": 0.9024134251798447, "learning_rate": 1.396751989464957e-05, "loss": 0.419735312461853, "step": 2957 }, { "epoch": 0.803804347826087, "grad_norm": 1.104847072699048, "learning_rate": 1.396339513919128e-05, "loss": 0.45344558358192444, "step": 2958 }, { "epoch": 0.8040760869565218, "grad_norm": 1.0616588490837071, "learning_rate": 1.3959269583596145e-05, "loss": 0.5289534330368042, "step": 2959 }, { "epoch": 0.8043478260869565, "grad_norm": 0.8956140672229935, "learning_rate": 1.3955143228697033e-05, "loss": 0.40570688247680664, "step": 2960 }, { "epoch": 0.8046195652173913, "grad_norm": 0.9586618808561352, "learning_rate": 1.395101607532698e-05, "loss": 0.425751268863678, "step": 2961 }, { "epoch": 0.8048913043478261, "grad_norm": 1.1709739357368263, "learning_rate": 1.394688812431919e-05, "loss": 0.5406315326690674, "step": 2962 }, { "epoch": 0.8051630434782608, "grad_norm": 0.9928267824622868, "learning_rate": 1.3942759376507013e-05, "loss": 0.47053903341293335, "step": 2963 }, { "epoch": 0.8054347826086956, "grad_norm": 1.0942503093618887, "learning_rate": 1.3938629832723969e-05, "loss": 0.4472465217113495, "step": 2964 }, { "epoch": 0.8057065217391305, "grad_norm": 1.231160647608704, "learning_rate": 1.3934499493803741e-05, "loss": 0.596405565738678, "step": 2965 }, { "epoch": 0.8059782608695653, "grad_norm": 1.4441038597748106, "learning_rate": 1.3930368360580164e-05, "loss": 0.4778022766113281, "step": 2966 }, { "epoch": 0.80625, "grad_norm": 1.0404836399435893, "learning_rate": 1.392623643388724e-05, "loss": 0.48499470949172974, "step": 2967 }, { "epoch": 0.8065217391304348, "grad_norm": 1.04376661211828, "learning_rate": 1.392210371455913e-05, "loss": 0.4591231644153595, "step": 2968 }, { "epoch": 0.8067934782608696, "grad_norm": 0.8516134695725089, "learning_rate": 1.3917970203430154e-05, "loss": 0.3401121497154236, "step": 2969 }, { "epoch": 0.8070652173913043, "grad_norm": 1.0407299292251582, "learning_rate": 1.391383590133479e-05, "loss": 0.5230125784873962, "step": 2970 }, { "epoch": 0.8073369565217391, "grad_norm": 0.9319547540769291, "learning_rate": 1.3909700809107682e-05, "loss": 0.4533214569091797, "step": 2971 }, { "epoch": 0.8076086956521739, "grad_norm": 0.9359187900079565, "learning_rate": 1.3905564927583625e-05, "loss": 0.4556306302547455, "step": 2972 }, { "epoch": 0.8078804347826087, "grad_norm": 1.1314371502517668, "learning_rate": 1.390142825759758e-05, "loss": 0.5419502258300781, "step": 2973 }, { "epoch": 0.8081521739130435, "grad_norm": 1.1069635574030219, "learning_rate": 1.3897290799984664e-05, "loss": 0.5297812223434448, "step": 2974 }, { "epoch": 0.8084239130434783, "grad_norm": 1.0080407470265962, "learning_rate": 1.3893152555580157e-05, "loss": 0.5164291262626648, "step": 2975 }, { "epoch": 0.808695652173913, "grad_norm": 0.9125797452881861, "learning_rate": 1.3889013525219488e-05, "loss": 0.45979052782058716, "step": 2976 }, { "epoch": 0.8089673913043478, "grad_norm": 1.2392470211376314, "learning_rate": 1.3884873709738259e-05, "loss": 0.5789604187011719, "step": 2977 }, { "epoch": 0.8092391304347826, "grad_norm": 0.8836067357454197, "learning_rate": 1.3880733109972215e-05, "loss": 0.4305219054222107, "step": 2978 }, { "epoch": 0.8095108695652173, "grad_norm": 0.9145724651305781, "learning_rate": 1.3876591726757276e-05, "loss": 0.3947349786758423, "step": 2979 }, { "epoch": 0.8097826086956522, "grad_norm": 0.9621289149301095, "learning_rate": 1.3872449560929506e-05, "loss": 0.37670519948005676, "step": 2980 }, { "epoch": 0.810054347826087, "grad_norm": 0.9275981296376588, "learning_rate": 1.3868306613325131e-05, "loss": 0.4583791494369507, "step": 2981 }, { "epoch": 0.8103260869565218, "grad_norm": 1.0448627822818624, "learning_rate": 1.3864162884780538e-05, "loss": 0.48752933740615845, "step": 2982 }, { "epoch": 0.8105978260869565, "grad_norm": 1.0415748053855836, "learning_rate": 1.3860018376132275e-05, "loss": 0.5482602119445801, "step": 2983 }, { "epoch": 0.8108695652173913, "grad_norm": 1.0638897755348444, "learning_rate": 1.3855873088217033e-05, "loss": 0.46178704500198364, "step": 2984 }, { "epoch": 0.811141304347826, "grad_norm": 1.007907264063714, "learning_rate": 1.3851727021871679e-05, "loss": 0.45091119408607483, "step": 2985 }, { "epoch": 0.8114130434782608, "grad_norm": 0.8496720868621609, "learning_rate": 1.384758017793322e-05, "loss": 0.3913378119468689, "step": 2986 }, { "epoch": 0.8116847826086957, "grad_norm": 0.978092557539366, "learning_rate": 1.3843432557238832e-05, "loss": 0.4337768256664276, "step": 2987 }, { "epoch": 0.8119565217391305, "grad_norm": 1.1025282146559294, "learning_rate": 1.3839284160625844e-05, "loss": 0.45763546228408813, "step": 2988 }, { "epoch": 0.8122282608695652, "grad_norm": 0.977469769189905, "learning_rate": 1.3835134988931736e-05, "loss": 0.3660081624984741, "step": 2989 }, { "epoch": 0.8125, "grad_norm": 1.0186971657287285, "learning_rate": 1.3830985042994157e-05, "loss": 0.4585718512535095, "step": 2990 }, { "epoch": 0.8127717391304348, "grad_norm": 1.1131013980118825, "learning_rate": 1.3826834323650899e-05, "loss": 0.5613927841186523, "step": 2991 }, { "epoch": 0.8130434782608695, "grad_norm": 1.0154777196194085, "learning_rate": 1.382268283173992e-05, "loss": 0.5023368000984192, "step": 2992 }, { "epoch": 0.8133152173913043, "grad_norm": 0.9731077849652865, "learning_rate": 1.3818530568099328e-05, "loss": 0.48540371656417847, "step": 2993 }, { "epoch": 0.8135869565217392, "grad_norm": 1.1907326686724338, "learning_rate": 1.3814377533567392e-05, "loss": 0.5229232907295227, "step": 2994 }, { "epoch": 0.813858695652174, "grad_norm": 1.5743686262645207, "learning_rate": 1.381022372898253e-05, "loss": 0.4819262623786926, "step": 2995 }, { "epoch": 0.8141304347826087, "grad_norm": 0.982831255808471, "learning_rate": 1.380606915518332e-05, "loss": 0.4479055404663086, "step": 2996 }, { "epoch": 0.8144021739130435, "grad_norm": 1.1427236079102594, "learning_rate": 1.3801913813008496e-05, "loss": 0.49258357286453247, "step": 2997 }, { "epoch": 0.8146739130434782, "grad_norm": 0.9719060565902452, "learning_rate": 1.3797757703296946e-05, "loss": 0.396697074174881, "step": 2998 }, { "epoch": 0.814945652173913, "grad_norm": 1.3993463760257419, "learning_rate": 1.3793600826887708e-05, "loss": 0.5080807209014893, "step": 2999 }, { "epoch": 0.8152173913043478, "grad_norm": 0.9827976781582423, "learning_rate": 1.3789443184619984e-05, "loss": 0.4275343716144562, "step": 3000 }, { "epoch": 0.8154891304347827, "grad_norm": 1.2161381155761406, "learning_rate": 1.378528477733312e-05, "loss": 0.6129547953605652, "step": 3001 }, { "epoch": 0.8157608695652174, "grad_norm": 1.1825916960270446, "learning_rate": 1.3781125605866625e-05, "loss": 0.534515380859375, "step": 3002 }, { "epoch": 0.8160326086956522, "grad_norm": 1.205444614548856, "learning_rate": 1.3776965671060165e-05, "loss": 0.5268707275390625, "step": 3003 }, { "epoch": 0.816304347826087, "grad_norm": 0.9586027041736508, "learning_rate": 1.3772804973753549e-05, "loss": 0.454912006855011, "step": 3004 }, { "epoch": 0.8165760869565217, "grad_norm": 0.897409110937099, "learning_rate": 1.3768643514786744e-05, "loss": 0.4839075207710266, "step": 3005 }, { "epoch": 0.8168478260869565, "grad_norm": 1.1358818824793597, "learning_rate": 1.3764481294999876e-05, "loss": 0.5171059966087341, "step": 3006 }, { "epoch": 0.8171195652173913, "grad_norm": 1.0259982302108694, "learning_rate": 1.3760318315233218e-05, "loss": 0.4741596579551697, "step": 3007 }, { "epoch": 0.8173913043478261, "grad_norm": 0.994797375128336, "learning_rate": 1.3756154576327206e-05, "loss": 0.43493932485580444, "step": 3008 }, { "epoch": 0.8176630434782609, "grad_norm": 1.1592563270330645, "learning_rate": 1.3751990079122412e-05, "loss": 0.5183864831924438, "step": 3009 }, { "epoch": 0.8179347826086957, "grad_norm": 1.1137565953103274, "learning_rate": 1.3747824824459577e-05, "loss": 0.5357530117034912, "step": 3010 }, { "epoch": 0.8182065217391304, "grad_norm": 0.9790394869499347, "learning_rate": 1.3743658813179594e-05, "loss": 0.4793965816497803, "step": 3011 }, { "epoch": 0.8184782608695652, "grad_norm": 1.119962510854209, "learning_rate": 1.3739492046123498e-05, "loss": 0.5499706864356995, "step": 3012 }, { "epoch": 0.81875, "grad_norm": 0.8996578459774561, "learning_rate": 1.3735324524132484e-05, "loss": 0.39139628410339355, "step": 3013 }, { "epoch": 0.8190217391304347, "grad_norm": 0.9844213594217267, "learning_rate": 1.3731156248047903e-05, "loss": 0.4034543037414551, "step": 3014 }, { "epoch": 0.8192934782608695, "grad_norm": 1.0492903560345503, "learning_rate": 1.3726987218711248e-05, "loss": 0.4936123490333557, "step": 3015 }, { "epoch": 0.8195652173913044, "grad_norm": 1.0622727017903166, "learning_rate": 1.372281743696417e-05, "loss": 0.49647364020347595, "step": 3016 }, { "epoch": 0.8198369565217392, "grad_norm": 0.9543142456096186, "learning_rate": 1.3718646903648476e-05, "loss": 0.3875243663787842, "step": 3017 }, { "epoch": 0.8201086956521739, "grad_norm": 1.1336784294590836, "learning_rate": 1.3714475619606116e-05, "loss": 0.4788663983345032, "step": 3018 }, { "epoch": 0.8203804347826087, "grad_norm": 1.0796037403500751, "learning_rate": 1.3710303585679195e-05, "loss": 0.4925743341445923, "step": 3019 }, { "epoch": 0.8206521739130435, "grad_norm": 1.0069843533573626, "learning_rate": 1.3706130802709974e-05, "loss": 0.4511178731918335, "step": 3020 }, { "epoch": 0.8209239130434782, "grad_norm": 1.2475328446381266, "learning_rate": 1.3701957271540863e-05, "loss": 0.5610195398330688, "step": 3021 }, { "epoch": 0.821195652173913, "grad_norm": 1.1550607797724717, "learning_rate": 1.3697782993014412e-05, "loss": 0.5380496978759766, "step": 3022 }, { "epoch": 0.8214673913043479, "grad_norm": 0.9981961013945244, "learning_rate": 1.369360796797334e-05, "loss": 0.40982070565223694, "step": 3023 }, { "epoch": 0.8217391304347826, "grad_norm": 1.0473306450686402, "learning_rate": 1.3689432197260508e-05, "loss": 0.4781489372253418, "step": 3024 }, { "epoch": 0.8220108695652174, "grad_norm": 1.0417445239840655, "learning_rate": 1.3685255681718922e-05, "loss": 0.4299201965332031, "step": 3025 }, { "epoch": 0.8222826086956522, "grad_norm": 1.155143026245772, "learning_rate": 1.368107842219175e-05, "loss": 0.530179500579834, "step": 3026 }, { "epoch": 0.8225543478260869, "grad_norm": 1.1045303560608948, "learning_rate": 1.36769004195223e-05, "loss": 0.4915224313735962, "step": 3027 }, { "epoch": 0.8228260869565217, "grad_norm": 1.0843013668471033, "learning_rate": 1.3672721674554033e-05, "loss": 0.4042373299598694, "step": 3028 }, { "epoch": 0.8230978260869565, "grad_norm": 0.929681741046824, "learning_rate": 1.3668542188130567e-05, "loss": 0.45582494139671326, "step": 3029 }, { "epoch": 0.8233695652173914, "grad_norm": 1.1438867908008314, "learning_rate": 1.3664361961095658e-05, "loss": 0.5307189226150513, "step": 3030 }, { "epoch": 0.8236413043478261, "grad_norm": 1.01314062259106, "learning_rate": 1.3660180994293222e-05, "loss": 0.45787444710731506, "step": 3031 }, { "epoch": 0.8239130434782609, "grad_norm": 0.8883191269485851, "learning_rate": 1.3655999288567315e-05, "loss": 0.3750036954879761, "step": 3032 }, { "epoch": 0.8241847826086957, "grad_norm": 0.9853570134834658, "learning_rate": 1.3651816844762148e-05, "loss": 0.46986040472984314, "step": 3033 }, { "epoch": 0.8244565217391304, "grad_norm": 1.0633961913698864, "learning_rate": 1.3647633663722081e-05, "loss": 0.5329889059066772, "step": 3034 }, { "epoch": 0.8247282608695652, "grad_norm": 0.9696424630663539, "learning_rate": 1.3643449746291624e-05, "loss": 0.4262750744819641, "step": 3035 }, { "epoch": 0.825, "grad_norm": 0.9704096301439539, "learning_rate": 1.3639265093315429e-05, "loss": 0.4793041944503784, "step": 3036 }, { "epoch": 0.8252717391304348, "grad_norm": 0.7664011567017394, "learning_rate": 1.3635079705638298e-05, "loss": 0.3254196047782898, "step": 3037 }, { "epoch": 0.8255434782608696, "grad_norm": 1.0828087874788028, "learning_rate": 1.3630893584105192e-05, "loss": 0.567823588848114, "step": 3038 }, { "epoch": 0.8258152173913044, "grad_norm": 1.059742217989161, "learning_rate": 1.3626706729561211e-05, "loss": 0.4893684983253479, "step": 3039 }, { "epoch": 0.8260869565217391, "grad_norm": 0.9863527637758743, "learning_rate": 1.3622519142851597e-05, "loss": 0.4833095669746399, "step": 3040 }, { "epoch": 0.8263586956521739, "grad_norm": 0.9348550481895108, "learning_rate": 1.3618330824821753e-05, "loss": 0.4080364406108856, "step": 3041 }, { "epoch": 0.8266304347826087, "grad_norm": 1.102728239257473, "learning_rate": 1.3614141776317224e-05, "loss": 0.5362544059753418, "step": 3042 }, { "epoch": 0.8269021739130434, "grad_norm": 1.2834969274879007, "learning_rate": 1.36099519981837e-05, "loss": 0.5134382247924805, "step": 3043 }, { "epoch": 0.8271739130434783, "grad_norm": 0.7510420832579415, "learning_rate": 1.360576149126702e-05, "loss": 0.28858762979507446, "step": 3044 }, { "epoch": 0.8274456521739131, "grad_norm": 0.9326617471472733, "learning_rate": 1.360157025641317e-05, "loss": 0.4362766444683075, "step": 3045 }, { "epoch": 0.8277173913043478, "grad_norm": 0.9388194391812508, "learning_rate": 1.3597378294468286e-05, "loss": 0.4232179820537567, "step": 3046 }, { "epoch": 0.8279891304347826, "grad_norm": 0.9628115354610722, "learning_rate": 1.359318560627865e-05, "loss": 0.4118311405181885, "step": 3047 }, { "epoch": 0.8282608695652174, "grad_norm": 1.0214178489510304, "learning_rate": 1.3588992192690683e-05, "loss": 0.48459142446517944, "step": 3048 }, { "epoch": 0.8285326086956522, "grad_norm": 0.9174256441861098, "learning_rate": 1.3584798054550963e-05, "loss": 0.40481385588645935, "step": 3049 }, { "epoch": 0.8288043478260869, "grad_norm": 1.0980761838385318, "learning_rate": 1.3580603192706208e-05, "loss": 0.5086297988891602, "step": 3050 }, { "epoch": 0.8290760869565217, "grad_norm": 1.0556596877742817, "learning_rate": 1.3576407608003282e-05, "loss": 0.4237728714942932, "step": 3051 }, { "epoch": 0.8293478260869566, "grad_norm": 1.033187817971354, "learning_rate": 1.35722113012892e-05, "loss": 0.5318535566329956, "step": 3052 }, { "epoch": 0.8296195652173913, "grad_norm": 1.0747875174378043, "learning_rate": 1.3568014273411117e-05, "loss": 0.48885804414749146, "step": 3053 }, { "epoch": 0.8298913043478261, "grad_norm": 1.0799427581351133, "learning_rate": 1.3563816525216337e-05, "loss": 0.5096100568771362, "step": 3054 }, { "epoch": 0.8301630434782609, "grad_norm": 1.0833712672895421, "learning_rate": 1.355961805755231e-05, "loss": 0.5117170214653015, "step": 3055 }, { "epoch": 0.8304347826086956, "grad_norm": 0.9790261116054232, "learning_rate": 1.3555418871266628e-05, "loss": 0.37861698865890503, "step": 3056 }, { "epoch": 0.8307065217391304, "grad_norm": 1.0718828977884487, "learning_rate": 1.3551218967207032e-05, "loss": 0.48972249031066895, "step": 3057 }, { "epoch": 0.8309782608695652, "grad_norm": 1.1079153221034548, "learning_rate": 1.3547018346221399e-05, "loss": 0.5099431276321411, "step": 3058 }, { "epoch": 0.83125, "grad_norm": 1.0262730613598416, "learning_rate": 1.3542817009157763e-05, "loss": 0.44788461923599243, "step": 3059 }, { "epoch": 0.8315217391304348, "grad_norm": 1.1462187700951694, "learning_rate": 1.3538614956864297e-05, "loss": 0.5629431009292603, "step": 3060 }, { "epoch": 0.8317934782608696, "grad_norm": 1.063761743948039, "learning_rate": 1.3534412190189317e-05, "loss": 0.48572373390197754, "step": 3061 }, { "epoch": 0.8320652173913043, "grad_norm": 0.9834210985978951, "learning_rate": 1.3530208709981285e-05, "loss": 0.476470410823822, "step": 3062 }, { "epoch": 0.8323369565217391, "grad_norm": 1.0863359033816677, "learning_rate": 1.3526004517088805e-05, "loss": 0.48061344027519226, "step": 3063 }, { "epoch": 0.8326086956521739, "grad_norm": 0.991036078931257, "learning_rate": 1.3521799612360628e-05, "loss": 0.36161869764328003, "step": 3064 }, { "epoch": 0.8328804347826086, "grad_norm": 1.0633239084646122, "learning_rate": 1.351759399664565e-05, "loss": 0.4813365936279297, "step": 3065 }, { "epoch": 0.8331521739130435, "grad_norm": 1.00393445245785, "learning_rate": 1.3513387670792904e-05, "loss": 0.464633971452713, "step": 3066 }, { "epoch": 0.8334239130434783, "grad_norm": 1.057987269793595, "learning_rate": 1.350918063565157e-05, "loss": 0.5299080610275269, "step": 3067 }, { "epoch": 0.8336956521739131, "grad_norm": 1.0088566223477886, "learning_rate": 1.3504972892070974e-05, "loss": 0.4578176736831665, "step": 3068 }, { "epoch": 0.8339673913043478, "grad_norm": 1.1031542486510835, "learning_rate": 1.3500764440900577e-05, "loss": 0.5229237675666809, "step": 3069 }, { "epoch": 0.8342391304347826, "grad_norm": 1.0168179386370328, "learning_rate": 1.3496555282989997e-05, "loss": 0.4594436287879944, "step": 3070 }, { "epoch": 0.8345108695652174, "grad_norm": 1.0389353508432957, "learning_rate": 1.3492345419188975e-05, "loss": 0.4688419699668884, "step": 3071 }, { "epoch": 0.8347826086956521, "grad_norm": 1.0239449946698098, "learning_rate": 1.3488134850347414e-05, "loss": 0.43325918912887573, "step": 3072 }, { "epoch": 0.835054347826087, "grad_norm": 1.11070349227709, "learning_rate": 1.3483923577315347e-05, "loss": 0.5531870722770691, "step": 3073 }, { "epoch": 0.8353260869565218, "grad_norm": 1.0492885983138258, "learning_rate": 1.3479711600942955e-05, "loss": 0.5055721998214722, "step": 3074 }, { "epoch": 0.8355978260869565, "grad_norm": 0.975495426190462, "learning_rate": 1.3475498922080557e-05, "loss": 0.4742954969406128, "step": 3075 }, { "epoch": 0.8358695652173913, "grad_norm": 0.9054783892058418, "learning_rate": 1.3471285541578614e-05, "loss": 0.38441044092178345, "step": 3076 }, { "epoch": 0.8361413043478261, "grad_norm": 1.0170070756797533, "learning_rate": 1.3467071460287733e-05, "loss": 0.40955618023872375, "step": 3077 }, { "epoch": 0.8364130434782608, "grad_norm": 0.8864151066731888, "learning_rate": 1.346285667905866e-05, "loss": 0.3941185474395752, "step": 3078 }, { "epoch": 0.8366847826086956, "grad_norm": 0.9738993130441357, "learning_rate": 1.3458641198742282e-05, "loss": 0.49729546904563904, "step": 3079 }, { "epoch": 0.8369565217391305, "grad_norm": 1.062103180277198, "learning_rate": 1.3454425020189626e-05, "loss": 0.5032207369804382, "step": 3080 }, { "epoch": 0.8372282608695653, "grad_norm": 0.765127375054672, "learning_rate": 1.3450208144251861e-05, "loss": 0.29361337423324585, "step": 3081 }, { "epoch": 0.8375, "grad_norm": 1.1042972716011086, "learning_rate": 1.3445990571780296e-05, "loss": 0.5259791016578674, "step": 3082 }, { "epoch": 0.8377717391304348, "grad_norm": 1.3096772457611523, "learning_rate": 1.3441772303626387e-05, "loss": 0.6043139696121216, "step": 3083 }, { "epoch": 0.8380434782608696, "grad_norm": 0.986313609069834, "learning_rate": 1.3437553340641718e-05, "loss": 0.435793399810791, "step": 3084 }, { "epoch": 0.8383152173913043, "grad_norm": 1.0640186891459604, "learning_rate": 1.3433333683678026e-05, "loss": 0.45160675048828125, "step": 3085 }, { "epoch": 0.8385869565217391, "grad_norm": 0.9518883292452716, "learning_rate": 1.3429113333587181e-05, "loss": 0.42827126383781433, "step": 3086 }, { "epoch": 0.8388586956521739, "grad_norm": 1.0445400206606252, "learning_rate": 1.3424892291221188e-05, "loss": 0.4808838963508606, "step": 3087 }, { "epoch": 0.8391304347826087, "grad_norm": 0.8736730453701509, "learning_rate": 1.342067055743221e-05, "loss": 0.3807935416698456, "step": 3088 }, { "epoch": 0.8394021739130435, "grad_norm": 0.94937764570471, "learning_rate": 1.3416448133072525e-05, "loss": 0.3521919250488281, "step": 3089 }, { "epoch": 0.8396739130434783, "grad_norm": 1.0672650082867292, "learning_rate": 1.3412225018994573e-05, "loss": 0.44619375467300415, "step": 3090 }, { "epoch": 0.839945652173913, "grad_norm": 0.867848263035158, "learning_rate": 1.340800121605092e-05, "loss": 0.395831823348999, "step": 3091 }, { "epoch": 0.8402173913043478, "grad_norm": 1.072578202745567, "learning_rate": 1.3403776725094274e-05, "loss": 0.5076891183853149, "step": 3092 }, { "epoch": 0.8404891304347826, "grad_norm": 3.7091700807289825, "learning_rate": 1.3399551546977481e-05, "loss": 0.4830675721168518, "step": 3093 }, { "epoch": 0.8407608695652173, "grad_norm": 1.1727676491194035, "learning_rate": 1.3395325682553527e-05, "loss": 0.5815223455429077, "step": 3094 }, { "epoch": 0.8410326086956522, "grad_norm": 0.8693879701948533, "learning_rate": 1.3391099132675538e-05, "loss": 0.40213248133659363, "step": 3095 }, { "epoch": 0.841304347826087, "grad_norm": 1.1140724489880696, "learning_rate": 1.3386871898196781e-05, "loss": 0.5280697345733643, "step": 3096 }, { "epoch": 0.8415760869565218, "grad_norm": 1.17328119854583, "learning_rate": 1.3382643979970649e-05, "loss": 0.5274847745895386, "step": 3097 }, { "epoch": 0.8418478260869565, "grad_norm": 1.0627888448247005, "learning_rate": 1.3378415378850686e-05, "loss": 0.44681113958358765, "step": 3098 }, { "epoch": 0.8421195652173913, "grad_norm": 1.0172084731156041, "learning_rate": 1.337418609569057e-05, "loss": 0.44040447473526, "step": 3099 }, { "epoch": 0.842391304347826, "grad_norm": 1.2056602299501398, "learning_rate": 1.3369956131344112e-05, "loss": 0.5552291870117188, "step": 3100 }, { "epoch": 0.8426630434782608, "grad_norm": 0.979386008421373, "learning_rate": 1.3365725486665268e-05, "loss": 0.4674525856971741, "step": 3101 }, { "epoch": 0.8429347826086957, "grad_norm": 1.131190053524556, "learning_rate": 1.3361494162508124e-05, "loss": 0.5430477857589722, "step": 3102 }, { "epoch": 0.8432065217391305, "grad_norm": 1.1375602362959099, "learning_rate": 1.3357262159726911e-05, "loss": 0.4775448441505432, "step": 3103 }, { "epoch": 0.8434782608695652, "grad_norm": 1.0390979397642426, "learning_rate": 1.3353029479175991e-05, "loss": 0.49772942066192627, "step": 3104 }, { "epoch": 0.84375, "grad_norm": 0.9982554019552768, "learning_rate": 1.3348796121709862e-05, "loss": 0.422940194606781, "step": 3105 }, { "epoch": 0.8440217391304348, "grad_norm": 1.1944220700087478, "learning_rate": 1.3344562088183166e-05, "loss": 0.5781092047691345, "step": 3106 }, { "epoch": 0.8442934782608695, "grad_norm": 1.0767038898017918, "learning_rate": 1.3340327379450674e-05, "loss": 0.46903401613235474, "step": 3107 }, { "epoch": 0.8445652173913043, "grad_norm": 1.0518609401978316, "learning_rate": 1.3336091996367295e-05, "loss": 0.41737571358680725, "step": 3108 }, { "epoch": 0.8448369565217392, "grad_norm": 1.0163639533457698, "learning_rate": 1.3331855939788078e-05, "loss": 0.43378937244415283, "step": 3109 }, { "epoch": 0.845108695652174, "grad_norm": 1.0262065101435258, "learning_rate": 1.3327619210568204e-05, "loss": 0.5096249580383301, "step": 3110 }, { "epoch": 0.8453804347826087, "grad_norm": 1.0739024630681362, "learning_rate": 1.332338180956299e-05, "loss": 0.47367429733276367, "step": 3111 }, { "epoch": 0.8456521739130435, "grad_norm": 0.9808267278955642, "learning_rate": 1.3319143737627888e-05, "loss": 0.39531514048576355, "step": 3112 }, { "epoch": 0.8459239130434782, "grad_norm": 1.045533414898171, "learning_rate": 1.3314904995618494e-05, "loss": 0.457758367061615, "step": 3113 }, { "epoch": 0.846195652173913, "grad_norm": 1.764729840620908, "learning_rate": 1.3310665584390525e-05, "loss": 0.6186362504959106, "step": 3114 }, { "epoch": 0.8464673913043478, "grad_norm": 1.0870130942209113, "learning_rate": 1.3306425504799844e-05, "loss": 0.46211349964141846, "step": 3115 }, { "epoch": 0.8467391304347827, "grad_norm": 1.015909611017321, "learning_rate": 1.3302184757702448e-05, "loss": 0.4633476138114929, "step": 3116 }, { "epoch": 0.8470108695652174, "grad_norm": 1.184539354934287, "learning_rate": 1.3297943343954458e-05, "loss": 0.49676936864852905, "step": 3117 }, { "epoch": 0.8472826086956522, "grad_norm": 1.1352125559528055, "learning_rate": 1.3293701264412145e-05, "loss": 0.5229306817054749, "step": 3118 }, { "epoch": 0.847554347826087, "grad_norm": 1.1975281089196643, "learning_rate": 1.3289458519931903e-05, "loss": 0.5244495868682861, "step": 3119 }, { "epoch": 0.8478260869565217, "grad_norm": 0.9803448481368255, "learning_rate": 1.328521511137027e-05, "loss": 0.4775868058204651, "step": 3120 }, { "epoch": 0.8480978260869565, "grad_norm": 0.9929933122114686, "learning_rate": 1.3280971039583906e-05, "loss": 0.4237733483314514, "step": 3121 }, { "epoch": 0.8483695652173913, "grad_norm": 1.0923070241069064, "learning_rate": 1.327672630542961e-05, "loss": 0.5167917013168335, "step": 3122 }, { "epoch": 0.8486413043478261, "grad_norm": 0.9574235488032363, "learning_rate": 1.3272480909764323e-05, "loss": 0.44681310653686523, "step": 3123 }, { "epoch": 0.8489130434782609, "grad_norm": 1.1291671287878717, "learning_rate": 1.3268234853445113e-05, "loss": 0.6391574740409851, "step": 3124 }, { "epoch": 0.8491847826086957, "grad_norm": 0.9983081304383016, "learning_rate": 1.3263988137329173e-05, "loss": 0.46608966588974, "step": 3125 }, { "epoch": 0.8494565217391304, "grad_norm": 0.9478917287039107, "learning_rate": 1.325974076227384e-05, "loss": 0.4181259274482727, "step": 3126 }, { "epoch": 0.8497282608695652, "grad_norm": 0.9727742564413029, "learning_rate": 1.3255492729136585e-05, "loss": 0.3682939410209656, "step": 3127 }, { "epoch": 0.85, "grad_norm": 0.9876950225517076, "learning_rate": 1.3251244038775007e-05, "loss": 0.44903889298439026, "step": 3128 }, { "epoch": 0.8502717391304347, "grad_norm": 1.1627472775366479, "learning_rate": 1.3246994692046837e-05, "loss": 0.5510815382003784, "step": 3129 }, { "epoch": 0.8505434782608695, "grad_norm": 1.1522731257178407, "learning_rate": 1.3242744689809939e-05, "loss": 0.5410008430480957, "step": 3130 }, { "epoch": 0.8508152173913044, "grad_norm": 1.070990717850513, "learning_rate": 1.3238494032922313e-05, "loss": 0.4804041385650635, "step": 3131 }, { "epoch": 0.8510869565217392, "grad_norm": 0.9182384805066282, "learning_rate": 1.3234242722242088e-05, "loss": 0.43297091126441956, "step": 3132 }, { "epoch": 0.8513586956521739, "grad_norm": 1.0872295413065876, "learning_rate": 1.3229990758627527e-05, "loss": 0.49406832456588745, "step": 3133 }, { "epoch": 0.8516304347826087, "grad_norm": 0.9296716536105097, "learning_rate": 1.3225738142937022e-05, "loss": 0.32840508222579956, "step": 3134 }, { "epoch": 0.8519021739130435, "grad_norm": 1.0959353352310945, "learning_rate": 1.3221484876029096e-05, "loss": 0.5185918807983398, "step": 3135 }, { "epoch": 0.8521739130434782, "grad_norm": 1.0461504084921758, "learning_rate": 1.321723095876241e-05, "loss": 0.481356680393219, "step": 3136 }, { "epoch": 0.852445652173913, "grad_norm": 1.1408760232995072, "learning_rate": 1.3212976391995752e-05, "loss": 0.5575435161590576, "step": 3137 }, { "epoch": 0.8527173913043479, "grad_norm": 0.9917245098070507, "learning_rate": 1.3208721176588035e-05, "loss": 0.39732426404953003, "step": 3138 }, { "epoch": 0.8529891304347826, "grad_norm": 1.015652496128275, "learning_rate": 1.3204465313398316e-05, "loss": 0.4789959490299225, "step": 3139 }, { "epoch": 0.8532608695652174, "grad_norm": 0.9564559819884081, "learning_rate": 1.320020880328577e-05, "loss": 0.3977208137512207, "step": 3140 }, { "epoch": 0.8535326086956522, "grad_norm": 0.7816421272216394, "learning_rate": 1.3195951647109715e-05, "loss": 0.30223071575164795, "step": 3141 }, { "epoch": 0.8538043478260869, "grad_norm": 1.157664715360752, "learning_rate": 1.3191693845729587e-05, "loss": 0.6114940047264099, "step": 3142 }, { "epoch": 0.8540760869565217, "grad_norm": 1.034194634696496, "learning_rate": 1.318743540000496e-05, "loss": 0.4204721450805664, "step": 3143 }, { "epoch": 0.8543478260869565, "grad_norm": 1.1523444165471435, "learning_rate": 1.3183176310795535e-05, "loss": 0.6083630919456482, "step": 3144 }, { "epoch": 0.8546195652173914, "grad_norm": 1.038020722506586, "learning_rate": 1.317891657896115e-05, "loss": 0.4779706597328186, "step": 3145 }, { "epoch": 0.8548913043478261, "grad_norm": 1.0943856779222823, "learning_rate": 1.3174656205361758e-05, "loss": 0.46187472343444824, "step": 3146 }, { "epoch": 0.8551630434782609, "grad_norm": 1.430699275611118, "learning_rate": 1.3170395190857456e-05, "loss": 0.5169101357460022, "step": 3147 }, { "epoch": 0.8554347826086957, "grad_norm": 1.0309809049349459, "learning_rate": 1.3166133536308463e-05, "loss": 0.4825800061225891, "step": 3148 }, { "epoch": 0.8557065217391304, "grad_norm": 1.3512170218264279, "learning_rate": 1.3161871242575131e-05, "loss": 0.4544947147369385, "step": 3149 }, { "epoch": 0.8559782608695652, "grad_norm": 0.9909550770989928, "learning_rate": 1.3157608310517936e-05, "loss": 0.47683632373809814, "step": 3150 }, { "epoch": 0.85625, "grad_norm": 1.0835278672663327, "learning_rate": 1.3153344740997484e-05, "loss": 0.5080944895744324, "step": 3151 }, { "epoch": 0.8565217391304348, "grad_norm": 0.8240089908447024, "learning_rate": 1.3149080534874519e-05, "loss": 0.4031643271446228, "step": 3152 }, { "epoch": 0.8567934782608696, "grad_norm": 1.0655208892453876, "learning_rate": 1.31448156930099e-05, "loss": 0.4302199184894562, "step": 3153 }, { "epoch": 0.8570652173913044, "grad_norm": 1.029028920000794, "learning_rate": 1.314055021626462e-05, "loss": 0.4629557728767395, "step": 3154 }, { "epoch": 0.8573369565217391, "grad_norm": 0.9170932868683376, "learning_rate": 1.3136284105499808e-05, "loss": 0.48154738545417786, "step": 3155 }, { "epoch": 0.8576086956521739, "grad_norm": 1.0745553244802193, "learning_rate": 1.3132017361576704e-05, "loss": 0.5577428936958313, "step": 3156 }, { "epoch": 0.8578804347826087, "grad_norm": 1.1223499242189157, "learning_rate": 1.312774998535669e-05, "loss": 0.5723893642425537, "step": 3157 }, { "epoch": 0.8581521739130434, "grad_norm": 1.0309563788493288, "learning_rate": 1.3123481977701271e-05, "loss": 0.5532289743423462, "step": 3158 }, { "epoch": 0.8584239130434783, "grad_norm": 1.1022257217888218, "learning_rate": 1.3119213339472083e-05, "loss": 0.5035018920898438, "step": 3159 }, { "epoch": 0.8586956521739131, "grad_norm": 0.9459262018295089, "learning_rate": 1.311494407153088e-05, "loss": 0.4706418216228485, "step": 3160 }, { "epoch": 0.8589673913043478, "grad_norm": 0.9193379415250921, "learning_rate": 1.311067417473955e-05, "loss": 0.43142974376678467, "step": 3161 }, { "epoch": 0.8592391304347826, "grad_norm": 0.9460788466696121, "learning_rate": 1.3106403649960109e-05, "loss": 0.4490859806537628, "step": 3162 }, { "epoch": 0.8595108695652174, "grad_norm": 0.9348113524368481, "learning_rate": 1.3102132498054696e-05, "loss": 0.4055704176425934, "step": 3163 }, { "epoch": 0.8597826086956522, "grad_norm": 0.7507078851981628, "learning_rate": 1.3097860719885578e-05, "loss": 0.32025986909866333, "step": 3164 }, { "epoch": 0.8600543478260869, "grad_norm": 1.0539474390206647, "learning_rate": 1.309358831631515e-05, "loss": 0.5167649984359741, "step": 3165 }, { "epoch": 0.8603260869565217, "grad_norm": 1.1074945501628028, "learning_rate": 1.3089315288205933e-05, "loss": 0.5566813945770264, "step": 3166 }, { "epoch": 0.8605978260869566, "grad_norm": 0.7708093623680318, "learning_rate": 1.3085041636420573e-05, "loss": 0.3674086034297943, "step": 3167 }, { "epoch": 0.8608695652173913, "grad_norm": 1.0651704438913696, "learning_rate": 1.308076736182184e-05, "loss": 0.5766773819923401, "step": 3168 }, { "epoch": 0.8611413043478261, "grad_norm": 1.1364491951709241, "learning_rate": 1.3076492465272632e-05, "loss": 0.5567755103111267, "step": 3169 }, { "epoch": 0.8614130434782609, "grad_norm": 0.9915632182881393, "learning_rate": 1.3072216947635972e-05, "loss": 0.49117958545684814, "step": 3170 }, { "epoch": 0.8616847826086956, "grad_norm": 0.9606062926110982, "learning_rate": 1.306794080977501e-05, "loss": 0.5097065567970276, "step": 3171 }, { "epoch": 0.8619565217391304, "grad_norm": 1.1721177568037326, "learning_rate": 1.3063664052553017e-05, "loss": 0.46824073791503906, "step": 3172 }, { "epoch": 0.8622282608695652, "grad_norm": 1.1425747213627113, "learning_rate": 1.3059386676833398e-05, "loss": 0.5464867353439331, "step": 3173 }, { "epoch": 0.8625, "grad_norm": 1.0757507966868172, "learning_rate": 1.3055108683479672e-05, "loss": 0.48497432470321655, "step": 3174 }, { "epoch": 0.8627717391304348, "grad_norm": 0.9813781682247075, "learning_rate": 1.305083007335549e-05, "loss": 0.4037540555000305, "step": 3175 }, { "epoch": 0.8630434782608696, "grad_norm": 1.0018790208164865, "learning_rate": 1.3046550847324621e-05, "loss": 0.43181368708610535, "step": 3176 }, { "epoch": 0.8633152173913043, "grad_norm": 1.020583452664558, "learning_rate": 1.3042271006250965e-05, "loss": 0.5522676706314087, "step": 3177 }, { "epoch": 0.8635869565217391, "grad_norm": 0.9283264523966569, "learning_rate": 1.3037990550998544e-05, "loss": 0.36947494745254517, "step": 3178 }, { "epoch": 0.8638586956521739, "grad_norm": 0.9663287506075973, "learning_rate": 1.3033709482431503e-05, "loss": 0.4556308388710022, "step": 3179 }, { "epoch": 0.8641304347826086, "grad_norm": 1.040271653347593, "learning_rate": 1.3029427801414109e-05, "loss": 0.5050100684165955, "step": 3180 }, { "epoch": 0.8644021739130435, "grad_norm": 1.1658151446743619, "learning_rate": 1.302514550881076e-05, "loss": 0.5453274250030518, "step": 3181 }, { "epoch": 0.8646739130434783, "grad_norm": 0.9534020734646449, "learning_rate": 1.3020862605485966e-05, "loss": 0.4185507893562317, "step": 3182 }, { "epoch": 0.8649456521739131, "grad_norm": 1.0677447378992562, "learning_rate": 1.3016579092304373e-05, "loss": 0.4427710175514221, "step": 3183 }, { "epoch": 0.8652173913043478, "grad_norm": 1.1904725114311863, "learning_rate": 1.301229497013074e-05, "loss": 0.43922287225723267, "step": 3184 }, { "epoch": 0.8654891304347826, "grad_norm": 1.0909116170731405, "learning_rate": 1.3008010239829951e-05, "loss": 0.5135825872421265, "step": 3185 }, { "epoch": 0.8657608695652174, "grad_norm": 1.1914514399089984, "learning_rate": 1.3003724902267023e-05, "loss": 0.5693268179893494, "step": 3186 }, { "epoch": 0.8660326086956521, "grad_norm": 0.9777444504596304, "learning_rate": 1.2999438958307076e-05, "loss": 0.4223870038986206, "step": 3187 }, { "epoch": 0.866304347826087, "grad_norm": 1.0555807621758713, "learning_rate": 1.299515240881537e-05, "loss": 0.5091903209686279, "step": 3188 }, { "epoch": 0.8665760869565218, "grad_norm": 0.9396026020657751, "learning_rate": 1.2990865254657281e-05, "loss": 0.45034849643707275, "step": 3189 }, { "epoch": 0.8668478260869565, "grad_norm": 0.9810498365925829, "learning_rate": 1.2986577496698303e-05, "loss": 0.4862082898616791, "step": 3190 }, { "epoch": 0.8671195652173913, "grad_norm": 1.1184374871489855, "learning_rate": 1.298228913580406e-05, "loss": 0.4995931386947632, "step": 3191 }, { "epoch": 0.8673913043478261, "grad_norm": 0.9633509450365593, "learning_rate": 1.297800017284029e-05, "loss": 0.4148726165294647, "step": 3192 }, { "epoch": 0.8676630434782608, "grad_norm": 1.1599157312516932, "learning_rate": 1.297371060867286e-05, "loss": 0.4464147090911865, "step": 3193 }, { "epoch": 0.8679347826086956, "grad_norm": 0.9496206955616477, "learning_rate": 1.2969420444167753e-05, "loss": 0.5226848125457764, "step": 3194 }, { "epoch": 0.8682065217391305, "grad_norm": 1.121272657297292, "learning_rate": 1.296512968019107e-05, "loss": 0.5254678726196289, "step": 3195 }, { "epoch": 0.8684782608695653, "grad_norm": 1.1840815715344482, "learning_rate": 1.2960838317609046e-05, "loss": 0.5062006115913391, "step": 3196 }, { "epoch": 0.86875, "grad_norm": 1.1841306650029229, "learning_rate": 1.295654635728802e-05, "loss": 0.5460925698280334, "step": 3197 }, { "epoch": 0.8690217391304348, "grad_norm": 0.9507958419588344, "learning_rate": 1.2952253800094467e-05, "loss": 0.4763356149196625, "step": 3198 }, { "epoch": 0.8692934782608696, "grad_norm": 1.1963343635240795, "learning_rate": 1.294796064689497e-05, "loss": 0.5428792834281921, "step": 3199 }, { "epoch": 0.8695652173913043, "grad_norm": 1.0572191515475213, "learning_rate": 1.294366689855624e-05, "loss": 0.41682881116867065, "step": 3200 }, { "epoch": 0.8698369565217391, "grad_norm": 1.0679078539826454, "learning_rate": 1.2939372555945112e-05, "loss": 0.4287721812725067, "step": 3201 }, { "epoch": 0.8701086956521739, "grad_norm": 1.0912093355462937, "learning_rate": 1.2935077619928525e-05, "loss": 0.48378264904022217, "step": 3202 }, { "epoch": 0.8703804347826087, "grad_norm": 1.1147326217656204, "learning_rate": 1.2930782091373551e-05, "loss": 0.454174280166626, "step": 3203 }, { "epoch": 0.8706521739130435, "grad_norm": 0.9645455605528163, "learning_rate": 1.2926485971147385e-05, "loss": 0.4149933159351349, "step": 3204 }, { "epoch": 0.8709239130434783, "grad_norm": 1.0777332962555781, "learning_rate": 1.2922189260117324e-05, "loss": 0.5047105550765991, "step": 3205 }, { "epoch": 0.871195652173913, "grad_norm": 1.040407917347822, "learning_rate": 1.2917891959150808e-05, "loss": 0.5259917378425598, "step": 3206 }, { "epoch": 0.8714673913043478, "grad_norm": 1.0551051469261499, "learning_rate": 1.2913594069115371e-05, "loss": 0.4291968047618866, "step": 3207 }, { "epoch": 0.8717391304347826, "grad_norm": 0.9733926725528432, "learning_rate": 1.2909295590878686e-05, "loss": 0.4408950209617615, "step": 3208 }, { "epoch": 0.8720108695652173, "grad_norm": 1.242342049582915, "learning_rate": 1.2904996525308534e-05, "loss": 0.5425246953964233, "step": 3209 }, { "epoch": 0.8722826086956522, "grad_norm": 0.9424685976471251, "learning_rate": 1.2900696873272823e-05, "loss": 0.44834280014038086, "step": 3210 }, { "epoch": 0.872554347826087, "grad_norm": 1.0410656938431773, "learning_rate": 1.2896396635639565e-05, "loss": 0.47874701023101807, "step": 3211 }, { "epoch": 0.8728260869565218, "grad_norm": 0.7888469477568286, "learning_rate": 1.2892095813276903e-05, "loss": 0.28115400671958923, "step": 3212 }, { "epoch": 0.8730978260869565, "grad_norm": 0.8813234763008719, "learning_rate": 1.2887794407053096e-05, "loss": 0.36886322498321533, "step": 3213 }, { "epoch": 0.8733695652173913, "grad_norm": 1.1232347047606208, "learning_rate": 1.2883492417836519e-05, "loss": 0.44844022393226624, "step": 3214 }, { "epoch": 0.873641304347826, "grad_norm": 1.0398142259812553, "learning_rate": 1.287918984649566e-05, "loss": 0.5029596090316772, "step": 3215 }, { "epoch": 0.8739130434782608, "grad_norm": 1.0858049181634952, "learning_rate": 1.2874886693899136e-05, "loss": 0.5298219919204712, "step": 3216 }, { "epoch": 0.8741847826086957, "grad_norm": 0.9521780530604192, "learning_rate": 1.2870582960915669e-05, "loss": 0.41765135526657104, "step": 3217 }, { "epoch": 0.8744565217391305, "grad_norm": 1.082327863870896, "learning_rate": 1.2866278648414108e-05, "loss": 0.4340866208076477, "step": 3218 }, { "epoch": 0.8747282608695652, "grad_norm": 1.1769247467725572, "learning_rate": 1.2861973757263416e-05, "loss": 0.6109941005706787, "step": 3219 }, { "epoch": 0.875, "grad_norm": 0.956407974358895, "learning_rate": 1.2857668288332666e-05, "loss": 0.46437644958496094, "step": 3220 }, { "epoch": 0.8752717391304348, "grad_norm": 1.0528981830167774, "learning_rate": 1.2853362242491054e-05, "loss": 0.478472501039505, "step": 3221 }, { "epoch": 0.8755434782608695, "grad_norm": 1.1259676922940336, "learning_rate": 1.2849055620607897e-05, "loss": 0.5633730888366699, "step": 3222 }, { "epoch": 0.8758152173913043, "grad_norm": 1.1551016183181362, "learning_rate": 1.2844748423552617e-05, "loss": 0.5189248323440552, "step": 3223 }, { "epoch": 0.8760869565217392, "grad_norm": 1.0928283590321355, "learning_rate": 1.2840440652194765e-05, "loss": 0.4764435887336731, "step": 3224 }, { "epoch": 0.876358695652174, "grad_norm": 1.103900939405807, "learning_rate": 1.2836132307403994e-05, "loss": 0.522065281867981, "step": 3225 }, { "epoch": 0.8766304347826087, "grad_norm": 0.937366308366416, "learning_rate": 1.2831823390050084e-05, "loss": 0.4453509449958801, "step": 3226 }, { "epoch": 0.8769021739130435, "grad_norm": 0.9680542259799234, "learning_rate": 1.2827513901002928e-05, "loss": 0.4174324572086334, "step": 3227 }, { "epoch": 0.8771739130434782, "grad_norm": 1.138001788771501, "learning_rate": 1.2823203841132527e-05, "loss": 0.4851629137992859, "step": 3228 }, { "epoch": 0.877445652173913, "grad_norm": 0.8715721060570175, "learning_rate": 1.2818893211309007e-05, "loss": 0.3391711115837097, "step": 3229 }, { "epoch": 0.8777173913043478, "grad_norm": 1.0379652242068564, "learning_rate": 1.2814582012402604e-05, "loss": 0.5414408445358276, "step": 3230 }, { "epoch": 0.8779891304347827, "grad_norm": 1.218387597647255, "learning_rate": 1.2810270245283672e-05, "loss": 0.5532763004302979, "step": 3231 }, { "epoch": 0.8782608695652174, "grad_norm": 0.9944871082688416, "learning_rate": 1.2805957910822678e-05, "loss": 0.4435645341873169, "step": 3232 }, { "epoch": 0.8785326086956522, "grad_norm": 1.0644621216371843, "learning_rate": 1.2801645009890195e-05, "loss": 0.541118323802948, "step": 3233 }, { "epoch": 0.878804347826087, "grad_norm": 0.9962933982458446, "learning_rate": 1.2797331543356933e-05, "loss": 0.45698386430740356, "step": 3234 }, { "epoch": 0.8790760869565217, "grad_norm": 1.0002507972802561, "learning_rate": 1.2793017512093688e-05, "loss": 0.42171573638916016, "step": 3235 }, { "epoch": 0.8793478260869565, "grad_norm": 1.0852838347441989, "learning_rate": 1.2788702916971396e-05, "loss": 0.4481276869773865, "step": 3236 }, { "epoch": 0.8796195652173913, "grad_norm": 1.0947303935807529, "learning_rate": 1.2784387758861088e-05, "loss": 0.4809560477733612, "step": 3237 }, { "epoch": 0.8798913043478261, "grad_norm": 1.0179277528464181, "learning_rate": 1.2780072038633913e-05, "loss": 0.46270620822906494, "step": 3238 }, { "epoch": 0.8801630434782609, "grad_norm": 1.13458924006036, "learning_rate": 1.277575575716114e-05, "loss": 0.4407579302787781, "step": 3239 }, { "epoch": 0.8804347826086957, "grad_norm": 1.0005748537800174, "learning_rate": 1.2771438915314147e-05, "loss": 0.4262901842594147, "step": 3240 }, { "epoch": 0.8807065217391304, "grad_norm": 1.1387691386876018, "learning_rate": 1.276712151396442e-05, "loss": 0.469608873128891, "step": 3241 }, { "epoch": 0.8809782608695652, "grad_norm": 0.9889346509554342, "learning_rate": 1.2762803553983573e-05, "loss": 0.39145970344543457, "step": 3242 }, { "epoch": 0.88125, "grad_norm": 1.019010965871774, "learning_rate": 1.2758485036243316e-05, "loss": 0.4982009530067444, "step": 3243 }, { "epoch": 0.8815217391304347, "grad_norm": 0.9993243850797184, "learning_rate": 1.2754165961615482e-05, "loss": 0.44325852394104004, "step": 3244 }, { "epoch": 0.8817934782608695, "grad_norm": 0.9983779301667773, "learning_rate": 1.2749846330972009e-05, "loss": 0.45013341307640076, "step": 3245 }, { "epoch": 0.8820652173913044, "grad_norm": 1.1082301672130064, "learning_rate": 1.2745526145184952e-05, "loss": 0.37722617387771606, "step": 3246 }, { "epoch": 0.8823369565217392, "grad_norm": 0.9637110698571444, "learning_rate": 1.274120540512648e-05, "loss": 0.466321736574173, "step": 3247 }, { "epoch": 0.8826086956521739, "grad_norm": 1.1608001649265123, "learning_rate": 1.273688411166887e-05, "loss": 0.518463134765625, "step": 3248 }, { "epoch": 0.8828804347826087, "grad_norm": 1.018149022008698, "learning_rate": 1.2732562265684512e-05, "loss": 0.4769577980041504, "step": 3249 }, { "epoch": 0.8831521739130435, "grad_norm": 1.0236853417988467, "learning_rate": 1.2728239868045908e-05, "loss": 0.5208179354667664, "step": 3250 }, { "epoch": 0.8834239130434782, "grad_norm": 1.258946192097666, "learning_rate": 1.2723916919625667e-05, "loss": 0.5589504837989807, "step": 3251 }, { "epoch": 0.883695652173913, "grad_norm": 1.216280671480272, "learning_rate": 1.271959342129652e-05, "loss": 0.5825835466384888, "step": 3252 }, { "epoch": 0.8839673913043479, "grad_norm": 1.1161539895098798, "learning_rate": 1.27152693739313e-05, "loss": 0.519960880279541, "step": 3253 }, { "epoch": 0.8842391304347826, "grad_norm": 1.0824888320972956, "learning_rate": 1.2710944778402949e-05, "loss": 0.4189082384109497, "step": 3254 }, { "epoch": 0.8845108695652174, "grad_norm": 1.0910052784265944, "learning_rate": 1.2706619635584528e-05, "loss": 0.41471654176712036, "step": 3255 }, { "epoch": 0.8847826086956522, "grad_norm": 1.1594903019143257, "learning_rate": 1.27022939463492e-05, "loss": 0.5023292303085327, "step": 3256 }, { "epoch": 0.8850543478260869, "grad_norm": 1.3029948311432658, "learning_rate": 1.2697967711570243e-05, "loss": 0.4503878355026245, "step": 3257 }, { "epoch": 0.8853260869565217, "grad_norm": 0.9878527884298685, "learning_rate": 1.269364093212105e-05, "loss": 0.3984636664390564, "step": 3258 }, { "epoch": 0.8855978260869565, "grad_norm": 1.0266156337016752, "learning_rate": 1.268931360887511e-05, "loss": 0.46545320749282837, "step": 3259 }, { "epoch": 0.8858695652173914, "grad_norm": 1.0337052566123952, "learning_rate": 1.2684985742706037e-05, "loss": 0.4477226734161377, "step": 3260 }, { "epoch": 0.8861413043478261, "grad_norm": 0.9622446362531916, "learning_rate": 1.2680657334487547e-05, "loss": 0.4637336730957031, "step": 3261 }, { "epoch": 0.8864130434782609, "grad_norm": 0.9642266853670572, "learning_rate": 1.2676328385093464e-05, "loss": 0.415046751499176, "step": 3262 }, { "epoch": 0.8866847826086957, "grad_norm": 1.017397686320933, "learning_rate": 1.2671998895397725e-05, "loss": 0.470647394657135, "step": 3263 }, { "epoch": 0.8869565217391304, "grad_norm": 0.9733713014999634, "learning_rate": 1.2667668866274371e-05, "loss": 0.4317277669906616, "step": 3264 }, { "epoch": 0.8872282608695652, "grad_norm": 1.0383646207246457, "learning_rate": 1.2663338298597562e-05, "loss": 0.46333253383636475, "step": 3265 }, { "epoch": 0.8875, "grad_norm": 1.1330678625592405, "learning_rate": 1.2659007193241557e-05, "loss": 0.46140602231025696, "step": 3266 }, { "epoch": 0.8877717391304348, "grad_norm": 0.9057837059054058, "learning_rate": 1.2654675551080724e-05, "loss": 0.36759305000305176, "step": 3267 }, { "epoch": 0.8880434782608696, "grad_norm": 0.9457959529270475, "learning_rate": 1.2650343372989546e-05, "loss": 0.40859514474868774, "step": 3268 }, { "epoch": 0.8883152173913044, "grad_norm": 1.0284910891011416, "learning_rate": 1.264601065984261e-05, "loss": 0.44322139024734497, "step": 3269 }, { "epoch": 0.8885869565217391, "grad_norm": 0.8617495203117129, "learning_rate": 1.264167741251461e-05, "loss": 0.38369041681289673, "step": 3270 }, { "epoch": 0.8888586956521739, "grad_norm": 0.9621806064315128, "learning_rate": 1.2637343631880352e-05, "loss": 0.348884642124176, "step": 3271 }, { "epoch": 0.8891304347826087, "grad_norm": 1.1216034659040421, "learning_rate": 1.2633009318814742e-05, "loss": 0.48600703477859497, "step": 3272 }, { "epoch": 0.8894021739130434, "grad_norm": 1.1854544489966652, "learning_rate": 1.2628674474192802e-05, "loss": 0.5821530818939209, "step": 3273 }, { "epoch": 0.8896739130434783, "grad_norm": 1.1591928000904048, "learning_rate": 1.2624339098889655e-05, "loss": 0.5185285806655884, "step": 3274 }, { "epoch": 0.8899456521739131, "grad_norm": 1.014856945351836, "learning_rate": 1.262000319378054e-05, "loss": 0.39207613468170166, "step": 3275 }, { "epoch": 0.8902173913043478, "grad_norm": 1.1232672087715037, "learning_rate": 1.2615666759740788e-05, "loss": 0.5334481596946716, "step": 3276 }, { "epoch": 0.8904891304347826, "grad_norm": 1.067870396325979, "learning_rate": 1.2611329797645851e-05, "loss": 0.5714041590690613, "step": 3277 }, { "epoch": 0.8907608695652174, "grad_norm": 1.1026369853386018, "learning_rate": 1.2606992308371282e-05, "loss": 0.47404858469963074, "step": 3278 }, { "epoch": 0.8910326086956522, "grad_norm": 1.010852246881803, "learning_rate": 1.260265429279274e-05, "loss": 0.43288421630859375, "step": 3279 }, { "epoch": 0.8913043478260869, "grad_norm": 1.111038497377992, "learning_rate": 1.259831575178599e-05, "loss": 0.5215749740600586, "step": 3280 }, { "epoch": 0.8915760869565217, "grad_norm": 1.2085746087130866, "learning_rate": 1.2593976686226906e-05, "loss": 0.605160117149353, "step": 3281 }, { "epoch": 0.8918478260869566, "grad_norm": 1.0809750021381221, "learning_rate": 1.2589637096991463e-05, "loss": 0.4583700895309448, "step": 3282 }, { "epoch": 0.8921195652173913, "grad_norm": 1.1357708125331611, "learning_rate": 1.2585296984955746e-05, "loss": 0.5091573596000671, "step": 3283 }, { "epoch": 0.8923913043478261, "grad_norm": 0.9090774411481523, "learning_rate": 1.2580956350995944e-05, "loss": 0.39660000801086426, "step": 3284 }, { "epoch": 0.8926630434782609, "grad_norm": 0.9227091224968065, "learning_rate": 1.2576615195988349e-05, "loss": 0.37882277369499207, "step": 3285 }, { "epoch": 0.8929347826086956, "grad_norm": 1.0329173648720968, "learning_rate": 1.2572273520809366e-05, "loss": 0.47069549560546875, "step": 3286 }, { "epoch": 0.8932065217391304, "grad_norm": 1.1182420363500958, "learning_rate": 1.2567931326335496e-05, "loss": 0.5354431867599487, "step": 3287 }, { "epoch": 0.8934782608695652, "grad_norm": 1.1349682491041941, "learning_rate": 1.256358861344335e-05, "loss": 0.47792142629623413, "step": 3288 }, { "epoch": 0.89375, "grad_norm": 1.121253470785749, "learning_rate": 1.2559245383009642e-05, "loss": 0.4813023805618286, "step": 3289 }, { "epoch": 0.8940217391304348, "grad_norm": 0.9621586270566518, "learning_rate": 1.2554901635911188e-05, "loss": 0.4652003049850464, "step": 3290 }, { "epoch": 0.8942934782608696, "grad_norm": 0.9607915892569667, "learning_rate": 1.2550557373024915e-05, "loss": 0.40397530794143677, "step": 3291 }, { "epoch": 0.8945652173913043, "grad_norm": 1.0116449853068588, "learning_rate": 1.2546212595227845e-05, "loss": 0.39343684911727905, "step": 3292 }, { "epoch": 0.8948369565217391, "grad_norm": 1.136944099066455, "learning_rate": 1.2541867303397114e-05, "loss": 0.43624648451805115, "step": 3293 }, { "epoch": 0.8951086956521739, "grad_norm": 0.9845448276295256, "learning_rate": 1.2537521498409955e-05, "loss": 0.4237091541290283, "step": 3294 }, { "epoch": 0.8953804347826086, "grad_norm": 1.0653594162803413, "learning_rate": 1.2533175181143704e-05, "loss": 0.43340712785720825, "step": 3295 }, { "epoch": 0.8956521739130435, "grad_norm": 1.0356459941375757, "learning_rate": 1.252882835247581e-05, "loss": 0.45025551319122314, "step": 3296 }, { "epoch": 0.8959239130434783, "grad_norm": 1.1314940753455573, "learning_rate": 1.2524481013283809e-05, "loss": 0.4503944516181946, "step": 3297 }, { "epoch": 0.8961956521739131, "grad_norm": 1.1357363057877867, "learning_rate": 1.2520133164445354e-05, "loss": 0.4533807039260864, "step": 3298 }, { "epoch": 0.8964673913043478, "grad_norm": 0.9017840265645415, "learning_rate": 1.2515784806838196e-05, "loss": 0.38256973028182983, "step": 3299 }, { "epoch": 0.8967391304347826, "grad_norm": 1.0413454510672588, "learning_rate": 1.2511435941340187e-05, "loss": 0.45144838094711304, "step": 3300 }, { "epoch": 0.8970108695652174, "grad_norm": 1.0629677037725156, "learning_rate": 1.2507086568829285e-05, "loss": 0.44102203845977783, "step": 3301 }, { "epoch": 0.8972826086956521, "grad_norm": 1.1878592811740252, "learning_rate": 1.2502736690183545e-05, "loss": 0.516616702079773, "step": 3302 }, { "epoch": 0.897554347826087, "grad_norm": 1.0944638461771365, "learning_rate": 1.2498386306281132e-05, "loss": 0.4856458902359009, "step": 3303 }, { "epoch": 0.8978260869565218, "grad_norm": 0.8808560315271154, "learning_rate": 1.2494035418000312e-05, "loss": 0.3948575258255005, "step": 3304 }, { "epoch": 0.8980978260869565, "grad_norm": 1.2021411404989981, "learning_rate": 1.248968402621944e-05, "loss": 0.5778157114982605, "step": 3305 }, { "epoch": 0.8983695652173913, "grad_norm": 1.0312474457327325, "learning_rate": 1.2485332131816988e-05, "loss": 0.42894625663757324, "step": 3306 }, { "epoch": 0.8986413043478261, "grad_norm": 1.2628900228504156, "learning_rate": 1.2480979735671524e-05, "loss": 0.5469780564308167, "step": 3307 }, { "epoch": 0.8989130434782608, "grad_norm": 0.9090730894187339, "learning_rate": 1.2476626838661713e-05, "loss": 0.38652729988098145, "step": 3308 }, { "epoch": 0.8991847826086956, "grad_norm": 1.0410972797550133, "learning_rate": 1.2472273441666332e-05, "loss": 0.5110565423965454, "step": 3309 }, { "epoch": 0.8994565217391305, "grad_norm": 1.08429877491126, "learning_rate": 1.2467919545564245e-05, "loss": 0.41544637084007263, "step": 3310 }, { "epoch": 0.8997282608695653, "grad_norm": 1.0542008212160985, "learning_rate": 1.2463565151234429e-05, "loss": 0.5323896408081055, "step": 3311 }, { "epoch": 0.9, "grad_norm": 0.9203292139861885, "learning_rate": 1.2459210259555954e-05, "loss": 0.4304751753807068, "step": 3312 }, { "epoch": 0.9002717391304348, "grad_norm": 1.0484621658455056, "learning_rate": 1.2454854871407993e-05, "loss": 0.46432843804359436, "step": 3313 }, { "epoch": 0.9005434782608696, "grad_norm": 1.0647072235031978, "learning_rate": 1.245049898766982e-05, "loss": 0.5043153762817383, "step": 3314 }, { "epoch": 0.9008152173913043, "grad_norm": 1.106396868239588, "learning_rate": 1.2446142609220806e-05, "loss": 0.5151565074920654, "step": 3315 }, { "epoch": 0.9010869565217391, "grad_norm": 1.1250980355116469, "learning_rate": 1.2441785736940427e-05, "loss": 0.4991414546966553, "step": 3316 }, { "epoch": 0.9013586956521739, "grad_norm": 0.9800718278838805, "learning_rate": 1.2437428371708253e-05, "loss": 0.47164082527160645, "step": 3317 }, { "epoch": 0.9016304347826087, "grad_norm": 1.0670807064931576, "learning_rate": 1.2433070514403957e-05, "loss": 0.4851253032684326, "step": 3318 }, { "epoch": 0.9019021739130435, "grad_norm": 1.2548908989277985, "learning_rate": 1.2428712165907313e-05, "loss": 0.5687408447265625, "step": 3319 }, { "epoch": 0.9021739130434783, "grad_norm": 0.9462471013457364, "learning_rate": 1.2424353327098187e-05, "loss": 0.41437000036239624, "step": 3320 }, { "epoch": 0.902445652173913, "grad_norm": 1.2084248184304849, "learning_rate": 1.2419993998856553e-05, "loss": 0.6047589778900146, "step": 3321 }, { "epoch": 0.9027173913043478, "grad_norm": 1.0599465370920245, "learning_rate": 1.2415634182062478e-05, "loss": 0.45440927147865295, "step": 3322 }, { "epoch": 0.9029891304347826, "grad_norm": 0.9736982484152288, "learning_rate": 1.2411273877596126e-05, "loss": 0.39436542987823486, "step": 3323 }, { "epoch": 0.9032608695652173, "grad_norm": 1.0363305198895354, "learning_rate": 1.2406913086337769e-05, "loss": 0.40105220675468445, "step": 3324 }, { "epoch": 0.9035326086956522, "grad_norm": 1.084598232879997, "learning_rate": 1.2402551809167762e-05, "loss": 0.4767879247665405, "step": 3325 }, { "epoch": 0.903804347826087, "grad_norm": 1.152870053743365, "learning_rate": 1.2398190046966576e-05, "loss": 0.5499680042266846, "step": 3326 }, { "epoch": 0.9040760869565218, "grad_norm": 1.0248813061881636, "learning_rate": 1.2393827800614766e-05, "loss": 0.41679859161376953, "step": 3327 }, { "epoch": 0.9043478260869565, "grad_norm": 1.0839895702441766, "learning_rate": 1.238946507099299e-05, "loss": 0.5073221921920776, "step": 3328 }, { "epoch": 0.9046195652173913, "grad_norm": 1.2067147109305651, "learning_rate": 1.2385101858982004e-05, "loss": 0.4956786632537842, "step": 3329 }, { "epoch": 0.904891304347826, "grad_norm": 1.0072682144630494, "learning_rate": 1.2380738165462662e-05, "loss": 0.4219246506690979, "step": 3330 }, { "epoch": 0.9051630434782608, "grad_norm": 1.1629064553266257, "learning_rate": 1.2376373991315911e-05, "loss": 0.4811197519302368, "step": 3331 }, { "epoch": 0.9054347826086957, "grad_norm": 0.9681791152541607, "learning_rate": 1.23720093374228e-05, "loss": 0.4105963706970215, "step": 3332 }, { "epoch": 0.9057065217391305, "grad_norm": 0.9903953555234866, "learning_rate": 1.2367644204664468e-05, "loss": 0.4794158637523651, "step": 3333 }, { "epoch": 0.9059782608695652, "grad_norm": 0.9977659841075935, "learning_rate": 1.2363278593922161e-05, "loss": 0.41547495126724243, "step": 3334 }, { "epoch": 0.90625, "grad_norm": 1.1073436353476622, "learning_rate": 1.2358912506077214e-05, "loss": 0.48491236567497253, "step": 3335 }, { "epoch": 0.9065217391304348, "grad_norm": 1.1638483670548143, "learning_rate": 1.2354545942011058e-05, "loss": 0.48564621806144714, "step": 3336 }, { "epoch": 0.9067934782608695, "grad_norm": 1.2716257153956294, "learning_rate": 1.2350178902605228e-05, "loss": 0.5360954999923706, "step": 3337 }, { "epoch": 0.9070652173913043, "grad_norm": 0.9964078883865671, "learning_rate": 1.234581138874134e-05, "loss": 0.4892836809158325, "step": 3338 }, { "epoch": 0.9073369565217392, "grad_norm": 0.865554620745503, "learning_rate": 1.2341443401301123e-05, "loss": 0.32444074749946594, "step": 3339 }, { "epoch": 0.907608695652174, "grad_norm": 1.2586034096298717, "learning_rate": 1.2337074941166392e-05, "loss": 0.5847055912017822, "step": 3340 }, { "epoch": 0.9078804347826087, "grad_norm": 0.962799229567299, "learning_rate": 1.2332706009219055e-05, "loss": 0.41933923959732056, "step": 3341 }, { "epoch": 0.9081521739130435, "grad_norm": 0.9303695118077486, "learning_rate": 1.2328336606341124e-05, "loss": 0.42747214436531067, "step": 3342 }, { "epoch": 0.9084239130434782, "grad_norm": 1.0812680683252034, "learning_rate": 1.2323966733414698e-05, "loss": 0.5310288667678833, "step": 3343 }, { "epoch": 0.908695652173913, "grad_norm": 0.9835762514050163, "learning_rate": 1.2319596391321977e-05, "loss": 0.4187748432159424, "step": 3344 }, { "epoch": 0.9089673913043478, "grad_norm": 1.127450513396706, "learning_rate": 1.2315225580945252e-05, "loss": 0.4998663067817688, "step": 3345 }, { "epoch": 0.9092391304347827, "grad_norm": 1.1257052440395396, "learning_rate": 1.2310854303166909e-05, "loss": 0.5457580089569092, "step": 3346 }, { "epoch": 0.9095108695652174, "grad_norm": 1.169762429466756, "learning_rate": 1.230648255886943e-05, "loss": 0.5280800461769104, "step": 3347 }, { "epoch": 0.9097826086956522, "grad_norm": 1.2418224176553807, "learning_rate": 1.230211034893539e-05, "loss": 0.5474176406860352, "step": 3348 }, { "epoch": 0.910054347826087, "grad_norm": 0.9507345789533959, "learning_rate": 1.2297737674247454e-05, "loss": 0.4142618179321289, "step": 3349 }, { "epoch": 0.9103260869565217, "grad_norm": 1.1626760060313936, "learning_rate": 1.2293364535688391e-05, "loss": 0.47138267755508423, "step": 3350 }, { "epoch": 0.9105978260869565, "grad_norm": 0.8563154151778581, "learning_rate": 1.2288990934141051e-05, "loss": 0.3687793016433716, "step": 3351 }, { "epoch": 0.9108695652173913, "grad_norm": 1.0267220960033483, "learning_rate": 1.228461687048839e-05, "loss": 0.44568678736686707, "step": 3352 }, { "epoch": 0.9111413043478261, "grad_norm": 1.0771896872879367, "learning_rate": 1.228024234561345e-05, "loss": 0.41343188285827637, "step": 3353 }, { "epoch": 0.9114130434782609, "grad_norm": 1.208545383884564, "learning_rate": 1.2275867360399367e-05, "loss": 0.519913911819458, "step": 3354 }, { "epoch": 0.9116847826086957, "grad_norm": 1.0615219118930157, "learning_rate": 1.227149191572937e-05, "loss": 0.4736397862434387, "step": 3355 }, { "epoch": 0.9119565217391304, "grad_norm": 1.0853558567019121, "learning_rate": 1.2267116012486779e-05, "loss": 0.5194226503372192, "step": 3356 }, { "epoch": 0.9122282608695652, "grad_norm": 1.0414880676247469, "learning_rate": 1.2262739651555011e-05, "loss": 0.4392288327217102, "step": 3357 }, { "epoch": 0.9125, "grad_norm": 1.2092316770365, "learning_rate": 1.2258362833817574e-05, "loss": 0.5311858654022217, "step": 3358 }, { "epoch": 0.9127717391304347, "grad_norm": 0.9077758490602154, "learning_rate": 1.2253985560158064e-05, "loss": 0.40498417615890503, "step": 3359 }, { "epoch": 0.9130434782608695, "grad_norm": 1.004142484602884, "learning_rate": 1.2249607831460177e-05, "loss": 0.4033290147781372, "step": 3360 }, { "epoch": 0.9133152173913044, "grad_norm": 0.929711926424687, "learning_rate": 1.224522964860769e-05, "loss": 0.43382495641708374, "step": 3361 }, { "epoch": 0.9135869565217392, "grad_norm": 1.069211525363726, "learning_rate": 1.2240851012484482e-05, "loss": 0.447823166847229, "step": 3362 }, { "epoch": 0.9138586956521739, "grad_norm": 1.0082970186144566, "learning_rate": 1.223647192397452e-05, "loss": 0.42403990030288696, "step": 3363 }, { "epoch": 0.9141304347826087, "grad_norm": 0.9754555453146322, "learning_rate": 1.2232092383961862e-05, "loss": 0.43810057640075684, "step": 3364 }, { "epoch": 0.9144021739130435, "grad_norm": 0.8872065747339872, "learning_rate": 1.2227712393330653e-05, "loss": 0.3855000138282776, "step": 3365 }, { "epoch": 0.9146739130434782, "grad_norm": 1.100532024901279, "learning_rate": 1.2223331952965136e-05, "loss": 0.5338557958602905, "step": 3366 }, { "epoch": 0.914945652173913, "grad_norm": 1.2991797125196174, "learning_rate": 1.221895106374964e-05, "loss": 0.4941261410713196, "step": 3367 }, { "epoch": 0.9152173913043479, "grad_norm": 0.9317914365404851, "learning_rate": 1.2214569726568587e-05, "loss": 0.42957359552383423, "step": 3368 }, { "epoch": 0.9154891304347826, "grad_norm": 1.0637799114726487, "learning_rate": 1.2210187942306491e-05, "loss": 0.5396064519882202, "step": 3369 }, { "epoch": 0.9157608695652174, "grad_norm": 0.9643728010782223, "learning_rate": 1.2205805711847949e-05, "loss": 0.39667460322380066, "step": 3370 }, { "epoch": 0.9160326086956522, "grad_norm": 1.0537638314580093, "learning_rate": 1.2201423036077657e-05, "loss": 0.43179646134376526, "step": 3371 }, { "epoch": 0.9163043478260869, "grad_norm": 1.1597228623043139, "learning_rate": 1.2197039915880395e-05, "loss": 0.5608896017074585, "step": 3372 }, { "epoch": 0.9165760869565217, "grad_norm": 1.069341662279499, "learning_rate": 1.219265635214104e-05, "loss": 0.5034765601158142, "step": 3373 }, { "epoch": 0.9168478260869565, "grad_norm": 1.080231301158431, "learning_rate": 1.2188272345744543e-05, "loss": 0.4638749659061432, "step": 3374 }, { "epoch": 0.9171195652173914, "grad_norm": 1.061054855959147, "learning_rate": 1.2183887897575959e-05, "loss": 0.4868876338005066, "step": 3375 }, { "epoch": 0.9173913043478261, "grad_norm": 1.0239112409230362, "learning_rate": 1.2179503008520432e-05, "loss": 0.5028243660926819, "step": 3376 }, { "epoch": 0.9176630434782609, "grad_norm": 1.0546660713110307, "learning_rate": 1.2175117679463187e-05, "loss": 0.4388684630393982, "step": 3377 }, { "epoch": 0.9179347826086957, "grad_norm": 0.9625965670661801, "learning_rate": 1.2170731911289541e-05, "loss": 0.4770496189594269, "step": 3378 }, { "epoch": 0.9182065217391304, "grad_norm": 1.1158103972997164, "learning_rate": 1.2166345704884903e-05, "loss": 0.5126324892044067, "step": 3379 }, { "epoch": 0.9184782608695652, "grad_norm": 1.0246310828951446, "learning_rate": 1.2161959061134765e-05, "loss": 0.4417303502559662, "step": 3380 }, { "epoch": 0.91875, "grad_norm": 0.8957451221257378, "learning_rate": 1.2157571980924713e-05, "loss": 0.31307435035705566, "step": 3381 }, { "epoch": 0.9190217391304348, "grad_norm": 1.11754874201008, "learning_rate": 1.2153184465140413e-05, "loss": 0.40855294466018677, "step": 3382 }, { "epoch": 0.9192934782608696, "grad_norm": 0.9895074602903056, "learning_rate": 1.2148796514667629e-05, "loss": 0.4772789180278778, "step": 3383 }, { "epoch": 0.9195652173913044, "grad_norm": 1.0293746329625013, "learning_rate": 1.2144408130392207e-05, "loss": 0.5794593691825867, "step": 3384 }, { "epoch": 0.9198369565217391, "grad_norm": 1.183328633267897, "learning_rate": 1.2140019313200078e-05, "loss": 0.48734137415885925, "step": 3385 }, { "epoch": 0.9201086956521739, "grad_norm": 1.0128761527917272, "learning_rate": 1.2135630063977266e-05, "loss": 0.40567201375961304, "step": 3386 }, { "epoch": 0.9203804347826087, "grad_norm": 0.9651462074990966, "learning_rate": 1.2131240383609881e-05, "loss": 0.40456056594848633, "step": 3387 }, { "epoch": 0.9206521739130434, "grad_norm": 0.8816734839109633, "learning_rate": 1.2126850272984118e-05, "loss": 0.3730168044567108, "step": 3388 }, { "epoch": 0.9209239130434783, "grad_norm": 0.9768909453102288, "learning_rate": 1.212245973298626e-05, "loss": 0.49069973826408386, "step": 3389 }, { "epoch": 0.9211956521739131, "grad_norm": 1.0015495960436456, "learning_rate": 1.2118068764502677e-05, "loss": 0.437478631734848, "step": 3390 }, { "epoch": 0.9214673913043478, "grad_norm": 0.835623456103697, "learning_rate": 1.2113677368419827e-05, "loss": 0.3612080216407776, "step": 3391 }, { "epoch": 0.9217391304347826, "grad_norm": 0.8314113582931579, "learning_rate": 1.2109285545624245e-05, "loss": 0.2928207516670227, "step": 3392 }, { "epoch": 0.9220108695652174, "grad_norm": 1.1212012468922345, "learning_rate": 1.2104893297002566e-05, "loss": 0.48369988799095154, "step": 3393 }, { "epoch": 0.9222826086956522, "grad_norm": 1.1569996753573755, "learning_rate": 1.2100500623441505e-05, "loss": 0.47995525598526, "step": 3394 }, { "epoch": 0.9225543478260869, "grad_norm": 1.0321115650935013, "learning_rate": 1.2096107525827859e-05, "loss": 0.45113512873649597, "step": 3395 }, { "epoch": 0.9228260869565217, "grad_norm": 1.1204567147007913, "learning_rate": 1.2091714005048516e-05, "loss": 0.5130646228790283, "step": 3396 }, { "epoch": 0.9230978260869566, "grad_norm": 0.8201129519054781, "learning_rate": 1.2087320061990443e-05, "loss": 0.3522183299064636, "step": 3397 }, { "epoch": 0.9233695652173913, "grad_norm": 0.8802801062466651, "learning_rate": 1.2082925697540705e-05, "loss": 0.467116117477417, "step": 3398 }, { "epoch": 0.9236413043478261, "grad_norm": 0.9243249824385622, "learning_rate": 1.207853091258644e-05, "loss": 0.3780699372291565, "step": 3399 }, { "epoch": 0.9239130434782609, "grad_norm": 1.0839921475828405, "learning_rate": 1.2074135708014868e-05, "loss": 0.5307721495628357, "step": 3400 }, { "epoch": 0.9241847826086956, "grad_norm": 0.8670448949401776, "learning_rate": 1.2069740084713307e-05, "loss": 0.40881675481796265, "step": 3401 }, { "epoch": 0.9244565217391304, "grad_norm": 0.9901646789996178, "learning_rate": 1.2065344043569154e-05, "loss": 0.4756937026977539, "step": 3402 }, { "epoch": 0.9247282608695652, "grad_norm": 1.059379476424103, "learning_rate": 1.2060947585469885e-05, "loss": 0.49896061420440674, "step": 3403 }, { "epoch": 0.925, "grad_norm": 1.179137507076081, "learning_rate": 1.2056550711303065e-05, "loss": 0.47651392221450806, "step": 3404 }, { "epoch": 0.9252717391304348, "grad_norm": 1.1898174309633447, "learning_rate": 1.2052153421956343e-05, "loss": 0.5595986843109131, "step": 3405 }, { "epoch": 0.9255434782608696, "grad_norm": 0.9742005748273915, "learning_rate": 1.2047755718317453e-05, "loss": 0.4307882785797119, "step": 3406 }, { "epoch": 0.9258152173913043, "grad_norm": 0.9003756800453611, "learning_rate": 1.204335760127421e-05, "loss": 0.4307297468185425, "step": 3407 }, { "epoch": 0.9260869565217391, "grad_norm": 1.0007474649802004, "learning_rate": 1.2038959071714511e-05, "loss": 0.401088148355484, "step": 3408 }, { "epoch": 0.9263586956521739, "grad_norm": 1.0479752244859357, "learning_rate": 1.2034560130526341e-05, "loss": 0.43383607268333435, "step": 3409 }, { "epoch": 0.9266304347826086, "grad_norm": 1.165728452916549, "learning_rate": 1.2030160778597762e-05, "loss": 0.5059629678726196, "step": 3410 }, { "epoch": 0.9269021739130435, "grad_norm": 0.9666073566003389, "learning_rate": 1.2025761016816925e-05, "loss": 0.4249190092086792, "step": 3411 }, { "epoch": 0.9271739130434783, "grad_norm": 1.100633706151084, "learning_rate": 1.2021360846072063e-05, "loss": 0.49613311886787415, "step": 3412 }, { "epoch": 0.9274456521739131, "grad_norm": 1.0231805690719014, "learning_rate": 1.2016960267251488e-05, "loss": 0.4905465245246887, "step": 3413 }, { "epoch": 0.9277173913043478, "grad_norm": 1.123890964861878, "learning_rate": 1.2012559281243597e-05, "loss": 0.5516200065612793, "step": 3414 }, { "epoch": 0.9279891304347826, "grad_norm": 0.9360207202032961, "learning_rate": 1.2008157888936867e-05, "loss": 0.3933059573173523, "step": 3415 }, { "epoch": 0.9282608695652174, "grad_norm": 1.045661744631118, "learning_rate": 1.200375609121986e-05, "loss": 0.46076303720474243, "step": 3416 }, { "epoch": 0.9285326086956521, "grad_norm": 1.0641362024580354, "learning_rate": 1.1999353888981218e-05, "loss": 0.44039595127105713, "step": 3417 }, { "epoch": 0.928804347826087, "grad_norm": 0.9397221167744441, "learning_rate": 1.1994951283109662e-05, "loss": 0.40611881017684937, "step": 3418 }, { "epoch": 0.9290760869565218, "grad_norm": 1.2612192666024082, "learning_rate": 1.1990548274494e-05, "loss": 0.5811107158660889, "step": 3419 }, { "epoch": 0.9293478260869565, "grad_norm": 1.1662704286080745, "learning_rate": 1.198614486402312e-05, "loss": 0.5002867579460144, "step": 3420 }, { "epoch": 0.9296195652173913, "grad_norm": 1.1152898676235659, "learning_rate": 1.1981741052585987e-05, "loss": 0.5614016056060791, "step": 3421 }, { "epoch": 0.9298913043478261, "grad_norm": 0.977349996497448, "learning_rate": 1.1977336841071655e-05, "loss": 0.490667462348938, "step": 3422 }, { "epoch": 0.9301630434782608, "grad_norm": 1.0410995386434898, "learning_rate": 1.1972932230369247e-05, "loss": 0.43259066343307495, "step": 3423 }, { "epoch": 0.9304347826086956, "grad_norm": 1.0255028856876975, "learning_rate": 1.1968527221367978e-05, "loss": 0.40533265471458435, "step": 3424 }, { "epoch": 0.9307065217391305, "grad_norm": 1.0568790994471382, "learning_rate": 1.1964121814957136e-05, "loss": 0.4781210720539093, "step": 3425 }, { "epoch": 0.9309782608695653, "grad_norm": 1.1881613481009525, "learning_rate": 1.1959716012026093e-05, "loss": 0.4893760681152344, "step": 3426 }, { "epoch": 0.93125, "grad_norm": 1.0949341985251428, "learning_rate": 1.19553098134643e-05, "loss": 0.4439198970794678, "step": 3427 }, { "epoch": 0.9315217391304348, "grad_norm": 1.0547173070801334, "learning_rate": 1.1950903220161286e-05, "loss": 0.45887356996536255, "step": 3428 }, { "epoch": 0.9317934782608696, "grad_norm": 1.0360924306161465, "learning_rate": 1.1946496233006659e-05, "loss": 0.49769946932792664, "step": 3429 }, { "epoch": 0.9320652173913043, "grad_norm": 1.089671837053031, "learning_rate": 1.1942088852890118e-05, "loss": 0.45108962059020996, "step": 3430 }, { "epoch": 0.9323369565217391, "grad_norm": 1.066777431841031, "learning_rate": 1.1937681080701424e-05, "loss": 0.4531171917915344, "step": 3431 }, { "epoch": 0.9326086956521739, "grad_norm": 0.999197992647904, "learning_rate": 1.1933272917330432e-05, "loss": 0.457699716091156, "step": 3432 }, { "epoch": 0.9328804347826087, "grad_norm": 0.9760618329555952, "learning_rate": 1.1928864363667064e-05, "loss": 0.3771648406982422, "step": 3433 }, { "epoch": 0.9331521739130435, "grad_norm": 1.2168592980662636, "learning_rate": 1.1924455420601326e-05, "loss": 0.5457174777984619, "step": 3434 }, { "epoch": 0.9334239130434783, "grad_norm": 1.106581763027436, "learning_rate": 1.1920046089023307e-05, "loss": 0.5235923528671265, "step": 3435 }, { "epoch": 0.933695652173913, "grad_norm": 1.0337552822189893, "learning_rate": 1.1915636369823167e-05, "loss": 0.43715140223503113, "step": 3436 }, { "epoch": 0.9339673913043478, "grad_norm": 1.0041986241843528, "learning_rate": 1.191122626389115e-05, "loss": 0.44261837005615234, "step": 3437 }, { "epoch": 0.9342391304347826, "grad_norm": 1.0199763220900415, "learning_rate": 1.190681577211757e-05, "loss": 0.3933965563774109, "step": 3438 }, { "epoch": 0.9345108695652173, "grad_norm": 1.1482599265617408, "learning_rate": 1.1902404895392829e-05, "loss": 0.5183990001678467, "step": 3439 }, { "epoch": 0.9347826086956522, "grad_norm": 1.0512931819597002, "learning_rate": 1.1897993634607405e-05, "loss": 0.45236265659332275, "step": 3440 }, { "epoch": 0.935054347826087, "grad_norm": 1.2403488182366809, "learning_rate": 1.1893581990651848e-05, "loss": 0.5646060705184937, "step": 3441 }, { "epoch": 0.9353260869565218, "grad_norm": 1.050939275365576, "learning_rate": 1.1889169964416785e-05, "loss": 0.39926883578300476, "step": 3442 }, { "epoch": 0.9355978260869565, "grad_norm": 1.0745408411101347, "learning_rate": 1.1884757556792928e-05, "loss": 0.48874765634536743, "step": 3443 }, { "epoch": 0.9358695652173913, "grad_norm": 1.105205001734911, "learning_rate": 1.1880344768671055e-05, "loss": 0.4911876916885376, "step": 3444 }, { "epoch": 0.936141304347826, "grad_norm": 1.058718641953386, "learning_rate": 1.1875931600942035e-05, "loss": 0.40755608677864075, "step": 3445 }, { "epoch": 0.9364130434782608, "grad_norm": 1.1208388525653779, "learning_rate": 1.1871518054496798e-05, "loss": 0.525856614112854, "step": 3446 }, { "epoch": 0.9366847826086957, "grad_norm": 1.0302737482131394, "learning_rate": 1.1867104130226363e-05, "loss": 0.4582520127296448, "step": 3447 }, { "epoch": 0.9369565217391305, "grad_norm": 1.1296090013622926, "learning_rate": 1.186268982902182e-05, "loss": 0.4874449670314789, "step": 3448 }, { "epoch": 0.9372282608695652, "grad_norm": 1.100878582086465, "learning_rate": 1.1858275151774334e-05, "loss": 0.46260547637939453, "step": 3449 }, { "epoch": 0.9375, "grad_norm": 1.1326057505435065, "learning_rate": 1.1853860099375151e-05, "loss": 0.39343512058258057, "step": 3450 }, { "epoch": 0.9377717391304348, "grad_norm": 1.0637546370198472, "learning_rate": 1.1849444672715587e-05, "loss": 0.5416452288627625, "step": 3451 }, { "epoch": 0.9380434782608695, "grad_norm": 1.0145258609747945, "learning_rate": 1.184502887268703e-05, "loss": 0.4656045138835907, "step": 3452 }, { "epoch": 0.9383152173913043, "grad_norm": 1.103011223472387, "learning_rate": 1.1840612700180957e-05, "loss": 0.5038444399833679, "step": 3453 }, { "epoch": 0.9385869565217392, "grad_norm": 1.1156630091102957, "learning_rate": 1.183619615608891e-05, "loss": 0.4408279061317444, "step": 3454 }, { "epoch": 0.938858695652174, "grad_norm": 1.0932927580175449, "learning_rate": 1.1831779241302507e-05, "loss": 0.47081902623176575, "step": 3455 }, { "epoch": 0.9391304347826087, "grad_norm": 1.0095429663314448, "learning_rate": 1.1827361956713444e-05, "loss": 0.4528087377548218, "step": 3456 }, { "epoch": 0.9394021739130435, "grad_norm": 1.0877213774537482, "learning_rate": 1.1822944303213486e-05, "loss": 0.5408624410629272, "step": 3457 }, { "epoch": 0.9396739130434782, "grad_norm": 1.1465307889374043, "learning_rate": 1.1818526281694484e-05, "loss": 0.5464935898780823, "step": 3458 }, { "epoch": 0.939945652173913, "grad_norm": 1.0055566381610652, "learning_rate": 1.1814107893048348e-05, "loss": 0.38237500190734863, "step": 3459 }, { "epoch": 0.9402173913043478, "grad_norm": 1.1317471533658265, "learning_rate": 1.180968913816707e-05, "loss": 0.5429584980010986, "step": 3460 }, { "epoch": 0.9404891304347827, "grad_norm": 1.0156955132444268, "learning_rate": 1.1805270017942723e-05, "loss": 0.45753514766693115, "step": 3461 }, { "epoch": 0.9407608695652174, "grad_norm": 1.1583928682611784, "learning_rate": 1.1800850533267436e-05, "loss": 0.556373655796051, "step": 3462 }, { "epoch": 0.9410326086956522, "grad_norm": 1.0563946077500204, "learning_rate": 1.1796430685033434e-05, "loss": 0.46755027770996094, "step": 3463 }, { "epoch": 0.941304347826087, "grad_norm": 1.2003675290269105, "learning_rate": 1.1792010474132991e-05, "loss": 0.5838000178337097, "step": 3464 }, { "epoch": 0.9415760869565217, "grad_norm": 0.8664246521038401, "learning_rate": 1.1787589901458474e-05, "loss": 0.38911163806915283, "step": 3465 }, { "epoch": 0.9418478260869565, "grad_norm": 0.9504814917724829, "learning_rate": 1.1783168967902314e-05, "loss": 0.42557597160339355, "step": 3466 }, { "epoch": 0.9421195652173913, "grad_norm": 1.09657246773837, "learning_rate": 1.1778747674357019e-05, "loss": 0.5808545351028442, "step": 3467 }, { "epoch": 0.9423913043478261, "grad_norm": 1.1315800529757702, "learning_rate": 1.177432602171516e-05, "loss": 0.4815272092819214, "step": 3468 }, { "epoch": 0.9426630434782609, "grad_norm": 1.0228326473456608, "learning_rate": 1.1769904010869396e-05, "loss": 0.4711100459098816, "step": 3469 }, { "epoch": 0.9429347826086957, "grad_norm": 1.1763594653290568, "learning_rate": 1.1765481642712441e-05, "loss": 0.5904314517974854, "step": 3470 }, { "epoch": 0.9432065217391304, "grad_norm": 0.9446886761915965, "learning_rate": 1.1761058918137103e-05, "loss": 0.4233047068119049, "step": 3471 }, { "epoch": 0.9434782608695652, "grad_norm": 0.9421879795099749, "learning_rate": 1.1756635838036235e-05, "loss": 0.37747544050216675, "step": 3472 }, { "epoch": 0.94375, "grad_norm": 1.0470064299267021, "learning_rate": 1.1752212403302785e-05, "loss": 0.33086878061294556, "step": 3473 }, { "epoch": 0.9440217391304347, "grad_norm": 1.0695686858259517, "learning_rate": 1.1747788614829758e-05, "loss": 0.4340594410896301, "step": 3474 }, { "epoch": 0.9442934782608695, "grad_norm": 1.1820404458616751, "learning_rate": 1.1743364473510239e-05, "loss": 0.5642147660255432, "step": 3475 }, { "epoch": 0.9445652173913044, "grad_norm": 1.015964468643449, "learning_rate": 1.1738939980237385e-05, "loss": 0.37721049785614014, "step": 3476 }, { "epoch": 0.9448369565217392, "grad_norm": 1.0971896534556946, "learning_rate": 1.1734515135904412e-05, "loss": 0.4533542990684509, "step": 3477 }, { "epoch": 0.9451086956521739, "grad_norm": 1.3978403787914184, "learning_rate": 1.1730089941404616e-05, "loss": 0.5062353014945984, "step": 3478 }, { "epoch": 0.9453804347826087, "grad_norm": 0.968618517060519, "learning_rate": 1.1725664397631369e-05, "loss": 0.35293206572532654, "step": 3479 }, { "epoch": 0.9456521739130435, "grad_norm": 1.1286694627210225, "learning_rate": 1.17212385054781e-05, "loss": 0.5722038745880127, "step": 3480 }, { "epoch": 0.9459239130434782, "grad_norm": 1.051044048599072, "learning_rate": 1.1716812265838325e-05, "loss": 0.4776521325111389, "step": 3481 }, { "epoch": 0.946195652173913, "grad_norm": 0.9795347588514564, "learning_rate": 1.1712385679605608e-05, "loss": 0.36870646476745605, "step": 3482 }, { "epoch": 0.9464673913043479, "grad_norm": 1.213577522053917, "learning_rate": 1.1707958747673605e-05, "loss": 0.4662421941757202, "step": 3483 }, { "epoch": 0.9467391304347826, "grad_norm": 2.159514448563424, "learning_rate": 1.1703531470936031e-05, "loss": 0.4960644245147705, "step": 3484 }, { "epoch": 0.9470108695652174, "grad_norm": 1.131032158317174, "learning_rate": 1.1699103850286668e-05, "loss": 0.47791314125061035, "step": 3485 }, { "epoch": 0.9472826086956522, "grad_norm": 1.2182137414172407, "learning_rate": 1.1694675886619378e-05, "loss": 0.47218984365463257, "step": 3486 }, { "epoch": 0.9475543478260869, "grad_norm": 1.0884228423335736, "learning_rate": 1.1690247580828075e-05, "loss": 0.48999491333961487, "step": 3487 }, { "epoch": 0.9478260869565217, "grad_norm": 1.0113790975699153, "learning_rate": 1.1685818933806765e-05, "loss": 0.38960427045822144, "step": 3488 }, { "epoch": 0.9480978260869565, "grad_norm": 1.0333565692875561, "learning_rate": 1.1681389946449504e-05, "loss": 0.46272850036621094, "step": 3489 }, { "epoch": 0.9483695652173914, "grad_norm": 1.077160544497604, "learning_rate": 1.1676960619650424e-05, "loss": 0.49645087122917175, "step": 3490 }, { "epoch": 0.9486413043478261, "grad_norm": 0.9930025800257783, "learning_rate": 1.1672530954303726e-05, "loss": 0.4294201731681824, "step": 3491 }, { "epoch": 0.9489130434782609, "grad_norm": 1.0362160859203122, "learning_rate": 1.166810095130368e-05, "loss": 0.4513568878173828, "step": 3492 }, { "epoch": 0.9491847826086957, "grad_norm": 1.008863678489594, "learning_rate": 1.166367061154462e-05, "loss": 0.40005144476890564, "step": 3493 }, { "epoch": 0.9494565217391304, "grad_norm": 0.9986979644733556, "learning_rate": 1.1659239935920949e-05, "loss": 0.39533233642578125, "step": 3494 }, { "epoch": 0.9497282608695652, "grad_norm": 0.833125919111314, "learning_rate": 1.1654808925327141e-05, "loss": 0.31591230630874634, "step": 3495 }, { "epoch": 0.95, "grad_norm": 1.08772231732823, "learning_rate": 1.1650377580657734e-05, "loss": 0.4602251648902893, "step": 3496 }, { "epoch": 0.9502717391304348, "grad_norm": 0.9244230114066971, "learning_rate": 1.164594590280734e-05, "loss": 0.374440997838974, "step": 3497 }, { "epoch": 0.9505434782608696, "grad_norm": 1.0251115092176077, "learning_rate": 1.1641513892670628e-05, "loss": 0.4306075870990753, "step": 3498 }, { "epoch": 0.9508152173913044, "grad_norm": 0.9933922937584247, "learning_rate": 1.1637081551142347e-05, "loss": 0.41384297609329224, "step": 3499 }, { "epoch": 0.9510869565217391, "grad_norm": 1.0920677860521413, "learning_rate": 1.1632648879117296e-05, "loss": 0.5387758612632751, "step": 3500 }, { "epoch": 0.9513586956521739, "grad_norm": 1.0792085070545752, "learning_rate": 1.1628215877490359e-05, "loss": 0.4923211336135864, "step": 3501 }, { "epoch": 0.9516304347826087, "grad_norm": 1.1531439999083517, "learning_rate": 1.1623782547156473e-05, "loss": 0.4598643183708191, "step": 3502 }, { "epoch": 0.9519021739130434, "grad_norm": 1.1026662912258185, "learning_rate": 1.1619348889010643e-05, "loss": 0.49793413281440735, "step": 3503 }, { "epoch": 0.9521739130434783, "grad_norm": 1.1604592954621973, "learning_rate": 1.1614914903947952e-05, "loss": 0.5524051189422607, "step": 3504 }, { "epoch": 0.9524456521739131, "grad_norm": 0.9682759860545852, "learning_rate": 1.161048059286353e-05, "loss": 0.4612606167793274, "step": 3505 }, { "epoch": 0.9527173913043478, "grad_norm": 0.9348041555861307, "learning_rate": 1.1606045956652592e-05, "loss": 0.3859213590621948, "step": 3506 }, { "epoch": 0.9529891304347826, "grad_norm": 0.9447876556706254, "learning_rate": 1.1601610996210408e-05, "loss": 0.45892107486724854, "step": 3507 }, { "epoch": 0.9532608695652174, "grad_norm": 1.0141734936604967, "learning_rate": 1.1597175712432312e-05, "loss": 0.4412561058998108, "step": 3508 }, { "epoch": 0.9535326086956522, "grad_norm": 1.1644342521967708, "learning_rate": 1.1592740106213711e-05, "loss": 0.5296580195426941, "step": 3509 }, { "epoch": 0.9538043478260869, "grad_norm": 1.1265724573263256, "learning_rate": 1.1588304178450069e-05, "loss": 0.49591439962387085, "step": 3510 }, { "epoch": 0.9540760869565217, "grad_norm": 0.88389669226581, "learning_rate": 1.1583867930036917e-05, "loss": 0.389641672372818, "step": 3511 }, { "epoch": 0.9543478260869566, "grad_norm": 1.1126324240897383, "learning_rate": 1.1579431361869857e-05, "loss": 0.42913925647735596, "step": 3512 }, { "epoch": 0.9546195652173913, "grad_norm": 1.1045453968483938, "learning_rate": 1.1574994474844545e-05, "loss": 0.5677263736724854, "step": 3513 }, { "epoch": 0.9548913043478261, "grad_norm": 1.151281999481685, "learning_rate": 1.1570557269856714e-05, "loss": 0.5427863597869873, "step": 3514 }, { "epoch": 0.9551630434782609, "grad_norm": 1.0236303300706755, "learning_rate": 1.1566119747802151e-05, "loss": 0.4440503716468811, "step": 3515 }, { "epoch": 0.9554347826086956, "grad_norm": 1.0710081862492367, "learning_rate": 1.156168190957671e-05, "loss": 0.43923473358154297, "step": 3516 }, { "epoch": 0.9557065217391304, "grad_norm": 1.1163886022523561, "learning_rate": 1.1557243756076311e-05, "loss": 0.46560174226760864, "step": 3517 }, { "epoch": 0.9559782608695652, "grad_norm": 1.1623429548401212, "learning_rate": 1.1552805288196934e-05, "loss": 0.5807333588600159, "step": 3518 }, { "epoch": 0.95625, "grad_norm": 1.0592037756084556, "learning_rate": 1.1548366506834624e-05, "loss": 0.3845776319503784, "step": 3519 }, { "epoch": 0.9565217391304348, "grad_norm": 1.0040443701281212, "learning_rate": 1.1543927412885489e-05, "loss": 0.4542279839515686, "step": 3520 }, { "epoch": 0.9567934782608696, "grad_norm": 0.9788452350672346, "learning_rate": 1.1539488007245704e-05, "loss": 0.41606277227401733, "step": 3521 }, { "epoch": 0.9570652173913043, "grad_norm": 1.1510847244883773, "learning_rate": 1.1535048290811502e-05, "loss": 0.49013403058052063, "step": 3522 }, { "epoch": 0.9573369565217391, "grad_norm": 0.9822982399263673, "learning_rate": 1.153060826447918e-05, "loss": 0.41072162985801697, "step": 3523 }, { "epoch": 0.9576086956521739, "grad_norm": 1.0440019770070603, "learning_rate": 1.1526167929145095e-05, "loss": 0.4436478614807129, "step": 3524 }, { "epoch": 0.9578804347826086, "grad_norm": 1.0549614684821589, "learning_rate": 1.1521727285705677e-05, "loss": 0.4446898400783539, "step": 3525 }, { "epoch": 0.9581521739130435, "grad_norm": 1.0234025585756967, "learning_rate": 1.1517286335057403e-05, "loss": 0.41696465015411377, "step": 3526 }, { "epoch": 0.9584239130434783, "grad_norm": 1.0127186729794146, "learning_rate": 1.1512845078096826e-05, "loss": 0.47872769832611084, "step": 3527 }, { "epoch": 0.9586956521739131, "grad_norm": 1.030217271292442, "learning_rate": 1.150840351572055e-05, "loss": 0.415010929107666, "step": 3528 }, { "epoch": 0.9589673913043478, "grad_norm": 0.9153451274074722, "learning_rate": 1.1503961648825244e-05, "loss": 0.37879231572151184, "step": 3529 }, { "epoch": 0.9592391304347826, "grad_norm": 0.9895888202148915, "learning_rate": 1.1499519478307644e-05, "loss": 0.40711766481399536, "step": 3530 }, { "epoch": 0.9595108695652174, "grad_norm": 0.8817520619825892, "learning_rate": 1.1495077005064539e-05, "loss": 0.33209192752838135, "step": 3531 }, { "epoch": 0.9597826086956521, "grad_norm": 1.311878128064791, "learning_rate": 1.1490634229992784e-05, "loss": 0.5904517769813538, "step": 3532 }, { "epoch": 0.960054347826087, "grad_norm": 1.1111324273489387, "learning_rate": 1.1486191153989294e-05, "loss": 0.5178158283233643, "step": 3533 }, { "epoch": 0.9603260869565218, "grad_norm": 1.0259626627477787, "learning_rate": 1.1481747777951047e-05, "loss": 0.44982707500457764, "step": 3534 }, { "epoch": 0.9605978260869565, "grad_norm": 0.9568538652348095, "learning_rate": 1.1477304102775075e-05, "loss": 0.4347735047340393, "step": 3535 }, { "epoch": 0.9608695652173913, "grad_norm": 1.0783242417228813, "learning_rate": 1.1472860129358479e-05, "loss": 0.5450208187103271, "step": 3536 }, { "epoch": 0.9611413043478261, "grad_norm": 1.0976011152050555, "learning_rate": 1.1468415858598413e-05, "loss": 0.4546690583229065, "step": 3537 }, { "epoch": 0.9614130434782608, "grad_norm": 0.9991154033557805, "learning_rate": 1.1463971291392093e-05, "loss": 0.4442058205604553, "step": 3538 }, { "epoch": 0.9616847826086956, "grad_norm": 1.0696973934495302, "learning_rate": 1.1459526428636795e-05, "loss": 0.4802817702293396, "step": 3539 }, { "epoch": 0.9619565217391305, "grad_norm": 1.076147316179051, "learning_rate": 1.145508127122986e-05, "loss": 0.42907506227493286, "step": 3540 }, { "epoch": 0.9622282608695653, "grad_norm": 1.1584143729317895, "learning_rate": 1.145063582006868e-05, "loss": 0.5575184226036072, "step": 3541 }, { "epoch": 0.9625, "grad_norm": 1.1032456144638287, "learning_rate": 1.144619007605071e-05, "loss": 0.5330278873443604, "step": 3542 }, { "epoch": 0.9627717391304348, "grad_norm": 1.0292933331035614, "learning_rate": 1.1441744040073469e-05, "loss": 0.46564242243766785, "step": 3543 }, { "epoch": 0.9630434782608696, "grad_norm": 1.0455253217942555, "learning_rate": 1.1437297713034525e-05, "loss": 0.4788096845149994, "step": 3544 }, { "epoch": 0.9633152173913043, "grad_norm": 1.0308750043784236, "learning_rate": 1.1432851095831511e-05, "loss": 0.5003594756126404, "step": 3545 }, { "epoch": 0.9635869565217391, "grad_norm": 1.090961609777767, "learning_rate": 1.142840418936212e-05, "loss": 0.4649047553539276, "step": 3546 }, { "epoch": 0.9638586956521739, "grad_norm": 0.9788948650010901, "learning_rate": 1.1423956994524095e-05, "loss": 0.3593553900718689, "step": 3547 }, { "epoch": 0.9641304347826087, "grad_norm": 1.1111813555097332, "learning_rate": 1.1419509512215253e-05, "loss": 0.4570712447166443, "step": 3548 }, { "epoch": 0.9644021739130435, "grad_norm": 1.1284005015423135, "learning_rate": 1.1415061743333446e-05, "loss": 0.5131372809410095, "step": 3549 }, { "epoch": 0.9646739130434783, "grad_norm": 0.9268637539196989, "learning_rate": 1.141061368877661e-05, "loss": 0.4512594938278198, "step": 3550 }, { "epoch": 0.964945652173913, "grad_norm": 1.1987311499731796, "learning_rate": 1.140616534944272e-05, "loss": 0.5501708984375, "step": 3551 }, { "epoch": 0.9652173913043478, "grad_norm": 1.0632220529971248, "learning_rate": 1.140171672622981e-05, "loss": 0.449004590511322, "step": 3552 }, { "epoch": 0.9654891304347826, "grad_norm": 0.9963652374083722, "learning_rate": 1.1397267820035986e-05, "loss": 0.4444463849067688, "step": 3553 }, { "epoch": 0.9657608695652173, "grad_norm": 1.1697026536055077, "learning_rate": 1.139281863175939e-05, "loss": 0.4910612106323242, "step": 3554 }, { "epoch": 0.9660326086956522, "grad_norm": 0.9603990408838045, "learning_rate": 1.1388369162298236e-05, "loss": 0.4003276228904724, "step": 3555 }, { "epoch": 0.966304347826087, "grad_norm": 1.116423801339495, "learning_rate": 1.1383919412550792e-05, "loss": 0.5051262378692627, "step": 3556 }, { "epoch": 0.9665760869565218, "grad_norm": 1.1413746342052569, "learning_rate": 1.1379469383415378e-05, "loss": 0.5833501219749451, "step": 3557 }, { "epoch": 0.9668478260869565, "grad_norm": 1.0992398812350095, "learning_rate": 1.1375019075790376e-05, "loss": 0.5521178245544434, "step": 3558 }, { "epoch": 0.9671195652173913, "grad_norm": 1.0989902129037448, "learning_rate": 1.1370568490574219e-05, "loss": 0.4841589331626892, "step": 3559 }, { "epoch": 0.967391304347826, "grad_norm": 0.8662609082727319, "learning_rate": 1.1366117628665398e-05, "loss": 0.3505204916000366, "step": 3560 }, { "epoch": 0.9676630434782608, "grad_norm": 1.0715620294980228, "learning_rate": 1.1361666490962468e-05, "loss": 0.4512154161930084, "step": 3561 }, { "epoch": 0.9679347826086957, "grad_norm": 1.1841766429783993, "learning_rate": 1.1357215078364024e-05, "loss": 0.540393054485321, "step": 3562 }, { "epoch": 0.9682065217391305, "grad_norm": 0.9522941147081965, "learning_rate": 1.1352763391768725e-05, "loss": 0.4432871639728546, "step": 3563 }, { "epoch": 0.9684782608695652, "grad_norm": 1.241299290405308, "learning_rate": 1.1348311432075287e-05, "loss": 0.5701521039009094, "step": 3564 }, { "epoch": 0.96875, "grad_norm": 1.087734461328552, "learning_rate": 1.1343859200182479e-05, "loss": 0.4146401882171631, "step": 3565 }, { "epoch": 0.9690217391304348, "grad_norm": 1.0226714299315074, "learning_rate": 1.1339406696989128e-05, "loss": 0.44890713691711426, "step": 3566 }, { "epoch": 0.9692934782608695, "grad_norm": 1.0435622697541285, "learning_rate": 1.1334953923394105e-05, "loss": 0.4677031338214874, "step": 3567 }, { "epoch": 0.9695652173913043, "grad_norm": 1.0528774112845107, "learning_rate": 1.1330500880296354e-05, "loss": 0.5199775695800781, "step": 3568 }, { "epoch": 0.9698369565217392, "grad_norm": 1.0776377952108496, "learning_rate": 1.1326047568594852e-05, "loss": 0.4839057922363281, "step": 3569 }, { "epoch": 0.970108695652174, "grad_norm": 0.934046602127926, "learning_rate": 1.1321593989188648e-05, "loss": 0.4487752318382263, "step": 3570 }, { "epoch": 0.9703804347826087, "grad_norm": 0.9089370720936962, "learning_rate": 1.1317140142976839e-05, "loss": 0.39159852266311646, "step": 3571 }, { "epoch": 0.9706521739130435, "grad_norm": 1.0014511011876248, "learning_rate": 1.1312686030858568e-05, "loss": 0.43805238604545593, "step": 3572 }, { "epoch": 0.9709239130434782, "grad_norm": 1.0114914066862029, "learning_rate": 1.1308231653733043e-05, "loss": 0.49128180742263794, "step": 3573 }, { "epoch": 0.971195652173913, "grad_norm": 1.0364911320763068, "learning_rate": 1.1303777012499523e-05, "loss": 0.4717394709587097, "step": 3574 }, { "epoch": 0.9714673913043478, "grad_norm": 1.0145095627263605, "learning_rate": 1.1299322108057313e-05, "loss": 0.42131173610687256, "step": 3575 }, { "epoch": 0.9717391304347827, "grad_norm": 1.0679761781852155, "learning_rate": 1.1294866941305784e-05, "loss": 0.5315932035446167, "step": 3576 }, { "epoch": 0.9720108695652174, "grad_norm": 1.109834787043302, "learning_rate": 1.1290411513144343e-05, "loss": 0.575809895992279, "step": 3577 }, { "epoch": 0.9722826086956522, "grad_norm": 0.7446186126778501, "learning_rate": 1.1285955824472472e-05, "loss": 0.29521942138671875, "step": 3578 }, { "epoch": 0.972554347826087, "grad_norm": 1.0337562105175344, "learning_rate": 1.128149987618968e-05, "loss": 0.4390445351600647, "step": 3579 }, { "epoch": 0.9728260869565217, "grad_norm": 1.17829331922914, "learning_rate": 1.1277043669195549e-05, "loss": 0.4887995719909668, "step": 3580 }, { "epoch": 0.9730978260869565, "grad_norm": 1.0594603316129965, "learning_rate": 1.12725872043897e-05, "loss": 0.47840815782546997, "step": 3581 }, { "epoch": 0.9733695652173913, "grad_norm": 1.019610917593635, "learning_rate": 1.126813048267182e-05, "loss": 0.4851599335670471, "step": 3582 }, { "epoch": 0.9736413043478261, "grad_norm": 1.052310586112319, "learning_rate": 1.126367350494163e-05, "loss": 0.517501950263977, "step": 3583 }, { "epoch": 0.9739130434782609, "grad_norm": 1.0110387441140964, "learning_rate": 1.1259216272098918e-05, "loss": 0.46536412835121155, "step": 3584 }, { "epoch": 0.9741847826086957, "grad_norm": 1.1301353950498476, "learning_rate": 1.1254758785043516e-05, "loss": 0.47876739501953125, "step": 3585 }, { "epoch": 0.9744565217391304, "grad_norm": 1.0443113728769249, "learning_rate": 1.1250301044675312e-05, "loss": 0.44455260038375854, "step": 3586 }, { "epoch": 0.9747282608695652, "grad_norm": 1.214408461962545, "learning_rate": 1.1245843051894237e-05, "loss": 0.5196367502212524, "step": 3587 }, { "epoch": 0.975, "grad_norm": 1.1436329257802835, "learning_rate": 1.1241384807600279e-05, "loss": 0.4925938844680786, "step": 3588 }, { "epoch": 0.9752717391304347, "grad_norm": 1.1540497130394891, "learning_rate": 1.123692631269348e-05, "loss": 0.5233514308929443, "step": 3589 }, { "epoch": 0.9755434782608695, "grad_norm": 1.0191982773902397, "learning_rate": 1.1232467568073921e-05, "loss": 0.43061721324920654, "step": 3590 }, { "epoch": 0.9758152173913044, "grad_norm": 1.0654088038294538, "learning_rate": 1.1228008574641746e-05, "loss": 0.45566922426223755, "step": 3591 }, { "epoch": 0.9760869565217392, "grad_norm": 1.0846543573726468, "learning_rate": 1.1223549333297147e-05, "loss": 0.47138893604278564, "step": 3592 }, { "epoch": 0.9763586956521739, "grad_norm": 0.9859776949142032, "learning_rate": 1.1219089844940356e-05, "loss": 0.42592018842697144, "step": 3593 }, { "epoch": 0.9766304347826087, "grad_norm": 0.9976734497316978, "learning_rate": 1.121463011047167e-05, "loss": 0.5078129172325134, "step": 3594 }, { "epoch": 0.9769021739130435, "grad_norm": 0.9628385124752328, "learning_rate": 1.1210170130791417e-05, "loss": 0.4160568118095398, "step": 3595 }, { "epoch": 0.9771739130434782, "grad_norm": 1.0975212688004647, "learning_rate": 1.1205709906799995e-05, "loss": 0.39349082112312317, "step": 3596 }, { "epoch": 0.977445652173913, "grad_norm": 1.0864571805345637, "learning_rate": 1.120124943939784e-05, "loss": 0.5003697276115417, "step": 3597 }, { "epoch": 0.9777173913043479, "grad_norm": 1.1436680278860956, "learning_rate": 1.1196788729485433e-05, "loss": 0.5684577822685242, "step": 3598 }, { "epoch": 0.9779891304347826, "grad_norm": 0.9754684831349301, "learning_rate": 1.1192327777963313e-05, "loss": 0.45253944396972656, "step": 3599 }, { "epoch": 0.9782608695652174, "grad_norm": 1.0870024279804176, "learning_rate": 1.1187866585732064e-05, "loss": 0.5129872560501099, "step": 3600 }, { "epoch": 0.9785326086956522, "grad_norm": 0.9329855531422436, "learning_rate": 1.118340515369232e-05, "loss": 0.34899312257766724, "step": 3601 }, { "epoch": 0.9788043478260869, "grad_norm": 0.9868349689765473, "learning_rate": 1.1178943482744761e-05, "loss": 0.44456952810287476, "step": 3602 }, { "epoch": 0.9790760869565217, "grad_norm": 1.1757060781253983, "learning_rate": 1.117448157379012e-05, "loss": 0.5512656569480896, "step": 3603 }, { "epoch": 0.9793478260869565, "grad_norm": 0.9438244470540997, "learning_rate": 1.1170019427729168e-05, "loss": 0.3899575173854828, "step": 3604 }, { "epoch": 0.9796195652173914, "grad_norm": 1.0143465924846757, "learning_rate": 1.1165557045462738e-05, "loss": 0.40036988258361816, "step": 3605 }, { "epoch": 0.9798913043478261, "grad_norm": 1.114451181235651, "learning_rate": 1.11610944278917e-05, "loss": 0.38945913314819336, "step": 3606 }, { "epoch": 0.9801630434782609, "grad_norm": 1.0789971932856153, "learning_rate": 1.1156631575916972e-05, "loss": 0.4804612696170807, "step": 3607 }, { "epoch": 0.9804347826086957, "grad_norm": 1.0543022896699303, "learning_rate": 1.1152168490439524e-05, "loss": 0.5174779891967773, "step": 3608 }, { "epoch": 0.9807065217391304, "grad_norm": 1.03154907978893, "learning_rate": 1.114770517236037e-05, "loss": 0.42955443263053894, "step": 3609 }, { "epoch": 0.9809782608695652, "grad_norm": 1.1298001649142968, "learning_rate": 1.1143241622580579e-05, "loss": 0.5792672634124756, "step": 3610 }, { "epoch": 0.98125, "grad_norm": 1.1416219717084408, "learning_rate": 1.113877784200125e-05, "loss": 0.4849771559238434, "step": 3611 }, { "epoch": 0.9815217391304348, "grad_norm": 1.0627201288412538, "learning_rate": 1.1134313831523547e-05, "loss": 0.5088103413581848, "step": 3612 }, { "epoch": 0.9817934782608696, "grad_norm": 1.1129684653889655, "learning_rate": 1.1129849592048665e-05, "loss": 0.47201234102249146, "step": 3613 }, { "epoch": 0.9820652173913044, "grad_norm": 0.9569379545759137, "learning_rate": 1.1125385124477857e-05, "loss": 0.3881707787513733, "step": 3614 }, { "epoch": 0.9823369565217391, "grad_norm": 1.1542713713885133, "learning_rate": 1.1120920429712415e-05, "loss": 0.5252590179443359, "step": 3615 }, { "epoch": 0.9826086956521739, "grad_norm": 1.1001272209680169, "learning_rate": 1.1116455508653676e-05, "loss": 0.506388783454895, "step": 3616 }, { "epoch": 0.9828804347826087, "grad_norm": 1.2792380616707026, "learning_rate": 1.1111990362203034e-05, "loss": 0.5417640209197998, "step": 3617 }, { "epoch": 0.9831521739130434, "grad_norm": 1.0991104789240718, "learning_rate": 1.1107524991261913e-05, "loss": 0.5073069930076599, "step": 3618 }, { "epoch": 0.9834239130434783, "grad_norm": 1.0463607676392503, "learning_rate": 1.1103059396731791e-05, "loss": 0.507667601108551, "step": 3619 }, { "epoch": 0.9836956521739131, "grad_norm": 1.1101918327362577, "learning_rate": 1.1098593579514193e-05, "loss": 0.4945695698261261, "step": 3620 }, { "epoch": 0.9839673913043478, "grad_norm": 1.0556626348858202, "learning_rate": 1.1094127540510684e-05, "loss": 0.49385592341423035, "step": 3621 }, { "epoch": 0.9842391304347826, "grad_norm": 1.0333001462899492, "learning_rate": 1.1089661280622872e-05, "loss": 0.4303828477859497, "step": 3622 }, { "epoch": 0.9845108695652174, "grad_norm": 1.0966635247877328, "learning_rate": 1.1085194800752418e-05, "loss": 0.5466312766075134, "step": 3623 }, { "epoch": 0.9847826086956522, "grad_norm": 0.9868813856510378, "learning_rate": 1.1080728101801018e-05, "loss": 0.43097999691963196, "step": 3624 }, { "epoch": 0.9850543478260869, "grad_norm": 1.191828871081948, "learning_rate": 1.107626118467042e-05, "loss": 0.4968869388103485, "step": 3625 }, { "epoch": 0.9853260869565217, "grad_norm": 1.0693527949359407, "learning_rate": 1.107179405026241e-05, "loss": 0.5132988095283508, "step": 3626 }, { "epoch": 0.9855978260869566, "grad_norm": 1.242595385508601, "learning_rate": 1.1067326699478821e-05, "loss": 0.5091342926025391, "step": 3627 }, { "epoch": 0.9858695652173913, "grad_norm": 1.1449604016657748, "learning_rate": 1.1062859133221535e-05, "loss": 0.5355184674263, "step": 3628 }, { "epoch": 0.9861413043478261, "grad_norm": 0.9867015286258813, "learning_rate": 1.1058391352392465e-05, "loss": 0.40472936630249023, "step": 3629 }, { "epoch": 0.9864130434782609, "grad_norm": 1.1106282785330528, "learning_rate": 1.1053923357893577e-05, "loss": 0.5042904615402222, "step": 3630 }, { "epoch": 0.9866847826086956, "grad_norm": 0.9074768896371496, "learning_rate": 1.1049455150626878e-05, "loss": 0.3824397921562195, "step": 3631 }, { "epoch": 0.9869565217391304, "grad_norm": 1.0571030413766955, "learning_rate": 1.1044986731494411e-05, "loss": 0.4606174826622009, "step": 3632 }, { "epoch": 0.9872282608695652, "grad_norm": 1.0901784258915814, "learning_rate": 1.1040518101398277e-05, "loss": 0.4381544291973114, "step": 3633 }, { "epoch": 0.9875, "grad_norm": 1.1111530388106379, "learning_rate": 1.1036049261240602e-05, "loss": 0.5154281258583069, "step": 3634 }, { "epoch": 0.9877717391304348, "grad_norm": 1.1809100308710063, "learning_rate": 1.103158021192357e-05, "loss": 0.6228834390640259, "step": 3635 }, { "epoch": 0.9880434782608696, "grad_norm": 1.1832941411707383, "learning_rate": 1.1027110954349396e-05, "loss": 0.5096420049667358, "step": 3636 }, { "epoch": 0.9883152173913043, "grad_norm": 1.1658125270646758, "learning_rate": 1.1022641489420342e-05, "loss": 0.558996319770813, "step": 3637 }, { "epoch": 0.9885869565217391, "grad_norm": 1.1755272614956689, "learning_rate": 1.1018171818038716e-05, "loss": 0.5718487501144409, "step": 3638 }, { "epoch": 0.9888586956521739, "grad_norm": 1.0734543998975365, "learning_rate": 1.1013701941106854e-05, "loss": 0.484321653842926, "step": 3639 }, { "epoch": 0.9891304347826086, "grad_norm": 1.0187543965023556, "learning_rate": 1.1009231859527145e-05, "loss": 0.4635501205921173, "step": 3640 }, { "epoch": 0.9894021739130435, "grad_norm": 1.1534136547859173, "learning_rate": 1.100476157420202e-05, "loss": 0.5795324444770813, "step": 3641 }, { "epoch": 0.9896739130434783, "grad_norm": 1.0360005173997864, "learning_rate": 1.1000291086033945e-05, "loss": 0.42790520191192627, "step": 3642 }, { "epoch": 0.9899456521739131, "grad_norm": 1.0032442109463358, "learning_rate": 1.0995820395925431e-05, "loss": 0.4398868680000305, "step": 3643 }, { "epoch": 0.9902173913043478, "grad_norm": 1.197109967598882, "learning_rate": 1.0991349504779027e-05, "loss": 0.5010481476783752, "step": 3644 }, { "epoch": 0.9904891304347826, "grad_norm": 0.8128168321825934, "learning_rate": 1.0986878413497325e-05, "loss": 0.33449703454971313, "step": 3645 }, { "epoch": 0.9907608695652174, "grad_norm": 1.0137236583109523, "learning_rate": 1.098240712298296e-05, "loss": 0.49180087447166443, "step": 3646 }, { "epoch": 0.9910326086956521, "grad_norm": 1.1861735053173008, "learning_rate": 1.0977935634138595e-05, "loss": 0.4892917275428772, "step": 3647 }, { "epoch": 0.991304347826087, "grad_norm": 1.1276626517207953, "learning_rate": 1.0973463947866949e-05, "loss": 0.48042619228363037, "step": 3648 }, { "epoch": 0.9915760869565218, "grad_norm": 1.3058816136287381, "learning_rate": 1.096899206507077e-05, "loss": 0.3879097104072571, "step": 3649 }, { "epoch": 0.9918478260869565, "grad_norm": 1.0114766871683196, "learning_rate": 1.0964519986652846e-05, "loss": 0.4376164674758911, "step": 3650 }, { "epoch": 0.9921195652173913, "grad_norm": 0.9956604374348166, "learning_rate": 1.0960047713516018e-05, "loss": 0.423727810382843, "step": 3651 }, { "epoch": 0.9923913043478261, "grad_norm": 1.0099479438527377, "learning_rate": 1.0955575246563146e-05, "loss": 0.41531050205230713, "step": 3652 }, { "epoch": 0.9926630434782608, "grad_norm": 0.924386503303623, "learning_rate": 1.0951102586697144e-05, "loss": 0.4249230921268463, "step": 3653 }, { "epoch": 0.9929347826086956, "grad_norm": 0.8955543123190954, "learning_rate": 1.0946629734820959e-05, "loss": 0.3722456693649292, "step": 3654 }, { "epoch": 0.9932065217391305, "grad_norm": 0.9687398814560175, "learning_rate": 1.0942156691837575e-05, "loss": 0.34850913286209106, "step": 3655 }, { "epoch": 0.9934782608695653, "grad_norm": 1.1247621025703929, "learning_rate": 1.0937683458650029e-05, "loss": 0.47812843322753906, "step": 3656 }, { "epoch": 0.99375, "grad_norm": 1.1034808288911078, "learning_rate": 1.0933210036161368e-05, "loss": 0.4739164113998413, "step": 3657 }, { "epoch": 0.9940217391304348, "grad_norm": 1.3046280818451446, "learning_rate": 1.0928736425274702e-05, "loss": 0.4548605680465698, "step": 3658 }, { "epoch": 0.9942934782608696, "grad_norm": 1.0653482015193474, "learning_rate": 1.0924262626893175e-05, "loss": 0.4939829707145691, "step": 3659 }, { "epoch": 0.9945652173913043, "grad_norm": 1.1273413221468618, "learning_rate": 1.0919788641919957e-05, "loss": 0.4738856256008148, "step": 3660 }, { "epoch": 0.9948369565217391, "grad_norm": 1.0755877786363612, "learning_rate": 1.0915314471258271e-05, "loss": 0.505339503288269, "step": 3661 }, { "epoch": 0.9951086956521739, "grad_norm": 1.122111733519826, "learning_rate": 1.0910840115811366e-05, "loss": 0.46079200506210327, "step": 3662 }, { "epoch": 0.9953804347826087, "grad_norm": 0.9725821405706813, "learning_rate": 1.0906365576482532e-05, "loss": 0.42501312494277954, "step": 3663 }, { "epoch": 0.9956521739130435, "grad_norm": 1.055745634536401, "learning_rate": 1.09018908541751e-05, "loss": 0.4663723409175873, "step": 3664 }, { "epoch": 0.9959239130434783, "grad_norm": 1.2460433981668364, "learning_rate": 1.0897415949792427e-05, "loss": 0.6261590123176575, "step": 3665 }, { "epoch": 0.996195652173913, "grad_norm": 0.8524730641000104, "learning_rate": 1.0892940864237923e-05, "loss": 0.32729536294937134, "step": 3666 }, { "epoch": 0.9964673913043478, "grad_norm": 1.1007905419648094, "learning_rate": 1.0888465598415018e-05, "loss": 0.5172801613807678, "step": 3667 }, { "epoch": 0.9967391304347826, "grad_norm": 1.115020257796069, "learning_rate": 1.0883990153227193e-05, "loss": 0.5652531385421753, "step": 3668 }, { "epoch": 0.9970108695652173, "grad_norm": 6.028172446071747, "learning_rate": 1.0879514529577956e-05, "loss": 0.443850576877594, "step": 3669 }, { "epoch": 0.9972826086956522, "grad_norm": 1.0332112264255982, "learning_rate": 1.087503872837085e-05, "loss": 0.4388608932495117, "step": 3670 }, { "epoch": 0.997554347826087, "grad_norm": 1.03341005521297, "learning_rate": 1.0870562750509464e-05, "loss": 0.4206606149673462, "step": 3671 }, { "epoch": 0.9978260869565218, "grad_norm": 1.012654576932231, "learning_rate": 1.0866086596897409e-05, "loss": 0.45976194739341736, "step": 3672 }, { "epoch": 0.9980978260869565, "grad_norm": 0.9505900205530431, "learning_rate": 1.0861610268438343e-05, "loss": 0.36968719959259033, "step": 3673 }, { "epoch": 0.9983695652173913, "grad_norm": 1.1587746965371468, "learning_rate": 1.0857133766035954e-05, "loss": 0.5145360827445984, "step": 3674 }, { "epoch": 0.998641304347826, "grad_norm": 1.102980697453018, "learning_rate": 1.0852657090593961e-05, "loss": 0.4588046669960022, "step": 3675 }, { "epoch": 0.9989130434782608, "grad_norm": 1.1139421837201198, "learning_rate": 1.0848180243016129e-05, "loss": 0.5398901104927063, "step": 3676 }, { "epoch": 0.9991847826086957, "grad_norm": 0.9463768481988724, "learning_rate": 1.084370322420625e-05, "loss": 0.3962083160877228, "step": 3677 }, { "epoch": 0.9994565217391305, "grad_norm": 1.1005799999654031, "learning_rate": 1.0839226035068149e-05, "loss": 0.5288739204406738, "step": 3678 }, { "epoch": 0.9997282608695652, "grad_norm": 1.166908528635159, "learning_rate": 1.083474867650569e-05, "loss": 0.5483239889144897, "step": 3679 }, { "epoch": 1.0, "grad_norm": 1.0789571495629835, "learning_rate": 1.0830271149422773e-05, "loss": 0.44649162888526917, "step": 3680 }, { "epoch": 1.0002717391304348, "grad_norm": 1.0167498892191322, "learning_rate": 1.0825793454723325e-05, "loss": 0.34614187479019165, "step": 3681 }, { "epoch": 1.0005434782608695, "grad_norm": 0.835402963172966, "learning_rate": 1.0821315593311313e-05, "loss": 0.37337028980255127, "step": 3682 }, { "epoch": 1.0008152173913043, "grad_norm": 1.1258944221537401, "learning_rate": 1.081683756609073e-05, "loss": 0.4691426157951355, "step": 3683 }, { "epoch": 1.001086956521739, "grad_norm": 1.027863609142992, "learning_rate": 1.0812359373965613e-05, "loss": 0.4807808995246887, "step": 3684 }, { "epoch": 1.0013586956521738, "grad_norm": 1.1341607080109626, "learning_rate": 1.0807881017840023e-05, "loss": 0.45447245240211487, "step": 3685 }, { "epoch": 1.0016304347826086, "grad_norm": 1.1274535235870649, "learning_rate": 1.0803402498618061e-05, "loss": 0.46756982803344727, "step": 3686 }, { "epoch": 1.0019021739130434, "grad_norm": 0.9108261712355262, "learning_rate": 1.079892381720386e-05, "loss": 0.3004852831363678, "step": 3687 }, { "epoch": 1.0021739130434784, "grad_norm": 0.7842493455691973, "learning_rate": 1.0794444974501577e-05, "loss": 0.2978273034095764, "step": 3688 }, { "epoch": 1.0024456521739131, "grad_norm": 1.0019151304136866, "learning_rate": 1.0789965971415415e-05, "loss": 0.4043525457382202, "step": 3689 }, { "epoch": 1.002717391304348, "grad_norm": 1.1078554728828554, "learning_rate": 1.0785486808849599e-05, "loss": 0.4204344153404236, "step": 3690 }, { "epoch": 1.0029891304347827, "grad_norm": 1.158305794896737, "learning_rate": 1.0781007487708388e-05, "loss": 0.4949212372303009, "step": 3691 }, { "epoch": 1.0032608695652174, "grad_norm": 1.0122006859276276, "learning_rate": 1.0776528008896081e-05, "loss": 0.3703128695487976, "step": 3692 }, { "epoch": 1.0035326086956522, "grad_norm": 1.0884591594829651, "learning_rate": 1.0772048373316997e-05, "loss": 0.42171645164489746, "step": 3693 }, { "epoch": 1.003804347826087, "grad_norm": 1.0989753241937366, "learning_rate": 1.0767568581875494e-05, "loss": 0.3748127818107605, "step": 3694 }, { "epoch": 1.0040760869565217, "grad_norm": 1.0364454051955185, "learning_rate": 1.0763088635475963e-05, "loss": 0.37642040848731995, "step": 3695 }, { "epoch": 1.0043478260869565, "grad_norm": 1.0707399709068481, "learning_rate": 1.0758608535022816e-05, "loss": 0.39718180894851685, "step": 3696 }, { "epoch": 1.0046195652173913, "grad_norm": 0.9044727489180453, "learning_rate": 1.0754128281420511e-05, "loss": 0.3169100880622864, "step": 3697 }, { "epoch": 1.004891304347826, "grad_norm": 0.9235983833143966, "learning_rate": 1.0749647875573526e-05, "loss": 0.3783224821090698, "step": 3698 }, { "epoch": 1.0051630434782608, "grad_norm": 1.2117632887442638, "learning_rate": 1.074516731838637e-05, "loss": 0.49171173572540283, "step": 3699 }, { "epoch": 1.0054347826086956, "grad_norm": 1.091809499848687, "learning_rate": 1.0740686610763588e-05, "loss": 0.4124959707260132, "step": 3700 }, { "epoch": 1.0057065217391303, "grad_norm": 1.3075810107852035, "learning_rate": 1.0736205753609754e-05, "loss": 0.5103284120559692, "step": 3701 }, { "epoch": 1.0059782608695653, "grad_norm": 1.1664359933258157, "learning_rate": 1.073172474782947e-05, "loss": 0.44590455293655396, "step": 3702 }, { "epoch": 1.00625, "grad_norm": 1.1715075882592845, "learning_rate": 1.0727243594327366e-05, "loss": 0.5028125047683716, "step": 3703 }, { "epoch": 1.0065217391304349, "grad_norm": 1.1170621295021568, "learning_rate": 1.0722762294008107e-05, "loss": 0.4787008762359619, "step": 3704 }, { "epoch": 1.0067934782608696, "grad_norm": 1.059345946046826, "learning_rate": 1.0718280847776387e-05, "loss": 0.4487937390804291, "step": 3705 }, { "epoch": 1.0070652173913044, "grad_norm": 0.9937212859894896, "learning_rate": 1.0713799256536928e-05, "loss": 0.39565420150756836, "step": 3706 }, { "epoch": 1.0073369565217392, "grad_norm": 1.0570669975973406, "learning_rate": 1.0709317521194478e-05, "loss": 0.43873581290245056, "step": 3707 }, { "epoch": 1.007608695652174, "grad_norm": 1.3625219357776646, "learning_rate": 1.0704835642653821e-05, "loss": 0.4781535863876343, "step": 3708 }, { "epoch": 1.0078804347826087, "grad_norm": 1.3598449362514404, "learning_rate": 1.0700353621819761e-05, "loss": 0.460995614528656, "step": 3709 }, { "epoch": 1.0081521739130435, "grad_norm": 1.0896205922102151, "learning_rate": 1.069587145959714e-05, "loss": 0.4546208381652832, "step": 3710 }, { "epoch": 1.0084239130434782, "grad_norm": 1.068909003299142, "learning_rate": 1.0691389156890823e-05, "loss": 0.41356998682022095, "step": 3711 }, { "epoch": 1.008695652173913, "grad_norm": 0.9473395382738712, "learning_rate": 1.0686906714605708e-05, "loss": 0.3405885100364685, "step": 3712 }, { "epoch": 1.0089673913043478, "grad_norm": 1.1353391570712914, "learning_rate": 1.0682424133646712e-05, "loss": 0.38609811663627625, "step": 3713 }, { "epoch": 1.0092391304347825, "grad_norm": 1.0170926553320165, "learning_rate": 1.0677941414918786e-05, "loss": 0.3636488914489746, "step": 3714 }, { "epoch": 1.0095108695652173, "grad_norm": 1.1740656630507889, "learning_rate": 1.067345855932692e-05, "loss": 0.4983910918235779, "step": 3715 }, { "epoch": 1.0097826086956523, "grad_norm": 1.0108207649331926, "learning_rate": 1.0668975567776107e-05, "loss": 0.3608897030353546, "step": 3716 }, { "epoch": 1.010054347826087, "grad_norm": 1.1462769383451532, "learning_rate": 1.0664492441171385e-05, "loss": 0.4760481119155884, "step": 3717 }, { "epoch": 1.0103260869565218, "grad_norm": 1.1267045212390185, "learning_rate": 1.0660009180417819e-05, "loss": 0.4686831831932068, "step": 3718 }, { "epoch": 1.0105978260869566, "grad_norm": 1.1095499823320696, "learning_rate": 1.0655525786420493e-05, "loss": 0.40773260593414307, "step": 3719 }, { "epoch": 1.0108695652173914, "grad_norm": 1.078937022518289, "learning_rate": 1.0651042260084523e-05, "loss": 0.40853655338287354, "step": 3720 }, { "epoch": 1.0111413043478261, "grad_norm": 1.1639977956672565, "learning_rate": 1.0646558602315052e-05, "loss": 0.4870893359184265, "step": 3721 }, { "epoch": 1.0114130434782609, "grad_norm": 1.0770690208987028, "learning_rate": 1.0642074814017244e-05, "loss": 0.4252176284790039, "step": 3722 }, { "epoch": 1.0116847826086957, "grad_norm": 0.9542699341409511, "learning_rate": 1.0637590896096303e-05, "loss": 0.34337395429611206, "step": 3723 }, { "epoch": 1.0119565217391304, "grad_norm": 1.3526304493402423, "learning_rate": 1.0633106849457445e-05, "loss": 0.564602255821228, "step": 3724 }, { "epoch": 1.0122282608695652, "grad_norm": 0.9416103388912169, "learning_rate": 1.062862267500591e-05, "loss": 0.3670089840888977, "step": 3725 }, { "epoch": 1.0125, "grad_norm": 1.0305322743203569, "learning_rate": 1.062413837364698e-05, "loss": 0.3674558401107788, "step": 3726 }, { "epoch": 1.0127717391304347, "grad_norm": 1.1295828560150056, "learning_rate": 1.0619653946285948e-05, "loss": 0.4486205577850342, "step": 3727 }, { "epoch": 1.0130434782608695, "grad_norm": 1.0305589029698892, "learning_rate": 1.0615169393828141e-05, "loss": 0.3962913751602173, "step": 3728 }, { "epoch": 1.0133152173913043, "grad_norm": 1.030115986155499, "learning_rate": 1.0610684717178905e-05, "loss": 0.43516913056373596, "step": 3729 }, { "epoch": 1.013586956521739, "grad_norm": 1.047883056648677, "learning_rate": 1.0606199917243617e-05, "loss": 0.37071937322616577, "step": 3730 }, { "epoch": 1.013858695652174, "grad_norm": 1.2276677183136646, "learning_rate": 1.0601714994927673e-05, "loss": 0.4228387773036957, "step": 3731 }, { "epoch": 1.0141304347826088, "grad_norm": 1.0974722748700076, "learning_rate": 1.0597229951136498e-05, "loss": 0.4236997365951538, "step": 3732 }, { "epoch": 1.0144021739130435, "grad_norm": 1.0413551628928266, "learning_rate": 1.0592744786775547e-05, "loss": 0.3946869671344757, "step": 3733 }, { "epoch": 1.0146739130434783, "grad_norm": 1.1417989776092847, "learning_rate": 1.0588259502750278e-05, "loss": 0.4412795305252075, "step": 3734 }, { "epoch": 1.014945652173913, "grad_norm": 1.0585954558437256, "learning_rate": 1.0583774099966197e-05, "loss": 0.37912410497665405, "step": 3735 }, { "epoch": 1.0152173913043478, "grad_norm": 1.0649604673412483, "learning_rate": 1.0579288579328824e-05, "loss": 0.46907368302345276, "step": 3736 }, { "epoch": 1.0154891304347826, "grad_norm": 2.202580744497976, "learning_rate": 1.0574802941743702e-05, "loss": 0.4983287751674652, "step": 3737 }, { "epoch": 1.0157608695652174, "grad_norm": 1.225480412316099, "learning_rate": 1.0570317188116401e-05, "loss": 0.5479917526245117, "step": 3738 }, { "epoch": 1.0160326086956522, "grad_norm": 1.0433698540664194, "learning_rate": 1.0565831319352508e-05, "loss": 0.4331567883491516, "step": 3739 }, { "epoch": 1.016304347826087, "grad_norm": 1.0702864972403177, "learning_rate": 1.056134533635764e-05, "loss": 0.45950961112976074, "step": 3740 }, { "epoch": 1.0165760869565217, "grad_norm": 1.1328234640601962, "learning_rate": 1.0556859240037443e-05, "loss": 0.3984362483024597, "step": 3741 }, { "epoch": 1.0168478260869565, "grad_norm": 1.0507695326167419, "learning_rate": 1.0552373031297563e-05, "loss": 0.4089139401912689, "step": 3742 }, { "epoch": 1.0171195652173912, "grad_norm": 1.2687786728173114, "learning_rate": 1.0547886711043689e-05, "loss": 0.40355151891708374, "step": 3743 }, { "epoch": 1.017391304347826, "grad_norm": 1.0945312007247692, "learning_rate": 1.0543400280181533e-05, "loss": 0.4207441210746765, "step": 3744 }, { "epoch": 1.017663043478261, "grad_norm": 1.1511337730429283, "learning_rate": 1.0538913739616817e-05, "loss": 0.4531497359275818, "step": 3745 }, { "epoch": 1.0179347826086957, "grad_norm": 1.293347603440212, "learning_rate": 1.0534427090255293e-05, "loss": 0.5039825439453125, "step": 3746 }, { "epoch": 1.0182065217391305, "grad_norm": 1.148825563639006, "learning_rate": 1.0529940333002731e-05, "loss": 0.4365791380405426, "step": 3747 }, { "epoch": 1.0184782608695653, "grad_norm": 1.067876617879987, "learning_rate": 1.0525453468764927e-05, "loss": 0.384782075881958, "step": 3748 }, { "epoch": 1.01875, "grad_norm": 1.0641114307247055, "learning_rate": 1.0520966498447698e-05, "loss": 0.42522132396698, "step": 3749 }, { "epoch": 1.0190217391304348, "grad_norm": 1.1495184513538501, "learning_rate": 1.0516479422956882e-05, "loss": 0.45783987641334534, "step": 3750 }, { "epoch": 1.0192934782608696, "grad_norm": 1.1935782243343147, "learning_rate": 1.0511992243198335e-05, "loss": 0.43808412551879883, "step": 3751 }, { "epoch": 1.0195652173913043, "grad_norm": 1.0524263606610815, "learning_rate": 1.0507504960077932e-05, "loss": 0.40101897716522217, "step": 3752 }, { "epoch": 1.0198369565217391, "grad_norm": 1.13530177805578, "learning_rate": 1.050301757450158e-05, "loss": 0.34589532017707825, "step": 3753 }, { "epoch": 1.0201086956521739, "grad_norm": 1.095147498723779, "learning_rate": 1.0498530087375199e-05, "loss": 0.4409634470939636, "step": 3754 }, { "epoch": 1.0203804347826086, "grad_norm": 1.1211024895426782, "learning_rate": 1.0494042499604726e-05, "loss": 0.3865218758583069, "step": 3755 }, { "epoch": 1.0206521739130434, "grad_norm": 1.253147182598253, "learning_rate": 1.0489554812096128e-05, "loss": 0.4504491686820984, "step": 3756 }, { "epoch": 1.0209239130434782, "grad_norm": 0.9633591840386838, "learning_rate": 1.0485067025755384e-05, "loss": 0.39404571056365967, "step": 3757 }, { "epoch": 1.021195652173913, "grad_norm": 1.1119045807824228, "learning_rate": 1.0480579141488499e-05, "loss": 0.4665203094482422, "step": 3758 }, { "epoch": 1.021467391304348, "grad_norm": 1.1342257633814057, "learning_rate": 1.047609116020149e-05, "loss": 0.4557115435600281, "step": 3759 }, { "epoch": 1.0217391304347827, "grad_norm": 1.0068514062401483, "learning_rate": 1.04716030828004e-05, "loss": 0.39124855399131775, "step": 3760 }, { "epoch": 1.0220108695652175, "grad_norm": 1.2238882451512652, "learning_rate": 1.046711491019129e-05, "loss": 0.4821348190307617, "step": 3761 }, { "epoch": 1.0222826086956522, "grad_norm": 1.0576986395710701, "learning_rate": 1.0462626643280239e-05, "loss": 0.40901368856430054, "step": 3762 }, { "epoch": 1.022554347826087, "grad_norm": 2.296630499089273, "learning_rate": 1.0458138282973345e-05, "loss": 0.29731592535972595, "step": 3763 }, { "epoch": 1.0228260869565218, "grad_norm": 1.100083276495441, "learning_rate": 1.045364983017673e-05, "loss": 0.443886935710907, "step": 3764 }, { "epoch": 1.0230978260869565, "grad_norm": 1.218970766011567, "learning_rate": 1.0449161285796526e-05, "loss": 0.5059449672698975, "step": 3765 }, { "epoch": 1.0233695652173913, "grad_norm": 1.1886235571067512, "learning_rate": 1.0444672650738891e-05, "loss": 0.4262940287590027, "step": 3766 }, { "epoch": 1.023641304347826, "grad_norm": 1.0582577256611485, "learning_rate": 1.0440183925909998e-05, "loss": 0.4667292833328247, "step": 3767 }, { "epoch": 1.0239130434782608, "grad_norm": 1.0267726840824558, "learning_rate": 1.0435695112216033e-05, "loss": 0.4120858311653137, "step": 3768 }, { "epoch": 1.0241847826086956, "grad_norm": 0.9245481577786158, "learning_rate": 1.0431206210563211e-05, "loss": 0.32149139046669006, "step": 3769 }, { "epoch": 1.0244565217391304, "grad_norm": 1.2113259706927129, "learning_rate": 1.0426717221857756e-05, "loss": 0.44666481018066406, "step": 3770 }, { "epoch": 1.0247282608695651, "grad_norm": 1.10249197668079, "learning_rate": 1.0422228147005914e-05, "loss": 0.3822609782218933, "step": 3771 }, { "epoch": 1.025, "grad_norm": 1.0761109028059916, "learning_rate": 1.0417738986913948e-05, "loss": 0.39130422472953796, "step": 3772 }, { "epoch": 1.0252717391304347, "grad_norm": 1.1883261262910922, "learning_rate": 1.0413249742488132e-05, "loss": 0.4847264289855957, "step": 3773 }, { "epoch": 1.0255434782608697, "grad_norm": 1.1327247297157577, "learning_rate": 1.040876041463477e-05, "loss": 0.40768563747406006, "step": 3774 }, { "epoch": 1.0258152173913044, "grad_norm": 0.8864851082499942, "learning_rate": 1.040427100426017e-05, "loss": 0.38274043798446655, "step": 3775 }, { "epoch": 1.0260869565217392, "grad_norm": 0.9927252414162543, "learning_rate": 1.039978151227066e-05, "loss": 0.3818967938423157, "step": 3776 }, { "epoch": 1.026358695652174, "grad_norm": 1.130204361046462, "learning_rate": 1.0395291939572593e-05, "loss": 0.4449688792228699, "step": 3777 }, { "epoch": 1.0266304347826087, "grad_norm": 1.28506337959621, "learning_rate": 1.0390802287072323e-05, "loss": 0.46940749883651733, "step": 3778 }, { "epoch": 1.0269021739130435, "grad_norm": 0.8979426699655916, "learning_rate": 1.0386312555676236e-05, "loss": 0.30867624282836914, "step": 3779 }, { "epoch": 1.0271739130434783, "grad_norm": 0.9858625539401253, "learning_rate": 1.0381822746290722e-05, "loss": 0.3729749321937561, "step": 3780 }, { "epoch": 1.027445652173913, "grad_norm": 0.9750646699514061, "learning_rate": 1.0377332859822192e-05, "loss": 0.3898220658302307, "step": 3781 }, { "epoch": 1.0277173913043478, "grad_norm": 1.0344790543469307, "learning_rate": 1.0372842897177074e-05, "loss": 0.4116407036781311, "step": 3782 }, { "epoch": 1.0279891304347826, "grad_norm": 1.0425695210329824, "learning_rate": 1.0368352859261805e-05, "loss": 0.413814514875412, "step": 3783 }, { "epoch": 1.0282608695652173, "grad_norm": 1.1325738747567982, "learning_rate": 1.0363862746982847e-05, "loss": 0.4375690817832947, "step": 3784 }, { "epoch": 1.028532608695652, "grad_norm": 0.9813657068118296, "learning_rate": 1.0359372561246668e-05, "loss": 0.33852940797805786, "step": 3785 }, { "epoch": 1.0288043478260869, "grad_norm": 0.9547482383834931, "learning_rate": 1.0354882302959752e-05, "loss": 0.3812822997570038, "step": 3786 }, { "epoch": 1.0290760869565216, "grad_norm": 1.095276687840936, "learning_rate": 1.0350391973028604e-05, "loss": 0.4256644546985626, "step": 3787 }, { "epoch": 1.0293478260869566, "grad_norm": 1.0039193122590995, "learning_rate": 1.0345901572359735e-05, "loss": 0.38441202044487, "step": 3788 }, { "epoch": 1.0296195652173914, "grad_norm": 0.9637282332371455, "learning_rate": 1.034141110185968e-05, "loss": 0.3710825741291046, "step": 3789 }, { "epoch": 1.0298913043478262, "grad_norm": 1.1072968750881715, "learning_rate": 1.033692056243498e-05, "loss": 0.48603498935699463, "step": 3790 }, { "epoch": 1.030163043478261, "grad_norm": 1.2294851406791476, "learning_rate": 1.0332429954992191e-05, "loss": 0.5240777134895325, "step": 3791 }, { "epoch": 1.0304347826086957, "grad_norm": 1.0152936780234292, "learning_rate": 1.0327939280437887e-05, "loss": 0.3887942135334015, "step": 3792 }, { "epoch": 1.0307065217391305, "grad_norm": 1.0111288710149475, "learning_rate": 1.0323448539678653e-05, "loss": 0.3821766972541809, "step": 3793 }, { "epoch": 1.0309782608695652, "grad_norm": 1.256841579023887, "learning_rate": 1.0318957733621085e-05, "loss": 0.48140349984169006, "step": 3794 }, { "epoch": 1.03125, "grad_norm": 1.163290935390121, "learning_rate": 1.0314466863171794e-05, "loss": 0.42684441804885864, "step": 3795 }, { "epoch": 1.0315217391304348, "grad_norm": 1.1571684039687853, "learning_rate": 1.0309975929237408e-05, "loss": 0.39459457993507385, "step": 3796 }, { "epoch": 1.0317934782608695, "grad_norm": 1.0769738393508097, "learning_rate": 1.0305484932724563e-05, "loss": 0.4205949902534485, "step": 3797 }, { "epoch": 1.0320652173913043, "grad_norm": 1.0337405463249814, "learning_rate": 1.0300993874539906e-05, "loss": 0.35355275869369507, "step": 3798 }, { "epoch": 1.032336956521739, "grad_norm": 1.0828366526057425, "learning_rate": 1.02965027555901e-05, "loss": 0.44290652871131897, "step": 3799 }, { "epoch": 1.0326086956521738, "grad_norm": 1.1438046703559173, "learning_rate": 1.0292011576781826e-05, "loss": 0.41923093795776367, "step": 3800 }, { "epoch": 1.0328804347826086, "grad_norm": 0.9992480770901173, "learning_rate": 1.0287520339021763e-05, "loss": 0.3489561676979065, "step": 3801 }, { "epoch": 1.0331521739130434, "grad_norm": 1.100910927281952, "learning_rate": 1.0283029043216613e-05, "loss": 0.4624503552913666, "step": 3802 }, { "epoch": 1.0334239130434784, "grad_norm": 1.4534889501793944, "learning_rate": 1.0278537690273088e-05, "loss": 0.5424986481666565, "step": 3803 }, { "epoch": 1.0336956521739131, "grad_norm": 1.0302224765866923, "learning_rate": 1.0274046281097902e-05, "loss": 0.4220386743545532, "step": 3804 }, { "epoch": 1.033967391304348, "grad_norm": 1.1789751760335623, "learning_rate": 1.02695548165978e-05, "loss": 0.44429197907447815, "step": 3805 }, { "epoch": 1.0342391304347827, "grad_norm": 1.1475036360529558, "learning_rate": 1.0265063297679514e-05, "loss": 0.4276505410671234, "step": 3806 }, { "epoch": 1.0345108695652174, "grad_norm": 1.095366666987261, "learning_rate": 1.0260571725249808e-05, "loss": 0.45050984621047974, "step": 3807 }, { "epoch": 1.0347826086956522, "grad_norm": 1.0209378976554846, "learning_rate": 1.0256080100215448e-05, "loss": 0.3955444097518921, "step": 3808 }, { "epoch": 1.035054347826087, "grad_norm": 1.0268256105691498, "learning_rate": 1.0251588423483205e-05, "loss": 0.35738590359687805, "step": 3809 }, { "epoch": 1.0353260869565217, "grad_norm": 1.163327109858212, "learning_rate": 1.0247096695959877e-05, "loss": 0.3867539167404175, "step": 3810 }, { "epoch": 1.0355978260869565, "grad_norm": 1.0569616467931202, "learning_rate": 1.0242604918552245e-05, "loss": 0.3965910077095032, "step": 3811 }, { "epoch": 1.0358695652173913, "grad_norm": 1.2104559549279341, "learning_rate": 1.023811309216713e-05, "loss": 0.4502255320549011, "step": 3812 }, { "epoch": 1.036141304347826, "grad_norm": 1.1466323138866603, "learning_rate": 1.0233621217711348e-05, "loss": 0.44729703664779663, "step": 3813 }, { "epoch": 1.0364130434782608, "grad_norm": 1.139837713837362, "learning_rate": 1.0229129296091718e-05, "loss": 0.43094950914382935, "step": 3814 }, { "epoch": 1.0366847826086956, "grad_norm": 1.034897663174964, "learning_rate": 1.0224637328215086e-05, "loss": 0.37843894958496094, "step": 3815 }, { "epoch": 1.0369565217391303, "grad_norm": 0.977941368060972, "learning_rate": 1.022014531498829e-05, "loss": 0.39298349618911743, "step": 3816 }, { "epoch": 1.0372282608695653, "grad_norm": 1.1803835659521993, "learning_rate": 1.021565325731819e-05, "loss": 0.48185235261917114, "step": 3817 }, { "epoch": 1.0375, "grad_norm": 1.051760490090944, "learning_rate": 1.0211161156111653e-05, "loss": 0.40317749977111816, "step": 3818 }, { "epoch": 1.0377717391304349, "grad_norm": 1.0002413026945924, "learning_rate": 1.0206669012275546e-05, "loss": 0.3911241888999939, "step": 3819 }, { "epoch": 1.0380434782608696, "grad_norm": 1.045969261943935, "learning_rate": 1.020217682671675e-05, "loss": 0.4393240213394165, "step": 3820 }, { "epoch": 1.0383152173913044, "grad_norm": 1.2214721007631741, "learning_rate": 1.0197684600342162e-05, "loss": 0.4851042628288269, "step": 3821 }, { "epoch": 1.0385869565217392, "grad_norm": 1.0886917805170713, "learning_rate": 1.0193192334058672e-05, "loss": 0.37406283617019653, "step": 3822 }, { "epoch": 1.038858695652174, "grad_norm": 0.9509723138974322, "learning_rate": 1.0188700028773194e-05, "loss": 0.34679603576660156, "step": 3823 }, { "epoch": 1.0391304347826087, "grad_norm": 1.366698559513171, "learning_rate": 1.0184207685392632e-05, "loss": 0.581727147102356, "step": 3824 }, { "epoch": 1.0394021739130435, "grad_norm": 1.1280349099193239, "learning_rate": 1.0179715304823921e-05, "loss": 0.462709903717041, "step": 3825 }, { "epoch": 1.0396739130434782, "grad_norm": 1.0700652403641489, "learning_rate": 1.0175222887973979e-05, "loss": 0.4794507622718811, "step": 3826 }, { "epoch": 1.039945652173913, "grad_norm": 0.8677020873231902, "learning_rate": 1.017073043574975e-05, "loss": 0.2888489365577698, "step": 3827 }, { "epoch": 1.0402173913043478, "grad_norm": 1.1953857839138293, "learning_rate": 1.0166237949058173e-05, "loss": 0.45335596799850464, "step": 3828 }, { "epoch": 1.0404891304347825, "grad_norm": 1.1262006998268737, "learning_rate": 1.01617454288062e-05, "loss": 0.4269137978553772, "step": 3829 }, { "epoch": 1.0407608695652173, "grad_norm": 1.1671242236942263, "learning_rate": 1.0157252875900788e-05, "loss": 0.4345662593841553, "step": 3830 }, { "epoch": 1.041032608695652, "grad_norm": 1.0892034047995003, "learning_rate": 1.0152760291248904e-05, "loss": 0.4316064119338989, "step": 3831 }, { "epoch": 1.041304347826087, "grad_norm": 0.9174794464518612, "learning_rate": 1.0148267675757516e-05, "loss": 0.34892410039901733, "step": 3832 }, { "epoch": 1.0415760869565218, "grad_norm": 1.061280287225869, "learning_rate": 1.0143775030333601e-05, "loss": 0.37738022208213806, "step": 3833 }, { "epoch": 1.0418478260869566, "grad_norm": 1.2387333371766067, "learning_rate": 1.013928235588414e-05, "loss": 0.42950505018234253, "step": 3834 }, { "epoch": 1.0421195652173914, "grad_norm": 1.135569783574096, "learning_rate": 1.0134789653316126e-05, "loss": 0.48557353019714355, "step": 3835 }, { "epoch": 1.0423913043478261, "grad_norm": 0.9944742174412206, "learning_rate": 1.0130296923536554e-05, "loss": 0.36808475852012634, "step": 3836 }, { "epoch": 1.0426630434782609, "grad_norm": 1.046756362817808, "learning_rate": 1.0125804167452418e-05, "loss": 0.3711630702018738, "step": 3837 }, { "epoch": 1.0429347826086957, "grad_norm": 1.1763720841575394, "learning_rate": 1.0121311385970724e-05, "loss": 0.44262486696243286, "step": 3838 }, { "epoch": 1.0432065217391304, "grad_norm": 1.1884394396121334, "learning_rate": 1.0116818579998486e-05, "loss": 0.3948103189468384, "step": 3839 }, { "epoch": 1.0434782608695652, "grad_norm": 1.2030642087873107, "learning_rate": 1.0112325750442717e-05, "loss": 0.4001016616821289, "step": 3840 }, { "epoch": 1.04375, "grad_norm": 1.1348584642013042, "learning_rate": 1.0107832898210438e-05, "loss": 0.38951632380485535, "step": 3841 }, { "epoch": 1.0440217391304347, "grad_norm": 1.2243480712878805, "learning_rate": 1.0103340024208674e-05, "loss": 0.5019509196281433, "step": 3842 }, { "epoch": 1.0442934782608695, "grad_norm": 0.9785060296414173, "learning_rate": 1.0098847129344451e-05, "loss": 0.3414373993873596, "step": 3843 }, { "epoch": 1.0445652173913043, "grad_norm": 1.2476843216386495, "learning_rate": 1.0094354214524806e-05, "loss": 0.5084272623062134, "step": 3844 }, { "epoch": 1.044836956521739, "grad_norm": 1.0327067511445642, "learning_rate": 1.0089861280656772e-05, "loss": 0.3781873881816864, "step": 3845 }, { "epoch": 1.045108695652174, "grad_norm": 1.1988744993877813, "learning_rate": 1.0085368328647395e-05, "loss": 0.42383846640586853, "step": 3846 }, { "epoch": 1.0453804347826088, "grad_norm": 1.1314195668489528, "learning_rate": 1.0080875359403715e-05, "loss": 0.4024072587490082, "step": 3847 }, { "epoch": 1.0456521739130435, "grad_norm": 0.980323736004549, "learning_rate": 1.007638237383278e-05, "loss": 0.3607500195503235, "step": 3848 }, { "epoch": 1.0459239130434783, "grad_norm": 0.9788621277017243, "learning_rate": 1.0071889372841646e-05, "loss": 0.3695625066757202, "step": 3849 }, { "epoch": 1.046195652173913, "grad_norm": 1.1080749316762402, "learning_rate": 1.0067396357337363e-05, "loss": 0.47546398639678955, "step": 3850 }, { "epoch": 1.0464673913043478, "grad_norm": 1.0438406295739355, "learning_rate": 1.0062903328226995e-05, "loss": 0.4163902699947357, "step": 3851 }, { "epoch": 1.0467391304347826, "grad_norm": 0.9930878130254092, "learning_rate": 1.0058410286417592e-05, "loss": 0.3663593828678131, "step": 3852 }, { "epoch": 1.0470108695652174, "grad_norm": 1.1014360748281924, "learning_rate": 1.0053917232816227e-05, "loss": 0.42455291748046875, "step": 3853 }, { "epoch": 1.0472826086956522, "grad_norm": 1.07920995521073, "learning_rate": 1.0049424168329958e-05, "loss": 0.39171504974365234, "step": 3854 }, { "epoch": 1.047554347826087, "grad_norm": 1.2292504394890227, "learning_rate": 1.0044931093865856e-05, "loss": 0.4832305312156677, "step": 3855 }, { "epoch": 1.0478260869565217, "grad_norm": 1.0529867742915813, "learning_rate": 1.0040438010330988e-05, "loss": 0.3775864839553833, "step": 3856 }, { "epoch": 1.0480978260869565, "grad_norm": 0.9976301005607607, "learning_rate": 1.0035944918632429e-05, "loss": 0.41400569677352905, "step": 3857 }, { "epoch": 1.0483695652173912, "grad_norm": 1.1118251757968105, "learning_rate": 1.0031451819677249e-05, "loss": 0.3928610682487488, "step": 3858 }, { "epoch": 1.048641304347826, "grad_norm": 1.262805030426082, "learning_rate": 1.0026958714372523e-05, "loss": 0.5149139165878296, "step": 3859 }, { "epoch": 1.048913043478261, "grad_norm": 1.3623991335826489, "learning_rate": 1.0022465603625326e-05, "loss": 0.49290257692337036, "step": 3860 }, { "epoch": 1.0491847826086957, "grad_norm": 1.2842960520754918, "learning_rate": 1.0017972488342736e-05, "loss": 0.4941542148590088, "step": 3861 }, { "epoch": 1.0494565217391305, "grad_norm": 1.1901537430116789, "learning_rate": 1.0013479369431832e-05, "loss": 0.4330548346042633, "step": 3862 }, { "epoch": 1.0497282608695653, "grad_norm": 1.121636535835043, "learning_rate": 1.0008986247799685e-05, "loss": 0.42549723386764526, "step": 3863 }, { "epoch": 1.05, "grad_norm": 1.2097958888204792, "learning_rate": 1.0004493124353384e-05, "loss": 0.49310997128486633, "step": 3864 }, { "epoch": 1.0502717391304348, "grad_norm": 1.1297261618291816, "learning_rate": 1e-05, "loss": 0.4212944805622101, "step": 3865 }, { "epoch": 1.0505434782608696, "grad_norm": 1.0397220458357868, "learning_rate": 9.99550687564662e-06, "loss": 0.4275500774383545, "step": 3866 }, { "epoch": 1.0508152173913043, "grad_norm": 1.1745234515350171, "learning_rate": 9.991013752200318e-06, "loss": 0.40179207921028137, "step": 3867 }, { "epoch": 1.0510869565217391, "grad_norm": 1.089611878850974, "learning_rate": 9.986520630568173e-06, "loss": 0.46013718843460083, "step": 3868 }, { "epoch": 1.0513586956521739, "grad_norm": 1.0913253431244627, "learning_rate": 9.982027511657266e-06, "loss": 0.37640634179115295, "step": 3869 }, { "epoch": 1.0516304347826086, "grad_norm": 1.1421500804957754, "learning_rate": 9.977534396374677e-06, "loss": 0.41547900438308716, "step": 3870 }, { "epoch": 1.0519021739130434, "grad_norm": 1.2263990624191923, "learning_rate": 9.973041285627477e-06, "loss": 0.48697149753570557, "step": 3871 }, { "epoch": 1.0521739130434782, "grad_norm": 1.0943365407897443, "learning_rate": 9.968548180322754e-06, "loss": 0.3878243565559387, "step": 3872 }, { "epoch": 1.052445652173913, "grad_norm": 1.1358735880479467, "learning_rate": 9.96405508136757e-06, "loss": 0.4145349860191345, "step": 3873 }, { "epoch": 1.052717391304348, "grad_norm": 1.0933999558499958, "learning_rate": 9.959561989669015e-06, "loss": 0.3914410471916199, "step": 3874 }, { "epoch": 1.0529891304347827, "grad_norm": 1.2727584952434454, "learning_rate": 9.955068906134149e-06, "loss": 0.476749986410141, "step": 3875 }, { "epoch": 1.0532608695652175, "grad_norm": 1.048433196696981, "learning_rate": 9.950575831670044e-06, "loss": 0.36747193336486816, "step": 3876 }, { "epoch": 1.0535326086956522, "grad_norm": 1.0949292041258827, "learning_rate": 9.946082767183778e-06, "loss": 0.39610084891319275, "step": 3877 }, { "epoch": 1.053804347826087, "grad_norm": 1.0291461209827808, "learning_rate": 9.94158971358241e-06, "loss": 0.34353315830230713, "step": 3878 }, { "epoch": 1.0540760869565218, "grad_norm": 1.2374889209009443, "learning_rate": 9.93709667177301e-06, "loss": 0.5469303727149963, "step": 3879 }, { "epoch": 1.0543478260869565, "grad_norm": 1.1735416853544574, "learning_rate": 9.932603642662637e-06, "loss": 0.46445947885513306, "step": 3880 }, { "epoch": 1.0546195652173913, "grad_norm": 0.9830369943140028, "learning_rate": 9.928110627158357e-06, "loss": 0.33562982082366943, "step": 3881 }, { "epoch": 1.054891304347826, "grad_norm": 1.0539162624568794, "learning_rate": 9.923617626167222e-06, "loss": 0.3490658402442932, "step": 3882 }, { "epoch": 1.0551630434782608, "grad_norm": 1.114157994928872, "learning_rate": 9.91912464059629e-06, "loss": 0.43838053941726685, "step": 3883 }, { "epoch": 1.0554347826086956, "grad_norm": 1.0586060144930667, "learning_rate": 9.91463167135261e-06, "loss": 0.3329299986362457, "step": 3884 }, { "epoch": 1.0557065217391304, "grad_norm": 1.1912779525778738, "learning_rate": 9.91013871934323e-06, "loss": 0.4216713607311249, "step": 3885 }, { "epoch": 1.0559782608695651, "grad_norm": 1.0161903632206608, "learning_rate": 9.905645785475197e-06, "loss": 0.3996860384941101, "step": 3886 }, { "epoch": 1.05625, "grad_norm": 1.0287196767033384, "learning_rate": 9.90115287065555e-06, "loss": 0.3870985507965088, "step": 3887 }, { "epoch": 1.0565217391304347, "grad_norm": 1.1975437072379889, "learning_rate": 9.89665997579133e-06, "loss": 0.48815295100212097, "step": 3888 }, { "epoch": 1.0567934782608697, "grad_norm": 1.19620909878616, "learning_rate": 9.892167101789563e-06, "loss": 0.5139625072479248, "step": 3889 }, { "epoch": 1.0570652173913044, "grad_norm": 1.3878027134765694, "learning_rate": 9.887674249557284e-06, "loss": 0.46136027574539185, "step": 3890 }, { "epoch": 1.0573369565217392, "grad_norm": 1.2099553954506002, "learning_rate": 9.883181420001517e-06, "loss": 0.4244346618652344, "step": 3891 }, { "epoch": 1.057608695652174, "grad_norm": 1.0721533703630108, "learning_rate": 9.878688614029279e-06, "loss": 0.36006027460098267, "step": 3892 }, { "epoch": 1.0578804347826087, "grad_norm": 1.0363150336768214, "learning_rate": 9.874195832547589e-06, "loss": 0.4168555438518524, "step": 3893 }, { "epoch": 1.0581521739130435, "grad_norm": 1.2356779475213089, "learning_rate": 9.869703076463448e-06, "loss": 0.44814443588256836, "step": 3894 }, { "epoch": 1.0584239130434783, "grad_norm": 1.2237602023533616, "learning_rate": 9.865210346683878e-06, "loss": 0.4635205566883087, "step": 3895 }, { "epoch": 1.058695652173913, "grad_norm": 1.1101902498762874, "learning_rate": 9.86071764411586e-06, "loss": 0.39481088519096375, "step": 3896 }, { "epoch": 1.0589673913043478, "grad_norm": 1.2376949788330958, "learning_rate": 9.856224969666402e-06, "loss": 0.4049378037452698, "step": 3897 }, { "epoch": 1.0592391304347826, "grad_norm": 1.1396382551084452, "learning_rate": 9.851732324242486e-06, "loss": 0.4794064164161682, "step": 3898 }, { "epoch": 1.0595108695652173, "grad_norm": 1.0684881777650914, "learning_rate": 9.8472397087511e-06, "loss": 0.3583727478981018, "step": 3899 }, { "epoch": 1.059782608695652, "grad_norm": 1.088785478341379, "learning_rate": 9.842747124099216e-06, "loss": 0.39723220467567444, "step": 3900 }, { "epoch": 1.0600543478260869, "grad_norm": 1.1808533484220973, "learning_rate": 9.838254571193804e-06, "loss": 0.43873825669288635, "step": 3901 }, { "epoch": 1.0603260869565216, "grad_norm": 1.0574797259673583, "learning_rate": 9.833762050941832e-06, "loss": 0.4507177472114563, "step": 3902 }, { "epoch": 1.0605978260869566, "grad_norm": 1.0229627538833068, "learning_rate": 9.829269564250254e-06, "loss": 0.33629488945007324, "step": 3903 }, { "epoch": 1.0608695652173914, "grad_norm": 0.9192183700918011, "learning_rate": 9.824777112026025e-06, "loss": 0.2934683561325073, "step": 3904 }, { "epoch": 1.0611413043478262, "grad_norm": 1.0091129320922412, "learning_rate": 9.820284695176082e-06, "loss": 0.37602293491363525, "step": 3905 }, { "epoch": 1.061413043478261, "grad_norm": 1.0239994669228496, "learning_rate": 9.81579231460737e-06, "loss": 0.3379532992839813, "step": 3906 }, { "epoch": 1.0616847826086957, "grad_norm": 1.1459239979261013, "learning_rate": 9.81129997122681e-06, "loss": 0.43461042642593384, "step": 3907 }, { "epoch": 1.0619565217391305, "grad_norm": 0.993638862119444, "learning_rate": 9.806807665941331e-06, "loss": 0.3285162150859833, "step": 3908 }, { "epoch": 1.0622282608695652, "grad_norm": 1.1536263036360053, "learning_rate": 9.802315399657843e-06, "loss": 0.45262452960014343, "step": 3909 }, { "epoch": 1.0625, "grad_norm": 1.0411105470325277, "learning_rate": 9.797823173283253e-06, "loss": 0.4430305063724518, "step": 3910 }, { "epoch": 1.0627717391304348, "grad_norm": 0.9941535940829498, "learning_rate": 9.79333098772446e-06, "loss": 0.3348478078842163, "step": 3911 }, { "epoch": 1.0630434782608695, "grad_norm": 1.1000877614063438, "learning_rate": 9.78883884388835e-06, "loss": 0.3743116855621338, "step": 3912 }, { "epoch": 1.0633152173913043, "grad_norm": 1.049419322342736, "learning_rate": 9.784346742681813e-06, "loss": 0.41879427433013916, "step": 3913 }, { "epoch": 1.063586956521739, "grad_norm": 1.1275884051657405, "learning_rate": 9.779854685011711e-06, "loss": 0.37918388843536377, "step": 3914 }, { "epoch": 1.0638586956521738, "grad_norm": 1.1762924161817407, "learning_rate": 9.775362671784918e-06, "loss": 0.45708325505256653, "step": 3915 }, { "epoch": 1.0641304347826086, "grad_norm": 1.1872528395466138, "learning_rate": 9.770870703908284e-06, "loss": 0.4103816747665405, "step": 3916 }, { "epoch": 1.0644021739130434, "grad_norm": 1.2465232465605094, "learning_rate": 9.766378782288658e-06, "loss": 0.41995853185653687, "step": 3917 }, { "epoch": 1.0646739130434784, "grad_norm": 1.3442359507400303, "learning_rate": 9.761886907832873e-06, "loss": 0.4532601833343506, "step": 3918 }, { "epoch": 1.0649456521739131, "grad_norm": 1.2068025656866506, "learning_rate": 9.757395081447756e-06, "loss": 0.4713829755783081, "step": 3919 }, { "epoch": 1.065217391304348, "grad_norm": 0.9731058943820625, "learning_rate": 9.75290330404013e-06, "loss": 0.2758164405822754, "step": 3920 }, { "epoch": 1.0654891304347827, "grad_norm": 1.1240736622138463, "learning_rate": 9.748411576516794e-06, "loss": 0.3871764540672302, "step": 3921 }, { "epoch": 1.0657608695652174, "grad_norm": 1.0523849534563623, "learning_rate": 9.743919899784555e-06, "loss": 0.4218860864639282, "step": 3922 }, { "epoch": 1.0660326086956522, "grad_norm": 1.2565267099492503, "learning_rate": 9.739428274750192e-06, "loss": 0.4404551386833191, "step": 3923 }, { "epoch": 1.066304347826087, "grad_norm": 1.2305329913492522, "learning_rate": 9.734936702320488e-06, "loss": 0.46002137660980225, "step": 3924 }, { "epoch": 1.0665760869565217, "grad_norm": 1.2251851920282302, "learning_rate": 9.730445183402207e-06, "loss": 0.49262237548828125, "step": 3925 }, { "epoch": 1.0668478260869565, "grad_norm": 1.2184247716189383, "learning_rate": 9.7259537189021e-06, "loss": 0.4377801716327667, "step": 3926 }, { "epoch": 1.0671195652173913, "grad_norm": 1.2371504918256817, "learning_rate": 9.721462309726919e-06, "loss": 0.41870760917663574, "step": 3927 }, { "epoch": 1.067391304347826, "grad_norm": 1.190415294354739, "learning_rate": 9.71697095678339e-06, "loss": 0.45479530096054077, "step": 3928 }, { "epoch": 1.0676630434782608, "grad_norm": 1.1091581998289668, "learning_rate": 9.712479660978242e-06, "loss": 0.47401684522628784, "step": 3929 }, { "epoch": 1.0679347826086956, "grad_norm": 1.076591793393692, "learning_rate": 9.707988423218175e-06, "loss": 0.45212459564208984, "step": 3930 }, { "epoch": 1.0682065217391303, "grad_norm": 1.2508317265273459, "learning_rate": 9.703497244409901e-06, "loss": 0.43528544902801514, "step": 3931 }, { "epoch": 1.0684782608695653, "grad_norm": 1.1894050563737641, "learning_rate": 9.699006125460095e-06, "loss": 0.3806459903717041, "step": 3932 }, { "epoch": 1.06875, "grad_norm": 0.9306155068401039, "learning_rate": 9.694515067275439e-06, "loss": 0.2979292571544647, "step": 3933 }, { "epoch": 1.0690217391304349, "grad_norm": 1.1946579079317965, "learning_rate": 9.690024070762597e-06, "loss": 0.4577926695346832, "step": 3934 }, { "epoch": 1.0692934782608696, "grad_norm": 1.1065926053361406, "learning_rate": 9.685533136828207e-06, "loss": 0.3821651041507721, "step": 3935 }, { "epoch": 1.0695652173913044, "grad_norm": 1.2002544613576578, "learning_rate": 9.681042266378918e-06, "loss": 0.474776029586792, "step": 3936 }, { "epoch": 1.0698369565217392, "grad_norm": 1.159073368760966, "learning_rate": 9.67655146032135e-06, "loss": 0.4534207582473755, "step": 3937 }, { "epoch": 1.070108695652174, "grad_norm": 1.0862042853711462, "learning_rate": 9.672060719562114e-06, "loss": 0.4390857219696045, "step": 3938 }, { "epoch": 1.0703804347826087, "grad_norm": 1.049675286219937, "learning_rate": 9.66757004500781e-06, "loss": 0.39696449041366577, "step": 3939 }, { "epoch": 1.0706521739130435, "grad_norm": 1.3609612377340226, "learning_rate": 9.663079437565023e-06, "loss": 0.45389324426651, "step": 3940 }, { "epoch": 1.0709239130434782, "grad_norm": 1.232653708755547, "learning_rate": 9.658588898140322e-06, "loss": 0.48753833770751953, "step": 3941 }, { "epoch": 1.071195652173913, "grad_norm": 1.0443840467549734, "learning_rate": 9.654098427640268e-06, "loss": 0.38835567235946655, "step": 3942 }, { "epoch": 1.0714673913043478, "grad_norm": 1.1356739288796514, "learning_rate": 9.649608026971401e-06, "loss": 0.44129401445388794, "step": 3943 }, { "epoch": 1.0717391304347825, "grad_norm": 1.2278704833781238, "learning_rate": 9.645117697040251e-06, "loss": 0.4902278184890747, "step": 3944 }, { "epoch": 1.0720108695652173, "grad_norm": 1.1678289373765163, "learning_rate": 9.640627438753337e-06, "loss": 0.4332742691040039, "step": 3945 }, { "epoch": 1.072282608695652, "grad_norm": 1.329196801339887, "learning_rate": 9.636137253017156e-06, "loss": 0.5535984635353088, "step": 3946 }, { "epoch": 1.072554347826087, "grad_norm": 1.0270086417475708, "learning_rate": 9.631647140738197e-06, "loss": 0.3921666145324707, "step": 3947 }, { "epoch": 1.0728260869565218, "grad_norm": 0.9692094262824484, "learning_rate": 9.627157102822928e-06, "loss": 0.3530333638191223, "step": 3948 }, { "epoch": 1.0730978260869566, "grad_norm": 1.1095949643269813, "learning_rate": 9.622667140177812e-06, "loss": 0.37748488783836365, "step": 3949 }, { "epoch": 1.0733695652173914, "grad_norm": 1.1709886374656002, "learning_rate": 9.61817725370928e-06, "loss": 0.3475607633590698, "step": 3950 }, { "epoch": 1.0736413043478261, "grad_norm": 1.1533050011514725, "learning_rate": 9.613687444323765e-06, "loss": 0.5419265031814575, "step": 3951 }, { "epoch": 1.0739130434782609, "grad_norm": 1.2489517879726237, "learning_rate": 9.60919771292768e-06, "loss": 0.5120820999145508, "step": 3952 }, { "epoch": 1.0741847826086957, "grad_norm": 1.0010901830144916, "learning_rate": 9.604708060427408e-06, "loss": 0.35541650652885437, "step": 3953 }, { "epoch": 1.0744565217391304, "grad_norm": 1.1940139500383213, "learning_rate": 9.600218487729341e-06, "loss": 0.4606730341911316, "step": 3954 }, { "epoch": 1.0747282608695652, "grad_norm": 1.0955755598410568, "learning_rate": 9.595728995739832e-06, "loss": 0.4380890727043152, "step": 3955 }, { "epoch": 1.075, "grad_norm": 1.091683812090575, "learning_rate": 9.591239585365233e-06, "loss": 0.42367634177207947, "step": 3956 }, { "epoch": 1.0752717391304347, "grad_norm": 1.1348371482568123, "learning_rate": 9.586750257511868e-06, "loss": 0.3954507112503052, "step": 3957 }, { "epoch": 1.0755434782608695, "grad_norm": 1.1902387425732461, "learning_rate": 9.582261013086056e-06, "loss": 0.39178043603897095, "step": 3958 }, { "epoch": 1.0758152173913043, "grad_norm": 1.2439111140039012, "learning_rate": 9.577771852994088e-06, "loss": 0.4192986488342285, "step": 3959 }, { "epoch": 1.0760869565217392, "grad_norm": 1.1451242661783487, "learning_rate": 9.573282778142246e-06, "loss": 0.48842883110046387, "step": 3960 }, { "epoch": 1.076358695652174, "grad_norm": 1.0387426235354096, "learning_rate": 9.568793789436794e-06, "loss": 0.427463173866272, "step": 3961 }, { "epoch": 1.0766304347826088, "grad_norm": 1.2442270762689802, "learning_rate": 9.56430488778397e-06, "loss": 0.4228247106075287, "step": 3962 }, { "epoch": 1.0769021739130435, "grad_norm": 0.752741464040826, "learning_rate": 9.559816074090007e-06, "loss": 0.28160184621810913, "step": 3963 }, { "epoch": 1.0771739130434783, "grad_norm": 1.2399977354017755, "learning_rate": 9.555327349261112e-06, "loss": 0.4750428795814514, "step": 3964 }, { "epoch": 1.077445652173913, "grad_norm": 1.1022118805519334, "learning_rate": 9.550838714203476e-06, "loss": 0.46224695444107056, "step": 3965 }, { "epoch": 1.0777173913043478, "grad_norm": 1.2391989293598065, "learning_rate": 9.546350169823271e-06, "loss": 0.44471126794815063, "step": 3966 }, { "epoch": 1.0779891304347826, "grad_norm": 1.2337577298231925, "learning_rate": 9.541861717026659e-06, "loss": 0.4973453879356384, "step": 3967 }, { "epoch": 1.0782608695652174, "grad_norm": 1.0609526847445625, "learning_rate": 9.537373356719766e-06, "loss": 0.37218761444091797, "step": 3968 }, { "epoch": 1.0785326086956522, "grad_norm": 1.2493802206756575, "learning_rate": 9.532885089808713e-06, "loss": 0.45776546001434326, "step": 3969 }, { "epoch": 1.078804347826087, "grad_norm": 0.8283553312776459, "learning_rate": 9.528396917199607e-06, "loss": 0.3068457841873169, "step": 3970 }, { "epoch": 1.0790760869565217, "grad_norm": 1.0146606328849361, "learning_rate": 9.523908839798514e-06, "loss": 0.3783637285232544, "step": 3971 }, { "epoch": 1.0793478260869565, "grad_norm": 1.0155811557698098, "learning_rate": 9.519420858511508e-06, "loss": 0.3531573414802551, "step": 3972 }, { "epoch": 1.0796195652173912, "grad_norm": 1.365657859487776, "learning_rate": 9.514932974244618e-06, "loss": 0.4557155966758728, "step": 3973 }, { "epoch": 1.079891304347826, "grad_norm": 1.0970558977493416, "learning_rate": 9.510445187903874e-06, "loss": 0.39757150411605835, "step": 3974 }, { "epoch": 1.0801630434782608, "grad_norm": 1.0377983757305016, "learning_rate": 9.505957500395275e-06, "loss": 0.36201047897338867, "step": 3975 }, { "epoch": 1.0804347826086957, "grad_norm": 1.1263796113132738, "learning_rate": 9.501469912624806e-06, "loss": 0.4629632532596588, "step": 3976 }, { "epoch": 1.0807065217391305, "grad_norm": 1.0615338359157622, "learning_rate": 9.496982425498424e-06, "loss": 0.3509795069694519, "step": 3977 }, { "epoch": 1.0809782608695653, "grad_norm": 1.180493035721689, "learning_rate": 9.492495039922071e-06, "loss": 0.40622270107269287, "step": 3978 }, { "epoch": 1.08125, "grad_norm": 0.958900216165248, "learning_rate": 9.488007756801672e-06, "loss": 0.329332172870636, "step": 3979 }, { "epoch": 1.0815217391304348, "grad_norm": 1.4297359927824287, "learning_rate": 9.483520577043121e-06, "loss": 0.5803382992744446, "step": 3980 }, { "epoch": 1.0817934782608696, "grad_norm": 1.0135031306663445, "learning_rate": 9.479033501552303e-06, "loss": 0.3572777509689331, "step": 3981 }, { "epoch": 1.0820652173913043, "grad_norm": 1.027351432980153, "learning_rate": 9.474546531235075e-06, "loss": 0.3548448085784912, "step": 3982 }, { "epoch": 1.0823369565217391, "grad_norm": 1.24595129879191, "learning_rate": 9.470059666997272e-06, "loss": 0.5134743452072144, "step": 3983 }, { "epoch": 1.0826086956521739, "grad_norm": 1.1866538896269165, "learning_rate": 9.465572909744708e-06, "loss": 0.4170117974281311, "step": 3984 }, { "epoch": 1.0828804347826086, "grad_norm": 1.1949402356526753, "learning_rate": 9.461086260383188e-06, "loss": 0.4214951694011688, "step": 3985 }, { "epoch": 1.0831521739130434, "grad_norm": 0.9673570902047961, "learning_rate": 9.456599719818472e-06, "loss": 0.3372223377227783, "step": 3986 }, { "epoch": 1.0834239130434782, "grad_norm": 1.1548828494885144, "learning_rate": 9.452113288956313e-06, "loss": 0.3604336082935333, "step": 3987 }, { "epoch": 1.083695652173913, "grad_norm": 1.1727131772111885, "learning_rate": 9.447626968702442e-06, "loss": 0.49906763434410095, "step": 3988 }, { "epoch": 1.083967391304348, "grad_norm": 1.1905882452203365, "learning_rate": 9.443140759962562e-06, "loss": 0.46468695998191833, "step": 3989 }, { "epoch": 1.0842391304347827, "grad_norm": 1.0738430393135625, "learning_rate": 9.438654663642363e-06, "loss": 0.4142725467681885, "step": 3990 }, { "epoch": 1.0845108695652175, "grad_norm": 1.130710658014415, "learning_rate": 9.434168680647493e-06, "loss": 0.49748504161834717, "step": 3991 }, { "epoch": 1.0847826086956522, "grad_norm": 1.4643060761276718, "learning_rate": 9.429682811883602e-06, "loss": 0.5391616821289062, "step": 3992 }, { "epoch": 1.085054347826087, "grad_norm": 1.0994185226124449, "learning_rate": 9.4251970582563e-06, "loss": 0.4702783226966858, "step": 3993 }, { "epoch": 1.0853260869565218, "grad_norm": 1.1983219745055316, "learning_rate": 9.420711420671178e-06, "loss": 0.4451489746570587, "step": 3994 }, { "epoch": 1.0855978260869565, "grad_norm": 1.0200463156729964, "learning_rate": 9.416225900033806e-06, "loss": 0.43618401885032654, "step": 3995 }, { "epoch": 1.0858695652173913, "grad_norm": 1.1019142763484369, "learning_rate": 9.411740497249726e-06, "loss": 0.4390469789505005, "step": 3996 }, { "epoch": 1.086141304347826, "grad_norm": 1.2783237319173237, "learning_rate": 9.407255213224458e-06, "loss": 0.4397720694541931, "step": 3997 }, { "epoch": 1.0864130434782608, "grad_norm": 1.1704816695548297, "learning_rate": 9.402770048863502e-06, "loss": 0.4636816680431366, "step": 3998 }, { "epoch": 1.0866847826086956, "grad_norm": 1.146605621244608, "learning_rate": 9.398285005072328e-06, "loss": 0.39921820163726807, "step": 3999 }, { "epoch": 1.0869565217391304, "grad_norm": 1.3715866666506156, "learning_rate": 9.393800082756384e-06, "loss": 0.5106947422027588, "step": 4000 }, { "epoch": 1.0872282608695651, "grad_norm": 1.181898395886928, "learning_rate": 9.389315282821097e-06, "loss": 0.4312618374824524, "step": 4001 }, { "epoch": 1.0875, "grad_norm": 1.1297405941053376, "learning_rate": 9.384830606171862e-06, "loss": 0.45898693799972534, "step": 4002 }, { "epoch": 1.0877717391304347, "grad_norm": 1.2296560393211866, "learning_rate": 9.380346053714055e-06, "loss": 0.41575121879577637, "step": 4003 }, { "epoch": 1.0880434782608697, "grad_norm": 0.9800041863710226, "learning_rate": 9.375861626353025e-06, "loss": 0.3500171899795532, "step": 4004 }, { "epoch": 1.0883152173913044, "grad_norm": 1.1877734535611766, "learning_rate": 9.371377324994092e-06, "loss": 0.44396907091140747, "step": 4005 }, { "epoch": 1.0885869565217392, "grad_norm": 0.9227152090143601, "learning_rate": 9.366893150542562e-06, "loss": 0.258401095867157, "step": 4006 }, { "epoch": 1.088858695652174, "grad_norm": 0.8638113670895161, "learning_rate": 9.362409103903697e-06, "loss": 0.28478318452835083, "step": 4007 }, { "epoch": 1.0891304347826087, "grad_norm": 1.1306468165506505, "learning_rate": 9.357925185982757e-06, "loss": 0.4232722520828247, "step": 4008 }, { "epoch": 1.0894021739130435, "grad_norm": 1.0440023389095852, "learning_rate": 9.353441397684948e-06, "loss": 0.347847580909729, "step": 4009 }, { "epoch": 1.0896739130434783, "grad_norm": 1.0419935388661208, "learning_rate": 9.348957739915479e-06, "loss": 0.3111765384674072, "step": 4010 }, { "epoch": 1.089945652173913, "grad_norm": 1.2576022094982768, "learning_rate": 9.344474213579512e-06, "loss": 0.46942204236984253, "step": 4011 }, { "epoch": 1.0902173913043478, "grad_norm": 1.1222875672203594, "learning_rate": 9.339990819582183e-06, "loss": 0.3886765241622925, "step": 4012 }, { "epoch": 1.0904891304347826, "grad_norm": 1.226836668288873, "learning_rate": 9.335507558828616e-06, "loss": 0.4899876117706299, "step": 4013 }, { "epoch": 1.0907608695652173, "grad_norm": 1.3551396742842257, "learning_rate": 9.331024432223896e-06, "loss": 0.4815727174282074, "step": 4014 }, { "epoch": 1.091032608695652, "grad_norm": 1.2366628380007836, "learning_rate": 9.326541440673085e-06, "loss": 0.4047169089317322, "step": 4015 }, { "epoch": 1.0913043478260869, "grad_norm": 1.0834934355724402, "learning_rate": 9.322058585081214e-06, "loss": 0.354007363319397, "step": 4016 }, { "epoch": 1.0915760869565216, "grad_norm": 1.4560769672398266, "learning_rate": 9.317575866353293e-06, "loss": 0.5744339227676392, "step": 4017 }, { "epoch": 1.0918478260869566, "grad_norm": 1.180043508805332, "learning_rate": 9.313093285394295e-06, "loss": 0.42721259593963623, "step": 4018 }, { "epoch": 1.0921195652173914, "grad_norm": 1.0932596570693665, "learning_rate": 9.308610843109179e-06, "loss": 0.4330865144729614, "step": 4019 }, { "epoch": 1.0923913043478262, "grad_norm": 1.025487824929633, "learning_rate": 9.304128540402864e-06, "loss": 0.4168506860733032, "step": 4020 }, { "epoch": 1.092663043478261, "grad_norm": 1.3432240236645605, "learning_rate": 9.299646378180242e-06, "loss": 0.49786561727523804, "step": 4021 }, { "epoch": 1.0929347826086957, "grad_norm": 1.1798050959140334, "learning_rate": 9.295164357346185e-06, "loss": 0.45071330666542053, "step": 4022 }, { "epoch": 1.0932065217391305, "grad_norm": 1.129710627936903, "learning_rate": 9.290682478805524e-06, "loss": 0.40289029479026794, "step": 4023 }, { "epoch": 1.0934782608695652, "grad_norm": 1.2410747107883788, "learning_rate": 9.286200743463077e-06, "loss": 0.4635312855243683, "step": 4024 }, { "epoch": 1.09375, "grad_norm": 1.1280282841176696, "learning_rate": 9.281719152223611e-06, "loss": 0.37947675585746765, "step": 4025 }, { "epoch": 1.0940217391304348, "grad_norm": 1.1012258989207973, "learning_rate": 9.277237705991895e-06, "loss": 0.4135998785495758, "step": 4026 }, { "epoch": 1.0942934782608695, "grad_norm": 1.1574851785197326, "learning_rate": 9.272756405672635e-06, "loss": 0.425392746925354, "step": 4027 }, { "epoch": 1.0945652173913043, "grad_norm": 1.2594225089760072, "learning_rate": 9.268275252170533e-06, "loss": 0.462586373090744, "step": 4028 }, { "epoch": 1.094836956521739, "grad_norm": 1.160244842700014, "learning_rate": 9.26379424639025e-06, "loss": 0.38713520765304565, "step": 4029 }, { "epoch": 1.0951086956521738, "grad_norm": 1.0929014167563702, "learning_rate": 9.259313389236413e-06, "loss": 0.4020523130893707, "step": 4030 }, { "epoch": 1.0953804347826086, "grad_norm": 1.1027344765249747, "learning_rate": 9.254832681613632e-06, "loss": 0.361203134059906, "step": 4031 }, { "epoch": 1.0956521739130434, "grad_norm": 1.1242389060746192, "learning_rate": 9.250352124426478e-06, "loss": 0.4503781199455261, "step": 4032 }, { "epoch": 1.0959239130434784, "grad_norm": 1.2105973438256552, "learning_rate": 9.24587171857949e-06, "loss": 0.5294656157493591, "step": 4033 }, { "epoch": 1.0961956521739131, "grad_norm": 1.220269502619672, "learning_rate": 9.241391464977184e-06, "loss": 0.4321269392967224, "step": 4034 }, { "epoch": 1.096467391304348, "grad_norm": 1.0811599341817097, "learning_rate": 9.23691136452404e-06, "loss": 0.38826191425323486, "step": 4035 }, { "epoch": 1.0967391304347827, "grad_norm": 1.0255755572250655, "learning_rate": 9.232431418124507e-06, "loss": 0.38244926929473877, "step": 4036 }, { "epoch": 1.0970108695652174, "grad_norm": 0.9844831998547066, "learning_rate": 9.227951626683006e-06, "loss": 0.3730134963989258, "step": 4037 }, { "epoch": 1.0972826086956522, "grad_norm": 2.0091059311143513, "learning_rate": 9.223471991103922e-06, "loss": 0.39903467893600464, "step": 4038 }, { "epoch": 1.097554347826087, "grad_norm": 1.1127940559400875, "learning_rate": 9.218992512291614e-06, "loss": 0.3673540949821472, "step": 4039 }, { "epoch": 1.0978260869565217, "grad_norm": 1.0131799263761934, "learning_rate": 9.214513191150407e-06, "loss": 0.32397401332855225, "step": 4040 }, { "epoch": 1.0980978260869565, "grad_norm": 1.0730739381746286, "learning_rate": 9.210034028584588e-06, "loss": 0.3263363242149353, "step": 4041 }, { "epoch": 1.0983695652173913, "grad_norm": 1.1788183308842455, "learning_rate": 9.205555025498427e-06, "loss": 0.3712044954299927, "step": 4042 }, { "epoch": 1.098641304347826, "grad_norm": 1.0166823854794909, "learning_rate": 9.20107618279614e-06, "loss": 0.38265714049339294, "step": 4043 }, { "epoch": 1.0989130434782608, "grad_norm": 1.1904764038109026, "learning_rate": 9.196597501381942e-06, "loss": 0.499298095703125, "step": 4044 }, { "epoch": 1.0991847826086956, "grad_norm": 1.0043866990669776, "learning_rate": 9.192118982159982e-06, "loss": 0.4022058844566345, "step": 4045 }, { "epoch": 1.0994565217391303, "grad_norm": 1.1524683671825473, "learning_rate": 9.18764062603439e-06, "loss": 0.4624575972557068, "step": 4046 }, { "epoch": 1.0997282608695653, "grad_norm": 1.1758616002336022, "learning_rate": 9.183162433909276e-06, "loss": 0.4438740015029907, "step": 4047 }, { "epoch": 1.1, "grad_norm": 1.0563907613674988, "learning_rate": 9.178684406688692e-06, "loss": 0.4091085195541382, "step": 4048 }, { "epoch": 1.1002717391304349, "grad_norm": 1.3509256196737462, "learning_rate": 9.174206545276678e-06, "loss": 0.4696037173271179, "step": 4049 }, { "epoch": 1.1005434782608696, "grad_norm": 1.027994006682766, "learning_rate": 9.169728850577229e-06, "loss": 0.36895138025283813, "step": 4050 }, { "epoch": 1.1008152173913044, "grad_norm": 1.1677444618613262, "learning_rate": 9.165251323494311e-06, "loss": 0.38323265314102173, "step": 4051 }, { "epoch": 1.1010869565217392, "grad_norm": 1.02504178083184, "learning_rate": 9.160773964931853e-06, "loss": 0.3406081795692444, "step": 4052 }, { "epoch": 1.101358695652174, "grad_norm": 1.1811449810046486, "learning_rate": 9.156296775793754e-06, "loss": 0.49598199129104614, "step": 4053 }, { "epoch": 1.1016304347826087, "grad_norm": 1.0502066493547333, "learning_rate": 9.151819756983875e-06, "loss": 0.3725753724575043, "step": 4054 }, { "epoch": 1.1019021739130435, "grad_norm": 1.2980835910746809, "learning_rate": 9.14734290940604e-06, "loss": 0.48683813214302063, "step": 4055 }, { "epoch": 1.1021739130434782, "grad_norm": 1.029385819669837, "learning_rate": 9.142866233964051e-06, "loss": 0.3822407126426697, "step": 4056 }, { "epoch": 1.102445652173913, "grad_norm": 0.9340371558689945, "learning_rate": 9.13838973156166e-06, "loss": 0.4033043682575226, "step": 4057 }, { "epoch": 1.1027173913043478, "grad_norm": 1.1152117087437192, "learning_rate": 9.133913403102595e-06, "loss": 0.4704977869987488, "step": 4058 }, { "epoch": 1.1029891304347825, "grad_norm": 1.2051305035534725, "learning_rate": 9.12943724949054e-06, "loss": 0.5043717622756958, "step": 4059 }, { "epoch": 1.1032608695652173, "grad_norm": 1.192398798750705, "learning_rate": 9.124961271629151e-06, "loss": 0.4445078670978546, "step": 4060 }, { "epoch": 1.103532608695652, "grad_norm": 1.1758796390669848, "learning_rate": 9.120485470422045e-06, "loss": 0.4766416549682617, "step": 4061 }, { "epoch": 1.103804347826087, "grad_norm": 1.195274831460082, "learning_rate": 9.11600984677281e-06, "loss": 0.43713313341140747, "step": 4062 }, { "epoch": 1.1040760869565218, "grad_norm": 0.9878710601877366, "learning_rate": 9.111534401584985e-06, "loss": 0.33097392320632935, "step": 4063 }, { "epoch": 1.1043478260869566, "grad_norm": 1.315985707209097, "learning_rate": 9.107059135762079e-06, "loss": 0.4976711869239807, "step": 4064 }, { "epoch": 1.1046195652173914, "grad_norm": 1.1818538825026623, "learning_rate": 9.102584050207578e-06, "loss": 0.3924162983894348, "step": 4065 }, { "epoch": 1.1048913043478261, "grad_norm": 1.1653261557432086, "learning_rate": 9.098109145824906e-06, "loss": 0.39973974227905273, "step": 4066 }, { "epoch": 1.1051630434782609, "grad_norm": 1.0867517891805711, "learning_rate": 9.093634423517472e-06, "loss": 0.44098904728889465, "step": 4067 }, { "epoch": 1.1054347826086957, "grad_norm": 1.0158797609862653, "learning_rate": 9.089159884188636e-06, "loss": 0.39448294043540955, "step": 4068 }, { "epoch": 1.1057065217391304, "grad_norm": 1.2794305951722915, "learning_rate": 9.084685528741732e-06, "loss": 0.46968966722488403, "step": 4069 }, { "epoch": 1.1059782608695652, "grad_norm": 1.2404221902021195, "learning_rate": 9.080211358080043e-06, "loss": 0.47507378458976746, "step": 4070 }, { "epoch": 1.10625, "grad_norm": 1.1557542972588246, "learning_rate": 9.075737373106829e-06, "loss": 0.4584633409976959, "step": 4071 }, { "epoch": 1.1065217391304347, "grad_norm": 1.0083341159574335, "learning_rate": 9.0712635747253e-06, "loss": 0.3467537462711334, "step": 4072 }, { "epoch": 1.1067934782608695, "grad_norm": 1.3082294994209662, "learning_rate": 9.066789963838636e-06, "loss": 0.43523430824279785, "step": 4073 }, { "epoch": 1.1070652173913043, "grad_norm": 1.2995253986929902, "learning_rate": 9.062316541349978e-06, "loss": 0.48738133907318115, "step": 4074 }, { "epoch": 1.1073369565217392, "grad_norm": 1.1783892413047918, "learning_rate": 9.057843308162424e-06, "loss": 0.44838371872901917, "step": 4075 }, { "epoch": 1.107608695652174, "grad_norm": 1.1959072858226594, "learning_rate": 9.053370265179045e-06, "loss": 0.40688377618789673, "step": 4076 }, { "epoch": 1.1078804347826088, "grad_norm": 1.0688458644328669, "learning_rate": 9.048897413302858e-06, "loss": 0.35703521966934204, "step": 4077 }, { "epoch": 1.1081521739130435, "grad_norm": 1.1352475190525464, "learning_rate": 9.044424753436859e-06, "loss": 0.3951318860054016, "step": 4078 }, { "epoch": 1.1084239130434783, "grad_norm": 1.19707808902933, "learning_rate": 9.039952286483983e-06, "loss": 0.37665730714797974, "step": 4079 }, { "epoch": 1.108695652173913, "grad_norm": 0.9959003901345296, "learning_rate": 9.035480013347156e-06, "loss": 0.4146953225135803, "step": 4080 }, { "epoch": 1.1089673913043478, "grad_norm": 1.2937099719949905, "learning_rate": 9.031007934929237e-06, "loss": 0.5022380352020264, "step": 4081 }, { "epoch": 1.1092391304347826, "grad_norm": 1.0867868867780788, "learning_rate": 9.026536052133054e-06, "loss": 0.37450987100601196, "step": 4082 }, { "epoch": 1.1095108695652174, "grad_norm": 1.0914437275939857, "learning_rate": 9.022064365861409e-06, "loss": 0.38569125533103943, "step": 4083 }, { "epoch": 1.1097826086956522, "grad_norm": 1.0887536559040678, "learning_rate": 9.017592877017042e-06, "loss": 0.39267539978027344, "step": 4084 }, { "epoch": 1.110054347826087, "grad_norm": 1.1796091740958932, "learning_rate": 9.013121586502678e-06, "loss": 0.415070503950119, "step": 4085 }, { "epoch": 1.1103260869565217, "grad_norm": 1.090139656380647, "learning_rate": 9.008650495220973e-06, "loss": 0.3869902789592743, "step": 4086 }, { "epoch": 1.1105978260869565, "grad_norm": 1.0930114548723986, "learning_rate": 9.00417960407457e-06, "loss": 0.41596585512161255, "step": 4087 }, { "epoch": 1.1108695652173912, "grad_norm": 0.9687066928651188, "learning_rate": 8.99970891396606e-06, "loss": 0.30704063177108765, "step": 4088 }, { "epoch": 1.111141304347826, "grad_norm": 1.1392888371616356, "learning_rate": 8.995238425797981e-06, "loss": 0.39995279908180237, "step": 4089 }, { "epoch": 1.1114130434782608, "grad_norm": 1.070237875822079, "learning_rate": 8.990768140472858e-06, "loss": 0.4378843307495117, "step": 4090 }, { "epoch": 1.1116847826086957, "grad_norm": 1.2444484186753018, "learning_rate": 8.986298058893149e-06, "loss": 0.4613218307495117, "step": 4091 }, { "epoch": 1.1119565217391305, "grad_norm": 1.1849413163318578, "learning_rate": 8.981828181961289e-06, "loss": 0.354544997215271, "step": 4092 }, { "epoch": 1.1122282608695653, "grad_norm": 1.0147229295451086, "learning_rate": 8.977358510579658e-06, "loss": 0.38903558254241943, "step": 4093 }, { "epoch": 1.1125, "grad_norm": 0.9937360456765578, "learning_rate": 8.972889045650605e-06, "loss": 0.3219801187515259, "step": 4094 }, { "epoch": 1.1127717391304348, "grad_norm": 1.1809726027653642, "learning_rate": 8.968419788076431e-06, "loss": 0.4713515341281891, "step": 4095 }, { "epoch": 1.1130434782608696, "grad_norm": 1.3175830717641623, "learning_rate": 8.9639507387594e-06, "loss": 0.4837373197078705, "step": 4096 }, { "epoch": 1.1133152173913043, "grad_norm": 1.099048133677459, "learning_rate": 8.959481898601729e-06, "loss": 0.39743122458457947, "step": 4097 }, { "epoch": 1.1135869565217391, "grad_norm": 1.051774823454213, "learning_rate": 8.95501326850559e-06, "loss": 0.36766940355300903, "step": 4098 }, { "epoch": 1.1138586956521739, "grad_norm": 1.116508987038445, "learning_rate": 8.950544849373129e-06, "loss": 0.4079602360725403, "step": 4099 }, { "epoch": 1.1141304347826086, "grad_norm": 0.9987390898612507, "learning_rate": 8.946076642106426e-06, "loss": 0.3350597620010376, "step": 4100 }, { "epoch": 1.1144021739130434, "grad_norm": 1.0746775207091919, "learning_rate": 8.941608647607538e-06, "loss": 0.37961673736572266, "step": 4101 }, { "epoch": 1.1146739130434782, "grad_norm": 1.279008043751712, "learning_rate": 8.937140866778465e-06, "loss": 0.5041247606277466, "step": 4102 }, { "epoch": 1.114945652173913, "grad_norm": 1.2300651800240145, "learning_rate": 8.93267330052118e-06, "loss": 0.4341399073600769, "step": 4103 }, { "epoch": 1.115217391304348, "grad_norm": 1.2647731849912807, "learning_rate": 8.92820594973759e-06, "loss": 0.500937283039093, "step": 4104 }, { "epoch": 1.1154891304347827, "grad_norm": 1.1250193821719874, "learning_rate": 8.923738815329583e-06, "loss": 0.4318220019340515, "step": 4105 }, { "epoch": 1.1157608695652175, "grad_norm": 0.9772290738792102, "learning_rate": 8.919271898198987e-06, "loss": 0.3164936900138855, "step": 4106 }, { "epoch": 1.1160326086956522, "grad_norm": 1.0582351888686168, "learning_rate": 8.914805199247586e-06, "loss": 0.40024471282958984, "step": 4107 }, { "epoch": 1.116304347826087, "grad_norm": 1.047825975001191, "learning_rate": 8.910338719377133e-06, "loss": 0.3658064007759094, "step": 4108 }, { "epoch": 1.1165760869565218, "grad_norm": 1.0227952215645544, "learning_rate": 8.90587245948932e-06, "loss": 0.3999607563018799, "step": 4109 }, { "epoch": 1.1168478260869565, "grad_norm": 1.1184072019059161, "learning_rate": 8.901406420485809e-06, "loss": 0.3660242259502411, "step": 4110 }, { "epoch": 1.1171195652173913, "grad_norm": 1.1090519057797459, "learning_rate": 8.89694060326821e-06, "loss": 0.49006739258766174, "step": 4111 }, { "epoch": 1.117391304347826, "grad_norm": 1.073450405863513, "learning_rate": 8.89247500873809e-06, "loss": 0.34548282623291016, "step": 4112 }, { "epoch": 1.1176630434782608, "grad_norm": 1.2837849648072979, "learning_rate": 8.888009637796968e-06, "loss": 0.45632684230804443, "step": 4113 }, { "epoch": 1.1179347826086956, "grad_norm": 0.998840543083889, "learning_rate": 8.883544491346326e-06, "loss": 0.3470548689365387, "step": 4114 }, { "epoch": 1.1182065217391304, "grad_norm": 1.0876727505728783, "learning_rate": 8.87907957028759e-06, "loss": 0.36607974767684937, "step": 4115 }, { "epoch": 1.1184782608695651, "grad_norm": 1.1521538157393034, "learning_rate": 8.874614875522148e-06, "loss": 0.4095398187637329, "step": 4116 }, { "epoch": 1.11875, "grad_norm": 1.1600081351850127, "learning_rate": 8.870150407951339e-06, "loss": 0.4380892515182495, "step": 4117 }, { "epoch": 1.1190217391304347, "grad_norm": 1.1445529224093192, "learning_rate": 8.865686168476458e-06, "loss": 0.4868503212928772, "step": 4118 }, { "epoch": 1.1192934782608697, "grad_norm": 1.0869603514893407, "learning_rate": 8.861222157998754e-06, "loss": 0.40437328815460205, "step": 4119 }, { "epoch": 1.1195652173913044, "grad_norm": 1.1699500642799352, "learning_rate": 8.856758377419422e-06, "loss": 0.43790265917778015, "step": 4120 }, { "epoch": 1.1198369565217392, "grad_norm": 0.9621034643837362, "learning_rate": 8.852294827639632e-06, "loss": 0.3149869441986084, "step": 4121 }, { "epoch": 1.120108695652174, "grad_norm": 1.2734329854668938, "learning_rate": 8.847831509560478e-06, "loss": 0.4308457374572754, "step": 4122 }, { "epoch": 1.1203804347826087, "grad_norm": 1.042355367580741, "learning_rate": 8.84336842408303e-06, "loss": 0.4058544337749481, "step": 4123 }, { "epoch": 1.1206521739130435, "grad_norm": 1.0173154064864829, "learning_rate": 8.838905572108307e-06, "loss": 0.3707617521286011, "step": 4124 }, { "epoch": 1.1209239130434783, "grad_norm": 0.9825525651325347, "learning_rate": 8.834442954537264e-06, "loss": 0.3577786684036255, "step": 4125 }, { "epoch": 1.121195652173913, "grad_norm": 0.9485596007652901, "learning_rate": 8.829980572270833e-06, "loss": 0.32569363713264465, "step": 4126 }, { "epoch": 1.1214673913043478, "grad_norm": 1.0468416839904469, "learning_rate": 8.825518426209882e-06, "loss": 0.42284977436065674, "step": 4127 }, { "epoch": 1.1217391304347826, "grad_norm": 1.35990468507808, "learning_rate": 8.82105651725524e-06, "loss": 0.4949805736541748, "step": 4128 }, { "epoch": 1.1220108695652173, "grad_norm": 1.1349813110583344, "learning_rate": 8.816594846307682e-06, "loss": 0.4605359435081482, "step": 4129 }, { "epoch": 1.122282608695652, "grad_norm": 1.2016227080167035, "learning_rate": 8.812133414267938e-06, "loss": 0.5037881135940552, "step": 4130 }, { "epoch": 1.1225543478260869, "grad_norm": 1.157326312448871, "learning_rate": 8.807672222036692e-06, "loss": 0.4492673873901367, "step": 4131 }, { "epoch": 1.1228260869565216, "grad_norm": 1.252011062017339, "learning_rate": 8.803211270514572e-06, "loss": 0.4091106057167053, "step": 4132 }, { "epoch": 1.1230978260869566, "grad_norm": 1.2628173551851607, "learning_rate": 8.798750560602166e-06, "loss": 0.5332562923431396, "step": 4133 }, { "epoch": 1.1233695652173914, "grad_norm": 1.336861097830171, "learning_rate": 8.794290093200007e-06, "loss": 0.48271116614341736, "step": 4134 }, { "epoch": 1.1236413043478262, "grad_norm": 1.1035685781875832, "learning_rate": 8.789829869208586e-06, "loss": 0.42766571044921875, "step": 4135 }, { "epoch": 1.123913043478261, "grad_norm": 1.0718945510143876, "learning_rate": 8.785369889528335e-06, "loss": 0.38577741384506226, "step": 4136 }, { "epoch": 1.1241847826086957, "grad_norm": 1.0454955532931032, "learning_rate": 8.780910155059647e-06, "loss": 0.39681676030158997, "step": 4137 }, { "epoch": 1.1244565217391305, "grad_norm": 1.2883003510057176, "learning_rate": 8.776450666702855e-06, "loss": 0.4910582900047302, "step": 4138 }, { "epoch": 1.1247282608695652, "grad_norm": 1.1517436092519553, "learning_rate": 8.771991425358256e-06, "loss": 0.4615691900253296, "step": 4139 }, { "epoch": 1.125, "grad_norm": 1.156750507470713, "learning_rate": 8.767532431926084e-06, "loss": 0.40245920419692993, "step": 4140 }, { "epoch": 1.1252717391304348, "grad_norm": 1.0513257063497288, "learning_rate": 8.763073687306523e-06, "loss": 0.4148689806461334, "step": 4141 }, { "epoch": 1.1255434782608695, "grad_norm": 1.1280309547099183, "learning_rate": 8.758615192399726e-06, "loss": 0.46309328079223633, "step": 4142 }, { "epoch": 1.1258152173913043, "grad_norm": 1.1824200737494543, "learning_rate": 8.754156948105765e-06, "loss": 0.4077717065811157, "step": 4143 }, { "epoch": 1.126086956521739, "grad_norm": 1.2463816868997388, "learning_rate": 8.749698955324691e-06, "loss": 0.4656456708908081, "step": 4144 }, { "epoch": 1.1263586956521738, "grad_norm": 1.1732519734581548, "learning_rate": 8.745241214956484e-06, "loss": 0.5446220636367798, "step": 4145 }, { "epoch": 1.1266304347826086, "grad_norm": 1.3712434677435057, "learning_rate": 8.740783727901083e-06, "loss": 0.4673038125038147, "step": 4146 }, { "epoch": 1.1269021739130434, "grad_norm": 1.278325741224715, "learning_rate": 8.73632649505837e-06, "loss": 0.4858759641647339, "step": 4147 }, { "epoch": 1.1271739130434781, "grad_norm": 1.2843519965970087, "learning_rate": 8.731869517328183e-06, "loss": 0.47185295820236206, "step": 4148 }, { "epoch": 1.1274456521739131, "grad_norm": 1.2677829961999159, "learning_rate": 8.727412795610302e-06, "loss": 0.4439970850944519, "step": 4149 }, { "epoch": 1.127717391304348, "grad_norm": 1.1748705527255001, "learning_rate": 8.722956330804456e-06, "loss": 0.44307130575180054, "step": 4150 }, { "epoch": 1.1279891304347827, "grad_norm": 1.0698191386036593, "learning_rate": 8.718500123810324e-06, "loss": 0.38694262504577637, "step": 4151 }, { "epoch": 1.1282608695652174, "grad_norm": 1.3211765858832092, "learning_rate": 8.714044175527531e-06, "loss": 0.4217965602874756, "step": 4152 }, { "epoch": 1.1285326086956522, "grad_norm": 1.2461501287618444, "learning_rate": 8.709588486855659e-06, "loss": 0.4397798180580139, "step": 4153 }, { "epoch": 1.128804347826087, "grad_norm": 1.181319005907706, "learning_rate": 8.705133058694219e-06, "loss": 0.45881277322769165, "step": 4154 }, { "epoch": 1.1290760869565217, "grad_norm": 1.4878686396933691, "learning_rate": 8.70067789194269e-06, "loss": 0.4761415719985962, "step": 4155 }, { "epoch": 1.1293478260869565, "grad_norm": 0.9884586841167585, "learning_rate": 8.696222987500478e-06, "loss": 0.33501923084259033, "step": 4156 }, { "epoch": 1.1296195652173913, "grad_norm": 1.1900173212805838, "learning_rate": 8.69176834626696e-06, "loss": 0.4269821047782898, "step": 4157 }, { "epoch": 1.129891304347826, "grad_norm": 1.0794112335611765, "learning_rate": 8.687313969141438e-06, "loss": 0.35827672481536865, "step": 4158 }, { "epoch": 1.1301630434782608, "grad_norm": 1.2634634633212136, "learning_rate": 8.682859857023165e-06, "loss": 0.390281081199646, "step": 4159 }, { "epoch": 1.1304347826086956, "grad_norm": 1.1306756124224062, "learning_rate": 8.678406010811355e-06, "loss": 0.37230920791625977, "step": 4160 }, { "epoch": 1.1307065217391306, "grad_norm": 1.1572117406977878, "learning_rate": 8.673952431405148e-06, "loss": 0.3824584484100342, "step": 4161 }, { "epoch": 1.1309782608695653, "grad_norm": 1.2291659396419057, "learning_rate": 8.66949911970365e-06, "loss": 0.4120701849460602, "step": 4162 }, { "epoch": 1.13125, "grad_norm": 1.0476363261948674, "learning_rate": 8.665046076605894e-06, "loss": 0.3245706856250763, "step": 4163 }, { "epoch": 1.1315217391304349, "grad_norm": 1.1545430503403287, "learning_rate": 8.660593303010876e-06, "loss": 0.4717852473258972, "step": 4164 }, { "epoch": 1.1317934782608696, "grad_norm": 1.1416433406266238, "learning_rate": 8.656140799817523e-06, "loss": 0.39948570728302, "step": 4165 }, { "epoch": 1.1320652173913044, "grad_norm": 1.1980051201254718, "learning_rate": 8.651688567924715e-06, "loss": 0.4091218113899231, "step": 4166 }, { "epoch": 1.1323369565217392, "grad_norm": 1.224604926525221, "learning_rate": 8.64723660823128e-06, "loss": 0.4181942939758301, "step": 4167 }, { "epoch": 1.132608695652174, "grad_norm": 1.100332335662391, "learning_rate": 8.64278492163598e-06, "loss": 0.3495118319988251, "step": 4168 }, { "epoch": 1.1328804347826087, "grad_norm": 1.062305328349021, "learning_rate": 8.638333509037537e-06, "loss": 0.31559163331985474, "step": 4169 }, { "epoch": 1.1331521739130435, "grad_norm": 1.058987219299754, "learning_rate": 8.6338823713346e-06, "loss": 0.37647905945777893, "step": 4170 }, { "epoch": 1.1334239130434782, "grad_norm": 1.1998363102944336, "learning_rate": 8.629431509425785e-06, "loss": 0.3966484069824219, "step": 4171 }, { "epoch": 1.133695652173913, "grad_norm": 1.0737712961308357, "learning_rate": 8.624980924209626e-06, "loss": 0.38667500019073486, "step": 4172 }, { "epoch": 1.1339673913043478, "grad_norm": 1.2547200091367865, "learning_rate": 8.620530616584626e-06, "loss": 0.5058387517929077, "step": 4173 }, { "epoch": 1.1342391304347825, "grad_norm": 1.128033179732969, "learning_rate": 8.616080587449213e-06, "loss": 0.43410831689834595, "step": 4174 }, { "epoch": 1.1345108695652173, "grad_norm": 1.0821683477633823, "learning_rate": 8.611630837701766e-06, "loss": 0.36026379466056824, "step": 4175 }, { "epoch": 1.134782608695652, "grad_norm": 1.143590141835069, "learning_rate": 8.607181368240615e-06, "loss": 0.418107807636261, "step": 4176 }, { "epoch": 1.135054347826087, "grad_norm": 0.9383534770412238, "learning_rate": 8.602732179964017e-06, "loss": 0.2825886011123657, "step": 4177 }, { "epoch": 1.1353260869565218, "grad_norm": 1.0658840585553269, "learning_rate": 8.598283273770192e-06, "loss": 0.42091506719589233, "step": 4178 }, { "epoch": 1.1355978260869566, "grad_norm": 1.2180295519047264, "learning_rate": 8.593834650557282e-06, "loss": 0.4966919422149658, "step": 4179 }, { "epoch": 1.1358695652173914, "grad_norm": 1.3434039647949874, "learning_rate": 8.589386311223393e-06, "loss": 0.4981215000152588, "step": 4180 }, { "epoch": 1.1361413043478261, "grad_norm": 1.0131135739661958, "learning_rate": 8.584938256666554e-06, "loss": 0.3217594027519226, "step": 4181 }, { "epoch": 1.1364130434782609, "grad_norm": 1.2054391141221654, "learning_rate": 8.580490487784752e-06, "loss": 0.41135111451148987, "step": 4182 }, { "epoch": 1.1366847826086957, "grad_norm": 1.1737687387440408, "learning_rate": 8.576043005475909e-06, "loss": 0.41423141956329346, "step": 4183 }, { "epoch": 1.1369565217391304, "grad_norm": 1.1504258661305566, "learning_rate": 8.571595810637884e-06, "loss": 0.39823034405708313, "step": 4184 }, { "epoch": 1.1372282608695652, "grad_norm": 1.1710818039478141, "learning_rate": 8.567148904168492e-06, "loss": 0.403386652469635, "step": 4185 }, { "epoch": 1.1375, "grad_norm": 0.8741207233705786, "learning_rate": 8.562702286965478e-06, "loss": 0.3208111822605133, "step": 4186 }, { "epoch": 1.1377717391304347, "grad_norm": 1.0545831137597934, "learning_rate": 8.558255959926533e-06, "loss": 0.42333734035491943, "step": 4187 }, { "epoch": 1.1380434782608695, "grad_norm": 0.7727303085045102, "learning_rate": 8.55380992394929e-06, "loss": 0.26050370931625366, "step": 4188 }, { "epoch": 1.1383152173913043, "grad_norm": 1.237715426849372, "learning_rate": 8.549364179931322e-06, "loss": 0.4971332550048828, "step": 4189 }, { "epoch": 1.1385869565217392, "grad_norm": 0.8315880681660583, "learning_rate": 8.544918728770142e-06, "loss": 0.2555876076221466, "step": 4190 }, { "epoch": 1.138858695652174, "grad_norm": 1.2147436885526222, "learning_rate": 8.540473571363207e-06, "loss": 0.4505554437637329, "step": 4191 }, { "epoch": 1.1391304347826088, "grad_norm": 1.1152757117647512, "learning_rate": 8.536028708607913e-06, "loss": 0.39921149611473083, "step": 4192 }, { "epoch": 1.1394021739130435, "grad_norm": 1.216062404363589, "learning_rate": 8.531584141401592e-06, "loss": 0.3974170684814453, "step": 4193 }, { "epoch": 1.1396739130434783, "grad_norm": 1.3129632744020885, "learning_rate": 8.527139870641526e-06, "loss": 0.4104669988155365, "step": 4194 }, { "epoch": 1.139945652173913, "grad_norm": 1.236304874391486, "learning_rate": 8.522695897224923e-06, "loss": 0.48305249214172363, "step": 4195 }, { "epoch": 1.1402173913043478, "grad_norm": 1.1216408915017757, "learning_rate": 8.518252222048957e-06, "loss": 0.4116753339767456, "step": 4196 }, { "epoch": 1.1404891304347826, "grad_norm": 1.1478158107646266, "learning_rate": 8.513808846010705e-06, "loss": 0.41644495725631714, "step": 4197 }, { "epoch": 1.1407608695652174, "grad_norm": 1.1561953919112822, "learning_rate": 8.50936577000722e-06, "loss": 0.42795681953430176, "step": 4198 }, { "epoch": 1.1410326086956522, "grad_norm": 1.1256531469257853, "learning_rate": 8.504922994935461e-06, "loss": 0.4022432565689087, "step": 4199 }, { "epoch": 1.141304347826087, "grad_norm": 1.1397159637902068, "learning_rate": 8.500480521692357e-06, "loss": 0.40911686420440674, "step": 4200 }, { "epoch": 1.1415760869565217, "grad_norm": 1.509494971384001, "learning_rate": 8.49603835117476e-06, "loss": 0.46275418996810913, "step": 4201 }, { "epoch": 1.1418478260869565, "grad_norm": 1.2360695486244178, "learning_rate": 8.491596484279452e-06, "loss": 0.5073287487030029, "step": 4202 }, { "epoch": 1.1421195652173912, "grad_norm": 1.25815197238508, "learning_rate": 8.487154921903177e-06, "loss": 0.48612111806869507, "step": 4203 }, { "epoch": 1.142391304347826, "grad_norm": 1.2314621317838415, "learning_rate": 8.482713664942597e-06, "loss": 0.44739556312561035, "step": 4204 }, { "epoch": 1.1426630434782608, "grad_norm": 1.4331141599830866, "learning_rate": 8.478272714294326e-06, "loss": 0.5255909562110901, "step": 4205 }, { "epoch": 1.1429347826086957, "grad_norm": 1.1279112012982244, "learning_rate": 8.473832070854905e-06, "loss": 0.4228530526161194, "step": 4206 }, { "epoch": 1.1432065217391305, "grad_norm": 1.3330988556137966, "learning_rate": 8.469391735520824e-06, "loss": 0.48275721073150635, "step": 4207 }, { "epoch": 1.1434782608695653, "grad_norm": 1.3460364570773393, "learning_rate": 8.464951709188503e-06, "loss": 0.46103227138519287, "step": 4208 }, { "epoch": 1.14375, "grad_norm": 1.211994568217999, "learning_rate": 8.4605119927543e-06, "loss": 0.42853039503097534, "step": 4209 }, { "epoch": 1.1440217391304348, "grad_norm": 1.2175027894644295, "learning_rate": 8.456072587114516e-06, "loss": 0.4770244359970093, "step": 4210 }, { "epoch": 1.1442934782608696, "grad_norm": 1.2721219464948175, "learning_rate": 8.451633493165381e-06, "loss": 0.4243274927139282, "step": 4211 }, { "epoch": 1.1445652173913043, "grad_norm": 1.1169223929571819, "learning_rate": 8.447194711803071e-06, "loss": 0.37410420179367065, "step": 4212 }, { "epoch": 1.1448369565217391, "grad_norm": 1.2015508008043703, "learning_rate": 8.442756243923692e-06, "loss": 0.4171571135520935, "step": 4213 }, { "epoch": 1.1451086956521739, "grad_norm": 1.1785432351843292, "learning_rate": 8.438318090423294e-06, "loss": 0.39481091499328613, "step": 4214 }, { "epoch": 1.1453804347826086, "grad_norm": 1.2154950005832228, "learning_rate": 8.433880252197849e-06, "loss": 0.4684731960296631, "step": 4215 }, { "epoch": 1.1456521739130434, "grad_norm": 1.1708795463262793, "learning_rate": 8.429442730143289e-06, "loss": 0.4142259359359741, "step": 4216 }, { "epoch": 1.1459239130434782, "grad_norm": 1.1373830362654682, "learning_rate": 8.425005525155459e-06, "loss": 0.4680030941963196, "step": 4217 }, { "epoch": 1.146195652173913, "grad_norm": 1.3167682960171239, "learning_rate": 8.420568638130146e-06, "loss": 0.4823928773403168, "step": 4218 }, { "epoch": 1.146467391304348, "grad_norm": 1.0840932453123346, "learning_rate": 8.416132069963088e-06, "loss": 0.4084441363811493, "step": 4219 }, { "epoch": 1.1467391304347827, "grad_norm": 1.0558108909168802, "learning_rate": 8.411695821549935e-06, "loss": 0.3354598879814148, "step": 4220 }, { "epoch": 1.1470108695652175, "grad_norm": 1.2081752909449013, "learning_rate": 8.407259893786292e-06, "loss": 0.4570372998714447, "step": 4221 }, { "epoch": 1.1472826086956522, "grad_norm": 1.1673011211101842, "learning_rate": 8.402824287567688e-06, "loss": 0.3356582224369049, "step": 4222 }, { "epoch": 1.147554347826087, "grad_norm": 1.1596847575279587, "learning_rate": 8.398389003789594e-06, "loss": 0.4053429365158081, "step": 4223 }, { "epoch": 1.1478260869565218, "grad_norm": 0.9571477676939588, "learning_rate": 8.393954043347408e-06, "loss": 0.3299229145050049, "step": 4224 }, { "epoch": 1.1480978260869565, "grad_norm": 1.0816955176944687, "learning_rate": 8.389519407136471e-06, "loss": 0.3999488651752472, "step": 4225 }, { "epoch": 1.1483695652173913, "grad_norm": 1.0321147063444913, "learning_rate": 8.385085096052053e-06, "loss": 0.39655303955078125, "step": 4226 }, { "epoch": 1.148641304347826, "grad_norm": 1.0807338585424746, "learning_rate": 8.38065111098936e-06, "loss": 0.4143466353416443, "step": 4227 }, { "epoch": 1.1489130434782608, "grad_norm": 1.1362899759822658, "learning_rate": 8.376217452843534e-06, "loss": 0.38777047395706177, "step": 4228 }, { "epoch": 1.1491847826086956, "grad_norm": 1.039791206639112, "learning_rate": 8.371784122509646e-06, "loss": 0.29954463243484497, "step": 4229 }, { "epoch": 1.1494565217391304, "grad_norm": 1.1563709882686506, "learning_rate": 8.367351120882708e-06, "loss": 0.4236874282360077, "step": 4230 }, { "epoch": 1.1497282608695651, "grad_norm": 1.2411591198939302, "learning_rate": 8.362918448857656e-06, "loss": 0.4377003610134125, "step": 4231 }, { "epoch": 1.15, "grad_norm": 1.147621702822627, "learning_rate": 8.358486107329374e-06, "loss": 0.4399021863937378, "step": 4232 }, { "epoch": 1.1502717391304347, "grad_norm": 1.2070307812352268, "learning_rate": 8.35405409719266e-06, "loss": 0.39449045062065125, "step": 4233 }, { "epoch": 1.1505434782608694, "grad_norm": 1.0985859553446755, "learning_rate": 8.349622419342269e-06, "loss": 0.3512219190597534, "step": 4234 }, { "epoch": 1.1508152173913044, "grad_norm": 1.2448258792241111, "learning_rate": 8.345191074672864e-06, "loss": 0.4118863046169281, "step": 4235 }, { "epoch": 1.1510869565217392, "grad_norm": 1.3892047878538312, "learning_rate": 8.340760064079054e-06, "loss": 0.4364975392818451, "step": 4236 }, { "epoch": 1.151358695652174, "grad_norm": 0.9964972054988054, "learning_rate": 8.336329388455386e-06, "loss": 0.35065191984176636, "step": 4237 }, { "epoch": 1.1516304347826087, "grad_norm": 1.0851881163593275, "learning_rate": 8.331899048696322e-06, "loss": 0.3442380428314209, "step": 4238 }, { "epoch": 1.1519021739130435, "grad_norm": 1.0157451630312109, "learning_rate": 8.327469045696275e-06, "loss": 0.4176305830478668, "step": 4239 }, { "epoch": 1.1521739130434783, "grad_norm": 1.1813902033207024, "learning_rate": 8.323039380349575e-06, "loss": 0.394558310508728, "step": 4240 }, { "epoch": 1.152445652173913, "grad_norm": 1.107941445931905, "learning_rate": 8.318610053550498e-06, "loss": 0.3609755039215088, "step": 4241 }, { "epoch": 1.1527173913043478, "grad_norm": 1.2029671836305473, "learning_rate": 8.314181066193237e-06, "loss": 0.4824795722961426, "step": 4242 }, { "epoch": 1.1529891304347826, "grad_norm": 1.078929123914596, "learning_rate": 8.309752419171926e-06, "loss": 0.3643118739128113, "step": 4243 }, { "epoch": 1.1532608695652173, "grad_norm": 1.0969612195581557, "learning_rate": 8.305324113380629e-06, "loss": 0.39782464504241943, "step": 4244 }, { "epoch": 1.153532608695652, "grad_norm": 1.1470334741410937, "learning_rate": 8.300896149713334e-06, "loss": 0.37564709782600403, "step": 4245 }, { "epoch": 1.1538043478260869, "grad_norm": 1.098697473819547, "learning_rate": 8.296468529063974e-06, "loss": 0.42006948590278625, "step": 4246 }, { "epoch": 1.1540760869565219, "grad_norm": 1.052260774279434, "learning_rate": 8.292041252326397e-06, "loss": 0.34782806038856506, "step": 4247 }, { "epoch": 1.1543478260869566, "grad_norm": 1.2318324745301825, "learning_rate": 8.287614320394395e-06, "loss": 0.47146421670913696, "step": 4248 }, { "epoch": 1.1546195652173914, "grad_norm": 1.155913916390136, "learning_rate": 8.28318773416168e-06, "loss": 0.42368754744529724, "step": 4249 }, { "epoch": 1.1548913043478262, "grad_norm": 1.3305104265668493, "learning_rate": 8.278761494521902e-06, "loss": 0.46677935123443604, "step": 4250 }, { "epoch": 1.155163043478261, "grad_norm": 1.3960760409309745, "learning_rate": 8.274335602368635e-06, "loss": 0.46352407336235046, "step": 4251 }, { "epoch": 1.1554347826086957, "grad_norm": 1.0747983016713085, "learning_rate": 8.269910058595385e-06, "loss": 0.367190957069397, "step": 4252 }, { "epoch": 1.1557065217391305, "grad_norm": 1.235233992617292, "learning_rate": 8.265484864095594e-06, "loss": 0.4633215665817261, "step": 4253 }, { "epoch": 1.1559782608695652, "grad_norm": 1.0628066821513145, "learning_rate": 8.261060019762617e-06, "loss": 0.36876553297042847, "step": 4254 }, { "epoch": 1.15625, "grad_norm": 1.1491904659439611, "learning_rate": 8.256635526489763e-06, "loss": 0.41359883546829224, "step": 4255 }, { "epoch": 1.1565217391304348, "grad_norm": 1.1607485744142099, "learning_rate": 8.252211385170242e-06, "loss": 0.37880322337150574, "step": 4256 }, { "epoch": 1.1567934782608695, "grad_norm": 1.1378964544124703, "learning_rate": 8.247787596697217e-06, "loss": 0.40053319931030273, "step": 4257 }, { "epoch": 1.1570652173913043, "grad_norm": 1.0893608864813304, "learning_rate": 8.243364161963766e-06, "loss": 0.41248950362205505, "step": 4258 }, { "epoch": 1.157336956521739, "grad_norm": 1.1823449135425301, "learning_rate": 8.2389410818629e-06, "loss": 0.47029343247413635, "step": 4259 }, { "epoch": 1.1576086956521738, "grad_norm": 1.1282182510693304, "learning_rate": 8.23451835728756e-06, "loss": 0.4181150794029236, "step": 4260 }, { "epoch": 1.1578804347826086, "grad_norm": 1.2734014339313482, "learning_rate": 8.230095989130607e-06, "loss": 0.5086780786514282, "step": 4261 }, { "epoch": 1.1581521739130434, "grad_norm": 1.1517469512404437, "learning_rate": 8.225673978284842e-06, "loss": 0.36102306842803955, "step": 4262 }, { "epoch": 1.1584239130434781, "grad_norm": 1.1854736919133189, "learning_rate": 8.221252325642986e-06, "loss": 0.3726850748062134, "step": 4263 }, { "epoch": 1.1586956521739131, "grad_norm": 1.0821222704000437, "learning_rate": 8.216831032097689e-06, "loss": 0.38897818326950073, "step": 4264 }, { "epoch": 1.158967391304348, "grad_norm": 1.370694467118523, "learning_rate": 8.212410098541528e-06, "loss": 0.3973499834537506, "step": 4265 }, { "epoch": 1.1592391304347827, "grad_norm": 1.1720649911756738, "learning_rate": 8.207989525867014e-06, "loss": 0.38945087790489197, "step": 4266 }, { "epoch": 1.1595108695652174, "grad_norm": 1.1624565235654445, "learning_rate": 8.203569314966571e-06, "loss": 0.41958338022232056, "step": 4267 }, { "epoch": 1.1597826086956522, "grad_norm": 1.0677485447276827, "learning_rate": 8.199149466732565e-06, "loss": 0.3840898275375366, "step": 4268 }, { "epoch": 1.160054347826087, "grad_norm": 1.102219756410182, "learning_rate": 8.194729982057283e-06, "loss": 0.3732724189758301, "step": 4269 }, { "epoch": 1.1603260869565217, "grad_norm": 1.1637385734411991, "learning_rate": 8.190310861832931e-06, "loss": 0.4158473610877991, "step": 4270 }, { "epoch": 1.1605978260869565, "grad_norm": 1.330136438834125, "learning_rate": 8.185892106951657e-06, "loss": 0.5158302783966064, "step": 4271 }, { "epoch": 1.1608695652173913, "grad_norm": 1.091596243940107, "learning_rate": 8.181473718305519e-06, "loss": 0.315104603767395, "step": 4272 }, { "epoch": 1.161141304347826, "grad_norm": 1.180481610003668, "learning_rate": 8.177055696786516e-06, "loss": 0.4221454858779907, "step": 4273 }, { "epoch": 1.1614130434782608, "grad_norm": 1.0493684768546103, "learning_rate": 8.172638043286558e-06, "loss": 0.3706870377063751, "step": 4274 }, { "epoch": 1.1616847826086956, "grad_norm": 1.2582841232353683, "learning_rate": 8.168220758697494e-06, "loss": 0.48346298933029175, "step": 4275 }, { "epoch": 1.1619565217391306, "grad_norm": 1.2004229484450892, "learning_rate": 8.163803843911092e-06, "loss": 0.47593775391578674, "step": 4276 }, { "epoch": 1.1622282608695653, "grad_norm": 1.1226118495426243, "learning_rate": 8.159387299819044e-06, "loss": 0.39125728607177734, "step": 4277 }, { "epoch": 1.1625, "grad_norm": 1.0842878779491243, "learning_rate": 8.154971127312973e-06, "loss": 0.38933855295181274, "step": 4278 }, { "epoch": 1.1627717391304349, "grad_norm": 1.1247128267446869, "learning_rate": 8.150555327284417e-06, "loss": 0.4091416895389557, "step": 4279 }, { "epoch": 1.1630434782608696, "grad_norm": 1.2788604390114506, "learning_rate": 8.146139900624852e-06, "loss": 0.455208420753479, "step": 4280 }, { "epoch": 1.1633152173913044, "grad_norm": 1.1931125014637574, "learning_rate": 8.141724848225666e-06, "loss": 0.3711777329444885, "step": 4281 }, { "epoch": 1.1635869565217392, "grad_norm": 1.2338492047690879, "learning_rate": 8.137310170978183e-06, "loss": 0.4585084915161133, "step": 4282 }, { "epoch": 1.163858695652174, "grad_norm": 1.1072556069349813, "learning_rate": 8.132895869773638e-06, "loss": 0.41704851388931274, "step": 4283 }, { "epoch": 1.1641304347826087, "grad_norm": 1.061352449478121, "learning_rate": 8.128481945503206e-06, "loss": 0.35813024640083313, "step": 4284 }, { "epoch": 1.1644021739130435, "grad_norm": 1.0395807502315741, "learning_rate": 8.12406839905797e-06, "loss": 0.3431923985481262, "step": 4285 }, { "epoch": 1.1646739130434782, "grad_norm": 1.1820880487502727, "learning_rate": 8.119655231328948e-06, "loss": 0.3837331533432007, "step": 4286 }, { "epoch": 1.164945652173913, "grad_norm": 1.077335587397852, "learning_rate": 8.115242443207079e-06, "loss": 0.38928836584091187, "step": 4287 }, { "epoch": 1.1652173913043478, "grad_norm": 1.3012511209457307, "learning_rate": 8.110830035583218e-06, "loss": 0.46836617588996887, "step": 4288 }, { "epoch": 1.1654891304347825, "grad_norm": 1.1543431946032645, "learning_rate": 8.106418009348157e-06, "loss": 0.363735556602478, "step": 4289 }, { "epoch": 1.1657608695652173, "grad_norm": 0.9243998921741342, "learning_rate": 8.102006365392594e-06, "loss": 0.3445422649383545, "step": 4290 }, { "epoch": 1.166032608695652, "grad_norm": 1.0698846849702655, "learning_rate": 8.097595104607173e-06, "loss": 0.4262118935585022, "step": 4291 }, { "epoch": 1.166304347826087, "grad_norm": 1.0680777300130888, "learning_rate": 8.09318422788243e-06, "loss": 0.36220741271972656, "step": 4292 }, { "epoch": 1.1665760869565218, "grad_norm": 0.9928049338268025, "learning_rate": 8.088773736108856e-06, "loss": 0.2861364185810089, "step": 4293 }, { "epoch": 1.1668478260869566, "grad_norm": 1.0116824726420266, "learning_rate": 8.08436363017684e-06, "loss": 0.4153579771518707, "step": 4294 }, { "epoch": 1.1671195652173914, "grad_norm": 1.4110385959332543, "learning_rate": 8.079953910976696e-06, "loss": 0.5149142742156982, "step": 4295 }, { "epoch": 1.1673913043478261, "grad_norm": 1.2483239503414736, "learning_rate": 8.075544579398679e-06, "loss": 0.45109227299690247, "step": 4296 }, { "epoch": 1.1676630434782609, "grad_norm": 1.1926186173466524, "learning_rate": 8.071135636332938e-06, "loss": 0.5026606917381287, "step": 4297 }, { "epoch": 1.1679347826086957, "grad_norm": 1.1380813297370957, "learning_rate": 8.066727082669573e-06, "loss": 0.36225759983062744, "step": 4298 }, { "epoch": 1.1682065217391304, "grad_norm": 1.296539698609144, "learning_rate": 8.062318919298576e-06, "loss": 0.43132370710372925, "step": 4299 }, { "epoch": 1.1684782608695652, "grad_norm": 1.188040654057191, "learning_rate": 8.057911147109885e-06, "loss": 0.4003998041152954, "step": 4300 }, { "epoch": 1.16875, "grad_norm": 1.3275435617935565, "learning_rate": 8.05350376699334e-06, "loss": 0.4805954098701477, "step": 4301 }, { "epoch": 1.1690217391304347, "grad_norm": 1.111799639644297, "learning_rate": 8.04909677983872e-06, "loss": 0.4276447296142578, "step": 4302 }, { "epoch": 1.1692934782608695, "grad_norm": 1.2713894282732543, "learning_rate": 8.044690186535707e-06, "loss": 0.5214360356330872, "step": 4303 }, { "epoch": 1.1695652173913043, "grad_norm": 1.1561756317228655, "learning_rate": 8.040283987973912e-06, "loss": 0.3894931674003601, "step": 4304 }, { "epoch": 1.1698369565217392, "grad_norm": 1.1575933279411454, "learning_rate": 8.035878185042869e-06, "loss": 0.4213980436325073, "step": 4305 }, { "epoch": 1.170108695652174, "grad_norm": 1.09886109570063, "learning_rate": 8.031472778632026e-06, "loss": 0.3575391471385956, "step": 4306 }, { "epoch": 1.1703804347826088, "grad_norm": 1.1849944044364713, "learning_rate": 8.027067769630756e-06, "loss": 0.3755849599838257, "step": 4307 }, { "epoch": 1.1706521739130435, "grad_norm": 1.1391715653122816, "learning_rate": 8.022663158928345e-06, "loss": 0.45291852951049805, "step": 4308 }, { "epoch": 1.1709239130434783, "grad_norm": 0.9977140772648055, "learning_rate": 8.018258947414015e-06, "loss": 0.3686177134513855, "step": 4309 }, { "epoch": 1.171195652173913, "grad_norm": 1.1695949917139612, "learning_rate": 8.01385513597688e-06, "loss": 0.3902242183685303, "step": 4310 }, { "epoch": 1.1714673913043478, "grad_norm": 1.1715016525157635, "learning_rate": 8.009451725506004e-06, "loss": 0.3688393831253052, "step": 4311 }, { "epoch": 1.1717391304347826, "grad_norm": 1.2980011645931122, "learning_rate": 8.005048716890343e-06, "loss": 0.4708235263824463, "step": 4312 }, { "epoch": 1.1720108695652174, "grad_norm": 1.2865402465662263, "learning_rate": 8.000646111018786e-06, "loss": 0.5455511808395386, "step": 4313 }, { "epoch": 1.1722826086956522, "grad_norm": 1.15015460001937, "learning_rate": 7.996243908780146e-06, "loss": 0.4277068078517914, "step": 4314 }, { "epoch": 1.172554347826087, "grad_norm": 1.2500465246502401, "learning_rate": 7.991842111063135e-06, "loss": 0.46472179889678955, "step": 4315 }, { "epoch": 1.1728260869565217, "grad_norm": 1.0823379861055322, "learning_rate": 7.987440718756404e-06, "loss": 0.37357297539711, "step": 4316 }, { "epoch": 1.1730978260869565, "grad_norm": 1.1434865111090384, "learning_rate": 7.983039732748514e-06, "loss": 0.48014143109321594, "step": 4317 }, { "epoch": 1.1733695652173912, "grad_norm": 1.1182831614727717, "learning_rate": 7.978639153927938e-06, "loss": 0.4685404300689697, "step": 4318 }, { "epoch": 1.173641304347826, "grad_norm": 1.1476896574156434, "learning_rate": 7.974238983183076e-06, "loss": 0.4406668543815613, "step": 4319 }, { "epoch": 1.1739130434782608, "grad_norm": 1.2432962500762779, "learning_rate": 7.96983922140224e-06, "loss": 0.4355163872241974, "step": 4320 }, { "epoch": 1.1741847826086957, "grad_norm": 1.1222422640273988, "learning_rate": 7.965439869473664e-06, "loss": 0.41022512316703796, "step": 4321 }, { "epoch": 1.1744565217391305, "grad_norm": 1.250699653644952, "learning_rate": 7.961040928285492e-06, "loss": 0.48851078748703003, "step": 4322 }, { "epoch": 1.1747282608695653, "grad_norm": 1.297348941597779, "learning_rate": 7.956642398725794e-06, "loss": 0.49016088247299194, "step": 4323 }, { "epoch": 1.175, "grad_norm": 1.1746109685655242, "learning_rate": 7.952244281682548e-06, "loss": 0.4435293674468994, "step": 4324 }, { "epoch": 1.1752717391304348, "grad_norm": 1.3731572837845687, "learning_rate": 7.947846578043658e-06, "loss": 0.422674298286438, "step": 4325 }, { "epoch": 1.1755434782608696, "grad_norm": 0.9621657512056156, "learning_rate": 7.943449288696938e-06, "loss": 0.3635541796684265, "step": 4326 }, { "epoch": 1.1758152173913043, "grad_norm": 1.1823851063212893, "learning_rate": 7.93905241453012e-06, "loss": 0.4091692268848419, "step": 4327 }, { "epoch": 1.1760869565217391, "grad_norm": 1.1928100610647925, "learning_rate": 7.934655956430851e-06, "loss": 0.5088236927986145, "step": 4328 }, { "epoch": 1.1763586956521739, "grad_norm": 1.0026988331509756, "learning_rate": 7.930259915286695e-06, "loss": 0.3356802761554718, "step": 4329 }, { "epoch": 1.1766304347826086, "grad_norm": 1.1965841739000835, "learning_rate": 7.925864291985137e-06, "loss": 0.42621544003486633, "step": 4330 }, { "epoch": 1.1769021739130434, "grad_norm": 1.352189683014216, "learning_rate": 7.921469087413563e-06, "loss": 0.5284194946289062, "step": 4331 }, { "epoch": 1.1771739130434782, "grad_norm": 1.2000967008635137, "learning_rate": 7.917074302459299e-06, "loss": 0.39503633975982666, "step": 4332 }, { "epoch": 1.177445652173913, "grad_norm": 1.0514064793227598, "learning_rate": 7.912679938009557e-06, "loss": 0.3588331639766693, "step": 4333 }, { "epoch": 1.177717391304348, "grad_norm": 1.0204631529327244, "learning_rate": 7.908285994951487e-06, "loss": 0.35951995849609375, "step": 4334 }, { "epoch": 1.1779891304347827, "grad_norm": 1.2177035952547326, "learning_rate": 7.903892474172143e-06, "loss": 0.40673890709877014, "step": 4335 }, { "epoch": 1.1782608695652175, "grad_norm": 0.9071789963082633, "learning_rate": 7.899499376558499e-06, "loss": 0.35448527336120605, "step": 4336 }, { "epoch": 1.1785326086956522, "grad_norm": 1.1365664508855997, "learning_rate": 7.895106702997437e-06, "loss": 0.4078065752983093, "step": 4337 }, { "epoch": 1.178804347826087, "grad_norm": 1.124508414835278, "learning_rate": 7.890714454375758e-06, "loss": 0.3723253011703491, "step": 4338 }, { "epoch": 1.1790760869565218, "grad_norm": 1.3048977073914778, "learning_rate": 7.88632263158018e-06, "loss": 0.49105000495910645, "step": 4339 }, { "epoch": 1.1793478260869565, "grad_norm": 1.32826844877133, "learning_rate": 7.881931235497324e-06, "loss": 0.5072579979896545, "step": 4340 }, { "epoch": 1.1796195652173913, "grad_norm": 1.1305728097997838, "learning_rate": 7.877540267013743e-06, "loss": 0.3391415476799011, "step": 4341 }, { "epoch": 1.179891304347826, "grad_norm": 1.1331061393751867, "learning_rate": 7.873149727015883e-06, "loss": 0.41589879989624023, "step": 4342 }, { "epoch": 1.1801630434782608, "grad_norm": 1.3332607190553636, "learning_rate": 7.868759616390122e-06, "loss": 0.4775891900062561, "step": 4343 }, { "epoch": 1.1804347826086956, "grad_norm": 1.2159848497013577, "learning_rate": 7.864369936022736e-06, "loss": 0.5166662335395813, "step": 4344 }, { "epoch": 1.1807065217391304, "grad_norm": 1.2128902327551656, "learning_rate": 7.859980686799926e-06, "loss": 0.4248253107070923, "step": 4345 }, { "epoch": 1.1809782608695651, "grad_norm": 1.0986806115219703, "learning_rate": 7.855591869607799e-06, "loss": 0.3748146593570709, "step": 4346 }, { "epoch": 1.18125, "grad_norm": 1.202693887973726, "learning_rate": 7.851203485332374e-06, "loss": 0.42865878343582153, "step": 4347 }, { "epoch": 1.1815217391304347, "grad_norm": 1.1709061524048652, "learning_rate": 7.846815534859592e-06, "loss": 0.4292272925376892, "step": 4348 }, { "epoch": 1.1817934782608694, "grad_norm": 1.1260807178793735, "learning_rate": 7.842428019075288e-06, "loss": 0.33075088262557983, "step": 4349 }, { "epoch": 1.1820652173913044, "grad_norm": 1.2894552720969623, "learning_rate": 7.838040938865238e-06, "loss": 0.42448848485946655, "step": 4350 }, { "epoch": 1.1823369565217392, "grad_norm": 1.2212797616779, "learning_rate": 7.833654295115096e-06, "loss": 0.446902334690094, "step": 4351 }, { "epoch": 1.182608695652174, "grad_norm": 1.1394475955298555, "learning_rate": 7.82926808871046e-06, "loss": 0.4826338589191437, "step": 4352 }, { "epoch": 1.1828804347826087, "grad_norm": 1.1369824796095103, "learning_rate": 7.824882320536814e-06, "loss": 0.43613725900650024, "step": 4353 }, { "epoch": 1.1831521739130435, "grad_norm": 1.473494316924062, "learning_rate": 7.82049699147957e-06, "loss": 0.5722991228103638, "step": 4354 }, { "epoch": 1.1834239130434783, "grad_norm": 1.1268551977458099, "learning_rate": 7.816112102424043e-06, "loss": 0.3342337906360626, "step": 4355 }, { "epoch": 1.183695652173913, "grad_norm": 1.1737653704681688, "learning_rate": 7.81172765425546e-06, "loss": 0.4013930559158325, "step": 4356 }, { "epoch": 1.1839673913043478, "grad_norm": 1.3083291919893942, "learning_rate": 7.807343647858965e-06, "loss": 0.47370433807373047, "step": 4357 }, { "epoch": 1.1842391304347826, "grad_norm": 1.2442973145308216, "learning_rate": 7.802960084119606e-06, "loss": 0.4556558132171631, "step": 4358 }, { "epoch": 1.1845108695652173, "grad_norm": 1.1401403095812865, "learning_rate": 7.798576963922347e-06, "loss": 0.4346769452095032, "step": 4359 }, { "epoch": 1.184782608695652, "grad_norm": 1.2179915022855028, "learning_rate": 7.794194288152053e-06, "loss": 0.4661819338798523, "step": 4360 }, { "epoch": 1.1850543478260869, "grad_norm": 1.1006465094369113, "learning_rate": 7.789812057693514e-06, "loss": 0.3829277753829956, "step": 4361 }, { "epoch": 1.1853260869565219, "grad_norm": 1.0208399128246368, "learning_rate": 7.785430273431416e-06, "loss": 0.3757920265197754, "step": 4362 }, { "epoch": 1.1855978260869566, "grad_norm": 1.1963977377673698, "learning_rate": 7.781048936250362e-06, "loss": 0.40387046337127686, "step": 4363 }, { "epoch": 1.1858695652173914, "grad_norm": 1.2729596554423184, "learning_rate": 7.77666804703487e-06, "loss": 0.4114355444908142, "step": 4364 }, { "epoch": 1.1861413043478262, "grad_norm": 1.1820871085622662, "learning_rate": 7.77228760666935e-06, "loss": 0.4357169270515442, "step": 4365 }, { "epoch": 1.186413043478261, "grad_norm": 1.0423122530901263, "learning_rate": 7.767907616038143e-06, "loss": 0.38025856018066406, "step": 4366 }, { "epoch": 1.1866847826086957, "grad_norm": 1.0360624974589174, "learning_rate": 7.763528076025479e-06, "loss": 0.38484352827072144, "step": 4367 }, { "epoch": 1.1869565217391305, "grad_norm": 1.2143931658003337, "learning_rate": 7.759148987515521e-06, "loss": 0.38778889179229736, "step": 4368 }, { "epoch": 1.1872282608695652, "grad_norm": 1.204252508097262, "learning_rate": 7.754770351392311e-06, "loss": 0.45961248874664307, "step": 4369 }, { "epoch": 1.1875, "grad_norm": 1.3531159832970276, "learning_rate": 7.750392168539827e-06, "loss": 0.522977352142334, "step": 4370 }, { "epoch": 1.1877717391304348, "grad_norm": 0.853408457314545, "learning_rate": 7.746014439841941e-06, "loss": 0.2912319600582123, "step": 4371 }, { "epoch": 1.1880434782608695, "grad_norm": 1.206007408813775, "learning_rate": 7.741637166182428e-06, "loss": 0.4228270649909973, "step": 4372 }, { "epoch": 1.1883152173913043, "grad_norm": 1.1637404808629486, "learning_rate": 7.737260348444992e-06, "loss": 0.4359150826931, "step": 4373 }, { "epoch": 1.188586956521739, "grad_norm": 1.2299872204109068, "learning_rate": 7.732883987513223e-06, "loss": 0.48672500252723694, "step": 4374 }, { "epoch": 1.1888586956521738, "grad_norm": 1.1471124571271243, "learning_rate": 7.728508084270635e-06, "loss": 0.415910005569458, "step": 4375 }, { "epoch": 1.1891304347826086, "grad_norm": 0.9752288722945633, "learning_rate": 7.724132639600635e-06, "loss": 0.3131738305091858, "step": 4376 }, { "epoch": 1.1894021739130434, "grad_norm": 1.0882438531046252, "learning_rate": 7.719757654386551e-06, "loss": 0.34068700671195984, "step": 4377 }, { "epoch": 1.1896739130434781, "grad_norm": 1.1919686086280583, "learning_rate": 7.71538312951161e-06, "loss": 0.434003621339798, "step": 4378 }, { "epoch": 1.1899456521739131, "grad_norm": 0.9728511745192292, "learning_rate": 7.71100906585895e-06, "loss": 0.34114885330200195, "step": 4379 }, { "epoch": 1.190217391304348, "grad_norm": 1.2807584124943328, "learning_rate": 7.706635464311614e-06, "loss": 0.5039771199226379, "step": 4380 }, { "epoch": 1.1904891304347827, "grad_norm": 0.9051800081575611, "learning_rate": 7.70226232575255e-06, "loss": 0.30839598178863525, "step": 4381 }, { "epoch": 1.1907608695652174, "grad_norm": 1.1957072802008908, "learning_rate": 7.697889651064615e-06, "loss": 0.4827585220336914, "step": 4382 }, { "epoch": 1.1910326086956522, "grad_norm": 1.2029449637393193, "learning_rate": 7.693517441130573e-06, "loss": 0.41547590494155884, "step": 4383 }, { "epoch": 1.191304347826087, "grad_norm": 0.993805921327066, "learning_rate": 7.689145696833095e-06, "loss": 0.3489300608634949, "step": 4384 }, { "epoch": 1.1915760869565217, "grad_norm": 1.1031359045984557, "learning_rate": 7.684774419054748e-06, "loss": 0.3864593505859375, "step": 4385 }, { "epoch": 1.1918478260869565, "grad_norm": 1.256449774708891, "learning_rate": 7.680403608678027e-06, "loss": 0.44129443168640137, "step": 4386 }, { "epoch": 1.1921195652173913, "grad_norm": 1.4514727492483026, "learning_rate": 7.676033266585302e-06, "loss": 0.5155380964279175, "step": 4387 }, { "epoch": 1.192391304347826, "grad_norm": 1.0706827319132928, "learning_rate": 7.671663393658878e-06, "loss": 0.38744068145751953, "step": 4388 }, { "epoch": 1.1926630434782608, "grad_norm": 1.1504301233241603, "learning_rate": 7.66729399078095e-06, "loss": 0.39991849660873413, "step": 4389 }, { "epoch": 1.1929347826086956, "grad_norm": 1.0382101021364858, "learning_rate": 7.66292505883361e-06, "loss": 0.30193567276000977, "step": 4390 }, { "epoch": 1.1932065217391306, "grad_norm": 1.2413338190857786, "learning_rate": 7.658556598698879e-06, "loss": 0.5026373863220215, "step": 4391 }, { "epoch": 1.1934782608695653, "grad_norm": 1.2098973714074845, "learning_rate": 7.65418861125866e-06, "loss": 0.4371632933616638, "step": 4392 }, { "epoch": 1.19375, "grad_norm": 1.1372144714939971, "learning_rate": 7.649821097394776e-06, "loss": 0.3645434081554413, "step": 4393 }, { "epoch": 1.1940217391304349, "grad_norm": 1.3595231211209098, "learning_rate": 7.645454057988942e-06, "loss": 0.46628913283348083, "step": 4394 }, { "epoch": 1.1942934782608696, "grad_norm": 1.1389293767109305, "learning_rate": 7.641087493922789e-06, "loss": 0.36330729722976685, "step": 4395 }, { "epoch": 1.1945652173913044, "grad_norm": 1.2332560374608736, "learning_rate": 7.63672140607784e-06, "loss": 0.46722161769866943, "step": 4396 }, { "epoch": 1.1948369565217392, "grad_norm": 1.2498106949184555, "learning_rate": 7.632355795335533e-06, "loss": 0.428483247756958, "step": 4397 }, { "epoch": 1.195108695652174, "grad_norm": 1.1789859770868913, "learning_rate": 7.6279906625772045e-06, "loss": 0.4368427097797394, "step": 4398 }, { "epoch": 1.1953804347826087, "grad_norm": 1.2100134330913717, "learning_rate": 7.623626008684092e-06, "loss": 0.3657945692539215, "step": 4399 }, { "epoch": 1.1956521739130435, "grad_norm": 1.2881568074467171, "learning_rate": 7.619261834537341e-06, "loss": 0.48856398463249207, "step": 4400 }, { "epoch": 1.1959239130434782, "grad_norm": 1.0352492280514127, "learning_rate": 7.6148981410179966e-06, "loss": 0.39108458161354065, "step": 4401 }, { "epoch": 1.196195652173913, "grad_norm": 1.0681145432028805, "learning_rate": 7.610534929007013e-06, "loss": 0.3359427750110626, "step": 4402 }, { "epoch": 1.1964673913043478, "grad_norm": 1.091513358201481, "learning_rate": 7.6061721993852346e-06, "loss": 0.40412771701812744, "step": 4403 }, { "epoch": 1.1967391304347825, "grad_norm": 1.1351428715056684, "learning_rate": 7.601809953033428e-06, "loss": 0.3855108618736267, "step": 4404 }, { "epoch": 1.1970108695652173, "grad_norm": 1.1646135426124378, "learning_rate": 7.597448190832242e-06, "loss": 0.39448118209838867, "step": 4405 }, { "epoch": 1.197282608695652, "grad_norm": 1.1769596799170152, "learning_rate": 7.593086913662235e-06, "loss": 0.4059000015258789, "step": 4406 }, { "epoch": 1.197554347826087, "grad_norm": 1.3509697631367272, "learning_rate": 7.588726122403879e-06, "loss": 0.4764646291732788, "step": 4407 }, { "epoch": 1.1978260869565218, "grad_norm": 1.193101979519218, "learning_rate": 7.584365817937525e-06, "loss": 0.4381486177444458, "step": 4408 }, { "epoch": 1.1980978260869566, "grad_norm": 1.24277968072625, "learning_rate": 7.580006001143452e-06, "loss": 0.42288559675216675, "step": 4409 }, { "epoch": 1.1983695652173914, "grad_norm": 1.1292568757649386, "learning_rate": 7.575646672901814e-06, "loss": 0.4065534472465515, "step": 4410 }, { "epoch": 1.1986413043478261, "grad_norm": 1.3102412623924662, "learning_rate": 7.5712878340926895e-06, "loss": 0.5535716414451599, "step": 4411 }, { "epoch": 1.1989130434782609, "grad_norm": 1.2661043835226968, "learning_rate": 7.566929485596044e-06, "loss": 0.40005171298980713, "step": 4412 }, { "epoch": 1.1991847826086957, "grad_norm": 1.198415973578944, "learning_rate": 7.56257162829175e-06, "loss": 0.38404202461242676, "step": 4413 }, { "epoch": 1.1994565217391304, "grad_norm": 1.2687461502375583, "learning_rate": 7.5582142630595766e-06, "loss": 0.5086624622344971, "step": 4414 }, { "epoch": 1.1997282608695652, "grad_norm": 1.2195444831570035, "learning_rate": 7.553857390779195e-06, "loss": 0.3487051725387573, "step": 4415 }, { "epoch": 1.2, "grad_norm": 1.1326680818299693, "learning_rate": 7.549501012330184e-06, "loss": 0.43404585123062134, "step": 4416 }, { "epoch": 1.2002717391304347, "grad_norm": 1.234123555789356, "learning_rate": 7.545145128592009e-06, "loss": 0.4464271068572998, "step": 4417 }, { "epoch": 1.2005434782608695, "grad_norm": 1.1061721138988845, "learning_rate": 7.54078974044405e-06, "loss": 0.3418448567390442, "step": 4418 }, { "epoch": 1.2008152173913043, "grad_norm": 1.2479573477437307, "learning_rate": 7.5364348487655735e-06, "loss": 0.458670973777771, "step": 4419 }, { "epoch": 1.2010869565217392, "grad_norm": 1.1966265972721213, "learning_rate": 7.5320804544357585e-06, "loss": 0.43746498227119446, "step": 4420 }, { "epoch": 1.201358695652174, "grad_norm": 1.1565437041712945, "learning_rate": 7.52772655833367e-06, "loss": 0.40317094326019287, "step": 4421 }, { "epoch": 1.2016304347826088, "grad_norm": 1.2515376784524863, "learning_rate": 7.52337316133829e-06, "loss": 0.44827380776405334, "step": 4422 }, { "epoch": 1.2019021739130435, "grad_norm": 1.1166403374804161, "learning_rate": 7.519020264328483e-06, "loss": 0.43531471490859985, "step": 4423 }, { "epoch": 1.2021739130434783, "grad_norm": 1.1284558297614529, "learning_rate": 7.514667868183017e-06, "loss": 0.38150060176849365, "step": 4424 }, { "epoch": 1.202445652173913, "grad_norm": 1.0101219062856261, "learning_rate": 7.5103159737805656e-06, "loss": 0.3763367831707001, "step": 4425 }, { "epoch": 1.2027173913043478, "grad_norm": 1.090285109985126, "learning_rate": 7.505964581999692e-06, "loss": 0.3804006576538086, "step": 4426 }, { "epoch": 1.2029891304347826, "grad_norm": 1.106560923189038, "learning_rate": 7.501613693718871e-06, "loss": 0.46228766441345215, "step": 4427 }, { "epoch": 1.2032608695652174, "grad_norm": 1.1175494154020371, "learning_rate": 7.497263309816456e-06, "loss": 0.44675904512405396, "step": 4428 }, { "epoch": 1.2035326086956522, "grad_norm": 1.2224689302554324, "learning_rate": 7.492913431170719e-06, "loss": 0.4061375558376312, "step": 4429 }, { "epoch": 1.203804347826087, "grad_norm": 1.1889313670220267, "learning_rate": 7.488564058659815e-06, "loss": 0.3678644299507141, "step": 4430 }, { "epoch": 1.2040760869565217, "grad_norm": 1.099093292275005, "learning_rate": 7.484215193161808e-06, "loss": 0.41124850511550903, "step": 4431 }, { "epoch": 1.2043478260869565, "grad_norm": 0.9636129932557487, "learning_rate": 7.47986683555465e-06, "loss": 0.34380656480789185, "step": 4432 }, { "epoch": 1.2046195652173912, "grad_norm": 1.1293710913622863, "learning_rate": 7.475518986716193e-06, "loss": 0.41987481713294983, "step": 4433 }, { "epoch": 1.204891304347826, "grad_norm": 1.244882980572049, "learning_rate": 7.471171647524195e-06, "loss": 0.4889688491821289, "step": 4434 }, { "epoch": 1.2051630434782608, "grad_norm": 1.3721607765288655, "learning_rate": 7.466824818856296e-06, "loss": 0.46192502975463867, "step": 4435 }, { "epoch": 1.2054347826086957, "grad_norm": 1.0950170838071362, "learning_rate": 7.462478501590049e-06, "loss": 0.35799896717071533, "step": 4436 }, { "epoch": 1.2057065217391305, "grad_norm": 1.2876353055030085, "learning_rate": 7.458132696602887e-06, "loss": 0.4162302017211914, "step": 4437 }, { "epoch": 1.2059782608695653, "grad_norm": 1.1582024344682227, "learning_rate": 7.453787404772157e-06, "loss": 0.3834682106971741, "step": 4438 }, { "epoch": 1.20625, "grad_norm": 1.2731023307482712, "learning_rate": 7.449442626975089e-06, "loss": 0.37400656938552856, "step": 4439 }, { "epoch": 1.2065217391304348, "grad_norm": 1.140112873393068, "learning_rate": 7.445098364088815e-06, "loss": 0.44910016655921936, "step": 4440 }, { "epoch": 1.2067934782608696, "grad_norm": 1.1336372967567523, "learning_rate": 7.440754616990363e-06, "loss": 0.35345232486724854, "step": 4441 }, { "epoch": 1.2070652173913043, "grad_norm": 1.2172386752062305, "learning_rate": 7.436411386556652e-06, "loss": 0.38556981086730957, "step": 4442 }, { "epoch": 1.2073369565217391, "grad_norm": 1.1889239843836827, "learning_rate": 7.432068673664507e-06, "loss": 0.42114371061325073, "step": 4443 }, { "epoch": 1.2076086956521739, "grad_norm": 1.16131218948989, "learning_rate": 7.427726479190634e-06, "loss": 0.38415423035621643, "step": 4444 }, { "epoch": 1.2078804347826086, "grad_norm": 1.6547826536646921, "learning_rate": 7.423384804011654e-06, "loss": 0.3776308298110962, "step": 4445 }, { "epoch": 1.2081521739130434, "grad_norm": 1.1580127900886044, "learning_rate": 7.419043649004059e-06, "loss": 0.43094098567962646, "step": 4446 }, { "epoch": 1.2084239130434782, "grad_norm": 0.9308384232546768, "learning_rate": 7.4147030150442574e-06, "loss": 0.310842901468277, "step": 4447 }, { "epoch": 1.208695652173913, "grad_norm": 1.350351938855676, "learning_rate": 7.410362903008543e-06, "loss": 0.47215789556503296, "step": 4448 }, { "epoch": 1.208967391304348, "grad_norm": 1.1950088268161585, "learning_rate": 7.406023313773097e-06, "loss": 0.3304523825645447, "step": 4449 }, { "epoch": 1.2092391304347827, "grad_norm": 1.1352686900461098, "learning_rate": 7.401684248214014e-06, "loss": 0.39212149381637573, "step": 4450 }, { "epoch": 1.2095108695652175, "grad_norm": 1.3794226427590073, "learning_rate": 7.3973457072072615e-06, "loss": 0.48894035816192627, "step": 4451 }, { "epoch": 1.2097826086956522, "grad_norm": 1.075482716640009, "learning_rate": 7.393007691628721e-06, "loss": 0.41048675775527954, "step": 4452 }, { "epoch": 1.210054347826087, "grad_norm": 1.3328164968223148, "learning_rate": 7.3886702023541515e-06, "loss": 0.5001846551895142, "step": 4453 }, { "epoch": 1.2103260869565218, "grad_norm": 1.290186988805731, "learning_rate": 7.384333240259216e-06, "loss": 0.46860742568969727, "step": 4454 }, { "epoch": 1.2105978260869565, "grad_norm": 1.3719981027525034, "learning_rate": 7.379996806219464e-06, "loss": 0.4723714292049408, "step": 4455 }, { "epoch": 1.2108695652173913, "grad_norm": 1.044342769528404, "learning_rate": 7.375660901110347e-06, "loss": 0.3731147050857544, "step": 4456 }, { "epoch": 1.211141304347826, "grad_norm": 1.214914493739193, "learning_rate": 7.371325525807202e-06, "loss": 0.3989856541156769, "step": 4457 }, { "epoch": 1.2114130434782608, "grad_norm": 1.3405296511096134, "learning_rate": 7.366990681185262e-06, "loss": 0.45831143856048584, "step": 4458 }, { "epoch": 1.2116847826086956, "grad_norm": 1.0340540059931052, "learning_rate": 7.3626563681196535e-06, "loss": 0.3635905683040619, "step": 4459 }, { "epoch": 1.2119565217391304, "grad_norm": 1.138046907503778, "learning_rate": 7.3583225874853925e-06, "loss": 0.43092697858810425, "step": 4460 }, { "epoch": 1.2122282608695651, "grad_norm": 1.1809318165820786, "learning_rate": 7.353989340157393e-06, "loss": 0.42641085386276245, "step": 4461 }, { "epoch": 1.2125, "grad_norm": 1.2402474080329768, "learning_rate": 7.3496566270104545e-06, "loss": 0.4338858723640442, "step": 4462 }, { "epoch": 1.2127717391304347, "grad_norm": 1.1633433620176556, "learning_rate": 7.34532444891928e-06, "loss": 0.42400509119033813, "step": 4463 }, { "epoch": 1.2130434782608694, "grad_norm": 1.0713281989436199, "learning_rate": 7.340992806758444e-06, "loss": 0.3813138008117676, "step": 4464 }, { "epoch": 1.2133152173913044, "grad_norm": 1.2188758648086884, "learning_rate": 7.336661701402439e-06, "loss": 0.43292051553726196, "step": 4465 }, { "epoch": 1.2135869565217392, "grad_norm": 1.3579347448887096, "learning_rate": 7.332331133725632e-06, "loss": 0.4868254065513611, "step": 4466 }, { "epoch": 1.213858695652174, "grad_norm": 1.2793768497385438, "learning_rate": 7.328001104602278e-06, "loss": 0.48415690660476685, "step": 4467 }, { "epoch": 1.2141304347826087, "grad_norm": 1.400120841522362, "learning_rate": 7.323671614906539e-06, "loss": 0.4777712821960449, "step": 4468 }, { "epoch": 1.2144021739130435, "grad_norm": 1.2584817641753463, "learning_rate": 7.3193426655124545e-06, "loss": 0.4176947772502899, "step": 4469 }, { "epoch": 1.2146739130434783, "grad_norm": 0.996548136487396, "learning_rate": 7.315014257293964e-06, "loss": 0.3037570118904114, "step": 4470 }, { "epoch": 1.214945652173913, "grad_norm": 1.3075629610574717, "learning_rate": 7.31068639112489e-06, "loss": 0.4600486755371094, "step": 4471 }, { "epoch": 1.2152173913043478, "grad_norm": 1.1228137373787512, "learning_rate": 7.306359067878954e-06, "loss": 0.4256342053413391, "step": 4472 }, { "epoch": 1.2154891304347826, "grad_norm": 1.0237567814931994, "learning_rate": 7.3020322884297565e-06, "loss": 0.3594539165496826, "step": 4473 }, { "epoch": 1.2157608695652173, "grad_norm": 1.1301482444321271, "learning_rate": 7.297706053650804e-06, "loss": 0.3777618110179901, "step": 4474 }, { "epoch": 1.216032608695652, "grad_norm": 1.202588808701248, "learning_rate": 7.293380364415476e-06, "loss": 0.4585643410682678, "step": 4475 }, { "epoch": 1.2163043478260869, "grad_norm": 1.2423533241263784, "learning_rate": 7.2890552215970535e-06, "loss": 0.44992759823799133, "step": 4476 }, { "epoch": 1.2165760869565219, "grad_norm": 1.0138551294552056, "learning_rate": 7.2847306260687035e-06, "loss": 0.3061442971229553, "step": 4477 }, { "epoch": 1.2168478260869566, "grad_norm": 1.09915576020525, "learning_rate": 7.280406578703481e-06, "loss": 0.4839444160461426, "step": 4478 }, { "epoch": 1.2171195652173914, "grad_norm": 1.282398485913294, "learning_rate": 7.2760830803743345e-06, "loss": 0.4796919524669647, "step": 4479 }, { "epoch": 1.2173913043478262, "grad_norm": 1.0968108094447373, "learning_rate": 7.271760131954093e-06, "loss": 0.35929325222969055, "step": 4480 }, { "epoch": 1.217663043478261, "grad_norm": 1.3766513691867026, "learning_rate": 7.267437734315493e-06, "loss": 0.5064210295677185, "step": 4481 }, { "epoch": 1.2179347826086957, "grad_norm": 1.2047989661913405, "learning_rate": 7.263115888331135e-06, "loss": 0.40321463346481323, "step": 4482 }, { "epoch": 1.2182065217391305, "grad_norm": 1.2890496766834176, "learning_rate": 7.258794594873522e-06, "loss": 0.52644282579422, "step": 4483 }, { "epoch": 1.2184782608695652, "grad_norm": 1.1502351604735053, "learning_rate": 7.254473854815054e-06, "loss": 0.4397081136703491, "step": 4484 }, { "epoch": 1.21875, "grad_norm": 1.1744958852814034, "learning_rate": 7.250153669027995e-06, "loss": 0.4282926321029663, "step": 4485 }, { "epoch": 1.2190217391304348, "grad_norm": 1.0382256006737873, "learning_rate": 7.245834038384523e-06, "loss": 0.34752702713012695, "step": 4486 }, { "epoch": 1.2192934782608695, "grad_norm": 1.117366649216182, "learning_rate": 7.241514963756686e-06, "loss": 0.35473883152008057, "step": 4487 }, { "epoch": 1.2195652173913043, "grad_norm": 1.223743004356131, "learning_rate": 7.237196446016429e-06, "loss": 0.4119885563850403, "step": 4488 }, { "epoch": 1.219836956521739, "grad_norm": 1.2091472006177109, "learning_rate": 7.2328784860355795e-06, "loss": 0.4393315017223358, "step": 4489 }, { "epoch": 1.2201086956521738, "grad_norm": 1.3706512588363189, "learning_rate": 7.2285610846858565e-06, "loss": 0.49592262506484985, "step": 4490 }, { "epoch": 1.2203804347826086, "grad_norm": 1.2130395782074448, "learning_rate": 7.2242442428388646e-06, "loss": 0.4460732638835907, "step": 4491 }, { "epoch": 1.2206521739130434, "grad_norm": 1.262178946052609, "learning_rate": 7.219927961366091e-06, "loss": 0.49292364716529846, "step": 4492 }, { "epoch": 1.2209239130434781, "grad_norm": 1.258203672163307, "learning_rate": 7.215612241138918e-06, "loss": 0.4658120274543762, "step": 4493 }, { "epoch": 1.2211956521739131, "grad_norm": 1.103519019710516, "learning_rate": 7.211297083028606e-06, "loss": 0.3865891098976135, "step": 4494 }, { "epoch": 1.221467391304348, "grad_norm": 1.0092441895769706, "learning_rate": 7.206982487906313e-06, "loss": 0.37741830945014954, "step": 4495 }, { "epoch": 1.2217391304347827, "grad_norm": 1.271923877080878, "learning_rate": 7.20266845664307e-06, "loss": 0.42621445655822754, "step": 4496 }, { "epoch": 1.2220108695652174, "grad_norm": 1.1569467379038965, "learning_rate": 7.198354990109806e-06, "loss": 0.3814384341239929, "step": 4497 }, { "epoch": 1.2222826086956522, "grad_norm": 1.2175271111464885, "learning_rate": 7.194042089177324e-06, "loss": 0.40356069803237915, "step": 4498 }, { "epoch": 1.222554347826087, "grad_norm": 1.012344612254488, "learning_rate": 7.1897297547163325e-06, "loss": 0.3395412564277649, "step": 4499 }, { "epoch": 1.2228260869565217, "grad_norm": 1.1329282262664313, "learning_rate": 7.1854179875974005e-06, "loss": 0.35322579741477966, "step": 4500 }, { "epoch": 1.2230978260869565, "grad_norm": 1.2771892344102818, "learning_rate": 7.1811067886909945e-06, "loss": 0.4830823838710785, "step": 4501 }, { "epoch": 1.2233695652173913, "grad_norm": 1.0068850773243856, "learning_rate": 7.176796158867478e-06, "loss": 0.3345496654510498, "step": 4502 }, { "epoch": 1.223641304347826, "grad_norm": 1.1818250274959738, "learning_rate": 7.172486098997076e-06, "loss": 0.38721978664398193, "step": 4503 }, { "epoch": 1.2239130434782608, "grad_norm": 1.194572297732351, "learning_rate": 7.168176609949917e-06, "loss": 0.40079566836357117, "step": 4504 }, { "epoch": 1.2241847826086956, "grad_norm": 1.2688157469929058, "learning_rate": 7.163867692596007e-06, "loss": 0.40653306245803833, "step": 4505 }, { "epoch": 1.2244565217391306, "grad_norm": 1.1976067739672362, "learning_rate": 7.159559347805239e-06, "loss": 0.3757587671279907, "step": 4506 }, { "epoch": 1.2247282608695653, "grad_norm": 1.2062545413476733, "learning_rate": 7.155251576447383e-06, "loss": 0.37961509823799133, "step": 4507 }, { "epoch": 1.225, "grad_norm": 1.325620074813115, "learning_rate": 7.1509443793921065e-06, "loss": 0.48293715715408325, "step": 4508 }, { "epoch": 1.2252717391304349, "grad_norm": 1.1459477492900725, "learning_rate": 7.14663775750895e-06, "loss": 0.3444652557373047, "step": 4509 }, { "epoch": 1.2255434782608696, "grad_norm": 1.2220557126181577, "learning_rate": 7.142331711667339e-06, "loss": 0.4079847037792206, "step": 4510 }, { "epoch": 1.2258152173913044, "grad_norm": 1.1892600947495842, "learning_rate": 7.1380262427365885e-06, "loss": 0.3884877562522888, "step": 4511 }, { "epoch": 1.2260869565217392, "grad_norm": 1.2246593052695203, "learning_rate": 7.133721351585894e-06, "loss": 0.42285478115081787, "step": 4512 }, { "epoch": 1.226358695652174, "grad_norm": 1.0863452493648398, "learning_rate": 7.1294170390843335e-06, "loss": 0.3542129397392273, "step": 4513 }, { "epoch": 1.2266304347826087, "grad_norm": 1.1875489021685417, "learning_rate": 7.125113306100867e-06, "loss": 0.36342188715934753, "step": 4514 }, { "epoch": 1.2269021739130435, "grad_norm": 1.0526048794420004, "learning_rate": 7.1208101535043424e-06, "loss": 0.328136682510376, "step": 4515 }, { "epoch": 1.2271739130434782, "grad_norm": 1.0587867628801388, "learning_rate": 7.1165075821634834e-06, "loss": 0.4630463719367981, "step": 4516 }, { "epoch": 1.227445652173913, "grad_norm": 1.2335100627182072, "learning_rate": 7.112205592946908e-06, "loss": 0.4779164493083954, "step": 4517 }, { "epoch": 1.2277173913043478, "grad_norm": 1.2324469972055994, "learning_rate": 7.107904186723103e-06, "loss": 0.4514023959636688, "step": 4518 }, { "epoch": 1.2279891304347825, "grad_norm": 1.3159299286447625, "learning_rate": 7.103603364360437e-06, "loss": 0.5215233564376831, "step": 4519 }, { "epoch": 1.2282608695652173, "grad_norm": 1.2829974149552013, "learning_rate": 7.099303126727184e-06, "loss": 0.5183038115501404, "step": 4520 }, { "epoch": 1.228532608695652, "grad_norm": 1.2913490735410358, "learning_rate": 7.0950034746914655e-06, "loss": 0.4059767425060272, "step": 4521 }, { "epoch": 1.228804347826087, "grad_norm": 1.047501257245931, "learning_rate": 7.090704409121319e-06, "loss": 0.35680773854255676, "step": 4522 }, { "epoch": 1.2290760869565218, "grad_norm": 1.1431908262273698, "learning_rate": 7.08640593088463e-06, "loss": 0.4269833564758301, "step": 4523 }, { "epoch": 1.2293478260869566, "grad_norm": 1.194170079622039, "learning_rate": 7.082108040849196e-06, "loss": 0.4189904034137726, "step": 4524 }, { "epoch": 1.2296195652173914, "grad_norm": 1.0665472250388244, "learning_rate": 7.07781073988268e-06, "loss": 0.3713865876197815, "step": 4525 }, { "epoch": 1.2298913043478261, "grad_norm": 1.1967617670957142, "learning_rate": 7.07351402885262e-06, "loss": 0.3423249125480652, "step": 4526 }, { "epoch": 1.2301630434782609, "grad_norm": 1.1859329652893684, "learning_rate": 7.069217908626451e-06, "loss": 0.5185002088546753, "step": 4527 }, { "epoch": 1.2304347826086957, "grad_norm": 1.1293959415901773, "learning_rate": 7.06492238007148e-06, "loss": 0.3365703523159027, "step": 4528 }, { "epoch": 1.2307065217391304, "grad_norm": 1.1249931289413546, "learning_rate": 7.0606274440548935e-06, "loss": 0.3927573561668396, "step": 4529 }, { "epoch": 1.2309782608695652, "grad_norm": 1.3195956363392667, "learning_rate": 7.056333101443761e-06, "loss": 0.5340770483016968, "step": 4530 }, { "epoch": 1.23125, "grad_norm": 1.0120879203751507, "learning_rate": 7.052039353105033e-06, "loss": 0.3996409773826599, "step": 4531 }, { "epoch": 1.2315217391304347, "grad_norm": 1.1586507081308697, "learning_rate": 7.0477461999055365e-06, "loss": 0.3619486391544342, "step": 4532 }, { "epoch": 1.2317934782608695, "grad_norm": 1.158339226664195, "learning_rate": 7.043453642711982e-06, "loss": 0.37328264117240906, "step": 4533 }, { "epoch": 1.2320652173913043, "grad_norm": 1.2279630676468807, "learning_rate": 7.039161682390958e-06, "loss": 0.4251527190208435, "step": 4534 }, { "epoch": 1.2323369565217392, "grad_norm": 1.1756468251437318, "learning_rate": 7.034870319808931e-06, "loss": 0.4115406274795532, "step": 4535 }, { "epoch": 1.232608695652174, "grad_norm": 0.9613000210410023, "learning_rate": 7.030579555832251e-06, "loss": 0.3199138045310974, "step": 4536 }, { "epoch": 1.2328804347826088, "grad_norm": 1.1944960973387972, "learning_rate": 7.026289391327141e-06, "loss": 0.405248761177063, "step": 4537 }, { "epoch": 1.2331521739130435, "grad_norm": 1.1298552140498026, "learning_rate": 7.021999827159711e-06, "loss": 0.3744349479675293, "step": 4538 }, { "epoch": 1.2334239130434783, "grad_norm": 1.1012292208091141, "learning_rate": 7.01771086419594e-06, "loss": 0.40465158224105835, "step": 4539 }, { "epoch": 1.233695652173913, "grad_norm": 1.0336293310195956, "learning_rate": 7.0134225033017e-06, "loss": 0.33418774604797363, "step": 4540 }, { "epoch": 1.2339673913043478, "grad_norm": 1.1889150575024374, "learning_rate": 7.00913474534272e-06, "loss": 0.3810231685638428, "step": 4541 }, { "epoch": 1.2342391304347826, "grad_norm": 1.0436855698728198, "learning_rate": 7.004847591184632e-06, "loss": 0.3427352011203766, "step": 4542 }, { "epoch": 1.2345108695652174, "grad_norm": 1.0858381404029624, "learning_rate": 7.00056104169293e-06, "loss": 0.43925678730010986, "step": 4543 }, { "epoch": 1.2347826086956522, "grad_norm": 1.1677965080751251, "learning_rate": 6.996275097732981e-06, "loss": 0.40490108728408813, "step": 4544 }, { "epoch": 1.235054347826087, "grad_norm": 1.1970845977475701, "learning_rate": 6.991989760170051e-06, "loss": 0.4209796190261841, "step": 4545 }, { "epoch": 1.2353260869565217, "grad_norm": 1.3792422912538098, "learning_rate": 6.987705029869263e-06, "loss": 0.44616878032684326, "step": 4546 }, { "epoch": 1.2355978260869565, "grad_norm": 0.9720818644938737, "learning_rate": 6.9834209076956305e-06, "loss": 0.28189218044281006, "step": 4547 }, { "epoch": 1.2358695652173912, "grad_norm": 1.296490516578181, "learning_rate": 6.979137394514035e-06, "loss": 0.4716930389404297, "step": 4548 }, { "epoch": 1.236141304347826, "grad_norm": 1.242458117930392, "learning_rate": 6.974854491189243e-06, "loss": 0.4416930675506592, "step": 4549 }, { "epoch": 1.2364130434782608, "grad_norm": 1.1673725706056925, "learning_rate": 6.970572198585892e-06, "loss": 0.43768608570098877, "step": 4550 }, { "epoch": 1.2366847826086957, "grad_norm": 1.1095735326737963, "learning_rate": 6.9662905175685005e-06, "loss": 0.46253886818885803, "step": 4551 }, { "epoch": 1.2369565217391305, "grad_norm": 1.1878157509483915, "learning_rate": 6.96200944900146e-06, "loss": 0.41430217027664185, "step": 4552 }, { "epoch": 1.2372282608695653, "grad_norm": 1.3704658755355446, "learning_rate": 6.957728993749038e-06, "loss": 0.5212184190750122, "step": 4553 }, { "epoch": 1.2375, "grad_norm": 1.034336703909899, "learning_rate": 6.953449152675382e-06, "loss": 0.39429783821105957, "step": 4554 }, { "epoch": 1.2377717391304348, "grad_norm": 1.243071850214497, "learning_rate": 6.949169926644513e-06, "loss": 0.4348025321960449, "step": 4555 }, { "epoch": 1.2380434782608696, "grad_norm": 1.1304201208417255, "learning_rate": 6.94489131652033e-06, "loss": 0.41207194328308105, "step": 4556 }, { "epoch": 1.2383152173913043, "grad_norm": 1.4207568466954152, "learning_rate": 6.940613323166601e-06, "loss": 0.4919837713241577, "step": 4557 }, { "epoch": 1.2385869565217391, "grad_norm": 1.146705243883837, "learning_rate": 6.9363359474469836e-06, "loss": 0.4365846514701843, "step": 4558 }, { "epoch": 1.2388586956521739, "grad_norm": 1.014383662748021, "learning_rate": 6.932059190224991e-06, "loss": 0.3438577950000763, "step": 4559 }, { "epoch": 1.2391304347826086, "grad_norm": 1.1562052943894534, "learning_rate": 6.92778305236403e-06, "loss": 0.45943352580070496, "step": 4560 }, { "epoch": 1.2394021739130434, "grad_norm": 1.12435370194883, "learning_rate": 6.923507534727374e-06, "loss": 0.40051543712615967, "step": 4561 }, { "epoch": 1.2396739130434782, "grad_norm": 1.3098788642895935, "learning_rate": 6.919232638178161e-06, "loss": 0.4739378094673157, "step": 4562 }, { "epoch": 1.239945652173913, "grad_norm": 1.1936882121901045, "learning_rate": 6.9149583635794295e-06, "loss": 0.45576179027557373, "step": 4563 }, { "epoch": 1.240217391304348, "grad_norm": 1.263266637017236, "learning_rate": 6.910684711794067e-06, "loss": 0.4130244851112366, "step": 4564 }, { "epoch": 1.2404891304347827, "grad_norm": 1.1690343202055087, "learning_rate": 6.9064116836848505e-06, "loss": 0.3704918920993805, "step": 4565 }, { "epoch": 1.2407608695652175, "grad_norm": 1.180351691109865, "learning_rate": 6.902139280114424e-06, "loss": 0.4560987949371338, "step": 4566 }, { "epoch": 1.2410326086956522, "grad_norm": 1.2361919475232828, "learning_rate": 6.897867501945308e-06, "loss": 0.4135711193084717, "step": 4567 }, { "epoch": 1.241304347826087, "grad_norm": 1.3477935768725986, "learning_rate": 6.893596350039896e-06, "loss": 0.3908923864364624, "step": 4568 }, { "epoch": 1.2415760869565218, "grad_norm": 0.9735061104671214, "learning_rate": 6.8893258252604536e-06, "loss": 0.3138161897659302, "step": 4569 }, { "epoch": 1.2418478260869565, "grad_norm": 1.2746422821038097, "learning_rate": 6.885055928469125e-06, "loss": 0.4314512014389038, "step": 4570 }, { "epoch": 1.2421195652173913, "grad_norm": 1.1919358324996632, "learning_rate": 6.880786660527921e-06, "loss": 0.4571473300457001, "step": 4571 }, { "epoch": 1.242391304347826, "grad_norm": 1.1682077119140428, "learning_rate": 6.87651802229873e-06, "loss": 0.41353070735931396, "step": 4572 }, { "epoch": 1.2426630434782608, "grad_norm": 1.2739768978072348, "learning_rate": 6.872250014643311e-06, "loss": 0.425655335187912, "step": 4573 }, { "epoch": 1.2429347826086956, "grad_norm": 1.1601791025443366, "learning_rate": 6.8679826384233e-06, "loss": 0.3723934292793274, "step": 4574 }, { "epoch": 1.2432065217391304, "grad_norm": 1.1129334578966033, "learning_rate": 6.863715894500194e-06, "loss": 0.4193316102027893, "step": 4575 }, { "epoch": 1.2434782608695651, "grad_norm": 1.1720188684724815, "learning_rate": 6.859449783735381e-06, "loss": 0.3954819440841675, "step": 4576 }, { "epoch": 1.24375, "grad_norm": 1.2261219731740118, "learning_rate": 6.855184306990106e-06, "loss": 0.4723154306411743, "step": 4577 }, { "epoch": 1.2440217391304347, "grad_norm": 1.2042371956438138, "learning_rate": 6.8509194651254825e-06, "loss": 0.40313321352005005, "step": 4578 }, { "epoch": 1.2442934782608694, "grad_norm": 1.0984099643086096, "learning_rate": 6.8466552590025195e-06, "loss": 0.3515326976776123, "step": 4579 }, { "epoch": 1.2445652173913044, "grad_norm": 1.3340180791959284, "learning_rate": 6.8423916894820665e-06, "loss": 0.4500214159488678, "step": 4580 }, { "epoch": 1.2448369565217392, "grad_norm": 1.2801702321508517, "learning_rate": 6.838128757424873e-06, "loss": 0.46099355816841125, "step": 4581 }, { "epoch": 1.245108695652174, "grad_norm": 1.1171796494839938, "learning_rate": 6.833866463691539e-06, "loss": 0.37993162870407104, "step": 4582 }, { "epoch": 1.2453804347826087, "grad_norm": 1.1766355698290945, "learning_rate": 6.829604809142547e-06, "loss": 0.4253223240375519, "step": 4583 }, { "epoch": 1.2456521739130435, "grad_norm": 1.0116201102063096, "learning_rate": 6.8253437946382435e-06, "loss": 0.299152135848999, "step": 4584 }, { "epoch": 1.2459239130434783, "grad_norm": 1.063922617527691, "learning_rate": 6.821083421038854e-06, "loss": 0.35743802785873413, "step": 4585 }, { "epoch": 1.246195652173913, "grad_norm": 1.412461997272495, "learning_rate": 6.816823689204466e-06, "loss": 0.5150481462478638, "step": 4586 }, { "epoch": 1.2464673913043478, "grad_norm": 1.2369115987378576, "learning_rate": 6.812564599995042e-06, "loss": 0.5014169216156006, "step": 4587 }, { "epoch": 1.2467391304347826, "grad_norm": 1.1363892585448616, "learning_rate": 6.808306154270417e-06, "loss": 0.34246256947517395, "step": 4588 }, { "epoch": 1.2470108695652173, "grad_norm": 1.2381949903685188, "learning_rate": 6.804048352890288e-06, "loss": 0.37054556608200073, "step": 4589 }, { "epoch": 1.247282608695652, "grad_norm": 1.2313128206171602, "learning_rate": 6.799791196714231e-06, "loss": 0.4849330186843872, "step": 4590 }, { "epoch": 1.2475543478260869, "grad_norm": 1.3374631540687356, "learning_rate": 6.795534686601687e-06, "loss": 0.4773426353931427, "step": 4591 }, { "epoch": 1.2478260869565219, "grad_norm": 1.341501827923001, "learning_rate": 6.7912788234119685e-06, "loss": 0.5114855766296387, "step": 4592 }, { "epoch": 1.2480978260869566, "grad_norm": 1.0517783709012547, "learning_rate": 6.787023608004251e-06, "loss": 0.342816561460495, "step": 4593 }, { "epoch": 1.2483695652173914, "grad_norm": 1.2985306347813907, "learning_rate": 6.782769041237593e-06, "loss": 0.47485464811325073, "step": 4594 }, { "epoch": 1.2486413043478262, "grad_norm": 1.16310333830107, "learning_rate": 6.778515123970908e-06, "loss": 0.3643823266029358, "step": 4595 }, { "epoch": 1.248913043478261, "grad_norm": 1.1063895049855723, "learning_rate": 6.77426185706298e-06, "loss": 0.4020860195159912, "step": 4596 }, { "epoch": 1.2491847826086957, "grad_norm": 1.043944348597808, "learning_rate": 6.770009241372477e-06, "loss": 0.36455655097961426, "step": 4597 }, { "epoch": 1.2494565217391305, "grad_norm": 1.1937042282082138, "learning_rate": 6.765757277757912e-06, "loss": 0.422781765460968, "step": 4598 }, { "epoch": 1.2497282608695652, "grad_norm": 1.2707201444652825, "learning_rate": 6.761505967077689e-06, "loss": 0.4736545979976654, "step": 4599 }, { "epoch": 1.25, "grad_norm": 1.1987422349955108, "learning_rate": 6.757255310190062e-06, "loss": 0.4427204132080078, "step": 4600 }, { "epoch": 1.2502717391304348, "grad_norm": 1.1882642037218503, "learning_rate": 6.7530053079531664e-06, "loss": 0.4290688633918762, "step": 4601 }, { "epoch": 1.2505434782608695, "grad_norm": 1.1314627551494618, "learning_rate": 6.748755961224997e-06, "loss": 0.3534654974937439, "step": 4602 }, { "epoch": 1.2508152173913043, "grad_norm": 0.850422053748421, "learning_rate": 6.744507270863416e-06, "loss": 0.2509966790676117, "step": 4603 }, { "epoch": 1.251086956521739, "grad_norm": 1.0628668865118702, "learning_rate": 6.740259237726162e-06, "loss": 0.33976051211357117, "step": 4604 }, { "epoch": 1.2513586956521738, "grad_norm": 1.2369243884928505, "learning_rate": 6.73601186267083e-06, "loss": 0.46220675110816956, "step": 4605 }, { "epoch": 1.2516304347826086, "grad_norm": 1.1811077118967674, "learning_rate": 6.731765146554891e-06, "loss": 0.43027645349502563, "step": 4606 }, { "epoch": 1.2519021739130434, "grad_norm": 1.2294566574481318, "learning_rate": 6.7275190902356776e-06, "loss": 0.5189447402954102, "step": 4607 }, { "epoch": 1.2521739130434781, "grad_norm": 1.0537969529551336, "learning_rate": 6.723273694570391e-06, "loss": 0.3748573362827301, "step": 4608 }, { "epoch": 1.2524456521739131, "grad_norm": 1.2854487613442958, "learning_rate": 6.7190289604160986e-06, "loss": 0.42619624733924866, "step": 4609 }, { "epoch": 1.252717391304348, "grad_norm": 1.4037259996303875, "learning_rate": 6.714784888629735e-06, "loss": 0.5236258506774902, "step": 4610 }, { "epoch": 1.2529891304347827, "grad_norm": 1.2853928034862536, "learning_rate": 6.7105414800681e-06, "loss": 0.4748520851135254, "step": 4611 }, { "epoch": 1.2532608695652174, "grad_norm": 1.114890511564483, "learning_rate": 6.7062987355878585e-06, "loss": 0.39504534006118774, "step": 4612 }, { "epoch": 1.2535326086956522, "grad_norm": 1.2826016476056739, "learning_rate": 6.702056656045546e-06, "loss": 0.44494491815567017, "step": 4613 }, { "epoch": 1.253804347826087, "grad_norm": 1.2106167913721133, "learning_rate": 6.697815242297554e-06, "loss": 0.4244270920753479, "step": 4614 }, { "epoch": 1.2540760869565217, "grad_norm": 1.204964519932856, "learning_rate": 6.693574495200159e-06, "loss": 0.3914068341255188, "step": 4615 }, { "epoch": 1.2543478260869565, "grad_norm": 0.9142634081939056, "learning_rate": 6.6893344156094754e-06, "loss": 0.32909494638442993, "step": 4616 }, { "epoch": 1.2546195652173913, "grad_norm": 1.202524341510798, "learning_rate": 6.685095004381508e-06, "loss": 0.45180749893188477, "step": 4617 }, { "epoch": 1.254891304347826, "grad_norm": 1.1634148947874967, "learning_rate": 6.680856262372111e-06, "loss": 0.39528006315231323, "step": 4618 }, { "epoch": 1.2551630434782608, "grad_norm": 1.2270262297637342, "learning_rate": 6.676618190437013e-06, "loss": 0.46002161502838135, "step": 4619 }, { "epoch": 1.2554347826086958, "grad_norm": 1.1886758614375807, "learning_rate": 6.6723807894318025e-06, "loss": 0.38081061840057373, "step": 4620 }, { "epoch": 1.2557065217391306, "grad_norm": 1.1993463484336397, "learning_rate": 6.668144060211925e-06, "loss": 0.48969024419784546, "step": 4621 }, { "epoch": 1.2559782608695653, "grad_norm": 1.1310288089410498, "learning_rate": 6.663908003632708e-06, "loss": 0.3980395197868347, "step": 4622 }, { "epoch": 1.25625, "grad_norm": 1.2065248173861103, "learning_rate": 6.659672620549329e-06, "loss": 0.42658698558807373, "step": 4623 }, { "epoch": 1.2565217391304349, "grad_norm": 1.0448325939361873, "learning_rate": 6.655437911816838e-06, "loss": 0.3567315638065338, "step": 4624 }, { "epoch": 1.2567934782608696, "grad_norm": 1.1197592755057693, "learning_rate": 6.651203878290139e-06, "loss": 0.41643375158309937, "step": 4625 }, { "epoch": 1.2570652173913044, "grad_norm": 1.3472748042658698, "learning_rate": 6.646970520824012e-06, "loss": 0.4938785135746002, "step": 4626 }, { "epoch": 1.2573369565217392, "grad_norm": 1.1897774753407426, "learning_rate": 6.64273784027309e-06, "loss": 0.357252836227417, "step": 4627 }, { "epoch": 1.257608695652174, "grad_norm": 1.1102487607193754, "learning_rate": 6.638505837491878e-06, "loss": 0.3817133605480194, "step": 4628 }, { "epoch": 1.2578804347826087, "grad_norm": 1.092938507030819, "learning_rate": 6.634274513334737e-06, "loss": 0.3641071915626526, "step": 4629 }, { "epoch": 1.2581521739130435, "grad_norm": 1.2788101366500555, "learning_rate": 6.630043868655891e-06, "loss": 0.41037529706954956, "step": 4630 }, { "epoch": 1.2584239130434782, "grad_norm": 1.2166935161483974, "learning_rate": 6.625813904309435e-06, "loss": 0.44485336542129517, "step": 4631 }, { "epoch": 1.258695652173913, "grad_norm": 1.0923975619723694, "learning_rate": 6.621584621149315e-06, "loss": 0.3258945345878601, "step": 4632 }, { "epoch": 1.2589673913043478, "grad_norm": 1.1412772764354309, "learning_rate": 6.617356020029355e-06, "loss": 0.40422940254211426, "step": 4633 }, { "epoch": 1.2592391304347825, "grad_norm": 1.108652686448469, "learning_rate": 6.6131281018032215e-06, "loss": 0.33886539936065674, "step": 4634 }, { "epoch": 1.2595108695652173, "grad_norm": 1.276277736462878, "learning_rate": 6.608900867324465e-06, "loss": 0.49935078620910645, "step": 4635 }, { "epoch": 1.259782608695652, "grad_norm": 1.217565973568328, "learning_rate": 6.604674317446473e-06, "loss": 0.44688063859939575, "step": 4636 }, { "epoch": 1.2600543478260868, "grad_norm": 1.135340246099962, "learning_rate": 6.600448453022523e-06, "loss": 0.3890475034713745, "step": 4637 }, { "epoch": 1.2603260869565218, "grad_norm": 1.0541046303558959, "learning_rate": 6.596223274905733e-06, "loss": 0.43678051233291626, "step": 4638 }, { "epoch": 1.2605978260869566, "grad_norm": 1.3205272265104286, "learning_rate": 6.591998783949083e-06, "loss": 0.41486889123916626, "step": 4639 }, { "epoch": 1.2608695652173914, "grad_norm": 1.1970018538527416, "learning_rate": 6.587774981005429e-06, "loss": 0.43153896927833557, "step": 4640 }, { "epoch": 1.2611413043478261, "grad_norm": 1.258782502533776, "learning_rate": 6.583551866927475e-06, "loss": 0.47529757022857666, "step": 4641 }, { "epoch": 1.2614130434782609, "grad_norm": 1.1271927267515578, "learning_rate": 6.579329442567795e-06, "loss": 0.3858233392238617, "step": 4642 }, { "epoch": 1.2616847826086957, "grad_norm": 0.9176976732000819, "learning_rate": 6.575107708778812e-06, "loss": 0.27380460500717163, "step": 4643 }, { "epoch": 1.2619565217391304, "grad_norm": 1.195530323574896, "learning_rate": 6.570886666412823e-06, "loss": 0.37424594163894653, "step": 4644 }, { "epoch": 1.2622282608695652, "grad_norm": 1.2172721797291477, "learning_rate": 6.566666316321977e-06, "loss": 0.44246429204940796, "step": 4645 }, { "epoch": 1.2625, "grad_norm": 1.4784402246523656, "learning_rate": 6.562446659358284e-06, "loss": 0.4707062840461731, "step": 4646 }, { "epoch": 1.2627717391304347, "grad_norm": 1.4375212077117763, "learning_rate": 6.558227696373617e-06, "loss": 0.5220645070075989, "step": 4647 }, { "epoch": 1.2630434782608695, "grad_norm": 1.3433501737438243, "learning_rate": 6.554009428219705e-06, "loss": 0.45322778820991516, "step": 4648 }, { "epoch": 1.2633152173913045, "grad_norm": 1.137306470452516, "learning_rate": 6.549791855748143e-06, "loss": 0.39370983839035034, "step": 4649 }, { "epoch": 1.2635869565217392, "grad_norm": 1.2950106711886702, "learning_rate": 6.545574979810377e-06, "loss": 0.46428632736206055, "step": 4650 }, { "epoch": 1.263858695652174, "grad_norm": 0.9895936637222603, "learning_rate": 6.541358801257722e-06, "loss": 0.3489524722099304, "step": 4651 }, { "epoch": 1.2641304347826088, "grad_norm": 1.3189381826416555, "learning_rate": 6.537143320941339e-06, "loss": 0.5053955912590027, "step": 4652 }, { "epoch": 1.2644021739130435, "grad_norm": 5.020971003563768, "learning_rate": 6.53292853971227e-06, "loss": 0.5079717040061951, "step": 4653 }, { "epoch": 1.2646739130434783, "grad_norm": 1.218943310413646, "learning_rate": 6.52871445842139e-06, "loss": 0.4513688087463379, "step": 4654 }, { "epoch": 1.264945652173913, "grad_norm": 1.1444001724713644, "learning_rate": 6.524501077919446e-06, "loss": 0.3798707127571106, "step": 4655 }, { "epoch": 1.2652173913043478, "grad_norm": 1.1436745023825823, "learning_rate": 6.520288399057051e-06, "loss": 0.3538743257522583, "step": 4656 }, { "epoch": 1.2654891304347826, "grad_norm": 1.2224671518684243, "learning_rate": 6.516076422684654e-06, "loss": 0.402876615524292, "step": 4657 }, { "epoch": 1.2657608695652174, "grad_norm": 1.1988482529980495, "learning_rate": 6.511865149652589e-06, "loss": 0.3947654366493225, "step": 4658 }, { "epoch": 1.2660326086956522, "grad_norm": 1.5121706138129514, "learning_rate": 6.507654580811027e-06, "loss": 0.5404440760612488, "step": 4659 }, { "epoch": 1.266304347826087, "grad_norm": 1.392767648283502, "learning_rate": 6.503444717010008e-06, "loss": 0.46832090616226196, "step": 4660 }, { "epoch": 1.2665760869565217, "grad_norm": 1.2251579063632916, "learning_rate": 6.499235559099424e-06, "loss": 0.452692449092865, "step": 4661 }, { "epoch": 1.2668478260869565, "grad_norm": 1.2883232844677204, "learning_rate": 6.495027107929031e-06, "loss": 0.49515634775161743, "step": 4662 }, { "epoch": 1.2671195652173912, "grad_norm": 1.4242100304870209, "learning_rate": 6.490819364348434e-06, "loss": 0.514724850654602, "step": 4663 }, { "epoch": 1.267391304347826, "grad_norm": 1.1102725220812015, "learning_rate": 6.4866123292070996e-06, "loss": 0.430295467376709, "step": 4664 }, { "epoch": 1.2676630434782608, "grad_norm": 0.9595296444866811, "learning_rate": 6.4824060033543535e-06, "loss": 0.3147868514060974, "step": 4665 }, { "epoch": 1.2679347826086955, "grad_norm": 1.5739598491048146, "learning_rate": 6.4782003876393726e-06, "loss": 0.3592475652694702, "step": 4666 }, { "epoch": 1.2682065217391305, "grad_norm": 1.325808875436212, "learning_rate": 6.473995482911197e-06, "loss": 0.4641464352607727, "step": 4667 }, { "epoch": 1.2684782608695653, "grad_norm": 1.2410934815484298, "learning_rate": 6.469791290018718e-06, "loss": 0.4325292706489563, "step": 4668 }, { "epoch": 1.26875, "grad_norm": 1.4006672133945557, "learning_rate": 6.465587809810687e-06, "loss": 0.4266984164714813, "step": 4669 }, { "epoch": 1.2690217391304348, "grad_norm": 1.2491114924684654, "learning_rate": 6.461385043135704e-06, "loss": 0.3339568078517914, "step": 4670 }, { "epoch": 1.2692934782608696, "grad_norm": 1.0658860738410818, "learning_rate": 6.457182990842241e-06, "loss": 0.3441507816314697, "step": 4671 }, { "epoch": 1.2695652173913043, "grad_norm": 1.3804052134734806, "learning_rate": 6.452981653778606e-06, "loss": 0.5159010291099548, "step": 4672 }, { "epoch": 1.2698369565217391, "grad_norm": 1.2718256223216975, "learning_rate": 6.4487810327929726e-06, "loss": 0.4966646432876587, "step": 4673 }, { "epoch": 1.2701086956521739, "grad_norm": 1.2214027520013633, "learning_rate": 6.444581128733376e-06, "loss": 0.4747001826763153, "step": 4674 }, { "epoch": 1.2703804347826086, "grad_norm": 1.193854175035036, "learning_rate": 6.440381942447691e-06, "loss": 0.4361509382724762, "step": 4675 }, { "epoch": 1.2706521739130434, "grad_norm": 1.2113116560786052, "learning_rate": 6.436183474783664e-06, "loss": 0.3358150124549866, "step": 4676 }, { "epoch": 1.2709239130434782, "grad_norm": 1.2069005720758794, "learning_rate": 6.431985726588884e-06, "loss": 0.4431338906288147, "step": 4677 }, { "epoch": 1.2711956521739132, "grad_norm": 1.3222640323857577, "learning_rate": 6.427788698710803e-06, "loss": 0.4267084002494812, "step": 4678 }, { "epoch": 1.271467391304348, "grad_norm": 1.28753676064985, "learning_rate": 6.4235923919967204e-06, "loss": 0.4563789963722229, "step": 4679 }, { "epoch": 1.2717391304347827, "grad_norm": 1.1291756651493445, "learning_rate": 6.419396807293797e-06, "loss": 0.40370458364486694, "step": 4680 }, { "epoch": 1.2720108695652175, "grad_norm": 1.2811657996991495, "learning_rate": 6.415201945449041e-06, "loss": 0.39960330724716187, "step": 4681 }, { "epoch": 1.2722826086956522, "grad_norm": 1.1691063701245894, "learning_rate": 6.41100780730932e-06, "loss": 0.3520883619785309, "step": 4682 }, { "epoch": 1.272554347826087, "grad_norm": 1.2189832515192782, "learning_rate": 6.406814393721355e-06, "loss": 0.4390387237071991, "step": 4683 }, { "epoch": 1.2728260869565218, "grad_norm": 1.0690733598836213, "learning_rate": 6.402621705531715e-06, "loss": 0.33718380331993103, "step": 4684 }, { "epoch": 1.2730978260869565, "grad_norm": 1.186265279485025, "learning_rate": 6.398429743586833e-06, "loss": 0.4180711507797241, "step": 4685 }, { "epoch": 1.2733695652173913, "grad_norm": 1.1509933861333135, "learning_rate": 6.394238508732984e-06, "loss": 0.4228088855743408, "step": 4686 }, { "epoch": 1.273641304347826, "grad_norm": 1.272428752999604, "learning_rate": 6.390048001816305e-06, "loss": 0.4898222088813782, "step": 4687 }, { "epoch": 1.2739130434782608, "grad_norm": 1.2350151257914759, "learning_rate": 6.385858223682781e-06, "loss": 0.4471644163131714, "step": 4688 }, { "epoch": 1.2741847826086956, "grad_norm": 1.1688505344887805, "learning_rate": 6.381669175178249e-06, "loss": 0.40356630086898804, "step": 4689 }, { "epoch": 1.2744565217391304, "grad_norm": 1.017927136061449, "learning_rate": 6.377480857148407e-06, "loss": 0.3791643977165222, "step": 4690 }, { "epoch": 1.2747282608695651, "grad_norm": 1.3430224298120512, "learning_rate": 6.373293270438792e-06, "loss": 0.4486181437969208, "step": 4691 }, { "epoch": 1.275, "grad_norm": 1.362009706006925, "learning_rate": 6.3691064158948105e-06, "loss": 0.45998263359069824, "step": 4692 }, { "epoch": 1.2752717391304347, "grad_norm": 1.4552135444382346, "learning_rate": 6.364920294361701e-06, "loss": 0.5382031202316284, "step": 4693 }, { "epoch": 1.2755434782608694, "grad_norm": 1.2004504976606558, "learning_rate": 6.360734906684575e-06, "loss": 0.3663069009780884, "step": 4694 }, { "epoch": 1.2758152173913042, "grad_norm": 1.1082796006736597, "learning_rate": 6.356550253708378e-06, "loss": 0.3543522357940674, "step": 4695 }, { "epoch": 1.2760869565217392, "grad_norm": 1.5258520121523513, "learning_rate": 6.352366336277919e-06, "loss": 0.44733762741088867, "step": 4696 }, { "epoch": 1.276358695652174, "grad_norm": 1.1969042434605326, "learning_rate": 6.348183155237855e-06, "loss": 0.42705363035202026, "step": 4697 }, { "epoch": 1.2766304347826087, "grad_norm": 1.152857382844306, "learning_rate": 6.344000711432688e-06, "loss": 0.34652501344680786, "step": 4698 }, { "epoch": 1.2769021739130435, "grad_norm": 1.366937394397715, "learning_rate": 6.339819005706782e-06, "loss": 0.4597252607345581, "step": 4699 }, { "epoch": 1.2771739130434783, "grad_norm": 1.2159791051746758, "learning_rate": 6.335638038904343e-06, "loss": 0.4484320282936096, "step": 4700 }, { "epoch": 1.277445652173913, "grad_norm": 1.1823338072534868, "learning_rate": 6.331457811869437e-06, "loss": 0.42433714866638184, "step": 4701 }, { "epoch": 1.2777173913043478, "grad_norm": 1.1496187682409256, "learning_rate": 6.327278325445968e-06, "loss": 0.4167323708534241, "step": 4702 }, { "epoch": 1.2779891304347826, "grad_norm": 1.3203585216959974, "learning_rate": 6.323099580477705e-06, "loss": 0.4448787569999695, "step": 4703 }, { "epoch": 1.2782608695652173, "grad_norm": 1.1670199608879357, "learning_rate": 6.318921577808253e-06, "loss": 0.4689245820045471, "step": 4704 }, { "epoch": 1.278532608695652, "grad_norm": 1.2342353246493973, "learning_rate": 6.314744318281081e-06, "loss": 0.45049959421157837, "step": 4705 }, { "epoch": 1.2788043478260869, "grad_norm": 1.1176685182625323, "learning_rate": 6.310567802739498e-06, "loss": 0.38839590549468994, "step": 4706 }, { "epoch": 1.2790760869565219, "grad_norm": 1.0337333191272329, "learning_rate": 6.306392032026662e-06, "loss": 0.34799885749816895, "step": 4707 }, { "epoch": 1.2793478260869566, "grad_norm": 1.215162790976807, "learning_rate": 6.302217006985591e-06, "loss": 0.41437119245529175, "step": 4708 }, { "epoch": 1.2796195652173914, "grad_norm": 1.1724027352605109, "learning_rate": 6.2980427284591415e-06, "loss": 0.4313551187515259, "step": 4709 }, { "epoch": 1.2798913043478262, "grad_norm": 1.3277705122048977, "learning_rate": 6.29386919729003e-06, "loss": 0.38863450288772583, "step": 4710 }, { "epoch": 1.280163043478261, "grad_norm": 1.0875170992530379, "learning_rate": 6.289696414320805e-06, "loss": 0.3248360753059387, "step": 4711 }, { "epoch": 1.2804347826086957, "grad_norm": 1.2074048898878846, "learning_rate": 6.285524380393888e-06, "loss": 0.42152100801467896, "step": 4712 }, { "epoch": 1.2807065217391305, "grad_norm": 1.1034594775780215, "learning_rate": 6.281353096351526e-06, "loss": 0.33944153785705566, "step": 4713 }, { "epoch": 1.2809782608695652, "grad_norm": 1.3708307822879109, "learning_rate": 6.2771825630358305e-06, "loss": 0.4695664048194885, "step": 4714 }, { "epoch": 1.28125, "grad_norm": 1.37038093307685, "learning_rate": 6.2730127812887565e-06, "loss": 0.5296118259429932, "step": 4715 }, { "epoch": 1.2815217391304348, "grad_norm": 1.2082751638904479, "learning_rate": 6.2688437519521e-06, "loss": 0.4128537178039551, "step": 4716 }, { "epoch": 1.2817934782608695, "grad_norm": 1.2046683668374347, "learning_rate": 6.264675475867516e-06, "loss": 0.48149770498275757, "step": 4717 }, { "epoch": 1.2820652173913043, "grad_norm": 1.0125033266613375, "learning_rate": 6.260507953876504e-06, "loss": 0.31163638830184937, "step": 4718 }, { "epoch": 1.282336956521739, "grad_norm": 1.4570912209690934, "learning_rate": 6.25634118682041e-06, "loss": 0.5018157958984375, "step": 4719 }, { "epoch": 1.2826086956521738, "grad_norm": 1.1880240067683023, "learning_rate": 6.2521751755404226e-06, "loss": 0.41609811782836914, "step": 4720 }, { "epoch": 1.2828804347826086, "grad_norm": 1.0760964691636004, "learning_rate": 6.248009920877591e-06, "loss": 0.3535616993904114, "step": 4721 }, { "epoch": 1.2831521739130434, "grad_norm": 1.243300353850957, "learning_rate": 6.243845423672801e-06, "loss": 0.49643391370773315, "step": 4722 }, { "epoch": 1.2834239130434781, "grad_norm": 1.2466550943889225, "learning_rate": 6.239681684766783e-06, "loss": 0.48056653141975403, "step": 4723 }, { "epoch": 1.2836956521739131, "grad_norm": 1.1222525789168223, "learning_rate": 6.235518705000127e-06, "loss": 0.3755907416343689, "step": 4724 }, { "epoch": 1.283967391304348, "grad_norm": 1.235793573687817, "learning_rate": 6.231356485213259e-06, "loss": 0.39062294363975525, "step": 4725 }, { "epoch": 1.2842391304347827, "grad_norm": 1.2781344375692878, "learning_rate": 6.227195026246457e-06, "loss": 0.44028931856155396, "step": 4726 }, { "epoch": 1.2845108695652174, "grad_norm": 1.0790115776707985, "learning_rate": 6.223034328939837e-06, "loss": 0.44182801246643066, "step": 4727 }, { "epoch": 1.2847826086956522, "grad_norm": 1.0455584059399472, "learning_rate": 6.218874394133376e-06, "loss": 0.3526356816291809, "step": 4728 }, { "epoch": 1.285054347826087, "grad_norm": 1.207652402079336, "learning_rate": 6.21471522266688e-06, "loss": 0.4634478688240051, "step": 4729 }, { "epoch": 1.2853260869565217, "grad_norm": 1.3291100591616716, "learning_rate": 6.21055681538002e-06, "loss": 0.38698795437812805, "step": 4730 }, { "epoch": 1.2855978260869565, "grad_norm": 1.1256814724990971, "learning_rate": 6.206399173112298e-06, "loss": 0.4248685836791992, "step": 4731 }, { "epoch": 1.2858695652173913, "grad_norm": 1.2702629838244923, "learning_rate": 6.202242296703056e-06, "loss": 0.43803149461746216, "step": 4732 }, { "epoch": 1.286141304347826, "grad_norm": 1.2113361459415497, "learning_rate": 6.198086186991508e-06, "loss": 0.43949177861213684, "step": 4733 }, { "epoch": 1.2864130434782608, "grad_norm": 1.4191015663930364, "learning_rate": 6.19393084481668e-06, "loss": 0.4758853614330292, "step": 4734 }, { "epoch": 1.2866847826086958, "grad_norm": 1.378880792649716, "learning_rate": 6.189776271017471e-06, "loss": 0.5221052765846252, "step": 4735 }, { "epoch": 1.2869565217391306, "grad_norm": 1.0560431975061881, "learning_rate": 6.185622466432609e-06, "loss": 0.4182626008987427, "step": 4736 }, { "epoch": 1.2872282608695653, "grad_norm": 1.3253002395749487, "learning_rate": 6.181469431900673e-06, "loss": 0.45579397678375244, "step": 4737 }, { "epoch": 1.2875, "grad_norm": 1.0610089518455763, "learning_rate": 6.177317168260082e-06, "loss": 0.34129980206489563, "step": 4738 }, { "epoch": 1.2877717391304349, "grad_norm": 1.3053316931303192, "learning_rate": 6.173165676349103e-06, "loss": 0.45898348093032837, "step": 4739 }, { "epoch": 1.2880434782608696, "grad_norm": 1.3237711625248407, "learning_rate": 6.1690149570058476e-06, "loss": 0.4516652226448059, "step": 4740 }, { "epoch": 1.2883152173913044, "grad_norm": 1.3650397462450705, "learning_rate": 6.164865011068266e-06, "loss": 0.5088115930557251, "step": 4741 }, { "epoch": 1.2885869565217392, "grad_norm": 1.2406792549648762, "learning_rate": 6.160715839374162e-06, "loss": 0.46797099709510803, "step": 4742 }, { "epoch": 1.288858695652174, "grad_norm": 1.288704054623386, "learning_rate": 6.156567442761171e-06, "loss": 0.479727566242218, "step": 4743 }, { "epoch": 1.2891304347826087, "grad_norm": 1.1135373795382384, "learning_rate": 6.152419822066784e-06, "loss": 0.4165271520614624, "step": 4744 }, { "epoch": 1.2894021739130435, "grad_norm": 1.30016739567975, "learning_rate": 6.148272978128322e-06, "loss": 0.4474722146987915, "step": 4745 }, { "epoch": 1.2896739130434782, "grad_norm": 1.3666218060707724, "learning_rate": 6.1441269117829685e-06, "loss": 0.5073551535606384, "step": 4746 }, { "epoch": 1.289945652173913, "grad_norm": 1.35499057962468, "learning_rate": 6.1399816238677256e-06, "loss": 0.4329310357570648, "step": 4747 }, { "epoch": 1.2902173913043478, "grad_norm": 1.1155407004504503, "learning_rate": 6.135837115219463e-06, "loss": 0.3754706382751465, "step": 4748 }, { "epoch": 1.2904891304347825, "grad_norm": 1.3761515101940716, "learning_rate": 6.131693386674873e-06, "loss": 0.4663306176662445, "step": 4749 }, { "epoch": 1.2907608695652173, "grad_norm": 1.308762003539636, "learning_rate": 6.127550439070497e-06, "loss": 0.47297948598861694, "step": 4750 }, { "epoch": 1.291032608695652, "grad_norm": 1.368198587455412, "learning_rate": 6.1234082732427295e-06, "loss": 0.5172114372253418, "step": 4751 }, { "epoch": 1.2913043478260868, "grad_norm": 1.1668771983715676, "learning_rate": 6.119266890027785e-06, "loss": 0.4479449987411499, "step": 4752 }, { "epoch": 1.2915760869565218, "grad_norm": 1.158706031676341, "learning_rate": 6.115126290261746e-06, "loss": 0.33594152331352234, "step": 4753 }, { "epoch": 1.2918478260869566, "grad_norm": 1.407475145820021, "learning_rate": 6.110986474780514e-06, "loss": 0.3572876453399658, "step": 4754 }, { "epoch": 1.2921195652173914, "grad_norm": 1.2334442342832055, "learning_rate": 6.106847444419847e-06, "loss": 0.3825588524341583, "step": 4755 }, { "epoch": 1.2923913043478261, "grad_norm": 1.2120714950035436, "learning_rate": 6.102709200015338e-06, "loss": 0.403227835893631, "step": 4756 }, { "epoch": 1.2926630434782609, "grad_norm": 1.1996682255924958, "learning_rate": 6.098571742402423e-06, "loss": 0.428542822599411, "step": 4757 }, { "epoch": 1.2929347826086957, "grad_norm": 1.2913560682792309, "learning_rate": 6.094435072416379e-06, "loss": 0.408290296792984, "step": 4758 }, { "epoch": 1.2932065217391304, "grad_norm": 1.5427286303421726, "learning_rate": 6.090299190892322e-06, "loss": 0.4699115753173828, "step": 4759 }, { "epoch": 1.2934782608695652, "grad_norm": 1.1633188865086068, "learning_rate": 6.0861640986652125e-06, "loss": 0.3938952088356018, "step": 4760 }, { "epoch": 1.29375, "grad_norm": 1.2383661002629565, "learning_rate": 6.0820297965698495e-06, "loss": 0.41203588247299194, "step": 4761 }, { "epoch": 1.2940217391304347, "grad_norm": 1.3494655218244747, "learning_rate": 6.077896285440874e-06, "loss": 0.4670839309692383, "step": 4762 }, { "epoch": 1.2942934782608695, "grad_norm": 1.211597983866262, "learning_rate": 6.0737635661127625e-06, "loss": 0.4250221252441406, "step": 4763 }, { "epoch": 1.2945652173913045, "grad_norm": 1.1826163595103194, "learning_rate": 6.06963163941984e-06, "loss": 0.384333074092865, "step": 4764 }, { "epoch": 1.2948369565217392, "grad_norm": 1.1637586018315087, "learning_rate": 6.065500506196263e-06, "loss": 0.464718222618103, "step": 4765 }, { "epoch": 1.295108695652174, "grad_norm": 0.9849046703731702, "learning_rate": 6.061370167276033e-06, "loss": 0.2725580334663391, "step": 4766 }, { "epoch": 1.2953804347826088, "grad_norm": 1.2488730838993405, "learning_rate": 6.05724062349299e-06, "loss": 0.29338234663009644, "step": 4767 }, { "epoch": 1.2956521739130435, "grad_norm": 1.3968289280497668, "learning_rate": 6.053111875680811e-06, "loss": 0.5079230666160583, "step": 4768 }, { "epoch": 1.2959239130434783, "grad_norm": 1.1008198201235875, "learning_rate": 6.048983924673022e-06, "loss": 0.367422491312027, "step": 4769 }, { "epoch": 1.296195652173913, "grad_norm": 1.1739865488995447, "learning_rate": 6.04485677130297e-06, "loss": 0.38756781816482544, "step": 4770 }, { "epoch": 1.2964673913043478, "grad_norm": 1.1111056798902772, "learning_rate": 6.040730416403858e-06, "loss": 0.4164109230041504, "step": 4771 }, { "epoch": 1.2967391304347826, "grad_norm": 1.1352456165712914, "learning_rate": 6.036604860808721e-06, "loss": 0.40751123428344727, "step": 4772 }, { "epoch": 1.2970108695652174, "grad_norm": 1.369770570329304, "learning_rate": 6.032480105350433e-06, "loss": 0.49966564774513245, "step": 4773 }, { "epoch": 1.2972826086956522, "grad_norm": 1.3566599346944956, "learning_rate": 6.028356150861706e-06, "loss": 0.4720461070537567, "step": 4774 }, { "epoch": 1.297554347826087, "grad_norm": 1.1835422324556786, "learning_rate": 6.024232998175089e-06, "loss": 0.3404308259487152, "step": 4775 }, { "epoch": 1.2978260869565217, "grad_norm": 1.2031923921802148, "learning_rate": 6.020110648122975e-06, "loss": 0.4820031523704529, "step": 4776 }, { "epoch": 1.2980978260869565, "grad_norm": 1.0880474800962803, "learning_rate": 6.015989101537586e-06, "loss": 0.32029807567596436, "step": 4777 }, { "epoch": 1.2983695652173912, "grad_norm": 1.2375941342717645, "learning_rate": 6.011868359250992e-06, "loss": 0.4054601192474365, "step": 4778 }, { "epoch": 1.298641304347826, "grad_norm": 1.2252430573074011, "learning_rate": 6.00774842209509e-06, "loss": 0.4527958929538727, "step": 4779 }, { "epoch": 1.2989130434782608, "grad_norm": 1.156863087642788, "learning_rate": 6.003629290901625e-06, "loss": 0.40695318579673767, "step": 4780 }, { "epoch": 1.2991847826086955, "grad_norm": 1.140942675180329, "learning_rate": 5.999510966502169e-06, "loss": 0.354275107383728, "step": 4781 }, { "epoch": 1.2994565217391305, "grad_norm": 1.3198715576381468, "learning_rate": 5.995393449728142e-06, "loss": 0.4399831295013428, "step": 4782 }, { "epoch": 1.2997282608695653, "grad_norm": 1.240556520860815, "learning_rate": 5.991276741410792e-06, "loss": 0.439517080783844, "step": 4783 }, { "epoch": 1.3, "grad_norm": 1.0626985879509372, "learning_rate": 5.987160842381203e-06, "loss": 0.3188331127166748, "step": 4784 }, { "epoch": 1.3002717391304348, "grad_norm": 1.4415789295476364, "learning_rate": 5.983045753470308e-06, "loss": 0.45062801241874695, "step": 4785 }, { "epoch": 1.3005434782608696, "grad_norm": 1.1651208458175233, "learning_rate": 5.978931475508859e-06, "loss": 0.3925940692424774, "step": 4786 }, { "epoch": 1.3008152173913043, "grad_norm": 1.3313000959821895, "learning_rate": 5.974818009327463e-06, "loss": 0.4322141408920288, "step": 4787 }, { "epoch": 1.3010869565217391, "grad_norm": 1.3374975523124988, "learning_rate": 5.970705355756543e-06, "loss": 0.4125695824623108, "step": 4788 }, { "epoch": 1.3013586956521739, "grad_norm": 1.2808663337253996, "learning_rate": 5.9665935156263775e-06, "loss": 0.4687441885471344, "step": 4789 }, { "epoch": 1.3016304347826086, "grad_norm": 1.2535154855170387, "learning_rate": 5.9624824897670675e-06, "loss": 0.4216848313808441, "step": 4790 }, { "epoch": 1.3019021739130434, "grad_norm": 1.0933582837712144, "learning_rate": 5.958372279008555e-06, "loss": 0.3374617099761963, "step": 4791 }, { "epoch": 1.3021739130434782, "grad_norm": 1.2136516834398159, "learning_rate": 5.954262884180615e-06, "loss": 0.4728602170944214, "step": 4792 }, { "epoch": 1.3024456521739132, "grad_norm": 0.9744612276401765, "learning_rate": 5.950154306112859e-06, "loss": 0.2794012129306793, "step": 4793 }, { "epoch": 1.302717391304348, "grad_norm": 1.4597362052076677, "learning_rate": 5.9460465456347335e-06, "loss": 0.5259509086608887, "step": 4794 }, { "epoch": 1.3029891304347827, "grad_norm": 1.134798454326872, "learning_rate": 5.941939603575522e-06, "loss": 0.3885095715522766, "step": 4795 }, { "epoch": 1.3032608695652175, "grad_norm": 1.1506665310549629, "learning_rate": 5.937833480764339e-06, "loss": 0.3645305633544922, "step": 4796 }, { "epoch": 1.3035326086956522, "grad_norm": 1.23893005885549, "learning_rate": 5.933728178030135e-06, "loss": 0.48131152987480164, "step": 4797 }, { "epoch": 1.303804347826087, "grad_norm": 1.3369953625628745, "learning_rate": 5.929623696201698e-06, "loss": 0.4761657118797302, "step": 4798 }, { "epoch": 1.3040760869565218, "grad_norm": 0.9688579350155402, "learning_rate": 5.925520036107646e-06, "loss": 0.2819991111755371, "step": 4799 }, { "epoch": 1.3043478260869565, "grad_norm": 2.537225674726374, "learning_rate": 5.921417198576431e-06, "loss": 0.48424574732780457, "step": 4800 }, { "epoch": 1.3046195652173913, "grad_norm": 1.2537788281575915, "learning_rate": 5.917315184436345e-06, "loss": 0.48372769355773926, "step": 4801 }, { "epoch": 1.304891304347826, "grad_norm": 1.2816967225418199, "learning_rate": 5.913213994515504e-06, "loss": 0.44995054602622986, "step": 4802 }, { "epoch": 1.3051630434782608, "grad_norm": 1.1884599224143273, "learning_rate": 5.90911362964187e-06, "loss": 0.3939392566680908, "step": 4803 }, { "epoch": 1.3054347826086956, "grad_norm": 1.3787339055014483, "learning_rate": 5.905014090643222e-06, "loss": 0.4060068130493164, "step": 4804 }, { "epoch": 1.3057065217391304, "grad_norm": 1.2120673739571424, "learning_rate": 5.900915378347193e-06, "loss": 0.37595075368881226, "step": 4805 }, { "epoch": 1.3059782608695651, "grad_norm": 1.3082229541505956, "learning_rate": 5.896817493581226e-06, "loss": 0.46082258224487305, "step": 4806 }, { "epoch": 1.30625, "grad_norm": 1.1530537801003582, "learning_rate": 5.892720437172621e-06, "loss": 0.4218594431877136, "step": 4807 }, { "epoch": 1.3065217391304347, "grad_norm": 1.2714774944065754, "learning_rate": 5.888624209948495e-06, "loss": 0.385875403881073, "step": 4808 }, { "epoch": 1.3067934782608694, "grad_norm": 1.1782168528898551, "learning_rate": 5.8845288127357905e-06, "loss": 0.441893994808197, "step": 4809 }, { "epoch": 1.3070652173913042, "grad_norm": 1.279747329565372, "learning_rate": 5.880434246361307e-06, "loss": 0.41772109270095825, "step": 4810 }, { "epoch": 1.3073369565217392, "grad_norm": 1.5385720873350683, "learning_rate": 5.876340511651655e-06, "loss": 0.4281291365623474, "step": 4811 }, { "epoch": 1.307608695652174, "grad_norm": 1.2936016143598905, "learning_rate": 5.872247609433288e-06, "loss": 0.41604533791542053, "step": 4812 }, { "epoch": 1.3078804347826087, "grad_norm": 1.5908026215451951, "learning_rate": 5.868155540532486e-06, "loss": 0.45994308590888977, "step": 4813 }, { "epoch": 1.3081521739130435, "grad_norm": 1.0536811963832564, "learning_rate": 5.8640643057753645e-06, "loss": 0.3865776062011719, "step": 4814 }, { "epoch": 1.3084239130434783, "grad_norm": 1.2619703848992188, "learning_rate": 5.859973905987866e-06, "loss": 0.37838658690452576, "step": 4815 }, { "epoch": 1.308695652173913, "grad_norm": 1.0559771065181114, "learning_rate": 5.85588434199577e-06, "loss": 0.32645124197006226, "step": 4816 }, { "epoch": 1.3089673913043478, "grad_norm": 1.0662584974124822, "learning_rate": 5.8517956146246826e-06, "loss": 0.3451680541038513, "step": 4817 }, { "epoch": 1.3092391304347826, "grad_norm": 1.3109799284530081, "learning_rate": 5.847707724700043e-06, "loss": 0.43612155318260193, "step": 4818 }, { "epoch": 1.3095108695652173, "grad_norm": 1.0276499940775758, "learning_rate": 5.843620673047123e-06, "loss": 0.33606070280075073, "step": 4819 }, { "epoch": 1.309782608695652, "grad_norm": 1.2447326481802135, "learning_rate": 5.8395344604910206e-06, "loss": 0.44088348746299744, "step": 4820 }, { "epoch": 1.3100543478260869, "grad_norm": 1.417375343135921, "learning_rate": 5.835449087856673e-06, "loss": 0.46947598457336426, "step": 4821 }, { "epoch": 1.3103260869565219, "grad_norm": 1.2361660255457172, "learning_rate": 5.831364555968834e-06, "loss": 0.45168808102607727, "step": 4822 }, { "epoch": 1.3105978260869566, "grad_norm": 1.2955396040710276, "learning_rate": 5.8272808656521e-06, "loss": 0.43964850902557373, "step": 4823 }, { "epoch": 1.3108695652173914, "grad_norm": 1.3495347789970653, "learning_rate": 5.823198017730893e-06, "loss": 0.4284546375274658, "step": 4824 }, { "epoch": 1.3111413043478262, "grad_norm": 1.2246880025694236, "learning_rate": 5.81911601302947e-06, "loss": 0.4393112063407898, "step": 4825 }, { "epoch": 1.311413043478261, "grad_norm": 1.2735208790986345, "learning_rate": 5.815034852371903e-06, "loss": 0.4067501425743103, "step": 4826 }, { "epoch": 1.3116847826086957, "grad_norm": 1.1370236601756487, "learning_rate": 5.81095453658211e-06, "loss": 0.41047561168670654, "step": 4827 }, { "epoch": 1.3119565217391305, "grad_norm": 1.258565728795183, "learning_rate": 5.806875066483835e-06, "loss": 0.46506267786026, "step": 4828 }, { "epoch": 1.3122282608695652, "grad_norm": 1.2230793630262358, "learning_rate": 5.802796442900638e-06, "loss": 0.392223596572876, "step": 4829 }, { "epoch": 1.3125, "grad_norm": 1.1057494989343433, "learning_rate": 5.798718666655925e-06, "loss": 0.37395551800727844, "step": 4830 }, { "epoch": 1.3127717391304348, "grad_norm": 1.1275314917911359, "learning_rate": 5.794641738572925e-06, "loss": 0.41048455238342285, "step": 4831 }, { "epoch": 1.3130434782608695, "grad_norm": 1.1765102939143859, "learning_rate": 5.790565659474696e-06, "loss": 0.4452971816062927, "step": 4832 }, { "epoch": 1.3133152173913043, "grad_norm": 1.2236363759553153, "learning_rate": 5.786490430184115e-06, "loss": 0.40597349405288696, "step": 4833 }, { "epoch": 1.313586956521739, "grad_norm": 1.108417547715483, "learning_rate": 5.782416051523909e-06, "loss": 0.39376240968704224, "step": 4834 }, { "epoch": 1.3138586956521738, "grad_norm": 1.199736648614784, "learning_rate": 5.778342524316615e-06, "loss": 0.41132980585098267, "step": 4835 }, { "epoch": 1.3141304347826086, "grad_norm": 1.2614403562074048, "learning_rate": 5.774269849384593e-06, "loss": 0.42508113384246826, "step": 4836 }, { "epoch": 1.3144021739130434, "grad_norm": 1.2042605117241543, "learning_rate": 5.77019802755006e-06, "loss": 0.36202478408813477, "step": 4837 }, { "epoch": 1.3146739130434781, "grad_norm": 1.1504676587543714, "learning_rate": 5.76612705963503e-06, "loss": 0.37471646070480347, "step": 4838 }, { "epoch": 1.3149456521739131, "grad_norm": 1.2382071999694877, "learning_rate": 5.762056946461357e-06, "loss": 0.3947606086730957, "step": 4839 }, { "epoch": 1.315217391304348, "grad_norm": 1.3296487063660107, "learning_rate": 5.757987688850727e-06, "loss": 0.42107096314430237, "step": 4840 }, { "epoch": 1.3154891304347827, "grad_norm": 0.9452831790472372, "learning_rate": 5.75391928762465e-06, "loss": 0.2780679166316986, "step": 4841 }, { "epoch": 1.3157608695652174, "grad_norm": 1.239541633032202, "learning_rate": 5.749851743604454e-06, "loss": 0.4225488305091858, "step": 4842 }, { "epoch": 1.3160326086956522, "grad_norm": 1.0569404392268538, "learning_rate": 5.745785057611306e-06, "loss": 0.3470942974090576, "step": 4843 }, { "epoch": 1.316304347826087, "grad_norm": 1.2408496265872737, "learning_rate": 5.7417192304662e-06, "loss": 0.43808993697166443, "step": 4844 }, { "epoch": 1.3165760869565217, "grad_norm": 1.1287434697600247, "learning_rate": 5.737654262989943e-06, "loss": 0.4001196026802063, "step": 4845 }, { "epoch": 1.3168478260869565, "grad_norm": 1.1150040803434338, "learning_rate": 5.733590156003181e-06, "loss": 0.34556254744529724, "step": 4846 }, { "epoch": 1.3171195652173913, "grad_norm": 1.1824083262136313, "learning_rate": 5.729526910326385e-06, "loss": 0.3323608934879303, "step": 4847 }, { "epoch": 1.317391304347826, "grad_norm": 1.1762903700620266, "learning_rate": 5.725464526779851e-06, "loss": 0.4183129072189331, "step": 4848 }, { "epoch": 1.3176630434782608, "grad_norm": 1.468670313857947, "learning_rate": 5.72140300618369e-06, "loss": 0.48310160636901855, "step": 4849 }, { "epoch": 1.3179347826086958, "grad_norm": 1.2719360176935364, "learning_rate": 5.717342349357863e-06, "loss": 0.38213473558425903, "step": 4850 }, { "epoch": 1.3182065217391306, "grad_norm": 0.993565672617138, "learning_rate": 5.713282557122135e-06, "loss": 0.33974653482437134, "step": 4851 }, { "epoch": 1.3184782608695653, "grad_norm": 1.2805115591093543, "learning_rate": 5.709223630296095e-06, "loss": 0.4360756278038025, "step": 4852 }, { "epoch": 1.31875, "grad_norm": 1.3579993977214229, "learning_rate": 5.7051655696991825e-06, "loss": 0.48132890462875366, "step": 4853 }, { "epoch": 1.3190217391304349, "grad_norm": 1.3482754437012479, "learning_rate": 5.701108376150635e-06, "loss": 0.4718872308731079, "step": 4854 }, { "epoch": 1.3192934782608696, "grad_norm": 1.192932694823442, "learning_rate": 5.697052050469529e-06, "loss": 0.4362320899963379, "step": 4855 }, { "epoch": 1.3195652173913044, "grad_norm": 1.2321388051517308, "learning_rate": 5.692996593474755e-06, "loss": 0.4239931106567383, "step": 4856 }, { "epoch": 1.3198369565217392, "grad_norm": 1.2562164064195334, "learning_rate": 5.688942005985052e-06, "loss": 0.44958508014678955, "step": 4857 }, { "epoch": 1.320108695652174, "grad_norm": 1.2067509244423784, "learning_rate": 5.684888288818951e-06, "loss": 0.4213854670524597, "step": 4858 }, { "epoch": 1.3203804347826087, "grad_norm": 1.4167607696075037, "learning_rate": 5.680835442794827e-06, "loss": 0.5087082982063293, "step": 4859 }, { "epoch": 1.3206521739130435, "grad_norm": 1.1548039756615798, "learning_rate": 5.676783468730885e-06, "loss": 0.3601762056350708, "step": 4860 }, { "epoch": 1.3209239130434782, "grad_norm": 1.0869430872070547, "learning_rate": 5.6727323674451295e-06, "loss": 0.336357057094574, "step": 4861 }, { "epoch": 1.321195652173913, "grad_norm": 1.3521018880271247, "learning_rate": 5.668682139755411e-06, "loss": 0.5082486867904663, "step": 4862 }, { "epoch": 1.3214673913043478, "grad_norm": 1.1757216947416378, "learning_rate": 5.664632786479398e-06, "loss": 0.4280236065387726, "step": 4863 }, { "epoch": 1.3217391304347825, "grad_norm": 1.1376305828010778, "learning_rate": 5.66058430843458e-06, "loss": 0.37575745582580566, "step": 4864 }, { "epoch": 1.3220108695652173, "grad_norm": 1.0241559825235054, "learning_rate": 5.656536706438267e-06, "loss": 0.32225823402404785, "step": 4865 }, { "epoch": 1.322282608695652, "grad_norm": 2.666188290965457, "learning_rate": 5.652489981307599e-06, "loss": 0.3587925434112549, "step": 4866 }, { "epoch": 1.3225543478260868, "grad_norm": 1.3773023087912626, "learning_rate": 5.648444133859532e-06, "loss": 0.5077449679374695, "step": 4867 }, { "epoch": 1.3228260869565218, "grad_norm": 1.3034865260615036, "learning_rate": 5.6443991649108565e-06, "loss": 0.4403514266014099, "step": 4868 }, { "epoch": 1.3230978260869566, "grad_norm": 1.1979519323275722, "learning_rate": 5.640355075278167e-06, "loss": 0.39230844378471375, "step": 4869 }, { "epoch": 1.3233695652173914, "grad_norm": 1.256842145862882, "learning_rate": 5.636311865777897e-06, "loss": 0.38426584005355835, "step": 4870 }, { "epoch": 1.3236413043478261, "grad_norm": 1.2154881784059388, "learning_rate": 5.6322695372262994e-06, "loss": 0.43155473470687866, "step": 4871 }, { "epoch": 1.3239130434782609, "grad_norm": 1.1229370744576912, "learning_rate": 5.628228090439434e-06, "loss": 0.3509353995323181, "step": 4872 }, { "epoch": 1.3241847826086957, "grad_norm": 1.2721256982616973, "learning_rate": 5.624187526233211e-06, "loss": 0.3887515068054199, "step": 4873 }, { "epoch": 1.3244565217391304, "grad_norm": 1.2449697888808824, "learning_rate": 5.620147845423336e-06, "loss": 0.5076614618301392, "step": 4874 }, { "epoch": 1.3247282608695652, "grad_norm": 1.00845145116955, "learning_rate": 5.616109048825348e-06, "loss": 0.30627453327178955, "step": 4875 }, { "epoch": 1.325, "grad_norm": 1.0265229701622063, "learning_rate": 5.612071137254607e-06, "loss": 0.3497518301010132, "step": 4876 }, { "epoch": 1.3252717391304347, "grad_norm": 1.2309616276048814, "learning_rate": 5.608034111526298e-06, "loss": 0.3962942361831665, "step": 4877 }, { "epoch": 1.3255434782608695, "grad_norm": 1.1979052215454138, "learning_rate": 5.603997972455414e-06, "loss": 0.42431551218032837, "step": 4878 }, { "epoch": 1.3258152173913045, "grad_norm": 1.2300564346162617, "learning_rate": 5.599962720856781e-06, "loss": 0.40157550573349, "step": 4879 }, { "epoch": 1.3260869565217392, "grad_norm": 1.259905510800105, "learning_rate": 5.5959283575450466e-06, "loss": 0.35126495361328125, "step": 4880 }, { "epoch": 1.326358695652174, "grad_norm": 1.2058410746476473, "learning_rate": 5.591894883334668e-06, "loss": 0.44995519518852234, "step": 4881 }, { "epoch": 1.3266304347826088, "grad_norm": 1.1731394477080055, "learning_rate": 5.5878622990399326e-06, "loss": 0.3930358290672302, "step": 4882 }, { "epoch": 1.3269021739130435, "grad_norm": 1.144313239207526, "learning_rate": 5.583830605474945e-06, "loss": 0.3959873914718628, "step": 4883 }, { "epoch": 1.3271739130434783, "grad_norm": 1.2110500376417097, "learning_rate": 5.579799803453634e-06, "loss": 0.40348339080810547, "step": 4884 }, { "epoch": 1.327445652173913, "grad_norm": 1.1383339026150063, "learning_rate": 5.575769893789739e-06, "loss": 0.3905078172683716, "step": 4885 }, { "epoch": 1.3277173913043478, "grad_norm": 1.335560462167701, "learning_rate": 5.571740877296825e-06, "loss": 0.4002875089645386, "step": 4886 }, { "epoch": 1.3279891304347826, "grad_norm": 1.2475709077770605, "learning_rate": 5.567712754788286e-06, "loss": 0.4724094569683075, "step": 4887 }, { "epoch": 1.3282608695652174, "grad_norm": 1.290598502308497, "learning_rate": 5.5636855270773116e-06, "loss": 0.45415496826171875, "step": 4888 }, { "epoch": 1.3285326086956522, "grad_norm": 1.1341863007907143, "learning_rate": 5.559659194976935e-06, "loss": 0.43352290987968445, "step": 4889 }, { "epoch": 1.328804347826087, "grad_norm": 1.1795859073286046, "learning_rate": 5.555633759299996e-06, "loss": 0.4486171305179596, "step": 4890 }, { "epoch": 1.3290760869565217, "grad_norm": 1.1506788597039526, "learning_rate": 5.55160922085916e-06, "loss": 0.3473854064941406, "step": 4891 }, { "epoch": 1.3293478260869565, "grad_norm": 1.1066523711862415, "learning_rate": 5.547585580466898e-06, "loss": 0.36585181951522827, "step": 4892 }, { "epoch": 1.3296195652173912, "grad_norm": 1.1289936346711904, "learning_rate": 5.543562838935522e-06, "loss": 0.3788878321647644, "step": 4893 }, { "epoch": 1.329891304347826, "grad_norm": 0.988387341944396, "learning_rate": 5.539540997077144e-06, "loss": 0.3215341866016388, "step": 4894 }, { "epoch": 1.3301630434782608, "grad_norm": 1.1466963008357103, "learning_rate": 5.535520055703692e-06, "loss": 0.38993704319000244, "step": 4895 }, { "epoch": 1.3304347826086955, "grad_norm": 1.0625395198797962, "learning_rate": 5.531500015626936e-06, "loss": 0.4025784134864807, "step": 4896 }, { "epoch": 1.3307065217391305, "grad_norm": 1.1919416766953015, "learning_rate": 5.527480877658437e-06, "loss": 0.34112754464149475, "step": 4897 }, { "epoch": 1.3309782608695653, "grad_norm": 1.2640038674140897, "learning_rate": 5.523462642609587e-06, "loss": 0.3906520903110504, "step": 4898 }, { "epoch": 1.33125, "grad_norm": 1.2016385116218187, "learning_rate": 5.519445311291597e-06, "loss": 0.4067329168319702, "step": 4899 }, { "epoch": 1.3315217391304348, "grad_norm": 1.3751961872740521, "learning_rate": 5.515428884515495e-06, "loss": 0.4434381127357483, "step": 4900 }, { "epoch": 1.3317934782608696, "grad_norm": 1.1034451437181003, "learning_rate": 5.511413363092116e-06, "loss": 0.3388558626174927, "step": 4901 }, { "epoch": 1.3320652173913043, "grad_norm": 1.2964048651756166, "learning_rate": 5.507398747832124e-06, "loss": 0.40429455041885376, "step": 4902 }, { "epoch": 1.3323369565217391, "grad_norm": 1.2378592641391066, "learning_rate": 5.503385039546002e-06, "loss": 0.4135822057723999, "step": 4903 }, { "epoch": 1.3326086956521739, "grad_norm": 1.0194062830847537, "learning_rate": 5.499372239044034e-06, "loss": 0.35850319266319275, "step": 4904 }, { "epoch": 1.3328804347826086, "grad_norm": 1.236261265741949, "learning_rate": 5.495360347136336e-06, "loss": 0.44789475202560425, "step": 4905 }, { "epoch": 1.3331521739130434, "grad_norm": 1.2939467442553174, "learning_rate": 5.491349364632836e-06, "loss": 0.4430767893791199, "step": 4906 }, { "epoch": 1.3334239130434782, "grad_norm": 1.1783998917363308, "learning_rate": 5.48733929234328e-06, "loss": 0.44832301139831543, "step": 4907 }, { "epoch": 1.3336956521739132, "grad_norm": 1.1678363769208104, "learning_rate": 5.483330131077218e-06, "loss": 0.38186633586883545, "step": 4908 }, { "epoch": 1.333967391304348, "grad_norm": 1.3251639589783064, "learning_rate": 5.479321881644041e-06, "loss": 0.4783569276332855, "step": 4909 }, { "epoch": 1.3342391304347827, "grad_norm": 1.3434733359824453, "learning_rate": 5.4753145448529284e-06, "loss": 0.49326568841934204, "step": 4910 }, { "epoch": 1.3345108695652175, "grad_norm": 1.2120084126597568, "learning_rate": 5.471308121512895e-06, "loss": 0.37969377636909485, "step": 4911 }, { "epoch": 1.3347826086956522, "grad_norm": 1.0906979270613917, "learning_rate": 5.467302612432765e-06, "loss": 0.34726762771606445, "step": 4912 }, { "epoch": 1.335054347826087, "grad_norm": 1.3138127064253975, "learning_rate": 5.463298018421171e-06, "loss": 0.460049569606781, "step": 4913 }, { "epoch": 1.3353260869565218, "grad_norm": 1.1973190810671068, "learning_rate": 5.459294340286571e-06, "loss": 0.33004292845726013, "step": 4914 }, { "epoch": 1.3355978260869565, "grad_norm": 1.161109189699843, "learning_rate": 5.455291578837234e-06, "loss": 0.4244241714477539, "step": 4915 }, { "epoch": 1.3358695652173913, "grad_norm": 1.0421030920979903, "learning_rate": 5.451289734881249e-06, "loss": 0.3157370090484619, "step": 4916 }, { "epoch": 1.336141304347826, "grad_norm": 1.3138680506499851, "learning_rate": 5.447288809226505e-06, "loss": 0.45591282844543457, "step": 4917 }, { "epoch": 1.3364130434782608, "grad_norm": 1.0353570925903666, "learning_rate": 5.443288802680722e-06, "loss": 0.32786571979522705, "step": 4918 }, { "epoch": 1.3366847826086956, "grad_norm": 0.996222508061658, "learning_rate": 5.43928971605143e-06, "loss": 0.34793737530708313, "step": 4919 }, { "epoch": 1.3369565217391304, "grad_norm": 1.3043079032269613, "learning_rate": 5.4352915501459644e-06, "loss": 0.39994555711746216, "step": 4920 }, { "epoch": 1.3372282608695651, "grad_norm": 1.2163613245758214, "learning_rate": 5.431294305771486e-06, "loss": 0.37234359979629517, "step": 4921 }, { "epoch": 1.3375, "grad_norm": 1.3650794770756665, "learning_rate": 5.427297983734963e-06, "loss": 0.306351900100708, "step": 4922 }, { "epoch": 1.3377717391304347, "grad_norm": 1.28551928370125, "learning_rate": 5.423302584843186e-06, "loss": 0.447555273771286, "step": 4923 }, { "epoch": 1.3380434782608694, "grad_norm": 1.2411023809205552, "learning_rate": 5.4193081099027444e-06, "loss": 0.39956778287887573, "step": 4924 }, { "epoch": 1.3383152173913042, "grad_norm": 1.2840204426123563, "learning_rate": 5.415314559720053e-06, "loss": 0.4302152991294861, "step": 4925 }, { "epoch": 1.3385869565217392, "grad_norm": 1.1807738013672004, "learning_rate": 5.4113219351013354e-06, "loss": 0.4021172821521759, "step": 4926 }, { "epoch": 1.338858695652174, "grad_norm": 1.17493266843865, "learning_rate": 5.407330236852635e-06, "loss": 0.3772047162055969, "step": 4927 }, { "epoch": 1.3391304347826087, "grad_norm": 1.2542324685307213, "learning_rate": 5.403339465779794e-06, "loss": 0.39030247926712036, "step": 4928 }, { "epoch": 1.3394021739130435, "grad_norm": 1.142135585985911, "learning_rate": 5.399349622688479e-06, "loss": 0.36426031589508057, "step": 4929 }, { "epoch": 1.3396739130434783, "grad_norm": 1.1781198593531002, "learning_rate": 5.395360708384172e-06, "loss": 0.41633933782577515, "step": 4930 }, { "epoch": 1.339945652173913, "grad_norm": 0.9942749053935079, "learning_rate": 5.3913727236721494e-06, "loss": 0.3127426505088806, "step": 4931 }, { "epoch": 1.3402173913043478, "grad_norm": 1.1791991211857038, "learning_rate": 5.3873856693575265e-06, "loss": 0.3181344270706177, "step": 4932 }, { "epoch": 1.3404891304347826, "grad_norm": 1.1239147651863983, "learning_rate": 5.383399546245205e-06, "loss": 0.41567081212997437, "step": 4933 }, { "epoch": 1.3407608695652173, "grad_norm": 1.2684779128195716, "learning_rate": 5.379414355139916e-06, "loss": 0.40201300382614136, "step": 4934 }, { "epoch": 1.341032608695652, "grad_norm": 1.2442557203054825, "learning_rate": 5.3754300968461945e-06, "loss": 0.4344361126422882, "step": 4935 }, { "epoch": 1.3413043478260869, "grad_norm": 1.011934291921998, "learning_rate": 5.371446772168395e-06, "loss": 0.33879804611206055, "step": 4936 }, { "epoch": 1.3415760869565219, "grad_norm": 1.3050695091908369, "learning_rate": 5.367464381910667e-06, "loss": 0.4425780773162842, "step": 4937 }, { "epoch": 1.3418478260869566, "grad_norm": 1.1681898020110826, "learning_rate": 5.36348292687699e-06, "loss": 0.4756506085395813, "step": 4938 }, { "epoch": 1.3421195652173914, "grad_norm": 1.1736369936138935, "learning_rate": 5.359502407871146e-06, "loss": 0.4563470482826233, "step": 4939 }, { "epoch": 1.3423913043478262, "grad_norm": 1.3539905307290658, "learning_rate": 5.355522825696726e-06, "loss": 0.4463486075401306, "step": 4940 }, { "epoch": 1.342663043478261, "grad_norm": 1.259067264507075, "learning_rate": 5.351544181157135e-06, "loss": 0.4318707585334778, "step": 4941 }, { "epoch": 1.3429347826086957, "grad_norm": 1.2851761718387884, "learning_rate": 5.34756647505559e-06, "loss": 0.4110035300254822, "step": 4942 }, { "epoch": 1.3432065217391305, "grad_norm": 1.3434313048050908, "learning_rate": 5.34358970819512e-06, "loss": 0.4465569853782654, "step": 4943 }, { "epoch": 1.3434782608695652, "grad_norm": 1.4459793489338653, "learning_rate": 5.339613881378551e-06, "loss": 0.5058653354644775, "step": 4944 }, { "epoch": 1.34375, "grad_norm": 1.2793726374024252, "learning_rate": 5.3356389954085455e-06, "loss": 0.4368714690208435, "step": 4945 }, { "epoch": 1.3440217391304348, "grad_norm": 1.092936542500604, "learning_rate": 5.331665051087549e-06, "loss": 0.39410364627838135, "step": 4946 }, { "epoch": 1.3442934782608695, "grad_norm": 1.4025075455926737, "learning_rate": 5.327692049217823e-06, "loss": 0.4353351294994354, "step": 4947 }, { "epoch": 1.3445652173913043, "grad_norm": 1.1580121766526112, "learning_rate": 5.323719990601459e-06, "loss": 0.3693985044956207, "step": 4948 }, { "epoch": 1.344836956521739, "grad_norm": 1.331659573375675, "learning_rate": 5.319748876040331e-06, "loss": 0.481623113155365, "step": 4949 }, { "epoch": 1.3451086956521738, "grad_norm": 1.0372745099738983, "learning_rate": 5.315778706336142e-06, "loss": 0.31395137310028076, "step": 4950 }, { "epoch": 1.3453804347826086, "grad_norm": 1.253976696929415, "learning_rate": 5.311809482290384e-06, "loss": 0.4202778935432434, "step": 4951 }, { "epoch": 1.3456521739130434, "grad_norm": 1.1663197387979496, "learning_rate": 5.307841204704389e-06, "loss": 0.4179137647151947, "step": 4952 }, { "epoch": 1.3459239130434781, "grad_norm": 1.1778799673208056, "learning_rate": 5.303873874379264e-06, "loss": 0.42181527614593506, "step": 4953 }, { "epoch": 1.3461956521739131, "grad_norm": 1.2234362900870808, "learning_rate": 5.299907492115947e-06, "loss": 0.4236137866973877, "step": 4954 }, { "epoch": 1.346467391304348, "grad_norm": 1.2498436617899975, "learning_rate": 5.29594205871518e-06, "loss": 0.35992252826690674, "step": 4955 }, { "epoch": 1.3467391304347827, "grad_norm": 1.3605012270375776, "learning_rate": 5.2919775749775045e-06, "loss": 0.46684664487838745, "step": 4956 }, { "epoch": 1.3470108695652174, "grad_norm": 1.1991081963481203, "learning_rate": 5.288014041703282e-06, "loss": 0.402192622423172, "step": 4957 }, { "epoch": 1.3472826086956522, "grad_norm": 1.0854112131306757, "learning_rate": 5.284051459692675e-06, "loss": 0.3585605323314667, "step": 4958 }, { "epoch": 1.347554347826087, "grad_norm": 0.8666259063109993, "learning_rate": 5.280089829745662e-06, "loss": 0.2698366940021515, "step": 4959 }, { "epoch": 1.3478260869565217, "grad_norm": 1.1277850579197952, "learning_rate": 5.276129152662014e-06, "loss": 0.32636818289756775, "step": 4960 }, { "epoch": 1.3480978260869565, "grad_norm": 1.2680436193928109, "learning_rate": 5.272169429241325e-06, "loss": 0.4659308195114136, "step": 4961 }, { "epoch": 1.3483695652173913, "grad_norm": 1.1739637227569735, "learning_rate": 5.268210660282992e-06, "loss": 0.38643312454223633, "step": 4962 }, { "epoch": 1.348641304347826, "grad_norm": 1.2166134100562465, "learning_rate": 5.264252846586212e-06, "loss": 0.40988194942474365, "step": 4963 }, { "epoch": 1.3489130434782608, "grad_norm": 1.3077621573005294, "learning_rate": 5.26029598895e-06, "loss": 0.43160009384155273, "step": 4964 }, { "epoch": 1.3491847826086958, "grad_norm": 1.2167784402375417, "learning_rate": 5.256340088173169e-06, "loss": 0.4378291666507721, "step": 4965 }, { "epoch": 1.3494565217391306, "grad_norm": 1.311926260062099, "learning_rate": 5.25238514505435e-06, "loss": 0.45241063833236694, "step": 4966 }, { "epoch": 1.3497282608695653, "grad_norm": 1.256873687202055, "learning_rate": 5.248431160391963e-06, "loss": 0.42538589239120483, "step": 4967 }, { "epoch": 1.35, "grad_norm": 1.230938542754337, "learning_rate": 5.2444781349842565e-06, "loss": 0.4264571964740753, "step": 4968 }, { "epoch": 1.3502717391304349, "grad_norm": 1.1540230970809922, "learning_rate": 5.240526069629265e-06, "loss": 0.345817893743515, "step": 4969 }, { "epoch": 1.3505434782608696, "grad_norm": 1.2011460770357323, "learning_rate": 5.236574965124842e-06, "loss": 0.4318215847015381, "step": 4970 }, { "epoch": 1.3508152173913044, "grad_norm": 1.4543074248383947, "learning_rate": 5.232624822268646e-06, "loss": 0.47144341468811035, "step": 4971 }, { "epoch": 1.3510869565217392, "grad_norm": 1.1086980944352054, "learning_rate": 5.228675641858132e-06, "loss": 0.3773188292980194, "step": 4972 }, { "epoch": 1.351358695652174, "grad_norm": 1.3904208785327021, "learning_rate": 5.224727424690569e-06, "loss": 0.5433279275894165, "step": 4973 }, { "epoch": 1.3516304347826087, "grad_norm": 1.1587650948761825, "learning_rate": 5.2207801715630315e-06, "loss": 0.4035693407058716, "step": 4974 }, { "epoch": 1.3519021739130435, "grad_norm": 1.0586049319221609, "learning_rate": 5.216833883272401e-06, "loss": 0.2816065549850464, "step": 4975 }, { "epoch": 1.3521739130434782, "grad_norm": 1.2605931551602245, "learning_rate": 5.212888560615351e-06, "loss": 0.5014947652816772, "step": 4976 }, { "epoch": 1.352445652173913, "grad_norm": 1.224388543038091, "learning_rate": 5.208944204388377e-06, "loss": 0.45967739820480347, "step": 4977 }, { "epoch": 1.3527173913043478, "grad_norm": 1.2532824225468857, "learning_rate": 5.205000815387771e-06, "loss": 0.4219924807548523, "step": 4978 }, { "epoch": 1.3529891304347825, "grad_norm": 1.113497669847183, "learning_rate": 5.201058394409635e-06, "loss": 0.3228617310523987, "step": 4979 }, { "epoch": 1.3532608695652173, "grad_norm": 1.1360708638485433, "learning_rate": 5.1971169422498625e-06, "loss": 0.376031756401062, "step": 4980 }, { "epoch": 1.353532608695652, "grad_norm": 1.1523435805715905, "learning_rate": 5.1931764597041655e-06, "loss": 0.39255034923553467, "step": 4981 }, { "epoch": 1.3538043478260868, "grad_norm": 1.2113396087333044, "learning_rate": 5.189236947568059e-06, "loss": 0.41756802797317505, "step": 4982 }, { "epoch": 1.3540760869565218, "grad_norm": 1.2879580268840831, "learning_rate": 5.18529840663685e-06, "loss": 0.4817129373550415, "step": 4983 }, { "epoch": 1.3543478260869566, "grad_norm": 1.2751287322266283, "learning_rate": 5.1813608377056625e-06, "loss": 0.48165464401245117, "step": 4984 }, { "epoch": 1.3546195652173914, "grad_norm": 1.3128454939523155, "learning_rate": 5.177424241569419e-06, "loss": 0.5012530088424683, "step": 4985 }, { "epoch": 1.3548913043478261, "grad_norm": 1.1250704311581015, "learning_rate": 5.1734886190228496e-06, "loss": 0.40771180391311646, "step": 4986 }, { "epoch": 1.3551630434782609, "grad_norm": 1.2071550387152867, "learning_rate": 5.169553970860474e-06, "loss": 0.4013482928276062, "step": 4987 }, { "epoch": 1.3554347826086957, "grad_norm": 1.375736770417236, "learning_rate": 5.165620297876639e-06, "loss": 0.45977526903152466, "step": 4988 }, { "epoch": 1.3557065217391304, "grad_norm": 1.3403930553515675, "learning_rate": 5.161687600865476e-06, "loss": 0.4559473395347595, "step": 4989 }, { "epoch": 1.3559782608695652, "grad_norm": 1.3315730395882843, "learning_rate": 5.157755880620914e-06, "loss": 0.5015953183174133, "step": 4990 }, { "epoch": 1.35625, "grad_norm": 1.1855976913749677, "learning_rate": 5.153825137936713e-06, "loss": 0.31819331645965576, "step": 4991 }, { "epoch": 1.3565217391304347, "grad_norm": 1.2149273324616254, "learning_rate": 5.149895373606405e-06, "loss": 0.419339656829834, "step": 4992 }, { "epoch": 1.3567934782608695, "grad_norm": 1.1942893005564805, "learning_rate": 5.145966588423341e-06, "loss": 0.39488184452056885, "step": 4993 }, { "epoch": 1.3570652173913045, "grad_norm": 1.3557746575513658, "learning_rate": 5.142038783180673e-06, "loss": 0.49081528186798096, "step": 4994 }, { "epoch": 1.3573369565217392, "grad_norm": 1.2611774514375194, "learning_rate": 5.1381119586713545e-06, "loss": 0.43675941228866577, "step": 4995 }, { "epoch": 1.357608695652174, "grad_norm": 1.054020943214634, "learning_rate": 5.134186115688131e-06, "loss": 0.30820432305336, "step": 4996 }, { "epoch": 1.3578804347826088, "grad_norm": 1.3721863565613825, "learning_rate": 5.130261255023566e-06, "loss": 0.48264575004577637, "step": 4997 }, { "epoch": 1.3581521739130435, "grad_norm": 1.3272553901593442, "learning_rate": 5.126337377470017e-06, "loss": 0.4638548493385315, "step": 4998 }, { "epoch": 1.3584239130434783, "grad_norm": 1.2760219298406512, "learning_rate": 5.122414483819635e-06, "loss": 0.45913219451904297, "step": 4999 }, { "epoch": 1.358695652173913, "grad_norm": 1.134845557440637, "learning_rate": 5.118492574864387e-06, "loss": 0.33947861194610596, "step": 5000 }, { "epoch": 1.3589673913043478, "grad_norm": 1.233998973996579, "learning_rate": 5.114571651396033e-06, "loss": 0.4151337444782257, "step": 5001 }, { "epoch": 1.3592391304347826, "grad_norm": 1.1690471155283224, "learning_rate": 5.11065171420614e-06, "loss": 0.41856759786605835, "step": 5002 }, { "epoch": 1.3595108695652174, "grad_norm": 1.6251242655625195, "learning_rate": 5.10673276408606e-06, "loss": 0.4294673800468445, "step": 5003 }, { "epoch": 1.3597826086956522, "grad_norm": 1.1222288174647665, "learning_rate": 5.1028148018269704e-06, "loss": 0.3550443649291992, "step": 5004 }, { "epoch": 1.360054347826087, "grad_norm": 1.3446175445222646, "learning_rate": 5.098897828219831e-06, "loss": 0.4486743211746216, "step": 5005 }, { "epoch": 1.3603260869565217, "grad_norm": 1.2192545809696327, "learning_rate": 5.0949818440554e-06, "loss": 0.39193782210350037, "step": 5006 }, { "epoch": 1.3605978260869565, "grad_norm": 1.0816760824876797, "learning_rate": 5.091066850124256e-06, "loss": 0.32274702191352844, "step": 5007 }, { "epoch": 1.3608695652173912, "grad_norm": 1.2196928328961156, "learning_rate": 5.087152847216752e-06, "loss": 0.38714441657066345, "step": 5008 }, { "epoch": 1.361141304347826, "grad_norm": 1.181222504489368, "learning_rate": 5.0832398361230595e-06, "loss": 0.40195533633232117, "step": 5009 }, { "epoch": 1.3614130434782608, "grad_norm": 1.2899464307161308, "learning_rate": 5.079327817633144e-06, "loss": 0.44652414321899414, "step": 5010 }, { "epoch": 1.3616847826086955, "grad_norm": 1.1632389272167671, "learning_rate": 5.075416792536774e-06, "loss": 0.3812996745109558, "step": 5011 }, { "epoch": 1.3619565217391305, "grad_norm": 1.1437937233789248, "learning_rate": 5.071506761623503e-06, "loss": 0.36239010095596313, "step": 5012 }, { "epoch": 1.3622282608695653, "grad_norm": 1.217959664526742, "learning_rate": 5.067597725682702e-06, "loss": 0.38821351528167725, "step": 5013 }, { "epoch": 1.3625, "grad_norm": 1.0980496957814492, "learning_rate": 5.0636896855035374e-06, "loss": 0.32436710596084595, "step": 5014 }, { "epoch": 1.3627717391304348, "grad_norm": 1.1643613177632537, "learning_rate": 5.059782641874962e-06, "loss": 0.3490183353424072, "step": 5015 }, { "epoch": 1.3630434782608696, "grad_norm": 1.2892372660342306, "learning_rate": 5.05587659558574e-06, "loss": 0.48947858810424805, "step": 5016 }, { "epoch": 1.3633152173913043, "grad_norm": 1.2548753554571415, "learning_rate": 5.051971547424432e-06, "loss": 0.4173399806022644, "step": 5017 }, { "epoch": 1.3635869565217391, "grad_norm": 1.1207125620555916, "learning_rate": 5.048067498179396e-06, "loss": 0.3997137248516083, "step": 5018 }, { "epoch": 1.3638586956521739, "grad_norm": 1.3583212205052948, "learning_rate": 5.044164448638785e-06, "loss": 0.3629910945892334, "step": 5019 }, { "epoch": 1.3641304347826086, "grad_norm": 1.4970430455329484, "learning_rate": 5.040262399590554e-06, "loss": 0.5097061395645142, "step": 5020 }, { "epoch": 1.3644021739130434, "grad_norm": 1.3819815368748334, "learning_rate": 5.036361351822456e-06, "loss": 0.4739728569984436, "step": 5021 }, { "epoch": 1.3646739130434782, "grad_norm": 1.2091853828225547, "learning_rate": 5.032461306122046e-06, "loss": 0.37097257375717163, "step": 5022 }, { "epoch": 1.3649456521739132, "grad_norm": 1.1651437983151738, "learning_rate": 5.028562263276662e-06, "loss": 0.3599700927734375, "step": 5023 }, { "epoch": 1.365217391304348, "grad_norm": 1.2095927067068786, "learning_rate": 5.024664224073454e-06, "loss": 0.43649494647979736, "step": 5024 }, { "epoch": 1.3654891304347827, "grad_norm": 1.2488871421194119, "learning_rate": 5.020767189299369e-06, "loss": 0.44253653287887573, "step": 5025 }, { "epoch": 1.3657608695652175, "grad_norm": 1.256754043172745, "learning_rate": 5.016871159741137e-06, "loss": 0.3847018778324127, "step": 5026 }, { "epoch": 1.3660326086956522, "grad_norm": 1.0984990134474137, "learning_rate": 5.012976136185305e-06, "loss": 0.3724527955055237, "step": 5027 }, { "epoch": 1.366304347826087, "grad_norm": 1.4032525017908615, "learning_rate": 5.0090821194182006e-06, "loss": 0.46454498171806335, "step": 5028 }, { "epoch": 1.3665760869565218, "grad_norm": 1.1219182839480972, "learning_rate": 5.005189110225957e-06, "loss": 0.3891613185405731, "step": 5029 }, { "epoch": 1.3668478260869565, "grad_norm": 1.3256815671065116, "learning_rate": 5.001297109394499e-06, "loss": 0.4457288980484009, "step": 5030 }, { "epoch": 1.3671195652173913, "grad_norm": 1.1811233350494663, "learning_rate": 4.997406117709557e-06, "loss": 0.3311655521392822, "step": 5031 }, { "epoch": 1.367391304347826, "grad_norm": 1.4046470462057903, "learning_rate": 4.993516135956642e-06, "loss": 0.46780043840408325, "step": 5032 }, { "epoch": 1.3676630434782608, "grad_norm": 1.2992147962982377, "learning_rate": 4.989627164921073e-06, "loss": 0.4383236765861511, "step": 5033 }, { "epoch": 1.3679347826086956, "grad_norm": 1.0423820136955633, "learning_rate": 4.985739205387969e-06, "loss": 0.32889074087142944, "step": 5034 }, { "epoch": 1.3682065217391304, "grad_norm": 1.2365410542274393, "learning_rate": 4.981852258142227e-06, "loss": 0.39773064851760864, "step": 5035 }, { "epoch": 1.3684782608695651, "grad_norm": 1.291735929073261, "learning_rate": 4.977966323968555e-06, "loss": 0.43085747957229614, "step": 5036 }, { "epoch": 1.36875, "grad_norm": 1.2499430431750833, "learning_rate": 4.974081403651452e-06, "loss": 0.4332837462425232, "step": 5037 }, { "epoch": 1.3690217391304347, "grad_norm": 1.28595304610308, "learning_rate": 4.970197497975216e-06, "loss": 0.4159506559371948, "step": 5038 }, { "epoch": 1.3692934782608694, "grad_norm": 1.1468709242083635, "learning_rate": 4.96631460772393e-06, "loss": 0.4167964458465576, "step": 5039 }, { "epoch": 1.3695652173913042, "grad_norm": 1.0851730799779922, "learning_rate": 4.9624327336814805e-06, "loss": 0.29289719462394714, "step": 5040 }, { "epoch": 1.3698369565217392, "grad_norm": 1.101331714380653, "learning_rate": 4.95855187663155e-06, "loss": 0.34986862540245056, "step": 5041 }, { "epoch": 1.370108695652174, "grad_norm": 1.076395548812646, "learning_rate": 4.954672037357603e-06, "loss": 0.3061603307723999, "step": 5042 }, { "epoch": 1.3703804347826087, "grad_norm": 1.2884516920799352, "learning_rate": 4.950793216642923e-06, "loss": 0.49363234639167786, "step": 5043 }, { "epoch": 1.3706521739130435, "grad_norm": 1.14608611089918, "learning_rate": 4.94691541527056e-06, "loss": 0.3610216975212097, "step": 5044 }, { "epoch": 1.3709239130434783, "grad_norm": 1.1512586232469129, "learning_rate": 4.943038634023376e-06, "loss": 0.3734428584575653, "step": 5045 }, { "epoch": 1.371195652173913, "grad_norm": 1.1067371298839426, "learning_rate": 4.939162873684021e-06, "loss": 0.36417341232299805, "step": 5046 }, { "epoch": 1.3714673913043478, "grad_norm": 1.1112234816791147, "learning_rate": 4.935288135034945e-06, "loss": 0.3193380832672119, "step": 5047 }, { "epoch": 1.3717391304347826, "grad_norm": 1.394616086864297, "learning_rate": 4.931414418858383e-06, "loss": 0.47916871309280396, "step": 5048 }, { "epoch": 1.3720108695652173, "grad_norm": 1.0782069240472583, "learning_rate": 4.927541725936357e-06, "loss": 0.3460652232170105, "step": 5049 }, { "epoch": 1.372282608695652, "grad_norm": 1.1410487280398385, "learning_rate": 4.923670057050711e-06, "loss": 0.39505624771118164, "step": 5050 }, { "epoch": 1.3725543478260869, "grad_norm": 1.3880662618251884, "learning_rate": 4.919799412983052e-06, "loss": 0.45638400316238403, "step": 5051 }, { "epoch": 1.3728260869565219, "grad_norm": 1.3052606052934124, "learning_rate": 4.915929794514795e-06, "loss": 0.42904818058013916, "step": 5052 }, { "epoch": 1.3730978260869566, "grad_norm": 1.3506036815356264, "learning_rate": 4.9120612024271465e-06, "loss": 0.4246121048927307, "step": 5053 }, { "epoch": 1.3733695652173914, "grad_norm": 1.168923055201292, "learning_rate": 4.908193637501106e-06, "loss": 0.3539664149284363, "step": 5054 }, { "epoch": 1.3736413043478262, "grad_norm": 1.0658234258315316, "learning_rate": 4.904327100517459e-06, "loss": 0.3413918614387512, "step": 5055 }, { "epoch": 1.373913043478261, "grad_norm": 1.1461375686477395, "learning_rate": 4.90046159225679e-06, "loss": 0.3949469327926636, "step": 5056 }, { "epoch": 1.3741847826086957, "grad_norm": 1.2428854614507276, "learning_rate": 4.896597113499479e-06, "loss": 0.4191402494907379, "step": 5057 }, { "epoch": 1.3744565217391305, "grad_norm": 1.2346130972388487, "learning_rate": 4.8927336650256865e-06, "loss": 0.4408625364303589, "step": 5058 }, { "epoch": 1.3747282608695652, "grad_norm": 1.0128152018294343, "learning_rate": 4.888871247615373e-06, "loss": 0.3767619729042053, "step": 5059 }, { "epoch": 1.375, "grad_norm": 1.5881778804648745, "learning_rate": 4.885009862048295e-06, "loss": 0.4524943232536316, "step": 5060 }, { "epoch": 1.3752717391304348, "grad_norm": 1.1921068484538995, "learning_rate": 4.881149509103993e-06, "loss": 0.4142189621925354, "step": 5061 }, { "epoch": 1.3755434782608695, "grad_norm": 1.35751526029432, "learning_rate": 4.877290189561795e-06, "loss": 0.4387986958026886, "step": 5062 }, { "epoch": 1.3758152173913043, "grad_norm": 1.278493089445939, "learning_rate": 4.8734319042008395e-06, "loss": 0.4319956302642822, "step": 5063 }, { "epoch": 1.376086956521739, "grad_norm": 1.1557392311232206, "learning_rate": 4.869574653800033e-06, "loss": 0.40331506729125977, "step": 5064 }, { "epoch": 1.3763586956521738, "grad_norm": 1.1796879132222038, "learning_rate": 4.865718439138089e-06, "loss": 0.39746880531311035, "step": 5065 }, { "epoch": 1.3766304347826086, "grad_norm": 1.3343320719302632, "learning_rate": 4.861863260993508e-06, "loss": 0.40306612849235535, "step": 5066 }, { "epoch": 1.3769021739130434, "grad_norm": 1.374094386027119, "learning_rate": 4.858009120144572e-06, "loss": 0.4646649956703186, "step": 5067 }, { "epoch": 1.3771739130434781, "grad_norm": 1.2808147221805215, "learning_rate": 4.854156017369368e-06, "loss": 0.4567394256591797, "step": 5068 }, { "epoch": 1.3774456521739131, "grad_norm": 1.2382559674632758, "learning_rate": 4.850303953445764e-06, "loss": 0.4302513003349304, "step": 5069 }, { "epoch": 1.377717391304348, "grad_norm": 1.402542204206812, "learning_rate": 4.846452929151427e-06, "loss": 0.4876457452774048, "step": 5070 }, { "epoch": 1.3779891304347827, "grad_norm": 1.3050954372880965, "learning_rate": 4.842602945263799e-06, "loss": 0.4644486904144287, "step": 5071 }, { "epoch": 1.3782608695652174, "grad_norm": 1.3939209730894764, "learning_rate": 4.838754002560127e-06, "loss": 0.4391337037086487, "step": 5072 }, { "epoch": 1.3785326086956522, "grad_norm": 1.2237282583862343, "learning_rate": 4.8349061018174385e-06, "loss": 0.4214312732219696, "step": 5073 }, { "epoch": 1.378804347826087, "grad_norm": 1.1590789825972296, "learning_rate": 4.831059243812561e-06, "loss": 0.3747767508029938, "step": 5074 }, { "epoch": 1.3790760869565217, "grad_norm": 1.3512659401763008, "learning_rate": 4.8272134293220974e-06, "loss": 0.39540839195251465, "step": 5075 }, { "epoch": 1.3793478260869565, "grad_norm": 1.1891111002661992, "learning_rate": 4.823368659122448e-06, "loss": 0.4399068355560303, "step": 5076 }, { "epoch": 1.3796195652173913, "grad_norm": 1.267656046655206, "learning_rate": 4.819524933989808e-06, "loss": 0.48015284538269043, "step": 5077 }, { "epoch": 1.379891304347826, "grad_norm": 1.3681484418064291, "learning_rate": 4.815682254700143e-06, "loss": 0.46679842472076416, "step": 5078 }, { "epoch": 1.3801630434782608, "grad_norm": 1.2923388413154064, "learning_rate": 4.811840622029233e-06, "loss": 0.4623568058013916, "step": 5079 }, { "epoch": 1.3804347826086958, "grad_norm": 1.1823057262159344, "learning_rate": 4.8080000367526244e-06, "loss": 0.39663398265838623, "step": 5080 }, { "epoch": 1.3807065217391306, "grad_norm": 1.1910708278906812, "learning_rate": 4.804160499645667e-06, "loss": 0.42040061950683594, "step": 5081 }, { "epoch": 1.3809782608695653, "grad_norm": 1.1280563958356902, "learning_rate": 4.8003220114834855e-06, "loss": 0.3863527178764343, "step": 5082 }, { "epoch": 1.38125, "grad_norm": 1.2418929573469624, "learning_rate": 4.796484573041006e-06, "loss": 0.4144390821456909, "step": 5083 }, { "epoch": 1.3815217391304349, "grad_norm": 1.288724276344498, "learning_rate": 4.7926481850929376e-06, "loss": 0.5085331201553345, "step": 5084 }, { "epoch": 1.3817934782608696, "grad_norm": 1.1906370310116212, "learning_rate": 4.788812848413768e-06, "loss": 0.4213366210460663, "step": 5085 }, { "epoch": 1.3820652173913044, "grad_norm": 1.176390431628106, "learning_rate": 4.784978563777795e-06, "loss": 0.3804609179496765, "step": 5086 }, { "epoch": 1.3823369565217392, "grad_norm": 1.3515279368321642, "learning_rate": 4.781145331959079e-06, "loss": 0.3914903402328491, "step": 5087 }, { "epoch": 1.382608695652174, "grad_norm": 1.224036597691233, "learning_rate": 4.777313153731485e-06, "loss": 0.43173646926879883, "step": 5088 }, { "epoch": 1.3828804347826087, "grad_norm": 1.2935984720711056, "learning_rate": 4.773482029868657e-06, "loss": 0.4424346089363098, "step": 5089 }, { "epoch": 1.3831521739130435, "grad_norm": 1.3173114477851102, "learning_rate": 4.769651961144033e-06, "loss": 0.4765320420265198, "step": 5090 }, { "epoch": 1.3834239130434782, "grad_norm": 1.2532417424781723, "learning_rate": 4.765822948330828e-06, "loss": 0.48311856389045715, "step": 5091 }, { "epoch": 1.383695652173913, "grad_norm": 1.2516234470156782, "learning_rate": 4.761994992202051e-06, "loss": 0.4377180337905884, "step": 5092 }, { "epoch": 1.3839673913043478, "grad_norm": 1.3120255362154336, "learning_rate": 4.758168093530503e-06, "loss": 0.43856877088546753, "step": 5093 }, { "epoch": 1.3842391304347825, "grad_norm": 1.42473543066603, "learning_rate": 4.754342253088754e-06, "loss": 0.4727741777896881, "step": 5094 }, { "epoch": 1.3845108695652173, "grad_norm": 1.3040242573339271, "learning_rate": 4.750517471649175e-06, "loss": 0.4517340660095215, "step": 5095 }, { "epoch": 1.384782608695652, "grad_norm": 1.1786309506466024, "learning_rate": 4.746693749983922e-06, "loss": 0.3585623502731323, "step": 5096 }, { "epoch": 1.3850543478260868, "grad_norm": 1.2709403666345014, "learning_rate": 4.742871088864935e-06, "loss": 0.4804409146308899, "step": 5097 }, { "epoch": 1.3853260869565218, "grad_norm": 1.4450443825186863, "learning_rate": 4.739049489063932e-06, "loss": 0.45626282691955566, "step": 5098 }, { "epoch": 1.3855978260869566, "grad_norm": 1.2664507024430898, "learning_rate": 4.735228951352434e-06, "loss": 0.45477303862571716, "step": 5099 }, { "epoch": 1.3858695652173914, "grad_norm": 1.0713546095577209, "learning_rate": 4.7314094765017325e-06, "loss": 0.2913602590560913, "step": 5100 }, { "epoch": 1.3861413043478261, "grad_norm": 1.3044932258478619, "learning_rate": 4.727591065282903e-06, "loss": 0.4833405911922455, "step": 5101 }, { "epoch": 1.3864130434782609, "grad_norm": 1.1502824050466085, "learning_rate": 4.723773718466825e-06, "loss": 0.39900851249694824, "step": 5102 }, { "epoch": 1.3866847826086957, "grad_norm": 1.2745248911565161, "learning_rate": 4.719957436824143e-06, "loss": 0.4123210906982422, "step": 5103 }, { "epoch": 1.3869565217391304, "grad_norm": 1.1409383421731356, "learning_rate": 4.716142221125296e-06, "loss": 0.33603066205978394, "step": 5104 }, { "epoch": 1.3872282608695652, "grad_norm": 1.2508339152594496, "learning_rate": 4.712328072140505e-06, "loss": 0.39691513776779175, "step": 5105 }, { "epoch": 1.3875, "grad_norm": 1.2104888251712442, "learning_rate": 4.708514990639785e-06, "loss": 0.37794721126556396, "step": 5106 }, { "epoch": 1.3877717391304347, "grad_norm": 1.0376028765182757, "learning_rate": 4.704702977392914e-06, "loss": 0.2671966552734375, "step": 5107 }, { "epoch": 1.3880434782608695, "grad_norm": 1.3703171356256325, "learning_rate": 4.700892033169476e-06, "loss": 0.4283573031425476, "step": 5108 }, { "epoch": 1.3883152173913045, "grad_norm": 1.4750411508965475, "learning_rate": 4.697082158738831e-06, "loss": 0.5213485956192017, "step": 5109 }, { "epoch": 1.3885869565217392, "grad_norm": 0.9217133153852641, "learning_rate": 4.693273354870117e-06, "loss": 0.31933778524398804, "step": 5110 }, { "epoch": 1.388858695652174, "grad_norm": 1.2004953400603635, "learning_rate": 4.689465622332266e-06, "loss": 0.39273256063461304, "step": 5111 }, { "epoch": 1.3891304347826088, "grad_norm": 1.096150294591588, "learning_rate": 4.68565896189399e-06, "loss": 0.3411552309989929, "step": 5112 }, { "epoch": 1.3894021739130435, "grad_norm": 1.153655460290038, "learning_rate": 4.681853374323786e-06, "loss": 0.39565134048461914, "step": 5113 }, { "epoch": 1.3896739130434783, "grad_norm": 1.2725110297444575, "learning_rate": 4.678048860389925e-06, "loss": 0.4200679361820221, "step": 5114 }, { "epoch": 1.389945652173913, "grad_norm": 1.1724662464122768, "learning_rate": 4.674245420860474e-06, "loss": 0.3958747982978821, "step": 5115 }, { "epoch": 1.3902173913043478, "grad_norm": 1.2436835634866306, "learning_rate": 4.670443056503276e-06, "loss": 0.38479653000831604, "step": 5116 }, { "epoch": 1.3904891304347826, "grad_norm": 1.0857901184706127, "learning_rate": 4.666641768085963e-06, "loss": 0.3645804524421692, "step": 5117 }, { "epoch": 1.3907608695652174, "grad_norm": 1.2207932764279155, "learning_rate": 4.662841556375939e-06, "loss": 0.3748602867126465, "step": 5118 }, { "epoch": 1.3910326086956522, "grad_norm": 1.1211718505960977, "learning_rate": 4.659042422140399e-06, "loss": 0.39594799280166626, "step": 5119 }, { "epoch": 1.391304347826087, "grad_norm": 1.0414309922719467, "learning_rate": 4.655244366146323e-06, "loss": 0.3015797436237335, "step": 5120 }, { "epoch": 1.3915760869565217, "grad_norm": 1.419148705287028, "learning_rate": 4.6514473891604584e-06, "loss": 0.47045478224754333, "step": 5121 }, { "epoch": 1.3918478260869565, "grad_norm": 1.3878207721808575, "learning_rate": 4.6476514919493595e-06, "loss": 0.498735249042511, "step": 5122 }, { "epoch": 1.3921195652173912, "grad_norm": 1.2433494394295603, "learning_rate": 4.643856675279339e-06, "loss": 0.45978590846061707, "step": 5123 }, { "epoch": 1.392391304347826, "grad_norm": 1.1856055732622464, "learning_rate": 4.640062939916502e-06, "loss": 0.43914172053337097, "step": 5124 }, { "epoch": 1.3926630434782608, "grad_norm": 1.169703394330797, "learning_rate": 4.636270286626738e-06, "loss": 0.38658297061920166, "step": 5125 }, { "epoch": 1.3929347826086955, "grad_norm": 2.1142379081135414, "learning_rate": 4.632478716175709e-06, "loss": 0.35190945863723755, "step": 5126 }, { "epoch": 1.3932065217391305, "grad_norm": 1.3417040253770975, "learning_rate": 4.628688229328865e-06, "loss": 0.4981003701686859, "step": 5127 }, { "epoch": 1.3934782608695653, "grad_norm": 1.1897994632191837, "learning_rate": 4.624898826851437e-06, "loss": 0.3626258075237274, "step": 5128 }, { "epoch": 1.39375, "grad_norm": 1.317518211380332, "learning_rate": 4.62111050950844e-06, "loss": 0.4766135811805725, "step": 5129 }, { "epoch": 1.3940217391304348, "grad_norm": 1.1835102086965632, "learning_rate": 4.617323278064657e-06, "loss": 0.31792527437210083, "step": 5130 }, { "epoch": 1.3942934782608696, "grad_norm": 1.1651589767135875, "learning_rate": 4.613537133284666e-06, "loss": 0.3884810209274292, "step": 5131 }, { "epoch": 1.3945652173913043, "grad_norm": 0.9249666259793168, "learning_rate": 4.60975207593282e-06, "loss": 0.31325656175613403, "step": 5132 }, { "epoch": 1.3948369565217391, "grad_norm": 1.5727473337862132, "learning_rate": 4.6059681067732544e-06, "loss": 0.501575767993927, "step": 5133 }, { "epoch": 1.3951086956521739, "grad_norm": 1.0340548717211113, "learning_rate": 4.602185226569877e-06, "loss": 0.3691646456718445, "step": 5134 }, { "epoch": 1.3953804347826086, "grad_norm": 1.2481327100666337, "learning_rate": 4.598403436086387e-06, "loss": 0.4093102812767029, "step": 5135 }, { "epoch": 1.3956521739130434, "grad_norm": 1.3411915214969548, "learning_rate": 4.594622736086261e-06, "loss": 0.4126524031162262, "step": 5136 }, { "epoch": 1.3959239130434782, "grad_norm": 1.3056713740807935, "learning_rate": 4.590843127332744e-06, "loss": 0.40375572443008423, "step": 5137 }, { "epoch": 1.3961956521739132, "grad_norm": 1.2816880010917535, "learning_rate": 4.587064610588881e-06, "loss": 0.5166260004043579, "step": 5138 }, { "epoch": 1.396467391304348, "grad_norm": 1.285426054597675, "learning_rate": 4.583287186617476e-06, "loss": 0.5023655295372009, "step": 5139 }, { "epoch": 1.3967391304347827, "grad_norm": 1.1945317799688013, "learning_rate": 4.579510856181126e-06, "loss": 0.4188883304595947, "step": 5140 }, { "epoch": 1.3970108695652175, "grad_norm": 1.159823093376369, "learning_rate": 4.575735620042201e-06, "loss": 0.3747541606426239, "step": 5141 }, { "epoch": 1.3972826086956522, "grad_norm": 1.2582901948159682, "learning_rate": 4.571961478962858e-06, "loss": 0.5042937994003296, "step": 5142 }, { "epoch": 1.397554347826087, "grad_norm": 1.268515145171657, "learning_rate": 4.568188433705017e-06, "loss": 0.4193519949913025, "step": 5143 }, { "epoch": 1.3978260869565218, "grad_norm": 1.2935719621296509, "learning_rate": 4.564416485030394e-06, "loss": 0.46224191784858704, "step": 5144 }, { "epoch": 1.3980978260869565, "grad_norm": 1.1901065802208566, "learning_rate": 4.560645633700477e-06, "loss": 0.45120856165885925, "step": 5145 }, { "epoch": 1.3983695652173913, "grad_norm": 1.2194055309824219, "learning_rate": 4.556875880476525e-06, "loss": 0.37422043085098267, "step": 5146 }, { "epoch": 1.398641304347826, "grad_norm": 1.2668325291158868, "learning_rate": 4.553107226119586e-06, "loss": 0.46410441398620605, "step": 5147 }, { "epoch": 1.3989130434782608, "grad_norm": 1.1492744797115173, "learning_rate": 4.549339671390482e-06, "loss": 0.4434778690338135, "step": 5148 }, { "epoch": 1.3991847826086956, "grad_norm": 1.2368024121164196, "learning_rate": 4.545573217049819e-06, "loss": 0.44526898860931396, "step": 5149 }, { "epoch": 1.3994565217391304, "grad_norm": 1.1171018447552727, "learning_rate": 4.5418078638579645e-06, "loss": 0.37399932742118835, "step": 5150 }, { "epoch": 1.3997282608695651, "grad_norm": 1.260921513016418, "learning_rate": 4.53804361257508e-06, "loss": 0.4145505726337433, "step": 5151 }, { "epoch": 1.4, "grad_norm": 1.3146096750347716, "learning_rate": 4.534280463961102e-06, "loss": 0.4485318064689636, "step": 5152 }, { "epoch": 1.4002717391304347, "grad_norm": 0.9346169415643977, "learning_rate": 4.530518418775734e-06, "loss": 0.29860395193099976, "step": 5153 }, { "epoch": 1.4005434782608694, "grad_norm": 1.1240598301063638, "learning_rate": 4.5267574777784676e-06, "loss": 0.4005666971206665, "step": 5154 }, { "epoch": 1.4008152173913042, "grad_norm": 1.250664673761538, "learning_rate": 4.522997641728567e-06, "loss": 0.37356725335121155, "step": 5155 }, { "epoch": 1.4010869565217392, "grad_norm": 1.3411860280953238, "learning_rate": 4.519238911385079e-06, "loss": 0.4112752079963684, "step": 5156 }, { "epoch": 1.401358695652174, "grad_norm": 1.009924407222294, "learning_rate": 4.515481287506811e-06, "loss": 0.38668835163116455, "step": 5157 }, { "epoch": 1.4016304347826087, "grad_norm": 1.332705580389425, "learning_rate": 4.511724770852374e-06, "loss": 0.5129861235618591, "step": 5158 }, { "epoch": 1.4019021739130435, "grad_norm": 1.3222630570060132, "learning_rate": 4.507969362180131e-06, "loss": 0.5025807619094849, "step": 5159 }, { "epoch": 1.4021739130434783, "grad_norm": 1.1497757796200665, "learning_rate": 4.504215062248224e-06, "loss": 0.3897807002067566, "step": 5160 }, { "epoch": 1.402445652173913, "grad_norm": 1.2398985260692292, "learning_rate": 4.5004618718145905e-06, "loss": 0.3907698392868042, "step": 5161 }, { "epoch": 1.4027173913043478, "grad_norm": 1.1261993097787617, "learning_rate": 4.4967097916369215e-06, "loss": 0.3670329451560974, "step": 5162 }, { "epoch": 1.4029891304347826, "grad_norm": 1.186493289227318, "learning_rate": 4.492958822472697e-06, "loss": 0.3611729145050049, "step": 5163 }, { "epoch": 1.4032608695652173, "grad_norm": 1.2381990780642598, "learning_rate": 4.489208965079168e-06, "loss": 0.4223659038543701, "step": 5164 }, { "epoch": 1.403532608695652, "grad_norm": 1.2662709435914354, "learning_rate": 4.485460220213366e-06, "loss": 0.4350751042366028, "step": 5165 }, { "epoch": 1.4038043478260869, "grad_norm": 1.1962928160923096, "learning_rate": 4.481712588632087e-06, "loss": 0.3486889600753784, "step": 5166 }, { "epoch": 1.4040760869565219, "grad_norm": 1.2757381915798427, "learning_rate": 4.47796607109191e-06, "loss": 0.43687254190444946, "step": 5167 }, { "epoch": 1.4043478260869566, "grad_norm": 1.5312512865715546, "learning_rate": 4.474220668349196e-06, "loss": 0.5289496183395386, "step": 5168 }, { "epoch": 1.4046195652173914, "grad_norm": 1.415979597127869, "learning_rate": 4.470476381160065e-06, "loss": 0.4514319896697998, "step": 5169 }, { "epoch": 1.4048913043478262, "grad_norm": 1.1984344257850639, "learning_rate": 4.46673321028042e-06, "loss": 0.3691694736480713, "step": 5170 }, { "epoch": 1.405163043478261, "grad_norm": 1.2492985365215532, "learning_rate": 4.4629911564659435e-06, "loss": 0.42403244972229004, "step": 5171 }, { "epoch": 1.4054347826086957, "grad_norm": 1.3401671006632119, "learning_rate": 4.459250220472089e-06, "loss": 0.4106173813343048, "step": 5172 }, { "epoch": 1.4057065217391305, "grad_norm": 1.1968348488668854, "learning_rate": 4.455510403054071e-06, "loss": 0.4234914183616638, "step": 5173 }, { "epoch": 1.4059782608695652, "grad_norm": 1.1887254561717342, "learning_rate": 4.451771704966906e-06, "loss": 0.3691946268081665, "step": 5174 }, { "epoch": 1.40625, "grad_norm": 1.3283969802002475, "learning_rate": 4.4480341269653575e-06, "loss": 0.4875339865684509, "step": 5175 }, { "epoch": 1.4065217391304348, "grad_norm": 1.344801818446387, "learning_rate": 4.444297669803981e-06, "loss": 0.40365397930145264, "step": 5176 }, { "epoch": 1.4067934782608695, "grad_norm": 1.2451476389394671, "learning_rate": 4.4405623342370916e-06, "loss": 0.36417829990386963, "step": 5177 }, { "epoch": 1.4070652173913043, "grad_norm": 1.2532712926292797, "learning_rate": 4.436828121018789e-06, "loss": 0.4004310369491577, "step": 5178 }, { "epoch": 1.407336956521739, "grad_norm": 1.2578778867594334, "learning_rate": 4.4330950309029455e-06, "loss": 0.3785448372364044, "step": 5179 }, { "epoch": 1.4076086956521738, "grad_norm": 1.3591595052720042, "learning_rate": 4.429363064643193e-06, "loss": 0.43673768639564514, "step": 5180 }, { "epoch": 1.4078804347826086, "grad_norm": 1.3196191349048978, "learning_rate": 4.425632222992961e-06, "loss": 0.4370715022087097, "step": 5181 }, { "epoch": 1.4081521739130434, "grad_norm": 1.1060785683442396, "learning_rate": 4.421902506705429e-06, "loss": 0.3480643630027771, "step": 5182 }, { "epoch": 1.4084239130434781, "grad_norm": 1.2670560952295178, "learning_rate": 4.4181739165335614e-06, "loss": 0.45034050941467285, "step": 5183 }, { "epoch": 1.4086956521739131, "grad_norm": 1.278875409966291, "learning_rate": 4.41444645323009e-06, "loss": 0.3784967064857483, "step": 5184 }, { "epoch": 1.408967391304348, "grad_norm": 1.075060750431434, "learning_rate": 4.410720117547528e-06, "loss": 0.37601518630981445, "step": 5185 }, { "epoch": 1.4092391304347827, "grad_norm": 1.1245375374607487, "learning_rate": 4.406994910238144e-06, "loss": 0.35933077335357666, "step": 5186 }, { "epoch": 1.4095108695652174, "grad_norm": 1.2520350728022214, "learning_rate": 4.403270832053995e-06, "loss": 0.4340696334838867, "step": 5187 }, { "epoch": 1.4097826086956522, "grad_norm": 1.0121789850640208, "learning_rate": 4.399547883746908e-06, "loss": 0.3450999855995178, "step": 5188 }, { "epoch": 1.410054347826087, "grad_norm": 1.133388275122423, "learning_rate": 4.395826066068469e-06, "loss": 0.34852421283721924, "step": 5189 }, { "epoch": 1.4103260869565217, "grad_norm": 1.281067171965258, "learning_rate": 4.3921053797700495e-06, "loss": 0.45328956842422485, "step": 5190 }, { "epoch": 1.4105978260869565, "grad_norm": 1.1798239609974284, "learning_rate": 4.3883858256027875e-06, "loss": 0.3798934817314148, "step": 5191 }, { "epoch": 1.4108695652173913, "grad_norm": 1.3135747194581162, "learning_rate": 4.384667404317597e-06, "loss": 0.39198634028434753, "step": 5192 }, { "epoch": 1.411141304347826, "grad_norm": 1.3190236241593414, "learning_rate": 4.380950116665148e-06, "loss": 0.4531952142715454, "step": 5193 }, { "epoch": 1.4114130434782608, "grad_norm": 1.2403317361110007, "learning_rate": 4.377233963395907e-06, "loss": 0.5039726495742798, "step": 5194 }, { "epoch": 1.4116847826086958, "grad_norm": 1.1304776082371712, "learning_rate": 4.37351894526009e-06, "loss": 0.3342643678188324, "step": 5195 }, { "epoch": 1.4119565217391306, "grad_norm": 1.3223232869318238, "learning_rate": 4.369805063007684e-06, "loss": 0.41937685012817383, "step": 5196 }, { "epoch": 1.4122282608695653, "grad_norm": 1.2636888862045923, "learning_rate": 4.366092317388468e-06, "loss": 0.36647945642471313, "step": 5197 }, { "epoch": 1.4125, "grad_norm": 1.2781558914039801, "learning_rate": 4.362380709151966e-06, "loss": 0.4005638360977173, "step": 5198 }, { "epoch": 1.4127717391304349, "grad_norm": 1.2218597968065437, "learning_rate": 4.35867023904749e-06, "loss": 0.40842747688293457, "step": 5199 }, { "epoch": 1.4130434782608696, "grad_norm": 1.114924434783498, "learning_rate": 4.354960907824112e-06, "loss": 0.36457961797714233, "step": 5200 }, { "epoch": 1.4133152173913044, "grad_norm": 1.4366488720816735, "learning_rate": 4.351252716230685e-06, "loss": 0.4082360863685608, "step": 5201 }, { "epoch": 1.4135869565217392, "grad_norm": 1.1037816469609003, "learning_rate": 4.347545665015815e-06, "loss": 0.3530174493789673, "step": 5202 }, { "epoch": 1.413858695652174, "grad_norm": 1.2182902857038744, "learning_rate": 4.343839754927893e-06, "loss": 0.4000653028488159, "step": 5203 }, { "epoch": 1.4141304347826087, "grad_norm": 1.2267007624028086, "learning_rate": 4.340134986715078e-06, "loss": 0.4249347448348999, "step": 5204 }, { "epoch": 1.4144021739130435, "grad_norm": 1.1530194705540329, "learning_rate": 4.336431361125287e-06, "loss": 0.40227222442626953, "step": 5205 }, { "epoch": 1.4146739130434782, "grad_norm": 1.1264282153996998, "learning_rate": 4.332728878906216e-06, "loss": 0.3967841863632202, "step": 5206 }, { "epoch": 1.414945652173913, "grad_norm": 1.0981435019034418, "learning_rate": 4.329027540805332e-06, "loss": 0.3783450722694397, "step": 5207 }, { "epoch": 1.4152173913043478, "grad_norm": 1.2265508551827464, "learning_rate": 4.325327347569869e-06, "loss": 0.38554495573043823, "step": 5208 }, { "epoch": 1.4154891304347825, "grad_norm": 1.1502786224826986, "learning_rate": 4.32162829994682e-06, "loss": 0.39897775650024414, "step": 5209 }, { "epoch": 1.4157608695652173, "grad_norm": 1.3402499891274093, "learning_rate": 4.31793039868296e-06, "loss": 0.39878153800964355, "step": 5210 }, { "epoch": 1.416032608695652, "grad_norm": 1.2746626696802874, "learning_rate": 4.3142336445248316e-06, "loss": 0.4998847246170044, "step": 5211 }, { "epoch": 1.4163043478260868, "grad_norm": 1.3341394756521368, "learning_rate": 4.3105380382187325e-06, "loss": 0.4612380862236023, "step": 5212 }, { "epoch": 1.4165760869565218, "grad_norm": 1.1074398446097846, "learning_rate": 4.306843580510743e-06, "loss": 0.34576067328453064, "step": 5213 }, { "epoch": 1.4168478260869566, "grad_norm": 1.2076511341587004, "learning_rate": 4.303150272146706e-06, "loss": 0.37357544898986816, "step": 5214 }, { "epoch": 1.4171195652173914, "grad_norm": 1.2604096378344518, "learning_rate": 4.299458113872236e-06, "loss": 0.40870991349220276, "step": 5215 }, { "epoch": 1.4173913043478261, "grad_norm": 1.1529357027744183, "learning_rate": 4.295767106432702e-06, "loss": 0.350215882062912, "step": 5216 }, { "epoch": 1.4176630434782609, "grad_norm": 1.317604286679881, "learning_rate": 4.292077250573265e-06, "loss": 0.4132585823535919, "step": 5217 }, { "epoch": 1.4179347826086957, "grad_norm": 1.2694014705277503, "learning_rate": 4.288388547038828e-06, "loss": 0.4183291792869568, "step": 5218 }, { "epoch": 1.4182065217391304, "grad_norm": 1.2103443099611948, "learning_rate": 4.284700996574077e-06, "loss": 0.4233209192752838, "step": 5219 }, { "epoch": 1.4184782608695652, "grad_norm": 1.3189708319982447, "learning_rate": 4.281014599923464e-06, "loss": 0.5125439763069153, "step": 5220 }, { "epoch": 1.41875, "grad_norm": 1.2851238761954016, "learning_rate": 4.277329357831198e-06, "loss": 0.40488433837890625, "step": 5221 }, { "epoch": 1.4190217391304347, "grad_norm": 1.2892959757190154, "learning_rate": 4.2736452710412645e-06, "loss": 0.4845345616340637, "step": 5222 }, { "epoch": 1.4192934782608695, "grad_norm": 1.2289814508364563, "learning_rate": 4.269962340297414e-06, "loss": 0.4077635407447815, "step": 5223 }, { "epoch": 1.4195652173913045, "grad_norm": 1.2701221527815818, "learning_rate": 4.266280566343167e-06, "loss": 0.3857794404029846, "step": 5224 }, { "epoch": 1.4198369565217392, "grad_norm": 0.9412856677916815, "learning_rate": 4.262599949921797e-06, "loss": 0.3469432592391968, "step": 5225 }, { "epoch": 1.420108695652174, "grad_norm": 1.2753373311248488, "learning_rate": 4.258920491776359e-06, "loss": 0.4817087650299072, "step": 5226 }, { "epoch": 1.4203804347826088, "grad_norm": 1.3943495570879016, "learning_rate": 4.255242192649666e-06, "loss": 0.4920361638069153, "step": 5227 }, { "epoch": 1.4206521739130435, "grad_norm": 1.3604680946880243, "learning_rate": 4.251565053284304e-06, "loss": 0.42131346464157104, "step": 5228 }, { "epoch": 1.4209239130434783, "grad_norm": 1.198533759068252, "learning_rate": 4.247889074422613e-06, "loss": 0.37207216024398804, "step": 5229 }, { "epoch": 1.421195652173913, "grad_norm": 1.2692539039874766, "learning_rate": 4.244214256806708e-06, "loss": 0.43911632895469666, "step": 5230 }, { "epoch": 1.4214673913043478, "grad_norm": 1.0461218039758529, "learning_rate": 4.240540601178472e-06, "loss": 0.3243211507797241, "step": 5231 }, { "epoch": 1.4217391304347826, "grad_norm": 1.3542258103924927, "learning_rate": 4.236868108279538e-06, "loss": 0.43396785855293274, "step": 5232 }, { "epoch": 1.4220108695652174, "grad_norm": 1.3684998834571176, "learning_rate": 4.2331967788513295e-06, "loss": 0.4609607756137848, "step": 5233 }, { "epoch": 1.4222826086956522, "grad_norm": 1.3790737513433529, "learning_rate": 4.22952661363501e-06, "loss": 0.5011645555496216, "step": 5234 }, { "epoch": 1.422554347826087, "grad_norm": 1.4397497858266997, "learning_rate": 4.225857613371521e-06, "loss": 0.47022414207458496, "step": 5235 }, { "epoch": 1.4228260869565217, "grad_norm": 1.3259442516474738, "learning_rate": 4.222189778801571e-06, "loss": 0.4769288897514343, "step": 5236 }, { "epoch": 1.4230978260869565, "grad_norm": 1.5024913521502312, "learning_rate": 4.218523110665621e-06, "loss": 0.4713786244392395, "step": 5237 }, { "epoch": 1.4233695652173912, "grad_norm": 1.3483570694628528, "learning_rate": 4.214857609703908e-06, "loss": 0.4483141303062439, "step": 5238 }, { "epoch": 1.423641304347826, "grad_norm": 1.313939356670097, "learning_rate": 4.21119327665643e-06, "loss": 0.4732043743133545, "step": 5239 }, { "epoch": 1.4239130434782608, "grad_norm": 1.1486941420314951, "learning_rate": 4.207530112262951e-06, "loss": 0.4350789487361908, "step": 5240 }, { "epoch": 1.4241847826086955, "grad_norm": 1.235473892756011, "learning_rate": 4.20386811726299e-06, "loss": 0.4076608419418335, "step": 5241 }, { "epoch": 1.4244565217391305, "grad_norm": 1.2025947299864723, "learning_rate": 4.200207292395841e-06, "loss": 0.37235963344573975, "step": 5242 }, { "epoch": 1.4247282608695653, "grad_norm": 0.8657863490335674, "learning_rate": 4.1965476384005566e-06, "loss": 0.24039191007614136, "step": 5243 }, { "epoch": 1.425, "grad_norm": 1.1607734400178005, "learning_rate": 4.192889156015958e-06, "loss": 0.4253815710544586, "step": 5244 }, { "epoch": 1.4252717391304348, "grad_norm": 1.304538872042963, "learning_rate": 4.189231845980618e-06, "loss": 0.4703376889228821, "step": 5245 }, { "epoch": 1.4255434782608696, "grad_norm": 1.150892593158823, "learning_rate": 4.185575709032885e-06, "loss": 0.4330824017524719, "step": 5246 }, { "epoch": 1.4258152173913043, "grad_norm": 1.2440893687954504, "learning_rate": 4.181920745910869e-06, "loss": 0.42898425459861755, "step": 5247 }, { "epoch": 1.4260869565217391, "grad_norm": 1.1423531486544123, "learning_rate": 4.178266957352432e-06, "loss": 0.3529357612133026, "step": 5248 }, { "epoch": 1.4263586956521739, "grad_norm": 1.2879987647562234, "learning_rate": 4.1746143440952135e-06, "loss": 0.4402170777320862, "step": 5249 }, { "epoch": 1.4266304347826086, "grad_norm": 1.156432292565747, "learning_rate": 4.170962906876606e-06, "loss": 0.35991328954696655, "step": 5250 }, { "epoch": 1.4269021739130434, "grad_norm": 1.028644270485466, "learning_rate": 4.167312646433772e-06, "loss": 0.2989448606967926, "step": 5251 }, { "epoch": 1.4271739130434782, "grad_norm": 1.2356372089543772, "learning_rate": 4.1636635635036235e-06, "loss": 0.3588331937789917, "step": 5252 }, { "epoch": 1.4274456521739132, "grad_norm": 1.4357930300753305, "learning_rate": 4.160015658822856e-06, "loss": 0.5038352012634277, "step": 5253 }, { "epoch": 1.427717391304348, "grad_norm": 1.0731085435492087, "learning_rate": 4.156368933127907e-06, "loss": 0.3396303057670593, "step": 5254 }, { "epoch": 1.4279891304347827, "grad_norm": 1.1343118680117426, "learning_rate": 4.152723387154977e-06, "loss": 0.37756574153900146, "step": 5255 }, { "epoch": 1.4282608695652175, "grad_norm": 1.3058298462473936, "learning_rate": 4.14907902164005e-06, "loss": 0.43842488527297974, "step": 5256 }, { "epoch": 1.4285326086956522, "grad_norm": 1.1399388774122405, "learning_rate": 4.145435837318845e-06, "loss": 0.3776973783969879, "step": 5257 }, { "epoch": 1.428804347826087, "grad_norm": 1.2122881657400981, "learning_rate": 4.141793834926858e-06, "loss": 0.42574989795684814, "step": 5258 }, { "epoch": 1.4290760869565218, "grad_norm": 1.3113418803356833, "learning_rate": 4.138153015199342e-06, "loss": 0.41949722170829773, "step": 5259 }, { "epoch": 1.4293478260869565, "grad_norm": 1.3120453539842398, "learning_rate": 4.134513378871317e-06, "loss": 0.40389108657836914, "step": 5260 }, { "epoch": 1.4296195652173913, "grad_norm": 1.3040463837311265, "learning_rate": 4.1308749266775506e-06, "loss": 0.5011157989501953, "step": 5261 }, { "epoch": 1.429891304347826, "grad_norm": 1.16876956584469, "learning_rate": 4.127237659352583e-06, "loss": 0.370916485786438, "step": 5262 }, { "epoch": 1.4301630434782608, "grad_norm": 1.2039289762962286, "learning_rate": 4.123601577630716e-06, "loss": 0.4181405007839203, "step": 5263 }, { "epoch": 1.4304347826086956, "grad_norm": 1.17559052027914, "learning_rate": 4.119966682245999e-06, "loss": 0.39944949746131897, "step": 5264 }, { "epoch": 1.4307065217391304, "grad_norm": 1.2623780668831988, "learning_rate": 4.116332973932256e-06, "loss": 0.47009944915771484, "step": 5265 }, { "epoch": 1.4309782608695651, "grad_norm": 1.2095534354010464, "learning_rate": 4.112700453423068e-06, "loss": 0.4430464506149292, "step": 5266 }, { "epoch": 1.43125, "grad_norm": 1.3490041989163717, "learning_rate": 4.109069121451774e-06, "loss": 0.5289698839187622, "step": 5267 }, { "epoch": 1.4315217391304347, "grad_norm": 1.1457987709785904, "learning_rate": 4.105438978751465e-06, "loss": 0.4151803255081177, "step": 5268 }, { "epoch": 1.4317934782608694, "grad_norm": 1.188085588148888, "learning_rate": 4.101810026055013e-06, "loss": 0.4280584752559662, "step": 5269 }, { "epoch": 1.4320652173913042, "grad_norm": 1.2711081206144472, "learning_rate": 4.098182264095028e-06, "loss": 0.4260280728340149, "step": 5270 }, { "epoch": 1.4323369565217392, "grad_norm": 1.2350368932875122, "learning_rate": 4.094555693603891e-06, "loss": 0.3339666724205017, "step": 5271 }, { "epoch": 1.432608695652174, "grad_norm": 1.4580840182722334, "learning_rate": 4.0909303153137444e-06, "loss": 0.4968733787536621, "step": 5272 }, { "epoch": 1.4328804347826087, "grad_norm": 1.1992354839769959, "learning_rate": 4.087306129956478e-06, "loss": 0.39344778656959534, "step": 5273 }, { "epoch": 1.4331521739130435, "grad_norm": 1.3613575127593611, "learning_rate": 4.083683138263758e-06, "loss": 0.4484499394893646, "step": 5274 }, { "epoch": 1.4334239130434783, "grad_norm": 1.2524661443230107, "learning_rate": 4.080061340966984e-06, "loss": 0.45730525255203247, "step": 5275 }, { "epoch": 1.433695652173913, "grad_norm": 1.3344874464860954, "learning_rate": 4.07644073879735e-06, "loss": 0.41981643438339233, "step": 5276 }, { "epoch": 1.4339673913043478, "grad_norm": 1.385905660199417, "learning_rate": 4.072821332485777e-06, "loss": 0.3730123043060303, "step": 5277 }, { "epoch": 1.4342391304347826, "grad_norm": 1.1515749322279811, "learning_rate": 4.069203122762958e-06, "loss": 0.3893877863883972, "step": 5278 }, { "epoch": 1.4345108695652173, "grad_norm": 1.1267499244313632, "learning_rate": 4.06558611035935e-06, "loss": 0.37183666229248047, "step": 5279 }, { "epoch": 1.434782608695652, "grad_norm": 0.9919463079442391, "learning_rate": 4.061970296005152e-06, "loss": 0.292958527803421, "step": 5280 }, { "epoch": 1.4350543478260869, "grad_norm": 1.2285435042643824, "learning_rate": 4.058355680430337e-06, "loss": 0.46365439891815186, "step": 5281 }, { "epoch": 1.4353260869565219, "grad_norm": 1.1253988913414772, "learning_rate": 4.054742264364626e-06, "loss": 0.3555523753166199, "step": 5282 }, { "epoch": 1.4355978260869566, "grad_norm": 1.2507097574142374, "learning_rate": 4.051130048537508e-06, "loss": 0.48633885383605957, "step": 5283 }, { "epoch": 1.4358695652173914, "grad_norm": 1.3212864990521953, "learning_rate": 4.047519033678216e-06, "loss": 0.39863714575767517, "step": 5284 }, { "epoch": 1.4361413043478262, "grad_norm": 1.1960117343049972, "learning_rate": 4.0439092205157485e-06, "loss": 0.3836628794670105, "step": 5285 }, { "epoch": 1.436413043478261, "grad_norm": 1.8390634115740958, "learning_rate": 4.040300609778863e-06, "loss": 0.36985623836517334, "step": 5286 }, { "epoch": 1.4366847826086957, "grad_norm": 1.079650328761268, "learning_rate": 4.036693202196075e-06, "loss": 0.3141784369945526, "step": 5287 }, { "epoch": 1.4369565217391305, "grad_norm": 1.168063866566412, "learning_rate": 4.0330869984956464e-06, "loss": 0.432645320892334, "step": 5288 }, { "epoch": 1.4372282608695652, "grad_norm": 1.156799690297362, "learning_rate": 4.029481999405609e-06, "loss": 0.37435102462768555, "step": 5289 }, { "epoch": 1.4375, "grad_norm": 1.14428711966483, "learning_rate": 4.025878205653747e-06, "loss": 0.3499303460121155, "step": 5290 }, { "epoch": 1.4377717391304348, "grad_norm": 1.1017223491972967, "learning_rate": 4.0222756179675915e-06, "loss": 0.29709625244140625, "step": 5291 }, { "epoch": 1.4380434782608695, "grad_norm": 1.2652054637997328, "learning_rate": 4.0186742370744514e-06, "loss": 0.4387677311897278, "step": 5292 }, { "epoch": 1.4383152173913043, "grad_norm": 1.264213567307118, "learning_rate": 4.015074063701371e-06, "loss": 0.44186121225357056, "step": 5293 }, { "epoch": 1.438586956521739, "grad_norm": 1.097777379744192, "learning_rate": 4.0114750985751605e-06, "loss": 0.3388412594795227, "step": 5294 }, { "epoch": 1.4388586956521738, "grad_norm": 1.3884122713931424, "learning_rate": 4.007877342422387e-06, "loss": 0.43307727575302124, "step": 5295 }, { "epoch": 1.4391304347826086, "grad_norm": 1.3747767733823435, "learning_rate": 4.004280795969375e-06, "loss": 0.47722017765045166, "step": 5296 }, { "epoch": 1.4394021739130434, "grad_norm": 1.4914954023461036, "learning_rate": 4.000685459942193e-06, "loss": 0.5280004739761353, "step": 5297 }, { "epoch": 1.4396739130434781, "grad_norm": 1.2532009956251022, "learning_rate": 3.997091335066678e-06, "loss": 0.43455997109413147, "step": 5298 }, { "epoch": 1.4399456521739131, "grad_norm": 1.4460267770177868, "learning_rate": 3.993498422068421e-06, "loss": 0.4314914345741272, "step": 5299 }, { "epoch": 1.440217391304348, "grad_norm": 1.384183551354324, "learning_rate": 3.989906721672758e-06, "loss": 0.4413500130176544, "step": 5300 }, { "epoch": 1.4404891304347827, "grad_norm": 1.2214878449103066, "learning_rate": 3.986316234604792e-06, "loss": 0.391594260931015, "step": 5301 }, { "epoch": 1.4407608695652174, "grad_norm": 1.2334496794382763, "learning_rate": 3.982726961589375e-06, "loss": 0.38698500394821167, "step": 5302 }, { "epoch": 1.4410326086956522, "grad_norm": 1.2919829670794827, "learning_rate": 3.9791389033511205e-06, "loss": 0.4120637774467468, "step": 5303 }, { "epoch": 1.441304347826087, "grad_norm": 1.2037802839055505, "learning_rate": 3.975552060614381e-06, "loss": 0.4102017879486084, "step": 5304 }, { "epoch": 1.4415760869565217, "grad_norm": 1.1085131642437183, "learning_rate": 3.9719664341032875e-06, "loss": 0.3342236280441284, "step": 5305 }, { "epoch": 1.4418478260869565, "grad_norm": 0.9845005989224309, "learning_rate": 3.968382024541705e-06, "loss": 0.3183251917362213, "step": 5306 }, { "epoch": 1.4421195652173913, "grad_norm": 1.4360625390119701, "learning_rate": 3.964798832653259e-06, "loss": 0.4524105191230774, "step": 5307 }, { "epoch": 1.442391304347826, "grad_norm": 1.2401733452789678, "learning_rate": 3.96121685916133e-06, "loss": 0.4149889349937439, "step": 5308 }, { "epoch": 1.4426630434782608, "grad_norm": 1.4036908732933207, "learning_rate": 3.957636104789056e-06, "loss": 0.4164596199989319, "step": 5309 }, { "epoch": 1.4429347826086958, "grad_norm": 1.3010343808926983, "learning_rate": 3.954056570259327e-06, "loss": 0.41352760791778564, "step": 5310 }, { "epoch": 1.4432065217391306, "grad_norm": 1.1399270672726092, "learning_rate": 3.950478256294776e-06, "loss": 0.391933411359787, "step": 5311 }, { "epoch": 1.4434782608695653, "grad_norm": 1.3076061942571235, "learning_rate": 3.946901163617814e-06, "loss": 0.4338071346282959, "step": 5312 }, { "epoch": 1.44375, "grad_norm": 1.384879199101772, "learning_rate": 3.943325292950579e-06, "loss": 0.4795229434967041, "step": 5313 }, { "epoch": 1.4440217391304349, "grad_norm": 1.2174697872675713, "learning_rate": 3.939750645014977e-06, "loss": 0.3930480480194092, "step": 5314 }, { "epoch": 1.4442934782608696, "grad_norm": 1.2426354205353463, "learning_rate": 3.936177220532667e-06, "loss": 0.44237083196640015, "step": 5315 }, { "epoch": 1.4445652173913044, "grad_norm": 1.204145823109158, "learning_rate": 3.932605020225052e-06, "loss": 0.39040517807006836, "step": 5316 }, { "epoch": 1.4448369565217392, "grad_norm": 1.4976230264824157, "learning_rate": 3.929034044813297e-06, "loss": 0.4848383665084839, "step": 5317 }, { "epoch": 1.445108695652174, "grad_norm": 1.0182515606990576, "learning_rate": 3.925464295018317e-06, "loss": 0.3147107660770416, "step": 5318 }, { "epoch": 1.4453804347826087, "grad_norm": 1.2706313394128728, "learning_rate": 3.9218957715607795e-06, "loss": 0.3782910704612732, "step": 5319 }, { "epoch": 1.4456521739130435, "grad_norm": 1.3252353998463675, "learning_rate": 3.918328475161102e-06, "loss": 0.4655446708202362, "step": 5320 }, { "epoch": 1.4459239130434782, "grad_norm": 1.224206470908883, "learning_rate": 3.914762406539455e-06, "loss": 0.38435208797454834, "step": 5321 }, { "epoch": 1.446195652173913, "grad_norm": 1.0591364915301449, "learning_rate": 3.91119756641577e-06, "loss": 0.36021482944488525, "step": 5322 }, { "epoch": 1.4464673913043478, "grad_norm": 1.2572733074153062, "learning_rate": 3.907633955509713e-06, "loss": 0.3834684491157532, "step": 5323 }, { "epoch": 1.4467391304347825, "grad_norm": 1.2424695747659542, "learning_rate": 3.904071574540715e-06, "loss": 0.37673258781433105, "step": 5324 }, { "epoch": 1.4470108695652173, "grad_norm": 1.2135386480991308, "learning_rate": 3.900510424227959e-06, "loss": 0.3927902579307556, "step": 5325 }, { "epoch": 1.447282608695652, "grad_norm": 1.4916601548749107, "learning_rate": 3.896950505290375e-06, "loss": 0.4252781867980957, "step": 5326 }, { "epoch": 1.4475543478260868, "grad_norm": 1.320547154213518, "learning_rate": 3.8933918184466375e-06, "loss": 0.448666512966156, "step": 5327 }, { "epoch": 1.4478260869565218, "grad_norm": 1.3707127110259445, "learning_rate": 3.8898343644151945e-06, "loss": 0.45989513397216797, "step": 5328 }, { "epoch": 1.4480978260869566, "grad_norm": 1.2414072498861493, "learning_rate": 3.886278143914219e-06, "loss": 0.4062599539756775, "step": 5329 }, { "epoch": 1.4483695652173914, "grad_norm": 1.149505926183889, "learning_rate": 3.882723157661651e-06, "loss": 0.33096587657928467, "step": 5330 }, { "epoch": 1.4486413043478261, "grad_norm": 1.3431059962084175, "learning_rate": 3.879169406375181e-06, "loss": 0.4453187584877014, "step": 5331 }, { "epoch": 1.4489130434782609, "grad_norm": 1.187347876237737, "learning_rate": 3.875616890772237e-06, "loss": 0.4202725887298584, "step": 5332 }, { "epoch": 1.4491847826086957, "grad_norm": 1.3029337399926904, "learning_rate": 3.872065611570012e-06, "loss": 0.4849480092525482, "step": 5333 }, { "epoch": 1.4494565217391304, "grad_norm": 1.3766778301558424, "learning_rate": 3.868515569485445e-06, "loss": 0.46817824244499207, "step": 5334 }, { "epoch": 1.4497282608695652, "grad_norm": 1.2334849914682824, "learning_rate": 3.864966765235226e-06, "loss": 0.39908814430236816, "step": 5335 }, { "epoch": 1.45, "grad_norm": 1.0404508193846125, "learning_rate": 3.861419199535787e-06, "loss": 0.3625625967979431, "step": 5336 }, { "epoch": 1.4502717391304347, "grad_norm": 1.314711933537215, "learning_rate": 3.857872873103322e-06, "loss": 0.43892502784729004, "step": 5337 }, { "epoch": 1.4505434782608695, "grad_norm": 1.2994982667897947, "learning_rate": 3.8543277866537674e-06, "loss": 0.45775479078292847, "step": 5338 }, { "epoch": 1.4508152173913045, "grad_norm": 1.0723883498108346, "learning_rate": 3.8507839409028145e-06, "loss": 0.37891149520874023, "step": 5339 }, { "epoch": 1.4510869565217392, "grad_norm": 1.3466148097798087, "learning_rate": 3.847241336565895e-06, "loss": 0.44996505975723267, "step": 5340 }, { "epoch": 1.451358695652174, "grad_norm": 1.4558760295666937, "learning_rate": 3.8436999743581995e-06, "loss": 0.49427855014801025, "step": 5341 }, { "epoch": 1.4516304347826088, "grad_norm": 1.235498267523015, "learning_rate": 3.840159854994665e-06, "loss": 0.41058599948883057, "step": 5342 }, { "epoch": 1.4519021739130435, "grad_norm": 1.3872807797093687, "learning_rate": 3.836620979189973e-06, "loss": 0.4681456685066223, "step": 5343 }, { "epoch": 1.4521739130434783, "grad_norm": 1.175546940587484, "learning_rate": 3.833083347658559e-06, "loss": 0.3920077681541443, "step": 5344 }, { "epoch": 1.452445652173913, "grad_norm": 1.275143404951348, "learning_rate": 3.829546961114608e-06, "loss": 0.4638110101222992, "step": 5345 }, { "epoch": 1.4527173913043478, "grad_norm": 1.0417411721461647, "learning_rate": 3.826011820272052e-06, "loss": 0.29172053933143616, "step": 5346 }, { "epoch": 1.4529891304347826, "grad_norm": 1.3741309610853862, "learning_rate": 3.822477925844564e-06, "loss": 0.4338175058364868, "step": 5347 }, { "epoch": 1.4532608695652174, "grad_norm": 1.292027321896807, "learning_rate": 3.818945278545584e-06, "loss": 0.4528670907020569, "step": 5348 }, { "epoch": 1.4535326086956522, "grad_norm": 0.9853076611941962, "learning_rate": 3.815413879088284e-06, "loss": 0.29423269629478455, "step": 5349 }, { "epoch": 1.453804347826087, "grad_norm": 1.226834388877741, "learning_rate": 3.8118837281855804e-06, "loss": 0.3943910002708435, "step": 5350 }, { "epoch": 1.4540760869565217, "grad_norm": 1.0516360961606337, "learning_rate": 3.808354826550159e-06, "loss": 0.3638949990272522, "step": 5351 }, { "epoch": 1.4543478260869565, "grad_norm": 1.1471759384105849, "learning_rate": 3.804827174894432e-06, "loss": 0.39203977584838867, "step": 5352 }, { "epoch": 1.4546195652173912, "grad_norm": 1.2205334283261837, "learning_rate": 3.801300773930571e-06, "loss": 0.4037058353424072, "step": 5353 }, { "epoch": 1.454891304347826, "grad_norm": 1.1730365233574827, "learning_rate": 3.79777562437049e-06, "loss": 0.3505817651748657, "step": 5354 }, { "epoch": 1.4551630434782608, "grad_norm": 1.2827834730909369, "learning_rate": 3.794251726925856e-06, "loss": 0.36384087800979614, "step": 5355 }, { "epoch": 1.4554347826086955, "grad_norm": 1.3243008957165074, "learning_rate": 3.7907290823080746e-06, "loss": 0.3998444378376007, "step": 5356 }, { "epoch": 1.4557065217391305, "grad_norm": 1.2025607512270096, "learning_rate": 3.787207691228303e-06, "loss": 0.3949282765388489, "step": 5357 }, { "epoch": 1.4559782608695653, "grad_norm": 1.2129363340461186, "learning_rate": 3.7836875543974527e-06, "loss": 0.3789299428462982, "step": 5358 }, { "epoch": 1.45625, "grad_norm": 1.2297012040568134, "learning_rate": 3.7801686725261655e-06, "loss": 0.42831963300704956, "step": 5359 }, { "epoch": 1.4565217391304348, "grad_norm": 1.3639275273467286, "learning_rate": 3.776651046324843e-06, "loss": 0.499534547328949, "step": 5360 }, { "epoch": 1.4567934782608696, "grad_norm": 1.1494166005499111, "learning_rate": 3.773134676503629e-06, "loss": 0.464746356010437, "step": 5361 }, { "epoch": 1.4570652173913043, "grad_norm": 1.3396142064106071, "learning_rate": 3.7696195637724197e-06, "loss": 0.4579061269760132, "step": 5362 }, { "epoch": 1.4573369565217391, "grad_norm": 1.1819317079834817, "learning_rate": 3.7661057088408405e-06, "loss": 0.3733427822589874, "step": 5363 }, { "epoch": 1.4576086956521739, "grad_norm": 1.1026362763580417, "learning_rate": 3.7625931124182867e-06, "loss": 0.35025936365127563, "step": 5364 }, { "epoch": 1.4578804347826086, "grad_norm": 1.2404997729143787, "learning_rate": 3.759081775213882e-06, "loss": 0.46308913826942444, "step": 5365 }, { "epoch": 1.4581521739130434, "grad_norm": 1.0472760077326126, "learning_rate": 3.755571697936493e-06, "loss": 0.32170143723487854, "step": 5366 }, { "epoch": 1.4584239130434782, "grad_norm": 0.9858493393940704, "learning_rate": 3.7520628812947547e-06, "loss": 0.33274197578430176, "step": 5367 }, { "epoch": 1.4586956521739132, "grad_norm": 1.2606254829213945, "learning_rate": 3.748555325997022e-06, "loss": 0.4358150362968445, "step": 5368 }, { "epoch": 1.458967391304348, "grad_norm": 1.0844784778737946, "learning_rate": 3.74504903275141e-06, "loss": 0.33645838499069214, "step": 5369 }, { "epoch": 1.4592391304347827, "grad_norm": 1.2026329450867583, "learning_rate": 3.7415440022657746e-06, "loss": 0.3797769546508789, "step": 5370 }, { "epoch": 1.4595108695652175, "grad_norm": 1.3568111841051027, "learning_rate": 3.7380402352477207e-06, "loss": 0.501762866973877, "step": 5371 }, { "epoch": 1.4597826086956522, "grad_norm": 1.0516791074769558, "learning_rate": 3.7345377324045875e-06, "loss": 0.351392924785614, "step": 5372 }, { "epoch": 1.460054347826087, "grad_norm": 1.1179876025961675, "learning_rate": 3.7310364944434696e-06, "loss": 0.38384079933166504, "step": 5373 }, { "epoch": 1.4603260869565218, "grad_norm": 1.0591007459353172, "learning_rate": 3.7275365220712067e-06, "loss": 0.3027695417404175, "step": 5374 }, { "epoch": 1.4605978260869565, "grad_norm": 1.4671507252562037, "learning_rate": 3.7240378159943725e-06, "loss": 0.5318887233734131, "step": 5375 }, { "epoch": 1.4608695652173913, "grad_norm": 1.3369787138255003, "learning_rate": 3.7205403769192947e-06, "loss": 0.4828179180622101, "step": 5376 }, { "epoch": 1.461141304347826, "grad_norm": 1.232813653752502, "learning_rate": 3.7170442055520418e-06, "loss": 0.3944943845272064, "step": 5377 }, { "epoch": 1.4614130434782608, "grad_norm": 1.2025733192947337, "learning_rate": 3.7135493025984305e-06, "loss": 0.3452836871147156, "step": 5378 }, { "epoch": 1.4616847826086956, "grad_norm": 1.1264709625233948, "learning_rate": 3.7100556687640097e-06, "loss": 0.3702573776245117, "step": 5379 }, { "epoch": 1.4619565217391304, "grad_norm": 1.2429639520484008, "learning_rate": 3.706563304754086e-06, "loss": 0.4892963469028473, "step": 5380 }, { "epoch": 1.4622282608695651, "grad_norm": 1.9435389310528104, "learning_rate": 3.7030722112737016e-06, "loss": 0.4716721773147583, "step": 5381 }, { "epoch": 1.4625, "grad_norm": 1.2197486921980205, "learning_rate": 3.6995823890276485e-06, "loss": 0.401411235332489, "step": 5382 }, { "epoch": 1.4627717391304347, "grad_norm": 1.2867290983494544, "learning_rate": 3.69609383872045e-06, "loss": 0.5047985315322876, "step": 5383 }, { "epoch": 1.4630434782608694, "grad_norm": 1.199833752706024, "learning_rate": 3.6926065610563853e-06, "loss": 0.4350782334804535, "step": 5384 }, { "epoch": 1.4633152173913042, "grad_norm": 1.1349116986855248, "learning_rate": 3.689120556739475e-06, "loss": 0.3472698926925659, "step": 5385 }, { "epoch": 1.4635869565217392, "grad_norm": 1.1995718159406317, "learning_rate": 3.6856358264734695e-06, "loss": 0.41504108905792236, "step": 5386 }, { "epoch": 1.463858695652174, "grad_norm": 1.3374429086068396, "learning_rate": 3.6821523709618833e-06, "loss": 0.4663788676261902, "step": 5387 }, { "epoch": 1.4641304347826087, "grad_norm": 1.0684494307768755, "learning_rate": 3.678670190907956e-06, "loss": 0.3819342255592346, "step": 5388 }, { "epoch": 1.4644021739130435, "grad_norm": 1.2356964789285585, "learning_rate": 3.6751892870146755e-06, "loss": 0.3810248076915741, "step": 5389 }, { "epoch": 1.4646739130434783, "grad_norm": 1.2869162432109895, "learning_rate": 3.6717096599847744e-06, "loss": 0.4288384020328522, "step": 5390 }, { "epoch": 1.464945652173913, "grad_norm": 1.2489322315428044, "learning_rate": 3.668231310520729e-06, "loss": 0.4558844566345215, "step": 5391 }, { "epoch": 1.4652173913043478, "grad_norm": 1.0635883669238089, "learning_rate": 3.664754239324747e-06, "loss": 0.3561277985572815, "step": 5392 }, { "epoch": 1.4654891304347826, "grad_norm": 1.281350466936248, "learning_rate": 3.6612784470987893e-06, "loss": 0.4742288291454315, "step": 5393 }, { "epoch": 1.4657608695652173, "grad_norm": 1.1898828492942002, "learning_rate": 3.6578039345445584e-06, "loss": 0.3547603189945221, "step": 5394 }, { "epoch": 1.466032608695652, "grad_norm": 1.3730095425332016, "learning_rate": 3.6543307023634865e-06, "loss": 0.4663884937763214, "step": 5395 }, { "epoch": 1.4663043478260869, "grad_norm": 1.1353116278891358, "learning_rate": 3.6508587512567594e-06, "loss": 0.32913461327552795, "step": 5396 }, { "epoch": 1.4665760869565219, "grad_norm": 1.3139282823046312, "learning_rate": 3.6473880819253016e-06, "loss": 0.45480525493621826, "step": 5397 }, { "epoch": 1.4668478260869566, "grad_norm": 1.2369131208058006, "learning_rate": 3.6439186950697804e-06, "loss": 0.4279073476791382, "step": 5398 }, { "epoch": 1.4671195652173914, "grad_norm": 1.1817594993395657, "learning_rate": 3.640450591390593e-06, "loss": 0.3381701707839966, "step": 5399 }, { "epoch": 1.4673913043478262, "grad_norm": 1.22910701035162, "learning_rate": 3.636983771587892e-06, "loss": 0.3616071939468384, "step": 5400 }, { "epoch": 1.467663043478261, "grad_norm": 1.4843215491609214, "learning_rate": 3.633518236361567e-06, "loss": 0.4961009621620178, "step": 5401 }, { "epoch": 1.4679347826086957, "grad_norm": 1.1196160768907288, "learning_rate": 3.630053986411235e-06, "loss": 0.3939916491508484, "step": 5402 }, { "epoch": 1.4682065217391305, "grad_norm": 1.27870511433214, "learning_rate": 3.62659102243628e-06, "loss": 0.40613287687301636, "step": 5403 }, { "epoch": 1.4684782608695652, "grad_norm": 1.1544561430428304, "learning_rate": 3.6231293451357994e-06, "loss": 0.3990454077720642, "step": 5404 }, { "epoch": 1.46875, "grad_norm": 1.3599807139300335, "learning_rate": 3.6196689552086496e-06, "loss": 0.4771358370780945, "step": 5405 }, { "epoch": 1.4690217391304348, "grad_norm": 1.3094925406783369, "learning_rate": 3.6162098533534095e-06, "loss": 0.47018444538116455, "step": 5406 }, { "epoch": 1.4692934782608695, "grad_norm": 1.04661229984131, "learning_rate": 3.6127520402684222e-06, "loss": 0.33487358689308167, "step": 5407 }, { "epoch": 1.4695652173913043, "grad_norm": 1.0570825817010978, "learning_rate": 3.6092955166517497e-06, "loss": 0.3526146113872528, "step": 5408 }, { "epoch": 1.469836956521739, "grad_norm": 1.3388251120397203, "learning_rate": 3.6058402832011953e-06, "loss": 0.5025137066841125, "step": 5409 }, { "epoch": 1.4701086956521738, "grad_norm": 1.0919061499830143, "learning_rate": 3.6023863406143187e-06, "loss": 0.3454759120941162, "step": 5410 }, { "epoch": 1.4703804347826086, "grad_norm": 1.2091648910511772, "learning_rate": 3.5989336895883996e-06, "loss": 0.3653378188610077, "step": 5411 }, { "epoch": 1.4706521739130434, "grad_norm": 1.260283829960676, "learning_rate": 3.595482330820467e-06, "loss": 0.4476553499698639, "step": 5412 }, { "epoch": 1.4709239130434781, "grad_norm": 1.240725743235923, "learning_rate": 3.592032265007287e-06, "loss": 0.37384095788002014, "step": 5413 }, { "epoch": 1.4711956521739131, "grad_norm": 1.2380505129754207, "learning_rate": 3.588583492845369e-06, "loss": 0.40387803316116333, "step": 5414 }, { "epoch": 1.471467391304348, "grad_norm": 1.2843552196112453, "learning_rate": 3.585136015030949e-06, "loss": 0.47665971517562866, "step": 5415 }, { "epoch": 1.4717391304347827, "grad_norm": 1.1976099191450482, "learning_rate": 3.5816898322600136e-06, "loss": 0.3959968686103821, "step": 5416 }, { "epoch": 1.4720108695652174, "grad_norm": 1.17913774731413, "learning_rate": 3.578244945228286e-06, "loss": 0.38967809081077576, "step": 5417 }, { "epoch": 1.4722826086956522, "grad_norm": 1.28230580408252, "learning_rate": 3.574801354631222e-06, "loss": 0.41642338037490845, "step": 5418 }, { "epoch": 1.472554347826087, "grad_norm": 1.0792870669030377, "learning_rate": 3.5713590611640214e-06, "loss": 0.3241729736328125, "step": 5419 }, { "epoch": 1.4728260869565217, "grad_norm": 1.3312639523434964, "learning_rate": 3.5679180655216185e-06, "loss": 0.465206116437912, "step": 5420 }, { "epoch": 1.4730978260869565, "grad_norm": 1.4328079252562023, "learning_rate": 3.5644783683986927e-06, "loss": 0.43491101264953613, "step": 5421 }, { "epoch": 1.4733695652173913, "grad_norm": 1.2751573446999327, "learning_rate": 3.561039970489645e-06, "loss": 0.3533100485801697, "step": 5422 }, { "epoch": 1.473641304347826, "grad_norm": 1.3391933863216865, "learning_rate": 3.557602872488638e-06, "loss": 0.40165525674819946, "step": 5423 }, { "epoch": 1.4739130434782608, "grad_norm": 1.1449011398443567, "learning_rate": 3.5541670750895486e-06, "loss": 0.35115712881088257, "step": 5424 }, { "epoch": 1.4741847826086958, "grad_norm": 1.1725829710535722, "learning_rate": 3.550732578986006e-06, "loss": 0.39549875259399414, "step": 5425 }, { "epoch": 1.4744565217391306, "grad_norm": 1.1113370834829515, "learning_rate": 3.5472993848713743e-06, "loss": 0.3728005588054657, "step": 5426 }, { "epoch": 1.4747282608695653, "grad_norm": 1.3229538521227147, "learning_rate": 3.5438674934387463e-06, "loss": 0.42953333258628845, "step": 5427 }, { "epoch": 1.475, "grad_norm": 1.35548806797527, "learning_rate": 3.5404369053809606e-06, "loss": 0.48270729184150696, "step": 5428 }, { "epoch": 1.4752717391304349, "grad_norm": 1.3543434385176703, "learning_rate": 3.5370076213905904e-06, "loss": 0.4770790934562683, "step": 5429 }, { "epoch": 1.4755434782608696, "grad_norm": 1.3618398540292431, "learning_rate": 3.533579642159949e-06, "loss": 0.4815499186515808, "step": 5430 }, { "epoch": 1.4758152173913044, "grad_norm": 1.1332193523019625, "learning_rate": 3.530152968381075e-06, "loss": 0.37031376361846924, "step": 5431 }, { "epoch": 1.4760869565217392, "grad_norm": 1.14144509129833, "learning_rate": 3.5267276007457564e-06, "loss": 0.38714221119880676, "step": 5432 }, { "epoch": 1.476358695652174, "grad_norm": 1.2477890179975575, "learning_rate": 3.5233035399455107e-06, "loss": 0.4158385396003723, "step": 5433 }, { "epoch": 1.4766304347826087, "grad_norm": 1.4022041587723677, "learning_rate": 3.5198807866715954e-06, "loss": 0.535789430141449, "step": 5434 }, { "epoch": 1.4769021739130435, "grad_norm": 1.209837954228937, "learning_rate": 3.5164593416149962e-06, "loss": 0.4011610746383667, "step": 5435 }, { "epoch": 1.4771739130434782, "grad_norm": 1.2629512350237642, "learning_rate": 3.513039205466444e-06, "loss": 0.37786224484443665, "step": 5436 }, { "epoch": 1.477445652173913, "grad_norm": 1.3156074186465485, "learning_rate": 3.509620378916404e-06, "loss": 0.46813660860061646, "step": 5437 }, { "epoch": 1.4777173913043478, "grad_norm": 1.5450372456666124, "learning_rate": 3.506202862655068e-06, "loss": 0.5267109870910645, "step": 5438 }, { "epoch": 1.4779891304347825, "grad_norm": 1.2828176835720204, "learning_rate": 3.5027866573723747e-06, "loss": 0.46123647689819336, "step": 5439 }, { "epoch": 1.4782608695652173, "grad_norm": 1.2958279306717493, "learning_rate": 3.4993717637579904e-06, "loss": 0.3992520570755005, "step": 5440 }, { "epoch": 1.478532608695652, "grad_norm": 1.2131970298402415, "learning_rate": 3.4959581825013256e-06, "loss": 0.46547389030456543, "step": 5441 }, { "epoch": 1.4788043478260868, "grad_norm": 1.0475175729974895, "learning_rate": 3.492545914291512e-06, "loss": 0.29745885729789734, "step": 5442 }, { "epoch": 1.4790760869565218, "grad_norm": 1.2698674257690092, "learning_rate": 3.4891349598174275e-06, "loss": 0.4359574317932129, "step": 5443 }, { "epoch": 1.4793478260869566, "grad_norm": 1.2157662434333216, "learning_rate": 3.485725319767684e-06, "loss": 0.4081561267375946, "step": 5444 }, { "epoch": 1.4796195652173914, "grad_norm": 1.2258247037071865, "learning_rate": 3.482316994830618e-06, "loss": 0.3937516212463379, "step": 5445 }, { "epoch": 1.4798913043478261, "grad_norm": 1.3109290565484053, "learning_rate": 3.4789099856943175e-06, "loss": 0.4107917547225952, "step": 5446 }, { "epoch": 1.4801630434782609, "grad_norm": 1.2578681332441326, "learning_rate": 3.4755042930465866e-06, "loss": 0.4044170081615448, "step": 5447 }, { "epoch": 1.4804347826086957, "grad_norm": 1.3414868712763524, "learning_rate": 3.472099917574977e-06, "loss": 0.44865691661834717, "step": 5448 }, { "epoch": 1.4807065217391304, "grad_norm": 1.1844277026651266, "learning_rate": 3.468696859966768e-06, "loss": 0.392894983291626, "step": 5449 }, { "epoch": 1.4809782608695652, "grad_norm": 1.0353641522628876, "learning_rate": 3.465295120908978e-06, "loss": 0.3484167754650116, "step": 5450 }, { "epoch": 1.48125, "grad_norm": 1.2310302707916014, "learning_rate": 3.4618947010883498e-06, "loss": 0.39008551836013794, "step": 5451 }, { "epoch": 1.4815217391304347, "grad_norm": 1.0844123590480241, "learning_rate": 3.4584956011913693e-06, "loss": 0.3895204961299896, "step": 5452 }, { "epoch": 1.4817934782608695, "grad_norm": 1.0857313587703075, "learning_rate": 3.4550978219042553e-06, "loss": 0.3242277503013611, "step": 5453 }, { "epoch": 1.4820652173913045, "grad_norm": 1.205750975987064, "learning_rate": 3.4517013639129515e-06, "loss": 0.4096270203590393, "step": 5454 }, { "epoch": 1.4823369565217392, "grad_norm": 1.2381708378093885, "learning_rate": 3.448306227903142e-06, "loss": 0.37539660930633545, "step": 5455 }, { "epoch": 1.482608695652174, "grad_norm": 1.178358843436661, "learning_rate": 3.4449124145602443e-06, "loss": 0.4569922983646393, "step": 5456 }, { "epoch": 1.4828804347826088, "grad_norm": 1.3745993929630158, "learning_rate": 3.4415199245694084e-06, "loss": 0.42321503162384033, "step": 5457 }, { "epoch": 1.4831521739130435, "grad_norm": 1.0497884423131973, "learning_rate": 3.438128758615509e-06, "loss": 0.3598913848400116, "step": 5458 }, { "epoch": 1.4834239130434783, "grad_norm": 1.141598790087725, "learning_rate": 3.43473891738317e-06, "loss": 0.37798136472702026, "step": 5459 }, { "epoch": 1.483695652173913, "grad_norm": 1.2928516431058958, "learning_rate": 3.4313504015567347e-06, "loss": 0.38027065992355347, "step": 5460 }, { "epoch": 1.4839673913043478, "grad_norm": 1.4236658743294561, "learning_rate": 3.4279632118202744e-06, "loss": 0.4203747510910034, "step": 5461 }, { "epoch": 1.4842391304347826, "grad_norm": 1.0468441087884204, "learning_rate": 3.424577348857614e-06, "loss": 0.30836552381515503, "step": 5462 }, { "epoch": 1.4845108695652174, "grad_norm": 1.2494365998195598, "learning_rate": 3.4211928133522865e-06, "loss": 0.4308432936668396, "step": 5463 }, { "epoch": 1.4847826086956522, "grad_norm": 1.2562832362867602, "learning_rate": 3.417809605987572e-06, "loss": 0.388155996799469, "step": 5464 }, { "epoch": 1.485054347826087, "grad_norm": 1.1769906978852853, "learning_rate": 3.414427727446479e-06, "loss": 0.3479464054107666, "step": 5465 }, { "epoch": 1.4853260869565217, "grad_norm": 1.3139995844998842, "learning_rate": 3.4110471784117472e-06, "loss": 0.4186186194419861, "step": 5466 }, { "epoch": 1.4855978260869565, "grad_norm": 1.3507433450254447, "learning_rate": 3.4076679595658425e-06, "loss": 0.3966948986053467, "step": 5467 }, { "epoch": 1.4858695652173912, "grad_norm": 1.2110533108895183, "learning_rate": 3.404290071590971e-06, "loss": 0.4107159376144409, "step": 5468 }, { "epoch": 1.486141304347826, "grad_norm": 1.282828347057583, "learning_rate": 3.40091351516907e-06, "loss": 0.42515110969543457, "step": 5469 }, { "epoch": 1.4864130434782608, "grad_norm": 1.1916795944537466, "learning_rate": 3.3975382909817957e-06, "loss": 0.36908912658691406, "step": 5470 }, { "epoch": 1.4866847826086955, "grad_norm": 1.319227810178801, "learning_rate": 3.394164399710549e-06, "loss": 0.43873393535614014, "step": 5471 }, { "epoch": 1.4869565217391305, "grad_norm": 1.258176655120365, "learning_rate": 3.390791842036456e-06, "loss": 0.45716917514801025, "step": 5472 }, { "epoch": 1.4872282608695653, "grad_norm": 1.3579574414722382, "learning_rate": 3.387420618640379e-06, "loss": 0.49625393748283386, "step": 5473 }, { "epoch": 1.4875, "grad_norm": 1.417420585927469, "learning_rate": 3.384050730202898e-06, "loss": 0.4836527705192566, "step": 5474 }, { "epoch": 1.4877717391304348, "grad_norm": 1.2656285626774217, "learning_rate": 3.380682177404335e-06, "loss": 0.45579028129577637, "step": 5475 }, { "epoch": 1.4880434782608696, "grad_norm": 1.3848236942937966, "learning_rate": 3.3773149609247435e-06, "loss": 0.45588064193725586, "step": 5476 }, { "epoch": 1.4883152173913043, "grad_norm": 1.2797062572859568, "learning_rate": 3.3739490814438957e-06, "loss": 0.45660656690597534, "step": 5477 }, { "epoch": 1.4885869565217391, "grad_norm": 1.114262057363314, "learning_rate": 3.3705845396413038e-06, "loss": 0.3508206307888031, "step": 5478 }, { "epoch": 1.4888586956521739, "grad_norm": 1.211141816100089, "learning_rate": 3.367221336196208e-06, "loss": 0.37581396102905273, "step": 5479 }, { "epoch": 1.4891304347826086, "grad_norm": 1.4512603643765858, "learning_rate": 3.3638594717875807e-06, "loss": 0.5211488604545593, "step": 5480 }, { "epoch": 1.4894021739130434, "grad_norm": 1.1611897678715923, "learning_rate": 3.3604989470941095e-06, "loss": 0.3649677336215973, "step": 5481 }, { "epoch": 1.4896739130434782, "grad_norm": 1.3557755936231315, "learning_rate": 3.3571397627942383e-06, "loss": 0.46044617891311646, "step": 5482 }, { "epoch": 1.4899456521739132, "grad_norm": 1.3082015222661116, "learning_rate": 3.3537819195661125e-06, "loss": 0.4156324863433838, "step": 5483 }, { "epoch": 1.490217391304348, "grad_norm": 1.1272298627371207, "learning_rate": 3.350425418087624e-06, "loss": 0.37442561984062195, "step": 5484 }, { "epoch": 1.4904891304347827, "grad_norm": 1.1602746908806856, "learning_rate": 3.34707025903639e-06, "loss": 0.3534546494483948, "step": 5485 }, { "epoch": 1.4907608695652175, "grad_norm": 1.2238223227467675, "learning_rate": 3.3437164430897528e-06, "loss": 0.3698577284812927, "step": 5486 }, { "epoch": 1.4910326086956522, "grad_norm": 1.379677851544052, "learning_rate": 3.340363970924787e-06, "loss": 0.4754643440246582, "step": 5487 }, { "epoch": 1.491304347826087, "grad_norm": 1.2386997141149636, "learning_rate": 3.3370128432182957e-06, "loss": 0.45845240354537964, "step": 5488 }, { "epoch": 1.4915760869565218, "grad_norm": 1.2059022876244314, "learning_rate": 3.3336630606468136e-06, "loss": 0.33841365575790405, "step": 5489 }, { "epoch": 1.4918478260869565, "grad_norm": 1.1971020798132972, "learning_rate": 3.3303146238865935e-06, "loss": 0.3853587806224823, "step": 5490 }, { "epoch": 1.4921195652173913, "grad_norm": 1.2458526284410794, "learning_rate": 3.3269675336136274e-06, "loss": 0.38065510988235474, "step": 5491 }, { "epoch": 1.492391304347826, "grad_norm": 1.332258093430003, "learning_rate": 3.3236217905036317e-06, "loss": 0.42049676179885864, "step": 5492 }, { "epoch": 1.4926630434782608, "grad_norm": 1.269706331176333, "learning_rate": 3.320277395232053e-06, "loss": 0.40298891067504883, "step": 5493 }, { "epoch": 1.4929347826086956, "grad_norm": 1.328144544083364, "learning_rate": 3.316934348474058e-06, "loss": 0.43026018142700195, "step": 5494 }, { "epoch": 1.4932065217391304, "grad_norm": 1.1830367015891936, "learning_rate": 3.313592650904548e-06, "loss": 0.3526718020439148, "step": 5495 }, { "epoch": 1.4934782608695651, "grad_norm": 1.1310808456803128, "learning_rate": 3.3102523031981547e-06, "loss": 0.3722835183143616, "step": 5496 }, { "epoch": 1.49375, "grad_norm": 1.3585266530983309, "learning_rate": 3.3069133060292246e-06, "loss": 0.401212215423584, "step": 5497 }, { "epoch": 1.4940217391304347, "grad_norm": 1.3032497190822105, "learning_rate": 3.3035756600718515e-06, "loss": 0.4873535633087158, "step": 5498 }, { "epoch": 1.4942934782608694, "grad_norm": 1.3456107126856218, "learning_rate": 3.3002393659998357e-06, "loss": 0.4130798578262329, "step": 5499 }, { "epoch": 1.4945652173913042, "grad_norm": 1.1891856103499554, "learning_rate": 3.29690442448672e-06, "loss": 0.40571022033691406, "step": 5500 }, { "epoch": 1.4948369565217392, "grad_norm": 1.3424149269979848, "learning_rate": 3.2935708362057593e-06, "loss": 0.4534379541873932, "step": 5501 }, { "epoch": 1.495108695652174, "grad_norm": 1.2223704292905335, "learning_rate": 3.290238601829956e-06, "loss": 0.4159018397331238, "step": 5502 }, { "epoch": 1.4953804347826087, "grad_norm": 1.1841268258599058, "learning_rate": 3.28690772203202e-06, "loss": 0.457211434841156, "step": 5503 }, { "epoch": 1.4956521739130435, "grad_norm": 1.1405969740320017, "learning_rate": 3.2835781974843896e-06, "loss": 0.3260848820209503, "step": 5504 }, { "epoch": 1.4959239130434783, "grad_norm": 1.364949545068668, "learning_rate": 3.2802500288592477e-06, "loss": 0.4227982461452484, "step": 5505 }, { "epoch": 1.496195652173913, "grad_norm": 1.2806171176822456, "learning_rate": 3.2769232168284802e-06, "loss": 0.37343329191207886, "step": 5506 }, { "epoch": 1.4964673913043478, "grad_norm": 1.2689068224931355, "learning_rate": 3.273597762063714e-06, "loss": 0.35242730379104614, "step": 5507 }, { "epoch": 1.4967391304347826, "grad_norm": 1.324492085631299, "learning_rate": 3.2702736652362954e-06, "loss": 0.41027817130088806, "step": 5508 }, { "epoch": 1.4970108695652173, "grad_norm": 1.318737941555159, "learning_rate": 3.266950927017304e-06, "loss": 0.38665544986724854, "step": 5509 }, { "epoch": 1.497282608695652, "grad_norm": 1.0974567679981326, "learning_rate": 3.2636295480775315e-06, "loss": 0.32295578718185425, "step": 5510 }, { "epoch": 1.4975543478260869, "grad_norm": 1.1667017985615131, "learning_rate": 3.2603095290875075e-06, "loss": 0.4027172029018402, "step": 5511 }, { "epoch": 1.4978260869565219, "grad_norm": 1.326515876105038, "learning_rate": 3.256990870717487e-06, "loss": 0.38435518741607666, "step": 5512 }, { "epoch": 1.4980978260869566, "grad_norm": 1.23287332927305, "learning_rate": 3.253673573637438e-06, "loss": 0.3814985156059265, "step": 5513 }, { "epoch": 1.4983695652173914, "grad_norm": 1.3057168027178498, "learning_rate": 3.2503576385170676e-06, "loss": 0.49093425273895264, "step": 5514 }, { "epoch": 1.4986413043478262, "grad_norm": 1.0208589809186777, "learning_rate": 3.2470430660258e-06, "loss": 0.3416564464569092, "step": 5515 }, { "epoch": 1.498913043478261, "grad_norm": 1.3232833969456537, "learning_rate": 3.243729856832791e-06, "loss": 0.4239084720611572, "step": 5516 }, { "epoch": 1.4991847826086957, "grad_norm": 1.2587254853402134, "learning_rate": 3.2404180116069073e-06, "loss": 0.42959776520729065, "step": 5517 }, { "epoch": 1.4994565217391305, "grad_norm": 1.2115259281663804, "learning_rate": 3.2371075310167634e-06, "loss": 0.3863968551158905, "step": 5518 }, { "epoch": 1.4997282608695652, "grad_norm": 1.1614506982700161, "learning_rate": 3.2337984157306757e-06, "loss": 0.33290281891822815, "step": 5519 }, { "epoch": 1.5, "grad_norm": 1.2972152938786397, "learning_rate": 3.230490666416689e-06, "loss": 0.43634214997291565, "step": 5520 }, { "epoch": 1.5002717391304348, "grad_norm": 1.3280456944279369, "learning_rate": 3.2271842837425917e-06, "loss": 0.4136001169681549, "step": 5521 }, { "epoch": 1.5005434782608695, "grad_norm": 1.2078888920117836, "learning_rate": 3.223879268375869e-06, "loss": 0.4521213173866272, "step": 5522 }, { "epoch": 1.5008152173913043, "grad_norm": 1.3026784143237213, "learning_rate": 3.220575620983748e-06, "loss": 0.5486711263656616, "step": 5523 }, { "epoch": 1.501086956521739, "grad_norm": 1.1094909967509945, "learning_rate": 3.2172733422331748e-06, "loss": 0.3190372884273529, "step": 5524 }, { "epoch": 1.5013586956521738, "grad_norm": 1.1449564298751618, "learning_rate": 3.213972432790823e-06, "loss": 0.4095747470855713, "step": 5525 }, { "epoch": 1.5016304347826086, "grad_norm": 1.2412445306904438, "learning_rate": 3.2106728933230756e-06, "loss": 0.39286646246910095, "step": 5526 }, { "epoch": 1.5019021739130434, "grad_norm": 1.2665041173082279, "learning_rate": 3.2073747244960574e-06, "loss": 0.45149075984954834, "step": 5527 }, { "epoch": 1.5021739130434781, "grad_norm": 1.4392822257046611, "learning_rate": 3.204077926975607e-06, "loss": 0.4466422200202942, "step": 5528 }, { "epoch": 1.502445652173913, "grad_norm": 1.2220436523964078, "learning_rate": 3.2007825014272844e-06, "loss": 0.3892761170864105, "step": 5529 }, { "epoch": 1.5027173913043477, "grad_norm": 1.0485987201732967, "learning_rate": 3.197488448516376e-06, "loss": 0.36248815059661865, "step": 5530 }, { "epoch": 1.5029891304347827, "grad_norm": 1.1148118728158023, "learning_rate": 3.1941957689078916e-06, "loss": 0.34851011633872986, "step": 5531 }, { "epoch": 1.5032608695652174, "grad_norm": 1.3452883131731013, "learning_rate": 3.1909044632665685e-06, "loss": 0.3850345015525818, "step": 5532 }, { "epoch": 1.5035326086956522, "grad_norm": 1.260933383020169, "learning_rate": 3.187614532256851e-06, "loss": 0.4086500406265259, "step": 5533 }, { "epoch": 1.503804347826087, "grad_norm": 1.1778281745209498, "learning_rate": 3.1843259765429214e-06, "loss": 0.3557496666908264, "step": 5534 }, { "epoch": 1.5040760869565217, "grad_norm": 1.187322640650683, "learning_rate": 3.181038796788678e-06, "loss": 0.4240737557411194, "step": 5535 }, { "epoch": 1.5043478260869565, "grad_norm": 1.3180167097860114, "learning_rate": 3.1777529936577456e-06, "loss": 0.4460075795650482, "step": 5536 }, { "epoch": 1.5046195652173913, "grad_norm": 1.3252092269664624, "learning_rate": 3.174468567813461e-06, "loss": 0.4725203216075897, "step": 5537 }, { "epoch": 1.5048913043478263, "grad_norm": 1.22439037917988, "learning_rate": 3.171185519918892e-06, "loss": 0.4011766314506531, "step": 5538 }, { "epoch": 1.505163043478261, "grad_norm": 1.158761971533283, "learning_rate": 3.167903850636832e-06, "loss": 0.39510399103164673, "step": 5539 }, { "epoch": 1.5054347826086958, "grad_norm": 1.2432754373400439, "learning_rate": 3.164623560629776e-06, "loss": 0.40074294805526733, "step": 5540 }, { "epoch": 1.5057065217391306, "grad_norm": 1.2938242425898936, "learning_rate": 3.161344650559971e-06, "loss": 0.48357316851615906, "step": 5541 }, { "epoch": 1.5059782608695653, "grad_norm": 1.2158898217907856, "learning_rate": 3.1580671210893577e-06, "loss": 0.4094412922859192, "step": 5542 }, { "epoch": 1.50625, "grad_norm": 1.4100423064924696, "learning_rate": 3.1547909728796112e-06, "loss": 0.49335581064224243, "step": 5543 }, { "epoch": 1.5065217391304349, "grad_norm": 1.4475046065698547, "learning_rate": 3.151516206592128e-06, "loss": 0.44728201627731323, "step": 5544 }, { "epoch": 1.5067934782608696, "grad_norm": 1.0688022731634665, "learning_rate": 3.148242822888026e-06, "loss": 0.3288528025150299, "step": 5545 }, { "epoch": 1.5070652173913044, "grad_norm": 1.446524787494873, "learning_rate": 3.1449708224281326e-06, "loss": 0.5019391775131226, "step": 5546 }, { "epoch": 1.5073369565217392, "grad_norm": 1.288562498787549, "learning_rate": 3.1417002058730116e-06, "loss": 0.4375048279762268, "step": 5547 }, { "epoch": 1.507608695652174, "grad_norm": 1.426715383347651, "learning_rate": 3.138430973882942e-06, "loss": 0.4251575767993927, "step": 5548 }, { "epoch": 1.5078804347826087, "grad_norm": 1.1835707957844408, "learning_rate": 3.135163127117915e-06, "loss": 0.3961149752140045, "step": 5549 }, { "epoch": 1.5081521739130435, "grad_norm": 1.2679716694239302, "learning_rate": 3.1318966662376537e-06, "loss": 0.38310766220092773, "step": 5550 }, { "epoch": 1.5084239130434782, "grad_norm": 1.2570907218879734, "learning_rate": 3.1286315919015953e-06, "loss": 0.40693145990371704, "step": 5551 }, { "epoch": 1.508695652173913, "grad_norm": 1.2514865920591245, "learning_rate": 3.125367904768901e-06, "loss": 0.409161776304245, "step": 5552 }, { "epoch": 1.5089673913043478, "grad_norm": 1.4051846693276244, "learning_rate": 3.122105605498442e-06, "loss": 0.43560606241226196, "step": 5553 }, { "epoch": 1.5092391304347825, "grad_norm": 1.139929722795316, "learning_rate": 3.118844694748828e-06, "loss": 0.37019050121307373, "step": 5554 }, { "epoch": 1.5095108695652173, "grad_norm": 1.46541262853529, "learning_rate": 3.1155851731783713e-06, "loss": 0.49534091353416443, "step": 5555 }, { "epoch": 1.509782608695652, "grad_norm": 1.383765913413515, "learning_rate": 3.1123270414451035e-06, "loss": 0.4156453013420105, "step": 5556 }, { "epoch": 1.5100543478260868, "grad_norm": 1.5107404904102615, "learning_rate": 3.1090703002067936e-06, "loss": 0.5189172029495239, "step": 5557 }, { "epoch": 1.5103260869565216, "grad_norm": 1.1394695221715407, "learning_rate": 3.105814950120909e-06, "loss": 0.38572949171066284, "step": 5558 }, { "epoch": 1.5105978260869564, "grad_norm": 1.1583270311811655, "learning_rate": 3.1025609918446475e-06, "loss": 0.38336867094039917, "step": 5559 }, { "epoch": 1.5108695652173914, "grad_norm": 1.4902613245701224, "learning_rate": 3.099308426034925e-06, "loss": 0.4815387427806854, "step": 5560 }, { "epoch": 1.5111413043478261, "grad_norm": 1.3910202445132913, "learning_rate": 3.096057253348378e-06, "loss": 0.46241435408592224, "step": 5561 }, { "epoch": 1.5114130434782609, "grad_norm": 1.2048877065105303, "learning_rate": 3.0928074744413504e-06, "loss": 0.36654597520828247, "step": 5562 }, { "epoch": 1.5116847826086957, "grad_norm": 1.1152813763813398, "learning_rate": 3.089559089969919e-06, "loss": 0.3532366156578064, "step": 5563 }, { "epoch": 1.5119565217391304, "grad_norm": 1.1653003358474097, "learning_rate": 3.0863121005898744e-06, "loss": 0.3840577006340027, "step": 5564 }, { "epoch": 1.5122282608695652, "grad_norm": 1.2275838572931246, "learning_rate": 3.083066506956718e-06, "loss": 0.42141515016555786, "step": 5565 }, { "epoch": 1.5125, "grad_norm": 1.1098896203229274, "learning_rate": 3.0798223097256795e-06, "loss": 0.3756713569164276, "step": 5566 }, { "epoch": 1.512771739130435, "grad_norm": 1.3610356125142706, "learning_rate": 3.0765795095517026e-06, "loss": 0.5076009035110474, "step": 5567 }, { "epoch": 1.5130434782608697, "grad_norm": 1.2398813421960388, "learning_rate": 3.073338107089452e-06, "loss": 0.4125802218914032, "step": 5568 }, { "epoch": 1.5133152173913045, "grad_norm": 1.2284329462746273, "learning_rate": 3.0700981029933017e-06, "loss": 0.40874454379081726, "step": 5569 }, { "epoch": 1.5135869565217392, "grad_norm": 1.2237564071566454, "learning_rate": 3.0668594979173525e-06, "loss": 0.395058274269104, "step": 5570 }, { "epoch": 1.513858695652174, "grad_norm": 1.2490465234707582, "learning_rate": 3.0636222925154223e-06, "loss": 0.4641476273536682, "step": 5571 }, { "epoch": 1.5141304347826088, "grad_norm": 1.1765155417290034, "learning_rate": 3.060386487441037e-06, "loss": 0.4066125154495239, "step": 5572 }, { "epoch": 1.5144021739130435, "grad_norm": 1.1715328095434803, "learning_rate": 3.05715208334745e-06, "loss": 0.40796327590942383, "step": 5573 }, { "epoch": 1.5146739130434783, "grad_norm": 1.3334719731899631, "learning_rate": 3.0539190808876273e-06, "loss": 0.4156542718410492, "step": 5574 }, { "epoch": 1.514945652173913, "grad_norm": 1.1603077603788172, "learning_rate": 3.050687480714256e-06, "loss": 0.374262273311615, "step": 5575 }, { "epoch": 1.5152173913043478, "grad_norm": 1.3342390620322655, "learning_rate": 3.0474572834797287e-06, "loss": 0.5053960084915161, "step": 5576 }, { "epoch": 1.5154891304347826, "grad_norm": 1.243128219930335, "learning_rate": 3.0442284898361764e-06, "loss": 0.35726675391197205, "step": 5577 }, { "epoch": 1.5157608695652174, "grad_norm": 1.2281065503549373, "learning_rate": 3.04100110043542e-06, "loss": 0.382729709148407, "step": 5578 }, { "epoch": 1.5160326086956522, "grad_norm": 1.2266037545868083, "learning_rate": 3.037775115929018e-06, "loss": 0.43556612730026245, "step": 5579 }, { "epoch": 1.516304347826087, "grad_norm": 1.1471329065131362, "learning_rate": 3.0345505369682373e-06, "loss": 0.32783666253089905, "step": 5580 }, { "epoch": 1.5165760869565217, "grad_norm": 1.3116448070632258, "learning_rate": 3.0313273642040577e-06, "loss": 0.48721668124198914, "step": 5581 }, { "epoch": 1.5168478260869565, "grad_norm": 1.207063393945968, "learning_rate": 3.028105598287179e-06, "loss": 0.3762618601322174, "step": 5582 }, { "epoch": 1.5171195652173912, "grad_norm": 1.366578877663995, "learning_rate": 3.0248852398680194e-06, "loss": 0.44348132610321045, "step": 5583 }, { "epoch": 1.517391304347826, "grad_norm": 0.9844717837354721, "learning_rate": 3.021666289596711e-06, "loss": 0.2997947335243225, "step": 5584 }, { "epoch": 1.5176630434782608, "grad_norm": 1.1155979439374255, "learning_rate": 3.018448748123097e-06, "loss": 0.42933499813079834, "step": 5585 }, { "epoch": 1.5179347826086955, "grad_norm": 1.2409260868352188, "learning_rate": 3.0152326160967414e-06, "loss": 0.45231741666793823, "step": 5586 }, { "epoch": 1.5182065217391303, "grad_norm": 1.18107545336314, "learning_rate": 3.012017894166923e-06, "loss": 0.40129712224006653, "step": 5587 }, { "epoch": 1.518478260869565, "grad_norm": 1.0712297327777698, "learning_rate": 3.0088045829826385e-06, "loss": 0.3549078702926636, "step": 5588 }, { "epoch": 1.51875, "grad_norm": 1.253791814921946, "learning_rate": 3.0055926831925898e-06, "loss": 0.4189265966415405, "step": 5589 }, { "epoch": 1.5190217391304348, "grad_norm": 1.0152554372695688, "learning_rate": 3.0023821954452036e-06, "loss": 0.28950268030166626, "step": 5590 }, { "epoch": 1.5192934782608696, "grad_norm": 1.1325935747850004, "learning_rate": 2.9991731203886233e-06, "loss": 0.3871959447860718, "step": 5591 }, { "epoch": 1.5195652173913043, "grad_norm": 1.3281219176286156, "learning_rate": 2.995965458670691e-06, "loss": 0.3936729431152344, "step": 5592 }, { "epoch": 1.5198369565217391, "grad_norm": 1.280936403356523, "learning_rate": 2.992759210938988e-06, "loss": 0.40299779176712036, "step": 5593 }, { "epoch": 1.5201086956521739, "grad_norm": 0.9105425619514202, "learning_rate": 2.9895543778407875e-06, "loss": 0.2995341420173645, "step": 5594 }, { "epoch": 1.5203804347826086, "grad_norm": 1.366380993533997, "learning_rate": 2.9863509600230913e-06, "loss": 0.4766708016395569, "step": 5595 }, { "epoch": 1.5206521739130436, "grad_norm": 1.198084344154281, "learning_rate": 2.9831489581326113e-06, "loss": 0.3603951930999756, "step": 5596 }, { "epoch": 1.5209239130434784, "grad_norm": 1.1545959663763594, "learning_rate": 2.9799483728157674e-06, "loss": 0.4536617696285248, "step": 5597 }, { "epoch": 1.5211956521739132, "grad_norm": 1.266630682433618, "learning_rate": 2.976749204718706e-06, "loss": 0.42166972160339355, "step": 5598 }, { "epoch": 1.521467391304348, "grad_norm": 1.1939371252123587, "learning_rate": 2.9735514544872723e-06, "loss": 0.41783249378204346, "step": 5599 }, { "epoch": 1.5217391304347827, "grad_norm": 1.1571338275907126, "learning_rate": 2.9703551227670434e-06, "loss": 0.42942291498184204, "step": 5600 }, { "epoch": 1.5220108695652175, "grad_norm": 1.512130659031343, "learning_rate": 2.9671602102032926e-06, "loss": 0.5058668255805969, "step": 5601 }, { "epoch": 1.5222826086956522, "grad_norm": 1.249497780981543, "learning_rate": 2.963966717441017e-06, "loss": 0.4632906913757324, "step": 5602 }, { "epoch": 1.522554347826087, "grad_norm": 1.1162583571185434, "learning_rate": 2.960774645124923e-06, "loss": 0.41977155208587646, "step": 5603 }, { "epoch": 1.5228260869565218, "grad_norm": 1.2979212835418454, "learning_rate": 2.9575839938994365e-06, "loss": 0.3781992793083191, "step": 5604 }, { "epoch": 1.5230978260869565, "grad_norm": 1.3496249430496061, "learning_rate": 2.9543947644086833e-06, "loss": 0.42734819650650024, "step": 5605 }, { "epoch": 1.5233695652173913, "grad_norm": 1.29917515616666, "learning_rate": 2.951206957296516e-06, "loss": 0.4609249234199524, "step": 5606 }, { "epoch": 1.523641304347826, "grad_norm": 1.3076506066811167, "learning_rate": 2.948020573206496e-06, "loss": 0.5161340236663818, "step": 5607 }, { "epoch": 1.5239130434782608, "grad_norm": 1.4026780863427213, "learning_rate": 2.944835612781889e-06, "loss": 0.4232032299041748, "step": 5608 }, { "epoch": 1.5241847826086956, "grad_norm": 0.7742729181369016, "learning_rate": 2.9416520766656844e-06, "loss": 0.18788206577301025, "step": 5609 }, { "epoch": 1.5244565217391304, "grad_norm": 1.3931031230499864, "learning_rate": 2.938469965500579e-06, "loss": 0.4297358989715576, "step": 5610 }, { "epoch": 1.5247282608695651, "grad_norm": 1.1234453115424592, "learning_rate": 2.9352892799289866e-06, "loss": 0.3500717878341675, "step": 5611 }, { "epoch": 1.525, "grad_norm": 1.2801601381192347, "learning_rate": 2.93211002059302e-06, "loss": 0.40812361240386963, "step": 5612 }, { "epoch": 1.5252717391304347, "grad_norm": 1.2300866152262069, "learning_rate": 2.9289321881345257e-06, "loss": 0.4057857096195221, "step": 5613 }, { "epoch": 1.5255434782608694, "grad_norm": 1.2644449553469308, "learning_rate": 2.9257557831950435e-06, "loss": 0.43067893385887146, "step": 5614 }, { "epoch": 1.5258152173913042, "grad_norm": 1.270822682083672, "learning_rate": 2.9225808064158244e-06, "loss": 0.4391605257987976, "step": 5615 }, { "epoch": 1.526086956521739, "grad_norm": 1.1663698330154166, "learning_rate": 2.9194072584378506e-06, "loss": 0.38253486156463623, "step": 5616 }, { "epoch": 1.5263586956521737, "grad_norm": 1.2942035821986466, "learning_rate": 2.9162351399017964e-06, "loss": 0.4718632996082306, "step": 5617 }, { "epoch": 1.5266304347826087, "grad_norm": 1.364923150795971, "learning_rate": 2.913064451448054e-06, "loss": 0.4384320378303528, "step": 5618 }, { "epoch": 1.5269021739130435, "grad_norm": 1.2487648970507512, "learning_rate": 2.9098951937167297e-06, "loss": 0.39091432094573975, "step": 5619 }, { "epoch": 1.5271739130434783, "grad_norm": 0.9219520567082685, "learning_rate": 2.9067273673476404e-06, "loss": 0.26649075746536255, "step": 5620 }, { "epoch": 1.527445652173913, "grad_norm": 1.1596091718356616, "learning_rate": 2.903560972980307e-06, "loss": 0.36359864473342896, "step": 5621 }, { "epoch": 1.5277173913043478, "grad_norm": 1.4267118232576217, "learning_rate": 2.9003960112539687e-06, "loss": 0.49917668104171753, "step": 5622 }, { "epoch": 1.5279891304347826, "grad_norm": 1.1144246985568147, "learning_rate": 2.897232482807577e-06, "loss": 0.3544132709503174, "step": 5623 }, { "epoch": 1.5282608695652173, "grad_norm": 1.2075210727903225, "learning_rate": 2.8940703882797827e-06, "loss": 0.3799206018447876, "step": 5624 }, { "epoch": 1.5285326086956523, "grad_norm": 0.9875140016154904, "learning_rate": 2.8909097283089606e-06, "loss": 0.26106613874435425, "step": 5625 }, { "epoch": 1.528804347826087, "grad_norm": 1.353257468702995, "learning_rate": 2.8877505035331876e-06, "loss": 0.4131070375442505, "step": 5626 }, { "epoch": 1.5290760869565219, "grad_norm": 1.3863914360642426, "learning_rate": 2.8845927145902574e-06, "loss": 0.4071488380432129, "step": 5627 }, { "epoch": 1.5293478260869566, "grad_norm": 1.1630119740631348, "learning_rate": 2.88143636211766e-06, "loss": 0.33780109882354736, "step": 5628 }, { "epoch": 1.5296195652173914, "grad_norm": 1.209561856757093, "learning_rate": 2.878281446752618e-06, "loss": 0.39916718006134033, "step": 5629 }, { "epoch": 1.5298913043478262, "grad_norm": 1.1767234772401567, "learning_rate": 2.875127969132041e-06, "loss": 0.4162474572658539, "step": 5630 }, { "epoch": 1.530163043478261, "grad_norm": 1.229436264202614, "learning_rate": 2.8719759298925652e-06, "loss": 0.33383113145828247, "step": 5631 }, { "epoch": 1.5304347826086957, "grad_norm": 1.1940774033461898, "learning_rate": 2.868825329670524e-06, "loss": 0.3381708860397339, "step": 5632 }, { "epoch": 1.5307065217391305, "grad_norm": 1.3889904269785696, "learning_rate": 2.8656761691019673e-06, "loss": 0.46949779987335205, "step": 5633 }, { "epoch": 1.5309782608695652, "grad_norm": 1.483577095566796, "learning_rate": 2.8625284488226566e-06, "loss": 0.5201734304428101, "step": 5634 }, { "epoch": 1.53125, "grad_norm": 1.3799789942304872, "learning_rate": 2.85938216946805e-06, "loss": 0.5006132125854492, "step": 5635 }, { "epoch": 1.5315217391304348, "grad_norm": 1.2693294389772147, "learning_rate": 2.856237331673336e-06, "loss": 0.4069893956184387, "step": 5636 }, { "epoch": 1.5317934782608695, "grad_norm": 1.1778585095503438, "learning_rate": 2.853093936073391e-06, "loss": 0.35313135385513306, "step": 5637 }, { "epoch": 1.5320652173913043, "grad_norm": 1.1896153290528515, "learning_rate": 2.84995198330281e-06, "loss": 0.43346893787384033, "step": 5638 }, { "epoch": 1.532336956521739, "grad_norm": 1.1307926214402826, "learning_rate": 2.846811473995902e-06, "loss": 0.3652094602584839, "step": 5639 }, { "epoch": 1.5326086956521738, "grad_norm": 1.19924507843025, "learning_rate": 2.8436724087866686e-06, "loss": 0.4323567748069763, "step": 5640 }, { "epoch": 1.5328804347826086, "grad_norm": 1.1035857502863713, "learning_rate": 2.840534788308835e-06, "loss": 0.35231924057006836, "step": 5641 }, { "epoch": 1.5331521739130434, "grad_norm": 1.3176866777845924, "learning_rate": 2.8373986131958286e-06, "loss": 0.5162299871444702, "step": 5642 }, { "epoch": 1.5334239130434781, "grad_norm": 1.3049519189933494, "learning_rate": 2.834263884080788e-06, "loss": 0.4155609607696533, "step": 5643 }, { "epoch": 1.533695652173913, "grad_norm": 1.2354352479336506, "learning_rate": 2.8311306015965533e-06, "loss": 0.39021503925323486, "step": 5644 }, { "epoch": 1.5339673913043477, "grad_norm": 1.2969113325033004, "learning_rate": 2.827998766375678e-06, "loss": 0.41868963837623596, "step": 5645 }, { "epoch": 1.5342391304347827, "grad_norm": 1.0309944721468283, "learning_rate": 2.8248683790504226e-06, "loss": 0.27365171909332275, "step": 5646 }, { "epoch": 1.5345108695652174, "grad_norm": 0.9758055743601148, "learning_rate": 2.821739440252759e-06, "loss": 0.2728344202041626, "step": 5647 }, { "epoch": 1.5347826086956522, "grad_norm": 1.3376391309136828, "learning_rate": 2.818611950614356e-06, "loss": 0.38361287117004395, "step": 5648 }, { "epoch": 1.535054347826087, "grad_norm": 1.2984664988534993, "learning_rate": 2.815485910766599e-06, "loss": 0.44126594066619873, "step": 5649 }, { "epoch": 1.5353260869565217, "grad_norm": 1.2486600965254497, "learning_rate": 2.812361321340582e-06, "loss": 0.45184189081192017, "step": 5650 }, { "epoch": 1.5355978260869565, "grad_norm": 1.2856186159194056, "learning_rate": 2.809238182967092e-06, "loss": 0.41159695386886597, "step": 5651 }, { "epoch": 1.5358695652173913, "grad_norm": 1.213507632901665, "learning_rate": 2.8061164962766463e-06, "loss": 0.3900727927684784, "step": 5652 }, { "epoch": 1.5361413043478263, "grad_norm": 1.0857383605090904, "learning_rate": 2.8029962618994478e-06, "loss": 0.2972177267074585, "step": 5653 }, { "epoch": 1.536413043478261, "grad_norm": 1.2885367749855496, "learning_rate": 2.799877480465417e-06, "loss": 0.438454270362854, "step": 5654 }, { "epoch": 1.5366847826086958, "grad_norm": 1.3245239108720799, "learning_rate": 2.7967601526041788e-06, "loss": 0.42665961384773254, "step": 5655 }, { "epoch": 1.5369565217391306, "grad_norm": 1.2424173610794742, "learning_rate": 2.793644278945068e-06, "loss": 0.3544754981994629, "step": 5656 }, { "epoch": 1.5372282608695653, "grad_norm": 1.1063086074082038, "learning_rate": 2.7905298601171147e-06, "loss": 0.28705698251724243, "step": 5657 }, { "epoch": 1.5375, "grad_norm": 1.0829441195130056, "learning_rate": 2.7874168967490677e-06, "loss": 0.4145970940589905, "step": 5658 }, { "epoch": 1.5377717391304349, "grad_norm": 1.3096287179944948, "learning_rate": 2.7843053894693805e-06, "loss": 0.40904700756073, "step": 5659 }, { "epoch": 1.5380434782608696, "grad_norm": 1.7902253990973864, "learning_rate": 2.781195338906202e-06, "loss": 0.42622724175453186, "step": 5660 }, { "epoch": 1.5383152173913044, "grad_norm": 1.2391547729136654, "learning_rate": 2.7780867456873984e-06, "loss": 0.4225238561630249, "step": 5661 }, { "epoch": 1.5385869565217392, "grad_norm": 1.2981808414708051, "learning_rate": 2.774979610440537e-06, "loss": 0.4361429214477539, "step": 5662 }, { "epoch": 1.538858695652174, "grad_norm": 1.2232607091651309, "learning_rate": 2.7718739337928947e-06, "loss": 0.45944809913635254, "step": 5663 }, { "epoch": 1.5391304347826087, "grad_norm": 1.2898752346994624, "learning_rate": 2.7687697163714455e-06, "loss": 0.4497773051261902, "step": 5664 }, { "epoch": 1.5394021739130435, "grad_norm": 1.0807942730257543, "learning_rate": 2.765666958802876e-06, "loss": 0.3362320065498352, "step": 5665 }, { "epoch": 1.5396739130434782, "grad_norm": 1.073315281689374, "learning_rate": 2.76256566171358e-06, "loss": 0.36361163854599, "step": 5666 }, { "epoch": 1.539945652173913, "grad_norm": 1.10271735794154, "learning_rate": 2.7594658257296456e-06, "loss": 0.3700263798236847, "step": 5667 }, { "epoch": 1.5402173913043478, "grad_norm": 1.1456222872002007, "learning_rate": 2.7563674514768755e-06, "loss": 0.3432946801185608, "step": 5668 }, { "epoch": 1.5404891304347825, "grad_norm": 1.285945463612538, "learning_rate": 2.753270539580777e-06, "loss": 0.41076019406318665, "step": 5669 }, { "epoch": 1.5407608695652173, "grad_norm": 1.2324079013943396, "learning_rate": 2.7501750906665603e-06, "loss": 0.4256150722503662, "step": 5670 }, { "epoch": 1.541032608695652, "grad_norm": 1.217729022192474, "learning_rate": 2.747081105359133e-06, "loss": 0.48629024624824524, "step": 5671 }, { "epoch": 1.5413043478260868, "grad_norm": 1.0989584500652625, "learning_rate": 2.7439885842831237e-06, "loss": 0.3762977719306946, "step": 5672 }, { "epoch": 1.5415760869565216, "grad_norm": 1.005739513639174, "learning_rate": 2.7408975280628513e-06, "loss": 0.3286309242248535, "step": 5673 }, { "epoch": 1.5418478260869564, "grad_norm": 1.2929893799266634, "learning_rate": 2.7378079373223364e-06, "loss": 0.4417612552642822, "step": 5674 }, { "epoch": 1.5421195652173914, "grad_norm": 1.1771147227385572, "learning_rate": 2.7347198126853247e-06, "loss": 0.39024168252944946, "step": 5675 }, { "epoch": 1.5423913043478261, "grad_norm": 1.1524592270774945, "learning_rate": 2.7316331547752406e-06, "loss": 0.3494388461112976, "step": 5676 }, { "epoch": 1.5426630434782609, "grad_norm": 1.2884952671603744, "learning_rate": 2.728547964215228e-06, "loss": 0.4085046350955963, "step": 5677 }, { "epoch": 1.5429347826086957, "grad_norm": 1.2051203373755721, "learning_rate": 2.7254642416281306e-06, "loss": 0.44654232263565063, "step": 5678 }, { "epoch": 1.5432065217391304, "grad_norm": 1.1443187756656104, "learning_rate": 2.7223819876364978e-06, "loss": 0.34478360414505005, "step": 5679 }, { "epoch": 1.5434782608695652, "grad_norm": 1.3039143546893381, "learning_rate": 2.719301202862574e-06, "loss": 0.4885460138320923, "step": 5680 }, { "epoch": 1.54375, "grad_norm": 1.1626712891187951, "learning_rate": 2.7162218879283174e-06, "loss": 0.42812323570251465, "step": 5681 }, { "epoch": 1.544021739130435, "grad_norm": 1.3197457507916965, "learning_rate": 2.713144043455388e-06, "loss": 0.41861987113952637, "step": 5682 }, { "epoch": 1.5442934782608697, "grad_norm": 1.1866325762884888, "learning_rate": 2.7100676700651385e-06, "loss": 0.40107202529907227, "step": 5683 }, { "epoch": 1.5445652173913045, "grad_norm": 1.292671364615222, "learning_rate": 2.706992768378637e-06, "loss": 0.5120666027069092, "step": 5684 }, { "epoch": 1.5448369565217392, "grad_norm": 1.2832241038756946, "learning_rate": 2.7039193390166497e-06, "loss": 0.4181162714958191, "step": 5685 }, { "epoch": 1.545108695652174, "grad_norm": 0.9540030858774329, "learning_rate": 2.7008473825996474e-06, "loss": 0.30681905150413513, "step": 5686 }, { "epoch": 1.5453804347826088, "grad_norm": 1.217457143135664, "learning_rate": 2.6977768997477937e-06, "loss": 0.34782835841178894, "step": 5687 }, { "epoch": 1.5456521739130435, "grad_norm": 1.2877315851104822, "learning_rate": 2.6947078910809752e-06, "loss": 0.35867488384246826, "step": 5688 }, { "epoch": 1.5459239130434783, "grad_norm": 1.3299241932206725, "learning_rate": 2.691640357218759e-06, "loss": 0.4432380497455597, "step": 5689 }, { "epoch": 1.546195652173913, "grad_norm": 1.3129703158886248, "learning_rate": 2.6885742987804275e-06, "loss": 0.44401249289512634, "step": 5690 }, { "epoch": 1.5464673913043478, "grad_norm": 1.2662506027748415, "learning_rate": 2.6855097163849642e-06, "loss": 0.4193737506866455, "step": 5691 }, { "epoch": 1.5467391304347826, "grad_norm": 1.3217239659459954, "learning_rate": 2.682446610651045e-06, "loss": 0.44673144817352295, "step": 5692 }, { "epoch": 1.5470108695652174, "grad_norm": 1.1632900886534663, "learning_rate": 2.6793849821970606e-06, "loss": 0.38278305530548096, "step": 5693 }, { "epoch": 1.5472826086956522, "grad_norm": 1.1477219085200785, "learning_rate": 2.676324831641095e-06, "loss": 0.350339412689209, "step": 5694 }, { "epoch": 1.547554347826087, "grad_norm": 1.40257769060587, "learning_rate": 2.673266159600941e-06, "loss": 0.47854259610176086, "step": 5695 }, { "epoch": 1.5478260869565217, "grad_norm": 1.3245743674772243, "learning_rate": 2.670208966694082e-06, "loss": 0.4519600570201874, "step": 5696 }, { "epoch": 1.5480978260869565, "grad_norm": 1.2819935342078332, "learning_rate": 2.6671532535377132e-06, "loss": 0.44426190853118896, "step": 5697 }, { "epoch": 1.5483695652173912, "grad_norm": 1.2790353690418548, "learning_rate": 2.664099020748725e-06, "loss": 0.523400604724884, "step": 5698 }, { "epoch": 1.548641304347826, "grad_norm": 1.354522805391816, "learning_rate": 2.6610462689437155e-06, "loss": 0.4497143626213074, "step": 5699 }, { "epoch": 1.5489130434782608, "grad_norm": 1.4662071341861282, "learning_rate": 2.6579949987389732e-06, "loss": 0.490855872631073, "step": 5700 }, { "epoch": 1.5491847826086955, "grad_norm": 1.1277766604024897, "learning_rate": 2.654945210750497e-06, "loss": 0.31320858001708984, "step": 5701 }, { "epoch": 1.5494565217391303, "grad_norm": 1.2630445170632119, "learning_rate": 2.651896905593985e-06, "loss": 0.4166683554649353, "step": 5702 }, { "epoch": 1.549728260869565, "grad_norm": 1.2431173544812713, "learning_rate": 2.6488500838848284e-06, "loss": 0.3585166931152344, "step": 5703 }, { "epoch": 1.55, "grad_norm": 1.1778508000173264, "learning_rate": 2.64580474623813e-06, "loss": 0.3349170684814453, "step": 5704 }, { "epoch": 1.5502717391304348, "grad_norm": 1.3258764606490825, "learning_rate": 2.642760893268684e-06, "loss": 0.5145168304443359, "step": 5705 }, { "epoch": 1.5505434782608696, "grad_norm": 1.1838506620571767, "learning_rate": 2.639718525590994e-06, "loss": 0.34771329164505005, "step": 5706 }, { "epoch": 1.5508152173913043, "grad_norm": 1.207726135566758, "learning_rate": 2.6366776438192477e-06, "loss": 0.3566966652870178, "step": 5707 }, { "epoch": 1.5510869565217391, "grad_norm": 1.3065455129263646, "learning_rate": 2.6336382485673574e-06, "loss": 0.3906797766685486, "step": 5708 }, { "epoch": 1.5513586956521739, "grad_norm": 1.3830884862334478, "learning_rate": 2.6306003404489146e-06, "loss": 0.46739327907562256, "step": 5709 }, { "epoch": 1.5516304347826086, "grad_norm": 1.3421486652113077, "learning_rate": 2.6275639200772096e-06, "loss": 0.4644995331764221, "step": 5710 }, { "epoch": 1.5519021739130436, "grad_norm": 1.2192188750133577, "learning_rate": 2.6245289880652535e-06, "loss": 0.4464772343635559, "step": 5711 }, { "epoch": 1.5521739130434784, "grad_norm": 1.2527270592438466, "learning_rate": 2.621495545025735e-06, "loss": 0.4041401445865631, "step": 5712 }, { "epoch": 1.5524456521739132, "grad_norm": 1.3776078139192147, "learning_rate": 2.618463591571052e-06, "loss": 0.39325231313705444, "step": 5713 }, { "epoch": 1.552717391304348, "grad_norm": 1.755464984425608, "learning_rate": 2.615433128313303e-06, "loss": 0.4295448064804077, "step": 5714 }, { "epoch": 1.5529891304347827, "grad_norm": 1.109695209240555, "learning_rate": 2.612404155864282e-06, "loss": 0.3505665957927704, "step": 5715 }, { "epoch": 1.5532608695652175, "grad_norm": 1.3533976445738012, "learning_rate": 2.609376674835481e-06, "loss": 0.4184172749519348, "step": 5716 }, { "epoch": 1.5535326086956522, "grad_norm": 0.9056462213242495, "learning_rate": 2.6063506858380937e-06, "loss": 0.25450557470321655, "step": 5717 }, { "epoch": 1.553804347826087, "grad_norm": 1.2328354745139096, "learning_rate": 2.603326189483015e-06, "loss": 0.2758345305919647, "step": 5718 }, { "epoch": 1.5540760869565218, "grad_norm": 1.22640499561568, "learning_rate": 2.6003031863808305e-06, "loss": 0.37010401487350464, "step": 5719 }, { "epoch": 1.5543478260869565, "grad_norm": 1.3878814800723867, "learning_rate": 2.5972816771418306e-06, "loss": 0.4992925524711609, "step": 5720 }, { "epoch": 1.5546195652173913, "grad_norm": 1.3434344174303499, "learning_rate": 2.594261662376003e-06, "loss": 0.4989641308784485, "step": 5721 }, { "epoch": 1.554891304347826, "grad_norm": 1.5992717384262065, "learning_rate": 2.591243142693037e-06, "loss": 0.44294148683547974, "step": 5722 }, { "epoch": 1.5551630434782608, "grad_norm": 1.2346676433898542, "learning_rate": 2.588226118702308e-06, "loss": 0.43485182523727417, "step": 5723 }, { "epoch": 1.5554347826086956, "grad_norm": 1.2157577589759732, "learning_rate": 2.585210591012909e-06, "loss": 0.4010952413082123, "step": 5724 }, { "epoch": 1.5557065217391304, "grad_norm": 1.095743645503469, "learning_rate": 2.582196560233612e-06, "loss": 0.33751463890075684, "step": 5725 }, { "epoch": 1.5559782608695651, "grad_norm": 1.198369071751335, "learning_rate": 2.579184026972892e-06, "loss": 0.3690326511859894, "step": 5726 }, { "epoch": 1.55625, "grad_norm": 1.2439613589885816, "learning_rate": 2.576172991838933e-06, "loss": 0.3828979730606079, "step": 5727 }, { "epoch": 1.5565217391304347, "grad_norm": 1.2464525318214401, "learning_rate": 2.573163455439601e-06, "loss": 0.35030463337898254, "step": 5728 }, { "epoch": 1.5567934782608694, "grad_norm": 1.2561823104089007, "learning_rate": 2.570155418382473e-06, "loss": 0.4083555340766907, "step": 5729 }, { "epoch": 1.5570652173913042, "grad_norm": 1.3361908335744273, "learning_rate": 2.5671488812748047e-06, "loss": 0.45628225803375244, "step": 5730 }, { "epoch": 1.557336956521739, "grad_norm": 1.0552235482291359, "learning_rate": 2.5641438447235745e-06, "loss": 0.39169949293136597, "step": 5731 }, { "epoch": 1.5576086956521737, "grad_norm": 1.295496054413047, "learning_rate": 2.5611403093354347e-06, "loss": 0.4209042489528656, "step": 5732 }, { "epoch": 1.5578804347826087, "grad_norm": 1.1916341403121335, "learning_rate": 2.5581382757167484e-06, "loss": 0.3295481204986572, "step": 5733 }, { "epoch": 1.5581521739130435, "grad_norm": 1.1467482865692236, "learning_rate": 2.5551377444735728e-06, "loss": 0.3673902750015259, "step": 5734 }, { "epoch": 1.5584239130434783, "grad_norm": 1.1686620771138618, "learning_rate": 2.552138716211654e-06, "loss": 0.34807276725769043, "step": 5735 }, { "epoch": 1.558695652173913, "grad_norm": 1.4284445726639514, "learning_rate": 2.549141191536445e-06, "loss": 0.450243204832077, "step": 5736 }, { "epoch": 1.5589673913043478, "grad_norm": 1.141459563839613, "learning_rate": 2.5461451710530905e-06, "loss": 0.38743889331817627, "step": 5737 }, { "epoch": 1.5592391304347826, "grad_norm": 1.4779023323564995, "learning_rate": 2.5431506553664352e-06, "loss": 0.47316133975982666, "step": 5738 }, { "epoch": 1.5595108695652173, "grad_norm": 1.2024168925793428, "learning_rate": 2.5401576450810105e-06, "loss": 0.39965349435806274, "step": 5739 }, { "epoch": 1.5597826086956523, "grad_norm": 1.3721770124185462, "learning_rate": 2.537166140801054e-06, "loss": 0.45282527804374695, "step": 5740 }, { "epoch": 1.560054347826087, "grad_norm": 1.1546434828108267, "learning_rate": 2.5341761431304945e-06, "loss": 0.3740600347518921, "step": 5741 }, { "epoch": 1.5603260869565219, "grad_norm": 1.4491489739903722, "learning_rate": 2.531187652672962e-06, "loss": 0.5160682797431946, "step": 5742 }, { "epoch": 1.5605978260869566, "grad_norm": 1.4290140831880844, "learning_rate": 2.5282006700317717e-06, "loss": 0.44962024688720703, "step": 5743 }, { "epoch": 1.5608695652173914, "grad_norm": 1.355598212788271, "learning_rate": 2.5252151958099436e-06, "loss": 0.45906662940979004, "step": 5744 }, { "epoch": 1.5611413043478262, "grad_norm": 1.2397568611203, "learning_rate": 2.5222312306101925e-06, "loss": 0.4130162298679352, "step": 5745 }, { "epoch": 1.561413043478261, "grad_norm": 1.2226194823761887, "learning_rate": 2.519248775034918e-06, "loss": 0.428400456905365, "step": 5746 }, { "epoch": 1.5616847826086957, "grad_norm": 1.2679120857180248, "learning_rate": 2.516267829686235e-06, "loss": 0.40555840730667114, "step": 5747 }, { "epoch": 1.5619565217391305, "grad_norm": 1.1892321428250028, "learning_rate": 2.513288395165934e-06, "loss": 0.42635852098464966, "step": 5748 }, { "epoch": 1.5622282608695652, "grad_norm": 1.1524517309541027, "learning_rate": 2.510310472075509e-06, "loss": 0.3653263449668884, "step": 5749 }, { "epoch": 1.5625, "grad_norm": 1.212936705654529, "learning_rate": 2.507334061016149e-06, "loss": 0.36242127418518066, "step": 5750 }, { "epoch": 1.5627717391304348, "grad_norm": 1.2593902114892108, "learning_rate": 2.504359162588741e-06, "loss": 0.42735135555267334, "step": 5751 }, { "epoch": 1.5630434782608695, "grad_norm": 1.1299680412473574, "learning_rate": 2.5013857773938545e-06, "loss": 0.36933618783950806, "step": 5752 }, { "epoch": 1.5633152173913043, "grad_norm": 1.3077427614442645, "learning_rate": 2.4984139060317658e-06, "loss": 0.43736666440963745, "step": 5753 }, { "epoch": 1.563586956521739, "grad_norm": 1.2679731861901056, "learning_rate": 2.495443549102444e-06, "loss": 0.4058845043182373, "step": 5754 }, { "epoch": 1.5638586956521738, "grad_norm": 1.1024293327822787, "learning_rate": 2.4924747072055445e-06, "loss": 0.36595597863197327, "step": 5755 }, { "epoch": 1.5641304347826086, "grad_norm": 1.028139801973843, "learning_rate": 2.4895073809404223e-06, "loss": 0.31148049235343933, "step": 5756 }, { "epoch": 1.5644021739130434, "grad_norm": 1.3140939643758618, "learning_rate": 2.48654157090613e-06, "loss": 0.45646899938583374, "step": 5757 }, { "epoch": 1.5646739130434781, "grad_norm": 1.352413323582405, "learning_rate": 2.48357727770141e-06, "loss": 0.4098276197910309, "step": 5758 }, { "epoch": 1.564945652173913, "grad_norm": 1.411958545401816, "learning_rate": 2.4806145019246953e-06, "loss": 0.48404330015182495, "step": 5759 }, { "epoch": 1.5652173913043477, "grad_norm": 1.3167509699769027, "learning_rate": 2.4776532441741176e-06, "loss": 0.46340322494506836, "step": 5760 }, { "epoch": 1.5654891304347827, "grad_norm": 1.2055776850527007, "learning_rate": 2.474693505047504e-06, "loss": 0.3817805051803589, "step": 5761 }, { "epoch": 1.5657608695652174, "grad_norm": 1.3725188944308584, "learning_rate": 2.471735285142366e-06, "loss": 0.5279587507247925, "step": 5762 }, { "epoch": 1.5660326086956522, "grad_norm": 1.0993530967801066, "learning_rate": 2.4687785850559153e-06, "loss": 0.3348439037799835, "step": 5763 }, { "epoch": 1.566304347826087, "grad_norm": 1.2588088461586118, "learning_rate": 2.465823405385057e-06, "loss": 0.42257586121559143, "step": 5764 }, { "epoch": 1.5665760869565217, "grad_norm": 1.3950964775339008, "learning_rate": 2.4628697467263916e-06, "loss": 0.4954386353492737, "step": 5765 }, { "epoch": 1.5668478260869565, "grad_norm": 1.0106599801426286, "learning_rate": 2.459917609676198e-06, "loss": 0.30074864625930786, "step": 5766 }, { "epoch": 1.5671195652173913, "grad_norm": 1.2954494360032751, "learning_rate": 2.45696699483047e-06, "loss": 0.4175713062286377, "step": 5767 }, { "epoch": 1.5673913043478263, "grad_norm": 1.388218594985925, "learning_rate": 2.4540179027848786e-06, "loss": 0.5000905394554138, "step": 5768 }, { "epoch": 1.567663043478261, "grad_norm": 1.2144871295933959, "learning_rate": 2.451070334134784e-06, "loss": 0.4268547296524048, "step": 5769 }, { "epoch": 1.5679347826086958, "grad_norm": 1.4968866065192372, "learning_rate": 2.448124289475259e-06, "loss": 0.43282729387283325, "step": 5770 }, { "epoch": 1.5682065217391306, "grad_norm": 1.246020186836564, "learning_rate": 2.445179769401047e-06, "loss": 0.38298606872558594, "step": 5771 }, { "epoch": 1.5684782608695653, "grad_norm": 1.2915457047139742, "learning_rate": 2.4422367745065955e-06, "loss": 0.4061438739299774, "step": 5772 }, { "epoch": 1.56875, "grad_norm": 1.0673603458717942, "learning_rate": 2.4392953053860415e-06, "loss": 0.32781463861465454, "step": 5773 }, { "epoch": 1.5690217391304349, "grad_norm": 1.2245828391688367, "learning_rate": 2.4363553626332157e-06, "loss": 0.37129437923431396, "step": 5774 }, { "epoch": 1.5692934782608696, "grad_norm": 1.258901307487102, "learning_rate": 2.4334169468416337e-06, "loss": 0.4222143292427063, "step": 5775 }, { "epoch": 1.5695652173913044, "grad_norm": 1.1224072337649278, "learning_rate": 2.4304800586045106e-06, "loss": 0.34964337944984436, "step": 5776 }, { "epoch": 1.5698369565217392, "grad_norm": 1.3563903807483366, "learning_rate": 2.427544698514753e-06, "loss": 0.39911386370658875, "step": 5777 }, { "epoch": 1.570108695652174, "grad_norm": 1.5661212864620575, "learning_rate": 2.4246108671649514e-06, "loss": 0.4938831329345703, "step": 5778 }, { "epoch": 1.5703804347826087, "grad_norm": 1.3287564687614077, "learning_rate": 2.421678565147394e-06, "loss": 0.3785012364387512, "step": 5779 }, { "epoch": 1.5706521739130435, "grad_norm": 1.2394008201520856, "learning_rate": 2.4187477930540605e-06, "loss": 0.41823840141296387, "step": 5780 }, { "epoch": 1.5709239130434782, "grad_norm": 1.1397545718968736, "learning_rate": 2.4158185514766206e-06, "loss": 0.38932836055755615, "step": 5781 }, { "epoch": 1.571195652173913, "grad_norm": 1.3504353012258972, "learning_rate": 2.4128908410064276e-06, "loss": 0.4371057152748108, "step": 5782 }, { "epoch": 1.5714673913043478, "grad_norm": 1.2497745481207743, "learning_rate": 2.4099646622345453e-06, "loss": 0.4219608008861542, "step": 5783 }, { "epoch": 1.5717391304347825, "grad_norm": 1.294704694268601, "learning_rate": 2.4070400157517036e-06, "loss": 0.45646440982818604, "step": 5784 }, { "epoch": 1.5720108695652173, "grad_norm": 1.1066198917336878, "learning_rate": 2.4041169021483414e-06, "loss": 0.3518408536911011, "step": 5785 }, { "epoch": 1.572282608695652, "grad_norm": 1.3903534582668637, "learning_rate": 2.401195322014581e-06, "loss": 0.44622206687927246, "step": 5786 }, { "epoch": 1.5725543478260868, "grad_norm": 1.4685362033973757, "learning_rate": 2.398275275940234e-06, "loss": 0.4312310516834259, "step": 5787 }, { "epoch": 1.5728260869565216, "grad_norm": 1.1895061197300172, "learning_rate": 2.3953567645148044e-06, "loss": 0.4068242609500885, "step": 5788 }, { "epoch": 1.5730978260869564, "grad_norm": 1.1757911947729707, "learning_rate": 2.3924397883274863e-06, "loss": 0.36795032024383545, "step": 5789 }, { "epoch": 1.5733695652173914, "grad_norm": 1.238192314087365, "learning_rate": 2.389524347967168e-06, "loss": 0.45925790071487427, "step": 5790 }, { "epoch": 1.5736413043478261, "grad_norm": 1.2710687718121527, "learning_rate": 2.386610444022417e-06, "loss": 0.42635124921798706, "step": 5791 }, { "epoch": 1.5739130434782609, "grad_norm": 1.0910491837555965, "learning_rate": 2.3836980770815e-06, "loss": 0.33780479431152344, "step": 5792 }, { "epoch": 1.5741847826086957, "grad_norm": 1.3137060518759933, "learning_rate": 2.3807872477323736e-06, "loss": 0.43134385347366333, "step": 5793 }, { "epoch": 1.5744565217391304, "grad_norm": 1.413167068923047, "learning_rate": 2.3778779565626753e-06, "loss": 0.46247661113739014, "step": 5794 }, { "epoch": 1.5747282608695652, "grad_norm": 1.2714954381071109, "learning_rate": 2.374970204159739e-06, "loss": 0.4540225863456726, "step": 5795 }, { "epoch": 1.575, "grad_norm": 1.1582568105596365, "learning_rate": 2.3720639911105882e-06, "loss": 0.3465965986251831, "step": 5796 }, { "epoch": 1.575271739130435, "grad_norm": 1.1037419997148852, "learning_rate": 2.369159318001937e-06, "loss": 0.30156540870666504, "step": 5797 }, { "epoch": 1.5755434782608697, "grad_norm": 1.3928872865139195, "learning_rate": 2.3662561854201795e-06, "loss": 0.41178035736083984, "step": 5798 }, { "epoch": 1.5758152173913045, "grad_norm": 1.3370437394312864, "learning_rate": 2.363354593951407e-06, "loss": 0.5101302862167358, "step": 5799 }, { "epoch": 1.5760869565217392, "grad_norm": 1.3546627420058404, "learning_rate": 2.3604545441813997e-06, "loss": 0.40726733207702637, "step": 5800 }, { "epoch": 1.576358695652174, "grad_norm": 1.272768015782484, "learning_rate": 2.3575560366956252e-06, "loss": 0.4142055809497833, "step": 5801 }, { "epoch": 1.5766304347826088, "grad_norm": 1.3630313852513258, "learning_rate": 2.354659072079234e-06, "loss": 0.4896509647369385, "step": 5802 }, { "epoch": 1.5769021739130435, "grad_norm": 1.4456165210873693, "learning_rate": 2.351763650917074e-06, "loss": 0.5386984944343567, "step": 5803 }, { "epoch": 1.5771739130434783, "grad_norm": 1.2675304322965817, "learning_rate": 2.3488697737936796e-06, "loss": 0.4040870666503906, "step": 5804 }, { "epoch": 1.577445652173913, "grad_norm": 1.3983594184661476, "learning_rate": 2.3459774412932634e-06, "loss": 0.4917982518672943, "step": 5805 }, { "epoch": 1.5777173913043478, "grad_norm": 1.3024685490711363, "learning_rate": 2.3430866539997456e-06, "loss": 0.4669269025325775, "step": 5806 }, { "epoch": 1.5779891304347826, "grad_norm": 1.1490219795856547, "learning_rate": 2.340197412496714e-06, "loss": 0.4013846516609192, "step": 5807 }, { "epoch": 1.5782608695652174, "grad_norm": 1.312689153310732, "learning_rate": 2.3373097173674573e-06, "loss": 0.4643843173980713, "step": 5808 }, { "epoch": 1.5785326086956522, "grad_norm": 1.2977352128024913, "learning_rate": 2.334423569194948e-06, "loss": 0.4607933461666107, "step": 5809 }, { "epoch": 1.578804347826087, "grad_norm": 1.1376048241563632, "learning_rate": 2.3315389685618483e-06, "loss": 0.4200019836425781, "step": 5810 }, { "epoch": 1.5790760869565217, "grad_norm": 1.2990350882190804, "learning_rate": 2.328655916050503e-06, "loss": 0.4669261574745178, "step": 5811 }, { "epoch": 1.5793478260869565, "grad_norm": 1.3206501682929022, "learning_rate": 2.325774412242947e-06, "loss": 0.41777873039245605, "step": 5812 }, { "epoch": 1.5796195652173912, "grad_norm": 1.4482220434025328, "learning_rate": 2.322894457720909e-06, "loss": 0.48250818252563477, "step": 5813 }, { "epoch": 1.579891304347826, "grad_norm": 1.3801533935460306, "learning_rate": 2.3200160530657924e-06, "loss": 0.4376501441001892, "step": 5814 }, { "epoch": 1.5801630434782608, "grad_norm": 1.3254705668128643, "learning_rate": 2.3171391988586965e-06, "loss": 0.425070583820343, "step": 5815 }, { "epoch": 1.5804347826086955, "grad_norm": 1.2929893634736698, "learning_rate": 2.3142638956804063e-06, "loss": 0.41141068935394287, "step": 5816 }, { "epoch": 1.5807065217391303, "grad_norm": 1.2082442050300537, "learning_rate": 2.3113901441113953e-06, "loss": 0.35528117418289185, "step": 5817 }, { "epoch": 1.580978260869565, "grad_norm": 1.1868391302430235, "learning_rate": 2.3085179447318118e-06, "loss": 0.38347750902175903, "step": 5818 }, { "epoch": 1.58125, "grad_norm": 0.9942470766367769, "learning_rate": 2.3056472981215126e-06, "loss": 0.2886614501476288, "step": 5819 }, { "epoch": 1.5815217391304348, "grad_norm": 1.2330850348834976, "learning_rate": 2.3027782048600247e-06, "loss": 0.3816852867603302, "step": 5820 }, { "epoch": 1.5817934782608696, "grad_norm": 1.083265848590981, "learning_rate": 2.2999106655265557e-06, "loss": 0.30105215311050415, "step": 5821 }, { "epoch": 1.5820652173913043, "grad_norm": 1.1410541844813675, "learning_rate": 2.2970446807000237e-06, "loss": 0.3368958830833435, "step": 5822 }, { "epoch": 1.5823369565217391, "grad_norm": 1.6324819338198824, "learning_rate": 2.2941802509590096e-06, "loss": 0.512356698513031, "step": 5823 }, { "epoch": 1.5826086956521739, "grad_norm": 1.4052338791822174, "learning_rate": 2.2913173768817942e-06, "loss": 0.488578200340271, "step": 5824 }, { "epoch": 1.5828804347826086, "grad_norm": 1.1678298398443412, "learning_rate": 2.288456059046331e-06, "loss": 0.36832669377326965, "step": 5825 }, { "epoch": 1.5831521739130436, "grad_norm": 1.2094714709070538, "learning_rate": 2.2855962980302794e-06, "loss": 0.3634375333786011, "step": 5826 }, { "epoch": 1.5834239130434784, "grad_norm": 1.1802210405402536, "learning_rate": 2.2827380944109635e-06, "loss": 0.3509398400783539, "step": 5827 }, { "epoch": 1.5836956521739132, "grad_norm": 1.0474137236686853, "learning_rate": 2.2798814487654065e-06, "loss": 0.3304743468761444, "step": 5828 }, { "epoch": 1.583967391304348, "grad_norm": 1.3667698234729813, "learning_rate": 2.2770263616703137e-06, "loss": 0.4707241654396057, "step": 5829 }, { "epoch": 1.5842391304347827, "grad_norm": 1.4320964177091362, "learning_rate": 2.27417283370207e-06, "loss": 0.48919230699539185, "step": 5830 }, { "epoch": 1.5845108695652175, "grad_norm": 1.2102022187811858, "learning_rate": 2.2713208654367534e-06, "loss": 0.4206088185310364, "step": 5831 }, { "epoch": 1.5847826086956522, "grad_norm": 1.4972898720753987, "learning_rate": 2.268470457450125e-06, "loss": 0.44041013717651367, "step": 5832 }, { "epoch": 1.585054347826087, "grad_norm": 1.2709216560227372, "learning_rate": 2.265621610317631e-06, "loss": 0.5028864145278931, "step": 5833 }, { "epoch": 1.5853260869565218, "grad_norm": 1.1391621446425535, "learning_rate": 2.2627743246143964e-06, "loss": 0.3667675256729126, "step": 5834 }, { "epoch": 1.5855978260869565, "grad_norm": 1.2820585094270966, "learning_rate": 2.2599286009152387e-06, "loss": 0.37794357538223267, "step": 5835 }, { "epoch": 1.5858695652173913, "grad_norm": 1.4246278851707828, "learning_rate": 2.2570844397946613e-06, "loss": 0.45103955268859863, "step": 5836 }, { "epoch": 1.586141304347826, "grad_norm": 1.2141241627444654, "learning_rate": 2.254241841826841e-06, "loss": 0.3619440197944641, "step": 5837 }, { "epoch": 1.5864130434782608, "grad_norm": 1.4812114864422232, "learning_rate": 2.2514008075856507e-06, "loss": 0.5204609632492065, "step": 5838 }, { "epoch": 1.5866847826086956, "grad_norm": 1.340903352979805, "learning_rate": 2.2485613376446414e-06, "loss": 0.4143804609775543, "step": 5839 }, { "epoch": 1.5869565217391304, "grad_norm": 1.3640933972705067, "learning_rate": 2.2457234325770537e-06, "loss": 0.4339088797569275, "step": 5840 }, { "epoch": 1.5872282608695651, "grad_norm": 1.3276039808285318, "learning_rate": 2.2428870929558012e-06, "loss": 0.372394323348999, "step": 5841 }, { "epoch": 1.5875, "grad_norm": 1.0665657403063609, "learning_rate": 2.2400523193534985e-06, "loss": 0.4017717242240906, "step": 5842 }, { "epoch": 1.5877717391304347, "grad_norm": 1.1897334312368606, "learning_rate": 2.237219112342426e-06, "loss": 0.43223148584365845, "step": 5843 }, { "epoch": 1.5880434782608694, "grad_norm": 1.3119417137877125, "learning_rate": 2.23438747249456e-06, "loss": 0.47962135076522827, "step": 5844 }, { "epoch": 1.5883152173913042, "grad_norm": 1.2403980019549887, "learning_rate": 2.23155740038156e-06, "loss": 0.37238600850105286, "step": 5845 }, { "epoch": 1.588586956521739, "grad_norm": 1.3652316622267955, "learning_rate": 2.228728896574759e-06, "loss": 0.48719918727874756, "step": 5846 }, { "epoch": 1.5888586956521737, "grad_norm": 1.2357731482741336, "learning_rate": 2.225901961645183e-06, "loss": 0.4221993088722229, "step": 5847 }, { "epoch": 1.5891304347826087, "grad_norm": 1.181036856443671, "learning_rate": 2.223076596163538e-06, "loss": 0.33668577671051025, "step": 5848 }, { "epoch": 1.5894021739130435, "grad_norm": 1.333784987199622, "learning_rate": 2.2202528007002176e-06, "loss": 0.46142813563346863, "step": 5849 }, { "epoch": 1.5896739130434783, "grad_norm": 1.43651087317056, "learning_rate": 2.217430575825289e-06, "loss": 0.4213520884513855, "step": 5850 }, { "epoch": 1.589945652173913, "grad_norm": 1.3426116973238718, "learning_rate": 2.214609922108508e-06, "loss": 0.49366337060928345, "step": 5851 }, { "epoch": 1.5902173913043478, "grad_norm": 1.1795163233755068, "learning_rate": 2.2117908401193143e-06, "loss": 0.34452253580093384, "step": 5852 }, { "epoch": 1.5904891304347826, "grad_norm": 1.246992814237485, "learning_rate": 2.2089733304268334e-06, "loss": 0.4076848328113556, "step": 5853 }, { "epoch": 1.5907608695652173, "grad_norm": 1.4436177086895554, "learning_rate": 2.2061573935998603e-06, "loss": 0.41041332483291626, "step": 5854 }, { "epoch": 1.5910326086956523, "grad_norm": 1.2270902485832815, "learning_rate": 2.203343030206886e-06, "loss": 0.39001235365867615, "step": 5855 }, { "epoch": 1.591304347826087, "grad_norm": 1.0899869057788691, "learning_rate": 2.20053024081608e-06, "loss": 0.34725314378738403, "step": 5856 }, { "epoch": 1.5915760869565219, "grad_norm": 1.1650329494224256, "learning_rate": 2.1977190259952887e-06, "loss": 0.39505574107170105, "step": 5857 }, { "epoch": 1.5918478260869566, "grad_norm": 1.2382895816202475, "learning_rate": 2.1949093863120473e-06, "loss": 0.45191580057144165, "step": 5858 }, { "epoch": 1.5921195652173914, "grad_norm": 1.1229963911966765, "learning_rate": 2.1921013223335705e-06, "loss": 0.34332382678985596, "step": 5859 }, { "epoch": 1.5923913043478262, "grad_norm": 1.2178090271577775, "learning_rate": 2.1892948346267583e-06, "loss": 0.36941763758659363, "step": 5860 }, { "epoch": 1.592663043478261, "grad_norm": 1.4048092368384213, "learning_rate": 2.1864899237581793e-06, "loss": 0.47007185220718384, "step": 5861 }, { "epoch": 1.5929347826086957, "grad_norm": 1.2713573519797887, "learning_rate": 2.183686590294106e-06, "loss": 0.43014365434646606, "step": 5862 }, { "epoch": 1.5932065217391305, "grad_norm": 1.32419537786232, "learning_rate": 2.180884834800474e-06, "loss": 0.4467531442642212, "step": 5863 }, { "epoch": 1.5934782608695652, "grad_norm": 1.1090150144357875, "learning_rate": 2.1780846578429016e-06, "loss": 0.33662718534469604, "step": 5864 }, { "epoch": 1.59375, "grad_norm": 1.2988379375095775, "learning_rate": 2.1752860599867033e-06, "loss": 0.3942842185497284, "step": 5865 }, { "epoch": 1.5940217391304348, "grad_norm": 1.1581895412451206, "learning_rate": 2.172489041796856e-06, "loss": 0.36617231369018555, "step": 5866 }, { "epoch": 1.5942934782608695, "grad_norm": 1.2669775824918177, "learning_rate": 2.169693603838031e-06, "loss": 0.41508352756500244, "step": 5867 }, { "epoch": 1.5945652173913043, "grad_norm": 1.322129280045516, "learning_rate": 2.1668997466745746e-06, "loss": 0.4327709674835205, "step": 5868 }, { "epoch": 1.594836956521739, "grad_norm": 1.2938391571067624, "learning_rate": 2.1641074708705177e-06, "loss": 0.4634144604206085, "step": 5869 }, { "epoch": 1.5951086956521738, "grad_norm": 1.304540249785207, "learning_rate": 2.161316776989566e-06, "loss": 0.4579671621322632, "step": 5870 }, { "epoch": 1.5953804347826086, "grad_norm": 1.440819095993718, "learning_rate": 2.1585276655951106e-06, "loss": 0.45362940430641174, "step": 5871 }, { "epoch": 1.5956521739130434, "grad_norm": 1.2265038744745085, "learning_rate": 2.1557401372502264e-06, "loss": 0.3760088086128235, "step": 5872 }, { "epoch": 1.5959239130434781, "grad_norm": 1.2512455393590942, "learning_rate": 2.1529541925176555e-06, "loss": 0.3829965889453888, "step": 5873 }, { "epoch": 1.596195652173913, "grad_norm": 1.331088950531237, "learning_rate": 2.1501698319598354e-06, "loss": 0.4672010540962219, "step": 5874 }, { "epoch": 1.5964673913043477, "grad_norm": 1.1693720977301767, "learning_rate": 2.1473870561388766e-06, "loss": 0.37620460987091064, "step": 5875 }, { "epoch": 1.5967391304347827, "grad_norm": 1.0915655255826502, "learning_rate": 2.1446058656165716e-06, "loss": 0.30564063787460327, "step": 5876 }, { "epoch": 1.5970108695652174, "grad_norm": 1.2875071447071276, "learning_rate": 2.141826260954385e-06, "loss": 0.3651844561100006, "step": 5877 }, { "epoch": 1.5972826086956522, "grad_norm": 1.251296792041313, "learning_rate": 2.1390482427134784e-06, "loss": 0.4251185357570648, "step": 5878 }, { "epoch": 1.597554347826087, "grad_norm": 1.3712347833847771, "learning_rate": 2.1362718114546777e-06, "loss": 0.4842654764652252, "step": 5879 }, { "epoch": 1.5978260869565217, "grad_norm": 1.3834815363527537, "learning_rate": 2.1334969677384887e-06, "loss": 0.4788033366203308, "step": 5880 }, { "epoch": 1.5980978260869565, "grad_norm": 1.141517039718135, "learning_rate": 2.1307237121251112e-06, "loss": 0.34458184242248535, "step": 5881 }, { "epoch": 1.5983695652173913, "grad_norm": 1.2132573569943457, "learning_rate": 2.1279520451744064e-06, "loss": 0.4059735834598541, "step": 5882 }, { "epoch": 1.5986413043478263, "grad_norm": 1.27167229049171, "learning_rate": 2.1251819674459263e-06, "loss": 0.41602593660354614, "step": 5883 }, { "epoch": 1.598913043478261, "grad_norm": 1.234641390611055, "learning_rate": 2.122413479498899e-06, "loss": 0.3720475435256958, "step": 5884 }, { "epoch": 1.5991847826086958, "grad_norm": 1.221566055931738, "learning_rate": 2.119646581892235e-06, "loss": 0.3869386315345764, "step": 5885 }, { "epoch": 1.5994565217391306, "grad_norm": 1.1819261011959665, "learning_rate": 2.1168812751845134e-06, "loss": 0.3699384927749634, "step": 5886 }, { "epoch": 1.5997282608695653, "grad_norm": 1.280213629679956, "learning_rate": 2.1141175599340026e-06, "loss": 0.4373124837875366, "step": 5887 }, { "epoch": 1.6, "grad_norm": 1.2781233383006785, "learning_rate": 2.1113554366986467e-06, "loss": 0.39883798360824585, "step": 5888 }, { "epoch": 1.6002717391304349, "grad_norm": 1.1262717985444246, "learning_rate": 2.1085949060360654e-06, "loss": 0.355920672416687, "step": 5889 }, { "epoch": 1.6005434782608696, "grad_norm": 1.1971737331275643, "learning_rate": 2.10583596850356e-06, "loss": 0.3745504915714264, "step": 5890 }, { "epoch": 1.6008152173913044, "grad_norm": 1.2282779185586339, "learning_rate": 2.1030786246581105e-06, "loss": 0.397807776927948, "step": 5891 }, { "epoch": 1.6010869565217392, "grad_norm": 1.3666310241705115, "learning_rate": 2.1003228750563752e-06, "loss": 0.4578159749507904, "step": 5892 }, { "epoch": 1.601358695652174, "grad_norm": 1.2910932145570428, "learning_rate": 2.0975687202546855e-06, "loss": 0.4868933856487274, "step": 5893 }, { "epoch": 1.6016304347826087, "grad_norm": 1.5275888586902364, "learning_rate": 2.094816160809058e-06, "loss": 0.48695623874664307, "step": 5894 }, { "epoch": 1.6019021739130435, "grad_norm": 1.4229589600790655, "learning_rate": 2.0920651972751816e-06, "loss": 0.4129268527030945, "step": 5895 }, { "epoch": 1.6021739130434782, "grad_norm": 1.2848211000429126, "learning_rate": 2.0893158302084303e-06, "loss": 0.4592401385307312, "step": 5896 }, { "epoch": 1.602445652173913, "grad_norm": 1.2536688322566096, "learning_rate": 2.086568060163845e-06, "loss": 0.35986602306365967, "step": 5897 }, { "epoch": 1.6027173913043478, "grad_norm": 1.3013905318291281, "learning_rate": 2.0838218876961524e-06, "loss": 0.3994976282119751, "step": 5898 }, { "epoch": 1.6029891304347825, "grad_norm": 1.223350476301137, "learning_rate": 2.0810773133597574e-06, "loss": 0.3919108211994171, "step": 5899 }, { "epoch": 1.6032608695652173, "grad_norm": 1.2273649648214273, "learning_rate": 2.0783343377087307e-06, "loss": 0.37787503004074097, "step": 5900 }, { "epoch": 1.603532608695652, "grad_norm": 1.447178328465096, "learning_rate": 2.0755929612968404e-06, "loss": 0.44869786500930786, "step": 5901 }, { "epoch": 1.6038043478260868, "grad_norm": 1.2773399336836222, "learning_rate": 2.0728531846775113e-06, "loss": 0.4182931184768677, "step": 5902 }, { "epoch": 1.6040760869565216, "grad_norm": 1.2537276236119992, "learning_rate": 2.070115008403858e-06, "loss": 0.4054381549358368, "step": 5903 }, { "epoch": 1.6043478260869564, "grad_norm": 1.326774621156748, "learning_rate": 2.0673784330286673e-06, "loss": 0.4469253420829773, "step": 5904 }, { "epoch": 1.6046195652173914, "grad_norm": 1.2643841491836456, "learning_rate": 2.064643459104405e-06, "loss": 0.4009609818458557, "step": 5905 }, { "epoch": 1.6048913043478261, "grad_norm": 1.3468824142422742, "learning_rate": 2.06191008718321e-06, "loss": 0.4222134053707123, "step": 5906 }, { "epoch": 1.6051630434782609, "grad_norm": 1.2157113116640577, "learning_rate": 2.059178317816899e-06, "loss": 0.3699093461036682, "step": 5907 }, { "epoch": 1.6054347826086957, "grad_norm": 1.3339387053988163, "learning_rate": 2.0564481515569714e-06, "loss": 0.4401013255119324, "step": 5908 }, { "epoch": 1.6057065217391304, "grad_norm": 1.3660357835117674, "learning_rate": 2.0537195889545924e-06, "loss": 0.4420205354690552, "step": 5909 }, { "epoch": 1.6059782608695652, "grad_norm": 1.2811124804712253, "learning_rate": 2.0509926305606097e-06, "loss": 0.4172881841659546, "step": 5910 }, { "epoch": 1.60625, "grad_norm": 1.2762080432416538, "learning_rate": 2.0482672769255472e-06, "loss": 0.3873690366744995, "step": 5911 }, { "epoch": 1.606521739130435, "grad_norm": 1.3097147179565851, "learning_rate": 2.045543528599607e-06, "loss": 0.3902163505554199, "step": 5912 }, { "epoch": 1.6067934782608697, "grad_norm": 1.2072590764901039, "learning_rate": 2.0428213861326585e-06, "loss": 0.3474184274673462, "step": 5913 }, { "epoch": 1.6070652173913045, "grad_norm": 1.335504777975144, "learning_rate": 2.040100850074255e-06, "loss": 0.41521355509757996, "step": 5914 }, { "epoch": 1.6073369565217392, "grad_norm": 1.2156508973491764, "learning_rate": 2.0373819209736244e-06, "loss": 0.36532875895500183, "step": 5915 }, { "epoch": 1.607608695652174, "grad_norm": 1.0764978717318328, "learning_rate": 2.034664599379662e-06, "loss": 0.3238353133201599, "step": 5916 }, { "epoch": 1.6078804347826088, "grad_norm": 1.371497475608441, "learning_rate": 2.0319488858409552e-06, "loss": 0.4293394684791565, "step": 5917 }, { "epoch": 1.6081521739130435, "grad_norm": 1.1551872337541103, "learning_rate": 2.029234780905749e-06, "loss": 0.3913882374763489, "step": 5918 }, { "epoch": 1.6084239130434783, "grad_norm": 1.383596077289645, "learning_rate": 2.0265222851219747e-06, "loss": 0.4333352744579315, "step": 5919 }, { "epoch": 1.608695652173913, "grad_norm": 1.3310318611101217, "learning_rate": 2.0238113990372343e-06, "loss": 0.43604838848114014, "step": 5920 }, { "epoch": 1.6089673913043478, "grad_norm": 1.1910138453920989, "learning_rate": 2.0211021231988103e-06, "loss": 0.37358206510543823, "step": 5921 }, { "epoch": 1.6092391304347826, "grad_norm": 1.1536572635057452, "learning_rate": 2.018394458153652e-06, "loss": 0.32994866371154785, "step": 5922 }, { "epoch": 1.6095108695652174, "grad_norm": 1.3058148220641381, "learning_rate": 2.0156884044483814e-06, "loss": 0.4128517508506775, "step": 5923 }, { "epoch": 1.6097826086956522, "grad_norm": 1.3375927430832688, "learning_rate": 2.0129839626293134e-06, "loss": 0.4788677394390106, "step": 5924 }, { "epoch": 1.610054347826087, "grad_norm": 1.2743215936347192, "learning_rate": 2.0102811332424167e-06, "loss": 0.38006263971328735, "step": 5925 }, { "epoch": 1.6103260869565217, "grad_norm": 1.205150338803177, "learning_rate": 2.007579916833344e-06, "loss": 0.45426803827285767, "step": 5926 }, { "epoch": 1.6105978260869565, "grad_norm": 1.0634428441211892, "learning_rate": 2.0048803139474247e-06, "loss": 0.3051326274871826, "step": 5927 }, { "epoch": 1.6108695652173912, "grad_norm": 1.0807607674779625, "learning_rate": 2.002182325129659e-06, "loss": 0.36549633741378784, "step": 5928 }, { "epoch": 1.611141304347826, "grad_norm": 1.238658726432824, "learning_rate": 1.999485950924718e-06, "loss": 0.4213656783103943, "step": 5929 }, { "epoch": 1.6114130434782608, "grad_norm": 1.3547303883201514, "learning_rate": 1.996791191876951e-06, "loss": 0.4612373113632202, "step": 5930 }, { "epoch": 1.6116847826086955, "grad_norm": 1.2904052763118752, "learning_rate": 1.994098048530385e-06, "loss": 0.40144047141075134, "step": 5931 }, { "epoch": 1.6119565217391303, "grad_norm": 1.337127922860454, "learning_rate": 1.9914065214287104e-06, "loss": 0.4007454216480255, "step": 5932 }, { "epoch": 1.612228260869565, "grad_norm": 1.2375255157131078, "learning_rate": 1.988716611115299e-06, "loss": 0.39096975326538086, "step": 5933 }, { "epoch": 1.6125, "grad_norm": 1.2814638932952274, "learning_rate": 1.986028318133194e-06, "loss": 0.42611101269721985, "step": 5934 }, { "epoch": 1.6127717391304348, "grad_norm": 1.3765207605589271, "learning_rate": 1.983341643025117e-06, "loss": 0.4311988949775696, "step": 5935 }, { "epoch": 1.6130434782608696, "grad_norm": 1.1949357265861718, "learning_rate": 1.980656586333449e-06, "loss": 0.36961787939071655, "step": 5936 }, { "epoch": 1.6133152173913043, "grad_norm": 1.009826804833282, "learning_rate": 1.9779731486002664e-06, "loss": 0.343711793422699, "step": 5937 }, { "epoch": 1.6135869565217391, "grad_norm": 1.3390947246755935, "learning_rate": 1.975291330367296e-06, "loss": 0.4636226296424866, "step": 5938 }, { "epoch": 1.6138586956521739, "grad_norm": 1.300315056620844, "learning_rate": 1.97261113217595e-06, "loss": 0.41281819343566895, "step": 5939 }, { "epoch": 1.6141304347826086, "grad_norm": 1.1571826535471892, "learning_rate": 1.9699325545673165e-06, "loss": 0.33962905406951904, "step": 5940 }, { "epoch": 1.6144021739130436, "grad_norm": 1.132157718958285, "learning_rate": 1.967255598082144e-06, "loss": 0.32890963554382324, "step": 5941 }, { "epoch": 1.6146739130434784, "grad_norm": 1.3353192189651404, "learning_rate": 1.9645802632608636e-06, "loss": 0.45522522926330566, "step": 5942 }, { "epoch": 1.6149456521739132, "grad_norm": 1.146189071070244, "learning_rate": 1.9619065506435774e-06, "loss": 0.34795743227005005, "step": 5943 }, { "epoch": 1.615217391304348, "grad_norm": 1.4170525734942685, "learning_rate": 1.959234460770061e-06, "loss": 0.4959564507007599, "step": 5944 }, { "epoch": 1.6154891304347827, "grad_norm": 1.5097318278232774, "learning_rate": 1.9565639941797553e-06, "loss": 0.5525280833244324, "step": 5945 }, { "epoch": 1.6157608695652175, "grad_norm": 1.188795443960301, "learning_rate": 1.953895151411781e-06, "loss": 0.37823593616485596, "step": 5946 }, { "epoch": 1.6160326086956522, "grad_norm": 1.290114373780425, "learning_rate": 1.951227933004929e-06, "loss": 0.4113519489765167, "step": 5947 }, { "epoch": 1.616304347826087, "grad_norm": 1.4291053377208607, "learning_rate": 1.9485623394976638e-06, "loss": 0.5186833143234253, "step": 5948 }, { "epoch": 1.6165760869565218, "grad_norm": 1.1288531344544726, "learning_rate": 1.9458983714281154e-06, "loss": 0.35896551609039307, "step": 5949 }, { "epoch": 1.6168478260869565, "grad_norm": 1.190233136241936, "learning_rate": 1.9432360293340914e-06, "loss": 0.3831070065498352, "step": 5950 }, { "epoch": 1.6171195652173913, "grad_norm": 1.2800544804221428, "learning_rate": 1.9405753137530735e-06, "loss": 0.3979293704032898, "step": 5951 }, { "epoch": 1.617391304347826, "grad_norm": 0.9863642153549453, "learning_rate": 1.937916225222204e-06, "loss": 0.30662161111831665, "step": 5952 }, { "epoch": 1.6176630434782608, "grad_norm": 1.1566143421731832, "learning_rate": 1.9352587642783137e-06, "loss": 0.3586989641189575, "step": 5953 }, { "epoch": 1.6179347826086956, "grad_norm": 1.2035869879388572, "learning_rate": 1.9326029314578887e-06, "loss": 0.3804064393043518, "step": 5954 }, { "epoch": 1.6182065217391304, "grad_norm": 1.3411698608518579, "learning_rate": 1.929948727297096e-06, "loss": 0.4113002121448517, "step": 5955 }, { "epoch": 1.6184782608695651, "grad_norm": 1.2450903712845087, "learning_rate": 1.9272961523317677e-06, "loss": 0.47853079438209534, "step": 5956 }, { "epoch": 1.61875, "grad_norm": 1.2121369607564427, "learning_rate": 1.924645207097412e-06, "loss": 0.3969460129737854, "step": 5957 }, { "epoch": 1.6190217391304347, "grad_norm": 1.083191030370224, "learning_rate": 1.921995892129208e-06, "loss": 0.3321917951107025, "step": 5958 }, { "epoch": 1.6192934782608694, "grad_norm": 1.2488158248612158, "learning_rate": 1.919348207961996e-06, "loss": 0.42126452922821045, "step": 5959 }, { "epoch": 1.6195652173913042, "grad_norm": 1.1442864174770542, "learning_rate": 1.916702155130307e-06, "loss": 0.34822729229927063, "step": 5960 }, { "epoch": 1.619836956521739, "grad_norm": 1.3869962353166847, "learning_rate": 1.9140577341683218e-06, "loss": 0.4438280463218689, "step": 5961 }, { "epoch": 1.6201086956521737, "grad_norm": 1.200962122510764, "learning_rate": 1.911414945609903e-06, "loss": 0.3660414218902588, "step": 5962 }, { "epoch": 1.6203804347826087, "grad_norm": 1.2752563610588785, "learning_rate": 1.9087737899885806e-06, "loss": 0.4036870300769806, "step": 5963 }, { "epoch": 1.6206521739130435, "grad_norm": 1.0956829101796657, "learning_rate": 1.9061342678375595e-06, "loss": 0.31084322929382324, "step": 5964 }, { "epoch": 1.6209239130434783, "grad_norm": 1.1800481410391483, "learning_rate": 1.9034963796897044e-06, "loss": 0.3934238851070404, "step": 5965 }, { "epoch": 1.621195652173913, "grad_norm": 1.1734776640018745, "learning_rate": 1.9008601260775595e-06, "loss": 0.37504398822784424, "step": 5966 }, { "epoch": 1.6214673913043478, "grad_norm": 1.0783747351497452, "learning_rate": 1.898225507533339e-06, "loss": 0.33489173650741577, "step": 5967 }, { "epoch": 1.6217391304347826, "grad_norm": 1.2021130278499093, "learning_rate": 1.8955925245889194e-06, "loss": 0.35207927227020264, "step": 5968 }, { "epoch": 1.6220108695652173, "grad_norm": 1.2894582596434185, "learning_rate": 1.8929611777758528e-06, "loss": 0.3883925974369049, "step": 5969 }, { "epoch": 1.6222826086956523, "grad_norm": 1.4676939445642356, "learning_rate": 1.8903314676253615e-06, "loss": 0.4569198489189148, "step": 5970 }, { "epoch": 1.622554347826087, "grad_norm": 1.3293025710045088, "learning_rate": 1.8877033946683376e-06, "loss": 0.4669201374053955, "step": 5971 }, { "epoch": 1.6228260869565219, "grad_norm": 1.4108904577759938, "learning_rate": 1.8850769594353336e-06, "loss": 0.5239821672439575, "step": 5972 }, { "epoch": 1.6230978260869566, "grad_norm": 1.3786066937850445, "learning_rate": 1.8824521624565883e-06, "loss": 0.4852375388145447, "step": 5973 }, { "epoch": 1.6233695652173914, "grad_norm": 1.2003136344572087, "learning_rate": 1.8798290042619949e-06, "loss": 0.3953825831413269, "step": 5974 }, { "epoch": 1.6236413043478262, "grad_norm": 1.2287397267424955, "learning_rate": 1.8772074853811163e-06, "loss": 0.4320296347141266, "step": 5975 }, { "epoch": 1.623913043478261, "grad_norm": 1.3493170257298426, "learning_rate": 1.8745876063431989e-06, "loss": 0.4338153600692749, "step": 5976 }, { "epoch": 1.6241847826086957, "grad_norm": 1.2749034307576297, "learning_rate": 1.8719693676771412e-06, "loss": 0.3718206286430359, "step": 5977 }, { "epoch": 1.6244565217391305, "grad_norm": 1.393071070085828, "learning_rate": 1.8693527699115198e-06, "loss": 0.4436803460121155, "step": 5978 }, { "epoch": 1.6247282608695652, "grad_norm": 1.3815371568040606, "learning_rate": 1.8667378135745783e-06, "loss": 0.4224882125854492, "step": 5979 }, { "epoch": 1.625, "grad_norm": 1.1835961853008692, "learning_rate": 1.8641244991942299e-06, "loss": 0.38376355171203613, "step": 5980 }, { "epoch": 1.6252717391304348, "grad_norm": 1.369965625266548, "learning_rate": 1.861512827298051e-06, "loss": 0.4574962854385376, "step": 5981 }, { "epoch": 1.6255434782608695, "grad_norm": 1.1804017255691943, "learning_rate": 1.8589027984132925e-06, "loss": 0.4027119278907776, "step": 5982 }, { "epoch": 1.6258152173913043, "grad_norm": 1.2325779556435428, "learning_rate": 1.8562944130668735e-06, "loss": 0.2824884355068207, "step": 5983 }, { "epoch": 1.626086956521739, "grad_norm": 1.0481501097790336, "learning_rate": 1.8536876717853747e-06, "loss": 0.3867340087890625, "step": 5984 }, { "epoch": 1.6263586956521738, "grad_norm": 1.157660536972683, "learning_rate": 1.8510825750950512e-06, "loss": 0.4101323187351227, "step": 5985 }, { "epoch": 1.6266304347826086, "grad_norm": 1.191437372627265, "learning_rate": 1.8484791235218247e-06, "loss": 0.40956079959869385, "step": 5986 }, { "epoch": 1.6269021739130434, "grad_norm": 1.0304078944145088, "learning_rate": 1.845877317591288e-06, "loss": 0.3816818594932556, "step": 5987 }, { "epoch": 1.6271739130434781, "grad_norm": 1.454486385398962, "learning_rate": 1.8432771578286911e-06, "loss": 0.494208425283432, "step": 5988 }, { "epoch": 1.627445652173913, "grad_norm": 1.6002637164656592, "learning_rate": 1.840678644758962e-06, "loss": 0.35460513830184937, "step": 5989 }, { "epoch": 1.6277173913043477, "grad_norm": 1.502668664011912, "learning_rate": 1.8380817789066929e-06, "loss": 0.5504698753356934, "step": 5990 }, { "epoch": 1.6279891304347827, "grad_norm": 1.427269216520149, "learning_rate": 1.8354865607961457e-06, "loss": 0.48436439037323, "step": 5991 }, { "epoch": 1.6282608695652174, "grad_norm": 1.2658859653048775, "learning_rate": 1.8328929909512428e-06, "loss": 0.44402819871902466, "step": 5992 }, { "epoch": 1.6285326086956522, "grad_norm": 1.3426405068057723, "learning_rate": 1.8303010698955803e-06, "loss": 0.42515307664871216, "step": 5993 }, { "epoch": 1.628804347826087, "grad_norm": 1.4142136463791442, "learning_rate": 1.8277107981524222e-06, "loss": 0.4728586673736572, "step": 5994 }, { "epoch": 1.6290760869565217, "grad_norm": 1.0899164566753745, "learning_rate": 1.8251221762446903e-06, "loss": 0.34958615899086, "step": 5995 }, { "epoch": 1.6293478260869565, "grad_norm": 1.2359358620312946, "learning_rate": 1.8225352046949896e-06, "loss": 0.4327515959739685, "step": 5996 }, { "epoch": 1.6296195652173913, "grad_norm": 1.1535684553986651, "learning_rate": 1.8199498840255737e-06, "loss": 0.3528054356575012, "step": 5997 }, { "epoch": 1.6298913043478263, "grad_norm": 1.3003391117489047, "learning_rate": 1.8173662147583749e-06, "loss": 0.35061851143836975, "step": 5998 }, { "epoch": 1.630163043478261, "grad_norm": 1.0706550886631239, "learning_rate": 1.8147841974149904e-06, "loss": 0.349473774433136, "step": 5999 }, { "epoch": 1.6304347826086958, "grad_norm": 1.22214004518047, "learning_rate": 1.812203832516678e-06, "loss": 0.3937668800354004, "step": 6000 }, { "epoch": 1.6307065217391306, "grad_norm": 1.1771197089314098, "learning_rate": 1.8096251205843685e-06, "loss": 0.4073689579963684, "step": 6001 }, { "epoch": 1.6309782608695653, "grad_norm": 1.14482929291558, "learning_rate": 1.8070480621386566e-06, "loss": 0.3231924772262573, "step": 6002 }, { "epoch": 1.63125, "grad_norm": 1.2441896262694618, "learning_rate": 1.8044726576998051e-06, "loss": 0.44813060760498047, "step": 6003 }, { "epoch": 1.6315217391304349, "grad_norm": 1.259322928910064, "learning_rate": 1.8018989077877368e-06, "loss": 0.36702919006347656, "step": 6004 }, { "epoch": 1.6317934782608696, "grad_norm": 1.3353174121760942, "learning_rate": 1.7993268129220453e-06, "loss": 0.42804962396621704, "step": 6005 }, { "epoch": 1.6320652173913044, "grad_norm": 1.2252171191475871, "learning_rate": 1.796756373621993e-06, "loss": 0.4455081820487976, "step": 6006 }, { "epoch": 1.6323369565217392, "grad_norm": 1.1673786094019194, "learning_rate": 1.7941875904065032e-06, "loss": 0.37010353803634644, "step": 6007 }, { "epoch": 1.632608695652174, "grad_norm": 1.1898379572856512, "learning_rate": 1.7916204637941626e-06, "loss": 0.4076036214828491, "step": 6008 }, { "epoch": 1.6328804347826087, "grad_norm": 1.5117670379148485, "learning_rate": 1.789054994303231e-06, "loss": 0.47482502460479736, "step": 6009 }, { "epoch": 1.6331521739130435, "grad_norm": 1.4639419580555455, "learning_rate": 1.7864911824516297e-06, "loss": 0.39477139711380005, "step": 6010 }, { "epoch": 1.6334239130434782, "grad_norm": 1.367045803497594, "learning_rate": 1.783929028756939e-06, "loss": 0.4219294786453247, "step": 6011 }, { "epoch": 1.633695652173913, "grad_norm": 1.114252022716945, "learning_rate": 1.7813685337364205e-06, "loss": 0.37312090396881104, "step": 6012 }, { "epoch": 1.6339673913043478, "grad_norm": 1.3405280430330284, "learning_rate": 1.7788096979069846e-06, "loss": 0.4276895225048065, "step": 6013 }, { "epoch": 1.6342391304347825, "grad_norm": 1.3474395412395042, "learning_rate": 1.7762525217852155e-06, "loss": 0.5041408538818359, "step": 6014 }, { "epoch": 1.6345108695652173, "grad_norm": 1.2854571439243818, "learning_rate": 1.7736970058873592e-06, "loss": 0.38352423906326294, "step": 6015 }, { "epoch": 1.634782608695652, "grad_norm": 1.045778090191478, "learning_rate": 1.771143150729332e-06, "loss": 0.32709068059921265, "step": 6016 }, { "epoch": 1.6350543478260868, "grad_norm": 1.266976225801293, "learning_rate": 1.7685909568267034e-06, "loss": 0.3973581790924072, "step": 6017 }, { "epoch": 1.6353260869565216, "grad_norm": 1.3663036326269455, "learning_rate": 1.7660404246947172e-06, "loss": 0.5051561594009399, "step": 6018 }, { "epoch": 1.6355978260869564, "grad_norm": 1.2086162272187986, "learning_rate": 1.7634915548482834e-06, "loss": 0.3684409260749817, "step": 6019 }, { "epoch": 1.6358695652173914, "grad_norm": 1.280227649155615, "learning_rate": 1.760944347801965e-06, "loss": 0.4107739329338074, "step": 6020 }, { "epoch": 1.6361413043478261, "grad_norm": 1.3023099441339852, "learning_rate": 1.7583988040700006e-06, "loss": 0.4214741587638855, "step": 6021 }, { "epoch": 1.6364130434782609, "grad_norm": 1.1743536236708867, "learning_rate": 1.7558549241662882e-06, "loss": 0.3819921016693115, "step": 6022 }, { "epoch": 1.6366847826086957, "grad_norm": 1.0347107317980158, "learning_rate": 1.7533127086043923e-06, "loss": 0.3197229504585266, "step": 6023 }, { "epoch": 1.6369565217391304, "grad_norm": 1.4318621474037914, "learning_rate": 1.7507721578975368e-06, "loss": 0.4217798709869385, "step": 6024 }, { "epoch": 1.6372282608695652, "grad_norm": 1.3079917199297613, "learning_rate": 1.7482332725586115e-06, "loss": 0.4376208186149597, "step": 6025 }, { "epoch": 1.6375, "grad_norm": 1.3235632398207064, "learning_rate": 1.7456960531001765e-06, "loss": 0.4307291507720947, "step": 6026 }, { "epoch": 1.637771739130435, "grad_norm": 1.2719708891196568, "learning_rate": 1.743160500034443e-06, "loss": 0.42716559767723083, "step": 6027 }, { "epoch": 1.6380434782608697, "grad_norm": 1.3154270511402768, "learning_rate": 1.7406266138732962e-06, "loss": 0.46313685178756714, "step": 6028 }, { "epoch": 1.6383152173913045, "grad_norm": 1.3129487828332813, "learning_rate": 1.7380943951282802e-06, "loss": 0.44171059131622314, "step": 6029 }, { "epoch": 1.6385869565217392, "grad_norm": 1.2388472708756968, "learning_rate": 1.735563844310607e-06, "loss": 0.4239020347595215, "step": 6030 }, { "epoch": 1.638858695652174, "grad_norm": 1.2583802340214143, "learning_rate": 1.7330349619311415e-06, "loss": 0.3962295651435852, "step": 6031 }, { "epoch": 1.6391304347826088, "grad_norm": 1.3321841723913301, "learning_rate": 1.7305077485004274e-06, "loss": 0.44679102301597595, "step": 6032 }, { "epoch": 1.6394021739130435, "grad_norm": 1.1565608275497243, "learning_rate": 1.7279822045286577e-06, "loss": 0.4165293574333191, "step": 6033 }, { "epoch": 1.6396739130434783, "grad_norm": 1.3099346584041531, "learning_rate": 1.7254583305256901e-06, "loss": 0.38081467151641846, "step": 6034 }, { "epoch": 1.639945652173913, "grad_norm": 1.3036874673692176, "learning_rate": 1.722936127001057e-06, "loss": 0.47713321447372437, "step": 6035 }, { "epoch": 1.6402173913043478, "grad_norm": 1.2744553982738513, "learning_rate": 1.7204155944639378e-06, "loss": 0.44840627908706665, "step": 6036 }, { "epoch": 1.6404891304347826, "grad_norm": 1.3055039647300035, "learning_rate": 1.7178967334231844e-06, "loss": 0.43006086349487305, "step": 6037 }, { "epoch": 1.6407608695652174, "grad_norm": 1.288914509204941, "learning_rate": 1.7153795443873088e-06, "loss": 0.40268561244010925, "step": 6038 }, { "epoch": 1.6410326086956522, "grad_norm": 1.232936804144875, "learning_rate": 1.7128640278644882e-06, "loss": 0.3992071747779846, "step": 6039 }, { "epoch": 1.641304347826087, "grad_norm": 1.3244365950605073, "learning_rate": 1.7103501843625537e-06, "loss": 0.41063830256462097, "step": 6040 }, { "epoch": 1.6415760869565217, "grad_norm": 1.2289835396013633, "learning_rate": 1.707838014389007e-06, "loss": 0.33543163537979126, "step": 6041 }, { "epoch": 1.6418478260869565, "grad_norm": 1.1416344653863757, "learning_rate": 1.705327518451011e-06, "loss": 0.2852887511253357, "step": 6042 }, { "epoch": 1.6421195652173912, "grad_norm": 1.4620780333282508, "learning_rate": 1.7028186970553862e-06, "loss": 0.46157628297805786, "step": 6043 }, { "epoch": 1.642391304347826, "grad_norm": 1.3308743581891247, "learning_rate": 1.7003115507086166e-06, "loss": 0.43285059928894043, "step": 6044 }, { "epoch": 1.6426630434782608, "grad_norm": 1.2644479658190806, "learning_rate": 1.6978060799168515e-06, "loss": 0.40617650747299194, "step": 6045 }, { "epoch": 1.6429347826086955, "grad_norm": 1.3886233336518017, "learning_rate": 1.6953022851859024e-06, "loss": 0.48987245559692383, "step": 6046 }, { "epoch": 1.6432065217391303, "grad_norm": 1.3823222319692499, "learning_rate": 1.6928001670212302e-06, "loss": 0.4851442873477936, "step": 6047 }, { "epoch": 1.643478260869565, "grad_norm": 1.2701155120659344, "learning_rate": 1.6902997259279785e-06, "loss": 0.4331859350204468, "step": 6048 }, { "epoch": 1.64375, "grad_norm": 1.2560954630119723, "learning_rate": 1.6878009624109315e-06, "loss": 0.35394763946533203, "step": 6049 }, { "epoch": 1.6440217391304348, "grad_norm": 1.4351920928747108, "learning_rate": 1.6853038769745466e-06, "loss": 0.45679932832717896, "step": 6050 }, { "epoch": 1.6442934782608696, "grad_norm": 1.190817838702382, "learning_rate": 1.682808470122943e-06, "loss": 0.35651126503944397, "step": 6051 }, { "epoch": 1.6445652173913043, "grad_norm": 1.4251536537715765, "learning_rate": 1.680314742359893e-06, "loss": 0.4667987823486328, "step": 6052 }, { "epoch": 1.6448369565217391, "grad_norm": 1.2675092071100724, "learning_rate": 1.6778226941888375e-06, "loss": 0.3745231032371521, "step": 6053 }, { "epoch": 1.6451086956521739, "grad_norm": 1.3763137366690692, "learning_rate": 1.6753323261128695e-06, "loss": 0.4362919330596924, "step": 6054 }, { "epoch": 1.6453804347826086, "grad_norm": 1.333642620819467, "learning_rate": 1.6728436386347568e-06, "loss": 0.389289528131485, "step": 6055 }, { "epoch": 1.6456521739130436, "grad_norm": 1.4380335511531297, "learning_rate": 1.6703566322569154e-06, "loss": 0.4378831088542938, "step": 6056 }, { "epoch": 1.6459239130434784, "grad_norm": 1.3152174910997947, "learning_rate": 1.6678713074814257e-06, "loss": 0.4395073652267456, "step": 6057 }, { "epoch": 1.6461956521739132, "grad_norm": 1.1669646963278428, "learning_rate": 1.665387664810032e-06, "loss": 0.41059237718582153, "step": 6058 }, { "epoch": 1.646467391304348, "grad_norm": 1.1415932859005786, "learning_rate": 1.6629057047441366e-06, "loss": 0.328380823135376, "step": 6059 }, { "epoch": 1.6467391304347827, "grad_norm": 1.2567145821771237, "learning_rate": 1.6604254277847974e-06, "loss": 0.4586275815963745, "step": 6060 }, { "epoch": 1.6470108695652175, "grad_norm": 1.2319895138124126, "learning_rate": 1.6579468344327386e-06, "loss": 0.39892157912254333, "step": 6061 }, { "epoch": 1.6472826086956522, "grad_norm": 1.0982185616041205, "learning_rate": 1.6554699251883477e-06, "loss": 0.33661210536956787, "step": 6062 }, { "epoch": 1.647554347826087, "grad_norm": 1.2838837684583047, "learning_rate": 1.65299470055166e-06, "loss": 0.4274390935897827, "step": 6063 }, { "epoch": 1.6478260869565218, "grad_norm": 1.119484939421885, "learning_rate": 1.6505211610223815e-06, "loss": 0.3667531907558441, "step": 6064 }, { "epoch": 1.6480978260869565, "grad_norm": 1.2918462224540075, "learning_rate": 1.6480493070998738e-06, "loss": 0.3882780075073242, "step": 6065 }, { "epoch": 1.6483695652173913, "grad_norm": 1.2324023358597826, "learning_rate": 1.645579139283161e-06, "loss": 0.37109869718551636, "step": 6066 }, { "epoch": 1.648641304347826, "grad_norm": 1.295618928796382, "learning_rate": 1.64311065807092e-06, "loss": 0.4127434492111206, "step": 6067 }, { "epoch": 1.6489130434782608, "grad_norm": 1.2542639349209317, "learning_rate": 1.6406438639614986e-06, "loss": 0.42651164531707764, "step": 6068 }, { "epoch": 1.6491847826086956, "grad_norm": 1.355843538700648, "learning_rate": 1.638178757452894e-06, "loss": 0.46031346917152405, "step": 6069 }, { "epoch": 1.6494565217391304, "grad_norm": 1.2684628585213211, "learning_rate": 1.6357153390427605e-06, "loss": 0.3854999542236328, "step": 6070 }, { "epoch": 1.6497282608695651, "grad_norm": 1.215899523365956, "learning_rate": 1.633253609228428e-06, "loss": 0.3973110318183899, "step": 6071 }, { "epoch": 1.65, "grad_norm": 1.2462385097951747, "learning_rate": 1.6307935685068654e-06, "loss": 0.3836829662322998, "step": 6072 }, { "epoch": 1.6502717391304347, "grad_norm": 1.3099187724034715, "learning_rate": 1.6283352173747148e-06, "loss": 0.3738059997558594, "step": 6073 }, { "epoch": 1.6505434782608694, "grad_norm": 1.3146919521232676, "learning_rate": 1.6258785563282698e-06, "loss": 0.3825002610683441, "step": 6074 }, { "epoch": 1.6508152173913042, "grad_norm": 1.3052483945000148, "learning_rate": 1.6234235858634884e-06, "loss": 0.42244499921798706, "step": 6075 }, { "epoch": 1.651086956521739, "grad_norm": 1.2867937443675286, "learning_rate": 1.6209703064759807e-06, "loss": 0.3897045850753784, "step": 6076 }, { "epoch": 1.6513586956521737, "grad_norm": 1.2384477178405966, "learning_rate": 1.6185187186610196e-06, "loss": 0.4115217328071594, "step": 6077 }, { "epoch": 1.6516304347826087, "grad_norm": 1.2828049946958073, "learning_rate": 1.616068822913539e-06, "loss": 0.3875833749771118, "step": 6078 }, { "epoch": 1.6519021739130435, "grad_norm": 1.3840160463945508, "learning_rate": 1.6136206197281235e-06, "loss": 0.4876627027988434, "step": 6079 }, { "epoch": 1.6521739130434783, "grad_norm": 1.345633807943965, "learning_rate": 1.6111741095990218e-06, "loss": 0.4708094894886017, "step": 6080 }, { "epoch": 1.652445652173913, "grad_norm": 1.2772716276771936, "learning_rate": 1.6087292930201393e-06, "loss": 0.41872191429138184, "step": 6081 }, { "epoch": 1.6527173913043478, "grad_norm": 1.1245442753635235, "learning_rate": 1.606286170485043e-06, "loss": 0.2854885160923004, "step": 6082 }, { "epoch": 1.6529891304347826, "grad_norm": 1.6036874490247117, "learning_rate": 1.6038447424869496e-06, "loss": 0.5065473914146423, "step": 6083 }, { "epoch": 1.6532608695652173, "grad_norm": 1.238979344235066, "learning_rate": 1.6014050095187417e-06, "loss": 0.43820858001708984, "step": 6084 }, { "epoch": 1.6535326086956523, "grad_norm": 1.1099437858577519, "learning_rate": 1.5989669720729572e-06, "loss": 0.2867514491081238, "step": 6085 }, { "epoch": 1.653804347826087, "grad_norm": 1.298002287684586, "learning_rate": 1.5965306306417882e-06, "loss": 0.3896638751029968, "step": 6086 }, { "epoch": 1.6540760869565219, "grad_norm": 1.2318134482525427, "learning_rate": 1.594095985717089e-06, "loss": 0.3986222445964813, "step": 6087 }, { "epoch": 1.6543478260869566, "grad_norm": 1.080552337951857, "learning_rate": 1.5916630377903696e-06, "loss": 0.34228283166885376, "step": 6088 }, { "epoch": 1.6546195652173914, "grad_norm": 1.3671519395808651, "learning_rate": 1.5892317873528006e-06, "loss": 0.436099112033844, "step": 6089 }, { "epoch": 1.6548913043478262, "grad_norm": 1.2015265749921984, "learning_rate": 1.5868022348951995e-06, "loss": 0.3959132432937622, "step": 6090 }, { "epoch": 1.655163043478261, "grad_norm": 1.2277868228062307, "learning_rate": 1.5843743809080581e-06, "loss": 0.4059731960296631, "step": 6091 }, { "epoch": 1.6554347826086957, "grad_norm": 1.3382575965521222, "learning_rate": 1.5819482258815078e-06, "loss": 0.41284507513046265, "step": 6092 }, { "epoch": 1.6557065217391305, "grad_norm": 1.1029702319205112, "learning_rate": 1.5795237703053479e-06, "loss": 0.3514360189437866, "step": 6093 }, { "epoch": 1.6559782608695652, "grad_norm": 1.16028634853714, "learning_rate": 1.5771010146690336e-06, "loss": 0.3401530981063843, "step": 6094 }, { "epoch": 1.65625, "grad_norm": 1.4753943720330525, "learning_rate": 1.5746799594616702e-06, "loss": 0.46725791692733765, "step": 6095 }, { "epoch": 1.6565217391304348, "grad_norm": 1.3245278048400158, "learning_rate": 1.5722606051720268e-06, "loss": 0.39963456988334656, "step": 6096 }, { "epoch": 1.6567934782608695, "grad_norm": 1.1877692790659138, "learning_rate": 1.5698429522885273e-06, "loss": 0.35757696628570557, "step": 6097 }, { "epoch": 1.6570652173913043, "grad_norm": 1.2805413234246419, "learning_rate": 1.5674270012992521e-06, "loss": 0.3933752179145813, "step": 6098 }, { "epoch": 1.657336956521739, "grad_norm": 1.4089468743748277, "learning_rate": 1.5650127526919346e-06, "loss": 0.4812627136707306, "step": 6099 }, { "epoch": 1.6576086956521738, "grad_norm": 1.219785343732921, "learning_rate": 1.5626002069539691e-06, "loss": 0.448857843875885, "step": 6100 }, { "epoch": 1.6578804347826086, "grad_norm": 1.5448761134802609, "learning_rate": 1.5601893645724042e-06, "loss": 0.47050926089286804, "step": 6101 }, { "epoch": 1.6581521739130434, "grad_norm": 1.3494461290391597, "learning_rate": 1.557780226033947e-06, "loss": 0.40462857484817505, "step": 6102 }, { "epoch": 1.6584239130434781, "grad_norm": 1.0566062856546075, "learning_rate": 1.555372791824954e-06, "loss": 0.3057997226715088, "step": 6103 }, { "epoch": 1.658695652173913, "grad_norm": 1.2790707703303723, "learning_rate": 1.5529670624314442e-06, "loss": 0.3794093430042267, "step": 6104 }, { "epoch": 1.6589673913043477, "grad_norm": 1.3204065953211086, "learning_rate": 1.5505630383390925e-06, "loss": 0.4463459849357605, "step": 6105 }, { "epoch": 1.6592391304347827, "grad_norm": 1.3438882100042888, "learning_rate": 1.5481607200332205e-06, "loss": 0.37237003445625305, "step": 6106 }, { "epoch": 1.6595108695652174, "grad_norm": 1.4162289504417243, "learning_rate": 1.5457601079988226e-06, "loss": 0.43240195512771606, "step": 6107 }, { "epoch": 1.6597826086956522, "grad_norm": 1.3793827295245866, "learning_rate": 1.5433612027205303e-06, "loss": 0.45253539085388184, "step": 6108 }, { "epoch": 1.660054347826087, "grad_norm": 1.3967043775920707, "learning_rate": 1.5409640046826402e-06, "loss": 0.4222460389137268, "step": 6109 }, { "epoch": 1.6603260869565217, "grad_norm": 1.291673280007685, "learning_rate": 1.5385685143691066e-06, "loss": 0.45627474784851074, "step": 6110 }, { "epoch": 1.6605978260869565, "grad_norm": 1.159154145772887, "learning_rate": 1.536174732263529e-06, "loss": 0.3317704200744629, "step": 6111 }, { "epoch": 1.6608695652173913, "grad_norm": 1.1729528768594641, "learning_rate": 1.533782658849171e-06, "loss": 0.33484697341918945, "step": 6112 }, { "epoch": 1.6611413043478263, "grad_norm": 1.3297602737435648, "learning_rate": 1.5313922946089488e-06, "loss": 0.3845842480659485, "step": 6113 }, { "epoch": 1.661413043478261, "grad_norm": 1.2327804443322308, "learning_rate": 1.529003640025436e-06, "loss": 0.4239185154438019, "step": 6114 }, { "epoch": 1.6616847826086958, "grad_norm": 1.8085584041140106, "learning_rate": 1.5266166955808514e-06, "loss": 0.3869737386703491, "step": 6115 }, { "epoch": 1.6619565217391306, "grad_norm": 1.3274533474649601, "learning_rate": 1.5242314617570808e-06, "loss": 0.4557691216468811, "step": 6116 }, { "epoch": 1.6622282608695653, "grad_norm": 1.3889238152292103, "learning_rate": 1.5218479390356556e-06, "loss": 0.46137428283691406, "step": 6117 }, { "epoch": 1.6625, "grad_norm": 1.236129534906879, "learning_rate": 1.5194661278977707e-06, "loss": 0.3745971918106079, "step": 6118 }, { "epoch": 1.6627717391304349, "grad_norm": 1.298449653972054, "learning_rate": 1.5170860288242638e-06, "loss": 0.42830121517181396, "step": 6119 }, { "epoch": 1.6630434782608696, "grad_norm": 1.3114889731743842, "learning_rate": 1.5147076422956353e-06, "loss": 0.44362616539001465, "step": 6120 }, { "epoch": 1.6633152173913044, "grad_norm": 1.3083275423975491, "learning_rate": 1.5123309687920417e-06, "loss": 0.4211583733558655, "step": 6121 }, { "epoch": 1.6635869565217392, "grad_norm": 1.0248320713041765, "learning_rate": 1.5099560087932851e-06, "loss": 0.3386263847351074, "step": 6122 }, { "epoch": 1.663858695652174, "grad_norm": 1.222942801773308, "learning_rate": 1.5075827627788264e-06, "loss": 0.341100811958313, "step": 6123 }, { "epoch": 1.6641304347826087, "grad_norm": 1.336576015033405, "learning_rate": 1.5052112312277834e-06, "loss": 0.44443535804748535, "step": 6124 }, { "epoch": 1.6644021739130435, "grad_norm": 1.472099303082061, "learning_rate": 1.5028414146189252e-06, "loss": 0.42312026023864746, "step": 6125 }, { "epoch": 1.6646739130434782, "grad_norm": 1.1941056611469776, "learning_rate": 1.5004733134306692e-06, "loss": 0.37628480792045593, "step": 6126 }, { "epoch": 1.664945652173913, "grad_norm": 1.2643316805962708, "learning_rate": 1.4981069281410988e-06, "loss": 0.39825713634490967, "step": 6127 }, { "epoch": 1.6652173913043478, "grad_norm": 1.5425052115133782, "learning_rate": 1.4957422592279402e-06, "loss": 0.5104658603668213, "step": 6128 }, { "epoch": 1.6654891304347825, "grad_norm": 1.106609099663324, "learning_rate": 1.493379307168573e-06, "loss": 0.37734049558639526, "step": 6129 }, { "epoch": 1.6657608695652173, "grad_norm": 1.2994353728023476, "learning_rate": 1.4910180724400414e-06, "loss": 0.3453272581100464, "step": 6130 }, { "epoch": 1.666032608695652, "grad_norm": 1.381948942224702, "learning_rate": 1.4886585555190291e-06, "loss": 0.49230751395225525, "step": 6131 }, { "epoch": 1.6663043478260868, "grad_norm": 1.2590273546661885, "learning_rate": 1.486300756881882e-06, "loss": 0.34480300545692444, "step": 6132 }, { "epoch": 1.6665760869565216, "grad_norm": 1.3986233044901701, "learning_rate": 1.4839446770045973e-06, "loss": 0.45680856704711914, "step": 6133 }, { "epoch": 1.6668478260869564, "grad_norm": 1.3593979880342772, "learning_rate": 1.4815903163628255e-06, "loss": 0.43714743852615356, "step": 6134 }, { "epoch": 1.6671195652173914, "grad_norm": 1.3106302577525344, "learning_rate": 1.4792376754318637e-06, "loss": 0.43701815605163574, "step": 6135 }, { "epoch": 1.6673913043478261, "grad_norm": 1.1410142043460858, "learning_rate": 1.4768867546866706e-06, "loss": 0.3760533332824707, "step": 6136 }, { "epoch": 1.6676630434782609, "grad_norm": 1.2760104088805329, "learning_rate": 1.4745375546018547e-06, "loss": 0.4483491778373718, "step": 6137 }, { "epoch": 1.6679347826086957, "grad_norm": 1.4256924317381674, "learning_rate": 1.4721900756516739e-06, "loss": 0.41767752170562744, "step": 6138 }, { "epoch": 1.6682065217391304, "grad_norm": 1.1797302079029106, "learning_rate": 1.469844318310042e-06, "loss": 0.3227503299713135, "step": 6139 }, { "epoch": 1.6684782608695652, "grad_norm": 1.1328828279735155, "learning_rate": 1.4675002830505257e-06, "loss": 0.3551473617553711, "step": 6140 }, { "epoch": 1.66875, "grad_norm": 1.326595234487354, "learning_rate": 1.4651579703463437e-06, "loss": 0.4028068780899048, "step": 6141 }, { "epoch": 1.669021739130435, "grad_norm": 1.4823162600946627, "learning_rate": 1.4628173806703594e-06, "loss": 0.4639890789985657, "step": 6142 }, { "epoch": 1.6692934782608697, "grad_norm": 1.1394468215107327, "learning_rate": 1.460478514495105e-06, "loss": 0.3570331335067749, "step": 6143 }, { "epoch": 1.6695652173913045, "grad_norm": 1.217951097042275, "learning_rate": 1.4581413722927473e-06, "loss": 0.40698009729385376, "step": 6144 }, { "epoch": 1.6698369565217392, "grad_norm": 0.9958438122988368, "learning_rate": 1.4558059545351144e-06, "loss": 0.2804616093635559, "step": 6145 }, { "epoch": 1.670108695652174, "grad_norm": 1.3063854499498506, "learning_rate": 1.453472261693687e-06, "loss": 0.44666826725006104, "step": 6146 }, { "epoch": 1.6703804347826088, "grad_norm": 1.1485741701056287, "learning_rate": 1.4511402942395903e-06, "loss": 0.43479883670806885, "step": 6147 }, { "epoch": 1.6706521739130435, "grad_norm": 1.2722380918226135, "learning_rate": 1.44881005264361e-06, "loss": 0.42563992738723755, "step": 6148 }, { "epoch": 1.6709239130434783, "grad_norm": 1.2861935523055923, "learning_rate": 1.4464815373761731e-06, "loss": 0.456217885017395, "step": 6149 }, { "epoch": 1.671195652173913, "grad_norm": 1.1903579619035392, "learning_rate": 1.4441547489073727e-06, "loss": 0.37834733724594116, "step": 6150 }, { "epoch": 1.6714673913043478, "grad_norm": 1.3267394259889222, "learning_rate": 1.4418296877069383e-06, "loss": 0.4507543444633484, "step": 6151 }, { "epoch": 1.6717391304347826, "grad_norm": 1.3838637086373542, "learning_rate": 1.43950635424426e-06, "loss": 0.4515385031700134, "step": 6152 }, { "epoch": 1.6720108695652174, "grad_norm": 1.3016603512547813, "learning_rate": 1.4371847489883762e-06, "loss": 0.48006772994995117, "step": 6153 }, { "epoch": 1.6722826086956522, "grad_norm": 1.2789624559786115, "learning_rate": 1.434864872407975e-06, "loss": 0.40571096539497375, "step": 6154 }, { "epoch": 1.672554347826087, "grad_norm": 1.4871021347138476, "learning_rate": 1.4325467249713964e-06, "loss": 0.5201965570449829, "step": 6155 }, { "epoch": 1.6728260869565217, "grad_norm": 1.335649216466843, "learning_rate": 1.430230307146634e-06, "loss": 0.41755104064941406, "step": 6156 }, { "epoch": 1.6730978260869565, "grad_norm": 1.5362318721153683, "learning_rate": 1.4279156194013322e-06, "loss": 0.5432389378547668, "step": 6157 }, { "epoch": 1.6733695652173912, "grad_norm": 1.2045075268787888, "learning_rate": 1.4256026622027774e-06, "loss": 0.41996321082115173, "step": 6158 }, { "epoch": 1.673641304347826, "grad_norm": 1.181997910368808, "learning_rate": 1.4232914360179184e-06, "loss": 0.3213973939418793, "step": 6159 }, { "epoch": 1.6739130434782608, "grad_norm": 1.5118843524095635, "learning_rate": 1.4209819413133474e-06, "loss": 0.44822120666503906, "step": 6160 }, { "epoch": 1.6741847826086955, "grad_norm": 1.3585919535996616, "learning_rate": 1.4186741785553116e-06, "loss": 0.48532432317733765, "step": 6161 }, { "epoch": 1.6744565217391303, "grad_norm": 1.3379469646658064, "learning_rate": 1.4163681482097025e-06, "loss": 0.46534520387649536, "step": 6162 }, { "epoch": 1.674728260869565, "grad_norm": 1.1188881308379963, "learning_rate": 1.4140638507420668e-06, "loss": 0.3721289038658142, "step": 6163 }, { "epoch": 1.675, "grad_norm": 1.1276966954373338, "learning_rate": 1.4117612866176022e-06, "loss": 0.3477545380592346, "step": 6164 }, { "epoch": 1.6752717391304348, "grad_norm": 1.4101535390278295, "learning_rate": 1.409460456301147e-06, "loss": 0.48281461000442505, "step": 6165 }, { "epoch": 1.6755434782608696, "grad_norm": 1.054018745496135, "learning_rate": 1.4071613602572076e-06, "loss": 0.3462839126586914, "step": 6166 }, { "epoch": 1.6758152173913043, "grad_norm": 1.2662525233607362, "learning_rate": 1.4048639989499203e-06, "loss": 0.3941202163696289, "step": 6167 }, { "epoch": 1.6760869565217391, "grad_norm": 1.1038966143007334, "learning_rate": 1.4025683728430838e-06, "loss": 0.3305080533027649, "step": 6168 }, { "epoch": 1.6763586956521739, "grad_norm": 1.1761275400161657, "learning_rate": 1.4002744824001436e-06, "loss": 0.33780962228775024, "step": 6169 }, { "epoch": 1.6766304347826086, "grad_norm": 1.1317789573974464, "learning_rate": 1.3979823280841943e-06, "loss": 0.3252137303352356, "step": 6170 }, { "epoch": 1.6769021739130436, "grad_norm": 1.2639328980247708, "learning_rate": 1.395691910357977e-06, "loss": 0.4555954337120056, "step": 6171 }, { "epoch": 1.6771739130434784, "grad_norm": 1.1820897164372686, "learning_rate": 1.3934032296838862e-06, "loss": 0.42084813117980957, "step": 6172 }, { "epoch": 1.6774456521739132, "grad_norm": 1.174967190684751, "learning_rate": 1.391116286523968e-06, "loss": 0.39930692315101624, "step": 6173 }, { "epoch": 1.677717391304348, "grad_norm": 1.2913340171356786, "learning_rate": 1.3888310813399085e-06, "loss": 0.44627952575683594, "step": 6174 }, { "epoch": 1.6779891304347827, "grad_norm": 1.055203288191506, "learning_rate": 1.3865476145930524e-06, "loss": 0.34690532088279724, "step": 6175 }, { "epoch": 1.6782608695652175, "grad_norm": 1.2380945362630948, "learning_rate": 1.3842658867443892e-06, "loss": 0.33823126554489136, "step": 6176 }, { "epoch": 1.6785326086956522, "grad_norm": 1.068401919718605, "learning_rate": 1.3819858982545598e-06, "loss": 0.2969077229499817, "step": 6177 }, { "epoch": 1.678804347826087, "grad_norm": 1.2154012794671438, "learning_rate": 1.3797076495838456e-06, "loss": 0.3945003151893616, "step": 6178 }, { "epoch": 1.6790760869565218, "grad_norm": 1.3541386810457225, "learning_rate": 1.3774311411921925e-06, "loss": 0.391726553440094, "step": 6179 }, { "epoch": 1.6793478260869565, "grad_norm": 1.285288688543278, "learning_rate": 1.3751563735391816e-06, "loss": 0.41664940118789673, "step": 6180 }, { "epoch": 1.6796195652173913, "grad_norm": 1.3908710821096695, "learning_rate": 1.3728833470840442e-06, "loss": 0.43416082859039307, "step": 6181 }, { "epoch": 1.679891304347826, "grad_norm": 1.2450249141140515, "learning_rate": 1.3706120622856644e-06, "loss": 0.4109887182712555, "step": 6182 }, { "epoch": 1.6801630434782608, "grad_norm": 1.1695189027620183, "learning_rate": 1.3683425196025734e-06, "loss": 0.35380566120147705, "step": 6183 }, { "epoch": 1.6804347826086956, "grad_norm": 1.3356550674067253, "learning_rate": 1.3660747194929524e-06, "loss": 0.44846808910369873, "step": 6184 }, { "epoch": 1.6807065217391304, "grad_norm": 1.089264302384278, "learning_rate": 1.3638086624146218e-06, "loss": 0.38905540108680725, "step": 6185 }, { "epoch": 1.6809782608695651, "grad_norm": 1.2341500287896698, "learning_rate": 1.3615443488250668e-06, "loss": 0.39604324102401733, "step": 6186 }, { "epoch": 1.68125, "grad_norm": 1.2855288452813158, "learning_rate": 1.3592817791814039e-06, "loss": 0.4370400309562683, "step": 6187 }, { "epoch": 1.6815217391304347, "grad_norm": 1.3129540209165682, "learning_rate": 1.3570209539404067e-06, "loss": 0.4904153347015381, "step": 6188 }, { "epoch": 1.6817934782608694, "grad_norm": 1.2932391139825208, "learning_rate": 1.3547618735584956e-06, "loss": 0.3760558068752289, "step": 6189 }, { "epoch": 1.6820652173913042, "grad_norm": 1.3576111501908623, "learning_rate": 1.3525045384917335e-06, "loss": 0.49256813526153564, "step": 6190 }, { "epoch": 1.682336956521739, "grad_norm": 1.4209354537498105, "learning_rate": 1.350248949195838e-06, "loss": 0.3923344612121582, "step": 6191 }, { "epoch": 1.6826086956521737, "grad_norm": 1.4003879927883838, "learning_rate": 1.3479951061261699e-06, "loss": 0.42710569500923157, "step": 6192 }, { "epoch": 1.6828804347826087, "grad_norm": 1.196459043557925, "learning_rate": 1.3457430097377421e-06, "loss": 0.31926798820495605, "step": 6193 }, { "epoch": 1.6831521739130435, "grad_norm": 1.292558417081342, "learning_rate": 1.3434926604852071e-06, "loss": 0.3880084753036499, "step": 6194 }, { "epoch": 1.6834239130434783, "grad_norm": 1.3685835351373905, "learning_rate": 1.3412440588228715e-06, "loss": 0.44048333168029785, "step": 6195 }, { "epoch": 1.683695652173913, "grad_norm": 0.9994009707083954, "learning_rate": 1.3389972052046884e-06, "loss": 0.2784308195114136, "step": 6196 }, { "epoch": 1.6839673913043478, "grad_norm": 1.4391308903541158, "learning_rate": 1.3367521000842533e-06, "loss": 0.4826487600803375, "step": 6197 }, { "epoch": 1.6842391304347826, "grad_norm": 1.1391715942653848, "learning_rate": 1.3345087439148118e-06, "loss": 0.3412780165672302, "step": 6198 }, { "epoch": 1.6845108695652173, "grad_norm": 1.4275679273939625, "learning_rate": 1.3322671371492568e-06, "loss": 0.4776151180267334, "step": 6199 }, { "epoch": 1.6847826086956523, "grad_norm": 1.229719949228952, "learning_rate": 1.3300272802401315e-06, "loss": 0.37919604778289795, "step": 6200 }, { "epoch": 1.685054347826087, "grad_norm": 1.3217027260587906, "learning_rate": 1.3277891736396143e-06, "loss": 0.4250047206878662, "step": 6201 }, { "epoch": 1.6853260869565219, "grad_norm": 1.1848485305238434, "learning_rate": 1.325552817799547e-06, "loss": 0.34267595410346985, "step": 6202 }, { "epoch": 1.6855978260869566, "grad_norm": 1.3690322744863739, "learning_rate": 1.3233182131714018e-06, "loss": 0.5133469104766846, "step": 6203 }, { "epoch": 1.6858695652173914, "grad_norm": 1.2019336072140228, "learning_rate": 1.3210853602063067e-06, "loss": 0.36862602829933167, "step": 6204 }, { "epoch": 1.6861413043478262, "grad_norm": 1.3752077092985389, "learning_rate": 1.3188542593550368e-06, "loss": 0.4223974943161011, "step": 6205 }, { "epoch": 1.686413043478261, "grad_norm": 1.086225603907694, "learning_rate": 1.3166249110680052e-06, "loss": 0.2932966351509094, "step": 6206 }, { "epoch": 1.6866847826086957, "grad_norm": 1.3049726958647794, "learning_rate": 1.3143973157952784e-06, "loss": 0.3851126432418823, "step": 6207 }, { "epoch": 1.6869565217391305, "grad_norm": 1.319198717516097, "learning_rate": 1.3121714739865677e-06, "loss": 0.37940049171447754, "step": 6208 }, { "epoch": 1.6872282608695652, "grad_norm": 1.2874392117434243, "learning_rate": 1.3099473860912325e-06, "loss": 0.41459259390830994, "step": 6209 }, { "epoch": 1.6875, "grad_norm": 1.1732014233032964, "learning_rate": 1.3077250525582708e-06, "loss": 0.36211609840393066, "step": 6210 }, { "epoch": 1.6877717391304348, "grad_norm": 1.180608368162025, "learning_rate": 1.305504473836331e-06, "loss": 0.34453776478767395, "step": 6211 }, { "epoch": 1.6880434782608695, "grad_norm": 1.135885264128142, "learning_rate": 1.3032856503737102e-06, "loss": 0.34967243671417236, "step": 6212 }, { "epoch": 1.6883152173913043, "grad_norm": 1.3277182110041024, "learning_rate": 1.301068582618349e-06, "loss": 0.4150815010070801, "step": 6213 }, { "epoch": 1.688586956521739, "grad_norm": 1.0832590072544075, "learning_rate": 1.2988532710178292e-06, "loss": 0.3384418785572052, "step": 6214 }, { "epoch": 1.6888586956521738, "grad_norm": 1.0729803094980985, "learning_rate": 1.2966397160193823e-06, "loss": 0.3119385838508606, "step": 6215 }, { "epoch": 1.6891304347826086, "grad_norm": 1.7352277178633715, "learning_rate": 1.2944279180698882e-06, "loss": 0.4393886625766754, "step": 6216 }, { "epoch": 1.6894021739130434, "grad_norm": 1.2635443649775473, "learning_rate": 1.2922178776158633e-06, "loss": 0.44279804825782776, "step": 6217 }, { "epoch": 1.6896739130434781, "grad_norm": 1.5089631549693419, "learning_rate": 1.2900095951034764e-06, "loss": 0.5868579149246216, "step": 6218 }, { "epoch": 1.689945652173913, "grad_norm": 1.2824560860390877, "learning_rate": 1.2878030709785394e-06, "loss": 0.419692724943161, "step": 6219 }, { "epoch": 1.6902173913043477, "grad_norm": 1.1672232127572901, "learning_rate": 1.2855983056865106e-06, "loss": 0.3421693444252014, "step": 6220 }, { "epoch": 1.6904891304347827, "grad_norm": 1.4297764029293065, "learning_rate": 1.2833952996724864e-06, "loss": 0.4517345726490021, "step": 6221 }, { "epoch": 1.6907608695652174, "grad_norm": 1.1736397594208106, "learning_rate": 1.2811940533812216e-06, "loss": 0.32333940267562866, "step": 6222 }, { "epoch": 1.6910326086956522, "grad_norm": 0.9697261995630482, "learning_rate": 1.2789945672571036e-06, "loss": 0.29288020730018616, "step": 6223 }, { "epoch": 1.691304347826087, "grad_norm": 1.1916562819749208, "learning_rate": 1.2767968417441624e-06, "loss": 0.3893582820892334, "step": 6224 }, { "epoch": 1.6915760869565217, "grad_norm": 1.3214944814600642, "learning_rate": 1.2746008772860885e-06, "loss": 0.43797141313552856, "step": 6225 }, { "epoch": 1.6918478260869565, "grad_norm": 1.138240963989646, "learning_rate": 1.2724066743261997e-06, "loss": 0.3721187114715576, "step": 6226 }, { "epoch": 1.6921195652173913, "grad_norm": 1.2074410650519984, "learning_rate": 1.2702142333074685e-06, "loss": 0.38795268535614014, "step": 6227 }, { "epoch": 1.6923913043478263, "grad_norm": 1.2999862294938709, "learning_rate": 1.2680235546725084e-06, "loss": 0.4958314001560211, "step": 6228 }, { "epoch": 1.692663043478261, "grad_norm": 1.4469333584884552, "learning_rate": 1.265834638863579e-06, "loss": 0.4562274217605591, "step": 6229 }, { "epoch": 1.6929347826086958, "grad_norm": 1.2964910184688136, "learning_rate": 1.2636474863225768e-06, "loss": 0.45927780866622925, "step": 6230 }, { "epoch": 1.6932065217391306, "grad_norm": 1.4045930604992043, "learning_rate": 1.2614620974910518e-06, "loss": 0.3897666335105896, "step": 6231 }, { "epoch": 1.6934782608695653, "grad_norm": 1.3411194155769415, "learning_rate": 1.259278472810196e-06, "loss": 0.41083624958992004, "step": 6232 }, { "epoch": 1.69375, "grad_norm": 1.3462746854457004, "learning_rate": 1.2570966127208373e-06, "loss": 0.44765186309814453, "step": 6233 }, { "epoch": 1.6940217391304349, "grad_norm": 1.3635512258976934, "learning_rate": 1.2549165176634582e-06, "loss": 0.4260742664337158, "step": 6234 }, { "epoch": 1.6942934782608696, "grad_norm": 1.4511124250298086, "learning_rate": 1.2527381880781764e-06, "loss": 0.4883650541305542, "step": 6235 }, { "epoch": 1.6945652173913044, "grad_norm": 1.24137721549915, "learning_rate": 1.2505616244047613e-06, "loss": 0.4195636510848999, "step": 6236 }, { "epoch": 1.6948369565217392, "grad_norm": 1.1342955102251324, "learning_rate": 1.2483868270826149e-06, "loss": 0.41384726762771606, "step": 6237 }, { "epoch": 1.695108695652174, "grad_norm": 1.1612573895306928, "learning_rate": 1.246213796550796e-06, "loss": 0.4084390699863434, "step": 6238 }, { "epoch": 1.6953804347826087, "grad_norm": 1.3476831595133099, "learning_rate": 1.2440425332479965e-06, "loss": 0.44769763946533203, "step": 6239 }, { "epoch": 1.6956521739130435, "grad_norm": 1.35376770077315, "learning_rate": 1.24187303761255e-06, "loss": 0.4448446035385132, "step": 6240 }, { "epoch": 1.6959239130434782, "grad_norm": 1.2878248379036796, "learning_rate": 1.2397053100824463e-06, "loss": 0.43906980752944946, "step": 6241 }, { "epoch": 1.696195652173913, "grad_norm": 1.2157048673969275, "learning_rate": 1.2375393510953038e-06, "loss": 0.32821184396743774, "step": 6242 }, { "epoch": 1.6964673913043478, "grad_norm": 1.392967991952385, "learning_rate": 1.2353751610883924e-06, "loss": 0.39239004254341125, "step": 6243 }, { "epoch": 1.6967391304347825, "grad_norm": 1.3607516308511305, "learning_rate": 1.2332127404986216e-06, "loss": 0.4265846610069275, "step": 6244 }, { "epoch": 1.6970108695652173, "grad_norm": 1.5908492564850771, "learning_rate": 1.2310520897625466e-06, "loss": 0.4495427906513214, "step": 6245 }, { "epoch": 1.697282608695652, "grad_norm": 1.3185670909364327, "learning_rate": 1.2288932093163596e-06, "loss": 0.4224170446395874, "step": 6246 }, { "epoch": 1.6975543478260868, "grad_norm": 1.2647576965248546, "learning_rate": 1.226736099595901e-06, "loss": 0.4618684649467468, "step": 6247 }, { "epoch": 1.6978260869565216, "grad_norm": 1.3808494915473661, "learning_rate": 1.2245807610366533e-06, "loss": 0.3902975618839264, "step": 6248 }, { "epoch": 1.6980978260869564, "grad_norm": 1.2887241316470615, "learning_rate": 1.2224271940737364e-06, "loss": 0.42092540860176086, "step": 6249 }, { "epoch": 1.6983695652173914, "grad_norm": 1.1744279425333994, "learning_rate": 1.220275399141917e-06, "loss": 0.3480578064918518, "step": 6250 }, { "epoch": 1.6986413043478261, "grad_norm": 1.245686879321559, "learning_rate": 1.218125376675605e-06, "loss": 0.38603025674819946, "step": 6251 }, { "epoch": 1.6989130434782609, "grad_norm": 1.3600300410027977, "learning_rate": 1.2159771271088505e-06, "loss": 0.477862685918808, "step": 6252 }, { "epoch": 1.6991847826086957, "grad_norm": 1.2383063765125613, "learning_rate": 1.2138306508753428e-06, "loss": 0.39148974418640137, "step": 6253 }, { "epoch": 1.6994565217391304, "grad_norm": 1.2697458318109982, "learning_rate": 1.2116859484084175e-06, "loss": 0.43189314007759094, "step": 6254 }, { "epoch": 1.6997282608695652, "grad_norm": 1.2533365855264411, "learning_rate": 1.2095430201410507e-06, "loss": 0.400057852268219, "step": 6255 }, { "epoch": 1.7, "grad_norm": 1.1615796705311006, "learning_rate": 1.207401866505863e-06, "loss": 0.332410991191864, "step": 6256 }, { "epoch": 1.700271739130435, "grad_norm": 1.3509159414595824, "learning_rate": 1.2052624879351105e-06, "loss": 0.4180452823638916, "step": 6257 }, { "epoch": 1.7005434782608697, "grad_norm": 1.263202696694416, "learning_rate": 1.2031248848606948e-06, "loss": 0.39865824580192566, "step": 6258 }, { "epoch": 1.7008152173913045, "grad_norm": 1.1558190782306146, "learning_rate": 1.2009890577141625e-06, "loss": 0.3557220995426178, "step": 6259 }, { "epoch": 1.7010869565217392, "grad_norm": 1.4483712097408514, "learning_rate": 1.1988550069266912e-06, "loss": 0.4796431064605713, "step": 6260 }, { "epoch": 1.701358695652174, "grad_norm": 1.1867593713021323, "learning_rate": 1.1967227329291142e-06, "loss": 0.4254031479358673, "step": 6261 }, { "epoch": 1.7016304347826088, "grad_norm": 1.2228673565000483, "learning_rate": 1.1945922361518935e-06, "loss": 0.4152982831001282, "step": 6262 }, { "epoch": 1.7019021739130435, "grad_norm": 1.2983605120836468, "learning_rate": 1.1924635170251386e-06, "loss": 0.3777306079864502, "step": 6263 }, { "epoch": 1.7021739130434783, "grad_norm": 1.1329070491115918, "learning_rate": 1.1903365759786001e-06, "loss": 0.4019628167152405, "step": 6264 }, { "epoch": 1.702445652173913, "grad_norm": 1.3666617558411216, "learning_rate": 1.188211413441669e-06, "loss": 0.42512446641921997, "step": 6265 }, { "epoch": 1.7027173913043478, "grad_norm": 1.0875623690884773, "learning_rate": 1.186088029843373e-06, "loss": 0.35521191358566284, "step": 6266 }, { "epoch": 1.7029891304347826, "grad_norm": 1.3159260530553858, "learning_rate": 1.1839664256123863e-06, "loss": 0.45122838020324707, "step": 6267 }, { "epoch": 1.7032608695652174, "grad_norm": 1.2790983796323334, "learning_rate": 1.1818466011770246e-06, "loss": 0.4216134548187256, "step": 6268 }, { "epoch": 1.7035326086956522, "grad_norm": 1.3501593841674808, "learning_rate": 1.1797285569652372e-06, "loss": 0.4549408555030823, "step": 6269 }, { "epoch": 1.703804347826087, "grad_norm": 1.2964854247416187, "learning_rate": 1.1776122934046198e-06, "loss": 0.4355596899986267, "step": 6270 }, { "epoch": 1.7040760869565217, "grad_norm": 1.2923664887846722, "learning_rate": 1.175497810922408e-06, "loss": 0.44985994696617126, "step": 6271 }, { "epoch": 1.7043478260869565, "grad_norm": 1.265991181996663, "learning_rate": 1.1733851099454795e-06, "loss": 0.3763517439365387, "step": 6272 }, { "epoch": 1.7046195652173912, "grad_norm": 1.2022502023838533, "learning_rate": 1.1712741909003444e-06, "loss": 0.36669814586639404, "step": 6273 }, { "epoch": 1.704891304347826, "grad_norm": 1.1489122694741596, "learning_rate": 1.1691650542131627e-06, "loss": 0.3409048914909363, "step": 6274 }, { "epoch": 1.7051630434782608, "grad_norm": 1.555908293904493, "learning_rate": 1.1670577003097306e-06, "loss": 0.5036498308181763, "step": 6275 }, { "epoch": 1.7054347826086955, "grad_norm": 1.3149093490163035, "learning_rate": 1.1649521296154787e-06, "loss": 0.44706785678863525, "step": 6276 }, { "epoch": 1.7057065217391303, "grad_norm": 1.381990057716987, "learning_rate": 1.1628483425554905e-06, "loss": 0.44929802417755127, "step": 6277 }, { "epoch": 1.705978260869565, "grad_norm": 0.8265976778164676, "learning_rate": 1.1607463395544782e-06, "loss": 0.1969141960144043, "step": 6278 }, { "epoch": 1.70625, "grad_norm": 1.1998965652442215, "learning_rate": 1.1586461210367995e-06, "loss": 0.4519397020339966, "step": 6279 }, { "epoch": 1.7065217391304348, "grad_norm": 1.2324730593969613, "learning_rate": 1.1565476874264448e-06, "loss": 0.3867868185043335, "step": 6280 }, { "epoch": 1.7067934782608696, "grad_norm": 1.223440864816665, "learning_rate": 1.1544510391470575e-06, "loss": 0.4371323585510254, "step": 6281 }, { "epoch": 1.7070652173913043, "grad_norm": 1.273541397818021, "learning_rate": 1.1523561766219081e-06, "loss": 0.35778361558914185, "step": 6282 }, { "epoch": 1.7073369565217391, "grad_norm": 1.3340377156584728, "learning_rate": 1.1502631002739084e-06, "loss": 0.409891277551651, "step": 6283 }, { "epoch": 1.7076086956521739, "grad_norm": 1.3862882404488353, "learning_rate": 1.148171810525618e-06, "loss": 0.42388683557510376, "step": 6284 }, { "epoch": 1.7078804347826086, "grad_norm": 1.3629671370946874, "learning_rate": 1.1460823077992256e-06, "loss": 0.4913683533668518, "step": 6285 }, { "epoch": 1.7081521739130436, "grad_norm": 0.980526256275546, "learning_rate": 1.1439945925165642e-06, "loss": 0.28597310185432434, "step": 6286 }, { "epoch": 1.7084239130434784, "grad_norm": 1.1483387555299729, "learning_rate": 1.1419086650991063e-06, "loss": 0.3842662572860718, "step": 6287 }, { "epoch": 1.7086956521739132, "grad_norm": 1.3642076801574234, "learning_rate": 1.139824525967964e-06, "loss": 0.4378196597099304, "step": 6288 }, { "epoch": 1.708967391304348, "grad_norm": 1.270627761262197, "learning_rate": 1.1377421755438834e-06, "loss": 0.40804386138916016, "step": 6289 }, { "epoch": 1.7092391304347827, "grad_norm": 1.1502342005104256, "learning_rate": 1.1356616142472532e-06, "loss": 0.43929851055145264, "step": 6290 }, { "epoch": 1.7095108695652175, "grad_norm": 1.2495789497099377, "learning_rate": 1.1335828424981043e-06, "loss": 0.4462825059890747, "step": 6291 }, { "epoch": 1.7097826086956522, "grad_norm": 1.2497676502672515, "learning_rate": 1.131505860716099e-06, "loss": 0.35730940103530884, "step": 6292 }, { "epoch": 1.710054347826087, "grad_norm": 1.4486697913203237, "learning_rate": 1.1294306693205426e-06, "loss": 0.45729637145996094, "step": 6293 }, { "epoch": 1.7103260869565218, "grad_norm": 1.1961421352665549, "learning_rate": 1.127357268730378e-06, "loss": 0.3668016791343689, "step": 6294 }, { "epoch": 1.7105978260869565, "grad_norm": 1.1427183606604185, "learning_rate": 1.1252856593641892e-06, "loss": 0.3911275863647461, "step": 6295 }, { "epoch": 1.7108695652173913, "grad_norm": 1.3068157920691539, "learning_rate": 1.1232158416401905e-06, "loss": 0.4194742739200592, "step": 6296 }, { "epoch": 1.711141304347826, "grad_norm": 1.0100760938724354, "learning_rate": 1.121147815976248e-06, "loss": 0.32502713799476624, "step": 6297 }, { "epoch": 1.7114130434782608, "grad_norm": 1.2322567215135956, "learning_rate": 1.1190815827898526e-06, "loss": 0.3537047505378723, "step": 6298 }, { "epoch": 1.7116847826086956, "grad_norm": 1.4382608293388912, "learning_rate": 1.1170171424981403e-06, "loss": 0.4463391900062561, "step": 6299 }, { "epoch": 1.7119565217391304, "grad_norm": 1.1635295647606116, "learning_rate": 1.114954495517886e-06, "loss": 0.3492954969406128, "step": 6300 }, { "epoch": 1.7122282608695651, "grad_norm": 1.3623433280485018, "learning_rate": 1.1128936422654957e-06, "loss": 0.42772674560546875, "step": 6301 }, { "epoch": 1.7125, "grad_norm": 1.429661864312948, "learning_rate": 1.110834583157021e-06, "loss": 0.4947437047958374, "step": 6302 }, { "epoch": 1.7127717391304347, "grad_norm": 1.111441433358299, "learning_rate": 1.1087773186081474e-06, "loss": 0.3191300630569458, "step": 6303 }, { "epoch": 1.7130434782608694, "grad_norm": 1.4358920374148771, "learning_rate": 1.1067218490342014e-06, "loss": 0.37196987867355347, "step": 6304 }, { "epoch": 1.7133152173913042, "grad_norm": 1.0861228898488424, "learning_rate": 1.1046681748501409e-06, "loss": 0.3571450412273407, "step": 6305 }, { "epoch": 1.713586956521739, "grad_norm": 1.176166183098062, "learning_rate": 1.1026162964705656e-06, "loss": 0.3507373332977295, "step": 6306 }, { "epoch": 1.7138586956521737, "grad_norm": 1.3895486170613842, "learning_rate": 1.100566214309714e-06, "loss": 0.4429613947868347, "step": 6307 }, { "epoch": 1.7141304347826087, "grad_norm": 1.236057459579732, "learning_rate": 1.0985179287814607e-06, "loss": 0.4398201107978821, "step": 6308 }, { "epoch": 1.7144021739130435, "grad_norm": 1.0970802912058062, "learning_rate": 1.0964714402993137e-06, "loss": 0.2864857614040375, "step": 6309 }, { "epoch": 1.7146739130434783, "grad_norm": 1.2885377691877273, "learning_rate": 1.0944267492764237e-06, "loss": 0.42007848620414734, "step": 6310 }, { "epoch": 1.714945652173913, "grad_norm": 1.2979830721793675, "learning_rate": 1.0923838561255784e-06, "loss": 0.42163902521133423, "step": 6311 }, { "epoch": 1.7152173913043478, "grad_norm": 1.4539778201623264, "learning_rate": 1.0903427612591954e-06, "loss": 0.46728914976119995, "step": 6312 }, { "epoch": 1.7154891304347826, "grad_norm": 1.2428637956752293, "learning_rate": 1.0883034650893376e-06, "loss": 0.3854687213897705, "step": 6313 }, { "epoch": 1.7157608695652173, "grad_norm": 1.2411234926205639, "learning_rate": 1.086265968027701e-06, "loss": 0.4480699598789215, "step": 6314 }, { "epoch": 1.7160326086956523, "grad_norm": 1.305660315971789, "learning_rate": 1.0842302704856201e-06, "loss": 0.3967115879058838, "step": 6315 }, { "epoch": 1.716304347826087, "grad_norm": 1.2819058462107555, "learning_rate": 1.0821963728740626e-06, "loss": 0.39957931637763977, "step": 6316 }, { "epoch": 1.7165760869565219, "grad_norm": 1.5786532654830023, "learning_rate": 1.0801642756036367e-06, "loss": 0.5068488121032715, "step": 6317 }, { "epoch": 1.7168478260869566, "grad_norm": 0.9998212871108051, "learning_rate": 1.0781339790845868e-06, "loss": 0.2959609031677246, "step": 6318 }, { "epoch": 1.7171195652173914, "grad_norm": 1.3441692769701779, "learning_rate": 1.076105483726787e-06, "loss": 0.43711453676223755, "step": 6319 }, { "epoch": 1.7173913043478262, "grad_norm": 1.2510136624409458, "learning_rate": 1.0740787899397609e-06, "loss": 0.41684386134147644, "step": 6320 }, { "epoch": 1.717663043478261, "grad_norm": 1.2820195521263806, "learning_rate": 1.0720538981326557e-06, "loss": 0.3866145610809326, "step": 6321 }, { "epoch": 1.7179347826086957, "grad_norm": 1.3009221550875139, "learning_rate": 1.0700308087142608e-06, "loss": 0.4442916214466095, "step": 6322 }, { "epoch": 1.7182065217391305, "grad_norm": 1.3165187911204204, "learning_rate": 1.0680095220930032e-06, "loss": 0.43433523178100586, "step": 6323 }, { "epoch": 1.7184782608695652, "grad_norm": 1.4068861756893987, "learning_rate": 1.065990038676943e-06, "loss": 0.43671923875808716, "step": 6324 }, { "epoch": 1.71875, "grad_norm": 1.1741286768468522, "learning_rate": 1.0639723588737738e-06, "loss": 0.40183067321777344, "step": 6325 }, { "epoch": 1.7190217391304348, "grad_norm": 1.3250058372543645, "learning_rate": 1.0619564830908303e-06, "loss": 0.414656400680542, "step": 6326 }, { "epoch": 1.7192934782608695, "grad_norm": 1.3781725312738002, "learning_rate": 1.0599424117350832e-06, "loss": 0.4622865915298462, "step": 6327 }, { "epoch": 1.7195652173913043, "grad_norm": 1.4899133603607058, "learning_rate": 1.057930145213132e-06, "loss": 0.4365535378456116, "step": 6328 }, { "epoch": 1.719836956521739, "grad_norm": 1.2964954644532636, "learning_rate": 1.0559196839312192e-06, "loss": 0.42742326855659485, "step": 6329 }, { "epoch": 1.7201086956521738, "grad_norm": 1.1780082602496031, "learning_rate": 1.0539110282952191e-06, "loss": 0.3510274887084961, "step": 6330 }, { "epoch": 1.7203804347826086, "grad_norm": 1.093922784151843, "learning_rate": 1.0519041787106453e-06, "loss": 0.34080591797828674, "step": 6331 }, { "epoch": 1.7206521739130434, "grad_norm": 1.3761428562385458, "learning_rate": 1.0498991355826372e-06, "loss": 0.4100366532802582, "step": 6332 }, { "epoch": 1.7209239130434781, "grad_norm": 1.4037401630588298, "learning_rate": 1.0478958993159848e-06, "loss": 0.3968888223171234, "step": 6333 }, { "epoch": 1.721195652173913, "grad_norm": 1.3571326548939713, "learning_rate": 1.0458944703151008e-06, "loss": 0.49682432413101196, "step": 6334 }, { "epoch": 1.7214673913043477, "grad_norm": 1.4320493048542462, "learning_rate": 1.0438948489840327e-06, "loss": 0.451873242855072, "step": 6335 }, { "epoch": 1.7217391304347827, "grad_norm": 1.2950001829625475, "learning_rate": 1.0418970357264757e-06, "loss": 0.3506561517715454, "step": 6336 }, { "epoch": 1.7220108695652174, "grad_norm": 1.1094510094553, "learning_rate": 1.0399010309457459e-06, "loss": 0.34114933013916016, "step": 6337 }, { "epoch": 1.7222826086956522, "grad_norm": 1.5665483631301562, "learning_rate": 1.0379068350448019e-06, "loss": 0.4312995672225952, "step": 6338 }, { "epoch": 1.722554347826087, "grad_norm": 1.1801751539104202, "learning_rate": 1.035914448426235e-06, "loss": 0.376930296421051, "step": 6339 }, { "epoch": 1.7228260869565217, "grad_norm": 1.2080769872539185, "learning_rate": 1.033923871492274e-06, "loss": 0.3966860771179199, "step": 6340 }, { "epoch": 1.7230978260869565, "grad_norm": 1.3426070735060185, "learning_rate": 1.0319351046447756e-06, "loss": 0.43293145298957825, "step": 6341 }, { "epoch": 1.7233695652173913, "grad_norm": 1.370210102713611, "learning_rate": 1.029948148285237e-06, "loss": 0.36725693941116333, "step": 6342 }, { "epoch": 1.7236413043478263, "grad_norm": 1.559974438582686, "learning_rate": 1.0279630028147913e-06, "loss": 0.44929465651512146, "step": 6343 }, { "epoch": 1.723913043478261, "grad_norm": 1.104819558442932, "learning_rate": 1.0259796686341972e-06, "loss": 0.332186758518219, "step": 6344 }, { "epoch": 1.7241847826086958, "grad_norm": 1.328485650384869, "learning_rate": 1.0239981461438574e-06, "loss": 0.44715452194213867, "step": 6345 }, { "epoch": 1.7244565217391306, "grad_norm": 1.326456244500988, "learning_rate": 1.0220184357438057e-06, "loss": 0.4068800210952759, "step": 6346 }, { "epoch": 1.7247282608695653, "grad_norm": 1.278880965394144, "learning_rate": 1.0200405378337087e-06, "loss": 0.37461429834365845, "step": 6347 }, { "epoch": 1.725, "grad_norm": 1.1860904892604394, "learning_rate": 1.0180644528128648e-06, "loss": 0.37641382217407227, "step": 6348 }, { "epoch": 1.7252717391304349, "grad_norm": 1.3261764505630895, "learning_rate": 1.0160901810802114e-06, "loss": 0.4983111023902893, "step": 6349 }, { "epoch": 1.7255434782608696, "grad_norm": 1.2412482962098506, "learning_rate": 1.0141177230343203e-06, "loss": 0.42261648178100586, "step": 6350 }, { "epoch": 1.7258152173913044, "grad_norm": 1.3186166609360028, "learning_rate": 1.0121470790733912e-06, "loss": 0.45458829402923584, "step": 6351 }, { "epoch": 1.7260869565217392, "grad_norm": 1.1753729959969321, "learning_rate": 1.0101782495952605e-06, "loss": 0.39913684129714966, "step": 6352 }, { "epoch": 1.726358695652174, "grad_norm": 1.177852709018174, "learning_rate": 1.0082112349974017e-06, "loss": 0.37900659441947937, "step": 6353 }, { "epoch": 1.7266304347826087, "grad_norm": 1.3629416773107887, "learning_rate": 1.0062460356769189e-06, "loss": 0.5157626867294312, "step": 6354 }, { "epoch": 1.7269021739130435, "grad_norm": 1.1692496704203195, "learning_rate": 1.0042826520305449e-06, "loss": 0.3288397789001465, "step": 6355 }, { "epoch": 1.7271739130434782, "grad_norm": 1.0730553784794539, "learning_rate": 1.0023210844546582e-06, "loss": 0.3715934157371521, "step": 6356 }, { "epoch": 1.727445652173913, "grad_norm": 1.4515977858238713, "learning_rate": 1.0003613333452578e-06, "loss": 0.41189777851104736, "step": 6357 }, { "epoch": 1.7277173913043478, "grad_norm": 1.240866423629183, "learning_rate": 9.984033990979835e-07, "loss": 0.3449782729148865, "step": 6358 }, { "epoch": 1.7279891304347825, "grad_norm": 1.3479626035759356, "learning_rate": 9.964472821081084e-07, "loss": 0.40145599842071533, "step": 6359 }, { "epoch": 1.7282608695652173, "grad_norm": 1.483408088177268, "learning_rate": 9.944929827705329e-07, "loss": 0.4907209575176239, "step": 6360 }, { "epoch": 1.728532608695652, "grad_norm": 1.0039202473616222, "learning_rate": 9.925405014797962e-07, "loss": 0.27869224548339844, "step": 6361 }, { "epoch": 1.7288043478260868, "grad_norm": 1.4632383548664711, "learning_rate": 9.905898386300672e-07, "loss": 0.3890642523765564, "step": 6362 }, { "epoch": 1.7290760869565216, "grad_norm": 1.2478621335617588, "learning_rate": 9.88640994615152e-07, "loss": 0.3445316553115845, "step": 6363 }, { "epoch": 1.7293478260869564, "grad_norm": 1.274199039410826, "learning_rate": 9.866939698284838e-07, "loss": 0.41795891523361206, "step": 6364 }, { "epoch": 1.7296195652173914, "grad_norm": 1.1662634405072188, "learning_rate": 9.847487646631304e-07, "loss": 0.40664398670196533, "step": 6365 }, { "epoch": 1.7298913043478261, "grad_norm": 1.2257858859073887, "learning_rate": 9.828053795117954e-07, "loss": 0.413178414106369, "step": 6366 }, { "epoch": 1.7301630434782609, "grad_norm": 1.2772027412975353, "learning_rate": 9.808638147668137e-07, "loss": 0.35274940729141235, "step": 6367 }, { "epoch": 1.7304347826086957, "grad_norm": 1.202835659905609, "learning_rate": 9.789240708201474e-07, "loss": 0.4071248173713684, "step": 6368 }, { "epoch": 1.7307065217391304, "grad_norm": 1.1421123091217564, "learning_rate": 9.76986148063398e-07, "loss": 0.3664047122001648, "step": 6369 }, { "epoch": 1.7309782608695652, "grad_norm": 1.3763045404986722, "learning_rate": 9.750500468877988e-07, "loss": 0.4100631773471832, "step": 6370 }, { "epoch": 1.73125, "grad_norm": 1.4683641611465224, "learning_rate": 9.731157676842063e-07, "loss": 0.47433608770370483, "step": 6371 }, { "epoch": 1.731521739130435, "grad_norm": 1.126663137167698, "learning_rate": 9.711833108431234e-07, "loss": 0.3170397877693176, "step": 6372 }, { "epoch": 1.7317934782608697, "grad_norm": 1.2493983920564773, "learning_rate": 9.692526767546727e-07, "loss": 0.4091048538684845, "step": 6373 }, { "epoch": 1.7320652173913045, "grad_norm": 1.293045087101998, "learning_rate": 9.673238658086182e-07, "loss": 0.4596250057220459, "step": 6374 }, { "epoch": 1.7323369565217392, "grad_norm": 1.2845994576060575, "learning_rate": 9.653968783943457e-07, "loss": 0.3872515559196472, "step": 6375 }, { "epoch": 1.732608695652174, "grad_norm": 1.3407970029250953, "learning_rate": 9.634717149008855e-07, "loss": 0.4572744369506836, "step": 6376 }, { "epoch": 1.7328804347826088, "grad_norm": 1.0884039155870895, "learning_rate": 9.615483757168887e-07, "loss": 0.34565794467926025, "step": 6377 }, { "epoch": 1.7331521739130435, "grad_norm": 1.3169595603158917, "learning_rate": 9.596268612306393e-07, "loss": 0.431821346282959, "step": 6378 }, { "epoch": 1.7334239130434783, "grad_norm": 1.2590157283041203, "learning_rate": 9.577071718300635e-07, "loss": 0.4537539482116699, "step": 6379 }, { "epoch": 1.733695652173913, "grad_norm": 1.1785039823177657, "learning_rate": 9.557893079027047e-07, "loss": 0.31239479780197144, "step": 6380 }, { "epoch": 1.7339673913043478, "grad_norm": 1.3976080000994358, "learning_rate": 9.538732698357477e-07, "loss": 0.4342100918292999, "step": 6381 }, { "epoch": 1.7342391304347826, "grad_norm": 1.4287773906954089, "learning_rate": 9.519590580160054e-07, "loss": 0.479745090007782, "step": 6382 }, { "epoch": 1.7345108695652174, "grad_norm": 1.0593178581130585, "learning_rate": 9.500466728299229e-07, "loss": 0.36036449670791626, "step": 6383 }, { "epoch": 1.7347826086956522, "grad_norm": 1.0970709794160778, "learning_rate": 9.481361146635726e-07, "loss": 0.2834376096725464, "step": 6384 }, { "epoch": 1.735054347826087, "grad_norm": 1.3189154653029773, "learning_rate": 9.462273839026625e-07, "loss": 0.4522762596607208, "step": 6385 }, { "epoch": 1.7353260869565217, "grad_norm": 1.390308437139984, "learning_rate": 9.443204809325335e-07, "loss": 0.41818732023239136, "step": 6386 }, { "epoch": 1.7355978260869565, "grad_norm": 1.3076108539874167, "learning_rate": 9.424154061381496e-07, "loss": 0.42663663625717163, "step": 6387 }, { "epoch": 1.7358695652173912, "grad_norm": 1.1412090991826418, "learning_rate": 9.405121599041123e-07, "loss": 0.3688306212425232, "step": 6388 }, { "epoch": 1.736141304347826, "grad_norm": 1.2403486541200661, "learning_rate": 9.38610742614653e-07, "loss": 0.38340121507644653, "step": 6389 }, { "epoch": 1.7364130434782608, "grad_norm": 1.2664716587911324, "learning_rate": 9.367111546536334e-07, "loss": 0.4142090082168579, "step": 6390 }, { "epoch": 1.7366847826086955, "grad_norm": 1.0523139952292884, "learning_rate": 9.348133964045414e-07, "loss": 0.3241225481033325, "step": 6391 }, { "epoch": 1.7369565217391303, "grad_norm": 1.3908301245709345, "learning_rate": 9.32917468250506e-07, "loss": 0.4067414402961731, "step": 6392 }, { "epoch": 1.737228260869565, "grad_norm": 1.065466571356253, "learning_rate": 9.310233705742777e-07, "loss": 0.3440515995025635, "step": 6393 }, { "epoch": 1.7375, "grad_norm": 1.2800801803569206, "learning_rate": 9.291311037582362e-07, "loss": 0.40284937620162964, "step": 6394 }, { "epoch": 1.7377717391304348, "grad_norm": 1.5020382789714817, "learning_rate": 9.272406681844015e-07, "loss": 0.43881332874298096, "step": 6395 }, { "epoch": 1.7380434782608696, "grad_norm": 1.3257646602321222, "learning_rate": 9.253520642344139e-07, "loss": 0.4118295907974243, "step": 6396 }, { "epoch": 1.7383152173913043, "grad_norm": 1.3220042639537521, "learning_rate": 9.234652922895493e-07, "loss": 0.394356369972229, "step": 6397 }, { "epoch": 1.7385869565217391, "grad_norm": 1.3533402182935241, "learning_rate": 9.215803527307121e-07, "loss": 0.49017661809921265, "step": 6398 }, { "epoch": 1.7388586956521739, "grad_norm": 1.4978775441477348, "learning_rate": 9.19697245938439e-07, "loss": 0.4680248498916626, "step": 6399 }, { "epoch": 1.7391304347826086, "grad_norm": 1.3138948872531981, "learning_rate": 9.178159722928914e-07, "loss": 0.40369343757629395, "step": 6400 }, { "epoch": 1.7394021739130436, "grad_norm": 1.4546778308730135, "learning_rate": 9.159365321738655e-07, "loss": 0.4459415078163147, "step": 6401 }, { "epoch": 1.7396739130434784, "grad_norm": 1.2068556112265922, "learning_rate": 9.140589259607879e-07, "loss": 0.4044276773929596, "step": 6402 }, { "epoch": 1.7399456521739132, "grad_norm": 1.2265862681158504, "learning_rate": 9.121831540327086e-07, "loss": 0.3078470826148987, "step": 6403 }, { "epoch": 1.740217391304348, "grad_norm": 1.3714495164477525, "learning_rate": 9.103092167683136e-07, "loss": 0.4681811034679413, "step": 6404 }, { "epoch": 1.7404891304347827, "grad_norm": 1.2566589881111847, "learning_rate": 9.084371145459181e-07, "loss": 0.34267887473106384, "step": 6405 }, { "epoch": 1.7407608695652175, "grad_norm": 1.27436405677574, "learning_rate": 9.065668477434652e-07, "loss": 0.34383660554885864, "step": 6406 }, { "epoch": 1.7410326086956522, "grad_norm": 1.00504119923953, "learning_rate": 9.046984167385242e-07, "loss": 0.3211309313774109, "step": 6407 }, { "epoch": 1.741304347826087, "grad_norm": 1.2409495497384062, "learning_rate": 9.028318219082987e-07, "loss": 0.3370603024959564, "step": 6408 }, { "epoch": 1.7415760869565218, "grad_norm": 1.2490350801250585, "learning_rate": 9.009670636296209e-07, "loss": 0.3790801167488098, "step": 6409 }, { "epoch": 1.7418478260869565, "grad_norm": 1.455000292374808, "learning_rate": 8.991041422789526e-07, "loss": 0.5168970227241516, "step": 6410 }, { "epoch": 1.7421195652173913, "grad_norm": 1.305800712889706, "learning_rate": 8.972430582323788e-07, "loss": 0.426724910736084, "step": 6411 }, { "epoch": 1.742391304347826, "grad_norm": 1.3667745838475416, "learning_rate": 8.953838118656222e-07, "loss": 0.4540995955467224, "step": 6412 }, { "epoch": 1.7426630434782608, "grad_norm": 1.4895059276165976, "learning_rate": 8.935264035540314e-07, "loss": 0.4673028588294983, "step": 6413 }, { "epoch": 1.7429347826086956, "grad_norm": 1.200645822007807, "learning_rate": 8.916708336725777e-07, "loss": 0.39798545837402344, "step": 6414 }, { "epoch": 1.7432065217391304, "grad_norm": 1.3467947730061747, "learning_rate": 8.898171025958735e-07, "loss": 0.42928045988082886, "step": 6415 }, { "epoch": 1.7434782608695651, "grad_norm": 1.2522539710903373, "learning_rate": 8.879652106981473e-07, "loss": 0.35034048557281494, "step": 6416 }, { "epoch": 1.74375, "grad_norm": 1.2095743967475625, "learning_rate": 8.861151583532657e-07, "loss": 0.3576541841030121, "step": 6417 }, { "epoch": 1.7440217391304347, "grad_norm": 1.177121120004644, "learning_rate": 8.842669459347186e-07, "loss": 0.43957236409187317, "step": 6418 }, { "epoch": 1.7442934782608694, "grad_norm": 1.3563140727884608, "learning_rate": 8.824205738156299e-07, "loss": 0.38865941762924194, "step": 6419 }, { "epoch": 1.7445652173913042, "grad_norm": 1.2981133850150575, "learning_rate": 8.805760423687426e-07, "loss": 0.35996124148368835, "step": 6420 }, { "epoch": 1.744836956521739, "grad_norm": 1.0730097549075317, "learning_rate": 8.787333519664365e-07, "loss": 0.30602383613586426, "step": 6421 }, { "epoch": 1.7451086956521737, "grad_norm": 1.1416442546844054, "learning_rate": 8.768925029807185e-07, "loss": 0.37967631220817566, "step": 6422 }, { "epoch": 1.7453804347826087, "grad_norm": 1.3189600801322223, "learning_rate": 8.75053495783219e-07, "loss": 0.3879662752151489, "step": 6423 }, { "epoch": 1.7456521739130435, "grad_norm": 1.2354353325129073, "learning_rate": 8.732163307452024e-07, "loss": 0.3699796199798584, "step": 6424 }, { "epoch": 1.7459239130434783, "grad_norm": 1.4623271102068933, "learning_rate": 8.713810082375573e-07, "loss": 0.42244505882263184, "step": 6425 }, { "epoch": 1.746195652173913, "grad_norm": 1.2144294600633851, "learning_rate": 8.695475286308053e-07, "loss": 0.3687070906162262, "step": 6426 }, { "epoch": 1.7464673913043478, "grad_norm": 1.177859118783923, "learning_rate": 8.677158922950846e-07, "loss": 0.37554821372032166, "step": 6427 }, { "epoch": 1.7467391304347826, "grad_norm": 1.335023648216821, "learning_rate": 8.658860996001772e-07, "loss": 0.3942204713821411, "step": 6428 }, { "epoch": 1.7470108695652173, "grad_norm": 1.3718241876432828, "learning_rate": 8.640581509154821e-07, "loss": 0.3936592638492584, "step": 6429 }, { "epoch": 1.7472826086956523, "grad_norm": 1.2178353808550808, "learning_rate": 8.622320466100242e-07, "loss": 0.3819931745529175, "step": 6430 }, { "epoch": 1.747554347826087, "grad_norm": 0.9289555895777456, "learning_rate": 8.604077870524674e-07, "loss": 0.2505936622619629, "step": 6431 }, { "epoch": 1.7478260869565219, "grad_norm": 1.2935542875362478, "learning_rate": 8.585853726110904e-07, "loss": 0.42270535230636597, "step": 6432 }, { "epoch": 1.7480978260869566, "grad_norm": 1.230043077758474, "learning_rate": 8.56764803653809e-07, "loss": 0.3910095989704132, "step": 6433 }, { "epoch": 1.7483695652173914, "grad_norm": 1.2170126658783738, "learning_rate": 8.549460805481613e-07, "loss": 0.3569868803024292, "step": 6434 }, { "epoch": 1.7486413043478262, "grad_norm": 1.0948230952127063, "learning_rate": 8.531292036613159e-07, "loss": 0.3575572371482849, "step": 6435 }, { "epoch": 1.748913043478261, "grad_norm": 1.116194292104388, "learning_rate": 8.513141733600638e-07, "loss": 0.3273870646953583, "step": 6436 }, { "epoch": 1.7491847826086957, "grad_norm": 1.0788237895498758, "learning_rate": 8.495009900108275e-07, "loss": 0.3203873038291931, "step": 6437 }, { "epoch": 1.7494565217391305, "grad_norm": 1.2307124552366855, "learning_rate": 8.476896539796586e-07, "loss": 0.38422656059265137, "step": 6438 }, { "epoch": 1.7497282608695652, "grad_norm": 1.5038533723879333, "learning_rate": 8.45880165632228e-07, "loss": 0.48469436168670654, "step": 6439 }, { "epoch": 1.75, "grad_norm": 1.2810698634204443, "learning_rate": 8.440725253338411e-07, "loss": 0.37326210737228394, "step": 6440 }, { "epoch": 1.7502717391304348, "grad_norm": 1.3206349075545243, "learning_rate": 8.42266733449425e-07, "loss": 0.3975270688533783, "step": 6441 }, { "epoch": 1.7505434782608695, "grad_norm": 1.2618888652417226, "learning_rate": 8.404627903435403e-07, "loss": 0.36648494005203247, "step": 6442 }, { "epoch": 1.7508152173913043, "grad_norm": 1.2330540485827048, "learning_rate": 8.386606963803656e-07, "loss": 0.41552281379699707, "step": 6443 }, { "epoch": 1.751086956521739, "grad_norm": 1.297448473441436, "learning_rate": 8.368604519237123e-07, "loss": 0.3873337507247925, "step": 6444 }, { "epoch": 1.7513586956521738, "grad_norm": 1.2707836578907417, "learning_rate": 8.350620573370183e-07, "loss": 0.4562046229839325, "step": 6445 }, { "epoch": 1.7516304347826086, "grad_norm": 1.3302267584943452, "learning_rate": 8.332655129833434e-07, "loss": 0.46844589710235596, "step": 6446 }, { "epoch": 1.7519021739130434, "grad_norm": 1.4115721942375574, "learning_rate": 8.314708192253795e-07, "loss": 0.5137619376182556, "step": 6447 }, { "epoch": 1.7521739130434781, "grad_norm": 1.2588458531382443, "learning_rate": 8.296779764254403e-07, "loss": 0.3627421259880066, "step": 6448 }, { "epoch": 1.752445652173913, "grad_norm": 1.3245849121482463, "learning_rate": 8.278869849454718e-07, "loss": 0.4418327212333679, "step": 6449 }, { "epoch": 1.7527173913043477, "grad_norm": 1.0763815767852798, "learning_rate": 8.260978451470347e-07, "loss": 0.32027938961982727, "step": 6450 }, { "epoch": 1.7529891304347827, "grad_norm": 1.503389951018683, "learning_rate": 8.243105573913335e-07, "loss": 0.54095059633255, "step": 6451 }, { "epoch": 1.7532608695652174, "grad_norm": 1.3308555499790806, "learning_rate": 8.225251220391817e-07, "loss": 0.4256086051464081, "step": 6452 }, { "epoch": 1.7535326086956522, "grad_norm": 1.0587107884792086, "learning_rate": 8.207415394510276e-07, "loss": 0.30896487832069397, "step": 6453 }, { "epoch": 1.753804347826087, "grad_norm": 1.1856344995111632, "learning_rate": 8.189598099869467e-07, "loss": 0.4240509271621704, "step": 6454 }, { "epoch": 1.7540760869565217, "grad_norm": 1.393608171178929, "learning_rate": 8.171799340066333e-07, "loss": 0.39915192127227783, "step": 6455 }, { "epoch": 1.7543478260869565, "grad_norm": 1.4081622452938083, "learning_rate": 8.154019118694135e-07, "loss": 0.45851388573646545, "step": 6456 }, { "epoch": 1.7546195652173913, "grad_norm": 1.5318757398925307, "learning_rate": 8.136257439342377e-07, "loss": 0.34482502937316895, "step": 6457 }, { "epoch": 1.7548913043478263, "grad_norm": 1.3619826781586317, "learning_rate": 8.118514305596836e-07, "loss": 0.40280887484550476, "step": 6458 }, { "epoch": 1.755163043478261, "grad_norm": 1.350908371481213, "learning_rate": 8.100789721039481e-07, "loss": 0.44099730253219604, "step": 6459 }, { "epoch": 1.7554347826086958, "grad_norm": 1.144193883357892, "learning_rate": 8.083083689248606e-07, "loss": 0.3947850465774536, "step": 6460 }, { "epoch": 1.7557065217391306, "grad_norm": 1.2523287283746118, "learning_rate": 8.065396213798737e-07, "loss": 0.4210466146469116, "step": 6461 }, { "epoch": 1.7559782608695653, "grad_norm": 1.2210449651947115, "learning_rate": 8.047727298260655e-07, "loss": 0.45160719752311707, "step": 6462 }, { "epoch": 1.75625, "grad_norm": 1.1879529239095352, "learning_rate": 8.030076946201371e-07, "loss": 0.37630486488342285, "step": 6463 }, { "epoch": 1.7565217391304349, "grad_norm": 1.185691718181053, "learning_rate": 8.012445161184179e-07, "loss": 0.33790481090545654, "step": 6464 }, { "epoch": 1.7567934782608696, "grad_norm": 1.904438783532776, "learning_rate": 7.994831946768622e-07, "loss": 0.4453454613685608, "step": 6465 }, { "epoch": 1.7570652173913044, "grad_norm": 1.2036305467440882, "learning_rate": 7.977237306510455e-07, "loss": 0.3966543674468994, "step": 6466 }, { "epoch": 1.7573369565217392, "grad_norm": 1.2967209506771633, "learning_rate": 7.959661243961759e-07, "loss": 0.44759246706962585, "step": 6467 }, { "epoch": 1.757608695652174, "grad_norm": 1.3524537841267235, "learning_rate": 7.942103762670783e-07, "loss": 0.45394641160964966, "step": 6468 }, { "epoch": 1.7578804347826087, "grad_norm": 1.0179689495491664, "learning_rate": 7.924564866182072e-07, "loss": 0.30790355801582336, "step": 6469 }, { "epoch": 1.7581521739130435, "grad_norm": 1.1648369959793228, "learning_rate": 7.907044558036425e-07, "loss": 0.37696489691734314, "step": 6470 }, { "epoch": 1.7584239130434782, "grad_norm": 1.2770327390935183, "learning_rate": 7.889542841770836e-07, "loss": 0.39302027225494385, "step": 6471 }, { "epoch": 1.758695652173913, "grad_norm": 1.2658225997094075, "learning_rate": 7.872059720918613e-07, "loss": 0.33490699529647827, "step": 6472 }, { "epoch": 1.7589673913043478, "grad_norm": 1.1496265494079367, "learning_rate": 7.854595199009229e-07, "loss": 0.4130322337150574, "step": 6473 }, { "epoch": 1.7592391304347825, "grad_norm": 1.3140733316563393, "learning_rate": 7.837149279568523e-07, "loss": 0.4134306013584137, "step": 6474 }, { "epoch": 1.7595108695652173, "grad_norm": 1.317535015778978, "learning_rate": 7.819721966118454e-07, "loss": 0.41580283641815186, "step": 6475 }, { "epoch": 1.759782608695652, "grad_norm": 1.4132176066018192, "learning_rate": 7.802313262177285e-07, "loss": 0.47398048639297485, "step": 6476 }, { "epoch": 1.7600543478260868, "grad_norm": 1.1405994006423714, "learning_rate": 7.784923171259529e-07, "loss": 0.3329354524612427, "step": 6477 }, { "epoch": 1.7603260869565216, "grad_norm": 1.2280719670130438, "learning_rate": 7.767551696875942e-07, "loss": 0.39578351378440857, "step": 6478 }, { "epoch": 1.7605978260869564, "grad_norm": 1.3234232366240875, "learning_rate": 7.750198842533451e-07, "loss": 0.4149225056171417, "step": 6479 }, { "epoch": 1.7608695652173914, "grad_norm": 1.3696922148199249, "learning_rate": 7.732864611735325e-07, "loss": 0.47947925329208374, "step": 6480 }, { "epoch": 1.7611413043478261, "grad_norm": 1.3334357383679356, "learning_rate": 7.715549007981026e-07, "loss": 0.4261801838874817, "step": 6481 }, { "epoch": 1.7614130434782609, "grad_norm": 1.2205993966055964, "learning_rate": 7.69825203476623e-07, "loss": 0.34497857093811035, "step": 6482 }, { "epoch": 1.7616847826086957, "grad_norm": 1.3577186757284354, "learning_rate": 7.680973695582894e-07, "loss": 0.4954586625099182, "step": 6483 }, { "epoch": 1.7619565217391304, "grad_norm": 1.4089737090087555, "learning_rate": 7.663713993919197e-07, "loss": 0.36443600058555603, "step": 6484 }, { "epoch": 1.7622282608695652, "grad_norm": 1.241677351434111, "learning_rate": 7.646472933259574e-07, "loss": 0.41167402267456055, "step": 6485 }, { "epoch": 1.7625, "grad_norm": 1.6292317578867455, "learning_rate": 7.62925051708463e-07, "loss": 0.3870077431201935, "step": 6486 }, { "epoch": 1.762771739130435, "grad_norm": 1.2039532636988177, "learning_rate": 7.612046748871327e-07, "loss": 0.3532176613807678, "step": 6487 }, { "epoch": 1.7630434782608697, "grad_norm": 1.4330591716886112, "learning_rate": 7.594861632092754e-07, "loss": 0.4987249970436096, "step": 6488 }, { "epoch": 1.7633152173913045, "grad_norm": 1.1820971137172382, "learning_rate": 7.577695170218225e-07, "loss": 0.3787515163421631, "step": 6489 }, { "epoch": 1.7635869565217392, "grad_norm": 1.2531700860989028, "learning_rate": 7.560547366713422e-07, "loss": 0.3777139484882355, "step": 6490 }, { "epoch": 1.763858695652174, "grad_norm": 1.295751756707502, "learning_rate": 7.5434182250401e-07, "loss": 0.44533848762512207, "step": 6491 }, { "epoch": 1.7641304347826088, "grad_norm": 1.3354676382738242, "learning_rate": 7.526307748656358e-07, "loss": 0.4426640272140503, "step": 6492 }, { "epoch": 1.7644021739130435, "grad_norm": 1.4084183144727205, "learning_rate": 7.509215941016479e-07, "loss": 0.42799991369247437, "step": 6493 }, { "epoch": 1.7646739130434783, "grad_norm": 1.453934446093955, "learning_rate": 7.49214280557099e-07, "loss": 0.4815438389778137, "step": 6494 }, { "epoch": 1.764945652173913, "grad_norm": 1.204032839538461, "learning_rate": 7.475088345766624e-07, "loss": 0.42606496810913086, "step": 6495 }, { "epoch": 1.7652173913043478, "grad_norm": 1.1019857616630107, "learning_rate": 7.458052565046381e-07, "loss": 0.34701165556907654, "step": 6496 }, { "epoch": 1.7654891304347826, "grad_norm": 1.0269744498838729, "learning_rate": 7.441035466849489e-07, "loss": 0.33345848321914673, "step": 6497 }, { "epoch": 1.7657608695652174, "grad_norm": 1.1435087299691575, "learning_rate": 7.424037054611343e-07, "loss": 0.3731915354728699, "step": 6498 }, { "epoch": 1.7660326086956522, "grad_norm": 1.3762037424541589, "learning_rate": 7.407057331763645e-07, "loss": 0.45776939392089844, "step": 6499 }, { "epoch": 1.766304347826087, "grad_norm": 1.1333112111483752, "learning_rate": 7.390096301734284e-07, "loss": 0.36082205176353455, "step": 6500 }, { "epoch": 1.7665760869565217, "grad_norm": 1.3699090909206353, "learning_rate": 7.373153967947389e-07, "loss": 0.47391605377197266, "step": 6501 }, { "epoch": 1.7668478260869565, "grad_norm": 1.5498606977969835, "learning_rate": 7.356230333823266e-07, "loss": 0.4913460314273834, "step": 6502 }, { "epoch": 1.7671195652173912, "grad_norm": 1.2978336439799742, "learning_rate": 7.339325402778552e-07, "loss": 0.40875110030174255, "step": 6503 }, { "epoch": 1.767391304347826, "grad_norm": 1.3481627802324732, "learning_rate": 7.322439178225982e-07, "loss": 0.38745182752609253, "step": 6504 }, { "epoch": 1.7676630434782608, "grad_norm": 1.481505145457985, "learning_rate": 7.305571663574629e-07, "loss": 0.49462443590164185, "step": 6505 }, { "epoch": 1.7679347826086955, "grad_norm": 1.2784657696279293, "learning_rate": 7.288722862229691e-07, "loss": 0.43837815523147583, "step": 6506 }, { "epoch": 1.7682065217391303, "grad_norm": 1.2966881367272691, "learning_rate": 7.271892777592638e-07, "loss": 0.3815882205963135, "step": 6507 }, { "epoch": 1.768478260869565, "grad_norm": 1.34904825041361, "learning_rate": 7.255081413061194e-07, "loss": 0.43177175521850586, "step": 6508 }, { "epoch": 1.76875, "grad_norm": 1.2758502627765453, "learning_rate": 7.238288772029201e-07, "loss": 0.37710872292518616, "step": 6509 }, { "epoch": 1.7690217391304348, "grad_norm": 1.4118934246371864, "learning_rate": 7.221514857886857e-07, "loss": 0.4373759627342224, "step": 6510 }, { "epoch": 1.7692934782608696, "grad_norm": 1.2606431921126144, "learning_rate": 7.204759674020457e-07, "loss": 0.3522506356239319, "step": 6511 }, { "epoch": 1.7695652173913043, "grad_norm": 1.1518388733520994, "learning_rate": 7.18802322381258e-07, "loss": 0.3831607699394226, "step": 6512 }, { "epoch": 1.7698369565217391, "grad_norm": 1.122271506491699, "learning_rate": 7.171305510642024e-07, "loss": 0.3194677233695984, "step": 6513 }, { "epoch": 1.7701086956521739, "grad_norm": 1.3720915326501215, "learning_rate": 7.154606537883757e-07, "loss": 0.415915846824646, "step": 6514 }, { "epoch": 1.7703804347826086, "grad_norm": 1.4489022848928865, "learning_rate": 7.137926308909027e-07, "loss": 0.4543108344078064, "step": 6515 }, { "epoch": 1.7706521739130436, "grad_norm": 1.1302199058136653, "learning_rate": 7.12126482708524e-07, "loss": 0.3184998631477356, "step": 6516 }, { "epoch": 1.7709239130434784, "grad_norm": 1.3114838347458726, "learning_rate": 7.104622095776081e-07, "loss": 0.3688080608844757, "step": 6517 }, { "epoch": 1.7711956521739132, "grad_norm": 1.2632765398546777, "learning_rate": 7.087998118341366e-07, "loss": 0.3837469816207886, "step": 6518 }, { "epoch": 1.771467391304348, "grad_norm": 1.423107926187245, "learning_rate": 7.071392898137198e-07, "loss": 0.4210670590400696, "step": 6519 }, { "epoch": 1.7717391304347827, "grad_norm": 1.3760752816792459, "learning_rate": 7.05480643851586e-07, "loss": 0.4455452561378479, "step": 6520 }, { "epoch": 1.7720108695652175, "grad_norm": 1.3192342159944765, "learning_rate": 7.038238742825876e-07, "loss": 0.4018821120262146, "step": 6521 }, { "epoch": 1.7722826086956522, "grad_norm": 1.3344911503062535, "learning_rate": 7.021689814411925e-07, "loss": 0.43932706117630005, "step": 6522 }, { "epoch": 1.772554347826087, "grad_norm": 1.3456422686116183, "learning_rate": 7.005159656614957e-07, "loss": 0.42171376943588257, "step": 6523 }, { "epoch": 1.7728260869565218, "grad_norm": 1.2530013501043695, "learning_rate": 6.988648272772103e-07, "loss": 0.4073728322982788, "step": 6524 }, { "epoch": 1.7730978260869565, "grad_norm": 1.4601919894921809, "learning_rate": 6.972155666216684e-07, "loss": 0.45174771547317505, "step": 6525 }, { "epoch": 1.7733695652173913, "grad_norm": 1.1318295915121348, "learning_rate": 6.955681840278294e-07, "loss": 0.38958558440208435, "step": 6526 }, { "epoch": 1.773641304347826, "grad_norm": 1.1567734317075056, "learning_rate": 6.939226798282672e-07, "loss": 0.3447136878967285, "step": 6527 }, { "epoch": 1.7739130434782608, "grad_norm": 1.2486357372939225, "learning_rate": 6.922790543551794e-07, "loss": 0.4459841251373291, "step": 6528 }, { "epoch": 1.7741847826086956, "grad_norm": 1.3071969827373087, "learning_rate": 6.90637307940385e-07, "loss": 0.44685691595077515, "step": 6529 }, { "epoch": 1.7744565217391304, "grad_norm": 1.2845493747758308, "learning_rate": 6.889974409153221e-07, "loss": 0.42113766074180603, "step": 6530 }, { "epoch": 1.7747282608695651, "grad_norm": 1.3338321912928635, "learning_rate": 6.873594536110484e-07, "loss": 0.4573695659637451, "step": 6531 }, { "epoch": 1.775, "grad_norm": 1.298120381531395, "learning_rate": 6.857233463582435e-07, "loss": 0.392080694437027, "step": 6532 }, { "epoch": 1.7752717391304347, "grad_norm": 1.2212320441747915, "learning_rate": 6.840891194872112e-07, "loss": 0.4231026768684387, "step": 6533 }, { "epoch": 1.7755434782608694, "grad_norm": 1.3536818445512102, "learning_rate": 6.824567733278675e-07, "loss": 0.4329949617385864, "step": 6534 }, { "epoch": 1.7758152173913042, "grad_norm": 1.2946523415779823, "learning_rate": 6.808263082097544e-07, "loss": 0.4097141623497009, "step": 6535 }, { "epoch": 1.776086956521739, "grad_norm": 1.1215233686649586, "learning_rate": 6.79197724462033e-07, "loss": 0.40956687927246094, "step": 6536 }, { "epoch": 1.7763586956521737, "grad_norm": 1.4468647885883137, "learning_rate": 6.775710224134869e-07, "loss": 0.46812838315963745, "step": 6537 }, { "epoch": 1.7766304347826087, "grad_norm": 1.4167337548925034, "learning_rate": 6.759462023925134e-07, "loss": 0.4300195574760437, "step": 6538 }, { "epoch": 1.7769021739130435, "grad_norm": 1.1873382318260393, "learning_rate": 6.74323264727137e-07, "loss": 0.3726501762866974, "step": 6539 }, { "epoch": 1.7771739130434783, "grad_norm": 1.126115258122073, "learning_rate": 6.727022097449986e-07, "loss": 0.3295113444328308, "step": 6540 }, { "epoch": 1.777445652173913, "grad_norm": 1.2806340796705153, "learning_rate": 6.710830377733579e-07, "loss": 0.41979214549064636, "step": 6541 }, { "epoch": 1.7777173913043478, "grad_norm": 1.260774130215456, "learning_rate": 6.694657491390965e-07, "loss": 0.41050902009010315, "step": 6542 }, { "epoch": 1.7779891304347826, "grad_norm": 1.286207684548033, "learning_rate": 6.678503441687156e-07, "loss": 0.3850133419036865, "step": 6543 }, { "epoch": 1.7782608695652173, "grad_norm": 1.2963937650491775, "learning_rate": 6.662368231883388e-07, "loss": 0.3569667339324951, "step": 6544 }, { "epoch": 1.7785326086956523, "grad_norm": 1.3174363076636717, "learning_rate": 6.646251865236997e-07, "loss": 0.4202989339828491, "step": 6545 }, { "epoch": 1.778804347826087, "grad_norm": 1.1421019249622968, "learning_rate": 6.63015434500166e-07, "loss": 0.4360082149505615, "step": 6546 }, { "epoch": 1.7790760869565219, "grad_norm": 1.3809961069548131, "learning_rate": 6.614075674427134e-07, "loss": 0.5464322566986084, "step": 6547 }, { "epoch": 1.7793478260869566, "grad_norm": 1.307696637158083, "learning_rate": 6.598015856759366e-07, "loss": 0.42951250076293945, "step": 6548 }, { "epoch": 1.7796195652173914, "grad_norm": 1.1985736209374247, "learning_rate": 6.581974895240628e-07, "loss": 0.42434513568878174, "step": 6549 }, { "epoch": 1.7798913043478262, "grad_norm": 1.3883392340121972, "learning_rate": 6.56595279310922e-07, "loss": 0.44555675983428955, "step": 6550 }, { "epoch": 1.780163043478261, "grad_norm": 1.2666826612111146, "learning_rate": 6.549949553599744e-07, "loss": 0.3847145140171051, "step": 6551 }, { "epoch": 1.7804347826086957, "grad_norm": 1.2838623687553596, "learning_rate": 6.533965179942958e-07, "loss": 0.3880724608898163, "step": 6552 }, { "epoch": 1.7807065217391305, "grad_norm": 1.3699526556231267, "learning_rate": 6.517999675365827e-07, "loss": 0.37727952003479004, "step": 6553 }, { "epoch": 1.7809782608695652, "grad_norm": 1.2601114157546478, "learning_rate": 6.502053043091461e-07, "loss": 0.3417876958847046, "step": 6554 }, { "epoch": 1.78125, "grad_norm": 1.1837752054276456, "learning_rate": 6.486125286339217e-07, "loss": 0.35281312465667725, "step": 6555 }, { "epoch": 1.7815217391304348, "grad_norm": 1.216947944651246, "learning_rate": 6.470216408324626e-07, "loss": 0.3880545496940613, "step": 6556 }, { "epoch": 1.7817934782608695, "grad_norm": 1.174373174156077, "learning_rate": 6.454326412259371e-07, "loss": 0.39206844568252563, "step": 6557 }, { "epoch": 1.7820652173913043, "grad_norm": 1.430327826310278, "learning_rate": 6.438455301351354e-07, "loss": 0.49032062292099, "step": 6558 }, { "epoch": 1.782336956521739, "grad_norm": 1.1529015458814735, "learning_rate": 6.422603078804679e-07, "loss": 0.37046509981155396, "step": 6559 }, { "epoch": 1.7826086956521738, "grad_norm": 1.2832807677872347, "learning_rate": 6.40676974781963e-07, "loss": 0.4451935291290283, "step": 6560 }, { "epoch": 1.7828804347826086, "grad_norm": 1.5756422165728783, "learning_rate": 6.390955311592617e-07, "loss": 0.49134543538093567, "step": 6561 }, { "epoch": 1.7831521739130434, "grad_norm": 1.2362242981196243, "learning_rate": 6.375159773316341e-07, "loss": 0.4033368229866028, "step": 6562 }, { "epoch": 1.7834239130434781, "grad_norm": 1.0890368117332307, "learning_rate": 6.359383136179598e-07, "loss": 0.3213374614715576, "step": 6563 }, { "epoch": 1.783695652173913, "grad_norm": 1.3009636049517992, "learning_rate": 6.343625403367415e-07, "loss": 0.4440377652645111, "step": 6564 }, { "epoch": 1.7839673913043477, "grad_norm": 1.3712485762549382, "learning_rate": 6.327886578060993e-07, "loss": 0.4563472270965576, "step": 6565 }, { "epoch": 1.7842391304347827, "grad_norm": 1.1621715854407189, "learning_rate": 6.312166663437692e-07, "loss": 0.36711081862449646, "step": 6566 }, { "epoch": 1.7845108695652174, "grad_norm": 1.1999260909899714, "learning_rate": 6.29646566267108e-07, "loss": 0.3543550968170166, "step": 6567 }, { "epoch": 1.7847826086956522, "grad_norm": 1.0226845491027194, "learning_rate": 6.280783578930915e-07, "loss": 0.33261656761169434, "step": 6568 }, { "epoch": 1.785054347826087, "grad_norm": 1.4122394542221257, "learning_rate": 6.265120415383141e-07, "loss": 0.4808018207550049, "step": 6569 }, { "epoch": 1.7853260869565217, "grad_norm": 1.4649086751787166, "learning_rate": 6.249476175189817e-07, "loss": 0.47322002053260803, "step": 6570 }, { "epoch": 1.7855978260869565, "grad_norm": 1.279371318739693, "learning_rate": 6.23385086150925e-07, "loss": 0.38468360900878906, "step": 6571 }, { "epoch": 1.7858695652173913, "grad_norm": 1.301810649920741, "learning_rate": 6.218244477495894e-07, "loss": 0.3536141514778137, "step": 6572 }, { "epoch": 1.7861413043478263, "grad_norm": 1.129816987210793, "learning_rate": 6.202657026300429e-07, "loss": 0.3106532692909241, "step": 6573 }, { "epoch": 1.786413043478261, "grad_norm": 1.1559685797159738, "learning_rate": 6.187088511069639e-07, "loss": 0.376739501953125, "step": 6574 }, { "epoch": 1.7866847826086958, "grad_norm": 1.458738194573312, "learning_rate": 6.171538934946519e-07, "loss": 0.5464988350868225, "step": 6575 }, { "epoch": 1.7869565217391306, "grad_norm": 1.2469325161458684, "learning_rate": 6.156008301070283e-07, "loss": 0.48896652460098267, "step": 6576 }, { "epoch": 1.7872282608695653, "grad_norm": 1.3219206758380142, "learning_rate": 6.140496612576241e-07, "loss": 0.4098358452320099, "step": 6577 }, { "epoch": 1.7875, "grad_norm": 1.3279377361378226, "learning_rate": 6.125003872595925e-07, "loss": 0.428811252117157, "step": 6578 }, { "epoch": 1.7877717391304349, "grad_norm": 1.1907091433170252, "learning_rate": 6.109530084257043e-07, "loss": 0.37121301889419556, "step": 6579 }, { "epoch": 1.7880434782608696, "grad_norm": 1.2698894338684494, "learning_rate": 6.094075250683495e-07, "loss": 0.4666909873485565, "step": 6580 }, { "epoch": 1.7883152173913044, "grad_norm": 1.2170269156119347, "learning_rate": 6.078639374995265e-07, "loss": 0.3792039453983307, "step": 6581 }, { "epoch": 1.7885869565217392, "grad_norm": 5.363783671621911, "learning_rate": 6.063222460308649e-07, "loss": 0.34180909395217896, "step": 6582 }, { "epoch": 1.788858695652174, "grad_norm": 1.4596535613679757, "learning_rate": 6.047824509735989e-07, "loss": 0.4838481545448303, "step": 6583 }, { "epoch": 1.7891304347826087, "grad_norm": 1.1023537027852315, "learning_rate": 6.032445526385844e-07, "loss": 0.3941723704338074, "step": 6584 }, { "epoch": 1.7894021739130435, "grad_norm": 1.4265522865071067, "learning_rate": 6.017085513362997e-07, "loss": 0.44520246982574463, "step": 6585 }, { "epoch": 1.7896739130434782, "grad_norm": 1.3744697869113744, "learning_rate": 6.001744473768301e-07, "loss": 0.4250006079673767, "step": 6586 }, { "epoch": 1.789945652173913, "grad_norm": 1.2874792742053611, "learning_rate": 5.986422410698855e-07, "loss": 0.41911613941192627, "step": 6587 }, { "epoch": 1.7902173913043478, "grad_norm": 1.5288084492833969, "learning_rate": 5.971119327247898e-07, "loss": 0.48947715759277344, "step": 6588 }, { "epoch": 1.7904891304347825, "grad_norm": 1.2096574207142294, "learning_rate": 5.955835226504869e-07, "loss": 0.31821104884147644, "step": 6589 }, { "epoch": 1.7907608695652173, "grad_norm": 1.3227441737965846, "learning_rate": 5.940570111555311e-07, "loss": 0.45329493284225464, "step": 6590 }, { "epoch": 1.791032608695652, "grad_norm": 1.2337313563698928, "learning_rate": 5.925323985480969e-07, "loss": 0.3954126834869385, "step": 6591 }, { "epoch": 1.7913043478260868, "grad_norm": 1.2963362853573603, "learning_rate": 5.910096851359804e-07, "loss": 0.4619738459587097, "step": 6592 }, { "epoch": 1.7915760869565216, "grad_norm": 1.3352044251013235, "learning_rate": 5.894888712265834e-07, "loss": 0.4038097858428955, "step": 6593 }, { "epoch": 1.7918478260869564, "grad_norm": 1.566527078426045, "learning_rate": 5.879699571269337e-07, "loss": 0.5002365708351135, "step": 6594 }, { "epoch": 1.7921195652173914, "grad_norm": 1.0561176807166193, "learning_rate": 5.864529431436716e-07, "loss": 0.3135859966278076, "step": 6595 }, { "epoch": 1.7923913043478261, "grad_norm": 1.3451960408301027, "learning_rate": 5.849378295830565e-07, "loss": 0.3882262408733368, "step": 6596 }, { "epoch": 1.7926630434782609, "grad_norm": 1.014227809145432, "learning_rate": 5.83424616750956e-07, "loss": 0.2858150601387024, "step": 6597 }, { "epoch": 1.7929347826086957, "grad_norm": 1.3037733723915887, "learning_rate": 5.81913304952868e-07, "loss": 0.46639692783355713, "step": 6598 }, { "epoch": 1.7932065217391304, "grad_norm": 1.3781066563582078, "learning_rate": 5.80403894493895e-07, "loss": 0.4411214590072632, "step": 6599 }, { "epoch": 1.7934782608695652, "grad_norm": 1.1297449607693564, "learning_rate": 5.788963856787566e-07, "loss": 0.34045788645744324, "step": 6600 }, { "epoch": 1.79375, "grad_norm": 1.2940787421804745, "learning_rate": 5.77390778811796e-07, "loss": 0.38306647539138794, "step": 6601 }, { "epoch": 1.794021739130435, "grad_norm": 1.4184144990353775, "learning_rate": 5.758870741969635e-07, "loss": 0.4536875784397125, "step": 6602 }, { "epoch": 1.7942934782608697, "grad_norm": 1.4416678226585273, "learning_rate": 5.74385272137834e-07, "loss": 0.5060940384864807, "step": 6603 }, { "epoch": 1.7945652173913045, "grad_norm": 1.1772652318031454, "learning_rate": 5.728853729375872e-07, "loss": 0.3972446918487549, "step": 6604 }, { "epoch": 1.7948369565217392, "grad_norm": 1.3952650981474561, "learning_rate": 5.71387376899033e-07, "loss": 0.4149983525276184, "step": 6605 }, { "epoch": 1.795108695652174, "grad_norm": 1.3254758177542714, "learning_rate": 5.69891284324583e-07, "loss": 0.4094810485839844, "step": 6606 }, { "epoch": 1.7953804347826088, "grad_norm": 1.1999393132256382, "learning_rate": 5.683970955162743e-07, "loss": 0.41500166058540344, "step": 6607 }, { "epoch": 1.7956521739130435, "grad_norm": 1.349676958494573, "learning_rate": 5.669048107757568e-07, "loss": 0.42131540179252625, "step": 6608 }, { "epoch": 1.7959239130434783, "grad_norm": 1.2019032780918169, "learning_rate": 5.65414430404293e-07, "loss": 0.4115896224975586, "step": 6609 }, { "epoch": 1.796195652173913, "grad_norm": 1.3261370845231018, "learning_rate": 5.639259547027642e-07, "loss": 0.49783605337142944, "step": 6610 }, { "epoch": 1.7964673913043478, "grad_norm": 1.2317038416022448, "learning_rate": 5.624393839716669e-07, "loss": 0.43067628145217896, "step": 6611 }, { "epoch": 1.7967391304347826, "grad_norm": 1.410108518051609, "learning_rate": 5.609547185111131e-07, "loss": 0.4119008183479309, "step": 6612 }, { "epoch": 1.7970108695652174, "grad_norm": 1.3710719358687, "learning_rate": 5.594719586208275e-07, "loss": 0.5151336193084717, "step": 6613 }, { "epoch": 1.7972826086956522, "grad_norm": 1.2405174303326467, "learning_rate": 5.579911046001529e-07, "loss": 0.4518073797225952, "step": 6614 }, { "epoch": 1.797554347826087, "grad_norm": 1.1931812597077975, "learning_rate": 5.565121567480469e-07, "loss": 0.37310177087783813, "step": 6615 }, { "epoch": 1.7978260869565217, "grad_norm": 1.1706786307001247, "learning_rate": 5.550351153630839e-07, "loss": 0.3511825501918793, "step": 6616 }, { "epoch": 1.7980978260869565, "grad_norm": 1.27733122610521, "learning_rate": 5.535599807434488e-07, "loss": 0.38178205490112305, "step": 6617 }, { "epoch": 1.7983695652173912, "grad_norm": 1.2118186218792655, "learning_rate": 5.520867531869445e-07, "loss": 0.3555438816547394, "step": 6618 }, { "epoch": 1.798641304347826, "grad_norm": 1.010896647900135, "learning_rate": 5.506154329909908e-07, "loss": 0.3033096194267273, "step": 6619 }, { "epoch": 1.7989130434782608, "grad_norm": 1.1437313333236498, "learning_rate": 5.491460204526156e-07, "loss": 0.3389871120452881, "step": 6620 }, { "epoch": 1.7991847826086955, "grad_norm": 1.2161229733901981, "learning_rate": 5.476785158684728e-07, "loss": 0.37525513768196106, "step": 6621 }, { "epoch": 1.7994565217391303, "grad_norm": 1.080358791761515, "learning_rate": 5.462129195348198e-07, "loss": 0.3140972852706909, "step": 6622 }, { "epoch": 1.799728260869565, "grad_norm": 1.4173462469970783, "learning_rate": 5.447492317475344e-07, "loss": 0.42300647497177124, "step": 6623 }, { "epoch": 1.8, "grad_norm": 1.2009275483176902, "learning_rate": 5.432874528021104e-07, "loss": 0.38299262523651123, "step": 6624 }, { "epoch": 1.8002717391304348, "grad_norm": 1.3611333935331793, "learning_rate": 5.418275829936537e-07, "loss": 0.43383339047431946, "step": 6625 }, { "epoch": 1.8005434782608696, "grad_norm": 2.6661237544381917, "learning_rate": 5.40369622616883e-07, "loss": 0.37464118003845215, "step": 6626 }, { "epoch": 1.8008152173913043, "grad_norm": 1.306121725800348, "learning_rate": 5.389135719661353e-07, "loss": 0.4019692540168762, "step": 6627 }, { "epoch": 1.8010869565217391, "grad_norm": 1.21889379856466, "learning_rate": 5.374594313353632e-07, "loss": 0.34937700629234314, "step": 6628 }, { "epoch": 1.8013586956521739, "grad_norm": 1.221579556731235, "learning_rate": 5.360072010181261e-07, "loss": 0.3733539879322052, "step": 6629 }, { "epoch": 1.8016304347826086, "grad_norm": 1.2912028580087371, "learning_rate": 5.34556881307604e-07, "loss": 0.39777323603630066, "step": 6630 }, { "epoch": 1.8019021739130436, "grad_norm": 1.2120887262409723, "learning_rate": 5.331084724965919e-07, "loss": 0.3984486162662506, "step": 6631 }, { "epoch": 1.8021739130434784, "grad_norm": 1.1576702770318497, "learning_rate": 5.316619748774976e-07, "loss": 0.33194500207901, "step": 6632 }, { "epoch": 1.8024456521739132, "grad_norm": 1.2896545222284326, "learning_rate": 5.302173887423389e-07, "loss": 0.3544909358024597, "step": 6633 }, { "epoch": 1.802717391304348, "grad_norm": 1.182067767993464, "learning_rate": 5.287747143827526e-07, "loss": 0.3985807001590729, "step": 6634 }, { "epoch": 1.8029891304347827, "grad_norm": 1.3887316812719293, "learning_rate": 5.273339520899911e-07, "loss": 0.42898088693618774, "step": 6635 }, { "epoch": 1.8032608695652175, "grad_norm": 1.2755042232781764, "learning_rate": 5.258951021549141e-07, "loss": 0.4700778126716614, "step": 6636 }, { "epoch": 1.8035326086956522, "grad_norm": 1.2486587198663646, "learning_rate": 5.244581648680003e-07, "loss": 0.3800152540206909, "step": 6637 }, { "epoch": 1.803804347826087, "grad_norm": 1.1826526903894998, "learning_rate": 5.230231405193409e-07, "loss": 0.39223575592041016, "step": 6638 }, { "epoch": 1.8040760869565218, "grad_norm": 1.390765373955359, "learning_rate": 5.215900293986431e-07, "loss": 0.43743252754211426, "step": 6639 }, { "epoch": 1.8043478260869565, "grad_norm": 1.1482222854005344, "learning_rate": 5.201588317952222e-07, "loss": 0.3451962471008301, "step": 6640 }, { "epoch": 1.8046195652173913, "grad_norm": 1.2280369866883087, "learning_rate": 5.187295479980136e-07, "loss": 0.38163235783576965, "step": 6641 }, { "epoch": 1.804891304347826, "grad_norm": 1.24105564548106, "learning_rate": 5.173021782955634e-07, "loss": 0.4110574722290039, "step": 6642 }, { "epoch": 1.8051630434782608, "grad_norm": 1.2893105249026218, "learning_rate": 5.158767229760275e-07, "loss": 0.4436696171760559, "step": 6643 }, { "epoch": 1.8054347826086956, "grad_norm": 1.2662927763872367, "learning_rate": 5.144531823271859e-07, "loss": 0.4070015549659729, "step": 6644 }, { "epoch": 1.8057065217391304, "grad_norm": 1.3634294299700214, "learning_rate": 5.130315566364185e-07, "loss": 0.4169977307319641, "step": 6645 }, { "epoch": 1.8059782608695651, "grad_norm": 1.3328816788555162, "learning_rate": 5.116118461907304e-07, "loss": 0.42256373167037964, "step": 6646 }, { "epoch": 1.80625, "grad_norm": 1.2868264009910637, "learning_rate": 5.101940512767323e-07, "loss": 0.41467568278312683, "step": 6647 }, { "epoch": 1.8065217391304347, "grad_norm": 1.2567199247811855, "learning_rate": 5.087781721806539e-07, "loss": 0.36457404494285583, "step": 6648 }, { "epoch": 1.8067934782608694, "grad_norm": 1.141406065271531, "learning_rate": 5.073642091883313e-07, "loss": 0.37025851011276245, "step": 6649 }, { "epoch": 1.8070652173913042, "grad_norm": 1.3418160525626446, "learning_rate": 5.059521625852204e-07, "loss": 0.45579320192337036, "step": 6650 }, { "epoch": 1.807336956521739, "grad_norm": 1.2910711743452947, "learning_rate": 5.045420326563888e-07, "loss": 0.3694441318511963, "step": 6651 }, { "epoch": 1.8076086956521737, "grad_norm": 1.2781576770845144, "learning_rate": 5.031338196865121e-07, "loss": 0.43217355012893677, "step": 6652 }, { "epoch": 1.8078804347826087, "grad_norm": 1.2057836319598583, "learning_rate": 5.017275239598852e-07, "loss": 0.3822716474533081, "step": 6653 }, { "epoch": 1.8081521739130435, "grad_norm": 1.3891711748759734, "learning_rate": 5.003231457604119e-07, "loss": 0.4842960238456726, "step": 6654 }, { "epoch": 1.8084239130434783, "grad_norm": 1.179325737039236, "learning_rate": 4.989206853716133e-07, "loss": 0.36762917041778564, "step": 6655 }, { "epoch": 1.808695652173913, "grad_norm": 1.4620828370352483, "learning_rate": 4.975201430766164e-07, "loss": 0.4508317708969116, "step": 6656 }, { "epoch": 1.8089673913043478, "grad_norm": 1.2269915543695669, "learning_rate": 4.961215191581692e-07, "loss": 0.4180455803871155, "step": 6657 }, { "epoch": 1.8092391304347826, "grad_norm": 1.3194427312391406, "learning_rate": 4.947248138986249e-07, "loss": 0.44119367003440857, "step": 6658 }, { "epoch": 1.8095108695652173, "grad_norm": 1.3792546432674677, "learning_rate": 4.933300275799546e-07, "loss": 0.46506914496421814, "step": 6659 }, { "epoch": 1.8097826086956523, "grad_norm": 1.2470572212565698, "learning_rate": 4.919371604837397e-07, "loss": 0.4180852770805359, "step": 6660 }, { "epoch": 1.810054347826087, "grad_norm": 1.166094794676884, "learning_rate": 4.905462128911742e-07, "loss": 0.36663007736206055, "step": 6661 }, { "epoch": 1.8103260869565219, "grad_norm": 1.2275225704096222, "learning_rate": 4.891571850830645e-07, "loss": 0.4284296929836273, "step": 6662 }, { "epoch": 1.8105978260869566, "grad_norm": 1.3857559065126621, "learning_rate": 4.877700773398309e-07, "loss": 0.42943519353866577, "step": 6663 }, { "epoch": 1.8108695652173914, "grad_norm": 1.3959622880381668, "learning_rate": 4.863848899415058e-07, "loss": 0.4365319609642029, "step": 6664 }, { "epoch": 1.8111413043478262, "grad_norm": 1.4387478896460666, "learning_rate": 4.850016231677302e-07, "loss": 0.43463897705078125, "step": 6665 }, { "epoch": 1.811413043478261, "grad_norm": 1.29966094677806, "learning_rate": 4.836202772977628e-07, "loss": 0.3948964476585388, "step": 6666 }, { "epoch": 1.8116847826086957, "grad_norm": 1.4223859307238698, "learning_rate": 4.822408526104716e-07, "loss": 0.4294155240058899, "step": 6667 }, { "epoch": 1.8119565217391305, "grad_norm": 1.1283792443533358, "learning_rate": 4.808633493843362e-07, "loss": 0.30025917291641235, "step": 6668 }, { "epoch": 1.8122282608695652, "grad_norm": 1.3917385984340758, "learning_rate": 4.794877678974496e-07, "loss": 0.45995616912841797, "step": 6669 }, { "epoch": 1.8125, "grad_norm": 1.3869990717839935, "learning_rate": 4.781141084275164e-07, "loss": 0.4417036771774292, "step": 6670 }, { "epoch": 1.8127717391304348, "grad_norm": 1.2219986123538824, "learning_rate": 4.7674237125185597e-07, "loss": 0.3943532407283783, "step": 6671 }, { "epoch": 1.8130434782608695, "grad_norm": 1.2473548559957948, "learning_rate": 4.753725566473921e-07, "loss": 0.38782060146331787, "step": 6672 }, { "epoch": 1.8133152173913043, "grad_norm": 1.3412140192491069, "learning_rate": 4.7400466489066823e-07, "loss": 0.45635461807250977, "step": 6673 }, { "epoch": 1.813586956521739, "grad_norm": 1.176101957531631, "learning_rate": 4.7263869625783663e-07, "loss": 0.3520089089870453, "step": 6674 }, { "epoch": 1.8138586956521738, "grad_norm": 1.3498229126227392, "learning_rate": 4.712746510246624e-07, "loss": 0.4086918830871582, "step": 6675 }, { "epoch": 1.8141304347826086, "grad_norm": 1.3634322287532643, "learning_rate": 4.6991252946651744e-07, "loss": 0.4089699685573578, "step": 6676 }, { "epoch": 1.8144021739130434, "grad_norm": 1.2348100265190065, "learning_rate": 4.6855233185839175e-07, "loss": 0.42868632078170776, "step": 6677 }, { "epoch": 1.8146739130434781, "grad_norm": 1.3272910930847972, "learning_rate": 4.6719405847488573e-07, "loss": 0.44874364137649536, "step": 6678 }, { "epoch": 1.814945652173913, "grad_norm": 1.2994621674560718, "learning_rate": 4.6583770959020556e-07, "loss": 0.4379235506057739, "step": 6679 }, { "epoch": 1.8152173913043477, "grad_norm": 1.2744768200110492, "learning_rate": 4.644832854781778e-07, "loss": 0.3609439730644226, "step": 6680 }, { "epoch": 1.8154891304347827, "grad_norm": 1.3301888125451407, "learning_rate": 4.6313078641223255e-07, "loss": 0.44172602891921997, "step": 6681 }, { "epoch": 1.8157608695652174, "grad_norm": 1.0868653601679645, "learning_rate": 4.6178021266541693e-07, "loss": 0.3759479820728302, "step": 6682 }, { "epoch": 1.8160326086956522, "grad_norm": 1.32327163695829, "learning_rate": 4.6043156451038516e-07, "loss": 0.3888694643974304, "step": 6683 }, { "epoch": 1.816304347826087, "grad_norm": 1.2549937712182173, "learning_rate": 4.590848422194072e-07, "loss": 0.4044484496116638, "step": 6684 }, { "epoch": 1.8165760869565217, "grad_norm": 1.353151828935046, "learning_rate": 4.5774004606435775e-07, "loss": 0.37542209029197693, "step": 6685 }, { "epoch": 1.8168478260869565, "grad_norm": 1.369621190650096, "learning_rate": 4.5639717631672966e-07, "loss": 0.36586499214172363, "step": 6686 }, { "epoch": 1.8171195652173913, "grad_norm": 1.3142854601968363, "learning_rate": 4.550562332476227e-07, "loss": 0.3937874138355255, "step": 6687 }, { "epoch": 1.8173913043478263, "grad_norm": 1.161579420189579, "learning_rate": 4.5371721712774816e-07, "loss": 0.3524395823478699, "step": 6688 }, { "epoch": 1.817663043478261, "grad_norm": 1.3865342630976303, "learning_rate": 4.5238012822742874e-07, "loss": 0.3774436116218567, "step": 6689 }, { "epoch": 1.8179347826086958, "grad_norm": 1.344422672334059, "learning_rate": 4.510449668165995e-07, "loss": 0.4590703845024109, "step": 6690 }, { "epoch": 1.8182065217391306, "grad_norm": 1.3302201132916296, "learning_rate": 4.49711733164806e-07, "loss": 0.4394705593585968, "step": 6691 }, { "epoch": 1.8184782608695653, "grad_norm": 1.2130681917382253, "learning_rate": 4.483804275411985e-07, "loss": 0.4049954414367676, "step": 6692 }, { "epoch": 1.81875, "grad_norm": 1.2975580633324344, "learning_rate": 4.4705105021454976e-07, "loss": 0.4226974844932556, "step": 6693 }, { "epoch": 1.8190217391304349, "grad_norm": 1.3596616945643745, "learning_rate": 4.45723601453234e-07, "loss": 0.38723891973495483, "step": 6694 }, { "epoch": 1.8192934782608696, "grad_norm": 1.2513591869179697, "learning_rate": 4.4439808152523466e-07, "loss": 0.38324499130249023, "step": 6695 }, { "epoch": 1.8195652173913044, "grad_norm": 0.9698110902588106, "learning_rate": 4.430744906981577e-07, "loss": 0.2550693452358246, "step": 6696 }, { "epoch": 1.8198369565217392, "grad_norm": 1.3489746724337108, "learning_rate": 4.417528292392059e-07, "loss": 0.5241485238075256, "step": 6697 }, { "epoch": 1.820108695652174, "grad_norm": 1.2190258026956584, "learning_rate": 4.404330974152027e-07, "loss": 0.4342564046382904, "step": 6698 }, { "epoch": 1.8203804347826087, "grad_norm": 1.3505540188696286, "learning_rate": 4.391152954925715e-07, "loss": 0.4774441719055176, "step": 6699 }, { "epoch": 1.8206521739130435, "grad_norm": 1.112176994761376, "learning_rate": 4.3779942373736064e-07, "loss": 0.34712737798690796, "step": 6700 }, { "epoch": 1.8209239130434782, "grad_norm": 1.3324963861922399, "learning_rate": 4.3648548241521426e-07, "loss": 0.398147314786911, "step": 6701 }, { "epoch": 1.821195652173913, "grad_norm": 1.1482725105065223, "learning_rate": 4.351734717913947e-07, "loss": 0.34752559661865234, "step": 6702 }, { "epoch": 1.8214673913043478, "grad_norm": 1.6756714847724143, "learning_rate": 4.3386339213077444e-07, "loss": 0.44476091861724854, "step": 6703 }, { "epoch": 1.8217391304347825, "grad_norm": 1.2779098804669728, "learning_rate": 4.325552436978331e-07, "loss": 0.37698501348495483, "step": 6704 }, { "epoch": 1.8220108695652173, "grad_norm": 1.3209591448305023, "learning_rate": 4.312490267566616e-07, "loss": 0.45204561948776245, "step": 6705 }, { "epoch": 1.822282608695652, "grad_norm": 1.1659061986315293, "learning_rate": 4.2994474157096124e-07, "loss": 0.32754918932914734, "step": 6706 }, { "epoch": 1.8225543478260868, "grad_norm": 1.4396152539477205, "learning_rate": 4.286423884040458e-07, "loss": 0.4342269003391266, "step": 6707 }, { "epoch": 1.8228260869565216, "grad_norm": 1.2056651210015166, "learning_rate": 4.2734196751883283e-07, "loss": 0.3697706460952759, "step": 6708 }, { "epoch": 1.8230978260869564, "grad_norm": 1.2103494458363238, "learning_rate": 4.2604347917785563e-07, "loss": 0.43013978004455566, "step": 6709 }, { "epoch": 1.8233695652173914, "grad_norm": 1.0798253688139852, "learning_rate": 4.2474692364325556e-07, "loss": 0.31132346391677856, "step": 6710 }, { "epoch": 1.8236413043478261, "grad_norm": 1.3823779384677795, "learning_rate": 4.2345230117678105e-07, "loss": 0.4637746810913086, "step": 6711 }, { "epoch": 1.8239130434782609, "grad_norm": 1.1874960813043447, "learning_rate": 4.221596120397942e-07, "loss": 0.37082475423812866, "step": 6712 }, { "epoch": 1.8241847826086957, "grad_norm": 1.2437827815781874, "learning_rate": 4.2086885649326616e-07, "loss": 0.41355448961257935, "step": 6713 }, { "epoch": 1.8244565217391304, "grad_norm": 1.1327494449806654, "learning_rate": 4.195800347977763e-07, "loss": 0.3399755358695984, "step": 6714 }, { "epoch": 1.8247282608695652, "grad_norm": 1.3503594487248407, "learning_rate": 4.1829314721351213e-07, "loss": 0.3929290473461151, "step": 6715 }, { "epoch": 1.825, "grad_norm": 1.252772169229567, "learning_rate": 4.170081940002757e-07, "loss": 0.380697637796402, "step": 6716 }, { "epoch": 1.825271739130435, "grad_norm": 1.2053088376240788, "learning_rate": 4.1572517541747294e-07, "loss": 0.350871205329895, "step": 6717 }, { "epoch": 1.8255434782608697, "grad_norm": 1.2398663600242255, "learning_rate": 4.1444409172412324e-07, "loss": 0.478667676448822, "step": 6718 }, { "epoch": 1.8258152173913045, "grad_norm": 1.5282547149455552, "learning_rate": 4.1316494317885536e-07, "loss": 0.48359644412994385, "step": 6719 }, { "epoch": 1.8260869565217392, "grad_norm": 1.2533386491013763, "learning_rate": 4.118877300399016e-07, "loss": 0.31140387058258057, "step": 6720 }, { "epoch": 1.826358695652174, "grad_norm": 1.2196782356432156, "learning_rate": 4.1061245256511227e-07, "loss": 0.37910160422325134, "step": 6721 }, { "epoch": 1.8266304347826088, "grad_norm": 1.390091731418888, "learning_rate": 4.0933911101193934e-07, "loss": 0.4182126522064209, "step": 6722 }, { "epoch": 1.8269021739130435, "grad_norm": 1.2897236365945963, "learning_rate": 4.080677056374505e-07, "loss": 0.434355229139328, "step": 6723 }, { "epoch": 1.8271739130434783, "grad_norm": 1.3621648930903958, "learning_rate": 4.0679823669831585e-07, "loss": 0.40053534507751465, "step": 6724 }, { "epoch": 1.827445652173913, "grad_norm": 1.3190492521419548, "learning_rate": 4.055307044508183e-07, "loss": 0.4195440113544464, "step": 6725 }, { "epoch": 1.8277173913043478, "grad_norm": 1.2730326631746196, "learning_rate": 4.0426510915085207e-07, "loss": 0.410092830657959, "step": 6726 }, { "epoch": 1.8279891304347826, "grad_norm": 1.1352675177792018, "learning_rate": 4.03001451053916e-07, "loss": 0.3247610330581665, "step": 6727 }, { "epoch": 1.8282608695652174, "grad_norm": 1.297463132150212, "learning_rate": 4.0173973041511827e-07, "loss": 0.41611331701278687, "step": 6728 }, { "epoch": 1.8285326086956522, "grad_norm": 1.3913417628533542, "learning_rate": 4.0047994748917854e-07, "loss": 0.4557555019855499, "step": 6729 }, { "epoch": 1.828804347826087, "grad_norm": 1.1491415777263116, "learning_rate": 3.992221025304255e-07, "loss": 0.425372838973999, "step": 6730 }, { "epoch": 1.8290760869565217, "grad_norm": 1.2353846127561272, "learning_rate": 3.979661957927905e-07, "loss": 0.38712531328201294, "step": 6731 }, { "epoch": 1.8293478260869565, "grad_norm": 1.3385616566833511, "learning_rate": 3.9671222752982185e-07, "loss": 0.491060346364975, "step": 6732 }, { "epoch": 1.8296195652173912, "grad_norm": 1.2151465539202082, "learning_rate": 3.9546019799467153e-07, "loss": 0.40902912616729736, "step": 6733 }, { "epoch": 1.829891304347826, "grad_norm": 1.2496404040538869, "learning_rate": 3.942101074401028e-07, "loss": 0.4672606289386749, "step": 6734 }, { "epoch": 1.8301630434782608, "grad_norm": 1.2198571440869368, "learning_rate": 3.9296195611848275e-07, "loss": 0.40152716636657715, "step": 6735 }, { "epoch": 1.8304347826086955, "grad_norm": 1.3155779481992596, "learning_rate": 3.9171574428179424e-07, "loss": 0.4255542755126953, "step": 6736 }, { "epoch": 1.8307065217391303, "grad_norm": 1.317464132976481, "learning_rate": 3.9047147218162273e-07, "loss": 0.40894705057144165, "step": 6737 }, { "epoch": 1.830978260869565, "grad_norm": 1.1363276016985848, "learning_rate": 3.8922914006916167e-07, "loss": 0.38469335436820984, "step": 6738 }, { "epoch": 1.83125, "grad_norm": 1.1840411106853088, "learning_rate": 3.8798874819521937e-07, "loss": 0.36095130443573, "step": 6739 }, { "epoch": 1.8315217391304348, "grad_norm": 1.209638144620073, "learning_rate": 3.867502968102055e-07, "loss": 0.3496997058391571, "step": 6740 }, { "epoch": 1.8317934782608696, "grad_norm": 1.249052742362792, "learning_rate": 3.8551378616414115e-07, "loss": 0.3519611954689026, "step": 6741 }, { "epoch": 1.8320652173913043, "grad_norm": 1.0828965050286286, "learning_rate": 3.8427921650665446e-07, "loss": 0.3145643174648285, "step": 6742 }, { "epoch": 1.8323369565217391, "grad_norm": 1.2510268076438484, "learning_rate": 3.8304658808698604e-07, "loss": 0.38488009572029114, "step": 6743 }, { "epoch": 1.8326086956521739, "grad_norm": 1.4520593139292748, "learning_rate": 3.8181590115397573e-07, "loss": 0.3779062032699585, "step": 6744 }, { "epoch": 1.8328804347826086, "grad_norm": 1.1410940424181248, "learning_rate": 3.8058715595608034e-07, "loss": 0.33701616525650024, "step": 6745 }, { "epoch": 1.8331521739130436, "grad_norm": 1.3710549162663368, "learning_rate": 3.793603527413603e-07, "loss": 0.44178271293640137, "step": 6746 }, { "epoch": 1.8334239130434784, "grad_norm": 1.3633430973578142, "learning_rate": 3.781354917574842e-07, "loss": 0.47755053639411926, "step": 6747 }, { "epoch": 1.8336956521739132, "grad_norm": 1.267406576549635, "learning_rate": 3.7691257325172757e-07, "loss": 0.4505717158317566, "step": 6748 }, { "epoch": 1.833967391304348, "grad_norm": 1.17778616500045, "learning_rate": 3.756915974709785e-07, "loss": 0.3261575698852539, "step": 6749 }, { "epoch": 1.8342391304347827, "grad_norm": 1.3590826522913302, "learning_rate": 3.744725646617275e-07, "loss": 0.4262421727180481, "step": 6750 }, { "epoch": 1.8345108695652175, "grad_norm": 1.4594653147381296, "learning_rate": 3.7325547507007455e-07, "loss": 0.434425950050354, "step": 6751 }, { "epoch": 1.8347826086956522, "grad_norm": 1.340332573426189, "learning_rate": 3.720403289417296e-07, "loss": 0.4573129415512085, "step": 6752 }, { "epoch": 1.835054347826087, "grad_norm": 1.3798519784849221, "learning_rate": 3.708271265220087e-07, "loss": 0.47059500217437744, "step": 6753 }, { "epoch": 1.8353260869565218, "grad_norm": 1.4290640445166025, "learning_rate": 3.6961586805583037e-07, "loss": 0.3994039297103882, "step": 6754 }, { "epoch": 1.8355978260869565, "grad_norm": 1.324926851839928, "learning_rate": 3.6840655378773126e-07, "loss": 0.45446139574050903, "step": 6755 }, { "epoch": 1.8358695652173913, "grad_norm": 1.5999585800361142, "learning_rate": 3.6719918396184716e-07, "loss": 0.49022212624549866, "step": 6756 }, { "epoch": 1.836141304347826, "grad_norm": 1.3612172806666858, "learning_rate": 3.6599375882192423e-07, "loss": 0.47512027621269226, "step": 6757 }, { "epoch": 1.8364130434782608, "grad_norm": 1.2406071574999076, "learning_rate": 3.6479027861131445e-07, "loss": 0.4349960684776306, "step": 6758 }, { "epoch": 1.8366847826086956, "grad_norm": 1.2828209781753415, "learning_rate": 3.6358874357298235e-07, "loss": 0.38886600732803345, "step": 6759 }, { "epoch": 1.8369565217391304, "grad_norm": 1.1776682006144474, "learning_rate": 3.623891539494906e-07, "loss": 0.36122772097587585, "step": 6760 }, { "epoch": 1.8372282608695651, "grad_norm": 1.2869129460914879, "learning_rate": 3.6119150998301767e-07, "loss": 0.39020836353302, "step": 6761 }, { "epoch": 1.8375, "grad_norm": 1.387764478376674, "learning_rate": 3.5999581191534684e-07, "loss": 0.4171834886074066, "step": 6762 }, { "epoch": 1.8377717391304347, "grad_norm": 1.2381463130887995, "learning_rate": 3.588020599878639e-07, "loss": 0.3986772298812866, "step": 6763 }, { "epoch": 1.8380434782608694, "grad_norm": 1.3265648068376985, "learning_rate": 3.576102544415683e-07, "loss": 0.4168546199798584, "step": 6764 }, { "epoch": 1.8383152173913042, "grad_norm": 1.1127730061269308, "learning_rate": 3.5642039551706307e-07, "loss": 0.35916098952293396, "step": 6765 }, { "epoch": 1.838586956521739, "grad_norm": 1.3357289014050617, "learning_rate": 3.5523248345455954e-07, "loss": 0.4450167119503021, "step": 6766 }, { "epoch": 1.8388586956521737, "grad_norm": 1.316259638815474, "learning_rate": 3.540465184938735e-07, "loss": 0.4088588356971741, "step": 6767 }, { "epoch": 1.8391304347826087, "grad_norm": 1.1270686898454203, "learning_rate": 3.528625008744313e-07, "loss": 0.2975301444530487, "step": 6768 }, { "epoch": 1.8394021739130435, "grad_norm": 1.2805035287581088, "learning_rate": 3.516804308352628e-07, "loss": 0.4052038788795471, "step": 6769 }, { "epoch": 1.8396739130434783, "grad_norm": 1.3539965094137203, "learning_rate": 3.5050030861500937e-07, "loss": 0.4510110914707184, "step": 6770 }, { "epoch": 1.839945652173913, "grad_norm": 1.1897859857609843, "learning_rate": 3.4932213445191264e-07, "loss": 0.3555293381214142, "step": 6771 }, { "epoch": 1.8402173913043478, "grad_norm": 1.161067686683059, "learning_rate": 3.481459085838268e-07, "loss": 0.32600274682044983, "step": 6772 }, { "epoch": 1.8404891304347826, "grad_norm": 1.1992158672760793, "learning_rate": 3.4697163124820853e-07, "loss": 0.4556969404220581, "step": 6773 }, { "epoch": 1.8407608695652173, "grad_norm": 1.3409213626646312, "learning_rate": 3.4579930268212267e-07, "loss": 0.4023668169975281, "step": 6774 }, { "epoch": 1.8410326086956523, "grad_norm": 1.2992565436849384, "learning_rate": 3.4462892312224437e-07, "loss": 0.46283358335494995, "step": 6775 }, { "epoch": 1.841304347826087, "grad_norm": 1.1255703341984762, "learning_rate": 3.4346049280484686e-07, "loss": 0.34309184551239014, "step": 6776 }, { "epoch": 1.8415760869565219, "grad_norm": 1.2649129191783437, "learning_rate": 3.42294011965818e-07, "loss": 0.39246052503585815, "step": 6777 }, { "epoch": 1.8418478260869566, "grad_norm": 1.283642388977984, "learning_rate": 3.4112948084064844e-07, "loss": 0.3735131025314331, "step": 6778 }, { "epoch": 1.8421195652173914, "grad_norm": 1.4092161157374086, "learning_rate": 3.3996689966443564e-07, "loss": 0.4557151794433594, "step": 6779 }, { "epoch": 1.8423913043478262, "grad_norm": 1.27462533569546, "learning_rate": 3.388062686718829e-07, "loss": 0.446053683757782, "step": 6780 }, { "epoch": 1.842663043478261, "grad_norm": 1.1570215007583364, "learning_rate": 3.3764758809729957e-07, "loss": 0.39002442359924316, "step": 6781 }, { "epoch": 1.8429347826086957, "grad_norm": 1.307710452895658, "learning_rate": 3.3649085817460515e-07, "loss": 0.336403489112854, "step": 6782 }, { "epoch": 1.8432065217391305, "grad_norm": 1.2478360873574739, "learning_rate": 3.3533607913731834e-07, "loss": 0.3798927664756775, "step": 6783 }, { "epoch": 1.8434782608695652, "grad_norm": 1.3423249504226267, "learning_rate": 3.3418325121857053e-07, "loss": 0.45060452818870544, "step": 6784 }, { "epoch": 1.84375, "grad_norm": 1.3738696906322756, "learning_rate": 3.330323746510955e-07, "loss": 0.47157174348831177, "step": 6785 }, { "epoch": 1.8440217391304348, "grad_norm": 1.3420716986865453, "learning_rate": 3.3188344966723516e-07, "loss": 0.461656391620636, "step": 6786 }, { "epoch": 1.8442934782608695, "grad_norm": 1.3375932145801652, "learning_rate": 3.3073647649893405e-07, "loss": 0.4063422977924347, "step": 6787 }, { "epoch": 1.8445652173913043, "grad_norm": 1.3721520348804357, "learning_rate": 3.2959145537774685e-07, "loss": 0.46931859850883484, "step": 6788 }, { "epoch": 1.844836956521739, "grad_norm": 1.240152533325557, "learning_rate": 3.2844838653483315e-07, "loss": 0.29418620467185974, "step": 6789 }, { "epoch": 1.8451086956521738, "grad_norm": 1.2149086018970103, "learning_rate": 3.273072702009539e-07, "loss": 0.3719596862792969, "step": 6790 }, { "epoch": 1.8453804347826086, "grad_norm": 1.333469118033598, "learning_rate": 3.261681066064859e-07, "loss": 0.43208104372024536, "step": 6791 }, { "epoch": 1.8456521739130434, "grad_norm": 1.2709803386737546, "learning_rate": 3.250308959813986e-07, "loss": 0.45694202184677124, "step": 6792 }, { "epoch": 1.8459239130434781, "grad_norm": 1.2761076801017142, "learning_rate": 3.2389563855527826e-07, "loss": 0.41989201307296753, "step": 6793 }, { "epoch": 1.846195652173913, "grad_norm": 1.1017996969726471, "learning_rate": 3.2276233455731164e-07, "loss": 0.3002946376800537, "step": 6794 }, { "epoch": 1.8464673913043477, "grad_norm": 1.4026788512716284, "learning_rate": 3.216309842162924e-07, "loss": 0.42604270577430725, "step": 6795 }, { "epoch": 1.8467391304347827, "grad_norm": 1.4119606010477204, "learning_rate": 3.2050158776061767e-07, "loss": 0.5584536790847778, "step": 6796 }, { "epoch": 1.8470108695652174, "grad_norm": 1.335887161794098, "learning_rate": 3.193741454182919e-07, "loss": 0.4278464615345001, "step": 6797 }, { "epoch": 1.8472826086956522, "grad_norm": 1.1410523742177192, "learning_rate": 3.1824865741692855e-07, "loss": 0.3505250811576843, "step": 6798 }, { "epoch": 1.847554347826087, "grad_norm": 1.18782576658931, "learning_rate": 3.1712512398373917e-07, "loss": 0.4314552843570709, "step": 6799 }, { "epoch": 1.8478260869565217, "grad_norm": 1.2563208794229308, "learning_rate": 3.160035453455468e-07, "loss": 0.40355247259140015, "step": 6800 }, { "epoch": 1.8480978260869565, "grad_norm": 1.2484776600441578, "learning_rate": 3.1488392172877577e-07, "loss": 0.4372256398200989, "step": 6801 }, { "epoch": 1.8483695652173913, "grad_norm": 0.981818839696661, "learning_rate": 3.13766253359461e-07, "loss": 0.30685049295425415, "step": 6802 }, { "epoch": 1.8486413043478263, "grad_norm": 1.1830155855436533, "learning_rate": 3.1265054046323404e-07, "loss": 0.32347404956817627, "step": 6803 }, { "epoch": 1.848913043478261, "grad_norm": 1.2976794014994493, "learning_rate": 3.115367832653415e-07, "loss": 0.40595394372940063, "step": 6804 }, { "epoch": 1.8491847826086958, "grad_norm": 1.4548786456312435, "learning_rate": 3.104249819906291e-07, "loss": 0.43272101879119873, "step": 6805 }, { "epoch": 1.8494565217391306, "grad_norm": 1.3625207754756212, "learning_rate": 3.093151368635472e-07, "loss": 0.4274366497993469, "step": 6806 }, { "epoch": 1.8497282608695653, "grad_norm": 1.2552393633725643, "learning_rate": 3.082072481081555e-07, "loss": 0.38596779108047485, "step": 6807 }, { "epoch": 1.85, "grad_norm": 1.26895479381818, "learning_rate": 3.071013159481162e-07, "loss": 0.4438563883304596, "step": 6808 }, { "epoch": 1.8502717391304349, "grad_norm": 1.3089540440813907, "learning_rate": 3.059973406066963e-07, "loss": 0.412411630153656, "step": 6809 }, { "epoch": 1.8505434782608696, "grad_norm": 0.8912933331344347, "learning_rate": 3.0489532230676744e-07, "loss": 0.27801305055618286, "step": 6810 }, { "epoch": 1.8508152173913044, "grad_norm": 1.3047598960721836, "learning_rate": 3.0379526127080837e-07, "loss": 0.42069530487060547, "step": 6811 }, { "epoch": 1.8510869565217392, "grad_norm": 1.3096030434906323, "learning_rate": 3.0269715772090036e-07, "loss": 0.35886552929878235, "step": 6812 }, { "epoch": 1.851358695652174, "grad_norm": 1.2057591811940986, "learning_rate": 3.0160101187873047e-07, "loss": 0.3786483407020569, "step": 6813 }, { "epoch": 1.8516304347826087, "grad_norm": 1.1833933598329067, "learning_rate": 3.005068239655917e-07, "loss": 0.41915401816368103, "step": 6814 }, { "epoch": 1.8519021739130435, "grad_norm": 1.286761565783398, "learning_rate": 2.994145942023785e-07, "loss": 0.38427507877349854, "step": 6815 }, { "epoch": 1.8521739130434782, "grad_norm": 1.4250540093119262, "learning_rate": 2.9832432280959223e-07, "loss": 0.4335346519947052, "step": 6816 }, { "epoch": 1.852445652173913, "grad_norm": 1.2930127949024615, "learning_rate": 2.9723601000734013e-07, "loss": 0.3777865767478943, "step": 6817 }, { "epoch": 1.8527173913043478, "grad_norm": 1.2191162333558612, "learning_rate": 2.961496560153332e-07, "loss": 0.33605486154556274, "step": 6818 }, { "epoch": 1.8529891304347825, "grad_norm": 1.2462519065039395, "learning_rate": 2.9506526105288257e-07, "loss": 0.38465574383735657, "step": 6819 }, { "epoch": 1.8532608695652173, "grad_norm": 1.4096778746623018, "learning_rate": 2.939828253389121e-07, "loss": 0.4700445532798767, "step": 6820 }, { "epoch": 1.853532608695652, "grad_norm": 1.3246700985070619, "learning_rate": 2.929023490919425e-07, "loss": 0.33486491441726685, "step": 6821 }, { "epoch": 1.8538043478260868, "grad_norm": 1.218112421297276, "learning_rate": 2.9182383253010594e-07, "loss": 0.38106396794319153, "step": 6822 }, { "epoch": 1.8540760869565216, "grad_norm": 1.2289699605144122, "learning_rate": 2.907472758711305e-07, "loss": 0.40111783146858215, "step": 6823 }, { "epoch": 1.8543478260869564, "grad_norm": 1.075082056730369, "learning_rate": 2.8967267933235567e-07, "loss": 0.3610149919986725, "step": 6824 }, { "epoch": 1.8546195652173914, "grad_norm": 1.428445755542384, "learning_rate": 2.8860004313072453e-07, "loss": 0.4613913297653198, "step": 6825 }, { "epoch": 1.8548913043478261, "grad_norm": 1.3283890351009797, "learning_rate": 2.8752936748277724e-07, "loss": 0.4117807149887085, "step": 6826 }, { "epoch": 1.8551630434782609, "grad_norm": 1.2624436071914107, "learning_rate": 2.8646065260466984e-07, "loss": 0.4065242409706116, "step": 6827 }, { "epoch": 1.8554347826086957, "grad_norm": 1.2021656212075826, "learning_rate": 2.8539389871215295e-07, "loss": 0.3611309230327606, "step": 6828 }, { "epoch": 1.8557065217391304, "grad_norm": 1.3562180069275946, "learning_rate": 2.843291060205855e-07, "loss": 0.426886647939682, "step": 6829 }, { "epoch": 1.8559782608695652, "grad_norm": 1.4056650219322762, "learning_rate": 2.8326627474492885e-07, "loss": 0.44349387288093567, "step": 6830 }, { "epoch": 1.85625, "grad_norm": 1.0862613306746407, "learning_rate": 2.8220540509975024e-07, "loss": 0.33010542392730713, "step": 6831 }, { "epoch": 1.856521739130435, "grad_norm": 1.3874691816294948, "learning_rate": 2.811464972992195e-07, "loss": 0.48312854766845703, "step": 6832 }, { "epoch": 1.8567934782608697, "grad_norm": 1.3481858949591403, "learning_rate": 2.8008955155710784e-07, "loss": 0.4299573600292206, "step": 6833 }, { "epoch": 1.8570652173913045, "grad_norm": 1.2666584576283697, "learning_rate": 2.7903456808679894e-07, "loss": 0.4066808819770813, "step": 6834 }, { "epoch": 1.8573369565217392, "grad_norm": 1.209314764720801, "learning_rate": 2.7798154710126925e-07, "loss": 0.3982323408126831, "step": 6835 }, { "epoch": 1.857608695652174, "grad_norm": 1.1547739018903922, "learning_rate": 2.769304888131075e-07, "loss": 0.3779074549674988, "step": 6836 }, { "epoch": 1.8578804347826088, "grad_norm": 1.220821325368421, "learning_rate": 2.7588139343450173e-07, "loss": 0.2918640673160553, "step": 6837 }, { "epoch": 1.8581521739130435, "grad_norm": 1.2446455486552475, "learning_rate": 2.7483426117724697e-07, "loss": 0.456865519285202, "step": 6838 }, { "epoch": 1.8584239130434783, "grad_norm": 1.402476296776907, "learning_rate": 2.7378909225273865e-07, "loss": 0.46355313062667847, "step": 6839 }, { "epoch": 1.858695652173913, "grad_norm": 1.3035825985901066, "learning_rate": 2.727458868719757e-07, "loss": 0.35516655445098877, "step": 6840 }, { "epoch": 1.8589673913043478, "grad_norm": 1.313732876833818, "learning_rate": 2.7170464524556517e-07, "loss": 0.408968985080719, "step": 6841 }, { "epoch": 1.8592391304347826, "grad_norm": 1.2681646730086154, "learning_rate": 2.706653675837123e-07, "loss": 0.4318949580192566, "step": 6842 }, { "epoch": 1.8595108695652174, "grad_norm": 1.1803095218792254, "learning_rate": 2.696280540962293e-07, "loss": 0.3691042363643646, "step": 6843 }, { "epoch": 1.8597826086956522, "grad_norm": 1.1513266300576277, "learning_rate": 2.685927049925308e-07, "loss": 0.39349591732025146, "step": 6844 }, { "epoch": 1.860054347826087, "grad_norm": 1.2751086458289536, "learning_rate": 2.675593204816351e-07, "loss": 0.3895784616470337, "step": 6845 }, { "epoch": 1.8603260869565217, "grad_norm": 1.1103864393815028, "learning_rate": 2.6652790077216106e-07, "loss": 0.3694497346878052, "step": 6846 }, { "epoch": 1.8605978260869565, "grad_norm": 1.0933476788431657, "learning_rate": 2.6549844607233753e-07, "loss": 0.33410245180130005, "step": 6847 }, { "epoch": 1.8608695652173912, "grad_norm": 1.4121111063582183, "learning_rate": 2.6447095658999054e-07, "loss": 0.42121070623397827, "step": 6848 }, { "epoch": 1.861141304347826, "grad_norm": 1.1652807971346828, "learning_rate": 2.634454325325497e-07, "loss": 0.3739777207374573, "step": 6849 }, { "epoch": 1.8614130434782608, "grad_norm": 1.3075381409896547, "learning_rate": 2.624218741070528e-07, "loss": 0.40122970938682556, "step": 6850 }, { "epoch": 1.8616847826086955, "grad_norm": 1.2702615516717872, "learning_rate": 2.614002815201344e-07, "loss": 0.37662333250045776, "step": 6851 }, { "epoch": 1.8619565217391303, "grad_norm": 1.3056765695249002, "learning_rate": 2.6038065497803744e-07, "loss": 0.44940897822380066, "step": 6852 }, { "epoch": 1.862228260869565, "grad_norm": 1.4006098127138287, "learning_rate": 2.5936299468660387e-07, "loss": 0.4622841477394104, "step": 6853 }, { "epoch": 1.8625, "grad_norm": 1.3982432015882091, "learning_rate": 2.583473008512849e-07, "loss": 0.3603283166885376, "step": 6854 }, { "epoch": 1.8627717391304348, "grad_norm": 1.4887721965938645, "learning_rate": 2.573335736771254e-07, "loss": 0.47606927156448364, "step": 6855 }, { "epoch": 1.8630434782608696, "grad_norm": 1.4573877149253556, "learning_rate": 2.563218133687806e-07, "loss": 0.48163041472435, "step": 6856 }, { "epoch": 1.8633152173913043, "grad_norm": 1.1292966025637399, "learning_rate": 2.55312020130507e-07, "loss": 0.31428977847099304, "step": 6857 }, { "epoch": 1.8635869565217391, "grad_norm": 1.286437126812392, "learning_rate": 2.5430419416616167e-07, "loss": 0.381253719329834, "step": 6858 }, { "epoch": 1.8638586956521739, "grad_norm": 1.5142559838055376, "learning_rate": 2.5329833567920736e-07, "loss": 0.46221691370010376, "step": 6859 }, { "epoch": 1.8641304347826086, "grad_norm": 1.252818377248434, "learning_rate": 2.522944448727083e-07, "loss": 0.4379313588142395, "step": 6860 }, { "epoch": 1.8644021739130436, "grad_norm": 1.3508062434452672, "learning_rate": 2.512925219493323e-07, "loss": 0.4187575578689575, "step": 6861 }, { "epoch": 1.8646739130434784, "grad_norm": 1.2195626927050365, "learning_rate": 2.5029256711134766e-07, "loss": 0.3937179744243622, "step": 6862 }, { "epoch": 1.8649456521739132, "grad_norm": 1.039495772656511, "learning_rate": 2.4929458056062725e-07, "loss": 0.35516613721847534, "step": 6863 }, { "epoch": 1.865217391304348, "grad_norm": 1.384985311342454, "learning_rate": 2.482985624986478e-07, "loss": 0.44259148836135864, "step": 6864 }, { "epoch": 1.8654891304347827, "grad_norm": 1.37499431928842, "learning_rate": 2.4730451312648617e-07, "loss": 0.4726009964942932, "step": 6865 }, { "epoch": 1.8657608695652175, "grad_norm": 1.302388038579124, "learning_rate": 2.4631243264482296e-07, "loss": 0.4397982358932495, "step": 6866 }, { "epoch": 1.8660326086956522, "grad_norm": 1.4975943535494087, "learning_rate": 2.453223212539391e-07, "loss": 0.45001012086868286, "step": 6867 }, { "epoch": 1.866304347826087, "grad_norm": 1.3339558225307324, "learning_rate": 2.443341791537235e-07, "loss": 0.37960416078567505, "step": 6868 }, { "epoch": 1.8665760869565218, "grad_norm": 1.599137264506897, "learning_rate": 2.4334800654366e-07, "loss": 0.5016778707504272, "step": 6869 }, { "epoch": 1.8668478260869565, "grad_norm": 1.2148506405772164, "learning_rate": 2.4236380362284263e-07, "loss": 0.42615318298339844, "step": 6870 }, { "epoch": 1.8671195652173913, "grad_norm": 1.2821112942442268, "learning_rate": 2.4138157058996135e-07, "loss": 0.4192488193511963, "step": 6871 }, { "epoch": 1.867391304347826, "grad_norm": 1.233706442955639, "learning_rate": 2.404013076433109e-07, "loss": 0.3852154016494751, "step": 6872 }, { "epoch": 1.8676630434782608, "grad_norm": 1.0703309476349412, "learning_rate": 2.394230149807908e-07, "loss": 0.3447345495223999, "step": 6873 }, { "epoch": 1.8679347826086956, "grad_norm": 1.3139950427319615, "learning_rate": 2.384466927998974e-07, "loss": 0.361921489238739, "step": 6874 }, { "epoch": 1.8682065217391304, "grad_norm": 1.191915747621453, "learning_rate": 2.3747234129773422e-07, "loss": 0.41319432854652405, "step": 6875 }, { "epoch": 1.8684782608695651, "grad_norm": 1.3264431116959174, "learning_rate": 2.36499960671005e-07, "loss": 0.4127950072288513, "step": 6876 }, { "epoch": 1.86875, "grad_norm": 1.176339145864402, "learning_rate": 2.3552955111601493e-07, "loss": 0.3177364468574524, "step": 6877 }, { "epoch": 1.8690217391304347, "grad_norm": 1.2924719091005503, "learning_rate": 2.3456111282867178e-07, "loss": 0.42869266867637634, "step": 6878 }, { "epoch": 1.8692934782608694, "grad_norm": 1.1732668061578524, "learning_rate": 2.335946460044858e-07, "loss": 0.299668550491333, "step": 6879 }, { "epoch": 1.8695652173913042, "grad_norm": 1.2201067914983452, "learning_rate": 2.3263015083856754e-07, "loss": 0.35125732421875, "step": 6880 }, { "epoch": 1.869836956521739, "grad_norm": 1.4196042783635574, "learning_rate": 2.3166762752563466e-07, "loss": 0.41164976358413696, "step": 6881 }, { "epoch": 1.8701086956521737, "grad_norm": 1.1742099908453851, "learning_rate": 2.3070707625999945e-07, "loss": 0.3693438172340393, "step": 6882 }, { "epoch": 1.8703804347826087, "grad_norm": 1.275879581032204, "learning_rate": 2.2974849723558012e-07, "loss": 0.46182557940483093, "step": 6883 }, { "epoch": 1.8706521739130435, "grad_norm": 1.2720246917262368, "learning_rate": 2.2879189064589746e-07, "loss": 0.35382741689682007, "step": 6884 }, { "epoch": 1.8709239130434783, "grad_norm": 1.3675870590098471, "learning_rate": 2.278372566840714e-07, "loss": 0.4399723708629608, "step": 6885 }, { "epoch": 1.871195652173913, "grad_norm": 1.2514657205582107, "learning_rate": 2.2688459554282673e-07, "loss": 0.3596053719520569, "step": 6886 }, { "epoch": 1.8714673913043478, "grad_norm": 1.306649893349069, "learning_rate": 2.2593390741448617e-07, "loss": 0.4093482494354248, "step": 6887 }, { "epoch": 1.8717391304347826, "grad_norm": 1.1552126996994527, "learning_rate": 2.249851924909785e-07, "loss": 0.35847097635269165, "step": 6888 }, { "epoch": 1.8720108695652173, "grad_norm": 1.1009346316783446, "learning_rate": 2.2403845096382937e-07, "loss": 0.31690192222595215, "step": 6889 }, { "epoch": 1.8722826086956523, "grad_norm": 1.2125643192318125, "learning_rate": 2.2309368302417145e-07, "loss": 0.4288114309310913, "step": 6890 }, { "epoch": 1.872554347826087, "grad_norm": 1.2791990495724763, "learning_rate": 2.221508888627344e-07, "loss": 0.37312230467796326, "step": 6891 }, { "epoch": 1.8728260869565219, "grad_norm": 1.3548929954687803, "learning_rate": 2.212100686698504e-07, "loss": 0.4360661804676056, "step": 6892 }, { "epoch": 1.8730978260869566, "grad_norm": 1.182856338579764, "learning_rate": 2.202712226354564e-07, "loss": 0.39682841300964355, "step": 6893 }, { "epoch": 1.8733695652173914, "grad_norm": 1.4276601030565925, "learning_rate": 2.1933435094908416e-07, "loss": 0.48532360792160034, "step": 6894 }, { "epoch": 1.8736413043478262, "grad_norm": 1.118367003344851, "learning_rate": 2.183994537998746e-07, "loss": 0.3173511028289795, "step": 6895 }, { "epoch": 1.873913043478261, "grad_norm": 1.1160182713523192, "learning_rate": 2.1746653137656447e-07, "loss": 0.3914230465888977, "step": 6896 }, { "epoch": 1.8741847826086957, "grad_norm": 1.274417938813623, "learning_rate": 2.165355838674943e-07, "loss": 0.36641642451286316, "step": 6897 }, { "epoch": 1.8744565217391305, "grad_norm": 1.3947306014772631, "learning_rate": 2.156066114606048e-07, "loss": 0.5092507600784302, "step": 6898 }, { "epoch": 1.8747282608695652, "grad_norm": 1.2331653562012719, "learning_rate": 2.1467961434343932e-07, "loss": 0.39139294624328613, "step": 6899 }, { "epoch": 1.875, "grad_norm": 1.2004732118784893, "learning_rate": 2.1375459270314148e-07, "loss": 0.3967882990837097, "step": 6900 }, { "epoch": 1.8752717391304348, "grad_norm": 1.2440083704384115, "learning_rate": 2.1283154672645522e-07, "loss": 0.40654245018959045, "step": 6901 }, { "epoch": 1.8755434782608695, "grad_norm": 1.4252724727629427, "learning_rate": 2.1191047659972818e-07, "loss": 0.5162900686264038, "step": 6902 }, { "epoch": 1.8758152173913043, "grad_norm": 1.207931390238852, "learning_rate": 2.1099138250890493e-07, "loss": 0.3490472435951233, "step": 6903 }, { "epoch": 1.876086956521739, "grad_norm": 1.2433506566048433, "learning_rate": 2.1007426463953707e-07, "loss": 0.3741413354873657, "step": 6904 }, { "epoch": 1.8763586956521738, "grad_norm": 1.3416036910432212, "learning_rate": 2.091591231767709e-07, "loss": 0.4590725302696228, "step": 6905 }, { "epoch": 1.8766304347826086, "grad_norm": 1.2817408931270178, "learning_rate": 2.0824595830535978e-07, "loss": 0.3994067907333374, "step": 6906 }, { "epoch": 1.8769021739130434, "grad_norm": 1.3471480141958896, "learning_rate": 2.0733477020965287e-07, "loss": 0.4508916139602661, "step": 6907 }, { "epoch": 1.8771739130434781, "grad_norm": 1.2848379256459386, "learning_rate": 2.0642555907360085e-07, "loss": 0.46179819107055664, "step": 6908 }, { "epoch": 1.877445652173913, "grad_norm": 1.2071678941597306, "learning_rate": 2.0551832508076019e-07, "loss": 0.4031229019165039, "step": 6909 }, { "epoch": 1.8777173913043477, "grad_norm": 1.1615496588801635, "learning_rate": 2.046130684142822e-07, "loss": 0.35220855474472046, "step": 6910 }, { "epoch": 1.8779891304347827, "grad_norm": 1.0285739621443108, "learning_rate": 2.0370978925692398e-07, "loss": 0.3142333924770355, "step": 6911 }, { "epoch": 1.8782608695652174, "grad_norm": 1.2495803014444935, "learning_rate": 2.0280848779103856e-07, "loss": 0.3547998368740082, "step": 6912 }, { "epoch": 1.8785326086956522, "grad_norm": 1.3168297058758236, "learning_rate": 2.0190916419858486e-07, "loss": 0.46056270599365234, "step": 6913 }, { "epoch": 1.878804347826087, "grad_norm": 1.2538484408776374, "learning_rate": 2.0101181866111652e-07, "loss": 0.3672834038734436, "step": 6914 }, { "epoch": 1.8790760869565217, "grad_norm": 1.1916858615675359, "learning_rate": 2.001164513597942e-07, "loss": 0.4409629702568054, "step": 6915 }, { "epoch": 1.8793478260869565, "grad_norm": 1.1120310584792257, "learning_rate": 1.9922306247537437e-07, "loss": 0.30353260040283203, "step": 6916 }, { "epoch": 1.8796195652173913, "grad_norm": 1.2157039536069434, "learning_rate": 1.9833165218821727e-07, "loss": 0.36073464155197144, "step": 6917 }, { "epoch": 1.8798913043478263, "grad_norm": 1.4570263334228746, "learning_rate": 1.9744222067828e-07, "loss": 0.44472575187683105, "step": 6918 }, { "epoch": 1.880163043478261, "grad_norm": 1.3543688798285451, "learning_rate": 1.9655476812512454e-07, "loss": 0.4919557571411133, "step": 6919 }, { "epoch": 1.8804347826086958, "grad_norm": 1.3115155244705814, "learning_rate": 1.9566929470791195e-07, "loss": 0.45605480670928955, "step": 6920 }, { "epoch": 1.8807065217391306, "grad_norm": 1.3799860702514002, "learning_rate": 1.947858006053993e-07, "loss": 0.46621018648147583, "step": 6921 }, { "epoch": 1.8809782608695653, "grad_norm": 1.3619880748121749, "learning_rate": 1.939042859959528e-07, "loss": 0.37508997321128845, "step": 6922 }, { "epoch": 1.88125, "grad_norm": 1.1306952498210734, "learning_rate": 1.9302475105753005e-07, "loss": 0.3575604259967804, "step": 6923 }, { "epoch": 1.8815217391304349, "grad_norm": 1.3976280613339214, "learning_rate": 1.921471959676957e-07, "loss": 0.43721479177474976, "step": 6924 }, { "epoch": 1.8817934782608696, "grad_norm": 1.4261139048276978, "learning_rate": 1.912716209036114e-07, "loss": 0.463297963142395, "step": 6925 }, { "epoch": 1.8820652173913044, "grad_norm": 1.1635706974705047, "learning_rate": 1.9039802604203795e-07, "loss": 0.3947291374206543, "step": 6926 }, { "epoch": 1.8823369565217392, "grad_norm": 1.097389902781821, "learning_rate": 1.8952641155934094e-07, "loss": 0.330735981464386, "step": 6927 }, { "epoch": 1.882608695652174, "grad_norm": 1.2751453830089359, "learning_rate": 1.886567776314796e-07, "loss": 0.4009043276309967, "step": 6928 }, { "epoch": 1.8828804347826087, "grad_norm": 1.261667501589732, "learning_rate": 1.8778912443402242e-07, "loss": 0.4128877818584442, "step": 6929 }, { "epoch": 1.8831521739130435, "grad_norm": 1.4170989812613914, "learning_rate": 1.8692345214212814e-07, "loss": 0.4164562225341797, "step": 6930 }, { "epoch": 1.8834239130434782, "grad_norm": 0.9764657337865393, "learning_rate": 1.8605976093056145e-07, "loss": 0.30120325088500977, "step": 6931 }, { "epoch": 1.883695652173913, "grad_norm": 1.224922153251798, "learning_rate": 1.8519805097368614e-07, "loss": 0.37241554260253906, "step": 6932 }, { "epoch": 1.8839673913043478, "grad_norm": 1.0750508172323217, "learning_rate": 1.8433832244546646e-07, "loss": 0.307623028755188, "step": 6933 }, { "epoch": 1.8842391304347825, "grad_norm": 1.4371174690119484, "learning_rate": 1.8348057551946352e-07, "loss": 0.5001438856124878, "step": 6934 }, { "epoch": 1.8845108695652173, "grad_norm": 1.2151672664870405, "learning_rate": 1.8262481036884328e-07, "loss": 0.3688940405845642, "step": 6935 }, { "epoch": 1.884782608695652, "grad_norm": 1.2453296871228199, "learning_rate": 1.8177102716636753e-07, "loss": 0.33514586091041565, "step": 6936 }, { "epoch": 1.8850543478260868, "grad_norm": 1.2044809468219029, "learning_rate": 1.8091922608439948e-07, "loss": 0.45976752042770386, "step": 6937 }, { "epoch": 1.8853260869565216, "grad_norm": 1.2521870092746725, "learning_rate": 1.8006940729490274e-07, "loss": 0.35358792543411255, "step": 6938 }, { "epoch": 1.8855978260869564, "grad_norm": 1.3100817690669144, "learning_rate": 1.7922157096943894e-07, "loss": 0.4519765377044678, "step": 6939 }, { "epoch": 1.8858695652173914, "grad_norm": 1.1550962796855013, "learning_rate": 1.7837571727917337e-07, "loss": 0.3689262270927429, "step": 6940 }, { "epoch": 1.8861413043478261, "grad_norm": 1.4054538570402846, "learning_rate": 1.7753184639486498e-07, "loss": 0.42041513323783875, "step": 6941 }, { "epoch": 1.8864130434782609, "grad_norm": 1.0813537126383201, "learning_rate": 1.7668995848687865e-07, "loss": 0.3441030979156494, "step": 6942 }, { "epoch": 1.8866847826086957, "grad_norm": 1.0851343866824967, "learning_rate": 1.7585005372517504e-07, "loss": 0.3831478953361511, "step": 6943 }, { "epoch": 1.8869565217391304, "grad_norm": 1.1961711672163275, "learning_rate": 1.7501213227931413e-07, "loss": 0.3635370135307312, "step": 6944 }, { "epoch": 1.8872282608695652, "grad_norm": 1.2917390907632145, "learning_rate": 1.7417619431845945e-07, "loss": 0.43207719922065735, "step": 6945 }, { "epoch": 1.8875, "grad_norm": 1.4220197596402882, "learning_rate": 1.7334224001137045e-07, "loss": 0.43825724720954895, "step": 6946 }, { "epoch": 1.887771739130435, "grad_norm": 1.4794332620625323, "learning_rate": 1.7251026952640583e-07, "loss": 0.49915850162506104, "step": 6947 }, { "epoch": 1.8880434782608697, "grad_norm": 1.303142620037788, "learning_rate": 1.7168028303152784e-07, "loss": 0.4191054105758667, "step": 6948 }, { "epoch": 1.8883152173913045, "grad_norm": 1.1368510001176761, "learning_rate": 1.708522806942936e-07, "loss": 0.3754761219024658, "step": 6949 }, { "epoch": 1.8885869565217392, "grad_norm": 1.3794935049323505, "learning_rate": 1.7002626268186273e-07, "loss": 0.4657178521156311, "step": 6950 }, { "epoch": 1.888858695652174, "grad_norm": 1.061922312230908, "learning_rate": 1.6920222916099184e-07, "loss": 0.3411339223384857, "step": 6951 }, { "epoch": 1.8891304347826088, "grad_norm": 1.3869361388228802, "learning_rate": 1.6838018029804005e-07, "loss": 0.4226166009902954, "step": 6952 }, { "epoch": 1.8894021739130435, "grad_norm": 1.2574687408066882, "learning_rate": 1.675601162589613e-07, "loss": 0.42625877261161804, "step": 6953 }, { "epoch": 1.8896739130434783, "grad_norm": 1.1899244742513733, "learning_rate": 1.6674203720931314e-07, "loss": 0.3725396990776062, "step": 6954 }, { "epoch": 1.889945652173913, "grad_norm": 1.3196402766802144, "learning_rate": 1.6592594331425127e-07, "loss": 0.4042428433895111, "step": 6955 }, { "epoch": 1.8902173913043478, "grad_norm": 1.4693917807871806, "learning_rate": 1.6511183473853055e-07, "loss": 0.493732750415802, "step": 6956 }, { "epoch": 1.8904891304347826, "grad_norm": 1.2010146874731107, "learning_rate": 1.642997116465017e-07, "loss": 0.3993363380432129, "step": 6957 }, { "epoch": 1.8907608695652174, "grad_norm": 1.1929796562995698, "learning_rate": 1.634895742021203e-07, "loss": 0.3823467493057251, "step": 6958 }, { "epoch": 1.8910326086956522, "grad_norm": 1.5643461611285363, "learning_rate": 1.626814225689377e-07, "loss": 0.4592176079750061, "step": 6959 }, { "epoch": 1.891304347826087, "grad_norm": 1.200832880946765, "learning_rate": 1.618752569101034e-07, "loss": 0.33617717027664185, "step": 6960 }, { "epoch": 1.8915760869565217, "grad_norm": 1.0613343574926304, "learning_rate": 1.6107107738836835e-07, "loss": 0.3890015482902527, "step": 6961 }, { "epoch": 1.8918478260869565, "grad_norm": 1.202448535929291, "learning_rate": 1.6026888416608267e-07, "loss": 0.3963872790336609, "step": 6962 }, { "epoch": 1.8921195652173912, "grad_norm": 1.1521525407197415, "learning_rate": 1.5946867740519346e-07, "loss": 0.4048403203487396, "step": 6963 }, { "epoch": 1.892391304347826, "grad_norm": 1.23959639847652, "learning_rate": 1.5867045726724707e-07, "loss": 0.3881048560142517, "step": 6964 }, { "epoch": 1.8926630434782608, "grad_norm": 1.3016066247071, "learning_rate": 1.5787422391339125e-07, "loss": 0.381252259016037, "step": 6965 }, { "epoch": 1.8929347826086955, "grad_norm": 1.189806073497672, "learning_rate": 1.5707997750436966e-07, "loss": 0.4053196310997009, "step": 6966 }, { "epoch": 1.8932065217391303, "grad_norm": 1.209392956190307, "learning_rate": 1.5628771820052736e-07, "loss": 0.33589744567871094, "step": 6967 }, { "epoch": 1.893478260869565, "grad_norm": 1.2867976982735883, "learning_rate": 1.554974461618053e-07, "loss": 0.3983483910560608, "step": 6968 }, { "epoch": 1.89375, "grad_norm": 1.3332962918424838, "learning_rate": 1.5470916154774583e-07, "loss": 0.46190136671066284, "step": 6969 }, { "epoch": 1.8940217391304348, "grad_norm": 1.2697252943343489, "learning_rate": 1.539228645174895e-07, "loss": 0.37349367141723633, "step": 6970 }, { "epoch": 1.8942934782608696, "grad_norm": 1.0576665392928404, "learning_rate": 1.5313855522977485e-07, "loss": 0.3362858295440674, "step": 6971 }, { "epoch": 1.8945652173913043, "grad_norm": 1.1107608747670539, "learning_rate": 1.5235623384293962e-07, "loss": 0.34994930028915405, "step": 6972 }, { "epoch": 1.8948369565217391, "grad_norm": 1.6569882404833813, "learning_rate": 1.5157590051492087e-07, "loss": 0.44260603189468384, "step": 6973 }, { "epoch": 1.8951086956521739, "grad_norm": 1.215103340255325, "learning_rate": 1.5079755540325147e-07, "loss": 0.3391036093235016, "step": 6974 }, { "epoch": 1.8953804347826086, "grad_norm": 1.0224168817308783, "learning_rate": 1.500211986650668e-07, "loss": 0.33452582359313965, "step": 6975 }, { "epoch": 1.8956521739130436, "grad_norm": 1.201442639369742, "learning_rate": 1.4924683045709932e-07, "loss": 0.3497847020626068, "step": 6976 }, { "epoch": 1.8959239130434784, "grad_norm": 1.082962364211051, "learning_rate": 1.4847445093567836e-07, "loss": 0.3649051785469055, "step": 6977 }, { "epoch": 1.8961956521739132, "grad_norm": 1.3434407545865075, "learning_rate": 1.4770406025673478e-07, "loss": 0.38781484961509705, "step": 6978 }, { "epoch": 1.896467391304348, "grad_norm": 1.2369030378159287, "learning_rate": 1.4693565857579528e-07, "loss": 0.38829559087753296, "step": 6979 }, { "epoch": 1.8967391304347827, "grad_norm": 1.41123594647473, "learning_rate": 1.4616924604798466e-07, "loss": 0.4296260178089142, "step": 6980 }, { "epoch": 1.8970108695652175, "grad_norm": 1.3053245121972432, "learning_rate": 1.4540482282803136e-07, "loss": 0.40005260705947876, "step": 6981 }, { "epoch": 1.8972826086956522, "grad_norm": 1.2857683639415454, "learning_rate": 1.446423890702553e-07, "loss": 0.4067094624042511, "step": 6982 }, { "epoch": 1.897554347826087, "grad_norm": 1.2130953021552702, "learning_rate": 1.4388194492857887e-07, "loss": 0.40582385659217834, "step": 6983 }, { "epoch": 1.8978260869565218, "grad_norm": 1.3442762377936164, "learning_rate": 1.4312349055652374e-07, "loss": 0.4605703353881836, "step": 6984 }, { "epoch": 1.8980978260869565, "grad_norm": 1.382102589943385, "learning_rate": 1.423670261072041e-07, "loss": 0.47718513011932373, "step": 6985 }, { "epoch": 1.8983695652173913, "grad_norm": 1.162350456168864, "learning_rate": 1.4161255173333778e-07, "loss": 0.41551780700683594, "step": 6986 }, { "epoch": 1.898641304347826, "grad_norm": 1.3107526424586902, "learning_rate": 1.408600675872407e-07, "loss": 0.443676620721817, "step": 6987 }, { "epoch": 1.8989130434782608, "grad_norm": 1.2334524057604963, "learning_rate": 1.4010957382082468e-07, "loss": 0.4058090150356293, "step": 6988 }, { "epoch": 1.8991847826086956, "grad_norm": 1.4502250199631177, "learning_rate": 1.3936107058560077e-07, "loss": 0.42570850253105164, "step": 6989 }, { "epoch": 1.8994565217391304, "grad_norm": 1.1135697308044998, "learning_rate": 1.3861455803267698e-07, "loss": 0.3470723032951355, "step": 6990 }, { "epoch": 1.8997282608695651, "grad_norm": 1.430860815146749, "learning_rate": 1.3787003631276162e-07, "loss": 0.45416563749313354, "step": 6991 }, { "epoch": 1.9, "grad_norm": 1.2975067430440645, "learning_rate": 1.3712750557616117e-07, "loss": 0.4333706498146057, "step": 6992 }, { "epoch": 1.9002717391304347, "grad_norm": 1.1247747229098577, "learning_rate": 1.3638696597277678e-07, "loss": 0.35577836632728577, "step": 6993 }, { "epoch": 1.9005434782608694, "grad_norm": 1.158025547143486, "learning_rate": 1.3564841765211002e-07, "loss": 0.36715811491012573, "step": 6994 }, { "epoch": 1.9008152173913042, "grad_norm": 1.163078779117117, "learning_rate": 1.349118607632627e-07, "loss": 0.35969415307044983, "step": 6995 }, { "epoch": 1.901086956521739, "grad_norm": 1.3075124315208784, "learning_rate": 1.341772954549281e-07, "loss": 0.47826817631721497, "step": 6996 }, { "epoch": 1.9013586956521737, "grad_norm": 1.4045937136891664, "learning_rate": 1.3344472187540536e-07, "loss": 0.42403507232666016, "step": 6997 }, { "epoch": 1.9016304347826087, "grad_norm": 1.428466117053618, "learning_rate": 1.327141401725851e-07, "loss": 0.5156093835830688, "step": 6998 }, { "epoch": 1.9019021739130435, "grad_norm": 1.2887914794444661, "learning_rate": 1.3198555049396046e-07, "loss": 0.37101632356643677, "step": 6999 }, { "epoch": 1.9021739130434783, "grad_norm": 1.2190060889373435, "learning_rate": 1.3125895298661705e-07, "loss": 0.36852508783340454, "step": 7000 }, { "epoch": 1.902445652173913, "grad_norm": 1.2755871660431148, "learning_rate": 1.305343477972465e-07, "loss": 0.3783190846443176, "step": 7001 }, { "epoch": 1.9027173913043478, "grad_norm": 1.319094872000214, "learning_rate": 1.2981173507212953e-07, "loss": 0.471399188041687, "step": 7002 }, { "epoch": 1.9029891304347826, "grad_norm": 1.4171364673726554, "learning_rate": 1.2909111495714833e-07, "loss": 0.47275450825691223, "step": 7003 }, { "epoch": 1.9032608695652173, "grad_norm": 1.3530139407404362, "learning_rate": 1.2837248759778543e-07, "loss": 0.43303871154785156, "step": 7004 }, { "epoch": 1.9035326086956523, "grad_norm": 1.0791188972848582, "learning_rate": 1.27655853139117e-07, "loss": 0.365295946598053, "step": 7005 }, { "epoch": 1.903804347826087, "grad_norm": 1.0941694281697325, "learning_rate": 1.2694121172581843e-07, "loss": 0.3107282519340515, "step": 7006 }, { "epoch": 1.9040760869565219, "grad_norm": 1.4493087528990425, "learning_rate": 1.2622856350216207e-07, "loss": 0.4907578229904175, "step": 7007 }, { "epoch": 1.9043478260869566, "grad_norm": 1.3607620477853968, "learning_rate": 1.2551790861202062e-07, "loss": 0.444705605506897, "step": 7008 }, { "epoch": 1.9046195652173914, "grad_norm": 1.306241865127606, "learning_rate": 1.2480924719885934e-07, "loss": 0.46646255254745483, "step": 7009 }, { "epoch": 1.9048913043478262, "grad_norm": 1.1962442872336305, "learning_rate": 1.2410257940574712e-07, "loss": 0.37328317761421204, "step": 7010 }, { "epoch": 1.905163043478261, "grad_norm": 1.3046110655696561, "learning_rate": 1.2339790537534534e-07, "loss": 0.3384997248649597, "step": 7011 }, { "epoch": 1.9054347826086957, "grad_norm": 1.2681228757242697, "learning_rate": 1.2269522524991474e-07, "loss": 0.4121881127357483, "step": 7012 }, { "epoch": 1.9057065217391305, "grad_norm": 1.3299603093207597, "learning_rate": 1.21994539171314e-07, "loss": 0.41904178261756897, "step": 7013 }, { "epoch": 1.9059782608695652, "grad_norm": 1.4692636956522618, "learning_rate": 1.2129584728099887e-07, "loss": 0.500756561756134, "step": 7014 }, { "epoch": 1.90625, "grad_norm": 1.429227321555678, "learning_rate": 1.2059914972002207e-07, "loss": 0.4467180669307709, "step": 7015 }, { "epoch": 1.9065217391304348, "grad_norm": 1.290378226488471, "learning_rate": 1.1990444662903445e-07, "loss": 0.3421543836593628, "step": 7016 }, { "epoch": 1.9067934782608695, "grad_norm": 1.10669952976881, "learning_rate": 1.1921173814828378e-07, "loss": 0.33042412996292114, "step": 7017 }, { "epoch": 1.9070652173913043, "grad_norm": 1.1788627526972737, "learning_rate": 1.1852102441761493e-07, "loss": 0.3429604172706604, "step": 7018 }, { "epoch": 1.907336956521739, "grad_norm": 1.365799239373928, "learning_rate": 1.1783230557647075e-07, "loss": 0.4363654851913452, "step": 7019 }, { "epoch": 1.9076086956521738, "grad_norm": 1.3152909925159946, "learning_rate": 1.1714558176389224e-07, "loss": 0.4351534843444824, "step": 7020 }, { "epoch": 1.9078804347826086, "grad_norm": 1.1951362279971989, "learning_rate": 1.1646085311851297e-07, "loss": 0.3681353032588959, "step": 7021 }, { "epoch": 1.9081521739130434, "grad_norm": 1.453306167697414, "learning_rate": 1.1577811977857012e-07, "loss": 0.4422870874404907, "step": 7022 }, { "epoch": 1.9084239130434781, "grad_norm": 1.082806013151563, "learning_rate": 1.1509738188189234e-07, "loss": 0.293170690536499, "step": 7023 }, { "epoch": 1.908695652173913, "grad_norm": 1.3660349865056454, "learning_rate": 1.1441863956591192e-07, "loss": 0.4308714270591736, "step": 7024 }, { "epoch": 1.9089673913043477, "grad_norm": 1.1081688570656634, "learning_rate": 1.1374189296765037e-07, "loss": 0.38761264085769653, "step": 7025 }, { "epoch": 1.9092391304347827, "grad_norm": 1.0493077250577085, "learning_rate": 1.1306714222373282e-07, "loss": 0.2938616871833801, "step": 7026 }, { "epoch": 1.9095108695652174, "grad_norm": 1.331787364135173, "learning_rate": 1.1239438747038034e-07, "loss": 0.3893313407897949, "step": 7027 }, { "epoch": 1.9097826086956522, "grad_norm": 1.41310110421059, "learning_rate": 1.1172362884340648e-07, "loss": 0.44305524230003357, "step": 7028 }, { "epoch": 1.910054347826087, "grad_norm": 1.3309107726917169, "learning_rate": 1.1105486647822628e-07, "loss": 0.4423445463180542, "step": 7029 }, { "epoch": 1.9103260869565217, "grad_norm": 1.317385756131507, "learning_rate": 1.1038810050985171e-07, "loss": 0.39267462491989136, "step": 7030 }, { "epoch": 1.9105978260869565, "grad_norm": 1.3022316995067478, "learning_rate": 1.0972333107289068e-07, "loss": 0.39655226469039917, "step": 7031 }, { "epoch": 1.9108695652173913, "grad_norm": 1.2886321500272324, "learning_rate": 1.0906055830154583e-07, "loss": 0.40609192848205566, "step": 7032 }, { "epoch": 1.9111413043478263, "grad_norm": 0.9314641308907676, "learning_rate": 1.0839978232962122e-07, "loss": 0.2322104573249817, "step": 7033 }, { "epoch": 1.911413043478261, "grad_norm": 1.1343201346045562, "learning_rate": 1.0774100329051352e-07, "loss": 0.3160235285758972, "step": 7034 }, { "epoch": 1.9116847826086958, "grad_norm": 1.288916436441, "learning_rate": 1.0708422131721962e-07, "loss": 0.3530310392379761, "step": 7035 }, { "epoch": 1.9119565217391306, "grad_norm": 1.4696566765642751, "learning_rate": 1.064294365423313e-07, "loss": 0.4440479278564453, "step": 7036 }, { "epoch": 1.9122282608695653, "grad_norm": 1.2675138281820766, "learning_rate": 1.057766490980372e-07, "loss": 0.4300011098384857, "step": 7037 }, { "epoch": 1.9125, "grad_norm": 1.2447021844610324, "learning_rate": 1.0512585911612416e-07, "loss": 0.3694014549255371, "step": 7038 }, { "epoch": 1.9127717391304349, "grad_norm": 1.3986837316034142, "learning_rate": 1.0447706672797264e-07, "loss": 0.4528197646141052, "step": 7039 }, { "epoch": 1.9130434782608696, "grad_norm": 1.2693284281821893, "learning_rate": 1.0383027206456342e-07, "loss": 0.44008052349090576, "step": 7040 }, { "epoch": 1.9133152173913044, "grad_norm": 1.4793402493578598, "learning_rate": 1.0318547525647316e-07, "loss": 0.45562127232551575, "step": 7041 }, { "epoch": 1.9135869565217392, "grad_norm": 1.2569542166576007, "learning_rate": 1.0254267643387327e-07, "loss": 0.3829813003540039, "step": 7042 }, { "epoch": 1.913858695652174, "grad_norm": 1.4421650048400958, "learning_rate": 1.0190187572653332e-07, "loss": 0.49628543853759766, "step": 7043 }, { "epoch": 1.9141304347826087, "grad_norm": 1.3830024571747144, "learning_rate": 1.012630732638209e-07, "loss": 0.4786844849586487, "step": 7044 }, { "epoch": 1.9144021739130435, "grad_norm": 1.1123767731174576, "learning_rate": 1.006262691746962e-07, "loss": 0.29834553599357605, "step": 7045 }, { "epoch": 1.9146739130434782, "grad_norm": 1.2035751134876507, "learning_rate": 9.999146358771861e-08, "loss": 0.3769993782043457, "step": 7046 }, { "epoch": 1.914945652173913, "grad_norm": 1.3142393218686776, "learning_rate": 9.935865663104449e-08, "loss": 0.4031158685684204, "step": 7047 }, { "epoch": 1.9152173913043478, "grad_norm": 1.2557128209372161, "learning_rate": 9.872784843242611e-08, "loss": 0.42305445671081543, "step": 7048 }, { "epoch": 1.9154891304347825, "grad_norm": 1.246552121909973, "learning_rate": 9.80990391192116e-08, "loss": 0.40932077169418335, "step": 7049 }, { "epoch": 1.9157608695652173, "grad_norm": 1.022868824265909, "learning_rate": 9.747222881834495e-08, "loss": 0.2993863821029663, "step": 7050 }, { "epoch": 1.916032608695652, "grad_norm": 1.3217628109493131, "learning_rate": 9.684741765637051e-08, "loss": 0.4515259265899658, "step": 7051 }, { "epoch": 1.9163043478260868, "grad_norm": 1.3162114829760525, "learning_rate": 9.622460575942405e-08, "loss": 0.46579495072364807, "step": 7052 }, { "epoch": 1.9165760869565216, "grad_norm": 1.2047076992665662, "learning_rate": 9.560379325324054e-08, "loss": 0.40793463587760925, "step": 7053 }, { "epoch": 1.9168478260869564, "grad_norm": 1.1038036905727233, "learning_rate": 9.498498026315084e-08, "loss": 0.3804364800453186, "step": 7054 }, { "epoch": 1.9171195652173914, "grad_norm": 1.3002873721064072, "learning_rate": 9.436816691408058e-08, "loss": 0.4649256467819214, "step": 7055 }, { "epoch": 1.9173913043478261, "grad_norm": 1.4917513502439415, "learning_rate": 9.375335333055568e-08, "loss": 0.4806312322616577, "step": 7056 }, { "epoch": 1.9176630434782609, "grad_norm": 1.175890668418207, "learning_rate": 9.314053963669245e-08, "loss": 0.40528249740600586, "step": 7057 }, { "epoch": 1.9179347826086957, "grad_norm": 1.2216392654968633, "learning_rate": 9.252972595620969e-08, "loss": 0.4185483753681183, "step": 7058 }, { "epoch": 1.9182065217391304, "grad_norm": 1.172229732590213, "learning_rate": 9.192091241241763e-08, "loss": 0.3845083713531494, "step": 7059 }, { "epoch": 1.9184782608695652, "grad_norm": 1.272846952754066, "learning_rate": 9.131409912822575e-08, "loss": 0.3568685054779053, "step": 7060 }, { "epoch": 1.91875, "grad_norm": 1.4013907253502842, "learning_rate": 9.070928622613718e-08, "loss": 0.38403695821762085, "step": 7061 }, { "epoch": 1.919021739130435, "grad_norm": 1.1514349027104385, "learning_rate": 9.010647382825421e-08, "loss": 0.39332497119903564, "step": 7062 }, { "epoch": 1.9192934782608697, "grad_norm": 1.09375826921493, "learning_rate": 8.950566205627287e-08, "loss": 0.3417243957519531, "step": 7063 }, { "epoch": 1.9195652173913045, "grad_norm": 1.3893733776880437, "learning_rate": 8.8906851031485e-08, "loss": 0.4168333113193512, "step": 7064 }, { "epoch": 1.9198369565217392, "grad_norm": 1.2836123923716742, "learning_rate": 8.831004087478168e-08, "loss": 0.4246388077735901, "step": 7065 }, { "epoch": 1.920108695652174, "grad_norm": 1.2424852370326391, "learning_rate": 8.771523170664542e-08, "loss": 0.3847666382789612, "step": 7066 }, { "epoch": 1.9203804347826088, "grad_norm": 1.336232610287481, "learning_rate": 8.712242364716017e-08, "loss": 0.3973119556903839, "step": 7067 }, { "epoch": 1.9206521739130435, "grad_norm": 1.255821366007973, "learning_rate": 8.65316168160002e-08, "loss": 0.3669562339782715, "step": 7068 }, { "epoch": 1.9209239130434783, "grad_norm": 1.2363732280366955, "learning_rate": 8.59428113324412e-08, "loss": 0.3646828532218933, "step": 7069 }, { "epoch": 1.921195652173913, "grad_norm": 1.0880058742203924, "learning_rate": 8.535600731535033e-08, "loss": 0.38068896532058716, "step": 7070 }, { "epoch": 1.9214673913043478, "grad_norm": 1.2921291303621352, "learning_rate": 8.47712048831928e-08, "loss": 0.41187620162963867, "step": 7071 }, { "epoch": 1.9217391304347826, "grad_norm": 1.2482491895033003, "learning_rate": 8.418840415402973e-08, "loss": 0.39248785376548767, "step": 7072 }, { "epoch": 1.9220108695652174, "grad_norm": 1.381081868274134, "learning_rate": 8.360760524551814e-08, "loss": 0.4355621933937073, "step": 7073 }, { "epoch": 1.9222826086956522, "grad_norm": 1.1660715043076706, "learning_rate": 8.3028808274912e-08, "loss": 0.3910045921802521, "step": 7074 }, { "epoch": 1.922554347826087, "grad_norm": 1.177824268560639, "learning_rate": 8.245201335905562e-08, "loss": 0.3545803427696228, "step": 7075 }, { "epoch": 1.9228260869565217, "grad_norm": 1.1594431088215291, "learning_rate": 8.187722061439806e-08, "loss": 0.32891130447387695, "step": 7076 }, { "epoch": 1.9230978260869565, "grad_norm": 1.4009846411701798, "learning_rate": 8.130443015697765e-08, "loss": 0.4682501554489136, "step": 7077 }, { "epoch": 1.9233695652173912, "grad_norm": 1.3584372800249755, "learning_rate": 8.073364210242852e-08, "loss": 0.4259669780731201, "step": 7078 }, { "epoch": 1.923641304347826, "grad_norm": 1.274961146622448, "learning_rate": 8.016485656598516e-08, "loss": 0.47841358184814453, "step": 7079 }, { "epoch": 1.9239130434782608, "grad_norm": 1.17789007961354, "learning_rate": 7.959807366247352e-08, "loss": 0.3899983763694763, "step": 7080 }, { "epoch": 1.9241847826086955, "grad_norm": 1.3765208860383271, "learning_rate": 7.903329350631649e-08, "loss": 0.42206358909606934, "step": 7081 }, { "epoch": 1.9244565217391303, "grad_norm": 1.1710329669433361, "learning_rate": 7.8470516211534e-08, "loss": 0.36774304509162903, "step": 7082 }, { "epoch": 1.924728260869565, "grad_norm": 1.36997074073104, "learning_rate": 7.790974189173961e-08, "loss": 0.45377206802368164, "step": 7083 }, { "epoch": 1.925, "grad_norm": 1.272395411842735, "learning_rate": 7.735097066014275e-08, "loss": 0.4374814033508301, "step": 7084 }, { "epoch": 1.9252717391304348, "grad_norm": 1.0243154662545115, "learning_rate": 7.679420262954984e-08, "loss": 0.26765957474708557, "step": 7085 }, { "epoch": 1.9255434782608696, "grad_norm": 1.0641359104623707, "learning_rate": 7.62394379123621e-08, "loss": 0.3072524666786194, "step": 7086 }, { "epoch": 1.9258152173913043, "grad_norm": 1.2925358108896832, "learning_rate": 7.568667662057661e-08, "loss": 0.44120603799819946, "step": 7087 }, { "epoch": 1.9260869565217391, "grad_norm": 1.2387599327851944, "learning_rate": 7.51359188657852e-08, "loss": 0.3879799246788025, "step": 7088 }, { "epoch": 1.9263586956521739, "grad_norm": 1.2060434627412167, "learning_rate": 7.45871647591756e-08, "loss": 0.3824622631072998, "step": 7089 }, { "epoch": 1.9266304347826086, "grad_norm": 1.190623200408072, "learning_rate": 7.404041441153253e-08, "loss": 0.41648638248443604, "step": 7090 }, { "epoch": 1.9269021739130436, "grad_norm": 1.146381288550326, "learning_rate": 7.349566793323326e-08, "loss": 0.326651394367218, "step": 7091 }, { "epoch": 1.9271739130434784, "grad_norm": 1.3523717933331758, "learning_rate": 7.295292543425314e-08, "loss": 0.3901810646057129, "step": 7092 }, { "epoch": 1.9274456521739132, "grad_norm": 1.3595016694211943, "learning_rate": 7.24121870241623e-08, "loss": 0.3742074966430664, "step": 7093 }, { "epoch": 1.927717391304348, "grad_norm": 1.242501015340326, "learning_rate": 7.187345281212455e-08, "loss": 0.3929617404937744, "step": 7094 }, { "epoch": 1.9279891304347827, "grad_norm": 1.4738147804461366, "learning_rate": 7.133672290690064e-08, "loss": 0.5114099979400635, "step": 7095 }, { "epoch": 1.9282608695652175, "grad_norm": 1.4277480361716823, "learning_rate": 7.080199741684834e-08, "loss": 0.49237510561943054, "step": 7096 }, { "epoch": 1.9285326086956522, "grad_norm": 1.417602328319926, "learning_rate": 7.026927644991688e-08, "loss": 0.40062010288238525, "step": 7097 }, { "epoch": 1.928804347826087, "grad_norm": 1.2702609723624128, "learning_rate": 6.973856011365354e-08, "loss": 0.37679851055145264, "step": 7098 }, { "epoch": 1.9290760869565218, "grad_norm": 1.0942802744541102, "learning_rate": 6.92098485152004e-08, "loss": 0.2561713755130768, "step": 7099 }, { "epoch": 1.9293478260869565, "grad_norm": 1.236958982292488, "learning_rate": 6.868314176129432e-08, "loss": 0.3718823492527008, "step": 7100 }, { "epoch": 1.9296195652173913, "grad_norm": 1.246314469163168, "learning_rate": 6.815843995826799e-08, "loss": 0.3652656674385071, "step": 7101 }, { "epoch": 1.929891304347826, "grad_norm": 1.1901982769494095, "learning_rate": 6.763574321205002e-08, "loss": 0.37614551186561584, "step": 7102 }, { "epoch": 1.9301630434782608, "grad_norm": 1.2663470136265491, "learning_rate": 6.711505162816157e-08, "loss": 0.46086573600769043, "step": 7103 }, { "epoch": 1.9304347826086956, "grad_norm": 1.1763305771435424, "learning_rate": 6.659636531172076e-08, "loss": 0.375298410654068, "step": 7104 }, { "epoch": 1.9307065217391304, "grad_norm": 1.2566143452001068, "learning_rate": 6.607968436744272e-08, "loss": 0.3615386486053467, "step": 7105 }, { "epoch": 1.9309782608695651, "grad_norm": 1.2634470701822333, "learning_rate": 6.556500889963402e-08, "loss": 0.3865692615509033, "step": 7106 }, { "epoch": 1.93125, "grad_norm": 1.2727357505138701, "learning_rate": 6.505233901219932e-08, "loss": 0.37671393156051636, "step": 7107 }, { "epoch": 1.9315217391304347, "grad_norm": 1.4463064515143136, "learning_rate": 6.454167480863694e-08, "loss": 0.4675363302230835, "step": 7108 }, { "epoch": 1.9317934782608694, "grad_norm": 1.360804542352623, "learning_rate": 6.403301639203996e-08, "loss": 0.41408485174179077, "step": 7109 }, { "epoch": 1.9320652173913042, "grad_norm": 1.3755639212665502, "learning_rate": 6.352636386509847e-08, "loss": 0.45284074544906616, "step": 7110 }, { "epoch": 1.932336956521739, "grad_norm": 1.0584043364273228, "learning_rate": 6.302171733009399e-08, "loss": 0.3141763210296631, "step": 7111 }, { "epoch": 1.9326086956521737, "grad_norm": 1.345444727584555, "learning_rate": 6.251907688890945e-08, "loss": 0.4303303360939026, "step": 7112 }, { "epoch": 1.9328804347826087, "grad_norm": 1.254808593096738, "learning_rate": 6.201844264301483e-08, "loss": 0.42205214500427246, "step": 7113 }, { "epoch": 1.9331521739130435, "grad_norm": 1.1135719208798693, "learning_rate": 6.151981469348034e-08, "loss": 0.3994370698928833, "step": 7114 }, { "epoch": 1.9334239130434783, "grad_norm": 1.328674132690625, "learning_rate": 6.102319314097105e-08, "loss": 0.43520015478134155, "step": 7115 }, { "epoch": 1.933695652173913, "grad_norm": 1.05529588968992, "learning_rate": 6.052857808574453e-08, "loss": 0.310513436794281, "step": 7116 }, { "epoch": 1.9339673913043478, "grad_norm": 1.5166958929904812, "learning_rate": 6.003596962765424e-08, "loss": 0.46433311700820923, "step": 7117 }, { "epoch": 1.9342391304347826, "grad_norm": 1.1913624250677428, "learning_rate": 5.9545367866149504e-08, "loss": 0.3426838517189026, "step": 7118 }, { "epoch": 1.9345108695652173, "grad_norm": 1.3467101490734394, "learning_rate": 5.905677290027334e-08, "loss": 0.4477195739746094, "step": 7119 }, { "epoch": 1.9347826086956523, "grad_norm": 1.166996520050152, "learning_rate": 5.8570184828664614e-08, "loss": 0.4256899654865265, "step": 7120 }, { "epoch": 1.935054347826087, "grad_norm": 1.2001403095291039, "learning_rate": 5.808560374955585e-08, "loss": 0.347533255815506, "step": 7121 }, { "epoch": 1.9353260869565219, "grad_norm": 1.1804788968557363, "learning_rate": 5.760302976077659e-08, "loss": 0.38019877672195435, "step": 7122 }, { "epoch": 1.9355978260869566, "grad_norm": 1.4383326591171455, "learning_rate": 5.712246295974777e-08, "loss": 0.48694807291030884, "step": 7123 }, { "epoch": 1.9358695652173914, "grad_norm": 1.4361892692537994, "learning_rate": 5.664390344348736e-08, "loss": 0.5092818737030029, "step": 7124 }, { "epoch": 1.9361413043478262, "grad_norm": 1.1817807999030217, "learning_rate": 5.6167351308609174e-08, "loss": 0.39288777112960815, "step": 7125 }, { "epoch": 1.936413043478261, "grad_norm": 1.4840948454581215, "learning_rate": 5.5692806651318486e-08, "loss": 0.4505614638328552, "step": 7126 }, { "epoch": 1.9366847826086957, "grad_norm": 1.2784806864879852, "learning_rate": 5.522026956741866e-08, "loss": 0.3737223744392395, "step": 7127 }, { "epoch": 1.9369565217391305, "grad_norm": 1.2553566678298413, "learning_rate": 5.474974015230561e-08, "loss": 0.425950825214386, "step": 7128 }, { "epoch": 1.9372282608695652, "grad_norm": 1.2714654758348631, "learning_rate": 5.428121850097112e-08, "loss": 0.4552552103996277, "step": 7129 }, { "epoch": 1.9375, "grad_norm": 1.157429772781338, "learning_rate": 5.3814704708000656e-08, "loss": 0.3497551679611206, "step": 7130 }, { "epoch": 1.9377717391304348, "grad_norm": 1.177603133530392, "learning_rate": 5.3350198867574424e-08, "loss": 0.3930180072784424, "step": 7131 }, { "epoch": 1.9380434782608695, "grad_norm": 1.7230346825349234, "learning_rate": 5.2887701073468525e-08, "loss": 0.3409222960472107, "step": 7132 }, { "epoch": 1.9383152173913043, "grad_norm": 1.33158684224773, "learning_rate": 5.2427211419051605e-08, "loss": 0.42427897453308105, "step": 7133 }, { "epoch": 1.938586956521739, "grad_norm": 1.2896053897496438, "learning_rate": 5.19687299972893e-08, "loss": 0.3947017788887024, "step": 7134 }, { "epoch": 1.9388586956521738, "grad_norm": 1.3611473005969106, "learning_rate": 5.151225690074091e-08, "loss": 0.45700111985206604, "step": 7135 }, { "epoch": 1.9391304347826086, "grad_norm": 1.2108550586597366, "learning_rate": 5.105779222155827e-08, "loss": 0.370781809091568, "step": 7136 }, { "epoch": 1.9394021739130434, "grad_norm": 1.4442295448531413, "learning_rate": 5.06053360514902e-08, "loss": 0.4812021851539612, "step": 7137 }, { "epoch": 1.9396739130434781, "grad_norm": 1.4215679474497724, "learning_rate": 5.015488848187921e-08, "loss": 0.4725261330604553, "step": 7138 }, { "epoch": 1.939945652173913, "grad_norm": 1.1560093254828372, "learning_rate": 4.9706449603664774e-08, "loss": 0.3581981062889099, "step": 7139 }, { "epoch": 1.9402173913043477, "grad_norm": 1.2082337362589453, "learning_rate": 4.926001950737447e-08, "loss": 0.37645578384399414, "step": 7140 }, { "epoch": 1.9404891304347827, "grad_norm": 1.2897369594924655, "learning_rate": 4.881559828313731e-08, "loss": 0.45457619428634644, "step": 7141 }, { "epoch": 1.9407608695652174, "grad_norm": 1.2895567012934124, "learning_rate": 4.8373186020672645e-08, "loss": 0.4282847046852112, "step": 7142 }, { "epoch": 1.9410326086956522, "grad_norm": 1.3518900930420814, "learning_rate": 4.7932782809294585e-08, "loss": 0.46812891960144043, "step": 7143 }, { "epoch": 1.941304347826087, "grad_norm": 1.2723448450833028, "learning_rate": 4.749438873791423e-08, "loss": 0.3969513773918152, "step": 7144 }, { "epoch": 1.9415760869565217, "grad_norm": 1.3727657838789122, "learning_rate": 4.705800389503412e-08, "loss": 0.4624193608760834, "step": 7145 }, { "epoch": 1.9418478260869565, "grad_norm": 1.3499670985709915, "learning_rate": 4.6623628368753784e-08, "loss": 0.4672471880912781, "step": 7146 }, { "epoch": 1.9421195652173913, "grad_norm": 1.0359222174472327, "learning_rate": 4.619126224676418e-08, "loss": 0.27853161096572876, "step": 7147 }, { "epoch": 1.9423913043478263, "grad_norm": 1.172769808712194, "learning_rate": 4.576090561635216e-08, "loss": 0.39399218559265137, "step": 7148 }, { "epoch": 1.942663043478261, "grad_norm": 1.420603666718809, "learning_rate": 4.5332558564400444e-08, "loss": 0.4722668528556824, "step": 7149 }, { "epoch": 1.9429347826086958, "grad_norm": 1.0634578874743168, "learning_rate": 4.4906221177380974e-08, "loss": 0.3265949487686157, "step": 7150 }, { "epoch": 1.9432065217391306, "grad_norm": 1.3148169779351087, "learning_rate": 4.448189354136823e-08, "loss": 0.38016045093536377, "step": 7151 }, { "epoch": 1.9434782608695653, "grad_norm": 1.3765802003479988, "learning_rate": 4.405957574202147e-08, "loss": 0.38774344325065613, "step": 7152 }, { "epoch": 1.94375, "grad_norm": 1.3630436480483772, "learning_rate": 4.3639267864603594e-08, "loss": 0.41916292905807495, "step": 7153 }, { "epoch": 1.9440217391304349, "grad_norm": 1.3996932764615255, "learning_rate": 4.32209699939623e-08, "loss": 0.48565369844436646, "step": 7154 }, { "epoch": 1.9442934782608696, "grad_norm": 1.4112155713139813, "learning_rate": 4.280468221454781e-08, "loss": 0.41711539030075073, "step": 7155 }, { "epoch": 1.9445652173913044, "grad_norm": 1.1549334122609385, "learning_rate": 4.239040461039956e-08, "loss": 0.3618631064891815, "step": 7156 }, { "epoch": 1.9448369565217392, "grad_norm": 1.3806743259245322, "learning_rate": 4.197813726515287e-08, "loss": 0.49475014209747314, "step": 7157 }, { "epoch": 1.945108695652174, "grad_norm": 1.294160659974601, "learning_rate": 4.156788026203673e-08, "loss": 0.4097515344619751, "step": 7158 }, { "epoch": 1.9453804347826087, "grad_norm": 1.0202188287764575, "learning_rate": 4.115963368387488e-08, "loss": 0.26625120639801025, "step": 7159 }, { "epoch": 1.9456521739130435, "grad_norm": 1.4515886717271, "learning_rate": 4.075339761308472e-08, "loss": 0.4394344091415405, "step": 7160 }, { "epoch": 1.9459239130434782, "grad_norm": 1.266829602967349, "learning_rate": 4.034917213167733e-08, "loss": 0.4018288254737854, "step": 7161 }, { "epoch": 1.946195652173913, "grad_norm": 1.1780972815069035, "learning_rate": 3.9946957321259635e-08, "loss": 0.3895847201347351, "step": 7162 }, { "epoch": 1.9464673913043478, "grad_norm": 1.3381377887800947, "learning_rate": 3.9546753263030035e-08, "loss": 0.41705432534217834, "step": 7163 }, { "epoch": 1.9467391304347825, "grad_norm": 1.0467563014312478, "learning_rate": 3.914856003778389e-08, "loss": 0.3616117835044861, "step": 7164 }, { "epoch": 1.9470108695652173, "grad_norm": 1.3207462753003647, "learning_rate": 3.87523777259069e-08, "loss": 0.4068918228149414, "step": 7165 }, { "epoch": 1.947282608695652, "grad_norm": 1.210337137901914, "learning_rate": 3.835820640738397e-08, "loss": 0.38948243856430054, "step": 7166 }, { "epoch": 1.9475543478260868, "grad_norm": 1.2406445167514273, "learning_rate": 3.7966046161788116e-08, "loss": 0.38730794191360474, "step": 7167 }, { "epoch": 1.9478260869565216, "grad_norm": 1.4563356102226628, "learning_rate": 3.757589706829157e-08, "loss": 0.4436868727207184, "step": 7168 }, { "epoch": 1.9480978260869564, "grad_norm": 1.2959416727394473, "learning_rate": 3.718775920565687e-08, "loss": 0.48215773701667786, "step": 7169 }, { "epoch": 1.9483695652173914, "grad_norm": 1.2784989938017888, "learning_rate": 3.680163265224246e-08, "loss": 0.38783201575279236, "step": 7170 }, { "epoch": 1.9486413043478261, "grad_norm": 1.2282550124495335, "learning_rate": 3.641751748600042e-08, "loss": 0.4258124530315399, "step": 7171 }, { "epoch": 1.9489130434782609, "grad_norm": 1.1366922302243165, "learning_rate": 3.6035413784475396e-08, "loss": 0.3523831367492676, "step": 7172 }, { "epoch": 1.9491847826086957, "grad_norm": 1.2564540565783342, "learning_rate": 3.5655321624809e-08, "loss": 0.42577478289604187, "step": 7173 }, { "epoch": 1.9494565217391304, "grad_norm": 1.1010030074290438, "learning_rate": 3.5277241083734315e-08, "loss": 0.31228408217430115, "step": 7174 }, { "epoch": 1.9497282608695652, "grad_norm": 1.161582889298567, "learning_rate": 3.490117223757805e-08, "loss": 0.2889745533466339, "step": 7175 }, { "epoch": 1.95, "grad_norm": 1.4313609431591394, "learning_rate": 3.4527115162261703e-08, "loss": 0.3963147699832916, "step": 7176 }, { "epoch": 1.950271739130435, "grad_norm": 1.2650415336084901, "learning_rate": 3.4155069933301535e-08, "loss": 0.4734641909599304, "step": 7177 }, { "epoch": 1.9505434782608697, "grad_norm": 1.12847487750689, "learning_rate": 3.3785036625806344e-08, "loss": 0.34994691610336304, "step": 7178 }, { "epoch": 1.9508152173913045, "grad_norm": 1.2326126043890275, "learning_rate": 3.3417015314477494e-08, "loss": 0.3500772714614868, "step": 7179 }, { "epoch": 1.9510869565217392, "grad_norm": 1.3600759230985437, "learning_rate": 3.3051006073613335e-08, "loss": 0.4563954770565033, "step": 7180 }, { "epoch": 1.951358695652174, "grad_norm": 1.0056432515324765, "learning_rate": 3.268700897710475e-08, "loss": 0.28997722268104553, "step": 7181 }, { "epoch": 1.9516304347826088, "grad_norm": 1.1944913870356446, "learning_rate": 3.232502409843519e-08, "loss": 0.3842273950576782, "step": 7182 }, { "epoch": 1.9519021739130435, "grad_norm": 1.3403972705530347, "learning_rate": 3.1965051510682856e-08, "loss": 0.3543500304222107, "step": 7183 }, { "epoch": 1.9521739130434783, "grad_norm": 1.218425392254686, "learning_rate": 3.160709128652073e-08, "loss": 0.43890753388404846, "step": 7184 }, { "epoch": 1.952445652173913, "grad_norm": 1.2836558355625622, "learning_rate": 3.125114349821212e-08, "loss": 0.4725092649459839, "step": 7185 }, { "epoch": 1.9527173913043478, "grad_norm": 1.4867338675368544, "learning_rate": 3.0897208217618436e-08, "loss": 0.5228495597839355, "step": 7186 }, { "epoch": 1.9529891304347826, "grad_norm": 1.4135993428085365, "learning_rate": 3.05452855161914e-08, "loss": 0.4472629427909851, "step": 7187 }, { "epoch": 1.9532608695652174, "grad_norm": 1.1542050504478418, "learning_rate": 3.019537546497864e-08, "loss": 0.3652150630950928, "step": 7188 }, { "epoch": 1.9535326086956522, "grad_norm": 1.1661578664087906, "learning_rate": 2.984747813462141e-08, "loss": 0.3581894636154175, "step": 7189 }, { "epoch": 1.953804347826087, "grad_norm": 1.2183179907431212, "learning_rate": 2.950159359535132e-08, "loss": 0.35300588607788086, "step": 7190 }, { "epoch": 1.9540760869565217, "grad_norm": 1.4461653666713044, "learning_rate": 2.915772191699806e-08, "loss": 0.5490684509277344, "step": 7191 }, { "epoch": 1.9543478260869565, "grad_norm": 1.3099430692552805, "learning_rate": 2.881586316898166e-08, "loss": 0.4139519929885864, "step": 7192 }, { "epoch": 1.9546195652173912, "grad_norm": 1.098725748209616, "learning_rate": 2.8476017420319134e-08, "loss": 0.33126020431518555, "step": 7193 }, { "epoch": 1.954891304347826, "grad_norm": 1.4708289863348512, "learning_rate": 2.8138184739616715e-08, "loss": 0.4087814390659332, "step": 7194 }, { "epoch": 1.9551630434782608, "grad_norm": 1.3116263150171883, "learning_rate": 2.780236519507873e-08, "loss": 0.40539464354515076, "step": 7195 }, { "epoch": 1.9554347826086955, "grad_norm": 1.2894937212062267, "learning_rate": 2.7468558854499838e-08, "loss": 0.41929805278778076, "step": 7196 }, { "epoch": 1.9557065217391303, "grad_norm": 1.3418997209346366, "learning_rate": 2.713676578526947e-08, "loss": 0.45667564868927, "step": 7197 }, { "epoch": 1.955978260869565, "grad_norm": 1.198663364228883, "learning_rate": 2.680698605437071e-08, "loss": 0.3731139600276947, "step": 7198 }, { "epoch": 1.95625, "grad_norm": 1.2563158539482187, "learning_rate": 2.64792197283803e-08, "loss": 0.4261360764503479, "step": 7199 }, { "epoch": 1.9565217391304348, "grad_norm": 1.4486717072043815, "learning_rate": 2.6153466873468646e-08, "loss": 0.4646282494068146, "step": 7200 }, { "epoch": 1.9567934782608696, "grad_norm": 1.3203510425652079, "learning_rate": 2.5829727555397587e-08, "loss": 0.408413827419281, "step": 7201 }, { "epoch": 1.9570652173913043, "grad_norm": 1.4011214761052402, "learning_rate": 2.5508001839525952e-08, "loss": 0.48356470465660095, "step": 7202 }, { "epoch": 1.9573369565217391, "grad_norm": 1.18282305543622, "learning_rate": 2.51882897908029e-08, "loss": 0.39493733644485474, "step": 7203 }, { "epoch": 1.9576086956521739, "grad_norm": 1.342792661994685, "learning_rate": 2.4870591473773463e-08, "loss": 0.42062675952911377, "step": 7204 }, { "epoch": 1.9578804347826086, "grad_norm": 1.31779881283842, "learning_rate": 2.455490695257523e-08, "loss": 0.47329235076904297, "step": 7205 }, { "epoch": 1.9581521739130436, "grad_norm": 1.4545648472613981, "learning_rate": 2.4241236290938332e-08, "loss": 0.4949396848678589, "step": 7206 }, { "epoch": 1.9584239130434784, "grad_norm": 1.4213991717487784, "learning_rate": 2.3929579552187665e-08, "loss": 0.44663098454475403, "step": 7207 }, { "epoch": 1.9586956521739132, "grad_norm": 1.4734174482216957, "learning_rate": 2.3619936799240683e-08, "loss": 0.4973899722099304, "step": 7208 }, { "epoch": 1.958967391304348, "grad_norm": 1.4108209667589393, "learning_rate": 2.3312308094607382e-08, "loss": 0.4109044671058655, "step": 7209 }, { "epoch": 1.9592391304347827, "grad_norm": 1.2755253216469125, "learning_rate": 2.300669350039586e-08, "loss": 0.39077526330947876, "step": 7210 }, { "epoch": 1.9595108695652175, "grad_norm": 1.0059544338572812, "learning_rate": 2.2703093078300098e-08, "loss": 0.2769041657447815, "step": 7211 }, { "epoch": 1.9597826086956522, "grad_norm": 1.3368913051779032, "learning_rate": 2.24015068896144e-08, "loss": 0.3299810290336609, "step": 7212 }, { "epoch": 1.960054347826087, "grad_norm": 1.2113300310499229, "learning_rate": 2.2101934995222285e-08, "loss": 0.4043996334075928, "step": 7213 }, { "epoch": 1.9603260869565218, "grad_norm": 1.3606970976078332, "learning_rate": 2.1804377455600933e-08, "loss": 0.40840405225753784, "step": 7214 }, { "epoch": 1.9605978260869565, "grad_norm": 1.2682826648917285, "learning_rate": 2.1508834330823403e-08, "loss": 0.3743137717247009, "step": 7215 }, { "epoch": 1.9608695652173913, "grad_norm": 1.0968560680763955, "learning_rate": 2.1215305680554186e-08, "loss": 0.33965185284614563, "step": 7216 }, { "epoch": 1.961141304347826, "grad_norm": 1.270834598977967, "learning_rate": 2.0923791564050333e-08, "loss": 0.41692885756492615, "step": 7217 }, { "epoch": 1.9614130434782608, "grad_norm": 1.3532634952254052, "learning_rate": 2.063429204016365e-08, "loss": 0.4243910014629364, "step": 7218 }, { "epoch": 1.9616847826086956, "grad_norm": 1.260695979054735, "learning_rate": 2.0346807167339612e-08, "loss": 0.3581916093826294, "step": 7219 }, { "epoch": 1.9619565217391304, "grad_norm": 1.18876066580683, "learning_rate": 2.0061337003615122e-08, "loss": 0.3955005705356598, "step": 7220 }, { "epoch": 1.9622282608695651, "grad_norm": 1.3296319492527822, "learning_rate": 1.9777881606621864e-08, "loss": 0.4481944739818573, "step": 7221 }, { "epoch": 1.9625, "grad_norm": 1.1785738205528002, "learning_rate": 1.949644103358406e-08, "loss": 0.3389328122138977, "step": 7222 }, { "epoch": 1.9627717391304347, "grad_norm": 1.0998807468651255, "learning_rate": 1.9217015341318478e-08, "loss": 0.31249377131462097, "step": 7223 }, { "epoch": 1.9630434782608694, "grad_norm": 1.1805577934215126, "learning_rate": 1.893960458623889e-08, "loss": 0.39394235610961914, "step": 7224 }, { "epoch": 1.9633152173913042, "grad_norm": 1.359314076516034, "learning_rate": 1.8664208824346054e-08, "loss": 0.44724369049072266, "step": 7225 }, { "epoch": 1.963586956521739, "grad_norm": 1.3664576477214385, "learning_rate": 1.8390828111238822e-08, "loss": 0.4214475750923157, "step": 7226 }, { "epoch": 1.9638586956521737, "grad_norm": 1.2092422222850383, "learning_rate": 1.8119462502108608e-08, "loss": 0.37550264596939087, "step": 7227 }, { "epoch": 1.9641304347826087, "grad_norm": 1.0953814655637295, "learning_rate": 1.7850112051738255e-08, "loss": 0.35257649421691895, "step": 7228 }, { "epoch": 1.9644021739130435, "grad_norm": 1.122056797464441, "learning_rate": 1.7582776814504266e-08, "loss": 0.3378285765647888, "step": 7229 }, { "epoch": 1.9646739130434783, "grad_norm": 1.029542614224089, "learning_rate": 1.731745684437791e-08, "loss": 0.3221186399459839, "step": 7230 }, { "epoch": 1.964945652173913, "grad_norm": 1.2829805111679708, "learning_rate": 1.7054152194921902e-08, "loss": 0.3842497766017914, "step": 7231 }, { "epoch": 1.9652173913043478, "grad_norm": 1.6306097525610448, "learning_rate": 1.6792862919291498e-08, "loss": 0.4641203284263611, "step": 7232 }, { "epoch": 1.9654891304347826, "grad_norm": 1.1423898241061932, "learning_rate": 1.653358907023783e-08, "loss": 0.3390302062034607, "step": 7233 }, { "epoch": 1.9657608695652173, "grad_norm": 1.2923966913951275, "learning_rate": 1.6276330700102373e-08, "loss": 0.4009455144405365, "step": 7234 }, { "epoch": 1.9660326086956523, "grad_norm": 1.2416009579153293, "learning_rate": 1.6021087860822458e-08, "loss": 0.3933837413787842, "step": 7235 }, { "epoch": 1.966304347826087, "grad_norm": 1.4506120699770118, "learning_rate": 1.5767860603924656e-08, "loss": 0.41890913248062134, "step": 7236 }, { "epoch": 1.9665760869565219, "grad_norm": 1.309262125204929, "learning_rate": 1.551664898053362e-08, "loss": 0.4153475761413574, "step": 7237 }, { "epoch": 1.9668478260869566, "grad_norm": 1.4039369634827508, "learning_rate": 1.5267453041361015e-08, "loss": 0.49396973848342896, "step": 7238 }, { "epoch": 1.9671195652173914, "grad_norm": 1.1325448483852112, "learning_rate": 1.5020272836717696e-08, "loss": 0.37323522567749023, "step": 7239 }, { "epoch": 1.9673913043478262, "grad_norm": 1.1282376423980587, "learning_rate": 1.4775108416503758e-08, "loss": 0.348285436630249, "step": 7240 }, { "epoch": 1.967663043478261, "grad_norm": 1.354523931426661, "learning_rate": 1.4531959830214048e-08, "loss": 0.4081348776817322, "step": 7241 }, { "epoch": 1.9679347826086957, "grad_norm": 1.223725794908502, "learning_rate": 1.4290827126935969e-08, "loss": 0.3680172264575958, "step": 7242 }, { "epoch": 1.9682065217391305, "grad_norm": 1.3996817917764188, "learning_rate": 1.4051710355347248e-08, "loss": 0.4901280403137207, "step": 7243 }, { "epoch": 1.9684782608695652, "grad_norm": 1.1683850851925826, "learning_rate": 1.3814609563724823e-08, "loss": 0.3938254117965698, "step": 7244 }, { "epoch": 1.96875, "grad_norm": 1.4266452776757994, "learning_rate": 1.357952479993263e-08, "loss": 0.4973446726799011, "step": 7245 }, { "epoch": 1.9690217391304348, "grad_norm": 1.250672861168374, "learning_rate": 1.3346456111430484e-08, "loss": 0.4349166452884674, "step": 7246 }, { "epoch": 1.9692934782608695, "grad_norm": 1.2882301888503598, "learning_rate": 1.3115403545270744e-08, "loss": 0.38962918519973755, "step": 7247 }, { "epoch": 1.9695652173913043, "grad_norm": 1.24814466303568, "learning_rate": 1.2886367148099433e-08, "loss": 0.3943675756454468, "step": 7248 }, { "epoch": 1.969836956521739, "grad_norm": 1.4019373329837406, "learning_rate": 1.2659346966152897e-08, "loss": 0.4856101870536804, "step": 7249 }, { "epoch": 1.9701086956521738, "grad_norm": 1.3287621208204305, "learning_rate": 1.2434343045264474e-08, "loss": 0.4495185613632202, "step": 7250 }, { "epoch": 1.9703804347826086, "grad_norm": 1.0837379794005542, "learning_rate": 1.2211355430857829e-08, "loss": 0.33442428708076477, "step": 7251 }, { "epoch": 1.9706521739130434, "grad_norm": 1.3915332232495383, "learning_rate": 1.1990384167948067e-08, "loss": 0.41826432943344116, "step": 7252 }, { "epoch": 1.9709239130434781, "grad_norm": 1.2434571323028107, "learning_rate": 1.1771429301148384e-08, "loss": 0.351662814617157, "step": 7253 }, { "epoch": 1.971195652173913, "grad_norm": 1.3340521782302592, "learning_rate": 1.1554490874660096e-08, "loss": 0.44283777475357056, "step": 7254 }, { "epoch": 1.9714673913043477, "grad_norm": 0.974341349035639, "learning_rate": 1.1339568932278167e-08, "loss": 0.2859936058521271, "step": 7255 }, { "epoch": 1.9717391304347827, "grad_norm": 1.1927413016558783, "learning_rate": 1.1126663517393444e-08, "loss": 0.4069061279296875, "step": 7256 }, { "epoch": 1.9720108695652174, "grad_norm": 1.2136665093283792, "learning_rate": 1.0915774672985991e-08, "loss": 0.444404661655426, "step": 7257 }, { "epoch": 1.9722826086956522, "grad_norm": 1.1904120954824338, "learning_rate": 1.070690244163175e-08, "loss": 0.3633812665939331, "step": 7258 }, { "epoch": 1.972554347826087, "grad_norm": 1.3514650773216759, "learning_rate": 1.0500046865496993e-08, "loss": 0.3857545852661133, "step": 7259 }, { "epoch": 1.9728260869565217, "grad_norm": 1.189631021470486, "learning_rate": 1.0295207986342759e-08, "loss": 0.32732266187667847, "step": 7260 }, { "epoch": 1.9730978260869565, "grad_norm": 1.195511474662425, "learning_rate": 1.0092385845522634e-08, "loss": 0.3681017756462097, "step": 7261 }, { "epoch": 1.9733695652173913, "grad_norm": 1.5019931210910733, "learning_rate": 9.891580483981645e-09, "loss": 0.5353784561157227, "step": 7262 }, { "epoch": 1.9736413043478263, "grad_norm": 1.2980667988894523, "learning_rate": 9.692791942258473e-09, "loss": 0.4625912308692932, "step": 7263 }, { "epoch": 1.973913043478261, "grad_norm": 1.0997537608607826, "learning_rate": 9.49602026048657e-09, "loss": 0.33410096168518066, "step": 7264 }, { "epoch": 1.9741847826086958, "grad_norm": 1.2771672406774417, "learning_rate": 9.30126547838861e-09, "loss": 0.3876801133155823, "step": 7265 }, { "epoch": 1.9744565217391306, "grad_norm": 1.0610125857800299, "learning_rate": 9.108527635284248e-09, "loss": 0.34641703963279724, "step": 7266 }, { "epoch": 1.9747282608695653, "grad_norm": 1.3141987212166524, "learning_rate": 8.91780677008236e-09, "loss": 0.39870744943618774, "step": 7267 }, { "epoch": 1.975, "grad_norm": 1.151009729497788, "learning_rate": 8.729102921285481e-09, "loss": 0.3418155312538147, "step": 7268 }, { "epoch": 1.9752717391304349, "grad_norm": 1.1536961069073957, "learning_rate": 8.542416126989805e-09, "loss": 0.3406527042388916, "step": 7269 }, { "epoch": 1.9755434782608696, "grad_norm": 1.3558883832766517, "learning_rate": 8.357746424884072e-09, "loss": 0.3779221177101135, "step": 7270 }, { "epoch": 1.9758152173913044, "grad_norm": 1.331048359288516, "learning_rate": 8.175093852250682e-09, "loss": 0.4559659957885742, "step": 7271 }, { "epoch": 1.9760869565217392, "grad_norm": 1.3426448520849061, "learning_rate": 7.994458445963471e-09, "loss": 0.3844466209411621, "step": 7272 }, { "epoch": 1.976358695652174, "grad_norm": 1.229775424923443, "learning_rate": 7.815840242487716e-09, "loss": 0.3499528169631958, "step": 7273 }, { "epoch": 1.9766304347826087, "grad_norm": 1.351377337271896, "learning_rate": 7.639239277885679e-09, "loss": 0.4398081302642822, "step": 7274 }, { "epoch": 1.9769021739130435, "grad_norm": 1.3300718497713904, "learning_rate": 7.464655587807734e-09, "loss": 0.40970271825790405, "step": 7275 }, { "epoch": 1.9771739130434782, "grad_norm": 1.1573852760156176, "learning_rate": 7.2920892075001305e-09, "loss": 0.3806554973125458, "step": 7276 }, { "epoch": 1.977445652173913, "grad_norm": 1.3436930603247081, "learning_rate": 7.121540171800556e-09, "loss": 0.41580986976623535, "step": 7277 }, { "epoch": 1.9777173913043478, "grad_norm": 1.2186310530929245, "learning_rate": 6.953008515140358e-09, "loss": 0.3469700813293457, "step": 7278 }, { "epoch": 1.9779891304347825, "grad_norm": 1.2591132497170294, "learning_rate": 6.786494271542321e-09, "loss": 0.3462406098842621, "step": 7279 }, { "epoch": 1.9782608695652173, "grad_norm": 1.2818576604753957, "learning_rate": 6.621997474622888e-09, "loss": 0.3644425868988037, "step": 7280 }, { "epoch": 1.978532608695652, "grad_norm": 1.1209410231177903, "learning_rate": 6.4595181575910496e-09, "loss": 0.3427906036376953, "step": 7281 }, { "epoch": 1.9788043478260868, "grad_norm": 1.2834189069536233, "learning_rate": 6.299056353248345e-09, "loss": 0.41791486740112305, "step": 7282 }, { "epoch": 1.9790760869565216, "grad_norm": 1.3446348562920034, "learning_rate": 6.1406120939877525e-09, "loss": 0.36465463042259216, "step": 7283 }, { "epoch": 1.9793478260869564, "grad_norm": 1.0323914537528356, "learning_rate": 5.984185411799237e-09, "loss": 0.32385170459747314, "step": 7284 }, { "epoch": 1.9796195652173914, "grad_norm": 1.200172808054, "learning_rate": 5.8297763382597625e-09, "loss": 0.35226231813430786, "step": 7285 }, { "epoch": 1.9798913043478261, "grad_norm": 1.3492489747276997, "learning_rate": 5.677384904543282e-09, "loss": 0.42437028884887695, "step": 7286 }, { "epoch": 1.9801630434782609, "grad_norm": 1.2355137622544874, "learning_rate": 5.5270111414129635e-09, "loss": 0.4303194284439087, "step": 7287 }, { "epoch": 1.9804347826086957, "grad_norm": 1.0349140650150883, "learning_rate": 5.378655079228967e-09, "loss": 0.26833826303482056, "step": 7288 }, { "epoch": 1.9807065217391304, "grad_norm": 1.2935598522784881, "learning_rate": 5.232316747940669e-09, "loss": 0.4229671359062195, "step": 7289 }, { "epoch": 1.9809782608695652, "grad_norm": 1.5256460177979954, "learning_rate": 5.087996177089993e-09, "loss": 0.4997873604297638, "step": 7290 }, { "epoch": 1.98125, "grad_norm": 1.2037707301452925, "learning_rate": 4.945693395813633e-09, "loss": 0.4416523575782776, "step": 7291 }, { "epoch": 1.981521739130435, "grad_norm": 1.1328818551394617, "learning_rate": 4.80540843283972e-09, "loss": 0.2797786593437195, "step": 7292 }, { "epoch": 1.9817934782608697, "grad_norm": 1.1831988935010187, "learning_rate": 4.6671413164900425e-09, "loss": 0.3275648355484009, "step": 7293 }, { "epoch": 1.9820652173913045, "grad_norm": 1.320778294118947, "learning_rate": 4.530892074676718e-09, "loss": 0.4266727566719055, "step": 7294 }, { "epoch": 1.9823369565217392, "grad_norm": 1.233758579552739, "learning_rate": 4.396660734906633e-09, "loss": 0.42066237330436707, "step": 7295 }, { "epoch": 1.982608695652174, "grad_norm": 1.5405156510028457, "learning_rate": 4.26444732427922e-09, "loss": 0.5838204622268677, "step": 7296 }, { "epoch": 1.9828804347826088, "grad_norm": 1.1166397387802818, "learning_rate": 4.13425186948535e-09, "loss": 0.31292450428009033, "step": 7297 }, { "epoch": 1.9831521739130435, "grad_norm": 1.0816119720033799, "learning_rate": 4.006074396809556e-09, "loss": 0.38978296518325806, "step": 7298 }, { "epoch": 1.9834239130434783, "grad_norm": 1.271428980402056, "learning_rate": 3.879914932127804e-09, "loss": 0.40001338720321655, "step": 7299 }, { "epoch": 1.983695652173913, "grad_norm": 1.2647465418226866, "learning_rate": 3.75577350090861e-09, "loss": 0.35587334632873535, "step": 7300 }, { "epoch": 1.9839673913043478, "grad_norm": 1.3005547195686993, "learning_rate": 3.6336501282163704e-09, "loss": 0.44139552116394043, "step": 7301 }, { "epoch": 1.9842391304347826, "grad_norm": 1.2391459644809562, "learning_rate": 3.5135448387035866e-09, "loss": 0.3863281011581421, "step": 7302 }, { "epoch": 1.9845108695652174, "grad_norm": 1.3195034574165652, "learning_rate": 3.395457656618639e-09, "loss": 0.38909199833869934, "step": 7303 }, { "epoch": 1.9847826086956522, "grad_norm": 1.2410954247765043, "learning_rate": 3.2793886057991277e-09, "loss": 0.4275544583797455, "step": 7304 }, { "epoch": 1.985054347826087, "grad_norm": 1.4588933903823165, "learning_rate": 3.165337709679639e-09, "loss": 0.46730929613113403, "step": 7305 }, { "epoch": 1.9853260869565217, "grad_norm": 1.270057829770368, "learning_rate": 3.053304991282868e-09, "loss": 0.3961200714111328, "step": 7306 }, { "epoch": 1.9855978260869565, "grad_norm": 1.312030943237769, "learning_rate": 2.9432904732273894e-09, "loss": 0.42694956064224243, "step": 7307 }, { "epoch": 1.9858695652173912, "grad_norm": 1.1408611957620503, "learning_rate": 2.8352941777232135e-09, "loss": 0.3647082448005676, "step": 7308 }, { "epoch": 1.986141304347826, "grad_norm": 1.122834184138448, "learning_rate": 2.7293161265729007e-09, "loss": 0.34374088048934937, "step": 7309 }, { "epoch": 1.9864130434782608, "grad_norm": 1.7952079630208615, "learning_rate": 2.6253563411704485e-09, "loss": 0.46902593970298767, "step": 7310 }, { "epoch": 1.9866847826086955, "grad_norm": 1.2465435732546977, "learning_rate": 2.523414842503513e-09, "loss": 0.37934404611587524, "step": 7311 }, { "epoch": 1.9869565217391303, "grad_norm": 1.2755700340986935, "learning_rate": 2.423491651153409e-09, "loss": 0.36533212661743164, "step": 7312 }, { "epoch": 1.987228260869565, "grad_norm": 1.3374497300766788, "learning_rate": 2.3255867872928885e-09, "loss": 0.4075353741645813, "step": 7313 }, { "epoch": 1.9875, "grad_norm": 1.1978791796230543, "learning_rate": 2.2297002706850313e-09, "loss": 0.3765418529510498, "step": 7314 }, { "epoch": 1.9877717391304348, "grad_norm": 1.5497834800241876, "learning_rate": 2.1358321206899067e-09, "loss": 0.5168664455413818, "step": 7315 }, { "epoch": 1.9880434782608696, "grad_norm": 1.2879542347084454, "learning_rate": 2.0439823562568017e-09, "loss": 0.3998711407184601, "step": 7316 }, { "epoch": 1.9883152173913043, "grad_norm": 1.2562015924597947, "learning_rate": 1.9541509959275506e-09, "loss": 0.3808039426803589, "step": 7317 }, { "epoch": 1.9885869565217391, "grad_norm": 1.3179622325673017, "learning_rate": 1.866338057838757e-09, "loss": 0.46600472927093506, "step": 7318 }, { "epoch": 1.9888586956521739, "grad_norm": 1.1655648200445714, "learning_rate": 1.7805435597184617e-09, "loss": 0.3445856273174286, "step": 7319 }, { "epoch": 1.9891304347826086, "grad_norm": 1.2428680122969493, "learning_rate": 1.6967675188861443e-09, "loss": 0.35187697410583496, "step": 7320 }, { "epoch": 1.9894021739130436, "grad_norm": 1.3481445294183918, "learning_rate": 1.6150099522549423e-09, "loss": 0.37872979044914246, "step": 7321 }, { "epoch": 1.9896739130434784, "grad_norm": 1.2679813930934694, "learning_rate": 1.5352708763305412e-09, "loss": 0.3831833600997925, "step": 7322 }, { "epoch": 1.9899456521739132, "grad_norm": 1.3342201565493204, "learning_rate": 1.4575503072100649e-09, "loss": 0.40326011180877686, "step": 7323 }, { "epoch": 1.990217391304348, "grad_norm": 1.0488903901945772, "learning_rate": 1.3818482605842954e-09, "loss": 0.3242691457271576, "step": 7324 }, { "epoch": 1.9904891304347827, "grad_norm": 1.273106100501595, "learning_rate": 1.3081647517354522e-09, "loss": 0.42408281564712524, "step": 7325 }, { "epoch": 1.9907608695652175, "grad_norm": 1.1131251691647515, "learning_rate": 1.2364997955405245e-09, "loss": 0.3476192355155945, "step": 7326 }, { "epoch": 1.9910326086956522, "grad_norm": 1.4781531781149972, "learning_rate": 1.1668534064657179e-09, "loss": 0.4619982838630676, "step": 7327 }, { "epoch": 1.991304347826087, "grad_norm": 1.3273479054508752, "learning_rate": 1.0992255985731171e-09, "loss": 0.4066306948661804, "step": 7328 }, { "epoch": 1.9915760869565218, "grad_norm": 1.262703839488733, "learning_rate": 1.0336163855129143e-09, "loss": 0.4149545133113861, "step": 7329 }, { "epoch": 1.9918478260869565, "grad_norm": 1.1439637058263463, "learning_rate": 9.70025780532291e-10, "loss": 0.35139644145965576, "step": 7330 }, { "epoch": 1.9921195652173913, "grad_norm": 1.0682556769445408, "learning_rate": 9.084537964687556e-10, "loss": 0.3302580714225769, "step": 7331 }, { "epoch": 1.992391304347826, "grad_norm": 1.3588416982098594, "learning_rate": 8.489004457523653e-10, "loss": 0.43688929080963135, "step": 7332 }, { "epoch": 1.9926630434782608, "grad_norm": 1.0986083204699262, "learning_rate": 7.913657404068353e-10, "loss": 0.3264853358268738, "step": 7333 }, { "epoch": 1.9929347826086956, "grad_norm": 1.2933457873201828, "learning_rate": 7.358496920450986e-10, "loss": 0.4793882966041565, "step": 7334 }, { "epoch": 1.9932065217391304, "grad_norm": 1.2970211493032366, "learning_rate": 6.82352311877077e-10, "loss": 0.40712255239486694, "step": 7335 }, { "epoch": 1.9934782608695651, "grad_norm": 1.1671089393931295, "learning_rate": 6.308736107019098e-10, "loss": 0.3109266757965088, "step": 7336 }, { "epoch": 1.99375, "grad_norm": 1.1772317461814226, "learning_rate": 5.814135989112846e-10, "loss": 0.4038925766944885, "step": 7337 }, { "epoch": 1.9940217391304347, "grad_norm": 1.3396964099472362, "learning_rate": 5.339722864927677e-10, "loss": 0.43845900893211365, "step": 7338 }, { "epoch": 1.9942934782608694, "grad_norm": 1.2662607900717184, "learning_rate": 4.885496830220327e-10, "loss": 0.4352239668369293, "step": 7339 }, { "epoch": 1.9945652173913042, "grad_norm": 1.3068915758273403, "learning_rate": 4.451457976684115e-10, "loss": 0.49976566433906555, "step": 7340 }, { "epoch": 1.994836956521739, "grad_norm": 1.2298587424008296, "learning_rate": 4.037606391960047e-10, "loss": 0.4158872961997986, "step": 7341 }, { "epoch": 1.9951086956521737, "grad_norm": 1.4179048375996057, "learning_rate": 3.6439421595924065e-10, "loss": 0.48268377780914307, "step": 7342 }, { "epoch": 1.9953804347826087, "grad_norm": 1.0838017272638456, "learning_rate": 3.270465359062058e-10, "loss": 0.31055542826652527, "step": 7343 }, { "epoch": 1.9956521739130435, "grad_norm": 1.2462104873488387, "learning_rate": 2.9171760657420446e-10, "loss": 0.44288355112075806, "step": 7344 }, { "epoch": 1.9959239130434783, "grad_norm": 1.435131743509379, "learning_rate": 2.584074350986399e-10, "loss": 0.4193897247314453, "step": 7345 }, { "epoch": 1.996195652173913, "grad_norm": 0.8051937592670559, "learning_rate": 2.2711602820191248e-10, "loss": 0.18798944354057312, "step": 7346 }, { "epoch": 1.9964673913043478, "grad_norm": 1.1580288333694586, "learning_rate": 1.9784339220230153e-10, "loss": 0.3565661311149597, "step": 7347 }, { "epoch": 1.9967391304347826, "grad_norm": 1.239920500960355, "learning_rate": 1.7058953300841396e-10, "loss": 0.41519472002983093, "step": 7348 }, { "epoch": 1.9970108695652173, "grad_norm": 1.2834115439837164, "learning_rate": 1.453544561236253e-10, "loss": 0.4783967435359955, "step": 7349 }, { "epoch": 1.9972826086956523, "grad_norm": 1.3999294509687201, "learning_rate": 1.221381666416388e-10, "loss": 0.46060043573379517, "step": 7350 }, { "epoch": 1.997554347826087, "grad_norm": 1.392949763127715, "learning_rate": 1.0094066924981605e-10, "loss": 0.47506648302078247, "step": 7351 }, { "epoch": 1.9978260869565219, "grad_norm": 1.1770157280690732, "learning_rate": 8.176196822695659e-11, "loss": 0.36407673358917236, "step": 7352 }, { "epoch": 1.9980978260869566, "grad_norm": 1.4085888226277086, "learning_rate": 6.460206744551834e-11, "loss": 0.4331989586353302, "step": 7353 }, { "epoch": 1.9983695652173914, "grad_norm": 0.9266755230970457, "learning_rate": 4.946097036939712e-11, "loss": 0.2670395076274872, "step": 7354 }, { "epoch": 1.9986413043478262, "grad_norm": 1.1740762641573737, "learning_rate": 3.633868005614716e-11, "loss": 0.3756300210952759, "step": 7355 }, { "epoch": 1.998913043478261, "grad_norm": 1.0784808962467565, "learning_rate": 2.523519915365036e-11, "loss": 0.3273250460624695, "step": 7356 }, { "epoch": 1.9991847826086957, "grad_norm": 1.3025185811251778, "learning_rate": 1.615052990344701e-11, "loss": 0.431853711605072, "step": 7357 }, { "epoch": 1.9994565217391305, "grad_norm": 1.229825318925008, "learning_rate": 9.084674140735772e-12, "loss": 0.43327802419662476, "step": 7358 }, { "epoch": 1.9997282608695652, "grad_norm": 1.0539254464377523, "learning_rate": 4.037633291043008e-12, "loss": 0.3650134205818176, "step": 7359 }, { "epoch": 2.0, "grad_norm": 1.0240411674848098, "learning_rate": 1.0094083735534555e-12, "loss": 0.2778973877429962, "step": 7360 }, { "epoch": 2.0, "step": 7360, "total_flos": 4478140117352448.0, "train_loss": 0.46655594361541064, "train_runtime": 34704.4823, "train_samples_per_second": 1.697, "train_steps_per_second": 0.212 } ], "logging_steps": 1, "max_steps": 7360, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4478140117352448.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }